sfeed

Simple RSS and Atom feed parser
git clone https://git.sinitax.com/codemadness/sfeed
Log | Files | Refs | README | LICENSE | Upstream | sfeed.txt

commit b7e288a96418e1ea5e7904ab2896edb3f4615a10
parent a3b6627ae945c11af92c8bb079bf4a12b8ae4c28
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Thu, 16 Aug 2018 14:16:58 +0200

sfeed_frames: overhaul

sfeed_frames used to write HTML pages for each entry for each feed. This can
be useful but had security issues, because the context of the content changes.

sfeed_frames is now an HTML version which works better with browsers that don't
support CSS or tables well, such as w3m and lynx. It is now an alternative to
sfeed_html.

- Don't reference and embed HTML content for security reasons. This was
  documented under "SECURITY CONSIDERATIONS" in the man page.
- Tighten pledge(2).
- Simplify

Diffstat:
M sfeed_frames.1 | 31 ++++++++-----------------------
M sfeed_frames.c | 208 +++++++++++--------------------------------------------------------------------
2 files changed, 36 insertions(+), 203 deletions(-)

diff --git a/sfeed_frames.1 b/sfeed_frames.1 @@ -1,4 +1,4 @@ -.Dd August 5, 2015 +.Dd August 16, 2018 .Dt SFEED_FRAMES 1 .Os .Sh NAME @@ -14,39 +14,24 @@ formats feed data (TSV) from to HTML. It reads TSV data from stdin or .Ar file -and writes HTML files to the current directory. +and writes HTML files for the frameset to the current directory. If no .Ar file parameters are specified and therefore the data is read from stdin then the -feed name is named "unnamed". +menu.html file is not written. .Sh FILES WRITTEN .Bl -tag -width 13n .It index.html -The main HTML file referencing to the frames items.html and -menu.html. +The main HTML file referencing to the frames items.html and menu.html. .It items.html -Contains all the items as HTML links to the local content. +The items frame contains all the item HTML links to the remote content. .It menu.html -Menu frame which contains navigation "anchor" links to the feed names -in items.html. +The menu frame which contains navigation "anchor" links to the feed names in +items.html. .El -.Sh FILE STRUCTURE -Items for each feed category are in the format: feedname/itemname.html. -The feedname and item names are normalized, whitespace characters are replaced -with a - character, multiple whitespaces are replaced by a single - character -and trailing whitespace will be removed. -The itemname is based on the title of the items. -The feedname and title is truncated to a maximum of 128 bytes. -The maximum length of the path is PATH_MAX or filesystem-specific (truncated). .Sh SEE ALSO .Xr sfeed 1 , +.Xr sfeed_html 1 , .Xr sfeed_plain 1 .Sh AUTHORS .An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org -.Sh SECURITY CONSIDERATIONS -Each item content file contains the content formatted as HTML, if the feed data -contains HTML like Javascripts, tracking cookies, custom styles and such -these will also be displayed. -Due to the crazy nature of "the web" these things are complex to filter. 
-Some security and privacy can be gained by using an adblocker, script blocker -and to set your browser settings more strictly. diff --git a/sfeed_frames.c b/sfeed_frames.c @@ -12,120 +12,23 @@ #include <string.h> #include <time.h> #include <unistd.h> -#include <utime.h> #include "util.h" static struct feed **feeds; static char *line; static size_t linesize; -static struct timespec times[2]; static time_t comparetime; static unsigned long totalnew; -/* Unescape / decode fields printed by string_print_encoded() - * "\\" to "\", "\t", to TAB, "\n" to newline. Unrecognised escape sequences - * are ignored: "\z" etc. */ -static void -printcontent(const char *s, FILE *fp) -{ - for (; *s; s++) { - switch (*s) { - case '\\': - switch (*(++s)) { - case '\0': return; /* ignore */ - case '\\': fputc('\\', fp); break; - case 't': fputc('\t', fp); break; - case 'n': fputc('\n', fp); break; - } - break; - default: - fputc((int)*s, fp); - } - } -} - -/* Unescape / decode fields printed by string_print_encoded() - * "\\" to "\", "\t", to TAB, "\n" to newline. Unrecognised escape sequences - * are ignored: "\z" etc. Encode HTML 2.0 / XML 1.0 entities. 
*/ -static void -printcontentxml(const char *s, FILE *fp) -{ - for (; *s; s++) { - switch (*s) { - case '\\': - switch (*(++s)) { - case '\0': return; /* ignore */ - case '\\': fputc('\\', fp); break; - case 't': fputc('\t', fp); break; - case 'n': fputc('\n', fp); break; - } - break; - /* XML entities */ - case '<': fputs("&lt;", fp); break; - case '>': fputs("&gt;", fp); break; - case '\'': fputs("&#39;", fp); break; - case '&': fputs("&amp;", fp); break; - case '"': fputs("&quot;", fp); break; - default: fputc((int)*s, fp); - } - } -} - -/* normalize path names, transform to lower-case and replace non-alpha and - * non-digit with '-' */ -static size_t -normalizepath(const char *path, char *buf, size_t bufsiz) -{ - size_t i, r = 0; - - for (i = 0; *path && i < bufsiz; path++) { - if (isalpha((int)*path) || isdigit((int)*path)) { - buf[i++] = tolower((int)*path); - r = 0; - } else { - /* don't repeat '-', don't start with '-' */ - if (!r && i) - buf[i++] = '-'; - r++; - } - } - /* remove trailing '-' */ - for (; i > 0 && (buf[i - 1] == '-'); i--) - ; - - if (bufsiz > 0) - buf[i] = '\0'; - - return i; -} - static void printfeed(FILE *fpitems, FILE *fpin, struct feed *f) { - char dirpath[PATH_MAX], filepath[PATH_MAX]; - char *fields[FieldLast], *feedname, name[128]; + char *fields[FieldLast]; ssize_t linelen; - FILE *fpcontent = NULL; unsigned int isnew; struct tm *tm; time_t parsedtime; - int fd, r; - - if (f->name[0]) - feedname = f->name; - else - feedname = "unnamed"; - - /* make directory for feedname */ - if (!normalizepath(feedname, name, sizeof(name))) - return; - - strlcpy(dirpath, name, sizeof(dirpath)); - - /* error creating directory and it doesn't exist. 
*/ - if (mkdir(dirpath, S_IRWXU | S_IRWXG | S_IRWXO) == -1 && errno != EEXIST) - err(1, "mkdir: %s", dirpath); /* menu if not unnamed */ if (f->name[0]) { @@ -150,68 +53,6 @@ printfeed(FILE *fpitems, FILE *fpin, struct feed *f) if (!(tm = localtime(&parsedtime))) err(1, "localtime"); - if (!normalizepath(fields[FieldTitle], name, sizeof(name))) - continue; - - r = snprintf(filepath, sizeof(filepath), "%s/%s-%lld.html", - dirpath, name, (long long)parsedtime); - if (r == -1 || (size_t)r >= sizeof(filepath)) - errx(1, "snprintf: path truncation: '%s/%s-%lld.html'", - dirpath, name, (long long)parsedtime); - - /* content file doesn't exist yet and has error? */ - if ((fd = open(filepath, O_CREAT | O_EXCL | O_WRONLY, - S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)) == -1) { - if (errno != EEXIST) - err(1, "open: %s", filepath); - } else { - if (!(fpcontent = fdopen(fd, "wb"))) - err(1, "fdopen: %s", filepath); - fputs("<html><head>" - "<link rel=\"stylesheet\" type=\"text/css\" href=\"../../style.css\" />" - "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />" - "</head>\n<body class=\"frame\">" - "<div class=\"content\"><h2>", fpcontent); - - if (fields[FieldLink][0]) { - fputs("<a href=\"", fpcontent); - xmlencode(fields[FieldLink], fpcontent); - fputs("\">", fpcontent); - } - xmlencode(fields[FieldTitle], fpcontent); - if (fields[FieldLink][0]) - fputs("</a>", fpcontent); - fputs("</h2>", fpcontent); - - /* NOTE: this prints the raw HTML of the feed, this is - * potentially dangerous, it is left up to the - * user / browser to trust a feed it's HTML content. */ - if (!strcmp(fields[FieldContentType], "html")) { - printcontent(fields[FieldContent], fpcontent); - } else { - /* plain-text, wrap with <pre> */ - fputs("<pre>", fpcontent); - printcontentxml(fields[FieldContent], fpcontent); - fputs("</pre>", fpcontent); - } - fputs("</div></body></html>\n", fpcontent); - - /* set modified and access time of file to time of item. 
*/ - if (parsedtime) { - /* flush writes before setting atime and mtime - else the remaining (buffered) write can occur at - fclose() and overwrite our time again. */ - fflush(fpcontent); - - times[0].tv_sec = parsedtime; - times[1].tv_sec = parsedtime; - - if (futimens(fd, times) == -1) - err(1, "futimens"); - } - fclose(fpcontent); - } - isnew = (parsedtime >= comparetime) ? 1 : 0; totalnew += isnew; f->totalnew += isnew; @@ -223,11 +64,15 @@ printfeed(FILE *fpitems, FILE *fpin, struct feed *f) if (isnew) fputs("<b><u>", fpitems); - fputs("<a href=\"", fpitems); - fputs(filepath, fpitems); - fputs("\" target=\"content\">", fpitems); - xmlencode(fields[FieldTitle], fpitems); - fputs("</a>", fpitems); + if (fields[FieldLink][0]) { + fputs("<a href=\"", fpitems); + xmlencode(fields[FieldLink], fpitems); + fputs("\">", fpitems); + xmlencode(fields[FieldTitle], fpitems); + fputs("</a>", fpitems); + } else { + xmlencode(fields[FieldTitle], fpitems); + } if (isnew) fputs("</u></b>", fpitems); fputs("\n", fpitems); @@ -237,12 +82,12 @@ printfeed(FILE *fpitems, FILE *fpin, struct feed *f) int main(int argc, char *argv[]) { - FILE *fpindex, *fpitems, *fpmenu, *fp; + FILE *fpindex, *fpitems, *fpmenu = NULL, *fp; char *name; int i, showsidebar = (argc > 1); struct feed *f; - if (pledge("stdio rpath wpath cpath fattr", NULL) == -1) + if (pledge("stdio rpath wpath cpath", NULL) == -1) err(1, "pledge"); if (!(feeds = calloc(argc, sizeof(struct feed *)))) @@ -256,11 +101,15 @@ main(int argc, char *argv[]) /* write main index page */ if (!(fpindex = fopen("index.html", "wb"))) err(1, "fopen: index.html"); - if (!(fpmenu = fopen("menu.html", "wb"))) - err(1, "fopen: menu.html"); if (!(fpitems = fopen("items.html", "wb"))) err(1, "fopen: items.html"); - fputs("<html><head><link rel=\"stylesheet\" type=\"text/css\" href=\"../style.css\" />" + if (showsidebar && !(fpmenu = fopen("menu.html", "wb"))) + err(1, "fopen: menu.html"); + + if (pledge("stdio rpath", NULL) == -1) + err(1, 
"pledge"); + + fputs("<html><head><link rel=\"stylesheet\" type=\"text/css\" href=\"style.css\" />" "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" /></head>" "<body class=\"frame\"><div id=\"items\"><pre>", fpitems); @@ -288,7 +137,7 @@ main(int argc, char *argv[]) if (showsidebar) { fputs("<html><head>" - "<link rel=\"stylesheet\" type=\"text/css\" href=\"../style.css\" />\n" + "<link rel=\"stylesheet\" type=\"text/css\" href=\"style.css\" />\n" "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n" "</head><body class=\"frame\"><div id=\"sidebar\">", fpmenu); @@ -312,25 +161,24 @@ main(int argc, char *argv[]) } fputs("<!DOCTYPE html><html><head>\n\t<title>Newsfeed (", fpindex); fprintf(fpindex, "%lu", totalnew); - fputs(")</title>\n\t<link rel=\"stylesheet\" type=\"text/css\" href=\"../style.css\" />\n" + fputs(")</title>\n\t<link rel=\"stylesheet\" type=\"text/css\" href=\"style.css\" />\n" "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n" "</head>\n", fpindex); if (showsidebar) { - fputs("<frameset framespacing=\"0\" cols=\"200,*\" frameborder=\"1\">\n" - " <frame name=\"menu\" src=\"menu.html\" target=\"menu\">\n", fpindex); + fputs("<frameset framespacing=\"0\" cols=\"250,*\" frameborder=\"1\">\n" + "\t<frame name=\"menu\" src=\"menu.html\" target=\"menu\">\n", fpindex); } else { fputs("<frameset framespacing=\"0\" cols=\"*\" frameborder=\"1\">\n", fpindex); } - fputs("\t<frameset id=\"frameset\" framespacing=\"0\" cols=\"50%,50%\" frameborder=\"1\">\n" - "\t\t<frame name=\"items\" src=\"items.html\" target=\"items\">\n" - "\t\t<frame name=\"content\" target=\"content\">\n" - "\t</frameset>\n" + fputs( + "\t<frame name=\"items\" src=\"items.html\" target=\"items\">\n" "</frameset>\n" "</html>\n", fpindex); - fclose(fpitems); - fclose(fpmenu); fclose(fpindex); + fclose(fpitems); + if (fpmenu) + fclose(fpmenu); return 0; }