sfeed

Simple RSS and Atom feed parser
git clone https://git.sinitax.com/codemadness/sfeed
Log | Files | Refs | README | LICENSE | Upstream | sfeed.txt

commit ace0f818d172c81e23783b9c7e571464dcd0f604
parent 6446070da557bf8b56fa44b2bbdc4690edf490a9
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Wed, 22 Aug 2018 16:29:20 +0200

sfeed_tail: improvements

- sfeed_tail now reads only from files and no longer from stdin; reading from
  stdin had too many caveats.
- Instead of relying on a fixed timer, it now checks the files for changes (size
  and modification time), so it detects changes faster and uses memory more
  efficiently.
- Improve documentation of its behaviour.

Diffstat:
Msfeed_tail.1 | 35++++++++++++++---------------------
Msfeed_tail.c | 62+++++++++++++++++++++++++++++++-------------------------------
2 files changed, 45 insertions(+), 52 deletions(-)

diff --git a/sfeed_tail.1 b/sfeed_tail.1 @@ -1,4 +1,4 @@ -.Dd January 29, 2016 +.Dd August 22, 2018 .Dt SFEED_TAIL 1 .Os .Sh NAME @@ -6,40 +6,33 @@ .Nd format unseen feed data to a plain-text list .Sh SYNOPSIS .Nm -.Op Ar file... +.Ar file... .Sh DESCRIPTION .Nm -formats unseen feed data (TSV) from -.Xr sfeed 1 -from stdin or +formats only new and unseen feed data (TSV) from +one or more .Ar file to stdout as a plain-text list. -If one or more -.Ar file -are specified, the basename of the +The basename of the .Ar file is used as the feed name in the output. -If no -.Ar file -parameters are specified and so the data is read from stdin the feed name -is empty. -.Pp -.Nm -will mark the initial items on the first run as seen. -Then it will print the initial items if data is read from stdin, if the data -is read on the first run by specifying the argument(s) -.Ar file -it will not show the initial seen items. .Pp Unseen items are printed per line in a similar format to .Xr sfeed_plain 1 , duplicate items are ignored. -The list of unique items is determined by the fields: feedname, item id and +The list of unique items is determined by the fields: item id, item link and UNIX timestamp of the item date. +.Pp +.Nm +will also only process and show items that are considered new: the item +timestamp is not older than a day ago. .Sh IMPLEMENTATION NOTES +.Nm +checks for file modifications each second by checking the filesize and +modification time. Keep in mind that because .Nm -keeps a list of all the items it can consume much memory. +keeps a list of items it can potentially consume much memory. 
.Sh SEE ALSO .Xr sfeed 1 , .Xr sfeed_plain 1 , diff --git a/sfeed_tail.c b/sfeed_tail.c @@ -1,3 +1,4 @@ +#include <sys/stat.h> #include <sys/types.h> #include <ctype.h> @@ -11,11 +12,10 @@ #include "tree.h" #include "util.h" -static int firsttime; -static int sleepsecs; static char *line; static size_t linesize; -time_t comparetime; +static int changed; +static time_t comparetime; struct line { char *id; @@ -47,8 +47,6 @@ gc(void) RB_FOREACH_SAFE(line, linetree, &head, tmp) { if (line->timestamp < comparetime) { -/* printf("DEBUG: gc: removing: %s %s\n", - line->id, line->title);*/ free(line->id); free(line->link); free(line->title); @@ -90,8 +88,7 @@ printfeed(FILE *fp, const char *feedname) if (RB_FIND(linetree, &head, &search)) continue; -/* printf("DEBUG: new: id: %s, link: %s, title: %s\n", - fields[FieldId], fields[FieldLink], fields[FieldTitle]);*/ + changed = 1; if (!(add = calloc(1, sizeof(*add)))) err(1, "calloc"); @@ -104,9 +101,6 @@ printfeed(FILE *fp, const char *feedname) add->timestamp = parsedtime; RB_INSERT(linetree, &head, add); - if (firsttime) - continue; - if (feedname[0]) { printutf8pad(stdout, feedname, 15, ' '); fputs(" ", stdout); @@ -123,6 +117,7 @@ printfeed(FILE *fp, const char *feedname) int main(int argc, char *argv[]) { + struct stat *stfiles, st; char *name; FILE *fp; int i, slept = 0; @@ -130,45 +125,50 @@ main(int argc, char *argv[]) if (pledge("stdio rpath", NULL) == -1) err(1, "pledge"); + if (argc <= 1) { + fprintf(stderr, "usage: %s <file>...\n", argv[0]); + return 1; + } + setlocale(LC_CTYPE, ""); - if (pledge(argc == 1 ? 
"stdio" : "stdio rpath", NULL) == -1) - err(1, "pledge"); + if (!(stfiles = calloc(argc - 1, sizeof(*stfiles)))) + err(1, "calloc"); - if (argc == 1) - sleepsecs = 1; - else - sleepsecs = 300; + while (1) { + changed = 0; - for (firsttime = (argc > 1); ; firsttime = 0) { if ((comparetime = time(NULL)) == -1) err(1, "time"); /* 1 day is old news */ comparetime -= 86400; - if (argc == 1) { - printfeed(stdin, ""); - } else { - for (i = 1; i < argc; i++) { - if (!(fp = fopen(argv[i], "r"))) - err(1, "fopen: %s", argv[i]); + + for (i = 1; i < argc; i++) { + if (!(fp = fopen(argv[i], "r"))) + err(1, "fopen: %s", argv[i]); + if (fstat(fileno(fp), &st) == -1) + err(1, "fstat: %s", argv[i]); + + /* did the file change? by size, modification */ + if (stfiles[i - 1].st_size != st.st_size || + stfiles[i - 1].st_mtime != st.st_mtime) { name = ((name = strrchr(argv[i], '/'))) ? name + 1 : argv[i]; printfeed(fp, name); if (ferror(fp)) err(1, "ferror: %s", argv[i]); - fclose(fp); } + memcpy(&stfiles[i - 1], &st, sizeof(st)); + fclose(fp); } - /* DEBUG: TODO: gc first run. */ - gc(); - - sleep(sleepsecs); - slept += sleepsecs; - /* gc once every hour (excluding run-time) */ - if (slept >= 3600) { + /* "garbage collect" on a change or every 5 minutes */ + if (changed || slept > 10) { gc(); + changed = 0; slept = 0; } + sleep(1); + slept++; } return 0; }