sfeed

Simple RSS and Atom feed parser
git clone https://git.sinitax.com/codemadness/sfeed
Log | Files | Refs | README | LICENSE | Upstream | sfeed.txt

commit c75f540ac59c5d6e3676878170c42d35b11d0c34
parent 77a603a904087dd9fd3350da029f279f076e4f4b
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Sun, 31 Jan 2016 15:38:19 +0100

add sfeed_tail (test), might be removed again later

fix Makefile (compat)

Diffstat:
M Makefile | 34 ++++++++++++++++++++--------------
A sfeed_tail.1 | 41 +++++++++++++++++++++++++++++++++++++++++
A sfeed_tail.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 215 insertions(+), 14 deletions(-)

diff --git a/Makefile b/Makefile @@ -9,6 +9,7 @@ SRC = \ sfeed_mbox.c\ sfeed_opml_import.c\ sfeed_plain.c\ + sfeed_tail.c\ sfeed_web.c\ sfeed_xmlenc.c\ util.c\ @@ -22,6 +23,7 @@ BIN = \ sfeed_mbox\ sfeed_opml_import\ sfeed_plain\ + sfeed_tail\ sfeed_web\ sfeed_xmlenc SCRIPTS = \ @@ -35,6 +37,7 @@ MAN1 = \ sfeed_opml_export.1\ sfeed_opml_import.1\ sfeed_plain.1\ + sfeed_tail.1\ sfeed_update.1\ sfeed_web.1\ sfeed_xmlenc.1 @@ -71,26 +74,29 @@ dist: $(BIN) ${OBJ}: config.mk ${HDR} -sfeed: sfeed.o xml.o util.o ${EXTRAOBJ} - ${CC} -o $@ sfeed.o xml.o util.o ${EXTRAOBJ} ${LDFLAGS} +sfeed: sfeed.o xml.o util.o + ${CC} -o $@ sfeed.o xml.o util.o ${LDFLAGS} -sfeed_frames: sfeed_frames.o util.o ${EXTRAOBJ} - ${CC} -o $@ sfeed_frames.o util.o ${EXTRAOBJ} ${LDFLAGS} +sfeed_frames: sfeed_frames.o util.o + ${CC} -o $@ sfeed_frames.o util.o ${LDFLAGS} -sfeed_html: sfeed_html.o util.o ${EXTRAOBJ} - ${CC} -o $@ sfeed_html.o util.o ${EXTRAOBJ} ${LDFLAGS} +sfeed_html: sfeed_html.o util.o + ${CC} -o $@ sfeed_html.o util.o ${LDFLAGS} -sfeed_mbox: sfeed_mbox.o util.o ${EXTRAOBJ} - ${CC} -o $@ sfeed_mbox.o util.o ${EXTRAOBJ} ${LDFLAGS} +sfeed_mbox: sfeed_mbox.o util.o + ${CC} -o $@ sfeed_mbox.o util.o ${LDFLAGS} -sfeed_opml_import: sfeed_opml_import.o xml.o ${EXTRAOBJ} - ${CC} -o $@ sfeed_opml_import.o xml.o ${EXTRAOBJ} ${LDFLAGS} +sfeed_opml_import: sfeed_opml_import.o xml.o + ${CC} -o $@ sfeed_opml_import.o xml.o ${LDFLAGS} -sfeed_plain: sfeed_plain.o util.o ${EXTRAOBJ} - ${CC} -o $@ sfeed_plain.o util.o ${EXTRAOBJ} ${LDFLAGS} +sfeed_plain: sfeed_plain.o util.o + ${CC} -o $@ sfeed_plain.o util.o ${LDFLAGS} -sfeed_web: sfeed_web.o xml.o util.o ${EXTRAOBJ} - ${CC} -o $@ sfeed_web.o xml.o util.o ${EXTRAOBJ} ${LDFLAGS} +sfeed_tail: sfeed_tail.o util.o + ${CC} -o $@ sfeed_tail.o util.o ${LDFLAGS} + +sfeed_web: sfeed_web.o xml.o util.o + ${CC} -o $@ sfeed_web.o xml.o util.o ${LDFLAGS} sfeed_xmlenc: sfeed_xmlenc.o xml.o ${CC} -o $@ sfeed_xmlenc.o xml.o ${LDFLAGS} diff --git a/sfeed_tail.1 
b/sfeed_tail.1 @@ -0,0 +1,41 @@ +.Dd January 29, 2016 +.Dt SFEED_TAIL 1 +.Os +.Sh NAME +.Nm sfeed_tail +.Nd format unseen feed data to a plain-text list +.Sh SYNOPSIS +.Nm +.Op Ar file... +.Sh DESCRIPTION +.Nm +formats unseen feed data (TSV) from +.Xr sfeed 1 +from stdin or +.Ar file +to stdout as a plain-text list. If one or more +.Ar file +are specified, the basename of the +.Ar file +is used as the feed name in the output. If no +.Ar file +parameters are specified and so the data is read from stdin the feed name +is empty. +.Pp +.Nm +will mark the initial items on the first run as seen. Then it will print the +initial items if data is read from stdin, if the data is read on the first +run by specifying the argument(s) +.Ar file +it will not show the initial seen items. +.Pp +Unseen items are printed per line in a similar format to +.Xr sfeed_plain 1 , +duplicate items are ignored. The list of unique items is determined by the +fields: feedname, item id and UNIX timestamp of the item date. +.Sh SEE ALSO +.Xr sfeed 1 , +.Xr sfeed_plain 1 , +.Xr tail 1 +.Sh AUTHORS +.An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org diff --git a/sfeed_tail.c b/sfeed_tail.c @@ -0,0 +1,154 @@ +#include <ctype.h> +#include <err.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <wchar.h> + +#include "util.h" + +static int firsttime; +static char *line; +static size_t linesize; + +struct line { + char *timestamp; + char *id; + struct line *next; +}; + +/* number of hash slots per bucket: a bigger size uses more memory, but has fewer collisions. 
*/ +#define BUCKET_SIZE 65535 +struct bucket { + struct line cols[BUCKET_SIZE]; +}; +static struct bucket *buckets; +static struct bucket *bucket; + +static char * +estrdup(const char *s) +{ + char *p; + + if (!(p = strdup(s))) + err(1, "strdup"); + return p; +} + +static void * +ecalloc(size_t nmemb, size_t size) +{ + void *p; + + if (!(p = calloc(nmemb, size))) + err(1, "calloc"); + return p; +} + +/* Jenkins one-at-a-time hash of the NUL-terminated string `s'. */ +static uint32_t +jenkins1(const char *s) +{ + uint32_t hash = 0; + + for (; *s; s++) { + hash += (int)*s; + hash += (hash << 10); + hash ^= (hash >> 6); + } + hash += (hash << 3); + hash ^= (hash >> 11); + + return hash + (hash << 15); +} + +/* print `len' columns of characters of the string `s' to `fp'. If the string is + * shorter, pad the rest with the character `pad'. */ +static void +printutf8pad(FILE *fp, const char *s, size_t len, int pad) +{ + wchar_t w; + size_t n = 0, i; + int r; + + for (i = 0; *s && n < len; i++, s++) { + if (ISUTF8(*s)) { + if ((r = mbtowc(&w, s, 4)) == -1) + break; + if ((r = wcwidth(w)) == -1) + r = 1; + n += (size_t)r; + } + putc(*s, fp); + } + for (; n < len; n++) + putc(pad, fp); +} + +static void +printfeed(FILE *fp, const char *feedname) +{ + struct line *match; + char *fields[FieldLast]; + uint32_t hash; + int uniq; + + while (parseline(&line, &linesize, fields, fp) > 0) { + hash = (jenkins1(fields[FieldUnixTimestamp]) + + jenkins1(fields[FieldId])) % BUCKET_SIZE; + for (uniq = 1, match = &(bucket->cols[hash]); + match; + match = match->next) { + /* a shared hash slot is not yet a duplicate: compare both id and timestamp. 
*/ + if (match->id && !strcmp(match->id, fields[FieldId]) && + match->timestamp && !strcmp(match->timestamp, fields[FieldUnixTimestamp])) { + uniq = 0; + break; + } + /* end of the chain reached without a match: store the item as seen */ + if (!match->next) { + match = match->next = ecalloc(1, sizeof(struct line)); + match->id = estrdup(fields[FieldId]); + match->timestamp = estrdup(fields[FieldUnixTimestamp]); + break; + } + } + if (!uniq || firsttime) + continue; + if (feedname[0]) + printf("%-15.15s %-30.30s", + feedname, fields[FieldTimeFormatted]); + printutf8pad(stdout, fields[FieldTitle], 70, ' '); + printf(" %s\n", fields[FieldLink]); + } +} + +int +main(int argc, char *argv[]) +{ + char *name; + FILE *fp; + int i; + + bucket = buckets = ecalloc(argc, sizeof(struct bucket)); + for (firsttime = (argc > 1); ; firsttime = 0) { + if (argc == 1) { + printfeed(stdin, ""); + } else { + for (i = 1; i < argc; i++) { + bucket = &buckets[i - 1]; + if (!(fp = fopen(argv[i], "r"))) + err(1, "fopen: %s", argv[i]); + name = xbasename(argv[i]); + printfeed(fp, name); + free(name); + if (ferror(fp)) + err(1, "ferror: %s", argv[i]); + fclose(fp); + } + } + sleep(60); + } + return 0; +}