sfeed

Simple RSS and Atom feed parser
git clone https://git.sinitax.com/codemadness/sfeed
Log | Files | Refs | README | LICENSE | Upstream | sfeed.txt

commit 19430fb45636614d96a08342cd7d83774e888c2e
parent 46b756cc19e199c89fe3b090885243d1c501262b
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Thu,  6 Aug 2015 17:54:09 +0200

general cleanups

Diffstat:
M sfeed.c | 144 ++++++++++++++++++++++++++++++++++++++++----------------------------------------
M sfeed_html.1 | 6 ++----
M sfeed_mbox.1 | 11 ++++-------
M sfeed_opml_import.c | 5 ++---
M xml.c | 19 ++++++++-----------
5 files changed, 88 insertions(+), 97 deletions(-)

diff --git a/sfeed.c b/sfeed.c @@ -432,29 +432,56 @@ isattr(const char *name, size_t len, const char *name2, size_t len2) return (len == len2 && !strcasecmp(name, name2)); } -/* NOTE: this handler can be called multiple times if the data in this - * block is bigger than the buffer. */ static void -xml_handler_data(XMLParser *p, const char *s, size_t len) +xml_handler_attr(XMLParser *p, const char *tag, size_t taglen, + const char *name, size_t namelen, const char *value, + size_t valuelen) { - if (!ctx.field) + (void)tag; + (void)taglen; + + /* handles transforming inline XML to data */ + if (ISINCONTENT(ctx)) { + xml_handler_data(p, value, valuelen); return; + } - /* add only data from <name> inside <author> tag - * or any other non-<author> tag */ - if (ctx.tagid != AtomTagAuthor || !strcmp(p->tag, "name")) - string_append(ctx.field, s, len); + if (ctx.item.feedtype == FeedTypeAtom) { + if (ISCONTENTTAG(ctx)) { + if (isattr(name, namelen, STRP("type")) && + (isattr(value, valuelen, STRP("xhtml")) || + isattr(value, valuelen, STRP("text/xhtml")) || + isattr(value, valuelen, STRP("html")) || + isattr(value, valuelen, STRP("text/html")))) + { + ctx.item.contenttype = ContentTypeHTML; + p->xmlattrstart = xml_handler_attr_start; + p->xmlattrend = xml_handler_attr_end; + } + } else if (ctx.tagid == AtomTagLink && + isattr(name, namelen, STRP("href"))) + { + /* link href attribute */ + string_append(&ctx.item.link, value, valuelen); + } + } } static void -xml_handler_cdata(XMLParser *p, const char *s, size_t len) +xml_handler_attr_end(XMLParser *p, const char *tag, size_t taglen, + const char *name, size_t namelen) { - (void)p; + (void)tag; + (void)taglen; + (void)name; + (void)namelen; - if (!ctx.field) + if (!ISINCONTENT(ctx)) return; - string_append(ctx.field, s, len); + /* handles transforming inline XML to data */ + xml_handler_data(p, "\"", 1); + ctx.attrcount = 0; } static void @@ -476,55 +503,50 @@ xml_handler_attr_start(XMLParser *p, const char *tag, size_t 
taglen, } static void -xml_handler_attr_end(XMLParser *p, const char *tag, size_t taglen, - const char *name, size_t namelen) +xml_handler_cdata(XMLParser *p, const char *s, size_t len) { - (void)tag; - (void)taglen; - (void)name; - (void)namelen; + (void)p; - if (!ISINCONTENT(ctx)) + if (!ctx.field) return; - /* handles transforming inline XML to data */ - xml_handler_data(p, "\"", 1); - ctx.attrcount = 0; + string_append(ctx.field, s, len); } +/* NOTE: this handler can be called multiple times if the data in this + * block is bigger than the buffer. */ static void -xml_handler_attr(XMLParser *p, const char *tag, size_t taglen, - const char *name, size_t namelen, const char *value, - size_t valuelen) +xml_handler_data(XMLParser *p, const char *s, size_t len) { - (void)tag; - (void)taglen; + if (!ctx.field) + return; - /* handles transforming inline XML to data */ - if (ISINCONTENT(ctx)) { - xml_handler_data(p, value, valuelen); + /* add only data from <name> inside <author> tag + * or any other non-<author> tag */ + if (ctx.tagid != AtomTagAuthor || !strcmp(p->tag, "name")) + string_append(ctx.field, s, len); +} + +static void +xml_handler_data_entity(XMLParser *p, const char *data, size_t datalen) +{ + char buffer[16]; + int len; + + if (!ctx.field) return; - } - if (ctx.item.feedtype == FeedTypeAtom) { - if (ISCONTENTTAG(ctx)) { - if (isattr(name, namelen, STRP("type")) && - (isattr(value, valuelen, STRP("xhtml")) || - isattr(value, valuelen, STRP("text/xhtml")) || - isattr(value, valuelen, STRP("html")) || - isattr(value, valuelen, STRP("text/html")))) - { - ctx.item.contenttype = ContentTypeHTML; - p->xmlattrstart = xml_handler_attr_start; - p->xmlattrend = xml_handler_attr_end; - } - } else if (ctx.tagid == AtomTagLink && - isattr(name, namelen, STRP("href"))) - { - /* link href attribute */ - string_append(&ctx.item.link, value, valuelen); - } - } + /* try to translate entity, else just pass as data to + * xml_data_handler */ + len = xml_entitytostr(data, 
buffer, sizeof(buffer)); + /* this should never happen (buffer too small) */ + if (len < 0) + return; + + if (len > 0) + xml_handler_data(p, buffer, (size_t)len); + else + xml_handler_data(p, data, datalen); } static void @@ -646,28 +668,6 @@ xml_handler_start_el_parsed(XMLParser *p, const char *tag, size_t taglen, } static void -xml_handler_data_entity(XMLParser *p, const char *data, size_t datalen) -{ - char buffer[16]; - int len; - - if (!ctx.field) - return; - - /* try to translate entity, else just pass as data to - * xml_data_handler */ - len = xml_entitytostr(data, buffer, sizeof(buffer)); - /* this should never happen (buffer too small) */ - if (len < 0) - return; - - if (len > 0) - xml_handler_data(p, buffer, (size_t)len); - else - xml_handler_data(p, data, datalen); -} - -static void xml_handler_end_el(XMLParser *p, const char *name, size_t namelen, int isshort) { if (ctx.item.feedtype == FeedTypeNone) diff --git a/sfeed_html.1 b/sfeed_html.1 @@ -13,13 +13,11 @@ formats feed data (TSV) from .Xr sfeed 1 from stdin or .Ar file -to stdout in HTML. -If one or more +to stdout in HTML. If one or more .Ar file are specified, the basename of the .Ar file -is used as the feed name in the output. -If no +is used as the feed name in the output. If no .Ar file parameters are specified and so the data is read from stdin the feed name is empty. diff --git a/sfeed_mbox.1 b/sfeed_mbox.1 @@ -13,18 +13,15 @@ formats feed data (TSV) from .Xr sfeed 1 from stdin or .Ar file -to stdout in the mboxrd format. -If one or more +to stdout in the mboxrd format. If one or more .Ar file are specified, the basename of the .Ar file -is used as the feed name in the output. -If no +is used as the feed name in the output. If no .Ar file parameters are specified and so the data is read from stdin the feed name -is empty. -Lines starting with "From " will be mangled in the mboxrd-style. The mbox -data can be further processed by tools like +is empty. 
Lines starting with "From " will be mangled in the mboxrd-style. +The mbox data can be further processed by tools like .Xr procmail 1 or .Xr fdm 1 diff --git a/sfeed_opml_import.c b/sfeed_opml_import.c @@ -74,10 +74,9 @@ xml_handler_attr(XMLParser *p, const char *tag, size_t taglen, int main(void) { - memset(&parser, 0, sizeof(parser)); - parser.xmltagstart = xml_handler_start_element; - parser.xmltagend = xml_handler_end_element; parser.xmlattr = xml_handler_attr; + parser.xmltagend = xml_handler_end_element; + parser.xmltagstart = xml_handler_start_element; fputs( "# paths\n" diff --git a/xml.c b/xml.c @@ -332,6 +332,7 @@ xml_numericentitytostr(const char *e, char *buf, size_t bufsiz) for (b = 0; b < len; b++) buf[b] = (cp >> (8 * (len - 1 - b))) & 0xff; buf[len] = '\0'; + return (ssize_t)len; } @@ -359,7 +360,8 @@ xmlparser_parse(XMLParser *x) int c, ispi; size_t datalen, tagdatalen, taglen; - while ((c = xmlparser_getnext(x)) != EOF && c != '<'); /* skip until < */ + while ((c = xmlparser_getnext(x)) != EOF && c != '<') + ; /* skip until < */ while (c != EOF) { if (c == '<') { /* parse tag */ @@ -369,33 +371,28 @@ xmlparser_parse(XMLParser *x) x->taglen = 0; if (c == '!') { /* cdata and comments */ for (tagdatalen = 0; (c = xmlparser_getnext(x)) != EOF;) { - if (tagdatalen <= strlen("[CDATA[")) /* if (d < sizeof(x->data)) */ + if (tagdatalen <= sizeof("[CDATA[") - 1) /* if (d < sizeof(x->data)) */ x->data[tagdatalen++] = c; /* TODO: prevent overflow */ if (c == '>') break; - else if (c == '-' && tagdatalen == strlen("--") && + else if (c == '-' && tagdatalen == sizeof("--") - 1 && (x->data[0] == '-')) { /* comment */ xmlparser_parsecomment(x); break; } else if (c == '[') { - if (tagdatalen == strlen("[CDATA[") && + if (tagdatalen == sizeof("[CDATA[") - 1 && x->data[1] == 'C' && x->data[2] == 'D' && x->data[3] == 'A' && x->data[4] == 'T' && x->data[5] == 'A' && x->data[6] == '[') { /* CDATA */ xmlparser_parsecdata(x); break; - #if 0 - } else { - /* TODO ? 
*/ - /* markup declaration section */ - while ((c = xmlparser_getnext(x)) != EOF && c != ']'); - #endif } } } } else { /* normal tag (open, short open, close), processing instruction. */ if (isspace(c)) - while ((c = xmlparser_getnext(x)) != EOF && isspace(c)); + while ((c = xmlparser_getnext(x)) != EOF && isspace(c)) + ; if (c == EOF) return; x->tag[0] = c;