sfeed: changes, needs some testing - sfeed - Simple RSS and Atom feed parser

	sfeed Simple RSS and Atom feed parser
	git clone https://git.sinitax.com/codemadness/sfeed
	Log \| Files \| Refs \| README \| LICENSE \| Upstream \| sfeed.txt

commit 5dde51b9f898de419f90217c3b6e2759731f861b
parent b0a26a5d5f2796b3387ec4c28f05f4201d41a575
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Fri,  2 Jan 2015 19:44:24 +0100

sfeed: changes, needs some testing

Diffstat:
M sfeed.c  | 122 ++++++++++++++++++++++++++++++++++++++++++++-----------------------------------

1 file changed, 68 insertions(+), 54 deletions(-)
diff --git a/sfeed.c b/sfeed.c
@@ -11,7 +11,14 @@
 #include "util.h"
 #include "xml.h"
 
-#define ISWSNOSPACE(c) (((unsigned)c - '\t') < 5) /* isspace(c) && c != ' ' */
+/* fast isspace(c) && c != ' ' check. */
+#define ISWSNOSPACE(c)    (((unsigned)c - '\t') < 5)
+#define ISINCONTENT(ctx)  ((ctx).iscontent && !((ctx).iscontenttag))
+#define ISCONTENTTAG(ctx) (!((ctx).iscontent) && (ctx).iscontenttag)
+/* string and size */
+#define STRP(s)           s,sizeof(s)-1
+/* length of string */
+#define STRSIZ(s)         (sizeof(s)-1)
 
 enum { FeedTypeNone = 0, FeedTypeRSS = 1, FeedTypeAtom = 2 };
 static const char *feedtypes[] = { "", "rss", "atom" };
@@ -106,28 +113,28 @@ gettag(int feedtype, const char *name, size_t namelen)
 {
 	/* RSS, alphabetical order */
 	static FeedTag rsstag[] = {
-		{ "author",           6, RSSTagAuthor },
-		{ "content:encoded", 15, RSSTagContentencoded },
-		{ "dc:creator",      10, RSSTagDccreator },
-		{ "dc:date",          7, RSSTagDcdate },
-		{ "description",     11, RSSTagDescription },
-		{ "guid",             4, RSSTagGuid },
-		{ "link",             4, RSSTagLink },
-		{ "pubdate",          7, RSSTagPubdate },
-		{ "title",            5, RSSTagTitle },
-		{ NULL,               0, -1 }
+		{ STRP("author"),          RSSTagAuthor },
+		{ STRP("content:encoded"), RSSTagContentencoded },
+		{ STRP("dc:creator"),      RSSTagDccreator },
+		{ STRP("dc:date"),         RSSTagDcdate },
+		{ STRP("description"),     RSSTagDescription },
+		{ STRP("guid"),            RSSTagGuid },
+		{ STRP("link"),            RSSTagLink },
+		{ STRP("pubdate"),         RSSTagPubdate },
+		{ STRP("title"),           RSSTagTitle },
+		{ NULL, 0, -1 }
 	};
 	/* Atom, alphabetical order */
 	static FeedTag atomtag[] = {
-		{ "author",    6, AtomTagAuthor },
-		{ "content",   7, AtomTagContent },
-		{ "id",        2, AtomTagId },
-		{ "link",      4, AtomTagLink },
-		{ "published", 9, AtomTagPublished },
-		{ "summary",   7, AtomTagSummary },
-		{ "title",     5, AtomTagTitle },
-		{ "updated",   7, AtomTagUpdated },
-		{ NULL,        0, -1 }
+		{ STRP("author"),    AtomTagAuthor },
+		{ STRP("content"),   AtomTagContent },
+		{ STRP("id"),        AtomTagId },
+		{ STRP("link"),      AtomTagLink },
+		{ STRP("published"), AtomTagPublished },
+		{ STRP("summary"),   AtomTagSummary },
+		{ STRP("title"),     AtomTagTitle },
+		{ STRP("updated"),   AtomTagUpdated },
+		{ NULL, 0, -1 }
 	};
 	int i, n;
 
@@ -303,7 +310,7 @@ gettimetz(const char *s, char *buf, size_t bufsiz)
 	char c;
 
 	buf[0] = '\0';
-	if(bufsiz < sizeof(tzname) + strlen(" -00:00"))
+	if(bufsiz < sizeof(tzname) + STRSIZ(" -00:00"))
 		return 0;
 	for(; *p && isspace((int)*p); p++); /* skip whitespace */
 	/* loop until some common timezone delimiters are found */
@@ -430,14 +437,15 @@ xml_handler_attr_start(XMLParser *p, const char *tag, size_t taglen,
 	(void)tag;
 	(void)taglen;
 
-	if(ctx.iscontent && !ctx.iscontenttag) {
-		if(!ctx.attrcount)
-			xml_handler_data(p, " ", 1);
-		ctx.attrcount++;
-		xml_handler_data(p, name, namelen);
-		xml_handler_data(p, "=\"", 2);
+	if(!ISINCONTENT(ctx))
 		return;
-	}
+
+	/* handles transforming inline XML to data */
+	if(!ctx.attrcount)
+		xml_handler_data(p, " ", 1);
+	ctx.attrcount++;
+	xml_handler_data(p, name, namelen);
+	xml_handler_data(p, "=\"", 2);
 }
 
 static void
@@ -449,10 +457,12 @@ xml_handler_attr_end(struct xmlparser *p, const char *tag, size_t taglen,
 	(void)name;
 	(void)namelen;
 
-	if(ctx.iscontent && !ctx.iscontenttag) {
-		xml_handler_data(p, "\"", 1);
-		ctx.attrcount = 0;
-	}
+	if(!ISINCONTENT(ctx))
+		return;
+
+	/* handles transforming inline XML to data */
+	xml_handler_data(p, "\"", 1);
+	ctx.attrcount = 0;
 }
 
 static void
@@ -462,12 +472,13 @@ xml_handler_start_element_parsed(XMLParser *p, const char *tag, size_t taglen,
 	(void)tag;
 	(void)taglen;
 
-	if(ctx.iscontent && !ctx.iscontenttag) {
-		if(isshort)
-			xml_handler_data(p, "/>", 2);
-		else
-			xml_handler_data(p, ">", 1);
-	}
+	if(!ISINCONTENT(ctx))
+		return;
+
+	if(isshort)
+		xml_handler_data(p, "/>", 2);
+	else
+		xml_handler_data(p, ">", 1);
 }
 
 static void
@@ -478,18 +489,20 @@ xml_handler_attr(XMLParser *p, const char *tag, size_t taglen,
 	(void)tag;
 	(void)taglen;
 
-	if(ctx.iscontent && !ctx.iscontenttag) {
+	/* handles transforming inline XML to data */
+	if(ISINCONTENT(ctx)) {
 		xml_handler_data(p, value, valuelen);
 		return;
 	}
+
 	if(ctx.item.feedtype == FeedTypeAtom) {
 		/*if(ctx.tagid == AtomTagContent || ctx.tagid == AtomTagSummary) {*/
-		if(ctx.iscontenttag) {
-			if(isattr(name, namelen, "type", strlen("type")) &&
-			   (isattr(value, valuelen, "xhtml", strlen("xhtml")) ||
-			   isattr(value, valuelen, "text/xhtml", strlen("text/xhtml")) ||
-			   isattr(value, valuelen, "html", strlen("html")) ||
-			   isattr(value, valuelen, "text/html", strlen("text/html"))))
+		if(ISCONTENTTAG(ctx)) {
+			if(isattr(name, namelen, STRP("type")) &&
+			   (isattr(value, valuelen, STRP("xhtml")) ||
+			   isattr(value, valuelen, STRP("text/xhtml")) ||
+			   isattr(value, valuelen, STRP("html")) ||
+			   isattr(value, valuelen, STRP("text/html"))))
 			{
 				ctx.item.contenttype = ContentTypeHTML;
 				ctx.iscontent = 1;
@@ -499,7 +512,7 @@ xml_handler_attr(XMLParser *p, const char *tag, size_t taglen,
 				p->xmltagstartparsed = xml_handler_start_element_parsed;
 			}
 		} else if(ctx.tagid == AtomTagLink &&
-		          isattr(name, namelen, "href", strlen("href")))
+		          isattr(name, namelen, STRP("href")))
 		{
 			/* link href attribute */
 			string_append(&ctx.item.link, value, valuelen);
@@ -510,15 +523,16 @@ xml_handler_attr(XMLParser *p, const char *tag, size_t taglen,
 static void
 xml_handler_start_element(XMLParser *p, const char *name, size_t namelen)
 {
-	if(ctx.iscontenttag) {
+	if(ISCONTENTTAG(ctx)) {
 		/* starts with div, handle as XML, don't convert entities (set handle to NULL) */
 		if(ctx.item.feedtype == FeedTypeAtom &&
-		   namelen == strlen("div") &&
-		   !strncmp(name, "div", strlen("div"))) {
+		   namelen == STRSIZ("div") &&
+		   !strncmp(name, STRP("div"))) {
 			p->xmldataentity = NULL;
 		}
 	}
-	if(ctx.iscontent) {
+	/* TODO: changed, iscontenttag can be 0 or 1 ? */
+	if(ISINCONTENT(ctx)) {
 		ctx.attrcount = 0;
 		ctx.iscontenttag = 0;
 		xml_handler_data(p, "<", 1);
@@ -531,11 +545,11 @@ xml_handler_start_element(XMLParser *p, const char *name, size_t namelen)
 
 	/* start of RSS or Atom item / entry */
 	if(ctx.item.feedtype == FeedTypeNone) {
-		if(istag(name, namelen, "entry", strlen("entry"))) { /* Atom */
+		if(istag(name, namelen, STRP("entry"))) { /* Atom */
 			ctx.item.feedtype = FeedTypeAtom;
 			ctx.item.contenttype = ContentTypePlain; /* Default content type */
 			ctx.field = NULL; /* XXX: optimization */
-		} else if(istag(name, namelen, "item", strlen("item"))) { /* RSS */
+		} else if(istag(name, namelen, STRP("item"))) { /* RSS */
 			ctx.item.feedtype = FeedTypeRSS;
 			ctx.item.contenttype = ContentTypeHTML; /* Default content type */
 			ctx.field = NULL; /* XXX: optimization */
@@ -646,9 +660,9 @@ xml_handler_end_element(XMLParser *p, const char *name, size_t namelen, int issh
 	/* end of RSS or Atom entry / item */
 	/* TODO: optimize, use gettag() ? to tagid? */
 	if((ctx.item.feedtype == FeedTypeAtom &&
-	   istag(name, namelen, "entry", strlen("entry"))) || /* Atom */
+	   istag(name, namelen, STRP("entry"))) || /* Atom */
 	   (ctx.item.feedtype == FeedTypeRSS &&
-	   istag(name, namelen, "item", strlen("item")))) /* RSS */
+	   istag(name, namelen, STRP("item")))) /* RSS */
 	{
 		printf("%ld", (long)parsetime((&ctx.item.timestamp)->data,
 		       timebuf, sizeof(timebuf)));