sfeed

Simple RSS and Atom feed parser
git clone https://git.sinitax.com/codemadness/sfeed
Log | Files | Refs | README | LICENSE | Upstream | sfeed.txt

commit d5ee385b4b5f19934a00408a2addc70f965ea4a9
parent 880256b8bfde746cd54993f3abcb4dc648895af7
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Tue, 29 Mar 2022 11:03:54 +0200

compatibility: reduce the assumption that the builtin libc locale is ASCII-compatible

Whether the builtin libc locale is ASCII-compatible is not clearly defined by
the C99 standard. Define ctype-like macros to force ASCII / UTF-8-compatible
behaviour (rather than extended ASCII or similar, as noticed on OpenBSD 3.8).

(In practice, modern libc implementations are all ASCII- and UTF-8-compatible;
otherwise many programs would break.)

Diffstat:
M sfeed.c | 50 +++++++++++++++++++++++++-------------------------
M sfeed_opml_import.c | 1 -
M sfeed_web.c | 1 -
M sfeed_xmlenc.c | 7 +++----
M util.c | 9 ++++-----
M util.h | 6 +++++-
M xml.c | 20 +++++++++++---------
7 files changed, 48 insertions(+), 46 deletions(-)

diff --git a/sfeed.c b/sfeed.c @@ -246,7 +246,7 @@ gettag(enum FeedType feedtype, const char *name, size_t namelen) static char * ltrim(const char *s) { - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; return (char *)s; } @@ -256,7 +256,7 @@ rtrim(const char *s) { const char *e; - for (e = s + strlen(s); e > s && isspace((unsigned char)*(e - 1)); e--) + for (e = s + strlen(s); e > s && ISSPACE((unsigned char)*(e - 1)); e--) ; return (char *)e; } @@ -341,7 +341,7 @@ printtrimmed(const char *s) p = ltrim(s); e = rtrim(p); for (; *p && p != e; p++) { - if (isspace((unsigned char)*p)) + if (ISSPACE((unsigned char)*p)) putchar(' '); /* any whitespace to space */ else if (!ISCNTRL((unsigned char)*p)) /* ignore other control chars */ @@ -514,20 +514,20 @@ gettzoffset(const char *s) long tzhour = 0, tzmin = 0; size_t i; - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; switch (*s) { case '-': /* offset */ case '+': - for (i = 0, p = s + 1; i < 2 && isdigit((unsigned char)*p); i++, p++) + for (i = 0, p = s + 1; i < 2 && ISDIGIT((unsigned char)*p); i++, p++) tzhour = (tzhour * 10) + (*p - '0'); if (*p == ':') p++; - for (i = 0; i < 2 && isdigit((unsigned char)*p); i++, p++) + for (i = 0; i < 2 && ISDIGIT((unsigned char)*p); i++, p++) tzmin = (tzmin * 10) + (*p - '0'); return ((tzhour * 3600) + (tzmin * 60)) * (s[0] == '-' ? 
-1 : 1); default: /* timezone name */ - for (i = 0; isalpha((unsigned char)s[i]); i++) + for (i = 0; ISALPHA((unsigned char)s[i]); i++) ; if (i != 3) return 0; @@ -565,35 +565,35 @@ parsetime(const char *s, long long *tp) int va[6] = { 0 }, i, j, v, vi; size_t m; - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; - if (!isdigit((unsigned char)*s) && !isalpha((unsigned char)*s)) + if (!ISDIGIT((unsigned char)*s) && !ISALPHA((unsigned char)*s)) return -1; - if (isdigit((unsigned char)s[0]) && - isdigit((unsigned char)s[1]) && - isdigit((unsigned char)s[2]) && - isdigit((unsigned char)s[3])) { + if (ISDIGIT((unsigned char)s[0]) && + ISDIGIT((unsigned char)s[1]) && + ISDIGIT((unsigned char)s[2]) && + ISDIGIT((unsigned char)s[3])) { /* formats "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or "%Y%m%d%H%M%S" */ vi = 0; } else { /* format: "[%a, ]%d %b %Y %H:%M:%S" */ /* parse "[%a, ]%d %b %Y " part, then use time parsing as above */ - for (; isalpha((unsigned char)*s); s++) + for (; ISALPHA((unsigned char)*s); s++) ; - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; if (*s == ',') s++; - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; - for (v = 0, i = 0; i < 2 && isdigit((unsigned char)*s); s++, i++) + for (v = 0, i = 0; i < 2 && ISDIGIT((unsigned char)*s); s++, i++) v = (v * 10) + (*s - '0'); va[2] = v; /* day */ - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; /* end of word month */ - for (j = 0; isalpha((unsigned char)s[j]); j++) + for (j = 0; ISALPHA((unsigned char)s[j]); j++) ; /* check month name */ if (j < 3 || j > 9) @@ -609,15 +609,15 @@ parsetime(const char *s, long long *tp) } if (m >= 12) return -1; /* no month found */ - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; - for (v = 0, i = 0; i < 4 && isdigit((unsigned char)*s); s++, i++) + for (v = 0, i = 0; i < 4 && ISDIGIT((unsigned 
char)*s); s++, i++) v = (v * 10) + (*s - '0'); /* obsolete short year: RFC2822 4.3 */ if (i <= 3) v += (v >= 0 && v <= 49) ? 2000 : 1900; va[0] = v; /* year */ - for (; isspace((unsigned char)*s); s++) + for (; ISSPACE((unsigned char)*s); s++) ; /* parse only regular time part, see below */ vi = 3; @@ -626,20 +626,20 @@ parsetime(const char *s, long long *tp) /* parse time parts (and possibly remaining date parts) */ for (; *s && vi < 6; vi++) { for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) && - isdigit((unsigned char)*s); s++, i++) { + ISDIGIT((unsigned char)*s); s++, i++) { v = (v * 10) + (*s - '0'); } va[vi] = v; if ((vi < 2 && *s == '-') || - (vi == 2 && (*s == 'T' || isspace((unsigned char)*s))) || + (vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) || (vi > 2 && *s == ':')) s++; } /* skip milliseconds in for example: "%Y-%m-%dT%H:%M:%S.000Z" */ if (*s == '.') { - for (s++; isdigit((unsigned char)*s); s++) + for (s++; ISDIGIT((unsigned char)*s); s++) ; } diff --git a/sfeed_opml_import.c b/sfeed_opml_import.c @@ -1,4 +1,3 @@ -#include <ctype.h> #include <stdio.h> #include <strings.h> diff --git a/sfeed_web.c b/sfeed_web.c @@ -1,4 +1,3 @@ -#include <ctype.h> #include <stdio.h> #include <strings.h> diff --git a/sfeed_xmlenc.c b/sfeed_xmlenc.c @@ -1,4 +1,3 @@ -#include <ctype.h> #include <stdio.h> #include <stdlib.h> #include <strings.h> @@ -26,10 +25,10 @@ xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl, return; for (; *v; v++) { - if (isalpha((unsigned char)*v) || - isdigit((unsigned char)*v) || + if (ISALPHA((unsigned char)*v) || + ISDIGIT((unsigned char)*v) || *v == '.' 
|| *v == ':' || *v == '-' || *v == '_') - putchar(tolower((unsigned char)*v)); + putchar(TOLOWER((unsigned char)*v)); } } diff --git a/util.c b/util.c @@ -1,4 +1,3 @@ -#include <ctype.h> #include <errno.h> #include <stdarg.h> #include <stdio.h> @@ -66,8 +65,8 @@ strcasestr(const char *h, const char *n) return (char *)h; for (; *h; ++h) { - for (i = 0; n[i] && tolower((unsigned char)n[i]) == - tolower((unsigned char)h[i]); ++i) + for (i = 0; n[i] && TOLOWER((unsigned char)n[i]) == + TOLOWER((unsigned char)h[i]); ++i) ; if (n[i] == '\0') return (char *)h; @@ -82,7 +81,7 @@ uri_hasscheme(const char *s) { const char *p = s; - for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) || + for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) || *p == '+' || *p == '-' || *p == '.'; p++) ; /* scheme, except if empty and starts with ":" then it is a path */ @@ -109,7 +108,7 @@ uri_parse(const char *s, struct uri *u) } /* scheme / protocol part */ - for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) || + for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) || *p == '+' || *p == '-' || *p == '.'; p++) ; /* scheme, except if empty and starts with ":" then it is a path */ diff --git a/util.h b/util.h @@ -8,8 +8,12 @@ #define unveil(p1,p2) 0 #endif -/* control-character in the ASCII range 0-127: compatible with UTF-8 */ +/* ctype-like macros, but always compatible with ASCII / UTF-8 */ +#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26) #define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f) +#define ISDIGIT(c) (((unsigned)c) - '0' < 10) +#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5)) +#define TOLOWER(c) ((((unsigned)c) - 'A' < 26) ? 
((c) | 32) : (c)) #undef strcasestr char *strcasestr(const char *, const char *); diff --git a/xml.c b/xml.c @@ -1,4 +1,3 @@ -#include <ctype.h> #include <errno.h> #include <stdio.h> #include <stdlib.h> @@ -6,6 +5,9 @@ #include "xml.h" +#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26) +#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5)) + static void xml_parseattrs(XMLParser *x) { @@ -13,7 +15,7 @@ xml_parseattrs(XMLParser *x) int c, endsep, endname = 0, valuestart = 0; while ((c = GETNEXT()) != EOF) { - if (isspace(c)) { + if (ISSPACE(c)) { if (namelen) endname = 1; continue; @@ -23,7 +25,7 @@ xml_parseattrs(XMLParser *x) x->name[namelen] = '\0'; valuestart = 1; endname = 1; - } else if (namelen && ((endname && !valuestart && isalpha(c)) || (c == '>' || c == '/'))) { + } else if (namelen && ((endname && !valuestart && ISALPHA(c)) || (c == '>' || c == '/'))) { /* attribute without value */ x->name[namelen] = '\0'; if (x->xmlattrstart) @@ -44,7 +46,7 @@ xml_parseattrs(XMLParser *x) if (c == '\'' || c == '"') { endsep = c; } else { - endsep = ' '; /* isspace() */ + endsep = ' '; /* ISSPACE() */ goto startvalue; } @@ -58,7 +60,7 @@ startvalue: x->data[0] = c; valuelen = 1; while ((c = GETNEXT()) != EOF) { - if (c == endsep || (endsep == ' ' && (c == '>' || isspace(c)))) + if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) break; if (valuelen < sizeof(x->data) - 1) x->data[valuelen++] = c; @@ -79,7 +81,7 @@ startvalue: break; } } - } else if (c != endsep && !(endsep == ' ' && (c == '>' || isspace(c)))) { + } else if (c != endsep && !(endsep == ' ' && (c == '>' || ISSPACE(c)))) { if (valuelen < sizeof(x->data) - 1) { x->data[valuelen++] = c; } else { @@ -90,7 +92,7 @@ startvalue: valuelen = 1; } } - if (c == endsep || (endsep == ' ' && (c == '>' || isspace(c)))) { + if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) { x->data[valuelen] = '\0'; if (x->xmlattr) x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen); 
@@ -328,7 +330,7 @@ xml_parse(XMLParser *x) while ((c = GETNEXT()) != EOF) { if (c == '/') x->isshorttag = 1; /* short tag */ - else if (c == '>' || isspace(c)) { + else if (c == '>' || ISSPACE(c)) { x->tag[x->taglen] = '\0'; if (isend) { /* end tag, starts with </ */ if (x->xmltagend) @@ -339,7 +341,7 @@ xml_parse(XMLParser *x) /* start tag */ if (x->xmltagstart) x->xmltagstart(x, x->tag, x->taglen); - if (isspace(c)) + if (ISSPACE(c)) xml_parseattrs(x); if (x->xmltagstartparsed) x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);