sfeed

Simple RSS and Atom feed parser
git clone https://git.sinitax.com/codemadness/sfeed
Log | Files | Refs | README | LICENSE | Upstream | sfeed.txt

commit 5c5a526d0abe22cb7fc30707bd14f09d8b5d1f9c
parent 04a72b9634ada77f59f409989d36cb2063d93cb4
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Fri,  7 Sep 2018 19:00:57 +0200

fix many instances of undefined behaviour in the usage of ctype functions

- cast all ctype(3) function arguments to (unsigned char) to avoid UB

POSIX says:
"The c argument is an int, the value of which the application shall ensure is a
character representable as an unsigned char or equal to the value of the macro
EOF. If the argument has any other value, the behavior is undefined."

Many libc implementations implicitly cast the value, but NetBSD does not, which
is probably the correct way to interpret it.

- no need to cast the argument of putchar; also rename some fputc(..., stdout) calls to putchar

POSIX says:
"The fputc() function shall write the byte specified by c (converted to an
unsigned char) to the output stream pointed to by stream [...]"

Major thanks to Leonardo Taccari <iamleot@gmail.com> for reporting and testing
it on NetBSD!

Diffstat:
M sfeed.c | 58 +++++++++++++++++++++++++++++-----------------------------
M sfeed_opml_import.c | 4 ++--
M sfeed_web.c | 4 ++--
M sfeed_xmlenc.c | 2 +-
M util.c | 4 ++--
5 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/sfeed.c b/sfeed.c @@ -247,10 +247,10 @@ string_print_encoded(String *s) return; /* skip leading whitespace */ - for (p = s->data; *p && isspace((int)*p); p++) + for (p = s->data; *p && isspace((unsigned char)*p); p++) ; /* seek location of trailing whitespace */ - for (e = s->data + s->len; e > p && isspace((int)*(e - 1)); e--) + for (e = s->data + s->len; e > p && isspace((unsigned char)*(e - 1)); e--) ; for (; *p && p != e; p++) { @@ -260,7 +260,7 @@ string_print_encoded(String *s) case '\t': fputs("\\t", stdout); break; default: /* ignore control chars */ - if (!iscntrl((int)*p)) + if (!iscntrl((unsigned char)*p)) putchar(*p); break; } @@ -278,18 +278,18 @@ string_print_trimmed(String *s) return; /* skip leading whitespace */ - for (p = s->data; *p && isspace((int)*p); p++) + for (p = s->data; *p && isspace((unsigned char)*p); p++) ; /* seek location of trailing whitespace */ - for (e = s->data + s->len; e > p && isspace((int)*(e - 1)); e--) + for (e = s->data + s->len; e > p && isspace((unsigned char)*(e - 1)); e--) ; for (; *p && p != e; p++) { - if (isspace((int)*p)) + if (isspace((unsigned char)*p)) putchar(' '); /* any whitespace to space */ - else if (!iscntrl((int)*p)) + else if (!iscntrl((unsigned char)*p)) /* ignore other control chars */ - putchar((int)*p); + putchar(*p); } } @@ -376,20 +376,20 @@ gettzoffset(const char *s) int tzhour = 0, tzmin = 0; size_t i, namelen; - for (; *s && isspace((int)*s); s++) + for (; *s && isspace((unsigned char)*s); s++) ; switch (s[0]) { case '-': /* offset */ case '+': - for (i = 0, p = s + 1; i < 2 && *p && isdigit(*p); i++, p++) + for (i = 0, p = s + 1; i < 2 && *p && isdigit((unsigned char)*p); i++, p++) tzhour = (tzhour * 10) + (*p - '0'); - if (*p && !isdigit(*p)) + if (*p && !isdigit((unsigned char)*p)) p++; - for (i = 0; i < 2 && *p && isdigit(*p); i++, p++) + for (i = 0; i < 2 && *p && isdigit((unsigned char)*p); i++, p++) tzmin = (tzmin * 10) + (*p - '0'); return ((tzhour * 3600) + (tzmin * 60)) 
* (s[0] == '-' ? -1 : 1); default: /* timezone name */ - for (i = 0; *s && isalpha((int)s[i]); i++) + for (i = 0; *s && isalpha((unsigned char)s[i]); i++) ; namelen = i; /* end of name */ /* optimization: these are always non-matching */ @@ -429,49 +429,49 @@ parsetime(const char *s, time_t *tp) int va[6] = { 0 }, i, j, v, vi; size_t m; - for (; *s && isspace((int)*s); s++) + for (; *s && isspace((unsigned char)*s); s++) ; - if (!isdigit((int)*s) && !isalpha((int)*s)) + if (!isdigit((unsigned char)*s) && !isalpha((unsigned char)*s)) return -1; - if (isdigit((int)*s)) { + if (isdigit((unsigned char)*s)) { /* format "%Y-%m-%d %H:%M:%S" or "%Y-%m-%dT%H:%M:%S" */ vi = 0; time: for (; *s && vi < 6; vi++) { - for (i = 0, v = 0; *s && i < 4 && isdigit((int)*s); s++, i++) + for (i = 0, v = 0; *s && i < 4 && isdigit((unsigned char)*s); s++, i++) v = (v * 10) + (*s - '0'); va[vi] = v; if ((vi < 2 && *s == '-') || - (vi == 2 && (*s == 'T' || isspace((int)*s))) || + (vi == 2 && (*s == 'T' || isspace((unsigned char)*s))) || (vi > 2 && *s == ':')) s++; } /* TODO: only if seconds are parsed (vi == 5)? 
*/ /* skip milliseconds for: %Y-%m-%dT%H:%M:%S.000Z */ if (*s == '.') { - for (s++; *s && isdigit((int)*s); s++) + for (s++; *s && isdigit((unsigned char)*s); s++) ; } end = s; - } else if (isalpha((int)*s)) { + } else if (isalpha((unsigned char)*s)) { /* format: "%a, %d %b %Y %H:%M:%S" */ /* parse "%a, %d %b %Y " part, then use time parsing as above */ - for (; *s && isalpha((int)*s); s++) + for (; *s && isalpha((unsigned char)*s); s++) ; - for (; *s && isspace((int)*s); s++) + for (; *s && isspace((unsigned char)*s); s++) ; if (*s != ',') return -1; - for (s++; *s && isspace((int)*s); s++) + for (s++; *s && isspace((unsigned char)*s); s++) ; - for (v = 0, i = 0; *s && i < 4 && isdigit((int)*s); s++, i++) + for (v = 0, i = 0; *s && i < 4 && isdigit((unsigned char)*s); s++, i++) v = (v * 10) + (*s - '0'); va[2] = v; /* day */ - for (; *s && isspace((int)*s); s++) + for (; *s && isspace((unsigned char)*s); s++) ; /* end of word month */ - for (j = 0; *s && isalpha((int)s[j]); j++) + for (j = 0; *s && isalpha((unsigned char)s[j]); j++) ; /* check month name */ if (j < 3 || j > 9) @@ -487,12 +487,12 @@ time: } if (m >= 12) return -1; /* no month found */ - for (; *s && isspace((int)*s); s++) + for (; *s && isspace((unsigned char)*s); s++) ; - for (v = 0, i = 0; *s && i < 4 && isdigit((int)*s); s++, i++) + for (v = 0, i = 0; *s && i < 4 && isdigit((unsigned char)*s); s++, i++) v = (v * 10) + (*s - '0'); va[0] = v; /* year */ - for (; *s && isspace((int)*s); s++) + for (; *s && isspace((unsigned char)*s); s++) ; /* parse regular time, see above */ vi = 3; diff --git a/sfeed_opml_import.c b/sfeed_opml_import.c @@ -18,14 +18,14 @@ static void printsafe(const char *s) { for (; *s; s++) { - if (iscntrl((int)*s)) + if (iscntrl((unsigned char)*s)) continue; else if (*s == '\\') fputs("\\\\", stdout); else if (*s == '\'') fputs("'\\''", stdout); else - putchar((int)*s); + putchar(*s); } } diff --git a/sfeed_web.c b/sfeed_web.c @@ -22,7 +22,7 @@ static void printfeedtype(const 
char *s, FILE *fp) { for (; *s; s++) - if (!isspace((int)*s)) + if (!isspace((unsigned char)*s)) fputc(*s, fp); } @@ -49,7 +49,7 @@ xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort) fputs(abslink, stdout); else fputs(feedlink, stdout); - fputc('\t', stdout); + putchar('\t'); printfeedtype(feedtype, stdout); putchar('\n'); found++; diff --git a/sfeed_xmlenc.c b/sfeed_xmlenc.c @@ -37,7 +37,7 @@ xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name, if (*value) { /* output lowercase */ for (; *value; value++) - putc(tolower((int)*value), stdout); + putchar(tolower((unsigned char)*value)); putchar('\n'); } exit(0); diff --git a/util.c b/util.c @@ -31,7 +31,7 @@ parseuri(const char *s, struct uri *u, int rel) p += 2; /* skip "//" */ } else { /* protocol part */ - for (p = s; *p && (isalpha((int)*p) || isdigit((int)*p) || + for (p = s; *p && (isalpha((unsigned char)*p) || isdigit((unsigned char)*p) || *p == '+' || *p == '-' || *p == '.'); p++) ; if (!strncmp(p, "://", 3)) { @@ -101,7 +101,7 @@ encodeuri(char *buf, size_t bufsiz, const char *s) for (i = 0, b = 0; s[i]; i++) { if ((int)s[i] == ' ' || (unsigned char)s[i] > 127 || - iscntrl((int)s[i])) { + iscntrl((unsigned char)s[i])) { if (b + 3 >= bufsiz) return -1; buf[b++] = '%';