sfeed

Simple RSS and Atom feed parser
git clone https://git.sinitax.com/codemadness/sfeed
Log | Files | Refs | README | LICENSE | Upstream | sfeed.txt

commit b4a6220906d4ceb047e73e35554b5ab6898a8db3
parent af021fb1161c2b0f669991bf64a7cbb696830156
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date:   Sun,  2 Dec 2018 12:33:20 +0100

XML tag parse improvements for PI and end tags

- Stricter parsing of tags, no whitespace stripping after <.
- For end tags the "internal" context x->tag would be "/sometag". Make sure
  this matches exactly with the parameter tag.
- Reset tagname after parsing an end tag.
- Make end tag handling more consistent.
- Remove temporary variable taglen.

Diffstat:
Mxml.c | 52+++++++++++++++++++++++++++++-----------------------
1 file changed, 29 insertions(+), 23 deletions(-)

diff --git a/xml.c b/xml.c @@ -334,8 +334,8 @@ xml_entitytostr(const char *e, char *buf, size_t bufsiz) void xml_parse(XMLParser *x) { - int c, ispi; - size_t datalen, tagdatalen, taglen; + size_t datalen, tagdatalen; + int c, isend; if (!x->getnext) return; @@ -367,30 +367,32 @@ xml_parse(XMLParser *x) } } } else { - x->tag[0] = '\0'; - x->taglen = 0; - /* normal tag (open, short open, close), processing instruction. */ - if (isspace(c)) - while ((c = x->getnext()) != EOF && isspace(c)) - ; - if (c == EOF) - return; x->tag[0] = c; - ispi = (c == '?') ? 1 : 0; - x->isshorttag = ispi; - taglen = 1; + x->taglen = 1; + x->isshorttag = isend = 0; + + /* treat processing instruction as shorttag, don't strip "?" prefix. */ + if (c == '?') { + x->isshorttag = 1; + } else if (c == '/') { + if ((c = x->getnext()) == EOF) + return; + x->tag[0] = c; + isend = 1; + } + while ((c = x->getnext()) != EOF) { if (c == '/') x->isshorttag = 1; /* short tag */ else if (c == '>' || isspace(c)) { - x->tag[taglen] = '\0'; - if (x->tag[0] == '/') { /* end tag, starts with </ */ - x->taglen = --taglen; /* len -1 because of / */ - if (taglen && x->xmltagend) - x->xmltagend(x, &(x->tag)[1], x->taglen, 0); + x->tag[x->taglen] = '\0'; + if (isend) { /* end tag, starts with </ */ + if (x->xmltagend) + x->xmltagend(x, x->tag, x->taglen, x->isshorttag); + x->tag[0] = '\0'; + x->taglen = 0; } else { - x->taglen = taglen; /* start tag */ if (x->xmltagstart) x->xmltagstart(x, x->tag, x->taglen); @@ -400,11 +402,15 @@ xml_parse(XMLParser *x) x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag); } /* call tagend for shortform or processing instruction */ - if ((x->isshorttag || ispi) && x->xmltagend) - x->xmltagend(x, x->tag, x->taglen, 1); + if (x->isshorttag) { + if (x->xmltagend) + x->xmltagend(x, x->tag, x->taglen, x->isshorttag); + x->tag[0] = '\0'; + x->taglen = 0; + } break; - } else if (taglen < sizeof(x->tag) - 1) - x->tag[taglen++] = c; /* NOTE: tag name truncation */ + } else if (x->taglen < sizeof(x->tag) - 1) + x->tag[x->taglen++] = c; /* NOTE: tag name truncation */ } } } else {