libgrapheme

Freestanding C library for Unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

commit 92a5a67ed17573d9ad7d0a85ae602cb17d154f82
parent 361126ddd8800b4db43eeb34b83dbb283c653cff
Author: Laslo Hunhold <dev@frign.de>
Date:   Sun, 12 Dec 2021 15:51:04 +0100

Improve parsing of hexadecimal strings

Trying to get strtoul()-usage right is so frustrating that it's easier
and simpler to just roll your own. While at it, modify it in such a
way that we don't modify the constant str in range_parse().

Signed-off-by: Laslo Hunhold <dev@frign.de>

Diffstat:
M gen/util.c | 58 +++++++++++++++++++++++++++++++++-------------------------
1 file changed, 33 insertions(+), 25 deletions(-)

diff --git a/gen/util.c b/gen/util.c
@@ -20,31 +20,42 @@ struct segment_test_payload
 };
 
 static int
-valid_hexstring(const char *str)
+hextocp(const char *str, size_t len, uint_least32_t *cp)
 {
-	const char *p = str;
+	size_t i;
+	int off;
+	char relative;
 
-	while ((*p >= '0' && *p <= '9') ||
-	       (*p >= 'a' && *p <= 'f') ||
-	       (*p >= 'A' && *p <= 'F')) {
-		p++;
+	/* the maximum valid codepoint is 0x10FFFF */
+	if (len > 6) {
+		fprintf(stderr, "hextocp: '%.*s' is too long.\n", (int)len, str);
+		return 1;
 	}
 
-	if (*p != '\0') {
-		fprintf(stderr, "valid_hexstring: Invalid code point range '%s'\n", str);
-		return 0;
-	}
+	for (i = 0, *cp = 0; i < len; i++) {
+		if (str[i] >= '0' && str[i] <= '9') {
+			relative = '0';
+			off = 0;
+		} else if (str[i] >= 'a' && str[i] <= 'f') {
+			relative = 'a';
+			off = 10;
+		} else if (str[i] >= 'A' && str[i] <= 'F') {
+			relative = 'A';
+			off = 10;
+		} else {
+			fprintf(stderr, "hextocp: '%.*s' is not hexadecimal.\n",
+			        (int)len, str);
+			return 1;
+		}
 
-	return 1;
-}
+		*cp += ((uint_least32_t)1 << (4 * (len - i - 1))) *
+		       (uint_least32_t)(str[i] - relative + off);
+	}
 
-static int
-cp_parse(const char *str, uint_least32_t *cp)
-{
-	if (!valid_hexstring(str)) {
+	if (*cp > 0x10ffff) {
+		fprintf(stderr, "hextocp: '%.*s' is too large.\n", (int)len, str);
 		return 1;
 	}
-	*cp = strtol(str, NULL, 16);
 
 	return 0;
 }
@@ -56,19 +67,16 @@ range_parse(const char *str, struct range *range)
 
 	if ((p = strstr(str, "..")) == NULL) {
 		/* input has the form "XXXXXX" */
-		if (!valid_hexstring(str)) {
+		if (hextocp(str, strlen(str), &range->lower)) {
 			return 1;
 		}
-		range->lower = range->upper = strtol(str, NULL, 16);
+		range->upper = range->lower;
 	} else {
 		/* input has the form "XXXXXX..XXXXXX" */
-		*p = '\0';
-		p += 2;
-		if (!valid_hexstring(str) || !valid_hexstring(p)) {
+		if (hextocp(str, (size_t)(p - str), &range->lower) ||
+		    hextocp(p + 2, strlen(p + 2), &range->upper)) {
 			return 1;
 		}
-		range->lower = strtol(str, NULL, 16);
-		range->upper = strtol(p, NULL, 16);
 	}
 
 	return 0;
@@ -308,7 +316,7 @@ segment_test_callback(char *fname, char **field, size_t nfields, char *comment,
 			fprintf(stderr, "realloc: %s\n", strerror(errno));
 			return 1;
 		}
-		if (cp_parse(token, &t->cp[t->cplen - 1])) {
+		if (hextocp(token, strlen(token), &t->cp[t->cplen - 1])) {
 			return 1;
 		}
 		if (t->lenlen > 0) {