utf8proc

A clean C library for processing UTF-8 Unicode data
git clone https://git.sinitax.com/juliastrings/utf8proc
Log | Files | Refs | README | LICENSE | sfeed.txt

commit c6858e955c6a6f0c991d48cc8818062863bceb13
parent 243875b456f23717dfc4b0de4d275b23145c91d1
Author: Steven G. Johnson <stevenj@mit.edu>
Date:   Sun, 29 Mar 2020 10:44:42 -0400

use unsigned char more consistently, silence -Wextra compiler warnings (#188)


Diffstat:
M Makefile            |  2 +-
M test/graphemetest.c |  8 ++++----
M test/iterate.c      |  4 +++-
M test/normtest.c     |  8 ++++----
M test/tests.c        | 10 +++++-----
M test/tests.h        |  6 +++---
6 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/Makefile b/Makefile
@@ -11,7 +11,7 @@ PERL=perl
 CFLAGS ?= -O2
 PICFLAG = -fPIC
 C99FLAG = -std=c99
-WCFLAGS = -Wall -pedantic
+WCFLAGS = -Wall -Wextra -pedantic
 UCFLAGS = $(CPPFLAGS) $(CFLAGS) $(PICFLAG) $(C99FLAG) $(WCFLAGS) -DUTF8PROC_EXPORTS $(UTF8PROC_DEFINES)
 LDFLAG_SHARED = -shared
 SOFLAG = -Wl,-soname
diff --git a/test/graphemetest.c b/test/graphemetest.c
@@ -2,7 +2,7 @@
 
 int main(int argc, char **argv)
 {
-    char buf[8192];
+    unsigned char buf[8192];
     FILE *f = argc > 1 ? fopen(argv[1], "r") : NULL;
     utf8proc_uint8_t src[1024];
 
@@ -18,18 +18,18 @@ int main(int argc, char **argv)
 
         while (buf[bi]) {
             bi = skipspaces(buf, bi);
-            if ((uint8_t)buf[bi] == 0xc3 && (uint8_t)buf[bi+1] == 0xb7) { /* U+00f7 = grapheme break */
+            if (buf[bi] == 0xc3 && buf[bi+1] == 0xb7) { /* U+00f7 = grapheme break */
                 src[si++] = '/';
                 bi += 2;
             }
-            else if ((uint8_t)buf[bi] == 0xc3 && (uint8_t)buf[bi+1] == 0x97) { /* U+00d7 = no break */
+            else if (buf[bi] == 0xc3 && buf[bi+1] == 0x97) { /* U+00d7 = no break */
                 bi += 2;
             }
             else if (buf[bi] == '#') { /* start of comments */
                 break;
             }
             else { /* hex-encoded codepoint */
-                size_t len = encode((char*) (src + si), buf + bi) - 1;
+                size_t len = encode((unsigned char*) (src + si), buf + bi) - 1;
                 while (src[si]) ++si; /* advance to NUL termination */
                 bi += len;
             }
diff --git a/test/iterate.c b/test/iterate.c
@@ -35,6 +35,8 @@ int main(int argc, char **argv)
     uint32_t byt;
     unsigned char buf[16];
 
+    (void) argc; (void) argv; /* unused */
+
     tests = error = 0;
 
     // Check valid sequences that were considered valid erroneously before
@@ -54,7 +56,7 @@ int main(int argc, char **argv)
         CHECKVALID(3, 0xbe, 4);
         CHECKVALID(3, 0xbf, 4);
     }
-    
+
     // Continuation byte not after lead
     for (byt = 0x80; byt < 0xc0; byt++) {
         CHECKINVALID(0, byt, 1);
diff --git a/test/normtest.c b/test/normtest.c
@@ -1,17 +1,17 @@
 #include "tests.h"
 
 #define CHECK_NORM(NRM, norm, src) { \
-    char *src_norm = (char*) utf8proc_ ## NRM((utf8proc_uint8_t*) src); \
-    check(!strcmp(norm, src_norm), \
+    unsigned char *src_norm = (unsigned char*) utf8proc_ ## NRM((utf8proc_uint8_t*) src); \
+    check(!strcmp((char *) norm, (char *) src_norm), \
           "normalization failed for %s -> %s", src, norm); \
     free(src_norm); \
 }
 
 int main(int argc, char **argv)
 {
-    char buf[8192];
+    unsigned char buf[8192];
     FILE *f = argc > 1 ? fopen(argv[1], "r") : NULL;
-    char source[1024], NFC[1024], NFD[1024], NFKC[1024], NFKD[1024];
+    unsigned char source[1024], NFC[1024], NFD[1024], NFKC[1024], NFKD[1024];
 
     check(f != NULL, "error opening NormalizationTest.txt");
     while (simple_getline(buf, f) > 0) {
diff --git a/test/tests.c b/test/tests.c
@@ -17,7 +17,7 @@ void check(int cond, const char *format, ...)
     }
 }
 
-size_t skipspaces(const char *buf, size_t i)
+size_t skipspaces(const unsigned char *buf, size_t i)
 {
     while (isspace(buf[i])) ++i;
     return i;
@@ -27,7 +27,7 @@ size_t skipspaces(const char *buf, size_t i)
    separated by whitespace, and terminated by any character not in
    [0-9a-fA-F] or whitespace, then stores the corresponding utf8 string
    in dest, returning the number of bytes read from buf */
-size_t encode(char *dest, const char *buf)
+size_t encode(unsigned char *dest, const unsigned char *buf)
 {
     size_t i = 0, j, d = 0;
     for (;;) {
@@ -39,19 +39,19 @@ size_t encode(char *dest, const char *buf)
             dest[d] = 0; /* NUL-terminate destination string */
             return i + 1;
         }
-        check(sscanf(buf + i, "%x", (unsigned int *)&c) == 1, "invalid hex input %s", buf+i);
+        check(sscanf((char *) (buf + i), "%x", (unsigned int *)&c) == 1, "invalid hex input %s", buf+i);
         i = j; /* skip to char after hex input */
         d += utf8proc_encode_char(c, (utf8proc_uint8_t *) (dest + d));
     }
 }
 
 /* simplistic, portable replacement for getline, sufficient for our tests */
-size_t simple_getline(char buf[8192], FILE *f) {
+size_t simple_getline(unsigned char buf[8192], FILE *f) {
     size_t i = 0;
     while (i < 8191) {
         int c = getc(f);
         if (c == EOF || c == '\n') break;
-        buf[i++] = (char) ((uint8_t) c);
+        buf[i++] = (unsigned char) c;
     }
     buf[i] = 0;
     return i;
diff --git a/test/tests.h b/test/tests.h
@@ -22,6 +22,6 @@
 extern size_t lineno;
 
 void check(int cond, const char *format, ...);
-size_t skipspaces(const char *buf, size_t i);
-size_t encode(char *dest, const char *buf);
-size_t simple_getline(char buf[8192], FILE *f);
+size_t skipspaces(const unsigned char *buf, size_t i);
+size_t encode(unsigned char *dest, const unsigned char *buf);
+size_t simple_getline(unsigned char buf[8192], FILE *f);