libgrapheme

Freestanding C library for unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

commit c0e14c9b89c1ac78b72b7d8840261fbb7285d07a
parent 7981a5db713073992d00ee2231b88558977671aa
Author: Laslo Hunhold <dev@frign.de>
Date:   Sat, 11 Dec 2021 14:17:39 +0100

Refine types (uint8_t -> char, uint32_t -> uint_least32_t)

The type uint32_t is not guaranteed by the standard to be present,
but uint_least32_t is. If a libgrapheme user passes a pointer to a
uint32_t (instead of a uint_least32_t), there will be no problem,
as the presence of uint32_t immediately implies uint32_t ==
uint_least32_t. However, we won't depend on it internally and are
strict with using uint_least32_t. The type name is a mouthful, but still
clearer and not much longer than "long int" (which is guaranteed to be
at least 32 bits).

Regarding uint8_t, it was a bit clumsy to require it in the API. C does
not guarantee that a byte is actually an octet (i.e. char can have
more than 8 bits), and even though the relevance of non-8-bit-char
seems to be waning, I don't want to rely on that. But more importantly,
accepting "char *" saves some casts on the user-side.
Adapting the lg_utf8_* functions is trivial, as it just requires
being careful with casts. The cast "signed char" <-> "unsigned char"
is unproblematic, so every time we need the bit representation, we
explicitly cast to unsigned char and are done with it. Likewise, every
time we write to a char, we make sure that what we pass is explicitly
an unsigned char.
This became a bit awkward in the test cases where we have char-arrays
with hex literals. As C does not really have a concept of a sub-int
literal, all hexadecimal literals had to first be explicitly cast to
unsigned char, but that's it.

One more aspect where we've become more portable. :)

Signed-off-by: Laslo Hunhold <dev@frign.de>

Diffstat:
M gen/util.c | 6 +++---
M gen/util.h | 6 +++---
M grapheme.h | 6 +++---
M src/grapheme.c | 8 ++++----
M src/utf8.c | 49 +++++++++++++++++++++++++++++--------------------
M src/util.c | 6 +++---
M src/util.h | 6 +++---
M test/utf8-decode.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
M test/utf8-encode.c | 42 ++++++++++++++++++++++++++++++++----------
9 files changed, 183 insertions(+), 73 deletions(-)

diff --git a/gen/util.c b/gen/util.c @@ -39,7 +39,7 @@ valid_hexstring(const char *str) } static int -cp_parse(const char *str, uint32_t *cp) +cp_parse(const char *str, uint_least32_t *cp) { if (!valid_hexstring(str)) { return 1; @@ -348,13 +348,13 @@ segment_test_list_print(struct segment_test *st, size_t numsegtests, printf("/* Automatically generated by %s */\n" "#include <stdint.h>\n#include <stddef.h>\n\n", progname); - printf("static const struct {\n\tuint32_t *cp;\n" + printf("static const struct {\n\tuint_least32_t *cp;\n" "\tsize_t cplen;\n\tsize_t *len;\n\tsize_t lenlen;\n" "\tchar *descr;\n} %s[] = {\n", identifier); for (i = 0; i < numsegtests; i++) { printf("\t{\n"); - printf("\t\t.cp = (uint32_t[]){"); + printf("\t\t.cp = (uint_least32_t[]){"); for (j = 0; j < st[i].cplen; j++) { printf(" UINT32_C(0x%06X)", st[i].cp[j]); if (j + 1 < st[i].cplen) { diff --git a/gen/util.h b/gen/util.h @@ -8,8 +8,8 @@ #define LEN(x) (sizeof (x) / sizeof *(x)) struct range { - uint32_t lower; - uint32_t upper; + uint_least32_t lower; + uint_least32_t upper; }; struct property { @@ -21,7 +21,7 @@ struct property { }; struct segment_test { - uint32_t *cp; + uint_least32_t *cp; size_t cplen; size_t *len; size_t lenlen; diff --git a/grapheme.h b/grapheme.h @@ -20,9 +20,9 @@ typedef struct lg_internal_segmentation_state { size_t lg_grapheme_nextbreak(const char *); -int lg_grapheme_isbreak(uint32_t, uint32_t, LG_SEGMENTATION_STATE *); +int lg_grapheme_isbreak(uint_least32_t, uint_least32_t, LG_SEGMENTATION_STATE *); -size_t lg_utf8_decode(const uint8_t *, size_t, uint32_t *); -size_t lg_utf8_encode(uint32_t, uint8_t *, size_t); +size_t lg_utf8_decode(const char *, size_t, uint_least32_t *); +size_t lg_utf8_encode(uint_least32_t, char *, size_t); #endif /* GRAPHEME_H */ diff --git a/src/grapheme.c b/src/grapheme.c @@ -13,7 +13,7 @@ enum { }; int -lg_grapheme_isbreak(uint32_t a, uint32_t b, LG_SEGMENTATION_STATE *state) +lg_grapheme_isbreak(uint_least32_t a, uint_least32_t b, 
LG_SEGMENTATION_STATE *state) { struct lg_internal_heisenstate *p[2] = { 0 }; int ret = 1, flags = 0; @@ -179,7 +179,7 @@ hasbreak: size_t lg_grapheme_nextbreak(const char *str) { - uint32_t cp0, cp1; + uint_least32_t cp0, cp1; size_t ret, len = 0; LG_SEGMENTATION_STATE state = { 0 }; @@ -200,14 +200,14 @@ lg_grapheme_nextbreak(const char *str) */ /* get first code point */ - len += lg_utf8_decode((uint8_t *)str, 5, &cp0); + len += lg_utf8_decode(str, 5, &cp0); if (cp0 == LG_CODEPOINT_INVALID) { return len; } while (cp0 != 0) { /* get next code point */ - ret = lg_utf8_decode((uint8_t *)(str + len), 5, &cp1); + ret = lg_utf8_decode(str + len, 5, &cp1); if (cp1 == LG_CODEPOINT_INVALID || lg_grapheme_isbreak(cp0, cp1, &state)) { diff --git a/src/utf8.c b/src/utf8.c @@ -8,10 +8,10 @@ /* lookup-table for the types of sequence first bytes */ static const struct { - uint8_t lower; /* lower bound of sequence first byte */ - uint8_t upper; /* upper bound of sequence first byte */ - uint32_t mincp; /* smallest non-overlong encoded code point */ - uint32_t maxcp; /* largest encodable code point */ + uint8_t lower; /* lower bound of sequence first byte */ + uint8_t upper; /* upper bound of sequence first byte */ + uint_least32_t mincp; /* smallest non-overlong encoded code point */ + uint_least32_t maxcp; /* largest encodable code point */ /* * implicit: table-offset represents the number of following * bytes of the form 10xxxxxx (6 bits capacity each) @@ -21,37 +21,44 @@ static const struct { /* 0xxxxxxx */ .lower = 0x00, /* 00000000 */ .upper = 0x7F, /* 01111111 */ - .mincp = (uint32_t)0, - .maxcp = ((uint32_t)1 << 7) - 1, /* 7 bits capacity */ + .mincp = (uint_least32_t)0, + .maxcp = ((uint_least32_t)1 << 7) - 1, /* 7 bits capacity */ }, [1] = { /* 110xxxxx */ .lower = 0xC0, /* 11000000 */ .upper = 0xDF, /* 11011111 */ - .mincp = (uint32_t)1 << 7, - .maxcp = ((uint32_t)1 << 11) - 1, /* 5+6=11 bits capacity */ + .mincp = (uint_least32_t)1 << 7, + .maxcp = ((uint_least32_t)1 
<< 11) - 1, /* 5+6=11 bits capacity */ }, [2] = { /* 1110xxxx */ .lower = 0xE0, /* 11100000 */ .upper = 0xEF, /* 11101111 */ - .mincp = (uint32_t)1 << 11, - .maxcp = ((uint32_t)1 << 16) - 1, /* 4+6+6=16 bits capacity */ + .mincp = (uint_least32_t)1 << 11, + .maxcp = ((uint_least32_t)1 << 16) - 1, /* 4+6+6=16 bits capacity */ }, [3] = { /* 11110xxx */ .lower = 0xF0, /* 11110000 */ .upper = 0xF7, /* 11110111 */ - .mincp = (uint32_t)1 << 16, - .maxcp = ((uint32_t)1 << 21) - 1, /* 3+6+6+6=21 bits capacity */ + .mincp = (uint_least32_t)1 << 16, + .maxcp = ((uint_least32_t)1 << 21) - 1, /* 3+6+6+6=21 bits capacity */ }, }; size_t -lg_utf8_decode(const uint8_t *s, size_t n, uint32_t *cp) +lg_utf8_decode(const char *s, size_t n, uint_least32_t *cp) { size_t off, i; + /* + * char is guaranteed to be at least 8 bits, but it could + * be more. We assume that the encoding is faithful such + * that any higher bits are zero. If we encounter anything + * else, we treat it as an encoding error. + */ + if (n == 0) { /* a sequence must be at least 1 byte long */ *cp = LG_CODEPOINT_INVALID; @@ -60,13 +67,15 @@ lg_utf8_decode(const uint8_t *s, size_t n, uint32_t *cp) /* identify sequence type with the first byte */ for (off = 0; off < LEN(lut); off++) { - if (BETWEEN(s[0], lut[off].lower, lut[off].upper)) { + if (BETWEEN((unsigned char)s[0], lut[off].lower, + lut[off].upper)) { /* * first byte is within the bounds; fill * p with the the first bits contained in * the first byte (by subtracting the high bits) + * and discarding any higher bits than 8 */ - *cp = s[0] - lut[off].lower; + *cp = ((unsigned char)s[0] - lut[off].lower) & 0xff; break; } } @@ -92,7 +101,7 @@ lg_utf8_decode(const uint8_t *s, size_t n, uint32_t *cp) * (i.e. 
between 0x80 (10000000) and 0xBF (10111111)) */ for (i = 1; i <= off; i++) { - if(!BETWEEN(s[i], 0x80, 0xBF)) { + if(!BETWEEN((unsigned char)s[i], 0x80, 0xBF)) { /* * byte does not match format; return * number of bytes processed excluding the @@ -106,7 +115,7 @@ lg_utf8_decode(const uint8_t *s, size_t n, uint32_t *cp) * shift code point by 6 bits and add the 6 stored bits * in s[i] to it using the bitmask 0x3F (00111111) */ - *cp = (*cp << 6) | (s[i] & 0x3F); + *cp = (*cp << 6) | ((unsigned char)s[i] & 0x3F); } if (*cp < lut[off].mincp || @@ -125,7 +134,7 @@ lg_utf8_decode(const uint8_t *s, size_t n, uint32_t *cp) } size_t -lg_utf8_encode(uint32_t cp, uint8_t *s, size_t n) +lg_utf8_encode(uint_least32_t cp, char *s, size_t n) { size_t off, i; @@ -161,7 +170,7 @@ lg_utf8_encode(uint32_t cp, uint8_t *s, size_t n) * We do not overwrite the mask because we guaranteed earlier * that there are no bits higher than the mask allows. */ - s[0] = lut[off].lower | (cp >> (6 * off)); + s[0] = (unsigned char)(lut[off].lower | (cp >> (6 * off))); for (i = 1; i <= off; i++) { /* @@ -170,7 +179,7 @@ lg_utf8_encode(uint32_t cp, uint8_t *s, size_t n) * extract from the properly-shifted value using the * mask 00111111 (0x3F) */ - s[i] = 0x80 | ((cp >> (6 * (off - i))) & 0x3F); + s[i] = (unsigned char)(0x80 | ((cp >> (6 * (off - i))) & 0x3F)); } return 1 + off; diff --git a/src/util.c b/src/util.c @@ -41,14 +41,14 @@ heisenstate_set(struct lg_internal_heisenstate *h, int slot, int state) static int cp_cmp(const void *a, const void *b) { - uint32_t cp = *(uint32_t *)a; - uint32_t *range = (uint32_t *)b; + uint_least32_t cp = *(uint_least32_t *)a; + uint_least32_t *range = (uint_least32_t *)b; return (cp >= range[0] && cp <= range[1]) ? 
0 : (cp - range[0]); } int -has_property(uint32_t cp, struct lg_internal_heisenstate *cpstate, +has_property(uint_least32_t cp, struct lg_internal_heisenstate *cpstate, const struct range_list *proptable, int property) { int res; diff --git a/src/util.h b/src/util.h @@ -10,8 +10,8 @@ #define LEN(x) (sizeof(x) / sizeof(*(x))) struct range { - uint32_t lower; - uint32_t upper; + uint_least32_t lower; + uint_least32_t upper; }; struct range_list { @@ -22,7 +22,7 @@ struct range_list { int heisenstate_get(struct lg_internal_heisenstate *, int); int heisenstate_set(struct lg_internal_heisenstate *, int, int); -int has_property(uint32_t, struct lg_internal_heisenstate *, +int has_property(uint_least32_t, struct lg_internal_heisenstate *, const struct range_list *, int); #endif /* UTIL_H */ diff --git a/test/utf8-decode.c b/test/utf8-decode.c @@ -9,7 +9,7 @@ #define LEN(x) (sizeof(x) / sizeof(*(x))) static const struct { - uint8_t *arr; /* UTF-8 byte sequence */ + char *arr; /* UTF-8 byte sequence */ size_t len; /* length of UTF-8 byte sequence */ size_t exp_len; /* expected length returned */ uint32_t exp_cp; /* expected code point returned */ @@ -29,7 +29,9 @@ static const struct { * [ 11111101 ] -> * INVALID */ - .arr = (uint8_t[]){ 0xFD }, + .arr = (char[]){ + (unsigned char)0xFD, + }, .len = 1, .exp_len = 1, .exp_cp = LG_CODEPOINT_INVALID, @@ -39,7 +41,9 @@ static const struct { * [ 00000001 ] -> * 0000001 */ - .arr = (uint8_t[]){ 0x01 }, + .arr = (char[]){ + (unsigned char)0x01, + }, .len = 1, .exp_len = 1, .exp_cp = 0x1, @@ -49,7 +53,10 @@ static const struct { * [ 11000011 10111111 ] -> * 00011111111 */ - .arr = (uint8_t[]){ 0xC3, 0xBF }, + .arr = (char[]){ + (unsigned char)0xC3, + (unsigned char)0xBF, + }, .len = 2, .exp_len = 2, .exp_cp = 0xFF, @@ -59,7 +66,9 @@ static const struct { * [ 11000011 ] -> * INVALID */ - .arr = (uint8_t[]){ 0xC3 }, + .arr = (char[]){ + (unsigned char)0xC3 + }, .len = 1, .exp_len = 2, .exp_cp = LG_CODEPOINT_INVALID, @@ -69,7 +78,10 @@ 
static const struct { * [ 11000011 11111111 ] -> * INVALID */ - .arr = (uint8_t[]){ 0xC3, 0xFF }, + .arr = (char[]){ + (unsigned char)0xC3, + (unsigned char)0xFF, + }, .len = 2, .exp_len = 1, .exp_cp = LG_CODEPOINT_INVALID, @@ -79,7 +91,10 @@ static const struct { * [ 11000001 10111111 ] -> * INVALID */ - .arr = (uint8_t[]){ 0xC1, 0xBF }, + .arr = (char[]){ + (unsigned char)0xC1, + (unsigned char)0xBF, + }, .len = 2, .exp_len = 2, .exp_cp = LG_CODEPOINT_INVALID, @@ -89,7 +104,11 @@ static const struct { * [ 11100000 10111111 10111111 ] -> * 0000111111111111 */ - .arr = (uint8_t[]){ 0xE0, 0xBF, 0xBF }, + .arr = (char[]){ + (unsigned char)0xE0, + (unsigned char)0xBF, + (unsigned char)0xBF, + }, .len = 3, .exp_len = 3, .exp_cp = 0xFFF, @@ -99,7 +118,9 @@ static const struct { * [ 11100000 ] -> * INVALID */ - .arr = (uint8_t[]){ 0xE0 }, + .arr = (char[]){ + (unsigned char)0xE0, + }, .len = 1, .exp_len = 3, .exp_cp = LG_CODEPOINT_INVALID, @@ -109,7 +130,11 @@ static const struct { * [ 11100000 01111111 10111111 ] -> * INVALID */ - .arr = (uint8_t[]){ 0xE0, 0x7F, 0xBF }, + .arr = (char[]){ + (unsigned char)0xE0, + (unsigned char)0x7F, + (unsigned char)0xBF, + }, .len = 3, .exp_len = 1, .exp_cp = LG_CODEPOINT_INVALID, @@ -119,7 +144,10 @@ static const struct { * [ 11100000 10111111 ] -> * INVALID */ - .arr = (uint8_t[]){ 0xE0, 0xBF }, + .arr = (char[]){ + (unsigned char)0xE0, + (unsigned char)0xBF, + }, .len = 2, .exp_len = 3, .exp_cp = LG_CODEPOINT_INVALID, @@ -129,7 +157,11 @@ static const struct { * [ 11100000 10111111 01111111 ] -> * INVALID */ - .arr = (uint8_t[]){ 0xE0, 0xBF, 0x7F }, + .arr = (char[]){ + (unsigned char)0xE0, + (unsigned char)0xBF, + (unsigned char)0x7F, + }, .len = 3, .exp_len = 2, .exp_cp = LG_CODEPOINT_INVALID, @@ -139,7 +171,11 @@ static const struct { * [ 11100000 10011111 10111111 ] -> * INVALID */ - .arr = (uint8_t[]){ 0xE0, 0x9F, 0xBF }, + .arr = (char[]){ + (unsigned char)0xE0, + (unsigned char)0x9F, + (unsigned char)0xBF, + }, .len = 3, 
.exp_len = 3, .exp_cp = LG_CODEPOINT_INVALID, @@ -149,7 +185,11 @@ static const struct { * [ 11101101 10100000 10000000 ] -> * INVALID */ - .arr = (uint8_t[]){ 0xED, 0xA0, 0x80 }, + .arr = (char[]){ + (unsigned char)0xED, + (unsigned char)0xA0, + (unsigned char)0x80, + }, .len = 3, .exp_len = 3, .exp_cp = LG_CODEPOINT_INVALID, @@ -159,7 +199,12 @@ static const struct { * [ 11110011 10111111 10111111 10111111 ] -> * 011111111111111111111 */ - .arr = (uint8_t[]){ 0xF3, 0xBF, 0xBF, 0xBF }, + .arr = (char[]){ + (unsigned char)0xF3, + (unsigned char)0xBF, + (unsigned char)0xBF, + (unsigned char)0xBF, + }, .len = 4, .exp_len = 4, .exp_cp = UINT32_C(0xFFFFF), @@ -169,7 +214,9 @@ static const struct { * [ 11110011 ] -> * INVALID */ - .arr = (uint8_t[]){ 0xF3 }, + .arr = (char[]){ + (unsigned char)0xF3, + }, .len = 1, .exp_len = 4, .exp_cp = LG_CODEPOINT_INVALID, @@ -179,7 +226,12 @@ static const struct { * [ 11110011 01111111 10111111 10111111 ] -> * INVALID */ - .arr = (uint8_t[]){ 0xF3, 0x7F, 0xBF, 0xBF }, + .arr = (char[]){ + (unsigned char)0xF3, + (unsigned char)0x7F, + (unsigned char)0xBF, + (unsigned char)0xBF, + }, .len = 4, .exp_len = 1, .exp_cp = LG_CODEPOINT_INVALID, @@ -189,7 +241,10 @@ static const struct { * [ 11110011 10111111 ] -> * INVALID */ - .arr = (uint8_t[]){ 0xF3, 0xBF }, + .arr = (char[]){ + (unsigned char)0xF3, + (unsigned char)0xBF, + }, .len = 2, .exp_len = 4, .exp_cp = LG_CODEPOINT_INVALID, @@ -199,7 +254,12 @@ static const struct { * [ 11110011 10111111 01111111 10111111 ] -> * INVALID */ - .arr = (uint8_t[]){ 0xF3, 0xBF, 0x7F, 0xBF }, + .arr = (char[]){ + (unsigned char)0xF3, + (unsigned char)0xBF, + (unsigned char)0x7F, + (unsigned char)0xBF, + }, .len = 4, .exp_len = 2, .exp_cp = LG_CODEPOINT_INVALID, @@ -209,7 +269,11 @@ static const struct { * [ 11110011 10111111 10111111 ] -> * INVALID */ - .arr = (uint8_t[]){ 0xF3, 0xBF, 0xBF }, + .arr = (char[]){ + (unsigned char)0xF3, + (unsigned char)0xBF, + (unsigned char)0xBF, + }, .len = 3, .exp_len 
= 4, .exp_cp = LG_CODEPOINT_INVALID, @@ -219,7 +283,12 @@ static const struct { * [ 11110011 10111111 10111111 01111111 ] -> * INVALID */ - .arr = (uint8_t[]){ 0xF3, 0xBF, 0xBF, 0x7F }, + .arr = (char[]){ + (unsigned char)0xF3, + (unsigned char)0xBF, + (unsigned char)0xBF, + (unsigned char)0x7F, + }, .len = 4, .exp_len = 3, .exp_cp = LG_CODEPOINT_INVALID, @@ -229,7 +298,12 @@ static const struct { * [ 11110000 10000000 10000001 10111111 ] -> * INVALID */ - .arr = (uint8_t[]){ 0xF0, 0x80, 0x81, 0xBF }, + .arr = (char[]){ + (unsigned char)0xF0, + (unsigned char)0x80, + (unsigned char)0x81, + (unsigned char)0xBF, + }, .len = 4, .exp_len = 4, .exp_cp = LG_CODEPOINT_INVALID, @@ -239,7 +313,12 @@ static const struct { * [ 11110100 10010000 10000000 10000000 ] -> * INVALID */ - .arr = (uint8_t[]){ 0xF4, 0x90, 0x80, 0x80 }, + .arr = (char[]){ + (unsigned char)0xF4, + (unsigned char)0x90, + (unsigned char)0x80, + (unsigned char)0x80, + }, .len = 4, .exp_len = 4, .exp_cp = LG_CODEPOINT_INVALID, @@ -254,7 +333,7 @@ main(void) /* UTF-8 decoder test */ for (i = 0, failed = 0; i < LEN(dec_test); i++) { size_t len; - uint32_t cp; + uint_least32_t cp; len = lg_utf8_decode(dec_test[i].arr, dec_test[i].len, &cp); diff --git a/test/utf8-encode.c b/test/utf8-encode.c @@ -9,44 +9,66 @@ #define LEN(x) (sizeof(x) / sizeof(*(x))) static const struct { - uint32_t cp; /* input code point */ - uint8_t *exp_arr; /* expected UTF-8 byte sequence */ - size_t exp_len; /* expected length of UTF-8 sequence */ + uint_least32_t cp; /* input code point */ + char *exp_arr; /* expected UTF-8 byte sequence */ + size_t exp_len; /* expected length of UTF-8 sequence */ } enc_test[] = { { /* invalid code point (UTF-16 surrogate half) */ .cp = UINT32_C(0xD800), - .exp_arr = (uint8_t[]){ 0xEF, 0xBF, 0xBD }, + .exp_arr = (char[]){ + (unsigned char)0xEF, + (unsigned char)0xBF, + (unsigned char)0xBD, + }, .exp_len = 3, }, { /* invalid code point (UTF-16-unrepresentable) */ .cp = UINT32_C(0x110000), - .exp_arr = 
(uint8_t[]){ 0xEF, 0xBF, 0xBD }, + .exp_arr = (char[]){ + (unsigned char)0xEF, + (unsigned char)0xBF, + (unsigned char)0xBD, + }, .exp_len = 3, }, { /* code point encoded to a 1-byte sequence */ .cp = 0x01, - .exp_arr = (uint8_t[]){ 0x01 }, + .exp_arr = (char[]){ + (unsigned char)0x01 + }, .exp_len = 1, }, { /* code point encoded to a 2-byte sequence */ .cp = 0xFF, - .exp_arr = (uint8_t[]){ 0xC3, 0xBF }, + .exp_arr = (char[]){ + (unsigned char)0xC3, + (unsigned char)0xBF, + }, .exp_len = 2, }, { /* code point encoded to a 3-byte sequence */ .cp = 0xFFF, - .exp_arr = (uint8_t[]){ 0xE0, 0xBF, 0xBF }, + .exp_arr = (char[]){ + (unsigned char)0xE0, + (unsigned char)0xBF, + (unsigned char)0xBF, + }, .exp_len = 3, }, { /* code point encoded to a 4-byte sequence */ .cp = UINT32_C(0xFFFFF), - .exp_arr = (uint8_t[]){ 0xF3, 0xBF, 0xBF, 0xBF }, + .exp_arr = (char[]){ + (unsigned char)0xF3, + (unsigned char)0xBF, + (unsigned char)0xBF, + (unsigned char)0xBF, + }, .exp_len = 4, }, }; @@ -58,7 +80,7 @@ main(void) /* UTF-8 encoder test */ for (i = 0, failed = 0; i < LEN(enc_test); i++) { - uint8_t arr[4]; + char arr[4]; size_t len; len = lg_utf8_encode(enc_test[i].cp, arr, LEN(arr));