libgrapheme

Freestanding C library for Unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

commit d515a3d96e1301b7d9ba0d38a00038894ebefcd4
parent 0b0cbfa18ba4d0cc1ef9e88d7c12d96e7a72ff9b
Author: Laslo Hunhold <dev@frign.de>
Date:   Fri, 17 Dec 2021 01:28:29 +0100

Rename LG_CODEPOINT_INVALID to LG_INVALID_CODE_POINT

Writing "codepoint" as a single word is a bad habit: the term "code point"
is made up of two separate words and should not be written together. The
new word order is also easier to read, as it sounds more natural.

Signed-off-by: Laslo Hunhold <dev@frign.de>

Diffstat:
M grapheme.h | 2 +-
M src/grapheme.c | 4 ++--
M src/utf8.c | 12 ++++++------
M test/utf8-decode.c | 38 +++++++++++++++++++-------------------
4 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/grapheme.h b/grapheme.h @@ -17,7 +17,7 @@ typedef struct lg_internal_segmentation_state { uint_least16_t flags; } LG_SEGMENTATION_STATE; -#define LG_CODEPOINT_INVALID UINT32_C(0xFFFD) +#define LG_INVALID_CODE_POINT UINT32_C(0xFFFD) size_t lg_grapheme_nextbreak(const char *); diff --git a/src/grapheme.c b/src/grapheme.c @@ -203,7 +203,7 @@ lg_grapheme_nextbreak(const char *str) /* get first code point */ len += lg_utf8_decode(str, 5, &cp0); - if (cp0 == LG_CODEPOINT_INVALID) { + if (cp0 == LG_INVALID_CODE_POINT) { return len; } @@ -211,7 +211,7 @@ lg_grapheme_nextbreak(const char *str) /* get next code point */ ret = lg_utf8_decode(str + len, 5, &cp1); - if (cp1 == LG_CODEPOINT_INVALID || + if (cp1 == LG_INVALID_CODE_POINT || lg_grapheme_isbreak(cp0, cp1, &state)) { /* we read an invalid cp or have a breakpoint */ break; diff --git a/src/utf8.c b/src/utf8.c @@ -54,7 +54,7 @@ lg_utf8_decode(const char *s, size_t n, uint_least32_t *cp) if (s == NULL || n == 0) { /* a sequence must be at least 1 byte long */ - *cp = LG_CODEPOINT_INVALID; + *cp = LG_INVALID_CODE_POINT; return 0; } @@ -79,7 +79,7 @@ lg_utf8_decode(const char *s, size_t n, uint_least32_t *cp) * this also includes the cases where bits higher than * the 8th are set on systems with CHAR_BIT > 8 */ - *cp = LG_CODEPOINT_INVALID; + *cp = LG_INVALID_CODE_POINT; return 1; } if (1 + off > n) { @@ -87,7 +87,7 @@ lg_utf8_decode(const char *s, size_t n, uint_least32_t *cp) * input is not long enough, set cp as invalid and * return number of bytes needed */ - *cp = LG_CODEPOINT_INVALID; + *cp = LG_INVALID_CODE_POINT; return 1 + off; } @@ -107,7 +107,7 @@ lg_utf8_decode(const char *s, size_t n, uint_least32_t *cp) * higher than the 8th are set on systems * with CHAR_BIT > 8 */ - *cp = LG_CODEPOINT_INVALID; + *cp = LG_INVALID_CODE_POINT; return 1 + (i - 1); } /* @@ -126,7 +126,7 @@ lg_utf8_decode(const char *s, size_t n, uint_least32_t *cp) * not representable in UTF-16 (>0x10FFFF) (RFC-3629 * specifies the 
latter two conditions) */ - *cp = LG_CODEPOINT_INVALID; + *cp = LG_INVALID_CODE_POINT; } return 1 + off; @@ -144,7 +144,7 @@ lg_utf8_encode(uint_least32_t cp, char *s, size_t n) * (0xD800..0xDFFF) or not representable in UTF-16 * (>0x10FFFF), which RFC-3629 deems invalid for UTF-8. */ - cp = LG_CODEPOINT_INVALID; + cp = LG_INVALID_CODE_POINT; } /* determine necessary sequence type */ diff --git a/test/utf8-decode.c b/test/utf8-decode.c @@ -21,7 +21,7 @@ static const struct { .arr = NULL, .len = 0, .exp_len = 0, - .exp_cp = LG_CODEPOINT_INVALID, + .exp_cp = LG_INVALID_CODE_POINT, }, { /* invalid lead byte @@ -31,7 +31,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xFD }, .len = 1, .exp_len = 1, - .exp_cp = LG_CODEPOINT_INVALID, + .exp_cp = LG_INVALID_CODE_POINT, }, { /* valid 1-byte sequence @@ -61,7 +61,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xC3 }, .len = 1, .exp_len = 2, - .exp_cp = LG_CODEPOINT_INVALID, + .exp_cp = LG_INVALID_CODE_POINT, }, { /* invalid 2-byte sequence (second byte malformed) @@ -71,7 +71,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xC3, 0xFF }, .len = 2, .exp_len = 1, - .exp_cp = LG_CODEPOINT_INVALID, + .exp_cp = LG_INVALID_CODE_POINT, }, { /* invalid 2-byte sequence (overlong encoded) @@ -81,7 +81,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xC1, 0xBF }, .len = 2, .exp_len = 2, - .exp_cp = LG_CODEPOINT_INVALID, + .exp_cp = LG_INVALID_CODE_POINT, }, { /* valid 3-byte sequence @@ -101,7 +101,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xE0 }, .len = 1, .exp_len = 3, - .exp_cp = LG_CODEPOINT_INVALID, + .exp_cp = LG_INVALID_CODE_POINT, }, { /* invalid 3-byte sequence (second byte malformed) @@ -111,7 +111,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xE0, 0x7F, 0xBF }, .len = 3, .exp_len = 1, - .exp_cp = LG_CODEPOINT_INVALID, + .exp_cp = LG_INVALID_CODE_POINT, }, { /* invalid 3-byte sequence (third byte missing) @@ -121,7 +121,7 @@ static const 
struct { .arr = (char *)(unsigned char[]){ 0xE0, 0xBF }, .len = 2, .exp_len = 3, - .exp_cp = LG_CODEPOINT_INVALID, + .exp_cp = LG_INVALID_CODE_POINT, }, { /* invalid 3-byte sequence (third byte malformed) @@ -131,7 +131,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0x7F }, .len = 3, .exp_len = 2, - .exp_cp = LG_CODEPOINT_INVALID, + .exp_cp = LG_INVALID_CODE_POINT, }, { /* invalid 3-byte sequence (overlong encoded) @@ -141,7 +141,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xE0, 0x9F, 0xBF }, .len = 3, .exp_len = 3, - .exp_cp = LG_CODEPOINT_INVALID, + .exp_cp = LG_INVALID_CODE_POINT, }, { /* invalid 3-byte sequence (UTF-16 surrogate half) @@ -151,7 +151,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xED, 0xA0, 0x80 }, .len = 3, .exp_len = 3, - .exp_cp = LG_CODEPOINT_INVALID, + .exp_cp = LG_INVALID_CODE_POINT, }, { /* valid 4-byte sequence @@ -171,7 +171,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xF3 }, .len = 1, .exp_len = 4, - .exp_cp = LG_CODEPOINT_INVALID, + .exp_cp = LG_INVALID_CODE_POINT, }, { /* invalid 4-byte sequence (second byte malformed) @@ -181,7 +181,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF, 0xBF }, .len = 4, .exp_len = 1, - .exp_cp = LG_CODEPOINT_INVALID, + .exp_cp = LG_INVALID_CODE_POINT, }, { /* invalid 4-byte sequence (third byte missing) @@ -191,7 +191,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xF3, 0xBF }, .len = 2, .exp_len = 4, - .exp_cp = LG_CODEPOINT_INVALID, + .exp_cp = LG_INVALID_CODE_POINT, }, { /* invalid 4-byte sequence (third byte malformed) @@ -201,7 +201,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F, 0xBF }, .len = 4, .exp_len = 2, - .exp_cp = LG_CODEPOINT_INVALID, + .exp_cp = LG_INVALID_CODE_POINT, }, { /* invalid 4-byte sequence (fourth byte missing) @@ -211,7 +211,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF }, .len = 3, .exp_len = 4, - 
.exp_cp = LG_CODEPOINT_INVALID, + .exp_cp = LG_INVALID_CODE_POINT, }, { /* invalid 4-byte sequence (fourth byte malformed) @@ -221,7 +221,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0x7F }, .len = 4, .exp_len = 3, - .exp_cp = LG_CODEPOINT_INVALID, + .exp_cp = LG_INVALID_CODE_POINT, }, { /* invalid 4-byte sequence (overlong encoded) @@ -231,7 +231,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xF0, 0x80, 0x81, 0xBF }, .len = 4, .exp_len = 4, - .exp_cp = LG_CODEPOINT_INVALID, + .exp_cp = LG_INVALID_CODE_POINT, }, { /* invalid 4-byte sequence (UTF-16-unrepresentable) @@ -241,7 +241,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xF4, 0x90, 0x80, 0x80 }, .len = 4, .exp_len = 4, - .exp_cp = LG_CODEPOINT_INVALID, + .exp_cp = LG_INVALID_CODE_POINT, }, };