libgrapheme

Freestanding C library for Unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

commit 62e7175fe5b2d7bebf16d39d334a645ec4361f83
parent 59952de9863572fbca88c3f9f1292709d381407b
Author: Laslo Hunhold <dev@frign.de>
Date:   Sat, 18 Dec 2021 13:43:58 +0100

Rename GRAPHEME_INVALID_CODE_POINT to GRAPHEME_CODEPOINT_INVALID

Now that the underscore is gone by convention in this codebase, the
new ordering is much easier to read. It also conveys that this define
is a codepoint.

Signed-off-by: Laslo Hunhold <dev@frign.de>

Diffstat:
M grapheme.h                 |  2 +-
M man/grapheme_utf8_decode.3 |  2 +-
M src/character.c            |  4 ++--
M src/utf8.c                 | 12 ++++++------
M test/utf8-decode.c         | 46 +++++++++++++++++++++++-----------------------
5 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/grapheme.h b/grapheme.h @@ -17,7 +17,7 @@ typedef struct grapheme_internal_segmentation_state { uint_least16_t flags; } GRAPHEME_STATE; -#define GRAPHEME_INVALID_CODE_POINT UINT32_C(0xFFFD) +#define GRAPHEME_CODEPOINT_INVALID UINT32_C(0xFFFD) size_t grapheme_character_nextbreak(const char *); diff --git a/man/grapheme_utf8_decode.3 b/man/grapheme_utf8_decode.3 @@ -18,7 +18,7 @@ of length If the UTF-8-sequence is invalid (overlong encoding, unexpected byte, string ends unexpectedly, empty string, etc.) the decoding is stopped at the last processed byte and the decoded codepoint set to -.Dv GRAPHEME_INVALID_CODE_POINT. +.Dv GRAPHEME_CODEPOINT_INVALID. .Pp If .Va cp diff --git a/src/character.c b/src/character.c @@ -203,7 +203,7 @@ grapheme_character_nextbreak(const char *str) /* get first codepoint */ len += grapheme_utf8_decode(str, (size_t)-1, &cp0); - if (cp0 == GRAPHEME_INVALID_CODE_POINT) { + if (cp0 == GRAPHEME_CODEPOINT_INVALID) { return len; } @@ -211,7 +211,7 @@ grapheme_character_nextbreak(const char *str) /* get next codepoint */ ret = grapheme_utf8_decode(str + len, (size_t)-1, &cp1); - if (cp1 == GRAPHEME_INVALID_CODE_POINT || + if (cp1 == GRAPHEME_CODEPOINT_INVALID || grapheme_character_isbreak(cp0, cp1, &state)) { /* we read an invalid cp or have a breakpoint */ break; diff --git a/src/utf8.c b/src/utf8.c @@ -54,7 +54,7 @@ grapheme_utf8_decode(const char *s, size_t n, uint_least32_t *cp) if (s == NULL || n == 0) { /* a sequence must be at least 1 byte long */ - *cp = GRAPHEME_INVALID_CODE_POINT; + *cp = GRAPHEME_CODEPOINT_INVALID; return 0; } @@ -79,14 +79,14 @@ grapheme_utf8_decode(const char *s, size_t n, uint_least32_t *cp) * this also includes the cases where bits higher than * the 8th are set on systems with CHAR_BIT > 8 */ - *cp = GRAPHEME_INVALID_CODE_POINT; + *cp = GRAPHEME_CODEPOINT_INVALID; return 1; } if (1 + off > n) { /* * input is not long enough, set cp as invalid */ - *cp = GRAPHEME_INVALID_CODE_POINT; + *cp = 
GRAPHEME_CODEPOINT_INVALID; /* * count the following continuation bytes, but nothing @@ -125,7 +125,7 @@ grapheme_utf8_decode(const char *s, size_t n, uint_least32_t *cp) * higher than the 8th are set on systems * with CHAR_BIT > 8 */ - *cp = GRAPHEME_INVALID_CODE_POINT; + *cp = GRAPHEME_CODEPOINT_INVALID; return 1 + (i - 1); } /* @@ -144,7 +144,7 @@ grapheme_utf8_decode(const char *s, size_t n, uint_least32_t *cp) * not representable in UTF-16 (>0x10FFFF) (RFC-3629 * specifies the latter two conditions) */ - *cp = GRAPHEME_INVALID_CODE_POINT; + *cp = GRAPHEME_CODEPOINT_INVALID; } return 1 + off; @@ -162,7 +162,7 @@ grapheme_utf8_encode(uint_least32_t cp, char *s, size_t n) * (0xD800..0xDFFF) or not representable in UTF-16 * (>0x10FFFF), which RFC-3629 deems invalid for UTF-8. */ - cp = GRAPHEME_INVALID_CODE_POINT; + cp = GRAPHEME_CODEPOINT_INVALID; } /* determine necessary sequence type */ diff --git a/test/utf8-decode.c b/test/utf8-decode.c @@ -21,7 +21,7 @@ static const struct { .arr = NULL, .len = 0, .exp_len = 0, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* invalid lead byte @@ -31,7 +31,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xFD }, .len = 1, .exp_len = 1, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* valid 1-byte sequence @@ -61,7 +61,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xC3 }, .len = 1, .exp_len = 2, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* invalid 2-byte sequence (second byte malformed) @@ -71,7 +71,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xC3, 0xFF }, .len = 2, .exp_len = 1, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* invalid 2-byte sequence (overlong encoded) @@ -81,7 +81,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xC1, 0xBF }, .len = 2, .exp_len = 2, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + 
.exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* valid 3-byte sequence @@ -101,7 +101,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xE0 }, .len = 1, .exp_len = 3, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* invalid 3-byte sequence (second byte malformed) @@ -111,7 +111,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xE0, 0x7F, 0xBF }, .len = 3, .exp_len = 1, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* invalid 3-byte sequence (short string, second byte malformed) @@ -121,7 +121,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xE0, 0x7F }, .len = 2, .exp_len = 1, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* invalid 3-byte sequence (third byte missing) @@ -131,7 +131,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xE0, 0xBF }, .len = 2, .exp_len = 3, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* invalid 3-byte sequence (third byte malformed) @@ -141,7 +141,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0x7F }, .len = 3, .exp_len = 2, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* invalid 3-byte sequence (overlong encoded) @@ -151,7 +151,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xE0, 0x9F, 0xBF }, .len = 3, .exp_len = 3, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* invalid 3-byte sequence (UTF-16 surrogate half) @@ -161,7 +161,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xED, 0xA0, 0x80 }, .len = 3, .exp_len = 3, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* valid 4-byte sequence @@ -181,7 +181,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xF3 }, .len = 1, .exp_len = 4, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = 
GRAPHEME_CODEPOINT_INVALID, }, { /* invalid 4-byte sequence (second byte malformed) @@ -191,7 +191,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF, 0xBF }, .len = 4, .exp_len = 1, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* invalid 4-byte sequence (short string 1, second byte malformed) @@ -201,7 +201,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xF3, 0x7F }, .len = 2, .exp_len = 1, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* invalid 4-byte sequence (short string 2, second byte malformed) @@ -211,7 +211,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF }, .len = 3, .exp_len = 1, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { @@ -222,7 +222,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xF3, 0xBF }, .len = 2, .exp_len = 4, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* invalid 4-byte sequence (third byte malformed) @@ -232,7 +232,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F, 0xBF }, .len = 4, .exp_len = 2, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* invalid 4-byte sequence (short string, third byte malformed) @@ -242,7 +242,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F }, .len = 3, .exp_len = 2, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* invalid 4-byte sequence (fourth byte missing) @@ -252,7 +252,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF }, .len = 3, .exp_len = 4, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* invalid 4-byte sequence (fourth byte malformed) @@ -262,7 +262,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0x7F }, .len = 4, .exp_len = 3, 
- .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* invalid 4-byte sequence (overlong encoded) @@ -272,7 +272,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xF0, 0x80, 0x81, 0xBF }, .len = 4, .exp_len = 4, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, { /* invalid 4-byte sequence (UTF-16-unrepresentable) @@ -282,7 +282,7 @@ static const struct { .arr = (char *)(unsigned char[]){ 0xF4, 0x90, 0x80, 0x80 }, .len = 4, .exp_len = 4, - .exp_cp = GRAPHEME_INVALID_CODE_POINT, + .exp_cp = GRAPHEME_CODEPOINT_INVALID, }, };