libgrapheme

Freestanding C library for Unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

commit 7e82c69bf25b4937a62ab96c5f7bba05b0fd2604
parent 33ba7ecc6bfe24f5de0e6d347f03d374547bcd07
Author: Laslo Hunhold <dev@frign.de>
Date:   Mon,  1 Jun 2020 12:11:57 +0200

Rename CP_INVALID to GRAPHEME_CP_INVALID

As all other symbols in the library are prefixed with grapheme_
to indicate their origin, we should do the same with the define
for an invalid code point.

Signed-off-by: Laslo Hunhold <dev@frign.de>

Diffstat:
M grapheme.h      |  2 +-
M src/codepoint.c | 12 ++++++------
M src/grapheme.c  |  5 +++--
M src/test_body.c | 38 +++++++++++++++++++-------------------
4 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/grapheme.h b/grapheme.h @@ -5,7 +5,7 @@ #include <stddef.h> #include <stdint.h> -#define CP_INVALID UINT32_C(0xFFFD) +#define GRAPHEME_CP_INVALID UINT32_C(0xFFFD) int grapheme_boundary(uint32_t, uint32_t, int *); diff --git a/src/codepoint.c b/src/codepoint.c @@ -53,7 +53,7 @@ grapheme_cp_decode(uint32_t *cp, const uint8_t *s, size_t n) if (n == 0) { /* a sequence must be at least 1 byte long */ - *cp = CP_INVALID; + *cp = GRAPHEME_CP_INVALID; return 1; } @@ -74,7 +74,7 @@ grapheme_cp_decode(uint32_t *cp, const uint8_t *s, size_t n) * first byte does not match a sequence type; * set cp as invalid and return 1 byte processed */ - *cp = CP_INVALID; + *cp = GRAPHEME_CP_INVALID; return 1; } if (1 + off > n) { @@ -82,7 +82,7 @@ grapheme_cp_decode(uint32_t *cp, const uint8_t *s, size_t n) * input is not long enough, set cp as invalid and * return number of bytes needed */ - *cp = CP_INVALID; + *cp = GRAPHEME_CP_INVALID; return 1 + off; } @@ -98,7 +98,7 @@ grapheme_cp_decode(uint32_t *cp, const uint8_t *s, size_t n) * unexpected character as recommended since * Unicode 6 (chapter 3) */ - *cp = CP_INVALID; + *cp = GRAPHEME_CP_INVALID; return 1 + (i - 1); } /* @@ -117,7 +117,7 @@ grapheme_cp_decode(uint32_t *cp, const uint8_t *s, size_t n) * not representable in UTF-16 (>0x10FFFF) (RFC-3629 * specifies the latter two conditions) */ - *cp = CP_INVALID; + *cp = GRAPHEME_CP_INVALID; } return 1 + off; @@ -135,7 +135,7 @@ grapheme_cp_encode(uint32_t cp, uint8_t *s, size_t n) * (0xD800..0xDFFF) or not representable in UTF-16 * (>0x10FFFF), which RFC-3629 deems invalid for UTF-8. 
*/ - cp = CP_INVALID; + cp = GRAPHEME_CP_INVALID; } /* determine necessary sequence type */ diff --git a/src/grapheme.c b/src/grapheme.c @@ -29,7 +29,7 @@ grapheme_len(const char *str) /* get first code point */ len += grapheme_cp_decode(&cp0, (uint8_t *)str, 5); - if (cp0 == CP_INVALID) { + if (cp0 == GRAPHEME_CP_INVALID) { return len; } @@ -37,7 +37,8 @@ grapheme_len(const char *str) /* get next code point */ ret = grapheme_cp_decode(&cp1, (uint8_t *)(str + len), 5); - if (cp1 == CP_INVALID || grapheme_boundary(cp0, cp1, &state)) { + if (cp1 == GRAPHEME_CP_INVALID || + grapheme_boundary(cp0, cp1, &state)) { /* we read an invalid cp or have a breakpoint */ break; } else { diff --git a/src/test_body.c b/src/test_body.c @@ -65,7 +65,7 @@ static const struct { .arr = NULL, .len = 0, .exp_len = 1, - .exp_cp = CP_INVALID, + .exp_cp = GRAPHEME_CP_INVALID, }, { /* invalid lead byte @@ -75,7 +75,7 @@ static const struct { .arr = (uint8_t[]){ 0xFD }, .len = 1, .exp_len = 1, - .exp_cp = CP_INVALID, + .exp_cp = GRAPHEME_CP_INVALID, }, { /* valid 1-byte sequence @@ -105,7 +105,7 @@ static const struct { .arr = (uint8_t[]){ 0xC3 }, .len = 1, .exp_len = 2, - .exp_cp = CP_INVALID, + .exp_cp = GRAPHEME_CP_INVALID, }, { /* invalid 2-byte sequence (second byte malformed) @@ -115,7 +115,7 @@ static const struct { .arr = (uint8_t[]){ 0xC3, 0xFF }, .len = 2, .exp_len = 1, - .exp_cp = CP_INVALID, + .exp_cp = GRAPHEME_CP_INVALID, }, { /* invalid 2-byte sequence (overlong encoded) @@ -125,7 +125,7 @@ static const struct { .arr = (uint8_t[]){ 0xC1, 0xBF }, .len = 2, .exp_len = 2, - .exp_cp = CP_INVALID, + .exp_cp = GRAPHEME_CP_INVALID, }, { /* valid 3-byte sequence @@ -145,7 +145,7 @@ static const struct { .arr = (uint8_t[]){ 0xE0 }, .len = 1, .exp_len = 3, - .exp_cp = CP_INVALID, + .exp_cp = GRAPHEME_CP_INVALID, }, { /* invalid 3-byte sequence (second byte malformed) @@ -155,7 +155,7 @@ static const struct { .arr = (uint8_t[]){ 0xE0, 0x7F, 0xBF }, .len = 3, .exp_len = 1, - .exp_cp = 
CP_INVALID, + .exp_cp = GRAPHEME_CP_INVALID, }, { /* invalid 3-byte sequence (third byte missing) @@ -165,7 +165,7 @@ static const struct { .arr = (uint8_t[]){ 0xE0, 0xBF }, .len = 2, .exp_len = 3, - .exp_cp = CP_INVALID, + .exp_cp = GRAPHEME_CP_INVALID, }, { /* invalid 3-byte sequence (third byte malformed) @@ -175,7 +175,7 @@ static const struct { .arr = (uint8_t[]){ 0xE0, 0xBF, 0x7F }, .len = 3, .exp_len = 2, - .exp_cp = CP_INVALID, + .exp_cp = GRAPHEME_CP_INVALID, }, { /* invalid 3-byte sequence (overlong encoded) @@ -185,7 +185,7 @@ static const struct { .arr = (uint8_t[]){ 0xE0, 0x9F, 0xBF }, .len = 3, .exp_len = 3, - .exp_cp = CP_INVALID, + .exp_cp = GRAPHEME_CP_INVALID, }, { /* invalid 3-byte sequence (UTF-16 surrogate half) @@ -195,7 +195,7 @@ static const struct { .arr = (uint8_t[]){ 0xED, 0xA0, 0x80 }, .len = 3, .exp_len = 3, - .exp_cp = CP_INVALID, + .exp_cp = GRAPHEME_CP_INVALID, }, { /* valid 4-byte sequence @@ -215,7 +215,7 @@ static const struct { .arr = (uint8_t[]){ 0xF3 }, .len = 1, .exp_len = 4, - .exp_cp = CP_INVALID, + .exp_cp = GRAPHEME_CP_INVALID, }, { /* invalid 4-byte sequence (second byte malformed) @@ -225,7 +225,7 @@ static const struct { .arr = (uint8_t[]){ 0xF3, 0x7F, 0xBF, 0xBF }, .len = 4, .exp_len = 1, - .exp_cp = CP_INVALID, + .exp_cp = GRAPHEME_CP_INVALID, }, { /* invalid 4-byte sequence (third byte missing) @@ -235,7 +235,7 @@ static const struct { .arr = (uint8_t[]){ 0xF3, 0xBF }, .len = 2, .exp_len = 4, - .exp_cp = CP_INVALID, + .exp_cp = GRAPHEME_CP_INVALID, }, { /* invalid 4-byte sequence (third byte malformed) @@ -245,7 +245,7 @@ static const struct { .arr = (uint8_t[]){ 0xF3, 0xBF, 0x7F, 0xBF }, .len = 4, .exp_len = 2, - .exp_cp = CP_INVALID, + .exp_cp = GRAPHEME_CP_INVALID, }, { /* invalid 4-byte sequence (fourth byte missing) @@ -255,7 +255,7 @@ static const struct { .arr = (uint8_t[]){ 0xF3, 0xBF, 0xBF }, .len = 3, .exp_len = 4, - .exp_cp = CP_INVALID, + .exp_cp = GRAPHEME_CP_INVALID, }, { /* invalid 4-byte sequence 
(fourth byte malformed) @@ -265,7 +265,7 @@ static const struct { .arr = (uint8_t[]){ 0xF3, 0xBF, 0xBF, 0x7F }, .len = 4, .exp_len = 3, - .exp_cp = CP_INVALID, + .exp_cp = GRAPHEME_CP_INVALID, }, { /* invalid 4-byte sequence (overlong encoded) @@ -275,7 +275,7 @@ static const struct { .arr = (uint8_t[]){ 0xF0, 0x80, 0x81, 0xBF }, .len = 4, .exp_len = 4, - .exp_cp = CP_INVALID, + .exp_cp = GRAPHEME_CP_INVALID, }, { /* invalid 4-byte sequence (UTF-16-unrepresentable) @@ -285,7 +285,7 @@ static const struct { .arr = (uint8_t[]){ 0xF4, 0x90, 0x80, 0x80 }, .len = 4, .exp_len = 4, - .exp_cp = CP_INVALID, + .exp_cp = GRAPHEME_CP_INVALID, }, };