libgrapheme

Freestanding C library for Unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

commit abdc2ba0c764c527aaa2ed9fe42db27d71a10bc2
parent 50efb9a3396588e6e1266f51ec5446a9fa8013ea
Author: Laslo Hunhold <dev@frign.de>
Date:   Tue, 15 Nov 2022 15:53:56 +0100

Apply clang-format

Even though this makes the history of the code a bit harder to trace
back, it's better to rip the band-aid off now than to put it off into
the future.

With these changes, formatting is automatically governed and ensured by
a simple call to

	make format

Signed-off-by: Laslo Hunhold <dev@frign.de>

Diffstat:
Mbenchmark/bidirectional.c | 2+-
Mbenchmark/case.c | 5+++--
Mbenchmark/character.c | 12++++++------
Mbenchmark/line.c | 4++--
Mbenchmark/sentence.c | 7++++---
Mbenchmark/utf8-decode.c | 24+++++++++++-------------
Mbenchmark/util.c | 25++++++++++++-------------
Mbenchmark/util.h | 8++++----
Mbenchmark/word.c | 4++--
Mgen/bidirectional-test.c | 150+++++++++++++++++++++++++++++++++++++++++++++++++------------------------------
Mgen/bidirectional.c | 144++++++++++++++++++++++++++++++++++++++++---------------------------------------
Mgen/case.c | 79+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
Mgen/character.c | 64++++++++++++++++++++++++++++++++--------------------------------
Mgen/line.c | 343++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Mgen/sentence.c | 66+++++++++++++++++++++++++++++++++---------------------------------
Mgen/util.c | 202+++++++++++++++++++++++++++++++++++++++++++++----------------------------------
Mgen/util.h | 39++++++++++++++++++++-------------------
Mgen/word.c | 97++++++++++++++++++++++++++++++++++++++++---------------------------------------
Mgrapheme.h | 24++++++++++++++----------
Msrc/bidirectional.c | 323++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
Msrc/case.c | 125+++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
Msrc/character.c | 160+++++++++++++++++++++++++++++++++++++++++--------------------------------------
Msrc/line.c | 108++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Msrc/sentence.c | 44++++++++++++++++++++++----------------------
Msrc/utf8.c | 26+++++++++++++-------------
Msrc/util.c | 51+++++++++++++++++++++++++++++----------------------
Msrc/util.h | 26++++++++++++++------------
Msrc/word.c | 95++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Mtest/bidirectional.c | 20+++++++++++++-------
Mtest/case.c | 331+++++++++++++++++++++++++++++++++++++++++++++++++------------------------------
Mtest/character.c | 19+++++++++----------
Mtest/line.c | 21++++++++-------------
Mtest/sentence.c | 23++++++++++-------------
Mtest/utf8-decode.c | 344+++++++++++++++++++++++++++++++++++++++----------------------------------------
Mtest/utf8-encode.c | 39++++++++++++++++++++-------------------
Mtest/util.c | 44+++++++++++++++++++++++++++-----------------
Mtest/util.h | 15+++++++++++----
Mtest/word.c | 16++++++----------
38 files changed, 1736 insertions(+), 1393 deletions(-)

diff --git a/benchmark/bidirectional.c b/benchmark/bidirectional.c @@ -5,8 +5,8 @@ #include <stdlib.h> #include <string.h> -#include "../grapheme.h" #include "../gen/bidirectional-test.h" +#include "../grapheme.h" #include "util.h" #define NUM_ITERATIONS 100000 diff --git a/benchmark/case.c b/benchmark/case.c @@ -6,8 +6,8 @@ #include <stdlib.h> #include <string.h> -#include "../grapheme.h" #include "../gen/word-test.h" +#include "../grapheme.h" #include "util.h" #define NUM_ITERATIONS 10000 @@ -40,7 +40,8 @@ main(int argc, char *argv[]) &(p.srclen))) == NULL) { return 1; } - if ((p.dest = calloc((p.destlen = 2 * p.srclen), sizeof(*(p.dest)))) == NULL) { + if ((p.dest = calloc((p.destlen = 2 * p.srclen), sizeof(*(p.dest)))) == + NULL) { fprintf(stderr, "calloc: Out of memory\n"); } diff --git a/benchmark/character.c b/benchmark/character.c @@ -6,8 +6,8 @@ #include <stdlib.h> #include <string.h> -#include "../grapheme.h" #include "../gen/character-test.h" +#include "../grapheme.h" #include "util.h" #include <utf8proc.h> @@ -28,7 +28,7 @@ libgrapheme(const void *payload) size_t i; for (i = 0; i + 1 < p->buflen; i++) { - (void)grapheme_is_character_break(p->buf[i], p->buf[i+1], + (void)grapheme_is_character_break(p->buf[i], p->buf[i + 1], &state); } } @@ -41,9 +41,8 @@ libutf8proc(const void *payload) size_t i; for (i = 0; i + 1 < p->buflen; i++) { - (void)utf8proc_grapheme_break_stateful(p->buf_utf8proc[i], - p->buf_utf8proc[i+1], - &state); + (void)utf8proc_grapheme_break_stateful( + p->buf_utf8proc[i], p->buf_utf8proc[i + 1], &state); } } @@ -61,7 +60,8 @@ main(int argc, char *argv[]) &(p.buflen))) == NULL) { return 1; } - if ((p.buf_utf8proc = malloc(p.buflen * sizeof(*(p.buf_utf8proc)))) == NULL) { + if ((p.buf_utf8proc = malloc(p.buflen * sizeof(*(p.buf_utf8proc)))) == + NULL) { fprintf(stderr, "malloc: %s\n", strerror(errno)); exit(1); } diff --git a/benchmark/line.c b/benchmark/line.c @@ -6,8 +6,8 @@ #include <stdlib.h> #include <string.h> -#include 
"../grapheme.h" #include "../gen/line-test.h" +#include "../grapheme.h" #include "util.h" #define NUM_ITERATIONS 10000 @@ -23,7 +23,7 @@ libgrapheme(const void *payload) const struct break_benchmark_payload *p = payload; size_t off; - for (off = 0; off < p->buflen; ) { + for (off = 0; off < p->buflen;) { off += grapheme_next_line_break(p->buf + off, p->buflen - off); } } diff --git a/benchmark/sentence.c b/benchmark/sentence.c @@ -6,8 +6,8 @@ #include <stdlib.h> #include <string.h> -#include "../grapheme.h" #include "../gen/sentence-test.h" +#include "../grapheme.h" #include "util.h" #define NUM_ITERATIONS 100000 @@ -23,8 +23,9 @@ libgrapheme(const void *payload) const struct break_benchmark_payload *p = payload; size_t off; - for (off = 0; off < p->buflen; ) { - off += grapheme_next_sentence_break(p->buf + off, p->buflen - off); + for (off = 0; off < p->buflen;) { + off += grapheme_next_sentence_break(p->buf + off, + p->buflen - off); } } diff --git a/benchmark/utf8-decode.c b/benchmark/utf8-decode.c @@ -6,8 +6,8 @@ #include <stdlib.h> #include <string.h> -#include "../grapheme.h" #include "../gen/character-test.h" +#include "../grapheme.h" #include "util.h" #include <utf8proc.h> @@ -28,9 +28,8 @@ libgrapheme(const void *payload) size_t ret, off; for (off = 0; off < p->buflen; off += ret) { - if ((ret = grapheme_decode_utf8(p->buf + off, - p->buflen - off, &cp)) > - (p->buflen - off)) { + if ((ret = grapheme_decode_utf8(p->buf + off, p->buflen - off, + &cp)) > (p->buflen - off)) { break; } (void)cp; @@ -48,7 +47,7 @@ libutf8proc(const void *payload) for (off = 0; off < p->buflen; off += (size_t)ret) { if ((ret = utf8proc_iterate(p->buf_utf8proc + off, (utf8proc_ssize_t)(p->buflen - off), - &cp)) < 0) { + &cp)) < 0) { break; } (void)cp; @@ -64,9 +63,8 @@ main(int argc, char *argv[]) (void)argc; - p.buf = generate_utf8_test_buffer(character_break_test, - LEN(character_break_test), - &(p.buflen)); + p.buf = generate_utf8_test_buffer( + character_break_test, 
LEN(character_break_test), &(p.buflen)); /* convert cp-buffer to stupid custom libutf8proc-uint8-type */ if ((p.buf_utf8proc = malloc(p.buflen)) == NULL) { @@ -74,7 +72,7 @@ main(int argc, char *argv[]) exit(1); } for (i = 0; i < p.buflen; i++) { - /* + /* * even if char is larger than 8 bit, it will only have * any of the first 8 bits set (by construction). */ @@ -82,11 +80,11 @@ main(int argc, char *argv[]) } printf("%s\n", argv[0]); - run_benchmark(libgrapheme, &p, "libgrapheme ", NULL, - "byte", &baseline, NUM_ITERATIONS, p.buflen); + run_benchmark(libgrapheme, &p, "libgrapheme ", NULL, "byte", &baseline, + NUM_ITERATIONS, p.buflen); run_benchmark(libutf8proc, &p, "libutf8proc ", - "but unsafe (does not detect overlong encodings)", - "byte", &baseline, NUM_ITERATIONS, p.buflen); + "but unsafe (does not detect overlong encodings)", "byte", + &baseline, NUM_ITERATIONS, p.buflen); free(p.buf); free(p.buf_utf8proc); diff --git a/benchmark/util.c b/benchmark/util.c @@ -1,7 +1,7 @@ /* See LICENSE file for copyright and license details. 
*/ #include <math.h> -#include <stdlib.h> #include <stdio.h> +#include <stdlib.h> #include <time.h> #include "../gen/types.h" @@ -20,7 +20,8 @@ generate_cp_test_buffer(const struct break_test *test, size_t testlen, *buflen += test[i].cplen; } if (!(buf = calloc(*buflen, sizeof(*buf)))) { - fprintf(stderr, "generate_test_buffer: calloc: Out of memory.\n"); + fprintf(stderr, + "generate_test_buffer: calloc: Out of memory.\n"); exit(1); } for (i = 0, off = 0; i < testlen; i++) { @@ -48,18 +49,18 @@ generate_utf8_test_buffer(const struct break_test *test, size_t testlen, } (*buflen)++; /* terminating NUL-byte */ if (!(buf = malloc(*buflen))) { - fprintf(stderr, "generate_test_buffer: malloc: Out of memory.\n"); + fprintf(stderr, + "generate_test_buffer: malloc: Out of memory.\n"); exit(1); } for (i = 0, off = 0; i < testlen; i++) { for (j = 0; j < test[i].cplen; j++, off += ret) { - if ((ret = grapheme_encode_utf8(test[i].cp[j], - buf + off, - *buflen - off)) > + if ((ret = grapheme_encode_utf8( + test[i].cp[j], buf + off, *buflen - off)) > (*buflen - off)) { /* shouldn't happen */ fprintf(stderr, "generate_utf8_test_buffer: " - "Buffer too small.\n"); + "Buffer too small.\n"); exit(1); } } @@ -77,10 +78,9 @@ time_diff(struct timespec *a, struct timespec *b) } void -run_benchmark(void (*func)(const void *), const void *payload, - const char *name, const char *comment, const char *unit, - double *baseline, size_t num_iterations, - size_t units_per_iteration) +run_benchmark(void (*func)(const void *), const void *payload, const char *name, + const char *comment, const char *unit, double *baseline, + size_t num_iterations, size_t units_per_iteration) { struct timespec start, end; size_t i; @@ -109,7 +109,6 @@ run_benchmark(void (*func)(const void *), const void *payload, printf(" avg. %.3es/%s (%.2f%% %s%s%s)\n", diff, unit, fabs(1.0 - diff / *baseline) * 100, (diff < *baseline) ? "faster" : "slower", - comment ? ", " : "", - comment ? comment : ""); + comment ? 
", " : "", comment ? comment : ""); } } diff --git a/benchmark/util.h b/benchmark/util.h @@ -7,10 +7,10 @@ #define LEN(x) (sizeof(x) / sizeof(*(x))) #ifdef __has_attribute - #if __has_attribute(optnone) - void libgrapheme(const void *) __attribute__((optnone)); - void libutf8proc(const void *) __attribute__((optnone)); - #endif +#if __has_attribute(optnone) +void libgrapheme(const void *) __attribute__((optnone)); +void libutf8proc(const void *) __attribute__((optnone)); +#endif #endif uint_least32_t *generate_cp_test_buffer(const struct break_test *, size_t, diff --git a/benchmark/word.c b/benchmark/word.c @@ -6,8 +6,8 @@ #include <stdlib.h> #include <string.h> -#include "../grapheme.h" #include "../gen/word-test.h" +#include "../grapheme.h" #include "util.h" #define NUM_ITERATIONS 10000 @@ -23,7 +23,7 @@ libgrapheme(const void *payload) const struct break_benchmark_payload *p = payload; size_t off; - for (off = 0; off < p->buflen; ) { + for (off = 0; off < p->buflen;) { off += grapheme_next_word_break(p->buf + off, p->buflen - off); } } diff --git a/gen/bidirectional-test.c b/gen/bidirectional-test.c @@ -3,8 +3,8 @@ #include <inttypes.h> #include <stddef.h> #include <stdio.h> -#include <string.h> #include <stdlib.h> +#include <string.h> #include "../grapheme.h" #include "util.h" @@ -23,29 +23,29 @@ static const struct { const char *class; const uint_least32_t cp; } classcpmap[] = { - { .class = "L", .cp = UINT32_C(0x0041) }, - { .class = "AL", .cp = UINT32_C(0x0608) }, - { .class = "AN", .cp = UINT32_C(0x0600) }, - { .class = "B", .cp = UINT32_C(0x000A) }, - { .class = "BN", .cp = UINT32_C(0x0000) }, - { .class = "CS", .cp = UINT32_C(0x002C) }, - { .class = "EN", .cp = UINT32_C(0x0030) }, - { .class = "ES", .cp = UINT32_C(0x002B) }, - { .class = "ET", .cp = UINT32_C(0x0023) }, + { .class = "L", .cp = UINT32_C(0x0041) }, + { .class = "AL", .cp = UINT32_C(0x0608) }, + { .class = "AN", .cp = UINT32_C(0x0600) }, + { .class = "B", .cp = UINT32_C(0x000A) }, + { .class 
= "BN", .cp = UINT32_C(0x0000) }, + { .class = "CS", .cp = UINT32_C(0x002C) }, + { .class = "EN", .cp = UINT32_C(0x0030) }, + { .class = "ES", .cp = UINT32_C(0x002B) }, + { .class = "ET", .cp = UINT32_C(0x0023) }, { .class = "FSI", .cp = UINT32_C(0x2068) }, { .class = "LRE", .cp = UINT32_C(0x202A) }, { .class = "LRI", .cp = UINT32_C(0x2066) }, { .class = "LRO", .cp = UINT32_C(0x202D) }, { .class = "NSM", .cp = UINT32_C(0x0300) }, - { .class = "ON", .cp = UINT32_C(0x0021) }, + { .class = "ON", .cp = UINT32_C(0x0021) }, { .class = "PDF", .cp = UINT32_C(0x202C) }, { .class = "PDI", .cp = UINT32_C(0x2069) }, - { .class = "R", .cp = UINT32_C(0x05BE) }, + { .class = "R", .cp = UINT32_C(0x05BE) }, { .class = "RLE", .cp = UINT32_C(0x202B) }, { .class = "RLI", .cp = UINT32_C(0x2067) }, { .class = "RLO", .cp = UINT32_C(0x202E) }, - { .class = "S", .cp = UINT32_C(0x0009) }, - { .class = "WS", .cp = UINT32_C(0x000C) }, + { .class = "S", .cp = UINT32_C(0x0009) }, + { .class = "WS", .cp = UINT32_C(0x000C) }, }; static int @@ -59,7 +59,8 @@ classtocp(const char *str, size_t len, uint_least32_t *cp) return 0; } } - fprintf(stderr, "classtocp: unknown class string '%.*s'.\n", (int)len, str); + fprintf(stderr, "classtocp: unknown class string '%.*s'.\n", (int)len, + str); return 1; } @@ -77,8 +78,10 @@ parse_class_list(const char *str, uint_least32_t **cp, size_t *cplen) } /* count the number of spaces in the string and infer list length */ - for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; count++, tmp1 = tmp2 + 1) + for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; + count++, tmp1 = tmp2 + 1) { ; + } /* allocate resources */ if (!(*cp = calloc((*cplen = count), sizeof(**cp)))) { @@ -89,7 +92,8 @@ parse_class_list(const char *str, uint_least32_t **cp, size_t *cplen) /* go through the string again, parsing the classes */ for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) { tmp2 = strchr(tmp1, ' '); - if (classtocp(tmp1, tmp2 ? 
(size_t)(tmp2 - tmp1) : strlen(tmp1), &((*cp)[i]))) { + if (classtocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1), + &((*cp)[i]))) { return 1; } if (tmp2 != NULL) { @@ -135,12 +139,10 @@ strtolevel(const char *str, size_t len, int_least8_t *level) if (str[0] != '1') { goto toolarge; } - *level = (str[0] - '0') * 100 + - (str[1] - '0') * 10 + - (str[2] - '0'); + *level = (str[0] - '0') * 100 + (str[1] - '0') * 10 + + (str[2] - '0'); } else if (len == 2) { - *level = (str[0] - '0') * 10 + - (str[1] - '0'); + *level = (str[0] - '0') * 10 + (str[1] - '0'); } else if (len == 1) { *level = (str[0] - '0'); } else { /* len == 0 */ @@ -149,8 +151,7 @@ strtolevel(const char *str, size_t len, int_least8_t *level) return 0; toolarge: - fprintf(stderr, "hextocp: '%.*s' is too large.\n", - (int)len, str); + fprintf(stderr, "hextocp: '%.*s' is too large.\n", (int)len, str); return 1; } @@ -167,8 +168,10 @@ parse_level_list(const char *str, int_least8_t **level, size_t *levellen) } /* count the number of spaces in the string and infer list length */ - for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; count++, tmp1 = tmp2 + 1) + for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; + count++, tmp1 = tmp2 + 1) { ; + } /* allocate resources */ if (!(*level = calloc((*levellen = count), sizeof(**level)))) { @@ -179,7 +182,9 @@ parse_level_list(const char *str, int_least8_t **level, size_t *levellen) /* go through the string again, parsing the levels */ for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) { tmp2 = strchr(tmp1, ' '); - if (strtolevel(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1), &((*level)[i]))) { + if (strtolevel(tmp1, + tmp2 ? 
(size_t)(tmp2 - tmp1) : strlen(tmp1), + &((*level)[i]))) { return 1; } if (tmp2 != NULL) { @@ -199,7 +204,8 @@ bidirectional_test_list_print(const struct bidirectional_test *test, printf("/* Automatically generated by %s */\n" "#include <stdint.h>\n#include <stddef.h>\n\n" - "#include \"../grapheme.h\"\n\n", progname); + "#include \"../grapheme.h\"\n\n", + progname); printf("static const struct {\n" "\tuint_least32_t *cp;\n" @@ -208,7 +214,8 @@ bidirectional_test_list_print(const struct bidirectional_test *test, "\tsize_t modelen;\n" "\tint_least8_t *level;\n" "\tint_least8_t *reorder;\n" - "\tsize_t reorderlen;\n} %s[] = {\n", identifier); + "\tsize_t reorderlen;\n} %s[] = {\n", + identifier); for (i = 0; i < testlen; i++) { printf("\t{\n"); @@ -222,11 +229,13 @@ bidirectional_test_list_print(const struct bidirectional_test *test, printf(" },\n"); printf("\t\t.cplen = %zu,\n", test[i].cplen); - printf("\t\t.mode = (enum grapheme_bidirectional_override[]){"); + printf("\t\t.mode = (enum " + "grapheme_bidirectional_override[]){"); for (j = 0; j < test[i].modelen; j++) { if (test[i].mode[j] == GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL) { - printf(" GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL"); + printf(" GRAPHEME_BIDIRECTIONAL_OVERRIDE_" + "NEUTRAL"); } else if (test[i].mode[j] == GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { printf(" GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR"); @@ -279,8 +288,8 @@ static int_least8_t *current_reorder; static size_t current_reorder_len; static int -test_callback(const char *file, char **field, size_t nfields, - char *comment, void *payload) +test_callback(const char *file, char **field, size_t nfields, char *comment, + void *payload) { char *tmp; @@ -292,23 +301,31 @@ test_callback(const char *file, char **field, size_t nfields, if (nfields > 0 && field[0][0] == '@') { if (!strncmp(field[0], "@Levels:", sizeof("@Levels:") - 1)) { tmp = field[0] + sizeof("@Levels:") - 1; - for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t'); tmp++) + for (; *tmp != 
'\0' && (*tmp == ' ' || *tmp == '\t'); + tmp++) { ; + } free(current_level); - parse_level_list(tmp, &current_level, &current_level_len); - } else if (!strncmp(field[0], "@Reorder:", sizeof("@Reorder:") - 1)) { + parse_level_list(tmp, &current_level, + &current_level_len); + } else if (!strncmp(field[0], + "@Reorder:", sizeof("@Reorder:") - 1)) { tmp = field[0] + sizeof("@Reorder:") - 1; - for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t'); tmp++) + for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t'); + tmp++) { ; + } free(current_reorder); - parse_level_list(tmp, &current_reorder, &current_reorder_len); + parse_level_list(tmp, &current_reorder, + &current_reorder_len); } else { fprintf(stderr, "Unknown @-input-line.\n"); exit(1); } } else { if (nfields < 2) { - /* discard any line that does not have at least 2 fields */ + /* discard any line that does not have at least 2 fields + */ return 0; } @@ -321,26 +338,33 @@ test_callback(const char *file, char **field, size_t nfields, /* parse field data */ parse_class_list(field[0], &(test[testlen - 1].cp), &(test[testlen - 1].cplen)); - + /* copy current level- and reorder-arrays */ - if (!(test[testlen - 1].level = calloc(current_level_len, sizeof(*(test[testlen - 1].level))))) { + if (!(test[testlen - 1].level = + calloc(current_level_len, + sizeof(*(test[testlen - 1].level))))) { fprintf(stderr, "calloc: %s\n", strerror(errno)); exit(1); } - memcpy(test[testlen - 1].level, current_level, current_level_len * sizeof(*(test[testlen - 1].level))); + memcpy(test[testlen - 1].level, current_level, + current_level_len * sizeof(*(test[testlen - 1].level))); - if (!(test[testlen - 1].reorder = calloc(current_reorder_len, sizeof(*(test[testlen - 1].reorder))))) { + if (!(test[testlen - 1].reorder = + calloc(current_reorder_len, + sizeof(*(test[testlen - 1].reorder))))) { fprintf(stderr, "calloc: %s\n", strerror(errno)); exit(1); } if (current_reorder != NULL) { memcpy(test[testlen - 1].reorder, current_reorder, - 
current_reorder_len * sizeof(*(test[testlen - 1].reorder))); + current_reorder_len * + sizeof(*(test[testlen - 1].reorder))); } test[testlen - 1].reorderlen = current_reorder_len; - + if (current_level_len != test[testlen - 1].cplen) { - fprintf(stderr, "mismatch between string and level lengths.\n"); + fprintf(stderr, + "mismatch between string and level lengths.\n"); exit(1); } @@ -349,27 +373,38 @@ test_callback(const char *file, char **field, size_t nfields, fprintf(stderr, "malformed paragraph-level-bitset.\n"); exit(1); } else if (field[1][0] == '2') { - test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR; + test[testlen - 1].mode[0] = + GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR; test[testlen - 1].modelen = 1; } else if (field[1][0] == '3') { /* auto=0 and LTR=1 */ - test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; - test[testlen - 1].mode[1] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR; + test[testlen - 1].mode[0] = + GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; + test[testlen - 1].mode[1] = + GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR; test[testlen - 1].modelen = 2; } else if (field[1][0] == '4') { - test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL; + test[testlen - 1].mode[0] = + GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL; test[testlen - 1].modelen = 1; - } else if (field[1][0] == '5') { - test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; - test[testlen - 1].mode[1] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL; + } else if (field[1][0] == '5') { + test[testlen - 1].mode[0] = + GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; + test[testlen - 1].mode[1] = + GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL; test[testlen - 1].modelen = 2; } else if (field[1][0] == '7') { - test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; - test[testlen - 1].mode[1] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR; - test[testlen - 1].mode[2] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL; + test[testlen - 1].mode[0] = + GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; + 
test[testlen - 1].mode[1] = + GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR; + test[testlen - 1].mode[2] = + GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL; test[testlen - 1].modelen = 3; } else { - fprintf(stderr, "unhandled paragraph-level-bitset %s.\n", field[1]); + fprintf(stderr, + "unhandled paragraph-level-bitset %s.\n", + field[1]); exit(1); } } @@ -414,7 +449,8 @@ character_test_callback(const char *file, char **field, size_t nfields, } else if (field[1][0] == '1') { test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL; } else if (field[1][0] == '2') { - test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; + test[testlen - 1].mode[0] = + GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; } else { fprintf(stderr, "unhandled paragraph-level-setting.\n"); exit(1); diff --git a/gen/bidirectional.c b/gen/bidirectional.c @@ -15,118 +15,118 @@ static const struct property_spec bidi_property[] = { { /* default */ .enumname = "L", - .file = FILE_BIDI_CLASS, - .ucdname = "L", + .file = FILE_BIDI_CLASS, + .ucdname = "L", }, { .enumname = "AL", - .file = FILE_BIDI_CLASS, - .ucdname = "AL", + .file = FILE_BIDI_CLASS, + .ucdname = "AL", }, { .enumname = "AN", - .file = FILE_BIDI_CLASS, - .ucdname = "AN", + .file = FILE_BIDI_CLASS, + .ucdname = "AN", }, { .enumname = "B", - .file = FILE_BIDI_CLASS, - .ucdname = "B", + .file = FILE_BIDI_CLASS, + .ucdname = "B", }, { .enumname = "BN", - .file = FILE_BIDI_CLASS, - .ucdname = "BN", + .file = FILE_BIDI_CLASS, + .ucdname = "BN", }, { .enumname = "CS", - .file = FILE_BIDI_CLASS, - .ucdname = "CS", + .file = FILE_BIDI_CLASS, + .ucdname = "CS", }, { .enumname = "EN", - .file = FILE_BIDI_CLASS, - .ucdname = "EN", + .file = FILE_BIDI_CLASS, + .ucdname = "EN", }, { .enumname = "ES", - .file = FILE_BIDI_CLASS, - .ucdname = "ES", + .file = FILE_BIDI_CLASS, + .ucdname = "ES", }, { .enumname = "ET", - .file = FILE_BIDI_CLASS, - .ucdname = "ET", + .file = FILE_BIDI_CLASS, + .ucdname = "ET", }, { .enumname = "FSI", - .file = FILE_BIDI_CLASS, 
- .ucdname = "FSI", + .file = FILE_BIDI_CLASS, + .ucdname = "FSI", }, { .enumname = "LRE", - .file = FILE_BIDI_CLASS, - .ucdname = "LRE", + .file = FILE_BIDI_CLASS, + .ucdname = "LRE", }, { .enumname = "LRI", - .file = FILE_BIDI_CLASS, - .ucdname = "LRI", + .file = FILE_BIDI_CLASS, + .ucdname = "LRI", }, { .enumname = "LRO", - .file = FILE_BIDI_CLASS, - .ucdname = "LRO", + .file = FILE_BIDI_CLASS, + .ucdname = "LRO", }, { .enumname = "NSM", - .file = FILE_BIDI_CLASS, - .ucdname = "NSM", + .file = FILE_BIDI_CLASS, + .ucdname = "NSM", }, { .enumname = "ON", - .file = FILE_BIDI_CLASS, - .ucdname = "ON", + .file = FILE_BIDI_CLASS, + .ucdname = "ON", }, { .enumname = "PDF", - .file = FILE_BIDI_CLASS, - .ucdname = "PDF", + .file = FILE_BIDI_CLASS, + .ucdname = "PDF", }, { .enumname = "PDI", - .file = FILE_BIDI_CLASS, - .ucdname = "PDI", + .file = FILE_BIDI_CLASS, + .ucdname = "PDI", }, { .enumname = "R", - .file = FILE_BIDI_CLASS, - .ucdname = "R", + .file = FILE_BIDI_CLASS, + .ucdname = "R", }, { .enumname = "RLE", - .file = FILE_BIDI_CLASS, - .ucdname = "RLE", + .file = FILE_BIDI_CLASS, + .ucdname = "RLE", }, { .enumname = "RLI", - .file = FILE_BIDI_CLASS, - .ucdname = "RLI", + .file = FILE_BIDI_CLASS, + .ucdname = "RLI", }, { .enumname = "RLO", - .file = FILE_BIDI_CLASS, - .ucdname = "RLO", + .file = FILE_BIDI_CLASS, + .ucdname = "RLO", }, { .enumname = "S", - .file = FILE_BIDI_CLASS, - .ucdname = "S", + .file = FILE_BIDI_CLASS, + .ucdname = "S", }, { .enumname = "WS", - .file = FILE_BIDI_CLASS, - .ucdname = "WS", + .file = FILE_BIDI_CLASS, + .ucdname = "WS", }, }; @@ -135,11 +135,12 @@ static struct { uint_least32_t cp_pair; char type; } *b = NULL; + static size_t blen; static int -bracket_callback(const char *file, char **field, size_t nfields, - char *comment, void *payload) +bracket_callback(const char *file, char **field, size_t nfields, char *comment, + void *payload) { (void)file; (void)comment; @@ -189,11 +190,12 @@ post_process(struct properties *prop) } 
static uint_least8_t -fill_missing(uint_least32_t cp) { +fill_missing(uint_least32_t cp) +{ /* based on the @missing-properties in data/DerivedBidiClass.txt */ - if ((cp >= UINT32_C(0x0590) && cp <= UINT32_C(0x05FF)) || - (cp >= UINT32_C(0x07C0) && cp <= UINT32_C(0x085F)) || - (cp >= UINT32_C(0xFB1D) && cp <= UINT32_C(0xFB4F)) || + if ((cp >= UINT32_C(0x0590) && cp <= UINT32_C(0x05FF)) || + (cp >= UINT32_C(0x07C0) && cp <= UINT32_C(0x085F)) || + (cp >= UINT32_C(0xFB1D) && cp <= UINT32_C(0xFB4F)) || (cp >= UINT32_C(0x10800) && cp <= UINT32_C(0x10CFF)) || (cp >= UINT32_C(0x10D40) && cp <= UINT32_C(0x10EBF)) || (cp >= UINT32_C(0x10F00) && cp <= UINT32_C(0x10F2F)) || @@ -203,22 +205,22 @@ fill_missing(uint_least32_t cp) { (cp >= UINT32_C(0x1ED50) && cp <= UINT32_C(0x1EDFF)) || (cp >= UINT32_C(0x1EF00) && cp <= UINT32_C(0x1EFFF))) { return 17; /* class R */ - } else if ((cp >= UINT32_C(0x0600) && cp <= UINT32_C(0x07BF)) || - (cp >= UINT32_C(0x0860) && cp <= UINT32_C(0x08FF)) || - (cp >= UINT32_C(0xFB50) && cp <= UINT32_C(0xFDCF)) || - (cp >= UINT32_C(0xFDF0) && cp <= UINT32_C(0xFDFF)) || - (cp >= UINT32_C(0xFE70) && cp <= UINT32_C(0xFEFF)) || + } else if ((cp >= UINT32_C(0x0600) && cp <= UINT32_C(0x07BF)) || + (cp >= UINT32_C(0x0860) && cp <= UINT32_C(0x08FF)) || + (cp >= UINT32_C(0xFB50) && cp <= UINT32_C(0xFDCF)) || + (cp >= UINT32_C(0xFDF0) && cp <= UINT32_C(0xFDFF)) || + (cp >= UINT32_C(0xFE70) && cp <= UINT32_C(0xFEFF)) || (cp >= UINT32_C(0x10D00) && cp <= UINT32_C(0x10D3F)) || (cp >= UINT32_C(0x10EC0) && cp <= UINT32_C(0x10EFF)) || - (cp >= UINT32_C(0x10F30) && cp <= UINT32_C(0x10F6F)) || + (cp >= UINT32_C(0x10F30) && cp <= UINT32_C(0x10F6F)) || (cp >= UINT32_C(0x1EC70) && cp <= UINT32_C(0x1ECBF)) || (cp >= UINT32_C(0x1ED00) && cp <= UINT32_C(0x1ED4F)) || (cp >= UINT32_C(0x1EE00) && cp <= UINT32_C(0x1EEFF))) { - return 1; /* class AL */ + return 1; /* class AL */ } else if (cp >= UINT32_C(0x20A0) && cp <= UINT32_C(0x20CF)) { - return 8; /* class ET */ + return 8; 
/* class ET */ } else { - return 0; /* class L */ + return 0; /* class L */ } } @@ -238,13 +240,11 @@ main(int argc, char *argv[]) fprintf(stderr, "calloc: %s\n", strerror(errno)); exit(1); } - parse_file_with_callback(FILE_BIDI_BRACKETS, bracket_callback, - NULL); + parse_file_with_callback(FILE_BIDI_BRACKETS, bracket_callback, NULL); - properties_generate_break_property(bidi_property, - LEN(bidi_property), fill_missing, - NULL, post_process, "bidi", - argv[0]); + properties_generate_break_property(bidi_property, LEN(bidi_property), + fill_missing, NULL, post_process, + "bidi", argv[0]); printf("\nenum bracket_type {\n\tBIDI_BRACKET_NONE,\n\t" "BIDI_BRACKET_OPEN,\n\tBIDI_BRACKET_CLOSE,\n};\n\n" @@ -252,10 +252,12 @@ main(int argc, char *argv[]) "\tuint_least32_t pair;\n};\n\n" "static const struct bracket bidi_bracket[] = {\n"); for (i = 0; i < blen; i++) { - printf("\t{\n\t\t.type = %s,\n\t\t.pair = UINT32_C(0x%06X),\n\t},\n", - (b[i].type == 'o') ? "BIDI_BRACKET_OPEN" : - (b[i].type == 'c') ? "BIDI_BRACKET_CLOSE" : "BIDI_BRACKET_NONE", - b[i].cp_pair); + printf("\t{\n\t\t.type = %s,\n\t\t.pair = " + "UINT32_C(0x%06X),\n\t},\n", + (b[i].type == 'o') ? "BIDI_BRACKET_OPEN" : + (b[i].type == 'c') ? 
"BIDI_BRACKET_CLOSE" : + "BIDI_BRACKET_NONE", + b[i].cp_pair); } printf("};\n"); diff --git a/gen/case.c b/gen/case.c @@ -12,28 +12,28 @@ static const struct property_spec case_property[] = { { .enumname = "OTHER", - .file = NULL, - .ucdname = NULL, + .file = NULL, + .ucdname = NULL, }, { .enumname = "BOTH_CASED_CASE_IGNORABLE", - .file = NULL, - .ucdname = NULL, + .file = NULL, + .ucdname = NULL, }, - { + { .enumname = "CASED", - .file = FILE_DCP, - .ucdname = "Cased", + .file = FILE_DCP, + .ucdname = "Cased", }, { .enumname = "CASE_IGNORABLE", - .file = FILE_DCP, - .ucdname = "Case_Ignorable", + .file = FILE_DCP, + .ucdname = "Case_Ignorable", }, { .enumname = "UNCASED", - .file = FILE_DCP, - .ucdname = "Uncased", + .file = FILE_DCP, + .ucdname = "Uncased", }, }; @@ -67,12 +67,14 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2) } static struct properties *prop_upper = NULL, *prop_lower, *prop_title; + static struct special_case { struct { uint_least32_t *cp; size_t cplen; } upper, lower, title; } *sc = NULL; + static size_t sclen = 0; static int @@ -89,9 +91,12 @@ unicodedata_callback(const char *file, char **field, size_t nfields, upper = lower = title = cp; - if ((strlen(field[12]) > 0 && hextocp(field[12], strlen(field[12]), &upper)) || - (strlen(field[13]) > 0 && hextocp(field[13], strlen(field[13]), &lower)) || - (nfields >= 15 && strlen(field[14]) > 0 && hextocp(field[14], strlen(field[14]), &title))) { + if ((strlen(field[12]) > 0 && + hextocp(field[12], strlen(field[12]), &upper)) || + (strlen(field[13]) > 0 && + hextocp(field[13], strlen(field[13]), &lower)) || + (nfields >= 15 && strlen(field[14]) > 0 && + hextocp(field[14], strlen(field[14]), &title))) { return 1; } @@ -126,7 +131,7 @@ specialcasing_callback(const char *file, char **field, size_t nfields, /* extend special case array */ if (!(sc = realloc(sc, (++sclen) * sizeof(*sc)))) { fprintf(stderr, "realloc: %s\n", strerror(errno)); - exit(1); + exit(1); } /* parse 
field data */ @@ -142,9 +147,12 @@ specialcasing_callback(const char *file, char **field, size_t nfields, * special value 0x110000 + (offset in special case array), * even if the special case has length 1 */ - prop_upper[cp].property = (int_least64_t)(UINT32_C(0x110000) + (sclen - 1)); - prop_lower[cp].property = (int_least64_t)(UINT32_C(0x110000) + (sclen - 1)); - prop_title[cp].property = (int_least64_t)(UINT32_C(0x110000) + (sclen - 1)); + prop_upper[cp].property = + (int_least64_t)(UINT32_C(0x110000) + (sclen - 1)); + prop_lower[cp].property = + (int_least64_t)(UINT32_C(0x110000) + (sclen - 1)); + prop_title[cp].property = + (int_least64_t)(UINT32_C(0x110000) + (sclen - 1)); return 0; } @@ -165,9 +173,8 @@ main(int argc, char *argv[]) (void)argc; /* generate case property table from the specification */ - properties_generate_break_property(case_property, - LEN(case_property), NULL, - handle_conflict, NULL, "case", + properties_generate_break_property(case_property, LEN(case_property), + NULL, handle_conflict, NULL, "case", argv[0]); /* @@ -186,38 +193,46 @@ main(int argc, char *argv[]) } parse_file_with_callback("data/UnicodeData.txt", unicodedata_callback, NULL); - parse_file_with_callback("data/SpecialCasing.txt", specialcasing_callback, - NULL); + parse_file_with_callback("data/SpecialCasing.txt", + specialcasing_callback, NULL); /* compress properties */ properties_compress(prop_upper, &comp_upper); properties_compress(prop_lower, &comp_lower); properties_compress(prop_title, &comp_title); - fprintf(stderr, "%s: LUT compression-ratios: upper=%.2f%%, lower=%.2f%%, title=%.2f%%\n", + fprintf(stderr, + "%s: LUT compression-ratios: upper=%.2f%%, lower=%.2f%%, " + "title=%.2f%%\n", argv[0], properties_get_major_minor(&comp_upper, &mm_upper), properties_get_major_minor(&comp_lower, &mm_lower), properties_get_major_minor(&comp_title, &mm_title)); /* print tables */ - printf("/* Automatically generated by %s */\n#include <stdint.h>\n#include <stddef.h>\n\n", 
argv[0]); + printf("/* Automatically generated by %s */\n#include " + "<stdint.h>\n#include <stddef.h>\n\n", + argv[0]); - printf("struct special_case {\n\tuint_least32_t *cp;\n\tsize_t cplen;\n};\n\n"); + printf("struct special_case {\n\tuint_least32_t *cp;\n\tsize_t " + "cplen;\n};\n\n"); properties_print_lookup_table("upper_major", mm_upper.major, 0x1100); printf("\n"); - properties_print_derived_lookup_table("upper_minor", "int_least32_t", mm_upper.minor, - mm_upper.minorlen, get_value, comp_upper.data); + properties_print_derived_lookup_table("upper_minor", "int_least32_t", + mm_upper.minor, mm_upper.minorlen, + get_value, comp_upper.data); printf("\n"); properties_print_lookup_table("lower_major", mm_lower.major, 0x1100); printf("\n"); - properties_print_derived_lookup_table("lower_minor", "int_least32_t", mm_lower.minor, - mm_lower.minorlen, get_value, comp_lower.data); + properties_print_derived_lookup_table("lower_minor", "int_least32_t", + mm_lower.minor, mm_lower.minorlen, + get_value, comp_lower.data); printf("\n"); properties_print_lookup_table("title_major", mm_title.major, 0x1100); printf("\n"); - properties_print_derived_lookup_table("title_minor", "int_least32_t", mm_title.minor, - mm_title.minorlen, get_value, comp_title.data); + properties_print_derived_lookup_table("title_minor", "int_least32_t", + mm_title.minor, mm_title.minorlen, + get_value, comp_title.data); printf("\n"); printf("static const struct special_case upper_special[] = {\n"); diff --git a/gen/character.c b/gen/character.c @@ -9,78 +9,78 @@ static const struct property_spec char_break_property[] = { { .enumname = "OTHER", - .file = NULL, - .ucdname = NULL, + .file = NULL, + .ucdname = NULL, }, { .enumname = "CONTROL", - .file = FILE_GRAPHEME, - .ucdname = "Control", + .file = FILE_GRAPHEME, + .ucdname = "Control", }, { .enumname = "CR", - .file = FILE_GRAPHEME, - .ucdname = "CR", + .file = FILE_GRAPHEME, + .ucdname = "CR", }, { .enumname = "EXTEND", - .file = FILE_GRAPHEME, - 
.ucdname = "Extend", + .file = FILE_GRAPHEME, + .ucdname = "Extend", }, { .enumname = "EXTENDED_PICTOGRAPHIC", - .file = FILE_EMOJI, - .ucdname = "Extended_Pictographic", + .file = FILE_EMOJI, + .ucdname = "Extended_Pictographic", }, { .enumname = "HANGUL_L", - .file = FILE_GRAPHEME, - .ucdname = "L", + .file = FILE_GRAPHEME, + .ucdname = "L", }, { .enumname = "HANGUL_V", - .file = FILE_GRAPHEME, - .ucdname = "V", + .file = FILE_GRAPHEME, + .ucdname = "V", }, { .enumname = "HANGUL_T", - .file = FILE_GRAPHEME, - .ucdname = "T", + .file = FILE_GRAPHEME, + .ucdname = "T", }, { .enumname = "HANGUL_LV", - .file = FILE_GRAPHEME, - .ucdname = "LV", + .file = FILE_GRAPHEME, + .ucdname = "LV", }, { .enumname = "HANGUL_LVT", - .file = FILE_GRAPHEME, - .ucdname = "LVT", + .file = FILE_GRAPHEME, + .ucdname = "LVT", }, { .enumname = "LF", - .file = FILE_GRAPHEME, - .ucdname = "LF", + .file = FILE_GRAPHEME, + .ucdname = "LF", }, { .enumname = "PREPEND", - .file = FILE_GRAPHEME, - .ucdname = "Prepend", + .file = FILE_GRAPHEME, + .ucdname = "Prepend", }, { .enumname = "REGIONAL_INDICATOR", - .file = FILE_GRAPHEME, - .ucdname = "Regional_Indicator", + .file = FILE_GRAPHEME, + .ucdname = "Regional_Indicator", }, { .enumname = "SPACINGMARK", - .file = FILE_GRAPHEME, - .ucdname = "SpacingMark", + .file = FILE_GRAPHEME, + .ucdname = "SpacingMark", }, { .enumname = "ZWJ", - .file = FILE_GRAPHEME, - .ucdname = "ZWJ", + .file = FILE_GRAPHEME, + .ucdname = "ZWJ", }, }; @@ -90,8 +90,8 @@ main(int argc, char *argv[]) (void)argc; properties_generate_break_property(char_break_property, - LEN(char_break_property), NULL, - NULL, NULL, "char_break", argv[0]); + LEN(char_break_property), NULL, NULL, + NULL, "char_break", argv[0]); return 0; } diff --git a/gen/line.c b/gen/line.c @@ -12,8 +12,8 @@ static const struct property_spec line_break_property[] = { { .enumname = "AL", - .file = FILE_LINE, - .ucdname = "AL", + .file = FILE_LINE, + .ucdname = "AL", }, /* * Both extended pictographic and cn 
are large classes, @@ -32,269 +32,269 @@ static const struct property_spec line_break_property[] = { */ { .enumname = "TMP_CN", - .file = FILE_LINE, - .ucdname = "Cn", + .file = FILE_LINE, + .ucdname = "Cn", }, { .enumname = "TMP_EXTENDED_PICTOGRAPHIC", - .file = FILE_EMOJI, - .ucdname = "Extended_Pictographic", + .file = FILE_EMOJI, + .ucdname = "Extended_Pictographic", }, /* end of special block */ { .enumname = "B2", - .file = FILE_LINE, - .ucdname = "B2", + .file = FILE_LINE, + .ucdname = "B2", }, { .enumname = "BA", - .file = FILE_LINE, - .ucdname = "BA", + .file = FILE_LINE, + .ucdname = "BA", }, { .enumname = "BB", - .file = FILE_LINE, - .ucdname = "BB", + .file = FILE_LINE, + .ucdname = "BB", }, { .enumname = "BK", - .file = FILE_LINE, - .ucdname = "BK", + .file = FILE_LINE, + .ucdname = "BK", }, { .enumname = "BOTH_CN_EXTPICT", - .file = NULL, - .ucdname = NULL, + .file = NULL, + .ucdname = NULL, }, { .enumname = "CB", - .file = FILE_LINE, - .ucdname = "CB", + .file = FILE_LINE, + .ucdname = "CB", }, { .enumname = "CL", - .file = FILE_LINE, - .ucdname = "CL", + .file = FILE_LINE, + .ucdname = "CL", }, { .enumname = "CM", - .file = FILE_LINE, - .ucdname = "CM", + .file = FILE_LINE, + .ucdname = "CM", }, { .enumname = "CP_WITHOUT_EAW_HWF", - .file = FILE_LINE, - .ucdname = "CP", + .file = FILE_LINE, + .ucdname = "CP", }, { .enumname = "CP_WITH_EAW_HWF", - .file = NULL, - .ucdname = NULL, + .file = NULL, + .ucdname = NULL, }, { .enumname = "CR", - .file = FILE_LINE, - .ucdname = "CR", + .file = FILE_LINE, + .ucdname = "CR", }, { .enumname = "EB", - .file = FILE_LINE, - .ucdname = "EB", + .file = FILE_LINE, + .ucdname = "EB", }, { .enumname = "EM", - .file = FILE_LINE, - .ucdname = "EM", + .file = FILE_LINE, + .ucdname = "EM", }, { .enumname = "EX", - .file = FILE_LINE, - .ucdname = "EX", + .file = FILE_LINE, + .ucdname = "EX", }, { .enumname = "GL", - .file = FILE_LINE, - .ucdname = "GL", + .file = FILE_LINE, + .ucdname = "GL", }, { .enumname = "H2", - .file 
= FILE_LINE, - .ucdname = "H2", + .file = FILE_LINE, + .ucdname = "H2", }, { .enumname = "H3", - .file = FILE_LINE, - .ucdname = "H3", + .file = FILE_LINE, + .ucdname = "H3", }, { .enumname = "HL", - .file = FILE_LINE, - .ucdname = "HL", + .file = FILE_LINE, + .ucdname = "HL", }, { .enumname = "HY", - .file = FILE_LINE, - .ucdname = "HY", + .file = FILE_LINE, + .ucdname = "HY", }, { .enumname = "ID", - .file = FILE_LINE, - .ucdname = "ID", + .file = FILE_LINE, + .ucdname = "ID", }, { .enumname = "IN", - .file = FILE_LINE, - .ucdname = "IN", + .file = FILE_LINE, + .ucdname = "IN", }, { .enumname = "IS", - .file = FILE_LINE, - .ucdname = "IS", + .file = FILE_LINE, + .ucdname = "IS", }, { .enumname = "JL", - .file = FILE_LINE, - .ucdname = "JL", + .file = FILE_LINE, + .ucdname = "JL", }, { .enumname = "JT", - .file = FILE_LINE, - .ucdname = "JT", + .file = FILE_LINE, + .ucdname = "JT", }, { .enumname = "JV", - .file = FILE_LINE, - .ucdname = "JV", + .file = FILE_LINE, + .ucdname = "JV", }, { .enumname = "LF", - .file = FILE_LINE, - .ucdname = "LF", + .file = FILE_LINE, + .ucdname = "LF", }, { .enumname = "NL", - .file = FILE_LINE, - .ucdname = "NL", + .file = FILE_LINE, + .ucdname = "NL", }, { .enumname = "NS", - .file = FILE_LINE, - .ucdname = "NS", + .file = FILE_LINE, + .ucdname = "NS", }, { .enumname = "NU", - .file = FILE_LINE, - .ucdname = "NU", + .file = FILE_LINE, + .ucdname = "NU", }, { .enumname = "OP_WITHOUT_EAW_HWF", - .file = FILE_LINE, - .ucdname = "OP", + .file = FILE_LINE, + .ucdname = "OP", }, { .enumname = "OP_WITH_EAW_HWF", - .file = NULL, - .ucdname = NULL, + .file = NULL, + .ucdname = NULL, }, { .enumname = "PO", - .file = FILE_LINE, - .ucdname = "PO", + .file = FILE_LINE, + .ucdname = "PO", }, { .enumname = "PR", - .file = FILE_LINE, - .ucdname = "PR", + .file = FILE_LINE, + .ucdname = "PR", }, { .enumname = "QU", - .file = FILE_LINE, - .ucdname = "QU", + .file = FILE_LINE, + .ucdname = "QU", }, { .enumname = "RI", - .file = FILE_LINE, - .ucdname 
= "RI", + .file = FILE_LINE, + .ucdname = "RI", }, { .enumname = "SP", - .file = FILE_LINE, - .ucdname = "SP", + .file = FILE_LINE, + .ucdname = "SP", }, { .enumname = "SY", - .file = FILE_LINE, - .ucdname = "SY", + .file = FILE_LINE, + .ucdname = "SY", }, { .enumname = "WJ", - .file = FILE_LINE, - .ucdname = "WJ", + .file = FILE_LINE, + .ucdname = "WJ", }, { .enumname = "ZW", - .file = FILE_LINE, - .ucdname = "ZW", + .file = FILE_LINE, + .ucdname = "ZW", }, { .enumname = "ZWJ", - .file = FILE_LINE, - .ucdname = "ZWJ", + .file = FILE_LINE, + .ucdname = "ZWJ", }, { .enumname = "TMP_AI", - .file = FILE_LINE, - .ucdname = "AI", + .file = FILE_LINE, + .ucdname = "AI", }, { .enumname = "TMP_CJ", - .file = FILE_LINE, - .ucdname = "CJ", + .file = FILE_LINE, + .ucdname = "CJ", }, { .enumname = "TMP_XX", - .file = NULL, - .ucdname = NULL, + .file = NULL, + .ucdname = NULL, }, { .enumname = "TMP_MN", - .file = FILE_LINE, - .ucdname = "Mn", + .file = FILE_LINE, + .ucdname = "Mn", }, { .enumname = "TMP_MC", - .file = FILE_LINE, - .ucdname = "Mc", + .file = FILE_LINE, + .ucdname = "Mc", }, { .enumname = "TMP_SA_WITHOUT_MN_OR_MC", - .file = FILE_LINE, - .ucdname = "SA", + .file = FILE_LINE, + .ucdname = "SA", }, { .enumname = "TMP_SA_WITH_MN_OR_MC", - .file = FILE_LINE, - .ucdname = "SA", + .file = FILE_LINE, + .ucdname = "SA", }, { .enumname = "TMP_SG", - .file = FILE_LINE, - .ucdname = "SG", + .file = FILE_LINE, + .ucdname = "SG", }, { .enumname = "TMP_EAW_H", - .file = FILE_EAW, - .ucdname = "H", + .file = FILE_EAW, + .ucdname = "H", }, { .enumname = "TMP_EAW_W", - .file = FILE_EAW, - .ucdname = "W", + .file = FILE_EAW, + .ucdname = "W", }, { .enumname = "TMP_EAW_F", - .file = FILE_EAW, - .ucdname = "F", + .file = FILE_EAW, + .ucdname = "F", }, }; @@ -306,23 +306,30 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2) (void)cp; - if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") || - !strcmp(line_break_property[prop1].enumname, 
"TMP_EAW_W") || + if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") || + !strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") || !strcmp(line_break_property[prop1].enumname, "TMP_EAW_F")) || (!strcmp(line_break_property[prop2].enumname, "TMP_EAW_H") || !strcmp(line_break_property[prop2].enumname, "TMP_EAW_W") || !strcmp(line_break_property[prop2].enumname, "TMP_EAW_F"))) { - if (!strcmp(line_break_property[prop1].enumname, "CP_WITHOUT_EAW_HWF") || - !strcmp(line_break_property[prop2].enumname, "CP_WITHOUT_EAW_HWF")) { + if (!strcmp(line_break_property[prop1].enumname, + "CP_WITHOUT_EAW_HWF") || + !strcmp(line_break_property[prop2].enumname, + "CP_WITHOUT_EAW_HWF")) { target = "CP_WITH_EAW_HWF"; - } else if (!strcmp(line_break_property[prop1].enumname, "OP_WITHOUT_EAW_HWF") || - !strcmp(line_break_property[prop2].enumname, "OP_WITHOUT_EAW_HWF")) { + } else if (!strcmp(line_break_property[prop1].enumname, + "OP_WITHOUT_EAW_HWF") || + !strcmp(line_break_property[prop2].enumname, + "OP_WITHOUT_EAW_HWF")) { target = "OP_WITH_EAW_HWF"; } else { /* ignore EAW for the rest */ - if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") || - !strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") || - !strcmp(line_break_property[prop1].enumname, "TMP_EAW_F"))) { + if ((!strcmp(line_break_property[prop1].enumname, + "TMP_EAW_H") || + !strcmp(line_break_property[prop1].enumname, + "TMP_EAW_W") || + !strcmp(line_break_property[prop1].enumname, + "TMP_EAW_F"))) { result = prop2; } else { result = prop1; @@ -330,15 +337,19 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2) } } else if ((!strcmp(line_break_property[prop1].enumname, "TMP_MN") || !strcmp(line_break_property[prop1].enumname, "TMP_MC")) || - (!strcmp(line_break_property[prop2].enumname, "TMP_MN") || - !strcmp(line_break_property[prop2].enumname, "TMP_MC"))) { - if (!strcmp(line_break_property[prop1].enumname, "SA_WITHOUT_MN_OR_MC") || - 
!strcmp(line_break_property[prop2].enumname, "SA_WITHOUT_MN_OR_MC")) { + (!strcmp(line_break_property[prop2].enumname, "TMP_MN") || + !strcmp(line_break_property[prop2].enumname, "TMP_MC"))) { + if (!strcmp(line_break_property[prop1].enumname, + "SA_WITHOUT_MN_OR_MC") || + !strcmp(line_break_property[prop2].enumname, + "SA_WITHOUT_MN_OR_MC")) { target = "SA_WITH_MN_OR_MC"; } else { /* ignore Mn and Mc for the rest */ - if ((!strcmp(line_break_property[prop1].enumname, "TMP_MN") || - !strcmp(line_break_property[prop1].enumname, "TMP_MC"))) { + if ((!strcmp(line_break_property[prop1].enumname, + "TMP_MN") || + !strcmp(line_break_property[prop1].enumname, + "TMP_MC"))) { result = prop2; } else { result = prop1; @@ -346,33 +357,42 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2) } } else if (!strcmp(line_break_property[prop1].enumname, "TMP_CN") || !strcmp(line_break_property[prop2].enumname, "TMP_CN")) { - if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_PICTOGRAPHIC") || - !strcmp(line_break_property[prop2].enumname, "TMP_EXTENDED_PICTOGRAPHIC")) { + if (!strcmp(line_break_property[prop1].enumname, + "TMP_EXTENDED_PICTOGRAPHIC") || + !strcmp(line_break_property[prop2].enumname, + "TMP_EXTENDED_PICTOGRAPHIC")) { target = "BOTH_CN_EXTPICT"; } else { /* ignore Cn for all the other properties */ - if (!strcmp(line_break_property[prop1].enumname, "TMP_CN")) { + if (!strcmp(line_break_property[prop1].enumname, + "TMP_CN")) { result = prop2; } else { result = prop1; } } - } else if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_PICTOGRAPHIC") || - !strcmp(line_break_property[prop2].enumname, "TMP_EXTENDED_PICTOGRAPHIC")) { + } else if (!strcmp(line_break_property[prop1].enumname, + "TMP_EXTENDED_PICTOGRAPHIC") || + !strcmp(line_break_property[prop2].enumname, + "TMP_EXTENDED_PICTOGRAPHIC")) { if (!strcmp(line_break_property[prop1].enumname, "TMP_CN") || !strcmp(line_break_property[prop2].enumname, "TMP_CN")) { target 
= "BOTH_CN_EXTPICT"; } else { - /* ignore Extended_Pictographic for all the other properties */ - if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_PICTOGRAPHIC")) { + /* ignore Extended_Pictographic for all the other + * properties */ + if (!strcmp(line_break_property[prop1].enumname, + "TMP_EXTENDED_PICTOGRAPHIC")) { result = prop2; } else { result = prop1; } } } else { - fprintf(stderr, "handle_conflict: Cannot handle conflict %s <- %s.\n", - line_break_property[prop1].enumname, line_break_property[prop2].enumname); + fprintf(stderr, + "handle_conflict: Cannot handle conflict %s <- %s.\n", + line_break_property[prop1].enumname, + line_break_property[prop2].enumname); exit(1); } @@ -402,27 +422,44 @@ post_process(struct properties *prop) /* post-mapping according to the line breaking algorithm */ for (i = 0; i < UINT32_C(0x110000); i++) { /* LB1 */ - if (!strcmp(line_break_property[prop[i].property].enumname, "TMP_AI") || - !strcmp(line_break_property[prop[i].property].enumname, "TMP_SG") || - !strcmp(line_break_property[prop[i].property].enumname, "TMP_XX")) { + if (!strcmp(line_break_property[prop[i].property].enumname, + "TMP_AI") || + !strcmp(line_break_property[prop[i].property].enumname, + "TMP_SG") || + !strcmp(line_break_property[prop[i].property].enumname, + "TMP_XX")) { /* map AI, SG and XX to AL */ target = "AL"; - } else if (!strcmp(line_break_property[prop[i].property].enumname, "TMP_SA_WITH_MN_OR_MC")) { + } else if (!strcmp(line_break_property[prop[i].property] + .enumname, + "TMP_SA_WITH_MN_OR_MC")) { /* map SA (with General_Category Mn or Mc) to CM */ target = "CM"; - } else if (!strcmp(line_break_property[prop[i].property].enumname, "TMP_SA_WITHOUT_MN_OR_MC")) { + } else if (!strcmp(line_break_property[prop[i].property] + .enumname, + "TMP_SA_WITHOUT_MN_OR_MC")) { /* map SA (without General_Category Mn or Mc) to AL */ target = "AL"; - } else if (!strcmp(line_break_property[prop[i].property].enumname, "TMP_CJ")) { + } else if 
(!strcmp(line_break_property[prop[i].property] + .enumname, + "TMP_CJ")) { /* map CJ to NS */ target = "NS"; - } else if (!strcmp(line_break_property[prop[i].property].enumname, "TMP_CN") || - !strcmp(line_break_property[prop[i].property].enumname, "TMP_EXTENDED_PICTOGRAPHIC") || - !strcmp(line_break_property[prop[i].property].enumname, "TMP_MN") || - !strcmp(line_break_property[prop[i].property].enumname, "TMP_MC") || - !strcmp(line_break_property[prop[i].property].enumname, "TMP_EAW_H") || - !strcmp(line_break_property[prop[i].property].enumname, "TMP_EAW_W") || - !strcmp(line_break_property[prop[i].property].enumname, "TMP_EAW_F")) { + } else if ( + !strcmp(line_break_property[prop[i].property].enumname, + "TMP_CN") || + !strcmp(line_break_property[prop[i].property].enumname, + "TMP_EXTENDED_PICTOGRAPHIC") || + !strcmp(line_break_property[prop[i].property].enumname, + "TMP_MN") || + !strcmp(line_break_property[prop[i].property].enumname, + "TMP_MC") || + !strcmp(line_break_property[prop[i].property].enumname, + "TMP_EAW_H") || + !strcmp(line_break_property[prop[i].property].enumname, + "TMP_EAW_W") || + !strcmp(line_break_property[prop[i].property].enumname, + "TMP_EAW_F")) { /* map all the temporary classes "residue" to AL */ target = "AL"; } else { @@ -430,14 +467,17 @@ post_process(struct properties *prop) } if (target) { - for (result = 0; result < LEN(line_break_property); result++) { - if (!strcmp(line_break_property[result].enumname, + for (result = 0; result < LEN(line_break_property); + result++) { + if (!strcmp(line_break_property[result] + .enumname, target)) { break; } } if (result == LEN(line_break_property)) { - fprintf(stderr, "handle_conflict: Internal error.\n"); + fprintf(stderr, + "handle_conflict: Internal error.\n"); exit(1); } @@ -451,10 +491,9 @@ main(int argc, char *argv[]) { (void)argc; - properties_generate_break_property(line_break_property, - LEN(line_break_property), NULL, - handle_conflict, post_process, - "line_break", argv[0]); + 
properties_generate_break_property( + line_break_property, LEN(line_break_property), NULL, + handle_conflict, post_process, "line_break", argv[0]); return 0; } diff --git a/gen/sentence.c b/gen/sentence.c @@ -6,78 +6,78 @@ static const struct property_spec sentence_break_property[] = { { .enumname = "OTHER", - .file = NULL, - .ucdname = NULL, + .file = NULL, + .ucdname = NULL, }, { .enumname = "CR", - .file = FILE_SENTENCE, - .ucdname = "CR", + .file = FILE_SENTENCE, + .ucdname = "CR", }, { .enumname = "LF", - .file = FILE_SENTENCE, - .ucdname = "LF", + .file = FILE_SENTENCE, + .ucdname = "LF", }, { .enumname = "EXTEND", - .file = FILE_SENTENCE, - .ucdname = "Extend", + .file = FILE_SENTENCE, + .ucdname = "Extend", }, { .enumname = "SEP", - .file = FILE_SENTENCE, - .ucdname = "Sep", + .file = FILE_SENTENCE, + .ucdname = "Sep", }, { .enumname = "FORMAT", - .file = FILE_SENTENCE, - .ucdname = "Format", + .file = FILE_SENTENCE, + .ucdname = "Format", }, { .enumname = "SP", - .file = FILE_SENTENCE, - .ucdname = "Sp", + .file = FILE_SENTENCE, + .ucdname = "Sp", }, { .enumname = "LOWER", - .file = FILE_SENTENCE, - .ucdname = "Lower", + .file = FILE_SENTENCE, + .ucdname = "Lower", }, { .enumname = "UPPER", - .file = FILE_SENTENCE, - .ucdname = "Upper", + .file = FILE_SENTENCE, + .ucdname = "Upper", }, { .enumname = "OLETTER", - .file = FILE_SENTENCE, - .ucdname = "OLetter", + .file = FILE_SENTENCE, + .ucdname = "OLetter", }, { .enumname = "NUMERIC", - .file = FILE_SENTENCE, - .ucdname = "Numeric", + .file = FILE_SENTENCE, + .ucdname = "Numeric", }, { .enumname = "ATERM", - .file = FILE_SENTENCE, - .ucdname = "ATerm", + .file = FILE_SENTENCE, + .ucdname = "ATerm", }, { .enumname = "SCONTINUE", - .file = FILE_SENTENCE, - .ucdname = "SContinue", + .file = FILE_SENTENCE, + .ucdname = "SContinue", }, { .enumname = "STERM", - .file = FILE_SENTENCE, - .ucdname = "STerm", + .file = FILE_SENTENCE, + .ucdname = "STerm", }, { .enumname = "CLOSE", - .file = FILE_SENTENCE, - .ucdname 
= "Close", + .file = FILE_SENTENCE, + .ucdname = "Close", }, }; @@ -86,9 +86,9 @@ main(int argc, char *argv[]) { (void)argc; - properties_generate_break_property(sentence_break_property, - LEN(sentence_break_property), NULL, - NULL, NULL, "sentence_break", argv[0]); + properties_generate_break_property( + sentence_break_property, LEN(sentence_break_property), NULL, + NULL, NULL, "sentence_break", argv[0]); return 0; } diff --git a/gen/util.c b/gen/util.c @@ -1,13 +1,12 @@ /* See LICENSE file for copyright and license details. */ -#include <stdbool.h> #include <ctype.h> #include <errno.h> #include <inttypes.h> #include <stdbool.h> #include <stddef.h> #include <stdint.h> -#include <stdlib.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> #include "util.h" @@ -21,12 +20,13 @@ struct properties_payload { struct properties *prop; const struct property_spec *spec; uint_least8_t speclen; - int (*set_value)(struct properties_payload *, uint_least32_t, int_least64_t); - uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t, uint_least8_t); + int (*set_value)(struct properties_payload *, uint_least32_t, + int_least64_t); + uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t, + uint_least8_t); }; -struct break_test_payload -{ +struct break_test_payload { struct break_test **test; size_t *testlen; }; @@ -51,8 +51,8 @@ hextocp(const char *str, size_t len, uint_least32_t *cp) /* the maximum valid codepoint is 0x10FFFF */ if (len > 6) { - fprintf(stderr, "hextocp: '%.*s' is too long.\n", - (int)len, str); + fprintf(stderr, "hextocp: '%.*s' is too long.\n", (int)len, + str); return 1; } @@ -77,8 +77,8 @@ hextocp(const char *str, size_t len, uint_least32_t *cp) } if (*cp > UINT32_C(0x10FFFF)) { - fprintf(stderr, "hextocp: '%.*s' is too large.\n", - (int)len, str); + fprintf(stderr, "hextocp: '%.*s' is too large.\n", (int)len, + str); return 1; } @@ -98,8 +98,10 @@ parse_cp_list(const char *str, uint_least32_t **cp, size_t *cplen) } /* count the 
number of spaces in the string and infer list length */ - for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; count++, tmp1 = tmp2 + 1) + for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; + count++, tmp1 = tmp2 + 1) { ; + } /* allocate resources */ if (!(*cp = calloc((*cplen = count), sizeof(**cp)))) { @@ -110,7 +112,8 @@ parse_cp_list(const char *str, uint_least32_t **cp, size_t *cplen) /* go through the string again, parsing the numbers */ for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) { tmp2 = strchr(tmp1, ' '); - if (hextocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1), &((*cp)[i]))) { + if (hextocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1), + &((*cp)[i]))) { return 1; } if (tmp2 != NULL) { @@ -144,8 +147,10 @@ range_parse(const char *str, struct range *range) } void -parse_file_with_callback(const char *fname, int (*callback)(const char *, - char **, size_t, char *, void *), void *payload) +parse_file_with_callback(const char *fname, + int (*callback)(const char *, char **, size_t, char *, + void *), + void *payload) { FILE *fp; char *line = NULL, **field = NULL, *comment; @@ -182,10 +187,15 @@ parse_file_with_callback(const char *fname, int (*callback)(const char *, if (line[i] != '#') { /* extend field buffer, if necessary */ if (++nfields > fieldbufsize) { - if ((field = realloc(field, nfields * - sizeof(*field))) == NULL) { - fprintf(stderr, "parse_file_with_" - "callback: realloc: %s.\n", + if ((field = realloc( + field, + nfields * + sizeof(*field))) == + NULL) { + fprintf(stderr, + "parse_file_with_" + "callback: realloc: " + "%s.\n", strerror(errno)); exit(1); } @@ -209,8 +219,9 @@ parse_file_with_callback(const char *fname, int (*callback)(const char *, /* go back whitespace and terminate field there */ if (i > 0) { - for (j = i - 1; line[j] == ' '; j--) + for (j = i - 1; line[j] == ' '; j--) { ; + } line[j + 1] = '\0'; } else { line[i] = '\0'; @@ -230,7 +241,7 @@ parse_file_with_callback(const char *fname, 
int (*callback)(const char *, /* call callback function */ if (callback(fname, field, nfields, comment, payload)) { fprintf(stderr, "parse_file_with_callback: " - "Malformed input.\n"); + "Malformed input.\n"); exit(1); } } @@ -257,10 +268,11 @@ properties_callback(const char *file, char **field, size_t nfields, for (i = 0; i < p->speclen; i++) { /* identify fitting file and identifier */ - if (p->spec[i].file && - !strcmp(p->spec[i].file, file) && + if (p->spec[i].file && !strcmp(p->spec[i].file, file) && (!strcmp(p->spec[i].ucdname, field[1]) || - (comment != NULL && !strncmp(p->spec[i].ucdname, comment, strlen(p->spec[i].ucdname)) && + (comment != NULL && + !strncmp(p->spec[i].ucdname, comment, + strlen(p->spec[i].ucdname)) && comment[strlen(p->spec[i].ucdname)] == ' '))) { /* parse range in first field */ if (range_parse(field[0], &r)) { @@ -287,7 +299,8 @@ properties_compress(const struct properties *prop, uint_least32_t cp, i; /* initialization */ - if (!(comp->offset = malloc((size_t)UINT32_C(0x110000) * sizeof(*(comp->offset))))) { + if (!(comp->offset = malloc((size_t)UINT32_C(0x110000) * + sizeof(*(comp->offset))))) { fprintf(stderr, "malloc: %s\n", strerror(errno)); exit(1); } @@ -296,7 +309,8 @@ properties_compress(const struct properties *prop, for (cp = 0; cp < UINT32_C(0x110000); cp++) { for (i = 0; i < comp->datalen; i++) { - if (!memcmp(&(prop[cp]), &(comp->data[i]), sizeof(*prop))) { + if (!memcmp(&(prop[cp]), &(comp->data[i]), + sizeof(*prop))) { /* found a match! 
*/ comp->offset[cp] = i; break; @@ -308,9 +322,9 @@ properties_compress(const struct properties *prop, * add current properties to data and add the * offset in the offset-table */ - if (!(comp->data = reallocate_array(comp->data, - ++(comp->datalen), - sizeof(*(comp->data))))) { + if (!(comp->data = reallocate_array( + comp->data, ++(comp->datalen), + sizeof(*(comp->data))))) { fprintf(stderr, "reallocate_array: %s\n", strerror(errno)); exit(1); @@ -357,8 +371,7 @@ properties_get_major_minor(const struct properties_compressed *comp, * and need less storage) */ for (j = 0; j + 0xFF < mm->minorlen; j++) { - if (!memcmp(&(comp->offset[i << 8]), - &(mm->minor[j]), + if (!memcmp(&(comp->offset[i << 8]), &(mm->minor[j]), sizeof(*(comp->offset)) * 0x100)) { break; } @@ -373,9 +386,9 @@ properties_get_major_minor(const struct properties_compressed *comp, * in major */ mm->minorlen += 0x100; - if (!(mm->minor = reallocate_array(mm->minor, - mm->minorlen, - sizeof(*(mm->minor))))) { + if (!(mm->minor = + reallocate_array(mm->minor, mm->minorlen, + sizeof(*(mm->minor))))) { fprintf(stderr, "reallocate_array: %s\n", strerror(errno)); exit(1); @@ -403,7 +416,7 @@ properties_print_lookup_table(char *name, size_t *data, size_t datalen) } } - type = (maxval <= UINT_LEAST8_MAX) ? "uint_least8_t" : + type = (maxval <= UINT_LEAST8_MAX) ? "uint_least8_t" : (maxval <= UINT_LEAST16_MAX) ? "uint_least16_t" : (maxval <= UINT_LEAST32_MAX) ? 
"uint_least32_t" : "uint_least64_t"; @@ -418,21 +431,21 @@ properties_print_lookup_table(char *name, size_t *data, size_t datalen) } else { printf(",\n\t"); } - } printf("};\n"); } void -properties_print_derived_lookup_table(char *name, char *type, size_t *offset, size_t offsetlen, - int_least64_t (*get_value)(const struct properties *, - size_t), const void *payload) +properties_print_derived_lookup_table( + char *name, char *type, size_t *offset, size_t offsetlen, + int_least64_t (*get_value)(const struct properties *, size_t), + const void *payload) { size_t i; printf("static const %s %s[] = {\n\t", type, name); for (i = 0; i < offsetlen; i++) { - printf("%"PRIiLEAST64, get_value(payload, offset[i])); + printf("%" PRIiLEAST64, get_value(payload, offset[i])); if (i + 1 == offsetlen) { printf("\n"); } else if ((i + 1) % 8 != 0) { @@ -440,7 +453,6 @@ properties_print_derived_lookup_table(char *name, char *type, size_t *offset, si } else { printf(",\n\t"); } - } printf("};\n"); } @@ -464,17 +476,19 @@ set_value_bp(struct properties_payload *payload, uint_least32_t cp, { if (payload->prop[cp].property != payload->speclen) { if (payload->handle_conflict == NULL) { - fprintf(stderr, "set_value_bp: " - "Unhandled character break property " + fprintf(stderr, + "set_value_bp: " + "Unhandled character break property " "overwrite for 0x%06X (%s <- %s).\n", - cp, payload->spec[payload->prop[cp]. 
- property].enumname, + cp, + payload->spec[payload->prop[cp].property] + .enumname, payload->spec[value].enumname); return 1; } else { - value = payload->handle_conflict(cp, - (uint_least8_t)payload->prop[cp].property, - (uint_least8_t)value); + value = payload->handle_conflict( + cp, (uint_least8_t)payload->prop[cp].property, + (uint_least8_t)value); } } payload->prop[cp].property = value; @@ -489,15 +503,13 @@ get_value_bp(const struct properties *prop, size_t offset) } void -properties_generate_break_property(const struct property_spec *spec, - uint_least8_t speclen, - uint_least8_t (*fill_missing)( - uint_least32_t), - uint_least8_t (*handle_conflict)( - uint_least32_t, uint_least8_t, - uint_least8_t), void - (*post_process)(struct properties *), - const char *prefix, const char *argv0) +properties_generate_break_property( + const struct property_spec *spec, uint_least8_t speclen, + uint_least8_t (*fill_missing)(uint_least32_t), + uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t, + uint_least8_t), + void (*post_process)(struct properties *), const char *prefix, + const char *argv0) { struct properties_compressed comp; struct properties_major_minor mm; @@ -537,8 +549,7 @@ properties_generate_break_property(const struct property_spec *spec, if (i == j && spec[i].file) { /* file has not been processed yet */ parse_file_with_callback(spec[i].file, - properties_callback, - &payload); + properties_callback, &payload); } } @@ -546,7 +557,8 @@ properties_generate_break_property(const struct property_spec *spec, for (i = 0; i < UINT32_C(0x110000); i++) { if (payload.prop[i].property == speclen) { if (fill_missing != NULL) { - payload.prop[i].property = fill_missing((uint_least32_t)i); + payload.prop[i].property = + fill_missing((uint_least32_t)i); } else { payload.prop[i].property = 0; } @@ -559,14 +571,16 @@ properties_generate_break_property(const struct property_spec *spec, } /* compress data */ - printf("/* Automatically generated by %s */\n#include 
<stdint.h>\n\n", argv0); + printf("/* Automatically generated by %s */\n#include <stdint.h>\n\n", + argv0); properties_compress(prop, &comp); - fprintf(stderr, "%s: %s-LUT compression-ratio: %.2f%%\n", argv0, - prefix, properties_get_major_minor(&comp, &mm)); + fprintf(stderr, "%s: %s-LUT compression-ratio: %.2f%%\n", argv0, prefix, + properties_get_major_minor(&comp, &mm)); /* prepare names */ - if ((size_t)snprintf(buf1, LEN(buf1), "%s_property", prefix) >= LEN(buf1)) { + if ((size_t)snprintf(buf1, LEN(buf1), "%s_property", prefix) >= + LEN(buf1)) { fprintf(stderr, "snprintf: String truncated.\n"); exit(1); } @@ -578,9 +592,12 @@ properties_generate_break_property(const struct property_spec *spec, prefix_uc[i] = (char)toupper(prefix[i]); } prefix_uc[prefixlen] = '\0'; - if ((size_t)snprintf(buf2, LEN(buf2), "%s_PROP", prefix_uc) >= LEN(buf2) || - (size_t)snprintf(buf3, LEN(buf3), "%s_major", prefix) >= LEN(buf3) || - (size_t)snprintf(buf4, LEN(buf4), "%s_minor", prefix) >= LEN(buf4)) { + if ((size_t)snprintf(buf2, LEN(buf2), "%s_PROP", prefix_uc) >= + LEN(buf2) || + (size_t)snprintf(buf3, LEN(buf3), "%s_major", prefix) >= + LEN(buf3) || + (size_t)snprintf(buf4, LEN(buf4), "%s_minor", prefix) >= + LEN(buf4)) { fprintf(stderr, "snprintf: String truncated.\n"); exit(1); } @@ -589,8 +606,9 @@ properties_generate_break_property(const struct property_spec *spec, properties_print_enum(spec, speclen, buf1, buf2); properties_print_lookup_table(buf3, mm.major, 0x1100); printf("\n"); - properties_print_derived_lookup_table(buf4, "uint_least8_t", mm.minor, mm.minorlen, - get_value_bp, comp.data); + properties_print_derived_lookup_table(buf4, "uint_least8_t", mm.minor, + mm.minorlen, get_value_bp, + comp.data); /* free data */ free(prop); @@ -625,42 +643,50 @@ break_test_callback(const char *fname, char **field, size_t nfields, memset(t, 0, sizeof(*t)); /* parse testcase "<÷|×> <cp> <÷|×> ... 
<cp> <÷|×>" */ - for (token = strtok(field[0], " "), i = 0; token != NULL; i++, - token = strtok(NULL, " ")) { + for (token = strtok(field[0], " "), i = 0; token != NULL; + i++, token = strtok(NULL, " ")) { if (i % 2 == 0) { /* delimiter or start of sequence */ - if (i == 0 || !strncmp(token, "\xC3\xB7", 2)) { /* UTF-8 */ + if (i == 0 || + !strncmp(token, "\xC3\xB7", 2)) { /* UTF-8 */ /* * '÷' indicates a breakpoint, * the current length is done; allocate * a new length field and set it to 0 */ - if ((t->len = realloc(t->len, - ++t->lenlen * sizeof(*t->len))) == NULL) { - fprintf(stderr, "break_test_" + if ((t->len = realloc( + t->len, + ++t->lenlen * sizeof(*t->len))) == + NULL) { + fprintf(stderr, + "break_test_" "callback: realloc: %s.\n", strerror(errno)); return 1; } t->len[t->lenlen - 1] = 0; } else if (!strncmp(token, "\xC3\x97", 2)) { /* UTF-8 */ - /* - * '×' indicates a non-breakpoint, do nothing - */ + /* '×' indicates a non-breakpoint, do nothing */ } else { - fprintf(stderr, "break_test_callback: " - "Malformed delimiter '%s'.\n", token); + fprintf(stderr, + "break_test_callback: " + "Malformed delimiter '%s'.\n", + token); return 1; } } else { /* add codepoint to cp-array */ - if ((t->cp = realloc(t->cp, ++t->cplen * - sizeof(*t->cp))) == NULL) { - fprintf(stderr, "break_test_callback: " - "realloc: %s.\n", strerror(errno)); + if ((t->cp = realloc(t->cp, + ++t->cplen * sizeof(*t->cp))) == + NULL) { + fprintf(stderr, + "break_test_callback: " + "realloc: %s.\n", + strerror(errno)); return 1; } - if (hextocp(token, strlen(token), &t->cp[t->cplen - 1])) { + if (hextocp(token, strlen(token), + &t->cp[t->cplen - 1])) { return 1; } if (t->lenlen > 0) { @@ -688,8 +714,7 @@ break_test_callback(const char *fname, char **field, size_t nfields, } void -break_test_list_parse(char *fname, struct break_test **test, - size_t *testlen) +break_test_list_parse(char *fname, struct break_test **test, size_t *testlen) { struct break_test_payload pl = { .test = test, @@ 
-703,13 +728,14 @@ break_test_list_parse(char *fname, struct break_test **test, void break_test_list_print(const struct break_test *test, size_t testlen, - const char *identifier, const char *progname) + const char *identifier, const char *progname) { size_t i, j; printf("/* Automatically generated by %s */\n" "#include <stdint.h>\n#include <stddef.h>\n\n" - "#include \"../gen/types.h\"\n\n", progname); + "#include \"../gen/types.h\"\n\n", + progname); printf("static const struct break_test %s[] = {\n", identifier); for (i = 0; i < testlen; i++) { diff --git a/gen/util.h b/gen/util.h @@ -7,7 +7,7 @@ #include "types.h" -#define LEN(x) (sizeof (x) / sizeof *(x)) +#define LEN(x) (sizeof(x) / sizeof *(x)) struct property_spec { const char *enumname; @@ -34,30 +34,31 @@ struct properties_major_minor { int hextocp(const char *, size_t, uint_least32_t *cp); int parse_cp_list(const char *, uint_least32_t **, size_t *); -void parse_file_with_callback(const char *, int (*callback)(const char *, - char **, size_t, char *, void *), void *payload); +void parse_file_with_callback(const char *, + int (*callback)(const char *, char **, size_t, + char *, void *), + void *payload); -void properties_compress(const struct properties *, struct properties_compressed *comp); +void properties_compress(const struct properties *, + struct properties_compressed *comp); double properties_get_major_minor(const struct properties_compressed *, struct properties_major_minor *); void properties_print_lookup_table(char *, size_t *, size_t); -void properties_print_derived_lookup_table(char *, char *, size_t *, size_t, - int_least64_t (*get_value)(const struct properties *, - size_t), const void *); - -void properties_generate_break_property(const struct property_spec *, - uint_least8_t, uint_least8_t - (*fill_missing)(uint_least32_t), - uint_least8_t - (*handle_conflict)(uint_least32_t, - uint_least8_t, uint_least8_t), - void (*post_process) - (struct properties *), - const char *, const char *); 
+void properties_print_derived_lookup_table( + char *, char *, size_t *, size_t, + int_least64_t (*get_value)(const struct properties *, size_t), + const void *); + +void properties_generate_break_property( + const struct property_spec *, uint_least8_t, + uint_least8_t (*fill_missing)(uint_least32_t), + uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t, + uint_least8_t), + void (*post_process)(struct properties *), const char *, const char *); void break_test_list_parse(char *, struct break_test **, size_t *); -void break_test_list_print(const struct break_test *, size_t, - const char *, const char *); +void break_test_list_print(const struct break_test *, size_t, const char *, + const char *); void break_test_list_free(struct break_test *, size_t); #endif /* UTIL_H */ diff --git a/gen/word.c b/gen/word.c @@ -11,108 +11,108 @@ static const struct property_spec word_break_property[] = { { .enumname = "OTHER", - .file = NULL, - .ucdname = NULL, + .file = NULL, + .ucdname = NULL, }, { .enumname = "ALETTER", - .file = FILE_WORD, - .ucdname = "ALetter", + .file = FILE_WORD, + .ucdname = "ALetter", }, { .enumname = "BOTH_ALETTER_EXTPICT", - .file = NULL, - .ucdname = NULL, + .file = NULL, + .ucdname = NULL, }, { .enumname = "CR", - .file = FILE_WORD, - .ucdname = "CR", + .file = FILE_WORD, + .ucdname = "CR", }, { .enumname = "DOUBLE_QUOTE", - .file = FILE_WORD, - .ucdname = "Double_Quote", + .file = FILE_WORD, + .ucdname = "Double_Quote", }, { .enumname = "EXTEND", - .file = FILE_WORD, - .ucdname = "Extend", + .file = FILE_WORD, + .ucdname = "Extend", }, { .enumname = "EXTENDED_PICTOGRAPHIC", - .file = FILE_EMOJI, - .ucdname = "Extended_Pictographic", + .file = FILE_EMOJI, + .ucdname = "Extended_Pictographic", }, { .enumname = "EXTENDNUMLET", - .file = FILE_WORD, - .ucdname = "ExtendNumLet", + .file = FILE_WORD, + .ucdname = "ExtendNumLet", }, { .enumname = "FORMAT", - .file = FILE_WORD, - .ucdname = "Format", + .file = FILE_WORD, + .ucdname = "Format", }, { 
.enumname = "HEBREW_LETTER", - .file = FILE_WORD, - .ucdname = "Hebrew_Letter", + .file = FILE_WORD, + .ucdname = "Hebrew_Letter", }, { .enumname = "KATAKANA", - .file = FILE_WORD, - .ucdname = "Katakana", + .file = FILE_WORD, + .ucdname = "Katakana", }, { .enumname = "LF", - .file = FILE_WORD, - .ucdname = "LF", + .file = FILE_WORD, + .ucdname = "LF", }, { .enumname = "MIDLETTER", - .file = FILE_WORD, - .ucdname = "MidLetter", + .file = FILE_WORD, + .ucdname = "MidLetter", }, { .enumname = "MIDNUM", - .file = FILE_WORD, - .ucdname = "MidNum", + .file = FILE_WORD, + .ucdname = "MidNum", }, { .enumname = "MIDNUMLET", - .file = FILE_WORD, - .ucdname = "MidNumLet", + .file = FILE_WORD, + .ucdname = "MidNumLet", }, { .enumname = "NEWLINE", - .file = FILE_WORD, - .ucdname = "Newline", + .file = FILE_WORD, + .ucdname = "Newline", }, { .enumname = "NUMERIC", - .file = FILE_WORD, - .ucdname = "Numeric", + .file = FILE_WORD, + .ucdname = "Numeric", }, { .enumname = "REGIONAL_INDICATOR", - .file = FILE_WORD, - .ucdname = "Regional_Indicator", + .file = FILE_WORD, + .ucdname = "Regional_Indicator", }, { .enumname = "SINGLE_QUOTE", - .file = FILE_WORD, - .ucdname = "Single_Quote", + .file = FILE_WORD, + .ucdname = "Single_Quote", }, { .enumname = "WSEGSPACE", - .file = FILE_WORD, - .ucdname = "WSegSpace", + .file = FILE_WORD, + .ucdname = "WSegSpace", }, { .enumname = "ZWJ", - .file = FILE_WORD, - .ucdname = "ZWJ", + .file = FILE_WORD, + .ucdname = "ZWJ", }, }; @@ -124,8 +124,10 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2) (void)cp; if ((!strcmp(word_break_property[prop1].enumname, "ALETTER") && - !strcmp(word_break_property[prop2].enumname, "EXTENDED_PICTOGRAPHIC")) || - (!strcmp(word_break_property[prop1].enumname, "EXTENDED_PICTOGRAPHIC") && + !strcmp(word_break_property[prop2].enumname, + "EXTENDED_PICTOGRAPHIC")) || + (!strcmp(word_break_property[prop1].enumname, + "EXTENDED_PICTOGRAPHIC") && !strcmp(word_break_property[prop2].enumname, 
"ALETTER"))) { for (result = 0; result < LEN(word_break_property); result++) { if (!strcmp(word_break_property[result].enumname, @@ -150,10 +152,9 @@ main(int argc, char *argv[]) { (void)argc; - properties_generate_break_property(word_break_property, - LEN(word_break_property), NULL, - handle_conflict, NULL, "word_break", - argv[0]); + properties_generate_break_property( + word_break_property, LEN(word_break_property), NULL, + handle_conflict, NULL, "word_break", argv[0]); return 0; } diff --git a/grapheme.h b/grapheme.h @@ -18,14 +18,15 @@ enum grapheme_bidirectional_override { size_t grapheme_decode_utf8(const char *, size_t, uint_least32_t *); size_t grapheme_encode_utf8(uint_least32_t, char *, size_t); -size_t grapheme_get_bidirectional_embedding_levels(const uint_least32_t *, size_t, - enum grapheme_bidirectional_override, - int_least32_t *, size_t); -size_t grapheme_get_bidirectional_embedding_levels_utf8(const char *, size_t, - enum grapheme_bidirectional_override, - int_least32_t *, size_t); +size_t grapheme_get_bidirectional_embedding_levels( + const uint_least32_t *, size_t, enum grapheme_bidirectional_override, + int_least32_t *, size_t); +size_t grapheme_get_bidirectional_embedding_levels_utf8( + const char *, size_t, enum grapheme_bidirectional_override, + int_least32_t *, size_t); -bool grapheme_is_character_break(uint_least32_t, uint_least32_t, uint_least16_t *); +bool grapheme_is_character_break(uint_least32_t, uint_least32_t, + uint_least16_t *); bool grapheme_is_lowercase(const uint_least32_t *, size_t, size_t *); bool grapheme_is_titlecase(const uint_least32_t *, size_t, size_t *); @@ -45,9 +46,12 @@ size_t grapheme_next_line_break_utf8(const char *, size_t); size_t grapheme_next_sentence_break_utf8(const char *, size_t); size_t grapheme_next_word_break_utf8(const char *, size_t); -size_t grapheme_to_lowercase(const uint_least32_t *, size_t, uint_least32_t *, size_t); -size_t grapheme_to_titlecase(const uint_least32_t *, size_t, uint_least32_t *, 
size_t); -size_t grapheme_to_uppercase(const uint_least32_t *, size_t, uint_least32_t *, size_t); +size_t grapheme_to_lowercase(const uint_least32_t *, size_t, uint_least32_t *, + size_t); +size_t grapheme_to_titlecase(const uint_least32_t *, size_t, uint_least32_t *, + size_t); +size_t grapheme_to_uppercase(const uint_least32_t *, size_t, uint_least32_t *, + size_t); size_t grapheme_to_lowercase_utf8(const char *, size_t, char *, size_t); size_t grapheme_to_titlecase_utf8(const char *, size_t, char *, size_t); diff --git a/src/bidirectional.c b/src/bidirectional.c @@ -12,15 +12,18 @@ struct isolate_runner { int_least32_t *buf; size_t buflen; enum bidi_property prev_prop; + struct { size_t off; enum bidi_property prop; int_least8_t level; } cur; + struct { size_t off; enum bidi_property prop; } next; + uint_least8_t paragraph_level; int_least8_t isolating_run_level; enum bidi_property last_strong_type; @@ -57,24 +60,42 @@ struct state { static inline void state_serialize(const struct state *s, int_least32_t *out) { - *out = (int_least32_t)( - ((((uint_least32_t)(s->paragraph_level)) & 0x01 /* 00000001 */) << 0) | - ((((uint_least32_t)(s->level + 1)) & 0x7F /* 01111111 */) << 1) | - ((((uint_least32_t)(s->prop)) & 0x1F /* 00011111 */) << 8) | - ((((uint_least32_t)(s->bracket - bidi_bracket)) & 0xFF /* 11111111 */) << 13) | - ((((uint_least32_t)(s->visited)) & 0x01 /* 00000001 */) << 21) | - ((((uint_least32_t)(s->rawprop)) & 0x1F /* 00011111 */) << 22)); + *out = (int_least32_t)(((((uint_least32_t)(s->paragraph_level)) & + 0x01 /* 00000001 */) + << 0) | + ((((uint_least32_t)(s->level + 1)) & + 0x7F /* 01111111 */) + << 1) | + ((((uint_least32_t)(s->prop)) & + 0x1F /* 00011111 */) + << 8) | + ((((uint_least32_t)(s->bracket - bidi_bracket)) & + 0xFF /* 11111111 */) + << 13) | + ((((uint_least32_t)(s->visited)) & + 0x01 /* 00000001 */) + << 21) | + ((((uint_least32_t)(s->rawprop)) & + 0x1F /* 00011111 */) + << 22)); } static inline void state_deserialize(int_least32_t 
in, struct state *s) { - s->paragraph_level = (uint_least8_t)((((uint_least32_t)in) >> 0) & 0x01 /* 00000001 */); - s->level = (int_least8_t)((((uint_least32_t)in) >> 1) & 0x7F /* 01111111 */) - 1; - s->prop = (enum bidi_property)((((uint_least32_t)in) >> 8) & 0x1F /* 00011111 */); - s->bracket = bidi_bracket + (uint_least8_t)((((uint_least32_t)in) >> 13) & 0xFF /* 11111111 */); - s->visited = (bool)((((uint_least32_t)in) >> 21) & 0x01 /* 00000001 */); - s->rawprop = (enum bidi_property)((((uint_least32_t)in) >> 22) & 0x1F /* 00011111 */); + s->paragraph_level = (uint_least8_t)((((uint_least32_t)in) >> 0) & + 0x01 /* 00000001 */); + s->level = (int_least8_t)((((uint_least32_t)in) >> 1) & + 0x7F /* 01111111 */) - + 1; + s->prop = (enum bidi_property)((((uint_least32_t)in) >> 8) & + 0x1F /* 00011111 */); + s->bracket = + bidi_bracket + (uint_least8_t)((((uint_least32_t)in) >> 13) & + 0xFF /* 11111111 */); + s->visited = (bool)((((uint_least32_t)in) >> 21) & 0x01 /* 00000001 */); + s->rawprop = (enum bidi_property)((((uint_least32_t)in) >> 22) & + 0x1F /* 00011111 */); } static void @@ -171,7 +192,6 @@ isolate_runner_advance(struct isolate_runner *ir) return 1; } - /* shift in */ ir->prev_prop = ir->cur.prop; ir->cur.off = ir->next.off; @@ -188,13 +208,13 @@ isolate_runner_advance(struct isolate_runner *ir) * on the first advancement as the prev_prop holds the sos type, * which can only be either R or L, which are both strong types */ - if (ir->prev_prop == BIDI_PROP_R || - ir->prev_prop == BIDI_PROP_L || + if (ir->prev_prop == BIDI_PROP_R || ir->prev_prop == BIDI_PROP_L || ir->prev_prop == BIDI_PROP_AL) { ir->last_strong_type = ir->prev_prop; } - /* initialize next state by going to the next character in the sequence */ + /* initialize next state by going to the next character in the sequence + */ ir->next.off = SIZE_MAX; ir->next.prop = NUM_BIDI_PROPS; @@ -210,8 +230,7 @@ isolate_runner_advance(struct isolate_runner *ir) } /* follow BD8/BD9 and P2 to traverse the 
current sequence */ - if (s.prop == BIDI_PROP_LRI || - s.prop == BIDI_PROP_RLI || + if (s.prop == BIDI_PROP_LRI || s.prop == BIDI_PROP_RLI || s.prop == BIDI_PROP_FSI) { /* * we encountered an isolate initiator, increment @@ -224,8 +243,7 @@ isolate_runner_advance(struct isolate_runner *ir) if (isolate_level != 1) { continue; } - } else if (s.prop == BIDI_PROP_PDI && - isolate_level > 0) { + } else if (s.prop == BIDI_PROP_PDI && isolate_level > 0) { isolate_level--; /* @@ -250,12 +268,14 @@ isolate_runner_advance(struct isolate_runner *ir) /* we were in the first initializing round */ continue; } else if (s.level == ir->isolating_run_level) { - /* isolate_level-skips have been handled before, we're good */ + /* isolate_level-skips have been handled before, we're + * good */ /* still in the sequence */ ir->next.off = (size_t)i; ir->next.prop = s.prop; } else { - /* out of sequence or isolated, compare levels via eos */ + /* out of sequence or isolated, compare levels via eos + */ if (MAX(last_isolate_level, s.level) % 2 == 0) { ir->next.prop = BIDI_PROP_L; } else { @@ -286,7 +306,8 @@ isolate_runner_advance(struct isolate_runner *ir) } static void -isolate_runner_set_current_prop(struct isolate_runner *ir, enum bidi_property prop) +isolate_runner_set_current_prop(struct isolate_runner *ir, + enum bidi_property prop) { struct state s; @@ -301,9 +322,9 @@ static inline enum bidi_property get_bidi_property(uint_least32_t cp) { if (likely(cp <= 0x10FFFF)) { - return (enum bidi_property) - ((bidi_minor[bidi_major[cp >> 8] + (cp & 0xff)]) & - 0x1F /* 00011111 */); + return (enum bidi_property)( + (bidi_minor[bidi_major[cp >> 8] + (cp & 0xff)]) & + 0x1F /* 00011111 */); } else { return BIDI_PROP_L; } @@ -320,8 +341,8 @@ get_bidi_bracket_off(uint_least32_t cp) } static size_t -process_isolating_run_sequence(int_least32_t *buf, size_t buflen, - size_t off, uint_least8_t paragraph_level) +process_isolating_run_sequence(int_least32_t *buf, size_t buflen, size_t off, + 
uint_least8_t paragraph_level) { enum bidi_property sequence_prop; struct isolate_runner ir, tmp; @@ -335,7 +356,8 @@ process_isolating_run_sequence(int_least32_t *buf, size_t buflen, ir.prev_prop == BIDI_PROP_RLI || ir.prev_prop == BIDI_PROP_FSI || ir.prev_prop == BIDI_PROP_PDI) { - isolate_runner_set_current_prop(&ir, BIDI_PROP_ON); + isolate_runner_set_current_prop(&ir, + BIDI_PROP_ON); } else { isolate_runner_set_current_prop(&ir, ir.prev_prop); @@ -371,7 +393,7 @@ process_isolating_run_sequence(int_least32_t *buf, size_t buflen, } if (ir.prev_prop == BIDI_PROP_AN && - ir.cur.prop == BIDI_PROP_CS && + ir.cur.prop == BIDI_PROP_CS && ir.next.prop == BIDI_PROP_AN) { isolate_runner_set_current_prop(&ir, BIDI_PROP_AN); } @@ -389,14 +411,19 @@ process_isolating_run_sequence(int_least32_t *buf, size_t buflen, } else if (ir.cur.prop == BIDI_PROP_EN) { /* set the preceding sequence */ if (runsince != SIZE_MAX) { - isolate_runner_init(buf, buflen, runsince, paragraph_level, (runsince > off), &tmp); + isolate_runner_init(buf, buflen, runsince, + paragraph_level, + (runsince > off), &tmp); while (!isolate_runner_advance(&tmp) && tmp.cur.off < ir.cur.off) { - isolate_runner_set_current_prop(&tmp, BIDI_PROP_EN); + isolate_runner_set_current_prop( + &tmp, BIDI_PROP_EN); } runsince = SIZE_MAX; } else { - isolate_runner_init(buf, buflen, ir.cur.off, paragraph_level, (ir.cur.off > off), &tmp); + isolate_runner_init(buf, buflen, ir.cur.off, + paragraph_level, + (ir.cur.off > off), &tmp); isolate_runner_advance(&tmp); } /* follow the succeeding sequence */ @@ -404,7 +431,8 @@ process_isolating_run_sequence(int_least32_t *buf, size_t buflen, if (tmp.cur.prop != BIDI_PROP_ET) { break; } - isolate_runner_set_current_prop(&tmp, BIDI_PROP_EN); + isolate_runner_set_current_prop(&tmp, + BIDI_PROP_EN); } } else { /* sequence ended */ @@ -439,23 +467,26 @@ process_isolating_run_sequence(int_least32_t *buf, size_t buflen, isolate_runner_init(buf, buflen, off, paragraph_level, false, &ir); 
while (!isolate_runner_advance(&ir)) { if (sequence_end == SIZE_MAX) { - if (ir.cur.prop == BIDI_PROP_B || - ir.cur.prop == BIDI_PROP_S || - ir.cur.prop == BIDI_PROP_WS || - ir.cur.prop == BIDI_PROP_ON || + if (ir.cur.prop == BIDI_PROP_B || + ir.cur.prop == BIDI_PROP_S || + ir.cur.prop == BIDI_PROP_WS || + ir.cur.prop == BIDI_PROP_ON || ir.cur.prop == BIDI_PROP_FSI || ir.cur.prop == BIDI_PROP_LRI || ir.cur.prop == BIDI_PROP_RLI || ir.cur.prop == BIDI_PROP_PDI) { - /* the current character is an NI (neutral or isolate) */ + /* the current character is an NI (neutral or + * isolate) */ /* scan ahead to the end of the NI-sequence */ - isolate_runner_init(buf, buflen, ir.cur.off, paragraph_level, (ir.cur.off > off), &tmp); + isolate_runner_init(buf, buflen, ir.cur.off, + paragraph_level, + (ir.cur.off > off), &tmp); while (!isolate_runner_advance(&tmp)) { - if (tmp.next.prop != BIDI_PROP_B && - tmp.next.prop != BIDI_PROP_S && - tmp.next.prop != BIDI_PROP_WS && - tmp.next.prop != BIDI_PROP_ON && + if (tmp.next.prop != BIDI_PROP_B && + tmp.next.prop != BIDI_PROP_S && + tmp.next.prop != BIDI_PROP_WS && + tmp.next.prop != BIDI_PROP_ON && tmp.next.prop != BIDI_PROP_FSI && tmp.next.prop != BIDI_PROP_LRI && tmp.next.prop != BIDI_PROP_RLI && @@ -465,17 +496,17 @@ process_isolating_run_sequence(int_least32_t *buf, size_t buflen, } /* - * check what follows and see if the text has the - * same direction on both sides + * check what follows and see if the text has + * the same direction on both sides */ if (ir.prev_prop == BIDI_PROP_L && tmp.next.prop == BIDI_PROP_L) { sequence_end = tmp.cur.off; sequence_prop = BIDI_PROP_L; - } else if ((ir.prev_prop == BIDI_PROP_R || + } else if ((ir.prev_prop == BIDI_PROP_R || ir.prev_prop == BIDI_PROP_EN || ir.prev_prop == BIDI_PROP_AN) && - (tmp.next.prop == BIDI_PROP_R || + (tmp.next.prop == BIDI_PROP_R || tmp.next.prop == BIDI_PROP_EN || tmp.next.prop == BIDI_PROP_AN)) { sequence_end = tmp.cur.off; @@ -486,7 +517,8 @@ 
process_isolating_run_sequence(int_least32_t *buf, size_t buflen, if (sequence_end != SIZE_MAX) { if (ir.cur.off <= sequence_end) { - isolate_runner_set_current_prop(&ir, sequence_prop); + isolate_runner_set_current_prop(&ir, + sequence_prop); } else { /* end of sequence, reset */ sequence_end = SIZE_MAX; @@ -498,10 +530,9 @@ process_isolating_run_sequence(int_least32_t *buf, size_t buflen, /* N2 */ isolate_runner_init(buf, buflen, off, paragraph_level, false, &ir); while (!isolate_runner_advance(&ir)) { - if (ir.cur.prop == BIDI_PROP_B || - ir.cur.prop == BIDI_PROP_S || - ir.cur.prop == BIDI_PROP_WS || - ir.cur.prop == BIDI_PROP_ON || + if (ir.cur.prop == BIDI_PROP_B || ir.cur.prop == BIDI_PROP_S || + ir.cur.prop == BIDI_PROP_WS || + ir.cur.prop == BIDI_PROP_ON || ir.cur.prop == BIDI_PROP_FSI || ir.cur.prop == BIDI_PROP_LRI || ir.cur.prop == BIDI_PROP_RLI || @@ -509,10 +540,12 @@ process_isolating_run_sequence(int_least32_t *buf, size_t buflen, /* N2 */ if (ir.cur.level % 2 == 0) { /* even embedding level */ - isolate_runner_set_current_prop(&ir, BIDI_PROP_L); + isolate_runner_set_current_prop(&ir, + BIDI_PROP_L); } else { /* odd embedding level */ - isolate_runner_set_current_prop(&ir, BIDI_PROP_R); + isolate_runner_set_current_prop(&ir, + BIDI_PROP_R); } } } @@ -522,8 +555,8 @@ process_isolating_run_sequence(int_least32_t *buf, size_t buflen, static uint_least8_t get_paragraph_level(enum grapheme_bidirectional_override override, - bool terminate_on_pdi, - const int_least32_t *buf, size_t buflen) + bool terminate_on_pdi, const int_least32_t *buf, + size_t buflen) { struct state s; int_least8_t isolate_level; @@ -541,8 +574,7 @@ get_paragraph_level(enum grapheme_bidirectional_override override, for (bufoff = 0, isolate_level = 0; bufoff < buflen; bufoff++) { state_deserialize(buf[bufoff], &s); - if (s.prop == BIDI_PROP_PDI && - isolate_level == 0 && + if (s.prop == BIDI_PROP_PDI && isolate_level == 0 && terminate_on_pdi) { /* * we are in a FSI-subsection of a 
paragraph and @@ -552,8 +584,7 @@ get_paragraph_level(enum grapheme_bidirectional_override override, } /* BD8/BD9 */ - if ((s.prop == BIDI_PROP_LRI || - s.prop == BIDI_PROP_RLI || + if ((s.prop == BIDI_PROP_LRI || s.prop == BIDI_PROP_RLI || s.prop == BIDI_PROP_FSI) && isolate_level < MAX_DEPTH) { /* we hit an isolate initiator, increment counter */ @@ -570,8 +601,7 @@ get_paragraph_level(enum grapheme_bidirectional_override override, /* P3 */ if (s.prop == BIDI_PROP_L) { return 0; - } else if (s.prop == BIDI_PROP_AL || - s.prop == BIDI_PROP_R) { + } else if (s.prop == BIDI_PROP_AL || s.prop == BIDI_PROP_R) { return 1; } } @@ -585,13 +615,15 @@ get_paragraph_embedding_levels(enum grapheme_bidirectional_override override, { enum bidi_property tmp_prop; struct state s, t; + struct { int_least8_t level; enum grapheme_bidirectional_override override; bool directional_isolate; } directional_status[MAX_DEPTH + 2], *dirstat = directional_status; + size_t overflow_isolate_count, overflow_embedding_count, - valid_isolate_count, bufoff, i, runsince; + valid_isolate_count, bufoff, i, runsince; uint_least8_t paragraph_level; paragraph_level = get_paragraph_level(override, false, buf, buflen); @@ -600,7 +632,8 @@ get_paragraph_embedding_levels(enum grapheme_bidirectional_override override, dirstat->level = (int_least8_t)paragraph_level; dirstat->override = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; dirstat->directional_isolate = false; - overflow_isolate_count = overflow_embedding_count = valid_isolate_count = 0; + overflow_isolate_count = overflow_embedding_count = + valid_isolate_count = 0; for (bufoff = 0; bufoff < buflen; bufoff++) { state_deserialize(buf[bufoff], &s); @@ -608,79 +641,105 @@ get_paragraph_embedding_levels(enum grapheme_bidirectional_override override, again: if (tmp_prop == BIDI_PROP_RLE) { /* X2 */ - if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= MAX_DEPTH && + if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= + MAX_DEPTH && overflow_isolate_count 
== 0 && overflow_embedding_count == 0) { /* valid RLE */ dirstat++; - dirstat->level = (dirstat - 1)->level + ((dirstat - 1)->level % 2 != 0) + 1; - dirstat->override = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; + dirstat->level = + (dirstat - 1)->level + + ((dirstat - 1)->level % 2 != 0) + 1; + dirstat->override = + GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; dirstat->directional_isolate = false; } else { /* overflow RLE */ - overflow_embedding_count += (overflow_isolate_count == 0); + overflow_embedding_count += + (overflow_isolate_count == 0); } } else if (tmp_prop == BIDI_PROP_LRE) { /* X3 */ - if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= MAX_DEPTH && + if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= + MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0) { /* valid LRE */ dirstat++; - dirstat->level = (dirstat - 1)->level + ((dirstat - 1)->level % 2 == 0) + 1; - dirstat->override = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; + dirstat->level = + (dirstat - 1)->level + + ((dirstat - 1)->level % 2 == 0) + 1; + dirstat->override = + GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; dirstat->directional_isolate = false; } else { /* overflow LRE */ - overflow_embedding_count += (overflow_isolate_count == 0); + overflow_embedding_count += + (overflow_isolate_count == 0); } } else if (tmp_prop == BIDI_PROP_RLO) { /* X4 */ - if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= MAX_DEPTH && + if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= + MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0) { /* valid RLO */ dirstat++; - dirstat->level = (dirstat - 1)->level + ((dirstat - 1)->level % 2 != 0) + 1; - dirstat->override = GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL; + dirstat->level = + (dirstat - 1)->level + + ((dirstat - 1)->level % 2 != 0) + 1; + dirstat->override = + GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL; dirstat->directional_isolate = false; } else { /* overflow RLO */ - overflow_embedding_count += (overflow_isolate_count == 
0); + overflow_embedding_count += + (overflow_isolate_count == 0); } } else if (tmp_prop == BIDI_PROP_LRO) { /* X5 */ - if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= MAX_DEPTH && + if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= + MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0) { /* valid LRE */ dirstat++; - dirstat->level = (dirstat - 1)->level + ((dirstat - 1)->level % 2 == 0) + 1; - dirstat->override = GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR; + dirstat->level = + (dirstat - 1)->level + + ((dirstat - 1)->level % 2 == 0) + 1; + dirstat->override = + GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR; dirstat->directional_isolate = false; } else { /* overflow LRO */ - overflow_embedding_count += (overflow_isolate_count == 0); + overflow_embedding_count += + (overflow_isolate_count == 0); } } else if (tmp_prop == BIDI_PROP_RLI) { /* X5a */ s.level = dirstat->level; - if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { + if (dirstat->override == + GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { s.prop = BIDI_PROP_L; - } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) { + } else if (dirstat->override == + GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) { s.prop = BIDI_PROP_R; } state_serialize(&s, &(buf[bufoff])); - if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= MAX_DEPTH && + if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= + MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0) { /* valid RLI */ valid_isolate_count++; dirstat++; - dirstat->level = (dirstat - 1)->level + ((dirstat - 1)->level % 2 != 0) + 1; - dirstat->override = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; + dirstat->level = + (dirstat - 1)->level + + ((dirstat - 1)->level % 2 != 0) + 1; + dirstat->override = + GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; dirstat->directional_isolate = true; } else { /* overflow RLI */ @@ -689,22 +748,28 @@ again: } else if (tmp_prop == BIDI_PROP_LRI) { /* X5b */ s.level = dirstat->level; - if 
(dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { + if (dirstat->override == + GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { s.prop = BIDI_PROP_L; - } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) { + } else if (dirstat->override == + GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) { s.prop = BIDI_PROP_R; } state_serialize(&s, &(buf[bufoff])); - if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= MAX_DEPTH && + if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= + MAX_DEPTH && overflow_isolate_count == 0 && overflow_embedding_count == 0) { /* valid LRI */ valid_isolate_count++; dirstat++; - dirstat->level = (dirstat - 1)->level + ((dirstat - 1)->level % 2 == 0) + 1; - dirstat->override = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; + dirstat->level = + (dirstat - 1)->level + + ((dirstat - 1)->level % 2 == 0) + 1; + dirstat->override = + GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL; dirstat->directional_isolate = true; } else { /* overflow LRI */ @@ -712,23 +777,27 @@ again: } } else if (tmp_prop == BIDI_PROP_FSI) { /* X5c */ - if (get_paragraph_level(GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL, true, - buf + (bufoff + 1), buflen - (bufoff + 1)) == 1) { + if (get_paragraph_level( + GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL, + true, buf + (bufoff + 1), + buflen - (bufoff + 1)) == 1) { tmp_prop = BIDI_PROP_RLI; goto again; } else { /* ... 
== 0 */ tmp_prop = BIDI_PROP_LRI; goto again; } - } else if (tmp_prop != BIDI_PROP_B && - tmp_prop != BIDI_PROP_BN && + } else if (tmp_prop != BIDI_PROP_B && + tmp_prop != BIDI_PROP_BN && tmp_prop != BIDI_PROP_PDF && tmp_prop != BIDI_PROP_PDI) { /* X6 */ s.level = dirstat->level; - if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { + if (dirstat->override == + GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { s.prop = BIDI_PROP_L; - } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) { + } else if (dirstat->override == + GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) { s.prop = BIDI_PROP_R; } state_serialize(&s, &(buf[bufoff])); @@ -773,9 +842,11 @@ again: } s.level = dirstat->level; - if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { + if (dirstat->override == + GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { s.prop = BIDI_PROP_L; - } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) { + } else if (dirstat->override == + GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) { s.prop = BIDI_PROP_R; } state_serialize(&s, &(buf[bufoff])); @@ -796,12 +867,9 @@ again: } /* X9 */ - if (tmp_prop == BIDI_PROP_RLE || - tmp_prop == BIDI_PROP_LRE || - tmp_prop == BIDI_PROP_RLO || - tmp_prop == BIDI_PROP_LRO || - tmp_prop == BIDI_PROP_PDF || - tmp_prop == BIDI_PROP_BN) { + if (tmp_prop == BIDI_PROP_RLE || tmp_prop == BIDI_PROP_LRE || + tmp_prop == BIDI_PROP_RLO || tmp_prop == BIDI_PROP_LRO || + tmp_prop == BIDI_PROP_PDF || tmp_prop == BIDI_PROP_BN) { s.level = -1; state_serialize(&s, &(buf[bufoff])); } @@ -811,8 +879,8 @@ again: for (bufoff = 0; bufoff < buflen; bufoff++) { state_deserialize(buf[bufoff], &s); if (!s.visited && s.level != -1) { - bufoff += process_isolating_run_sequence(buf, buflen, bufoff, - paragraph_level); + bufoff += process_isolating_run_sequence( + buf, buflen, bufoff, paragraph_level); } } @@ -823,7 +891,7 @@ again: for (bufoff = 0; bufoff < buflen; bufoff++) { state_deserialize(buf[bufoff], &s); - if (s.level % 2 == 0 ) { + if (s.level % 
2 == 0) { /* even level */ if (s.prop == BIDI_PROP_R) { s.level += 1; @@ -833,8 +901,7 @@ again: } } else { /* odd level */ - if (s.prop == BIDI_PROP_L || - s.prop == BIDI_PROP_EN || + if (s.prop == BIDI_PROP_L || s.prop == BIDI_PROP_EN || s.prop == BIDI_PROP_AN) { s.level += 1; } @@ -853,10 +920,8 @@ again: continue; } - if (s.rawprop == BIDI_PROP_WS || - s.rawprop == BIDI_PROP_FSI || - s.rawprop == BIDI_PROP_LRI || - s.rawprop == BIDI_PROP_RLI || + if (s.rawprop == BIDI_PROP_WS || s.rawprop == BIDI_PROP_FSI || + s.rawprop == BIDI_PROP_LRI || s.rawprop == BIDI_PROP_RLI || s.rawprop == BIDI_PROP_PDI) { if (runsince == SIZE_MAX) { /* a new run has begun */ @@ -878,8 +943,7 @@ again: runsince = SIZE_MAX; } - if (s.rawprop == BIDI_PROP_S || - s.rawprop == BIDI_PROP_B) { + if (s.rawprop == BIDI_PROP_S || s.rawprop == BIDI_PROP_B) { s.level = (int_least8_t)paragraph_level; state_serialize(&s, &(buf[bufoff])); } @@ -902,7 +966,8 @@ again: } static size_t -get_embedding_levels(HERODOTUS_READER *r, enum grapheme_bidirectional_override override, +get_embedding_levels(HERODOTUS_READER *r, + enum grapheme_bidirectional_override override, int_least32_t *buf, size_t buflen) { struct state s; @@ -911,8 +976,9 @@ get_embedding_levels(HERODOTUS_READER *r, enum grapheme_bidirectional_override o if (buf == NULL) { for (; herodotus_read_codepoint(r, true, &cp) == - HERODOTUS_STATUS_SUCCESS;) + HERODOTUS_STATUS_SUCCESS;) { ; + } /* see below for return value reasoning */ return herodotus_reader_number_read(r); @@ -922,8 +988,9 @@ get_embedding_levels(HERODOTUS_READER *r, enum grapheme_bidirectional_override o * the first step is to determine the bidirectional properties * and store them in the buffer */ - for (bufoff = 0; herodotus_read_codepoint(r, true, &cp) == - HERODOTUS_STATUS_SUCCESS; bufoff++) { + for (bufoff = 0; + herodotus_read_codepoint(r, true, &cp) == HERODOTUS_STATUS_SUCCESS; + bufoff++) { if (bufoff < buflen) { /* * actually only do something when we have @@ -974,9 
+1041,10 @@ get_embedding_levels(HERODOTUS_READER *r, enum grapheme_bidirectional_override o } size_t -grapheme_get_bidirectional_embedding_levels(const uint_least32_t *src, size_t srclen, - enum grapheme_bidirectional_override override, - int_least32_t *dest, size_t destlen) +grapheme_get_bidirectional_embedding_levels( + const uint_least32_t *src, size_t srclen, + enum grapheme_bidirectional_override override, int_least32_t *dest, + size_t destlen) { HERODOTUS_READER r; @@ -986,9 +1054,10 @@ grapheme_get_bidirectional_embedding_levels(const uint_least32_t *src, size_t sr } size_t -grapheme_get_bidirectional_embedding_levels_utf8(const char *src, size_t srclen, - enum grapheme_bidirectional_override override, - int_least32_t *dest, size_t destlen) +grapheme_get_bidirectional_embedding_levels_utf8( + const char *src, size_t srclen, + enum grapheme_bidirectional_override override, int_least32_t *dest, + size_t destlen) { HERODOTUS_READER r; diff --git a/src/case.c b/src/case.c @@ -2,8 +2,8 @@ #include <stddef.h> #include <stdint.h> -#include "../grapheme.h" #include "../gen/case.h" +#include "../grapheme.h" #include "util.h" static inline enum case_property @@ -11,7 +11,7 @@ get_case_property(uint_least32_t cp) { if (likely(cp <= UINT32_C(0x10FFFF))) { return (enum case_property) - case_minor[case_major[cp >> 8] + (cp & 0xFF)]; + case_minor[case_major[cp >> 8] + (cp & 0xFF)]; } else { return CASE_PROP_OTHER; } @@ -45,58 +45,64 @@ to_case(HERODOTUS_READER *r, HERODOTUS_WRITER *w, uint_least32_t cp, tmp_cp; int_least32_t map; - for (; herodotus_read_codepoint(r, true, &cp) == HERODOTUS_STATUS_SUCCESS;) { + for (; herodotus_read_codepoint(r, true, &cp) == + HERODOTUS_STATUS_SUCCESS;) { if (sc == lower_special) { /* - * For the special Final_Sigma-rule (see SpecialCasing.txt), - * which is the only non-localized case-dependent rule, - * we apply a different mapping when a sigma is at the - * end of a word. 
+ * For the special Final_Sigma-rule (see + * SpecialCasing.txt), which is the only non-localized + * case-dependent rule, we apply a different mapping + * when a sigma is at the end of a word. * * Before: cased case-ignorable* * After: not(case-ignorable* cased) * - * We check the after-condition on demand, but the before- - * condition is best checked using the "level"-heuristic - * also used in the sentence and line breaking-implementations. + * We check the after-condition on demand, but the + * before- condition is best checked using the + * "level"-heuristic also used in the sentence and line + * breaking-implementations. */ - if (cp == UINT32_C(0x03A3) && /* GREEK CAPITAL LETTER SIGMA */ + if (cp == UINT32_C(0x03A3) && /* GREEK CAPITAL LETTER + SIGMA */ (final_sigma_level == 1 || final_sigma_level == 2)) { /* * check succeeding characters by first skipping - * all case-ignorable characters and then checking - * if the succeeding character is cased, invalidating - * the after-condition + * all case-ignorable characters and then + * checking if the succeeding character is + * cased, invalidating the after-condition */ herodotus_reader_copy(r, &tmp); for (prop = NUM_CASE_PROPS; - (s = herodotus_read_codepoint(&tmp, true, &tmp_cp)) == - HERODOTUS_STATUS_SUCCESS; ) { + (s = herodotus_read_codepoint(&tmp, true, + &tmp_cp)) == + HERODOTUS_STATUS_SUCCESS;) { prop = get_case_property(tmp_cp); if (prop != CASE_PROP_CASE_IGNORABLE && prop != CASE_PROP_BOTH_CASED_CASE_IGNORABLE) { - break; + break; } } /* - * Now prop is something other than case-ignorable or - * the source-string ended. - * If it is something other than cased, we know + * Now prop is something other than + * case-ignorable or the source-string ended. 
If + * it is something other than cased, we know * that the after-condition holds */ if (s != HERODOTUS_STATUS_SUCCESS || (prop != CASE_PROP_CASED && prop != CASE_PROP_BOTH_CASED_CASE_IGNORABLE)) { /* - * write GREEK SMALL LETTER FINAL SIGMA to - * destination + * write GREEK SMALL LETTER FINAL SIGMA + * to destination + */ + herodotus_write_codepoint( + w, UINT32_C(0x03C2)); + + /* reset Final_Sigma-state and continue */ - herodotus_write_codepoint(w, UINT32_C(0x03C2)); - - /* reset Final_Sigma-state and continue */ final_sigma_level = 0; continue; } @@ -110,11 +116,13 @@ to_case(HERODOTUS_READER *r, HERODOTUS_WRITER *w, prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE)) { /* sequence has begun */ final_sigma_level = 1; - } else if ((final_sigma_level == 1 || - final_sigma_level == 2) && - (prop == CASE_PROP_CASE_IGNORABLE || - prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE)) { - /* case-ignorable sequence begins or continued */ + } else if ( + (final_sigma_level == 1 || + final_sigma_level == 2) && + (prop == CASE_PROP_CASE_IGNORABLE || + prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE)) { + /* case-ignorable sequence begins or continued + */ final_sigma_level = 2; } else { /* sequence broke */ @@ -134,8 +142,8 @@ to_case(HERODOTUS_READER *r, HERODOTUS_WRITER *w, } } else { /* we have a simple mapping */ - herodotus_write_codepoint(w, (uint_least32_t) - ((int_least32_t)cp + map)); + herodotus_write_codepoint( + w, (uint_least32_t)((int_least32_t)cp + map)); } } @@ -168,14 +176,16 @@ to_titlecase(HERODOTUS_READER *r, HERODOTUS_WRITER *w) for (; (nwb = herodotus_next_word_break(r)) > 0;) { herodotus_reader_push_advance_limit(r, nwb); - for (; (s = herodotus_read_codepoint(r, false, &cp)) == HERODOTUS_STATUS_SUCCESS;) { + for (; (s = herodotus_read_codepoint(r, false, &cp)) == + HERODOTUS_STATUS_SUCCESS;) { /* check if we have a cased character */ prop = get_case_property(cp); if (prop == CASE_PROP_CASED || prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE) { break; } else { - /* 
write the data to the output verbatim, it if permits */ + /* write the data to the output verbatim, it if + * permits */ herodotus_write_codepoint(w, cp); /* increment reader */ @@ -199,9 +209,10 @@ to_titlecase(HERODOTUS_READER *r, HERODOTUS_WRITER *w) * we encountered a cased character before the word * break, convert it to titlecase */ - herodotus_reader_push_advance_limit(r, - herodotus_reader_next_codepoint_break(r)); - to_case(r, w, 0, title_major, title_minor, title_special); + herodotus_reader_push_advance_limit( + r, herodotus_reader_next_codepoint_break(r)); + to_case(r, w, 0, title_major, title_minor, + title_special); herodotus_reader_pop_limit(r); } @@ -218,7 +229,8 @@ to_titlecase(HERODOTUS_READER *r, HERODOTUS_WRITER *w) } size_t -grapheme_to_uppercase(const uint_least32_t *src, size_t srclen, uint_least32_t *dest, size_t destlen) +grapheme_to_uppercase(const uint_least32_t *src, size_t srclen, + uint_least32_t *dest, size_t destlen) { HERODOTUS_READER r; HERODOTUS_WRITER w; @@ -230,7 +242,8 @@ grapheme_to_uppercase(const uint_least32_t *src, size_t srclen, uint_least32_t * } size_t -grapheme_to_lowercase(const uint_least32_t *src, size_t srclen, uint_least32_t *dest, size_t destlen) +grapheme_to_lowercase(const uint_least32_t *src, size_t srclen, + uint_least32_t *dest, size_t destlen) { HERODOTUS_READER r; HERODOTUS_WRITER w; @@ -242,7 +255,8 @@ grapheme_to_lowercase(const uint_least32_t *src, size_t srclen, uint_least32_t * } size_t -grapheme_to_titlecase(const uint_least32_t *src, size_t srclen, uint_least32_t *dest, size_t destlen) +grapheme_to_titlecase(const uint_least32_t *src, size_t srclen, + uint_least32_t *dest, size_t destlen) { HERODOTUS_READER r; HERODOTUS_WRITER w; @@ -254,7 +268,8 @@ grapheme_to_titlecase(const uint_least32_t *src, size_t srclen, uint_least32_t * } size_t -grapheme_to_uppercase_utf8(const char *src, size_t srclen, char *dest, size_t destlen) +grapheme_to_uppercase_utf8(const char *src, size_t srclen, char *dest, + 
size_t destlen) { HERODOTUS_READER r; HERODOTUS_WRITER w; @@ -266,7 +281,8 @@ grapheme_to_uppercase_utf8(const char *src, size_t srclen, char *dest, size_t de } size_t -grapheme_to_lowercase_utf8(const char *src, size_t srclen, char *dest, size_t destlen) +grapheme_to_lowercase_utf8(const char *src, size_t srclen, char *dest, + size_t destlen) { HERODOTUS_READER r; HERODOTUS_WRITER w; @@ -278,7 +294,8 @@ grapheme_to_lowercase_utf8(const char *src, size_t srclen, char *dest, size_t de } size_t -grapheme_to_titlecase_utf8(const char *src, size_t srclen, char *dest, size_t destlen) +grapheme_to_titlecase_utf8(const char *src, size_t srclen, char *dest, + size_t destlen) { HERODOTUS_READER r; HERODOTUS_WRITER w; @@ -299,7 +316,8 @@ is_case(HERODOTUS_READER *r, const uint_least16_t *major, uint_least32_t cp; int_least32_t map; - for (; herodotus_read_codepoint(r, false, &cp) == HERODOTUS_STATUS_SUCCESS;) { + for (; herodotus_read_codepoint(r, false, &cp) == + HERODOTUS_STATUS_SUCCESS;) { /* get and handle case mapping */ if (unlikely((map = get_case_offset(cp, major, minor)) >= INT32_C(0x110000))) { @@ -315,7 +333,8 @@ is_case(HERODOTUS_READER *r, const uint_least16_t *major, goto done; } else { /* move forward */ - herodotus_read_codepoint(r, true, &cp); + herodotus_read_codepoint( + r, true, &cp); } } else { /* @@ -357,7 +376,8 @@ is_titlecase(HERODOTUS_READER *r, size_t *output) for (; (nwb = herodotus_next_word_break(r)) > 0;) { herodotus_reader_push_advance_limit(r, nwb); - for (; (s = herodotus_read_codepoint(r, false, &cp)) == HERODOTUS_STATUS_SUCCESS;) { + for (; (s = herodotus_read_codepoint(r, false, &cp)) == + HERODOTUS_STATUS_SUCCESS;) { /* check if we have a cased character */ prop = get_case_property(cp); if (prop == CASE_PROP_CASED || @@ -384,17 +404,20 @@ is_titlecase(HERODOTUS_READER *r, size_t *output) * we encountered a cased character before the word * break, check if it's titlecase */ - herodotus_reader_push_advance_limit(r, - 
herodotus_reader_next_codepoint_break(r)); - if (!is_case(r, title_major, title_minor, title_special, NULL)) { + herodotus_reader_push_advance_limit( + r, herodotus_reader_next_codepoint_break(r)); + if (!is_case(r, title_major, title_minor, title_special, + NULL)) { ret = false; goto done; } herodotus_reader_pop_limit(r); } - /* check if the rest of the codepoints in the word are lowercase */ - if (!is_case(r, lower_major, lower_minor, lower_special, NULL)) { + /* check if the rest of the codepoints in the word are lowercase + */ + if (!is_case(r, lower_major, lower_minor, lower_special, + NULL)) { ret = false; goto done; } diff --git a/src/character.c b/src/character.c @@ -16,83 +16,80 @@ struct character_break_state { static const uint_least16_t dont_break[NUM_CHAR_BREAK_PROPS] = { [CHAR_BREAK_PROP_OTHER] = - UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ - [CHAR_BREAK_PROP_CR] = - UINT16_C(1) << CHAR_BREAK_PROP_LF, /* GB3 */ + UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ + [CHAR_BREAK_PROP_CR] = UINT16_C(1) << CHAR_BREAK_PROP_LF, /* GB3 */ [CHAR_BREAK_PROP_EXTEND] = - UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ + UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ [CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] = - UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ + UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ 
[CHAR_BREAK_PROP_HANGUL_L] = - UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_L | /* GB6 */ - UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB6 */ - UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LV | /* GB6 */ - UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LVT | /* GB6 */ - UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ + UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_L | /* GB6 */ + UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB6 */ + UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LV | /* GB6 */ + UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LVT | /* GB6 */ + UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ [CHAR_BREAK_PROP_HANGUL_V] = - UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */ - UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */ - UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ + UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */ + UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */ + UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ [CHAR_BREAK_PROP_HANGUL_T] = - UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */ - UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ + UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */ + UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ [CHAR_BREAK_PROP_HANGUL_LV] = - UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */ - UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */ - UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ - 
UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ + UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */ + UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */ + UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ [CHAR_BREAK_PROP_HANGUL_LVT] = - UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */ - UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ + UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */ + UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ [CHAR_BREAK_PROP_PREPEND] = - UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK | /* GB9a */ + UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK | /* GB9a */ (UINT16_C(0xFFFF) & - ~(UINT16_C(1) << CHAR_BREAK_PROP_CR | - UINT16_C(1) << CHAR_BREAK_PROP_LF | - UINT16_C(1) << CHAR_BREAK_PROP_CONTROL - ) - ), /* GB9b */ + ~(UINT16_C(1) << CHAR_BREAK_PROP_CR | + UINT16_C(1) << CHAR_BREAK_PROP_LF | + UINT16_C(1) << CHAR_BREAK_PROP_CONTROL)), /* GB9b */ [CHAR_BREAK_PROP_REGIONAL_INDICATOR] = - UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ + UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ [CHAR_BREAK_PROP_SPACINGMARK] = - UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* 
GB9a */ + UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ [CHAR_BREAK_PROP_ZWJ] = - UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ - UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ + UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */ + UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */ }; static const uint_least16_t flag_update_gb11[2 * NUM_CHAR_BREAK_PROPS] = { [CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] = - UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | + UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | UINT16_C(1) << CHAR_BREAK_PROP_EXTEND, [CHAR_BREAK_PROP_ZWJ + NUM_CHAR_BREAK_PROPS] = UINT16_C(1) << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC, [CHAR_BREAK_PROP_EXTEND + NUM_CHAR_BREAK_PROPS] = - UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | + UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | UINT16_C(1) << CHAR_BREAK_PROP_ZWJ, [CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC + NUM_CHAR_BREAK_PROPS] = - UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | + UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | UINT16_C(1) << CHAR_BREAK_PROP_EXTEND, }; static const uint_least16_t dont_break_gb11[2 * NUM_CHAR_BREAK_PROPS] = { @@ -113,7 +110,8 @@ get_break_prop(uint_least32_t cp) { if (likely(cp <= UINT32_C(0x10FFFF))) { return (enum char_break_property) - char_break_minor[char_break_major[cp >> 8] + (cp & 0xFF)]; + char_break_minor[char_break_major[cp >> 8] + + (cp & 0xFF)]; } else { return CHAR_BREAK_PROP_OTHER; } @@ -122,23 +120,27 @@ get_break_prop(uint_least32_t cp) static inline void state_serialize(const struct character_break_state *in, uint_least16_t *out) { - *out = (uint_least16_t)(in->prop & UINT8_C(0xFF)) | /* first 8 bits */ - (uint_least16_t)(((uint_least16_t)(in->prop_set)) << 8) | /* 9th bit */ - (uint_least16_t)(((uint_least16_t)(in->gb11_flag)) << 9) | /* 10th bit */ - 
(uint_least16_t)(((uint_least16_t)(in->gb12_13_flag)) << 10); /* 11th bit */ + *out = (uint_least16_t)(in->prop & UINT8_C(0xFF)) | /* first 8 bits */ + (uint_least16_t)(((uint_least16_t)(in->prop_set)) + << 8) | /* 9th bit */ + (uint_least16_t)(((uint_least16_t)(in->gb11_flag)) + << 9) | /* 10th bit */ + (uint_least16_t)(((uint_least16_t)(in->gb12_13_flag)) + << 10); /* 11th bit */ } static inline void state_deserialize(uint_least16_t in, struct character_break_state *out) { - out->prop = in & UINT8_C(0xFF); - out->prop_set = in & (UINT16_C(1) << 8); - out->gb11_flag = in & (UINT16_C(1) << 9); + out->prop = in & UINT8_C(0xFF); + out->prop_set = in & (UINT16_C(1) << 8); + out->gb11_flag = in & (UINT16_C(1) << 9); out->gb12_13_flag = in & (UINT16_C(1) << 10); } bool -grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1, uint_least16_t *s) +grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1, + uint_least16_t *s) { struct character_break_state state; enum char_break_property cp0_prop, cp1_prop; @@ -161,23 +163,26 @@ grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1, uint_least16 /* update flags */ state.gb11_flag = flag_update_gb11[cp0_prop + NUM_CHAR_BREAK_PROPS * - state.gb11_flag] & + state.gb11_flag] & UINT16_C(1) << cp1_prop; state.gb12_13_flag = - flag_update_gb12_13[cp0_prop + NUM_CHAR_BREAK_PROPS * - state.gb12_13_flag] & - UINT16_C(1) << cp1_prop; + flag_update_gb12_13[cp0_prop + + NUM_CHAR_BREAK_PROPS * + state.gb12_13_flag] & + UINT16_C(1) << cp1_prop; /* * Apply grapheme cluster breaking algorithm (UAX #29), see * http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules */ notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) || - (dont_break_gb11[cp0_prop + state.gb11_flag * - NUM_CHAR_BREAK_PROPS] & + (dont_break_gb11[cp0_prop + + state.gb11_flag * + NUM_CHAR_BREAK_PROPS] & (UINT16_C(1) << cp1_prop)) || - (dont_break_gb12_13[cp0_prop + state.gb12_13_flag * - NUM_CHAR_BREAK_PROPS] & + 
(dont_break_gb12_13[cp0_prop + + state.gb12_13_flag * + NUM_CHAR_BREAK_PROPS] & (UINT16_C(1) << cp1_prop)); /* update or reset flags (when we have a break) */ @@ -198,8 +203,10 @@ grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1, uint_least16 * were all set to false */ notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) || - (dont_break_gb11[cp0_prop] & (UINT16_C(1) << cp1_prop)) || - (dont_break_gb12_13[cp0_prop] & (UINT16_C(1) << cp1_prop)); + (dont_break_gb11[cp0_prop] & + (UINT16_C(1) << cp1_prop)) || + (dont_break_gb12_13[cp0_prop] & + (UINT16_C(1) << cp1_prop)); } return !notbreak; @@ -212,7 +219,8 @@ next_character_break(HERODOTUS_READER *r) uint_least32_t cp0 = 0, cp1 = 0; for (herodotus_read_codepoint(r, true, &cp0); - herodotus_read_codepoint(r, false, &cp1) == HERODOTUS_STATUS_SUCCESS; + herodotus_read_codepoint(r, false, &cp1) == + HERODOTUS_STATUS_SUCCESS; herodotus_read_codepoint(r, true, &cp0)) { if (grapheme_is_character_break(cp0, cp1, &state)) { break; diff --git a/src/line.c b/src/line.c @@ -11,7 +11,8 @@ get_break_prop(uint_least32_t cp) { if (likely(cp <= UINT32_C(0x10FFFF))) { return (enum line_break_property) - line_break_minor[line_break_major[cp >> 8] + (cp & 0xff)]; + line_break_minor[line_break_major[cp >> 8] + + (cp & 0xff)]; } else { return LINE_BREAK_PROP_AL; } @@ -22,7 +23,7 @@ next_line_break(HERODOTUS_READER *r) { HERODOTUS_READER tmp; enum line_break_property cp0_prop, cp1_prop, last_non_cm_or_zwj_prop, - last_non_sp_prop, last_non_sp_cm_or_zwj_prop; + last_non_sp_prop, last_non_sp_cm_or_zwj_prop; uint_least32_t cp; uint_least8_t lb25_level = 0; bool lb21a_flag = false, ri_even = true; @@ -43,8 +44,10 @@ next_line_break(HERODOTUS_READER *r) last_non_cm_or_zwj_prop = LINE_BREAK_PROP_AL; /* according to LB10 */ last_non_sp_prop = last_non_sp_cm_or_zwj_prop = NUM_LINE_BREAK_PROPS; - for (herodotus_read_codepoint(r, true, &cp), cp0_prop = get_break_prop(cp); - herodotus_read_codepoint(r, false, &cp) == 
HERODOTUS_STATUS_SUCCESS; + for (herodotus_read_codepoint(r, true, &cp), + cp0_prop = get_break_prop(cp); + herodotus_read_codepoint(r, false, &cp) == + HERODOTUS_STATUS_SUCCESS; herodotus_read_codepoint(r, true, &cp), cp0_prop = cp1_prop) { /* get property of the right codepoint */ cp1_prop = get_break_prop(cp); @@ -59,10 +62,11 @@ next_line_break(HERODOTUS_READER *r) cp0_prop != LINE_BREAK_PROP_ZWJ) { /* * check if the property we are overwriting now is an - * HL. If so, we set the LB21a-flag which depends on this - * knowledge. + * HL. If so, we set the LB21a-flag which depends on + * this knowledge. */ - lb21a_flag = (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL); + lb21a_flag = + (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL); /* check regional indicator state */ if (cp0_prop == LINE_BREAK_PROP_RI) { @@ -109,8 +113,7 @@ next_line_break(HERODOTUS_READER *r) * and one (CL | CP) to the left of the middle * spot */ - if ((lb25_level == 0 || - lb25_level == 1) && + if ((lb25_level == 0 || lb25_level == 1) && cp0_prop == LINE_BREAK_PROP_NU) { /* sequence has begun */ lb25_level = 1; @@ -118,12 +121,15 @@ next_line_break(HERODOTUS_READER *r) (cp0_prop == LINE_BREAK_PROP_NU || cp0_prop == LINE_BREAK_PROP_SY || cp0_prop == LINE_BREAK_PROP_IS)) { - /* (NU | SY | IS) sequence begins or continued */ + /* (NU | SY | IS) sequence begins or continued + */ lb25_level = 2; - } else if ((lb25_level == 1 || lb25_level == 2) && - (cp0_prop == LINE_BREAK_PROP_CL || - cp0_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF || - cp0_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF)) { + } else if ( + (lb25_level == 1 || lb25_level == 2) && + (cp0_prop == LINE_BREAK_PROP_CL || + cp0_prop == + LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF || + cp0_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF)) { /* CL or CP at the end of the sequence */ lb25_level = 3; } else { @@ -229,17 +235,19 @@ next_line_break(HERODOTUS_READER *r) /* LB13 (affected by tailoring for LB25, see example 7) */ if (cp1_prop == LINE_BREAK_PROP_EX || 
(last_non_cm_or_zwj_prop != LINE_BREAK_PROP_NU && - (cp1_prop == LINE_BREAK_PROP_CL || + (cp1_prop == LINE_BREAK_PROP_CL || cp1_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF || - cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF || - cp1_prop == LINE_BREAK_PROP_IS || + cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF || + cp1_prop == LINE_BREAK_PROP_IS || cp1_prop == LINE_BREAK_PROP_SY))) { continue; } /* LB14 */ - if (last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF || - last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF) { + if (last_non_sp_cm_or_zwj_prop == + LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF || + last_non_sp_cm_or_zwj_prop == + LINE_BREAK_PROP_OP_WITH_EAW_HWF) { continue; } @@ -251,9 +259,11 @@ next_line_break(HERODOTUS_READER *r) } /* LB16 */ - if ((last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CL || - last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF || - last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF) && + if ((last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CL || + last_non_sp_cm_or_zwj_prop == + LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF || + last_non_sp_cm_or_zwj_prop == + LINE_BREAK_PROP_CP_WITH_EAW_HWF) && cp1_prop == LINE_BREAK_PROP_NS) { continue; } @@ -308,7 +318,7 @@ next_line_break(HERODOTUS_READER *r) } /* LB23 */ - if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL || + if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL || last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) && cp1_prop == LINE_BREAK_PROP_NU) { continue; @@ -336,11 +346,11 @@ next_line_break(HERODOTUS_READER *r) /* LB24 */ if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PR || last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PO) && - (cp1_prop == LINE_BREAK_PROP_AL || + (cp1_prop == LINE_BREAK_PROP_AL || cp1_prop == LINE_BREAK_PROP_HL)) { continue; } - if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL || + if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL || last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) && (cp1_prop == LINE_BREAK_PROP_PR || cp1_prop == 
LINE_BREAK_PROP_PO)) { @@ -362,32 +372,33 @@ next_line_break(HERODOTUS_READER *r) herodotus_reader_copy(r, &tmp); herodotus_read_codepoint(&tmp, true, &cp); if (herodotus_read_codepoint(&tmp, true, &cp) == - HERODOTUS_STATUS_SUCCESS && + HERODOTUS_STATUS_SUCCESS && (cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF || - cp1_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF || + cp1_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF || cp1_prop == LINE_BREAK_PROP_HY)) { if (get_break_prop(cp) == LINE_BREAK_PROP_NU) { continue; } } } - if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF || - last_non_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF || + if ((last_non_cm_or_zwj_prop == + LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF || + last_non_cm_or_zwj_prop == + LINE_BREAK_PROP_OP_WITH_EAW_HWF || last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HY) && cp1_prop == LINE_BREAK_PROP_NU) { continue; } - if (lb25_level == 1 && - (cp1_prop == LINE_BREAK_PROP_NU || - cp1_prop == LINE_BREAK_PROP_SY || - cp1_prop == LINE_BREAK_PROP_IS)) { + if (lb25_level == 1 && (cp1_prop == LINE_BREAK_PROP_NU || + cp1_prop == LINE_BREAK_PROP_SY || + cp1_prop == LINE_BREAK_PROP_IS)) { continue; } if ((lb25_level == 1 || lb25_level == 2) && - (cp1_prop == LINE_BREAK_PROP_NU || - cp1_prop == LINE_BREAK_PROP_SY || - cp1_prop == LINE_BREAK_PROP_IS || - cp1_prop == LINE_BREAK_PROP_CL || + (cp1_prop == LINE_BREAK_PROP_NU || + cp1_prop == LINE_BREAK_PROP_SY || + cp1_prop == LINE_BREAK_PROP_IS || + cp1_prop == LINE_BREAK_PROP_CL || cp1_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF || cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF)) { continue; @@ -437,37 +448,37 @@ next_line_break(HERODOTUS_READER *r) } /* LB28 */ - if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL || + if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL || last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) && - (cp1_prop == LINE_BREAK_PROP_AL || + (cp1_prop == LINE_BREAK_PROP_AL || cp1_prop == LINE_BREAK_PROP_HL)) { continue; } /* LB29 */ if 
(last_non_cm_or_zwj_prop == LINE_BREAK_PROP_IS && - (cp1_prop == LINE_BREAK_PROP_AL || + (cp1_prop == LINE_BREAK_PROP_AL || cp1_prop == LINE_BREAK_PROP_HL)) { continue; } /* LB30 */ - if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL || - last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL || + if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL || + last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL || last_non_cm_or_zwj_prop == LINE_BREAK_PROP_NU) && cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF) { continue; } - if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF && - (cp1_prop == LINE_BREAK_PROP_AL || - cp1_prop == LINE_BREAK_PROP_HL || + if (last_non_cm_or_zwj_prop == + LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF && + (cp1_prop == LINE_BREAK_PROP_AL || + cp1_prop == LINE_BREAK_PROP_HL || cp1_prop == LINE_BREAK_PROP_NU)) { continue; } /* LB30a */ - if (!ri_even && - last_non_cm_or_zwj_prop == LINE_BREAK_PROP_RI && + if (!ri_even && last_non_cm_or_zwj_prop == LINE_BREAK_PROP_RI && cp1_prop == LINE_BREAK_PROP_RI) { continue; } @@ -477,7 +488,8 @@ next_line_break(HERODOTUS_READER *r) cp1_prop == LINE_BREAK_PROP_EM) { continue; } - if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_BOTH_CN_EXTPICT && + if (last_non_cm_or_zwj_prop == + LINE_BREAK_PROP_BOTH_CN_EXTPICT && cp1_prop == LINE_BREAK_PROP_EM) { continue; } diff --git a/src/sentence.c b/src/sentence.c @@ -6,8 +6,7 @@ #include "../grapheme.h" #include "util.h" -struct sentence_break_state -{ +struct sentence_break_state { uint_least8_t aterm_close_sp_level; uint_least8_t saterm_close_sp_parasep_level; }; @@ -17,8 +16,8 @@ get_sentence_break_prop(uint_least32_t cp) { if (likely(cp <= UINT32_C(0x10FFFF))) { return (uint_least8_t) - sentence_break_minor[sentence_break_major[cp >> 8] + - (cp & 0xff)]; + sentence_break_minor[sentence_break_major[cp >> 8] + + (cp & 0xff)]; } else { return SENTENCE_BREAK_PROP_OTHER; } @@ -80,7 +79,7 @@ sentence_skip_shift_callback(uint_least8_t prop, void *s) state->aterm_close_sp_level = 2; 
} else if ((state->aterm_close_sp_level == 1 || state->aterm_close_sp_level == 2 || - state->aterm_close_sp_level == 3) && + state->aterm_close_sp_level == 3) && prop == SENTENCE_BREAK_PROP_SP) { /* sp-sequence begins or continued */ state->aterm_close_sp_level = 3; @@ -102,7 +101,7 @@ sentence_skip_shift_callback(uint_least8_t prop, void *s) state->saterm_close_sp_parasep_level = 2; } else if ((state->saterm_close_sp_parasep_level == 1 || state->saterm_close_sp_parasep_level == 2 || - state->saterm_close_sp_parasep_level == 3) && + state->saterm_close_sp_parasep_level == 3) && prop == SENTENCE_BREAK_PROP_SP) { /* sp-sequence begins or continued */ state->saterm_close_sp_parasep_level = 3; @@ -110,7 +109,7 @@ sentence_skip_shift_callback(uint_least8_t prop, void *s) state->saterm_close_sp_parasep_level == 2 || state->saterm_close_sp_parasep_level == 3) && (prop == SENTENCE_BREAK_PROP_SEP || - prop == SENTENCE_BREAK_PROP_CR || + prop == SENTENCE_BREAK_PROP_CR || prop == SENTENCE_BREAK_PROP_LF)) { /* ParaSep at the end of the sequence */ state->saterm_close_sp_parasep_level = 4; @@ -146,7 +145,7 @@ next_sentence_break(HERODOTUS_READER *r) /* SB4 */ if (p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_SEP || - p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_CR || + p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_CR || p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_LF) { break; } @@ -179,7 +178,8 @@ next_sentence_break(HERODOTUS_READER *r) * This is the most complicated rule, requiring * the right-hand-side to satisfy the regular expression * - * ( ¬(OLetter | Upper | Lower | ParaSep | SATerm) )* Lower + * ( ¬(OLetter | Upper | Lower | ParaSep | SATerm) )* + * Lower * * which we simply check "manually" given LUT-lookups * are very cheap by starting at the mid_reader. 
@@ -198,12 +198,12 @@ next_sentence_break(HERODOTUS_READER *r) * match the following condition */ if (prop == SENTENCE_BREAK_PROP_OLETTER || - prop == SENTENCE_BREAK_PROP_UPPER || - prop == SENTENCE_BREAK_PROP_LOWER || - prop == SENTENCE_BREAK_PROP_SEP || - prop == SENTENCE_BREAK_PROP_CR || - prop == SENTENCE_BREAK_PROP_LF || - prop == SENTENCE_BREAK_PROP_STERM || + prop == SENTENCE_BREAK_PROP_UPPER || + prop == SENTENCE_BREAK_PROP_LOWER || + prop == SENTENCE_BREAK_PROP_SEP || + prop == SENTENCE_BREAK_PROP_CR || + prop == SENTENCE_BREAK_PROP_LF || + prop == SENTENCE_BREAK_PROP_STERM || prop == SENTENCE_BREAK_PROP_ATERM) { break; } @@ -219,8 +219,8 @@ next_sentence_break(HERODOTUS_READER *r) state.saterm_close_sp_parasep_level == 2 || state.saterm_close_sp_parasep_level == 3) && (p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SCONTINUE || - p.skip.next_prop[0] == SENTENCE_BREAK_PROP_STERM || - p.skip.next_prop[0] == SENTENCE_BREAK_PROP_ATERM)) { + p.skip.next_prop[0] == SENTENCE_BREAK_PROP_STERM || + p.skip.next_prop[0] == SENTENCE_BREAK_PROP_ATERM)) { continue; } @@ -228,9 +228,9 @@ next_sentence_break(HERODOTUS_READER *r) if ((state.saterm_close_sp_parasep_level == 1 || state.saterm_close_sp_parasep_level == 2) && (p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CLOSE || - p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP || - p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SEP || - p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR || + p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP || + p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SEP || + p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR || p.skip.next_prop[0] == SENTENCE_BREAK_PROP_LF)) { continue; } @@ -239,9 +239,9 @@ next_sentence_break(HERODOTUS_READER *r) if ((state.saterm_close_sp_parasep_level == 1 || state.saterm_close_sp_parasep_level == 2 || state.saterm_close_sp_parasep_level == 3) && - (p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP || + (p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP || p.skip.next_prop[0] == 
SENTENCE_BREAK_PROP_SEP || - p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR || + p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR || p.skip.next_prop[0] == SENTENCE_BREAK_PROP_LF)) { continue; } diff --git a/src/utf8.c b/src/utf8.c @@ -9,14 +9,14 @@ /* lookup-table for the types of sequence first bytes */ static const struct { - uint_least8_t lower; /* lower bound of sequence first byte */ - uint_least8_t upper; /* upper bound of sequence first byte */ + uint_least8_t lower; /* lower bound of sequence first byte */ + uint_least8_t upper; /* upper bound of sequence first byte */ uint_least32_t mincp; /* smallest non-overlong encoded codepoint */ uint_least32_t maxcp; /* largest encodable codepoint */ - /* - * implicit: table-offset represents the number of following - * bytes of the form 10xxxxxx (6 bits capacity each) - */ + /* + * implicit: table-offset represents the number of following + * bytes of the form 10xxxxxx (6 bits capacity each) + */ } lut[] = { [0] = { /* 0xxxxxxx */ @@ -104,8 +104,8 @@ grapheme_decode_utf8(const char *str, size_t len, uint_least32_t *cp) * sequence starter occurs right before a NUL-byte. */ for (i = 0; 1 + i < len; i++) { - if(!BETWEEN(((const unsigned char *)str)[1 + i], - 0x80, 0xBF)) { + if (!BETWEEN(((const unsigned char *)str)[1 + i], 0x80, + 0xBF)) { break; } } @@ -124,7 +124,7 @@ grapheme_decode_utf8(const char *str, size_t len, uint_least32_t *cp) * (i.e. between 0x80 (10000000) and 0xBF (10111111)) */ for (i = 1; i <= off; i++) { - if(!BETWEEN(((const unsigned char *)str)[i], 0x80, 0xBF)) { + if (!BETWEEN(((const unsigned char *)str)[i], 0x80, 0xBF)) { /* * byte does not match format; return * number of bytes processed excluding the @@ -201,8 +201,8 @@ grapheme_encode_utf8(uint_least32_t cp, char *str, size_t len) * We do not overwrite the mask because we guaranteed earlier * that there are no bits higher than the mask allows. 
*/ - ((unsigned char *)str)[0] = lut[off].lower | - (uint_least8_t)(cp >> (6 * off)); + ((unsigned char *)str)[0] = + lut[off].lower | (uint_least8_t)(cp >> (6 * off)); for (i = 1; i <= off; i++) { /* @@ -211,8 +211,8 @@ grapheme_encode_utf8(uint_least32_t cp, char *str, size_t len) * extract from the properly-shifted value using the * mask 00111111 (0x3F) */ - ((unsigned char *)str)[i] = 0x80 | - ((cp >> (6 * (off - i))) & 0x3F); + ((unsigned char *)str)[i] = + 0x80 | ((cp >> (6 * (off - i))) & 0x3F); } return 1 + off; diff --git a/src/util.c b/src/util.c @@ -37,16 +37,20 @@ herodotus_reader_copy(const HERODOTUS_READER *src, HERODOTUS_READER *dest) */ dest->type = src->type; if (src->type == HERODOTUS_TYPE_CODEPOINT) { - dest->src = (src->src == NULL) ? NULL : - ((const uint_least32_t *)(src->src)) + src->off; + dest->src = + (src->src == NULL) ? + NULL : + ((const uint_least32_t *)(src->src)) + src->off; } else { /* src->type == HERODOTUS_TYPE_UTF8 */ - dest->src = (src->src == NULL) ? NULL : - ((const char *)(src->src)) + src->off; + dest->src = (src->src == NULL) ? + NULL : + ((const char *)(src->src)) + src->off; } if (src->srclen == SIZE_MAX) { dest->srclen = SIZE_MAX; } else { - dest->srclen = (src->off < src->srclen) ? src->srclen - src->off : 0; + dest->srclen = + (src->off < src->srclen) ? src->srclen - src->off : 0; } dest->off = 0; dest->terminated_by_null = src->terminated_by_null; @@ -62,8 +66,10 @@ herodotus_reader_copy(const HERODOTUS_READER *src, HERODOTUS_READER *dest) * to release the limit and, instead, we just * prevent any more reads */ - dest->soft_limit[i] = (src->off < src->soft_limit[i]) ? - src->soft_limit[i] - src->off : 0; + dest->soft_limit[i] = + (src->off < src->soft_limit[i]) ? 
+ src->soft_limit[i] - src->off : + 0; } } } @@ -141,9 +147,9 @@ herodotus_read_codepoint(HERODOTUS_READER *r, bool advance, uint_least32_t *cp) *cp = ((const uint_least32_t *)(r->src))[r->off]; ret = 1; } else { /* r->type == HERODOTUS_TYPE_UTF8 */ - ret = grapheme_decode_utf8((const char *)r->src + r->off, - MIN(r->srclen, r->soft_limit[0]) - - r->off, cp); + ret = grapheme_decode_utf8( + (const char *)r->src + r->off, + MIN(r->srclen, r->soft_limit[0]) - r->off, cp); } if (unlikely(r->srclen == SIZE_MAX && *cp == 0)) { @@ -176,8 +182,8 @@ herodotus_read_codepoint(HERODOTUS_READER *r, bool advance, uint_least32_t *cp) } void -herodotus_writer_init(HERODOTUS_WRITER *w, enum herodotus_type type, - void *dest, size_t destlen) +herodotus_writer_init(HERODOTUS_WRITER *w, enum herodotus_type type, void *dest, + size_t destlen) { w->type = type; w->dest = dest; @@ -212,8 +218,8 @@ herodotus_writer_nul_terminate(HERODOTUS_WRITER *w) * (the last case meaning truncation). */ if (w->type == HERODOTUS_TYPE_CODEPOINT) { - ((uint_least32_t *)(w->dest)) - [w->first_unwritable_offset] = 0; + ((uint_least32_t + *)(w->dest))[w->first_unwritable_offset] = 0; } else { /* w->type == HERODOTUS_TYPE_UTF8 */ ((char *)(w->dest))[w->first_unwritable_offset] = '\0'; } @@ -226,8 +232,7 @@ herodotus_writer_nul_terminate(HERODOTUS_WRITER *w) * byte. 
*/ if (w->type == HERODOTUS_TYPE_CODEPOINT) { - ((uint_least32_t *)(w->dest)) - [w->destlen - 1] = 0; + ((uint_least32_t *)(w->dest))[w->destlen - 1] = 0; } else { /* w->type == HERODOTUS_TYPE_UTF8 */ ((char *)(w->dest))[w->destlen - 1] = '\0'; } @@ -267,8 +272,8 @@ herodotus_write_codepoint(HERODOTUS_WRITER *w, uint_least32_t cp) if (w->dest != NULL && w->off + ret < w->destlen) { /* we still have enough room in the buffer */ - grapheme_encode_utf8(cp, (char *)(w->dest) + - w->off, w->destlen - w->off); + grapheme_encode_utf8(cp, (char *)(w->dest) + w->off, + w->destlen - w->off); } else if (w->first_unwritable_offset == SIZE_MAX) { /* * the first unwritable offset has not been @@ -328,8 +333,9 @@ proper_init(const HERODOTUS_READER *r, void *state, uint_least8_t no_prop, /* fill in the two next raw properties (after no-initialization) */ p->raw.next_prop[0] = p->raw.next_prop[1] = p->no_prop; - for (i = 0; i < 2 && herodotus_read_codepoint(&(p->raw_reader), true, &cp) == - HERODOTUS_STATUS_SUCCESS; ) { + for (i = 0; + i < 2 && herodotus_read_codepoint(&(p->raw_reader), true, &cp) == + HERODOTUS_STATUS_SUCCESS;) { p->raw.next_prop[i++] = p->get_break_prop(cp); } @@ -338,8 +344,9 @@ proper_init(const HERODOTUS_READER *r, void *state, uint_least8_t no_prop, /* fill in the two next skip properties (after no-initialization) */ p->skip.next_prop[0] = p->skip.next_prop[1] = p->no_prop; - for (i = 0; i < 2 && herodotus_read_codepoint(&(p->skip_reader), true, &cp) == - HERODOTUS_STATUS_SUCCESS; ) { + for (i = 0; + i < 2 && herodotus_read_codepoint(&(p->skip_reader), true, &cp) == + HERODOTUS_STATUS_SUCCESS;) { prop = p->get_break_prop(cp); if (!p->is_skippable_prop(prop)) { p->skip.next_prop[i++] = prop; diff --git a/src/util.h b/src/util.h @@ -10,25 +10,25 @@ #include "../grapheme.h" #undef MIN -#define MIN(x,y) ((x) < (y) ? (x) : (y)) +#define MIN(x, y) ((x) < (y) ? (x) : (y)) #undef MAX -#define MAX(x,y) ((x) > (y) ? (x) : (y)) +#define MAX(x, y) ((x) > (y) ? 
(x) : (y)) #undef LEN #define LEN(x) (sizeof(x) / sizeof(*(x))) #undef likely #undef unlikely #ifdef __has_builtin - #if __has_builtin(__builtin_expect) - #define likely(expr) __builtin_expect(!!(expr), 1) - #define unlikely(expr) __builtin_expect(!!(expr), 0) - #else - #define likely(expr) (expr) - #define unlikely(expr) (expr) - #endif +#if __has_builtin(__builtin_expect) +#define likely(expr) __builtin_expect(!!(expr), 1) +#define unlikely(expr) __builtin_expect(!!(expr), 0) #else - #define likely(expr) (expr) - #define unlikely(expr) (expr) +#define likely(expr) (expr) +#define unlikely(expr) (expr) +#endif +#else +#define likely(expr) (expr) +#define unlikely(expr) (expr) #endif /* @@ -84,6 +84,7 @@ struct proper { uint_least8_t prev_prop[2]; uint_least8_t next_prop[2]; } raw, skip; + HERODOTUS_READER mid_reader, raw_reader, skip_reader; void *state; uint_least8_t no_prop; @@ -100,7 +101,8 @@ void herodotus_reader_pop_limit(HERODOTUS_READER *); size_t herodotus_reader_number_read(const HERODOTUS_READER *); size_t herodotus_reader_next_word_break(const HERODOTUS_READER *); size_t herodotus_reader_next_codepoint_break(const HERODOTUS_READER *); -enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool, uint_least32_t *); +enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool, + uint_least32_t *); void herodotus_writer_init(HERODOTUS_WRITER *, enum herodotus_type, void *, size_t); diff --git a/src/word.c b/src/word.c @@ -6,8 +6,7 @@ #include "../grapheme.h" #include "util.h" -struct word_break_state -{ +struct word_break_state { bool ri_even; }; @@ -16,7 +15,8 @@ get_word_break_prop(uint_least32_t cp) { if (likely(cp <= UINT32_C(0x10FFFF))) { return (uint_least8_t) - word_break_minor[word_break_major[cp >> 8] + (cp & 0xff)]; + word_break_minor[word_break_major[cp >> 8] + + (cp & 0xff)]; } else { return WORD_BREAK_PROP_OTHER; } @@ -26,8 +26,7 @@ static bool is_skippable_word_prop(uint_least8_t prop) { return prop == 
WORD_BREAK_PROP_EXTEND || - prop == WORD_BREAK_PROP_FORMAT || - prop == WORD_BREAK_PROP_ZWJ; + prop == WORD_BREAK_PROP_FORMAT || prop == WORD_BREAK_PROP_ZWJ; } static void @@ -79,22 +78,24 @@ next_word_break(HERODOTUS_READER *r) /* WB3a */ if (p.raw.prev_prop[0] == WORD_BREAK_PROP_NEWLINE || - p.raw.prev_prop[0] == WORD_BREAK_PROP_CR || + p.raw.prev_prop[0] == WORD_BREAK_PROP_CR || p.raw.prev_prop[0] == WORD_BREAK_PROP_LF) { break; } /* WB3b */ if (p.raw.next_prop[0] == WORD_BREAK_PROP_NEWLINE || - p.raw.next_prop[0] == WORD_BREAK_PROP_CR || + p.raw.next_prop[0] == WORD_BREAK_PROP_CR || p.raw.next_prop[0] == WORD_BREAK_PROP_LF) { break; } /* WB3c */ if (p.raw.prev_prop[0] == WORD_BREAK_PROP_ZWJ && - (p.raw.next_prop[0] == WORD_BREAK_PROP_EXTENDED_PICTOGRAPHIC || - p.raw.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT)) { + (p.raw.next_prop[0] == + WORD_BREAK_PROP_EXTENDED_PICTOGRAPHIC || + p.raw.next_prop[0] == + WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT)) { continue; } @@ -112,37 +113,43 @@ next_word_break(HERODOTUS_READER *r) } /* WB5 */ - if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER || - p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || + if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER || + p.skip.prev_prop[0] == + WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) && - (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER || - p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || + (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER || + p.skip.next_prop[0] == + WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER)) { continue; } /* WB6 */ - if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER || - p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || + if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER || + p.skip.prev_prop[0] == + WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) && - 
(p.skip.next_prop[0] == WORD_BREAK_PROP_MIDLETTER || - p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET || + (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDLETTER || + p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET || p.skip.next_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) && - (p.skip.next_prop[1] == WORD_BREAK_PROP_ALETTER || - p.skip.next_prop[1] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || + (p.skip.next_prop[1] == WORD_BREAK_PROP_ALETTER || + p.skip.next_prop[1] == + WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || p.skip.next_prop[1] == WORD_BREAK_PROP_HEBREW_LETTER)) { continue; } /* WB7 */ - if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDLETTER || - p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET || + if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDLETTER || + p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET || p.skip.prev_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) && - (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER || - p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || + (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER || + p.skip.next_prop[0] == + WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) && - (p.skip.prev_prop[1] == WORD_BREAK_PROP_ALETTER || - p.skip.prev_prop[1] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || + (p.skip.prev_prop[1] == WORD_BREAK_PROP_ALETTER || + p.skip.prev_prop[1] == + WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || p.skip.prev_prop[1] == WORD_BREAK_PROP_HEBREW_LETTER)) { continue; } @@ -174,8 +181,9 @@ next_word_break(HERODOTUS_READER *r) } /* WB9 */ - if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER || - p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || + if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER || + p.skip.prev_prop[0] == + WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) && p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC) { continue; @@ -183,15 +191,16 @@ next_word_break(HERODOTUS_READER *r) /* WB10 */ if (p.skip.prev_prop[0] 
== WORD_BREAK_PROP_NUMERIC && - (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER || - p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || + (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER || + p.skip.next_prop[0] == + WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER)) { continue; } /* WB11 */ - if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUM || - p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET || + if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUM || + p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET || p.skip.prev_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) && p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC && p.skip.prev_prop[1] == WORD_BREAK_PROP_NUMERIC) { @@ -200,8 +209,8 @@ next_word_break(HERODOTUS_READER *r) /* WB12 */ if (p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC && - (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUM || - p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET || + (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUM || + p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET || p.skip.next_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) && p.skip.next_prop[1] == WORD_BREAK_PROP_NUMERIC) { continue; @@ -214,11 +223,12 @@ next_word_break(HERODOTUS_READER *r) } /* WB13a */ - if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER || - p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || - p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER || - p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC || - p.skip.prev_prop[0] == WORD_BREAK_PROP_KATAKANA || + if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER || + p.skip.prev_prop[0] == + WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || + p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER || + p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC || + p.skip.prev_prop[0] == WORD_BREAK_PROP_KATAKANA || p.skip.prev_prop[0] == WORD_BREAK_PROP_EXTENDNUMLET) && p.skip.next_prop[0] == WORD_BREAK_PROP_EXTENDNUMLET) { continue; @@ -226,10 +236,11 @@ next_word_break(HERODOTUS_READER *r) 
/* WB13b */ if (p.skip.prev_prop[0] == WORD_BREAK_PROP_EXTENDNUMLET && - (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER || - p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || - p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER || - p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC || + (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER || + p.skip.next_prop[0] == + WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT || + p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER || + p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC || p.skip.next_prop[0] == WORD_BREAK_PROP_KATAKANA)) { continue; } diff --git a/test/bidirectional.c b/test/bidirectional.c @@ -25,14 +25,16 @@ main(int argc, char *argv[]) for (i = 0, failed = 0; i < LEN(bidirectional_test); i++) { /*if (i != 490798) - continue;*/ + continue;*/ for (m = 0; m < bidirectional_test[i].modelen; m++) { ret = grapheme_get_bidirectional_embedding_levels( - bidirectional_test[i].cp, bidirectional_test[i].cplen, + bidirectional_test[i].cp, + bidirectional_test[i].cplen, bidirectional_test[i].mode[m], lev, levlen); - if (ret != bidirectional_test[i].cplen || ret > levlen) { + if (ret != bidirectional_test[i].cplen || + ret > levlen) { goto err; } @@ -43,18 +45,22 @@ main(int argc, char *argv[]) } continue; err: - fprintf(stderr, "%s: Failed conformance test %zu (mode %i) [", + fprintf(stderr, + "%s: Failed conformance test %zu (mode %i) [", argv[0], i, bidirectional_test[i].mode[m]); for (j = 0; j < bidirectional_test[i].cplen; j++) { - fprintf(stderr, " 0x%04" PRIXLEAST32, bidirectional_test[i].cp[j]); + fprintf(stderr, " 0x%04" PRIXLEAST32, + bidirectional_test[i].cp[j]); } fprintf(stderr, " ],\n\tgot ("); for (j = 0; j < ret; j++) { - fprintf(stderr, " %" PRIdLEAST8, (int_least8_t)lev[j]); + fprintf(stderr, " %" PRIdLEAST8, + (int_least8_t)lev[j]); } fprintf(stderr, " ),\n\texpected ("); for (j = 0; j < ret; j++) { - fprintf(stderr, " %" PRIdLEAST8, bidirectional_test[i].level[j]); + fprintf(stderr, " %" PRIdLEAST8, + 
bidirectional_test[i].level[j]); } fprintf(stderr, " ).\n"); failed++; diff --git a/test/case.c b/test/case.c @@ -9,10 +9,12 @@ struct unit_test_is_case_utf8 { const char *description; + struct { const char *src; size_t srclen; } input; + struct { bool ret; size_t caselen; @@ -21,11 +23,13 @@ struct unit_test_is_case_utf8 { struct unit_test_to_case_utf8 { const char *description; + struct { const char *src; size_t srclen; size_t destlen; } input; + struct { const char *dest; size_t ret; @@ -35,57 +39,69 @@ struct unit_test_to_case_utf8 { static const struct unit_test_is_case_utf8 is_lowercase_utf8[] = { { .description = "empty input", - .input = { "", 0 }, + .input = { "", 0 }, .output = { true, 0 }, }, { .description = "one character, violation", - .input = { "A", 1 }, + .input = { "A", 1 }, .output = { false, 0 }, }, { .description = "one character, confirmation", - .input = { "\xC3\x9F", 2 }, + .input = { "\xC3\x9F", 2 }, .output = { true, 2 }, }, { .description = "one character, violation, NUL-terminated", - .input = { "A", SIZE_MAX }, + .input = { "A", SIZE_MAX }, .output = { false, 0 }, }, { .description = "one character, confirmation, NUL-terminated", - .input = { "\xC3\x9F", SIZE_MAX }, + .input = { "\xC3\x9F", SIZE_MAX }, .output = { true, 2 }, }, { .description = "one word, violation", - .input = { "Hello", 5 }, + .input = { "Hello", 5 }, .output = { false, 0 }, }, { .description = "one word, partial confirmation", - .input = { "gru" "\xC3\x9F" "fOrmel", 11 }, + .input = { "gru" + "\xC3\x9F" + "fOrmel", + 11 }, .output = { false, 6 }, }, { .description = "one word, full confirmation", - .input = { "gru" "\xC3\x9F" "formel", 11 }, + .input = { "gru" + "\xC3\x9F" + "formel", + 11 }, .output = { true, 11 }, }, { .description = "one word, violation, NUL-terminated", - .input = { "Hello", SIZE_MAX }, + .input = { "Hello", SIZE_MAX }, .output = { false, 0 }, }, { .description = "one word, partial confirmation, NUL-terminated", - .input = { "gru" "\xC3\x9F" 
"fOrmel", SIZE_MAX }, + .input = { "gru" + "\xC3\x9F" + "fOrmel", + SIZE_MAX }, .output = { false, 6 }, }, { .description = "one word, full confirmation, NUL-terminated", - .input = { "gru" "\xC3\x9F" "formel", SIZE_MAX }, + .input = { "gru" + "\xC3\x9F" + "formel", + SIZE_MAX }, .output = { true, 11 }, }, }; @@ -93,57 +109,63 @@ static const struct unit_test_is_case_utf8 is_lowercase_utf8[] = { static const struct unit_test_is_case_utf8 is_uppercase_utf8[] = { { .description = "empty input", - .input = { "", 0 }, + .input = { "", 0 }, .output = { true, 0 }, }, { .description = "one character, violation", - .input = { "\xC3\x9F", 2 }, + .input = { "\xC3\x9F", 2 }, .output = { false, 0 }, }, { .description = "one character, confirmation", - .input = { "A", 1 }, + .input = { "A", 1 }, .output = { true, 1 }, }, { .description = "one character, violation, NUL-terminated", - .input = { "\xC3\x9F", SIZE_MAX }, + .input = { "\xC3\x9F", SIZE_MAX }, .output = { false, 0 }, }, { .description = "one character, confirmation, NUL-terminated", - .input = { "A", SIZE_MAX }, + .input = { "A", SIZE_MAX }, .output = { true, 1 }, }, { .description = "one word, violation", - .input = { "hello", 5 }, + .input = { "hello", 5 }, .output = { false, 0 }, }, { .description = "one word, partial confirmation", - .input = { "GRU" "\xC3\x9F" "formel", 11 }, + .input = { "GRU" + "\xC3\x9F" + "formel", + 11 }, .output = { false, 3 }, }, { .description = "one word, full confirmation", - .input = { "HELLO", 5 }, + .input = { "HELLO", 5 }, .output = { true, 5 }, }, { .description = "one word, violation, NUL-terminated", - .input = { "hello", SIZE_MAX }, + .input = { "hello", SIZE_MAX }, .output = { false, 0 }, }, { .description = "one word, partial confirmation, NUL-terminated", - .input = { "GRU" "\xC3\x9F" "formel", SIZE_MAX }, + .input = { "GRU" + "\xC3\x9F" + "formel", + SIZE_MAX }, .output = { false, 3 }, }, { .description = "one word, full confirmation, NUL-terminated", - .input = { "HELLO", 
SIZE_MAX }, + .input = { "HELLO", SIZE_MAX }, .output = { true, 5 }, }, }; @@ -151,77 +173,103 @@ static const struct unit_test_is_case_utf8 is_uppercase_utf8[] = { static const struct unit_test_is_case_utf8 is_titlecase_utf8[] = { { .description = "empty input", - .input = { "", 0 }, + .input = { "", 0 }, .output = { true, 0 }, }, { .description = "one character, violation", - .input = { "\xC3\x9F", 2 }, + .input = { "\xC3\x9F", 2 }, .output = { false, 0 }, }, { .description = "one character, confirmation", - .input = { "A", 1 }, + .input = { "A", 1 }, .output = { true, 1 }, }, { .description = "one character, violation, NUL-terminated", - .input = { "\xC3\x9F", SIZE_MAX }, + .input = { "\xC3\x9F", SIZE_MAX }, .output = { false, 0 }, }, { .description = "one character, confirmation, NUL-terminated", - .input = { "A", SIZE_MAX }, + .input = { "A", SIZE_MAX }, .output = { true, 1 }, }, { .description = "one word, violation", - .input = { "hello", 5 }, + .input = { "hello", 5 }, .output = { false, 0 }, }, { .description = "one word, partial confirmation", - .input = { "Gru" "\xC3\x9F" "fOrmel", 11 }, + .input = { "Gru" + "\xC3\x9F" + "fOrmel", + 11 }, .output = { false, 6 }, }, { .description = "one word, full confirmation", - .input = { "Gru" "\xC3\x9F" "formel", 11 }, + .input = { "Gru" + "\xC3\x9F" + "formel", + 11 }, .output = { true, 11 }, }, { .description = "one word, violation, NUL-terminated", - .input = { "hello", SIZE_MAX }, + .input = { "hello", SIZE_MAX }, .output = { false, 0 }, }, { .description = "one word, partial confirmation, NUL-terminated", - .input = { "Gru" "\xC3\x9F" "fOrmel", SIZE_MAX }, + .input = { "Gru" + "\xC3\x9F" + "fOrmel", + SIZE_MAX }, .output = { false, 6 }, }, { .description = "one word, full confirmation, NUL-terminated", - .input = { "Gru" "\xC3\x9F" "formel", SIZE_MAX }, + .input = { "Gru" + "\xC3\x9F" + "formel", + SIZE_MAX }, .output = { true, 11 }, }, { .description = "multiple words, partial confirmation", - .input = { 
"Hello Gru" "\xC3\x9F" "fOrmel!", 18 }, + .input = { "Hello Gru" + "\xC3\x9F" + "fOrmel!", + 18 }, .output = { false, 12 }, }, { .description = "multiple words, full confirmation", - .input = { "Hello Gru" "\xC3\x9F" "formel!", 18 }, + .input = { "Hello Gru" + "\xC3\x9F" + "formel!", + 18 }, .output = { true, 18 }, }, { - .description = "multiple words, partial confirmation, NUL-terminated", - .input = { "Hello Gru" "\xC3\x9F" "fOrmel!", SIZE_MAX }, + .description = + "multiple words, partial confirmation, NUL-terminated", + .input = { "Hello Gru" + "\xC3\x9F" + "fOrmel!", + SIZE_MAX }, .output = { false, 12 }, }, { - .description = "multiple words, full confirmation, NUL-terminated", - .input = { "Hello Gru" "\xC3\x9F" "formel!", SIZE_MAX }, + .description = + "multiple words, full confirmation, NUL-terminated", + .input = { "Hello Gru" + "\xC3\x9F" + "formel!", + SIZE_MAX }, .output = { true, 18 }, }, }; @@ -229,72 +277,74 @@ static const struct unit_test_is_case_utf8 is_titlecase_utf8[] = { static const struct unit_test_to_case_utf8 to_lowercase_utf8[] = { { .description = "empty input", - .input = { "", 0, 10 }, + .input = { "", 0, 10 }, .output = { "", 0 }, }, { .description = "empty output", - .input = { "hello", 5, 0 }, + .input = { "hello", 5, 0 }, .output = { "", 5 }, }, { .description = "one character, conversion", - .input = { "A", 1, 10 }, + .input = { "A", 1, 10 }, .output = { "a", 1 }, }, { .description = "one character, no conversion", - .input = { "\xC3\x9F", 2, 10 }, + .input = { "\xC3\x9F", 2, 10 }, .output = { "\xC3\x9F", 2 }, }, { .description = "one character, conversion, truncation", - .input = { "A", 1, 0 }, + .input = { "A", 1, 0 }, .output = { "", 1 }, }, { .description = "one character, conversion, NUL-terminated", - .input = { "A", SIZE_MAX, 10 }, + .input = { "A", SIZE_MAX, 10 }, .output = { "a", 1 }, }, { .description = "one character, no conversion, NUL-terminated", - .input = { "\xC3\x9F", SIZE_MAX, 10 }, + .input = { "\xC3\x9F", 
SIZE_MAX, 10 }, .output = { "\xC3\x9F", 2 }, }, { - .description = "one character, conversion, NUL-terminated, truncation", - .input = { "A", SIZE_MAX, 0 }, + .description = + "one character, conversion, NUL-terminated, truncation", + .input = { "A", SIZE_MAX, 0 }, .output = { "", 1 }, }, { .description = "one word, conversion", - .input = { "wOrD", 4, 10 }, + .input = { "wOrD", 4, 10 }, .output = { "word", 4 }, }, { .description = "one word, no conversion", - .input = { "word", 4, 10 }, + .input = { "word", 4, 10 }, .output = { "word", 4 }, }, { .description = "one word, conversion, truncation", - .input = { "wOrD", 4, 3 }, + .input = { "wOrD", 4, 3 }, .output = { "wo", 4 }, }, { .description = "one word, conversion, NUL-terminated", - .input = { "wOrD", SIZE_MAX, 10 }, + .input = { "wOrD", SIZE_MAX, 10 }, .output = { "word", 4 }, }, { .description = "one word, no conversion, NUL-terminated", - .input = { "word", SIZE_MAX, 10 }, + .input = { "word", SIZE_MAX, 10 }, .output = { "word", 4 }, }, { - .description = "one word, conversion, NUL-terminated, truncation", - .input = { "wOrD", SIZE_MAX, 3 }, + .description = + "one word, conversion, NUL-terminated, truncation", + .input = { "wOrD", SIZE_MAX, 3 }, .output = { "wo", 4 }, }, }; @@ -302,72 +352,86 @@ static const struct unit_test_to_case_utf8 to_lowercase_utf8[] = { static const struct unit_test_to_case_utf8 to_uppercase_utf8[] = { { .description = "empty input", - .input = { "", 0, 10 }, + .input = { "", 0, 10 }, .output = { "", 0 }, }, { .description = "empty output", - .input = { "hello", 5, 0 }, + .input = { "hello", 5, 0 }, .output = { "", 5 }, }, { .description = "one character, conversion", - .input = { "\xC3\x9F", 2, 10 }, + .input = { "\xC3\x9F", 2, 10 }, .output = { "SS", 2 }, }, { .description = "one character, no conversion", - .input = { "A", 1, 10 }, + .input = { "A", 1, 10 }, .output = { "A", 1 }, }, { .description = "one character, conversion, truncation", - .input = { "\xC3\x9F", 2, 0 }, + 
.input = { "\xC3\x9F", 2, 0 }, .output = { "", 2 }, }, { .description = "one character, conversion, NUL-terminated", - .input = { "\xC3\x9F", SIZE_MAX, 10 }, + .input = { "\xC3\x9F", SIZE_MAX, 10 }, .output = { "SS", 2 }, }, { .description = "one character, no conversion, NUL-terminated", - .input = { "A", SIZE_MAX, 10 }, + .input = { "A", SIZE_MAX, 10 }, .output = { "A", 1 }, }, { - .description = "one character, conversion, NUL-terminated, truncation", - .input = { "\xC3\x9F", SIZE_MAX, 0 }, + .description = + "one character, conversion, NUL-terminated, truncation", + .input = { "\xC3\x9F", SIZE_MAX, 0 }, .output = { "", 2 }, }, { .description = "one word, conversion", - .input = { "gRu" "\xC3\x9F" "fOrMel", 11, 15 }, + .input = { "gRu" + "\xC3\x9F" + "fOrMel", + 11, 15 }, .output = { "GRUSSFORMEL", 11 }, }, { .description = "one word, no conversion", - .input = { "WORD", 4, 10 }, + .input = { "WORD", 4, 10 }, .output = { "WORD", 4 }, }, { .description = "one word, conversion, truncation", - .input = { "gRu" "\xC3\x9F" "formel", 11, 5 }, + .input = { "gRu" + "\xC3\x9F" + "formel", + 11, 5 }, .output = { "GRUS", 11 }, }, { .description = "one word, conversion, NUL-terminated", - .input = { "gRu" "\xC3\x9F" "formel", SIZE_MAX, 15 }, + .input = { "gRu" + "\xC3\x9F" + "formel", + SIZE_MAX, 15 }, .output = { "GRUSSFORMEL", 11 }, }, { .description = "one word, no conversion, NUL-terminated", - .input = { "WORD", SIZE_MAX, 10 }, + .input = { "WORD", SIZE_MAX, 10 }, .output = { "WORD", 4 }, }, { - .description = "one word, conversion, NUL-terminated, truncation", - .input = { "gRu" "\xC3\x9F" "formel", SIZE_MAX, 5 }, + .description = + "one word, conversion, NUL-terminated, truncation", + .input = { "gRu" + "\xC3\x9F" + "formel", + SIZE_MAX, 5 }, .output = { "GRUS", 11 }, }, }; @@ -375,102 +439,105 @@ static const struct unit_test_to_case_utf8 to_uppercase_utf8[] = { static const struct unit_test_to_case_utf8 to_titlecase_utf8[] = { { .description = "empty input", - 
.input = { "", 0, 10 }, + .input = { "", 0, 10 }, .output = { "", 0 }, }, { .description = "empty output", - .input = { "hello", 5, 0 }, + .input = { "hello", 5, 0 }, .output = { "", 5 }, }, { .description = "one character, conversion", - .input = { "a", 1, 10 }, + .input = { "a", 1, 10 }, .output = { "A", 1 }, }, { .description = "one character, no conversion", - .input = { "A", 1, 10 }, + .input = { "A", 1, 10 }, .output = { "A", 1 }, }, { .description = "one character, conversion, truncation", - .input = { "a", 1, 0 }, + .input = { "a", 1, 0 }, .output = { "", 1 }, }, { .description = "one character, conversion, NUL-terminated", - .input = { "a", SIZE_MAX, 10 }, + .input = { "a", SIZE_MAX, 10 }, .output = { "A", 1 }, }, { .description = "one character, no conversion, NUL-terminated", - .input = { "A", SIZE_MAX, 10 }, + .input = { "A", SIZE_MAX, 10 }, .output = { "A", 1 }, }, { - .description = "one character, conversion, NUL-terminated, truncation", - .input = { "a", SIZE_MAX, 0 }, + .description = + "one character, conversion, NUL-terminated, truncation", + .input = { "a", SIZE_MAX, 0 }, .output = { "", 1 }, }, { .description = "one word, conversion", - .input = { "heLlo", 5, 10 }, + .input = { "heLlo", 5, 10 }, .output = { "Hello", 5 }, }, { .description = "one word, no conversion", - .input = { "Hello", 5, 10 }, + .input = { "Hello", 5, 10 }, .output = { "Hello", 5 }, }, { .description = "one word, conversion, truncation", - .input = { "heLlo", 5, 2 }, + .input = { "heLlo", 5, 2 }, .output = { "H", 5 }, }, { .description = "one word, conversion, NUL-terminated", - .input = { "heLlo", SIZE_MAX, 10 }, + .input = { "heLlo", SIZE_MAX, 10 }, .output = { "Hello", 5 }, }, { .description = "one word, no conversion, NUL-terminated", - .input = { "Hello", SIZE_MAX, 10 }, + .input = { "Hello", SIZE_MAX, 10 }, .output = { "Hello", 5 }, }, { - .description = "one word, conversion, NUL-terminated, truncation", - .input = { "heLlo", SIZE_MAX, 3 }, + .description = + "one 
word, conversion, NUL-terminated, truncation", + .input = { "heLlo", SIZE_MAX, 3 }, .output = { "He", 5 }, }, { .description = "two words, conversion", - .input = { "heLlo wORLd!", 12, 20 }, + .input = { "heLlo wORLd!", 12, 20 }, .output = { "Hello World!", 12 }, }, { .description = "two words, no conversion", - .input = { "Hello World!", 12, 20 }, + .input = { "Hello World!", 12, 20 }, .output = { "Hello World!", 12 }, }, { .description = "two words, conversion, truncation", - .input = { "heLlo wORLd!", 12, 8 }, + .input = { "heLlo wORLd!", 12, 8 }, .output = { "Hello W", 12 }, }, { .description = "two words, conversion, NUL-terminated", - .input = { "heLlo wORLd!", SIZE_MAX, 20 }, + .input = { "heLlo wORLd!", SIZE_MAX, 20 }, .output = { "Hello World!", 12 }, }, { .description = "two words, no conversion, NUL-terminated", - .input = { "Hello World!", SIZE_MAX, 20 }, + .input = { "Hello World!", SIZE_MAX, 20 }, .output = { "Hello World!", 12 }, }, { - .description = "two words, conversion, NUL-terminated, truncation", - .input = { "heLlo wORLd!", SIZE_MAX, 4 }, + .description = + "two words, conversion, NUL-terminated, truncation", + .input = { "heLlo wORLd!", SIZE_MAX, 4 }, .output = { "Hel", 12 }, }, }; @@ -485,14 +552,14 @@ unit_test_callback_is_case_utf8(const void *t, size_t off, const char *name, size_t caselen = 0x7f; if (t == is_lowercase_utf8) { - ret = grapheme_is_lowercase_utf8(test->input.src, test->input.srclen, - &caselen); + ret = grapheme_is_lowercase_utf8(test->input.src, + test->input.srclen, &caselen); } else if (t == is_uppercase_utf8) { - ret = grapheme_is_uppercase_utf8(test->input.src, test->input.srclen, - &caselen); + ret = grapheme_is_uppercase_utf8(test->input.src, + test->input.srclen, &caselen); } else if (t == is_titlecase_utf8) { - ret = grapheme_is_titlecase_utf8(test->input.src, test->input.srclen, - &caselen); + ret = grapheme_is_titlecase_utf8(test->input.src, + test->input.srclen, &caselen); } else { goto err; @@ -505,10 +572,11 
@@ unit_test_callback_is_case_utf8(const void *t, size_t off, const char *name, return 0; err: - fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" " - "(returned (%s, %zu) instead of (%s, %zu)).\n", argv0, - name, off, test->description, ret ? "true" : "false", - caselen, test->output.ret ? "true" : "false", + fprintf(stderr, + "%s: %s: Failed unit test %zu \"%s\" " + "(returned (%s, %zu) instead of (%s, %zu)).\n", + argv0, name, off, test->description, ret ? "true" : "false", + caselen, test->output.ret ? "true" : "false", test->output.caselen); return 1; } @@ -526,21 +594,25 @@ unit_test_callback_to_case_utf8(const void *t, size_t off, const char *name, memset(buf, 0x7f, LEN(buf)); if (t == to_lowercase_utf8) { - ret = grapheme_to_lowercase_utf8(test->input.src, test->input.srclen, - buf, test->input.destlen); + ret = grapheme_to_lowercase_utf8(test->input.src, + test->input.srclen, buf, + test->input.destlen); } else if (t == to_uppercase_utf8) { - ret = grapheme_to_uppercase_utf8(test->input.src, test->input.srclen, - buf, test->input.destlen); + ret = grapheme_to_uppercase_utf8(test->input.src, + test->input.srclen, buf, + test->input.destlen); } else if (t == to_titlecase_utf8) { - ret = grapheme_to_titlecase_utf8(test->input.src, test->input.srclen, - buf, test->input.destlen); + ret = grapheme_to_titlecase_utf8(test->input.src, + test->input.srclen, buf, + test->input.destlen); } else { goto err; } /* check results */ if (ret != test->output.ret || - memcmp(buf, test->output.dest, MIN(test->input.destlen, test->output.ret))) { + memcmp(buf, test->output.dest, + MIN(test->input.destlen, test->output.ret))) { goto err; } @@ -553,9 +625,10 @@ unit_test_callback_to_case_utf8(const void *t, size_t off, const char *name, return 0; err: - fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" " - "(returned (\"%.*s\", %zu) instead of (\"%.*s\", %zu)).\n", argv0, - name, off, test->description, (int)ret, buf, ret, + fprintf(stderr, + "%s: %s: Failed unit test %zu 
\"%s\" " + "(returned (\"%.*s\", %zu) instead of (\"%.*s\", %zu)).\n", + argv0, name, off, test->description, (int)ret, buf, ret, (int)test->output.ret, test->output.dest, test->output.ret); return 1; } @@ -565,16 +638,22 @@ main(int argc, char *argv[]) { (void)argc; - return run_unit_tests(unit_test_callback_is_case_utf8, is_lowercase_utf8, - LEN(is_lowercase_utf8), "grapheme_is_lowercase_utf8", argv[0]) + - run_unit_tests(unit_test_callback_is_case_utf8, is_uppercase_utf8, - LEN(is_uppercase_utf8), "grapheme_is_uppercase_utf8", argv[0]) + - run_unit_tests(unit_test_callback_is_case_utf8, is_titlecase_utf8, - LEN(is_titlecase_utf8), "grapheme_is_titlecase_utf8", argv[0]) + - run_unit_tests(unit_test_callback_to_case_utf8, to_lowercase_utf8, - LEN(to_lowercase_utf8), "grapheme_to_lowercase_utf8", argv[0]) + - run_unit_tests(unit_test_callback_to_case_utf8, to_uppercase_utf8, - LEN(to_uppercase_utf8), "grapheme_to_uppercase_utf8", argv[0]) + - run_unit_tests(unit_test_callback_to_case_utf8, to_titlecase_utf8, - LEN(to_titlecase_utf8), "grapheme_to_titlecase_utf8", argv[0]); + return run_unit_tests(unit_test_callback_is_case_utf8, + is_lowercase_utf8, LEN(is_lowercase_utf8), + "grapheme_is_lowercase_utf8", argv[0]) + + run_unit_tests(unit_test_callback_is_case_utf8, + is_uppercase_utf8, LEN(is_uppercase_utf8), + "grapheme_is_uppercase_utf8", argv[0]) + + run_unit_tests(unit_test_callback_is_case_utf8, + is_titlecase_utf8, LEN(is_titlecase_utf8), + "grapheme_is_titlecase_utf8", argv[0]) + + run_unit_tests(unit_test_callback_to_case_utf8, + to_lowercase_utf8, LEN(to_lowercase_utf8), + "grapheme_to_lowercase_utf8", argv[0]) + + run_unit_tests(unit_test_callback_to_case_utf8, + to_uppercase_utf8, LEN(to_uppercase_utf8), + "grapheme_to_uppercase_utf8", argv[0]) + + run_unit_tests(unit_test_callback_to_case_utf8, + to_titlecase_utf8, LEN(to_titlecase_utf8), + "grapheme_to_titlecase_utf8", argv[0]); } diff --git a/test/character.c b/test/character.c @@ -92,12 +92,10 @@ 
static const struct unit_test_next_break_utf8 next_character_break_utf8[] = { static int unit_test_callback_next_character_break(const void *t, size_t off, - const char *name, - const char *argv0) + const char *name, const char *argv0) { - return unit_test_callback_next_break(t, off, - grapheme_next_character_break, - name, argv0); + return unit_test_callback_next_break( + t, off, grapheme_next_character_break, name, argv0); } static int @@ -105,9 +103,8 @@ unit_test_callback_next_character_break_utf8(const void *t, size_t off, const char *name, const char *argv0) { - return unit_test_callback_next_break_utf8(t, off, - grapheme_next_character_break_utf8, - name, argv0); + return unit_test_callback_next_break_utf8( + t, off, grapheme_next_character_break_utf8, name, argv0); } int @@ -116,11 +113,13 @@ main(int argc, char *argv[]) (void)argc; return run_break_tests(grapheme_next_character_break, - character_break_test, LEN(character_break_test), argv[0]) + + character_break_test, LEN(character_break_test), + argv[0]) + run_unit_tests(unit_test_callback_next_character_break, next_character_break, LEN(next_character_break), "grapheme_next_character_break", argv[0]) + run_unit_tests(unit_test_callback_next_character_break_utf8, - next_character_break_utf8, LEN(next_character_break_utf8), + next_character_break_utf8, + LEN(next_character_break_utf8), "grapheme_next_character_break_utf8", argv[0]); } diff --git a/test/line.c b/test/line.c @@ -91,23 +91,19 @@ static const struct unit_test_next_break_utf8 next_line_break_utf8[] = { }; static int -unit_test_callback_next_line_break(const void *t, size_t off, - const char *name, - const char *argv0) +unit_test_callback_next_line_break(const void *t, size_t off, const char *name, + const char *argv0) { - return unit_test_callback_next_break(t, off, - grapheme_next_line_break, + return unit_test_callback_next_break(t, off, grapheme_next_line_break, name, argv0); } static int unit_test_callback_next_line_break_utf8(const void 
*t, size_t off, - const char *name, - const char *argv0) + const char *name, const char *argv0) { - return unit_test_callback_next_break_utf8(t, off, - grapheme_next_line_break_utf8, - name, argv0); + return unit_test_callback_next_break_utf8( + t, off, grapheme_next_line_break_utf8, name, argv0); } int @@ -115,9 +111,8 @@ main(int argc, char *argv[]) { (void)argc; - return run_break_tests(grapheme_next_line_break, - line_break_test, LEN(line_break_test), - argv[0]) + + return run_break_tests(grapheme_next_line_break, line_break_test, + LEN(line_break_test), argv[0]) + run_unit_tests(unit_test_callback_next_line_break, next_line_break, LEN(next_line_break), "grapheme_next_line_break", argv[0]) + diff --git a/test/sentence.c b/test/sentence.c @@ -92,22 +92,18 @@ static const struct unit_test_next_break_utf8 next_sentence_break_utf8[] = { static int unit_test_callback_next_sentence_break(const void *t, size_t off, - const char *name, - const char *argv0) + const char *name, const char *argv0) { - return unit_test_callback_next_break(t, off, - grapheme_next_sentence_break, - name, argv0); + return unit_test_callback_next_break( + t, off, grapheme_next_sentence_break, name, argv0); } static int unit_test_callback_next_sentence_break_utf8(const void *t, size_t off, - const char *name, - const char *argv0) + const char *name, const char *argv0) { - return unit_test_callback_next_break_utf8(t, off, - grapheme_next_sentence_break_utf8, - name, argv0); + return unit_test_callback_next_break_utf8( + t, off, grapheme_next_sentence_break_utf8, name, argv0); } int @@ -116,12 +112,13 @@ main(int argc, char *argv[]) (void)argc; return run_break_tests(grapheme_next_sentence_break, - sentence_break_test, - LEN(sentence_break_test), argv[0]) + + sentence_break_test, LEN(sentence_break_test), + argv[0]) + run_unit_tests(unit_test_callback_next_sentence_break, next_sentence_break, LEN(next_sentence_break), "grapheme_next_sentence_break", argv[0]) + 
run_unit_tests(unit_test_callback_next_sentence_break_utf8, - next_sentence_break_utf8, LEN(next_sentence_break_utf8), + next_sentence_break_utf8, + LEN(next_sentence_break_utf8), "grapheme_next_character_break_utf8", argv[0]); } diff --git a/test/utf8-decode.c b/test/utf8-decode.c @@ -8,281 +8,279 @@ #include "util.h" static const struct { - char *arr; /* UTF-8 byte sequence */ - size_t len; /* length of UTF-8 byte sequence */ - size_t exp_len; /* expected length returned */ - uint_least32_t exp_cp; /* expected codepoint returned */ + char *arr; /* UTF-8 byte sequence */ + size_t len; /* length of UTF-8 byte sequence */ + size_t exp_len; /* expected length returned */ + uint_least32_t exp_cp; /* expected codepoint returned */ } dec_test[] = { { /* empty sequence - * [ ] -> - * INVALID - */ - .arr = NULL, - .len = 0, + * [ ] -> + * INVALID + */ + .arr = NULL, + .len = 0, .exp_len = 0, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { /* invalid lead byte - * [ 11111101 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xFD }, - .len = 1, + * [ 11111101 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xFD }, + .len = 1, .exp_len = 1, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { /* valid 1-byte sequence - * [ 00000001 ] -> - * 0000001 - */ - .arr = (char *)(unsigned char[]){ 0x01 }, - .len = 1, + * [ 00000001 ] -> + * 0000001 + */ + .arr = (char *)(unsigned char[]) { 0x01 }, + .len = 1, .exp_len = 1, - .exp_cp = 0x1, + .exp_cp = 0x1, }, { /* valid 2-byte sequence - * [ 11000011 10111111 ] -> - * 00011111111 - */ - .arr = (char *)(unsigned char[]){ 0xC3, 0xBF }, - .len = 2, + * [ 11000011 10111111 ] -> + * 00011111111 + */ + .arr = (char *)(unsigned char[]) { 0xC3, 0xBF }, + .len = 2, .exp_len = 2, - .exp_cp = 0xFF, + .exp_cp = 0xFF, }, { /* invalid 2-byte sequence (second byte missing) - * [ 11000011 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xC3 }, - .len = 
1, + * [ 11000011 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xC3 }, + .len = 1, .exp_len = 2, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { /* invalid 2-byte sequence (second byte malformed) - * [ 11000011 11111111 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xC3, 0xFF }, - .len = 2, + * [ 11000011 11111111 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xC3, 0xFF }, + .len = 2, .exp_len = 1, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { /* invalid 2-byte sequence (overlong encoded) - * [ 11000001 10111111 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xC1, 0xBF }, - .len = 2, + * [ 11000001 10111111 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xC1, 0xBF }, + .len = 2, .exp_len = 2, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { /* valid 3-byte sequence - * [ 11100000 10111111 10111111 ] -> - * 0000111111111111 - */ - .arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0xBF }, - .len = 3, + * [ 11100000 10111111 10111111 ] -> + * 0000111111111111 + */ + .arr = (char *)(unsigned char[]) { 0xE0, 0xBF, 0xBF }, + .len = 3, .exp_len = 3, - .exp_cp = 0xFFF, + .exp_cp = 0xFFF, }, { /* invalid 3-byte sequence (second byte missing) - * [ 11100000 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xE0 }, - .len = 1, + * [ 11100000 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xE0 }, + .len = 1, .exp_len = 3, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { /* invalid 3-byte sequence (second byte malformed) - * [ 11100000 01111111 10111111 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xE0, 0x7F, 0xBF }, - .len = 3, + * [ 11100000 01111111 10111111 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xE0, 0x7F, 0xBF }, + .len = 3, .exp_len = 1, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, 
}, { /* invalid 3-byte sequence (short string, second byte malformed) - * [ 11100000 01111111 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xE0, 0x7F }, - .len = 2, + * [ 11100000 01111111 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xE0, 0x7F }, + .len = 2, .exp_len = 1, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { /* invalid 3-byte sequence (third byte missing) - * [ 11100000 10111111 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xE0, 0xBF }, - .len = 2, + * [ 11100000 10111111 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xE0, 0xBF }, + .len = 2, .exp_len = 3, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { /* invalid 3-byte sequence (third byte malformed) - * [ 11100000 10111111 01111111 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0x7F }, - .len = 3, + * [ 11100000 10111111 01111111 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xE0, 0xBF, 0x7F }, + .len = 3, .exp_len = 2, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { /* invalid 3-byte sequence (overlong encoded) - * [ 11100000 10011111 10111111 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xE0, 0x9F, 0xBF }, - .len = 3, + * [ 11100000 10011111 10111111 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xE0, 0x9F, 0xBF }, + .len = 3, .exp_len = 3, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { /* invalid 3-byte sequence (UTF-16 surrogate half) - * [ 11101101 10100000 10000000 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xED, 0xA0, 0x80 }, - .len = 3, + * [ 11101101 10100000 10000000 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xED, 0xA0, 0x80 }, + .len = 3, .exp_len = 3, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { /* valid 4-byte sequence - * [ 11110011 10111111 10111111 
10111111 ] -> - * 011111111111111111111 - */ - .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0xBF }, - .len = 4, + * [ 11110011 10111111 10111111 10111111 ] -> + * 011111111111111111111 + */ + .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF, 0xBF }, + .len = 4, .exp_len = 4, - .exp_cp = UINT32_C(0xFFFFF), + .exp_cp = UINT32_C(0xFFFFF), }, { /* invalid 4-byte sequence (second byte missing) - * [ 11110011 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xF3 }, - .len = 1, + * [ 11110011 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xF3 }, + .len = 1, .exp_len = 4, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { /* invalid 4-byte sequence (second byte malformed) - * [ 11110011 01111111 10111111 10111111 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF, 0xBF }, - .len = 4, + * [ 11110011 01111111 10111111 10111111 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xF3, 0x7F, 0xBF, 0xBF }, + .len = 4, .exp_len = 1, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { - /* invalid 4-byte sequence (short string 1, second byte malformed) - * [ 11110011 011111111 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xF3, 0x7F }, - .len = 2, + /* invalid 4-byte sequence (short string 1, second byte + * malformed) [ 11110011 011111111 ] -> INVALID + */ + .arr = (char *)(unsigned char[]) { 0xF3, 0x7F }, + .len = 2, .exp_len = 1, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { - /* invalid 4-byte sequence (short string 2, second byte malformed) - * [ 11110011 011111111 10111111 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF }, - .len = 3, + /* invalid 4-byte sequence (short string 2, second byte + * malformed) [ 11110011 011111111 10111111 ] -> INVALID + */ + .arr = (char *)(unsigned char[]) { 0xF3, 0x7F, 0xBF }, + .len = 3, .exp_len = 1, - .exp_cp = 
GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { /* invalid 4-byte sequence (third byte missing) - * [ 11110011 10111111 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xF3, 0xBF }, - .len = 2, + * [ 11110011 10111111 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xF3, 0xBF }, + .len = 2, .exp_len = 4, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { /* invalid 4-byte sequence (third byte malformed) - * [ 11110011 10111111 01111111 10111111 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F, 0xBF }, - .len = 4, + * [ 11110011 10111111 01111111 10111111 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0x7F, 0xBF }, + .len = 4, .exp_len = 2, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { /* invalid 4-byte sequence (short string, third byte malformed) - * [ 11110011 10111111 01111111 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F }, - .len = 3, + * [ 11110011 10111111 01111111 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0x7F }, + .len = 3, .exp_len = 2, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { /* invalid 4-byte sequence (fourth byte missing) - * [ 11110011 10111111 10111111 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF }, - .len = 3, + * [ 11110011 10111111 10111111 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF }, + .len = 3, .exp_len = 4, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { /* invalid 4-byte sequence (fourth byte malformed) - * [ 11110011 10111111 10111111 01111111 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0x7F }, - .len = 4, + * [ 11110011 10111111 10111111 01111111 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF, 0x7F }, + .len = 4, 
.exp_len = 3, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { /* invalid 4-byte sequence (overlong encoded) - * [ 11110000 10000000 10000001 10111111 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xF0, 0x80, 0x81, 0xBF }, - .len = 4, + * [ 11110000 10000000 10000001 10111111 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xF0, 0x80, 0x81, 0xBF }, + .len = 4, .exp_len = 4, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, { /* invalid 4-byte sequence (UTF-16-unrepresentable) - * [ 11110100 10010000 10000000 10000000 ] -> - * INVALID - */ - .arr = (char *)(unsigned char[]){ 0xF4, 0x90, 0x80, 0x80 }, - .len = 4, + * [ 11110100 10010000 10000000 10000000 ] -> + * INVALID + */ + .arr = (char *)(unsigned char[]) { 0xF4, 0x90, 0x80, 0x80 }, + .len = 4, .exp_len = 4, - .exp_cp = GRAPHEME_INVALID_CODEPOINT, + .exp_cp = GRAPHEME_INVALID_CODEPOINT, }, }; @@ -298,12 +296,12 @@ main(int argc, char *argv[]) size_t len; uint_least32_t cp; - len = grapheme_decode_utf8(dec_test[i].arr, - dec_test[i].len, &cp); + len = grapheme_decode_utf8(dec_test[i].arr, dec_test[i].len, + &cp); - if (len != dec_test[i].exp_len || - cp != dec_test[i].exp_cp) { - fprintf(stderr, "%s: Failed test %zu: " + if (len != dec_test[i].exp_len || cp != dec_test[i].exp_cp) { + fprintf(stderr, + "%s: Failed test %zu: " "Expected (%zx,%u), but got (%zx,%u).\n", argv[0], i, dec_test[i].exp_len, dec_test[i].exp_cp, len, cp); diff --git a/test/utf8-encode.c b/test/utf8-encode.c @@ -8,44 +8,44 @@ #include "util.h" static const struct { - uint_least32_t cp; /* input codepoint */ - char *exp_arr; /* expected UTF-8 byte sequence */ - size_t exp_len; /* expected length of UTF-8 sequence */ + uint_least32_t cp; /* input codepoint */ + char *exp_arr; /* expected UTF-8 byte sequence */ + size_t exp_len; /* expected length of UTF-8 sequence */ } enc_test[] = { { /* invalid codepoint (UTF-16 surrogate half) */ - .cp = 
UINT32_C(0xD800), - .exp_arr = (char *)(unsigned char[]){ 0xEF, 0xBF, 0xBD }, + .cp = UINT32_C(0xD800), + .exp_arr = (char *)(unsigned char[]) { 0xEF, 0xBF, 0xBD }, .exp_len = 3, }, { /* invalid codepoint (UTF-16-unrepresentable) */ - .cp = UINT32_C(0x110000), - .exp_arr = (char *)(unsigned char[]){ 0xEF, 0xBF, 0xBD }, + .cp = UINT32_C(0x110000), + .exp_arr = (char *)(unsigned char[]) { 0xEF, 0xBF, 0xBD }, .exp_len = 3, }, { /* codepoint encoded to a 1-byte sequence */ - .cp = 0x01, - .exp_arr = (char *)(unsigned char[]){ 0x01 }, + .cp = 0x01, + .exp_arr = (char *)(unsigned char[]) { 0x01 }, .exp_len = 1, }, { /* codepoint encoded to a 2-byte sequence */ - .cp = 0xFF, - .exp_arr = (char *)(unsigned char[]){ 0xC3, 0xBF }, + .cp = 0xFF, + .exp_arr = (char *)(unsigned char[]) { 0xC3, 0xBF }, .exp_len = 2, }, { /* codepoint encoded to a 3-byte sequence */ - .cp = 0xFFF, - .exp_arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0xBF }, + .cp = 0xFFF, + .exp_arr = (char *)(unsigned char[]) { 0xE0, 0xBF, 0xBF }, .exp_len = 3, }, { /* codepoint encoded to a 4-byte sequence */ - .cp = UINT32_C(0xFFFFF), - .exp_arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0xBF }, + .cp = UINT32_C(0xFFFFF), + .exp_arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF, 0xBF }, .exp_len = 4, }, }; @@ -66,11 +66,12 @@ main(int argc, char *argv[]) if (len != enc_test[i].exp_len || memcmp(arr, enc_test[i].exp_arr, len)) { - fprintf(stderr, "%s, Failed test %zu: " - "Expected (", argv[0], i); + fprintf(stderr, + "%s, Failed test %zu: " + "Expected (", + argv[0], i); for (j = 0; j < enc_test[i].exp_len; j++) { - fprintf(stderr, "0x%x", - enc_test[i].exp_arr[j]); + fprintf(stderr, "0x%x", enc_test[i].exp_arr[j]); if (j + 1 < enc_test[i].exp_len) { fprintf(stderr, " "); } diff --git a/test/util.c b/test/util.c @@ -5,13 +5,14 @@ #include <stdio.h> #include <string.h> -#include "../grapheme.h" #include "../gen/types.h" +#include "../grapheme.h" #include "util.h" int run_break_tests(size_t 
(*next_break)(const uint_least32_t *, size_t), - const struct break_test *test, size_t testlen, const char *argv0) + const struct break_test *test, size_t testlen, + const char *argv0) { size_t i, j, off, res, failed; @@ -21,11 +22,14 @@ run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t), res = next_break(test[i].cp + off, test[i].cplen - off); /* check if our resulting offset matches */ - if (j == test[i].lenlen || - res != test[i].len[j++]) { - fprintf(stderr, "%s: Failed conformance test %zu \"%s\".\n", + if (j == test[i].lenlen || res != test[i].len[j++]) { + fprintf(stderr, + "%s: Failed conformance test %zu " + "\"%s\".\n", argv0, i, test[i].descr); - fprintf(stderr, "J=%zu: EXPECTED len %zu, got %zu\n", j-1, test[i].len[j-1], res); + fprintf(stderr, + "J=%zu: EXPECTED len %zu, got %zu\n", + j - 1, test[i].len[j - 1], res); failed++; break; } @@ -39,13 +43,15 @@ run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t), int run_unit_tests(int (*unit_test_callback)(const void *, size_t, const char *, - const char *), const void *test, size_t testlen, const char *name, + const char *), + const void *test, size_t testlen, const char *name, const char *argv0) { size_t i, failed; for (i = 0, failed = 0; i < testlen; i++) { - failed += (unit_test_callback(test, i, name, argv0) == 0) ? 0 : 1; + failed += + (unit_test_callback(test, i, name, argv0) == 0) ? 
0 : 1; } printf("%s: %s: %zu/%zu unit tests passed.\n", argv0, name, @@ -56,8 +62,9 @@ run_unit_tests(int (*unit_test_callback)(const void *, size_t, const char *, int unit_test_callback_next_break(const struct unit_test_next_break *t, size_t off, - size_t (*next_break)(const uint_least32_t *, size_t), - const char *name, const char *argv0) + size_t (*next_break)(const uint_least32_t *, + size_t), + const char *name, const char *argv0) { const struct unit_test_next_break *test = t + off; @@ -69,16 +76,18 @@ unit_test_callback_next_break(const struct unit_test_next_break *t, size_t off, return 0; err: - fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" " - "(returned %zu instead of %zu).\n", argv0, - name, off, test->description, ret, test->output.ret); + fprintf(stderr, + "%s: %s: Failed unit test %zu \"%s\" " + "(returned %zu instead of %zu).\n", + argv0, name, off, test->description, ret, test->output.ret); return 1; } int unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 *t, size_t off, - size_t (*next_break_utf8)(const char *, size_t), + size_t (*next_break_utf8)(const char *, + size_t), const char *name, const char *argv0) { const struct unit_test_next_break_utf8 *test = t + off; @@ -91,8 +100,9 @@ unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 *t, return 0; err: - fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" " - "(returned %zu instead of %zu).\n", argv0, - name, off, test->description, ret, test->output.ret); + fprintf(stderr, + "%s: %s: Failed unit test %zu \"%s\" " + "(returned %zu instead of %zu).\n", + argv0, name, off, test->description, ret, test->output.ret); return 1; } diff --git a/test/util.h b/test/util.h @@ -6,16 +6,18 @@ #include "../grapheme.h" #undef MIN -#define MIN(x,y) ((x) < (y) ? (x) : (y)) +#define MIN(x, y) ((x) < (y) ? 
(x) : (y)) #undef LEN #define LEN(x) (sizeof(x) / sizeof(*(x))) struct unit_test_next_break { const char *description; + struct { const uint_least32_t *src; size_t srclen; } input; + struct { size_t ret; } output; @@ -23,10 +25,12 @@ struct unit_test_next_break { struct unit_test_next_break_utf8 { const char *description; + struct { const char *src; size_t srclen; } input; + struct { size_t ret; } output; @@ -36,14 +40,17 @@ int run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t), const struct break_test *test, size_t testlen, const char *); int run_unit_tests(int (*unit_test_callback)(const void *, size_t, const char *, - const char *), const void *, size_t, const char *, const char *); + const char *), + const void *, size_t, const char *, const char *); int unit_test_callback_next_break(const struct unit_test_next_break *, size_t, - size_t (*next_break)(const uint_least32_t *, size_t), + size_t (*next_break)(const uint_least32_t *, + size_t), const char *, const char *); int unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 *, size_t, - size_t (*next_break_utf8)(const char *, size_t), + size_t (*next_break_utf8)(const char *, + size_t), const char *, const char *); #endif /* UTIL_H */ diff --git a/test/word.c b/test/word.c @@ -91,23 +91,19 @@ static const struct unit_test_next_break_utf8 next_word_break_utf8[] = { }; static int -unit_test_callback_next_word_break(const void *t, size_t off, - const char *name, - const char *argv0) +unit_test_callback_next_word_break(const void *t, size_t off, const char *name, + const char *argv0) { - return unit_test_callback_next_break(t, off, - grapheme_next_word_break, + return unit_test_callback_next_break(t, off, grapheme_next_word_break, name, argv0); } static int unit_test_callback_next_word_break_utf8(const void *t, size_t off, - const char *name, - const char *argv0) + const char *name, const char *argv0) { - return unit_test_callback_next_break_utf8(t, off, - 
grapheme_next_word_break_utf8, - name, argv0); + return unit_test_callback_next_break_utf8( + t, off, grapheme_next_word_break_utf8, name, argv0); } int