libgrapheme

Freestanding C library for Unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

commit e63bcc42010176b300feea6a7412f814a6cc4191
parent 5332f7ee034081618617c2b0785733ccc9ec8753
Author: Laslo Hunhold <dev@frign.de>
Date:   Wed, 21 Sep 2022 20:18:12 +0200

Add case-conversion-unit-tests

To give even more assurance and catch any possible future regressions,
exhaustive unit tests are added for the case-conversion functions.

Signed-off-by: Laslo Hunhold <dev@frign.de>

Diffstat:
M Makefile | 3 +++
A test/case.c | 329 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M test/util.c | 21 +++++++++++++++++++--
M test/util.h | 5 +++++
4 files changed, 356 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
@@ -53,6 +53,7 @@ SRC =\
 	src/word\
 
 TEST =\
+	test/case\
 	test/character\
 	test/line\
 	test/sentence\
@@ -160,6 +161,7 @@ src/sentence.o: src/sentence.c config.mk gen/sentence.h grapheme.h src/util.h
 src/utf8.o: src/utf8.c config.mk grapheme.h
 src/util.o: src/util.c config.mk gen/types.h grapheme.h src/util.h
 src/word.o: src/word.c config.mk gen/word.h grapheme.h src/util.h
+test/case.o: test/case.c config.mk grapheme.h test/util.h
 test/character.o: test/character.c config.mk gen/character-test.h grapheme.h test/util.h
 test/line.o: test/line.c config.mk gen/line-test.h grapheme.h test/util.h
 test/sentence.o: test/sentence.c config.mk gen/sentence-test.h grapheme.h test/util.h
@@ -183,6 +185,7 @@ gen/sentence: gen/sentence.o gen/util.o
 gen/sentence-test: gen/sentence-test.o gen/util.o
 gen/word: gen/word.o gen/util.o
 gen/word-test: gen/word-test.o gen/util.o
+test/case: test/case.o test/util.o libgrapheme.a
 test/character: test/character.o test/util.o libgrapheme.a
 test/line: test/line.o test/util.o libgrapheme.a
 test/sentence: test/sentence.o test/util.o libgrapheme.a
diff --git a/test/case.c b/test/case.c
@@ -0,0 +1,329 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "../grapheme.h"
+#include "util.h"
+
+struct unit_test_to_case_utf8 {
+	const char *description;
+	struct {
+		const char *src;
+		size_t srclen;
+		size_t destlen;
+	} input;
+	struct {
+		const char *dest;
+		size_t ret;
+	} output;
+};
+
+struct unit_test_to_case_utf8 lowercase_utf8[] = {
+	{
+		.description = "empty input",
+		.input = { "", 0, 10 },
+		.output = { "", 0 },
+	},
+	{
+		.description = "empty output",
+		.input = { "hello", 5, 0 },
+		.output = { "", 5 },
+	},
+	{
+		.description = "one character, conversion",
+		.input = { "A", 1, 10 },
+		.output = { "a", 1 },
+	},
+	{
+		.description = "one character, no conversion",
+		.input = { "a", 1, 10 },
+		.output = { "a", 1 },
+	},
+	{
+		.description = "one character, conversion, truncation",
+		.input = { "A", 1, 0 },
+		.output = { "", 1 },
+	},
+	{
+		.description = "one character, conversion, NUL-terminated",
+		.input = { "A", SIZE_MAX, 10 },
+		.output = { "a", 1 },
+	},
+	{
+		.description = "one character, no conversion, NUL-terminated",
+		.input = { "a", SIZE_MAX, 10 },
+		.output = { "a", 1 },
+	},
+	{
+		.description = "one character, conversion, NUL-terminated, truncation",
+		.input = { "A", SIZE_MAX, 0 },
+		.output = { "", 1 },
+	},
+	{
+		.description = "one word, conversion",
+		.input = { "wOrD", 4, 10 },
+		.output = { "word", 4 },
+	},
+	{
+		.description = "one word, no conversion",
+		.input = { "word", 4, 10 },
+		.output = { "word", 4 },
+	},
+	{
+		.description = "one word, conversion, truncation",
+		.input = { "wOrD", 4, 3 },
+		.output = { "wo", 4 },
+	},
+	{
+		.description = "one word, conversion, NUL-terminated",
+		.input = { "wOrD", SIZE_MAX, 10 },
+		.output = { "word", 4 },
+	},
+	{
+		.description = "one word, no conversion, NUL-terminated",
+		.input = { "word", SIZE_MAX, 10 },
+		.output = { "word", 4 },
+	},
+	{
+		.description = "one word, conversion, NUL-terminated, truncation",
+		.input = { "wOrD", SIZE_MAX, 3 },
+		.output = { "wo", 4 },
+	},
+};
+
+struct unit_test_to_case_utf8 uppercase_utf8[] = {
+	{
+		.description = "empty input",
+		.input = { "", 0, 10 },
+		.output = { "", 0 },
+	},
+	{
+		.description = "empty output",
+		.input = { "hello", 5, 0 },
+		.output = { "", 5 },
+	},
+	{
+		.description = "one character, conversion",
+		.input = { "a", 1, 10 },
+		.output = { "A", 1 },
+	},
+	{
+		.description = "one character, no conversion",
+		.input = { "A", 1, 10 },
+		.output = { "A", 1 },
+	},
+	{
+		.description = "one character, conversion, truncation",
+		.input = { "a", 1, 0 },
+		.output = { "", 1 },
+	},
+	{
+		.description = "one character, conversion, NUL-terminated",
+		.input = { "a", SIZE_MAX, 10 },
+		.output = { "A", 1 },
+	},
+	{
+		.description = "one character, no conversion, NUL-terminated",
+		.input = { "A", SIZE_MAX, 10 },
+		.output = { "A", 1 },
+	},
+	{
+		.description = "one character, conversion, NUL-terminated, truncation",
+		.input = { "a", SIZE_MAX, 0 },
+		.output = { "", 1 },
+	},
+	{
+		.description = "one word, conversion",
+		.input = { "wOrD", 4, 10 },
+		.output = { "WORD", 4 },
+	},
+	{
+		.description = "one word, no conversion",
+		.input = { "WORD", 4, 10 },
+		.output = { "WORD", 4 },
+	},
+	{
+		.description = "one word, conversion, truncation",
+		.input = { "wOrD", 4, 3 },
+		.output = { "WO", 4 },
+	},
+	{
+		.description = "one word, conversion, NUL-terminated",
+		.input = { "wOrD", SIZE_MAX, 10 },
+		.output = { "WORD", 4 },
+	},
+	{
+		.description = "one word, no conversion, NUL-terminated",
+		.input = { "WORD", SIZE_MAX, 10 },
+		.output = { "WORD", 4 },
+	},
+	{
+		.description = "one word, conversion, NUL-terminated, truncation",
+		.input = { "wOrD", SIZE_MAX, 3 },
+		.output = { "WO", 4 },
+	},
+};
+
+struct unit_test_to_case_utf8 titlecase_utf8[] = {
+	{
+		.description = "empty input",
+		.input = { "", 0, 10 },
+		.output = { "", 0 },
+	},
+	{
+		.description = "empty output",
+		.input = { "hello", 5, 0 },
+		.output = { "", 5 },
+	},
+	{
+		.description = "one character, conversion",
+		.input = { "a", 1, 10 },
+		.output = { "A", 1 },
+	},
+	{
+		.description = "one character, no conversion",
+		.input = { "A", 1, 10 },
+		.output = { "A", 1 },
+	},
+	{
+		.description = "one character, conversion, truncation",
+		.input = { "a", 1, 0 },
+		.output = { "", 1 },
+	},
+	{
+		.description = "one character, conversion, NUL-terminated",
+		.input = { "a", SIZE_MAX, 10 },
+		.output = { "A", 1 },
+	},
+	{
+		.description = "one character, no conversion, NUL-terminated",
+		.input = { "A", SIZE_MAX, 10 },
+		.output = { "A", 1 },
+	},
+	{
+		.description = "one character, conversion, NUL-terminated, truncation",
+		.input = { "a", SIZE_MAX, 0 },
+		.output = { "", 1 },
+	},
+	{
+		.description = "one word, conversion",
+		.input = { "heLlo", 5, 10 },
+		.output = { "Hello", 5 },
+	},
+	{
+		.description = "one word, no conversion",
+		.input = { "Hello", 5, 10 },
+		.output = { "Hello", 5 },
+	},
+	{
+		.description = "one word, conversion, truncation",
+		.input = { "heLlo", 5, 2 },
+		.output = { "H", 5 },
+	},
+	{
+		.description = "one word, conversion, NUL-terminated",
+		.input = { "heLlo", SIZE_MAX, 10 },
+		.output = { "Hello", 5 },
+	},
+	{
+		.description = "one word, no conversion, NUL-terminated",
+		.input = { "Hello", SIZE_MAX, 10 },
+		.output = { "Hello", 5 },
+	},
+	{
+		.description = "one word, conversion, NUL-terminated, truncation",
+		.input = { "heLlo", SIZE_MAX, 3 },
+		.output = { "He", 5 },
+	},
+	{
+		.description = "two words, conversion",
+		.input = { "heLlo wORLd!", 12, 20 },
+		.output = { "Hello World!", 12 },
+	},
+	{
+		.description = "two words, no conversion",
+		.input = { "Hello World!", 12, 20 },
+		.output = { "Hello World!", 12 },
+	},
+	{
+		.description = "two words, conversion, truncation",
+		.input = { "heLlo wORLd!", 12, 8 },
+		.output = { "Hello W", 12 },
+	},
+	{
+		.description = "two words, conversion, NUL-terminated",
+		.input = { "heLlo wORLd!", SIZE_MAX, 20 },
+		.output = { "Hello World!", 12 },
+	},
+	{
+		.description = "two words, no conversion, NUL-terminated",
+		.input = { "Hello World!", SIZE_MAX, 20 },
+		.output = { "Hello World!", 12 },
+	},
+	{
+		.description = "two words, conversion, NUL-terminated, truncation",
+		.input = { "heLlo wORLd!", SIZE_MAX, 4 },
+		.output = { "Hel", 12 },
+	},
+};
+
+static int
+unit_test_callback_to_case_utf8(void *t, size_t off, const char *name, const char *argv0)
+{
+	struct unit_test_to_case_utf8 *test = (struct unit_test_to_case_utf8 *)t + off;
+	size_t ret = 0, i;
+	char buf[512];
+
+	/* fill the array with canary values */
+	memset(buf, 0x7f, LEN(buf));
+
+	if (t == lowercase_utf8) {
+		ret = grapheme_to_lowercase_utf8(test->input.src, test->input.srclen,
+		                                 buf, test->input.destlen);
+	} else if (t == uppercase_utf8) {
+		ret = grapheme_to_uppercase_utf8(test->input.src, test->input.srclen,
+		                                 buf, test->input.destlen);
+	} else if (t == titlecase_utf8) {
+		ret = grapheme_to_titlecase_utf8(test->input.src, test->input.srclen,
+		                                 buf, test->input.destlen);
+	} else {
+		goto err;
+	}
+
+	/* check results */
+	if (ret != test->output.ret ||
+	    memcmp(buf, test->output.dest, MIN(test->input.destlen, test->output.ret))) {
+		goto err;
+	}
+
+	/* check that none of the canary values have been overwritten */
+	for (i = test->input.destlen; i < LEN(buf); i++) {
+		if (buf[i] != 0x7f) {
+			fprintf(stderr, "%s: %s: Canary at offset %zu was overwritten.\n", argv0, name, i);
+			goto err;
+		}
+	}
+
+	return 0;
+err:
+	fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
+	        "(returned (\"%.*s\", %zu) instead of (\"%.*s\", %zu)).\n", argv0,
+	        name, off, test->description, (int)ret, buf, ret,
+	        (int)test->output.ret, test->output.dest, test->output.ret);
+	return 1;
+}
+
+int
+main(int argc, char *argv[])
+{
+	(void)argc;
+
+	return run_unit_tests(unit_test_callback_to_case_utf8, lowercase_utf8,
+	                      LEN(lowercase_utf8), "grapheme_to_lowercase_utf8", argv[0]) +
+	       run_unit_tests(unit_test_callback_to_case_utf8, uppercase_utf8,
+	                      LEN(uppercase_utf8), "grapheme_to_uppercase_utf8", argv[0]) +
+	       run_unit_tests(unit_test_callback_to_case_utf8, titlecase_utf8,
+	                      LEN(titlecase_utf8), "grapheme_to_titlecase_utf8", argv[0]);
+}
diff --git a/test/util.c b/test/util.c
@@ -23,7 +23,7 @@ run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
 			/* check if our resulting offset matches */
 			if (j == test[i].lenlen ||
 			    res != test[i].len[j++]) {
-				fprintf(stderr, "%s: Failed test %zu \"%s\".\n",
+				fprintf(stderr, "%s: Failed conformance test %zu \"%s\".\n",
 				        argv0, i, test[i].descr);
 				fprintf(stderr, "J=%zu: EXPECTED len %zu, got %zu\n", j-1, test[i].len[j-1], res);
 				failed++;
@@ -31,7 +31,24 @@ run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
 			}
 		}
 	}
-	printf("%s: %zu/%zu tests passed.\n", argv0,
+	printf("%s: %zu/%zu conformance tests passed.\n", argv0,
+	       testlen - failed, testlen);
+
+	return (failed > 0) ? 1 : 0;
+}
+
+int
+run_unit_tests(int (*unit_test_callback)(void *, size_t, const char *,
+               const char *), void *test, size_t testlen, const char *name,
+               const char *argv0)
+{
+	size_t i, failed;
+
+	for (i = 0, failed = 0; i < testlen; i++) {
+		failed += (unit_test_callback(test, i, name, argv0) == 0) ? 0 : 1;
+	}
+
+	printf("%s: %s: %zu/%zu unit tests passed.\n", argv0, name,
 	       testlen - failed, testlen);
 
 	return (failed > 0) ? 1 : 0;
diff --git a/test/util.h b/test/util.h
@@ -5,10 +5,15 @@
 #include "../gen/types.h"
 #include "../grapheme.h"
 
+#undef MIN
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#undef LEN
 #define LEN(x) (sizeof(x) / sizeof(*(x)))
 
 int run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
                     const struct break_test *test, size_t testlen,
                     const char *);
+int run_unit_tests(int (*unit_test_callback)(void *, size_t, const char *,
+                   const char *), void *, size_t, const char *, const char *);
 
 #endif /* UTIL_H */