libgrapheme

Freestanding C library for Unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

case.c (16477B)


      1/* See LICENSE file for copyright and license details. */
      2#include <stdbool.h>
      3#include <stdint.h>
      4#include <stdio.h>
      5#include <string.h>
      6
      7#include "../grapheme.h"
      8#include "util.h"
      9
     10struct unit_test_is_case_utf8 {
     11	const char *description;
     12
     13	struct {
     14		const char *src;
     15		size_t srclen;
     16	} input;
     17
     18	struct {
     19		bool ret;
     20		size_t caselen;
     21	} output;
     22};
     23
     24struct unit_test_to_case_utf8 {
     25	const char *description;
     26
     27	struct {
     28		const char *src;
     29		size_t srclen;
     30		size_t destlen;
     31	} input;
     32
     33	struct {
     34		const char *dest;
     35		size_t ret;
     36	} output;
     37};
     38
     39static const struct unit_test_is_case_utf8 is_lowercase_utf8[] = {
     40	{
     41		.description = "empty input",
     42		.input = { "", 0 },
     43		.output = { true, 0 },
     44	},
     45	{
     46		.description = "one character, violation",
     47		.input = { "A", 1 },
     48		.output = { false, 0 },
     49	},
     50	{
     51		.description = "one character, confirmation",
     52		.input = { "\xC3\x9F", 2 },
     53		.output = { true, 2 },
     54	},
     55	{
     56		.description = "one character, violation, NUL-terminated",
     57		.input = { "A", SIZE_MAX },
     58		.output = { false, 0 },
     59	},
     60	{
     61		.description = "one character, confirmation, NUL-terminated",
     62		.input = { "\xC3\x9F", SIZE_MAX },
     63		.output = { true, 2 },
     64	},
     65	{
     66		.description = "one word, violation",
     67		.input = { "Hello", 5 },
     68		.output = { false, 0 },
     69	},
     70	{
     71		.description = "one word, partial confirmation",
     72		.input = { "gru"
     73	                   "\xC3\x9F"
     74	                   "fOrmel",
     75	                   11 },
     76		.output = { false, 6 },
     77	},
     78	{
     79		.description = "one word, full confirmation",
     80		.input = { "gru"
     81	                   "\xC3\x9F"
     82	                   "formel",
     83	                   11 },
     84		.output = { true, 11 },
     85	},
     86	{
     87		.description = "one word, violation, NUL-terminated",
     88		.input = { "Hello", SIZE_MAX },
     89		.output = { false, 0 },
     90	},
     91	{
     92		.description = "one word, partial confirmation, NUL-terminated",
     93		.input = { "gru"
     94	                   "\xC3\x9F"
     95	                   "fOrmel",
     96	                   SIZE_MAX },
     97		.output = { false, 6 },
     98	},
     99	{
    100		.description = "one word, full confirmation, NUL-terminated",
    101		.input = { "gru"
    102	                   "\xC3\x9F"
    103	                   "formel",
    104	                   SIZE_MAX },
    105		.output = { true, 11 },
    106	},
    107};
    108
    109static const struct unit_test_is_case_utf8 is_uppercase_utf8[] = {
    110	{
    111		.description = "empty input",
    112		.input = { "", 0 },
    113		.output = { true, 0 },
    114	},
    115	{
    116		.description = "one character, violation",
    117		.input = { "\xC3\x9F", 2 },
    118		.output = { false, 0 },
    119	},
    120	{
    121		.description = "one character, confirmation",
    122		.input = { "A", 1 },
    123		.output = { true, 1 },
    124	},
    125	{
    126		.description = "one character, violation, NUL-terminated",
    127		.input = { "\xC3\x9F", SIZE_MAX },
    128		.output = { false, 0 },
    129	},
    130	{
    131		.description = "one character, confirmation, NUL-terminated",
    132		.input = { "A", SIZE_MAX },
    133		.output = { true, 1 },
    134	},
    135	{
    136		.description = "one word, violation",
    137		.input = { "hello", 5 },
    138		.output = { false, 0 },
    139	},
    140	{
    141		.description = "one word, partial confirmation",
    142		.input = { "GRU"
    143	                   "\xC3\x9F"
    144	                   "formel",
    145	                   11 },
    146		.output = { false, 3 },
    147	},
    148	{
    149		.description = "one word, full confirmation",
    150		.input = { "HELLO", 5 },
    151		.output = { true, 5 },
    152	},
    153	{
    154		.description = "one word, violation, NUL-terminated",
    155		.input = { "hello", SIZE_MAX },
    156		.output = { false, 0 },
    157	},
    158	{
    159		.description = "one word, partial confirmation, NUL-terminated",
    160		.input = { "GRU"
    161	                   "\xC3\x9F"
    162	                   "formel",
    163	                   SIZE_MAX },
    164		.output = { false, 3 },
    165	},
    166	{
    167		.description = "one word, full confirmation, NUL-terminated",
    168		.input = { "HELLO", SIZE_MAX },
    169		.output = { true, 5 },
    170	},
    171};
    172
    173static const struct unit_test_is_case_utf8 is_titlecase_utf8[] = {
    174	{
    175		.description = "empty input",
    176		.input = { "", 0 },
    177		.output = { true, 0 },
    178	},
    179	{
    180		.description = "one character, violation",
    181		.input = { "\xC3\x9F", 2 },
    182		.output = { false, 0 },
    183	},
    184	{
    185		.description = "one character, confirmation",
    186		.input = { "A", 1 },
    187		.output = { true, 1 },
    188	},
    189	{
    190		.description = "one character, violation, NUL-terminated",
    191		.input = { "\xC3\x9F", SIZE_MAX },
    192		.output = { false, 0 },
    193	},
    194	{
    195		.description = "one character, confirmation, NUL-terminated",
    196		.input = { "A", SIZE_MAX },
    197		.output = { true, 1 },
    198	},
    199	{
    200		.description = "one word, violation",
    201		.input = { "hello", 5 },
    202		.output = { false, 0 },
    203	},
    204	{
    205		.description = "one word, partial confirmation",
    206		.input = { "Gru"
    207	                   "\xC3\x9F"
    208	                   "fOrmel",
    209	                   11 },
    210		.output = { false, 6 },
    211	},
    212	{
    213		.description = "one word, full confirmation",
    214		.input = { "Gru"
    215	                   "\xC3\x9F"
    216	                   "formel",
    217	                   11 },
    218		.output = { true, 11 },
    219	},
    220	{
    221		.description = "one word, violation, NUL-terminated",
    222		.input = { "hello", SIZE_MAX },
    223		.output = { false, 0 },
    224	},
    225	{
    226		.description = "one word, partial confirmation, NUL-terminated",
    227		.input = { "Gru"
    228	                   "\xC3\x9F"
    229	                   "fOrmel",
    230	                   SIZE_MAX },
    231		.output = { false, 6 },
    232	},
    233	{
    234		.description = "one word, full confirmation, NUL-terminated",
    235		.input = { "Gru"
    236	                   "\xC3\x9F"
    237	                   "formel",
    238	                   SIZE_MAX },
    239		.output = { true, 11 },
    240	},
    241	{
    242		.description = "multiple words, partial confirmation",
    243		.input = { "Hello Gru"
    244	                   "\xC3\x9F"
    245	                   "fOrmel!",
    246	                   18 },
    247		.output = { false, 12 },
    248	},
    249	{
    250		.description = "multiple words, full confirmation",
    251		.input = { "Hello Gru"
    252	                   "\xC3\x9F"
    253	                   "formel!",
    254	                   18 },
    255		.output = { true, 18 },
    256	},
    257	{
    258		.description =
    259			"multiple words, partial confirmation, NUL-terminated",
    260		.input = { "Hello Gru"
    261	                   "\xC3\x9F"
    262	                   "fOrmel!",
    263	                   SIZE_MAX },
    264		.output = { false, 12 },
    265	},
    266	{
    267		.description =
    268			"multiple words, full confirmation, NUL-terminated",
    269		.input = { "Hello Gru"
    270	                   "\xC3\x9F"
    271	                   "formel!",
    272	                   SIZE_MAX },
    273		.output = { true, 18 },
    274	},
    275};
    276
    277static const struct unit_test_to_case_utf8 to_lowercase_utf8[] = {
    278	{
    279		.description = "empty input",
    280		.input = { "", 0, 10 },
    281		.output = { "", 0 },
    282	},
    283	{
    284		.description = "empty output",
    285		.input = { "hello", 5, 0 },
    286		.output = { "", 5 },
    287	},
    288	{
    289		.description = "one character, conversion",
    290		.input = { "A", 1, 10 },
    291		.output = { "a", 1 },
    292	},
    293	{
    294		.description = "one character, no conversion",
    295		.input = { "\xC3\x9F", 2, 10 },
    296		.output = { "\xC3\x9F", 2 },
    297	},
    298	{
    299		.description = "one character, conversion, truncation",
    300		.input = { "A", 1, 0 },
    301		.output = { "", 1 },
    302	},
    303	{
    304		.description = "one character, conversion, NUL-terminated",
    305		.input = { "A", SIZE_MAX, 10 },
    306		.output = { "a", 1 },
    307	},
    308	{
    309		.description = "one character, no conversion, NUL-terminated",
    310		.input = { "\xC3\x9F", SIZE_MAX, 10 },
    311		.output = { "\xC3\x9F", 2 },
    312	},
    313	{
    314		.description =
    315			"one character, conversion, NUL-terminated, truncation",
    316		.input = { "A", SIZE_MAX, 0 },
    317		.output = { "", 1 },
    318	},
    319	{
    320		.description = "one word, conversion",
    321		.input = { "wOrD", 4, 10 },
    322		.output = { "word", 4 },
    323	},
    324	{
    325		.description = "one word, no conversion",
    326		.input = { "word", 4, 10 },
    327		.output = { "word", 4 },
    328	},
    329	{
    330		.description = "one word, conversion, truncation",
    331		.input = { "wOrD", 4, 3 },
    332		.output = { "wo", 4 },
    333	},
    334	{
    335		.description = "one word, conversion, NUL-terminated",
    336		.input = { "wOrD", SIZE_MAX, 10 },
    337		.output = { "word", 4 },
    338	},
    339	{
    340		.description = "one word, no conversion, NUL-terminated",
    341		.input = { "word", SIZE_MAX, 10 },
    342		.output = { "word", 4 },
    343	},
    344	{
    345		.description =
    346			"one word, conversion, NUL-terminated, truncation",
    347		.input = { "wOrD", SIZE_MAX, 3 },
    348		.output = { "wo", 4 },
    349	},
    350};
    351
    352static const struct unit_test_to_case_utf8 to_uppercase_utf8[] = {
    353	{
    354		.description = "empty input",
    355		.input = { "", 0, 10 },
    356		.output = { "", 0 },
    357	},
    358	{
    359		.description = "empty output",
    360		.input = { "hello", 5, 0 },
    361		.output = { "", 5 },
    362	},
    363	{
    364		.description = "one character, conversion",
    365		.input = { "\xC3\x9F", 2, 10 },
    366		.output = { "SS", 2 },
    367	},
    368	{
    369		.description = "one character, no conversion",
    370		.input = { "A", 1, 10 },
    371		.output = { "A", 1 },
    372	},
    373	{
    374		.description = "one character, conversion, truncation",
    375		.input = { "\xC3\x9F", 2, 0 },
    376		.output = { "", 2 },
    377	},
    378	{
    379		.description = "one character, conversion, NUL-terminated",
    380		.input = { "\xC3\x9F", SIZE_MAX, 10 },
    381		.output = { "SS", 2 },
    382	},
    383	{
    384		.description = "one character, no conversion, NUL-terminated",
    385		.input = { "A", SIZE_MAX, 10 },
    386		.output = { "A", 1 },
    387	},
    388	{
    389		.description =
    390			"one character, conversion, NUL-terminated, truncation",
    391		.input = { "\xC3\x9F", SIZE_MAX, 0 },
    392		.output = { "", 2 },
    393	},
    394	{
    395		.description = "one word, conversion",
    396		.input = { "gRu"
    397	                   "\xC3\x9F"
    398	                   "fOrMel",
    399	                   11, 15 },
    400		.output = { "GRUSSFORMEL", 11 },
    401	},
    402	{
    403		.description = "one word, no conversion",
    404		.input = { "WORD", 4, 10 },
    405		.output = { "WORD", 4 },
    406	},
    407	{
    408		.description = "one word, conversion, truncation",
    409		.input = { "gRu"
    410	                   "\xC3\x9F"
    411	                   "formel",
    412	                   11, 5 },
    413		.output = { "GRUS", 11 },
    414	},
    415	{
    416		.description = "one word, conversion, NUL-terminated",
    417		.input = { "gRu"
    418	                   "\xC3\x9F"
    419	                   "formel",
    420	                   SIZE_MAX, 15 },
    421		.output = { "GRUSSFORMEL", 11 },
    422	},
    423	{
    424		.description = "one word, no conversion, NUL-terminated",
    425		.input = { "WORD", SIZE_MAX, 10 },
    426		.output = { "WORD", 4 },
    427	},
    428	{
    429		.description =
    430			"one word, conversion, NUL-terminated, truncation",
    431		.input = { "gRu"
    432	                   "\xC3\x9F"
    433	                   "formel",
    434	                   SIZE_MAX, 5 },
    435		.output = { "GRUS", 11 },
    436	},
    437};
    438
    439static const struct unit_test_to_case_utf8 to_titlecase_utf8[] = {
    440	{
    441		.description = "empty input",
    442		.input = { "", 0, 10 },
    443		.output = { "", 0 },
    444	},
    445	{
    446		.description = "empty output",
    447		.input = { "hello", 5, 0 },
    448		.output = { "", 5 },
    449	},
    450	{
    451		.description = "one character, conversion",
    452		.input = { "a", 1, 10 },
    453		.output = { "A", 1 },
    454	},
    455	{
    456		.description = "one character, no conversion",
    457		.input = { "A", 1, 10 },
    458		.output = { "A", 1 },
    459	},
    460	{
    461		.description = "one character, conversion, truncation",
    462		.input = { "a", 1, 0 },
    463		.output = { "", 1 },
    464	},
    465	{
    466		.description = "one character, conversion, NUL-terminated",
    467		.input = { "a", SIZE_MAX, 10 },
    468		.output = { "A", 1 },
    469	},
    470	{
    471		.description = "one character, no conversion, NUL-terminated",
    472		.input = { "A", SIZE_MAX, 10 },
    473		.output = { "A", 1 },
    474	},
    475	{
    476		.description =
    477			"one character, conversion, NUL-terminated, truncation",
    478		.input = { "a", SIZE_MAX, 0 },
    479		.output = { "", 1 },
    480	},
    481	{
    482		.description = "one word, conversion",
    483		.input = { "heLlo", 5, 10 },
    484		.output = { "Hello", 5 },
    485	},
    486	{
    487		.description = "one word, no conversion",
    488		.input = { "Hello", 5, 10 },
    489		.output = { "Hello", 5 },
    490	},
    491	{
    492		.description = "one word, conversion, truncation",
    493		.input = { "heLlo", 5, 2 },
    494		.output = { "H", 5 },
    495	},
    496	{
    497		.description = "one word, conversion, NUL-terminated",
    498		.input = { "heLlo", SIZE_MAX, 10 },
    499		.output = { "Hello", 5 },
    500	},
    501	{
    502		.description = "one word, no conversion, NUL-terminated",
    503		.input = { "Hello", SIZE_MAX, 10 },
    504		.output = { "Hello", 5 },
    505	},
    506	{
    507		.description =
    508			"one word, conversion, NUL-terminated, truncation",
    509		.input = { "heLlo", SIZE_MAX, 3 },
    510		.output = { "He", 5 },
    511	},
    512	{
    513		.description = "two words, conversion",
    514		.input = { "heLlo wORLd!", 12, 20 },
    515		.output = { "Hello World!", 12 },
    516	},
    517	{
    518		.description = "two words, no conversion",
    519		.input = { "Hello World!", 12, 20 },
    520		.output = { "Hello World!", 12 },
    521	},
    522	{
    523		.description = "two words, conversion, truncation",
    524		.input = { "heLlo wORLd!", 12, 8 },
    525		.output = { "Hello W", 12 },
    526	},
    527	{
    528		.description = "two words, conversion, NUL-terminated",
    529		.input = { "heLlo wORLd!", SIZE_MAX, 20 },
    530		.output = { "Hello World!", 12 },
    531	},
    532	{
    533		.description = "two words, no conversion, NUL-terminated",
    534		.input = { "Hello World!", SIZE_MAX, 20 },
    535		.output = { "Hello World!", 12 },
    536	},
    537	{
    538		.description =
    539			"two words, conversion, NUL-terminated, truncation",
    540		.input = { "heLlo wORLd!", SIZE_MAX, 4 },
    541		.output = { "Hel", 12 },
    542	},
    543};
    544
    545static int
    546unit_test_callback_is_case_utf8(const void *t, size_t off, const char *name,
    547                                const char *argv0)
    548{
    549	const struct unit_test_is_case_utf8 *test =
    550		(const struct unit_test_is_case_utf8 *)t + off;
    551	bool ret = false;
    552	size_t caselen = 0x7f;
    553
    554	if (t == is_lowercase_utf8) {
    555		ret = grapheme_is_lowercase_utf8(test->input.src,
    556		                                 test->input.srclen, &caselen);
    557	} else if (t == is_uppercase_utf8) {
    558		ret = grapheme_is_uppercase_utf8(test->input.src,
    559		                                 test->input.srclen, &caselen);
    560	} else if (t == is_titlecase_utf8) {
    561		ret = grapheme_is_titlecase_utf8(test->input.src,
    562		                                 test->input.srclen, &caselen);
    563
    564	} else {
    565		goto err;
    566	}
    567
    568	/* check results */
    569	if (ret != test->output.ret || caselen != test->output.caselen) {
    570		goto err;
    571	}
    572
    573	return 0;
    574err:
    575	fprintf(stderr,
    576	        "%s: %s: Failed unit test %zu \"%s\" "
    577	        "(returned (%s, %zu) instead of (%s, %zu)).\n",
    578	        argv0, name, off, test->description, ret ? "true" : "false",
    579	        caselen, test->output.ret ? "true" : "false",
    580	        test->output.caselen);
    581	return 1;
    582}
    583
    584static int
    585unit_test_callback_to_case_utf8(const void *t, size_t off, const char *name,
    586                                const char *argv0)
    587{
    588	const struct unit_test_to_case_utf8 *test =
    589		(const struct unit_test_to_case_utf8 *)t + off;
    590	size_t ret = 0, i;
    591	char buf[512];
    592
    593	/* fill the array with canary values */
    594	memset(buf, 0x7f, LEN(buf));
    595
    596	if (t == to_lowercase_utf8) {
    597		ret = grapheme_to_lowercase_utf8(test->input.src,
    598		                                 test->input.srclen, buf,
    599		                                 test->input.destlen);
    600	} else if (t == to_uppercase_utf8) {
    601		ret = grapheme_to_uppercase_utf8(test->input.src,
    602		                                 test->input.srclen, buf,
    603		                                 test->input.destlen);
    604	} else if (t == to_titlecase_utf8) {
    605		ret = grapheme_to_titlecase_utf8(test->input.src,
    606		                                 test->input.srclen, buf,
    607		                                 test->input.destlen);
    608	} else {
    609		goto err;
    610	}
    611
    612	/* check results */
    613	if (ret != test->output.ret ||
    614	    memcmp(buf, test->output.dest,
    615	           MIN(test->input.destlen, test->output.ret))) {
    616		goto err;
    617	}
    618
    619	/* check that none of the canary values have been overwritten */
    620	for (i = test->input.destlen; i < LEN(buf); i++) {
    621		if (buf[i] != 0x7f) {
    622			goto err;
    623		}
    624	}
    625
    626	return 0;
    627err:
    628	fprintf(stderr,
    629	        "%s: %s: Failed unit test %zu \"%s\" "
    630	        "(returned (\"%.*s\", %zu) instead of (\"%.*s\", %zu)).\n",
    631	        argv0, name, off, test->description, (int)ret, buf, ret,
    632	        (int)test->output.ret, test->output.dest, test->output.ret);
    633	return 1;
    634}
    635
    636int
    637main(int argc, char *argv[])
    638{
    639	(void)argc;
    640
    641	return run_unit_tests(unit_test_callback_is_case_utf8,
    642	                      is_lowercase_utf8, LEN(is_lowercase_utf8),
    643	                      "grapheme_is_lowercase_utf8", argv[0]) +
    644	       run_unit_tests(unit_test_callback_is_case_utf8,
    645	                      is_uppercase_utf8, LEN(is_uppercase_utf8),
    646	                      "grapheme_is_uppercase_utf8", argv[0]) +
    647	       run_unit_tests(unit_test_callback_is_case_utf8,
    648	                      is_titlecase_utf8, LEN(is_titlecase_utf8),
    649	                      "grapheme_is_titlecase_utf8", argv[0]) +
    650	       run_unit_tests(unit_test_callback_to_case_utf8,
    651	                      to_lowercase_utf8, LEN(to_lowercase_utf8),
    652	                      "grapheme_to_lowercase_utf8", argv[0]) +
    653	       run_unit_tests(unit_test_callback_to_case_utf8,
    654	                      to_uppercase_utf8, LEN(to_uppercase_utf8),
    655	                      "grapheme_to_uppercase_utf8", argv[0]) +
    656	       run_unit_tests(unit_test_callback_to_case_utf8,
    657	                      to_titlecase_utf8, LEN(to_titlecase_utf8),
    658	                      "grapheme_to_titlecase_utf8", argv[0]);
    659}