libgrapheme

Freestanding C library for Unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

word.c (3000B)


      1/* See LICENSE file for copyright and license details. */
      2#include <stdbool.h>
      3#include <stdint.h>
      4
      5#include "../gen/word-test.h"
      6#include "../grapheme.h"
      7#include "util.h"
      8
      9static const struct unit_test_next_break next_word_break[] = {
     10	{
     11		.description = "NULL input",
     12		.input = {
     13			.src    = NULL,
     14			.srclen = 0,
     15		},
     16		.output = { 0 },
     17	},
     18	{
     19		.description = "empty input",
     20		.input = {
     21			.src    = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
     22			.srclen = 0,
     23		},
     24		.output = { 0 },
     25	},
     26	{
     27		.description = "empty input, null-terminated",
     28		.input = {
     29			.src    = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
     30			.srclen = SIZE_MAX,
     31		},
     32		.output = { 0 },
     33	},
     34	{
     35		.description = "one word",
     36		.input = {
     37			.src    = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x20, 0x2A },
     38			.srclen = 4,
     39		},
     40		.output = { 2 },
     41	},
     42	{
     43		.description = "one word, null-terminated",
     44		.input = {
     45			.src    = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x20, 0x2A, 0x0 },
     46			.srclen = SIZE_MAX,
     47		},
     48		.output = { 2 },
     49	},
     50};
     51
     52static const struct unit_test_next_break_utf8 next_word_break_utf8[] = {
     53	{
     54		.description = "NULL input",
     55		.input = {
     56			.src    = NULL,
     57			.srclen = 0,
     58		},
     59		.output = { 0 },
     60	},
     61	{
     62		.description = "empty input",
     63		.input = { "", 0 },
     64		.output = { 0 },
     65	},
     66	{
     67		.description = "empty input, NUL-terminated",
     68		.input = { "", SIZE_MAX },
     69		.output = { 0 },
     70	},
     71	{
     72		.description = "one word",
     73		.input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is", 11 },
     74		.output = { 8 },
     75	},
     76	{
     77		.description = "one word, fragment",
     78		.input = { "\xF0\x9F\x87\xA9\xF0", 5 },
     79		.output = { 4 },
     80	},
     81	{
     82		.description = "one word, NUL-terminated",
     83		.input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is", SIZE_MAX },
     84		.output = { 8 },
     85	},
     86	{
     87		.description = "one word, fragment, NUL-terminated",
     88		.input = { "\xF0\x9F\x87\xA9\xF0\x9F", SIZE_MAX },
     89		.output = { 4 },
     90	},
     91};
     92
     93static int
     94unit_test_callback_next_word_break(const void *t, size_t off, const char *name,
     95                                   const char *argv0)
     96{
     97	return unit_test_callback_next_break(t, off, grapheme_next_word_break,
     98	                                     name, argv0);
     99}
    100
    101static int
    102unit_test_callback_next_word_break_utf8(const void *t, size_t off,
    103                                        const char *name, const char *argv0)
    104{
    105	return unit_test_callback_next_break_utf8(
    106		t, off, grapheme_next_word_break_utf8, name, argv0);
    107}
    108
    109int
    110main(int argc, char *argv[])
    111{
    112	(void)argc;
    113
    114	return run_break_tests(grapheme_next_word_break, word_break_test,
    115	                       LEN(word_break_test), argv[0]) +
    116	       run_unit_tests(unit_test_callback_next_word_break,
    117	                      next_word_break, LEN(next_word_break),
    118	                      "grapheme_next_word_break", argv[0]) +
    119	       run_unit_tests(unit_test_callback_next_word_break_utf8,
    120	                      next_word_break_utf8, LEN(next_word_break_utf8),
    121	                      "grapheme_next_word_break_utf8", argv[0]);
    122}