libgrapheme

Freestanding C library for Unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

sentence.c (3181B)


      1/* See LICENSE file for copyright and license details. */
      2#include <stdbool.h>
      3#include <stdint.h>
      4
      5#include "../gen/sentence-test.h"
      6#include "../grapheme.h"
      7#include "util.h"
      8
      9static const struct unit_test_next_break next_sentence_break[] = {
     10	{
     11		.description = "NULL input",
     12		.input = {
     13			.src    = NULL,
     14			.srclen = 0,
     15		},
     16		.output = { 0 },
     17	},
     18	{
     19		.description = "empty input",
     20		.input = {
     21			.src    = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
     22			.srclen = 0,
     23		},
     24		.output = { 0 },
     25	},
     26	{
     27		.description = "empty input, null-terminated",
     28		.input = {
     29			.src    = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
     30			.srclen = SIZE_MAX,
     31		},
     32		.output = { 0 },
     33	},
     34	{
     35		.description = "one sentence",
     36		.input = {
     37			.src    = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x2E, 0x20, 0x2A },
     38			.srclen = 5,
     39		},
     40		.output = { 4 },
     41	},
     42	{
     43		.description = "one sentence, null-terminated",
     44		.input = {
     45			.src    = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x2E, 0x20, 0x2A, 0x0 },
     46			.srclen = SIZE_MAX,
     47		},
     48		.output = { 4 },
     49	},
     50};
     51
     52static const struct unit_test_next_break_utf8 next_sentence_break_utf8[] = {
     53	{
     54		.description = "NULL input",
     55		.input = {
     56			.src    = NULL,
     57			.srclen = 0,
     58		},
     59		.output = { 0 },
     60	},
     61	{
     62		.description = "empty input",
     63		.input = { "", 0 },
     64		.output = { 0 },
     65	},
     66	{
     67		.description = "empty input, NUL-terminated",
     68		.input = { "", SIZE_MAX },
     69		.output = { 0 },
     70	},
     71	{
     72		.description = "one sentence",
     73		.input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is the flag of Germany.  It", 36 },
     74		.output = { 34 },
     75	},
     76	{
     77		.description = "one sentence, fragment",
     78		.input = { "\xF0\x9F\x87\xA9\xF0", 5 },
     79		.output = { 4 },
     80	},
     81	{
     82		.description = "one sentence, NUL-terminated",
     83		.input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is the flag of Germany.  It", SIZE_MAX },
     84		.output = { 34 },
     85	},
     86	{
     87		.description = "one sentence, fragment, NUL-terminated",
     88		.input = { "\xF0\x9F\x87\xA9\xF0\x9F", SIZE_MAX },
     89		.output = { 6 },
     90	},
     91};
     92
     93static int
     94unit_test_callback_next_sentence_break(const void *t, size_t off,
     95                                       const char *name, const char *argv0)
     96{
     97	return unit_test_callback_next_break(
     98		t, off, grapheme_next_sentence_break, name, argv0);
     99}
    100
    101static int
    102unit_test_callback_next_sentence_break_utf8(const void *t, size_t off,
    103                                            const char *name, const char *argv0)
    104{
    105	return unit_test_callback_next_break_utf8(
    106		t, off, grapheme_next_sentence_break_utf8, name, argv0);
    107}
    108
    109int
    110main(int argc, char *argv[])
    111{
    112	(void)argc;
    113
    114	return run_break_tests(grapheme_next_sentence_break,
    115	                       sentence_break_test, LEN(sentence_break_test),
    116	                       argv[0]) +
    117	       run_unit_tests(unit_test_callback_next_sentence_break,
    118	                      next_sentence_break, LEN(next_sentence_break),
    119	                      "grapheme_next_sentence_break", argv[0]) +
    120	       run_unit_tests(unit_test_callback_next_sentence_break_utf8,
    121	                      next_sentence_break_utf8,
    122	                      LEN(next_sentence_break_utf8),
    123	                      "grapheme_next_character_break_utf8", argv[0]);
    124}