libgrapheme

Freestanding C library for Unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

utf8-encode.c (2238B)


      1/* See LICENSE file for copyright and license details. */
      2#include <stddef.h>
      3#include <stdint.h>
      4#include <stdio.h>
      5#include <string.h>
      6
      7#include "../grapheme.h"
      8#include "util.h"
      9
     10static const struct {
     11	uint_least32_t cp; /* input codepoint */
     12	char *exp_arr;     /* expected UTF-8 byte sequence */
     13	size_t exp_len;    /* expected length of UTF-8 sequence */
     14} enc_test[] = {
     15	{
     16		/* invalid codepoint (UTF-16 surrogate half) */
     17		.cp = UINT32_C(0xD800),
     18		.exp_arr = (char *)(unsigned char[]) { 0xEF, 0xBF, 0xBD },
     19		.exp_len = 3,
     20	},
     21	{
     22		/* invalid codepoint (UTF-16-unrepresentable) */
     23		.cp = UINT32_C(0x110000),
     24		.exp_arr = (char *)(unsigned char[]) { 0xEF, 0xBF, 0xBD },
     25		.exp_len = 3,
     26	},
     27	{
     28		/* codepoint encoded to a 1-byte sequence */
     29		.cp = 0x01,
     30		.exp_arr = (char *)(unsigned char[]) { 0x01 },
     31		.exp_len = 1,
     32	},
     33	{
     34		/* codepoint encoded to a 2-byte sequence */
     35		.cp = 0xFF,
     36		.exp_arr = (char *)(unsigned char[]) { 0xC3, 0xBF },
     37		.exp_len = 2,
     38	},
     39	{
     40		/* codepoint encoded to a 3-byte sequence */
     41		.cp = 0xFFF,
     42		.exp_arr = (char *)(unsigned char[]) { 0xE0, 0xBF, 0xBF },
     43		.exp_len = 3,
     44	},
     45	{
     46		/* codepoint encoded to a 4-byte sequence */
     47		.cp = UINT32_C(0xFFFFF),
     48		.exp_arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF, 0xBF },
     49		.exp_len = 4,
     50	},
     51};
     52
     53int
     54main(int argc, char *argv[])
     55{
     56	size_t i, j, failed;
     57
     58	(void)argc;
     59
     60	/* UTF-8 encoder test */
     61	for (i = 0, failed = 0; i < LEN(enc_test); i++) {
     62		char arr[4];
     63		size_t len;
     64
     65		len = grapheme_encode_utf8(enc_test[i].cp, arr, LEN(arr));
     66
     67		if (len != enc_test[i].exp_len ||
     68		    memcmp(arr, enc_test[i].exp_arr, len)) {
     69			fprintf(stderr,
     70			        "%s, Failed test %zu: "
     71			        "Expected (",
     72			        argv[0], i);
     73			for (j = 0; j < enc_test[i].exp_len; j++) {
     74				fprintf(stderr, "0x%x", enc_test[i].exp_arr[j]);
     75				if (j + 1 < enc_test[i].exp_len) {
     76					fprintf(stderr, " ");
     77				}
     78			}
     79			fprintf(stderr, "), but got (");
     80			for (j = 0; j < len; j++) {
     81				fprintf(stderr, "0x%x", arr[j]);
     82				if (j + 1 < len) {
     83					fprintf(stderr, " ");
     84				}
     85			}
     86			fprintf(stderr, ").\n");
     87			failed++;
     88		}
     89	}
     90	printf("%s: %zu/%zu unit tests passed.\n", argv[0],
     91	       LEN(enc_test) - failed, LEN(enc_test));
     92
     93	return (failed > 0) ? 1 : 0;
     94}