libgrapheme

Freestanding C library for Unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

utf8-decode.c (2129B)


      1/* See LICENSE file for copyright and license details. */
      2#include <errno.h>
      3#include <math.h>
      4#include <stdint.h>
      5#include <stdio.h>
      6#include <stdlib.h>
      7#include <string.h>
      8
      9#include "../gen/character-test.h"
     10#include "../grapheme.h"
     11#include "util.h"
     12
     13#include <utf8proc.h>
     14
     15#define NUM_ITERATIONS 100000
     16
     17struct utf8_benchmark_payload {
     18	char *buf;
     19	utf8proc_uint8_t *buf_utf8proc;
     20	size_t buflen;
     21};
     22
     23void
     24libgrapheme(const void *payload)
     25{
     26	const struct utf8_benchmark_payload *p = payload;
     27	uint_least32_t cp;
     28	size_t ret, off;
     29
     30	for (off = 0; off < p->buflen; off += ret) {
     31		if ((ret = grapheme_decode_utf8(p->buf + off, p->buflen - off,
     32		                                &cp)) > (p->buflen - off)) {
     33			break;
     34		}
     35		(void)cp;
     36	}
     37}
     38
     39void
     40libutf8proc(const void *payload)
     41{
     42	const struct utf8_benchmark_payload *p = payload;
     43	utf8proc_int32_t cp;
     44	utf8proc_ssize_t ret;
     45	size_t off;
     46
     47	for (off = 0; off < p->buflen; off += (size_t)ret) {
     48		if ((ret = utf8proc_iterate(p->buf_utf8proc + off,
     49		                            (utf8proc_ssize_t)(p->buflen - off),
     50		                            &cp)) < 0) {
     51			break;
     52		}
     53		(void)cp;
     54	}
     55}
     56
     57int
     58main(int argc, char *argv[])
     59{
     60	struct utf8_benchmark_payload p;
     61	size_t i;
     62	double baseline = (double)NAN;
     63
     64	(void)argc;
     65
     66	p.buf = generate_utf8_test_buffer(
     67		character_break_test, LEN(character_break_test), &(p.buflen));
     68
     69	/* convert cp-buffer to stupid custom libutf8proc-uint8-type */
     70	if ((p.buf_utf8proc = malloc(p.buflen)) == NULL) {
     71		fprintf(stderr, "malloc: %s\n", strerror(errno));
     72		exit(1);
     73	}
     74	for (i = 0; i < p.buflen; i++) {
     75		/*
     76		 * even if char is larger than 8 bit, it will only have
     77		 * any of the first 8 bits set (by construction).
     78		 */
     79		p.buf_utf8proc[i] = (utf8proc_uint8_t)p.buf[i];
     80	}
     81
     82	printf("%s\n", argv[0]);
     83	run_benchmark(libgrapheme, &p, "libgrapheme ", NULL, "byte", &baseline,
     84	              NUM_ITERATIONS, p.buflen);
     85	run_benchmark(libutf8proc, &p, "libutf8proc ",
     86	              "but unsafe (does not detect overlong encodings)", "byte",
     87	              &baseline, NUM_ITERATIONS, p.buflen);
     88
     89	free(p.buf);
     90	free(p.buf_utf8proc);
     91
     92	return 0;
     93}