libgrapheme

Freestanding C library for Unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

character.c (1991B)


      1/* See LICENSE file for copyright and license details. */
      2#include <errno.h>
      3#include <math.h>
      4#include <stdint.h>
      5#include <stdio.h>
      6#include <stdlib.h>
      7#include <string.h>
      8
      9#include "../gen/character-test.h"
     10#include "../grapheme.h"
     11#include "util.h"
     12
     13#include <utf8proc.h>
     14
     15#define NUM_ITERATIONS 100000
     16
     17struct break_benchmark_payload {
     18	uint_least32_t *buf;
     19	utf8proc_int32_t *buf_utf8proc;
     20	size_t buflen;
     21};
     22
     23void
     24libgrapheme(const void *payload)
     25{
     26	uint_least16_t state = 0;
     27	const struct break_benchmark_payload *p = payload;
     28	size_t i;
     29
     30	for (i = 0; i + 1 < p->buflen; i++) {
     31		(void)grapheme_is_character_break(p->buf[i], p->buf[i + 1],
     32		                                  &state);
     33	}
     34}
     35
     36void
     37libutf8proc(const void *payload)
     38{
     39	utf8proc_int32_t state = 0;
     40	const struct break_benchmark_payload *p = payload;
     41	size_t i;
     42
     43	for (i = 0; i + 1 < p->buflen; i++) {
     44		(void)utf8proc_grapheme_break_stateful(
     45			p->buf_utf8proc[i], p->buf_utf8proc[i + 1], &state);
     46	}
     47}
     48
     49int
     50main(int argc, char *argv[])
     51{
     52	struct break_benchmark_payload p;
     53	double baseline = (double)NAN;
     54	size_t i;
     55
     56	(void)argc;
     57
     58	if ((p.buf = generate_cp_test_buffer(character_break_test,
     59	                                     LEN(character_break_test),
     60	                                     &(p.buflen))) == NULL) {
     61		return 1;
     62	}
     63	if ((p.buf_utf8proc = malloc(p.buflen * sizeof(*(p.buf_utf8proc)))) ==
     64	    NULL) {
     65		fprintf(stderr, "malloc: %s\n", strerror(errno));
     66		exit(1);
     67	}
     68	for (i = 0; i < p.buflen; i++) {
     69		/*
     70		 * there is no overflow, as we know that the maximum
     71		 * codepoint is 0x10FFFF, which is way below 2^31
     72		 */
     73		p.buf_utf8proc[i] = (utf8proc_int32_t)p.buf[i];
     74	}
     75
     76	printf("%s\n", argv[0]);
     77	run_benchmark(libgrapheme, &p, "libgrapheme ", NULL, "comparison",
     78	              &baseline, NUM_ITERATIONS, p.buflen - 1);
     79	run_benchmark(libutf8proc, &p, "libutf8proc ", NULL, "comparison",
     80	              &baseline, NUM_ITERATIONS, p.buflen - 1);
     81
     82	free(p.buf);
     83	free(p.buf_utf8proc);
     84
     85	return 0;
     86}