libgrapheme

Freestanding C library for unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

commit b5806959dff1f7614e84f116f68f90c7711375ce
parent 1c126d7ee10854b29e606e4eeb491621d021beeb
Author: Laslo Hunhold <dev@frign.de>
Date:   Thu,  9 Dec 2021 17:22:20 +0100

Add grapheme break performance test

This helps to test modifications to the algorithms. We are currently
at roughly 3 million codepoint-break-checks per second (~3-12GB/s)
(on my machine(tm)), which is already pretty good in my opinion.

Signed-off-by: Laslo Hunhold <dev@frign.de>

Diffstat:
M Makefile | 4 +++-
A test/grapheme-performance.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile @@ -10,7 +10,7 @@ DATA =\ data/GraphemeBreakTest.txt GEN = gen/grapheme gen/grapheme-test LIB = src/grapheme src/utf8 src/util -TEST = test/grapheme test/utf8-decode test/utf8-encode +TEST = test/grapheme test/grapheme-performance test/utf8-decode test/utf8-encode MAN3 = man/grapheme_bytelen.3 MAN7 = man/libgrapheme.7 @@ -24,12 +24,14 @@ src/utf8.o: src/utf8.c config.mk grapheme.h src/grapheme.o: src/grapheme.c config.mk gen/grapheme.h grapheme.h src/util.h src/util.o: src/util.c config.mk src/util.h test/grapheme.o: test/grapheme.c config.mk gen/grapheme-test.h grapheme.h +test/grapheme-performance.o: test/grapheme-performance.c config.mk gen/grapheme-test.h grapheme.h test/utf8-encode.o: test/utf8-encode.c config.mk grapheme.h test/utf8-decode.o: test/utf8-decode.c config.mk grapheme.h gen/grapheme: gen/grapheme.o gen/util.o gen/grapheme-test: gen/grapheme-test.o gen/util.o test/grapheme: test/grapheme.o libgrapheme.a +test/grapheme-performance: test/grapheme-performance.o libgrapheme.a test/utf8-encode: test/utf8-encode.o libgrapheme.a test/utf8-decode: test/utf8-decode.o libgrapheme.a diff --git a/test/grapheme-performance.c b/test/grapheme-performance.c @@ -0,0 +1,64 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> + +#include "../grapheme.h" +#include "../gen/grapheme-test.h" + +#define LEN(x) (sizeof(x) / sizeof(*x)) +#define NUM_ITERATIONS 10000 + +int64_t time_diff(struct timespec *a, struct timespec *b) +{ + return ((b->tv_sec * 1000000000) + b->tv_nsec) - + ((a->tv_sec * 1000000000) + a->tv_nsec); +} + +int +main(void) +{ + struct timespec start, end; + size_t i, j, bufsiz, off; + uint32_t *buf; + int state; + double cp_per_sec; + + /* allocate and generate buffer */ + for (i = 0, bufsiz = 0; i < LEN(grapheme_test); i++) { + bufsiz += grapheme_test[i].cplen; + } + if (!(buf = calloc(bufsiz, sizeof(*buf)))) { + fprintf(stderr, "calloc: Out of memory.\n"); + return 1; + } + for (i = 0, off = 0; i < LEN(grapheme_test); i++) { + for (j = 0; j < grapheme_test[i].cplen; j++) { + buf[off + j] = grapheme_test[i].cp[j]; + } + off += grapheme_test[i].cplen; + } + + /* run test */ + printf("Grapheme break performance test: "); + fflush(stdout); + + clock_gettime(CLOCK_MONOTONIC, &start); + for (i = 0; i < NUM_ITERATIONS; i++) { + for (j = 0; j < bufsiz - 1; j++) { + (void)lg_grapheme_isbreak(buf[j], buf[j+1], &state); + } + if (i % (NUM_ITERATIONS / 10) == 0) { + printf("."); + fflush(stdout); + } + } + clock_gettime(CLOCK_MONOTONIC, &end); + cp_per_sec = ((double)NUM_ITERATIONS * bufsiz) / + ((double)time_diff(&start, &end) / 1000000000); + + printf(" %.2e CP/s\n", cp_per_sec); + + return 0; +}