utf8proc

A clean C library for processing UTF-8 Unicode data
git clone https://git.sinitax.com/juliastrings/utf8proc
Log | Files | Refs | README | LICENSE | sfeed.txt

commit a39c1a6ea287e10c72c8b5d3013d232b7f85af3c
parent 5fbd15b4ce9db9b12b48cf3b94c70280bf20a13b
Author: Steven G. Johnson <stevenj@alum.mit.edu>
Date:   Sat, 19 Jul 2014 14:55:25 -0400

added GNU libunistring benchmark

Diffstat:
M .gitignore        |  7 +++++--
M bench/Makefile    |  8 +++++++-
A bench/unistring.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
@@ -10,5 +10,8 @@
 *.dSYM
 *.txt
 *.out
-bench
-icu
+bench/bench
+bench/icu
+bench/unistring
+
+
diff --git a/bench/Makefile b/bench/Makefile
@@ -26,8 +26,14 @@ icu: icu.o util.o
 icu.out: $(DATAFILES) icu
 	./icu $(DATAFILES) > $@
 
+unistring: unistring.o util.o
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ unistring.o util.o -lunistring
+
+unistring.out: $(DATAFILES) unistring
+	./unistring $(DATAFILES) > $@
+
 .c.o:
 	$(CC) $(CPPFLAGS) -I.. $(CFLAGS) -c -o $@ $<
 
 clean:
-	rm -rf *.o *.txt bench *.out icu
+	rm -rf *.o *.txt bench *.out icu unistring
diff --git a/bench/unistring.c b/bench/unistring.c
@@ -0,0 +1,60 @@
+/* comparitive benchmark of GNU libunistring */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* libunistring */
+#include <unistr.h>
+#include <uninorm.h>
+
+#include "util.h"
+
+int main(int argc, char **argv)
+{
+    int i;
+    uninorm_t nf = UNINORM_NFKC;
+
+    for (i = 1; i < argc; ++i) {
+        if (!strcmp(argv[i], "-nfkc")) {
+            nf = UNINORM_NFKC;
+            continue;
+        }
+        if (!strcmp(argv[i], "-nfkd")) {
+            nf = UNINORM_NFKD;
+            continue;
+        }
+        if (!strcmp(argv[i], "-nfc")) {
+            nf = UNINORM_NFC;
+            continue;
+        }
+        if (!strcmp(argv[i], "-nfd")) {
+            nf = UNINORM_NFD;
+            continue;
+        }
+        if (argv[i][0] == '-') {
+            fprintf(stderr, "unrecognized option: %s\n", argv[i]);
+            return EXIT_FAILURE;
+        }
+
+        size_t len;
+        uint8_t *src = readfile(argv[i], &len);
+        if (!src) {
+            fprintf(stderr, "error reading %s\n", argv[i]);
+            return EXIT_FAILURE;
+        }
+
+        size_t destlen;
+        uint8_t *dest;
+        mytime start = gettime();
+        for (int i = 0; i < 100; ++i) {
+            dest = u8_normalize(nf, src, len, NULL, &destlen);
+            if (!dest) return EXIT_FAILURE;
+            free(dest);
+        }
+        printf("%s: %g\n", argv[i], elapsed(gettime(), start) / 100);
+        free(src);
+    }
+
+    return EXIT_SUCCESS;
+}