utf8-decode.c (2129B)
1/* See LICENSE file for copyright and license details. */ 2#include <errno.h> 3#include <math.h> 4#include <stdint.h> 5#include <stdio.h> 6#include <stdlib.h> 7#include <string.h> 8 9#include "../gen/character-test.h" 10#include "../grapheme.h" 11#include "util.h" 12 13#include <utf8proc.h> 14 15#define NUM_ITERATIONS 100000 16 17struct utf8_benchmark_payload { 18 char *buf; 19 utf8proc_uint8_t *buf_utf8proc; 20 size_t buflen; 21}; 22 23void 24libgrapheme(const void *payload) 25{ 26 const struct utf8_benchmark_payload *p = payload; 27 uint_least32_t cp; 28 size_t ret, off; 29 30 for (off = 0; off < p->buflen; off += ret) { 31 if ((ret = grapheme_decode_utf8(p->buf + off, p->buflen - off, 32 &cp)) > (p->buflen - off)) { 33 break; 34 } 35 (void)cp; 36 } 37} 38 39void 40libutf8proc(const void *payload) 41{ 42 const struct utf8_benchmark_payload *p = payload; 43 utf8proc_int32_t cp; 44 utf8proc_ssize_t ret; 45 size_t off; 46 47 for (off = 0; off < p->buflen; off += (size_t)ret) { 48 if ((ret = utf8proc_iterate(p->buf_utf8proc + off, 49 (utf8proc_ssize_t)(p->buflen - off), 50 &cp)) < 0) { 51 break; 52 } 53 (void)cp; 54 } 55} 56 57int 58main(int argc, char *argv[]) 59{ 60 struct utf8_benchmark_payload p; 61 size_t i; 62 double baseline = (double)NAN; 63 64 (void)argc; 65 66 p.buf = generate_utf8_test_buffer( 67 character_break_test, LEN(character_break_test), &(p.buflen)); 68 69 /* convert cp-buffer to stupid custom libutf8proc-uint8-type */ 70 if ((p.buf_utf8proc = malloc(p.buflen)) == NULL) { 71 fprintf(stderr, "malloc: %s\n", strerror(errno)); 72 exit(1); 73 } 74 for (i = 0; i < p.buflen; i++) { 75 /* 76 * even if char is larger than 8 bit, it will only have 77 * any of the first 8 bits set (by construction). 78 */ 79 p.buf_utf8proc[i] = (utf8proc_uint8_t)p.buf[i]; 80 } 81 82 printf("%s\n", argv[0]); 83 run_benchmark(libgrapheme, &p, "libgrapheme ", NULL, "byte", &baseline, 84 NUM_ITERATIONS, p.buflen); 85 run_benchmark(libutf8proc, &p, "libutf8proc ", 86 "but unsafe (does not detect overlong encodings)", "byte", 87 &baseline, NUM_ITERATIONS, p.buflen); 88 89 free(p.buf); 90 free(p.buf_utf8proc); 91 92 return 0; 93}