utf8proc

A clean C library for processing UTF-8 Unicode data
git clone https://git.sinitax.com/juliastrings/utf8proc
Log | Files | Refs | README | LICENSE | sfeed.txt

icu.c (1674B)


      1#include <stdio.h>
      2#include <stdlib.h>
      3
      4/* ICU4C */
      5#include <unicode/utypes.h>
      6#include <unicode/ustring.h>
      7#include <unicode/ucnv.h>
      8#include <unicode/unorm2.h>
      9
     10#include "util.h"
     11
     12int main(int argc, char **argv)
     13{
     14	 int i;
     15
     16	 UErrorCode err;
     17	 UConverter *uc = ucnv_open("UTF8", &err);
     18	 if (U_FAILURE(err)) return EXIT_FAILURE;
     19
     20	 const UNormalizer2 *NFKC = unorm2_getNFKCInstance(&err);
     21	 if (U_FAILURE(err)) return EXIT_FAILURE;
     22	 
     23	 for (i = 1; i < argc; ++i) {
     24		  if (argv[i][0] == '-') {
     25			   fprintf(stderr, "unrecognized option: %s\n", argv[i]);
     26			   return EXIT_FAILURE;
     27		  }
     28
     29		  size_t len;
     30		  uint8_t *src = readfile(argv[i], &len);
     31		  if (!src) {
     32			   fprintf(stderr, "error reading %s\n", argv[i]);
     33			   return EXIT_FAILURE;
     34		  }
     35
     36		  /* convert UTF8 data to ICU's UTF16 */
     37		  UChar *usrc = (UChar*) malloc(2*len * sizeof(UChar));
     38		  ucnv_toUChars(uc, usrc, 2*len, (char*) src, len, &err);
     39		  if (U_FAILURE(err)) return EXIT_FAILURE;
     40		  size_t ulen = u_strlen(usrc);
     41
     42		  /* ICU's insane normalization API requires you to
     43			 know the size of the destination buffer in advance,
     44			 or alternatively to repeatedly try normalizing and
     45			 double the buffer size until it succeeds.  Here, I just
     46			 allocate a huge destination buffer to avoid the issue. */
     47		  UChar *udest = (UChar*) malloc(10*ulen * sizeof(UChar));
     48
     49		  mytime start = gettime();
     50		  for (int i = 0; i < 100; ++i) {
     51			   unorm2_normalize(NFKC, usrc, ulen, udest, 10*ulen, &err);
     52			   if (U_FAILURE(err)) return EXIT_FAILURE;
     53		  }
     54		  printf("%s: %g\n", argv[i], elapsed(gettime(), start) / 100);
     55		  free(udest);
     56		  free(usrc);
     57		  free(src);
     58	 }
     59
     60	 return EXIT_SUCCESS;
     61}