icu.c (1674B)
1#include <stdio.h> 2#include <stdlib.h> 3 4/* ICU4C */ 5#include <unicode/utypes.h> 6#include <unicode/ustring.h> 7#include <unicode/ucnv.h> 8#include <unicode/unorm2.h> 9 10#include "util.h" 11 12int main(int argc, char **argv) 13{ 14 int i; 15 16 UErrorCode err; 17 UConverter *uc = ucnv_open("UTF8", &err); 18 if (U_FAILURE(err)) return EXIT_FAILURE; 19 20 const UNormalizer2 *NFKC = unorm2_getNFKCInstance(&err); 21 if (U_FAILURE(err)) return EXIT_FAILURE; 22 23 for (i = 1; i < argc; ++i) { 24 if (argv[i][0] == '-') { 25 fprintf(stderr, "unrecognized option: %s\n", argv[i]); 26 return EXIT_FAILURE; 27 } 28 29 size_t len; 30 uint8_t *src = readfile(argv[i], &len); 31 if (!src) { 32 fprintf(stderr, "error reading %s\n", argv[i]); 33 return EXIT_FAILURE; 34 } 35 36 /* convert UTF8 data to ICU's UTF16 */ 37 UChar *usrc = (UChar*) malloc(2*len * sizeof(UChar)); 38 ucnv_toUChars(uc, usrc, 2*len, (char*) src, len, &err); 39 if (U_FAILURE(err)) return EXIT_FAILURE; 40 size_t ulen = u_strlen(usrc); 41 42 /* ICU's insane normalization API requires you to 43 know the size of the destination buffer in advance, 44 or alternatively to repeatedly try normalizing and 45 double the buffer size until it succeeds. Here, I just 46 allocate a huge destination buffer to avoid the issue. */ 47 UChar *udest = (UChar*) malloc(10*ulen * sizeof(UChar)); 48 49 mytime start = gettime(); 50 for (int i = 0; i < 100; ++i) { 51 unorm2_normalize(NFKC, usrc, ulen, udest, 10*ulen, &err); 52 if (U_FAILURE(err)) return EXIT_FAILURE; 53 } 54 printf("%s: %g\n", argv[i], elapsed(gettime(), start) / 100); 55 free(udest); 56 free(usrc); 57 free(src); 58 } 59 60 return EXIT_SUCCESS; 61}