utf8proc

A clean C library for processing UTF-8 Unicode data
git clone https://git.sinitax.com/juliastrings/utf8proc
Log | Files | Refs | README | LICENSE | sfeed.txt

normtest.c (2145B)


      1#include "tests.h"
      2
      3#define CHECK_NORM(NRM, norm, src) {                                 \
      4    unsigned char *src_norm = (unsigned char*) utf8proc_ ## NRM((utf8proc_uint8_t*) src);      \
      5    check(!strcmp((char *) norm, (char *) src_norm),                                  \
      6          "normalization failed for %s -> %s", src, norm);          \
      7    free(src_norm);                                                 \
      8}
      9
     10int main(int argc, char **argv)
     11{
     12     unsigned char buf[8192];
     13     FILE *f = argc > 1 ? fopen(argv[1], "r") : NULL;
     14     unsigned char source[1024], NFC[1024], NFD[1024], NFKC[1024], NFKD[1024];
     15
     16     check(f != NULL, "error opening NormalizationTest.txt");
     17     while (simple_getline(buf, f) > 0) {
     18          size_t offset;
     19          lineno += 1;
     20
     21          if (buf[0] == '@') {
     22               printf("line %zd: %s", lineno, buf + 1);
     23               continue;
     24          }
     25          else if (lineno % 1000 == 0)
     26               printf("checking line %zd...\n", lineno);
     27
     28          if (buf[0] == '#') continue;
     29
     30          offset = encode(source, buf);
     31          offset += encode(NFC, buf + offset);
     32          offset += encode(NFD, buf + offset);
     33          offset += encode(NFKC, buf + offset);
     34          offset += encode(NFKD, buf + offset);
     35
     36          CHECK_NORM(NFC, NFC, source);
     37          CHECK_NORM(NFC, NFC, NFC);
     38          CHECK_NORM(NFC, NFC, NFD);
     39          CHECK_NORM(NFC, NFKC, NFKC);
     40          CHECK_NORM(NFC, NFKC, NFKD);
     41
     42          CHECK_NORM(NFD, NFD, source);
     43          CHECK_NORM(NFD, NFD, NFC);
     44          CHECK_NORM(NFD, NFD, NFD);
     45          CHECK_NORM(NFD, NFKD, NFKC);
     46          CHECK_NORM(NFD, NFKD, NFKD);
     47
     48          CHECK_NORM(NFKC, NFKC, source);
     49          CHECK_NORM(NFKC, NFKC, NFC);
     50          CHECK_NORM(NFKC, NFKC, NFD);
     51          CHECK_NORM(NFKC, NFKC, NFKC);
     52          CHECK_NORM(NFKC, NFKC, NFKD);
     53
     54          CHECK_NORM(NFKD, NFKD, source);
     55          CHECK_NORM(NFKD, NFKD, NFC);
     56          CHECK_NORM(NFKD, NFKD, NFD);
     57          CHECK_NORM(NFKD, NFKD, NFKC);
     58          CHECK_NORM(NFKD, NFKD, NFKD);
     59     }
     60     fclose(f);
     61     printf("Passed tests after %zd lines!\n", lineno);
     62     return 0;
     63}