utf8proc

A clean C library for processing UTF-8 Unicode data
git clone https://git.sinitax.com/juliastrings/utf8proc
Log | Files | Refs | README | LICENSE | sfeed.txt

case.c (3221B)


      1#include "tests.h"
      2#include <wctype.h>
      3
      4int main(int argc, char **argv)
      5{
      6     int error = 0, better = 0;
      7     utf8proc_int32_t c;
      8
      9     (void) argc; /* unused */
     10     (void) argv; /* unused */
     11
     12     /* some simple sanity tests of the character widths */
     13     for (c = 0; c <= 0x110000; ++c) {
     14          utf8proc_int32_t l = utf8proc_tolower(c);
     15          utf8proc_int32_t u = utf8proc_toupper(c);
     16          utf8proc_int32_t t = utf8proc_totitle(c);
     17
     18          check(l == c || utf8proc_codepoint_valid(l), "invalid tolower");
     19          check(u == c || utf8proc_codepoint_valid(u), "invalid toupper");
     20          check(t == c || utf8proc_codepoint_valid(t), "invalid totitle");
     21
     22          if (utf8proc_codepoint_valid(c) && (l == u) != (l == t) &&
     23              /* Unicode 11: Georgian Mkhedruli chars have uppercase but no titlecase. */
     24              !(((c >= 0x10d0 && c <= 0x10fa) || c >= (0x10fd && c <= 0x10ff)) && l != u)) {
     25               fprintf(stderr, "unexpected titlecase %x for lowercase %x / uppercase %x\n", t, l, c);
     26               ++error;
     27          }
     28
     29          if (sizeof(wint_t) > 2 || (c < (1<<16) && u < (1<<16) && l < (1<<16))) {
     30               wint_t l0 = towlower((wint_t)c), u0 = towupper((wint_t)c);
     31
     32               /* OS unicode tables may be out of date.  But if they
     33                  do have a lower/uppercase mapping, hopefully it
     34                  is correct? */
     35               if (l0 != (wint_t)c && l0 != (wint_t)l) {
     36                    fprintf(stderr, "MISMATCH %x != towlower(%x) == %x\n",
     37                            l, c, l0);
     38                    ++error;
     39               }
     40               else if (l0 != (wint_t)l) { /* often true for out-of-date OS unicode */
     41                    ++better;
     42                    /* printf("%x != towlower(%x) == %x\n", l, c, l0); */
     43               }
     44               if (u0 != (wint_t)c && u0 != (wint_t)u) {
     45                    fprintf(stderr, "MISMATCH %x != towupper(%x) == %x\n",
     46                            u, c, u0);
     47                    ++error;
     48               }
     49               else if (u0 != (wint_t)u) { /* often true for out-of-date OS unicode */
     50                    ++better;
     51                    /* printf("%x != towupper(%x) == %x\n", u, c, u0); */
     52               }
     53          }
     54     }
     55     check(!error, "utf8proc case conversion FAILED %d tests.", error);
     56
     57     /* issue #130 */
     58     check(utf8proc_toupper(0x00df) == 0x1e9e &&
     59           utf8proc_totitle(0x00df) == 0x1e9e &&
     60           utf8proc_tolower(0x00df) == 0x00df &&
     61           utf8proc_tolower(0x1e9e) == 0x00df &&
     62           utf8proc_toupper(0x1e9e) == 0x1e9e,
     63           "incorrect 0x00df/0x1e9e case conversions");
     64     utf8proc_uint8_t str_00df[] = {0xc3, 0x9f, 0x00};
     65     utf8proc_uint8_t str_1e9e[] = {0xe1, 0xba, 0x9e, 0x00};
     66     utf8proc_uint8_t *s1 = utf8proc_NFKC_Casefold(str_00df);
     67     utf8proc_uint8_t *s2 = utf8proc_NFKC_Casefold(str_1e9e);
     68     check(!strcmp((char*)s1, "ss") &&
     69           !strcmp((char*)s2, "ss"),
     70           "incorrect 0x00df/0x1e9e casefold normalization");
     71     free(s1);
     72     free(s2);
     73     printf("More up-to-date than OS unicode tables for %d tests.\n", better);
     74     printf("utf8proc case conversion tests SUCCEEDED.\n");
     75     return 0;
     76}