utf8proc

A clean C library for processing UTF-8 Unicode data
git clone https://git.sinitax.com/juliastrings/utf8proc
Log | Files | Refs | README | LICENSE | sfeed.txt

misc.c (2478B)


      1/* Miscellaneous tests, e.g. regression tests */
      2
      3#include "tests.h"
      4
      5static void issue128(void) /* #128 */
      6{
      7    utf8proc_uint8_t input[] = {0x72, 0xcc, 0x87, 0xcc, 0xa3, 0x00}; /* "r\u0307\u0323" */
      8    utf8proc_uint8_t nfc[] = {0xe1, 0xb9, 0x9b, 0xcc, 0x87, 0x00}; /* "\u1E5B\u0307" */
      9    utf8proc_uint8_t nfd[] = {0x72, 0xcc, 0xa3, 0xcc, 0x87, 0x00}; /* "r\u0323\u0307" */
     10    utf8proc_uint8_t *nfc_out, *nfd_out;
     11    nfc_out = utf8proc_NFC(input);
     12    printf("NFC \"%s\" -> \"%s\" vs. \"%s\"\n", (char*)input, (char*)nfc_out, (char*)nfc);
     13    check(strlen((char*) nfc_out) == 5, "incorrect nfc length");
     14    check(!memcmp(nfc, nfc_out, 6), "incorrect nfc data");
     15    nfd_out = utf8proc_NFD(input);
     16    printf("NFD \"%s\" -> \"%s\" vs. \"%s\"\n", (char*)input, (char*)nfd_out, (char*)nfd);
     17    check(strlen((char*) nfd_out) == 5, "incorrect nfd length");
     18    check(!memcmp(nfd, nfd_out, 6), "incorrect nfd data");
     19    free(nfd_out); free(nfc_out);
     20}
     21
     22static void issue102(void) /* #128 */
     23{
     24    utf8proc_uint8_t input[] = {0x58, 0xe2, 0x81, 0xa5, 0x45, 0xcc, 0x80, 0xc2, 0xad, 0xe1, 0xb4, 0xac, 0x00}; /* "X\u2065E\u0300\u00ad\u1d2c" */
     25    utf8proc_uint8_t stripna[] = {0x78, 0xc3, 0xa8, 0x61, 0x00}; /* "x\u00e8a" */
     26    utf8proc_uint8_t correct[] = {0x78, 0xe2, 0x81, 0xa5, 0xc3, 0xa8, 0x61, 0x00}; /* "x\u2065\u00e8a" */
     27    utf8proc_uint8_t *output;
     28    utf8proc_map(input, 0, &output, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
     29        UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD | UTF8PROC_IGNORE | UTF8PROC_STRIPNA);
     30    printf("NFKC_Casefold \"%s\" -> \"%s\" vs. \"%s\"\n", (char*)input, (char*)output, (char*)stripna);
     31    check(strlen((char*) output) == 4, "incorrect NFKC_Casefold+stripna length");
     32    check(!memcmp(stripna, output, 5), "incorrect NFKC_Casefold+stripna data");
     33    free(output);
     34    output = utf8proc_NFKC_Casefold(input);
     35    printf("NFKC_Casefold \"%s\" -> \"%s\" vs. \"%s\"\n", (char*)input, (char*)output, (char*)correct);
     36    check(strlen((char*) output) == 7, "incorrect NFKC_Casefold length");
     37    check(!memcmp(correct, output, 8), "incorrect NFKC_Casefold data");
     38    free(output);
     39}
     40
     41int main(void)
     42{
     43    issue128();
     44    issue102();
     45#ifdef UNICODE_VERSION
     46    printf("Unicode version: Makefile has %s, has API %s\n", UNICODE_VERSION, utf8proc_unicode_version());
     47    check(!strcmp(UNICODE_VERSION, utf8proc_unicode_version()), "utf8proc_unicode_version mismatch");
     48#endif
     49    printf("Misc tests SUCCEEDED.\n");
     50    return 0;
     51}