charwidth.c (3117B)
1#include "tests.h" 2#include <ctype.h> 3#include <wchar.h> 4 5static int my_unassigned(int c) { 6 int cat = utf8proc_get_property(c)->category; 7 return (cat == UTF8PROC_CATEGORY_CN) || (cat == UTF8PROC_CATEGORY_CO); 8} 9 10static int my_isprint(int c) { 11 int cat = utf8proc_get_property(c)->category; 12 return (UTF8PROC_CATEGORY_LU <= cat && cat <= UTF8PROC_CATEGORY_ZS) || 13 (c == 0x0601 || c == 0x0602 || c == 0x0603 || c == 0x06dd || c == 0x00ad) || 14 (cat == UTF8PROC_CATEGORY_CN) || (cat == UTF8PROC_CATEGORY_CO); 15} 16 17int main(int argc, char **argv) 18{ 19 int c, error = 0, updates = 0; 20 21 (void) argc; /* unused */ 22 (void) argv; /* unused */ 23 24 /* some simple sanity tests of the character widths */ 25 for (c = 0; c <= 0x110000; ++c) { 26 int cat = utf8proc_get_property(c)->category; 27 int w = utf8proc_charwidth(c); 28 if ((cat == UTF8PROC_CATEGORY_MN || cat == UTF8PROC_CATEGORY_ME) && w > 0) { 29 fprintf(stderr, "nonzero width %d for combining char %x\n", w, c); 30 error += 1; 31 } 32 if (w == 0 && 33 ((cat >= UTF8PROC_CATEGORY_LU && cat <= UTF8PROC_CATEGORY_LO) || 34 (cat >= UTF8PROC_CATEGORY_ND && cat <= UTF8PROC_CATEGORY_SC) || 35 (cat >= UTF8PROC_CATEGORY_SO && cat <= UTF8PROC_CATEGORY_ZS))) { 36 fprintf(stderr, "zero width for symbol-like char %x\n", c); 37 error += 1; 38 } 39 if (c <= 127 && ((!isprint(c) && w > 0) || (isprint(c) && wcwidth(c) != w))) { 40 fprintf(stderr, "wcwidth %d mismatch %d for %s ASCII %x\n", 41 wcwidth(c), w, 42 isprint(c) ? "printable" : "non-printable", c); 43 error += 1; 44 } 45 if (!my_isprint(c) && w > 0) { 46 fprintf(stderr, "non-printing %x had width %d\n", c, w); 47 error += 1; 48 } 49 if (my_unassigned(c) && w != 1) { 50 fprintf(stderr, "unexpected width %d for unassigned char %x\n", w, c); 51 error += 1; 52 } 53 } 54 check(!error, "utf8proc_charwidth FAILED %d tests.", error); 55 56 check(utf8proc_charwidth(0x00ad) == 1, "incorrect width for U+00AD (soft hyphen)"); 57 check(utf8proc_charwidth(0xe000) == 1, "incorrect width for U+e000 (PUA)"); 58 59 /* print some other information by compariing with system wcwidth */ 60 printf("Mismatches with system wcwidth (not necessarily errors):\n"); 61 for (c = 0; c <= 0x110000; ++c) { 62 int w = utf8proc_charwidth(c); 63 int wc = wcwidth(c); 64 if (sizeof(wchar_t) == 2 && c >= (1<<16)) continue; 65 /* lots of these errors for out-of-date system unicode tables */ 66 if (wc == -1 && my_isprint(c) && !my_unassigned(c) && w > 0) 67 updates += 1; 68 if (wc == -1 && !my_isprint(c) && w > 0) 69 printf(" wcwidth(%x) = -1 for non-printable width-%d char\n", c, w); 70 if (wc >= 0 && wc != w) 71 printf(" wcwidth(%x) = %d != charwidth %d\n", c, wc, w); 72 } 73 printf(" ... (positive widths for %d chars unknown to wcwidth) ...\n", updates); 74 printf("Character-width tests SUCCEEDED.\n"); 75 76 return 0; 77}