utf8proc

A clean C library for processing UTF-8 Unicode data
git clone https://git.sinitax.com/juliastrings/utf8proc
Log | Files | Refs | README | LICENSE | sfeed.txt

tests.c (1793B)


      1/* Common functions for our test programs. */
      2
      3#include "tests.h"
      4
      5size_t lineno = 0;
      6
      7void check(int cond, const char *format, ...)
      8{
      9     if (!cond) {
     10          va_list args;
     11          fprintf(stderr, "line %zd: ", lineno);
     12          va_start(args, format);
     13          vfprintf(stderr, format, args);
     14          va_end(args);
     15          fprintf(stderr, "\n");
     16          exit(1);
     17     }
     18}
     19
     20size_t skipspaces(const unsigned char *buf, size_t i)
     21{
     22    while (isspace(buf[i])) ++i;
     23    return i;
     24}
     25
     26/* if buf points to a sequence of codepoints encoded as hexadecimal strings,
     27   separated by whitespace, and terminated by any character not in
     28   [0-9a-fA-F] or whitespace, then stores the corresponding utf8 string
     29   in dest, returning the number of bytes read from buf */
     30size_t encode(unsigned char *dest, const unsigned char *buf)
     31{
     32     size_t i = 0, j;
     33     utf8proc_ssize_t d = 0;
     34     for (;;) {
     35          int c;
     36          i = skipspaces(buf, i);
     37          for (j=i; buf[j] && strchr("0123456789abcdef", tolower(buf[j])); ++j)
     38               ; /* find end of hex input */
     39          if (j == i) { /* no codepoint found */
     40               dest[d] = 0; /* NUL-terminate destination string */
     41               return i + 1;
     42          }
     43          check(sscanf((char *) (buf + i), "%x", (unsigned int *)&c) == 1, "invalid hex input %s", buf+i);
     44          i = j; /* skip to char after hex input */
     45          d += utf8proc_encode_char(c, (utf8proc_uint8_t *) (dest + d));
     46     }
     47}
     48
     49/* simplistic, portable replacement for getline, sufficient for our tests */
     50size_t simple_getline(unsigned char buf[8192], FILE *f) {
     51    size_t i = 0;
     52    while (i < 8191) {
     53        int c = getc(f);
     54        if (c == EOF || c == '\n') break;
     55        buf[i++] = (unsigned char) c;
     56    }
     57    buf[i] = 0;
     58    return i;
     59}