utf8proc

A clean C library for processing UTF-8 Unicode data
git clone https://git.sinitax.com/juliastrings/utf8proc
Log | Files | Refs | README | LICENSE | sfeed.txt

fuzzer.c (2222B)


      1#include <utf8proc.h>
      2#include <string.h>
      3
      4int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
      5{
      6    if(size < 1) return 0;
      7
      8    /* Avoid timeout with long inputs */
      9    if(size > (64 * 1024)) return 0;
     10
     11    if(data[size-1] != '\0') return 0;
     12
     13    const uint8_t* ptr = data;
     14    utf8proc_int32_t c = 0, c_prev = 0, state = 0;
     15    utf8proc_option_t options;
     16    utf8proc_ssize_t ret, bytes = 0;
     17    size_t len = strlen((const char*)data);
     18    
     19    while(bytes != len)
     20    {
     21        ret = utf8proc_iterate(ptr, -1, &c);
     22        
     23        if(ret < 0 || ret == 0) break;
     24        
     25        bytes += ret;
     26        ptr += ret;
     27
     28        utf8proc_tolower(c);
     29        utf8proc_toupper(c);
     30        utf8proc_totitle(c);
     31        utf8proc_islower(c);
     32        utf8proc_isupper(c);
     33        utf8proc_charwidth(c);
     34        utf8proc_category(c);
     35        utf8proc_category_string(c);
     36        utf8proc_codepoint_valid(c);
     37        
     38        utf8proc_grapheme_break(c_prev, c);
     39        utf8proc_grapheme_break_stateful(c_prev, c, &state);
     40        
     41        c_prev = c;
     42    }
     43    
     44    utf8proc_int32_t *copy = size >= 4 ? NULL : malloc(size);
     45    
     46    if(copy)
     47    {
     48        size /= 4;
     49        
     50        options = UTF8PROC_STRIPCC | UTF8PROC_NLF2LS | UTF8PROC_NLF2PS;
     51        memcpy(copy, data, size);
     52        utf8proc_normalize_utf32(copy, size, options);
     53        
     54        options = UTF8PROC_STRIPCC | UTF8PROC_NLF2LS;
     55        memcpy(copy, data, size);
     56        utf8proc_normalize_utf32(copy, size, options);
     57        
     58        options = UTF8PROC_STRIPCC | UTF8PROC_NLF2PS;
     59        memcpy(copy, data, size);
     60        utf8proc_normalize_utf32(copy, size, options);
     61        
     62        options = UTF8PROC_STRIPCC;
     63        memcpy(copy, data, size);
     64        utf8proc_normalize_utf32(copy, size, options);
     65
     66        options = UTF8PROC_LUMP;
     67        memcpy(copy, data, size);
     68        utf8proc_normalize_utf32(copy, size, options);
     69
     70        options = 0;
     71        memcpy(copy, data, size);
     72        utf8proc_normalize_utf32(copy, size, options);
     73        
     74        free(copy);
     75    }
     76
     77    free(utf8proc_NFD(data));
     78    free(utf8proc_NFC(data));
     79    free(utf8proc_NFKD(data));
     80    free(utf8proc_NFKC(data));
     81    free(utf8proc_NFKC_Casefold(data));
     82
     83    return 0;
     84}