utf8proc

A clean C library for processing UTF-8 Unicode data
git clone https://git.sinitax.com/juliastrings/utf8proc
Log | Files | Refs | README | LICENSE | sfeed.txt

commit 11bb3d9dc796bb006c79c2962a7d19abcadfb3df
parent 7e834d77024d770875559d853b09b8bb7f9321a1
Author: Steven G. Johnson <stevenj@alum.mit.edu>
Date:   Sun, 29 Mar 2020 08:53:11 -0400

fix grapheme test to work on unmodified data file

Diffstat:
M CMakeLists.txt | 3 +--
M data/Makefile | 2 +-
M test/graphemetest.c | 8 ++++----
3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -66,8 +66,7 @@ if(UTF8PROC_ENABLE_TESTING)
   file(MAKE_DIRECTORY data)
   set(UNICODE_VERSION 13.0.0)
   file(DOWNLOAD http://www.unicode.org/Public/${UNICODE_VERSION}/ucd/NormalizationTest.txt data/NormalizationTest.txt SHOW_PROGRESS)
-  file(DOWNLOAD http://www.unicode.org/Public/${UNICODE_VERSION}/ucd/auxiliary/GraphemeBreakTest.txt data/GraphemeBreakTestOrg.txt SHOW_PROGRESS)
-  execute_process(COMMAND bash -c "cat data/GraphemeBreakTestOrg.txt | /usr/bin/perl -pe 's,÷,/,g;s,×,+,g' && rm -f data/GraphemeBreakTestOrg.txt" OUTPUT_FILE data/GraphemeBreakTest.txt)
+  file(DOWNLOAD http://www.unicode.org/Public/${UNICODE_VERSION}/ucd/auxiliary/GraphemeBreakTest.txt data/GraphemeBreakTest.txt SHOW_PROGRESS)
   add_executable(case test/tests.h test/tests.c utf8proc.h test/case.c)
   target_link_libraries(case utf8proc)
   add_executable(custom test/tests.h test/tests.c utf8proc.h test/custom.c)
diff --git a/data/Makefile b/data/Makefile
@@ -46,7 +46,7 @@ NormalizationTest.txt:
 	$(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/NormalizationTest.txt
 
 GraphemeBreakTest.txt:
-	$(CURL) $(CURLFLAGS) $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakTest.txt | $(PERL) -pe 's,÷,/,g;s,×,+,g' > $@
+	$(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakTest.txt
 
 emoji-data.txt:
 	$(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://unicode.org/Public/$(UNICODE_VERSION)/ucd/emoji/emoji-data.txt
diff --git a/test/graphemetest.c b/test/graphemetest.c
@@ -18,12 +18,12 @@ int main(int argc, char **argv)
 
         while (buf[bi]) {
             bi = skipspaces(buf, bi);
-            if (buf[bi] == '/') { /* grapheme break */
+            if ((uint8_t)buf[bi] == 0xc3 && (uint8_t)buf[bi+1] == 0xb7) { /* U+00f7 = grapheme break */
                 src[si++] = '/';
-                bi++;
+                bi += 2;
             }
-            else if (buf[bi] == '+') { /* no break */
-                bi++;
+            else if ((uint8_t)buf[bi] == 0xc3 && (uint8_t)buf[bi+1] == 0x97) { /* U+00d7 = no break */
+                bi += 2;
             }
             else if (buf[bi] == '#') { /* start of comments */
                 break;