commit 11bb3d9dc796bb006c79c2962a7d19abcadfb3df
parent 7e834d77024d770875559d853b09b8bb7f9321a1
Author: Steven G. Johnson <stevenj@alum.mit.edu>
Date: Sun, 29 Mar 2020 08:53:11 -0400
fix grapheme test to work on unmodified data file
Diffstat:
3 files changed, 6 insertions(+), 7 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -66,8 +66,7 @@ if(UTF8PROC_ENABLE_TESTING)
file(MAKE_DIRECTORY data)
set(UNICODE_VERSION 13.0.0)
file(DOWNLOAD http://www.unicode.org/Public/${UNICODE_VERSION}/ucd/NormalizationTest.txt data/NormalizationTest.txt SHOW_PROGRESS)
- file(DOWNLOAD http://www.unicode.org/Public/${UNICODE_VERSION}/ucd/auxiliary/GraphemeBreakTest.txt data/GraphemeBreakTestOrg.txt SHOW_PROGRESS)
- execute_process(COMMAND bash -c "cat data/GraphemeBreakTestOrg.txt | /usr/bin/perl -pe 's,÷,/,g;s,×,+,g' && rm -f data/GraphemeBreakTestOrg.txt" OUTPUT_FILE data/GraphemeBreakTest.txt)
+ file(DOWNLOAD http://www.unicode.org/Public/${UNICODE_VERSION}/ucd/auxiliary/GraphemeBreakTest.txt data/GraphemeBreakTest.txt SHOW_PROGRESS)
add_executable(case test/tests.h test/tests.c utf8proc.h test/case.c)
target_link_libraries(case utf8proc)
add_executable(custom test/tests.h test/tests.c utf8proc.h test/custom.c)
diff --git a/data/Makefile b/data/Makefile
@@ -46,7 +46,7 @@ NormalizationTest.txt:
$(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/NormalizationTest.txt
GraphemeBreakTest.txt:
- $(CURL) $(CURLFLAGS) $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakTest.txt | $(PERL) -pe 's,÷,/,g;s,×,+,g' > $@
+ $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakTest.txt
emoji-data.txt:
$(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://unicode.org/Public/$(UNICODE_VERSION)/ucd/emoji/emoji-data.txt
diff --git a/test/graphemetest.c b/test/graphemetest.c
@@ -18,12 +18,12 @@ int main(int argc, char **argv)
while (buf[bi]) {
bi = skipspaces(buf, bi);
- if (buf[bi] == '/') { /* grapheme break */
+ if ((uint8_t)buf[bi] == 0xc3 && (uint8_t)buf[bi+1] == 0xb7) { /* U+00f7 = grapheme break */
src[si++] = '/';
- bi++;
+ bi += 2;
}
- else if (buf[bi] == '+') { /* no break */
- bi++;
+ else if ((uint8_t)buf[bi] == 0xc3 && (uint8_t)buf[bi+1] == 0x97) { /* U+00d7 = no break */
+ bi += 2;
}
else if (buf[bi] == '#') { /* start of comments */
break;