directory cleanup: move tests and data into subdirectories - utf8proc - A clean C library for processing UTF-8 Unicode data

	utf8proc A clean C library for processing UTF-8 Unicode data
	git clone https://git.sinitax.com/juliastrings/utf8proc
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt

commit 90721f2d39b0cdd5d22409f1bf4f6ce4b7382944
parent 10f7e2ed5a7f3d05cbbc45f457be12456e6969d3
Author: Steven G. Johnson <stevenj@mit.edu>
Date:   Fri,  6 Mar 2015 17:36:08 -0500

directory cleanup: move tests and data into subdirectories

Diffstat:
M Makefile  | 50 +++++++++++++++++++++++++-------------------------
R data_generator.rb -> data/data_generator.rb  | 0 
D graphemetest.c  | 72 ------------------------------------------------------------------------
D normtest.c  | 64 ----------------------------------------------------------------
A test/graphemetest.c  | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/normtest.c  | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
R printproperty.c -> test/printproperty.c  | 0 
A test/tests.h  | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
D tests.h  | 53 -----------------------------------------------------

9 files changed, 214 insertions(+), 214 deletions(-)
diff --git a/Makefile b/Makefile
@@ -25,7 +25,7 @@ all: c-library
 c-library: libutf8proc.a libutf8proc.$(SHLIB_EXT)
 
 clean:
-	rm -f utf8proc.o libutf8proc.a libutf8proc.$(SHLIB_EXT) normtest graphemetest UnicodeData.txt DerivedCoreProperties.txt CompositionExclusions.txt CaseFolding.txt NormalizationTest.txt GraphemeBreakTest.txt
+	rm -f utf8proc.o libutf8proc.a libutf8proc.$(SHLIB_EXT) test/normtest test/graphemetest data/UnicodeData.txt data/DerivedCoreProperties.txt data/CompositionExclusions.txt data/CaseFolding.txt data/NormalizationTest.txt data/GraphemeBreakTest.txt
 	$(MAKE) -C bench clean
 
 update: utf8proc_data.c.new
@@ -33,23 +33,23 @@ update: utf8proc_data.c.new
 
 # real targets
 
-utf8proc_data.c.new: data_generator.rb UnicodeData.txt GraphemeBreakProperty.txt DerivedCoreProperties.txt CompositionExclusions.txt CaseFolding.txt
-	$(RUBY) data_generator.rb < UnicodeData.txt > utf8proc_data.c.new
+# Regenerate the data tables: run data_generator.rb from inside data/ with
+# UnicodeData.txt on stdin, capturing stdout as utf8proc_data.c.new in the
+# top-level directory.  (Presumably the script opens the other data/*.txt
+# prerequisites by relative path -- confirm in data_generator.rb.)
+# `&&` makes a failed `cd` abort the recipe instead of running the generator
+# in the wrong directory.
+utf8proc_data.c.new: data/data_generator.rb data/UnicodeData.txt data/GraphemeBreakProperty.txt data/DerivedCoreProperties.txt data/CompositionExclusions.txt data/CaseFolding.txt
+	(cd data && $(RUBY) data_generator.rb < UnicodeData.txt) > utf8proc_data.c.new
 
-UnicodeData.txt:
-	$(CURL) -O http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
+# Download UnicodeData.txt into data/.  Only -o is passed: it names the output
+# file explicitly, so a second output option (-O) would have no URL to pair with.
+data/UnicodeData.txt:
+	$(CURL) -o $@ http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
 
-GraphemeBreakProperty.txt:
-	$(CURL) -O http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt
+# Download GraphemeBreakProperty.txt into data/.  Only -o is passed: it names
+# the output file explicitly, so a second output option (-O) would have no URL
+# to pair with.
+data/GraphemeBreakProperty.txt:
+	$(CURL) -o $@ http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt
 
-DerivedCoreProperties.txt:
-	$(CURL) -O http://www.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt
+# Download DerivedCoreProperties.txt into data/.  Only -o is passed: it names
+# the output file explicitly, so a second output option (-O) would have no URL
+# to pair with.
+data/DerivedCoreProperties.txt:
+	$(CURL) -o $@ http://www.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt
 
-CompositionExclusions.txt:
-	$(CURL) -O http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt
+# Download CompositionExclusions.txt into data/.  Only -o is passed: it names
+# the output file explicitly, so a second output option (-O) would have no URL
+# to pair with.
+data/CompositionExclusions.txt:
+	$(CURL) -o $@ http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt
 
-CaseFolding.txt:
-	$(CURL) -O http://www.unicode.org/Public/UNIDATA/CaseFolding.txt
+# Download CaseFolding.txt into data/.  Only -o is passed: it names the output
+# file explicitly, so a second output option (-O) would have no URL to pair with.
+data/CaseFolding.txt:
+	$(CURL) -o $@ http://www.unicode.org/Public/UNIDATA/CaseFolding.txt
 
 utf8proc.o: utf8proc.h utf8proc.c utf8proc_data.c
 	$(cc) -c -o utf8proc.o utf8proc.c
@@ -68,21 +68,21 @@ libutf8proc.dylib: utf8proc.o
 
 # Test programs
 
-NormalizationTest.txt:
-	$(CURL) -O http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt
+# Download NormalizationTest.txt (input of test/normtest) into data/.  Only -o
+# is passed: it names the output file explicitly, so a second output option
+# (-O) would have no URL to pair with.
+data/NormalizationTest.txt:
+	$(CURL) -o $@ http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt
 
-GraphemeBreakTest.txt:
+# Download GraphemeBreakTest.txt and rewrite its break marks as ASCII for
+# test/graphemetest: '÷' (break) becomes '/', '×' (no break) becomes '+'.
+# The download goes to a scratch file first so that a curl failure stops make
+# before $@ exists; in a `curl | perl > $@` pipeline only perl's exit status is
+# seen and a failed download would leave an empty, apparently up-to-date $@.
+data/GraphemeBreakTest.txt:
+	$(CURL) -o $@.raw http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
+	$(PERL) -pe 's,÷,/,g;s,×,+,g' $@.raw > $@
+	rm -f $@.raw
 
-normtest: normtest.c utf8proc.o utf8proc.h tests.h
-	$(cc) normtest.c utf8proc.o -o $@
+# Link the normalization test driver against utf8proc.o.
+# $< is the first prerequisite, i.e. test/normtest.c.
+test/normtest: test/normtest.c utf8proc.o utf8proc.h test/tests.h
+	$(cc) $< utf8proc.o -o $@
 
-graphemetest: graphemetest.c utf8proc.o utf8proc.h tests.h
-	$(cc) graphemetest.c utf8proc.o -o $@
+# Link the grapheme-break test driver against utf8proc.o.
+# $< is the first prerequisite, i.e. test/graphemetest.c.
+test/graphemetest: test/graphemetest.c utf8proc.o utf8proc.h test/tests.h
+	$(cc) $< utf8proc.o -o $@
 
-printproperty: printproperty.c utf8proc.o utf8proc.h tests.h
-	$(cc) printproperty.c utf8proc.o -o $@
+# Link the printproperty utility against utf8proc.o.
+# $< is the first prerequisite, i.e. test/printproperty.c.
+test/printproperty: test/printproperty.c utf8proc.o utf8proc.h test/tests.h
+	$(cc) $< utf8proc.o -o $@
 
-check: normtest NormalizationTest.txt graphemetest GraphemeBreakTest.txt
-	./normtest
-	./graphemetest
+# Run the test programs, handing each one the path of its data file.
+# `check` names a command, not a file, so it is declared phony: a stray file
+# called "check" must not make the tests look up to date.
+.PHONY: check
+check: test/normtest data/NormalizationTest.txt test/graphemetest data/GraphemeBreakTest.txt
+	test/normtest data/NormalizationTest.txt
+	test/graphemetest data/GraphemeBreakTest.txt
diff --git a/data_generator.rb b/data/data_generator.rb
diff --git a/graphemetest.c b/graphemetest.c
@@ -1,72 +0,0 @@
-#include "tests.h"
-
-int main(void)
-{
-    char *buf = NULL;
-    size_t bufsize = 0;
-    FILE *f = fopen("GraphemeBreakTest.txt", "r");
-    uint8_t src[1024];
-    
-    check(f != NULL, "error opening GraphemeBreakTest.txt");
-    while (getline(&buf, &bufsize, f) > 0) {
-        size_t bi = 0, si = 0;
-        lineno += 1;
-        
-        if (lineno % 100 == 0)
-            printf("checking line %zd...\n", lineno);
-        
-        if (buf[0] == '#') continue;
-        
-        while (buf[bi]) {
-            bi = skipspaces(buf, bi);
-            if (buf[bi] == '/') { /* grapheme break */
-                src[si++] = '/';
-                bi++;
-            }
-            else if (buf[bi] == '+') { /* no break */
-                bi++;
-            }
-            else if (buf[bi] == '#') { /* start of comments */
-                break;
-            }
-            else { /* hex-encoded codepoint */
-                bi += encode((char*) (src + si), buf + bi) - 1;
-                while (src[si]) ++si; /* advance to NUL termination */
-            }
-        }
-        if (si && src[si-1] == '/')
-            --si; /* no break after final grapheme */
-        src[si] = 0; /* NUL-terminate */
-        
-        if (si) {
-            uint8_t utf8[1024]; /* copy src without 0xff grapheme separators */
-            size_t i = 0, j = 0;
-            ssize_t glen;
-            uint8_t *g; /* utf8proc_map grapheme results */
-            while (i < si) {
-                if (src[i] != '/')
-                    utf8[j++] = src[i++];
-                else
-                    i++;
-            }
-            glen = utf8proc_map(utf8, j, &g, UTF8PROC_CHARBOUND);
-            if (glen == UTF8PROC_ERROR_INVALIDUTF8) {
-                 /* the test file contains surrogate codepoints, which are only for UTF-16 */
-                 printf("line %zd: ignoring invalid UTF-8 codepoints\n", lineno);
-            }
-            else {
-                 check(glen >= 0, "utf8proc_map error = %s",
-                       utf8proc_errmsg(glen));
-                 for (i = 0; i <= glen; ++i)
-                      if (g[i] == 0xff)
-                           g[i] = '/'; /* easier-to-read output (/ is not in test strings) */
-                 check(!strcmp((char*)g, (char*)src),
-                       "grapheme mismatch: \"%s\" instead of \"%s\"", (char*)g, (char*)src);
-            }
-            free(g);
-        }
-    }
-    fclose(f);
-    printf("Passed tests after %zd lines!\n", lineno);
-    return 0;
-}
diff --git a/normtest.c b/normtest.c
@@ -1,64 +0,0 @@
-#include "tests.h"
-
-#define CHECK_NORM(NRM, norm, src) {                                 \
-    char *src_norm = (char*) utf8proc_ ## NRM((uint8_t*) src);      \
-    check(!strcmp(norm, src_norm),                                  \
-          "normalization failed for %s -> %s", src, norm);          \
-    free(src_norm);                                                 \
-}
-
-int main(void)
-{
-     char *buf = NULL;
-     size_t bufsize = 0;
-     FILE *f = fopen("NormalizationTest.txt", "r");
-     char source[1024], NFC[1024], NFD[1024], NFKC[1024], NFKD[1024];
-
-     check(f != NULL, "error opening NormalizationTest.txt");
-     while (getline(&buf, &bufsize, f) > 0) {
-          size_t offset;
-          lineno += 1;
-
-          if (buf[0] == '@') {
-               printf("line %zd: %s", lineno, buf + 1);
-               continue;
-          }
-          else if (lineno % 1000 == 0)
-               printf("checking line %zd...\n", lineno);
-
-          if (buf[0] == '#') continue;
-
-          offset = encode(source, buf);
-          offset += encode(NFC, buf + offset);
-          offset += encode(NFD, buf + offset);
-          offset += encode(NFKC, buf + offset);
-          offset += encode(NFKD, buf + offset);
-
-          CHECK_NORM(NFC, NFC, source);
-          CHECK_NORM(NFC, NFC, NFC);
-          CHECK_NORM(NFC, NFC, NFD);
-          CHECK_NORM(NFC, NFKC, NFKC);
-          CHECK_NORM(NFC, NFKC, NFKD);
-
-          CHECK_NORM(NFD, NFD, source);
-          CHECK_NORM(NFD, NFD, NFC);
-          CHECK_NORM(NFD, NFD, NFD);
-          CHECK_NORM(NFD, NFKD, NFKC);
-          CHECK_NORM(NFD, NFKD, NFKD);
-
-          CHECK_NORM(NFKC, NFKC, source);
-          CHECK_NORM(NFKC, NFKC, NFC);
-          CHECK_NORM(NFKC, NFKC, NFD);
-          CHECK_NORM(NFKC, NFKC, NFKC);
-          CHECK_NORM(NFKC, NFKC, NFKD);
-
-          CHECK_NORM(NFKD, NFKD, source);
-          CHECK_NORM(NFKD, NFKD, NFC);
-          CHECK_NORM(NFKD, NFKD, NFD);
-          CHECK_NORM(NFKD, NFKD, NFKC);
-          CHECK_NORM(NFKD, NFKD, NFKD);
-     }
-     fclose(f);
-     printf("Passed tests after %zd lines!\n", lineno);
-     return 0;
-}
diff --git a/test/graphemetest.c b/test/graphemetest.c
@@ -0,0 +1,72 @@
+#include "tests.h"
+
+/* Grapheme-break conformance test.
+ *
+ * Usage: graphemetest <data file>
+ *
+ * Each non-comment line of the data file is a list of hex codepoints separated
+ * by '/' (grapheme break) or '+' (no break), optionally followed by a
+ * '#' comment.  The line is converted to UTF-8 with '/' bytes kept at the break
+ * positions (src), the same text without the markers is run through
+ * utf8proc_map(..., UTF8PROC_CHARBOUND), and the 0xff bytes found in the
+ * result are rewritten to '/' so the two strings can be compared.
+ *
+ * Exits with status 1 (via check) on the first mismatch, 0 otherwise.
+ */
+int main(int argc, char **argv)
+{
+    char *buf = NULL;
+    size_t bufsize = 0;
+    FILE *f;
+    uint8_t src[1024];
+
+    /* report a missing argument and an unreadable file separately, naming
+       the path that was actually tried */
+    check(argc > 1, "usage: graphemetest <path to GraphemeBreakTest.txt>");
+    f = fopen(argv[1], "r");
+    check(f != NULL, "error opening %s", argv[1]);
+
+    while (getline(&buf, &bufsize, f) > 0) {
+        size_t bi = 0, si = 0;
+        lineno += 1;
+
+        if (lineno % 100 == 0)
+            printf("checking line %zu...\n", lineno);
+
+        if (buf[0] == '#') continue;
+
+        while (buf[bi]) {
+            bi = skipspaces(buf, bi);
+            if (!buf[bi]) { /* only whitespace was left on the line */
+                break;
+            }
+            else if (buf[bi] == '/') { /* grapheme break */
+                src[si++] = '/';
+                bi++;
+            }
+            else if (buf[bi] == '+') { /* no break */
+                bi++;
+            }
+            else if (buf[bi] == '#') { /* start of comments */
+                break;
+            }
+            else { /* hex-encoded codepoint */
+                size_t nread = encode((char*) (src + si), buf + bi);
+                /* encode returns 1 when it found no hex digits at all; bi
+                   would then never advance and this loop would spin forever */
+                check(nread > 1, "unexpected character '%c' in test data",
+                      buf[bi]);
+                bi += nread - 1; /* stay on the terminating character */
+                while (src[si]) ++si; /* advance to NUL termination */
+            }
+        }
+        if (si && src[si-1] == '/')
+            --si; /* no break after final grapheme */
+        src[si] = 0; /* NUL-terminate */
+
+        if (si) {
+            uint8_t utf8[1024]; /* copy of src without the '/' break markers */
+            size_t i, j = 0;
+            ssize_t glen;
+            uint8_t *g = NULL; /* utf8proc_map grapheme results */
+
+            for (i = 0; i < si; ++i)
+                if (src[i] != '/')
+                    utf8[j++] = src[i];
+
+            glen = utf8proc_map(utf8, j, &g, UTF8PROC_CHARBOUND);
+            if (glen == UTF8PROC_ERROR_INVALIDUTF8) {
+                /* the test file contains surrogate codepoints, which are only for UTF-16 */
+                printf("line %zu: ignoring invalid UTF-8 codepoints\n", lineno);
+            }
+            else {
+                check(glen >= 0, "utf8proc_map error = %s",
+                      utf8proc_errmsg(glen));
+                /* glen is known to be non-negative here, so the cast is safe;
+                   index glen itself (the terminator) is scanned too */
+                for (i = 0; i <= (size_t) glen; ++i)
+                    if (g[i] == 0xff)
+                        g[i] = '/'; /* easier-to-read output (/ is not in test strings) */
+                check(!strcmp((char*)g, (char*)src),
+                      "grapheme mismatch: \"%s\" instead of \"%s\"", (char*)g, (char*)src);
+            }
+            free(g);
+        }
+    }
+    fclose(f);
+    free(buf); /* line buffer allocated by getline */
+    printf("Passed tests after %zu lines!\n", lineno);
+    return 0;
+}
diff --git a/test/normtest.c b/test/normtest.c
@@ -0,0 +1,64 @@
+#include "tests.h"
+
+/* CHECK_NORM(NRM, norm, src): normalize the string src with utf8proc_<NRM>()
+   and fail the test (via check) unless the result equals norm.  The result is
+   released with free().  Wrapped in do { } while (0) so that
+   `CHECK_NORM(...);` is a single statement in any context; a NULL result is
+   reported through check instead of being passed to strcmp. */
+#define CHECK_NORM(NRM, norm, src) do {                              \
+    char *src_norm = (char*) utf8proc_ ## NRM((uint8_t*) (src));    \
+    check(src_norm != NULL,                                         \
+          "normalization returned NULL for %s", (src));             \
+    check(!strcmp((norm), src_norm),                                \
+          "normalization failed for %s -> %s", (src), (norm));      \
+    free(src_norm);                                                 \
+} while (0)
+
+/* Normalization conformance test.
+ *
+ * Usage: normtest <data file>
+ *
+ * Lines starting with '@' are echoed, lines starting with '#' are skipped.
+ * Every other line is read as five hex-codepoint fields (source, NFC, NFD,
+ * NFKC, NFKD); each field is converted to UTF-8 with encode() and the four
+ * normalization functions are checked against the expected columns.
+ *
+ * Exits with status 1 (via check) on the first failure, 0 otherwise.
+ */
+int main(int argc, char **argv)
+{
+     char *buf = NULL;
+     size_t bufsize = 0;
+     FILE *f;
+     char source[1024], NFC[1024], NFD[1024], NFKC[1024], NFKD[1024];
+
+     /* report a missing argument and an unreadable file separately, naming
+        the path that was actually tried */
+     check(argc > 1, "usage: normtest <path to NormalizationTest.txt>");
+     f = fopen(argv[1], "r");
+     check(f != NULL, "error opening %s", argv[1]);
+
+     while (getline(&buf, &bufsize, f) > 0) {
+          size_t offset;
+          lineno += 1;
+
+          if (buf[0] == '@') {
+               printf("line %zu: %s", lineno, buf + 1);
+               continue;
+          }
+          else if (lineno % 1000 == 0)
+               printf("checking line %zu...\n", lineno);
+
+          if (buf[0] == '#') continue;
+
+          /* encode returns the count of characters consumed including the
+             field terminator, so offset always lands on the next field */
+          offset = encode(source, buf);
+          offset += encode(NFC, buf + offset);
+          offset += encode(NFD, buf + offset);
+          offset += encode(NFKC, buf + offset);
+          offset += encode(NFKD, buf + offset);
+
+          CHECK_NORM(NFC, NFC, source);
+          CHECK_NORM(NFC, NFC, NFC);
+          CHECK_NORM(NFC, NFC, NFD);
+          CHECK_NORM(NFC, NFKC, NFKC);
+          CHECK_NORM(NFC, NFKC, NFKD);
+
+          CHECK_NORM(NFD, NFD, source);
+          CHECK_NORM(NFD, NFD, NFC);
+          CHECK_NORM(NFD, NFD, NFD);
+          CHECK_NORM(NFD, NFKD, NFKC);
+          CHECK_NORM(NFD, NFKD, NFKD);
+
+          CHECK_NORM(NFKC, NFKC, source);
+          CHECK_NORM(NFKC, NFKC, NFC);
+          CHECK_NORM(NFKC, NFKC, NFD);
+          CHECK_NORM(NFKC, NFKC, NFKC);
+          CHECK_NORM(NFKC, NFKC, NFKD);
+
+          CHECK_NORM(NFKD, NFKD, source);
+          CHECK_NORM(NFKD, NFKD, NFC);
+          CHECK_NORM(NFKD, NFKD, NFD);
+          CHECK_NORM(NFKD, NFKD, NFKC);
+          CHECK_NORM(NFKD, NFKD, NFKD);
+     }
+     fclose(f);
+     free(buf); /* line buffer allocated by getline */
+     printf("Passed tests after %zu lines!\n", lineno);
+     return 0;
+}
diff --git a/printproperty.c b/test/printproperty.c
diff --git a/test/tests.h b/test/tests.h
@@ -0,0 +1,53 @@
+/* Common functions and includes for our test programs. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdarg.h>
+
+#include "../utf8proc.h"
+
+size_t lineno = 0;
+
+/* Test assertion: does nothing when cond is non-zero.  Otherwise prints
+   "line <lineno>: " followed by the printf-style message and a newline to
+   stderr, then terminates the program with exit status 1.
+   lineno is a size_t, hence the %zu conversion. */
+void check(int cond, const char *format, ...)
+{
+     va_list args;
+
+     if (cond)
+          return;
+
+     fprintf(stderr, "line %zu: ", lineno);
+     va_start(args, format);
+     vfprintf(stderr, format, args);
+     va_end(args);
+     fprintf(stderr, "\n");
+     exit(1);
+}
+
+/* Return the index of the first character at or after buf[i] that is not
+   whitespace according to isspace().  The terminating NUL is not whitespace,
+   so the scan never runs past the end of a NUL-terminated string.
+   The cast matters: the <ctype.h> functions are only defined for values
+   representable as unsigned char (or EOF), and plain char may be negative
+   for non-ASCII bytes. */
+size_t skipspaces(const char *buf, size_t i)
+{
+    while (isspace((unsigned char) buf[i])) ++i;
+    return i;
+}
+
+/* if buf points to a sequence of codepoints encoded as hexadecimal strings,
+   separated by whitespace, and terminated by any character not in
+   [0-9a-fA-F] or whitespace, then stores the corresponding utf8 string
+   (NUL-terminated) in dest.
+
+   Returns the index of the terminating character plus one, i.e. the number
+   of characters consumed from buf *including* the terminator.  Callers that
+   want to stay on the terminator subtract one.  If buf holds no hex digits
+   before the terminator, dest becomes the empty string.
+
+   dest must be large enough for the result; no bound is checked here. */
+size_t encode(char *dest, const char *buf)
+{
+     size_t i = 0, j, d = 0;
+     for (;;) {
+          unsigned int c; /* %x stores through an unsigned int pointer */
+          i = skipspaces(buf, i);
+          /* find end of hex input; isxdigit is false for NUL, and the cast
+             keeps non-ASCII bytes inside the range <ctype.h> accepts */
+          for (j = i; isxdigit((unsigned char) buf[j]); ++j)
+               ;
+          if (j == i) { /* no codepoint found */
+               dest[d] = 0; /* NUL-terminate destination string */
+               return i + 1;
+          }
+          check(sscanf(buf + i, "%x", &c) == 1, "invalid hex input %s", buf+i);
+          i = j; /* skip to char after hex input */
+          d += utf8proc_encode_char((int32_t) c, (uint8_t *) (dest + d));
+     }
+}
+}
+
diff --git a/tests.h b/tests.h
@@ -1,53 +0,0 @@
-/* Common functions and includes for our test programs. */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include <string.h>
-#include <stdarg.h>
-
-#include "utf8proc.h"
-
-size_t lineno = 0;
-
-void check(int cond, const char *format, ...)
-{
-     if (!cond) {
-          va_list args;
-          fprintf(stderr, "line %zd: ", lineno);
-          va_start(args, format);
-          vfprintf(stderr, format, args);
-          va_end(args);
-          fprintf(stderr, "\n");
-          exit(1);
-     }
-}
-
-size_t skipspaces(const char *buf, size_t i)
-{
-    while (isspace(buf[i])) ++i;
-    return i;
-}
-
-/* if buf points to a sequence of codepoints encoded as hexadecimal strings,
-   separated by whitespace, and terminated by any character not in
-   [0-9a-fA-F] or whitespace, then stores the corresponding utf8 string
-   in dest, returning the number of bytes read from buf */
-size_t encode(char *dest, const char *buf)
-{
-     size_t i = 0, j, d = 0;
-     do {
-          int c;
-          i = skipspaces(buf, i);
-          for (j=i; buf[j] && strchr("0123456789abcdef", tolower(buf[j])); ++j)
-               ; /* find end of hex input */
-          if (j == i) { /* no codepoint found */
-               dest[d] = 0; /* NUL-terminate destination string */
-               return i + 1;
-          }
-          check(sscanf(buf + i, "%x", &c) == 1, "invalid hex input %s", buf+i);
-          i = j; /* skip to char after hex input */
-          d += utf8proc_encode_char(c, (uint8_t *) (dest + d));
-     } while (1);
-}
-

M	Makefile	\|	50	+++++++++++++++++++++++++-------------------------
R	data_generator.rb -> data/data_generator.rb	\|	0
D	graphemetest.c	\|	72	------------------------------------------------------------------------
D	normtest.c	\|	64	----------------------------------------------------------------
A	test/graphemetest.c	\|	72	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	test/normtest.c	\|	64	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
R	printproperty.c -> test/printproperty.c	\|	0
A	test/tests.h	\|	53	+++++++++++++++++++++++++++++++++++++++++++++++++++++
D	tests.h	\|	53	-----------------------------------------------------