utf8proc

A clean C library for processing UTF-8 Unicode data
git clone https://git.sinitax.com/juliastrings/utf8proc
Log | Files | Refs | README | LICENSE | sfeed.txt

commit 7d4541ee09ef2579fe5f0478e4e695a045772ad0
parent 7932385a6c008a9b89f7ae1917f2ffbea62eb33e
Author: Jiahao Chen <jiahao@mit.edu>
Date:   Fri, 18 Jul 2014 10:02:09 -0400

Replace all explicitly marked regions with Ruby file read and regex section matches

Diffstat:
M data_generator.rb | 37 +++++--------------------------------
1 file changed, 5 insertions(+), 32 deletions(-)

diff --git a/data_generator.rb b/data_generator.rb
@@ -65,14 +65,7 @@
 # authorization of the copyright holder.
 
 
-
-$ignorable_list = <<END_OF_LIST
-#From:
-# http://www.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt
-#Section:
-# Derived Property: Default_Ignorable_Code_Point
-END_OF_LIST
-
+$ignorable_list = File.read("DerivedCoreProperties.txt")[/# Derived Property: Default_Ignorable_Code_Point.*?# Total code points:/m]
 $ignorable = []
 $ignorable_list.each_line do |entry|
   if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
@@ -82,13 +75,7 @@ $ignorable_list.each_line do |entry|
   end
 end
 
-$grapheme_extend_list = <<END_OF_LIST
-#From:
-# http://www.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt
-#Section:
-# Derived Property: Grapheme_Extend_List
-END_OF_LIST
-
+$grapheme_extend_list = File.read("DerivedCoreProperties.txt")[/# Derived Property: Grapheme_Extend.*?# Total code points:/m]
 $grapheme_extend = []
 $grapheme_extend_list.each_line do |entry|
   if entry =~ /^([0-9A-F]+)\.\.([0-9A-F]+)/
@@ -98,27 +85,13 @@ $grapheme_extend_list.each_line do |entry|
   end
 end
 
-$exclusions = <<END_OF_LIST
-#From:
-# http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt
-#Section:
-# (1) Script Specifics
-END_OF_LIST
+$exclusions = File.read("CompositionExclusions.txt")[/# \(1\) Script Specifics.*?# Total code points:/m]
 $exclusions = $exclusions.chomp.split("\n").collect { |e| e.hex }
 
-$excl_version = <<END_OF_LIST
-#From:
-# http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt
-#Section:
-# (2) Post Composition Version precomposed characters
-END_OF_LIST
+$excl_version = File.read("CompositionExclusions.txt")[/# \(2\) Post Composition Version precomposed characters.*?# Total code points:/m]
 $excl_version = $excl_version.chomp.split("\n").collect { |e| e.hex }
 
-$case_folding_string = <<END_OF_LIST
-#XXX THE NONEMPTY, NON-COMMENT LINES OF
-#XXX http://www.unicode.org/Public/UNIDATA/CaseFolding.txt
-#XXX GO HERE
-END_OF_LIST
+$case_folding_string = File.open("CaseFolding.txt").read
 
 $case_folding = {}
 $case_folding_string.chomp.split("\n").each do |line|