libgrapheme

Freestanding C library for Unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

commit f4408163156a468667ae24d3b7eb33b5b8346cf0
parent 919200085f7eaffafa33ec3a5e50c2adbd046ab7
Author: Laslo Hunhold <dev@frign.de>
Date:   Sat, 27 Aug 2022 02:09:10 +0200

Add UNICODE_VERSION variable to Makefile and add to manual-templates

This simplifies updating to new Unicode versions a bit. The variable is
deliberately not added to config.mk, because switching between Unicode
versions is not as simple as downloading new files: it is also necessary
to check all the different implemented algorithms for changes.

Signed-off-by: Laslo Hunhold <dev@frign.de>

Diffstat:
M Makefile                | 32 +++++++++++++++++---------------
M man/libgrapheme.sh      |  2 +-
M man/template/to_case.sh |  2 +-
3 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/Makefile b/Makefile @@ -4,6 +4,8 @@ include config.mk +UNICODE_VERSION = 14.0.0 + BENCHMARK =\ benchmark/case\ benchmark/character\ @@ -81,46 +83,46 @@ MAN7 =\ all: data/LICENSE $(MAN3:=.3) $(MAN7:=.7) libgrapheme.a libgrapheme.so data/DerivedCoreProperties.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/DerivedCoreProperties.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/DerivedCoreProperties.txt data/EastAsianWidth.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/EastAsianWidth.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/EastAsianWidth.txt data/emoji-data.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/emoji/emoji-data.txt data/GraphemeBreakProperty.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/GraphemeBreakProperty.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakProperty.txt data/GraphemeBreakTest.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/GraphemeBreakTest.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakTest.txt data/LICENSE: wget -O $@ https://www.unicode.org/license.txt data/LineBreak.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/LineBreak.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/LineBreak.txt data/LineBreakTest.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/LineBreakTest.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/LineBreakTest.txt data/SentenceBreakProperty.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/SentenceBreakProperty.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/SentenceBreakProperty.txt data/SentenceBreakTest.txt: - wget -O $@ 
https://www.unicode.org/Public/14.0.0/ucd/auxiliary/SentenceBreakTest.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/SentenceBreakTest.txt data/SpecialCasing.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/SpecialCasing.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/SpecialCasing.txt data/UnicodeData.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/UnicodeData.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt data/WordBreakProperty.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/WordBreakProperty.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/WordBreakProperty.txt data/WordBreakTest.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/WordBreakTest.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/WordBreakTest.txt benchmark/case.o: benchmark/case.c config.mk gen/word-test.h grapheme.h benchmark/util.h benchmark/character.o: benchmark/character.c config.mk gen/character-test.h grapheme.h benchmark/util.h @@ -232,10 +234,10 @@ libgrapheme.so: $(SRC:=.o) $(CC) -o $@ $(SOFLAGS) $(LDFLAGS) $(SRC:=.o) $(MAN3:=.3): - SH=$(SH) $(SH) $(@:.3=.sh) > $@ + SH=$(SH) UNICODE_VERSION=$(UNICODE_VERSION) $(SH) $(@:.3=.sh) > $@ $(MAN7:=.7): - SH=$(SH) $(SH) $(@:.7=.sh) > $@ + SH=$(SH) UNICODE_VERSION=$(UNICODE_VERSION) $(SH) $(@:.7=.sh) > $@ benchmark: $(BENCHMARK) for m in $(BENCHMARK); do ./$$m; done diff --git a/man/libgrapheme.sh b/man/libgrapheme.sh @@ -51,7 +51,7 @@ example illustrating the possible usage. .Xr grapheme_next_word_break_utf8 3 .Sh STANDARDS .Nm -is compliant with the Unicode 14.0.0 specification. +is compliant with the Unicode $UNICODE_VERSION specification. 
.Sh MOTIVATION The idea behind every character encoding scheme like ASCII or Unicode is to express abstract characters (which can be thought of as shapes diff --git a/man/template/to_case.sh b/man/template/to_case.sh @@ -52,7 +52,7 @@ is .Xr libgrapheme 7 .Sh STANDARDS .Fn grapheme_to_$CASE -is compliant with the Unicode 14.0.0 specification. +is compliant with the Unicode $UNICODE_VERSION specification. .Sh AUTHORS .An Laslo Hunhold Aq Mt dev@frign.de EOF