libgrapheme

Freestanding C library for Unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

commit f2783665bc71b9b1f1b72830629c3724bd8e1ae4
parent aa5dda2687c4907d6a47e57b1d7973b8f9d158ae
Author: Laslo Hunhold <dev@frign.de>
Date:   Tue, 16 Aug 2022 16:35:27 +0200

Add bidirectional data-parser and function prototypes with structure

The bidirectional algorithm is very interesting and is currently used
mostly in the context of the GNU FriBidi library. Despite its complexity,
it should be interesting to try to implement it in libgrapheme.

This commit adds the data-parsing and the first part of the algorithm
(namely paragraph level detection). Development will happen in a
separate branch to allow stabilization on the master branch in
preparation for the release of version 2.

Signed-off-by: Laslo Hunhold <dev@frign.de>

Diffstat:
MMakefile | 10++++++++++
Adata/DerivedBidiClass.txt | 2524+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agen/bidirectional.c | 137+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mgrapheme.h | 13+++++++++++++
Asrc/bidirectional.c | 169+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 2853 insertions(+), 0 deletions(-)

diff --git a/Makefile b/Makefile @@ -13,6 +13,7 @@ BENCHMARK =\ benchmark/word\ DATA =\ + data/DerivedBidiClass.txt\ data/DerivedCoreProperties.txt\ data/EastAsianWidth.txt\ data/emoji-data.txt\ @@ -29,6 +30,7 @@ DATA =\ data/WordBreakTest.txt\ GEN =\ + gen/bidirectional\ gen/case\ gen/character\ gen/character-test\ @@ -40,6 +42,7 @@ GEN =\ gen/word-test\ SRC =\ + src/bidirectional\ src/case\ src/character\ src/line\ @@ -72,6 +75,7 @@ benchmark/utf8-decode.o: benchmark/utf8-decode.c config.mk gen/character-test.h benchmark/sentence.o: benchmark/sentence.c config.mk gen/sentence-test.h grapheme.h benchmark/util.h benchmark/util.o: benchmark/util.c config.mk benchmark/util.h benchmark/word.o: benchmark/word.c config.mk gen/word-test.h grapheme.h benchmark/util.h +gen/bidirectional.o: gen/bidirectional.c config.mk gen/util.h gen/case.o: gen/case.c config.mk gen/util.h gen/character.o: gen/character.c config.mk gen/util.h gen/character-test.o: gen/character-test.c config.mk gen/util.h @@ -82,6 +86,7 @@ gen/sentence-test.o: gen/sentence-test.c config.mk gen/util.h gen/word.o: gen/word.c config.mk gen/util.h gen/word-test.o: gen/word-test.c config.mk gen/util.h gen/util.o: gen/util.c config.mk gen/util.h +src/bidirectional.o: src/bidirectional.c config.mk gen/bidirectional.h grapheme.h src/util.h src/case.o: src/case.c config.mk gen/case.h grapheme.h src/util.h src/character.o: src/character.c config.mk gen/character.h grapheme.h src/util.h src/line.o: src/line.c config.mk gen/line.h grapheme.h src/util.h @@ -103,6 +108,7 @@ benchmark/line: benchmark/line.o benchmark/util.o libgrapheme.a benchmark/sentence: benchmark/sentence.o benchmark/util.o libgrapheme.a benchmark/utf8-decode: benchmark/utf8-decode.o benchmark/util.o libgrapheme.a benchmark/word: benchmark/word.o benchmark/util.o libgrapheme.a +gen/bidirectional: gen/bidirectional.o gen/util.o gen/case: gen/case.o gen/util.o gen/character: gen/character.o gen/util.o gen/character-test: gen/character-test.o gen/util.o 
@@ -119,6 +125,7 @@ test/utf8-encode: test/utf8-encode.o test/util.o libgrapheme.a test/utf8-decode: test/utf8-decode.o test/util.o libgrapheme.a test/word: test/word.o test/util.o libgrapheme.a +gen/bidirectional.h: data/DerivedBidiClass.txt gen/bidirectional gen/case.h: data/DerivedCoreProperties.txt data/UnicodeData.txt data/SpecialCasing.txt gen/case gen/character.h: data/emoji-data.txt data/GraphemeBreakProperty.txt gen/character gen/character-test.h: data/GraphemeBreakTest.txt gen/character-test @@ -129,6 +136,9 @@ gen/sentence-test.h: data/SentenceBreakTest.txt gen/sentence-test gen/word.h: data/WordBreakProperty.txt gen/word gen/word-test.h: data/WordBreakTest.txt gen/word-test +data/DerivedBidiClass.txt: + wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/extracted/DerivedBidiClass.txt + data/DerivedCoreProperties.txt: wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/DerivedCoreProperties.txt diff --git a/data/DerivedBidiClass.txt b/data/DerivedBidiClass.txt @@ -0,0 +1,2524 @@ +# DerivedBidiClass-14.0.0.txt +# Date: 2021-07-10, 00:35:02 GMT +# © 2021 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ + +# ================================================ + +# Bidi Class (listing UnicodeData.txt, field 4: see UAX #44: https://www.unicode.org/reports/tr44/) +# Unlike other properties, unassigned code points in blocks +# reserved for right-to-left scripts are given either types R or AL. 
+# +# The unassigned code points that default to AL are in the ranges: +# [\u0600-\u07BF \u0860-\u08FF \uFB50-\uFDCF \uFDF0-\uFDFF \uFE70-\uFEFF +# \U00010D00-\U00010D3F \U00010F30-\U00010F6F +# \U0001EC70-\U0001ECBF \U0001ED00-\U0001ED4F \U0001EE00-\U0001EEFF] +# +# This includes code points in the Arabic, Syriac, and Thaana blocks, among others. +# +# The unassigned code points that default to R are in the ranges: +# [\u0590-\u05FF \u07C0-\u085F \uFB1D-\uFB4F +# \U00010800-\U00010CFF \U00010D40-\U00010F2F \U00010F70-\U00010FFF +# \U0001E800-\U0001EC6F \U0001ECC0-\U0001ECFF \U0001ED50-\U0001EDFF \U0001EF00-\U0001EFFF] +# +# This includes code points in the Hebrew, NKo, and Phoenician blocks, among others. +# +# The unassigned code points that default to ET are in the range: +# [\u20A0-\u20CF] +# +# This consists of code points in the Currency Symbols block. +# +# The unassigned code points that default to BN have one of the following properties: +# Default_Ignorable_Code_Point +# Noncharacter_Code_Point +# +# For all other cases: + +# All code points not explicitly listed for Bidi_Class +# have the value Left_To_Right (L). 
+ +# @missing: 0000..10FFFF; Left_To_Right + +# ================================================ + +# Bidi_Class=Left_To_Right + +0041..005A ; L # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +0061..007A ; L # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; L # Lo FEMININE ORDINAL INDICATOR +00B5 ; L # L& MICRO SIGN +00BA ; L # Lo MASCULINE ORDINAL INDICATOR +00C0..00D6 ; L # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; L # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; L # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; L # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; L # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; L # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0293 ; L # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0294 ; L # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; L # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02B8 ; L # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y +02BB..02C1 ; L # Lm [7] MODIFIER LETTER TURNED COMMA..MODIFIER LETTER REVERSED GLOTTAL STOP +02D0..02D1 ; L # Lm [2] MODIFIER LETTER TRIANGULAR COLON..MODIFIER LETTER HALF TRIANGULAR COLON +02E0..02E4 ; L # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02EE ; L # Lm MODIFIER LETTER DOUBLE APOSTROPHE +0370..0373 ; L # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0376..0377 ; L # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; L # Lm GREEK YPOGEGRAMMENI +037B..037D ; L # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037F ; L # L& GREEK CAPITAL LETTER YOT +0386 ; L # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; L # L& [3] 
GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; L # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; L # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03F5 ; L # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL +03F7..0481 ; L # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA +0482 ; L # So CYRILLIC THOUSANDS SIGN +048A..052F ; L # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; L # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; L # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +055A..055F ; L # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK +0560..0588 ; L # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +0589 ; L # Po ARMENIAN FULL STOP +0903 ; L # Mc DEVANAGARI SIGN VISARGA +0904..0939 ; L # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093B ; L # Mc DEVANAGARI VOWEL SIGN OOE +093D ; L # Lo DEVANAGARI SIGN AVAGRAHA +093E..0940 ; L # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0949..094C ; L # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094E..094F ; L # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0950 ; L # Lo DEVANAGARI OM +0958..0961 ; L # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0964..0965 ; L # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +0966..096F ; L # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +0970 ; L # Po DEVANAGARI ABBREVIATION SIGN +0971 ; L # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972..0980 ; L # Lo [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI +0982..0983 ; L # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +0985..098C ; L # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; L # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; L # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; L 
# Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; L # Lo BENGALI LETTER LA +09B6..09B9 ; L # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BD ; L # Lo BENGALI SIGN AVAGRAHA +09BE..09C0 ; L # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C7..09C8 ; L # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; L # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09CE ; L # Lo BENGALI LETTER KHANDA TA +09D7 ; L # Mc BENGALI AU LENGTH MARK +09DC..09DD ; L # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; L # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09E6..09EF ; L # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE +09F0..09F1 ; L # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09F4..09F9 ; L # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN +09FA ; L # So BENGALI ISSHAR +09FC ; L # Lo BENGALI LETTER VEDIC ANUSVARA +09FD ; L # Po BENGALI ABBREVIATION SIGN +0A03 ; L # Mc GURMUKHI SIGN VISARGA +0A05..0A0A ; L # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; L # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; L # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; L # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; L # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; L # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; L # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A3E..0A40 ; L # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A59..0A5C ; L # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; L # Lo GURMUKHI LETTER FA +0A66..0A6F ; L # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE +0A72..0A74 ; L # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A76 ; L # Po GURMUKHI ABBREVIATION SIGN +0A83 ; L # Mc GUJARATI SIGN VISARGA +0A85..0A8D ; L # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; L # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; L # Lo 
[22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; L # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; L # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; L # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABD ; L # Lo GUJARATI SIGN AVAGRAHA +0ABE..0AC0 ; L # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC9 ; L # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; L # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0AD0 ; L # Lo GUJARATI OM +0AE0..0AE1 ; L # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AE6..0AEF ; L # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0 ; L # Po GUJARATI ABBREVIATION SIGN +0AF9 ; L # Lo GUJARATI LETTER ZHA +0B02..0B03 ; L # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B05..0B0C ; L # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; L # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; L # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; L # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; L # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; L # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3D ; L # Lo ORIYA SIGN AVAGRAHA +0B3E ; L # Mc ORIYA VOWEL SIGN AA +0B40 ; L # Mc ORIYA VOWEL SIGN II +0B47..0B48 ; L # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; L # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B57 ; L # Mc ORIYA AU LENGTH MARK +0B5C..0B5D ; L # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; L # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B66..0B6F ; L # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE +0B70 ; L # So ORIYA ISSHAR +0B71 ; L # Lo ORIYA LETTER WA +0B72..0B77 ; L # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS +0B83 ; L # Lo TAMIL SIGN VISARGA +0B85..0B8A ; L # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; L # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; L # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; L # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA 
+0B9C ; L # Lo TAMIL LETTER JA +0B9E..0B9F ; L # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; L # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; L # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; L # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BBE..0BBF ; L # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC1..0BC2 ; L # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; L # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; L # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BD0 ; L # Lo TAMIL OM +0BD7 ; L # Mc TAMIL AU LENGTH MARK +0BE6..0BEF ; L # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0BF0..0BF2 ; L # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND +0C01..0C03 ; L # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C05..0C0C ; L # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; L # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 ; L # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C39 ; L # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3D ; L # Lo TELUGU SIGN AVAGRAHA +0C41..0C44 ; L # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C58..0C5A ; L # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D ; L # Lo TELUGU LETTER NAKAARA POLLU +0C60..0C61 ; L # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C66..0C6F ; L # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE +0C77 ; L # Po TELUGU SIGN SIDDHAM +0C7F ; L # So TELUGU SIGN TUUMU +0C80 ; L # Lo KANNADA SIGN SPACING CANDRABINDU +0C82..0C83 ; L # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0C84 ; L # Po KANNADA SIGN SIDDHAM +0C85..0C8C ; L # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; L # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; L # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; L # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; L # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBD ; L # Lo KANNADA SIGN AVAGRAHA +0CBE ; L # Mc KANNADA VOWEL SIGN 
AA +0CBF ; L # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; L # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; L # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; L # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; L # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CD5..0CD6 ; L # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CDD..0CDE ; L # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1 ; L # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CE6..0CEF ; L # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE +0CF1..0CF2 ; L # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0D02..0D03 ; L # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D04..0D0C ; L # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; L # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A ; L # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3D ; L # Lo MALAYALAM SIGN AVAGRAHA +0D3E..0D40 ; L # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D46..0D48 ; L # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; L # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D4E ; L # Lo MALAYALAM LETTER DOT REPH +0D4F ; L # So MALAYALAM SIGN PARA +0D54..0D56 ; L # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D57 ; L # Mc MALAYALAM AU LENGTH MARK +0D58..0D5E ; L # No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH +0D5F..0D61 ; L # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D66..0D6F ; L # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE +0D70..0D78 ; L # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS +0D79 ; L # So MALAYALAM DATE MARK +0D7A..0D7F ; L # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K +0D82..0D83 ; L # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0D85..0D96 ; L # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA 
+0D9A..0DB1 ; L # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; L # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; L # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; L # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0DCF..0DD1 ; L # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD8..0DDF ; L # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DE6..0DEF ; L # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE +0DF2..0DF3 ; L # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0DF4 ; L # Po SINHALA PUNCTUATION KUNDDALIYA +0E01..0E30 ; L # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E32..0E33 ; L # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM +0E40..0E45 ; L # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46 ; L # Lm THAI CHARACTER MAIYAMOK +0E4F ; L # Po THAI CHARACTER FONGMAN +0E50..0E59 ; L # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE +0E5A..0E5B ; L # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT +0E81..0E82 ; L # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84 ; L # Lo LAO LETTER KHO TAM +0E86..0E8A ; L # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM +0E8C..0EA3 ; L # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING +0EA5 ; L # Lo LAO LETTER LO LOOT +0EA7..0EB0 ; L # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A +0EB2..0EB3 ; L # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0EBD ; L # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; L # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6 ; L # Lm LAO KO LA +0ED0..0ED9 ; L # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE +0EDC..0EDF ; L # Lo [4] LAO HO NO..LAO LETTER KHMU NYO +0F00 ; L # Lo TIBETAN SYLLABLE OM +0F01..0F03 ; L # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA +0F04..0F12 ; L # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD +0F13 ; L # So TIBETAN MARK 
CARET -DZUD RTAGS ME LONG CAN +0F14 ; L # Po TIBETAN MARK GTER TSHEG +0F15..0F17 ; L # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F1A..0F1F ; L # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG +0F20..0F29 ; L # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE +0F2A..0F33 ; L # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO +0F34 ; L # So TIBETAN MARK BSDUS RTAGS +0F36 ; L # So TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN +0F38 ; L # So TIBETAN MARK CHE MGO +0F3E..0F3F ; L # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F40..0F47 ; L # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; L # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F7F ; L # Mc TIBETAN SIGN RNAM BCAD +0F85 ; L # Po TIBETAN MARK PALUTA +0F88..0F8C ; L # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +0FBE..0FC5 ; L # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE +0FC7..0FCC ; L # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL +0FCE..0FCF ; L # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM +0FD0..0FD4 ; L # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA +0FD5..0FD8 ; L # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS +0FD9..0FDA ; L # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS +1000..102A ; L # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU +102B..102C ; L # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +1031 ; L # Mc MYANMAR VOWEL SIGN E +1038 ; L # Mc MYANMAR SIGN VISARGA +103B..103C ; L # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103F ; L # Lo MYANMAR LETTER GREAT SA +1040..1049 ; L # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE +104A..104F ; L # Po [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE +1050..1055 ; L # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +1056..1057 ; L # 
Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +105A..105D ; L # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +1061 ; L # Lo MYANMAR LETTER SGAW KAREN SHA +1062..1064 ; L # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1065..1066 ; L # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +1067..106D ; L # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +106E..1070 ; L # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1075..1081 ; L # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +1083..1084 ; L # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1087..108C ; L # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108E ; L # Lo MYANMAR LETTER RUMAI PALAUNG FA +108F ; L # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +1090..1099 ; L # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE +109A..109C ; L # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +109E..109F ; L # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION +10A0..10C5 ; L # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; L # L& GEORGIAN CAPITAL LETTER YN +10CD ; L # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; L # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FB ; L # Po GEORGIAN PARAGRAPH SEPARATOR +10FC ; L # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; L # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1100..1248 ; L # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +124A..124D ; L # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; L # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; L # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; L # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; L # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; L # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC 
SYLLABLE XWE +1290..12B0 ; L # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; L # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; L # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; L # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; L # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; L # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; L # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; L # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; L # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +1360..1368 ; L # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR +1369..137C ; L # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND +1380..138F ; L # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +13A0..13F5 ; L # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; L # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1401..166C ; L # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166D ; L # So CANADIAN SYLLABICS CHI SIGN +166E ; L # Po CANADIAN SYLLABICS FULL STOP +166F..167F ; L # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +1681..169A ; L # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +16A0..16EA ; L # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EB..16ED ; L # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION +16EE..16F0 ; L # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +16F1..16F8 ; L # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC +1700..1711 ; L # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +1715 ; L # Mc TAGALOG SIGN PAMUDPOD +171F..1731 ; L # Lo [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA +1734 ; L # Mc HANUNOO SIGN PAMUDPOD +1735..1736 ; L # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +1740..1751 ; L # Lo [18] BUHID LETTER A..BUHID LETTER HA +1760..176C ; L # Lo [13] TAGBANWA LETTER 
A..TAGBANWA LETTER YA +176E..1770 ; L # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1780..17B3 ; L # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B6 ; L # Mc KHMER VOWEL SIGN AA +17BE..17C5 ; L # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C7..17C8 ; L # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +17D4..17D6 ; L # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH +17D7 ; L # Lm KHMER SIGN LEK TOO +17D8..17DA ; L # Po [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT +17DC ; L # Lo KHMER SIGN AVAKRAHASANYA +17E0..17E9 ; L # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE +1810..1819 ; L # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE +1820..1842 ; L # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; L # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; L # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884 ; L # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1887..18A8 ; L # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18AA ; L # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +18B0..18F5 ; L # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S +1900..191E ; L # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1923..1926 ; L # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1929..192B ; L # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; L # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1933..1938 ; L # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1946..194F ; L # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE +1950..196D ; L # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; L # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1980..19AB ; L # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C9 ; L # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 +19D0..19D9 ; L # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE 
+19DA ; L # No NEW TAI LUE THAM DIGIT ONE +1A00..1A16 ; L # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A19..1A1A ; L # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1E..1A1F ; L # Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION +1A20..1A54 ; L # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1A55 ; L # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A57 ; L # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A61 ; L # Mc TAI THAM VOWEL SIGN A +1A63..1A64 ; L # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A6D..1A72 ; L # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A80..1A89 ; L # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE +1A90..1A99 ; L # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE +1AA0..1AA6 ; L # Po [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA +1AA7 ; L # Lm TAI THAM SIGN MAI YAMOK +1AA8..1AAD ; L # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG +1B04 ; L # Mc BALINESE SIGN BISAH +1B05..1B33 ; L # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B35 ; L # Mc BALINESE VOWEL SIGN TEDUNG +1B3B ; L # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3D..1B41 ; L # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B43..1B44 ; L # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B45..1B4C ; L # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B50..1B59 ; L # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE +1B5A..1B60 ; L # Po [7] BALINESE PANTI..BALINESE PAMENENG +1B61..1B6A ; L # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE +1B74..1B7C ; L # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING +1B7D..1B7E ; L # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG +1B82 ; L # Mc SUNDANESE SIGN PANGWISAD +1B83..1BA0 ; L # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BA1 ; L # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA6..1BA7 ; L # Mc [2] SUNDANESE 
VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BAA ; L # Mc SUNDANESE SIGN PAMAAEH +1BAE..1BAF ; L # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BB0..1BB9 ; L # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE +1BBA..1BE5 ; L # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U +1BE7 ; L # Mc BATAK VOWEL SIGN E +1BEA..1BEC ; L # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BEE ; L # Mc BATAK VOWEL SIGN U +1BF2..1BF3 ; L # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +1BFC..1BFF ; L # Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT +1C00..1C23 ; L # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C24..1C2B ; L # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C34..1C35 ; L # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C3B..1C3F ; L # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK +1C40..1C49 ; L # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE +1C4D..1C4F ; L # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA +1C50..1C59 ; L # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE +1C5A..1C77 ; L # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; L # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C7E..1C7F ; L # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +1C80..1C88 ; L # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C90..1CBA ; L # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; L # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1CC0..1CC7 ; L # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA +1CD3 ; L # Po VEDIC SIGN NIHSHVASA +1CE1 ; L # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE9..1CEC ; L # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CEE..1CF3 ; L # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; L # Lo [2] VEDIC SIGN 
JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CF7 ; L # Mc VEDIC SIGN ATIKRAMA +1CFA ; L # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +1D00..1D2B ; L # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; L # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; L # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; L # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; L # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; L # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1E00..1F15 ; L # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; L # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; L # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; L # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; L # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; L # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; L # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; L # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; L # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; L # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; L # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; L # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; L # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; L # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH 
PROSGEGRAMMENI +1FD0..1FD3 ; L # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; L # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE0..1FEC ; L # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; L # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; L # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +200E ; L # Cf LEFT-TO-RIGHT MARK +2071 ; L # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; L # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; L # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2102 ; L # L& DOUBLE-STRUCK CAPITAL C +2107 ; L # L& EULER CONSTANT +210A..2113 ; L # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; L # L& DOUBLE-STRUCK CAPITAL N +2119..211D ; L # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; L # L& DOUBLE-STRUCK CAPITAL Z +2126 ; L # L& OHM SIGN +2128 ; L # L& BLACK-LETTER CAPITAL Z +212A..212D ; L # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +212F..2134 ; L # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2135..2138 ; L # Lo [4] ALEF SYMBOL..DALET SYMBOL +2139 ; L # L& INFORMATION SOURCE +213C..213F ; L # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; L # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214E ; L # L& TURNED SMALL F +214F ; L # So SYMBOL FOR SAMARITAN SOURCE +2160..2182 ; L # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; L # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; L # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +2336..237A ; L # So [69] APL FUNCTIONAL SYMBOL I-BEAM..APL FUNCTIONAL SYMBOL ALPHA +2395 ; L # So APL FUNCTIONAL SYMBOL QUAD +249C..24E9 ; L # So [78] PARENTHESIZED LATIN 
SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +26AC ; L # So MEDIUM SMALL WHITE CIRCLE +2800..28FF ; L # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 +2C00..2C7B ; L # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; L # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2CE4 ; L # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI +2CEB..2CEE ; L # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; L # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; L # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; L # L& GEORGIAN SMALL LETTER YN +2D2D ; L # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; L # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; L # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D70 ; L # Po TIFINAGH SEPARATOR MARK +2D80..2D96 ; L # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; L # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; L # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; L # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; L # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; L # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; L # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; L # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; L # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +3005 ; L # Lm IDEOGRAPHIC ITERATION MARK +3006 ; L # Lo IDEOGRAPHIC CLOSING MARK +3007 ; L # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; L # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +302E..302F ; L # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3031..3035 ; L # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3038..303A ; L # Nl [3] HANGZHOU NUMERAL 
TEN..HANGZHOU NUMERAL THIRTY +303B ; L # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C ; L # Lo MASU MARK +3041..3096 ; L # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +309D..309E ; L # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; L # Lo HIRAGANA DIGRAPH YORI +30A1..30FA ; L # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FC..30FE ; L # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF ; L # Lo KATAKANA DIGRAPH KOTO +3105..312F ; L # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E ; L # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +3190..3191 ; L # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK +3192..3195 ; L # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK +3196..319F ; L # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK +31A0..31BF ; L # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +31F0..31FF ; L # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +3200..321C ; L # So [29] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED HANGUL CIEUC U +3220..3229 ; L # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN +322A..3247 ; L # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3248..324F ; L # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3260..327B ; L # So [28] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL HIEUH A +327F ; L # So KOREAN STANDARD SYMBOL +3280..3289 ; L # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN +328A..32B0 ; L # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT +32C0..32CB ; L # So [12] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DECEMBER +32D0..3376 ; L # So [167] CIRCLED KATAKANA A..SQUARE PC +337B..33DD ; L # So [99] SQUARE ERA NAME HEISEI..SQUARE WB +33E0..33FE ; L # So [31] IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE 
+3400..4DBF ; L # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4E00..A014 ; L # Lo [21013] CJK UNIFIED IDEOGRAPH-4E00..YI SYLLABLE E +A015 ; L # Lm YI SYLLABLE WU +A016..A48C ; L # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A4D0..A4F7 ; L # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; L # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A4FE..A4FF ; L # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP +A500..A60B ; L # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; L # Lm VAI SYLLABLE LENGTHENER +A610..A61F ; L # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A620..A629 ; L # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE +A62A..A62B ; L # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A640..A66D ; L # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; L # Lo CYRILLIC LETTER MULTIOCULAR O +A680..A69B ; L # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; L # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A6A0..A6E5 ; L # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; L # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A6F2..A6F7 ; L # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK +A722..A76F ; L # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; L # Lm MODIFIER LETTER US +A771..A787 ; L # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A789..A78A ; L # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A78B..A78E ; L # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F ; L # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CA ; L # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7D0..A7D1 ; L # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; L # L& LATIN SMALL LETTER DOUBLE THORN +A7D5..A7D9 ; L # L& [5] LATIN SMALL LETTER DOUBLE 
WYNN..LATIN SMALL LETTER SIGMOID S +A7F2..A7F4 ; L # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; L # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7 ; L # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7F8..A7F9 ; L # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; L # L& LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A801 ; L # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I +A803..A805 ; L # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A807..A80A ; L # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80C..A822 ; L # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A823..A824 ; L # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A827 ; L # Mc SYLOTI NAGRI VOWEL SIGN OO +A830..A835 ; L # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS +A836..A837 ; L # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK +A840..A873 ; L # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A880..A881 ; L # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A882..A8B3 ; L # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8B4..A8C3 ; L # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8CE..A8CF ; L # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A8D0..A8D9 ; L # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE +A8F2..A8F7 ; L # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8F8..A8FA ; L # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET +A8FB ; L # Lo DEVANAGARI HEADSTROKE +A8FC ; L # Po DEVANAGARI SIGN SIDDHAM +A8FD..A8FE ; L # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A900..A909 ; L # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE +A90A..A925 ; L # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A92E..A92F ; L # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA +A930..A946 ; L # Lo [23] REJANG LETTER 
KA..REJANG LETTER A +A952..A953 ; L # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA +A95F ; L # Po REJANG SECTION MARK +A960..A97C ; L # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A983 ; L # Mc JAVANESE SIGN WIGNYAN +A984..A9B2 ; L # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9B4..A9B5 ; L # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9BA..A9BB ; L # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BE..A9C0 ; L # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON +A9C1..A9CD ; L # Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH +A9CF ; L # Lm JAVANESE PANGRANGKEP +A9D0..A9D9 ; L # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE +A9DE..A9DF ; L # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN +A9E0..A9E4 ; L # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E6 ; L # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +A9E7..A9EF ; L # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9F0..A9F9 ; L # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE +A9FA..A9FE ; L # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA00..AA28 ; L # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA2F..AA30 ; L # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA33..AA34 ; L # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA40..AA42 ; L # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA44..AA4B ; L # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA4D ; L # Mc CHAM CONSONANT SIGN FINAL H +AA50..AA59 ; L # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE +AA5C..AA5F ; L # Po [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA +AA60..AA6F ; L # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; L # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; L # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA77..AA79 ; L # So [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON 
TWO +AA7A ; L # Lo MYANMAR LETTER AITON RA +AA7B ; L # Mc MYANMAR SIGN PAO KAREN TONE +AA7D ; L # Mc MYANMAR SIGN TAI LAING TONE-5 +AA7E..AAAF ; L # Lo [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O +AAB1 ; L # Lo TAI VIET VOWEL AA +AAB5..AAB6 ; L # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB9..AABD ; L # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AAC0 ; L # Lo TAI VIET TONE MAI NUENG +AAC2 ; L # Lo TAI VIET TONE MAI SONG +AADB..AADC ; L # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; L # Lm TAI VIET SYMBOL SAM +AADE..AADF ; L # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI +AAE0..AAEA ; L # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; L # Mc MEETEI MAYEK VOWEL SIGN II +AAEE..AAEF ; L # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF0..AAF1 ; L # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +AAF2 ; L # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; L # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; L # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AB01..AB06 ; L # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; L # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; L # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; L # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; L # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +AB30..AB5A ; L # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5B ; L # Sk MODIFIER BREVE WITH INVERTED BREVE +AB5C..AB5F ; L # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; L # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; L # Lm MODIFIER LETTER SMALL TURNED W +AB70..ABBF ; L # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +ABC0..ABE2 ; L # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +ABE3..ABE4 ; L # Mc [2] 
MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE6..ABE7 ; L # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE9..ABEA ; L # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEB ; L # Po MEETEI MAYEK CHEIKHEI +ABEC ; L # Mc MEETEI MAYEK LUM IYEK +ABF0..ABF9 ; L # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE +AC00..D7A3 ; L # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; L # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; L # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +E000..F8FF ; L # Co [6400] <private-use-E000>..<private-use-F8FF> +F900..FA6D ; L # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; L # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +FB00..FB06 ; L # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; L # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FF21..FF3A ; L # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF41..FF5A ; L # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF66..FF6F ; L # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; L # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D ; L # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FF9E..FF9F ; L # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFA0..FFBE ; L # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; L # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; L # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; L # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +10000..1000B ; L # Lo [12] LINEAR B 
SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; L # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; L # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; L # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; L # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; L # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; L # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10100 ; L # Po AEGEAN WORD SEPARATOR LINE +10102 ; L # Po AEGEAN CHECK MARK +10107..10133 ; L # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND +10137..1013F ; L # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT +1018D..1018E ; L # So [2] GREEK INDICTION SIGN..NOMISMA SIGN +101D0..101FC ; L # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND +10280..1029C ; L # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0 ; L # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 +10300..1031F ; L # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +10320..10323 ; L # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY +1032D..10340 ; L # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA +10341 ; L # Nl GOTHIC LETTER NINETY +10342..10349 ; L # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; L # Nl GOTHIC LETTER NINE HUNDRED +10350..10375 ; L # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10380..1039D ; L # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +1039F ; L # Po UGARITIC WORD DIVIDER +103A0..103C3 ; L # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; L # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D0 ; L # Po OLD PERSIAN WORD DIVIDER +103D1..103D5 ; L # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F ; L # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1049D ; L # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO 
+104A0..104A9 ; L # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE +104B0..104D3 ; L # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; L # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10500..10527 ; L # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563 ; L # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +1056F ; L # Po CAUCASIAN ALBANIAN CITATION MARK +10570..1057A ; L # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; L # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; L # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; L # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; L # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; L # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; L # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; L # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10600..10736 ; L # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; L # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; L # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10780..10785 ; L # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; L # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; L # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +11000 ; L # Mc BRAHMI SIGN CANDRABINDU +11002 ; L # Mc BRAHMI SIGN VISARGA +11003..11037 ; L # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11047..1104D ; L # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS +11066..1106F ; L # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +11071..11072 ; L # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11075 ; L # Lo BRAHMI LETTER OLD TAMIL LLA +11082 ; L # Mc 
KAITHI SIGN VISARGA +11083..110AF ; L # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110B0..110B2 ; L # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B7..110B8 ; L # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110BB..110BC ; L # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN +110BD ; L # Cf KAITHI NUMBER SIGN +110BE..110C1 ; L # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +110CD ; L # Cf KAITHI NUMBER SIGN ABOVE +110D0..110E8 ; L # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; L # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11103..11126 ; L # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +1112C ; L # Mc CHAKMA VOWEL SIGN E +11136..1113F ; L # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11140..11143 ; L # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +11144 ; L # Lo CHAKMA LETTER LHAA +11145..11146 ; L # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11147 ; L # Lo CHAKMA LETTER VAA +11150..11172 ; L # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11174..11175 ; L # Po [2] MAHAJANI ABBREVIATION SIGN..MAHAJANI SECTION MARK +11176 ; L # Lo MAHAJANI LIGATURE SHRI +11182 ; L # Mc SHARADA SIGN VISARGA +11183..111B2 ; L # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; L # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111BF..111C0 ; L # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; L # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C5..111C8 ; L # Po [4] SHARADA DANDA..SHARADA SEPARATOR +111CD ; L # Po SHARADA SUTRA MARK +111CE ; L # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +111D0..111D9 ; L # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +111DA ; L # Lo SHARADA EKAM +111DB ; L # Po SHARADA SIGN SIDDHAM +111DC ; L # Lo SHARADA HEADSTROKE +111DD..111DF ; L # Po [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2 +111E1..111F4 ; L # No [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND +11200..11211 ; L # Lo [18] KHOJKI LETTER A..KHOJKI LETTER 
JJA +11213..1122B ; L # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1122C..1122E ; L # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +11232..11233 ; L # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11235 ; L # Mc KHOJKI SIGN VIRAMA +11238..1123D ; L # Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN +11280..11286 ; L # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; L # Lo MULTANI LETTER GHA +1128A..1128D ; L # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; L # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA +1129F..112A8 ; L # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112A9 ; L # Po MULTANI SECTION MARK +112B0..112DE ; L # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +112E0..112E2 ; L # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112F0..112F9 ; L # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE +11302..11303 ; L # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +11305..1130C ; L # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; L # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; L # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330 ; L # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; L # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; L # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133D ; L # Lo GRANTHA SIGN AVAGRAHA +1133E..1133F ; L # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11341..11344 ; L # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; L # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134D ; L # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA +11350 ; L # Lo GRANTHA OM +11357 ; L # Mc GRANTHA AU LENGTH MARK +1135D..11361 ; L # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11362..11363 ; L # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11400..11434 ; L # Lo [53] NEWA LETTER A..NEWA LETTER HA +11435..11437 ; L # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II 
+11440..11441 ; L # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11445 ; L # Mc NEWA SIGN VISARGA +11447..1144A ; L # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +1144B..1144F ; L # Po [5] NEWA DANDA..NEWA ABBREVIATION SIGN +11450..11459 ; L # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE +1145A..1145B ; L # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK +1145D ; L # Po NEWA INSERTION SIGN +1145F..11461 ; L # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF ; L # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114B0..114B2 ; L # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B9 ; L # Mc TIRHUTA VOWEL SIGN E +114BB..114BE ; L # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114C1 ; L # Mc TIRHUTA SIGN VISARGA +114C4..114C5 ; L # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C6 ; L # Po TIRHUTA ABBREVIATION SIGN +114C7 ; L # Lo TIRHUTA OM +114D0..114D9 ; L # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE +11580..115AE ; L # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115AF..115B1 ; L # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B8..115BB ; L # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BE ; L # Mc SIDDHAM SIGN VISARGA +115C1..115D7 ; L # Po [23] SIDDHAM SIGN SIDDHAM..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +115D8..115DB ; L # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +11600..1162F ; L # Lo [48] MODI LETTER A..MODI LETTER LLA +11630..11632 ; L # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +1163B..1163C ; L # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163E ; L # Mc MODI SIGN VISARGA +11641..11643 ; L # Po [3] MODI DANDA..MODI ABBREVIATION SIGN +11644 ; L # Lo MODI SIGN HUVA +11650..11659 ; L # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE +11680..116AA ; L # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AC ; L # Mc TAKRI SIGN VISARGA +116AE..116AF ; L # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B6 ; L # Mc TAKRI SIGN VIRAMA +116B8 ; L # Lo TAKRI LETTER ARCHAIC KHA 
+116B9 ; L # Po TAKRI ABBREVIATION SIGN +116C0..116C9 ; L # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +11700..1171A ; L # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +11720..11721 ; L # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11726 ; L # Mc AHOM VOWEL SIGN E +11730..11739 ; L # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE +1173A..1173B ; L # No [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY +1173C..1173E ; L # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +1173F ; L # So AHOM SYMBOL VI +11740..11746 ; L # Lo [7] AHOM LETTER CA..AHOM LETTER LLA +11800..1182B ; L # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +1182C..1182E ; L # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +11838 ; L # Mc DOGRA SIGN VISARGA +1183B ; L # Po DOGRA ABBREVIATION SIGN +118A0..118DF ; L # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118E0..118E9 ; L # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +118EA..118F2 ; L # No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY +118FF..11906 ; L # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E +11909 ; L # Lo DIVES AKURU LETTER O +1190C..11913 ; L # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; L # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; L # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +11930..11935 ; L # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E +11937..11938 ; L # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +1193D ; L # Mc DIVES AKURU SIGN HALANTA +1193F ; L # Lo DIVES AKURU PREFIXED NASAL SIGN +11940 ; L # Mc DIVES AKURU MEDIAL YA +11941 ; L # Lo DIVES AKURU INITIAL RA +11942 ; L # Mc DIVES AKURU MEDIAL RA +11944..11946 ; L # Po [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK +11950..11959 ; L # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +119A0..119A7 ; L # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; L # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA 
+119D1..119D3 ; L # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119DC..119DF ; L # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E1 ; L # Lo NANDINAGARI SIGN AVAGRAHA +119E2 ; L # Po NANDINAGARI SIGN SIDDHAM +119E3 ; L # Lo NANDINAGARI HEADSTROKE +119E4 ; L # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A00 ; L # Lo ZANABAZAR SQUARE LETTER A +11A07..11A08 ; L # Mn [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU +11A0B..11A32 ; L # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A39 ; L # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3A ; L # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A3F..11A46 ; L # Po [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK +11A50 ; L # Lo SOYOMBO LETTER A +11A57..11A58 ; L # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A5C..11A89 ; L # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A97 ; L # Mc SOYOMBO SIGN VISARGA +11A9A..11A9C ; L # Po [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD +11A9D ; L # Lo SOYOMBO MARK PLUTA +11A9E..11AA2 ; L # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 +11AB0..11AF8 ; L # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11C00..11C08 ; L # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; L # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C2F ; L # Mc BHAIKSUKI VOWEL SIGN AA +11C3E ; L # Mc BHAIKSUKI SIGN VISARGA +11C3F ; L # Mn BHAIKSUKI SIGN VIRAMA +11C40 ; L # Lo BHAIKSUKI SIGN AVAGRAHA +11C41..11C45 ; L # Po [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2 +11C50..11C59 ; L # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE +11C5A..11C6C ; L # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK +11C70..11C71 ; L # Po [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD +11C72..11C8F ; L # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11CA9 ; L # Mc MARCHEN SUBJOINED LETTER YA +11CB1 ; L # Mc 
MARCHEN VOWEL SIGN I +11CB4 ; L # Mc MARCHEN VOWEL SIGN O +11D00..11D06 ; L # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; L # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; L # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D46 ; L # Lo MASARAM GONDI REPHA +11D50..11D59 ; L # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE +11D60..11D65 ; L # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; L # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; L # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D8A..11D8E ; L # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D93..11D94 ; L # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D96 ; L # Mc GUNJALA GONDI SIGN VISARGA +11D98 ; L # Lo GUNJALA GONDI OM +11DA0..11DA9 ; L # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11EE0..11EF2 ; L # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11EF5..11EF6 ; L # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11EF7..11EF8 ; L # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +11FB0 ; L # Lo LISU LETTER YHA +11FC0..11FD4 ; L # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH +11FFF ; L # Po TAMIL PUNCTUATION END OF TEXT +12000..12399 ; L # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E ; L # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12470..12474 ; L # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON +12480..12543 ; L # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0 ; L # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +12FF1..12FF2 ; L # Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302 +13000..1342E ; L # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 +13430..13438 ; L 
# Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT +14400..14646 ; L # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16800..16A38 ; L # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E ; L # Lo [31] MRO LETTER TA..MRO LETTER TEK +16A60..16A69 ; L # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE +16A6E..16A6F ; L # Po [2] MRO DANDA..MRO DOUBLE DANDA +16A70..16ABE ; L # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AC0..16AC9 ; L # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE +16AD0..16AED ; L # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16AF5 ; L # Po BASSA VAH FULL STOP +16B00..16B2F ; L # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B37..16B3B ; L # Po [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM +16B3C..16B3F ; L # So [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB +16B40..16B43 ; L # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B44 ; L # Po PAHAWH HMONG SIGN XAUS +16B45 ; L # So PAHAWH HMONG SIGN CIM TSOV ROG +16B50..16B59 ; L # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE +16B5B..16B61 ; L # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS +16B63..16B77 ; L # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; L # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16E40..16E7F ; L # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16E80..16E96 ; L # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM +16E97..16E9A ; L # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16F00..16F4A ; L # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F50 ; L # Lo MIAO LETTER NASALIZATION +16F51..16F87 ; L # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F93..16F9F ; L # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; L # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; L # Lm OLD CHINESE 
ITERATION MARK +16FF0..16FF1 ; L # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +17000..187F7 ; L # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18CD5 ; L # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D08 ; L # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +1AFF0..1AFF3 ; L # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; L # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; L # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000..1B122 ; L # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B150..1B152 ; L # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B164..1B167 ; L # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B170..1B2FB ; L # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +1BC00..1BC6A ; L # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; L # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; L # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; L # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1BC9C ; L # So DUPLOYAN SIGN O WITH CROSS +1BC9F ; L # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1CF50..1CFC3 ; L # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK +1D000..1D0F5 ; L # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO +1D100..1D126 ; L # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 +1D129..1D164 ; L # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE +1D165..1D166 ; L # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D16A..1D16C ; L # So [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3 +1D16D..1D172 ; L # Mc [6] 
MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D183..1D184 ; L # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN +1D18C..1D1A9 ; L # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH +1D1AE..1D1E8 ; L # So [59] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KIEVAN FLAT SIGN +1D2E0..1D2F3 ; L # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN +1D360..1D378 ; L # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE +1D400..1D454 ; L # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; L # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; L # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; L # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; L # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; L # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; L # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; L # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; L # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; L # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; L # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; L # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; L # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; L # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; L # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; L # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; L # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; L # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y 
+1D552..1D6A5 ; L # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; L # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C1 ; L # Sm MATHEMATICAL BOLD NABLA +1D6C2..1D6DA ; L # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; L # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FB ; L # Sm MATHEMATICAL ITALIC NABLA +1D6FC..1D714 ; L # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; L # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D735 ; L # Sm MATHEMATICAL BOLD ITALIC NABLA +1D736..1D74E ; L # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; L # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D76F ; L # Sm MATHEMATICAL SANS-SERIF BOLD NABLA +1D770..1D788 ; L # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; L # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7A9 ; L # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA +1D7AA..1D7C2 ; L # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; L # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D800..1D9FF ; L # So [512] SIGNWRITING HAND-FIST INDEX..SIGNWRITING HEAD +1DA37..1DA3A ; L # So [4] SIGNWRITING AIR BLOW SMALL ROTATIONS..SIGNWRITING BREATH EXHALE +1DA6D..1DA74 ; L # So [8] SIGNWRITING SHOULDER HIP SPINE..SIGNWRITING TORSO-FLOORPLANE TWISTING +1DA76..1DA83 ; L # So [14] SIGNWRITING LIMB COMBINATION..SIGNWRITING LOCATION DEPTH +1DA85..1DA86 ; L # So [2] SIGNWRITING LOCATION TORSO..SIGNWRITING LOCATION LIMBS DIGITS +1DA87..1DA8B ; L # Po [5] SIGNWRITING COMMA..SIGNWRITING 
PARENTHESIS +1DF00..1DF09 ; L # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0A ; L # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E ; L # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1E100..1E12C ; L # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E137..1E13D ; L # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E140..1E149 ; L # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE +1E14E ; L # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E14F ; L # So NYIAKENG PUACHUE HMONG CIRCLED CA +1E290..1E2AD ; L # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2C0..1E2EB ; L # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E2F0..1E2F9 ; L # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E7E0..1E7E6 ; L # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; L # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; L # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; L # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1F110..1F12E ; L # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ +1F130..1F169 ; L # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F1AC ; L # So [61] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VOD +1F1E6..1F202 ; L # So [29] REGIONAL INDICATOR SYMBOL LETTER A..SQUARED KATAKANA SA +1F210..1F23B ; L # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D +1F240..1F248 ; L # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 +1F250..1F251 ; L # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT +20000..2A6DF ; L # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B738 ; L # Lo [4153] CJK 
UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2B740..2B81D ; L # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; L # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; L # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2F800..2FA1D ; L # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +30000..3134A ; L # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +F0000..FFFFD ; L # Co [65534] <private-use-F0000>..<private-use-FFFFD> +100000..10FFFD; L # Co [65534] <private-use-100000>..<private-use-10FFFD> + +# The above property value applies to 825575 code points not listed here. +# Total code points: 1096333 + +# ================================================ + +# Bidi_Class=Right_To_Left + +0590 ; R # Cn <reserved-0590> +05BE ; R # Pd HEBREW PUNCTUATION MAQAF +05C0 ; R # Po HEBREW PUNCTUATION PASEQ +05C3 ; R # Po HEBREW PUNCTUATION SOF PASUQ +05C6 ; R # Po HEBREW PUNCTUATION NUN HAFUKHA +05C8..05CF ; R # Cn [8] <reserved-05C8>..<reserved-05CF> +05D0..05EA ; R # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EB..05EE ; R # Cn [4] <reserved-05EB>..<reserved-05EE> +05EF..05F2 ; R # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +05F3..05F4 ; R # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM +05F5..05FF ; R # Cn [11] <reserved-05F5>..<reserved-05FF> +07C0..07C9 ; R # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE +07CA..07EA ; R # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07F4..07F5 ; R # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07FA ; R # Lm NKO LAJANYALAN +07FB..07FC ; R # Cn [2] <reserved-07FB>..<reserved-07FC> +07FE..07FF ; R # Sc [2] NKO DOROME SIGN..NKO TAMAN SIGN +0800..0815 ; R # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +081A ; R # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +0824 ; R # Lm SAMARITAN MODIFIER LETTER SHORT A +0828 ; R # Lm SAMARITAN MODIFIER LETTER I 
+082E..082F ; R # Cn [2] <reserved-082E>..<reserved-082F> +0830..083E ; R # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU +083F ; R # Cn <reserved-083F> +0840..0858 ; R # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +085C..085D ; R # Cn [2] <reserved-085C>..<reserved-085D> +085E ; R # Po MANDAIC PUNCTUATION +085F ; R # Cn <reserved-085F> +200F ; R # Cf RIGHT-TO-LEFT MARK +FB1D ; R # Lo HEBREW LETTER YOD WITH HIRIQ +FB1F..FB28 ; R # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB2A..FB36 ; R # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB37 ; R # Cn <reserved-FB37> +FB38..FB3C ; R # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3D ; R # Cn <reserved-FB3D> +FB3E ; R # Lo HEBREW LETTER MEM WITH DAGESH +FB3F ; R # Cn <reserved-FB3F> +FB40..FB41 ; R # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB42 ; R # Cn <reserved-FB42> +FB43..FB44 ; R # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB45 ; R # Cn <reserved-FB45> +FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED +10800..10805 ; R # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10806..10807 ; R # Cn [2] <reserved-10806>..<reserved-10807> +10808 ; R # Lo CYPRIOT SYLLABLE JO +10809 ; R # Cn <reserved-10809> +1080A..10835 ; R # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10836 ; R # Cn <reserved-10836> +10837..10838 ; R # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +10839..1083B ; R # Cn [3] <reserved-10839>..<reserved-1083B> +1083C ; R # Lo CYPRIOT SYLLABLE ZA +1083D..1083E ; R # Cn [2] <reserved-1083D>..<reserved-1083E> +1083F..10855 ; R # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW +10856 ; R # Cn <reserved-10856> +10857 ; R # Po IMPERIAL ARAMAIC SECTION SIGN +10858..1085F ; R # No [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND +10860..10876 ; R # Lo [23] PALMYRENE LETTER 
ALEPH..PALMYRENE LETTER TAW +10877..10878 ; R # So [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON +10879..1087F ; R # No [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY +10880..1089E ; R # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +1089F..108A6 ; R # Cn [8] <reserved-1089F>..<reserved-108A6> +108A7..108AF ; R # No [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED +108B0..108DF ; R # Cn [48] <reserved-108B0>..<reserved-108DF> +108E0..108F2 ; R # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F3 ; R # Cn <reserved-108F3> +108F4..108F5 ; R # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +108F6..108FA ; R # Cn [5] <reserved-108F6>..<reserved-108FA> +108FB..108FF ; R # No [5] HATRAN NUMBER ONE..HATRAN NUMBER ONE HUNDRED +10900..10915 ; R # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10916..1091B ; R # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE +1091C..1091E ; R # Cn [3] <reserved-1091C>..<reserved-1091E> +10920..10939 ; R # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +1093A..1093E ; R # Cn [5] <reserved-1093A>..<reserved-1093E> +1093F ; R # Po LYDIAN TRIANGULAR MARK +10940..1097F ; R # Cn [64] <reserved-10940>..<reserved-1097F> +10980..109B7 ; R # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109B8..109BB ; R # Cn [4] <reserved-109B8>..<reserved-109BB> +109BC..109BD ; R # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF +109BE..109BF ; R # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +109C0..109CF ; R # No [16] MEROITIC CURSIVE NUMBER ONE..MEROITIC CURSIVE NUMBER SEVENTY +109D0..109D1 ; R # Cn [2] <reserved-109D0>..<reserved-109D1> +109D2..109FF ; R # No [46] MEROITIC CURSIVE NUMBER ONE HUNDRED..MEROITIC CURSIVE FRACTION TEN TWELFTHS +10A00 ; R # Lo KHAROSHTHI LETTER A +10A04 ; R # Cn <reserved-10A04> +10A07..10A0B ; R # Cn [5] <reserved-10A07>..<reserved-10A0B> +10A10..10A13 ; R # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI 
LETTER GHA +10A14 ; R # Cn <reserved-10A14> +10A15..10A17 ; R # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A18 ; R # Cn <reserved-10A18> +10A19..10A35 ; R # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A36..10A37 ; R # Cn [2] <reserved-10A36>..<reserved-10A37> +10A3B..10A3E ; R # Cn [4] <reserved-10A3B>..<reserved-10A3E> +10A40..10A48 ; R # No [9] KHAROSHTHI DIGIT ONE..KHAROSHTHI FRACTION ONE HALF +10A49..10A4F ; R # Cn [7] <reserved-10A49>..<reserved-10A4F> +10A50..10A58 ; R # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES +10A59..10A5F ; R # Cn [7] <reserved-10A59>..<reserved-10A5F> +10A60..10A7C ; R # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A7D..10A7E ; R # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY +10A7F ; R # Po OLD SOUTH ARABIAN NUMERIC INDICATOR +10A80..10A9C ; R # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10A9D..10A9F ; R # No [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY +10AA0..10ABF ; R # Cn [32] <reserved-10AA0>..<reserved-10ABF> +10AC0..10AC7 ; R # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC8 ; R # So MANICHAEAN SIGN UD +10AC9..10AE4 ; R # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10AE7..10AEA ; R # Cn [4] <reserved-10AE7>..<reserved-10AEA> +10AEB..10AEF ; R # No [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED +10AF0..10AF6 ; R # Po [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER +10AF7..10AFF ; R # Cn [9] <reserved-10AF7>..<reserved-10AFF> +10B00..10B35 ; R # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B36..10B38 ; R # Cn [3] <reserved-10B36>..<reserved-10B38> +10B40..10B55 ; R # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B56..10B57 ; R # Cn [2] <reserved-10B56>..<reserved-10B57> +10B58..10B5F ; R # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND +10B60..10B72 ; R # Lo [19] 
INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B73..10B77 ; R # Cn [5] <reserved-10B73>..<reserved-10B77> +10B78..10B7F ; R # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND +10B80..10B91 ; R # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10B92..10B98 ; R # Cn [7] <reserved-10B92>..<reserved-10B98> +10B99..10B9C ; R # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT +10B9D..10BA8 ; R # Cn [12] <reserved-10B9D>..<reserved-10BA8> +10BA9..10BAF ; R # No [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED +10BB0..10BFF ; R # Cn [80] <reserved-10BB0>..<reserved-10BFF> +10C00..10C48 ; R # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10C49..10C7F ; R # Cn [55] <reserved-10C49>..<reserved-10C7F> +10C80..10CB2 ; R # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CB3..10CBF ; R # Cn [13] <reserved-10CB3>..<reserved-10CBF> +10CC0..10CF2 ; R # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10CF3..10CF9 ; R # Cn [7] <reserved-10CF3>..<reserved-10CF9> +10CFA..10CFF ; R # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND +10D40..10E5F ; R # Cn [288] <reserved-10D40>..<reserved-10E5F> +10E7F ; R # Cn <reserved-10E7F> +10E80..10EA9 ; R # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EAA ; R # Cn <reserved-10EAA> +10EAD ; R # Pd YEZIDI HYPHENATION MARK +10EAE..10EAF ; R # Cn [2] <reserved-10EAE>..<reserved-10EAF> +10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EB2..10EFF ; R # Cn [78] <reserved-10EB2>..<reserved-10EFF> +10F00..10F1C ; R # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F1D..10F26 ; R # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF +10F27 ; R # Lo OLD SOGDIAN LIGATURE AYIN-DALETH +10F28..10F2F ; R # Cn [8] <reserved-10F28>..<reserved-10F2F> +10F70..10F81 
; R # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10F86..10F89 ; R # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS +10F8A..10FAF ; R # Cn [38] <reserved-10F8A>..<reserved-10FAF> +10FB0..10FC4 ; R # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FC5..10FCB ; R # No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED +10FCC..10FDF ; R # Cn [20] <reserved-10FCC>..<reserved-10FDF> +10FE0..10FF6 ; R # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +10FF7..10FFF ; R # Cn [9] <reserved-10FF7>..<reserved-10FFF> +1E800..1E8C4 ; R # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E8C5..1E8C6 ; R # Cn [2] <reserved-1E8C5>..<reserved-1E8C6> +1E8C7..1E8CF ; R # No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE +1E8D7..1E8FF ; R # Cn [41] <reserved-1E8D7>..<reserved-1E8FF> +1E900..1E943 ; R # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E94B ; R # Lm ADLAM NASALIZATION MARK +1E94C..1E94F ; R # Cn [4] <reserved-1E94C>..<reserved-1E94F> +1E950..1E959 ; R # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE +1E95A..1E95D ; R # Cn [4] <reserved-1E95A>..<reserved-1E95D> +1E95E..1E95F ; R # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK +1E960..1EC6F ; R # Cn [784] <reserved-1E960>..<reserved-1EC6F> +1ECC0..1ECFF ; R # Cn [64] <reserved-1ECC0>..<reserved-1ECFF> +1ED50..1EDFF ; R # Cn [176] <reserved-1ED50>..<reserved-1EDFF> +1EF00..1EFFF ; R # Cn [256] <reserved-1EF00>..<reserved-1EFFF> + +# Total code points: 3711 + +# ================================================ + +# Bidi_Class=European_Number + +0030..0039 ; EN # Nd [10] DIGIT ZERO..DIGIT NINE +00B2..00B3 ; EN # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE +00B9 ; EN # No SUPERSCRIPT ONE +06F0..06F9 ; EN # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE +2070 ; EN # No SUPERSCRIPT ZERO +2074..2079 ; EN # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE +2080..2089 ; EN # No [10] SUBSCRIPT 
ZERO..SUBSCRIPT NINE +2488..249B ; EN # No [20] DIGIT ONE FULL STOP..NUMBER TWENTY FULL STOP +FF10..FF19 ; EN # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +102E1..102FB ; EN # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED +1D7CE..1D7FF ; EN # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1F100..1F10A ; EN # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA +1FBF0..1FBF9 ; EN # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE + +# Total code points: 168 + +# ================================================ + +# Bidi_Class=European_Separator + +002B ; ES # Sm PLUS SIGN +002D ; ES # Pd HYPHEN-MINUS +207A..207B ; ES # Sm [2] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT MINUS +208A..208B ; ES # Sm [2] SUBSCRIPT PLUS SIGN..SUBSCRIPT MINUS +2212 ; ES # Sm MINUS SIGN +FB29 ; ES # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN +FE62 ; ES # Sm SMALL PLUS SIGN +FE63 ; ES # Pd SMALL HYPHEN-MINUS +FF0B ; ES # Sm FULLWIDTH PLUS SIGN +FF0D ; ES # Pd FULLWIDTH HYPHEN-MINUS + +# Total code points: 12 + +# ================================================ + +# Bidi_Class=European_Terminator + +0023 ; ET # Po NUMBER SIGN +0024 ; ET # Sc DOLLAR SIGN +0025 ; ET # Po PERCENT SIGN +00A2..00A5 ; ET # Sc [4] CENT SIGN..YEN SIGN +00B0 ; ET # So DEGREE SIGN +00B1 ; ET # Sm PLUS-MINUS SIGN +058F ; ET # Sc ARMENIAN DRAM SIGN +0609..060A ; ET # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN +066A ; ET # Po ARABIC PERCENT SIGN +09F2..09F3 ; ET # Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN +09FB ; ET # Sc BENGALI GANDA MARK +0AF1 ; ET # Sc GUJARATI RUPEE SIGN +0BF9 ; ET # Sc TAMIL RUPEE SIGN +0E3F ; ET # Sc THAI CURRENCY SYMBOL BAHT +17DB ; ET # Sc KHMER CURRENCY SYMBOL RIEL +2030..2034 ; ET # Po [5] PER MILLE SIGN..TRIPLE PRIME +20A0..20C0 ; ET # Sc [33] EURO-CURRENCY SIGN..SOM SIGN +20C1..20CF ; ET # Cn [15] <reserved-20C1>..<reserved-20CF> +212E ; ET # So ESTIMATED SYMBOL +2213 ; ET # Sm MINUS-OR-PLUS SIGN +A838 ; ET # Sc NORTH INDIC 
RUPEE MARK +A839 ; ET # So NORTH INDIC QUANTITY MARK +FE5F ; ET # Po SMALL NUMBER SIGN +FE69 ; ET # Sc SMALL DOLLAR SIGN +FE6A ; ET # Po SMALL PERCENT SIGN +FF03 ; ET # Po FULLWIDTH NUMBER SIGN +FF04 ; ET # Sc FULLWIDTH DOLLAR SIGN +FF05 ; ET # Po FULLWIDTH PERCENT SIGN +FFE0..FFE1 ; ET # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN +FFE5..FFE6 ; ET # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN +11FDD..11FE0 ; ET # Sc [4] TAMIL SIGN KAACU..TAMIL SIGN VARAAKAN +1E2FF ; ET # Sc WANCHO NGUN SIGN + +# Total code points: 92 + +# ================================================ + +# Bidi_Class=Arabic_Number + +0600..0605 ; AN # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE +0660..0669 ; AN # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE +066B..066C ; AN # Po [2] ARABIC DECIMAL SEPARATOR..ARABIC THOUSANDS SEPARATOR +06DD ; AN # Cf ARABIC END OF AYAH +0890..0891 ; AN # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE +08E2 ; AN # Cf ARABIC DISPUTED END OF AYAH +10D30..10D39 ; AN # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10E60..10E7E ; AN # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS + +# Total code points: 63 + +# ================================================ + +# Bidi_Class=Common_Separator + +002C ; CS # Po COMMA +002E..002F ; CS # Po [2] FULL STOP..SOLIDUS +003A ; CS # Po COLON +00A0 ; CS # Zs NO-BREAK SPACE +060C ; CS # Po ARABIC COMMA +202F ; CS # Zs NARROW NO-BREAK SPACE +2044 ; CS # Sm FRACTION SLASH +FE50 ; CS # Po SMALL COMMA +FE52 ; CS # Po SMALL FULL STOP +FE55 ; CS # Po SMALL COLON +FF0C ; CS # Po FULLWIDTH COMMA +FF0E..FF0F ; CS # Po [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS +FF1A ; CS # Po FULLWIDTH COLON + +# Total code points: 15 + +# ================================================ + +# Bidi_Class=Paragraph_Separator + +000A ; B # Cc <control-000A> +000D ; B # Cc <control-000D> +001C..001E ; B # Cc [3] <control-001C>..<control-001E> +0085 ; B # Cc <control-0085> +2029 ; B # Zp PARAGRAPH 
SEPARATOR + +# Total code points: 7 + +# ================================================ + +# Bidi_Class=Segment_Separator + +0009 ; S # Cc <control-0009> +000B ; S # Cc <control-000B> +001F ; S # Cc <control-001F> + +# Total code points: 3 + +# ================================================ + +# Bidi_Class=White_Space + +000C ; WS # Cc <control-000C> +0020 ; WS # Zs SPACE +1680 ; WS # Zs OGHAM SPACE MARK +2000..200A ; WS # Zs [11] EN QUAD..HAIR SPACE +2028 ; WS # Zl LINE SEPARATOR +205F ; WS # Zs MEDIUM MATHEMATICAL SPACE +3000 ; WS # Zs IDEOGRAPHIC SPACE + +# Total code points: 17 + +# ================================================ + +# Bidi_Class=Other_Neutral + +0021..0022 ; ON # Po [2] EXCLAMATION MARK..QUOTATION MARK +0026..0027 ; ON # Po [2] AMPERSAND..APOSTROPHE +0028 ; ON # Ps LEFT PARENTHESIS +0029 ; ON # Pe RIGHT PARENTHESIS +002A ; ON # Po ASTERISK +003B ; ON # Po SEMICOLON +003C..003E ; ON # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +003F..0040 ; ON # Po [2] QUESTION MARK..COMMERCIAL AT +005B ; ON # Ps LEFT SQUARE BRACKET +005C ; ON # Po REVERSE SOLIDUS +005D ; ON # Pe RIGHT SQUARE BRACKET +005E ; ON # Sk CIRCUMFLEX ACCENT +005F ; ON # Pc LOW LINE +0060 ; ON # Sk GRAVE ACCENT +007B ; ON # Ps LEFT CURLY BRACKET +007C ; ON # Sm VERTICAL LINE +007D ; ON # Pe RIGHT CURLY BRACKET +007E ; ON # Sm TILDE +00A1 ; ON # Po INVERTED EXCLAMATION MARK +00A6 ; ON # So BROKEN BAR +00A7 ; ON # Po SECTION SIGN +00A8 ; ON # Sk DIAERESIS +00A9 ; ON # So COPYRIGHT SIGN +00AB ; ON # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00AC ; ON # Sm NOT SIGN +00AE ; ON # So REGISTERED SIGN +00AF ; ON # Sk MACRON +00B4 ; ON # Sk ACUTE ACCENT +00B6..00B7 ; ON # Po [2] PILCROW SIGN..MIDDLE DOT +00B8 ; ON # Sk CEDILLA +00BB ; ON # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +00BC..00BE ; ON # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS +00BF ; ON # Po INVERTED QUESTION MARK +00D7 ; ON # Sm MULTIPLICATION SIGN +00F7 ; ON # Sm DIVISION SIGN +02B9..02BA ; ON 
# Lm [2] MODIFIER LETTER PRIME..MODIFIER LETTER DOUBLE PRIME +02C2..02C5 ; ON # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD +02C6..02CF ; ON # Lm [10] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER LOW ACUTE ACCENT +02D2..02DF ; ON # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT +02E5..02EB ; ON # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK +02EC ; ON # Lm MODIFIER LETTER VOICING +02ED ; ON # Sk MODIFIER LETTER UNASPIRATED +02EF..02FF ; ON # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0374 ; ON # Lm GREEK NUMERAL SIGN +0375 ; ON # Sk GREEK LOWER NUMERAL SIGN +037E ; ON # Po GREEK QUESTION MARK +0384..0385 ; ON # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS +0387 ; ON # Po GREEK ANO TELEIA +03F6 ; ON # Sm GREEK REVERSED LUNATE EPSILON SYMBOL +058A ; ON # Pd ARMENIAN HYPHEN +058D..058E ; ON # So [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN +0606..0607 ; ON # Sm [2] ARABIC-INDIC CUBE ROOT..ARABIC-INDIC FOURTH ROOT +060E..060F ; ON # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA +06DE ; ON # So ARABIC START OF RUB EL HIZB +06E9 ; ON # So ARABIC PLACE OF SAJDAH +07F6 ; ON # So NKO SYMBOL OO DENNEN +07F7..07F9 ; ON # Po [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK +0BF3..0BF8 ; ON # So [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN +0BFA ; ON # So TAMIL NUMBER SIGN +0C78..0C7E ; ON # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR +0F3A ; ON # Ps TIBETAN MARK GUG RTAGS GYON +0F3B ; ON # Pe TIBETAN MARK GUG RTAGS GYAS +0F3C ; ON # Ps TIBETAN MARK ANG KHANG GYON +0F3D ; ON # Pe TIBETAN MARK ANG KHANG GYAS +1390..1399 ; ON # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT +1400 ; ON # Pd CANADIAN SYLLABICS HYPHEN +169B ; ON # Ps OGHAM FEATHER MARK +169C ; ON # Pe OGHAM REVERSED FEATHER MARK +17F0..17F9 ; ON # No [10] KHMER SYMBOL LEK ATTAK 
SON..KHMER SYMBOL LEK ATTAK PRAM-BUON +1800..1805 ; ON # Po [6] MONGOLIAN BIRGA..MONGOLIAN FOUR DOTS +1806 ; ON # Pd MONGOLIAN TODO SOFT HYPHEN +1807..180A ; ON # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU +1940 ; ON # So LIMBU SIGN LOO +1944..1945 ; ON # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +19DE..19FF ; ON # So [34] NEW TAI LUE SIGN LAE..KHMER SYMBOL DAP-PRAM ROC +1FBD ; ON # Sk GREEK KORONIS +1FBF..1FC1 ; ON # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FCD..1FCF ; ON # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FDD..1FDF ; ON # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FED..1FEF ; ON # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA +1FFD..1FFE ; ON # Sk [2] GREEK OXIA..GREEK DASIA +2010..2015 ; ON # Pd [6] HYPHEN..HORIZONTAL BAR +2016..2017 ; ON # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE +2018 ; ON # Pi LEFT SINGLE QUOTATION MARK +2019 ; ON # Pf RIGHT SINGLE QUOTATION MARK +201A ; ON # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; ON # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; ON # Pf RIGHT DOUBLE QUOTATION MARK +201E ; ON # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; ON # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020..2027 ; ON # Po [8] DAGGER..HYPHENATION POINT +2035..2038 ; ON # Po [4] REVERSED PRIME..CARET +2039 ; ON # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; ON # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +203B..203E ; ON # Po [4] REFERENCE MARK..OVERLINE +203F..2040 ; ON # Pc [2] UNDERTIE..CHARACTER TIE +2041..2043 ; ON # Po [3] CARET INSERTION POINT..HYPHEN BULLET +2045 ; ON # Ps LEFT SQUARE BRACKET WITH QUILL +2046 ; ON # Pe RIGHT SQUARE BRACKET WITH QUILL +2047..2051 ; ON # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY +2052 ; ON # Sm COMMERCIAL MINUS SIGN +2053 ; ON # Po SWUNG DASH +2054 ; ON # Pc INVERTED UNDERTIE +2055..205E ; ON # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS +207C ; ON # Sm 
SUPERSCRIPT EQUALS SIGN +207D ; ON # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; ON # Pe SUPERSCRIPT RIGHT PARENTHESIS +208C ; ON # Sm SUBSCRIPT EQUALS SIGN +208D ; ON # Ps SUBSCRIPT LEFT PARENTHESIS +208E ; ON # Pe SUBSCRIPT RIGHT PARENTHESIS +2100..2101 ; ON # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT +2103..2106 ; ON # So [4] DEGREE CELSIUS..CADA UNA +2108..2109 ; ON # So [2] SCRUPLE..DEGREE FAHRENHEIT +2114 ; ON # So L B BAR SYMBOL +2116..2117 ; ON # So [2] NUMERO SIGN..SOUND RECORDING COPYRIGHT +2118 ; ON # Sm SCRIPT CAPITAL P +211E..2123 ; ON # So [6] PRESCRIPTION TAKE..VERSICLE +2125 ; ON # So OUNCE SIGN +2127 ; ON # So INVERTED OHM SIGN +2129 ; ON # So TURNED GREEK SMALL LETTER IOTA +213A..213B ; ON # So [2] ROTATED CAPITAL Q..FACSIMILE SIGN +2140..2144 ; ON # Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y +214A ; ON # So PROPERTY LINE +214B ; ON # Sm TURNED AMPERSAND +214C..214D ; ON # So [2] PER SIGN..AKTIESELSKAB +2150..215F ; ON # No [16] VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE +2189 ; ON # No VULGAR FRACTION ZERO THIRDS +218A..218B ; ON # So [2] TURNED DIGIT TWO..TURNED DIGIT THREE +2190..2194 ; ON # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +2195..2199 ; ON # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219A..219B ; ON # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +219C..219F ; ON # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A0 ; ON # Sm RIGHTWARDS TWO HEADED ARROW +21A1..21A2 ; ON # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A3 ; ON # Sm RIGHTWARDS ARROW WITH TAIL +21A4..21A5 ; ON # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A6 ; ON # Sm RIGHTWARDS ARROW FROM BAR +21A7..21AD ; ON # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW +21AE ; ON # Sm LEFT RIGHT ARROW WITH STROKE +21AF..21CD ; ON # So [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE +21CE..21CF ; ON # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH 
STROKE +21D0..21D1 ; ON # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D2 ; ON # Sm RIGHTWARDS DOUBLE ARROW +21D3 ; ON # So DOWNWARDS DOUBLE ARROW +21D4 ; ON # Sm LEFT RIGHT DOUBLE ARROW +21D5..21F3 ; ON # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW +21F4..2211 ; ON # Sm [30] RIGHT ARROW WITH SMALL CIRCLE..N-ARY SUMMATION +2214..22FF ; ON # Sm [236] DOT PLUS..Z NOTATION BAG MEMBERSHIP +2300..2307 ; ON # So [8] DIAMETER SIGN..WAVY LINE +2308 ; ON # Ps LEFT CEILING +2309 ; ON # Pe RIGHT CEILING +230A ; ON # Ps LEFT FLOOR +230B ; ON # Pe RIGHT FLOOR +230C..231F ; ON # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER +2320..2321 ; ON # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +2322..2328 ; ON # So [7] FROWN..KEYBOARD +2329 ; ON # Ps LEFT-POINTING ANGLE BRACKET +232A ; ON # Pe RIGHT-POINTING ANGLE BRACKET +232B..2335 ; ON # So [11] ERASE TO THE LEFT..COUNTERSINK +237B ; ON # So NOT CHECK MARK +237C ; ON # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +237D..2394 ; ON # So [24] SHOULDERED OPEN BOX..SOFTWARE-FUNCTION SYMBOL +2396..239A ; ON # So [5] DECIMAL SEPARATOR KEY SYMBOL..CLEAR SCREEN SYMBOL +239B..23B3 ; ON # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23B4..23DB ; ON # So [40] TOP SQUARE BRACKET..FUSE +23DC..23E1 ; ON # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET +23E2..2426 ; ON # So [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO +2440..244A ; ON # So [11] OCR HOOK..OCR DOUBLE BACKSLASH +2460..2487 ; ON # No [40] CIRCLED DIGIT ONE..PARENTHESIZED NUMBER TWENTY +24EA..24FF ; ON # No [22] CIRCLED DIGIT ZERO..NEGATIVE CIRCLED DIGIT ZERO +2500..25B6 ; ON # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE +25B7 ; ON # Sm WHITE RIGHT-POINTING TRIANGLE +25B8..25C0 ; ON # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C1 ; ON # Sm WHITE LEFT-POINTING TRIANGLE +25C2..25F7 ; ON # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT +25F8..25FF ; ON # 
Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE +2600..266E ; ON # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN +266F ; ON # Sm MUSIC SHARP SIGN +2670..26AB ; ON # So [60] WEST SYRIAC CROSS..MEDIUM BLACK CIRCLE +26AD..2767 ; ON # So [187] MARRIAGE SYMBOL..ROTATED FLORAL HEART BULLET +2768 ; ON # Ps MEDIUM LEFT PARENTHESIS ORNAMENT +2769 ; ON # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT +276A ; ON # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B ; ON # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C ; ON # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D ; ON # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E ; ON # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F ; ON # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770 ; ON # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771 ; ON # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT +2772 ; ON # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773 ; ON # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774 ; ON # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT +2775 ; ON # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT +2776..2793 ; ON # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN +2794..27BF ; ON # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP +27C0..27C4 ; ON # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5 ; ON # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; ON # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27E5 ; ON # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK +27E6 ; ON # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; ON # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; ON # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; ON # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; ON # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; ON # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; ON # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; ON # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; ON # Ps 
MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; ON # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +27F0..27FF ; ON # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2900..2982 ; ON # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON +2983 ; ON # Ps LEFT WHITE CURLY BRACKET +2984 ; ON # Pe RIGHT WHITE CURLY BRACKET +2985 ; ON # Ps LEFT WHITE PARENTHESIS +2986 ; ON # Pe RIGHT WHITE PARENTHESIS +2987 ; ON # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; ON # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; ON # Ps Z NOTATION LEFT BINDING BRACKET +298A ; ON # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; ON # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; ON # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; ON # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; ON # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; ON # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; ON # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; ON # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; ON # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; ON # Ps LEFT ARC LESS-THAN BRACKET +2994 ; ON # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; ON # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; ON # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; ON # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; ON # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7 ; ON # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8 ; ON # Ps LEFT WIGGLY FENCE +29D9 ; ON # Pe RIGHT WIGGLY FENCE +29DA ; ON # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; ON # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB ; ON # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FC ; ON # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; ON # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..2AFF ; ON # Sm [258] TINY..N-ARY WHITE VERTICAL BAR +2B00..2B2F ; ON # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE +2B30..2B44 ; ON # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B45..2B46 ; ON # So [2] LEFTWARDS 
QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW +2B47..2B4C ; ON # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +2B4D..2B73 ; ON # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR +2B76..2B95 ; ON # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW +2B97..2BFF ; ON # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2CE5..2CEA ; ON # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA +2CF9..2CFC ; ON # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER +2CFD ; ON # No COPTIC FRACTION ONE HALF +2CFE..2CFF ; ON # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER +2E00..2E01 ; ON # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER +2E02 ; ON # Pi LEFT SUBSTITUTION BRACKET +2E03 ; ON # Pf RIGHT SUBSTITUTION BRACKET +2E04 ; ON # Pi LEFT DOTTED SUBSTITUTION BRACKET +2E05 ; ON # Pf RIGHT DOTTED SUBSTITUTION BRACKET +2E06..2E08 ; ON # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER +2E09 ; ON # Pi LEFT TRANSPOSITION BRACKET +2E0A ; ON # Pf RIGHT TRANSPOSITION BRACKET +2E0B ; ON # Po RAISED SQUARE +2E0C ; ON # Pi LEFT RAISED OMISSION BRACKET +2E0D ; ON # Pf RIGHT RAISED OMISSION BRACKET +2E0E..2E16 ; ON # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE +2E17 ; ON # Pd DOUBLE OBLIQUE HYPHEN +2E18..2E19 ; ON # Po [2] INVERTED INTERROBANG..PALM BRANCH +2E1A ; ON # Pd HYPHEN WITH DIAERESIS +2E1B ; ON # Po TILDE WITH RING ABOVE +2E1C ; ON # Pi LEFT LOW PARAPHRASE BRACKET +2E1D ; ON # Pf RIGHT LOW PARAPHRASE BRACKET +2E1E..2E1F ; ON # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW +2E20 ; ON # Pi LEFT VERTICAL BAR WITH QUILL +2E21 ; ON # Pf RIGHT VERTICAL BAR WITH QUILL +2E22 ; ON # Ps TOP LEFT HALF BRACKET +2E23 ; ON # Pe TOP RIGHT HALF BRACKET +2E24 ; ON # Ps BOTTOM LEFT HALF BRACKET +2E25 ; ON # Pe BOTTOM RIGHT HALF BRACKET +2E26 ; ON # Ps LEFT SIDEWAYS U BRACKET +2E27 ; ON 
# Pe RIGHT SIDEWAYS U BRACKET +2E28 ; ON # Ps LEFT DOUBLE PARENTHESIS +2E29 ; ON # Pe RIGHT DOUBLE PARENTHESIS +2E2A..2E2E ; ON # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK +2E2F ; ON # Lm VERTICAL TILDE +2E30..2E39 ; ON # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; ON # Pd [2] TWO-EM DASH..THREE-EM DASH +2E3C..2E3F ; ON # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM +2E40 ; ON # Pd DOUBLE HYPHEN +2E41 ; ON # Po REVERSED COMMA +2E42 ; ON # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +2E43..2E4F ; ON # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER +2E50..2E51 ; ON # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR +2E52..2E54 ; ON # Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK +2E55 ; ON # Ps LEFT SQUARE BRACKET WITH STROKE +2E56 ; ON # Pe RIGHT SQUARE BRACKET WITH STROKE +2E57 ; ON # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE +2E58 ; ON # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE +2E59 ; ON # Ps TOP HALF LEFT PARENTHESIS +2E5A ; ON # Pe TOP HALF RIGHT PARENTHESIS +2E5B ; ON # Ps BOTTOM HALF LEFT PARENTHESIS +2E5C ; ON # Pe BOTTOM HALF RIGHT PARENTHESIS +2E5D ; ON # Pd OBLIQUE HYPHEN +2E80..2E99 ; ON # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP +2E9B..2EF3 ; ON # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE +2F00..2FD5 ; ON # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE +2FF0..2FFB ; ON # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID +3001..3003 ; ON # Po [3] IDEOGRAPHIC COMMA..DITTO MARK +3004 ; ON # So JAPANESE INDUSTRIAL STANDARD SYMBOL +3008 ; ON # Ps LEFT ANGLE BRACKET +3009 ; ON # Pe RIGHT ANGLE BRACKET +300A ; ON # Ps LEFT DOUBLE ANGLE BRACKET +300B ; ON # Pe RIGHT DOUBLE ANGLE BRACKET +300C ; ON # Ps LEFT CORNER BRACKET +300D ; ON # Pe RIGHT CORNER BRACKET +300E ; ON # Ps LEFT WHITE CORNER BRACKET +300F ; ON # Pe RIGHT WHITE CORNER BRACKET +3010 ; ON # Ps LEFT BLACK LENTICULAR BRACKET +3011 ; ON # Pe RIGHT BLACK 
LENTICULAR BRACKET +3012..3013 ; ON # So [2] POSTAL MARK..GETA MARK +3014 ; ON # Ps LEFT TORTOISE SHELL BRACKET +3015 ; ON # Pe RIGHT TORTOISE SHELL BRACKET +3016 ; ON # Ps LEFT WHITE LENTICULAR BRACKET +3017 ; ON # Pe RIGHT WHITE LENTICULAR BRACKET +3018 ; ON # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019 ; ON # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A ; ON # Ps LEFT WHITE SQUARE BRACKET +301B ; ON # Pe RIGHT WHITE SQUARE BRACKET +301C ; ON # Pd WAVE DASH +301D ; ON # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; ON # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3020 ; ON # So POSTAL MARK FACE +3030 ; ON # Pd WAVY DASH +3036..3037 ; ON # So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL +303D ; ON # Po PART ALTERNATION MARK +303E..303F ; ON # So [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE +309B..309C ; ON # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +30A0 ; ON # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +30FB ; ON # Po KATAKANA MIDDLE DOT +31C0..31E3 ; ON # So [36] CJK STROKE T..CJK STROKE Q +321D..321E ; ON # So [2] PARENTHESIZED KOREAN CHARACTER OJEON..PARENTHESIZED KOREAN CHARACTER O HU +3250 ; ON # So PARTNERSHIP SIGN +3251..325F ; ON # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE +327C..327E ; ON # So [3] CIRCLED KOREAN CHARACTER CHAMKO..CIRCLED HANGUL IEUNG U +32B1..32BF ; ON # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY +32CC..32CF ; ON # So [4] SQUARE HG..LIMITED LIABILITY SIGN +3377..337A ; ON # So [4] SQUARE DM..SQUARE IU +33DE..33DF ; ON # So [2] SQUARE V OVER M..SQUARE A OVER M +33FF ; ON # So SQUARE GAL +4DC0..4DFF ; ON # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION +A490..A4C6 ; ON # So [55] YI RADICAL QOT..YI RADICAL KE +A60D..A60F ; ON # Po [3] VAI COMMA..VAI QUESTION MARK +A673 ; ON # Po SLAVONIC ASTERISK +A67E ; ON # Po CYRILLIC KAVYKA +A67F ; ON # Lm CYRILLIC PAYEROK 
+A700..A716 ; ON # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR +A717..A71F ; ON # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; ON # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A788 ; ON # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A828..A82B ; ON # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4 +A874..A877 ; ON # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD +AB6A..AB6B ; ON # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +FD3E ; ON # Pe ORNATE LEFT PARENTHESIS +FD3F ; ON # Ps ORNATE RIGHT PARENTHESIS +FD40..FD4F ; ON # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH +FDCF ; ON # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDFD..FDFF ; ON # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL +FE10..FE16 ; ON # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK +FE17 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET +FE18 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET +FE19 ; ON # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS +FE30 ; ON # Po PRESENTATION FORM FOR VERTICAL TWO DOT LEADER +FE31..FE32 ; ON # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH +FE33..FE34 ; ON # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE +FE35 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS +FE36 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS +FE37 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET +FE38 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET +FE39 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET +FE3A ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET +FE3B ; ON # Ps PRESENTATION FORM FOR 
VERTICAL LEFT BLACK LENTICULAR BRACKET +FE3C ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET +FE3D ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET +FE3E ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET +FE3F ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET +FE40 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET +FE41 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET +FE42 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET +FE43 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET +FE44 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET +FE45..FE46 ; ON # Po [2] SESAME DOT..WHITE SESAME DOT +FE47 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET +FE48 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET +FE49..FE4C ; ON # Po [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE +FE4D..FE4F ; ON # Pc [3] DASHED LOW LINE..WAVY LOW LINE +FE51 ; ON # Po SMALL IDEOGRAPHIC COMMA +FE54 ; ON # Po SMALL SEMICOLON +FE56..FE57 ; ON # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK +FE58 ; ON # Pd SMALL EM DASH +FE59 ; ON # Ps SMALL LEFT PARENTHESIS +FE5A ; ON # Pe SMALL RIGHT PARENTHESIS +FE5B ; ON # Ps SMALL LEFT CURLY BRACKET +FE5C ; ON # Pe SMALL RIGHT CURLY BRACKET +FE5D ; ON # Ps SMALL LEFT TORTOISE SHELL BRACKET +FE5E ; ON # Pe SMALL RIGHT TORTOISE SHELL BRACKET +FE60..FE61 ; ON # Po [2] SMALL AMPERSAND..SMALL ASTERISK +FE64..FE66 ; ON # Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN +FE68 ; ON # Po SMALL REVERSE SOLIDUS +FE6B ; ON # Po SMALL COMMERCIAL AT +FF01..FF02 ; ON # Po [2] FULLWIDTH EXCLAMATION MARK..FULLWIDTH QUOTATION MARK +FF06..FF07 ; ON # Po [2] FULLWIDTH AMPERSAND..FULLWIDTH APOSTROPHE +FF08 ; ON # Ps FULLWIDTH LEFT PARENTHESIS +FF09 ; ON # Pe FULLWIDTH RIGHT PARENTHESIS +FF0A ; ON # Po FULLWIDTH ASTERISK +FF1B ; ON # Po FULLWIDTH SEMICOLON +FF1C..FF1E ; ON # Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH 
GREATER-THAN SIGN +FF1F..FF20 ; ON # Po [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT +FF3B ; ON # Ps FULLWIDTH LEFT SQUARE BRACKET +FF3C ; ON # Po FULLWIDTH REVERSE SOLIDUS +FF3D ; ON # Pe FULLWIDTH RIGHT SQUARE BRACKET +FF3E ; ON # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF3F ; ON # Pc FULLWIDTH LOW LINE +FF40 ; ON # Sk FULLWIDTH GRAVE ACCENT +FF5B ; ON # Ps FULLWIDTH LEFT CURLY BRACKET +FF5C ; ON # Sm FULLWIDTH VERTICAL LINE +FF5D ; ON # Pe FULLWIDTH RIGHT CURLY BRACKET +FF5E ; ON # Sm FULLWIDTH TILDE +FF5F ; ON # Ps FULLWIDTH LEFT WHITE PARENTHESIS +FF60 ; ON # Pe FULLWIDTH RIGHT WHITE PARENTHESIS +FF61 ; ON # Po HALFWIDTH IDEOGRAPHIC FULL STOP +FF62 ; ON # Ps HALFWIDTH LEFT CORNER BRACKET +FF63 ; ON # Pe HALFWIDTH RIGHT CORNER BRACKET +FF64..FF65 ; ON # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT +FFE2 ; ON # Sm FULLWIDTH NOT SIGN +FFE3 ; ON # Sk FULLWIDTH MACRON +FFE4 ; ON # So FULLWIDTH BROKEN BAR +FFE8 ; ON # So HALFWIDTH FORMS LIGHT VERTICAL +FFE9..FFEC ; ON # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW +FFED..FFEE ; ON # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE +FFF9..FFFB ; ON # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR +FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER +10101 ; ON # Po AEGEAN WORD SEPARATOR DOT +10140..10174 ; ON # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10175..10178 ; ON # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN +10179..10189 ; ON # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN +1018A..1018B ; ON # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN +1018C ; ON # So GREEK SINUSOID SIGN +10190..1019C ; ON # So [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL +101A0 ; ON # So GREEK SYMBOL TAU RHO +1091F ; ON # Po PHOENICIAN WORD SEPARATOR +10B39..10B3F ; ON # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION +11052..11065 ; ON # No [20] BRAHMI NUMBER ONE..BRAHMI 
NUMBER ONE THOUSAND +11660..1166C ; ON # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT +11FD5..11FDC ; ON # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI +11FE1..11FF1 ; ON # So [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA +16FE2 ; ON # Po OLD CHINESE HOOK MARK +1D1E9..1D1EA ; ON # So [2] MUSICAL SYMBOL SORI..MUSICAL SYMBOL KORON +1D200..1D241 ; ON # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 +1D245 ; ON # So GREEK MUSICAL LEIMMA +1D300..1D356 ; ON # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING +1D6DB ; ON # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +1D715 ; ON # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +1D74F ; ON # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +1D789 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +1D7C3 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL +1EEF0..1EEF1 ; ON # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL +1F000..1F02B ; ON # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK +1F030..1F093 ; ON # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 +1F0A0..1F0AE ; ON # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES +1F0B1..1F0BF ; ON # So [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER +1F0C1..1F0CF ; ON # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER +1F0D1..1F0F5 ; ON # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21 +1F10B..1F10C ; ON # No [2] DINGBAT CIRCLED SANS-SERIF DIGIT ZERO..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO +1F10D..1F10F ; ON # So [3] CIRCLED ZERO WITH SLASH..CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH +1F12F ; ON # So COPYLEFT SYMBOL +1F16A..1F16F ; ON # So [6] RAISED MC SIGN..CIRCLED HUMAN FIGURE +1F1AD ; ON # So MASK WORK SYMBOL +1F260..1F265 ; ON # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI +1F300..1F3FA ; ON # So [251] CYCLONE..AMPHORA +1F3FB..1F3FF ; ON # Sk [5] 
EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 +1F400..1F6D7 ; ON # So [728] RAT..ELEVATOR +1F6DD..1F6EC ; ON # So [16] PLAYGROUND SLIDE..AIRPLANE ARRIVING +1F6F0..1F6FC ; ON # So [13] SATELLITE..ROLLER SKATE +1F700..1F773 ; ON # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE +1F780..1F7D8 ; ON # So [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE +1F7E0..1F7EB ; ON # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE +1F7F0 ; ON # So HEAVY EQUALS SIGN +1F800..1F80B ; ON # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD +1F810..1F847 ; ON # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW +1F850..1F859 ; ON # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW +1F860..1F887 ; ON # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW +1F890..1F8AD ; ON # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS +1F8B0..1F8B1 ; ON # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST +1F900..1FA53 ; ON # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP +1FA60..1FA6D ; ON # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER +1FA70..1FA74 ; ON # So [5] BALLET SHOES..THONG SANDAL +1FA78..1FA7C ; ON # So [5] DROP OF BLOOD..CRUTCH +1FA80..1FA86 ; ON # So [7] YO-YO..NESTING DOLLS +1FA90..1FAAC ; ON # So [29] RINGED PLANET..HAMSA +1FAB0..1FABA ; ON # So [11] FLY..NEST WITH EGGS +1FAC0..1FAC5 ; ON # So [6] ANATOMICAL HEART..PERSON WITH CROWN +1FAD0..1FAD9 ; ON # So [10] BLUEBERRIES..JAR +1FAE0..1FAE7 ; ON # So [8] MELTING FACE..BUBBLES +1FAF0..1FAF6 ; ON # So [7] HAND WITH INDEX FINGER AND THUMB CROSSED..HEART HANDS +1FB00..1FB92 ; ON # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK +1FB94..1FBCA ; ON # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT 
HALF BLOCK..WHITE UP-POINTING CHEVRON + +# Total code points: 6000 + +# ================================================ + +# Bidi_Class=Boundary_Neutral + +0000..0008 ; BN # Cc [9] <control-0000>..<control-0008> +000E..001B ; BN # Cc [14] <control-000E>..<control-001B> +007F..0084 ; BN # Cc [6] <control-007F>..<control-0084> +0086..009F ; BN # Cc [26] <control-0086>..<control-009F> +00AD ; BN # Cf SOFT HYPHEN +180E ; BN # Cf MONGOLIAN VOWEL SEPARATOR +200B..200D ; BN # Cf [3] ZERO WIDTH SPACE..ZERO WIDTH JOINER +2060..2064 ; BN # Cf [5] WORD JOINER..INVISIBLE PLUS +2065 ; BN # Cn <reserved-2065> +206A..206F ; BN # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES +FDD0..FDEF ; BN # Cn [32] <noncharacter-FDD0>..<noncharacter-FDEF> +FEFF ; BN # Cf ZERO WIDTH NO-BREAK SPACE +FFF0..FFF8 ; BN # Cn [9] <reserved-FFF0>..<reserved-FFF8> +FFFE..FFFF ; BN # Cn [2] <noncharacter-FFFE>..<noncharacter-FFFF> +1BCA0..1BCA3 ; BN # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1D173..1D17A ; BN # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +1FFFE..1FFFF ; BN # Cn [2] <noncharacter-1FFFE>..<noncharacter-1FFFF> +2FFFE..2FFFF ; BN # Cn [2] <noncharacter-2FFFE>..<noncharacter-2FFFF> +3FFFE..3FFFF ; BN # Cn [2] <noncharacter-3FFFE>..<noncharacter-3FFFF> +4FFFE..4FFFF ; BN # Cn [2] <noncharacter-4FFFE>..<noncharacter-4FFFF> +5FFFE..5FFFF ; BN # Cn [2] <noncharacter-5FFFE>..<noncharacter-5FFFF> +6FFFE..6FFFF ; BN # Cn [2] <noncharacter-6FFFE>..<noncharacter-6FFFF> +7FFFE..7FFFF ; BN # Cn [2] <noncharacter-7FFFE>..<noncharacter-7FFFF> +8FFFE..8FFFF ; BN # Cn [2] <noncharacter-8FFFE>..<noncharacter-8FFFF> +9FFFE..9FFFF ; BN # Cn [2] <noncharacter-9FFFE>..<noncharacter-9FFFF> +AFFFE..AFFFF ; BN # Cn [2] <noncharacter-AFFFE>..<noncharacter-AFFFF> +BFFFE..BFFFF ; BN # Cn [2] <noncharacter-BFFFE>..<noncharacter-BFFFF> +CFFFE..CFFFF ; BN # Cn [2] <noncharacter-CFFFE>..<noncharacter-CFFFF> +DFFFE..E0000 ; BN # Cn [3] 
<noncharacter-DFFFE>..<reserved-E0000> +E0001 ; BN # Cf LANGUAGE TAG +E0002..E001F ; BN # Cn [30] <reserved-E0002>..<reserved-E001F> +E0020..E007F ; BN # Cf [96] TAG SPACE..CANCEL TAG +E0080..E00FF ; BN # Cn [128] <reserved-E0080>..<reserved-E00FF> +E01F0..E0FFF ; BN # Cn [3600] <reserved-E01F0>..<reserved-E0FFF> +EFFFE..EFFFF ; BN # Cn [2] <noncharacter-EFFFE>..<noncharacter-EFFFF> +FFFFE..FFFFF ; BN # Cn [2] <noncharacter-FFFFE>..<noncharacter-FFFFF> +10FFFE..10FFFF; BN # Cn [2] <noncharacter-10FFFE>..<noncharacter-10FFFF> + +# Total code points: 4016 + +# ================================================ + +# Bidi_Class=Nonspacing_Mark + +0300..036F ; NSM # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0483..0487 ; NSM # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0488..0489 ; NSM # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN +0591..05BD ; NSM # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BF ; NSM # Mn HEBREW POINT RAFE +05C1..05C2 ; NSM # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; NSM # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; NSM # Mn HEBREW POINT QAMATS QATAN +0610..061A ; NSM # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +064B..065F ; NSM # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0670 ; NSM # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; NSM # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DF..06E4 ; NSM # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E7..06E8 ; NSM # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06EA..06ED ; NSM # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +0711 ; NSM # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..074A ; NSM # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; NSM # Mn [11] THAANA ABAFILI..THAANA SUKUN +07EB..07F3 ; NSM # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING 
DOUBLE DOT ABOVE +07FD ; NSM # Mn NKO DANTAYALAN +0816..0819 ; NSM # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081B..0823 ; NSM # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0825..0827 ; NSM # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0829..082D ; NSM # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0859..085B ; NSM # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +0898..089F ; NSM # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +08CA..08E1 ; NSM # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E3..0902 ; NSM # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA +093A ; NSM # Mn DEVANAGARI VOWEL SIGN OE +093C ; NSM # Mn DEVANAGARI SIGN NUKTA +0941..0948 ; NSM # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +094D ; NSM # Mn DEVANAGARI SIGN VIRAMA +0951..0957 ; NSM # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE +0962..0963 ; NSM # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0981 ; NSM # Mn BENGALI SIGN CANDRABINDU +09BC ; NSM # Mn BENGALI SIGN NUKTA +09C1..09C4 ; NSM # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09CD ; NSM # Mn BENGALI SIGN VIRAMA +09E2..09E3 ; NSM # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09FE ; NSM # Mn BENGALI SANDHI MARK +0A01..0A02 ; NSM # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A3C ; NSM # Mn GURMUKHI SIGN NUKTA +0A41..0A42 ; NSM # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; NSM # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; NSM # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; NSM # Mn GURMUKHI SIGN UDAAT +0A70..0A71 ; NSM # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; NSM # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; NSM # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0ABC ; NSM # Mn GUJARATI SIGN NUKTA +0AC1..0AC5 ; NSM # Mn [5] GUJARATI VOWEL 
SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; NSM # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0ACD ; NSM # Mn GUJARATI SIGN VIRAMA +0AE2..0AE3 ; NSM # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFF ; NSM # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B01 ; NSM # Mn ORIYA SIGN CANDRABINDU +0B3C ; NSM # Mn ORIYA SIGN NUKTA +0B3F ; NSM # Mn ORIYA VOWEL SIGN I +0B41..0B44 ; NSM # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B4D ; NSM # Mn ORIYA SIGN VIRAMA +0B55..0B56 ; NSM # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B62..0B63 ; NSM # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B82 ; NSM # Mn TAMIL SIGN ANUSVARA +0BC0 ; NSM # Mn TAMIL VOWEL SIGN II +0BCD ; NSM # Mn TAMIL SIGN VIRAMA +0C00 ; NSM # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C04 ; NSM # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C3C ; NSM # Mn TELUGU SIGN NUKTA +0C3E..0C40 ; NSM # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C46..0C48 ; NSM # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; NSM # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; NSM # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; NSM # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; NSM # Mn KANNADA SIGN CANDRABINDU +0CBC ; NSM # Mn KANNADA SIGN NUKTA +0CCC..0CCD ; NSM # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CE2..0CE3 ; NSM # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0D00..0D01 ; NSM # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D3B..0D3C ; NSM # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D41..0D44 ; NSM # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D4D ; NSM # Mn MALAYALAM SIGN VIRAMA +0D62..0D63 ; NSM # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D81 ; NSM # Mn SINHALA SIGN CANDRABINDU +0DCA ; NSM 
# Mn SINHALA SIGN AL-LAKUNA +0DD2..0DD4 ; NSM # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; NSM # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0E31 ; NSM # Mn THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; NSM # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E47..0E4E ; NSM # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0EB1 ; NSM # Mn LAO VOWEL SIGN MAI KAN +0EB4..0EBC ; NSM # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EC8..0ECD ; NSM # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA +0F18..0F19 ; NSM # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; NSM # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; NSM # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; NSM # Mn TIBETAN MARK TSA -PHRU +0F71..0F7E ; NSM # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F80..0F84 ; NSM # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F86..0F87 ; NSM # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F8D..0F97 ; NSM # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; NSM # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FC6 ; NSM # Mn TIBETAN SYMBOL PADMA GDAN +102D..1030 ; NSM # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1032..1037 ; NSM # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1039..103A ; NSM # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103D..103E ; NSM # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +1058..1059 ; NSM # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105E..1060 ; NSM # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1071..1074 ; NSM # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; NSM # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1085..1086 ; NSM # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +108D ; NSM # Mn MYANMAR 
SIGN SHAN COUNCIL EMPHATIC TONE +109D ; NSM # Mn MYANMAR VOWEL SIGN AITON AI +135D..135F ; NSM # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1712..1714 ; NSM # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1732..1733 ; NSM # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1752..1753 ; NSM # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; NSM # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; NSM # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B7..17BD ; NSM # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17C6 ; NSM # Mn KHMER SIGN NIKAHIT +17C9..17D3 ; NSM # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17DD ; NSM # Mn KHMER SIGN ATTHACAN +180B..180D ; NSM # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; NSM # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +1885..1886 ; NSM # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +18A9 ; NSM # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; NSM # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1927..1928 ; NSM # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1932 ; NSM # Mn LIMBU SMALL LETTER ANUSVARA +1939..193B ; NSM # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A17..1A18 ; NSM # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A1B ; NSM # Mn BUGINESE VOWEL SIGN AE +1A56 ; NSM # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A58..1A5E ; NSM # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; NSM # Mn TAI THAM SIGN SAKOT +1A62 ; NSM # Mn TAI THAM VOWEL SIGN MAI SAT +1A65..1A6C ; NSM # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A73..1A7C ; NSM # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; NSM # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AB0..1ABD ; NSM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABE ; NSM # Me COMBINING PARENTHESES 
OVERLAY +1ABF..1ACE ; NSM # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; NSM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B34 ; NSM # Mn BALINESE SIGN REREKAN +1B36..1B3A ; NSM # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3C ; NSM # Mn BALINESE VOWEL SIGN LA LENGA +1B42 ; NSM # Mn BALINESE VOWEL SIGN PEPET +1B6B..1B73 ; NSM # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B80..1B81 ; NSM # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1BA2..1BA5 ; NSM # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA8..1BA9 ; NSM # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB..1BAD ; NSM # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE6 ; NSM # Mn BATAK SIGN TOMPI +1BE8..1BE9 ; NSM # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BED ; NSM # Mn BATAK VOWEL SIGN KARO O +1BEF..1BF1 ; NSM # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1C2C..1C33 ; NSM # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C36..1C37 ; NSM # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1CD0..1CD2 ; NSM # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; NSM # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE2..1CE8 ; NSM # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; NSM # Mn VEDIC SIGN TIRYAK +1CF4 ; NSM # Mn VEDIC TONE CANDRA ABOVE +1CF8..1CF9 ; NSM # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1DC0..1DFF ; NSM # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +20D0..20DC ; NSM # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20DD..20E0 ; NSM # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH +20E1 ; NSM # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E2..20E4 ; 
NSM # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE +20E5..20F0 ; NSM # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2CEF..2CF1 ; NSM # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2D7F ; NSM # Mn TIFINAGH CONSONANT JOINER +2DE0..2DFF ; NSM # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +302A..302D ; NSM # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +3099..309A ; NSM # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +A66F ; NSM # Mn COMBINING CYRILLIC VZMET +A670..A672 ; NSM # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN +A674..A67D ; NSM # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69E..A69F ; NSM # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6F0..A6F1 ; NSM # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A802 ; NSM # Mn SYLOTI NAGRI SIGN DVISVARA +A806 ; NSM # Mn SYLOTI NAGRI SIGN HASANTA +A80B ; NSM # Mn SYLOTI NAGRI SIGN ANUSVARA +A825..A826 ; NSM # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A82C ; NSM # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A8C4..A8C5 ; NSM # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8E0..A8F1 ; NSM # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8FF ; NSM # Mn DEVANAGARI VOWEL SIGN AY +A926..A92D ; NSM # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A947..A951 ; NSM # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A980..A982 ; NSM # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A9B3 ; NSM # Mn JAVANESE SIGN CECAK TELU +A9B6..A9B9 ; NSM # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BC..A9BD ; NSM # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9E5 ; NSM # Mn MYANMAR SIGN SHAN SAW 
+AA29..AA2E ; NSM # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA31..AA32 ; NSM # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA35..AA36 ; NSM # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; NSM # Mn CHAM CONSONANT SIGN FINAL NG +AA4C ; NSM # Mn CHAM CONSONANT SIGN FINAL M +AA7C ; NSM # Mn MYANMAR SIGN TAI LAING TONE-2 +AAB0 ; NSM # Mn TAI VIET MAI KANG +AAB2..AAB4 ; NSM # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; NSM # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AABE..AABF ; NSM # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC1 ; NSM # Mn TAI VIET TONE MAI THO +AAEC..AAED ; NSM # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; NSM # Mn MEETEI MAYEK VIRAMA +ABE5 ; NSM # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE8 ; NSM # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABED ; NSM # Mn MEETEI MAYEK APUN IYEK +FB1E ; NSM # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FE00..FE0F ; NSM # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +101FD ; NSM # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +102E0 ; NSM # Mn COPTIC EPACT THOUSANDS MARK +10376..1037A ; NSM # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10A01..10A03 ; NSM # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; NSM # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; NSM # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A38..10A3A ; NSM # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; NSM # Mn KHAROSHTHI VIRAMA +10AE5..10AE6 ; NSM # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10D24..10D27 ; NSM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10EAB..10EAC ; NSM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10F46..10F50 ; NSM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING 
STROKE BELOW +10F82..10F85 ; NSM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +11001 ; NSM # Mn BRAHMI SIGN ANUSVARA +11038..11046 ; NSM # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11070 ; NSM # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11073..11074 ; NSM # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +1107F..11081 ; NSM # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA +110B3..110B6 ; NSM # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B9..110BA ; NSM # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110C2 ; NSM # Mn KAITHI VOWEL SIGN VOCALIC R +11100..11102 ; NSM # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; NSM # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; NSM # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11173 ; NSM # Mn MAHAJANI SIGN NUKTA +11180..11181 ; NSM # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; NSM # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111C9..111CC ; NSM # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CF ; NSM # Mn SHARADA SIGN INVERTED CANDRABINDU +1122F..11231 ; NSM # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11234 ; NSM # Mn KHOJKI SIGN ANUSVARA +11236..11237 ; NSM # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; NSM # Mn KHOJKI SIGN SUKUN +112DF ; NSM # Mn KHUDAWADI SIGN ANUSVARA +112E3..112EA ; NSM # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +11300..11301 ; NSM # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +1133B..1133C ; NSM # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +11340 ; NSM # Mn GRANTHA VOWEL SIGN II +11366..1136C ; NSM # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; NSM # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11438..1143F ; NSM # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11442..11444 ; NSM # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11446 ; 
NSM # Mn NEWA SIGN NUKTA +1145E ; NSM # Mn NEWA SANDHI MARK +114B3..114B8 ; NSM # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114BA ; NSM # Mn TIRHUTA VOWEL SIGN SHORT E +114BF..114C0 ; NSM # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C2..114C3 ; NSM # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115B2..115B5 ; NSM # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115BC..115BD ; NSM # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BF..115C0 ; NSM # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115DC..115DD ; NSM # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11633..1163A ; NSM # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163D ; NSM # Mn MODI SIGN ANUSVARA +1163F..11640 ; NSM # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +116AB ; NSM # Mn TAKRI SIGN ANUSVARA +116AD ; NSM # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; NSM # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; NSM # Mn TAKRI SIGN NUKTA +1171D..1171F ; NSM # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +11722..11725 ; NSM # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11727..1172B ; NSM # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +1182F..11837 ; NSM # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11839..1183A ; NSM # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +1193B..1193C ; NSM # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193E ; NSM # Mn DIVES AKURU VIRAMA +11943 ; NSM # Mn DIVES AKURU SIGN NUKTA +119D4..119D7 ; NSM # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; NSM # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119E0 ; NSM # Mn NANDINAGARI SIGN VIRAMA +11A01..11A06 ; NSM # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O +11A09..11A0A ; NSM # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A33..11A38 ; NSM # Mn [6] ZANABAZAR SQUARE FINAL 
CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A3B..11A3E ; NSM # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; NSM # Mn ZANABAZAR SQUARE SUBJOINER +11A51..11A56 ; NSM # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A59..11A5B ; NSM # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; NSM # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A98..11A99 ; NSM # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11C30..11C36 ; NSM # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; NSM # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C92..11CA7 ; NSM # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CAA..11CB0 ; NSM # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB2..11CB3 ; NSM # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB5..11CB6 ; NSM # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; NSM # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; NSM # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; NSM # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; NSM # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D47 ; NSM # Mn MASARAM GONDI RA-KARA +11D90..11D91 ; NSM # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D95 ; NSM # Mn GUNJALA GONDI SIGN ANUSVARA +11D97 ; NSM # Mn GUNJALA GONDI VIRAMA +11EF3..11EF4 ; NSM # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +16AF0..16AF4 ; NSM # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B30..16B36 ; NSM # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16F4F ; NSM # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F8F..16F92 ; NSM # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16FE4 ; NSM # Mn KHITAN SMALL SCRIPT FILLER +1BC9D..1BC9E ; NSM # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK 
+1CF00..1CF2D ; NSM # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; NSM # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D167..1D169 ; NSM # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D17B..1D182 ; NSM # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; NSM # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; NSM # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D242..1D244 ; NSM # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1DA00..1DA36 ; NSM # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA3B..1DA6C ; NSM # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA75 ; NSM # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA84 ; NSM # Mn SIGNWRITING LOCATION HEAD NECK +1DA9B..1DA9F ; NSM # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; NSM # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1E000..1E006 ; NSM # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; NSM # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; NSM # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; NSM # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; NSM # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E130..1E136 ; NSM # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; NSM # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; NSM # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E8D0..1E8D6 ; NSM # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E94A ; NSM # Mn [7] 
ADLAM ALIF LENGTHENER..ADLAM NUKTA +E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 1958 + +# ================================================ + +# Bidi_Class=Arabic_Letter + +0608 ; AL # Sm ARABIC RAY +060B ; AL # Sc AFGHANI SIGN +060D ; AL # Po ARABIC DATE SEPARATOR +061B ; AL # Po ARABIC SEMICOLON +061C ; AL # Cf ARABIC LETTER MARK +061D..061F ; AL # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK +0620..063F ; AL # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0640 ; AL # Lm ARABIC TATWEEL +0641..064A ; AL # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +066D ; AL # Po ARABIC FIVE POINTED STAR +066E..066F ; AL # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0671..06D3 ; AL # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D4 ; AL # Po ARABIC FULL STOP +06D5 ; AL # Lo ARABIC LETTER AE +06E5..06E6 ; AL # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06EE..06EF ; AL # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06FA..06FC ; AL # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FD..06FE ; AL # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN +06FF ; AL # Lo ARABIC LETTER HEH WITH INVERTED V +0700..070D ; AL # Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS +070E ; AL # Cn <reserved-070E> +070F ; AL # Cf SYRIAC ABBREVIATION MARK +0710 ; AL # Lo SYRIAC LETTER ALAPH +0712..072F ; AL # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +074B..074C ; AL # Cn [2] <reserved-074B>..<reserved-074C> +074D..07A5 ; AL # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU +07B1 ; AL # Lo THAANA LETTER NAA +07B2..07BF ; AL # Cn [14] <reserved-07B2>..<reserved-07BF> +0860..086A ; AL # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA +086B..086F ; AL # Cn [5] <reserved-086B>..<reserved-086F> +0870..0887 ; AL # Lo [24] 
ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0888 ; AL # Sk ARABIC RAISED ROUND DOT +0889..088E ; AL # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +088F ; AL # Cn <reserved-088F> +0892..0897 ; AL # Cn [6] <reserved-0892>..<reserved-0897> +08A0..08C8 ; AL # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9 ; AL # Lm ARABIC SMALL FARSI YEH +FB50..FBB1 ; AL # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBB2..FBC2 ; AL # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; AL # Cn [16] <reserved-FBC3>..<reserved-FBD2> +FBD3..FD3D ; AL # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD50..FD8F ; AL # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD90..FD91 ; AL # Cn [2] <reserved-FD90>..<reserved-FD91> +FD92..FDC7 ; AL # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDC8..FDCE ; AL # Cn [7] <reserved-FDC8>..<reserved-FDCE> +FDF0..FDFB ; AL # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FDFC ; AL # Sc RIAL SIGN +FE70..FE74 ; AL # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE75 ; AL # Cn <reserved-FE75> +FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +FEFD..FEFE ; AL # Cn [2] <reserved-FEFD>..<reserved-FEFE> +10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D28..10D2F ; AL # Cn [8] <reserved-10D28>..<reserved-10D2F> +10D3A..10D3F ; AL # Cn [6] <reserved-10D3A>..<reserved-10D3F> +10F30..10F45 ; AL # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F51..10F54 ; AL # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED +10F55..10F59 ; AL # Po [5] SOGDIAN PUNCTUATION TWO 
VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT +10F5A..10F6F ; AL # Cn [22] <reserved-10F5A>..<reserved-10F6F> +1EC70 ; AL # Cn <reserved-1EC70> +1EC71..1ECAB ; AL # No [59] INDIC SIYAQ NUMBER ONE..INDIC SIYAQ NUMBER PREFIXED NINE +1ECAC ; AL # So INDIC SIYAQ PLACEHOLDER +1ECAD..1ECAF ; AL # No [3] INDIC SIYAQ FRACTION ONE QUARTER..INDIC SIYAQ FRACTION THREE QUARTERS +1ECB0 ; AL # Sc INDIC SIYAQ RUPEE MARK +1ECB1..1ECB4 ; AL # No [4] INDIC SIYAQ NUMBER ALTERNATE ONE..INDIC SIYAQ ALTERNATE LAKH MARK +1ECB5..1ECBF ; AL # Cn [11] <reserved-1ECB5>..<reserved-1ECBF> +1ED00 ; AL # Cn <reserved-1ED00> +1ED01..1ED2D ; AL # No [45] OTTOMAN SIYAQ NUMBER ONE..OTTOMAN SIYAQ NUMBER NINETY THOUSAND +1ED2E ; AL # So OTTOMAN SIYAQ MARRATAN +1ED2F..1ED3D ; AL # No [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH +1ED3E..1ED4F ; AL # Cn [18] <reserved-1ED3E>..<reserved-1ED4F> +1EE00..1EE03 ; AL # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE04 ; AL # Cn <reserved-1EE04> +1EE05..1EE1F ; AL # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE20 ; AL # Cn <reserved-1EE20> +1EE21..1EE22 ; AL # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE23 ; AL # Cn <reserved-1EE23> +1EE24 ; AL # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE25..1EE26 ; AL # Cn [2] <reserved-1EE25>..<reserved-1EE26> +1EE27 ; AL # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE28 ; AL # Cn <reserved-1EE28> +1EE29..1EE32 ; AL # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE33 ; AL # Cn <reserved-1EE33> +1EE34..1EE37 ; AL # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE38 ; AL # Cn <reserved-1EE38> +1EE39 ; AL # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3A ; AL # Cn <reserved-1EE3A> +1EE3B ; AL # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE3C..1EE41 ; AL # Cn [6] <reserved-1EE3C>..<reserved-1EE41> +1EE42 ; AL # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE43..1EE46 ; AL # Cn [4] 
<reserved-1EE43>..<reserved-1EE46> +1EE47 ; AL # Lo ARABIC MATHEMATICAL TAILED HAH +1EE48 ; AL # Cn <reserved-1EE48> +1EE49 ; AL # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4A ; AL # Cn <reserved-1EE4A> +1EE4B ; AL # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4C ; AL # Cn <reserved-1EE4C> +1EE4D..1EE4F ; AL # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE50 ; AL # Cn <reserved-1EE50> +1EE51..1EE52 ; AL # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE53 ; AL # Cn <reserved-1EE53> +1EE54 ; AL # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE55..1EE56 ; AL # Cn [2] <reserved-1EE55>..<reserved-1EE56> +1EE57 ; AL # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE58 ; AL # Cn <reserved-1EE58> +1EE59 ; AL # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5A ; AL # Cn <reserved-1EE5A> +1EE5B ; AL # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5C ; AL # Cn <reserved-1EE5C> +1EE5D ; AL # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5E ; AL # Cn <reserved-1EE5E> +1EE5F ; AL # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE60 ; AL # Cn <reserved-1EE60> +1EE61..1EE62 ; AL # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE63 ; AL # Cn <reserved-1EE63> +1EE64 ; AL # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE65..1EE66 ; AL # Cn [2] <reserved-1EE65>..<reserved-1EE66> +1EE67..1EE6A ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6B ; AL # Cn <reserved-1EE6B> +1EE6C..1EE72 ; AL # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE73 ; AL # Cn <reserved-1EE73> +1EE74..1EE77 ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE78 ; AL # Cn <reserved-1EE78> +1EE79..1EE7C ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7D ; AL # Cn <reserved-1EE7D> +1EE7E ; AL # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE7F ; AL # Cn <reserved-1EE7F> +1EE80..1EE89 ; AL # Lo [10] ARABIC MATHEMATICAL 
LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8A ; AL # Cn <reserved-1EE8A> +1EE8B..1EE9B ; AL # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EE9C..1EEA0 ; AL # Cn [5] <reserved-1EE9C>..<reserved-1EEA0> +1EEA1..1EEA3 ; AL # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA4 ; AL # Cn <reserved-1EEA4> +1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAA ; AL # Cn <reserved-1EEAA> +1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEBC..1EEEF ; AL # Cn [52] <reserved-1EEBC>..<reserved-1EEEF> +1EEF2..1EEFF ; AL # Cn [14] <reserved-1EEF2>..<reserved-1EEFF> + +# Total code points: 1708 + +# ================================================ + +# Bidi_Class=Left_To_Right_Override + +202D ; LRO # Cf LEFT-TO-RIGHT OVERRIDE + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Right_To_Left_Override + +202E ; RLO # Cf RIGHT-TO-LEFT OVERRIDE + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Left_To_Right_Embedding + +202A ; LRE # Cf LEFT-TO-RIGHT EMBEDDING + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Right_To_Left_Embedding + +202B ; RLE # Cf RIGHT-TO-LEFT EMBEDDING + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Pop_Directional_Format + +202C ; PDF # Cf POP DIRECTIONAL FORMATTING + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Left_To_Right_Isolate + +2066 ; LRI # Cf LEFT-TO-RIGHT ISOLATE + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Right_To_Left_Isolate + +2067 ; RLI # Cf RIGHT-TO-LEFT ISOLATE + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=First_Strong_Isolate 
+ +2068 ; FSI # Cf FIRST STRONG ISOLATE + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Pop_Directional_Isolate + +2069 ; PDI # Cf POP DIRECTIONAL ISOLATE + +# Total code points: 1 + +# EOF diff --git a/gen/bidirectional.c b/gen/bidirectional.c @@ -0,0 +1,137 @@ +/* See LICENSE file for copyright and license details. */ +#include <stddef.h> + +#include "util.h" + +#define FILE_BIDICLASS "data/DerivedBidiClass.txt" + +static const struct property_spec bidi_property[] = { + { + /* default */ + .enumname = "L", + .file = FILE_BIDICLASS, + .ucdname = "L", + }, + { + .enumname = "AL", + .file = FILE_BIDICLASS, + .ucdname = "AL", + }, + { + .enumname = "AN", + .file = FILE_BIDICLASS, + .ucdname = "AN", + }, + { + .enumname = "B", + .file = FILE_BIDICLASS, + .ucdname = "B", + }, + { + .enumname = "BN", + .file = FILE_BIDICLASS, + .ucdname = "BN", + }, + { + .enumname = "CS", + .file = FILE_BIDICLASS, + .ucdname = "CS", + }, + { + .enumname = "EN", + .file = FILE_BIDICLASS, + .ucdname = "EN", + }, + { + .enumname = "ES", + .file = FILE_BIDICLASS, + .ucdname = "Es", + }, + { + .enumname = "ET", + .file = FILE_BIDICLASS, + .ucdname = "ET", + }, + { + .enumname = "FSI", + .file = FILE_BIDICLASS, + .ucdname = "FSI", + }, + { + .enumname = "LRE", + .file = FILE_BIDICLASS, + .ucdname = "LRE", + }, + { + .enumname = "LRI", + .file = FILE_BIDICLASS, + .ucdname = "LRI", + }, + { + .enumname = "LRO", + .file = FILE_BIDICLASS, + .ucdname = "LRO", + }, + { + .enumname = "NSM", + .file = FILE_BIDICLASS, + .ucdname = "NSM", + }, + { + .enumname = "ON", + .file = FILE_BIDICLASS, + .ucdname = "ON", + }, + { + .enumname = "PDF", + .file = FILE_BIDICLASS, + .ucdname = "PDF", + }, + { + .enumname = "PDI", + .file = FILE_BIDICLASS, + .ucdname = "PDI", + }, + { + .enumname = "R", + .file = FILE_BIDICLASS, + .ucdname = "R", + }, + { + .enumname = "RLE", + .file = FILE_BIDICLASS, + .ucdname = "RLE", + }, + { + .enumname = "RLI", + .file = 
FILE_BIDICLASS, + .ucdname = "RLI", + }, + { + .enumname = "RLO", + .file = FILE_BIDICLASS, + .ucdname = "RLO", + }, + { + .enumname = "S", + .file = FILE_BIDICLASS, + .ucdname = "S", + }, + { + .enumname = "WS", + .file = FILE_BIDICLASS, + .ucdname = "WS", + }, +}; + +int +main(int argc, char *argv[]) +{ + (void)argc; + + properties_generate_break_property(bidi_property, + LEN(bidi_property), + NULL, NULL, "bidi", argv[0]); + + return 0; +} diff --git a/grapheme.h b/grapheme.h @@ -13,6 +13,12 @@ typedef struct grapheme_internal_segmentation_state { bool gb12_13_flag; } GRAPHEME_STATE; +enum grapheme_bidirectional_override { + GRAPHEME_BIDIRECTIONAL_OVERRIDE_NONE, + GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR, + GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL, +}; + #define GRAPHEME_INVALID_CODEPOINT UINT32_C(0xFFFD) bool grapheme_is_character_break(uint_least32_t, uint_least32_t, GRAPHEME_STATE *); @@ -43,6 +49,13 @@ bool grapheme_is_uppercase_utf8(const char *, size_t, size_t *); bool grapheme_is_lowercase_utf8(const char *, size_t, size_t *); bool grapheme_is_titlecase_utf8(const char *, size_t, size_t *); +size_t grapheme_bidirectional_logical_to_visual(const uint_least32_t *, size_t, + enum grapheme_bidirectional_override, + uint_least32_t *, size_t); +size_t grapheme_bidirectional_logical_to_visual_utf8(const char *, size_t, + enum grapheme_bidirectional_override, + char *, size_t); + size_t grapheme_decode_utf8(const char *, size_t, uint_least32_t *); size_t grapheme_encode_utf8(uint_least32_t, char *, size_t); diff --git a/src/bidirectional.c b/src/bidirectional.c @@ -0,0 +1,169 @@ +/* See LICENSE file for copyright and license details. 
*/ +#include <stdbool.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#include "../gen/bidirectional.h" +#include "../grapheme.h" +#include "util.h" + +static inline enum bidi_property +get_bidi_property(uint_least32_t cp) +{ + if (likely(cp <= 0x10FFFF)) { + return (enum bidi_property) + bidi_minor[bidi_major[cp >> 8] + (cp & 0xff)]; + } else { + return BIDI_PROP_L; + } +} + +/* + * NOTES: + * https://unicode.org/reports/tr9/ + * https://github.com/omid/Persian-Log2Vis/blob/master/bidi.php + * https://github.com/fribidi/fribidi/blob/master/lib/fribidi.h + */ + +#define MAX_DEPTH 125 + +#include <stdio.h> /* ------------------------------------------------------------------ */ +static size_t +determine_paragraph_level(const void *src, size_t srclen, + size_t (*get_codepoint)(const void *, size_t, size_t, uint_least32_t *), + size_t (*set_codepoint)(uint_least32_t, void *, size_t, size_t)) +{ + enum bidi_property prop; + size_t srcoff, isolate_level; + uint_least32_t cp; + + for (srcoff = 0, isolate_level = 0; srcoff < srclen; ) { + srcoff += get_codepoint(src, srclen, srcoff, &cp); + prop = get_bidi_property(cp); + + /* BD8/BD9 */ + if ((prop == BIDI_PROP_LRI || + prop == BIDI_PROP_RLI || + prop == BIDI_PROP_FSI) && + isolate_level < MAX_DEPTH) { + /* we hit an isolate initiator, increment counter */ + isolate_level++; + } else if (prop == BIDI_PROP_PDI && isolate_level > 0) { + isolate_level--; + } + + /* P2 */ + if (isolate_level > 0) { + continue; + } + + /* P3 */ + if (prop == BIDI_PROP_L) { + return 0; + } else if (prop == BIDI_PROP_AL || + prop == BIDI_PROP_R) { + return 1; + } + } + + return 0; +} + +static size_t +handle_paragraph(const void *src, size_t srclen, enum grapheme_bidirectional_override override, + size_t (*get_codepoint)(const void *, size_t, size_t, uint_least32_t *), + size_t (*set_codepoint)(uint_least32_t, void *, size_t, size_t), + void *dest, size_t destlen) +{ + enum bidi_property prop; + size_t srcoff, destoff, 
paragraph_level; + +fprintf(stderr, "paragraph-call: par='%.*s'\n", (int)srclen, (const char *)src); + /* determine paragraph level (rules P1-P3, HL1) */ + if (override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) { + paragraph_level = 0; + } else if (override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) { + paragraph_level = 1; + } else { /* GRAPHEME_BIDIRECTIONAL_OVERRIDE_NONE and invalid */ + paragraph_level = determine_paragraph_level(src, srclen, + get_codepoint, + set_codepoint); + } +fprintf(stderr, "\tparagraph_level=%zu\n", paragraph_level); + + /* determine_explicit_levels(...); X1-X8 */ + /* prepare_implicit_processing(); X9-X10, BD13 */ + /* resolve_weak_types(); W1-W7 */ + /* resolve_neutral_and_isolate_formatting_types() N0-N2 */ + /* resolve_implicit_levels(); I1-I2 */ + /* reorder_resolved_levels(); L1-L4 */ + + return destoff; +} + +static size_t +logical_to_visual(const void *src, size_t srclen, enum grapheme_bidirectional_override override, + size_t (*get_codepoint)(const void *, size_t, size_t, uint_least32_t *), + size_t (*set_codepoint)(uint_least32_t, void *, size_t, size_t), + void *dest, size_t destlen) +{ + size_t srcoff, destoff, lastparoff; + uint_least32_t cp; + + for (srcoff = destoff = lastparoff = 0; srcoff < srclen; ) { + srcoff += get_codepoint(src, srclen, srcoff, &cp); + + /* P1 */ + if (get_bidi_property(cp) == BIDI_PROP_B || + srcoff == srclen || + (get_codepoint == get_codepoint_utf8 && + srclen == SIZE_MAX && cp == 0)) { + /* + * we encountered a paragraph separator or + * reached the end of the text. + * Call the paragraph handling function on + * the paragraph including the separator. + */ + if (get_codepoint == get_codepoint_utf8) { + destoff += handle_paragraph((const char *)src + lastparoff, + srcoff - lastparoff, override, + get_codepoint, set_codepoint, + (char *)dest + destoff, + (destoff < destlen) ? 
+ (destlen - destoff) : 0); + } else { + destoff += handle_paragraph((const uint_least32_t *)src + lastparoff, + srcoff - lastparoff, override, + get_codepoint, set_codepoint, + (uint_least32_t *)dest + destoff, + (destoff < destlen) ? + (destlen - destoff) : 0); + } + lastparoff = srcoff; + } + } + + return destoff; +} + +size_t +grapheme_bidirectional_logical_to_visual(const uint_least32_t *src, + size_t srclen, + enum grapheme_bidirectional_override override, + uint_least32_t *dest, + size_t destlen) +{ + return logical_to_visual(src, srclen, override, + get_codepoint, set_codepoint, dest, destlen); +} + +size_t +grapheme_bidirectional_logical_to_visual_utf8(const char *src, size_t srclen, + enum grapheme_bidirectional_override override, + char *dest, size_t destlen) +{ + return logical_to_visual(src, srclen, override, + get_codepoint_utf8, set_codepoint_utf8, + dest, destlen); +}