commit 919200085f7eaffafa33ec3a5e50c2adbd046ab7
parent 8220661fc87e578b2581d4bf1aaced5807168320
Author: Laslo Hunhold <dev@frign.de>
Date: Sat, 27 Aug 2022 01:46:41 +0200
Add manual-template-engine and manuals for the case-conversion functions
Given the undeniable redundancy within the manuals it makes a lot of
sense to have a simple template-engine for generating the manual files
on the fly.
After fiddling with m4 and finding out for the third time that it just
hasn't aged well I realized that it makes most sense to simply use
heredocs within small shell-scripts.
Signed-off-by: Laslo Hunhold <dev@frign.de>
Diffstat:
30 files changed, 1196 insertions(+), 1074 deletions(-)
diff --git a/Makefile b/Makefile
@@ -56,22 +56,71 @@ TEST =\
test/utf8-encode\
test/word\
+MAN_TEMPLATE =\
+ man/template/to_case.sh
+
MAN3 =\
- man/grapheme_decode_utf8.3\
- man/grapheme_encode_utf8.3\
- man/grapheme_is_character_break.3\
- man/grapheme_next_character_break.3\
- man/grapheme_next_line_break.3\
- man/grapheme_next_sentence_break.3\
- man/grapheme_next_word_break.3\
- man/grapheme_next_character_break_utf8.3\
- man/grapheme_next_line_break_utf8.3\
- man/grapheme_next_sentence_break_utf8.3\
- man/grapheme_next_word_break_utf8.3\
-
-MAN7 = man/libgrapheme.7
-
-all: data/LICENSE libgrapheme.a libgrapheme.so
+ man/grapheme_decode_utf8\
+ man/grapheme_encode_utf8\
+ man/grapheme_is_character_break\
+ man/grapheme_next_character_break\
+ man/grapheme_next_line_break\
+ man/grapheme_next_sentence_break\
+ man/grapheme_next_word_break\
+ man/grapheme_next_character_break_utf8\
+ man/grapheme_next_line_break_utf8\
+ man/grapheme_next_sentence_break_utf8\
+ man/grapheme_next_word_break_utf8\
+ man/grapheme_to_uppercase\
+ man/grapheme_to_lowercase\
+ man/grapheme_to_titlecase\
+
+MAN7 =\
+ man/libgrapheme\
+
+all: data/LICENSE $(MAN3:=.3) $(MAN7:=.7) libgrapheme.a libgrapheme.so
+
+data/DerivedCoreProperties.txt:
+ wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/DerivedCoreProperties.txt
+
+data/EastAsianWidth.txt:
+ wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/EastAsianWidth.txt
+
+data/emoji-data.txt:
+ wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt
+
+data/GraphemeBreakProperty.txt:
+ wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/GraphemeBreakProperty.txt
+
+data/GraphemeBreakTest.txt:
+ wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/GraphemeBreakTest.txt
+
+data/LICENSE:
+ wget -O $@ https://www.unicode.org/license.txt
+
+data/LineBreak.txt:
+ wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/LineBreak.txt
+
+data/LineBreakTest.txt:
+ wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/LineBreakTest.txt
+
+data/SentenceBreakProperty.txt:
+ wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/SentenceBreakProperty.txt
+
+data/SentenceBreakTest.txt:
+ wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/SentenceBreakTest.txt
+
+data/SpecialCasing.txt:
+ wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/SpecialCasing.txt
+
+data/UnicodeData.txt:
+ wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/UnicodeData.txt
+
+data/WordBreakProperty.txt:
+ wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/WordBreakProperty.txt
+
+data/WordBreakTest.txt:
+ wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/WordBreakTest.txt
benchmark/case.o: benchmark/case.c config.mk gen/word-test.h grapheme.h benchmark/util.h
benchmark/character.o: benchmark/character.c config.mk gen/character-test.h grapheme.h benchmark/util.h
@@ -137,47 +186,22 @@ gen/sentence-test.h: data/SentenceBreakTest.txt gen/sentence-test
gen/word.h: data/WordBreakProperty.txt gen/word
gen/word-test.h: data/WordBreakTest.txt gen/word-test
-data/DerivedCoreProperties.txt:
- wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/DerivedCoreProperties.txt
-
-data/EastAsianWidth.txt:
- wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/EastAsianWidth.txt
-
-data/emoji-data.txt:
- wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt
-
-data/GraphemeBreakProperty.txt:
- wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/GraphemeBreakProperty.txt
-
-data/GraphemeBreakTest.txt:
- wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/GraphemeBreakTest.txt
-
-data/LICENSE:
- wget -O $@ https://www.unicode.org/license.txt
-
-data/LineBreak.txt:
- wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/LineBreak.txt
-
-data/LineBreakTest.txt:
- wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/LineBreakTest.txt
-
-data/SentenceBreakProperty.txt:
- wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/SentenceBreakProperty.txt
-
-data/SentenceBreakTest.txt:
- wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/SentenceBreakTest.txt
-
-data/SpecialCasing.txt:
- wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/SpecialCasing.txt
-
-data/UnicodeData.txt:
- wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/UnicodeData.txt
-
-data/WordBreakProperty.txt:
- wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/WordBreakProperty.txt
-
-data/WordBreakTest.txt:
- wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/WordBreakTest.txt
+man/grapheme_decode_utf8.3: man/grapheme_decode_utf8.sh config.mk
+man/grapheme_encode_utf8.3: man/grapheme_encode_utf8.sh config.mk
+man/grapheme_is_character_break.3: man/grapheme_is_character_break.sh config.mk
+man/grapheme_next_character_break.3: man/grapheme_next_character_break.sh config.mk
+man/grapheme_next_line_break.3: man/grapheme_next_line_break.sh config.mk
+man/grapheme_next_sentence_break.3: man/grapheme_next_sentence_break.sh config.mk
+man/grapheme_next_word_break.3: man/grapheme_next_word_break.sh config.mk
+man/grapheme_next_character_break_utf8.3: man/grapheme_next_character_break_utf8.sh config.mk
+man/grapheme_next_line_break_utf8.3: man/grapheme_next_line_break_utf8.sh config.mk
+man/grapheme_next_sentence_break_utf8.3: man/grapheme_next_sentence_break_utf8.sh config.mk
+man/grapheme_next_word_break_utf8.3: man/grapheme_next_word_break_utf8.sh config.mk
+man/grapheme_to_uppercase.3: man/grapheme_to_uppercase.sh man/template/to_case.sh config.mk
+man/grapheme_to_lowercase.3: man/grapheme_to_lowercase.sh man/template/to_case.sh config.mk
+man/grapheme_to_titlecase.3: man/grapheme_to_titlecase.sh man/template/to_case.sh config.mk
+
+man/libgrapheme.7: man/libgrapheme.sh config.mk
$(GEN:=.o):
$(BUILD_CC) -c -o $@ $(BUILD_CPPFLAGS) $(BUILD_CFLAGS) $(@:.o=.c)
@@ -207,6 +231,12 @@ libgrapheme.a: $(SRC:=.o)
libgrapheme.so: $(SRC:=.o)
$(CC) -o $@ $(SOFLAGS) $(LDFLAGS) $(SRC:=.o)
+$(MAN3:=.3):
+ SH=$(SH) $(SH) $(@:.3=.sh) > $@
+
+$(MAN7:=.7):
+ SH=$(SH) $(SH) $(@:.7=.sh) > $@
+
benchmark: $(BENCHMARK)
for m in $(BENCHMARK); do ./$$m; done
@@ -218,23 +248,23 @@ install: all
mkdir -p "$(DESTDIR)$(INCPREFIX)"
mkdir -p "$(DESTDIR)$(MANPREFIX)/man3"
mkdir -p "$(DESTDIR)$(MANPREFIX)/man7"
- cp -f $(MAN3) "$(DESTDIR)$(MANPREFIX)/man3"
- cp -f $(MAN7) "$(DESTDIR)$(MANPREFIX)/man7"
+ cp -f $(MAN3:=.3) "$(DESTDIR)$(MANPREFIX)/man3"
+ cp -f $(MAN7:=.7) "$(DESTDIR)$(MANPREFIX)/man7"
cp -f libgrapheme.a "$(DESTDIR)$(LIBPREFIX)"
cp -f libgrapheme.so "$(DESTDIR)$(LIBPREFIX)"
cp -f grapheme.h "$(DESTDIR)$(INCPREFIX)"
$(LDCONFIG)
uninstall:
- for m in $(MAN3); do rm -f "$(DESTDIR)$(MANPREFIX)/man3/`basename $$m`"; done
- for m in $(MAN7); do rm -f "$(DESTDIR)$(MANPREFIX)/man7/`basename $$m`"; done
+ for m in $(MAN3:=.3); do rm -f "$(DESTDIR)$(MANPREFIX)/man3/`basename $$m`"; done
+ for m in $(MAN7:=.7); do rm -f "$(DESTDIR)$(MANPREFIX)/man7/`basename $$m`"; done
rm -f "$(DESTDIR)$(LIBPREFIX)/libgrapheme.a"
rm -f "$(DESTDIR)$(LIBPREFIX)/libgrapheme.so"
rm -f "$(DESTDIR)$(INCPREFIX)/grapheme.h"
$(LDCONFIG)
clean:
- rm -f $(BENCHMARK:=.o) benchmark/util.o $(BENCHMARK) $(GEN:=.h) $(GEN:=.o) gen/util.o $(GEN) $(SRC:=.o) src/util.o $(TEST:=.o) test/util.o $(TEST) libgrapheme.a libgrapheme.so
+ rm -f $(BENCHMARK:=.o) benchmark/util.o $(BENCHMARK) $(GEN:=.h) $(GEN:=.o) gen/util.o $(GEN) $(SRC:=.o) src/util.o $(TEST:=.o) test/util.o $(TEST) libgrapheme.a libgrapheme.so $(MAN3:=.3) $(MAN7:=.7)
clean-data:
rm -f $(DATA)
@@ -242,12 +272,13 @@ clean-data:
dist:
rm -rf "libgrapheme-$(VERSION)"
mkdir "libgrapheme-$(VERSION)"
- for m in benchmark data gen man src test; do mkdir "libgrapheme-$(VERSION)/$$m"; done
+ for m in benchmark data gen man man/template src test; do mkdir "libgrapheme-$(VERSION)/$$m"; done
cp config.mk grapheme.h LICENSE Makefile README "libgrapheme-$(VERSION)"
cp $(BENCHMARK:=.c) benchmark/util.c benchmark/util.h "libgrapheme-$(VERSION)/benchmark"
cp $(DATA) "libgrapheme-$(VERSION)/data"
cp $(GEN:=.c) gen/util.c gen/types.h gen/util.h "libgrapheme-$(VERSION)/gen"
- cp $(MAN3) $(MAN7) "libgrapheme-$(VERSION)/man"
+ cp $(MAN3:=.sh) $(MAN7:=.sh) "libgrapheme-$(VERSION)/man"
+ cp $(MAN_TEMPLATE) "libgrapheme-$(VERSION)/man/template"
cp $(SRC:=.c) src/util.h "libgrapheme-$(VERSION)/src"
cp $(TEST:=.c) test/util.c test/util.h "libgrapheme-$(VERSION)/test"
tar -cf - "libgrapheme-$(VERSION)" | gzip -c > "libgrapheme-$(VERSION).tar.gz"
diff --git a/config.mk b/config.mk
@@ -27,3 +27,4 @@ BUILD_CC = $(CC)
AR = ar
RANLIB = ranlib
LDCONFIG = ldconfig # unset to not call ldconfig(1) after install/uninstall
+SH = sh
diff --git a/man/grapheme_decode_utf8.3 b/man/grapheme_decode_utf8.3
@@ -1,100 +0,0 @@
-.Dd 2022-08-26
-.Dt GRAPHEME_DECODE_UTF8 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_decode_utf8
-.Nd decode first codepoint in UTF-8-encoded string
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_decode_utf8 "const char *str" "size_t len" "uint_least32_t *cp"
-.Sh DESCRIPTION
-The
-.Fn grapheme_decode_utf8
-function decodes the first codepoint in the UTF-8-encoded string
-.Va str
-of length
-.Va len .
-If the UTF-8-sequence is invalid (overlong encoding, unexpected byte,
-string ends unexpectedly, empty string, etc.) the decoding is stopped
-at the last processed byte and the decoded codepoint set to
-.Dv GRAPHEME_INVALID_CODEPOINT .
-.Pp
-If
-.Va cp
-is not
-.Dv NULL
-the decoded codepoint is stored in the memory pointed to by
-.Va cp .
-.Pp
-Given NUL has a unique 1 byte representation, it is safe to operate on
-NUL-terminated strings by setting
-.Va len
-to
-.Dv SIZE_MAX
-(stdint.h is already included by grapheme.h) and terminating when
-.Va cp
-is 0 (see
-.Sx EXAMPLES
-for an example).
-.Sh RETURN VALUES
-The
-.Fn grapheme_decode_utf8
-function returns the number of processed bytes and 0 if
-.Va str
-is
-.Dv NULL
-or
-.Va len
-is 0.
-If the string ends unexpectedly in a multibyte sequence, the desired
-length (that is larger than
-.Va len )
-is returned.
-.Sh EXAMPLES
-.Bd -literal
-/* cc (-static) -o example example.c -lgrapheme */
-#include <grapheme.h>
-#include <inttypes.h>
-#include <stdio.h>
-
-void
-print_cps(const char *str, size_t len)
-{
- size_t ret, off;
- uint_least32_t cp;
-
- for (off = 0; off < len; off += ret) {
- if ((ret = grapheme_decode_utf8(str + off,
- len - off, &cp)) > (len - off)) {
- /*
- * string ended unexpectedly in the middle of a
- * multibyte sequence and we have the choice
- * here to possibly expand str by ret - len + off
- * bytes to get a full sequence, but we just
- * bail out in this case.
- */
- break;
- }
- printf("%"PRIxLEAST32"\\n", cp);
- }
-}
-
-void
-print_cps_nul_terminated(const char *str)
-{
- size_t ret, off;
- uint_least32_t cp;
-
- for (off = 0; (ret = grapheme_decode_utf8(str + off,
- SIZE_MAX, &cp)) > 0 &&
- cp != 0; off += ret) {
- printf("%"PRIxLEAST32"\\n", cp);
- }
-}
-.Ed
-.Sh SEE ALSO
-.Xr grapheme_encode_utf8 3 ,
-.Xr libgrapheme 7
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt dev@frign.de
diff --git a/man/grapheme_decode_utf8.sh b/man/grapheme_decode_utf8.sh
@@ -0,0 +1,102 @@
+cat << EOF
+.Dd 2022-08-26
+.Dt GRAPHEME_DECODE_UTF8 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_decode_utf8
+.Nd decode first codepoint in UTF-8-encoded string
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_decode_utf8 "const char *str" "size_t len" "uint_least32_t *cp"
+.Sh DESCRIPTION
+The
+.Fn grapheme_decode_utf8
+function decodes the first codepoint in the UTF-8-encoded string
+.Va str
+of length
+.Va len .
+If the UTF-8-sequence is invalid (overlong encoding, unexpected byte,
+string ends unexpectedly, empty string, etc.) the decoding is stopped
+at the last processed byte and the decoded codepoint set to
+.Dv GRAPHEME_INVALID_CODEPOINT .
+.Pp
+If
+.Va cp
+is not
+.Dv NULL
+the decoded codepoint is stored in the memory pointed to by
+.Va cp .
+.Pp
+Given NUL has a unique 1 byte representation, it is safe to operate on
+NUL-terminated strings by setting
+.Va len
+to
+.Dv SIZE_MAX
+(stdint.h is already included by grapheme.h) and terminating when
+.Va cp
+is 0 (see
+.Sx EXAMPLES
+for an example).
+.Sh RETURN VALUES
+The
+.Fn grapheme_decode_utf8
+function returns the number of processed bytes and 0 if
+.Va str
+is
+.Dv NULL
+or
+.Va len
+is 0.
+If the string ends unexpectedly in a multibyte sequence, the desired
+length (that is larger than
+.Va len )
+is returned.
+.Sh EXAMPLES
+.Bd -literal
+/* cc (-static) -o example example.c -lgrapheme */
+#include <grapheme.h>
+#include <inttypes.h>
+#include <stdio.h>
+
+void
+print_cps(const char *str, size_t len)
+{
+ size_t ret, off;
+ uint_least32_t cp;
+
+ for (off = 0; off < len; off += ret) {
+ if ((ret = grapheme_decode_utf8(str + off,
+ len - off, &cp)) > (len - off)) {
+ /*
+ * string ended unexpectedly in the middle of a
+ * multibyte sequence and we have the choice
+ * here to possibly expand str by ret - len + off
+ * bytes to get a full sequence, but we just
+ * bail out in this case.
+ */
+ break;
+ }
+ printf("%"PRIxLEAST32"\\\\n", cp);
+ }
+}
+
+void
+print_cps_nul_terminated(const char *str)
+{
+ size_t ret, off;
+ uint_least32_t cp;
+
+ for (off = 0; (ret = grapheme_decode_utf8(str + off,
+ SIZE_MAX, &cp)) > 0 &&
+ cp != 0; off += ret) {
+ printf("%"PRIxLEAST32"\\\\n", cp);
+ }
+}
+.Ed
+.Sh SEE ALSO
+.Xr grapheme_encode_utf8 3 ,
+.Xr libgrapheme 7
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev@frign.de
+EOF
diff --git a/man/grapheme_encode_utf8.3 b/man/grapheme_encode_utf8.3
@@ -1,101 +0,0 @@
-.Dd 2022-08-26
-.Dt GRAPHEME_ENCODE_UTF8 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_encode_utf8
-.Nd encode codepoint into UTF-8 string
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_encode_utf8 "uint_least32_t cp" "char *str" "size_t len"
-.Sh DESCRIPTION
-The
-.Fn grapheme_encode_utf8
-function encodes the codepoint
-.Va cp
-into a UTF-8-string.
-If
-.Va str
-is not
-.Dv NULL
-and
-.Va len
-is large enough it writes the UTF-8-string to the memory pointed to by
-.Va str .
-Otherwise no data is written.
-.Sh RETURN VALUES
-The
-.Fn grapheme_encode_utf8
-function returns the length (in bytes) of the UTF-8-string resulting
-from encoding
-.Va cp ,
-even if
-.Va len
-is not large enough or
-.Va str
-is
-.Dv NULL .
-.Sh EXAMPLES
-.Bd -literal
-/* cc (-static) -o example example.c -lgrapheme */
-#include <grapheme.h>
-#include <stddef.h>
-#include <stdlib.h>
-
-size_t
-cps_to_utf8(const uint_least32_t *cp, size_t cplen, char *str, size_t len)
-{
- size_t i, off, ret;
-
- for (i = 0, off = 0; i < cplen; i++, off += ret) {
- if ((ret = grapheme_encode_utf8(cp[i], str + off,
- len - off)) > (len - off)) {
- /* buffer too small */
- break;
- }
- }
-
- return off;
-}
-
-size_t
-cps_bytelen(const uint_least32_t *cp, size_t cplen)
-{
- size_t i, len;
-
- for (i = 0, len = 0; i < cplen; i++) {
- len += grapheme_encode_utf8(cp[i], NULL, 0);
- }
-
- return len;
-}
-
-char *
-cps_to_utf8_alloc(const uint_least32_t *cp, size_t cplen)
-{
- char *str;
- size_t len, i, ret, off;
-
- len = cps_bytelen(cp, cplen);
-
- if (!(str = malloc(len))) {
- return NULL;
- }
-
- for (i = 0, off = 0; i < cplen; i++, off += ret) {
- if ((ret = grapheme_encode_utf8(cp[i], str + off,
- len - off)) > (len - off)) {
- /* buffer too small */
- break;
- }
- }
- str[off] = '\\0';
-
- return str;
-}
-.Ed
-.Sh SEE ALSO
-.Xr grapheme_decode_utf8 3 ,
-.Xr libgrapheme 7
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt dev@frign.de
diff --git a/man/grapheme_encode_utf8.sh b/man/grapheme_encode_utf8.sh
@@ -0,0 +1,103 @@
+cat << EOF
+.Dd 2022-08-26
+.Dt GRAPHEME_ENCODE_UTF8 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_encode_utf8
+.Nd encode codepoint into UTF-8 string
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_encode_utf8 "uint_least32_t cp" "char *str" "size_t len"
+.Sh DESCRIPTION
+The
+.Fn grapheme_encode_utf8
+function encodes the codepoint
+.Va cp
+into a UTF-8-string.
+If
+.Va str
+is not
+.Dv NULL
+and
+.Va len
+is large enough it writes the UTF-8-string to the memory pointed to by
+.Va str .
+Otherwise no data is written.
+.Sh RETURN VALUES
+The
+.Fn grapheme_encode_utf8
+function returns the length (in bytes) of the UTF-8-string resulting
+from encoding
+.Va cp ,
+even if
+.Va len
+is not large enough or
+.Va str
+is
+.Dv NULL .
+.Sh EXAMPLES
+.Bd -literal
+/* cc (-static) -o example example.c -lgrapheme */
+#include <grapheme.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+size_t
+cps_to_utf8(const uint_least32_t *cp, size_t cplen, char *str, size_t len)
+{
+ size_t i, off, ret;
+
+ for (i = 0, off = 0; i < cplen; i++, off += ret) {
+ if ((ret = grapheme_encode_utf8(cp[i], str + off,
+ len - off)) > (len - off)) {
+ /* buffer too small */
+ break;
+ }
+ }
+
+ return off;
+}
+
+size_t
+cps_bytelen(const uint_least32_t *cp, size_t cplen)
+{
+ size_t i, len;
+
+ for (i = 0, len = 0; i < cplen; i++) {
+ len += grapheme_encode_utf8(cp[i], NULL, 0);
+ }
+
+ return len;
+}
+
+char *
+cps_to_utf8_alloc(const uint_least32_t *cp, size_t cplen)
+{
+ char *str;
+ size_t len, i, ret, off;
+
+ len = cps_bytelen(cp, cplen);
+
+ if (!(str = malloc(len + 1))) {
+ return NULL;
+ }
+
+ for (i = 0, off = 0; i < cplen; i++, off += ret) {
+ if ((ret = grapheme_encode_utf8(cp[i], str + off,
+ len - off)) > (len - off)) {
+ /* buffer too small */
+ break;
+ }
+ }
+ str[off] = '\\\\0';
+
+ return str;
+}
+.Ed
+.Sh SEE ALSO
+.Xr grapheme_decode_utf8 3 ,
+.Xr libgrapheme 7
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev@frign.de
+EOF
diff --git a/man/grapheme_is_character_break.3 b/man/grapheme_is_character_break.3
@@ -1,81 +0,0 @@
-.Dd 2022-08-26
-.Dt GRAPHEME_IS_CHARACTER_BREAK 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_is_character_break
-.Nd test for a grapheme cluster break between two codepoints
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_is_character_break "uint_least32_t cp1" "uint_least32_t cp2" "GRAPHEME_STATE *state"
-.Sh DESCRIPTION
-The
-.Fn grapheme_is_character_break
-function determines if there is a grapheme cluster break (see
-.Xr libgrapheme 7 )
-between the two codepoints
-.Va cp1
-and
-.Va cp2 .
-By specification this decision depends on a
-.Va state
-that can at most be completely reset after detecting a break and must
-be reset every time one deviates from sequential processing.
-.Pp
-If
-.Va state
-is
-.Dv NULL
-.Fn grapheme_is_character_break
-behaves as if it was called with a fully reset state.
-.Sh RETURN VALUES
-The
-.Fn grapheme_is_character_break
-function returns
-.Va true
-if there is a grapheme cluster break between the codepoints
-.Va cp1
-and
-.Va cp2
-and
-.Va false
-if there is not.
-.Sh EXAMPLES
-.Bd -literal
-/* cc (-static) -o example example.c -lgrapheme */
-#include <grapheme.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-int
-main(void)
-{
- GRAPHEME_STATE state = { 0 };
- uint_least32_t s1[] = ..., s2[] = ...; /* two input arrays */
- size_t i;
-
- for (i = 0; i + 1 < sizeof(s1) / sizeof(*s1); i++) {
- if (grapheme_is_character_break(s[i], s[i + 1], &state)) {
- printf("break in s1 at offset %zu\n", i);
- }
- }
- memset(&state, 0, sizeof(state)); /* reset state */
- for (i = 0; i + 1 < sizeof(s2) / sizeof(*s2); i++) {
- if (grapheme_is_character_break(s[i], s[i + 1], &state)) {
- printf("break in s2 at offset %zu\n", i);
- }
- }
-
- return 0;
-}
-.Ed
-.Sh SEE ALSO
-.Xr grapheme_next_character_break 3 ,
-.Xr grapheme_next_character_break_utf8 3 ,
-.Xr libgrapheme 7
-.Sh STANDARDS
-.Fn grapheme_is_character_break
-is compliant with the Unicode 14.0.0 specification.
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt dev@frign.de
diff --git a/man/grapheme_is_character_break.sh b/man/grapheme_is_character_break.sh
@@ -0,0 +1,83 @@
+cat << EOF
+.Dd 2022-08-26
+.Dt GRAPHEME_IS_CHARACTER_BREAK 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_is_character_break
+.Nd test for a grapheme cluster break between two codepoints
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft bool
+.Fn grapheme_is_character_break "uint_least32_t cp1" "uint_least32_t cp2" "GRAPHEME_STATE *state"
+.Sh DESCRIPTION
+The
+.Fn grapheme_is_character_break
+function determines if there is a grapheme cluster break (see
+.Xr libgrapheme 7 )
+between the two codepoints
+.Va cp1
+and
+.Va cp2 .
+By specification this decision depends on a
+.Va state
+that can at most be completely reset after detecting a break and must
+be reset every time one deviates from sequential processing.
+.Pp
+If
+.Va state
+is
+.Dv NULL
+.Fn grapheme_is_character_break
+behaves as if it was called with a fully reset state.
+.Sh RETURN VALUES
+The
+.Fn grapheme_is_character_break
+function returns
+.Va true
+if there is a grapheme cluster break between the codepoints
+.Va cp1
+and
+.Va cp2
+and
+.Va false
+if there is not.
+.Sh EXAMPLES
+.Bd -literal
+/* cc (-static) -o example example.c -lgrapheme */
+#include <grapheme.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+int
+main(void)
+{
+ GRAPHEME_STATE state = { 0 };
+ uint_least32_t s1[] = ..., s2[] = ...; /* two input arrays */
+ size_t i;
+
+ for (i = 0; i + 1 < sizeof(s1) / sizeof(*s1); i++) {
+ if (grapheme_is_character_break(s1[i], s1[i + 1], &state)) {
+ printf("break in s1 at offset %zu\\\\n", i);
+ }
+ }
+ memset(&state, 0, sizeof(state)); /* reset state */
+ for (i = 0; i + 1 < sizeof(s2) / sizeof(*s2); i++) {
+ if (grapheme_is_character_break(s2[i], s2[i + 1], &state)) {
+ printf("break in s2 at offset %zu\\\\n", i);
+ }
+ }
+
+ return 0;
+}
+.Ed
+.Sh SEE ALSO
+.Xr grapheme_next_character_break 3 ,
+.Xr grapheme_next_character_break_utf8 3 ,
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_is_character_break
+is compliant with the Unicode 14.0.0 specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev@frign.de
+EOF
diff --git a/man/grapheme_next_character_break.3 b/man/grapheme_next_character_break.3
@@ -1,55 +0,0 @@
-.Dd 2022-08-26
-.Dt GRAPHEME_NEXT_CHARACTER_BREAK 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_next_character_break
-.Nd determine codepoint-offset to next grapheme cluster break
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_next_character_break "const uint_least32_t *str" "size_t len"
-.Sh DESCRIPTION
-The
-.Fn grapheme_next_character_break
-function computes the offset (in codepoints) to the next grapheme
-cluster break (see
-.Xr libgrapheme 7 )
-in the codepoint array
-.Va str
-of length
-.Va len .
-If a grapheme cluster begins at
-.Va str
-this offset is equal to the length of said grapheme cluster.
-.Pp
-If
-.Va len
-is set to
-.Dv SIZE_MAX
-(stdint.h is already included by grapheme.h) the string
-.Va str
-is interpreted to be NUL-terminated and processing stops when a
-NUL-byte is encountered.
-.Pp
-For UTF-8-encoded input data
-.Xr grapheme_next_character_break_utf8 3
-can be used instead.
-.Sh RETURN VALUES
-The
-.Fn grapheme_next_character_break
-function returns the offset (in codepoints) to the next grapheme cluster
-break in
-.Va str
-or 0 if
-.Va str
-is
-.Dv NULL .
-.Sh SEE ALSO
-.Xr grapheme_is_character_break 3 ,
-.Xr grapheme_next_character_break_utf8 3 ,
-.Xr libgrapheme 7
-.Sh STANDARDS
-.Fn grapheme_next_character_break
-is compliant with the Unicode 14.0.0 specification.
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt dev@frign.de
diff --git a/man/grapheme_next_character_break.sh b/man/grapheme_next_character_break.sh
@@ -0,0 +1,57 @@
+cat << EOF
+.Dd 2022-08-26
+.Dt GRAPHEME_NEXT_CHARACTER_BREAK 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_next_character_break
+.Nd determine codepoint-offset to next grapheme cluster break
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_next_character_break "const uint_least32_t *str" "size_t len"
+.Sh DESCRIPTION
+The
+.Fn grapheme_next_character_break
+function computes the offset (in codepoints) to the next grapheme
+cluster break (see
+.Xr libgrapheme 7 )
+in the codepoint array
+.Va str
+of length
+.Va len .
+If a grapheme cluster begins at
+.Va str
+this offset is equal to the length of said grapheme cluster.
+.Pp
+If
+.Va len
+is set to
+.Dv SIZE_MAX
+(stdint.h is already included by grapheme.h) the string
+.Va str
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+.Pp
+For UTF-8-encoded input data
+.Xr grapheme_next_character_break_utf8 3
+can be used instead.
+.Sh RETURN VALUES
+The
+.Fn grapheme_next_character_break
+function returns the offset (in codepoints) to the next grapheme cluster
+break in
+.Va str
+or 0 if
+.Va str
+is
+.Dv NULL .
+.Sh SEE ALSO
+.Xr grapheme_is_character_break 3 ,
+.Xr grapheme_next_character_break_utf8 3 ,
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_next_character_break
+is compliant with the Unicode 14.0.0 specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev@frign.de
+EOF
diff --git a/man/grapheme_next_character_break_utf8.3 b/man/grapheme_next_character_break_utf8.3
@@ -1,95 +0,0 @@
-.Dd 2022-08-26
-.Dt GRAPHEME_NEXT_CHARACTER_BREAK_UTF8 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_next_character_break_utf8
-.Nd determine byte-offset to next grapheme cluster break
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_next_character_break_utf8 "const char *str" "size_t len"
-.Sh DESCRIPTION
-The
-.Fn grapheme_next_character_break_utf8
-function computes the offset (in bytes) to the next grapheme
-cluster break (see
-.Xr libgrapheme 7 )
-in the UTF-8-encoded string
-.Va str
-of length
-.Va len .
-If a grapheme cluster begins at
-.Va str
-this offset is equal to the length of said grapheme cluster.
-.Pp
-If
-.Va len
-is set to
-.Dv SIZE_MAX
-(stdint.h is already included by grapheme.h) the string
-.Va str
-is interpreted to be NUL-terminated and processing stops when a
-NUL-byte is encountered.
-.Pp
-For non-UTF-8 input data
-.Xr grapheme_is_character_break 3
-and
-.Xr grapheme_next_character_break 3
-can be used instead.
-.Sh RETURN VALUES
-The
-.Fn grapheme_next_character_break_utf8
-function returns the offset (in bytes) to the next grapheme cluster
-break in
-.Va str
-or 0 if
-.Va str
-is
-.Dv NULL .
-.Sh EXAMPLES
-.Bd -literal
-/* cc (-static) -o example example.c -lgrapheme */
-#include <grapheme.h>
-#include <stdint.h>
-#include <stdio.h>
-
-int
-main(void)
-{
- /* UTF-8 encoded input */
- char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
- "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
- "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
- "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
- size_t ret, len, off;
-
- printf("Input: \\"%s\\"\\n", s);
-
- /* print each grapheme cluster with byte-length */
- printf("Grapheme clusters in NUL-delimited input:\\n");
- for (off = 0; s[off] != '\\0'; off += ret) {
- ret = grapheme_next_character_break_utf8(s + off, SIZE_MAX);
- printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off, ret);
- }
- printf("\\n");
-
- /* do the same, but this time string is length-delimited */
- len = 17;
- printf("Grapheme clusters in input delimited to %zu bytes:\\n", len);
- for (off = 0; off < len; off += ret) {
- ret = grapheme_next_character_break_utf8(s + off, len - off);
- printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off, ret);
- }
-
- return 0;
-}
-.Ed
-.Sh SEE ALSO
-.Xr grapheme_is_character_break 3 ,
-.Xr grapheme_next_character_break 3 ,
-.Xr libgrapheme 7
-.Sh STANDARDS
-.Fn grapheme_next_character_break_utf8
-is compliant with the Unicode 14.0.0 specification.
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt dev@frign.de
diff --git a/man/grapheme_next_character_break_utf8.sh b/man/grapheme_next_character_break_utf8.sh
@@ -0,0 +1,97 @@
+cat << EOF
+.Dd 2022-08-26
+.Dt GRAPHEME_NEXT_CHARACTER_BREAK_UTF8 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_next_character_break_utf8
+.Nd determine byte-offset to next grapheme cluster break
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_next_character_break_utf8 "const char *str" "size_t len"
+.Sh DESCRIPTION
+The
+.Fn grapheme_next_character_break_utf8
+function computes the offset (in bytes) to the next grapheme
+cluster break (see
+.Xr libgrapheme 7 )
+in the UTF-8-encoded string
+.Va str
+of length
+.Va len .
+If a grapheme cluster begins at
+.Va str
+this offset is equal to the length of said grapheme cluster.
+.Pp
+If
+.Va len
+is set to
+.Dv SIZE_MAX
+(stdint.h is already included by grapheme.h) the string
+.Va str
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+.Pp
+For non-UTF-8 input data
+.Xr grapheme_is_character_break 3
+and
+.Xr grapheme_next_character_break 3
+can be used instead.
+.Sh RETURN VALUES
+The
+.Fn grapheme_next_character_break_utf8
+function returns the offset (in bytes) to the next grapheme cluster
+break in
+.Va str
+or 0 if
+.Va str
+is
+.Dv NULL .
+.Sh EXAMPLES
+.Bd -literal
+/* cc (-static) -o example example.c -lgrapheme */
+#include <grapheme.h>
+#include <stdint.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ /* UTF-8 encoded input */
+ char *s = "T\\\\xC3\\\\xABst \\\\xF0\\\\x9F\\\\x91\\\\xA8\\\\xE2\\\\x80\\\\x8D\\\\xF0"
+ "\\\\x9F\\\\x91\\\\xA9\\\\xE2\\\\x80\\\\x8D\\\\xF0\\\\x9F\\\\x91\\\\xA6 \\\\xF0"
+ "\\\\x9F\\\\x87\\\\xBA\\\\xF0\\\\x9F\\\\x87\\\\xB8 \\\\xE0\\\\xA4\\\\xA8\\\\xE0"
+ "\\\\xA5\\\\x80 \\\\xE0\\\\xAE\\\\xA8\\\\xE0\\\\xAE\\\\xBF!";
+ size_t ret, len, off;
+
+ printf("Input: \\\\"%s\\\\"\\\\n", s);
+
+ /* print each grapheme cluster with byte-length */
+ printf("Grapheme clusters in NUL-delimited input:\\\\n");
+ for (off = 0; s[off] != '\\\\0'; off += ret) {
+ ret = grapheme_next_character_break_utf8(s + off, SIZE_MAX);
+ printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
+ }
+ printf("\\\\n");
+
+ /* do the same, but this time string is length-delimited */
+ len = 17;
+ printf("Grapheme clusters in input delimited to %zu bytes:\\\\n", len);
+ for (off = 0; off < len; off += ret) {
+ ret = grapheme_next_character_break_utf8(s + off, len - off);
+ printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
+ }
+
+ return 0;
+}
+.Ed
+.Sh SEE ALSO
+.Xr grapheme_is_character_break 3 ,
+.Xr grapheme_next_character_break 3 ,
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_next_character_break_utf8
+is compliant with the Unicode 14.0.0 specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev@frign.de
+EOF
diff --git a/man/grapheme_next_line_break.3 b/man/grapheme_next_line_break.3
@@ -1,51 +0,0 @@
-.Dd 2022-08-26
-.Dt GRAPHEME_NEXT_LINE_BREAK 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_next_line_break
-.Nd determine codepoint-offset to next grapheme cluster break
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_next_line_break "const uint_least32_t *str" "size_t len"
-.Sh DESCRIPTION
-The
-.Fn grapheme_next_line_break
-function computes the offset (in codepoints) to the next possible line
-break (see
-.Xr libgrapheme 7 )
-in the codepoint array
-.Va str
-of length
-.Va len .
-.Pp
-If
-.Va len
-is set to
-.Dv SIZE_MAX
-(stdint.h is already included by grapheme.h) the string
-.Va str
-is interpreted to be NUL-terminated and processing stops when a
-NUL-byte is encountered.
-.Pp
-For UTF-8-encoded input data
-.Xr grapheme_next_line_break_utf8 3
-can be used instead.
-.Sh RETURN VALUES
-The
-.Fn grapheme_next_line_break
-function returns the offset (in codepoints) to the next possible line
-break in
-.Va str
-or 0 if
-.Va str
-is
-.Dv NULL .
-.Sh SEE ALSO
-.Xr grapheme_next_line_break_utf8 3 ,
-.Xr libgrapheme 7
-.Sh STANDARDS
-.Fn grapheme_next_line_break
-is compliant with the Unicode 14.0.0 specification.
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt dev@frign.de
diff --git a/man/grapheme_next_line_break.sh b/man/grapheme_next_line_break.sh
@@ -0,0 +1,53 @@
+cat << EOF
+.Dd 2022-08-26
+.Dt GRAPHEME_NEXT_LINE_BREAK 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_next_line_break
+.Nd determine codepoint-offset to next possible line break
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_next_line_break "const uint_least32_t *str" "size_t len"
+.Sh DESCRIPTION
+The
+.Fn grapheme_next_line_break
+function computes the offset (in codepoints) to the next possible line
+break (see
+.Xr libgrapheme 7 )
+in the codepoint array
+.Va str
+of length
+.Va len .
+.Pp
+If
+.Va len
+is set to
+.Dv SIZE_MAX
+(stdint.h is already included by grapheme.h) the string
+.Va str
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+.Pp
+For UTF-8-encoded input data
+.Xr grapheme_next_line_break_utf8 3
+can be used instead.
+.Sh RETURN VALUES
+The
+.Fn grapheme_next_line_break
+function returns the offset (in codepoints) to the next possible line
+break in
+.Va str
+or 0 if
+.Va str
+is
+.Dv NULL .
+.Sh SEE ALSO
+.Xr grapheme_next_line_break_utf8 3 ,
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_next_line_break
+is compliant with the Unicode 14.0.0 specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev@frign.de
+EOF
diff --git a/man/grapheme_next_line_break_utf8.3 b/man/grapheme_next_line_break_utf8.3
@@ -1,89 +0,0 @@
-.Dd 2022-08-26
-.Dt GRAPHEME_NEXT_LINE_BREAK_UTF8 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_next_line_break_utf8
-.Nd determine byte-offset to next possible line break
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_next_line_break_utf8 "const char *str" "size_t len"
-.Sh DESCRIPTION
-The
-.Fn grapheme_next_line_break_utf8
-function computes the offset (in bytes) to the next possible line
-break (see
-.Xr libgrapheme 7 )
-in the UTF-8-encoded string
-.Va str
-of length
-.Va len .
-.Pp
-If
-.Va len
-is set to
-.Dv SIZE_MAX
-(stdint.h is already included by grapheme.h) the string
-.Va str
-is interpreted to be NUL-terminated and processing stops when a
-NUL-byte is encountered.
-.Pp
-For non-UTF-8 input data
-.Xr grapheme_next_line_break 3
-can be used instead.
-.Sh RETURN VALUES
-The
-.Fn grapheme_next_line_break_utf8
-function returns the offset (in bytes) to the next possible line
-break in
-.Va str
-or 0 if
-.Va str
-is
-.Dv NULL .
-.Sh EXAMPLES
-.Bd -literal
-/* cc (-static) -o example example.c -lgrapheme */
-#include <grapheme.h>
-#include <stdint.h>
-#include <stdio.h>
-
-int
-main(void)
-{
- /* UTF-8 encoded input */
- char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
- "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
- "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
- "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
- size_t ret, len, off;
-
- printf("Input: \\"%s\\"\\n", s);
-
- /* print each grapheme cluster with byte-length */
- printf("Grapheme clusters in NUL-delimited input:\\n");
- for (off = 0; s[off] != '\\0'; off += ret) {
- ret = grapheme_next_line_break_utf8(s + off, SIZE_MAX);
- printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off, ret);
- }
- printf("\\n");
-
- /* do the same, but this time string is length-delimited */
- len = 17;
- printf("Grapheme clusters in input delimited to %zu bytes:\\n", len);
- for (off = 0; off < len; off += ret) {
- ret = grapheme_next_line_break_utf8(s + off, len - off);
- printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off, ret);
- }
-
- return 0;
-}
-.Ed
-.Sh SEE ALSO
-.Xr grapheme_next_line_break 3 ,
-.Xr libgrapheme 7
-.Sh STANDARDS
-.Fn grapheme_next_line_break_utf8
-is compliant with the Unicode 14.0.0 specification.
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt dev@frign.de
diff --git a/man/grapheme_next_line_break_utf8.sh b/man/grapheme_next_line_break_utf8.sh
@@ -0,0 +1,91 @@
+cat << EOF
+.Dd 2022-08-26
+.Dt GRAPHEME_NEXT_LINE_BREAK_UTF8 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_next_line_break_utf8
+.Nd determine byte-offset to next possible line break
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_next_line_break_utf8 "const char *str" "size_t len"
+.Sh DESCRIPTION
+The
+.Fn grapheme_next_line_break_utf8
+function computes the offset (in bytes) to the next possible line
+break (see
+.Xr libgrapheme 7 )
+in the UTF-8-encoded string
+.Va str
+of length
+.Va len .
+.Pp
+If
+.Va len
+is set to
+.Dv SIZE_MAX
+(stdint.h is already included by grapheme.h) the string
+.Va str
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+.Pp
+For non-UTF-8 input data
+.Xr grapheme_next_line_break 3
+can be used instead.
+.Sh RETURN VALUES
+The
+.Fn grapheme_next_line_break_utf8
+function returns the offset (in bytes) to the next possible line
+break in
+.Va str
+or 0 if
+.Va str
+is
+.Dv NULL .
+.Sh EXAMPLES
+.Bd -literal
+/* cc (-static) -o example example.c -lgrapheme */
+#include <grapheme.h>
+#include <stdint.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ /* UTF-8 encoded input */
+ char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
+ "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
+ "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
+ "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
+ size_t ret, len, off;
+
+ printf("Input: \\"%s\\"\\n", s);
+
+ /* print each line segment (up to the next possible line break) with byte-length */
+ printf("Line segments in NUL-delimited input:\\n");
+ for (off = 0; s[off] != '\\0'; off += ret) {
+ ret = grapheme_next_line_break_utf8(s + off, SIZE_MAX);
+ printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
+ }
+ printf("\\n");
+
+ /* do the same, but this time string is length-delimited */
+ len = 17;
+ printf("Line segments in input delimited to %zu bytes:\\n", len);
+ for (off = 0; off < len; off += ret) {
+ ret = grapheme_next_line_break_utf8(s + off, len - off);
+ printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
+ }
+
+ return 0;
+}
+.Ed
+.Sh SEE ALSO
+.Xr grapheme_next_line_break 3 ,
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_next_line_break_utf8
+is compliant with the Unicode 14.0.0 specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev@frign.de
+EOF
diff --git a/man/grapheme_next_sentence_break.3 b/man/grapheme_next_sentence_break.3
@@ -1,54 +0,0 @@
-.Dd 2022-08-26
-.Dt GRAPHEME_NEXT_SENTENCE_BREAK 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_next_sentence_break
-.Nd determine codepoint-offset to next sentence break
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_next_sentence_break "const uint_least32_t *str" "size_t len"
-.Sh DESCRIPTION
-The
-.Fn grapheme_next_sentence_break
-function computes the offset (in codepoints) to the next sentence
-break (see
-.Xr libgrapheme 7 )
-in the codepoint array
-.Va str
-of length
-.Va len .
-If a sentence begins at
-.Va str
-this offset is equal to the length of said sentence.
-.Pp
-If
-.Va len
-is set to
-.Dv SIZE_MAX
-(stdint.h is already included by grapheme.h) the string
-.Va str
-is interpreted to be NUL-terminated and processing stops when a
-NUL-byte is encountered.
-.Pp
-For UTF-8-encoded input data
-.Xr grapheme_next_sentence_break_utf8 3
-can be used instead.
-.Sh RETURN VALUES
-The
-.Fn grapheme_next_sentence_break
-function returns the offset (in codepoints) to the next sentence
-break in
-.Va str
-or 0 if
-.Va str
-is
-.Dv NULL .
-.Sh SEE ALSO
-.Xr grapheme_next_sentence_break_utf8 3 ,
-.Xr libgrapheme 7
-.Sh STANDARDS
-.Fn grapheme_next_sentence_break
-is compliant with the Unicode 14.0.0 specification.
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt dev@frign.de
diff --git a/man/grapheme_next_sentence_break.sh b/man/grapheme_next_sentence_break.sh
@@ -0,0 +1,56 @@
+cat << EOF
+.Dd 2022-08-26
+.Dt GRAPHEME_NEXT_SENTENCE_BREAK 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_next_sentence_break
+.Nd determine codepoint-offset to next sentence break
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_next_sentence_break "const uint_least32_t *str" "size_t len"
+.Sh DESCRIPTION
+The
+.Fn grapheme_next_sentence_break
+function computes the offset (in codepoints) to the next sentence
+break (see
+.Xr libgrapheme 7 )
+in the codepoint array
+.Va str
+of length
+.Va len .
+If a sentence begins at
+.Va str
+this offset is equal to the length of said sentence.
+.Pp
+If
+.Va len
+is set to
+.Dv SIZE_MAX
+(stdint.h is already included by grapheme.h) the string
+.Va str
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+.Pp
+For UTF-8-encoded input data
+.Xr grapheme_next_sentence_break_utf8 3
+can be used instead.
+.Sh RETURN VALUES
+The
+.Fn grapheme_next_sentence_break
+function returns the offset (in codepoints) to the next sentence
+break in
+.Va str
+or 0 if
+.Va str
+is
+.Dv NULL .
+.Sh SEE ALSO
+.Xr grapheme_next_sentence_break_utf8 3 ,
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_next_sentence_break
+is compliant with the Unicode 14.0.0 specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev@frign.de
+EOF
diff --git a/man/grapheme_next_sentence_break_utf8.3 b/man/grapheme_next_sentence_break_utf8.3
@@ -1,92 +0,0 @@
-.Dd 2022-08-26
-.Dt GRAPHEME_NEXT_SENTENCE_BREAK_UTF8 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_next_sentence_break_utf8
-.Nd determine byte-offset to next sentence break
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_next_sentence_break_utf8 "const char *str" "size_t len"
-.Sh DESCRIPTION
-The
-.Fn grapheme_next_sentence_break_utf8
-function computes the offset (in bytes) to the next sentence
-break (see
-.Xr libgrapheme 7 )
-in the UTF-8-encoded string
-.Va str
-of length
-.Va len .
-If a sentence begins at
-.Va str
-this offset is equal to the length of said sentence.
-.Pp
-If
-.Va len
-is set to
-.Dv SIZE_MAX
-(stdint.h is already included by grapheme.h) the string
-.Va str
-is interpreted to be NUL-terminated and processing stops when a
-NUL-byte is encountered.
-.Pp
-For non-UTF-8 input data
-.Xr grapheme_next_sentence_break 3
-can be used instead.
-.Sh RETURN VALUES
-The
-.Fn grapheme_next_sentence_break_utf8
-function returns the offset (in bytes) to the next sentence
-break in
-.Va str
-or 0 if
-.Va str
-is
-.Dv NULL .
-.Sh EXAMPLES
-.Bd -literal
-/* cc (-static) -o example example.c -lgrapheme */
-#include <grapheme.h>
-#include <stdint.h>
-#include <stdio.h>
-
-int
-main(void)
-{
- /* UTF-8 encoded input */
- char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
- "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
- "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
- "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
- size_t ret, len, off;
-
- printf("Input: \\"%s\\"\\n", s);
-
- /* print each grapheme cluster with byte-length */
- printf("Grapheme clusters in NUL-delimited input:\\n");
- for (off = 0; s[off] != '\\0'; off += ret) {
- ret = grapheme_next_sentence_break_utf8(s + off, SIZE_MAX);
- printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off, ret);
- }
- printf("\\n");
-
- /* do the same, but this time string is length-delimited */
- len = 17;
- printf("Grapheme clusters in input delimited to %zu bytes:\\n", len);
- for (off = 0; off < len; off += ret) {
- ret = grapheme_next_sentence_break_utf8(s + off, len - off);
- printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off, ret);
- }
-
- return 0;
-}
-.Ed
-.Sh SEE ALSO
-.Xr grapheme_next_sentence_break 3 ,
-.Xr libgrapheme 7
-.Sh STANDARDS
-.Fn grapheme_next_sentence_break_utf8
-is compliant with the Unicode 14.0.0 specification.
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt dev@frign.de
diff --git a/man/grapheme_next_sentence_break_utf8.sh b/man/grapheme_next_sentence_break_utf8.sh
@@ -0,0 +1,94 @@
+cat << EOF
+.Dd 2022-08-26
+.Dt GRAPHEME_NEXT_SENTENCE_BREAK_UTF8 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_next_sentence_break_utf8
+.Nd determine byte-offset to next sentence break
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_next_sentence_break_utf8 "const char *str" "size_t len"
+.Sh DESCRIPTION
+The
+.Fn grapheme_next_sentence_break_utf8
+function computes the offset (in bytes) to the next sentence
+break (see
+.Xr libgrapheme 7 )
+in the UTF-8-encoded string
+.Va str
+of length
+.Va len .
+If a sentence begins at
+.Va str
+this offset is equal to the length of said sentence.
+.Pp
+If
+.Va len
+is set to
+.Dv SIZE_MAX
+(stdint.h is already included by grapheme.h) the string
+.Va str
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+.Pp
+For non-UTF-8 input data
+.Xr grapheme_next_sentence_break 3
+can be used instead.
+.Sh RETURN VALUES
+The
+.Fn grapheme_next_sentence_break_utf8
+function returns the offset (in bytes) to the next sentence
+break in
+.Va str
+or 0 if
+.Va str
+is
+.Dv NULL .
+.Sh EXAMPLES
+.Bd -literal
+/* cc (-static) -o example example.c -lgrapheme */
+#include <grapheme.h>
+#include <stdint.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ /* UTF-8 encoded input */
+ char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
+ "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
+ "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
+ "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
+ size_t ret, len, off;
+
+ printf("Input: \\"%s\\"\\n", s);
+
+ /* print each sentence with byte-length */
+ printf("Sentences in NUL-delimited input:\\n");
+ for (off = 0; s[off] != '\\0'; off += ret) {
+ ret = grapheme_next_sentence_break_utf8(s + off, SIZE_MAX);
+ printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
+ }
+ printf("\\n");
+
+ /* do the same, but this time string is length-delimited */
+ len = 17;
+ printf("Sentences in input delimited to %zu bytes:\\n", len);
+ for (off = 0; off < len; off += ret) {
+ ret = grapheme_next_sentence_break_utf8(s + off, len - off);
+ printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
+ }
+
+ return 0;
+}
+.Ed
+.Sh SEE ALSO
+.Xr grapheme_next_sentence_break 3 ,
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_next_sentence_break_utf8
+is compliant with the Unicode 14.0.0 specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev@frign.de
+EOF
diff --git a/man/grapheme_next_word_break.3 b/man/grapheme_next_word_break.3
@@ -1,54 +0,0 @@
-.Dd 2022-08-26
-.Dt GRAPHEME_NEXT_WORD_BREAK 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_next_word_break
-.Nd determine codepoint-offset to next word break
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_next_word_break "const uint_least32_t *str" "size_t len"
-.Sh DESCRIPTION
-The
-.Fn grapheme_next_word_break
-function computes the offset (in codepoints) to the next word
-break (see
-.Xr libgrapheme 7 )
-in the codepoint array
-.Va str
-of length
-.Va len .
-If a word begins at
-.Va str
-this offset is equal to the length of said word.
-.Pp
-If
-.Va len
-is set to
-.Dv SIZE_MAX
-(stdint.h is already included by grapheme.h) the string
-.Va str
-is interpreted to be NUL-terminated and processing stops when a
-NUL-byte is encountered.
-.Pp
-For UTF-8-encoded input data
-.Xr grapheme_next_word_break_utf8 3
-can be used instead.
-.Sh RETURN VALUES
-The
-.Fn grapheme_next_word_break
-function returns the offset (in codepoints) to the next word
-break in
-.Va str
-or 0 if
-.Va str
-is
-.Dv NULL .
-.Sh SEE ALSO
-.Xr grapheme_next_word_break_utf8 3 ,
-.Xr libgrapheme 7
-.Sh STANDARDS
-.Fn grapheme_next_word_break
-is compliant with the Unicode 14.0.0 specification.
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt dev@frign.de
diff --git a/man/grapheme_next_word_break.sh b/man/grapheme_next_word_break.sh
@@ -0,0 +1,56 @@
+cat << EOF
+.Dd 2022-08-26
+.Dt GRAPHEME_NEXT_WORD_BREAK 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_next_word_break
+.Nd determine codepoint-offset to next word break
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_next_word_break "const uint_least32_t *str" "size_t len"
+.Sh DESCRIPTION
+The
+.Fn grapheme_next_word_break
+function computes the offset (in codepoints) to the next word
+break (see
+.Xr libgrapheme 7 )
+in the codepoint array
+.Va str
+of length
+.Va len .
+If a word begins at
+.Va str
+this offset is equal to the length of said word.
+.Pp
+If
+.Va len
+is set to
+.Dv SIZE_MAX
+(stdint.h is already included by grapheme.h) the string
+.Va str
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+.Pp
+For UTF-8-encoded input data
+.Xr grapheme_next_word_break_utf8 3
+can be used instead.
+.Sh RETURN VALUES
+The
+.Fn grapheme_next_word_break
+function returns the offset (in codepoints) to the next word
+break in
+.Va str
+or 0 if
+.Va str
+is
+.Dv NULL .
+.Sh SEE ALSO
+.Xr grapheme_next_word_break_utf8 3 ,
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_next_word_break
+is compliant with the Unicode 14.0.0 specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev@frign.de
+EOF
diff --git a/man/grapheme_next_word_break_utf8.3 b/man/grapheme_next_word_break_utf8.3
@@ -1,92 +0,0 @@
-.Dd 2022-08-26
-.Dt GRAPHEME_NEXT_WORD_BREAK_UTF8 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_next_word_break_utf8
-.Nd determine byte-offset to next word break
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_next_word_break_utf8 "const char *str" "size_t len"
-.Sh DESCRIPTION
-The
-.Fn grapheme_next_word_break_utf8
-function computes the offset (in bytes) to the next word
-break (see
-.Xr libgrapheme 7 )
-in the UTF-8-encoded string
-.Va str
-of length
-.Va len .
-If a word begins at
-.Va str
-this offset is equal to the length of said word.
-.Pp
-If
-.Va len
-is set to
-.Dv SIZE_MAX
-(stdint.h is already included by grapheme.h) the string
-.Va str
-is interpreted to be NUL-terminated and processing stops when a
-NUL-byte is encountered.
-.Pp
-For non-UTF-8 input data
-.Xr grapheme_next_word_break 3
-can be used instead.
-.Sh RETURN VALUES
-The
-.Fn grapheme_next_word_break_utf8
-function returns the offset (in bytes) to the next word
-break in
-.Va str
-or 0 if
-.Va str
-is
-.Dv NULL .
-.Sh EXAMPLES
-.Bd -literal
-/* cc (-static) -o example example.c -lgrapheme */
-#include <grapheme.h>
-#include <stdint.h>
-#include <stdio.h>
-
-int
-main(void)
-{
- /* UTF-8 encoded input */
- char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
- "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
- "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
- "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
- size_t ret, len, off;
-
- printf("Input: \\"%s\\"\\n", s);
-
- /* print each grapheme cluster with byte-length */
- printf("Grapheme clusters in NUL-delimited input:\\n");
- for (off = 0; s[off] != '\\0'; off += ret) {
- ret = grapheme_next_word_break_utf8(s + off, SIZE_MAX);
- printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off, ret);
- }
- printf("\\n");
-
- /* do the same, but this time string is length-delimited */
- len = 17;
- printf("Grapheme clusters in input delimited to %zu bytes:\\n", len);
- for (off = 0; off < len; off += ret) {
- ret = grapheme_next_word_break_utf8(s + off, len - off);
- printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off, ret);
- }
-
- return 0;
-}
-.Ed
-.Sh SEE ALSO
-.Xr grapheme_next_word_break 3 ,
-.Xr libgrapheme 7
-.Sh STANDARDS
-.Fn grapheme_next_word_break_utf8
-is compliant with the Unicode 14.0.0 specification.
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt dev@frign.de
diff --git a/man/grapheme_next_word_break_utf8.sh b/man/grapheme_next_word_break_utf8.sh
@@ -0,0 +1,94 @@
+cat << EOF
+.Dd 2022-08-26
+.Dt GRAPHEME_NEXT_WORD_BREAK_UTF8 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_next_word_break_utf8
+.Nd determine byte-offset to next word break
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_next_word_break_utf8 "const char *str" "size_t len"
+.Sh DESCRIPTION
+The
+.Fn grapheme_next_word_break_utf8
+function computes the offset (in bytes) to the next word
+break (see
+.Xr libgrapheme 7 )
+in the UTF-8-encoded string
+.Va str
+of length
+.Va len .
+If a word begins at
+.Va str
+this offset is equal to the length of said word.
+.Pp
+If
+.Va len
+is set to
+.Dv SIZE_MAX
+(stdint.h is already included by grapheme.h) the string
+.Va str
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+.Pp
+For non-UTF-8 input data
+.Xr grapheme_next_word_break 3
+can be used instead.
+.Sh RETURN VALUES
+The
+.Fn grapheme_next_word_break_utf8
+function returns the offset (in bytes) to the next word
+break in
+.Va str
+or 0 if
+.Va str
+is
+.Dv NULL .
+.Sh EXAMPLES
+.Bd -literal
+/* cc (-static) -o example example.c -lgrapheme */
+#include <grapheme.h>
+#include <stdint.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ /* UTF-8 encoded input */
+ char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
+ "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
+ "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
+ "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
+ size_t ret, len, off;
+
+ printf("Input: \\"%s\\"\\n", s);
+
+ /* print each word with byte-length */
+ printf("Words in NUL-delimited input:\\n");
+ for (off = 0; s[off] != '\\0'; off += ret) {
+ ret = grapheme_next_word_break_utf8(s + off, SIZE_MAX);
+ printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
+ }
+ printf("\\n");
+
+ /* do the same, but this time string is length-delimited */
+ len = 17;
+ printf("Words in input delimited to %zu bytes:\\n", len);
+ for (off = 0; off < len; off += ret) {
+ ret = grapheme_next_word_break_utf8(s + off, len - off);
+ printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
+ }
+
+ return 0;
+}
+.Ed
+.Sh SEE ALSO
+.Xr grapheme_next_word_break 3 ,
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_next_word_break_utf8
+is compliant with the Unicode 14.0.0 specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev@frign.de
+EOF
diff --git a/man/grapheme_to_lowercase.sh b/man/grapheme_to_lowercase.sh
@@ -0,0 +1,2 @@
+CASE="lowercase"\
+ $SH man/template/to_case.sh
diff --git a/man/grapheme_to_titlecase.sh b/man/grapheme_to_titlecase.sh
@@ -0,0 +1,2 @@
+CASE="titlecase"\
+ $SH man/template/to_case.sh
diff --git a/man/grapheme_to_uppercase.sh b/man/grapheme_to_uppercase.sh
@@ -0,0 +1,2 @@
+CASE="uppercase"\
+ $SH man/template/to_case.sh
diff --git a/man/libgrapheme.7 b/man/libgrapheme.7
@@ -1,147 +0,0 @@
-.Dd 2022-08-26
-.Dt LIBGRAPHEME 7
-.Os suckless.org
-.Sh NAME
-.Nm libgrapheme
-.Nd unicode string library
-.Sh SYNOPSIS
-.In grapheme.h
-.Sh DESCRIPTION
-The
-.Nm
-library provides functions to properly handle Unicode strings according
-to the Unicode specification.
-Unicode strings are made up of user-perceived characters (so-called
-.Dq grapheme clusters ,
-see
-.Sx MOTIVATION )
-that are made up of one or more Unicode codepoints, which in turn
-are encoded in one or more bytes in an encoding like UTF-8.
-.Pp
-There is a widespread misconception that it was enough to simply
-determine codepoints in a string and treat them as user-perceived
-characters to be Unicode compliant.
-While this may work in some cases, this assumption quickly breaks,
-especially for non-Western languages and decomposed Unicode strings
-where user-perceived characters are usually represented using multiple
-codepoints.
-.Pp
-Despite this complicated multilevel structure of Unicode strings,
-.Nm
-provides methods to work with them at the byte-level (i.e. UTF-8
-.Sq char
-arrays) while also offering codepoint-level methods.
-.Pp
-Every documented function's manual page provides a self-contained
-example illustrating the possible usage.
-.Sh SEE ALSO
-.Xr grapheme_decode_utf8 3 ,
-.Xr grapheme_encode_utf8 3 ,
-.Xr grapheme_is_character_break 3 ,
-.Xr grapheme_next_character_break 3 ,
-.Xr grapheme_next_line_break 3 ,
-.Xr grapheme_next_sentence_break 3 ,
-.Xr grapheme_next_word_break 3 ,
-.Xr grapheme_next_character_break_utf8 3 ,
-.Xr grapheme_next_line_break_utf8 3 ,
-.Xr grapheme_next_sentence_break_utf8 3 ,
-.Xr grapheme_next_word_break_utf8 3
-.Sh STANDARDS
-.Nm
-is compliant with the Unicode 14.0.0 specification.
-.Sh MOTIVATION
-The idea behind every character encoding scheme like ASCII or Unicode
-is to express abstract characters (which can be thought of as shapes
-making up a written language). ASCII for instance, which comprises the
-range 0 to 127, assigns the number 65 (0x41) to the abstract character
-.Sq A .
-This number is called a
-.Dq codepoint ,
-and all codepoints of an encoding make up its so-called
-.Dq code space .
-.Pp
-Unicode's code space is much larger, ranging from 0 to 0x10FFFF, but its
-first 128 codepoints are identical to ASCII's. The additional code
-points are needed as Unicode's goal is to express all writing systems
-of the world.
-To give an example, the abstract character
-.Sq \[u00C4]
-is not expressable in ASCII, given no ASCII codepoint has been assigned
-to it.
-It can be expressed in Unicode, though, with the codepoint 196 (0xC4).
-.Pp
-One may assume that this process is straightfoward, but as more and
-more codepoints were assigned to abstract characters, the Unicode
-Consortium (that defines the Unicode standard) was facing a problem:
-Many (mostly non-European) languages have such a large amount of
-abstract characters that it would exhaust the available Unicode code
-space if one tried to assign a codepoint to each abstract character.
-The solution to that problem is best introduced with an example: Consider
-the abstract character
-.Sq \[u01DE] ,
-which is
-.Sq A
-with an umlaut and a macron added to it.
-In this sense, one can consider
-.Sq \[u01DE]
-as a two-fold modification (namely
-.Dq add umlaut
-and
-.Dq add macron )
-of the
-.Dq base character
-.Sq A .
-.Pp
-The Unicode Consortium adapted this idea by assigning codepoints to
-modifications.
-For example, the codepoint 0x308 represents adding an umlaut and 0x304
-represents adding a macron, and thus, the codepoint sequence
-.Dq 0x41 0x308 0x304 ,
-namely the base character
-.Sq A
-followed by the umlaut and macron modifiers, represents the abstract
-character
-.Sq \[u01DE] .
-As a side-note, the single codepoint 0x1DE was also assigned to
-.Sq \[u01DE] ,
-which is a good example for the fact that there can be multiple
-representations of a single abstract character in Unicode.
-.Pp
-Expressing a single abstract character with multiple codepoints solved
-the code space exhaustion-problem, and the concept has been greatly
-expanded since its first introduction (emojis, joiners, etc.). A sequence
-(which can also have the length 1) of codepoints that belong together
-this way and represents an abstract character is called a
-.Dq grapheme cluster .
-.Pp
-In many applications it is necessary to count the number of
-user-perceived characters, i.e. grapheme clusters, in a string.
-A good example for this is a terminal text editor, which needs to
-properly align characters on a grid.
-This is pretty simple with ASCII-strings, where you just count the number
-of bytes (as each byte is a codepoint and each codepoint is a grapheme
-cluster).
-With Unicode-strings, it is a common mistake to simply adapt the
-ASCII-approach and count the number of code points.
-This is wrong, as, for example, the sequence
-.Dq 0x41 0x308 0x304 ,
-while made up of 3 codepoints, is a single grapheme cluster and
-represents the user-perceived character
-.Sq \[u01DE] .
-.Pp
-The proper way to segment a string into user-perceived characters
-is to segment it into its grapheme clusters by applying the Unicode
-grapheme cluster breaking algorithm (UAX #29).
-It is based on a complex ruleset and lookup-tables and determines if a
-grapheme cluster ends or is continued between two codepoints.
-Libraries like ICU and libunistring, which also offer this functionality,
-are often bloated, not correct, difficult to use or not reasonably
-statically linkable.
-.Pp
-Analogously, the standard provides algorithms to separate strings by
-words, sentences and lines, convert cases and compare strings.
-The motivation behind
-.Nm
-is to make unicode handling suck less and abide by the UNIX philosophy.
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt dev@frign.de
diff --git a/man/libgrapheme.sh b/man/libgrapheme.sh
@@ -0,0 +1,151 @@
+#!/bin/sh
+
+cat << EOF
+.Dd 2022-08-26
+.Dt LIBGRAPHEME 7
+.Os suckless.org
+.Sh NAME
+.Nm libgrapheme
+.Nd unicode string library
+.Sh SYNOPSIS
+.In grapheme.h
+.Sh DESCRIPTION
+The
+.Nm
+library provides functions to properly handle Unicode strings according
+to the Unicode specification.
+Unicode strings are made up of user-perceived characters (so-called
+.Dq grapheme clusters ,
+see
+.Sx MOTIVATION )
+that are made up of one or more Unicode codepoints, which in turn
+are encoded in one or more bytes in an encoding like UTF-8.
+.Pp
+There is a widespread misconception that it is enough to simply
+determine codepoints in a string and treat them as user-perceived
+characters to be Unicode compliant.
+While this may work in some cases, this assumption quickly breaks,
+especially for non-Western languages and decomposed Unicode strings
+where user-perceived characters are usually represented using multiple
+codepoints.
+.Pp
+Despite this complicated multilevel structure of Unicode strings,
+.Nm
+provides methods to work with them at the byte-level (i.e. UTF-8
+.Sq char
+arrays) while also offering codepoint-level methods.
+.Pp
+Every documented function's manual page provides a self-contained
+example illustrating the possible usage.
+.Sh SEE ALSO
+.Xr grapheme_decode_utf8 3 ,
+.Xr grapheme_encode_utf8 3 ,
+.Xr grapheme_is_character_break 3 ,
+.Xr grapheme_next_character_break 3 ,
+.Xr grapheme_next_character_break_utf8 3 ,
+.Xr grapheme_next_line_break 3 ,
+.Xr grapheme_next_line_break_utf8 3 ,
+.Xr grapheme_next_sentence_break 3 ,
+.Xr grapheme_next_sentence_break_utf8 3 ,
+.Xr grapheme_next_word_break 3 ,
+.Xr grapheme_next_word_break_utf8 3
+.Sh STANDARDS
+.Nm
+is compliant with the Unicode 14.0.0 specification.
+.Sh MOTIVATION
+The idea behind every character encoding scheme like ASCII or Unicode
+is to express abstract characters (which can be thought of as shapes
+making up a written language). ASCII for instance, which comprises the
+range 0 to 127, assigns the number 65 (0x41) to the abstract character
+.Sq A .
+This number is called a
+.Dq codepoint ,
+and all codepoints of an encoding make up its so-called
+.Dq code space .
+.Pp
+Unicode's code space is much larger, ranging from 0 to 0x10FFFF, but its
+first 128 codepoints are identical to ASCII's. The additional code
+points are needed as Unicode's goal is to express all writing systems
+of the world.
+To give an example, the abstract character
+.Sq \[u00C4]
+is not expressible in ASCII, given no ASCII codepoint has been assigned
+to it.
+It can be expressed in Unicode, though, with the codepoint 196 (0xC4).
+.Pp
+One may assume that this process is straightforward, but as more and
+more codepoints were assigned to abstract characters, the Unicode
+Consortium (that defines the Unicode standard) was facing a problem:
+Many (mostly non-European) languages have such a large amount of
+abstract characters that it would exhaust the available Unicode code
+space if one tried to assign a codepoint to each abstract character.
+The solution to that problem is best introduced with an example: Consider
+the abstract character
+.Sq \[u01DE] ,
+which is
+.Sq A
+with an umlaut and a macron added to it.
+In this sense, one can consider
+.Sq \[u01DE]
+as a two-fold modification (namely
+.Dq add umlaut
+and
+.Dq add macron )
+of the
+.Dq base character
+.Sq A .
+.Pp
+The Unicode Consortium adopted this idea by assigning codepoints to
+modifications.
+For example, the codepoint 0x308 represents adding an umlaut and 0x304
+represents adding a macron, and thus, the codepoint sequence
+.Dq 0x41 0x308 0x304 ,
+namely the base character
+.Sq A
+followed by the umlaut and macron modifiers, represents the abstract
+character
+.Sq \[u01DE] .
+As a side-note, the single codepoint 0x1DE was also assigned to
+.Sq \[u01DE] ,
+which is a good example for the fact that there can be multiple
+representations of a single abstract character in Unicode.
+.Pp
+Expressing a single abstract character with multiple codepoints solved
+the code space exhaustion-problem, and the concept has been greatly
+expanded since its first introduction (emojis, joiners, etc.). A sequence
+(which can also have the length 1) of codepoints that belong together
+this way and represents an abstract character is called a
+.Dq grapheme cluster .
+.Pp
+In many applications it is necessary to count the number of
+user-perceived characters, i.e. grapheme clusters, in a string.
+A good example for this is a terminal text editor, which needs to
+properly align characters on a grid.
+This is pretty simple with ASCII-strings, where you just count the number
+of bytes (as each byte is a codepoint and each codepoint is a grapheme
+cluster).
+With Unicode-strings, it is a common mistake to simply adapt the
+ASCII-approach and count the number of code points.
+This is wrong, as, for example, the sequence
+.Dq 0x41 0x308 0x304 ,
+while made up of 3 codepoints, is a single grapheme cluster and
+represents the user-perceived character
+.Sq \[u01DE] .
+.Pp
+The proper way to segment a string into user-perceived characters
+is to segment it into its grapheme clusters by applying the Unicode
+grapheme cluster breaking algorithm (UAX #29).
+It is based on a complex ruleset and lookup-tables and determines if a
+grapheme cluster ends or is continued between two codepoints.
+Libraries like ICU and libunistring, which also offer this functionality,
+are often bloated, not correct, difficult to use or not reasonably
+statically linkable.
+.Pp
+Analogously, the standard provides algorithms to separate strings by
+words, sentences and lines, convert cases and compare strings.
+The motivation behind
+.Nm
+is to make unicode handling suck less and abide by the UNIX philosophy.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev@frign.de
+EOF
diff --git a/man/template/to_case.sh b/man/template/to_case.sh
@@ -0,0 +1,63 @@
+# Template for the grapheme_to_{lowercase,uppercase,titlecase}(3) manuals.
+# The wrapper scripts pass the variant in the environment variable CASE;
+# the finished mdoc page is written to stdout.
+: "${CASE:?CASE must be set (lowercase, uppercase or titlecase)}"
+
+cat << EOF
+.Dd 2022-08-26
+.Dt GRAPHEME_TO_$(printf '%s' "$CASE" | tr '[:lower:]' '[:upper:]') 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_to_$CASE
+.Nd convert codepoint array to $CASE
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_to_$CASE "const uint_least32_t *src" "size_t srclen" "uint_least32_t *dest" "size_t destlen"
+.Sh DESCRIPTION
+The
+.Fn grapheme_to_$CASE
+function converts the codepoint array
+.Va src
+to $CASE and writes the result to
+.Va dest
+up to
+.Va destlen ,
+unless
+.Va dest
+is set to
+.Dv NULL .
+.Pp
+If
+.Va srclen
+is set to
+.Dv SIZE_MAX
+(stdint.h is already included by grapheme.h) the string
+.Va src
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+.Pp
+For UTF-8-encoded input data
+.Xr grapheme_to_${CASE}_utf8 3
+can be used instead.
+.Sh RETURN VALUES
+The
+.Fn grapheme_to_$CASE
+function returns the number of codepoints in the array resulting
+from converting
+.Va src
+to $CASE, even if
+.Va destlen
+is not large enough or
+.Va dest
+is
+.Dv NULL .
+.Sh SEE ALSO
+.Xr grapheme_to_${CASE}_utf8 3 ,
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_to_$CASE
+is compliant with the Unicode 14.0.0 specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev@frign.de
+EOF