libgrapheme

Freestanding C library for Unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

commit 6e6c538e4efb4d191a2f0391466556eb758d76bd
parent c58eb2ceb965785dbe9fecb688e10250d17aeca7
Author: Laslo Hunhold <dev@frign.de>
Date:   Mon, 29 Aug 2022 10:31:54 +0200

Use parameter expansion of variables within heredoc

Thanks to Quentin Rameau and Thomas Oltmann for remarking that the
workaround $(printf $VARIABLE) was not one of my brightest ideas, given
you can just use ${VARIABLE} instead.

Additionally, make use of an $ANTISUFFIX variable in the
next_break-template.

Signed-off-by: Laslo Hunhold <dev@frign.de>

Diffstat:
M man/grapheme_decode_utf8.sh | 2 +-
M man/grapheme_encode_utf8.sh | 2 +-
M man/grapheme_is_character_break.sh | 4 ++--
M man/libgrapheme.sh | 4 ++--
M man/template/next_break.sh | 40 +++++++++++++++++++++-------------------
M man/template/to_case.sh | 26 +++++++++++++-------------
6 files changed, 40 insertions(+), 38 deletions(-)

diff --git a/man/grapheme_decode_utf8.sh b/man/grapheme_decode_utf8.sh @@ -1,5 +1,5 @@ cat << EOF -.Dd $MAN_DATE +.Dd ${MAN_DATE} .Dt GRAPHEME_DECODE_UTF8 3 .Os suckless.org .Sh NAME diff --git a/man/grapheme_encode_utf8.sh b/man/grapheme_encode_utf8.sh @@ -1,5 +1,5 @@ cat << EOF -.Dd $MAN_DATE +.Dd ${MAN_DATE} .Dt GRAPHEME_ENCODE_UTF8 3 .Os suckless.org .Sh NAME diff --git a/man/grapheme_is_character_break.sh b/man/grapheme_is_character_break.sh @@ -1,5 +1,5 @@ cat << EOF -.Dd $MAN_DATE +.Dd ${MAN_DATE} .Dt GRAPHEME_IS_CHARACTER_BREAK 3 .Os suckless.org .Sh NAME @@ -77,7 +77,7 @@ main(void) .Xr libgrapheme 7 .Sh STANDARDS .Fn grapheme_is_character_break -is compliant with the Unicode 14.0.0 specification. +is compliant with the Unicode ${UNICODE_VERSION} specification. .Sh AUTHORS .An Laslo Hunhold Aq Mt dev@frign.de EOF diff --git a/man/libgrapheme.sh b/man/libgrapheme.sh @@ -1,5 +1,5 @@ cat << EOF -.Dd $MAN_DATE +.Dd ${MAN_DATE} .Dt LIBGRAPHEME 7 .Os suckless.org .Sh NAME @@ -55,7 +55,7 @@ example illustrating the possible usage. .Xr grapheme_to_titlecase_utf8 3 .Sh STANDARDS .Nm -is compliant with the Unicode $UNICODE_VERSION specification. +is compliant with the Unicode ${UNICODE_VERSION} specification. 
.Sh MOTIVATION The idea behind every character encoding scheme like ASCII or Unicode is to express abstract characters (which can be thought of as shapes diff --git a/man/template/next_break.sh b/man/template/next_break.sh @@ -1,34 +1,36 @@ if [ "$ENCODING" = "utf8" ]; then UNIT="byte" SUFFIX="_utf8" + ANTISUFFIX="" else UNIT="codepoint" SUFFIX="" + ANTISUFFIX="_utf8" fi cat << EOF -.Dd $MAN_DATE +.Dd ${MAN_DATE} .Dt GRAPHEME_NEXT_$(printf "%s_break%s" "$TYPE" "$SUFFIX" | tr [:lower:] [:upper:]) 3 .Os suckless.org .Sh NAME -.Nm grapheme_next_$(printf $TYPE)_break$SUFFIX -.Nd determine $UNIT-offset to next $REALTYPE break +.Nm grapheme_next_${TYPE}_break${SUFFIX} +.Nd determine ${UNIT}-offset to next ${REALTYPE} break .Sh SYNOPSIS .In grapheme.h .Ft size_t -.Fn grapheme_next_$(printf $TYPE)_break$SUFFIX "const $(if [ "$ENCODING" = "utf8" ]; then printf "char"; else printf "uint_least32_t"; fi) *str" "size_t len" +.Fn grapheme_next_${TYPE}_break${SUFFIX} "const $(if [ "$ENCODING" = "utf8" ]; then printf "char"; else printf "uint_least32_t"; fi) *str" "size_t len" .Sh DESCRIPTION The -.Fn grapheme_next_$(printf $TYPE)_break$SUFFIX -function computes the offset (in $(printf $UNIT)s) to the next $REALTYPE +.Fn grapheme_next_${TYPE}_break${SUFFIX} +function computes the offset (in ${UNIT}s) to the next ${REALTYPE} break (see .Xr libgrapheme 7 ) in the $(if [ "$ENCODING" = "utf8" ]; then printf "UTF-8-encoded string"; else printf "codepoint array"; fi) .Va str of length -.Va len .$(if [ "$TYPE" != "line" ]; then printf "\nIf a $REALTYPE begins at +.Va len .$(if [ "$TYPE" != "line" ]; then printf "\nIf a ${REALTYPE} begins at .Va str -this offset is equal to the length of said $REALTYPE."; fi) +this offset is equal to the length of said ${REALTYPE}."; fi) .Pp If .Va len @@ -41,12 +43,12 @@ NUL-byte is encountered. 
.Pp For $(if [ "$ENCODING" != "utf8" ]; then printf "UTF-8-encoded"; else printf "non-UTF-8"; fi) input data$(if [ "$TYPE" = "character" ] && [ "$ENCODING" = "utf8" ]; then printf "\n.Xr grapheme_is_character_break 3 and"; fi) -.Xr grapheme_next_$(printf $TYPE)_break$(if [ "$ENCODING" != "utf8" ]; then printf "_utf8"; fi) 3 +.Xr grapheme_next_${TYPE}_break${ANTISUFFIX} can be used instead. .Sh RETURN VALUES The -.Fn grapheme_next_$(printf $TYPE)_break$SUFFIX -function returns the offset (in $(printf $UNIT)s) to the next $REALTYPE +.Fn grapheme_next_${TYPE}_break${SUFFIX} +function returns the offset (in ${UNIT}s) to the next ${REALTYPE} break in .Va str or 0 if @@ -76,19 +78,19 @@ main(void) printf("Input: \\\\"%s\\\\"\\\\n", s); - /* print each $REALTYPE with byte-length */ - printf("$(printf "$REALTYPE")s in NUL-delimited input:\\\\n"); + /* print each ${REALTYPE} with byte-length */ + printf("${REALTYPE}s in NUL-delimited input:\\\\n"); for (off = 0; s[off] != '\\\\0'; off += ret) { - ret = grapheme_next_$(printf $TYPE)_break_utf8(s + off, SIZE_MAX); + ret = grapheme_next_${TYPE}_break_utf8(s + off, SIZE_MAX); printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off, ret); } printf("\\\\n"); /* do the same, but this time string is length-delimited */ len = 17; - printf("$(printf "$REALTYPE")s in input delimited to %zu bytes:\\\\n", len); + printf("${REALTYPE}s in input delimited to %zu bytes:\\\\n", len); for (off = 0; off < len; off += ret) { - ret = grapheme_next_$(printf $TYPE)_break_utf8(s + off, len - off); + ret = grapheme_next_${TYPE}_break_utf8(s + off, len - off); printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off, ret); } @@ -100,11 +102,11 @@ fi cat << EOF .Sh SEE ALSO$(if [ "$TYPE" = "character" ] && [ "$ENCODING" != "utf8" ]; then printf "\n.Xr grapheme_is_character_break 3 ,"; fi) -.Xr grapheme_next_$(printf $TYPE)_break$(if [ "$ENCODING" != "utf8" ]; then printf "_utf8"; fi) 3 , +.Xr grapheme_next_${TYPE}_break${ANTISUFFIX} .Xr libgrapheme 7 
.Sh STANDARDS -.Fn grapheme_next_$(printf $TYPE)_break$SUFFIX -is compliant with the Unicode $UNICODE_VERSION specification. +.Fn grapheme_next_${TYPE}_break${SUFFIX} +is compliant with the Unicode ${UNICODE_VERSION} specification. .Sh AUTHORS .An Laslo Hunhold Aq Mt dev@frign.de EOF diff --git a/man/template/to_case.sh b/man/template/to_case.sh @@ -11,22 +11,22 @@ else fi cat << EOF -.Dd $MAN_DATE +.Dd ${MAN_DATE} .Dt GRAPHEME_TO_$(printf "%s%s" "$CASE" "$SUFFIX" | tr [:lower:] [:upper:]) 3 .Os suckless.org .Sh NAME -.Nm grapheme_to_$CASE$SUFFIX -.Nd convert codepoint array to $CASE +.Nm grapheme_to_${CASE}${SUFFIX} +.Nd convert codepoint array to ${CASE} .Sh SYNOPSIS .In grapheme.h .Ft size_t -.Fn grapheme_to_$CASE$SUFFIX "const $DATATYPE *src" "size_t srclen" "$DATATYPE *dest" "size_t destlen" +.Fn grapheme_to_${CASE}${SUFFIX} "const ${DATATYPE} *src" "size_t srclen" "${DATATYPE} *dest" "size_t destlen" .Sh DESCRIPTION The -.Fn grapheme_to_$CASE$SUFFIX +.Fn grapheme_to_${CASE}${SUFFIX} function converts the $(if [ "$ENCODING" = "utf8" ]; then printf "UTF-8-encoded string"; else printf "codepoint array"; fi) .Va str -to $CASE and writes the result to +to ${CASE} and writes the result to .Va dest up to .Va destlen , @@ -45,26 +45,26 @@ is interpreted to be NUL-terminated and processing stops when a NUL-byte is encountered. .Pp For $(if [ "$ENCODING" != "utf8" ]; then printf "UTF-8-encoded"; else printf "non-UTF-8"; fi) input data -.Xr grapheme_to_$ANTISUFFIX 3 +.Xr grapheme_to_${ANTISUFFIX} 3 can be used instead. .Sh RETURN VALUES The -.Fn grapheme_to_$CASE$SUFFIX -function returns the number of $(printf $UNIT)s in the array resulting +.Fn grapheme_to_${CASE}${SUFFIX} +function returns the number of ${UNIT}s in the array resulting from converting .Va src -to $CASE, even if +to ${CASE}, even if .Va len is not large enough or .Va dest is .Dv NULL . 
.Sh SEE ALSO -.Xr grapheme_to_$ANTISUFFIX 3 , +.Xr grapheme_to_${ANTISUFFIX} 3 , .Xr libgrapheme 7 .Sh STANDARDS -.Fn grapheme_to_$CASE$SUFFIX -is compliant with the Unicode $UNICODE_VERSION specification. +.Fn grapheme_to_${CASE}${SUFFIX} +is compliant with the Unicode ${UNICODE_VERSION} specification. .Sh AUTHORS .An Laslo Hunhold Aq Mt dev@frign.de EOF