libgrapheme

Freestanding C library for Unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

next_break.sh (3397B)


      # Man page template for the grapheme_next_<TYPE>_break[_utf8] functions.
#
# Writes one mdoc(7) page to standard output.  The following variables are
# read and must be provided by the caller:
#
#   ENCODING         "utf8" selects the byte-oriented *_utf8 variant; any
#                    other value selects the codepoint-array variant
#   TYPE             fragment used to build the function name; the values
#                    "character" and "line" are special-cased below
#   REALTYPE         name of the break type as it appears in running text
#   MAN_DATE         argument passed to the .Dd macro
#   UNICODE_VERSION  version named in the STANDARDS section
#
# The here-documents are unquoted on purpose so that ${...} and $(...) are
# expanded.  As a consequence the shell also collapses every \\ inside them
# to a single backslash, which is why the embedded C example writes \\\\
# wherever the generated mdoc source needs \\.

# Wording and name suffixes that differ between the two variants.
# ANTISUFFIX names the sibling function for the other encoding.
if [ "$ENCODING" = "utf8" ]; then
	UNIT="byte"
	SUFFIX="_utf8"
	ANTISUFFIX=""
else
	UNIT="codepoint"
	SUFFIX=""
	ANTISUFFIX="_utf8"
fi

# Header through RETURN VALUES.  The character classes handed to tr are
# quoted so the shell cannot glob-expand them against single-letter file
# names in the working directory, and REALTYPE is passed to printf as an
# argument instead of being spliced into the format string.
cat << EOF
.Dd ${MAN_DATE}
.Dt GRAPHEME_NEXT_$(printf "%s_break%s" "$TYPE" "$SUFFIX" | tr '[:lower:]' '[:upper:]') 3
.Os suckless.org
.Sh NAME
.Nm grapheme_next_${TYPE}_break${SUFFIX}
.Nd determine ${UNIT}-offset to next ${REALTYPE} break
.Sh SYNOPSIS
.In grapheme.h
.Ft size_t
.Fn grapheme_next_${TYPE}_break${SUFFIX} "const $(if [ "$ENCODING" = "utf8" ]; then printf "char"; else printf "uint_least32_t"; fi) *str" "size_t len"
.Sh DESCRIPTION
The
.Fn grapheme_next_${TYPE}_break${SUFFIX}
function computes the offset (in ${UNIT}s) to the next ${REALTYPE}
break (see
.Xr libgrapheme 7 )
in the $(if [ "$ENCODING" = "utf8" ]; then printf "UTF-8-encoded string"; else printf "codepoint array"; fi)
.Va str
of length
.Va len .$(if [ "$TYPE" != "line" ]; then printf '\nIf a %s begins at\n.Va str\nthis offset is equal to the length of said %s.' "$REALTYPE" "$REALTYPE"; fi)
.Pp
If
.Va len
is set to
.Dv SIZE_MAX
(stdint.h is already included by grapheme.h) the string
.Va str
is interpreted to be NUL-terminated and processing stops when
a $(if [ "$ENCODING" = "utf8" ]; then printf "NUL-byte"; else printf "codepoint with the value 0"; fi) is encountered.
.Pp
For $(if [ "$ENCODING" != "utf8" ]; then printf "UTF-8-encoded"; else printf "non-UTF-8"; fi) input
data$(if [ "$TYPE" = "character" ] && [ "$ENCODING" = "utf8" ]; then printf "\n.Xr grapheme_is_character_break 3 and"; fi)
.Xr grapheme_next_${TYPE}_break${ANTISUFFIX} 3
can be used instead.
.Sh RETURN VALUES
The
.Fn grapheme_next_${TYPE}_break${SUFFIX}
function returns the offset (in ${UNIT}s) to the next ${REALTYPE}
break in
.Va str
or 0 if
.Va str
is
.Dv NULL .
EOF

# The EXAMPLES section is only emitted for the UTF-8 variant.
if [ "$ENCODING" = "utf8" ]; then
cat << EOF
.Sh EXAMPLES
.Bd -literal
/* cc (-static) -o example example.c -lgrapheme */
#include <grapheme.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* UTF-8 encoded input */
	char *s = "T\\\\xC3\\\\xABst \\\\xF0\\\\x9F\\\\x91\\\\xA8\\\\xE2\\\\x80\\\\x8D\\\\xF0"
	          "\\\\x9F\\\\x91\\\\xA9\\\\xE2\\\\x80\\\\x8D\\\\xF0\\\\x9F\\\\x91\\\\xA6 \\\\xF0"
	          "\\\\x9F\\\\x87\\\\xBA\\\\xF0\\\\x9F\\\\x87\\\\xB8 \\\\xE0\\\\xA4\\\\xA8\\\\xE0"
	          "\\\\xA5\\\\x80 \\\\xE0\\\\xAE\\\\xA8\\\\xE0\\\\xAE\\\\xBF!";
	size_t ret, len, off;

	printf("Input: \\\\"%s\\\\"\\\\n", s);

	/* print each ${REALTYPE} with byte-length */
	printf("${REALTYPE}s in NUL-delimited input:\\\\n");
	for (off = 0; s[off] != '\\\\0'; off += ret) {
		ret = grapheme_next_${TYPE}_break_utf8(s + off, SIZE_MAX);
		printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
	}
	printf("\\\\n");

	/* do the same, but this time string is length-delimited */
	len = 17;
	printf("${REALTYPE}s in input delimited to %zu bytes:\\\\n", len);
	for (off = 0; off < len; off += ret) {
		ret = grapheme_next_${TYPE}_break_utf8(s + off, len - off);
		printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
	}

	return 0;
}
.Ed
EOF
fi

# Trailer: cross references, standards conformance and author.
cat << EOF
.Sh SEE ALSO$(if [ "$TYPE" = "character" ] && [ "$ENCODING" != "utf8" ]; then printf "\n.Xr grapheme_is_character_break 3 ,"; fi)
.Xr grapheme_next_${TYPE}_break${ANTISUFFIX} 3 ,
.Xr libgrapheme 7
.Sh STANDARDS
.Fn grapheme_next_${TYPE}_break${SUFFIX}
is compliant with the Unicode ${UNICODE_VERSION} specification.
.Sh AUTHORS
.An Laslo Hunhold Aq Mt dev@frign.de
EOF