next_break.sh (3397B)
# Print the mdoc(7) source of the grapheme_next_${TYPE}_break${SUFFIX}(3)
# manual page on standard output.
#
# Variables read from the caller's environment:
#   ENCODING         "utf8" selects the UTF-8 (byte-offset) variant, any
#                    other value selects the codepoint variant
#   TYPE             break type as spelled in the function name
#   REALTYPE         break type as spelled in running text
#   MAN_DATE         argument of the .Dd macro
#   UNICODE_VERSION  version quoted in the STANDARDS section

# SUFFIX names the variant documented here, ANTISUFFIX the sibling
# variant that the page cross-references.
if [ "$ENCODING" = "utf8" ]; then
	UNIT="byte"
	SUFFIX="_utf8"
	ANTISUFFIX=""
else
	UNIT="codepoint"
	SUFFIX=""
	ANTISUFFIX="_utf8"
fi

# The character classes handed to tr(1) are quoted so the shell cannot
# expand them as glob patterns against single-character file names in
# the working directory. REALTYPE is passed to printf(1) as an argument
# rather than being spliced into the format string.
cat << EOF
.Dd ${MAN_DATE}
.Dt GRAPHEME_NEXT_$(printf "%s_break%s" "$TYPE" "$SUFFIX" | tr '[:lower:]' '[:upper:]') 3
.Os suckless.org
.Sh NAME
.Nm grapheme_next_${TYPE}_break${SUFFIX}
.Nd determine ${UNIT}-offset to next ${REALTYPE} break
.Sh SYNOPSIS
.In grapheme.h
.Ft size_t
.Fn grapheme_next_${TYPE}_break${SUFFIX} "const $(if [ "$ENCODING" = "utf8" ]; then printf "char"; else printf "uint_least32_t"; fi) *str" "size_t len"
.Sh DESCRIPTION
The
.Fn grapheme_next_${TYPE}_break${SUFFIX}
function computes the offset (in ${UNIT}s) to the next ${REALTYPE}
break (see
.Xr libgrapheme 7 )
in the $(if [ "$ENCODING" = "utf8" ]; then printf "UTF-8-encoded string"; else printf "codepoint array"; fi)
.Va str
of length
.Va len .$(if [ "$TYPE" != "line" ]; then printf "\nIf a %s begins at\n.Va str\nthis offset is equal to the length of said %s." "$REALTYPE" "$REALTYPE"; fi)
.Pp
If
.Va len
is set to
.Dv SIZE_MAX
(stdint.h is already included by grapheme.h) the string
.Va str
is interpreted to be NUL-terminated and processing stops when
a $(if [ "$ENCODING" = "utf8" ]; then printf "NUL-byte"; else printf "codepoint with the value 0"; fi) is encountered.
.Pp
For $(if [ "$ENCODING" != "utf8" ]; then printf "UTF-8-encoded"; else printf "non-UTF-8"; fi) input
data$(if [ "$TYPE" = "character" ] && [ "$ENCODING" = "utf8" ]; then printf "\n.Xr grapheme_is_character_break 3 and"; fi)
.Xr grapheme_next_${TYPE}_break${ANTISUFFIX} 3
can be used instead.
.Sh RETURN VALUES
The
.Fn grapheme_next_${TYPE}_break${SUFFIX}
function returns the offset (in ${UNIT}s) to the next ${REALTYPE}
break in
.Va str
or 0 if
.Va str
is
.Dv NULL .
EOF

# The example program is only emitted for the UTF-8 variant. Every
# backslash meant to appear in the rendered page is written four times:
# the unquoted here-document halves them, and roff halves them again.
if [ "$ENCODING" = "utf8" ]; then
	cat << EOF
.Sh EXAMPLES
.Bd -literal
/* cc (-static) -o example example.c -lgrapheme */
#include <grapheme.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* UTF-8 encoded input */
	char *s = "T\\\\xC3\\\\xABst \\\\xF0\\\\x9F\\\\x91\\\\xA8\\\\xE2\\\\x80\\\\x8D\\\\xF0"
	          "\\\\x9F\\\\x91\\\\xA9\\\\xE2\\\\x80\\\\x8D\\\\xF0\\\\x9F\\\\x91\\\\xA6 \\\\xF0"
	          "\\\\x9F\\\\x87\\\\xBA\\\\xF0\\\\x9F\\\\x87\\\\xB8 \\\\xE0\\\\xA4\\\\xA8\\\\xE0"
	          "\\\\xA5\\\\x80 \\\\xE0\\\\xAE\\\\xA8\\\\xE0\\\\xAE\\\\xBF!";
	size_t ret, len, off;

	printf("Input: \\\\"%s\\\\"\\\\n", s);

	/* print each ${REALTYPE} with byte-length */
	printf("${REALTYPE}s in NUL-delimited input:\\\\n");
	for (off = 0; s[off] != '\\\\0'; off += ret) {
		ret = grapheme_next_${TYPE}_break_utf8(s + off, SIZE_MAX);
		printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
	}
	printf("\\\\n");

	/* do the same, but this time string is length-delimited */
	len = 17;
	printf("${REALTYPE}s in input delimited to %zu bytes:\\\\n", len);
	for (off = 0; off < len; off += ret) {
		ret = grapheme_next_${TYPE}_break_utf8(s + off, len - off);
		printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
	}

	return 0;
}
.Ed
EOF
fi

# Trailing sections shared by both variants.
cat << EOF
.Sh SEE ALSO$(if [ "$TYPE" = "character" ] && [ "$ENCODING" != "utf8" ]; then printf "\n.Xr grapheme_is_character_break 3 ,"; fi)
.Xr grapheme_next_${TYPE}_break${ANTISUFFIX} 3 ,
.Xr libgrapheme 7
.Sh STANDARDS
.Fn grapheme_next_${TYPE}_break${SUFFIX}
is compliant with the Unicode ${UNICODE_VERSION} specification.
.Sh AUTHORS
.An Laslo Hunhold Aq Mt dev@frign.de
EOF