libgrapheme

Freestanding C library for unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

grapheme_decode_utf8.sh (2322B)


      1cat << EOF
      2.Dd ${MAN_DATE}
      3.Dt GRAPHEME_DECODE_UTF8 3
      4.Os suckless.org
      5.Sh NAME
      6.Nm grapheme_decode_utf8
      7.Nd decode first codepoint in UTF-8-encoded string
      8.Sh SYNOPSIS
      9.In grapheme.h
     10.Ft size_t
     11.Fn grapheme_decode_utf8 "const char *str" "size_t len" "uint_least32_t *cp"
     12.Sh DESCRIPTION
     13The
     14.Fn grapheme_decode_utf8
     15function decodes the first codepoint in the UTF-8-encoded string
     16.Va str
     17of length
     18.Va len .
     19If the UTF-8 sequence is invalid (overlong encoding, unexpected byte,
     20string ends unexpectedly, empty string, etc.) the decoding is stopped
     21at the last processed byte and the decoded codepoint is set to
     22.Dv GRAPHEME_INVALID_CODEPOINT .
     23.Pp
     24If
     25.Va cp
     26is not
     27.Dv NULL
     28the decoded codepoint is stored in the memory pointed to by
     29.Va cp .
     30.Pp
     31Given that NUL has a unique 1-byte representation, it is safe to operate on
     32NUL-terminated strings by setting
     33.Va len
     34to
     35.Dv SIZE_MAX
     36(stdint.h is already included by grapheme.h) and terminating when
     37.Va cp
     38is 0 (see
     39.Sx EXAMPLES
     40for an example).
     41.Sh RETURN VALUES
     42The
     43.Fn grapheme_decode_utf8
     44function returns the number of processed bytes, or 0 if
     45.Va str
     46is
     47.Dv NULL
     48or
     49.Va len
     50is 0.
     51If the string ends unexpectedly in a multibyte sequence, the required
     52length (which is larger than
     53.Va len )
     54is returned.
     55.Sh EXAMPLES
     56.Bd -literal
     57/* cc (-static) -o example example.c -lgrapheme */
     58#include <grapheme.h>
     59#include <inttypes.h>
     60#include <stdio.h>
     61
     62void
     63print_cps(const char *str, size_t len)
     64{
     65	size_t ret, off;
     66	uint_least32_t cp;
     67
     68	for (off = 0; off < len; off += ret) {
     69		if ((ret = grapheme_decode_utf8(str + off,
     70		                                len - off, &cp)) > (len - off)) {
     71			/*
     72			 * string ended unexpectedly in the middle of a
     73			 * multibyte sequence and we have the choice
     74			 * here to possibly expand str by ret - len + off
     75			 * bytes to get a full sequence, but we just
     76			 * bail out in this case.
     77			 */
     78			break;
     79		}
     80		printf("%"PRIxLEAST32"\\\\n", cp);
     81	}
     82}
     83
     84void
     85print_cps_nul_terminated(const char *str)
     86{
     87	size_t ret, off;
     88	uint_least32_t cp;
     89
     90	for (off = 0; (ret = grapheme_decode_utf8(str + off,
     91	                                          SIZE_MAX, &cp)) > 0 &&
     92	     cp != 0; off += ret) {
     93		printf("%"PRIxLEAST32"\\\\n", cp);
     94	}
     95}
     96.Ed
     97.Sh SEE ALSO
     98.Xr grapheme_encode_utf8 3 ,
     99.Xr libgrapheme 7
    100.Sh AUTHORS
    101.An Laslo Hunhold Aq Mt dev@frign.de
    102EOF