commit 91d7e5af278c6e4231a8330a2ec6e54440b01465
parent 706b4d4ce7d76eb627aea5c9f8d4da8088c0903b
Author: Laslo Hunhold <dev@frign.de>
Date: Mon, 12 Oct 2020 12:32:27 +0200
Rename grapheme_len() to grapheme_bytelen() and refactor manual
I wasn't happy with the previous name, because it can be a bit confusing,
given there are functions like strlen() which count the number of bytes
in a string and one might assume that grapheme_len() counts the number
of grapheme clusters in a string.
Calling it grapheme_bytelen() clears this confusion up, as one can then
tell that it's about the number of bytes in a grapheme cluster and not
the number of grapheme clusters.
The manual was refactored inspired by the high standards set by the
OpenBSD manuals.
Signed-off-by: Laslo Hunhold <dev@frign.de>
Diffstat:
6 files changed, 64 insertions(+), 48 deletions(-)
diff --git a/Makefile b/Makefile
@@ -12,7 +12,7 @@ GBT_URL = https://www.unicode.org/Public/13.0.0/ucd/auxiliary/GraphemeBreakTest.
GBP = data/gbp.txt
EMO = data/emo.txt
GBT = data/gbt.txt
-MAN3 = man/grapheme_len.3
+MAN3 = man/grapheme_bytelen.3
MAN7 = man/libgrapheme.7
all: libgrapheme.a libgrapheme.so $(BIN)
diff --git a/grapheme.h b/grapheme.h
@@ -8,10 +8,9 @@
#define GRAPHEME_CP_INVALID UINT32_C(0xFFFD)
int grapheme_boundary(uint32_t, uint32_t, int *);
+size_t grapheme_bytelen(const char *);
size_t grapheme_cp_decode(uint32_t *, const uint8_t *, size_t);
size_t grapheme_cp_encode(uint32_t, uint8_t *, size_t);
-size_t grapheme_len(const char *);
-
#endif /* GRAPHEME_H */
diff --git a/man/grapheme_bytelen.3 b/man/grapheme_bytelen.3
@@ -0,0 +1,57 @@
+.Dd 2020-03-26
+.Dt GRAPHEME_BYTELEN 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_bytelen
+.Nd compute grapheme cluster byte-length
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_bytelen "const char *str"
+.Sh DESCRIPTION
+The
+.Fn grapheme_bytelen
+function computes the length (in bytes) of the grapheme cluster
+(see
+.Xr libgrapheme 7 )
+beginning at the NUL-terminated string
+.Va str .
+.Sh RETURN VALUES
+The
+.Fn grapheme_bytelen
+function returns the length (in bytes) of the grapheme cluster beginning
+at
+.Va str
+or 0 if
+.Va str
+is
+.Dv NULL .
+.Sh EXAMPLES
+.Bd -literal
+/* cc (-static) -o example example.c -lgrapheme */
+#include <grapheme.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+ char *s = "Tëst 👨\\u200d👩\\u200d👦 🇺🇸 नी நி!";
+ size_t len;
+
+ /* print each grapheme cluster with accompanying byte-length */
+ while (*s != '\\0') {
+ len = grapheme_bytelen(s);
+ printf("%2zu bytes | %.*s\\n", len, (int)len, s);
+ s += len;
+ }
+
+ return 0;
+}
+.Ed
+.Sh SEE ALSO
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_bytelen
+is compliant with the Unicode 13.0.0 specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt dev@frign.de
diff --git a/man/grapheme_len.3 b/man/grapheme_len.3
@@ -1,40 +0,0 @@
-.Dd 2020-03-26
-.Dt GRAPHEME_LEN 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_len
-.Nd determine grapheme cluster length
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_len "const char *"
-.Sh DESCRIPTION
-.Nm
-returns the length (in bytes) of the grapheme cluster beginning at
-the provided char-address.
-.Sh EXAMPLES
-.Bd -literal
-/* cc (-static) -o example example.c -lgrapheme */
-#include <grapheme.h>
-#include <stdio.h>
-
-int
-main(void)
-{
- char *s = "Tëst 👨\\u200d👩\\u200d👦 🇺🇸 नी நி!";
- size_t len;
-
- /* print each grapheme cluster with accompanying byte-length */
- while (*s != '\\0') {
- len = grapheme_len(s);
- printf("%2zu bytes | %.*s\\n", len, (int)len, s, len);
- s += len;
- }
-
- return 0;
-}
-.Ed
-.Sh SEE ALSO
-.Xr libgrapheme 7
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt dev@frign.de
diff --git a/man/libgrapheme.7 b/man/libgrapheme.7
@@ -16,9 +16,9 @@ see
.Sx MOTIVATION )
using the Unicode grapheme cluster breaking algorithm (UAX #29).
.Pp
-You can either count the byte-length of the grapheme cluster at the
-beginning of an UTF-8-encoded string (see
-.Xr grapheme_len 3 )
+You can either count the length (in bytes) of the grapheme cluster at
+the beginning of a UTF-8-encoded string (see
+.Xr grapheme_bytelen 3 )
or determine if a grapheme cluster breaks between two Unicode code
points (see
.Xr grapheme_boundary 3 ) ,
@@ -28,9 +28,9 @@ and
.Xr grapheme_cp_encode 3 ) .
.Sh SEE ALSO
.Xr grapheme_boundary 3 ,
+.Xr grapheme_bytelen 3 ,
.Xr grapheme_cp_decode 3 ,
.Xr grapheme_cp_encode 3 ,
-.Xr grapheme_len 3
.Sh STANDARDS
.Nm
is compliant with the Unicode 13.0.0 specification.
diff --git a/src/grapheme.c b/src/grapheme.c
@@ -5,7 +5,7 @@
#include "../grapheme.h"
size_t
-grapheme_len(const char *str)
+grapheme_bytelen(const char *str)
{
uint32_t cp0, cp1;
size_t ret, len = 0;