libgrapheme

Freestanding C library for Unicode string handling
git clone https://git.sinitax.com/suckless/libgrapheme
Log | Files | Refs | README | LICENSE | sfeed.txt

commit f320b0ad8b7b2bc7ab5b63e91379012adbd19d12
parent c0cab63c5300fa12284194fbef57aa2ed62a94c0
Author: Laslo Hunhold <dev@frign.de>
Date:   Thu, 11 May 2023 18:16:09 +0200

Allow level-array to have different size from line length

The benefit may not be apparent at first, but this change lets you extract
only as many levels of a line as you need, e.g. only the first 10. Truncation
is indicated by the return value (the line length) being larger than levlen.

Signed-off-by: Laslo Hunhold <dev@frign.de>

Diffstat:
M grapheme.h | 5 +++--
M src/bidirectional.c | 24 ++++++++++++++++--------
M test/bidirectional.c | 12 +++++++++---
3 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/grapheme.h b/grapheme.h @@ -15,8 +15,9 @@ enum grapheme_bidirectional_override { GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL, }; -void grapheme_bidirectional_get_line_embedding_levels(const uint_least32_t *, - size_t, int_least8_t *); +size_t grapheme_bidirectional_get_line_embedding_levels(const uint_least32_t *, + size_t, int_least8_t *, + size_t); size_t grapheme_bidirectional_preprocess(const uint_least32_t *, size_t, enum grapheme_bidirectional_override, diff --git a/src/bidirectional.c b/src/bidirectional.c @@ -1435,23 +1435,29 @@ grapheme_bidirectional_preprocess_utf8( return preprocess(&r, override, dest, destlen); } -void +size_t grapheme_bidirectional_get_line_embedding_levels(const uint_least32_t *linedata, size_t linelen, - int_least8_t *linelevel) + int_least8_t *lev, + size_t levlen) { enum bidi_property prop; size_t i, runsince; + int_least8_t level; /* rule L1.4 */ runsince = SIZE_MAX; for (i = 0; i < linelen; i++) { + level = (int_least8_t)get_state(STATE_LEVEL, linedata[i]); prop = (uint_least8_t)get_state(STATE_PRESERVED_PROP, linedata[i]); - /* write level into level array */ - if ((linelevel[i] = (int_least8_t)get_state( - STATE_LEVEL, linedata[i])) == -1) { + /* write level into level array if we still have space */ + if (i < levlen) { + lev[i] = level; + } + + if (level == -1) { /* ignored character */ continue; } @@ -1473,11 +1479,13 @@ grapheme_bidirectional_get_line_embedding_levels(const uint_least32_t *linedata, * we hit the end of the line but were in a run; * reset the line levels to the paragraph level */ - for (i = runsince; i < linelen; i++) { - if (linelevel[i] != -1) { - linelevel[i] = (int_least8_t)get_state( + for (i = runsince; i < MIN(linelen, levlen); i++) { + if (lev[i] != -1) { + lev[i] = (int_least8_t)get_state( STATE_PARAGRAPH_LEVEL, linedata[i]); } } } + + return linelen; } diff --git a/test/bidirectional.c b/test/bidirectional.c @@ -14,9 +14,10 @@ main(int argc, char *argv[]) { uint_least32_t data[512]; /* TODO 
iterate and get max, allocate */ int_least8_t lev[512]; - size_t i, num_tests, failed, datalen, ret, j, m; + size_t i, num_tests, failed, datalen, levlen, ret, j, m; datalen = LEN(data); + levlen = LEN(lev); (void)argc; @@ -30,14 +31,19 @@ main(int argc, char *argv[]) bidirectional_test[i].cp, bidirectional_test[i].cplen, bidirectional_test[i].mode[m], data, datalen); - grapheme_bidirectional_get_line_embedding_levels( - data, ret, lev); if (ret != bidirectional_test[i].cplen || ret > datalen) { goto err; } + ret = grapheme_bidirectional_get_line_embedding_levels( + data, ret, lev, levlen); + + if (ret > levlen) { + goto err; + } + for (j = 0; j < ret; j++) { if (lev[j] != bidirectional_test[i].level[j]) { goto err;