summaryrefslogtreecommitdiffstats
path: root/src/bootstr.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/bootstr.c')
-rw-r--r--src/bootstr.c325
1 files changed, 325 insertions, 0 deletions
diff --git a/src/bootstr.c b/src/bootstr.c
new file mode 100644
index 0000000..97cf693
--- /dev/null
+++ b/src/bootstr.c
@@ -0,0 +1,325 @@
+#include "bootstr.h"
+
+#include <limits.h>
+#include <stdint.h>
+#include <unistr.h>
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#define MIN(a, b) ((a) > (b) ? (b) : (a))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+static int check_realloc(uint32_t **alloc, size_t reserve, size_t *cap);
+static int append_codes(uint32_t **alloc, size_t *len, size_t *cap,
+ const uint32_t *src, size_t srclen);
+static int check_config(const struct bootstr_cfg *cfg);
+
+static inline size_t
+bootstr_adapt(const struct bootstr_cfg *cfg, ssize_t delta,
+ ssize_t len, bool first)
+{
+ size_t k;
+
+ delta = first ? delta / cfg->damp : delta / 2;
+ delta += delta / len;
+
+ k = 0;
+ while (delta > (cfg->baselen - cfg->tmin) * cfg->tmax / 2) {
+ delta /= cfg->baselen - cfg->tmin;
+ k += cfg->baselen;
+ }
+ k += (cfg->baselen - cfg->tmin + 1) * delta / (delta + cfg->skew);
+
+ return k;
+}
+
+int
+check_realloc(uint32_t **alloc, size_t reserve, size_t *cap)
+{
+ if (reserve >= *cap) {
+ if (!*cap) {
+ *cap = reserve;
+ } else {
+ *cap = MAX(*cap * 2, reserve);
+ }
+ *alloc = realloc(*alloc, *cap * sizeof(uint32_t));
+ if (!*alloc) return errno;
+ }
+
+ return 0;
+}
+
+int
+append_codes(uint32_t **alloc, size_t *len, size_t *cap,
+ const uint32_t *src, size_t srclen)
+{
+ int ret;
+
+ ret = check_realloc(alloc, *len + srclen, cap);
+ if (ret) return ret;
+
+ memcpy(*alloc + *len, src, srclen * sizeof(uint32_t));
+ *len += srclen;
+
+ return 0;
+}
+
+int
+check_config(const struct bootstr_cfg *cfg)
+{
+ if (cfg->tmin >= cfg->baselen || cfg->tmin <= 0)
+ return EINVAL;
+
+ if (cfg->tmax < cfg->tmin)
+ return EINVAL;
+
+ if (!cfg->delim)
+ return EINVAL;
+
+ if (!cfg->base || cfg->baselen <= 0)
+ return EINVAL;
+
+ if (!cfg->damp)
+ return EINVAL;
+
+ return 0;
+}
+
+int
+bootstr_encode_delta(const struct bootstr_cfg *cfg, uint32_t *in, uint32_t **out,
+ size_t *outlen, size_t *outcap, ssize_t bias, ssize_t delta)
+{
+ ssize_t thresh;
+ ssize_t val;
+ ssize_t off;
+ ssize_t ci;
+ int ret;
+
+ val = delta;
+
+ off = cfg->baselen;
+ while (1) {
+ /* final digit must be under threshold */
+ thresh = MIN(cfg->tmax, MAX(cfg->tmin, off - bias));
+ if (val < thresh) break;
+
+ /* no room for encoding, invalid params */
+ if (thresh >= cfg->baselen)
+ return EINVAL;
+
+ /* encode char according to current base */
+ ci = thresh + (val - thresh) % (cfg->baselen - thresh);
+ val = (val - thresh) / (cfg->baselen - thresh);
+ if (ci >= cfg->baselen)
+ return EINVAL;
+
+ ret = append_codes(out, outlen, outcap, &cfg->base[ci], 1);
+ if (ret) return ret;
+
+ off += cfg->baselen;
+ }
+
+ ret = append_codes(out, outlen, outcap, &cfg->base[val], 1);
+ if (ret) return ret;
+
+ return 0;
+}
+
+int
+bootstr_encode(const struct bootstr_cfg *cfg, uint32_t *in, uint32_t **out)
+{
+ size_t outlen, outcap;
+ size_t inlen;
+ ssize_t processed, basiclen;
+ ssize_t next_code, n;
+ ssize_t delta, bias;
+ ssize_t i;
+ int ret;
+
+ ret = check_config(cfg);
+ if (ret) return ret;
+
+ outlen = 0;
+ outcap = 0;
+
+ /* parse out safe character prefix */
+ inlen = u32_strlen(in);
+ for (i = 0; i < inlen; i++) {
+ if (cfg->is_basic(in[i]))
+ append_codes(out, &outlen, &outcap, &in[i], 1);
+ }
+ processed = outlen;
+ basiclen = outlen;
+
+ /* if basic prefix avail, add delim */
+ if (outlen) {
+ ret = append_codes(out, &outlen, &outcap,
+ cfg->delim, u32_strlen(cfg->delim));
+ if (ret) return ret;
+ }
+
+ bias = cfg->initial_bias;
+ n = cfg->initial_n;
+ delta = 0;
+
+ /* encode rest of non-basic chars */
+ while (processed < inlen) {
+ next_code = SSIZE_MAX;
+ for (i = 0; i < inlen; i++) {
+ if (in[i] >= n && in[i] < next_code)
+ next_code = in[i];
+ }
+
+ /* calc insertions to skip until start of last round:
+ * (processed + 1) insertions possible per round
+ * (next_code - n) rounds todo */
+ if ((next_code - n) > (SSIZE_MAX - delta) / (processed + 1))
+ return EOVERFLOW;
+ delta += (next_code - n) * (processed + 1);
+
+ /* calculate number of skip to reach code in output at n */
+ n = next_code;
+ for (i = 0; i < inlen; i++) {
+ /* only consider characters already in output */
+ if (in[i] < n || cfg->is_basic(in[i])) {
+ delta += 1;
+ if (delta <= 0)
+ return EOVERFLOW;
+ }
+
+ /* reached the position of ONE of next_code */
+ if (in[i] == n) {
+ ret = bootstr_encode_delta(cfg, in, out,
+ &outlen, &outcap, bias, delta);
+ if (ret) return ret;
+ bias = bootstr_adapt(cfg, delta,
+ processed + 1, processed == basiclen);
+ delta = 0;
+ processed += 1;
+ }
+ }
+
+ delta += 1;
+ n += 1;
+ }
+
+ ret = append_codes(out, &outlen, &outcap, U"\x00", 1);
+ if (ret) return ret;
+
+ return 0;
+}
+
+int
+bootstr_decode_delta(const struct bootstr_cfg *cfg, uint32_t *in,
+ ssize_t *processed, ssize_t bias, ssize_t state, ssize_t *state_new)
+{
+ ssize_t thresh;
+ ssize_t digit;
+ ssize_t mul;
+ ssize_t off;
+ uint32_t *tok;
+
+ /* construct integer from digits while accounting
+ * for possibly different bases per digit */
+
+ mul = 1;
+ off = cfg->baselen;
+ while (1) {
+ if (!in[*processed]) return EINVAL;
+
+ tok = u32_strchr(cfg->base, in[*processed]);
+ if (!tok) return EINVAL;
+ *processed += 1;
+
+ digit = tok - cfg->base;
+ if (digit > (SSIZE_MAX - state) / mul)
+ return EOVERFLOW;
+ state += digit * mul;
+
+ thresh = MIN(cfg->tmax, MAX(cfg->tmin, off - bias));
+ if (digit < thresh) break;
+
+ if (mul > SSIZE_MAX / (cfg->baselen - thresh))
+ return EOVERFLOW;
+ mul *= cfg->baselen - thresh;
+
+ off += cfg->baselen;
+ }
+ *state_new = state;
+
+ return 0;
+}
+
+int
+bootstr_decode(const struct bootstr_cfg *cfg, uint32_t *in, uint32_t **out)
+{
+ size_t outlen, outcap;
+ size_t inlen;
+ ssize_t basiclen;
+ ssize_t processed, n;
+ ssize_t state, state_new, bias;
+ ssize_t i, len;
+ int ret;
+
+ ret = check_config(cfg);
+ if (ret) return ret;
+
+ outlen = 0;
+ outcap = 0;
+
+ basiclen = 0;
+ inlen = u32_strlen(in);
+
+ /* find basic prefix delim */
+ for (i = 0; i < inlen; i++) {
+ if (!u32_strcmp(in + i, cfg->delim)) {
+ basiclen = i;
+ break;
+ }
+ if (!cfg->is_basic(in[i]))
+ return EINVAL;
+ }
+
+ /* copy basic prefix to output */
+ if (basiclen)
+ append_codes(out, &outlen, &outcap, in, basiclen);
+
+ n = cfg->initial_n;
+ bias = cfg->initial_bias;
+ state = 0;
+
+ /* decode rest of non-basic chars */
+ for (processed = basiclen; processed < inlen; ) {
+ /* decode delta and add to state */
+ ret = bootstr_decode_delta(cfg, in, &processed,
+ bias, state, &state_new);
+ if (ret) return ret;
+
+ /* use delta to calculate new bias */
+ bias = bootstr_adapt(cfg, state_new - state,
+ outlen + 1, state == 0);
+ state = state_new;
+
+ /* split up state into rounds and index */
+ if (state / (outlen + 1) > (SSIZE_MAX - n))
+ return EOVERFLOW;
+ n += state / (outlen + 1);
+ state %= outlen + 1;
+
+ /* insert current code */
+ ret = check_realloc(out, outlen + 1, &outcap);
+ if (ret) return ret;
+ memmove(*out + state + 1, *out + state,
+ (outlen - state) * sizeof(uint32_t));
+ (*out)[state] = n;
+ state += 1;
+ outlen += 1;
+ }
+
+ ret = append_codes(out, &outlen, &outcap, U"\x00", 1);
+ if (ret) return ret;
+
+ return 0;
+}