libbootstr-c

C bootstring encoding library
git clone https://git.sinitax.com/sinitax/libbootstr-c
Log | Files | Refs | LICENSE | sfeed.txt

bootstr.c (6965B)


      1#include "bootstr.h"
      2
      3#include <limits.h>
      4#include <stdint.h>
      5#include <unistr.h>
      6#include <errno.h>
      7#include <string.h>
      8#include <stdio.h>
      9#include <stdlib.h>
     10
     11#define MIN(a, b) ((a) > (b) ? (b) : (a))
     12#define MAX(a, b) ((a) > (b) ? (a) : (b))
     13
     14static int bootstr_realloc(uint32_t **alloc, size_t reserve, size_t *cap);
     15static int append_codes(uint32_t **alloc, size_t *len, size_t *cap,
     16	const uint32_t *src, size_t srclen);
     17static int check_config(const struct bootstr_cfg *cfg);
     18
     19static inline size_t
     20bootstr_adapt(const struct bootstr_cfg *cfg, ssize_t delta,
     21	ssize_t len, bool first)
     22{
     23	size_t k;
     24
     25	delta = first ? delta / cfg->damp : delta / 2;
     26	delta += delta / len;
     27
     28	k = 0;
     29	while (delta > (cfg->baselen - cfg->tmin) * cfg->tmax / 2) {
     30		delta /= cfg->baselen - cfg->tmin;
     31		k += cfg->baselen;
     32	}
     33	k += (cfg->baselen - cfg->tmin + 1) * delta / (delta + cfg->skew);
     34
     35	return k;
     36}
     37
     38int
     39bootstr_realloc(uint32_t **alloc, size_t reserve, size_t *cap)
     40{
     41	if (reserve >= *cap) {
     42		if (!*cap) {
     43			*cap = reserve;
     44		} else {
     45			*cap = MAX(*cap * 2, reserve);
     46		}
     47		*alloc = realloc(*alloc, *cap * sizeof(uint32_t));
     48		if (!*alloc) return -errno;
     49	}
     50
     51	return 0;
     52}
     53
     54int
     55append_codes(uint32_t **alloc, size_t *len, size_t *cap,
     56	const uint32_t *src, size_t srclen)
     57{
     58	int ret;
     59
     60	ret = bootstr_realloc(alloc, *len + srclen, cap);
     61	if (ret) return ret;
     62
     63	memcpy(*alloc + *len, src, srclen * sizeof(uint32_t));
     64	*len += srclen;
     65
     66	return 0;
     67}
     68
     69int
     70check_config(const struct bootstr_cfg *cfg)
     71{
     72	if (cfg->tmin >= cfg->baselen || cfg->tmin <= 0)
     73		return -EINVAL;
     74
     75	if (cfg->tmax < cfg->tmin)
     76		return -EINVAL;
     77
     78	if (!cfg->delim)
     79		return -EINVAL;
     80
     81	if (!cfg->base || cfg->baselen <= 0)
     82		return -EINVAL;
     83
     84	if (!cfg->damp)
     85		return -EINVAL;
     86
     87	return 0;
     88}
     89
     90int
     91bootstr_encode_delta(const struct bootstr_cfg *cfg, uint32_t *in, uint32_t **out,
     92	size_t *outlen, size_t *outcap, ssize_t bias, ssize_t delta)
     93{
     94	ssize_t thresh;
     95	ssize_t val;
     96	ssize_t off;
     97	ssize_t ci;
     98	int ret;
     99
    100	val = delta;
    101
    102	off = cfg->baselen;
    103	while (1) {
    104		/* final digit must be under threshold */
    105		thresh = MIN(cfg->tmax, MAX(cfg->tmin, off - bias));
    106		if (val < thresh) break;
    107
    108		/* no room for encoding, invalid params */
    109		if (thresh >= cfg->baselen)
    110			return -EINVAL;
    111
    112		/* encode char according to current base */
    113		ci = thresh + (val - thresh) % (cfg->baselen - thresh);
    114		val = (val - thresh) / (cfg->baselen - thresh);
    115		if (ci >= cfg->baselen)
    116			return -EINVAL;
    117
    118		ret = append_codes(out, outlen, outcap, &cfg->base[ci], 1);
    119		if (ret) return ret;
    120
    121		off += cfg->baselen;
    122	}
    123
    124	ret = append_codes(out, outlen, outcap, &cfg->base[val], 1);
    125	if (ret) return ret;
    126
    127	return 0;
    128}
    129
    130int
    131bootstr_encode(const struct bootstr_cfg *cfg, uint32_t *in, uint32_t **out)
    132{
    133	size_t outlen, outcap;
    134	size_t inlen;
    135	ssize_t processed, basiclen;
    136	ssize_t next_code, n;
    137	ssize_t delta, bias;
    138	ssize_t i;
    139	int ret;
    140
    141	ret = check_config(cfg);
    142	if (ret) return ret;
    143
    144	outlen = 0;
    145	outcap = 0;
    146
    147	/* parse out safe character prefix */
    148	inlen = u32_strlen(in);
    149	for (i = 0; i < inlen; i++) {
    150		if (cfg->is_basic(in[i]))
    151			append_codes(out, &outlen, &outcap, &in[i], 1);
    152	}
    153	processed = outlen;
    154	basiclen = outlen;
    155
    156	/* if basic prefix avail, add delim */
    157	if (outlen) {
    158		ret = append_codes(out, &outlen, &outcap,
    159			cfg->delim, u32_strlen(cfg->delim));
    160		if (ret) return ret;
    161	}
    162
    163	bias = cfg->initial_bias;
    164	n = cfg->initial_n;
    165	delta = 0;
    166
    167	/* encode rest of non-basic chars */
    168	while (processed < inlen) {
    169		next_code = SSIZE_MAX;
    170		for (i = 0; i < inlen; i++) {
    171			if (in[i] >= n && in[i] < next_code)
    172				next_code = in[i];
    173		}
    174
    175		/* calc insertions to skip until start of last round:
    176		 * (processed + 1) insertions possible per round
    177		 * (next_code - n) rounds todo */
    178		if ((next_code - n) > (SSIZE_MAX - delta) / (processed + 1))
    179			return -EOVERFLOW;
    180		delta += (next_code - n) * (processed + 1);
    181
    182		/* calculate number of skip to reach code in output at n */
    183		n = next_code;
    184		for (i = 0; i < inlen; i++) {
    185			/* only consider characters already in output */
    186			if (in[i] < n || cfg->is_basic(in[i])) {
    187				delta += 1;
    188				if (delta <= 0)
    189					return -EOVERFLOW;
    190			}
    191
    192			/* reached the position of ONE of next_code */
    193			if (in[i] == n) {
    194				ret = bootstr_encode_delta(cfg, in, out,
    195					&outlen, &outcap, bias, delta);
    196				if (ret) return ret;
    197				bias = bootstr_adapt(cfg, delta,
    198					processed + 1, processed == basiclen);
    199				delta = 0;
    200				processed += 1;
    201			}
    202		}
    203
    204		delta += 1;
    205		n += 1;
    206	}
    207
    208	ret = append_codes(out, &outlen, &outcap, U"\x00", 1);
    209	if (ret) return ret;
    210
    211	return 0;
    212}
    213
    214int
    215bootstr_decode_delta(const struct bootstr_cfg *cfg, uint32_t *in,
    216	ssize_t *processed, ssize_t bias, ssize_t state, ssize_t *state_new)
    217{
    218	ssize_t thresh;
    219	ssize_t digit;
    220	ssize_t mul;
    221	ssize_t off;
    222	uint32_t *tok;
    223
    224	/* construct integer from digits while accounting
    225	 * for possibly different bases per digit */
    226
    227	mul = 1;
    228	off = cfg->baselen;
    229	while (1) {
    230		if (!in[*processed]) return -EINVAL;
    231
    232		tok = u32_strchr(cfg->base, in[*processed]);
    233		if (!tok) return -EINVAL;
    234		*processed += 1;
    235
    236		digit = tok - cfg->base;
    237		if (digit > (SSIZE_MAX - state) / mul)
    238			return -EOVERFLOW;
    239		state += digit * mul;
    240
    241		thresh = MIN(cfg->tmax, MAX(cfg->tmin, off - bias));
    242		if (digit < thresh) break;
    243
    244		if (mul > SSIZE_MAX / (cfg->baselen - thresh))
    245			return -EOVERFLOW;
    246		mul *= cfg->baselen - thresh;
    247
    248		off += cfg->baselen;
    249	}
    250	*state_new = state;
    251
    252	return 0;
    253}
    254
    255int
    256bootstr_decode(const struct bootstr_cfg *cfg, uint32_t *in, uint32_t **out)
    257{
    258	size_t outlen, outcap;
    259	size_t inlen;
    260	ssize_t basiclen;
    261	ssize_t processed, n;
    262	ssize_t state, state_new, bias;
    263	ssize_t i, len;
    264	int rc;
    265
    266	rc = check_config(cfg);
    267	if (rc) return rc;
    268
    269	outlen = 0;
    270	outcap = 0;
    271
    272	basiclen = 0;
    273	inlen = u32_strlen(in);
    274
    275	/* find basic prefix delim */
    276	for (i = 0; i < inlen; i++) {
    277		if (!u32_strcmp(in + i, cfg->delim)) {
    278			basiclen = i;
    279			break;
    280		}
    281		if (!cfg->is_basic(in[i]))
    282			return -EINVAL;
    283	}
    284
    285	/* copy basic prefix to output */
    286	if (basiclen)
    287		append_codes(out, &outlen, &outcap, in, basiclen);
    288
    289	n = cfg->initial_n;
    290	bias = cfg->initial_bias;
    291	state = 0;
    292
    293	/* decode rest of non-basic chars */
    294	for (processed = basiclen; processed < inlen; ) {
    295		/* decode delta and add to state */
    296		rc = bootstr_decode_delta(cfg, in, &processed,
    297			bias, state, &state_new);
    298		if (rc) return rc;
    299
    300		/* use delta to calculate new bias */
    301		bias = bootstr_adapt(cfg, state_new - state,
    302			outlen + 1, state == 0);
    303		state = state_new;
    304
    305		/* split up state into rounds and index */
    306		if (state / (outlen + 1) > (SSIZE_MAX - n))
    307			return -EOVERFLOW;
    308		n += state / (outlen + 1);
    309		state %= outlen + 1;
    310
    311		/* insert current code */
    312		rc = bootstr_realloc(out, outlen + 1, &outcap);
    313		if (rc) return rc;
    314		memmove(*out + state + 1, *out + state,
    315			(outlen - state) * sizeof(uint32_t));
    316		(*out)[state] = n;
    317		state += 1;
    318		outlen += 1;
    319	}
    320
    321	rc = append_codes(out, &outlen, &outcap, U"\x00", 1);
    322	if (rc) return rc;
    323
    324	return 0;
    325}