From 15ade3d4f156b70da74f599cd2f16dd818e63c41 Mon Sep 17 00:00:00 2001 From: Louis Burda Date: Mon, 13 Mar 2023 21:41:56 +0100 Subject: Reorganize and fix install target --- .gitignore | 5 +- Makefile | 33 +++--- bootstr.c | 325 ---------------------------------------------------- bootstr.h | 19 --- include/bootstr.h | 19 +++ puny.c | 120 ------------------- src/bootstr.c | 325 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/puny.c | 120 +++++++++++++++++++ test/basic.in | 1 - test/basic.out | 1 - test/puny-basic.in | 1 + test/puny-basic.out | 1 + 12 files changed, 485 insertions(+), 485 deletions(-) delete mode 100644 bootstr.c delete mode 100644 bootstr.h create mode 100644 include/bootstr.h delete mode 100644 puny.c create mode 100644 src/bootstr.c create mode 100644 src/puny.c delete mode 100644 test/basic.in delete mode 100644 test/basic.out create mode 100644 test/puny-basic.in create mode 100644 test/puny-basic.out diff --git a/.gitignore b/.gitignore index 3158d4f..378eac2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1 @@ -puny -bootstr -*.o -*.so +build diff --git a/Makefile b/Makefile index a75f410..8aabe31 100644 --- a/Makefile +++ b/Makefile @@ -1,31 +1,34 @@ PREFIX ?= /usr/local -BINDIR ?= /bin LIBDIR ?= /lib +INCLDIR ?= /include -all: libbootstr.so puny +all: build/libbootstr.so build/puny clean: - rm -f puny + rm -rf build -puny: puny.c libbootstr.so - $(CC) -o $@ $(filter %.c,$^) -g -lunistring -L . -lbootstr +build: + mkdir build -test/%.phony: test/%.in test/%.out - @echo "test $*" - test "$(shell cat test/$*.in | ./puny -e)" = "$(shell cat test/$*.out)" - test "$(shell cat test/$*.out | ./puny -d)" = "$(shell cat test/$*.in)" +build/puny: src/puny.c build/libbootstr.so | build + $(CC) -o $@ $< -g -I include -L build -lunistring -lbootstr + +build/libbootstr.so: src/bootstr.c include/bootstr.h | build + $(CC) -o $@ $< -I include -fPIC -shared -lunistring -test: puny test/basic.phony +test/%.phony: test/%.in test/%.out + @echo "> test $*" + test "$(shell cat test/$*.in | ./build/puny -e)" = "$(shell cat test/$*.out)" + test "$(shell cat test/$*.out | ./build/puny -d)" = "$(shell cat test/$*.in)" -libbootstr.so: bootstr.o - $(CC) -o $@ $^ -fPIC -shared -lunistring +test: build/puny test/puny-basic.phony install: - install -m755 libbootstr.so -t "$(DESTDIR)$(PREFIX)$(LIBDIR)" - install -m755 puny -t "$(DESTDIR)$(PREFIX)$(BINDIR)" + install -m644 include/bootstr.h -t "$(DESTDIR)$(PREFIX)$(INCLDIR)" + install -m755 build/libbootstr.so -t "$(DESTDIR)$(PREFIX)$(LIBDIR)" uninstall: + rm -f "$(DESTDIR)$(PREFIX)$(INCLDIR)/bootstr.h" rm -f "$(DESTDIR)$(PREFIX)$(LIBDIR)/libbootstr.so" - rm -f "$(DESTDIR)$(PREFIX)$(BINDIR)/puny" .PHONY: all clean test install uninstall diff --git a/bootstr.c b/bootstr.c deleted file mode 100644 index 97cf693..0000000 --- a/bootstr.c +++ /dev/null @@ -1,325 +0,0 @@ -#include "bootstr.h" - -#include -#include -#include -#include -#include -#include -#include - -#define MIN(a, b) ((a) > (b) ? (b) : (a)) -#define MAX(a, b) ((a) > (b) ? (a) : (b)) - -static int check_realloc(uint32_t **alloc, size_t reserve, size_t *cap); -static int append_codes(uint32_t **alloc, size_t *len, size_t *cap, - const uint32_t *src, size_t srclen); -static int check_config(const struct bootstr_cfg *cfg); - -static inline size_t -bootstr_adapt(const struct bootstr_cfg *cfg, ssize_t delta, - ssize_t len, bool first) -{ - size_t k; - - delta = first ? delta / cfg->damp : delta / 2; - delta += delta / len; - - k = 0; - while (delta > (cfg->baselen - cfg->tmin) * cfg->tmax / 2) { - delta /= cfg->baselen - cfg->tmin; - k += cfg->baselen; - } - k += (cfg->baselen - cfg->tmin + 1) * delta / (delta + cfg->skew); - - return k; -} - -int -check_realloc(uint32_t **alloc, size_t reserve, size_t *cap) -{ - if (reserve >= *cap) { - if (!*cap) { - *cap = reserve; - } else { - *cap = MAX(*cap * 2, reserve); - } - *alloc = realloc(*alloc, *cap * sizeof(uint32_t)); - if (!*alloc) return errno; - } - - return 0; -} - -int -append_codes(uint32_t **alloc, size_t *len, size_t *cap, - const uint32_t *src, size_t srclen) -{ - int ret; - - ret = check_realloc(alloc, *len + srclen, cap); - if (ret) return ret; - - memcpy(*alloc + *len, src, srclen * sizeof(uint32_t)); - *len += srclen; - - return 0; -} - -int -check_config(const struct bootstr_cfg *cfg) -{ - if (cfg->tmin >= cfg->baselen || cfg->tmin <= 0) - return EINVAL; - - if (cfg->tmax < cfg->tmin) - return EINVAL; - - if (!cfg->delim) - return EINVAL; - - if (!cfg->base || cfg->baselen <= 0) - return EINVAL; - - if (!cfg->damp) - return EINVAL; - - return 0; -} - -int -bootstr_encode_delta(const struct bootstr_cfg *cfg, uint32_t *in, uint32_t **out, - size_t *outlen, size_t *outcap, ssize_t bias, ssize_t delta) -{ - ssize_t thresh; - ssize_t val; - ssize_t off; - ssize_t ci; - int ret; - - val = delta; - - off = cfg->baselen; - while (1) { - /* final digit must be under threshold */ - thresh = MIN(cfg->tmax, MAX(cfg->tmin, off - bias)); - if (val < thresh) break; - - /* no room for encoding, invalid params */ - if (thresh >= cfg->baselen) - return EINVAL; - - /* encode char according to current base */ - ci = thresh + (val - thresh) % (cfg->baselen - thresh); - val = (val - thresh) / (cfg->baselen - thresh); - if (ci >= cfg->baselen) - return EINVAL; - - ret = append_codes(out, outlen, outcap, &cfg->base[ci], 1); - if (ret) return ret; - - off += cfg->baselen; - } - - ret = append_codes(out, outlen, outcap, &cfg->base[val], 1); - if (ret) return ret; - - return 0; -} - -int -bootstr_encode(const struct bootstr_cfg *cfg, uint32_t *in, uint32_t **out) -{ - size_t outlen, outcap; - size_t inlen; - ssize_t processed, basiclen; - ssize_t next_code, n; - ssize_t delta, bias; - ssize_t i; - int ret; - - ret = check_config(cfg); - if (ret) return ret; - - outlen = 0; - outcap = 0; - - /* parse out safe character prefix */ - inlen = u32_strlen(in); - for (i = 0; i < inlen; i++) { - if (cfg->is_basic(in[i])) - append_codes(out, &outlen, &outcap, &in[i], 1); - } - processed = outlen; - basiclen = outlen; - - /* if basic prefix avail, add delim */ - if (outlen) { - ret = append_codes(out, &outlen, &outcap, - cfg->delim, u32_strlen(cfg->delim)); - if (ret) return ret; - } - - bias = cfg->initial_bias; - n = cfg->initial_n; - delta = 0; - - /* encode rest of non-basic chars */ - while (processed < inlen) { - next_code = SSIZE_MAX; - for (i = 0; i < inlen; i++) { - if (in[i] >= n && in[i] < next_code) - next_code = in[i]; - } - - /* calc insertions to skip until start of last round: - * (processed + 1) insertions possible per round - * (next_code - n) rounds todo */ - if ((next_code - n) > (SSIZE_MAX - delta) / (processed + 1)) - return EOVERFLOW; - delta += (next_code - n) * (processed + 1); - - /* calculate number of skip to reach code in output at n */ - n = next_code; - for (i = 0; i < inlen; i++) { - /* only consider characters already in output */ - if (in[i] < n || cfg->is_basic(in[i])) { - delta += 1; - if (delta <= 0) - return EOVERFLOW; - } - - /* reached the position of ONE of next_code */ - if (in[i] == n) { - ret = bootstr_encode_delta(cfg, in, out, - &outlen, &outcap, bias, delta); - if (ret) return ret; - bias = bootstr_adapt(cfg, delta, - processed + 1, processed == basiclen); - delta = 0; - processed += 1; - } - } - - delta += 1; - n += 1; - } - - ret = append_codes(out, &outlen, &outcap, U"\x00", 1); - if (ret) return ret; - - return 0; -} - -int -bootstr_decode_delta(const struct bootstr_cfg *cfg, uint32_t *in, - ssize_t *processed, ssize_t bias, ssize_t state, ssize_t *state_new) -{ - ssize_t thresh; - ssize_t digit; - ssize_t mul; - ssize_t off; - uint32_t *tok; - - /* construct integer from digits while accounting - * for possibly different bases per digit */ - - mul = 1; - off = cfg->baselen; - while (1) { - if (!in[*processed]) return EINVAL; - - tok = u32_strchr(cfg->base, in[*processed]); - if (!tok) return EINVAL; - *processed += 1; - - digit = tok - cfg->base; - if (digit > (SSIZE_MAX - state) / mul) - return EOVERFLOW; - state += digit * mul; - - thresh = MIN(cfg->tmax, MAX(cfg->tmin, off - bias)); - if (digit < thresh) break; - - if (mul > SSIZE_MAX / (cfg->baselen - thresh)) - return EOVERFLOW; - mul *= cfg->baselen - thresh; - - off += cfg->baselen; - } - *state_new = state; - - return 0; -} - -int -bootstr_decode(const struct bootstr_cfg *cfg, uint32_t *in, uint32_t **out) -{ - size_t outlen, outcap; - size_t inlen; - ssize_t basiclen; - ssize_t processed, n; - ssize_t state, state_new, bias; - ssize_t i, len; - int ret; - - ret = check_config(cfg); - if (ret) return ret; - - outlen = 0; - outcap = 0; - - basiclen = 0; - inlen = u32_strlen(in); - - /* find basic prefix delim */ - for (i = 0; i < inlen; i++) { - if (!u32_strcmp(in + i, cfg->delim)) { - basiclen = i; - break; - } - if (!cfg->is_basic(in[i])) - return EINVAL; - } - - /* copy basic prefix to output */ - if (basiclen) - append_codes(out, &outlen, &outcap, in, basiclen); - - n = cfg->initial_n; - bias = cfg->initial_bias; - state = 0; - - /* decode rest of non-basic chars */ - for (processed = basiclen; processed < inlen; ) { - /* decode delta and add to state */ - ret = bootstr_decode_delta(cfg, in, &processed, - bias, state, &state_new); - if (ret) return ret; - - /* use delta to calculate new bias */ - bias = bootstr_adapt(cfg, state_new - state, - outlen + 1, state == 0); - state = state_new; - - /* split up state into rounds and index */ - if (state / (outlen + 1) > (SSIZE_MAX - n)) - return EOVERFLOW; - n += state / (outlen + 1); - state %= outlen + 1; - - /* insert current code */ - ret = check_realloc(out, outlen + 1, &outcap); - if (ret) return ret; - memmove(*out + state + 1, *out + state, - (outlen - state) * sizeof(uint32_t)); - (*out)[state] = n; - state += 1; - outlen += 1; - } - - ret = append_codes(out, &outlen, &outcap, U"\x00", 1); - if (ret) return ret; - - return 0; -} diff --git a/bootstr.h b/bootstr.h deleted file mode 100644 index 8f26a6d..0000000 --- a/bootstr.h +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - -#include -#include -#include - -struct bootstr_cfg { - const uint32_t *base; - ssize_t baselen; - const uint32_t *delim; - bool (*is_basic)(uint32_t c); - ssize_t tmin, tmax; - ssize_t skew, damp; - ssize_t initial_bias; - ssize_t initial_n; -}; - -int bootstr_encode(const struct bootstr_cfg *cfg, uint32_t *in, uint32_t **out); -int bootstr_decode(const struct bootstr_cfg *cfg, uint32_t *in, uint32_t **out); diff --git a/include/bootstr.h b/include/bootstr.h new file mode 100644 index 0000000..8f26a6d --- /dev/null +++ b/include/bootstr.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include +#include + +struct bootstr_cfg { + const uint32_t *base; + ssize_t baselen; + const uint32_t *delim; + bool (*is_basic)(uint32_t c); + ssize_t tmin, tmax; + ssize_t skew, damp; + ssize_t initial_bias; + ssize_t initial_n; +}; + +int bootstr_encode(const struct bootstr_cfg *cfg, uint32_t *in, uint32_t **out); +int bootstr_decode(const struct bootstr_cfg *cfg, uint32_t *in, uint32_t **out); diff --git a/puny.c b/puny.c deleted file mode 100644 index 0ea9c29..0000000 --- a/puny.c +++ /dev/null @@ -1,120 +0,0 @@ -#include "bootstr.h" - -#include -#include -#include -#include -#include - -#define CHUNKSIZE 4096 - -bool is_ascii(uint32_t c); - -const struct bootstr_cfg puny_cfg = { - .base = U"abcdefghijklmnopqrstuvwxyz0123456789", - .baselen = 36, - .delim = U"-", - .is_basic = is_ascii, - .tmin = 1, - .tmax = 26, - .skew = 38, - .damp = 700, - .initial_bias = 72, - .initial_n = 128 -}; - -bool -is_ascii(uint32_t c) -{ - return c < 128; -} - -uint8_t * -readall(FILE *file, size_t *len) -{ - ssize_t nread; - size_t cap; - uint8_t *data; - - *len = 0; - cap = CHUNKSIZE + 1; - data = malloc(cap); - if (!data) err(1, "malloc"); - - while (1) { - if (*len + CHUNKSIZE + 1 > cap) { - cap *= 2; - data = realloc(data, cap); - if (!data) err(1, "realloc"); - } - - nread = fread(data + *len, 1, CHUNKSIZE, file); - if (nread <= 0) break; - - *len += nread; - } - - *(data + *len) = '\0'; - - return data; -} - -int -main(int argc, const char **argv) -{ - const char **arg; - uint8_t *in, *out; - uint32_t *u_in, *u_out; - size_t inlen, outlen; - size_t u_inlen, u_outlen; - const char *filepath; - bool encode; - char *tok; - FILE *file; - int ret; - - encode = true; - filepath = NULL; - for (arg = argv + 1; *arg; arg++) { - if (!strcmp(*arg, "-e")) { - encode = true; - } else if (!strcmp(*arg, "-d")) { - encode = false; - } else if (!filepath) { - filepath = *arg; - } else { - errx(1, "unknown arg %s", *arg); - } - } - - out = NULL; - if (filepath) { - file = fopen(filepath, "r"); - if (!file) err(1, "fopen %s", filepath); - in = readall(file, &inlen); - fclose(file); - } else { - in = readall(stdin, &inlen); - } - tok = strchr((char *)in, '\n'); - if (tok) *tok = '\0'; - - u_in = u8_to_u32(in, inlen + 1, NULL, &u_inlen); - u_out = NULL; - - if (encode) { - ret = bootstr_encode(&puny_cfg, u_in, &u_out); - if (ret) errx(1, "encode: %s", strerror(ret)); - } else { - ret = bootstr_decode(&puny_cfg, u_in, &u_out); - if (ret) errx(1, "decode: %s", strerror(ret)); - } - - out = u32_to_u8(u_out, u32_strlen(u_out) + 1, NULL, &outlen); - printf("%s\n", (char *)out); - - free(u_out); - free(u_in); - free(out); - free(in); -} diff --git a/src/bootstr.c b/src/bootstr.c new file mode 100644 index 0000000..97cf693 --- /dev/null +++ b/src/bootstr.c @@ -0,0 +1,325 @@ +#include "bootstr.h" + +#include +#include +#include +#include +#include +#include +#include + +#define MIN(a, b) ((a) > (b) ? (b) : (a)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +static int check_realloc(uint32_t **alloc, size_t reserve, size_t *cap); +static int append_codes(uint32_t **alloc, size_t *len, size_t *cap, + const uint32_t *src, size_t srclen); +static int check_config(const struct bootstr_cfg *cfg); + +static inline size_t +bootstr_adapt(const struct bootstr_cfg *cfg, ssize_t delta, + ssize_t len, bool first) +{ + size_t k; + + delta = first ? delta / cfg->damp : delta / 2; + delta += delta / len; + + k = 0; + while (delta > (cfg->baselen - cfg->tmin) * cfg->tmax / 2) { + delta /= cfg->baselen - cfg->tmin; + k += cfg->baselen; + } + k += (cfg->baselen - cfg->tmin + 1) * delta / (delta + cfg->skew); + + return k; +} + +int +check_realloc(uint32_t **alloc, size_t reserve, size_t *cap) +{ + if (reserve >= *cap) { + if (!*cap) { + *cap = reserve; + } else { + *cap = MAX(*cap * 2, reserve); + } + *alloc = realloc(*alloc, *cap * sizeof(uint32_t)); + if (!*alloc) return errno; + } + + return 0; +} + +int +append_codes(uint32_t **alloc, size_t *len, size_t *cap, + const uint32_t *src, size_t srclen) +{ + int ret; + + ret = check_realloc(alloc, *len + srclen, cap); + if (ret) return ret; + + memcpy(*alloc + *len, src, srclen * sizeof(uint32_t)); + *len += srclen; + + return 0; +} + +int +check_config(const struct bootstr_cfg *cfg) +{ + if (cfg->tmin >= cfg->baselen || cfg->tmin <= 0) + return EINVAL; + + if (cfg->tmax < cfg->tmin) + return EINVAL; + + if (!cfg->delim) + return EINVAL; + + if (!cfg->base || cfg->baselen <= 0) + return EINVAL; + + if (!cfg->damp) + return EINVAL; + + return 0; +} + +int +bootstr_encode_delta(const struct bootstr_cfg *cfg, uint32_t *in, uint32_t **out, + size_t *outlen, size_t *outcap, ssize_t bias, ssize_t delta) +{ + ssize_t thresh; + ssize_t val; + ssize_t off; + ssize_t ci; + int ret; + + val = delta; + + off = cfg->baselen; + while (1) { + /* final digit must be under threshold */ + thresh = MIN(cfg->tmax, MAX(cfg->tmin, off - bias)); + if (val < thresh) break; + + /* no room for encoding, invalid params */ + if (thresh >= cfg->baselen) + return EINVAL; + + /* encode char according to current base */ + ci = thresh + (val - thresh) % (cfg->baselen - thresh); + val = (val - thresh) / (cfg->baselen - thresh); + if (ci >= cfg->baselen) + return EINVAL; + + ret = append_codes(out, outlen, outcap, &cfg->base[ci], 1); + if (ret) return ret; + + off += cfg->baselen; + } + + ret = append_codes(out, outlen, outcap, &cfg->base[val], 1); + if (ret) return ret; + + return 0; +} + +int +bootstr_encode(const struct bootstr_cfg *cfg, uint32_t *in, uint32_t **out) +{ + size_t outlen, outcap; + size_t inlen; + ssize_t processed, basiclen; + ssize_t next_code, n; + ssize_t delta, bias; + ssize_t i; + int ret; + + ret = check_config(cfg); + if (ret) return ret; + + outlen = 0; + outcap = 0; + + /* parse out safe character prefix */ + inlen = u32_strlen(in); + for (i = 0; i < inlen; i++) { + if (cfg->is_basic(in[i])) + append_codes(out, &outlen, &outcap, &in[i], 1); + } + processed = outlen; + basiclen = outlen; + + /* if basic prefix avail, add delim */ + if (outlen) { + ret = append_codes(out, &outlen, &outcap, + cfg->delim, u32_strlen(cfg->delim)); + if (ret) return ret; + } + + bias = cfg->initial_bias; + n = cfg->initial_n; + delta = 0; + + /* encode rest of non-basic chars */ + while (processed < inlen) { + next_code = SSIZE_MAX; + for (i = 0; i < inlen; i++) { + if (in[i] >= n && in[i] < next_code) + next_code = in[i]; + } + + /* calc insertions to skip until start of last round: + * (processed + 1) insertions possible per round + * (next_code - n) rounds todo */ + if ((next_code - n) > (SSIZE_MAX - delta) / (processed + 1)) + return EOVERFLOW; + delta += (next_code - n) * (processed + 1); + + /* calculate number of skip to reach code in output at n */ + n = next_code; + for (i = 0; i < inlen; i++) { + /* only consider characters already in output */ + if (in[i] < n || cfg->is_basic(in[i])) { + delta += 1; + if (delta <= 0) + return EOVERFLOW; + } + + /* reached the position of ONE of next_code */ + if (in[i] == n) { + ret = bootstr_encode_delta(cfg, in, out, + &outlen, &outcap, bias, delta); + if (ret) return ret; + bias = bootstr_adapt(cfg, delta, + processed + 1, processed == basiclen); + delta = 0; + processed += 1; + } + } + + delta += 1; + n += 1; + } + + ret = append_codes(out, &outlen, &outcap, U"\x00", 1); + if (ret) return ret; + + return 0; +} + +int +bootstr_decode_delta(const struct bootstr_cfg *cfg, uint32_t *in, + ssize_t *processed, ssize_t bias, ssize_t state, ssize_t *state_new) +{ + ssize_t thresh; + ssize_t digit; + ssize_t mul; + ssize_t off; + uint32_t *tok; + + /* construct integer from digits while accounting + * for possibly different bases per digit */ + + mul = 1; + off = cfg->baselen; + while (1) { + if (!in[*processed]) return EINVAL; + + tok = u32_strchr(cfg->base, in[*processed]); + if (!tok) return EINVAL; + *processed += 1; + + digit = tok - cfg->base; + if (digit > (SSIZE_MAX - state) / mul) + return EOVERFLOW; + state += digit * mul; + + thresh = MIN(cfg->tmax, MAX(cfg->tmin, off - bias)); + if (digit < thresh) break; + + if (mul > SSIZE_MAX / (cfg->baselen - thresh)) + return EOVERFLOW; + mul *= cfg->baselen - thresh; + + off += cfg->baselen; + } + *state_new = state; + + return 0; +} + +int +bootstr_decode(const struct bootstr_cfg *cfg, uint32_t *in, uint32_t **out) +{ + size_t outlen, outcap; + size_t inlen; + ssize_t basiclen; + ssize_t processed, n; + ssize_t state, state_new, bias; + ssize_t i, len; + int ret; + + ret = check_config(cfg); + if (ret) return ret; + + outlen = 0; + outcap = 0; + + basiclen = 0; + inlen = u32_strlen(in); + + /* find basic prefix delim */ + for (i = 0; i < inlen; i++) { + if (!u32_strcmp(in + i, cfg->delim)) { + basiclen = i; + break; + } + if (!cfg->is_basic(in[i])) + return EINVAL; + } + + /* copy basic prefix to output */ + if (basiclen) + append_codes(out, &outlen, &outcap, in, basiclen); + + n = cfg->initial_n; + bias = cfg->initial_bias; + state = 0; + + /* decode rest of non-basic chars */ + for (processed = basiclen; processed < inlen; ) { + /* decode delta and add to state */ + ret = bootstr_decode_delta(cfg, in, &processed, + bias, state, &state_new); + if (ret) return ret; + + /* use delta to calculate new bias */ + bias = bootstr_adapt(cfg, state_new - state, + outlen + 1, state == 0); + state = state_new; + + /* split up state into rounds and index */ + if (state / (outlen + 1) > (SSIZE_MAX - n)) + return EOVERFLOW; + n += state / (outlen + 1); + state %= outlen + 1; + + /* insert current code */ + ret = check_realloc(out, outlen + 1, &outcap); + if (ret) return ret; + memmove(*out + state + 1, *out + state, + (outlen - state) * sizeof(uint32_t)); + (*out)[state] = n; + state += 1; + outlen += 1; + } + + ret = append_codes(out, &outlen, &outcap, U"\x00", 1); + if (ret) return ret; + + return 0; +} diff --git a/src/puny.c b/src/puny.c new file mode 100644 index 0000000..0ea9c29 --- /dev/null +++ b/src/puny.c @@ -0,0 +1,120 @@ +#include "bootstr.h" + +#include +#include +#include +#include +#include + +#define CHUNKSIZE 4096 + +bool is_ascii(uint32_t c); + +const struct bootstr_cfg puny_cfg = { + .base = U"abcdefghijklmnopqrstuvwxyz0123456789", + .baselen = 36, + .delim = U"-", + .is_basic = is_ascii, + .tmin = 1, + .tmax = 26, + .skew = 38, + .damp = 700, + .initial_bias = 72, + .initial_n = 128 +}; + +bool +is_ascii(uint32_t c) +{ + return c < 128; +} + +uint8_t * +readall(FILE *file, size_t *len) +{ + ssize_t nread; + size_t cap; + uint8_t *data; + + *len = 0; + cap = CHUNKSIZE + 1; + data = malloc(cap); + if (!data) err(1, "malloc"); + + while (1) { + if (*len + CHUNKSIZE + 1 > cap) { + cap *= 2; + data = realloc(data, cap); + if (!data) err(1, "realloc"); + } + + nread = fread(data + *len, 1, CHUNKSIZE, file); + if (nread <= 0) break; + + *len += nread; + } + + *(data + *len) = '\0'; + + return data; +} + +int +main(int argc, const char **argv) +{ + const char **arg; + uint8_t *in, *out; + uint32_t *u_in, *u_out; + size_t inlen, outlen; + size_t u_inlen, u_outlen; + const char *filepath; + bool encode; + char *tok; + FILE *file; + int ret; + + encode = true; + filepath = NULL; + for (arg = argv + 1; *arg; arg++) { + if (!strcmp(*arg, "-e")) { + encode = true; + } else if (!strcmp(*arg, "-d")) { + encode = false; + } else if (!filepath) { + filepath = *arg; + } else { + errx(1, "unknown arg %s", *arg); + } + } + + out = NULL; + if (filepath) { + file = fopen(filepath, "r"); + if (!file) err(1, "fopen %s", filepath); + in = readall(file, &inlen); + fclose(file); + } else { + in = readall(stdin, &inlen); + } + tok = strchr((char *)in, '\n'); + if (tok) *tok = '\0'; + + u_in = u8_to_u32(in, inlen + 1, NULL, &u_inlen); + u_out = NULL; + + if (encode) { + ret = bootstr_encode(&puny_cfg, u_in, &u_out); + if (ret) errx(1, "encode: %s", strerror(ret)); + } else { + ret = bootstr_decode(&puny_cfg, u_in, &u_out); + if (ret) errx(1, "decode: %s", strerror(ret)); + } + + out = u32_to_u8(u_out, u32_strlen(u_out) + 1, NULL, &outlen); + printf("%s\n", (char *)out); + + free(u_out); + free(u_in); + free(out); + free(in); +} diff --git a/test/basic.in b/test/basic.in deleted file mode 100644 index 71f4119..0000000 --- a/test/basic.in +++ /dev/null @@ -1 +0,0 @@ -他們爲什麽不說中文 \ No newline at end of file diff --git a/test/basic.out b/test/basic.out deleted file mode 100644 index 7e12b07..0000000 --- a/test/basic.out +++ /dev/null @@ -1 +0,0 @@ -ihqwctvzc91f659drss3x8bo0yb \ No newline at end of file diff --git a/test/puny-basic.in b/test/puny-basic.in new file mode 100644 index 0000000..71f4119 --- /dev/null +++ b/test/puny-basic.in @@ -0,0 +1 @@ +他們爲什麽不說中文 \ No newline at end of file diff --git a/test/puny-basic.out b/test/puny-basic.out new file mode 100644 index 0000000..7e12b07 --- /dev/null +++ b/test/puny-basic.out @@ -0,0 +1 @@ +ihqwctvzc91f659drss3x8bo0yb \ No newline at end of file -- cgit v1.2.3-71-gd317