tabular

Flexible input tabulator
git clone https://git.sinitax.com/sinitax/tabular
Log | Files | Refs | Submodules | sfeed.txt

commit 2b1d383ba24ec257bb19667918919d9348e21c71
Author: Louis Burda <quent.burda@gmail.com>
Date:   Fri,  2 Jun 2023 18:36:22 +0200

Add initial version

Diffstat:
A.gitignore | 5+++++
A.gitmodules | 12++++++++++++
Abuild.jst.tmpl | 45+++++++++++++++++++++++++++++++++++++++++++++
Aconfigure | 8++++++++
Alib/liballoc | 1+
Alib/libdvec | 1+
Alib/libhmap | 1+
Alib/libtabular | 1+
Amain.c | 425+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
9 files changed, 499 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1,5 @@ +.cache +.gdb_history +compile_commands.json +build.jst +tabular diff --git a/.gitmodules b/.gitmodules @@ -0,0 +1,12 @@ +[submodule "lib/libtabular"] + path = lib/libtabular + url = git@sinitax.com:sinitax/libtabular +[submodule "lib/libdvec"] + path = lib/libdvec + url = git@sinitax.com:sinitax/libdvec +[submodule "lib/libhmap"] + path = lib/libhmap + url = git@sinitax.com:sinitax/libhmap +[submodule "lib/liballoc"] + path = lib/liballoc + url = git@sinitax.com:sinitax/liballoc diff --git a/build.jst.tmpl b/build.jst.tmpl @@ -0,0 +1,45 @@ +#default PREFIX /usr/local +#default BINDIR /bin +#default CC gcc + +#ifdef DEBUG +#define OPT_CFLAGS -Og -g +#else +#define OPT_CFLAGS -O2 +#endif + +cflags = -I lib/libdvec/include -I lib/libhmap/include + -I lib/libtabular/include -I lib/liballoc/include + -Wunused-variable -Wunused-function -Wconversion + #{OPT_CFLAGS} #{EXTRA_CFLAGS} #{TABULAR_EXTRA_CFLAGS} + +rule cc + #{CC} -o $out $in $cflags + +target lib/libdvec/build/libdvec.a + just lib/libdvec + +target lib/libhmap/build/libhmap.a + just lib/libhmap + +target lib/libtabular/build/libtabular.a + just lib/libtabular + +target lib/liballoc/build/liballoc.a + just lib/liballoc + +target tabular + cc main.c lib/libdvec/build/libdvec.a lib/libhmap/build/libhmap.a + lib/libtabular/build/libtabular.a lib/liballoc/build/liballoc.a + +command clean + rm -f tabular + +command cleanall + just clean + just -C lib/libtabular clean + just -C lib/libhmap clean + just -C lib/libdvec clean + +command all + just tabular diff --git a/configure b/configure @@ -0,0 +1,8 @@ +#!/bin/sh + +tmpl "$@" build.jst.tmpl > build.jst +for lib in ./lib/*; do + pushd $lib + ./configure "$@" + popd +done diff --git a/lib/liballoc b/lib/liballoc @@ -0,0 +1 @@ +Subproject commit 3f388a2659ae2d121322101930d33412815d84e6 diff --git a/lib/libdvec b/lib/libdvec @@ -0,0 +1 @@ +Subproject commit cf90c882f738016cc306799617c367e064883c0b diff --git a/lib/libhmap b/lib/libhmap @@ -0,0 +1 @@ +Subproject commit e86177eb3072c0c755986ff4c8d4c5d0cce72139 diff --git a/lib/libtabular b/lib/libtabular @@ -0,0 +1 @@ +Subproject commit 8c27dfa1a6bcbfee1f40db689f53bcf204a207b8 diff --git a/main.c b/main.c @@ -0,0 +1,425 @@ +#include "tabular.h" +#include "hmap.h" +#include "dvec.h" +#include "allocator.h" + +#include <sys/ioctl.h> +#include <unistd.h> +#include <err.h> +#include <string.h> +#include <stdbool.h> +#include <stdarg.h> +#include <stdio.h> + +#define ARRLEN(x) (sizeof(x)/sizeof(*(x))) + +static bool print_style(FILE *file, const struct tabular_cfg *cfg, + const struct tabular_row *row, const struct tabular_col *col); + +static struct tabular_row *row_gen(const struct tabular_user *user); +static char *col_str(const struct tabular_user *user_row, + const struct tabular_user *user_col); +static bool col_hidden(const struct tabular_user *user_row, + const struct tabular_user *user_col); + +static const struct allocator *ga = &stdlib_strict_heap_allocator; + +static bool hide_empty = false; +static bool skip_empty_lines = true; +static bool skip_empty_entries = false; + +static char entry_sep = '\t'; +static char line_sep = '\n'; + +static struct hmap colmap; +static struct dvec cols; +static size_t colcnt = 0; + +static struct dvec input; +static size_t input_off = 0; +static struct dvec line; +static struct dvec entries; +static size_t linecnt = 0; + +static struct tabular_cfg cfg = { + .colors = 256, + + .columns = NULL, + .column_count = 0, + + .fit_rows = false, + + .hsep = "│ ", + .vsep = "─", + .xsep = "┼─", + + .outw = 0, + .outh = 0, + + .lpad = 1, + .rpad = 1, + + .user.ptr = NULL, + .row_gen = row_gen, + .print_style = print_style, + + .skip_lines = 3, + + .allocator = &stdlib_heap_allocator +}; + +static void __attribute__((noreturn)) +die(const char *fmt, ...) +{ + va_list ap; + + fputs("tabular: ", stderr); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + if (*fmt && fmt[strlen(fmt)-1] == ':') { + fputc(' ', stderr); + perror(NULL); + } else { + fputc('\n', stderr); + } + + exit(1); +} + +static bool +print_style(FILE *file, const struct tabular_cfg *cfg, + const struct tabular_row *row, const struct tabular_col *col) +{ + if (cfg->colors == 256) { + if (!col) { /* separators */ + fprintf(file, "\x1b[90m"); + return true; + } else if (!row) { /* header */ + fprintf(file, "\x1b[1m"); + return true; + } else if (!strcmp(col->name, "Name")) { + fprintf(file, "\x1b[35m"); + return true; + } + } + + return false; +} + +static bool +read_line(void) +{ + void *tok, *sep, *end, *line_end; + ssize_t n; + + while (1) { + while (!(line_end = memchr(input.data + input_off, + line_sep, input.len - input_off))) { + dvec_rm(&input, 0, input_off); + input_off = 0; + dvec_reserve(&input, input.len + BUFSIZ + 1); + n = read(0, input.data + input.len, BUFSIZ); + if (n <= 0) { + line_end = input.data + input.len; + break; + } + input.len += (size_t) n; + } + if (!input.len) return false; + + if (line_end != input.data || skip_empty_lines) + break; + input_off += 1; + } + *(char *)line_end = '\0'; + + dvec_clear(&entries); + tok = input.data + input_off; + while (tok) { + sep = memchr(tok, entry_sep, + (size_t) (input.data + input.len - tok)); + end = (sep && sep < line_end) ? sep : line_end; + *(char *)end = '\0'; + if (tok != end || !skip_empty_entries) { + dvec_add_back(&entries, 1); + *(size_t *)dvec_back(&entries) = + (size_t) (tok - input.data) - input_off; + } + tok = (sep && sep < line_end) ? sep + 1 : NULL; + } + + dvec_clear(&line); + dvec_add_back(&line, (size_t) (line_end - input.data) - input_off + 1); + memcpy(line.data, input.data + input_off, line.len); + input_off += line.len; + + return true; +} + +static struct tabular_row * +row_gen(const struct tabular_user *user_cfg) +{ + struct tabular_row *row; + size_t i; + int rc; + + if (!read_line()) return NULL; + + row = tabular_alloc_row(&cfg, &rc, + (struct tabular_user) { .id = linecnt++ }); + if (!row) errx(1, "tabular_append_row %i", rc); + + for (i = 0; i < cols.len; i++) { + tabular_load_row_entry_hidden(&cfg, row, i); + tabular_load_row_entry_str(&cfg, row, i); + } + + return row; +} + +static char * +col_str(const struct tabular_user *user_row, const struct tabular_user *user_col) +{ + char *str; + size_t *off; + + if (user_col->id >= dvec_len(&entries)) + return NULL; + + off = dvec_at(&entries, user_col->id); + str = strdup(line.data + *off); + if (!str) die("strdup:"); + + return str; +} + +static bool +col_hidden(const struct tabular_user *user, const struct tabular_user *user_col) +{ + size_t *off; + + if (user_col->id >= dvec_len(&entries)) + return true; + + off = dvec_at(&entries, user_col->id); + return !strcmp(line.data + *off, ""); +} + +static bool +bool_arg(const char *arg, const char *name) +{ + if (!strcmp(arg, "1") || !strcmp(arg, "true")) + return true; + else if (!strcmp(arg, "0") || !strcmp(arg, "false")) + return false; + else + die("bad %s", name); +} + +static void +parse(int argc, const char **argv) +{ + struct tabular_col *col; + const char **arg, **dst; + struct hmap_link *link; + struct hmap_iter iter; + struct winsize ws; + char namebuf[64]; + char *end, *c, *upper; + int rc, n; + + hmap_init(&colmap, 16, hmap_str_hash, hmap_str_keycmp, ga); + dvec_init(&cols, sizeof(struct tabular_col), 0, ga); + + /* get general flags */ + for (dst = arg = argv + 1; *arg; arg++) { + if (!strcmp(*arg, "--hsep")) { + if (!*++arg) die("missing args"); + cfg.hsep = *arg; + } else if (!strcmp(*arg, "--vsep")) { + if (!*++arg) die("missing args"); + cfg.vsep = *arg; + } else if (!strcmp(*arg, "--xsep")) { + if (!*++arg) die("missing args"); + cfg.xsep = *arg; + } else if (!strcmp(*arg, "--fit-rows")) { + if (!*++arg) die("missing args"); + cfg.fit_rows = bool_arg(*arg, "--fit-rows"); + } else if (!strcmp(*arg, "--outw")) { + if (!*++arg) die("missing args"); + cfg.outw = strtoul(*arg, &end, 10); + if (end && *end) die("bad %s", arg[-1]); + } else if (!strcmp(*arg, "--outh")) { + if (!*++arg) die("missing args"); + cfg.outh = strtoul(*arg, &end, 10); + if (end && *end) die("bad %s", arg[-1]); + } else if (!strcmp(*arg, "--skip-lines")) { + if (!*++arg) die("missing args"); + cfg.skip_lines = strtoul(*arg, &end, 10); + if (end && *end) die("bad %s", arg[-1]); + } else if (!strcmp(*arg, "--hide-empty")) { + if (!*++arg) die("missing args"); + hide_empty = bool_arg(*arg, arg[-1]); + } else if (!strcmp(*arg, "--line-sep")) { + if (!*++arg || !**arg || *(*arg+1)) + die("missing args"); + line_sep = **arg; + } else if (!strcmp(*arg, "--entry-sep")) { + if (!*++arg || !**arg || *(*arg+1)) + die("missing args"); + entry_sep = **arg; + } else if (!strcmp(*arg, "--skip-empty-entries")) { + if (!*++arg) die("missing args"); + skip_empty_entries = bool_arg(*arg, arg[-1]); + } else if (!strcmp(*arg, "--skip-empty-lines")) { + if (!*++arg) die("missing args"); + skip_empty_lines = bool_arg(*arg, arg[-1]); + } else { + *dst++ = *arg; + } + } + *dst = NULL; + + if (!cfg.outw || !cfg.outh) { + rc = ioctl(1, TIOCGWINSZ, &ws); + if (!rc) { + cfg.outw = ws.ws_col; + cfg.outh = ws.ws_row; + } else { + cfg.outw = 80; + cfg.outh = 26; + } + } + + /* get columns */ + for (dst = arg = argv + 1; *arg; arg++) { + if (!strcmp(*arg, "--col")) { + if (!*++arg) die("missing args"); + col = ga->alloc(ga, sizeof(struct tabular_col), NULL); + + if (strlen(*arg) > 63) die("col name too long"); + strncpy(namebuf, *arg, 64); + for (c = namebuf; *c; c++) + *c = (*c >= 'a' && *c <= 'z') ? *c - 32 : *c; + upper = strdup(namebuf); + if (!upper) die("strdup:"); + + col->name = *arg; + col->align = TABULAR_ALIGN_LEFT; + col->essential = false; + col->is_hidden = col_hidden; + col->to_str = col_str; + col->lpad = 0; + col->rpad = 0; + col->minwidth = strlen(col->name); + col->maxwidth = cfg.outw; + col->strategy = TABULAR_SQUASH_WORDAWARE; + col->user.id = colcnt++; + rc = hmap_add(&colmap, + (struct hmap_key) { .p = upper }, + (struct hmap_val) { .p = col }); + if (rc) die("duplicate col %s", *col->name); + } else { + *dst++ = *arg; + } + } + *dst = NULL; + + /* set column attrs */ + for (dst = arg = argv + 1; *arg; arg++) { + if (!strncmp(*arg, "--", 2)) { + rc = sscanf(*arg, "--%63[^-]-%n", namebuf, &n); + if (rc != 1 || *(*arg+n-1) != '-') goto skip; + link = hmap_get(&colmap, + (struct hmap_key) { .p = namebuf }); + if (!link) goto skip; + col = link->value._p; + + if (!strcmp(*arg + n, "lpad")) { + if (!*++arg) die("missing args"); + col->lpad = strtoul(*arg, &end, 10); + if (end && *end) die("bad %s", arg[-1]); + } else if (!strcmp(*arg + n, "rpad")) { + if (!*++arg) die("missing args"); + col->rpad = strtoul(*arg, &end, 10); + if (end && *end) die("bad %s", arg[-1]); + } else if (!strcmp(*arg + n, "align")) { + if (!*++arg) die("missing args"); + if (!strcmp(*arg, "left")) { + col->align = TABULAR_ALIGN_LEFT; + } else if (!strcmp(*arg, "right")) { + col->align = TABULAR_ALIGN_RIGHT; + } else if (!strcmp(*arg, "center")) { + col->align = TABULAR_ALIGN_CENTER; + } else { + die("bad %s", arg[-1]); + } + } else if (!strcmp(*arg + n, "essential")) { + if (!*++arg) die("missing args"); + col->essential = bool_arg(*arg, arg[-1]); + } else if (!strcmp(*arg + n, "minwidth")) { + if (!*++arg) die("missing args"); + col->minwidth = strtoul(*arg, &end, 10); + if (col->minwidth > col->maxwidth) + col->maxwidth = col->minwidth; + if (end && *end) die("bad %s", arg[-1]); + } else if (!strcmp(*arg + n, "maxwidth")) { + if (!*++arg) die("missing args"); + col->maxwidth = strtoul(*arg, &end, 10); + if (end && *end) die("bad %s", arg[-1]); + } else if (!strcmp(*arg + n, "strategy")) { + if (!*++arg) die("missing args"); + if (!strcmp(*arg, "word-aware")) { + col->strategy = TABULAR_SQUASH_WORDAWARE; + } else if (!strcmp(*arg, "squash")) { + col->strategy = TABULAR_SQUASH; + } else if (!strcmp(*arg, "trunc")) { + col->strategy = TABULAR_TRUNC; + } else { + die("bad %s", arg[-1]); + } + } else { + goto skip; + } + } else { +skip: + *dst++ = *arg; + } + } + *dst = NULL; + + if (argv[1]) die("unused argument '%s'", argv[1]); + + dvec_add_back(&cols, colcnt); + for (HMAP_ITER(&colmap, iter)) { + col = iter.link->value._p; + memcpy(dvec_at(&cols, col->user.id), col, + sizeof(struct tabular_col)); + } + cfg.columns = cols.data; + cfg.column_count = cols.len; +} + +int +main(int argc, const char **argv) +{ + struct tabular_row *rows; + struct tabular_stats stats; + int rc; + + parse(argc, argv); + + dvec_init(&line, 1, 1024, ga); + dvec_init(&input, 1, 1024, ga); + dvec_init(&entries, sizeof(size_t), 1, ga); + + rows = NULL; + rc = tabular_format(stdout, &cfg, &stats, &rows); + if (rc) errx(1, "tabular_format (%i)", rc); + + printf("\n%lu lines, %lu rows\n", + stats.lines_used, stats.rows_displayed); + + tabular_free_rows(&cfg, rows); +}