bingram

2D binary n-gram visualization inspired by ..cantor.dust..
git clone https://git.sinitax.com/sinitax/bingram
Log | Files | Refs | sfeed.txt

commit ceb6b94c05f3d1fb07c3cc79f47dc4619a65c5ce
Author: Louis Burda <quent.burda@gmail.com>
Date:   Sun, 12 Feb 2023 20:39:34 +0100

Initial prototype with raylib

Diffstat:
A .gitignore | 1 +
A Makefile | 20 ++++++++++++++++++++
A bingram.c | 484 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 505 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1 @@ +bingram diff --git a/Makefile b/Makefile @@ -0,0 +1,20 @@ +PREFIX ?= /usr/local +BINDIR ?= /bin + +CFLAGS = -g +LDLIBS = -lraylib -lm + +all: bingram + +clean: + rm -f bingram + +bingram: bingram.c + +install: + install -m755 -t bingram "$(DESTDIR)/$(PREFIX)/$(BINDIR)" + +uninstall: + rm "$(DESTDIR)/$(PREFIX)/$(BINDIR)/bingram" + +.PHONY: all clean install uninstall diff --git a/bingram.c b/bingram.c @@ -0,0 +1,484 @@ +#include "raylib.h" + +#include <err.h> +#include <math.h> +#include <string.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +#define CHUNK_SIZE 4096 +#define ZOOM_MAX 64 + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#define MIN(a, b) ((a) > (b) ? (b) : (a)) + +const char *file_path; +uint8_t *file_data; +size_t data_len; +ssize_t data_width; +ssize_t data_height; + +ssize_t data_window_start; +ssize_t data_window_len; + +Color *value_colors; + +Color *bigram_colors; +size_t *bigram_counts; + +Color *trigram_colors; +size_t *trigram_counts; + +int entropy_ctx; + +int zoom; +double zoom_x, zoom_y; + +int bar_width; +int bar_zoom; +ssize_t bar_start; +bool show_bar; + +int window_width; +int window_height; +char *window_title; +bool show_pos; +bool show_trigram; + +int mouse_x, mouse_y; + +int drag_mouse_x, drag_mouse_y; +double drag_zoom_x, drag_zoom_y; +bool drag; + +uint64_t hold_times[337] = { 0 }; + +char fmtbuf[256]; + +void +usage(void) +{ + printf("Usage: bingram FILE\n"); + exit(0); +} + +uint8_t * +read_file(const char *path, size_t *len) +{ + FILE *file; + char *chunk; + uint8_t *data; + ssize_t nread; + size_t cap; + + chunk = malloc(CHUNK_SIZE); + if (!chunk) err(1, "malloc"); + + file = fopen(file_path, "r"); + if (!file) err(1, "fopen"); + + cap = 16 * 1024; + data = malloc(cap); + if (!data) err(1, "malloc"); + + *len = 0; + while ((nread = fread(chunk, 1, CHUNK_SIZE, file)) > 0) { + if (*len + nread > cap) { + cap *= 2; + data = realloc(data, cap); + if (!data) 
err(1, "realloc"); + } + memcpy(data + *len, chunk, nread); + *len += nread; + } + + fclose(file); + + return data; +} + +void +init_value_colors(Color *data_colors) +{ + size_t pos; + Color c; + + c.a = 255; + for (pos = 0; pos < data_len; pos++) { + c.r = file_data[pos]; + c.g = file_data[pos]; + c.b = file_data[pos]; + data_colors[pos] = c; + } +} + +void +update_bigram_colors(void) +{ + ssize_t i, k, x, y; + ssize_t max; + double freq; + Color c; + + max = -1; + memset(bigram_counts, 0, sizeof(size_t) * 256 * 256); + for (i = data_window_start; i < data_window_start + data_window_len - 1; i++) { + x = file_data[i]; + y = file_data[i+1]; + bigram_counts[y * 256 + x] += 1; + if (bigram_counts[y * 256 + x] > max || max == -1) + max = bigram_counts[y * 256 + x]; + } + + c.a = 255; + for (y = 0; y < 256; y++) { + for (x = 0; x < 256; x++) { + freq = MIN(bigram_counts[y * 256 + x] * 255.F / 4, 255); + c.r = freq; + c.g = freq; + c.b = freq; + bigram_colors[y * 256 + x] = c; + } + } +} + +void +update_trigram_colors(void) +{ + memset(trigram_counts, 0, sizeof(size_t) * 256 * 256 * 256); + memset(trigram_colors, 0, sizeof(Color) * 256 * 256 * 256); +} + +bool +key_press_hold(int key) +{ + if (!IsKeyDown(key)) { + hold_times[key] = 0; + return false; + } + + if (IsKeyPressed(key)) + return true; + hold_times[key] += 1; + + return hold_times[key] > 10; +} + +void +vis(void) +{ + Image window_image; + int window_init_frames; + size_t pos, len; + double mouse_move; + ssize_t data_x, data_y; + size_t data_pos; + bool data_update; + size_t x, y; + + show_trigram = true; + + window_title = malloc(1024); + if (!window_title) err(1, "malloc"); + snprintf(window_title, 1024, "bingram %s", file_path); + + SetConfigFlags(FLAG_WINDOW_RESIZABLE); + SetTargetFPS(60); + + SetTraceLogLevel(LOG_NONE); + + InitWindow(800, 600, window_title); + + window_image = GenImageColor(800, 600, (Color) { 0 }); + + zoom = 2; + bar_zoom = 4; + bar_start = 0; + bar_width = MAX(80, window_width / 7); + 
show_bar = true; + + data_width = 256; + data_height = 256; + data_window_start = 0; + data_window_len = data_len; + + show_pos = false; + + show_trigram = false; + data_update = true; + window_init_frames = 2; + while (!WindowShouldClose()) { + if (IsKeyPressed(KEY_Q)) + break; + + if (window_init_frames || IsWindowResized() || IsKeyPressed(KEY_G)) { + window_width = GetScreenWidth(); + window_height = GetScreenHeight(); + bar_width = MAX(80, window_width / 7); + if (window_init_frames > 0) { + data_window_len = MIN(data_len, + MAX(window_width / 2 / zoom + * window_height / 2 / zoom, 200)); + data_update = true; + } + if (IsKeyPressed(KEY_G)) + zoom = 2; + zoom_x = (data_width - 1.F * window_width / zoom) / 2.F; + zoom_y = (data_height - 1.F * window_height / zoom) / 2.F; + ImageResize(&window_image, window_width, window_height); + if (window_init_frames) + window_init_frames -= 1; + } + + mouse_x = MIN(MAX(0, GetMouseX()), window_width); + mouse_y = MIN(MAX(0, GetMouseY()), window_height); + + if (IsMouseButtonPressed(MOUSE_LEFT_BUTTON)) { + drag_mouse_x = mouse_x; + drag_mouse_y = mouse_y; + drag_zoom_x = zoom_x; + drag_zoom_y = zoom_y; + drag = true; + }else if (!IsMouseButtonDown(MOUSE_BUTTON_LEFT)) { + drag = false; + } + + if (drag && show_bar && drag_mouse_x < bar_width) { + if (mouse_y > window_height - 20) { + bar_start += bar_zoom * bar_width * 20; + } else if (mouse_y < 20) { + bar_start -= bar_zoom * bar_width * 20; + } + bar_start = MIN((ssize_t) data_len + - bar_width * window_height * bar_zoom, bar_start); + bar_start = MAX(0, bar_start); + + data_window_start = bar_start + (mouse_y * bar_width + mouse_x) * bar_zoom; + data_window_start = MAX(0, MIN( + (ssize_t) data_len - data_window_len, + data_window_start - data_window_len / 2)); + } else if (drag) { + zoom_x = drag_zoom_x - (mouse_x - drag_mouse_x) * 1.F / zoom; + zoom_y = drag_zoom_y - (mouse_y - drag_mouse_y) * 1.F / zoom; + } + + mouse_move = GetMouseWheelMove(); + if (show_bar && mouse_x < 
bar_width) { + if (mouse_move > 0 && bar_zoom < ZOOM_MAX) + bar_zoom *= 2; + else if (mouse_move < 0 && bar_zoom > 1) + bar_zoom /= 2; + } else { + if (mouse_move > 0 && zoom < ZOOM_MAX) { + zoom_x += (1.F * window_width / zoom / 2) + * (1.F * mouse_x / window_width); + zoom_y += (1.F * window_height / zoom / 2) + * (1.F * mouse_y / window_height); + zoom *= 2; + } else if (mouse_move < 0 && zoom > 1) { + zoom_x -= (1.F * window_width / zoom) + * (1.F * mouse_x / window_width); + zoom_y -= (1.F * window_height / zoom) + * (1.F * mouse_y / window_height); + zoom /= 2; + } + } + + show_pos ^= IsKeyPressed(KEY_T); + show_bar ^= IsKeyPressed(KEY_B); + + if (IsKeyDown(KEY_LEFT_CONTROL)) { + if (key_press_hold(KEY_LEFT) || key_press_hold(KEY_A)) { + data_window_start -= 4; + data_update = true; + } else if (key_press_hold(KEY_RIGHT) || key_press_hold(KEY_D)) { + data_window_start += 4; + data_update = true; + } + data_window_start = MAX(0, data_window_start); + } else if (IsKeyDown(KEY_LEFT_ALT)) { + len = MIN(data_window_len, bar_zoom * bar_width * 4); + if (len % bar_width * bar_zoom) + len += bar_width * bar_zoom - (len % bar_width * bar_zoom); + if (key_press_hold(KEY_LEFT) || key_press_hold(KEY_A)) { + data_window_start -= len; + data_update = true; + } else if (key_press_hold(KEY_RIGHT) || key_press_hold(KEY_D)) { + data_window_start += len; + data_update = true; + } + data_window_start = MIN(data_len - data_window_len, + MAX(0, data_window_start)); + if (key_press_hold(KEY_UP) || key_press_hold(KEY_W)) { + data_window_len -= len; + data_update = true; + } else if (key_press_hold(KEY_DOWN) || key_press_hold(KEY_S)) { + data_window_len += len; + data_update = true; + } + data_window_len = MIN(data_len - data_window_start, + MAX(1, data_window_len)); + } else { + if (key_press_hold(KEY_LEFT) || key_press_hold(KEY_A)) { + zoom_x -= 20.F / zoom; + } else if (key_press_hold(KEY_RIGHT) || key_press_hold(KEY_D)) { + zoom_x += 20.F / zoom; + } + + if 
(key_press_hold(KEY_UP) || key_press_hold(KEY_W)) { + zoom_y -= 20.F / zoom; + } else if (key_press_hold(KEY_DOWN) || key_press_hold(KEY_S)) { + zoom_y += 20.F / zoom; + } + } + + if (IsKeyPressed(KEY_P)) + show_trigram = !show_trigram; + + if (data_update) { + if (show_trigram) + update_trigram_colors(); + else + update_bigram_colors(); + data_update = false; + } + + ImageClearBackground(&window_image, BLACK); + + for (y = 0; y < window_height; y++) { + data_y = (ssize_t) (zoom_y) + (y / zoom); + if (data_y < 0 || data_y >= data_height) + continue; + + for (x = 0; x < window_width; x++) { + data_x = (ssize_t) (zoom_x) + (x / zoom); + if (data_x < 0 || data_x >= data_width) + continue; + + data_pos = data_y * data_width + data_x; + ImageDrawPixel(&window_image, + x, y, bigram_colors[data_pos]); + } + } + + if (show_bar) { + ImageDrawRectangle(&window_image, + 0, 0, bar_width, window_height, BLACK); + ImageDrawRectangle(&window_image, bar_width, 0, + 3, window_height, BLACK); + ImageDrawLine(&window_image, bar_width + 1, 0, + bar_width + 1, window_height, GRAY); + + for (y = 0; y < window_height; y++) { + for (x = 0; x < bar_width; x++) { + data_pos = bar_start + (y * bar_width + x) * bar_zoom; + if (data_pos < data_len) { + ImageDrawPixel(&window_image, + x, y, value_colors[data_pos]); + } + } + } + + pos = (data_window_start - bar_start) / bar_width / bar_zoom; + ImageDrawLine(&window_image, 0, pos, + bar_width, pos, WHITE); + + pos = MIN(data_len, data_window_start + data_window_len); + pos = (pos - bar_start) / bar_width / bar_zoom; + ImageDrawLine(&window_image, 0, pos, + bar_width, pos, WHITE); + } + + if (show_pos) { + data_x = (ssize_t) zoom_x + (mouse_x / zoom); + data_y = (ssize_t) zoom_y + (mouse_y / zoom); + if (data_x >= 0 && data_x < data_width + && data_y >= 0 && data_y < data_height) { + pos = data_window_start + data_y * data_width + data_x; + snprintf(fmtbuf, sizeof(fmtbuf), + "%02lx > %02lx", data_x, data_y); + len = MeasureText(fmtbuf, 20) + 10; 
+ ImageDrawRectangle(&window_image, + window_width - len - 9, 0, + len + 9, 19, WHITE); + ImageDrawText(&window_image, + fmtbuf, window_width - len - 6, + 0, 20, BLACK); + } + } + + Texture2D tex = LoadTextureFromImage(window_image); + + BeginDrawing(); + + ClearBackground(BLACK); + + DrawTexture(tex, 0, 0, WHITE); + + EndDrawing(); + + UnloadTexture(tex); + } + + UnloadImage(window_image); + + CloseWindow(); + + free(window_title); +} + +int +main(int argc, const char **argv) +{ + const char **arg; + + file_path = NULL; + entropy_ctx = 1; + for (arg = argv + 1; *arg; arg++) { + if (!strcmp("-c", *arg)) { + arg += 1; + if (!arg) usage(); + entropy_ctx = atoi(*arg); + if (entropy_ctx <= 0) usage(); + } else if (!file_path) { + file_path = *arg; + } else { + usage(); + } + } + + if (!file_path) + usage(); + + if (!strcmp(file_path, "-")) + file_path = "/dev/stdin"; + + file_data = read_file(file_path, &data_len); + + value_colors = malloc(sizeof(Color) * data_len); + if (!value_colors) err(1, "malloc"); + init_value_colors(value_colors); + + bigram_colors = malloc(sizeof(Color) * 256 * 256); + if (!bigram_colors) err(1, "malloc"); + bigram_counts = malloc(sizeof(size_t) * 256 * 256); + if (!bigram_counts) err(1, "malloc"); + + trigram_colors = malloc(sizeof(Color) * 256 * 256 * 256); + if (!trigram_colors) err(1, "malloc"); + trigram_counts = malloc(sizeof(size_t) * 256 * 256 * 256); + if (!trigram_counts) err(1, "malloc"); + + vis(); + + free(trigram_counts); + free(trigram_colors); + + free(bigram_counts); + free(bigram_colors); + + free(value_colors); + + free(file_data); +}