bingram

2D binary n-gram visualization inspired by ..cantor.dust..
git clone https://git.sinitax.com/sinitax/bingram
Log | Files | Refs | sfeed.txt

bingram.c (11554B)


      1#include "raylib.h"
      2
      3#include <err.h>
      4#include <math.h>
      5#include <string.h>
      6#include <stdint.h>
      7#include <stdio.h>
      8#include <stdlib.h>
      9
/* Compile-time constants. */
#define CHUNK_SIZE 4096
/* Zoom ceiling: the wheel handlers in vis() only double zoom / bar_zoom while below this. */
#define ZOOM_MAX 64

/*
 * Two-argument max / min. These are function-like macros, so each argument
 * may be evaluated more than once: do not pass expressions with side effects.
 */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) > (b) ? (b) : (a))
     15
/* Input: path taken from argv in main(), and the bytes read_file() loaded from it. */
const char *file_path;
uint8_t *file_data;
size_t data_len;
/* Dimensions of the plotted grid, in cells; vis() sets both to 256. */
ssize_t data_width;
ssize_t data_height;

/* Byte range [start, start + len) of file_data that update_bigram_colors() counts. */
ssize_t data_window_start;
ssize_t data_window_len;

/* One gray Color per input byte (see init_value_colors()); drawn in the side bar. */
Color *value_colors;

/* 256 * 256 tables indexed by [next_byte * 256 + current_byte]. */
Color *bigram_colors;
size_t *bigram_counts;

/* 256 * 256 * 256 tables; the visible code only ever zeroes them. */
Color *trigram_colors;
size_t *trigram_counts;

/* Value of the -c option (default 1); parsed in main(), not read elsewhere in this file. */
int entropy_ctx;

/* Screen pixels per grid cell, and the grid coordinate shown at the window's top-left. */
int zoom;
double zoom_x, zoom_y;

/* Side bar: width in pixels, byte stride between bar pixels, file offset of its first pixel. */
int bar_width;
int bar_zoom;
ssize_t bar_start;
bool show_bar;

/* Window size as last queried in vis(), the title buffer, and overlay/mode toggles. */
int window_width;
int window_height;
char *window_title;
bool show_pos;
bool show_trigram;

/* Mouse position clamped to the window, refreshed every frame. */
int mouse_x, mouse_y;

/* Mouse position and zoom offset captured when the left button went down; drag is the active flag. */
int  drag_mouse_x, drag_mouse_y;
double drag_zoom_x, drag_zoom_y;
bool drag;

/* Per-key hold counters for key_press_hold(), indexed by key code. */
uint64_t hold_times[337] = { 0 };

/* Scratch buffer for the text overlays drawn in vis(). */
char fmtbuf[256];
     58
     59void
     60usage(void)
     61{
     62	printf("Usage: bingram FILE\n");
     63	exit(0);
     64}
     65
     66uint8_t *
     67read_file(const char *path, size_t *len)
     68{
     69	FILE *file;
     70	char *chunk;
     71	uint8_t *data;
     72	ssize_t nread;
     73	size_t cap;
     74
     75	chunk = malloc(CHUNK_SIZE);
     76	if (!chunk) err(1, "malloc");
     77
     78	file = fopen(file_path, "r");
     79	if (!file) err(1, "fopen");
     80
     81	cap = 16 * 1024;
     82	data = malloc(cap);
     83	if (!data) err(1, "malloc");
     84
     85	*len = 0;
     86	while ((nread = fread(chunk, 1, CHUNK_SIZE, file)) > 0) {
     87		if (*len + nread > cap) {
     88			cap *= 2;
     89			data = realloc(data, cap);
     90			if (!data) err(1, "realloc");
     91		}
     92		memcpy(data + *len, chunk, nread);
     93		*len += nread;
     94	}
     95
     96	fclose(file);
     97
     98	return data;
     99}
    100
    101void
    102init_value_colors(Color *data_colors)
    103{
    104	size_t pos;
    105	Color c;
    106
    107	c.a = 255;
    108	for (pos = 0; pos < data_len; pos++) {
    109		c.r = file_data[pos];
    110		c.g = file_data[pos];
    111		c.b = file_data[pos];
    112		data_colors[pos] = c;
    113	}
    114}
    115
    116void
    117update_bigram_colors(void)
    118{
    119	ssize_t i, k, x, y;
    120	ssize_t max;
    121	double freq;
    122	Color c;
    123
    124	max = -1;
    125	memset(bigram_counts, 0, sizeof(size_t) * 256 * 256);
    126	for (i = data_window_start; i < data_window_start + data_window_len - 1; i++) {
    127		x = file_data[i];
    128		y = file_data[i+1];
    129		bigram_counts[y * 256 + x] += 1;
    130		if (bigram_counts[y * 256 + x] > max || max == -1)
    131			max = bigram_counts[y * 256 + x];
    132	}
    133
    134	c.a = 255;
    135	for (y = 0; y < 256; y++) {
    136		for (x = 0; x < 256; x++) {
    137			freq = MIN(bigram_counts[y * 256 + x] * 255.F / 4, 255);
    138			c.r = freq;
    139			c.g = freq;
    140			c.b = freq;
    141			bigram_colors[y * 256 + x] = c;
    142		}
    143	}
    144}
    145
    146void
    147update_trigram_colors(void)
    148{
    149	memset(trigram_counts, 0, sizeof(size_t) * 256 * 256 * 256);
    150	memset(trigram_colors, 0, sizeof(Color) * 256 * 256 * 256);
    151}
    152
    153void
    154center_zoom(void)
    155{
    156	if (show_bar) {
    157		zoom_x = -1.F * bar_width / zoom
    158			- (1.F * (window_width - bar_width)
    159				/ zoom - data_width) / 2.F;
    160	} else {
    161		zoom_x = -(1.F * window_width / zoom - data_width) / 2.F;
    162	}
    163	zoom_y = -(1.F * window_height / zoom - data_height) / 2.F;
    164}
    165
    166bool
    167key_press_hold(int key)
    168{
    169	if (!IsKeyDown(key)) {
    170		hold_times[key] = 0;
    171		return false;
    172	}
    173
    174	if (IsKeyPressed(key))
    175		return true;
    176	hold_times[key] += 1;
    177
    178	return hold_times[key] > 10;
    179}
    180
    181void
    182vis(void)
    183{
    184	Image window_image;
    185	int window_init_frames;
    186	size_t pos, len;
    187	double mouse_move;
    188	ssize_t data_x, data_y;
    189	size_t data_pos;
    190	bool data_update;
    191	size_t x, y;
    192
    193	show_trigram = true;
    194
    195	window_title = malloc(1024);
    196	if (!window_title) err(1, "malloc");
    197	snprintf(window_title, 1024, "bingram %s", file_path);
    198
    199	SetConfigFlags(FLAG_WINDOW_RESIZABLE);
    200	SetTargetFPS(60);
    201
    202	SetTraceLogLevel(LOG_NONE);
    203
    204	InitWindow(800, 600, window_title);
    205
    206	window_image = GenImageColor(800, 600, (Color) { 0 });
    207
    208	zoom = 2;
    209	bar_zoom = 4;
    210	bar_start = 0;
    211	bar_width = MAX(80, window_width / 7);
    212	show_bar = true;
    213
    214	data_width = 256;
    215	data_height = 256;
    216	data_window_start = 0;
    217	data_window_len = data_len;
    218
    219	show_pos = false;
    220
    221	show_trigram = false;
    222	data_update = true;
    223	window_init_frames = 2;
    224	while (!WindowShouldClose()) {
    225		if (IsKeyPressed(KEY_Q))
    226			break;
    227
    228		if (window_init_frames || IsWindowResized() || IsKeyPressed(KEY_G)) {
    229			window_width = GetScreenWidth();
    230			window_height = GetScreenHeight();
    231			bar_width = MAX(80, window_width / 7);
    232			if (window_init_frames > 0) {
    233				data_window_len = MIN(data_len,
    234					MAX(window_width / 2 / zoom
    235						* window_height / 2 / zoom, 200));
    236				data_update = true;
    237			}
    238			if (IsKeyPressed(KEY_G))
    239				zoom = 2;
    240			center_zoom();
    241			ImageResize(&window_image, window_width, window_height);
    242			if (window_init_frames)
    243				window_init_frames -= 1;
    244		}
    245
    246		mouse_x = MIN(MAX(0, GetMouseX()), window_width);
    247		mouse_y = MIN(MAX(0, GetMouseY()), window_height);
    248
    249		if (IsMouseButtonPressed(MOUSE_LEFT_BUTTON)) {
    250			drag_mouse_x = mouse_x;
    251			drag_mouse_y = mouse_y;
    252			drag_zoom_x = zoom_x;
    253			drag_zoom_y = zoom_y;
    254			drag = true;
    255		}else if (!IsMouseButtonDown(MOUSE_BUTTON_LEFT)) {
    256			drag = false;
    257		}
    258
    259		if (drag && show_bar && drag_mouse_x < bar_width) {
    260			if (mouse_y > window_height - 20) {
    261				bar_start += bar_zoom * bar_width * 20;
    262			} else if (mouse_y < 20) {
    263				bar_start -= bar_zoom * bar_width * 20;
    264			}
    265			bar_start = MIN((ssize_t) data_len
    266				- bar_width * window_height * bar_zoom, bar_start);
    267			bar_start = MAX(0, bar_start);
    268
    269			data_window_start = bar_start + (mouse_y * bar_width + mouse_x) * bar_zoom;
    270			data_window_start = MAX(0, MIN(
    271				(ssize_t) data_len - data_window_len,
    272				data_window_start - data_window_len / 2));
    273		} else if (drag) {
    274			zoom_x = drag_zoom_x - (mouse_x - drag_mouse_x) * 1.F / zoom;
    275			zoom_y = drag_zoom_y - (mouse_y - drag_mouse_y) * 1.F / zoom;
    276		}
    277
    278		mouse_move = GetMouseWheelMove();
    279		if (show_bar && mouse_x < bar_width) {
    280			if (mouse_move > 0 && bar_zoom < ZOOM_MAX)
    281				bar_zoom *= 2;
    282			else if (mouse_move < 0 && bar_zoom > 1)
    283				bar_zoom /= 2;
    284		} else {
    285			if (mouse_move > 0 && zoom < ZOOM_MAX) {
    286				zoom_x += (1.F * window_width / zoom / 2)
    287					* (1.F * mouse_x / window_width);
    288				zoom_y += (1.F * window_height / zoom / 2)
    289					* (1.F * mouse_y / window_height);
    290				zoom *= 2;
    291			} else if (mouse_move < 0 && zoom > 1) {
    292				zoom_x -= (1.F * window_width / zoom)
    293					* (1.F * mouse_x / window_width);
    294				zoom_y -= (1.F * window_height / zoom)
    295					* (1.F * mouse_y / window_height);
    296				zoom /= 2;
    297			}
    298		}
    299
    300		show_pos ^= IsKeyPressed(KEY_T);
    301		show_bar ^= IsKeyPressed(KEY_B);
    302
    303		if (IsKeyDown(KEY_LEFT_CONTROL)) {
    304			if (key_press_hold(KEY_LEFT) || key_press_hold(KEY_A)) {
    305				data_window_start -= 4;
    306				data_update = true;
    307			} else if (key_press_hold(KEY_RIGHT) || key_press_hold(KEY_D)) {
    308				data_window_start += 4;
    309				data_update = true;
    310			}
    311			data_window_start = MAX(0, data_window_start);
    312		} else if (IsKeyDown(KEY_LEFT_ALT)) {
    313			len = MIN(data_window_len, bar_zoom * bar_width * 4);
    314			if (len % bar_width * bar_zoom)
    315				len += bar_width * bar_zoom - (len % bar_width * bar_zoom);
    316			if (key_press_hold(KEY_LEFT) || key_press_hold(KEY_A)) {
    317				data_window_start -= len;
    318				data_update = true;
    319			} else if (key_press_hold(KEY_RIGHT) || key_press_hold(KEY_D)) {
    320				data_window_start += len;
    321				data_update = true;
    322			}
    323			data_window_start = MIN(data_len - data_window_len,
    324				MAX(0, data_window_start));
    325			if (key_press_hold(KEY_UP) || key_press_hold(KEY_W)) {
    326				data_window_len -= len;
    327				data_update = true;
    328			} else if (key_press_hold(KEY_DOWN) || key_press_hold(KEY_S)) {
    329				data_window_len += len;
    330				data_update = true;
    331			}
    332			data_window_len = MIN(data_len - data_window_start,
    333				MAX(1, data_window_len));
    334		} else {
    335			if (key_press_hold(KEY_LEFT) || key_press_hold(KEY_A)) {
    336				zoom_x -= 20.F / zoom;
    337			} else if (key_press_hold(KEY_RIGHT) || key_press_hold(KEY_D)) {
    338				zoom_x += 20.F / zoom;
    339			}
    340
    341			if (key_press_hold(KEY_UP) || key_press_hold(KEY_W)) {
    342				zoom_y -= 20.F / zoom;
    343			} else if (key_press_hold(KEY_DOWN) || key_press_hold(KEY_S)) {
    344				zoom_y += 20.F / zoom;
    345			}
    346		}
    347
    348		if (IsKeyPressed(KEY_P))
    349			show_trigram = !show_trigram;
    350
    351		if (data_update) {
    352			if (show_trigram)
    353				update_trigram_colors();
    354			else
    355				update_bigram_colors();
    356			data_update = false;
    357		}
    358
    359		ImageClearBackground(&window_image, BLACK);
    360
    361		for (y = 0; y < window_height; y++) {
    362			data_y = (ssize_t) (zoom_y) + (y / zoom);
    363			if (data_y < 0 || data_y >= data_height)
    364				continue;
    365
    366			for (x = 0; x < window_width; x++) {
    367				data_x = (ssize_t) (zoom_x) + (x / zoom);
    368				if (data_x < 0 || data_x >= data_width)
    369					continue;
    370
    371				data_pos = data_y * data_width + data_x;
    372				ImageDrawPixel(&window_image,
    373					x, y, bigram_colors[data_pos]);
    374			}
    375		}
    376
    377		if (show_bar) {
    378			ImageDrawRectangle(&window_image,
    379				0, 0, bar_width, window_height, BLACK);
    380			ImageDrawRectangle(&window_image, bar_width, 0,
    381				3, window_height, BLACK);
    382			ImageDrawLine(&window_image, bar_width + 1, 0,
    383				bar_width + 1, window_height, GRAY);
    384
    385			for (y = 0; y < window_height; y++) {
    386				for (x = 0; x < bar_width; x++) {
    387					data_pos = bar_start + (y * bar_width + x) * bar_zoom;
    388					if (data_pos < data_len) {
    389						ImageDrawPixel(&window_image,
    390							x, y, value_colors[data_pos]);
    391					}
    392				}
    393			}
    394
    395			pos = (data_window_start - bar_start) / bar_width / bar_zoom;
    396			ImageDrawLine(&window_image, 0, pos,
    397				bar_width, pos, WHITE);
    398
    399			pos = MIN(data_len, data_window_start + data_window_len);
    400			pos = (pos - bar_start) / bar_width / bar_zoom;
    401			ImageDrawLine(&window_image, 0, pos,
    402				bar_width, pos, WHITE);
    403		}
    404
    405		if (show_pos) {
    406			snprintf(fmtbuf, sizeof(fmtbuf),
    407				"%08lx > %08lx", data_window_start,
    408				data_window_start + data_window_len);
    409			len = MeasureText(fmtbuf, 20) + 20;
    410			ImageDrawRectangle(&window_image,
    411				window_width - len - 9, 0,
    412				len + 9, 19, WHITE);
    413			ImageDrawText(&window_image,
    414				fmtbuf, window_width - len - 6,
    415				0, 20, BLACK);
    416
    417			data_x = (ssize_t) zoom_x + (mouse_x / zoom);
    418			data_y = (ssize_t) zoom_y + (mouse_y / zoom);
    419			if (data_x >= 0 && data_x < data_width
    420					&& data_y >= 0 && data_y < data_height) {
    421				snprintf(fmtbuf, sizeof(fmtbuf),
    422					"%02lx > %02lx", data_x, data_y);
    423				len = MeasureText(fmtbuf, 20);
    424				ImageDrawRectangle(&window_image,
    425					window_width - len - 4, 19,
    426					len + 4, 19, WHITE);
    427				ImageDrawText(&window_image,
    428					fmtbuf, window_width - len - 1,
    429					20, 19, BLACK);
    430			}
    431		}
    432
    433		Texture2D tex = LoadTextureFromImage(window_image);
    434
    435		BeginDrawing();
    436
    437		ClearBackground(BLACK);
    438
    439		DrawTexture(tex, 0, 0, WHITE);
    440
    441		EndDrawing();
    442
    443		UnloadTexture(tex);
    444	}
    445
    446	UnloadImage(window_image);
    447
    448	CloseWindow();
    449
    450	free(window_title);
    451}
    452
    453int
    454main(int argc, const char **argv)
    455{
    456	const char **arg;
    457
    458	file_path = NULL;
    459	entropy_ctx = 1;
    460	for (arg = argv + 1; *arg; arg++) {
    461		if (!strcmp("-c", *arg)) {
    462			arg += 1;
    463			if (!arg) usage();
    464			entropy_ctx = atoi(*arg);
    465			if (entropy_ctx <= 0) usage();
    466		} else if (!file_path) {
    467			file_path = *arg;
    468		} else {
    469			usage();
    470		}
    471	}
    472
    473	if (!file_path)
    474		usage();
    475
    476	if (!strcmp(file_path, "-"))
    477		file_path = "/dev/stdin";
    478
    479	file_data = read_file(file_path, &data_len);
    480
    481	value_colors = malloc(sizeof(Color) * data_len);
    482	if (!value_colors) err(1, "malloc");
    483	init_value_colors(value_colors);
    484
    485	bigram_colors = malloc(sizeof(Color) * 256 * 256);
    486	if (!bigram_colors) err(1, "malloc");
    487	bigram_counts = malloc(sizeof(size_t) * 256 * 256);
    488	if (!bigram_counts) err(1, "malloc");
    489
    490	trigram_colors = malloc(sizeof(Color) * 256 * 256 * 256);
    491	if (!trigram_colors) err(1, "malloc");
    492	trigram_counts = malloc(sizeof(size_t) * 256 * 256 * 256);
    493	if (!trigram_counts) err(1, "malloc");
    494
    495	vis();
    496
    497	free(trigram_counts);
    498	free(trigram_colors);
    499
    500	free(bigram_counts);
    501	free(bigram_colors);
    502
    503	free(value_colors);
    504
    505	free(file_data);
    506}