bgrep

Grep for binary files
git clone https://git.sinitax.com/sinitax/bgrep
Log | Files | Refs | LICENSE | sfeed.txt

bgrep.c (3981B)


#define PCRE2_CODE_UNIT_WIDTH 8

#include <pcre2.h>

#include <unistd.h>

#include <errno.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
     12
     13static const char *marker = "";
     14static const char *pattern = NULL;
     15static bool overlap = false;
     16static size_t group = 0;
     17
     18static void
     19die(const char *fmt, ...)
     20{
     21	va_list ap;
     22
     23	va_start(ap, fmt);
     24	fputs("bgrep: ", stderr);
     25	vfprintf(stderr, fmt, ap);
     26	if (fmt[strlen(fmt)-1] == ':') {
     27		perror(NULL);
     28	} else {
     29		putc('\n', stderr);
     30	}
     31	va_end(ap);
     32
     33	exit(1);
     34}
     35
     36static void
     37pcre2_die(int rc, const char *fmt, ...)
     38{
     39	char error_buf[256];
     40	va_list ap;
     41
     42	pcre2_get_error_message(rc, (uint8_t *) error_buf, sizeof(error_buf));
     43
     44	va_start(ap, fmt);
     45	fputs("bgrep: ", stderr);
     46	vfprintf(stderr, fmt, ap);
     47	fprintf(stderr, "%s\n", error_buf);
     48	va_end(ap);
     49
     50	exit(1);
     51}
     52
     53static void
     54writeall(uint8_t *data, size_t size)
     55{
     56	size_t sent;
     57	ssize_t n;
     58
     59	sent = 0;
     60	while (sent != size) {
     61		n = write(1, data + sent, size - sent);
     62		if (n < 0) die("write:");
     63		if (!n) break;
     64		sent += (size_t) n;
     65	}
     66}
     67
     68static void
     69writef(const char *fmt, ...)
     70{
     71	static void *buf = NULL;
     72	static size_t bufsize = 0;
     73	va_list ap;
     74	va_list cpy;
     75	size_t size;
     76
     77	va_start(ap, fmt);
     78	va_copy(cpy, ap);
     79	size = vsnprintf(NULL, 0, fmt, ap);
     80	if (!bufsize || size + 1 > bufsize) {
     81		bufsize = size + 1;
     82		buf = realloc(buf, bufsize);
     83		if (!buf) die("realloc:");
     84	}
     85	va_end(ap);
     86
     87	va_start(cpy, fmt);
     88	vsnprintf(buf, bufsize, fmt, cpy);
     89	va_end(cpy);
     90
     91	writeall(buf, size);
     92}
     93
     94static void
     95parse(int argc, const char **argv)
     96{
     97	const char **arg;
     98
     99	if (argc < 2) goto usage;
    100
    101	for (arg = argv + 1; *arg; arg++) {
    102		if (!strcmp(*arg, "-h") || !strcmp(*arg, "--help")) {
    103			goto usage;
    104		} else if (!strcmp(*arg, "-g") || !strcmp(*arg, "--group")) {
    105			group = strtoul(*++arg, NULL, 0);
    106		} else if (!strcmp(*arg, "-m") || !strcmp(*arg, "--marker")) {
    107			marker = *++arg;
    108		} else if (!strcmp(*arg, "-o") || !strcmp(*arg, "--overlap")) {
    109			overlap = true;
    110		} else {
    111			if (pattern) goto usage;
    112			pattern = *arg;
    113		}
    114	}
    115	return;
    116
    117usage:
    118	fprintf(stderr, "Usage: bgrep [-g GROUP] [-m MARKER] REGEX\n");
    119	exit(1);
    120}
    121
    122int
    123main(int argc, const char **argv)
    124{
    125	pcre2_match_data *match_data;
    126	pcre2_code *regex = NULL;
    127	uint8_t *match;
    128	uint8_t *file;
    129	size_t *ovector;
    130	size_t filesize;
    131	size_t filecap;
    132	size_t match_size;
    133	size_t error_offset = 0;
    134	size_t offset;
    135	size_t length;
    136	size_t pos;
    137	size_t lastmatch;
    138	ssize_t n;
    139	int rc;
    140
    141	parse(argc, argv);
    142
    143	regex = pcre2_compile((PCRE2_SPTR8)pattern, strlen(pattern),
    144		PCRE2_MATCH_INVALID_UTF, &rc, &error_offset, NULL);
    145	if (!regex) pcre2_die(rc, "bad regex (..'%s')", argv[argc-1] + error_offset);
    146
    147	match_data = pcre2_match_data_create_from_pattern(regex, NULL);
    148	if (!match_data) die("pcre2_match_data_create_from_pattern");
    149
    150	filecap = BUFSIZ;
    151	filesize = 0;
    152	file = malloc(filecap);
    153	if (!file) die("malloc:");
    154
    155	pos = 0;
    156	lastmatch = 0;
    157	while (true) {
    158		if (BUFSIZ > filecap - filesize) {
    159			filecap *= 2;
    160			file = realloc(file, filecap);
    161			if (!file) die("realloc:");
    162		}
    163
    164		n = read(0, file + filesize, BUFSIZ);
    165		if (n < 0) die("read:");
    166		if (!n) break;
    167
    168		length = (size_t) n;
    169		offset = 0;
    170		while (offset != length) {
    171			rc = pcre2_match(regex, file, filesize + length, lastmatch,
    172					PCRE2_NOTEMPTY, match_data, NULL);
    173			if (rc < 0 && rc != PCRE2_ERROR_NOMATCH)
    174				pcre2_die(rc, "pcre2_match: ");
    175
    176			if (rc != PCRE2_ERROR_NOMATCH) {
    177				rc = pcre2_substring_get_bynumber(match_data,
    178					group, &match, &match_size);
    179				if (rc) pcre2_die(rc, "pcre2_substring_get_byname: ");
    180
    181				ovector = pcre2_get_ovector_pointer(match_data);
    182				if (overlap) {
    183					lastmatch = ovector[0] + 1;
    184				} else {
    185					lastmatch = ovector[1];
    186				}
    187
    188				writef("%s:0x%zx:", marker, filesize + ovector[0]); 
    189				writeall(match, match_size);
    190				writef("\n");
    191			} else {
    192				break;
    193			}
    194		}
    195
    196		filesize += length;
    197	}
    198
    199	pcre2_match_data_free(match_data);
    200	pcre2_code_free(regex);
    201
    202	return 0;
    203}