bgrep

Grep for binary files
git clone https://git.sinitax.com/sinitax/bgrep
Log | Files | Refs | LICENSE | sfeed.txt

commit be2d017c214f92a22d1f24520f841ea1180c9ab4
Author: Louis Burda <quent.burda@gmail.com>
Date:   Wed,  3 Apr 2024 01:47:43 +0200

Initial version

Diffstat:
A .gitignore | 1 +
A LICENSE | 21 +++++++++++++++++++++
A Makefile | 19 +++++++++++++++++++
A bgrep.c | 201 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 242 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1 @@ +bgrep diff --git a/LICENSE b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Louis Burda + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Makefile b/Makefile @@ -0,0 +1,19 @@ +PREFIX ?= /usr/local +BINDIR ?= /bin + +LDLIBS = -lpcre2-8 + +all: bgrep + +clean: + rm -f bgrep + +bgrep: bgrep.c + +install: + install bgrep -t "$(DESTDIR)$(PREFIX)$(BINDIR)" + +uninstall: + rm -f "$(DESTDIR)$(PREFIX)$(BINDIR)/bgrep" + +.PHONY: all clean install uninstall diff --git a/bgrep.c b/bgrep.c @@ -0,0 +1,201 @@ +#define PCRE2_CODE_UNIT_WIDTH 8 +#include <pcre2.h> + +#include <unistd.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <stdbool.h> +#include <stdlib.h> + +static const char *marker = "\n"; +static const char *pattern = NULL; +static bool overlap = false; +static size_t group = 0; + +static void +die(const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + fputs("bgrep: ", stderr); + vfprintf(stderr, fmt, ap); + if (fmt[strlen(fmt)-1] == ':') { + perror(NULL); + } else { + putc('\n', stderr); + } + va_end(ap); + + exit(1); +} + +static void +pcre2_die(int rc, const char *fmt, ...) +{ + char error_buf[256]; + va_list ap; + + pcre2_get_error_message(rc, (uint8_t *) error_buf, sizeof(error_buf)); + + va_start(ap, fmt); + fputs("bgrep: ", stderr); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "%s\n", error_buf); + va_end(ap); + + exit(1); +} + +static void +writeall(uint8_t *data, size_t size) +{ + size_t sent; + ssize_t n; + + sent = 0; + while (sent != size) { + n = write(1, data + sent, size - sent); + if (n < 0) die("write:"); + if (!n) break; + sent += (size_t) n; + } +} + +static void +writef(const char *fmt, ...) +{ + static void *buf = NULL; + static size_t bufsize = 0; + va_list ap; + va_list cpy; + size_t size; + + va_start(ap, fmt); + va_copy(cpy, ap); + size = vsnprintf(NULL, 0, fmt, ap); + if (!bufsize || size + 1 > bufsize) { + bufsize = size + 1; + buf = realloc(buf, bufsize); + if (!buf) die("realloc:"); + } + va_end(ap); + + va_start(cpy, fmt); + vsnprintf(buf, bufsize, fmt, cpy); + va_end(cpy); + + writeall(buf, size); +} + +void +parse(int argc, const char **argv) +{ + const char **arg; + + if (argc < 2) goto usage; + + for (arg = argv + 1; *arg; arg++) { + if (!strcmp(*arg, "-h") || !strcmp(*arg, "--help")) { + goto usage; + } else if (!strcmp(*arg, "-g") || !strcmp(*arg, "--group")) { + group = strtoul(*++arg, NULL, 0); + } else if (!strcmp(*arg, "-m") || !strcmp(*arg, "--marker")) { + marker = *++arg; + } else if (!strcmp(*arg, "-o") || !strcmp(*arg, "--overlap")) { + overlap = true; + } else { + if (pattern) goto usage; + pattern = *arg; + } + } + return; + +usage: + fprintf(stderr, "Usage: bgrep [-g GROUP] [-m MARKER] REGEX\n"); + exit(1); +} + +int +main(int argc, const char **argv) +{ + pcre2_match_data *match_data; + pcre2_code *regex = NULL; + uint8_t 
*match; + uint8_t *file; + size_t *ovector; + size_t filesize; + size_t filecap; + size_t match_size; + size_t error_offset = 0; + size_t offset; + size_t length; + size_t pos; + size_t lastmatch; + ssize_t n; + int rc; + + parse(argc, argv); + + regex = pcre2_compile((PCRE2_SPTR8)pattern, strlen(pattern), + PCRE2_MATCH_INVALID_UTF, &rc, &error_offset, NULL); + if (!regex) pcre2_die(rc, "bad regex (..'%s')", argv[argc-1] + error_offset); + + match_data = pcre2_match_data_create_from_pattern(regex, NULL); + if (!match_data) die("pcre2_match_data_create_from_pattern"); + + filecap = BUFSIZ; + filesize = 0; + file = malloc(filecap); + if (!file) die("mallo:"); + + pos = 0; + lastmatch = 0; + while (true) { + if (BUFSIZ > filecap - filesize) { + filecap *= 2; + file = realloc(file, filecap); + if (!file) die("realloc:"); + } + + n = read(0, file + filesize, BUFSIZ); + if (n < 0) die("read:"); + if (!n) break; + + length = (size_t) n; + offset = 0; + while (offset != length) { + rc = pcre2_match(regex, file, filesize + length, lastmatch, + PCRE2_NOTEMPTY, match_data, NULL); + if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) + pcre2_die(rc, "pcre2_match: "); + + if (rc != PCRE2_ERROR_NOMATCH) { + rc = pcre2_substring_get_bynumber(match_data, + group, &match, &match_size); + if (rc) pcre2_die(rc, "pcre2_substring_get_byname: "); + + ovector = pcre2_get_ovector_pointer(match_data); + if (overlap) { + lastmatch = ovector[0] + 1; + } else { + lastmatch = ovector[1]; + } + + writef("0x%zx:", filesize + ovector[0]); + writeall(match, match_size); + writef("%s", marker); + } else { + break; + } + } + + filesize += length; + } + + pcre2_match_data_free(match_data); + pcre2_code_free(regex); + + return 0; +}