commit be2d017c214f92a22d1f24520f841ea1180c9ab4
Author: Louis Burda <quent.burda@gmail.com>
Date: Wed, 3 Apr 2024 01:47:43 +0200
Initial version
Diffstat:
A | .gitignore | | | 1 | + |
A | LICENSE | | | 21 | +++++++++++++++++++++ |
A | Makefile | | | 19 | +++++++++++++++++++ |
A | bgrep.c | | | 201 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
4 files changed, 242 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+bgrep
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Louis Burda
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/Makefile b/Makefile
@@ -0,0 +1,19 @@
+# Installation layout: the binary ends up in $(DESTDIR)$(PREFIX)$(BINDIR).
+PREFIX ?= /usr/local
+BINDIR ?= /bin
+
+LDLIBS = -lpcre2-8
+
+all: bgrep
+
+clean:
+	rm -f bgrep
+
+# No recipe: relies on make's built-in rule for linking a program from a
+# single .c file (GNU make appends $(LDLIBS) to that link command).
+bgrep: bgrep.c
+
+# Create the target directory first so staged installs into an empty
+# $(DESTDIR) work, and keep options ahead of operands for non-GNU install.
+install: bgrep
+	install -d "$(DESTDIR)$(PREFIX)$(BINDIR)"
+	install -m 755 bgrep "$(DESTDIR)$(PREFIX)$(BINDIR)/bgrep"
+
+uninstall:
+	rm -f "$(DESTDIR)$(PREFIX)$(BINDIR)/bgrep"
+
+.PHONY: all clean install uninstall
diff --git a/bgrep.c b/bgrep.c
@@ -0,0 +1,201 @@
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/* String written after every reported match; replaced by -m/--marker. */
+static const char *marker = "\n";
+/* Regular expression taken from the command line; NULL until parse() stores it. */
+static const char *pattern = NULL;
+/*
+ * Set by -o/--overlap: the next search starts one byte after the start of
+ * the previous match instead of at its end.
+ */
+static bool overlap = false;
+/*
+ * Capture group number handed to pcre2_substring_get_bynumber() to select
+ * the bytes printed for each match (-g/--group); 0 selects the whole match.
+ */
+static size_t group = 0;
+
+/*
+ * Print "bgrep: " followed by the printf-style message to stderr and exit
+ * with status 1.
+ *
+ * When the format ends in ':' the message is treated as the name of a failed
+ * call and the description of the errno value seen on entry is appended.
+ * Otherwise the message is simply terminated with a newline.
+ */
+static void
+die(const char *fmt, ...)
+{
+	/* Capture errno first: the stdio calls below are allowed to change it. */
+	const int saved_errno = errno;
+	const size_t len = strlen(fmt);
+	va_list ap;
+
+	fputs("bgrep: ", stderr);
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+
+	/* The length check keeps an empty format from indexing fmt[-1]. */
+	if (len > 0 && fmt[len - 1] == ':') {
+		fprintf(stderr, " %s\n", strerror(saved_errno));
+	} else {
+		putc('\n', stderr);
+	}
+
+	exit(1);
+}
+
+/*
+ * Print "bgrep: ", the printf-style message and then PCRE2's description of
+ * error code rc to stderr, and exit with status 1.
+ *
+ * The description is appended directly after the formatted message, so the
+ * caller supplies any separator (e.g. a trailing ": ") in fmt.
+ */
+static void
+pcre2_die(int rc, const char *fmt, ...)
+{
+	char error_buf[256];
+	va_list ap;
+	int len;
+
+	/*
+	 * For an unknown code PCRE2 reports PCRE2_ERROR_BADDATA and does not
+	 * promise to fill the buffer, so substitute a fallback text rather
+	 * than print indeterminate bytes. A truncated message
+	 * (PCRE2_ERROR_NOMEMORY) is still NUL-terminated and printed as is.
+	 */
+	len = pcre2_get_error_message(rc, (PCRE2_UCHAR *) error_buf,
+	    sizeof(error_buf));
+	if (len < 0 && len != PCRE2_ERROR_NOMEMORY)
+		snprintf(error_buf, sizeof(error_buf), "unknown error %d", rc);
+
+	fputs("bgrep: ", stderr);
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	fprintf(stderr, "%s\n", error_buf);
+
+	exit(1);
+}
+
+/*
+ * Write all size bytes starting at data to standard output, continuing
+ * after short writes and retrying calls interrupted by a signal.
+ * Exits through die() if the write fails or makes no progress.
+ */
+static void
+writeall(const uint8_t *data, size_t size)
+{
+	size_t sent = 0;
+
+	while (sent < size) {
+		ssize_t n = write(STDOUT_FILENO, data + sent, size - sent);
+
+		if (n < 0) {
+			if (errno == EINTR) continue;
+			die("write:");
+		}
+		/* Giving up quietly here would truncate the output unnoticed. */
+		if (n == 0) die("write: no data written");
+		sent += (size_t) n;
+	}
+}
+
+/*
+ * Format a printf-style message and write it to standard output through
+ * writeall(). The text is built in a static buffer that grows on demand and
+ * is reused by later calls. Exits through die() if formatting or allocation
+ * fails.
+ */
+static void
+writef(const char *fmt, ...)
+{
+	static char *buf = NULL;
+	static size_t bufsize = 0;
+	va_list ap;
+	va_list cpy;
+	size_t size;
+	int len;
+
+	va_start(ap, fmt);
+	/* The sizing pass consumes ap, so the formatting pass needs its own copy. */
+	va_copy(cpy, ap);
+	len = vsnprintf(NULL, 0, fmt, ap);
+	va_end(ap);
+	if (len < 0) {
+		va_end(cpy);
+		die("vsnprintf:");
+	}
+	size = (size_t) len;
+
+	if (size + 1 > bufsize) {
+		/* Keep the old pointer until the new allocation is known to be good. */
+		char *grown = realloc(buf, size + 1);
+
+		if (!grown) die("realloc:");
+		buf = grown;
+		bufsize = size + 1;
+	}
+
+	vsnprintf(buf, bufsize, fmt, cpy);
+	va_end(cpy);
+
+	/* Write the text without its terminating NUL. */
+	writeall((uint8_t *) buf, size);
+}
+
+/*
+ * Parse the command line into the file-level option variables.
+ *
+ *   -g, --group N     store N in `group` (strtoul base 0: decimal, 0x.., 0..)
+ *   -m, --marker STR  store STR in `marker`
+ *   -o, --overlap     set `overlap`
+ *   -h, --help        print the usage text and exit
+ *
+ * Any other argument is taken as the pattern; exactly one must be given.
+ * On any usage error (and for --help) the usage text is printed to stderr
+ * and the process exits with status 1.
+ */
+void
+parse(int argc, const char **argv)
+{
+	const char **arg;
+	unsigned long value;
+	char *end;
+
+	if (argc < 2) goto usage;
+
+	for (arg = argv + 1; *arg; arg++) {
+		if (!strcmp(*arg, "-h") || !strcmp(*arg, "--help")) {
+			goto usage;
+		} else if (!strcmp(*arg, "-g") || !strcmp(*arg, "--group")) {
+			/* The value is mandatory: never step onto argv's NULL terminator. */
+			if (!*++arg) goto usage;
+			errno = 0;
+			value = strtoul(*arg, &end, 0);
+			/* Reject empty input, trailing junk and out-of-range numbers. */
+			if (end == *arg || *end != '\0' || errno == ERANGE)
+				goto usage;
+			/* PCRE2 takes the group number as a uint32_t. */
+			if (value > UINT32_MAX) goto usage;
+			group = (size_t) value;
+		} else if (!strcmp(*arg, "-m") || !strcmp(*arg, "--marker")) {
+			if (!*++arg) goto usage;
+			marker = *arg;
+		} else if (!strcmp(*arg, "-o") || !strcmp(*arg, "--overlap")) {
+			overlap = true;
+		} else {
+			/* Only a single pattern is accepted. */
+			if (pattern) goto usage;
+			pattern = *arg;
+		}
+	}
+
+	/* Options alone are not enough; a pattern is required. */
+	if (!pattern) goto usage;
+	return;
+
+usage:
+	fprintf(stderr, "Usage: bgrep [-g GROUP] [-m MARKER] [-o] REGEX\n");
+	exit(1);
+}
+
+/*
+ * Read standard input into a growing buffer and, after each read, report
+ * every new match of the pattern in the data buffered so far.
+ *
+ * Each hit is written to standard output as "0x<offset>:" (hexadecimal byte
+ * offset of the whole match from the start of the input), followed by the
+ * raw bytes of the selected capture group and then the marker string.
+ *
+ * NOTE(review): because matching runs after every read, a match is taken
+ * from the data buffered at that moment; a greedy match that ends at the
+ * current end of the buffer is emitted before later input could extend it.
+ *
+ * Returns 0 on success; every failure exits with status 1 through die() or
+ * pcre2_die().
+ */
+int
+main(int argc, const char **argv)
+{
+	pcre2_match_data *match_data;
+	pcre2_code *regex = NULL;
+	uint8_t *match;
+	uint8_t *file;
+	uint8_t *grown;
+	size_t *ovector;
+	size_t filesize;
+	size_t filecap;
+	size_t match_size;
+	size_t error_offset = 0;
+	size_t lastmatch;
+	ssize_t n;
+	int rc;
+
+	parse(argc, argv);
+	if (!pattern) die("no pattern given");
+
+	regex = pcre2_compile((PCRE2_SPTR8) pattern, strlen(pattern),
+	    PCRE2_MATCH_INVALID_UTF, &rc, &error_offset, NULL);
+	/* Quote the pattern itself: it is not necessarily the last argument. */
+	if (!regex) pcre2_die(rc, "bad regex (..'%s'): ", pattern + error_offset);
+
+	match_data = pcre2_match_data_create_from_pattern(regex, NULL);
+	if (!match_data) die("pcre2_match_data_create_from_pattern");
+
+	filecap = BUFSIZ;
+	filesize = 0;
+	file = malloc(filecap);
+	if (!file) die("malloc:");
+
+	/* Offset in `file` at which the next search starts. */
+	lastmatch = 0;
+	for (;;) {
+		/* Always keep room for one full read of BUFSIZ bytes. */
+		if (BUFSIZ > filecap - filesize) {
+			if (filecap > SIZE_MAX / 2) die("input too large");
+			grown = realloc(file, filecap * 2);
+			if (!grown) die("realloc:");
+			file = grown;
+			filecap *= 2;
+		}
+
+		n = read(STDIN_FILENO, file + filesize, BUFSIZ);
+		if (n < 0) {
+			if (errno == EINTR) continue;
+			die("read:");
+		}
+		if (n == 0) break;
+		filesize += (size_t) n;
+
+		/* Report every match that starts at or after lastmatch. */
+		for (;;) {
+			rc = pcre2_match(regex, file, filesize, lastmatch,
+			    PCRE2_NOTEMPTY, match_data, NULL);
+			if (rc == PCRE2_ERROR_NOMATCH) break;
+			if (rc < 0) pcre2_die(rc, "pcre2_match: ");
+
+			rc = pcre2_substring_get_bynumber(match_data,
+			    (uint32_t) group, &match, &match_size);
+			if (rc) pcre2_die(rc, "pcre2_substring_get_bynumber: ");
+
+			ovector = pcre2_get_ovector_pointer(match_data);
+			lastmatch = overlap ? ovector[0] + 1 : ovector[1];
+
+			/*
+			 * The subject passed to pcre2_match() starts at `file`, so
+			 * ovector[0] already is the offset from the start of input.
+			 */
+			writef("0x%zx:", ovector[0]);
+			writeall(match, match_size);
+			writef("%s", marker);
+
+			/* pcre2_substring_get_bynumber() allocated this copy. */
+			pcre2_substring_free(match);
+		}
+	}
+
+	free(file);
+	pcre2_match_data_free(match_data);
+	pcre2_code_free(regex);
+
+	return 0;
+}