diff options
Diffstat (limited to 'bgrep.c')
| -rw-r--r-- | bgrep.c | 201 |
1 files changed, 201 insertions, 0 deletions
@@ -0,0 +1,201 @@ +#define PCRE2_CODE_UNIT_WIDTH 8 +#include <pcre2.h> + +#include <unistd.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <stdbool.h> +#include <stdlib.h> + +static const char *marker = "\n"; +static const char *pattern = NULL; +static bool overlap = false; +static size_t group = 0; + +static void +die(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + fputs("bgrep: ", stderr); + vfprintf(stderr, fmt, ap); + if (fmt[strlen(fmt)-1] == ':') { + perror(NULL); + } else { + putc('\n', stderr); + } + va_end(ap); + + exit(1); +} + +static void +pcre2_die(int rc, const char *fmt, ...) +{ + char error_buf[256]; + va_list ap; + + pcre2_get_error_message(rc, (uint8_t *) error_buf, sizeof(error_buf)); + + va_start(ap, fmt); + fputs("bgrep: ", stderr); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "%s\n", error_buf); + va_end(ap); + + exit(1); +} + +static void +writeall(uint8_t *data, size_t size) +{ + size_t sent; + ssize_t n; + + sent = 0; + while (sent != size) { + n = write(1, data + sent, size - sent); + if (n < 0) die("write:"); + if (!n) break; + sent += (size_t) n; + } +} + +static void +writef(const char *fmt, ...) +{ + static void *buf = NULL; + static size_t bufsize = 0; + va_list ap; + va_list cpy; + size_t size; + + va_start(ap, fmt); + va_copy(cpy, ap); + size = vsnprintf(NULL, 0, fmt, ap); + if (!bufsize || size + 1 > bufsize) { + bufsize = size + 1; + buf = realloc(buf, bufsize); + if (!buf) die("realloc:"); + } + va_end(ap); + + va_start(cpy, fmt); + vsnprintf(buf, bufsize, fmt, cpy); + va_end(cpy); + + writeall(buf, size); +} + +void +parse(int argc, const char **argv) +{ + const char **arg; + + if (argc < 2) goto usage; + + for (arg = argv + 1; *arg; arg++) { + if (!strcmp(*arg, "-h") || !strcmp(*arg, "--help")) { + goto usage; + } else if (!strcmp(*arg, "-g") || !strcmp(*arg, "--group")) { + group = strtoul(*++arg, NULL, 0); + } else if (!strcmp(*arg, "-m") || !strcmp(*arg, "--marker")) { + marker = *++arg; + } else if (!strcmp(*arg, "-o") || !strcmp(*arg, "--overlap")) { + overlap = true; + } else { + if (pattern) goto usage; + pattern = *arg; + } + } + return; + +usage: + fprintf(stderr, "Usage: bgrep [-g GROUP] [-m MARKER] REGEX\n"); + exit(1); +} + +int +main(int argc, const char **argv) +{ + pcre2_match_data *match_data; + pcre2_code *regex = NULL; + uint8_t *match; + uint8_t *file; + size_t *ovector; + size_t filesize; + size_t filecap; + size_t match_size; + size_t error_offset = 0; + size_t offset; + size_t length; + size_t pos; + size_t lastmatch; + ssize_t n; + int rc; + + parse(argc, argv); + + regex = pcre2_compile((PCRE2_SPTR8)pattern, strlen(pattern), + PCRE2_MATCH_INVALID_UTF, &rc, &error_offset, NULL); + if (!regex) pcre2_die(rc, "bad regex (..'%s')", argv[argc-1] + error_offset); + + match_data = pcre2_match_data_create_from_pattern(regex, NULL); + if (!match_data) die("pcre2_match_data_create_from_pattern"); + + filecap = BUFSIZ; + filesize = 0; + file = malloc(filecap); + if (!file) die("mallo:"); + + pos = 0; + lastmatch = 0; + while (true) { + if (BUFSIZ > filecap - filesize) { + filecap *= 2; + file = realloc(file, filecap); + if (!file) die("realloc:"); + } + + n = read(0, file + filesize, BUFSIZ); + if (n < 0) die("read:"); + if (!n) break; + + length = (size_t) n; + offset = 0; + while (offset != length) { + rc = pcre2_match(regex, file, filesize + length, lastmatch, + PCRE2_NOTEMPTY, match_data, NULL); + if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) + pcre2_die(rc, "pcre2_match: "); + + if (rc != PCRE2_ERROR_NOMATCH) { + rc = pcre2_substring_get_bynumber(match_data, + group, &match, &match_size); + if (rc) pcre2_die(rc, "pcre2_substring_get_byname: "); + + ovector = pcre2_get_ovector_pointer(match_data); + if (overlap) { + lastmatch = ovector[0] + 1; + } else { + lastmatch = ovector[1]; + } + + writef("0x%zx:", filesize + ovector[0]); + writeall(match, match_size); + writef("%s", marker); + } else { + break; + } + } + + filesize += length; + } + + pcre2_match_data_free(match_data); + pcre2_code_free(regex); + + return 0; +} |
