#define PCRE2_CODE_UNIT_WIDTH 8 #include #include #include #include #include #include #include static const char *marker = ""; static const char *pattern = NULL; static bool overlap = false; static size_t group = 0; static void die(const char *fmt, ...) { va_list ap; va_start(ap, fmt); fputs("bgrep: ", stderr); vfprintf(stderr, fmt, ap); if (fmt[strlen(fmt)-1] == ':') { perror(NULL); } else { putc('\n', stderr); } va_end(ap); exit(1); } static void pcre2_die(int rc, const char *fmt, ...) { char error_buf[256]; va_list ap; pcre2_get_error_message(rc, (uint8_t *) error_buf, sizeof(error_buf)); va_start(ap, fmt); fputs("bgrep: ", stderr); vfprintf(stderr, fmt, ap); fprintf(stderr, "%s\n", error_buf); va_end(ap); exit(1); } static void writeall(uint8_t *data, size_t size) { size_t sent; ssize_t n; sent = 0; while (sent != size) { n = write(1, data + sent, size - sent); if (n < 0) die("write:"); if (!n) break; sent += (size_t) n; } } static void writef(const char *fmt, ...) { static void *buf = NULL; static size_t bufsize = 0; va_list ap; va_list cpy; size_t size; va_start(ap, fmt); va_copy(cpy, ap); size = vsnprintf(NULL, 0, fmt, ap); if (!bufsize || size + 1 > bufsize) { bufsize = size + 1; buf = realloc(buf, bufsize); if (!buf) die("realloc:"); } va_end(ap); va_start(cpy, fmt); vsnprintf(buf, bufsize, fmt, cpy); va_end(cpy); writeall(buf, size); } static void parse(int argc, const char **argv) { const char **arg; if (argc < 2) goto usage; for (arg = argv + 1; *arg; arg++) { if (!strcmp(*arg, "-h") || !strcmp(*arg, "--help")) { goto usage; } else if (!strcmp(*arg, "-g") || !strcmp(*arg, "--group")) { group = strtoul(*++arg, NULL, 0); } else if (!strcmp(*arg, "-m") || !strcmp(*arg, "--marker")) { marker = *++arg; } else if (!strcmp(*arg, "-o") || !strcmp(*arg, "--overlap")) { overlap = true; } else { if (pattern) goto usage; pattern = *arg; } } return; usage: fprintf(stderr, "Usage: bgrep [-g GROUP] [-m MARKER] REGEX\n"); exit(1); } int main(int argc, const char **argv) { pcre2_match_data *match_data; pcre2_code *regex = NULL; uint8_t *match; uint8_t *file; size_t *ovector; size_t filesize; size_t filecap; size_t match_size; size_t error_offset = 0; size_t offset; size_t length; size_t pos; size_t lastmatch; ssize_t n; int rc; parse(argc, argv); regex = pcre2_compile((PCRE2_SPTR8)pattern, strlen(pattern), PCRE2_MATCH_INVALID_UTF, &rc, &error_offset, NULL); if (!regex) pcre2_die(rc, "bad regex (..'%s')", argv[argc-1] + error_offset); match_data = pcre2_match_data_create_from_pattern(regex, NULL); if (!match_data) die("pcre2_match_data_create_from_pattern"); filecap = BUFSIZ; filesize = 0; file = malloc(filecap); if (!file) die("malloc:"); pos = 0; lastmatch = 0; while (true) { if (BUFSIZ > filecap - filesize) { filecap *= 2; file = realloc(file, filecap); if (!file) die("realloc:"); } n = read(0, file + filesize, BUFSIZ); if (n < 0) die("read:"); if (!n) break; length = (size_t) n; offset = 0; while (offset != length) { rc = pcre2_match(regex, file, filesize + length, lastmatch, PCRE2_NOTEMPTY, match_data, NULL); if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) pcre2_die(rc, "pcre2_match: "); if (rc != PCRE2_ERROR_NOMATCH) { rc = pcre2_substring_get_bynumber(match_data, group, &match, &match_size); if (rc) pcre2_die(rc, "pcre2_substring_get_byname: "); ovector = pcre2_get_ovector_pointer(match_data); if (overlap) { lastmatch = ovector[0] + 1; } else { lastmatch = ovector[1]; } writef("%s:0x%zx:", marker, filesize + ovector[0]); writeall(match, match_size); writef("\n"); } else { break; } } filesize += length; } pcre2_match_data_free(match_data); pcre2_code_free(regex); return 0; }