bgrep.c (3981B)
1#define PCRE2_CODE_UNIT_WIDTH 8 2 3#include <pcre2.h> 4 5#include <unistd.h> 6 7#include <stdio.h> 8#include <stdarg.h> 9#include <string.h> 10#include <stdbool.h> 11#include <stdlib.h> 12 13static const char *marker = ""; 14static const char *pattern = NULL; 15static bool overlap = false; 16static size_t group = 0; 17 18static void 19die(const char *fmt, ...) 20{ 21 va_list ap; 22 23 va_start(ap, fmt); 24 fputs("bgrep: ", stderr); 25 vfprintf(stderr, fmt, ap); 26 if (fmt[strlen(fmt)-1] == ':') { 27 perror(NULL); 28 } else { 29 putc('\n', stderr); 30 } 31 va_end(ap); 32 33 exit(1); 34} 35 36static void 37pcre2_die(int rc, const char *fmt, ...) 38{ 39 char error_buf[256]; 40 va_list ap; 41 42 pcre2_get_error_message(rc, (uint8_t *) error_buf, sizeof(error_buf)); 43 44 va_start(ap, fmt); 45 fputs("bgrep: ", stderr); 46 vfprintf(stderr, fmt, ap); 47 fprintf(stderr, "%s\n", error_buf); 48 va_end(ap); 49 50 exit(1); 51} 52 53static void 54writeall(uint8_t *data, size_t size) 55{ 56 size_t sent; 57 ssize_t n; 58 59 sent = 0; 60 while (sent != size) { 61 n = write(1, data + sent, size - sent); 62 if (n < 0) die("write:"); 63 if (!n) break; 64 sent += (size_t) n; 65 } 66} 67 68static void 69writef(const char *fmt, ...) 70{ 71 static void *buf = NULL; 72 static size_t bufsize = 0; 73 va_list ap; 74 va_list cpy; 75 size_t size; 76 77 va_start(ap, fmt); 78 va_copy(cpy, ap); 79 size = vsnprintf(NULL, 0, fmt, ap); 80 if (!bufsize || size + 1 > bufsize) { 81 bufsize = size + 1; 82 buf = realloc(buf, bufsize); 83 if (!buf) die("realloc:"); 84 } 85 va_end(ap); 86 87 va_start(cpy, fmt); 88 vsnprintf(buf, bufsize, fmt, cpy); 89 va_end(cpy); 90 91 writeall(buf, size); 92} 93 94static void 95parse(int argc, const char **argv) 96{ 97 const char **arg; 98 99 if (argc < 2) goto usage; 100 101 for (arg = argv + 1; *arg; arg++) { 102 if (!strcmp(*arg, "-h") || !strcmp(*arg, "--help")) { 103 goto usage; 104 } else if (!strcmp(*arg, "-g") || !strcmp(*arg, "--group")) { 105 group = strtoul(*++arg, NULL, 0); 106 } else if (!strcmp(*arg, "-m") || !strcmp(*arg, "--marker")) { 107 marker = *++arg; 108 } else if (!strcmp(*arg, "-o") || !strcmp(*arg, "--overlap")) { 109 overlap = true; 110 } else { 111 if (pattern) goto usage; 112 pattern = *arg; 113 } 114 } 115 return; 116 117usage: 118 fprintf(stderr, "Usage: bgrep [-g GROUP] [-m MARKER] REGEX\n"); 119 exit(1); 120} 121 122int 123main(int argc, const char **argv) 124{ 125 pcre2_match_data *match_data; 126 pcre2_code *regex = NULL; 127 uint8_t *match; 128 uint8_t *file; 129 size_t *ovector; 130 size_t filesize; 131 size_t filecap; 132 size_t match_size; 133 size_t error_offset = 0; 134 size_t offset; 135 size_t length; 136 size_t pos; 137 size_t lastmatch; 138 ssize_t n; 139 int rc; 140 141 parse(argc, argv); 142 143 regex = pcre2_compile((PCRE2_SPTR8)pattern, strlen(pattern), 144 PCRE2_MATCH_INVALID_UTF, &rc, &error_offset, NULL); 145 if (!regex) pcre2_die(rc, "bad regex (..'%s')", argv[argc-1] + error_offset); 146 147 match_data = pcre2_match_data_create_from_pattern(regex, NULL); 148 if (!match_data) die("pcre2_match_data_create_from_pattern"); 149 150 filecap = BUFSIZ; 151 filesize = 0; 152 file = malloc(filecap); 153 if (!file) die("malloc:"); 154 155 pos = 0; 156 lastmatch = 0; 157 while (true) { 158 if (BUFSIZ > filecap - filesize) { 159 filecap *= 2; 160 file = realloc(file, filecap); 161 if (!file) die("realloc:"); 162 } 163 164 n = read(0, file + filesize, BUFSIZ); 165 if (n < 0) die("read:"); 166 if (!n) break; 167 168 length = (size_t) n; 169 offset = 0; 170 while (offset != length) { 171 rc = pcre2_match(regex, file, filesize + length, lastmatch, 172 PCRE2_NOTEMPTY, match_data, NULL); 173 if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) 174 pcre2_die(rc, "pcre2_match: "); 175 176 if (rc != PCRE2_ERROR_NOMATCH) { 177 rc = pcre2_substring_get_bynumber(match_data, 178 group, &match, &match_size); 179 if (rc) pcre2_die(rc, "pcre2_substring_get_byname: "); 180 181 ovector = pcre2_get_ovector_pointer(match_data); 182 if (overlap) { 183 lastmatch = ovector[0] + 1; 184 } else { 185 lastmatch = ovector[1]; 186 } 187 188 writef("%s:0x%zx:", marker, filesize + ovector[0]); 189 writeall(match, match_size); 190 writef("\n"); 191 } else { 192 break; 193 } 194 } 195 196 filesize += length; 197 } 198 199 pcre2_match_data_free(match_data); 200 pcre2_code_free(regex); 201 202 return 0; 203}