/* smu.c: converts Markdown-like markup read from a file or stdin to HTML on stdout */
#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef __OpenBSD__
#include <unistd.h>
#else
#define pledge(p1,p2) 0
#endif

/* Only standard headers are included, so VERSION has to come from the build
 * (e.g. a -D flag); this fallback keeps the file compilable on its own. */
#ifndef VERSION
#define VERSION "unknown"
#endif

/* Number of elements of a real array (not a pointer) */
#define LENGTH(x) (sizeof(x)/sizeof((x)[0]))
/* Yields b[i] as an lvalue, first growing b by BUFSIZ bytes whenever i is a
 * multiple of BUFSIZ. Expands to an if-statement followed by an expression, so
 * it must be used as a complete statement inside braces, and i must advance by
 * exactly one between uses or a growth step is skipped. */
#define ADDC(b,i) if ((i) % BUFSIZ == 0) { b = realloc(b, ((i) + BUFSIZ)); if (!b) eprint("realloc"); } b[i]

/*
 * Parser contract (see process()): a parser inspects the text at begin,
 * bounded by end, and returns
 *    0  if it did not handle the text,
 *   >0  the number of characters it consumed,
 *   <0  minus the number of characters consumed, additionally telling
 *       process() that what follows starts a new block.
 */
typedef int (*Parser)(const char *, const char *, int);
typedef struct {
	char *search;          /* literal text that opens (and, for surround, closes) the tag */
	int process;           /* 0: escape content only; non-zero: parse content recursively;
	                        * >= 2 (lineprefix only): parse content as a new block */
	char *before, *after;  /* HTML emitted before and after the content */
} Tag;

static int doamp(const char *begin, const char *end, int newblock);       /* Parser for & */
static int docomment(const char *begin, const char *end, int newblock);   /* Parser for html-comments */
static int dogtlt(const char *begin, const char *end, int newblock);      /* Parser for < and > */
static int dohtml(const char *begin, const char *end, int newblock);      /* Parser for html */
static int dolineprefix(const char *begin, const char *end, int newblock);/* Parser for line prefix tags */
static int dolink(const char *begin, const char *end, int newblock);      /* Parser for links and images */
static int dolist(const char *begin, const char *end, int newblock);      /* Parser for lists */
static int doparagraph(const char *begin, const char *end, int newblock); /* Parser for paragraphs */
static int doreplace(const char *begin, const char *end, int newblock);   /* Parser for simple replaces */
static int doshortlink(const char *begin, const char *end, int newblock); /* Parser for links and images */
static int dosurround(const char *begin, const char *end, int newblock);  /* Parser for surrounding tags */
static int dounderline(const char *begin, const char *end, int newblock); /* Parser for underline tags */
static void *ereallocz(void *p, size_t size);
static void hprint(const char *begin, const char *end);                   /* Escapes HTML and prints it to output */
static void hprintattr(const char *begin, const char *end);               /* Escapes HTML for attributes and prints it to output */
static void process(const char *begin, const char *end, int newblock);    /* Processes range between begin and end. */

/* list of parsers; process() tries them in this order and stops at the first match */
static Parser parsers[] = { dounderline, docomment, dolineprefix,
                            dolist, doparagraph, dogtlt, dosurround, dolink,
                            doshortlink, dohtml, doamp, doreplace };
static int lazyimg = 0, nohtml = 0;

static Tag lineprefix[] = {
	{ "    ",       0, "<pre><code>",  "\n</code></pre>" },  /* four spaces */
	{ "\t",         0, "<pre><code>",  "\n</code></pre>" },
	{ ">",          2, "<blockquote>", "</blockquote>" },
	{ "###### ",    1, "<h6>",         "</h6>" },
	{ "##### ",     1, "<h5>",         "</h5>" },
	{ "#### ",      1, "<h4>",         "</h4>" },
	{ "### ",       1, "<h3>",         "</h3>" },
	{ "## ",        1, "<h2>",         "</h2>" },
	{ "# ",         1, "<h1>",         "</h1>" },
	{ "- - -\n",    1, "<hr />",       ""},
	{ "---\n",      1, "<hr />",       ""},
};

static Tag underline[] = {
	{ "=",          1, "<h1>",         "</h1>\n" },
	{ "-",          1, "<h2>",         "</h2>\n" },
};

/* longer delimiters come first so they win over their own prefixes */
static Tag surround[] = {
	{ "``",         0, "<code>",       "</code>" },
	{ "`",          0, "<code>",       "</code>" },
	{ "___",        1, "<strong><em>", "</em></strong>" },
	{ "***",        1, "<strong><em>", "</em></strong>" },
	{ "__",         1, "<strong>",     "</strong>" },
	{ "**",         1, "<strong>",     "</strong>" },
	{ "_",          1, "<em>",         "</em>" },
	{ "*",          1, "<em>",         "</em>" },
};

/* backslash escapes: the left side is consumed, the right side is printed */
static const char *replace[][2] = {
	{ "\\\\",       "\\" },
	{ "\\`",        "`" },
	{ "\\*",        "*" },
	{ "\\_",        "_" },
	{ "\\{",        "{" },
	{ "\\}",        "}" },
	{ "\\[",        "[" },
	{ "\\]",        "]" },
	{ "\\(",        "(" },
	{ "\\)",        ")" },
	{ "\\#",        "#" },
	{ "\\+",        "+" },
	{ "\\-",        "-" },
	{ "\\.",        "." },
	{ "\\!",        "!" },
};

/* insertions: the right side is printed, the left side is NOT consumed */
static const char *insert[][2] = {
	{ "  \n",       "<br />" },  /* two spaces before the newline */
};

/* Prints a printf-style message to stderr and terminates with exit status 1. */
void
eprint(const char *format, ...)
{
	va_list ap;

	va_start(ap, format);
	vfprintf(stderr, format, ap);
	va_end(ap);
	exit(1);
}

/*
 * Escapes a '&' as "&amp;". When HTML is allowed, a '&' that is followed by a
 * ';' before any of space, backslash, newline or tab (or that runs to the end
 * of the range) is left for the caller to print unchanged.
 */
int
doamp(const char *begin, const char *end, int newblock)
{
	const char *p;

	if (*begin != '&')
		return 0;
	if (!nohtml) {
		for (p = begin + 1; p != end && !strchr("; \\\n\t", *p); p++)
			;
		if (p == end || *p == ';')
			return 0;
	}
	fputs("&amp;", stdout);
	return 1;
}

/*
 * Escapes angle brackets that cannot belong to an HTML tag: a '<' that is not
 * followed by a letter becomes "&lt;"; a character followed by '>' is printed
 * with "&gt;" unless that character is a letter or one of / " '.
 * Does nothing when nohtml is set.
 */
int
dogtlt(const char *begin, const char *end, int newblock)
{
	int brpos;
	char c;

	if (nohtml || begin + 1 >= end)
		return 0;
	brpos = begin[1] == '>';
	if (!brpos && *begin != '<')
		return 0;
	c = begin[brpos ? 0 : 1];
	if (!brpos && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z')) {
		fputs("&lt;", stdout);
		return 1;
	} else if (brpos && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z') && !strchr("/\"'", c)) {
		fprintf(stdout, "%c&gt;",c);
		return 2;
	}
	return 0;
}

/*
 * Copies an HTML comment ("<!--" up to and including "-->") to the output
 * verbatim, followed by a newline. The result is negative when the comment
 * was found at the start of a block. Does nothing when nohtml is set.
 */
int
docomment(const char *begin, const char *end, int newblock)
{
	const char *p;

	if (nohtml || strncmp("<!--", begin, 4))
		return 0;
	p = strstr(begin, "-->");
	if (!p || p + 3 >= end)
		return 0;
	fprintf(stdout, "%.*s\n", (int)(p + 3 - begin), begin);
	return (p + 3 - begin) * (newblock ? -1 : 1);
}

/*
 * Passes inline HTML through unmodified. If a matching closing tag starts
 * inside the range, everything up to it is copied; otherwise only the text up
 * to the next '>' is copied. In both cases the length also covers the one
 * character that follows the '>'. Does nothing when nohtml is set.
 */
int
dohtml(const char *begin, const char *end, int newblock)
{
	const char *p, *tag, *tagend;
	size_t len, avail;

	if (nohtml || begin + 2 >= end)
		return 0;
	p = begin;
	if (p[0] != '<' || !isalpha((unsigned char)p[1]))
		return 0;
	p++;
	tag = p;
	/* check the bound before dereferencing */
	for (; p < end && isalnum((unsigned char)*p); p++)
		;
	tagend = p;
	if (tag == tagend)
		return 0;
	avail = (size_t)(end - begin);
	while ((p = strstr(p, "</")) && p < end) {
		p += 2;
		if (strncmp(p, tag, tagend - tag) == 0 && p[tagend - tag] == '>') {
			p++;
			len = (size_t)(p - begin) + (size_t)(tagend - tag) + 1;
			/* never write or consume past the range (at top level
			 * that byte would be the terminating NUL) */
			if (len > avail)
				len = avail;
			fwrite(begin, 1, len, stdout);
			return (int)len;
		}
	}
	if ((p = strchr(tagend, '>'))) {
		len = (size_t)(p - begin) + 2;
		if (len > avail)
			len = avail;
		fwrite(begin, 1, len, stdout);
		return (int)len;
	}
	return 0;
}

/*
 * Handles constructs introduced by a prefix at the start of a line (see
 * lineprefix[]). Consecutive lines carrying the same prefix are gathered, the
 * prefix is stripped, and the result is either escaped or parsed recursively
 * depending on the tag's process field. Only applies at the start of a block
 * or directly after a newline.
 */
int
dolineprefix(const char *begin, const char *end, int newblock)
{
	unsigned int i, j;
	size_t l;
	char *buffer;
	const char *p;

	if (newblock)
		p = begin;
	else if (*begin == '\n')
		p = begin + 1;
	else
		return 0;
	for (i = 0; i < LENGTH(lineprefix); i++) {
		l = strlen(lineprefix[i].search);
		if ((size_t)(end - p) < l)
			continue;
		if (strncmp(lineprefix[i].search, p, l))
			continue;
		if (*begin == '\n')
			putc('\n', stdout);
		fputs(lineprefix[i].before, stdout);
		if (lineprefix[i].search[l-1] == '\n') {
			/* Horizontal rule: no content. Consume everything but
			 * the rule's final newline, counted from begin so a
			 * leading '\n' is included. */
			putc('\n', stdout);
			return (int)(p - begin) + (int)l - 1;
		}
		if (!(buffer = malloc(BUFSIZ)))
			eprint("malloc");
		buffer[0] = '\0';

		/* Collect lines into buffer while they start with the prefix */
		j = 0;
		while (p + l < end && strncmp(lineprefix[i].search, p, l) == 0) {
			p += l;

			/* Special case for blockquotes: optional space after > */
			if (lineprefix[i].search[0] == '>' && *p == ' ') {
				p++;
			}

			while (p < end) {
				ADDC(buffer, j) = *p;
				j++;
				if (*(p++) == '\n')
					break;
			}
		}

		/* Skip empty lines in block; j may be 0 when nothing was
		 * collected, so guard the look-behind */
		while (j > 0 && buffer[j - 1] == '\n')
			j--;

		ADDC(buffer, j) = '\0';
		if (lineprefix[i].process)
			process(buffer, buffer + strlen(buffer), lineprefix[i].process >= 2);
		else
			hprint(buffer, buffer + strlen(buffer));
		puts(lineprefix[i].after);
		free(buffer);
		return (int)-(p - begin);
	}
	return 0;
}

/*
 * Handles links "[desc](target)" and images "![alt](target)". The target may
 * be wrapped in <...>, and may be followed by a quoted title and, for images,
 * by "=WIDTH" or "=WIDTHxHEIGHT" (each must be preceded by whitespace).
 * The description of a link is parsed recursively.
 */
int
dolink(const char *begin, const char *end, int newblock)
{
	long width = 0, height = 0;
	int img, len, parens_depth = 1;
	char *numend;
	const char *desc, *link, *p, *q, *descend, *linkend;
	const char *title = NULL, *titleend = NULL;

	if (*begin == '[')
		img = 0;
	else if (strncmp(begin, "![", 2) == 0)
		img = 1;
	else
		return 0;
	p = desc = begin + 1 + img;
	/* the "](" separator has to start inside the range */
	if (!(p = strstr(desc, "](")) || p >= end)
		return 0;
	/* every image nested in the description owns one "](": skip past it */
	for (q = strstr(desc, "!["); q && q < end && q < p; q = strstr(q + 1, "!["))
		if (!(p = strstr(p + 1, "](")) || p >= end)
			return 0;
	descend = p;
	link = p + 2;

	/* find end of link while handling nested parens */
	q = link;
	while (parens_depth) {
		/* parentheses outside the range do not count */
		if (!(q = strpbrk(q, "()")) || q >= end)
			return 0;
		if (*q == '(')
			parens_depth++;
		else
			parens_depth--;
		if (parens_depth && q < end)
			q++;
	}

	linkend = q;
	if (*link == '<' && *(linkend - 1) == '>') {
		link++;
		linkend--;
	} else {
		/* trim leading spaces */
		for (p = link; p < q && isspace((unsigned char)*p); p++)
			;

		for (link = p; p < q; p++) {
			if (*p == '=' && img && p != link &&
			    isspace((unsigned char)p[-1])) {
				/* image dimensions */
				linkend = p;
				width = strtol(++p, &numend, 10);
				p = numend;
				if (*numend == 'x')
					height = strtol(++p, &numend, 10);
			} else if ((*p == '\'' || *p == '"') && p != link &&
			           isspace((unsigned char)p[-1])) {
				/* title attribute: for links and images */
				linkend = p;
				title = ++p;
				if ((titleend = strchr(title, *(p - 1)))) {
					if (titleend >= q)
						titleend = q;
					else
						p = titleend;
				}
			}
		}

		/* trim trailing spaces from link */
		for (; linkend > link && isspace((unsigned char)linkend[-1]); linkend--)
			;
	}

	len = q + 1 - begin;
	if (img) {
		fputs("<img src=\"", stdout);
		hprintattr(link, linkend);
		fputs("\" alt=\"", stdout);
		hprintattr(desc, descend);
		fputs("\" ", stdout);
		if (title && titleend && title != titleend) {
			fputs("title=\"", stdout);
			hprintattr(title, titleend);
			fputs("\" ", stdout);
		}
		if (width > 0)
			printf("width=\"%ld\" ", width);
		if (height > 0)
			printf("height=\"%ld\" ", height);
		if (width > 0 && height > 0 && lazyimg)
			fputs("loading=\"lazy\" ", stdout);
		fputs("/>", stdout);
	} else {
		fputs("<a href=\"", stdout);
		hprintattr(link, linkend);
		fputs("\"", stdout);
		if (title && titleend && title != titleend) {
			fputs(" title=\"", stdout);
			hprintattr(title, titleend);
			fputs("\"", stdout);
		}
		fputs(">", stdout);
		process(desc, descend, 0);
		fputs("</a>", stdout);
	}
	return len;
}

/*
 * Handles unordered lists (items starting with '-', '*' or '+') and ordered
 * lists (digits followed by '.'). The marker must be followed by a space or
 * tab. Each item's text is copied into a scratch buffer with its indentation
 * removed and then parsed recursively. Only applies at the start of a block
 * or directly after a newline.
 */
int
dolist(const char *begin, const char *end, int newblock)
{
	unsigned int i, j, indent, run, ul, isblock;
	const char *p, *q;
	char *buffer = NULL;
	char marker = '\0';  /* only read when ul is set */

	isblock = 0;
	if (newblock)
		p = begin;
	else if (*begin == '\n')
		p = begin + 1;
	else
		return 0;
	q = p;
	if (*p == '-' || *p == '*' || *p == '+') {
		ul = 1;
		marker = *p;
	} else {
		ul = 0;
		for (; p < end && *p >= '0' && *p <= '9'; p++)
			;
		if (p >= end || *p != '.')
			return 0;
	}
	p++;
	if (p >= end || !(*p == ' ' || *p == '\t'))
		return 0;
	for (p++; p != end && (*p == ' ' || *p == '\t'); p++)
		;
	/* width of marker plus following blanks: continuation lines must be
	 * indented by this much to stay inside the item */
	indent = p - q;
	buffer = ereallocz(buffer, BUFSIZ);
	if (!newblock)
		putc('\n', stdout);
	fputs(ul ? "<ul>\n" : "<ol>\n", stdout);
	run = 1;
	/* outer loop: one iteration per list item; inner loop: one per character */
	for (; p < end && run; p++) {
		for (i = 0; p < end && run; p++, i++) {
			if (*p == '\n') {
				if (p + 1 == end) {
					break;
				} else {
					/* Handle empty lines */
					for (q = p + 1; (*q == ' ' || *q == '\t') && q < end; q++)
						;
					if (*q == '\n') {
						ADDC(buffer, i) = '\n';
						i++;
						run = 0;
						isblock++;
						p = q;
					}
				}
				/* j counts how much of the next line looks like
				 * a marker and/or indentation */
				q = p + 1;
				j = 0;
				if (ul && *q == marker) {
					j = 1;
				} else if (!ul) {
					for (; q + j != end && q[j] >= '0' && q[j] <= '9' && j < indent; j++)
						;
					if (q + j == end)
						break;
					if (j > 0 && q[j] == '.')
						j++;
					else
						j = 0;
				}
				if (q + indent < end)
					for (; (q[j] == ' ' || q[j] == '\t') && j < indent; j++)
						;
				if (j == indent) {
					ADDC(buffer, i) = '\n';
					i++;
					p += indent;
					run = 1;
					/* a line starting with a blank continues the
					 * current item; anything else is a new item */
					if (*q == ' ' || *q == '\t')
						p++;
					else
						break;
				} else if (j < indent) {
					run = 0;
				}
			}
			ADDC(buffer, i) = *p;
		}
		ADDC(buffer, i) = '\0';
		fputs("<li>", stdout);
		process(buffer, buffer + i, isblock > 1 || (isblock == 1 && run));
		fputs("</li>\n", stdout);
	}
	fputs(ul ? "</ul>\n" : "</ol>\n", stdout);
	free(buffer);
	/* step back over trailing newlines so they are left for the caller */
	p--;
	while (*(--p) == '\n')
		;

	return -(p - begin + 1);
}

/*
 * Wraps the text up to the next blank line (or the end of the range) in
 * <p>...</p> and parses it recursively. Only applies at the start of a block
 * and only to text longer than one character.
 */
int
doparagraph(const char *begin, const char *end, int newblock)
{
	const char *p;

	if (!newblock)
		return 0;
	p = strstr(begin, "\n\n");
	if (!p || p > end)
		p = end;
	if (p - begin <= 1)
		return 0;
	fputs("<p>", stdout);
	process(begin, p, 0);
	fputs("</p>\n", stdout);

	return -(p - begin);
}

/*
 * Applies the insert[] and replace[] tables. A matching insert entry only
 * prints its replacement and consumes nothing (the matched text is still
 * processed afterwards); a matching replace entry prints its replacement and
 * consumes the matched text.
 */
int
doreplace(const char *begin, const char *end, int newblock)
{
	unsigned int i;
	size_t l;

	for (i = 0; i < LENGTH(insert); i++)
		if (strncmp(insert[i][0], begin, strlen(insert[i][0])) == 0)
			fputs(insert[i][1], stdout);
	for (i = 0; i < LENGTH(replace); i++) {
		l = strlen(replace[i][0]);
		if ((size_t)(end - begin) < l)
			continue;
		if (strncmp(replace[i][0], begin, l) == 0) {
			fputs(replace[i][1], stdout);
			return (int)l;
		}
	}
	return 0;
}

/*
 * Handles automatic links "<target>". The text must not contain blanks or
 * newlines. Text containing ':' or '#' is linked as is; otherwise text
 * containing '@' becomes a mailto: link whose characters are written as
 * numeric character references. Anything else is not a short link.
 */
int
doshortlink(const char *begin, const char *end, int newblock)
{
	const char *p, *c;
	int ismail = 0;  /* 0: undecided, 1: mail address, -1: other link */

	if (*begin != '<')
		return 0;
	for (p = begin + 1; p != end; p++) {
		switch(*p) {
		case ' ':
		case '\t':
		case '\n':
			return 0;
		case '#':
		case ':':
			ismail = -1;
			break;
		case '@':
			if (ismail == 0)
				ismail = 1;
			break;
		case '>':
			if (ismail == 0)
				return 0;
			fputs("<a href=\"", stdout);
			if (ismail == 1) {
				/* mailto: */
				fputs("mailto:", stdout);
				/* go through unsigned char: a negative char
				 * must not reach %u */
				for (c = begin + 1; c < p; c++)
					fprintf(stdout, "&#%u;", (unsigned int)(unsigned char)*c);
				fputs("\">", stdout);
				for (c = begin + 1; c < p; c++)
					fprintf(stdout, "&#%u;", (unsigned int)(unsigned char)*c);
			} else {
				hprintattr(begin + 1, p);
				fputs("\">", stdout);
				hprint(begin + 1, p);
			}
			fputs("</a>", stdout);
			return p - begin + 1;
		default:
			break;
		}
	}
	return 0;
}

/*
 * Handles text enclosed by a pair of identical delimiters (see surround[]).
 * A closing delimiter preceded by a backslash is skipped. The content is
 * escaped or parsed recursively depending on the tag's process field.
 */
int
dosurround(const char *begin, const char *end, int newblock)
{
	unsigned int i;
	size_t l;
	const char *p, *start, *stop;

	for (i = 0; i < LENGTH(surround); i++) {
		l = strlen(surround[i].search);
		if ((size_t)(end - begin) < 2*l || strncmp(begin, surround[i].search, l) != 0)
			continue;
		start = begin + l;
		p = start - 1;
		do {
			stop = p;
			p = strstr(p + 1, surround[i].search);
		} while (p && p[-1] == '\\');
		if (p && p[-1] != '\\')
			stop = p;
		if (!stop || stop < start || stop >= end)
			continue;
		fputs(surround[i].before, stdout);

		/* Single space at start and end are ignored. This needs at
		 * least two characters of content: with a lone space both
		 * tests hit the same character and start would pass stop. */
		if (stop - start >= 2 && *start == ' ' && *(stop - 1) == ' ') {
			start++;
			stop--;
			l++;
		}

		if (surround[i].process)
			process(start, stop, 0);
		else
			hprint(start, stop);
		fputs(surround[i].after, stdout);
		return (int)(stop - begin) + (int)l;
	}
	return 0;
}

/*
 * Handles headings written as a line of text followed by a line made of '='
 * or '-' characters (see underline[]). The second line must either be as long
 * as the first or consist of more than three such characters. Only applies at
 * the start of a block.
 */
int
dounderline(const char *begin, const char *end, int newblock)
{
	unsigned int i, j, l;
	const char *p;

	if (!newblock)
		return 0;
	p = begin;
	for (l = 0; p + l != end && p[l] != '\n'; l++)
		;
	/* an empty first line, or one that runs to the end of the range,
	 * cannot be followed by an underline */
	if (l == 0 || p + l == end)
		return 0;
	p += l + 1;
	for (i = 0; i < LENGTH(underline); i++) {
		for (j = 0; p + j != end && p[j] != '\n' && p[j] == underline[i].search[0]; j++)
			;
		if (j == l || (p[j] == '\n' && j > 3)) {
			fputs(underline[i].before, stdout);
			if (underline[i].process)
				process(begin, begin + l, 0);
			else
				hprint(begin, begin + l);
			fputs(underline[i].after, stdout);
			return -(j + p - begin);
		}
	}
	return 0;
}

/* realloc() that reports the failed size through eprint() and never returns NULL. */
void *
ereallocz(void *p, size_t size)
{
	void *res;

	res = realloc(p, size);
	if (!res)
		eprint("realloc: %zu bytes\n", size);
	return res;
}

/* Prints [begin, end) for use inside a double-quoted attribute: escapes & " > < */
void
hprintattr(const char *begin, const char *end)
{
	const char *p;

	for (p = begin; p < end; p++) {
		if (*p == '&')
			fputs("&amp;", stdout);
		else if (*p == '"')
			fputs("&quot;", stdout);
		else if (*p == '>')
			fputs("&gt;", stdout);
		else if (*p == '<')
			fputs("&lt;", stdout);
		else
			putc(*p, stdout);
	}
}

/* Prints [begin, end) as HTML text: escapes & > < */
void
hprint(const char *begin, const char *end)
{
	const char *p;

	for (p = begin; p < end; p++) {
		if (*p == '&')
			fputs("&amp;", stdout);
		else if (*p == '>')
			fputs("&gt;", stdout);
		else if (*p == '<')
			fputs("&lt;", stdout);
		else
			putc(*p, stdout);
	}
}

/*
 * Converts the range [begin, end) and writes the result to stdout. At each
 * position the parsers are tried in table order; the first one returning
 * non-zero wins and the position advances by the absolute value of its
 * result. If none matches, one character is copied (escaped when nohtml is
 * set). newblock tells the parsers whether the position starts a new block;
 * it is set again after a blank line or after a parser returned a negative
 * value.
 */
void
process(const char *begin, const char *end, int newblock)
{
	const char *p, *q;
	int affected;
	unsigned int i;

	for (p = begin; p < end;) {
		if (newblock)
			while (*p == '\n')
				if (++p == end)
					return;
		affected = 0;
		for (i = 0; i < LENGTH(parsers) && !affected; i++)
			affected = parsers[i](p, end, newblock);
		p += abs(affected);
		if (!affected) {
			if (nohtml)
				hprint(p, p + 1);
			else
				putc(*p, stdout);
			p++;
		}
		/* stop when only newlines remain; '<' rather than '!=' so a
		 * position at or past end cannot run away */
		for (q = p; q < end && *q == '\n'; q++)
			;
		if (q >= end)
			return;
		else if (p[0] == '\n' && p + 1 != end && p[1] == '\n')
			newblock = 1;
		else
			newblock = affected < 0;
	}
}

/* Prints the usage line to stderr and exits with status 1 (via eprint). */
void
usage(char **argv)
{
	eprint("usage: %s [-l] [-n] [file]\n", argv[0]);
}

/*
 * Options: -v prints the version and exits, -n sets nohtml, -l sets lazyimg,
 * "--" ends the options. The first non-option argument names the input file;
 * without one, stdin is read. The whole input is read into memory before it
 * is converted.
 */
int
main(int argc, char *argv[])
{
	FILE *source = stdin;
	char *buffer = NULL;
	int i;
	size_t n, len, bsize;

	for (i = 1; i < argc; i++) {
		if (!strcmp("-v", argv[i])) {
			eprint("smu v%s\n", VERSION);
		} else if (!strcmp("-n", argv[i])) {
			nohtml = 1;
		} else if (!strcmp("-l", argv[i])) {
			lazyimg = 1;
		} else if (argv[i][0] != '-') {
			break; /* file specified */
		} else if (!strcmp("--", argv[i])) {
			i++;
			break;
		} else {
			usage(argv);
		}
	}
	if (i < argc && !(source = fopen(argv[i], "r")))
		eprint("fopen: %s: %s\n", argv[i], strerror(errno));

	if (pledge("stdio", NULL) == -1)
		eprint("pledge");

	/* invariant: at least BUFSIZ + 1 bytes are free past len, enough for
	 * the next read and the terminating NUL */
	bsize = 2 * BUFSIZ;
	buffer = ereallocz(buffer, bsize);
	len = 0;
	while ((n = fread(buffer + len, 1, BUFSIZ, source))) {
		len += n;
		if (BUFSIZ + len + 1 > bsize) {
			bsize += BUFSIZ;
			buffer = ereallocz(buffer, bsize);
		}
	}
	if (ferror(source))
		eprint("fread: %s\n", strerror(errno));
	buffer[len] = '\0';
	process(buffer, buffer + len, 1);
	free(buffer);
	if (source != stdin)
		fclose(source);

	return 0;
}