util.c (9301B)
1#include <errno.h> 2#include <stdarg.h> 3#include <stdio.h> 4#include <stdlib.h> 5#include <string.h> 6#include <wchar.h> 7 8#include "util.h" 9 10/* print to stderr, print error message of errno and exit(). 11 Unlike BSD err() it does not prefix __progname */ 12__dead void 13err(int exitstatus, const char *fmt, ...) 14{ 15 va_list ap; 16 int saved_errno; 17 18 saved_errno = errno; 19 20 if (fmt) { 21 va_start(ap, fmt); 22 vfprintf(stderr, fmt, ap); 23 va_end(ap); 24 fputs(": ", stderr); 25 } 26 fprintf(stderr, "%s\n", strerror(saved_errno)); 27 28 exit(exitstatus); 29} 30 31/* print to stderr and exit(). 32 Unlike BSD errx() it does not prefix __progname */ 33__dead void 34errx(int exitstatus, const char *fmt, ...) 35{ 36 va_list ap; 37 38 if (fmt) { 39 va_start(ap, fmt); 40 vfprintf(stderr, fmt, ap); 41 va_end(ap); 42 } 43 fputs("\n", stderr); 44 45 exit(exitstatus); 46} 47 48/* Handle read or write errors for a FILE * stream */ 49void 50checkfileerror(FILE *fp, const char *name, int mode) 51{ 52 if (mode == 'r' && ferror(fp)) 53 errx(1, "read error: %s", name); 54 else if (mode == 'w' && (fflush(fp) || ferror(fp))) 55 errx(1, "write error: %s", name); 56} 57 58/* strcasestr() included for portability */ 59char * 60strcasestr(const char *h, const char *n) 61{ 62 size_t i; 63 64 if (!n[0]) 65 return (char *)h; 66 67 for (; *h; ++h) { 68 for (i = 0; n[i] && TOLOWER((unsigned char)n[i]) == 69 TOLOWER((unsigned char)h[i]); ++i) 70 ; 71 if (n[i] == '\0') 72 return (char *)h; 73 } 74 75 return NULL; 76} 77 78/* Check if string has a non-empty scheme / protocol part. */ 79int 80uri_hasscheme(const char *s) 81{ 82 const char *p = s; 83 84 for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) || 85 *p == '+' || *p == '-' || *p == '.'; p++) 86 ; 87 /* scheme, except if empty and starts with ":" then it is a path */ 88 return (*p == ':' && p != s); 89} 90 91/* Parse URI string `s` into an uri structure `u`. 92 Returns 0 on success or -1 on failure */ 93int 94uri_parse(const char *s, struct uri *u) 95{ 96 const char *p = s; 97 char *endptr; 98 size_t i; 99 long l; 100 101 u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0'; 102 u->path[0] = u->query[0] = u->fragment[0] = '\0'; 103 104 /* protocol-relative */ 105 if (*p == '/' && *(p + 1) == '/') { 106 p += 2; /* skip "//" */ 107 goto parseauth; 108 } 109 110 /* scheme / protocol part */ 111 for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) || 112 *p == '+' || *p == '-' || *p == '.'; p++) 113 ; 114 /* scheme, except if empty and starts with ":" then it is a path */ 115 if (*p == ':' && p != s) { 116 if (*(p + 1) == '/' && *(p + 2) == '/') 117 p += 3; /* skip "://" */ 118 else 119 p++; /* skip ":" */ 120 121 if ((size_t)(p - s) >= sizeof(u->proto)) 122 return -1; /* protocol too long */ 123 memcpy(u->proto, s, p - s); 124 u->proto[p - s] = '\0'; 125 126 if (*(p - 1) != '/') 127 goto parsepath; 128 } else { 129 p = s; /* no scheme format, reset to start */ 130 goto parsepath; 131 } 132 133parseauth: 134 /* userinfo (username:password) */ 135 i = strcspn(p, "@/?#"); 136 if (p[i] == '@') { 137 if (i >= sizeof(u->userinfo)) 138 return -1; /* userinfo too long */ 139 memcpy(u->userinfo, p, i); 140 u->userinfo[i] = '\0'; 141 p += i + 1; 142 } 143 144 /* IPv6 address */ 145 if (*p == '[') { 146 /* bracket not found, host too short or too long */ 147 i = strcspn(p, "]"); 148 if (p[i] != ']' || i < 3) 149 return -1; 150 i++; /* including "]" */ 151 } else { 152 /* domain / host part, skip until port, path or end. */ 153 i = strcspn(p, ":/?#"); 154 } 155 if (i >= sizeof(u->host)) 156 return -1; /* host too long */ 157 memcpy(u->host, p, i); 158 u->host[i] = '\0'; 159 p += i; 160 161 /* port */ 162 if (*p == ':') { 163 p++; 164 if ((i = strcspn(p, "/?#")) >= sizeof(u->port)) 165 return -1; /* port too long */ 166 memcpy(u->port, p, i); 167 u->port[i] = '\0'; 168 /* check for valid port: range 1 - 65535, may be empty */ 169 errno = 0; 170 l = strtol(u->port, &endptr, 10); 171 if (i && (errno || *endptr || l <= 0 || l > 65535)) 172 return -1; 173 p += i; 174 } 175 176parsepath: 177 /* path */ 178 if ((i = strcspn(p, "?#")) >= sizeof(u->path)) 179 return -1; /* path too long */ 180 memcpy(u->path, p, i); 181 u->path[i] = '\0'; 182 p += i; 183 184 /* query */ 185 if (*p == '?') { 186 p++; 187 if ((i = strcspn(p, "#")) >= sizeof(u->query)) 188 return -1; /* query too long */ 189 memcpy(u->query, p, i); 190 u->query[i] = '\0'; 191 p += i; 192 } 193 194 /* fragment */ 195 if (*p == '#') { 196 p++; 197 if ((i = strlen(p)) >= sizeof(u->fragment)) 198 return -1; /* fragment too long */ 199 memcpy(u->fragment, p, i); 200 u->fragment[i] = '\0'; 201 } 202 203 return 0; 204} 205 206/* Transform and try to make the URI `u` absolute using base URI `b` into `a`. 207 Follows some of the logic from "RFC 3986 - 5.2.2. Transform References". 208 Returns 0 on success, -1 on error or truncation. */ 209int 210uri_makeabs(struct uri *a, struct uri *u, struct uri *b) 211{ 212 char *p; 213 int c; 214 215 strlcpy(a->fragment, u->fragment, sizeof(a->fragment)); 216 217 if (u->proto[0] || u->host[0]) { 218 strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, sizeof(a->proto)); 219 strlcpy(a->host, u->host, sizeof(a->host)); 220 strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo)); 221 strlcpy(a->host, u->host, sizeof(a->host)); 222 strlcpy(a->port, u->port, sizeof(a->port)); 223 strlcpy(a->path, u->path, sizeof(a->path)); 224 strlcpy(a->query, u->query, sizeof(a->query)); 225 return 0; 226 } 227 228 strlcpy(a->proto, b->proto, sizeof(a->proto)); 229 strlcpy(a->host, b->host, sizeof(a->host)); 230 strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo)); 231 strlcpy(a->host, b->host, sizeof(a->host)); 232 strlcpy(a->port, b->port, sizeof(a->port)); 233 234 if (!u->path[0]) { 235 strlcpy(a->path, b->path, sizeof(a->path)); 236 } else if (u->path[0] == '/') { 237 strlcpy(a->path, u->path, sizeof(a->path)); 238 } else { 239 a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '\0'; 240 a->path[1] = '\0'; 241 242 if ((p = strrchr(b->path, '/'))) { 243 c = *(++p); 244 *p = '\0'; /* temporary NUL-terminate */ 245 if (strlcat(a->path, b->path, sizeof(a->path)) >= sizeof(a->path)) 246 return -1; 247 *p = c; /* restore */ 248 } 249 if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof(a->path)) 250 return -1; 251 } 252 253 if (u->path[0] || u->query[0]) 254 strlcpy(a->query, u->query, sizeof(a->query)); 255 else 256 strlcpy(a->query, b->query, sizeof(a->query)); 257 258 return 0; 259} 260 261int 262uri_format(char *buf, size_t bufsiz, struct uri *u) 263{ 264 return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s", 265 u->proto, 266 u->userinfo[0] ? u->userinfo : "", 267 u->userinfo[0] ? "@" : "", 268 u->host, 269 u->port[0] ? ":" : "", 270 u->port, 271 u->host[0] && u->path[0] && u->path[0] != '/' ? "/" : "", 272 u->path, 273 u->query[0] ? "?" : "", 274 u->query, 275 u->fragment[0] ? "#" : "", 276 u->fragment); 277} 278 279/* Splits fields in the line buffer by replacing TAB separators with NUL ('\0') 280 * terminators and assign these fields as pointers. If there are less fields 281 * than expected then the field is an empty string constant. */ 282void 283parseline(char *line, char *fields[FieldLast]) 284{ 285 char *prev, *s; 286 size_t i; 287 288 for (prev = line, i = 0; 289 (s = strchr(prev, '\t')) && i < FieldLast - 1; 290 i++) { 291 *s = '\0'; 292 fields[i] = prev; 293 prev = s + 1; 294 } 295 fields[i++] = prev; 296 /* make non-parsed fields empty. */ 297 for (; i < FieldLast; i++) 298 fields[i] = ""; 299} 300 301/* Parse time to time_t, assumes time_t is signed, ignores fractions. */ 302int 303strtotime(const char *s, time_t *t) 304{ 305 long long l; 306 char *e; 307 308 errno = 0; 309 l = strtoll(s, &e, 10); 310 if (errno || *s == '\0' || *e) 311 return -1; 312 313 /* NOTE: the type long long supports the 64-bit range. If time_t is 314 64-bit it is "2038-ready", otherwise it is truncated/wrapped. */ 315 if (t) 316 *t = (time_t)l; 317 318 return 0; 319} 320 321time_t 322getcomparetime(void) 323{ 324 time_t now, t; 325 char *p; 326 327 if ((now = time(NULL)) == (time_t)-1) 328 return (time_t)-1; 329 330 if ((p = getenv("SFEED_NEW_AGE"))) { 331 if (strtotime(p, &t) == -1) 332 return (time_t)-1; 333 return now - t; 334 } 335 336 return now - 86400; /* 1 day is old news */ 337} 338 339/* Escape characters below as HTML 2.0 / XML 1.0. */ 340void 341xmlencode(const char *s, FILE *fp) 342{ 343 for (; *s; ++s) { 344 switch (*s) { 345 case '<': fputs("<", fp); break; 346 case '>': fputs(">", fp); break; 347 case '\'': fputs("'", fp); break; 348 case '&': fputs("&", fp); break; 349 case '"': fputs(""", fp); break; 350 default: putc(*s, fp); 351 } 352 } 353} 354 355/* print `len` columns of characters. If string is shorter pad the rest with 356 * characters `pad`. */ 357void 358printutf8pad(FILE *fp, const char *s, size_t len, int pad) 359{ 360 wchar_t wc; 361 size_t col = 0, i, slen; 362 int inc, rl, w; 363 364 if (!len) 365 return; 366 367 slen = strlen(s); 368 for (i = 0; i < slen; i += inc) { 369 inc = 1; /* next byte */ 370 if ((unsigned char)s[i] < 32) { 371 continue; /* skip control characters */ 372 } else if ((unsigned char)s[i] >= 127) { 373 rl = mbtowc(&wc, s + i, slen - i < 4 ? slen - i : 4); 374 inc = rl; 375 if (rl < 0) { 376 mbtowc(NULL, NULL, 0); /* reset state */ 377 inc = 1; /* invalid, seek next byte */ 378 w = 1; /* replacement char is one width */ 379 } else if ((w = wcwidth(wc)) == -1) { 380 continue; 381 } 382 383 if (col + w > len || (col + w == len && s[i + inc])) { 384 fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */ 385 col++; 386 break; 387 } else if (rl < 0) { 388 fputs(UTF_INVALID_SYMBOL, fp); /* replacement */ 389 col++; 390 continue; 391 } 392 fwrite(&s[i], 1, rl, fp); 393 col += w; 394 } else { 395 /* optimization: simple ASCII character */ 396 if (col + 1 > len || (col + 1 == len && s[i + 1])) { 397 fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */ 398 col++; 399 break; 400 } 401 putc(s[i], fp); 402 col++; 403 } 404 405 } 406 for (; col < len; ++col) 407 putc(pad, fp); 408}