cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

unifdef.c (35637B)


      1/*
      2 * Copyright (c) 2002 - 2011 Tony Finch <dot@dotat.at>
      3 *
      4 * Redistribution and use in source and binary forms, with or without
      5 * modification, are permitted provided that the following conditions
      6 * are met:
      7 * 1. Redistributions of source code must retain the above copyright
      8 *    notice, this list of conditions and the following disclaimer.
      9 * 2. Redistributions in binary form must reproduce the above copyright
     10 *    notice, this list of conditions and the following disclaimer in the
     11 *    documentation and/or other materials provided with the distribution.
     12 *
     13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     16 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     23 * SUCH DAMAGE.
     24 */
     25
     26/*
     27 * unifdef - remove ifdef'ed lines
     28 *
     29 * This code was derived from software contributed to Berkeley by Dave Yost.
     30 * It was rewritten to support ANSI C by Tony Finch. The original version
     31 * of unifdef carried the 4-clause BSD copyright licence. None of its code
     32 * remains in this version (though some of the names remain) so it now
     33 * carries a more liberal licence.
     34 *
     35 *  Wishlist:
     36 *      provide an option which will append the name of the
     37 *        appropriate symbol after #else's and #endif's
     38 *      provide an option which will check symbols after
     39 *        #else's and #endif's to see that they match their
     40 *        corresponding #ifdef or #ifndef
     41 *
     42 *   These require better buffer handling, which would also make
     43 *   it possible to handle all "dodgy" directives correctly.
     44 */
     45
     46#include <sys/types.h>
     47#include <sys/stat.h>
     48
     49#include <ctype.h>
     50#include <err.h>
     51#include <errno.h>
     52#include <stdarg.h>
     53#include <stdbool.h>
     54#include <stdio.h>
     55#include <stdlib.h>
     56#include <string.h>
     57#include <unistd.h>
     58
/*
 * Embedded identification strings.
 *
 * version() prints the text found between each pair of '$' characters,
 * one keyword per line, and stops at the terminating NUL while looking
 * for an opening '$'. It does not check for NUL while looking for the
 * closing '$', so every keyword below must keep both of its '$' marks.
 */
const char copyright[] =
    "@(#) $Version: unifdef-2.5 $\n"
    "@(#) $Author: Tony Finch (dot@dotat.at) $\n"
    "@(#) $URL: http://dotat.at/prog/unifdef $\n"
;
     64
/*
 * Types of input lines, as classified by parseline().
 *
 * The numeric order is significant:
 *  - trans_table[] uses these values as its column index and
 *    linetype_name[] lists the names in the same order;
 *  - parseline() turns an #if result into the matching #elif result by
 *    adding (LT_ELIF - LT_IF), so LT_IF/LT_TRUE/LT_FALSE and
 *    LT_ELIF/LT_ELTRUE/LT_ELFALSE must stay in the same relative order;
 *  - parseline() marks a directive as "dodgy" by adding LT_DODGY, so the
 *    values LT_DODGY..LT_DODGY_LAST mirror LT_TRUEI..LT_ENDIF.
 */
typedef enum {
	LT_TRUEI,		/* a true #if with ignore flag */
	LT_FALSEI,		/* a false #if with ignore flag */
	LT_IF,			/* an unknown #if */
	LT_TRUE,		/* a true #if */
	LT_FALSE,		/* a false #if */
	LT_ELIF,		/* an unknown #elif */
	LT_ELTRUE,		/* a true #elif */
	LT_ELFALSE,		/* a false #elif */
	LT_ELSE,		/* #else */
	LT_ENDIF,		/* #endif */
	LT_DODGY,		/* flag: directive is not on one line */
	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,
	LT_PLAIN,		/* ordinary line */
	LT_EOF,			/* end of file */
	LT_ERROR,		/* unevaluable #if */
	LT_COUNT		/* number of line types (table dimension) */
} Linetype;
     84
     85static char const * const linetype_name[] = {
     86	"TRUEI", "FALSEI", "IF", "TRUE", "FALSE",
     87	"ELIF", "ELTRUE", "ELFALSE", "ELSE", "ENDIF",
     88	"DODGY TRUEI", "DODGY FALSEI",
     89	"DODGY IF", "DODGY TRUE", "DODGY FALSE",
     90	"DODGY ELIF", "DODGY ELTRUE", "DODGY ELFALSE",
     91	"DODGY ELSE", "DODGY ENDIF",
     92	"PLAIN", "EOF", "ERROR"
     93};
     94
/*
 * State of #if processing for one nesting level.
 *
 * One value is kept per level in ifstate[] and it selects the row of
 * trans_table[]; ifstate_name[] lists the names in the same order, so
 * the enum and both tables must be kept in step.
 */
typedef enum {
	IS_OUTSIDE,		/* not inside any #if group */
	IS_FALSE_PREFIX,	/* false #if followed by false #elifs */
	IS_TRUE_PREFIX,		/* first non-false #(el)if is true */
	IS_PASS_MIDDLE,		/* first non-false #(el)if is unknown */
	IS_FALSE_MIDDLE,	/* a false #elif after a pass state */
	IS_TRUE_MIDDLE,		/* a true #elif after a pass state */
	IS_PASS_ELSE,		/* an else after a pass state */
	IS_FALSE_ELSE,		/* an else after a true state */
	IS_TRUE_ELSE,		/* an else after only false states */
	IS_FALSE_TRAILER,	/* #elifs after a true are false */
	IS_COUNT		/* number of states (table dimension) */
} Ifstate;
    109
    110static char const * const ifstate_name[] = {
    111	"OUTSIDE", "FALSE_PREFIX", "TRUE_PREFIX",
    112	"PASS_MIDDLE", "FALSE_MIDDLE", "TRUE_MIDDLE",
    113	"PASS_ELSE", "FALSE_ELSE", "TRUE_ELSE",
    114	"FALSE_TRAILER"
    115};
    116
/*
 * State of the comment parser.
 *
 * NO_COMMENT is pinned to false so that the state can be tested as a
 * boolean: parseline() and done() both use a plain "if (incomment)".
 * comment_name[] lists the names in the same order.
 */
typedef enum {
	NO_COMMENT = false,	/* outside a comment */
	C_COMMENT,		/* in a comment like this one */
	CXX_COMMENT,		/* between // and end of line */
	STARTING_COMMENT,	/* just after slash-backslash-newline */
	FINISHING_COMMENT,	/* star-backslash-newline in a C comment */
	CHAR_LITERAL,		/* inside '' */
	STRING_LITERAL		/* inside "" */
} Comment_state;
    127
    128static char const * const comment_name[] = {
    129	"NO", "C", "CXX", "STARTING", "FINISHING", "CHAR", "STRING"
    130};
    131
/*
 * State of the preprocessor line parser.
 *
 * parseline() keeps this between calls in the global linestate; only a
 * line that reaches LS_HASH is examined for a directive keyword.
 * linestate_name[] lists the names in the same order.
 */
typedef enum {
	LS_START,		/* only space and comments on this line */
	LS_HASH,		/* only space, comments, and a hash */
	LS_DIRTY		/* this line can't be a preprocessor line */
} Line_state;
    138
    139static char const * const linestate_name[] = {
    140	"START", "HASH", "DIRTY"
    141};
    142
/*
 * Minimum translation limits from ISO/IEC 9899:1999 5.2.4.1
 *
 * MAXDEPTH sizes the ifstate[], ignoring[] and stifline[] stacks,
 * MAXLINE is the size handed to fgets() when a line is read into
 * tline[], and MAXSYMS sizes the symname[], value[] and ignore[] tables.
 */
#define	MAXDEPTH        64			/* maximum #if nesting */
#define	MAXLINE         4096			/* maximum length of line */
#define	MAXSYMS         4096			/* maximum number of symbols */

/*
 * Sometimes when editing a keyword the replacement text is longer, so
 * we leave some space at the end of the tline buffer to accommodate this.
 * (tline[] is declared with MAXLINE+EDITSLOP bytes, but fgets() is only
 * ever given MAXLINE.)
 */
#define	EDITSLOP        10

/*
 * For temporary filenames: the template passed to mkstemp() when the
 * output file is the same file as the input.
 */
#define TEMPLATE        "unifdef.XXXXXX"
    160
/*
 * Globals.
 *
 * All of these have static storage duration and therefore start out
 * zero/false/NULL unless an initializer says otherwise.
 */

/* Command line flags, set by main(). */
static bool             compblank;		/* -B: compress blank lines */
static bool             lnblank;		/* -b: blank deleted lines */
static bool             complement;		/* -c: do the complement */
static bool             debugging;		/* -d: debugging reports */
static bool             iocccok;		/* -e: fewer IOCCC errors */
static bool             strictlogic;		/* -K: keep ambiguous #ifs */
static bool             killconsts;		/* -k: eval constant #ifs */
static bool             lnnum;			/* -n: add #line directives */
static bool             symlist;		/* -s: output symbol list */
static bool             symdepth;		/* -S: output symbol depth */
static bool             text;			/* -t: this is a text file */

/* Symbol table: three parallel arrays indexed by symbol number. */
static const char      *symname[MAXSYMS];	/* symbol name */
static const char      *value[MAXSYMS];		/* -Dsym=value */
static bool             ignore[MAXSYMS];	/* -iDsym or -iUsym */
static int              nsyms;			/* number of symbols */

/* Input and output files. */
static FILE            *input;			/* input file pointer */
static const char      *filename;		/* input file name */
static int              linenum;		/* current line number */
static FILE            *output;			/* output file pointer */
static const char      *ofilename;		/* output file name */
static bool             overwriting;		/* output overwrites input */
static char             tempname[FILENAME_MAX];	/* used when overwriting */

/* The line currently being processed. */
static char             tline[MAXLINE+EDITSLOP];/* input buffer plus space */
static char            *keyword;		/* used for editing #elif's */

/* newline stays NULL until parseline() has looked at the first line. */
static const char      *newline;		/* input file format */
static const char       newline_unix[] = "\n";
static const char       newline_crlf[] = "\r\n";

/* Parser and state machine state; the arrays are indexed by depth. */
static Comment_state    incomment;		/* comment parser state */
static Line_state       linestate;		/* #if line parser state */
static Ifstate          ifstate[MAXDEPTH];	/* #if processor state */
static bool             ignoring[MAXDEPTH];	/* ignore comments state */
static int              stifline[MAXDEPTH];	/* start of current #if */
static int              depth;			/* current #if nesting */
static int              delcount;		/* count of deleted lines */
static unsigned         blankcount;		/* count of blank lines */
static unsigned         blankmax;		/* maximum recent blankcount */
/*
 * NOTE(review): "constexpr" is a keyword in C23, so this identifier will
 * not compile in that mode; renaming it needs a file-wide change.
 */
static bool             constexpr;		/* constant #if expression */
static bool             zerosyms = true;	/* to format symdepth output */
static bool             firstsym;		/* ditto */

/* Becomes 1 as soon as flushline() drops a line. */
static int              exitstat;		/* program exit status */
    211
/*
 * Forward declarations of the file-local functions, in alphabetical
 * order (apart from parseline), so that definitions may appear in any
 * order below.
 */
static void             addsym(bool, bool, char *);
static void             closeout(void);
static void             debug(const char *, ...);
static void             done(void);
static void             error(const char *);
static int              findsym(const char *);
static void             flushline(bool);
static Linetype         parseline(void);
static Linetype         ifeval(const char **);
static void             ignoreoff(void);
static void             ignoreon(void);
static void             keywordedit(const char *);
static void             nest(void);
static void             process(void);
static const char      *skipargs(const char *);
static const char      *skipcomment(const char *);
static const char      *skipsym(const char *);
static void             state(Ifstate);
static int              strlcmp(const char *, const char *, size_t);
static void             unnest(void);
static void             usage(void);
static void             version(void);
    234
    235#define endsym(c) (!isalnum((unsigned char)c) && c != '_')
    236
/*
 * The main program.
 *
 * Parses the command line, opens the input (a named file, or stdin when
 * no file or "-" is given) and the output (stdout, the -o file, or a
 * temporary file when -o names the very file being read), then hands
 * over to process(). process() loops forever, so the program normally
 * terminates through done() or an error exit; reaching the abort() at
 * the end indicates a bug.
 *
 * Exit status 2 is used for usage and file errors.
 */
int
main(int argc, char *argv[])
{
	int opt;

	while ((opt = getopt(argc, argv, "i:D:U:I:o:bBcdeKklnsStV")) != -1)
		switch (opt) {
		case 'i': /* treat stuff controlled by these symbols as text */
			/*
			 * For strict backwards-compatibility the U or D
			 * should be immediately after the -i but it doesn't
			 * matter much if we relax that requirement.
			 */
			/* consume the D/U letter; optarg then names the symbol */
			opt = *optarg++;
			if (opt == 'D')
				addsym(true, true, optarg);
			else if (opt == 'U')
				addsym(true, false, optarg);
			else
				usage();
			break;
		case 'D': /* define a symbol */
			addsym(false, true, optarg);
			break;
		case 'U': /* undef a symbol */
			addsym(false, false, optarg);
			break;
		case 'I': /* no-op for compatibility with cpp */
			break;
		case 'b': /* blank deleted lines instead of omitting them */
		case 'l': /* backwards compatibility */
			lnblank = true;
			break;
		case 'B': /* compress blank lines around removed section */
			compblank = true;
			break;
		case 'c': /* treat -D as -U and vice versa */
			complement = true;
			break;
		case 'd':
			debugging = true;
			break;
		case 'e': /* fewer errors from dodgy lines */
			iocccok = true;
			break;
		case 'K': /* keep ambiguous #ifs */
			strictlogic = true;
			break;
		case 'k': /* process constant #ifs */
			killconsts = true;
			break;
		case 'n': /* add #line directive after deleted lines */
			lnnum = true;
			break;
		case 'o': /* output to a file */
			ofilename = optarg;
			break;
		case 's': /* only output list of symbols that control #ifs */
			symlist = true;
			break;
		case 'S': /* list symbols with their nesting depth */
			symlist = symdepth = true;
			break;
		case 't': /* don't parse C comments */
			text = true;
			break;
		case 'V': /* print version */
			version();
			/* not reached: version() always ends in exit(0) */
		default:
			usage();
		}
	argc -= optind;
	argv += optind;
	if (compblank && lnblank)
		errx(2, "-B and -b are mutually exclusive");
	if (argc > 1) {
		errx(2, "can only do one file");
	} else if (argc == 1 && strcmp(*argv, "-") != 0) {
		filename = *argv;
		input = fopen(filename, "rb");
		if (input == NULL)
			err(2, "can't open %s", filename);
	} else {
		filename = "[stdin]";
		input = stdin;
	}
	if (ofilename == NULL) {
		ofilename = "[stdout]";
		output = stdout;
	} else {
		struct stat ist, ost;
		/*
		 * The output overwrites the input when both names refer to
		 * the same device and inode. If either stat fails,
		 * overwriting keeps its initial value of false.
		 */
		if (stat(ofilename, &ost) == 0 &&
		    fstat(fileno(input), &ist) == 0)
			overwriting = (ist.st_dev == ost.st_dev
				    && ist.st_ino == ost.st_ino);
		if (overwriting) {
			const char *dirsep;
			int ofd;

			/*
			 * Write to a temporary file created in the same
			 * directory as the output file; done() renames it
			 * over the output once processing has succeeded.
			 */
			dirsep = strrchr(ofilename, '/');
			if (dirsep != NULL)
				snprintf(tempname, sizeof(tempname),
				    "%.*s/" TEMPLATE,
				    (int)(dirsep - ofilename), ofilename);
			else
				snprintf(tempname, sizeof(tempname),
				    TEMPLATE);
			ofd = mkstemp(tempname);
			if (ofd != -1)
				output = fdopen(ofd, "wb+");
			/* covers both a failed mkstemp() and a failed fdopen() */
			if (output == NULL)
				err(2, "can't create temporary file");
			/*
			 * Copy the input's permission bits to the temporary
			 * file; the result of fchmod() is not checked.
			 */
			fchmod(ofd, ist.st_mode & (S_IRWXU|S_IRWXG|S_IRWXO));
		} else {
			output = fopen(ofilename, "wb");
			if (output == NULL)
				err(2, "can't open %s", ofilename);
		}
	}
	process();
	abort(); /* bug */
}
    362
    363static void
    364version(void)
    365{
    366	const char *c = copyright;
    367	for (;;) {
    368		while (*++c != '$')
    369			if (*c == '\0')
    370				exit(0);
    371		while (*++c != '$')
    372			putc(*c, stderr);
    373		putc('\n', stderr);
    374	}
    375}
    376
    377static void
    378usage(void)
    379{
    380	fprintf(stderr, "usage: unifdef [-bBcdeKknsStV] [-Ipath]"
    381	    " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n");
    382	exit(2);
    383}
    384
    385/*
    386 * A state transition function alters the global #if processing state
    387 * in a particular way. The table below is indexed by the current
    388 * processing state and the type of the current line.
    389 *
    390 * Nesting is handled by keeping a stack of states; some transition
    391 * functions increase or decrease the depth. They also maintain the
    392 * ignore state on a stack. In some complicated cases they have to
    393 * alter the preprocessor directive, as follows.
    394 *
    395 * When we have processed a group that starts off with a known-false
    396 * #if/#elif sequence (which has therefore been deleted) followed by a
    397 * #elif that we don't understand and therefore must keep, we edit the
    398 * latter into a #if to keep the nesting correct. We use memcpy() to
    399 * overwrite the 4 byte token "elif" with "if  " without a '\0' byte.
    400 *
    401 * When we find a true #elif in a group, the following block will
    402 * always be kept and the rest of the sequence after the next #elif or
    403 * #else will be discarded. We edit the #elif into a #else and the
    404 * following directive to #endif since this has the desired behaviour.
    405 *
    406 * "Dodgy" directives are split across multiple lines, the most common
    407 * example being a multi-line comment hanging off the right of the
    408 * directive. We can handle them correctly only if there is no change
    409 * from printing to dropping (or vice versa) caused by that directive.
    410 * If the directive is the first of a group we have a choice between
    411 * failing with an error, or passing it through unchanged instead of
    412 * evaluating it. The latter is not the default to avoid questions from
    413 * users about unifdef unexpectedly leaving behind preprocessor directives.
    414 */
/*
 * Signature shared by every entry of trans_table[]: no arguments and no
 * result; a handler acts on the global parser state only.
 */
typedef void state_fn(void);

/* report an error */
static void Eelif (void) { error("Inappropriate #elif"); }
static void Eelse (void) { error("Inappropriate #else"); }
static void Eendif(void) { error("Inappropriate #endif"); }
static void Eeof  (void) { error("Premature EOF"); }
static void Eioccc(void) { error("Obfuscated preprocessor control line"); }
/* plain line handling */
static void print (void) { flushline(true); }
static void drop  (void) { flushline(false); }
/* output lacks group's start line */
static void Strue (void) { drop();  ignoreoff(); state(IS_TRUE_PREFIX); }
static void Sfalse(void) { drop();  ignoreoff(); state(IS_FALSE_PREFIX); }
static void Selse (void) { drop();               state(IS_TRUE_ELSE); }
/* print/pass this block */
static void Pelif (void) { print(); ignoreoff(); state(IS_PASS_MIDDLE); }
static void Pelse (void) { print();              state(IS_PASS_ELSE); }
static void Pendif(void) { print(); unnest(); }
/* discard this block */
static void Dfalse(void) { drop();  ignoreoff(); state(IS_FALSE_TRAILER); }
static void Delif (void) { drop();  ignoreoff(); state(IS_FALSE_MIDDLE); }
static void Delse (void) { drop();               state(IS_FALSE_ELSE); }
static void Dendif(void) { drop();  unnest(); }
/*
 * first line of group: nest() must come first because ignoreoff() and
 * state() in the callee operate on the new, deeper level
 */
static void Fdrop (void) { nest();  Dfalse(); }
static void Fpass (void) { nest();  Pelif(); }
static void Ftrue (void) { nest();  Strue(); }
static void Ffalse(void) { nest();  Sfalse(); }
/*
 * variable pedantry for obfuscated lines: complain unless -e was given,
 * otherwise pass the directive through.
 * NOTE(review): the code after Eioccc() only makes sense if error()
 * does not return -- confirm against its definition.
 * In Oiffy, ignoreon() must follow Fpass() because Fpass() ends up
 * calling ignoreoff() for the same level.
 */
static void Oiffy (void) { if (!iocccok) Eioccc(); Fpass(); ignoreon(); }
static void Oif   (void) { if (!iocccok) Eioccc(); Fpass(); }
static void Oelif (void) { if (!iocccok) Eioccc(); Pelif(); }
/*
 * ignore comments in this block; as above, ignoreon() comes last so
 * that it overrides the ignoreoff() done by the F* handler
 */
static void Idrop (void) { Fdrop();  ignoreon(); }
static void Itrue (void) { Ftrue();  ignoreon(); }
static void Ifalse(void) { Ffalse(); ignoreon(); }
/*
 * modify this line: Mpass overwrites exactly four bytes at keyword
 * ("elif" becomes "if  ") and prints via Pelif(); the others replace the
 * rest of the line through keywordedit(), which also prints it
 */
static void Mpass (void) { memcpy(keyword, "if  ", 4); Pelif(); }
static void Mtrue (void) { keywordedit("else");  state(IS_TRUE_MIDDLE); }
static void Melif (void) { keywordedit("endif"); state(IS_FALSE_TRAILER); }
static void Melse (void) { keywordedit("endif"); state(IS_FALSE_ELSE); }
    457
/*
 * The state transition table: trans_table[current Ifstate][Linetype] is
 * the handler that process() calls for each line. Every row has three
 * lines: the ten ordinary directive types, the same ten types in their
 * "dodgy" form, and finally PLAIN, EOF and ERROR (see the column legend
 * at the bottom of the table).
 *
 * The ERROR column is abort() in every row, so an LT_ERROR line type
 * reaching process() is treated as a bug. EOF is only acceptable in
 * IS_OUTSIDE, where it calls done().
 */
static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {
/* IS_OUTSIDE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eendif,
  print, done,  abort },
/* IS_FALSE_PREFIX */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc,
  drop,  Eeof,  abort },
/* IS_TRUE_PREFIX */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
  print, Eeof,  abort },
/* IS_PASS_MIDDLE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Pelif, Oelif, Oelif, Pelse, Pendif,
  print, Eeof,  abort },
/* IS_FALSE_MIDDLE */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc,
  drop,  Eeof,  abort },
/* IS_TRUE_MIDDLE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Pendif,
  print, Eeof,  abort },
/* IS_PASS_ELSE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Pendif,
  print, Eeof,  abort },
/* IS_FALSE_ELSE */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc,
  drop,  Eeof,  abort },
/* IS_TRUE_ELSE */
{ Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,
  Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eioccc,
  print, Eeof,  abort },
/* IS_FALSE_TRAILER */
{ Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,
  Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc,
  drop,  Eeof,  abort }
/*TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF
  TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF (DODGY)
  PLAIN  EOF    ERROR */
};
    503
    504/*
    505 * State machine utility functions
    506 */
    507static void
    508ignoreoff(void)
    509{
    510	if (depth == 0)
    511		abort(); /* bug */
    512	ignoring[depth] = ignoring[depth-1];
    513}
    514static void
    515ignoreon(void)
    516{
    517	ignoring[depth] = true;
    518}
    519static void
    520keywordedit(const char *replacement)
    521{
    522	snprintf(keyword, tline + sizeof(tline) - keyword,
    523	    "%s%s", replacement, newline);
    524	print();
    525}
    526static void
    527nest(void)
    528{
    529	if (depth > MAXDEPTH-1)
    530		abort(); /* bug */
    531	if (depth == MAXDEPTH-1)
    532		error("Too many levels of nesting");
    533	depth += 1;
    534	stifline[depth] = linenum;
    535}
    536static void
    537unnest(void)
    538{
    539	if (depth == 0)
    540		abort(); /* bug */
    541	depth -= 1;
    542}
    543static void
    544state(Ifstate is)
    545{
    546	ifstate[depth] = is;
    547}
    548
    549/*
    550 * Write a line to the output or not, according to command line options.
    551 */
    552static void
    553flushline(bool keep)
    554{
    555	if (symlist)
    556		return;
    557	if (keep ^ complement) {
    558		bool blankline = tline[strspn(tline, " \t\r\n")] == '\0';
    559		if (blankline && compblank && blankcount != blankmax) {
    560			delcount += 1;
    561			blankcount += 1;
    562		} else {
    563			if (lnnum && delcount > 0)
    564				printf("#line %d%s", linenum, newline);
    565			fputs(tline, output);
    566			delcount = 0;
    567			blankmax = blankcount = blankline ? blankcount + 1 : 0;
    568		}
    569	} else {
    570		if (lnblank)
    571			fputs(newline, output);
    572		exitstat = 1;
    573		delcount += 1;
    574		blankcount = 0;
    575	}
    576	if (debugging)
    577		fflush(output);
    578}
    579
    580/*
    581 * The driver for the state machine.
    582 */
    583static void
    584process(void)
    585{
    586	/* When compressing blank lines, act as if the file
    587	   is preceded by a large number of blank lines. */
    588	blankmax = blankcount = 1000;
    589	for (;;) {
    590		Linetype lineval = parseline();
    591		trans_table[ifstate[depth]][lineval]();
    592		debug("process line %d %s -> %s depth %d",
    593		    linenum, linetype_name[lineval],
    594		    ifstate_name[ifstate[depth]], depth);
    595	}
    596}
    597
    598/*
    599 * Flush the output and handle errors.
    600 */
    601static void
    602closeout(void)
    603{
    604	if (symdepth && !zerosyms)
    605		printf("\n");
    606	if (fclose(output) == EOF) {
    607		warn("couldn't write to %s", ofilename);
    608		if (overwriting) {
    609			unlink(tempname);
    610			errx(2, "%s unchanged", filename);
    611		} else {
    612			exit(2);
    613		}
    614	}
    615}
    616
    617/*
    618 * Clean up and exit.
    619 */
    620static void
    621done(void)
    622{
    623	if (incomment)
    624		error("EOF in comment");
    625	closeout();
    626	if (overwriting && rename(tempname, ofilename) == -1) {
    627		warn("couldn't rename temporary file");
    628		unlink(tempname);
    629		errx(2, "%s unchanged", ofilename);
    630	}
    631	exit(exitstat);
    632}
    633
    634/*
    635 * Parse a line and determine its type. We keep the preprocessor line
    636 * parser state between calls in the global variable linestate, with
    637 * help from skipcomment().
    638 */
    639static Linetype
    640parseline(void)
    641{
    642	const char *cp;
    643	int cursym;
    644	int kwlen;
    645	Linetype retval;
    646	Comment_state wascomment;
    647
    648	linenum++;
    649	if (fgets(tline, MAXLINE, input) == NULL)
    650		return (LT_EOF);
    651	if (newline == NULL) {
    652		if (strrchr(tline, '\n') == strrchr(tline, '\r') + 1)
    653			newline = newline_crlf;
    654		else
    655			newline = newline_unix;
    656	}
    657	retval = LT_PLAIN;
    658	wascomment = incomment;
    659	cp = skipcomment(tline);
    660	if (linestate == LS_START) {
    661		if (*cp == '#') {
    662			linestate = LS_HASH;
    663			firstsym = true;
    664			cp = skipcomment(cp + 1);
    665		} else if (*cp != '\0')
    666			linestate = LS_DIRTY;
    667	}
    668	if (!incomment && linestate == LS_HASH) {
    669		keyword = tline + (cp - tline);
    670		cp = skipsym(cp);
    671		kwlen = cp - keyword;
    672		/* no way can we deal with a continuation inside a keyword */
    673		if (strncmp(cp, "\\\r\n", 3) == 0 ||
    674		    strncmp(cp, "\\\n", 2) == 0)
    675			Eioccc();
    676		if (strlcmp("ifdef", keyword, kwlen) == 0 ||
    677		    strlcmp("ifndef", keyword, kwlen) == 0) {
    678			cp = skipcomment(cp);
    679			if ((cursym = findsym(cp)) < 0)
    680				retval = LT_IF;
    681			else {
    682				retval = (keyword[2] == 'n')
    683				    ? LT_FALSE : LT_TRUE;
    684				if (value[cursym] == NULL)
    685					retval = (retval == LT_TRUE)
    686					    ? LT_FALSE : LT_TRUE;
    687				if (ignore[cursym])
    688					retval = (retval == LT_TRUE)
    689					    ? LT_TRUEI : LT_FALSEI;
    690			}
    691			cp = skipsym(cp);
    692		} else if (strlcmp("if", keyword, kwlen) == 0)
    693			retval = ifeval(&cp);
    694		else if (strlcmp("elif", keyword, kwlen) == 0)
    695			retval = ifeval(&cp) - LT_IF + LT_ELIF;
    696		else if (strlcmp("else", keyword, kwlen) == 0)
    697			retval = LT_ELSE;
    698		else if (strlcmp("endif", keyword, kwlen) == 0)
    699			retval = LT_ENDIF;
    700		else {
    701			linestate = LS_DIRTY;
    702			retval = LT_PLAIN;
    703		}
    704		cp = skipcomment(cp);
    705		if (*cp != '\0') {
    706			linestate = LS_DIRTY;
    707			if (retval == LT_TRUE || retval == LT_FALSE ||
    708			    retval == LT_TRUEI || retval == LT_FALSEI)
    709				retval = LT_IF;
    710			if (retval == LT_ELTRUE || retval == LT_ELFALSE)
    711				retval = LT_ELIF;
    712		}
    713		if (retval != LT_PLAIN && (wascomment || incomment)) {
    714			retval += LT_DODGY;
    715			if (incomment)
    716				linestate = LS_DIRTY;
    717		}
    718		/* skipcomment normally changes the state, except
    719		   if the last line of the file lacks a newline, or
    720		   if there is too much whitespace in a directive */
    721		if (linestate == LS_HASH) {
    722			size_t len = cp - tline;
    723			if (fgets(tline + len, MAXLINE - len, input) == NULL) {
    724				/* append the missing newline */
    725				strcpy(tline + len, newline);
    726				cp += strlen(newline);
    727				linestate = LS_START;
    728			} else {
    729				linestate = LS_DIRTY;
    730			}
    731		}
    732	}
    733	if (linestate == LS_DIRTY) {
    734		while (*cp != '\0')
    735			cp = skipcomment(cp + 1);
    736	}
    737	debug("parser line %d state %s comment %s line", linenum,
    738	    comment_name[incomment], linestate_name[linestate]);
    739	return (retval);
    740}
    741
    742/*
    743 * These are the binary operators that are supported by the expression
    744 * evaluator.
    745 */
    746static Linetype op_strict(int *p, int v, Linetype at, Linetype bt) {
    747	if(at == LT_IF || bt == LT_IF) return (LT_IF);
    748	return (*p = v, v ? LT_TRUE : LT_FALSE);
    749}
    750static Linetype op_lt(int *p, Linetype at, int a, Linetype bt, int b) {
    751	return op_strict(p, a < b, at, bt);
    752}
    753static Linetype op_gt(int *p, Linetype at, int a, Linetype bt, int b) {
    754	return op_strict(p, a > b, at, bt);
    755}
    756static Linetype op_le(int *p, Linetype at, int a, Linetype bt, int b) {
    757	return op_strict(p, a <= b, at, bt);
    758}
    759static Linetype op_ge(int *p, Linetype at, int a, Linetype bt, int b) {
    760	return op_strict(p, a >= b, at, bt);
    761}
    762static Linetype op_eq(int *p, Linetype at, int a, Linetype bt, int b) {
    763	return op_strict(p, a == b, at, bt);
    764}
    765static Linetype op_ne(int *p, Linetype at, int a, Linetype bt, int b) {
    766	return op_strict(p, a != b, at, bt);
    767}
    768static Linetype op_or(int *p, Linetype at, int a, Linetype bt, int b) {
    769	if (!strictlogic && (at == LT_TRUE || bt == LT_TRUE))
    770		return (*p = 1, LT_TRUE);
    771	return op_strict(p, a || b, at, bt);
    772}
    773static Linetype op_and(int *p, Linetype at, int a, Linetype bt, int b) {
    774	if (!strictlogic && (at == LT_FALSE || bt == LT_FALSE))
    775		return (*p = 0, LT_FALSE);
    776	return op_strict(p, a && b, at, bt);
    777}
    778
    779/*
    780 * An evaluation function takes three arguments, as follows: (1) a pointer to
    781 * an element of the precedence table which lists the operators at the current
    782 * level of precedence; (2) a pointer to an integer which will receive the
    783 * value of the expression; and (3) a pointer to a char* that points to the
    784 * expression to be evaluated and that is updated to the end of the expression
    785 * when evaluation is complete. The function returns LT_FALSE if the value of
    786 * the expression is zero, LT_TRUE if it is non-zero, LT_IF if the expression
    787 * depends on an unknown symbol, or LT_ERROR if there is a parse failure.
    788 */
/* Forward declaration: the eval_fn signature refers to the table type. */
struct ops;

/*
 * Signature of an evaluation function: precedence-table position,
 * where to store the value, and the expression cursor to advance.
 */
typedef Linetype eval_fn(const struct ops *, int *, const char **);

/* The two evaluators, declared here because the table below names them. */
static eval_fn eval_table, eval_unary;
    794
/*
 * The precedence table. Expressions involving binary operators are evaluated
 * in a table-driven way by eval_table. When it evaluates a subexpression it
 * calls the inner function with its first argument pointing to the next
 * element of the table. Innermost expressions have special non-table-driven
 * handling.
 *
 * Rows run from the loosest-binding operator (||) to the tightest
 * (relational); only the last row has eval_unary as its inner function.
 * Each row has room for five operators but lists at most four, so at
 * least one trailing entry is zero-initialized -- presumably the
 * terminator that eval_table looks for (confirm against eval_table).
 * In the relational row the two-character operators are listed before
 * the one-character ones, presumably so that "<=" is tried before "<".
 */
static const struct ops {
	eval_fn *inner;		/* evaluator for the operands of this level */
	struct op {
		const char *str;	/* operator spelling */
		Linetype (*fn)(int *, Linetype, int, Linetype, int);
	} op[5];
} eval_ops[] = {
	{ eval_table, { { "||", op_or } } },
	{ eval_table, { { "&&", op_and } } },
	{ eval_table, { { "==", op_eq },
			{ "!=", op_ne } } },
	{ eval_unary, { { "<=", op_le },
			{ ">=", op_ge },
			{ "<", op_lt },
			{ ">", op_gt } } }
};
    818
    819/*
    820 * Function for evaluating the innermost parts of expressions,
    821 * viz. !expr (expr) number defined(symbol) symbol
    822 * We reset the constexpr flag in the last two cases.
    823 */
    824static Linetype
    825eval_unary(const struct ops *ops, int *valp, const char **cpp)
    826{
    827	const char *cp;
    828	char *ep;
    829	int sym;
    830	bool defparen;
    831	Linetype lt;
    832
    833	cp = skipcomment(*cpp);
    834	if (*cp == '!') {
    835		debug("eval%d !", ops - eval_ops);
    836		cp++;
    837		lt = eval_unary(ops, valp, &cp);
    838		if (lt == LT_ERROR)
    839			return (LT_ERROR);
    840		if (lt != LT_IF) {
    841			*valp = !*valp;
    842			lt = *valp ? LT_TRUE : LT_FALSE;
    843		}
    844	} else if (*cp == '(') {
    845		cp++;
    846		debug("eval%d (", ops - eval_ops);
    847		lt = eval_table(eval_ops, valp, &cp);
    848		if (lt == LT_ERROR)
    849			return (LT_ERROR);
    850		cp = skipcomment(cp);
    851		if (*cp++ != ')')
    852			return (LT_ERROR);
    853	} else if (isdigit((unsigned char)*cp)) {
    854		debug("eval%d number", ops - eval_ops);
    855		*valp = strtol(cp, &ep, 0);
    856		if (ep == cp)
    857			return (LT_ERROR);
    858		lt = *valp ? LT_TRUE : LT_FALSE;
    859		cp = skipsym(cp);
    860	} else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) {
    861		cp = skipcomment(cp+7);
    862		debug("eval%d defined", ops - eval_ops);
    863		if (*cp == '(') {
    864			cp = skipcomment(cp+1);
    865			defparen = true;
    866		} else {
    867			defparen = false;
    868		}
    869		sym = findsym(cp);
    870		if (sym < 0) {
    871			lt = LT_IF;
    872		} else {
    873			*valp = (value[sym] != NULL);
    874			lt = *valp ? LT_TRUE : LT_FALSE;
    875		}
    876		cp = skipsym(cp);
    877		cp = skipcomment(cp);
    878		if (defparen && *cp++ != ')')
    879			return (LT_ERROR);
    880		constexpr = false;
    881	} else if (!endsym(*cp)) {
    882		debug("eval%d symbol", ops - eval_ops);
    883		sym = findsym(cp);
    884		cp = skipsym(cp);
    885		if (sym < 0) {
    886			lt = LT_IF;
    887			cp = skipargs(cp);
    888		} else if (value[sym] == NULL) {
    889			*valp = 0;
    890			lt = LT_FALSE;
    891		} else {
    892			*valp = strtol(value[sym], &ep, 0);
    893			if (*ep != '\0' || ep == value[sym])
    894				return (LT_ERROR);
    895			lt = *valp ? LT_TRUE : LT_FALSE;
    896			cp = skipargs(cp);
    897		}
    898		constexpr = false;
    899	} else {
    900		debug("eval%d bad expr", ops - eval_ops);
    901		return (LT_ERROR);
    902	}
    903
    904	*cpp = cp;
    905	debug("eval%d = %d", ops - eval_ops, *valp);
    906	return (lt);
    907}
    908
    909/*
    910 * Table-driven evaluation of binary operators.
    911 */
    912static Linetype
    913eval_table(const struct ops *ops, int *valp, const char **cpp)
    914{
    915	const struct op *op;
    916	const char *cp;
    917	int val;
    918	Linetype lt, rt;
    919
    920	debug("eval%d", ops - eval_ops);
    921	cp = *cpp;
    922	lt = ops->inner(ops+1, valp, &cp);
    923	if (lt == LT_ERROR)
    924		return (LT_ERROR);
    925	for (;;) {
    926		cp = skipcomment(cp);
    927		for (op = ops->op; op->str != NULL; op++)
    928			if (strncmp(cp, op->str, strlen(op->str)) == 0)
    929				break;
    930		if (op->str == NULL)
    931			break;
    932		cp += strlen(op->str);
    933		debug("eval%d %s", ops - eval_ops, op->str);
    934		rt = ops->inner(ops+1, &val, &cp);
    935		if (rt == LT_ERROR)
    936			return (LT_ERROR);
    937		lt = op->fn(valp, lt, *valp, rt, val);
    938	}
    939
    940	*cpp = cp;
    941	debug("eval%d = %d", ops - eval_ops, *valp);
    942	debug("eval%d lt = %s", ops - eval_ops, linetype_name[lt]);
    943	return (lt);
    944}
    945
    946/*
    947 * Evaluate the expression on a #if or #elif line. If we can work out
    948 * the result we return LT_TRUE or LT_FALSE accordingly, otherwise we
    949 * return just a generic LT_IF.
    950 */
    951static Linetype
    952ifeval(const char **cpp)
    953{
    954	int ret;
    955	int val = 0;
    956
    957	debug("eval %s", *cpp);
    958	constexpr = killconsts ? false : true;
    959	ret = eval_table(eval_ops, &val, cpp);
    960	debug("eval = %d", val);
    961	return (constexpr ? LT_IF : ret == LT_ERROR ? LT_IF : ret);
    962}
    963
/*
 * Skip over comments, strings, and character literals and stop at the
 * next character position that is not whitespace. Between calls we keep
 * the comment state in the global variable incomment, and we also adjust
 * the global variable linestate when we see a newline.
 * XXX: doesn't cope with the buffer splitting inside a state transition.
 *
 * cp points into a NUL-terminated buffer. The return value points either
 * at the first character that is neither whitespace nor inside a comment
 * or literal, or at the terminating NUL when the buffer runs out first
 * (incomment then records the state to resume from on the next call).
 *
 * When the global `text' is set, or ignoring[depth] is set, no comment or
 * literal processing is done at all: only whitespace is skipped.
 */
static const char *
skipcomment(const char *cp)
{
	if (text || ignoring[depth]) {
		/* Whitespace-only mode; a newline still restarts the line. */
		for (; isspace((unsigned char)*cp); cp++)
			if (*cp == '\n')
				linestate = LS_START;
		return (cp);
	}
	/*
	 * Note that the loop body is one unbraced if/else chain, so every
	 * `continue' inside the switch goes back to this loop condition.
	 */
	while (*cp != '\0')
		/* don't reset to LS_START after a line continuation */
		if (strncmp(cp, "\\\r\n", 3) == 0)
			cp += 3;
		else if (strncmp(cp, "\\\n", 2) == 0)
			cp += 2;
		else switch (incomment) {
		case NO_COMMENT:
			/*
			 * A '/' followed directly by a line continuation may
			 * be the first half of a comment opener; the decision
			 * is deferred to the STARTING_COMMENT state.
			 */
			if (strncmp(cp, "/\\\r\n", 4) == 0) {
				incomment = STARTING_COMMENT;
				cp += 4;
			} else if (strncmp(cp, "/\\\n", 3) == 0) {
				incomment = STARTING_COMMENT;
				cp += 3;
			} else if (strncmp(cp, "/*", 2) == 0) {
				incomment = C_COMMENT;
				cp += 2;
			} else if (strncmp(cp, "//", 2) == 0) {
				incomment = CXX_COMMENT;
				cp += 2;
			} else if (strncmp(cp, "\'", 1) == 0) {
				/* Opening quote of a character literal. */
				incomment = CHAR_LITERAL;
				linestate = LS_DIRTY;
				cp += 1;
			} else if (strncmp(cp, "\"", 1) == 0) {
				/* Opening quote of a string literal. */
				incomment = STRING_LITERAL;
				linestate = LS_DIRTY;
				cp += 1;
			} else if (strncmp(cp, "\n", 1) == 0) {
				linestate = LS_START;
				cp += 1;
			} else if (strchr(" \r\t", *cp) != NULL) {
				/* Horizontal whitespace (space, CR, tab). */
				cp += 1;
			} else
				/* First significant character: stop here. */
				return (cp);
			continue;
		case CXX_COMMENT:
			/* A // comment runs up to and including the newline. */
			if (strncmp(cp, "\n", 1) == 0) {
				incomment = NO_COMMENT;
				linestate = LS_START;
			}
			cp += 1;
			continue;
		case CHAR_LITERAL:
		case STRING_LITERAL:
			if ((incomment == CHAR_LITERAL && cp[0] == '\'') ||
			    (incomment == STRING_LITERAL && cp[0] == '\"')) {
				/* Matching closing quote. */
				incomment = NO_COMMENT;
				cp += 1;
			} else if (cp[0] == '\\') {
				/*
				 * Skip the escaped character too, unless the
				 * backslash is the last byte of the buffer.
				 */
				if (cp[1] == '\0')
					cp += 1;
				else
					cp += 2;
			} else if (strncmp(cp, "\n", 1) == 0) {
				/*
				 * cp is not advanced on this path; error()
				 * finishes by calling errx(), so it is not
				 * expected to return here.
				 */
				if (incomment == CHAR_LITERAL)
					error("unterminated char literal");
				else
					error("unterminated string literal");
			} else
				cp += 1;
			continue;
		case C_COMMENT:
			/*
			 * A '*' followed directly by a line continuation may
			 * be the first half of the comment terminator; the
			 * decision is deferred to FINISHING_COMMENT.
			 */
			if (strncmp(cp, "*\\\r\n", 4) == 0) {
				incomment = FINISHING_COMMENT;
				cp += 4;
			} else if (strncmp(cp, "*\\\n", 3) == 0) {
				incomment = FINISHING_COMMENT;
				cp += 3;
			} else if (strncmp(cp, "*/", 2) == 0) {
				incomment = NO_COMMENT;
				cp += 2;
			} else
				cp += 1;
			continue;
		case STARTING_COMMENT:
			/* Seen '/' plus a line continuation; what follows? */
			if (*cp == '*') {
				incomment = C_COMMENT;
				cp += 1;
			} else if (*cp == '/') {
				incomment = CXX_COMMENT;
				cp += 1;
			} else {
				/*
				 * Not a comment after all. The '/' has already
				 * been consumed, so mark the line dirty and
				 * rescan the current character as normal text.
				 */
				incomment = NO_COMMENT;
				linestate = LS_DIRTY;
			}
			continue;
		case FINISHING_COMMENT:
			/* Seen '*' plus a line continuation inside a comment. */
			if (*cp == '/') {
				incomment = NO_COMMENT;
				cp += 1;
			} else
				/* Still inside; rescan this character. */
				incomment = C_COMMENT;
			continue;
		default:
			abort(); /* bug */
		}
	/* Ran out of input; cp points at the terminating NUL. */
	return (cp);
}
   1079
   1080/*
   1081 * Skip macro arguments.
   1082 */
   1083static const char *
   1084skipargs(const char *cp)
   1085{
   1086	const char *ocp = cp;
   1087	int level = 0;
   1088	cp = skipcomment(cp);
   1089	if (*cp != '(')
   1090		return (cp);
   1091	do {
   1092		if (*cp == '(')
   1093			level++;
   1094		if (*cp == ')')
   1095			level--;
   1096		cp = skipcomment(cp+1);
   1097	} while (level != 0 && *cp != '\0');
   1098	if (level == 0)
   1099		return (cp);
   1100	else
   1101	/* Rewind and re-detect the syntax error later. */
   1102		return (ocp);
   1103}
   1104
   1105/*
   1106 * Skip over an identifier.
   1107 */
   1108static const char *
   1109skipsym(const char *cp)
   1110{
   1111	while (!endsym(*cp))
   1112		++cp;
   1113	return (cp);
   1114}
   1115
   1116/*
   1117 * Look for the symbol in the symbol table. If it is found, we return
   1118 * the symbol table index, else we return -1.
   1119 */
   1120static int
   1121findsym(const char *str)
   1122{
   1123	const char *cp;
   1124	int symind;
   1125
   1126	cp = skipsym(str);
   1127	if (cp == str)
   1128		return (-1);
   1129	if (symlist) {
   1130		if (symdepth && firstsym)
   1131			printf("%s%3d", zerosyms ? "" : "\n", depth);
   1132		firstsym = zerosyms = false;
   1133		printf("%s%.*s%s",
   1134		    symdepth ? " " : "",
   1135		    (int)(cp-str), str,
   1136		    symdepth ? "" : "\n");
   1137		/* we don't care about the value of the symbol */
   1138		return (0);
   1139	}
   1140	for (symind = 0; symind < nsyms; ++symind) {
   1141		if (strlcmp(symname[symind], str, cp-str) == 0) {
   1142			debug("findsym %s %s", symname[symind],
   1143			    value[symind] ? value[symind] : "");
   1144			return (symind);
   1145		}
   1146	}
   1147	return (-1);
   1148}
   1149
   1150/*
   1151 * Add a symbol to the symbol table.
   1152 */
   1153static void
   1154addsym(bool ignorethis, bool definethis, char *sym)
   1155{
   1156	int symind;
   1157	char *val;
   1158
   1159	symind = findsym(sym);
   1160	if (symind < 0) {
   1161		if (nsyms >= MAXSYMS)
   1162			errx(2, "too many symbols");
   1163		symind = nsyms++;
   1164	}
   1165	symname[symind] = sym;
   1166	ignore[symind] = ignorethis;
   1167	val = sym + (skipsym(sym) - sym);
   1168	if (definethis) {
   1169		if (*val == '=') {
   1170			value[symind] = val+1;
   1171			*val = '\0';
   1172		} else if (*val == '\0')
   1173			value[symind] = "1";
   1174		else
   1175			usage();
   1176	} else {
   1177		if (*val != '\0')
   1178			usage();
   1179		value[symind] = NULL;
   1180	}
   1181	debug("addsym %s=%s", symname[symind],
   1182	    value[symind] ? value[symind] : "undef");
   1183}
   1184
   1185/*
   1186 * Compare s with n characters of t.
   1187 * The same as strncmp() except that it checks that s[n] == '\0'.
   1188 */
   1189static int
   1190strlcmp(const char *s, const char *t, size_t n)
   1191{
   1192	while (n-- && *t != '\0')
   1193		if (*s != *t)
   1194			return ((unsigned char)*s - (unsigned char)*t);
   1195		else
   1196			++s, ++t;
   1197	return ((unsigned char)*s);
   1198}
   1199
   1200/*
   1201 * Diagnostics.
   1202 */
   1203static void
   1204debug(const char *msg, ...)
   1205{
   1206	va_list ap;
   1207
   1208	if (debugging) {
   1209		va_start(ap, msg);
   1210		vwarnx(msg, ap);
   1211		va_end(ap);
   1212	}
   1213}
   1214
   1215static void
   1216error(const char *msg)
   1217{
   1218	if (depth == 0)
   1219		warnx("%s: %d: %s", filename, linenum, msg);
   1220	else
   1221		warnx("%s: %d: %s (#if line %d depth %d)",
   1222		    filename, linenum, msg, stifline[depth], depth);
   1223	closeout();
   1224	errx(2, "output may be truncated");
   1225}