cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

lex.l (8463B)


      1/* SPDX-License-Identifier: GPL-2.0-or-later */
      2/*
      3 * Lexical analysis for genksyms.
      4 * Copyright 1996, 1997 Linux International.
      5 *
      6 * New implementation contributed by Richard Henderson <rth@tamu.edu>
      7 * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
      8 *
      9 * Taken from Linux modutils 2.4.22.
     10 */
     11
     12%{
     13
     14#include <limits.h>
     15#include <stdlib.h>
     16#include <string.h>
     17#include <ctype.h>
     18
     19#include "genksyms.h"
     20#include "parse.tab.h"
     21
     22/* We've got a two-level lexer here.  We let flex do basic tokenization
     23   and then we categorize those basic tokens in the second stage.  */
       /* YY_DECL is the declaration flex uses for the scanner it generates.
          Here it becomes the file-local yylex1(); the hand-written yylex()
          further down in this file calls yylex1() to obtain raw tokens.  */
     24#define YY_DECL		static int yylex1(void)
     25
     26%}
     27
       /* Identifiers: a letter, '_' or '$', followed by any number of
          letters, digits, '_' or '$'.  */
     28IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*
     29
       /* Integer constants: octal (a leading 0, which also covers a lone 0),
          decimal (no leading 0) or hexadecimal (0x/0X plus at least one hex
          digit), with an optional suffix.  I_SUF lists only U, L, UL and LU
          in either case; double-L suffixes such as LL or ULL are not part of
          the pattern.  */
     30O_INT			0[0-7]*
     31D_INT			[1-9][0-9]*
     32X_INT			0[Xx][0-9A-Fa-f]+
     33I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
     34INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?
     35
       /* Floating constants.  FRAC is a digit string containing a decimal
          point, with digits required on at least one side of it; EXP is an
          exponent with optional sign.  REAL is either a FRAC with an optional
          exponent or a plain digit string with a mandatory exponent, and
          either form may end in an F or L suffix.  */
     36FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
     37EXP			[Ee][+-]?[0-9]+
     38F_SUF			[FfLl]
     39REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)
     40
       /* The remaining three definitions:
          STRING and CHAR are double- and single-quoted literals with an
          optional L prefix.  Inside the quotes a backslash consumes the
          character after it, so an escaped quote does not end the literal.
          MC_TOKEN is the set of two-character operators: one of
          ~ % ^ & * + = | < > / - followed by '=', or one of
          && || -> << >>.  */
     41STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
     42CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'
     43
     44MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
     45
     46/* We don't do multiple input files.  */
     47%option noyywrap
     48
     49%option noinput
     50
     51%%
     52
     53
     54 /* Keep track of our location in the original source files.  */
        /* A line marker of the form  # <int> "file" ...  is passed to yylex()
           as FILENAME, and yylex() takes the new line number from the marker
           itself.  Any other line starting with '#', and every newline, only
           advances cur_line.

           Rule order matters further down: when two rules match text of the
           same length flex uses the one listed first, so the catch-all '.'
           rule has to stay last.  */
     55^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
     56^#.*\n					cur_line++;
     57\n					cur_line++;
     58
     59 /* Ignore all other whitespace.  */
     60[ \t\f\v\r]+				;
     61
     62
     63{STRING}				return STRING;
     64{CHAR}					return CHAR;
     65{IDENT}					return IDENT;
     66
     67 /* The Pedant requires that the other C multi-character tokens be
     68    recognized as tokens.  We don't actually use them since we don't
     69    parse expressions, but we do want whitespace to be arranged
     70    around them properly.  */
     71{MC_TOKEN}				return OTHER;
     72{INT}					return INT;
     73{REAL}					return REAL;
     74
     75"..."					return DOTS;
     76
     77 /* All other tokens are single characters.  */
     78.					return yytext[0];
     79
     80
     81%%
     82
     83/* Bring in the keyword recognizer.  */
     84
     85#include "keywords.c"
     86
     87
     88/* Macros to append to our phrase collection list.  */
     89
     90/*
     91 * We mark any token that equals a known enumerator as
     92 * SYM_ENUM_CONST. The parser will change this for struct and union tags later,
     93 * the only problem is struct and union members:
     94 *    enum e { a, b }; struct s { int a, b; }
     95 * but in this case, the only effect will be, that the ABI checksums become
     96 * more volatile, which is acceptable. Also, such collisions are quite rare,
     97 * so far it was only observed in include/linux/telephony.h.
     98 */
       /*
        * _APP(T, L) records one token.  It must be used where cur_node,
        * next_node and in_source_file are in scope (inside yylex()).
        *
        *   T  text of the token; L + 1 bytes are copied from it, so it must
        *      be terminated after L characters.
        *   L  length of the text, not counting the terminator.
        *
        * The node that was waiting in next_node becomes cur_node and is
        * filled in with a private copy of the text, its tag and the current
        * in_source_file value.  A fresh, still empty node is then allocated
        * as next_node, and its next pointer is set to the node just filled,
        * so the list is linked from newest to oldest.
        *
        * T and L are parenthesised in the expansion so that an expression
        * argument cannot regroup with the "+ 1".  Both are still expanded
        * more than once and must not have side effects.
        */
     99#define _APP(T,L)	do {						   \
    100			  cur_node = next_node;				   \
    101			  next_node = xmalloc(sizeof(*next_node));	   \
    102			  next_node->next = cur_node;			   \
    103			  cur_node->string = memcpy(xmalloc((L)+1), (T), (L)+1); \
    104			  cur_node->tag =				   \
    105			    find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\
    106			    SYM_ENUM_CONST : SYM_NORMAL ;		   \
    107			  cur_node->in_source_file = in_source_file;       \
    108			} while (0)
    109
       /* APP records the token flex has just matched.  */
    110#define APP		_APP(yytext, yyleng)
    111
    112
    113/* The second stage lexer.  Here we incorporate knowledge of the state
    114   of the parser to tailor the tokens that are returned.  */
    115
       /*
        * Each call pulls raw tokens from yylex1() until there is something to
        * return.  lexstate and the other static locals keep their values
        * between calls; count, the nesting depth of the group being
        * collected, starts at 0 on every call.
        *
        * In ST_NORMAL most tokens are returned one at a time.  The other
        * states keep reading (goto repeat) until a balanced group closes and
        * then return a single *_PHRASE token for the whole group.  Token text
        * is recorded with APP as it is consumed.
        *
        * On return yylval is the address of next_node->next, the link that
        * APP pointed at the most recently recorded node.
        *
        * Returns 0 as soon as yylex1() returns 0.  FILENAME tokens are
        * handled here and never returned.
        */
    116int
    117yylex(void)
    118{
    119  static enum {
    120    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
    121    ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
    122  } lexstate = ST_NOTSTARTED;
    123
    124  static int suppress_type_lookup, dont_want_brace_phrase;
    125  static struct string_list *next_node;
    126  static char *source_file;
    127
    128  int token, count = 0;
    129  struct string_list *cur_node;
    130
         /* First call: allocate the empty node that the first APP will fill.  */
    131  if (lexstate == ST_NOTSTARTED)
    132    {
    133      next_node = xmalloc(sizeof(*next_node));
    134      next_node->next = NULL;
    135      lexstate = ST_NORMAL;
    136    }
    137
    138repeat:
    139  token = yylex1();
    140
    141  if (token == 0)
    142    return 0;
    143  else if (token == FILENAME)
    144    {
    145      char *file, *e;
    146
    147      /* Save the filename and line number for later error messages.  */
    148
    149      if (cur_filename)
    150	free(cur_filename);
    151
             /* The FILENAME rule only matches text that contains a quoted
                name, so both strchr() calls find a quote.  The closing quote
                is overwritten inside yytext to terminate the name.  */
    152      file = strchr(yytext, '\"')+1;
    153      e = strchr(file, '\"');
    154      *e = '\0';
    155      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
             /* yytext + 2 skips the '#' and the first blank; atoi() skips any
                further blanks in front of the digits.  */
    156      cur_line = atoi(yytext+2);
    157
             /* The first file named by a line marker is remembered in
                source_file.  From then on in_source_file says whether the
                current marker names that same file.  */
    158      if (!source_file) {
    159        source_file = xstrdup(cur_filename);
    160        in_source_file = 1;
    161      } else {
    162        in_source_file = (strcmp(cur_filename, source_file) == 0);
    163      }
    164
    165      goto repeat;
    166    }
    167
    168  switch (lexstate)
    169    {
    170    case ST_NORMAL:
    171      switch (token)
    172	{
    173	case IDENT:
               /* Record the identifier first, then check for a keyword.  */
    174	  APP;
    175	  {
    176	    int r = is_reserved_word(yytext, yyleng);
                 /* A non-negative result replaces the token code.  */
    177	    if (r >= 0)
    178	      {
    179		switch (token = r)
    180		  {
                       /* These keywords (and STATIC_ASSERT_KEYW below) open a
                          parenthesised group that is collected in a state of
                          its own; nothing is returned yet.  */
    181		  case ATTRIBUTE_KEYW:
    182		    lexstate = ST_ATTRIBUTE;
    183		    count = 0;
    184		    goto repeat;
    185		  case ASM_KEYW:
    186		    lexstate = ST_ASM;
    187		    count = 0;
    188		    goto repeat;
    189		  case TYPEOF_KEYW:
    190		    lexstate = ST_TYPEOF;
    191		    count = 0;
    192		    goto repeat;
    193
                       /* Both counters are decremented at fini each time a
                          token is returned, this keyword included.  The net
                          effect: the typedef lookup is skipped for the next
                          returned token, and a '{' among the next two
                          returned tokens comes back as a plain '{' instead of
                          starting a brace phrase.  goto fini also skips the
                          typedef lookup for the keyword itself.  */
    194		  case STRUCT_KEYW:
    195		  case UNION_KEYW:
    196		  case ENUM_KEYW:
    197		    dont_want_brace_phrase = 3;
    198		    suppress_type_lookup = 2;
    199		    goto fini;
    200
    201		  case EXPORT_SYMBOL_KEYW:
    202		      goto fini;
    203
    204		  case STATIC_ASSERT_KEYW:
    205		    lexstate = ST_STATIC_ASSERT;
    206		    count = 0;
    207		    goto repeat;
    208		  }
    209	      }
                 /* An identifier for which find_symbol() reports a SYM_TYPEDEF
                    entry is returned as TYPE.  */
    210	    if (!suppress_type_lookup)
    211	      {
    212		if (find_symbol(yytext, SYM_TYPEDEF, 1))
    213		  token = TYPE;
    214	      }
    215	  }
    216	  break;
    217
             /* Start collecting a bracket phrase; this '[' is depth 1.  */
    218	case '[':
    219	  APP;
    220	  lexstate = ST_BRACKET;
    221	  count = 1;
    222	  goto repeat;
    223
             /* While dont_want_brace_phrase is non-zero (shortly after
                struct/union/enum) the brace is returned on its own; otherwise
                start collecting a brace phrase at depth 1.  */
    224	case '{':
    225	  APP;
    226	  if (dont_want_brace_phrase)
    227	    break;
    228	  lexstate = ST_BRACE;
    229	  count = 1;
    230	  goto repeat;
    231
             /* The token itself is returned now; the tokens after it are
                collected as an expression on the following call.  */
    232	case '=': case ':':
    233	  APP;
    234	  lexstate = ST_EXPRESSION;
    235	  break;
    236
    237	default:
    238	  APP;
    239	  break;
    240	}
    241      break;
    242
           /* Record everything up to the ')' that brings the depth back to 0,
              then return the group as ATTRIBUTE_PHRASE.  */
    243    case ST_ATTRIBUTE:
    244      APP;
    245      switch (token)
    246	{
    247	case '(':
    248	  ++count;
    249	  goto repeat;
    250	case ')':
    251	  if (--count == 0)
    252	    {
    253	      lexstate = ST_NORMAL;
    254	      token = ATTRIBUTE_PHRASE;
    255	      break;
    256	    }
    257	  goto repeat;
    258	default:
    259	  goto repeat;
    260	}
    261      break;
    262
           /* Same scheme as ST_ATTRIBUTE, returning ASM_PHRASE.  */
    263    case ST_ASM:
    264      APP;
    265      switch (token)
    266	{
    267	case '(':
    268	  ++count;
    269	  goto repeat;
    270	case ')':
    271	  if (--count == 0)
    272	    {
    273	      lexstate = ST_NORMAL;
    274	      token = ASM_PHRASE;
    275	      break;
    276	    }
    277	  goto repeat;
    278	default:
    279	  goto repeat;
    280	}
    281      break;
    282
           /* Entered from ST_TYPEOF right after the first '(' that follows
              typeof; that '(' has not been recorded yet.  If the token after
              it is an identifier that is a reserved word or has a SYM_TYPEDEF
              entry, push the identifier and a '(' back onto the input and
              return TYPEOF_KEYW, so the tokens are read again in ST_NORMAL.
              Otherwise continue in ST_TYPEOF with the current token.
              NOTE(review): the deferred "(" is only recorded when the token
              is an IDENT; for any other token it is left out of the phrase.
              Confirm that this is intended.  */
    283    case ST_TYPEOF_1:
    284      if (token == IDENT)
    285	{
    286	  if (is_reserved_word(yytext, yyleng) >= 0
    287	      || find_symbol(yytext, SYM_TYPEDEF, 1))
    288	    {
    289	      yyless(0);
    290	      unput('(');
    291	      lexstate = ST_NORMAL;
    292	      token = TYPEOF_KEYW;
    293	      break;
    294	    }
    295	  _APP("(", 1);
    296	}
    297	lexstate = ST_TYPEOF;
    298	/* FALLTHRU */
    299
           /* Collect a typeof(...) group.  The first '(' is not recorded here
              but handed to ST_TYPEOF_1; nested ones are recorded.  The ')'
              that returns the depth to 0 ends the group.  */
    300    case ST_TYPEOF:
    301      switch (token)
    302	{
    303	case '(':
    304	  if ( ++count == 1 )
    305	    lexstate = ST_TYPEOF_1;
    306	  else
    307	    APP;
    308	  goto repeat;
    309	case ')':
    310	  APP;
    311	  if (--count == 0)
    312	    {
    313	      lexstate = ST_NORMAL;
    314	      token = TYPEOF_PHRASE;
    315	      break;
    316	    }
    317	  goto repeat;
    318	default:
    319	  APP;
    320	  goto repeat;
    321	}
    322      break;
    323
           /* count was set to 1 for the opening '[' in ST_NORMAL; record
              tokens until the matching ']' and return BRACKET_PHRASE.  */
    324    case ST_BRACKET:
    325      APP;
    326      switch (token)
    327	{
    328	case '[':
    329	  ++count;
    330	  goto repeat;
    331	case ']':
    332	  if (--count == 0)
    333	    {
    334	      lexstate = ST_NORMAL;
    335	      token = BRACKET_PHRASE;
    336	      break;
    337	    }
    338	  goto repeat;
    339	default:
    340	  goto repeat;
    341	}
    342      break;
    343
           /* As ST_BRACKET, for '{' ... '}' and BRACE_PHRASE.  */
    344    case ST_BRACE:
    345      APP;
    346      switch (token)
    347	{
    348	case '{':
    349	  ++count;
    350	  goto repeat;
    351	case '}':
    352	  if (--count == 0)
    353	    {
    354	      lexstate = ST_NORMAL;
    355	      token = BRACE_PHRASE;
    356	      break;
    357	    }
    358	  goto repeat;
    359	default:
    360	  goto repeat;
    361	}
    362      break;
    363
           /* Record tokens until a ',' or ';' or a '}' is seen at depth 0.
              That terminator is pushed back rather than recorded, and
              EXPRESSION_PHRASE is returned.  */
    364    case ST_EXPRESSION:
    365      switch (token)
    366	{
    367	case '(': case '[': case '{':
    368	  ++count;
    369	  APP;
    370	  goto repeat;
    371	case '}':
    372	  /* is this the last line of an enum declaration? */
    373	  if (count == 0)
    374	    {
    375	      /* Put back the token we just read so's we can find it again
    376		 after registering the expression.  */
    377	      unput(token);
    378
    379	      lexstate = ST_NORMAL;
    380	      token = EXPRESSION_PHRASE;
    381	      break;
    382	    }
    383	  /* FALLTHRU */
    384	case ')': case ']':
    385	  --count;
    386	  APP;
    387	  goto repeat;
    388	case ',': case ';':
    389	  if (count == 0)
    390	    {
    391	      /* Put back the token we just read so's we can find it again
    392		 after registering the expression.  */
    393	      unput(token);
    394
    395	      lexstate = ST_NORMAL;
    396	      token = EXPRESSION_PHRASE;
    397	      break;
    398	    }
    399	  APP;
    400	  goto repeat;
    401	default:
    402	  APP;
    403	  goto repeat;
    404	}
    405      break;
    406
           /* Same scheme as ST_ATTRIBUTE, returning STATIC_ASSERT_PHRASE.  */
    407    case ST_STATIC_ASSERT:
    408      APP;
    409      switch (token)
    410	{
    411	case '(':
    412	  ++count;
    413	  goto repeat;
    414	case ')':
    415	  if (--count == 0)
    416	    {
    417	      lexstate = ST_NORMAL;
    418	      token = STATIC_ASSERT_PHRASE;
    419	      break;
    420	    }
    421	  goto repeat;
    422	default:
    423	  goto repeat;
    424	}
    425      break;
    426
           /* lexstate holds a value that has no case above.  */
    427    default:
    428      exit(1);
    429    }
    430fini:
    431
         /* A token is about to be returned: age the two counters set by
            struct/union/enum.  */
    432  if (suppress_type_lookup > 0)
    433    --suppress_type_lookup;
    434  if (dont_want_brace_phrase > 0)
    435    --dont_want_brace_phrase;
    436
         /* next_node->next is the link to the most recently recorded node.  */
    437  yylval = &next_node->next;
    438
    439  return token;
    440}