lex.l (8463B)
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Lexical analysis for genksyms.
 * Copyright 1996, 1997 Linux International.
 *
 * New implementation contributed by Richard Henderson <rth@tamu.edu>
 * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
 *
 * Taken from Linux modutils 2.4.22.
 */

%{

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.tab.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.  */
#define YY_DECL		static int yylex1(void)

%}

IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

/* Integer constants.  I_SUF accepts a single U and/or a single L only;
   an "LL" suffix is not part of the INT match, so the trailing "L" is
   left for the next token.  NOTE(review): confirm this is intended
   before widening I_SUF, since it changes the emitted token text.  */
O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/* String and character literals, optionally L-prefixed, with backslash
   escapes allowed inside the quotes.  */
STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* We don't do multiple input files.  */
%option noyywrap

%option noinput

%%


 /* Keep track of our location in the original source files.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}				return OTHER;
{INT}					return INT;
{REAL}					return REAL;

"..."					return DOTS;

 /* All other tokens are single characters.  */
.					return yytext[0];


%%

/* Bring in the keyword recognizer.  */

#include "keywords.c"


/* Macros to append to our phrase collection list.  */

/*
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later,
 * the only problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case, the only effect will be, that the ABI checksums become
 * more volatile, which is acceptable. Also, such collisions are quite rare,
 * so far it was only observed in include/linux/telephony.h.
 *
 * _APP(T, L) stores a copy of the L-byte text T (plus its terminating NUL)
 * in the current spare node and allocates a fresh spare node whose ->next
 * points back at the node just filled.  It relies on the caller having
 * `cur_node' and `next_node' in scope.
 */
#define _APP(T,L)	do {						   \
	  cur_node = next_node;						   \
	  next_node = xmalloc(sizeof(*next_node));			   \
	  next_node->next = cur_node;					   \
	  cur_node->string = memcpy(xmalloc((L) + 1), (T), (L) + 1);	   \
	  cur_node->tag =						   \
	    find_symbol(cur_node->string, SYM_ENUM_CONST, 1) ?		   \
	    SYM_ENUM_CONST : SYM_NORMAL;				   \
	  cur_node->in_source_file = in_source_file;			   \
	} while (0)

#define APP		_APP(yytext, yyleng)


/*
 * Nesting bookkeeping shared by the states that swallow a balanced
 * OPENER ... CLOSER group.  An OPENER token increments *DEPTH, a CLOSER
 * token decrements it, and every other token leaves it untouched.
 * Returns nonzero only when a CLOSER has just brought *DEPTH to zero,
 * i.e. the group is complete.
 */
static int
phrase_closed(int token, int opener, int closer, int *depth)
{
  if (token == opener)
    ++*depth;
  else if (token == closer)
    return --*depth == 0;

  return 0;
}


/* The second stage lexer.  Here we incorporate knowledge of the state
   of the parser to tailor the tokens that are returned.

   Returns 0 when yylex1() reports end of input, otherwise the token for
   the parser.  Before returning a token, yylval is pointed at the link
   that leads to the most recently appended string_list node.  */

int
yylex(void)
{
  static enum {
    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
    ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
  } lexstate = ST_NOTSTARTED;

  /* Countdown counters: each is decremented once per returned token
     (see `fini'), so they stay in effect for a fixed number of tokens
     after a struct/union/enum keyword.  */
  static int suppress_type_lookup, dont_want_brace_phrase;
  static struct string_list *next_node;
  static char *source_file;

  /* `count' is the nesting depth for the phrase currently being
     collected; it restarts at zero on every call.  */
  int token, count = 0;
  struct string_list *cur_node;

  if (lexstate == ST_NOTSTARTED)
    {
      /* First call: create the initial spare node that _APP fills in.  */
      next_node = xmalloc(sizeof(*next_node));
      next_node->next = NULL;
      lexstate = ST_NORMAL;
    }

repeat:
  token = yylex1();

  if (token == 0)
    return 0;
  else if (token == FILENAME)
    {
      char *file, *e;

      /* Save the filename and line number for later error messages.  */

      /* free(NULL) is a no-op, so no guard is needed.  */
      free(cur_filename);

      /* The FILENAME rule only matches text containing a quoted,
	 non-empty name, so both quotes are present in yytext.  */
      file = strchr(yytext, '\"')+1;
      e = strchr(file, '\"');
      *e = '\0';
      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
      cur_line = atoi(yytext+2);

      /* The first file name seen is remembered as the source file;
	 later markers are compared against it.  */
      if (!source_file) {
        source_file = xstrdup(cur_filename);
        in_source_file = 1;
      } else {
        in_source_file = (strcmp(cur_filename, source_file) == 0);
      }

      goto repeat;
    }

  switch (lexstate)
    {
    case ST_NORMAL:
      switch (token)
	{
	case IDENT:
	  APP;
	  {
	    int r = is_reserved_word(yytext, yyleng);
	    if (r >= 0)
	      {
		switch (token = r)
		  {
		  /* These keywords open a phrase that is collected as a
		     whole; keep reading in the matching state.  */
		  case ATTRIBUTE_KEYW:
		    lexstate = ST_ATTRIBUTE;
		    count = 0;
		    goto repeat;
		  case ASM_KEYW:
		    lexstate = ST_ASM;
		    count = 0;
		    goto repeat;
		  case TYPEOF_KEYW:
		    lexstate = ST_TYPEOF;
		    count = 0;
		    goto repeat;

		  case STRUCT_KEYW:
		  case UNION_KEYW:
		  case ENUM_KEYW:
		    /* Counting this keyword itself: braces are returned
		       as plain '{' for the next three tokens, and
		       typedef lookup is skipped for the next token.  */
		    dont_want_brace_phrase = 3;
		    suppress_type_lookup = 2;
		    goto fini;

		  case EXPORT_SYMBOL_KEYW:
		      goto fini;

		  case STATIC_ASSERT_KEYW:
		    lexstate = ST_STATIC_ASSERT;
		    count = 0;
		    goto repeat;
		  }
	      }
	    if (!suppress_type_lookup)
	      {
		if (find_symbol(yytext, SYM_TYPEDEF, 1))
		  token = TYPE;
	      }
	  }
	  break;

	case '[':
	  APP;
	  lexstate = ST_BRACKET;
	  count = 1;
	  goto repeat;

	case '{':
	  APP;
	  if (dont_want_brace_phrase)
	    break;
	  lexstate = ST_BRACE;
	  count = 1;
	  goto repeat;

	case '=': case ':':
	  /* The token itself is returned now; what follows is collected
	     as an expression phrase on the next call.  */
	  APP;
	  lexstate = ST_EXPRESSION;
	  break;

	default:
	  APP;
	  break;
	}
      break;

    case ST_ATTRIBUTE:
      APP;
      if (!phrase_closed(token, '(', ')', &count))
	goto repeat;
      lexstate = ST_NORMAL;
      token = ATTRIBUTE_PHRASE;
      break;

    case ST_ASM:
      APP;
      if (!phrase_closed(token, '(', ')', &count))
	goto repeat;
      lexstate = ST_NORMAL;
      token = ASM_PHRASE;
      break;

    case ST_TYPEOF_1:
      /* We are looking at the first token after "typeof (".  */
      if (token == IDENT)
	{
	  if (is_reserved_word(yytext, yyleng) >= 0
	      || find_symbol(yytext, SYM_TYPEDEF, 1))
	    {
	      /* A keyword or typedef name: push the identifier and the
		 opening parenthesis back onto the input and hand the
		 parser a bare TYPEOF_KEYW instead of a phrase.  */
	      yyless(0);
	      unput('(');
	      lexstate = ST_NORMAL;
	      token = TYPEOF_KEYW;
	      break;
	    }
	  _APP("(", 1);
	}
      /* NOTE(review): when the first token is not an IDENT, the "(" that
	 was held back in ST_TYPEOF is never appended to the phrase.
	 Left as is because changing it would alter the collected text;
	 confirm whether this is intended.  */
      lexstate = ST_TYPEOF;
      /* FALLTHRU */

    case ST_TYPEOF:
      switch (token)
	{
	case '(':
	  /* Hold back the outermost '(' until ST_TYPEOF_1 has looked at
	     the token that follows it.  */
	  if ( ++count == 1 )
	    lexstate = ST_TYPEOF_1;
	  else
	    APP;
	  goto repeat;
	case ')':
	  APP;
	  if (--count == 0)
	    {
	      lexstate = ST_NORMAL;
	      token = TYPEOF_PHRASE;
	      break;
	    }
	  goto repeat;
	default:
	  APP;
	  goto repeat;
	}
      break;

    case ST_BRACKET:
      APP;
      if (!phrase_closed(token, '[', ']', &count))
	goto repeat;
      lexstate = ST_NORMAL;
      token = BRACKET_PHRASE;
      break;

    case ST_BRACE:
      APP;
      if (!phrase_closed(token, '{', '}', &count))
	goto repeat;
      lexstate = ST_NORMAL;
      token = BRACE_PHRASE;
      break;

    case ST_EXPRESSION:
      switch (token)
	{
	case '(': case '[': case '{':
	  ++count;
	  APP;
	  goto repeat;
	case '}':
	  /* is this the last line of an enum declaration? */
	  if (count == 0)
	    {
	      /* Put back the token we just read so's we can find it again
		 after registering the expression.  */
	      unput(token);

	      lexstate = ST_NORMAL;
	      token = EXPRESSION_PHRASE;
	      break;
	    }
	  /* FALLTHRU */
	case ')': case ']':
	  --count;
	  APP;
	  goto repeat;
	case ',': case ';':
	  if (count == 0)
	    {
	      /* Put back the token we just read so's we can find it again
		 after registering the expression.  */
	      unput(token);

	      lexstate = ST_NORMAL;
	      token = EXPRESSION_PHRASE;
	      break;
	    }
	  APP;
	  goto repeat;
	default:
	  APP;
	  goto repeat;
	}
      break;

    case ST_STATIC_ASSERT:
      APP;
      if (!phrase_closed(token, '(', ')', &count))
	goto repeat;
      lexstate = ST_NORMAL;
      token = STATIC_ASSERT_PHRASE;
      break;

    default:
      exit(1);
    }
fini:

  if (suppress_type_lookup > 0)
    --suppress_type_lookup;
  if (dont_want_brace_phrase > 0)
    --dont_want_brace_phrase;

  /* next_node is the empty spare; its ->next is the node filled last.  */
  yylval = &next_node->next;

  return token;
}