scc

Simple C99 Compiler
Log | Files | Refs | README | LICENSE

lex.c (12190B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include <ctype.h>
      3 #include <errno.h>
      4 #include <setjmp.h>
      5 #include <stdio.h>
      6 #include <stdlib.h>
      7 #include <string.h>
      8 
      9 #include "../inc/sizes.h"
     10 #include "../inc/cc.h"
     11 #include "arch.h"
     12 #include "cc1.h"
     13 
     14 unsigned yytoken;	/* code of the token most recently returned by next() */
     15 struct yystype yylval;	/* value attached to the current token (a symbol or a token code) */
     16 char yytext[STRINGSIZ+3];	/* spelling of the current token; string() stores both quotes plus the NUL here */
     17 unsigned short yylen;	/* length of the spelling held in yytext */
     18 int cppoff;	/* when nonzero, moreinput() throws away every line it reads */
     19 int lexmode = CCMODE;	/* in CPPMODE skipspaces() never fetches a new line */
     20 
     21 int namespace = NS_IDEN;	/* namespace iden() passes to lookup() */
     22 static int safe, eof;	/* safe: recovery point chosen with setsafe(); eof: set by delinput() once the last file is closed */
     23 Input *input;	/* top of the stack of input sources */
     24 
     25 void
     26 allocinput(char *fname, FILE *fp, char *line)
     27 {
     28 	Input *ip = xmalloc(sizeof(Input));
     29 
     30 	if (!line) {
     31 		line = xmalloc(INPUTSIZ);
     32 		line[0] = '\0';
     33 	}
     34 	ip->p = ip->begin = ip->line = line;
     35 	ip->nline = 0;
     36 	ip->fname = xstrdup(fname);
     37 	ip->next = input;
     38 	ip->fp = fp;
     39 	input = ip;
     40 }
     41 
     42 void
     43 ilex(void)
     44 {
     45 	static struct keyword keys[] = {
     46 		{"auto", SCLASS, AUTO},
     47 		{"break", BREAK, BREAK},
     48 		{"_Bool", TYPE, BOOL},
     49 		{"case", CASE, CASE},
     50 		{"char", TYPE, CHAR},
     51 		{"const", TQUALIFIER, CONST},
     52 		{"continue", CONTINUE, CONTINUE},
     53 		{"default", DEFAULT, DEFAULT},
     54 		{"do", DO, DO},
     55 		{"double", TYPE, DOUBLE},
     56 		{"else", ELSE, ELSE},
     57 		{"enum", TYPE, ENUM},
     58 		{"extern", SCLASS, EXTERN},
     59 		{"float", TYPE, FLOAT},
     60 		{"for", FOR, FOR},
     61 		{"goto", GOTO, GOTO},
     62 		{"if", IF, IF},
     63 		{"inline", TQUALIFIER, INLINE},
     64 		{"int", TYPE, INT},
     65 		{"long", TYPE, LONG},
     66 		{"register", SCLASS, REGISTER},
     67 		{"restrict", TQUALIFIER, RESTRICT},
     68 		{"return", RETURN, RETURN},
     69 		{"short", TYPE, SHORT},
     70 		{"signed", TYPE, SIGNED},
     71 		{"sizeof", SIZEOF, SIZEOF},
     72 		{"static", SCLASS, STATIC},
     73 		{"struct", TYPE, STRUCT},
     74 		{"switch", SWITCH, SWITCH},
     75 		{"typedef", SCLASS, TYPEDEF},
     76 		{"union", TYPE, UNION},
     77 		{"unsigned", TYPE, UNSIGNED},
     78 		{"void", TYPE, VOID},
     79 		{"volatile", TQUALIFIER, VOLATILE},
     80 		{"while", WHILE, WHILE},
     81 		{NULL, 0, 0},
     82 	};
     83 	keywords(keys, NS_KEYWORD);
     84 }
     85 
     86 int
     87 addinput(char *fname)
     88 {
     89 	FILE *fp;
     90 
     91 	if (fname) {
     92 		if ((fp = fopen(fname, "r")) == NULL)
     93 			return 0;
     94 	} else {
     95 		fp = stdin;
     96 		fname = "<stdin>";
     97 	}
     98 	allocinput(fname, fp, NULL);
     99 	return 1;
    100 }
    101 
    102 void
    103 delinput(void)
    104 {
    105 	Input *ip = input;
    106 
    107 	if (ip->fp) {
    108 		if (fclose(ip->fp))
    109 			die("error: failed to read from input file '%s'",
    110 			    ip->fname);
    111 		if (!ip->next)
    112 			eof = 1;
    113 	}
    114 	if (eof)
    115 		return;
    116 	input = ip->next;
    117 	free(ip->fname);
    118 	free(ip->line);
    119 }
    120 
    121 static void
    122 newline(void)
    123 {
    124 	if (++input->nline == 0)
    125 		die("error: input file '%s' too long", input->fname);
    126 }
    127 
    128 static int
    129 readchar(void)
    130 {
    131 	FILE *fp = input->fp;
    132 	int c;
    133 
    134 	if (eof || !fp)
    135 		return 0;
    136 repeat:
    137 	switch (c = getc(fp)) {
    138 	case EOF:
    139 		c = '\0';
    140 		break;
    141 	case '\\':
    142 		if ((c = getc(fp)) == '\n') {
    143 			newline();
    144 			goto repeat;
    145 		}
    146 		ungetc(c, fp);
    147 		c = '\\';
    148 		break;
    149 	case '\n':
    150 		newline();
    151 		break;
    152 	}
    153 
    154 	return c;
    155 }
    156 
    157 static void
    158 comment(int type)
    159 {
    160 	int c;
    161 
    162 	c = -1;
    163 repeat:
    164 	do {
    165 		if (!c || eof) {
    166 			errorp("unterminated comment");
    167 			return;
    168 		}
    169 	} while ((c = readchar()) != type);
    170 
    171 	if (type == '*' && (c = readchar()) != '/')
    172 		goto repeat;
    173 }
    174 
    175 static int
    176 readline(void)
    177 {
    178 	char *bp, *lim;
    179 	char c, peekc = 0;
    180 
    181 repeat:
    182 	input->begin = input->p = input->line;
    183 	*input->line = '\0';
    184 	if (eof)
    185 		return 0;
    186 	if (feof(input->fp)) {
    187 		delinput();
    188 		goto repeat;
    189 	}
    190 	lim = &input->line[INPUTSIZ-1];
    191 	for (bp = input->line; bp < lim; *bp++ = c) {
    192 		c = (peekc) ? peekc : readchar();
    193 		peekc = 0;
    194 		if (c == '\n' || c == '\0')
    195 			break;
    196 		if (c != '/' || (peekc = readchar()) != '*' && peekc != '/')
    197 			continue;
    198 		comment((peekc == '/') ? '\n' : peekc);
    199 		peekc = 0;
    200 		c = ' ';
    201 	}
    202 
    203 	if (bp == lim)
    204 		error("line too long");
    205 	*bp = '\0';
    206 	return 1;
    207 }
    208 
    209 int
    210 moreinput(void)
    211 {
    212 	static char file[FILENAME_MAX];
    213 	static unsigned nline;
    214 	char *s;
    215 
    216 repeat:
    217 	if (!readline())
    218 		return 0;
    219 	while (isspace(*input->p))
    220 		++input->p;
    221 	input->begin = input->p;
    222 	if (*input->p == '\0' || cpp() || cppoff) {
    223 		*input->begin = '\0';
    224 		goto repeat;
    225 	}
    226 
    227 	if (onlycpp) {
    228 		putchar('\n');
    229 		if (strcmp(file, input->fname)) {
    230 			strcpy(file, input->fname);
    231 			s = "#line %u %s\n";
    232 		} else if (nline+1 != input->nline) {
    233 			s = "#line %u\n";
    234 		} else {
    235 			s = "";
    236 		}
    237 		nline = input->nline;
    238 		printf(s, nline, file);
    239 	}
    240 	input->begin = input->p;
    241 	return 1;
    242 }
    243 
    244 static void
    245 tok2str(void)
    246 {
    247 	if ((yylen = input->p - input->begin) > INTIDENTSIZ)
    248 		error("token too big");
    249 	strncpy(yytext, input->begin, yylen);
    250 	yytext[yylen] = '\0';
    251 	input->begin = input->p;
    252 }
    253 
    254 static Symbol *
    255 readint(char *s, int base, int sign, Symbol *sym)
    256 {
    257 	Type *tp = sym->type;
    258 	struct limits *lim;
    259 	TUINT u, val, max;
    260 	int c;
    261 
    262 	lim = getlimits(tp);
    263 	max = lim->max.i;
    264 	if (*s == '0')
    265 		++s;
    266 	if (toupper(*s) == 'X')
    267 		++s;
    268 
    269 	for (u = 0; isxdigit(c = *s++); u = u*base + val) {
    270 		static char letters[] = "0123456789ABCDEF";
    271 		val = strchr(letters, toupper(c)) - letters;
    272 	repeat:
    273 		if (u <= max/base && u*base <= max - val)
    274 			continue;
    275 		if (tp->prop & TSIGNED) {
    276 			if (tp == inttype)
    277 				tp = (base==10) ? longtype : uinttype;
    278 			else if (tp == longtype)
    279 				tp = (base==10) ? llongtype : ulongtype;
    280 			else
    281 				goto overflow;
    282 		} else {
    283 			if (tp == uinttype)
    284 				tp = (sign==UNSIGNED) ? ulongtype : longtype;
    285 			else if (tp == ulongtype)
    286 				tp = (sign==UNSIGNED) ? ullongtype : llongtype;
    287 			else
    288 				goto overflow;
    289 		}
    290 		sym->type = tp;
    291 		lim = getlimits(tp);
    292 		max = lim->max.i;
    293 		goto repeat;
    294 	}
    295 
    296 	if (tp->prop & TSIGNED)
    297 		sym->u.i = u;
    298 	else
    299 		sym->u.u = u;
    300 
    301 	return sym;
    302 
    303 overflow:
    304 	errorp("overflow in integer constant");
    305 	return sym;
    306 }
    307 
    308 static unsigned
    309 integer(char *s, char base)
    310 {
    311 	Type *tp;
    312 	Symbol *sym;
    313 	unsigned size, sign;
    314 
    315 	for (size = sign = 0; ; ++input->p) {
    316 		switch (toupper(*input->p)) {
    317 		case 'L':
    318 			if (size == LLONG)
    319 				goto wrong_type;
    320 			size = (size == LONG) ? LLONG : LONG;
    321 			continue;
    322 		case 'U':
    323 			if (sign == UNSIGNED)
    324 				goto wrong_type;
    325 			sign = UNSIGNED;
    326 			continue;
    327 		default:
    328 			goto convert;
    329 		wrong_type:
    330 			error("invalid suffix in integer constant");
    331 		}
    332 	}
    333 
    334 convert:
    335 	tp = ctype(INT, sign, size);
    336 	sym = newsym(NS_IDEN);
    337 	sym->type = tp;
    338 	sym->flags |= SCONSTANT;
    339 	yylval.sym = readint(s, base, sign, sym);
    340 	return CONSTANT;
    341 }
    342 
    343 static char *
    344 digits(unsigned base)
    345 {
    346 	char c, *p;
    347 
    348 	for (p = input->p; c = *p; ++p) {
    349 		switch (base) {
    350 		case 8:
    351 			if (!strchr("01234567", c))
    352 				goto end;
    353 			break;
    354 		case 10:
    355 			if (!isdigit(c))
    356 				goto end;
    357 			break;
    358 		case 16:
    359 			if (!isxdigit(c))
    360 				goto end;
    361 			break;
    362 		}
    363 	}
    364 end:
    365 	input->p = p;
    366 	tok2str();
    367 	return yytext;
    368 }
    369 
    370 static unsigned
    371 number(void)
    372 {
    373 	char base;
    374 
    375 	if (*input->p != '0') {
    376 		base = 10;
    377 	} else {
    378 		if (toupper(*++input->p) == 'X') {
    379 			++input->p;
    380 			base = 16;
    381 		} else {
    382 			base = 8;
    383 		}
    384 	}
    385 
    386 	return integer(digits(base), base);
    387 }
    388 
    389 static char
    390 escape(void)
    391 {
    392 	int c, base;
    393 
    394 	switch (*++input->p) {
    395 	case 'a':  return '\a';
    396 	case 'f':  return '\f';
    397 	case 'n':  return '\n';
    398 	case 'r':  return '\r';
    399 	case 't':  return '\t';
    400 	case 'v':  return '\v';
    401 	case '"':  return '"';
    402 	case '\'': return '\'';
    403 	case '\\': return '\\';
    404 	case '\?': return '\?';
    405 	case 'u':
    406 		/*
    407 		 * FIXME: universal constants are not correctly handled
    408 		 */
    409 		if (!isdigit(*++input->p))
    410 			warn("incorrect digit for numerical character constant");
    411 		base = 10;
    412 		break;
    413 	case 'x':
    414 		if (!isxdigit(*++input->p))
    415 			warn("\\x used with no following hex digits");
    416 		base = 16;
    417 		break;
    418 	case '0':
    419 		if (!strchr("01234567", *++input->p))
    420 			warn("\\0 used with no following octal digits");
    421 		base = 8;
    422 		break;
    423 	default:
    424 		warn("unknown escape sequence");
    425 		return ' ';
    426 	}
    427 	errno = 0;
    428 	c = strtoul(input->p, &input->p, base);
    429 	if (errno || c > 255)
    430 		warn("character constant out of range");
    431 	--input->p;
    432 	return c;
    433 }
    434 
    435 static unsigned
    436 character(void)
    437 {
    438 	static char c;
    439 	Symbol *sym;
    440 
    441 	if ((c = *++input->p) == '\\')
    442 		c = escape();
    443 	else
    444 		c = *input->p;
    445 	++input->p;
    446 	if (*input->p != '\'')
    447 		error("invalid character constant");
    448 	else
    449 		++input->p;
    450 
    451 	sym = newsym(NS_IDEN);
    452 	sym->u.i = c;
    453 	sym->type = inttype;
    454 	yylval.sym = sym;
    455 	return CONSTANT;
    456 }
    457 
    458 static unsigned
    459 string(void)
    460 {
    461 	char *bp = yytext, c;
    462 
    463 	*bp++ = '"';
    464 repeat:
    465 	for (++input->p; (c = *input->p) != '"'; ++input->p) {
    466 		if (c == '\0')
    467 			error("missing terminating '\"' character");
    468 		if (c == '\\')
    469 			c = escape();
    470 		if (bp == &yytext[STRINGSIZ+1])
    471 			error("string too long");
    472 		*bp++ = c;
    473 	}
    474 
    475 	input->begin = ++input->p;
    476 	if (ahead() == '"')
    477 		goto repeat;
    478 	*bp = '\0';
    479 
    480 	yylen = bp - yytext + 1;
    481 	yylval.sym = newstring(yytext+1, yylen-1);
    482 	*bp++ = '"';
    483 	*bp = '\0';
    484 	return STRING;
    485 }
    486 
    487 static unsigned
    488 iden(void)
    489 {
    490 	Symbol *sym;
    491 	char *p, *begin;
    492 
    493 	begin = input->p;
    494 	for (p = begin; isalnum(*p) || *p == '_'; ++p)
    495 		/* nothing */;
    496 	input->p = p;
    497 	tok2str();
    498 	sym = lookup(namespace, yytext);
    499 	if (sym->ns == NS_CPP) {
    500 		if (!disexpand && expand(begin, sym))
    501 			return next();
    502 		/*
    503 		 * it is not a correct macro call, so try to find
    504 		 * another definition.
    505 		 */
    506 		if (lexmode != CPPMODE)
    507 			sym = nextsym(sym, namespace);
    508 	}
    509 	yylval.sym = sym;
    510 	if (sym->flags & SCONSTANT)
    511 		return CONSTANT;
    512 	if (sym->token != IDEN)
    513 		yylval.token = sym->u.token;
    514 	return sym->token;
    515 }
    516 
    517 static unsigned
    518 follow(int expect, int ifyes, int ifno)
    519 {
    520 	if (*input->p++ == expect)
    521 		return ifyes;
    522 	--input->p;
    523 	return ifno;
    524 }
    525 
    526 static unsigned
    527 minus(void)
    528 {
    529 	switch (*input->p++) {
    530 	case '-': return DEC;
    531 	case '>': return INDIR;
    532 	case '=': return SUB_EQ;
    533 	default: --input->p; return '-';
    534 	}
    535 }
    536 
    537 static unsigned
    538 plus(void)
    539 {
    540 	switch (*input->p++) {
    541 	case '+': return INC;
    542 	case '=': return ADD_EQ;
    543 	default: --input->p; return '+';
    544 	}
    545 }
    546 
    547 static unsigned
    548 relational(int op, int equal, int shift, int assig)
    549 {
    550 	char c;
    551 
    552 	if ((c = *input->p++) == '=')
    553 		return equal;
    554 	if (c == op)
    555 		return follow('=', assig, shift);
    556 	--input->p;
    557 	return op;
    558 }
    559 
    560 static unsigned
    561 logic(int op, int equal, int logic)
    562 {
    563 	char c;
    564 
    565 	if ((c = *input->p++) == '=')
    566 		return equal;
    567 	if (c == op)
    568 		return logic;
    569 	--input->p;
    570 	return op;
    571 }
    572 
    573 static unsigned
    574 dot(void)
    575 {
    576 	char c;
    577 
    578 	if ((c = *input->p) != '.')
    579 		return '.';
    580 	if ((c = *++input->p) != '.')
    581 		error("incorrect token '..'");
    582 	++input->p;
    583 	return ELLIPSIS;
    584 }
    585 
    586 static unsigned
    587 operator(void)
    588 {
    589 	unsigned t;
    590 
    591 	switch (t = *input->p++) {
    592 	case '<': t = relational('<', LE, SHL, SHL_EQ); break;
    593 	case '>': t = relational('>', GE, SHR, SHR_EQ); break;
    594 	case '&': t = logic('&', AND_EQ, AND); break;
    595 	case '|': t = logic('|', OR_EQ, OR); break;
    596 	case '=': t = follow('=', EQ, '='); break;
    597 	case '^': t = follow('=', XOR_EQ, '^'); break;
    598 	case '*': t = follow('=', MUL_EQ, '*'); break;
    599 	case '/': t = follow('=', DIV_EQ, '/'); break;
    600 	case '!': t = follow('=', NE, '!'); break;
    601 	case '#': t = follow('#', '$', '#'); break;
    602 	case '-': t = minus(); break;
    603 	case '+': t = plus(); break;
    604 	case '.': t = dot(); break;
    605 	}
    606 	tok2str();
    607 	return t;
    608 }
    609 
    610 /* TODO: Ensure that namespace is NS_IDEN after a recovery */
    611 
    612 static void
    613 skipspaces(void)
    614 {
    615 repeat:
    616 	while (isspace(*input->p))
    617 		++input->p;
    618 	if (*input->p == '\0' && lexmode != CPPMODE) {
    619 		if (!moreinput())
    620 			return;
    621 		goto repeat;
    622 	}
    623 	input->begin = input->p;
    624 }
    625 
    626 unsigned
    627 next(void)
    628 {
    629 	char c;
    630 
    631 	skipspaces();
    632 	c = *input->begin;
    633 	if ((eof || lexmode == CPPMODE) && c == '\0') {
    634 		strcpy(yytext, "<EOF>");
    635 		if (cppctx && eof)
    636 			error("#endif expected");
    637 		yytoken = EOFTOK;
    638 		goto exit;
    639 	}
    640 
    641 	if (isalpha(c) || c == '_')
    642 		yytoken = iden();
    643 	else if (isdigit(c))
    644 		yytoken = number();
    645 	else if (c == '"')
    646 		yytoken = string();
    647 	else if (c == '\'')
    648 		yytoken = character();
    649 	else
    650 		yytoken = operator();
    651 
    652 exit:
    653 	DBG("TOKEN %s", yytext);
    654 	return yytoken;
    655 }
    656 
    657 void
    658 expect(unsigned tok)
    659 {
    660 	if (yytoken != tok) {
    661 		if (isgraph(tok))
    662 			errorp("expected '%c' before '%s'", tok, yytext);
    663 		else
    664 			errorp("unexpected '%s'", yytext);
    665 	} else {
    666 		next();
    667 	}
    668 }
    669 
    670 char
    671 ahead(void)
    672 {
    673 	skipspaces();
    674 	return *input->begin;
    675 }
    676 
    677 void
    678 setsafe(int type)
    679 {
    680 	safe = type;
    681 }
    682 
    683 void
    684 discard(void)
    685 {
    686 	extern jmp_buf recover;
    687 	char c;
    688 
    689 	input->begin = input->p;
    690 	for (c = yytoken; ; c = *input->begin++) {
    691 		switch (safe) {
    692 		case END_COMP:
    693 			if (c == '}')
    694 				goto jump;
    695 			goto semicolon;
    696 		case END_COND:
    697 			if (c == ')')
    698 				goto jump;
    699 			break;
    700 		case END_LDECL:
    701 			if (c == ',')
    702 				goto jump;
    703 		case END_DECL:
    704 		semicolon:
    705 			if (c == ';')
    706 				goto jump;
    707 			break;
    708 		}
    709 		if (c == '\0' && !moreinput())
    710 			exit(1);
    711 	}
    712 jump:
    713 	yytoken = c;
    714 	longjmp(recover, 1);
    715 }