scc

Simple C99 Compiler
Log | Files | Refs | README | LICENSE

parser.c (12908B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include <errno.h>
      3 #include <stdio.h>
      4 #include <stdlib.h>
      5 #include <string.h>
      6 
      7 #include "../inc/cc.h"
      8 #include "../inc/sizes.h"
      9 
     10 #include "arch.h"
     11 #include "cc2.h"
     12 
/* size in bytes of the buffer nextline() reads each input line into */
#define MAXLINE     200
/* number of slots in the operand stack used by push() and pop() */
#define STACKSIZ     50
     15 
/* basic types defined elsewhere; the type entries of optbl point to them */
extern Type int8type, int16type, int32type, int64type,
            uint8type, uint16type, uint32type, uint64type,
            float32type, float64type, float80type,
            booltype,
            ptrtype,
            voidtype,
            elipsistype;

/* type pushed for the 'F' token; only the function flag is set */
Type funtype = {
	.flags = FUNF
};
     27 
/* per-token argument stored in optbl and handed to the parse function */
union tokenop {
	void *arg;      /* set by the type entries: address of a Type */
	unsigned op;    /* set by the symbol and operator entries */
};

/* bookkeeping for one open switch statement */
struct swtch {
	int nr;         /* incremented by waft() for every node it appends */
	Node *first;    /* node recorded by bswitch() */
	Node *last;     /* node most recently appended by waft() */
};

/* stack of open switches; swp points to the first unused slot */
static struct swtch swtbl[NR_BLOCK], *swp = swtbl;
     40 
/* token handlers: receive the token text and the argument from optbl */
typedef void parsefun(char *, union tokenop);
static parsefun type, symbol, getname, unary, binary, ternary, call,
                constant, composed, binit, einit,
                jump, oreturn, loop, assign,
                ocase, bswitch, eswitch;

/* line handlers: run by nextline() after all tokens of a line are parsed */
typedef void evalfun(void);
static evalfun vardecl, beginfun, endfun, endpars, stmt,
               array, aggregate, flddecl, labeldcl;
     50 
/*
 * Dispatch table indexed by a character.
 *
 * eval() and gettype() index it with the first character of a token
 * and call the parse member; nextline() indexes it with the first
 * character of the line and calls the eval member once the whole
 * line has been parsed. Entries that are not listed are zero filled,
 * so both function pointers are NULL for them.
 */
static struct decoc {
	void (*eval)(void);
	void (*parse)(char *token, union tokenop);
	union tokenop u;
} optbl[] = {      /*  eval     parse           args */
	/*
	 * symbols: symbol() reads the node operation from the low
	 * byte of u.op and the storage class from the bits above it
	 */
	['A']   = {  vardecl,  symbol, .u.op  =  SAUTO<<8 | OAUTO},
	['R']   = {  vardecl,  symbol, .u.op  =   SREG<<8 |  OREG},
	['G']   = {  vardecl,  symbol, .u.op  =  SGLOB<<8 |  OMEM},
	['X']   = {  vardecl,  symbol, .u.op  = SEXTRN<<8 |  OMEM},
	['Y']   = {  vardecl,  symbol, .u.op  =  SPRIV<<8 |  OMEM},
	['T']   = {  vardecl,  symbol, .u.op  = SLOCAL<<8 |  OMEM},
	['M']   = {  flddecl,  symbol, .u.op  =  SMEMB<<8 |  OMEM},
	['L']   = { labeldcl,  symbol, .u.op  = SLABEL<<8 | OLABEL},

	/* basic types: type() pushes the address stored in u.arg */
	['C']   = {     NULL,    type, .u.arg =    &int8type},
	['I']   = {     NULL,    type, .u.arg =   &int16type},
	['W']   = {     NULL,    type, .u.arg =   &int32type},
	['Q']   = {     NULL,    type, .u.arg =   &int64type},
	['K']   = {     NULL,    type, .u.arg =   &uint8type},
	['N']   = {     NULL,    type, .u.arg =  &uint16type},
	['Z']   = {     NULL,    type, .u.arg =  &uint32type},
	['O']   = {     NULL,    type, .u.arg =  &uint64type},
	['J']   = {     NULL,    type, .u.arg = &float32type},
	['D']   = {     NULL,    type, .u.arg = &float64type},
	['H']   = {     NULL,    type, .u.arg = &float80type},
	['0']   = {     NULL,    type, .u.arg =    &voidtype},
	['B']   = {     NULL,    type, .u.arg =    &booltype},
	['P']   = {     NULL,    type, .u.arg =     &ptrtype},
	['E']   = {     NULL,    type, .u.arg = &elipsistype},

	/* function type and types looked up through a symbol id */
	['F']   = {     NULL,    type, .u.arg =     &funtype},
	['V']   = {    array,composed,                     0},
	['U']   = {aggregate,composed,                     0},
	['S']   = {aggregate,composed,                     0},

	/* names, function and initializer delimiters, statements */
	['"']   = {     NULL, getname,                     0},
	['{']   = { beginfun,    NULL,                     0},
	['}']   = {   endfun,    NULL,                     0},
	['(']   = {     NULL,   binit,                     0},
	[')']   = {     NULL,   einit,                     0},
	['\\']  = {  endpars,    NULL,                     0},
	['\t']  = {     stmt,    NULL,                     0},

	/* unary operators */
	['~']   = {     NULL,   unary, .u.op =          OCPL},
	['_']   = {     NULL,   unary, .u.op =          ONEG},
	['\'']  = {     NULL,   unary, .u.op =         OADDR},
	['@']   = {     NULL,   unary, .u.op =          OPTR},
	['g']   = {     NULL,   unary, .u.op =         OCAST},
	['p']   = {     NULL,   unary, .u.op =          OPAR},

	/* binary operators */
	['a']   = {     NULL,  binary, .u.op =          OAND},
	['o']   = {     NULL,  binary, .u.op =           OOR},
	['.']   = {     NULL,  binary, .u.op =        OFIELD},
	['+']   = {     NULL,  binary, .u.op =          OADD},
	['-']   = {     NULL,  binary, .u.op =          OSUB},
	['*']   = {     NULL,  binary, .u.op =          OMUL},
	['%']   = {     NULL,  binary, .u.op =          OMOD},
	['/']   = {     NULL,  binary, .u.op =          ODIV},
	['l']   = {     NULL,  binary, .u.op =          OSHL},
	['r']   = {     NULL,  binary, .u.op =          OSHR},
	['<']   = {     NULL,  binary, .u.op =           OLT},
	['>']   = {     NULL,  binary, .u.op =           OGT},
	['[']   = {     NULL,  binary, .u.op =           OLE},
	[']']   = {     NULL,  binary, .u.op =           OGE},
	['=']   = {     NULL,  binary, .u.op =           OEQ},
	['!']   = {     NULL,  binary, .u.op =           ONE},
	['&']   = {     NULL,  binary, .u.op =         OBAND},
	['|']   = {     NULL,  binary, .u.op =          OBOR},
	['^']   = {     NULL,  binary, .u.op =         OBXOR},
	[',']   = {     NULL,  binary, .u.op =        OCOMMA},

	/* assignment, conditional expression and call */
	[':']   = {     NULL,  assign, .u.op =        OASSIG},
	['?']   = {     NULL, ternary, .u.op =          OASK},
	['c']   = {     NULL,    call, .u.op =         OCALL},

	/* constants */
	['#']   = {     NULL,constant, .u.op =        OCONST},

	/* jumps and return */
	['j']   = {     NULL,    jump, .u.op =          OJMP},
	['y']   = {     NULL,    jump, .u.op =       OBRANCH},
	['h']   = {     NULL, oreturn, .u.op =          ORET},

	/* loop markers */
	['b']   = {     NULL,    loop, .u.op =        OBLOOP},
	['e']   = {     NULL,    loop, .u.op =        OELOOP},

	/* switch statements */
	['v']   = {     NULL,   ocase, .u.op =         OCASE},
	['f']   = {     NULL,   ocase, .u.op =      ODEFAULT},
	['t']   = {     NULL, eswitch, .u.op =      OESWITCH},
	['s']   = {     NULL, bswitch, .u.op =      OBSWITCH},
};
    140 
/*
 * Parser state:
 *   sclass - storage class taken from the last token handled by symbol()
 *   inpars - set by beginfun(), cleared by endpars()
 *   ininit - set by binit(), cleared by einit()
 *   endf   - set by endfun(); makes parse() stop reading lines
 *   lineno - incremented by nextline() for every line it tries to read
 */
static int sclass, inpars, ininit, endf, lineno;
/* operand stack; sp points to the first unused slot */
static void *stack[STACKSIZ], **sp = stack;
    143 
    144 static Node *
    145 push(void *elem)
    146 {
    147 	if (sp == stack[STACKSIZ])
    148 		error(ESTACKO);
    149 	return *sp++ = elem;
    150 }
    151 
    152 static void *
    153 pop(void)
    154 {
    155 	if (sp == stack)
    156 		error(ESTACKU);
    157 	return *--sp;
    158 }
    159 
    160 static int
    161 empty(void)
    162 {
    163 	return sp == stack;
    164 }
    165 
    166 static void
    167 type(char *token, union tokenop u)
    168 {
    169 	push(u.arg);
    170 }
    171 
    172 static void
    173 composed(char *token, union tokenop u)
    174 {
    175 	Symbol *sym;
    176 
    177 	sym = getsym(atoi(token+1));
    178 	push(&sym->type);
    179 }
    180 
    181 static void
    182 getname(char *t, union tokenop u)
    183 {
    184 	push((*++t) ? xstrdup(t) : NULL);
    185 }
    186 
    187 static void
    188 symbol(char *token, union tokenop u)
    189 {
    190 	Node *np = newnode(u.op & 0xFF);
    191 	Symbol *sym = getsym(atoi(token+1));
    192 
    193 	sclass = u.op >> 8;
    194 	np->u.sym = sym;
    195 	np->type = sym->type;
    196 	push(np);
    197 }
    198 
    199 static Type *
    200 gettype(char *token)
    201 {
    202 	struct decoc *dp;
    203 
    204 	dp = &optbl[*token];
    205 	if (!dp->parse)
    206 		error(ESYNTAX);
    207 	(*dp->parse)(token, dp->u);
    208 	return pop();
    209 }
    210 
    211 static void
    212 constant(char *token, union tokenop u)
    213 {
    214 	static char letters[] = "0123456789ABCDEF";
    215 	Node *np;
    216 	TUINT v;
    217 	unsigned c;
    218 
    219 	++token;
    220 	if (*token == OSTRING) {
    221 		++token;
    222 		np = newnode(OSTRING);
    223 		np->type.flags = STRF;
    224 		np->type.size = strlen(token);
    225 		np->type.align = int8type.align;
    226 		np->u.s = xstrdup(token);
    227 	} else {
    228 		np = newnode(OCONST);
    229 		np->type = *gettype(token++);
    230 		for (v = 0; c = *token++; v += c) {
    231 			v <<= 4;
    232 			c = strchr(letters, c) - letters;
    233 		}
    234 		np->u.i = v;
    235 	}
    236 	push(np);
    237 }
    238 
    239 static void
    240 assign(char *token, union tokenop u)
    241 {
    242 	int subop;
    243 	Node *np = newnode(u.op);
    244 
    245 	switch (subop = *++token) {
    246 	case '/':
    247 	case '%':
    248 	case '+':
    249 	case '-':
    250 	case 'l':
    251 	case 'r':
    252 	case '&':
    253 	case '|':
    254 	case '^':
    255 	case 'i':
    256 	case 'd':
    257 		++token;
    258 		subop = optbl[subop].u.op;
    259 		break;
    260 	default:
    261 		subop = 0;
    262 		break;
    263 	}
    264 
    265 	np->u.subop = subop;
    266 	np->type = *gettype(token);
    267 	np->right = pop();
    268 	np->left = pop();
    269 	push(np);
    270 }
    271 
    272 static void
    273 ternary(char *token, union tokenop u)
    274 {
    275 	Node *ask = newnode(OASK), *colon = newnode(OCOLON);
    276 	Type *tp = gettype(token+1);
    277 
    278 	colon->right = pop();
    279 	colon->left = pop();
    280 
    281 	ask->type = *tp;
    282 	ask->left = pop();
    283 	ask->right = colon;
    284 	push(ask);
    285 }
    286 
    287 static void
    288 eval(char *tok)
    289 {
    290 	struct decoc *dp;
    291 
    292 	do {
    293 		dp = &optbl[*tok];
    294 		if (!dp->parse)
    295 			break;
    296 		(*dp->parse)(tok, dp->u);
    297 	} while (tok = strtok(NULL, "\t\n"));
    298 }
    299 
    300 static int
    301 nextline(void)
    302 {
    303 	char line[MAXLINE];
    304 	size_t len;
    305 	int c;
    306 	void (*fun)(void);
    307 
    308 repeat:
    309 	++lineno;
    310 	if (!fgets(line, sizeof(line), stdin))
    311 		return 0;
    312 	if ((len = strlen(line)) == 0 || line[0] == '\n')
    313 		goto repeat;
    314 	if (line[len-1] != '\n')
    315 		error(len < sizeof(line)-1 ? ELNBLNE : ELNLINE);
    316 	line[len-1] = '\0';
    317 
    318 	c = *line;
    319 	eval(strtok(line, "\t\n"));
    320 	if ((fun = *optbl[c].eval) != NULL)
    321 		(*fun)();
    322 	if (sp != stack)
    323 		error(ESTACKA);
    324 	return 1;
    325 }
    326 
    327 static void
    328 oreturn(char *token, union tokenop u)
    329 {
    330 	Node *np = newnode(u.op);
    331 
    332 	eval(strtok(NULL, "\t\n"));
    333 	if (!empty())
    334 		np->left = pop();
    335 	push(np);
    336 }
    337 
    338 static void
    339 waft(Node *np)
    340 {
    341 	Node *p;
    342 	struct swtch *cur;
    343 
    344 	if (swp == swtbl)
    345 		error(EWTACKU);
    346 
    347 	cur = swp - 1;
    348 	p = cur->last;
    349 	np->next = p->next;
    350 	np->prev = p;
    351 	p->next = np;
    352 	cur->last = np;
    353 	cur->nr++;
    354 }
    355 
    356 static void
    357 bswitch(char *token, union tokenop u)
    358 {
    359 	struct swtch *cur;
    360 
    361 	if (swp == &swtbl[NR_BLOCK+1])
    362 		error(EWTACKO);
    363 	cur = swp++;
    364 	cur->nr = 0;
    365 	jump(token, u);
    366 	cur->first = cur->last = push(pop());
    367 }
    368 
    369 static void
    370 eswitch(char *token, union tokenop u)
    371 {
    372 	struct swtch *cur;
    373 
    374 	if (swp == swtbl)
    375 		error(EWTACKU);
    376 	jump(token, u);
    377 	waft(pop());
    378 	cur = swp--;
    379 	cur->first->u.i = cur->nr;
    380 }
    381 
    382 static void
    383 ocase(char *token, union tokenop u)
    384 {
    385 	jump(token, u);
    386 	waft(pop());
    387 }
    388 
    389 static void
    390 jump(char *token, union tokenop u)
    391 {
    392 	Node *aux, *np = newnode(u.op);
    393 
    394 	eval(strtok(NULL, "\t\n"));
    395 
    396 	if (u.op == OBRANCH || u.op == OCASE)
    397 		np->left = pop();
    398 	aux = pop();
    399 	np->u.sym = aux->u.sym;
    400 	delnode(aux);
    401 	push(np);
    402 }
    403 
    404 static void
    405 loop(char *token, union tokenop u)
    406 {
    407 	push(newnode(u.op));
    408 }
    409 
    410 static void
    411 unary(char *token, union tokenop u)
    412 {
    413 	Node *np = newnode(u.op);
    414 
    415 	np->type = *gettype(token+1);
    416 	np->left = pop();
    417 	np->right = NULL;
    418 	push(np);
    419 }
    420 
    421 static void
    422 call(char *token, union tokenop u)
    423 {
    424 	Node *np, *par, *fun = newnode(u.op);
    425 
    426 	for (par = NULL;; par = np) {
    427 		np = pop();
    428 		if (np->op != OPAR)
    429 			break;
    430 		np->right = par;
    431 	}
    432 
    433 	fun->type = *gettype(token+1);
    434 	fun->left = np;
    435 	fun->right = par;
    436 	push(fun);
    437 }
    438 
    439 static void
    440 binary(char *token, union tokenop u)
    441 {
    442 	Node *np = newnode(u.op);
    443 
    444 	np->type = *gettype(token+1);
    445 	np->right = pop();
    446 	np->left = pop();
    447 	push(np);
    448 }
    449 
    450 static void
    451 binit(char *token, union tokenop u)
    452 {
    453 	ininit = 1;
    454 }
    455 
    456 static void
    457 einit(char *token, union tokenop u)
    458 {
    459 	ininit = 0;
    460 	endinit();
    461 }
    462 
    463 static void
    464 endpars(void)
    465 {
    466 	if (!curfun || !inpars)
    467 		error(ESYNTAX);
    468 	inpars = 0;
    469 }
    470 
    471 static void
    472 aggregate(void)
    473 {
    474 	Node *align, *size;
    475 	char *name;
    476 	Type *tp;
    477 	Symbol *sym;
    478 
    479 	align = pop();
    480 	size = pop();
    481 	name = pop();
    482 	tp = pop();
    483 
    484 	tp->size = size->u.i;
    485 	tp->align = align->u.i;
    486 	/*
    487 	 * type is the first field of Symbol so we can obtain the
    488 	 * address of the symbol from the address of the type.
    489 	 * We have to do this because composed returns the pointer
    490 	 * to the type, but in this function we also need the
    491 	 * symbol to store the name.
    492 	 */
    493 	sym = (Symbol *) tp;
    494 	sym->name = name;
    495 
    496 	delnode(align);
    497 	delnode(size);
    498 }
    499 
    500 static void
    501 array(void)
    502 {
    503 	Type *tp, *base;
    504 	Node *size;
    505 
    506 	size = pop();
    507 	base = pop();
    508 	tp = pop();
    509 	tp->size = size->u.i * base->size; /* FIXME check for overflow */
    510 	tp->align = base->align;
    511 
    512 	delnode(size);
    513 }
    514 
    515 static void
    516 decl(Symbol *sym)
    517 {
    518 	Type *tp = &sym->type;
    519 
    520 	if (tp->flags & FUNF) {
    521 		curfun = sym;
    522 	} else {
    523 		switch (sym->kind) {
    524 		case SEXTRN:
    525 		case SGLOB:
    526 		case SPRIV:
    527 		case SLOCAL:
    528 			defglobal(sym);
    529 			break;
    530 		case SAUTO:
    531 		case SREG:
    532 			if (!curfun)
    533 				error(ESYNTAX);
    534 			((inpars) ? defpar : defvar)(sym);
    535 			break;
    536 		default:
    537 			abort();
    538 		}
    539 	}
    540 }
    541 
    542 static void
    543 vardecl(void)
    544 {
    545 	Type *tp, *rp;
    546 	Node *np;
    547 	Symbol *sym;
    548 	char *name;
    549 
    550 	name = pop();
    551 	tp = pop();
    552 	if (tp->flags & FUNF)
    553 		rp = pop();
    554 	np = pop();
    555 
    556 	sym = np->u.sym;
    557 	/*
    558 	 * We have to free sym->name because in tentative declarations
    559 	 * we can have multiple declarations of the same symbol, and in
    560 	 * this case our parser will allocate twice the memory
    561 	 */
    562 	free(sym->name);
    563 	sym->name = name;
    564 	sym->type = *tp;
    565 	if (tp->flags & FUNF)
    566 		sym->rtype = *rp;
    567 	sym->kind = sclass;
    568 
    569 	if (ininit)
    570 		sym->type.flags |= INITF;
    571 	decl(sym);
    572 	delnode(np);
    573 }
    574 
    575 static void
    576 flddecl(void)
    577 {
    578 	Node *off, *np;
    579 	char *name;
    580 	Type *tp;
    581 	Symbol *sym;
    582 
    583 	off = pop();
    584 	name = pop();
    585 	tp = pop();
    586 	np = pop();
    587 
    588 	sym = np->u.sym;
    589 	sym->u.off = off->u.i;
    590 	sym->name = name;
    591 	sym->type = *tp;
    592 
    593 	delnode(np);
    594 	delnode(off);
    595 }
    596 
    597 static void
    598 labeldcl(void)
    599 {
    600 	Node *np;
    601 	Symbol *sym;
    602 
    603 	np = pop();
    604 	np->op = ONOP;
    605 	sym = np->u.sym;
    606 	sym->kind = SLABEL;
    607 	sym->u.stmt = np;
    608 	np->label = sym;
    609 	addstmt(np, SETCUR);
    610 }
    611 
    612 static void
    613 stmt(void)
    614 {
    615 	Node *np;
    616 
    617 	if (empty())
    618 		return;
    619 	np = pop();
    620 	if (ininit) {
    621 		data(np);
    622 		deltree(np);
    623 		return;
    624 	}
    625 	addstmt(np, SETCUR);
    626 }
    627 
    628 static void
    629 beginfun(void)
    630 {
    631 	inpars = 1;
    632 	pushctx();
    633 	addstmt(newnode(OBFUN), SETCUR);
    634 }
    635 
    636 static void
    637 endfun(void)
    638 {
    639 	endf = 1;
    640 	addstmt(newnode(OEFUN), SETCUR);
    641 }
    642 
    643 void
    644 parse(void)
    645 {
    646 	cleannodes();  /* remove code of previous function */
    647 	popctx();  /* remove context of previous function */
    648 	curfun = NULL;
    649 	endf = 0;
    650 
    651 	while (!endf && nextline())
    652 		/* nothing */;
    653 	if (ferror(stdin))
    654 		error(EFERROR, strerror(errno));
    655 }