scc

Simple C99 Compiler
Log | Files | Refs | README | LICENSE

commit 0ece8c92df0019d3eb4d1d427f6001d18104e357
parent 8460bb981bb821656f7bd54a7d5f647a7a9ff400
Author: Roberto E. Vargas Caballero <k0ga@shike2.com>
Date:   Wed,  8 Feb 2012 09:54:48 +0100

First version

This version has only partial support for declarations; it is a
work-in-progress version.

Diffstat:
.gitignore | 3+++
Makefile | 32++++++++++++++++++++++++++++++++
cc.h | 18++++++++++++++++++
decl.c | 332+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
error.c | 57+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
lex.c | 185+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
symbol.c | 105+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
symbol.h | 22++++++++++++++++++++++
tokens.h | 38++++++++++++++++++++++++++++++++++++++
types.c | 41+++++++++++++++++++++++++++++++++++++++++
types.h | 46++++++++++++++++++++++++++++++++++++++++++++++
11 files changed, 879 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,3 @@
+*.o
+makefile
+kcc
diff --git a/Makefile b/Makefile
@@ -0,0 +1,32 @@
+
+OBJS = types.o decl.o lex.o error.o symbol.o
+LIBS =
+
+all: kcc
+
+kcc: $(OBJS)
+	$(CC) $(LDFLAGS) $(CFLAGS) $(OBJS) $(LIBS) -o $@
+
+%.d: %.c
+	$(CC) -M $(CPPFLAGS) $< | \
+	sed -e 's,/usr/[^ ]*,,g' | \
+	egrep -v '^ *\\$$' > $@
+
+.PHONY: clean distclean dep
+
+dep: $(OBJS:.o=.d)
+	cat Makefile $? > makefile
+	rm -f *.d
+# The program built above is kcc, so that is the binary clean has to remove
+clean:
+	rm -f $(OBJS)
+	rm -f kcc
+
+distclean: clean
+	rm -f *~
+	rm -f *.d
+	rm -f makefile
+
+
+
+###Dependencies
diff --git a/cc.h b/cc.h
@@ -0,0 +1,18 @@
+#ifndef CC_H
+#define CC_H
+
+extern unsigned linenum;
+extern unsigned columnum;
+extern const char *filename;
+
+extern struct user_opt {
+	unsigned implicit_int : 1;
+	unsigned c99 : 1;
+} user_opt;	/* only declared here; the single definition lives in error.c */
+
+/* error() and die() never return; warning_error() returns only when flag is 0 */
+extern void warning(const char *fmt, ...);
+extern void error(const char *fmt, ...);
+extern void die(const char *fmt, ...);
+extern void warning_error(char flag, const char *fmt, ...);
+#endif
diff --git a/decl.c b/decl.c
@@ -0,0 +1,332 @@
+#include <assert.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include "cc.h"
+#include "tokens.h"
+#include "types.h"
+
+/* ANSI C says minimum maximum for indirection level is 12 */
+#define PTRLEVEL_MAX 12
+
+char parser_out_home;
+
+#ifndef NDEBUG
+/* Debug aid: describe on stdout the chain of derived types starting at t */
+static void ptype(register struct type *t)
+{
+	assert(t);
+
+	for (; t; t = t->base) {
+		switch (t->op) {
+		case ARY:
+			fputs("array of ", stdout);
+			break;
+		case PTR:
+			fputs("pointer to ", stdout);
+			break;
+		case FTN:
+			fputs("function that returns ", stdout);
+			break;
+		default:
+			fputs("primitive data ", stdout);
+			break;
+		}
+	}
+	putchar('\n');
+}
+#else
+# define ptype(t)
+#endif
+
+/* Type operators (PTR, ARY, FTN) collected while a declarator is parsed */
+static unsigned char stack[30];
+static unsigned char *stackp = stack;
+
+#define push(x) (*stackp++ = (x))
+#define pop() (*--stackp)
+#define empty() (stackp == stack)
+
+
+void decl(void);
+
+/* Parse a direct declarator, pushing FTN or ARY for every () or [] suffix */
+void dirdcl(void)
+{
+	puts("dirdecl");
+	if (yytoken == '(') {
+		gettok();
+		decl();
+		if (yytoken != ')')
+			error("expected ')'");
+		gettok();
+	} else if (yytoken == IDENTIFIER) {
+		gettok();
+		/* here we are!!! */;
+	}
+
+	for (;;) {
+		switch (yytoken) {
+		case '(':
+			push(FTN);
+			if (gettok() == ')')
+				gettok();
+			else
+				/* TODO: prototyped function */;
+			continue;
+		case '[':
+			push(ARY);
+			if (gettok() == ']')
+				gettok();
+			else
+				/* TODO: specify size of array */;
+			continue;
+		default:
+			printf("leaving dirdcl %c\n", yytoken);
+			return;
+		}
+	}
+}
+
+
+
+/*
+ * Basic types indexed by the F_* codes defined below. Column 0 is
+ * used unless the declaration is unsigned, column 1 is the unsigned
+ * variant; NULL there means the type has no unsigned variant.
+ */
+
+struct type *types[][2] = {{T_VOID, NULL},
+			   {T_SCHAR, T_UCHAR},
+			   {T_SHORT, T_USHORT},
+			   {T_INT, T_UINT},
+			   {T_LONG, T_ULONG},
+			   {T_LLONG, T_ULLONG},
+			   {T_FLOAT, NULL},
+			   {T_DOUBLE, NULL},
+			   {T_LDOUBLE, NULL}};
+
+#define F_VOID    0
+#define F_CHAR    1
+#define F_SHORT   2
+#define F_INT     3
+#define F_LONG    4
+#define F_LLONG   5
+#define F_FLOAT   6
+#define F_DOUBLE  7
+#define F_LDOUBLE 8
+/* Consume specifier tokens; return the basic type they name, NULL if none */
+struct type *specifier(void)
+{
+	static char sign, sclass, tqlf, nt;
+	struct type *t = NULL;
+
+	tqlf = sign = sclass = 0;
+	for (;;) {
+		switch (gettok()) {
+		case TYPEDEF:case EXTERN:case STATIC:case AUTO:case REGISTER:
+			if (sclass != 0)
+				error("Two or more storage specifier");
+			sclass = yytoken;
+			continue;
+		case CONST: case VOLATILE: case RESTRICTED:
+			/* TODO */
+			continue;
+		case VOID:   nt = F_VOID;   goto check_type;
+		case CHAR:   nt = F_CHAR;   goto check_type;
+		case SHORT:  nt = F_SHORT;  goto check_type;
+		case INT:    nt = F_INT;    goto check_type;
+		case FLOAT:  nt = F_FLOAT;  goto check_type;
+		case DOUBLE: nt = F_DOUBLE; goto check_type;
+		case LONG:   nt = F_LONG;   goto check_type;
+		case SIGNED: case UNSIGNED:
+			if (sign != 0) {
+				error((sign != yytoken) ?
+				      "signed and unsigned in declaration" :
+				      "duplicated %s", yytext);
+			}
+			sign = yytoken;
+			if (t == NULL)
+				continue;     /* we don't have type now */
+			goto check_type;
+		case STRUCT:	/* TODO */
+		case UNION:	/* TODO */
+		case ENUM:	/* TODO */
+		case IDENTIFIER:
+			/* TODO */
+		default:
+			return t;
+		}
+	check_type:
+		if (nt == F_LONG) {
+			if (t == NULL ||
+			    t == T_INT || t == T_UINT) {
+				/* nothing */;
+			} else if (t == T_LONG || t == T_ULONG) {
+				nt = F_LLONG;	/* a second "long" */
+			} else if (t == T_DOUBLE) {
+				nt = F_LDOUBLE;
+			} else if (t == T_LLONG || t == T_ULLONG) {
+				error("'long long long' is too long");
+			} else if (t == T_LDOUBLE) {
+				error("'long long double' is too long");
+			} else {
+				goto two_or_more_btype;
+			}
+		} else if (t != NULL) {
+			goto two_or_more_btype;
+		} if (nt == F_VOID && sign != 0) {
+			goto incorrect_sign;
+		} if (nt == F_CHAR && sign == 0) {
+			t = T_UCHAR; /* char by default is unsigned */
+		} else if (!(t = types[nt][sign == UNSIGNED])) {
+			goto incorrect_sign;
+		}
+	}
+two_or_more_btype:
+	error("two or more basic types");
+incorrect_sign:
+	error("sign specifier applied to incorrect type");
+}
+
+
+#undef F_VOID
+#undef F_CHAR
+#undef F_SHORT
+#undef F_INT
+#undef F_LONG
+#undef F_LLONG
+#undef F_FLOAT
+#undef F_DOUBLE
+#undef F_LDOUBLE
+
+/* Parse a declarator: leading '*'s, then the direct declarator they apply to */
+void decl(void)
+{
+	unsigned char ns = 0;
+	unsigned char qlf[PTRLEVEL_MAX] = {0};	/* qualifier bits per level */
+
+	puts("decl");
+	for (ns = 0; yytoken == '*'; ns++) {
+		if (ns == PTRLEVEL_MAX)
+			error("Too much indirection levels");
+		switch (gettok()) {
+		case CONST:
+			if (!(qlf[ns] ^= 2))
+				goto duplicated;
+			continue;
+		case RESTRICTED:
+			if (!(qlf[ns] ^= 4))
+				goto duplicated;
+			continue;
+		case VOLATILE:
+			if (!(qlf[ns] ^= 8))
+				goto duplicated;
+			continue;
+		}
+	}
+	dirdcl();
+
+	while (ns--)		/* one PTR for every '*' that was read */
+		push(PTR);	/* TODO: pointer qualifiers */
+	printf("leaving dcl %c\n", yytoken);
+	return;
+
+duplicated:
+	error("duplicated '%s'", yytext);
+}
+
+
+/* Parse "specifiers declarator {, declarator} ;" and print every built type */
+void declaration(void)
+{
+	struct type *t;
+
+	t = specifier();
+
+	for (; ; gettok()) {
+		decl();
+		if (yytoken != ',' && yytoken != ';')
+			error("unexpected '%s'", yytext);
+		while (!empty())
+			t = mktype(t, pop());
+		ptype(t);
+
+		if (yytoken == ',')
+			/* add variable */;
+		else if (yytoken == ';') {
+			/* end of sentence */;
+			return;
+		}
+	}
+}
+
+
+#if 0
+void specdcl(void)
+{
+	struct spec_type t = {0, 0, 0};
+
+repeat:
+	parser_out_home = 1;
+	switch (gettok()) {
+	case TYPEDEF:
+	case EXTERN:
+	case STATIC:
+	case AUTO:
+	case REGISTER:
+	case CONST:
+	case VOLATILE:
+	case SIGNED:
+	case UNSIGNED:
+		if (!(t.mods ^= MODIFIER(tok)))
+			error("duplicate '%s'", yytext);
+		goto repeat;
+	case IDENTIFIER:
+		/* This is incorrect!!! */
+		t.type = TYPE(INT);
+		if (user_opt.implicit_int) {
+			warning_error(user_opt.c99,
+				      "type defaults to ‘int’ in declaration"
+				      " of", yytext);
+		}
+		if (gettok() != ';')
+			goto non_end_after_id;
+		return ';';
+	case VOID:
+	case CHAR:
+	case INT:
+	case LONG:
+	case FLOAT:
+	case DOUBLE:
+		if (!(t.type ^= (1 << TYPE(tok))))
+			error("duplicate '%s'", yytext);
+
+	case STRUCT:
+	case UNION:
+	case ENUM:
+	case TYPE_NAME:
+		;
+	}
+
+
+non_end_after_id:
+	error("';' expected");
+}
+#endif
+
+
+
+
+#include <stddef.h>
+
+/* Test driver: parse a single declaration read from standard input */
+int main(int argc, char *argv[])
+{
+	init_lex();
+
+	open_file(NULL);
+	declaration();
+
+	return 0;
+}
diff --git a/error.c b/error.c
@@ -0,0 +1,57 @@
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "cc.h"
+
+struct user_opt user_opt;	/* the one definition of the options in cc.h */
+
+/* Print severity, position and message on stderr; exit if flag is not 0 */
+static void warning_error_helper(char flag, const char *fmt, va_list va)
+{
+	fprintf(stderr, "%s:%s:%u:%u: ",
+		(!flag) ? "warning" : "error", filename, linenum, columnum);
+	vfprintf(stderr, fmt, va);
+	putc('\n', stderr);
+	if (flag)
+		exit(EXIT_FAILURE); /* TODO: uhmmmm */
+}
+
+/* Report a warning when flag is 0 and a fatal error otherwise */
+void warning_error(char flag, const char *fmt, ...)
+{
+	va_list va;
+	va_start(va, fmt);
+	warning_error_helper(flag, fmt, va);
+	va_end(va);
+}
+
+/* Report an error at the current position and exit */
+void error(const char *fmt, ...)
+{
+	va_list va;
+	va_start(va, fmt);
+	warning_error_helper(1, fmt, va);
+	va_end(va);
+}
+
+/* Report a warning at the current position and return */
+void warning(const char *fmt, ...)
+{
+	va_list va;
+	va_start(va, fmt);
+	warning_error_helper(0, fmt, va);
+	va_end(va);
+}
+
+
+/* Print a message without position information on stderr and exit */
+void die(const char *fmt, ...)
+{
+	va_list va;
+	va_start(va, fmt);
+	vfprintf(stderr, fmt, va);	/* a va_list needs the v* variant */
+	va_end(va);
+	exit(EXIT_FAILURE);
+}
diff --git a/lex.c b/lex.c
@@ -0,0 +1,185 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "cc.h"
+#include "symbol.h"
+#include "tokens.h"
+
+#define TOKSIZ_MAX 21
+#define NR_KWD_HASH 32
+/* TODO: move hashfun here */
+
+static struct keyword {
+	char *str;
+	unsigned char tok;
+	struct keyword *next;
+} keywords [] = {"auto", AUTO, NULL,
+		 "break", BREAK, NULL,
+		 "_Bool", CHAR, NULL,
+		 "case", CASE, NULL,
+		 "char", CHAR, NULL,
+		 "const", CONST, NULL,
+		 "continue", CONTINUE, NULL,
+		 "default", DEFAULT, NULL,
+		 "do", DO, NULL,
+		 "double", DOUBLE, NULL,
+		 "else", ELSE, NULL,
+		 "enum", ENUM, NULL,
+		 "extern", EXTERN, NULL,
+		 "float", FLOAT, NULL,
+		 "for", FOR, NULL,
+		 "goto", GOTO, NULL,
+		 "if", IF, NULL,
+		 "int", INT, NULL,
+		 "long", LONG, NULL,
+		 "register", REGISTER, NULL,
+		 "restrict", RESTRICTED, NULL,
+		 "return", RETURN, NULL,
+		 "short", SHORT, NULL,
+		 "signed", SIGNED, NULL,
+		 "sizeof", SIZEOF, NULL,
+		 "static", STATIC, NULL,
+		 "struct", STRUCT, NULL,
+		 "switch", SWITCH, NULL,
+		 "typedef", TYPEDEF, NULL,
+		 "union", UNION, NULL,
+		 "unsigned", UNSIGNED, NULL,
+		 "void", VOID, NULL,
+		 "volatile", VOLATILE, NULL,
+		 "while", WHILE, NULL,
+		 NULL, 0, NULL
+};
+
+static struct keyword *khash[NR_KWD_HASH];
+static FILE *yyin;
+
+unsigned char yytoken;
+unsigned char yyhash;
+size_t yylen;
+char yytext[TOKSIZ_MAX + 1];
+unsigned linenum;
+unsigned columnum;
+const char *filename;
+
+
+union yyval {
+	struct symbol *sym;
+} yyval;
+
+
+/* Fill the keyword hash table; must run before the first call to gettok() */
+void init_lex(void)
+{
+	register struct keyword *bp;
+	static unsigned char h;
+
+	for (bp = keywords; bp->str; bp++) {
+		register struct keyword **link;
+		/*
+		 * Buckets are kept in descending order. Walking through
+		 * a pointer to the link inserts at the head, in the
+		 * middle or at the tail with the same code, so a chain
+		 * can never be tied into a cycle.
+		 */
+		h = hashfun(bp->str);
+		link = &khash[h];
+		while (*link && strcmp(bp->str, (*link)->str) < 0)
+			link = &(*link)->next;
+		bp->next = *link;
+		*link = bp;
+	}
+}
+/* Read an identifier into yytext; return its keyword token or IDENTIFIER */
+static unsigned char iden(void)
+{
+	register struct keyword *kwp;
+	register int ch;	/* int, so that EOF from getc() is not lost */
+	register char *bp = yytext;
+
+	for (yyhash = 0; bp < yytext + TOKSIZ_MAX; *bp++ = ch) {
+		if (!isalnum(ch = getc(yyin)) && ch != '_')
+			break;
+		yyhash += ch;
+	}
+	if (bp == yytext + TOKSIZ_MAX)
+		error("identifier too long %s", yytext);
+	ungetc(ch, yyin);
+	*bp = '\0';
+	yylen = bp - yytext;
+	yyhash &= NR_KWD_HASH - 1;
+	for (kwp = khash[yyhash]; kwp; kwp = kwp->next) {
+		if (!strcmp(kwp->str, yytext))
+			return kwp->tok;
+	}
+	return IDENTIFIER;
+}
+
+
+/* Read the next token from yyin, store it in yytoken and return it */
+unsigned char gettok(void)
+{
+	static int c;
+	register unsigned char ch;
+	extern char parser_out_home;
+
+	while (isspace(c = getc(yyin)))
+		/* nothing */;
+	if (c == EOF) {
+		if (parser_out_home)
+			error("Find EOF while parsing");
+		else
+			return yytoken = EOFTOK;
+	}
+	ch = c;
+	if (isalpha(ch) || ch == '_') {
+		ungetc(ch, yyin);
+		ch = iden();
+	} else if (isdigit(ch)) {
+		;
+	} else {
+		switch (ch) {
+		case '&': case '|':
+			if ((c = getc(yyin)) == ch) {
+				ch |= 0x80; /* TODO */
+				break;
+			} else {
+				ungetc(c, yyin);
+			}	/* fallthrough: it may still be "&=" or "|=" */
+		case '^': case '=': case '<': case '>':
+		case '*': case '+': case '-': case '/':
+			if ((c = getc(yyin)) == '=') {
+				ch |= 0x80; /* TODO */
+				break;
+			} else {
+				ungetc(c, yyin);
+			}	/* fallthrough: plain one character token */
+		case ';': case '{': case '}': case '(': case ')': case '~':
+		case '!': case ',': case '?': case '[': case ']': case ':':
+			break;
+		default:
+			error("Incorrect character '%02x'", (unsigned) c);
+		}
+	}
+
+return_token:
+	printf("Token = %c (%u)\n", (isprint(ch)) ? ch : ' ', (unsigned) ch);
+	return yytoken = ch;
+}
+
+/* Make file the lexer input; a NULL file selects standard input */
+void open_file(const char *file)
+{
+	if (yyin != NULL)
+		fclose(yyin);
+	if (file == NULL) {
+		yyin = stdin;
+		filename = "(stdin)";
+		return;
+	}
+	if ((yyin = fopen(file, "r")) == NULL)
+		die("file '%s' not found", file);
+	filename = file;
+}
diff --git a/symbol.c b/symbol.c
@@ -0,0 +1,105 @@
+
+#include <stddef.h>
+#include <string.h>
+
+#include "symbol.h"
+
+#define NR_SYM_HASH 32
+
+struct symhash {
+	struct symbol *buf[NR_SYM_HASH];
+	struct symbol *top;
+};
+
+
+struct symctx {
+	struct symbol *siden;
+	struct symbol *sstruct;
+	struct symbol *sgoto;
+	struct symctx *next;
+};
+
+
+
+static struct symctx global_ctx;
+static struct symctx *ctxp = &global_ctx;
+struct symhash siden, sgoto, sstruct;
+
+
+/* Sum the characters of s and reduce the sum to a bucket index */
+unsigned char hashfun(register const char *s)
+{
+	register unsigned char h, ch;
+
+	for (h = 0; (ch = *s++) != '\0'; h += ch)
+		/* nothing */;
+	return h & (NR_SYM_HASH - 1);
+}
+
+
+
+/* Save the current top of every symbol table in ctx and make ctx current */
+void new_ctx(struct symctx *ctx)
+{
+	ctx->siden = siden.top;
+	ctx->sstruct = sstruct.top;
+	ctx->sgoto = sgoto.top;
+	ctx->next = ctxp;
+	ctxp = ctx;
+}
+
+
+/*
+ * WARNING: This function is not portable and waits that incremental calls
+ * to alloca return decremented address
+ */
+static void del_hash_ctx(struct symhash *h, struct symbol *const top)
+{
+	register struct symbol **bp;
+	static struct symbol **lim;
+
+	lim = h->buf + NR_SYM_HASH;
+	for (bp = h->buf; bp < lim; bp++) {
+		register struct symbol *aux;
+		for (aux = *bp; aux && aux < top; *bp = aux = aux->next)
+			if (aux == h->top)
+				h->top = aux;
+	}
+}
+
+/* Unlink from the three tables the symbols below the saved tops of ctxp */
+void del_ctx(void)
+{
+	del_hash_ctx(&siden, ctxp->siden);
+	del_hash_ctx(&sstruct, ctxp->sstruct);
+	del_hash_ctx(&sgoto, ctxp->sgoto); /* TODO: correct handling in goto */
+}
+
+
+
+/* Link sym at the head of its bucket in h, record it as top and return it */
+struct symbol *pushsym(struct symhash *h, struct symbol *sym)
+{
+	static unsigned char key;
+	key = hashfun(sym->str);
+
+	h->top = sym;
+	sym->next = h->buf[key];
+	return h->buf[key] = sym;
+}
+
+
+
+/* Return the first symbol of h whose name is s, or NULL if there is none */
+struct symbol *findsym(struct symhash *h, char *s)
+{
+	register struct symbol *bp;
+	static unsigned char key;
+
+	key = hashfun(s);
+	for (bp = h->buf[key]; bp; bp = bp->next) {
+		if (!strcmp(bp->str, s))
+			return bp;
+	}
+	return NULL;
+}
diff --git a/symbol.h b/symbol.h
@@ -0,0 +1,22 @@
+
+#pragma once
+#ifndef SYMBOL_H
+#define SYMBOL_H
+
+
+struct type;
+
+struct symbol {
+	char *str;
+	struct type *type;
+	struct symbol *next;
+};
+
+
+struct symhash;
+extern struct symhash siden, sgoto, sstruct;
+
+extern unsigned char hashfun(register const char *s);
+
+
+#endif
diff --git a/tokens.h b/tokens.h
@@ -0,0 +1,38 @@
+#ifndef TOKENS_H
+#define TOKENS_H
+#include <stddef.h>	/* size_t, used by yylen */
+/* Don't change this codification because program used it!!! */
+enum {
+	/* types */
+	INT = 1, CHAR, FLOAT, LONG, LLONG, SHORT, VOID, DOUBLE,
+	LDOUBLE, STRUCT, UNION, ENUM, UTYPE, BOOL,
+	/* storage specifier */
+	TYPEDEF, EXTERN, STATIC, AUTO, REGISTER,
+	/* type qualifier */
+	VOLATILE, CONST, RESTRICTED,
+	/* sign specifier */
+	UNSIGNED, SIGNED
+};
+
+
+
+enum {
+	IDENTIFIER = 128, CONSTANT, STRING_LITERAL, SIZEOF,
+	PTR_OP, INC_OP, DEC_OP, LEFT_OP, RIGHT_OP, LE_OP, GE_OP, EQ_OP, NE_OP,
+	AND_OP, OR_OP, MUL_ASSIGN, DIV_ASSIGN, MOD_ASSIGN, ADD_ASSIGN,
+	SUB_ASSIGN, LEFT_ASSIGN, RIGHT_ASSIGN, AND_ASSIGN,
+	XOR_ASSIGN, OR_ASSIGN, TYPE_NAME,
+	ELLIPSIS,
+	CASE, DEFAULT, IF, ELSE, SWITCH, WHILE, DO, FOR, GOTO,
+	CONTINUE, BREAK, RETURN, EOFTOK
+};
+
+extern char yytext[];
+extern unsigned char yyhash;
+extern size_t yylen;
+extern unsigned char yytoken;
+
+extern unsigned char gettok(void);
+extern void init_lex(void);
+extern void open_file(const char *file);
+#endif
diff --git a/types.c b/types.c
@@ -0,0 +1,41 @@
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "types.h"
+
+#define xcalloc calloc
+
+struct type tschar, tuchar;                /* signed char, unsigned char */
+struct type tshort, tushort;               /* short, unsigned short */
+struct type tint, tuint;                   /* int, unsigned int */
+struct type tfloat, tdouble, tldouble;     /* float, double, long double */
+struct type tlong, tulong;                 /* long, unsigned
long */
+struct type tllong, tullong;               /* long long, unsigned long long */
+struct type tvoid;                         /* void */
+/* Return the type derived from base by op (PTR, ARY, FTN); built on first use */
+struct type *mktype(register struct type *base, unsigned char op)
+{
+	register struct type **ptr, *nt;
+	switch (op) {
+	case PTR:
+		ptr = &base->ptr;
+		break;
+	case ARY:
+		ptr = &base->ary;
+		break;
+	case FTN:
+		ptr = &base->ftn;
+		break;
+	default:
+		abort();	/* invalid operator; checked in NDEBUG builds too */
+	}
+	if (*ptr)
+		return *ptr;	/* this derived type was already created */
+	if (!(nt = xcalloc(1, sizeof(*nt))))
+		abort();	/* out of memory */
+	*ptr = nt;
+	nt->op = op;
+	nt->base = base;
+	return nt;
+}
diff --git a/types.h b/types.h
@@ -0,0 +1,46 @@
+#ifndef TYPES_H_
+#define TYPES_H_
+#include <stddef.h>	/* size_t, used by struct type */
+
+struct type {
+	unsigned char op;
+	struct type *base;
+	struct type *ary;	/* array */
+	struct type *ptr;	/* pointer */
+	struct type *ftn;	/* function */
+	union {
+		size_t nelem;
+	} u;
+};
+
+
+extern struct type tschar, tuchar, tshort, tushort, tint, tuint;
+extern struct type tfloat, tdouble, tldouble, tlong;
+extern struct type tulong, tllong, tullong, tvoid;
+
+#define T_SCHAR   (&tschar)
+#define T_UCHAR   (&tuchar)
+#define T_SHORT   (&tshort)
+#define T_USHORT  (&tushort)
+#define T_INT     (&tint)
+#define T_UINT    (&tuint)
+#define T_FLOAT   (&tfloat)
+#define T_DOUBLE  (&tdouble)
+#define T_LDOUBLE (&tldouble)
+#define T_LONG    (&tlong)
+#define T_ULONG   (&tulong)
+#define T_LLONG   (&tllong)
+#define T_ULLONG  (&tullong)
+#define T_VOID    (&tvoid)
+
+
+#define ARY		1
+#define PTR		2
+#define FTN		3
+#define T_CONST		8
+#define T_RESTRICTED	16
+#define T_VOLATILE	32
+
+struct type *mktype(register struct type *base, unsigned char op);
+
+#endif