# HG changeset patch # User William Astle # Date 1378864565 21600 # Node ID 048adfee2933f479785ab1ae757c07d9b22645e1 # Parent c419b3b3d43fe3807e8ac404c1f5de1bf67e9a5d Checkpoint on lwcc-cpp development This checkpoint includes a tokenizer and basic implementation of #if, #ifdef, #ifndef, #else, #endif, #elif, and #undef along with basic symbol table management. diff -r c419b3b3d43f -r 048adfee2933 Makefile --- a/Makefile Mon Sep 09 23:07:19 2013 -0600 +++ b/Makefile Tue Sep 10 19:56:05 2013 -0600 @@ -101,7 +101,7 @@ lwcc_driver_objs := $(lwcc_driver_srcs:.c=.o) lwcc_driver_deps := $(lwcc_driver_srcs:.c=.d) -lwcc_cpp_srcs := main.c error.c file.c preproc.c char_p.c +lwcc_cpp_srcs := main.c error.c file.c preproc.c char_p.c symbol.c lwcc_cpp_srcs := $(addprefix lwcc/cpp/,$(lwcc_cpp_srcs)) lwcc_cpp_objs := $(lwcc_cpp_srcs:.c=.o) lwcc_cpp_deps := $(lwcc_cpp_srcs:.c=.d) diff -r c419b3b3d43f -r 048adfee2933 lwcc/cpp/cpp.h --- a/lwcc/cpp/cpp.h Mon Sep 09 23:07:19 2013 -0600 +++ b/lwcc/cpp/cpp.h Tue Sep 10 19:56:05 2013 -0600 @@ -34,8 +34,28 @@ enum { TOK_NONE = 0, + TOK_EOF, + TOK_EOL, TOK_WSPACE, TOK_IDENT, + TOK_NUMBER, + TOK_STRING, + TOK_CHAR, + TOK_DIV, + TOK_MUL, + TOK_ADD, + TOK_SUB, + TOK_OPAREN, + TOK_CPAREN, + TOK_NE, + TOK_EQ, + TOK_LE, + TOK_LT, + TOK_GE, + TOK_GT, + TOK_BAND, + TOK_BOR, + TOK_BNOT, TOK_MAX }; @@ -90,6 +110,7 @@ extern void unfetch_byte(int); extern void outchr(int); extern void outstr(char *); +extern void skip_eol(void); extern int is_whitespace(int); extern int is_ep(int); diff -r c419b3b3d43f -r 048adfee2933 lwcc/cpp/file.c --- a/lwcc/cpp/file.c Mon Sep 09 23:07:19 2013 -0600 +++ b/lwcc/cpp/file.c Tue Sep 10 19:56:05 2013 -0600 @@ -269,6 +269,30 @@ return c; } +/* Discard input up to the next CPP_EOL or CPP_EOF, which is pushed back for the caller. A '/' is handed to munch_comment(); a positive result is echoed as that many CPP_EOL outputs. */ void skip_eol(void) +{ + int c; + for (;;) + { + c = fetch_byte(); + if (c == CPP_EOF || c == CPP_EOL) + { + unfetch_byte(c); + return; + } + if (c == '/') + { + c = munch_comment(); + if (c > 0) + { + while (c--) + outchr(CPP_EOL); + } + } + } +} + + /* This function opens (if not 
stdin) the file f and pushes it onto the top of the input file stack. It then proceeds to process the file and return. Nonzero return means the file could not be opened. */ diff -r c419b3b3d43f -r 048adfee2933 lwcc/cpp/preproc.c --- a/lwcc/cpp/preproc.c Mon Sep 09 23:07:19 2013 -0600 +++ b/lwcc/cpp/preproc.c Tue Sep 10 19:56:05 2013 -0600 @@ -21,8 +21,10 @@ #include #include +#include #include +#include #include "cpp.h" @@ -31,11 +33,19 @@ char *parse_str_lit(void); char *parse_chr_lit(void); char *parse_num_lit(int); -void preprocess_identifier(int); +char *parse_identifier(int); +void preprocess_identifier(char *); void preprocess_directive(void); +void next_token(void); +void next_token_nws(void); +int eval_expr(void); +int skip_level = 0; +int found_level = 0; +int else_level = 0; +int else_skip_level = 0; -int skip_level; +struct token curtok = { .ttype = TOK_NONE, .strval = NULL }; /* Notes: @@ -161,13 +171,15 @@ continue; } unfetch_byte(c); - preprocess_identifier('L'); + preprocess_identifier(parse_identifier('L')); continue; } else if (is_sidchr(c)) { // identifier of some kind - preprocess_identifier(c); + char *s; + s = parse_identifier(c); + preprocess_identifier(s); continue; } else @@ -178,11 +190,11 @@ } } -void preprocess_identifier(int c) +/* Read an identifier whose first character is c and return it NUL-terminated. The result lives in a static buffer that is reused by the next call, so callers must copy it to keep it and must not free it. */ char *parse_identifier(int c) { - char *ident = NULL; + static char *ident = NULL; int idlen = 0; - int idbufl = 0; + static int idbufl = 0; do { @@ -197,10 +209,14 @@ ident[idlen++] = 0; unfetch_byte(c); - + + return ident; +} +/* Handle an identifier found in ordinary text; s is owned by the caller and is not freed here. */ +void preprocess_identifier(char *s) +{ /* do something with the identifier here - macros, etc. 
*/ - outstr(ident); - lw_free(ident); + outstr(s); } #define to_buf(c) do { if (idlen >= idbufl) { idbufl += 100; ident = lw_realloc(ident, idbufl); } ident[idlen++] = (c); } while (0) @@ -224,7 +240,7 @@ c = fetch_byte(); } } - } while ((is_dec(c)) || (c == '.')); + } while ((is_idchr(c)) || (c == '.')); to_buf(0); return ident; @@ -365,9 +381,544 @@ fprintf(output_fp, "# %d \"%s\" %d\n", file_stack -> line, file_stack -> fn, flag); } +void preproc_ifndef(void); +void preproc_ifdef(void); +void preproc_if(void); +void preproc_include(void); +void preproc_else(void); +void preproc_endif(void); +void preproc_error(void); +void preproc_warning(void); +void preproc_define(void); +void preproc_undef(void); +void preproc_line(void); +void preproc_pragma(void); +void preproc_elif(void); +/* Directive name to handler table; preprocess_directive() compares names with strcmp() in table order and the { NULL, NULL } entry ends the list. */ +struct { char *name; void (*fn)(void); } directive_list[] = { + { "ifndef", preproc_ifndef }, + { "ifdef", preproc_ifdef }, + { "if", preproc_if }, + { "include", preproc_include }, + { "else", preproc_else }, + { "endif", preproc_endif }, + { "error", preproc_error }, + { "warning", preproc_warning }, + { "define", preproc_define }, + { "undef", preproc_undef }, + { "line", preproc_line }, + { "pragma", preproc_pragma }, + { "elif", preproc_elif }, + { NULL, NULL } +}; + /* process a preprocessor directive */ +#define DIRBUFLEN 20 void preprocess_directive(void) { - outchr('>'); - outchr('#'); + static char dirbuf[DIRBUFLEN+1]; + int c; + int dl = 0; + /* skip whitespace and comments ahead of the directive name; a bare end of line is the null directive */ + for (;;) + { + c = fetch_byte(); + if (is_whitespace(c)) + continue; + if (c == '/') + { + c = munch_comment(); + if (c < 0) + goto baddir; + if (c > 0) + { + while (c--) + outchr(CPP_EOL); + } + continue; + } + if (c == CPP_EOL) + { + // NULL directive - do nothing + outchr(CPP_EOL); + return; + } + break; + } + /* collect the directive name: only 'a'-'z' and '_' are accepted, at most DIRBUFLEN characters */ + + dl = 0; + while (((c >= 'a' && c <= 'z') || c == '_') && dl < DIRBUFLEN) + { + dirbuf[dl++] = c; + c = fetch_byte(); + } + dirbuf[dl] = 0; + +commagain: + if (c == '/') + { + c = munch_comment(); + if (c < 0) + c = '/'; + 
else + { + while (c--) + { + outchr(CPP_EOL); + } + c = fetch_byte(); + goto commagain; + } + } + + if (!is_whitespace(c) && c != CPP_EOL && c != CPP_EOF) + goto baddir; + + /* use a separate index for the lookup so dl still holds the name length at baddir */ + for (int i = 0; directive_list[i].name; i++) + { + if (strcmp(directive_list[i].name, dirbuf) == 0) + { + /* push the terminating character back so the handler sees the end of the line itself instead of reading into the next line */ + unfetch_byte(c); + (*(directive_list[i].fn))(); + outchr(CPP_EOL); + return; + } + } + +baddir: + dirbuf[dl] = 0; + if (skip_level == 0) + do_error("Bad preprocessor directive %s", dirbuf); + outchr(CPP_EOL); +} + +/* Consume the rest of a directive line; warn and discard it if anything other than whitespace remains. */ +void check_eol(void) +{ + next_token_nws(); + if (curtok.ttype == TOK_EOL) + return; + if (curtok.ttype == TOK_EOF) + return; + do_warning("Extra text after preprocessor directive"); + skip_eol(); +} + +/* #ifndef: open a section that is kept only when the named symbol is not defined. A missing or malformed name is reported and the section is skipped, so the matching #endif still balances. */ +void preproc_ifndef(void) +{ + if (skip_level) + { + skip_level++; + skip_eol(); + return; + } + next_token_nws(); + if (curtok.ttype != TOK_IDENT) + { + do_error("Bad #ifndef"); + /* curtok.strval may be NULL here, so never look it up; treat the condition as false */ + skip_level++; + if (curtok.ttype != TOK_EOL && curtok.ttype != TOK_EOF) + skip_eol(); + return; + } + + if (symbol_find(curtok.strval)) + { + skip_level++; + } + else + { + found_level++; + } + check_eol(); +} + +/* #ifdef: open a section that is kept only when the named symbol is defined. A missing or malformed name is reported and the section is skipped, so the matching #endif still balances. */ +void preproc_ifdef(void) +{ + if (skip_level) + { + skip_level++; + skip_eol(); + return; + } + next_token_nws(); + if (curtok.ttype != TOK_IDENT) + { + do_error("Bad #ifdef"); + /* curtok.strval may be NULL here, so never look it up; treat the condition as false */ + skip_level++; + if (curtok.ttype != TOK_EOL && curtok.ttype != TOK_EOF) + skip_eol(); + return; + } + + if (symbol_find(curtok.strval) == NULL) + { + skip_level++; + } + else + { + found_level++; + } + check_eol(); +} + +/* #if: open a section that is kept when eval_expr() reports true. Inside a skipped region the expression is not evaluated; the level is only counted so the matching #endif balances. */ +void preproc_if(void) +{ + if (skip_level) + { + skip_level++; + skip_eol(); + return; + } + if (eval_expr()) + found_level++; + else + skip_level++; +} + +/* #include: not implemented yet; the line is discarded. */ +void preproc_include(void) +{ + skip_eol(); +} + +/* #else: switch the innermost conditional between its kept and skipped branch; a stray or repeated #else is reported. */ +void preproc_else(void) +{ + if (skip_level) + { + if (else_skip_level > found_level) + ; + else if (--skip_level != 0) + skip_level++; + else + found_level++; + } + else if (found_level) + { + skip_level++; + found_level--; + } + else + { + do_error("#else in non-conditional section"); + } + if (else_level == found_level + skip_level) + { + do_error("Too many #else"); + } + else_level = found_level + skip_level; + check_eol(); +} + +void preproc_endif(void) +{ + if (skip_level) + skip_level--; + else if (found_level) + found_level--; + else + do_error("#endif in non-conditional section"); + 
if (skip_level == 0) + else_skip_level = 0; + else_level = 0; + check_eol(); +} + +/* #error: not implemented yet; the line is discarded. */ +void preproc_error(void) +{ + skip_eol(); +} + +/* #warning: not implemented yet; the line is discarded. */ +void preproc_warning(void) +{ + skip_eol(); +} + +/* #define: not implemented yet; the line is discarded. */ +void preproc_define(void) +{ + skip_eol(); +} + +/* #undef: remove the named symbol from the symbol table. Ignored inside a skipped region; a missing or malformed name is reported and nothing is removed. */ +void preproc_undef(void) +{ + if (skip_level) + { + skip_eol(); + return; + } + + next_token_nws(); + if (curtok.ttype != TOK_IDENT) + { + do_error("Bad #undef"); + /* the end of line may already have been consumed as the bad token; only skip when it has not */ + if (curtok.ttype != TOK_EOL && curtok.ttype != TOK_EOF) + skip_eol(); + return; + } + symbol_undef(curtok.strval); + check_eol(); +} + +/* #line: not implemented yet; the line is discarded. */ +void preproc_line(void) +{ + skip_eol(); +} + +/* #pragma: not implemented yet; the line is discarded. It must not touch skip_level or found_level, because no #endif closes a #pragma. */ +void preproc_pragma(void) +{ + skip_eol(); } + +void preproc_elif(void) +{ + if (skip_level == 0) + else_skip_level = found_level; + if (skip_level) + { + if (else_skip_level > found_level) + ; + else if (--skip_level != 0) + skip_level++; + else if (eval_expr()) + found_level++; + else + skip_level++; + } + else if (found_level) + { + skip_level++; + found_level--; + } + else + do_error("#elif in non-conditional section"); +} + + + +/* tokenizing stuff here */ +#undef to_buf +/* append c to strbuf and keep it NUL-terminated; grow while one slot is still free so the terminator written after c always fits */ +#define to_buf(c) do { if (strlen + 1 >= strbufl) { strbufl += 100; strbuf = lw_realloc(strbuf, strbufl); } strbuf[strlen++] = (c); strbuf[strlen] = 0; } while (0) +/* Read the next token from the input into curtok; the previous token's string is freed first. */ +void next_token(void) +{ + int strbufl = 0; + int strlen = 0; + char *strbuf = NULL; + int c; + int ttype; + + lw_free(curtok.strval); + curtok.strval = NULL; + curtok.ttype = TOK_NONE; + + c = fetch_byte(); + if (c == CPP_EOL) + { + curtok.ttype = TOK_EOL; + return; + } + + if (c == CPP_EOF) + { + curtok.ttype = TOK_EOF; + return; + } + + if (is_whitespace(c)) + { + do + { + to_buf(c); + c = fetch_byte(); + } while (is_whitespace(c)); + unfetch_byte(c); + ttype = TOK_WSPACE; + goto out; + } + if (c == '/') + { + c = munch_comment(); + if (c >= 0) + { + to_buf(' '); + while (c--) + outchr(CPP_EOL); + ttype = TOK_WSPACE; + goto out; + } + c = '/'; + } + + if (c == '\'') + { + // we have a character constant here + ttype = TOK_NUMBER; + strbuf = lw_strdup(parse_chr_lit()); + goto out; + } + 
else if (c == '"') + { + // we have a string constant here + ttype = TOK_STRING; + strbuf = lw_strdup(parse_str_lit()); + goto out; + } + else if (c == '.') + { + // we might have a number here + c = fetch_byte(); + if (is_dec(c)) + { + unfetch_byte(c); + ttype = TOK_NUMBER; + strbuf = lw_strdup(parse_num_lit('.')); + goto out; + } + else + { + /* not a number: hand the lookahead back and tokenize the '.' itself rather than the character after it */ + unfetch_byte(c); + c = '.'; + goto ttypegen; + } + } + else if (is_dec(c)) + { + // we have a number here + ttype = TOK_NUMBER; + strbuf = lw_strdup(parse_num_lit(c)); + } + else if (c == 'L') + { + // wide character string or wide character constant, or identifier + c = fetch_byte(); + if (c == '"') + { + char *s; + to_buf('L'); + s = parse_str_lit(); + while (*s) + to_buf(*s++); + ttype = TOK_STRING; + goto out; + } + else if (c == '\'') + { + char *s; + to_buf('L'); + s = parse_chr_lit(); + while (*s) + to_buf(*s++); + ttype = TOK_NUMBER; + goto out; + } + unfetch_byte(c); + ttype = TOK_IDENT; + strbuf = lw_strdup(parse_identifier('L')); + goto out; + } + else if (is_sidchr(c)) + { + // identifier of some kind + strbuf = lw_strdup(parse_identifier(c)); + ttype = TOK_IDENT; + } + else + { +ttypegen: + /* any other character is a TOK_CHAR unless it starts one of the operators recognized below; two-character operators peek one byte ahead and push it back when it does not match */ + ttype = TOK_CHAR; + to_buf(c); + + switch (c) + { + case '/': + ttype = TOK_DIV; + break; + + case '*': + ttype = TOK_MUL; + break; + + case '+': + ttype = TOK_ADD; + break; + + case '-': + ttype = TOK_SUB; + break; + + case '<': + c = fetch_byte(); + if (c == '=') + ttype = TOK_LE; + else + { + ttype = TOK_LT; + unfetch_byte(c); + } + break; + + case '>': + c = fetch_byte(); + if (c == '=') + ttype = TOK_GE; + else + { + ttype = TOK_GT; + unfetch_byte(c); + } + break; + + case '=': + c = fetch_byte(); + if (c == '=') + ttype = TOK_EQ; + else + unfetch_byte(c); + break; + + case '!': + c = fetch_byte(); + if (c == '=') + ttype = TOK_NE; + else + { + ttype = TOK_BNOT; + unfetch_byte(c); + } + break; + + case '&': + c = fetch_byte(); + if (c == '&') + ttype = TOK_BAND; + else + unfetch_byte(c); + break; + + case '|': + c = fetch_byte(); + if (c == '|') + 
ttype = TOK_BOR; + else + unfetch_byte(c); + break; + + case '(': + ttype = TOK_OPAREN; + break; + + case ')': + ttype = TOK_CPAREN; + break; + + } + goto out; + } + +out: + curtok.ttype = ttype; + curtok.strval = strbuf; +} +/* Advance curtok to the next token whose type is not TOK_WSPACE. */ +void next_token_nws(void) +{ + do + { + next_token(); + } while (curtok.ttype == TOK_WSPACE); +} + + +/* +evaluate an expression. Return true if expression is true, false if it +is false. Expression ends at the end of the line. Enter at eval_expr(). + +eval_term_real() evaluates a term in the expression. eval_expr_real() is +the main expression evaluator. +*/ +/* Placeholder for now: discards the rest of the line with skip_eol() and always returns 0. */ +int eval_expr(void) +{ + skip_eol(); + return 0; +} diff -r c419b3b3d43f -r 048adfee2933 lwcc/cpp/symbol.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwcc/cpp/symbol.c Tue Sep 10 19:56:05 2013 -0600 @@ -0,0 +1,79 @@ +/* +lwcc/cpp/symbol.c + +Copyright © 2013 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see . 
+*/ + +#include +#include + +#include +#include + +#include "cpp.h" +/* Head of the singly linked symbol list; new symbols are pushed at the front. */ +struct symtab_e *symtab_head = NULL; +/* Return the first entry whose name equals s, or NULL when there is none. s must not be NULL because it is passed straight to strcmp(). */ +struct symtab_e *symbol_find(const char *s) +{ + struct symtab_e *r; + + for (r = symtab_head; r; r = r -> next) + if (strcmp(r -> name, s) == 0) + return r; + return NULL; +} +/* Release one entry together with its name and strval strings. The entry is not unlinked here. */ +void symbol_free(struct symtab_e *r) +{ + lw_free(r -> name); + lw_free(r -> strval); + lw_free(r); +} +/* Unlink and free the first entry named s; does nothing when no entry matches. p always points at the link that leads to r. */ +void symbol_undef(const char *s) +{ + struct symtab_e *r, **p; + + p = &symtab_head; + for (r = symtab_head; r; r = r -> next) + { + if (strcmp(r -> name, s) == 0) + { + *p = r -> next; + symbol_free(r); + return; + } + p = &(r -> next); + } +} +/* Create an entry holding copies of s and str plus nargs and vargs, push it at the head of the list and return it. No check is made for an existing entry with the same name. */ +struct symtab_e *symbol_add(const char *s, const char *str, int nargs, int vargs) +{ + struct symtab_e *r; + + r = lw_alloc(sizeof (struct symtab_e)); + *r = (struct symtab_e){ + .name = lw_strdup(s), + .strval = lw_strdup(str), + .nargs = nargs, + .vargs = vargs, + .next = symtab_head }; + symtab_head = r; + return r; +}