# HG changeset patch # User William Astle # Date 1378789639 21600 # Node ID c419b3b3d43fe3807e8ac404c1f5de1bf67e9a5d # Parent 40ecbd5da4810d47a05228d875819f9a8c87a40c Checkpoint on lwcc-cpp development This is a checkpoint with some substantial code cleanups on what is so far implemented. This should avoid substantial code duplication later. diff -r 40ecbd5da481 -r c419b3b3d43f Makefile --- a/Makefile Sun Sep 08 21:58:12 2013 -0600 +++ b/Makefile Mon Sep 09 23:07:19 2013 -0600 @@ -101,7 +101,7 @@ lwcc_driver_objs := $(lwcc_driver_srcs:.c=.o) lwcc_driver_deps := $(lwcc_driver_srcs:.c=.d) -lwcc_cpp_srcs := main.c error.c file.c +lwcc_cpp_srcs := main.c error.c file.c preproc.c char_p.c lwcc_cpp_srcs := $(addprefix lwcc/cpp/,$(lwcc_cpp_srcs)) lwcc_cpp_objs := $(lwcc_cpp_srcs:.c=.o) lwcc_cpp_deps := $(lwcc_cpp_srcs:.c=.d) @@ -169,7 +169,7 @@ @echo "Cleaning up" @rm -f lwlib/liblw.a lwasm/lwasm$(PROGSUFFIX) lwlink/lwlink$(PROGSUFFIX) lwlink/lwobjdump$(PROGSUFFIX) lwar/lwar$(PROGSUFFIX) @rm -f lwcc/driver/lwcc$(PROGSUFFIX) lwcc/cpp/lwcc-cpp$(PROGSUFFIX) - @rm -f $(lwcc_driver_ojbs) $(lwcc_preproc_objs) + @rm -f $(lwcc_driver_ojbs) $(lwcc_cpp_objs) @rm -f $(lwasm_objs) $(lwlink_objs) $(lwar_objs) $(lwlib_objs) $(lwobjdump_objs) @rm -f $(extra_clean) @rm -f */*.exe @@ -178,7 +178,7 @@ realclean: clean $(realcleantargs) @echo "Cleaning up even more" @rm -f $(lwasm_deps) $(lwlink_deps) $(lwar_deps) $(lwlib_deps) $(lwobjdump_deps) - @rm -f $(lwcc_driver_deps) + @rm -f $(lwcc_driver_deps) $(lwcc_cpp_deps) @rm -f docs/manual/*.html docs/manual/*.pdf print-%: diff -r 40ecbd5da481 -r c419b3b3d43f lwcc/cpp/char_p.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwcc/cpp/char_p.c Mon Sep 09 23:07:19 2013 -0600 @@ -0,0 +1,52 @@ +int is_whitespace(int c) +{ + switch (c) + { + case ' ': + case '\t': + case '\r': + case '\n': + return 1; + } + return 0; +} + +int is_sidchr(c) +{ + if (c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) + return 1; + return 0; +} + +int 
is_idchr(int c) +{ + if (c >= '0' && c <= '9') + return 1; + return is_sidchr(c); +} + +int is_ep(int c) +{ + if (c == 'e' || c == 'E' || c == 'p' || c == 'P') + return 1; + return 0; +} + +int is_hex(int c) +{ + if (c >= 'a' && c <= 'f') + return 1; + if (c >= 'A' && c <= 'F') + return 1; + if (c >= '0' && c <= '9') + return 1; + return 0; +} + +int is_dec(int c) +{ + if (c >= '0' && c <= '9') + return 1; + return 0; +} + diff -r 40ecbd5da481 -r c419b3b3d43f lwcc/cpp/cpp.h --- a/lwcc/cpp/cpp.h Sun Sep 08 21:58:12 2013 -0600 +++ b/lwcc/cpp/cpp.h Mon Sep 09 23:07:19 2013 -0600 @@ -31,6 +31,20 @@ CPP_EOF = -1, }; +enum +{ + TOK_NONE = 0, + TOK_WSPACE, + TOK_IDENT, + TOK_MAX +}; + +struct token +{ + int ttype; // token type + char *strval; // string value of token - the text it matched +}; + struct file_stack_e { const char *fn; @@ -43,15 +57,47 @@ int qseen; // number of ? seen during trigraph scan int unget; // character that has been "ungot" int curc; // the most recent character retrieved + int *ungetbuf; // buffer for "unfetch" + int ungetbufl; // length offset in unget buffer + int ungetbufs; // size of unget buffer }; +struct symtab_e +{ + char *name; // the symbol identifier + struct symtab_e *next; // next symbol in table + char *strval; // the actual value of the macro + int nargs; // number of fixed args; -1 for basic, >= 0 for function like + int vargs; // set if macro is varargs +}; + +extern struct symtab_e *symbol_find(const char *); +extern void symbol_undef(const char *); +extern struct symtab_e *symbol_add(const char *, const char *, int, int); + extern FILE *output_fp; extern int trigraphs; extern struct file_stack_e *file_stack; extern int process_file(const char *); +extern void preprocess_file(void); +extern void preprocess_output_location(int); extern void do_error(const char *, ...); extern void do_warning(const char *, ...); +extern int fetch_byte(void); +extern void unfetch_byte(int); +extern void outchr(int); +extern void outstr(char *); + 
+extern int is_whitespace(int); +extern int is_ep(int); +extern int is_sidchr(int); +extern int is_idchr(int); +extern int is_dec(int); +extern int is_hex(int); + +extern int skip_level; + #endif // cpp_h_seen___ diff -r 40ecbd5da481 -r c419b3b3d43f lwcc/cpp/file.c --- a/lwcc/cpp/file.c Sun Sep 08 21:58:12 2013 -0600 +++ b/lwcc/cpp/file.c Mon Sep 09 23:07:19 2013 -0600 @@ -18,19 +18,6 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . - -NOTES: - -The function fetch_byte() grabs a byte from the input file. It returns -CPP_EOF if end of file has been reached. The resulting byte has passed -through three filters, in order: - -* All CRLF, LFCR, LF, and CR have been converted to CPP_EOL -* If enabled (--trigraphs), trigraphs have been interpreted -* \\n (backslash-newline) has been processed (eliminated) - -To obtain a byte without processing \\n, call fetch_byte_tg(). - */ #include @@ -43,105 +30,74 @@ struct file_stack_e *file_stack = NULL; -int is_whitespace(int c) -{ - switch (c) - { - case ' ': - case '\t': - case '\r': - case '\n': - return 1; - } - return 0; -} - -int is_sidchr(c) -{ - if (c == '_' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) - return 1; - return 0; -} - -int is_idchr(int c) +/* output a byte to the current output stream as long as we aren't in the + middle of a false conditional. CPP_EOL will be converted to '\n' + on output. 
*/ +void outchr(int c) { - if (c >= '0' && c <= '9') - return 1; - return is_sidchr(c); -} - -int is_ep(int c) -{ - if (c == 'e' || c == 'E' || c == 'p' || c == 'P') - return 1; - return 0; -} - -int is_hex(int c) -{ - if (c >= 'a' && c <= 'f') - return 1; - if (c >= 'A' && c <= 'F') - return 1; - if (c >= '0' && c <= '9') - return 1; - return 0; -} - -int is_dec(int c) -{ - if (c >= '0' && c <= '9') - return 1; - return 0; -} - -static void outchr(int c) -{ + if (skip_level) + return; + if (c == CPP_EOL) + c = '\n'; fputc(c, output_fp); } -static void outstr(char *s) +/* output a string to the current output stream as long as we aren't in the + middle of a false conditional */ +void outstr(char *s) { + if (skip_level) + return; while (*s) outchr(*s++); } -int fetch_byte_ll(struct file_stack_e *f) +/* fetch a raw input byte from the current file. Will return CPP_EOF if + EOF is encountered and CPP_EOL if an end of line sequence is encountered. + End of line is defined as either CR, CRLF, LF, or LFCR. CPP_EOL is + returned on the first CR or LF encountered. The complementary CR or LF + is munched, if present, when the *next* character is read. This always + operates on file_stack. + + This function also accounts for line numbers in input files and also + character columns. 
+*/ +int fetch_byte_ll(void) { int c; - if (f -> eolstate != 0) + if (file_stack -> eolstate != 0) { - f -> line++; - f -> col = 0; + file_stack -> line++; + file_stack -> col = 0; } - c = getc(f -> fp); - f -> col++; - if (f -> eolstate == 1) + c = getc(file_stack -> fp); + file_stack -> col++; + if (file_stack -> eolstate == 1) { // just saw CR, munch LF if (c == 10) - c = getc(f -> fp); - f -> eolstate = 0; + c = getc(file_stack -> fp); + file_stack -> eolstate = 0; } - else if (f -> eolstate == 2) + else if (file_stack -> eolstate == 2) { // just saw LF, much CR if (c == 13) - c = getc(f -> fp); - f -> eolstate = 0; + c = getc(file_stack -> fp); + file_stack -> eolstate = 0; } if (c == 10) { // we have LF - end of line, flag to munch CR - f -> eolstate = 2; + file_stack -> eolstate = 2; c = CPP_EOL; } else if (c == 13) { // we have CR - end of line, flag to munch LF - f -> eolstate = 1; + file_stack -> eolstate = 1; c = CPP_EOL; } else if (c == EOF) @@ -151,454 +107,174 @@ return c; } -int fetch_byte_tg(struct file_stack_e *f) +/* This function takes a sequence of bytes from the _ll function above + and does trigraph interpretation on it, but only if the global + trigraphs is nonzero. 
*/ +int fetch_byte_tg(void) { int c; - + if (!trigraphs) { - c = fetch_byte_ll(f); + c = fetch_byte_ll(); } else { /* we have to do the trigraph shit here */ - if (f -> ra != CPP_NOUNG) + if (file_stack -> ra != CPP_NOUNG) { - if (f -> qseen > 0) + if (file_stack -> qseen > 0) { c = '?'; - f -> qseen -= 1; + file_stack -> qseen -= 1; return c; } else { - c = f -> ra; - f -> ra = CPP_NOUNG; + c = file_stack -> ra; + file_stack -> ra = CPP_NOUNG; return c; } } - c = fetch_byte_ll(f); + c = fetch_byte_ll(); while (c == '?') { - f -> qseen++; - c = fetch_byte_ll(f); + file_stack -> qseen++; + c = fetch_byte_ll(); } - if (f -> qseen >= 2) + if (file_stack -> qseen >= 2) { // we have a trigraph switch (c) { case '=': c = '#'; - f -> qseen -= 2; + file_stack -> qseen -= 2; break; case '/': c = '\\'; - f -> qseen -= 2; + file_stack -> qseen -= 2; break; case '\'': c = '^'; - f -> qseen -= 2; + file_stack -> qseen -= 2; break; case '(': c = '['; - f -> qseen -= 2; + file_stack -> qseen -= 2; break; case ')': c = ']'; - f -> qseen -= 2; + file_stack -> qseen -= 2; break; case '!': c = '|'; - f -> qseen -= 2; + file_stack -> qseen -= 2; break; case '<': c = '{'; - f -> qseen -= 2; + file_stack -> qseen -= 2; break; case '>': c = '}'; - f -> qseen -= 2; + file_stack -> qseen -= 2; break; case '~': c = '~'; - f -> qseen -= 2; + file_stack -> qseen -= 2; break; } - if (f -> qseen > 0) + if (file_stack -> qseen > 0) { - f -> ra = c; + file_stack -> ra = c; c = '?'; - f -> qseen--; + file_stack -> qseen--; } } - else if (f -> qseen > 0) + else if (file_stack -> qseen > 0) { - f -> ra = c; + file_stack -> ra = c; c = '?'; - f -> qseen--; + file_stack -> qseen--; } } return c; } -int fetch_byte(struct file_stack_e *f) +/* This function puts a byte back onto the front of the input stream used + by fetch_byte(). Theoretically, an unlimited number of characters can + be unfetched. Line and column counting may be incorrect if unfetched + characters cross a token boundary. 
*/ +void unfetch_byte(int c) +{ + if (file_stack -> ungetbufl >= file_stack -> ungetbufs) + { + file_stack -> ungetbufs += 100; + file_stack -> ungetbuf = lw_realloc(file_stack -> ungetbuf, file_stack -> ungetbufs); + } + file_stack -> ungetbuf[file_stack -> ungetbufl++] = c; +} + +/* This function retrieves a byte from the input stream. It performs + backslash-newline splicing on the returned bytes. Any character + retrieved from the unfetch buffer is presumed to have already passed + the backslash-newline filter. */ +int fetch_byte(void) { int c; + + if (file_stack -> ungetbufl > 0) + { + file_stack -> ungetbufl--; + c = file_stack -> ungetbuf[file_stack -> ungetbufl]; + if (file_stack -> ungetbufl == 0) + { + lw_free(file_stack -> ungetbuf); + file_stack -> ungetbuf = NULL; + file_stack -> ungetbufs = 0; + } + return c; + } again: - if (f -> unget != CPP_NOUNG) + if (file_stack -> unget != CPP_NOUNG) { - c = f -> unget; - f -> unget = CPP_NOUNG; + c = file_stack -> unget; + file_stack -> unget = CPP_NOUNG; } else { - c = fetch_byte_tg(f); + c = fetch_byte_tg(); } if (c == '\\') { int c2; - c2 = fetch_byte_tg(f); + c2 = fetch_byte_tg(); if (c2 == CPP_EOL) goto again; else - f -> unget = c2; + file_stack -> unget = c2; } - f -> curc = c; + file_stack -> curc = c; return c; } -static void skip_line(struct file_stack_e *f) -{ - int c; - while ((c = fetch_byte(f)) != CPP_EOL && c != CPP_EOF) - /* do nothing */ ; -} - - -struct -{ - char *name; - void (*fn)(struct file_stack_e *); -} directives[] = -{ - { NULL, NULL }, - { NULL, NULL } -}; - -/* -This handles a preprocessing directive. Such a directive goes from the -next character to be retrieved from f until the first instance of CPP_EOL -or CPP_EOF. 
-*/ -void handle_directive(struct file_stack_e *f) -{ - int c, i; - char kw[20]; - -again: - while ((c = fetch_byte(f)) == ' ' || c == '\t') - /* do nothing */ ; - if (c == '/') - { - // maybe a comment // - c = fetch_byte(f); - if (c == '/') - { - // line comment - skip_line(f); - return; - } - if (c == '*') - { - // block comment - while (1) - { - c = fetch_byte(f); - if (c == CPP_EOF) - return; - if (c == '*') - { - c = fetch_byte(f); - if (c == '/') - { - // end of comment - try again for directive - goto again; - } - if (c == CPP_EOF) - return; - } - } - } - } - - // empty directive - do nothing - if (c == CPP_EOL) - return; - - if (c < 'a' || c > 'z') - goto out; - - i = 0; - do - { - kw[i++] = c; - if (i == sizeof(kw) - 1) - goto out; // keyword too long - c = fetch_byte(f); - } while ((c >= 'a' && c <= 'z') || (c == '_')); - kw[i++] = '\0'; - - /* we have a keyword here */ - for (i = 0; directives[i].name; i++) - { - if (strcmp(directives[i].name, kw) == 0) - { - (*directives[i].fn)(f); - return; - } - } - -/* if we fall through here, we have an unknown directive */ -out: - do_error("invalid preprocessor directive"); - skip_line(f); -} - -/* -Notes: - -Rather than tokenize the entire file, we run through it interpreting -things only as much as we need to in order to identify the following: - -preprocessing directives (#...) -identifiers which might need to be replaced with macros - -We have to interpret strings, character constants, and numbers to prevent -false positives in those situations. - -When we find a preprocessing directive, it is handled with a more -aggressive tokenization process and then intepreted accordingly. - -nlws is used to record the fact that only whitespace has occurred at the -start of a line. Whitespace is defined as comments or isspace(c). It gets -reset to 1 after each EOL character. If a non-whitespace character is -encountered, it is set to -1. 
If the character processing decides it really -is a whitespace character, it will set nlws back to 1 (block comment). -Elsewise, it will get set to 0 if it is still -1 when the loop starts again. - -This is needed so we can identify whitespace interposed before a -preprocessor directive. This is the only case where it matters for -the preprocessor. - -*/ -void preprocess_file(struct file_stack_e *f) -{ - int c; - int nlws = 1; - - while (1) - { - c = fetch_byte(f); -again: - if (nlws == -1) - nlws = 0; - if (c == CPP_EOF) - { - outchr('\n'); - return; - } - if (c == CPP_EOL) - { - nlws = 1; - outchr('\n'); - continue; - } - - if (!is_whitespace(c)) - nlws = -1; - - if (is_sidchr(c)) - { - // have identifier here - parse it off - char *ident = NULL; - int idlen = 0; - - do - { - ident = lw_realloc(ident, idlen + 1); - ident[idlen++] = c; - ident[idlen] = '\0'; - c = fetch_byte(f); - } while (is_idchr(c)); - - /* do something with the identifier here - macros, etc. */ - outstr(ident); - lw_free(ident); - - goto again; - } - - switch (c) - { - default: - outchr(c); - break; - - case '.': // a number - to prevent seeing an identifier in middle of number - outchr(c); - c = fetch_byte(f); - if (!is_dec(c)) - goto again; - /* fall through */ - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - do - { - outchr(c); - c = fetch_byte(f); - if (c == CPP_EOF) - return; - if (is_ep(c)) - { - outchr(c); - c = fetch_byte(f); - if (c == '-' || c == '+') - { - outchr(c); - c = fetch_byte(f); - } - } - } while ((is_idchr(c)) || (c == '.')); - goto again; - - case '#': - if (nlws) - { - handle_directive(f); - /* note: no need to reset nlws */ - } - else - outchr('#'); - break; - - case '\'': // character constant - outchr('\''); - while ((c = fetch_byte(f)) != '\'') - { - if (c == '\\') - { - outchr('\\'); - c = fetch_byte(f); - } - if (c == CPP_EOL) - { - do_warning("Unterminated character constant"); - goto again; 
- } - if (c == CPP_EOF) - return; - outchr(c); - } - outchr(c); - break; - - case '"': // strings - outchr(c); - while ((c = fetch_byte(f)) != '"') - { - if (c == '\\') - { - outchr('\\'); - c = fetch_byte(f); - } - if (c == CPP_EOL) - { - do_warning("unterminated string literal"); - goto again; - } - if (c == CPP_EOF) - return; - outchr(c); - } - outchr(c); - break; - - case '/': // comments - c = fetch_byte(f); - if (c == '/') - { - // line comment - outchr(' '); - do - { - c = fetch_byte(f); - } while (c != CPP_EOF && c != CPP_EOL); - } - else if (c == '*') - { - // block comment - for (;;) - { - c = fetch_byte(f); - if (c == CPP_EOF) - { - break; - } - if (c == CPP_EOL) - { - continue; - } - if (c == '*') - { - // maybe end of comment - c = fetch_byte(f); - if (c == '/') - { - // end of comment - break; - } - } - } - // replace comment with a single space - outchr(' '); - if (nlws == -1) - nlws = 1; - continue; - } - else - { - // restore eaten '/' - outchr('/'); - // process the character we just fetched - goto again; - } - } // switch - } // processing loop -} - +/* This function opens (if not stdin) the file f and pushes it onto the + top of the input file stack. It then proceeds to process the file + and return. Nonzero return means the file could not be opened. 
*/ int process_file(const char *f) { - struct file_stack_e *nf; + struct file_stack_e nf; FILE *fp; fprintf(stderr, "Processing %s\n", f); @@ -614,23 +290,24 @@ } /* push the file onto the file stack */ - nf = lw_alloc(sizeof(struct file_stack_e)); - nf -> fn = f; - nf -> fp = fp; - nf -> next = file_stack; - nf -> line = 1; - nf -> col = 0; - nf -> qseen = 0; - nf -> ra = CPP_NOUNG; - nf -> unget = CPP_NOUNG; - file_stack = nf; - + nf.fn = f; + nf.fp = fp; + nf.next = file_stack; + nf.line = 1; + nf.col = 0; + nf.qseen = 0; + nf.ra = CPP_NOUNG; + nf.unget = CPP_NOUNG; + file_stack = &nf; + nf.ungetbuf = NULL; + nf.ungetbufs = 0; + nf.ungetbufl = 0; + /* go preprocess the file */ - preprocess_file(nf); + preprocess_file(); - if (nf -> fp != stdin) - fclose(nf -> fp); - file_stack = nf -> next; - lw_free(nf); + if (nf.fp != stdin) + fclose(nf.fp); + file_stack = nf.next; return 0; } diff -r 40ecbd5da481 -r c419b3b3d43f lwcc/cpp/preproc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lwcc/cpp/preproc.c Mon Sep 09 23:07:19 2013 -0600 @@ -0,0 +1,373 @@ +/* +lwcc/cpp/preproc.c + +Copyright © 2013 William Astle + +This file is part of LWTOOLS. + +LWTOOLS is free software: you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the Free Software +Foundation, either version 3 of the License, or (at your option) any later +version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program. If not, see <http://www.gnu.org/licenses/>.
+*/ + +#include +#include + +#include + +#include "cpp.h" + + +int munch_comment(void); +char *parse_str_lit(void); +char *parse_chr_lit(void); +char *parse_num_lit(int); +void preprocess_identifier(int); +void preprocess_directive(void); + + +int skip_level; + +/* +Notes: + +Rather than tokenize the entire file, we run through it interpreting +things only as much as we need to in order to identify the following: + +preprocessing directives (#...) +identifiers which might need to be replaced with macros + +We have to interpret strings, character constants, and numbers to prevent +false positives in those situations. + +When we find a preprocessing directive, it is handled with a more +aggressive tokenization process and then interpreted accordingly. + +nlws is used to record the fact that only whitespace has occurred at the +start of a line. Whitespace is defined as comments or isspace(c). It gets +reset to 1 after each EOL character. If a non-whitespace character is +encountered, it is set to -1. If the character processing decides it really +is a whitespace character, it will set nlws back to 1 (block comment). +Elsewise, it will get set to 0 if it is still -1 when the loop starts again. + +This is needed so we can identify whitespace interposed before a +preprocessor directive. This is the only case where it matters for +the preprocessor.
+ +*/ +void preprocess_file() +{ + int c; + int nlws = 1; + + preprocess_output_location(1); + for (;;) + { + c = fetch_byte(); + // if we had non-whitespace that wasn't munched (comment), set flag correctly + if (nlws == -1) + nlws = 0; + if (c == CPP_EOF) + { + // end of input - make sure newline is present + outchr('\n'); + return; + } + if (c == CPP_EOL) + { + // flag that we just hit the start of a new line + nlws = 1; + outchr(CPP_EOL); + continue; + } + + /* if we have a non-whitespace character, flag it as such */ + if (!is_whitespace(c)) + nlws = -1; + + if (c == '#' && nlws) + { + // we have a preprocessor directive here - this call will do + // everything including outputting the blank line, if appropriate + preprocess_directive(); + continue; + } + else if (c == '\'') + { + // we have a character constant here + outstr(parse_chr_lit()); + continue; + } + else if (c == '"') + { + // we have a string constant here + outstr(parse_str_lit()); + continue; + } + else if (c == '.') + { + // we might have a number here + outchr('.'); + c = fetch_byte(); + if (is_dec(c)) + outstr(parse_num_lit(c)); + continue; + } + else if (is_dec(c)) + { + // we have a number here + outstr(parse_num_lit(c)); + } + else if (c == '/') + { + // we might have a comment here + c = munch_comment(); + if (c < 0) + { + outchr('/'); + continue; + } + // comments are white space - count them as such at start of line + if (nlws == -1) + nlws = 0; + /* c is the number of EOL characters the comment spanned */ + while (c--) + outchr(CPP_EOL); + continue; + } + else if (c == 'L') + { + // wide character string or wide character constant, or identifier + c = fetch_byte(); + if (c == '"') + { + outchr('L'); + outstr(parse_str_lit()); + continue; + } + else if (c == '\'') + { + outchr('L'); + outstr(parse_chr_lit()); + continue; + } + unfetch_byte(c); + preprocess_identifier('L'); + continue; + } + else if (is_sidchr(c)) + { + // identifier of some kind + preprocess_identifier(c); + continue; + 
} + else + { + // random character - pass through + outchr(c); + } + } +} + +void preprocess_identifier(int c) +{ + char *ident = NULL; + int idlen = 0; + int idbufl = 0; + + do + { + if (idlen >= idbufl) + { + idbufl += 50; + ident = lw_realloc(ident, idbufl); + } + ident[idlen++] = c; + c = fetch_byte(); + } while (is_idchr(c)); + + ident[idlen++] = 0; + unfetch_byte(c); + + /* do something with the identifier here - macros, etc. */ + outstr(ident); + lw_free(ident); +} + +#define to_buf(c) do { if (idlen >= idbufl) { idbufl += 100; ident = lw_realloc(ident, idbufl); } ident[idlen++] = (c); } while (0) +char *parse_num_lit(int c) +{ + static char *ident = NULL; + int idlen = 0; + static int idbufl = 0; + + do + { + to_buf(c); + c = fetch_byte(); + if (is_ep(c)) + { + to_buf(c); + c = fetch_byte(); + if (c == '-' || c == '+') + { + to_buf(c); + c = fetch_byte(); + } + } + } while ((is_dec(c)) || (c == '.')); + to_buf(0); + + return ident; +} + +char *parse_chr_lit(void) +{ + static char *ident = NULL; + int idlen = 0; + static int idbufl = 0; + int c; + + to_buf('\''); + while ((c = fetch_byte()) != '\'') + { + if (c == CPP_EOL || c == CPP_EOF) + { + unfetch_byte(c); + to_buf(0); + do_warning("Unterminated character constant"); + return ident; + } + if (c == '\\') + { + to_buf(c); + c = fetch_byte(); + if (c == CPP_EOL || c == CPP_EOF) + { + unfetch_byte(c); + to_buf(0); + do_warning("Unterminated character constant"); + return ident; + } + } + to_buf(c); + } + to_buf(c); + to_buf(0); + return ident; +} + +char *parse_str_lit(void) +{ + static char *ident = NULL; + int idlen = 0; + static int idbufl = 0; + int c; + + to_buf('"'); + while ((c = fetch_byte()) != '"') + { + if (c == CPP_EOL || c == CPP_EOF) + { + unfetch_byte(c); + to_buf(0); + do_warning("Unterminated string literal"); + return ident; + } + if (c == '\\') + { + to_buf(c); + c = fetch_byte(); + if (c == CPP_EOL || c == CPP_EOF) + { + unfetch_byte(c); + to_buf(0); + do_warning("Unterminated string 
literal"); + return ident; + } + } + to_buf(c); + } + to_buf(c); + to_buf(0); + return ident; +} + +int munch_comment(void) +{ + int nlc = 0; + int c; + + c = fetch_byte(); + if (c == '/') + { + // single line comment + for (;;) + { + c = fetch_byte(); + if (c == CPP_EOL) + nlc = 1; + if (c == CPP_EOL || c == CPP_EOF) + return nlc; + } + } + else if (c == '*') + { + // block comment + for (;;) + { + c = fetch_byte(); + if (c == CPP_EOL) + nlc++; + if (c == CPP_EOF) + return nlc; + if (c == '*') + { + c = fetch_byte(); + if (c == '/' || c == CPP_EOF) + return nlc; + if (c == CPP_EOL) + nlc++; + } + } + return nlc; + } + else + { + unfetch_byte(c); + return -1; + } + + return nlc; +} + +/* Output a location directive to synchronize the compiler with the correct + input line number and file. This is of the form: + +# <linenum> <filename> <flag> + +where <linenum> is the line number inside the file, <filename> is the +filename (as a C string), and <flag> is the specified flag argument which +should be 1 for the start of a new file or 2 for returning to the file from +another file. <linenum> is the line number the following line came from. + */ +void preprocess_output_location(int flag) +{ + fprintf(output_fp, "# %d \"%s\" %d\n", file_stack -> line, file_stack -> fn, flag); +} + +/* process a preprocessor directive */ +void preprocess_directive(void) +{ + outchr('>'); + outchr('#'); +}