changeset 294:048adfee2933 ccdev

Checkpoint on lwcc-cpp development. This checkpoint includes a tokenizer and a basic implementation of #if, #ifdef, #ifndef, #else, #endif, #elif, and #undef, along with basic symbol table management.
author William Astle <lost@l-w.ca>
date Tue, 10 Sep 2013 19:56:05 -0600
parents c419b3b3d43f
children 4b17780f2777
files Makefile lwcc/cpp/cpp.h lwcc/cpp/file.c lwcc/cpp/preproc.c lwcc/cpp/symbol.c
diffstat 5 files changed, 689 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile	Mon Sep 09 23:07:19 2013 -0600
+++ b/Makefile	Tue Sep 10 19:56:05 2013 -0600
@@ -101,7 +101,7 @@
 lwcc_driver_objs := $(lwcc_driver_srcs:.c=.o)
 lwcc_driver_deps := $(lwcc_driver_srcs:.c=.d)
 
-lwcc_cpp_srcs := main.c error.c file.c preproc.c char_p.c
+lwcc_cpp_srcs := main.c error.c file.c preproc.c char_p.c symbol.c
 lwcc_cpp_srcs := $(addprefix lwcc/cpp/,$(lwcc_cpp_srcs))
 lwcc_cpp_objs := $(lwcc_cpp_srcs:.c=.o)
 lwcc_cpp_deps := $(lwcc_cpp_srcs:.c=.d)
--- a/lwcc/cpp/cpp.h	Mon Sep 09 23:07:19 2013 -0600
+++ b/lwcc/cpp/cpp.h	Tue Sep 10 19:56:05 2013 -0600
@@ -34,8 +34,28 @@
 enum
 {
 	TOK_NONE = 0,
+	TOK_EOF,
+	TOK_EOL,
 	TOK_WSPACE,
 	TOK_IDENT,
+	TOK_NUMBER,
+	TOK_STRING,
+	TOK_CHAR,
+	TOK_DIV,
+	TOK_MUL,
+	TOK_ADD,
+	TOK_SUB,
+	TOK_OPAREN,
+	TOK_CPAREN,
+	TOK_NE,
+	TOK_EQ,
+	TOK_LE,
+	TOK_LT,
+	TOK_GE,
+	TOK_GT,
+	TOK_BAND,
+	TOK_BOR,
+	TOK_BNOT,
 	TOK_MAX
 };
 
@@ -90,6 +110,7 @@
 extern void unfetch_byte(int);
 extern void outchr(int);
 extern void outstr(char *);
+extern void skip_eol(void);
 
 extern int is_whitespace(int);
 extern int is_ep(int);
--- a/lwcc/cpp/file.c	Mon Sep 09 23:07:19 2013 -0600
+++ b/lwcc/cpp/file.c	Tue Sep 10 19:56:05 2013 -0600
@@ -269,6 +269,30 @@
 	return c;
 }
 
+void skip_eol(void)
+{
+	/* Discard the rest of the current line; the terminating CPP_EOL or
+	   CPP_EOF is pushed back so the caller still sees it. */
+	int ch;
+	int nl;
+
+	while ((ch = fetch_byte()) != CPP_EOF && ch != CPP_EOL)
+	{
+		if (ch != '/')
+		{
+			continue;
+		}
+		/* a positive munch_comment() result is echoed as that many
+		   CPP_EOL characters on the output */
+		for (nl = munch_comment(); nl > 0; nl--)
+		{
+			outchr(CPP_EOL);
+		}
+	}
+	unfetch_byte(ch);
+}
+
+
 /* This function opens (if not stdin) the file f and pushes it onto the
    top of the input file stack. It then proceeds to process the file
    and return. Nonzero return means the file could not be opened. */
--- a/lwcc/cpp/preproc.c	Mon Sep 09 23:07:19 2013 -0600
+++ b/lwcc/cpp/preproc.c	Tue Sep 10 19:56:05 2013 -0600
@@ -21,8 +21,10 @@
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 
 #include <lw_alloc.h>
+#include <lw_string.h>
 
 #include "cpp.h"
 
@@ -31,11 +33,19 @@
 char *parse_str_lit(void);
 char *parse_chr_lit(void);
 char *parse_num_lit(int);
-void preprocess_identifier(int);
+char *parse_identifier(int);
+void preprocess_identifier(char *);
 void preprocess_directive(void);
+void next_token(void);
+void next_token_nws(void);
+int eval_expr(void);
 
+int skip_level = 0;
+int found_level = 0;
+int else_level = 0;
+int else_skip_level = 0;
 
-int skip_level;
+struct token curtok = { .ttype = TOK_NONE, .strval = NULL };
 
 /*
 Notes:
@@ -161,13 +171,15 @@
 				continue;
 			}
 			unfetch_byte(c);
-			preprocess_identifier('L');
+			preprocess_identifier(parse_identifier('L'));
 			continue;
 		}
 		else if (is_sidchr(c))
 		{
 			// identifier of some kind
-			preprocess_identifier(c);
+			char *s;
+			s = parse_identifier(c);
+			preprocess_identifier(s);
 			continue;
 		}
 		else
@@ -178,11 +190,11 @@
 	}	
 }
 
-void preprocess_identifier(int c)
+char *parse_identifier(int c)
 {
-	char *ident = NULL;
+	static char *ident = NULL;
 	int idlen = 0;
-	int idbufl = 0;
+	static int idbufl = 0;
 
 	do
 	{
@@ -197,10 +209,14 @@
 
 	ident[idlen++] = 0;
 	unfetch_byte(c);
-	
+
+	return ident;
+}
+
+void preprocess_identifier(char *s)
+{
 	/* do something with the identifier here  - macros, etc. */
-	outstr(ident);
-	lw_free(ident);
+	outstr(s);
 }
 
 #define to_buf(c) do { if (idlen >= idbufl) { idbufl += 100; ident = lw_realloc(ident, idbufl); } ident[idlen++] = (c); } while (0)
@@ -224,7 +240,7 @@
 				c = fetch_byte();
 			}
 		}
-	} while ((is_dec(c)) || (c == '.'));
+	} while ((is_idchr(c)) || (c == '.'));
 	to_buf(0);
 	
 	return ident;
@@ -365,9 +381,544 @@
 	fprintf(output_fp, "# %d \"%s\" %d\n", file_stack -> line, file_stack -> fn, flag);
 }
 
+void preproc_ifndef(void);
+void preproc_ifdef(void);
+void preproc_if(void);
+void preproc_include(void);
+void preproc_else(void);
+void preproc_endif(void);
+void preproc_error(void);
+void preproc_warning(void);
+void preproc_define(void);
+void preproc_undef(void);
+void preproc_line(void);
+void preproc_pragma(void);
+void preproc_elif(void);
+
+struct { char *name; void (*fn)(void); } directive_list[] = {	/* directive name -> handler, searched linearly by preprocess_directive() */
+	{ "ifndef",			preproc_ifndef },
+	{ "ifdef",			preproc_ifdef },
+	{ "if",				preproc_if },
+	{ "include",		preproc_include },
+	{ "else",			preproc_else },
+	{ "endif",			preproc_endif },
+	{ "error",			preproc_error },
+	{ "warning",		preproc_warning },
+	{ "define",			preproc_define },
+	{ "undef",			preproc_undef },
+	{ "line",			preproc_line },
+	{ "pragma",			preproc_pragma },
+	{ "elif",			preproc_elif },
+	{ NULL, NULL }	/* terminator: a NULL name ends the search */
+};
+
 /* process a preprocessor directive */
+#define DIRBUFLEN 20
 void preprocess_directive(void)
 {
-	outchr('>');
-	outchr('#');
+	static char dirbuf[DIRBUFLEN+1];
+	int c;
+	int dl = 0;		/* number of characters stored in dirbuf */
+	int i;			/* index into directive_list */
+
+	for (;;)
+	{
+		c = fetch_byte();
+		if (is_whitespace(c))
+			continue;
+		if (c == '/')
+		{
+			c = munch_comment();
+			if (c < 0)
+				goto baddir;
+			if (c > 0)
+			{
+				while (c--)
+					outchr(CPP_EOL);
+			}
+			continue;
+		}
+		if (c == CPP_EOL)
+		{
+			// NULL directive - do nothing
+			outchr(CPP_EOL);
+			return;
+		}
+		break;
+	}
+
+	/* collect the directive name: lowercase letters and '_', at most DIRBUFLEN */
+	while (((c >= 'a' && c <= 'z') || c == '_') && dl < DIRBUFLEN)
+	{
+		dirbuf[dl++] = c;
+		c = fetch_byte();
+	}
+	dirbuf[dl] = 0;
+
+commagain:
+	if (c == '/')
+	{
+		c = munch_comment();
+		if (c < 0)
+			c = '/';
+		else
+		{
+			while (c--)
+			{
+				outchr(CPP_EOL);
+			}
+			c = fetch_byte();
+			goto commagain;
+		}
+	}
+	
+	if (!is_whitespace(c) && c != CPP_EOL && c != CPP_EOF)
+		goto baddir;
+	
+	for (i = 0; directive_list[i].name; i++)	/* own index: dl must keep the name length for baddir */
+	{
+		if (strcmp(directive_list[i].name, dirbuf) == 0)
+		{
+			(*(directive_list[i].fn))();
+			outchr(CPP_EOL);
+			return;
+		}
+	}
+
+baddir:
+	dirbuf[dl] = 0;		/* dl is the collected length (0 if no name was read) */
+	if (skip_level == 0)
+		do_error("Bad preprocessor directive %s", dirbuf);
+	outchr(CPP_EOL);
+}
+
+void check_eol(void)
+{
+	/* anything but end of line/file after a directive draws a warning */
+	next_token_nws();
+	if (curtok.ttype != TOK_EOL && curtok.ttype != TOK_EOF)
+	{
+		do_warning("Extra text after preprocessor directive");
+		skip_eol();
+	}
+}
+
+void preproc_ifndef(void)
+{
+	/* #ifndef NAME: the section is live only when NAME is undefined */
+	if (skip_level)
+	{
+		skip_level++;
+		skip_eol();
+		return;
+	}
+	next_token_nws();
+	if (curtok.ttype != TOK_IDENT)
+	{
+		/* no name to test (strval may be NULL): count a skipped level */
+		do_error("Bad #ifndef");
+		skip_level++;
+		if (curtok.ttype != TOK_EOL && curtok.ttype != TOK_EOF)
+			skip_eol();	/* an EOL/EOF token is already consumed */
+		return;
+	}
+	if (symbol_find(curtok.strval))
+		skip_level++;
+	else
+		found_level++;
+	check_eol();
+}
+
+void preproc_ifdef(void)
+{
+	/* #ifdef NAME: the section is live only when NAME is defined */
+	if (skip_level)
+	{
+		skip_level++;
+		skip_eol();
+		return;
+	}
+	next_token_nws();
+	if (curtok.ttype != TOK_IDENT)
+	{
+		/* no name to test (strval may be NULL): count a skipped level */
+		do_error("Bad #ifdef");
+		skip_level++;
+		if (curtok.ttype != TOK_EOL && curtok.ttype != TOK_EOF)
+			skip_eol();	/* an EOL/EOF token is already consumed */
+		return;
+	}
+	if (symbol_find(curtok.strval) == NULL)
+		skip_level++;
+	else
+		found_level++;
+	check_eol();
+}
+
+void preproc_if(void)
+{	/* #if: a true expression opens a live level, anything else a skipped one */
+	if (skip_level || !eval_expr()) skip_level++; else found_level++;
+}
+
+void preproc_include(void)
+{
+	skip_eol();	/* placeholder: only discards the rest of the #include line */
+}
+
+void preproc_else(void)
+{	/* #else: adjust skip_level/found_level for the alternate branch */
+	if (skip_level)
+	{
+		if (else_skip_level > found_level)
+			;	/* no change; preproc_elif stores found_level in else_skip_level when reached unskipped */
+		else if (--skip_level != 0)
+			skip_level++;	/* other skipped levels remain: undo the decrement */
+		else
+			found_level++;	/* that was the only skipped level: this branch becomes live */
+	}
+	else if (found_level)
+	{
+		skip_level++;	/* a live branch ends here: trade it for a skipped level */
+		found_level--;
+	}
+	else
+	{
+		do_error("#else in non-conditional section");
+	}
+	if (else_level == found_level + skip_level)	/* else_level: total depth recorded by the previous #else */
+	{
+		do_error("Too many #else");
+	}
+	else_level = found_level + skip_level;	/* preproc_endif resets this to 0 */
+	check_eol();
+}
+
+void preproc_endif(void)
+{
+	if (skip_level != 0)
+		--skip_level;		/* one skipped level closed */
+	else if (found_level != 0)
+		--found_level;		/* one live level closed */
+	else
+		do_error("#endif in non-conditional section");
+	else_level = 0;
+	if (!skip_level)
+		else_skip_level = 0;
+	check_eol();
+}
+
+void preproc_error(void)
+{
+	skip_eol();	/* placeholder: only discards the rest of the #error line */
+}
+
+void preproc_warning(void)
+{
+	skip_eol();	/* placeholder: only discards the rest of the #warning line */
+}
+
+void preproc_define(void)
+{
+	skip_eol();	/* placeholder: only discards the rest of the #define line; no symbol is added */
+}
+
+void preproc_undef(void)
+{
+	if (skip_level)
+	{
+		skip_eol();
+		return;
+	}
+	/* #undef NAME: remove NAME from the symbol table if it is defined */
+	next_token_nws();
+	if (curtok.ttype == TOK_IDENT)
+		symbol_undef(curtok.strval);
+	else
+		do_error("Bad #undef");
+	if (curtok.ttype != TOK_EOL && curtok.ttype != TOK_EOF)
+		check_eol();	/* an EOL/EOF token has already been consumed */
+}
+
+void preproc_line(void)
+{
+	skip_eol();	/* placeholder: only discards the rest of the #line line */
+}
+
+void preproc_pragma(void)
+{
+	/* Pragmas are not interpreted; the line is discarded.  A #pragma must
+	   not touch skip_level/found_level: no #endif follows it, so counting
+	   a level here would unbalance every enclosing conditional. */
+	skip_eol();
 }
+
+void preproc_elif(void)
+{	/* #elif: adjust skip_level/found_level, evaluating the expression only when needed */
+	if (skip_level == 0)
+		else_skip_level = found_level;	/* reached unskipped: remember the live depth (read here and in preproc_else) */
+	if (skip_level)
+	{
+		if (else_skip_level > found_level)
+			;	/* no change, expression not evaluated */
+		else if (--skip_level != 0)
+			skip_level++;	/* other skipped levels remain: undo the decrement */
+		else if (eval_expr())
+			found_level++;	/* only skipped level and the expression is true: branch is live */
+		else
+			skip_level++;	/* expression false: restore the skipped level */
+	}
+	else if (found_level)
+	{
+		skip_level++;	/* a live branch ends here: trade it for a skipped level */
+		found_level--;
+	}
+	else
+		do_error("#elif in non-conditional section");
+}
+
+
+
+/* tokenizing stuff here */
+#undef to_buf
+#define to_buf(c) do { if (strlen >= strbufl) { strbufl += 100; strbuf = lw_realloc(strbuf, strbufl); } strbuf[strlen++] = (c); strbuf[strlen] = 0; } while (0)
+void next_token(void)
+{
+	int strbufl = 0;
+	int strlen = 0;
+	char *strbuf = NULL;
+	int c;
+	int ttype;
+	/* Scan the next token from the input into curtok (type and text). */
+	lw_free(curtok.strval);
+	curtok.strval = NULL;
+	curtok.ttype = TOK_NONE;
+	
+	c = fetch_byte();
+	if (c == CPP_EOL)
+	{
+		curtok.ttype = TOK_EOL;	/* the EOL is consumed; strval stays NULL */
+		return;
+	}
+	
+	if (c == CPP_EOF)
+	{
+		curtok.ttype = TOK_EOF;	/* strval stays NULL */
+		return;
+	}
+	
+	if (is_whitespace(c))
+	{
+		do
+		{
+			to_buf(c);
+			c = fetch_byte();
+		} while (is_whitespace(c));
+		unfetch_byte(c);
+		ttype = TOK_WSPACE;
+		goto out;
+	}
+	if (c == '/')
+	{
+		c = munch_comment();
+		if (c >= 0)
+		{
+			to_buf(' ');	/* a comment is reported as one blank of whitespace */
+			while (c--)
+				outchr(CPP_EOL);
+			ttype = TOK_WSPACE;
+			goto out;
+		}
+		c = '/';	/* no comment was munched: treat as a plain '/' */
+	}
+
+	if (c == '\'')
+	{
+		// we have a character constant here
+		ttype = TOK_NUMBER;
+		strbuf = lw_strdup(parse_chr_lit());
+		goto out;
+	}
+	else if (c == '"')
+	{
+		// we have a string constant here
+		ttype = TOK_STRING;
+		strbuf = lw_strdup(parse_str_lit());
+		goto out;
+	}
+	else if (c == '.')
+	{
+		// we might have a number here
+		c = fetch_byte();
+		if (is_dec(c))
+		{
+			unfetch_byte(c);
+			ttype = TOK_NUMBER;
+			strbuf = lw_strdup(parse_num_lit('.'));
+			goto out;
+		}
+		/* not a number: put the lookahead back and emit the '.' itself */
+		unfetch_byte(c);
+		c = '.';
+		goto ttypegen;
+	}
+	else if (is_dec(c))
+	{
+		// we have a number here
+		ttype = TOK_NUMBER;
+		strbuf = lw_strdup(parse_num_lit(c));
+	}
+	else if (c == 'L')
+	{
+		// wide character string or wide character constant, or identifier
+		c = fetch_byte();
+		if (c == '"')
+		{
+			char *s;
+			to_buf('L');
+			s = parse_str_lit();
+			while (*s)
+				to_buf(*s++);
+			ttype = TOK_STRING;
+			goto out;
+		}
+		else if (c == '\'')
+		{
+			char *s;
+			to_buf('L');
+			s = parse_chr_lit();
+			while (*s)
+				to_buf(*s++);
+			ttype = TOK_NUMBER;
+			goto out;
+		}
+		unfetch_byte(c);
+		ttype = TOK_IDENT;
+		strbuf = lw_strdup(parse_identifier('L'));
+		goto out;
+	}
+	else if (is_sidchr(c))
+	{
+		// identifier of some kind
+		strbuf = lw_strdup(parse_identifier(c));
+		ttype = TOK_IDENT;
+	}
+	else
+	{
+ttypegen:
+		ttype = TOK_CHAR;	/* default for any character without its own token type */
+		to_buf(c);
+		
+		switch (c)
+		{
+		case '/':
+			ttype = TOK_DIV;
+			break;
+		
+		case '*':
+			ttype = TOK_MUL;
+			break;
+		
+		case '+':
+			ttype = TOK_ADD;
+			break;
+		
+		case '-':
+			ttype = TOK_SUB;
+			break;
+		
+		case '<':
+			c = fetch_byte();
+			if (c == '=')
+				ttype = TOK_LE;
+			else
+			{
+				ttype = TOK_LT;
+				unfetch_byte(c);
+			}
+			break;
+		
+		case '>':
+			c = fetch_byte();
+			if (c == '=')
+				ttype = TOK_GE;
+			else
+			{
+				ttype = TOK_GT;
+				unfetch_byte(c);
+			}
+			break;
+		
+		case '=':
+			c = fetch_byte();
+			if (c == '=')
+				ttype = TOK_EQ;
+			else
+				unfetch_byte(c);
+			break;
+			
+		case '!':
+			c = fetch_byte();
+			if (c == '=')
+				ttype = TOK_NE;
+			else
+			{
+				ttype = TOK_BNOT;
+				unfetch_byte(c);
+			}
+			break;
+			
+		case '&':
+			c = fetch_byte();
+			if (c == '&')
+				ttype = TOK_BAND;
+			else
+				unfetch_byte(c);
+			break;
+		
+		case '|':
+			c = fetch_byte();
+			if (c == '|')
+				ttype = TOK_BOR;
+			else
+				unfetch_byte(c);
+			break;
+				
+		case '(':
+			ttype = TOK_OPAREN;
+			break;
+			
+		case ')':
+			ttype = TOK_CPAREN;
+			break;
+			
+		}
+		goto out;
+	}
+	
+out:
+	curtok.ttype = ttype;
+	curtok.strval = strbuf;
+}
+
+void next_token_nws(void)
+{
+	/* advance at least once, then past any run of whitespace tokens */
+	next_token();
+	while (curtok.ttype == TOK_WSPACE)
+		next_token();
+}
+
+
+/*
+evaluate an expression. Return true if expression is true, false if it
+is false. Expression ends at the end of the line. Enter at eval_expr().
+   
+eval_term_real() evaluates a term in the expression. eval_expr_real() is
+the main expression evaluator.
+*/
+
+int eval_expr(void)
+{
+	skip_eol();	/* placeholder: the expression text is discarded unevaluated */
+	return 0;	/* so every expression is reported as false */
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lwcc/cpp/symbol.c	Tue Sep 10 19:56:05 2013 -0600
@@ -0,0 +1,79 @@
+/*
+lwcc/cpp/symbol.c
+
+Copyright © 2013 William Astle
+
+This file is part of LWTOOLS.
+
+LWTOOLS is free software: you can redistribute it and/or modify it under the
+terms of the GNU General Public License as published by the Free Software
+Foundation, either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <lw_alloc.h>
+#include <lw_string.h>
+
+#include "cpp.h"
+
+struct symtab_e *symtab_head = NULL;	/* first entry of the singly linked symbol list; NULL when empty */
+
+struct symtab_e *symbol_find(const char *s)
+{
+	struct symtab_e *e = symtab_head;
+
+	/* linear scan: stop on the first entry named s, or at the list end */
+	while (e && strcmp(e -> name, s) != 0)
+		e = e -> next;
+	return e;
+}
+
+void symbol_free(struct symtab_e *r)
+{
+	lw_free(r -> strval);	/* release both strings held by the entry ... */
+	lw_free(r -> name);
+	lw_free(r);		/* ... and then the entry itself */
+}
+
+void symbol_undef(const char *s)
+{
+	struct symtab_e **link, *hit;
+
+	/* walk the links rather than the nodes, so unlinking the head entry
+	   needs no special case; only the first match is removed */
+	for (link = &symtab_head; *link; link = &((*link) -> next))
+	{
+		if (strcmp((*link) -> name, s) != 0)
+			continue;
+		hit = *link;
+		*link = hit -> next;
+		symbol_free(hit);
+		return;
+	}
+}
+
+struct symtab_e *symbol_add(const char *s, const char *str, int nargs, int vargs)
+{
+	/* new entry holds its own copies of s and str; unnamed members start zeroed */
+	struct symtab_e fresh = {
+		.name = lw_strdup(s),
+		.strval = lw_strdup(str),
+		.nargs = nargs,
+		.vargs = vargs,
+		.next = symtab_head };
+	struct symtab_e *r = lw_alloc(sizeof (struct symtab_e));
+	*r = fresh;
+	symtab_head = r;	/* pushed at the front of the list */
+	return r;
+}