Mercurial > hg > index.cgi
comparison lwcc/parse.c @ 314:a3e277c58df9 ccdev
Checkpoint parser development for lwcc
Beginning of a lemon-based parser for C, including all the infrastructure for
calling the lemon-generated parser. This requires a translation layer from
the preprocessor token numbers to the lemon parser token numbers because
lemon wants to control the token numbers. Eventually, when the
lemon parser gets replaced with a hand-crafted recursive descent parser,
this translation will no longer be required. However, in the interest of
getting things working sooner rather than later, using something like lemon
is beneficial.
author | William Astle <lost@l-w.ca> |
---|---|
date | Sun, 17 Nov 2013 11:59:36 -0700 |
parents | 41118fb0a8f2 |
children |
comparison
equal
deleted
inserted
replaced
313:73b2bfa17ab0 | 314:a3e277c58df9 |
---|---|
17 | 17 |
18 You should have received a copy of the GNU General Public License along with | 18 You should have received a copy of the GNU General Public License along with |
19 this program. If not, see <http://www.gnu.org/licenses/>. | 19 this program. If not, see <http://www.gnu.org/licenses/>. |
20 */ | 20 */ |
21 | 21 |
22 #include <stdio.h> | |
23 #include <string.h> | |
24 #include <lw_alloc.h> | |
25 #include <lw_string.h> | |
26 | |
22 #include "cpp.h" | 27 #include "cpp.h" |
23 #include "tree.h" | 28 #include "tree.h" |
29 #include "parse.h" | |
30 | |
31 #include "parse_c.h" | |
32 | |
33 | |
/*
 * Entry points of the lemon generated parser. They are only declared in this
 * file; the definitions live elsewhere.
 *
 * NOTE(review): stock lemon declares Parse() as returning void - confirm that
 * the void * return type below matches the generated code.
 */
void *ParseAlloc(void *(*alloc_fn)(size_t nbytes));
void *Parse(void *parser, int tokid, struct tokendata *tdata, struct parserinfo *pi);
void ParseFree(void *parser, void (*free_fn)(void *ptr));
37 | |
38 void tokendata_free(struct tokendata *td) | |
39 { | |
40 if (td) | |
41 { | |
42 if (td -> strval) | |
43 lw_free(td -> strval); | |
44 lw_free(td); | |
45 } | |
46 } | |
47 | |
48 extern char *ptoken_names[]; | |
49 char *tokendata_name(struct tokendata *td) | |
50 { | |
51 if (td -> tokid < 0) | |
52 return "****UNKNOWN****"; | |
53 return ptoken_names[td -> tokid]; | |
54 } | |
55 | |
56 void tokendata_print(FILE *fp, struct tokendata *td) | |
57 { | |
58 fprintf(fp, "TOKEN: %s", tokendata_name(td)); | |
59 if (td -> strval) | |
60 fprintf(fp, " \"%s\"", td -> strval); | |
61 fprintf(fp, "\n"); | |
62 } | |
63 | |
/*
 * Token types for C keywords and for typedef names. parse_next() rewrites the
 * type of an identifier token to one of the TOK_KW_* values when the
 * identifier spells a keyword.
 *
 * All values are negative (presumably to keep them apart from the token types
 * produced by the preprocessor - confirm against cpp.h). Each replacement list
 * is parenthesized so it behaves as a single operand wherever it is expanded.
 */
#define TOK_KW_IF		(-1)
#define TOK_KW_ELSE		(-2)
#define TOK_KW_WHILE		(-3)
#define TOK_KW_DO		(-4)
#define TOK_KW_FOR		(-5)
#define TOK_KW_VOID		(-6)
#define TOK_KW_INT		(-7)
#define TOK_KW_CHAR		(-8)
#define TOK_KW_SHORT		(-9)
#define TOK_KW_LONG		(-10)
#define TOK_KW_UNSIGNED		(-11)
#define TOK_KW_SIGNED		(-12)
#define TOK_KW_FLOAT		(-13)
#define TOK_KW_DOUBLE		(-14)
#define TOK_KW_STRUCT		(-15)
#define TOK_KW_UNION		(-16)
#define TOK_KW_TYPEDEF		(-17)
#define TOK_KW_STATIC		(-18)
#define TOK_KW_SWITCH		(-19)
#define TOK_KW_CASE		(-20)
#define TOK_KW_DEFAULT		(-21)
#define TOK_KW_BREAK		(-22)
#define TOK_KW_CONTINUE		(-23)
#define TOK_KW_CONST		(-24)
#define TOK_KW_AUTO		(-25)
#define TOK_KW_ENUM		(-26)
#define TOK_KW_REGISTER		(-27)
#define TOK_KW_SIZEOF		(-28)
#define TOK_KW_VOLATILE		(-29)
#define TOK_KW_RETURN		(-30)
#define TOK_KW_EXTERN		(-31)
#define TOK_KW_GOTO		(-32)
#define TOK_TYPENAME		(-100)
97 | |
98 static struct { int tok; char *word; } keyword_list[] = { | |
99 { TOK_KW_IF, "if" }, | |
100 { TOK_KW_ELSE, "else" }, | |
101 { TOK_KW_WHILE, "while" }, | |
102 { TOK_KW_DO, "do" }, | |
103 { TOK_KW_FOR, "for" }, | |
104 { TOK_KW_VOID, "void" }, | |
105 { TOK_KW_INT, "int" }, | |
106 { TOK_KW_CHAR, "char" }, | |
107 { TOK_KW_SHORT, "short" }, | |
108 { TOK_KW_LONG, "long" }, | |
109 { TOK_KW_UNSIGNED, "unsigned" }, | |
110 { TOK_KW_SIGNED, "signed" }, | |
111 { TOK_KW_FLOAT, "float" }, | |
112 { TOK_KW_DOUBLE, "double" }, | |
113 { TOK_KW_STRUCT, "struct" }, | |
114 { TOK_KW_UNION, "union" }, | |
115 { TOK_KW_TYPEDEF, "typedef" }, | |
116 { TOK_KW_STATIC, "static" }, | |
117 { TOK_KW_SWITCH, "switch" }, | |
118 { TOK_KW_CASE, "case" }, | |
119 { TOK_KW_DEFAULT, "default" }, | |
120 { TOK_KW_BREAK, "break" }, | |
121 { TOK_KW_CONTINUE, "continue" }, | |
122 { TOK_KW_CONST, "const" }, | |
123 { TOK_KW_AUTO, "auto" }, | |
124 { TOK_KW_ENUM, "enum" }, | |
125 { TOK_KW_REGISTER, "register" }, | |
126 { TOK_KW_SIZEOF, "sizeof" }, | |
127 { TOK_KW_VOLATILE, "volatile" }, | |
128 { TOK_KW_RETURN, "return" }, | |
129 { TOK_KW_EXTERN, "extern" }, | |
130 { TOK_KW_GOTO, "goto" }, | |
131 { 0, "" } | |
132 }; | |
133 | |
134 struct token *parse_next(struct preproc_info *pp) | |
135 { | |
136 struct token *tok; | |
137 int i; | |
138 | |
139 for (;;) | |
140 { | |
141 tok = preproc_next(pp); | |
142 if (tok -> ttype == TOK_WSPACE) | |
143 continue; | |
144 if (tok -> ttype == TOK_EOL) | |
145 continue; | |
146 if (tok -> ttype == TOK_CHAR) | |
147 { | |
148 // random character | |
149 fprintf(stderr, "Random character %02x\n", tok -> strval[0]); | |
150 if (tok -> strval[0] < 32 || tok -> strval[0] > 126) | |
151 continue; | |
152 } | |
153 break; | |
154 } | |
155 if (tok -> ttype == TOK_IDENT) | |
156 { | |
157 /* convert identifier tokens to their respective meanings */ | |
158 for (i = 0; keyword_list[i].tok != TOK_NONE; i++) | |
159 { | |
160 if (strcmp(keyword_list[i].word, tok -> strval) == 0) | |
161 { | |
162 tok -> ttype = keyword_list[i].tok; | |
163 goto out; | |
164 } | |
165 } | |
166 /* check for a registered type here */ | |
167 } | |
168 out: | |
169 fprintf(stderr, "Lexed: "); | |
170 token_print(tok, stderr); | |
171 fprintf(stderr, " (%d)\n", tok -> ttype); | |
172 return tok; | |
173 } | |
174 | |
175 static struct { | |
176 int tokid; | |
177 int ttype; | |
178 } toktable[] = { | |
179 { PTOK_IDENTIFIER, TOK_IDENT }, | |
180 { PTOK_ENDS, TOK_EOS }, | |
181 { PTOK_KW_INT, TOK_KW_INT }, | |
182 { PTOK_KW_LONG, TOK_KW_LONG }, | |
183 { PTOK_KW_SHORT, TOK_KW_SHORT }, | |
184 { PTOK_KW_CHAR, TOK_KW_CHAR }, | |
185 { PTOK_KW_SIGNED, TOK_KW_SIGNED }, | |
186 { PTOK_KW_UNSIGNED, TOK_KW_UNSIGNED }, | |
187 { PTOK_STAR, TOK_STAR }, | |
188 { PTOK_KW_VOID, TOK_KW_VOID }, | |
189 { PTOK_KW_FLOAT, TOK_KW_FLOAT }, | |
190 { PTOK_KW_DOUBLE, TOK_KW_DOUBLE }, | |
191 { PTOK_OBRACE, TOK_OBRACE }, | |
192 { PTOK_CBRACE, TOK_CBRACE }, | |
193 { PTOK_OPAREN, TOK_OPAREN }, | |
194 { PTOK_CPAREN, TOK_CPAREN }, | |
195 { 0, 0 } | |
196 }; | |
197 | |
198 static int lookup_ptok(int ttype) | |
199 { | |
200 int i; | |
201 for (i = 0; toktable[i].tokid != 0; i++) | |
202 if (toktable[i].ttype == ttype) | |
203 return toktable[i].tokid; | |
204 return -1; | |
205 } | |
24 | 206 |
25 node_t *parse_program(struct preproc_info *pp) | 207 node_t *parse_program(struct preproc_info *pp) |
26 { | 208 { |
27 return node_create(NODE_NONE); | 209 struct token *tok; |
28 } | 210 struct tokendata *td; |
211 struct parserinfo pi = { NULL }; | |
212 void *parser; | |
213 | |
214 /* the cast below shuts up a warning */ | |
215 parser = ParseAlloc((void *)lw_alloc); | |
216 for (;;) | |
217 { | |
218 tok = parse_next(pp); | |
219 if (tok -> ttype == TOK_EOF) | |
220 break; | |
221 | |
222 td = lw_alloc(sizeof(struct tokendata)); | |
223 td -> strval = NULL; | |
224 td -> numval[0] = 0; | |
225 td -> numval[1] = 0; | |
226 td -> numval[2] = 0; | |
227 td -> numval[3] = 0; | |
228 td -> numval[4] = 0; | |
229 td -> numval[5] = 0; | |
230 td -> numval[6] = 0; | |
231 td -> numval[7] = 0; | |
232 td -> tokid = lookup_ptok(tok -> ttype); | |
233 if (tok -> strval) | |
234 td -> strval = lw_strdup(tok -> strval); | |
235 | |
236 tokendata_print(stderr, td); | |
237 | |
238 Parse(parser, td -> tokid, td, &pi); | |
239 } | |
240 Parse(parser, 0, NULL, &pi); | |
241 ParseFree(parser, lw_free); | |
242 return pi.parsetree; | |
243 } |