comparison lwcc/preproc.c @ 298:6112c67728ba ccdev

Add stringification and token concatenation. Add support for # and ## in macro expansion by the preprocessor (stringification and token concatenation). Totally untested.
author William Astle <lost@l-w.ca>
date Sat, 14 Sep 2013 22:42:53 -0600
parents 310df72c641d
children 856caf91ffaa
comparison
equal deleted inserted replaced
297:310df72c641d 298:6112c67728ba
30 #include "token.h" 30 #include "token.h"
31 31
32 static int expand_macro(struct preproc_info *, char *); 32 static int expand_macro(struct preproc_info *, char *);
33 static void process_directive(struct preproc_info *); 33 static void process_directive(struct preproc_info *);
34 static long eval_expr(struct preproc_info *); 34 static long eval_expr(struct preproc_info *);
35 extern struct token *preproc_lex_next_token(struct preproc_info *);
36
35 37
36 struct token *preproc_next_processed_token(struct preproc_info *pp) 38 struct token *preproc_next_processed_token(struct preproc_info *pp)
37 { 39 {
38 struct token *ct; 40 struct token *ct;
39 41
763 } 765 }
764 766
765 /* 767 /*
766 Below here is the logic for expanding a macro 768 Below here is the logic for expanding a macro
767 */ 769 */
770 static char *stringify(struct token *tl)
771 {
772 struct strbuf *s;
773 int ws = 0;
774
775 s = strbuf_new();
776 strbuf_add(s, '"');
777
778 while (tl && tl -> ttype == TOK_WSPACE)
779 tl = tl -> next;
780
781 for (; tl; tl = tl -> next)
782 {
783 if (tl -> ttype == TOK_WSPACE)
784 {
785 ws = 1;
786 continue;
787 }
788 if (ws)
789 {
790 strbuf_add(s, ' ');
791 }
792 for (ws = 0; tl -> strval[ws]; ws++)
793 {
794 if (tl -> ttype == TOK_STRING || tl -> ttype == TOK_CHR_LIT)
795 {
796 if (tl -> strval[ws] == '"' || tl -> strval[ws] == '\\')
797 strbuf_add(s, '\\');
798 }
799 }
800 ws = 0;
801 }
802
803 strbuf_add(s, '"');
804 return strbuf_end(s);
805 }
806
807 /* return list to tokens as a result of ## expansion */
808 static struct token *paste_tokens(struct preproc_info *pp, struct symtab_e *s, struct token **arglist, struct token *t1, struct token *t2)
809 {
810 struct token *rl = NULL, *rlt;
811 struct token *s1, *s2;
812 struct token *ws;
813 int i;
814 char *tstr;
815
816 if (t1 -> ttype == TOK_IDENT)
817 {
818 if (strcmp(t1 -> strval, "__VA_ARGS__") == 0)
819 {
820 i = s -> nargs;
821 }
822 else
823 {
824 for (i = 0; i < s -> nargs; i++)
825 {
826 if (strcmp(s -> params[i], t1 -> strval) == 0)
827 break;
828 }
829 }
830 if ((i == s -> nargs) && !(s -> vargs))
831 {
832 s1 = token_dup(t1);
833 }
834 else
835 {
836 /* find last non-whitespace token */
837 ws = NULL;
838 for (t1 = s -> tl; t1; t1 = t1 -> next)
839 {
840 if (t1 -> ttype != TOK_WSPACE)
841 ws = t1;
842 }
843 if (!ws)
844 {
845 s1 = NULL;
846 }
847 else
848 {
849 if (ws != s -> tl)
850 {
851 /* output extra tokens */
852 for (t1 = s -> tl; t1 -> next != ws; t1 = t1 -> next)
853 {
854 if (!rl)
855 {
856 rl = token_dup(t1);
857 rlt = rl;
858 }
859 else
860 {
861 rlt -> next = token_dup(t1);
862 rlt = rlt -> next;
863 }
864 }
865 }
866 s1 = token_dup(ws);
867 }
868 }
869 }
870 else
871 {
872 s1 = token_dup(t1);
873 }
874 if (t2 -> ttype == TOK_IDENT)
875 {
876 if (strcmp(t1 -> strval, "__VA_ARGS__") == 0)
877 {
878 i = s -> nargs;
879 }
880 else
881 {
882 for (i = 0; i < s -> nargs; i++)
883 {
884 if (strcmp(s -> params[i], t1 -> strval) == 0)
885 break;
886 }
887 }
888 if ((i == s -> nargs) && !(s -> vargs))
889 {
890 s2 = token_dup(t2);
891 t2 = NULL;
892 }
893 else
894 {
895 /* find last non-whitespace token */
896 ws = NULL;
897 for (t2 = s -> tl; t2; t2 = t2 -> next)
898 {
899 if (t2 -> ttype != TOK_WSPACE)
900 {
901 ws = t2;
902 t2 = t2 -> next;
903 break;
904 }
905 }
906 if (!ws)
907 {
908 s2 = NULL;
909 }
910 else
911 {
912 s2 = token_dup(ws);
913 }
914 }
915 }
916 else
917 {
918 s2 = token_dup(t2);
919 }
920
921 /* here, s1 is NULL if no left operand or a duplicated token for the actual left operand */
922 /* here, s2 is NULL if no right operand or a duplicated token for the actual right operand */
923 /* here, t2 points to a possibly empty list of extra tokens to output after the concatenated tokens */
924 /* here, rl,rlt is a possibly non-empty list of tokens preceding the concatenation */
925
926 /* tokens combine if the combination exactly matches "combinelist", in which case the string values are
927 concatenated and the new token type is used to create a new token. If the tokens do not combine,
928 s1 and s2 are returned in sequence. */
929
930 if (!s1 && s2)
931 {
932 if (!rl)
933 rl = s2;
934 else
935 rlt -> next = s2;
936 rlt = s2;
937 }
938 else if (s1 && !s2)
939 {
940 if (!rl)
941 rl = s1;
942 else
943 rlt -> next = s1;
944 rlt = s1;
945 }
946 else if (s1 && s2)
947 {
948 tstr = lw_alloc(strlen(s1 -> strval) + strlen(s2 -> strval) + 1);
949 strcpy(tstr, s1 -> strval);
950 strcat(tstr, s2 -> strval);
951 /* now try to lex the string */
952 pp -> lexstr = tstr;
953 pp -> lexstrloc = 0;
954 t1 = preproc_lex_next_token(pp);
955 if (pp -> lexstr[pp -> lexstrloc])
956 {
957 // doesn't make a new token - pass through the original two
958 if (!rl)
959 rl = s1;
960 else
961 rlt -> next = s1;
962 s1 -> next = s2;
963 rlt = s2;
964 }
965 else
966 {
967 // does make a new token
968 t1 -> fn = s1 -> fn;
969 t1 -> column = s1 -> column;
970 t1 -> lineno = s1 -> lineno;
971 if (!rl)
972 rl = t1;
973 else
974 rlt -> next = t1;
975 rlt = t1;
976 }
977 lw_free(tstr);
978 pp -> lexstr = NULL;
979 }
980
981 /* add in any extra tokens */
982 while (t2)
983 {
984 if (!rl)
985 {
986 rl = token_dup(t2);
987 rlt = rl;
988 }
989 else
990 {
991 rlt -> next = token_dup(t2);
992 rlt = rlt -> next;
993 }
994 t2 = t2 -> next;
995 }
996
997 return rl;
998 }
999
1000
768 static int expand_macro(struct preproc_info *pp, char *mname) 1001 static int expand_macro(struct preproc_info *pp, char *mname)
769 { 1002 {
770 struct symtab_e *s; 1003 struct symtab_e *s;
771 struct token *t, *t2, *t3; 1004 struct token *t, *t2, *t3;
772 struct token **arglist = NULL; 1005 struct token **arglist = NULL;
773 int nargs = 0; 1006 int nargs = 0;
774 struct expand_e *e; 1007 struct expand_e *e;
775 struct token **exparglist = NULL; 1008 struct token **exparglist = NULL;
776 int i; 1009 int i;
777 int pcount; 1010 int pcount;
778 1011 char *tstr;
1012
779 s = symtab_find(pp, mname); 1013 s = symtab_find(pp, mname);
780 if (!s) 1014 if (!s)
781 return 0; 1015 return 0;
782 1016
783 for (e = pp -> expand_list; e; e = e -> next) 1017 for (e = pp -> expand_list; e; e = e -> next)
910 t2 = NULL; 1144 t2 = NULL;
911 t3 = NULL; 1145 t3 = NULL;
912 1146
913 for (t = s -> tl; t; t = t -> next) 1147 for (t = s -> tl; t; t = t -> next)
914 { 1148 {
1149 again:
1150 if (t -> ttype != TOK_WSPACE && t -> next)
1151 {
1152 struct token *ct1, *ct2;
1153
1154 for (ct1 = t -> next; ct1 && ct1 -> ttype == TOK_WSPACE; ct1 = ct1 -> next)
1155 {
1156 if (ct1 -> ttype == TOK_DBLHASH)
1157 {
1158 // possible concatenation here
1159 for (ct2 = ct1 -> next; ct2 && ct2 -> ttype == TOK_WSPACE; ct2 = ct2 -> next)
1160 /* do nothing */ ;
1161 if (ct2)
1162 {
1163 // we have concatenation here so we paste str1 and str2 together and see what we get
1164 // if we get NULL, the paste didn't make a valid token
1165 ct1 = paste_tokens(pp, s, arglist, t, ct2);
1166 if (ct1)
1167 {
1168 if (t2)
1169 {
1170 t2 -> next = ct1;
1171 }
1172 else
1173 {
1174 t3 = ct1;
1175 }
1176 for (t2 = ct1; t2 -> next; t2 = t2 -> next)
1177 /* do nothing */ ;
1178
1179 /* because of the level of control structures, move to next token and restart loop */
1180 t = ct2 -> next;
1181 goto again;
1182 }
1183 goto nopaste;
1184 }
1185 }
1186 }
1187 }
1188
1189 nopaste:
1190 if (t -> ttype == TOK_HASH)
1191 {
1192 if (t -> next && t -> next -> ttype == TOK_IDENT)
1193 {
1194 if (strcmp(t -> next -> strval, "__VA_ARGS__") == 0)
1195 {
1196 i = nargs;
1197 }
1198 else
1199 {
1200 for (i = 0; i < nargs; i++)
1201 {
1202 if (strcmp(t -> next -> strval, s -> params[i]) == 0)
1203 break;
1204 }
1205 }
1206 if (!((i == s -> nargs) && !(s -> vargs)))
1207 {
1208 // we have a stringification here
1209 t = t -> next;
1210 tstr = stringify(arglist[i]);
1211 if (t2)
1212 {
1213 t2 = token_create(TOK_STRING, tstr, t -> lineno, t -> column, t -> fn);
1214 t2 = t2 -> next;
1215 }
1216 else
1217 {
1218 t3 = token_create(TOK_STRING, tstr, t -> lineno, t -> column, t -> fn);
1219 t2 = t3;
1220 }
1221 lw_free(tstr);
1222 continue;
1223 }
1224 }
1225 }
915 if (t -> ttype == TOK_IDENT) 1226 if (t -> ttype == TOK_IDENT)
916 { 1227 {
917 /* identifiers might need expansion to arguments */ 1228 /* identifiers might need expansion to arguments */
918 if (strcmp(t -> strval, "__VA_ARGS__") == 0) 1229 if (strcmp(t -> strval, "__VA_ARGS__") == 0)
919 { 1230 {
929 } 1240 }
930 if ((i == s -> nargs) && !(s -> vargs)) 1241 if ((i == s -> nargs) && !(s -> vargs))
931 { 1242 {
932 struct token *te; 1243 struct token *te;
933 // expand argument 1244 // expand argument
934 // FIXME: handle # and ##
935 for (te = exparglist[i]; te; te = te -> next) 1245 for (te = exparglist[i]; te; te = te -> next)
936 { 1246 {
937 if (t2) 1247 if (t2)
938 { 1248 {
939 t2 -> next = token_dup(te); 1249 t2 -> next = token_dup(te);