From a233cc6762b6030fdf6996c46fb0642001c0e036 Mon Sep 17 00:00:00 2001 From: pjht Date: Sun, 16 Sep 2018 16:44:08 -0500 Subject: [PATCH] The big commit! No errors or warnings. Can handle everything but function calls and types other than int --- cinc.xcodeproj/project.pbxproj | 18 +- cinc/ast.c | 18 +- cinc/env.c | 77 +++++ cinc/env.h | 32 +++ cinc/func.c | 36 +++ cinc/func.h | 26 ++ cinc/generate.c | 386 ++++++++++++++++++++++++- cinc/main.c | 27 +- cinc/oplist.txt | 14 + cinc/out.mys | 4 - cinc/out.s | 41 +++ cinc/parser.c | 502 +++++++++++++++++++++++++++++++-- cinc/token.c | 2 +- cinc/token.h | 19 +- cinc/tokenize.c | 99 ++++++- cinc/tokenize.h | 3 + 16 files changed, 1240 insertions(+), 64 deletions(-) create mode 100644 cinc/env.c create mode 100644 cinc/env.h create mode 100644 cinc/func.c create mode 100644 cinc/func.h create mode 100644 cinc/oplist.txt delete mode 100644 cinc/out.mys create mode 100644 cinc/out.s diff --git a/cinc.xcodeproj/project.pbxproj b/cinc.xcodeproj/project.pbxproj index ffca562..ed49c86 100644 --- a/cinc.xcodeproj/project.pbxproj +++ b/cinc.xcodeproj/project.pbxproj @@ -9,6 +9,8 @@ /* Begin PBXBuildFile section */ F61910612142A876003B8798 /* main.c in Sources */ = {isa = PBXBuildFile; fileRef = F61910602142A876003B8798 /* main.c */; }; F61910692142A8C5003B8798 /* tokenize.c in Sources */ = {isa = PBXBuildFile; fileRef = F61910682142A8C5003B8798 /* tokenize.c */; }; + F62F6C0D214B1BFC00EDE8D2 /* func.c in Sources */ = {isa = PBXBuildFile; fileRef = F62F6C0C214B1BFC00EDE8D2 /* func.c */; }; + F6544B112147F086002C78F7 /* env.c in Sources */ = {isa = PBXBuildFile; fileRef = F6544B102147F086002C78F7 /* env.c */; }; F65A954E21454B31005FCAF5 /* token.c in Sources */ = {isa = PBXBuildFile; fileRef = F65A954D21454B31005FCAF5 /* token.c */; }; F661C333214590930021FCCE /* parser.c in Sources */ = {isa = PBXBuildFile; fileRef = F661C332214590930021FCCE /* parser.c */; }; F661C3362145CE760021FCCE /* ast.c in Sources */ = {isa = PBXBuildFile; fileRef = F661C3352145CE760021FCCE /* ast.c */; }; @@ -28,11 +30,14 @@ /* End PBXCopyFilesBuildPhase section */ /* Begin PBXFileReference section */ - F606D3182145FAB300E817B6 /* out.mys */ = {isa = PBXFileReference; lastKnownFileType = text; path = out.mys; sourceTree = ""; }; F619105D2142A876003B8798 /* cinc */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = cinc; sourceTree = BUILT_PRODUCTS_DIR; }; F61910602142A876003B8798 /* main.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = main.c; sourceTree = ""; }; F61910672142A8C5003B8798 /* tokenize.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = tokenize.h; sourceTree = ""; }; F61910682142A8C5003B8798 /* tokenize.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = tokenize.c; sourceTree = ""; }; + F62F6C0B214B1BFC00EDE8D2 /* func.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = func.h; sourceTree = ""; }; + F62F6C0C214B1BFC00EDE8D2 /* func.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = func.c; sourceTree = ""; }; + F6544B0F2147F086002C78F7 /* env.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = env.h; sourceTree = ""; }; + F6544B102147F086002C78F7 /* env.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = env.c; sourceTree = ""; }; F65A954D21454B31005FCAF5 /* token.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = token.c; sourceTree = ""; }; F661C331214590930021FCCE /* parser.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = parser.h; sourceTree = ""; }; F661C332214590930021FCCE /* parser.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = parser.c; sourceTree = ""; }; @@ -41,6 +46,7 @@ F661C3372145D9C40021FCCE /* generate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = generate.h; sourceTree = ""; }; F661C3382145D9C40021FCCE /* generate.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = generate.c; sourceTree = ""; }; F661C33A2145DE300021FCCE /* token.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = token.h; sourceTree = ""; }; + F6B6C137214712F3008F5230 /* oplist.txt */ = {isa = PBXFileReference; lastKnownFileType = text; path = oplist.txt; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -84,7 +90,11 @@ F661C3352145CE760021FCCE /* ast.c */, F661C3372145D9C40021FCCE /* generate.h */, F661C3382145D9C40021FCCE /* generate.c */, - F606D3182145FAB300E817B6 /* out.mys */, + F62F6C0B214B1BFC00EDE8D2 /* func.h */, + F62F6C0C214B1BFC00EDE8D2 /* func.c */, + F6B6C137214712F3008F5230 /* oplist.txt */, + F6544B0F2147F086002C78F7 /* env.h */, + F6544B102147F086002C78F7 /* env.c */, ); path = cinc; sourceTree = ""; @@ -146,7 +156,9 @@ buildActionMask = 2147483647; files = ( F65A954E21454B31005FCAF5 /* token.c in Sources */, + F6544B112147F086002C78F7 /* env.c in Sources */, F661C3362145CE760021FCCE /* ast.c in Sources */, + F62F6C0D214B1BFC00EDE8D2 /* func.c in Sources */, F61910692142A8C5003B8798 /* tokenize.c in Sources */, F661C333214590930021FCCE /* parser.c in Sources */, F661C3392145D9C40021FCCE /* generate.c in Sources */, @@ -270,6 +282,7 @@ isa = XCBuildConfiguration; buildSettings = { CODE_SIGN_STYLE = Automatic; + "EXCLUDED_SOURCE_FILE_NAMES[arch=*]" = out.s; PRODUCT_NAME = "$(TARGET_NAME)"; }; name = Debug; @@ -278,6 +291,7 @@ isa = XCBuildConfiguration; buildSettings = { CODE_SIGN_STYLE = Automatic; + "EXCLUDED_SOURCE_FILE_NAMES[arch=*]" = out.s; PRODUCT_NAME = "$(TARGET_NAME)"; }; name = Release; diff --git a/cinc/ast.c b/cinc/ast.c index bfc5428..c9fc68d 100644 --- a/cinc/ast.c +++ b/cinc/ast.c @@ -39,17 +39,23 @@ void print_tree(AstNode* root,int tabLevel) { printf(" "); } printf("Child %d: ",i); - printf("%s\n",root->children[i]->data); - print_tree(root->children[i],tabLevel+1); + if (root->children[i]) { + printf("%s\n",root->children[i]->data); + print_tree(root->children[i],tabLevel+1); + } else { + printf("NULL\n"); + } } } } void free_tree(AstNode* root) { - if (root->children) { - for (int i=0;i<(root->num_children);i++) { - free_tree(root->children[i]); + if (root) { + if (root->children) { + for (int i=0;i<(root->num_children);i++) { + free_tree(root->children[i]); + } } + free(root); } - free(root); } diff --git a/cinc/env.c b/cinc/env.c new file mode 100644 index 0000000..82d4347 --- /dev/null +++ b/cinc/env.c @@ -0,0 +1,77 @@ +// +// env.c +// cinc +// +// Created by Peter Terpstra on 9/11/18. +// Copyright © 2018 Peter Terpstra. All rights reserved. +// + +#include "env.h" +#include +#include +Env* new_env(Env* prev) { + Env* env=malloc(sizeof(Env)); + env->next=NULL; + env->prev=prev; + env->max_els=100; + env->num_els=0; + env->offset=8; + env->offsets=malloc(sizeof(int)*100); + env->varnames=malloc(sizeof(const char*)*100); + env->types=malloc(sizeof(const char*)*100); + env->contlabel=NULL; + env->breaklabel=NULL; + if (prev) { + env->offset=prev->offset; + env->contlabel=prev->contlabel; + env->breaklabel=prev->breaklabel; + prev->next=env; + } + return env; +} + +void add_entry(Env* env,const char* type,const char* name) { + if (env->num_els==env->max_els) { + env->offsets=realloc(env->offsets,sizeof(int)*(100+env->num_els)); + env->varnames=realloc(env->varnames,sizeof(const char*)*(100+env->num_els)); + env->types=realloc(env->types,sizeof(const char*)*(100+env->num_els)); + } + env->varnames[env->num_els]=name; + env->types[env->num_els]=name; + env->offsets[env->num_els]=env->offset; + env->num_els++; + env->offset+=8; +} + +int get_offset(Env* env,const char* name) { + for(int i=0;i<(env->num_els);i++) { + if(strcmp(env->varnames[i], name)==0) { + return env->offsets[i]; + } + } + if (env->prev) { + return get_offset(env->prev, name); + } + return -1; +} + +const char* get_type(Env* env,const char* name) { + for(int i=0;i<(env->num_els);i++) { + if(strcmp(env->varnames[i], name)==0) { + return env->types[i]; + } + } + if (env->prev) { + return get_type(env->prev, name); + } + return NULL; +} + +void free_env(Env* env) { + if (env->next) { + free_env(env->next); + } + free(env->offsets); + free(env->varnames); + free(env); +} diff --git a/cinc/env.h b/cinc/env.h new file mode 100644 index 0000000..6ffabd0 --- /dev/null +++ b/cinc/env.h @@ -0,0 +1,32 @@ +// +// env.h +// cinc +// +// Created by Peter Terpstra on 9/11/18. +// Copyright © 2018 Peter Terpstra. All rights reserved. +// + +#ifndef env_h +#define env_h + +struct _env { + const char** varnames; + const char** types; + int* offsets; + int num_els; + int max_els; + int offset; + struct _env* next; + struct _env* prev; + char* contlabel; + char* breaklabel; +}; + +typedef struct _env Env; + +Env* new_env(Env* prev); +void add_entry(Env* env,const char* type,const char* name); +int get_offset(Env* env,const char* name); +void free_env(Env* env); + +#endif /* env_h */ diff --git a/cinc/func.c b/cinc/func.c new file mode 100644 index 0000000..fd0c21b --- /dev/null +++ b/cinc/func.c @@ -0,0 +1,36 @@ +// +// func.c +// cinc +// +// Created by Peter Terpstra on 9/13/18. +// Copyright © 2018 Peter Terpstra. All rights reserved. +// + +#include "func.h" +#include +#include + +Arg* make_arg(const char* type,const char* name) { + Arg* arg=malloc(sizeof(Arg)); + arg->type=type; + arg->name=name; + return arg; +} + +void free_arg(Arg* arg) { + free(arg); +} + +Func* make_func(const char* name,bool defined,int nargs,Arg* args) { + Func* func=malloc(sizeof(Func)); + func->name=name; + func->defined=defined; + func->nargs=nargs; + func->args=args; + return func; +} + +void free_func(Func* func) { + free(func->args); + free(func); +} diff --git a/cinc/func.h b/cinc/func.h new file mode 100644 index 0000000..17e6241 --- /dev/null +++ b/cinc/func.h @@ -0,0 +1,26 @@ +// +// func.h +// cinc +// +// Created by Peter Terpstra on 9/13/18. +// Copyright © 2018 Peter Terpstra. All rights reserved. +// + +#ifndef func_h +#define func_h + +#include + +typedef struct Arg { + const char* type; + const char* name; +} Arg; + +typedef struct Func { + const char* name; + bool defined; + int nargs; + Arg* args; +} Func; + +#endif /* func_h */ diff --git a/cinc/generate.c b/cinc/generate.c index dc995c8..e3e8afc 100644 --- a/cinc/generate.c +++ b/cinc/generate.c @@ -8,45 +8,409 @@ #include "generate.h" #include "ast.h" +#include "env.h" #include #include #include static char* prg_asm; +static int prgsize=0; +static int nextlabel=0; +Env* env=NULL; + +void prg_add(const char* str) { + unsigned long len=strlen(str); + prg_asm=realloc(prg_asm, prgsize+len+1); + prgsize+=len; + strncat(prg_asm,str,len); + +} + +static char* next_label() { + char* buf=malloc(sizeof(char)*4097); + buf[0]='l'; + int written=snprintf(buf+1, 4096, "%d",nextlabel); + if(written<0 || written>=4096) { + printf("Error: was not able to convert next label number to a string.\n"); + exit(1); + } + nextlabel++; + return buf; +} static void generate_expr(AstNode* ast) { const char* type=ast->data; - if(strcmp("num", type)==0) { - strncat(prg_asm, "mov rax,", 8); - strncat(prg_asm,ast->children[0]->data,strlen(ast->children[0]->data)); - strncat(prg_asm, "\n", 1); + if (strcmp("num", type)==0) { + prg_add("mov rax,"); + prg_add(ast->children[0]->data); + prg_add("\n"); + return; + } else if (strcmp("var", type)==0) { + prg_add("mov rax,[rbp-"); + char buf[4096]; + int offset=get_offset(env, ast->children[0]->data); + if (offset==-1) { + printf("Error: no such variable %s\n",ast->children[0]->data); + exit(1); + } + int written=snprintf(buf, 4096, "%d",offset); + if(written<0 || written>=4096) { + printf("Error: was not able to convert offset for variable %s to a string.\n",ast->children[0]->data); + exit(1); + } + prg_add(buf); + prg_add("]\n"); + return; + } else if (strcmp("neg", type)==0) { + generate_expr(ast->children[0]); + prg_add("neg rax\n"); + return; + } else if (strcmp("not", type)==0) { + generate_expr(ast->children[0]); + prg_add("cmp rax,0\nmove rax,0\nsete al\n"); + return; + } else if (strcmp("comp", type)==0) { + generate_expr(ast->children[0]); + prg_add("not rax\n"); + return; + } else if (strcmp("preinc", type)==0 || strcmp("postinc", type)==0 ) { + prg_add("mov rax,[rbp-"); + char buf[4096]; + int offset=get_offset(env, ast->children[0]->children[0]->data); + if (offset==-1) { + printf("Error: no such variable %s\n",ast->children[0]->children[0]->data); + exit(1); + } + int written=snprintf(buf, 4096, "%d",offset); + if (written<0 || written>=4096) { + printf("Error: was not able to convert offset for variable %s to a string.\n",ast->children[0]->data); + exit(1); + } + prg_add(buf); + prg_add("]\ninc rax\nmov [rbp-"); + prg_add(buf); + prg_add("],rax\n"); + if (strcmp("postinc", type)==0) { + prg_add("dec rax\n"); + } + return; + } else if (strcmp("predec", type)==0 || strcmp("postdec", type)==0) { + prg_add("mov rax,[rbp-"); + char buf[4096]; + int offset=get_offset(env, ast->children[0]->children[0]->data); + if (offset==-1) { + printf("Error: no such variable %s\n",ast->children[0]->children[0]->data); + exit(1); + } + int written=snprintf(buf, 4096, "%d",offset); + if (written<0 || written>=4096) { + printf("Error: was not able to convert offset for variable %s to a string.\n",ast->children[0]->data); + exit(1); + } + prg_add("]\ndec rax\nmov [rbp-"); + prg_add(buf); + prg_add("],rax\n"); + if (strcmp("postdec", type)==0) { + prg_add("inc rax\n"); + } + return; + } else if (strcmp("cond", type)==0) { + char* e3=next_label(); + char* post=next_label(); + generate_expr(ast->children[0]); + prg_add("cmp rax,0\n"); + prg_add("je "); + prg_add(e3); + prg_add("\n"); + generate_expr(ast->children[1]); + prg_add("jmp "); + prg_add(post); + prg_add("\n"); + prg_add(e3); + prg_add(":\n"); + generate_expr(ast->children[2]); + prg_add(post); + prg_add(":\n"); + free(e3); + free(post); + return; + } else if (strcmp("assign", type)==0) { + generate_expr(ast->children[1]); + prg_add("mov [rbp-"); + char buf[4096]; + int offset=get_offset(env, ast->children[0]->data); + if (offset==-1) { + printf("Error: no such variable %s\n",ast->children[0]->data); + exit(1); + } + int written=snprintf(buf, 4096, "%d",offset); + if(written<0 || written>=4096) { + printf("Error: was not able to convert offset for variable %s to a string.\n",ast->children[0]->data); + exit(1); + } + prg_add(buf); + prg_add("],rax\n"); + return; + } + generate_expr(ast->children[0]); + prg_add("push rax\n"); + generate_expr(ast->children[1]); + prg_add("pop rcx\n"); + if (strcmp(type,"add")==0) { + prg_add("add rax,rcx\n"); + } else if (strcmp(type,"sub")==0) { + prg_add("sub rcx,rax\n"); + prg_add("mov rax,rcx\n"); + } else if (strcmp(type,"mul")==0) { + prg_add("imul rax,rcx\n"); + } else if (strcmp(type,"div")==0 || strcmp(type,"mod")==0) { + prg_add("mov rdx,0\npush rcx\npush rax\npop rcx\npop rax\nidiv rcx\n"); + if (strcmp(type,"mod")==0) { + prg_add("mov rax,rdx\n"); + } + } else if (strcmp(type, "and")==0) { + prg_add("and rcx,rax\n"); + } else if (strcmp(type, "or")==0) { + prg_add("or rax,rcx\n"); + } else if (strcmp(type, "xor")==0) { + prg_add("xor rax,rcx\n"); + } else if (strcmp(type, "land")==0) { + prg_add("cmp rcx,0\nsetne cl\ncmp rax,0\nmov rax,0\nsetne al\nand al,cl\n"); + } else if (strcmp(type, "lor")==0) { + prg_add("or rax,rcx\nmov rax,0\nsetne al\n"); + } else if (strcmp(type, "sal")==0) { + prg_add("sal rax,rcx\n"); + } else if (strcmp(type, "sar")==0) { + prg_add("sar rax,rcx\n"); + } else if (strcmp(type, "eq")==0) { + prg_add("cmp rcx,rax\nmov rax,0\nsete al\n"); + } else if (strcmp(type, "ne")==0) { + prg_add("cmp rcx,rax\nmov rax,0\nsetne al\n"); + } else if (strcmp(type, "lt")==0) { + prg_add("cmp rcx,rax\nmov rax,0\nsetl al\n"); + } else if (strcmp(type, "le")==0) { + prg_add("cmp rcx,rax\nmov rax,0\nsetle al\n"); + } else if (strcmp(type, "gt")==0) { + prg_add("cmp rcx,rax\nmov rax,0\nsetg al\n"); + } else if (strcmp(type, "ge")==0) { + prg_add("cmp rcx,rax\nmov rax,0\nsetge al\n"); + } else { + printf("Unknown expr type %s\n",type); + exit(1); } } +static void generate_block(AstNode* ast); static void generate_statement(AstNode* ast) { const char* type=ast->data; - if(strcmp("return", type)==0) { + if (strcmp("return", type)==0) { generate_expr(ast->children[0]); - strncat(prg_asm, "ret\n", 4); + prg_add("mov rsp,rbp\npop rbp\nret\n"); + } else if (strcmp("vardec", type)==0) { + if (get_offset(env, ast->children[1]->data)!=-1) { + printf("Error: Redeclaration of variable %s\n",ast->children[1]->data); + exit(1); + } + add_entry(env,ast->children[0]->data,ast->children[1]->data); + prg_add("push 0\n"); + } else if (strcmp("vardecinitial", type)==0) { + if (get_offset(env, ast->children[1]->data)!=-1) { + printf("Error: Redeclaration of variable %s\n",ast->children[1]->data); + exit(1); + } + add_entry(env,ast->children[0]->data,ast->children[1]->data); + generate_expr(ast->children[2]); + prg_add("push rax\n"); + } else if (strcmp("if", type)==0) { + char* post=next_label(); + generate_expr(ast->children[0]); + prg_add("cmp rax,0\n"); + prg_add("je "); + prg_add(post); + prg_add("\n"); + generate_statement(ast->children[1]); + prg_add(post); + prg_add(":\n"); + free(post); + return; + } else if (strcmp("ifelse", type)==0) { + char* els=next_label(); + char* post=next_label(); + generate_expr(ast->children[0]); + prg_add("cmp rax,0\n"); + prg_add("je "); + prg_add(els); + prg_add("\n"); + generate_statement(ast->children[1]); + prg_add("jmp "); + prg_add(post); + prg_add("\n"); + prg_add(els); + prg_add(":\n"); + generate_statement(ast->children[2]); + prg_add(post); + prg_add(":\n"); + free(els); + free(post); + return; + } else if (strcmp("while", type)==0) { + char* begin=next_label(); + char* end=next_label(); + env->breaklabel=end; + env->contlabel=begin; + prg_add(begin); + prg_add(":\n"); + generate_expr(ast->children[0]); + prg_add("je "); + prg_add(end); + prg_add("\n"); + generate_statement(ast->children[1]); + prg_add("jmp "); + prg_add(begin); + prg_add("\n"); + prg_add(end); + prg_add(":\n"); + env->contlabel=NULL; + env->breaklabel=NULL; + free(begin); + free(end); + } else if (strcmp("dowhile", type)==0) { + char* begin=next_label(); + char* end=next_label(); + env->breaklabel=end; + env->contlabel=begin; + prg_add(begin); + prg_add(":\n"); + generate_statement(ast->children[1]); + generate_expr(ast->children[0]); + prg_add("jne "); + prg_add(begin); + prg_add("\n"); + prg_add(end); + prg_add(":\n"); + env->contlabel=NULL; + env->breaklabel=NULL; + free(end); + free(begin); + } else if (strcmp("for", type)==0) { + char* cond=next_label(); + char* end=next_label(); + char* cont=next_label(); + env->breaklabel=end; + env->contlabel=cont; + env=new_env(env); + if (ast->children[1]) { + if (strcmp(ast->children[0]->data, "decl")==0) { + generate_statement(ast->children[1]); + } else { + generate_expr(ast->children[1]); + } + } + prg_add(cond); + prg_add(":\n"); + if (ast->children[2]) { + generate_expr(ast->children[2]); + } else { + prg_add("mov rax,1\n"); + } + prg_add("je "); + prg_add(end); + prg_add("\n"); + generate_statement(ast->children[4]); + prg_add(cont); + prg_add(":\n"); + if (ast->children[3]) { + generate_expr(ast->children[3]); + } + prg_add("jmp "); + prg_add(cond); + prg_add("\n"); + prg_add(end); + prg_add(":\n"); + int bytes=(env->num_els)*8; + prg_add("add rsp,"); + char buf[4096]; + int written=snprintf(buf, 4096, "%d",bytes); + if(written<0 || written>=4096) { + printf("Error: was not able to convert the bytes to deallocate into a string\n"); + exit(1); + } + prg_add(buf); + prg_add("\n"); + env->contlabel=NULL; + env->breaklabel=NULL; + Env* prev=env->prev; + if (prev) { + prev->next=NULL; + } + free_env(env); + env=prev; + free(cond); + free(end); + free(cont); + } else if (strcmp("break", type)==0) { + if (env->breaklabel) { + prg_add("jmp "); + prg_add(env->breaklabel); + prg_add("\n"); + } else { + printf("Error: break outside of loop\n"); + } + } else if (strcmp("continue", type)==0) { + if (env->breaklabel) { + prg_add("jmp "); + prg_add(env->contlabel); + prg_add("\n"); + } else { + printf("Error: continue outside of loop\n"); + } + } else if (strcmp("block", type)==0) { + generate_block(ast); + } else { + generate_expr(ast); } } static void generate_block(AstNode* ast) { + env=new_env(env); for (int i=0;i<(ast->num_children);i++) { generate_statement(ast->children[i]); } + int bytes=(env->num_els)*8; + prg_add("add rsp,"); + char buf[4096]; + int written=snprintf(buf, 4096, "%d",bytes); + if(written<0 || written>=4096) { + printf("Error: was not able to convert the bytes to deallocate into a string\n"); + exit(1); + } + prg_add(buf); + prg_add("\n"); + Env* prev=env->prev; + if (prev) { + prev->next=NULL; + } + free_env(env); + env=prev; } static void generate_func(AstNode* ast) { const char* name=ast->children[1]->data; - strncat(prg_asm, ".globl _", 8); - strncat(prg_asm, name, strlen(name)); - strncat(prg_asm, "\n_", 2); - strncat(prg_asm, name, strlen(name)); - strncat(prg_asm, ":\n", 2); + prg_add(".globl _"); + prg_add(name); + prg_add("\n_"); + prg_add(name); + prg_add(":\n"); + prg_add("push rbp\nmov rbp,rsp\n"); generate_block(ast->children[2]); + //Generate the implicit return 0: + AstNode* ret=make_node("return"); + AstNode* num=make_node("num"); + add_child(num, make_node("0")); + add_child(ret,num); + generate_statement(ret); } diff --git a/cinc/main.c b/cinc/main.c index c6bb859..5b651cf 100644 --- a/cinc/main.c +++ b/cinc/main.c @@ -14,37 +14,38 @@ #include "parser.h" #include "generate.h" -#define PARSER_DEBUG 0 +#define COMPILER_DEBUG 1 int main(int argc, const char * argv[]) { - char* prgstr="int main() {\n return 173;\n}\n"; - if (PARSER_DEBUG) { + char* prgstr="int main(){int j=0;for(int i=0;i<7;i++){if(i==5){break;}j+=2;}return j;}"; + #if COMPILER_DEBUG printf("Program:\n"); printf("%s",prgstr); - } + #endif Token* tokens=tokenize(prgstr); - if (PARSER_DEBUG) { + #if COMPILER_DEBUG printf("Tokens:\n"); Token* tok=tokens; while (tok) { print_tok(tok); tok=tok->next; } - } + #endif AstNode* ast=parse(tokens); - free_toklist(tokens); - if (PARSER_DEBUG) { + #if COMPILER_DEBUG printf("AST:\n"); print_tree(ast, 0); - } + #endif char* prg=generate_prg(ast); - free_tree(ast); - if (PARSER_DEBUG) { + #if COMPILER_DEBUG printf("Output assembly:\n"); printf("%s",prg); - } - FILE* outfile=fopen("/Users/peterterpstra/Desktop/projects/xcode/cinc/cinc/out.mys","w"); + #endif + FILE* outfile=fopen("/Users/peterterpstra/Desktop/projects/xcode/cinc/cinc/out.s","w"); + //compile with gcc -masm=intel out.s -o out fputs(prg, outfile); fclose(outfile); free(prg); + free_tree(ast); + free_toklist(tokens); } diff --git a/cinc/oplist.txt b/cinc/oplist.txt new file mode 100644 index 0000000..69c63a0 --- /dev/null +++ b/cinc/oplist.txt @@ -0,0 +1,14 @@ +1 () Function call LEX +1 [] Array subscripting LEX +1 . Structure and union member access LEX +1 -> Structure and union member access through pointer TODO +1 (type){list} Compound literal LEX +2 (type) Type cast LEX +2 * Indirection (dereference) LEX +2 & Address-of LEX +2 sizeof Size-of TODO +2 _Alignof Alignment requirement TODO +3 * / % Multiplication, division, and remainder DONE +12 || Logical OR DONE +13 ?: Ternary conditional PARSE +14 = Simple assignment DONE diff --git a/cinc/out.mys b/cinc/out.mys deleted file mode 100644 index 3a33c22..0000000 --- a/cinc/out.mys +++ /dev/null @@ -1,4 +0,0 @@ -.globl _main -_main: -mov rax,173 -ret diff --git a/cinc/out.s b/cinc/out.s new file mode 100644 index 0000000..3e34a97 --- /dev/null +++ b/cinc/out.s @@ -0,0 +1,41 @@ +.globl _main +_main: +push rbp +mov rbp,rsp +mov rax,0 +push rax +mov rax,0 +push rax +l0: +mov rax,[rbp-16] +push rax +mov rax,7 +pop rcx +cmp rcx,rax +mov rax,0 +setl al +je l1 +mov rax,[rbp-8] +push rax +mov rax,2 +pop rcx +add rax,rcx +mov [rbp-8],rax +add rsp,0 +l2: +mov rax,[rbp-16] +inc rax +mov [rbp-16],rax +dec rax +jmp l0 +l1: +add rsp,8 +mov rax,[rbp-8] +mov rsp,rbp +pop rbp +ret +add rsp,8 +mov rax,0 +mov rsp,rbp +pop rbp +ret diff --git a/cinc/parser.c b/cinc/parser.c index c504c24..c119016 100644 --- a/cinc/parser.c +++ b/cinc/parser.c @@ -12,55 +12,420 @@ #include #include #include +#define PARSER_DEBUG 1 -static Token lahead; +static Token* lahead; static void advance() { - lahead=*(lahead.next); +#if PARSER_DEBUG + printf("Consumed:\n"); + print_tok(lahead); +#endif + lahead=lahead->next; } static void match(unsigned char type) { - if (lahead.type!=type) { + if (lahead->type!=type) { + if (type<128) { + printf("Expected %c, got ",type); + } else { + printf("Expected %d, got ",type); + } + print_tok(lahead); exit(1); } advance(); } static const char* getid() { - if (lahead.type!=TYPE_IDENT) { + if (lahead->type!=TYPE_IDENT) { + printf("Expected IDENT, got "); + print_tok(lahead); exit(1); } - const char* id=lahead.val->strval; + const char* id=lahead->val->strval; advance(); return id; } static const char* get_num() { - if (lahead.type!=TYPE_NUM) { + if (lahead->type!=TYPE_NUM) { + printf("Expected NUM, got "); + print_tok(lahead); exit(1); } - const char* num=lahead.val->strval; + const char* num=lahead->val->strval; advance(); return num; } static const char* gettype() { - if (lahead.type!=TYPE_TYPE) { + if (lahead->type!=TYPE_TYPE) { + printf("Expected TYPE, got "); + print_tok(lahead); exit(1); } - const char* id=lahead.val->strval; + const char* id=lahead->val->strval; advance(); return id; } -static AstNode* expr() { - AstNode* expr_root=make_node("num"); - add_child(expr_root, make_node(get_num())); +static AstNode* expr(void); + +static AstNode* factor_lvl1() { + AstNode* factor_root; + switch (lahead->type) { + case '(': + match('('); + factor_root=expr(); + match(')'); + break; + case TYPE_NUM: + factor_root=make_node("num"); + add_child(factor_root, make_node(get_num())); + break; + case TYPE_IDENT: + factor_root=make_node("var"); + add_child(factor_root, make_node(getid())); + break; + default: + printf("Error: Expected factor\n"); + exit(1); + } + if (lahead->type==TYPE_INC || lahead->type==TYPE_DEC) { + unsigned char type=lahead->type; + match(lahead->type); + AstNode* old_root=factor_root; + switch (type) { + case TYPE_INC: + factor_root=make_node("postinc"); + add_child(factor_root, old_root); + break; + case TYPE_DEC: + factor_root=make_node("postdec"); + add_child(factor_root, old_root); + break; + } + + } + return factor_root; +} + +static AstNode* factor() { + AstNode* factor_root; + switch (lahead->type) { + case '-': + match('-'); + factor_root=make_node("neg"); + add_child(factor_root, factor_lvl1()); + break; + case '~': + match('~'); + factor_root=make_node("comp"); + add_child(factor_root, factor_lvl1()); + break; + case '!': + match('!'); + factor_root=make_node("not"); + add_child(factor_root, factor_lvl1()); + break; + case TYPE_INC: + match(TYPE_INC); + factor_root=make_node("preinc"); + add_child(factor_root, factor_lvl1()); + break; + case TYPE_DEC: + match(TYPE_DEC); + factor_root=make_node("predec"); + add_child(factor_root, factor_lvl1()); + break; + default: + return factor_lvl1(); + } + return factor_root; +} + + +static AstNode* term() { + AstNode* term_root=factor(); + while (lahead->type=='*' || lahead->type=='/' || lahead->type=='%') { + unsigned char type=lahead->type; + match(lahead->type); + AstNode* term1=factor(); + switch (type) { + case '*': { + AstNode* old_root=term_root; + term_root=make_node("mul"); + add_child(term_root, old_root); + add_child(term_root, term1); + break; + } + case '/': { + AstNode* old_root=term_root; + term_root=make_node("div"); + add_child(term_root, old_root); + add_child(term_root, term1); + break; + } + case '%': { + AstNode* old_root=term_root; + term_root=make_node("mod"); + add_child(term_root, old_root); + add_child(term_root, term1); + break; + } + } + } + return term_root; +} + +static AstNode* arithexpr() { + AstNode* expr_root=term(); + while (lahead->type=='+' || lahead->type=='-') { + unsigned char type=lahead->type; + match(lahead->type); + AstNode* term1=term(); + switch (type) { + case '+': { + AstNode* old_root=expr_root; + expr_root=make_node("add"); + add_child(expr_root, old_root); + add_child(expr_root, term1); + break; + } + case '-': { + AstNode* old_root=expr_root; + expr_root=make_node("sub"); + add_child(expr_root, old_root); + add_child(expr_root, term1); + break; + } + } + } return expr_root; } + +static AstNode* shiftexpr() { + AstNode* expr_root=arithexpr(); + while (lahead->type==TYPE_SL || lahead->type==TYPE_SR) { + unsigned char type=lahead->type; + match(lahead->type); + AstNode* expr1=arithexpr(); + switch (type) { + case TYPE_SL: { + AstNode* old_root=expr_root; + expr_root=make_node("sal"); + add_child(expr_root, old_root); + add_child(expr_root, expr1); + break; + } + case TYPE_SR: { + AstNode* old_root=expr_root; + expr_root=make_node("sar"); + add_child(expr_root, old_root); + add_child(expr_root, expr1); + break; + } + } + } + return expr_root; +} + +static AstNode* relexpr() { + AstNode* expr_root=shiftexpr(); + while (lahead->type=='<' || lahead->type=='>'|| lahead->type==TYPE_LE|| lahead->type==TYPE_GE) { + unsigned char type=lahead->type; + match(lahead->type); + AstNode* expr1=shiftexpr(); + switch (type) { + case '<': { + AstNode* old_root=expr_root; + expr_root=make_node("lt"); + add_child(expr_root, old_root); + add_child(expr_root, expr1); + break; + } + case '>': { + AstNode* old_root=expr_root; + expr_root=make_node("gt"); + add_child(expr_root, old_root); + add_child(expr_root, expr1); + break; + } + case TYPE_LE: { + AstNode* old_root=expr_root; + expr_root=make_node("le"); + add_child(expr_root, old_root); + add_child(expr_root, expr1); + break; + } + case TYPE_GE: { + AstNode* old_root=expr_root; + expr_root=make_node("ge"); + add_child(expr_root, old_root); + add_child(expr_root, expr1); + break; + } + } + } + return expr_root; +} + +static AstNode* eqexpr() { + AstNode* expr_root=relexpr(); + while (lahead->type==TYPE_NE || lahead->type==TYPE_EQ) { + unsigned char type=lahead->type; + match(lahead->type); + AstNode* expr1=relexpr(); + switch (type) { + case TYPE_NE: { + AstNode* old_root=expr_root; + expr_root=make_node("ne"); + add_child(expr_root, old_root); + add_child(expr_root, expr1); + break; + } + case TYPE_EQ: { + AstNode* old_root=expr_root; + expr_root=make_node("eq"); + add_child(expr_root, old_root); + add_child(expr_root, expr1); + break; + } + } + } + return expr_root; +} + +static AstNode* andexpr() { + AstNode* expr_root=eqexpr(); + while (lahead->type=='&') { + match(lahead->type); + AstNode* expr1=eqexpr(); + AstNode* old_root=expr_root; + expr_root=make_node("and"); + add_child(expr_root, old_root); + add_child(expr_root, expr1); + } + return expr_root; +} + +static AstNode* xorexpr() { + AstNode* expr_root=andexpr(); + while (lahead->type=='^') { + match(lahead->type); + AstNode* expr1=andexpr(); + AstNode* old_root=expr_root; + expr_root=make_node("xor"); + add_child(expr_root, old_root); + add_child(expr_root, expr1); + } + return expr_root; +} + +static AstNode* orexpr() { + AstNode* expr_root=xorexpr(); + while (lahead->type=='|') { + match(lahead->type); + AstNode* expr1=xorexpr(); + AstNode* old_root=expr_root; + expr_root=make_node("or"); + add_child(expr_root, old_root); + add_child(expr_root, expr1); + } + return expr_root; +} + +static AstNode* landexpr() { + AstNode* expr_root=orexpr(); + while (lahead->type==TYPE_LAND) { + match(lahead->type); + AstNode* expr1=orexpr(); + AstNode* old_root=expr_root; + expr_root=make_node("land"); + add_child(expr_root, old_root); + add_child(expr_root, expr1); + } + return expr_root; +} + +static AstNode* lorexpr() { + AstNode* expr_root=landexpr(); + while (lahead->type==TYPE_LOR) { + match(lahead->type); + AstNode* expr1=landexpr(); + AstNode* old_root=expr_root; + expr_root=make_node("lor"); + add_child(expr_root, old_root); + add_child(expr_root, expr1); + } + return expr_root; +} + +static AstNode* condexpr() { + AstNode* expr_root=make_node("cond"); + AstNode* lorexp=lorexpr(); + if (lahead->type=='?') { + add_child(expr_root, lorexp); + } else { + return lorexp; + } + match('?'); + add_child(expr_root, expr()); + match(':'); + add_child(expr_root, condexpr()); + return expr_root; +} + +static AstNode* expr() { + if (lahead->type==TYPE_IDENT) { + if (lahead->next->type=='=') { + const char* id=getid(); + match('='); + AstNode* val=expr(); + AstNode* expr_root=make_node("assign"); + add_child(expr_root, make_node(id)); + add_child(expr_root, val); + return expr_root; + } else if (lahead->next->type==TYPE_COMPSET) { + const char* id=getid(); + char operator=*(lahead->val->strval); + match(lahead->type); + //Construct a sequence of tokens effecting = + Token* tok=new_token(TYPE_IDENT, val_from_const_str(id), NULL); + Token* first=tok; + tok=new_token('=', NULL, tok); + tok=new_token(TYPE_IDENT, val_from_const_str(id), tok); + tok=new_token(operator, NULL, tok); + //Link in the generated token stream + print_tok(lahead); + tok->next=lahead; + print_tok(tok->next); + lahead=first; + print_tok(tok->next); + return expr(); + } else { + return condexpr(); + } + } else { + return condexpr(); + } +} + +static AstNode* exp_option() { + if (lahead->type==';' || lahead->type==')') { + return NULL; + } else { + return expr(); + } +} + +static AstNode* block(void); +static AstNode* declaration(void); + static AstNode* statement() { - switch (lahead.type) { + switch (lahead->type) { case TYPE_RETURN: { match(TYPE_RETURN); AstNode* return_root=make_node("return"); @@ -68,18 +433,115 @@ static AstNode* statement() { match(';'); return return_root; } - default: - printf("Error: Expected statement"); - exit(1); + case TYPE_IF: { + match(TYPE_IF); + AstNode* if_root=make_node("if"); // We dont know whether it's an if only or a if-else, so we assume if only. + match('('); + add_child(if_root, expr()); + match(')'); + add_child(if_root, statement()); + if (lahead->type==TYPE_ELSE) { + if_root->data="ifelse"; + match(TYPE_ELSE); + add_child(if_root, statement()); + } + return if_root; + } + case TYPE_FOR: { + match(TYPE_FOR); + match('('); + AstNode* for_root=make_node("for"); + if (lahead->type==TYPE_TYPE) { + add_child(for_root, make_node("decl")); + add_child(for_root, declaration()); + } else { + add_child(for_root, make_node("exp")); + add_child(for_root, exp_option()); + match(';'); + } + add_child(for_root, exp_option()); + match(';'); + add_child(for_root, exp_option()); + match(')'); + add_child(for_root, statement()); + return for_root; + } + case TYPE_WHILE: { + match(TYPE_WHILE); + match('('); + AstNode* while_root=make_node("while"); + add_child(while_root, expr()); + match(')'); + add_child(while_root, statement()); + return while_root; + } + case TYPE_DO: { + match(TYPE_DO); + AstNode* dowhile_root=make_node("dowhile"); + AstNode* statm=statement(); + match(TYPE_WHILE); + add_child(dowhile_root, expr()); + add_child(dowhile_root, statm); + match(';'); + return dowhile_root; + } + case TYPE_BREAK: + match(TYPE_BREAK); + match(';'); + return make_node("break"); + case TYPE_CONTINUE: + match(TYPE_CONTINUE); + match(';'); + return make_node("continue"); + case '{': + return block(); + default: { + AstNode* statement_root=exp_option(); + match(';'); + return statement_root; +// printf("Error: Expected statement\n"); +// exit(1); + } + } +} + +static AstNode* declaration() { + const char* type=gettype(); + const char* id=getid(); + AstNode* decl_root; + if (lahead->type=='=') { + match('='); + AstNode* decl_val=expr(); + match(';'); + decl_root=make_node("vardecinitial"); + add_child(decl_root, make_node(type)); + add_child(decl_root, make_node(id)); + add_child(decl_root, decl_val); + } else { + match(';'); + decl_root=make_node("vardec"); + add_child(decl_root, make_node(type)); + add_child(decl_root, make_node(id));; + } + return decl_root; +} + +static AstNode* block_item() { + if (lahead->type==TYPE_TYPE) { + return declaration(); + } else { + return statement(); } } - static AstNode* block() { match('{'); AstNode* block_root=make_node("block"); - while (lahead.type!='}') { - add_child(block_root, statement()); + while (lahead->type!='}') { + AstNode* item=block_item(); + if (item) { + add_child(block_root, item); + } } match('}'); return block_root; @@ -99,7 +561,7 @@ static AstNode* func() { } AstNode* parse(Token* prg) { - lahead=*(prg); + lahead=prg; return func(); } diff --git a/cinc/token.c b/cinc/token.c index edc0190..29edb7f 100644 --- a/cinc/token.c +++ b/cinc/token.c @@ -32,7 +32,7 @@ TokenVal* val_from_int(int val) { TokenVal* val_from_const_str(const char* val) { TokenVal* tval=malloc(sizeof(TokenVal)); tval->type=strval; - tval->strval=val; + tval->strval=(char*)val; tval->constflag=true; return tval; } diff --git a/cinc/token.h b/cinc/token.h index 032447a..a61a516 100644 --- a/cinc/token.h +++ b/cinc/token.h @@ -31,7 +31,24 @@ typedef struct { #define TYPE_EOF 130 #define TYPE_RETURN 131 #define TYPE_TYPE 132 - +#define TYPE_LAND 133 +#define TYPE_LOR 134 +#define TYPE_EQ 135 +#define TYPE_NE 136 +#define TYPE_LE 137 +#define TYPE_GE 138 +#define TYPE_SL 139 +#define TYPE_SR 140 +#define TYPE_COMPSET 141 +#define TYPE_INC 142 +#define TYPE_DEC 143 +#define TYPE_IF 144 +#define TYPE_ELSE 145 +#define TYPE_FOR 146 +#define TYPE_WHILE 147 +#define TYPE_DO 148 +#define TYPE_BREAK 149 +#define TYPE_CONTINUE 150 struct _token { unsigned char type; TokenVal* val; diff --git a/cinc/tokenize.c b/cinc/tokenize.c index e5f82e1..ef9afe1 100644 --- a/cinc/tokenize.c +++ b/cinc/tokenize.c @@ -14,8 +14,6 @@ #include #include -#define ID_MAX_SIZE 31 - Token* next_token(int* strpos, char* prg, Token* prev) { char current=prg[*strpos]; if (isalpha(current) || current=='_') { @@ -41,10 +39,24 @@ Token* next_token(int* strpos, char* prg, Token* prev) { return new_token(TYPE_RETURN, NULL, prev); } else if (strcmp("int",id)==0) { return new_token(TYPE_TYPE, val_from_const_str("int"), prev); + } else if (strcmp("if",id)==0) { + return new_token(TYPE_IF, NULL, prev); + } else if (strcmp("else",id)==0) { + return new_token(TYPE_ELSE, NULL, prev); + } else if (strcmp("for",id)==0) { + return new_token(TYPE_FOR, NULL, prev); + } else if (strcmp("while",id)==0) { + return new_token(TYPE_WHILE, NULL, prev); + } else if (strcmp("do",id)==0) { + return new_token(TYPE_DO, NULL, prev); + } else if (strcmp("break",id)==0) { + return new_token(TYPE_BREAK, NULL, prev); + } else if (strcmp("ccontinue",id)==0) { + return new_token(TYPE_CONTINUE, NULL, prev); } return new_token(TYPE_IDENT, val_from_str(id), prev); } else if (isdigit(current)) { - char* num=malloc(sizeof(char)*ID_MAX_SIZE+1); + char* num=malloc(sizeof(char)*NUM_MAX_SIZE+1); int length=1; num[0]=current; (*strpos)++; @@ -57,7 +69,7 @@ Token* next_token(int* strpos, char* prg, Token* prev) { } else { break; } - if (length==ID_MAX_SIZE) { + if (length==NUM_MAX_SIZE) { break; } } @@ -71,11 +83,86 @@ Token* next_token(int* strpos, char* prg, Token* prev) { current=prg[*strpos]; } return next_token(strpos, prg, prev); - } else if (current=='{' || current=='}' || current=='(' || current==')' || current==';' ) { + } else if (current=='&') { + (*strpos)++; + current=prg[*strpos]; + if (current=='&') { + (*strpos)++; + return new_token(TYPE_LAND, NULL, prev); + } + return new_token('&', NULL, prev); + } else if (current=='|') { + (*strpos)++; + current=prg[*strpos]; + if (current=='|') { + (*strpos)++; + return new_token(TYPE_LOR, NULL, prev); + } + return new_token('|', NULL, prev); + } else if (current=='=') { + (*strpos)++; + current=prg[*strpos]; + if (current=='=') { + (*strpos)++; + return new_token(TYPE_EQ, NULL, prev); + } + return new_token('=', NULL, prev); + } else if (current=='!') { + (*strpos)++; + current=prg[*strpos]; + if (current=='=') { + (*strpos)++; + return new_token(TYPE_NE, NULL, prev); + } + return new_token('!', NULL, prev); + } else if (current=='<') { + (*strpos)++; + current=prg[*strpos]; + if (current=='=') { + (*strpos)++; + return new_token(TYPE_LE, NULL, prev); + } else if (current=='<') { + (*strpos)++; + return new_token(TYPE_SL, NULL, prev); + } + return new_token('<', NULL, prev); + } else if (current=='>') { + (*strpos)++; + current=prg[*strpos]; + if (current=='=') { + (*strpos)++; + return new_token(TYPE_GE, NULL, prev); + } else if (current=='>') { + (*strpos)++; + return new_token(TYPE_SR, NULL, prev); + } + return new_token('>', NULL, prev); + } else if (current==0) { + return new_token(TYPE_EOF, NULL, prev); + } else { + if (current=='+' || current=='-' || current=='/' || current=='*' || current=='%' || current=='&' || current=='|' || current=='^') { + (*strpos)++; + char* str=malloc(sizeof(char)); + *str=current; + current=prg[*strpos]; + if (current=='=') { + (*strpos)++; + return new_token(TYPE_COMPSET, val_from_str(str), prev); + } else { + if (*str=='+' && current=='+') { + (*strpos)++; + return new_token(TYPE_INC, NULL, prev); + } + if (*str=='-' && current=='-') { + (*strpos)++; + return new_token(TYPE_DEC, NULL, prev); + } + return new_token(*str, NULL, prev); + } + } (*strpos)++; return new_token(current, NULL, prev); } - return new_token(TYPE_EOF, NULL, prev); } Token* tokenize(char* prg) { diff --git a/cinc/tokenize.h b/cinc/tokenize.h index 09bdba0..a84fc3c 100644 --- a/cinc/tokenize.h +++ b/cinc/tokenize.h @@ -11,6 +11,9 @@ #include "token.h" +#define ID_MAX_SIZE 31 +#define NUM_MAX_SIZE 31 + Token* tokenize(char* prg); void free_toklist(Token* tokens);