The big commit! No errors or warnings. Can handle everything but function calls and types other than int

This commit is contained in:
pjht 2018-09-16 16:44:08 -05:00
parent 14a762bfe6
commit a233cc6762
16 changed files with 1240 additions and 64 deletions

View File

@ -9,6 +9,8 @@
/* Begin PBXBuildFile section */
F61910612142A876003B8798 /* main.c in Sources */ = {isa = PBXBuildFile; fileRef = F61910602142A876003B8798 /* main.c */; };
F61910692142A8C5003B8798 /* tokenize.c in Sources */ = {isa = PBXBuildFile; fileRef = F61910682142A8C5003B8798 /* tokenize.c */; };
F62F6C0D214B1BFC00EDE8D2 /* func.c in Sources */ = {isa = PBXBuildFile; fileRef = F62F6C0C214B1BFC00EDE8D2 /* func.c */; };
F6544B112147F086002C78F7 /* env.c in Sources */ = {isa = PBXBuildFile; fileRef = F6544B102147F086002C78F7 /* env.c */; };
F65A954E21454B31005FCAF5 /* token.c in Sources */ = {isa = PBXBuildFile; fileRef = F65A954D21454B31005FCAF5 /* token.c */; };
F661C333214590930021FCCE /* parser.c in Sources */ = {isa = PBXBuildFile; fileRef = F661C332214590930021FCCE /* parser.c */; };
F661C3362145CE760021FCCE /* ast.c in Sources */ = {isa = PBXBuildFile; fileRef = F661C3352145CE760021FCCE /* ast.c */; };
@ -28,11 +30,14 @@
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
F606D3182145FAB300E817B6 /* out.mys */ = {isa = PBXFileReference; lastKnownFileType = text; path = out.mys; sourceTree = "<group>"; };
F619105D2142A876003B8798 /* cinc */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = cinc; sourceTree = BUILT_PRODUCTS_DIR; };
F61910602142A876003B8798 /* main.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = main.c; sourceTree = "<group>"; };
F61910672142A8C5003B8798 /* tokenize.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = tokenize.h; sourceTree = "<group>"; };
F61910682142A8C5003B8798 /* tokenize.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = tokenize.c; sourceTree = "<group>"; };
F62F6C0B214B1BFC00EDE8D2 /* func.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = func.h; sourceTree = "<group>"; };
F62F6C0C214B1BFC00EDE8D2 /* func.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = func.c; sourceTree = "<group>"; };
F6544B0F2147F086002C78F7 /* env.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = env.h; sourceTree = "<group>"; };
F6544B102147F086002C78F7 /* env.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = env.c; sourceTree = "<group>"; };
F65A954D21454B31005FCAF5 /* token.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = token.c; sourceTree = "<group>"; };
F661C331214590930021FCCE /* parser.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = parser.h; sourceTree = "<group>"; };
F661C332214590930021FCCE /* parser.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = parser.c; sourceTree = "<group>"; };
@ -41,6 +46,7 @@
F661C3372145D9C40021FCCE /* generate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = generate.h; sourceTree = "<group>"; };
F661C3382145D9C40021FCCE /* generate.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = generate.c; sourceTree = "<group>"; };
F661C33A2145DE300021FCCE /* token.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = token.h; sourceTree = "<group>"; };
F6B6C137214712F3008F5230 /* oplist.txt */ = {isa = PBXFileReference; lastKnownFileType = text; path = oplist.txt; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@ -84,7 +90,11 @@
F661C3352145CE760021FCCE /* ast.c */,
F661C3372145D9C40021FCCE /* generate.h */,
F661C3382145D9C40021FCCE /* generate.c */,
F606D3182145FAB300E817B6 /* out.mys */,
F62F6C0B214B1BFC00EDE8D2 /* func.h */,
F62F6C0C214B1BFC00EDE8D2 /* func.c */,
F6B6C137214712F3008F5230 /* oplist.txt */,
F6544B0F2147F086002C78F7 /* env.h */,
F6544B102147F086002C78F7 /* env.c */,
);
path = cinc;
sourceTree = "<group>";
@ -146,7 +156,9 @@
buildActionMask = 2147483647;
files = (
F65A954E21454B31005FCAF5 /* token.c in Sources */,
F6544B112147F086002C78F7 /* env.c in Sources */,
F661C3362145CE760021FCCE /* ast.c in Sources */,
F62F6C0D214B1BFC00EDE8D2 /* func.c in Sources */,
F61910692142A8C5003B8798 /* tokenize.c in Sources */,
F661C333214590930021FCCE /* parser.c in Sources */,
F661C3392145D9C40021FCCE /* generate.c in Sources */,
@ -270,6 +282,7 @@
isa = XCBuildConfiguration;
buildSettings = {
CODE_SIGN_STYLE = Automatic;
"EXCLUDED_SOURCE_FILE_NAMES[arch=*]" = out.s;
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Debug;
@ -278,6 +291,7 @@
isa = XCBuildConfiguration;
buildSettings = {
CODE_SIGN_STYLE = Automatic;
"EXCLUDED_SOURCE_FILE_NAMES[arch=*]" = out.s;
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Release;

View File

@ -39,17 +39,23 @@ void print_tree(AstNode* root,int tabLevel) {
printf(" ");
}
printf("Child %d: ",i);
printf("%s\n",root->children[i]->data);
print_tree(root->children[i],tabLevel+1);
if (root->children[i]) {
printf("%s\n",root->children[i]->data);
print_tree(root->children[i],tabLevel+1);
} else {
printf("NULL\n");
}
}
}
}
// Recursively frees an AST node and every node reachable through its
// children array. A NULL root is accepted and ignored, so trees that
// contain NULL children (e.g. omitted for-loop clauses) can be freed safely.
// Only the node structs are released here; this function does not free
// node data strings or the children array itself.
void free_tree(AstNode* root) {
    if (!root) {
        return;
    }
    if (root->children) {
        for (int i=0;i<(root->num_children);i++) {
            free_tree(root->children[i]);
        }
    }
    free(root);
}

77
cinc/env.c Normal file
View File

@ -0,0 +1,77 @@
//
// env.c
// cinc
//
// Created by Peter Terpstra on 9/11/18.
// Copyright © 2018 Peter Terpstra. All rights reserved.
//
#include "env.h"
#include <stdlib.h>
#include <string.h>
// Creates a new, empty variable scope nested inside prev (prev may be NULL
// for the outermost scope).
//
// The new scope starts with room for 100 entries. Its next free stack
// offset is 8, or prev's current offset when a parent is given, so offsets
// keep increasing across nested scopes. The loop labels are inherited from
// the parent so break/continue inside a nested block still see the
// enclosing loop. prev->next is pointed at the new scope.
//
// Aborts the process if memory cannot be allocated.
Env* new_env(Env* prev) {
    Env* env=malloc(sizeof(Env));
    if (!env) {
        abort();
    }
    env->next=NULL;
    env->prev=prev;
    env->max_els=100;
    env->num_els=0;
    env->offset=8;
    env->offsets=malloc(sizeof(int)*env->max_els);
    env->varnames=malloc(sizeof(const char*)*env->max_els);
    env->types=malloc(sizeof(const char*)*env->max_els);
    if (!env->offsets || !env->varnames || !env->types) {
        abort();
    }
    env->contlabel=NULL;
    env->breaklabel=NULL;
    if (prev) {
        env->offset=prev->offset;
        env->contlabel=prev->contlabel;
        env->breaklabel=prev->breaklabel;
        prev->next=env;
    }
    return env;
}
// Records a new variable in env.
//
// type and name are stored by pointer (not copied), so they must outlive
// the scope. The variable is given the scope's current stack offset, and
// the offset then advances by 8 for the next variable.
//
// The three parallel arrays grow by 100 slots whenever they are full.
// Aborts the process if growing them fails.
void add_entry(Env* env,const char* type,const char* name) {
    if (env->num_els==env->max_els) {
        int new_max=env->max_els+100;
        int* offsets=realloc(env->offsets,sizeof(int)*new_max);
        if (!offsets) {
            abort();
        }
        env->offsets=offsets;
        const char** varnames=realloc(env->varnames,sizeof(const char*)*new_max);
        if (!varnames) {
            abort();
        }
        env->varnames=varnames;
        const char** types=realloc(env->types,sizeof(const char*)*new_max);
        if (!types) {
            abort();
        }
        env->types=types;
        // Record the new capacity; otherwise the next growth check never
        // fires again and the arrays overflow.
        env->max_els=new_max;
    }
    env->varnames[env->num_els]=name;
    env->types[env->num_els]=type;
    env->offsets[env->num_els]=env->offset;
    env->num_els++;
    env->offset+=8;
}
// Looks up the stack offset of variable name, searching env first and then
// each enclosing scope through the prev links. Returns -1 when no scope in
// the chain declares the variable.
int get_offset(Env* env,const char* name) {
    Env* scope=env;
    do {
        int idx=0;
        while (idx<scope->num_els) {
            if (!strcmp(scope->varnames[idx], name)) {
                return scope->offsets[idx];
            }
            idx++;
        }
        scope=scope->prev;
    } while (scope);
    return -1;
}
// Looks up the recorded type string of variable name, searching env first
// and then each enclosing scope through the prev links. Returns NULL when
// no scope in the chain declares the variable.
const char* get_type(Env* env,const char* name) {
    Env* scope=env;
    do {
        int idx=0;
        while (idx<scope->num_els) {
            if (!strcmp(scope->varnames[idx], name)) {
                return scope->types[idx];
            }
            idx++;
        }
        scope=scope->prev;
    } while (scope);
    return NULL;
}
// Frees env and every scope nested below it (reachable through next).
//
// The variable name and type strings are not owned by the scope and are
// left alone; the label strings are likewise not freed here. Callers that
// free an inner scope must clear the parent's next pointer themselves.
// A NULL env is ignored.
void free_env(Env* env) {
    if (!env) {
        return;
    }
    if (env->next) {
        free_env(env->next);
    }
    free(env->offsets);
    free(env->varnames);
    free(env->types);
    free(env);
}

32
cinc/env.h Normal file
View File

@ -0,0 +1,32 @@
//
// env.h
// cinc
//
// Created by Peter Terpstra on 9/11/18.
// Copyright © 2018 Peter Terpstra. All rights reserved.
//
#ifndef env_h
#define env_h
// One lexical scope of the program being compiled.
struct _env {
    const char** varnames; // names of the variables declared in this scope
    const char** types;    // type strings, parallel to varnames
    int* offsets;          // stack offsets, parallel to varnames
    int num_els;           // number of entries in use in the three arrays
    int max_els;           // allocated capacity of the three arrays
    int offset;            // offset the next declared variable will receive
    struct _env* next;     // scope nested inside this one, or NULL
    struct _env* prev;     // enclosing scope, or NULL for the outermost
    char* contlabel;       // label a `continue` jumps to, or NULL outside a loop
    char* breaklabel;      // label a `break` jumps to, or NULL outside a loop
};
typedef struct _env Env;

// Creates an empty scope nested inside prev (which may be NULL).
Env* new_env(Env* prev);
// Declares variable name of the given type in env; strings are not copied.
void add_entry(Env* env,const char* type,const char* name);
// Returns the stack offset of name, searching outwards; -1 if undeclared.
int get_offset(Env* env,const char* name);
// Returns the type string of name, searching outwards; NULL if undeclared.
const char* get_type(Env* env,const char* name);
// Frees env and all scopes nested inside it.
void free_env(Env* env);
#endif /* env_h */

36
cinc/func.c Normal file
View File

@ -0,0 +1,36 @@
//
// func.c
// cinc
//
// Created by Peter Terpstra on 9/13/18.
// Copyright © 2018 Peter Terpstra. All rights reserved.
//
#include "func.h"
#include <stdbool.h>
#include <stdlib.h>
// Allocates an argument descriptor holding the given type and name.
// The strings are stored by pointer, not copied.
// Returns NULL if memory cannot be allocated; release with free_arg().
Arg* make_arg(const char* type,const char* name) {
    Arg* arg=malloc(sizeof(Arg));
    if (!arg) {
        return NULL;
    }
    arg->type=type;
    arg->name=name;
    return arg;
}
// Releases an argument descriptor obtained from make_arg().
// The type and name strings it points at are not freed.
void free_arg(Arg* arg)
{
    free(arg);
}
// Allocates a function descriptor.
//
// name    - function name (stored by pointer, not copied)
// defined - stored as-is in the descriptor's `defined` flag
// nargs   - number of entries in args
// args    - argument array; free_func() later frees this pointer
//
// Returns NULL if memory cannot be allocated.
Func* make_func(const char* name,bool defined,int nargs,Arg* args) {
    Func* func=malloc(sizeof(Func));
    if (!func) {
        return NULL;
    }
    func->name=name;
    func->defined=defined;
    func->nargs=nargs;
    func->args=args;
    return func;
}
// Releases a function descriptor together with its args array.
// The name string is not freed. A NULL func is ignored.
void free_func(Func* func) {
    if (!func) {
        return;
    }
    free(func->args);
    free(func);
}

26
cinc/func.h Normal file
View File

@ -0,0 +1,26 @@
//
// func.h
// cinc
//
// Created by Peter Terpstra on 9/13/18.
// Copyright © 2018 Peter Terpstra. All rights reserved.
//
#ifndef func_h
#define func_h
#include <stdbool.h>
// A single formal parameter of a function.
typedef struct Arg {
    const char* type; // parameter type string
    const char* name; // parameter name
} Arg;

// Descriptor for a function known to the compiler.
typedef struct Func {
    const char* name; // function name
    bool defined;     // flag supplied by the creator of the descriptor
    int nargs;        // number of entries in args
    Arg* args;        // parameter array, freed by free_func()
} Func;

// Allocates an argument descriptor; strings are stored by pointer.
Arg* make_arg(const char* type,const char* name);
// Frees an argument descriptor created by make_arg().
void free_arg(Arg* arg);
// Allocates a function descriptor that refers to args.
Func* make_func(const char* name,bool defined,int nargs,Arg* args);
// Frees a function descriptor and its args array.
void free_func(Func* func);
#endif /* func_h */

View File

@ -8,45 +8,409 @@
#include "generate.h"
#include "ast.h"
#include "env.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
static char* prg_asm;
static int prgsize=0;
static int nextlabel=0;
Env* env=NULL;
// Appends str to the assembly text being built in prg_asm.
//
// The buffer is grown by the length of str on every call. If the buffer
// does not exist yet, it is created and started as an empty string so the
// append has a terminator to find. Exits with an error message when the
// buffer cannot be grown.
void prg_add(const char* str) {
    size_t len=strlen(str);
    // Keep the old pointer until realloc succeeds so it is not lost on failure.
    char* grown=realloc(prg_asm, (size_t)prgsize+len+1);
    if (!grown) {
        printf("Error: out of memory while building the output assembly.\n");
        exit(1);
    }
    if (!prg_asm) {
        // Fresh allocation: contents are indeterminate, so terminate it.
        grown[0]='\0';
    }
    prg_asm=grown;
    prgsize+=(int)len;
    strncat(prg_asm,str,len);
}
// Returns a freshly allocated, unique label name of the form "l<N>" and
// advances the label counter. The caller owns the string and must free()
// it. Exits with an error message on allocation or formatting failure.
static char* next_label() {
    char* buf=malloc(sizeof(char)*4097);
    if (!buf) {
        printf("Error: out of memory while creating a label.\n");
        exit(1);
    }
    buf[0]='l';
    // The number is written after the leading 'l'.
    int written=snprintf(buf+1, 4096, "%d",nextlabel);
    if(written<0 || written>=4096) {
        printf("Error: was not able to convert next label number to a string.\n");
        exit(1);
    }
    nextlabel++;
    return buf;
}
static void generate_expr(AstNode* ast) {
const char* type=ast->data;
if(strcmp("num", type)==0) {
strncat(prg_asm, "mov rax,", 8);
strncat(prg_asm,ast->children[0]->data,strlen(ast->children[0]->data));
strncat(prg_asm, "\n", 1);
if (strcmp("num", type)==0) {
prg_add("mov rax,");
prg_add(ast->children[0]->data);
prg_add("\n");
return;
} else if (strcmp("var", type)==0) {
prg_add("mov rax,[rbp-");
char buf[4096];
int offset=get_offset(env, ast->children[0]->data);
if (offset==-1) {
printf("Error: no such variable %s\n",ast->children[0]->data);
exit(1);
}
int written=snprintf(buf, 4096, "%d",offset);
if(written<0 || written>=4096) {
printf("Error: was not able to convert offset for variable %s to a string.\n",ast->children[0]->data);
exit(1);
}
prg_add(buf);
prg_add("]\n");
return;
} else if (strcmp("neg", type)==0) {
generate_expr(ast->children[0]);
prg_add("neg rax\n");
return;
} else if (strcmp("not", type)==0) {
generate_expr(ast->children[0]);
prg_add("cmp rax,0\nmove rax,0\nsete al\n");
return;
} else if (strcmp("comp", type)==0) {
generate_expr(ast->children[0]);
prg_add("not rax\n");
return;
} else if (strcmp("preinc", type)==0 || strcmp("postinc", type)==0 ) {
prg_add("mov rax,[rbp-");
char buf[4096];
int offset=get_offset(env, ast->children[0]->children[0]->data);
if (offset==-1) {
printf("Error: no such variable %s\n",ast->children[0]->children[0]->data);
exit(1);
}
int written=snprintf(buf, 4096, "%d",offset);
if (written<0 || written>=4096) {
printf("Error: was not able to convert offset for variable %s to a string.\n",ast->children[0]->data);
exit(1);
}
prg_add(buf);
prg_add("]\ninc rax\nmov [rbp-");
prg_add(buf);
prg_add("],rax\n");
if (strcmp("postinc", type)==0) {
prg_add("dec rax\n");
}
return;
} else if (strcmp("predec", type)==0 || strcmp("postdec", type)==0) {
prg_add("mov rax,[rbp-");
char buf[4096];
int offset=get_offset(env, ast->children[0]->children[0]->data);
if (offset==-1) {
printf("Error: no such variable %s\n",ast->children[0]->children[0]->data);
exit(1);
}
int written=snprintf(buf, 4096, "%d",offset);
if (written<0 || written>=4096) {
printf("Error: was not able to convert offset for variable %s to a string.\n",ast->children[0]->data);
exit(1);
}
prg_add("]\ndec rax\nmov [rbp-");
prg_add(buf);
prg_add("],rax\n");
if (strcmp("postdec", type)==0) {
prg_add("inc rax\n");
}
return;
} else if (strcmp("cond", type)==0) {
char* e3=next_label();
char* post=next_label();
generate_expr(ast->children[0]);
prg_add("cmp rax,0\n");
prg_add("je ");
prg_add(e3);
prg_add("\n");
generate_expr(ast->children[1]);
prg_add("jmp ");
prg_add(post);
prg_add("\n");
prg_add(e3);
prg_add(":\n");
generate_expr(ast->children[2]);
prg_add(post);
prg_add(":\n");
free(e3);
free(post);
return;
} else if (strcmp("assign", type)==0) {
generate_expr(ast->children[1]);
prg_add("mov [rbp-");
char buf[4096];
int offset=get_offset(env, ast->children[0]->data);
if (offset==-1) {
printf("Error: no such variable %s\n",ast->children[0]->data);
exit(1);
}
int written=snprintf(buf, 4096, "%d",offset);
if(written<0 || written>=4096) {
printf("Error: was not able to convert offset for variable %s to a string.\n",ast->children[0]->data);
exit(1);
}
prg_add(buf);
prg_add("],rax\n");
return;
}
generate_expr(ast->children[0]);
prg_add("push rax\n");
generate_expr(ast->children[1]);
prg_add("pop rcx\n");
if (strcmp(type,"add")==0) {
prg_add("add rax,rcx\n");
} else if (strcmp(type,"sub")==0) {
prg_add("sub rcx,rax\n");
prg_add("mov rax,rcx\n");
} else if (strcmp(type,"mul")==0) {
prg_add("imul rax,rcx\n");
} else if (strcmp(type,"div")==0 || strcmp(type,"mod")==0) {
prg_add("mov rdx,0\npush rcx\npush rax\npop rcx\npop rax\nidiv rcx\n");
if (strcmp(type,"mod")==0) {
prg_add("mov rax,rdx\n");
}
} else if (strcmp(type, "and")==0) {
prg_add("and rcx,rax\n");
} else if (strcmp(type, "or")==0) {
prg_add("or rax,rcx\n");
} else if (strcmp(type, "xor")==0) {
prg_add("xor rax,rcx\n");
} else if (strcmp(type, "land")==0) {
prg_add("cmp rcx,0\nsetne cl\ncmp rax,0\nmov rax,0\nsetne al\nand al,cl\n");
} else if (strcmp(type, "lor")==0) {
prg_add("or rax,rcx\nmov rax,0\nsetne al\n");
} else if (strcmp(type, "sal")==0) {
prg_add("sal rax,rcx\n");
} else if (strcmp(type, "sar")==0) {
prg_add("sar rax,rcx\n");
} else if (strcmp(type, "eq")==0) {
prg_add("cmp rcx,rax\nmov rax,0\nsete al\n");
} else if (strcmp(type, "ne")==0) {
prg_add("cmp rcx,rax\nmov rax,0\nsetne al\n");
} else if (strcmp(type, "lt")==0) {
prg_add("cmp rcx,rax\nmov rax,0\nsetl al\n");
} else if (strcmp(type, "le")==0) {
prg_add("cmp rcx,rax\nmov rax,0\nsetle al\n");
} else if (strcmp(type, "gt")==0) {
prg_add("cmp rcx,rax\nmov rax,0\nsetg al\n");
} else if (strcmp(type, "ge")==0) {
prg_add("cmp rcx,rax\nmov rax,0\nsetge al\n");
} else {
printf("Unknown expr type %s\n",type);
exit(1);
}
}
static void generate_block(AstNode* ast);
// Emits "<insn> <label>\n".
static void gen_jump(const char* insn,const char* label) {
    prg_add(insn);
    prg_add(" ");
    prg_add(label);
    prg_add("\n");
}

// Emits "<label>:\n".
static void gen_label(const char* label) {
    prg_add(label);
    prg_add(":\n");
}

// Emits a jump to label that is taken when the value in rax is zero.
// The flags are set explicitly because they do not otherwise reflect rax.
static void gen_jump_if_false(const char* label) {
    prg_add("cmp rax,0\n");
    gen_jump("je", label);
}

// Emits code for one statement node. A NULL node (empty statement) emits
// nothing. Node types that are not statements are passed to generate_expr().
//
// Loop statements install their own break/continue labels in the current
// scope for the duration of the loop and put the previous labels back
// afterwards, so nested and consecutive loops do not disturb each other.
// Errors (redeclaration, break/continue outside a loop) print a message
// and exit.
static void generate_statement(AstNode* ast) {
    if (!ast) {
        return;
    }
    const char* type=ast->data;
    if (strcmp("return", type)==0) {
        generate_expr(ast->children[0]);
        prg_add("mov rsp,rbp\npop rbp\nret\n");
    } else if (strcmp("vardec", type)==0 || strcmp("vardecinitial", type)==0) {
        if (get_offset(env, ast->children[1]->data)!=-1) {
            printf("Error: Redeclaration of variable %s\n",ast->children[1]->data);
            exit(1);
        }
        add_entry(env,ast->children[0]->data,ast->children[1]->data);
        // The push both reserves the variable's stack slot and initializes it.
        if (strcmp("vardecinitial", type)==0) {
            generate_expr(ast->children[2]);
            prg_add("push rax\n");
        } else {
            prg_add("push 0\n");
        }
    } else if (strcmp("if", type)==0) {
        char* post=next_label();
        generate_expr(ast->children[0]);
        gen_jump_if_false(post);
        generate_statement(ast->children[1]);
        gen_label(post);
        free(post);
    } else if (strcmp("ifelse", type)==0) {
        char* els=next_label();
        char* post=next_label();
        generate_expr(ast->children[0]);
        gen_jump_if_false(els);
        generate_statement(ast->children[1]);
        gen_jump("jmp", post);
        gen_label(els);
        generate_statement(ast->children[2]);
        gen_label(post);
        free(els);
        free(post);
    } else if (strcmp("while", type)==0) {
        char* begin=next_label();
        char* end=next_label();
        char* outer_break=env->breaklabel;
        char* outer_cont=env->contlabel;
        env->breaklabel=end;
        env->contlabel=begin;
        gen_label(begin);
        generate_expr(ast->children[0]);
        gen_jump_if_false(end);
        generate_statement(ast->children[1]);
        gen_jump("jmp", begin);
        gen_label(end);
        env->breaklabel=outer_break;
        env->contlabel=outer_cont;
        free(begin);
        free(end);
    } else if (strcmp("dowhile", type)==0) {
        char* begin=next_label();
        char* cont=next_label();
        char* end=next_label();
        char* outer_break=env->breaklabel;
        char* outer_cont=env->contlabel;
        env->breaklabel=end;
        // continue must still evaluate the condition, so it targets the
        // label placed just before the condition, not the top of the body.
        env->contlabel=cont;
        gen_label(begin);
        generate_statement(ast->children[1]);
        gen_label(cont);
        generate_expr(ast->children[0]);
        prg_add("cmp rax,0\n");
        gen_jump("jne", begin);
        gen_label(end);
        env->breaklabel=outer_break;
        env->contlabel=outer_cont;
        free(begin);
        free(cont);
        free(end);
    } else if (strcmp("for", type)==0) {
        char* cond=next_label();
        char* end=next_label();
        char* cont=next_label();
        // The loop gets its own scope for a variable declared in the init
        // clause. The labels live in that scope only, so the enclosing
        // scope never holds pointers to labels freed below.
        env=new_env(env);
        env->breaklabel=end;
        env->contlabel=cont;
        // children[0] is a marker node ("decl" or "exp") describing children[1].
        if (ast->children[1]) {
            if (strcmp(ast->children[0]->data, "decl")==0) {
                generate_statement(ast->children[1]);
            } else {
                generate_expr(ast->children[1]);
            }
        }
        gen_label(cond);
        if (ast->children[2]) {
            generate_expr(ast->children[2]);
        } else {
            // A missing condition means "always true".
            prg_add("mov rax,1\n");
        }
        gen_jump_if_false(end);
        generate_statement(ast->children[4]);
        gen_label(cont);
        if (ast->children[3]) {
            generate_expr(ast->children[3]);
        }
        gen_jump("jmp", cond);
        gen_label(end);
        // Release the stack slots of variables declared in the loop's scope.
        int bytes=(env->num_els)*8;
        char buf[32];
        int written=snprintf(buf, sizeof buf, "%d",bytes);
        if(written<0 || (size_t)written>=sizeof buf) {
            printf("Error: was not able to convert the bytes to deallocate into a string\n");
            exit(1);
        }
        prg_add("add rsp,");
        prg_add(buf);
        prg_add("\n");
        Env* prev=env->prev;
        if (prev) {
            prev->next=NULL;
        }
        free_env(env);
        env=prev;
        free(cond);
        free(end);
        free(cont);
    } else if (strcmp("break", type)==0) {
        if (!env->breaklabel) {
            printf("Error: break outside of loop\n");
            exit(1);
        }
        gen_jump("jmp", env->breaklabel);
    } else if (strcmp("continue", type)==0) {
        if (!env->contlabel) {
            printf("Error: continue outside of loop\n");
            exit(1);
        }
        gen_jump("jmp", env->contlabel);
    } else if (strcmp("block", type)==0) {
        generate_block(ast);
    } else {
        generate_expr(ast);
    }
}
// Emits code for a "block" node: opens a new scope, generates each child
// statement in order, emits an "add rsp,<n>" that releases 8 bytes per
// variable declared in the scope, then discards the scope and returns to
// the enclosing one.
static void generate_block(AstNode* ast) {
    env=new_env(env);
    int idx=0;
    while (idx<ast->num_children) {
        generate_statement(ast->children[idx]);
        idx++;
    }
    prg_add("add rsp,");
    char bytes_str[4096];
    int len=snprintf(bytes_str, 4096, "%d",8*env->num_els);
    if (len<0 || len>=4096) {
        printf("Error: was not able to convert the bytes to deallocate into a string\n");
        exit(1);
    }
    prg_add(bytes_str);
    prg_add("\n");
    // Unlink the finished scope from its parent before freeing it.
    Env* outer=env->prev;
    if (outer) {
        outer->next=NULL;
    }
    free_env(env);
    env=outer;
}
// Emits a complete function: the exported "_<name>" symbol, the frame
// setup, the body block (children[2]) and a trailing implicit "return 0"
// for bodies that fall off the end. The name is read from children[1].
static void generate_func(AstNode* ast) {
    const char* name=ast->children[1]->data;
    prg_add(".globl _");
    prg_add(name);
    prg_add("\n_");
    prg_add(name);
    prg_add(":\n");
    prg_add("push rbp\nmov rbp,rsp\n");
    generate_block(ast->children[2]);
    //Generate the implicit return 0:
    AstNode* ret=make_node("return");
    AstNode* num=make_node("num");
    add_child(num, make_node("0"));
    add_child(ret,num);
    generate_statement(ret);
    // The synthetic nodes are not part of the caller's tree; release them here.
    free_tree(ret);
}

View File

@ -14,37 +14,38 @@
#include "parser.h"
#include "generate.h"
#define PARSER_DEBUG 0
#define COMPILER_DEBUG 1
// Compiles the built-in test program and writes the generated assembly to
// out.s. When COMPILER_DEBUG is non-zero, each intermediate stage (source,
// tokens, AST, assembly) is printed as well. Returns 1 if the output file
// cannot be opened.
int main(int argc, const char * argv[]) {
    char* prgstr="int main(){int j=0;for(int i=0;i<7;i++){if(i==5){break;}j+=2;}return j;}";
#if COMPILER_DEBUG
    printf("Program:\n");
    printf("%s",prgstr);
#endif
    Token* tokens=tokenize(prgstr);
#if COMPILER_DEBUG
    printf("Tokens:\n");
    Token* tok=tokens;
    while (tok) {
        print_tok(tok);
        tok=tok->next;
    }
#endif
    AstNode* ast=parse(tokens);
#if COMPILER_DEBUG
    printf("AST:\n");
    print_tree(ast, 0);
#endif
    char* prg=generate_prg(ast);
#if COMPILER_DEBUG
    printf("Output assembly:\n");
    printf("%s",prg);
#endif
    int status=0;
    FILE* outfile=fopen("/Users/peterterpstra/Desktop/projects/xcode/cinc/cinc/out.s","w");
    //compile with gcc -masm=intel out.s -o out
    if (outfile) {
        fputs(prg, outfile);
        fclose(outfile);
    } else {
        printf("Error: could not open the output file for writing.\n");
        status=1;
    }
    // Everything is released exactly once, and only after code generation:
    // AST nodes point at strings that belong to the tokens.
    free(prg);
    free_tree(ast);
    free_toklist(tokens);
    return status;
}

14
cinc/oplist.txt Normal file
View File

@ -0,0 +1,14 @@
1 () Function call LEX
1 [] Array subscripting LEX
1 . Structure and union member access LEX
1 -> Structure and union member access through pointer TODO
1 (type){list} Compound literal LEX
2 (type) Type cast LEX
2 * Indirection (dereference) LEX
2 & Address-of LEX
2 sizeof Size-of TODO
2 _Alignof Alignment requirement TODO
3 * / % Multiplication, division, and remainder DONE
12 || Logical OR DONE
13 ?: Ternary conditional PARSE
14 = Simple assignment DONE

View File

@ -1,4 +0,0 @@
.globl _main
_main:
mov rax,173
ret

41
cinc/out.s Normal file
View File

@ -0,0 +1,41 @@
.globl _main
_main:
push rbp
mov rbp,rsp
mov rax,0
push rax
mov rax,0
push rax
l0:
mov rax,[rbp-16]
push rax
mov rax,7
pop rcx
cmp rcx,rax
mov rax,0
setl al
je l1
mov rax,[rbp-8]
push rax
mov rax,2
pop rcx
add rax,rcx
mov [rbp-8],rax
add rsp,0
l2:
mov rax,[rbp-16]
inc rax
mov [rbp-16],rax
dec rax
jmp l0
l1:
add rsp,8
mov rax,[rbp-8]
mov rsp,rbp
pop rbp
ret
add rsp,8
mov rax,0
mov rsp,rbp
pop rbp
ret

View File

@ -12,55 +12,420 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define PARSER_DEBUG 1
static Token lahead;
static Token* lahead;
// Moves the lookahead to the next token in the list. When PARSER_DEBUG is
// non-zero, the token being consumed is printed first.
static void advance() {
#if PARSER_DEBUG
    printf("Consumed:\n");
    print_tok(lahead);
#endif
    lahead=lahead->next;
}
// Consumes the lookahead token if it has the given type; otherwise prints
// what was expected and what was found, then exits. Types below 128 are
// printed as the character they stand for, others as their number.
static void match(unsigned char type) {
    if (lahead->type!=type) {
        if (type<128) {
            printf("Expected %c, got ",type);
        } else {
            printf("Expected %d, got ",type);
        }
        print_tok(lahead);
        exit(1);
    }
    advance();
}
// Consumes an identifier token and returns its text. The returned pointer
// is the token's own string, not a copy. Exits with an error if the
// lookahead is not an identifier.
static const char* getid() {
    if (lahead->type!=TYPE_IDENT) {
        printf("Expected IDENT, got ");
        print_tok(lahead);
        exit(1);
    }
    const char* id=lahead->val->strval;
    advance();
    return id;
}
// Consumes a number token and returns its text. The returned pointer is
// the token's own string, not a copy. Exits with an error if the lookahead
// is not a number.
static const char* get_num() {
    if (lahead->type!=TYPE_NUM) {
        printf("Expected NUM, got ");
        print_tok(lahead);
        exit(1);
    }
    const char* num=lahead->val->strval;
    advance();
    return num;
}
// Consumes a type-name token and returns its text. The returned pointer is
// the token's own string, not a copy. Exits with an error if the lookahead
// is not a type name.
static const char* gettype() {
    if (lahead->type!=TYPE_TYPE) {
        printf("Expected TYPE, got ");
        print_tok(lahead);
        exit(1);
    }
    const char* id=lahead->val->strval;
    advance();
    return id;
}
static AstNode* expr() {
AstNode* expr_root=make_node("num");
add_child(expr_root, make_node(get_num()));
static AstNode* expr(void);
// Parses a primary expression - a parenthesized expression, a number
// ("num" node) or a variable reference ("var" node) - optionally followed
// by a postfix ++ or --, which wraps the result in a "postinc"/"postdec"
// node. Exits with an error if no primary expression starts here.
static AstNode* factor_lvl1() {
    AstNode* primary;
    if (lahead->type=='(') {
        match('(');
        primary=expr();
        match(')');
    } else if (lahead->type==TYPE_NUM) {
        primary=make_node("num");
        add_child(primary, make_node(get_num()));
    } else if (lahead->type==TYPE_IDENT) {
        primary=make_node("var");
        add_child(primary, make_node(getid()));
    } else {
        printf("Error: Expected factor\n");
        exit(1);
    }
    unsigned char suffix=lahead->type;
    if (suffix!=TYPE_INC && suffix!=TYPE_DEC) {
        return primary;
    }
    match(suffix);
    AstNode* wrapped=make_node(suffix==TYPE_INC ? "postinc" : "postdec");
    add_child(wrapped, primary);
    return wrapped;
}
// Parses a unary expression.
//
// The prefix operators -, ~ and ! produce "neg", "comp" and "not" nodes.
// Their operand is itself a unary expression, so they can be stacked
// (e.g. !!x or -~x). Prefix ++ and -- produce "preinc"/"predec" nodes and
// take a primary expression as operand. Anything else is parsed as a
// primary expression.
static AstNode* factor() {
    const char* kind;
    unsigned char op=lahead->type;
    switch (op) {
        case '-':
            kind="neg";
            break;
        case '~':
            kind="comp";
            break;
        case '!':
            kind="not";
            break;
        case TYPE_INC:
            kind="preinc";
            break;
        case TYPE_DEC:
            kind="predec";
            break;
        default:
            return factor_lvl1();
    }
    match(op);
    AstNode* factor_root=make_node(kind);
    if (op==TYPE_INC || op==TYPE_DEC) {
        add_child(factor_root, factor_lvl1());
    } else {
        add_child(factor_root, factor());
    }
    return factor_root;
}
// Parses a left-associative chain of unary expressions joined by *, / or %,
// producing nested "mul", "div" and "mod" nodes.
static AstNode* term() {
    AstNode* lhs=factor();
    for (;;) {
        unsigned char op=lahead->type;
        const char* kind;
        if (op=='*') {
            kind="mul";
        } else if (op=='/') {
            kind="div";
        } else if (op=='%') {
            kind="mod";
        } else {
            break;
        }
        match(op);
        AstNode* rhs=factor();
        AstNode* joined=make_node(kind);
        add_child(joined, lhs);
        add_child(joined, rhs);
        lhs=joined;
    }
    return lhs;
}
// Parses a left-associative chain of terms joined by + or -, producing
// nested "add" and "sub" nodes.
static AstNode* arithexpr() {
    AstNode* lhs=term();
    for (;;) {
        unsigned char op=lahead->type;
        const char* kind;
        if (op=='+') {
            kind="add";
        } else if (op=='-') {
            kind="sub";
        } else {
            break;
        }
        match(op);
        AstNode* rhs=term();
        AstNode* joined=make_node(kind);
        add_child(joined, lhs);
        add_child(joined, rhs);
        lhs=joined;
    }
    return lhs;
}
// Parses a left-associative chain of arithmetic expressions joined by the
// shift tokens TYPE_SL and TYPE_SR, producing nested "sal" and "sar" nodes.
static AstNode* shiftexpr() {
    AstNode* lhs=arithexpr();
    for (;;) {
        unsigned char op=lahead->type;
        const char* kind;
        if (op==TYPE_SL) {
            kind="sal";
        } else if (op==TYPE_SR) {
            kind="sar";
        } else {
            break;
        }
        match(op);
        AstNode* rhs=arithexpr();
        AstNode* joined=make_node(kind);
        add_child(joined, lhs);
        add_child(joined, rhs);
        lhs=joined;
    }
    return lhs;
}
// Parses a left-associative chain of shift expressions joined by <, >,
// TYPE_LE or TYPE_GE, producing nested "lt", "gt", "le" and "ge" nodes.
static AstNode* relexpr() {
    AstNode* lhs=shiftexpr();
    for (;;) {
        unsigned char op=lahead->type;
        const char* kind;
        if (op=='<') {
            kind="lt";
        } else if (op=='>') {
            kind="gt";
        } else if (op==TYPE_LE) {
            kind="le";
        } else if (op==TYPE_GE) {
            kind="ge";
        } else {
            break;
        }
        match(op);
        AstNode* rhs=shiftexpr();
        AstNode* joined=make_node(kind);
        add_child(joined, lhs);
        add_child(joined, rhs);
        lhs=joined;
    }
    return lhs;
}
// Parses a left-associative chain of relational expressions joined by
// TYPE_EQ or TYPE_NE, producing nested "eq" and "ne" nodes.
static AstNode* eqexpr() {
    AstNode* lhs=relexpr();
    for (;;) {
        unsigned char op=lahead->type;
        const char* kind;
        if (op==TYPE_NE) {
            kind="ne";
        } else if (op==TYPE_EQ) {
            kind="eq";
        } else {
            break;
        }
        match(op);
        AstNode* rhs=relexpr();
        AstNode* joined=make_node(kind);
        add_child(joined, lhs);
        add_child(joined, rhs);
        lhs=joined;
    }
    return lhs;
}
// Parses a left-associative chain of equality expressions joined by &,
// producing nested "and" nodes.
static AstNode* andexpr() {
    AstNode* acc=eqexpr();
    while (lahead->type=='&') {
        match('&');
        AstNode* rhs=eqexpr();
        AstNode* joined=make_node("and");
        add_child(joined, acc);
        add_child(joined, rhs);
        acc=joined;
    }
    return acc;
}
// Parses a left-associative chain of bitwise-and expressions joined by ^,
// producing nested "xor" nodes.
static AstNode* xorexpr() {
    AstNode* acc=andexpr();
    while (lahead->type=='^') {
        match('^');
        AstNode* rhs=andexpr();
        AstNode* joined=make_node("xor");
        add_child(joined, acc);
        add_child(joined, rhs);
        acc=joined;
    }
    return acc;
}
// Parses a left-associative chain of xor expressions joined by |,
// producing nested "or" nodes.
static AstNode* orexpr() {
    AstNode* acc=xorexpr();
    while (lahead->type=='|') {
        match('|');
        AstNode* rhs=xorexpr();
        AstNode* joined=make_node("or");
        add_child(joined, acc);
        add_child(joined, rhs);
        acc=joined;
    }
    return acc;
}
// Parses a left-associative chain of bitwise-or expressions joined by the
// TYPE_LAND token, producing nested "land" nodes.
static AstNode* landexpr() {
    AstNode* acc=orexpr();
    while (lahead->type==TYPE_LAND) {
        match(TYPE_LAND);
        AstNode* rhs=orexpr();
        AstNode* joined=make_node("land");
        add_child(joined, acc);
        add_child(joined, rhs);
        acc=joined;
    }
    return acc;
}
// Parses a left-associative chain of logical-and expressions joined by the
// TYPE_LOR token, producing nested "lor" nodes.
static AstNode* lorexpr() {
    AstNode* acc=landexpr();
    while (lahead->type==TYPE_LOR) {
        match(TYPE_LOR);
        AstNode* rhs=landexpr();
        AstNode* joined=make_node("lor");
        add_child(joined, acc);
        add_child(joined, rhs);
        acc=joined;
    }
    return acc;
}
// Parses a conditional expression: a logical-or expression optionally
// followed by "? expr : condexpr". Without the '?' the logical-or
// expression is returned unchanged; otherwise a "cond" node is returned
// whose children are the condition, the true branch and the false branch.
static AstNode* condexpr() {
    AstNode* lorexp=lorexpr();
    if (lahead->type!='?') {
        return lorexp;
    }
    // Only build the "cond" node once a '?' is seen, so nothing is
    // allocated and dropped for ordinary expressions.
    AstNode* expr_root=make_node("cond");
    add_child(expr_root, lorexp);
    match('?');
    add_child(expr_root, expr());
    match(':');
    add_child(expr_root, condexpr());
    return expr_root;
}
// Parses an expression.
//
// "id = expr" yields an "assign" node whose children are a node named
// after the variable and the value expression. A compound assignment
// "id op= expr" yields the same shape with the value
// op(var(id), expr), so the whole right-hand side is evaluated before the
// operator is applied. Everything else is parsed as a conditional
// expression. Exits with an error on an unrecognized compound operator.
static AstNode* expr() {
    if (lahead->type!=TYPE_IDENT || !lahead->next) {
        return condexpr();
    }
    unsigned char follow=lahead->next->type;
    if (follow=='=') {
        const char* id=getid();
        match('=');
        AstNode* val=expr();
        AstNode* expr_root=make_node("assign");
        add_child(expr_root, make_node(id));
        add_child(expr_root, val);
        return expr_root;
    }
    if (follow==TYPE_COMPSET) {
        const char* id=getid();
        // The token text starts with the operator being combined with '='.
        const char* opstr=lahead->val->strval;
        const char* kind;
        switch (opstr[0]) {
            case '+': kind="add"; break;
            case '-': kind="sub"; break;
            case '*': kind="mul"; break;
            case '/': kind="div"; break;
            case '%': kind="mod"; break;
            case '&': kind="and"; break;
            case '|': kind="or"; break;
            case '^': kind="xor"; break;
            // NOTE(review): assumes a leading '<' or '>' can only come from
            // "<<=" / ">>=" - confirm against the tokenizer.
            case '<': kind="sal"; break;
            case '>': kind="sar"; break;
            default:
                printf("Error: unknown compound assignment operator %s\n",opstr);
                exit(1);
        }
        match(TYPE_COMPSET);
        AstNode* rhs=expr();
        AstNode* current=make_node("var");
        add_child(current, make_node(id));
        AstNode* val=make_node(kind);
        add_child(val, current);
        add_child(val, rhs);
        AstNode* expr_root=make_node("assign");
        add_child(expr_root, make_node(id));
        add_child(expr_root, val);
        return expr_root;
    }
    return condexpr();
}
// Parses an optional expression: returns NULL without consuming anything
// when the lookahead is ';' or ')', otherwise parses and returns an
// expression.
static AstNode* exp_option() {
    unsigned char upcoming=lahead->type;
    return (upcoming!=';' && upcoming!=')') ? expr() : NULL;
}
static AstNode* block(void);
static AstNode* declaration(void);
static AstNode* statement() {
switch (lahead.type) {
switch (lahead->type) {
case TYPE_RETURN: {
match(TYPE_RETURN);
AstNode* return_root=make_node("return");
@ -68,18 +433,115 @@ static AstNode* statement() {
match(';');
return return_root;
}
default:
printf("Error: Expected statement");
exit(1);
case TYPE_IF: {
match(TYPE_IF);
AstNode* if_root=make_node("if"); // We dont know whether it's an if only or a if-else, so we assume if only.
match('(');
add_child(if_root, expr());
match(')');
add_child(if_root, statement());
if (lahead->type==TYPE_ELSE) {
if_root->data="ifelse";
match(TYPE_ELSE);
add_child(if_root, statement());
}
return if_root;
}
case TYPE_FOR: {
match(TYPE_FOR);
match('(');
AstNode* for_root=make_node("for");
if (lahead->type==TYPE_TYPE) {
add_child(for_root, make_node("decl"));
add_child(for_root, declaration());
} else {
add_child(for_root, make_node("exp"));
add_child(for_root, exp_option());
match(';');
}
add_child(for_root, exp_option());
match(';');
add_child(for_root, exp_option());
match(')');
add_child(for_root, statement());
return for_root;
}
case TYPE_WHILE: {
match(TYPE_WHILE);
match('(');
AstNode* while_root=make_node("while");
add_child(while_root, expr());
match(')');
add_child(while_root, statement());
return while_root;
}
case TYPE_DO: {
match(TYPE_DO);
AstNode* dowhile_root=make_node("dowhile");
AstNode* statm=statement();
match(TYPE_WHILE);
add_child(dowhile_root, expr());
add_child(dowhile_root, statm);
match(';');
return dowhile_root;
}
case TYPE_BREAK:
match(TYPE_BREAK);
match(';');
return make_node("break");
case TYPE_CONTINUE:
match(TYPE_CONTINUE);
match(';');
return make_node("continue");
case '{':
return block();
default: {
AstNode* statement_root=exp_option();
match(';');
return statement_root;
// printf("Error: Expected statement\n");
// exit(1);
}
}
}
// Parses "type id;" or "type id = expr;".
// The first form yields a "vardec" node with children (type, id); the
// second yields a "vardecinitial" node with children (type, id, value).
static AstNode* declaration() {
    const char* type=gettype();
    const char* id=getid();
    if (lahead->type!='=') {
        match(';');
        AstNode* plain=make_node("vardec");
        add_child(plain, make_node(type));
        add_child(plain, make_node(id));
        return plain;
    }
    match('=');
    AstNode* initial=expr();
    match(';');
    AstNode* with_init=make_node("vardecinitial");
    add_child(with_init, make_node(type));
    add_child(with_init, make_node(id));
    add_child(with_init, initial);
    return with_init;
}
// Parses one item of a block: a declaration when the lookahead is a type
// token, otherwise a statement. Returns the node produced by whichever parser
// was chosen.
static AstNode* block_item() {
  return (lahead->type==TYPE_TYPE) ? declaration() : statement();
}
// Parses a brace-enclosed block and returns a "block" node whose children are
// the items found between '{' and '}'.
static AstNode* block() {
match('{');
AstNode* block_root=make_node("block");
while (lahead.type!='}') {
add_child(block_root, statement());
// Keep reading items until the closing brace becomes the lookahead.
while (lahead->type!='}') {
AstNode* item=block_item();
// Items for which block_item() returned NULL are not added to the block.
if (item) {
add_child(block_root, item);
}
}
match('}');
return block_root;
@ -99,7 +561,7 @@ static AstNode* func() {
}
// Parser entry point: sets the lookahead from prg (the token to start from)
// and returns the AST node produced by func().
AstNode* parse(Token* prg) {
lahead=*(prg);
lahead=prg;
return func();
}

View File

@ -32,7 +32,7 @@ TokenVal* val_from_int(int val) {
// Allocates a TokenVal of type strval that points at val and has constflag set.
// The string is not copied: only the pointer is stored.
// NOTE(review): constflag presumably tells cleanup code not to free strval —
// confirm against the code that releases TokenVals.
// NOTE(review): the malloc result is used without a NULL check.
TokenVal* val_from_const_str(const char* val) {
TokenVal* tval=malloc(sizeof(TokenVal));
tval->type=strval;
tval->strval=val;
// The cast only drops the const qualifier so the pointer fits the strval field.
tval->strval=(char*)val;
tval->constflag=true;
return tval;
}

View File

@ -31,7 +31,24 @@ typedef struct {
// Token type codes. Single-character tokens use the character itself as their
// type; the codes below cover everything else. The type is stored in an
// unsigned char field of struct _token, so every code must stay at or below 255.
#define TYPE_EOF 130 // end of input (NUL byte reached)
#define TYPE_RETURN 131 // starts a return statement
#define TYPE_TYPE 132 // type name, e.g. "int"; the name is carried as the token value
#define TYPE_LAND 133 // "&&"
#define TYPE_LOR 134 // "||"
#define TYPE_EQ 135 // "=="
#define TYPE_NE 136 // "!="
#define TYPE_LE 137 // "<="
#define TYPE_GE 138 // ">="
#define TYPE_SL 139 // "<<"
#define TYPE_SR 140 // ">>"
#define TYPE_COMPSET 141 // compound assignment (operator followed by '='); the operator is the token value
#define TYPE_INC 142 // "++"
#define TYPE_DEC 143 // "--"
#define TYPE_IF 144 // "if"
#define TYPE_ELSE 145 // "else"
#define TYPE_FOR 146 // "for"
#define TYPE_WHILE 147 // "while"
#define TYPE_DO 148 // "do"
#define TYPE_BREAK 149 // "break"
#define TYPE_CONTINUE 150 // starts a continue statement
struct _token {
unsigned char type;
TokenVal* val;

View File

@ -14,8 +14,6 @@
#include <ctype.h>
#include <string.h>
#define ID_MAX_SIZE 31
// Builds the TYPE_COMPSET token for the compound assignment "<op>=".
// The operator is stored as a NUL-terminated one-character heap string, so it
// is a valid C string for val_from_str() and anything that later reads it.
// NOTE(review): the TokenVal presumably takes ownership of the buffer — confirm
// against val_from_str() and the token cleanup code.
static Token* compset_token(char op, Token* prev) {
char* opstr=malloc(sizeof(char)*2);
opstr[0]=op;
opstr[1]='\0';
return new_token(TYPE_COMPSET, val_from_str(opstr), prev);
}
// Scans one token from prg starting at index *strpos and returns it.
//   strpos: in/out index into prg; advanced past the characters consumed.
//   prg:    NUL-terminated program text; a NUL byte produces TYPE_EOF.
//   prev:   passed through to new_token() (presumably the previous token, used
//           to link the list — confirm against new_token()).
// Keywords, identifiers, numbers and multi-character operators get dedicated
// TYPE_* codes; any other single character is returned with the character
// itself as the token type.
Token* next_token(int* strpos, char* prg, Token* prev) {
char current=prg[*strpos];
// <ctype.h> functions require a value representable as unsigned char, so cast
// before classifying: plain char may be negative.
if (isalpha((unsigned char)current) || current=='_') {
@ -41,10 +39,24 @@ Token* next_token(int* strpos, char* prg, Token* prev) {
return new_token(TYPE_RETURN, NULL, prev);
} else if (strcmp("int",id)==0) {
return new_token(TYPE_TYPE, val_from_const_str("int"), prev);
} else if (strcmp("if",id)==0) {
return new_token(TYPE_IF, NULL, prev);
} else if (strcmp("else",id)==0) {
return new_token(TYPE_ELSE, NULL, prev);
} else if (strcmp("for",id)==0) {
return new_token(TYPE_FOR, NULL, prev);
} else if (strcmp("while",id)==0) {
return new_token(TYPE_WHILE, NULL, prev);
} else if (strcmp("do",id)==0) {
return new_token(TYPE_DO, NULL, prev);
} else if (strcmp("break",id)==0) {
return new_token(TYPE_BREAK, NULL, prev);
} else if (strcmp("continue",id)==0) {
return new_token(TYPE_CONTINUE, NULL, prev);
}
return new_token(TYPE_IDENT, val_from_str(id), prev);
} else if (isdigit((unsigned char)current)) {
char* num=malloc(sizeof(char)*ID_MAX_SIZE+1);
char* num=malloc(sizeof(char)*NUM_MAX_SIZE+1);
int length=1;
num[0]=current;
(*strpos)++;
@ -57,7 +69,7 @@ Token* next_token(int* strpos, char* prg, Token* prev) {
} else {
break;
}
if (length==ID_MAX_SIZE) {
if (length==NUM_MAX_SIZE) {
break;
}
}
@ -71,11 +83,86 @@ Token* next_token(int* strpos, char* prg, Token* prev) {
current=prg[*strpos];
}
return next_token(strpos, prg, prev);
} else if (current=='{' || current=='}' || current=='(' || current==')' || current==';' ) {
} else if (current=='&') {
(*strpos)++;
current=prg[*strpos];
if (current=='&') {
(*strpos)++;
return new_token(TYPE_LAND, NULL, prev);
} else if (current=='=') {
// "&=" has to be recognised here: '&' is consumed by this branch and never
// reaches the generic operator handling further down.
(*strpos)++;
return compset_token('&', prev);
}
return new_token('&', NULL, prev);
} else if (current=='|') {
(*strpos)++;
current=prg[*strpos];
if (current=='|') {
(*strpos)++;
return new_token(TYPE_LOR, NULL, prev);
} else if (current=='=') {
// Same reasoning as for "&=": "|=" can only be caught in this branch.
(*strpos)++;
return compset_token('|', prev);
}
return new_token('|', NULL, prev);
} else if (current=='=') {
(*strpos)++;
current=prg[*strpos];
if (current=='=') {
(*strpos)++;
return new_token(TYPE_EQ, NULL, prev);
}
return new_token('=', NULL, prev);
} else if (current=='!') {
(*strpos)++;
current=prg[*strpos];
if (current=='=') {
(*strpos)++;
return new_token(TYPE_NE, NULL, prev);
}
return new_token('!', NULL, prev);
} else if (current=='<') {
(*strpos)++;
current=prg[*strpos];
if (current=='=') {
(*strpos)++;
return new_token(TYPE_LE, NULL, prev);
} else if (current=='<') {
(*strpos)++;
return new_token(TYPE_SL, NULL, prev);
}
return new_token('<', NULL, prev);
} else if (current=='>') {
(*strpos)++;
current=prg[*strpos];
if (current=='=') {
(*strpos)++;
return new_token(TYPE_GE, NULL, prev);
} else if (current=='>') {
(*strpos)++;
return new_token(TYPE_SR, NULL, prev);
}
return new_token('>', NULL, prev);
} else if (current==0) {
return new_token(TYPE_EOF, NULL, prev);
} else {
// Arithmetic and xor operators: each may be followed by '=' (compound
// assignment), and '+'/'-' may be doubled ("++"/"--").
if (current=='+' || current=='-' || current=='/' || current=='*' || current=='%' || current=='^') {
(*strpos)++;
// Keep the operator in a local; a heap string is only built when a
// TYPE_COMPSET token actually needs one, so nothing leaks on other paths.
char op=current;
current=prg[*strpos];
if (current=='=') {
(*strpos)++;
return compset_token(op, prev);
}
if (op=='+' && current=='+') {
(*strpos)++;
return new_token(TYPE_INC, NULL, prev);
}
if (op=='-' && current=='-') {
(*strpos)++;
return new_token(TYPE_DEC, NULL, prev);
}
return new_token(op, NULL, prev);
}
// Any other character is a single-character token whose type is the
// character itself.
(*strpos)++;
return new_token(current, NULL, prev);
}
return new_token(TYPE_EOF, NULL, prev);
}
Token* tokenize(char* prg) {

View File

@ -11,6 +11,9 @@
#include "token.h"
// Size limit for identifiers.
// NOTE(review): presumably the maximum identifier length read by next_token();
// the identifier loop is not visible here — confirm.
#define ID_MAX_SIZE 31
// Maximum number of digits next_token() reads for a numeric literal; the digit
// buffer is allocated with one extra byte.
#define NUM_MAX_SIZE 31
// Tokenizes the program text prg.
// NOTE(review): presumably returns the head of the resulting token list — confirm.
Token* tokenize(char* prg);
// Releases a token list.
// NOTE(review): presumably one returned by tokenize() — confirm.
void free_toklist(Token* tokens);