cinc/tokenize.c

204 lines
6.0 KiB
C
Raw Normal View History

2018-09-09 12:15:27 -05:00
//
// tokenize.c
// cinc
//
// Created by Peter Terpstra on 9/7/18.
// Copyright © 2018 Peter Terpstra. All rights reserved.
//
#include "tokenize.h"
#include "token.h"
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <ctype.h>
2018-09-09 12:31:39 -05:00
#include <string.h>
2018-09-09 12:15:27 -05:00
Token* next_token(int* strpos, char* prg, Token* prev) {
char current=prg[*strpos];
if (isalpha(current) || current=='_') {
char* id=malloc(sizeof(char)*ID_MAX_SIZE+1);
int length=1;
id[0]=current;
(*strpos)++;
while (true) {
current=prg[*strpos];
if (isalnum(current) || current=='_') {
id[length]=current;
length++;
(*strpos)++;
} else {
break;
}
if (length==ID_MAX_SIZE) {
break;
}
}
id[length]=0;
2018-09-09 12:31:39 -05:00
if (strcmp("return",id)==0) {
return new_token(TYPE_RETURN, NULL, prev);
} else if (strcmp("int",id)==0) {
return new_token(TYPE_TYPE, val_from_const_str("int"), prev);
} else if (strcmp("if",id)==0) {
return new_token(TYPE_IF, NULL, prev);
} else if (strcmp("else",id)==0) {
return new_token(TYPE_ELSE, NULL, prev);
} else if (strcmp("for",id)==0) {
return new_token(TYPE_FOR, NULL, prev);
} else if (strcmp("while",id)==0) {
return new_token(TYPE_WHILE, NULL, prev);
} else if (strcmp("do",id)==0) {
return new_token(TYPE_DO, NULL, prev);
} else if (strcmp("break",id)==0) {
return new_token(TYPE_BREAK, NULL, prev);
} else if (strcmp("ccontinue",id)==0) {
return new_token(TYPE_CONTINUE, NULL, prev);
2018-09-09 12:31:39 -05:00
}
2018-09-09 12:15:27 -05:00
return new_token(TYPE_IDENT, val_from_str(id), prev);
} else if (isdigit(current)) {
char* num=malloc(sizeof(char)*NUM_MAX_SIZE+1);
2018-09-09 12:15:27 -05:00
int length=1;
2018-09-09 17:31:02 -05:00
num[0]=current;
2018-09-09 12:15:27 -05:00
(*strpos)++;
while (true) {
current=prg[*strpos];
if (isdigit(current)) {
2018-09-09 17:31:02 -05:00
num[length]=current;
2018-09-09 12:15:27 -05:00
length++;
(*strpos)++;
} else {
break;
}
if (length==NUM_MAX_SIZE) {
2018-09-09 12:15:27 -05:00
break;
}
}
2018-09-09 17:31:02 -05:00
num[length]=0;
return new_token(TYPE_NUM, val_from_str(num), prev);
2018-09-09 12:15:27 -05:00
} else if (isblank(current) || current=='\n') {
(*strpos)++;
current=prg[*strpos];
while (isblank(current)) {
(*strpos)++;
current=prg[*strpos];
}
return next_token(strpos, prg, prev);
} else if (current=='&') {
(*strpos)++;
current=prg[*strpos];
if (current=='&') {
(*strpos)++;
return new_token(TYPE_LAND, NULL, prev);
}
return new_token('&', NULL, prev);
} else if (current=='|') {
(*strpos)++;
current=prg[*strpos];
if (current=='|') {
(*strpos)++;
return new_token(TYPE_LOR, NULL, prev);
}
return new_token('|', NULL, prev);
} else if (current=='=') {
(*strpos)++;
current=prg[*strpos];
if (current=='=') {
(*strpos)++;
return new_token(TYPE_EQ, NULL, prev);
}
return new_token('=', NULL, prev);
} else if (current=='!') {
(*strpos)++;
current=prg[*strpos];
if (current=='=') {
(*strpos)++;
return new_token(TYPE_NE, NULL, prev);
}
return new_token('!', NULL, prev);
} else if (current=='<') {
(*strpos)++;
current=prg[*strpos];
if (current=='=') {
(*strpos)++;
return new_token(TYPE_LE, NULL, prev);
} else if (current=='<') {
(*strpos)++;
return new_token(TYPE_SL, NULL, prev);
}
return new_token('<', NULL, prev);
} else if (current=='>') {
(*strpos)++;
current=prg[*strpos];
if (current=='=') {
(*strpos)++;
return new_token(TYPE_GE, NULL, prev);
} else if (current=='>') {
(*strpos)++;
return new_token(TYPE_SR, NULL, prev);
}
return new_token('>', NULL, prev);
} else if (current==0) {
return new_token(TYPE_EOF, NULL, prev);
} else {
if (current=='+' || current=='-' || current=='/' || current=='*' || current=='%' || current=='&' || current=='|' || current=='^') {
(*strpos)++;
char* str=malloc(sizeof(char));
*str=current;
current=prg[*strpos];
if (current=='=') {
(*strpos)++;
return new_token(TYPE_COMPSET, val_from_str(str), prev);
} else {
if (*str=='+' && current=='+') {
(*strpos)++;
return new_token(TYPE_INC, NULL, prev);
}
if (*str=='-' && current=='-') {
(*strpos)++;
return new_token(TYPE_DEC, NULL, prev);
}
return new_token(*str, NULL, prev);
}
}
2018-09-09 12:15:27 -05:00
(*strpos)++;
return new_token(current, NULL, prev);
}
}
Token* tokenize(char* prg) {
int strpos=0;
Token* first=NULL;
Token* tok=NULL;
while (true) {
tok=next_token(&strpos, prg, tok);
if (!first) {
first=tok;
}
if (tok->type==TYPE_EOF) {
break;
}
}
return first;
}
void free_toklist(Token* tokens) {
while (tokens) {
unsigned char type=tokens->type;
if (tokens->val) {
TokenVal val=*(tokens->val);
if (!(val.constflag)) {
if (val.type==strval) {
free(val.strval);
}
}
}
free(tokens->val);
Token* next=tokens->next;
free(tokens);
tokens=next;
if (type==TYPE_EOF) {
break;
}
}
}