clox/compiler.c

805 lines
19 KiB
C

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "common.h"
#include "compiler.h"
#include "scanner.h"
#include "object.h"
#include "memory.h"
#ifdef DEBUG_PRINT_CODE
#include "debug.h"
#endif
#define MAX_LOCALS 0xFFFF // 2^16
typedef struct {
Token current;
Token previous;
bool hadError;
bool panicMode;
} Parser;
typedef enum {
PREC_NONE, PREC_ASSIGNMENT, // =
PREC_OR, // or
PREC_AND, // and
PREC_EQUALITY, // == !=
PREC_COMPARISON, // < > <= >=
PREC_TERM, // + -
PREC_FACTOR, // * /
PREC_UNARY, // ! -
PREC_CALL, // . () []
PREC_PRIMARY
} Precedence;
typedef void (*ParseFn)(bool canAssign);
typedef struct {
ParseFn prefix;
ParseFn infix;
Precedence precedence;
} ParseRule;
typedef struct {
Token name;
int depth;
} Local;
typedef struct Compiler {
Local* locals; //TODO: Make this a dynamic array
int localCount;
int localCapacity;
int scopeDepth;
} Compiler;
Parser parser;
Compiler* current = NULL;
Chunk* compilingChunk;
static Chunk* currentChunk() {
return compilingChunk;
}
static void errorAt(Token* token, const char* message) {
if (parser.panicMode) return;
parser.panicMode = true;
fprintf(stderr, "[line %d] Error", token->line);
if (token->type == TOKEN_EOF) {
fprintf(stderr, " at end");
} else if (token->type == TOKEN_ERROR) {
// Nothing.
} else {
fprintf(stderr, " at '%.*s'", token->length, token->start);
}
fprintf(stderr, ": %s\n", message);
parser.hadError = true;
}
static void errorAtCurrent(const char* message) {
errorAt(&parser.current, message);
}
static void error(const char* message) { // @suppress("Unused static function")
errorAt(&parser.previous, message);
}
static void advance() {
parser.previous = parser.current;
for (;;) {
parser.current = scanToken();
if (parser.current.type != TOKEN_ERROR) break;
errorAtCurrent(parser.current.start);
}
}
static void consume(TokenType type, const char* message) {
if (parser.current.type == type) {
advance();
return;
}
errorAtCurrent(message);
}
static bool check(TokenType type) {
return parser.current.type == type;
}
static bool match(TokenType type) {
if (!check(type)) return false;
advance();
return true;
}
static void emitByte(uint8_t byte) {
writeChunk(currentChunk(), byte, parser.previous.line);
}
static void emitBytes(uint8_t byte1, uint8_t byte2) { // @suppress("Unused static function")
emitByte(byte1);
emitByte(byte2);
}
static void emitLoop(int loopStart) {
emitByte(OP_LOOP);
int offset = currentChunk()->count - loopStart + 2;
if (offset > UINT16_MAX) error("Loop body too large.");
emitByte(offset & 0xff);
emitByte((offset >> 8) & 0xff);
}
static int emitJump(uint8_t instruction) {
emitByte(instruction);
emitByte(0xff);
emitByte(0xff);
return currentChunk()->count - 2;
}
static void emitReturn() {
emitByte(OP_RETURN);
}
static void emitConstant(Value value) {
writeConstant(currentChunk(), value, parser.previous.line);
}
static void patchJump(int offset) {
// -2 to adjust for the bytecode for the jump offset itself.
int jump = currentChunk()->count - offset - 2;
if (jump > UINT16_MAX) {
error("Too much code to jump over.");
}
currentChunk()->code[offset + 1] = (jump >> 8) & 0xff;
currentChunk()->code[offset] = jump & 0xff;
}
static void initCompiler(Compiler* compiler) {
compiler->locals = NULL;
compiler->localCount = 0;
compiler->localCapacity = 0;
compiler->scopeDepth = 0;
current = compiler;
}
static void endCompiler() {
emitReturn();
#ifdef DEBUG_PRINT_CODE
if (!parser.hadError) {
disassembleChunk(currentChunk(), "code");
}
#endif
}
static void beginScope() {
current->scopeDepth++;
}
static void endScope() {
current->scopeDepth--;
while (current->localCount > 0 && current->locals[current->localCount - 1].depth > current->scopeDepth) {
emitByte(OP_POP);
current->localCount--;
}
}
static void expression();
static void statement();
static void declaration();
static ParseRule* getRule(TokenType type);
static void parsePrecedence(Precedence precedence);
static void binary(bool canAssign) {
// Remember the operator.
TokenType operatorType = parser.previous.type;
// Compile the right operand.
ParseRule* rule = getRule(operatorType);
parsePrecedence((Precedence) (rule->precedence + 1));
// Emit the operator instruction.
switch (operatorType) {
case TOKEN_BANG_EQUAL:
emitBytes(OP_EQUAL, OP_NOT);
break;
case TOKEN_EQUAL_EQUAL:
emitByte(OP_EQUAL);
break;
case TOKEN_GREATER:
emitByte(OP_GREATER);
break;
case TOKEN_GREATER_EQUAL:
emitBytes(OP_LESS, OP_NOT);
break;
case TOKEN_LESS:
emitByte(OP_LESS);
break;
case TOKEN_LESS_EQUAL:
emitBytes(OP_GREATER, OP_NOT);
break;
case TOKEN_PLUS:
emitByte(OP_ADD);
break;
case TOKEN_MINUS:
emitByte(OP_SUBTRACT);
break;
case TOKEN_STAR:
emitByte(OP_MULTIPLY);
break;
case TOKEN_SLASH:
emitByte(OP_DIVIDE);
break;
default:
return; // Unreachable.
}
}
static void literal(bool canAssign) {
switch (parser.previous.type) {
case TOKEN_FALSE:
emitByte(OP_FALSE);
break;
case TOKEN_NIL:
emitByte(OP_NIL);
break;
case TOKEN_TRUE:
emitByte(OP_TRUE);
break;
default:
return; // Unreachable.
}
}
static uint32_t makeConstant(Value value) {
int constant = addConstant(currentChunk(), value);
if (constant > 0xFFFFFF) {
error("Too many constants in one chunk.");
return 0;
}
return (uint32_t) constant;
}
static void grouping(bool canAssign) {
expression();
consume(TOKEN_RIGHT_PAREN, "Expect ')' after expression.");
}
static void number(bool canAssign) {
double value = strtod(parser.previous.start, NULL);
emitConstant(NUMBER_VAL(value));
}
static void or_(bool canAssign) {
int elseJump = emitJump(OP_JUMP_IF_FALSE);
int endJump = emitJump(OP_JUMP);
patchJump(elseJump);
emitByte(OP_POP);
parsePrecedence(PREC_OR);
patchJump(endJump);
}
static void string(bool canAssign) {
emitConstant(OBJ_VAL(copyString(parser.previous.start + 1, parser.previous.length - 2)));
}
static void unary(bool canAssign) {
TokenType operatorType = parser.previous.type;
// Compile the operand.
parsePrecedence(PREC_UNARY);
// Emit the operator instruction.
switch (operatorType) {
case TOKEN_BANG:
emitByte(OP_NOT);
break;
case TOKEN_MINUS:
emitByte(OP_NEGATE);
break;
default:
return; // Unreachable.
}
}
static uint32_t identifierConstant(Token* name) {
return makeConstant(OBJ_VAL(copyString(name->start, name->length)));
}
static bool identifiersEqual(Token* a, Token* b) {
if (a->length != b->length) return false;
return memcmp(a->start, b->start, a->length) == 0;
}
static int resolveLocal(Compiler* compiler, Token* name) {
for (int i = compiler->localCount - 1; i >= 0; i--) {
Local* local = &compiler->locals[i];
if (identifiersEqual(name, &local->name)) {
if (local->depth == -1) {
error("Cannot read local variable in its own initializer.");
}
return i;
}
}
return -1;
}
static void addLocal(Token name) {
if (current->localCount == MAX_LOCALS) {
error("Too many local variables in function.");
return;
}
if (current->localCount==current->localCapacity) {
int oldCapacity=current->localCapacity;
current->localCapacity=GROW_CAPACITY(current->localCapacity);
current->locals=GROW_ARRAY(current->locals, Local, oldCapacity, current->localCapacity);
}
Local* local = &current->locals[current->localCount++];
local->name = name;
local->depth = current->scopeDepth;
local->depth = -1;
}
static void declareVariable() {
// Global variables are implicitly declared.
if (current->scopeDepth == 0) return;
Token* name = &parser.previous;
for (int i = current->localCount - 1; i >= 0; i--) {
Local* local = &current->locals[i];
if (local->depth != -1 && local->depth < current->scopeDepth) break;
if (identifiersEqual(name, &local->name)) {
error("Variable with this name already declared in this scope.");
}
}
addLocal(*name);
}
static void compiler_index(bool canAssign) {
expression();
emitByte(OP_INDEX);
if (check(TOKEN_COMMA)) {
errorAtCurrent("Cannot get multiple values at once");
}
consume(TOKEN_RIGHT_BRACKET, "Unterminated index");
}
static void namedVariable(Token name, bool canAssign) {
uint8_t getOp, getLongOp, setOp, setLongOp;
bool indexed = false;
int arg = resolveLocal(current, &name);
if (arg != -1) {
getOp = OP_GET_LOCAL;
getLongOp = OP_GET_LOCAL_LONG;
setOp = OP_SET_LOCAL;
setLongOp = OP_SET_LOCAL_LONG;
} else {
arg = identifierConstant(&name);
getOp = OP_GET_GLOBAL;
getLongOp = OP_GET_GLOBAL_LONG;
setOp = OP_SET_GLOBAL;
setLongOp = OP_SET_GLOBAL_LONG;
}
if (canAssign && match(TOKEN_LEFT_BRACKET)) {
expression();
consume(TOKEN_RIGHT_BRACKET, "Unterminated index");
indexed = true;
}
if (canAssign && match(TOKEN_EQUAL)) {
expression();
if (indexed) {
if (arg < 256) {
emitBytes(getOp, arg);
} else {
emitByte(getLongOp);
emitByte(arg & 0xFF0000 >> 16);
emitByte(arg & 0xFF00 >> 8);
emitByte(arg & 0xFF);
}
emitByte(OP_SET_INDEX);
} else if (arg < 256) {
emitBytes(setOp, arg);
} else {
emitByte(setLongOp);
emitByte(arg & 0xFF0000 >> 16);
emitByte(arg & 0xFF00 >> 8);
emitByte(arg & 0xFF);
}
} else {
if (arg < 256) {
emitBytes(getOp, arg);
} else {
emitByte(getLongOp);
emitByte(arg & 0xFF0000 >> 16);
emitByte(arg & 0xFF00 >> 8);
emitByte(arg & 0xFF);
}
if (indexed) {
emitByte(OP_INDEX_FLIPPED);
}
}
}
static void variable(bool canAssign) {
namedVariable(parser.previous, canAssign);
}
static void and_(bool canAssign) {
int endJump = emitJump(OP_JUMP_IF_FALSE);
emitByte(OP_POP);
parsePrecedence(PREC_AND);
patchJump(endJump);
}
static void array(bool canAssign) {
expression();
if (check(TOKEN_COMMA)) {
int numValues = 0;
while (!check(TOKEN_RIGHT_BRACKET) && !check(TOKEN_EOF)) {
consume(TOKEN_COMMA, "Commas must follow every value in an array except the last");
expression();
numValues++;
if (numValues == 256) {
fprintf(stderr, "[line %d] Error: Cannot have more than 256 values in an array literal", parser.current.line);
}
}
emitByte(OP_ARRAY);
emitByte(numValues + 1);
} else {
emitByte(OP_ARRAY);
emitByte(1);
}
consume(TOKEN_RIGHT_BRACKET, "Unterminated array");
}
static void hash(bool canAssign) {
expression();
if (check(TOKEN_ROCKET)) {
int numValues = 0;
while (!check(TOKEN_EOF)) {
consume(TOKEN_ROCKET, "=> must follow every key in a hash");
expression();
if (check(TOKEN_RIGHT_BRACE)) {
break;
}
consume(TOKEN_COMMA, "Commas must follow every key-value pair in a hash except the last");
expression();
numValues++;
if (numValues == 256) {
fprintf(stderr, "[line %d] Error: Cannot have more than 256 key-value pairs in a hash literal", parser.current.line);
}
}
emitByte(OP_HASH);
emitByte(numValues + 1);
} else {
consume(TOKEN_ROCKET, "=> must follow every key in a hash");
expression();
emitByte(OP_HASH);
emitByte(1);
}
consume(TOKEN_RIGHT_BRACE, "Unterminated hash");
}
ParseRule rules[] = { { grouping, NULL, PREC_CALL }, // TOKEN_LEFT_PAREN
{ NULL, NULL, PREC_NONE }, // TOKEN_RIGHT_PAREN
{ hash, NULL, PREC_CALL }, // TOKEN_LEFT_BRACE
{ NULL, NULL, PREC_NONE }, // TOKEN_RIGHT_BRACE
{ array, compiler_index, PREC_CALL }, // TOKEN_LEFT_BRACKET
{ NULL, NULL, PREC_NONE }, // TOKEN_RIGHT_BRACKET
{ NULL, NULL, PREC_NONE }, // TOKEN_COMMA
{ NULL, NULL, PREC_CALL }, // TOKEN_DOT
{ unary, binary, PREC_TERM }, // TOKEN_MINUS
{ NULL, binary, PREC_TERM }, // TOKEN_PLUS
{ NULL, NULL, PREC_NONE }, // TOKEN_SEMICOLON
{ NULL, binary, PREC_FACTOR }, // TOKEN_SLASH
{ NULL, binary, PREC_FACTOR }, // TOKEN_STAR
{ unary, NULL, PREC_NONE }, // TOKEN_BANG
{ NULL, binary, PREC_EQUALITY }, // TOKEN_BANG_EQUAL
{ NULL, NULL, PREC_NONE }, // TOKEN_EQUAL
{ NULL, binary, PREC_EQUALITY }, // TOKEN_EQUAL_EQUAL
{ NULL, binary, PREC_COMPARISON }, // TOKEN_GREATER
{ NULL, binary, PREC_COMPARISON }, // TOKEN_GREATER_EQUAL
{ NULL, binary, PREC_COMPARISON }, // TOKEN_LESS
{ NULL, binary, PREC_COMPARISON }, // TOKEN_LESS_EQUAL
{ NULL, NULL, PREC_NONE }, // TOKEN_ROCKET
{ variable, NULL, PREC_NONE }, // TOKEN_IDENTIFIER
{ string, NULL, PREC_NONE }, // TOKEN_STRING
{ number, NULL, PREC_NONE }, // TOKEN_NUMBER
{ NULL, and_, PREC_AND }, // TOKEN_AND
{ NULL, NULL, PREC_NONE }, // TOKEN_CLASS
{ NULL, NULL, PREC_NONE }, // TOKEN_ELSE
{ literal, NULL, PREC_NONE }, // TOKEN_FALSE
{ NULL, NULL, PREC_NONE }, // TOKEN_FOR
{ NULL, NULL, PREC_NONE }, // TOKEN_FUN
{ NULL, NULL, PREC_NONE }, // TOKEN_IF
{ literal, NULL, PREC_NONE }, // TOKEN_TRUE
{ NULL, or_, PREC_OR }, // TOKEN_OR
{ NULL, NULL, PREC_NONE }, // TOKEN_PRINT
{ NULL, NULL, PREC_NONE }, // TOKEN_RETURN
{ NULL, NULL, PREC_NONE }, // TOKEN_SUPER
{ NULL, NULL, PREC_NONE }, // TOKEN_THIS
{ literal, NULL, PREC_NONE }, // TOKEN_TRUE
{ NULL, NULL, PREC_NONE }, // TOKEN_VAR
{ NULL, NULL, PREC_NONE }, // TOKEN_WHILE
{ NULL, NULL, PREC_NONE }, // TOKEN_ERROR
{ NULL, NULL, PREC_NONE }, // TOKEN_EOF
};
static void parsePrecedence(Precedence precedence) {
advance();
ParseFn prefixRule = getRule(parser.previous.type)->prefix;
if (prefixRule == NULL) {
error("Expect expression.");
return;
}
bool canAssign = precedence <= PREC_ASSIGNMENT;
prefixRule(canAssign);
while (precedence <= getRule(parser.current.type)->precedence) {
advance();
ParseFn infixRule = getRule(parser.previous.type)->infix;
infixRule(canAssign);
}
if (canAssign && match(TOKEN_EQUAL)) {
error("Invalid assignment target.");
expression();
}
}
static uint32_t parseVariable(const char* errorMessage) {
consume(TOKEN_IDENTIFIER, errorMessage);
declareVariable();
if (current->scopeDepth > 0) return 0;
return identifierConstant(&parser.previous);
}
static void markInitialized() {
if (current->scopeDepth == 0) return;
current->locals[current->localCount - 1].depth = current->scopeDepth;
}
static void defineVariable(uint32_t global) {
if (current->scopeDepth > 0) {
markInitialized();
return;
}
if (global < 256) {
emitBytes(OP_DEFINE_GLOBAL, global);
} else {
emitByte(OP_DEFINE_GLOBAL_LONG);
emitByte(global & 0xFF0000 >> 16);
emitByte(global & 0xFF00 >> 8);
emitByte(global & 0xFF);
}
}
static ParseRule* getRule(TokenType type) {
return &rules[type];
}
void expression() {
parsePrecedence(PREC_ASSIGNMENT);
}
static void block() {
while (!check(TOKEN_RIGHT_BRACE) && !check(TOKEN_EOF)) {
declaration();
}
consume(TOKEN_RIGHT_BRACE, "Expect '}' after block.");
}
static void expressionStatement() {
expression();
consume(TOKEN_SEMICOLON, "Expect ';' after expression.");
emitByte(OP_POP);
}
static void varDeclaration();
static void forStatement() {
beginScope();
consume(TOKEN_LEFT_PAREN, "Expect '(' after 'for'.");
if (match(TOKEN_VAR)) {
varDeclaration();
} else if (match(TOKEN_SEMICOLON)) {
} else {
expressionStatement();
}
int loopStart = currentChunk()->count;
int exitJump = -1;
if (!match(TOKEN_SEMICOLON)) {
expression();
consume(TOKEN_SEMICOLON, "Expect ';' after loop condition.");
// Jump out of the loop if the condition is false
exitJump = emitJump(OP_JUMP_IF_FALSE);
emitByte(OP_POP); // Condition.
}
if (!match(TOKEN_RIGHT_PAREN)) {
int bodyJump = emitJump(OP_JUMP);
int incrementStart = currentChunk()->count;
expression();
emitByte(OP_POP);
consume(TOKEN_RIGHT_PAREN, "Expect ')' after for clauses.");
emitLoop(loopStart);
loopStart = incrementStart;
patchJump(bodyJump);
}
statement();
emitLoop(loopStart);
if (exitJump != -1) {
patchJump(exitJump);
emitByte(OP_POP);
}
endScope();
}
static void ifStatement() {
consume(TOKEN_LEFT_PAREN, "Expect '(' after 'if'.");
expression();
consume(TOKEN_RIGHT_PAREN, "Expect ')' after condition.");
int thenJump = emitJump(OP_JUMP_IF_FALSE);
emitByte(OP_POP);
statement();
int elseJump = emitJump(OP_JUMP);
patchJump(thenJump);
emitByte(OP_POP);
if (match(TOKEN_ELSE)) statement();
patchJump(elseJump);
}
static void printStatement() {
expression();
consume(TOKEN_SEMICOLON, "Expect ';' after value.");
emitByte(OP_PRINT);
}
static void whileStatement() {
int loopStart = currentChunk()->count;
consume(TOKEN_LEFT_PAREN, "Expect '(' after 'while'.");
expression();
consume(TOKEN_RIGHT_PAREN, "Expect ')' after condition.");
int exitJump = emitJump(OP_JUMP_IF_FALSE);
emitByte(OP_POP);
statement();
emitLoop(loopStart);
patchJump(exitJump);
emitByte(OP_POP);
}
static void synchronize() {
parser.panicMode = false;
while (parser.current.type != TOKEN_EOF) {
if (parser.previous.type == TOKEN_SEMICOLON) return;
switch (parser.current.type) {
case TOKEN_CLASS:
case TOKEN_FUN:
case TOKEN_VAR:
case TOKEN_FOR:
case TOKEN_IF:
case TOKEN_WHILE:
case TOKEN_PRINT:
case TOKEN_RETURN:
return;
default:
// Do nothing.
;
}
advance();
}
}
static void statement() {
if (match(TOKEN_PRINT)) {
printStatement();
} else if (match(TOKEN_FOR)) {
forStatement();
} else if (match(TOKEN_IF)) {
ifStatement();
} else if (match(TOKEN_WHILE)) {
whileStatement();
} else if (match(TOKEN_LEFT_BRACE)) {
beginScope();
block();
endScope();
} else {
expressionStatement();
}
}
static void varDeclaration() {
uint32_t global = parseVariable("Expect variable name.");
if (match(TOKEN_EQUAL)) {
expression();
} else {
emitByte(OP_NIL);
}
consume(TOKEN_SEMICOLON, "Expect ';' after variable declaration.");
defineVariable(global);
}
static void declaration() {
if (match(TOKEN_VAR)) {
varDeclaration();
} else {
statement();
}
if (parser.panicMode) synchronize();
}
bool compile(const char* source, Chunk* chunk, bool repl) {
initScanner(source);
Compiler* compiler = malloc(sizeof(Compiler));
initCompiler(compiler);
compilingChunk = chunk;
parser.hadError = false;
parser.panicMode = false;
advance();
if (repl && !scannerHasSemicolons()) {
expression();
emitByte(OP_PRINT);
} else {
while (!match(TOKEN_EOF)) {
declaration();
}
}
endCompiler();
free(compiler);
return !parser.hadError;
}