#include #include #include #include "common.h" #include "compiler.h" #include "scanner.h" #include "object.h" #include "memory.h" #ifdef DEBUG_PRINT_CODE #include "debug.h" #endif #define MAX_LOCALS 0xFFFF // 2^16 typedef struct { Token current; Token previous; bool hadError; bool panicMode; } Parser; typedef enum { PREC_NONE, PREC_ASSIGNMENT, // = PREC_OR, // or PREC_AND, // and PREC_EQUALITY, // == != PREC_COMPARISON, // < > <= >= PREC_TERM, // + - PREC_FACTOR, // * / PREC_UNARY, // ! - PREC_CALL, // . () [] PREC_PRIMARY } Precedence; typedef void (*ParseFn)(bool canAssign); typedef struct { ParseFn prefix; ParseFn infix; Precedence precedence; } ParseRule; typedef struct { Token name; int depth; } Local; typedef struct Compiler { Local* locals; int localCount; int localCapacity; int scopeDepth; } Compiler; Parser parser; Compiler* current = NULL; Chunk* compilingChunk; static Chunk* currentChunk() { return compilingChunk; } static void errorAt(Token* token, const char* message) { if (parser.panicMode) return; parser.panicMode = true; fprintf(stderr, "[line %d] Error", token->line); if (token->type == TOKEN_EOF) { fprintf(stderr, " at end"); } else if (token->type == TOKEN_ERROR) { // Nothing. } else { fprintf(stderr, " at '%.*s'", token->length, token->start); } fprintf(stderr, ": %s\n", message); parser.hadError = true; } static void errorAtCurrent(const char* message) { errorAt(&parser.current, message); } static void error(const char* message) { // @suppress("Unused static function") errorAt(&parser.previous, message); } static void advance() { parser.previous = parser.current; for (;;) { parser.current = scanToken(); if (parser.current.type != TOKEN_ERROR) break; errorAtCurrent(parser.current.start); } } static void consume(TokenType type, const char* message) { if (parser.current.type == type) { advance(); return; } errorAtCurrent(message); } static bool check(TokenType type) { return parser.current.type == type; } static bool match(TokenType type) { if (!check(type)) return false; advance(); return true; } static void emitByte(uint8_t byte) { writeChunk(currentChunk(), byte, parser.previous.line); } static void emitBytes(uint8_t byte1, uint8_t byte2) { // @suppress("Unused static function") emitByte(byte1); emitByte(byte2); } static void emitLoop(int loopStart) { emitByte(OP_LOOP); int offset = currentChunk()->count - loopStart + 2; if (offset > UINT16_MAX) error("Loop body too large."); emitByte(offset & 0xff); emitByte((offset >> 8) & 0xff); } static int emitJump(uint8_t instruction) { emitByte(instruction); emitByte(0xff); emitByte(0xff); return currentChunk()->count - 2; } static void emitReturn() { emitByte(OP_RETURN); } static void emitConstant(Value value) { writeConstant(currentChunk(), value, parser.previous.line); } static void patchJump(int offset) { // -2 to adjust for the bytecode for the jump offset itself. int jump = currentChunk()->count - offset - 2; if (jump > UINT16_MAX) { error("Too much code to jump over."); } currentChunk()->code[offset + 1] = (jump >> 8) & 0xff; currentChunk()->code[offset] = jump & 0xff; } static void initCompiler(Compiler* compiler) { compiler->locals = NULL; compiler->localCount = 0; compiler->localCapacity = 0; compiler->scopeDepth = 0; current = compiler; } static void endCompiler() { emitReturn(); #ifdef DEBUG_PRINT_CODE if (!parser.hadError) { disassembleChunk(currentChunk(), "code"); } #endif } static void beginScope() { current->scopeDepth++; } static void endScope() { current->scopeDepth--; while (current->localCount > 0 && current->locals[current->localCount - 1].depth > current->scopeDepth) { emitByte(OP_POP); current->localCount--; } } static void expression(); static void statement(); static void declaration(); static ParseRule* getRule(TokenType type); static void parsePrecedence(Precedence precedence); static void binary(bool canAssign) { // Remember the operator. TokenType operatorType = parser.previous.type; // Compile the right operand. ParseRule* rule = getRule(operatorType); parsePrecedence((Precedence) (rule->precedence + 1)); // Emit the operator instruction. switch (operatorType) { case TOKEN_BANG_EQUAL: emitBytes(OP_EQUAL, OP_NOT); break; case TOKEN_EQUAL_EQUAL: emitByte(OP_EQUAL); break; case TOKEN_GREATER: emitByte(OP_GREATER); break; case TOKEN_GREATER_EQUAL: emitBytes(OP_LESS, OP_NOT); break; case TOKEN_LESS: emitByte(OP_LESS); break; case TOKEN_LESS_EQUAL: emitBytes(OP_GREATER, OP_NOT); break; case TOKEN_PLUS: emitByte(OP_ADD); break; case TOKEN_MINUS: emitByte(OP_SUBTRACT); break; case TOKEN_STAR: emitByte(OP_MULTIPLY); break; case TOKEN_SLASH: emitByte(OP_DIVIDE); break; default: return; // Unreachable. } } static void literal(bool canAssign) { switch (parser.previous.type) { case TOKEN_FALSE: emitByte(OP_FALSE); break; case TOKEN_NIL: emitByte(OP_NIL); break; case TOKEN_TRUE: emitByte(OP_TRUE); break; default: return; // Unreachable. } } static uint32_t makeConstant(Value value) { int constant = addConstant(currentChunk(), value); if (constant > 0xFFFFFF) { error("Too many constants in one chunk."); return 0; } return (uint32_t) constant; } static void grouping(bool canAssign) { expression(); consume(TOKEN_RIGHT_PAREN, "Expect ')' after expression."); } static void number(bool canAssign) { double value = strtod(parser.previous.start, NULL); emitConstant(NUMBER_VAL(value)); } static void or_(bool canAssign) { int elseJump = emitJump(OP_JUMP_IF_FALSE); int endJump = emitJump(OP_JUMP); patchJump(elseJump); emitByte(OP_POP); parsePrecedence(PREC_OR); patchJump(endJump); } static void string(bool canAssign) { emitConstant(OBJ_VAL(copyString(parser.previous.start + 1, parser.previous.length - 2))); } static void unary(bool canAssign) { TokenType operatorType = parser.previous.type; // Compile the operand. parsePrecedence(PREC_UNARY); // Emit the operator instruction. switch (operatorType) { case TOKEN_BANG: emitByte(OP_NOT); break; case TOKEN_MINUS: emitByte(OP_NEGATE); break; default: return; // Unreachable. } } static uint32_t identifierConstant(Token* name) { return makeConstant(OBJ_VAL(copyString(name->start, name->length))); } static bool identifiersEqual(Token* a, Token* b) { if (a->length != b->length) return false; return memcmp(a->start, b->start, a->length) == 0; } static int resolveLocal(Compiler* compiler, Token* name) { for (int i = compiler->localCount - 1; i >= 0; i--) { Local* local = &compiler->locals[i]; if (identifiersEqual(name, &local->name)) { if (local->depth == -1) { error("Cannot read local variable in its own initializer."); } return i; } } return -1; } static void addLocal(Token name) { if (current->localCount == MAX_LOCALS) { error("Too many local variables in function."); return; } if (current->localCount==current->localCapacity) { int oldCapacity=current->localCapacity; current->localCapacity=GROW_CAPACITY(current->localCapacity); current->locals=GROW_ARRAY(current->locals, Local, oldCapacity, current->localCapacity); } Local* local = ¤t->locals[current->localCount++]; local->name = name; local->depth = current->scopeDepth; local->depth = -1; } static void declareVariable() { // Global variables are implicitly declared. if (current->scopeDepth == 0) return; Token* name = &parser.previous; for (int i = current->localCount - 1; i >= 0; i--) { Local* local = ¤t->locals[i]; if (local->depth != -1 && local->depth < current->scopeDepth) break; if (identifiersEqual(name, &local->name)) { error("Variable with this name already declared in this scope."); } } addLocal(*name); } static void compiler_index(bool canAssign) { expression(); emitByte(OP_INDEX); if (check(TOKEN_COMMA)) { errorAtCurrent("Cannot get multiple values at once"); } consume(TOKEN_RIGHT_BRACKET, "Unterminated index"); } static void namedVariable(Token name, bool canAssign) { uint8_t getOp, getLongOp, setOp, setLongOp; bool indexed = false; int arg = resolveLocal(current, &name); if (arg != -1) { getOp = OP_GET_LOCAL; getLongOp = OP_GET_LOCAL_LONG; setOp = OP_SET_LOCAL; setLongOp = OP_SET_LOCAL_LONG; } else { arg = identifierConstant(&name); getOp = OP_GET_GLOBAL; getLongOp = OP_GET_GLOBAL_LONG; setOp = OP_SET_GLOBAL; setLongOp = OP_SET_GLOBAL_LONG; } if (canAssign && match(TOKEN_LEFT_BRACKET)) { expression(); consume(TOKEN_RIGHT_BRACKET, "Unterminated index"); indexed = true; } if (canAssign && match(TOKEN_EQUAL)) { expression(); if (indexed) { if (arg < 256) { emitBytes(getOp, arg); } else { emitByte(getLongOp); emitByte(arg & 0xFF0000 >> 16); emitByte(arg & 0xFF00 >> 8); emitByte(arg & 0xFF); } emitByte(OP_SET_INDEX); } else if (arg < 256) { emitBytes(setOp, arg); } else { emitByte(setLongOp); emitByte(arg & 0xFF0000 >> 16); emitByte(arg & 0xFF00 >> 8); emitByte(arg & 0xFF); } } else { if (arg < 256) { emitBytes(getOp, arg); } else { emitByte(getLongOp); emitByte(arg & 0xFF0000 >> 16); emitByte(arg & 0xFF00 >> 8); emitByte(arg & 0xFF); } if (indexed) { emitByte(OP_INDEX_FLIPPED); } } } static void variable(bool canAssign) { namedVariable(parser.previous, canAssign); } static void and_(bool canAssign) { int endJump = emitJump(OP_JUMP_IF_FALSE); emitByte(OP_POP); parsePrecedence(PREC_AND); patchJump(endJump); } static void array(bool canAssign) { expression(); if (check(TOKEN_COMMA)) { int numValues = 0; while (!check(TOKEN_RIGHT_BRACKET) && !check(TOKEN_EOF)) { consume(TOKEN_COMMA, "Commas must follow every value in an array except the last"); expression(); numValues++; if (numValues == 256) { fprintf(stderr, "[line %d] Error: Cannot have more than 256 values in an array literal", parser.current.line); } } emitByte(OP_ARRAY); emitByte(numValues + 1); } else { emitByte(OP_ARRAY); emitByte(1); } consume(TOKEN_RIGHT_BRACKET, "Unterminated array"); } static void hash(bool canAssign) { expression(); if (check(TOKEN_ROCKET)) { int numValues = 0; while (!check(TOKEN_EOF)) { consume(TOKEN_ROCKET, "=> must follow every key in a hash"); expression(); if (check(TOKEN_RIGHT_BRACE)) { break; } consume(TOKEN_COMMA, "Commas must follow every key-value pair in a hash except the last"); expression(); numValues++; if (numValues == 256) { fprintf(stderr, "[line %d] Error: Cannot have more than 256 key-value pairs in a hash literal", parser.current.line); } } emitByte(OP_HASH); emitByte(numValues + 1); } else { consume(TOKEN_ROCKET, "=> must follow every key in a hash"); expression(); emitByte(OP_HASH); emitByte(1); } consume(TOKEN_RIGHT_BRACE, "Unterminated hash"); } ParseRule rules[] = { { grouping, NULL, PREC_CALL }, // TOKEN_LEFT_PAREN { NULL, NULL, PREC_NONE }, // TOKEN_RIGHT_PAREN { hash, NULL, PREC_CALL }, // TOKEN_LEFT_BRACE { NULL, NULL, PREC_NONE }, // TOKEN_RIGHT_BRACE { array, compiler_index, PREC_CALL }, // TOKEN_LEFT_BRACKET { NULL, NULL, PREC_NONE }, // TOKEN_RIGHT_BRACKET { NULL, NULL, PREC_NONE }, // TOKEN_COMMA { NULL, NULL, PREC_CALL }, // TOKEN_DOT { unary, binary, PREC_TERM }, // TOKEN_MINUS { NULL, binary, PREC_TERM }, // TOKEN_PLUS { NULL, NULL, PREC_NONE }, // TOKEN_SEMICOLON { NULL, binary, PREC_FACTOR }, // TOKEN_SLASH { NULL, binary, PREC_FACTOR }, // TOKEN_STAR { unary, NULL, PREC_NONE }, // TOKEN_BANG { NULL, binary, PREC_EQUALITY }, // TOKEN_BANG_EQUAL { NULL, NULL, PREC_NONE }, // TOKEN_EQUAL { NULL, binary, PREC_EQUALITY }, // TOKEN_EQUAL_EQUAL { NULL, binary, PREC_COMPARISON }, // TOKEN_GREATER { NULL, binary, PREC_COMPARISON }, // TOKEN_GREATER_EQUAL { NULL, binary, PREC_COMPARISON }, // TOKEN_LESS { NULL, binary, PREC_COMPARISON }, // TOKEN_LESS_EQUAL { NULL, NULL, PREC_NONE }, // TOKEN_ROCKET { variable, NULL, PREC_NONE }, // TOKEN_IDENTIFIER { string, NULL, PREC_NONE }, // TOKEN_STRING { number, NULL, PREC_NONE }, // TOKEN_NUMBER { NULL, and_, PREC_AND }, // TOKEN_AND { NULL, NULL, PREC_NONE }, // TOKEN_CLASS { NULL, NULL, PREC_NONE }, // TOKEN_ELSE { literal, NULL, PREC_NONE }, // TOKEN_FALSE { NULL, NULL, PREC_NONE }, // TOKEN_FOR { NULL, NULL, PREC_NONE }, // TOKEN_FUN { NULL, NULL, PREC_NONE }, // TOKEN_IF { literal, NULL, PREC_NONE }, // TOKEN_TRUE { NULL, or_, PREC_OR }, // TOKEN_OR { NULL, NULL, PREC_NONE }, // TOKEN_PRINT { NULL, NULL, PREC_NONE }, // TOKEN_RETURN { NULL, NULL, PREC_NONE }, // TOKEN_SUPER { NULL, NULL, PREC_NONE }, // TOKEN_THIS { literal, NULL, PREC_NONE }, // TOKEN_TRUE { NULL, NULL, PREC_NONE }, // TOKEN_VAR { NULL, NULL, PREC_NONE }, // TOKEN_WHILE { NULL, NULL, PREC_NONE }, // TOKEN_ERROR { NULL, NULL, PREC_NONE }, // TOKEN_EOF }; static void parsePrecedence(Precedence precedence) { advance(); ParseFn prefixRule = getRule(parser.previous.type)->prefix; if (prefixRule == NULL) { error("Expect expression."); return; } bool canAssign = precedence <= PREC_ASSIGNMENT; prefixRule(canAssign); while (precedence <= getRule(parser.current.type)->precedence) { advance(); ParseFn infixRule = getRule(parser.previous.type)->infix; infixRule(canAssign); } if (canAssign && match(TOKEN_EQUAL)) { error("Invalid assignment target."); expression(); } } static uint32_t parseVariable(const char* errorMessage) { consume(TOKEN_IDENTIFIER, errorMessage); declareVariable(); if (current->scopeDepth > 0) return 0; return identifierConstant(&parser.previous); } static void markInitialized() { if (current->scopeDepth == 0) return; current->locals[current->localCount - 1].depth = current->scopeDepth; } static void defineVariable(uint32_t global) { if (current->scopeDepth > 0) { markInitialized(); return; } if (global < 256) { emitBytes(OP_DEFINE_GLOBAL, global); } else { emitByte(OP_DEFINE_GLOBAL_LONG); emitByte(global & 0xFF0000 >> 16); emitByte(global & 0xFF00 >> 8); emitByte(global & 0xFF); } } static ParseRule* getRule(TokenType type) { return &rules[type]; } void expression() { parsePrecedence(PREC_ASSIGNMENT); } static void block() { while (!check(TOKEN_RIGHT_BRACE) && !check(TOKEN_EOF)) { declaration(); } consume(TOKEN_RIGHT_BRACE, "Expect '}' after block."); } static void expressionStatement() { expression(); consume(TOKEN_SEMICOLON, "Expect ';' after expression."); emitByte(OP_POP); } static void varDeclaration(); static void forStatement() { beginScope(); consume(TOKEN_LEFT_PAREN, "Expect '(' after 'for'."); if (match(TOKEN_VAR)) { varDeclaration(); } else if (match(TOKEN_SEMICOLON)) { } else { expressionStatement(); } int loopStart = currentChunk()->count; int exitJump = -1; if (!match(TOKEN_SEMICOLON)) { expression(); consume(TOKEN_SEMICOLON, "Expect ';' after loop condition."); // Jump out of the loop if the condition is false exitJump = emitJump(OP_JUMP_IF_FALSE); emitByte(OP_POP); // Condition. } if (!match(TOKEN_RIGHT_PAREN)) { int bodyJump = emitJump(OP_JUMP); int incrementStart = currentChunk()->count; expression(); emitByte(OP_POP); consume(TOKEN_RIGHT_PAREN, "Expect ')' after for clauses."); emitLoop(loopStart); loopStart = incrementStart; patchJump(bodyJump); } statement(); emitLoop(loopStart); if (exitJump != -1) { patchJump(exitJump); emitByte(OP_POP); } endScope(); } static void ifStatement() { consume(TOKEN_LEFT_PAREN, "Expect '(' after 'if'."); expression(); consume(TOKEN_RIGHT_PAREN, "Expect ')' after condition."); int thenJump = emitJump(OP_JUMP_IF_FALSE); emitByte(OP_POP); statement(); int elseJump = emitJump(OP_JUMP); patchJump(thenJump); emitByte(OP_POP); if (match(TOKEN_ELSE)) statement(); patchJump(elseJump); } static void printStatement() { expression(); consume(TOKEN_SEMICOLON, "Expect ';' after value."); emitByte(OP_PRINT); } static void whileStatement() { int loopStart = currentChunk()->count; consume(TOKEN_LEFT_PAREN, "Expect '(' after 'while'."); expression(); consume(TOKEN_RIGHT_PAREN, "Expect ')' after condition."); int exitJump = emitJump(OP_JUMP_IF_FALSE); emitByte(OP_POP); statement(); emitLoop(loopStart); patchJump(exitJump); emitByte(OP_POP); } static void synchronize() { parser.panicMode = false; while (parser.current.type != TOKEN_EOF) { if (parser.previous.type == TOKEN_SEMICOLON) return; switch (parser.current.type) { case TOKEN_CLASS: case TOKEN_FUN: case TOKEN_VAR: case TOKEN_FOR: case TOKEN_IF: case TOKEN_WHILE: case TOKEN_PRINT: case TOKEN_RETURN: return; default: // Do nothing. ; } advance(); } } static void statement() { if (match(TOKEN_PRINT)) { printStatement(); } else if (match(TOKEN_FOR)) { forStatement(); } else if (match(TOKEN_IF)) { ifStatement(); } else if (match(TOKEN_WHILE)) { whileStatement(); } else if (match(TOKEN_LEFT_BRACE)) { beginScope(); block(); endScope(); } else { expressionStatement(); } } static void varDeclaration() { uint32_t global = parseVariable("Expect variable name."); if (match(TOKEN_EQUAL)) { expression(); } else { emitByte(OP_NIL); } consume(TOKEN_SEMICOLON, "Expect ';' after variable declaration."); defineVariable(global); } static void declaration() { if (match(TOKEN_VAR)) { varDeclaration(); } else { statement(); } if (parser.panicMode) synchronize(); } bool compile(const char* source, Chunk* chunk, bool repl) { initScanner(source); Compiler* compiler = malloc(sizeof(Compiler)); initCompiler(compiler); compilingChunk = chunk; parser.hadError = false; parser.panicMode = false; advance(); if (repl && !scannerHasSemicolons()) { expression(); emitByte(OP_PRINT); } else { while (!match(TOKEN_EOF)) { declaration(); } } endCompiler(); free(compiler); return !parser.hadError; }