From f7afaf34aa57c2434bf9fd81db00e8348aced043 Mon Sep 17 00:00:00 2001 From: pjht Date: Sun, 2 Jun 2019 14:58:15 -0500 Subject: [PATCH] Inital commit --- .gitignore | 37 +++ .project | 26 ++ chunk.c | 51 ++++ chunk.h | 30 +++ common.h | 11 + compiler.c | 777 +++++++++++++++++++++++++++++++++++++++++++++++++++++ compiler.h | 8 + debug.c | 142 ++++++++++ debug.h | 9 + main.c | 78 ++++++ memory.c | 45 ++++ memory.h | 18 ++ object.c | 94 +++++++ object.h | 60 +++++ scanner.c | 261 ++++++++++++++++++ scanner.h | 36 +++ script.lox | 6 + table.c | 139 ++++++++++ table.h | 26 ++ value.c | 136 ++++++++++ value.h | 53 ++++ vm.c | 392 +++++++++++++++++++++++++++ vm.h | 33 +++ 23 files changed, 2468 insertions(+) create mode 100644 .gitignore create mode 100644 .project create mode 100644 chunk.c create mode 100644 chunk.h create mode 100644 common.h create mode 100644 compiler.c create mode 100644 compiler.h create mode 100644 debug.c create mode 100644 debug.h create mode 100644 main.c create mode 100644 memory.c create mode 100644 memory.h create mode 100644 object.c create mode 100644 object.h create mode 100644 scanner.c create mode 100644 scanner.h create mode 100644 script.lox create mode 100644 table.c create mode 100644 table.h create mode 100644 value.c create mode 100644 value.h create mode 100644 vm.c create mode 100644 vm.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4618906 --- /dev/null +++ b/.gitignore @@ -0,0 +1,37 @@ +.DS_Store + +.metadata +bin/ +tmp/ +*.tmp +*.bak +*.swp +*~.nib +local.properties +.settings/ +.loadpath +.recommenders + +# External tool builders +.externalToolBuilders/ + +# Locally stored "Eclipse launch configurations" +*.launch + +# CDT-specific (C/C++ Development Tooling) +.cproject + +# CDT- autotools +.autotools + +# Code Recommenders +.recommenders/ + +# Annotation Processing +.apt_generated/ + +*.o +*.d + +Debug +Release \ No newline at end of file diff --git a/.project b/.project new file mode 100644 
index 0000000..20812fd --- /dev/null +++ b/.project @@ -0,0 +1,26 @@ + + + clox + + + + + + org.eclipse.cdt.managedbuilder.core.genmakebuilder + clean,full,incremental, + + + + + org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder + full,incremental, + + + + + + org.eclipse.cdt.core.cnature + org.eclipse.cdt.managedbuilder.core.managedBuildNature + org.eclipse.cdt.managedbuilder.core.ScannerConfigNature + + diff --git a/chunk.c b/chunk.c new file mode 100644 index 0000000..488dd56 --- /dev/null +++ b/chunk.c @@ -0,0 +1,51 @@ +#include + +#include "chunk.h" +#include "memory.h" +#include "value.h" + +void initChunk(Chunk* chunk) { + chunk->count = 0; + chunk->capacity = 0; + chunk->code = NULL; + chunk->lines = NULL; + initValueArray(&chunk->constants); +} + +void freeChunk(Chunk* chunk) { + FREE_ARRAY(uint8_t, chunk->code, chunk->capacity); + FREE_ARRAY(int, chunk->lines, chunk->capacity); + freeValueArray(&chunk->constants); + initChunk(chunk); +} + +void writeChunk(Chunk* chunk, uint8_t byte, int line) { + if (chunk->capacity < chunk->count + 1) { + int oldCapacity = chunk->capacity; + chunk->capacity = GROW_CAPACITY(oldCapacity); + chunk->code = GROW_ARRAY(chunk->code, uint8_t, oldCapacity, chunk->capacity); + chunk->lines = GROW_ARRAY(chunk->lines, int, oldCapacity, chunk->capacity); + } + + chunk->code[chunk->count] = byte; + chunk->lines[chunk->count] = line; + chunk->count++; +} + +int addConstant(Chunk* chunk, Value value) { + writeValueArray(&chunk->constants, value); + return (chunk->constants.count - 1); +} + +void writeConstant(Chunk* chunk, Value value, int line) { + int i=addConstant(chunk, value); + if (i<256) { + writeChunk(chunk, OP_CONSTANT, line); + writeChunk(chunk, i&0xFF, line); + } else { + writeChunk(chunk, OP_CONSTANT_LONG, line); + writeChunk(chunk, (i>>16)&0xFF, line); + writeChunk(chunk, (i>>8)&0xFF, line); + writeChunk(chunk, i&0xFF, line); + } +} diff --git a/chunk.h b/chunk.h new file mode 100644 index 0000000..0be5109 --- 
/dev/null +++ b/chunk.h @@ -0,0 +1,30 @@ +#ifndef clox_chunk_h +#define clox_chunk_h + +#include "common.h" +#include "value.h" + +typedef enum { + OP_ARRAY, OP_CONSTANT, OP_CONSTANT_LONG, OP_HASH, OP_NIL, OP_TRUE, OP_FALSE, + OP_POP, OP_GET_LOCAL, OP_GET_LOCAL_LONG, OP_INDEX, OP_SET_LOCAL, + OP_SET_LOCAL_LONG, OP_GET_GLOBAL, OP_GET_GLOBAL_LONG, OP_DEFINE_GLOBAL, + OP_SET_GLOBAL, OP_SET_GLOBAL_LONG, OP_DEFINE_GLOBAL_LONG, OP_EQUAL, + OP_GREATER, OP_LESS, OP_ADD, OP_SUBTRACT, OP_MULTIPLY, OP_DIVIDE, OP_NOT, + OP_NEGATE, OP_PRINT, OP_JUMP, OP_JUMP_IF_FALSE, OP_LOOP, OP_RETURN, +} OpCode; + +typedef struct { + int count; + int capacity; + uint8_t* code; + int* lines; + ValueArray constants; +} Chunk; + +void initChunk(Chunk* chunk); +void writeChunk(Chunk* chunk, uint8_t byte, int line); +void freeChunk(Chunk* chunk); +int addConstant(Chunk* chunk, Value value); // Use writeConstant instead +void writeConstant(Chunk* chunk, Value value, int line); + +#endif diff --git a/common.h b/common.h new file mode 100644 index 0000000..8b94e15 --- /dev/null +++ b/common.h @@ -0,0 +1,11 @@ +#ifndef clox_common_h +#define clox_common_h + +#include +#include +#include + +#define DEBUG_TRACE_EXECUTION +#define DEBUG_PRINT_CODE + +#endif diff --git a/compiler.c b/compiler.c new file mode 100644 index 0000000..aca0554 --- /dev/null +++ b/compiler.c @@ -0,0 +1,777 @@ +#include +#include +#include + +#include "common.h" +#include "compiler.h" +#include "scanner.h" +#include "object.h" + +#ifdef DEBUG_PRINT_CODE +#include "debug.h" +#endif + +#define MAX_LOCALS 0xFFFF // 2^16 + +typedef struct { + Token current; + Token previous; + bool hadError; + bool panicMode; +} Parser; + +typedef enum { + PREC_NONE, PREC_ASSIGNMENT, // = + PREC_OR, // or + PREC_AND, // and + PREC_EQUALITY, // == != + PREC_COMPARISON, // < > <= >= + PREC_TERM, // + - + PREC_FACTOR, // * / + PREC_UNARY, // ! - + PREC_CALL, // . 
() [] + PREC_PRIMARY +} Precedence; + +typedef void (*ParseFn)(bool canAssign); + +typedef struct { + ParseFn prefix; + ParseFn infix; + Precedence precedence; +} ParseRule; + +typedef struct { + Token name; + int depth; +} Local; + +typedef struct Compiler { + Local locals[MAX_LOCALS]; //TODO: Make this a dynamic array + int localCount; + int scopeDepth; +} Compiler; + +Parser parser; + +Compiler* current = NULL; + +Chunk* compilingChunk; + +static Chunk* currentChunk() { + return compilingChunk; +} + +static void errorAt(Token* token, const char* message) { + if (parser.panicMode) return; + parser.panicMode = true; + + fprintf(stderr, "[line %d] Error", token->line); + + if (token->type == TOKEN_EOF) { + fprintf(stderr, " at end"); + } else if (token->type == TOKEN_ERROR) { + // Nothing. + } else { + fprintf(stderr, " at '%.*s'", token->length, token->start); + } + + fprintf(stderr, ": %s\n", message); + parser.hadError = true; +} + +static void errorAtCurrent(const char* message) { + errorAt(&parser.current, message); +} + +static void error(const char* message) { // @suppress("Unused static function") + errorAt(&parser.previous, message); +} + +static void advance() { + parser.previous = parser.current; + + for (;;) { + parser.current = scanToken(); + if (parser.current.type != TOKEN_ERROR) break; + + errorAtCurrent(parser.current.start); + } +} + +static void consume(TokenType type, const char* message) { + if (parser.current.type == type) { + advance(); + return; + } + + errorAtCurrent(message); +} + +static bool check(TokenType type) { + return parser.current.type == type; +} + +static bool match(TokenType type) { + if (!check(type)) return false; + advance(); + return true; +} + +static void emitByte(uint8_t byte) { + writeChunk(currentChunk(), byte, parser.previous.line); +} + +static void emitBytes(uint8_t byte1, uint8_t byte2) { // @suppress("Unused static function") + emitByte(byte1); + emitByte(byte2); +} + +static void emitLoop(int loopStart) { + 
emitByte(OP_LOOP); + + int offset = currentChunk()->count - loopStart + 2; + if (offset > UINT16_MAX) error("Loop body too large."); + + emitByte(offset & 0xff); + emitByte((offset >> 8) & 0xff); +} + +static int emitJump(uint8_t instruction) { + emitByte(instruction); + emitByte(0xff); + emitByte(0xff); + return currentChunk()->count - 2; +} + +static void emitReturn() { + emitByte(OP_RETURN); +} + +static void emitConstant(Value value) { + writeConstant(currentChunk(), value, parser.previous.line); +} + +static void patchJump(int offset) { + // -2 to adjust for the bytecode for the jump offset itself. + int jump = currentChunk()->count - offset - 2; + + if (jump > UINT16_MAX) { + error("Too much code to jump over."); + } + + currentChunk()->code[offset + 1] = (jump >> 8) & 0xff; + currentChunk()->code[offset] = jump & 0xff; +} + +static void initCompiler(Compiler* compiler) { + compiler->localCount = 0; + compiler->scopeDepth = 0; + current = compiler; +} + +static void endCompiler() { + emitReturn(); +#ifdef DEBUG_PRINT_CODE + if (!parser.hadError) { + disassembleChunk(currentChunk(), "code"); + } +#endif +} + +static void beginScope() { + current->scopeDepth++; +} + +static void endScope() { + current->scopeDepth--; + + while (current->localCount > 0 && current->locals[current->localCount - 1].depth > current->scopeDepth) { + emitByte(OP_POP); + current->localCount--; + } +} + +static void expression(); +static void statement(); +static void declaration(); +static ParseRule* getRule(TokenType type); +static void parsePrecedence(Precedence precedence); + +static void binary(bool canAssign) { + // Remember the operator. + TokenType operatorType = parser.previous.type; + + // Compile the right operand. + ParseRule* rule = getRule(operatorType); + parsePrecedence((Precedence) (rule->precedence + 1)); + + // Emit the operator instruction. 
+ switch (operatorType) { + case TOKEN_BANG_EQUAL: + emitBytes(OP_EQUAL, OP_NOT); + break; + case TOKEN_EQUAL_EQUAL: + emitByte(OP_EQUAL); + break; + case TOKEN_GREATER: + emitByte(OP_GREATER); + break; + case TOKEN_GREATER_EQUAL: + emitBytes(OP_LESS, OP_NOT); + break; + case TOKEN_LESS: + emitByte(OP_LESS); + break; + case TOKEN_LESS_EQUAL: + emitBytes(OP_GREATER, OP_NOT); + break; + case TOKEN_PLUS: + emitByte(OP_ADD); + break; + case TOKEN_MINUS: + emitByte(OP_SUBTRACT); + break; + case TOKEN_STAR: + emitByte(OP_MULTIPLY); + break; + case TOKEN_SLASH: + emitByte(OP_DIVIDE); + break; + default: + return; // Unreachable. + } +} + +static void literal(bool canAssign) { + switch (parser.previous.type) { + case TOKEN_FALSE: + emitByte(OP_FALSE); + break; + case TOKEN_NIL: + emitByte(OP_NIL); + break; + case TOKEN_TRUE: + emitByte(OP_TRUE); + break; + default: + return; // Unreachable. + } +} + +static uint32_t makeConstant(Value value) { + int constant = addConstant(currentChunk(), value); + if (constant > 0xFFFFFF) { + error("Too many constants in one chunk."); + return 0; + } + + return (uint32_t) constant; +} + +static void grouping(bool canAssign) { + expression(); + consume(TOKEN_RIGHT_PAREN, "Expect ')' after expression."); +} + +static void number(bool canAssign) { + double value = strtod(parser.previous.start, NULL); + emitConstant(NUMBER_VAL(value)); +} + +static void or_(bool canAssign) { + int elseJump = emitJump(OP_JUMP_IF_FALSE); + int endJump = emitJump(OP_JUMP); + + patchJump(elseJump); + emitByte(OP_POP); + + parsePrecedence(PREC_OR); + patchJump(endJump); +} + +static void string(bool canAssign) { + emitConstant(OBJ_VAL(copyString(parser.previous.start + 1, parser.previous.length - 2))); +} + +static void unary(bool canAssign) { + TokenType operatorType = parser.previous.type; + + // Compile the operand. + parsePrecedence(PREC_UNARY); + + // Emit the operator instruction. 
+ switch (operatorType) { + case TOKEN_BANG: + emitByte(OP_NOT); + break; + case TOKEN_MINUS: + emitByte(OP_NEGATE); + break; + default: + return; // Unreachable. + } +} + +static uint32_t identifierConstant(Token* name) { + return makeConstant(OBJ_VAL(copyString(name->start, name->length))); +} + +static bool identifiersEqual(Token* a, Token* b) { + if (a->length != b->length) return false; + return memcmp(a->start, b->start, a->length) == 0; +} + +static int resolveLocal(Compiler* compiler, Token* name) { + for (int i = compiler->localCount - 1; i >= 0; i--) { + Local* local = &compiler->locals[i]; + if (identifiersEqual(name, &local->name)) { + if (local->depth == -1) { + error("Cannot read local variable in its own initializer."); + } + return i; + } + } + + return -1; +} + +static void addLocal(Token name) { + if (current->localCount == MAX_LOCALS) { + error("Too many local variables in function."); + return; + } + + Local* local = &current->locals[current->localCount++]; + local->name = name; + local->depth = current->scopeDepth; + local->depth = -1; +} + +static void declareVariable() { + // Global variables are implicitly declared. 
+ if (current->scopeDepth == 0) return; + + Token* name = &parser.previous; + for (int i = current->localCount - 1; i >= 0; i--) { + Local* local = &current->locals[i]; + if (local->depth != -1 && local->depth < current->scopeDepth) break; + if (identifiersEqual(name, &local->name)) { + error("Variable with this name already declared in this scope."); + } + } + + addLocal(*name); +} + +static void namedVariable(Token name, bool canAssign) { + uint8_t getOp, getLongOp, setOp, setLongOp; + int arg = resolveLocal(current, &name); + if (arg != -1) { + getOp = OP_GET_LOCAL; + getLongOp = OP_GET_LOCAL_LONG; + setOp = OP_SET_LOCAL; + setLongOp = OP_SET_LOCAL_LONG; + } else { + arg = identifierConstant(&name); + getOp = OP_GET_GLOBAL; + getLongOp = OP_GET_GLOBAL_LONG; + setOp = OP_SET_GLOBAL; + setLongOp = OP_SET_GLOBAL_LONG; + } + + if (canAssign && match(TOKEN_EQUAL)) { + expression(); + if (arg < 256) { + emitBytes(setOp, arg); + } else { + emitByte(setLongOp); + emitByte((arg >> 16) & 0xFF); + emitByte((arg >> 8) & 0xFF); + emitByte(arg & 0xFF); + } + } else { + if (arg < 256) { + emitBytes(getOp, arg); + } else { + emitByte(getLongOp); + emitByte((arg >> 16) & 0xFF); + emitByte((arg >> 8) & 0xFF); + emitByte(arg & 0xFF); + } + } +} + +static void variable(bool canAssign) { + namedVariable(parser.previous, canAssign); +} + +static void and_(bool canAssign) { + int endJump = emitJump(OP_JUMP_IF_FALSE); + + emitByte(OP_POP); + parsePrecedence(PREC_AND); + + patchJump(endJump); +} + +static void array_unary(bool canAssign) { + expression(); + if (check(TOKEN_COMMA)) { + int numValues = 0; + while (!check(TOKEN_RIGHT_BRACKET) && !check(TOKEN_EOF)) { + consume(TOKEN_COMMA, "Commas must follow every value in an array except the last"); + expression(); + numValues++; + if (numValues == 256) { + fprintf(stderr, "[line %d] Error: Cannot have more than 256 values in an array literal", parser.current.line); + } + } + emitByte(OP_ARRAY); + emitByte(numValues + 1); + } else { + 
expression(); + emitByte(OP_ARRAY); + emitByte(1); + } + consume(TOKEN_RIGHT_BRACKET, "Unterminated array"); +} + +static void compiler_index(bool canAssign) { + expression(); + emitByte(OP_INDEX); + if (check(TOKEN_COMMA)) { + errorAtCurrent("Cannot get multiple values at once"); + } + consume(TOKEN_RIGHT_BRACKET, "Unterminated index"); +} + +static void hash_unary(bool canAssign) { + expression(); + if (check(TOKEN_ROCKET)) { + int numValues = 0; + while (!check(TOKEN_EOF)) { + consume(TOKEN_ROCKET, "=> must follow every key in a hash"); + expression(); + if (check(TOKEN_RIGHT_BRACE)) { + break; + } + consume(TOKEN_COMMA, "Commas must follow every key-value pair in a hash except the last"); + expression(); + numValues++; + if (numValues == 256) { + fprintf(stderr, "[line %d] Error: Cannot have more than 256 key-value pairs in a hash literal", parser.current.line); + } + } + emitByte(OP_HASH); + emitByte(numValues + 1); + } else { + emitByte(OP_HASH); + emitByte(1); + } + consume(TOKEN_RIGHT_BRACE, "Unterminated hash"); +} + +ParseRule rules[] = { { grouping, NULL, PREC_CALL }, // TOKEN_LEFT_PAREN +{ NULL, NULL, PREC_NONE }, // TOKEN_RIGHT_PAREN +{ hash_unary, NULL, PREC_CALL }, // TOKEN_LEFT_BRACE +{ NULL, NULL, PREC_NONE }, // TOKEN_RIGHT_BRACE +{ array_unary, compiler_index, PREC_CALL }, // TOKEN_LEFT_BRACKET +{ NULL, NULL, PREC_NONE }, // TOKEN_RIGHT_BRACKET +{ NULL, NULL, PREC_NONE }, // TOKEN_COMMA +{ NULL, NULL, PREC_CALL }, // TOKEN_DOT +{ unary, binary, PREC_TERM }, // TOKEN_MINUS +{ NULL, binary, PREC_TERM }, // TOKEN_PLUS +{ NULL, NULL, PREC_NONE }, // TOKEN_SEMICOLON +{ NULL, binary, PREC_FACTOR }, // TOKEN_SLASH +{ NULL, binary, PREC_FACTOR }, // TOKEN_STAR +{ unary, NULL, PREC_NONE }, // TOKEN_BANG +{ NULL, binary, PREC_EQUALITY }, // TOKEN_BANG_EQUAL +{ NULL, NULL, PREC_NONE }, // TOKEN_EQUAL +{ NULL, binary, PREC_EQUALITY }, // TOKEN_EQUAL_EQUAL +{ NULL, binary, PREC_COMPARISON }, // TOKEN_GREATER +{ NULL, binary, PREC_COMPARISON }, // 
TOKEN_GREATER_EQUAL +{ NULL, binary, PREC_COMPARISON }, // TOKEN_LESS +{ NULL, binary, PREC_COMPARISON }, // TOKEN_LESS_EQUAL +{ NULL, NULL, PREC_NONE }, // TOKEN_ROCKET +{ variable, NULL, PREC_NONE }, // TOKEN_IDENTIFIER +{ string, NULL, PREC_NONE }, // TOKEN_STRING +{ number, NULL, PREC_NONE }, // TOKEN_NUMBER +{ NULL, and_, PREC_AND }, // TOKEN_AND +{ NULL, NULL, PREC_NONE }, // TOKEN_CLASS +{ NULL, NULL, PREC_NONE }, // TOKEN_ELSE +{ literal, NULL, PREC_NONE }, // TOKEN_FALSE +{ NULL, NULL, PREC_NONE }, // TOKEN_FOR +{ NULL, NULL, PREC_NONE }, // TOKEN_FUN +{ NULL, NULL, PREC_NONE }, // TOKEN_IF +{ literal, NULL, PREC_NONE }, // TOKEN_NIL (presumably; was labelled TOKEN_TRUE twice - confirm order in scanner.h) +{ NULL, or_, PREC_OR }, // TOKEN_OR +{ NULL, NULL, PREC_NONE }, // TOKEN_PRINT +{ NULL, NULL, PREC_NONE }, // TOKEN_RETURN +{ NULL, NULL, PREC_NONE }, // TOKEN_SUPER +{ NULL, NULL, PREC_NONE }, // TOKEN_THIS +{ literal, NULL, PREC_NONE }, // TOKEN_TRUE +{ NULL, NULL, PREC_NONE }, // TOKEN_VAR +{ NULL, NULL, PREC_NONE }, // TOKEN_WHILE +{ NULL, NULL, PREC_NONE }, // TOKEN_ERROR +{ NULL, NULL, PREC_NONE }, // TOKEN_EOF +}; + +static void parsePrecedence(Precedence precedence) { + advance(); + ParseFn prefixRule = getRule(parser.previous.type)->prefix; + if (prefixRule == NULL) { + error("Expect expression."); + return; + } + + bool canAssign = precedence <= PREC_ASSIGNMENT; + prefixRule(canAssign); + + while (precedence <= getRule(parser.current.type)->precedence) { + advance(); + ParseFn infixRule = getRule(parser.previous.type)->infix; + infixRule(canAssign); + } + + if (canAssign && match(TOKEN_EQUAL)) { + error("Invalid assignment target."); + expression(); + } +} + +static uint32_t parseVariable(const char* errorMessage) { + consume(TOKEN_IDENTIFIER, errorMessage); + + declareVariable(); + if (current->scopeDepth > 0) return 0; + + return identifierConstant(&parser.previous); +} + +static void markInitialized() { + if (current->scopeDepth == 0) return; + current->locals[current->localCount - 1].depth = 
current->scopeDepth; +} + +static void defineVariable(uint32_t global) { + if (current->scopeDepth > 0) { + markInitialized(); + return; + } + + if (global < 256) { + emitBytes(OP_DEFINE_GLOBAL, global); + } else { + emitByte(OP_DEFINE_GLOBAL_LONG); + emitByte((global >> 16) & 0xFF); + emitByte((global >> 8) & 0xFF); + emitByte(global & 0xFF); + } +} + +static ParseRule* getRule(TokenType type) { + return &rules[type]; +} + +void expression() { + parsePrecedence(PREC_ASSIGNMENT); +} + +static void block() { + while (!check(TOKEN_RIGHT_BRACE) && !check(TOKEN_EOF)) { + declaration(); + } + + consume(TOKEN_RIGHT_BRACE, "Expect '}' after block."); +} + +static void expressionStatement() { + expression(); + consume(TOKEN_SEMICOLON, "Expect ';' after expression."); + emitByte(OP_POP); +} + +static void varDeclaration(); + +static void forStatement() { + beginScope(); + + consume(TOKEN_LEFT_PAREN, "Expect '(' after 'for'."); + if (match(TOKEN_VAR)) { + varDeclaration(); + } else if (match(TOKEN_SEMICOLON)) { + } else { + expressionStatement(); + } + + int loopStart = currentChunk()->count; + + int exitJump = -1; + if (!match(TOKEN_SEMICOLON)) { + expression(); + consume(TOKEN_SEMICOLON, "Expect ';' after loop condition."); + + // Jump out of the loop if the condition is false + exitJump = emitJump(OP_JUMP_IF_FALSE); + emitByte(OP_POP); // Condition. 
+ } + + if (!match(TOKEN_RIGHT_PAREN)) { + int bodyJump = emitJump(OP_JUMP); + + int incrementStart = currentChunk()->count; + expression(); + emitByte(OP_POP); + consume(TOKEN_RIGHT_PAREN, "Expect ')' after for clauses."); + + emitLoop(loopStart); + loopStart = incrementStart; + patchJump(bodyJump); + } + + statement(); + + emitLoop(loopStart); + + if (exitJump != -1) { + patchJump(exitJump); + emitByte(OP_POP); + } + + endScope(); +} + +static void ifStatement() { + consume(TOKEN_LEFT_PAREN, "Expect '(' after 'if'."); + expression(); + consume(TOKEN_RIGHT_PAREN, "Expect ')' after condition."); + + int thenJump = emitJump(OP_JUMP_IF_FALSE); + emitByte(OP_POP); + statement(); + + int elseJump = emitJump(OP_JUMP); + + patchJump(thenJump); + + emitByte(OP_POP); + + if (match(TOKEN_ELSE)) statement(); + + patchJump(elseJump); +} + +static void printStatement() { + expression(); + consume(TOKEN_SEMICOLON, "Expect ';' after value."); + emitByte(OP_PRINT); +} + +static void whileStatement() { + int loopStart = currentChunk()->count; + + consume(TOKEN_LEFT_PAREN, "Expect '(' after 'while'."); + expression(); + consume(TOKEN_RIGHT_PAREN, "Expect ')' after condition."); + + int exitJump = emitJump(OP_JUMP_IF_FALSE); + + emitByte(OP_POP); + statement(); + + emitLoop(loopStart); + + patchJump(exitJump); + emitByte(OP_POP); +} + +static void synchronize() { + parser.panicMode = false; + + while (parser.current.type != TOKEN_EOF) { + if (parser.previous.type == TOKEN_SEMICOLON) return; + + switch (parser.current.type) { + case TOKEN_CLASS: + case TOKEN_FUN: + case TOKEN_VAR: + case TOKEN_FOR: + case TOKEN_IF: + case TOKEN_WHILE: + case TOKEN_PRINT: + case TOKEN_RETURN: + return; + + default: + // Do nothing. 
+ ; + } + + advance(); + } +} + +static void statement() { + if (match(TOKEN_PRINT)) { + printStatement(); + } else if (match(TOKEN_FOR)) { + forStatement(); + } else if (match(TOKEN_IF)) { + ifStatement(); + } else if (match(TOKEN_WHILE)) { + whileStatement(); + } else if (match(TOKEN_LEFT_BRACE)) { + beginScope(); + block(); + endScope(); + } else { + expressionStatement(); + } +} + +static void varDeclaration() { + uint32_t global = parseVariable("Expect variable name."); + + if (match(TOKEN_EQUAL)) { + expression(); + } else { + emitByte(OP_NIL); + } + consume(TOKEN_SEMICOLON, "Expect ';' after variable declaration."); + + defineVariable(global); +} + +static void declaration() { + if (match(TOKEN_VAR)) { + varDeclaration(); + } else { + statement(); + } + + if (parser.panicMode) synchronize(); +} + +bool compile(const char* source, Chunk* chunk, bool repl) { + initScanner(source); + Compiler* compiler = malloc(sizeof(Compiler)); + initCompiler(compiler); + + compilingChunk = chunk; + parser.hadError = false; + parser.panicMode = false; + + advance(); + if (repl && !scannerHasSemicolons()) { + expression(); + emitByte(OP_PRINT); + } else { + while (!match(TOKEN_EOF)) { + declaration(); + } + } + endCompiler(); + free(compiler); + return !parser.hadError; +} diff --git a/compiler.h b/compiler.h new file mode 100644 index 0000000..df97058 --- /dev/null +++ b/compiler.h @@ -0,0 +1,8 @@ +#ifndef clox_compiler_h +#define clox_compiler_h + +#include "vm.h" + +bool compile(const char* source, Chunk* chunk, bool repl); + +#endif diff --git a/debug.c b/debug.c new file mode 100644 index 0000000..06892a7 --- /dev/null +++ b/debug.c @@ -0,0 +1,142 @@ +#include "debug.h" +#include "chunk.h" +#include "common.h" +#include "value.h" +#include + +void disassembleChunk(Chunk* chunk, const char* name) { + printf("== %s ==\n", name); + + for (int offset = 0; offset < chunk->count;) { + offset = disassembleInstruction(chunk, offset); + } +} + +static int constantInstruction(const 
char* name, Chunk* chunk, int offset) { + uint8_t constant = chunk->code[offset + 1]; + printf("%-16s %4d '", name, constant); + printValue(chunk->constants.values[constant]); + printf("'\n"); + return offset + 2; +} + +static int longConstantInstruction(const char* name, Chunk* chunk, int offset) { + uint8_t constant_lo = chunk->code[offset + 3]; + uint8_t constant_mid = chunk->code[offset + 2]; + uint8_t constant_hi = chunk->code[offset + 1]; + int constant = constant_lo | (constant_mid << 8) | constant_hi << 16; + printf("%-16s %9d '", name, constant); +// printValue(chunk->constants.values[constant]); + printf("'\n"); + return offset + 4; +} + +static int simpleInstruction(const char* name, int offset) { + printf("%s\n", name); + return offset + 1; +} + +static int byteInstruction(const char* name, Chunk* chunk, int offset) { + uint8_t slot = chunk->code[offset + 1]; + printf("%-16s %4d\n", name, slot); + return offset + 2; +} + +static int threeByteInstruction(const char* name, Chunk* chunk, int offset) { + uint8_t slot_lo = chunk->code[offset + 3]; + uint8_t slot_mid = chunk->code[offset + 2]; + uint8_t slot_hi = chunk->code[offset + 1]; + int slot = slot_lo | (slot_mid << 8) | slot_hi << 16; + printf("%-16s %9d\n", name, slot); + return offset + 4; +} + +static int jumpInstruction(const char* name, int sign, Chunk* chunk, int offset) { + uint16_t jump = (uint16_t) (chunk->code[offset + 2] << 8); + jump |= chunk->code[offset + 1]; + + printf("%-16s %4d -> %d\n", name, offset, offset + 3 + sign * jump); + return offset + 3; +} + +int disassembleInstruction(Chunk* chunk, int offset) { + printf("%04d ", offset); + if (offset > 0 && chunk->lines[offset] == chunk->lines[offset - 1]) { + printf(" | "); + } else { + printf("%4d ", chunk->lines[offset]); + } + + uint8_t instruction = chunk->code[offset]; + switch (instruction) { + case OP_ARRAY: + return byteInstruction("OP_ARRAY", chunk, offset); + case OP_CONSTANT: + return constantInstruction("OP_CONSTANT", chunk, 
offset); + case OP_CONSTANT_LONG: + return longConstantInstruction("OP_CONSTANT_LONG", chunk, offset); + case OP_HASH: + return byteInstruction("OP_HASH", chunk, offset); + case OP_NIL: + return simpleInstruction("OP_NIL", offset); + case OP_TRUE: + return simpleInstruction("OP_TRUE", offset); + case OP_FALSE: + return simpleInstruction("OP_FALSE", offset); + case OP_POP: + return simpleInstruction("OP_POP", offset); + case OP_INDEX: + return simpleInstruction("OP_INDEX", offset); + case OP_GET_LOCAL: + return byteInstruction("OP_GET_LOCAL", chunk, offset); + case OP_SET_LOCAL: + return byteInstruction("OP_SET_LOCAL", chunk, offset); + case OP_GET_LOCAL_LONG: + return threeByteInstruction("OP_GET_LOCAL_LONG", chunk, offset); + case OP_SET_LOCAL_LONG: + return threeByteInstruction("OP_SET_LOCAL_LONG", chunk, offset); + case OP_GET_GLOBAL: + return constantInstruction("OP_GET_GLOBAL", chunk, offset); + case OP_GET_GLOBAL_LONG: + return longConstantInstruction("OP_GET_GLOBAL_LONG", chunk, offset); + case OP_DEFINE_GLOBAL: + return constantInstruction("OP_DEFINE_GLOBAL", chunk, offset); + case OP_DEFINE_GLOBAL_LONG: + return longConstantInstruction("OP_DEFINE_GLOBAL_LONG", chunk, offset); + case OP_SET_GLOBAL: + return constantInstruction("OP_SET_GLOBAL", chunk, offset); + case OP_SET_GLOBAL_LONG: + return longConstantInstruction("OP_SET_GLOBAL_LONG", chunk, offset); + case OP_EQUAL: + return simpleInstruction("OP_EQUAL", offset); + case OP_GREATER: + return simpleInstruction("OP_GREATER", offset); + case OP_LESS: + return simpleInstruction("OP_LESS", offset); + case OP_ADD: + return simpleInstruction("OP_ADD", offset); + case OP_SUBTRACT: + return simpleInstruction("OP_SUBTRACT", offset); + case OP_MULTIPLY: + return simpleInstruction("OP_MULTIPLY", offset); + case OP_DIVIDE: + return simpleInstruction("OP_DIVIDE", offset); + case OP_NOT: + return simpleInstruction("OP_NOT", offset); + case OP_NEGATE: + return simpleInstruction("OP_NEGATE", offset); + case OP_PRINT: + 
return simpleInstruction("OP_PRINT", offset); + case OP_JUMP: + return jumpInstruction("OP_JUMP", 1, chunk, offset); + case OP_JUMP_IF_FALSE: + return jumpInstruction("OP_JUMP_IF_FALSE", 1, chunk, offset); + case OP_LOOP: + return jumpInstruction("OP_LOOP", -1, chunk, offset); + case OP_RETURN: + return simpleInstruction("OP_RETURN", offset); + default: + printf("Unknown opcode %d\n", instruction); + return offset + 1; + } +} diff --git a/debug.h b/debug.h new file mode 100644 index 0000000..5731d05 --- /dev/null +++ b/debug.h @@ -0,0 +1,9 @@ +#ifndef clox_debug_h +#define clox_debug_h + +#include "chunk.h" + +void disassembleChunk(Chunk* chunk, const char* name); +int disassembleInstruction(Chunk* chunk, int offset); + +#endif diff --git a/main.c b/main.c new file mode 100644 index 0000000..2a4a6ae --- /dev/null +++ b/main.c @@ -0,0 +1,78 @@ +#include +#include +#include + +#include "common.h" +#include "chunk.h" +#include "debug.h" +#include "vm.h" + +static void repl() { + char line[1024]; + for (;;) { + printf("> "); + + if (!fgets(line, sizeof(line), stdin)) { + printf("\n"); + break; + } + + line[strlen(line)-1]='\0'; + + interpret(line, true); + } +} + +static char* readFile(const char* path) { + FILE* file = fopen(path, "rb"); + if (file == NULL) { + fprintf(stderr, "Could not open file \"%s\".\n", path); + exit(74); + } + + fseek(file, 0L, SEEK_END); + size_t fileSize = ftell(file); + rewind(file); + + char* buffer = (char*) malloc(fileSize + 1); + if (buffer == NULL) { + fprintf(stderr, "Not enough memory to read \"%s\".\n", path); + exit(74); + } + + size_t bytesRead = fread(buffer, sizeof(char), fileSize, file); + if (bytesRead < fileSize) { + fprintf(stderr, "Could not read file \"%s\".\n", path); + exit(74); + } + + buffer[bytesRead] = '\0'; + + fclose(file); + return buffer; +} + +static void runFile(const char* path) { + char* source = readFile(path); + InterpretResult result = interpret(source, false); + free(source); + + if (result == 
INTERPRET_COMPILE_ERROR) exit(65); + if (result == INTERPRET_RUNTIME_ERROR) exit(70); +} + +int main(int argc, const char* argv[]) { + initVM(); + + if (argc == 1) { + repl(); + } else if (argc == 2) { + runFile(argv[1]); + } else { + fprintf(stderr, "Usage: clox [path]\n"); + exit(64); + } + + freeVM(); + return 0; +} diff --git a/memory.c b/memory.c new file mode 100644 index 0000000..def8f2b --- /dev/null +++ b/memory.c @@ -0,0 +1,45 @@ +#include + +#include "common.h" +#include "memory.h" +#include "vm.h" +#include "value.h" + +void* reallocate(void* previous, size_t oldSize, size_t newSize) { + if (newSize == 0) { + free(previous); + return NULL; + } + return realloc(previous, newSize); +} + +static void freeObject(Obj* object) { + switch (object->type) { + case OBJ_STRING: { + ObjString* string = (ObjString*) object; + FREE_ARRAY(char, string->chars, string->length + 1); + FREE(ObjString, object); + break; + } + case OBJ_ARRAY: { + ObjArray* array = (ObjArray*) object; + freeValueArray(array->array); + FREE(ObjArray, object); + break; + } + case OBJ_HASH: { + ObjHash* hash = (ObjHash*) object; + freeTable(hash->hashTable); + FREE(ObjHash, object); + } + } +} + +void freeObjects() { + Obj* object = vm.objects; + while (object != NULL) { + Obj* next = object->next; + freeObject(object); + object = next; + } +} diff --git a/memory.h b/memory.h new file mode 100644 index 0000000..57e8046 --- /dev/null +++ b/memory.h @@ -0,0 +1,18 @@ +#ifndef clox_memory_h +#define clox_memory_h + +#include "object.h" + +#define ALLOCATE(type, count) (type*)reallocate(NULL, 0, sizeof(type) * (count)) + +#define FREE(type, pointer) reallocate(pointer, sizeof(type), 0) + +#define GROW_CAPACITY(capacity) ((capacity)<8 ? 
8 : (capacity)*2) +#define GROW_ARRAY(previous,type,oldCount,count) (type*)reallocate(previous,sizeof(type)*(oldCount),sizeof(type)*(count)) + +#define FREE_ARRAY(type,pointer,oldCount) reallocate(pointer,sizeof(type)*(oldCount),0) + +void* reallocate(void* previous, size_t oldSize, size_t newSize); +void freeObjects(); + +#endif diff --git a/object.c b/object.c new file mode 100644 index 0000000..6029b76 --- /dev/null +++ b/object.c @@ -0,0 +1,94 @@ +#include +#include + +#include "memory.h" +#include "object.h" +#include "table.h" +#include "value.h" +#include "vm.h" + +#define ALLOCATE_OBJ(type, objectType) \ +(type*)allocateObject(sizeof(type), objectType) + +static Obj* allocateObject(size_t size, ObjType type) { + Obj* object = (Obj*) reallocate(NULL, 0, size); + object->type = type; + + object->next = vm.objects; + vm.objects = object; + return object; +} + +static ObjString* allocateString(char* chars, int length, uint32_t hash) { + ObjString* string = ALLOCATE_OBJ(ObjString, OBJ_STRING); + string->length = length; + string->chars = chars; + string->hash = hash; + + tableSet(&vm.strings, OBJ_VAL(string), NIL_VAL); + + return string; +} + +static uint32_t hashString(const char* key, int length) { + uint32_t hash = 2166136261u; + + for (int i = 0; i < length; i++) { + hash ^= key[i]; + hash *= 16777619; + } + + return hash; +} + +ObjArray* takeArray(ValueArray* valArray) { + ObjArray* array = ALLOCATE_OBJ(ObjArray, OBJ_ARRAY); + array->array = valArray; + return array; +} + +ObjHash* takeHash(Table* hashTable) { + ObjHash* hash = ALLOCATE_OBJ(ObjHash, OBJ_HASH); + hash->hashTable = hashTable; + return hash; +} + + +ObjString* takeString(char* chars, int length) { + uint32_t hash = hashString(chars, length); + return allocateString(chars, length, hash); +} + +ObjString* copyString(const char* chars, int length) { + uint32_t hash = hashString(chars, length); + ObjString* interned = tableFindString(&vm.strings, chars, length, hash); + if (interned != NULL) 
return interned; + + char* heapChars = ALLOCATE(char, length + 1); + memcpy(heapChars, chars, length); + heapChars[length] = '\0'; + + return allocateString(heapChars, length, hash); +} +/* Prints an object value to stdout: strings as their text, arrays as "[a, b, c]" using printValue for each element, hashes as the fixed text "hash". */ +void printObject(Value value) { + switch (OBJ_TYPE(value)) { + case OBJ_STRING: + printf("%s", AS_CSTRING(value)); + break; + case OBJ_ARRAY: + printf("["); + ValueArray* array=AS_VARRAY(value); + for (int i=0;i<array->count;i++) { /* loop bound restored; the "<array->" text had been lost from this line */ + printValue(array->values[i]); + if (i<(array->count-1)) { + printf(", "); + } + } + printf("]"); + break; + case OBJ_HASH: + printf("hash"); + break; + } +} diff --git a/object.h b/object.h new file mode 100644 index 0000000..e45da83 --- /dev/null +++ b/object.h @@ -0,0 +1,60 @@ +#ifndef clox_object_h +#define clox_object_h + +#include "common.h" +#include "table.h" +#include "value.h" + +#define OBJ_TYPE(value) (AS_OBJ(value)->type) + +#define IS_STRING(value) isObjType(value, OBJ_STRING) +#define IS_ARRAY(value) isObjType(value, OBJ_ARRAY) + +/* AS_* macros cast without checking the type tag; callers test with IS_* or OBJ_TYPE first. */ +#define AS_STRING(value) ((ObjString*)AS_OBJ(value)) +#define AS_CSTRING(value) (((ObjString*)AS_OBJ(value))->chars) + +#define AS_ARRAY(value) ((ObjArray*)AS_OBJ(value)) +#define AS_VARRAY(value) (((ObjArray*)AS_OBJ(value))->array) +#define AS_HASH(value) (((ObjHash*)AS_OBJ(value))->hashTable) +/* Type tag stored in every object header. */ +typedef enum { + OBJ_STRING, + OBJ_ARRAY, + OBJ_HASH +} ObjType; +/* Common object header: type tag plus the link used for the vm.objects list. */ +struct sObj { + ObjType type; + struct sObj* next; +}; +/* String object: header, length, character buffer and cached hash. */ +struct sObjString { + Obj obj; + int length; + char* chars; + uint32_t hash; +}; +/* Array object: header plus a pointer to its ValueArray. */ +struct sObjArray { + Obj obj; + ValueArray* array; +}; +/* Hash object: header plus a pointer to its Table. */ +struct sObjHash { + Obj obj; + Table* hashTable; +}; + +ObjArray* takeArray(ValueArray* valArray); +ObjHash* takeHash(Table* hash); +ObjString* takeString(char* chars, int length); +ObjString* copyString(const char* chars, int length); +void printObject(Value value); +/* True when value is an object whose tag equals type. Written as a function so `value` is evaluated only once. */ +static inline bool isObjType(Value value, ObjType type) { + return IS_OBJ(value) && AS_OBJ(value)->type == type; +} + + +#endif diff --git a/scanner.c b/scanner.c new file mode 100644 index 
0000000..f13968e --- /dev/null +++ b/scanner.c @@ -0,0 +1,261 @@ +#include +#include + +#include "common.h" +#include "scanner.h" +/* Scanner state: `start` is the first character of the token being scanned, `current` the next character to read, `source` the buffer handed to initScanner, `line` the current line number. */ +typedef struct { + const char* start; + const char* current; + const char* source; + int line; +} Scanner; + +Scanner scanner; +/* Points the global scanner at the beginning of `source` and sets the line counter to 1. */ +void initScanner(const char* source) { + scanner.start = source; + scanner.current = source; + scanner.source = source; + scanner.line = 1; +} +/* True for ASCII letters and underscore. */ +static bool isAlpha(char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; +} +/* True for ASCII digits 0-9. */ +static bool isDigit(char c) { + return c >= '0' && c <= '9'; +} +/* True when the current character is the terminating NUL. */ +static bool isAtEnd() { + return *scanner.current == '\0'; +} +/* Consumes one character and returns it. */ +static char advance() { + scanner.current++; + return scanner.current[-1]; +} +/* Returns the current character without consuming it. */ +static char peek() { + return *scanner.current; +} +/* Returns the character after the current one, or NUL when already at the end. */ +static char peekNext() { + if (isAtEnd()) return '\0'; + return scanner.current[1]; +} +/* Consumes the current character only when it equals `expected`; reports whether it did. */ +static bool match(char expected) { + if (isAtEnd()) return false; + if (*scanner.current != expected) return false; + + scanner.current++; + return true; +} +/* Builds a token of the given type covering start..current on the current line. */ +static Token makeToken(TokenType type) { + Token token; + token.type = type; + token.start = scanner.start; + token.length = (int) (scanner.current - scanner.start); + token.line = scanner.line; + + return token; +} +/* Builds a TOKEN_ERROR whose text points at `message` rather than at the source buffer. */ +static Token errorToken(const char* message) { + Token token; + token.type = TOKEN_ERROR; + token.start = message; + token.length = (int) strlen(message); + token.line = scanner.line; + + return token; +} +/* Skips spaces, tabs, carriage returns, newlines (counting lines) and line comments introduced by two slashes. */ +static void skipWhitespace() { + for (;;) { + char c = peek(); + switch (c) { + case ' ': + case '\r': + case '\t': + advance(); + break; + case '\n': + scanner.line++; + advance(); + break; + case '/': + if (peekNext() == '/') { + // A comment goes until the end of the line. 
+ while (peek() != '\n' && !isAtEnd()) + advance(); + } else { + return; + } + break; + default: + return; + } + } +} +/* Returns `type` when the current lexeme is exactly start + length characters long and its characters from offset `start` equal `rest`; otherwise TOKEN_IDENTIFIER. */ +static TokenType checkKeyword(int start, int length, const char* rest, TokenType type) { + if (scanner.current - scanner.start == start + length && memcmp(scanner.start + start, rest, length) == 0) { + return type; + } + + return TOKEN_IDENTIFIER; +} +/* Classifies the current lexeme as a keyword or a plain identifier by switching on its first character (and on the second for the f and t groups) and checking the remainder with checkKeyword. */ +static TokenType identifierType() { + switch (scanner.start[0]) { + case 'a': + return checkKeyword(1, 2, "nd", TOKEN_AND); + case 'c': + return checkKeyword(1, 4, "lass", TOKEN_CLASS); + case 'e': + return checkKeyword(1, 3, "lse", TOKEN_ELSE); + case 'f': + if (scanner.current - scanner.start > 1) { + switch (scanner.start[1]) { + case 'a': + return checkKeyword(2, 3, "lse", TOKEN_FALSE); + case 'o': + return checkKeyword(2, 1, "r", TOKEN_FOR); + case 'u': + return checkKeyword(2, 1, "n", TOKEN_FUN); + } + } + break; + case 'i': + return checkKeyword(1, 1, "f", TOKEN_IF); + case 'n': + return checkKeyword(1, 2, "il", TOKEN_NIL); + case 'o': + return checkKeyword(1, 1, "r", TOKEN_OR); + case 'p': + return checkKeyword(1, 4, "rint", TOKEN_PRINT); + case 'r': + return checkKeyword(1, 5, "eturn", TOKEN_RETURN); + case 's': + return checkKeyword(1, 4, "uper", TOKEN_SUPER); + case 't': + if (scanner.current - scanner.start > 1) { + switch (scanner.start[1]) { + case 'h': + return checkKeyword(2, 2, "is", TOKEN_THIS); + case 'r': + return checkKeyword(2, 2, "ue", TOKEN_TRUE); + } + } + break; + case 'v': + return checkKeyword(1, 2, "ar", TOKEN_VAR); + case 'w': + return checkKeyword(1, 4, "hile", TOKEN_WHILE); + } + + return TOKEN_IDENTIFIER; +} +/* Consumes the remaining letters, digits and underscores of an identifier and returns the keyword or identifier token. */ +static Token identifier() { + while (isAlpha(peek()) || isDigit(peek())) + advance(); + + return makeToken(identifierType()); +} +/* Consumes the rest of a number literal: digits, optionally followed by a dot and more digits. */ +static Token number() { + while (isDigit(peek())) + advance(); + + // Look for a fractional part. + if (peek() == '.' && isDigit(peekNext())) { + // Consume the "." 
+ advance(); + + while (isDigit(peek())) + advance(); + } + + return makeToken(TOKEN_NUMBER); +} +/* Scans a string literal up to the closing double quote, counting embedded newlines; returns an error token when the input ends first. The token text includes both quotes. */ +static Token string() { + while (peek() != '"' && !isAtEnd()) { + if (peek() == '\n') scanner.line++; + advance(); + } + + if (isAtEnd()) return errorToken("Unterminated string."); + + // The closing ". + advance(); + return makeToken(TOKEN_STRING); +} +/* Returns the next token: skips whitespace and comments, then dispatches on the first character. Identifiers and numbers go to their helpers; one- and two-character operators are matched in the switch; anything else yields an error token. */ +Token scanToken() { + skipWhitespace(); + + scanner.start = scanner.current; + + if (isAtEnd()) return makeToken(TOKEN_EOF); + + char c = advance(); + if (isAlpha(c)) return identifier(); + if (isDigit(c)) return number(); + + switch (c) { + case '(': + return makeToken(TOKEN_LEFT_PAREN); + case ')': + return makeToken(TOKEN_RIGHT_PAREN); + case '{': + return makeToken(TOKEN_LEFT_BRACE); + case '}': + return makeToken(TOKEN_RIGHT_BRACE); + case '[': + return makeToken(TOKEN_LEFT_BRACKET); + case ']': + return makeToken(TOKEN_RIGHT_BRACKET); + case ';': + return makeToken(TOKEN_SEMICOLON); + case ',': + return makeToken(TOKEN_COMMA); + case '.': + return makeToken(TOKEN_DOT); + case '-': + return makeToken(TOKEN_MINUS); + case '+': + return makeToken(TOKEN_PLUS); + case '/': + return makeToken(TOKEN_SLASH); + case '*': + return makeToken(TOKEN_STAR); + case '!': + return makeToken(match('=') ? TOKEN_BANG_EQUAL : TOKEN_BANG); + case '=': + return makeToken(match('=') ? TOKEN_EQUAL_EQUAL : (match('>') ? TOKEN_ROCKET : TOKEN_EQUAL)); + case '<': + return makeToken(match('=') ? TOKEN_LESS_EQUAL : TOKEN_LESS); + case '>': + return makeToken(match('=') ? 
TOKEN_GREATER_EQUAL : TOKEN_GREATER); + case '"': + return string(); + } + + return errorToken("Unexpected character."); +} +/* NOTE(review): the text of this function breaks off after "i" below and resumes inside the include list of table.c; the rest of scanner.c and the files between it and table.c are missing from this copy of the patch and must be recovered from the repository. */ +bool scannerHasSemicolons() { + for (int i=0;i +#include + +#include "memory.h" +#include "object.h" +#include "table.h" +#include "value.h" + +#define TABLE_MAX_LOAD 0.75 +/* Puts a table into the empty state without allocating. */ +void initTable(Table* table) { + table->count = 0; + table->capacity = 0; + table->entries = NULL; +} +/* Releases the entry array and resets the table to the empty state. */ +void freeTable(Table* table) { + FREE_ARRAY(Entry, table->entries, table->capacity); + initTable(table); +} +/* Linear-probe lookup starting at hashValue(key) % capacity. Returns the entry holding `key`, or the slot where it should be inserted: the first tombstone passed, else the empty slot that ended the probe. Requires capacity > 0 and at least one never-used slot, otherwise the loop does not terminate. */ +static Entry* findEntry(Entry* entries, int capacity, Value key) { + uint32_t index = hashValue(key) % capacity; + Entry* tombstone = NULL; + + for (;;) { + Entry* entry = &entries[index]; + + if (IS_EMPTY(entry->key)) { + if (IS_NIL(entry->value)) { + // Empty entry. + return tombstone != NULL ? tombstone : entry; + } else { + // We found a tombstone. + if (tombstone == NULL) tombstone = entry; + } + } else if (valuesEqual(key, entry->key)) { + // We found the key. + return entry; + } + + index = (index + 1) % capacity; + } + return NULL; +} +/* Allocates a new entry array of `capacity` slots, re-inserts every live entry (tombstones are dropped and count is recomputed), then frees the old array. */ +static void adjustCapacity(Table* table, int capacity) { + Entry* entries = ALLOCATE(Entry, capacity); + for (int i = 0; i < capacity; i++) { + entries[i].key = EMPTY_VAL; + entries[i].value = NIL_VAL; + } + + table->count = 0; + for (int i = 0; i < table->capacity; i++) { + Entry* entry = &table->entries[i]; + if (IS_EMPTY(entry->key)) continue; + + Entry* dest = findEntry(entries, capacity, entry->key); + dest->key = entry->key; + dest->value = entry->value; + table->count++; + } + + FREE_ARRAY(Entry, table->entries, table->capacity); + table->entries = entries; + table->capacity = capacity; +} +/* Looks up `key`; on success stores the value through `value` and returns true. `*value` is left untouched on a miss. */ +bool tableGet(Table* table, Value key, Value* value) { + if (table->entries == NULL) return false; + + Entry* entry = findEntry(table->entries, table->capacity, key); + if (IS_EMPTY(entry->key)) return false; + + *value = entry->value; + return true; +} +/* Inserts or overwrites `key`, growing the table first when the load factor would exceed TABLE_MAX_LOAD; returns true when the key was not present before. */ +bool tableSet(Table* table, Value key, Value value) { + if 
(table->count + 1 > table->capacity * TABLE_MAX_LOAD) { + int capacity = GROW_CAPACITY(table->capacity); + adjustCapacity(table, capacity); + } + + Entry* entry = findEntry(table->entries, table->capacity, key); + + bool isNewKey = IS_EMPTY(entry->key); + if (isNewKey && IS_NIL(entry->value)) table->count++; /* reusing a tombstone does not change count */ + + entry->key = key; + entry->value = value; + return isNewKey; +} +/* Removes `key` by turning its slot into a tombstone (empty key, value true) so later probe sequences keep going past it; count is not decremented. Returns false when the key is absent. */ +bool tableDelete(Table* table,Value key) { + if (table->count == 0) return false; + + // Find the entry. + Entry* entry = findEntry(table->entries, table->capacity, key); + if (IS_EMPTY(entry->key)) return false; + + // Place a tombstone in the entry. + entry->key = EMPTY_VAL; + entry->value = BOOL_VAL(true); + + return true; +} +/* Copies every live entry of `from` into `to`. */ +void tableAddAll(Table* from, Table* to) { + for (int i = 0; i < from->capacity; i++) { + Entry* entry = &from->entries[i]; + if (!IS_EMPTY(entry->key)) { + tableSet(to, entry->key, entry->value); + } + } +} +/* Finds an interned string by content: probes from hash % capacity and compares length and bytes. Only a never-used slot (empty key, nil value) ends the search; tombstones left by tableDelete are skipped so strings stored beyond a deleted slot are still found. Assumes every non-empty key in the table is a string. */ +ObjString* tableFindString(Table* table, const char* chars, int length, uint32_t hash) { + // If the table is empty, we definitely won't find it. + if (table->entries == NULL) return NULL; + + uint32_t index = hash % table->capacity; + + for (;;) { + Entry* entry = &table->entries[index]; + + if (IS_EMPTY(entry->key) && IS_NIL(entry->value)) return NULL; /* a never-used slot ends the probe sequence */ + ObjString* string = IS_EMPTY(entry->key) ? NULL : AS_STRING(entry->key); /* NULL marks a tombstone, which is skipped */ + if (string != NULL && string->length == length && memcmp(string->chars, chars, length) == 0) { + // We found it. + return string; + } + + // Try the next slot. 
+ index = (index + 1) % table->capacity; + } + return NULL; +} diff --git a/table.h b/table.h new file mode 100644 index 0000000..a063752 --- /dev/null +++ b/table.h @@ -0,0 +1,26 @@ +#ifndef clox_table_h +#define clox_table_h + +#include "common.h" +#include "value.h" +/* One slot of the open-addressing table. An EMPTY key with a nil value is a never-used slot; an EMPTY key with any other value is a tombstone. */ +typedef struct { + Value key; + Value value; +} Entry; +/* Hash table keyed by Value: `count` is maintained by tableSet/adjustCapacity, `capacity` is the length of `entries`. */ +typedef struct { + int count; + int capacity; + Entry* entries; +} Table; + +void initTable(Table* table); +void freeTable(Table* table); +bool tableGet(Table* table, Value key, Value* value); +bool tableSet(Table* table, Value key, Value value); +bool tableDelete(Table* table, Value key); +void tableAddAll(Table* from, Table* to); +ObjString* tableFindString(Table* table, const char* chars, int length, uint32_t hash); + +#endif diff --git a/value.c b/value.c new file mode 100644 index 0000000..0e3ef68 --- /dev/null +++ b/value.c @@ -0,0 +1,136 @@ +#include +#include +#include + +#include "object.h" +#include "memory.h" +#include "value.h" +/* Puts a value array into the empty state without allocating. */ +void initValueArray(ValueArray* array) { + array->values = NULL; + array->capacity = 0; + array->count = 0; +} +/* Appends `value`, growing the buffer with GROW_CAPACITY/GROW_ARRAY when it is full. */ +void writeValueArray(ValueArray* array, Value value) { + if (array->capacity < array->count + 1) { + int oldCapacity = array->capacity; + array->capacity = GROW_CAPACITY(oldCapacity); + array->values = GROW_ARRAY(array->values, Value, oldCapacity, array->capacity); + } + + array->values[array->count] = value; + array->count++; +} +/* Releases the element buffer and resets the array to the empty state; the ValueArray struct itself is not freed. */ +void freeValueArray(ValueArray* array) { + FREE_ARRAY(Value, array->values, array->capacity); + initValueArray(array); +} +/* Prints a value to stdout according to its type tag; objects are delegated to printObject. */ +void printValue(Value value) { + switch (value.type) { + case VAL_BOOL: + printf(AS_BOOL(value) ? 
"true" : "false"); + break; + case VAL_NIL: + printf("nil"); + break; + case VAL_NUMBER: + printf("%g", AS_NUMBER(value)); + break; + case VAL_OBJ: + printObject(value); + break; + case VAL_EMPTY: + printf(""); + break; + } +} +/* Equality used by the VM and by table lookups: values of different types are never equal; numbers and booleans compare by value; objects compare by pointer identity. */ +bool valuesEqual(Value a, Value b) { + if (a.type != b.type) return false; + switch (a.type) { + case VAL_BOOL: + return AS_BOOL(a) == AS_BOOL(b); + case VAL_NIL: + return true; + case VAL_NUMBER: + return AS_NUMBER(a) == AS_NUMBER(b); + case VAL_OBJ: { + return AS_OBJ(a) == AS_OBJ(b); + } + case VAL_EMPTY: + return true; + } + return false; +} +/* Hashes a double by adding 1.0 and summing the two 32-bit halves of the result's bit pattern, read through a union. */ +static uint32_t hashDouble(double value) { + union BitCast { + double value; + uint32_t ints[2]; + }; + + union BitCast cast; + cast.value = (value) + 1.0; + return cast.ints[0] + cast.ints[1]; +} +/* FNV-1a style hash over the four low-order bytes of `value`, least significant byte first. */ +static uint32_t hashUint(unsigned int value) { + uint8_t bytes[4]; + bytes[0] = value & 0xFF; + bytes[1] = (value & 0xFF00) >> 8; + bytes[2] = (value & 0xFF0000) >> 16; + bytes[3] = (value & 0xFF000000) >> 24; /* was "> 24", a comparison that reduced the top byte to 0 or 1 */ + + uint32_t hash = 2166136261u; + + for (int i = 0; i < 4; i++) { + hash ^= bytes[i]; + hash *= 16777619; + } + + return hash; +} +/* Hash of a Value for use as a table key; dispatches on the type tag. */ +uint32_t hashValue(Value value) { + switch (value.type) { + case VAL_BOOL: + return AS_BOOL(value) ? 
3 : 5; + case VAL_NIL: + return 7; + case VAL_NUMBER: + return hashDouble(AS_NUMBER(value)); + case VAL_OBJ: { + Obj* object = AS_OBJ(value); + switch (object->type) { + case OBJ_STRING: + return AS_STRING(value)->hash; /* cached when the string was created */ + break; + case OBJ_ARRAY: { // TODO: Figure out how to properly hash an array + ValueArray* valArray = AS_VARRAY(value); + unsigned int sum = 0; + for (int i = 0; i < valArray->count; i++) { + uint32_t valHash = hashValue(valArray->values[i]); + if ((UINT_MAX - sum) < valHash) { /* the addition below would wrap: scramble the running sum first */ + sum = hashUint(sum); + } + sum += valHash; + } + return sum; + break; + } + case OBJ_HASH: // TODO: Figure out how to hash a hash + return 0; + break; + default: + return 0; + } + break; + } + case VAL_EMPTY: + return 0; + } + return 0; +} diff --git a/value.h b/value.h new file mode 100644 index 0000000..b5137e1 --- /dev/null +++ b/value.h @@ -0,0 +1,53 @@ +#ifndef clox_value_h +#define clox_value_h + +#include "common.h" +/* Forward declarations of the object structs defined in object.h. */ +typedef struct sObj Obj; +typedef struct sObjString ObjString; +typedef struct sObjArray ObjArray; +typedef struct sObjHash ObjHash; +/* Type tag of a Value. VAL_EMPTY is used by table.c to mark slots that hold no key. */ +typedef enum { + VAL_BOOL, VAL_NIL, VAL_NUMBER, VAL_OBJ, VAL_EMPTY +} ValueType; +/* Tagged union: `type` selects which member of `as` is meaningful. */ +typedef struct { + ValueType type; + union { + bool boolean; + double number; + Obj* obj; + } as; +} Value; +/* Constructors producing a Value from a C value. */ +#define BOOL_VAL(value) ((Value){ VAL_BOOL, { .boolean = value } }) +#define NIL_VAL ((Value){ VAL_NIL, { .number = 0 } }) +#define NUMBER_VAL(value) ((Value){ VAL_NUMBER, { .number = value } }) +#define OBJ_VAL(object) ((Value){ VAL_OBJ, { .obj = (Obj*)object } }) +#define EMPTY_VAL ((Value){ VAL_EMPTY, { .number = 0 } }) +/* Type tests. */ +#define IS_BOOL(value) ((value).type == VAL_BOOL) +#define IS_NIL(value) ((value).type == VAL_NIL) +#define IS_NUMBER(value) ((value).type == VAL_NUMBER) +#define IS_OBJ(value) ((value).type == VAL_OBJ) +#define IS_EMPTY(value) ((value).type == VAL_EMPTY) +/* Unchecked accessors: the caller must have tested the type first. */ +#define AS_OBJ(value) ((value).as.obj) +#define AS_BOOL(value) ((value).as.boolean) +#define AS_NUMBER(value) ((value).as.number) + +typedef struct 
{ + int capacity; + int count; + Value* values; +} ValueArray; + +bool valuesEqual(Value a, Value b); +void initValueArray(ValueArray* array); +void writeValueArray(ValueArray* array, Value value); +void freeValueArray(ValueArray* array); +void printValue(Value value); +uint32_t hashValue(Value value); + +#endif diff --git a/vm.c b/vm.c new file mode 100644 index 0000000..125cf73 --- /dev/null +++ b/vm.c @@ -0,0 +1,394 @@ +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "common.h" +#include "compiler.h" +#include "debug.h" +#include "object.h" +#include "memory.h" +#include "table.h" +#include "vm.h" + +#define READ_BYTE() (*vm.ip++) +#define READ_CONSTANT() (vm.chunk->constants.values[READ_BYTE()]) +#define READ_SHORT() (vm.ip += 2, (uint16_t)((vm.ip[-1] << 8) | vm.ip[-2])) +#define READ_3BYTE() (vm.ip += 3, (uint32_t)((vm.ip[-1] << 16)| (vm.ip[-2] << 8) | vm.ip[-3])) +#define READ_STRING() AS_STRING(READ_CONSTANT()) +#define READ_STRING_LONG() AS_STRING(readConstantLong()) + +#define BINARY_OP(valueType, op) \ + do { \ + if (!IS_NUMBER(peek(0)) || !IS_NUMBER(peek(1))) { \ + runtimeError("Operands must be numbers."); \ + return INTERPRET_RUNTIME_ERROR; \ + } \ + \ + double b = AS_NUMBER(pop()); \ + double a = AS_NUMBER(pop()); \ + push(valueType(a op b)); \ + } while (false) + +VM vm; +/* Puts the stack fields into the no-buffer state. Does not free vm.stack, so it must only be called when no buffer is owned (start-up, or right after the buffer was released). */ +static void resetStack() { + vm.sp = 0; + vm.capacity = 0; + vm.stack = NULL; +} +/* Prints a printf-style message and the source line of the failing instruction to stderr, then empties the value stack. */ +static void runtimeError(const char* format, ...) 
{ + va_list args; + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + fputs("\n", stderr); + + size_t instruction = vm.ip - vm.chunk->code - 1; /* ip has already moved past the opcode; step back so the index stays inside the failing instruction */ + fprintf(stderr, "[line %d] in script\n", vm.chunk->lines[instruction]); + + vm.sp = 0; /* empty the stack but keep its buffer; resetStack() here dropped the pointer without freeing it */ +} +/* Initialises the VM: empty stack, no objects, empty globals and string-intern tables. */ +void initVM() { + resetStack(); + vm.objects = NULL; + + initTable(&vm.globals); + initTable(&vm.strings); +} +/* Releases everything the VM owns: both tables, every heap object, and the value stack buffer. */ +void freeVM() { + freeTable(&vm.globals); + freeTable(&vm.strings); + freeObjects(); + FREE_ARRAY(Value, vm.stack, vm.capacity); + resetStack(); +} +/* Pushes a value, growing the stack buffer when sp has reached capacity. */ +void push(Value value) { + if (vm.sp == vm.capacity) { + int oldCapacity = vm.capacity; + vm.capacity = GROW_CAPACITY(oldCapacity); + vm.stack = GROW_ARRAY(vm.stack, Value, oldCapacity, vm.capacity); + } + vm.stack[vm.sp] = value; + vm.sp++; +} +/* Pops and returns the top value; there is no underflow check. */ +Value pop() { + vm.sp--; + return vm.stack[vm.sp]; +} +/* Returns the value `distance` slots below the top without removing it (0 is the top). */ +Value peek(int distance) { + return vm.stack[vm.sp - 1 - distance]; +} +/* nil and false are falsey; every other value is truthy. */ +static bool isFalsey(Value value) { + return IS_NIL(value) || (IS_BOOL(value) && !AS_BOOL(value)); +} +/* Pops two strings (right operand first), builds their concatenation in a fresh buffer, hands the buffer to takeString and pushes the result. */ +static void concatenate() { + ObjString* b = AS_STRING(pop()); + ObjString* a = AS_STRING(pop()); + + int length = a->length + b->length; + char* chars = ALLOCATE(char, length + 1); + memcpy(chars, a->chars, a->length); + memcpy(chars + a->length, b->chars, b->length); + chars[length] = '\0'; + + ObjString* result = takeString(chars, length); + push(OBJ_VAL(result)); +} +/* Reads a three-byte constant index, least significant byte first, and returns that constant. NOTE(review): writeConstant in chunk.c emits its three operand bytes in the opposite order - confirm which side is intended. */ +static inline Value readConstantLong() { + uint8_t constant_lo = READ_BYTE(); + uint8_t constant_mid = READ_BYTE(); + uint8_t constant_hi = READ_BYTE(); + int constant = constant_lo | (constant_mid << 8) | constant_hi << 16; + return vm.chunk->constants.values[constant]; +} +/* Bytecode dispatch loop: executes instructions from vm.ip until OP_RETURN (INTERPRET_OK) or a runtime error (INTERPRET_RUNTIME_ERROR). */ +static InterpretResult run() { + for (;;) { +#ifdef DEBUG_TRACE_EXECUTION + printf(" "); + for (int i = 0; i < vm.sp; i++) { + printf("[ "); + printValue(vm.stack[i]); + printf(" ]"); + } + printf("\n"); + disassembleInstruction(vm.chunk, (int) (vm.ip - vm.chunk->code)); +#endif + uint8_t instruction; + switch (instruction = READ_BYTE()) { + case OP_ARRAY: { + int num_values = READ_BYTE(); 
+ Value* values = malloc(sizeof(Value) * num_values); /* scratch buffer so the popped values can be appended in source order; allocation results in this case are not checked */ + for (int i = num_values - 1; i >= 0; i--) { + values[i] = pop(); + } + ValueArray* valArray = malloc(sizeof(ValueArray)); /* pointer is stored in the new ObjArray by takeArray */ + initValueArray(valArray); + for (int i = 0; i < num_values; i++) { + writeValueArray(valArray, values[i]); + } + ObjArray* array = takeArray(valArray); + push(OBJ_VAL(array)); + free(values); + break; + } + case OP_HASH: { + int num_values = READ_BYTE(); /* number of key/value pairs on the stack */ + Value* values = malloc(sizeof(Value) * num_values); + Value* keys = malloc(sizeof(Value) * num_values); + for (int i = num_values - 1; i >= 0; i--) { /* each pair was pushed key first, so the value is popped first */ + values[i] = pop(); + keys[i] = pop(); + } + Table* hashTable = malloc(sizeof(Table)); /* pointer is stored in the new ObjHash by takeHash */ + initTable(hashTable); + for (int i = 0; i < num_values; i++) { + tableSet(hashTable, keys[i], values[i]); + } + ObjHash* hash = takeHash(hashTable); + push(OBJ_VAL(hash)); + free(values); + free(keys); + break; + } + case OP_CONSTANT: { + Value constant = READ_CONSTANT(); + push(constant); + break; + } + case OP_CONSTANT_LONG: { + Value constant = readConstantLong(); + push(constant); + break; + } + case OP_NIL: + push(NIL_VAL); + break; + case OP_TRUE: + push(BOOL_VAL(true)); + break; + case OP_FALSE: + push(BOOL_VAL(false)); + break; + case OP_POP: + pop(); + break; + case OP_INDEX: { + Value index = pop(); /* the index was pushed last */ + Value val = pop(); + if (IS_OBJ(val)) { + Obj* obj = AS_OBJ(val); + switch (obj->type) { + case OBJ_ARRAY: { + ValueArray* valArray = AS_VARRAY(val); + if (!IS_NUMBER(index)) { + runtimeError("Array index must be a non-negative integer."); + return INTERPRET_RUNTIME_ERROR; + } + double i_double = AS_NUMBER(index); + if (i_double < 0) { + runtimeError("Array index must be a non-negative integer."); + return INTERPRET_RUNTIME_ERROR; + } + if ((int) i_double != i_double) { /* rejects fractional indices. NOTE(review): converting a double outside the range of int is undefined behaviour in C, and a very large index reaches this cast before the bounds check */ + runtimeError("Array index must be a non-negative integer."); + return INTERPRET_RUNTIME_ERROR; + } + int i = (int) i_double; + if (i > valArray->count - 1) { + runtimeError("Array index out of bounds."); + return 
INTERPRET_RUNTIME_ERROR; + } + push(valArray->values[i]); + break; + } + case OBJ_HASH: { + Table* hashTable = AS_HASH(val); + Value* value = malloc(sizeof(Value)); + if (tableGet(hashTable, index, value)) { + push(*value); + } else { + push(NIL_VAL); + } + break; + } + default: + runtimeError("Cannot index value."); + return INTERPRET_RUNTIME_ERROR; + } + } else { + runtimeError("Cannot index value."); + return INTERPRET_RUNTIME_ERROR; + } + break; + } + case OP_GET_LOCAL: { + uint8_t slot = READ_BYTE(); + push(vm.stack[slot]); + break; + } + case OP_GET_LOCAL_LONG: { + uint32_t slot = READ_3BYTE(); + push(vm.stack[slot]); + break; + } + case OP_SET_LOCAL: { + uint8_t slot = READ_BYTE(); + vm.stack[slot] = peek(0); + break; + } + case OP_SET_LOCAL_LONG: { + uint32_t slot = READ_3BYTE(); + vm.stack[slot] = peek(0); + break; + } + case OP_GET_GLOBAL: { + ObjString* name = READ_STRING(); + Value value; + if (!tableGet(&vm.globals, OBJ_VAL(name), &value)) { + runtimeError("Undefined variable '%s'.", name->chars); + return INTERPRET_RUNTIME_ERROR; + } + push(value); + break; + } + case OP_GET_GLOBAL_LONG: { + ObjString* name = READ_STRING_LONG(); + Value value; + if (!tableGet(&vm.globals, OBJ_VAL(name), &value)) { + runtimeError("Undefined variable '%s'.", name->chars); + return INTERPRET_RUNTIME_ERROR; + } + push(value); + break; + } + case OP_DEFINE_GLOBAL: { + ObjString* name = READ_STRING(); + tableSet(&vm.globals, OBJ_VAL(name), peek(0)); + pop(); + break; + } + case OP_DEFINE_GLOBAL_LONG: { + ObjString* name = READ_STRING_LONG(); + tableSet(&vm.globals, OBJ_VAL(name), peek(0)); + pop(); + break; + } + case OP_SET_GLOBAL: { + ObjString* name = READ_STRING(); + if (tableSet(&vm.globals, OBJ_VAL(name), peek(0))) { + tableDelete(&vm.globals, OBJ_VAL(name)); + runtimeError("Undefined variable '%s'.", name->chars); + return INTERPRET_RUNTIME_ERROR; + } + break; + } + case OP_SET_GLOBAL_LONG: { + ObjString* name = READ_STRING_LONG(); + if (tableSet(&vm.globals, 
OBJ_VAL(name), peek(0))) { + tableDelete(&vm.globals, OBJ_VAL(name)); /* the name was not defined: undo the insert made by tableSet */ + runtimeError("Undefined variable '%s'.", name->chars); + return INTERPRET_RUNTIME_ERROR; + } + break; + } + case OP_EQUAL: { + Value b = pop(); + Value a = pop(); + push(BOOL_VAL(valuesEqual(a, b))); + break; + } + case OP_GREATER: + BINARY_OP(BOOL_VAL, >); + break; + case OP_LESS: + BINARY_OP(BOOL_VAL, <); + break; + case OP_ADD: { /* adds two numbers or concatenates two strings; any other combination is a runtime error */ + if (IS_STRING(peek(0)) && IS_STRING(peek(1))) { + concatenate(); + } else if (IS_NUMBER(peek(0)) && IS_NUMBER(peek(1))) { + double b = AS_NUMBER(pop()); + double a = AS_NUMBER(pop()); + push(NUMBER_VAL(a + b)); + } else { + runtimeError("Operands must be two numbers or two strings."); + return INTERPRET_RUNTIME_ERROR; + } + break; + } + case OP_SUBTRACT: + BINARY_OP(NUMBER_VAL, -); + break; + case OP_MULTIPLY: + BINARY_OP(NUMBER_VAL, *); + break; + case OP_DIVIDE: + BINARY_OP(NUMBER_VAL, /); + break; + case OP_NOT: + push(BOOL_VAL(isFalsey(pop()))); + break; + case OP_NEGATE: + if (!IS_NUMBER(peek(0))) { + runtimeError("Operand must be a number."); + return INTERPRET_RUNTIME_ERROR; + } + + push(NUMBER_VAL(-AS_NUMBER(pop()))); + break; + case OP_PRINT: { + printValue(pop()); + printf("\n"); + break; + } + case OP_JUMP: { /* unconditional forward jump by a 16-bit offset */ + uint16_t offset = READ_SHORT(); + vm.ip += offset; + break; + } + case OP_JUMP_IF_FALSE: { /* the condition is only peeked, not popped */ + uint16_t offset = READ_SHORT(); + if (isFalsey(peek(0))) vm.ip += offset; + break; + } + case OP_LOOP: { /* backward jump by a 16-bit offset */ + uint16_t offset = READ_SHORT(); + vm.ip -= offset; + break; + } + case OP_RETURN: + return INTERPRET_OK; + break; + } + } + return INTERPRET_RUNTIME_ERROR; +} +/* Compiles `source` into a chunk local to this call and runs it. Returns INTERPRET_COMPILE_ERROR when compile() fails, otherwise the result of run(). The chunk is freed before returning on both paths. `repl` is passed through to compile(). */ +InterpretResult interpret(const char* source, bool repl) { + Chunk chunk; + initChunk(&chunk); + + if (!compile(source, &chunk, repl)) { + freeChunk(&chunk); + return INTERPRET_COMPILE_ERROR; + } + + vm.chunk = &chunk; + vm.ip = vm.chunk->code; + + InterpretResult result = run(); + + freeChunk(&chunk); + return result; +} diff --git a/vm.h b/vm.h new file mode 100644 index 0000000..f472044 
--- /dev/null +++ b/vm.h @@ -0,0 +1,33 @@ +#ifndef clox_vm_h +#define clox_vm_h + +#include "chunk.h" +#include "table.h" +#include "value.h" +/* NOTE(review): STACK_MAX is not referenced by the vm.c in this patch, where push() grows the stack on demand - confirm whether it is still needed. */ +#define STACK_MAX 256 +/* Interpreter state: the chunk being executed and the instruction pointer into it; the value stack (`stack` buffer, `sp` values in use, `capacity` slots allocated); the list of all heap objects; the string-intern table and the global-variable table. */ +typedef struct { + Chunk* chunk; + uint8_t* ip; + int sp; + int capacity; + Value* stack; + Obj* objects; + Table strings; + Table globals; +} VM; +/* Outcome of interpret(). */ +typedef enum { + INTERPRET_OK, INTERPRET_COMPILE_ERROR, INTERPRET_RUNTIME_ERROR +} InterpretResult; +/* The single VM instance, defined in vm.c. */ +extern VM vm; + +void initVM(); +void freeVM(); +InterpretResult interpret(const char* source, bool repl); +void push(Value value); +Value pop(); + +#endif