diff --git a/.gitignore b/.gitignore index d704a127..2660bb51 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ __arkscript__/ *.arkc *.arkm /*.ark +!tests/unittests/resources/BytecodeReaderSuite/*.arkc # Generated files include/Ark/Constants.hpp diff --git a/CHANGELOG.md b/CHANGELOG.md index bd073876..580658b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ - fixed a bug in the compiler where one could "use" operators without calling them: `(print nil?)` - fixed a bug in the compiler allowing the use of operators without any argument: `(+)` - fixed a bug in the vm during error reporting when a non-function was used as a function +- refactored code inside the bytecode reader to promote code reuse ### Removed - removed unused `NodeType::Closure` diff --git a/include/Ark/Compiler/BytecodeReader.hpp b/include/Ark/Compiler/BytecodeReader.hpp index 6655871f..3db2cb2b 100644 --- a/include/Ark/Compiler/BytecodeReader.hpp +++ b/include/Ark/Compiler/BytecodeReader.hpp @@ -2,7 +2,7 @@ * @file BytecodeReader.hpp * @author Alexandre Plateau (lexplt.dev@gmail.com) * @brief A bytecode disassembler for ArkScript - * @version 0.4 + * @version 0.5 * @date 2020-10-27 * * @copyright Copyright (c) 2020-2024 @@ -19,6 +19,7 @@ #include #include +#include namespace Ark { @@ -31,6 +32,33 @@ namespace Ark HeadersOnly }; + struct Version + { + uint16_t major; + uint16_t minor; + uint16_t patch; + }; + + struct Symbols + { + std::vector symbols; + std::size_t start; ///< Point to the SYM_TABLE_START byte in the bytecode + std::size_t end; ///< Point to the byte following the last byte of the table in the bytecode + }; + + struct Values + { + std::vector values; + std::size_t start; ///< Point to the VAL_TABLE_START byte in the bytecode + std::size_t end; ///< Point to the byte following the last byte of the table in the bytecode + }; + + struct Code + { + std::vector pages; + std::size_t start; ///< Point to the CODE_SEGMENT_START byte in the bytecode + }; + /** * @brief This class is just a helper to * - check if a bytecode is valid @@ -53,6 +81,12 @@ namespace Ark */ void feed(const std::string& file); + /** + * Check for the presence of the magic header + * @return true if the magic 'ark\0' was found + */ + [[nodiscard]] bool checkMagic() const; + /** * @brief Return the bytecode object constructed * @@ -60,12 +94,42 @@ namespace Ark */ [[nodiscard]] const bytecode_t& bytecode() noexcept; + /** + * + * @return Version compiler version used to create the given bytecode file + */ + [[nodiscard]] Version version() const; + /** * @brief Return the read timestamp from the bytecode file * * @return unsigned long long */ - [[nodiscard]] unsigned long long timestamp(); + [[nodiscard]] unsigned long long timestamp() const; + + /** + * + * @return std::vector bytecode sha + */ + [[nodiscard]] std::vector sha256() const; + + /** + * + * @return Symbols + */ + [[nodiscard]] Symbols symbols() const; + + /** + * + * @return Values + */ + [[nodiscard]] Values values() const; + + /** + * + * @return Code + */ + [[nodiscard]] Code code() const; /** * @brief Display the bytecode opcode in a human friendly way. diff --git a/include/Ark/VM/Value.hpp b/include/Ark/VM/Value.hpp index 1acf1892..552ee568 100644 --- a/include/Ark/VM/Value.hpp +++ b/include/Ark/VM/Value.hpp @@ -25,6 +25,7 @@ namespace Ark { class VM; + class BytecodeReader; // Note: we can have at most 0x7f (127) different types // because type index is stored on the 7 right most bits of a uint8_t in the class Value. @@ -153,12 +154,13 @@ namespace Ark friend ARK_API_INLINE bool operator!(const Value& A) noexcept; friend class Ark::VM; + friend class Ark::BytecodeReader; private: uint8_t m_const_type; ///< First bit if for constness, right most bits are for type Value_t m_value; - [[nodiscard]] constexpr uint8_t type_num() const noexcept { return m_const_type & 0x7f; } + [[nodiscard]] constexpr uint8_t type_num() const noexcept { return m_const_type & 0x7f; } // TODO: rename typeNum [[nodiscard]] internal::PageAddr_t pageAddr() const { return std::get(m_value); } [[nodiscard]] const ProcType& proc() const { return std::get(m_value); } diff --git a/src/arkreactor/Compiler/BytecodeReader.cpp b/src/arkreactor/Compiler/BytecodeReader.cpp index d558b81c..995adfb1 100644 --- a/src/arkreactor/Compiler/BytecodeReader.cpp +++ b/src/arkreactor/Compiler/BytecodeReader.cpp @@ -4,18 +4,23 @@ #include #include +#include #include #include +#include namespace Ark { using namespace Ark::internal; + // TODO: add exporters for symbols... so that the state doesn't have to do it all over again + void BytecodeReader::feed(const std::string& file) { std::ifstream ifs(file, std::ios::binary | std::ios::ate); if (!ifs.good()) - throw std::runtime_error("[BytecodeReader] Couldn't open file '" + file + "'"); + throw std::runtime_error(fmt::format("[BytecodeReader] Couldn't open file '{}'", file)); + const std::size_t pos = ifs.tellg(); // reserve appropriate number of bytes std::vector temp(pos); @@ -28,94 +33,190 @@ namespace Ark m_bytecode[i] = static_cast(temp[i]); } + bool BytecodeReader::checkMagic() const + { + return m_bytecode.size() >= 4 && m_bytecode[0] == 'a' && + m_bytecode[1] == 'r' && m_bytecode[2] == 'k' && + m_bytecode[3] == internal::Instruction::NOP; + } + + const bytecode_t& BytecodeReader::bytecode() noexcept { return m_bytecode; } - unsigned long long BytecodeReader::timestamp() + Version BytecodeReader::version() const { - bytecode_t b = bytecode(); - std::size_t i = 0; + if (!checkMagic() || m_bytecode.size() < 10) + return Version { 0, 0, 0 }; + + return Version { + .major = static_cast((m_bytecode[4] << 8) + m_bytecode[5]), + .minor = static_cast((m_bytecode[6] << 8) + m_bytecode[7]), + .patch = static_cast((m_bytecode[8] << 8) + m_bytecode[9]) + }; + } - // we want to see a 'ark\0' header - if (!(b.size() > 4 && b[i++] == 'a' && b[i++] == 'r' && b[i++] == 'k' && b[i++] == Instruction::NOP)) + + unsigned long long BytecodeReader::timestamp() const + { + // 4 (ark\0) + version (2 bytes / number) + timestamp = 18 bytes + if (!checkMagic() || m_bytecode.size() < 18) return 0; - // read major, minor and patch - std::ignore = readNumber(i); - i++; - std::ignore = readNumber(i); + // reading the timestamp in big endian + using timestamp_t = unsigned long long; + return (static_cast(m_bytecode[10]) << 56) + + (static_cast(m_bytecode[11]) << 48) + + (static_cast(m_bytecode[12]) << 40) + + (static_cast(m_bytecode[13]) << 32) + + (static_cast(m_bytecode[14]) << 24) + + (static_cast(m_bytecode[15]) << 16) + + (static_cast(m_bytecode[16]) << 8) + + static_cast(m_bytecode[17]); + } + + std::vector BytecodeReader::sha256() const + { + if (!checkMagic() || m_bytecode.size() < 18 + picosha2::k_digest_size) + return {}; + + std::vector sha(picosha2::k_digest_size); + for (std::size_t i = 0; i < picosha2::k_digest_size; ++i) + sha[i] = m_bytecode[18 + i]; + return sha; + } + + Symbols BytecodeReader::symbols() const + { + if (!checkMagic() || m_bytecode.size() < 18 + picosha2::k_digest_size || + m_bytecode[18 + picosha2::k_digest_size] != SYM_TABLE_START) + return {}; + + std::size_t i = 18 + picosha2::k_digest_size + 1; + const uint16_t size = readNumber(i); i++; - std::ignore = readNumber(i); + + Symbols block; + block.start = 18 + picosha2::k_digest_size; + block.symbols.reserve(size); + + for (uint16_t j = 0; j < size; ++j) + { + std::string content; + while (m_bytecode[i] != 0) + content += m_bytecode[i++]; + i++; + + block.symbols.push_back(content); + } + + block.end = i; + return block; + } + + Values BytecodeReader::values() const + { + if (!checkMagic()) + return {}; + + const auto data = symbols(); + std::size_t i = data.end; + if (m_bytecode[i] != VAL_TABLE_START) + return {}; i++; - // reading the timestamp in big endian - using timestamp_t = unsigned long long; - timestamp_t timestamp = 0; - const auto aa = (static_cast(m_bytecode[i]) << 56), - ba = (static_cast(m_bytecode[++i]) << 48), - ca = (static_cast(m_bytecode[++i]) << 40), - da = (static_cast(m_bytecode[++i]) << 32), - ea = (static_cast(m_bytecode[++i]) << 24), - fa = (static_cast(m_bytecode[++i]) << 16), - ga = (static_cast(m_bytecode[++i]) << 8), - ha = (static_cast(m_bytecode[++i])); + + const uint16_t size = readNumber(i); i++; - timestamp = aa + ba + ca + da + ea + fa + ga + ha; + Values block; + block.start = data.end; + block.values.reserve(size); - return timestamp; + for (uint16_t j = 0; j < size; ++j) + { + const uint8_t type = m_bytecode[i]; + i++; + + if (type == NUMBER_TYPE) + { + std::string val; + while (m_bytecode[i] != 0) + val.push_back(m_bytecode[i++]); + block.values.emplace_back(std::stod(val)); + } + else if (type == STRING_TYPE) + { + std::string val; + while (m_bytecode[i] != 0) + val.push_back(m_bytecode[i++]); + block.values.emplace_back(val); + } + else if (type == FUNC_TYPE) + { + const uint16_t addr = readNumber(i); + i++; + block.values.emplace_back(addr); + } + else + throw std::runtime_error(fmt::format("Unknown value type: {:x}", type)); + i++; + } + + block.end = i; + return block; + } + + Code BytecodeReader::code() const + { + if (!checkMagic()) + return {}; + + const auto data = values(); + std::size_t i = data.end; + + Code block; + block.start = i; + + while (m_bytecode[i] == CODE_SEGMENT_START) + { + i++; + const std::size_t size = readNumber(i) * 4; + i++; + + block.pages.emplace_back().reserve(size); + for (std::size_t j = 0; j < size; ++j) + block.pages.back().push_back(m_bytecode[i++]); + + if (i == m_bytecode.size()) + break; + } + + return block; } + void BytecodeReader::display(const BytecodeSegment segment, const std::optional sStart, const std::optional sEnd, const std::optional cPage) { - bytecode_t b = bytecode(); - std::size_t i = 0; - std::ostream& os = std::cout; - if (!(b.size() > 4 && b[i++] == 'a' && b[i++] == 'r' && b[i++] == 'k' && b[i++] == NOP)) + if (!checkMagic()) { os << "Invalid format"; return; } - uint16_t major = readNumber(i); - i++; - uint16_t minor = readNumber(i); - i++; - uint16_t patch = readNumber(i); - i++; + auto [major, minor, patch] = version(); os << "Version: " << major << "." << minor << "." << patch << "\n"; - - using timestamp_t = unsigned long long; - timestamp_t timestamp = 0; - auto aa = (static_cast(m_bytecode[i]) << 56), - ba = (static_cast(m_bytecode[++i]) << 48), - ca = (static_cast(m_bytecode[++i]) << 40), - da = (static_cast(m_bytecode[++i]) << 32), - ea = (static_cast(m_bytecode[++i]) << 24), - fa = (static_cast(m_bytecode[++i]) << 16), - ga = (static_cast(m_bytecode[++i]) << 8), - ha = (static_cast(m_bytecode[++i])); - i++; - timestamp = aa + ba + ca + da + ea + fa + ga + ha; - os << "Timestamp: " << timestamp << "\n"; - + os << "Timestamp: " << timestamp() << "\n"; os << "SHA256: "; - for (std::size_t j = 0; j < picosha2::k_digest_size; ++j) - { - os << std::hex << static_cast(m_bytecode[i]); - ++i; - } - os << "\n\n" - << std::dec; - - std::vector symbols; - std::vector values; + for (const auto sha = sha256(); unsigned char h : sha) + os << fmt::format("{:02x}", h); + os << "\n\n"; // reading the different tables, one after another @@ -132,12 +233,14 @@ namespace Ark return; } - if (b[i] == SYM_TABLE_START) + const auto syms = symbols(); + const auto vals = values(); + const auto code_block = code(); + + // symbols table { - i++; - uint16_t size = readNumber(i); - i++; - uint16_t sliceSize = size; + std::size_t size = syms.symbols.size(); + std::size_t sliceSize = size; bool showSym = (segment == BytecodeSegment::All || segment == BytecodeSegment::Symbols); if (showSym && sStart.has_value() && sEnd.has_value() && (sStart.value() > size || sEnd.value() > size)) @@ -148,43 +251,25 @@ namespace Ark if (showSym || segment == BytecodeSegment::HeadersOnly) os << termcolor::cyan << "Symbols table" << termcolor::reset << " (length: " << sliceSize << ")\n"; - for (uint16_t j = 0; j < size; ++j) + for (std::size_t j = 0; j < size; ++j) { if (auto start = sStart; auto end = sEnd) showSym = showSym && (j >= start.value() && j <= end.value()); - std::string content; - while (b[i] != 0) - content += b[i++]; - i++; - if (showSym) - { - os << static_cast(j) << ") "; - os << content << "\n"; - } - - symbols.push_back(content); + os << fmt::format("{}) {}\n", j, syms.symbols[j]); } + if (showSym) os << "\n"; - } - else - { - os << termcolor::red << "Missing symbole table entry point\n" - << termcolor::reset; - return; + if (segment == BytecodeSegment::Symbols) + return; } - if (segment == BytecodeSegment::Symbols) - return; - - if (b[i] == VAL_TABLE_START) + // values table { - i++; - uint16_t size = readNumber(i); - i++; - uint16_t sliceSize = size; + std::size_t size = vals.values.size(); + std::size_t sliceSize = size; bool showVal = (segment == BytecodeSegment::All || segment == BytecodeSegment::Values); if (showVal && sStart.has_value() && sEnd.has_value() && (sStart.value() > size || sEnd.value() > size)) @@ -195,245 +280,211 @@ namespace Ark if (showVal || segment == BytecodeSegment::HeadersOnly) os << termcolor::green << "Constants table" << termcolor::reset << " (length: " << sliceSize << ")\n"; - for (uint16_t j = 0; j < size; ++j) + for (std::size_t j = 0; j < size; ++j) { if (auto start = sStart; auto end = sEnd) showVal = showVal && (j >= start.value() && j <= end.value()); if (showVal) - os << static_cast(j) << ") "; - uint8_t type = b[i]; - i++; - - if (type == Instruction::NUMBER_TYPE) { - std::string val; - while (b[i] != 0) - val.push_back(b[i++]); - i++; - if (showVal) - os << "(Number) " << val; - values.push_back("(Number) " + val); - } - else if (type == Instruction::STRING_TYPE) - { - std::string val; - while (b[i] != 0) - val.push_back(b[i++]); - i++; - if (showVal) - os << "(String) " << val; - values.push_back("(String) " + val); - } - else if (type == Instruction::FUNC_TYPE) - { - uint16_t addr = readNumber(i); - i++; - if (showVal) - os << "(PageAddr) " << addr; - values.push_back("(PageAddr) " + std::to_string(addr)); - i++; - } - else - { - os << termcolor::red << "Unknown value type: " << static_cast(type) << '\n' - << termcolor::reset; - return; + switch (const auto val = vals.values[j]; val.valueType()) + { + case ValueType::Number: + os << fmt::format("{}) (Number) {}\n", j, val.number()); + break; + case ValueType::String: + os << fmt::format("{}) (String) {}\n", j, val.string()); + break; + case ValueType::PageAddr: + os << fmt::format("{}) (PageAddr) {}\n", j, val.pageAddr()); + break; + default: + os << termcolor::red << "Value type not handled: " << types_to_str[static_cast(val.valueType())] + << '\n' + << termcolor::reset; + break; + } } - - if (showVal) - os << "\n"; } if (showVal) os << "\n"; - } - else - { - os << termcolor::red << "Missing constant table entry point\n" - << termcolor::reset; - return; + if (segment == BytecodeSegment::Values) + return; } - if (segment == BytecodeSegment::Values) - return; - - uint16_t pp = 0; - std::size_t cumulated_segment_size = i + 3; + const auto stringify_value = [](const Value& val) -> std::string { + switch (val.valueType()) + { + case ValueType::Number: + return fmt::format("{} (Number)", val.number()); + case ValueType::String: + return fmt::format("{} (String)", val.string()); + case ValueType::PageAddr: + return fmt::format("{} (PageAddr)", val.pageAddr()); + default: + return ""; + } + }; - while (b[i] == Instruction::CODE_SEGMENT_START && (segment == BytecodeSegment::All || segment == BytecodeSegment::Code || segment == BytecodeSegment::HeadersOnly)) + if (segment == BytecodeSegment::All || segment == BytecodeSegment::Code || segment == BytecodeSegment::HeadersOnly) { - i++; - uint16_t size = readNumber(i); - i++; - - bool displayCode = true; + uint16_t pp = 0; - if (auto page = cPage) - displayCode = pp == page.value(); + for (const auto& page : code_block.pages) + { + bool displayCode = true; - if (displayCode) - os << termcolor::magenta << "Code segment " << pp << termcolor::reset << " (length: " << size << ")\n"; + if (auto wanted_page = cPage) + displayCode = pp == wanted_page.value(); - if (size == 0) - { if (displayCode) - os << "NOP"; - } - else - { - i += 4 * sStart.value_or(0); + os << termcolor::magenta << "Code segment " << pp << termcolor::reset << " (length: " << page.size() << ")\n"; - if (cPage.value_or(pp) == pp && segment != BytecodeSegment::HeadersOnly) + if (page.empty()) { - if (sStart.has_value() && sEnd.has_value() && ((sStart.value() > size) || (sEnd.value() > size))) - { - os << termcolor::red << "Slice start or end can't be greater than the segment size: " << size << termcolor::reset << "\n"; - return; - } - - for (std::size_t j = sStart.value_or(0), end = sEnd.value_or(size); j < end; ++j) + if (displayCode) + os << "NOP"; + } + else + { + if (cPage.value_or(pp) == pp && segment != BytecodeSegment::HeadersOnly) { - [[maybe_unused]] uint8_t padding = b[i]; - ++i; - uint8_t inst = b[i]; - ++i; - uint16_t arg = readNumber(i); - ++i; - - // instruction number - os << termcolor::cyan << j << " "; - // padding inst arg arg - os << termcolor::reset << std::hex - << std::setw(2) << std::setfill('0') << static_cast(padding) << " " - << std::setw(2) << std::setfill('0') << static_cast(inst) << " " - << std::setw(2) << std::setfill('0') << static_cast(b[i - 2]) << " " - << std::setw(2) << std::setfill('0') << static_cast(b[i - 1]) << " "; - // reset stream - os << std::dec << termcolor::yellow; - - if (inst == NOP) - os << "NOP\n"; - else if (inst == LOAD_SYMBOL) - os << "LOAD_SYMBOL " << termcolor::green << symbols[arg] << "\n"; - else if (inst == LOAD_CONST) - os << "LOAD_CONST " << termcolor::magenta << values[arg] << "\n"; - else if (inst == POP_JUMP_IF_TRUE) - os << "POP_JUMP_IF_TRUE " << termcolor::red << "(" << arg << ")\n"; - else if (inst == STORE) - os << "STORE " << termcolor::green << symbols[arg] << "\n"; - else if (inst == LET) - os << "LET " << termcolor::green << symbols[arg] << "\n"; - else if (inst == POP_JUMP_IF_FALSE) - os << "POP_JUMP_IF_FALSE " << termcolor::red << "(" << arg << ")\n"; - else if (inst == JUMP) - os << "JUMP " << termcolor::red << "(" << arg << ")\n"; - else if (inst == RET) - os << "RET\n"; - else if (inst == HALT) - os << "HALT\n"; - else if (inst == CALL) - os << "CALL " << termcolor::reset << "(" << arg << ")\n"; - else if (inst == CAPTURE) - os << "CAPTURE " << termcolor::reset << symbols[arg] << "\n"; - else if (inst == BUILTIN) - os << "BUILTIN " << termcolor::reset << Builtins::builtins[arg].first << "\n"; - else if (inst == MUT) - os << "MUT " << termcolor::green << symbols[arg] << "\n"; - else if (inst == DEL) - os << "DEL " << termcolor::green << symbols[arg] << "\n"; - else if (inst == SAVE_ENV) - os << "SAVE_ENV\n"; - else if (inst == GET_FIELD) - os << "GET_FIELD " << termcolor::green << symbols[arg] << "\n"; - else if (inst == PLUGIN) - os << "PLUGIN " << termcolor::magenta << values[arg] << "\n"; - else if (inst == LIST) - os << "LIST " << termcolor::reset << "(" << arg << ")\n"; - else if (inst == APPEND) - os << "APPEND " << termcolor::reset << "(" << arg << ")\n"; - else if (inst == CONCAT) - os << "CONCAT " << termcolor::reset << "(" << arg << ")\n"; - else if (inst == APPEND_IN_PLACE) - os << "APPEND_IN_PLACE " << termcolor::reset << "(" << arg << ")\n"; - else if (inst == CONCAT_IN_PLACE) - os << "CONCAT_IN_PLACE " << termcolor::reset << "(" << arg << ")\n"; - else if (inst == POP_LIST) - os << "POP_LIST " << termcolor::reset << "\n"; - else if (inst == POP_LIST_IN_PLACE) - os << "POP_LIST_IN_PLACE " << termcolor::reset << "\n"; - else if (inst == POP) - os << "POP\n"; - else if (inst == ADD) - os << "ADD\n"; - else if (inst == SUB) - os << "SUB\n"; - else if (inst == MUL) - os << "MUL\n"; - else if (inst == DIV) - os << "DIV\n"; - else if (inst == GT) - os << "GT\n"; - else if (inst == LT) - os << "LT\n"; - else if (inst == LE) - os << "LE\n"; - else if (inst == GE) - os << "GE\n"; - else if (inst == NEQ) - os << "NEQ\n"; - else if (inst == EQ) - os << "EQ\n"; - else if (inst == LEN) - os << "LEN\n"; - else if (inst == EMPTY) - os << "EMPTY\n"; - else if (inst == TAIL) - os << "TAIL\n"; - else if (inst == HEAD) - os << "HEAD\n"; - else if (inst == ISNIL) - os << "ISNIL\n"; - else if (inst == ASSERT) - os << "ASSERT\n"; - else if (inst == TO_NUM) - os << "TO_NUM\n"; - else if (inst == TO_STR) - os << "TO_STR\n"; - else if (inst == AT) - os << "AT\n"; - else if (inst == AND_) - os << "AND_\n"; - else if (inst == OR_) - os << "OR_\n"; - else if (inst == MOD) - os << "MOD\n"; - else if (inst == TYPE) - os << "TYPE\n"; - else if (inst == HASFIELD) - os << "HASFIELD\n"; - else if (inst == NOT) - os << "NOT\n"; - else + if (sStart.has_value() && sEnd.has_value() && ((sStart.value() > page.size()) || (sEnd.value() > page.size()))) { - os << termcolor::reset << "Unknown instruction: " << static_cast(inst) << '\n' - << termcolor::reset; + os << termcolor::red << "Slice start or end can't be greater than the segment size: " << page.size() << termcolor::reset << "\n"; return; } + + for (std::size_t j = sStart.value_or(0), end = sEnd.value_or(page.size()); j < end; j += 4) + { + const uint8_t padding = page[j]; + const uint8_t inst = page[j + 1]; + const uint16_t arg = static_cast((page[j + 2] << 8) + page[j + 3]); + + // instruction number + os << termcolor::cyan << fmt::format("{:>4}", j / 4) << termcolor::reset; + // padding inst arg arg + os << fmt::format(" {:02x} {:02x} {:02x} {:02x} ", padding, inst, page[j + 2], page[j + 3]); + os << termcolor::yellow; + + if (inst == NOP) + os << "NOP\n"; + else if (inst == LOAD_SYMBOL) + os << "LOAD_SYMBOL " << termcolor::green << syms.symbols[arg] << "\n"; + else if (inst == LOAD_CONST) + os << "LOAD_CONST " << termcolor::magenta << stringify_value(vals.values[arg]) << "\n"; + else if (inst == POP_JUMP_IF_TRUE) + os << "POP_JUMP_IF_TRUE " << termcolor::red << "(" << arg << ")\n"; + else if (inst == STORE) + os << "STORE " << termcolor::green << syms.symbols[arg] << "\n"; + else if (inst == LET) + os << "LET " << termcolor::green << syms.symbols[arg] << "\n"; + else if (inst == POP_JUMP_IF_FALSE) + os << "POP_JUMP_IF_FALSE " << termcolor::red << "(" << arg << ")\n"; + else if (inst == JUMP) + os << "JUMP " << termcolor::red << "(" << arg << ")\n"; + else if (inst == RET) + os << "RET\n"; + else if (inst == HALT) + os << "HALT\n"; + else if (inst == CALL) + os << "CALL " << termcolor::reset << "(" << arg << ")\n"; + else if (inst == CAPTURE) + os << "CAPTURE " << termcolor::reset << syms.symbols[arg] << "\n"; + else if (inst == BUILTIN) + os << "BUILTIN " << termcolor::reset << Builtins::builtins[arg].first << "\n"; + else if (inst == MUT) + os << "MUT " << termcolor::green << syms.symbols[arg] << "\n"; + else if (inst == DEL) + os << "DEL " << termcolor::green << syms.symbols[arg] << "\n"; + else if (inst == SAVE_ENV) + os << "SAVE_ENV\n"; + else if (inst == GET_FIELD) + os << "GET_FIELD " << termcolor::green << syms.symbols[arg] << "\n"; + else if (inst == PLUGIN) + os << "PLUGIN " << termcolor::magenta << stringify_value(vals.values[arg]) << "\n"; + else if (inst == LIST) + os << "LIST " << termcolor::reset << "(" << arg << ")\n"; + else if (inst == APPEND) + os << "APPEND " << termcolor::reset << "(" << arg << ")\n"; + else if (inst == CONCAT) + os << "CONCAT " << termcolor::reset << "(" << arg << ")\n"; + else if (inst == APPEND_IN_PLACE) + os << "APPEND_IN_PLACE " << termcolor::reset << "(" << arg << ")\n"; + else if (inst == CONCAT_IN_PLACE) + os << "CONCAT_IN_PLACE " << termcolor::reset << "(" << arg << ")\n"; + else if (inst == POP_LIST) + os << "POP_LIST " << termcolor::reset << "\n"; + else if (inst == POP_LIST_IN_PLACE) + os << "POP_LIST_IN_PLACE " << termcolor::reset << "\n"; + else if (inst == POP) + os << "POP\n"; + else if (inst == ADD) + os << "ADD\n"; + else if (inst == SUB) + os << "SUB\n"; + else if (inst == MUL) + os << "MUL\n"; + else if (inst == DIV) + os << "DIV\n"; + else if (inst == GT) + os << "GT\n"; + else if (inst == LT) + os << "LT\n"; + else if (inst == LE) + os << "LE\n"; + else if (inst == GE) + os << "GE\n"; + else if (inst == NEQ) + os << "NEQ\n"; + else if (inst == EQ) + os << "EQ\n"; + else if (inst == LEN) + os << "LEN\n"; + else if (inst == EMPTY) + os << "EMPTY\n"; + else if (inst == TAIL) + os << "TAIL\n"; + else if (inst == HEAD) + os << "HEAD\n"; + else if (inst == ISNIL) + os << "ISNIL\n"; + else if (inst == ASSERT) + os << "ASSERT\n"; + else if (inst == TO_NUM) + os << "TO_NUM\n"; + else if (inst == TO_STR) + os << "TO_STR\n"; + else if (inst == AT) + os << "AT\n"; + else if (inst == AND_) + os << "AND_\n"; + else if (inst == OR_) + os << "OR_\n"; + else if (inst == MOD) + os << "MOD\n"; + else if (inst == TYPE) + os << "TYPE\n"; + else if (inst == HASFIELD) + os << "HASFIELD\n"; + else if (inst == NOT) + os << "NOT\n"; + else + { + os << termcolor::reset << fmt::format("Unknown instruction: {:02x}", inst) << '\n' + << termcolor::reset; + return; + } + } } } + if (displayCode && segment != BytecodeSegment::HeadersOnly) + os << "\n" + << termcolor::reset; - i = cumulated_segment_size + size * 4; - cumulated_segment_size += size * 4 + 3; + ++pp; } - if (displayCode && segment != BytecodeSegment::HeadersOnly) - os << "\n" - << termcolor::reset; - - ++pp; - - if (i == b.size()) - break; } } diff --git a/src/arkreactor/VM/VM.cpp b/src/arkreactor/VM/VM.cpp index 9f8db0cd..0699dc09 100644 --- a/src/arkreactor/VM/VM.cpp +++ b/src/arkreactor/VM/VM.cpp @@ -1030,7 +1030,7 @@ namespace Ark #pragma endregion default: - throwVMError(ErrorKind::VM, fmt::format("Unknown instruction: {:x}{:x}{:x}", padding, inst, arg)); + throwVMError(ErrorKind::VM, fmt::format("Unknown instruction: {:02x}{:02x}{:04x}", padding, inst, arg)); break; } diff --git a/src/arkscript/main.cpp b/src/arkscript/main.cpp index 7ee78887..cfa419b6 100644 --- a/src/arkscript/main.cpp +++ b/src/arkscript/main.cpp @@ -92,7 +92,7 @@ int main(int argc, char** argv) ) | ( required("-bcr", "--bytecode-reader").set(selected, mode::bytecode_reader).doc("Launch the bytecode reader") - & value("file", file) + & value("file", file).doc("If file isn't a bytecode file, the cached compiled will be loaded ; if there are none, it will be compiled first") , ( option("-on", "--only-names").set(segment, Ark::BytecodeSegment::HeadersOnly).doc("Display only the bytecode segments names and sizes") | ( diff --git a/tests/unittests/BytecodeReaderSuite.cpp b/tests/unittests/BytecodeReaderSuite.cpp new file mode 100644 index 00000000..d8e8bf36 --- /dev/null +++ b/tests/unittests/BytecodeReaderSuite.cpp @@ -0,0 +1,86 @@ +#include + +#include +#include + +using namespace boost; + +ut::suite<"BytecodeReader"> bcr_suite = [] { + using namespace ut; + + Ark::BytecodeReader bcr; + bcr.feed(ARK_TESTS_ROOT "tests/unittests/resources/BytecodeReaderSuite/ackermann.arkc"); + + "bytecode"_test = [bcr] { + should("find the version") = [bcr] { + auto [major, minor, patch] = bcr.version(); + expect(that % major == 4); + expect(that % minor == 0); + expect(that % patch == 0); + }; + + should("find the timestamp") = [bcr] { + const auto time = bcr.timestamp(); + expect(that % time == 1717523961ull); + }; + + should("find the sha256") = [bcr] { + const auto sha256 = bcr.sha256(); + const auto expected_sha = std::vector { + 0xcf, 0x79, 0x82, 0x6b, 0x81, 0x5c, 0xe4, 0x11, + 0xce, 0x25, 0xbe, 0xc3, 0x05, 0x91, 0x21, 0x7f, + 0x6c, 0x70, 0x54, 0x70, 0xd8, 0x8b, 0x2b, 0x90, + 0x82, 0xcd, 0x70, 0x2e, 0xeb, 0x51, 0xb2, 0x75 + }; + expect(that % sha256 == expected_sha); + }; + + const auto [pages, start_code] = bcr.code(); + const auto values_block = bcr.values(); + const auto symbols_block = bcr.symbols(); + + should("list all symbols") = [symbols_block] { + using namespace std::literals::string_literals; + + const auto expected_symbols = std::vector { + "ackermann", "m", "n" + }; + expect(that % symbols_block.symbols == expected_symbols); + // 'ark\0' + version (2 bytes per number) + timestamp + sha -> first byte of the sym table + expect(that % symbols_block.start == 4 + 6 + 8 + 32ull); + // 50 = 4 + 6 + 8 + 32 + // + 1 for the header + // + 2 because we need to count the size of the table (uint16) + // + 3 because we need to count the \0 + expect(that % symbols_block.end == 50 + 1 + 2 + "ackermann"s.size() + "m"s.size() + "n"s.size() + 3); + }; + + should("list all values") = [symbols_block, values_block] { + const auto expected_values = std::vector { + Ark::Value(static_cast(1)), + Ark::Value(0), + Ark::Value(1), + Ark::Value(7), + Ark::Value(3) + }; + expect(that % values_block.values.size() == expected_values.size()); + expect(that % values_block.start == symbols_block.end); + // + 1 for the header + // + 2 for the size + // + 5 for the type tags + // + 2 for the pageaddr + // + 4*8 for the numbers represented as strings on 8 chars + // + 5 for the \0 at the end of each value + expect(that % values_block.end == values_block.start + 1 + 2 + 5 + 2 + 4 * 8 + 5); + }; + + should("list all code page") = [values_block, pages, start_code] { + expect(that % start_code == values_block.end); + expect(that % pages.size() == 2ull); + // 7 instructions on 4 bytes + expect(that % pages[0].size() == 7 * 4ull); + // 32 instructions on 4 bytes + expect(that % pages[1].size() == 32 * 4ull); + }; + }; +}; diff --git a/tests/unittests/resources/BytecodeReaderSuite/ackermann.arkc b/tests/unittests/resources/BytecodeReaderSuite/ackermann.arkc new file mode 100644 index 00000000..5cc0a77a Binary files /dev/null and b/tests/unittests/resources/BytecodeReaderSuite/ackermann.arkc differ