Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance bytecode reader #469

Merged
merged 3 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
feat(refactor): finally mutualising code between the state and the bytecode reader
  • Loading branch information
SuperFola committed Jun 4, 2024
commit a5b92c61e559efe4d44e862971cfa9b1131f7ec0
7 changes: 7 additions & 0 deletions include/Ark/Compiler/BytecodeReader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ namespace Ark
*/
void feed(const std::string& file);

/**
* @brief Construct needed data before displaying information about a given bytecode
*
* @param bytecode the bytecode to feed to the reader
*/
void feed(const bytecode_t& bytecode);

/**
* Check for the presence of the magic header
* @return true if the magic 'ark\0' was found
Expand Down
11 changes: 5 additions & 6 deletions include/Ark/VM/State.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ namespace Ark
/**
* @brief Construct a new State object
*
* @param libpath a list of search paths for the std library
* @param libenv a list of search paths for the std library
*/
explicit State(const std::vector<std::filesystem::path>& libpath = {}) noexcept;
explicit State(const std::vector<std::filesystem::path>& libenv = {}) noexcept;

/**
* @brief Feed the state by giving it the path to an existing bytecode file
Expand Down Expand Up @@ -113,13 +113,12 @@ namespace Ark
friend class Repl;

private:
static bool checkMagic(const bytecode_t& bytecode);

/**
* @brief Called to configure the state (set the bytecode, debug level, call the compiler...)
*
* @param bcr reference to a pre-fed bytecode reader
*/
void configure();
void configure(const BytecodeReader& bcr);

/**
* @brief Reads and compiles code of file
Expand All @@ -129,7 +128,7 @@ namespace Ark
* @return true on success
* @return false on failure and raise an exception
*/
bool compile(const std::string& file, const std::string& output);
bool compile(const std::string& file, const std::string& output) const;

static void throwStateError(const std::string& message)
{
Expand Down
6 changes: 4 additions & 2 deletions src/arkreactor/Compiler/BytecodeReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#include <Ark/Builtins/Builtins.hpp>

#include <iomanip>
#include <numeric>
#include <termcolor/proxy.hpp>
#include <picosha2.h>
#include <fmt/core.h>
Expand All @@ -13,7 +12,10 @@ namespace Ark
{
using namespace Ark::internal;

// TODO: add exporters for symbols... so that the state doesn't have to do it all over again
void BytecodeReader::feed(const bytecode_t& bytecode)
{
m_bytecode = bytecode;
}

void BytecodeReader::feed(const std::string& file)
{
Expand Down
153 changes: 21 additions & 132 deletions src/arkreactor/VM/State.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#endif

#include <picosha2.h>
#include <Ark/Compiler/BytecodeReader.hpp>
#include <termcolor/proxy.hpp>
#include <fmt/core.h>

Expand All @@ -31,14 +32,16 @@ namespace Ark

bool State::feed(const bytecode_t& bytecode)
{
if (!checkMagic(bytecode))
BytecodeReader bcr;
bcr.feed(bytecode);
if (!bcr.checkMagic())
return false;

m_bytecode = bytecode;

try
{
configure();
configure(bcr);
return true;
}
catch (const std::exception& e) // FIXME I don't like this shit
Expand All @@ -48,7 +51,7 @@ namespace Ark
}
}

bool State::compile(const std::string& file, const std::string& output)
bool State::compile(const std::string& file, const std::string& output) const
{
Welder welder(m_debug_level, m_libenv);

Expand Down Expand Up @@ -78,8 +81,10 @@ namespace Ark
}
m_filename = file;

bytecode_t bytecode = Utils::readFileAsBytes(file);
if (!checkMagic(bytecode)) // couldn't read magic number, it's a source file
const bytecode_t bytecode = Utils::readFileAsBytes(file);
BytecodeReader bcr;
bcr.feed(bytecode);
if (!bcr.checkMagic()) // couldn't read magic number, it's a source file
{
// check if it's in the arkscript cache
const std::string short_filename = (std::filesystem::path(file)).filename().string();
Expand Down Expand Up @@ -112,7 +117,7 @@ namespace Ark
return feed(welder.bytecode());
}

void State::loadFunction(const std::string& name, Value::ProcType function) noexcept
void State::loadFunction(const std::string& name, const Value::ProcType function) noexcept
{
m_binded[name] = Value(function);
}
Expand All @@ -137,41 +142,11 @@ namespace Ark
m_libenv = libenv;
}

bool State::checkMagic(const bytecode_t& bytecode)
void State::configure(const BytecodeReader& bcr)
{
return (bytecode.size() > 4 && bytecode[0] == 'a' &&
bytecode[1] == 'r' && bytecode[2] == 'k' &&
bytecode[3] == internal::Instruction::NOP);
}

void State::configure()
{
// FIXME refactor this crap and try to mutualise with the bytecode reader??
using namespace internal;

// configure tables and pages
std::size_t i = 0;

auto readNumber = [&, this](std::size_t& i) -> uint16_t {
auto x = (static_cast<uint16_t>(m_bytecode[i]) << 8);
++i;
auto y = static_cast<uint16_t>(m_bytecode[i]);
return x + y;
};

// read tables and check if bytecode is valid
if (!(m_bytecode.size() > 4 && m_bytecode[i++] == 'a' &&
m_bytecode[i++] == 'r' && m_bytecode[i++] == 'k' &&
m_bytecode[i++] == Instruction::NOP))
throwStateError("invalid format: couldn't find magic constant");

uint16_t major = readNumber(i);
i++;
uint16_t minor = readNumber(i);
i++;
uint16_t patch = readNumber(i);
i++;

const auto [major, minor, patch] = bcr.version();
if (major != ARK_VERSION_MAJOR)
{
std::string str_version = std::to_string(major) + "." +
Expand All @@ -180,110 +155,24 @@ namespace Ark
throwStateError(fmt::format("Compiler and VM versions don't match: got {} while running {}", str_version, ARK_VERSION));
}

using timestamp_t = unsigned long long;
timestamp_t timestamp [[maybe_unused]] = 0;
auto aa = (static_cast<timestamp_t>(m_bytecode[i]) << 56),
ba = (static_cast<timestamp_t>(m_bytecode[++i]) << 48),
ca = (static_cast<timestamp_t>(m_bytecode[++i]) << 40),
da = (static_cast<timestamp_t>(m_bytecode[++i]) << 32),
ea = (static_cast<timestamp_t>(m_bytecode[++i]) << 24),
fa = (static_cast<timestamp_t>(m_bytecode[++i]) << 16),
ga = (static_cast<timestamp_t>(m_bytecode[++i]) << 8),
ha = (static_cast<timestamp_t>(m_bytecode[++i]));
i++;
timestamp = aa + ba + ca + da + ea + fa + ga + ha;
const auto bytecode_hash = bcr.sha256();

std::vector<unsigned char> hash(picosha2::k_digest_size);
picosha2::hash256(m_bytecode.begin() + i + picosha2::k_digest_size, m_bytecode.end(), hash);
picosha2::hash256(m_bytecode.begin() + 18 + picosha2::k_digest_size, m_bytecode.end(), hash);
// checking integrity
for (std::size_t j = 0; j < picosha2::k_digest_size; ++j)
{
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
if (hash[j] != m_bytecode[i])
if (hash[j] != bytecode_hash[j])
throwStateError("Integrity check failed");
#endif
++i;
}

if (m_bytecode[i] == SYM_TABLE_START)
{
i++;
uint16_t size = readNumber(i);
m_symbols.reserve(size);
i++;

for (uint16_t j = 0; j < size; ++j)
{
std::string symbol;
while (m_bytecode[i] != 0)
symbol.push_back(m_bytecode[i++]);
i++;

m_symbols.push_back(symbol);
}
}
else
throwStateError("Couldn't find symbols table");

if (m_bytecode[i] == VAL_TABLE_START)
{
i++;
uint16_t size = readNumber(i);
m_constants.reserve(size);
i++;

for (uint16_t j = 0; j < size; ++j)
{
uint8_t type = m_bytecode[i];
i++;

if (type == NUMBER_TYPE)
{
std::string val;
while (m_bytecode[i] != 0)
val.push_back(m_bytecode[i++]);
i++;

m_constants.emplace_back(std::stod(val));
}
else if (type == STRING_TYPE)
{
std::string val;
while (m_bytecode[i] != 0)
val.push_back(m_bytecode[i++]);
i++;

m_constants.emplace_back(val);
}
else if (type == FUNC_TYPE)
{
uint16_t addr = readNumber(i);
i++;
m_constants.emplace_back(addr);
i++; // skip NOP
}
else
throwStateError("Unknown value type for value " + std::to_string(j));
}
}
else
throwStateError("Couldn't find constants table");

while (m_bytecode[i] == CODE_SEGMENT_START)
{
i++;
uint16_t size = readNumber(i) * 4; // because the instructions are on 4 bytes
i++;

m_pages.emplace_back();
m_pages.back().reserve(size);

for (uint16_t j = 0; j < size; ++j)
m_pages.back().push_back(m_bytecode[i++]);

if (i == m_bytecode.size())
break;
}
// FIXME: the symbols are going to be read 3 times and the values twice,
// because code() calls values(), which in turn calls symbols()
m_symbols = bcr.symbols().symbols;
m_constants = bcr.values().values;
m_pages = bcr.code().pages;
}

void State::reset() noexcept
Expand Down
6 changes: 3 additions & 3 deletions tests/errors/run-tests
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

file=arkscript

if [ -f ../../build/Release/${file}.exe ]; then
if [ -f ../../cmake-build-debug/${file} ]; then
ark=../../cmake-build-debug/${file}
elif [ -f ../../build/Release/${file}.exe ]; then
ark=../../build/Release/${file}.exe
elif [ -f ../../build/${file} ]; then
ark=../../build/${file}
elif [ -f ../../cmake-build-debug/${file} ]; then
ark=../../cmake-build-debug/${file}
else
echo "No $file executable found" && exit 1
fi
Expand Down
Loading