Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix the hash rewriting for ca-derivations #4282

Merged
merged 4 commits into from
Jun 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/libexpr/primops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "globals.hh"
#include "json-to-value.hh"
#include "names.hh"
#include "references.hh"
#include "path-references.hh"
#include "store-api.hh"
#include "util.hh"
#include "value-to-json.hh"
Expand Down
43 changes: 22 additions & 21 deletions src/libstore/build/local-derivation-goal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "worker.hh"
#include "builtins.hh"
#include "builtins/buildenv.hh"
#include "references.hh"
#include "path-references.hh"
#include "finally.hh"
#include "util.hh"
#include "archive.hh"
Expand Down Expand Up @@ -2379,18 +2379,21 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
continue;
auto references = *referencesOpt;

auto rewriteOutput = [&]() {
auto rewriteOutput = [&](const StringMap & rewrites) {
/* Apply hash rewriting if necessary. */
if (!outputRewrites.empty()) {
if (!rewrites.empty()) {
debug("rewriting hashes in '%1%'; cross fingers", actualPath);

/* FIXME: this is in-memory. */
StringSink sink;
dumpPath(actualPath, sink);
/* FIXME: Is this actually streaming? */
auto source = sinkToSource([&](Sink & nextSink) {
RewritingSink rsink(rewrites, nextSink);
dumpPath(actualPath, rsink);
rsink.flush();
});
Path tmpPath = actualPath + ".tmp";
restorePath(tmpPath, *source);
deletePath(actualPath);
sink.s = rewriteStrings(sink.s, outputRewrites);
StringSource source(sink.s);
restorePath(actualPath, source);
movePath(tmpPath, actualPath);

/* FIXME: set proper permissions in restorePath() so
we don't have to do another traversal. */
Expand Down Expand Up @@ -2439,7 +2442,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
"since recursive hashing is not enabled (one of outputHashMode={flat,text} is true)",
actualPath);
}
rewriteOutput();
rewriteOutput(outputRewrites);
/* FIXME optimize and deduplicate with addToStore */
std::string oldHashPart { scratchPath->hashPart() };
HashModuloSink caSink { outputHash.hashType, oldHashPart };
Expand Down Expand Up @@ -2477,16 +2480,14 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
Hash::dummy,
};
if (*scratchPath != newInfo0.path) {
// Also rewrite the output path
auto source = sinkToSource([&](Sink & nextSink) {
RewritingSink rsink2(oldHashPart, std::string(newInfo0.path.hashPart()), nextSink);
dumpPath(actualPath, rsink2);
rsink2.flush();
});
Path tmpPath = actualPath + ".tmp";
restorePath(tmpPath, *source);
deletePath(actualPath);
movePath(tmpPath, actualPath);
// If the path has some self-references, we need to rewrite
// them.
// (note that this doesn't invalidate the ca hash we calculated
// above because it's computed *modulo the self-references*, so
// it already takes this rewrite into account).
rewriteOutput(
StringMap{{oldHashPart,
std::string(newInfo0.path.hashPart())}});
}

HashResult narHashAndSize = hashPath(htSHA256, actualPath);
Expand All @@ -2508,7 +2509,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
outputRewrites.insert_or_assign(
std::string { scratchPath->hashPart() },
std::string { requiredFinalPath.hashPart() });
rewriteOutput();
rewriteOutput(outputRewrites);
auto narHashAndSize = hashPath(htSHA256, actualPath);
ValidPathInfo newInfo0 { requiredFinalPath, narHashAndSize.first };
newInfo0.narSize = narHashAndSize.second;
Expand Down
73 changes: 73 additions & 0 deletions src/libstore/path-references.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#include "path-references.hh"
#include "hash.hh"
#include "util.hh"
#include "archive.hh"

#include <map>
#include <cstdlib>
#include <mutex>
#include <algorithm>


namespace nix {


/* Takes ownership of both arguments: `hashes` is handed to the
   RefScanSink base, `backMap` is stored so that getResultPaths() can
   translate the hashes found back into store paths. */
PathRefScanSink::PathRefScanSink(StringSet && hashes, std::map<std::string, StorePath> && backMap)
: RefScanSink(std::move(hashes))
, backMap(std::move(backMap))
{ }

/* Build a scanner that looks for the hash part of every path in
   `refs`, remembering which store path each hash part belongs to. */
PathRefScanSink PathRefScanSink::fromPaths(const StorePathSet & refs)
{
    StringSet wantedHashes;
    std::map<std::string, StorePath> hashToPath;

    for (const auto & storePath : refs) {
        std::string hash { storePath.hashPart() };

        /* Each hash part is expected to show up only once in `refs`. */
        const bool isNew = hashToPath.emplace(hash, storePath).second;
        assert(isNew);

        wantedHashes.insert(hash);
    }

    return PathRefScanSink(std::move(wantedHashes), std::move(hashToPath));
}

/* Translate the hashes reported by getResult() back into the store
   paths they were derived from. */
StorePathSet PathRefScanSink::getResultPaths()
{
    StorePathSet matchedPaths;

    for (const auto & hash : getResult()) {
        const auto entry = backMap.find(hash);
        /* Every reported hash must have an entry in the back map. */
        assert(entry != backMap.end());
        matchedPaths.insert(entry->second);
    }

    return matchedPaths;
}


std::pair<StorePathSet, HashResult> scanForReferences(
const std::string & path,
const StorePathSet & refs)
{
HashSink hashSink { htSHA256 };
auto found = scanForReferences(hashSink, path, refs);
auto hash = hashSink.finish();
return std::pair<StorePathSet, HashResult>(found, hash);
}

/* Dump `path` as a NAR into a TeeSink built from a reference scanner
   for `refs` and the caller-supplied `toTee`, then return the store
   paths the scanner reports. */
StorePathSet scanForReferences(
    Sink & toTee,
    const Path & path,
    const StorePathSet & refs)
{
    auto scanner = PathRefScanSink::fromPaths(refs);
    TeeSink fanOut { scanner, toTee };

    /* Look for the hashes in the NAR dump of the path. */
    dumpPath(path, fanOut);

    return scanner.getResultPaths();
}

}
25 changes: 25 additions & 0 deletions src/libstore/path-references.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#pragma once

#include "references.hh"
#include "path.hh"

namespace nix {

/**
 * Scan the NAR dump of `path` for references to the store paths in
 * `refs`, hashing the dump with SHA-256 at the same time.
 *
 * @return the paths from `refs` that were found, paired with the
 * hash result of the dump.
 */
std::pair<StorePathSet, HashResult> scanForReferences(const Path & path, const StorePathSet & refs);

/**
 * Scan the NAR dump of `path` for references to the store paths in
 * `refs`; the dump is written through a tee that also includes
 * `toTee`.
 *
 * @return the paths from `refs` that were found.
 */
StorePathSet scanForReferences(Sink & toTee, const Path & path, const StorePathSet & refs);

/**
 * A RefScanSink that is set up from store paths and reports what it
 * found as store paths rather than as raw hash strings.
 */
class PathRefScanSink : public RefScanSink
{
/** Maps each scanned hash part back to the store path it came from. */
std::map<std::string, StorePath> backMap;

/** Private: instances are created through fromPaths(). */
PathRefScanSink(StringSet && hashes, std::map<std::string, StorePath> && backMap);

public:

/** Create a scanner that searches for the hash part of every path in `refs`. */
static PathRefScanSink fromPaths(const StorePathSet & refs);

/** The store paths whose hash parts have been found so far. */
StorePathSet getResultPaths();
};

}
80 changes: 16 additions & 64 deletions src/libstore/references.cc → src/libutil/references.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <map>
#include <cstdlib>
#include <mutex>
#include <algorithm>


namespace nix {
Expand Down Expand Up @@ -66,83 +67,34 @@ void RefScanSink::operator () (std::string_view data)
}


PathRefScanSink::PathRefScanSink(StringSet && hashes, std::map<std::string, StorePath> && backMap)
: RefScanSink(std::move(hashes))
, backMap(std::move(backMap))
{ }

PathRefScanSink PathRefScanSink::fromPaths(const StorePathSet & refs)
RewritingSink::RewritingSink(const std::string & from, const std::string & to, Sink & nextSink)
: RewritingSink({{from, to}}, nextSink)
{
StringSet hashes;
std::map<std::string, StorePath> backMap;

for (auto & i : refs) {
std::string hashPart(i.hashPart());
auto inserted = backMap.emplace(hashPart, i).second;
assert(inserted);
hashes.insert(hashPart);
}

return PathRefScanSink(std::move(hashes), std::move(backMap));
}

StorePathSet PathRefScanSink::getResultPaths()
RewritingSink::RewritingSink(const StringMap & rewrites, Sink & nextSink)
: rewrites(rewrites), nextSink(nextSink)
{
/* Map the hashes found back to their store paths. */
StorePathSet found;
for (auto & i : getResult()) {
auto j = backMap.find(i);
assert(j != backMap.end());
found.insert(j->second);
long unsigned int maxRewriteSize = 0;
for (auto & [from, to] : rewrites) {
assert(from.size() == to.size());
maxRewriteSize = std::max(maxRewriteSize, from.size());
}

return found;
}


std::pair<StorePathSet, HashResult> scanForReferences(
const std::string & path,
const StorePathSet & refs)
{
HashSink hashSink { htSHA256 };
auto found = scanForReferences(hashSink, path, refs);
auto hash = hashSink.finish();
return std::pair<StorePathSet, HashResult>(found, hash);
}

StorePathSet scanForReferences(
Sink & toTee,
const Path & path,
const StorePathSet & refs)
{
PathRefScanSink refsSink = PathRefScanSink::fromPaths(refs);
TeeSink sink { refsSink, toTee };

/* Look for the hashes in the NAR dump of the path. */
dumpPath(path, sink);

return refsSink.getResultPaths();
}


RewritingSink::RewritingSink(const std::string & from, const std::string & to, Sink & nextSink)
: from(from), to(to), nextSink(nextSink)
{
assert(from.size() == to.size());
this->maxRewriteSize = maxRewriteSize;
}

void RewritingSink::operator () (std::string_view data)
{
std::string s(prev);
s.append(data);

size_t j = 0;
while ((j = s.find(from, j)) != std::string::npos) {
matches.push_back(pos + j);
s.replace(j, from.size(), to);
}
s = rewriteStrings(s, rewrites);

prev = s.size() < from.size() ? s : std::string(s, s.size() - from.size() + 1, from.size() - 1);
prev = s.size() < maxRewriteSize
? s
: maxRewriteSize == 0
? ""
: std::string(s, s.size() - maxRewriteSize + 1, maxRewriteSize - 1);

auto consumed = s.size() - prev.size();

Expand Down
23 changes: 4 additions & 19 deletions src/libstore/references.hh → src/libutil/references.hh
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,9 @@
///@file

#include "hash.hh"
#include "path.hh"

namespace nix {

std::pair<StorePathSet, HashResult> scanForReferences(const Path & path, const StorePathSet & refs);

StorePathSet scanForReferences(Sink & toTee, const Path & path, const StorePathSet & refs);

class RefScanSink : public Sink
{
StringSet hashes;
Expand All @@ -28,28 +23,18 @@ public:
void operator () (std::string_view data) override;
};

class PathRefScanSink : public RefScanSink
{
std::map<std::string, StorePath> backMap;

PathRefScanSink(StringSet && hashes, std::map<std::string, StorePath> && backMap);

public:

static PathRefScanSink fromPaths(const StorePathSet & refs);

StorePathSet getResultPaths();
};

struct RewritingSink : Sink
{
std::string from, to, prev;
const StringMap rewrites;
long unsigned int maxRewriteSize;
std::string prev;
Sink & nextSink;
uint64_t pos = 0;

std::vector<uint64_t> matches;

RewritingSink(const std::string & from, const std::string & to, Sink & nextSink);
RewritingSink(const StringMap & rewrites, Sink & nextSink);

void operator () (std::string_view data) override;

Expand Down
46 changes: 46 additions & 0 deletions src/libutil/tests/references.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#include "references.hh"
#include <gtest/gtest.h>

namespace nix {

using std::string;

struct RewriteParams {
string originalString, finalString;
StringMap rewrites;

friend std::ostream& operator<<(std::ostream& os, const RewriteParams& bar) {
StringSet strRewrites;
for (auto & [from, to] : bar.rewrites)
strRewrites.insert(from + "->" + to);
return os <<
"OriginalString: " << bar.originalString << std::endl <<
"Rewrites: " << concatStringsSep(",", strRewrites) << std::endl <<
"Expected result: " << bar.finalString;
}
};

/* Value-parameterised fixture; each instance receives one RewriteParams. */
class RewriteTest : public ::testing::TestWithParam<RewriteParams> {
};

/* Feed the original string through a RewritingSink configured with the
   case's rewrites and compare what reaches the downstream sink with
   the expected string. */
TEST_P(RewriteTest, IdentityRewriteIsIdentity) {
    const RewriteParams & param = GetParam();

    StringSink rewritten;
    RewritingSink rewriter(param.rewrites, rewritten);
    rewriter(param.originalString);
    /* Flush before inspecting the downstream sink. */
    rewriter.flush();

    ASSERT_EQ(rewritten.s, param.finalString);
}

/* Cases fed to RewriteTest; each is {input, expected output, rewrites}. */
INSTANTIATE_TEST_CASE_P(
references,
RewriteTest,
::testing::Values(
/* "foo" becomes "bar", and that "bar" is not rewritten again to "baz". */
RewriteParams{ "foooo", "baroo", {{"foo", "bar"}, {"bar", "baz"}}},
/* Only the key that occurs in the input ("foo") is applied. */
RewriteParams{ "foooo", "bazoo", {{"fou", "bar"}, {"foo", "baz"}}},
/* An empty rewrite map leaves the input unchanged. */
RewriteParams{ "foooo", "foooo", {}}
)
);

}

Loading