Skip to content

Commit

Permalink
Merge pull request #6614 from RasmusRendal/spaces
Browse files Browse the repository at this point in the history
Implement support for percent encoded filepaths for flakerefs
  • Loading branch information
thufschmitt authored Sep 26, 2023
2 parents b7d88fe + b343309 commit 9a78d87
Show file tree
Hide file tree
Showing 7 changed files with 328 additions and 264 deletions.
4 changes: 4 additions & 0 deletions doc/manual/src/release-notes/rl-next.md
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
# Release X.Y (202?-??-??)

- [URL flake references](@docroot@/command-ref/new-cli/nix3-flake.md#flake-references) now support [percent-encoded](https://datatracker.ietf.org/doc/html/rfc3986#section-2.1) characters.

- [Path-like flake references](@docroot@/command-ref/new-cli/nix3-flake.md#path-like-syntax) now accept arbitrary Unicode characters (except `#` and `?`).
256 changes: 148 additions & 108 deletions src/libexpr/flake/flakeref.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,32 +69,130 @@ std::optional<FlakeRef> maybeParseFlakeRef(
}
}

std::pair<FlakeRef, std::string> parseFlakeRefWithFragment(
/* Parse a path-like flake reference of the form
   '<path>[?<query>][#<fragment>]'.

   'url' is split at the first '?' or '#'; no character-set validation
   is applied to the path part. The query (if any) is decoded with
   decodeQuery() and the fragment (if any) with percentDecode().

   If 'baseDir' is set, the path is made absolute with
   absPath(path, baseDir). For flakes, unless 'allowMissing' is set,
   the parent directories are searched for a 'flake.nix'; and if the
   resulting directory lives inside a Git checkout, a 'git+file'
   reference to the checkout root is returned, with the 'dir' query
   parameter set to the relative subdirectory.

   If 'baseDir' is not set, the path must be absolute.

   Returns the flake reference together with the decoded fragment.
   Throws Error or BadURL when the path cannot be used as a flake
   reference (see the individual checks below). */
std::pair<FlakeRef, std::string> parsePathFlakeRefWithFragment(
    const std::string & url,
    const std::optional<Path> & baseDir,
    bool allowMissing,
    bool isFlake)
{
    using namespace fetchers;

    std::string path = url;
    std::string fragment;
    std::map<std::string, std::string> query;

    const auto pathEnd = url.find_first_of("#?");
    if (pathEnd != std::string::npos) {
        path = url.substr(0, pathEnd);

        auto fragmentStart = pathEnd;

        if (url[pathEnd] == '?') {
            /* The query runs from just after the '?' up to the '#', or
               to the end of the string when there is no fragment. Note
               that substr() takes a length, not an end position. */
            fragmentStart = url.find('#', pathEnd + 1);
            const auto queryLength = fragmentStart == std::string::npos
                ? std::string::npos
                : fragmentStart - pathEnd - 1;
            query = decodeQuery(url.substr(pathEnd + 1, queryLength));
        }

        if (fragmentStart != std::string::npos)
            fragment = percentDecode(url.substr(fragmentStart + 1));
    }

    if (baseDir) {
        /* Check if 'url' is a path (either absolute or relative
           to 'baseDir'). If so, search upward to the root of the
           repo (i.e. the directory containing .git). */

        path = absPath(path, baseDir);

        if (isFlake) {

            if (!allowMissing && !pathExists(path + "/flake.nix")) {
                notice("path '%s' does not contain a 'flake.nix', searching up",path);

                // Save device to detect filesystem boundary
                dev_t device = lstat(path).st_dev;
                bool found = false;
                while (path != "/") {
                    if (pathExists(path + "/flake.nix")) {
                        found = true;
                        break;
                    } else if (pathExists(path + "/.git"))
                        throw Error("path '%s' is not part of a flake (neither it nor its parent directories contain a 'flake.nix' file)", path);
                    else {
                        if (lstat(path).st_dev != device)
                            throw Error("unable to find a flake before encountering filesystem boundary at '%s'", path);
                    }
                    path = dirOf(path);
                }
                if (!found)
                    throw BadURL("could not find a flake.nix file");
            }

            if (!S_ISDIR(lstat(path).st_mode))
                throw BadURL("path '%s' is not a flake (because it's not a directory)", path);

            if (!allowMissing && !pathExists(path + "/flake.nix"))
                throw BadURL("path '%s' is not a flake (because it doesn't contain a 'flake.nix' file)", path);

            /* Walk up from the flake directory looking for a Git
               checkout, accumulating the path relative to it in
               'subdir'. */
            auto flakeRoot = path;
            std::string subdir;

            while (flakeRoot != "/") {
                if (pathExists(flakeRoot + "/.git")) {
                    auto base = std::string("git+file://") + flakeRoot;

                    auto parsedURL = ParsedURL{
                        .url = base, // FIXME
                        .base = base,
                        .scheme = "git+file",
                        .authority = "",
                        .path = flakeRoot,
                        .query = query,
                    };

                    if (subdir != "") {
                        /* The subdirectory is implied by the path, so
                           an explicit 'dir' would be ambiguous. */
                        if (parsedURL.query.count("dir"))
                            throw Error("flake URL '%s' has an inconsistent 'dir' parameter", url);
                        parsedURL.query.insert_or_assign("dir", subdir);
                    }

                    if (pathExists(flakeRoot + "/.git/shallow"))
                        parsedURL.query.insert_or_assign("shallow", "1");

                    /* Pass 'isFlake' through, as parseFlakeIdRef() and
                       parseURLFlakeRef() do. */
                    return std::make_pair(
                        FlakeRef(fetchers::Input::fromURL(parsedURL, isFlake), getOr(parsedURL.query, "dir", "")),
                        fragment);
                }

                subdir = std::string(baseNameOf(flakeRoot)) + (subdir.empty() ? "" : "/" + subdir);
                flakeRoot = dirOf(flakeRoot);
            }
        }

    } else {
        if (!hasPrefix(path, "/"))
            throw BadURL("flake reference '%s' is not an absolute path", url);
        path = canonPath(path + "/" + getOr(query, "dir", ""));
    }

    /* Not inside a Git checkout (or not a flake, or no 'baseDir'):
       fall back to a plain 'path' input. */
    fetchers::Attrs attrs;
    attrs.insert_or_assign("type", "path");
    attrs.insert_or_assign("path", path);

    return std::make_pair(FlakeRef(fetchers::Input::fromAttrs(std::move(attrs)), ""), fragment);
}


/* Check if 'url' is a flake ID. This is an abbreviated syntax for
'flake:<flake-id>?ref=<ref>&rev=<rev>'. */
std::optional<std::pair<FlakeRef, std::string>> parseFlakeIdRef(
const std::string & url,
bool isFlake
)
{
std::smatch match;

static std::regex flakeRegex(
"((" + flakeIdRegexS + ")(?:/(?:" + refAndOrRevRegex + "))?)"
+ "(?:#(" + queryRegex + "))?",
std::regex::ECMAScript);

std::smatch match;

/* Check if 'url' is a flake ID. This is an abbreviated syntax for
'flake:<flake-id>?ref=<ref>&rev=<rev>'. */

if (std::regex_match(url, match, flakeRegex)) {
auto parsedURL = ParsedURL{
.url = url,
Expand All @@ -105,111 +203,53 @@ std::pair<FlakeRef, std::string> parseFlakeRefWithFragment(
};

return std::make_pair(
FlakeRef(Input::fromURL(parsedURL, isFlake), ""),
FlakeRef(fetchers::Input::fromURL(parsedURL, isFlake), ""),
percentDecode(match.str(6)));
}

else if (std::regex_match(url, match, pathUrlRegex)) {
std::string path = match[1];
std::string fragment = percentDecode(match.str(3));

if (baseDir) {
/* Check if 'url' is a path (either absolute or relative
to 'baseDir'). If so, search upward to the root of the
repo (i.e. the directory containing .git). */

path = absPath(path, baseDir);

if (isFlake) {

if (!allowMissing && !pathExists(path + "/flake.nix")){
notice("path '%s' does not contain a 'flake.nix', searching up",path);

// Save device to detect filesystem boundary
dev_t device = lstat(path).st_dev;
bool found = false;
while (path != "/") {
if (pathExists(path + "/flake.nix")) {
found = true;
break;
} else if (pathExists(path + "/.git"))
throw Error("path '%s' is not part of a flake (neither it nor its parent directories contain a 'flake.nix' file)", path);
else {
if (lstat(path).st_dev != device)
throw Error("unable to find a flake before encountering filesystem boundary at '%s'", path);
}
path = dirOf(path);
}
if (!found)
throw BadURL("could not find a flake.nix file");
}

if (!S_ISDIR(lstat(path).st_mode))
throw BadURL("path '%s' is not a flake (because it's not a directory)", path);

if (!allowMissing && !pathExists(path + "/flake.nix"))
throw BadURL("path '%s' is not a flake (because it doesn't contain a 'flake.nix' file)", path);

auto flakeRoot = path;
std::string subdir;

while (flakeRoot != "/") {
if (pathExists(flakeRoot + "/.git")) {
auto base = std::string("git+file://") + flakeRoot;

auto parsedURL = ParsedURL{
.url = base, // FIXME
.base = base,
.scheme = "git+file",
.authority = "",
.path = flakeRoot,
.query = decodeQuery(match[2]),
};

if (subdir != "") {
if (parsedURL.query.count("dir"))
throw Error("flake URL '%s' has an inconsistent 'dir' parameter", url);
parsedURL.query.insert_or_assign("dir", subdir);
}

if (pathExists(flakeRoot + "/.git/shallow"))
parsedURL.query.insert_or_assign("shallow", "1");

return std::make_pair(
FlakeRef(Input::fromURL(parsedURL, isFlake), getOr(parsedURL.query, "dir", "")),
fragment);
}
return {};
}

subdir = std::string(baseNameOf(flakeRoot)) + (subdir.empty() ? "" : "/" + subdir);
flakeRoot = dirOf(flakeRoot);
}
}
std::optional<std::pair<FlakeRef, std::string>> parseURLFlakeRef(
const std::string & url,
const std::optional<Path> & baseDir,
bool isFlake
)
{
ParsedURL parsedURL;
try {
parsedURL = parseURL(url);
} catch (BadURL &) {
return std::nullopt;
}

} else {
if (!hasPrefix(path, "/"))
throw BadURL("flake reference '%s' is not an absolute path", url);
auto query = decodeQuery(match[2]);
path = canonPath(path + "/" + getOr(query, "dir", ""));
}
std::string fragment;
std::swap(fragment, parsedURL.fragment);

fetchers::Attrs attrs;
attrs.insert_or_assign("type", "path");
attrs.insert_or_assign("path", path);
auto input = fetchers::Input::fromURL(parsedURL, isFlake);
input.parent = baseDir;

return std::make_pair(FlakeRef(Input::fromAttrs(std::move(attrs)), ""), fragment);
}
return std::make_pair(
FlakeRef(std::move(input), getOr(parsedURL.query, "dir", "")),
fragment);
}

else {
auto parsedURL = parseURL(url);
std::string fragment;
std::swap(fragment, parsedURL.fragment);
/* Parse a flake reference and its fragment.

   The supported syntaxes are tried in order: first the abbreviated
   flake ID syntax (parseFlakeIdRef), then the URL-like syntax
   (parseURLFlakeRef), and finally the path-like syntax
   (parsePathFlakeRefWithFragment). The first two return std::nullopt
   when 'url' does not match them; the last one either succeeds or
   throws. */
std::pair<FlakeRef, std::string> parseFlakeRefWithFragment(
    const std::string & url,
    const std::optional<Path> & baseDir,
    bool allowMissing,
    bool isFlake)
{
    if (auto idRef = parseFlakeIdRef(url, isFlake))
        return *idRef;

    if (auto urlRef = parseURLFlakeRef(url, baseDir, isFlake))
        return *urlRef;

    return parsePathFlakeRefWithFragment(url, baseDir, allowMissing, isFlake);
}

Expand Down
9 changes: 9 additions & 0 deletions src/libutil/tests/url.cc
Original file line number Diff line number Diff line change
Expand Up @@ -335,4 +335,13 @@ namespace nix {
ASSERT_EQ(d, s);
}

TEST(percentEncode, yen) {
    // https://en.wikipedia.org/wiki/Percent-encoding#Character_data
    // U+5186 is encoded in UTF-8 as the three bytes E5 86 86, hence the
    // expected percent-encoding below.
    std::string s = reinterpret_cast<const char*>(u8"円");
    std::string e = "%E5%86%86";

    ASSERT_EQ(percentEncode(s), e);
    ASSERT_EQ(percentDecode(e), s);
}

}
2 changes: 1 addition & 1 deletion src/libutil/url.cc
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ std::string percentEncode(std::string_view s, std::string_view keep)
|| keep.find(c) != std::string::npos)
res += c;
else
res += fmt("%%%02X", (unsigned int) c);
res += fmt("%%%02X", c & 0xFF);
return res;
}

Expand Down
9 changes: 9 additions & 0 deletions src/nix/flake.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ inputs.nixpkgs = {
};
```

Following [RFC 3986](https://datatracker.ietf.org/doc/html/rfc3986#section-2.1),
characters outside of the allowed range (i.e. neither [reserved characters](https://datatracker.ietf.org/doc/html/rfc3986#section-2.2)
nor [unreserved characters](https://datatracker.ietf.org/doc/html/rfc3986#section-2.3))
must be percent-encoded.

### Examples

Here are some examples of flake references in their URL-like representation:
Expand Down Expand Up @@ -103,10 +108,14 @@ The semantic of such a path is as follows:
2. The filesystem root (/), or
3. A folder on a different mount point.

Contrary to URL-like references, path-like flake references can contain arbitrary Unicode characters (except `#` and `?`).

### Examples

* `.`: The flake to which the current directory belongs.
* `/home/alice/src/patchelf`: A flake in some other directory.
* `./../sub directory/with Ûñî©ôδ€`: A flake in another relative directory that
has Unicode characters in its name.

## Flake reference attributes

Expand Down
10 changes: 5 additions & 5 deletions tests/flakes/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,10 @@ createGitRepo() {
local repo="$1"
local extraArgs="${2-}"

rm -rf $repo $repo.tmp
mkdir -p $repo
rm -rf "$repo" "$repo".tmp
mkdir -p "$repo"

git -C $repo init $extraArgs
git -C $repo config user.email "foobar@example.com"
git -C $repo config user.name "Foobar"
git -C "$repo" init $extraArgs
git -C "$repo" config user.email "foobar@example.com"
git -C "$repo" config user.name "Foobar"
}
Loading

0 comments on commit 9a78d87

Please sign in to comment.