From 91ba7b230777e3fb023bda48c269d533702e50e8 Mon Sep 17 00:00:00 2001 From: Robert Hensing Date: Wed, 6 Dec 2023 12:41:47 +0100 Subject: [PATCH 1/9] isAllowedURI: Extract function and test --- src/libexpr/eval.cc | 18 +++++-- src/libexpr/eval.hh | 5 ++ tests/unit/libexpr/eval.cc | 106 +++++++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+), 5 deletions(-) create mode 100644 tests/unit/libexpr/eval.cc diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 9e494148e34..0eb6f406e00 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -599,21 +599,29 @@ void EvalState::allowAndSetStorePathString(const StorePath & storePath, Value & mkStorePathString(storePath, v); } -void EvalState::checkURI(const std::string & uri) +bool isAllowedURI(std::string_view uri, const Strings & allowedUris) { - if (!evalSettings.restrictEval) return; - /* 'uri' should be equal to a prefix, or in a subdirectory of a prefix. Thus, the prefix https://github.co does not permit access to https://github.com. Note: this allows 'http://' and 'https://' as prefixes for any http/https URI. */ - for (auto & prefix : evalSettings.allowedUris.get()) + for (auto & prefix : allowedUris) { if (uri == prefix || (uri.size() > prefix.size() && prefix.size() > 0 && hasPrefix(uri, prefix) && (prefix[prefix.size() - 1] == '/' || uri[prefix.size()] == '/'))) - return; + return true; + } + + return false; +} + +void EvalState::checkURI(const std::string & uri) +{ + if (!evalSettings.restrictEval) return; + + if (isAllowedURI(uri, evalSettings.allowedUris.get())) return; /* If the URI is a path, then check it against allowedPaths as well. */ diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh index f3f6d35b9f7..6008c3f6001 100644 --- a/src/libexpr/eval.hh +++ b/src/libexpr/eval.hh @@ -837,6 +837,11 @@ std::string showType(const Value & v); */ SourcePath resolveExprPath(SourcePath path); +/** + * Whether a URI is allowed, assuming restrictEval is enabled + */ +bool isAllowedURI(std::string_view uri, const Strings & allowedPaths); + struct InvalidPathError : EvalError { Path path; diff --git a/tests/unit/libexpr/eval.cc b/tests/unit/libexpr/eval.cc new file mode 100644 index 00000000000..cc5d6bbfa6d --- /dev/null +++ b/tests/unit/libexpr/eval.cc @@ -0,0 +1,106 @@ +#include +#include + +#include "eval.hh" +#include "tests/libexpr.hh" + +namespace nix { + +TEST(nix_isAllowedURI, http_example_com) { + Strings allowed; + allowed.push_back("http://example.com"); + + ASSERT_TRUE(isAllowedURI("http://example.com", allowed)); + ASSERT_TRUE(isAllowedURI("http://example.com/foo", allowed)); + ASSERT_TRUE(isAllowedURI("http://example.com/foo/", allowed)); + ASSERT_FALSE(isAllowedURI("/", allowed)); + ASSERT_FALSE(isAllowedURI("http://example.co", allowed)); + ASSERT_FALSE(isAllowedURI("http://example.como", allowed)); + ASSERT_FALSE(isAllowedURI("http://example.org", allowed)); + ASSERT_FALSE(isAllowedURI("http://example.org/foo", allowed)); +} + +TEST(nix_isAllowedURI, http_example_com_foo) { + Strings allowed; + allowed.push_back("http://example.com/foo"); + + ASSERT_TRUE(isAllowedURI("http://example.com/foo", allowed)); + ASSERT_TRUE(isAllowedURI("http://example.com/foo/", allowed)); + ASSERT_FALSE(isAllowedURI("/foo", allowed)); + ASSERT_FALSE(isAllowedURI("http://example.com", allowed)); + ASSERT_FALSE(isAllowedURI("http://example.como", allowed)); + ASSERT_FALSE(isAllowedURI("http://example.org/foo", allowed)); + // Broken? + // ASSERT_TRUE(isAllowedURI("http://example.com/foo?ok=1", allowed)); +} + +TEST(nix_isAllowedURI, http) { + Strings allowed; + allowed.push_back("http://"); + + ASSERT_TRUE(isAllowedURI("http://", allowed)); + ASSERT_TRUE(isAllowedURI("http://example.com", allowed)); + ASSERT_TRUE(isAllowedURI("http://example.com/foo", allowed)); + ASSERT_TRUE(isAllowedURI("http://example.com/foo/", allowed)); + ASSERT_TRUE(isAllowedURI("http://example.com", allowed)); + ASSERT_FALSE(isAllowedURI("/", allowed)); + ASSERT_FALSE(isAllowedURI("https://", allowed)); + ASSERT_FALSE(isAllowedURI("http:foo", allowed)); +} + +TEST(nix_isAllowedURI, https) { + Strings allowed; + allowed.push_back("https://"); + + ASSERT_TRUE(isAllowedURI("https://example.com", allowed)); + ASSERT_TRUE(isAllowedURI("https://example.com/foo", allowed)); + ASSERT_FALSE(isAllowedURI("http://example.com", allowed)); + ASSERT_FALSE(isAllowedURI("http://example.com/https:", allowed)); +} + +TEST(nix_isAllowedURI, absolute_path) { + Strings allowed; + allowed.push_back("/var/evil"); // bad idea + + ASSERT_TRUE(isAllowedURI("/var/evil", allowed)); + ASSERT_TRUE(isAllowedURI("/var/evil/", allowed)); + ASSERT_TRUE(isAllowedURI("/var/evil/foo", allowed)); + ASSERT_TRUE(isAllowedURI("/var/evil/foo/", allowed)); + ASSERT_FALSE(isAllowedURI("/", allowed)); + ASSERT_FALSE(isAllowedURI("/var/evi", allowed)); + ASSERT_FALSE(isAllowedURI("/var/evilo", allowed)); + ASSERT_FALSE(isAllowedURI("/var/evilo/", allowed)); + ASSERT_FALSE(isAllowedURI("/var/evilo/foo", allowed)); + ASSERT_FALSE(isAllowedURI("http://example.com/var/evil", allowed)); + ASSERT_FALSE(isAllowedURI("http://example.com//var/evil", allowed)); + ASSERT_FALSE(isAllowedURI("http://example.com//var/evil/foo", allowed)); +} + +TEST(nix_isAllowedURI, file_url) { + Strings allowed; + allowed.push_back("file:///var/evil"); // bad idea + + ASSERT_TRUE(isAllowedURI("file:///var/evil", allowed)); + ASSERT_TRUE(isAllowedURI("file:///var/evil/", allowed)); + ASSERT_TRUE(isAllowedURI("file:///var/evil/foo", allowed)); + ASSERT_TRUE(isAllowedURI("file:///var/evil/foo/", allowed)); + ASSERT_FALSE(isAllowedURI("/", allowed)); + ASSERT_FALSE(isAllowedURI("/var/evi", allowed)); + ASSERT_FALSE(isAllowedURI("/var/evilo", allowed)); + ASSERT_FALSE(isAllowedURI("/var/evilo/", allowed)); + ASSERT_FALSE(isAllowedURI("/var/evilo/foo", allowed)); + ASSERT_FALSE(isAllowedURI("http://example.com/var/evil", allowed)); + ASSERT_FALSE(isAllowedURI("http://example.com//var/evil", allowed)); + ASSERT_FALSE(isAllowedURI("http://example.com//var/evil/foo", allowed)); + ASSERT_FALSE(isAllowedURI("http://var/evil", allowed)); + ASSERT_FALSE(isAllowedURI("http:///var/evil", allowed)); + ASSERT_FALSE(isAllowedURI("http://var/evil/", allowed)); + ASSERT_FALSE(isAllowedURI("file:///var/evi", allowed)); + ASSERT_FALSE(isAllowedURI("file:///var/evilo", allowed)); + ASSERT_FALSE(isAllowedURI("file:///var/evilo/", allowed)); + ASSERT_FALSE(isAllowedURI("file:///var/evilo/foo", allowed)); + ASSERT_FALSE(isAllowedURI("file:///", allowed)); + ASSERT_FALSE(isAllowedURI("file://", allowed)); +} + +} // namespace nix \ No newline at end of file From 6cbba914a70eb5da6447fee5528a63723ed13245 Mon Sep 17 00:00:00 2001 From: Robert Hensing Date: Wed, 6 Dec 2023 12:43:20 +0100 Subject: [PATCH 2/9] isAllowedURI: Remove incorrect note --- src/libexpr/eval.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 0eb6f406e00..d8a36fa02a5 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -603,8 +603,7 @@ bool isAllowedURI(std::string_view uri, const Strings & allowedUris) { /* 'uri' should be equal to a prefix, or in a subdirectory of a prefix. Thus, the prefix https://github.co does not permit - access to https://github.com. Note: this allows 'http://' and - 'https://' as prefixes for any http/https URI. */ + access to https://github.com. */ for (auto & prefix : allowedUris) { if (uri == prefix || (uri.size() > prefix.size() From 1fa958dda1ef0cb37441ef8d1a84faf6d501ac12 Mon Sep 17 00:00:00 2001 From: Robert Hensing Date: Wed, 6 Dec 2023 14:08:22 +0100 Subject: [PATCH 3/9] isAllowedURI: Format --- src/libexpr/eval.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index d8a36fa02a5..9e541f29328 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -605,11 +605,14 @@ bool isAllowedURI(std::string_view uri, const Strings & allowedUris) prefix. Thus, the prefix https://github.co does not permit access to https://github.com. */ for (auto & prefix : allowedUris) { - if (uri == prefix || - (uri.size() > prefix.size() - && prefix.size() > 0 - && hasPrefix(uri, prefix) - && (prefix[prefix.size() - 1] == '/' || uri[prefix.size()] == '/'))) + if (uri == prefix + // Allow access to subdirectories of the prefix. + || (uri.size() > prefix.size() + && prefix.size() > 0 + && hasPrefix(uri, prefix) + && ( + prefix[prefix.size() - 1] == '/' + || uri[prefix.size()] == '/'))) return true; } From 79eb2920bb51c7ec9528a403986e79f04738e2be Mon Sep 17 00:00:00 2001 From: Robert Hensing Date: Wed, 6 Dec 2023 14:39:49 +0100 Subject: [PATCH 4/9] Add nix::isASCII*, locale-independent --- src/libutil/string.hh | 17 +++++++++++ tests/unit/libutil/string.cc | 59 ++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 src/libutil/string.hh create mode 100644 tests/unit/libutil/string.cc diff --git a/src/libutil/string.hh b/src/libutil/string.hh new file mode 100644 index 00000000000..16ef7564321 --- /dev/null +++ b/src/libutil/string.hh @@ -0,0 +1,17 @@ +#pragma once + +namespace nix { + + /** Locale-independent version of std::islower(). */ + inline bool isASCIILower(char c) { return c >= 'a' && c <= 'z'; }; + + /** Locale-independent version of std::isupper(). */ + inline bool isASCIIUpper(char c) { return c >= 'A' && c <= 'Z'; }; + + /** Locale-independent version of std::isalpha(). */ + inline bool isASCIIAlpha(char c) { return isASCIILower(c) || isASCIIUpper(c); }; + + /** Locale-independent version of std::isdigit(). */ + inline bool isASCIIDigit(char c) { return c >= '0' && c <= '9'; }; + +} diff --git a/tests/unit/libutil/string.cc b/tests/unit/libutil/string.cc new file mode 100644 index 00000000000..381f2cc15e8 --- /dev/null +++ b/tests/unit/libutil/string.cc @@ -0,0 +1,59 @@ +#include +#include "string.hh" + +namespace nix { + +TEST(string, isASCIILower) { + ASSERT_TRUE(isASCIILower('a')); + ASSERT_TRUE(isASCIILower('z')); + ASSERT_FALSE(isASCIILower('A')); + ASSERT_FALSE(isASCIILower('Z')); + ASSERT_FALSE(isASCIILower('0')); + ASSERT_FALSE(isASCIILower('9')); + ASSERT_FALSE(isASCIILower(' ')); + ASSERT_FALSE(isASCIILower('\n')); + ASSERT_FALSE(isASCIILower('\t')); + ASSERT_FALSE(isASCIILower(':')); +} + +TEST(string, isASCIIUpper) { + ASSERT_FALSE(isASCIIUpper('a')); + ASSERT_FALSE(isASCIIUpper('z')); + ASSERT_TRUE(isASCIIUpper('A')); + ASSERT_TRUE(isASCIIUpper('Z')); + ASSERT_FALSE(isASCIIUpper('0')); + ASSERT_FALSE(isASCIIUpper('9')); + ASSERT_FALSE(isASCIIUpper(' ')); + ASSERT_FALSE(isASCIIUpper('\n')); + ASSERT_FALSE(isASCIIUpper('\t')); + ASSERT_FALSE(isASCIIUpper(':')); +} + +TEST(string, isASCIIAlpha) { + ASSERT_TRUE(isASCIIAlpha('a')); + ASSERT_TRUE(isASCIIAlpha('z')); + ASSERT_TRUE(isASCIIAlpha('A')); + ASSERT_TRUE(isASCIIAlpha('Z')); + ASSERT_FALSE(isASCIIAlpha('0')); + ASSERT_FALSE(isASCIIAlpha('9')); + ASSERT_FALSE(isASCIIAlpha(' ')); + ASSERT_FALSE(isASCIIAlpha('\n')); + ASSERT_FALSE(isASCIIAlpha('\t')); + ASSERT_FALSE(isASCIIAlpha(':')); +} + +TEST(string, isASCIIDigit) { + ASSERT_FALSE(isASCIIDigit('a')); + ASSERT_FALSE(isASCIIDigit('z')); + ASSERT_FALSE(isASCIIDigit('A')); + ASSERT_FALSE(isASCIIDigit('Z')); + ASSERT_TRUE(isASCIIDigit('0')); + ASSERT_TRUE(isASCIIDigit('1')); + ASSERT_TRUE(isASCIIDigit('9')); + ASSERT_FALSE(isASCIIDigit(' ')); + ASSERT_FALSE(isASCIIDigit('\n')); + ASSERT_FALSE(isASCIIDigit('\t')); + ASSERT_FALSE(isASCIIDigit(':')); +} + +} \ No newline at end of file From d3a85b68347071d8d93ec796a38c707483d7b272 Mon Sep 17 00:00:00 2001 From: Robert Hensing Date: Wed, 6 Dec 2023 15:14:41 +0100 Subject: [PATCH 5/9] isValidSchemeName: Add function --- src/libutil/url.cc | 17 +++++++++++++++++ src/libutil/url.hh | 9 +++++++++ tests/unit/libutil/url.cc | 18 ++++++++++++++++++ 3 files changed, 44 insertions(+) diff --git a/src/libutil/url.cc b/src/libutil/url.cc index 57b64d6074c..f2d5f17825a 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -3,6 +3,7 @@ #include "util.hh" #include "split.hh" #include "canon-path.hh" +#include "string.hh" namespace nix { @@ -183,4 +184,20 @@ std::string fixGitURL(const std::string & url) } } +// https://www.rfc-editor.org/rfc/rfc3986#section-3.1 +bool isValidSchemeName(std::string_view s) +{ + if (s.empty()) return false; + if (!isASCIIAlpha(s[0])) return false; + for (auto c : s.substr(1)) { + if (isASCIIAlpha(c)) continue; + if (isASCIIDigit(c)) continue; + if (c == '+') continue; + if (c == '-') continue; + if (c == '.') continue; + return false; + } + return true; +} + } diff --git a/src/libutil/url.hh b/src/libutil/url.hh index 833f546787b..24806bbff81 100644 --- a/src/libutil/url.hh +++ b/src/libutil/url.hh @@ -55,4 +55,13 @@ ParsedUrlScheme parseUrlScheme(std::string_view scheme); changes absolute paths into file:// URLs. */ std::string fixGitURL(const std::string & url); +/** + * Whether a string is valid as RFC 3986 scheme name. + * Colon `:` is part of the URI; not the scheme name, and therefore rejected. + * See https://www.rfc-editor.org/rfc/rfc3986#section-3.1 + * + * Does not check whether the scheme is understood, as that's context-dependent. + */ +bool isValidSchemeName(std::string_view scheme); + } diff --git a/tests/unit/libutil/url.cc b/tests/unit/libutil/url.cc index a678dad2041..09fa4e21879 100644 --- a/tests/unit/libutil/url.cc +++ b/tests/unit/libutil/url.cc @@ -344,4 +344,22 @@ namespace nix { ASSERT_EQ(percentDecode(e), s); } +TEST(nix, isValidSchemeName) { + ASSERT_TRUE(isValidSchemeName("http")); + ASSERT_TRUE(isValidSchemeName("https")); + ASSERT_TRUE(isValidSchemeName("file")); + ASSERT_TRUE(isValidSchemeName("file+https")); + ASSERT_TRUE(isValidSchemeName("fi.le")); + ASSERT_TRUE(isValidSchemeName("file-ssh")); + ASSERT_TRUE(isValidSchemeName("file+")); + ASSERT_TRUE(isValidSchemeName("file.")); + ASSERT_TRUE(isValidSchemeName("file1")); + ASSERT_FALSE(isValidSchemeName("file:")); + ASSERT_FALSE(isValidSchemeName("file/")); + ASSERT_FALSE(isValidSchemeName("+file")); + ASSERT_FALSE(isValidSchemeName(".file")); + ASSERT_FALSE(isValidSchemeName("-file")); + ASSERT_FALSE(isValidSchemeName("1file")); +} + } From a05bc9eb92371af631fc9fb83c3595957fb56943 Mon Sep 17 00:00:00 2001 From: Robert Hensing Date: Wed, 6 Dec 2023 15:27:29 +0100 Subject: [PATCH 6/9] allowed-uris: Match whole schemes also when scheme is not followed by slashes --- ...llowed-uris-can-now-match-whole-schemes.md | 7 ++++ src/libexpr/eval-settings.hh | 5 +++ src/libexpr/eval.cc | 17 ++++++++- tests/unit/libexpr/eval.cc | 35 +++++++++++++++++++ 4 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 doc/manual/rl-next/allowed-uris-can-now-match-whole-schemes.md diff --git a/doc/manual/rl-next/allowed-uris-can-now-match-whole-schemes.md b/doc/manual/rl-next/allowed-uris-can-now-match-whole-schemes.md new file mode 100644 index 00000000000..3cf75a61253 --- /dev/null +++ b/doc/manual/rl-next/allowed-uris-can-now-match-whole-schemes.md @@ -0,0 +1,7 @@ +--- +synopsis: Option `allowed-uris` can now match whole schemes in URIs without slashes +prs: 9547 +--- + +If a scheme, such as `github:` is specified in the `allowed-uris` option, all URIs starting with `github:` are allowed. +Previously this only worked for schemes whose URIs used the `://` syntax. diff --git a/src/libexpr/eval-settings.hh b/src/libexpr/eval-settings.hh index db2971acb77..3009a462c42 100644 --- a/src/libexpr/eval-settings.hh +++ b/src/libexpr/eval-settings.hh @@ -68,6 +68,11 @@ struct EvalSettings : Config evaluation mode. For example, when set to `https://github.com/NixOS`, builtin functions such as `fetchGit` are allowed to access `https://github.com/NixOS/patchelf.git`. + + Access is granted when + - the URI is equal to the prefix, + - or the URI is a subpath of the prefix, + - or the prefix is a URI scheme ended by a colon `:` and the URI has the same scheme. )"}; Setting traceFunctionCalls{this, false, "trace-function-calls", diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 9e541f29328..1552e3e9220 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -18,6 +18,7 @@ #include "memory-input-accessor.hh" #include "signals.hh" #include "gc-small-vector.hh" +#include "url.hh" #include #include @@ -599,6 +600,14 @@ void EvalState::allowAndSetStorePathString(const StorePath & storePath, Value & mkStorePathString(storePath, v); } +inline static bool isJustSchemePrefix(std::string_view prefix) +{ + return + !prefix.empty() + && prefix[prefix.size() - 1] == ':' + && isValidSchemeName(prefix.substr(0, prefix.size() - 1)); +} + bool isAllowedURI(std::string_view uri, const Strings & allowedUris) { /* 'uri' should be equal to a prefix, or in a subdirectory of a @@ -611,8 +620,14 @@ bool isAllowedURI(std::string_view uri, const Strings & allowedUris) && prefix.size() > 0 && hasPrefix(uri, prefix) && ( + // Allow access to subdirectories of the prefix. prefix[prefix.size() - 1] == '/' - || uri[prefix.size()] == '/'))) + || uri[prefix.size()] == '/' + + // Allow access to whole schemes + || isJustSchemePrefix(prefix) + ) + )) return true; } diff --git a/tests/unit/libexpr/eval.cc b/tests/unit/libexpr/eval.cc index cc5d6bbfa6d..93d3f658f52 100644 --- a/tests/unit/libexpr/eval.cc +++ b/tests/unit/libexpr/eval.cc @@ -103,4 +103,39 @@ TEST(nix_isAllowedURI, file_url) { ASSERT_FALSE(isAllowedURI("file://", allowed)); } +TEST(nix_isAllowedURI, github_all) { + Strings allowed; + allowed.push_back("github:"); + ASSERT_TRUE(isAllowedURI("github:", allowed)); + ASSERT_TRUE(isAllowedURI("github:foo/bar", allowed)); + ASSERT_TRUE(isAllowedURI("github:foo/bar/feat-multi-bar", allowed)); + ASSERT_TRUE(isAllowedURI("github:foo/bar?ref=refs/heads/feat-multi-bar", allowed)); + ASSERT_TRUE(isAllowedURI("github://foo/bar", allowed)); + ASSERT_FALSE(isAllowedURI("https://github:443/foo/bar/archive/master.tar.gz", allowed)); + ASSERT_FALSE(isAllowedURI("file://github:foo/bar/archive/master.tar.gz", allowed)); + ASSERT_FALSE(isAllowedURI("file:///github:foo/bar/archive/master.tar.gz", allowed)); + ASSERT_FALSE(isAllowedURI("github", allowed)); +} + +TEST(nix_isAllowedURI, github_org) { + Strings allowed; + allowed.push_back("github:foo"); + ASSERT_FALSE(isAllowedURI("github:", allowed)); + ASSERT_TRUE(isAllowedURI("github:foo/bar", allowed)); + ASSERT_TRUE(isAllowedURI("github:foo/bar/feat-multi-bar", allowed)); + ASSERT_TRUE(isAllowedURI("github:foo/bar?ref=refs/heads/feat-multi-bar", allowed)); + ASSERT_FALSE(isAllowedURI("github://foo/bar", allowed)); + ASSERT_FALSE(isAllowedURI("https://github:443/foo/bar/archive/master.tar.gz", allowed)); + ASSERT_FALSE(isAllowedURI("file://github:foo/bar/archive/master.tar.gz", allowed)); + ASSERT_FALSE(isAllowedURI("file:///github:foo/bar/archive/master.tar.gz", allowed)); +} + +TEST(nix_isAllowedURI, non_scheme_colon) { + Strings allowed; + allowed.push_back("https://foo/bar:"); + ASSERT_TRUE(isAllowedURI("https://foo/bar:", allowed)); + ASSERT_TRUE(isAllowedURI("https://foo/bar:/baz", allowed)); + ASSERT_FALSE(isAllowedURI("https://foo/bar:baz", allowed)); +} + } // namespace nix \ No newline at end of file From 2e451a663eff96b89360cfd3c0d5eaa60ca46181 Mon Sep 17 00:00:00 2001 From: Robert Hensing Date: Tue, 12 Dec 2023 17:22:54 +0100 Subject: [PATCH 7/9] schemeRegex -> schemeNameRegex Scheme could be understood to include the typical `:` separator. --- src/libutil/url-parts.hh | 2 +- src/libutil/url.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libutil/url-parts.hh b/src/libutil/url-parts.hh index 5c5a30dc210..07bc8d0cde9 100644 --- a/src/libutil/url-parts.hh +++ b/src/libutil/url-parts.hh @@ -8,7 +8,7 @@ namespace nix { // URI stuff. const static std::string pctEncoded = "(?:%[0-9a-fA-F][0-9a-fA-F])"; -const static std::string schemeRegex = "(?:[a-z][a-z0-9+.-]*)"; +const static std::string schemeNameRegex = "(?:[a-z][a-z0-9+.-]*)"; const static std::string ipv6AddressSegmentRegex = "[0-9a-fA-F:]+(?:%\\w+)?"; const static std::string ipv6AddressRegex = "(?:\\[" + ipv6AddressSegmentRegex + "\\]|" + ipv6AddressSegmentRegex + ")"; const static std::string unreservedRegex = "(?:[a-zA-Z0-9-._~])"; diff --git a/src/libutil/url.cc b/src/libutil/url.cc index f2d5f17825a..e9acd67d0c9 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -14,7 +14,7 @@ std::regex revRegex(revRegexS, std::regex::ECMAScript); ParsedURL parseURL(const std::string & url) { static std::regex uriRegex( - "((" + schemeRegex + "):" + "((" + schemeNameRegex + "):" + "(?:(?://(" + authorityRegex + ")(" + absPathRegex + "))|(/?" + pathRegex + ")))" + "(?:\\?(" + queryRegex + "))?" + "(?:#(" + queryRegex + "))?", From 4eaeda6604e2f8977728f14415fe92350d047970 Mon Sep 17 00:00:00 2001 From: Robert Hensing Date: Tue, 12 Dec 2023 17:43:54 +0100 Subject: [PATCH 8/9] isValidSchemeName: Use regex As requested by Eelco Dolstra. I think it used to be simpler. --- src/libutil/url.cc | 15 +++------------ tests/unit/libutil/url.cc | 5 +++++ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/libutil/url.cc b/src/libutil/url.cc index e9acd67d0c9..152c06d8e9f 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -3,7 +3,6 @@ #include "util.hh" #include "split.hh" #include "canon-path.hh" -#include "string.hh" namespace nix { @@ -187,17 +186,9 @@ std::string fixGitURL(const std::string & url) // https://www.rfc-editor.org/rfc/rfc3986#section-3.1 bool isValidSchemeName(std::string_view s) { - if (s.empty()) return false; - if (!isASCIIAlpha(s[0])) return false; - for (auto c : s.substr(1)) { - if (isASCIIAlpha(c)) continue; - if (isASCIIDigit(c)) continue; - if (c == '+') continue; - if (c == '-') continue; - if (c == '.') continue; - return false; - } - return true; + static std::regex regex(schemeNameRegex, std::regex::ECMAScript); + + return std::regex_match(s.begin(), s.end(), regex, std::regex_constants::match_default); } } diff --git a/tests/unit/libutil/url.cc b/tests/unit/libutil/url.cc index 09fa4e21879..7d08f467eac 100644 --- a/tests/unit/libutil/url.cc +++ b/tests/unit/libutil/url.cc @@ -360,6 +360,11 @@ TEST(nix, isValidSchemeName) { ASSERT_FALSE(isValidSchemeName(".file")); ASSERT_FALSE(isValidSchemeName("-file")); ASSERT_FALSE(isValidSchemeName("1file")); + // regex ok? + ASSERT_FALSE(isValidSchemeName("\nhttp")); + ASSERT_FALSE(isValidSchemeName("\nhttp\n")); + ASSERT_FALSE(isValidSchemeName("http\n")); + ASSERT_FALSE(isValidSchemeName("http ")); } } From 0b87ba50c08d83384e11a8e6db1e2f97fba4b61c Mon Sep 17 00:00:00 2001 From: Robert Hensing Date: Tue, 12 Dec 2023 17:44:53 +0100 Subject: [PATCH 9/9] Revert "Add nix::isASCII*, locale-independent" This reverts commit 79eb2920bb51c7ec9528a403986e79f04738e2be. Not used at this time. --- src/libutil/string.hh | 17 ----------- tests/unit/libutil/string.cc | 59 ------------------------------------ 2 files changed, 76 deletions(-) delete mode 100644 src/libutil/string.hh delete mode 100644 tests/unit/libutil/string.cc diff --git a/src/libutil/string.hh b/src/libutil/string.hh deleted file mode 100644 index 16ef7564321..00000000000 --- a/src/libutil/string.hh +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -namespace nix { - - /** Locale-independent version of std::islower(). */ - inline bool isASCIILower(char c) { return c >= 'a' && c <= 'z'; }; - - /** Locale-independent version of std::isupper(). */ - inline bool isASCIIUpper(char c) { return c >= 'A' && c <= 'Z'; }; - - /** Locale-independent version of std::isalpha(). */ - inline bool isASCIIAlpha(char c) { return isASCIILower(c) || isASCIIUpper(c); }; - - /** Locale-independent version of std::isdigit(). */ - inline bool isASCIIDigit(char c) { return c >= '0' && c <= '9'; }; - -} diff --git a/tests/unit/libutil/string.cc b/tests/unit/libutil/string.cc deleted file mode 100644 index 381f2cc15e8..00000000000 --- a/tests/unit/libutil/string.cc +++ /dev/null @@ -1,59 +0,0 @@ -#include -#include "string.hh" - -namespace nix { - -TEST(string, isASCIILower) { - ASSERT_TRUE(isASCIILower('a')); - ASSERT_TRUE(isASCIILower('z')); - ASSERT_FALSE(isASCIILower('A')); - ASSERT_FALSE(isASCIILower('Z')); - ASSERT_FALSE(isASCIILower('0')); - ASSERT_FALSE(isASCIILower('9')); - ASSERT_FALSE(isASCIILower(' ')); - ASSERT_FALSE(isASCIILower('\n')); - ASSERT_FALSE(isASCIILower('\t')); - ASSERT_FALSE(isASCIILower(':')); -} - -TEST(string, isASCIIUpper) { - ASSERT_FALSE(isASCIIUpper('a')); - ASSERT_FALSE(isASCIIUpper('z')); - ASSERT_TRUE(isASCIIUpper('A')); - ASSERT_TRUE(isASCIIUpper('Z')); - ASSERT_FALSE(isASCIIUpper('0')); - ASSERT_FALSE(isASCIIUpper('9')); - ASSERT_FALSE(isASCIIUpper(' ')); - ASSERT_FALSE(isASCIIUpper('\n')); - ASSERT_FALSE(isASCIIUpper('\t')); - ASSERT_FALSE(isASCIIUpper(':')); -} - -TEST(string, isASCIIAlpha) { - ASSERT_TRUE(isASCIIAlpha('a')); - ASSERT_TRUE(isASCIIAlpha('z')); - ASSERT_TRUE(isASCIIAlpha('A')); - ASSERT_TRUE(isASCIIAlpha('Z')); - ASSERT_FALSE(isASCIIAlpha('0')); - ASSERT_FALSE(isASCIIAlpha('9')); - ASSERT_FALSE(isASCIIAlpha(' ')); - ASSERT_FALSE(isASCIIAlpha('\n')); - ASSERT_FALSE(isASCIIAlpha('\t')); - ASSERT_FALSE(isASCIIAlpha(':')); -} - -TEST(string, isASCIIDigit) { - ASSERT_FALSE(isASCIIDigit('a')); - ASSERT_FALSE(isASCIIDigit('z')); - ASSERT_FALSE(isASCIIDigit('A')); - ASSERT_FALSE(isASCIIDigit('Z')); - ASSERT_TRUE(isASCIIDigit('0')); - ASSERT_TRUE(isASCIIDigit('1')); - ASSERT_TRUE(isASCIIDigit('9')); - ASSERT_FALSE(isASCIIDigit(' ')); - ASSERT_FALSE(isASCIIDigit('\n')); - ASSERT_FALSE(isASCIIDigit('\t')); - ASSERT_FALSE(isASCIIDigit(':')); -} - -} \ No newline at end of file