Skip to content

Commit

Permalink
UrlPatternIndex: Fix edge case with patterns starting/ending with wil…
Browse files Browse the repository at this point in the history
…dcards.

We don't correctly handle patterns beginning/ending with wildcards and an
anchor. Fix this.

Also disallow patterns starting with a wildcard and having a subdomain anchor,
since it's not clear how these should be matched. This also necessitates a
scheme update.

BUG=932958

Change-Id: I2cf249b7de1ec238044c293f5dcc1168bb3e1d8e
Reviewed-on: https://chromium-review.googlesource.com/c/1484828
Commit-Queue: Karan Bhatia <karandeepb@chromium.org>
Reviewed-by: Istiaque Ahmed <lazyboy@chromium.org>
Reviewed-by: Charlie Harrison <csharrison@chromium.org>
Cr-Commit-Position: refs/heads/master@{#635301}
  • Loading branch information
Karandeep Bhatia authored and Commit Bot committed Feb 25, 2019
1 parent fe3ec22 commit cf2b1a0
Show file tree
Hide file tree
Showing 12 changed files with 62 additions and 14 deletions.
4 changes: 2 additions & 2 deletions components/subresource_filter/core/common/indexed_ruleset.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,12 @@ VerifyStatus GetVerifyStatus(const uint8_t* buffer,

// RulesetIndexer --------------------------------------------------------------

const int RulesetIndexer::kIndexedFormatVersion = 25;
const int RulesetIndexer::kIndexedFormatVersion = 26;

// This static assert is meant to catch cases where
// url_pattern_index::kUrlPatternIndexFormatVersion is incremented without
// updating RulesetIndexer::kIndexedFormatVersion.
static_assert(url_pattern_index::kUrlPatternIndexFormatVersion == 4,
static_assert(url_pattern_index::kUrlPatternIndexFormatVersion == 5,
"kUrlPatternIndexFormatVersion has changed, make sure you've "
"also updated RulesetIndexer::kIndexedFormatVersion above.");

Expand Down
23 changes: 20 additions & 3 deletions components/url_pattern_index/url_pattern.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,11 @@ namespace url_pattern_index {

namespace {

constexpr char kWildcard = '*';

class IsWildcard {
public:
bool operator()(char c) const { return c == '*'; }
bool operator()(char c) const { return c == kWildcard; }
};

proto::UrlPatternType ConvertUrlPatternType(flat::UrlPatternType type) {
Expand Down Expand Up @@ -343,15 +345,30 @@ bool UrlPattern::MatchesUrl(const UrlInfo& url) const {
DCHECK(base::IsStringASCII(url.spec()));
DCHECK(base::IsStringASCII(url.GetLowerCaseSpec()));

// Pre-process patterns to ensure left anchored and right anchored patterns
// don't begin and end with a wildcard respectively i.e. change "|*xyz" to
// "*xyz" and "xyz*|" to "xyz*".
proto::AnchorType anchor_left = anchor_left_;
proto::AnchorType anchor_right = anchor_right_;
if (!url_pattern_.empty()) {
if (url_pattern_.front() == kWildcard) {
// Note: We don't handle "||*" and expect clients to disallow it.
DCHECK_NE(proto::ANCHOR_TYPE_SUBDOMAIN, anchor_left_);
anchor_left = proto::ANCHOR_TYPE_NONE;
}
if (url_pattern_.back() == kWildcard)
anchor_right = proto::ANCHOR_TYPE_NONE;
}

if (match_case()) {
return IsCaseSensitiveMatch(url_pattern_, anchor_left_, anchor_right_,
return IsCaseSensitiveMatch(url_pattern_, anchor_left, anchor_right,
url.spec(), url.host());
}

// Use the lower-cased url for case-insensitive comparison. Case-insensitive
// patterns should already be lower-cased.
DCHECK(!HasAnyUpperAscii(url_pattern_));
return IsCaseSensitiveMatch(url_pattern_, anchor_left_, anchor_right_,
return IsCaseSensitiveMatch(url_pattern_, anchor_left, anchor_right,
url.GetLowerCaseSpec(), url.host());
}

Expand Down
7 changes: 7 additions & 0 deletions components/url_pattern_index/url_pattern_index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,13 @@ class UrlRuleFlatBufferConverter {
if (anchor_right_ == flat::AnchorType_SUBDOMAIN)
return false; // Unsupported right anchor.

// We disallow patterns like "||*xyz" because it isn't clear how to match
// them.
if (anchor_left_ == flat::AnchorType_SUBDOMAIN &&
(!rule_.url_pattern().empty() && rule_.url_pattern().front() == '*')) {
return false;
}

return true;
}

Expand Down
2 changes: 1 addition & 1 deletion components/url_pattern_index/url_pattern_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ int CompareDomains(base::StringPiece lhs_domain, base::StringPiece rhs_domain);
// Increase this value when introducing an incompatible change to the
// UrlPatternIndex schema (flat/url_pattern_index.fbs). url_pattern_index
// clients can use this as a signal to rebuild rulesets.
constexpr int kUrlPatternIndexFormatVersion = 4;
constexpr int kUrlPatternIndexFormatVersion = 5;

// The class used to construct an index over the URL patterns of a set of URL
// rules. The rules themselves need to be converted to FlatBuffers format by the
Expand Down
4 changes: 4 additions & 0 deletions components/url_pattern_index/url_pattern_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,10 @@ TEST(UrlPatternTest, MatchesUrl) {
{{"abc*^", kAnchorNone, kAnchorNone}, "https://abc.com?q=123", true},
{{"abc*^", kAnchorNone, kBoundary}, "https://abc.com", true},
{{"abc*^", kAnchorNone, kBoundary}, "https://abc.com?q=123", true},
{{"abc*", kAnchorNone, kBoundary}, "https://a.com/abcxyz", true},
{{"*google.com", kBoundary, kAnchorNone}, "https://www.google.com", true},
{{"*", kBoundary, kBoundary}, "https://example.com", true},
{{"", kBoundary, kBoundary}, "https://example.com", false},
};

for (const auto& test_case : kTestCases) {
Expand Down
2 changes: 2 additions & 0 deletions extensions/browser/api/declarative_net_request/constants.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ const char kErrorPersisting[] = "*: Rules file could not be parsed.";
const char kErrorListNotPassed[] = "*: Rules file must contain a list.";
const char kErrorNonAscii[] =
"*: Rule with id * cannot have non-ascii characters as part of \"*\" key.";
const char kErrorInvalidUrlFilter[] =
"*: Rule with id * has an invalid value for \"*\" key.";

const char kRuleCountExceeded[] =
"Declarative Net Request: Rule count exceeded. Some rules were ignored.";
Expand Down
2 changes: 2 additions & 0 deletions extensions/browser/api/declarative_net_request/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ enum class ParseResult {
ERROR_NON_ASCII_URL_FILTER,
ERROR_NON_ASCII_DOMAIN,
ERROR_NON_ASCII_EXCLUDED_DOMAIN,
ERROR_INVALID_URL_FILTER,
};

// Rule parsing errors.
Expand All @@ -46,6 +47,7 @@ extern const char kErrorDuplicateIDs[];
extern const char kErrorPersisting[];
extern const char kErrorListNotPassed[];
extern const char kErrorNonAscii[];
extern const char kErrorInvalidUrlFilter[];

// Rule indexing install warnings.
extern const char kRuleCountExceeded[];
Expand Down
16 changes: 12 additions & 4 deletions extensions/browser/api/declarative_net_request/indexed_rule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ namespace {
namespace flat_rule = url_pattern_index::flat;
namespace dnr_api = extensions::api::declarative_net_request;

constexpr char kAnchorCharacter = '|';
constexpr char kSeparatorCharacter = '^';
constexpr char kWildcardCharacter = '*';

// Returns true if bitmask |sub| is a subset of |super|.
constexpr bool IsSubset(unsigned sub, unsigned super) {
return (super | sub) == super;
Expand Down Expand Up @@ -108,10 +112,6 @@ class UrlFilterParser {
return IsAtValidIndex() && url_filter_[index_] == kAnchorCharacter;
}

static constexpr char kAnchorCharacter = '|';
static constexpr char kSeparatorCharacter = '^';
static constexpr char kWildcardCharacter = '*';

const std::string url_filter_;
const size_t url_filter_len_;
size_t index_;
Expand Down Expand Up @@ -332,6 +332,14 @@ ParseResult IndexedRule::CreateIndexedRule(dnr_api::Rule parsed_rule,
UrlFilterParser::Parse(std::move(parsed_rule.condition.url_filter),
indexed_rule);

// url_pattern_index doesn't support patterns starting with a domain anchor
// followed by a wildcard, e.g. ||*xyz.
if (indexed_rule->anchor_left == flat_rule::AnchorType_SUBDOMAIN &&
!indexed_rule->url_pattern.empty() &&
indexed_rule->url_pattern.front() == kWildcardCharacter) {
return ParseResult::ERROR_INVALID_URL_FILTER;
}

// Lower-case case-insensitive patterns as required by url pattern index.
if (indexed_rule->options & flat_rule::OptionFlag_IS_CASE_INSENSITIVE)
indexed_rule->url_pattern = base::ToLowerASCII(indexed_rule->url_pattern);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,10 @@ TEST_F(IndexedRuleTest, UrlFilterParsing) {
L"ase.com")),
flat_rule::UrlPatternType_SUBSTRING, flat_rule::AnchorType_NONE,
flat_rule::AnchorType_NONE, "", ParseResult::ERROR_NON_ASCII_URL_FILTER},
};
// Url pattern starting with the domain anchor followed by a wildcard.
{std::make_unique<std::string>("||*xyz"),
flat_rule::UrlPatternType_WILDCARDED, flat_rule::AnchorType_SUBDOMAIN,
flat_rule::AnchorType_NONE, "", ParseResult::ERROR_INVALID_URL_FILTER}};

for (size_t i = 0; i < base::size(cases); ++i) {
SCOPED_TRACE(base::StringPrintf("Testing case[%" PRIuS "]", i));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ TEST_F(IndexedRulesetFormatVersionTest, CheckVersionUpdated) {
EXPECT_EQ(StripCommentsAndWhitespace(kFlatbufferSchemaExpected),
StripCommentsAndWhitespace(flatbuffer_schema))
<< "Schema change detected; update this test and the schema version.";
EXPECT_EQ(4, GetIndexedRulesetFormatVersionForTesting())
EXPECT_EQ(5, GetIndexedRulesetFormatVersionForTesting())
<< "Update this test if you update the schema version.";
}

Expand Down
5 changes: 5 additions & 0 deletions extensions/browser/api/declarative_net_request/parse_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,11 @@ std::string ParseInfo::GetErrorDescription(
kErrorNonAscii, json_rules_filename, std::to_string(*rule_id_),
kExcludedDomainsKey);
break;
case ParseResult::ERROR_INVALID_URL_FILTER:
error = ErrorUtils::FormatErrorMessage(
kErrorInvalidUrlFilter, json_rules_filename,
std::to_string(*rule_id_), kUrlFilterKey);
break;
}
return error;
}
Expand Down
4 changes: 2 additions & 2 deletions extensions/browser/api/declarative_net_request/utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ namespace dnr_api = extensions::api::declarative_net_request;
// url_pattern_index.fbs. Whenever an extension with an indexed ruleset format
// version different from the one currently used by Chrome is loaded, the
// extension ruleset will be reindexed.
constexpr int kIndexedRulesetFormatVersion = 4;
constexpr int kIndexedRulesetFormatVersion = 5;

// This static assert is meant to catch cases where
// url_pattern_index::kUrlPatternIndexFormatVersion is incremented without
// updating kIndexedRulesetFormatVersion.
static_assert(url_pattern_index::kUrlPatternIndexFormatVersion == 4,
static_assert(url_pattern_index::kUrlPatternIndexFormatVersion == 5,
"kUrlPatternIndexFormatVersion has changed, make sure you've "
"also updated kIndexedRulesetFormatVersion above.");

Expand Down

0 comments on commit cf2b1a0

Please sign in to comment.