Skip to content

Commit

Permalink
Revert of Reduce buggy usage of the registry controlled domain servic…
Browse files Browse the repository at this point in the history
…e. (patchset chromium#9 id:160001 of https://codereview.chromium.org/2433583002/ )

Reason for revert:
net_unittests failure on Cronet builders

Original issue's description:
> Reduce buggy usage of the registry controlled domain service.
>
> GetRegistryLength for host names canonicalizes the input for the caller, but
> then returns the length in the canonicalized input, which is not necessarily
> the same as the length in the original string. As a result, computations
> performed by the caller based on this value can be wrong (see the bug for
> more).
>
> All callers of this function were audited and changed to use on of the
> following:
>
> - Many callers don't need the offsets. A new function
>   HostHasRegistryControlledDomain is added to check for the presence of
>   a R.C.D. without the risk of returning incorrect string lengths.
>
> - Many callers already have guaranteed-canonical strings (they came out of
>   a GURL or KURL object soon before the call) These were changed to use a
>   new GetCanonicalHostRegistryLength function. A further advantage is that
>   these calls will be faster.
>
> - A new Permissive function is added that handles cases where the input
>   is necessarily non-canonical.
>
> Adds an IDN test case to the unit tests.
>
> Removes checking for IP addresses in the already-known-canonical cases.
> This requires a separate full canonicalization and IP addresses should
> never match the R.C.D. list.
>
> BUG=657199
>
> Committed: https://crrev.com/060f6a0de7706cc43f9d773ae9ce2cb36bc9964d
> Cr-Commit-Position: refs/heads/master@{#427545}

TBR=pkasting@chromium.org,asargent@chromium.org,brettw@chromium.org
# Skipping CQ checks because original CL landed less than 1 days ago.
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=657199

Review-Url: https://codereview.chromium.org/2454553002
Cr-Commit-Position: refs/heads/master@{#427561}
  • Loading branch information
wychen authored and Commit bot committed Oct 26, 2016
1 parent a64ceca commit 45f62bf
Show file tree
Hide file tree
Showing 31 changed files with 272 additions and 651 deletions.
10 changes: 5 additions & 5 deletions chrome/browser/android/history_report/delta_file_commons.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
using bookmarks::BookmarkModel;
using net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES;
using net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES;
using net::registry_controlled_domains::GetCanonicalHostRegistryLength;
using net::registry_controlled_domains::GetRegistryLength;

namespace {

Expand All @@ -26,11 +26,11 @@ const int kSHA256ByteSize = 32;
const size_t kUrlLengthLimit = 20 * 1024 * 1024; // 20M
const size_t kUrlLengthWidth = 8;

void StripTopLevelDomain(std::string* canonical_host) {
size_t registry_length = GetCanonicalHostRegistryLength(
*canonical_host, EXCLUDE_UNKNOWN_REGISTRIES, EXCLUDE_PRIVATE_REGISTRIES);
void StripTopLevelDomain(std::string* host) {
size_t registry_length = GetRegistryLength(
*host, EXCLUDE_UNKNOWN_REGISTRIES, EXCLUDE_PRIVATE_REGISTRIES);
if (registry_length != 0 && registry_length != std::string::npos)
canonical_host->erase(canonical_host->length() - (registry_length + 1));
host->erase(host->length() - (registry_length + 1));
}

void StripCommonSubDomains(std::string* host) {
Expand Down
11 changes: 5 additions & 6 deletions chrome/browser/supervised_user/supervised_user_url_filter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
using content::BrowserThread;
using net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES;
using net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES;
using net::registry_controlled_domains::GetCanonicalHostRegistryLength;
using net::registry_controlled_domains::GetRegistryLength;
using policy::URLBlacklist;
using url_matcher::URLMatcher;
using url_matcher::URLMatcherConditionSet;
Expand Down Expand Up @@ -280,13 +280,12 @@ bool SupervisedUserURLFilter::HasFilteredScheme(const GURL& url) {
}

// static
bool SupervisedUserURLFilter::HostMatchesPattern(
const std::string& canonical_host,
const std::string& pattern) {
bool SupervisedUserURLFilter::HostMatchesPattern(const std::string& host,
const std::string& pattern) {
std::string trimmed_pattern = pattern;
std::string trimmed_host = canonical_host;
std::string trimmed_host = host;
if (base::EndsWith(pattern, ".*", base::CompareCase::SENSITIVE)) {
size_t registry_length = GetCanonicalHostRegistryLength(
size_t registry_length = GetRegistryLength(
trimmed_host, EXCLUDE_UNKNOWN_REGISTRIES, EXCLUDE_PRIVATE_REGISTRIES);
// A host without a known registry part does not match.
if (registry_length == 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ class SupervisedUserURLFilter
// Asterisks in other parts of the pattern are not allowed.
// |host| and |pattern| are assumed to be normalized to lower-case.
// This method is public for testing.
static bool HostMatchesPattern(const std::string& canonical_host,
static bool HostMatchesPattern(const std::string& host,
const std::string& pattern);

// Returns the filtering behavior for a given URL, based on the default
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ bool PhishingUrlFeatureExtractor::ExtractFeatures(const GURL& url,
// Disallow unknown registries so that we don't classify
// partial hostnames (e.g. "www.subdomain").
size_t registry_length =
net::registry_controlled_domains::GetCanonicalHostRegistryLength(
host, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
net::registry_controlled_domains::GetRegistryLength(
host,
net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);

if (registry_length == 0 || registry_length == std::string::npos) {
Expand Down
44 changes: 20 additions & 24 deletions components/google/core/browser/google_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
#define LINKDOCTOR_SERVER_REQUEST_URL ""
#endif

namespace google_util {

// Helpers --------------------------------------------------------------------

Expand All @@ -44,16 +43,16 @@ bool IsPathHomePageBase(base::StringPiece path) {
return (path == "/") || (path == "/webhp");
}

// True if the given canonical |host| is "[www.]<domain_in_lower_case>.<TLD>"
// with a valid TLD. If |subdomain_permission| is ALLOW_SUBDOMAIN, we check
// against host "*.<domain_in_lower_case>.<TLD>" instead.
// True if |host| is "[www.]<domain_in_lower_case>.<TLD>" with a valid TLD. If
// |subdomain_permission| is ALLOW_SUBDOMAIN, we check against host
// "*.<domain_in_lower_case>.<TLD>" instead.
bool IsValidHostName(base::StringPiece host,
base::StringPiece domain_in_lower_case,
SubdomainPermission subdomain_permission) {
size_t tld_length =
net::registry_controlled_domains::GetCanonicalHostRegistryLength(
host, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
google_util::SubdomainPermission subdomain_permission) {
size_t tld_length = net::registry_controlled_domains::GetRegistryLength(
host,
net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
if ((tld_length == 0) || (tld_length == std::string::npos))
return false;

Expand All @@ -63,7 +62,7 @@ bool IsValidHostName(base::StringPiece host,
if (base::LowerCaseEqualsASCII(host_minus_tld, domain_in_lower_case))
return true;

if (subdomain_permission == ALLOW_SUBDOMAIN) {
if (subdomain_permission == google_util::ALLOW_SUBDOMAIN) {
std::string dot_domain(".");
domain_in_lower_case.AppendToString(&dot_domain);
return base::EndsWith(host_minus_tld, dot_domain,
Expand All @@ -78,21 +77,16 @@ bool IsValidHostName(base::StringPiece host,
// True if |url| is a valid URL with HTTP or HTTPS scheme. If |port_permission|
// is DISALLOW_NON_STANDARD_PORTS, this also requires |url| to use the standard
// port for its scheme (80 for HTTP, 443 for HTTPS).
bool IsValidURL(const GURL& url, PortPermission port_permission) {
bool IsValidURL(const GURL& url, google_util::PortPermission port_permission) {
return url.is_valid() && url.SchemeIsHTTPOrHTTPS() &&
(url.port().empty() || (port_permission == ALLOW_NON_STANDARD_PORTS));
(url.port().empty() ||
(port_permission == google_util::ALLOW_NON_STANDARD_PORTS));
}

bool IsCanonicalHostGoogleHostname(base::StringPiece canonical_host,
SubdomainPermission subdomain_permission) {
const GURL& base_url(CommandLineGoogleBaseURL());
if (base_url.is_valid() && (canonical_host == base_url.host_piece()))
return true;
} // namespace

return IsValidHostName(canonical_host, "google", subdomain_permission);
}

} // namespace
namespace google_util {

// Global functions -----------------------------------------------------------

Expand Down Expand Up @@ -186,16 +180,18 @@ bool StartsWithCommandLineGoogleBaseURL(const GURL& url) {

bool IsGoogleHostname(base::StringPiece host,
SubdomainPermission subdomain_permission) {
url::CanonHostInfo host_info;
return IsCanonicalHostGoogleHostname(net::CanonicalizeHost(host, &host_info),
subdomain_permission);
const GURL& base_url(CommandLineGoogleBaseURL());
if (base_url.is_valid() && (host == base_url.host_piece()))
return true;

return IsValidHostName(host, "google", subdomain_permission);
}

bool IsGoogleDomainUrl(const GURL& url,
SubdomainPermission subdomain_permission,
PortPermission port_permission) {
return IsValidURL(url, port_permission) &&
IsCanonicalHostGoogleHostname(url.host_piece(), subdomain_permission);
IsGoogleHostname(url.host(), subdomain_permission);
}

bool IsGoogleHomePageUrl(const GURL& url) {
Expand Down
2 changes: 1 addition & 1 deletion components/history/core/browser/history_backend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,7 @@ void HistoryBackend::AddPage(const HistoryAddPageArgs& request) {
origin_url.SchemeIs(url::kFtpScheme)) {
std::string host(origin_url.host());
size_t registry_length =
net::registry_controlled_domains::GetCanonicalHostRegistryLength(
net::registry_controlled_domains::GetRegistryLength(
host,
net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
Expand Down
2 changes: 1 addition & 1 deletion components/omnibox/browser/autocomplete_input.cc
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ metrics::OmniboxInputType::Type AutocompleteInput::Parse(
// Check if the canonicalized host has a known TLD, which we'll want to know
// below.
const size_t registry_length =
net::registry_controlled_domains::GetCanonicalHostRegistryLength(
net::registry_controlled_domains::GetRegistryLength(
canonicalized_url->host(),
net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
Expand Down
15 changes: 8 additions & 7 deletions components/omnibox/browser/history_quick_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -141,18 +141,19 @@ void HistoryQuickProvider::DoAutocomplete() {
!autocomplete_input_.parts().ref.is_nonempty()) {
// Not visited, but we've seen the host before.
will_have_url_what_you_typed_match_first = true;
if (net::registry_controlled_domains::HostHasRegistryControlledDomain(
const size_t registry_length =
net::registry_controlled_domains::GetRegistryLength(
host,
net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
net::registry_controlled_domains::
EXCLUDE_PRIVATE_REGISTRIES)) {
// Known internet host.
net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
if (registry_length == 0) {
// Known intranet hosts get one score.
url_what_you_typed_match_score =
HistoryURLProvider::kScoreForWhatYouTypedResult;
HistoryURLProvider::kScoreForUnvisitedIntranetResult;
} else {
// An intranet host.
// Known internet hosts get another.
url_what_you_typed_match_score =
HistoryURLProvider::kScoreForUnvisitedIntranetResult;
HistoryURLProvider::kScoreForWhatYouTypedResult;
}
}
}
Expand Down
9 changes: 5 additions & 4 deletions components/omnibox/browser/history_url_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -982,11 +982,12 @@ bool HistoryURLProvider::CanFindIntranetURL(
return false;
const std::string host(base::UTF16ToUTF8(
input.text().substr(input.parts().host.begin, input.parts().host.len)));
const bool has_registry_domain =
net::registry_controlled_domains::HostHasRegistryControlledDomain(
host, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
const size_t registry_length =
net::registry_controlled_domains::GetRegistryLength(
host,
net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
return !has_registry_domain && db->IsTypedHost(host);
return registry_length == 0 && db->IsTypedHost(host);
}

bool HistoryURLProvider::PromoteOrCreateShorterSuggestion(
Expand Down
5 changes: 3 additions & 2 deletions components/search_engines/template_url_service.cc
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,9 @@ void LogDuplicatesHistogram(
// Returns the length of the registry portion of a hostname. For example,
// www.google.co.uk will return 5 (the length of co.uk).
size_t GetRegistryLength(const base::string16& host) {
return net::registry_controlled_domains::PermissiveGetHostRegistryLength(
host, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
return net::registry_controlled_domains::GetRegistryLength(
base::UTF16ToUTF8(host),
net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
}

Expand Down
17 changes: 10 additions & 7 deletions components/ssl_errors/error_classification.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ std::vector<HostnameTokens> GetTokenizedDNSNames(
for (const auto& dns_name : dns_names) {
HostnameTokens dns_name_token_single;
if (dns_name.empty() || dns_name.find('\0') != std::string::npos ||
!(HostNameHasKnownTLD(dns_name))) {
!(IsHostNameKnownTLD(dns_name))) {
dns_name_token_single.push_back(std::string());
} else {
dns_name_token_single = Tokenize(dns_name);
Expand Down Expand Up @@ -148,7 +148,7 @@ void RecordUMAStatistics(bool overridable,
}
case ssl_errors::ErrorInfo::CERT_COMMON_NAME_INVALID: {
std::string host_name = request_url.host();
if (HostNameHasKnownTLD(host_name)) {
if (IsHostNameKnownTLD(host_name)) {
HostnameTokens host_name_tokens = Tokenize(host_name);
if (IsWWWSubDomainMatch(request_url, cert))
RecordSSLInterstitialCause(overridable, WWW_SUBDOMAIN_MATCH);
Expand Down Expand Up @@ -278,10 +278,13 @@ void SetBuildTimeForTesting(const base::Time& testing_time) {
g_testing_build_time.Get() = testing_time;
}

bool HostNameHasKnownTLD(const std::string& host_name) {
return net::registry_controlled_domains::HostHasRegistryControlledDomain(
bool IsHostNameKnownTLD(const std::string& host_name) {
size_t tld_length = net::registry_controlled_domains::GetRegistryLength(
host_name, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
if (tld_length == 0 || tld_length == std::string::npos)
return false;
return true;
}

HostnameTokens Tokenize(const std::string& name) {
Expand All @@ -294,12 +297,12 @@ bool GetWWWSubDomainMatch(const GURL& request_url,
std::string* www_match_host_name) {
const std::string& host_name = request_url.host();

if (HostNameHasKnownTLD(host_name)) {
if (IsHostNameKnownTLD(host_name)) {
// Need to account for all possible domains given in the SSL certificate.
for (const auto& dns_name : dns_names) {
if (dns_name.empty() || dns_name.find('\0') != std::string::npos ||
dns_name.length() == host_name.length() ||
!HostNameHasKnownTLD(dns_name)) {
!IsHostNameKnownTLD(dns_name)) {
continue;
} else if (dns_name.length() > host_name.length()) {
if (url_formatter::StripWWW(base::ASCIIToUTF16(dns_name)) ==
Expand Down Expand Up @@ -390,7 +393,7 @@ bool IsSubDomainOutsideWildcard(const GURL& request_url,
for (const auto& dns_name : dns_names) {
if (dns_name.length() < 2 || dns_name.length() >= host_name.length() ||
dns_name.find('\0') != std::string::npos ||
!HostNameHasKnownTLD(dns_name) || dns_name[0] != '*' ||
!IsHostNameKnownTLD(dns_name) || dns_name[0] != '*' ||
dns_name[1] != '.') {
continue;
}
Expand Down
2 changes: 1 addition & 1 deletion components/ssl_errors/error_classification.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ HostnameTokens Tokenize(const std::string& name);
void SetBuildTimeForTesting(const base::Time& testing_time);

// Returns true if the hostname has a known Top Level Domain.
bool HostNameHasKnownTLD(const std::string& host_name);
bool IsHostNameKnownTLD(const std::string& host_name);

// Returns true if any one of the following conditions hold:
// 1.|hostname| is an IP Address in an IANA-reserved range.
Expand Down
6 changes: 3 additions & 3 deletions components/ssl_errors/error_classification_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -176,9 +176,9 @@ TEST_F(SSLErrorClassificationTest, TestNameMismatch) {
}

TEST_F(SSLErrorClassificationTest, TestHostNameHasKnownTLD) {
EXPECT_TRUE(ssl_errors::HostNameHasKnownTLD("www.google.com"));
EXPECT_TRUE(ssl_errors::HostNameHasKnownTLD("b.appspot.com"));
EXPECT_FALSE(ssl_errors::HostNameHasKnownTLD("a.private"));
EXPECT_TRUE(ssl_errors::IsHostNameKnownTLD("www.google.com"));
EXPECT_TRUE(ssl_errors::IsHostNameKnownTLD("b.appspot.com"));
EXPECT_FALSE(ssl_errors::IsHostNameKnownTLD("a.private"));
}

TEST_F(SSLErrorClassificationTest, TestPrivateURL) {
Expand Down
21 changes: 13 additions & 8 deletions components/url_formatter/url_fixer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -201,15 +201,20 @@ void AddDesiredTLD(const std::string& desired_tld, std::string* domain) {
if (desired_tld.empty() || domain->empty())
return;

// Abort if we already have a known TLD. In the case of an invalid host,
// HostHasRegistryControlledDomain will return false and we will try to
// append a TLD (which may make it valid). For example, "999999999999" is
// detected as a broken IP address and marked invalid, but attaching ".com"
// makes it legal. We disallow unknown registries here so users can input
// "mail.yahoo" and hit ctrl-enter to get "www.mail.yahoo.com".
if (net::registry_controlled_domains::HostHasRegistryControlledDomain(
// Check the TLD. If the return value is positive, we already have a TLD, so
// abort. If the return value is std::string::npos, there's no valid host,
// but we can try to append a TLD anyway, since the host may become valid once
// the TLD is attached -- for example, "999999999999" is detected as a broken
// IP address and marked invalid, but attaching ".com" makes it legal. When
// the return value is 0, there's a valid host with no known TLD, so we can
// definitely append the user's TLD. We disallow unknown registries here so
// users can input "mail.yahoo" and hit ctrl-enter to get
// "www.mail.yahoo.com".
const size_t registry_length =
net::registry_controlled_domains::GetRegistryLength(
*domain, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES))
net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
if ((registry_length != 0) && (registry_length != std::string::npos))
return;

// Add the suffix at the end of the domain.
Expand Down
13 changes: 6 additions & 7 deletions content/renderer/webpublicsuffixlist_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,12 @@ WebPublicSuffixListImpl::~WebPublicSuffixListImpl() {
}

size_t WebPublicSuffixListImpl::getPublicSuffixLength(
const blink::WebString& canonical_host) {
size_t result =
net::registry_controlled_domains::GetCanonicalHostRegistryLength(
canonical_host.utf8(),
net::registry_controlled_domains::INCLUDE_UNKNOWN_REGISTRIES,
net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
return result ? result : canonical_host.length();
const blink::WebString& host) {
size_t result = net::registry_controlled_domains::GetRegistryLength(
host.utf8(),
net::registry_controlled_domains::INCLUDE_UNKNOWN_REGISTRIES,
net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
return result ? result : host.length();
}

} // namespace content
2 changes: 1 addition & 1 deletion content/renderer/webpublicsuffixlist_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ namespace content {
class WebPublicSuffixListImpl : public blink::WebPublicSuffixList {
public:
// WebPublicSuffixList methods:
size_t getPublicSuffixLength(const blink::WebString& canonical_host) override;
size_t getPublicSuffixLength(const blink::WebString& host) override;
~WebPublicSuffixListImpl() override;
};

Expand Down
6 changes: 4 additions & 2 deletions extensions/common/csp_validator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,11 @@ bool isNonWildcardTLD(const std::string& url,
return true;

// Wildcards on subdomains of a TLD are not allowed.
return net::registry_controlled_domains::HostHasRegistryControlledDomain(
host, net::registry_controlled_domains::INCLUDE_UNKNOWN_REGISTRIES,
size_t registry_length = net::registry_controlled_domains::GetRegistryLength(
host,
net::registry_controlled_domains::INCLUDE_UNKNOWN_REGISTRIES,
net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
return registry_length != 0;
}

// Checks whether the source is a syntactically valid hash.
Expand Down
Loading

0 comments on commit 45f62bf

Please sign in to comment.