Skip to content

Commit

Permalink
Optimize KURL protocols
Browse files Browse the repository at this point in the history
This patch optimizes KURL::protocol and KURL::protocolIs by keeping
an AtomicString m_protocol on KURL. This reduces string allocations
throughout the code using KURL::protocol().

This also fixes an inconsistency with KURL::protocolIs that will return
true for invalid URLs.

BUG=348655

Review-Url: https://codereview.chromium.org/2463703002
Cr-Commit-Position: refs/heads/master@{#438197}
  • Loading branch information
csharrison authored and Commit bot committed Dec 13, 2016
1 parent ff9d994 commit 775abc2
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 79 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ TEST(MixedContentCheckerTest, IsMixedContent) {
{"https://example.com/foo", "blob:null/foo", false},
{"https://example.com/foo", "filesystem:https://example.com/foo", false},
{"https://example.com/foo", "filesystem:http://example.com/foo", false},
{"https://example.com/foo", "filesystem:null/foo", false},

{"https://example.com/foo", "http://example.com/foo", true},
{"https://example.com/foo", "http://google.com/foo", true},
Expand Down
102 changes: 42 additions & 60 deletions third_party/WebKit/Source/platform/weborigin/KURL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "wtf/StdLibExtras.h"
#include "wtf/text/CString.h"
#include "wtf/text/StringHash.h"
#include "wtf/text/StringStatics.h"
#include "wtf/text/StringUTF8Adaptor.h"
#include "wtf/text/TextEncoding.h"
#include <algorithm>
Expand All @@ -48,6 +49,7 @@ static const int invalidPortNumber = 0xFFFF;

static void assertProtocolIsGood(const char* protocol) {
#if ENABLE(ASSERT)
DCHECK_NE(protocol, "");
const char* p = protocol;
while (*p) {
ASSERT(*p > ' ' && *p < 0x7F && !(*p >= 'A' && *p <= 'Z'));
Expand Down Expand Up @@ -241,7 +243,7 @@ KURL::KURL(const AtomicString& canonicalString,
m_protocolIsInHTTPFamily(false),
m_parsed(parsed),
m_string(canonicalString) {
initProtocolIsInHTTPFamily();
initProtocolMetadata();
initInnerURL();
}

Expand All @@ -253,6 +255,7 @@ KURL::KURL(WTF::HashTableDeletedValueType)
KURL::KURL(const KURL& other)
: m_isValid(other.m_isValid),
m_protocolIsInHTTPFamily(other.m_protocolIsInHTTPFamily),
m_protocol(other.m_protocol),
m_parsed(other.m_parsed),
m_string(other.m_string) {
if (other.m_innerURL.get())
Expand All @@ -264,6 +267,7 @@ KURL::~KURL() {}
KURL& KURL::operator=(const KURL& other) {
m_isValid = other.m_isValid;
m_protocolIsInHTTPFamily = other.m_protocolIsInHTTPFamily;
m_protocol = other.m_protocol;
m_parsed = other.m_parsed;
m_string = other.m_string;
if (other.m_innerURL)
Expand All @@ -277,6 +281,7 @@ KURL KURL::copy() const {
KURL result;
result.m_isValid = m_isValid;
result.m_protocolIsInHTTPFamily = m_protocolIsInHTTPFamily;
result.m_protocol = m_protocol.isolatedCopy();
result.m_parsed = m_parsed;
result.m_string = m_string.isolatedCopy();
if (m_innerURL)
Expand Down Expand Up @@ -312,7 +317,7 @@ bool KURL::hasPath() const {

String KURL::lastPathComponent() const {
if (!m_isValid)
return stringForInvalidComponent();
return stringViewForInvalidComponent().toString();
ASSERT(!m_string.isNull());

// When the output ends in a slash, WebCore has different expectations than
Expand All @@ -336,7 +341,8 @@ String KURL::lastPathComponent() const {
}

String KURL::protocol() const {
return componentString(m_parsed.scheme);
DCHECK_EQ(componentString(m_parsed.scheme), m_protocol);
return m_protocol;
}

String KURL::host() const {
Expand Down Expand Up @@ -365,6 +371,9 @@ unsigned short KURL::port() const {
return static_cast<unsigned short>(port);
}

// TODO(csharrison): Migrate pass() and user() to return a StringView. Most
// consumers just need to know if the string is empty.

String KURL::pass() const {
// Bug: https://bugs.webkit.org/show_bug.cgi?id=21015 this function returns
// a null string when the password is empty, which we duplicate here.
Expand Down Expand Up @@ -767,9 +776,8 @@ void KURL::init(const KURL& base,
m_string = AtomicString::fromUTF8(output.data(), output.length());
}

initProtocolIsInHTTPFamily();
initProtocolMetadata();
initInnerURL();
DCHECK_EQ(protocol(), protocol().lower());
}

void KURL::initInnerURL() {
Expand All @@ -786,50 +794,26 @@ void KURL::initInnerURL() {
m_innerURL.reset();
}

template <typename CHAR>
bool internalProtocolIs(const url::Component& scheme,
const CHAR* spec,
const char* protocol) {
const CHAR* begin = spec + scheme.begin;
const CHAR* end = begin + scheme.len;

while (begin != end && *protocol) {
ASSERT(toASCIILower(*protocol) == *protocol);
if (toASCIILower(*begin++) != *protocol++)
return false;
}

// Both strings are equal (ignoring case) if and only if all of the characters
// were equal, and the end of both has been reached.
return begin == end && !*protocol;
}

template <typename CHAR>
bool checkIfProtocolIsInHTTPFamily(const url::Component& scheme,
const CHAR* spec) {
if (scheme.len == 4)
return internalProtocolIs(scheme, spec, "http");
if (scheme.len == 5)
return internalProtocolIs(scheme, spec, "https");
if (scheme.len == 7)
return internalProtocolIs(scheme, spec, "http-so");
if (scheme.len == 8)
return internalProtocolIs(scheme, spec, "https-so");
return false;
}

void KURL::initProtocolIsInHTTPFamily() {
void KURL::initProtocolMetadata() {
if (!m_isValid) {
m_protocolIsInHTTPFamily = false;
m_protocol = componentString(m_parsed.scheme);
return;
}

ASSERT(!m_string.isNull());
m_protocolIsInHTTPFamily =
m_string.is8Bit() ? checkIfProtocolIsInHTTPFamily(m_parsed.scheme,
m_string.characters8())
: checkIfProtocolIsInHTTPFamily(
m_parsed.scheme, m_string.characters16());
DCHECK(!m_string.isNull());
StringView protocol = componentStringView(m_parsed.scheme);
m_protocolIsInHTTPFamily = true;
if (protocol == WTF::httpsAtom) {
m_protocol = WTF::httpsAtom;
} else if (protocol == WTF::httpAtom) {
m_protocol = WTF::httpAtom;
} else {
m_protocol = AtomicString(protocol.toString());
m_protocolIsInHTTPFamily =
m_protocol == "http-so" || m_protocol == "https-so";
}
DCHECK_EQ(m_protocol, m_protocol.lower());
}

bool KURL::protocolIs(const char* protocol) const {
Expand All @@ -840,26 +824,16 @@ bool KURL::protocolIs(const char* protocol) const {
// instead.
// FIXME: Chromium code needs to be fixed for this assert to be enabled.
// ASSERT(strcmp(protocol, "javascript"));

if (m_string.isNull() || m_parsed.scheme.len <= 0)
return *protocol == '\0';

return m_string.is8Bit()
? internalProtocolIs(m_parsed.scheme, m_string.characters8(),
protocol)
: internalProtocolIs(m_parsed.scheme, m_string.characters16(),
protocol);
return m_protocol == protocol;
}

String KURL::stringForInvalidComponent() const {
if (m_string.isNull())
return String();
return emptyString();
StringView KURL::stringViewForInvalidComponent() const {
return m_string.isNull() ? StringView() : StringView("", 0);
}

String KURL::componentString(const url::Component& component) const {
StringView KURL::componentStringView(const url::Component& component) const {
if (!m_isValid || component.len <= 0)
return stringForInvalidComponent();
return stringViewForInvalidComponent();
// begin and len are in terms of bytes which do not match
// if string() is UTF-16 and input contains non-ASCII characters.
// However, the only part in urlString that can contain non-ASCII
Expand All @@ -868,7 +842,14 @@ String KURL::componentString(const url::Component& component) const {
// byte) will be longer than what's needed by 'mid'. However, mid
// truncates len to avoid go past the end of a string so that we can
// get away without doing anything here.
return getString().substring(component.begin, component.len);

int maxLength = getString().length() - component.begin;
return StringView(getString(), component.begin,
component.len > maxLength ? maxLength : component.len);
}

String KURL::componentString(const url::Component& component) const {
return componentStringView(component).toString();
}

template <typename CHAR>
Expand All @@ -882,6 +863,7 @@ void KURL::replaceComponents(const url::Replacements<CHAR>& replacements) {

m_parsed = newParsed;
m_string = AtomicString::fromUTF8(output.data(), output.length());
initProtocolMetadata();
}

bool KURL::isSafeToSendToAnotherThread() const {
Expand Down
11 changes: 9 additions & 2 deletions third_party/WebKit/Source/platform/weborigin/KURL.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,17 +196,24 @@ class PLATFORM_EXPORT KURL {
const String& relative,
const WTF::TextEncoding* queryEncoding);

StringView componentStringView(const url::Component&) const;
String componentString(const url::Component&) const;
String stringForInvalidComponent() const;
StringView stringViewForInvalidComponent() const;

template <typename CHAR>
void replaceComponents(const url::Replacements<CHAR>&);

void initInnerURL();
void initProtocolIsInHTTPFamily();
void initProtocolMetadata();

bool m_isValid;
bool m_protocolIsInHTTPFamily;

// Keep a separate string for the protocol to avoid copious copies for
// protocol(). Normally this will be Atomic, except when constructed via
// KURL::copy(), which is deep.
String m_protocol;

url::Parsed m_parsed;
String m_string;
std::unique_ptr<KURL> m_innerURL;
Expand Down
31 changes: 15 additions & 16 deletions third_party/WebKit/Source/platform/weborigin/KURLTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -367,46 +367,46 @@ TEST(KURLTest, Valid_HTTP_FTP_URLsHaveHosts) {
url::AddStandardScheme("http-so", url::SCHEME_WITH_PORT);
url::AddStandardScheme("https-so", url::SCHEME_WITH_PORT);

KURL kurl;
KURL kurl(ParsedURLString, "foo://www.google.com/");
EXPECT_TRUE(kurl.setProtocol("http"));
EXPECT_TRUE(kurl.protocolIs("http"));
EXPECT_FALSE(kurl.isValid());
EXPECT_TRUE(kurl.protocolIsInHTTPFamily());
EXPECT_TRUE(kurl.isValid());

EXPECT_TRUE(kurl.setProtocol("http-so"));
EXPECT_TRUE(kurl.protocolIs("http-so"));
EXPECT_FALSE(kurl.isValid());
EXPECT_TRUE(kurl.isValid());

EXPECT_TRUE(kurl.setProtocol("https"));
EXPECT_TRUE(kurl.protocolIs("https"));
EXPECT_FALSE(kurl.isValid());
EXPECT_TRUE(kurl.isValid());

EXPECT_TRUE(kurl.setProtocol("https-so"));
EXPECT_TRUE(kurl.protocolIs("https-so"));
EXPECT_FALSE(kurl.isValid());
EXPECT_TRUE(kurl.isValid());

EXPECT_TRUE(kurl.setProtocol("ftp"));
EXPECT_TRUE(kurl.protocolIs("ftp"));
EXPECT_FALSE(kurl.isValid());
EXPECT_TRUE(kurl.isValid());

kurl = KURL(KURL(), "http://");
EXPECT_FALSE(kurl.protocolIs("http"));

kurl = KURL(KURL(), "http://wide#鸡");
EXPECT_TRUE(kurl.protocolIs("http"));
EXPECT_FALSE(kurl.isValid());
EXPECT_EQ(kurl.protocol(), "http");

kurl = KURL(KURL(), "http-so://");
kurl = KURL(KURL(), "http-so://foo");
EXPECT_TRUE(kurl.protocolIs("http-so"));
EXPECT_FALSE(kurl.isValid());

kurl = KURL(KURL(), "https://");
kurl = KURL(KURL(), "https://foo");
EXPECT_TRUE(kurl.protocolIs("https"));
EXPECT_FALSE(kurl.isValid());

kurl = KURL(KURL(), "https-so://");
kurl = KURL(KURL(), "https-so://foo");
EXPECT_TRUE(kurl.protocolIs("https-so"));
EXPECT_FALSE(kurl.isValid());

kurl = KURL(KURL(), "ftp://");
kurl = KURL(KURL(), "ftp://foo");
EXPECT_TRUE(kurl.protocolIs("ftp"));
EXPECT_FALSE(kurl.isValid());

kurl = KURL(KURL(), "http://host/");
EXPECT_TRUE(kurl.isValid());
Expand Down Expand Up @@ -699,7 +699,6 @@ TEST(KURLTest, ProtocolIs) {

KURL invalidUTF8(ParsedURLString, "http://a@9%aa%:");
EXPECT_FALSE(invalidUTF8.protocolIs("http"));
EXPECT_TRUE(invalidUTF8.protocolIs(""));

KURL capital(KURL(), "HTTP://www.example.text");
EXPECT_TRUE(capital.protocolIs("http"));
Expand Down
2 changes: 2 additions & 0 deletions third_party/WebKit/Source/wtf/text/AtomicString.h
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,8 @@ WTF_EXPORT extern const AtomicString& starAtom;
WTF_EXPORT extern const AtomicString& xmlAtom;
WTF_EXPORT extern const AtomicString& xmlnsAtom;
WTF_EXPORT extern const AtomicString& xlinkAtom;
WTF_EXPORT extern const AtomicString& httpAtom;
WTF_EXPORT extern const AtomicString& httpsAtom;

// AtomicStringHash is the default hash for AtomicString
template <typename T>
Expand Down
4 changes: 4 additions & 0 deletions third_party/WebKit/Source/wtf/text/StringStatics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ WTF_EXPORT DEFINE_GLOBAL(AtomicString, starAtom);
WTF_EXPORT DEFINE_GLOBAL(AtomicString, xmlAtom);
WTF_EXPORT DEFINE_GLOBAL(AtomicString, xmlnsAtom);
WTF_EXPORT DEFINE_GLOBAL(AtomicString, xlinkAtom);
WTF_EXPORT DEFINE_GLOBAL(AtomicString, httpAtom);
WTF_EXPORT DEFINE_GLOBAL(AtomicString, httpsAtom);

// This is not an AtomicString because it is unlikely to be used as an
// event/element/attribute name, so it shouldn't pollute the AtomicString hash
Expand Down Expand Up @@ -93,6 +95,8 @@ void StringStatics::init() {
new (NotNull, (void*)&xmlnsAtom) AtomicString(addStaticASCIILiteral("xmlns"));
new (NotNull, (void*)&xlinkAtom) AtomicString(addStaticASCIILiteral("xlink"));
new (NotNull, (void*)&xmlnsWithColon) String("xmlns:");
new (NotNull, (void*)&httpAtom) AtomicString(addStaticASCIILiteral("http"));
new (NotNull, (void*)&httpsAtom) AtomicString(addStaticASCIILiteral("https"));
}

} // namespace WTF

0 comments on commit 775abc2

Please sign in to comment.