Skip to content

Commit

Permalink
Adds a --deterministic-fetch flag to headless_shell
Browse files Browse the repository at this point in the history
Adds a --deterministic-fetch flag to headless_shell which causes network
requests to complete in the same order they're created in.  This removes
a significant source of network related non-determinism at the cost of
slower page loads.

BUG=546953

Review-Url: https://codereview.chromium.org/2352663003
Cr-Commit-Position: refs/heads/master@{#420657}
  • Loading branch information
alexclarke authored and Commit bot committed Sep 23, 2016
1 parent 330ba42 commit ad54054
Show file tree
Hide file tree
Showing 14 changed files with 429 additions and 15 deletions.
4 changes: 4 additions & 0 deletions headless/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -202,12 +202,16 @@ static_library("headless_lib") {
"public/util/black_hole_protocol_handler.h",
"public/util/deterministic_dispatcher.cc",
"public/util/deterministic_dispatcher.h",
"public/util/deterministic_http_protocol_handler.cc",
"public/util/deterministic_http_protocol_handler.h",
"public/util/error_reporter.cc",
"public/util/error_reporter.h",
"public/util/expedited_dispatcher.cc",
"public/util/expedited_dispatcher.h",
"public/util/generic_url_request_job.cc",
"public/util/generic_url_request_job.h",
"public/util/http_url_fetcher.cc",
"public/util/http_url_fetcher.h",
"public/util/in_memory_protocol_handler.cc",
"public/util/in_memory_protocol_handler.h",
"public/util/in_memory_request_job.cc",
Expand Down
23 changes: 22 additions & 1 deletion headless/app/headless_shell.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
#include "headless/public/headless_devtools_client.h"
#include "headless/public/headless_devtools_target.h"
#include "headless/public/headless_web_contents.h"
#include "headless/public/util/deterministic_dispatcher.h"
#include "headless/public/util/deterministic_http_protocol_handler.h"
#include "net/base/file_stream.h"
#include "net/base/io_buffer.h"
#include "net/base/ip_address.h"
Expand Down Expand Up @@ -73,7 +75,25 @@ class HeadlessShell : public HeadlessWebContents::Observer,
void OnStart(HeadlessBrowser* browser) {
browser_ = browser;

browser_context_ = browser_->CreateBrowserContextBuilder().Build();
if (base::CommandLine::ForCurrentProcess()->HasSwitch(
headless::switches::kDeterministicFetch)) {
deterministic_dispatcher_.reset(
new headless::DeterministicDispatcher(browser_->BrowserIOThread()));

headless::ProtocolHandlerMap protocol_handlers;
protocol_handlers[url::kHttpScheme] =
base::MakeUnique<headless::DeterministicHttpProtocolHandler>(
deterministic_dispatcher_.get(), browser->BrowserIOThread());
protocol_handlers[url::kHttpsScheme] =
base::MakeUnique<headless::DeterministicHttpProtocolHandler>(
deterministic_dispatcher_.get(), browser->BrowserIOThread());

browser_context_ = browser_->CreateBrowserContextBuilder()
.SetProtocolHandlers(std::move(protocol_handlers))
.Build();
} else {
browser_context_ = browser_->CreateBrowserContextBuilder().Build();
}

HeadlessWebContents::Builder builder(
browser_context_->CreateWebContentsBuilder());
Expand Down Expand Up @@ -335,6 +355,7 @@ class HeadlessShell : public HeadlessWebContents::Observer,
bool processed_page_ready_;
std::unique_ptr<net::FileStream> screenshot_file_stream_;
HeadlessBrowserContext* browser_context_;
std::unique_ptr<headless::DeterministicDispatcher> deterministic_dispatcher_;

DISALLOW_COPY_AND_ASSIGN(HeadlessShell);
};
Expand Down
5 changes: 5 additions & 0 deletions headless/app/headless_shell_switches.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
namespace headless {
namespace switches {

// Instructs headless_shell to cause network fetches to complete in order of
// creation. This removes a significant source of network related
// non-determinism at the cost of slower page loads.
const char kDeterministicFetch[] = "deterministic-fetch";

// Instructs headless_shell to print document.body.innerHTML to stdout.
const char kDumpDom[] = "dump-dom";

Expand Down
1 change: 1 addition & 0 deletions headless/app/headless_shell_switches.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

namespace headless {
namespace switches {
extern const char kDeterministicFetch[];
extern const char kDumpDom[];
extern const char kProxyServer[];
extern const char kRemoteDebuggingAddress[];
Expand Down
80 changes: 80 additions & 0 deletions headless/public/util/deterministic_http_protocol_handler.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "headless/public/util/deterministic_http_protocol_handler.h"

#include <utility>

#include "base/macros.h"
#include "base/memory/ptr_util.h"
#include "headless/public/headless_browser_context.h"
#include "headless/public/util/deterministic_dispatcher.h"
#include "headless/public/util/generic_url_request_job.h"
#include "headless/public/util/http_url_fetcher.h"
#include "net/url_request/url_request_context.h"
#include "net/url_request/url_request_job_factory_impl.h"

namespace headless {

// A GenericURLRequestJob::Delegate in which every hook declines to act: it
// never rewrites or blocks a request, never supplies a canned response and
// ignores load completion.
class DeterministicHttpProtocolHandler::NopGenericURLRequestJobDelegate
    : public GenericURLRequestJob::Delegate {
 public:
  NopGenericURLRequestJobDelegate() = default;
  ~NopGenericURLRequestJobDelegate() override = default;

  // GenericURLRequestJob::Delegate implementation:

  // Always reports that no rewriting is necessary; |callback| is never run.
  bool BlockOrRewriteRequest(
      const GURL& url,
      const std::string& method,
      const std::string& referrer,
      GenericURLRequestJob::RewriteCallback callback) override {
    return false;
  }

  // Never fulfills a request itself; a null result means no resource matched.
  const GenericURLRequestJob::HttpResponse* MaybeMatchResource(
      const GURL& url,
      const std::string& method,
      const net::HttpRequestHeaders& request_headers) override {
    return nullptr;
  }

  // Intentionally empty: nothing to do when a resource load finishes.
  void OnResourceLoadComplete(const GURL& final_url,
                              const std::string& mime_type,
                              int http_response_code) override {}

 private:
  DISALLOW_COPY_AND_ASSIGN(NopGenericURLRequestJobDelegate);
};

// Stores the shared dispatcher (not owned) and the task runner for the IO
// thread, and creates the no-op delegate that is handed to every job made by
// MaybeCreateJob(). The URLRequestContext and job factory members are left
// null here; MaybeCreateJob() creates them on first use.
//
// |deterministic_dispatcher|: dispatcher passed to each created job.
// |io_task_runner|: used to DCHECK the creating thread and to delete the
//     lazily created context and job factory in the destructor.
DeterministicHttpProtocolHandler::DeterministicHttpProtocolHandler(
    DeterministicDispatcher* deterministic_dispatcher,
    scoped_refptr<base::SingleThreadTaskRunner> io_task_runner)
    : deterministic_dispatcher_(deterministic_dispatcher),
      // |io_task_runner| is a by-value sink parameter, so move it into the
      // member instead of copying to avoid needless ref-count churn.
      io_task_runner_(std::move(io_task_runner)),
      nop_delegate_(new NopGenericURLRequestJobDelegate()) {}

// Hands any lazily created network objects to |io_task_runner_| for deletion
// rather than destroying them on whatever thread runs this destructor.
DeterministicHttpProtocolHandler::~DeterministicHttpProtocolHandler() {
  // Deletion of the context is posted ahead of the job factory it points at
  // (see set_job_factory() in MaybeCreateJob()).
  if (url_request_context_) {
    net::URLRequestContext* context = url_request_context_.release();
    io_task_runner_->DeleteSoon(FROM_HERE, context);
  }
  if (url_request_job_factory_) {
    net::URLRequestJobFactory* job_factory =
        url_request_job_factory_.release();
    io_task_runner_->DeleteSoon(FROM_HERE, job_factory);
  }
}

// Creates a GenericURLRequestJob for |request| that is scheduled through
// |deterministic_dispatcher_| and fetches via an HttpURLFetcher bound to this
// handler's private URLRequestContext. Always returns a new job.
net::URLRequestJob* DeterministicHttpProtocolHandler::MaybeCreateJob(
    net::URLRequest* request,
    net::NetworkDelegate* network_delegate) const {
  const bool needs_private_context = !url_request_context_;
  if (needs_private_context) {
    // First use: the private context must be set up on the IO thread.
    DCHECK(io_task_runner_->BelongsToCurrentThread());

    // Clone the requesting context, then swap in an empty
    // URLRequestJobFactoryImpl so that fetches issued through the clone use the
    // default http(s) RequestJobs.
    url_request_context_ = base::MakeUnique<net::URLRequestContext>();
    url_request_context_->CopyFrom(request->context());
    url_request_job_factory_ =
        base::MakeUnique<net::URLRequestJobFactoryImpl>();
    url_request_context_->set_job_factory(url_request_job_factory_.get());
  }

  return new GenericURLRequestJob(
      request, network_delegate, deterministic_dispatcher_,
      base::MakeUnique<HttpURLFetcher>(url_request_context_.get()),
      nop_delegate_.get());
}

} // namespace headless
59 changes: 59 additions & 0 deletions headless/public/util/deterministic_http_protocol_handler.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef HEADLESS_PUBLIC_UTIL_DETERMINISTIC_HTTP_PROTOCOL_HANDLER_H_
#define HEADLESS_PUBLIC_UTIL_DETERMINISTIC_HTTP_PROTOCOL_HANDLER_H_

#include <memory>

#include "base/single_thread_task_runner.h"
#include "net/url_request/url_request_job_factory.h"

namespace net {
class URLRequestContext;
class URLRequestJobFactory;
}  // namespace net

namespace headless {
class DeterministicDispatcher;
class HeadlessBrowserContext;

// A deterministic protocol handler. Requests made to this protocol handler
// will return in order of creation, regardless of what order the network
// returns them in. This helps remove one large source of network-related
// non-determinism at the cost of slower page loads.
//
// Each request is served by a GenericURLRequestJob that is scheduled through
// the supplied DeterministicDispatcher and fetched over a private
// URLRequestContext owned by this handler.
class DeterministicHttpProtocolHandler
: public net::URLRequestJobFactory::ProtocolHandler {
public:
// Note |deterministic_dispatcher| is expected to be shared across a number of
// protocol handlers, e.g. for http & https protocols. It is not owned by this
// handler (presumably it must outlive the handler and the jobs it creates --
// TODO confirm against callers).
// |io_task_runner| is the task runner on which the private URLRequestContext
// and job factory are created (DCHECKed on first use) and later deleted.
DeterministicHttpProtocolHandler(
DeterministicDispatcher* deterministic_dispatcher,
scoped_refptr<base::SingleThreadTaskRunner> io_task_runner);
// Posts deletion of the lazily created context and job factory (if any) to
// |io_task_runner|.
~DeterministicHttpProtocolHandler() override;

// net::URLRequestJobFactory::ProtocolHandler implementation.
// Returns a newly allocated GenericURLRequestJob for |request|. The first
// call also creates the private URLRequestContext, which is why the members
// below are mutable despite this method being const.
net::URLRequestJob* MaybeCreateJob(
net::URLRequest* request,
net::NetworkDelegate* network_delegate) const override;

private:
// Delegate whose hooks do nothing; passed to every job created here.
class NopGenericURLRequestJobDelegate;

DeterministicDispatcher* deterministic_dispatcher_; // NOT OWNED.
// Task runner used to check the creating thread and to delete the objects
// below.
scoped_refptr<base::SingleThreadTaskRunner> io_task_runner_;
// Owned here; jobs receive a raw pointer to it.
std::unique_ptr<NopGenericURLRequestJobDelegate> nop_delegate_;

// |url_request_context_| and |url_request_job_factory_| are lazily created on
// the IO thread. The URLRequestContext is setup to bypass any user-specified
// protocol handlers including this one. This is necessary to actually fetch
// http resources.
mutable std::unique_ptr<net::URLRequestContext> url_request_context_;
mutable std::unique_ptr<net::URLRequestJobFactory> url_request_job_factory_;

DISALLOW_COPY_AND_ASSIGN(DeterministicHttpProtocolHandler);
};

} // namespace headless

#endif // HEADLESS_PUBLIC_UTIL_DETERMINISTIC_HTTP_PROTOCOL_HANDLER_H_
22 changes: 13 additions & 9 deletions headless/public/util/generic_url_request_job.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,13 @@ void GenericURLRequestJob::SetExtraRequestHeaders(
}

void GenericURLRequestJob::Start() {
auto callback = [this](RewriteResult result, const GURL& url) {
auto callback = [this](RewriteResult result, const GURL& url,
const std::string& method) {
switch (result) {
case RewriteResult::kAllow:
// Note that we use the rewritten url for selecting cookies.
// Also, rewriting does not affect the request initiator.
PrepareCookies(url, url::Origin(url));
PrepareCookies(url, method, url::Origin(url));
break;
case RewriteResult::kDeny:
DispatchStartError(net::ERR_FILE_NOT_FOUND);
Expand All @@ -66,14 +67,15 @@ void GenericURLRequestJob::Start() {
}
};

if (!delegate_->BlockOrRewriteRequest(request_->url(), request_->referrer(),
callback)) {
PrepareCookies(request()->url(),
if (!delegate_->BlockOrRewriteRequest(request_->url(), request_->method(),
request_->referrer(), callback)) {
PrepareCookies(request_->url(), request_->method(),
url::Origin(request_->first_party_for_cookies()));
}
}

void GenericURLRequestJob::PrepareCookies(const GURL& rewritten_url,
const std::string& method,
const url::Origin& site_for_cookies) {
net::CookieStore* cookie_store = request_->context()->cookie_store();
net::CookieOptions options;
Expand All @@ -98,11 +100,12 @@ void GenericURLRequestJob::PrepareCookies(const GURL& rewritten_url,
cookie_store->GetCookieListWithOptionsAsync(
rewritten_url, options,
base::Bind(&GenericURLRequestJob::OnCookiesAvailable,
weak_factory_.GetWeakPtr(), rewritten_url));
weak_factory_.GetWeakPtr(), rewritten_url, method));
}

void GenericURLRequestJob::OnCookiesAvailable(
const GURL& rewritten_url,
const std::string& method,
const net::CookieList& cookie_list) {
// TODO(alexclarke): Set user agent.
// Pass cookies, the referrer and any extra headers into the fetch request.
Expand All @@ -114,15 +117,16 @@ void GenericURLRequestJob::OnCookiesAvailable(
request_->referrer());

// The resource may have been supplied in the request.
const HttpResponse* matched_resource =
delegate_->MaybeMatchResource(rewritten_url, extra_request_headers_);
const HttpResponse* matched_resource = delegate_->MaybeMatchResource(
rewritten_url, method, extra_request_headers_);

if (matched_resource) {
OnFetchCompleteExtractHeaders(
matched_resource->final_url, matched_resource->http_response_code,
matched_resource->response_data, matched_resource->response_data_size);
} else {
url_fetcher_->StartFetch(rewritten_url, extra_request_headers_, this);
url_fetcher_->StartFetch(rewritten_url, method, extra_request_headers_,
this);
}
}

Expand Down
8 changes: 6 additions & 2 deletions headless/public/util/generic_url_request_job.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ class GenericURLRequestJob : public ManagedDispatchURLRequestJob,
public URLFetcher::ResultListener {
public:
enum class RewriteResult { kAllow, kDeny, kFailure };
using RewriteCallback =
std::function<void(RewriteResult result, const GURL& url)>;
using RewriteCallback = std::function<
void(RewriteResult result, const GURL& url, const std::string& method)>;

struct HttpResponse {
GURL final_url;
Expand All @@ -58,13 +58,15 @@ class GenericURLRequestJob : public ManagedDispatchURLRequestJob,
// with the result, or false to indicate that no rewriting is necessary.
// Called on an arbitrary thread.
virtual bool BlockOrRewriteRequest(const GURL& url,
const std::string& method,
const std::string& referrer,
RewriteCallback callback) = 0;

// Allows the delegate to synchronously fulfill a request with a reply.
// Called on an arbitrary thread.
virtual const HttpResponse* MaybeMatchResource(
const GURL& url,
const std::string& method,
const net::HttpRequestHeaders& request_headers) = 0;

// Signals that a resource load has finished. Called on an arbitrary thread.
Expand Down Expand Up @@ -104,9 +106,11 @@ class GenericURLRequestJob : public ManagedDispatchURLRequestJob,

private:
void PrepareCookies(const GURL& rewritten_url,
const std::string& method,
const url::Origin& site_for_cookies);

void OnCookiesAvailable(const GURL& rewritten_url,
const std::string& method,
const net::CookieList& cookie_list);

std::unique_ptr<URLFetcher> url_fetcher_;
Expand Down
4 changes: 4 additions & 0 deletions headless/public/util/generic_url_request_job_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,12 @@ class MockFetcher : public URLFetcher {
~MockFetcher() override {}

void StartFetch(const GURL& url,
const std::string& method,
const net::HttpRequestHeaders& request_headers,
ResultListener* result_listener) override {
// Record the request.
fetch_request_->SetString("url", url.spec());
fetch_request_->SetString("method", method);
std::unique_ptr<base::DictionaryValue> headers(new base::DictionaryValue);
for (net::HttpRequestHeaders::Iterator it(request_headers); it.GetNext();) {
headers->SetString(it.name(), it.value());
Expand Down Expand Up @@ -183,6 +185,7 @@ TEST_F(GenericURLRequestJobTest, BasicRequestParams) {

std::string expected_request_json =
"{\"url\": \"https://example.com/\","
" \"method\": \"GET\","
" \"headers\": {"
" \"Accept\": \"text/plain\","
" \"Cookie\": \"\","
Expand Down Expand Up @@ -334,6 +337,7 @@ TEST_F(GenericURLRequestJobTest, RequestWithCookies) {

std::string expected_request_json =
"{\"url\": \"https://example.com/\","
" \"method\": \"GET\","
" \"headers\": {"
" \"Cookie\": \"basic_cookie=1; secure_cookie=2; http_only_cookie=3\","
" \"Referer\": \"\""
Expand Down
Loading

0 comments on commit ad54054

Please sign in to comment.