Skip to content

Commit

Permalink
gpu: Command buffer multi flush.
Browse files Browse the repository at this point in the history
Collect ordering barriers for all contexts on a channel in a list and
flush those together whenever any context flushes explicitly. This
replaces the implicit flush in ordering barrier when switching between
contexts in a stream and when waiting on sync tokens across streams.

Design doc: https://docs.google.com/document/d/1mvX3VGIrlWtIP8ZBJdzPp9Nf-7TfnrN-cyPy6angVU4/edit

Local testing has shown that this reduces total thread times by ~0.6ms
(total ~11ms) per frame in the infinite scrolling benchmark in WebView
on a Nexus 6, mostly by reducing IO and GPU thread times.

Bug: 757629
Cq-Include-Trybots: master.tryserver.blink:linux_trusty_blink_rel;master.tryserver.chromium.android:android_optional_gpu_tests_rel;master.tryserver.chromium.linux:linux_optional_gpu_tests_rel;master.tryserver.chromium.mac:mac_optional_gpu_tests_rel;master.tryserver.chromium.win:win_optional_gpu_tests_rel
Change-Id: Ic5a0015d46d3bc12f548b0c93ad109533565acfb
Reviewed-on: https://chromium-review.googlesource.com/611481
Commit-Queue: Sunny Sachanandani <sunnyps@chromium.org>
Reviewed-by: Daniel Cheng <dcheng@chromium.org>
Reviewed-by: Antoine Labour <piman@chromium.org>
Cr-Commit-Position: refs/heads/master@{#497615}
  • Loading branch information
sunnyps authored and Commit Bot committed Aug 26, 2017
1 parent d35e92a commit 9b8fb34
Show file tree
Hide file tree
Showing 39 changed files with 414 additions and 524 deletions.
9 changes: 2 additions & 7 deletions cc/raster/gpu_raster_buffer_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -180,13 +180,8 @@ void GpuRasterBufferProvider::OrderingBarrier() {
}

void GpuRasterBufferProvider::Flush() {
if (async_worker_context_enabled_) {
int32_t worker_stream_id =
worker_context_provider_->ContextSupport()->GetStreamId();

compositor_context_provider_->ContextSupport()
->FlushOrderingBarrierOnStream(worker_stream_id);
}
if (async_worker_context_enabled_)
compositor_context_provider_->ContextSupport()->FlushPendingWork();
}

viz::ResourceFormat GpuRasterBufferProvider::GetResourceFormat(
Expand Down
9 changes: 2 additions & 7 deletions cc/raster/one_copy_raster_buffer_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,8 @@ void OneCopyRasterBufferProvider::OrderingBarrier() {
}

void OneCopyRasterBufferProvider::Flush() {
if (async_worker_context_enabled_) {
int32_t worker_stream_id =
worker_context_provider_->ContextSupport()->GetStreamId();

compositor_context_provider_->ContextSupport()
->FlushOrderingBarrierOnStream(worker_stream_id);
}
if (async_worker_context_enabled_)
compositor_context_provider_->ContextSupport()->FlushPendingWork();
}

viz::ResourceFormat OneCopyRasterBufferProvider::GetResourceFormat(
Expand Down
4 changes: 4 additions & 0 deletions cc/resources/layer_tree_resource_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "build/build_config.h"
#include "components/viz/common/resources/resource_format_utils.h"
#include "gpu/command_buffer/client/context_support.h"
#include "gpu/command_buffer/client/gles2_interface.h"
#include "gpu/command_buffer/client/gpu_memory_buffer_manager.h"

Expand Down Expand Up @@ -84,6 +85,9 @@ void LayerTreeResourceProvider::PrepareSendToParent(
unverified_sync_tokens.push_back(new_sync_token.GetData());
}

if (compositor_context_provider_)
compositor_context_provider_->ContextSupport()->FlushPendingWork();

if (!unverified_sync_tokens.empty()) {
DCHECK(settings_.delegated_sync_points_required);
DCHECK(gl);
Expand Down
6 changes: 1 addition & 5 deletions cc/test/test_context_support.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,7 @@ TestContextSupport::TestContextSupport()

TestContextSupport::~TestContextSupport() {}

int32_t TestContextSupport::GetStreamId() const {
return 0;
}

void TestContextSupport::FlushOrderingBarrierOnStream(int32_t stream_id) {}
// gpu::ContextSupport override. The test implementation has an empty body, so
// requesting a flush of pending work performs no action here.
void TestContextSupport::FlushPendingWork() {}

void TestContextSupport::SignalSyncToken(const gpu::SyncToken& sync_token,
const base::Closure& callback) {
Expand Down
3 changes: 1 addition & 2 deletions cc/test/test_context_support.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ class TestContextSupport : public gpu::ContextSupport {
~TestContextSupport() override;

// gpu::ContextSupport implementation.
int32_t GetStreamId() const override;
void FlushOrderingBarrierOnStream(int32_t stream_id) override;
void FlushPendingWork() override;
void SignalSyncToken(const gpu::SyncToken& sync_token,
const base::Closure& callback) override;
bool IsSyncTokenSignaled(const gpu::SyncToken& sync_token) override;
Expand Down
22 changes: 11 additions & 11 deletions cc/tiles/tile_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -534,25 +534,25 @@ bool TileManager::PrepareTiles(
return true;
}

void TileManager::Flush() {
TRACE_EVENT0("cc", "TileManager::Flush");
void TileManager::CheckForCompletedTasks() {
TRACE_EVENT0("cc", "TileManager::CheckForCompletedTasks");

if (!tile_task_manager_) {
TRACE_EVENT_INSTANT0("cc", "Flush aborted", TRACE_EVENT_SCOPE_THREAD);
TRACE_EVENT_INSTANT0("cc", "TileManager::CheckForCompletedTasksAborted",
TRACE_EVENT_SCOPE_THREAD);
return;
}

tile_task_manager_->CheckForCompletedTasks();
did_check_for_completed_tasks_since_last_schedule_tasks_ = true;

// Actually flush.
raster_buffer_provider_->Flush();

CheckPendingGpuWorkTiles(true /* issue_signals */);

TRACE_EVENT_INSTANT1("cc", "DidFlush", TRACE_EVENT_SCOPE_THREAD, "stats",
RasterTaskCompletionStatsAsValue(flush_stats_));
flush_stats_ = RasterTaskCompletionStats();
TRACE_EVENT_INSTANT1(
"cc", "TileManager::CheckForCompletedTasksFinished",
TRACE_EVENT_SCOPE_THREAD, "stats",
RasterTaskCompletionStatsAsValue(raster_task_completion_stats_));
raster_task_completion_stats_ = RasterTaskCompletionStats();
}

void TileManager::DidModifyTilePriorities() {
Expand Down Expand Up @@ -1213,13 +1213,13 @@ void TileManager::OnRasterTaskCompleted(
scheduled_draw_images_.erase(images_it);

if (was_canceled) {
++flush_stats_.canceled_count;
++raster_task_completion_stats_.canceled_count;
resource_pool_->ReleaseResource(resource);
return;
}

resource_pool_->OnContentReplaced(resource->id(), tile_id);
++flush_stats_.completed_count;
++raster_task_completion_stats_.completed_count;

if (!tile) {
resource_pool_->ReleaseResource(resource);
Expand Down
4 changes: 2 additions & 2 deletions cc/tiles/tile_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ class CC_EXPORT TileManager : CheckerImageTrackerClient {

// This causes any completed raster work to finalize, so that tiles get up to
// date draw information.
void Flush();
void CheckForCompletedTasks();

// Called when the required-for-activation/required-for-draw state of tiles
// may have changed.
Expand Down Expand Up @@ -396,7 +396,7 @@ class CC_EXPORT TileManager : CheckerImageTrackerClient {
ImageController image_controller_;
CheckerImageTracker checker_image_tracker_;

RasterTaskCompletionStats flush_stats_;
RasterTaskCompletionStats raster_task_completion_stats_;

TaskGraph graph_;

Expand Down
2 changes: 1 addition & 1 deletion cc/tiles/tile_manager_perftest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ class TileManagerPerfTest : public TestLayerTreeHostBase {

GlobalStateThatImpactsTilePriority global_state(GlobalStateForTest());
tile_manager()->PrepareTiles(global_state);
tile_manager()->Flush();
tile_manager()->CheckForCompletedTasks();
timer_.NextLap();
} while (!timer_.HasTimeLimitExpired());

Expand Down
2 changes: 1 addition & 1 deletion cc/tiles/tile_manager_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1719,7 +1719,7 @@ TEST_F(TileManagerTest, LowResHasNoImage) {
.WillOnce(testing::Invoke([&run_loop]() { run_loop.Quit(); }));
tile_manager->PrepareTiles(host_impl()->global_tile_state());
run_loop.Run();
tile_manager->Flush();
tile_manager->CheckForCompletedTasks();

Tile* tile = tiling->TileAt(0, 0);
// The tile in the tiling was rastered.
Expand Down
2 changes: 1 addition & 1 deletion cc/trees/layer_tree_host_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1136,7 +1136,7 @@ DrawResult LayerTreeHostImpl::PrepareToDraw(FrameData* frame) {
// This will cause NotifyTileStateChanged() to be called for any tiles that
// completed, which will add damage for visible tiles to the frame for them so
// they appear as part of the current frame being drawn.
tile_manager_.Flush();
tile_manager_.CheckForCompletedTasks();

frame->render_surface_list = &active_tree_->GetRenderSurfaceList();
frame->render_passes.clear();
Expand Down
3 changes: 1 addition & 2 deletions gpu/command_buffer/client/client_test_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,7 @@ class MockClientGpuControl : public GpuControl {
MOCK_METHOD0(EnsureWorkVisible, void());
MOCK_CONST_METHOD0(GetNamespaceID, CommandBufferNamespace());
MOCK_CONST_METHOD0(GetCommandBufferID, CommandBufferId());
MOCK_CONST_METHOD0(GetStreamId, int32_t());
MOCK_METHOD1(FlushOrderingBarrierOnStream, void(int32_t));
MOCK_METHOD0(FlushPendingWork, void());
MOCK_METHOD0(GenerateFenceSyncRelease, uint64_t());
MOCK_METHOD1(IsFenceSyncRelease, bool(uint64_t release));
MOCK_METHOD1(IsFenceSyncFlushed, bool(uint64_t release));
Expand Down
7 changes: 2 additions & 5 deletions gpu/command_buffer/client/context_support.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,8 @@ struct SyncToken;

class ContextSupport {
public:
// Returns the stream id for this context.
virtual int32_t GetStreamId() const = 0;

// Flush any outstanding ordering barriers on given stream.
virtual void FlushOrderingBarrierOnStream(int32_t stream_id) = 0;
// Flush any outstanding ordering barriers for all contexts.
virtual void FlushPendingWork() = 0;

// Runs |callback| when the given sync token is signalled. The sync token may
// belong to any context.
Expand Down
24 changes: 6 additions & 18 deletions gpu/command_buffer/client/gles2_implementation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -357,12 +357,8 @@ void GLES2Implementation::RunIfContextNotLost(const base::Closure& callback) {
callback.Run();
}

int32_t GLES2Implementation::GetStreamId() const {
return gpu_control_->GetStreamId();
}

void GLES2Implementation::FlushOrderingBarrierOnStream(int32_t stream_id) {
gpu_control_->FlushOrderingBarrierOnStream(stream_id);
// ContextSupport implementation: performs no work of its own and simply
// forwards the request to gpu_control_->FlushPendingWork().
void GLES2Implementation::FlushPendingWork() {
gpu_control_->FlushPendingWork();
}

void GLES2Implementation::SignalSyncToken(const gpu::SyncToken& sync_token,
Expand Down Expand Up @@ -6139,8 +6135,7 @@ void GLES2Implementation::GenSyncTokenCHROMIUM(GLuint64 fence_sync,
}

// Copy the data over after setting the data to ensure alignment.
SyncToken sync_token_data(gpu_control_->GetNamespaceID(),
gpu_control_->GetStreamId(),
SyncToken sync_token_data(gpu_control_->GetNamespaceID(), 0,
gpu_control_->GetCommandBufferID(), fence_sync);
sync_token_data.SetVerifyFlush();
memcpy(sync_token, &sync_token_data, sizeof(sync_token_data));
Expand All @@ -6163,8 +6158,7 @@ void GLES2Implementation::GenUnverifiedSyncTokenCHROMIUM(GLuint64 fence_sync,
}

// Copy the data over after setting the data to ensure alignment.
SyncToken sync_token_data(gpu_control_->GetNamespaceID(),
gpu_control_->GetStreamId(),
SyncToken sync_token_data(gpu_control_->GetNamespaceID(), 0,
gpu_control_->GetCommandBufferID(), fence_sync);
memcpy(sync_token, &sync_token_data, sizeof(sync_token_data));
}
Expand Down Expand Up @@ -6194,15 +6188,9 @@ void GLES2Implementation::VerifySyncTokensCHROMIUM(GLbyte **sync_tokens,
}
}

// This step must be done after all unverified tokens have finished processing
// CanWaitUnverifiedSyncToken(), command buffers use that to do any necessary
// flushes.
if (requires_synchronization) {
// Make sure we have no pending ordering barriers by flushing now.
FlushHelper();
// Ensure all the fence syncs are visible on GPU service.
// Ensure all the fence syncs are visible on GPU service.
if (requires_synchronization)
gpu_control_->EnsureWorkVisible();
}
}

void GLES2Implementation::WaitSyncTokenCHROMIUM(const GLbyte* sync_token_data) {
Expand Down
3 changes: 1 addition & 2 deletions gpu/command_buffer/client/gles2_implementation.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,7 @@ class GLES2_IMPL_EXPORT GLES2Implementation
#include "gpu/command_buffer/client/gles2_implementation_autogen.h"

// ContextSupport implementation.
int32_t GetStreamId() const override;
void FlushOrderingBarrierOnStream(int32_t stream_id) override;
void FlushPendingWork() override;
void SignalSyncToken(const gpu::SyncToken& sync_token,
const base::Closure& callback) override;
bool IsSyncTokenSignaled(const gpu::SyncToken& sync_token) override;
Expand Down
7 changes: 0 additions & 7 deletions gpu/command_buffer/client/gles2_implementation_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3970,7 +3970,6 @@ TEST_F(GLES2ImplementationTest, GenSyncTokenCHROMIUM) {
.WillRepeatedly(Return(kNamespaceId));
EXPECT_CALL(*gpu_control_, GetCommandBufferID())
.WillRepeatedly(Return(kCommandBufferId));
EXPECT_CALL(*gpu_control_, GetStreamId()).WillRepeatedly(Return(0));

gl_->GenSyncTokenCHROMIUM(kFenceSync, nullptr);
EXPECT_EQ(GL_INVALID_VALUE, CheckError());
Expand Down Expand Up @@ -4015,7 +4014,6 @@ TEST_F(GLES2ImplementationTest, GenUnverifiedSyncTokenCHROMIUM) {
.WillRepeatedly(Return(kNamespaceId));
EXPECT_CALL(*gpu_control_, GetCommandBufferID())
.WillRepeatedly(Return(kCommandBufferId));
EXPECT_CALL(*gpu_control_, GetStreamId()).WillRepeatedly(Return(0));

gl_->GenUnverifiedSyncTokenCHROMIUM(kFenceSync, nullptr);
EXPECT_EQ(GL_INVALID_VALUE, CheckError());
Expand Down Expand Up @@ -4067,7 +4065,6 @@ TEST_F(GLES2ImplementationTest, VerifySyncTokensCHROMIUM) {
.WillRepeatedly(Return(kNamespaceId));
EXPECT_CALL(*gpu_control_, GetCommandBufferID())
.WillRepeatedly(Return(kCommandBufferId));
EXPECT_CALL(*gpu_control_, GetStreamId()).WillRepeatedly(Return(0));

EXPECT_CALL(*gpu_control_, IsFenceSyncRelease(kFenceSync))
.WillOnce(Return(true));
Expand Down Expand Up @@ -4122,7 +4119,6 @@ TEST_F(GLES2ImplementationTest, VerifySyncTokensCHROMIUM_Sequence) {
.WillRepeatedly(Return(kNamespaceId));
EXPECT_CALL(*gpu_control_, GetCommandBufferID())
.WillRepeatedly(Return(kCommandBufferId));
EXPECT_CALL(*gpu_control_, GetStreamId()).WillRepeatedly(Return(0));

// Generate sync token 1.
EXPECT_CALL(*gpu_control_, IsFenceSyncRelease(kFenceSync1))
Expand Down Expand Up @@ -4199,7 +4195,6 @@ TEST_F(GLES2ImplementationTest, WaitSyncTokenCHROMIUM) {
EXPECT_CALL(*gpu_control_, GetNamespaceID()).WillOnce(Return(kNamespaceId));
EXPECT_CALL(*gpu_control_, GetCommandBufferID())
.WillOnce(Return(kCommandBufferId));
EXPECT_CALL(*gpu_control_, GetStreamId()).WillOnce(Return(0));
gl_->GenSyncTokenCHROMIUM(kFenceSync, sync_token_data);

struct Cmds {
Expand Down Expand Up @@ -4537,7 +4532,6 @@ TEST_F(GLES2ImplementationTest, SignalSyncToken) {
EXPECT_CALL(*gpu_control_, IsFenceSyncFlushReceived(fence_sync))
.WillOnce(Return(true));
EXPECT_CALL(*gpu_control_, GetNamespaceID()).WillOnce(Return(GPU_IO));
EXPECT_CALL(*gpu_control_, GetStreamId()).WillOnce(Return(0));
EXPECT_CALL(*gpu_control_, GetCommandBufferID())
.WillOnce(Return(CommandBufferId::FromUnsafeValue(1)));
gpu::SyncToken sync_token;
Expand Down Expand Up @@ -4569,7 +4563,6 @@ TEST_F(GLES2ImplementationTest, SignalSyncTokenAfterContextLoss) {
EXPECT_CALL(*gpu_control_, IsFenceSyncFlushReceived(fence_sync))
.WillOnce(Return(true));
EXPECT_CALL(*gpu_control_, GetNamespaceID()).WillOnce(Return(GPU_IO));
EXPECT_CALL(*gpu_control_, GetStreamId()).WillOnce(Return(0));
EXPECT_CALL(*gpu_control_, GetCommandBufferID())
.WillOnce(Return(CommandBufferId::FromUnsafeValue(1)));
gpu::SyncToken sync_token;
Expand Down
8 changes: 2 additions & 6 deletions gpu/command_buffer/client/gpu_control.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,8 @@ class GPU_EXPORT GpuControl {
virtual CommandBufferNamespace GetNamespaceID() const = 0;
virtual CommandBufferId GetCommandBufferID() const = 0;

// Returns the stream id for this context. Only relevant for IPC command
// buffer proxy. Used as extra command buffer data in sync tokens.
virtual int32_t GetStreamId() const = 0;

// Flush any outstanding ordering barriers on given stream.
virtual void FlushOrderingBarrierOnStream(int32_t stream_id) = 0;
// Flush any outstanding ordering barriers on all contexts.
virtual void FlushPendingWork() = 0;

// Generates a fence sync which should be inserted into the GL command stream.
// When the service executes the fence sync it is released. Fence syncs are
Expand Down
30 changes: 25 additions & 5 deletions gpu/command_buffer/service/scheduler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,16 @@ class Scheduler::Sequence {
DISALLOW_COPY_AND_ASSIGN(Sequence);
};

// Builds a Task from the sequence it targets, the closure to run, and the sync
// tokens it depends on. |closure| and |sync_token_fences| are taken by value
// and moved into the corresponding members; |sequence_id| is copied.
Scheduler::Task::Task(SequenceId sequence_id,
base::OnceClosure closure,
std::vector<SyncToken> sync_token_fences)
: sequence_id(sequence_id),
closure(std::move(closure)),
sync_token_fences(std::move(sync_token_fences)) {}
// Move construction, destruction and move assignment all use the
// compiler-generated (defaulted) implementations.
Scheduler::Task::Task(Task&& other) = default;
Scheduler::Task::~Task() = default;
Scheduler::Task& Scheduler::Task::operator=(Task&& other) = default;

Scheduler::SchedulingState::SchedulingState() = default;
Scheduler::SchedulingState::SchedulingState(const SchedulingState& other) =
default;
Expand Down Expand Up @@ -324,16 +334,26 @@ void Scheduler::DisableSequence(SequenceId sequence_id) {
sequence->SetEnabled(false);
}

void Scheduler::ScheduleTask(SequenceId sequence_id,
base::OnceClosure closure,
const std::vector<SyncToken>& sync_token_fences) {
// Schedules a single task. Acquires |lock_| for the duration of the call and
// hands the task (by move) to ScheduleTaskHelper(), which does the actual
// scheduling and asserts that the lock is held.
void Scheduler::ScheduleTask(Task task) {
base::AutoLock auto_lock(lock_);
ScheduleTaskHelper(std::move(task));
}

// Schedules a batch of tasks. |lock_| is acquired once and held across the
// whole loop; each task is then moved, in vector order, into
// ScheduleTaskHelper().
void Scheduler::ScheduleTasks(std::vector<Task> tasks) {
base::AutoLock auto_lock(lock_);
// Iterate by non-const reference so each element can be moved out of |tasks|.
for (auto& task : tasks)
ScheduleTaskHelper(std::move(task));
}

void Scheduler::ScheduleTaskHelper(Task task) {
lock_.AssertAcquired();
SequenceId sequence_id = task.sequence_id;
Sequence* sequence = GetSequence(sequence_id);
DCHECK(sequence);

uint32_t order_num = sequence->ScheduleTask(std::move(closure));
uint32_t order_num = sequence->ScheduleTask(std::move(task.closure));

for (const SyncToken& sync_token : sync_token_fences) {
for (const SyncToken& sync_token : task.sync_token_fences) {
SequenceId release_id =
sync_point_manager_->GetSyncTokenReleaseSequenceId(sync_token);
Sequence* release_sequence = GetSequence(release_id);
Expand Down
Loading

0 comments on commit 9b8fb34

Please sign in to comment.