Skip to content

Commit

Permalink
profiler: Add UMA stats about performance
Browse files Browse the repository at this point in the history
Add UMA stats about how much time we are taking up when we profile a
thread, and how long some of the waits in the system are.

There are some indications in the crash reports that profiling is either
taking longer than expected or some waits are much longer than expected.
Since we can't use the profiler to investigate the profiler, add UMA
timing stats to see if there is a real issue and narrow it down if there
is.

Also add UMA stats to see if the sampling thread is just getting
completely stuck and never returning.

BUG=chromium:1479395

Change-Id: I59e25aed00576621570fd8acbccb9aa539b67255
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/4861291
Commit-Queue: Ian Barkley-Yeung <iby@chromium.org>
Reviewed-by: Mike Wittman <wittman@chromium.org>
Auto-Submit: Ian Barkley-Yeung <iby@chromium.org>
Reviewed-by: Roger McFarlane <rogerm@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1203042}
  • Loading branch information
ianby authored and Chromium LUCI CQ committed Sep 29, 2023
1 parent e48b507 commit 858dd20
Show file tree
Hide file tree
Showing 5 changed files with 310 additions and 9 deletions.
92 changes: 83 additions & 9 deletions base/profiler/stack_copier_signal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@
#include <cstring>

#include "base/memory/raw_ptr.h"
#include "base/metrics/histogram_functions.h"
#include "base/notreached.h"
#include "base/profiler/register_context.h"
#include "base/profiler/stack_buffer.h"
#include "base/profiler/suspendable_thread_delegate.h"
#include "base/time/default_tick_clock.h"
#include "base/time/time.h"
#include "base/time/time_override.h"
#include "base/trace_event/base_tracing.h"
#include "build/build_config.h"
Expand Down Expand Up @@ -95,11 +98,17 @@ class AsyncSafeWaitableEvent {
// destructor.
class ScopedEventSignaller {
public:
ScopedEventSignaller(AsyncSafeWaitableEvent* event) : event_(event) {}
~ScopedEventSignaller() { event_->Signal(); }
ScopedEventSignaller(AsyncSafeWaitableEvent* event,
absl::optional<TimeTicks>* signal_time)
: event_(event), signal_time_(signal_time) {}
~ScopedEventSignaller() {
*signal_time_ = subtle::MaybeTimeTicksNowIgnoringOverride();
event_->Signal();
}

private:
raw_ptr<AsyncSafeWaitableEvent> event_;
raw_ptr<absl::optional<TimeTicks>> signal_time_;
};

// Struct to store the arguments to the signal handler.
Expand All @@ -124,6 +133,9 @@ struct HandlerParams {
// The timestamp when the stack was copied.
raw_ptr<absl::optional<TimeTicks>> maybe_timestamp;

// The timestamp when |event| was signaled.
raw_ptr<absl::optional<TimeTicks>> maybe_timestamp_signaled;

// The delegate provided to the StackCopier.
raw_ptr<StackCopier::Delegate> stack_copier_delegate;
};
Expand All @@ -146,7 +158,7 @@ void CopyStackSignalHandler(int n, siginfo_t* siginfo, void* sigcontext) {
// to always succeed and is thus not signal-safe.
*params->maybe_timestamp = subtle::MaybeTimeTicksNowIgnoringOverride();

ScopedEventSignaller e(params->event);
ScopedEventSignaller e(params->event, params->maybe_timestamp_signaled);
*params->success = false;

const ucontext_t* ucontext = static_cast<ucontext_t*>(sigcontext);
Expand Down Expand Up @@ -214,7 +226,8 @@ class ScopedSigaction {

StackCopierSignal::StackCopierSignal(
std::unique_ptr<ThreadDelegate> thread_delegate)
: thread_delegate_(std::move(thread_delegate)) {}
: thread_delegate_(std::move(thread_delegate)),
clock_(DefaultTickClock::GetInstance()) {}

StackCopierSignal::~StackCopierSignal() = default;

Expand All @@ -228,9 +241,21 @@ bool StackCopierSignal::CopyStack(StackBuffer* stack_buffer,
const uint8_t* stack_copy_bottom = nullptr;
const uintptr_t stack_base_address = thread_delegate_->GetStackBaseAddress();
absl::optional<TimeTicks> maybe_timestamp;
HandlerParams params = {stack_base_address, &wait_event, &copied,
thread_context, stack_buffer, &stack_copy_bottom,
&maybe_timestamp, delegate};
absl::optional<TimeTicks> maybe_timestamp_signaled;
HandlerParams params = {stack_base_address,
&wait_event,
&copied,
thread_context,
stack_buffer,
&stack_copy_bottom,
&maybe_timestamp,
&maybe_timestamp_signaled,
delegate};
TimeTicks signal_time;
TimeTicks wait_start_time;
TimeTicks wait_end_time;

RecordEvent(CopyStackEvent::kStarted);
{
ScopedSetSignalHandlerParams scoped_handler_params(&params);

Expand All @@ -246,21 +271,30 @@ bool StackCopierSignal::CopyStack(StackBuffer* stack_buffer,
// SIGURG is chosen here because we observe no crashes with this signal and
// neither Chrome or the AOSP sets up a special handler for this signal.
ScopedSigaction scoped_sigaction(SIGURG, &action, &original_action);
if (!scoped_sigaction.succeeded())
if (!scoped_sigaction.succeeded()) {
RecordEvent(CopyStackEvent::kSigactionFailed);
return false;
}

signal_time = clock_->NowTicks();

if (syscall(SYS_tgkill, getpid(), thread_delegate_->GetThreadId(),
SIGURG) != 0) {
RecordEvent(CopyStackEvent::kTgkillFailed);
NOTREACHED();
return false;
}

wait_start_time = clock_->NowTicks();
bool finished_waiting = wait_event.Wait();
TRACE_EVENT_END0(TRACE_DISABLED_BY_DEFAULT("cpu_profiler.debug"),
"StackCopierSignal copy stack");
if (!finished_waiting) {
RecordEvent(CopyStackEvent::kWaitFailed);
NOTREACHED();
return false;
}
wait_end_time = clock_->NowTicks();
// Ideally, an accurate timestamp is captured while the sampled thread is
// paused. In rare cases, this may fail, in which case we resort to
// capturing an delayed timestamp here instead.
Expand All @@ -269,10 +303,45 @@ bool StackCopierSignal::CopyStack(StackBuffer* stack_buffer,
else {
TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("cpu_profiler.debug"),
"Fallback on TimeTicks::Now()");
*timestamp = TimeTicks::Now();
*timestamp = clock_->NowTicks();
}
}

RecordEvent(CopyStackEvent::kSucceeded);
// Record UMA stats about how long everything took. Since the profiler can't
// profile the profiler, this is our only way to make sure the profiler isn't
// taking excessively long.
//
// All times are recorded in microseconds since TimeTicks::IsHighResolution()
// is always true on Posix systems, and we expect these to be very short
// times.
constexpr TimeDelta kMin = Microseconds(1);
constexpr TimeDelta kMax = Microseconds(200 * 1000);
constexpr int kBuckets = 100;

UmaHistogramCustomMicrosecondsTimes(
"UMA.StackProfiler.CopyStack.TotalCrossThreadTime",
wait_end_time - signal_time, kMin, kMax, kBuckets);
UmaHistogramCustomMicrosecondsTimes(
"UMA.StackProfiler.CopyStack.ProfileThreadTotalWaitTime",
wait_end_time - wait_start_time, kMin, kMax, kBuckets);
if (maybe_timestamp) {
UmaHistogramCustomMicrosecondsTimes(
"UMA.StackProfiler.CopyStack.SignalToHandlerTime",
*maybe_timestamp - signal_time, kMin, kMax, kBuckets);

if (maybe_timestamp_signaled) {
UmaHistogramCustomMicrosecondsTimes(
"UMA.StackProfiler.CopyStack.HandlerRunTime",
*maybe_timestamp_signaled - *maybe_timestamp, kMin, kMax, kBuckets);
}
}
if (maybe_timestamp_signaled) {
UmaHistogramCustomMicrosecondsTimes(
"UMA.StackProfiler.CopyStack.EventSignalToWaitEndTime",
wait_end_time - *maybe_timestamp_signaled, kMin, kMax, kBuckets);
}

const uintptr_t bottom = RegisterContextStackPointer(params.context);
for (uintptr_t* reg :
thread_delegate_->GetRegistersToRewrite(thread_context)) {
Expand All @@ -288,4 +357,9 @@ bool StackCopierSignal::CopyStack(StackBuffer* stack_buffer,
return copied;
}

// static
void StackCopierSignal::RecordEvent(CopyStackEvent event) {
UmaHistogramEnumeration("UMA.StackProfiler.CopyStack.Event", event);
}

} // namespace base
24 changes: 24 additions & 0 deletions base/profiler/stack_copier_signal.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
#include <memory>

#include "base/base_export.h"
#include "base/memory/raw_ptr.h"
#include "base/profiler/stack_copier.h"
#include "base/time/tick_clock.h"

namespace base {

Expand All @@ -30,8 +32,30 @@ class BASE_EXPORT StackCopierSignal : public StackCopier {

using StackCopier::CopyStackContentsAndRewritePointers;

void set_clock_for_testing(const TickClock* clock) { clock_ = clock; }

// Events that happen during CopyStack; used for the
// UMA.StackProfiler.CopyStack.Event histogram. Public for use by tests.
// These values are persisted to logs. Entries should not be renumbered and
// numeric values should never be reused.
enum class CopyStackEvent {
kStarted = 0,
kSucceeded = 1,
kSigactionFailed = 2,
kTgkillFailed = 3,
kWaitFailed = 4,
kMaxValue = kWaitFailed
};

private:
// Records an event during a run of CopyStack to the
// UMA.StackProfiler.CopyStack.Event histogram.
static void RecordEvent(CopyStackEvent event);

std::unique_ptr<ThreadDelegate> thread_delegate_;
// Clock used for time inside CopyStack. NOT used for getting the time in the
// signal handler, which always uses the real system tick clock.
raw_ptr<const TickClock> clock_;
};

} // namespace base
Expand Down
103 changes: 103 additions & 0 deletions base/profiler/stack_copier_signal_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,29 @@
#include <string.h>
#include <algorithm>
#include <utility>
#include <vector>

#include "base/debug/alias.h"
#include "base/profiler/sampling_profiler_thread_token.h"
#include "base/profiler/stack_buffer.h"
#include "base/profiler/stack_copier_signal.h"
#include "base/profiler/thread_delegate_posix.h"
#include "base/synchronization/waitable_event.h"
#include "base/test/metrics/histogram_tester.h"
#include "base/threading/platform_thread.h"
#include "base/threading/simple_thread.h"
#include "build/build_config.h"
#include "build/chromeos_buildflags.h"
#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"

namespace base {

namespace {

using ::testing::ElementsAre;
using ::testing::Return;

// Values to write to the stack and look for in the copy.
static const uint32_t kStackSentinels[] = {0xf312ecd9, 0x1fcd7f19, 0xe69e617d,
0x8245f94f};
Expand Down Expand Up @@ -73,6 +79,11 @@ class TestStackCopierDelegate : public StackCopier::Delegate {
bool on_stack_copy_was_invoked_ = false;
};

class MockTickClock : public TickClock {
public:
MOCK_METHOD(TimeTicks, NowTicks, (), (const, override));
};

} // namespace

// ASAN moves local variables outside of the stack extents, which breaks the
Expand Down Expand Up @@ -185,6 +196,98 @@ TEST(StackCopierSignalTest, MAYBE_CopyStackDelegateInvoked) {
EXPECT_TRUE(stack_copier_delegate.on_stack_copy_was_invoked());
}

// TSAN hangs on the AsyncSafeWaitableEvent FUTEX_WAIT call.
#if defined(THREAD_SANITIZER)
#define MAYBE_CopyStackUMAStats DISABLED_CopyStackUMAStats
#elif BUILDFLAG(IS_LINUX)
// We don't support getting the stack base address on Linux, and thus can't
// copy the stack. // https://crbug.com/1394278
#define MAYBE_CopyStackUMAStats DISABLED_CopyStackUMAStats
#else
#define MAYBE_CopyStackUMAStats CopyStackUMAStats
#endif
TEST(StackCopierSignalTest, MAYBE_CopyStackUMAStats) {
HistogramTester histograms;
StackBuffer stack_buffer(/* buffer_size = */ 1 << 20);
memset(stack_buffer.buffer(), 0, stack_buffer.size());
uintptr_t stack_top = 0;
TimeTicks timestamp;
RegisterContext context;
TestStackCopierDelegate stack_copier_delegate;
MockTickClock clock;
TimeTicks real_now = TimeTicks::Now();
EXPECT_CALL(clock, NowTicks())
// signal_time
.WillOnce(Return(real_now - Microseconds(1000)))
// wait_start_time
.WillOnce(Return(real_now - Microseconds(600)))
// wait_end_time. Also covers the fallback if needed.
.WillRepeatedly(Return(real_now));

auto thread_delegate =
ThreadDelegatePosix::Create(GetSamplingProfilerCurrentThreadToken());
ASSERT_TRUE(thread_delegate);
StackCopierSignal copier(std::move(thread_delegate));
copier.set_clock_for_testing(&clock);

bool result = copier.CopyStack(&stack_buffer, &stack_top, &timestamp,
&context, &stack_copier_delegate);
ASSERT_TRUE(result);

EXPECT_THAT(
histograms.GetAllSamples("UMA.StackProfiler.CopyStack.Event"),
ElementsAre(Bucket(StackCopierSignal::CopyStackEvent::kStarted, 1),
Bucket(StackCopierSignal::CopyStackEvent::kSucceeded, 1)));

// Do not use HistogramTester::ExpectUniqueTimeSample which assumes the
// histogram has units of milliseconds.
histograms.ExpectUniqueSample(
"UMA.StackProfiler.CopyStack.TotalCrossThreadTime",
// signal_time to wait_end_time should be 1000 microseconds.
1000, 1);
histograms.ExpectUniqueSample(
"UMA.StackProfiler.CopyStack.ProfileThreadTotalWaitTime",
// start_wait_time to end_wait_time should be 600 microseconds.
600, 1);
// Since we can't override the times returned from the signal handler, we
// can't use the normal ExpectUniqueSample. HistogramTester doesn't give us
// enough information to check if a sample was in a range. So the best we can
// do is check that we got a sample for SignalToHandlerTime, HandlerRunTime,
// and EventSignalToWaitEndTime. However, we might not even get a sample
// for those if the signal handler couldn't get a time, and we don't want to
// fail the test for that. So all we can verify is that:
// 1. We have at most one sample for each and
// 2. If SignalToHandlerTime and EventSignalToWaitEndTime both have a sample
// (meaning both clock fetches succeeded), then HandlerRunTime does as
// well.
HistogramTester::CountsMap counts =
histograms.GetTotalCountsForPrefix("UMA.StackProfiler.CopyStack.");

int signal_to_handler_sample_count = 0;
if (auto it = counts.find("UMA.StackProfiler.CopyStack.SignalToHandlerTime");
it != counts.end()) {
signal_to_handler_sample_count = it->second;
EXPECT_EQ(signal_to_handler_sample_count, 1);
}
int handler_run_time_sample_count = 0;
if (auto it = counts.find("UMA.StackProfiler.CopyStack.HandlerRunTime");
it != counts.end()) {
handler_run_time_sample_count = it->second;
EXPECT_EQ(handler_run_time_sample_count, 1);
}
int event_signal_to_wait_end_time_sample_count = 0;
if (auto it =
counts.find("UMA.StackProfiler.CopyStack.EventSignalToWaitEndTime");
it != counts.end()) {
event_signal_to_wait_end_time_sample_count = it->second;
EXPECT_EQ(event_signal_to_wait_end_time_sample_count, 1);
}

EXPECT_EQ(handler_run_time_sample_count != 0,
signal_to_handler_sample_count != 0 &&
event_signal_to_wait_end_time_sample_count != 0);
}

// Limit to 32-bit Android, which is the platform we care about for this
// functionality. The test is broken on too many other varied platforms to try
// to selectively disable.
Expand Down
8 changes: 8 additions & 0 deletions tools/metrics/histograms/enums.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20413,6 +20413,14 @@ Called by update_net_error_codes.py.-->
<int value="2" label="Cache miss"/>
</enum>

<enum name="CopyStackEvent">
<int value="0" label="Started"/>
<int value="1" label="Succeeded"/>
<int value="2" label="Sigaction failed"/>
<int value="3" label="Tgkill failed"/>
<int value="4" label="Wait failed"/>
</enum>

<enum name="CorbCanonicalMimeType">
<summary>Coarse MIME type classification for CORB decisions.</summary>
<int value="0" label="Html">
Expand Down
Loading

0 comments on commit 858dd20

Please sign in to comment.