// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/sampling_heap_profiler/sampling_heap_profiler.h"

#include <algorithm>
#include <cmath>

#include "base/allocator/allocator_shim.h"
#include "base/allocator/buildflags.h"
#include "base/allocator/partition_allocator/partition_alloc.h"
#include "base/atomicops.h"
#include "base/debug/alias.h"
#include "base/debug/stack_trace.h"
#include "base/no_destructor.h"
#include "base/partition_alloc_buildflags.h"
#include "base/rand_util.h"
#include "build/build_config.h"

namespace base {

using base::allocator::AllocatorDispatch;
using base::subtle::Atomic32;
using base::subtle::AtomicWord;

namespace {
// Controls how many top frames to skip when recording the call stack.
// These frames correspond to the profiler's own frames.
const uint32_t kSkipBaseAllocatorFrames = 2;
const size_t kDefaultSamplingIntervalBytes = 128 * 1024;
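// With the Poisson sampling below this default yields, on average, one
// sample per 128 KiB of allocated memory.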
// When true, sample intervals are not randomized. Used for testing.
bool g_deterministic;
// A positive value if profiling is running, otherwise it's zero.
Atomic32 g_running;
// Number of lock-free accesses to the samples_ map (safe because they do not
// cause rehashing) currently being performed.
Atomic32 g_operations_in_flight;
// Controls if new incoming lock-free accesses are allowed.
// When set to true, threads should not enter lock-free paths.
Atomic32 g_fast_path_is_closed;
// Number of bytes left to form the sample being collected.
AtomicWord g_bytes_left;
// Current sample size to be accumulated. Basically:
// <bytes accumulated toward sample> == g_current_interval - g_bytes_left
AtomicWord g_current_interval;
// Sampling interval parameter, the mean value for intervals between samples.
AtomicWord g_sampling_interval = kDefaultSamplingIntervalBytes;
// Last generated sample ordinal number.
uint32_t g_last_sample_ordinal = 0;
SamplingHeapProfiler* g_sampling_heap_profiler_instance;
void (*g_hooks_install_callback)();
Atomic32 g_hooks_installed;
void* AllocFn(const AllocatorDispatch* self, size_t size, void* context) {
void* address = self->next->alloc_function(self->next, size, context);
SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
return address;
}
void* AllocZeroInitializedFn(const AllocatorDispatch* self,
size_t n,
size_t size,
void* context) {
void* address =
self->next->alloc_zero_initialized_function(self->next, n, size, context);
SamplingHeapProfiler::RecordAlloc(address, n * size,
kSkipBaseAllocatorFrames);
return address;
}
void* AllocAlignedFn(const AllocatorDispatch* self,
size_t alignment,
size_t size,
void* context) {
void* address =
self->next->alloc_aligned_function(self->next, alignment, size, context);
SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
return address;
}
void* ReallocFn(const AllocatorDispatch* self,
void* address,
size_t size,
void* context) {
// Note: size == 0 actually performs free.
SamplingHeapProfiler::RecordFree(address);
address = self->next->realloc_function(self->next, address, size, context);
SamplingHeapProfiler::RecordAlloc(address, size, kSkipBaseAllocatorFrames);
return address;
}
void FreeFn(const AllocatorDispatch* self, void* address, void* context) {
SamplingHeapProfiler::RecordFree(address);
self->next->free_function(self->next, address, context);
}
size_t GetSizeEstimateFn(const AllocatorDispatch* self,
void* address,
void* context) {
return self->next->get_size_estimate_function(self->next, address, context);
}
unsigned BatchMallocFn(const AllocatorDispatch* self,
size_t size,
void** results,
unsigned num_requested,
void* context) {
unsigned num_allocated = self->next->batch_malloc_function(
self->next, size, results, num_requested, context);
for (unsigned i = 0; i < num_allocated; ++i) {
SamplingHeapProfiler::RecordAlloc(results[i], size,
kSkipBaseAllocatorFrames);
}
return num_allocated;
}
void BatchFreeFn(const AllocatorDispatch* self,
void** to_be_freed,
unsigned num_to_be_freed,
void* context) {
for (unsigned i = 0; i < num_to_be_freed; ++i)
SamplingHeapProfiler::RecordFree(to_be_freed[i]);
self->next->batch_free_function(self->next, to_be_freed, num_to_be_freed,
context);
}
void FreeDefiniteSizeFn(const AllocatorDispatch* self,
void* address,
size_t size,
void* context) {
SamplingHeapProfiler::RecordFree(address);
self->next->free_definite_size_function(self->next, address, size, context);
}
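// Dispatch table hooked into the allocator shim chain. Each entry records the
// allocation or free with the profiler and then forwards the call to the next
// dispatch in the chain.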
AllocatorDispatch g_allocator_dispatch = {&AllocFn,
&AllocZeroInitializedFn,
&AllocAlignedFn,
&ReallocFn,
&FreeFn,
&GetSizeEstimateFn,
&BatchMallocFn,
&BatchFreeFn,
&FreeDefiniteSizeFn,
nullptr};
#if BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
void PartitionAllocHook(void* address, size_t size, const char*) {
SamplingHeapProfiler::RecordAlloc(address, size);
}
void PartitionFreeHook(void* address) {
SamplingHeapProfiler::RecordFree(address);
}
#endif // BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
} // namespace
SamplingHeapProfiler::Sample::Sample(size_t size,
size_t total,
uint32_t ordinal)
: size(size), total(total), ordinal(ordinal) {}
SamplingHeapProfiler::Sample::Sample(const Sample&) = default;
SamplingHeapProfiler::Sample::~Sample() = default;
SamplingHeapProfiler::SamplingHeapProfiler() {
g_sampling_heap_profiler_instance = this;
}
// static
void SamplingHeapProfiler::InstallAllocatorHooksOnce() {
static bool hook_installed = InstallAllocatorHooks();
base::debug::Alias(&hook_installed);
}
// static
bool SamplingHeapProfiler::InstallAllocatorHooks() {
#if BUILDFLAG(USE_ALLOCATOR_SHIM)
base::allocator::InsertAllocatorDispatch(&g_allocator_dispatch);
#else
base::debug::Alias(&g_allocator_dispatch);
DLOG(WARNING)
<< "base::allocator shims are not available for memory sampling.";
#endif // BUILDFLAG(USE_ALLOCATOR_SHIM)
#if BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
base::PartitionAllocHooks::SetAllocationHook(&PartitionAllocHook);
base::PartitionAllocHooks::SetFreeHook(&PartitionFreeHook);
#endif // BUILDFLAG(USE_PARTITION_ALLOC) && !defined(OS_NACL)
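  // Both this function and SetHooksInstallCallback() try to flip
  // g_hooks_installed from 0 to 1; whichever runs second sees the old value 1
  // and invokes the callback, so it fires exactly once regardless of order.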
int32_t hooks_install_callback_has_been_set =
base::subtle::Acquire_CompareAndSwap(&g_hooks_installed, 0, 1);
if (hooks_install_callback_has_been_set)
g_hooks_install_callback();
return true;
}
// static
void SamplingHeapProfiler::SetHooksInstallCallback(
void (*hooks_install_callback)()) {
CHECK(!g_hooks_install_callback && hooks_install_callback);
g_hooks_install_callback = hooks_install_callback;
int32_t profiler_has_already_been_initialized =
base::subtle::Release_CompareAndSwap(&g_hooks_installed, 0, 1);
if (profiler_has_already_been_initialized)
g_hooks_install_callback();
}
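// Returns the ordinal of the last sample recorded so far; callers can later
// pass it to GetSamples() to retrieve only the samples collected after this
// Start() call.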
uint32_t SamplingHeapProfiler::Start() {
InstallAllocatorHooksOnce();
size_t next_interval =
GetNextSampleInterval(base::subtle::Acquire_Load(&g_sampling_interval));
base::subtle::Release_Store(&g_current_interval, next_interval);
base::subtle::Release_Store(&g_bytes_left, next_interval);
base::subtle::Barrier_AtomicIncrement(&g_running, 1);
return g_last_sample_ordinal;
}
void SamplingHeapProfiler::Stop() {
AtomicWord count = base::subtle::Barrier_AtomicIncrement(&g_running, -1);
CHECK_GE(count, 0);
}
void SamplingHeapProfiler::SetSamplingInterval(size_t sampling_interval) {
// TODO(alph): Reset the sample being collected if running.
base::subtle::Release_Store(&g_sampling_interval,
static_cast<AtomicWord>(sampling_interval));
}
// static
size_t SamplingHeapProfiler::GetNextSampleInterval(size_t interval) {
if (UNLIKELY(g_deterministic))
return interval;
// We sample with a Poisson process, with constant average sampling
// interval. This follows the exponential probability distribution with
// parameter λ = 1/interval where |interval| is the average number of bytes
// between samples.
// Let u be a uniformly distributed random number between 0 and 1, then
// next_sample = -ln(u) / λ
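  // For example, with the default 128 KiB interval and u = 0.5 this gives
  // -ln(0.5) * 128 KiB ~ 0.69 * 128 KiB ~ 89 KiB until the next sample.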
double uniform = base::RandDouble();
double value = -log(uniform) * interval;
size_t min_value = sizeof(intptr_t);
  // We limit the upper bound of a sample interval to make sure we don't have
  // huge gaps in the sampling stream. The probability of hitting the upper
  // bound is exp(-20) ~ 2e-9, so it should not skew the distribution.
size_t max_value = interval * 20;
if (UNLIKELY(value < min_value))
return min_value;
if (UNLIKELY(value > max_value))
return max_value;
return static_cast<size_t>(value);
}
// static
void SamplingHeapProfiler::RecordAlloc(void* address,
size_t size,
uint32_t skip_frames) {
if (UNLIKELY(!base::subtle::NoBarrier_Load(&g_running)))
return;
  // A lock-free algorithm decreases the number of bytes left to form a sample.
  // The thread whose decrement makes the counter reach zero (or go below) is
  // responsible for recording the sample.
AtomicWord bytes_left = base::subtle::NoBarrier_AtomicIncrement(
&g_bytes_left, -static_cast<AtomicWord>(size));
if (LIKELY(bytes_left > 0))
return;
  // Return if g_bytes_left was already zero or below before we decreased it.
  // That means another thread, not this one, crossed the threshold.
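  // Example: if g_bytes_left was 12 before an allocation of 40 bytes, this
  // thread sees bytes_left == -28 and bytes_left + 40 == 12 > 0, so it is the
  // thread that crossed the threshold; a thread that decremented an already
  // non-positive counter sees bytes_left + size <= 0 and returns here.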
if (LIKELY(bytes_left + static_cast<AtomicWord>(size) <= 0))
return;
// Only one thread that crossed the threshold is running the code below.
// It is going to be recording the sample.
size_t accumulated = base::subtle::Acquire_Load(&g_current_interval);
size_t next_interval =
GetNextSampleInterval(base::subtle::NoBarrier_Load(&g_sampling_interval));
// Make sure g_current_interval is set before updating g_bytes_left.
base::subtle::Release_Store(&g_current_interval, next_interval);
  // Store the next sampling interval into g_bytes_left, thus allowing threads
  // to start accumulating bytes towards the next sample.
  // Simultaneously extract the current value (which is negative or zero)
  // and take it into account when calculating the number of bytes
  // accumulated for the current sample.
accumulated -=
base::subtle::NoBarrier_AtomicExchange(&g_bytes_left, next_interval);
  g_sampling_heap_profiler_instance->DoRecordAlloc(accumulated, size, address,
                                                   skip_frames);
}
void SamplingHeapProfiler::RecordStackTrace(Sample* sample,
uint32_t skip_frames) {
#if !defined(OS_NACL)
// TODO(alph): Consider using debug::TraceStackFramePointers. It should be
// somewhat faster than base::debug::StackTrace.
base::debug::StackTrace trace;
size_t count;
void* const* addresses = const_cast<void* const*>(trace.Addresses(&count));
const uint32_t kSkipProfilerOwnFrames = 2;
skip_frames += kSkipProfilerOwnFrames;
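  // Copy frames [skip_frames, count) into the sample, dropping the profiler's
  // own frames at the top of the stack; if skip_frames >= count the range is
  // empty and nothing is copied.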
sample->stack.insert(
sample->stack.end(), &addresses[skip_frames],
&addresses[std::max(count, static_cast<size_t>(skip_frames))]);
#endif
}
void SamplingHeapProfiler::DoRecordAlloc(size_t total_allocated,
size_t size,
void* address,
uint32_t skip_frames) {
// TODO(alph): It's better to use a recursive mutex and move the check
// inside the critical section.
if (entered_.Get())
return;
base::AutoLock lock(mutex_);
entered_.Set(true);
Sample sample(size, total_allocated, ++g_last_sample_ordinal);
RecordStackTrace(&sample, skip_frames);
  // Close the fast path, as inserting an element into samples_ may cause
  // rehashing that invalidates the iterators used by concurrent readers.
base::subtle::Release_Store(&g_fast_path_is_closed, 1);
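  // Wait for all lock-free accesses already in flight to drain. The inner
  // no-barrier loop keeps the spin cheap; the outer acquire load re-checks the
  // counter with proper ordering once it appears to have reached zero.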
while (base::subtle::Acquire_Load(&g_operations_in_flight)) {
while (base::subtle::NoBarrier_Load(&g_operations_in_flight)) {
}
}
for (auto* observer : observers_)
observer->SampleAdded(sample.ordinal, size, total_allocated);
// TODO(alph): We can do better by keeping the fast-path open when
// we know insert won't cause rehashing.
samples_.emplace(address, std::move(sample));
base::subtle::Release_Store(&g_fast_path_is_closed, 0);
entered_.Set(false);
}
// static
void SamplingHeapProfiler::RecordFree(void* address) {
bool maybe_sampled = true; // Pessimistically assume allocation was sampled.
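  // Register this lookup as an in-flight lock-free access so that
  // DoRecordAlloc() can wait for it before mutating samples_; the map is only
  // consulted while the fast path is open.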
base::subtle::Barrier_AtomicIncrement(&g_operations_in_flight, 1);
if (LIKELY(!base::subtle::NoBarrier_Load(&g_fast_path_is_closed)))
maybe_sampled = g_sampling_heap_profiler_instance->samples_.count(address);
base::subtle::Barrier_AtomicIncrement(&g_operations_in_flight, -1);
if (maybe_sampled)
g_sampling_heap_profiler_instance->DoRecordFree(address);
}
void SamplingHeapProfiler::DoRecordFree(void* address) {
if (entered_.Get())
return;
base::AutoLock lock(mutex_);
entered_.Set(true);
auto it = samples_.find(address);
if (it != samples_.end()) {
for (auto* observer : observers_)
observer->SampleRemoved(it->second.ordinal);
samples_.erase(it);
}
entered_.Set(false);
}
// static
SamplingHeapProfiler* SamplingHeapProfiler::GetInstance() {
static base::NoDestructor<SamplingHeapProfiler> instance;
return instance.get();
}
// static
void SamplingHeapProfiler::SuppressRandomnessForTest(bool suppress) {
g_deterministic = suppress;
}
void SamplingHeapProfiler::AddSamplesObserver(SamplesObserver* observer) {
base::AutoLock lock(mutex_);
CHECK(!entered_.Get());
observers_.push_back(observer);
}
void SamplingHeapProfiler::RemoveSamplesObserver(SamplesObserver* observer) {
base::AutoLock lock(mutex_);
CHECK(!entered_.Get());
auto it = std::find(observers_.begin(), observers_.end(), observer);
CHECK(it != observers_.end());
observers_.erase(it);
}
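// Returns the samples whose ordinal is greater than |profile_id|, i.e. those
// recorded after the Start() call that returned |profile_id|.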
std::vector<SamplingHeapProfiler::Sample> SamplingHeapProfiler::GetSamples(
uint32_t profile_id) {
base::AutoLock lock(mutex_);
CHECK(!entered_.Get());
entered_.Set(true);
std::vector<Sample> samples;
for (auto& it : samples_) {
Sample& sample = it.second;
if (sample.ordinal > profile_id)
samples.push_back(sample);
}
entered_.Set(false);
return samples;
}
} // namespace base