Skip to content

Commit

Permalink
[PartitionAlloc] Replace SpinLock with a spinning lock on Linux kernels.
Browse files Browse the repository at this point in the history
PartitionAlloc's main lock is the per-PartitionRoot lock. It has to be
fast on the common uncontended path, and better than a simple spinlock
when it's contended.

Past attempts to replace it with base::Lock regressed low and high-level
benchmarks. This commit introduces SpinningFutex, a futex()-based lock
which spins in userspace like SpinLock before calling into the kernel,
like base::Lock does.

It is intended to be as fast as SpinLock, but better for contended
cases. Since it relies on the linux-specific futex() system call, this
is only available on Linux-based kernels, for now.

Bug: 1125866, 1125999, 1061437
Change-Id: I1e0cf23beb48c32518d0ffc293d1a9326bc2d7bc
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2409902
Commit-Queue: Benoit L <lizeb@chromium.org>
Reviewed-by: Kentaro Hara <haraken@chromium.org>
Cr-Commit-Position: refs/heads/master@{#808343}
  • Loading branch information
Benoit Lize authored and Commit Bot committed Sep 18, 2020
1 parent d1c780a commit d6ce9d7
Show file tree
Hide file tree
Showing 7 changed files with 355 additions and 50 deletions.
9 changes: 9 additions & 0 deletions base/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -1794,6 +1794,7 @@ component("base") {
"allocator/partition_allocator/random.h",
"allocator/partition_allocator/thread_cache.cc",
"allocator/partition_allocator/thread_cache.h",
"allocator/partition_allocator/yield_processor.h",
]
if (is_win) {
sources +=
Expand All @@ -1806,6 +1807,13 @@ component("base") {
"allocator/partition_allocator/page_allocator_internals_fuchsia.h",
]
}

if (is_linux || is_android) {
sources += [
"allocator/partition_allocator/spinning_futex_linux.cc",
"allocator/partition_allocator/spinning_futex_linux.h",
]
}
}
}

Expand Down Expand Up @@ -3217,6 +3225,7 @@ test("base_unittests") {
"allocator/partition_allocator/memory_reclaimer_unittest.cc",
"allocator/partition_allocator/page_allocator_unittest.cc",
"allocator/partition_allocator/partition_alloc_unittest.cc",
"allocator/partition_allocator/partition_lock_unittest.cc",
"allocator/partition_allocator/thread_cache_unittest.cc",
]
}
Expand Down
40 changes: 1 addition & 39 deletions base/allocator/partition_allocator/partition_lock.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@

#include "base/allocator/partition_allocator/partition_lock.h"

#if !DCHECK_IS_ON()

#include "base/allocator/partition_allocator/yield_processor.h"
#include "base/threading/platform_thread.h"

#if defined(OS_WIN)
Expand All @@ -14,48 +13,13 @@
#include <sched.h>
#endif

// The YIELD_PROCESSOR macro wraps an architecture specific-instruction that
// informs the processor we're in a busy wait, so it can handle the branch more
// intelligently and e.g. reduce power to our core or give more resources to the
// other hyper-thread on this core. See the following for context:
// https://software.intel.com/en-us/articles/benefitting-power-and-performance-sleep-loops
//
// The YIELD_THREAD macro tells the OS to relinquish our quantum. This is
// basically a worst-case fallback, and if you're hitting it with any frequency
// you really should be using a proper lock (such as |base::Lock|)rather than
// these spinlocks.
#if defined(OS_WIN)

#define YIELD_PROCESSOR YieldProcessor()
#define YIELD_THREAD SwitchToThread()

#elif defined(OS_POSIX) || defined(OS_FUCHSIA)

#if defined(ARCH_CPU_X86_64) || defined(ARCH_CPU_X86)
#define YIELD_PROCESSOR __asm__ __volatile__("pause")
#elif (defined(ARCH_CPU_ARMEL) && __ARM_ARCH >= 6) || defined(ARCH_CPU_ARM64)
#define YIELD_PROCESSOR __asm__ __volatile__("yield")
#elif defined(ARCH_CPU_MIPSEL)
// The MIPS32 docs state that the PAUSE instruction is a no-op on older
// architectures (first added in MIPS32r2). To avoid assembler errors when
// targeting pre-r2, we must encode the instruction manually.
#define YIELD_PROCESSOR __asm__ __volatile__(".word 0x00000140")
#elif defined(ARCH_CPU_MIPS64EL) && __mips_isa_rev >= 2
// Don't bother doing using .word here since r2 is the lowest supported mips64
// that Chromium supports.
#define YIELD_PROCESSOR __asm__ __volatile__("pause")
#elif defined(ARCH_CPU_PPC64_FAMILY)
#define YIELD_PROCESSOR __asm__ __volatile__("or 31,31,31")
#elif defined(ARCH_CPU_S390_FAMILY)
// just do nothing
#define YIELD_PROCESSOR ((void)0)
#endif // ARCH

#ifndef YIELD_PROCESSOR
#warning "Processor yield not supported on this architecture."
#define YIELD_PROCESSOR ((void)0)
#endif

#define YIELD_THREAD sched_yield()

#else // Other OS
Expand Down Expand Up @@ -102,5 +66,3 @@ void SpinLock::AcquireSlow() {

} // namespace internal
} // namespace base

#endif // !DCHECK_IS_ON()
19 changes: 8 additions & 11 deletions base/allocator/partition_allocator/partition_lock.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,14 @@

#include "base/allocator/buildflags.h"
#include "base/no_destructor.h"
#include "base/synchronization/lock.h"
#include "base/thread_annotations.h"
#include "base/threading/platform_thread.h"
#include "build/build_config.h"

#if defined(OS_LINUX) || defined(OS_ANDROID)
#include "base/allocator/partition_allocator/spinning_futex_linux.h"
#endif

namespace base {
namespace internal {

Expand Down Expand Up @@ -53,7 +57,6 @@ class SCOPED_LOCKABLE ScopedUnlockGuard {
MaybeSpinLock<thread_safe>& lock_;
};

#if !DCHECK_IS_ON()
// Spinlock. Do not use, to be removed. crbug.com/1061437.
class BASE_EXPORT SpinLock {
public:
Expand All @@ -80,7 +83,6 @@ class BASE_EXPORT SpinLock {

std::atomic_int lock_{0};
};
#endif // !DCHECK_IS_ON()

template <>
class LOCKABLE MaybeSpinLock<true> {
Expand Down Expand Up @@ -134,14 +136,9 @@ class LOCKABLE MaybeSpinLock<true> {
}

private:
#if DCHECK_IS_ON()
// NoDestructor to avoid issues with the "static destruction order fiasco".
//
// This also means that we leak a lock when a partition is destructed. This
// will in practice only show in some tests, as partitions are not destructed
// in regular use. In addition, on most platforms, base::Lock doesn't allocate
// memory and neither does the OS library, and the destructor is a no-op.
base::NoDestructor<base::Lock> lock_;
// DCHECK_IS_ON() for now to check stability before performance.
#if DCHECK_IS_ON() && (defined(OS_LINUX) || defined(OS_ANDROID))
base::NoDestructor<SpinningFutex> lock_;
#else
// base::Lock is slower on the fast path than SpinLock, hence we still use it
// on non-DCHECK() builds. crbug.com/1125999
Expand Down
117 changes: 117 additions & 0 deletions base/allocator/partition_allocator/partition_lock_unittest.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/allocator/partition_allocator/partition_lock.h"

#include "base/bind.h"
#include "base/callback.h"
#include "base/macros.h"
#include "base/test/bind_test_util.h"
#include "base/test/gtest_util.h"
#include "base/threading/platform_thread.h"
#include "base/time/time.h"
#include "testing/gtest/include/gtest/gtest.h"

namespace base {
namespace internal {
namespace {

class LambdaThreadDelegate : public PlatformThread::Delegate {
public:
explicit LambdaThreadDelegate(RepeatingClosure f) : f_(f) {}
void ThreadMain() override { f_.Run(); }

private:
RepeatingClosure f_;
};

TEST(SpinLockTest, Simple) {
MaybeSpinLock<true> lock;
lock.Lock();
lock.Unlock();
}

MaybeSpinLock<true> g_lock;
TEST(SpinLockTest, StaticLockStartsUnlocked) {
g_lock.Lock();
g_lock.Unlock();
}

TEST(SpinLockTest, Contended) {
int counter = 0; // *Not* atomic.
std::vector<PlatformThreadHandle> thread_handles;
constexpr int iterations_per_thread = 1000000;
constexpr int num_threads = 4;

MaybeSpinLock<true> lock;
MaybeSpinLock<true> start_lock;

LambdaThreadDelegate delegate{BindLambdaForTesting([&]() {
start_lock.Lock();
start_lock.Unlock();

for (int i = 0; i < iterations_per_thread; i++) {
lock.Lock();
counter++;
lock.Unlock();
}
})};

start_lock.Lock(); // Make sure that the threads compete, by waiting until
// all of them have at least been created.
for (int i = 0; i < num_threads; i++) {
PlatformThreadHandle handle;
PlatformThread::Create(0, &delegate, &handle);
thread_handles.push_back(handle);
}

start_lock.Unlock();

for (int i = 0; i < num_threads; i++) {
PlatformThread::Join(thread_handles[i]);
}
EXPECT_EQ(iterations_per_thread * num_threads, counter);
}

TEST(SpinLockTest, SlowThreads) {
int counter = 0; // *Not* atomic.
std::vector<PlatformThreadHandle> thread_handles;
constexpr int iterations_per_thread = 100;
constexpr int num_threads = 4;

MaybeSpinLock<true> lock;
MaybeSpinLock<true> start_lock;

LambdaThreadDelegate delegate{BindLambdaForTesting([&]() {
start_lock.Lock();
start_lock.Unlock();

for (int i = 0; i < iterations_per_thread; i++) {
lock.Lock();
counter++;
// Hold the lock for a while, to force futex()-based locks to sleep.
PlatformThread::Sleep(TimeDelta::FromMilliseconds(1));
lock.Unlock();
}
})};

start_lock.Lock(); // Make sure that the threads compete, by waiting until
// all of them have at least been created.
for (int i = 0; i < num_threads; i++) {
PlatformThreadHandle handle;
PlatformThread::Create(0, &delegate, &handle);
thread_handles.push_back(handle);
}

start_lock.Unlock();

for (int i = 0; i < num_threads; i++) {
PlatformThread::Join(thread_handles[i]);
}
EXPECT_EQ(iterations_per_thread * num_threads, counter);
}

} // namespace
} // namespace internal
} // namespace base
73 changes: 73 additions & 0 deletions base/allocator/partition_allocator/spinning_futex_linux.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/allocator/partition_allocator/spinning_futex_linux.h"

#include "base/allocator/partition_allocator/partition_alloc_check.h"
#include "build/build_config.h"

#if defined(OS_LINUX) || defined(OS_ANDROID)

#include <errno.h>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

namespace base {
namespace internal {

void SpinningFutex::FutexWait() {
// Save and restore errno.
int saved_errno = errno;
// Don't check the return value, as we will not be awaken by a timeout, since
// none is specified.
//
// Ignoring the return value doesn't impact correctness, as this acts as an
// immediate wakeup. For completeness, the possible errors for FUTEX_WAIT are:
// - EACCES: state_ is not readable. Should not happen.
// - EAGAIN: the value is not as expected, that is not |kLockedContended|, in
// which case retrying the loop is the right behavior.
// - EINTR: signal, looping is the right behavior.
// - EINVAL: invalid argument.
//
// Note: not checking the return value is the approach used in bionic and
// glibc as well.
//
// Will return immediately if |state_| is no longer equal to
// |kLockedContended|. Otherwise, sleeps and wakes up when |state_| may not be
// |kLockedContended| anymore. Note that even without spurious wakeups, the
// value of |state_| is not guaranteed when this returns, as another thread
// may get the lock before we get to run.
int err = syscall(SYS_futex, &state_, FUTEX_WAIT | FUTEX_PRIVATE_FLAG,
kLockedContended, nullptr, nullptr, 0);

if (err) {
// These are programming error, check them.
PA_DCHECK(errno != EACCES);
PA_DCHECK(errno != EINVAL);
}
errno = saved_errno;
}

void SpinningFutex::FutexWake() {
int saved_errno = errno;
long retval = syscall(SYS_futex, &state_, FUTEX_WAKE | FUTEX_PRIVATE_FLAG,
1 /* wake up a single waiter */, nullptr, nullptr, 0);
PA_CHECK(retval != -1);
errno = saved_errno;
}

void SpinningFutex::LockSlow() {
// If this thread gets awaken but another one got the lock first, then go back
// to sleeping. See comments in |FutexWait()| to see why a loop is required.
while (state_.exchange(kLockedContended, std::memory_order_acquire) !=
kUnlocked) {
FutexWait();
}
}

} // namespace internal
} // namespace base

#endif // defined(OS_LINUX) || defined(OS_ANDROID)
Loading

0 comments on commit d6ce9d7

Please sign in to comment.