Skip to content

Commit

Permalink
[PartitionAlloc] Add compiler annotations to PA and blink allocators.
Browse files Browse the repository at this point in the history
There are several attributes that we can add to malloc()-like functions:
- malloc: tells the compiler that this is a malloc()-like function that is its
  return values are distinct and non-overlapping.
- assume_aligned: tells the compiler that the return value is aligned on a
  certain boundary.

It has practical consequences for aliasing analysis, but also to allow
the compiler to use aligned SIMD instructions (as we found out the hard
way in the past).

For instance, from the added unittest disassembly on Linux x86_64, release
build. Before, zeroing in the constructor was done with:

  f6ad45:	e8 f6 00 00 00       	callq  f6ae40 <_ZN4base8internal18PartitionAllocTest5AllocEm>
  f6ad4a:	49 89 c7             	mov    %rax,%r15
  f6ad4d:	0f 57 c0             	xorps  %xmm0,%xmm0
  f6ad50:	0f 11 00             	movups %xmm0,(%rax)  <-- Unaligned

And now it is done with:

  f6ad45:	e8 f6 00 00 00       	callq  f6ae40 <_ZN4base8internal18PartitionAllocTest5AllocEm>
  f6ad4a:	49 89 c7             	mov    %rax,%r15
  f6ad4d:	0f 57 c0             	xorps  %xmm0,%xmm0
  f6ad50:	0f 29 00             	movaps %xmm0,(%rax)  <-- Aligned

Change-Id: I89c6426c3da09c74eef2b4d7c06fba3398534746
Bug: 998048
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3059455
Reviewed-by: Kentaro Hara <haraken@chromium.org>
Commit-Queue: Benoit L <lizeb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#912985}
  • Loading branch information
Benoit Lize authored and Chromium LUCI CQ committed Aug 18, 2021
1 parent 17a9a44 commit f50de13
Show file tree
Hide file tree
Showing 6 changed files with 120 additions and 40 deletions.
22 changes: 1 addition & 21 deletions base/allocator/partition_allocator/partition_alloc_constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "base/allocator/buildflags.h"
#include "base/allocator/partition_allocator/page_allocator_constants.h"
#include "base/allocator/partition_allocator/partition_alloc_config.h"
#include "base/allocator/partition_allocator/partition_alloc_forward.h"
#include "build/build_config.h"

#if defined(OS_APPLE) && defined(ARCH_CPU_64_BITS)
Expand Down Expand Up @@ -241,27 +242,6 @@ DirectMapAllocationGranularityOffsetMask() {
return DirectMapAllocationGranularity() - 1;
}

// Alignment has two constraints:
// - Alignment requirement for scalar types: alignof(std::max_align_t)
// - Alignment requirement for operator new().
//
// The two are separate on Windows 64 bits, where the first one is 8 bytes, and
// the second one 16. We could technically return something different for
// malloc() and operator new(), but this would complicate things, and most of
// our allocations are presumably coming from operator new() anyway.
//
// __STDCPP_DEFAULT_NEW_ALIGNMENT__ is C++17. As such, it is not defined on all
// platforms, as Chrome's requirement is C++14 as of 2020.
#if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__)
constexpr size_t kAlignment =
std::max(alignof(max_align_t), __STDCPP_DEFAULT_NEW_ALIGNMENT__);
#else
constexpr size_t kAlignment = alignof(max_align_t);
#endif
static_assert(kAlignment <= 16,
"PartitionAlloc doesn't support a fundamental alignment larger "
"than 16 bytes.");

// The "order" of an allocation is closely related to the power-of-1 size of the
// allocation. More precisely, the order is the bit index of the
// most-significant-bit in the allocation size, where the bit numbers starts at
Expand Down
57 changes: 57 additions & 0 deletions base/allocator/partition_allocator/partition_alloc_forward.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,37 @@
#ifndef BASE_ALLOCATOR_PARTITION_ALLOCATOR_PARTITION_ALLOC_FORWARD_H_
#define BASE_ALLOCATOR_PARTITION_ALLOCATOR_PARTITION_ALLOC_FORWARD_H_

#include <algorithm>
#include <cstddef>

#include "base/allocator/buildflags.h"
#include "base/base_export.h"
#include "base/compiler_specific.h"
#include "base/dcheck_is_on.h"

namespace base {

// Alignment has two constraints:
// - Alignment requirement for scalar types: alignof(std::max_align_t)
// - Alignment requirement for operator new().
//
// The two are separate on Windows 64 bits, where the first one is 8 bytes, and
// the second one 16. We could technically return something different for
// malloc() and operator new(), but this would complicate things, and most of
// our allocations are presumably coming from operator new() anyway.
//
// __STDCPP_DEFAULT_NEW_ALIGNMENT__ is C++17. As such, it is not defined on all
// platforms, as Chrome's requirement is C++14 as of 2020.
#if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__)
constexpr size_t kAlignment =
std::max(alignof(max_align_t), __STDCPP_DEFAULT_NEW_ALIGNMENT__);
#else
constexpr size_t kAlignment = alignof(max_align_t);
#endif
static_assert(kAlignment <= 16,
"PartitionAlloc doesn't support a fundamental alignment larger "
"than 16 bytes.");

namespace internal {

template <bool thread_safe>
Expand All @@ -36,4 +61,36 @@ class PartitionStatsDumper;

} // namespace base

// From https://clang.llvm.org/docs/AttributeReference.html#malloc:
//
// The malloc attribute indicates that the function acts like a system memory
// allocation function, returning a pointer to allocated storage disjoint from
// the storage for any other object accessible to the caller.
//
// Note that it doesn't apply to realloc()-type functions, as they can return
// the same pointer as the one passed as a parameter, as noted in e.g. stdlib.h
// on Linux systems.
#if defined(__has_attribute)

#if __has_attribute(malloc)
#define MALLOC_FN __attribute__((malloc))
#endif

// Allows the compiler to assume that the return value is aligned on a
// kAlignment boundary. This is useful for e.g. using aligned vector
// instructions in the constructor for zeroing.
#if __has_attribute(assume_aligned)
#define MALLOC_ALIGNED __attribute__((assume_aligned(base::kAlignment)))
#endif

#endif // defined(__has_attribute)

#if !defined(MALLOC_FN)
#define MALLOC_FN
#endif

#if !defined(MALLOC_ALIGNED)
#define MALLOC_ALIGNED
#endif

#endif // BASE_ALLOCATOR_PARTITION_ALLOCATOR_PARTITION_ALLOC_FORWARD_H_
22 changes: 22 additions & 0 deletions base/allocator/partition_allocator/partition_alloc_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,12 @@ class PartitionAllocTest : public testing::Test {

void RunRefCountReallocSubtest(size_t orig_size, size_t new_size);

NOINLINE MALLOC_FN void* Alloc(size_t size) {
return allocator.root()->Alloc(size, "");
}

NOINLINE void Free(void* ptr) { allocator.root()->Free(ptr); }

base::test::ScopedFeatureList scoped_feature_list;
PartitionAllocator<base::internal::ThreadSafe> allocator;
PartitionAllocator<base::internal::ThreadSafe> aligned_allocator;
Expand Down Expand Up @@ -3526,6 +3532,22 @@ TEST_F(PartitionAllocTest, GetIndex) {
}
}

// Used to check alignment. If the compiler understands the annotations, the
// zeroing in the constructor uses aligned SIMD instructions.
TEST_F(PartitionAllocTest, MallocFunctionAnnotations) {
struct TestStruct {
uint64_t a = 0;
uint64_t b = 0;
};

void* buffer = Alloc(sizeof(TestStruct));
// Should use "mov*a*ps" on x86_64.
auto* x = new (buffer) TestStruct();

EXPECT_EQ(x->a, 0u);
Free(buffer);
}

} // namespace internal
} // namespace base

Expand Down
36 changes: 21 additions & 15 deletions base/allocator/partition_allocator/partition_root.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,20 +305,23 @@ struct BASE_EXPORT PartitionRoot {
size_t alignment,
size_t requested_size);

ALWAYS_INLINE void* Alloc(size_t requested_size, const char* type_name);
ALWAYS_INLINE void* AllocFlags(int flags,
size_t requested_size,
const char* type_name);
ALWAYS_INLINE MALLOC_FN void* Alloc(size_t requested_size,
const char* type_name) MALLOC_ALIGNED;
ALWAYS_INLINE MALLOC_FN void* AllocFlags(int flags,
size_t requested_size,
const char* type_name)
MALLOC_ALIGNED;
// Same as |AllocFlags()|, but allows specifying |slot_span_alignment|. It has
// to be a multiple of partition page size, greater than 0 and no greater than
// kMaxSupportedAlignment. If it equals exactly 1 partition page, no special
// action is taken as PartitoinAlloc naturally guarantees this alignment,
// otherwise a sub-optimial allocation strategy is used to guarantee the
// higher-order alignment.
ALWAYS_INLINE void* AllocFlagsInternal(int flags,
size_t requested_size,
size_t slot_span_alignment,
const char* type_name);
ALWAYS_INLINE MALLOC_FN void* AllocFlagsInternal(int flags,
size_t requested_size,
size_t slot_span_alignment,
const char* type_name)
MALLOC_ALIGNED;
// Same as |AllocFlags()|, but bypasses the allocator hooks.
//
// This is separate from AllocFlags() because other callers of AllocFlags()
Expand All @@ -328,20 +331,23 @@ struct BASE_EXPORT PartitionRoot {
// taking the extra branch in the non-malloc() case doesn't hurt. In addition,
// for the malloc() case, the compiler correctly removes the branch, since
// this is marked |ALWAYS_INLINE|.
ALWAYS_INLINE void* AllocFlagsNoHooks(int flags,
size_t requested_size,
size_t slot_span_alignment);

ALWAYS_INLINE void* Realloc(void* ptr, size_t newize, const char* type_name);
ALWAYS_INLINE MALLOC_FN void* AllocFlagsNoHooks(int flags,
size_t requested_size,
size_t slot_span_alignment)
MALLOC_ALIGNED;

ALWAYS_INLINE void* Realloc(void* ptr,
size_t newize,
const char* type_name) MALLOC_ALIGNED;
// Overload that may return nullptr if reallocation isn't possible. In this
// case, |ptr| remains valid.
ALWAYS_INLINE void* TryRealloc(void* ptr,
size_t new_size,
const char* type_name);
const char* type_name) MALLOC_ALIGNED;
NOINLINE void* ReallocFlags(int flags,
void* ptr,
size_t new_size,
const char* type_name);
const char* type_name) MALLOC_ALIGNED;
ALWAYS_INLINE static void Free(void* ptr);
// Same as |Free()|, bypasses the allocator hooks.
ALWAYS_INLINE static void FreeNoHooks(void* ptr);
Expand Down
10 changes: 10 additions & 0 deletions third_party/blink/renderer/platform/wtf/allocator/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,21 @@

#include <atomic>

#include "base/allocator/buildflags.h"
#include "base/check_op.h"
#include "build/build_config.h"
#include "third_party/blink/renderer/platform/wtf/allocator/partitions.h"
#include "third_party/blink/renderer/platform/wtf/type_traits.h"

#if BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)
// FastMalloc() defers to malloc() in this case, and including its header
// ensures that the compiler knows that malloc() is "special", e.g. that it
// returns properly-aligned, distinct memory locations.
#include <malloc.h>
#else
#include "base/allocator/partition_allocator/partition_alloc.h"
#endif // BUILDFLAG(USE_PARTITION_ALLOC_AS_MALLOC)

namespace WTF {

namespace internal {
Expand Down
13 changes: 9 additions & 4 deletions third_party/blink/renderer/platform/wtf/allocator/partitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,18 @@ class WTF_EXPORT Partitions {

static void DumpMemoryStats(bool is_light_dump, base::PartitionStatsDumper*);

static void* BufferMalloc(size_t n, const char* type_name);
static void* BufferTryRealloc(void* p, size_t n, const char* type_name);
static void* MALLOC_FN BufferMalloc(size_t n,
const char* type_name) MALLOC_ALIGNED;
static void* BufferTryRealloc(void* p,
size_t n,
const char* type_name) MALLOC_ALIGNED;
static void BufferFree(void* p);
static size_t BufferPotentialCapacity(size_t n);

static void* FastMalloc(size_t n, const char* type_name);
static void* FastZeroedMalloc(size_t n, const char* type_name);
static void* MALLOC_FN FastMalloc(size_t n,
const char* type_name) MALLOC_ALIGNED;
static void* MALLOC_FN FastZeroedMalloc(size_t n,
const char* type_name) MALLOC_ALIGNED;
static void FastFree(void* p);

static void HandleOutOfMemory(size_t size);
Expand Down

0 comments on commit f50de13

Please sign in to comment.