Skip to content

Commit

Permalink
New in StaticContext returns at::DataPtr (pytorch#12029)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: pytorch#12029

In order to remove the New() function in StaticContext (and eventually remove StaticContext itself) and converge on the Allocator design, we'll first change the return type of New to at::DataPtr.

Reviewed By: ezyang

Differential Revision: D9889990

fbshipit-source-id: 3257c763530b987025f428741bdd2e089d11bad4
  • Loading branch information
jerryzh168 authored and facebook-github-bot committed Oct 4, 2018
1 parent bcc2a05 commit 74dc446
Show file tree
Hide file tree
Showing 20 changed files with 179 additions and 132 deletions.
6 changes: 6 additions & 0 deletions aten/src/ATen/core/Allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,19 @@ class DataPtr {
void* release_context() {
return ptr_.release_context();
}
// Exposes the owned context as an rvalue reference so the caller can take
// ownership of it (forwards to UniqueVoidPtr::move_context).
// NOTE(review): returning an rvalue reference to internal state is subtle —
// the caller must actually move from the result; afterwards this DataPtr's
// context is in a moved-from state.
std::unique_ptr<void, DeleterFnPtr>&& move_context() {
  return ptr_.move_context();
}
operator bool() const {
return static_cast<bool>(ptr_);
}
template <typename T>
T* cast_context(DeleterFnPtr expected_deleter) const {
return ptr_.cast_context<T>(expected_deleter);
}
DeleterFnPtr get_deleter() const {
return ptr_.get_deleter();
}
Device device() const {
return device_;
}
Expand Down
16 changes: 16 additions & 0 deletions aten/src/ATen/core/TensorImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,20 @@ const Storage& TensorImpl::storage() const {
return storage_;
}

// Deleter installed on the DataPtr built by PlacementDeleteContext::makeDataPtr:
// reclaims the heap-allocated PlacementDeleteContext, whose destructor in turn
// runs the placement dtor and releases the original memory.
static void deletePlacementDeleteContext(void* ptr) {
  delete static_cast<PlacementDeleteContext*>(ptr);
}

// Wraps an already-constructed DataPtr in a new DataPtr that carries a
// heap-allocated PlacementDeleteContext as its context, so that
// `placement_dtor` is invoked on the data (with `size`) before the original
// memory is released.
at::DataPtr PlacementDeleteContext::makeDataPtr(
    at::DataPtr&& data_ptr,
    PlacementDtor placement_dtor,
    size_t size,
    at::Device device) {
  // Capture the raw pointer BEFORE data_ptr is moved from in the next
  // statement — evaluation order of the braced-init list below is not a
  // safe place to call data_ptr.get().
  auto* ptr = data_ptr.get();
  return {ptr,
          new PlacementDeleteContext(std::move(data_ptr), placement_dtor, size),
          &deletePlacementDeleteContext,
          device};
}

} // namespace at
71 changes: 47 additions & 24 deletions aten/src/ATen/core/TensorImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
#include <atomic>
#include <memory>

#include "ATen/core/Storage.h"
#include "ATen/core/optional.h"
#include "ATen/core/TensorTypeId.h"
#include "ATen/core/TensorTypeIdRegistration.h"
#include "ATen/core/LegacyTypeDispatch.h"
#include "ATen/core/Backend.h"
#include "ATen/core/context_base.h"
#include <ATen/core/Backend.h>
#include <ATen/core/LegacyTypeDispatch.h>
#include <ATen/core/Storage.h>
#include <ATen/core/TensorTypeId.h>
#include <ATen/core/TensorTypeIdRegistration.h>
#include <ATen/core/context_base.h>
#include <ATen/core/optional.h>

#include "caffe2/core/allocator.h"
#include "caffe2/core/common.h"
Expand Down Expand Up @@ -99,6 +99,39 @@ inline int canonical_axis_index_(int axis_index, int ndims) {
return axis_index;
}

// Signature of a placement destructor: (data pointer, element count).
using PlacementDtor = void (*)(void*, size_t);

/*
 * A Context that will call an extra placement deleter during
 * destruction.
 *
 * Accepts an already-constructed DataPtr and stores it as a member;
 * during destruction, we'll call the extra deleter on the underlying
 * data pointer before the DataPtr is destructed.
 * `data_ptr_` owns the memory.
 */
struct CAFFE2_API PlacementDeleteContext {
  // Owns the underlying allocation; memory is freed when this member is
  // destructed (after placement_dtor_ has run — see ~PlacementDeleteContext).
  at::DataPtr data_ptr_;
  // Extra destructor to run on the data before the memory is released.
  PlacementDtor placement_dtor_;
  // Count passed to placement_dtor_ alongside the data pointer.
  size_t size_;
  PlacementDeleteContext(
      at::DataPtr&& data_ptr,
      PlacementDtor placement_dtor,
      size_t size)
      : data_ptr_(std::move(data_ptr)),
        placement_dtor_(placement_dtor),
        size_(size) {}
  // Builds a DataPtr whose context is a heap-allocated PlacementDeleteContext
  // wrapping `data_ptr`. Defined in TensorImpl.cpp.
  static at::DataPtr makeDataPtr(
      at::DataPtr&& data_ptr,
      PlacementDtor placement_dtor,
      size_t size,
      at::Device device);
  ~PlacementDeleteContext() {
    placement_dtor_(data_ptr_.get(), size_);
    // original memory will be freed when data_ptr_ is destructed
  }
};

/**
* The low-level representation of a tensor, which contains a storage
* (which contains the actual data) and metadata (e.g., sizes and strides)
Expand Down Expand Up @@ -734,29 +767,19 @@ struct CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
// destruction procedure.
auto size = numel_;
auto dtor = data_type_.dtor();
void* ptr;
at::DeleterFnPtr deleter;
auto ptr_and_deleter = GetStaticContext()->New(
auto data_ptr = GetStaticContext()->New(
numel_ * storage_.itemsize()); // Removing this can get rid of
// InefficientStdFunctionContext
ptr = ptr_and_deleter.first;
deleter = ptr_and_deleter.second;
storage_.set_data_ptr(at::InefficientStdFunctionContext::makeDataPtr(
ptr,
[size, dtor, deleter](void* local_ptr) -> void {
dtor(local_ptr, size);
deleter(local_ptr);
},
storage_.set_data_ptr(PlacementDeleteContext::makeDataPtr(
std::move(data_ptr),
dtor,
size,
at::Device(storage_.device_type())));
data_type_.ctor()(storage_.data(), numel_);
} else {
// For fundamental type, new and delete is easier.
auto ptr_and_deleter =
GetStaticContext()->New(numel_ * storage_.itemsize());
storage_.set_data_ptr(at::InefficientStdFunctionContext::makeDataPtr(
ptr_and_deleter.first,
ptr_and_deleter.second,
at::Device(storage_.device_type())));
storage_.set_data_ptr(
GetStaticContext()->New(numel_ * storage_.itemsize()));
}
storage_.set_numel(numel_);
AT_ASSERT(storage_offset_ == 0); // because we just reallocated
Expand Down
4 changes: 4 additions & 0 deletions aten/src/ATen/core/UniqueVoidPtr.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ class UniqueVoidPtr {
void* release_context() {
return ctx_.release();
}
// Exposes the owned context as an rvalue reference so a caller can move
// ownership out of this UniqueVoidPtr. Once the caller moves from the
// returned reference, ctx_ is left in a moved-from (empty) state.
std::unique_ptr<void, DeleterFnPtr>&& move_context() {
  return std::move(ctx_);
}

template <typename T>
T* cast_context(DeleterFnPtr expected_deleter) const {
if (get_deleter() != expected_deleter)
Expand Down
3 changes: 2 additions & 1 deletion aten/src/ATen/core/context_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <unordered_map>

#include <ATen/core/ATenGeneral.h>
#include <ATen/core/Allocator.h>
#include <ATen/core/Device.h>
#include <ATen/core/Error.h>
#include <ATen/core/UniqueVoidPtr.h>
Expand All @@ -30,7 +31,7 @@ class CAFFE2_API BaseStaticContext {
public:
virtual ~BaseStaticContext() noexcept {}

virtual std::pair<void*, DeleterFnPtr> New(size_t nbytes) const = 0;
virtual at::DataPtr New(size_t nbytes) const = 0;

virtual DeviceType GetDeviceType() = 0;

Expand Down
5 changes: 2 additions & 3 deletions binaries/core_overhead_benchmark_gpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -190,9 +190,8 @@ BENCHMARK(BM_OperatorCreationCUDA);
static void BM_RawAllocDeallocCPU(benchmark::State& state) {
while (state.KeepRunning()) {
// Allocating only 1 byte in order to measure the overhead.
auto ptr_and_deleter = GetCPUAllocator()->New(1);
// Deallocate.
ptr_and_deleter.second(ptr_and_deleter.first);
auto data_ptr = GetCPUAllocator()->allocate(1);
// Deallocated when it's out of scope
}
}
BENCHMARK(BM_RawAllocDeallocCPU);
Expand Down
9 changes: 5 additions & 4 deletions caffe2/core/allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,17 @@ namespace caffe2 {

// Deleter that intentionally does nothing — for wrapping memory whose
// lifetime is managed elsewhere.
void NoDelete(void*) {}

static std::unique_ptr<CPUAllocator> g_cpu_allocator(new DefaultCPUAllocator());
CPUAllocator* GetCPUAllocator() {
// Process-wide CPU allocator; replaceable via SetCPUAllocator below.
static std::unique_ptr<at::Allocator> g_cpu_allocator(
    new DefaultCPUAllocator());
// Returns the current global CPU allocator. The global retains ownership;
// callers must not delete the returned pointer.
at::Allocator* GetCPUAllocator() {
  return g_cpu_allocator.get();
}

void SetCPUAllocator(CPUAllocator* alloc) {
// Installs `alloc` as the global CPU allocator. Takes ownership of the
// pointer (the previous allocator is deleted by unique_ptr::reset).
void SetCPUAllocator(at::Allocator* alloc) {
  g_cpu_allocator.reset(alloc);
}

MemoryAllocationReporter CPUStaticContext::reporter_;
MemoryAllocationReporter DefaultCPUAllocator::reporter_;

void MemoryAllocationReporter::New(void* ptr, size_t nbytes) {
std::lock_guard<std::mutex> guard(mutex_);
Expand Down
30 changes: 23 additions & 7 deletions caffe2/core/allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <cstring>
#include <unordered_map>

#include <ATen/core/Allocator.h>
#include "caffe2/core/logging.h"
#include "caffe2/core/numa.h"

Expand Down Expand Up @@ -42,10 +43,10 @@ class CAFFE2_API MemoryAllocationReporter {
size_t allocated_;
};

struct CAFFE2_API DefaultCPUAllocator final : CPUAllocator {
struct CAFFE2_API DefaultCPUAllocator final : at::Allocator {
DefaultCPUAllocator() {}
~DefaultCPUAllocator() override {}
std::pair<void*, MemoryDeleter> New(size_t nbytes) override {
at::DataPtr allocate(size_t nbytes) const override {
void* data = nullptr;
#ifdef __ANDROID__
data = memalign(gCaffe2Alignment, nbytes);
Expand All @@ -60,7 +61,11 @@ struct CAFFE2_API DefaultCPUAllocator final : CPUAllocator {
if (FLAGS_caffe2_cpu_allocator_do_zero_fill) {
memset(data, 0, nbytes);
}
return {data, Delete};
if (FLAGS_caffe2_report_cpu_memory_usage) {
reporter_.New(data, nbytes);
return {data, data, &ReportAndDelete, at::Device(at::DeviceType::CPU)};
}
return {data, data, &Delete, at::Device(at::DeviceType::CPU)};
}

#ifdef _MSC_VER
Expand All @@ -73,16 +78,27 @@ struct CAFFE2_API DefaultCPUAllocator final : CPUAllocator {
}
#endif

MemoryDeleter GetDeleter() override {
return Delete;
// Deleter variant that records the deallocation with the memory-usage
// reporter before freeing the pointer.
static void ReportAndDelete(void* ptr) {
  reporter_.Delete(ptr);
  Delete(ptr);
}

// Returns the deleter matching this allocator's allocations: the reporting
// variant when CPU memory-usage tracking is enabled, the plain Delete
// otherwise.
// NOTE(review): the flag is read here at call time, while allocate() bakes
// the deleter in at allocation time — toggling the flag between the two
// could mismatch them; confirm the flag is effectively set once at startup.
at::DeleterFnPtr raw_deleter() const override {
  if (FLAGS_caffe2_report_cpu_memory_usage) {
    return &ReportAndDelete;
  }
  return &Delete;
}

protected:
static MemoryAllocationReporter reporter_;
};

// Get the CPU Alloctor.
CAFFE2_API CPUAllocator* GetCPUAllocator();
CAFFE2_API at::Allocator* GetCPUAllocator();
// Sets the CPU allocator to the given allocator: the caller gives away the
// ownership of the pointer.
CAFFE2_API void SetCPUAllocator(CPUAllocator* alloc);
CAFFE2_API void SetCPUAllocator(at::Allocator* alloc);

} // namespace caffe2

Expand Down
23 changes: 5 additions & 18 deletions caffe2/core/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
#include "caffe2/core/typeid.h"
#include "caffe2/proto/caffe2_pb.h"

#include "ATen/core/ATenCoreTest.h"
#include "ATen/core/ArrayRef.h"
#include <ATen/core/ATenCoreTest.h>
#include <ATen/core/ArrayRef.h>

CAFFE2_DECLARE_bool(caffe2_report_cpu_memory_usage);

Expand Down Expand Up @@ -85,7 +85,7 @@ class CAFFE2_API CPUContext final : public BaseContext {
return *random_generator_.get();
}

inline static std::pair<void*, MemoryDeleter> New(size_t nbytes) {
// Allocates `nbytes` of CPU memory via the static context; the returned
// DataPtr owns the allocation and frees it when it goes out of scope.
inline static at::DataPtr New(size_t nbytes) {
  return StaticContext()->New(nbytes);
}

Expand Down Expand Up @@ -185,13 +185,8 @@ inline void CPUContext::CopyBytes<CPUContext, CPUContext>(
// TODO(jerryzh): merge CPUStaticContext with Allocator
class CAFFE2_API CPUStaticContext : public BaseStaticContext {
public:
std::pair<void*, MemoryDeleter> New(size_t nbytes) const override {
auto data_and_deleter = GetCPUAllocator()->New(nbytes);
if (FLAGS_caffe2_report_cpu_memory_usage) {
reporter_.New(data_and_deleter.first, nbytes);
data_and_deleter.second = ReportAndDelete;
}
return data_and_deleter;
// Delegates to the global CPU allocator; memory-usage reporting (when
// enabled) is handled inside DefaultCPUAllocator::allocate, so no extra
// bookkeeping is needed here.
at::DataPtr New(size_t nbytes) const override {
  return GetCPUAllocator()->allocate(nbytes);
}

DeviceType GetDeviceType() override {
Expand All @@ -204,14 +199,6 @@ class CAFFE2_API CPUStaticContext : public BaseStaticContext {
device->set_device_type(TypeToProto(GetDeviceType()));
}

protected:
static MemoryAllocationReporter reporter_;

private:
static void ReportAndDelete(void* ptr) {
reporter_.Delete(ptr);
GetCPUAllocator()->GetDeleter()(ptr);
}
};

} // namespace caffe2
Expand Down
11 changes: 6 additions & 5 deletions caffe2/core/context_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,8 @@ void TrackMemoryAlloc(size_t nbytes) {
}
}

std::pair<void*, MemoryDeleter> CUDAStaticContext::New(size_t nbytes) const {
// TODO: wrap this function in DefaultCUDAAllocator
at::DataPtr CUDAStaticContext::New(size_t nbytes) const {
// Lock the mutex
std::lock_guard<std::mutex> lock(CUDAContext::mutex());
// A one-time caffe2 cuda initializer.
Expand All @@ -331,7 +332,7 @@ std::pair<void*, MemoryDeleter> CUDAStaticContext::New(size_t nbytes) const {
g_size_map[ptr] = nbytes;
g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
}
return {ptr, Delete};
return {ptr, ptr, Delete, at::Device(CUDA)};
case CudaMemoryPoolType::CUB:
CUDA_ENFORCE(g_cub_allocator->DeviceAllocate(&ptr, nbytes));
g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
Expand All @@ -340,16 +341,16 @@ std::pair<void*, MemoryDeleter> CUDAStaticContext::New(size_t nbytes) const {
if (FLAGS_caffe2_gpu_memory_tracking) {
g_size_map[ptr] = nbytes;
}
return {ptr, Delete};
return {ptr, ptr, Delete, at::Device(CUDA)};
case CudaMemoryPoolType::THC:
CUDA_ENFORCE(g_thc_allocator->Alloc(&ptr, nbytes, 0 /* stream */));
if (FLAGS_caffe2_gpu_memory_tracking) {
g_size_map[ptr] = nbytes;
g_cuda_device_affiliation[ptr] = CaffeCudaGetDevice();
}
return {ptr, Delete};
return {ptr, ptr, Delete, at::Device(CUDA)};
}
return {nullptr, Delete};
return {nullptr, nullptr, Delete, at::Device(CUDA)};
}

void CUDAStaticContext::Delete(void* ptr) {
Expand Down
Loading

0 comments on commit 74dc446

Please sign in to comment.