Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NeoMathEngine] GetReuseMemoryMode and GetCurrentMemoryUsage #1065

Merged
merged 4 commits into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion NeoML/include/NeoML/Dnn/Dnn.h
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ class NEOML_API CBaseLayer : public virtual IObject {
void link();
void addOutput(int number);
void unlink();
// Declared exactly once; the second parameter is named 'linked' so that it does not hide the unlink() method
void cleanUp( bool total, bool linked );
void buildOrder();
void reshape();
void setInputDesc(int i);
Expand Down
19 changes: 11 additions & 8 deletions NeoML/test/src/DnnBlobTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,15 @@ using namespace NeoMLTest;

// Checks that a window blob created over a data blob exposes non-null data,
// and that the engine reports no memory in use once both blobs are destroyed.
TEST( CDnnBlobTest, InitWindowBlob )
{
	// NOTE(review): CleanUp() is presumably what brings the memory counters to a known state
	// before the checks below — confirm against the math engine implementation
	MathEngine().CleanUp();
	{
		// Both blobs live only in this scope, so they are released before the final checks
		CPtr<CDnnBlob> parent = CDnnBlob::CreateDataBlob( MathEngine(), CT_Float, 16, 1, 1 );
		CPtr<CDnnBlob> window = CDnnBlob::CreateWindowBlob( parent );

		EXPECT_FALSE( window->GetData().IsNull() );
	}
	// The peak is expected to be the 16 floats of the parent only: the window adds no allocation of its own
	EXPECT_TRUE( MathEngine().GetPeakMemoryUsage() == 16 * sizeof( float ) );
	// Nothing may remain allocated after the blobs are gone
	EXPECT_TRUE( MathEngine().GetCurrentMemoryUsage() == 0 );
}

TEST( CDnnBlobTest, BufferTest )
Expand Down Expand Up @@ -57,21 +62,19 @@ TEST( CDnnBlobTest, BufferMemoryThresholdTest )
MathEngine().SetThreadBufferMemoryThreshold( threshold );
}

MathEngine().ResetPeakMemoryUsage();
const size_t peakMemory = MathEngine().GetPeakMemoryUsage();
const size_t peakMemory = MathEngine().GetCurrentMemoryUsage();
const size_t reusedMemory = ( init ? 0 : threshold );
{
CPtr<CDnnBlob> blob1 = CDnnBlob::CreateDataBlob( MathEngine(), CT_Float, int(threshold / sizeof( float )), 1, 1 );
ASSERT_TRUE( blob1 != nullptr && !blob1->GetData().IsNull() );
CPtr<CDnnBlob> blob2 = CDnnBlob::CreateDataBlob( MathEngine(), CT_Float, int( threshold / sizeof( float ) + 1), 1, 1 );
ASSERT_TRUE( blob2 != nullptr && !blob2->GetData().IsNull() );
EXPECT_EQ( MathEngine().GetPeakMemoryUsage(), peakMemory + threshold - reusedMemory + threshold + sizeof( float ) );
EXPECT_EQ( MathEngine().GetCurrentMemoryUsage(), peakMemory + threshold - reusedMemory + threshold + sizeof( float ) );
}
const size_t memoryInPools = MathEngine().GetMemoryInPools() - reusedMemory;
EXPECT_EQ( memoryInPools, threshold - reusedMemory );

MathEngine().ResetPeakMemoryUsage();
EXPECT_EQ( MathEngine().GetPeakMemoryUsage() - memoryInPools, peakMemory );
EXPECT_EQ( MathEngine().GetCurrentMemoryUsage() - memoryInPools, peakMemory );
sumMemoryInPools += memoryInPools;
};

Expand Down
8 changes: 6 additions & 2 deletions NeoMathEngine/include/NeoMathEngine/NeoMathEngine.h
Original file line number Diff line number Diff line change
Expand Up @@ -1164,10 +1164,13 @@ class NEOMATHENGINE_API IMathEngine : public IDnnEngine {
// Turns on and off the memory reuse mode
// In this mode, the allocated memory blocks will not be deleted on HeapFree() and may be used until CleanUp()
virtual void SetReuseMemoryMode( bool enable ) = 0;
virtual bool GetReuseMemoryMode() const = 0;
// Sets the size threshold in bytes for the current thread:
// memory blocks of a size <= this threshold are allocated in buffers if 'reuse' mode is enabled,
// memory blocks of a size > this threshold are allocated in raw RAM memory (malloc/free)
virtual void SetThreadBufferMemoryThreshold( size_t threshold ) = 0;
// Gets the memory block size threshold for the current thread
virtual size_t GetThreadBufferMemoryThreshold() const = 0;

virtual CMemoryHandle HeapAlloc( size_t count ) = 0;
virtual void HeapFree( const CMemoryHandle& handle ) = 0;
Expand All @@ -1194,7 +1197,8 @@ class NEOMATHENGINE_API IMathEngine : public IDnnEngine {
virtual size_t GetPeakMemoryUsage() const = 0;
// Reset the peak memory counter to the current memory usage value
virtual void ResetPeakMemoryUsage() = 0;

// The current memory usage size
virtual size_t GetCurrentMemoryUsage() const = 0;
// The current size of memory in the pools
virtual size_t GetMemoryInPools() const = 0;

Expand Down Expand Up @@ -1227,7 +1231,7 @@ class NEOMATHENGINE_API IMathEngine : public IDnnEngine {
virtual void AllReduce( const CFloatHandle& handle, int size ) = 0;
virtual void Broadcast( const CFloatHandle& handle, int size, int root ) = 0;
virtual void AbortDistributed() {};
virtual bool IsDistributed() { return false; }
virtual bool IsDistributed() const { return false; }
};

//------------------------------------------------------------------------------------------------------------
Expand Down
22 changes: 22 additions & 0 deletions NeoMathEngine/src/CPU/CpuMathEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,28 @@ void CCpuMathEngine::SetReuseMemoryMode( bool enable )
memoryPool->SetReuseMemoryMode( enable );
}

// Tells whether memory reuse mode is in effect for this engine.
bool CCpuMathEngine::GetReuseMemoryMode() const
{
	if( !IsDistributed() ) {
		// Not distributed: the answer comes from the memory pool, read under the engine mutex
		const std::lock_guard<std::mutex> guard( mutex );
		const bool isReuseEnabled = memoryPool->GetReuseMemoryMode();
		return isReuseEnabled;
	}
	// Distributed CPU math engine always uses memory pools
	return true;
}

// Passes the new buffer memory threshold on to the memory pool.
// threshold - the value forwarded to the pool unchanged
void CCpuMathEngine::SetThreadBufferMemoryThreshold( size_t threshold )
{
	// The pool is only touched while the engine mutex is held
	const std::lock_guard<std::mutex> guard( mutex );
	memoryPool->SetThreadBufferMemoryThreshold( threshold );
}

// Returns the buffer memory threshold as reported by the memory pool.
size_t CCpuMathEngine::GetThreadBufferMemoryThreshold() const
{
	// The pool is only queried while the engine mutex is held
	const std::lock_guard<std::mutex> guard( mutex );
	const size_t poolThreshold = memoryPool->GetThreadBufferMemoryThreshold();
	return poolThreshold;
}

CMemoryHandle CCpuMathEngine::HeapAlloc( size_t size )
{
std::lock_guard<std::mutex> lock( mutex );
Expand Down Expand Up @@ -163,6 +179,12 @@ void CCpuMathEngine::ResetPeakMemoryUsage()
memoryPool->ResetPeakMemoryUsage();
}

// Returns the current memory usage as reported by the memory pool.
size_t CCpuMathEngine::GetCurrentMemoryUsage() const
{
	// The pool is only queried while the engine mutex is held
	const std::lock_guard<std::mutex> guard( mutex );
	const size_t usedNow = memoryPool->GetCurrentMemoryUsage();
	return usedNow;
}

size_t CCpuMathEngine::GetMemoryInPools() const
{
std::lock_guard<std::mutex> lock( mutex );
Expand Down
6 changes: 5 additions & 1 deletion NeoMathEngine/src/CPU/CpuMathEngine.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ class CCpuMathEngine : public IMathEngine, public IRawMemoryManager {
// IMathEngine interface methods
TMathEngineType GetType() const override { return MET_Cpu; }
void SetReuseMemoryMode( bool enabled ) override;
bool GetReuseMemoryMode() const override;
void SetThreadBufferMemoryThreshold( size_t threshold ) override;
size_t GetThreadBufferMemoryThreshold() const override;
CMemoryHandle HeapAlloc( size_t count ) override;
void HeapFree( const CMemoryHandle& handle ) override;
void TransferHandleToThisThread( const CMemoryHandle& handle, size_t size ) override;
Expand All @@ -54,6 +56,7 @@ class CCpuMathEngine : public IMathEngine, public IRawMemoryManager {
size_t GetFreeMemorySize() const override;
size_t GetPeakMemoryUsage() const override;
void ResetPeakMemoryUsage() override;
size_t GetCurrentMemoryUsage() const override;
size_t GetMemoryInPools() const override;
void CleanUp() override;
void* GetBuffer( const CMemoryHandle& handle, size_t pos, size_t size, bool exchange ) override;
Expand Down Expand Up @@ -627,7 +630,8 @@ class CCpuMathEngine : public IMathEngine, public IRawMemoryManager {
void Broadcast( const CFloatHandle& handle, int size, int root ) override;
void AbortDistributed() override;
CMathEngineDistributedInfo GetDistributedInfo() override { return distributedInfo; }
bool IsDistributed() override { return distributedInfo.Threads > 1; }
bool IsDistributed() const override { return distributedInfo.Threads > 1; }

protected:
// IRawMemoryManager interface methods
CMemoryHandle Alloc( size_t size ) override;
Expand Down
18 changes: 18 additions & 0 deletions NeoMathEngine/src/GPU/CUDA/CudaMathEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ void CCudaMathEngine::ResetPeakMemoryUsage()
memoryPool->ResetPeakMemoryUsage();
}

// Returns the current memory usage as reported by the memory pool.
size_t CCudaMathEngine::GetCurrentMemoryUsage() const
{
	// The pool is only queried while the engine mutex is held
	const std::lock_guard<std::mutex> guard( mutex );
	const size_t usedNow = memoryPool->GetCurrentMemoryUsage();
	return usedNow;
}

size_t CCudaMathEngine::GetMemoryInPools() const
{
std::lock_guard<std::mutex> lock( mutex );
Expand All @@ -74,12 +80,24 @@ void CCudaMathEngine::SetReuseMemoryMode( bool )
// Always true, because allocation is sync
}

// Reports the memory reuse mode of the CUDA engine; the value is fixed.
bool CCudaMathEngine::GetReuseMemoryMode() const
{
	// Always true, because allocation is sync
	constexpr bool alwaysReuse = true;
	return alwaysReuse;
}

// Passes the new buffer memory threshold on to the memory pool.
// threshold - the value forwarded to the pool unchanged
void CCudaMathEngine::SetThreadBufferMemoryThreshold( size_t threshold )
{
	// The pool is only touched while the engine mutex is held
	const std::lock_guard<std::mutex> guard( mutex );
	memoryPool->SetThreadBufferMemoryThreshold( threshold );
}

// Returns the buffer memory threshold as reported by the memory pool.
size_t CCudaMathEngine::GetThreadBufferMemoryThreshold() const
{
	// The pool is only queried while the engine mutex is held
	const std::lock_guard<std::mutex> guard( mutex );
	const size_t poolThreshold = memoryPool->GetThreadBufferMemoryThreshold();
	return poolThreshold;
}

CMemoryHandle CCudaMathEngine::HeapAlloc( size_t size )
{
std::lock_guard<std::mutex> lock( mutex );
Expand Down
7 changes: 6 additions & 1 deletion NeoMathEngine/src/GPU/CUDA/CudaMathEngine.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ class CCudaMathEngine : public IMathEngine, public IRawMemoryManager {
TMathEngineType GetType() const override { return MET_Cuda; }
void GetMathEngineInfo( CMathEngineInfo& info ) const override;
void SetReuseMemoryMode( bool enable ) override;
bool GetReuseMemoryMode() const override;
void SetThreadBufferMemoryThreshold( size_t threshold ) override;
size_t GetThreadBufferMemoryThreshold() const override;
CMemoryHandle HeapAlloc( size_t count ) override;
void HeapFree( const CMemoryHandle& handle ) override;
void TransferHandleToThisThread( const CMemoryHandle& handle, size_t size ) override;
Expand All @@ -61,6 +63,7 @@ class CCudaMathEngine : public IMathEngine, public IRawMemoryManager {
size_t GetFreeMemorySize() const override;
size_t GetPeakMemoryUsage() const override;
void ResetPeakMemoryUsage() override;
size_t GetCurrentMemoryUsage() const override;
size_t GetMemoryInPools() const override;
void CleanUp() override;
void* GetBuffer( const CMemoryHandle& handle, size_t pos, size_t size, bool exchange ) override;
Expand Down Expand Up @@ -629,15 +632,17 @@ class CCudaMathEngine : public IMathEngine, public IRawMemoryManager {
const CFloatHandle& input, const CFloatHandle& output ) override;

IPerformanceCounters* CreatePerformanceCounters( bool ) const override { return new CPerformanceCountersDefault(); }
// For Distributed only
void AllReduce( const CFloatHandle& handle, int size ) override;
void Broadcast( const CFloatHandle& handle, int size, int root ) override;
void AbortDistributed() override;
CMathEngineDistributedInfo GetDistributedInfo() override { return distributedInfo; }
bool IsDistributed() override { return distributedInfo.Threads > 1; }
bool IsDistributed() const override { return distributedInfo.Threads > 1; }
#ifdef NEOML_USE_NCCL
void SetDistributedCommunicator( const ncclUniqueId& uniqueId, const CMathEngineDistributedInfo& info,
std::shared_ptr<std::atomic<bool>> isAbort );
#endif

protected:
// IRawMemoryManager interface methods
CMemoryHandle Alloc( size_t size ) override;
Expand Down
3 changes: 3 additions & 0 deletions NeoMathEngine/src/GPU/Metal/MetalMathEngine.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ class CMetalMathEngine : public IMathEngine, public IRawMemoryManager {
// IMathEngine interface methods
TMathEngineType GetType() const override { return MET_Metal; }
void SetReuseMemoryMode( bool enable ) override;
bool GetReuseMemoryMode() const override;
void SetThreadBufferMemoryThreshold( size_t threshold ) override;
size_t GetThreadBufferMemoryThreshold() const override;
CMemoryHandle HeapAlloc( size_t count ) override;
void HeapFree( const CMemoryHandle& handle ) override;
void TransferHandleToThisThread( const CMemoryHandle& /*handle*/, size_t /*size*/ ) override { ASSERT_EXPR( false ); }
Expand All @@ -56,6 +58,7 @@ class CMetalMathEngine : public IMathEngine, public IRawMemoryManager {
size_t GetFreeMemorySize() const override;
size_t GetPeakMemoryUsage() const override;
void ResetPeakMemoryUsage() override;
size_t GetCurrentMemoryUsage() const override;
size_t GetMemoryInPools() const override;
void CleanUp() override;
void* GetBuffer( const CMemoryHandle& handle, size_t pos, size_t size, bool exchange ) override;
Expand Down
34 changes: 26 additions & 8 deletions NeoMathEngine/src/GPU/Metal/MetalMathEngine.mm
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,28 @@ bool LoadMetalEngineInfo( CMathEngineInfo& info )

// Turns memory reuse mode on or off in the memory pool.
// enable - the new state of the mode, forwarded to the pool unchanged
void CMetalMathEngine::SetReuseMemoryMode( bool enable )
{
	// One guard only: 'lock' may not be declared twice in the same scope.
	// The guard type is CMutex, as in the other CMetalMathEngine methods that lock *mutex
	std::lock_guard<CMutex> lock( *mutex );
	memoryPool->SetReuseMemoryMode( enable );
}

void CVulkanMathEngine::SetThreadBufferMemoryThreshold( size_t threshold )
bool CMetalMathEngine::GetReuseMemoryMode() const
{
std::lock_guard<CMutex> lock( *mutex );
return memoryPool->GetReuseMemoryMode();
}

// Passes the new buffer memory threshold on to the memory pool.
// threshold - the value forwarded to the pool unchanged
void CMetalMathEngine::SetThreadBufferMemoryThreshold( size_t threshold )
{
	// One guard only: 'lock' may not be declared twice in the same scope.
	// The guard type is CMutex, as in the other CMetalMathEngine methods that lock *mutex
	std::lock_guard<CMutex> lock( *mutex );
	memoryPool->SetThreadBufferMemoryThreshold( threshold );
}

// Returns the buffer memory threshold as reported by the memory pool.
size_t CMetalMathEngine::GetThreadBufferMemoryThreshold() const
{
	// The pool is only queried while the engine mutex is held
	const std::lock_guard<CMutex> guard( *mutex );
	const size_t poolThreshold = memoryPool->GetThreadBufferMemoryThreshold();
	return poolThreshold;
}

CMemoryHandle CMetalMathEngine::HeapAlloc( size_t size )
{
std::lock_guard<CMutex> lock( *mutex );
Expand All @@ -118,7 +130,7 @@ bool LoadMetalEngineInfo( CMathEngineInfo& info )

CMemoryHandle CMetalMathEngine::StackAlloc( size_t size )
{
std::lock_guard<std::mutex> lock( *mutex );
std::lock_guard<CMutex> lock( *mutex );
CMemoryHandle result = deviceStackAllocator->Alloc( size );
if( result.IsNull() ) {
THROW_MEMORY_EXCEPTION;
Expand All @@ -128,7 +140,7 @@ bool LoadMetalEngineInfo( CMathEngineInfo& info )

// Returns a block to the device stack allocator.
// ptr - the handle passed to the allocator's Free()
void CMetalMathEngine::StackFree( const CMemoryHandle& ptr )
{
	// One guard only: 'lock' may not be declared twice in the same scope.
	// The guard type is CMutex, as in the other CMetalMathEngine methods that lock *mutex
	std::lock_guard<CMutex> lock( *mutex );
	deviceStackAllocator->Free( ptr );
}

Expand All @@ -140,19 +152,25 @@ bool LoadMetalEngineInfo( CMathEngineInfo& info )

// Returns the peak memory usage as reported by the memory pool.
size_t CMetalMathEngine::GetPeakMemoryUsage() const
{
	// One guard only: 'lock' may not be declared twice in the same scope.
	// The guard type is CMutex, as in the other CMetalMathEngine methods that lock *mutex
	std::lock_guard<CMutex> lock( *mutex );
	return memoryPool->GetPeakMemoryUsage();
}

// Asks the memory pool to reset its peak memory usage counter.
void CMetalMathEngine::ResetPeakMemoryUsage()
{
	// One guard only: 'lock' may not be declared twice in the same scope.
	// The guard type is CMutex, as in the other CMetalMathEngine methods that lock *mutex
	std::lock_guard<CMutex> lock( *mutex );
	memoryPool->ResetPeakMemoryUsage();
}

// Returns the current memory usage as reported by the memory pool.
size_t CMetalMathEngine::GetCurrentMemoryUsage() const
{
	// The pool is only queried while the engine mutex is held
	const std::lock_guard<CMutex> guard( *mutex );
	const size_t usedNow = memoryPool->GetCurrentMemoryUsage();
	return usedNow;
}

// Returns the size of the memory held in pools, as reported by the memory pool.
size_t CMetalMathEngine::GetMemoryInPools() const
{
	// One guard only: 'lock' may not be declared twice in the same scope.
	// The guard type is CMutex, as in the other CMetalMathEngine methods that lock *mutex
	std::lock_guard<CMutex> lock( *mutex );
	return memoryPool->GetMemoryInPools();
}

Expand Down
18 changes: 18 additions & 0 deletions NeoMathEngine/src/GPU/Vulkan/VulkanMathEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,24 @@ void CVulkanMathEngine::SetReuseMemoryMode( bool enable )
memoryPool->SetReuseMemoryMode( enable );
}

// Reports whether memory reuse mode is enabled, as stored in the memory pool.
bool CVulkanMathEngine::GetReuseMemoryMode() const
{
	// The pool is only queried while the engine mutex is held
	const std::lock_guard<std::mutex> guard( mutex );
	const bool isReuseEnabled = memoryPool->GetReuseMemoryMode();
	return isReuseEnabled;
}

// Passes the new buffer memory threshold on to the memory pool.
// threshold - the value forwarded to the pool unchanged
void CVulkanMathEngine::SetThreadBufferMemoryThreshold( size_t threshold )
{
	// The pool is only touched while the engine mutex is held
	const std::lock_guard<std::mutex> guard( mutex );
	memoryPool->SetThreadBufferMemoryThreshold( threshold );
}

// Returns the buffer memory threshold as reported by the memory pool.
size_t CVulkanMathEngine::GetThreadBufferMemoryThreshold() const
{
	// The pool is only queried while the engine mutex is held
	const std::lock_guard<std::mutex> guard( mutex );
	const size_t poolThreshold = memoryPool->GetThreadBufferMemoryThreshold();
	return poolThreshold;
}

CMemoryHandle CVulkanMathEngine::HeapAlloc( size_t size )
{
std::lock_guard<std::mutex> lock( mutex );
Expand Down Expand Up @@ -156,6 +168,12 @@ void CVulkanMathEngine::ResetPeakMemoryUsage()
memoryPool->ResetPeakMemoryUsage();
}

// Returns the current memory usage as reported by the memory pool.
size_t CVulkanMathEngine::GetCurrentMemoryUsage() const
{
	// The pool is only queried while the engine mutex is held
	const std::lock_guard<std::mutex> guard( mutex );
	const size_t usedNow = memoryPool->GetCurrentMemoryUsage();
	return usedNow;
}

size_t CVulkanMathEngine::GetMemoryInPools() const
{
std::lock_guard<std::mutex> lock( mutex );
Expand Down
4 changes: 4 additions & 0 deletions NeoMathEngine/src/GPU/Vulkan/VulkanMathEngine.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ class CVulkanMathEngine : public IMathEngine, public IRawMemoryManager {
// IMathEngine interface methods
TMathEngineType GetType() const override { return MET_Vulkan; }
void SetReuseMemoryMode( bool enable ) override;
bool GetReuseMemoryMode() const override;
void SetThreadBufferMemoryThreshold( size_t threshold ) override;
size_t GetThreadBufferMemoryThreshold() const override;
CMemoryHandle HeapAlloc( size_t count ) override;
void HeapFree( const CMemoryHandle& handle ) override;
void TransferHandleToThisThread( const CMemoryHandle& /*handle*/, size_t /*size*/ ) override { ASSERT_EXPR( false ); }
Expand All @@ -68,6 +70,7 @@ class CVulkanMathEngine : public IMathEngine, public IRawMemoryManager {
size_t GetFreeMemorySize() const override;
size_t GetPeakMemoryUsage() const override;
void ResetPeakMemoryUsage() override;
size_t GetCurrentMemoryUsage() const override;
size_t GetMemoryInPools() const override;
void CleanUp() override;
void* GetBuffer( const CMemoryHandle& handle, size_t pos, size_t size, bool exchange ) override;
Expand Down Expand Up @@ -642,6 +645,7 @@ class CVulkanMathEngine : public IMathEngine, public IRawMemoryManager {
const CFloatHandle& ) override { ASSERT_EXPR( false ); }

IPerformanceCounters* CreatePerformanceCounters( bool ) const override { return new CPerformanceCountersDefault(); }
// For Distributed only
void AllReduce( const CFloatHandle& /*handle*/, int /*size*/ ) override {};
void Broadcast( const CFloatHandle& /*handle*/, int /*size*/, int /*root*/ ) override {};

Expand Down
Loading