From 4e743601e361422547ca2066cc016b7855f580b8 Mon Sep 17 00:00:00 2001
From: Pavel Voropaev <8759144+voropz@users.noreply.github.com>
Date: Sat, 6 Apr 2024 13:18:39 +0200
Subject: [PATCH] Don't cache fc ptrs

Signed-off-by: Pavel Voropaev <8759144+voropz@users.noreply.github.com>
---
 .../NeoML/Dnn/Layers/TransformerLayer.h    |  4 +--
 NeoML/src/Dnn/Layers/TransformerLayer.cpp  | 27 ++++++++++---------
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/NeoML/include/NeoML/Dnn/Layers/TransformerLayer.h b/NeoML/include/NeoML/Dnn/Layers/TransformerLayer.h
index 2160c5ea9..f6b93778c 100644
--- a/NeoML/include/NeoML/Dnn/Layers/TransformerLayer.h
+++ b/NeoML/include/NeoML/Dnn/Layers/TransformerLayer.h
@@ -144,7 +144,7 @@ class NEOML_API CTransformerEncoderLayer : public CCompositeLayer {
 	void SetSelfAttentionDropoutRate( float rate ){ selfAttention->SetDropoutRate( rate ); }
 
 	// Sets the size of the first fully-connected layer inside of feed-forward
-	int GetFeedForwardSize() const { return CheckCast<CFullyConnectedLayer>( fc1 )->GetNumberOfElements(); }
+	int GetFeedForwardSize() const;
 	void SetFeedForwardSize( int size );
 
 	// Sets activation between fully-connected layers inside of feed-forward
@@ -168,9 +168,7 @@ class NEOML_API CTransformerEncoderLayer : public CCompositeLayer {
 	CPtr<CMultiheadAttentionLayer> selfAttention;
 	CPtr<CDropoutLayer> dropoutSelfAttention;
 	CPtr<CEltwiseSumLayer> selfAttentionSum;
-	CPtr<CBaseLayer> fc1;
 	CPtr<CDropoutLayer> dropoutFc1;
-	CPtr<CBaseLayer> fc2;
 	CPtr<CDropoutLayer> dropoutFc2;
 	CPtr<CEltwiseSumLayer> feedForwardSum;
diff --git a/NeoML/src/Dnn/Layers/TransformerLayer.cpp b/NeoML/src/Dnn/Layers/TransformerLayer.cpp
index 881f15a14..149892ede 100644
--- a/NeoML/src/Dnn/Layers/TransformerLayer.cpp
+++ b/NeoML/src/Dnn/Layers/TransformerLayer.cpp
@@ -80,9 +80,7 @@ void CTransformerEncoderLayer::Serialize( CArchive& archive )
 	selfAttention = CheckCast<CMultiheadAttentionLayer>( GetLayer( selfAttentionName ) );
 	dropoutSelfAttention = getOptionalDropout( *this, dropoutSelfAttentionName );
 	selfAttentionSum = CheckCast<CEltwiseSumLayer>( GetLayer( selfAttentionSumName ) );
-	fc1 = GetLayer( fc1Name );
 	dropoutFc1 = getOptionalDropout( *this, dropoutFc1Name );
-	fc2 = GetLayer( fc2Name );
 	dropoutFc2 = getOptionalDropout( *this, dropoutFc2Name );
 	feedForwardSum = CheckCast<CEltwiseSumLayer>( GetLayer( feedForwardSumName ) );
 	if( version == 1 ) {
@@ -144,11 +142,16 @@ void CTransformerEncoderLayer::SetDropoutRate( float rate )
 	}
 }
 
+int CTransformerEncoderLayer::GetFeedForwardSize() const
+{
+	return CheckCast<CFullyConnectedLayer>( GetLayer( fc1Name ) )->GetNumberOfElements();
+}
+
 void CTransformerEncoderLayer::SetFeedForwardSize( int size )
 {
 	NeoAssert( size > 0 );
 
-	CheckCast<CFullyConnectedLayer>( fc1 )->SetNumberOfElements( size );
+	CheckCast<CFullyConnectedLayer>( GetLayer( fc1Name ) )->SetNumberOfElements( size );
 	ForceReshape();
 
 	NeoPresume( GetFeedForwardSize() == size );
@@ -161,9 +164,9 @@ void CTransformerEncoderLayer::SetActivation( const CActivationDesc& param )
 	DeleteLayer( activationName );
 	CPtr<CBaseLayer> activation = CreateActivationLayer( MathEngine(), param );
 	activation->SetName( activationName );
-	activation->Connect( *fc1 );
+	activation->Connect( fc1Name );
 	if( dropoutFc1 == nullptr ) {
-		fc2->Connect( *activation );
+		GetLayer( fc2Name )->Connect( *activation );
 	} else {
 		dropoutFc1->Connect( *activation );
 	}
@@ -206,7 +209,7 @@ void CTransformerEncoderLayer::Reshape()
 	if( selfAttention->GetOutputSize() != inputDescs[0].Channels() ) {
 		selfAttention->SetOutputSize( inputDescs[0].Channels() );
 	}
-	CFullyConnectedLayer* fc2Ptr = dynamic_cast<CFullyConnectedLayer*>( fc2.Ptr() );
+	auto* fc2Ptr = dynamic_cast<CFullyConnectedLayer*>( GetLayer( fc2Name ).Ptr() );
 	if( fc2Ptr != nullptr && fc2Ptr->GetNumberOfElements() != inputDescs[0].Channels() ) {
 		fc2Ptr->SetNumberOfElements( inputDescs[0].Channels() );
 	}
@@ -243,7 +246,7 @@ void CTransformerEncoderLayer::buildLayer()
 	AddLayer( *selfAttentionNorm );
 
 	// First fully-connected of feed-forward
-	fc1 = FINE_DEBUG_NEW CFullyConnectedLayer( MathEngine() );
+	CPtr<CBaseLayer> fc1 = FINE_DEBUG_NEW CFullyConnectedLayer( MathEngine() );
 	fc1->SetName( fc1Name );
 	CheckCast<CFullyConnectedLayer>( fc1 )->SetNumberOfElements( 1 );
 	AddLayer( *fc1 );
@@ -254,7 +257,7 @@ void CTransformerEncoderLayer::buildLayer()
 	AddLayer( *activation );
 
 	// Second fully-connected of feed-forward
-	fc2 = FINE_DEBUG_NEW CFullyConnectedLayer( MathEngine() );
+	CPtr<CBaseLayer> fc2 = FINE_DEBUG_NEW CFullyConnectedLayer( MathEngine() );
 	fc2->SetName( fc2Name );
 	CheckCast<CFullyConnectedLayer>( fc2 )->SetNumberOfElements( 1 );
 	AddLayer( *fc2 );
@@ -321,12 +324,12 @@ void CTransformerEncoderLayer::addDropoutLayers()
 	dropoutFc1 = FINE_DEBUG_NEW CDropoutLayer( MathEngine() );
 	dropoutFc1->SetName( dropoutFc1Name );
 	dropoutFc1->Connect( activationName );
-	fc2->Connect( *dropoutFc1 );
+	GetLayer( fc2Name )->Connect( *dropoutFc1 );
 	AddLayer( *dropoutFc1 );
 
 	dropoutFc2 = FINE_DEBUG_NEW CDropoutLayer( MathEngine() );
 	dropoutFc2->SetName( dropoutFc2Name );
-	dropoutFc2->Connect( *fc2 );
+	dropoutFc2->Connect( fc2Name );
 	feedForwardSum->Connect( *dropoutFc2 );
 	AddLayer( *dropoutFc2 );
@@ -351,11 +354,11 @@ void CTransformerEncoderLayer::removeDropoutLayers()
 	DeleteLayer( *dropoutFc1 );
 	dropoutFc1 = nullptr;
-	fc2->Connect( activationName );
+	GetLayer( fc2Name )->Connect( activationName );
 
 	DeleteLayer( *dropoutFc2 );
 	dropoutFc2 = nullptr;
-	feedForwardSum->Connect( *fc2 );
+	feedForwardSum->Connect( fc2Name );
 
 	NeoPresume( dropoutSelfAttention == nullptr && !HasLayer( dropoutSelfAttentionName ) );
 	NeoPresume( dropoutFc1 == nullptr && !HasLayer( dropoutFc1Name ) );
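
For reference, a minimal before/after sketch of the accessor pattern this patch switches to. It is illustrative only and simply restates the GetFeedForwardSize change above: instead of caching CPtr members to the fully-connected layers and keeping them in sync in Serialize() and buildLayer(), the accessors resolve the layer by name at call time through the CheckCast/GetLayer helpers already used in TransformerLayer.cpp, presumably so no cached pointer is left stale when the internal net is rebuilt or reloaded.

// Before: the getter dereferences a cached member pointer that Serialize() and
// buildLayer() had to keep in sync with the contents of the composite layer.
int CTransformerEncoderLayer::GetFeedForwardSize() const
{
	return CheckCast<CFullyConnectedLayer>( fc1 )->GetNumberOfElements();
}

// After: the layer is looked up by its name on each call; the only state is the
// layer object stored inside the composite net itself.
int CTransformerEncoderLayer::GetFeedForwardSize() const
{
	return CheckCast<CFullyConnectedLayer>( GetLayer( fc1Name ) )->GetNumberOfElements();
}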