diff --git a/NeoML/include/NeoML/Dnn/Layers/ActivationLayers.h b/NeoML/include/NeoML/Dnn/Layers/ActivationLayers.h index a779d57e8..3d2bc7fa0 100644 --- a/NeoML/include/NeoML/Dnn/Layers/ActivationLayers.h +++ b/NeoML/include/NeoML/Dnn/Layers/ActivationLayers.h @@ -1,4 +1,4 @@ -/* Copyright © 2017-2023 ABBYY +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -28,6 +28,7 @@ class CActivationDesc; class NEOML_API CLinearLayer : public CBaseInPlaceLayer, public IActivationLayer { NEOML_DNN_LAYER( CLinearLayer ) public: + enum TParam { TP_Multiplier, TP_FreeTerm, /*...*/ TP_Count }; using CParam = CLinearActivationParam; static constexpr float DefaultMultiplier = CParam::DefaultMultiplier; static constexpr float DefaultFreeTerm = CParam::DefaultFreeTerm; @@ -37,14 +38,15 @@ class NEOML_API CLinearLayer : public CBaseInPlaceLayer, public IActivationLayer void Serialize( CArchive& archive ) override; float GetMultiplier() const { return multiplier; } - void SetMultiplier( float _multiplier ) { multiplier = _multiplier; } + void SetMultiplier( float _multiplier ) { multiplier = _multiplier; ForceReshape(); } float GetFreeTerm() const { return freeTerm; } - void SetFreeTerm( float _freeTerm ) { freeTerm = _freeTerm; } + void SetFreeTerm( float _freeTerm ) { freeTerm = _freeTerm; ForceReshape(); } void ApplyParam( CParam param ) { SetMultiplier( param.Multiplier ); SetFreeTerm( param.FreeTerm ); } CActivationDesc GetDesc() const override; protected: + void OnReshaped() override; void RunOnce() override; void BackwardOnce() override; int BlobsForBackward() const override { return 0; } @@ -52,6 +54,7 @@ class NEOML_API CLinearLayer : public CBaseInPlaceLayer, public IActivationLayer private: float multiplier = DefaultMultiplier; float freeTerm = DefaultFreeTerm; + CPtr vars; }; NEOML_API CLayerWrapper Linear( float multiplier, float freeTerm ); @@ -359,7 +362,7 @@ NEOML_API CLayerWrapper Log(); class NEOML_API CErfLayer : public CBaseLayer, public IActivationLayer { NEOML_DNN_LAYER( CErfLayer ) public: - explicit CErfLayer( IMathEngine& mathEngine ) : CBaseLayer( mathEngine, "CErfLayer", false ) {} + explicit CErfLayer( IMathEngine& mathEngine ); void Serialize( CArchive& archive ) override; @@ -370,6 +373,9 @@ class NEOML_API CErfLayer : public CBaseLayer, public IActivationLayer { void RunOnce() override; void BackwardOnce() override; int BlobsForBackward() const override { return TInputBlobs; } + +private: + CPtr mult; }; NEOML_API CLayerWrapper Erf(); diff --git a/NeoML/src/Dnn/Layers/ActivationLayers.cpp b/NeoML/src/Dnn/Layers/ActivationLayers.cpp index b097f5939..4672f3888 100644 --- a/NeoML/src/Dnn/Layers/ActivationLayers.cpp +++ b/NeoML/src/Dnn/Layers/ActivationLayers.cpp @@ -1,4 +1,4 @@ -/* Copyright © 2017-2020 ABBYY Production LLC +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -189,31 +189,44 @@ CActivationDesc LoadActivationDesc( CArchive& archive ) return result; } -/////////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////////// +//--------------------------------------------------------------------------------------------------- + CLinearLayer::CLinearLayer( IMathEngine& mathEngine ) : CBaseInPlaceLayer( mathEngine, "CCnnLinearLayer" ) { } +void CLinearLayer::OnReshaped() +{ + if( inputDescs[0].GetDataType() == CT_Float ) { + if( vars == nullptr || vars->GetDataType() != CT_Float ) { + vars = CDnnBlob::CreateVector( MathEngine(), CT_Float, TP_Count ); + } + vars->GetData().SetValueAt( TP_Multiplier, multiplier ); + vars->GetData().SetValueAt( TP_FreeTerm, freeTerm ); + } else { + if( vars == nullptr || vars->GetDataType() != CT_Int ) { + vars = CDnnBlob::CreateVector( MathEngine(), CT_Int, TP_Count ); + } + vars->GetData().SetValueAt( TP_Multiplier, static_cast( multiplier ) ); + vars->GetData().SetValueAt( TP_FreeTerm, static_cast( freeTerm ) ); + } +} + template static void linearRunOnce( const CTypedMemoryHandle& input, T multiplier, T freeTerm, int dataSize, - const CTypedMemoryHandle& output ) + const CDnnBlob& vars, const CTypedMemoryHandle& output ) { IMathEngine& mathEngine = *input.GetMathEngine(); CTypedMemoryHandle currInput = input; if( multiplier != static_cast( 1 ) ) { - CMemoryHandleStackVar multiplierVar( mathEngine ); - multiplierVar.SetValue( multiplier ); - mathEngine.VectorMultiply( currInput, output, dataSize, multiplierVar ); + mathEngine.VectorMultiply( currInput, output, dataSize, vars.GetData( { CLinearLayer::TP_Multiplier } ) ); currInput = output; } - if( freeTerm != static_cast< T >( 0 ) ) { - CMemoryHandleStackVar freeTermVar( mathEngine ); - freeTermVar.SetValue( freeTerm ); - mathEngine.VectorAddValue( currInput, output, dataSize, freeTermVar ); + if( freeTerm != static_cast( 0 ) ) { + mathEngine.VectorAddValue( currInput, output, dataSize, vars.GetData( { CLinearLayer::TP_FreeTerm } ) ); currInput = output; } @@ -224,7 +237,7 @@ static void linearRunOnce( const CTypedMemoryHandle& input, T multiplie CActivationDesc CLinearLayer::GetDesc() const { - CParam param{ multiplier, freeTerm }; + CParam param{ GetMultiplier(), GetFreeTerm() }; return { AF_Linear, param }; } @@ -233,10 +246,11 @@ void CLinearLayer::RunOnce() const int dataSize = outputBlobs[0]->GetDataSize(); if( inputBlobs[0]->GetDataType() == CT_Float ) { - linearRunOnce( inputBlobs[0]->GetData(), multiplier, freeTerm, dataSize, outputBlobs[0]->GetData() ); + linearRunOnce( inputBlobs[0]->GetData(), multiplier, + freeTerm, dataSize, *vars, outputBlobs[0]->GetData() ); } else { linearRunOnce( inputBlobs[0]->GetData(), static_cast( multiplier ), - static_cast( freeTerm ), dataSize, outputBlobs[0]->GetData() ); + static_cast( freeTerm ), dataSize, *vars, outputBlobs[0]->GetData() ); } } @@ -247,9 +261,7 @@ void CLinearLayer::BackwardOnce() int dataSize = outputDiffBlobs[0]->GetDataSize(); if( multiplier != 1.f ) { - CFloatHandleStackVar multiplierValue( MathEngine() ); - multiplierValue.SetValue( multiplier ); - MathEngine().VectorMultiply( outputDiffPtr, inputDiffPtr, dataSize, multiplierValue ); + MathEngine().VectorMultiply( outputDiffPtr, inputDiffPtr, dataSize, vars->GetData( { TP_Multiplier } ) ); } else if( outputDiffPtr != inputDiffPtr ) { MathEngine().VectorCopy( inputDiffPtr, outputDiffPtr, dataSize ); } @@ -510,7 +522,7 @@ void CAbsLayer::RunOnce() { CConstFloatHandle inputPtr = inputBlobs[0]->GetData(); CFloatHandle outputPtr = outputBlobs[0]->GetData(); - int dataSize = inputBlobs[0]->GetDataSize(); + const int dataSize = inputBlobs[0]->GetDataSize(); MathEngine().VectorAbs(inputPtr, outputPtr, dataSize); } @@ -780,6 +792,13 @@ CLayerWrapper Log() //--------------------------------------------------------------------------------------------------- +CErfLayer::CErfLayer( IMathEngine& mathEngine ) : + CBaseLayer( mathEngine, "CErfLayer", false ), + mult( CDnnBlob::CreateVector( mathEngine, CT_Float, 1 ) ) +{ + mult->GetData().SetValue( 1.1283791671f ); // 2 / sqrt( pi ) +} + static const int ErfLayerVersion = 0; void CErfLayer::Serialize( CArchive& archive ) @@ -812,9 +831,7 @@ void CErfLayer::BackwardOnce() CFloatHandle inputDiff = inputDiffBlobs[0]->GetData(); MathEngine().VectorNegMultiply( inputBlobs[0]->GetData(), inputBlobs[0]->GetData(), dataSize, inputDiff ); MathEngine().VectorExp( inputDiff, inputDiff, dataSize ); - CFloatHandleStackVar mult( MathEngine() ); - mult.SetValue( 1.1283791671f ); // 2 / sqrt( pi ) - MathEngine().VectorMultiply( inputDiff, inputDiff, dataSize, mult ); + MathEngine().VectorMultiply( inputDiff, inputDiff, dataSize, mult->GetData() ); MathEngine().VectorEltwiseMultiply( inputDiff, outputDiffBlobs[0]->GetData(), inputDiff, dataSize ); }