[NeoML] CLinearLayer & CErfLayer optimize CUDA syncs (#1046)
Signed-off-by: Kirill Golikov <kirill.golikov@abbyy.com>
favorart committed Apr 9, 2024
1 parent 9294842 commit 81ccd81
Showing 2 changed files with 48 additions and 25 deletions.
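The optimization targets how these layers feed scalar constants to math-engine kernels. Before the change, every RunOnce()/BackwardOnce() call created a CMemoryHandleStackVar / CFloatHandleStackVar and copied the scalar into it with SetValue(); on a CUDA math engine each of those per-call uploads is a host-to-device transfer, which the commit title identifies as a source of extra synchronizations. After the change the constants live in persistent CDnnBlob members filled once, in OnReshaped() for CLinearLayer and in the constructor for CErfLayer, so the per-call work is reduced to the kernel launches themselves. A simplified before/after sketch of the pattern (input, output and dataSize are placeholder names; the real code is in the diff below):

// Before: upload the scalar on every call
CFloatHandleStackVar multiplierVar( MathEngine() );
multiplierVar.SetValue( multiplier );
MathEngine().VectorMultiply( input, output, dataSize, multiplierVar );

// After: read the value from a blob that was filled once at reshape time
MathEngine().VectorMultiply( input, output, dataSize, vars->GetData( { TP_Multiplier } ) );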
NeoML/include/NeoML/Dnn/Layers/ActivationLayers.h (14 changes: 10 additions and 4 deletions)
@@ -1,4 +1,4 @@
/* Copyright © 2017-2023 ABBYY
/* Copyright © 2017-2024 ABBYY
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -28,6 +28,7 @@ class CActivationDesc;
class NEOML_API CLinearLayer : public CBaseInPlaceLayer, public IActivationLayer {
NEOML_DNN_LAYER( CLinearLayer )
public:
enum TParam { TP_Multiplier, TP_FreeTerm, /*...*/ TP_Count };
using CParam = CLinearActivationParam;
static constexpr float DefaultMultiplier = CParam::DefaultMultiplier;
static constexpr float DefaultFreeTerm = CParam::DefaultFreeTerm;
@@ -37,21 +38,23 @@ class NEOML_API CLinearLayer : public CBaseInPlaceLayer, public IActivationLayer
void Serialize( CArchive& archive ) override;

float GetMultiplier() const { return multiplier; }
void SetMultiplier( float _multiplier ) { multiplier = _multiplier; }
void SetMultiplier( float _multiplier ) { multiplier = _multiplier; ForceReshape(); }
float GetFreeTerm() const { return freeTerm; }
void SetFreeTerm( float _freeTerm ) { freeTerm = _freeTerm; }
void SetFreeTerm( float _freeTerm ) { freeTerm = _freeTerm; ForceReshape(); }

void ApplyParam( CParam param ) { SetMultiplier( param.Multiplier ); SetFreeTerm( param.FreeTerm ); }
CActivationDesc GetDesc() const override;

protected:
void OnReshaped() override;
void RunOnce() override;
void BackwardOnce() override;
int BlobsForBackward() const override { return 0; }

private:
float multiplier = DefaultMultiplier;
float freeTerm = DefaultFreeTerm;
CPtr<CDnnBlob> vars;
};

NEOML_API CLayerWrapper<CLinearLayer> Linear( float multiplier, float freeTerm );
@@ -359,7 +362,7 @@ NEOML_API CLayerWrapper<CLogLayer> Log();
class NEOML_API CErfLayer : public CBaseLayer, public IActivationLayer {
NEOML_DNN_LAYER( CErfLayer )
public:
explicit CErfLayer( IMathEngine& mathEngine ) : CBaseLayer( mathEngine, "CErfLayer", false ) {}
explicit CErfLayer( IMathEngine& mathEngine );

void Serialize( CArchive& archive ) override;

@@ -370,6 +373,9 @@ class NEOML_API CErfLayer : public CBaseLayer, public IActivationLayer {
void RunOnce() override;
void BackwardOnce() override;
int BlobsForBackward() const override { return TInputBlobs; }

private:
CPtr<CDnnBlob> mult;
};

NEOML_API CLayerWrapper<CErfLayer> Erf();
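In the header, CLinearLayer gains a public TParam enum naming the slots of the cached blob (TP_Multiplier, TP_FreeTerm, TP_Count) and a private CPtr<CDnnBlob> vars; its setters now also call ForceReshape() so the cached values are refreshed on the next Reshape(). CErfLayer's constructor moves out of line and the layer gains a private CPtr<CDnnBlob> mult for its constant. A hypothetical usage sketch (the linear and mathEngine variables are illustrative, not part of the diff):

CPtr<CLinearLayer> linear = new CLinearLayer( mathEngine );
linear->SetMultiplier( 2.f ); // stores the value and calls ForceReshape()
linear->SetFreeTerm( 0.5f );  // OnReshaped() re-uploads both values into "vars" on the next Reshape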
NeoML/src/Dnn/Layers/ActivationLayers.cpp (59 changes: 38 additions and 21 deletions)
@@ -1,4 +1,4 @@
/* Copyright © 2017-2020 ABBYY Production LLC
/* Copyright © 2017-2024 ABBYY
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -189,31 +189,44 @@ CActivationDesc LoadActivationDesc( CArchive& archive )
return result;
}

///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
//---------------------------------------------------------------------------------------------------

CLinearLayer::CLinearLayer( IMathEngine& mathEngine ) :
CBaseInPlaceLayer( mathEngine, "CCnnLinearLayer" )
{
}

void CLinearLayer::OnReshaped()
{
if( inputDescs[0].GetDataType() == CT_Float ) {
if( vars == nullptr || vars->GetDataType() != CT_Float ) {
vars = CDnnBlob::CreateVector( MathEngine(), CT_Float, TP_Count );
}
vars->GetData().SetValueAt( TP_Multiplier, multiplier );
vars->GetData().SetValueAt( TP_FreeTerm, freeTerm );
} else {
if( vars == nullptr || vars->GetDataType() != CT_Int ) {
vars = CDnnBlob::CreateVector( MathEngine(), CT_Int, TP_Count );
}
vars->GetData<int>().SetValueAt( TP_Multiplier, static_cast<int>( multiplier ) );
vars->GetData<int>().SetValueAt( TP_FreeTerm, static_cast<int>( freeTerm ) );
}
}

template<class T>
static void linearRunOnce( const CTypedMemoryHandle<const T>& input, T multiplier, T freeTerm, int dataSize,
const CTypedMemoryHandle<T>& output )
const CDnnBlob& vars, const CTypedMemoryHandle<T>& output )
{
IMathEngine& mathEngine = *input.GetMathEngine();
CTypedMemoryHandle<const T> currInput = input;

if( multiplier != static_cast<T>( 1 ) ) {
CMemoryHandleStackVar<T> multiplierVar( mathEngine );
multiplierVar.SetValue( multiplier );
mathEngine.VectorMultiply( currInput, output, dataSize, multiplierVar );
mathEngine.VectorMultiply( currInput, output, dataSize, vars.GetData<const T>( { CLinearLayer::TP_Multiplier } ) );
currInput = output;
}

if( freeTerm != static_cast< T >( 0 ) ) {
CMemoryHandleStackVar<T> freeTermVar( mathEngine );
freeTermVar.SetValue( freeTerm );
mathEngine.VectorAddValue( currInput, output, dataSize, freeTermVar );
if( freeTerm != static_cast<T>( 0 ) ) {
mathEngine.VectorAddValue( currInput, output, dataSize, vars.GetData<const T>( { CLinearLayer::TP_FreeTerm } ) );
currInput = output;
}

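The new OnReshaped() packs both constants into a single TP_Count-element vector blob whose type matches the input (CT_Float or CT_Int), so the host-to-device copy happens only when the layer is reshaped. At run time the kernels receive offset handles into that vector, indexed by the TParam enum; a minimal sketch of the idea, with illustrative values and names:

CPtr<CDnnBlob> vars = CDnnBlob::CreateVector( mathEngine, CT_Float, CLinearLayer::TP_Count );
vars->GetData().SetValueAt( CLinearLayer::TP_Multiplier, 2.f ); // one upload at reshape time
vars->GetData().SetValueAt( CLinearLayer::TP_FreeTerm, 0.5f );
// later, at run time: an offset handle into the cached vector, no new upload
mathEngine.VectorMultiply( input, output, dataSize, vars->GetData( { CLinearLayer::TP_Multiplier } ) );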
@@ -224,7 +237,7 @@ static void linearRunOnce( const CTypedMemoryHandle<const T>& input, T multiplie

CActivationDesc CLinearLayer::GetDesc() const
{
CParam param{ multiplier, freeTerm };
CParam param{ GetMultiplier(), GetFreeTerm() };
return { AF_Linear, param };
}

@@ -233,10 +246,11 @@ void CLinearLayer::RunOnce()
const int dataSize = outputBlobs[0]->GetDataSize();

if( inputBlobs[0]->GetDataType() == CT_Float ) {
linearRunOnce( inputBlobs[0]->GetData<const float>(), multiplier, freeTerm, dataSize, outputBlobs[0]->GetData() );
linearRunOnce( inputBlobs[0]->GetData<const float>(), multiplier,
freeTerm, dataSize, *vars, outputBlobs[0]->GetData() );
} else {
linearRunOnce( inputBlobs[0]->GetData<const int>(), static_cast<int>( multiplier ),
static_cast<int>( freeTerm ), dataSize, outputBlobs[0]->GetData<int>() );
static_cast<int>( freeTerm ), dataSize, *vars, outputBlobs[0]->GetData<int>() );
}
}

@@ -247,9 +261,7 @@ void CLinearLayer::BackwardOnce()
int dataSize = outputDiffBlobs[0]->GetDataSize();

if( multiplier != 1.f ) {
CFloatHandleStackVar multiplierValue( MathEngine() );
multiplierValue.SetValue( multiplier );
MathEngine().VectorMultiply( outputDiffPtr, inputDiffPtr, dataSize, multiplierValue );
MathEngine().VectorMultiply( outputDiffPtr, inputDiffPtr, dataSize, vars->GetData( { TP_Multiplier } ) );
} else if( outputDiffPtr != inputDiffPtr ) {
MathEngine().VectorCopy( inputDiffPtr, outputDiffPtr, dataSize );
}
@@ -510,7 +522,7 @@ void CAbsLayer::RunOnce()
{
CConstFloatHandle inputPtr = inputBlobs[0]->GetData();
CFloatHandle outputPtr = outputBlobs[0]->GetData();
int dataSize = inputBlobs[0]->GetDataSize();
const int dataSize = inputBlobs[0]->GetDataSize();

MathEngine().VectorAbs(inputPtr, outputPtr, dataSize);
}
@@ -780,6 +792,13 @@ CLayerWrapper<CLogLayer> Log()

//---------------------------------------------------------------------------------------------------

CErfLayer::CErfLayer( IMathEngine& mathEngine ) :
CBaseLayer( mathEngine, "CErfLayer", false ),
mult( CDnnBlob::CreateVector( mathEngine, CT_Float, 1 ) )
{
mult->GetData().SetValue( 1.1283791671f ); // 2 / sqrt( pi )
}

static const int ErfLayerVersion = 0;

void CErfLayer::Serialize( CArchive& archive )
@@ -812,9 +831,7 @@ void CErfLayer::Serialize( CArchive& archive )
CFloatHandle inputDiff = inputDiffBlobs[0]->GetData();
MathEngine().VectorNegMultiply( inputBlobs[0]->GetData(), inputBlobs[0]->GetData(), dataSize, inputDiff );
MathEngine().VectorExp( inputDiff, inputDiff, dataSize );
CFloatHandleStackVar mult( MathEngine() );
mult.SetValue( 1.1283791671f ); // 2 / sqrt( pi )
MathEngine().VectorMultiply( inputDiff, inputDiff, dataSize, mult );
MathEngine().VectorMultiply( inputDiff, inputDiff, dataSize, mult->GetData() );
MathEngine().VectorEltwiseMultiply( inputDiff, outputDiffBlobs[0]->GetData(), inputDiff, dataSize );
}

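For reference, BackwardOnce() computes the derivative of the error function, which is where the cached constant comes from:

d/dx erf(x) = (2 / sqrt(pi)) * exp(-x^2) ≈ 1.1283791671 * exp(-x^2)

The VectorNegMultiply / VectorExp calls above produce exp(-x^2), and the 2/sqrt(pi) factor now comes from the one-element mult blob filled in the constructor instead of a per-call CFloatHandleStackVar.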
