From c8ba3be95f5d59770029300b4368963adc443752 Mon Sep 17 00:00:00 2001 From: Daniyal Aliev <70905826+daniyalaliev@users.noreply.github.com> Date: Tue, 27 Feb 2024 19:51:43 +0100 Subject: [PATCH] [NeoML]TiedEmbedding access through composite layer (#1031) * fixed hierarchical clusterization with ward (#998) Signed-off-by: Daniyal Aliev Co-authored-by: Valeriy Fedyunin * master commit Signed-off-by: daniyalaliev * initial changes Signed-off-by: daniyalaliev * need to test Signed-off-by: daniyalaliev * added path tied embedding Signed-off-by: daniyalaliev * added path tied embedding Signed-off-by: daniyalaliev * need to add tests Signed-off-by: daniyalaliev * tests Signed-off-by: daniyalaliev * final commit Signed-off-by: daniyalaliev * docs Signed-off-by: daniyalaliev --------- Signed-off-by: Daniyal Aliev Signed-off-by: daniyalaliev Co-authored-by: Valeriy Fedyunin Co-authored-by: daniyalaliev --- NeoML/docs/en/API/NN/TiedEmbeddingsLayer.md | 10 ++ NeoML/docs/ru/API/NN/TiedEmbeddingsLayer.md | 10 ++ NeoML/include/NeoML/Dnn/Dnn.h | 6 +- .../include/NeoML/Dnn/Layers/CompositeLayer.h | 4 +- .../NeoML/Dnn/Layers/TiedEmbeddingsLayer.h | 25 ++- NeoML/src/Dnn/Dnn.cpp | 24 ++- NeoML/src/Dnn/Layers/CompositeLayer.cpp | 18 +- NeoML/src/Dnn/Layers/TiedEmbeddingsLayer.cpp | 40 +++-- NeoML/test/src/CMakeLists.txt | 1 + NeoML/test/src/TiedEmbeddingTest.cpp | 157 ++++++++++++++++++ 10 files changed, 269 insertions(+), 26 deletions(-) create mode 100644 NeoML/test/src/TiedEmbeddingTest.cpp diff --git a/NeoML/docs/en/API/NN/TiedEmbeddingsLayer.md b/NeoML/docs/en/API/NN/TiedEmbeddingsLayer.md index 58ad3dbec..d700f7efc 100644 --- a/NeoML/docs/en/API/NN/TiedEmbeddingsLayer.md +++ b/NeoML/docs/en/API/NN/TiedEmbeddingsLayer.md @@ -23,6 +23,16 @@ void SetEmbeddingsLayerName( const char* name ) ``` Embeddings layer `name`. Only [CMultichannelLookupLayer](DiscreteFeaturesLayers/MultichannelLookupLayer.md) is allowed. 
+### EmbeddingsLayerPath + +```c++ +void SetEmbeddingsLayerPath( const CArray<CString>& path ) +``` + +If the embeddings layer is nested inside a [CCompositeLayer](https://github.com/neoml-lib/neoml/blob/master/NeoML/include/NeoML/Dnn/Layers/CompositeLayer.h), you need to provide the full path to it: the names of all enclosing composite layers, outermost first, followed by the name of the embeddings layer. + +Example: {"composite1", "composite2", ..., "embeddingName"} + ### ChannelIndex ```c++ diff --git a/NeoML/docs/ru/API/NN/TiedEmbeddingsLayer.md b/NeoML/docs/ru/API/NN/TiedEmbeddingsLayer.md index 5a4d7a48f..d660d9320 100644 --- a/NeoML/docs/ru/API/NN/TiedEmbeddingsLayer.md +++ b/NeoML/docs/ru/API/NN/TiedEmbeddingsLayer.md @@ -23,6 +23,16 @@ void SetEmbeddingsLayerName( const char* name ) ``` Использовать слой эмбеддингов `name`. Поддерживается только [CMultichannelLookupLayer](DiscreteFeaturesLayers/MultichannelLookupLayer.md). +### EmbeddingsLayerPath + +```c++ +void SetEmbeddingsLayerPath( const CArray<CString>& path ) +``` + +Если слой эмбеддингов находится в [CompositeLayer](https://github.com/neoml-lib/neoml/blob/master/NeoML/include/NeoML/Dnn/Layers/CompositeLayer.h), то надо указать полный путь до него. + +Пример: {"composite1", "composite2", ..., "embeddingName"} + ### ChannelIndex ```c++ diff --git a/NeoML/include/NeoML/Dnn/Dnn.h b/NeoML/include/NeoML/Dnn/Dnn.h index cc4a136e4..f72eae6ca 100644 --- a/NeoML/include/NeoML/Dnn/Dnn.h +++ b/NeoML/include/NeoML/Dnn/Dnn.h @@ -1,4 +1,4 @@ -/* Copyright © 2017-2023 ABBYY +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -455,6 +455,8 @@ class NEOML_API CDnnLayerGraph { virtual void GetLayerList( CArray& layerList ) const = 0; virtual CPtr GetLayer( const char* name ) = 0; virtual CPtr GetLayer( const char* name ) const = 0; + virtual CPtr GetLayer( const CArray& path ) = 0; + virtual CPtr GetLayer( const CArray& path ) const = 0; virtual bool HasLayer( const char* name ) const = 0; void AddLayer(CBaseLayer& layer); @@ -517,6 +519,8 @@ class NEOML_API CDnn : public CDnnLayerGraph { void GetLayerList( CArray& layerList ) const override; CPtr GetLayer( const char* name ) override; CPtr GetLayer( const char* name ) const override; + CPtr GetLayer(const CArray& path) override; + CPtr GetLayer(const CArray& path) const override; bool HasLayer( const char* name ) const override { return layerMap.Has( name ); } // Runs the network: all data from the input blobs is used diff --git a/NeoML/include/NeoML/Dnn/Layers/CompositeLayer.h b/NeoML/include/NeoML/Dnn/Layers/CompositeLayer.h index 5b940d26e..76fef8f8e 100644 --- a/NeoML/include/NeoML/Dnn/Layers/CompositeLayer.h +++ b/NeoML/include/NeoML/Dnn/Layers/CompositeLayer.h @@ -1,4 +1,4 @@ -/* Copyright © 2017-2023 ABBYY +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -58,7 +58,9 @@ class NEOML_API CCompositeLayer : public CBaseLayer, public CDnnLayerGraph { int GetLayerCount() const override { return layers.Size(); } void GetLayerList(CArray& layerList) const override; CPtr GetLayer(const char* name) override; + CPtr GetLayer(const CArray& path) override; CPtr GetLayer(const char* name) const override; + CPtr GetLayer(const CArray& path) const override; bool HasLayer(const char* name) const override; // Returns the total size of the output blobs diff --git a/NeoML/include/NeoML/Dnn/Layers/TiedEmbeddingsLayer.h b/NeoML/include/NeoML/Dnn/Layers/TiedEmbeddingsLayer.h index 42d112f3d..4ee5ccffa 100644 --- a/NeoML/include/NeoML/Dnn/Layers/TiedEmbeddingsLayer.h +++ b/NeoML/include/NeoML/Dnn/Layers/TiedEmbeddingsLayer.h @@ -1,4 +1,4 @@ -/* Copyright © 2017-2020 ABBYY Production LLC +/* Copyright © 2017-2024 ABBYY Production LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,6 +20,8 @@ limitations under the License. namespace NeoML { +class CMultichannelLookupLayer; + //////////////////////////////////////////////////////////////////////////////////////////////////// // Tied embeddings layer. https://arxiv.org/pdf/1608.05859.pdf @@ -31,10 +33,18 @@ class NEOML_API CTiedEmbeddingsLayer : public CBaseLayer { void Serialize( CArchive& archive ) override; - // Embeddings layer name from which we take the matrix. + // Methods to get/set embeddings layer name from which we take the matrix. + // Only CMultichannelLookupLayer is supported. + // Use this method if the lookupLayer is in the same level of the dnn (in the same composite layer) + const char* GetEmbeddingsLayerName() const { return embeddingPath.Last(); } + void SetEmbeddingsLayerName(const char* name) { embeddingPath = { name }; } + + // Methods to get/set embeddings layer path from which we take the matrix. // Only CMultichannelLookupLayer is supported. 
- const char* GetEmbeddingsLayerName() const { return embeddingsLayerName; } - void SetEmbeddingsLayerName( const char* name ) { embeddingsLayerName = name; } + // Use this method if the lookupLayer is in the nested level of the dnn (in some nested composite layer) + const CArray& GetEmbeddingsLayerPath() const { return embeddingPath; } + void SetEmbeddingsLayerPath(const CArray& path) { path.CopyTo(embeddingPath); } + // Channel index in embeddings layer. int GetChannelIndex() const { return channelIndex; } void SetChannelIndex( int val ); @@ -48,12 +58,15 @@ class NEOML_API CTiedEmbeddingsLayer : public CBaseLayer { int BlobsForLearn() const override { return TInputBlobs; } private: - // Embedding layer name from which we take the matrix. - CString embeddingsLayerName; + // Path for embedding layer from which matrix is taken + // Now it contains the path as array + // So in case of no composite layer it is gonna be { "embeddingName" } + CArray embeddingPath; // Channel index in embedding layer. int channelIndex; const CDnnBlob* getEmbeddingsTable() const; + CMultichannelLookupLayer* getLookUpLayer() const; }; // Tied embeddings. diff --git a/NeoML/src/Dnn/Dnn.cpp b/NeoML/src/Dnn/Dnn.cpp index d75cda566..0d4fa6947 100644 --- a/NeoML/src/Dnn/Dnn.cpp +++ b/NeoML/src/Dnn/Dnn.cpp @@ -1,4 +1,4 @@ -/* Copyright © 2017-2023 ABBYY +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -452,6 +452,28 @@ CPtr CDnn::GetLayer( const char* name ) const return layerMap.Get( name ); } +CPtr CDnn::GetLayer( const CArray& path) +{ + CheckArchitecture(path.Size() > 0, "NULL", "can not find layer - empty path"); + if (path.Size() == 1) { + return GetLayer(path[0]); + } else { + CheckArchitecture(layerMap.Has(path[0]), path[0], "layer is not in this dnn"); + CPtr currComp = CheckCast( GetLayer(path[0]).Ptr() ); + for (int i = 1; i < path.Size() - 1; ++i) { + CheckArchitecture(currComp->HasLayer(path[i]), path[i], "layer is not in this composite layer"); + currComp = CheckCast(currComp->GetLayer(path[i]).Ptr()); + } + CheckArchitecture(currComp->HasLayer(path.Last()), path.Last(), "layer is not contained by this path"); + return currComp->GetLayer(path.Last()); + } +} + +CPtr CDnn::GetLayer(const CArray& path) const +{ + return const_cast(this)->GetLayer(path); +} + void CDnn::AddLayerImpl( CBaseLayer& layer ) { layer.CheckLayerArchitecture( !layerMap.Has( layer.GetName() ), "layer already in this dnn" ); diff --git a/NeoML/src/Dnn/Layers/CompositeLayer.cpp b/NeoML/src/Dnn/Layers/CompositeLayer.cpp index 650696e5a..f93c611fa 100644 --- a/NeoML/src/Dnn/Layers/CompositeLayer.cpp +++ b/NeoML/src/Dnn/Layers/CompositeLayer.cpp @@ -1,4 +1,4 @@ -/* Copyright © 2017-2023 ABBYY +/* Copyright © 2017-2024 ABBYY Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -202,6 +202,22 @@ CPtr CCompositeLayer::GetLayer(const char* name) const return layerMap.Get(name); } +CPtr CCompositeLayer::GetLayer(const CArray& path) +{ + CPtr currComp = this; + for(int i = 0; i < path.Size() - 1; ++i ) { + CheckArchitecture(currComp->layerMap.Has(path[i]), path[i], "layer is not in this composite layer"); + currComp = CheckCast( currComp->GetLayer(path[i]).Ptr() ); + } + CheckArchitecture(currComp->HasLayer(path.Last()), path.Last(), "layer is not contained by this path"); + return currComp->GetLayer(path.Last()); +} + +CPtr CCompositeLayer::GetLayer(const CArray& path) const +{ + return const_cast(this)->GetLayer(path); +} + bool CCompositeLayer::HasLayer(const char* name) const { return layerMap.Has(name); diff --git a/NeoML/src/Dnn/Layers/TiedEmbeddingsLayer.cpp b/NeoML/src/Dnn/Layers/TiedEmbeddingsLayer.cpp index 5aaf1a436..590856770 100644 --- a/NeoML/src/Dnn/Layers/TiedEmbeddingsLayer.cpp +++ b/NeoML/src/Dnn/Layers/TiedEmbeddingsLayer.cpp @@ -1,4 +1,4 @@ -/* Copyright © 2017-2020 ABBYY Production LLC +/* Copyright © 2017-2024 ABBYY Production LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -36,29 +36,33 @@ void CTiedEmbeddingsLayer::SetChannelIndex( int val ) channelIndex = val; } -static const int CnnTiedEmbeddingsLayerVersion = 2000; +static const int CnnTiedEmbeddingsLayerVersion = 2001; void CTiedEmbeddingsLayer::Serialize( CArchive& archive ) { - archive.SerializeVersion( CnnTiedEmbeddingsLayerVersion, CDnn::ArchiveMinSupportedVersion ); + int version = archive.SerializeVersion(CnnTiedEmbeddingsLayerVersion, CDnn::ArchiveMinSupportedVersion); CBaseLayer::Serialize( archive ); - archive.Serialize( embeddingsLayerName ); + if (version < 2001 && archive.IsLoading()) { + CString embeddingLayerName; + archive.Serialize(embeddingLayerName); + embeddingPath = { embeddingLayerName }; + } + else { + archive.Serialize(embeddingPath); + } + archive.Serialize( channelIndex ); } void CTiedEmbeddingsLayer::Reshape() { CheckInputs(); + const CMultichannelLookupLayer* embeddingsLayer = getLookUpLayer(); - CheckLayerArchitecture( GetDnn()->HasLayer( embeddingsLayerName ), - "Network does not contain embeddings layer with that name." ); - const CMultichannelLookupLayer* embeddingsLayer = dynamic_cast( - GetDnn()->GetLayer( embeddingsLayerName ).Ptr() ); CheckLayerArchitecture( embeddingsLayer != 0, "The layer is not an embedding layer." 
); - const int embeddingsChannelsCount = CheckCast( - GetDnn()->GetLayer( embeddingsLayerName ) )->GetDimensions().Size(); + const int embeddingsChannelsCount = embeddingsLayer->GetDimensions().Size(); CheckLayerArchitecture( channelIndex < embeddingsChannelsCount, "Wrong channgel index for embeddings" ); @@ -129,9 +133,7 @@ void CTiedEmbeddingsLayer::LearnOnce() diffBlob->Clear(); } - CMultichannelLookupLayer* embeddingsLayer = - CheckCast( GetDnn()->GetLayer( embeddingsLayerName ) ); - + CMultichannelLookupLayer* embeddingsLayer = getLookUpLayer(); CObjectArray totalDiffBlobs; const int channelsCount = embeddingsLayer->GetDimensions().Size(); for( int i = 0; i < channelsCount; i++ ) { @@ -152,9 +154,15 @@ const CDnnBlob* CTiedEmbeddingsLayer::getEmbeddingsTable() const { NeoAssert( channelIndex >= 0 ); - const CMultichannelLookupLayer* embeddingsLayer = - CheckCast( GetDnn()->GetLayer( embeddingsLayerName ) ); - return embeddingsLayer->GetEmbeddings( channelIndex ); + return getLookUpLayer()->GetEmbeddings( channelIndex ); +} + +CMultichannelLookupLayer* CTiedEmbeddingsLayer::getLookUpLayer() const +{ + CMultichannelLookupLayer* embeddingsLayer; + embeddingsLayer = CheckCast( + const_cast(GetDnn())->GetLayer(embeddingPath).Ptr()); + return embeddingsLayer; } CLayerWrapper TiedEmbeddings( const char* name, int channel ) diff --git a/NeoML/test/src/CMakeLists.txt b/NeoML/test/src/CMakeLists.txt index 961d7accd..a3cb12b19 100644 --- a/NeoML/test/src/CMakeLists.txt +++ b/NeoML/test/src/CMakeLists.txt @@ -36,6 +36,7 @@ target_sources(${PROJECT_NAME} INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/TestFixture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/TestParams.h ${CMAKE_CURRENT_SOURCE_DIR}/TestParams.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/TiedEmbeddingTest.cpp ${CMAKE_CURRENT_SOURCE_DIR}/TransformerSourceMaskTest.cpp ) diff --git a/NeoML/test/src/TiedEmbeddingTest.cpp b/NeoML/test/src/TiedEmbeddingTest.cpp new file mode 100644 index 000000000..8042ce0d6 --- /dev/null +++ 
b/NeoML/test/src/TiedEmbeddingTest.cpp @@ -0,0 +1,157 @@ +/* Copyright © 2024 ABBYY + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +--------------------------------------------------------------------------------------------------------------*/ + +#include +#pragma hdrstop + +#include +#include + +using namespace NeoML; +using namespace NeoMLTest; + +//---------------------------------------------------------------------------------------------------------------------- + +TEST(TiedEmbeddingTest, CompositeTest) +{ + CRandom random( 42 ); + CDnn net(random, MathEngine()); + + const int vectorCount = 2; + const int embeddingSize = 2; + CPtr data = Source(net, "data"); + CPtr dataBlob = CDnnBlob::CreateDataBlob(MathEngine(), CT_Float, 1, 1, 1); + dataBlob->GetData().SetValue(1.f); + data->SetBlob(dataBlob); + + CPtr lookup = new CMultichannelLookupLayer(MathEngine()); + lookup->SetDimensions({ { vectorCount, embeddingSize } }); + lookup->SetName("lookup"); + lookup->SetUseFrameworkLearning(true); + CPtr embeddingInitializer = new CDnnUniformInitializer(random); + lookup->Initialize(embeddingInitializer); + + CPtr compositeInner = new CCompositeLayer(net.GetMathEngine()); + compositeInner->SetName("compositeInner"); + compositeInner->SetInputMapping(*lookup); + compositeInner->SetOutputMapping(*lookup); + compositeInner->AddLayer(*lookup); + + CPtr composite = new CCompositeLayer(net.GetMathEngine()); + composite->SetName("composite"); + composite->Connect(*data); + 
composite->AddLayer(*compositeInner); + composite->SetInputMapping(*compositeInner); + composite->SetOutputMapping(*compositeInner); + net.AddLayer(*composite); + + CPtr tiedEmb = new CTiedEmbeddingsLayer(MathEngine()); + tiedEmb->SetName("tiedEmb"); + tiedEmb->SetEmbeddingsLayerPath({ "composite", "compositeInner", "lookup" }); + net.AddLayer(*tiedEmb); + tiedEmb->Connect(*composite); + + CPtr softmax = new CSoftmaxLayer(MathEngine()); + softmax->SetName("softmax"); + net.AddLayer(*softmax); + softmax->Connect(*tiedEmb); + + CPtr targets = Source(net, "targets"); + CPtr targetsBlob = CDnnBlob::CreateDataBlob(MathEngine(), CT_Int, 1, 1, 1); + targetsBlob->GetData().SetValueAt(0, 1); + targets->SetBlob(targetsBlob); + + CPtr loss = new CCrossEntropyLossLayer(MathEngine()); + loss->SetName("loss"); + net.AddLayer(*loss); + loss->SetApplySoftmax(false); + loss->Connect(0, *softmax); + loss->Connect(1, *targets); + + CPtr solver = new CDnnSimpleGradientSolver(MathEngine()); + solver->SetL1Regularization(0); + solver->SetL2Regularization(0); + solver->SetLearningRate(1.f); + net.SetSolver(solver.Ptr()); + + const int numOfEpochs = 5; + for (int i = 0; i < numOfEpochs; ++i) { + net.RunAndLearnOnce(); + } + + ASSERT_NEAR(loss->GetLastLoss(), 0.f, 1e-3f); + ASSERT_EQ(dynamic_cast(net.GetLayer({ "composite", "compositeInner", "lookup" }).Ptr()), lookup.Ptr()); +} + +TEST(TiedEmbeddingTest, NoCompositeTest) +{ + CRandom random( 42 ); + CDnn net(random, MathEngine()); + + const int vectorCount = 2; + const int embeddingSize = 2; + CPtr data = Source(net, "data"); + CPtr dataBlob = CDnnBlob::CreateDataBlob(MathEngine(), CT_Float, 1, 1, 1); + dataBlob->GetData().SetValue(1.f); + data->SetBlob(dataBlob); + + CPtr lookup = new CMultichannelLookupLayer(MathEngine()); + lookup->SetDimensions({ { vectorCount, embeddingSize } }); + lookup->SetName("lookup"); + lookup->SetUseFrameworkLearning(true); + CPtr embeddingInitializer = new CDnnUniformInitializer(random); + 
lookup->Initialize(embeddingInitializer); + net.AddLayer(*lookup); + lookup->Connect(*data); + + CPtr tiedEmb = new CTiedEmbeddingsLayer(MathEngine()); + tiedEmb->SetName("tiedEmb"); + tiedEmb->SetEmbeddingsLayerName("lookup"); + net.AddLayer(*tiedEmb); + tiedEmb->Connect(*lookup); + + CPtr softmax = new CSoftmaxLayer(MathEngine()); + softmax->SetName("softmax"); + net.AddLayer(*softmax); + softmax->Connect(*tiedEmb); + + CPtr targets = Source(net, "targets"); + CPtr targetsBlob = CDnnBlob::CreateDataBlob(MathEngine(), CT_Int, 1, 1, 1); + targetsBlob->GetData().SetValueAt(0, 1); + targets->SetBlob(targetsBlob); + + CPtr loss = new CCrossEntropyLossLayer(MathEngine()); + loss->SetName("loss"); + net.AddLayer(*loss); + loss->SetApplySoftmax(false); + loss->Connect(0, *softmax); + loss->Connect(1, *targets); + + CPtr solver = new CDnnSimpleGradientSolver(MathEngine()); + solver->SetL1Regularization(0); + solver->SetL2Regularization(0); + solver->SetLearningRate(1.f); + net.SetSolver(solver.Ptr()); + + net.RunOnce(); + + const int numOfEpochs = 5; + for (int i = 0; i < numOfEpochs; ++i) { + net.RunAndLearnOnce(); + } + + ASSERT_NEAR(loss->GetLastLoss(), 0.f, 1e-3f); + ASSERT_EQ(dynamic_cast(net.GetLayer("lookup").Ptr()), lookup.Ptr()); +}