Reduce the number of transposes in NeoOnnx (neoml-lib#656)
* Add O_InputDim to CTransformLayer::TOperation

Signed-off-by: Valeriy Fedyunin <valery.fedyunin@abbyy.com>

* Fix assertion in CTransformLayer::CDimensionRule constructor

Signed-off-by: Valeriy Fedyunin <valery.fedyunin@abbyy.com>

* Implement ConvertTensor with CTransformLayer (via O_InputDim rule)

Signed-off-by: Valeriy Fedyunin <valery.fedyunin@abbyy.com>

* Add convertToChannelFirst and convertToChannelLast special cases to ConvertTensor

Signed-off-by: Valeriy Fedyunin <valery.fedyunin@abbyy.com>

Co-authored-by: Stanislav Angeliuk <59917951+SAngeliuk@users.noreply.github.com>
Valeriy Fedyunin and SAngeliuk committed Jun 21, 2022
1 parent 4eab669 commit c0007d2
Showing 3 changed files with 193 additions and 11 deletions.
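
The core idea of the change: a CTransformLayer rule of the new kind O_InputDim copies an output dimension's size from a chosen input dimension, so NeoOnnx can relabel blob dimensions without moving any data; a real transpose is then needed only when the relative memory order of the dimensions actually changes. A minimal sketch of such a renaming layer, using only calls that appear in the diffs below (the layer name and the dimension pair are our illustration; the remaining dimensions are assumed to keep their default rules):

    // Relabel BD_Height data as BD_Width without copying (hedged sketch).
    CPtr<CTransformLayer> transform = new CTransformLayer( mathEngine );
    transform->SetName( "rename_height_to_width" ); // illustrative name
    // The output BD_Width takes its size from the input BD_Height...
    transform->SetDimensionRule( BD_Width,
        CTransformLayer::CDimensionRule( CTransformLayer::O_InputDim, BD_Height ) );
    // ...while the no-longer-used BD_Height is pinned to size 1.
    transform->SetDimensionRule( BD_Height,
        CTransformLayer::CDimensionRule( CTransformLayer::O_SetSize, 1 ) );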
8 changes: 6 additions & 2 deletions NeoML/include/NeoML/Dnn/Layers/TransformLayer.h
@@ -39,7 +39,11 @@ class NEOML_API CTransformLayer : public CBaseInPlaceLayer {
         // Multiply this dimension by Parameter value
         O_Multiply,
         // Divide this dimension by Parameter value
-        O_Divide
+        O_Divide,
+        // Gets the size from the given input dimension
+        O_InputDim,
+
+        O_Count
     };
 
     // The rule of dimension change
@@ -55,7 +59,7 @@ class NEOML_API CTransformLayer : public CBaseInPlaceLayer {
         bool operator==( const CDimensionRule& other ) const;
 
         // Applies the transformation set by the rule
-        int Transform( int input ) const;
+        int Transform( int input, const CBlobDesc& inputDesc ) const;
     };
 
     // The parameters for transforming the specified dimension
14 changes: 8 additions & 6 deletions NeoML/src/Dnn/Layers/TransformLayer.cpp
@@ -30,7 +30,8 @@ CTransformLayer::CDimensionRule::CDimensionRule( TOperation op, int param ) :
     Operation( op ),
     Parameter( param )
 {
-    NeoAssert( Operation == O_Remainder || param > 0 );
+    NeoAssert( Operation == O_Remainder || param > 0
+        || ( Operation == O_InputDim && param >= 0 && param < static_cast<int>( BD_Count ) ) );
 }
 
 bool CTransformLayer::CDimensionRule::operator==( const CDimensionRule& other ) const
@@ -39,20 +40,21 @@ bool CTransformLayer::CDimensionRule::operator==( const CDimensionRule& other )
 }
 
 // Applies the transformation
-int CTransformLayer::CDimensionRule::Transform( int input ) const
+int CTransformLayer::CDimensionRule::Transform( int input, const CBlobDesc& inputDesc ) const
 {
+    static_assert( O_Count == 5, "O_Count != 5" );
     switch( Operation ) {
         case O_Remainder:
             return 1;
         case O_SetSize:
             return Parameter;
         case O_Multiply:
             return input * Parameter;
-            break;
         case O_Divide:
             NeoAssert( input % Parameter == 0 );
             return input / Parameter;
-            break;
+        case O_InputDim:
+            return inputDesc.DimSize( Parameter );
         default:
             NeoAssert( false );
     }
@@ -106,7 +108,7 @@ void CTransformLayer::OnReshaped()
             NeoAssert(remainderDim < 0);
             remainderDim = d;
         }
-        int outputDimSize = rules[d].Transform(inputDescs[0].DimSize(d));
+        int outputDimSize = rules[d].Transform(inputDescs[0].DimSize(d), inputDescs[0]);
         outputDescs[0].SetDimSize(d, outputDimSize);
         NeoAssert(remainder % outputDimSize == 0);
         remainder /= outputDimSize;
@@ -121,7 +123,7 @@
     outputDesc = outputDescs[0];
 }
 
-static const int TransformLayerVersion = 2001;
+static const int TransformLayerVersion = 2002;
 
 void CTransformLayer::Serialize( CArchive& archive )
 {
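
For reference, the per-rule semantics after this change, as a hedged sketch (the values are ours; assumes CDimensionRule and its Transform method are publicly accessible, as the header above suggests):

    CBlobDesc desc( CT_Float );
    desc.SetDimSize( BD_BatchWidth, 8 );
    typedef CTransformLayer::CDimensionRule CRule;
    CRule( CTransformLayer::O_SetSize, 5 ).Transform( 6, desc );              // == 5
    CRule( CTransformLayer::O_Multiply, 2 ).Transform( 6, desc );             // == 12
    CRule( CTransformLayer::O_Divide, 3 ).Transform( 6, desc );               // == 2 (asserts 6 % 3 == 0)
    CRule( CTransformLayer::O_InputDim, BD_BatchWidth ).Transform( 6, desc ); // == 8, the new rule
    // O_Remainder returns 1 from Transform; OnReshaped later assigns it
    // whatever size keeps the total element count unchanged.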
182 changes: 179 additions & 3 deletions NeoOnnx/src/TensorUtils.cpp
@@ -108,6 +108,63 @@ static CPtr<const CUserTensor> convertTensorToHw( const CUserTensor& input, int
 
 //---------------------------------------------------------------------------------------------------------------------
 
+// Renames dimensions of data blob (without any reordering in memory)
+static CPtr<const CDnnBlob> renameDimensions( const CDnnBlob& input, const CTensorShape& shape, const CTensorLayout& outputLayout )
+{
+    NeoAssert( shape.Size() == outputLayout.Size() );
+    // We have to copy data here because multiple tensors may be connected to the input tensor
+    CBlobDesc outputBlobDesc( input.GetDataType() );
+    for( int dimIndex = 0; dimIndex < shape.Size(); ++dimIndex ) {
+        outputBlobDesc.SetDimSize( outputLayout[dimIndex], shape[dimIndex] );
+    }
+    IMathEngine& mathEngine = input.GetMathEngine();
+    CPtr<CDnnBlob> result = CDnnBlob::CreateBlob( mathEngine, input.GetDataType(), outputBlobDesc );
+    if( result->GetDataType() == CT_Float ) {
+        mathEngine.VectorCopy( result->GetData(), input.GetData(), input.GetDataSize() );
+    } else {
+        mathEngine.VectorCopy( result->GetData<int>(), input.GetData<int>(), input.GetDataSize() );
+    }
+    return result.Ptr();
+}
+
+// Renames dimensions of layer output (without any reordering in memory)
+static CLayerOutput renameDimensions( const CLayerOutput& input, const CTensorLayout& inputLayout, const CTensorLayout& outputLayout )
+{
+    NeoAssert( inputLayout.Size() == outputLayout.Size() );
+    CDnn& dnn = *( input.Layer->GetDnn() );
+    CPtr<CTransformLayer> transformLayer = new CTransformLayer( dnn.GetMathEngine() );
+    transformLayer->SetName( getUniqueLayerName( dnn, "transform_" ) );
+    for( TBlobDim dim = BD_BatchLength; dim < BD_Count; ++dim ) {
+        const int dimIndex = outputLayout.Find( dim );
+        if( dimIndex == NotFound ) {
+            transformLayer->SetDimensionRule( dim, CTransformLayer::CDimensionRule( CTransformLayer::O_SetSize, 1 ) );
+        } else {
+            transformLayer->SetDimensionRule( dim, CTransformLayer::CDimensionRule( CTransformLayer::O_InputDim, inputLayout[dimIndex] ) );
+        }
+    }
+    dnn.AddLayer( *transformLayer );
+    transformLayer->Connect( 0, *input.Layer, input.OutputIndex );
+    return CLayerOutput( transformLayer.Ptr(), 0 );
+}
+
+// Renames dimensions of tensor (without any reordering in memory)
+static CPtr<const CTensorBase> renameDimensions( const CTensorBase& input, const CTensorLayout& outputLayout )
+{
+    if( input.Layout() == outputLayout ) {
+        return &input;
+    }
+
+    if( input.IsCalculated() ) {
+        CPtr<const CDnnBlob> blob = renameDimensions( *dynamic_cast<const CDataTensor&>( input ).Data(),
+            input.Shape(), outputLayout );
+        return new CDataTensor( input.Shape(), outputLayout, *blob );
+    }
+
+    CLayerOutput layerOutput = renameDimensions( dynamic_cast<const CUserTensor&>( input ).LayerOutput(),
+        input.Layout(), outputLayout );
+    return new CUserTensor( input.Shape(), outputLayout, layerOutput );
+}
+
 // Swaps 2 dimensions of data blob
 static CPtr<const CDnnBlob> swapDimensions( const CDnnBlob& inputBlob, TBlobDim firstDim, TBlobDim secondDim )
 {
@@ -141,11 +198,13 @@ static CPtr<const CTensorBase> swapDimensions( const CTensorBase& input, TBlobDim firstDim, TBlobDim secondDim )
     CTensorLayout outputLayout = input.Layout();
     const int firstDimIndex = outputLayout.Find( firstDim );
     const int secondDimIndex = outputLayout.Find( secondDim );
-    NeoAssert( firstDimIndex != NotFound );
-    if( secondDimIndex != NotFound ) {
+    NeoAssert( firstDimIndex != NotFound || secondDimIndex != NotFound );
+    if( firstDimIndex != NotFound && secondDimIndex != NotFound ) {
         swap( outputLayout[firstDimIndex], outputLayout[secondDimIndex] );
-    } else {
+    } else if( firstDimIndex != NotFound ) {
         outputLayout[firstDimIndex] = secondDim;
+    } else {
+        outputLayout[secondDimIndex] = firstDim;
     }
 
     if( input.IsCalculated() ) {
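
The generalized swapDimensions above no longer requires the first dimension to be present in the layout: when only one of the two dimensions occurs, that axis is simply relabeled. A hedged illustration on a 2D tensor t with layout { BD_BatchWidth, BD_Height } (the example is ours):

    // Both dims present: their positions are swapped.
    //     swapDimensions( t, BD_BatchWidth, BD_Height )  -> { BD_Height, BD_BatchWidth }
    // Only the first dim present: that axis is relabeled.
    //     swapDimensions( t, BD_Height, BD_Channels )    -> { BD_BatchWidth, BD_Channels }
    // Only the second dim present (the new branch):
    //     swapDimensions( t, BD_Channels, BD_Height )    -> { BD_BatchWidth, BD_Channels }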
@@ -159,6 +218,85 @@ static CPtr<const CTensorBase> swapDimensions( const CTensorBase& input, TBlobDim firstDim, TBlobDim secondDim )
     return new CUserTensor( input.Shape(), outputLayout, layerOutput );
 }
 
+// Checks that layout is a channel-last-like (NeoML compatible)
+static inline bool isChannelLastLayout( const CTensorLayout& layout )
+{
+    for( int i = 2; i < layout.Size(); ++i ) {
+        if( layout[0] > layout[i] || layout[1] < layout[i]
+            || ( i != 2 && layout[i] < layout[i - 1] ) )
+        {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Checks that layout is a channel-first-like (ONNX compatible)
+static inline bool isChannelFirstLayout( const CTensorLayout& layout )
+{
+    for( int i = 1; i < layout.Size(); ++i ) {
+        if( layout[i] < layout[i - 1] ) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Converts tensor from channel-first-like layout to channel-last-like layout
+static CPtr<const CTensorBase> convertToChannelLast( const CTensorBase& input, const CTensorLayout& outputLayout )
+{
+    static_assert( BD_Count == 7, "BD_Count != 7" );
+    const int dimCount = input.DimCount();
+    NeoAssert( dimCount > 2 && dimCount < 7 );
+    NeoAssert( isChannelFirstLayout( input.Layout() ) );
+    NeoAssert( isChannelLastLayout( outputLayout ) );
+
+    CPtr<const CTensorBase> currInput = &input;
+    if( currInput->Layout().Find( BD_Channels ) != NotFound ) {
+        CTensorLayout intermediateLayout( dimCount );
+        // isChannelFirstLayout( input.Layout() ) guarantees that layout[i + 1] > layout[i]
+        // The restriction dimCount < 7 guarantees that in this intermediate layout BD_Channels won't be used
+        for( int i = 0; i < dimCount; ++i ) {
+            intermediateLayout[i] = static_cast<TBlobDim>( i );
+        }
+        currInput = renameDimensions( *currInput, intermediateLayout );
+    }
+
+    NeoAssert( currInput->Layout().Find( BD_Channels ) == NotFound );
+    currInput = swapDimensions( *currInput, currInput->Layout()[1], BD_Channels );
+    return renameDimensions( *currInput, outputLayout );
+}
+
+// Converts tensor from channel-last-like layout to channel-first-like layout
+static CPtr<const CTensorBase> convertToChannelFirst( const CTensorBase& input, const CTensorLayout& outputLayout )
+{
+    static_assert( BD_Count == 7, "BD_Count != 7" );
+    const int dimCount = input.DimCount();
+    NeoAssert( dimCount > 2 && dimCount < 7 );
+    NeoAssert( isChannelLastLayout( input.Layout() ) );
+    NeoAssert( isChannelFirstLayout( outputLayout ) );
+
+    CPtr<const CTensorBase> currInput = &input;
+    TBlobDim onnxChannelDim = currInput->Layout()[0] + 1;
+    if( currInput->Layout()[2] == currInput->Layout()[0] + 1 ) {
+        // We have to make an additional renaming
+        CTensorLayout intermediateLayout( dimCount );
+        intermediateLayout[0] = BD_BatchLength;
+        intermediateLayout[1] = BD_Channels;
+        for( int i = 2; i < dimCount; ++i ) {
+            intermediateLayout[i] = BD_ListSize + ( i - 2 );
+        }
+        onnxChannelDim = BD_BatchWidth;
+        currInput = renameDimensions( *currInput, intermediateLayout );
+    }
+
+    NeoAssert( currInput->Layout().Find( onnxChannelDim ) == NotFound );
+    currInput = swapDimensions( *currInput, currInput->Layout()[1], onnxChannelDim );
+    return renameDimensions( *currInput, outputLayout );
+}
+
 CPtr<const CTensorBase> ConvertTensor( const CTensorBase& input, const CTensorLayout& outputLayout )
 {
     // Trivial case
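
A concrete walkthrough of the channel-first special case (the layouts are our example, not from the diff): a 4D ONNX NCHW tensor with layout { BD_BatchLength, BD_BatchWidth, BD_Height, BD_Width } converted to { BD_BatchLength, BD_Channels, BD_Height, BD_Width }. convertToChannelLast finds no BD_Channels in the input layout, so the intermediate renaming is skipped; the single swapDimensions( input, BD_BatchWidth, BD_Channels ) call reorders memory from N,C,H,W to N,H,W,C; and the final renameDimensions is a no-op because the layout already matches. One transpose instead of a chain of pairwise swaps. Sketched (hedged; assumes input was built elsewhere and that CTensorLayout supports Add like other NeoML arrays):

    // NCHW -> channel-last in a single transpose.
    CTensorLayout outputLayout;
    outputLayout.Add( BD_BatchLength );
    outputLayout.Add( BD_Channels );
    outputLayout.Add( BD_Height );
    outputLayout.Add( BD_Width );
    CPtr<const CTensorBase> converted = ConvertTensor( *input, outputLayout );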
@@ -169,7 +307,45 @@ CPtr<const CTensorBase> ConvertTensor( const CTensorBase& input, const CTensorLayout& outputLayout )
     const int dimCount = outputLayout.Size();
     NeoAssert( input.DimCount() == dimCount );
 
+    // Special cases for conversions between channel-first (ONNX) and channel-last (NeoML)
+    if( dimCount > 2 && dimCount < 7 ) {
+        if( isChannelFirstLayout( input.Layout() ) && isChannelLastLayout( outputLayout ) ) {
+            return convertToChannelLast( input, outputLayout );
+        } else if( isChannelLastLayout( input.Layout() ) && isChannelFirstLayout( outputLayout ) ) {
+            return convertToChannelFirst( input, outputLayout );
+        }
+    }
+
+    // Step 1: renaming dimensions (if needed)
+    // It's possible that input.Layout() and outputLayout use different dimensions
+    // Renaming means assigning outputLayout's dimensions to the ones of input.Layout()
+    // without data transposing.
+    // e.g.
+    //     input.Layout() == { BD_Channels, BD_BatchWidth }
+    //     outputLayout == { BD_Height, BD_Width }
+    // result of renaming:
+    //     renamed.Layout == { BD_Width, BD_Height } (transpose will happen on step #2)
     CPtr<const CTensorBase> currentTensor = &input;
+    CTensorLayout sortedInputLayout = input.Layout();
+    sortedInputLayout.QuickSort<Ascending<TBlobDim>>();
+    CTensorLayout sortedOutputLayout = outputLayout;
+    sortedOutputLayout.QuickSort<Ascending<TBlobDim>>();
+    if( sortedInputLayout != sortedOutputLayout ) {
+        // Tensors use different blob dimensions, need to rename
+        const CTensorLayout& inputLayout = input.Layout();
+        CTensorLayout renamedLayout;
+        renamedLayout.SetBufferSize( dimCount );
+        for( int dimIndex = 0; dimIndex < dimCount; ++dimIndex ) {
+            const int sortedDimIndex = sortedInputLayout.Find( inputLayout[dimIndex] );
+            renamedLayout.Add( sortedOutputLayout[sortedDimIndex] );
+        }
+        currentTensor = renameDimensions( *currentTensor, renamedLayout );
+    }
+
+    // Step 2: reordering dimensions
+    // Step 1 guarantees that outputLayout is a permutation of currentTensor.Layout()
+    // NeoML has operations only for swapping 2 dimensions
+    // that's why reordering is implemented as a sequence of swaps
     for( int dimIndex = 0; dimIndex < dimCount; ++dimIndex ) {
         TBlobDim inputDim = currentTensor->Layout()[dimIndex];
         TBlobDim outputDim = outputLayout[dimIndex];
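
For the general path, a worked trace of the two steps on the example from the step 1 comment (the layouts come from the diff's own comment):

    // input.Layout() == { BD_Channels, BD_BatchWidth }, outputLayout == { BD_Height, BD_Width }
    // Step 1: the sorted layouts { BD_BatchWidth, BD_Channels } and { BD_Height, BD_Width }
    // differ, so dimensions are renamed by sorted rank: BD_BatchWidth -> BD_Height,
    // BD_Channels -> BD_Width. The tensor now has layout { BD_Width, BD_Height }
    // and memory is untouched.
    // Step 2: at dimIndex 0 the layout holds BD_Width while outputLayout wants BD_Height,
    // so a single swap of BD_Width and BD_Height performs the only real transpose,
    // yielding { BD_Height, BD_Width }.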
