Skip to content

Commit

Permalink
[CudaMathEngine] Fix restrict modifier for function arguments (#1011)
Browse files Browse the repository at this point in the history
Signed-off-by: Kirill Golikov <kirill.golikov@abbyy.com>
Co-authored-by: Valeriy Fedyunin <valery.fedyunin@abbyy.com>
  • Loading branch information
favorart and Valeriy Fedyunin authored Dec 21, 2023
1 parent bc3db75 commit 3c37aed
Show file tree
Hide file tree
Showing 7 changed files with 22 additions and 22 deletions.
6 changes: 3 additions & 3 deletions NeoMathEngine/src/GPU/CUDA/Kernels/CudaBlasKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ __global__ void AddVectorToMatrixElementsKernel( float* matrix, int height, int
}

const int AddVectorToMatrixElementsMulCombine = 4;
-__global__ void AddVectorToMatrixElementsKernel( float* matrix, int /*height*/, int width,
+__global__ void AddVectorToMatrixElementsKernel( float* __restrict__ matrix, int /*height*/, int width,
const int* __restrict__ rowIndices, const int* __restrict__ columnIndices,
const float* __restrict__ vector, int vectorSize )
{
Expand All @@ -65,7 +65,7 @@ __global__ void AddVectorToMatrixElementsKernel( float* matrix, int /*height*/,
// Assigns the values matrix[rowIndices[i], columnIndices[i]] = vector[i].
const int SetVectorToMatrixElementsMulCombine = 4;
__global__ void SetVectorToMatrixElementsKernel(
-float* matrix, int /*height*/, int width,
+float* __restrict__ matrix, int /*height*/, int width,
const int* __restrict__ rowIndices, const int* __restrict__ columnIndices,
const float* __restrict__ vector, int vectorSize )
{
Expand Down Expand Up @@ -314,7 +314,7 @@ __global__ void MatrixLogSumExpByRowsKernel(const float* __restrict__ matrix, in
}

const int MatrixSoftmaxByRowsCombine = 2;
-__global__ void MatrixSoftmaxByRowsKernel(const float* __restrict__ matrix,
+__global__ void MatrixSoftmaxByRowsKernel(const float* matrix,
int height, int width, float* result, int widthNorm)
{
extern __shared__ float buffer[];
Expand Down
6 changes: 3 additions & 3 deletions NeoMathEngine/src/GPU/CUDA/Kernels/CudaDnn3dPoolingKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ limitations under the License.
namespace NeoML {

__global__ void Blob3dMaxPoolingKernel( const CCuda3dMaxPoolingDescInternal desc, const float* __restrict__ sourceData,
-int* maxIndices, float* resultData )
+int* __restrict__ maxIndices, float* __restrict__ resultData )
{
const CCudaBlobDesc& result = desc.Result;
const CCudaBlobDesc& source = desc.Source;
Expand Down Expand Up @@ -83,8 +83,8 @@ __global__ void Blob3dMaxPoolingKernel( const CCuda3dMaxPoolingDescInternal desc
}
}

-__global__ void Blob3dMaxPoolingBackwardKernel( const CCuda3dMaxPoolingDescInternal desc, const float* resultDiff,
-const int* maxIndices, float* sourceDiff, bool isAtomic )
+__global__ void Blob3dMaxPoolingBackwardKernel( const CCuda3dMaxPoolingDescInternal desc, const float* __restrict__ resultDiff,
+const int* __restrict__ maxIndices, float* __restrict__ sourceDiff, bool isAtomic )
{
const CCudaBlobDesc& result = desc.Result;
const CCudaBlobDesc& source = desc.Source;
Expand Down
2 changes: 1 addition & 1 deletion NeoMathEngine/src/GPU/CUDA/Kernels/CudaDnnCtcKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ __global__ void CtcFillPaddingKernel( int maxSeqLen, int batchSize, int classCou

const int CtcMatrixLogSumExpByColumnsCombine = 2;
__global__ void CtcMatrixLogSumExpByColumnsKernel(int batchSize, const float* __restrict__ matrix, int height, int width,
-float* result, int heightNorm)
+float* __restrict__ result, int heightNorm)
{
extern __shared__ float buffer[];
float& my = buffer[(threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x];
Expand Down
4 changes: 2 additions & 2 deletions NeoMathEngine/src/GPU/CUDA/Kernels/CudaDnnDropoutKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ limitations under the License.

namespace NeoML {

-__global__ void RandomMatrixDropout( const float* __restrict__ first, int firstHeight,
+__global__ void RandomMatrixDropout( const float* first, int firstHeight,
int firstWidth, float* res, int seed, float forwardRate )
{
const unsigned int threshold = forwardRate * UINT_MAX;
Expand All @@ -39,7 +39,7 @@ __global__ void RandomMatrixDropout( const float* __restrict__ first, int firstH
}
}

-__global__ void RandomSpatialDropout( const float* __restrict__ input, float* res, int inputObjectCount,
+__global__ void RandomSpatialDropout( const float* input, float* res, int inputObjectCount,
int inputObjectSize, int maskObjectCount, int maskObjectSize, int seed, float forwardRate )
{
const unsigned int threshold = forwardRate * UINT_MAX;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ __global__ void BlobGlobalMaxPoolingGlobalShuffleKernel( const CCudaGlobalMaxPoo

const int BlobGlobalMaxPoolingBackwardCombine = 8;
__global__ void BlobGlobalMaxPoolingBackwardKernel( const CCudaGlobalMaxPoolingDescInternal desc, const float* __restrict__ resultDiff,
-const int* maxIndices, float* sourceDiff, int poolSize, int maxCount, int fullSize )
+const int* __restrict__ maxIndices, float* __restrict__ sourceDiff, int poolSize, int maxCount, int fullSize )
{
int index;
int step;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ __global__ void BlobGlobalMaxOverTimePoolingKernel( const CCudaGlobalMaxOverTime
}

__global__ void BlobGlobalMaxOverTimePoolingBackwardKernel( const CCudaGlobalMaxOverTimePoolingDescInternal desc,
-const float* __restrict__ resultDiff, const int* __restrict__ maxIndicesData, float* sourceDiff )
+const float* __restrict__ resultDiff, const int* __restrict__ maxIndicesData, float* __restrict__ sourceDiff )
{
const CCudaBlobDesc& result = desc.Result;
int pos;
Expand Down
22 changes: 11 additions & 11 deletions NeoMathEngine/src/GPU/CUDA/Kernels/CudaVectorMathKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,8 @@ __global__ void VectorCumSumAlongDimensionDiagKernel( const float* __restrict__
}

const int VectorEqualCombineCount = 16;
-__global__ void VectorEqualKernel( const int* __restrict__ first,
-const int* __restrict__ second, float* __restrict__ result, int count )
+__global__ void VectorEqualKernel( const int* first,
+const int* second, float* result, int count )
{
int index;
int step;
Expand All @@ -273,8 +273,8 @@ __global__ void VectorEqualKernel( const int* __restrict__ first,
}
}

-__global__ void VectorEqualValueKernel( const int* __restrict__ first,
-float* __restrict__ result, int count, const int* __restrict__ value )
+__global__ void VectorEqualValueKernel( const int* first,
+float* result, int count, const int* __restrict__ value )
{
int index;
int step;
Expand Down Expand Up @@ -346,7 +346,7 @@ __global__ void VectorELUDiffOpKernel( const float* __restrict__ first, const fl
}
}

-__global__ void VectorReLUKernel(const float* __restrict__ first, float* result,
+__global__ void VectorReLUKernel(const float* first, float* result,
int count, const float* __restrict__ threshold)
{
int index;
Expand Down Expand Up @@ -435,7 +435,7 @@ __global__ void VectorLeakyReLUDiffKernel( const float* __restrict__ first, cons
}
}

-__global__ void VectorHSwishKernel( const float* __restrict__ first, float* result, int count )
+__global__ void VectorHSwishKernel( const float* first, float* result, int count )
{
int index;
int step;
Expand Down Expand Up @@ -484,7 +484,7 @@ __global__ void VectorHSwishDiffKernel( const float* __restrict__ first, const f
}
}
const int VectorEltwiseMaxCombineCount = 8;
-__global__ void VectorEltwiseMaxKernel(const float* __restrict__ first, const float* __restrict__ second,
+__global__ void VectorEltwiseMaxKernel(const float* first, const float* second,
float* result, int count)
{
int index;
Expand All @@ -506,7 +506,7 @@ __global__ void VectorEltwiseMaxKernel(const float* __restrict__ first, const fl
}

const int VectorEltwiseMinCombineCount = 8;
-__global__ void VectorEltwiseMinKernel(const float* __restrict__ first, const float* __restrict__ second,
+__global__ void VectorEltwiseMinKernel(const float* first, const float* second,
float* result, int count)
{
int index;
Expand All @@ -527,7 +527,7 @@ __global__ void VectorEltwiseMinKernel(const float* __restrict__ first, const fl
}
}

-__global__ void VectorAbsKernel(const float* __restrict__ first, float* result, int count)
+__global__ void VectorAbsKernel(const float* first, float* result, int count)
{
int index;
int step;
Expand Down Expand Up @@ -1437,7 +1437,7 @@ __global__ void VectorLogDiffKernel( const float* __restrict__ sourceGrad,
}

const int VectorAbsDiffCombine = 16;
-__global__ void VectorAbsDiffKernel( const float* __restrict__ sourceGrad,
+__global__ void VectorAbsDiffKernel( const float* sourceGrad,
int gradCount, int gradSize, int gradNorm,
const float* __restrict__ first, float* resultGrad )
{
Expand All @@ -1462,7 +1462,7 @@ __global__ void VectorAbsDiffKernel( const float* __restrict__ sourceGrad,
}

const int VectorMinMaxDiffCombine = 16;
-__global__ void VectorMinMaxDiffKernel( const float* __restrict__ sourceGrad,
+__global__ void VectorMinMaxDiffKernel( const float* sourceGrad,
int gradCount, int gradSize, int gradNorm,
const float* __restrict__ first, float* resultGrad,
const float* __restrict__ minPtr, const float* __restrict__ maxPtr )
Expand Down

0 comments on commit 3c37aed

Please sign in to comment.