Skip to content

Commit

Permalink
Speed conv_fusion_op for identity activation. (PaddlePaddle#14744)
Browse files Browse the repository at this point in the history
* Refine conv_fusion_op for identity activation.
* Fix unit testing.
test=develop
  • Loading branch information
qingqing01 authored Dec 6, 2018
1 parent c6b39a0 commit 549f165
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 16 deletions.
52 changes: 36 additions & 16 deletions paddle/fluid/operators/conv_fusion_op.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {

auto x_dims = framework::vectorize(input->dims());
auto f_dims = framework::vectorize(filter->dims());
if (activation == "identity") {
// Only the CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM algo is
// enabled with CUDNN_ACTIVATION_IDENTITY in cuDNN lib.
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
} else if (!exhaustive_search) {
if (!exhaustive_search) {
CUDNN_ENFORCE(platform::dynload::cudnnGetConvolutionForwardAlgorithm(
handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc,
cudnn_output_desc, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
Expand Down Expand Up @@ -165,18 +161,42 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_LE(workspace_size_in_bytes, workspace_size_limit,
"workspace_size to be allocated exceeds the limit");

// ------------------- cudnn conv+bias+act forward --------------------
ScalingParamType<T> alpha1 = 1.0f;
ScalingParamType<T> alpha2 = residual ? 1.0f : 0.0f;
auto cudnn_func = [&](void* cudnn_workspace) {
CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBiasActivationForward(
handle, &alpha1, cudnn_input_desc, input_data, cudnn_filter_desc,
filter_data, cudnn_conv_desc, algo, cudnn_workspace,
workspace_size_in_bytes, &alpha2, cudnn_output_desc, residual_data,
cudnn_bias_desc, bias_data, cudnn_act_desc, cudnn_output_desc,
if ((activation == "identity") &&
(algo != CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM) &&
(!residual)) {
// Only the CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM algo is
// enabled with CUDNN_ACTIVATION_IDENTITY in cuDNN lib.
// But test in some case, the speed is slower, change to use
// cudnnConvolutionForward and cudnnAddTensor
// ------------- cudnn conv forward and bias add ---------------------
ScalingParamType<T> alpha = 1.0f, beta = 0.0f;
auto cudnn_func = [&](void* cudnn_workspace) {
CUDNN_ENFORCE(platform::dynload::cudnnConvolutionForward(
handle, &alpha, cudnn_input_desc, input_data, cudnn_filter_desc,
filter_data, cudnn_conv_desc, algo, cudnn_workspace,
workspace_size_in_bytes, &beta, cudnn_output_desc, output_data));
};
workspace_handle.RunFunc(cudnn_func, workspace_size_in_bytes);
CUDNN_ENFORCE(platform::dynload::cudnnAddTensor(
handle, &alpha, cudnn_bias_desc, bias_data, &alpha, cudnn_output_desc,
output_data));
};
workspace_handle.RunFunc(cudnn_func, workspace_size_in_bytes);
} else {
if (activation == "identity") {
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
}
// ------------------- cudnn conv+bias+act forward --------------------
ScalingParamType<T> alpha1 = 1.0f;
ScalingParamType<T> alpha2 = residual ? 1.0f : 0.0f;
auto cudnn_func = [&](void* cudnn_workspace) {
CUDNN_ENFORCE(platform::dynload::cudnnConvolutionBiasActivationForward(
handle, &alpha1, cudnn_input_desc, input_data, cudnn_filter_desc,
filter_data, cudnn_conv_desc, algo, cudnn_workspace,
workspace_size_in_bytes, &alpha2, cudnn_output_desc, residual_data,
cudnn_bias_desc, bias_data, cudnn_act_desc, cudnn_output_desc,
output_data));
};
workspace_handle.RunFunc(cudnn_func, workspace_size_in_bytes);
}
}
};
#endif
Expand Down
6 changes: 6 additions & 0 deletions python/paddle/fluid/tests/unittests/test_conv2d_fusion_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,12 @@ def init_activation(self):
self.activation = 'identity'


class TestIdentityActivation(TestConv2dFusionOp):
def init_activation(self):
self.activation = 'identity'
self.add_residual_data = False


class TestWithGroup(TestConv2dFusionOp):
def init_group(self):
self.groups = 3
Expand Down

0 comments on commit 549f165

Please sign in to comment.