Skip to content

Commit

Permalink
Merge pull request PaddlePaddle#16198 from velconia/imperative_train_…
Browse files Browse the repository at this point in the history
…speed

Improve imperative mode training speed
  • Loading branch information
velconia committed Mar 18, 2019
2 parents 2579ade + 3622537 commit 8e4ad00
Show file tree
Hide file tree
Showing 16 changed files with 170 additions and 57 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
"${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
"${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
message(STATUS "AR tools: ${CMAKE_AR}")

if(WIN32)
set(CMAKE_SUPPRESS_REGENERATION ON)
set(CMAKE_STATIC_LIBRARY_PREFIX lib)
Expand Down
8 changes: 5 additions & 3 deletions paddle/fluid/framework/grad_op_desc_maker.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ limitations under the License. */

#pragma once
#include <algorithm>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/op_desc.h"
Expand Down Expand Up @@ -55,11 +57,11 @@ class GradOpDescMakerBase {
std::back_inserter(ret_val),
[this](const std::string& fwd_var_name) -> std::string {
auto g_name = GradVarName(fwd_var_name);
if (no_grad_set_.count(g_name)) {
return kEmptyVarName;
} else {
if (no_grad_set_.empty() || !no_grad_set_.count(g_name)) {
(*this->grad_to_var_)[g_name] = fwd_var_name;
return g_name;
} else {
return kEmptyVarName;
}
});
if (!drop_empty_grad) {
Expand Down
1 change: 1 addition & 0 deletions paddle/fluid/imperative/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ if(WITH_PYTHON)
cc_library(layer SRCS layer.cc DEPS proto_desc operator device_context blas pybind)
cc_library(tracer SRCS tracer.cc DEPS proto_desc device_context pybind)
cc_library(engine SRCS engine.cc)
cc_library(imperative_profiler SRCS profiler.cc)
endif()
6 changes: 3 additions & 3 deletions paddle/fluid/imperative/layer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
VLOG(3) << "apply grad op " << grad_op_desc->Type();

// Allocate tmp grad output variable
for (auto it : grad_output_variable_map) {
for (const auto& it : grad_output_variable_map) {
auto& outputs = tmp_grad_outputs[k][it.first];
outputs.reserve(it.second.size());
for (size_t i = 0; i < it.second.size(); ++i) {
Expand Down Expand Up @@ -273,9 +273,9 @@ std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {

// Add tmp grad outputs to original grad vars
for (size_t k = 0; k < grad_output_vars_.size(); ++k) {
for (auto it : grad_output_vars_[k]) {
for (const auto& it : grad_output_vars_[k]) {
auto& outputs = tmp_grad_outputs[k][it.first];
auto& origin_outputs = it.second;
const auto& origin_outputs = it.second;
PADDLE_ENFORCE_EQ(outputs.size(), origin_outputs.size());

for (size_t i = 0; i < outputs.size(); ++i) {
Expand Down
28 changes: 17 additions & 11 deletions paddle/fluid/imperative/layer.h
Original file line number Diff line number Diff line change
Expand Up @@ -294,17 +294,23 @@ class PYBIND11_HIDDEN OpBase {

void InvokeBackwardHooks();

void TrackPreOp(const VarBase* inp_var, const std::string& inp_name) {
if (inp_var->PreOp() && !inp_var->IsStopGradient()) {
VLOG(3) << "add pre op " << inp_var->PreOp()->Type() << " in slot "
<< inp_name;
pre_ops_[inp_name].push_back(inp_var->PreOp());
pre_ops_out_idx_[inp_name].push_back(inp_var->PreOpOutIdx());
} else {
VLOG(3) << "no pre op in slot " << inp_name
<< " input var stop_gradient: " << inp_var->IsStopGradient();
pre_ops_[inp_name].push_back(nullptr);
// pre_ops_out_idx_[inp_name].push_back(-1);
void TrackPreOp(const std::string& inp_name,
const std::vector<VarBase*>& inputs) {
auto& pre_ops_list = pre_ops_[inp_name];
pre_ops_list.reserve(inputs.size());
auto& pre_ops_out_idx_list = pre_ops_out_idx_[inp_name];
for (VarBase* inp_var : inputs) {
if (inp_var->PreOp() && !inp_var->IsStopGradient()) {
VLOG(3) << "add pre op " << inp_var->PreOp()->Type() << " in slot "
<< inp_name;
pre_ops_list.emplace_back(inp_var->PreOp());
pre_ops_out_idx_list.push_back(inp_var->PreOpOutIdx());
} else {
VLOG(3) << "no pre op in slot " << inp_name
<< " input var stop_gradient: " << inp_var->IsStopGradient();
pre_ops_list.emplace_back(nullptr);
// pre_ops_out_idx_list.push_back(-1);
}
}
}

Expand Down
62 changes: 62 additions & 0 deletions paddle/fluid/imperative/profiler.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/imperative/profiler.h"

#ifdef WITH_GPERFTOOLS
#include "gperftools/profiler.h"
#endif
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <mutex> // NOLINT
#include <thread> // NOLINT

DEFINE_string(
tracer_profile_fname, "xxgperf",
"Profiler filename for imperative tracer, which generated by gperftools."
"Only valid when compiled `WITH_PROFILER=ON`. Empty if disable.");

namespace paddle {
namespace imperative {

static std::once_flag gTracerProfileOnce;
#ifdef WITH_GPERFTOOLS
static bool gTracerProfilerStarted = false;
#endif

void StartProfile() {
if (!FLAGS_tracer_profile_fname.empty()) {
std::call_once(gTracerProfileOnce, [] {
#ifdef WITH_GPERFTOOLS
ProfilerStart(FLAGS_tracer_profile_fname.c_str());
gTracerProfilerStarted = true;
#else
LOG(WARNING) << "Paddle is not compiled with gperftools. "
"FLAGS_tracer_profile_fname will be ignored";
#endif
});
}
}

void StopProfile() {
#ifdef WITH_GPERFTOOLS
ProfilerFlush();
#else
LOG(WARNING) << "Paddle is not compiled with gperftools. "
"FLAGS_tracer_profile_fname will be ignored";
#endif
}

} // namespace imperative
} // namespace paddle
25 changes: 25 additions & 0 deletions paddle/fluid/imperative/profiler.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

namespace paddle {
namespace imperative {

extern void StartProfile();

extern void StopProfile();

} // namespace imperative
} // namespace paddle
40 changes: 3 additions & 37 deletions paddle/fluid/imperative/tracer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,9 @@
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"

#ifdef WITH_GPERFTOOLS
#include "gperftools/profiler.h"
#endif

DEFINE_string(
tracer_profile_fname, "",
"Profiler filename for imperative tracer, which generated by gperftools."
"Only valid when compiled `WITH_PROFILER=ON`. Empty if disable.");

namespace paddle {
namespace imperative {

static std::once_flag gTracerProfileOnce;
#ifdef WITH_GPERFTOOLS
static bool gTracerProfilerStarted = false;
#endif

void CreateGradOp(const framework::OpDesc& op_desc,
const std::unordered_set<std::string>& no_grad_set,
const std::vector<framework::BlockDesc*>& grad_sub_block,
Expand Down Expand Up @@ -145,31 +131,13 @@ framework::VariableNameMap CreateOutputVarNameMap(
return result;
}

Tracer::Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {
if (!FLAGS_tracer_profile_fname.empty()) {
std::call_once(gTracerProfileOnce, [] {
#ifdef WITH_GPERFTOOLS
ProfilerStart(FLAGS_tracer_profile_fname.c_str());
gTracerProfilerStarted = true;
#else
LOG(WARNING) << "Paddle is not compiled with gperftools. "
"FLAGS_tracer_profile_fname will be ignored";
#endif
});
}
}
Tracer::Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {}

std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
const VarBasePtrMap& outputs,
framework::AttributeMap attrs_map,
const platform::Place expected_place,
const bool stop_gradient) {
#ifdef WITH_GPERFTOOLS
if (gTracerProfilerStarted) {
ProfilerFlush();
}
#endif

framework::VariableValueMap invars_map;
framework::VariableValueMap outvars_map;

Expand All @@ -184,14 +152,14 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
inp->Name());

invars.emplace_back(inp->var_);
op->TrackPreOp(inp, it.first);
if (!stop_gradient) {
current_vars_map[inp->Name()] = inp;
}
VLOG(3) << "input var name: " << inp->Name()
<< " inited: " << inp->var_->IsInitialized()
<< " stop_grad: " << inp->IsStopGradient();
}
op->TrackPreOp(it.first, it.second);
}

op->output_vars_ = outputs;
Expand Down Expand Up @@ -319,9 +287,7 @@ std::vector<VarBase*> Tracer::PyTrace(OpBase* op,
std::vector<framework::Variable*> ret_vars =
PyLayer::Apply(op->forward_id_, inputs);

for (VarBase* inp : inputs) {
op->TrackPreOp(inp, PyLayer::kFwdInp);
}
op->TrackPreOp(PyLayer::kFwdInp, inputs);

std::vector<VarBase*>& outputs = op->output_vars_[PyLayer::kFwdOut];
outputs.reserve(ret_vars.size());
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/inference/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,5 +91,5 @@ if(WITH_TESTING)
add_subdirectory(tests/book)
if(WITH_INFERENCE_API_TEST)
add_subdirectory(tests/api)
endif()
endif()
endif()
2 changes: 1 addition & 1 deletion paddle/fluid/pybind/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
set(PYBIND_DEPS pybind python proto_desc memory executor async_executor prune
feed_fetch_method pass_builder parallel_executor profiler layer scope_pool
tracer analysis_predictor)
tracer analysis_predictor imperative_profiler)

if(WITH_PYTHON)
list(APPEND PYBIND_DEPS py_func_op)
Expand Down
2 changes: 2 additions & 0 deletions paddle/fluid/pybind/imperative.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ void BindTracer(pybind11::module* m) {
framework::AttributeMap attrs_map,
const platform::CPUPlace expected_place,
const bool stop_gradient = false) {
pybind11::gil_scoped_release release;
return self.Trace(op, inputs, outputs, attrs_map, expected_place,
stop_gradient);
})
Expand All @@ -52,6 +53,7 @@ void BindTracer(pybind11::module* m) {
framework::AttributeMap attrs_map,
const platform::CUDAPlace expected_place,
const bool stop_gradient = false) {
pybind11::gil_scoped_release release;
return self.Trace(op, inputs, outputs, attrs_map, expected_place,
stop_gradient);
})
Expand Down
6 changes: 6 additions & 0 deletions paddle/fluid/pybind/pybind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ limitations under the License. */
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/version.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/profiler.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#include "paddle/fluid/memory/allocation/legacy_allocator.h"
#include "paddle/fluid/operators/activation_op.h"
Expand Down Expand Up @@ -156,6 +157,11 @@ PYBIND11_MODULE(core, m) {
m.def("print_mem_usage",
[]() { return memory::allocation::GPUMemMonitor.PrintMemUsage(); });

m.def("start_imperative_gperf_profiler",
[]() { imperative::StartProfile(); });

m.def("stop_imperative_gperf_profiler", []() { imperative::StopProfile(); });

py::class_<imperative::VarBase>(m, "VarBase", R"DOC()DOC")
.def(
py::init<const std::string &, paddle::framework::proto::VarType::Type,
Expand Down
3 changes: 2 additions & 1 deletion python/paddle/fluid/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,8 @@ def __bootstrap__():
'allocator_strategy', 'reader_queue_speed_test_mode',
'print_sub_graph_dir', 'pe_profile_fname', 'warpctc_dir',
'inner_op_parallelism', 'enable_parallel_graph',
'multiple_of_cupti_buffer_size', 'enable_subgraph_optimize'
'multiple_of_cupti_buffer_size', 'enable_subgraph_optimize',
'tracer_profile_fname'
]
if 'Darwin' not in sysstr:
read_env_flags.append('use_pinned_memory')
Expand Down
4 changes: 4 additions & 0 deletions python/paddle/fluid/imperative/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,12 @@
from . import tracer
from .tracer import *

from . import profiler
from .profiler import *

__all__ = []
__all__ += layers.__all__
__all__ += base.__all__
__all__ += nn.__all__
__all__ += tracer.__all__
__all__ += profiler.__all__
30 changes: 30 additions & 0 deletions python/paddle/fluid/imperative/profiler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

from .. import core

__all__ = [
'start_gperf_profiler',
'stop_gperf_profiler',
]


def start_gperf_profiler():
core.start_imperative_gperf_profiler()


def stop_gperf_profiler():
core.stop_imperative_gperf_profiler()
6 changes: 6 additions & 0 deletions tools/manylinux1/build_scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -153,3 +153,9 @@ done

# Restore LD_LIBRARY_PATH
LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}"

# According to ar issues: https://lists.gnu.org/archive/html/bug-binutils/2016-05/msg00211.html
# we should install new version ar with 64-bit supported here
wget https://ftp.gnu.org/gnu/binutils/binutils-2.27.tar.gz
tar xzf binutils-2.27.tar.gz && cd binutils-2.27
./configure --prefix=/opt/rh/devtoolset-2/root/usr/ --enable-64-bit-archive && make -j `nproc` && make install

0 comments on commit 8e4ad00

Please sign in to comment.