Open gc by default (PaddlePaddle#18836)
* open gc by default, test=develop

* fix test_train_recognize_digits and disable gc when ngraph is enabled, test=develop

* fix conditional_block op eager deletion bug, test=develop

* add some comments to reviewers, test=develop
sneaxiy authored Aug 2, 2019
1 parent 3816d22 commit 7ac748a
Showing 16 changed files with 384 additions and 46 deletions.
2 changes: 1 addition & 1 deletion paddle/fluid/framework/CMakeLists.txt
@@ -193,7 +193,7 @@ else()
cc_test(test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor elementwise_add_op)
endif()

target_link_libraries(executor while_op_helper executor_gc_helper recurrent_op_helper)
target_link_libraries(executor while_op_helper executor_gc_helper recurrent_op_helper conditional_block_op_helper)

cc_library(parallel_executor SRCS parallel_executor.cc DEPS
threaded_ssa_graph_executor scope_buffered_ssa_graph_executor parallel_ssa_graph_executor async_ssa_graph_executor
28 changes: 21 additions & 7 deletions paddle/fluid/framework/executor.cc
@@ -30,6 +30,7 @@ limitations under the License. */
#include "paddle/fluid/framework/trainer_factory.h"
#include "paddle/fluid/framework/transfer_scope_cache.h"
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h"
#include "paddle/fluid/operators/controlflow/recurrent_op_helper.h"
#include "paddle/fluid/operators/controlflow/while_op_helper.h"
#include "paddle/fluid/operators/distributed/distributed.h"
@@ -58,10 +59,30 @@ ExecutorPrepareContext::ExecutorPrepareContext(

void ExecutorPrepareContext::PrepareUnusedVars(
const std::vector<std::string>& keep_vars, bool force_disable_gc) {
#ifdef PADDLE_WITH_NGRAPH
if (FLAGS_use_ngraph) {
// FIXME(zjl): There is a difference when ngraph and gc are both enabled
// in unittests. I do not know why this happens. Maybe the ngraph engine
// caches some variables?
LOG_FIRST_N(WARNING, 1)
<< "FLAGS_use_ngraph=True, garbage collection strategy is "
"disabled in Executor";
force_disable_gc = true;
}
#endif
force_disable_gc_ = force_disable_gc;
if (GetEagerDeletionThreshold() < 0 || force_disable_gc_) {
return;
}

// If gc is enabled and the program has more than one block
if (prog_.Size() > 1) {
operators::PrepareSafeEagerDeletionOnConditionalOpAndConditionalGradOp(
block_id_, ops_);
operators::PrepareSafeEagerDeletionOnWhileOpAndWhileGradOp(block_id_, ops_);
operators::PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp(
block_id_, ops_);
}
unused_vars_ = GetUnusedVars(prog_.Block(block_id_), ops_, keep_vars);
}

@@ -407,13 +428,6 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
#ifdef PADDLE_WITH_CUDA
}
#endif
// If gc is enabled and block size > 1
if (gc && ctx->prog_.Size() > 1) {
operators::PrepareSafeEagerDeletionOnWhileOpAndWhileGradOp(ctx->block_id_,
ctx->ops_);
operators::PrepareSafeEagerDeletionOnRecurrentOpAndRecurrentGradOp(
ctx->block_id_, ctx->ops_);
}
}

for (auto& op : ctx->ops_) {
12 changes: 11 additions & 1 deletion paddle/fluid/framework/garbage_collector.cc
@@ -28,8 +28,15 @@
namespace paddle {
namespace framework {

// Disable gc by default when inference library is built
#ifdef PADDLE_ON_INFERENCE
static const double kDefaultEagerDeleteTensorGB = -1;
#else
static const double kDefaultEagerDeleteTensorGB = 0;
#endif

DEFINE_double(
eager_delete_tensor_gb, -1.0,
eager_delete_tensor_gb, kDefaultEagerDeleteTensorGB,
"Memory size threshold (GB) when the garbage collector clear tensors."
"Disabled when this value is less than 0");

@@ -48,6 +55,9 @@ GarbageCollector::GarbageCollector(const platform::Place &place,
: max_memory_size_((std::max)(max_memory_size, static_cast<size_t>(1))) {
garbages_.reset(new GarbageQueue());
dev_ctx_ = platform::DeviceContextPool::Instance().Get(place);
if (max_memory_size_ > 1) {
mutex_.reset(new std::mutex());
}
}

CPUGarbageCollector::CPUGarbageCollector(const platform::CPUPlace &place,
4 changes: 2 additions & 2 deletions paddle/fluid/framework/garbage_collector.h
@@ -46,7 +46,7 @@ class GarbageCollector {

platform::DeviceContext *dev_ctx_;
std::unique_ptr<GarbageQueue> garbages_;
mutable std::mutex mutex_;
mutable std::unique_ptr<std::mutex> mutex_;
const size_t max_memory_size_;
size_t cur_memory_size_{0};
};
@@ -118,7 +118,7 @@ void GarbageCollector::Add(Container &&objs, Callback &&callback) {

GarbageQueue *garbage_queue = nullptr;
{
std::lock_guard<std::mutex> guard(mutex_);
std::lock_guard<std::mutex> guard(*mutex_);
for (auto &obj : objs) {
if (!obj) continue;
cur_memory_size_ += obj->size();
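
The two garbage_collector changes above (the lazily allocated mutex in the constructor, and the unique_ptr member dereferenced in Add) follow a common pattern: the lock is only created when the collector actually batches garbage (max_memory_size_ > 1), so the immediate-release mode implied by the new 0 GB default pays no locking cost. A simplified illustration of the pattern, with invented names rather than Paddle's real GarbageCollector, is:

#include <algorithm>
#include <cstddef>
#include <memory>
#include <mutex>

class CollectorSketch {
 public:
  explicit CollectorSketch(size_t max_memory_size)
      : max_memory_size_(std::max<size_t>(max_memory_size, 1)) {
    if (max_memory_size_ > 1) {
      // Only the batched mode queues garbage from multiple threads,
      // so only it needs a mutex.
      mutex_.reset(new std::mutex());
    }
  }

  void Add(size_t bytes) {
    if (max_memory_size_ <= 1) {
      // Immediate-release mode: free right away, no queue, no lock.
      return;
    }
    std::lock_guard<std::mutex> guard(*mutex_);
    cur_memory_size_ += bytes;
    // ... flush the queue once cur_memory_size_ >= max_memory_size_ ...
  }

 private:
  std::unique_ptr<std::mutex> mutex_;
  const size_t max_memory_size_;
  size_t cur_memory_size_{0};
};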
@@ -1,11 +1,12 @@
cc_library(op_graph_view SRCS op_graph_view.cc DEPS op_handle_base)
cc_library(conditional_block_op_eager_deletion_pass SRCS conditional_block_op_eager_deletion_pass.cc DEPS conditional_block_op_helper graph_helper pass computation_op_handle)
cc_library(while_op_eager_deletion_pass SRCS while_op_eager_deletion_pass.cc DEPS while_op_helper graph_helper pass computation_op_handle)
cc_library(recurrent_op_eager_deletion_pass SRCS recurrent_op_eager_deletion_pass.cc DEPS recurrent_op_helper graph_helper pass computation_op_handle)
cc_library(reference_count_pass_helper SRCS reference_count_pass_helper.cc DEPS garbage_collector computation_op_handle var_handle)
cc_library(reference_count_pass SRCS reference_count_pass.cc DEPS computation_op_handle graph graph_helper pass op_graph_view reference_count_pass_helper)

cc_library(eager_deletion_pass SRCS eager_deletion_pass.cc DEPS computation_op_handle
eager_deletion_op_handle graph graph_helper pass while_op_eager_deletion_pass recurrent_op_eager_deletion_pass reference_count_pass_helper)
eager_deletion_op_handle graph graph_helper pass conditional_block_op_eager_deletion_pass while_op_eager_deletion_pass recurrent_op_eager_deletion_pass reference_count_pass_helper)

cc_library(memory_reuse_pass SRCS memory_reuse_pass.cc DEPS computation_op_handle reference_count_pass_helper share_tensor_buffer_op_handle multi_devices_helper graph pass)

@@ -0,0 +1,61 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/details/computation_op_handle.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h"
#include "paddle/fluid/operators/controlflow/op_variant.h"

namespace paddle {
namespace framework {
namespace ir {

class ConditionalOpEagerDeletionPass : public Pass {
protected:
void ApplyImpl(Graph *graph) const override {
auto all_ops = ir::FilterByNodeWrapper<details::OpHandleBase>(*graph);

// Find all conditional_block and conditional_block_grad ops
std::unordered_map<size_t, std::pair<std::vector<OperatorBase *>,
std::vector<OperatorBase *>>>
target_ops;
for (auto *op : all_ops) {
auto compute_op = dynamic_cast<details::ComputationOpHandle *>(op);
if (compute_op == nullptr) continue;

if (compute_op->Name() == "conditional_block") {
target_ops[compute_op->GetScopeIdx()].first.emplace_back(
compute_op->GetOp());
} else if (compute_op->Name() == "conditional_block_grad") {
target_ops[compute_op->GetScopeIdx()].second.emplace_back(
compute_op->GetOp());
}
}

for (auto &ops_pair : target_ops) {
auto &ifelse_ops = ops_pair.second.first;
auto &ifelse_grad_ops = ops_pair.second.second;
operators::PrepareSafeEagerDeletionOnConditionalOpAndConditionalGradOp(
ifelse_ops, ifelse_grad_ops);
}
}
};

} // namespace ir
} // namespace framework
} // namespace paddle

REGISTER_PASS(conditional_block_op_eager_deletion_pass,
paddle::framework::ir::ConditionalOpEagerDeletionPass);
@@ -269,6 +269,11 @@ void EagerDeletionPass::ApplyImpl(ir::Graph *graph) const {
}
}

auto conditional_block_op_eager_deletion_pass =
ir::PassRegistry::Instance().Get(
"conditional_block_op_eager_deletion_pass");
conditional_block_op_eager_deletion_pass->Apply(graph);

auto while_op_eager_deletion_pass =
ir::PassRegistry::Instance().Get("while_op_eager_deletion_pass");
while_op_eager_deletion_pass->Apply(graph);
@@ -288,5 +293,6 @@ REGISTER_PASS(eager_deletion_pass, paddle::framework::ir::EagerDeletionPass)
.RequirePassAttr(paddle::framework::ir::kAllPlaces)
.RequirePassAttr(paddle::framework::ir::kGarbageCollector);

USE_PASS(conditional_block_op_eager_deletion_pass);
USE_PASS(while_op_eager_deletion_pass);
USE_PASS(recurrent_op_eager_deletion_pass);
@@ -337,6 +337,10 @@ void ReferenceCountPass::ApplyImpl(ir::Graph *graph) const {

for (auto iter = var_handles.rbegin(); iter != var_handles.rend();
++iter) {
if ((*iter)->Node()->IsCtrlVar()) {
break;
}

VLOG(10) << "Try to find last living ops of " << var_name << " "
<< (iter - var_handles.rbegin()) << " time";
LastLiveOpSearchStatus status = LastLiveOpSearchStatus::kFailure;
3 changes: 3 additions & 0 deletions paddle/fluid/operators/controlflow/CMakeLists.txt
@@ -1,7 +1,10 @@
include(operators)
register_operators(DEPS naive_executor)
cc_library(op_variant SRCS op_variant.cc DEPS operator proto_desc)
cc_library(conditional_block_op_helper SRCS conditional_block_op_helper.cc DEPS operator op_variant conditional_block_op)
cc_library(recurrent_op_helper SRCS recurrent_op_helper.cc DEPS operator op_variant recurrent_op)
cc_library(while_op_helper SRCS while_op_helper.cc DEPS operator op_variant)

target_link_libraries(conditional_block_infer_op conditional_block_op)

file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(logical_and);\nUSE_NO_KERNEL_OP(read_from_array);\n")
71 changes: 43 additions & 28 deletions paddle/fluid/operators/controlflow/conditional_block_op.cc
@@ -17,6 +17,12 @@ limitations under the License. */
namespace paddle {
namespace operators {

const char ConditionalOp::kInputs[] = "Input";
const char ConditionalOp::kOutputs[] = "Out";
const char ConditionalOp::kCondition[] = "Cond";
const char ConditionalOp::kScope[] = "Scope";
const char ConditionalOp::kSkipEagerDeletionVars[] = "skip_eager_deletion_vars";

class ConditionalBlockOp : public ConditionalOp {
public:
ConditionalBlockOp(const std::string &type,
@@ -33,20 +39,20 @@ class ConditionalBlockOp : public ConditionalOp {
// When is_scalar_condition is True, the conditional variable is a scalar;
// whether the operators in the sub-block need to be executed depends on
// that conditional variable (Cond).
auto xs = InputTensors(scope, "Cond");
auto xs = InputTensors(scope, ConditionalOp::kCondition);
need_run = ScalarCondition(xs);
} else {
// When is_scalar_condition is False, the conditional variable may be a
// vector or tensor; whether the operators in the sub-block need to be
// executed depends on the input variables (Input).
auto xs = InputTensors(scope, "Input");
auto xs = InputTensors(scope, ConditionalOp::kInputs);
need_run = std::all_of(
xs.begin(), xs.end(),
[](const framework::LoDTensor *t) { return t->numel() != 0; });
}

if (need_run) {
auto *scope_var = scope.FindVar(Output("Scope"));
auto *scope_var = scope.FindVar(Output(ConditionalOp::kScope));
PADDLE_ENFORCE(scope_var != nullptr, "Must set scope");
auto *scopes = scope_var->GetMutable<std::vector<framework::Scope *>>();
scopes->resize(1);
@@ -55,7 +61,10 @@ class ConditionalBlockOp : public ConditionalOp {

framework::Executor exec(dev_place);
auto *block = Attr<framework::BlockDesc *>("sub_block");
exec.Run(*block->Program(), &cur_scope, block->ID(), false);
auto &skip_vars =
Attr<std::vector<std::string>>(ConditionalOp::kSkipEagerDeletionVars);
exec.Run(*block->Program(), &cur_scope, block->ID(), false, true,
skip_vars);
}
}
};
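
The skip_vars passed to exec.Run above are the other half of making conditional_block gc-safe: the new conditional_block_op_helper (built in the controlflow CMake change later in this diff, but not shown in this excerpt) is expected to fill the kSkipEagerDeletionVars attribute with the variables the matching conditional_block_grad op still needs, mirroring the existing while_op helper, so the forward sub-block executor will not free them. A hedged, self-contained sketch of that idea, with illustrative names only, is:

#include <string>
#include <unordered_set>
#include <vector>

// Illustrative only: compute which forward-produced variables a grad op
// still reads, so they can be placed on the forward op's skip list.
std::vector<std::string> SkipEagerDeletionVarsSketch(
    const std::vector<std::string> &fwd_sub_block_vars,
    const std::vector<std::string> &grad_op_inputs) {
  std::unordered_set<std::string> produced(fwd_sub_block_vars.begin(),
                                           fwd_sub_block_vars.end());
  std::vector<std::string> skip_vars;
  for (const auto &name : grad_op_inputs) {
    // Anything the backward op reads that the forward sub-block produced
    // must survive eager deletion until the backward op has run.
    if (produced.count(name) > 0) skip_vars.push_back(name);
  }
  return skip_vars;
}

In the operator above, the result of such a computation arrives through the attribute and is simply forwarded to the sub-block executor.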
@@ -73,28 +82,30 @@ class ConditionalBlockGradOp : public ConditionalOp {
const platform::Place &dev_place) const override {
bool need_run;
if (Attr<bool>("is_scalar_condition")) {
auto xs = this->InputTensors(scope, "Cond");
auto xs = this->InputTensors(scope, ConditionalOp::kCondition);
need_run = ScalarCondition(xs);
} else {
auto xs = this->InputTensors(scope, "Input");
auto xs = this->InputTensors(scope, ConditionalOp::kInputs);
need_run = std::all_of(
xs.begin(), xs.end(),
[](const framework::LoDTensor *t) { return t->numel() != 0; });
}

if (need_run) {
auto *scope_var = scope.FindVar(Input("Scope"));
auto *scope_var = scope.FindVar(Input(ConditionalOp::kScope));
PADDLE_ENFORCE(scope_var != nullptr, "Must set scope");
auto &scopes = scope_var->Get<std::vector<framework::Scope *>>();
framework::Scope &cur_scope = *scopes[0];

framework::Executor exec(dev_place);
auto *block = Attr<framework::BlockDesc *>("sub_block");

const auto &ins = Inputs("Input");
const auto &d_ins = Outputs(framework::GradVarName("Input"));
const auto &conds = Inputs("Cond");
const auto &d_conds = Outputs(framework::GradVarName("Cond"));
const auto &ins = Inputs(ConditionalOp::kInputs);
const auto &d_ins =
Outputs(framework::GradVarName(ConditionalOp::kInputs));
const auto &conds = Inputs(ConditionalOp::kCondition);
const auto &d_conds =
Outputs(framework::GradVarName(ConditionalOp::kCondition));

std::vector<std::string> ins_conds_grads;
ins_conds_grads.reserve(ins.size() + conds.size());
@@ -142,15 +153,17 @@ class ConditionalBlockGradInferShape : public framework::InferShapeBase {
class ConditionalBlockGradInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext *context) const override {
PADDLE_ENFORCE(context->HasInputs("Cond"));
if (context->HasInputs("Input")) {
PADDLE_ENFORCE(context->HasOutputs(framework::GradVarName("Input")));
context->SetOutputsDim(framework::GradVarName("Input"),
context->GetInputsDim("Input"));
PADDLE_ENFORCE(context->HasInputs(ConditionalOp::kCondition));
if (context->HasInputs(ConditionalOp::kInputs)) {
PADDLE_ENFORCE(
context->HasOutputs(framework::GradVarName(ConditionalOp::kInputs)));
context->SetOutputsDim(framework::GradVarName(ConditionalOp::kInputs),
context->GetInputsDim(ConditionalOp::kInputs));
}
if (context->HasOutputs(framework::GradVarName("Cond"))) {
context->SetOutputsDim(framework::GradVarName("Cond"),
context->GetInputsDim("Cond"));
if (context->HasOutputs(
framework::GradVarName(ConditionalOp::kCondition))) {
context->SetOutputsDim(framework::GradVarName(ConditionalOp::kCondition),
context->GetInputsDim(ConditionalOp::kCondition));
}
}
};
@@ -163,15 +176,17 @@ class ConditionalBlockGradMaker : public framework::SingleGradOpDescMaker {
std::unique_ptr<framework::OpDesc> Apply() const override {
auto grad_op = new framework::OpDesc();
grad_op->SetType("conditional_block_grad");
grad_op->SetInput("Cond", Input("Cond"));
grad_op->SetInput("Input", Input("Input"));
grad_op->SetInput("Out", Output("Out"));
grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
grad_op->SetInput("Scope", Output("Scope"));
grad_op->SetOutput(framework::GradVarName("Cond"),
InputGrad("Cond", false));
grad_op->SetOutput(framework::GradVarName("Input"),
InputGrad("Input", false));
grad_op->SetInput(ConditionalOp::kCondition,
Input(ConditionalOp::kCondition));
grad_op->SetInput(ConditionalOp::kInputs, Input(ConditionalOp::kInputs));
grad_op->SetInput(ConditionalOp::kOutputs, Output(ConditionalOp::kOutputs));
grad_op->SetInput(framework::GradVarName(ConditionalOp::kOutputs),
OutputGrad(ConditionalOp::kOutputs));
grad_op->SetInput(ConditionalOp::kScope, Output(ConditionalOp::kScope));
grad_op->SetOutput(framework::GradVarName(ConditionalOp::kCondition),
InputGrad(ConditionalOp::kCondition, false));
grad_op->SetOutput(framework::GradVarName(ConditionalOp::kInputs),
InputGrad(ConditionalOp::kInputs, false));
grad_op->SetBlockAttr("sub_block", this->grad_block_[0]);
grad_op->SetAttr("is_scalar_condition", GetAttr("is_scalar_condition"));
return std::unique_ptr<framework::OpDesc>(grad_op);