Commit

shrink transformer
velconia committed Apr 1, 2019
1 parent 96f2421 commit 124f45c
Showing 4 changed files with 322 additions and 784 deletions.
20 changes: 16 additions & 4 deletions paddle/fluid/imperative/layer.cc
@@ -81,6 +81,10 @@ class TensorAddToFunctor : public boost::static_visitor<> {
 
 }  // namespace detail
 
+template <int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenVector = framework::EigenVector<float, MajorType, IndexType>;
+
 void AddTo(Variable* src, Variable* dst, platform::Place place) {
   framework::Tensor* dst_tensor = dst->GetMutable<framework::LoDTensor>();
   framework::Tensor* src_tensor = src->GetMutable<framework::LoDTensor>();
@@ -95,10 +99,18 @@ void AddTo(Variable* src, Variable* dst, platform::Place place) {
                  "dst_numel %lld vs. src_numel %lld", dst_tensor->numel(),
                  src_tensor->numel());
 
-  detail::TensorAddToFunctor<float> func(
-      src_tensor->numel(), src_tensor->data<float>(),
-      dst_tensor->mutable_data<float>(place));
-  boost::apply_visitor(func, place);
+  auto result = EigenVector<>::Flatten(*dst_tensor);
+  auto in_0_e = EigenVector<>::Flatten(*dst_tensor);
+  auto in_1_e = EigenVector<>::Flatten(*src_tensor);
+  platform::DeviceContext* dev_ctx =
+      platform::DeviceContextPool::Instance().Get(place);
+  platform::CPUDeviceContext* x =
+      reinterpret_cast<platform::CPUDeviceContext*>(dev_ctx);
+  result.device(*x->eigen_device()) = in_0_e + in_1_e;
+  // detail::TensorAddToFunctor<float> func(
+  //     src_tensor->numel(), src_tensor->data<float>(),
+  //     dst_tensor->mutable_data<float>(place));
+  // boost::apply_visitor(func, place);
 }
 
 class Autograd {
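Note on the change above: the new AddTo body flattens both LoDTensors with EigenVector<>::Flatten and evaluates in_0_e + in_1_e on the context's Eigen device, accumulating src into dst element-wise. The unconditional reinterpret_cast to CPUDeviceContext means this path assumes a CPU place. Below is a minimal numpy sketch of the same computation; it is illustrative only, and add_to is a made-up name, not Paddle API.

import numpy as np

def add_to(src, dst):
    # Mirrors the Eigen path: flatten both tensors and accumulate
    # src into dst element-wise (dst = dst + src).
    assert dst.size == src.size, "dst_numel vs. src_numel mismatch"
    flat_dst = dst.reshape(-1)  # a view, like EigenVector<>::Flatten
    flat_src = src.reshape(-1)
    flat_dst += flat_src        # writes through the view into dst

dst = np.ones((2, 3), dtype=np.float32)
src = np.full((2, 3), 2.0, dtype=np.float32)
add_to(src, dst)  # dst is now all 3.0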
22 changes: 11 additions & 11 deletions python/paddle/fluid/framework.py
@@ -104,14 +104,14 @@ def cuda_places(device_ids=None):
     :code:`FLAGS_selected_gpus=0,1,2`, the returned list would
     be [fluid.CUDAPlace(0), fluid.CUDAPlace(1), fluid.CUDAPlace(2)].
     If :code:`FLAGS_selected_gpus` is not set, all visible
-    gpu places would be returned.
+    gpu places would be returned.
 
     If :code:`device_ids` is not None, it should be the device
-    ids of gpus. For example, if :code:`device_ids=[0,1,2]`,
-    the returned list would be
+    ids of gpus. For example, if :code:`device_ids=[0,1,2]`,
+    the returned list would be
     [fluid.CUDAPlace(0), fluid.CUDAPlace(1), fluid.CUDAPlace(2)].
 
-    Args:
+    Args:
        device_ids (None|list(int)|tuple(int)): gpu device id list.
 
     Returns:
@@ -133,11 +133,11 @@ def cuda_places(device_ids=None):
 def cpu_places(device_count=None):
     '''
     Create a list of :code:`fluid.CPUPlace` objects.
 
     If :code:`device_count` is None, the device count would
-    be determined by environment variable :code:`CPU_NUM`.
+    be determined by environment variable :code:`CPU_NUM`.
     If :code:`CPU_NUM` is not set, the device count would
-    be determined by :code:`multiprocessing.cpu_count()`.
+    be determined by :code:`multiprocessing.cpu_count()`.
 
     Args:
        device_count (None|int): device number.
@@ -155,9 +155,9 @@ def cuda_pinned_places(device_count=None):
     Create a list of :code:`fluid.CUDAPinnedPlace` objects.
 
     If :code:`device_count` is None, the device count would
-    be determined by environment variable :code:`CPU_NUM`.
+    be determined by environment variable :code:`CPU_NUM`.
     If :code:`CPU_NUM` is not set, the device count would
-    be determined by :code:`multiprocessing.cpu_count()`.
+    be determined by :code:`multiprocessing.cpu_count()`.
 
     Args:
        device_count (None|int): device number.
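For reference, a small usage sketch of the three place helpers whose docstrings receive whitespace-only cleanups above. The CPU call runs as-is; the commented lines assume a CUDA-enabled build.

import paddle.fluid as fluid

# Explicit count; with device_count=None the count would come from
# CPU_NUM, else multiprocessing.cpu_count(), per the docstring.
cpus = fluid.cpu_places(device_count=2)
# -> [fluid.CPUPlace(), fluid.CPUPlace()]

# On a CUDA build:
# gpus = fluid.cuda_places(device_ids=[0, 1])
# -> [fluid.CUDAPlace(0), fluid.CUDAPlace(1)]
# pinned = fluid.cuda_pinned_places(device_count=2)
# -> [fluid.CUDAPinnedPlace(), fluid.CUDAPinnedPlace()]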
@@ -493,7 +493,7 @@ def _backward(self):
         self._ivar._run_backward()
 
     def _gradient(self):
-        new_ivar = self._ivar._grad_ivar._copy_to(core.CPUPlace(), True)
+        new_ivar = self._ivar._grad_ivar()._copy_to(core.CPUPlace(), True)
         return np.array(new_ivar.value().get_tensor())
 
     def _clear_gradient(self):
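The _gradient fix above adds a pair of parentheses: _grad_ivar is callable on the underlying ivar, so the old code fetched a bound method object (which has no _copy_to) rather than the gradient variable. A generic illustration with a hypothetical stand-in class:

class FakeIVar(object):
    # Hypothetical stand-in for the C++-backed ivar.
    def _grad_ivar(self):
        return "the gradient variable"

ivar = FakeIVar()
print(ivar._grad_ivar)    # <bound method ...>: what the buggy line got
print(ivar._grad_ivar())  # the actual return value: what the fix gets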