Commit

shrink transformer
velconia committed Apr 1, 2019
1 parent 96f2421 commit 124f45c
Showing 4 changed files with 322 additions and 784 deletions.
20 changes: 16 additions & 4 deletions paddle/fluid/imperative/layer.cc
@@ -81,6 +81,10 @@ class TensorAddToFunctor : public boost::static_visitor<> {
 
 }  // namespace detail
 
+template <int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenVector = framework::EigenVector<float, MajorType, IndexType>;
+
 void AddTo(Variable* src, Variable* dst, platform::Place place) {
   framework::Tensor* dst_tensor = dst->GetMutable<framework::LoDTensor>();
   framework::Tensor* src_tensor = src->GetMutable<framework::LoDTensor>();
@@ -95,10 +99,18 @@ void AddTo(Variable* src, Variable* dst, platform::Place place) {
                  "dst_numel %lld vs. src_numel %lld", dst_tensor->numel(),
                  src_tensor->numel());
 
-  detail::TensorAddToFunctor<float> func(
-      src_tensor->numel(), src_tensor->data<float>(),
-      dst_tensor->mutable_data<float>(place));
-  boost::apply_visitor(func, place);
+  auto result = EigenVector<>::Flatten(*dst_tensor);
+  auto in_0_e = EigenVector<>::Flatten(*dst_tensor);
+  auto in_1_e = EigenVector<>::Flatten(*src_tensor);
+  platform::DeviceContext* dev_ctx =
+      platform::DeviceContextPool::Instance().Get(place);
+  platform::CPUDeviceContext* x =
+      reinterpret_cast<platform::CPUDeviceContext*>(dev_ctx);
+  result.device(*x->eigen_device()) = in_0_e + in_1_e;
+  // detail::TensorAddToFunctor<float> func(
+  //     src_tensor->numel(), src_tensor->data<float>(),
+  //     dst_tensor->mutable_data<float>(place));
+  // boost::apply_visitor(func, place);
 }
 
 class Autograd {
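Note on the change above: the new AddTo body flattens both LoDTensors with EigenVector<>::Flatten and evaluates in_0_e + in_1_e on the context's Eigen device, accumulating src into dst element-wise. The unconditional reinterpret_cast to CPUDeviceContext means this path assumes a CPU place. Below is a minimal numpy sketch of the same computation; it is illustrative only, and add_to is a made-up name, not Paddle API.

import numpy as np

def add_to(src, dst):
    # Mirrors the Eigen path: flatten both tensors and accumulate
    # src into dst element-wise (dst = dst + src).
    assert dst.size == src.size, "dst_numel vs. src_numel mismatch"
    flat_dst = dst.reshape(-1)  # a view, like EigenVector<>::Flatten
    flat_src = src.reshape(-1)
    flat_dst += flat_src        # writes through the view into dst

dst = np.ones((2, 3), dtype=np.float32)
src = np.full((2, 3), 2.0, dtype=np.float32)
add_to(src, dst)  # dst is now all 3.0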
22 changes: 11 additions & 11 deletions python/paddle/fluid/framework.py
@@ -104,14 +104,14 @@ def cuda_places(device_ids=None):
     :code:`FLAGS_selected_gpus=0,1,2`, the returned list would
     be [fluid.CUDAPlace(0), fluid.CUDAPlace(1), fluid.CUDAPlace(2)].
     If :code:`FLAGS_selected_gpus` is not set, all visible
-    gpu places would be returned.
+    gpu places would be returned.
 
     If :code:`device_ids` is not None, it should be the device
-    ids of gpus. For example, if :code:`device_ids=[0,1,2]`,
-    the returned list would be
+    ids of gpus. For example, if :code:`device_ids=[0,1,2]`,
+    the returned list would be
     [fluid.CUDAPlace(0), fluid.CUDAPlace(1), fluid.CUDAPlace(2)].
 
-    Args:
+    Args:
        device_ids (None|list(int)|tuple(int)): gpu device id list.
 
     Returns:
@@ -133,11 +133,11 @@ def cuda_places(device_ids=None):
 def cpu_places(device_count=None):
     '''
     Create a list of :code:`fluid.CPUPlace` objects.
 
     If :code:`device_count` is None, the device count would
-    be determined by environment variable :code:`CPU_NUM`.
+    be determined by environment variable :code:`CPU_NUM`.
     If :code:`CPU_NUM` is not set, the device count would
-    be determined by :code:`multiprocessing.cpu_count()`.
+    be determined by :code:`multiprocessing.cpu_count()`.
 
     Args:
        device_count (None|int): device number.
@@ -155,9 +155,9 @@ def cuda_pinned_places(device_count=None):
     Create a list of :code:`fluid.CUDAPinnedPlace` objects.
 
     If :code:`device_count` is None, the device count would
-    be determined by environment variable :code:`CPU_NUM`.
+    be determined by environment variable :code:`CPU_NUM`.
     If :code:`CPU_NUM` is not set, the device count would
-    be determined by :code:`multiprocessing.cpu_count()`.
+    be determined by :code:`multiprocessing.cpu_count()`.
 
     Args:
        device_count (None|int): device number.
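For reference, a small usage sketch of the three place helpers whose docstrings receive whitespace-only cleanups above. The CPU call runs as-is; the commented lines assume a CUDA-enabled build.

import paddle.fluid as fluid

# Explicit count; with device_count=None the count would come from
# CPU_NUM, else multiprocessing.cpu_count(), per the docstring.
cpus = fluid.cpu_places(device_count=2)
# -> [fluid.CPUPlace(), fluid.CPUPlace()]

# On a CUDA build:
# gpus = fluid.cuda_places(device_ids=[0, 1])
# -> [fluid.CUDAPlace(0), fluid.CUDAPlace(1)]
# pinned = fluid.cuda_pinned_places(device_count=2)
# -> [fluid.CUDAPinnedPlace(), fluid.CUDAPinnedPlace()]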
@@ -493,7 +493,7 @@ def _backward(self):
         self._ivar._run_backward()
 
     def _gradient(self):
-        new_ivar = self._ivar._grad_ivar._copy_to(core.CPUPlace(), True)
+        new_ivar = self._ivar._grad_ivar()._copy_to(core.CPUPlace(), True)
         return np.array(new_ivar.value().get_tensor())
 
     def _clear_gradient(self):
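The _gradient fix above adds a pair of parentheses: _grad_ivar is callable on the underlying ivar, so the old code fetched a bound method object (which has no _copy_to) rather than the gradient variable. A generic illustration with a hypothetical stand-in class:

class FakeIVar(object):
    # Hypothetical stand-in for the C++-backed ivar.
    def _grad_ivar(self):
        return "the gradient variable"

ivar = FakeIVar()
print(ivar._grad_ivar)    # <bound method ...>: what the buggy line got
print(ivar._grad_ivar())  # the actual return value: what the fix gets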