-
Notifications
You must be signed in to change notification settings - Fork 5.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix slice op shape=-1 bug #18107
Fix slice op shape=-1 bug #18107
Changes from 1 commit
6b25e5b
4dc1975
40cbd10
ab924fb
d69b459
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -55,17 +55,49 @@ class SliceKernel : public framework::OpKernel<T> { | |
*context.template device_context<DeviceContext>().eigen_device(); | ||
auto in = context.Input<framework::Tensor>("Input"); | ||
auto out = context.Output<framework::Tensor>("Out"); | ||
out->mutable_data<T>(context.GetPlace()); | ||
auto out_dims = out->dims(); | ||
auto in_dims = in->dims(); | ||
|
||
// resize out_dims | ||
auto decrease_axis = context.Attr<std::vector<int>>("decrease_axis"); | ||
if (decrease_axis.size() > 0) { | ||
if (decrease_axis.size() == (size_t)in_dims.size()) { | ||
std::vector<int> vec_origin_out_shape(decrease_axis.size(), 1); | ||
out->Resize(framework::make_ddim(vec_origin_out_shape)); | ||
} else { | ||
std::vector<int> vec_origin_out_shape( | ||
out_dims.size() + decrease_axis.size(), -1); | ||
|
||
for (size_t i = 0; i < decrease_axis.size(); ++i) { | ||
vec_origin_out_shape[decrease_axis[i]] = 1; | ||
} | ||
|
||
int step_in = 0; | ||
for (int i = 0; i < out_dims.size(); ++i) { | ||
while (true) { | ||
if (vec_origin_out_shape[step_in] == -1) { | ||
vec_origin_out_shape[step_in] = out_dims[i]; | ||
break; | ||
} | ||
|
||
step_in++; | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we implement this a different way? It looks odd with a `while (true)` loop. |
||
|
||
out->Resize(framework::make_ddim(vec_origin_out_shape)); | ||
} | ||
} | ||
|
||
out->mutable_data<T>(context.GetPlace()); | ||
auto axes = context.Attr<std::vector<int>>("axes"); | ||
auto starts = context.Attr<std::vector<int>>("starts"); | ||
|
||
auto new_out_dims = out->dims(); | ||
auto offsets = Eigen::array<int, D>(); | ||
auto extents = Eigen::array<int, D>(); | ||
for (size_t i = 0; i < D; ++i) { | ||
offsets[i] = 0; | ||
extents[i] = out_dims[i]; | ||
extents[i] = new_out_dims[i]; | ||
} | ||
int start; | ||
for (size_t i = 0; i < axes.size(); ++i) { | ||
|
@@ -81,18 +113,18 @@ class SliceKernel : public framework::OpKernel<T> { | |
*in); | ||
auto out_t = | ||
framework::EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From( | ||
*out); | ||
*out, new_out_dims); | ||
out_t.device(place) = in_t.slice(offsets, extents); | ||
|
||
out->Resize(out_dims); | ||
} | ||
}; | ||
|
||
template <typename DeviceContext, typename T> | ||
class SliceGradKernel : public framework::OpKernel<T> { | ||
public: | ||
void Compute(const framework::ExecutionContext& ctx) const override { | ||
size_t rank = ctx.Input<framework::Tensor>(framework::GradVarName("Out")) | ||
->dims() | ||
.size(); | ||
size_t rank = ctx.Input<framework::Tensor>("Input")->dims().size(); | ||
switch (rank) { | ||
case 1: | ||
SliceCompute<1>(ctx); | ||
|
@@ -130,6 +162,36 @@ class SliceGradKernel : public framework::OpKernel<T> { | |
auto axes = context.Attr<std::vector<int>>("axes"); | ||
auto starts = context.Attr<std::vector<int>>("starts"); | ||
|
||
auto decrease_axis = context.Attr<std::vector<int>>("decrease_axis"); | ||
if (decrease_axis.size() > 0) { | ||
if (decrease_axis.size() == (size_t)in_dims.size()) { | ||
// all dims decrease | ||
std::vector<int> vec_origin_out_shape(decrease_axis.size(), 1); | ||
out_dims = framework::make_ddim(vec_origin_out_shape); | ||
} else { | ||
std::vector<int> vec_origin_out_shape( | ||
out_dims.size() + decrease_axis.size(), -1); | ||
|
||
for (size_t i = 0; i < decrease_axis.size(); ++i) { | ||
vec_origin_out_shape[decrease_axis[i]] = 1; | ||
} | ||
|
||
int step_in = 0; | ||
for (int i = 0; i < out_dims.size(); ++i) { | ||
while (true) { | ||
if (vec_origin_out_shape[step_in] == -1) { | ||
vec_origin_out_shape[step_in] = out_dims[i]; | ||
break; | ||
} | ||
|
||
step_in++; | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How about implementing this a different way? It looks odd to use an |
||
|
||
out_dims = framework::make_ddim(vec_origin_out_shape); | ||
} | ||
} | ||
|
||
auto offsets = Eigen::array<int, D>(); | ||
auto extents = Eigen::array<int, D>(); | ||
for (size_t i = 0; i < D; ++i) { | ||
|
@@ -155,7 +217,7 @@ class SliceGradKernel : public framework::OpKernel<T> { | |
*d_input); | ||
auto d_out_t = | ||
framework::EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From( | ||
*d_out); | ||
*d_out, out_dims); | ||
d_in_t.device(place) = d_out_t.pad(paddings, 0); | ||
} | ||
}; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -82,7 +82,24 @@ def _current_expected_place(): | |
|
||
|
||
def _cpu_num(): | ||
return int(os.environ.get('CPU_NUM', multiprocessing.cpu_count())) | ||
cpu_num = os.environ.get('CPU_NUM', None) | ||
if cpu_num is None: | ||
sys.stderr.write( | ||
'The CPU_NUM is not specified, you should set CPU_NUM in ' | ||
'the environment variable list, i.e export CPU_NUM=1. CPU_NUM ' | ||
'indicates that how many CPUPlace are used in the current task.\n' | ||
'!!! The default number of CPUPlaces is 1.') | ||
os.environ['CPU_NUM'] = str(1) | ||
return int(cpu_num) | ||
|
||
|
||
def _cuda_ids(): | ||
gpus_env = os.getenv("FLAGS_selected_gpus") | ||
if gpus_env: | ||
device_ids = [int(s) for s in gpus_env.split(",")] | ||
else: | ||
device_ids = six.moves.range(core.get_cuda_device_count()) | ||
return device_ids | ||
|
||
|
||
def cuda_places(device_ids=None): | ||
|
@@ -116,11 +133,7 @@ def cuda_places(device_ids=None): | |
assert core.is_compiled_with_cuda(), \ | ||
"Not compiled with CUDA" | ||
if device_ids is None: | ||
gpus_env = os.getenv("FLAGS_selected_gpus") | ||
if gpus_env: | ||
device_ids = [int(s) for s in gpus_env.split(",")] | ||
else: | ||
device_ids = six.moves.range(core.get_cuda_device_count()) | ||
device_ids = _cuda_ids() | ||
elif not isinstance(device_ids, (list, tuple)): | ||
device_ids = [device_ids] | ||
return [core.CUDAPlace(dev_id) for dev_id in device_ids] | ||
|
@@ -743,10 +756,8 @@ def _detectContinuesSlice(self, item): | |
def _cloneVar(self, copy=False): | ||
if not copy: | ||
return self.block.create_var( | ||
name=unique_name.generate(".".join(self.name)), | ||
dtype=self.dtype, | ||
persistable=self.persistable, | ||
stop_gradient=self.stop_gradient, ) | ||
name=unique_name.generate_with_ignorable_key(self.name), | ||
dtype=self.dtype) | ||
else: | ||
return self | ||
|
||
|
@@ -776,6 +787,7 @@ def _sliceAndConcatVar(self, item, axis): | |
return self._cloneVar(True) | ||
start, stop, step = self._slice_indices(item, self.shape[axis]) | ||
if step == 1: | ||
print("22", start) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Delete this? |
||
return self._sliceVar([axis], [start], [stop]) | ||
else: | ||
vars = [] | ||
|
@@ -811,35 +823,84 @@ def __getitem__(self, item): | |
Returns: | ||
Sliced variable | ||
""" | ||
new_var = None | ||
if isinstance(item, tuple): | ||
if len(item) > len(self.shape): | ||
raise IndexError("Too many indexes") | ||
fixedSize = True | ||
for i in range(len(self.shape)): | ||
if self.shape[i] == -1: | ||
fixedSize = False | ||
break | ||
|
||
newitem = self._reconstructSliceinfo(item) or item | ||
if fixedSize: | ||
check, info = self._detectContinuesSlice(newitem) | ||
if check: | ||
starts = info[0] | ||
ends = info[1] | ||
axes = [i for i in range(len(starts))] | ||
return self._sliceVar(axes, starts, ends) | ||
else: | ||
new_var = self | ||
for index, o in enumerate(newitem): | ||
new_var = new_var._sliceAndConcatVar(o, index) | ||
if not isinstance(item, tuple): | ||
item = [item] | ||
|
||
decrease_axis = [] | ||
slice_axis = [] | ||
slice_start = [] | ||
slice_end = [] | ||
reverse_axis = [] | ||
|
||
for dim, slice_item in enumerate(item): | ||
if isinstance(slice_item, slice): | ||
start = slice_item.start | ||
end = slice_item.stop | ||
step = slice_item.step if slice_item.step else 1 | ||
|
||
assert (step == 1 or step == -1) | ||
|
||
if step == -1: | ||
reverse_axis.append(dim) | ||
assert (start is None and end is None) | ||
|
||
if start is None and end is None: | ||
continue | ||
|
||
if start is None: | ||
start = 0 | ||
|
||
if end is None: | ||
end = 10000000 | ||
|
||
slice_axis.append(dim) | ||
slice_start.append(start) | ||
slice_end.append(end) | ||
else: | ||
new_var = self | ||
for index, o in enumerate(newitem): | ||
new_var = new_var._sliceAndConcatVar(o, index) | ||
else: | ||
new_var = self._sliceAndConcatVar(item, 0) | ||
return new_var | ||
# int | ||
decrease_axis.append(dim) | ||
slice_axis.append(dim) | ||
slice_start.append(slice_item) | ||
slice_end.append(slice_item + 1 | ||
if slice_item != -1 else 10000000) | ||
|
||
out = self | ||
if len(slice_axis) > 0: | ||
# append slice_op here | ||
|
||
slice_out_var = self.block.create_var( | ||
name=unique_name.generate_with_ignorable_key(self.name + | ||
"_slice"), | ||
dtype=self.dtype) | ||
|
||
self.block.append_op( | ||
type="slice", | ||
inputs={'Input': [out]}, | ||
outputs={'Out': [slice_out_var]}, | ||
attrs={ | ||
'axes': slice_axis, | ||
'starts': slice_start, | ||
'ends': slice_end, | ||
'decrease_axis': decrease_axis | ||
}) | ||
|
||
out = slice_out_var | ||
|
||
if len(reverse_axis) > 0: | ||
reverse_out_var = self.block.create_var( | ||
name=unique_name.generate_with_ignorable_key(self.name + | ||
"_slice_reverse"), | ||
dtype=self.dtype) | ||
self.block.append_op( | ||
type="reverse", | ||
inputs={'X': out}, | ||
outputs={'Out': [reverse_out_var]}, | ||
attrs={'axis': reverse_axis}) | ||
|
||
out = reverse_out_var | ||
|
||
return out | ||
|
||
|
||
def get_all_op_protos(): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this check come before line 56? That way, we may not need line 56 at all.