Support PyTorch 1.6.0 (zhanghang1989#309)
* doc

* pre-compile

* fix dispatch
zhanghang1989 committed Aug 8, 2020
1 parent f46bcf7 commit 1235f3b
Showing 18 changed files with 145 additions and 138 deletions.
5 changes: 5 additions & 0 deletions docs/source/notes/compile.rst
@@ -10,11 +10,16 @@ Installation
 * PIP Install::
 
     pip install torch-encoding --pre
+    # macOS
+    CC=clang CXX=clang++ pip install torch-encoding --pre
 
 * Install from source::
 
     git clone https://github.com/zhanghang1989/PyTorch-Encoding && cd PyTorch-Encoding
+    # ubuntu
     python setup.py install
+    # macOS
+    CC=clang CXX=clang++ python setup.py install
 
 
 Using Docker
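A quick post-install sanity check (a minimal sketch, not part of the docs change; the package installs under the top-level import name ``encoding``, which the modules in this commit assume)::

    import torch
    import encoding  # typically fails here if the pre-compiled extension did not build
    print(torch.__version__)  # this commit targets PyTorch 1.6.0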
9 changes: 6 additions & 3 deletions encoding/functions/customize.py
@@ -11,7 +11,10 @@
 
 import torch
 from torch.autograd import Variable, Function
-from .. import lib
+
+from encoding import cpu
+if torch.cuda.device_count() > 0:
+    from encoding import gpu
 
 __all__ = ['NonMaxSuppression']
 
@@ -49,6 +52,6 @@ def NonMaxSuppression(boxes, scores, threshold):
         >>> surviving_box_indices = indices[mask]
     """
     if boxes.is_cuda:
-        return lib.gpu.non_max_suppression(boxes, scores, threshold)
+        return gpu.non_max_suppression(boxes, scores, threshold)
     else:
-        return lib.cpu.non_max_suppression(boxes, scores, threshold)
+        return cpu.non_max_suppression(boxes, scores, threshold)
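The same guarded-import and device-dispatch pattern recurs in every module below; condensed into a standalone sketch (not code from the commit), using the non_max_suppression case as the example:

    import torch

    # The CPU extension is always imported; the CUDA extension only when CUDA
    # devices are visible, presumably so CPU-only machines can import the
    # package without the GPU build.
    from encoding import cpu
    if torch.cuda.device_count() > 0:
        from encoding import gpu

    def non_max_suppression(boxes, scores, threshold):
        # Dispatch on the tensor's device, mirroring NonMaxSuppression above.
        if boxes.is_cuda:
            return gpu.non_max_suppression(boxes, scores, threshold)
        return cpu.non_max_suppression(boxes, scores, threshold)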
19 changes: 11 additions & 8 deletions encoding/functions/dist_syncbn.py
@@ -8,7 +8,10 @@
 
 import torch
 from torch.autograd.function import Function
-from .. import lib
+
+from encoding import cpu
+if torch.cuda.device_count() > 0:
+    from encoding import gpu
 
 __all__ = ['dist_syncbatchnorm']
 
@@ -25,9 +28,9 @@ def forward(ctx, x, gamma, beta, running_mean, running_var, eps, momentum, train
             _ex, _var = running_mean.contiguous(), running_var.contiguous()
             _exs = _var + _ex ** 2
             if x.is_cuda:
-                y = lib.gpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
+                y = gpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
             else:
-                y = lib.cpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
+                y = cpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
             ctx.save_for_backward(x, _ex, _exs, gamma, beta)
             return y
 
@@ -36,7 +39,7 @@ def forward(ctx, x, gamma, beta, running_mean, running_var, eps, momentum, train
             raise ValueError('Expected more than 1 value per channel when training, got input size {}'.format(size))
 
         if x.is_cuda:
-            _ex, _exs = lib.gpu.expectation_forward(x)
+            _ex, _exs = gpu.expectation_forward(x)
         else:
             raise NotImplemented
 
@@ -62,9 +65,9 @@ def forward(ctx, x, gamma, beta, running_mean, running_var, eps, momentum, train
 
         # BN forward + activation
         if x.is_cuda:
-            y = lib.gpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
+            y = gpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
         else:
-            y = lib.cpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
+            y = cpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
 
         ctx.save_for_backward(x, _ex, _exs, gamma, beta)
         return y
@@ -77,7 +80,7 @@ def backward(ctx, dz):
         # BN backward
         if dz.is_cuda:
             dx, _dex, _dexs, dgamma, dbeta = \
-                lib.gpu.batchnorm_backward(dz, x, _ex, _exs, gamma, beta, ctx.eps)
+                gpu.batchnorm_backward(dz, x, _ex, _exs, gamma, beta, ctx.eps)
         else:
             raise NotImplemented
 
@@ -96,7 +99,7 @@ def backward(ctx, dz):
         _dexs = _dexs / count
 
         if x.is_cuda:
-            dx_ = lib.gpu.expectation_backward(x, _dex, _dexs)
+            dx_ = gpu.expectation_backward(x, _dex, _dexs)
         else:
             raise NotImplemented
         dx = dx + dx_
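The eval-mode branch above rebuilds the second moment from the stored running statistics via E[x²] = Var[x] + (E[x])², which is what `_exs = _var + _ex ** 2` encodes. A small self-check of that identity (a sketch, independent of the compiled kernels):

    import torch

    x = torch.randn(4, 3, 8, 8)
    ex = x.mean()
    var = x.var(unbiased=False)   # population variance, i.e. E[x^2] - (E[x])^2
    exs = var + ex ** 2           # same reconstruction as in the diff
    assert torch.allclose(exs, (x ** 2).mean(), atol=1e-6)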
21 changes: 12 additions & 9 deletions encoding/functions/encoding.py
@@ -11,7 +11,10 @@
 import torch
 from torch.autograd import Function, Variable
 import torch.nn.functional as F
-from .. import lib
+
+from encoding import cpu
+if torch.cuda.device_count() > 0:
+    from encoding import gpu
 
 __all__ = ['aggregate', 'scaled_l2', 'pairwise_cosine']
 
@@ -21,18 +24,18 @@ def forward(ctx, A, X, C):
         # A \in(BxNxK) R \in(BxNxKxD) => E \in(BxNxD)
         ctx.save_for_backward(A, X, C)
         if A.is_cuda:
-            E = lib.gpu.aggregate_forward(A, X, C)
+            E = gpu.aggregate_forward(A, X, C)
         else:
-            E = lib.cpu.aggregate_forward(A, X, C)
+            E = cpu.aggregate_forward(A, X, C)
         return E
 
     @staticmethod
     def backward(ctx, gradE):
         A, X, C = ctx.saved_variables
         if A.is_cuda:
-            gradA, gradX, gradC = lib.gpu.aggregate_backward(gradE, A, X, C)
+            gradA, gradX, gradC = gpu.aggregate_backward(gradE, A, X, C)
         else:
-            gradA, gradX, gradC = lib.cpu.aggregate_backward(gradE, A, X, C)
+            gradA, gradX, gradC = cpu.aggregate_backward(gradE, A, X, C)
         return gradA, gradX, gradC
 
 def aggregate(A, X, C):
@@ -64,19 +67,19 @@ class _scaled_l2(Function):
     @staticmethod
     def forward(ctx, X, C, S):
         if X.is_cuda:
-            SL = lib.gpu.scaled_l2_forward(X, C, S)
+            SL = gpu.scaled_l2_forward(X, C, S)
         else:
-            SL = lib.cpu.scaled_l2_forward(X, C, S)
+            SL = cpu.scaled_l2_forward(X, C, S)
         ctx.save_for_backward(X, C, S, SL)
         return SL
 
     @staticmethod
     def backward(ctx, gradSL):
         X, C, S, SL = ctx.saved_variables
         if X.is_cuda:
-            gradX, gradC, gradS = lib.gpu.scaled_l2_backward(gradSL, X, C, S, SL)
+            gradX, gradC, gradS = gpu.scaled_l2_backward(gradSL, X, C, S, SL)
         else:
-            gradX, gradC, gradS = lib.cpu.scaled_l2_backward(gradSL, X, C, S, SL)
+            gradX, gradC, gradS = cpu.scaled_l2_backward(gradSL, X, C, S, SL)
         return gradX, gradC, gradS
 
 def scaled_l2(X, C, S):
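For orientation, a rough pure-PyTorch reference for what the compiled aggregate / scaled_l2 kernels compute, assuming the usual Encoding-layer definitions (assignment-weighted sums of residuals, and smoothed squared distances to the codewords); treat it as illustrative, not as the kernels' exact contract:

    import torch

    def aggregate_ref(A, X, C):
        # A: (B, N, K) assignments, X: (B, N, D) features, C: (K, D) codewords
        R = X.unsqueeze(2) - C[None, None]              # residuals, (B, N, K, D)
        return torch.einsum('bnk,bnkd->bkd', A, R)      # one D-dim encoding per codeword

    def scaled_l2_ref(X, C, S):
        # S: (K,) per-codeword smoothing factors
        R = X.unsqueeze(2) - C[None, None]              # (B, N, K, D)
        return S[None, None, :] * R.pow(2).sum(-1)      # (B, N, K)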
12 changes: 7 additions & 5 deletions encoding/functions/rectify.py
@@ -10,7 +10,9 @@
 import torch
 from torch.autograd import Function
 
-from .. import lib
+from encoding import cpu
+if torch.cuda.device_count() > 0:
+    from encoding import gpu
 
 __all__ = ['rectify']
 
@@ -26,20 +28,20 @@ def forward(ctx, y, x, kernel_size, stride, padding, dilation, average):
         ctx.dilation = dilation
         ctx.average = average
         if x.is_cuda:
-            lib.gpu.conv_rectify(y, x, kernel_size, stride, padding, dilation, average)
+            gpu.conv_rectify(y, x, kernel_size, stride, padding, dilation, average)
         else:
-            lib.cpu.conv_rectify(y, x, kernel_size, stride, padding, dilation, average)
+            cpu.conv_rectify(y, x, kernel_size, stride, padding, dilation, average)
         ctx.mark_dirty(y)
         return y
 
     @staticmethod
     def backward(ctx, grad_y):
         x, = ctx.saved_variables
         if x.is_cuda:
-            lib.gpu.conv_rectify(grad_y, x, ctx.kernel_size, ctx.stride,
+            gpu.conv_rectify(grad_y, x, ctx.kernel_size, ctx.stride,
                              ctx.padding, ctx.dilation, ctx.average)
         else:
-            lib.cpu.conv_rectify(grad_y, x, ctx.kernel_size, ctx.stride,
+            cpu.conv_rectify(grad_y, x, ctx.kernel_size, ctx.stride,
                              ctx.padding, ctx.dilation, ctx.average)
         ctx.mark_dirty(grad_y)
         return grad_y, None, None, None, None, None, None
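conv_rectify modifies its output tensor in place (hence ctx.mark_dirty), rescaling values near the borders to compensate for zero padding. For the plain average-pooling case the rectification idea is analogous to PyTorch's count_include_pad switch; an illustration only, not the kernel itself:

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 1, 5, 5)
    # Dividing by the full kernel area under-weights border outputs...
    naive = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, count_include_pad=True)
    # ...dividing by the number of in-bounds taps "rectifies" them.
    rectified = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1, count_include_pad=False)
    print((naive - rectified).abs().max())  # nonzero only near the borders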
31 changes: 17 additions & 14 deletions encoding/functions/syncbn.py
@@ -12,15 +12,18 @@
 import torch.cuda.comm as comm
 from torch.autograd import Function
 from torch.autograd.function import once_differentiable
-from .. import lib
+
+from encoding import cpu
+if torch.cuda.device_count() > 0:
+    from encoding import gpu
 
 __all__ = ['moments', 'syncbatchnorm', 'inp_syncbatchnorm']
 
 class moments_(Function):
     @staticmethod
     def forward(ctx, x):
         if x.is_cuda:
-            ex, ex2 = lib.gpu.expectation_forward(x)
+            ex, ex2 = gpu.expectation_forward(x)
         else:
             raise NotImplemented
         ctx.save_for_backward(x)
@@ -30,7 +33,7 @@ def forward(ctx, x):
     def backward(ctx, dex, dex2):
         x, = ctx.saved_tensors
         if dex.is_cuda:
-            dx = lib.gpu.expectation_backward(x, dex, dex2)
+            dx = gpu.expectation_backward(x, dex, dex2)
         else:
             raise NotImplemented
         return dx
@@ -57,7 +60,7 @@ def forward(cls, ctx, x, gamma, beta, running_mean, running_var,
 
         if ctx.training:
             if x.is_cuda:
-                _ex, _exs = lib.gpu.expectation_forward(x)
+                _ex, _exs = gpu.expectation_forward(x)
             else:
                 raise NotImplemented
 
@@ -94,9 +97,9 @@ def forward(cls, ctx, x, gamma, beta, running_mean, running_var,
 
         # BN forward + activation
         if x.is_cuda:
-            y = lib.gpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
+            y = gpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
         else:
-            y = lib.cpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
+            y = cpu.batchnorm_forward(x, _ex, _exs, gamma, beta, ctx.eps)
 
         # Output
         ctx.save_for_backward(x, _ex, _exs, gamma, beta)
@@ -111,7 +114,7 @@ def backward(ctx, dz):
 
         # BN backward
         if dz.is_cuda:
             dx, _dex, _dexs, dgamma, dbeta = \
-                lib.gpu.batchnorm_backward(dz, x, _ex, _exs, gamma, beta, ctx.eps)
+                gpu.batchnorm_backward(dz, x, _ex, _exs, gamma, beta, ctx.eps)
         else:
             raise NotImplemented
@@ -137,7 +140,7 @@ def backward(ctx, dz):
             ctx.worker_queue.task_done()
 
         if x.is_cuda:
-            dx_ = lib.gpu.expectation_backward(x, _dex, _dexs)
+            dx_ = gpu.expectation_backward(x, _dex, _dexs)
         else:
             raise NotImplemented
         dx = dx + dx_
@@ -158,7 +161,7 @@ def _parse_extra(ctx, extra):
 def _act_forward(ctx, x):
     if ctx.activation.lower() == "leaky_relu":
         if x.is_cuda:
-            lib.gpu.leaky_relu_forward(x, ctx.slope)
+            gpu.leaky_relu_forward(x, ctx.slope)
         else:
             raise NotImplemented
     else:
@@ -167,7 +170,7 @@ def _act_forward(ctx, x):
 def _act_backward(ctx, x, dx):
     if ctx.activation.lower() == "leaky_relu":
         if x.is_cuda:
-            lib.gpu.leaky_relu_backward(x, dx, ctx.slope)
+            gpu.leaky_relu_backward(x, dx, ctx.slope)
         else:
             raise NotImplemented
     else:
@@ -194,7 +197,7 @@ def forward(cls, ctx, x, gamma, beta, running_mean, running_var,
 
         if ctx.training:
             if x.is_cuda:
-                _ex, _exs = lib.gpu.expectation_forward(x)
+                _ex, _exs = gpu.expectation_forward(x)
             else:
                 raise NotImplemented
 
@@ -232,7 +235,7 @@ def forward(cls, ctx, x, gamma, beta, running_mean, running_var,
 
         # BN forward + activation
         if x.is_cuda:
-            lib.gpu.batchnorm_inp_forward(x, _ex, _exs, gamma, beta, ctx.eps)
+            gpu.batchnorm_inp_forward(x, _ex, _exs, gamma, beta, ctx.eps)
         else:
             raise NotImplemented
 
@@ -254,7 +257,7 @@ def backward(ctx, dz):
 
         # BN backward
         if dz.is_cuda:
             dx, _dex, _dexs, dgamma, dbeta = \
-                lib.gpu.batchnorm_inp_backward(dz, z, _ex, _exs, gamma, beta, ctx.eps)
+                gpu.batchnorm_inp_backward(dz, z, _ex, _exs, gamma, beta, ctx.eps)
         else:
             raise NotImplemented
@@ -280,7 +283,7 @@ def backward(ctx, dz):
             ctx.worker_queue.task_done()
 
         if z.is_cuda:
-            lib.gpu.expectation_inp_backward(dx, z, _dex, _dexs, _ex, _exs, gamma, beta, ctx.eps)
+            gpu.expectation_inp_backward(dx, z, _dex, _dexs, _ex, _exs, gamma, beta, ctx.eps)
         else:
             raise NotImplemented
 
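Given per-channel first and second moments, the batchnorm_forward kernels above amount to the standard normalization y = gamma * (x - E[x]) / sqrt(E[x²] - E[x]² + eps) + beta. A pure-PyTorch sketch of that computation (an assumption about the kernel's contract, for orientation only; NCHW input, channel-wise statistics):

    import torch

    def batchnorm_forward_ref(x, ex, exs, gamma, beta, eps):
        # x: (N, C, H, W); ex, exs, gamma, beta: (C,)
        var = exs - ex ** 2
        shape = (1, -1, 1, 1)                           # broadcast over N, H, W
        inv_std = torch.rsqrt(var.view(shape) + eps)
        return gamma.view(shape) * (x - ex.view(shape)) * inv_std + beta.view(shape)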
29 changes: 0 additions & 29 deletions encoding/lib/__init__.py

This file was deleted.

2 changes: 1 addition & 1 deletion encoding/lib/cpu/rectify_cpu.cpp
@@ -194,7 +194,7 @@ void conv_rectify_cpu_tempalte(
 
   at::Tensor input = input_.contiguous();
 
-  AT_DISPATCH_FLOATING_TYPES(input.type(), "conv_rectify_cuda_frame", ([&] {
+  AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "conv_rectify_cuda_frame", ([&] {
     scalar_t *output_data = output.data_ptr<scalar_t>();
     conv_rectify_cpu_frame<scalar_t>(
       output_data,
2 changes: 1 addition & 1 deletion encoding/lib/cpu/roi_align_cpu.cpp
@@ -410,7 +410,7 @@ at::Tensor ROIAlign_Forward_CPU(
   AT_ASSERT(input.is_contiguous());
   AT_ASSERT(bottom_rois.is_contiguous());
 
-  AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIAlign_Forward_CPU", ([&] {
+  AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "ROIAlign_Forward_CPU", ([&] {
     ROIAlignForwardCompute<scalar_t>(
       output.numel(),
       input.data<scalar_t>(),
17 changes: 0 additions & 17 deletions encoding/lib/cpu/setup.py

This file was deleted.
