update backend for PyTorch Update (zhanghang1989#130)
* update backend

* version

fixes zhanghang1989#123
zhanghang1989 committed Oct 4, 2018
1 parent a0fe622 commit c2cb2aa
Showing 16 changed files with 38 additions and 27 deletions.
4 changes: 2 additions & 2 deletions LICENSE
@@ -1,7 +1,7 @@
MIT License

-Copyright (c) 2017 Hang Zhang. All rights reserved.
-Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All rights reserved.
+Copyright (c) 2017- Hang Zhang. All rights reserved.
+Copyright (c) 2018- Amazon.com, Inc. or its affiliates. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
6 changes: 3 additions & 3 deletions docs/source/experiments/segmentation.rst
@@ -83,15 +83,15 @@ Test Pre-trained Model


<code xml:space="preserve" id="cmd_enc101_ade" style="display: none; text-align: left; white-space: pre-wrap">
-CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset ADE20K --model EncNet --aux --se-loss --backbone resnet101
+CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset ADE20K --model EncNet --aux --se-loss --backbone resnet101 --base-size 640 --crop-size 576
</code>

<code xml:space="preserve" id="cmd_enc101_voc" style="display: none; text-align: left; white-space: pre-wrap">
# First finetuning COCO dataset pretrained model on augmented set
# You can also train from scratch on COCO by yourself
-CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset Pascal_aug --model-zoo EncNet_Resnet101_COCO --aux --se-loss --lr 0.001 --syncbn --ngpus 4 --checkname res101
+CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset Pascal_aug --model-zoo EncNet_Resnet101_COCO --aux --se-loss --lr 0.001 --syncbn --ngpus 4 --checkname res101 --ft
# Finetuning on original set
-CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset Pascal_voc --model encnet --aux --se-loss --backbone resnet101 --lr 0.0001 --syncbn --ngpus 4 --checkname res101 --resume runs/Pascal_aug/encnet/res101/checkpoint.params
+CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset Pascal_voc --model encnet --aux --se-loss --backbone resnet101 --lr 0.0001 --syncbn --ngpus 4 --checkname res101 --resume runs/Pascal_aug/encnet/res101/checkpoint.params --ft
</code>

Quick Demo
2 changes: 1 addition & 1 deletion docs/source/experiments/texture.rst
@@ -22,7 +22,7 @@ Test Pre-trained Model
cd PyTorch-Encoding/
python scripts/prepare_minc.py

-- Download pre-trained model (pre-trained on train-1 split using single training size of 224, with an error rate of :math:`19.70\%` using single crop on test-1 set)::
+- Download pre-trained model (pre-trained on train-1 split using single training size of 224, with an error rate of :math:`18.96\%` using single crop on test-1 set)::

cd experiments/recognition
python model/download_models.py
2 changes: 1 addition & 1 deletion encoding/__init__.py
@@ -10,4 +10,4 @@

"""An optimized PyTorch package with CUDA backend."""
from .version import __version__
-from . import nn, functions, dilated, parallel, utils, models, datasets, optimizer
+from . import nn, functions, dilated, parallel, utils, models, datasets
3 changes: 2 additions & 1 deletion encoding/datasets/cityscapes.py
@@ -6,6 +6,7 @@

import os
import sys
+import random
import numpy as np
from tqdm import tqdm, trange
from PIL import Image, ImageOps, ImageFilter
@@ -93,7 +94,7 @@ def _sync_transform(self, img, mask):
mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
crop_size = self.crop_size
# random scale (short edge from 480 to 720)
-short_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.5))
+short_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.0))
w, h = img.size
if h > w:
ow = short_size
4 changes: 3 additions & 1 deletion encoding/lib/cpu/nms_cpu.cpp
@@ -1,3 +1,4 @@
+#include <torch/tensor.h>
#include <ATen/ATen.h>
#include <ATen/NativeFunctions.h>

@@ -42,7 +43,8 @@ std::vector<at::Tensor> Non_Max_Suppression_CPU(

auto num_boxes = input.size(1);
auto batch_size = input.size(0);
-auto mask = input.type().toScalarType(at::kByte).tensor({batch_size, num_boxes});
+auto mask = torch::zeros({batch_size, num_boxes}, input.type().toScalarType(at::kByte));
+//auto mask = input.type().toScalarType(at::kByte).tensor({batch_size, num_boxes});
mask.fill_(1);
auto *rawMask = mask.data<unsigned char>();
auto *rawIdx = sorted_inds.data<int64_t>();
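The recurring backend change across these C++/CUDA files replaces the deprecated `Type::tensor(...)` allocation idiom with factory functions such as `torch::zeros(...)`, which take the shape plus a `TensorOptions` and zero-initialize the memory. A minimal standalone sketch of the pattern (the helper name and the `options().dtype(...)` spelling are illustrative; the commit itself passes `input.type().toScalarType(at::kByte)`, which converts to options):

#include <torch/torch.h>

// Hypothetical helper showing the before/after allocation idiom.
at::Tensor make_box_mask(const at::Tensor& input) {
  auto batch_size = input.size(0);
  auto num_boxes  = input.size(1);
  // Old idiom (removed): allocate through the deprecated Type interface,
  // leaving the memory uninitialized:
  //   auto mask = input.type().toScalarType(at::kByte).tensor({batch_size, num_boxes});
  // New idiom (added): a factory function with shape + options derived
  // from the source tensor; torch::zeros also zero-initializes.
  auto mask = torch::zeros({batch_size, num_boxes},
                           input.options().dtype(at::kByte));
  mask.fill_(1);  // the NMS code then starts from "all boxes kept"
  return mask;
}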
5 changes: 3 additions & 2 deletions encoding/lib/cpu/roi_align_cpu.cpp
@@ -1,3 +1,4 @@
+#include <torch/tensor.h>
#include <ATen/ATen.h>
//#include <omp.h>

@@ -404,7 +405,7 @@ at::Tensor ROIAlign_Forward_CPU(
AT_ASSERT(roi_cols == 4 || roi_cols == 5);

// Output at::Tensor is (num_rois, C, pooled_height, pooled_width)
-auto output = input.type().tensor({num_rois, channels, pooled_height, pooled_width});
+auto output = torch::zeros({num_rois, channels, pooled_height, pooled_width}, input.options());

AT_ASSERT(input.is_contiguous());
AT_ASSERT(bottom_rois.is_contiguous());
@@ -451,7 +452,7 @@ at::Tensor ROIAlign_Backward_CPU(
AT_ASSERT(roi_cols == 4 || roi_cols == 5);

// Output at::Tensor is (num_rois, C, pooled_height, pooled_width)
-auto grad_in = bottom_rois.type().tensor({b_size, channels, height, width}).zero_();
+auto grad_in = torch::zeros({b_size, channels, height, width}, bottom_rois.options());

AT_ASSERT(bottom_rois.is_contiguous());

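As background for these allocations (standard ROI Align behavior, not something this diff changes): each pooled output cell averages n bilinearly interpolated samples of the input feature map, which is why the forward pass produces a tensor of shape (num_rois, C, pooled_height, pooled_width):

$$ y_{r,c,i,j} \;=\; \frac{1}{n} \sum_{k=1}^{n} \operatorname{bilinear}\big(X_c,\, p_{r,i,j,k}\big), $$

where the p_{r,i,j,k} are the regularly spaced sampling points inside cell (i, j) of RoI r.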
5 changes: 3 additions & 2 deletions encoding/lib/cpu/syncbn_cpu.cpp
@@ -1,3 +1,4 @@
+#include <torch/tensor.h>
#include <ATen/ATen.h>
#include <vector>

@@ -45,8 +46,8 @@ std::vector<at::Tensor> BatchNorm_Backward_CPU(
std::vector<at::Tensor> Sum_Square_Forward_CPU(
const at::Tensor input) {
/* outputs */
-at::Tensor sum = input.type().tensor({input.size(1)}).zero_();
-at::Tensor square = input.type().tensor({input.size(1)}).zero_();
+at::Tensor sum = torch::zeros({input.size(1)}, input.options());
+at::Tensor square = torch::zeros({input.size(1)}, input.options());
return {sum, square};
}

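For orientation, the Sum_Square kernels exist because synchronized BatchNorm reduces per-channel sums of x and x^2 on each device and all-reduces the pair; the global statistics then follow from the standard identities (textbook BN algebra, not from this diff):

$$ \mu_c = \frac{1}{N}\sum_i x_{ic}, \qquad \sigma_c^2 = \frac{1}{N}\sum_i x_{ic}^2 - \mu_c^2. $$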
5 changes: 3 additions & 2 deletions encoding/lib/gpu/encoding_kernel.cu
@@ -1,4 +1,5 @@
#include <vector>
+#include <torch/tensor.h>
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>

@@ -165,7 +166,7 @@ at::Tensor Aggregate_Forward_CUDA(
const at::Tensor X_,
const at::Tensor C_) {
/* Device tensors */
-auto E_ = A_.type().tensor({A_.size(0), C_.size(0), C_.size(1)}).zero_();
+auto E_ = torch::zeros({A_.size(0), C_.size(0), C_.size(1)}, A_.options());
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
// B, K, D
dim3 blocks(C_.size(1), C_.size(0), X_.size(0));
@@ -214,7 +215,7 @@ at::Tensor ScaledL2_Forward_CUDA(
const at::Tensor X_,
const at::Tensor C_,
const at::Tensor S_) {
-auto SL_ = X_.type().tensor({X_.size(0), X_.size(1), C_.size(0)}).zero_();
+auto SL_ = torch::zeros({X_.size(0), X_.size(1), C_.size(0)}, X_.options());
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 blocks(C_.size(0), X_.size(1), X_.size(0));
dim3 threads(getNumThreads(C_.size(1)));
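For reference, these two kernels are the encoding-layer operators from Deep TEN (sketched here from the paper; the notation is ours, not from the diff): ScaledL2 computes smoothing-scaled distances between descriptors x_i and codewords c_k, and Aggregate soft-assigns the residuals onto the K codewords,

$$ SL_{ik} = s_k \lVert x_i - c_k \rVert^2, \qquad a_{ik} = \frac{\exp(-SL_{ik})}{\sum_j \exp(-SL_{ij})}, \qquad e_k = \sum_{i=1}^{N} a_{ik}\,(x_i - c_k), $$

which matches the allocated shapes: SL_ is {B, N, K} and E_ is {B, K, D}.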
7 changes: 4 additions & 3 deletions encoding/lib/gpu/encodingv2_kernel.cu
@@ -1,4 +1,5 @@
#include <vector>
+#include <torch/tensor.h>
#include <ATen/ATen.h>
#include <ATen/Functions.h>
#include <ATen/cuda/CUDAContext.h>
@@ -239,7 +240,7 @@ at::Tensor Encoding_Dist_Inference_Forward_CUDA(
const at::Tensor STD_) {
// const at::Tensor S_,
// X \in R^{B, N, D}, C \in R^{K, D}, S \in R^K
-auto KD_ = X_.type().tensor({X_.size(0), X_.size(1), C_.size(0)}).zero_();
+auto KD_ = torch::zeros({X_.size(0), X_.size(1), C_.size(0)}, X_.options());
// E(x), E(x^2)
int N = X_.size(0) * X_.size(1);
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
@@ -301,7 +302,7 @@ std::vector<at::Tensor> Encoding_Dist_Forward_CUDA(
double eps) {
// const at::Tensor S_,
// X \in R^{B, N, D}, C \in R^{K, D}, S \in R^K
-auto KD_ = X_.type().tensor({X_.size(0), X_.size(1), C_.size(0)}).zero_();
+auto KD_ = torch::zeros({X_.size(0), X_.size(1), C_.size(0)}, X_.options());
// E(x), E(x^2)
int N = X_.size(0) * X_.size(1);
auto SVar_ = (X_.pow(2).sum(0).sum(0).view({1, X_.size(2)}) -
@@ -373,7 +374,7 @@ at::Tensor AggregateV2_Forward_CUDA(
const at::Tensor C_,
const at::Tensor STD_) {
/* Device tensors */
-auto E_ = A_.type().tensor({A_.size(0), C_.size(0), C_.size(1)}).zero_();
+auto E_ = torch::zeros({A_.size(0), C_.size(0), C_.size(1)}, A_.options());
// auto IS_ = 1.0f / (S_ + eps).sqrt();
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
// B, K, D
4 changes: 3 additions & 1 deletion encoding/lib/gpu/nms_kernel.cu
@@ -1,3 +1,4 @@
+#include <torch/tensor.h>
#include <ATen/ATen.h>
#include "ATen/NativeFunctions.h"
#include <ATen/cuda/CUDAContext.h>
@@ -75,7 +76,8 @@ std::vector<at::Tensor> Non_Max_Suppression_CUDA(

auto num_boxes = input.size(1);
auto batch_size = input.size(0);
-auto mask = input.type().toScalarType(at::kByte).tensor({batch_size, num_boxes});
+//auto mask = input.type().toScalarType(at::kByte).tensor({batch_size, num_boxes});
+auto mask = torch::zeros({batch_size, num_boxes}, input.type().toScalarType(at::kByte));
mask.fill_(1);

//need the indices of the boxes sorted by score.
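The surrounding kernel implements greedy non-maximum suppression; as a reading aid, here is a self-contained CPU sketch of the same algorithm (Box, iou, and nms_keep_mask are illustrative names, not repo API):

#include <algorithm>
#include <vector>

struct Box { float x1, y1, x2, y2; };

// Intersection-over-union of two axis-aligned boxes.
static float iou(const Box& a, const Box& b) {
  float ix1 = std::max(a.x1, b.x1), iy1 = std::max(a.y1, b.y1);
  float ix2 = std::min(a.x2, b.x2), iy2 = std::min(a.y2, b.y2);
  float inter = std::max(0.f, ix2 - ix1) * std::max(0.f, iy2 - iy1);
  float area_a = (a.x2 - a.x1) * (a.y2 - a.y1);
  float area_b = (b.x2 - b.x1) * (b.y2 - b.y1);
  return inter / (area_a + area_b - inter);
}

// boxes must be pre-sorted by descending score (cf. sorted_inds above).
std::vector<bool> nms_keep_mask(const std::vector<Box>& boxes, float thresh) {
  std::vector<bool> keep(boxes.size(), true);  // like mask.fill_(1): all kept
  for (size_t i = 0; i < boxes.size(); ++i) {
    if (!keep[i]) continue;                    // already suppressed
    for (size_t j = i + 1; j < boxes.size(); ++j)
      if (keep[j] && iou(boxes[i], boxes[j]) > thresh)
        keep[j] = false;                       // suppress lower-scored overlap
  }
  return keep;
}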
5 changes: 3 additions & 2 deletions encoding/lib/gpu/roi_align_kernel.cu
@@ -1,3 +1,4 @@
+#include <torch/tensor.h>
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>

@@ -367,7 +368,7 @@ at::Tensor ROIAlign_Forward_CUDA(
auto width = input.size(3);

// Output Tensor is (num_rois, C, pooled_height, pooled_width)
-auto output = input.type().tensor({proposals, channels, pooled_height, pooled_width});
+auto output = torch::zeros({proposals, channels, pooled_height, pooled_width}, input.options());

auto count = output.numel();

@@ -414,7 +415,7 @@ at::Tensor ROIAlign_Backward_CUDA(
// Output Tensor is (num_rois, C, pooled_height, pooled_width)
// gradient wrt input features
-auto grad_in = rois.type().tensor({b_size, channels, height, width}).zero_();
+auto grad_in = torch::zeros({b_size, channels, height, width}, rois.options());
auto num_rois = rois.size(0);
auto count = grad_output.numel();
5 changes: 3 additions & 2 deletions encoding/lib/gpu/syncbn_kernel.cu
@@ -1,4 +1,5 @@
#include <vector>
+#include <torch/tensor.h>
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>

@@ -244,8 +245,8 @@ std::vector<at::Tensor> BatchNorm_Backward_CUDA(
std::vector<at::Tensor> Sum_Square_Forward_CUDA(
const at::Tensor input_) {
/* outputs */
-at::Tensor sum_ = input_.type().tensor({input_.size(1)}).zero_();
-at::Tensor square_ = input_.type().tensor({input_.size(1)}).zero_();
+at::Tensor sum_ = torch::zeros({input_.size(1)}, input_.options());
+at::Tensor square_ = torch::zeros({input_.size(1)}, input_.options());
/* cuda utils*/
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
dim3 blocks(input_.size(1));
2 changes: 1 addition & 1 deletion encoding/models/model_store.py
@@ -11,7 +11,7 @@
('2a57e44de9c853fa015b172309a1ee7e2d0e4e2a', 'resnet101'),
('0d43d698c66aceaa2bc0309f55efdd7ff4b143af', 'resnet152'),
('2e22611a7f3992ebdee6726af169991bc26d7363', 'deepten_minc'),
-('fc8c0b795abf0133700c2d4265d2f9edab7eb6cc', 'fcn_resnet50_ade'),
+('662e979de25a389f11c65e9f1df7e06c2c356381', 'fcn_resnet50_ade'),
('eeed8e582f0fdccdba8579e7490570adc6d85c7c', 'fcn_resnet50_pcontext'),
('54f70c772505064e30efd1ddd3a14e1759faa363', 'psp_resnet50_ade'),
('075195c5237b778c718fd73ceddfa1376c18dfd0', 'deeplab_resnet50_ade'),
4 changes: 2 additions & 2 deletions experiments/segmentation/option.py
@@ -92,15 +92,15 @@ def parse(self):
if args.epochs is None:
epoches = {
'coco': 30,
-'citys': 180,
+'citys': 240,
'pascal_voc': 50,
'pascal_aug': 50,
'pcontext': 80,
'ade20k': 120,
}
args.epochs = epoches[args.dataset.lower()]
if args.batch_size is None:
-args.batch_size = 4 * torch.cuda.device_count()
+args.batch_size = 16
if args.test_batch_size is None:
args.test_batch_size = args.batch_size
if args.lr is None:
2 changes: 1 addition & 1 deletion setup.py
@@ -18,7 +18,7 @@

cwd = os.path.dirname(os.path.abspath(__file__))

-version = '0.5.0'
+version = '0.5.1'
try:
sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'],
cwd=cwd).decode('ascii').strip()
