
Commit

* v1.0
zhanghang1989 committed Dec 17, 2018
1 parent c2cb2aa commit ce461da
Showing 73 changed files with 2,189 additions and 1,045 deletions.
13 changes: 0 additions & 13 deletions docs/source/experiments/cifar.rst
@@ -57,19 +57,6 @@ Train Your Own Model
--eval evaluating


Extending the Software
----------------------

This code is well written, easy to use, and extensible to your own models or datasets:

- Write your own dataloader ``mydataset.py`` and place it in the ``dataset/`` folder

- Write your own model ``mymodel.py`` and place it in the ``model/`` folder

- Run the program::

python main.py --dataset mydataset --model mymodel

Citation
--------

32 changes: 13 additions & 19 deletions docs/source/experiments/segmentation.rst
@@ -38,25 +38,19 @@ Test Pre-trained Model
.. role:: raw-html(raw)
:format: html

+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+------------+
| Model | pixAcc | mIoU | Command | Logs |
+==================================+===========+===========+==============================================================================================+============+
| Encnet_ResNet50_PContext | 79.2% | 51.0% | :raw-html:`<a href="javascript:toggleblock('cmd_enc50_pcont')" class="toggleblock">cmd</a>` | ENC50PC_ |
+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+------------+
| EncNet_ResNet101_PContext | 80.7% | 54.1% | :raw-html:`<a href="javascript:toggleblock('cmd_enc101_pcont')" class="toggleblock">cmd</a>` | ENC101PC_ |
+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+------------+
| EncNet_ResNet50_ADE | 80.1% | 41.5% | :raw-html:`<a href="javascript:toggleblock('cmd_enc50_ade')" class="toggleblock">cmd</a>` | ENC50ADE_ |
+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+------------+
| EncNet_ResNet101_ADE | 81.3% | 44.4% | :raw-html:`<a href="javascript:toggleblock('cmd_enc101_ade')" class="toggleblock">cmd</a>` | ENC101ADE_ |
+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+------------+
| EncNet_ResNet101_VOC | N/A | 85.9% | :raw-html:`<a href="javascript:toggleblock('cmd_enc101_voc')" class="toggleblock">cmd</a>` | ENC101VOC_ |
+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+------------+

.. _ENC50PC: https://github.com/zhanghang1989/image-data/blob/master/encoding/segmentation/logs/encnet_resnet50_pcontext.log?raw=true
.. _ENC101PC: https://github.com/zhanghang1989/image-data/blob/master/encoding/segmentation/logs/encnet_resnet101_pcontext.log?raw=true
.. _ENC50ADE: https://github.com/zhanghang1989/image-data/blob/master/encoding/segmentation/logs/encnet_resnet50_ade.log?raw=true
.. _ENC101ADE: https://github.com/zhanghang1989/image-data/blob/master/encoding/segmentation/logs/encnet_resnet101_ade.log?raw=true
.. _ENC101VOC: https://github.com/zhanghang1989/image-data/blob/master/encoding/segmentation/logs/encnet_resnet101_voc.log?raw=true
+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+
| Model | pixAcc | mIoU | Command |
+==================================+===========+===========+==============================================================================================+
| Encnet_ResNet50_PContext | 79.2% | 51.0% | :raw-html:`<a href="javascript:toggleblock('cmd_enc50_pcont')" class="toggleblock">cmd</a>` |
+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+
| EncNet_ResNet101_PContext | 80.7% | 54.1% | :raw-html:`<a href="javascript:toggleblock('cmd_enc101_pcont')" class="toggleblock">cmd</a>` |
+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+
| EncNet_ResNet50_ADE | 80.1% | 41.5% | :raw-html:`<a href="javascript:toggleblock('cmd_enc50_ade')" class="toggleblock">cmd</a>` |
+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+
| EncNet_ResNet101_ADE | 81.3% | 44.4% | :raw-html:`<a href="javascript:toggleblock('cmd_enc101_ade')" class="toggleblock">cmd</a>` |
+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+
| EncNet_ResNet101_VOC | N/A | 85.9% | :raw-html:`<a href="javascript:toggleblock('cmd_enc101_voc')" class="toggleblock">cmd</a>` |
+----------------------------------+-----------+-----------+----------------------------------------------------------------------------------------------+


.. raw:: html
27 changes: 4 additions & 23 deletions docs/source/experiments/texture.rst
@@ -22,24 +22,19 @@ Test Pre-trained Model
cd PyTorch-Encoding/
python scripts/prepare_minc.py

- Download pre-trained model (pre-trained on train-1 split using single training size of 224, with an error rate of :math:`18.96\%` using single crop on test-1 set)::
- Test the pre-trained model on MINC-2500. The pre-trained weights will be downloaded automatically (pre-trained on the train-1 split with a single training size of 224, reaching an error rate of :math:`18.96\%` with a single crop on the test-1 set)::

cd experiments/recognition
python model/download_models.py

- Test pre-trained model on MINC-2500::

python main.py --dataset minc --model deepten --nclass 23 --resume deepten_minc.pth --eval
python main.py --dataset minc --model deepten_resnet50_minc --nclass 23 --pretrained --eval
# Terminal Output:
# Loss: 1.005 | Err: 18.96% (1090/5750): 100%|████████████████████| 23/23 [00:18<00:00, 1.26it/s]
# Loss: 0.995 | Err: 18.957% (1090/5750): 100%|████████████████████| 23/23 [00:18<00:00, 1.26it/s]


Train Your Own Model
--------------------

- Example training command for training above model::

CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py --dataset minc --model deepten --nclass 23 --batch-size 512 --lr 0.004 --epochs 80 --lr-step 60 --lr-scheduler step
CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py --dataset minc --model deepten_resnet50_minc --batch-size 512 --lr 0.004 --epochs 80 --lr-step 60 --lr-scheduler step --weight-decay 5e-4

- Detail training options::

@@ -62,20 +57,6 @@ Train Your Own Model
--eval evaluating


Extending the Software
----------------------

This code is well written, easy to use, and extensible to your own models or datasets:

- Write your own dataloader ``mydataset.py`` and place it in the ``dataset/`` folder

- Write your own model ``mymodel.py`` and place it in the ``model/`` folder

- Run the program::

python main.py --dataset mydataset --model mymodel


Citation
--------

32 changes: 0 additions & 32 deletions docs/source/functions.rst

This file was deleted.

3 changes: 1 addition & 2 deletions docs/source/index.rst
@@ -30,8 +30,7 @@ An optimized PyTorch package with CUDA backend.

nn
parallel
dilated
functions
models
utils

Indices and tables
14 changes: 7 additions & 7 deletions docs/source/dilated.rst → docs/source/models.rst
@@ -1,9 +1,15 @@
.. role:: hidden
:class: hidden-section

encoding.dilated
encoding.models
================

.. automodule:: encoding.models.resnet
.. currentmodule:: encoding.models.resnet

ResNet
------

We provide correctly dilated pre-trained ResNet and DenseNet models (output stride of 8) for semantic segmentation.
For dilating DenseNet, we provide :class:`encoding.nn.DilatedAvgPool2d`.
All provided models have been verified.
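
For example, a minimal sketch of building one of these backbones (this assumes the
module-level constructors in ``encoding.models.resnet`` accept ``pretrained`` and
``dilated`` keyword arguments; check the class reference below for the exact
signatures)::

    import encoding

    # ResNet-50 with stride-8 dilation, as used by the segmentation models.
    # Both keyword arguments are assumptions to verify against the API docs.
    net = encoding.models.resnet.resnet50(pretrained=True, dilated=True)
    net.eval()
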
@@ -14,12 +20,6 @@ All provided models have been verified.
* Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. "Context Encoding for Semantic Segmentation" *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*


.. automodule:: encoding.dilated
.. currentmodule:: encoding.dilated

ResNet
------

:hidden:`ResNet`
~~~~~~~~~~~~~~~~

10 changes: 8 additions & 2 deletions docs/source/nn.rst
@@ -14,10 +14,10 @@ Customized NN modules in Encoding Package. For Synchronized Cross-GPU Batch Norm
.. autoclass:: Encoding
:members:

:hidden:`BatchNorm2d`
:hidden:`SyncBatchNorm`
~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: BatchNorm2d
.. autoclass:: SyncBatchNorm
:members:
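
A minimal usage sketch (assuming :class:`SyncBatchNorm` keeps the
``torch.nn.BatchNorm2d`` constructor signature, i.e. ``num_features`` as the first
argument)::

    import torch.nn as nn
    import encoding

    # Drop-in replacement for nn.BatchNorm2d; statistics are synchronized
    # across GPUs when the model is trained with the parallel utilities.
    block = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size=3, padding=1),
        encoding.nn.SyncBatchNorm(64),
        nn.ReLU(inplace=True),
    )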

:hidden:`BatchNorm1d`
@@ -26,6 +26,12 @@ Customized NN modules in Encoding Package. For Synchronized Cross-GPU Batch Norm
.. autoclass:: BatchNorm1d
:members:

:hidden:`BatchNorm2d`
~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: BatchNorm2d
:members:

:hidden:`BatchNorm3d`
~~~~~~~~~~~~~~~~~~~~~~~~

9 changes: 3 additions & 6 deletions docs/source/notes/compile.rst
@@ -2,13 +2,10 @@ Install and Citations
=====================


Install from Source
-------------------
Installation
------------

* Install PyTorch by following the `PyTorch instructions <http://pytorch.org/>`_.
  This package relies on the PyTorch master branch (newer than the stable v0.4.0 release); please follow
  `the instruction <https://github.com/pytorch/pytorch#from-source>`_ to install PyTorch from source.
* Install PyTorch 1.0 by following the `PyTorch instructions <http://pytorch.org/>`_.

* PIP Install::

2 changes: 1 addition & 1 deletion encoding/__init__.py
@@ -10,4 +10,4 @@

"""An optimized PyTorch package with CUDA backend."""
from .version import __version__
from . import nn, functions, dilated, parallel, utils, models, datasets
from . import nn, functions, parallel, utils, models, datasets, transforms
41 changes: 40 additions & 1 deletion encoding/datasets/__init__.py
@@ -1,10 +1,16 @@
import warnings
from torchvision.datasets import *
from .base import *
from .coco import COCOSegmentation
from .ade20k import ADE20KSegmentation
from .pascal_voc import VOCSegmentation
from .pascal_aug import VOCAugSegmentation
from .pcontext import ContextSegmentation
from .cityscapes import CitySegmentation
from .imagenet import ImageNetDataset
from .minc import MINCDataset

from ..utils import EncodingDeprecationWarning

datasets = {
'coco': COCOSegmentation,
@@ -13,7 +19,40 @@
'pascal_aug': VOCAugSegmentation,
'pcontext': ContextSegmentation,
'citys': CitySegmentation,
'imagenet': ImageNetDataset,
'minc': MINCDataset,
'cifar10': CIFAR10,
}

acronyms = {
'coco': 'coco',
'pascal_voc': 'voc',
'pascal_aug': 'voc',
    'pcontext': 'pcontext',
'ade20k': 'ade',
'citys': 'citys',
'minc': 'minc',
'cifar10': 'cifar10',
}

def get_segmentation_dataset(name, **kwargs):
def get_dataset(name, **kwargs):
return datasets[name.lower()](**kwargs)

def _make_deprecate(meth, old_name):
new_name = meth.__name__

def deprecated_init(*args, **kwargs):
        warnings.warn("encoding.datasets.{} is now deprecated in favor of encoding.datasets.{}."
                      .format(old_name, new_name), EncodingDeprecationWarning)
return meth(*args, **kwargs)

deprecated_init.__doc__ = r"""
{old_name}(...)
.. warning::
        This method is now deprecated in favor of :func:`encoding.datasets.{new_name}`.
        See :func:`~encoding.datasets.{new_name}` for details.""".format(
old_name=old_name, new_name=new_name)
deprecated_init.__name__ = old_name
return deprecated_init

get_segmentation_dataset = _make_deprecate(get_dataset, 'get_segmentation_dataset')
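
A minimal usage sketch of the new entry point and the deprecation shim (the
``'cifar10'`` entry maps to ``torchvision.datasets.CIFAR10``, so its usual keyword
arguments apply; the ``root`` path below is a placeholder)::

    import encoding

    # New-style lookup through the registry defined above.
    trainset = encoding.datasets.get_dataset('cifar10', root='./data',
                                             train=True, download=True)

    # The old name still works but emits an EncodingDeprecationWarning.
    trainset = encoding.datasets.get_segmentation_dataset('cifar10', root='./data',
                                                          train=True, download=True)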
33 changes: 33 additions & 0 deletions encoding/datasets/ade20k.py
@@ -57,6 +57,39 @@ def __getitem__(self, index):
mask = self.target_transform(mask)
return img, mask

def _sync_transform(self, img, mask):
# random mirror
if random.random() < 0.5:
img = img.transpose(Image.FLIP_LEFT_RIGHT)
mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
crop_size = self.crop_size
w, h = img.size
long_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.5))
if h > w:
oh = long_size
ow = int(1.0 * w * long_size / h + 0.5)
short_size = ow
else:
ow = long_size
oh = int(1.0 * h * long_size / w + 0.5)
short_size = oh
img = img.resize((ow, oh), Image.BILINEAR)
mask = mask.resize((ow, oh), Image.NEAREST)
# pad crop
if short_size < crop_size:
padh = crop_size - oh if oh < crop_size else 0
padw = crop_size - ow if ow < crop_size else 0
img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0)
mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=0)
# random crop crop_size
w, h = img.size
x1 = random.randint(0, w - crop_size)
y1 = random.randint(0, h - crop_size)
img = img.crop((x1, y1, x1+crop_size, y1+crop_size))
mask = mask.crop((x1, y1, x1+crop_size, y1+crop_size))
# final transform
return img, self._mask_transform(mask)

def _mask_transform(self, mask):
target = np.array(mask).astype('int64') - 1
return torch.from_numpy(target)
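
The ``- 1`` in ``_mask_transform`` shifts the ADE20K annotation values (0 = unlabeled,
1-150 = classes) down to 0-149, so unlabeled pixels become -1 and can be ignored by
the loss. A tiny sketch of the same mapping::

    import numpy as np

    raw = np.array([[0, 1, 150]])      # 0 = unlabeled, 1..150 = classes
    target = raw.astype('int64') - 1   # [[-1, 0, 149]]; -1 marks ignored pixels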
17 changes: 7 additions & 10 deletions encoding/datasets/base.py
@@ -67,15 +67,16 @@ def _sync_transform(self, img, mask):
img = img.transpose(Image.FLIP_LEFT_RIGHT)
mask = mask.transpose(Image.FLIP_LEFT_RIGHT)
crop_size = self.crop_size
# random scale (short edge from 480 to 720)
short_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.0))
w, h = img.size
long_size = random.randint(int(self.base_size*0.5), int(self.base_size*2.0))
if h > w:
ow = short_size
oh = int(1.0 * h * ow / w)
oh = long_size
ow = int(1.0 * w * long_size / h + 0.5)
short_size = ow
else:
oh = short_size
ow = int(1.0 * w * oh / h)
ow = long_size
oh = int(1.0 * h * long_size / w + 0.5)
short_size = oh
img = img.resize((ow, oh), Image.BILINEAR)
mask = mask.resize((ow, oh), Image.NEAREST)
# pad crop
@@ -90,10 +91,6 @@ def _sync_transform(self, img, mask):
y1 = random.randint(0, h - crop_size)
img = img.crop((x1, y1, x1+crop_size, y1+crop_size))
mask = mask.crop((x1, y1, x1+crop_size, y1+crop_size))
# gaussian blur as in PSP
if random.random() < 0.5:
img = img.filter(ImageFilter.GaussianBlur(
radius=random.random()))
# final transform
return img, self._mask_transform(mask)
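
The change above switches the random scaling from the short edge to the long edge,
so both sides of the resized image are bounded by ``long_size`` before padding and
cropping. A small self-contained sketch of the two sizing rules (the helper names
are only for illustration)::

    def resize_by_short_edge(w, h, short_size):
        # old behaviour: short edge -> short_size, long edge scales freely
        if h > w:
            return short_size, int(1.0 * h * short_size / w)
        return int(1.0 * w * short_size / h), short_size

    def resize_by_long_edge(w, h, long_size):
        # new behaviour: long edge -> long_size, both sides stay bounded
        if h > w:
            return int(1.0 * w * long_size / h + 0.5), long_size
        return long_size, int(1.0 * h * long_size / w + 0.5)

    print(resize_by_short_edge(2000, 500, 480))  # (1920, 480)
    print(resize_by_long_edge(2000, 500, 480))   # (480, 120)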
