Commit 69ba678: Add DeepLabV3 + ResNeSt-269 (zhanghang1989#263)

zhanghang1989 committed Apr 25, 2020
1 parent 17be9e1 commit 69ba678
Showing 28 changed files with 525 additions and 709 deletions.
2 changes: 2 additions & 0 deletions README.md
@@ -6,6 +6,8 @@
[![Build Docs](https://github.com/zhanghang1989/PyTorch-Encoding/workflows/Build%20Docs/badge.svg)](https://github.com/zhanghang1989/PyTorch-Encoding/actions)
[![Unit Test](https://github.com/zhanghang1989/PyTorch-Encoding/workflows/Unit%20Test/badge.svg)](https://github.com/zhanghang1989/PyTorch-Encoding/actions)

+[![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/resnest-split-attention-networks/semantic-segmentation-on-ade20k)](https://paperswithcode.com/sota/semantic-segmentation-on-ade20k?p=resnest-split-attention-networks)

# PyTorch-Encoding

created by [Hang Zhang](http://hangzh.com/)
4 changes: 3 additions & 1 deletion docs/source/model_zoo/imagenet.rst
@@ -71,7 +71,7 @@ Test Pretrained
- The test script is in the ``experiments/recognition/`` folder. For evaluating the model (using MS),
for example ``ResNeSt50``::

-python test.py --dataset imagenet --model-zoo ResNeSt50 --crop-size 224 --eval
+python verify.py --dataset imagenet --model ResNeSt50 --crop-size 224
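
For context on what the new ``verify.py`` entry point exercises, here is a minimal sketch of loading the same pretrained backbone programmatically. This is an assumption-level example: it presumes the ``encoding`` package is installed and that ``resnest50`` is a registered model-zoo key, which this diff does not show::

    import torch
    import encoding  # assumed installed per the repo README

    # 'resnest50' as a zoo key is an assumption, not shown in this hunk
    model = encoding.models.get_model('resnest50', pretrained=True)
    model.eval()

    # dummy forward pass at the 224x224 crop size used above
    with torch.no_grad():
        logits = model(torch.randn(1, 3, 224, 224))
    print(logits.shape)  # expected: torch.Size([1, 1000])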

Train Your Own Model
--------------------
@@ -81,3 +81,5 @@ Train Your Own Model
python scripts/prepare_imagenet.py --data-dir ./

- The training script is in the ``experiments/recognition/`` folder. Commands for reproducing pre-trained models can be found in the table.


92 changes: 57 additions & 35 deletions docs/source/model_zoo/segmentation.rst
@@ -35,23 +35,32 @@ ResNeSt Backbone Models
============================================================================== ============== ============== =========================================================================================================
Model pixAcc mIoU Command
============================================================================== ============== ============== =========================================================================================================
-FCN_ResNeSt50_ADE xx.xx% xx.xx% :raw-html:`<a href="javascript:toggleblock('cmd_fcn_nest50_ade')" class="toggleblock">cmd</a>`
+FCN_ResNeSt50_ADE 80.18% 42.94% :raw-html:`<a href="javascript:toggleblock('cmd_fcn_nest50_ade')" class="toggleblock">cmd</a>`
+DeepLabV3_ResNeSt50_ADE 81.17% 45.12% :raw-html:`<a href="javascript:toggleblock('cmd_deeplab_resnest50_ade')" class="toggleblock">cmd</a>`
+DeepLabV3_ResNeSt101_ADE 82.07% 46.91% :raw-html:`<a href="javascript:toggleblock('cmd_deeplab_resnest101_ade')" class="toggleblock">cmd</a>`
+DeepLabV3_ResNeSt269_ADE 82.62% 47.60% :raw-html:`<a href="javascript:toggleblock('cmd_deeplab_resnest269_ade')" class="toggleblock">cmd</a>`
============================================================================== ============== ============== =========================================================================================================

.. raw:: html

<code xml:space="preserve" id="cmd_fcn_nest50_ade" style="display: none; text-align: left; white-space: pre-wrap">
-python train.py --dataset ade20k --model fcn --aux --backbone resnest50 --batch-size 2
+python train_dist.py --dataset ADE20K --model fcn --aux --backbone resnest50
</code>

<code xml:space="preserve" id="cmd_enc_nest50_ade" style="display: none; text-align: left; white-space: pre-wrap">
python train_dist.py --dataset ADE20K --model EncNet --aux --se-loss --backbone resnest50
</code>

<code xml:space="preserve" id="cmd_deeplab_resnest50_ade" style="display: none; text-align: left; white-space: pre-wrap">
-python train.py --dataset ADE20K --model deeplab --aux --backbone resnest50
+python train_dist.py --dataset ADE20K --model deeplab --aux --backbone resnest50
</code>

<code xml:space="preserve" id="cmd_deeplab_resnest101_ade" style="display: none; text-align: left; white-space: pre-wrap">
-python train.py --dataset ADE20K --model deeplab --aux --backbone resnest101
+python train_dist.py --dataset ADE20K --model deeplab --aux --backbone resnest101
</code>

+<code xml:space="preserve" id="cmd_deeplab_resnest269_ade" style="display: none; text-align: left; white-space: pre-wrap">
+python train_dist.py --dataset ADE20K --model deeplab --aux --backbone resnest269
+</code>
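
As a quick smoke test of the model this commit adds, a sketch of pulling the pretrained checkpoint through the zoo. Hedged: ``get_model`` and the ``deeplab_resnest269_ade`` key come from the ``model_zoo.py`` hunk further down; the ``evaluate`` method and an available CUDA device are assumptions::

    import torch
    import encoding

    model = encoding.models.get_model('deeplab_resnest269_ade', pretrained=True).cuda()
    model.eval()

    # ADE20K has 150 classes; evaluate() is assumed to return per-class scores
    with torch.no_grad():
        out = model.evaluate(torch.randn(1, 3, 480, 480).cuda())
    print(out.shape)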


@@ -73,19 +82,19 @@ EncNet_ResNet101s_ADE
.. raw:: html

<code xml:space="preserve" id="cmd_fcn50_ade" style="display: none; text-align: left; white-space: pre-wrap">
-CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset ADE20K --model FCN
+CUDA_VISIBLE_DEVICES=0,1,2,3 python train_dist.py --dataset ADE20K --model FCN
</code>

<code xml:space="preserve" id="cmd_psp50_ade" style="display: none; text-align: left; white-space: pre-wrap">
-CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset ADE20K --model PSP --aux
+CUDA_VISIBLE_DEVICES=0,1,2,3 python train_dist.py --dataset ADE20K --model PSP --aux
</code>

<code xml:space="preserve" id="cmd_enc50_ade" style="display: none; text-align: left; white-space: pre-wrap">
-CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset ADE20K --model EncNet --aux --se-loss
+CUDA_VISIBLE_DEVICES=0,1,2,3 python train_dist.py --dataset ADE20K --model EncNet --aux --se-loss
</code>

<code xml:space="preserve" id="cmd_enc101_ade" style="display: none; text-align: left; white-space: pre-wrap">
-CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset ADE20K --model EncNet --aux --se-loss --backbone resnet101 --base-size 640 --crop-size 576
+CUDA_VISIBLE_DEVICES=0,1,2,3 python train_dist.py --dataset ADE20K --model EncNet --aux --se-loss --backbone resnet101
</code>

Pascal Context Dataset
@@ -94,22 +103,22 @@ Pascal Context Dataset
============================================================================== ================= ============== =============================================================================================
Model pixAcc mIoU Command
============================================================================== ================= ============== =============================================================================================
-Encnet_ResNet50_PContext 79.2% 51.0% :raw-html:`<a href="javascript:toggleblock('cmd_enc50_pcont')" class="toggleblock">cmd</a>`
-EncNet_ResNet101_PContext 80.7% 54.1% :raw-html:`<a href="javascript:toggleblock('cmd_enc101_pcont')" class="toggleblock">cmd</a>`
+Encnet_ResNet50s_PContext 79.2% 51.0% :raw-html:`<a href="javascript:toggleblock('cmd_enc50_pcont')" class="toggleblock">cmd</a>`
+EncNet_ResNet101s_PContext 80.7% 54.1% :raw-html:`<a href="javascript:toggleblock('cmd_enc101_pcont')" class="toggleblock">cmd</a>`
============================================================================== ================= ============== =============================================================================================

.. raw:: html

<code xml:space="preserve" id="cmd_fcn50_pcont" style="display: none; text-align: left; white-space: pre-wrap">
-CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset PContext --model FCN
+CUDA_VISIBLE_DEVICES=0,1,2,3 python train_dist.py --dataset PContext --model FCN
</code>

<code xml:space="preserve" id="cmd_enc50_pcont" style="display: none; text-align: left; white-space: pre-wrap">
-CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset PContext --model EncNet --aux --se-loss
+CUDA_VISIBLE_DEVICES=0,1,2,3 python train_dist.py --dataset PContext --model EncNet --aux --se-loss
</code>

<code xml:space="preserve" id="cmd_enc101_pcont" style="display: none; text-align: left; white-space: pre-wrap">
-CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset PContext --model EncNet --aux --se-loss --backbone resnet101
+CUDA_VISIBLE_DEVICES=0,1,2,3 python train_dist.py --dataset PContext --model EncNet --aux --se-loss --backbone resnet101
</code>


@@ -127,9 +136,9 @@ EncNet_ResNet101s_VOC
<code xml:space="preserve" id="cmd_enc101_voc" style="display: none; text-align: left; white-space: pre-wrap">
# First finetuning COCO dataset pretrained model on augmented set
# You can also train from scratch on COCO by yourself
-CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset Pascal_aug --model-zoo EncNet_Resnet101_COCO --aux --se-loss --lr 0.001 --syncbn --ngpus 4 --checkname res101 --ft
+CUDA_VISIBLE_DEVICES=0,1,2,3 python train_dist.py --dataset Pascal_aug --model-zoo EncNet_Resnet101_COCO --aux --se-loss --lr 0.001 --syncbn --ngpus 4 --checkname res101 --ft
# Finetuning on original set
-CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --dataset Pascal_voc --model encnet --aux --se-loss --backbone resnet101 --lr 0.0001 --syncbn --ngpus 4 --checkname res101 --resume runs/Pascal_aug/encnet/res101/checkpoint.params --ft
+CUDA_VISIBLE_DEVICES=0,1,2,3 python train_dist.py --dataset Pascal_voc --model encnet --aux --se-loss --backbone resnet101 --lr 0.0001 --syncbn --ngpus 4 --checkname res101 --resume runs/Pascal_aug/encnet/res101/checkpoint.params --ft
</code>


@@ -146,6 +155,28 @@ Test Pretrained
python test.py --dataset ADE20K --model-zoo EncNet_ResNet50s_ADE --eval
# pixAcc: 0.801, mIoU: 0.415: 100%|████████████████████████| 250/250


+Train Your Own Model
+--------------------

+- Prepare the datasets by running the scripts in the ``scripts/`` folder, for example preparing the ``ADE20K`` dataset::

+python scripts/prepare_ade20k.py

+- The training script is in the ``experiments/segmentation/`` folder; an example training command::

+python train_dist.py --dataset ade20k --model encnet --aux --se-loss

+- For detailed training options, run ``python train_dist.py -h``. Commands for reproducing pre-trained models can be found in the table.

+.. hint::
+The validation metrics computed during training use center-crop only and serve to monitor
+training correctness. To evaluate a pretrained model on the validation set with multi-scale
+(MS) testing, use::

+python test.py --dataset pcontext --model encnet --aux --se-loss --resume mycheckpoint --eval


Quick Demo
~~~~~~~~~~

@@ -155,7 +186,7 @@ Quick Demo
import encoding
# Get the model
-model = encoding.models.get_model('Encnet_ResNet50_PContext', pretrained=True).cuda()
+model = encoding.models.get_model('Encnet_ResNet50s_PContext', pretrained=True).cuda()
model.eval()
# Prepare the image
@@ -180,30 +211,21 @@ Quick Demo
.. image:: https://raw.githubusercontent.com/zhanghang1989/image-data/master/encoding/segmentation/pcontext/2010_001829.png
:width: 45%
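
The demo code above is cut off at the "# Prepare the image" comment by the hunk boundary. For completeness, a sketch of the remaining steps as the published docs present them; the ``encoding.utils.download``, ``encoding.transforms.load_image``, ``model.evaluate``, and ``encoding.utils.get_mask_pallete`` helpers are recalled from the repo's docs and are not part of this diff::

    import torch
    import encoding

    # Get the model (new 'ResNet50s' zoo name from this commit's rename)
    model = encoding.models.get_model('Encnet_ResNet50s_PContext', pretrained=True).cuda()
    model.eval()

    # Prepare the image (assumed helper functions)
    url = ('https://github.com/zhanghang1989/image-data/blob/master/'
           'encoding/segmentation/pcontext/2010_001829_org.jpg?raw=true')
    filename = 'example.jpg'
    img = encoding.utils.download(url, filename)
    img = encoding.transforms.load_image(filename).cuda().unsqueeze(0)

    # Make prediction and colorize it with a dataset palette
    output = model.evaluate(img)
    predict = torch.max(output, 1)[1].cpu().numpy() + 1
    mask = encoding.utils.get_mask_pallete(predict, 'pascal_voc')
    mask.save('output.png')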

-Train Your Own Model
---------------------

-- Prepare the datasets by runing the scripts in the ``scripts/`` folder, for example preparing ``ADE20K`` dataset::

-python scripts/prepare_ade20k.py

-- The training script is in the ``experiments/segmentation/`` folder, example training command::

-python train_dist.py --dataset ade20k --model encnet --aux --se-loss

-- Detail training options, please run ``python train.py -h``. Commands for reproducing pre-trained models can be found in the table.

-.. hint::
-The validation metrics during the training only using center-crop is just for monitoring the
-training correctness purpose. For evaluating the pretrained model on validation set using MS,
-please use the command::

-python test.py --dataset pcontext --model encnet --aux --se-loss --resume mycheckpoint --eval

Citation
--------

.. note::
+* Hang Zhang et al. "ResNeSt: Split-Attention Networks" *arXiv 2020*::

+@article{zhang2020resnest,
+title={ResNeSt: Split-Attention Networks},
+author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Mueller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander},
+journal={arXiv preprint arXiv:2004.08955},
+year={2020}
+}


* Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. "Context Encoding for Semantic Segmentation" *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*::

@InProceedings{Zhang_2018_CVPR,
9 changes: 8 additions & 1 deletion docs/source/notes/compile.rst
@@ -44,7 +44,14 @@ Citations
---------

.. note::
-If using the code in your research, please cite our papers.
+* Hang Zhang et al. "ResNeSt: Split-Attention Networks" *arXiv 2020*::

+@article{zhang2020resnest,
+title={ResNeSt: Split-Attention Networks},
+author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Mueller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander},
+journal={arXiv preprint arXiv:2004.08955},
+year={2020}
+}

* Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, Amit Agrawal. "Context Encoding for Semantic Segmentation" *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018*::

10 changes: 5 additions & 5 deletions encoding/models/backbone/resnest.py
@@ -23,7 +23,7 @@ def resnest50(pretrained=False, root='~/.encoding/models', **kwargs):
avd=True, avd_first=False, **kwargs)
if pretrained:
model.load_state_dict(torch.load(
-get_model_file('resnest50', root=root)), strict=False)
+get_model_file('resnest50', root=root)), strict=True)
return model

def resnest101(pretrained=False, root='~/.encoding/models', **kwargs):
@@ -33,7 +33,7 @@ def resnest101(pretrained=False, root='~/.encoding/models', **kwargs):
avd=True, avd_first=False, **kwargs)
if pretrained:
model.load_state_dict(torch.load(
-get_model_file('resnest101', root=root)), strict=False)
+get_model_file('resnest101', root=root)), strict=True)
return model

def resnest200(pretrained=False, root='~/.encoding/models', **kwargs):
@@ -53,7 +53,7 @@ def resnest269(pretrained=False, root='~/.encoding/models', **kwargs):
avd=True, avd_first=False, **kwargs)
if pretrained:
model.load_state_dict(torch.load(
-get_model_file('resnest269', root=root)), strict=False)
+get_model_file('resnest269', root=root)), strict=True)
return model

def resnest50_fast(pretrained=False, root='~/.encoding/models', **kwargs):
@@ -63,7 +63,7 @@ def resnest50_fast(pretrained=False, root='~/.encoding/models', **kwargs):
avd=True, avd_first=True, **kwargs)
if pretrained:
model.load_state_dict(torch.load(
-get_model_file('resnest50fast', root=root)), strict=False)
+get_model_file('resnest50fast', root=root)), strict=True)
return model

def resnest101_fast(pretrained=False, root='~/.encoding/models', **kwargs):
@@ -73,5 +73,5 @@ def resnest101_fast(pretrained=False, root='~/.encoding/models', **kwargs):
avd=True, avd_first=True, **kwargs)
if pretrained:
model.load_state_dict(torch.load(
-get_model_file('resnest101fast', root=root)), strict=False)
+get_model_file('resnest101fast', root=root)), strict=True)
return model
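
The repeated ``strict=False`` → ``strict=True`` flip makes key mismatches between a checkpoint and the model fail loudly instead of being silently skipped. A self-contained illustration with plain PyTorch (a generic example, not code from this repo):

    import torch
    import torch.nn as nn

    model = nn.Linear(4, 2)
    state = {'weight': torch.zeros(2, 4)}  # 'bias' key deliberately missing

    try:
        model.load_state_dict(state, strict=True)   # raises RuntimeError
    except RuntimeError as err:
        print('caught:', err)

    # strict=False silently skips the missing key and merely reports it
    missing, unexpected = model.load_state_dict(state, strict=False)
    print(missing)  # ['bias']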
6 changes: 5 additions & 1 deletion encoding/models/model_store.py
@@ -13,6 +13,8 @@
('966fb78c22323b0c68097c5c1242bd16d3e07fd5', 'resnest101'),
('d7fd712f5a1fcee5b3ce176026fbb6d0d278454a', 'resnest200'),
('51ae5f19032e22af4ec08e695496547acdba5ce5', 'resnest269'),
+# rectified
+#('9b5dc32b3b36ca1a6b41ecd4906830fc84dae8ed', 'resnet101_rt'),
# resnet other variants
('a75c83cfc89a56a4e8ba71b14f1ec67e923787b3', 'resnet50s'),
('03a0f310d6447880f1b22a83bd7d1aa7fc702c6e', 'resnet101s'),
@@ -22,15 +24,17 @@
('b41562160173ee2e979b795c551d3c7143b1e5b5', 'wideresnet50'),
# deepten paper
('1225f149519c7a0113c43a056153c1bb15468ac0', 'deepten_resnet50_minc'),
-# segmentation models
+# segmentation resnet models
('662e979de25a389f11c65e9f1df7e06c2c356381', 'fcn_resnet50s_ade'),
('4de91d5922d4d3264f678b663f874da72e82db00', 'encnet_resnet50s_pcontext'),
('9f27ea13d514d7010e59988341bcbd4140fcc33d', 'encnet_resnet101s_pcontext'),
('07ac287cd77e53ea583f37454e17d30ce1509a4a', 'encnet_resnet50s_ade'),
('3f54fa3b67bac7619cd9b3673f5c8227cf8f4718', 'encnet_resnet101s_ade'),
+# resnest segmentation models
('4aba491aaf8e4866a9c9981b210e3e3266ac1f2a', 'fcn_resnest50_ade'),
('2225f09d0f40b9a168d9091652194bc35ec2a5a9', 'deeplab_resnest50_ade'),
('06ca799c8cc148fe0fafb5b6d052052935aa3cc8', 'deeplab_resnest101_ade'),
+('0074dd10a6e6696f6f521653fb98224e75955496', 'deeplab_resnest269_ade'),
]}

encoding_repo_url = 'https://hangzh.s3.amazonaws.com/'
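
For orientation, these ``(sha1, name)`` pairs are typically consumed along the following lines; the ``short_hash`` helper and the eight-character prefix are assumptions about the surrounding ``model_store.py`` code, which this hunk does not show:

    # sketch: invert the (checksum, name) pairs into a lookup table
    _model_sha1 = {name: checksum for checksum, name in [
        ('2225f09d0f40b9a168d9091652194bc35ec2a5a9', 'deeplab_resnest50_ade'),
        ('0074dd10a6e6696f6f521653fb98224e75955496', 'deeplab_resnest269_ade'),
    ]}

    def short_hash(name):
        """First characters of the sha1, assumed to tag the downloaded file."""
        if name not in _model_sha1:
            raise ValueError('Pretrained model for %s is not available.' % name)
        return _model_sha1[name][:8]

    print(short_hash('deeplab_resnest269_ade'))  # 0074dd10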
7 changes: 5 additions & 2 deletions encoding/models/model_zoo.py
@@ -29,7 +29,7 @@
'wideresnet50': wideresnet50,
# deepten paper
'deepten_resnet50_minc': get_deepten_resnet50_minc,
-# segmentation models
+# segmentation resnet models
'encnet_resnet101s_coco': get_encnet_resnet101_coco,
'fcn_resnet50s_pcontext': get_fcn_resnet50_pcontext,
'encnet_resnet50s_pcontext': get_encnet_resnet50_pcontext,
@@ -38,8 +38,12 @@
'encnet_resnet101s_ade': get_encnet_resnet101_ade,
'fcn_resnet50s_ade': get_fcn_resnet50_ade,
'psp_resnet50s_ade': get_psp_resnet50_ade,
+# segmentation resnest models
'fcn_resnest50_ade': get_fcn_resnest50_ade,
'deeplab_resnest50_ade': get_deeplab_resnest50_ade,
'deeplab_resnest101_ade': get_deeplab_resnest101_ade,
+'deeplab_resnest200_ade': get_deeplab_resnest200_ade,
+'deeplab_resnest269_ade': get_deeplab_resnest269_ade,
}

model_list = list(models.keys())
@@ -61,7 +65,6 @@ def get_model(name, **kwargs):
Module:
The model.
"""

name = name.lower()
if name not in models:
raise ValueError('%s\n\t%s' % (str(name), '\n\t'.join(sorted(models.keys()))))
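
Since ``get_model`` lower-cases its argument before the lookup, the new entries are reachable case-insensitively. A short usage sketch (downloading the weights on first use is assumed):

    import encoding

    # case-insensitive; an unknown name raises ValueError listing the valid keys
    model = encoding.models.get_model('DeepLab_ResNeSt269_ADE', pretrained=True)
    print(type(model).__name__)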
2 changes: 1 addition & 1 deletion encoding/models/sseg/atten.py
@@ -108,7 +108,7 @@ def __init__(self, in_channels, out_channels, norm_layer, up_kwargs,
if with_enc:
self.encmodule = EncModule(inter_channels+extended_channels, out_channels, ncodes=32,
se_loss=se_loss, norm_layer=norm_layer)
-self.conv6 = nn.Sequential(nn.Dropout2d(0.1, False),
+self.conv6 = nn.Sequential(nn.Dropout(0.1, False),
nn.Conv2d(inter_channels+extended_channels, out_channels, 1))

def forward(self, *inputs):
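
This one-line change swaps channel-wise dropout for element-wise dropout on the segmentation head: ``nn.Dropout2d`` zeroes entire feature maps, while ``nn.Dropout`` zeroes individual activations. A small generic comparison (plain PyTorch, not repo code):

    import torch
    import torch.nn as nn

    x = torch.ones(1, 6, 4, 4)

    # Dropout2d drops whole channels: each channel sums to 0 or 32 (16 ones x scale 2)
    d2 = nn.Dropout2d(0.5).train()
    print(d2(x)[0].sum(dim=(1, 2)))

    # Dropout drops single elements: channel sums land in between
    d1 = nn.Dropout(0.5).train()
    print(d1(x)[0].sum(dim=(1, 2)))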
(Diffs for the remaining changed files are not shown.)
