init

layumi · Aug 16, 2021 · 08579bb · 08579bb
1 parent 05ee8bb
commit 08579bb
Show file tree

Hide file tree

Showing 13 changed files with 8,067 additions and 47 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,9 @@
+__pycache__
+log/
+cache/
+slurm/
+data/
+Datasets
+test/
+.vscode
+scripts/
diff --git a/README.md b/README.md
@@ -1,66 +1,76 @@
 # Self-Supervised Mesh Reconstruction
-This is the source code of Self-Supervised Mesh Reconstruction.
-### We are upgrading the dependent kaolin version from v0.1 to v0.9.
-### The new version will be released soon (1-2 weeks).
+This is the source code of [Self-Supervised 3D Mesh Reconstruction From Single Images](https://openaccess.thecvf.com/content/CVPR2021/papers/Hu_Self-Supervised_3D_Mesh_Reconstruction_From_Single_Images_CVPR_2021_paper.pdf).
 
 ### Requirements
 - Linux
-- Python == 3.6
+- Python >= 3.6
 - CUDA >= 10.0.130 (with `nvcc` installed)
-- Display Driver >= 410.48
-
-Windows support is in the works and is currently considered experimental.
 
 ## Installation
 #### Create Environment
 ```sh
-$ conda create --name kaolin python=3.6
-$ conda activate kaolin
-```
-
-#### Pytorch
-```sh
-$ conda install pytorch==1.2.0 torchvision==0.4.0 cudatoolkit=10.0 -c pytorch
-```
-#### Kaolin Library
-```sh
-$ git clone https://github.com/NVIDIAGameWorks/kaolin.git
-$ python setup.py build_ext --inplace
-$ python setup.py install
+$ conda create --name smr python=3.7
+$ conda activate smr
 ```
 
-#### OpenCV
+#### Install from requirements.txt
+You can directly install the requirements through:
 ```sh
-$ conda install -c menpo opencv
+$ pip install -r requirements.txt
 ```
 
-#### Others
-- tqdm
-- tensorboardX
-- pillow == 6.0.0
-- numpy >= 1.17
+Or you can install the required packages separately:
+* Pytorch
+    ```sh
+    $ conda install pytorch==1.6.0 torchvision==0.7.0 cudatoolkit=10.2 -c pytorch
+    ```
+* Kaolin Library
+    ```sh
+    $ git clone --recursive https://github.com/NVIDIAGameWorks/kaolin
+    $ cd kaolin
+    $ git checkout v0.9.0
+    $ python setup.py develop
+    ```
 
+* OpenCV and other requirements
+    ```sh
+    $ conda install -c menpo opencv
+    ```
 
-## Training
-#### Training Bird on two GPU Cards
-```sh
-$ python train_bird.py --batchSize 96 \
-                     --dataroot /mnt/proj59/taohu/share/Program/Data/Bird/Crop_Seg_Images
-```
+## Training and Testing
+#### Training and Testing On the CUB-200-2011 (Bird) dataset
 
-#### Training Bird on two GPU Cards with 256 x 256 resolution
-```sh
-$ python train_resnet18_256.py --batchSize 16 \
-          --imageSize 256 \
-          --dataroot /mnt/proj59/taohu/share/Program/Data/Bird/Crop_Seg_Images
-```
+* Dataset
+
+  Download the processed data from [Google Drive]().
+* Run
+    ```sh
+    DATA_ROOT=/path/to/Bird/Crop_Seg_Images
+    $ python train.py --imageSize 128 \
+                        --batchSize 24 \
+                        --lr 0.0001 \
+                        --niter 500 \
+                        --dataroot $DATA_ROOT \
+                        --template_path ./template/sphere.obj \
+                        --outf ./log/Bird/SMR \
+                        --azi_scope 360 \
+                        --elev_range '0~30' \
+                        --dist_range '2~6' \
+                        --lambda_gan 0.0001 \
+                        --lambda_reg 1.0 \
+                        --lambda_data 1.0 \
+                        --lambda_ic 0.1 \
+                        --lambda_lc 0.001
+    ```
 
-#### Training ShapeNet on two GPU Cards
-```sh
-$ python train_shapenet.py --categories car \
-                         --batchSize 96 \
-                         --dataroot /mnt/proj59/taohu/share/Program/Data/ShapeNet/ShapeNetRendering
-```
+## Contact
+Tao Hu - [taohu@cse.cuhk.edu.hk](mailto:taohu@cse.cuhk.edu.hk)
 
-## Author
-Tao Hu - taohu@cse.cuhk.edu.hk
+## Citation
+```bibtex
+@InProceedings{Hu_2021_CVPR,
+    author    = {Hu, Tao and Wang, Liwei and Xu, Xiaogang and Liu, Shu and Jia, Jiaya},
+    title     = {Self-Supervised 3D Mesh Reconstruction From Single Images},
+    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+    month     = {June},
+    year      = {2021},
+    pages     = {6002-6011}
+}
+```
diff --git a/datasets/bird.py b/datasets/bird.py
@@ -0,0 +1,99 @@
+"""
+Copyright (C) 2019 NVIDIA Corporation.  All rights reserved.
+Licensed under the CC BY-NC-SA 4.0 license
+(https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode).
+"""
+import os.path
+from PIL import Image
+import glob
+
+import torch.utils.data as data
+import torch
+import torchvision
+import numpy as np
+import random
+from PIL import ImageFilter, ImageOps
+
+
+def default_loader(path):
+    return Image.open(path).convert('RGB')
+
+
+class Dataset(data.Dataset):
+    def __init__(self, root, image_size, transform=None, loader=default_loader, train=True, return_paths=False):
+        super(Dataset, self).__init__()
+        self.root = root
+        if train:
+            self.im_list = glob.glob(os.path.join(self.root, 'train', '*/*.jpg'))
+            self.class_dir = glob.glob(os.path.join(self.root, 'train', '*'))
+        else:
+            self.im_list = glob.glob(os.path.join(self.root, 'test', '*/*.jpg'))
+            self.class_dir = glob.glob(os.path.join(self.root, 'test', '*'))
+
+        self.transform = transform
+        self.loader = loader
+
+        self.imgs = [(im_path, self.class_dir.index(os.path.dirname(im_path))) for
+                     im_path in self.im_list]
+        random.shuffle(self.imgs)
+
+        self.return_paths = return_paths
+        self.train = train
+        self.image_size = image_size
+
+        print('Seceed loading dataset!')
+
+    def __getitem__(self, index):
+        img_path, label = self.imgs[index]
+        target_height, target_width = self.image_size, self.image_size
+
+        # image and its flipped image
+        seg_path = img_path.replace('.jpg', '.png')
+        img = self.loader(img_path)
+        seg = Image.open(seg_path)
+        W, H = img.size
+
+        if self.train:
+            if random.uniform(0, 1) < 0.5:
+                img = img.transpose(Image.FLIP_LEFT_RIGHT)
+                seg = seg.transpose(Image.FLIP_LEFT_RIGHT)
+
+            h = random.randint(int(0.90 * H), int(0.99 * H))
+            w = random.randint(int(0.90 * W), int(0.99 * W))
+            left = random.randint(0, W-w)
+            upper = random.randint(0, H-h)
+            right = random.randint(w - left, W)
+            lower = random.randint(h - upper, H)
+            img = img.crop((left, upper, right, lower))
+            seg = seg.crop((left, upper, right, lower))
+
+        W, H = img.size
+        desired_size = max(W, H)
+        delta_w = desired_size - W
+        delta_h = desired_size - H
+        padding = (delta_w//2, delta_h//2, delta_w-(delta_w//2), delta_h-(delta_h//2))
+        img = ImageOps.expand(img, padding)
+        seg = ImageOps.expand(seg, padding)
+
+        img = img.resize((target_height, target_width))
+        seg = seg.resize((target_height, target_width))
+        seg = seg.point(lambda p: p > 160 and 255)
+
+        edge = seg.filter(ImageFilter.FIND_EDGES)
+        edge = edge.filter(ImageFilter.SMOOTH_MORE)
+        edge = edge.point(lambda p: p > 20 and 255)
+        edge = torchvision.transforms.functional.to_tensor(edge).max(0, True)[0]
+
+        img = torchvision.transforms.functional.to_tensor(img)
+        seg = torchvision.transforms.functional.to_tensor(seg).max(0, True)[0]
+
+        img = img * seg + torch.ones_like(img) * (1 - seg)
+        rgbs = torch.cat([img, seg], dim=0)
+
+        data= {'images': rgbs, 'path': img_path, 'label': label,
+               'edge': edge}
+
+        return {'data': data}
+
+    def __len__(self):
+        return len(self.imgs)