-
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
05ee8bb
commit 08579bb
Showing
13 changed files
with
8,067 additions
and
47 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
__pycache__ | ||
log/ | ||
cache/ | ||
slurm/ | ||
data/ | ||
Datasets | ||
test/ | ||
.vscode | ||
scripts/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,66 +1,76 @@ | ||
# Self-Supervised Mesh Reconstruction | ||
This is the source code of Self-Supervised Mesh Reconstruction. | ||
### We are upgrading the dependent kaolin version from v0.1 to v0.9. | ||
### The new version will be released soon (1-2 weeks). | ||
This is the source code of [Self-Supervised 3D Mesh Reconstruction From Single Images](https://openaccess.thecvf.com/content/CVPR2021/papers/Hu_Self-Supervised_3D_Mesh_Reconstruction_From_Single_Images_CVPR_2021_paper.pdf). | ||
|
||
### Requirements | ||
- Linux | ||
- Python == 3.6 | ||
- Python >= 3.6 | ||
- CUDA >= 10.0.130 (with `nvcc` installed) | ||
- Display Driver >= 410.48 | ||
|
||
Windows support is in the works and is currently considered experimental. | ||
|
||
## Installation | ||
#### Create Environment | ||
```sh | ||
$ conda create --name kaolin python=3.6 | ||
$ conda activate kaolin | ||
``` | ||
|
||
#### Pytorch | ||
```sh | ||
$ conda install pytorch==1.2.0 torchvision==0.4.0 cudatoolkit=10.0 -c pytorch | ||
``` | ||
#### Kaolin Library | ||
```sh | ||
$ git clone https://github.com/NVIDIAGameWorks/kaolin.git | ||
$ python setup.py build_ext --inplace | ||
$ python setup.py install | ||
$ conda create --name smr python=3.7 | ||
$ conda activate smr | ||
``` | ||
|
||
#### OpenCV | ||
#### Install from requirements.txt | ||
You can directly install the requirements through: | ||
```sh | ||
$ conda install -c menpo opencv | ||
$ pip install -r requirements.txt | ||
``` | ||
|
||
#### Others | ||
- tqdm | ||
- tensorboardX | ||
- pillow == 6.0.0 | ||
- numpy >= 1.17 | ||
Or you can install the required packages separately | ||
* Pytorch | ||
```sh | ||
$ conda install pytorch==1.6.0 torchvision==0.7.0 cudatoolkit=10.2 -c pytorch | ||
``` | ||
* Kaolin Library | ||
```sh | ||
$ git clone --recursive https://github.com/NVIDIAGameWorks/kaolin | ||
$ cd kaolin | ||
$ git checkout v0.9.0 | ||
$ python setup.py develop | ||
``` | ||
|
||
* OpenCV and other requirements | ||
```sh | ||
$ conda install -c menpo opencv | ||
``` | ||
|
||
## Training | ||
#### Training Bird on two GPU Cards | ||
```sh | ||
$ python train_bird.py --batchSize 96 \ | ||
--dataroot /mnt/proj59/taohu/share/Program/Data/Bird/Crop_Seg_Images | ||
``` | ||
## Training and Testing | ||
#### Training and Testing On the CUB-200-2011 (Bird) dataset | ||
|
||
#### Training Bird on two GPU Cards with 256 x 256 resolution | ||
```sh | ||
$ python train_resnet18_256.py --batchSize 16 \ | ||
--imageSize 256 \ | ||
--dataroot /mnt/proj59/taohu/share/Program/Data/Bird/Crop_Seg_Images | ||
``` | ||
* Dataset | ||
|
||
Download the processed data from [Google Drive](). | ||
* Run | ||
```sh | ||
DATA_ROOT=/path/to/Bird/Crop_Seg_Images | ||
$ python train.py --imageSize 128 \ | ||
--batchSize 24 \ | ||
--lr 0.0001 \ | ||
--niter 500 \ | ||
--dataroot $DATA_ROOT \ | ||
--template_path ./template/sphere.obj \ | ||
--outf ./log/Bird/SMR \ | ||
--azi_scope 360 \ | ||
--elev_range '0~30' \ | ||
--dist_range '2~6' \ | ||
--lambda_gan 0.0001 \ | ||
--lambda_reg 1.0 \ | ||
--lambda_data 1.0 \ | ||
--lambda_ic 0.1 \ | ||
--lambda_lc 0.001 | ||
``` | ||
|
||
#### Training ShapeNet on two GPU Cards | ||
```sh | ||
$ python train_shapenet.py --categories car \ | ||
--batchSize 96 \ | ||
--dataroot /mnt/proj59/taohu/share/Program/Data/ShapeNet/ShapeNetRendering | ||
``` | ||
## Contact | ||
Tao Hu - [taohu@cse.cuhk.edu.hk](mailto:taohu@cse.cuhk.edu.hk) | ||
|
||
## Author | ||
Tao Hu - taohu@cse.cuhk.edu.hk | ||
## Citation | ||
@InProceedings{Hu_2021_CVPR, | ||
author = {Hu, Tao and Wang, Liwei and Xu, Xiaogang and Liu, Shu and Jia, Jiaya}, | ||
title = {Self-Supervised 3D Mesh Reconstruction From Single Images}, | ||
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, | ||
month = {June}, | ||
year = {2021}, | ||
pages = {6002-6011} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
""" | ||
Copyright (C) 2019 NVIDIA Corporation. All rights reserved. | ||
Licensed under the CC BY-NC-SA 4.0 license | ||
(https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode). | ||
""" | ||
import os.path | ||
from PIL import Image | ||
import glob | ||
|
||
import torch.utils.data as data | ||
import torch | ||
import torchvision | ||
import numpy as np | ||
import random | ||
from PIL import ImageFilter, ImageOps | ||
|
||
|
||
def default_loader(path):
    """Open the image file at ``path`` and return it converted to RGB mode."""
    image = Image.open(path)
    rgb_image = image.convert('RGB')
    return rgb_image
|
||
|
||
class Dataset(data.Dataset):
    """Image + segmentation-mask dataset read from a directory tree.

    Images are discovered with the pattern ``<root>/<split>/*/*.jpg`` where
    ``<split>`` is ``'train'`` or ``'test'``. Each image is expected to have a
    mask stored next to it under the same name with a ``.png`` extension.
    The label of an image is the index of its parent directory in the sorted
    list of ``<root>/<split>/*`` entries.

    Args:
        root: Dataset root directory.
        image_size: Side length (pixels) of the square output images.
        transform: Stored on the instance; not used by the code in this class.
        loader: Callable mapping an image path to a PIL image.
        train: Selects the ``train`` split and enables random flip/crop
            augmentation; otherwise the ``test`` split is read as-is.
        return_paths: Stored on the instance; not used by the code in this
            class (the path is always included in the returned sample).
    """

    def __init__(self, root, image_size, transform=None, loader=default_loader, train=True, return_paths=False):
        super().__init__()
        self.root = root
        split = 'train' if train else 'test'

        # glob returns entries in arbitrary (filesystem-dependent) order.
        # Sorting makes labels reproducible across runs and machines, and
        # consistent between splits that share the same class folder names.
        self.im_list = sorted(glob.glob(os.path.join(self.root, split, '*/*.jpg')))
        self.class_dir = sorted(glob.glob(os.path.join(self.root, split, '*')))

        self.transform = transform
        self.loader = loader

        # One dict lookup per image instead of a linear list.index() scan.
        class_index = {class_path: idx for idx, class_path in enumerate(self.class_dir)}
        self.imgs = [(im_path, class_index[os.path.dirname(im_path)])
                     for im_path in self.im_list]
        random.shuffle(self.imgs)

        self.return_paths = return_paths
        self.train = train
        self.image_size = image_size

        print('Seceed loading dataset!')

    def __getitem__(self, index):
        """Load, augment and tensorize the sample at ``index``.

        Returns:
            ``{'data': sample}`` where ``sample`` holds:
              * ``'images'``: image channels (background set to 1 outside the
                mask) concatenated with the single-channel mask along dim 0,
              * ``'path'``: path of the source image,
              * ``'label'``: integer class index,
              * ``'edge'``: single-channel binary edge map of the mask.
        """
        img_path, label = self.imgs[index]
        target_height, target_width = self.image_size, self.image_size

        # Swap only the file extension; str.replace would also rewrite any
        # '.jpg' that happens to occur in a directory name.
        seg_path = os.path.splitext(img_path)[0] + '.png'
        img = self.loader(img_path)
        seg = Image.open(seg_path)
        W, H = img.size

        if self.train:
            # Random horizontal flip, applied jointly to image and mask.
            if random.uniform(0, 1) < 0.5:
                img = img.transpose(Image.FLIP_LEFT_RIGHT)
                seg = seg.transpose(Image.FLIP_LEFT_RIGHT)

            # Random crop, applied jointly to image and mask.
            # NOTE(review): right/lower are drawn from [w - left, W] and
            # [h - upper, H] rather than being left + w / upper + h, so the
            # crop is not exactly w x h. Kept as-is because it defines the
            # existing augmentation behavior -- confirm whether intended.
            h = random.randint(int(0.90 * H), int(0.99 * H))
            w = random.randint(int(0.90 * W), int(0.99 * W))
            left = random.randint(0, W - w)
            upper = random.randint(0, H - h)
            right = random.randint(w - left, W)
            lower = random.randint(h - upper, H)
            crop_box = (left, upper, right, lower)
            img = img.crop(crop_box)
            seg = seg.crop(crop_box)

        # Pad the shorter side symmetrically so the image becomes square
        # before resizing (keeps the aspect ratio of the content).
        W, H = img.size
        desired_size = max(W, H)
        delta_w = desired_size - W
        delta_h = desired_size - H
        padding = (delta_w // 2, delta_h // 2,
                   delta_w - (delta_w // 2), delta_h - (delta_h // 2))
        img = ImageOps.expand(img, padding)
        seg = ImageOps.expand(seg, padding)

        # PIL's resize takes (width, height); both are equal here.
        img = img.resize((target_width, target_height))
        seg = seg.resize((target_width, target_height))
        # Binarize the mask: values above 160 become 255, the rest 0.
        seg = seg.point(lambda p: 255 if p > 160 else 0)

        # Edge map: detect mask contours, smooth, then binarize again.
        edge = seg.filter(ImageFilter.FIND_EDGES)
        edge = edge.filter(ImageFilter.SMOOTH_MORE)
        edge = edge.point(lambda p: 255 if p > 20 else 0)
        # Collapse any channels into one by taking the per-pixel maximum.
        edge = torchvision.transforms.functional.to_tensor(edge).max(0, True)[0]

        img = torchvision.transforms.functional.to_tensor(img)
        seg = torchvision.transforms.functional.to_tensor(seg).max(0, True)[0]

        # Composite the foreground over a white (all-ones) background.
        img = img * seg + torch.ones_like(img) * (1 - seg)
        rgbs = torch.cat([img, seg], dim=0)

        # Named `sample` so the module alias `data` is not shadowed.
        sample = {'images': rgbs, 'path': img_path, 'label': label,
                  'edge': edge}

        return {'data': sample}

    def __len__(self):
        """Return the number of (image, label) pairs in this split."""
        return len(self.imgs)
Oops, something went wrong.