Add CI, make package portable, docstring for MTCNN

sdw8855 · May 29, 2019 · 3141c5e · 3141c5e
1 parent 28f52da
commit 3141c5e
Show file tree

Hide file tree

Showing 9 changed files with 77 additions and 20 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,10 @@
+dist: xenial
+language: python
+python:
+  - "3.5"
+  - "3.6"
+  - "3.7"
+install:
+  - pip install -r requirements.txt
+script:
+  - python test.py
diff --git a/README.md b/README.md
@@ -20,19 +20,19 @@ The following models have been ported to pytorch (with links to download pytorch
 There is no need to manually download the pretrained `state_dict`s; they are downloaded automatically on model instantiation. To use an Inception Resnet (V1) model for facial recognition/identification in pytorch, use:
 
 ```
-from models.inception_resnet_v1 import InceptionResNetV1
+from models.inception_resnet_v1 import InceptionResnetV1
 
 # For a model pretrained on VGGFace2
-model = InceptionResNetV1(pretrained='vggface2')
+model = InceptionResnetV1(pretrained='vggface2').eval()
 
 # For a model pretrained on CASIA-Webface
-model = InceptionResNetV1(pretrained='casia-webface')
+model = InceptionResnetV1(pretrained='casia-webface').eval()
 
 # For an untrained model
-model = InceptionResNetV1()
+model = InceptionResnetV1().eval()
 
 # For an untrained 1001-class classifier
-model = InceptionResNetV1(classify=True, num_classes=1001)
+model = InceptionResnetV1(classify=True, num_classes=1001).eval()
 ```
 
 By default, the above models will return 512-dimensional embeddings of images. To enable classification instead, either pass `classify=True` to the model constructor or set the object attribute afterwards with `model.classify = True`. For VGGFace2, the pretrained model will output probability vectors of length 8631, and for CASIA-Webface, probability vectors of length 10575.
@@ -49,7 +49,6 @@ from torch.utils.data import DataLoader
 from torchvision import transforms, datasets
 import numpy as np
 import pandas as pd
-from tqdm import tqdm
 
 from models.mtcnn import MTCNN
 from models.inception_resnet_v1 import InceptionResNetV1
@@ -94,6 +93,21 @@ print(pd.DataFrame(dists, columns=names, index=names))
 
 ```
 
+## Use this repo in your own project
+
+To use the pretrained MTCNN and Inception Resnet V1 models in your own project, I recommend first adding this repo as a submodule. Note that the dash ('-') in the repo name should be removed when cloning as a submodule, because a directory name containing a dash is not a valid Python package name and cannot be imported:
+
+`git submodule add https://github.com/timesler/facenet-pytorch.git facenet_pytorch`
+
+Models can then be instantiated simply with the following:
+
+```
+import facenet_pytorch as fp
+
+mtcnn = fp.MTCNN()
+resnet = fp.InceptionResnetV1(pretrained='vggface2').eval()
+```
+
 ## Conversion of parameters from Tensorflow to Pytorch
 
 See: [models/tensorflow2pytorch.py](models/tensorflow2pytorch.py)

diff --git a/__init__.py b/__init__.py
@@ -0,0 +1,2 @@
+from .models.inception_resnet_v1 import InceptionResnetV1
+from .models.mtcnn import MTCNN, PNet, RNet, ONet
diff --git a/example.py b/example.py
@@ -3,10 +3,9 @@
 from torchvision import transforms, datasets
 import numpy as np
 import pandas as pd
-from tqdm import tqdm
 
 from models.mtcnn import MTCNN
-from models.inception_resnet_v1 import InceptionResNetV1
+from models.inception_resnet_v1 import InceptionResnetV1
 
 # Define MTCNN module
 # Default params shown for illustration, but not needed
@@ -17,7 +16,7 @@
 
 # Define Inception Resnet V1 module
 # Set classify=True for pretrained classifier
-resnet = InceptionResNetV1(pretrained='vggface2').eval()
+resnet = InceptionResnetV1(pretrained='vggface2').eval()
 
 # Define a dataset and data loader
 trans = transforms.Compose([
@@ -33,7 +32,7 @@
 # Perform MTCNN facial detection
 aligned = []
 names = []
-for x, y in tqdm(loader):
+for x, y in loader:
     x_aligned = mtcnn(x[0])
     aligned.append(x_aligned)
     names.append(dataset.idx_to_class[y[0].item()])

diff --git a/models/inception_resnet_v1.py b/models/inception_resnet_v1.py
@@ -177,7 +177,7 @@ def forward(self, x):
         return out
 
 
-class InceptionResNetV1(nn.Module):
+class InceptionResnetV1(nn.Module):
     """Inception Resnet V1 model with optional loading of pretrained weights.
 
     Model parameters can be loaded based on pretraining on the VGGFace2 or CASIA-Webface

diff --git a/models/mtcnn.py b/models/mtcnn.py
@@ -7,10 +7,15 @@
 from PIL import Image
 import tensorflow as tf
 
-from dependencies.utils.detect_face import detect_face
+from .utils.detect_face import detect_face
 
 
 class PNet(nn.Module):
+    """MTCNN PNet.
+    
+    Keyword Arguments:
+        pretrained {bool} -- Whether or not to load saved pretrained weights (default: {True})
+    """
 
     def __init__(self, pretrained=True):
         super().__init__()
@@ -54,6 +59,11 @@ def forward(self, x):
 
 
 class RNet(nn.Module):
+    """MTCNN RNet.
+    
+    Keyword Arguments:
+        pretrained {bool} -- Whether or not to load saved pretrained weights (default: {True})
+    """
 
     def __init__(self, pretrained=True):
         super().__init__()
@@ -104,6 +114,11 @@ def forward(self, x):
 
 
 class ONet(nn.Module):
+    """MTCNN ONet.
+    
+    Keyword Arguments:
+        pretrained {bool} -- Whether or not to load saved pretrained weights (default: {True})
+    """
 
     def __init__(self, pretrained=True):
         super().__init__()
@@ -162,6 +177,23 @@ def forward(self, x):
 
 
 class MTCNN(nn.Module):
+    """Complete MTCNN face detection module.
+
+    This class loads pretrained P-, R-, and O-nets and, given raw input images as tensors,
+    returns images cropped to include the face only. Cropped faces can optionally be saved also.
+    
+    Keyword Arguments:
+        image_size {int} -- Output image size in pixels. The image will be square. (default: {160})
+        margin {int} -- Margin to add to bounding box, in terms of pixels in the original image. (default: {0})
+        min_face_size {int} -- Minimum face size to search for. (default: {20})
+        thresholds {list} -- MTCNN face detection thresholds (default: {[0.6, 0.7, 0.7]})
+        factor {float} -- Factor used to create a scaling pyramid of face sizes. (default: {0.709})
+        prewhiten {bool} -- Whether or not to prewhiten images before returning. (default: {True})
+    
+    Returns:
+        Union[torch.Tensor, (torch.Tensor, torch.Tensor)] -- If detected, cropped image of a single face with
+            dimensions 3 x image_size x image_size. Optionally, the probability that a face was detected.
+    """
 
     def __init__(
         self, image_size=160, margin=0, min_face_size=20,

diff --git a/dependencies/utils/detect_face.py → models/utils/detect_face.py b/dependencies/utils/detect_face.py → models/utils/detect_face.py
diff --git a/tensorflow2pytorch.py b/tensorflow2pytorch.py
@@ -3,11 +3,11 @@
 import json
 import os, sys
 
-from dependencies.facenet.src import facenet
-from dependencies.facenet.src.models import inception_resnet_v1 as tf_mdl
-from dependencies.facenet.src.align import detect_face
+from .dependencies.facenet.src import facenet
+from .dependencies.facenet.src.models import inception_resnet_v1 as tf_mdl
+from .dependencies.facenet.src.align import detect_face
 
-from models.inception_resnet_v1 import InceptionResNetV1
+from models.inception_resnet_v1 import InceptionResnetV1
 from models.mtcnn import PNet, RNet, ONet
 
 
@@ -299,7 +299,7 @@ def tensorflow2pytorch():
     }
 
     print('\nLoad VGGFace2-trained weights and save\n')
-    mdl = InceptionResNetV1(num_classes=8631).eval()
+    mdl = InceptionResnetV1(num_classes=8631).eval()
     tf_mdl_dir = 'data/20180402-114759'
     data_name = 'vggface2'
     load_tf_model_weights(mdl, lookup_inception_resnet_v1, tf_mdl_dir)
@@ -317,7 +317,7 @@ def tensorflow2pytorch():
     torch.save(state_dict, f'{tf_mdl_dir}-{data_name}-features.pt')
 
     print('\nLoad CASIA-Webface-trained weights and save\n')
-    mdl = InceptionResNetV1(num_classes=10575).eval()
+    mdl = InceptionResnetV1(num_classes=10575).eval()
     tf_mdl_dir = 'data/20180408-102900'
     data_name = 'casia-webface'
     load_tf_model_weights(mdl, lookup_inception_resnet_v1, tf_mdl_dir)

diff --git a/test.py b/test.py
@@ -7,7 +7,7 @@
 from time import time
 
 from models.mtcnn import MTCNN
-from models.inception_resnet_v1 import InceptionResNetV1
+from models.inception_resnet_v1 import InceptionResnetV1
 
 trans = transforms.Compose([
     transforms.Resize(512),
@@ -23,7 +23,7 @@ def get_image(path, trans):
 
 
 mtcnn_pt = MTCNN()
-resnet_pt = InceptionResNetV1(pretrained='vggface2').eval()
+resnet_pt = InceptionResnetV1(pretrained='vggface2').eval()
 
 names = ['bradley_cooper', 'shea_whigham', 'paul_rudd', 'kate_siegel', 'angelina_jolie']
 aligned = []