From 0b4f59a3cfdcb4cc117958c245c37b2ee0613394 Mon Sep 17 00:00:00 2001
From: mchong6
Date: Mon, 1 Nov 2021 22:54:48 -0500
Subject: [PATCH] update

---
 infinity.ipynb |  12 ++++++
 model.py       | 104 ++++---------------------------------------------
 util.py        |  26 ++++++++++++-
 3 files changed, 43 insertions(+), 99 deletions(-)

diff --git a/infinity.ipynb b/infinity.ipynb
index 0acc4d6..84a6020 100644
--- a/infinity.ipynb
+++ b/infinity.ipynb
@@ -1649,6 +1649,18 @@
    "display_name": "Python 3",
    "language": "python",
    "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.8"
   }
  },
  "nbformat": 4,
diff --git a/model.py b/model.py
index d83787f..9c28e8a 100755
--- a/model.py
+++ b/model.py
@@ -538,101 +538,6 @@ def get_latent(self, input, is_latent=False, truncation=1, mean_latent=None):
 
         return output
 
-    def blend_bbox(self, latent1, latent2, coord):
-        def get_bbox_from_mask(img):
-            img = img[0,0]
-            a = torch.where(img != 0)
-
-            y = torch.min(a[0])
-            x = torch.min(a[1])
-            h = torch.max(a[0]) - y
-            w = torch.max(a[1]) - x
-
-            return (y,x,h,w)
-
-        noise = [getattr(self.noises, f'noise_{i}') for i in range(self.num_layers)]
-
-        coord = coord.astype('uint8')
-        x1, y1, w1, h1 = coord[0]
-        x2, y2, w2, h2 = coord[1]
-        h = max(h1, h2)
-        w = max(w1, w2)
-
-        mask1 = torch.zeros([1,1,256,256]).cuda()
-        mask1[..., y1:y1+h, x1:x1+w] = 1
-        mask1 = k.gaussian_blur2d(mask1, (21,21), sigma=(10,10))
-
-        mask2 = torch.zeros([1,1,256,256]).cuda()
-        mask2[..., y2:y2+h, x2:x2+w] = 1
-        mask2 = k.gaussian_blur2d(mask2, (21,21), sigma=(10,10))
-
-        out = self.input(latent1[0])
-        out1, _ = self.conv1(out, latent1[0], noise=noise[0])
-        out2, _ = self.conv1(out, latent2[0], noise=noise[0])
-        alpha1 = F.interpolate(mask1, size=out1.size()[2:], mode='bilinear')
-        alpha2 = F.interpolate(mask2, size=out1.size()[2:], mode='bilinear')
-        bbox1 = get_bbox_from_mask(alpha1)
-        bbox2 = get_bbox_from_mask(alpha2)
-        h = max(bbox1[2], bbox2[2])
-        w = max(bbox1[3], bbox2[3])
-        out = (1-alpha1)*out1
-        out[..., bbox1[0]:bbox1[0]+h, bbox1[1]:bbox1[1]+w] += (alpha2*out2)[..., bbox2[0]:bbox2[0]+h, bbox2[1]:bbox2[1]+w]
-
-
-        skip1 = self.to_rgb1(out, latent1[1])
-        skip2 = self.to_rgb1(out, latent2[1])
-        alpha1 = F.interpolate(mask1, size=skip1.size()[2:], mode='bilinear')
-        alpha2 = F.interpolate(mask2, size=skip1.size()[2:], mode='bilinear')
-        bbox1 = get_bbox_from_mask(alpha1)
-        bbox2 = get_bbox_from_mask(alpha2)
-        h = max(bbox1[2], bbox2[2])
-        w = max(bbox1[3], bbox2[3])
-        skip = (1-alpha1)*skip1
-        skip[..., bbox1[0]:bbox1[0]+h, bbox1[1]:bbox1[1]+w] += (alpha2*skip2)[..., bbox2[0]:bbox2[0]+h, bbox2[1]:bbox2[1]+w]
-
-        i = 2
-        for conv1, conv2, noise1, noise2, to_rgb in zip(
-            self.convs[::2], self.convs[1::2], noise[1::2], noise[2::2], self.to_rgbs
-        ):
-
-            out1, _ = conv1(out, latent1[i], noise=noise1)
-            out2, _ = conv1(out, latent2[i], noise=noise1)
-            alpha1 = F.interpolate(mask1, size=out1.size()[2:], mode='bilinear')
-            alpha2 = F.interpolate(mask2, size=out1.size()[2:], mode='bilinear')
-            bbox1 = get_bbox_from_mask(alpha1)
-            bbox2 = get_bbox_from_mask(alpha2)
-            h = max(bbox1[2], bbox2[2])
-            w = max(bbox1[3], bbox2[3])
-            out = (1-alpha1)*out1
-            out[..., bbox1[0]:bbox1[0]+h, bbox1[1]:bbox1[1]+w] += (alpha2*out2)[..., bbox2[0]:bbox2[0]+h, bbox2[1]:bbox2[1]+w]
-
-            out1, _ = conv2(out, latent1[i+1], noise=noise2)
-            out2, _ = conv2(out, latent2[i+1], noise=noise2)
-            alpha1 = F.interpolate(mask1, size=out1.size()[2:], mode='bilinear')
-            alpha2 = F.interpolate(mask2, size=out1.size()[2:], mode='bilinear')
-            bbox1 = get_bbox_from_mask(alpha1)
-            bbox2 = get_bbox_from_mask(alpha2)
-            h = max(bbox1[2], bbox2[2])
-            w = max(bbox1[3], bbox2[3])
-            out = (1-alpha1)*out1
-            out[..., bbox1[0]:bbox1[0]+h, bbox1[1]:bbox1[1]+w] += (alpha2*out2)[..., bbox2[0]:bbox2[0]+h, bbox2[1]:bbox2[1]+w]
-
-            skip1 = to_rgb(out, latent1[i+2], skip)
-            skip2 = to_rgb(out, latent2[i+2], skip)
-            alpha1 = F.interpolate(mask1, size=skip1.size()[2:], mode='bilinear')
-            alpha2 = F.interpolate(mask2, size=skip1.size()[2:], mode='bilinear')
-            bbox1 = get_bbox_from_mask(alpha1)
-            bbox2 = get_bbox_from_mask(alpha2)
-            h = max(bbox1[2], bbox2[2])
-            w = max(bbox1[3], bbox2[3])
-            skip = (1-alpha1)*skip1
-            skip[..., bbox1[0]:bbox1[0]+h, bbox1[1]:bbox1[1]+w] += (alpha2*skip2)[..., bbox2[0]:bbox2[0]+h, bbox2[1]:bbox2[1]+w]
-
-            i += 3
-
-        image = skip.clamp(-1,1)
-        return image
-
     def patch_swap(self, latent1, latent2, coord, swap=True):
         noise = [getattr(self.noises, f'noise_{i}') for i in range(self.num_layers)]
 
@@ -731,10 +636,15 @@ def singan(self, latent, mode):
 
 
-    def blend_mask(self, latent1, latent2, coord, num_blend=99, pose_align=False, pose_num=4):
+    def blend_bbox(self, latent1, latent2, coord, model_type, num_blend=99):
         noise = [getattr(self.noises, f'noise_{i}') for i in range(self.num_layers)]
 
-        coord = coord.astype('uint8')
+        if model_type == 'face':
+            pose_align = True
+            pose_num = 4
+        else:
+            pose_align = False
+
         x, y, w, h = coord[0]
 
         mask = torch.zeros([1,1,256,256]).cuda()
 
diff --git a/util.py b/util.py
index 2e5cb04..79092fa 100644
--- a/util.py
+++ b/util.py
@@ -9,7 +9,7 @@
 import math
 import scipy
 import scipy.ndimage
-
+import torchvision
 
 # Number of style channels per StyleGAN layer
 style2list_len = [512, 512, 512, 512, 512, 512, 512, 512, 512, 512,
@@ -19,12 +19,22 @@
 rgb_layer_idx = [1,4,7,10,13,16,19,22,25]
 
 google_drive_paths = {
-    "stylegan2-church-config-f.pt": "https://drive.google.com/uc?id=1ORsZHZEeFNEX9HtqRutt1jMgrf5Gpcat",
+    "church.pt": "https://drive.google.com/uc?id=1ORsZHZEeFNEX9HtqRutt1jMgrf5Gpcat",
+    "face.pt": "https://drive.google.com/uc?id=1dOBo4xWUwM7-BwHWZgp-kV1upaD6tHAh",
+    "landscape.pt": "https://drive.google.com/uc?id=1rN5EhwiY95BBNPvOezhX4SZ_tEOR0qe2",
+    "disney.pt": "https://drive.google.com/uc?id=1n2uQ5s2XdUBGIcZA9Uabz1mkjVvKWFeG",
+    "010000.pt": "https://drive.google.com/uc?id=1hOq8zx0wVS3zqdfASXhzFre7DPi7Sel_",
     "model_ir_se50.pt": "https://drive.google.com/uc?id=1KW7bjndL3QG3sxBbZxreGHigcCCpsDgn",
     "dlibshape_predictor_68_face_landmarks.dat": "https://drive.google.com/uc?id=11BDmNKS1zxSZxkgsEvQoKgFd8J264jKp",
     "e4e_ffhq_encode.pt": "https://drive.google.com/uc?id=1cUv_reLE6k3604or78EranS7XzuVMWeO"
 }
 
+@torch.no_grad()
+def load_model(generator, model_file_path):
+    ensure_checkpoint_exists(model_file_path)
+    ckpt = torch.load(model_file_path, map_location=lambda storage, loc: storage)
+    generator.load_state_dict(ckpt["g_ema"], strict=False)
+    return generator.mean_latent(50000)
 
 def ensure_checkpoint_exists(model_weights_filename):
     if not os.path.isfile(model_weights_filename) and (
@@ -330,3 +340,15 @@ def align_face(filepath, output_size=512):
     # Return aligned image.
     return img
 
+def normalize(x):
+    return (x+1)/2
+
+def tensor2bbox_im(x):
+    return np.array(torchvision.transforms.functional.to_pil_image(normalize(x[0])))
+
+def prepare_bbox(boxes):
+    output = []
+    for i in range(len(boxes)):
+        y1,x1,y2,x2 = boxes[i][0]
+        output.append((256*np.array([x1,y1, x2-x1, y2-y1])).astype(np.uint8))
+    return output
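
Usage sketch (not part of the patch): a minimal example of how the new util.py
helpers fit together, assuming a rosinality-style Generator(size, style_dim, n_mlp)
constructor in model.py, a CUDA device, and the face.pt checkpoint named in
google_drive_paths; the constructor arguments and box values below are
illustrative assumptions, not part of this commit.

    import torch
    from model import Generator
    from util import load_model, prepare_bbox

    # Assumed constructor signature: Generator(size, style_dim, n_mlp).
    generator = Generator(256, 512, 8).to('cuda')

    # load_model() downloads the checkpoint if needed, restores the EMA
    # weights, and returns the mean latent used for truncation downstream.
    mean_latent = load_model(generator, 'face.pt')

    # prepare_bbox() converts normalized (y1, x1, y2, x2) boxes into
    # pixel-space (x, y, w, h) arrays on the 256x256 grid that
    # blend_bbox() operates on.
    boxes = [[(0.25, 0.20, 0.75, 0.80)], [(0.30, 0.25, 0.80, 0.85)]]
    coord = prepare_bbox(boxes)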