Fix bugs

ngxbac · Jul 6, 2019 · 8f59f20 · 8f59f20
1 parent a49d90e
commit 8f59f20
Show file tree

Hide file tree

Showing 7 changed files with 51 additions and 45 deletions.
diff --git a/bin/train.sh b/bin/train.sh
@@ -4,7 +4,7 @@ export CUDA_VISIBLE_DEVICES=2,3
 RUN_CONFIG=config.yml
 
 
-LOGDIR=/raid/bac/kaggle/logs/recursion_cell/test/c123_s1_1cycle_adamw_norm_per_channel_smooth/se_resnext50_32x4d/
+LOGDIR=/raid/bac/kaggle/logs/recursion_cell/test/c123_s1_1cycle_adamw_norm_per_channel_smooth_reprocedure/se_resnext50_32x4d/
 catalyst-dl run \
     --config=./configs/${RUN_CONFIG} \
     --logdir=$LOGDIR \

diff --git a/configs/config.yml b/configs/config.yml
@@ -43,7 +43,7 @@ stages:
     train_csv: "./csv/train_0.csv"
     valid_csv: "./csv/valid_0.csv"
     root: "/raid/data/kaggle/recursion-cellular-image-classification/"
-    site: 1
+    sites: [1]
     channels: [1, 2, 3]
 
   stage1:

diff --git a/preprocessing/image_to_arr.py b/preprocessing/image_to_arr.py
@@ -32,7 +32,7 @@ def image_path(dataset,
     address : str
         plate address
     site : int
-        site number
+        site number
     channel : int
         channel number
     base_path : str

diff --git a/src/dataset.py b/src/dataset.py
@@ -61,7 +61,7 @@ def image_path(dataset,
     address : str
         plate address
     site : int
-        site number
+        site number
     channel : int
         channel number
     base_path : str
@@ -93,7 +93,7 @@ def image_stats(pixel_stat,
     address : str
         plate address
     site : int
-        site number
+        site number
     channel : int
         channel number
     base_path : str
@@ -143,7 +143,7 @@ def convert_tensor_to_rgb(t, channels=DEFAULT_CHANNELS, vmax=255, rgb_map=RGB_MA
         See rxrx.io.RGB_MAP to see what the defaults are.
     Returns
     -------
-    np.ndarray the image data of the site as RGB channels
+    np.ndarray the image data of the sites as RGB channels
     """
     colored_channels = []
     for i, channel in enumerate(channels):
@@ -181,10 +181,12 @@ def __init__(self,
                  csv_file,
                  root,
                  transform,
-                 site=1,
+                 sites=[1],
                  mode='train',
                  channels=[1, 2, 3, 4, 5, 6],
                  ):
+        print("Channels ", channels)
+        print("sites ", sites)
         df = pd.read_csv(csv_file, nrows=None)
         self.pixel_stat = pd.read_csv(os.path.join(root, "pixel_stats.csv"))
         self.stat_dict = {}
@@ -208,16 +210,16 @@ def __init__(self,
                 self.stat_dict[experiment][plate][well][site] = {}
 
             if not channel in self.stat_dict[experiment][plate][well][site]:
-                self.stat_dict[experiment][plate][well][channel] = {}
+                self.stat_dict[experiment][plate][well][site][channel] = {}
 
-            self.stat_dict[experiment][plate][well][channel]["mean"] = mean / 255
-            self.stat_dict[experiment][plate][well][channel]["std"] = std / 255
+            self.stat_dict[experiment][plate][well][site][channel]["mean"] = mean / 255
+            self.stat_dict[experiment][plate][well][site][channel]["std"] = std / 255
 
 
         self.transform = transform
         self.mode = mode
         self.channels = channels
-        self.site = site
+        self.sites = sites
 
         self.experiments = df['experiment'].values
         self.plates = df['plate'].values
@@ -239,26 +241,30 @@ def __getitem__(self, idx):
         plate = self.plates[idx]
         well = self.wells[idx]
 
-        channel_paths = [
-            image_path(
-                dataset=self.mode,
-                experiment=experiment,
-                plate=plate,
-                address=well,
-                channel=channel,
-                site=self.site,
-                base_path=self.root,
-            ) for channel in self.channels
-        ]
+        channel_paths = []
+
+        for site in self.sites:
+            for channel in self.channels:
+                path = image_path(
+                    dataset=self.mode,
+                    experiment=experiment,
+                    plate=plate,
+                    address=well,
+                    channel=channel,
+                    site=site,
+                    base_path=self.root,
+                )
+                channel_paths.append(path)
 
         std_arr = []
         mean_arr = []
 
-        for channel in self.channels:
-            mean = self.stat_dict[experiment][plate][well][channel]["mean"]
-            std = self.stat_dict[experiment][plate][well][channel]["std"]
-            std_arr.append(std)
-            mean_arr.append(mean)
+        for site in self.sites:
+            for channel in self.channels:
+                mean = self.stat_dict[experiment][plate][well][site][channel]["mean"]
+                std = self.stat_dict[experiment][plate][well][site][channel]["std"]
+                std_arr.append(std)
+                mean_arr.append(mean)
 
         image = load_images_as_tensor(channel_paths, dtype=np.float32)
         # image = convert_tensor_to_rgb(image)

diff --git a/src/experiment.py b/src/experiment.py
@@ -34,7 +34,7 @@ def get_datasets(self, stage: str, **kwargs):
         image_size = kwargs.get("image_size", 320)
         train_csv = kwargs.get('train_csv', None)
         valid_csv = kwargs.get('valid_csv', None)
-        site = kwargs.get('site', 1)
+        sites = kwargs.get('sites', [1])
         channels = kwargs.get('channels', [1, 2, 3, 4, 5, 6])
         root = kwargs.get('root', None)
 
@@ -45,7 +45,7 @@ def get_datasets(self, stage: str, **kwargs):
                 root=root,
                 transform=transform,
                 mode='train',
-                site=site,
+                sites=sites,
                 channels=channels
             )
             datasets["train"] = train_set
@@ -57,7 +57,7 @@ def get_datasets(self, stage: str, **kwargs):
                 root=root,
                 transform=transform,
                 mode='train',
-                site=site,
+                sites=sites,
                 channels=channels
             )
             datasets["valid"] = valid_set

diff --git a/src/make_submission.py b/src/make_submission.py
@@ -35,15 +35,15 @@ def predict(model, loader):
 def predict_all():
     test_csv = '/raid/data/kaggle/recursion-cellular-image-classification/test.csv'
     # test_csv = './csv/valid_0.csv'
-    log_dir = "/raid/bac/kaggle/logs/recursion_cell/test/c123_s1_1cycle_adamw_norm_per_channel_smooth/se_resnext50_32x4d/"
+    log_dir = "/raid/bac/kaggle/logs/recursion_cell/test/c123_s1_1cycle_adamw_norm_per_channel_smooth_reprocedure/se_resnext50_32x4d/"
     root = "/raid/data/kaggle/recursion-cellular-image-classification/"
-    site = 1
+    sites = [1]
     channels = [1,2,3]
 
     model = cell_senet(
         model_name="se_resnext50_32x4d",
         num_classes=1108,
-        n_channels=len(channels)
+        n_channels=len(channels) * len(sites)
     )
 
     checkpoint = f"{log_dir}/checkpoints/best.pth"
@@ -57,7 +57,7 @@ def predict_all():
         root=root,
         transform=valid_aug(512),
         mode='test',
-        site=site,
+        sites=sites,
         channels=channels
     )
 
@@ -75,8 +75,8 @@ def predict_all():
     submission = df.copy()
     submission['sirna'] = all_preds.astype(int)
     os.makedirs("submission", exist_ok=True)
-    submission.to_csv('./submission/se_resnext50_32x4d_c123_s1_1cycle_adamw_norm_per_channel_smooth.csv', index=False, columns=['id_code', 'sirna'])
-    np.save("./submission/se_resnext50_32x4d_c123_s1_1cycle_adamw_norm_per_channel_smooth.npy", pred)
+    submission.to_csv('./submission/se_resnext50_c123_s1_1cycle_adamw_norm_per_channel_smooth_reprocedure.csv', index=False, columns=['id_code', 'sirna'])
+    np.save("./submission/se_resnext50_c123_s1_1cycle_adamw_norm_per_channel_smooth_reprocedure.npy", pred)
 
 
 if __name__ == '__main__':

diff --git a/src/rxrxio.py b/src/rxrxio.py
@@ -74,7 +74,7 @@ def convert_tensor_to_rgb(t, channels=DEFAULT_CHANNELS, vmax=255, rgb_map=RGB_MA
         See rxrx.io.RGB_MAP to see what the defaults are.
     Returns
     -------
-    np.ndarray the image data of the site as RGB channels
+    np.ndarray the image data of the sites as RGB channels
     """
     colored_channels = []
     for i, channel in enumerate(channels):
@@ -111,7 +111,7 @@ def image_path(dataset,
     address : str
         plate address
     site : int
-        site number
+        site number
     channel : int
         channel number
     base_path : str
@@ -132,7 +132,7 @@ def load_site(dataset,
               channels=DEFAULT_CHANNELS,
               base_path=DEFAULT_IMAGES_BASE_PATH):
     """
-    Returns the image data of a site
+    Returns the image data of a site
     Parameters
     ----------
     dataset : str
@@ -144,14 +144,14 @@ def load_site(dataset,
     address : str
         plate address
     site : int
-        site number
+        site number
     channels : list of int
         channels to include
     base_path : str
         the base path of the raw images
     Returns
     -------
-    np.ndarray the image data of the site
+    np.ndarray the image data of the site
     """
     channel_paths = [
         image_path(
@@ -182,7 +182,7 @@ def load_site_as_rgb(dataset,
     address : str
         plate address
     site : int
-        site number
+        site number
     channels : list of int
         channels to include
     base_path : str
@@ -192,7 +192,7 @@ def load_site_as_rgb(dataset,
         See rxrx.io.RGB_MAP to see what the defaults are.
     Returns
     -------
-    np.ndarray the image data of the site as RGB channels
+    np.ndarray the image data of the site as RGB channels
     """
     x = load_site(dataset, experiment, plate, well, site, channels, base_path)
     return convert_tensor_to_rgb(x, channels, rgb_map=rgb_map)
@@ -215,10 +215,10 @@ def _load_dataset(base_path, dataset, include_controls=True):
     dfs = []
     for site in (1, 2):
         df = df.copy()
-        df['site'] = site
+        df['sites'] = site
         dfs.append(df)
     res = pd.concat(dfs).sort_values(
-        by=['id_code', 'site']).set_index('id_code')
+        by=['id_code', 'sites']).set_index('id_code')
     return res