Skip to content

Commit

Permalink
Fix bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
ngxbac committed Jul 6, 2019
1 parent a49d90e commit 8f59f20
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 45 deletions.
2 changes: 1 addition & 1 deletion bin/train.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export CUDA_VISIBLE_DEVICES=2,3
RUN_CONFIG=config.yml


LOGDIR=/raid/bac/kaggle/logs/recursion_cell/test/c123_s1_1cycle_adamw_norm_per_channel_smooth/se_resnext50_32x4d/
LOGDIR=/raid/bac/kaggle/logs/recursion_cell/test/c123_s1_1cycle_adamw_norm_per_channel_smooth_reprocedure/se_resnext50_32x4d/
catalyst-dl run \
--config=./configs/${RUN_CONFIG} \
--logdir=$LOGDIR \
Expand Down
2 changes: 1 addition & 1 deletion configs/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ stages:
train_csv: "./csv/train_0.csv"
valid_csv: "./csv/valid_0.csv"
root: "/raid/data/kaggle/recursion-cellular-image-classification/"
site: 1
sites: [1]
channels: [1, 2, 3]

stage1:
Expand Down
2 changes: 1 addition & 1 deletion preprocessing/image_to_arr.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def image_path(dataset,
address : str
plate address
site : int
site number
sites number
channel : int
channel number
base_path : str
Expand Down
54 changes: 30 additions & 24 deletions src/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def image_path(dataset,
address : str
plate address
site : int
site number
sites number
channel : int
channel number
base_path : str
Expand Down Expand Up @@ -93,7 +93,7 @@ def image_stats(pixel_stat,
address : str
plate address
site : int
site number
sites number
channel : int
channel number
base_path : str
Expand Down Expand Up @@ -143,7 +143,7 @@ def convert_tensor_to_rgb(t, channels=DEFAULT_CHANNELS, vmax=255, rgb_map=RGB_MA
See rxrx.io.RGB_MAP to see what the defaults are.
Returns
-------
np.ndarray the image data of the site as RGB channels
np.ndarray the image data of the sites as RGB channels
"""
colored_channels = []
for i, channel in enumerate(channels):
Expand Down Expand Up @@ -181,10 +181,12 @@ def __init__(self,
csv_file,
root,
transform,
site=1,
sites=[1],
mode='train',
channels=[1, 2, 3, 4, 5, 6],
):
print("Channels ", channels)
print("sites ", sites)
df = pd.read_csv(csv_file, nrows=None)
self.pixel_stat = pd.read_csv(os.path.join(root, "pixel_stats.csv"))
self.stat_dict = {}
Expand All @@ -208,16 +210,16 @@ def __init__(self,
self.stat_dict[experiment][plate][well][site] = {}

if not channel in self.stat_dict[experiment][plate][well][site]:
self.stat_dict[experiment][plate][well][channel] = {}
self.stat_dict[experiment][plate][well][site][channel] = {}

self.stat_dict[experiment][plate][well][channel]["mean"] = mean / 255
self.stat_dict[experiment][plate][well][channel]["std"] = std / 255
self.stat_dict[experiment][plate][well][site][channel]["mean"] = mean / 255
self.stat_dict[experiment][plate][well][site][channel]["std"] = std / 255


self.transform = transform
self.mode = mode
self.channels = channels
self.site = site
self.sites = sites

self.experiments = df['experiment'].values
self.plates = df['plate'].values
Expand All @@ -239,26 +241,30 @@ def __getitem__(self, idx):
plate = self.plates[idx]
well = self.wells[idx]

channel_paths = [
image_path(
dataset=self.mode,
experiment=experiment,
plate=plate,
address=well,
channel=channel,
site=self.site,
base_path=self.root,
) for channel in self.channels
]
channel_paths = []

for site in self.sites:
for channel in self.channels:
path = image_path(
dataset=self.mode,
experiment=experiment,
plate=plate,
address=well,
channel=channel,
site=site,
base_path=self.root,
)
channel_paths.append(path)

std_arr = []
mean_arr = []

for channel in self.channels:
mean = self.stat_dict[experiment][plate][well][channel]["mean"]
std = self.stat_dict[experiment][plate][well][channel]["std"]
std_arr.append(std)
mean_arr.append(mean)
for site in self.sites:
for channel in self.channels:
mean = self.stat_dict[experiment][plate][well][site][channel]["mean"]
std = self.stat_dict[experiment][plate][well][site][channel]["std"]
std_arr.append(std)
mean_arr.append(mean)

image = load_images_as_tensor(channel_paths, dtype=np.float32)
# image = convert_tensor_to_rgb(image)
Expand Down
6 changes: 3 additions & 3 deletions src/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def get_datasets(self, stage: str, **kwargs):
image_size = kwargs.get("image_size", 320)
train_csv = kwargs.get('train_csv', None)
valid_csv = kwargs.get('valid_csv', None)
site = kwargs.get('site', 1)
sites = kwargs.get('sites', [1])
channels = kwargs.get('channels', [1, 2, 3, 4, 5, 6])
root = kwargs.get('root', None)

Expand All @@ -45,7 +45,7 @@ def get_datasets(self, stage: str, **kwargs):
root=root,
transform=transform,
mode='train',
site=site,
sites=sites,
channels=channels
)
datasets["train"] = train_set
Expand All @@ -57,7 +57,7 @@ def get_datasets(self, stage: str, **kwargs):
root=root,
transform=transform,
mode='train',
site=site,
sites=sites,
channels=channels
)
datasets["valid"] = valid_set
Expand Down
12 changes: 6 additions & 6 deletions src/make_submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,15 @@ def predict(model, loader):
def predict_all():
test_csv = '/raid/data/kaggle/recursion-cellular-image-classification/test.csv'
# test_csv = './csv/valid_0.csv'
log_dir = "/raid/bac/kaggle/logs/recursion_cell/test/c123_s1_1cycle_adamw_norm_per_channel_smooth/se_resnext50_32x4d/"
log_dir = "/raid/bac/kaggle/logs/recursion_cell/test/c123_s1_1cycle_adamw_norm_per_channel_smooth_reprocedure/se_resnext50_32x4d/"
root = "/raid/data/kaggle/recursion-cellular-image-classification/"
site = 1
sites = [1]
channels = [1,2,3]

model = cell_senet(
model_name="se_resnext50_32x4d",
num_classes=1108,
n_channels=len(channels)
n_channels=len(channels) * len(sites)
)

checkpoint = f"{log_dir}/checkpoints/best.pth"
Expand All @@ -57,7 +57,7 @@ def predict_all():
root=root,
transform=valid_aug(512),
mode='test',
site=site,
sites=sites,
channels=channels
)

Expand All @@ -75,8 +75,8 @@ def predict_all():
submission = df.copy()
submission['sirna'] = all_preds.astype(int)
os.makedirs("submission", exist_ok=True)
submission.to_csv('./submission/se_resnext50_32x4d_c123_s1_1cycle_adamw_norm_per_channel_smooth.csv', index=False, columns=['id_code', 'sirna'])
np.save("./submission/se_resnext50_32x4d_c123_s1_1cycle_adamw_norm_per_channel_smooth.npy", pred)
submission.to_csv('./submission/se_resnext50_c123_s1_1cycle_adamw_norm_per_channel_smooth_reprocedure.csv', index=False, columns=['id_code', 'sirna'])
np.save("./submission/se_resnext50_c123_s1_1cycle_adamw_norm_per_channel_smooth_reprocedure.npy", pred)


if __name__ == '__main__':
Expand Down
18 changes: 9 additions & 9 deletions src/rxrxio.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def convert_tensor_to_rgb(t, channels=DEFAULT_CHANNELS, vmax=255, rgb_map=RGB_MA
See rxrx.io.RGB_MAP to see what the defaults are.
Returns
-------
np.ndarray the image data of the site as RGB channels
np.ndarray the image data of the sites as RGB channels
"""
colored_channels = []
for i, channel in enumerate(channels):
Expand Down Expand Up @@ -111,7 +111,7 @@ def image_path(dataset,
address : str
plate address
site : int
site number
sites number
channel : int
channel number
base_path : str
Expand All @@ -132,7 +132,7 @@ def load_site(dataset,
channels=DEFAULT_CHANNELS,
base_path=DEFAULT_IMAGES_BASE_PATH):
"""
Returns the image data of a site
Returns the image data of a sites
Parameters
----------
dataset : str
Expand All @@ -144,14 +144,14 @@ def load_site(dataset,
address : str
plate address
site : int
site number
sites number
channels : list of int
channels to include
base_path : str
the base path of the raw images
Returns
-------
np.ndarray the image data of the site
np.ndarray the image data of the sites
"""
channel_paths = [
image_path(
Expand Down Expand Up @@ -182,7 +182,7 @@ def load_site_as_rgb(dataset,
address : str
plate address
site : int
site number
sites number
channels : list of int
channels to include
base_path : str
Expand All @@ -192,7 +192,7 @@ def load_site_as_rgb(dataset,
See rxrx.io.RGB_MAP to see what the defaults are.
Returns
-------
np.ndarray the image data of the site as RGB channels
np.ndarray the image data of the sites as RGB channels
"""
x = load_site(dataset, experiment, plate, well, site, channels, base_path)
return convert_tensor_to_rgb(x, channels, rgb_map=rgb_map)
Expand All @@ -215,10 +215,10 @@ def _load_dataset(base_path, dataset, include_controls=True):
dfs = []
for site in (1, 2):
df = df.copy()
df['site'] = site
df['sites'] = site
dfs.append(df)
res = pd.concat(dfs).sort_values(
by=['id_code', 'site']).set_index('id_code')
by=['id_code', 'sites']).set_index('id_code')
return res


Expand Down

0 comments on commit 8f59f20

Please sign in to comment.