Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deepsupervision #1

Merged
merged 7 commits into from
Aug 1, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
pseudo label
  • Loading branch information
ngxbac committed Jul 30, 2019
commit ec4b3985a0cdf64398dac4d9d6da7cc089f5c022
19 changes: 19 additions & 0 deletions bin/train_pseudo.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Launch the pseudo-label training run with catalyst-dl, once per
# (channel set, fold) combination listed in the loops below.

export CUDA_VISIBLE_DEVICES=2,3
RUN_CONFIG=config_pseudo.yml

# Each loop word is a literal list string forwarded to the config override
# below (with a `:list` suffix). It must be quoted: unquoted, [1,2,3,4,5] is a
# glob character class, so bash would replace it with any file in the current
# directory whose name is the single character 1, 2, 3, 4, 5 or ",".
for channels in "[1,2,3,4,5]"; do
    for fold in 0; do
        # NOTE(review): LOGDIR depends on the fold only, not on the channel
        # set, so adding more channel sets above would reuse the same log
        # directory -- confirm before extending the outer loop.
        LOGDIR="/raid/bac/kaggle/logs/recursion_cell/test/190730/private_pseudo/fold_${fold}/se_resnext50_32x4d/"
        # Quote every expansion so the arguments reach catalyst-dl verbatim
        # (no word splitting, no pathname expansion of the bracketed list).
        catalyst-dl run \
            --config="./configs/${RUN_CONFIG}" \
            --logdir="${LOGDIR}" \
            --out_dir="${LOGDIR}:str" \
            --stages/data_params/channels="${channels}:list" \
            --stages/data_params/train_csv="./csv/train_${fold}.csv:str" \
            --stages/data_params/valid_csv="./csv/valid_${fold}.csv:str" \
            --verbose
    done
done
86 changes: 86 additions & 0 deletions configs/config_pseudo.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Catalyst config for the pseudo-label training run.
# Selected by bin/train_pseudo.sh via RUN_CONFIG=config_pseudo.yml; that script
# overrides stages/data_params/{channels,train_csv,valid_csv} on the command line.
model_params:
  model: cell_senet
  model_name: se_resnext50_32x4d
  # NOTE(review): presumably must equal len(channels) * len(sites) from
  # data_params below (5 * 1) -- confirm against the model constructor.
  n_channels: 5
  num_classes: 1108

args:
  expdir: "src"
  logdir: &logdir "./logs/cell"
  baselogdir: "./logs/cell"

distributed_params:
  opt_level: O1

stages:

  # Settings shared by every stage below.
  state_params:
    main_metric: &reduce_metric accuracy01
    minimize_metric: False

  criterion_params:
    # criterion: CrossEntropyLoss
    criterion: LabelSmoothingCrossEntropy

  data_params:
    batch_size: 64
    num_workers: 16
    drop_last: False
    # drop_last: True

    image_size: &image_size 512
    train_csv: "./csv/train_0.csv"
    valid_csv: "./csv/valid_0.csv"
    # When set, get_datasets() in src/experiment.py builds a second dataset
    # from this CSV and concatenates it onto the training set.
    pseudo_csv: "./csv/pseudo.csv"
    root: "/raid/data/kaggle/recursion-cellular-image-classification/"
    sites: [1]
    channels: [1, 2, 3, 4, 5]

  stage0:

    optimizer_params:
      optimizer: Nadam
      lr: 0.001

    scheduler_params:
      scheduler: MultiStepLR
      # NOTE(review): the milestone (10) lies beyond this stage's num_epochs
      # (2), so the decay presumably never triggers here -- confirm intent.
      milestones: [10]
      gamma: 0.3

    state_params:
      num_epochs: 2

    # Anchored so that stage1 can reuse the same callback set.
    callbacks_params: &callback_params
      loss:
        # callback: CriterionCallback
        callback: LabelSmoothCriterionCallback
      optimizer:
        callback: OptimizerCallback
        accumulation_steps: 2
      accuracy:
        callback: AccuracyCallback
        accuracy_args: [1]
      scheduler:
        callback: SchedulerCallback
        reduce_metric: *reduce_metric
      saver:
        callback: CheckpointCallback

  stage1:

    optimizer_params:
      optimizer: Nadam
      lr: 0.0001

    scheduler_params:
      scheduler: OneCycleLR
      num_steps: 50
      lr_range: [0.0005, 0.00001]
      # lr_range: [0.0015, 0.00003]
      warmup_steps: 5
      momentum_range: [0.85, 0.95]

    state_params:
      num_epochs: 50

    callbacks_params: *callback_params
17 changes: 8 additions & 9 deletions src/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,8 @@ def __init__(self,
print("sites ", sites)
print(csv_file)
df = pd.read_csv(csv_file, nrows=None)
if not 'sirna' in df.columns:
df['sirna'] = 0
# if "train" in csv_file:
# md = combine_metadata(base_path=root)
# md = md[(md.dataset == mode) & (md.site == 1)]
Expand Down Expand Up @@ -272,10 +274,11 @@ def __init__(self,
self.plates = df['plate'].values
self.wells = df['well'].values

if mode != 'test':
self.labels = df['sirna'].values
else:
self.labels = [0] * len(self.experiments)
self.labels = df['sirna'].values
# if mode != 'test':
# self.labels = df['sirna'].values
# else:
# self.labels = [0] * len(self.experiments)

self.root = root

Expand Down Expand Up @@ -331,11 +334,7 @@ def __getitem__(self, idx):

image = normalize(image, std=std_arr, mean=mean_arr, max_pixel_value=255)
image = np.transpose(image, (2, 0, 1)).astype(np.float32)

if self.mode == 'train':
label = self.labels[idx]
else:
label = -1
label = self.labels[idx]

return {
"images": image,
Expand Down
16 changes: 16 additions & 0 deletions src/experiment.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from collections import OrderedDict
import torch
import torch.nn as nn
from torch.utils.data import ConcatDataset
import random
from catalyst.dl.experiment import ConfigExperiment
from dataset import *
Expand Down Expand Up @@ -52,6 +53,7 @@ def get_datasets(self, stage: str, **kwargs):
image_size = kwargs.get("image_size", 320)
train_csv = kwargs.get('train_csv', None)
valid_csv = kwargs.get('valid_csv', None)
pseudo_csv = kwargs.get('pseudo_csv', None)
sites = kwargs.get('sites', [1])
channels = kwargs.get('channels', [1, 2, 3, 4, 5, 6])
root = kwargs.get('root', None)
Expand All @@ -66,6 +68,20 @@ def get_datasets(self, stage: str, **kwargs):
sites=sites,
channels=channels
)

if pseudo_csv:
pseudo_dataset = RecursionCellularSite(
csv_file=pseudo_csv,
root=root,
transform=transform,
mode='test',
sites=sites,
channels=channels
)

train_set = ConcatDataset([
train_set, pseudo_dataset
])
datasets["train"] = train_set

if valid_csv:
Expand Down
4 changes: 2 additions & 2 deletions src/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def predict_all():
def predict_deepsupervision():
test_csv = '/raid/data/kaggle/recursion-cellular-image-classification/test.csv'
# test_csv = './csv/valid_0.csv'
model_name = 'DSInceptionV3'
model_name = 'DSSENet'

for channel_str in [
"[1,2,3,4,5]",
Expand All @@ -134,7 +134,7 @@ def predict_deepsupervision():
# log_dir = log_dir.replace(']', '[]]')

ckp = os.path.join(log_dir, "checkpoints/stage1.50.pth")
model = DSInceptionV3(
model = DSSENet(
num_classes=1108,
n_channels=len(channels) * len(sites)
)
Expand Down
22 changes: 11 additions & 11 deletions src/make_submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def predict(model, loader):
for dct in tqdm(loader, total=len(loader)):
images = dct['images'].to(device)
pred = model(images)
pred = Ftorch.softmax(pred)
# pred = Ftorch.softmax(pred)
pred = pred.detach().cpu().numpy()
preds.append(pred)

Expand Down Expand Up @@ -90,20 +90,20 @@ def predict_all():


def predict_one():
test_csv = './data/test.csv'
test_csv = '/raid/data/kaggle/recursion-cellular-image-classification/test.csv'
# test_csv = './csv/valid_0.csv'

model_name = 'se_resnext50_32x4d'
experiment = "dropout_channel"
experiment = "private_pseudo"

log_dir = f"./bin/log/"
root = "./data/"
log_dir = f"/raid/bac/kaggle/logs/recursion_cell/test/190730/{experiment}/fold_0/{model_name}/"
root = "/raid/data/kaggle/recursion-cellular-image-classification/"
sites = [1]
channels = [1,2,3,4]
channels = [1,2,3,4,5]

preds = []
model = cell_senet(
model_name="se_resnext50_32x4d",
model_name=model_name,
num_classes=1108,
n_channels=len(channels) * len(sites)
)
Expand Down Expand Up @@ -136,12 +136,12 @@ def predict_one():
preds.append(pred)

preds = np.asarray(preds).mean(axis=0)
all_preds = np.argmax(preds, axis=1)
# all_preds = np.argmax(preds, axis=1)
df = pd.read_csv(test_csv)
submission = df.copy()
submission['sirna'] = all_preds.astype(int)
# submission = df.copy()
# submission['sirna'] = all_preds.astype(int)
os.makedirs("submission/", exist_ok=True)
submission.to_csv(f'./submission/{model_name}_{experiment}.csv', index=False, columns=['id_code', 'sirna'])
# submission.to_csv(f'./submission/{model_name}_{experiment}.csv', index=False, columns=['id_code', 'sirna'])
np.save(f"./submission/{model_name}_{experiment}.npy", preds)


Expand Down