Improved model+EMA checkpointing fix #2295

Merged 1 commit on Feb 25, 2021

Improved model+EMA checkpointing 2
glenn-jocher committed Feb 25, 2021
commit 3af1d03536e100751cd56745f8f597e63bb4b342
test.py (1 addition & 0 deletions)

@@ -269,6 +269,7 @@ def test(data,
             print(f'pycocotools unable to run: {e}')
 
     # Return results
+    model.float()  # for training
     if not training:
         s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
         print(f"Results saved to {save_dir}{s}")
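
Why this one-line addition matters: earlier in test() the model is cast to half precision for GPU inference (not shown in this hunk), and nn.Module.half() converts parameters in place, so the cast also affects the object the training loop still holds. The new model.float() undoes that before test() returns. A minimal sketch of the in-place behavior, using a stand-in nn.Linear rather than the actual detection model:

import torch
import torch.nn as nn

model = nn.Linear(4, 2)  # stand-in for the detection model
assert next(model.parameters()).dtype == torch.float32

model.half()  # nn.Module.half() casts parameters and buffers in place and returns self
assert next(model.parameters()).dtype == torch.float16

model.float()  # what the added test.py line does: back to fp32 before training resumes
assert next(model.parameters()).dtype == torch.float32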
train.py (3 additions & 4 deletions)

@@ -4,6 +4,7 @@
 import os
 import random
 import time
+from copy import deepcopy
 from pathlib import Path
 from threading import Thread
 

@@ -381,8 +382,8 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
                 ckpt = {'epoch': epoch,
                         'best_fitness': best_fitness,
                         'training_results': results_file.read_text(),
-                        'model': (model.module if is_parallel(model) else model).half(),
-                        'ema': (ema.ema.half(), ema.updates),
+                        'model': deepcopy(model.module if is_parallel(model) else model).half(),
+                        'ema': (deepcopy(ema.ema).half(), ema.updates),
                         'optimizer': optimizer.state_dict(),
                         'wandb_id': wandb_run.id if wandb else None}
 

@@ -392,8 +393,6 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
                     torch.save(ckpt, best)
                 del ckpt
 
-                model.float(), ema.ema.float()
-
         # end epoch ----------------------------------------------------------------------------------------------------
     # end training
 
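
The train.py change addresses the same in-place mutation at checkpoint time: the old code halved the live model and EMA while building ckpt, then relied on the now-removed model.float(), ema.ema.float() line to repair them after every save. Deep-copying before .half() casts only the snapshot, so the live fp32 weights never change and the repair step becomes unnecessary. A minimal sketch of the before/after pattern, with stand-in modules and a placeholder update count (the names here are illustrative, not the actual train.py objects):

from copy import deepcopy

import torch
import torch.nn as nn

model = nn.Linear(4, 2)  # stand-in for the live training model
ema = deepcopy(model)    # stand-in for the EMA shadow module

# Old pattern: .half() returns self, so the checkpoint held the *live* modules,
# now cast to fp16, hence the post-save repair line that this PR removes.
# ckpt = {'model': model.half(), 'ema': (ema.half(), 0)}

# Fixed pattern: snapshot first, then cast only the snapshot.
ckpt = {'model': deepcopy(model).half(),
        'ema': (deepcopy(ema).half(), 0)}  # (ema module, update count), as in the PR

torch.save(ckpt, 'last.pt')

# Live fp32 weights are untouched; no .float() repair step is needed.
assert next(model.parameters()).dtype == torch.float32
assert next(ema.parameters()).dtype == torch.float32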