
Commit

Allow to restart with optimizers on CUDA (#1144)
* Allow to restart with optimizers on CUDA

* pylint
matt-peters authored Apr 26, 2018
1 parent c684216 commit 6d15d17
Showing 1 changed file with 16 additions and 0 deletions.
allennlp/training/trainer.py
@@ -88,6 +88,21 @@ def sparse_clip_norm(parameters, max_norm, norm_type=2) -> float:
    return total_norm


def move_optimizer_to_cuda(optimizer):
    """
    Move the optimizer state to GPU, if necessary.
    After calling, any parameter specific state in the optimizer
    will be located on the same device as the parameter.
    """
    for param_group in optimizer.param_groups:
        for param in param_group['params']:
            if param.is_cuda:
                param_state = optimizer.state[param]
                for k in param_state.keys():
                    if torch.is_tensor(param_state[k]):
                        param_state[k] = param_state[k].cuda(device=param.get_device())


class TensorboardWriter:
"""
Wraps a pair of ``SummaryWriter`` instances but is a no-op if they're ``None``.
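
The helper added above is easiest to see in isolation. Below is a minimal, illustrative sketch (not part of the commit; it needs a CUDA device, and the model and tensor shapes are made up) of the situation it fixes: per-parameter optimizer state created on the CPU while the parameters themselves have since been moved to the GPU.

    import torch

    model = torch.nn.Linear(4, 4)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

    # One step on the CPU populates the per-parameter state (here, momentum buffers).
    model(torch.randn(2, 4)).sum().backward()
    optimizer.step()

    # Moving the model afterwards leaves the optimizer state behind on the CPU ...
    model.cuda()

    # ... and move_optimizer_to_cuda brings each state tensor to its parameter's device.
    move_optimizer_to_cuda(optimizer)
    for group in optimizer.param_groups:
        for param in group['params']:
            for tensor in optimizer.state[param].values():
                assert not torch.is_tensor(tensor) or tensor.is_cuda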
@@ -856,6 +871,7 @@ def _restore_checkpoint(self) -> Tuple[int, List[float]]:
        training_state = torch.load(training_state_path, map_location=util.device_mapping(-1))
        self._model.load_state_dict(model_state)
        self._optimizer.load_state_dict(training_state["optimizer"])
        move_optimizer_to_cuda(self._optimizer)

        # We didn't used to save `validation_metric_per_epoch`, so we can't assume
        # that it's part of the trainer state. If it's not there, an empty list is all
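In context, the one-line change above slots into the existing restore flow: the checkpoint is deliberately loaded onto the CPU (util.device_mapping(-1) maps the storages there), so after load_state_dict the optimizer state has to be pushed back to the GPU by hand. A hedged, standalone approximation of that flow, with a hypothetical helper name and map_location="cpu" standing in for util.device_mapping(-1):

    import torch

    def restore_optimizer(optimizer, training_state_path):
        # Load every storage onto the CPU first, regardless of where it was saved.
        training_state = torch.load(training_state_path, map_location="cpu")
        optimizer.load_state_dict(training_state["optimizer"])
        # The parameters already live on the GPU; move the freshly loaded state
        # tensors so each one sits on the same device as its parameter.
        move_optimizer_to_cuda(optimizer)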
