Time to Say Goodbye, torch 1.7 and 1.8 (huggingface#22291)
* time to say goodbye, torch 1.7 and 1.8

* clean up torch_int_div

* clean up is_torch_less_than_1_8-9

* update

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
ydshieh and ydshieh authored Mar 21, 2023
1 parent 86c7931 commit 67c2dbd
Showing 42 changed files with 61 additions and 149 deletions.
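The one recurring substitution in this diff: with the floor raised to torch>=1.9, the torch_int_div helper in pytorch_utils is redundant, because torch.div with rounding_mode="floor" (added in PyTorch 1.8) exists on every supported version. A sketch of roughly what the removed shim did — a reconstruction for context, not the verbatim source:

import torch
from packaging import version

# A version flag of the kind this commit also deletes (see "clean up
# is_torch_less_than_1_8-9" above); reconstruction, not verbatim source.
is_torch_less_than_1_8 = version.parse(torch.__version__) < version.parse("1.8.0")

def torch_int_div(tensor1, tensor2):
    # Before 1.8, torch.div had no rounding_mode argument, so the shim
    # fell back to //, which floor-divides integer tensors.
    if is_torch_less_than_1_8:
        return tensor1 // tensor2
    return torch.div(tensor1, tensor2, rounding_mode="floor")

Each hunk below simply inlines the post-1.8 branch at the call site.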
2 changes: 1 addition & 1 deletion setup.py
@@ -171,7 +171,7 @@
"timeout-decorator",
"timm",
"tokenizers>=0.11.1,!=0.11.3,<0.14",
"torch>=1.7,!=1.12.0",
"torch>=1.9,!=1.12.0",
"torchaudio",
"torchvision",
"pyctcdecode>=0.4.0",
2 changes: 1 addition & 1 deletion src/transformers/dependency_versions_table.py
@@ -77,7 +77,7 @@
"timeout-decorator": "timeout-decorator",
"timm": "timm",
"tokenizers": "tokenizers>=0.11.1,!=0.11.3,<0.14",
"torch": "torch>=1.7,!=1.12.0",
"torch": "torch>=1.9,!=1.12.0",
"torchaudio": "torchaudio",
"torchvision": "torchvision",
"pyctcdecode": "pyctcdecode>=0.4.0",
1 change: 0 additions & 1 deletion src/transformers/file_utils.py
@@ -115,7 +115,6 @@
is_torch_cuda_available,
is_torch_fx_available,
is_torch_fx_proxy,
-is_torch_onnx_dict_inputs_support_available,
is_torch_tf32_available,
is_torch_tpu_available,
is_torchaudio_available,
11 changes: 6 additions & 5 deletions src/transformers/generation/utils.py
@@ -32,7 +32,6 @@
MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING,
MODEL_FOR_VISION_2_SEQ_MAPPING,
)
-from ..pytorch_utils import torch_int_div
from ..utils import ModelOutput, logging
from .beam_constraints import DisjunctiveConstraint, PhrasalConstraint
from .beam_search import BeamScorer, BeamSearchScorer, ConstrainedBeamSearchScorer
@@ -2795,7 +2794,7 @@ def beam_search(
next_token_scores, 2 * num_beams, dim=1, largest=True, sorted=True
)

-next_indices = torch_int_div(next_tokens, vocab_size)
+next_indices = torch.div(next_tokens, vocab_size, rounding_mode="floor")
next_tokens = next_tokens % vocab_size

# stateless
@@ -3129,7 +3128,7 @@ def beam_sample(
next_token_scores, _indices = torch.sort(next_token_scores, descending=True, dim=1)
next_tokens = torch.gather(next_tokens, -1, _indices)

-next_indices = torch_int_div(next_tokens, vocab_size)
+next_indices = torch.div(next_tokens, vocab_size, rounding_mode="floor")
next_tokens = next_tokens % vocab_size

# stateless
@@ -3473,7 +3472,7 @@ def group_beam_search(
next_token_scores, 2 * group_size, dim=1, largest=True, sorted=True
)

-next_indices = torch_int_div(next_tokens, vocab_size)
+next_indices = torch.div(next_tokens, vocab_size, rounding_mode="floor")
next_tokens = next_tokens % vocab_size

# stateless
@@ -3503,7 +3502,9 @@ def group_beam_search(
# (beam_idx // group_size) -> batch_idx
# (beam_idx % group_size) -> offset of idx inside the group
reordering_indices[batch_group_indices] = (
-num_beams * torch_int_div(beam_idx, group_size) + group_start_idx + (beam_idx % group_size)
+num_beams * torch.div(beam_idx, group_size, rounding_mode="floor")
++ group_start_idx
++ (beam_idx % group_size)
)

# Store scores, attentions and hidden_states when required
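For the beam-search hunks above: topk runs over scores flattened to shape (batch_size, num_beams * vocab_size), so every returned index encodes a (beam, token) pair, and the floor-division / modulo pair splits it apart again. A toy illustration of that index arithmetic (made-up sizes, not repository code):

import torch

num_beams, vocab_size = 2, 5
scores = torch.randn(1, num_beams * vocab_size)  # flattened over (beam, token)

topk_scores, flat_idx = torch.topk(scores, 2 * num_beams, dim=1, largest=True, sorted=True)

beam_idx = torch.div(flat_idx, vocab_size, rounding_mode="floor")  # which beam
token_id = flat_idx % vocab_size                                   # which token
assert torch.equal(beam_idx * vocab_size + token_id, flat_idx)     # lossless round-trip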
5 changes: 0 additions & 5 deletions src/transformers/modeling_utils.py
@@ -539,9 +539,6 @@ def _move_model_to_meta(model, loaded_state_dict_keys, start_prefix):
"""

-# meta device was added in pt=1.9
-require_version_core("torch>=1.9")

# dematerialize param storage for keys that are going to be replaced by state_dict, by
# putting those on the meta device
for k in loaded_state_dict_keys:
@@ -2100,8 +2097,6 @@ def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.P
raise ValueError("Passing along a `device_map` requires `low_cpu_mem_usage=True`")

if low_cpu_mem_usage:
-# low_cpu_mem_usage requires PyTorch >= 1.9 to have the meta device.
-require_version_core("torch>=1.9")
if device_map is not None:
# The max memory utils require PyTorch >= 1.10 to have torch.cuda.mem_get_info.
require_version_core("torch>=1.10")
4 changes: 2 additions & 2 deletions src/transformers/models/big_bird/modeling_big_bird.py
@@ -37,7 +37,7 @@
TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import apply_chunking_to_forward, torch_int_div
+from ...pytorch_utils import apply_chunking_to_forward
from ...utils import (
ModelOutput,
add_code_sample_docstrings,
@@ -972,7 +972,7 @@ def torch_gather_b2(params, indices):
num_indices_to_pick_from = params.shape[2]

shift = torch.arange(indices.shape[0] * indices.shape[1] * num_indices_to_gather, device=indices.device)
-indices_shift = torch_int_div(shift, num_indices_to_gather) * num_indices_to_pick_from
+indices_shift = torch.div(shift, num_indices_to_gather, rounding_mode="floor") * num_indices_to_pick_from

flattened_indices = indices.view(-1) + indices_shift
flattened_params = params.reshape(-1, params.shape[-2], params.shape[-1])
@@ -36,7 +36,6 @@
Seq2SeqSequenceClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import torch_int_div
from ...utils import (
add_code_sample_docstrings,
add_end_docstrings,
@@ -791,7 +790,7 @@ def torch_gather_b2(params, indices):
num_indices_to_pick_from = params.shape[2]

shift = torch.arange(indices.shape[0] * indices.shape[1] * num_indices_to_gather, device=indices.device)
-indices_shift = torch_int_div(shift, num_indices_to_gather) * num_indices_to_pick_from
+indices_shift = torch.div(shift, num_indices_to_gather, rounding_mode="floor") * num_indices_to_pick_from

flattened_indices = indices.view(-1) + indices_shift
flattened_params = params.reshape(-1, params.shape[-2], params.shape[-1])
@@ -68,8 +68,6 @@
import torch
from torch import nn

-from transformers.pytorch_utils import torch_int_div


if is_vision_available():
import PIL
@@ -1314,7 +1312,7 @@ def post_process(self, outputs, target_sizes):
prob = out_logits.sigmoid()
topk_values, topk_indexes = torch.topk(prob.view(out_logits.shape[0], -1), 300, dim=1)
scores = topk_values
-topk_boxes = torch_int_div(topk_indexes, out_logits.shape[2])
+topk_boxes = torch.div(topk_indexes, out_logits.shape[2], rounding_mode="floor")
labels = topk_indexes % out_logits.shape[2]
boxes = center_to_corners_format(out_bbox)
boxes = torch.gather(boxes, 1, topk_boxes.unsqueeze(-1).repeat(1, 1, 4))
@@ -1360,7 +1358,7 @@ def post_process_object_detection(
prob = out_logits.sigmoid()
topk_values, topk_indexes = torch.topk(prob.view(out_logits.shape[0], -1), 100, dim=1)
scores = topk_values
-topk_boxes = torch_int_div(topk_indexes, out_logits.shape[2])
+topk_boxes = torch.div(topk_indexes, out_logits.shape[2], rounding_mode="floor")
labels = topk_indexes % out_logits.shape[2]
boxes = center_to_corners_format(out_bbox)
boxes = torch.gather(boxes, 1, topk_boxes.unsqueeze(-1).repeat(1, 1, 4))
@@ -26,7 +26,6 @@
from ...activations import ACT2FN
from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithCrossAttentions, Seq2SeqModelOutput
from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import torch_int_div
from ...utils import (
ModelOutput,
add_start_docstrings,
@@ -452,7 +451,7 @@ def forward(self, pixel_values, pixel_mask):
x_embed = x_embed / (x_embed[:, :, -1:] + 1e-6) * self.scale

dim_t = torch.arange(self.embedding_dim, dtype=torch.float32, device=pixel_values.device)
-dim_t = self.temperature ** (2 * torch_int_div(dim_t, 2) / self.embedding_dim)
+dim_t = self.temperature ** (2 * torch.div(dim_t, 2, rounding_mode="floor") / self.embedding_dim)

pos_x = x_embed[:, :, :, None] / dim_t
pos_y = y_embed[:, :, :, None] / dim_t
@@ -504,7 +503,7 @@ def build_position_encoding(config):
def gen_sine_position_embeddings(pos_tensor):
scale = 2 * math.pi
dim_t = torch.arange(128, dtype=torch.float32, device=pos_tensor.device)
-dim_t = 10000 ** (2 * torch_int_div(dim_t, 2) / 128)
+dim_t = 10000 ** (2 * torch.div(dim_t, 2, rounding_mode="floor") / 128)
x_embed = pos_tensor[:, :, 0] * scale
y_embed = pos_tensor[:, :, 1] * scale
pos_x = x_embed[:, :, None] / dim_t
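The dim_t lines in the hunks above build sinusoidal position embeddings: channels 2i and 2i+1 share the frequency temperature ** (2i / embedding_dim), which is why the channel index is floor-divided by 2 before exponentiation. A minimal sketch of the pattern with toy sizes, assuming the usual sin/cos interleaving (not repository code):

import torch

embedding_dim, temperature = 8, 10000
dim_t = torch.arange(embedding_dim, dtype=torch.float32)
dim_t = temperature ** (2 * torch.div(dim_t, 2, rounding_mode="floor") / embedding_dim)

pos = torch.tensor([3.0])      # one scalar position, for illustration
angles = pos[:, None] / dim_t  # shape (1, embedding_dim)
# Even channels take sin, odd channels take cos, then re-interleave:
emb = torch.stack((angles[:, 0::2].sin(), angles[:, 1::2].cos()), dim=2).flatten(1)
assert emb.shape == (1, embedding_dim)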
3 changes: 1 addition & 2 deletions src/transformers/models/data2vec/modeling_data2vec_audio.py
@@ -35,7 +35,6 @@
XVectorOutput,
)
from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import torch_int_div
from ...utils import add_code_sample_docstrings, add_start_docstrings, add_start_docstrings_to_model_forward, logging
from .configuration_data2vec_audio import Data2VecAudioConfig

@@ -731,7 +730,7 @@ def _get_feat_extract_output_lengths(
def _conv_out_length(input_length, kernel_size, stride):
# 1D convolutional layer output length formula taken
# from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
-return torch_int_div(input_length - kernel_size, stride) + 1
+return torch.div(input_length - kernel_size, stride, rounding_mode="floor") + 1

for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride):
input_lengths = _conv_out_length(input_lengths, kernel_size, stride)
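_conv_out_length above is the standard Conv1d output-length formula for the unpadded, undilated case, floor((input_length - kernel_size) / stride) + 1, which is also why floor is the right rounding mode. A quick check against a real layer (toy numbers, not repository code):

import torch
from torch import nn

input_length, kernel_size, stride = 16000, 10, 5
expected = torch.div(torch.tensor(input_length - kernel_size), stride, rounding_mode="floor") + 1

conv = nn.Conv1d(in_channels=1, out_channels=1, kernel_size=kernel_size, stride=stride)
out = conv(torch.zeros(1, 1, input_length))
assert out.shape[-1] == expected.item() == 3199  # floor(15990 / 5) + 1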
@@ -68,8 +68,6 @@
import torch
from torch import nn

-from ...pytorch_utils import torch_int_div


if is_vision_available():
import PIL
@@ -1312,7 +1310,7 @@ def post_process(self, outputs, target_sizes):
prob = out_logits.sigmoid()
topk_values, topk_indexes = torch.topk(prob.view(out_logits.shape[0], -1), 100, dim=1)
scores = topk_values
-topk_boxes = torch_int_div(topk_indexes, out_logits.shape[2])
+topk_boxes = torch.div(topk_indexes, out_logits.shape[2], rounding_mode="floor")
labels = topk_indexes % out_logits.shape[2]
boxes = center_to_corners_format(out_bbox)
boxes = torch.gather(boxes, 1, topk_boxes.unsqueeze(-1).repeat(1, 1, 4))
@@ -1357,7 +1355,7 @@ def post_process_object_detection(
prob = out_logits.sigmoid()
topk_values, topk_indexes = torch.topk(prob.view(out_logits.shape[0], -1), 100, dim=1)
scores = topk_values
-topk_boxes = torch_int_div(topk_indexes, out_logits.shape[2])
+topk_boxes = torch.div(topk_indexes, out_logits.shape[2], rounding_mode="floor")
labels = topk_indexes % out_logits.shape[2]
boxes = center_to_corners_format(out_bbox)
boxes = torch.gather(boxes, 1, topk_boxes.unsqueeze(-1).repeat(1, 1, 4))
@@ -41,7 +41,7 @@
)
from ...modeling_outputs import BaseModelOutput
from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import meshgrid, torch_int_div
+from ...pytorch_utils import meshgrid
from ...utils import is_ninja_available, logging
from ..auto import AutoBackbone
from .configuration_deformable_detr import DeformableDetrConfig
@@ -497,7 +497,7 @@ def forward(self, pixel_values, pixel_mask):
x_embed = (x_embed - 0.5) / (x_embed[:, :, -1:] + eps) * self.scale

dim_t = torch.arange(self.embedding_dim, dtype=torch.float32, device=pixel_values.device)
-dim_t = self.temperature ** (2 * torch_int_div(dim_t, 2) / self.embedding_dim)
+dim_t = self.temperature ** (2 * torch.div(dim_t, 2, rounding_mode="floor") / self.embedding_dim)

pos_x = x_embed[:, :, :, None] / dim_t
pos_y = y_embed[:, :, :, None] / dim_t
@@ -1552,7 +1552,7 @@ def get_proposal_pos_embed(self, proposals):
scale = 2 * math.pi

dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=proposals.device)
-dim_t = temperature ** (2 * torch_int_div(dim_t, 2) / num_pos_feats)
+dim_t = temperature ** (2 * torch.div(dim_t, 2, rounding_mode="floor") / num_pos_feats)
# batch_size, num_queries, 4
proposals = proposals.sigmoid() * scale
# batch_size, num_queries, 4, 128
3 changes: 1 addition & 2 deletions src/transformers/models/deta/image_processing_deta.py
@@ -63,7 +63,6 @@
if is_torch_available():
import torch

-from ...pytorch_utils import torch_int_div

if is_torchvision_available():
from torchvision.ops.boxes import batched_nms
@@ -967,7 +966,7 @@ def post_process_object_detection(

all_scores = prob.view(batch_size, num_queries * num_labels).to(out_logits.device)
all_indexes = torch.arange(num_queries * num_labels)[None].repeat(batch_size, 1).to(out_logits.device)
-all_boxes = torch_int_div(all_indexes, out_logits.shape[2])
+all_boxes = torch.div(all_indexes, out_logits.shape[2], rounding_mode="floor")
all_labels = all_indexes % out_logits.shape[2]

boxes = center_to_corners_format(out_bbox)
6 changes: 3 additions & 3 deletions src/transformers/models/deta/modeling_deta.py
@@ -36,7 +36,7 @@
)
from ...modeling_outputs import BaseModelOutput
from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import meshgrid, torch_int_div
+from ...pytorch_utils import meshgrid
from ...utils import is_torchvision_available, logging, requires_backends
from ..auto import AutoBackbone
from .configuration_deta import DetaConfig
@@ -399,7 +399,7 @@ def forward(self, pixel_values, pixel_mask):
x_embed = (x_embed - 0.5) / (x_embed[:, :, -1:] + eps) * self.scale

dim_t = torch.arange(self.embedding_dim, dtype=torch.float32, device=pixel_values.device)
-dim_t = self.temperature ** (2 * torch_int_div(dim_t, 2) / self.embedding_dim)
+dim_t = self.temperature ** (2 * torch.div(dim_t, 2, rounding_mode="floor") / self.embedding_dim)

pos_x = x_embed[:, :, :, None] / dim_t
pos_y = y_embed[:, :, :, None] / dim_t
@@ -1463,7 +1463,7 @@ def get_proposal_pos_embed(self, proposals):
scale = 2 * math.pi

dim_t = torch.arange(num_pos_feats, dtype=torch.float32, device=proposals.device)
-dim_t = temperature ** (2 * torch_int_div(dim_t, 2) / num_pos_feats)
+dim_t = temperature ** (2 * torch.div(dim_t, 2, rounding_mode="floor") / num_pos_feats)
# batch_size, num_queries, 4
proposals = proposals.sigmoid() * scale
# batch_size, num_queries, 4, 128
3 changes: 1 addition & 2 deletions src/transformers/models/detr/modeling_detr.py
@@ -26,7 +26,6 @@
from ...activations import ACT2FN
from ...modeling_outputs import BaseModelOutput, BaseModelOutputWithCrossAttentions, Seq2SeqModelOutput
from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import torch_int_div
from ...utils import (
ModelOutput,
add_start_docstrings,
@@ -442,7 +441,7 @@ def forward(self, pixel_values, pixel_mask):
x_embed = x_embed / (x_embed[:, :, -1:] + 1e-6) * self.scale

dim_t = torch.arange(self.embedding_dim, dtype=torch.float32, device=pixel_values.device)
-dim_t = self.temperature ** (2 * torch_int_div(dim_t, 2) / self.embedding_dim)
+dim_t = self.temperature ** (2 * torch.div(dim_t, 2, rounding_mode="floor") / self.embedding_dim)

pos_x = x_embed[:, :, :, None] / dim_t
pos_y = y_embed[:, :, :, None] / dim_t
3 changes: 1 addition & 2 deletions src/transformers/models/hubert/modeling_hubert.py
@@ -27,7 +27,6 @@
from ...deepspeed import is_deepspeed_zero3_enabled
from ...modeling_outputs import BaseModelOutput, CausalLMOutput, SequenceClassifierOutput
from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import torch_int_div
from ...utils import (
add_code_sample_docstrings,
add_start_docstrings,
@@ -871,7 +870,7 @@ def _get_feat_extract_output_lengths(self, input_lengths: Union[torch.LongTensor
def _conv_out_length(input_length, kernel_size, stride):
# 1D convolutional layer output length formula taken
# from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
-return torch_int_div(input_length - kernel_size, stride) + 1
+return torch.div(input_length - kernel_size, stride, rounding_mode="floor") + 1

for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride):
input_lengths = _conv_out_length(input_lengths, kernel_size, stride)
8 changes: 5 additions & 3 deletions src/transformers/models/layoutlmv2/modeling_layoutlmv2.py
@@ -31,7 +31,7 @@
TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import apply_chunking_to_forward, torch_int_div
+from ...pytorch_utils import apply_chunking_to_forward
from ...utils import (
add_start_docstrings,
add_start_docstrings_to_model_forward,
@@ -770,7 +770,7 @@ def _calc_img_embeddings(self, image, bbox, position_ids):
return embeddings

def _calc_visual_bbox(self, image_feature_pool_shape, bbox, device, final_shape):
-visual_bbox_x = torch_int_div(
+visual_bbox_x = torch.div(
torch.arange(
0,
1000 * (image_feature_pool_shape[1] + 1),
@@ -779,8 +779,9 @@ def _calc_visual_bbox(self, image_feature_pool_shape, bbox, device, final_shape)
dtype=bbox.dtype,
),
self.config.image_feature_pool_shape[1],
+rounding_mode="floor",
)
-visual_bbox_y = torch_int_div(
+visual_bbox_y = torch.div(
torch.arange(
0,
1000 * (self.config.image_feature_pool_shape[0] + 1),
@@ -789,6 +790,7 @@ def _calc_visual_bbox(self, image_feature_pool_shape, bbox, device, final_shape)
dtype=bbox.dtype,
),
self.config.image_feature_pool_shape[0],
+rounding_mode="floor",
)
visual_bbox = torch.stack(
[
@@ -57,8 +57,6 @@
import torch
from torch import nn

-from ...pytorch_utils import torch_int_div


# Copied from transformers.models.detr.image_processing_detr.max_across_indices
def max_across_indices(values: Iterable[Any]) -> List[Any]:
@@ -1009,7 +1007,7 @@ def post_process_instance_segmentation(
scores_per_image, topk_indices = scores.flatten(0, 1).topk(num_queries, sorted=False)
labels_per_image = labels[topk_indices]

-topk_indices = torch_int_div(topk_indices, num_classes)
+topk_indices = torch.div(topk_indices, num_classes, rounding_mode="floor")
mask_pred = mask_pred[topk_indices]
pred_masks = (mask_pred > 0).float()
