fix version to 4.46
zucchini-nlp committed Aug 30, 2024
1 parent 3b4b44e commit b3c91c0
Showing 12 changed files with 116 additions and 67 deletions.
12 changes: 5 additions & 7 deletions src/transformers/models/bloom/modeling_bloom.py
@@ -900,17 +900,15 @@ def prepare_inputs_for_generation(
# the batch size = 1 case, `position_ids` is already contiguous but with varying stride which retriggers a capture.
model_inputs = {"input_ids": input_ids.clone(memory_format=torch.contiguous_format), "inputs_embeds": None}

# This part differs from other models because BLOOM needs a 2D mask to construct alibi tensor
+ # The only difference is the usage of 2D instead of 4D mask, but the shape will be static
if isinstance(past_key_values, StaticCache) and attention_mask is not None:
target_length = past_key_values.get_max_length()
batch_size, seq_length = attention_mask.shape
diff = target_length - seq_length
- attention_mask = torch.cat(
-     [
-         attention_mask,
-         torch.zeros(batch_size, diff, device=attention_mask.device, dtype=attention_mask.dtype),
-     ],
-     dim=-1,
- )

+ new_attn_mask = torch.zeros(batch_size, diff, device=attention_mask.device, dtype=attention_mask.dtype)
+ attention_mask = torch.cat([attention_mask, new_attn_mask,], dim=-1,)

model_inputs.update(
{
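
For readers skimming the hunk above: BLOOM builds its alibi tensor from a 2D attention mask, so when a StaticCache is used the mask is right-padded with zeros up to the cache's maximum length, keeping the shape static across generation steps. Below is a minimal standalone sketch of that padding step; the function name and toy shapes are illustrative, not part of the library.

import torch

def pad_mask_to_static_cache(attention_mask: torch.Tensor, target_length: int) -> torch.Tensor:
    # attention_mask: (batch_size, seq_length) of 0/1 values.
    # Right-pad with zeros so the mask length equals the static cache size;
    # the zeros cover cache slots that have not been written yet.
    batch_size, seq_length = attention_mask.shape
    diff = target_length - seq_length
    if diff <= 0:
        return attention_mask
    new_attn_mask = torch.zeros(batch_size, diff, device=attention_mask.device, dtype=attention_mask.dtype)
    return torch.cat([attention_mask, new_attn_mask], dim=-1)

# Example: a (2, 3) mask padded out to a cache length of 8 -> shape (2, 8).
mask = torch.ones(2, 3, dtype=torch.long)
padded = pad_mask_to_static_cache(mask, target_length=8)
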
4 changes: 2 additions & 2 deletions src/transformers/models/falcon/modeling_falcon.py
@@ -256,7 +256,7 @@ class FalconLinearScalingRotaryEmbedding(FalconRotaryEmbedding):

def __init__(self, *args, **kwargs):
logger.warning_once(
- "`FalconLinearScalingRotaryEmbedding` is deprecated an will be removed in v4.45. Please use "
+ "`FalconLinearScalingRotaryEmbedding` is deprecated an will be removed in v4.46. Please use "
"`FalconRotaryEmbedding`, which now also does linear scaling (simply pass the model config to __init__)."
)
kwargs["rope_type"] = "linear"
@@ -269,7 +269,7 @@ class FalconDynamicNTKScalingRotaryEmbedding(FalconRotaryEmbedding):

def __init__(self, *args, **kwargs):
logger.warning_once(
- "`FalconDynamicNTKScalingRotaryEmbedding` is deprecated an will be removed in v4.45. Please use "
+ "`FalconDynamicNTKScalingRotaryEmbedding` is deprecated an will be removed in v4.46. Please use "
"`FalconRotaryEmbedding`, which now also does dynamic ntk scaling (simply pass the model config to "
"__init__)."
)
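
Both Falcon warnings point at the same migration: the linear and dynamic-NTK subclasses are being folded into `FalconRotaryEmbedding`, which now reads the scaling strategy from the model config. A rough sketch of the new-style instantiation follows; the exact `rope_scaling` keys are assumed from the `rope_type` plumbing visible in this commit, not verified against the v4.46 API.

from transformers import FalconConfig
from transformers.models.falcon.modeling_falcon import FalconRotaryEmbedding

# Old, deprecated in v4.46:
#   rotary_emb = FalconLinearScalingRotaryEmbedding(dim, scaling_factor=2.0, ...)
# New: describe the scaling in the config and pass the config to the base class.
# (The rope_scaling dict shape below is an assumption based on the warning text.)
config = FalconConfig(rope_scaling={"rope_type": "linear", "factor": 2.0})
rotary_emb = FalconRotaryEmbedding(config=config)
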
6 changes: 3 additions & 3 deletions src/transformers/models/gpt_neox/modeling_gpt_neox.py
@@ -578,7 +578,7 @@ def __init__(
if config is None:
logger.warning_once(
"`GPTNeoXRotaryEmbedding` can now be fully parameterized by passing the model config through the "
- "`config` argument. All other arguments will be removed in v4.45"
+ "`config` argument. All other arguments will be removed in v4.46"
)
self.rope_kwargs = {
"rope_type": rope_type,
@@ -654,7 +654,7 @@ class GPTNeoXLinearScalingRotaryEmbedding(GPTNeoXRotaryEmbedding):

def __init__(self, *args, **kwargs):
logger.warning_once(
- "`GPTNeoXLinearScalingRotaryEmbedding` is deprecated an will be removed in v4.45. Please use "
+ "`GPTNeoXLinearScalingRotaryEmbedding` is deprecated an will be removed in v4.46. Please use "
"`GPTNeoXRotaryEmbedding`, which now also does linear scaling (simply pass the model config to __init__)."
)
kwargs["rope_type"] = "linear"
@@ -667,7 +667,7 @@ class GPTNeoXDynamicNTKScalingRotaryEmbedding(GPTNeoXRotaryEmbedding):

def __init__(self, *args, **kwargs):
logger.warning_once(
- "`GPTNeoXDynamicNTKScalingRotaryEmbedding` is deprecated an will be removed in v4.45. Please use "
+ "`GPTNeoXDynamicNTKScalingRotaryEmbedding` is deprecated an will be removed in v4.46. Please use "
"`GPTNeoXRotaryEmbedding`, which now also does dynamic ntk scaling (simply pass the model config to "
"__init__)."
)
22 changes: 11 additions & 11 deletions src/transformers/models/llama/modeling_llama.py
@@ -149,7 +149,7 @@ def __init__(
if config is None:
logger.warning_once(
"`LlamaRotaryEmbedding` can now be fully parameterized by passing the model config through the "
- "`config` argument. All other arguments will be removed in v4.45"
+ "`config` argument. All other arguments will be removed in v4.46"
)
self.rope_kwargs = {
"rope_type": rope_type,
@@ -224,7 +224,7 @@ class LlamaLinearScalingRotaryEmbedding(LlamaRotaryEmbedding):

def __init__(self, *args, **kwargs):
logger.warning_once(
- "`LlamaLinearScalingRotaryEmbedding` is deprecated an will be removed in v4.45. Please use "
+ "`LlamaLinearScalingRotaryEmbedding` is deprecated an will be removed in v4.46. Please use "
"`LlamaRotaryEmbedding`, which now also does linear scaling (simply pass the model config to __init__)."
)
kwargs["rope_type"] = "linear"
@@ -236,7 +236,7 @@ class LlamaDynamicNTKScalingRotaryEmbedding(LlamaRotaryEmbedding):

def __init__(self, *args, **kwargs):
logger.warning_once(
- "`LlamaDynamicNTKScalingRotaryEmbedding` is deprecated an will be removed in v4.45. Please use "
+ "`LlamaDynamicNTKScalingRotaryEmbedding` is deprecated an will be removed in v4.46. Please use "
"`LlamaRotaryEmbedding`, which now also does dynamic ntk scaling (simply pass the model config to "
"__init__)."
)
@@ -353,7 +353,7 @@ def __init__(self, config: LlamaConfig, layer_idx: Optional[int] = None):
self.v_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=config.attention_bias)
self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=config.attention_bias)

- # TODO (joao): remove in v4.45 (RoPE is computed in the model, not in the decoder layers)
+ # TODO (joao): remove in v4.46 (RoPE is computed in the model, not in the decoder layers)
self.rotary_emb = LlamaRotaryEmbedding(config=self.config)

def forward(
@@ -365,7 +365,7 @@ def forward(
output_attentions: bool = False,
use_cache: bool = False,
cache_position: Optional[torch.LongTensor] = None,
- position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # will become mandatory in v4.45
+ position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # will become mandatory in v4.46
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
bsz, q_len, _ = hidden_states.size()
@@ -400,7 +400,7 @@ def forward(
logger.warning_once(
"The attention layers in this model are transitioning from computing the RoPE embeddings internally "
"through `position_ids` (2D tensor with the indexes of the tokens), to using externally computed "
- "`position_embeddings` (Tuple of tensors, containing cos and sin). In v4.45 `position_ids` will be "
+ "`position_embeddings` (Tuple of tensors, containing cos and sin). In v4.46 `position_ids` will be "
"removed and `position_embeddings` will be mandatory."
)
cos, sin = self.rotary_emb(value_states, position_ids)
@@ -473,7 +473,7 @@ def forward(
output_attentions: bool = False,
use_cache: bool = False,
cache_position: Optional[torch.LongTensor] = None,
- position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # will become mandatory in v4.45
+ position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # will become mandatory in v4.46
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
if isinstance(past_key_value, StaticCache):
raise ValueError(
@@ -500,7 +500,7 @@ def forward(
logger.warning_once(
"The attention layers in this model are transitioning from computing the RoPE embeddings internally "
"through `position_ids` (2D tensor with the indexes of the tokens), to using externally computed "
- "`position_embeddings` (Tuple of tensors, containing cos and sin). In v4.45 `position_ids` will be "
+ "`position_embeddings` (Tuple of tensors, containing cos and sin). In v4.46 `position_ids` will be "
"removed and `position_embeddings` will be mandatory."
)
cos, sin = self.rotary_emb(value_states, position_ids)
@@ -586,7 +586,7 @@ def forward(
output_attentions: bool = False,
use_cache: bool = False,
cache_position: Optional[torch.LongTensor] = None,
- position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # will become mandatory in v4.45
+ position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # will become mandatory in v4.46
**kwargs,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
if output_attentions:
@@ -620,7 +620,7 @@ def forward(
logger.warning_once(
"The attention layers in this model are transitioning from computing the RoPE embeddings internally "
"through `position_ids` (2D tensor with the indexes of the tokens), to using externally computed "
- "`position_embeddings` (Tuple of tensors, containing cos and sin). In v4.45 `position_ids` will be "
+ "`position_embeddings` (Tuple of tensors, containing cos and sin). In v4.46 `position_ids` will be "
"removed and `position_embeddings` will be mandatory."
)
cos, sin = self.rotary_emb(value_states, position_ids)
@@ -695,7 +695,7 @@ def forward(
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
- position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # will become mandatory in v4.45
+ position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # will become mandatory in v4.46
**kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
"""
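
The recurring warning in the Llama attention classes describes the other half of this refactor: cos/sin are now computed once per forward pass by the model's rotary embedding and handed to every decoder layer as `position_embeddings`, instead of each layer recomputing them from `position_ids`. A small sketch of that calling pattern follows; the tensor sizes are illustrative and the commented layer call is a placeholder, not an excerpt from the library.

import torch
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaRotaryEmbedding

# Tiny illustrative config: hidden_size / num_attention_heads gives head_dim = 16.
config = LlamaConfig(hidden_size=64, num_attention_heads=4, num_hidden_layers=2)
rotary_emb = LlamaRotaryEmbedding(config=config)

hidden_states = torch.randn(1, 10, config.hidden_size)
position_ids = torch.arange(10).unsqueeze(0)

# Computed once in the model...
position_embeddings = rotary_emb(hidden_states, position_ids)  # (cos, sin) tuple

# ...then passed down to every decoder layer instead of `position_ids`:
# layer_out = decoder_layer(hidden_states, position_embeddings=position_embeddings, ...)
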
2 changes: 1 addition & 1 deletion src/transformers/models/nemotron/modeling_nemotron.py
@@ -589,7 +589,7 @@ def forward(
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
cache_position: Optional[torch.LongTensor] = None,
- position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # will become mandatory in v4.45
+ position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # will become mandatory in v4.46
**kwargs,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
"""
6 changes: 3 additions & 3 deletions src/transformers/models/persimmon/modeling_persimmon.py
@@ -119,7 +119,7 @@ def __init__(
if config is None:
logger.warning_once(
"`PersimmonRotaryEmbedding` can now be fully parameterized by passing the model config through the "
- "`config` argument. All other arguments will be removed in v4.45"
+ "`config` argument. All other arguments will be removed in v4.46"
)
self.rope_kwargs = {
"rope_type": rope_type,
@@ -195,7 +195,7 @@ class PersimmonLinearScalingRotaryEmbedding(PersimmonRotaryEmbedding):

def __init__(self, *args, **kwargs):
logger.warning_once(
- "`PersimmonLinearScalingRotaryEmbedding` is deprecated an will be removed in v4.45. Please use "
+ "`PersimmonLinearScalingRotaryEmbedding` is deprecated an will be removed in v4.46. Please use "
"`PersimmonRotaryEmbedding`, which now also does linear scaling (simply pass the model config to __init__)."
)
kwargs["rope_type"] = "linear"
@@ -208,7 +208,7 @@ class PersimmonDynamicNTKScalingRotaryEmbedding(PersimmonRotaryEmbedding):

def __init__(self, *args, **kwargs):
logger.warning_once(
- "`PersimmonDynamicNTKScalingRotaryEmbedding` is deprecated an will be removed in v4.45. Please use "
+ "`PersimmonDynamicNTKScalingRotaryEmbedding` is deprecated an will be removed in v4.46. Please use "
"`PersimmonRotaryEmbedding`, which now also does dynamic ntk scaling (simply pass the model config to "
"__init__)."
)
6 changes: 3 additions & 3 deletions src/transformers/models/phi/modeling_phi.py
@@ -131,7 +131,7 @@ def __init__(
if config is None:
logger.warning_once(
"`PhiRotaryEmbedding` can now be fully parameterized by passing the model config through the "
- "`config` argument. All other arguments will be removed in v4.45"
+ "`config` argument. All other arguments will be removed in v4.46"
)
self.rope_kwargs = {
"rope_type": rope_type,
@@ -207,7 +207,7 @@ class PhiLinearScalingRotaryEmbedding(PhiRotaryEmbedding):

def __init__(self, *args, **kwargs):
logger.warning_once(
- "`PhiLinearScalingRotaryEmbedding` is deprecated an will be removed in v4.45. Please use "
+ "`PhiLinearScalingRotaryEmbedding` is deprecated an will be removed in v4.46. Please use "
"`PhiRotaryEmbedding`, which now also does linear scaling (simply pass the model config to __init__)."
)
kwargs["rope_type"] = "linear"
@@ -220,7 +220,7 @@ class PhiDynamicNTKScalingRotaryEmbedding(PhiRotaryEmbedding):

def __init__(self, *args, **kwargs):
logger.warning_once(
- "`PhiDynamicNTKScalingRotaryEmbedding` is deprecated an will be removed in v4.45. Please use "
+ "`PhiDynamicNTKScalingRotaryEmbedding` is deprecated an will be removed in v4.46. Please use "
"`PhiRotaryEmbedding`, which now also does dynamic ntk scaling (simply pass the model config to "
"__init__)."
)
2 changes: 1 addition & 1 deletion src/transformers/models/qwen2/modeling_qwen2.py
@@ -154,7 +154,7 @@ def __init__(
if config is None:
logger.warning_once(
"`Qwen2RotaryEmbedding` can now be fully parameterized by passing the model config through the "
- "`config` argument. All other arguments will be removed in v4.45"
+ "`config` argument. All other arguments will be removed in v4.46"
)
self.rope_kwargs = {
"rope_type": rope_type,
2 changes: 1 addition & 1 deletion src/transformers/models/qwen2_moe/modeling_qwen2_moe.py
@@ -230,7 +230,7 @@ def __init__(
if config is None:
logger.warning_once(
"`Qwen2MoeRotaryEmbedding` can now be fully parameterized by passing the model config through the "
- "`config` argument. All other arguments will be removed in v4.45"
+ "`config` argument. All other arguments will be removed in v4.46"
)
self.rope_kwargs = {
"rope_type": rope_type,
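
The Qwen2 and Qwen2Moe hunks carry the companion warning: the standalone rotary-embedding arguments (dim, base, max_position_embeddings, ...) are kept only for backward compatibility and disappear in v4.46, leaving the config as the single source of RoPE parameters. A hedged before/after sketch, with made-up argument values for illustration:

from transformers import Qwen2MoeConfig
from transformers.models.qwen2_moe.modeling_qwen2_moe import Qwen2MoeRotaryEmbedding

config = Qwen2MoeConfig()

# Deprecated path (emits the warning above; the arguments are removed in v4.46).
# The values here are illustrative only:
#   rotary_emb = Qwen2MoeRotaryEmbedding(dim=128, max_position_embeddings=32768, base=1_000_000.0)

# Recommended path: every RoPE parameter comes from the config.
rotary_emb = Qwen2MoeRotaryEmbedding(config=config)
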