Bump transformers and accelerate versions #554

Merged: 19 commits, Feb 15, 2024
remove falcon from ci tests
dvmazur committed Feb 11, 2024
commit 25ee8ecfd045e234d044e78ef5a76fa92173faea
2 changes: 0 additions & 2 deletions .github/workflows/run-tests.yaml
@@ -14,8 +14,6 @@ jobs:
- { model: 'bigscience/bloom-560m', os: 'ubuntu', python-version: '3.11' }
- { model: 'Maykeye/TinyLLama-v0', os: 'ubuntu', python-version: '3.8' }
- { model: 'Maykeye/TinyLLama-v0', os: 'ubuntu', python-version: '3.11' }
- { model: 'petals-team/falcon-rw-1b', os: 'ubuntu', python-version: '3.8' }
- { model: 'petals-team/falcon-rw-1b', os: 'ubuntu', python-version: '3.11' }
- { model: 'Maykeye/TinyLLama-v0', os: 'macos', python-version: '3.10' }
- { model: 'Maykeye/TinyLLama-v0', os: 'macos', python-version: '3.11' }
fail-fast: false
6 changes: 3 additions & 3 deletions src/petals/client/remote_generation.py
@@ -6,20 +6,20 @@
import torch
import transformers
from hivemind.utils.logging import get_logger
from transformers.generation.utils import ModelOutput
from torch import Tensor
from transformers.cache_utils import Cache, DynamicCache
from transformers.generation.utils import ModelOutput

from petals.client.inference_session import InferenceSession
from petals.client.remote_sequential import RemoteSequential
from petals.utils.misc import DUMMY, docstring_from

from torch import Tensor

logger = get_logger(__name__)


class RemotePastKeyValues(Cache):
"""only keeps the number of seen tokens. pretends to be a legit cache"""

def __init__(self) -> None:
super().__init__()
self.seen_tokens = 0
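
Background for the RemotePastKeyValues placeholder above: with the bumped transformers version, the generation utilities expect past_key_values to implement the transformers.cache_utils.Cache interface instead of being a tuple of tensors. A minimal sketch of such a token-counting stub, assuming the Cache API from transformers >= 4.36; the class name and method bodies below are illustrative, not copied from this PR:

from typing import Optional, Tuple

import torch
from transformers.cache_utils import Cache


class SeenTokensOnlyCache(Cache):
    """Stores no tensors; only counts tokens already processed remotely."""

    def __init__(self) -> None:
        super().__init__()
        self.seen_tokens = 0

    def get_seq_length(self, layer_idx: int = 0) -> int:
        # Report how many tokens the remote servers have already consumed.
        return self.seen_tokens

    def update(
        self,
        key_states: torch.Tensor,
        value_states: torch.Tensor,
        layer_idx: int,
        cache_kwargs: Optional[dict] = None,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        # Nothing is cached locally; the real KV cache lives on the servers.
        self.seen_tokens += key_states.shape[-2]
        return key_states, value_states
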
7 changes: 2 additions & 5 deletions src/petals/models/bloom/model.py
@@ -4,9 +4,9 @@
import torch
import torch.nn as nn
from hivemind.utils.logging import get_logger
from transformers.cache_utils import Cache
from transformers.modeling_outputs import BaseModelOutputWithPastAndCrossAttentions
from transformers.models.bloom import BloomForCausalLM, BloomForSequenceClassification, BloomModel, BloomPreTrainedModel
from transformers.cache_utils import Cache

from petals.client.from_pretrained import FromPretrainedMixin
from petals.client.lm_head import LMHead
@@ -124,7 +124,6 @@ def __init__(self, config: DistributedBloomConfig):
# Initialize weights and apply final processing
self.post_init()


def prepare_inputs_for_generation(
self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, **kwargs
) -> dict:
@@ -174,11 +173,9 @@ def prepare_inputs_for_generation(
)
return model_inputs


def _temporary_reorder_cache(self, past_key_values, beam_idx):
return past_key_values



def get_output_embeddings(self):
return self.lm_head

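
The body of prepare_inputs_for_generation is collapsed in the hunks above; the usual pattern in this kind of override is to trim input_ids to the tokens the cache has not yet seen and to repackage the remaining generation kwargs. A standalone sketch of that pattern, assuming the transformers Cache API; the function name and exact argument handling are illustrative, not taken from the file:

from typing import Optional

import torch
from transformers.cache_utils import Cache


def prepare_inputs_sketch(
    input_ids: torch.Tensor,
    past_key_values: Optional[Cache] = None,
    attention_mask: Optional[torch.Tensor] = None,
    **kwargs,
) -> dict:
    # Illustrative version of the usual prepare_inputs_for_generation logic.
    if past_key_values is not None:
        # During incremental decoding, feed only the not-yet-processed tokens
        # (typically just the newest one).
        past_length = past_key_values.get_seq_length()
        input_ids = input_ids[:, past_length:]
    return {
        "input_ids": input_ids,
        "past_key_values": past_key_values,
        "attention_mask": attention_mask,
        **kwargs,
    }
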
4 changes: 2 additions & 2 deletions src/petals/models/llama/block.py
@@ -9,6 +9,7 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask
from transformers.models.llama.modeling_llama import (
LlamaAttention,
LlamaConfig,
@@ -19,7 +20,6 @@
repeat_kv,
rotate_half,
)
from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask

from petals.utils.cuda_graphs import make_inference_graphed_callable

@@ -99,7 +99,7 @@ def forward(
value_states = torch.cat([past_key_value[1], value_states], dim=2)

past_key_value = (key_states, value_states) if use_cache else None

# repeat k/v heads if n_kv_heads < n_heads
key_states = repeat_kv(key_states, self.num_key_value_groups)
value_states = repeat_kv(value_states, self.num_key_value_groups)
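
For reference, the hunk above concatenates the cached key/value tensors with the freshly computed ones along the sequence dimension and then calls repeat_kv so that grouped-query attention ends up with as many key/value heads as query heads. A small shape check of that step; all tensor sizes below are made up for illustration:

import torch
from transformers.models.llama.modeling_llama import repeat_kv

batch, n_kv_heads, head_dim = 1, 4, 64
num_key_value_groups = 8 // n_kv_heads  # e.g. 8 query heads sharing 4 KV heads

past_keys = torch.randn(batch, n_kv_heads, 10, head_dim)  # 10 cached tokens
new_keys = torch.randn(batch, n_kv_heads, 1, head_dim)    # 1 new token

key_states = torch.cat([past_keys, new_keys], dim=2)       # shape (1, 4, 11, 64)
key_states = repeat_kv(key_states, num_key_value_groups)   # shape (1, 8, 11, 64)
print(key_states.shape)
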
2 changes: 1 addition & 1 deletion src/petals/models/llama/model.py
@@ -101,7 +101,7 @@ def forward(
# Add last hidden state
hidden_states = self.norm(hidden_states)
hidden_states = hidden_states.view(output_shape)

return BaseModelOutputWithPast(
last_hidden_state=hidden_states,
past_key_values=past_key_values,
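
Background on why the return value above now carries a Cache-style past_key_values: newer transformers releases (4.36 and later) route caching through Cache objects such as DynamicCache, which remote_generation.py imports earlier in this diff, rather than nested tuples. A short illustrative sketch of that API, separate from the PR's own code:

import torch
from transformers.cache_utils import DynamicCache
from transformers.modeling_outputs import BaseModelOutputWithPast

cache = DynamicCache()
keys = torch.randn(1, 4, 3, 64)    # (batch, kv_heads, seq_len, head_dim)
values = torch.randn(1, 4, 3, 64)
cache.update(keys, values, layer_idx=0)  # store the KV pair for layer 0

output = BaseModelOutputWithPast(
    last_hidden_state=torch.randn(1, 3, 256),
    past_key_values=cache,
)
print(output.past_key_values.get_seq_length())  # 3 tokens cached so far
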