Merge remote-tracking branch 'origin/master' into additional_runtime_generation_options

# Conflicts:
#	CHANGELOG.md
#	src/pipelines/generation_utils.rs
#	src/pipelines/summarization.rs
#	src/pipelines/text_generation.rs
#	src/pipelines/translation/translation_pipeline.rs
guillaume-be committed Nov 10, 2021
2 parents f50a1a4 + 12d09c9 commit a97b657
Showing 80 changed files with 733 additions and 478 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -2,8 +2,13 @@
All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [Unreleased]
+## Changed
+- Updated to `tch` 1.6.0 (libtorch 1.10)
+- (BREAKING) Simplified the generics for multiple library traits: as a rule, inputs are now `&[S]` with `S: AsRef<str>`, or a plain `&str`; the owned types `Vec` and `String` are no longer accepted (see the first sketch after this excerpt)

## Added
+- (BREAKING) Support for `bad_word_ids` generation, allowing a set of word ids to be banned for all models supporting text generation
+- Support for half-precision mode for all models (reducing the memory footprint). A model can be converted to half-precision by calling the `half()` method on the `VarStore` it is currently stored in. Half-precision Torch kernels are not available for CPU (they are limited to CUDA devices); see the second sketch after this excerpt
- (BREAKING) Extension of the generation options that can be provided at runtime (after a model has been instantiated with a `GenerateConfig`), allowing the generation options to be updated from one text generation call to the next with the same model. This feature is implemented at the `LanguageGenerator` trait level; the high-level `TextGeneration` pipeline API remains unchanged.

## [0.16.0] - 2021-08-24
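
To illustrate the generics change above: callers now pass borrowed slices of string-like values, exactly as the updated examples further down this diff do. A minimal sketch under the new contract (model construction elided; the precise trait bounds are inferred from the `encode_prompt_text` change below, not quoted from the docs):

```rust
use rust_bert::pipelines::translation::{Language, TranslationModel};

// `model` construction elided; see examples/translation_m2m100.rs in this diff.
fn translate_demo(model: &TranslationModel) -> anyhow::Result<()> {
    let source = "This sentence will be translated in multiple languages.";

    // New API: a borrowed slice of string-like values.
    let outputs = model.translate(&[source], Language::English, Language::French)?;

    // Owned collections are passed by reference rather than by value.
    let owned: Vec<String> = vec![source.to_owned()];
    let more = model.translate(&owned, Language::English, Language::Spanish)?;

    for sentence in outputs.into_iter().chain(more) {
        println!("{}", sentence);
    }
    Ok(())
}
```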
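And for the half-precision and runtime-options entries: only the `set_device` call below is taken verbatim from the updated `generation_gpt_neo` example in this diff; the commented `half()` line paraphrases the changelog entry, and how the `VarStore` is reached from a pipeline is an assumption, not a documented API.

```rust
use rust_bert::pipelines::text_generation::{TextGenerationConfig, TextGenerationModel};
use tch::Device;

fn generation_demo() -> anyhow::Result<()> {
    let config = TextGenerationConfig {
        max_length: 32,
        ..Default::default()
    };
    let mut model = TextGenerationModel::new(config)?;

    // Runtime option added in this release: move the model to another device
    // after instantiation (shown in examples/generation_gpt_neo.rs below).
    model.set_device(Device::cuda_if_available());

    // Half precision, per the changelog: call `half()` on the VarStore the
    // model is stored in. Half-precision kernels are CUDA-only.
    // model_var_store.half(); // illustrative only; accessor not shown in this diff

    let output = model.generate(&["It was a very nice and sunny"], None);
    println!("{:?}", output);
    Ok(())
}
```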
19 changes: 10 additions & 9 deletions Cargo.toml
@@ -57,20 +57,21 @@ all-tests = []
features = ["doc-only"]

[dependencies]
-rust_tokenizers = "~6.2.4"
-tch = "~0.5.0"
-serde_json = "1.0.66"
-serde = { version = "1.0.129", features = ["derive"] }
-dirs = "3.0.2"
-ordered-float = "2.7.0"
+rust_tokenizers = "~7.0.0"
+tch = "~0.6.1"
+serde_json = "1.0.68"
+serde = { version = "1.0.130", features = ["derive"] }
+dirs = "4.0.0"
+ordered-float = "2.8.0"
cached-path = "0.5.1"
lazy_static = "1.4.0"
uuid = { version = "0.8.2", features = ["v4"] }
-thiserror = "1.0.26"
+thiserror = "1.0.30"
+half = "1.7.1"

[dev-dependencies]
-anyhow = "1.0.43"
+anyhow = "1.0.44"
csv = "1.1.6"
criterion = "0.3.5"
-torch-sys = "0.5.0"
+torch-sys = "~0.6.1"
tempfile = "3.2.0"
4 changes: 2 additions & 2 deletions README.md
@@ -71,8 +71,8 @@ This cache location defaults to `~/.cache/.rustbert`, but can be changed by sett

### Manual installation (recommended)

-1. Download `libtorch` from https://pytorch.org/get-started/locally/. This package requires `v1.9.0`: if this version is no longer available on the "get started" page,
-the file should be accessible by modifying the target link, for example `https://download.pytorch.org/libtorch/cu111/libtorch-shared-with-deps-1.9.0%2Bcu111.zip` for a Linux version with CUDA11.
+1. Download `libtorch` from https://pytorch.org/get-started/locally/. This package requires `v1.10.0`: if this version is no longer available on the "get started" page,
+the file should be accessible by modifying the target link, for example `https://download.pytorch.org/libtorch/cu111/libtorch-shared-with-deps-1.10.0%2Bcu111.zip` for a Linux version with CUDA11.
2. Extract the library to a location of your choice
3. Set the following environment variables
##### Linux:
6 changes: 3 additions & 3 deletions benches/tensor_operations_benchmark.rs
@@ -3,7 +3,7 @@ extern crate criterion;

use criterion::{black_box, Criterion};
use std::time::{Duration, Instant};
-use tch::kind::Kind::Float;
+use tch::kind::Kind;
use tch::{Device, Tensor};

fn matrix_multiply(iters: u64, input: &Tensor, weights: &Tensor) -> Duration {
@@ -21,8 +21,8 @@ fn bench_tensor_ops(c: &mut Criterion) {
unsafe {
torch_sys::dummy_cuda_dependency();
}
-let input = Tensor::rand(&[32, 128, 512], (Float, Device::cuda_if_available()));
-let weights = Tensor::rand(&[512, 512], (Float, Device::cuda_if_available()));
+let input = Tensor::rand(&[32, 128, 512], (Kind::Float, Device::cuda_if_available()));
+let weights = Tensor::rand(&[512, 512], (Kind::Float, Device::cuda_if_available()));

let _ = &input.matmul(&weights);
c.bench_function("Matrix multiply ", |b| {
11 changes: 6 additions & 5 deletions examples/generation_gpt_neo.rs
@@ -25,16 +25,16 @@ use tch::Device;
fn main() -> anyhow::Result<()> {
// Set-up model resources
let config_resource = Resource::Remote(RemoteResource::from_pretrained(
-GptNeoConfigResources::GPT_NEO_1_3B,
+GptNeoConfigResources::GPT_NEO_125M,
));
let vocab_resource = Resource::Remote(RemoteResource::from_pretrained(
-GptNeoVocabResources::GPT_NEO_1_3B,
+GptNeoVocabResources::GPT_NEO_125M,
));
let merges_resource = Resource::Remote(RemoteResource::from_pretrained(
-GptNeoMergesResources::GPT_NEO_1_3B,
+GptNeoMergesResources::GPT_NEO_125M,
));
let model_resource = Resource::Remote(RemoteResource::from_pretrained(
-GptNeoModelResources::GPT_NEO_1_3B,
+GptNeoModelResources::GPT_NEO_125M,
));
let generate_config = TextGenerationConfig {
model_type: ModelType::GPTNeo,
@@ -52,7 +52,8 @@ fn main() -> anyhow::Result<()> {
..Default::default()
};

-let model = TextGenerationModel::new(generate_config)?;
+let mut model = TextGenerationModel::new(generate_config)?;
+model.set_device(Device::cuda_if_available());

let input_context_1 = "It was a very nice and sunny";
let input_context_2 = "It was a gloom winter night, and";
3 changes: 1 addition & 2 deletions examples/generation_xlnet.rs
@@ -20,7 +20,6 @@ use rust_bert::resources::{RemoteResource, Resource};
use rust_bert::xlnet::{XLNetConfigResources, XLNetModelResources, XLNetVocabResources};

fn main() -> anyhow::Result<()> {
-// Set-up model
// Resources paths
let config_resource = Resource::Remote(RemoteResource::from_pretrained(
XLNetConfigResources::XLNET_BASE_CASED,
Expand All @@ -42,7 +41,7 @@ fn main() -> anyhow::Result<()> {
vocab_resource,
merges_resource,
max_length: 32,
-do_sample: true,
+do_sample: false,
num_beams: 3,
temperature: 1.0,
num_return_sequences: 1,
2 changes: 0 additions & 2 deletions examples/summarization_t5.rs
@@ -18,8 +18,6 @@ use rust_bert::resources::{RemoteResource, Resource};
use rust_bert::t5::{T5ConfigResources, T5ModelResources, T5VocabResources};

fn main() -> anyhow::Result<()> {
-// let summarization_model = SummarizationModel::new(Default::default())?;
-
let config_resource =
Resource::Remote(RemoteResource::from_pretrained(T5ConfigResources::T5_SMALL));
let vocab_resource =
6 changes: 3 additions & 3 deletions examples/translation_m2m100.rs
@@ -53,9 +53,9 @@ fn main() -> anyhow::Result<()> {
let source_sentence = "This sentence will be translated in multiple languages.";

let mut outputs = Vec::new();
-outputs.extend(model.translate([source_sentence], Language::English, Language::French)?);
-outputs.extend(model.translate([source_sentence], Language::English, Language::Spanish)?);
-outputs.extend(model.translate([source_sentence], Language::English, Language::Hindi)?);
+outputs.extend(model.translate(&[source_sentence], Language::English, Language::French)?);
+outputs.extend(model.translate(&[source_sentence], Language::English, Language::Spanish)?);
+outputs.extend(model.translate(&[source_sentence], Language::English, Language::Hindi)?);

for sentence in outputs {
println!("{}", sentence);
6 changes: 3 additions & 3 deletions examples/translation_mbart.rs
@@ -53,9 +53,9 @@ fn main() -> anyhow::Result<()> {
let source_sentence = "This sentence will be translated in multiple languages.";

let mut outputs = Vec::new();
-outputs.extend(model.translate([source_sentence], Language::English, Language::French)?);
-outputs.extend(model.translate([source_sentence], Language::English, Language::Spanish)?);
-outputs.extend(model.translate([source_sentence], Language::English, Language::Hindi)?);
+outputs.extend(model.translate(&[source_sentence], Language::English, Language::French)?);
+outputs.extend(model.translate(&[source_sentence], Language::English, Language::Spanish)?);
+outputs.extend(model.translate(&[source_sentence], Language::English, Language::Hindi)?);

for sentence in outputs {
println!("{}", sentence);
6 changes: 3 additions & 3 deletions examples/translation_t5.rs
@@ -56,9 +56,9 @@ fn main() -> anyhow::Result<()> {
let source_sentence = "This sentence will be translated in multiple languages.";

let mut outputs = Vec::new();
-outputs.extend(model.translate([source_sentence], Language::English, Language::French)?);
-outputs.extend(model.translate([source_sentence], Language::English, Language::German)?);
-outputs.extend(model.translate([source_sentence], Language::English, Language::Romanian)?);
+outputs.extend(model.translate(&[source_sentence], Language::English, Language::French)?);
+outputs.extend(model.translate(&[source_sentence], Language::English, Language::German)?);
+outputs.extend(model.translate(&[source_sentence], Language::English, Language::Romanian)?);

for sentence in outputs {
println!("{}", sentence);
9 changes: 5 additions & 4 deletions src/albert/albert_model.rs
@@ -221,10 +221,6 @@ impl AlbertModel {
};
let mask = mask.unwrap_or_else(|| calc_mask.as_ref().unwrap());

-let extended_attention_mask = mask.unsqueeze(1).unsqueeze(2);
-let extended_attention_mask: Tensor =
-(extended_attention_mask.ones_like() - extended_attention_mask) * -10000.0;
-
let embedding_output = self.embeddings.forward_t(
input_ids,
token_type_ids,
@@ -233,6 +229,11 @@
train,
)?;

+let extended_attention_mask = mask.unsqueeze(1).unsqueeze(2);
+let extended_attention_mask: Tensor =
+((extended_attention_mask.ones_like() - extended_attention_mask) * -10000.0)
+.to_kind(embedding_output.kind());
+
let transformer_output =
self.encoder
.forward_t(&embedding_output, Some(extended_attention_mask), train);
9 changes: 6 additions & 3 deletions src/albert/attention.rs
@@ -14,7 +14,6 @@
use crate::albert::AlbertConfig;
use crate::common::dropout::Dropout;
use std::borrow::Borrow;
-use tch::kind::Kind::Float;
use tch::{nn, Tensor};

#[derive(Debug)]
@@ -119,7 +118,10 @@ impl AlbertSelfAttention {
query_layer.matmul(&key_layer.transpose(-1, -2))
};

-let weights = scores.softmax(-1, Float).apply_t(&self.dropout, train);
+let weights = scores
+.softmax(-1, scores.kind())
+.apply_t(&self.dropout, train);
+
let context = weights.matmul(&value_layer).transpose(1, 2).contiguous();

let w = self.dense.ws.transpose(0, 1).view((
Expand All @@ -128,7 +130,8 @@ impl AlbertSelfAttention {
self.hidden_size,
));

-let context: Tensor = Tensor::einsum("bfnd,ndh->bfh", &[context, w]) + &self.dense.bs;
+let context: Tensor =
+Tensor::einsum("bfnd,ndh->bfh", &[context, w]) + self.dense.bs.as_ref().unwrap();
let context = (input_ids + context.apply_t(&self.dropout, train)).apply(&self.layer_norm);

if !self.output_attentions {
3 changes: 1 addition & 2 deletions src/bart/attention.rs
@@ -13,7 +13,6 @@

use crate::common::dropout::Dropout;
use std::borrow::Borrow;
-use tch::kind::Kind::Float;
use tch::{nn, Tensor};

#[derive(Debug)]
@@ -164,7 +163,7 @@ impl BartAttention {
attention_weights.view([bs * self.num_heads, target_length, source_length]);
};

-attention_weights = attention_weights.softmax(-1, Float);
+attention_weights = attention_weights.softmax(-1, attention_weights.kind());

let saved_attention_weights = if self.output_attentions {
Some(attention_weights.view((bs, self.num_heads, target_length, source_length)))
38 changes: 24 additions & 14 deletions src/bart/bart_model.rs
@@ -16,6 +16,7 @@ use crate::bart::decoder::BartDecoder;
use crate::bart::encoder::BartEncoder;
use crate::common::activations::Activation;
use crate::common::dropout::Dropout;
+use crate::common::kind::get_negative_infinity;
use crate::common::resources::{RemoteResource, Resource};
use crate::gpt2::{
Gpt2ConfigResources, Gpt2MergesResources, Gpt2ModelResources, Gpt2VocabResources,
@@ -33,7 +34,6 @@ use rust_tokenizers::vocab::{RobertaVocab, Vocab};
use serde::{Deserialize, Serialize};
use std::borrow::Borrow;
use std::collections::HashMap;
-use tch::kind::Kind::Int64;
use tch::nn::{embedding, EmbeddingConfig};
use tch::{nn, Device, Kind, Tensor};

Expand Down Expand Up @@ -235,7 +235,7 @@ pub(crate) fn _make_causal_mask(

let mut mask = Tensor::full(
&[target_length, target_length],
-f64::NEG_INFINITY,
+get_negative_infinity(dtype).unwrap(),
(dtype, device),
);
let mask_cond = Tensor::arange(target_length, (dtype, device));
@@ -264,16 +264,19 @@ pub(crate) fn _make_causal_mask(
)
}

-pub(crate) fn _expand_mask(mask: &Tensor, target_length: Option<i64>) -> Tensor {
+pub(crate) fn _expand_mask(mask: &Tensor, target_length: Option<i64>, dtype: Kind) -> Tensor {
let (batch_size, source_length) = mask.size2().unwrap();
let target_length = target_length.unwrap_or(source_length);
let expanded_mask = mask
.unsqueeze(1)
.unsqueeze(1)
.expand(&[batch_size, 1, target_length, source_length], true)
-.totype(Kind::Float);
+.totype(dtype);
let inverted_mask: Tensor = 1 - expanded_mask;
-inverted_mask.masked_fill(&inverted_mask.to_kind(Kind::Bool), f64::NEG_INFINITY)
+inverted_mask.masked_fill(
+&inverted_mask.to_kind(Kind::Bool),
+get_negative_infinity(dtype).unwrap(),
+)
}

pub(crate) fn _prepare_decoder_attention_mask(
@@ -294,8 +297,12 @@ pub(crate) fn _prepare_decoder_attention_mask(
None
};

-if let Some(attention_mask) = &attention_mask {
-let expanded_attention_mask = _expand_mask(attention_mask, Some(last_input_shape_dim));
+if let Some(attention_mask) = attention_mask {
+let expanded_attention_mask = _expand_mask(
+attention_mask,
+Some(last_input_shape_dim),
+input_embeds.kind(),
+);
combined_attention_mask = match combined_attention_mask {
Some(value) => Some(value + expanded_attention_mask),
None => Some(expanded_attention_mask),
@@ -308,9 +315,9 @@
fn _shift_tokens_right(input_ids: &Tensor, pad_token_id: i64) -> Tensor {
let index_eos: Tensor = input_ids
.ne(pad_token_id)
-.sum_dim_intlist(&[-1], true, Int64)
+.sum_dim_intlist(&[-1], true, Kind::Int64)
- 1;
-let output = input_ids.empty_like().to_kind(Int64);
+let output = input_ids.empty_like().to_kind(Kind::Int64);
output
.select(1, 0)
.copy_(&input_ids.gather(1, &index_eos, true).squeeze());
@@ -812,7 +819,7 @@ impl BartForSequenceClassification {
train,
);
let eos_mask = input_ids.eq(self.eos_token_id);
-let reshape = eos_mask.sum_dim_intlist(&[1], true, Int64);
+let reshape = eos_mask.sum_dim_intlist(&[1], true, input_ids.kind());
let sentence_representation = base_model_output
.decoder_output
.permute(&[2, 0, 1])
@@ -1121,6 +1128,9 @@ impl PrivateLanguageGenerator<BartForConditionalGeneration, RobertaVocab, Robert
fn get_var_store(&self) -> &nn::VarStore {
&self.var_store
}
+fn get_var_store_mut(&mut self) -> &mut nn::VarStore {
+&mut self.var_store
+}
fn get_config(&self) -> &GenerateConfig {
&self.generate_config
}
@@ -1195,17 +1205,17 @@ impl PrivateLanguageGenerator<BartForConditionalGeneration, RobertaVocab, Robert
}
}

-fn encode_prompt_text<'a, S>(
+fn encode_prompt_text<S>(
&self,
-prompt_text: S,
+prompt_text: &[S],
max_len: i64,
pad_token_id: Option<i64>,
) -> Tensor
where
-S: AsRef<[&'a str]>,
+S: AsRef<str> + Sync,
{
let tokens = self._get_tokenizer().encode_list(
-prompt_text.as_ref(),
+prompt_text,
max_len as usize,
&TruncationStrategy::LongestFirst,
0,
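The kind-aware masking in `bart_model.rs` above goes through the new `get_negative_infinity` helper from `src/common/kind.rs`, whose body is not part of this excerpt. A minimal sketch of such a helper, consistent with the `get_negative_infinity(dtype).unwrap()` call sites — an assumption, not the crate's actual implementation:

```rust
use tch::Kind;

/// Largest-magnitude negative value usable as an attention-mask fill for
/// `kind`; `None` for kinds with no sensible fill (e.g. integer kinds).
/// A finite minimum can be preferable for reduced-precision kinds, where
/// arithmetic on an -inf fill more easily degenerates into NaN.
fn get_negative_infinity(kind: Kind) -> Option<f64> {
    match kind {
        Kind::Half => Some(-65504.0),       // minimum finite f16
        Kind::BFloat16 => Some(-3.3895e38), // approx. minimum finite bf16
        Kind::Float | Kind::Double => Some(f64::NEG_INFINITY),
        _ => None,
    }
}
```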
2 changes: 1 addition & 1 deletion src/bart/decoder.rs
@@ -273,7 +273,7 @@ impl BartDecoder {
);

let encoder_attention_mask = encoder_attention_mask
-.map(|mask| _expand_mask(mask, Some(*input_ids.size().last().unwrap())));
+.map(|mask| _expand_mask(mask, Some(*input_ids.size().last().unwrap()), x.kind()));

let x = if let Some(layer_norm_embedding) = &self.layer_norm_embedding {
x.apply(layer_norm_embedding)
7 changes: 3 additions & 4 deletions src/bart/embeddings.rs
@@ -12,9 +12,8 @@
// limitations under the License.

use std::borrow::Borrow;
-use tch::kind::Kind::Int64;
use tch::nn::embedding;
-use tch::{nn, Tensor};
+use tch::{nn, Kind, Tensor};

/// # Abstraction that holds a embeddings configuration
pub enum EmbeddingOption {
@@ -67,7 +66,7 @@ impl LearnedPositionalEmbedding {
let positions = Tensor::arange_start(
past_key_values_length,
past_key_values_length + sequence_length,
-(Int64, input.device()),
+(Kind::Int64, input.device()),
) + self.offset;
positions.apply(&self.embedding)
}
@@ -102,7 +101,7 @@ impl SinusoidalPositionalEmbedding {
let positions = Tensor::arange_start(
past_key_values_length,
past_key_values_length + sequence_length,
-(Int64, input.device()),
+(Kind::Int64, input.device()),
);
positions.apply(&self.embedding)
}