Skip to content

Commit

Permalink
Tokenizer special token map update (guillaume-be#330)
Browse files Browse the repository at this point in the history
* Updates for compatibility with tokenizers special token rework

* Updated mask pipeline methods

* Bumped version

* Fix clippy warnings
  • Loading branch information
guillaume-be committed Jan 30, 2023
1 parent 80e0197 commit 84561ec
Show file tree
Hide file tree
Showing 57 changed files with 535 additions and 658 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [Unreleased]
## Changed
- Bumped the tokenizers dependency from 7.x to 8.x, exposing additional options for special token mapping and adding the NLLBTokenizer.

## [0.20.0] - 2023-01-21
## Added
Expand Down
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rust-bert"
version = "0.20.0"
version = "0.20.1-alpha"
authors = ["Guillaume Becquin <guillaume.becquin@gmail.com>"]
edition = "2018"
description = "Ready-to-use NLP pipelines and language models"
Expand Down Expand Up @@ -69,7 +69,7 @@ remote = ["cached-path", "dirs", "lazy_static"]
features = ["doc-only"]

[dependencies]
rust_tokenizers = "~7.0.2"
rust_tokenizers = "8.0.0"
tch = "~0.10.1"
serde_json = "1"
serde = { version = "1", features = ["derive"] }
Expand Down
2 changes: 1 addition & 1 deletion examples/async-sentiment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ async fn main() -> Result<()> {
"Classify this negative text".to_owned(),
];
let sentiments = classifier.predict(texts).await?;
println!("Results: {:?}", sentiments);
println!("Results: {sentiments:?}");

Ok(())
}
Expand Down
4 changes: 2 additions & 2 deletions examples/codebert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ fn main() -> anyhow::Result<()> {
// Run model
let output = sequence_classification_model.predict(input);
for label in output {
println!("{:?}", label);
println!("{label:?}");
}

// Masked language model
Expand Down Expand Up @@ -78,7 +78,7 @@ fn main() -> anyhow::Result<()> {
// Run model
let output = mask_language_model.predict(input)?;
for sentence_output in output {
println!("{:?}", sentence_output);
println!("{sentence_output:?}");
}

Ok(())
Expand Down
6 changes: 3 additions & 3 deletions examples/conversation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ fn main() -> anyhow::Result<()> {

let output = conversation_model.generate_responses(&mut conversation_manager);

println!("{:?}", output);
println!("{output:?}");

let _ = conversation_manager
.get(&conversation_1_id)
Expand All @@ -40,11 +40,11 @@ fn main() -> anyhow::Result<()> {

let output = conversation_model.generate_responses(&mut conversation_manager);

println!("{:?}", output);
println!("{output:?}");

let output = conversation_model.generate_responses(&mut conversation_manager);

println!("{:?}", output);
println!("{output:?}");

Ok(())
}
2 changes: 1 addition & 1 deletion examples/generation_gpt2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ fn main() -> anyhow::Result<()> {
let output = model.generate(&[input_context], None);

for sentence in output {
println!("{:?}", sentence);
println!("{sentence:?}");
}
Ok(())
}
2 changes: 1 addition & 1 deletion examples/generation_gpt_neo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ fn main() -> anyhow::Result<()> {
let output = model.generate(&[input_context_1, input_context_2], None);

for sentence in output {
println!("{}", sentence);
println!("{sentence}");
}
Ok(())
}
2 changes: 1 addition & 1 deletion examples/generation_reformer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ fn main() -> anyhow::Result<()> {
let output = model.generate(&[input_context_1, input_context_2], None);

for sentence in output {
println!("{}", sentence);
println!("{sentence}");
}
Ok(())
}
2 changes: 1 addition & 1 deletion examples/generation_xlnet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ fn main() -> anyhow::Result<()> {
let output = model.generate(&[input_context], None);

for sentence in output {
println!("{}", sentence);
println!("{sentence}");
}
Ok(())
}
2 changes: 1 addition & 1 deletion examples/masked_language.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ fn main() -> anyhow::Result<()> {
// Run model
let output = mask_language_model.predict(input)?;
for sentence_output in output {
println!("{:?}", sentence_output);
println!("{sentence_output:?}");
}

Ok(())
Expand Down
2 changes: 1 addition & 1 deletion examples/named_entities_recognition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ fn main() -> anyhow::Result<()> {
// Run model
let output = ner_model.predict_full_entities(&input);
for entity in output {
println!("{:?}", entity);
println!("{entity:?}");
}

Ok(())
Expand Down
2 changes: 1 addition & 1 deletion examples/part_of_speech_tagging.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ fn main() -> anyhow::Result<()> {
// Run model
let output = pos_model.predict(&input);
for (pos, pos_tag) in output[0].iter().enumerate() {
println!("{} - {:?}", pos, pos_tag);
println!("{pos} - {pos_tag:?}");
}

Ok(())
Expand Down
2 changes: 1 addition & 1 deletion examples/question_answering.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,6 @@ fn main() -> anyhow::Result<()> {

// Get answer
let answers = qa_model.predict(&[qa_input_1, qa_input_2], 1, 32);
println!("{:?}", answers);
println!("{answers:?}");
Ok(())
}
2 changes: 1 addition & 1 deletion examples/question_answering_bert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,6 @@ fn main() -> anyhow::Result<()> {

// Get answer
let answers = qa_model.predict(&[qa_input_1, qa_input_2], 1, 32);
println!("{:?}", answers);
println!("{answers:?}");
Ok(())
}
2 changes: 1 addition & 1 deletion examples/question_answering_longformer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,6 @@ fn main() -> anyhow::Result<()> {

// Get answer
let answers = qa_model.predict(&[qa_input_1, qa_input_2], 1, 32);
println!("{:?}", answers);
println!("{answers:?}");
Ok(())
}
2 changes: 1 addition & 1 deletion examples/sentence_embeddings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ fn main() -> anyhow::Result<()> {

// Generate Embeddings
let embeddings = model.encode(&sentences)?;
println!("{:?}", embeddings);
println!("{embeddings:?}");
Ok(())
}
2 changes: 1 addition & 1 deletion examples/sentence_embeddings_local.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,6 @@ fn main() -> anyhow::Result<()> {

// Generate Embeddings
let embeddings = model.encode(&sentences)?;
println!("{:?}", embeddings);
println!("{embeddings:?}");
Ok(())
}
2 changes: 1 addition & 1 deletion examples/sentiment_analysis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ fn main() -> anyhow::Result<()> {
// Run model
let output = sentiment_classifier.predict(input);
for sentiment in output {
println!("{:?}", sentiment);
println!("{sentiment:?}");
}

Ok(())
Expand Down
2 changes: 1 addition & 1 deletion examples/sentiment_analysis_fnet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ fn main() -> anyhow::Result<()> {
// Run model
let output = sentiment_classifier.predict(input);
for sentiment in output {
println!("{:?}", sentiment);
println!("{sentiment:?}");
}

Ok(())
Expand Down
2 changes: 1 addition & 1 deletion examples/sequence_classification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ fn main() -> anyhow::Result<()> {
// Run model
let output = sequence_classification_model.predict(input);
for label in output {
println!("{:?}", label);
println!("{label:?}");
}

Ok(())
Expand Down
2 changes: 1 addition & 1 deletion examples/sequence_classification_multilabel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ fn main() -> anyhow::Result<()> {
let output = sequence_classification_model.predict_multilabel(&input, 0.05);
if let Ok(labels) = output {
for label in labels {
println!("{:?}", label);
println!("{label:?}");
}
}

Expand Down
2 changes: 1 addition & 1 deletion examples/summarization_bart.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ about exoplanets like K2-18b."];
// Credits: WikiNews, CC BY 2.5 license (https://en.wikinews.org/wiki/Astronomers_find_water_vapour_in_atmosphere_of_exoplanet_K2-18b)
let _output = summarization_model.summarize(&input);
for sentence in _output {
println!("{}", sentence);
println!("{sentence}");
}

Ok(())
Expand Down
2 changes: 1 addition & 1 deletion examples/summarization_pegasus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ about exoplanets like K2-18b."];
// Credits: WikiNews, CC BY 2.5 license (https://en.wikinews.org/wiki/Astronomers_find_water_vapour_in_atmosphere_of_exoplanet_K2-18b)
let _output = summarization_model.summarize(&input);
for sentence in _output {
println!("{}", sentence);
println!("{sentence}");
}

Ok(())
Expand Down
2 changes: 1 addition & 1 deletion examples/summarization_prophetnet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ about exoplanets like K2-18b."];
// Credits: WikiNews, CC BY 2.5 license (https://en.wikinews.org/wiki/Astronomers_find_water_vapour_in_atmosphere_of_exoplanet_K2-18b)
let _output = summarization_model.summarize(&input);
for sentence in _output {
println!("{}", sentence);
println!("{sentence}");
}

Ok(())
Expand Down
2 changes: 1 addition & 1 deletion examples/summarization_t5.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ about exoplanets like K2-18b."];
// Credits: WikiNews, CC BY 2.5 license (https://en.wikinews.org/wiki/Astronomers_find_water_vapour_in_atmosphere_of_exoplanet_K2-18b)
let _output = summarization_model.summarize(&input);
for sentence in _output {
println!("{}", sentence);
println!("{sentence}");
}

Ok(())
Expand Down
2 changes: 1 addition & 1 deletion examples/token_classification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ fn main() -> anyhow::Result<()> {
let token_outputs = token_classification_model.predict(&input);

for token in token_outputs {
println!("{:?}", token);
println!("{token:?}");
}

Ok(())
Expand Down
2 changes: 1 addition & 1 deletion examples/translation_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ fn main() -> anyhow::Result<()> {
let output = model.translate(&[input_context_1, input_context_2], None, Language::Spanish)?;

for sentence in output {
println!("{}", sentence);
println!("{sentence}");
}
Ok(())
}
2 changes: 1 addition & 1 deletion examples/translation_m2m100.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ fn main() -> anyhow::Result<()> {
outputs.extend(model.translate(&[source_sentence], Language::English, Language::Hindi)?);

for sentence in outputs {
println!("{}", sentence);
println!("{sentence}");
}
Ok(())
}
2 changes: 1 addition & 1 deletion examples/translation_marian.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ fn main() -> anyhow::Result<()> {
let output = model.translate(&[input_context_1, input_context_2], None, None)?;

for sentence in output {
println!("{}", sentence);
println!("{sentence}");
}
Ok(())
}
2 changes: 1 addition & 1 deletion examples/translation_mbart.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ fn main() -> anyhow::Result<()> {
outputs.extend(model.translate(&[source_sentence], Language::English, Language::Hindi)?);

for sentence in outputs {
println!("{}", sentence);
println!("{sentence}");
}
Ok(())
}
2 changes: 1 addition & 1 deletion examples/translation_t5.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ fn main() -> anyhow::Result<()> {
outputs.extend(model.translate(&[source_sentence], Language::English, Language::Romanian)?);

for sentence in outputs {
println!("{}", sentence);
println!("{sentence}");
}
Ok(())
}
4 changes: 2 additions & 2 deletions examples/zero_shot_classification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ fn main() -> anyhow::Result<()> {
[input_sentence, input_sequence_2],
candidate_labels,
Some(Box::new(|label: &str| {
format!("This example is about {}.", label)
format!("This example is about {label}.")
})),
128,
)
.unwrap();

println!("{:?}", output);
println!("{output:?}");

Ok(())
}
6 changes: 2 additions & 4 deletions src/bart/bart_model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use crate::pipelines::generation_utils::{
};
use crate::{Config, RustBertError};
use rust_tokenizers::tokenizer::{RobertaTokenizer, TruncationStrategy};
use rust_tokenizers::vocab::{RobertaVocab, Vocab};
use rust_tokenizers::vocab::RobertaVocab;
use serde::{Deserialize, Serialize};
use std::borrow::Borrow;
use std::collections::HashMap;
Expand Down Expand Up @@ -1263,9 +1263,7 @@ impl PrivateLanguageGenerator<BartForConditionalGeneration, RobertaVocab, Robert

let pad_token = match pad_token_id {
Some(value) => value,
None => self
._get_tokenizer()
.convert_tokens_to_ids(&[RobertaVocab::unknown_value()])[0],
None => self._get_tokenizer().get_unk_id(),
};

let token_ids = token_ids
Expand Down
6 changes: 2 additions & 4 deletions src/common/kind.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ pub(crate) fn get_positive_infinity(kind: Kind) -> Result<Scalar, RustBertError>
Kind::Double => Scalar::float(f64::INFINITY),
_ => {
return Err(RustBertError::ValueError(format!(
"Type not supported: attempted to get positive infinity for {:?}",
kind
"Type not supported: attempted to get positive infinity for {kind:?}",
)))
}
})
Expand All @@ -34,8 +33,7 @@ pub(crate) fn get_negative_infinity(kind: Kind) -> Result<Scalar, RustBertError>
Kind::Double => Scalar::float(f64::NEG_INFINITY),
_ => {
return Err(RustBertError::ValueError(format!(
"Type not supported: attempted to get negative infinity for {:?}",
kind
"Type not supported: attempted to get negative infinity for {kind:?}",
)))
}
})
Expand Down
3 changes: 1 addition & 2 deletions src/deberta/deberta_model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,7 @@ impl FromStr for PositionAttentionType {
"c2p" => Ok(PositionAttentionType::c2p),
"p2p" => Ok(PositionAttentionType::p2p),
_ => Err(RustBertError::InvalidConfigurationError(format!(
"Position attention type `{}` not in accepted variants (`p2c`, `c2p`, `p2p`)",
s
"Position attention type `{s}` not in accepted variants (`p2c`, `c2p`, `p2p`)",
))),
}
}
Expand Down
3 changes: 1 addition & 2 deletions src/deberta_v2/deberta_v2_model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,7 @@ impl FromStr for NormRelEmbedType {
match s {
"layer_norm" => Ok(NormRelEmbedType::layer_norm),
_ => Err(RustBertError::InvalidConfigurationError(format!(
"Layer normalization type `{}` not in accepted variants (`layer_norm`)",
s
"Layer normalization type `{s}` not in accepted variants (`layer_norm`)",
))),
}
}
Expand Down
Loading

0 comments on commit 84561ec

Please sign in to comment.