0.22.0 Release #440

Merged · 2 commits · Jan 20, 2024
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,8 @@
All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [Unreleased]

+## [0.22.0] - 2024-01-20
## Added
- Addition of `new_with_tokenizer` constructor for `SentenceEmbeddingsModel` allowing passing custom tokenizers for sentence embeddings pipelines.
- Support for [Tokenizers](https://github.com/huggingface/tokenizers) in pipelines, allowing loading `tokenizer.json` and `special_token_map.json` tokenizer files.
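To illustrate the two changelog entries above, here is a minimal sketch of wiring a custom tokenizer into a sentence embeddings pipeline. The exact `new_with_tokenizer` signature and the tokenizer-loading helper are not shown in this diff, so the parameter types and the `from_hf_tokenizer_file` name below are assumptions for illustration only.

```rust
// Hypothetical usage sketch, not the confirmed 0.22.0 API surface.
use rust_bert::pipelines::common::TokenizerOption;
use rust_bert::pipelines::sentence_embeddings::{
    SentenceEmbeddingsConfig, SentenceEmbeddingsModel, SentenceEmbeddingsModelType,
};

fn main() -> anyhow::Result<()> {
    // Standard remote configuration for a pretrained sentence embeddings model.
    let config = SentenceEmbeddingsConfig::from(SentenceEmbeddingsModelType::AllMiniLmL12V2);

    // Assumption: the new Tokenizers support lets a `tokenizer.json` (plus special
    // token map) be turned into a `TokenizerOption`; this helper name is invented here.
    let tokenizer = TokenizerOption::from_hf_tokenizer_file(
        "path/to/tokenizer.json",
        "path/to/special_token_map.json",
    )?;

    // Assumption: `new_with_tokenizer` mirrors `new` but takes the tokenizer explicitly.
    let model = SentenceEmbeddingsModel::new_with_tokenizer(config, tokenizer)?;

    let embeddings = model.encode(&["This sentence is embedded with a custom tokenizer."])?;
    println!("embedding dimension: {}", embeddings[0].len());
    Ok(())
}
```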
14 changes: 7 additions & 7 deletions Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "rust-bert"
version = "0.21.0"
version = "0.22.0"
authors = ["Guillaume Becquin <guillaume.becquin@gmail.com>"]
edition = "2018"
description = "Ready-to-use NLP pipelines and language models"
@@ -86,19 +86,19 @@ half = "2"
regex = "1.6"

cached-path = { version = "0.6", default-features = false, optional = true }
dirs = { version = "4", optional = true }
dirs = { version = "5", optional = true }
lazy_static = { version = "1", optional = true }
ort = {version="~1.15.2", optional = true, default-features = false, features = ["half"]}
ndarray = {version="0.15", optional = true}
tokenizers = {version="0.13.3", optional=true, default-features = false, features = ["onig"]}
tokenizers = {version="0.15", optional=true, default-features = false, features = ["onig"]}

[dev-dependencies]
anyhow = "1"
csv = "1"
criterion = "0.4"
tokio = { version = "1.24", features = ["sync", "rt-multi-thread", "macros"] }
criterion = "0.5"
tokio = { version = "1.35", features = ["sync", "rt-multi-thread", "macros"] }
torch-sys = "0.14.0"
tempfile = "3"
itertools = "0.10"
itertools = "0.12"
tracing-subscriber = { version = "0.3", default-features = false, features = [ "env-filter", "fmt" ] }
ort = {version="~1.15.2", features = ["load-dynamic"]}
ort = {version="~1.15.5", features = ["load-dynamic"]}
4 changes: 2 additions & 2 deletions src/pipelines/sentence_embeddings/config.rs
@@ -309,7 +309,7 @@ impl Config for SentenceEmbeddingsModulesConfig {}

impl SentenceEmbeddingsModulesConfig {
pub fn validate(self) -> Result<Self, RustBertError> {
-match self.get(0) {
+match self.first() {
Some(SentenceEmbeddingsModuleConfig {
module_type: SentenceEmbeddingsModuleType::Transformer,
..
@@ -347,7 +347,7 @@ impl SentenceEmbeddingsModulesConfig {
}

pub fn transformer_module(&self) -> &SentenceEmbeddingsModuleConfig {
-self.get(0).as_ref().unwrap()
+self.first().as_ref().unwrap()
}

pub fn pooling_module(&self) -> &SentenceEmbeddingsModuleConfig {
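The two hunks above are the only functional change in this file: `get(0)` becomes `first()`. On slices and `Vec`s the two are equivalent, both returning an `Option` of a reference to the first element; `first()` is simply the idiomatic spelling that Clippy's `get_first` lint recommends. A standalone illustration:

```rust
fn main() {
    let modules = vec!["transformer", "pooling", "dense"];

    // Equivalent: both return Some(&"transformer").
    assert_eq!(modules.get(0), modules.first());

    // Both return None on an empty collection rather than panicking.
    let empty: Vec<&str> = Vec::new();
    assert_eq!(empty.first(), None);
    assert_eq!(empty.get(0), None);

    println!("get(0) and first() agree");
}
```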
3 changes: 1 addition & 2 deletions tests/gpt_j.rs
@@ -7,7 +7,6 @@ use rust_bert::resources::{load_weights, RemoteResource, ResourceProvider};
use rust_bert::Config;
use rust_tokenizers::tokenizer::{Gpt2Tokenizer, Tokenizer};
use rust_tokenizers::vocab::Vocab;
-use std::convert::TryFrom;
use tch::{nn, Device, Kind, Tensor};

/// Equivalent Python code:
@@ -107,7 +106,7 @@ fn gpt_j_correctness() -> anyhow::Result<()> {
Tensor::from_slice(
&input
.iter()
.map(|&e| i64::try_from(e != pad_token).unwrap())
.map(|&e| i64::from(e != pad_token))
.collect::<Vec<_>>(),
)
.to(device)
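For the hunk above: `e != pad_token` is a `bool`, and the standard library provides an infallible `From<bool>` impl for the integer types, so `i64::from(...)` replaces the `try_from(...).unwrap()` pair and makes the `TryFrom` import (removed in the first hunk) unnecessary. A minimal sketch of the same attention-mask construction outside the test:

```rust
fn main() {
    let pad_token: i64 = 50256;
    let input: Vec<i64> = vec![464, 3290, 318, 50256, 50256];

    // i64::from(bool) maps false -> 0 and true -> 1, so real tokens get a
    // mask value of 1 and padding positions get 0, with no unwrap needed.
    let attention_mask: Vec<i64> = input
        .iter()
        .map(|&e| i64::from(e != pad_token))
        .collect();

    assert_eq!(attention_mask, vec![1, 1, 1, 0, 0]);
}
```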