Skip to content

Commit

Permalink
0.22.0 Release (#440)
Browse files Browse the repository at this point in the history
* Fix Clippy warnings

* bump version, updated dependencies and changelog
  • Loading branch information
guillaume-be committed Jan 20, 2024
1 parent 1f4d344 commit c3a3f39
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 11 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [Unreleased]

## [0.22.0] - 2024-01-20
### Added
- Addition of `new_with_tokenizer` constructor for `SentenceEmbeddingsModel` allowing passing custom tokenizers for sentence embeddings pipelines.
- Support for [Tokenizers](https://github.com/huggingface/tokenizers) in pipelines, allowing loading `tokenizer.json` and `special_tokens_map.json` tokenizer files.
Expand Down
14 changes: 7 additions & 7 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rust-bert"
version = "0.21.0"
version = "0.22.0"
authors = ["Guillaume Becquin <guillaume.becquin@gmail.com>"]
edition = "2018"
description = "Ready-to-use NLP pipelines and language models"
Expand Down Expand Up @@ -86,19 +86,19 @@ half = "2"
regex = "1.6"

cached-path = { version = "0.6", default-features = false, optional = true }
dirs = { version = "4", optional = true }
dirs = { version = "5", optional = true }
lazy_static = { version = "1", optional = true }
ort = {version="~1.15.2", optional = true, default-features = false, features = ["half"]}
ndarray = {version="0.15", optional = true}
tokenizers = {version="0.13.3", optional=true, default-features = false, features = ["onig"]}
tokenizers = {version="0.15", optional=true, default-features = false, features = ["onig"]}

[dev-dependencies]
anyhow = "1"
csv = "1"
criterion = "0.4"
tokio = { version = "1.24", features = ["sync", "rt-multi-thread", "macros"] }
criterion = "0.5"
tokio = { version = "1.35", features = ["sync", "rt-multi-thread", "macros"] }
torch-sys = "0.14.0"
tempfile = "3"
itertools = "0.10"
itertools = "0.12"
tracing-subscriber = { version = "0.3", default-features = false, features = [ "env-filter", "fmt" ] }
ort = {version="~1.15.2", features = ["load-dynamic"]}
ort = {version="~1.15.5", features = ["load-dynamic"]}
4 changes: 2 additions & 2 deletions src/pipelines/sentence_embeddings/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ impl Config for SentenceEmbeddingsModulesConfig {}

impl SentenceEmbeddingsModulesConfig {
pub fn validate(self) -> Result<Self, RustBertError> {
match self.get(0) {
match self.first() {
Some(SentenceEmbeddingsModuleConfig {
module_type: SentenceEmbeddingsModuleType::Transformer,
..
Expand Down Expand Up @@ -347,7 +347,7 @@ impl SentenceEmbeddingsModulesConfig {
}

pub fn transformer_module(&self) -> &SentenceEmbeddingsModuleConfig {
self.get(0).as_ref().unwrap()
self.first().as_ref().unwrap()
}

pub fn pooling_module(&self) -> &SentenceEmbeddingsModuleConfig {
Expand Down
3 changes: 1 addition & 2 deletions tests/gpt_j.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ use rust_bert::resources::{load_weights, RemoteResource, ResourceProvider};
use rust_bert::Config;
use rust_tokenizers::tokenizer::{Gpt2Tokenizer, Tokenizer};
use rust_tokenizers::vocab::Vocab;
use std::convert::TryFrom;
use tch::{nn, Device, Kind, Tensor};

/// Equivalent Python code:
Expand Down Expand Up @@ -107,7 +106,7 @@ fn gpt_j_correctness() -> anyhow::Result<()> {
Tensor::from_slice(
&input
.iter()
.map(|&e| i64::try_from(e != pad_token).unwrap())
.map(|&e| i64::from(e != pad_token))
.collect::<Vec<_>>(),
)
.to(device)
Expand Down

0 comments on commit c3a3f39

Please sign in to comment.