Commit

Merge remote-tracking branch 'remotes/origin/master' into t5_implementation
guillaume-be committed Jul 1, 2020
2 parents 8e7696f + 3e0ad01 commit b835ccd
Showing 31 changed files with 692 additions and 296 deletions.
36 changes: 36 additions & 0 deletions CONTRIBUTING.md
@@ -0,0 +1,36 @@
# How to contribute to rust-bert?

Code contributions to the library are very welcome, especially in the areas of focus listed below.
However, please note that direct contributions to the `rust-bert` library are not the only way you can help the project.
Building applications on top of the library, supporting those applications, spreading
the word about them in the Rust and NLP communities, or simply starring the repository goes a long way toward helping this project develop.

## Code contributions areas of focus

Rust is an efficient, safe and fast language when used the right way.
Transformer models in NLP, and especially their implementation in Rust, are a rather recent development.
Considering this, the execution speed of pipelines built with `rust-bert` is a priority for the project, along with the correctness of the results.

Contributions are therefore welcome in the following areas:
- Improvement of execution performance at a module, model or pipeline level
- Reduction of memory footprint for the models

For other areas of contribution, opening an issue to discuss the proposed feature would be very welcome.
As this started out as a personal project, coordinating early helps avoid duplicated effort: the feature may already be in the implementation pipeline.

## General contribution guidelines

- Please run the suite of integration tests locally before submitting a pull request. Most features are tested automatically in Travis CI, but due to the large size of some models, some tests cannot be run in the virtual machines provided.
- The code should be formatted using `cargo +nightly fmt`, which formats both the code and the documentation.
- As much as possible, please try to adhere to the coding style of the crate. I am open to discussing non-idiomatic code.
- When providing a performance improvement, please provide benchmarks to illustrate the performance gain, if possible with and without GPU support.
- Please try to ensure that the documentation always reflects the actual state of the code.

## Did you find a bug?

Thank you - identifying and sharing bugs is one of the best ways to improve the overall quality of the crate!
When submitting a bug as an issue, it is very helpful to share the full stack trace of the error and the input needed to reproduce it.
Since several models are non-deterministic (generation pipelines use random sampling to generate text), please turn off sampling (`do_sample: false` in the relevant configuration) and reproduce the error with a fixed input.
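As an illustration, sampling can typically be disabled through the pipeline's generation configuration. The struct and field names below (`GenerateConfig`, `do_sample`) are assumptions based on the crate's pipeline API and should be checked against the version in use; this is a sketch, not a verified snippet:

```rust
use rust_bert::pipelines::generation::GenerateConfig;

// Sketch: turn off sampling so a given input reproduces the same
// output on every run (field and struct names assumed).
let generate_config = GenerateConfig {
    do_sample: false,
    ..Default::default()
};
```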


This guide was inspired by the original [Transformers contributing guide](https://github.com/huggingface/transformers/blob/master/CONTRIBUTING.md).
85 changes: 60 additions & 25 deletions src/albert/albert.rs
@@ -17,7 +17,7 @@ use crate::common::activations::{_gelu, _gelu_new, _mish, _relu, _tanh};
 use crate::common::dropout::Dropout;
 use crate::Config;
 use serde::{Deserialize, Serialize};
-use std::collections::HashMap;
+use std::{borrow::Borrow, collections::HashMap};
 use tch::nn::Module;
 use tch::{nn, Kind, Tensor};

@@ -138,13 +138,18 @@ impl AlbertModel {
 /// let device = Device::Cpu;
 /// let p = nn::VarStore::new(device);
 /// let config = AlbertConfig::from_file(config_path);
-/// let albert: AlbertModel = AlbertModel::new(&(&p.root() / "albert"), &config);
+/// let albert: AlbertModel = AlbertModel::new(&p.root() / "albert", &config);
 /// ```
-    pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertModel {
-        let embeddings = AlbertEmbeddings::new(&(p / "embeddings"), config);
-        let encoder = AlbertTransformer::new(&(p / "encoder"), config);
+    pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertModel
+    where
+        P: Borrow<nn::Path<'p>>,
+    {
+        let p = p.borrow();
+
+        let embeddings = AlbertEmbeddings::new(p / "embeddings", config);
+        let encoder = AlbertTransformer::new(p / "encoder", config);
         let pooler = nn::linear(
-            &(p / "pooler"),
+            p / "pooler",
             config.hidden_size,
             config.hidden_size,
             Default::default(),
@@ -288,7 +293,12 @@ pub struct AlbertMLMHead {
 }
 
 impl AlbertMLMHead {
-    pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertMLMHead {
+    pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertMLMHead
+    where
+        P: Borrow<nn::Path<'p>>,
+    {
+        let p = p.borrow();
+
         let layer_norm_eps = match config.layer_norm_eps {
             Some(value) => value,
             None => 1e-12,
@@ -298,18 +308,18 @@ impl AlbertMLMHead {
             ..Default::default()
         };
         let layer_norm = nn::layer_norm(
-            &(p / "LayerNorm"),
+            p / "LayerNorm",
             vec![config.embedding_size],
             layer_norm_config,
         );
         let dense = nn::linear(
-            &(p / "dense"),
+            p / "dense",
             config.hidden_size,
             config.embedding_size,
             Default::default(),
         );
         let decoder = nn::linear(
-            &(p / "decoder"),
+            p / "decoder",
             config.embedding_size,
             config.vocab_size,
             Default::default(),
@@ -368,9 +378,14 @@ impl AlbertForMaskedLM {
 /// let config = AlbertConfig::from_file(config_path);
 /// let albert: AlbertForMaskedLM = AlbertForMaskedLM::new(&p.root(), &config);
 /// ```
-    pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertForMaskedLM {
-        let albert = AlbertModel::new(&(p / "albert"), config);
-        let predictions = AlbertMLMHead::new(&(p / "predictions"), config);
+    pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertForMaskedLM
+    where
+        P: Borrow<nn::Path<'p>>,
+    {
+        let p = p.borrow();
+
+        let albert = AlbertModel::new(p / "albert", config);
+        let predictions = AlbertMLMHead::new(p / "predictions", config);
 
         AlbertForMaskedLM {
             albert,
@@ -486,8 +501,13 @@ impl AlbertForSequenceClassification {
 /// let albert: AlbertForSequenceClassification =
 ///     AlbertForSequenceClassification::new(&p.root(), &config);
 /// ```
-    pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertForSequenceClassification {
-        let albert = AlbertModel::new(&(p / "albert"), config);
+    pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertForSequenceClassification
+    where
+        P: Borrow<nn::Path<'p>>,
+    {
+        let p = p.borrow();
+
+        let albert = AlbertModel::new(p / "albert", config);
         let classifier_dropout_prob = match config.classifier_dropout_prob {
             Some(value) => value,
             None => 0.1,
@@ -499,7 +519,7 @@
             .expect("num_labels not provided in configuration")
             .len() as i64;
         let classifier = nn::linear(
-            &(p / "classifier"),
+            p / "classifier",
             config.hidden_size,
             num_labels,
             Default::default(),
@@ -621,16 +641,21 @@ impl AlbertForTokenClassification {
 /// let albert: AlbertForTokenClassification =
 ///     AlbertForTokenClassification::new(&p.root(), &config);
 /// ```
-    pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertForTokenClassification {
-        let albert = AlbertModel::new(&(p / "albert"), config);
+    pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertForTokenClassification
+    where
+        P: Borrow<nn::Path<'p>>,
+    {
+        let p = p.borrow();
+
+        let albert = AlbertModel::new(p / "albert", config);
         let dropout = Dropout::new(config.hidden_dropout_prob);
         let num_labels = config
             .id2label
             .as_ref()
             .expect("num_labels not provided in configuration")
             .len() as i64;
         let classifier = nn::linear(
-            &(p / "classifier"),
+            p / "classifier",
             config.hidden_size,
             num_labels,
             Default::default(),
@@ -750,11 +775,16 @@ impl AlbertForQuestionAnswering {
 /// let config = AlbertConfig::from_file(config_path);
 /// let albert: AlbertForQuestionAnswering = AlbertForQuestionAnswering::new(&p.root(), &config);
 /// ```
-    pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertForQuestionAnswering {
-        let albert = AlbertModel::new(&(p / "albert"), config);
+    pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertForQuestionAnswering
+    where
+        P: Borrow<nn::Path<'p>>,
+    {
+        let p = p.borrow();
+
+        let albert = AlbertModel::new(p / "albert", config);
         let num_labels = 2;
         let qa_outputs = nn::linear(
-            &(p / "qa_outputs"),
+            p / "qa_outputs",
             config.hidden_size,
             num_labels,
             Default::default(),
@@ -880,12 +910,17 @@ impl AlbertForMultipleChoice {
 /// let config = AlbertConfig::from_file(config_path);
 /// let albert: AlbertForMultipleChoice = AlbertForMultipleChoice::new(&p.root(), &config);
 /// ```
-    pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertForMultipleChoice {
-        let albert = AlbertModel::new(&(p / "albert"), config);
+    pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertForMultipleChoice
+    where
+        P: Borrow<nn::Path<'p>>,
+    {
+        let p = p.borrow();
+
+        let albert = AlbertModel::new(p / "albert", config);
         let dropout = Dropout::new(config.hidden_dropout_prob);
         let num_labels = 1;
         let classifier = nn::linear(
-            &(p / "classifier"),
+            p / "classifier",
             config.hidden_size,
             num_labels,
             Default::default(),
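The recurring change across these constructors replaces a concrete `p: &nn::Path` parameter with a generic `P: Borrow<nn::Path<'p>>`, so callers can pass either a borrowed path or the owned `nn::Path` produced by the `/` operator without extra `&(...)` wrapping. A minimal standalone sketch of the pattern, using a hypothetical `Path` stand-in rather than tch's real `nn::Path`:

```rust
use std::borrow::Borrow;

// Hypothetical stand-in for tch's `nn::Path`: a hierarchical
// variable-store path where `&path / "name"` yields an owned child path.
#[derive(Debug, Clone, PartialEq)]
struct Path(String);

impl std::ops::Div<&str> for &Path {
    type Output = Path;
    fn div(self, rhs: &str) -> Path {
        Path(format!("{}/{}", self.0, rhs))
    }
}

// Accepts `&Path`, `Path`, or anything borrowable as `Path`,
// mirroring the `P: Borrow<nn::Path<'p>>` bound introduced in this diff.
fn sub_path<P: Borrow<Path>>(p: P, name: &str) -> Path {
    let p = p.borrow();
    p / name
}

fn main() {
    let root = Path("root".to_string());
    // Call with a reference...
    let albert = sub_path(&root, "albert");
    // ...or with the owned value returned by the `/` operator.
    let encoder = sub_path(&root / "encoder", "layer_0");
    assert_eq!(albert.0, "root/albert");
    assert_eq!(encoder.0, "root/encoder/layer_0");
}
```

The blanket impls `Borrow<T> for T` and `Borrow<T> for &T` in the standard library are what make both call styles compile against a single signature.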
22 changes: 12 additions & 10 deletions src/albert/attention.rs
@@ -13,6 +13,7 @@
 
 use crate::albert::AlbertConfig;
 use crate::common::dropout::Dropout;
+use std::borrow::Borrow;
 use tch::kind::Kind::Float;
 use tch::{nn, Tensor};

@@ -31,33 +32,37 @@ pub struct AlbertSelfAttention {
 }
 
 impl AlbertSelfAttention {
-    pub fn new(p: nn::Path, config: &AlbertConfig) -> AlbertSelfAttention {
+    pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertSelfAttention
+    where
+        P: Borrow<nn::Path<'p>>,
+    {
         assert_eq!(
             config.hidden_size % config.num_attention_heads,
             0,
             "Hidden size not a multiple of the number of attention heads"
         );
+        let p = p.borrow();
+
         let query = nn::linear(
-            &p / "query",
+            p / "query",
             config.hidden_size,
             config.hidden_size,
             Default::default(),
         );
         let key = nn::linear(
-            &p / "key",
+            p / "key",
             config.hidden_size,
             config.hidden_size,
             Default::default(),
         );
         let value = nn::linear(
-            &p / "value",
+            p / "value",
             config.hidden_size,
             config.hidden_size,
             Default::default(),
         );
         let dense = nn::linear(
-            &p / "dense",
+            p / "dense",
             config.hidden_size,
             config.hidden_size,
             Default::default(),
@@ -76,11 +81,8 @@ impl AlbertSelfAttention {
             eps: layer_norm_eps,
             ..Default::default()
         };
-        let layer_norm = nn::layer_norm(
-            &p / "LayerNorm",
-            vec![config.hidden_size],
-            layer_norm_config,
-        );
+        let layer_norm =
+            nn::layer_norm(p / "LayerNorm", vec![config.hidden_size], layer_norm_config);
 
         AlbertSelfAttention {
             num_attention_heads: config.num_attention_heads,
Expand Down
9 changes: 7 additions & 2 deletions src/albert/embeddings.rs
@@ -13,12 +13,12 @@
 
 use crate::albert::AlbertConfig;
 use crate::common::dropout::Dropout;
+use std::borrow::Borrow;
 use tch::nn::{embedding, EmbeddingConfig};
 use tch::{nn, Kind, Tensor};
 
+/// # Embeddings implementation for Albert model
 #[derive(Debug)]
-/// # Embeddings implementation for Electra model
 pub struct AlbertEmbeddings {
     word_embeddings: nn::Embedding,
     position_embeddings: nn::Embedding,
@@ -28,7 +28,12 @@ pub struct AlbertEmbeddings {
 }
 
 impl AlbertEmbeddings {
-    pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertEmbeddings {
+    pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertEmbeddings
+    where
+        P: Borrow<nn::Path<'p>>,
+    {
+        let p = p.borrow();
+
         let embedding_config = EmbeddingConfig {
             padding_idx: config.pad_token_id,
             ..Default::default()
Expand Down