Skip to content

Commit

Permalink
Addition of benches
Browse files Browse the repository at this point in the history
  • Loading branch information
guillaume-be committed Sep 28, 2020
1 parent 8c6a2ba commit 8566cb8
Show file tree
Hide file tree
Showing 6 changed files with 465 additions and 0 deletions.
23 changes: 23 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,29 @@ name = "convert-tensor"
path = "src/convert-tensor.rs"
doc = false

# Criterion benchmarks: `harness = false` disables the built-in libtest harness
# so Criterion can supply its own `main` entry point.
[[bench]]
name = "sst2_benchmark"
harness = false

[[bench]]
name = "squad_benchmark"
harness = false

[[bench]]
name = "summarization_benchmark"
harness = false

[[bench]]
name = "translation_benchmark"
harness = false

[[bench]]
name = "tensor_operations_benchmark"
harness = false

# Build benchmarks with full optimizations so timings reflect release performance.
[profile.bench]
opt-level = 3

[features]
doc-only = ["tch/doc-only"]
all-tests = []
Expand Down
101 changes: 101 additions & 0 deletions benches/squad_benchmark.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#[macro_use]
extern crate criterion;

use criterion::{black_box, Criterion};
use rust_bert::bert::{BertConfigResources, BertModelResources, BertVocabResources};
use rust_bert::pipelines::common::ModelType;
use rust_bert::pipelines::question_answering::{
squad_processor, QaInput, QuestionAnsweringConfig, QuestionAnsweringModel,
};
use rust_bert::resources::{RemoteResource, Resource};
use std::env;
use std::path::PathBuf;
use std::time::{Duration, Instant};

/// Number of SQuAD inputs passed to the model per `predict` call.
/// A plain compile-time value: `const` is the idiomatic choice over `static`.
const BATCH_SIZE: usize = 64;

/// Builds a BERT-based question-answering pipeline from remote pretrained resources.
///
/// Panics if the model cannot be downloaded or initialized (acceptable in a benchmark).
fn create_qa_model() -> QuestionAnsweringModel {
    let model_resource =
        Resource::Remote(RemoteResource::from_pretrained(BertModelResources::BERT_QA));
    let config_resource =
        Resource::Remote(RemoteResource::from_pretrained(BertConfigResources::BERT_QA));
    let vocab_resource =
        Resource::Remote(RemoteResource::from_pretrained(BertVocabResources::BERT_QA));
    let qa_config = QuestionAnsweringConfig::new(
        ModelType::Bert,
        model_resource,
        config_resource,
        vocab_resource,
        None,  // merges resource only relevant with ModelType::Roberta
        false, // lowercase
        false,
        None,
    );
    QuestionAnsweringModel::new(qa_config).unwrap()
}

/// Accumulates the wall-clock time of `iters` full forward passes over `squad_data`,
/// batched in chunks of `BATCH_SIZE`.
fn squad_forward_pass(
    iters: u64,
    model: &QuestionAnsweringModel,
    squad_data: &[QaInput],
) -> Duration {
    let mut total = Duration::new(0, 0);
    // Keep every prediction alive so the forward passes cannot be optimized away.
    let mut predictions = vec![];
    for _ in 0..iters {
        let timer = Instant::now();
        squad_data
            .chunks(BATCH_SIZE)
            .for_each(|batch| predictions.push(model.predict(batch, 1, 64)));
        total = total.checked_add(timer.elapsed()).unwrap();
    }
    total
}

/// Measures how long it takes to build the QA pipeline from scratch, `iters` times.
/// Includes config construction and model instantiation in the timed region.
fn qa_load_model(iters: u64) -> Duration {
    (0..iters).fold(Duration::new(0, 0), |total, _| {
        let timer = Instant::now();
        let config = QuestionAnsweringConfig::new(
            ModelType::Bert,
            Resource::Remote(RemoteResource::from_pretrained(BertModelResources::BERT_QA)),
            Resource::Remote(RemoteResource::from_pretrained(
                BertConfigResources::BERT_QA,
            )),
            Resource::Remote(RemoteResource::from_pretrained(BertVocabResources::BERT_QA)),
            None,  // merges resource only relevant with ModelType::Roberta
            false, // lowercase
            false,
            None,
        );
        let _ = QuestionAnsweringModel::new(config).unwrap();
        total.checked_add(timer.elapsed()).unwrap()
    })
}

/// Registers the SQuAD benchmarks: one forward-pass benchmark over up to 1000
/// dev-set questions, and one model-loading benchmark.
fn bench_squad(c: &mut Criterion) {
    // Set-up QA model
    let model = create_qa_model();
    // NOTE(review): presumably forces linking of the CUDA-enabled libtorch symbols — confirm.
    unsafe {
        torch_sys::dummy_cuda_dependency();
    }
    // Define input: path to the SQuAD folder comes from the `squad_dataset` env var.
    let mut squad_path = PathBuf::from(env::var("squad_dataset")
        .expect("Please set the \"squad_dataset\" environment variable pointing to the SQuAD dataset folder"));
    squad_path.push("dev-v2.0.json");
    let mut qa_inputs = squad_processor(squad_path);
    // Cap the dataset so a single benchmark iteration stays tractable.
    qa_inputs.truncate(1000);

    c.bench_function("SQuAD forward pass", |b| {
        b.iter_custom(|iters| black_box(squad_forward_pass(iters, &model, &qa_inputs)))
    });

    c.bench_function("Load model", |b| {
        b.iter_custom(|iters| black_box(qa_load_model(iters)))
    });
}

// Reduced sample size: each sample runs a full dataset pass, which is far too
// slow for Criterion's default of 100 samples.
criterion_group! {
    name = benches;
    config = Criterion::default().sample_size(10);
    targets = bench_squad
}

criterion_main!(benches);
106 changes: 106 additions & 0 deletions benches/sst2_benchmark.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#[macro_use]
extern crate criterion;

use criterion::Criterion;
use rust_bert::pipelines::sentiment::SentimentModel;
use rust_bert::pipelines::sequence_classification::SequenceClassificationConfig;
use serde::Deserialize;
use std::error::Error;
use std::path::PathBuf;
use std::time::{Duration, Instant};
use std::{env, fs};
use tch::Device;

/// Number of SST-2 sentences passed to the classifier per `predict` call.
/// A plain compile-time value: `const` is the idiomatic choice over `static`.
const BATCH_SIZE: usize = 64;

fn create_sentiment_model() -> SentimentModel {
let config = SequenceClassificationConfig {
device: Device::cuda_if_available(),
..Default::default()
};
SentimentModel::new(config).unwrap()
}

/// Accumulates the wall-clock time of `iters` classification passes over
/// `sst2_data`, batched in chunks of `BATCH_SIZE`.
fn sst2_forward_pass(iters: u64, model: &SentimentModel, sst2_data: &[String]) -> Duration {
    let mut total = Duration::new(0, 0);
    // Keep every prediction alive so the classifier calls cannot be optimized away.
    let mut predictions = vec![];
    for _ in 0..iters {
        let timer = Instant::now();
        for batch in sst2_data.chunks(BATCH_SIZE) {
            // `predict` takes string slices; borrow each owned String in the batch.
            let batch_refs: Vec<&str> = batch.iter().map(String::as_str).collect();
            predictions.push(model.predict(batch_refs.as_slice()));
        }
        total = total.checked_add(timer.elapsed()).unwrap();
    }
    total
}

/// One row of the SST-2 `train.tsv` file, deserialized by the csv reader.
#[derive(Debug, Deserialize)]
struct Record {
    // Raw sentence text; the only field the benchmark actually uses.
    sentence: String,
    // Label column from the TSV; deserialized but never read here.
    label: i8,
}

/// Reads an SST-2 TSV file and returns its `sentence` column.
///
/// # Errors
/// Returns an error if the file cannot be opened or a record fails to deserialize.
fn ss2_processor(file_path: PathBuf) -> Result<Vec<String>, Box<dyn Error>> {
    // Propagate open failures via `?` instead of panicking: the function
    // already returns Result, so an `expect` here defeated its own signature.
    let file = fs::File::open(file_path)?;
    let mut csv = csv::ReaderBuilder::new()
        .has_headers(true)
        .delimiter(b'\t') // SST-2 is tab-separated
        .from_reader(file);
    let mut records = Vec::new();
    for result in csv.deserialize() {
        let record: Record = result?;
        records.push(record.sentence);
    }
    Ok(records)
}

fn sst2_load_model(iters: u64) -> Duration {
let mut duration = Duration::new(0, 0);
for _i in 0..iters {
let start = Instant::now();
let config = SequenceClassificationConfig {
device: Device::cuda_if_available(),
..Default::default()
};
let _ = SentimentModel::new(config).unwrap();
duration = duration.checked_add(start.elapsed()).unwrap();
}
duration
}

/// Registers the SST-2 benchmarks: one forward-pass benchmark over up to 2000
/// training sentences, and one model-loading benchmark.
fn bench_sst2(c: &mut Criterion) {
    // Set-up classifier
    let model = create_sentiment_model();
    // NOTE(review): presumably forces linking of the CUDA-enabled libtorch symbols — confirm.
    unsafe {
        torch_sys::dummy_cuda_dependency();
    }
    // Define input: path comes from SST2_PATH. The original expect message
    // wrongly referenced the "squad_dataset" variable and the SQuAD dataset
    // (copy-paste from the SQuAD benchmark); it now names the variable it reads.
    let mut sst2_path = PathBuf::from(env::var("SST2_PATH")
        .expect("Please set the \"SST2_PATH\" environment variable pointing to the SST-2 dataset folder"));
    sst2_path.push("train.tsv");
    let mut inputs = ss2_processor(sst2_path).unwrap();
    // Cap the dataset so a single benchmark iteration stays tractable.
    inputs.truncate(2000);

    c.bench_function("SST2 forward pass", |b| {
        b.iter_custom(|iters| sst2_forward_pass(iters, &model, &inputs))
    });

    c.bench_function("Load model", |b| b.iter_custom(sst2_load_model));
}

// Reduced sample size: each sample runs a full dataset pass, which is far too
// slow for Criterion's default of 100 samples.
criterion_group! {
    name = benches;
    config = Criterion::default().sample_size(10);
    targets = bench_sst2
}

criterion_main!(benches);
86 changes: 86 additions & 0 deletions benches/summarization_benchmark.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#[macro_use]
extern crate criterion;

use criterion::{black_box, Criterion};
use rust_bert::pipelines::summarization::{SummarizationConfig, SummarizationModel};
use std::time::{Duration, Instant};
use tch::Device;

fn create_summarization_model() -> SummarizationModel {
let config = SummarizationConfig {
device: Device::cuda_if_available(),
..Default::default()
};
SummarizationModel::new(config).unwrap()
}

/// Accumulates the wall-clock time of `iters` summarization passes over `data`.
fn summarization_forward_pass(iters: u64, model: &SummarizationModel, data: &[&str]) -> Duration {
    (0..iters).fold(Duration::new(0, 0), |total, _| {
        let timer = Instant::now();
        let _ = model.summarize(data);
        total.checked_add(timer.elapsed()).unwrap()
    })
}

fn summarization_load_model(iters: u64) -> Duration {
let mut duration = Duration::new(0, 0);
for _i in 0..iters {
let start = Instant::now();
let config = SummarizationConfig {
device: Device::cuda_if_available(),
..Default::default()
};
let _ = SummarizationModel::new(config).unwrap();
duration = duration.checked_add(start.elapsed()).unwrap();
}
duration
}

/// Registers the summarization benchmarks: one forward pass over a fixed
/// article and one model-loading benchmark.
///
/// NOTE(review): this function is misnamed — it benchmarks summarization, not
/// SQuAD (likely copied from the SQuAD benchmark). Renaming it requires
/// updating the `criterion_group!` target below as well.
fn bench_squad(c: &mut Criterion) {
    // Set-up summarization model
    // NOTE(review): presumably forces linking of the CUDA-enabled libtorch symbols — confirm.
    unsafe {
        torch_sys::dummy_cuda_dependency();
    }
    let model = create_summarization_model();

    // Define input
    let input = ["In findings published Tuesday in Cornell University's arXiv by a team of scientists \
from the University of Montreal and a separate report published Wednesday in Nature Astronomy by a team \
from University College London (UCL), the presence of water vapour was confirmed in the atmosphere of K2-18b, \
a planet circling a star in the constellation Leo. This is the first such discovery in a planet in its star's \
habitable zone — not too hot and not too cold for liquid water to exist. The Montreal team, led by Björn Benneke, \
used data from the NASA's Hubble telescope to assess changes in the light coming from K2-18b's star as the planet \
passed between it and Earth. They found that certain wavelengths of light, which are usually absorbed by water, \
weakened when the planet was in the way, indicating not only does K2-18b have an atmosphere, but the atmosphere \
contains water in vapour form. The team from UCL then analyzed the Montreal team's data using their own software \
and confirmed their conclusion. This was not the first time scientists have found signs of water on an exoplanet, \
but previous discoveries were made on planets with high temperatures or other pronounced differences from Earth. \
\"This is the first potentially habitable planet where the temperature is right and where we now know there is water,\" \
said UCL astronomer Angelos Tsiaras. \"It's the best candidate for habitability right now.\" \"It's a good sign\", \
said Ryan Cloutier of the Harvard–Smithsonian Center for Astrophysics, who was not one of either study's authors. \
\"Overall,\" he continued, \"the presence of water in its atmosphere certainly improves the prospect of K2-18b being \
a potentially habitable planet, but further observations will be required to say for sure. \" \
K2-18b was first identified in 2015 by the Kepler space telescope. It is about 110 light-years from Earth and larger \
but less dense. Its star, a red dwarf, is cooler than the Sun, but the planet's orbit is much closer, such that a year \
on K2-18b lasts 33 Earth days. According to The Guardian, astronomers were optimistic that NASA's James Webb space \
telescope — scheduled for launch in 2021 — and the European Space Agency's 2028 ARIEL program, could reveal more \
about exoplanets like K2-18b."];
    // (New sample credits: [WikiNews](https://en.wikinews.org/wiki/Astronomers_find_water_vapour_in_atmosphere_of_exoplanet_K2-18b))
    c.bench_function("Summarization forward pass", |b| {
        b.iter_custom(|iters| black_box(summarization_forward_pass(iters, &model, &input)))
    });

    c.bench_function("Load model", |b| {
        b.iter_custom(|iters| black_box(summarization_load_model(iters)))
    });
}

// Reduced sample size: each sample runs a full summarization pass, which is far
// too slow for Criterion's default of 100 samples.
criterion_group! {
    name = benches;
    config = Criterion::default().sample_size(10);
    targets = bench_squad
}

criterion_main!(benches);
39 changes: 39 additions & 0 deletions benches/tensor_operations_benchmark.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#[macro_use]
extern crate criterion;

use criterion::{black_box, Criterion};
use std::time::{Duration, Instant};
use tch::kind::Kind::Float;
use tch::{Device, Tensor};

/// Accumulates the wall-clock time of `iters` repetitions of `input.matmul(weights)`,
/// timing only the multiplication itself.
fn matrix_multiply(iters: u64, input: &Tensor, weights: &Tensor) -> Duration {
    (0..iters).fold(Duration::new(0, 0), |total, _| {
        let timer = Instant::now();
        // Result is dropped immediately, inside the timed region (matches original).
        let _ = input.matmul(weights);
        total.checked_add(timer.elapsed()).unwrap()
    })
}

/// Registers a single benchmark timing a (32, 128, 512) x (512, 512) matmul.
fn bench_tensor_ops(c: &mut Criterion) {
    // Set-up tensors for the matrix-multiplication benchmark.
    // (Original comment said "summarization model" — copy-paste from another bench.)
    // NOTE(review): presumably forces linking of the CUDA-enabled libtorch symbols — confirm.
    unsafe {
        torch_sys::dummy_cuda_dependency();
    }
    let input = Tensor::rand(&[32, 128, 512], (Float, Device::cuda_if_available()));
    let weights = Tensor::rand(&[512, 512], (Float, Device::cuda_if_available()));

    // One untimed warm-up multiplication before the benchmark loop.
    let _ = &input.matmul(&weights);
    c.bench_function("Matrix multiply ", |b| {
        b.iter_custom(|iters| black_box(matrix_multiply(iters, &input, &weights)))
    });
}

// Full default sample count (100): a single matmul is fast enough to sample densely.
criterion_group! {
    name = benches;
    config = Criterion::default().sample_size(100);
    targets = bench_tensor_ops
}

criterion_main!(benches);
Loading

0 comments on commit 8566cb8

Please sign in to comment.