Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Commit

Permalink
review edits
Browse files Browse the repository at this point in the history
  • Loading branch information
MarinPostma committed Sep 21, 2021
1 parent 6de1b41 commit 842c817
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 126 deletions.
112 changes: 9 additions & 103 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion milli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ smallstr = { version = "0.2.0", features = ["serde"] }
smallvec = { version = "1.6.1", features = ["write"] }
tempfile = "3.2.0"
uuid = { version = "0.8.2", features = ["v4"] }
vec-utils = "0.3.0"

# facet filter parser
pest = { git = "https://github.com/pest-parser/pest.git", rev = "51fd1d49f1041f7839975664ef71fe15c7dcaf67" }
Expand Down
10 changes: 5 additions & 5 deletions milli/src/documents/serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,11 +236,11 @@ impl<'a, W: io::Write> SerializeMap for MapSerializer<'a, W> {
type Error = Error;

fn serialize_key<T: ?Sized + Serialize>(&mut self, _key: &T) -> Result<(), Self::Error> {
unimplemented!()
unreachable!()
}

fn serialize_value<T: ?Sized>(&mut self, _value: &T) -> Result<(), Self::Error> {
unimplemented!()
unreachable!()
}

fn end(mut self) -> Result<Self::Ok, Self::Error> {
Expand All @@ -262,8 +262,8 @@ impl<'a, W: io::Write> SerializeMap for MapSerializer<'a, W> {
K: Serialize,
V: Serialize,
{
let field_serilizer = FieldSerializer { index: &mut self.index };
let field_id: FieldId = key.serialize(field_serilizer)?;
let field_serializer = FieldSerializer { index: &mut self.index };
let field_id: FieldId = key.serialize(field_serializer)?;

self.buffer.clear();
let mut cursor = io::Cursor::new(&mut self.buffer);
Expand Down Expand Up @@ -294,7 +294,7 @@ impl<'a> serde::Serializer for FieldSerializer<'a> {

fn serialize_str(self, ws: &str) -> Result<Self::Ok, Self::Error> {
let field_id = match self.index.get_by_right(ws) {
Some(field) => *field,
Some(field_id) => *field_id,
None => {
let field_id = self.index.len() as FieldId;
self.index.insert(field_id, ws.to_string());
Expand Down
16 changes: 13 additions & 3 deletions milli/src/update/index_documents/transform.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ use itertools::Itertools;
use log::info;
use roaring::RoaringBitmap;
use serde_json::{Map, Value};
use vec_utils::VecExt;

use super::helpers::{
create_sorter, create_writer, keep_latest_obkv, merge_obkvs, merge_two_obkvs, MergeFn,
Expand Down Expand Up @@ -131,7 +130,7 @@ impl Transform<'_, '_> {
let mut external_id_buffer = Vec::new();
let mut field_buffer: Vec<(u16, &[u8])> = Vec::new();
while let Some((addition_index, document)) = reader.next_document_with_index()? {
let mut field_buffer_cache = field_buffer.drop_and_reuse();
let mut field_buffer_cache = drop_and_reuse(field_buffer);
if self.log_every_n.map_or(false, |len| documents_count % len == 0) {
progress_callback(UpdateIndexingStep::RemapDocumentAddition {
documents_seen: documents_count,
Expand Down Expand Up @@ -217,7 +216,7 @@ impl Transform<'_, '_> {
});

obkv_buffer.clear();
field_buffer = field_buffer_cache.drop_and_reuse();
field_buffer = drop_and_reuse(field_buffer_cache);
external_id_buffer.clear();
}

Expand Down Expand Up @@ -482,6 +481,17 @@ fn validate_document_id(document_id: &str) -> Option<&str> {
})
}

/// Empties `vec`, dropping every `U` it contains, and returns an empty `Vec<T>`.
///
/// The intent is to recycle the buffer already owned by `vec` instead of allocating a new one
/// when the element type changes only nominally (for example, the same tuple type with a
/// different borrow lifetime).
///
/// `U` and `T` are expected to share the same size and alignment; this is only checked in
/// debug builds, where a mismatch panics.
///
/// NOTE(review): the buffer reuse presumably comes from the standard library's in-place
/// `collect` optimisation rather than from a documented guarantee; confirm before relying on it.
fn drop_and_reuse<U, T>(mut vec: Vec<U>) -> Vec<T> {
    use std::mem::{align_of, size_of};

    // Layout sanity checks: the two element types must be interchangeable in memory.
    debug_assert_eq!(align_of::<U>(), align_of::<T>());
    debug_assert_eq!(size_of::<U>(), size_of::<T>());

    // Run the destructors of all current elements; the vector keeps its buffer.
    vec.clear();
    debug_assert!(vec.is_empty());

    // The vector is empty, so the mapping closure is never invoked: it exists purely to
    // change the item type of the iterator from `U` to `T`.
    vec.into_iter()
        .map(|_: U| -> T { unreachable!() })
        .collect::<Vec<T>>()
}

#[cfg(test)]
mod test {
use super::*;
Expand Down
37 changes: 23 additions & 14 deletions milli/tests/search/query_criteria.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
use std::cmp::Reverse;
use std::io::Cursor;

use big_s::S;
use heed::EnvOpenOptions;
use itertools::Itertools;
use maplit::hashset;
use milli::update::{Settings, UpdateBuilder, UpdateFormat};
use milli::documents::{DocumentBatchBuilder, DocumentBatchReader};
use milli::update::{Settings, UpdateBuilder};
use milli::{AscDesc, Criterion, Index, Search, SearchResult};
use rand::Rng;
use Criterion::*;
Expand Down Expand Up @@ -386,13 +388,13 @@ fn criteria_ascdesc() {
let mut builder = UpdateBuilder::new(0);
builder.max_memory(10 * 1024 * 1024); // 10MiB
let mut builder = builder.index_documents(&mut wtxn, &index);
builder.update_format(UpdateFormat::Csv);
builder.enable_autogenerate_docids();

let content = [
vec![S("name,age")],
(0..ASC_DESC_CANDIDATES_THRESHOLD + 1)
.map(|_| {
let mut cursor = Cursor::new(Vec::new());
let mut batch_builder = DocumentBatchBuilder::new(&mut cursor).unwrap();

(0..ASC_DESC_CANDIDATES_THRESHOLD + 1)
.for_each(|_| {
let mut rng = rand::thread_rng();

let age = rng.gen::<u32>().to_string();
Expand All @@ -403,14 +405,21 @@ fn criteria_ascdesc() {
.take(10)
.collect::<String>();

format!("{},{}", name, age)
})
.collect::<Vec<_>>(),
]
.iter()
.flatten()
.join("\n");
builder.execute(content.as_bytes(), |_, _| ()).unwrap();
let json = serde_json::json!({
"name": name,
"age": age,
});

batch_builder.add_documents(json).unwrap();
});

batch_builder.finish().unwrap();

cursor.set_position(0);

let reader = DocumentBatchReader::from_reader(cursor).unwrap();

builder.execute(reader, |_, _| ()).unwrap();

wtxn.commit().unwrap();

Expand Down

0 comments on commit 842c817

Please sign in to comment.