Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(rust): Unify internal string type #18425

Merged
merged 3 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
26 changes: 2 additions & 24 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,11 @@ recursive = "0.1"
regex = "1.9"
reqwest = { version = "0.12", default-features = false }
ryu = "1.0.13"
serde = { version = "1.0.188", features = ["derive"] }
serde = { version = "1.0.188", features = ["derive", "rc"] }
serde_json = "1"
simd-json = { version = "0.13", features = ["known-key"] }
simdutf8 = "0.1.4"
slotmap = "1"
smartstring = "1"
sqlparser = "0.49"
stacker = "0.1"
streaming-iterator = "0.1.9"
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-arrow/src/array/fixed_size_list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ mod iterator;
mod mutable;
pub use mutable::*;
use polars_error::{polars_bail, PolarsResult};
use polars_utils::pl_str::PlSmallStr;

/// Arrow's equivalent of an immutable `Vec<Option<[T; size]>>` where `T` is an Arrow type.
/// Cloning and slicing this struct is `O(1)`.
Expand Down Expand Up @@ -199,7 +200,7 @@ impl FixedSizeListArray {

/// Returns an [`ArrowDataType`] consistent with [`FixedSizeListArray`].
pub fn default_datatype(data_type: ArrowDataType, size: usize) -> ArrowDataType {
let field = Box::new(Field::new("item", data_type, true));
let field = Box::new(Field::new(PlSmallStr::from_static("item"), data_type, true));
ArrowDataType::FixedSizeList(field, size)
}
}
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-arrow/src/array/fixed_size_list/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::sync::Arc;

use polars_error::{polars_bail, PolarsResult};
use polars_utils::pl_str::PlSmallStr;

use super::FixedSizeListArray;
use crate::array::physical_binary::extend_validity;
Expand Down Expand Up @@ -35,7 +36,7 @@ impl<M: MutableArray> MutableFixedSizeListArray<M> {
}

/// Creates a new [`MutableFixedSizeListArray`] from a [`MutableArray`] and size.
pub fn new_with_field(values: M, name: &str, nullable: bool, size: usize) -> Self {
pub fn new_with_field(values: M, name: PlSmallStr, nullable: bool, size: usize) -> Self {
let data_type = ArrowDataType::FixedSizeList(
Box::new(Field::new(name, values.data_type().clone(), nullable)),
size,
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-arrow/src/array/list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pub use iterator::*;
mod mutable;
pub use mutable::*;
use polars_error::{polars_bail, PolarsResult};
use polars_utils::pl_str::PlSmallStr;

/// An [`Array`] semantically equivalent to `Vec<Option<Vec<Option<T>>>>` with Arrow's in-memory representation.
#[derive(Clone)]
Expand Down Expand Up @@ -185,7 +186,7 @@ impl<O: Offset> ListArray<O> {
impl<O: Offset> ListArray<O> {
/// Returns a default [`ArrowDataType`]: inner field is named "item" and is nullable
pub fn default_datatype(data_type: ArrowDataType) -> ArrowDataType {
let field = Box::new(Field::new("item", data_type, true));
let field = Box::new(Field::new(PlSmallStr::from_static("item"), data_type, true));
if O::IS_LARGE {
ArrowDataType::LargeList(field)
} else {
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-arrow/src/array/list/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::sync::Arc;

use polars_error::{polars_err, PolarsResult};
use polars_utils::pl_str::PlSmallStr;

use super::ListArray;
use crate::array::physical_binary::extend_validity;
Expand Down Expand Up @@ -122,7 +123,7 @@ impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
}

/// Creates a new [`MutableListArray`] from a [`MutableArray`].
pub fn new_with_field(values: M, name: &str, nullable: bool) -> Self {
pub fn new_with_field(values: M, name: PlSmallStr, nullable: bool) -> Self {
let field = Box::new(Field::new(name, values.data_type().clone(), nullable));
let data_type = if O::IS_LARGE {
ArrowDataType::LargeList(field)
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-arrow/src/array/primitive/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ pub fn get_write_value<'a, T: NativeType, F: Write>(
Time64(_) => unreachable!(), // remaining are not valid
Timestamp(time_unit, tz) => {
if let Some(tz) = tz {
let timezone = temporal_conversions::parse_offset(tz);
let timezone = temporal_conversions::parse_offset(tz.as_str());
match timezone {
Ok(timezone) => {
dyn_primitive!(array, i64, |time| {
Expand All @@ -65,7 +65,7 @@ pub fn get_write_value<'a, T: NativeType, F: Write>(
},
#[cfg(feature = "chrono-tz")]
Err(_) => {
let timezone = temporal_conversions::parse_offset_tz(tz);
let timezone = temporal_conversions::parse_offset_tz(tz.as_str());
match timezone {
Ok(timezone) => dyn_primitive!(array, i64, |time| {
temporal_conversions::timestamp_to_datetime(
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-arrow/src/array/struct_/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ use crate::compute::utils::combine_validities_and;
/// let int = Int32Array::from_slice(&[42, 28, 19, 31]).boxed();
///
/// let fields = vec![
/// Field::new("b", ArrowDataType::Boolean, false),
/// Field::new("c", ArrowDataType::Int32, false),
/// Field::new("b".into(), ArrowDataType::Boolean, false),
/// Field::new("c".into(), ArrowDataType::Int32, false),
/// ];
///
/// let array = StructArray::new(ArrowDataType::Struct(fields), vec![boolean, int], None);
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-arrow/src/compute/cast/primitive_to.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::hash::Hash;

use num_traits::{AsPrimitive, Float, ToPrimitive};
use polars_error::PolarsResult;
use polars_utils::pl_str::PlSmallStr;

use super::CastOptionsImpl;
use crate::array::*;
Expand Down Expand Up @@ -434,7 +435,7 @@ pub fn timestamp_to_timestamp(
from: &PrimitiveArray<i64>,
from_unit: TimeUnit,
to_unit: TimeUnit,
tz: &Option<String>,
tz: &Option<PlSmallStr>,
) -> PrimitiveArray<i64> {
let from_size = time_unit_multiple(from_unit);
let to_size = time_unit_multiple(to_unit);
Expand Down
8 changes: 4 additions & 4 deletions crates/polars-arrow/src/compute/temporal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,12 @@ macro_rules! date_like {
ArrowDataType::Timestamp(time_unit, Some(timezone_str)) => {
let array = $array.as_any().downcast_ref().unwrap();

if let Ok(timezone) = parse_offset(timezone_str) {
if let Ok(timezone) = parse_offset(timezone_str.as_str()) {
Ok(extract_impl(array, *time_unit, timezone, |x| {
x.$extract().try_into().unwrap()
}))
} else {
chrono_tz(array, *time_unit, timezone_str, |x| {
chrono_tz(array, *time_unit, timezone_str.as_str(), |x| {
x.$extract().try_into().unwrap()
})
}
Expand Down Expand Up @@ -129,12 +129,12 @@ macro_rules! time_like {
ArrowDataType::Timestamp(time_unit, Some(timezone_str)) => {
let array = $array.as_any().downcast_ref().unwrap();

if let Ok(timezone) = parse_offset(timezone_str) {
if let Ok(timezone) = parse_offset(timezone_str.as_str()) {
Ok(extract_impl(array, *time_unit, timezone, |x| {
x.$extract().try_into().unwrap()
}))
} else {
chrono_tz(array, *time_unit, timezone_str, |x| {
chrono_tz(array, *time_unit, timezone_str.as_str(), |x| {
x.$extract().try_into().unwrap()
})
}
Expand Down
30 changes: 23 additions & 7 deletions crates/polars-arrow/src/datatypes/field.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use polars_utils::pl_str::PlSmallStr;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

Expand All @@ -15,7 +16,7 @@ use super::{ArrowDataType, Metadata};
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Field {
/// Its name
pub name: String,
pub name: PlSmallStr,
/// Its logical [`ArrowDataType`]
pub data_type: ArrowDataType,
/// Its nullability
Expand All @@ -26,9 +27,9 @@ pub struct Field {

impl Field {
/// Creates a new [`Field`].
pub fn new<T: Into<String>>(name: T, data_type: ArrowDataType, is_nullable: bool) -> Self {
pub fn new(name: PlSmallStr, data_type: ArrowDataType, is_nullable: bool) -> Self {
Field {
name: name.into(),
name,
data_type,
is_nullable,
metadata: Default::default(),
Expand Down Expand Up @@ -56,8 +57,18 @@ impl Field {
#[cfg(feature = "arrow_rs")]
impl From<Field> for arrow_schema::Field {
fn from(value: Field) -> Self {
Self::new(value.name, value.data_type.into(), value.is_nullable)
.with_metadata(value.metadata.into_iter().collect())
Self::new(
value.name.to_string(),
value.data_type.into(),
value.is_nullable,
)
.with_metadata(
value
.metadata
.into_iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect(),
)
}
}

Expand All @@ -75,9 +86,14 @@ impl From<&arrow_schema::Field> for Field {
let metadata = value
.metadata()
.iter()
.map(|(k, v)| (k.clone(), v.clone()))
.map(|(k, v)| (PlSmallStr::from_str(k), PlSmallStr::from_str(v)))
.collect();
Self::new(value.name(), data_type, value.is_nullable()).with_metadata(metadata)
Self::new(
PlSmallStr::from_str(value.name().as_str()),
data_type,
value.is_nullable(),
)
.with_metadata(metadata)
}
}

Expand Down
Loading