Skip to content

Commit

Permalink
c
Browse files Browse the repository at this point in the history
  • Loading branch information
nameexhaustion committed Aug 29, 2024
1 parent b2550a0 commit 2c5332e
Show file tree
Hide file tree
Showing 596 changed files with 6,459 additions and 4,645 deletions.
26 changes: 2 additions & 24 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ serde_json = "1"
simd-json = { version = "0.13", features = ["known-key"] }
simdutf8 = "0.1.4"
slotmap = "1"
smartstring = "1"
sqlparser = "0.49"
stacker = "0.1"
streaming-iterator = "0.1.9"
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-arrow/src/array/fixed_size_list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ mod iterator;
mod mutable;
pub use mutable::*;
use polars_error::{polars_bail, PolarsResult};
use polars_utils::pl_str::PlSmallStr;

/// Arrow's equivalent of an immutable `Vec<Option<[T; size]>>`, where `T` is an Arrow type.
/// Cloning and slicing this struct is `O(1)`.
Expand Down Expand Up @@ -199,7 +200,7 @@ impl FixedSizeListArray {

/// Returns an [`ArrowDataType`] consistent with [`FixedSizeListArray`].
pub fn default_datatype(data_type: ArrowDataType, size: usize) -> ArrowDataType {
let field = Box::new(Field::new("item", data_type, true));
let field = Box::new(Field::new(PlSmallStr::from_static("item"), data_type, true));
ArrowDataType::FixedSizeList(field, size)
}
}
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-arrow/src/array/fixed_size_list/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::sync::Arc;

use polars_error::{polars_bail, PolarsResult};
use polars_utils::pl_str::PlSmallStr;

use super::FixedSizeListArray;
use crate::array::physical_binary::extend_validity;
Expand Down Expand Up @@ -35,7 +36,7 @@ impl<M: MutableArray> MutableFixedSizeListArray<M> {
}

/// Creates a new [`MutableFixedSizeListArray`] from a [`MutableArray`] and size.
pub fn new_with_field(values: M, name: &str, nullable: bool, size: usize) -> Self {
pub fn new_with_field(values: M, name: PlSmallStr, nullable: bool, size: usize) -> Self {
let data_type = ArrowDataType::FixedSizeList(
Box::new(Field::new(name, values.data_type().clone(), nullable)),
size,
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-arrow/src/array/list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pub use iterator::*;
mod mutable;
pub use mutable::*;
use polars_error::{polars_bail, PolarsResult};
use polars_utils::pl_str::PlSmallStr;

/// An [`Array`] semantically equivalent to `Vec<Option<Vec<Option<T>>>>` with Arrow's in-memory representation.
#[derive(Clone)]
Expand Down Expand Up @@ -185,7 +186,7 @@ impl<O: Offset> ListArray<O> {
impl<O: Offset> ListArray<O> {
/// Returns a default [`ArrowDataType`]: inner field is named "item" and is nullable
pub fn default_datatype(data_type: ArrowDataType) -> ArrowDataType {
let field = Box::new(Field::new("item", data_type, true));
let field = Box::new(Field::new(PlSmallStr::from_static("item"), data_type, true));
if O::IS_LARGE {
ArrowDataType::LargeList(field)
} else {
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-arrow/src/array/list/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::sync::Arc;

use polars_error::{polars_err, PolarsResult};
use polars_utils::pl_str::PlSmallStr;

use super::ListArray;
use crate::array::physical_binary::extend_validity;
Expand Down Expand Up @@ -122,7 +123,7 @@ impl<O: Offset, M: MutableArray> MutableListArray<O, M> {
}

/// Creates a new [`MutableListArray`] from a [`MutableArray`].
pub fn new_with_field(values: M, name: &str, nullable: bool) -> Self {
pub fn new_with_field(values: M, name: PlSmallStr, nullable: bool) -> Self {
let field = Box::new(Field::new(name, values.data_type().clone(), nullable));
let data_type = if O::IS_LARGE {
ArrowDataType::LargeList(field)
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-arrow/src/array/primitive/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ pub fn get_write_value<'a, T: NativeType, F: Write>(
Time64(_) => unreachable!(), // remaining are not valid
Timestamp(time_unit, tz) => {
if let Some(tz) = tz {
let timezone = temporal_conversions::parse_offset(tz);
let timezone = temporal_conversions::parse_offset(tz.as_str());
match timezone {
Ok(timezone) => {
dyn_primitive!(array, i64, |time| {
Expand All @@ -65,7 +65,7 @@ pub fn get_write_value<'a, T: NativeType, F: Write>(
},
#[cfg(feature = "chrono-tz")]
Err(_) => {
let timezone = temporal_conversions::parse_offset_tz(tz);
let timezone = temporal_conversions::parse_offset_tz(tz.as_str());
match timezone {
Ok(timezone) => dyn_primitive!(array, i64, |time| {
temporal_conversions::timestamp_to_datetime(
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-arrow/src/compute/cast/primitive_to.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::hash::Hash;

use num_traits::{AsPrimitive, Float, ToPrimitive};
use polars_error::PolarsResult;
use polars_utils::pl_str::PlSmallStr;

use super::CastOptionsImpl;
use crate::array::*;
Expand Down Expand Up @@ -434,7 +435,7 @@ pub fn timestamp_to_timestamp(
from: &PrimitiveArray<i64>,
from_unit: TimeUnit,
to_unit: TimeUnit,
tz: &Option<String>,
tz: &Option<PlSmallStr>,
) -> PrimitiveArray<i64> {
let from_size = time_unit_multiple(from_unit);
let to_size = time_unit_multiple(to_unit);
Expand Down
8 changes: 4 additions & 4 deletions crates/polars-arrow/src/compute/temporal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,12 @@ macro_rules! date_like {
ArrowDataType::Timestamp(time_unit, Some(timezone_str)) => {
let array = $array.as_any().downcast_ref().unwrap();

if let Ok(timezone) = parse_offset(timezone_str) {
if let Ok(timezone) = parse_offset(timezone_str.as_str()) {
Ok(extract_impl(array, *time_unit, timezone, |x| {
x.$extract().try_into().unwrap()
}))
} else {
chrono_tz(array, *time_unit, timezone_str, |x| {
chrono_tz(array, *time_unit, timezone_str.as_str(), |x| {
x.$extract().try_into().unwrap()
})
}
Expand Down Expand Up @@ -129,12 +129,12 @@ macro_rules! time_like {
ArrowDataType::Timestamp(time_unit, Some(timezone_str)) => {
let array = $array.as_any().downcast_ref().unwrap();

if let Ok(timezone) = parse_offset(timezone_str) {
if let Ok(timezone) = parse_offset(timezone_str.as_str()) {
Ok(extract_impl(array, *time_unit, timezone, |x| {
x.$extract().try_into().unwrap()
}))
} else {
chrono_tz(array, *time_unit, timezone_str, |x| {
chrono_tz(array, *time_unit, timezone_str.as_str(), |x| {
x.$extract().try_into().unwrap()
})
}
Expand Down
30 changes: 23 additions & 7 deletions crates/polars-arrow/src/datatypes/field.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use polars_utils::pl_str::PlSmallStr;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

Expand All @@ -15,7 +16,7 @@ use super::{ArrowDataType, Metadata};
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Field {
/// Its name
pub name: String,
pub name: PlSmallStr,
/// Its logical [`ArrowDataType`]
pub data_type: ArrowDataType,
/// Its nullability
Expand All @@ -26,9 +27,9 @@ pub struct Field {

impl Field {
/// Creates a new [`Field`].
pub fn new<T: Into<String>>(name: T, data_type: ArrowDataType, is_nullable: bool) -> Self {
pub fn new(name: PlSmallStr, data_type: ArrowDataType, is_nullable: bool) -> Self {
Field {
name: name.into(),
name,
data_type,
is_nullable,
metadata: Default::default(),
Expand Down Expand Up @@ -56,8 +57,18 @@ impl Field {
#[cfg(feature = "arrow_rs")]
impl From<Field> for arrow_schema::Field {
fn from(value: Field) -> Self {
Self::new(value.name, value.data_type.into(), value.is_nullable)
.with_metadata(value.metadata.into_iter().collect())
Self::new(
value.name.to_string(),
value.data_type.into(),
value.is_nullable,
)
.with_metadata(
value
.metadata
.into_iter()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect(),
)
}
}

Expand All @@ -75,9 +86,14 @@ impl From<&arrow_schema::Field> for Field {
let metadata = value
.metadata()
.iter()
.map(|(k, v)| (k.clone(), v.clone()))
.map(|(k, v)| (PlSmallStr::from_str(k), PlSmallStr::from_str(v)))
.collect();
Self::new(value.name(), data_type, value.is_nullable()).with_metadata(metadata)
Self::new(
PlSmallStr::from_str(value.name().as_str()),
data_type,
value.is_nullable(),
)
.with_metadata(metadata)
}
}

Expand Down
25 changes: 15 additions & 10 deletions crates/polars-arrow/src/datatypes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@ use std::sync::Arc;

pub use field::Field;
pub use physical_type::*;
use polars_utils::pl_str::PlSmallStr;
pub use schema::{ArrowSchema, ArrowSchemaRef};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

/// typedef for [BTreeMap<String, String>] denoting [`Field`]'s and [`ArrowSchema`]'s metadata.
pub type Metadata = BTreeMap<String, String>;
/// typedef for [Option<(String, Option<String>)>] descr
pub(crate) type Extension = Option<(String, Option<String>)>;
/// typedef for [BTreeMap<PlSmallStr, PlSmallStr>] denoting [`Field`]'s and [`ArrowSchema`]'s metadata.
pub type Metadata = BTreeMap<PlSmallStr, PlSmallStr>;
/// typedef for [Option<(PlSmallStr, Option<PlSmallStr>)>] describing an extension type: its name and optional metadata.
pub(crate) type Extension = Option<(PlSmallStr, Option<PlSmallStr>)>;

/// The set of supported logical types in this crate.
///
Expand Down Expand Up @@ -70,7 +71,7 @@ pub enum ArrowDataType {
///
/// When the timezone is not specified, the timestamp is considered to have no timezone
/// and is represented _as is_
Timestamp(TimeUnit, Option<String>),
Timestamp(TimeUnit, Option<PlSmallStr>),
/// An [`i32`] representing the elapsed time since UNIX epoch (1970-01-01)
/// in days.
Date32,
Expand Down Expand Up @@ -163,7 +164,7 @@ pub enum ArrowDataType {
/// - name
/// - physical type
/// - metadata
Extension(String, Box<ArrowDataType>, Option<String>),
Extension(PlSmallStr, Box<ArrowDataType>, Option<PlSmallStr>),
/// A binary type that inlines small values
/// and can intern bytes.
BinaryView,
Expand Down Expand Up @@ -193,7 +194,9 @@ impl From<ArrowDataType> for arrow_schema::DataType {
ArrowDataType::Float16 => Self::Float16,
ArrowDataType::Float32 => Self::Float32,
ArrowDataType::Float64 => Self::Float64,
ArrowDataType::Timestamp(unit, tz) => Self::Timestamp(unit.into(), tz.map(Into::into)),
ArrowDataType::Timestamp(unit, tz) => {
Self::Timestamp(unit.into(), tz.map(|x| Arc::<str>::from(x.as_str())))
},
ArrowDataType::Date32 => Self::Date32,
ArrowDataType::Date64 => Self::Date64,
ArrowDataType::Time32(unit) => Self::Time32(unit.into()),
Expand Down Expand Up @@ -260,7 +263,7 @@ impl From<arrow_schema::DataType> for ArrowDataType {
DataType::Float32 => Self::Float32,
DataType::Float64 => Self::Float64,
DataType::Timestamp(unit, tz) => {
Self::Timestamp(unit.into(), tz.map(|x| x.to_string()))
Self::Timestamp(unit.into(), tz.map(|x| PlSmallStr::from_str(x.as_ref())))
},
DataType::Date32 => Self::Date32,
DataType::Date64 => Self::Date64,
Expand Down Expand Up @@ -609,8 +612,10 @@ pub type SchemaRef = Arc<ArrowSchema>;

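/// Returns the extension name and optional extension metadata stored under the
/// `ARROW:extension:name` and `ARROW:extension:metadata` keys of `metadata`, or `None` if no
/// extension name is present.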
pub fn get_extension(metadata: &Metadata) -> Extension {
if let Some(name) = metadata.get("ARROW:extension:name") {
let metadata = metadata.get("ARROW:extension:metadata").cloned();
if let Some(name) = metadata.get(&PlSmallStr::from_static("ARROW:extension:name")) {
let metadata = metadata
.get(&PlSmallStr::from_static("ARROW:extension:metadata"))
.cloned();
Some((name.clone(), metadata))
} else {
None
Expand Down
Loading

0 comments on commit 2c5332e

Please sign in to comment.