From 4512d261a14c846b16d0736c11d3f33cdd0e9542 Mon Sep 17 00:00:00 2001
From: sorhawell <sorhawell@gmail.com>
Date: Fri, 29 Sep 2023 11:10:21 +0300
Subject: [PATCH 01/14] begining

---
 src/rust/src/rlib.rs      | 18 ++++++++++++++++++
 src/rust/src/utils/mod.rs | 25 +++++++++++++++++++++++--
 2 files changed, 41 insertions(+), 2 deletions(-)
diff --git a/src/rust/src/rlib.rs b/src/rust/src/rlib.rs
index e5ae8d5ca..8bff98dd3 100644
--- a/src/rust/src/rlib.rs
+++ b/src/rust/src/rlib.rs
@@ -3,10 +3,12 @@ use crate::lazy::dsl::ProtoExprArray;
 use crate::rdataframe::DataFrame;
 use crate::robj_to;
 
+use crate::rdataframe::LazyFrame;
 use crate::rpolarserr::{rdbg, RResult};
 use crate::series::Series;
 use crate::{rdataframe::VecDataFrame, utils::r_result_list};
 use extendr_api::prelude::*;
+use polars::lazy::dsl;
 use polars::prelude as pl;
 use polars_core::functions as pl_functions;
 use std::result::Result;
@@ -44,6 +46,21 @@ fn concat_df(vdf: &VecDataFrame) -> List {
     r_result_list(result.map_err(|err| format!("{:?}", err)))
 }
 
+#[extendr]
+fn concat_lf(l: Robj, rechunk: bool, parallel: bool, to_supertypes: bool) -> RResult<LazyFrame> {
+    let vlf = robj_to!(Vec, PLLazyFrame, l)?;
+    dsl::concat(
+        vlf,
+        pl::UnionArgs {
+            parallel,
+            rechunk,
+            to_supertypes,
+        },
+    )
+    .map_err(polars_to_rpolars_err)
+    .map(LazyFrame)
+}
+
 #[extendr]
 fn diag_concat_df(dfs: &VecDataFrame) -> List {
     let df = pl_functions::diag_concat_df(&dfs.0[..]).map(DataFrame);
@@ -278,6 +295,7 @@ fn polars_features() -> List {
 extendr_module! {
     mod rlib;
     fn concat_df;
+    fn concat_lf;
     fn hor_concat_df;
     fn diag_concat_df;
     fn min_exprs;
diff --git a/src/rust/src/utils/mod.rs b/src/rust/src/utils/mod.rs
index 5b3a0e87b..e9186ce0f 100644
--- a/src/rust/src/utils/mod.rs
+++ b/src/rust/src/utils/mod.rs
@@ -6,10 +6,12 @@ pub mod wrappers;
 use crate::lazy::dsl::Expr;
 use crate::rdatatype::RPolarsDataType;
 use crate::rpolarserr::{rdbg, rerr, RPolarsErr, RResult, WithRctx};
+
 use extendr_api::prelude::list;
 use std::any::type_name as tn;
 //use std::intrinsics::read_via_copy;
 use crate::lazy::dsl::robj_to_col;
+use crate::rdataframe::{DataFrame, LazyFrame};
 use extendr_api::Attributes;
 use extendr_api::ExternalPtr;
 use extendr_api::Result as ExtendrResult;
@@ -742,8 +744,23 @@ fn internal_rust_wrap_e(robj: Robj, str_to_lit: bool) -> RResult<Expr> {
 pub fn robj_to_lazyframe(robj: extendr_api::Robj) -> RResult<crate::rdataframe::LazyFrame> {
     let robj = unpack_r_result_list(robj)?;
     let rv = rdbg(&robj);
-    use crate::rdataframe::LazyFrame;
-    let res: Result<ExternalPtr<LazyFrame>, _> = robj.try_into();
+
+    // closure to allow ?-convert extendr::Result to RResult
+    let res = || -> RResult<LazyFrame> {
+        match () {
+            // allow input as a DataFrame
+            _ if robj.inherits("DataFrame") => {
+                let extptr_df: ExternalPtr<DataFrame> = robj.try_into()?;
+                Ok(extptr_df.lazy())
+            }
+            _ => {
+                let lf: ExternalPtr<LazyFrame> = robj.try_into()?;
+                let lf = LazyFrame(lf.0.clone());
+                Ok(lf)
+            }
+        }
+    }();
+
     let ext_ldf = res.bad_val(rv).mistyped(tn::<LazyFrame>())?;
     Ok(LazyFrame(ext_ldf.0.clone()))
 }
@@ -895,6 +912,10 @@ macro_rules! robj_to_inner {
         $crate::utils::robj_to_lazyframe($a)
     };
 
+    (PLLazyFrame, $a:ident) => {
+        $crate::utils::robj_to_lazyframe($a).map(|lf| lf.0)
+    };
+
     (RArrow_schema, $a:ident) => {
         $crate::utils::robj_to_rarrow_schema($a)
     };

From 1b2b9fb8e6fb0a46e03fad6ff472e2263eb62e9d Mon Sep 17 00:00:00 2001
From: sorhawell <sorhawell@gmail.com>
Date: Sun, 1 Oct 2023 01:29:52 +0300
Subject: [PATCH 02/14] almost there

---
 R/error__rpolarserr.R          |   4 +-
 R/extendr-wrappers.R           |  30 ++++---
 R/functions__eager.R           |  78 ++++++++++++------
 R/lazyframe__lazy.R            |   1 +
 R/series__series.R             |   2 +-
 man/Series_dtype.Rd            |  11 ---
 man/Series_flags.Rd            |  21 +++++
 man/pl_concat.Rd               |   4 +-
 src/rust/src/concat.rs         | 139 +++++++++++++++++++++++++++++++++
 src/rust/src/lib.rs            |   3 +
 src/rust/src/rdataframe/mod.rs |   3 +-
 src/rust/src/rlib.rs           |  70 +----------------
 src/rust/src/utils/mod.rs      |  73 +++++++++++++++--
 13 files changed, 310 insertions(+), 129 deletions(-)
 create mode 100644 man/Series_flags.Rd
 create mode 100644 src/rust/src/concat.rs

diff --git a/R/error__rpolarserr.R b/R/error__rpolarserr.R
index 37a86b041..af8cd83be 100644
--- a/R/error__rpolarserr.R
+++ b/R/error__rpolarserr.R
@@ -53,8 +53,8 @@ bad_robj = function(r) {
   .pr$RPolarsErr$new()$bad_robj(r)
 }
 
-Err_plain = function(x) {
-  Err(.pr$RPolarsErr$new()$plain(x))
+Err_plain = function(...) {
+  Err(.pr$RPolarsErr$new()$plain(paste(..., collapse=" ")))
 }
 
 # short hand for extracting an error context in unit testing, will raise error if not an RPolarsErr
diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R
index 91338ef9a..2cbe30350 100644
--- a/R/extendr-wrappers.R
+++ b/R/extendr-wrappers.R
@@ -11,18 +11,6 @@
 #' @useDynLib polars, .registration = TRUE
 NULL
 
-rlazy_csv_reader <- function(path, sep, has_header, ignore_errors, skip_rows, n_rows, cache, overwrite_dtype, low_memory, comment_char, quote_char, null_values, infer_schema_length, skip_rows_after_header, encoding, row_count_name, row_count_offset, parse_dates) .Call(wrap__rlazy_csv_reader, path, sep, has_header, ignore_errors, skip_rows, n_rows, cache, overwrite_dtype, low_memory, comment_char, quote_char, null_values, infer_schema_length, skip_rows_after_header, encoding, row_count_name, row_count_offset, parse_dates)
-
-import_arrow_ipc <- function(path, n_rows, cache, rechunk, row_name, row_count, memmap) .Call(wrap__import_arrow_ipc, path, n_rows, cache, rechunk, row_name, row_count, memmap)
-
-new_from_parquet <- function(path, n_rows, cache, parallel, rechunk, row_name, row_count, low_memory) .Call(wrap__new_from_parquet, path, n_rows, cache, parallel, rechunk, row_name, row_count, low_memory)
-
-concat_df <- function(vdf) .Call(wrap__concat_df, vdf)
-
-hor_concat_df <- function(dfs) .Call(wrap__hor_concat_df, dfs)
-
-diag_concat_df <- function(dfs) .Call(wrap__diag_concat_df, dfs)
-
 min_exprs <- function(exprs) .Call(wrap__min_exprs, exprs)
 
 max_exprs <- function(exprs) .Call(wrap__max_exprs, exprs)
@@ -75,6 +63,24 @@ test_wrong_call_pl_lit <- function(robj) .Call(wrap__test_wrong_call_pl_lit, rob
 
 polars_features <- function() .Call(wrap__polars_features)
 
+concat_df <- function(vdf) .Call(wrap__concat_df, vdf)
+
+concat_lf <- function(l, rechunk, parallel, to_supertypes) .Call(wrap__concat_lf, l, rechunk, parallel, to_supertypes)
+
+diag_concat_df <- function(dfs) .Call(wrap__diag_concat_df, dfs)
+
+diag_concat_lf <- function(l, rechunk, parallel) .Call(wrap__diag_concat_lf, l, rechunk, parallel)
+
+hor_concat_df <- function(l) .Call(wrap__hor_concat_df, l)
+
+concat_series <- function(l, rechunk, to_supertypes) .Call(wrap__concat_series, l, rechunk, to_supertypes)
+
+rlazy_csv_reader <- function(path, sep, has_header, ignore_errors, skip_rows, n_rows, cache, overwrite_dtype, low_memory, comment_char, quote_char, null_values, infer_schema_length, skip_rows_after_header, encoding, row_count_name, row_count_offset, parse_dates) .Call(wrap__rlazy_csv_reader, path, sep, has_header, ignore_errors, skip_rows, n_rows, cache, overwrite_dtype, low_memory, comment_char, quote_char, null_values, infer_schema_length, skip_rows_after_header, encoding, row_count_name, row_count_offset, parse_dates)
+
+import_arrow_ipc <- function(path, n_rows, cache, rechunk, row_name, row_count, memmap) .Call(wrap__import_arrow_ipc, path, n_rows, cache, rechunk, row_name, row_count, memmap)
+
+new_from_parquet <- function(path, n_rows, cache, parallel, rechunk, row_name, row_count, low_memory) .Call(wrap__new_from_parquet, path, n_rows, cache, parallel, rechunk, row_name, row_count, low_memory)
+
 test_rpolarserr <- function() .Call(wrap__test_rpolarserr)
 
 setup_renv <- function() .Call(wrap__setup_renv)
diff --git a/R/functions__eager.R b/R/functions__eager.R
index 7e98573c8..e1c5a7a30 100644
--- a/R/functions__eager.R
+++ b/R/functions__eager.R
@@ -3,7 +3,8 @@
 #' @param l list of DataFrame, or Series, LazyFrame or Expr
 #' @param rechunk perform a rechunk at last
 #' @param how choice of bind direction "vertical"(rbind) "horizontal"(cbind) "diagonal" diagonally
-#' @param parallel BOOL default TRUE, only used for LazyFrames
+#' @param parallel Boolean default TRUE, only used for LazyFrames
+#' @param to_supertypes Boolean default TRUE, cast columns shared super types, if any.
 #'
 #' @details
 #' Categorical columns/Series must have been constructed while global string cache enabled
@@ -37,41 +38,68 @@
 #' # diagonal
 #' pl$concat(l_hor, how = "diagonal")
 pl$concat = function(
-    l, # list of DataFrames or Series or lazyFrames or expr
+    ..., # list of DataFrames or Series or lazyFrames or expr
     rechunk = TRUE,
-    how = c("vertical", "horizontal", "diagonal"),
-    parallel = TRUE # not used yet
+    how = c("vertical", "horizontal", "diagonal"),#, "vertical_relaxed","diangonal_relaxed"),
+    parallel = TRUE,
+    eager = FALSE,
+    to_supertypes = FALSE
     ) {
+  browser()
+  l = unpack_list(...)
   ## Check inputs
-  how = match.arg(how[1L], c("vertical", "horizontal", "diagonal"))
+  how_args =  c("vertical", "horizontal", "diagonal")#, "vertical_relaxed", "diangonal_relaxed")
+
+  how = match.arg(how[1L], how_args) |>
+    result() |>
+    unwrap("in pl$concat()")
+
+  first = l[[1]]
+  eager = isTRUE(eager) #!inherits(first,"LazyFrame")
+  args_modified = names(as.list(sys.call()[-1]))
 
   # dispatch on item class and how
-  first = l[[1L]]
-  result = pcase(
-    inherits(first, "DataFrame"),
-    {
-      vdf = l_to_vdf(l)
-      pcase(
-        how == "vertical",   concat_df(vdf),
-        how == "diagonal",   diag_concat_df(vdf),
-        how == "horizontal", hor_concat_df(vdf),
-        or_else = stopf("Internal error")
-      )
+
+  pcase(
+    how == "vertical" && (inherits(first, "Series") || is.vector(first)), {
+      if(any(args_modified %in% c("eager","parallel", "to_super_types", "rechunk"))) {
+        warning(
+          "args: parallel and eager takes no effect when concat Series"
+        )
+      }
+      concat_series(l, rechunk, to_supertypes)
     },
-    inherits(first, "Series"),
-    {
-      stopf("not implemented Series")
+
+    how == "vertical",  concat_lf(l, rechunk, parallel, to_supertypes),
+    how == "diagonal",  diag_concat_lf(l, rechunk, parallel, to_supertypes),
+
+    how == "horizontal" && !eager, {
+       Err_plain("how=='horizontal' is not supported for !eager. Try e.g. <LazyFrame>$join() .")
     },
-    inherits(first, "Expr"),
-    {
-      stopf("not implemented Expr")
+
+    how == "horizontal", {
+      if(any(args_modified %in% c("parallel", "to_super_types"))) {
+        warning(
+          "args parallel, rechunk, eager and to_supertypes",
+          "takes no effect for how=='horizontal'"
+        )
+      }
+      concat_df(l)
     },
 
     # TODO implement Series, Expr, Lazy etc
-    or_else = stopf(paste0("type of first list element: '", class(first), "' is not supported"))
-  )
+    or_else = Err_plain("type of first list element: '", class(first), "' is not supported")
 
-  unwrap(result)
+  ) |> and_then(\(x) {
+    pcase(
+      inherits(x,"DataFrame") && !eager, Err_plain("internal logical error in pl$concat()"),
+      inherits(x,"LazyFrame") &&  eager, Ok(x$lazy()),
+      or_else = Ok(x)
+    )
+
+  }) |> unwrap(
+    "in pl$concat()"
+  )
 }
 
 
diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R
index febc64dca..f701ebde1 100644
--- a/R/lazyframe__lazy.R
+++ b/R/lazyframe__lazy.R
@@ -353,6 +353,7 @@ LazyFrame_collect = function(
       streaming
     ) |>
     and_then(collect_f) |>
+
     unwrap("in $collect():")
 }
 
diff --git a/R/series__series.R b/R/series__series.R
index ec7c90993..18f7c6e35 100644
--- a/R/series__series.R
+++ b/R/series__series.R
@@ -763,7 +763,7 @@ Series_dtype = method_as_property(function() {
 #' @keywords Series
 #' @return DataType
 #' @aliases Series_flags
-#' @name Series_dtype
+#' @name Series_flags
 #' @details property sorted flags are not settable, use set_sorted
 #' @examples
 #' pl$Series(1:4)$sort()$flags
diff --git a/man/Series_dtype.Rd b/man/Series_dtype.Rd
index a29fa59a8..8d591f6e6 100644
--- a/man/Series_dtype.Rd
+++ b/man/Series_dtype.Rd
@@ -2,30 +2,19 @@
 % Please edit documentation in R/series__series.R
 \name{Series_dtype}
 \alias{Series_dtype}
-\alias{Series_flags}
 \title{Get data type of Series}
 \usage{
 Series_dtype()
-
-Series_flags()
 }
 \value{
-DataType
-
 DataType
 }
 \description{
 Get data type of Series
-
-Get data type of Series
-}
-\details{
-property sorted flags are not settable, use set_sorted
 }
 \examples{
 pl$Series(1:4)$dtype
 pl$Series(c(1, 2))$dtype
 pl$Series(letters)$dtype
-pl$Series(1:4)$sort()$flags
 }
 \keyword{Series}
diff --git a/man/Series_flags.Rd b/man/Series_flags.Rd
new file mode 100644
index 000000000..d909d2c6a
--- /dev/null
+++ b/man/Series_flags.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/series__series.R
+\name{Series_flags}
+\alias{Series_flags}
+\title{Get data type of Series}
+\usage{
+Series_flags()
+}
+\value{
+DataType
+}
+\description{
+Get data type of Series
+}
+\details{
+property sorted flags are not settable, use set_sorted
+}
+\examples{
+pl$Series(1:4)$sort()$flags
+}
+\keyword{Series}
diff --git a/man/pl_concat.Rd b/man/pl_concat.Rd
index 288a30d37..732596535 100644
--- a/man/pl_concat.Rd
+++ b/man/pl_concat.Rd
@@ -10,7 +10,9 @@
 
 \item{how}{choice of bind direction "vertical"(rbind) "horizontal"(cbind) "diagonal" diagonally}
 
-\item{parallel}{BOOL default TRUE, only used for LazyFrames}
+\item{parallel}{Boolean default TRUE, only used for LazyFrames}
+
+\item{to_supertypes}{Boolean default TRUE, cast columns shared super types, if any.}
 }
 \value{
 DataFrame, or Series, LazyFrame or Expr
diff --git a/src/rust/src/concat.rs b/src/rust/src/concat.rs
new file mode 100644
index 000000000..a580a53e4
--- /dev/null
+++ b/src/rust/src/concat.rs
@@ -0,0 +1,139 @@
+use crate::rdataframe::DataFrame;
+use crate::robj_to;
+
+use crate::rdataframe::LazyFrame;
+use crate::rpolarserr::*;
+use crate::series::Series;
+use crate::{rdataframe::VecDataFrame, utils::r_result_list};
+use extendr_api::prelude::*;
+use polars::lazy::dsl;
+use polars::prelude as pl;
+use polars_core;
+use polars_core::functions as pl_functions;
+use std::result::Result;
+
+#[extendr]
+fn concat_df(vdf: &VecDataFrame) -> List {
+    //-> PyResult<PyDataFrame> {
+
+    use polars_core::error::PolarsResult;
+    use polars_core::utils::rayon::prelude::*;
+
+    let identity_df = (*vdf.0.iter().peekable().peek().unwrap())
+        .clone()
+        .slice(0, 0);
+    let rdfs: Vec<pl::PolarsResult<pl::DataFrame>> =
+        vdf.0.iter().map(|df| Ok(df.clone())).collect();
+    let identity = || Ok(identity_df.clone());
+
+    let result = polars_core::POOL
+        .install(|| {
+            rdfs.into_par_iter()
+                .fold(identity, |acc: PolarsResult<pl::DataFrame>, df| {
+                    let mut acc = acc?;
+                    acc.vstack_mut(&df?)?;
+                    Ok(acc)
+                })
+                .reduce(identity, |acc, df| {
+                    let mut acc = acc?;
+                    acc.vstack_mut(&df?)?;
+                    Ok(acc)
+                })
+        })
+        .map(DataFrame);
+
+    r_result_list(result.map_err(|err| format!("{:?}", err)))
+}
+
+#[extendr]
+fn concat_lf(l: Robj, rechunk: bool, parallel: bool, to_supertypes: bool) -> RResult<LazyFrame> {
+    let vlf = robj_to!(Vec, PLLazyFrame, l)?;
+    dsl::concat(
+        vlf,
+        pl::UnionArgs {
+            parallel,
+            rechunk,
+            to_supertypes,
+        },
+    )
+    .map_err(polars_to_rpolars_err)
+    .map(LazyFrame)
+}
+
+#[extendr]
+fn diag_concat_df(dfs: Robj) -> RResult<DataFrame> {
+    let df_vec = robj_to!(Vec, PLDataFrame, dfs)?;
+    pl_functions::diag_concat_df(&df_vec)
+        .map_err(polars_to_rpolars_err)
+        .map(DataFrame)
+}
+
+#[extendr]
+fn diag_concat_lf(l: Robj, rechunk: bool, parallel: bool) -> RResult<LazyFrame> {
+    let vlf = robj_to!(Vec, PLLazyFrame, l)?;
+    dsl::diag_concat_lf(vlf, rechunk, parallel)
+        .map_err(polars_to_rpolars_err)
+        .map(LazyFrame)
+}
+
+#[extendr]
+pub fn hor_concat_df(l: Robj) -> RResult<DataFrame> {
+    let df_vec = robj_to!(Vec, PLDataFrame, l)?;
+    pl_functions::hor_concat_df(&df_vec)
+        .map_err(polars_to_rpolars_err)
+        .map(DataFrame)
+}
+
+#[extendr]
+pub fn concat_series(l: Robj, rechunk: Robj, to_supertypes: Robj) -> RResult<Series> {
+    let to_supertypes = robj_to!(bool, to_supertypes)?;
+    let mut s_vec = robj_to!(Vec, PLSeries, l)?;
+
+    // find any common supertype and cast to it
+    if to_supertypes {
+        let shared_supertype: RResult<Option<pl::DataType>> = s_vec
+            .iter()
+            .map(|s| s.dtype().clone())
+            .fold(Ok(None), |acc, x| match acc {
+                Err(err) => Err(err),
+                Ok(None) => Ok(Some(x)),
+                Ok(Some(acc)) => polars_core::utils::get_supertype(&acc, &x)
+                    .ok_or(RPolarsErr::new().plain("Series' have no common supertype".to_string()))
+                    .map(|dt| Some(dt)),
+            });
+        let shared_supertype = shared_supertype?.expect("cannot be None, unless empty s_vec");
+
+        for i in 0..s_vec.len() {
+            if *s_vec[i].dtype() != shared_supertype {
+                s_vec[i] = s_vec[i]
+                    .cast(&shared_supertype)
+                    .map_err(polars_to_rpolars_err)?;
+            };
+        }
+    }
+
+    let mut iter = s_vec.into_iter();
+    let mut first_s = iter
+        .next()
+        .ok_or(RPolarsErr::new().plain("no series found to concatenate".into()))?;
+    for next_s in iter {
+        first_s.append(&next_s).map_err(polars_to_rpolars_err)?;
+    }
+
+    if robj_to!(bool, rechunk)? {
+        Ok(first_s.rechunk().into())
+    } else {
+        Ok(first_s.into())
+    }
+}
+
+extendr_module! {
+    mod concat;
+
+    fn concat_df;
+    fn concat_lf;
+    fn diag_concat_df;
+    fn diag_concat_lf;
+    fn hor_concat_df;
+    fn concat_series;
+}
diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs
index 0c62951fb..a8198fbdd 100644
--- a/src/rust/src/lib.rs
+++ b/src/rust/src/lib.rs
@@ -13,6 +13,7 @@ pub mod concurrent;
 pub mod lazy;
 
 pub mod arrow_interop;
+pub mod concat;
 pub mod conversion;
 pub mod conversion_r_to_s;
 pub mod conversion_s_to_r;
@@ -42,6 +43,8 @@ pub use crate::rbackground::RBGPOOL;
 // Macro to generate exports
 extendr_module! {
     mod polars;
+    use rlib;
+    use concat;
     use rdataframe;
     use rpolarserr;
     use rbackground;
diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs
index 63f54048d..d98d4c53e 100644
--- a/src/rust/src/rdataframe/mod.rs
+++ b/src/rust/src/rdataframe/mod.rs
@@ -8,7 +8,6 @@ use crate::conversion_r_to_s::robjname2series;
 use crate::lazy;
 use crate::rdatatype;
 use crate::rdatatype::RPolarsDataType;
-use crate::rlib;
 use crate::robj_to;
 use crate::rpolarserr::{polars_to_rpolars_err, RResult};
 
@@ -463,7 +462,7 @@ extendr_module! {
     use read_ipc;
     use read_parquet;
     use rdatatype;
-    use rlib;
+
     impl DataFrame;
     impl VecDataFrame;
 }
diff --git a/src/rust/src/rlib.rs b/src/rust/src/rlib.rs
index 8bff98dd3..b467a6575 100644
--- a/src/rust/src/rlib.rs
+++ b/src/rust/src/rlib.rs
@@ -2,77 +2,12 @@ use crate::lazy::dsl::Expr;
 use crate::lazy::dsl::ProtoExprArray;
 use crate::rdataframe::DataFrame;
 use crate::robj_to;
-
-use crate::rdataframe::LazyFrame;
 use crate::rpolarserr::{rdbg, RResult};
 use crate::series::Series;
-use crate::{rdataframe::VecDataFrame, utils::r_result_list};
 use extendr_api::prelude::*;
-use polars::lazy::dsl;
 use polars::prelude as pl;
-use polars_core::functions as pl_functions;
 use std::result::Result;
 
-#[extendr]
-fn concat_df(vdf: &VecDataFrame) -> List {
-    //-> PyResult<PyDataFrame> {
-
-    use polars_core::error::PolarsResult;
-    use polars_core::utils::rayon::prelude::*;
-
-    let identity_df = (*vdf.0.iter().peekable().peek().unwrap())
-        .clone()
-        .slice(0, 0);
-    let rdfs: Vec<pl::PolarsResult<pl::DataFrame>> =
-        vdf.0.iter().map(|df| Ok(df.clone())).collect();
-    let identity = || Ok(identity_df.clone());
-
-    let result = polars_core::POOL
-        .install(|| {
-            rdfs.into_par_iter()
-                .fold(identity, |acc: PolarsResult<pl::DataFrame>, df| {
-                    let mut acc = acc?;
-                    acc.vstack_mut(&df?)?;
-                    Ok(acc)
-                })
-                .reduce(identity, |acc, df| {
-                    let mut acc = acc?;
-                    acc.vstack_mut(&df?)?;
-                    Ok(acc)
-                })
-        })
-        .map(DataFrame);
-
-    r_result_list(result.map_err(|err| format!("{:?}", err)))
-}
-
-#[extendr]
-fn concat_lf(l: Robj, rechunk: bool, parallel: bool, to_supertypes: bool) -> RResult<LazyFrame> {
-    let vlf = robj_to!(Vec, PLLazyFrame, l)?;
-    dsl::concat(
-        vlf,
-        pl::UnionArgs {
-            parallel,
-            rechunk,
-            to_supertypes,
-        },
-    )
-    .map_err(polars_to_rpolars_err)
-    .map(LazyFrame)
-}
-
-#[extendr]
-fn diag_concat_df(dfs: &VecDataFrame) -> List {
-    let df = pl_functions::diag_concat_df(&dfs.0[..]).map(DataFrame);
-    r_result_list(df.map_err(|err| format!("{:?}", err)))
-}
-
-#[extendr]
-pub fn hor_concat_df(dfs: &VecDataFrame) -> List {
-    let df = pl_functions::hor_concat_df(&dfs.0[..]).map(DataFrame);
-    r_result_list(df.map_err(|err| format!("{:?}", err)))
-}
-
 #[extendr]
 fn min_exprs(exprs: &ProtoExprArray) -> Expr {
     let exprs = exprs.to_vec("select");
@@ -294,10 +229,7 @@ fn polars_features() -> List {
 
 extendr_module! {
     mod rlib;
-    fn concat_df;
-    fn concat_lf;
-    fn hor_concat_df;
-    fn diag_concat_df;
+
     fn min_exprs;
     fn max_exprs;
     fn coalesce_exprs;
diff --git a/src/rust/src/utils/mod.rs b/src/rust/src/utils/mod.rs
index e9186ce0f..6a158e3d5 100644
--- a/src/rust/src/utils/mod.rs
+++ b/src/rust/src/utils/mod.rs
@@ -3,10 +3,11 @@ pub mod extendr_concurrent;
 pub mod extendr_helpers;
 pub mod wrappers;
 
+use crate::conversion_r_to_s::robjname2series;
 use crate::lazy::dsl::Expr;
 use crate::rdatatype::RPolarsDataType;
-use crate::rpolarserr::{rdbg, rerr, RPolarsErr, RResult, WithRctx};
-
+use crate::rpolarserr::{polars_to_rpolars_err, rdbg, rerr, RPolarsErr, RResult, WithRctx};
+use crate::series::Series;
 use extendr_api::prelude::list;
 use std::any::type_name as tn;
 //use std::intrinsics::read_via_copy;
@@ -741,7 +742,7 @@ fn internal_rust_wrap_e(robj: Robj, str_to_lit: bool) -> RResult<Expr> {
     }
 }
 
-pub fn robj_to_lazyframe(robj: extendr_api::Robj) -> RResult<crate::rdataframe::LazyFrame> {
+pub fn robj_to_lazyframe(robj: extendr_api::Robj) -> RResult<LazyFrame> {
     let robj = unpack_r_result_list(robj)?;
     let rv = rdbg(&robj);
 
@@ -753,16 +754,60 @@ pub fn robj_to_lazyframe(robj: extendr_api::Robj) -> RResult<crate::rdataframe::
                 let extptr_df: ExternalPtr<DataFrame> = robj.try_into()?;
                 Ok(extptr_df.lazy())
             }
-            _ => {
+            _ if robj.inherits("LazyFrame") => {
                 let lf: ExternalPtr<LazyFrame> = robj.try_into()?;
                 let lf = LazyFrame(lf.0.clone());
                 Ok(lf)
             }
+            _ => Ok(DataFrame::new_with_capacity(1)
+                .lazy()
+                .0
+                .select(&[robj_to_rexpr(robj, true)?.0]))
+            .map(LazyFrame),
         }
     }();
 
-    let ext_ldf = res.bad_val(rv).mistyped(tn::<LazyFrame>())?;
-    Ok(LazyFrame(ext_ldf.0.clone()))
+    res.bad_val(rv).mistyped(tn::<LazyFrame>())
+}
+
+pub fn robj_to_dataframe(robj: extendr_api::Robj) -> RResult<DataFrame> {
+    let robj = unpack_r_result_list(robj)?;
+    let robj_clone = robj.clone();
+
+    // closure to allow ?-convert extendr::Result to RResult
+    let res = || -> RResult<DataFrame> {
+        match () {
+            // allow input as a DataFrame
+            _ if robj.inherits("DataFrame") => {
+                let extptr_df: ExternalPtr<DataFrame> = robj.try_into()?;
+                Ok(extptr_df.0.clone())
+            }
+            _ if robj.inherits("LazyFrame") => {
+                let lf: ExternalPtr<LazyFrame> = robj.try_into()?;
+                lf.0.clone().collect()
+            }
+            _ => DataFrame::new_with_capacity(1)
+                .lazy()
+                .0
+                .select(&[robj_to_rexpr(robj, true)?.0])
+                .collect(),
+        }
+        .map(DataFrame)
+        .map_err(polars_to_rpolars_err)
+    }();
+
+    res.bad_val(rdbg(robj_clone))
+        .plain("could not be converted into a DataFrame")
+}
+
+pub fn robj_to_series(robj: extendr_api::Robj) -> RResult<Series> {
+    let robj = unpack_r_result_list(robj)?;
+    let robj_clone = robj.clone();
+    robjname2series(robj, "")
+        .map(Series)
+        .map_err(polars_to_rpolars_err)
+        .bad_val(rdbg(robj_clone))
+        .plain("could not be converted into a DataFrame")
 }
 
 pub fn list_expr_to_vec_pl_expr(
@@ -865,6 +910,14 @@ macro_rules! robj_to_inner {
         $crate::utils::robj_to_binary_vec($a)
     };
 
+    (Series, $a:ident) => {
+        $crate::utils::robj_to_series($a)
+    };
+
+    (PLSeries, $a:ident) => {
+        $crate::utils::robj_to_series($a).map(|ok| ok.0)
+    };
+
     (Expr, $a:ident) => {
         $crate::utils::robj_to_rexpr($a, true)
     };
@@ -916,6 +969,14 @@ macro_rules! robj_to_inner {
         $crate::utils::robj_to_lazyframe($a).map(|lf| lf.0)
     };
 
+    (DataFrame, $a:ident) => {
+        $crate::utils::robj_to_dataframe($a)
+    };
+
+    (PLDataFrame, $a:ident) => {
+        $crate::utils::robj_to_dataframe($a).map(|lf| lf.0)
+    };
+
     (RArrow_schema, $a:ident) => {
         $crate::utils::robj_to_rarrow_schema($a)
     };

From d1234484e1c43e52ebdb400081f548ca2268c26e Mon Sep 17 00:00:00 2001
From: sorhawell <sorhawell@gmail.com>
Date: Sun, 1 Oct 2023 23:32:42 +0300
Subject: [PATCH 03/14] n_chunks + supertypes + unit tests + fix bugs

---
 R/dataframe__frame.R            | 19 ++++++++++
 R/error__rpolarserr.R           |  9 ++++-
 R/extendr-wrappers.R            |  6 +--
 R/functions__eager.R            | 57 +++++++++++++++++++---------
 R/utils.R                       |  9 ++++-
 man/DataFrame_n_chunks.Rd       | 27 +++++++++++++
 man/nanoarrow.Rd                |  8 ++--
 src/rust/src/concat.rs          | 47 +----------------------
 src/rust/src/rdataframe/mod.rs  | 21 ++++++++++-
 src/rust/src/utils/mod.rs       | 12 ++++++
 tests/testthat/test-concat.R    | 67 ++++++++++++++++++++++++++++++---
 tests/testthat/test-dataframe.R | 19 ++++++++++
 12 files changed, 219 insertions(+), 82 deletions(-)
 create mode 100644 man/DataFrame_n_chunks.Rd

diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R
index f2375b91d..9058074cb 100644
--- a/R/dataframe__frame.R
+++ b/R/dataframe__frame.R
@@ -1057,6 +1057,25 @@ DataFrame_first = function() {
   self$lazy()$first()$collect()
 }
 
+
+#' @title Get the number of chunks of the Series' in a DataFrame
+#' @keywords DataFrame
+#' @param strategy string either 'all' or 'first'. 'first' only returns chunks for first Series.
+#' @return real vector of chunk counts per Series.
+#' @examples
+#' df = pl$concat(
+#'   1:10,
+#'   pl$concat(1:5,1:5, rechunk = FALSE, how = "vertical")$rename("b"),
+#'   how = "horizontal"
+#' )
+#' df
+#' df$n_chunks()
+DataFrame_n_chunks = function(strategy = "all") {
+  .pr$DataFrame$n_chunks(self, strategy) |>
+    unwrap("in n_chunks():")
+}
+
+
 #' @title Get the last row of the DataFrame.
 #' @keywords DataFrame
 #' @return A DataFrame with one row.
diff --git a/R/error__rpolarserr.R b/R/error__rpolarserr.R
index af8cd83be..79fa27c68 100644
--- a/R/error__rpolarserr.R
+++ b/R/error__rpolarserr.R
@@ -58,7 +58,14 @@ Err_plain = function(...) {
 }
 
 # short hand for extracting an error context in unit testing, will raise error if not an RPolarsErr
-get_err_ctx = \(x) unwrap_err(result(x))$contexts()
+get_err_ctx = \(x, select = NULL) {
+  ctx = unwrap_err(result(x))$contexts()
+  if(is.null(select)) {
+    ctx
+  } else {
+    ctx[[match.arg(select,names(ctx))]]
+  }
+}
 
 
 # wrapper to return Result
diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R
index 2cbe30350..c76e32d63 100644
--- a/R/extendr-wrappers.R
+++ b/R/extendr-wrappers.R
@@ -63,12 +63,8 @@ test_wrong_call_pl_lit <- function(robj) .Call(wrap__test_wrong_call_pl_lit, rob
 
 polars_features <- function() .Call(wrap__polars_features)
 
-concat_df <- function(vdf) .Call(wrap__concat_df, vdf)
-
 concat_lf <- function(l, rechunk, parallel, to_supertypes) .Call(wrap__concat_lf, l, rechunk, parallel, to_supertypes)
 
-diag_concat_df <- function(dfs) .Call(wrap__diag_concat_df, dfs)
-
 diag_concat_lf <- function(l, rechunk, parallel) .Call(wrap__diag_concat_lf, l, rechunk, parallel)
 
 hor_concat_df <- function(l) .Call(wrap__hor_concat_df, l)
@@ -117,6 +113,8 @@ DataFrame <- new.env(parent = emptyenv())
 
 DataFrame$shape <- function() .Call(wrap__DataFrame__shape, self)
 
+DataFrame$n_chunks <- function(strategy) .Call(wrap__DataFrame__n_chunks, self, strategy)
+
 DataFrame$clone_see_me_macro <- function() .Call(wrap__DataFrame__clone_see_me_macro, self)
 
 DataFrame$default <- function() .Call(wrap__DataFrame__default)
diff --git a/R/functions__eager.R b/R/functions__eager.R
index e1c5a7a30..c673b9fd8 100644
--- a/R/functions__eager.R
+++ b/R/functions__eager.R
@@ -42,11 +42,14 @@ pl$concat = function(
     rechunk = TRUE,
     how = c("vertical", "horizontal", "diagonal"),#, "vertical_relaxed","diangonal_relaxed"),
     parallel = TRUE,
-    eager = FALSE,
+    #eager = FALSE,
     to_supertypes = FALSE
     ) {
-  browser()
-  l = unpack_list(...)
+  if(exists("do_browse", .GlobalEnv) && do_browse) browser()
+
+  #unpack arg list
+  l = unpack_list(..., skip_classes = "data.frame")
+
   ## Check inputs
   how_args =  c("vertical", "horizontal", "diagonal")#, "vertical_relaxed", "diangonal_relaxed")
 
@@ -54,46 +57,64 @@ pl$concat = function(
     result() |>
     unwrap("in pl$concat()")
 
-  first = l[[1]]
-  eager = isTRUE(eager) #!inherits(first,"LazyFrame")
-  args_modified = names(as.list(sys.call()[-1]))
+  first = l[[1L]]
+  eager = !inherits(first,"LazyFrame")
+  args_modified = names(as.list(sys.call()[-1L]))
 
   # dispatch on item class and how
 
-  pcase(
+  Result_out = pcase(
     how == "vertical" && (inherits(first, "Series") || is.vector(first)), {
-      if(any(args_modified %in% c("eager","parallel", "to_super_types", "rechunk"))) {
+      if(any(args_modified %in% c("parallel"))) {
         warning(
-          "args: parallel and eager takes no effect when concat Series"
+          "in pl:concat(): args: parallel takes no effect when concatenating Series",
+          call. = FALSE
         )
       }
       concat_series(l, rechunk, to_supertypes)
     },
 
     how == "vertical",  concat_lf(l, rechunk, parallel, to_supertypes),
-    how == "diagonal",  diag_concat_lf(l, rechunk, parallel, to_supertypes),
+    how == "diagonal",  {
+      if(any(args_modified %in% c("to_supertypes"))) {
+        warning(
+          "Args to_supertypes",
+          "takes no effect for how=='diagonal'",
+          call. = FALSE
+        )
+      }
+      diag_concat_lf(l, rechunk, parallel)
+    },
 
     how == "horizontal" && !eager, {
-       Err_plain("how=='horizontal' is not supported for !eager. Try e.g. <LazyFrame>$join() .")
+       Err_plain(
+         "how=='horizontal' is not supported for lazy (first element is LazyFrame).",
+         "Try e.g. <LazyFrame>$join() to get Lazy join or pl$concat(lf1$collect(),lf2,lf3).",
+         "to get a eager horizontal concatenation"
+        )
     },
 
     how == "horizontal", {
-      if(any(args_modified %in% c("parallel", "to_super_types"))) {
+      if(any(args_modified %in% c("parallel", "to_supertypes"))) {
         warning(
-          "args parallel, rechunk, eager and to_supertypes",
-          "takes no effect for how=='horizontal'"
+          "Args parallel, rechunk, eager and to_supertypes",
+          "takes no effect for how=='horizontal'",
+          call. = FALSE
         )
       }
-      concat_df(l)
+      hor_concat_df(l)
     },
 
     # TODO implement Series, Expr, Lazy etc
-    or_else = Err_plain("type of first list element: '", class(first), "' is not supported")
+    or_else = Err_plain("internal error:", how, "not handled")
+
+  )
 
-  ) |> and_then(\(x) {
+  #convert back from lazy if eager
+  and_then(Result_out, \(x) {
     pcase(
       inherits(x,"DataFrame") && !eager, Err_plain("internal logical error in pl$concat()"),
-      inherits(x,"LazyFrame") &&  eager, Ok(x$lazy()),
+      inherits(x,"LazyFrame") &&  eager, Ok(x$collect()),
       or_else = Ok(x)
     )
 
diff --git a/R/utils.R b/R/utils.R
index 2434366a9..8aea0aada 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -93,6 +93,7 @@ list2 = list
 #' Internal unpack list
 #' @noRd
 #' @param l any list
+#' @param skip_classes char vec, do not unpack list inherits skip_classes.
 #' @details py-polars syntax only allows e.g. `df.select([expr1, expr2,])` and not
 #' `df.select(expr1, expr2,)`. r-polars also allows user to directly write
 #' `df$select(expr1, expr2)` or `df$select(list(expr1,expr2))`. Unpack list
@@ -103,9 +104,13 @@ list2 = list
 #' f = \(...) unpack_list(list(...))
 #' identical(f(list(1L, 2L, 3L)), f(1L, 2L, 3L)) # is TRUE
 #' identical(f(list(1L, 2L), 3L), f(1L, 2L, 3L)) # is FALSE
-unpack_list = function(...) {
+unpack_list = function(..., skip_classes=NULL) {
   l = list2(...)
-  if (length(l) == 1L && is.list(l[[1L]])) {
+  if (
+    length(l) == 1L &&
+    is.list(l[[1L]]) &&
+    !( !is.null(skip_classes) && inherits(l[[1L]],skip_classes) )
+  ) {
     l[[1L]]
   } else {
     l
diff --git a/man/DataFrame_n_chunks.Rd b/man/DataFrame_n_chunks.Rd
new file mode 100644
index 000000000..7320844d7
--- /dev/null
+++ b/man/DataFrame_n_chunks.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataframe__frame.R
+\name{DataFrame_n_chunks}
+\alias{DataFrame_n_chunks}
+\title{Get the number of chunks of the Series' in a DataFrame}
+\usage{
+DataFrame_n_chunks(strategy = "all")
+}
+\arguments{
+\item{strategy}{string either 'all' or 'first'. 'first' only returns chunks for first Series.}
+}
+\value{
+real vector of chunk counts per Series.
+}
+\description{
+Get the number of chunks of the Series' in a DataFrame
+}
+\examples{
+df = pl$concat(
+  1:10,
+  pl$concat(1:5,1:5, rechunk = FALSE, how = "vertical")$rename("b"),
+  how = "horizontal"
+)
+df
+df$n_chunks()
+}
+\keyword{DataFrame}
diff --git a/man/nanoarrow.Rd b/man/nanoarrow.Rd
index 3ecaf02a4..7af2018a2 100644
--- a/man/nanoarrow.Rd
+++ b/man/nanoarrow.Rd
@@ -16,13 +16,13 @@
 \alias{as_record_batch_reader.DataFrame}
 \title{polars to nanoarrow and arrow}
 \usage{
-\method{as_nanoarrow_array_stream}{DataFrame}(x, ..., schema = NULL)
+as_nanoarrow_array_stream.DataFrame(x, ..., schema = NULL)
 
-\method{infer_nanoarrow_schema}{DataFrame}(x, ...)
+infer_nanoarrow_schema.DataFrame(x, ...)
 
-\method{as_arrow_table}{DataFrame}(x, ...)
+as_arrow_table.DataFrame(x, ...)
 
-\method{as_record_batch_reader}{DataFrame}(x, ..., schema = NULL)
+as_record_batch_reader.DataFrame(x, ..., schema = NULL)
 }
 \arguments{
 \item{x}{a polars DataFrame}
diff --git a/src/rust/src/concat.rs b/src/rust/src/concat.rs
index a580a53e4..7ac7e873b 100644
--- a/src/rust/src/concat.rs
+++ b/src/rust/src/concat.rs
@@ -4,7 +4,6 @@ use crate::robj_to;
 use crate::rdataframe::LazyFrame;
 use crate::rpolarserr::*;
 use crate::series::Series;
-use crate::{rdataframe::VecDataFrame, utils::r_result_list};
 use extendr_api::prelude::*;
 use polars::lazy::dsl;
 use polars::prelude as pl;
@@ -12,39 +11,6 @@ use polars_core;
 use polars_core::functions as pl_functions;
 use std::result::Result;
 
-#[extendr]
-fn concat_df(vdf: &VecDataFrame) -> List {
-    //-> PyResult<PyDataFrame> {
-
-    use polars_core::error::PolarsResult;
-    use polars_core::utils::rayon::prelude::*;
-
-    let identity_df = (*vdf.0.iter().peekable().peek().unwrap())
-        .clone()
-        .slice(0, 0);
-    let rdfs: Vec<pl::PolarsResult<pl::DataFrame>> =
-        vdf.0.iter().map(|df| Ok(df.clone())).collect();
-    let identity = || Ok(identity_df.clone());
-
-    let result = polars_core::POOL
-        .install(|| {
-            rdfs.into_par_iter()
-                .fold(identity, |acc: PolarsResult<pl::DataFrame>, df| {
-                    let mut acc = acc?;
-                    acc.vstack_mut(&df?)?;
-                    Ok(acc)
-                })
-                .reduce(identity, |acc, df| {
-                    let mut acc = acc?;
-                    acc.vstack_mut(&df?)?;
-                    Ok(acc)
-                })
-        })
-        .map(DataFrame);
-
-    r_result_list(result.map_err(|err| format!("{:?}", err)))
-}
-
 #[extendr]
 fn concat_lf(l: Robj, rechunk: bool, parallel: bool, to_supertypes: bool) -> RResult<LazyFrame> {
     let vlf = robj_to!(Vec, PLLazyFrame, l)?;
@@ -60,14 +26,6 @@ fn concat_lf(l: Robj, rechunk: bool, parallel: bool, to_supertypes: bool) -> RRe
     .map(LazyFrame)
 }
 
-#[extendr]
-fn diag_concat_df(dfs: Robj) -> RResult<DataFrame> {
-    let df_vec = robj_to!(Vec, PLDataFrame, dfs)?;
-    pl_functions::diag_concat_df(&df_vec)
-        .map_err(polars_to_rpolars_err)
-        .map(DataFrame)
-}
-
 #[extendr]
 fn diag_concat_lf(l: Robj, rechunk: bool, parallel: bool) -> RResult<LazyFrame> {
     let vlf = robj_to!(Vec, PLLazyFrame, l)?;
@@ -96,7 +54,7 @@ pub fn concat_series(l: Robj, rechunk: Robj, to_supertypes: Robj) -> RResult<Ser
             .map(|s| s.dtype().clone())
             .fold(Ok(None), |acc, x| match acc {
                 Err(err) => Err(err),
-                Ok(None) => Ok(Some(x)),
+                Ok(None) => Ok(Some(x)), // first fold, acc is None, just us x,
                 Ok(Some(acc)) => polars_core::utils::get_supertype(&acc, &x)
                     .ok_or(RPolarsErr::new().plain("Series' have no common supertype".to_string()))
                     .map(|dt| Some(dt)),
@@ -129,10 +87,7 @@ pub fn concat_series(l: Robj, rechunk: Robj, to_supertypes: Robj) -> RResult<Ser
 
 extendr_module! {
     mod concat;
-
-    fn concat_df;
     fn concat_lf;
-    fn diag_concat_df;
     fn diag_concat_lf;
     fn hor_concat_df;
     fn concat_series;
diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs
index d98d4c53e..18a73cc88 100644
--- a/src/rust/src/rdataframe/mod.rs
+++ b/src/rust/src/rdataframe/mod.rs
@@ -77,7 +77,7 @@ impl From<pl::DataFrame> for DataFrame {
         DataFrame(item)
     }
 }
-
+use crate::rpolarserr::*;
 #[extendr]
 impl DataFrame {
     pub fn shape(&self) -> Robj {
@@ -85,6 +85,25 @@ impl DataFrame {
         r!([shp.0, shp.1])
     }
 
+    pub fn n_chunks(&self, strategy: Robj) -> RResult<Vec<f64>> {
+        let nchks: Vec<_> = self.0.iter().map(|s| s.n_chunks() as f64).collect();
+
+        match robj_to!(str, strategy)? {
+            "all" => Ok(nchks),
+            "first" => {
+                if nchks.is_empty() {
+                    Ok(vec![])
+                } else {
+                    Ok(vec![nchks.into_iter().next().expect("has atleast len 1")])
+                }
+            }
+            _ => {
+                Err(RPolarsErr::new()
+                    .plain("strategy not recognized, neither 'all' or 'first'".into()))
+            }
+        }
+    }
+
     //renamed back to clone
     pub fn clone_see_me_macro(&self) -> DataFrame {
         self.clone()
diff --git a/src/rust/src/utils/mod.rs b/src/rust/src/utils/mod.rs
index 6a158e3d5..cb74aed48 100644
--- a/src/rust/src/utils/mod.rs
+++ b/src/rust/src/utils/mod.rs
@@ -13,9 +13,11 @@ use std::any::type_name as tn;
 //use std::intrinsics::read_via_copy;
 use crate::lazy::dsl::robj_to_col;
 use crate::rdataframe::{DataFrame, LazyFrame};
+use extendr_api::eval_string_with_params;
 use extendr_api::Attributes;
 use extendr_api::ExternalPtr;
 use extendr_api::Result as ExtendrResult;
+use extendr_api::R;
 use polars::prelude as pl;
 
 //macro to translate polars NULLs and  emulate R NA value of any type
@@ -759,6 +761,11 @@ pub fn robj_to_lazyframe(robj: extendr_api::Robj) -> RResult<LazyFrame> {
                 let lf = LazyFrame(lf.0.clone());
                 Ok(lf)
             }
+            _ if robj.inherits("data.frame") => {
+                let df = unpack_r_eval(R!("polars:::result(pl$DataFrame({{robj}}))"))?;
+                let extptr_df: ExternalPtr<DataFrame> = df.try_into()?;
+                Ok(extptr_df.lazy())
+            }
             _ => Ok(DataFrame::new_with_capacity(1)
                 .lazy()
                 .0
@@ -786,6 +793,11 @@ pub fn robj_to_dataframe(robj: extendr_api::Robj) -> RResult<DataFrame> {
                 let lf: ExternalPtr<LazyFrame> = robj.try_into()?;
                 lf.0.clone().collect()
             }
+            _ if robj.inherits("data.frame") => {
+                let df = unpack_r_eval(R!("polars:::result(pl$DataFrame({{robj}}))"))?;
+                let extptr_df: ExternalPtr<DataFrame> = df.try_into()?;
+                Ok(extptr_df.0.clone())
+            }
             _ => DataFrame::new_with_capacity(1)
                 .lazy()
                 .0
diff --git a/tests/testthat/test-concat.R b/tests/testthat/test-concat.R
index f448a692f..ffdb83836 100644
--- a/tests/testthat/test-concat.R
+++ b/tests/testthat/test-concat.R
@@ -1,20 +1,55 @@
 test_that("concat dataframe", {
-  # vertical
-  l_ver = lapply(1:10, function(i) {
+
+  # vertical dfs
+  l_ver = lapply(1:3, function(i) {
     l_internal = list(
       a = 1:5,
       b = letters[1:5]
     )
     pl$DataFrame(l_internal)
   })
+
+
   df_ver = pl$concat(l_ver, how = "vertical")
   expect_equal(
     df_ver$to_data_frame(),
     do.call(rbind, lapply(l_ver, function(df) df$to_data_frame()))
   )
 
+  # unpack args allowed
+  df_ver_2 = pl$concat(l_ver[[1L]],l_ver[[2L]],l_ver[[3L]], how="vertical")
+  expect_identical(df_ver$to_list(),df_ver_2$to_list())
+
+  # use supertypes
+  expect_identical(
+    pl$concat(l_ver[[1L]],pl$DataFrame(a=2,b=42L), how="vertical",to_supertypes = TRUE)$to_list(),
+    pl$DataFrame(rbind(data.frame(a = 1:5, b = letters[1:5]), data.frame(a = 2, b = 42L)))$to_list()
+  )
+
+  # type 'relaxed' vertical concatenation is not allowed by default
+  expect_true(
+      pl$concat(l_ver[[1L]],pl$DataFrame(a=2,b=42L), how="vertical") |>
+        get_err_ctx() |>
+        (\(ctx) ctx$PolarsError)() |>
+        grepl(pat = "dtypes for column", fixed = TRUE)
+  )
+
+
+  #check lazy eager is identical
+  l_ver_lazy = lapply(l_ver, \(df) df$lazy())
+  expect_identical(
+    pl$concat(l_ver_lazy)$collect()$to_list(),
+    pl$concat(l_ver)$to_list()
+  )
+
+  #check rechunk works
+  expect_identical( pl$concat(mtcars, mtcars, rechunk = TRUE)$n_chunks(), rep(1, 11))
+  expect_identical( pl$concat(mtcars, mtcars, rechunk = FALSE)$n_chunks(), rep(2, 11))
+
+
+
   # horizontal
-  l_hor = lapply(1:10, function(i) {
+  l_hor = lapply(1:5, function(i) {
     l_internal = list(
       1:5,
       letters[1:5]
@@ -28,8 +63,28 @@ test_that("concat dataframe", {
     do.call(cbind, lapply(l_hor, function(df) df$to_data_frame()))
   )
 
-  # diagonal
+  pl$concat(pl$LazyFrame(a=1:3), how = "horizontal") |>
+    get_err_ctx( "Plain") |>
+    startsWith("how=='horizontal' is not supported for lazy") |>
+    expect_true()
+
+  # can concat Series
+  expect_identical(
+    pl$concat(1:5,pl$Series(5:1,"b"), how = "horizontal")$to_list(),
+    list(1:5,b=5:1)
+  )
+
+
+  # diagonal eager
   df_dia = pl$concat(l_hor, how = "diagonal")
-  expect_equal(df_dia$shape, c(50, 20))
-  expect_equal(mean(is.na(df_dia$to_data_frame())), 9 / 10)
+  expect_equal(df_dia$shape, c(25, 10))
+  expect_equal(mean(is.na(df_dia$to_data_frame())), 8 / 10)
+
+  #diagonal lazy
+  lf_dia = pl$concat(l_hor |> lapply(pl$LazyFrame), how = "diagonal")
+  expect_identical(
+    lf_dia$collect()$to_list(),
+    df_dia$to_list()
+  )
+
 })
diff --git a/tests/testthat/test-dataframe.R b/tests/testthat/test-dataframe.R
index 6cf7293ee..073f81df0 100644
--- a/tests/testthat/test-dataframe.R
+++ b/tests/testthat/test-dataframe.R
@@ -775,6 +775,25 @@ test_that("join_asof_simple", {
   )
 })
 
+test_that("n_chunks", {
+
+  df = pl$concat(
+    1:10,
+    pl$concat(1:5,1:5, rechunk = FALSE, how = "vertical")$rename("b"),
+    how = "horizontal"
+  )
+
+  expect_identical( df$n_chunks(),  c(1,2))
+  expect_identical( df$n_chunks("first"),  c(1))
+  expect_identical( pl$DataFrame()$n_chunks(),  numeric())
+  expect_identical( pl$DataFrame()$n_chunks("first"),  numeric())
+
+   pl$DataFrame()$n_chunks("wrong strat") |>
+     get_err_ctx("Plain") |>
+     grepl(pat = "strategy") |>
+     expect_true()
+})
+
 
 test_that("melt example", {
   df = pl$DataFrame(

From 3f8ef34a3b8f09b329df447f24f390ff7b93cfef Mon Sep 17 00:00:00 2001
From: sorhawell <sorhawell@gmail.com>
Date: Sun, 1 Oct 2023 23:33:07 +0300
Subject: [PATCH 04/14] fmt

---
 R/dataframe__frame.R            |  2 +-
 R/error__rpolarserr.R           |  6 ++--
 R/expr__meta.R                  |  3 +-
 R/functions__eager.R            | 60 ++++++++++++++++-----------------
 R/lazyframe__lazy.R             | 12 +++----
 R/utils.R                       |  6 ++--
 tests/testthat/test-concat.R    | 34 +++++++++----------
 tests/testthat/test-dataframe.R | 19 +++++------
 tests/testthat/test-expr_arr.R  |  1 -
 9 files changed, 67 insertions(+), 76 deletions(-)

diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R
index 9058074cb..db998bd31 100644
--- a/R/dataframe__frame.R
+++ b/R/dataframe__frame.R
@@ -1065,7 +1065,7 @@ DataFrame_first = function() {
 #' @examples
 #' df = pl$concat(
 #'   1:10,
-#'   pl$concat(1:5,1:5, rechunk = FALSE, how = "vertical")$rename("b"),
+#'   pl$concat(1:5, 1:5, rechunk = FALSE, how = "vertical")$rename("b"),
 #'   how = "horizontal"
 #' )
 #' df
diff --git a/R/error__rpolarserr.R b/R/error__rpolarserr.R
index 79fa27c68..b851e3bb9 100644
--- a/R/error__rpolarserr.R
+++ b/R/error__rpolarserr.R
@@ -54,16 +54,16 @@ bad_robj = function(r) {
 }
 
 Err_plain = function(...) {
-  Err(.pr$RPolarsErr$new()$plain(paste(..., collapse=" ")))
+  Err(.pr$RPolarsErr$new()$plain(paste(..., collapse = " ")))
 }
 
 # short hand for extracting an error context in unit testing, will raise error if not an RPolarsErr
 get_err_ctx = \(x, select = NULL) {
   ctx = unwrap_err(result(x))$contexts()
-  if(is.null(select)) {
+  if (is.null(select)) {
     ctx
   } else {
-    ctx[[match.arg(select,names(ctx))]]
+    ctx[[match.arg(select, names(ctx))]]
   }
 }
 
diff --git a/R/expr__meta.R b/R/expr__meta.R
index 76a6e7d8d..d88f7026c 100644
--- a/R/expr__meta.R
+++ b/R/expr__meta.R
@@ -166,9 +166,8 @@ ExprMeta_is_regex_projection = function() {
 #' @examples
 #' my_expr = (pl$col("foo") * pl$col("bar"))$sum()$over(pl$col("ham")) / 2
 #' my_expr$meta$tree_format()
-
 ExprMeta_tree_format = function(return_as_string = FALSE) {
-  out <- .pr$Expr$meta_tree_format(self) |>
+  out = .pr$Expr$meta_tree_format(self) |>
     unwrap("in $tree_format():")
   if (isTRUE(return_as_string)) {
     out
diff --git a/R/functions__eager.R b/R/functions__eager.R
index c673b9fd8..a92a5ec62 100644
--- a/R/functions__eager.R
+++ b/R/functions__eager.R
@@ -40,32 +40,32 @@
 pl$concat = function(
     ..., # list of DataFrames or Series or lazyFrames or expr
     rechunk = TRUE,
-    how = c("vertical", "horizontal", "diagonal"),#, "vertical_relaxed","diangonal_relaxed"),
+    how = c("vertical", "horizontal", "diagonal"), # , "vertical_relaxed","diangonal_relaxed"),
     parallel = TRUE,
-    #eager = FALSE,
-    to_supertypes = FALSE
-    ) {
-  if(exists("do_browse", .GlobalEnv) && do_browse) browser()
+    # eager = FALSE,
+    to_supertypes = FALSE) {
+  if (exists("do_browse", .GlobalEnv) && do_browse) browser()
 
-  #unpack arg list
+  # unpack arg list
   l = unpack_list(..., skip_classes = "data.frame")
 
   ## Check inputs
-  how_args =  c("vertical", "horizontal", "diagonal")#, "vertical_relaxed", "diangonal_relaxed")
+  how_args = c("vertical", "horizontal", "diagonal") # , "vertical_relaxed", "diangonal_relaxed")
 
   how = match.arg(how[1L], how_args) |>
     result() |>
     unwrap("in pl$concat()")
 
   first = l[[1L]]
-  eager = !inherits(first,"LazyFrame")
+  eager = !inherits(first, "LazyFrame")
   args_modified = names(as.list(sys.call()[-1L]))
 
   # dispatch on item class and how
 
   Result_out = pcase(
-    how == "vertical" && (inherits(first, "Series") || is.vector(first)), {
-      if(any(args_modified %in% c("parallel"))) {
+    how == "vertical" && (inherits(first, "Series") || is.vector(first)),
+    {
+      if (any(args_modified %in% c("parallel"))) {
         warning(
           "in pl:concat(): args: parallel takes no effect when concatenating Series",
           call. = FALSE
@@ -73,10 +73,11 @@ pl$concat = function(
       }
       concat_series(l, rechunk, to_supertypes)
     },
-
-    how == "vertical",  concat_lf(l, rechunk, parallel, to_supertypes),
-    how == "diagonal",  {
-      if(any(args_modified %in% c("to_supertypes"))) {
+    how == "vertical",
+    concat_lf(l, rechunk, parallel, to_supertypes),
+    how == "diagonal",
+    {
+      if (any(args_modified %in% c("to_supertypes"))) {
         warning(
           "Args to_supertypes",
           "takes no effect for how=='diagonal'",
@@ -85,17 +86,17 @@ pl$concat = function(
       }
       diag_concat_lf(l, rechunk, parallel)
     },
-
-    how == "horizontal" && !eager, {
-       Err_plain(
-         "how=='horizontal' is not supported for lazy (first element is LazyFrame).",
-         "Try e.g. <LazyFrame>$join() to get Lazy join or pl$concat(lf1$collect(),lf2,lf3).",
-         "to get a eager horizontal concatenation"
-        )
+    how == "horizontal" && !eager,
+    {
+      Err_plain(
+        "how=='horizontal' is not supported for lazy (first element is LazyFrame).",
+        "Try e.g. <LazyFrame>$join() to get Lazy join or pl$concat(lf1$collect(),lf2,lf3).",
+        "to get a eager horizontal concatenation"
+      )
     },
-
-    how == "horizontal", {
-      if(any(args_modified %in% c("parallel", "to_supertypes"))) {
+    how == "horizontal",
+    {
+      if (any(args_modified %in% c("parallel", "to_supertypes"))) {
         warning(
           "Args parallel, rechunk, eager and to_supertypes",
           "takes no effect for how=='horizontal'",
@@ -107,17 +108,15 @@ pl$concat = function(
 
     # TODO implement Series, Expr, Lazy etc
     or_else = Err_plain("internal error:", how, "not handled")
-
   )
 
-  #convert back from lazy if eager
+  # convert back from lazy if eager
   and_then(Result_out, \(x) {
     pcase(
-      inherits(x,"DataFrame") && !eager, Err_plain("internal logical error in pl$concat()"),
-      inherits(x,"LazyFrame") &&  eager, Ok(x$collect()),
+      inherits(x, "DataFrame") && !eager, Err_plain("internal logical error in pl$concat()"),
+      inherits(x, "LazyFrame") && eager, Ok(x$collect()),
       or_else = Ok(x)
     )
-
   }) |> unwrap(
     "in pl$concat()"
   )
@@ -185,8 +184,7 @@ pl$date_range = function(
     name = NULL, # : str | None = None,
     time_unit = "us",
     time_zone = NULL, # : str | None = None
-    explode = TRUE
-    ) {
+    explode = TRUE) {
   if (missing(end)) {
     end = start
     interval = "1h"
diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R
index f701ebde1..274af118a 100644
--- a/R/lazyframe__lazy.R
+++ b/R/lazyframe__lazy.R
@@ -353,7 +353,6 @@ LazyFrame_collect = function(
       streaming
     ) |>
     and_then(collect_f) |>
-
     unwrap("in $collect():")
 }
 
@@ -1320,10 +1319,10 @@ LazyFrame_clone = function() {
 #'   b = c("one", "two", "three", "four", "five"),
 #'   c = 6:10
 #' )$
-#'  select(
-#'    pl$col("b")$to_struct(),
-#'    pl$col("a", "c")$to_struct()$alias("a_and_c")
-#'  )
+#'   select(
+#'   pl$col("b")$to_struct(),
+#'   pl$col("a", "c")$to_struct()$alias("a_and_c")
+#' )
 #' lf$collect()
 #'
 #' # by default, all struct columns are unnested
@@ -1331,10 +1330,9 @@ LazyFrame_clone = function() {
 #'
 #' # we can specify specific columns to unnest
 #' lf$unnest("a_and_c")$collect()
-
 LazyFrame_unnest = function(names = NULL) {
   if (is.null(names)) {
-    names <- names(which(dtypes_are_struct(.pr$LazyFrame$schema(self)$ok)))
+    names = names(which(dtypes_are_struct(.pr$LazyFrame$schema(self)$ok)))
   }
   unwrap(.pr$LazyFrame$unnest(self, names), "in $unnest():")
 }
diff --git a/R/utils.R b/R/utils.R
index 8aea0aada..8f18924cc 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -104,12 +104,12 @@ list2 = list
 #' f = \(...) unpack_list(list(...))
 #' identical(f(list(1L, 2L, 3L)), f(1L, 2L, 3L)) # is TRUE
 #' identical(f(list(1L, 2L), 3L), f(1L, 2L, 3L)) # is FALSE
-unpack_list = function(..., skip_classes=NULL) {
+unpack_list = function(..., skip_classes = NULL) {
   l = list2(...)
   if (
     length(l) == 1L &&
-    is.list(l[[1L]]) &&
-    !( !is.null(skip_classes) && inherits(l[[1L]],skip_classes) )
+      is.list(l[[1L]]) &&
+      !(!is.null(skip_classes) && inherits(l[[1L]], skip_classes))
   ) {
     l[[1L]]
   } else {
diff --git a/tests/testthat/test-concat.R b/tests/testthat/test-concat.R
index ffdb83836..8851a6fc3 100644
--- a/tests/testthat/test-concat.R
+++ b/tests/testthat/test-concat.R
@@ -1,5 +1,4 @@
 test_that("concat dataframe", {
-
   # vertical dfs
   l_ver = lapply(1:3, function(i) {
     l_internal = list(
@@ -17,34 +16,34 @@ test_that("concat dataframe", {
   )
 
   # unpack args allowed
-  df_ver_2 = pl$concat(l_ver[[1L]],l_ver[[2L]],l_ver[[3L]], how="vertical")
-  expect_identical(df_ver$to_list(),df_ver_2$to_list())
+  df_ver_2 = pl$concat(l_ver[[1L]], l_ver[[2L]], l_ver[[3L]], how = "vertical")
+  expect_identical(df_ver$to_list(), df_ver_2$to_list())
 
   # use supertypes
   expect_identical(
-    pl$concat(l_ver[[1L]],pl$DataFrame(a=2,b=42L), how="vertical",to_supertypes = TRUE)$to_list(),
+    pl$concat(l_ver[[1L]], pl$DataFrame(a = 2, b = 42L), how = "vertical", to_supertypes = TRUE)$to_list(),
     pl$DataFrame(rbind(data.frame(a = 1:5, b = letters[1:5]), data.frame(a = 2, b = 42L)))$to_list()
   )
 
   # type 'relaxed' vertical concatenation is not allowed by default
   expect_true(
-      pl$concat(l_ver[[1L]],pl$DataFrame(a=2,b=42L), how="vertical") |>
-        get_err_ctx() |>
-        (\(ctx) ctx$PolarsError)() |>
-        grepl(pat = "dtypes for column", fixed = TRUE)
+    pl$concat(l_ver[[1L]], pl$DataFrame(a = 2, b = 42L), how = "vertical") |>
+      get_err_ctx() |>
+      (\(ctx) ctx$PolarsError)() |>
+      grepl(pat = "dtypes for column", fixed = TRUE)
   )
 
 
-  #check lazy eager is identical
+  # check lazy eager is identical
   l_ver_lazy = lapply(l_ver, \(df) df$lazy())
   expect_identical(
     pl$concat(l_ver_lazy)$collect()$to_list(),
     pl$concat(l_ver)$to_list()
   )
 
-  #check rechunk works
-  expect_identical( pl$concat(mtcars, mtcars, rechunk = TRUE)$n_chunks(), rep(1, 11))
-  expect_identical( pl$concat(mtcars, mtcars, rechunk = FALSE)$n_chunks(), rep(2, 11))
+  # check rechunk works
+  expect_identical(pl$concat(mtcars, mtcars, rechunk = TRUE)$n_chunks(), rep(1, 11))
+  expect_identical(pl$concat(mtcars, mtcars, rechunk = FALSE)$n_chunks(), rep(2, 11))
 
 
 
@@ -63,15 +62,15 @@ test_that("concat dataframe", {
     do.call(cbind, lapply(l_hor, function(df) df$to_data_frame()))
   )
 
-  pl$concat(pl$LazyFrame(a=1:3), how = "horizontal") |>
-    get_err_ctx( "Plain") |>
+  pl$concat(pl$LazyFrame(a = 1:3), how = "horizontal") |>
+    get_err_ctx("Plain") |>
     startsWith("how=='horizontal' is not supported for lazy") |>
     expect_true()
 
   # can concat Series
   expect_identical(
-    pl$concat(1:5,pl$Series(5:1,"b"), how = "horizontal")$to_list(),
-    list(1:5,b=5:1)
+    pl$concat(1:5, pl$Series(5:1, "b"), how = "horizontal")$to_list(),
+    list(1:5, b = 5:1)
   )
 
 
@@ -80,11 +79,10 @@ test_that("concat dataframe", {
   expect_equal(df_dia$shape, c(25, 10))
   expect_equal(mean(is.na(df_dia$to_data_frame())), 8 / 10)
 
-  #diagonal lazy
+  # diagonal lazy
   lf_dia = pl$concat(l_hor |> lapply(pl$LazyFrame), how = "diagonal")
   expect_identical(
     lf_dia$collect()$to_list(),
     df_dia$to_list()
   )
-
 })
diff --git a/tests/testthat/test-dataframe.R b/tests/testthat/test-dataframe.R
index 073f81df0..cd2e8a11c 100644
--- a/tests/testthat/test-dataframe.R
+++ b/tests/testthat/test-dataframe.R
@@ -776,22 +776,21 @@ test_that("join_asof_simple", {
 })
 
 test_that("n_chunks", {
-
   df = pl$concat(
     1:10,
-    pl$concat(1:5,1:5, rechunk = FALSE, how = "vertical")$rename("b"),
+    pl$concat(1:5, 1:5, rechunk = FALSE, how = "vertical")$rename("b"),
     how = "horizontal"
   )
 
-  expect_identical( df$n_chunks(),  c(1,2))
-  expect_identical( df$n_chunks("first"),  c(1))
-  expect_identical( pl$DataFrame()$n_chunks(),  numeric())
-  expect_identical( pl$DataFrame()$n_chunks("first"),  numeric())
+  expect_identical(df$n_chunks(), c(1, 2))
+  expect_identical(df$n_chunks("first"), c(1))
+  expect_identical(pl$DataFrame()$n_chunks(), numeric())
+  expect_identical(pl$DataFrame()$n_chunks("first"), numeric())
 
-   pl$DataFrame()$n_chunks("wrong strat") |>
-     get_err_ctx("Plain") |>
-     grepl(pat = "strategy") |>
-     expect_true()
+  pl$DataFrame()$n_chunks("wrong strat") |>
+    get_err_ctx("Plain") |>
+    grepl(pat = "strategy") |>
+    expect_true()
 })
 
 
diff --git a/tests/testthat/test-expr_arr.R b/tests/testthat/test-expr_arr.R
index e11138021..fc4d26c31 100644
--- a/tests/testthat/test-expr_arr.R
+++ b/tests/testthat/test-expr_arr.R
@@ -436,4 +436,3 @@ test_that("eval", {
     )
   )
 })
-

From 1336f502c67946375f2ee003204b10649c9a302f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=B8ren=20Havelund=20Welling?= <sorhawell@gmail.com>
Date: Mon, 2 Oct 2023 17:36:33 +0300
Subject: [PATCH 05/14] Apply suggestions from code review

Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
---
 R/functions__eager.R | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/R/functions__eager.R b/R/functions__eager.R
index a92a5ec62..4a4ab45e7 100644
--- a/R/functions__eager.R
+++ b/R/functions__eager.R
@@ -44,7 +44,6 @@ pl$concat = function(
     parallel = TRUE,
     # eager = FALSE,
     to_supertypes = FALSE) {
-  if (exists("do_browse", .GlobalEnv) && do_browse) browser()
 
   # unpack arg list
   l = unpack_list(..., skip_classes = "data.frame")
@@ -67,7 +66,7 @@ pl$concat = function(
     {
       if (any(args_modified %in% c("parallel"))) {
         warning(
-          "in pl:concat(): args: parallel takes no effect when concatenating Series",
+          "in pl$concat(): argument `parallel` is not used when concatenating Series",
           call. = FALSE
         )
       }
@@ -79,8 +78,7 @@ pl$concat = function(
     {
       if (any(args_modified %in% c("to_supertypes"))) {
         warning(
-          "Args to_supertypes",
-          "takes no effect for how=='diagonal'",
+          "Argument `to_supertypes` is not used when how=='diagonal'",
           call. = FALSE
         )
       }
@@ -90,7 +88,7 @@ pl$concat = function(
     {
       Err_plain(
         "how=='horizontal' is not supported for lazy (first element is LazyFrame).",
-        "Try e.g. <LazyFrame>$join() to get Lazy join or pl$concat(lf1$collect(),lf2,lf3).",
+        "Try e.g. <LazyFrame>$join() to get Lazy join or pl$concat(lf1$collect(), lf2, lf3).",
         "to get a eager horizontal concatenation"
       )
     },
@@ -98,8 +96,7 @@ pl$concat = function(
     {
       if (any(args_modified %in% c("parallel", "to_supertypes"))) {
         warning(
-          "Args parallel, rechunk, eager and to_supertypes",
-          "takes no effect for how=='horizontal'",
+          "Arguments `parallel`, `rechunk`, `eager` and `to_supertypes` are not used when how=='horizontal'",
           call. = FALSE
         )
       }

From 7ecd49cc6efc6463c9bb1fcb1d5dc81cbd15bfdf Mon Sep 17 00:00:00 2001
From: sorhawell <sorhawell@gmail.com>
Date: Tue, 3 Oct 2023 11:19:19 +0300
Subject: [PATCH 06/14] add also rechunk improve docs

---
 R/dataframe__frame.R      | 115 +++++++++++++++++++++++++++++++++++++-
 R/extendr-wrappers.R      |   2 +
 man/DataFrame_n_chunks.Rd |  55 ++++++++++++++++--
 man/DataFrame_rechunk.Rd  |  72 ++++++++++++++++++++++++
 4 files changed, 237 insertions(+), 7 deletions(-)
 create mode 100644 man/DataFrame_rechunk.Rd

diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R
index 0340a4647..e01f10365 100644
--- a/R/dataframe__frame.R
+++ b/R/dataframe__frame.R
@@ -1034,24 +1034,133 @@ DataFrame_first = function() {
 }
 
 
-#' @title Get the number of chunks of the Series' in a DataFrame
+#' @title Number of chunks of the Series' in a DataFrame
+#' @description Number of chunks (mem allocations) for all or first Series' in a DataFrame.
+#' @details
+#' DataFrame is a vector of Series'. Each Series in rust-polars is a wrapper around a ChunkedArray,
+#' which is like a virtual contiguous vector, which is physically backed by an ordered set of
+#' chunks. Each chunk of values has a contiguous memory layout and is an arrow array. Arrow arrays
+#' are a fast, thread-safe and cross-platform  memory layout.
+#'
+#' In R when combining with `c()` or `rbind()`, it requires immediate vector re-allocation to place
+#' vector values in contiguous memory, hence repeated appending to a vector in R is discouraged
+#' because slow.
+#'
+#' When concatenating/appending to Series' or DataFrame' re-allocation can be avoided or delayed,
+#' by simply appending chunks to each individual Series.
+#'
+#' However, if chunks become many and small or are misaligned across Series, this can hurt the
+#' performance of following operations.
+#'
+#' Most places in the polars api where chunking could occur, the user have to
+#' typically actively opt-out by setting and arg `rechunk = FALSE`. Thus it is not normal
+#'
 #' @keywords DataFrame
 #' @param strategy string either 'all' or 'first'. 'first' only returns chunks for first Series.
 #' @return real vector of chunk counts per Series.
+#' @seealso [`<DataFrame>$rechunk()`][DataFrame_rechunk]
 #' @examples
+#' # create DataFrame with miss aligned chunks
 #' df = pl$concat(
-#'   1:10,
-#'   pl$concat(1:5, 1:5, rechunk = FALSE, how = "vertical")$rename("b"),
+#'   1:10, # single chunk
+#'   pl$concat(1:5, 1:5, rechunk = FALSE, how = "vertical")$rename("b"), # two chunks
 #'   how = "horizontal"
 #' )
 #' df
 #' df$n_chunks()
+#'
+#' # rechunk a chunked DataFrame
+#' df$rechunk()$n_chunks()
+#'
+#' # rechunk is not an in-place operation
+#' df$n_chunks()
+#'
+#' # The following toy example emulates the Series "chunkyness" in R. Here it a S3-classed list with
+#' # same type of vectors and where have all relevant S3 generics implemented to make behave as if
+#' # it was a regular vector.
+#' "+.chunked_vector" = \(x, y) structure(list(unlist(x) + unlist(y)), class = "chunked_vector")
+#' print.chunked_vector = \(x, ...) print(unlist(x), ...)
+#' c.chunked_vector = \(...) structure(do.call(c, lapply(list(...), unclass)), class = "chunked_vector")
+#' rechunk = \(x) structure(unlist(x), class = "chunked_vector")
+#' x = structure(list(1:4, 5L), class = "chunked_vector")
+#' x
+#' x + 5:1
+#' lapply(x, tracemem) # trace chunks to verify no re-allocation
+#' z = c(x, x)
+#' z # looks like a plain vector
+#' lapply(z, tracemem) # mem allocation  in z are the same from x
+#' str(z)
+#' z = rechunk(z)
+#' str(z)
 DataFrame_n_chunks = function(strategy = "all") {
   .pr$DataFrame$n_chunks(self, strategy) |>
     unwrap("in n_chunks():")
 }
 
 
+#' @title Rechunk a DataFrame
+#' @description Rechunking re-allocates any "chunked" memory allocations to speed-up e.g. vectorized
+#' operations.
+#' @details
+#' DataFrame is a vector of Series'. Each Series in rust-polars is a wrapper around a ChunkedArray,
+#' which is like a virtual contiguous vector, which is physically backed by an ordered set of
+#' chunks. Each chunk of values has a contiguous memory layout and is an arrow array. Arrow arrays
+#' are a fast, thread-safe and cross-platform  memory layout.
+#'
+#' In R when combining with `c()` or `rbind()`, it requires immediate vector re-allocation to place
+#' vector values in contiguous memory, hence repeated appending to a vector in R is discouraged
+#' because slow.
+#'
+#' When concatenating/appending to Series' or DataFrame' re-allocation can be avoided or delayed,
+#' by simply appending chunks to each individual Series.
+#'
+#' However, if chunks become many and small or are misaligned across Series, this can hurt the
+#' performance of following operations.
+#'
+#' Most places in the polars api where chunking could occur, the user have to
+#' typically actively opt-out by setting and arg `rechunk = FALSE`. Thus it is not normal
+#'
+#' @keywords DataFrame
+#' @return real vector of chunk counts per Series.
+#' @seealso [`<DataFrame>$n_chunks()`][DataFrame_n_chunks]
+#' @examples
+#' # create DataFrame with miss alligned chunks
+#' df = pl$concat(
+#'   1:10, # single chunk
+#'   pl$concat(1:5, 1:5, rechunk = FALSE, how = "vertical")$rename("b"), # two chunks
+#'   how = "horizontal"
+#' )
+#' df
+#' df$n_chunks()
+#'
+#' # rechunk a chunked DataFrame
+#' df$rechunk()$n_chunks()
+#'
+#' # rechunk is not an in-place operation
+#' df$n_chunks()
+#'
+#' # The following toy example emulates the Series "chunkyness" in R. Here it a S3-classed list with
+#' # same type of vectors and where have all relevant S3 generics implemented to make behave as if
+#' # it was a regular vector.
+#' "+.chunked_vector" = \(x, y) structure(list(unlist(x) + unlist(y)), class = "chunked_vector")
+#' print.chunked_vector = \(x, ...) print(unlist(x), ...)
+#' c.chunked_vector = \(...) structure(do.call(c, lapply(list(...), unclass)), class = "chunked_vector")
+#' rechunk = \(x) structure(unlist(x), class = "chunked_vector")
+#' x = structure(list(1:4, 5L), class = "chunked_vector")
+#' x
+#' x + 5:1
+#' lapply(x, tracemem) # trace chunks to verify no re-allocation
+#' z = c(x, x)
+#' z # looks like a plain vector
+#' lapply(z, tracemem) # mem allocation  in z are the same from x
+#' str(z)
+#' z = rechunk(z)
+#' str(z)
+DataFrame_rechunk = function() {
+  .pr$DataFrame$rechunk(self)
+}
+
+
 #' @title Get the last row of the DataFrame.
 #' @keywords DataFrame
 #' @return A DataFrame with one row.
diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R
index c76e32d63..0be24bfc1 100644
--- a/R/extendr-wrappers.R
+++ b/R/extendr-wrappers.R
@@ -115,6 +115,8 @@ DataFrame$shape <- function() .Call(wrap__DataFrame__shape, self)
 
 DataFrame$n_chunks <- function(strategy) .Call(wrap__DataFrame__n_chunks, self, strategy)
 
+DataFrame$rechunk <- function() .Call(wrap__DataFrame__rechunk, self)
+
 DataFrame$clone_see_me_macro <- function() .Call(wrap__DataFrame__clone_see_me_macro, self)
 
 DataFrame$default <- function() .Call(wrap__DataFrame__default)
diff --git a/man/DataFrame_n_chunks.Rd b/man/DataFrame_n_chunks.Rd
index 7320844d7..741043e79 100644
--- a/man/DataFrame_n_chunks.Rd
+++ b/man/DataFrame_n_chunks.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/dataframe__frame.R
 \name{DataFrame_n_chunks}
 \alias{DataFrame_n_chunks}
-\title{Get the number of chunks of the Series' in a DataFrame}
+\title{Number of chunks of the Series' in a DataFrame}
 \usage{
 DataFrame_n_chunks(strategy = "all")
 }
@@ -13,15 +13,62 @@ DataFrame_n_chunks(strategy = "all")
 real vector of chunk counts per Series.
 }
 \description{
-Get the number of chunks of the Series' in a DataFrame
+Number of chunks (mem allocations) for all or first Series' in a DataFrame.
+}
+\details{
+DataFrame is a vector of Series'. Each Series in rust-polars is a wrapper around a ChunkedArray,
+which is like a virtual contiguous vector, which is physically backed by an ordered set of
+chunks. Each chunk of values has a contiguous memory layout and is an arrow array. Arrow arrays
+are a fast, thread-safe and cross-platform  memory layout.
+
+In R when combining with \code{c()} or \code{rbind()}, it requires immediate vector re-allocation to place
+vector values in contiguous memory, hence repeated appending to a vector in R is discouraged
+because slow.
+
+When concatenating/appending to Series' or DataFrame' re-allocation can be avoided or delayed,
+by simply appending chunks to each individual Series.
+
+However, if chunks become many and small or are misaligned across Series, this can hurt the
+performance of following operations.
+
+Most places in the polars api where chunking could occur, the user have to
+typically actively opt-out by setting and arg \code{rechunk = FALSE}. Thus it is not normal
 }
 \examples{
+# create DataFrame with miss aligned chunks
 df = pl$concat(
-  1:10,
-  pl$concat(1:5,1:5, rechunk = FALSE, how = "vertical")$rename("b"),
+  1:10, # single chunk
+  pl$concat(1:5, 1:5, rechunk = FALSE, how = "vertical")$rename("b"), # two chunks
   how = "horizontal"
 )
 df
 df$n_chunks()
+
+# rechunk a chunked DataFrame
+df$rechunk()$n_chunks()
+
+# rechunk is not an in-place operation
+df$n_chunks()
+
+# The following toy example emulates the Series "chunkyness" in R. Here it a S3-classed list with
+# same type of vectors and where have all relevant S3 generics implemented to make behave as if
+# it was a regular vector.
+'+.chunked_vector' = \(x, y) structure(list(unlist(x) + unlist(y)),class = "chunked_vector")
+print.chunked_vector = \(x, ...) print(unlist(x), ...)
+c.chunked_vector = \(...) structure(do.call(c,lapply(list(...),unclass)),class="chunked_vector")
+rechunk = \(x) structure(unlist(x),class = "chunked_vector")
+x = structure(list(1:4, 5L),class = "chunked_vector")
+x
+x + 5:1
+lapply(x, tracemem) # trace chunks to verify no re-allocation
+z = c(x, x)
+z # looks like a plain vector
+lapply(z, tracemem) # mem allocation  in z are the same from x
+str(z)
+z = rechunk(z)
+str(z)
+}
+\seealso{
+\code{\link[=DataFrame_rechunk]{<DataFrame>$rechunk()}}
 }
 \keyword{DataFrame}
diff --git a/man/DataFrame_rechunk.Rd b/man/DataFrame_rechunk.Rd
new file mode 100644
index 000000000..3f8c27a3d
--- /dev/null
+++ b/man/DataFrame_rechunk.Rd
@@ -0,0 +1,72 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataframe__frame.R
+\name{DataFrame_rechunk}
+\alias{DataFrame_rechunk}
+\title{Rechunk a DataFrame}
+\usage{
+DataFrame_rechunk()
+}
+\value{
+real vector of chunk counts per Series.
+}
+\description{
+Rechunking re-allocates any "chunked" memory allocations to speed-up e.g. vectorized
+operations.
+}
+\details{
+DataFrame is a vector of Series'. Each Series in rust-polars is a wrapper around a ChunkedArray,
+which is like a virtual contiguous vector, which is physically backed by an ordered set of
+chunks. Each chunk of values has a contiguous memory layout and is an arrow array. Arrow arrays
+are a fast, thread-safe and cross-platform  memory layout.
+
+In R when combining with \code{c()} or \code{rbind()}, it requires immediate vector re-allocation to place
+vector values in contiguous memory, hence repeated appending to a vector in R is discouraged
+because slow.
+
+When concatenating/appending to Series' or DataFrame' re-allocation can be avoided or delayed,
+by simply appending chunks to each individual Series.
+
+However, if chunks become many and small or are misaligned across Series, this can hurt the
+performance of following operations.
+
+Most places in the polars api where chunking could occur, the user have to
+typically actively opt-out by setting and arg \code{rechunk = FALSE}. Thus it is not normal
+}
+\examples{
+# create DataFrame with miss alligned chunks
+df = pl$concat(
+  1:10, # single chunk
+  pl$concat(1:5, 1:5, rechunk = FALSE, how = "vertical")$rename("b"), # two chunks
+  how = "horizontal"
+)
+df
+df$n_chunks()
+
+# rechunk a chunked DataFrame
+df$rechunk()$n_chunks()
+
+# rechunk is not an in-place operation
+df$n_chunks()
+
+# The following toy example emulates the Series "chunkyness" in R. Here it a S3-classed list with
+# same type of vectors and where have all relevant S3 generics implemented to make behave as if
+# it was a regular vector.
+'+.chunked_vector' = \(x, y) structure(list(unlist(x) + unlist(y)),class = "chunked_vector")
+print.chunked_vector = \(x, ...) print(unlist(x), ...)
+c.chunked_vector = \(...) structure(do.call(c,lapply(list(...),unclass)),class="chunked_vector")
+rechunk = \(x) structure(unlist(x),class = "chunked_vector")
+x = structure(list(1:4, 5L),class = "chunked_vector")
+x
+x + 5:1
+lapply(x, tracemem) # trace chunks to verify no re-allocation
+z = c(x, x)
+z # looks like a plain vector
+lapply(z, tracemem) # mem allocation  in z are the same from x
+str(z)
+z = rechunk(z)
+str(z)
+}
+\seealso{
+\code{\link[=DataFrame_n_chunks]{<DataFrame>$n_chunks()}}
+}
+\keyword{DataFrame}

From c7f14cd76840f2043e5d9fd15dab5ba4ead6099f Mon Sep 17 00:00:00 2001
From: sorhawell <sorhawell@gmail.com>
Date: Tue, 3 Oct 2023 11:19:50 +0300
Subject: [PATCH 07/14] pl$concat() returns now NULL

---
 R/functions__eager.R | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/R/functions__eager.R b/R/functions__eager.R
index 4a4ab45e7..6b4adcdd8 100644
--- a/R/functions__eager.R
+++ b/R/functions__eager.R
@@ -44,10 +44,14 @@ pl$concat = function(
     parallel = TRUE,
     # eager = FALSE,
     to_supertypes = FALSE) {
-
   # unpack arg list
   l = unpack_list(..., skip_classes = "data.frame")
 
+  # nothing becomes NULL
+  if (length(l) == 0L) {
+    return(NULL)
+  }
+
   ## Check inputs
   how_args = c("vertical", "horizontal", "diagonal") # , "vertical_relaxed", "diangonal_relaxed")
 

From 86875a690818ae9b6a943674d460dbe949b31bff Mon Sep 17 00:00:00 2001
From: sorhawell <sorhawell@gmail.com>
Date: Tue, 3 Oct 2023 11:20:18 +0300
Subject: [PATCH 08/14] rechunk rs

---
 src/rust/src/rdataframe/mod.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs
index 18a73cc88..2e4c77433 100644
--- a/src/rust/src/rdataframe/mod.rs
+++ b/src/rust/src/rdataframe/mod.rs
@@ -104,6 +104,10 @@ impl DataFrame {
         }
     }
 
+    pub fn rechunk(&self) -> Self {
+        self.0.agg_chunks().into()
+    }
+
     //renamed back to clone
     pub fn clone_see_me_macro(&self) -> DataFrame {
         self.clone()

From 67668cf5d25655731d337c068bc6a8f795d58e3f Mon Sep 17 00:00:00 2001
From: sorhawell <sorhawell@gmail.com>
Date: Tue, 3 Oct 2023 11:20:31 +0300
Subject: [PATCH 09/14] fmt

---
 R/dataframe__frame.R            |  5 ++---
 R/lazyframe__lazy.R             |  1 -
 man/LazyFrame_unnest.Rd         |  8 ++++----
 man/as_polars_series.Rd         |  4 ++--
 man/nanoarrow.Rd                |  8 ++++----
 tests/testthat/test-dataframe.R |  2 +-
 tests/testthat/test-lazy.R      | 12 ++++++------
 7 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R
index e01f10365..0efcd191d 100644
--- a/R/dataframe__frame.R
+++ b/R/dataframe__frame.R
@@ -133,7 +133,6 @@ DataFrame
 #'
 #' # custom schema
 #' pl$DataFrame(iris, schema = list(Sepal.Length = pl$Float32, Species = pl$Utf8))
-
 pl$DataFrame = function(..., make_names_unique = TRUE, schema = NULL) {
   largs = unpack_list(...)
 
@@ -181,9 +180,9 @@ pl$DataFrame = function(..., make_names_unique = TRUE, schema = NULL) {
     names(largs) = keys
     lapply(seq_along(largs), \(x) {
       varname = keys[x]
-      out <- pl$lit(largs[[x]])
+      out = pl$lit(largs[[x]])
       if (!is.null(schema) && varname %in% names(schema)) {
-        out <- out$cast(schema[[varname]], strict = TRUE)
+        out = out$cast(schema[[varname]], strict = TRUE)
       }
       out$alias(varname)
     }) |>
diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R
index ad844460a..6b556b001 100644
--- a/R/lazyframe__lazy.R
+++ b/R/lazyframe__lazy.R
@@ -146,7 +146,6 @@ LazyFrame
 #'   iris,
 #'   schema = list(Sepal.Length = pl$Float32, Species = pl$Utf8)
 #' )$collect()
-
 pl$LazyFrame = function(...) {
   pl$DataFrame(...)$lazy()
 }
diff --git a/man/LazyFrame_unnest.Rd b/man/LazyFrame_unnest.Rd
index d8414d15a..0f884bbdb 100644
--- a/man/LazyFrame_unnest.Rd
+++ b/man/LazyFrame_unnest.Rd
@@ -23,10 +23,10 @@ lf = pl$LazyFrame(
   b = c("one", "two", "three", "four", "five"),
   c = 6:10
 )$
- select(
-   pl$col("b")$to_struct(),
-   pl$col("a", "c")$to_struct()$alias("a_and_c")
- )
+  select(
+  pl$col("b")$to_struct(),
+  pl$col("a", "c")$to_struct()$alias("a_and_c")
+)
 lf$collect()
 
 # by default, all struct columns are unnested
diff --git a/man/as_polars_series.Rd b/man/as_polars_series.Rd
index b835e0ca8..2081b9eb4 100644
--- a/man/as_polars_series.Rd
+++ b/man/as_polars_series.Rd
@@ -21,9 +21,9 @@ Internal generic method to convert some Robj into Series
 
 pl$Series(1:5)
 
-#warning this method makes polars very useless
+# warning this method makes polars very useless
 as_polars_series.numeric = function(x, ...) {
-  head(x,3)
+  head(x, 3)
 }
 
 pl$Series(1:5)
diff --git a/man/nanoarrow.Rd b/man/nanoarrow.Rd
index 7af2018a2..3ecaf02a4 100644
--- a/man/nanoarrow.Rd
+++ b/man/nanoarrow.Rd
@@ -16,13 +16,13 @@
 \alias{as_record_batch_reader.DataFrame}
 \title{polars to nanoarrow and arrow}
 \usage{
-as_nanoarrow_array_stream.DataFrame(x, ..., schema = NULL)
+\method{as_nanoarrow_array_stream}{DataFrame}(x, ..., schema = NULL)
 
-infer_nanoarrow_schema.DataFrame(x, ...)
+\method{infer_nanoarrow_schema}{DataFrame}(x, ...)
 
-as_arrow_table.DataFrame(x, ...)
+\method{as_arrow_table}{DataFrame}(x, ...)
 
-as_record_batch_reader.DataFrame(x, ..., schema = NULL)
+\method{as_record_batch_reader}{DataFrame}(x, ..., schema = NULL)
 }
 \arguments{
 \item{x}{a polars DataFrame}
diff --git a/tests/testthat/test-dataframe.R b/tests/testthat/test-dataframe.R
index 6bc9a5551..4b159916c 100644
--- a/tests/testthat/test-dataframe.R
+++ b/tests/testthat/test-dataframe.R
@@ -158,7 +158,7 @@ test_that("DataFrame, custom schema", {
 
   # works fine if a variable is called "schema"
   expect_no_error(
-   pl$DataFrame(list(schema = 1), schema = list(schema = pl$Float32))
+    pl$DataFrame(list(schema = 1), schema = list(schema = pl$Float32))
   )
   # errors if incorrect datatype
   expect_error(pl$DataFrame(x = 1, schema = list(schema = foo)))
diff --git a/tests/testthat/test-lazy.R b/tests/testthat/test-lazy.R
index 8402c786a..b5688e81b 100644
--- a/tests/testthat/test-lazy.R
+++ b/tests/testthat/test-lazy.R
@@ -774,9 +774,9 @@ test_that("unnest", {
 
   df2 = df$
     select(
-      pl$col("a", "b", "c")$to_struct()$alias("first_struct"),
-      pl$col("d", "e", "f")$to_struct()$alias("second_struct")
-    )
+    pl$col("a", "b", "c")$to_struct()$alias("first_struct"),
+    pl$col("d", "e", "f")$to_struct()$alias("second_struct")
+  )
 
   expect_identical(
     df2$unnest()$collect()$to_data_frame(),
@@ -787,9 +787,9 @@ test_that("unnest", {
     df2$unnest("first_struct")$collect()$to_data_frame(),
     df$
       select(
-        pl$col("a", "b", "c"),
-        pl$col("d", "e", "f")$to_struct()$alias("second_struct")
-      )$
+      pl$col("a", "b", "c"),
+      pl$col("d", "e", "f")$to_struct()$alias("second_struct")
+    )$
       collect()$
       to_data_frame()
   )

From 14fd71fd14f2ef0b1a8cd2a00327bc95a0ef9084 Mon Sep 17 00:00:00 2001
From: sorhawell <sorhawell@gmail.com>
Date: Sat, 7 Oct 2023 01:53:56 +0300
Subject: [PATCH 10/14] disallow following lazy args to a first eager

---
 R/functions__eager.R         | 33 ++++++++++++++++++++++++++-------
 src/rust/src/utils/mod.rs    |  4 ----
 tests/testthat/test-concat.R | 21 ++++++++++++++++++++-
 3 files changed, 46 insertions(+), 12 deletions(-)

diff --git a/R/functions__eager.R b/R/functions__eager.R
index 6b4adcdd8..5344bc621 100644
--- a/R/functions__eager.R
+++ b/R/functions__eager.R
@@ -1,6 +1,9 @@
 #' Concat polars objects
 #' @name pl_concat
-#' @param l list of DataFrame, or Series, LazyFrame or Expr
+#' @param ... individual unpacked args or args wrappend in list(). Args should either be eager
+#' as DataFrame, Series and R vectors, OR lazy as LazyFrame and Expr. If first argument is lazy
+#' the return is LazyFrame, otherwise DataFrame. If returning a DataFrame all args must be eager,
+#' to avoid some implicit collect.
 #' @param rechunk perform a rechunk at last
 #' @param how choice of bind direction "vertical"(rbind) "horizontal"(cbind) "diagonal" diagonally
 #' @param parallel Boolean default TRUE, only used for LazyFrames
@@ -40,9 +43,8 @@
 pl$concat = function(
     ..., # list of DataFrames or Series or lazyFrames or expr
     rechunk = TRUE,
-    how = c("vertical", "horizontal", "diagonal"), # , "vertical_relaxed","diangonal_relaxed"),
+    how = c("vertical", "horizontal", "diagonal"),
     parallel = TRUE,
-    # eager = FALSE,
     to_supertypes = FALSE) {
   # unpack arg list
   l = unpack_list(..., skip_classes = "data.frame")
@@ -63,8 +65,22 @@ pl$concat = function(
   eager = !inherits(first, "LazyFrame")
   args_modified = names(as.list(sys.call()[-1L]))
 
-  # dispatch on item class and how
+  # check not using any mixing of types which could lead to implicit collect
+  if (eager) {
+    for (i in seq_along(l)) {
+      if (inherits(l[[i]], c("LazyFrame", "Expr"))) {
+        .pr$RPolarsErr$new()$
+          plain("tip: explitcit collect lazy inputs first, e.g. pl$concat(dataframe, lazyframe$collect())")$
+          plain("LazyFrame or Expr not allowed if first arg is a DataFrame, to avoid implicit collect")$
+          bad_robj(l[[i]])$
+          bad_arg(paste("of those to concatenate, number", i)) |>
+          Err() |>
+          unwrap("in pl$concat()")
+      }
+    }
+  }
 
+  # dispatch on item class and how
   Result_out = pcase(
     how == "vertical" && (inherits(first, "Series") || is.vector(first)),
     {
@@ -114,13 +130,16 @@ pl$concat = function(
   # convert back from lazy if eager
   and_then(Result_out, \(x) {
     pcase(
+      # run-time assertion for future changes
       inherits(x, "DataFrame") && !eager, Err_plain("internal logical error in pl$concat()"),
+
+      # must collect as in rust side only lazy concat is implemented. Eager inputs are wrapped in
+      # lazy and then collected again. This does not mean any user input is collected.
       inherits(x, "LazyFrame") && eager, Ok(x$collect()),
       or_else = Ok(x)
     )
-  }) |> unwrap(
-    "in pl$concat()"
-  )
+  }) |>
+    unwrap("in pl$concat()")
 }
 
 
diff --git a/src/rust/src/utils/mod.rs b/src/rust/src/utils/mod.rs
index 8d69f6138..aa28bb6eb 100644
--- a/src/rust/src/utils/mod.rs
+++ b/src/rust/src/utils/mod.rs
@@ -789,10 +789,6 @@ pub fn robj_to_dataframe(robj: extendr_api::Robj) -> RResult<DataFrame> {
                 let extptr_df: ExternalPtr<DataFrame> = robj.try_into()?;
                 Ok(extptr_df.0.clone())
             }
-            _ if robj.inherits("LazyFrame") => {
-                let lf: ExternalPtr<LazyFrame> = robj.try_into()?;
-                lf.0.clone().collect()
-            }
             _ if robj.inherits("data.frame") => {
                 let df = unpack_r_eval(R!("polars:::result(pl$DataFrame({{robj}}))"))?;
                 let extptr_df: ExternalPtr<DataFrame> = df.try_into()?;
diff --git a/tests/testthat/test-concat.R b/tests/testthat/test-concat.R
index 8851a6fc3..90e028d89 100644
--- a/tests/testthat/test-concat.R
+++ b/tests/testthat/test-concat.R
@@ -1,4 +1,24 @@
 test_that("concat dataframe", {
+  # mixing lazy with first eager not allowed
+  ctx = pl$concat(pl$DataFrame(mtcars), pl$LazyFrame(mtcars), how = "vertical") |> get_err_ctx()
+  expect_true(endsWith(ctx$BadArgument, "number 2"))
+  expect_true(endsWith(ctx$PlainErrorMessage, "avoid implicit collect"))
+
+  ctx = pl$concat(pl$DataFrame(mtcars), mtcars$hp, pl$lit(mtcars$mpg), how = "horizontal") |>
+    get_err_ctx()
+  expect_true(endsWith(ctx$BadArgument, "number 3"))
+  expect_true(endsWith(ctx$PlainErrorMessage, "avoid implicit collect"))
+
+  # mixing eager with first lazy is allowd
+  df_ref = rbind(mtcars, mtcars)
+  row.names(df_ref) = 1:64
+  expect_identical(
+    pl$concat(pl$LazyFrame(mtcars), pl$DataFrame(mtcars), how = "vertical")$
+      collect()$
+      to_data_frame(),
+    df_ref
+  )
+
   # vertical dfs
   l_ver = lapply(1:3, function(i) {
     l_internal = list(
@@ -8,7 +28,6 @@ test_that("concat dataframe", {
     pl$DataFrame(l_internal)
   })
 
-
   df_ver = pl$concat(l_ver, how = "vertical")
   expect_equal(
     df_ver$to_data_frame(),

From dd7bedb1ba3a4ffe52b02eb286a809996e63ac8b Mon Sep 17 00:00:00 2001
From: sorhawell <sorhawell@gmail.com>
Date: Sat, 7 Oct 2023 01:54:32 +0300
Subject: [PATCH 11/14] roxygen

---
 man/DataFrame_n_chunks.Rd | 8 ++++----
 man/DataFrame_rechunk.Rd  | 8 ++++----
 man/pl_concat.Rd          | 5 ++++-
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/man/DataFrame_n_chunks.Rd b/man/DataFrame_n_chunks.Rd
index 741043e79..d0f40c3a2 100644
--- a/man/DataFrame_n_chunks.Rd
+++ b/man/DataFrame_n_chunks.Rd
@@ -53,11 +53,11 @@ df$n_chunks()
 # The following toy example emulates the Series "chunkyness" in R. Here it a S3-classed list with
 # same type of vectors and where have all relevant S3 generics implemented to make behave as if
 # it was a regular vector.
-'+.chunked_vector' = \(x, y) structure(list(unlist(x) + unlist(y)),class = "chunked_vector")
+"+.chunked_vector" = \(x, y) structure(list(unlist(x) + unlist(y)), class = "chunked_vector")
 print.chunked_vector = \(x, ...) print(unlist(x), ...)
-c.chunked_vector = \(...) structure(do.call(c,lapply(list(...),unclass)),class="chunked_vector")
-rechunk = \(x) structure(unlist(x),class = "chunked_vector")
-x = structure(list(1:4, 5L),class = "chunked_vector")
+c.chunked_vector = \(...) structure(do.call(c, lapply(list(...), unclass)), class = "chunked_vector")
+rechunk = \(x) structure(unlist(x), class = "chunked_vector")
+x = structure(list(1:4, 5L), class = "chunked_vector")
 x
 x + 5:1
 lapply(x, tracemem) # trace chunks to verify no re-allocation
diff --git a/man/DataFrame_rechunk.Rd b/man/DataFrame_rechunk.Rd
index 3f8c27a3d..a62a752cf 100644
--- a/man/DataFrame_rechunk.Rd
+++ b/man/DataFrame_rechunk.Rd
@@ -51,11 +51,11 @@ df$n_chunks()
 # The following toy example emulates the Series "chunkyness" in R. Here it a S3-classed list with
 # same type of vectors and where have all relevant S3 generics implemented to make behave as if
 # it was a regular vector.
-'+.chunked_vector' = \(x, y) structure(list(unlist(x) + unlist(y)),class = "chunked_vector")
+"+.chunked_vector" = \(x, y) structure(list(unlist(x) + unlist(y)), class = "chunked_vector")
 print.chunked_vector = \(x, ...) print(unlist(x), ...)
-c.chunked_vector = \(...) structure(do.call(c,lapply(list(...),unclass)),class="chunked_vector")
-rechunk = \(x) structure(unlist(x),class = "chunked_vector")
-x = structure(list(1:4, 5L),class = "chunked_vector")
+c.chunked_vector = \(...) structure(do.call(c, lapply(list(...), unclass)), class = "chunked_vector")
+rechunk = \(x) structure(unlist(x), class = "chunked_vector")
+x = structure(list(1:4, 5L), class = "chunked_vector")
 x
 x + 5:1
 lapply(x, tracemem) # trace chunks to verify no re-allocation
diff --git a/man/pl_concat.Rd b/man/pl_concat.Rd
index 732596535..44fe13485 100644
--- a/man/pl_concat.Rd
+++ b/man/pl_concat.Rd
@@ -4,7 +4,10 @@
 \alias{pl_concat}
 \title{Concat polars objects}
 \arguments{
-\item{l}{list of DataFrame, or Series, LazyFrame or Expr}
+\item{...}{individual unpacked args or args wrappend in list(). Args should either be eager
+as DataFrame, Series and R vectors, OR lazy as LazyFrame and Expr. If first argument is lazy
+the return is LazyFrame, otherwise DataFrame. If returning a DataFrame all args must be eager,
+to avoid some implicit collect.}
 
 \item{rechunk}{perform a rechunk at last}
 

From 90d5ec3d8e575b7d4aa30b007154972eb50039a2 Mon Sep 17 00:00:00 2001
From: sorhawell <sorhawell@gmail.com>
Date: Sat, 7 Oct 2023 10:01:32 +0300
Subject: [PATCH 12/14] reduce linewidth to 100 for two examples

---
 R/dataframe__frame.R      | 8 ++++++--
 man/DataFrame_n_chunks.Rd | 4 +++-
 man/DataFrame_rechunk.Rd  | 4 +++-
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R
index 0efcd191d..0bd0e2773 100644
--- a/R/dataframe__frame.R
+++ b/R/dataframe__frame.R
@@ -1079,7 +1079,9 @@ DataFrame_first = function() {
 #' # it was a regular vector.
 #' "+.chunked_vector" = \(x, y) structure(list(unlist(x) + unlist(y)), class = "chunked_vector")
 #' print.chunked_vector = \(x, ...) print(unlist(x), ...)
-#' c.chunked_vector = \(...) structure(do.call(c, lapply(list(...), unclass)), class = "chunked_vector")
+#' c.chunked_vector = \(...) {
+#'   structure(do.call(c, lapply(list(...), unclass)), class = "chunked_vector")
+#' }
 #' rechunk = \(x) structure(unlist(x), class = "chunked_vector")
 #' x = structure(list(1:4, 5L), class = "chunked_vector")
 #' x
@@ -1143,7 +1145,9 @@ DataFrame_n_chunks = function(strategy = "all") {
 #' # it was a regular vector.
 #' "+.chunked_vector" = \(x, y) structure(list(unlist(x) + unlist(y)), class = "chunked_vector")
 #' print.chunked_vector = \(x, ...) print(unlist(x), ...)
-#' c.chunked_vector = \(...) structure(do.call(c, lapply(list(...), unclass)), class = "chunked_vector")
+#' c.chunked_vector = \(...) {
+#'   structure(do.call(c, lapply(list(...), unclass)), class = "chunked_vector")
+#' }
 #' rechunk = \(x) structure(unlist(x), class = "chunked_vector")
 #' x = structure(list(1:4, 5L), class = "chunked_vector")
 #' x
diff --git a/man/DataFrame_n_chunks.Rd b/man/DataFrame_n_chunks.Rd
index d0f40c3a2..7c5ddd526 100644
--- a/man/DataFrame_n_chunks.Rd
+++ b/man/DataFrame_n_chunks.Rd
@@ -55,7 +55,9 @@ df$n_chunks()
 # it was a regular vector.
 "+.chunked_vector" = \(x, y) structure(list(unlist(x) + unlist(y)), class = "chunked_vector")
 print.chunked_vector = \(x, ...) print(unlist(x), ...)
-c.chunked_vector = \(...) structure(do.call(c, lapply(list(...), unclass)), class = "chunked_vector")
+c.chunked_vector = \(...) {
+  structure(do.call(c, lapply(list(...), unclass)), class = "chunked_vector")
+}
 rechunk = \(x) structure(unlist(x), class = "chunked_vector")
 x = structure(list(1:4, 5L), class = "chunked_vector")
 x
diff --git a/man/DataFrame_rechunk.Rd b/man/DataFrame_rechunk.Rd
index a62a752cf..592cc15aa 100644
--- a/man/DataFrame_rechunk.Rd
+++ b/man/DataFrame_rechunk.Rd
@@ -53,7 +53,9 @@ df$n_chunks()
 # it was a regular vector.
 "+.chunked_vector" = \(x, y) structure(list(unlist(x) + unlist(y)), class = "chunked_vector")
 print.chunked_vector = \(x, ...) print(unlist(x), ...)
-c.chunked_vector = \(...) structure(do.call(c, lapply(list(...), unclass)), class = "chunked_vector")
+c.chunked_vector = \(...) {
+  structure(do.call(c, lapply(list(...), unclass)), class = "chunked_vector")
+}
 rechunk = \(x) structure(unlist(x), class = "chunked_vector")
 x = structure(list(1:4, 5L), class = "chunked_vector")
 x

From c8a2abeac99c89a0ba9d9240b91adaa4a7da3b65 Mon Sep 17 00:00:00 2001
From: sorhawell <sorhawell@gmail.com>
Date: Sun, 8 Oct 2023 14:59:51 +0300
Subject: [PATCH 13/14] move rs import statement + news

---
 NEWS.md                        | 1 +
 src/rust/src/rdataframe/mod.rs | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 84ba3446a..455509114 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -11,6 +11,7 @@
 
 ## What's changed
 
+- `pl$concat()` now also supports `Series`, `Expr` and `LazyFrame` (#407).
 - New method `$unnest()` for `LazyFrame` (#397).
 - New method `$sample()` for `DataFrame` (#399).
 - New method `$meta$tree_format()` to display an `Expr` as a tree (#401).
diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs
index 2e4c77433..4bccc8a87 100644
--- a/src/rust/src/rdataframe/mod.rs
+++ b/src/rust/src/rdataframe/mod.rs
@@ -9,7 +9,7 @@ use crate::lazy;
 use crate::rdatatype;
 use crate::rdatatype::RPolarsDataType;
 use crate::robj_to;
-use crate::rpolarserr::{polars_to_rpolars_err, RResult};
+use crate::rpolarserr::*;
 
 pub use lazy::dataframe::*;
 
@@ -77,7 +77,7 @@ impl From<pl::DataFrame> for DataFrame {
         DataFrame(item)
     }
 }
-use crate::rpolarserr::*;
+
 #[extendr]
 impl DataFrame {
     pub fn shape(&self) -> Robj {

From a53620e3f9dc61cc696197cb002d66d371ec6cdc Mon Sep 17 00:00:00 2001
From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
Date: Mon, 9 Oct 2023 09:29:13 +0200
Subject: [PATCH 14/14] improve docs

---
 R/dataframe__frame.R      | 104 ++++++++++----------------------------
 R/functions__eager.R      |  38 +++++++++-----
 man/DataFrame_n_chunks.Rd |  43 ++++++++--------
 man/DataFrame_rechunk.Rd  |  40 +++++++--------
 man/pl_concat.Rd          |  36 +++++++++----
 5 files changed, 120 insertions(+), 141 deletions(-)

diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R
index 0bd0e2773..18f970061 100644
--- a/R/dataframe__frame.R
+++ b/R/dataframe__frame.R
@@ -1033,33 +1033,35 @@ DataFrame_first = function() {
 }
 
 
-#' @title Number of chunks of the Series' in a DataFrame
-#' @description Number of chunks (mem allocations) for all or first Series' in a DataFrame.
+#' @title Number of chunks of the Series in a DataFrame
+#' @description
+#' Number of chunks (memory allocations) for all or first Series in a DataFrame.
 #' @details
-#' DataFrame is a vector of Series'. Each Series in rust-polars is a wrapper around a ChunkedArray,
-#' which is like a virtual contiguous vector, which is physically backed by an ordered set of
-#' chunks. Each chunk of values has a contiguous memory layout and is an arrow array. Arrow arrays
-#' are a fast, thread-safe and cross-platform  memory layout.
+#' A DataFrame is a vector of Series. Each Series in rust-polars is a wrapper
+#' around a ChunkedArray, which is like a virtual contiguous vector physically
+#' backed by an ordered set of chunks. Each chunk of values has a contiguous
+#' memory layout and is an arrow array. Arrow arrays are a fast, thread-safe and
+#' cross-platform memory layout.
 #'
-#' In R when combining with `c()` or `rbind()`, it requires immediate vector re-allocation to place
-#' vector values in contiguous memory, hence repeated appending to a vector in R is discouraged
-#' because slow.
+#' In R, combining with `c()` or `rbind()` requires immediate vector re-allocation
+#' to place vector values in contiguous memory. This is slow and memory consuming,
+#' and it is why repeatedly appending to a vector in R is discouraged.
 #'
-#' When concatenating/appending to Series' or DataFrame' re-allocation can be avoided or delayed,
-#' by simply appending chunks to each individual Series.
-#'
-#' However, if chunks become many and small or are misaligned across Series, this can hurt the
-#' performance of following operations.
+#' In polars, when we concatenate or append to Series or DataFrame, the
+#' re-allocation can be avoided or delayed by simply appending chunks to each
+#' individual Series. However, if chunks become many and small or are misaligned
+#' across Series, this can hurt the performance of subsequent operations.
 #'
 #' Most places in the polars api where chunking could occur, the user have to
-#' typically actively opt-out by setting and arg `rechunk = FALSE`. Thus it is not normal
+#' typically actively opt-out by setting an argument `rechunk = FALSE`.
 #'
 #' @keywords DataFrame
-#' @param strategy string either 'all' or 'first'. 'first' only returns chunks for first Series.
-#' @return real vector of chunk counts per Series.
+#' @param strategy Either `"all"` or `"first"`. `"first"` only returns chunks
+#' for the first Series.
+#' @return A real vector of chunk counts per Series.
 #' @seealso [`<DataFrame>$rechunk()`][DataFrame_rechunk]
 #' @examples
-#' # create DataFrame with miss aligned chunks
+#' # create DataFrame with misaligned chunks
 #' df = pl$concat(
 #'   1:10, # single chunk
 #'   pl$concat(1:5, 1:5, rechunk = FALSE, how = "vertical")$rename("b"), # two chunks
@@ -1074,9 +1076,9 @@ DataFrame_first = function() {
 #' # rechunk is not an in-place operation
 #' df$n_chunks()
 #'
-#' # The following toy example emulates the Series "chunkyness" in R. Here it a S3-classed list with
-#' # same type of vectors and where have all relevant S3 generics implemented to make behave as if
-#' # it was a regular vector.
+#' # The following toy example emulates the Series "chunkyness" in R. Here it a
+#' # S3-classed list with same type of vectors and where have all relevant S3
+#' # generics implemented to make behave as if it was a regular vector.
 #' "+.chunked_vector" = \(x, y) structure(list(unlist(x) + unlist(y)), class = "chunked_vector")
 #' print.chunked_vector = \(x, ...) print(unlist(x), ...)
 #' c.chunked_vector = \(...) {
@@ -1100,65 +1102,13 @@ DataFrame_n_chunks = function(strategy = "all") {
 
 
 #' @title Rechunk a DataFrame
-#' @description Rechunking re-allocates any "chunked" memory allocations to speed-up e.g. vectorized
-#' operations.
-#' @details
-#' DataFrame is a vector of Series'. Each Series in rust-polars is a wrapper around a ChunkedArray,
-#' which is like a virtual contiguous vector, which is physically backed by an ordered set of
-#' chunks. Each chunk of values has a contiguous memory layout and is an arrow array. Arrow arrays
-#' are a fast, thread-safe and cross-platform  memory layout.
-#'
-#' In R when combining with `c()` or `rbind()`, it requires immediate vector re-allocation to place
-#' vector values in contiguous memory, hence repeated appending to a vector in R is discouraged
-#' because slow.
-#'
-#' When concatenating/appending to Series' or DataFrame' re-allocation can be avoided or delayed,
-#' by simply appending chunks to each individual Series.
-#'
-#' However, if chunks become many and small or are misaligned across Series, this can hurt the
-#' performance of following operations.
-#'
-#' Most places in the polars api where chunking could occur, the user have to
-#' typically actively opt-out by setting and arg `rechunk = FALSE`. Thus it is not normal
+#' @description Rechunking re-allocates any "chunked" memory allocations to
+#' speed-up e.g. vectorized operations.
+#' @inherit DataFrame_n_chunks details examples
 #'
 #' @keywords DataFrame
-#' @return real vector of chunk counts per Series.
+#' @return A DataFrame
 #' @seealso [`<DataFrame>$n_chunks()`][DataFrame_n_chunks]
-#' @examples
-#' # create DataFrame with miss alligned chunks
-#' df = pl$concat(
-#'   1:10, # single chunk
-#'   pl$concat(1:5, 1:5, rechunk = FALSE, how = "vertical")$rename("b"), # two chunks
-#'   how = "horizontal"
-#' )
-#' df
-#' df$n_chunks()
-#'
-#' # rechunk a chunked DataFrame
-#' df$rechunk()$n_chunks()
-#'
-#' # rechunk is not an in-place operation
-#' df$n_chunks()
-#'
-#' # The following toy example emulates the Series "chunkyness" in R. Here it a S3-classed list with
-#' # same type of vectors and where have all relevant S3 generics implemented to make behave as if
-#' # it was a regular vector.
-#' "+.chunked_vector" = \(x, y) structure(list(unlist(x) + unlist(y)), class = "chunked_vector")
-#' print.chunked_vector = \(x, ...) print(unlist(x), ...)
-#' c.chunked_vector = \(...) {
-#'   structure(do.call(c, lapply(list(...), unclass)), class = "chunked_vector")
-#' }
-#' rechunk = \(x) structure(unlist(x), class = "chunked_vector")
-#' x = structure(list(1:4, 5L), class = "chunked_vector")
-#' x
-#' x + 5:1
-#' lapply(x, tracemem) # trace chunks to verify no re-allocation
-#' z = c(x, x)
-#' z # looks like a plain vector
-#' lapply(z, tracemem) # mem allocation  in z are the same from x
-#' str(z)
-#' z = rechunk(z)
-#' str(z)
 DataFrame_rechunk = function() {
   .pr$DataFrame$rechunk(self)
 }
diff --git a/R/functions__eager.R b/R/functions__eager.R
index 5344bc621..d925d8582 100644
--- a/R/functions__eager.R
+++ b/R/functions__eager.R
@@ -1,17 +1,24 @@
 #' Concat polars objects
 #' @name pl_concat
-#' @param ... individual unpacked args or args wrappend in list(). Args should either be eager
-#' as DataFrame, Series and R vectors, OR lazy as LazyFrame and Expr. If first argument is lazy
-#' the return is LazyFrame, otherwise DataFrame. If returning a DataFrame all args must be eager,
-#' to avoid some implicit collect.
-#' @param rechunk perform a rechunk at last
-#' @param how choice of bind direction "vertical"(rbind) "horizontal"(cbind) "diagonal" diagonally
-#' @param parallel Boolean default TRUE, only used for LazyFrames
-#' @param to_supertypes Boolean default TRUE, cast columns shared super types, if any.
+#' @param ... Either individual unpacked args or args wrapped in list(). Args can
+#' be eager as DataFrame, Series and R vectors, or lazy as LazyFrame and Expr.
+#' The first element determines the output of `$concat()`: if the first element
+#' is lazy, a LazyFrame is returned; otherwise, a DataFrame is returned (note
+#' that if the first element is eager, all other elements have to be eager to
+#' avoid implicit collect).
+#' @param rechunk Perform a rechunk at last.
+#' @param how Bind direction. Can be "vertical" (like `rbind()`), "horizontal"
+#' (like `cbind()`), or "diagonal".
+#' @param parallel Only used for LazyFrames. If `TRUE` (default), lazy
+#' computations may be executed in parallel.
+#' @param to_supertypes If `TRUE` (default), cast columns shared super types, if
+#' any. For example, if we try to vertically concatenate two columns of types `i32`
+#' and `f64`, the column of type `i32` will be cast to `f64` beforehand. This
+#' argument is equivalent to the "_relaxed" operations in Python polars.
 #'
 #' @details
-#' Categorical columns/Series must have been constructed while global string cache enabled
-#' [`pl$enable_string_cache()`][pl_enable_string_cache]
+#' Categorical columns/Series must have been constructed while global string
+#' cache enabled. See [`pl$enable_string_cache()`][pl_enable_string_cache].
 #'
 #'
 #' @return DataFrame, or Series, LazyFrame or Expr
@@ -27,7 +34,6 @@
 #' })
 #' pl$concat(l_ver, how = "vertical")
 #'
-#'
 #' # horizontal
 #' l_hor = lapply(1:10, function(i) {
 #'   l_internal = list(
@@ -38,8 +44,16 @@
 #'   pl$DataFrame(l_internal)
 #' })
 #' pl$concat(l_hor, how = "horizontal")
+#'
 #' # diagonal
 #' pl$concat(l_hor, how = "diagonal")
+#'
+#' # if two columns don't share the same type, concat() will error unless we use
+#' # `to_supertypes = TRUE`:
+#' test = pl$DataFrame(x = 1L) # i32
+#' test2 = pl$DataFrame(x = 1.0) #f64
+#'
+#' pl$concat(test, test2, to_supertypes = TRUE)
 pl$concat = function(
     ..., # list of DataFrames or Series or lazyFrames or expr
     rechunk = TRUE,
@@ -70,7 +84,7 @@ pl$concat = function(
     for (i in seq_along(l)) {
       if (inherits(l[[i]], c("LazyFrame", "Expr"))) {
         .pr$RPolarsErr$new()$
-          plain("tip: explitcit collect lazy inputs first, e.g. pl$concat(dataframe, lazyframe$collect())")$
+          plain("tip: explicitly collect lazy inputs first, e.g. pl$concat(dataframe, lazyframe$collect())")$
           plain("LazyFrame or Expr not allowed if first arg is a DataFrame, to avoid implicit collect")$
           bad_robj(l[[i]])$
           bad_arg(paste("of those to concatenate, number", i)) |>
diff --git a/man/DataFrame_n_chunks.Rd b/man/DataFrame_n_chunks.Rd
index 7c5ddd526..4fe2e50cc 100644
--- a/man/DataFrame_n_chunks.Rd
+++ b/man/DataFrame_n_chunks.Rd
@@ -2,40 +2,41 @@
 % Please edit documentation in R/dataframe__frame.R
 \name{DataFrame_n_chunks}
 \alias{DataFrame_n_chunks}
-\title{Number of chunks of the Series' in a DataFrame}
+\title{Number of chunks of the Series in a DataFrame}
 \usage{
 DataFrame_n_chunks(strategy = "all")
 }
 \arguments{
-\item{strategy}{string either 'all' or 'first'. 'first' only returns chunks for first Series.}
+\item{strategy}{Either \code{"all"} or \code{"first"}. \code{"first"} only returns chunks
+for the first Series.}
 }
 \value{
-real vector of chunk counts per Series.
+A real vector of chunk counts per Series.
 }
 \description{
-Number of chunks (mem allocations) for all or first Series' in a DataFrame.
+Number of chunks (memory allocations) for all or first Series in a DataFrame.
 }
 \details{
-DataFrame is a vector of Series'. Each Series in rust-polars is a wrapper around a ChunkedArray,
-which is like a virtual contiguous vector, which is physically backed by an ordered set of
-chunks. Each chunk of values has a contiguous memory layout and is an arrow array. Arrow arrays
-are a fast, thread-safe and cross-platform  memory layout.
+A DataFrame is a vector of Series. Each Series in rust-polars is a wrapper
+around a ChunkedArray, which is like a virtual contiguous vector physically
+backed by an ordered set of chunks. Each chunk of values has a contiguous
+memory layout and is an arrow array. Arrow arrays are a fast, thread-safe and
+cross-platform memory layout.
 
-In R when combining with \code{c()} or \code{rbind()}, it requires immediate vector re-allocation to place
-vector values in contiguous memory, hence repeated appending to a vector in R is discouraged
-because slow.
+In R, combining with \code{c()} or \code{rbind()} requires immediate vector re-allocation
+to place vector values in contiguous memory. This is slow and memory consuming,
+and it is why repeatedly appending to a vector in R is discouraged.
 
-When concatenating/appending to Series' or DataFrame' re-allocation can be avoided or delayed,
-by simply appending chunks to each individual Series.
-
-However, if chunks become many and small or are misaligned across Series, this can hurt the
-performance of following operations.
+In polars, when we concatenate or append to Series or DataFrame, the
+re-allocation can be avoided or delayed by simply appending chunks to each
+individual Series. However, if chunks become many and small or are misaligned
+across Series, this can hurt the performance of subsequent operations.
 
 Most places in the polars api where chunking could occur, the user have to
-typically actively opt-out by setting and arg \code{rechunk = FALSE}. Thus it is not normal
+typically actively opt-out by setting an argument \code{rechunk = FALSE}.
 }
 \examples{
-# create DataFrame with miss aligned chunks
+# create DataFrame with misaligned chunks
 df = pl$concat(
   1:10, # single chunk
   pl$concat(1:5, 1:5, rechunk = FALSE, how = "vertical")$rename("b"), # two chunks
@@ -50,9 +51,9 @@ df$rechunk()$n_chunks()
 # rechunk is not an in-place operation
 df$n_chunks()
 
-# The following toy example emulates the Series "chunkyness" in R. Here it a S3-classed list with
-# same type of vectors and where have all relevant S3 generics implemented to make behave as if
-# it was a regular vector.
+# The following toy example emulates the Series "chunkyness" in R. Here it a
+# S3-classed list with same type of vectors and where have all relevant S3
+# generics implemented to make behave as if it was a regular vector.
 "+.chunked_vector" = \(x, y) structure(list(unlist(x) + unlist(y)), class = "chunked_vector")
 print.chunked_vector = \(x, ...) print(unlist(x), ...)
 c.chunked_vector = \(...) {
diff --git a/man/DataFrame_rechunk.Rd b/man/DataFrame_rechunk.Rd
index 592cc15aa..25606064c 100644
--- a/man/DataFrame_rechunk.Rd
+++ b/man/DataFrame_rechunk.Rd
@@ -7,33 +7,33 @@
 DataFrame_rechunk()
 }
 \value{
-real vector of chunk counts per Series.
+A DataFrame
 }
 \description{
-Rechunking re-allocates any "chunked" memory allocations to speed-up e.g. vectorized
-operations.
+Rechunking re-allocates any "chunked" memory allocations to
+speed-up e.g. vectorized operations.
 }
 \details{
-DataFrame is a vector of Series'. Each Series in rust-polars is a wrapper around a ChunkedArray,
-which is like a virtual contiguous vector, which is physically backed by an ordered set of
-chunks. Each chunk of values has a contiguous memory layout and is an arrow array. Arrow arrays
-are a fast, thread-safe and cross-platform  memory layout.
+A DataFrame is a vector of Series. Each Series in rust-polars is a wrapper
+around a ChunkedArray, which is like a virtual contiguous vector physically
+backed by an ordered set of chunks. Each chunk of values has a contiguous
+memory layout and is an arrow array. Arrow arrays are a fast, thread-safe and
+cross-platform memory layout.
 
-In R when combining with \code{c()} or \code{rbind()}, it requires immediate vector re-allocation to place
-vector values in contiguous memory, hence repeated appending to a vector in R is discouraged
-because slow.
+In R, combining with \code{c()} or \code{rbind()} requires immediate vector re-allocation
+to place vector values in contiguous memory. This is slow and memory consuming,
+and it is why repeatedly appending to a vector in R is discouraged.
 
-When concatenating/appending to Series' or DataFrame' re-allocation can be avoided or delayed,
-by simply appending chunks to each individual Series.
-
-However, if chunks become many and small or are misaligned across Series, this can hurt the
-performance of following operations.
+In polars, when we concatenate or append to Series or DataFrame, the
+re-allocation can be avoided or delayed by simply appending chunks to each
+individual Series. However, if chunks become many and small or are misaligned
+across Series, this can hurt the performance of subsequent operations.
 
 Most places in the polars api where chunking could occur, the user have to
-typically actively opt-out by setting and arg \code{rechunk = FALSE}. Thus it is not normal
+typically actively opt-out by setting an argument \code{rechunk = FALSE}.
 }
 \examples{
-# create DataFrame with miss alligned chunks
+# create DataFrame with misaligned chunks
 df = pl$concat(
   1:10, # single chunk
   pl$concat(1:5, 1:5, rechunk = FALSE, how = "vertical")$rename("b"), # two chunks
@@ -48,9 +48,9 @@ df$rechunk()$n_chunks()
 # rechunk is not an in-place operation
 df$n_chunks()
 
-# The following toy example emulates the Series "chunkyness" in R. Here it a S3-classed list with
-# same type of vectors and where have all relevant S3 generics implemented to make behave as if
-# it was a regular vector.
+# The following toy example emulates the Series "chunkyness" in R. Here it a
+# S3-classed list with same type of vectors and where have all relevant S3
+# generics implemented to make behave as if it was a regular vector.
 "+.chunked_vector" = \(x, y) structure(list(unlist(x) + unlist(y)), class = "chunked_vector")
 print.chunked_vector = \(x, ...) print(unlist(x), ...)
 c.chunked_vector = \(...) {
diff --git a/man/pl_concat.Rd b/man/pl_concat.Rd
index 44fe13485..227bebb70 100644
--- a/man/pl_concat.Rd
+++ b/man/pl_concat.Rd
@@ -4,18 +4,25 @@
 \alias{pl_concat}
 \title{Concat polars objects}
 \arguments{
-\item{...}{individual unpacked args or args wrappend in list(). Args should either be eager
-as DataFrame, Series and R vectors, OR lazy as LazyFrame and Expr. If first argument is lazy
-the return is LazyFrame, otherwise DataFrame. If returning a DataFrame all args must be eager,
-to avoid some implicit collect.}
+\item{...}{Either individual unpacked args or args wrapped in list(). Args can
+be eager as DataFrame, Series and R vectors, or lazy as LazyFrame and Expr.
+The first element determines the output of \verb{$concat()}: if the first element
+is lazy, a LazyFrame is returned; otherwise, a DataFrame is returned (note
+that if the first element is eager, all other elements have to be eager to
+avoid implicit collect).}
 
-\item{rechunk}{perform a rechunk at last}
+\item{rechunk}{Perform a rechunk at last.}
 
-\item{how}{choice of bind direction "vertical"(rbind) "horizontal"(cbind) "diagonal" diagonally}
+\item{how}{Bind direction. Can be "vertical" (like \code{rbind()}), "horizontal"
+(like \code{cbind()}), or "diagonal".}
 
-\item{parallel}{Boolean default TRUE, only used for LazyFrames}
+\item{parallel}{Only used for LazyFrames. If \code{TRUE} (default), lazy
+computations may be executed in parallel.}
 
-\item{to_supertypes}{Boolean default TRUE, cast columns shared super types, if any.}
+\item{to_supertypes}{If \code{TRUE} (default), cast columns shared super types, if
+any. For example, if we try to vertically concatenate two columns of types \code{i32}
+and \code{f64}, the column of type \code{i32} will be cast to \code{f64} beforehand. This
+argument is equivalent to the "_relaxed" operations in Python polars.}
 }
 \value{
 DataFrame, or Series, LazyFrame or Expr
@@ -24,8 +31,8 @@ DataFrame, or Series, LazyFrame or Expr
 Concat polars objects
 }
 \details{
-Categorical columns/Series must have been constructed while global string cache enabled
-\code{\link[=pl_enable_string_cache]{pl$enable_string_cache()}}
+Categorical columns/Series must have been constructed while global string
+cache enabled. See \code{\link[=pl_enable_string_cache]{pl$enable_string_cache()}}.
 }
 \examples{
 # vertical
@@ -38,7 +45,6 @@ l_ver = lapply(1:10, function(i) {
 })
 pl$concat(l_ver, how = "vertical")
 
-
 # horizontal
 l_hor = lapply(1:10, function(i) {
   l_internal = list(
@@ -49,6 +55,14 @@ l_hor = lapply(1:10, function(i) {
   pl$DataFrame(l_internal)
 })
 pl$concat(l_hor, how = "horizontal")
+
 # diagonal
 pl$concat(l_hor, how = "diagonal")
+
+# if two columns don't share the same type, concat() will error unless we use
+# `to_supertypes = TRUE`:
+test = pl$DataFrame(x = 1L) # i32
+test2 = pl$DataFrame(x = 1.0) #f64
+
+pl$concat(test, test2, to_supertypes = TRUE)
 }