Skip to content

Commit

Permalink
migrate approx_percentile_cont, approx_distinct, and approx_median to…
Browse files Browse the repository at this point in the history
… UDAF

Ref: approx_distinct apache/datafusion#10851
Ref: approx_median apache/datafusion#10840
Ref: approx_percentile_cont and _with_weight apache/datafusion#10917
  • Loading branch information
Michael-J-Ward committed Jul 24, 2024
1 parent f7bd619 commit 1400069
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 12 deletions.
12 changes: 7 additions & 5 deletions python/datafusion/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1211,9 +1211,9 @@ def flatten(array: Expr) -> Expr:


# aggregate functions
def approx_distinct(arg: Expr) -> Expr:
def approx_distinct(expression: Expr) -> Expr:
"""Returns the approximate number of distinct values."""
return Expr(f.approx_distinct(arg.expr, distinct=True))
return Expr(f.approx_distinct(expression.expr))


def approx_median(arg: Expr, distinct: bool = False) -> Expr:
Expand All @@ -1222,20 +1222,22 @@ def approx_median(arg: Expr, distinct: bool = False) -> Expr:


def approx_percentile_cont(
expr: Expr,
expression: Expr,
percentile: Expr,
num_centroids: int | None = None,
# num_centroids: int | None = None,
distinct: bool = False,
) -> Expr:
"""Returns the value that is approximately at a given percentile of ``expr``."""
# TODO: enable num_centroids
num_centroids = None
if num_centroids is None:
return Expr(
f.approx_percentile_cont(expr.expr, percentile.expr, distinct=distinct)
)

return Expr(
f.approx_percentile_cont(
expr.expr, percentile.expr, num_centroids, distinct=distinct
expr.expr, percentile.expr, distinct=distinct
)
)

Expand Down
58 changes: 51 additions & 7 deletions src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,57 @@ use datafusion_expr::{
lit, Expr, WindowFunctionDefinition,
};

/// Wraps the `approx_distinct` aggregate expression builder for Python.
///
/// Takes the inner expression out of `expression`, passes it to
/// `functions_aggregate::expr_fn::approx_distinct::approx_distinct`, and
/// converts the resulting expression back into a `PyExpr`.
#[pyfunction]
pub fn approx_distinct(expression: PyExpr) -> PyExpr {
    let inner = expression.expr;
    let aggregate = functions_aggregate::expr_fn::approx_distinct::approx_distinct(inner);
    aggregate.into()
}

/// Wraps the `approx_median` aggregate expression builder for Python.
///
/// When `distinct` is `false` the aggregate expression is returned as built.
/// When `distinct` is `true` the expression is additionally passed through
/// `.distinct().build()` before being converted into a `PyExpr`.
///
/// # Errors
///
/// Propagates any error returned by `build()` on the distinct path.
#[pyfunction]
pub fn approx_median(expression: PyExpr, distinct: bool) -> PyResult<PyExpr> {
    // TODO: better builder pattern
    let aggregate = functions_aggregate::expr_fn::approx_median(expression.expr);

    // Common case first: nothing more to configure.
    if !distinct {
        return Ok(aggregate.into());
    }

    let deduplicated = aggregate.distinct().build()?;
    Ok(deduplicated.into())
}

/// Wraps the `approx_percentile_cont` aggregate expression builder for Python.
///
/// `expression` and `percentile` are forwarded, in that order, to
/// `functions_aggregate::expr_fn::approx_percentile_cont`. When `distinct`
/// is `true` the resulting expression is passed through
/// `.distinct().build()`; otherwise it is used unchanged.
///
/// # Errors
///
/// Propagates any error returned by `build()` on the distinct path.
#[pyfunction]
pub fn approx_percentile_cont(
    expression: PyExpr,
    percentile: PyExpr,
    distinct: bool,
) -> PyResult<PyExpr> {
    // TODO: better builder pattern
    let aggregate =
        functions_aggregate::expr_fn::approx_percentile_cont(expression.expr, percentile.expr);

    // Pick the final expression once, then convert at a single exit point.
    let finished = if distinct {
        aggregate.distinct().build()?
    } else {
        aggregate
    };
    Ok(finished.into())
}

/// Wraps the `approx_percentile_cont_with_weight` aggregate expression
/// builder for Python.
///
/// The inner expressions of `expression`, `weight`, and `percentile` are
/// forwarded, in that order, to
/// `functions_aggregate::expr_fn::approx_percentile_cont_with_weight`.
/// When `distinct` is `true` the resulting expression is passed through
/// `.distinct().build()`; otherwise it is used unchanged.
///
/// # Errors
///
/// Propagates any error returned by `build()` on the distinct path.
#[pyfunction]
pub fn approx_percentile_cont_with_weight(
    expression: PyExpr,
    weight: PyExpr,
    percentile: PyExpr,
    distinct: bool,
) -> PyResult<PyExpr> {
    let value_expr = expression.expr;
    let weight_expr = weight.expr;
    let percentile_expr = percentile.expr;

    let aggregate = functions_aggregate::expr_fn::approx_percentile_cont_with_weight(
        value_expr,
        weight_expr,
        percentile_expr,
    );

    if distinct {
        let deduplicated = aggregate.distinct().build()?;
        return Ok(deduplicated.into());
    }
    Ok(aggregate.into())
}

#[pyfunction]
pub fn sum(args: PyExpr) -> PyExpr {
functions_aggregate::expr_fn::sum(args.expr).into()
Expand Down Expand Up @@ -697,13 +748,6 @@ array_fn!(list_resize, array_resize, array size value);
array_fn!(flatten, array);
array_fn!(range, start stop step);

aggregate_function!(approx_distinct, ApproxDistinct);
aggregate_function!(approx_median, ApproxMedian);
aggregate_function!(approx_percentile_cont, ApproxPercentileCont);
aggregate_function!(
approx_percentile_cont_with_weight,
ApproxPercentileContWithWeight
);
aggregate_function!(array_agg, ArrayAgg);
aggregate_function!(avg, Avg);
aggregate_function!(corr, Correlation);
Expand Down

0 comments on commit 1400069

Please sign in to comment.