feat(python): Add top-level pl.sql function
alexander-beedie committed May 27, 2024
1 parent cc2c905 commit 47a2691
Showing 8 changed files with 161 additions and 41 deletions.
3 changes: 2 additions & 1 deletion py-polars/polars/__init__.py
@@ -215,7 +215,7 @@
threadpool_size,
)
from polars.series import Series
from polars.sql import SQLContext
from polars.sql import SQLContext, sql
from polars.string_cache import (
StringCache,
disable_string_cache,
@@ -430,6 +430,7 @@
"from_repr",
# polars.sql
"SQLContext",
"sql",
# polars.utils
"build_info",
"get_index_type",
2 changes: 2 additions & 0 deletions py-polars/polars/sql/__init__.py
@@ -1,5 +1,7 @@
from polars.sql.context import SQLContext
from polars.sql.functions import sql

__all__ = [
"SQLContext",
"sql",
]
119 changes: 119 additions & 0 deletions py-polars/polars/sql/functions.py
@@ -0,0 +1,119 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Literal, overload

if TYPE_CHECKING:
from polars.dataframe import DataFrame
from polars.lazyframe import LazyFrame


@overload
def sql(query: str, *, eager: Literal[False] = False) -> LazyFrame: ...


@overload
def sql(query: str, *, eager: Literal[True]) -> DataFrame: ...


def sql(query: str, *, eager: bool = False) -> DataFrame | LazyFrame:
    """
    Execute a SQL query against frames in the global namespace.

    .. versionadded:: 0.20.31

    .. warning::
        This functionality is considered **unstable**, although it is close to
        being considered stable. It may be changed at any point without it being
        considered a breaking change.

    Parameters
    ----------
    query
        SQL query to execute.
    eager
        Automatically collect the result and return a DataFrame instead of a LazyFrame.

    Notes
    -----
    * More control over registration and execution behaviour is available by
      using the :class:`SQLContext` object.

    See Also
    --------
    SQLContext

    Examples
    --------
    >>> lf1 = pl.LazyFrame({"a": [1, 2, 3], "b": [6, 7, 8], "c": ["z", "y", "x"]})
    >>> lf2 = pl.LazyFrame({"a": [3, 2, 1], "d": [125, -654, 888]})

    Query the LazyFrame using SQL:

    >>> lf1.sql("SELECT c, b FROM self WHERE a > 1").collect()
    shape: (2, 2)
    ┌─────┬─────┐
    │ c   ┆ b   │
    │ --- ┆ --- │
    │ str ┆ i64 │
    ╞═════╪═════╡
    │ y   ┆ 7   │
    │ x   ┆ 8   │
    └─────┴─────┘

    Join two LazyFrames:

    >>> pl.sql(
    ...     '''
    ...     SELECT lf1.*, d
    ...     FROM lf1
    ...     INNER JOIN lf2 USING (a)
    ...     WHERE a > 1 AND b < 8
    ...     '''
    ... ).collect()
    shape: (1, 4)
    ┌─────┬─────┬─────┬──────┐
    │ a   ┆ b   ┆ c   ┆ d    │
    │ --- ┆ --- ┆ --- ┆ ---  │
    │ i64 ┆ i64 ┆ str ┆ i64  │
    ╞═════╪═════╪═════╪══════╡
    │ 2   ┆ 7   ┆ y   ┆ -654 │
    └─────┴─────┴─────┴──────┘

    Apply SQL transforms and subsequently filter natively (you can freely mix
    SQL and native operations):

    >>> pl.sql(
    ...     query='''
    ...         SELECT
    ...             a,
    ...             (a % 2 == 0) AS a_is_even,
    ...             (b::float4 / 2) AS "b/2",
    ...             CONCAT_WS(':', c, c, c) AS c_c_c
    ...         FROM lf1
    ...         ORDER BY a
    ...     ''',
    ... ).filter(~pl.col("c_c_c").str.starts_with("x")).collect()
    shape: (2, 4)
    ┌─────┬───────────┬─────┬───────┐
    │ a   ┆ a_is_even ┆ b/2 ┆ c_c_c │
    │ --- ┆ ---       ┆ --- ┆ ---   │
    │ i64 ┆ bool      ┆ f32 ┆ str   │
    ╞═════╪═══════════╪═════╪═══════╡
    │ 1   ┆ false     ┆ 3.0 ┆ z:z:z │
    │ 2   ┆ true      ┆ 3.5 ┆ y:y:y │
    └─────┴───────────┴─────┴───────┘
    """
    from polars._utils.unstable import issue_unstable_warning
    from polars.sql import SQLContext

    issue_unstable_warning(
        "`sql` is considered **unstable** (although it is close to being considered stable)."
    )
    with SQLContext(
        eager_execution=eager,
        register_globals=True,
    ) as ctx:
        return ctx.execute(query)


__all__ = ["sql"]
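
For context on how the new function above is intended to be used, here is a minimal usage sketch. It is not part of the diff and assumes a polars build that contains this commit. The eager keyword selects between the two overloads, returning a LazyFrame by default and a DataFrame when eager=True, and any DataFrame/LazyFrame objects in the calling scope are resolved by their variable names (via register_globals=True).

import polars as pl

df = pl.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})

# Default: pl.sql returns a LazyFrame that still needs to be collected.
lazy_out = pl.sql("SELECT a, b FROM df WHERE a > 1")
print(lazy_out.collect())

# eager=True: the result is collected internally and a DataFrame is returned.
eager_out = pl.sql("SELECT a, b FROM df WHERE a > 1", eager=True)
print(eager_out)
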
7 changes: 4 additions & 3 deletions py-polars/tests/unit/sql/test_joins.py
@@ -73,14 +73,15 @@ def test_join_inner(foods_ipc_path: Path, join_clause: str) -> None:
foods1 = pl.scan_ipc(foods_ipc_path)
foods2 = foods1 # noqa: F841

out = foods1.sql(
out = pl.sql(
f"""
SELECT *
FROM foods1
INNER JOIN foods2 {join_clause}
LIMIT 2
"""
).collect()
""",
eager=True,
)

assert out.to_dict(as_series=False) == {
"category": ["vegetables", "vegetables"],
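
The change above replaces the frame-level foods1.sql(...).collect() call with the new top-level pl.sql(..., eager=True). Based on the implementation in functions.py, the two forms should be equivalent, since eager=True simply collects the result inside the SQLContext. A small sketch of that equivalence follows (hypothetical frames, not taken from the test suite):

import polars as pl

foods1 = pl.LazyFrame({"category": ["fruit", "vegetables"], "calories": [30, 45]})
foods2 = pl.LazyFrame({"category": ["fruit", "vegetables"], "fats_g": [0.5, 0.2]})

# Both frames are referenced by name inside the SQL string only.
query = """
    SELECT * FROM foods1
    INNER JOIN foods2 USING (category)
    ORDER BY category
"""
lazy_then_collect = pl.sql(query).collect()
eager_result = pl.sql(query, eager=True)
assert lazy_then_collect.equals(eager_result)
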
20 changes: 10 additions & 10 deletions py-polars/tests/unit/sql/test_miscellaneous.py
@@ -24,16 +24,16 @@ def test_any_all() -> None:
res = df.sql(
"""
SELECT
x >= ALL(df.y) as 'All Geq',
x > ALL(df.y) as 'All G',
x < ALL(df.y) as 'All L',
x <= ALL(df.y) as 'All Leq',
x >= ANY(df.y) as 'Any Geq',
x > ANY(df.y) as 'Any G',
x < ANY(df.y) as 'Any L',
x <= ANY(df.y) as 'Any Leq',
x == ANY(df.y) as 'Any eq',
x != ANY(df.y) as 'Any Neq',
x >= ALL(df.y) AS "All Geq",
x > ALL(df.y) AS "All G",
x < ALL(df.y) AS "All L",
x <= ALL(df.y) AS "All Leq",
x >= ANY(df.y) AS "Any Geq",
x > ANY(df.y) AS "Any G",
x < ANY(df.y) AS "Any L",
x <= ANY(df.y) AS "Any Leq",
x == ANY(df.y) AS "Any eq",
x != ANY(df.y) AS "Any Neq",
FROM df
""",
)
2 changes: 1 addition & 1 deletion py-polars/tests/unit/sql/test_numeric.py
@@ -29,7 +29,7 @@ def test_modulo() -> None:
b % 3 AS b3,
MOD(c, 4) AS c4,
MOD(d, 5.5) AS d55
FROM df
FROM self
"""
)

6 changes: 3 additions & 3 deletions py-polars/tests/unit/sql/test_temporal.py
@@ -34,7 +34,7 @@ def test_date() -> None:

@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_datetime_to_time(time_unit: Literal["ns", "us", "ms"]) -> None:
df = pl.DataFrame(
df = pl.DataFrame( # noqa: F841
{
"dtm": [
datetime(2099, 12, 31, 23, 59, 59),
@@ -46,8 +46,8 @@ def test_datetime_to_time(time_unit: Literal["ns", "us", "ms"]) -> None:
schema={"dtm": pl.Datetime(time_unit)},
)

res = df.sql("SELECT dtm::time as tm from df")["tm"].to_list()
assert res == [
res = pl.sql("SELECT dtm::time AS tm from df").collect()
assert res["tm"].to_list() == [
time(23, 59, 59),
time(12, 30, 30),
time(1, 1, 1),
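
A side note on the # noqa: F841 markers added in these tests: the frames are never passed to pl.sql directly; they are referenced only by name inside the SQL string and resolved from the calling scope, so linters would otherwise flag the local assignment as unused. A minimal sketch of the pattern, with hypothetical names, assuming frames are resolved from the enclosing function's scope as these tests rely on:

import polars as pl

def daily_totals() -> pl.DataFrame:
    # `sales` is only referenced by name inside the SQL string, so static
    # analysis reports it as an unused local variable - hence the noqa marker.
    sales = pl.DataFrame({"day": [1, 1, 2], "amount": [10, 20, 5]})  # noqa: F841
    return pl.sql(
        "SELECT day, SUM(amount) AS total FROM sales GROUP BY day ORDER BY day",
        eager=True,
    )

print(daily_totals())
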
43 changes: 20 additions & 23 deletions py-polars/tests/unit/sql/test_trigonometric.py
@@ -8,24 +8,21 @@

def test_arctan2() -> None:
twoRootTwo = math.sqrt(2) / 2.0
df = pl.DataFrame(
df = pl.DataFrame( # noqa: F841
{
"y": [twoRootTwo, -twoRootTwo, twoRootTwo, -twoRootTwo],
"x": [twoRootTwo, twoRootTwo, -twoRootTwo, -twoRootTwo],
}
)

sql = pl.SQLContext(df=df)
res = sql.execute(
res = pl.sql(
"""
SELECT
ATAN2D(y,x) as "atan2d",
ATAN2(y,x) as "atan2"
ATAN2D(y,x) as "atan2d",
ATAN2(y,x) as "atan2"
FROM df
""",
eager=True,
)

df_result = pl.DataFrame({"atan2d": [45.0, -45.0, 135.0, -135.0]})
df_result = df_result.with_columns(pl.col("atan2d").cast(pl.Float64))
df_result = df_result.with_columns(pl.col("atan2d").radians().alias("atan2"))
@@ -44,25 +44,25 @@ def test_trig() -> None:
res = ctx.execute(
"""
SELECT
asin(1.0)/a as "pi values",
cos(asin(1.0)/a) AS "cos",
cot(asin(1.0)/a) AS "cot",
sin(asin(1.0)/a) AS "sin",
tan(asin(1.0)/a) AS "tan",
asin(1.0)/a as "pi values",
cos(asin(1.0)/a) AS "cos",
cot(asin(1.0)/a) AS "cot",
sin(asin(1.0)/a) AS "sin",
tan(asin(1.0)/a) AS "tan",
cosd(asind(1.0)/a) AS "cosd",
cotd(asind(1.0)/a) AS "cotd",
sind(asind(1.0)/a) AS "sind",
tand(asind(1.0)/a) AS "tand",
cosd(asind(1.0)/a) AS "cosd",
cotd(asind(1.0)/a) AS "cotd",
sind(asind(1.0)/a) AS "sind",
tand(asind(1.0)/a) AS "tand",
1.0/a as "inverse pi values",
acos(1.0/a) AS "acos",
asin(1.0/a) AS "asin",
atan(1.0/a) AS "atan",
1.0/a as "inverse pi values",
acos(1.0/a) AS "acos",
asin(1.0/a) AS "asin",
atan(1.0/a) AS "atan",
acosd(1.0/a) AS "acosd",
asind(1.0/a) AS "asind",
atand(1.0/a) AS "atand"
acosd(1.0/a) AS "acosd",
asind(1.0/a) AS "asind",
atand(1.0/a) AS "atand"
FROM df
""",
eager=True,