Commit

Cloud Fetch e2e tests (#154)
* Cloud Fetch e2e tests

Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com>

* Test case works for e2-dogfood shared unity catalog

Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com>

* Moving test to LargeQueriesSuite and setting catalog to hive_metastore

Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com>

* Align default value of buffer_size_bytes in driver tests

Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com>

* Adding comment to specify what's needed to run successfully

Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com>

---------

Signed-off-by: Matthew Kim <11141331+mattdeekay@users.noreply.github.com>
mattdeekay authored Jul 7, 2023
1 parent 5a34a4a commit 759401c
Showing 1 changed file with 33 additions and 1 deletion.
34 changes: 33 additions & 1 deletion tests/e2e/test_driver.py
@@ -1,3 +1,4 @@
+import itertools
 from contextlib import contextmanager
 from collections import OrderedDict
 import datetime
@@ -52,6 +53,7 @@ def __init__(self, method_name):
         # If running in local mode, just use environment variables for params.
         self.arguments = os.environ if get_args_from_env else {}
         self.arraysize = 1000
+        self.buffer_size_bytes = 104857600

     def connection_params(self, arguments):
         params = {
@@ -84,7 +86,7 @@ def connection(self, extra_params=()):
     @contextmanager
     def cursor(self, extra_params=()):
         with self.connection(extra_params) as conn:
-            cursor = conn.cursor(arraysize=self.arraysize)
+            cursor = conn.cursor(arraysize=self.arraysize, buffer_size_bytes=self.buffer_size_bytes)
             try:
                 yield cursor
             finally:
@@ -104,6 +106,36 @@ def get_some_rows(self, cursor, fetchmany_size):
         else:
             return None

+    @skipUnless(pysql_supports_arrow(), 'needs arrow support')
+    def test_cloud_fetch(self):
+        # This test can take several minutes to run
+        limits = [100000, 300000]
+        threads = [10, 25]
+        self.arraysize = 100000
+        # This test requires a large table with many rows to properly initiate cloud fetch.
+        # e2-dogfood host > hive_metastore catalog > main schema has such a table called store_sales.
+        # If this table is deleted or this test is run on a different host, a different table may need to be used.
+        base_query = "SELECT * FROM store_sales WHERE ss_sold_date_sk = 2452234 "
+        for num_limit, num_threads, lz4_compression in itertools.product(limits, threads, [True, False]):
+            with self.subTest(num_limit=num_limit, num_threads=num_threads, lz4_compression=lz4_compression):
+                cf_result, noop_result = None, None
+                query = base_query + "LIMIT " + str(num_limit)
+                with self.cursor({
+                    "use_cloud_fetch": True,
+                    "max_download_threads": num_threads,
+                    "catalog": "hive_metastore"
+                }) as cursor:
+                    cursor.execute(query)
+                    cf_result = cursor.fetchall()
+                with self.cursor({
+                    "catalog": "hive_metastore"
+                }) as cursor:
+                    cursor.execute(query)
+                    noop_result = cursor.fetchall()
+                assert len(cf_result) == len(noop_result)
+                for i in range(len(cf_result)):
+                    assert cf_result[i] == noop_result[i]
+

 # Exclude Retry tests because they require specific setups, and LargeQueries too slow for core
 # tests
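For readers who want to reproduce what the new test exercises outside the e2e harness, here is a minimal standalone sketch of the same flow: run one large query with Cloud Fetch enabled, run it again without, and check that both paths return identical rows. It is an illustration only, not part of this commit: the hostname, HTTP path, token, and query target are placeholders you must supply, and it assumes that use_cloud_fetch and max_download_threads are accepted as databricks.sql.connect keyword arguments, as the test's extra connection params suggest.

# Standalone sketch (not part of this commit) mirroring test_cloud_fetch.
# Assumes use_cloud_fetch / max_download_threads are accepted by sql.connect,
# as the extra connection params in the test suggest.
from databricks import sql

# Placeholder connection details -- replace with values for your workspace.
CONN_ARGS = dict(
    server_hostname="your-workspace.cloud.databricks.com",
    http_path="/sql/1.0/warehouses/your-warehouse-id",
    access_token="your-personal-access-token",
    catalog="hive_metastore",
)

# A query large enough to trigger Cloud Fetch result links; the committed test
# relies on the store_sales table available on the e2-dogfood host.
QUERY = "SELECT * FROM store_sales WHERE ss_sold_date_sk = 2452234 LIMIT 100000"


def fetch_rows(use_cloud_fetch: bool):
    """Run QUERY and return all rows, optionally via Cloud Fetch."""
    extra = {"use_cloud_fetch": True, "max_download_threads": 10} if use_cloud_fetch else {}
    with sql.connect(**CONN_ARGS, **extra) as conn:
        with conn.cursor() as cursor:
            cursor.execute(QUERY)
            return cursor.fetchall()


cloud_fetch_rows = fetch_rows(use_cloud_fetch=True)
baseline_rows = fetch_rows(use_cloud_fetch=False)
assert cloud_fetch_rows == baseline_rows  # both paths should return the same data

The committed test itself runs with the rest of the e2e suite in tests/e2e/test_driver.py; as the added comment notes, it needs a host whose hive_metastore catalog exposes a suitably large store_sales table.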
