Skip to content

Commit

Permalink
Add in basic support to read structs from parquet (NVIDIA#1132)
Browse files: browse the repository at this point in the history
Signed-off-by: Robert (Bobby) Evans <bobby@apache.org>
  • Loading branch information
revans2 authored Nov 17, 2020
1 parent b71eb2d commit df4f9a4
Show file tree
Hide file tree
Showing 7 changed files with 9 additions and 1 deletion.
4 changes: 3 additions & 1 deletion integration_tests/src/main/python/parquet_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ def read_parquet_sql(data_path):
TimestampGen(start=datetime(1900, 1, 1, tzinfo=timezone.utc)), ArrayGen(byte_gen),
ArrayGen(long_gen), ArrayGen(string_gen), ArrayGen(date_gen),
ArrayGen(TimestampGen(start=datetime(1900, 1, 1, tzinfo=timezone.utc))),
ArrayGen(ArrayGen(byte_gen))],
ArrayGen(ArrayGen(byte_gen)),
StructGen([['child0', ArrayGen(byte_gen)], ['child1', byte_gen], ['child2', float_gen]]),
ArrayGen(StructGen([['child0', string_gen], ['child1', double_gen], ['child2', int_gen]]))],
pytest.param([timestamp_gen], marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/132'))]

# test with original parquet file reader, the multi-file parallel reader for cloud, and coalesce file reader for
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ class Spark300Shims extends SparkShims {
GpuOverrides.isSupportedType(t,
allowArray = true,
allowStringMaps = true,
allowStruct = true,
allowNesting = true)

// partition filters and data filters are not run on the GPU
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ class Spark300dbShims extends Spark300Shims {
GpuOverrides.isSupportedType(t,
allowArray = true,
allowStringMaps = true,
allowStruct = true,
allowNesting = true)

// partition filters and data filters are not run on the GPU
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ class Spark301dbShims extends Spark301Shims {
GpuOverrides.isSupportedType(t,
allowArray = true,
allowStringMaps = true,
allowStruct = true,
allowNesting = true)

// partition filters and data filters are not run on the GPU
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ class Spark310Shims extends Spark301Shims {
GpuOverrides.isSupportedType(t,
allowArray = true,
allowStringMaps = true,
allowStruct = true,
allowNesting = true)

// partition filters and data filters are not run on the GPU
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1909,6 +1909,7 @@ object GpuOverrides {
GpuOverrides.isSupportedType(t,
allowStringMaps = true,
allowArray = true,
allowStruct = true,
allowNesting = true)

override def convertToGpu(): GpuExec =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ object GpuParquetScanBase {
field.dataType,
allowStringMaps = true,
allowArray = true,
allowStruct = true,
allowNesting = true)) {
meta.willNotWorkOnGpu(s"GpuParquetScan does not support fields of type ${field.dataType}")
}
Expand Down

0 comments on commit df4f9a4

Please sign in to comment.