diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala
index 297ee49fec6..ed7edeba396 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala
@@ -16,6 +16,8 @@
 
 package org.apache.spark.sql.catalyst.json.rapids
 
+import java.nio.charset.{Charset, StandardCharsets}
+
 import scala.collection.JavaConverters._
 
 import ai.rapids.cudf
@@ -138,6 +140,18 @@ object GpuJsonScan {
       meta.willNotWorkOnGpu("GpuJsonScan only supports \"\\n\" as a line separator")
     }
 
+    // Only UTF-8 and US-ASCII input is accepted here. Charset names are case-insensitive
+    // and may be aliases (e.g. "utf8", "ascii"), so resolve the requested name to a Charset
+    // before comparing. A name the JVM cannot resolve is tagged as unsupported as well.
+    parsedOptions.encoding.foreach { enc =>
+      val supported = scala.util.Try(Charset.forName(enc)).toOption.exists { cs =>
+        cs == StandardCharsets.UTF_8 || cs == StandardCharsets.US_ASCII
+      }
+      if (!supported) {
+        meta.willNotWorkOnGpu("GpuJsonScan only supports UTF8 or US-ASCII encoded data")
+      }
+    }
+
     if (readSchema.map(_.dataType).contains(DateType)) {
       ShimLoader.getSparkShims.dateFormatInRead(parsedOptions).foreach { dateFormat =>
         if (!supportedDateFormats.contains(dateFormat)) {