Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding LIMIT to MV_TO_ARRAY then UNNEST will result in null and NullPointerException #16091

Open
ccyjoshua opened this issue Mar 10, 2024 · 1 comment

Comments

@ccyjoshua
Copy link

ccyjoshua commented Mar 10, 2024

Affected Version

28.0.1

Description

  • Cluster size

6 historicals, others are single service running

  • Configurations in use

$ cat common.runtime.properties

#
# Extensions
#

druid.extensions.directory=extensions
druid.extensions.hadoopDependenciesDir=hadoop-dependencies
druid.extensions.loadList=["druid-kafka-indexing-service","druid-datasketches","druid-multi-stage-query","druid-s3-extensions","druid-avro-extensions","druid-parquet-extensions","mysql-metadata-storage","druid-histogram","druid-lookups-cached-global","statsd-emitter"]

#
# Logging
#

# Log all runtime properties on startup. Disable to avoid logging properties on startup:
druid.startup.logging.logProperties=true

#
# Zookeeper
#

druid.zk.service.host=ae-druid-zk-1001.bdp.com,ae-druid-zk-1002.bdp.com,ae-druid-zk-1003.bdp.com,ae-druid-zk-1004.bdp.com,ae-druid-zk-1005.bdp.com
druid.zk.paths.base=amp-druid-dev

#
# Metadata storage
#

# For MySQL:
## druid.metadata.storage.type=mysql
druid.metadata.storage.type=mysql
druid.metadata.storage.connector.connectURI=jdbc:mysql://adi-druid-dev-db.bdp.com:3306/druid
druid.metadata.storage.connector.user=druid_user
druid.metadata.storage.connector.password=****

#
# Deep storage
#

# For S3:
druid.storage.type=s3
druid.storage.bucket=****
druid.storage.baseKey=druid/amp-druid-dev/segments

#
# Indexing service logs
#

# For S3:
druid.indexer.logs.type=s3
druid.indexer.logs.s3Bucket=****
druid.indexer.logs.s3Prefix=infrastructure/druid-dev/indexing-logs

#
# Service discovery
#

druid.selectors.indexing.serviceName=amp-druid-dev/overlord
druid.selectors.coordinator.serviceName=amp-druid-dev/coordinator

#
# Monitoring
#
druid.emitter=statsd
druid.emitter.statsd.prefix=adi
druid.emitter.statsd.includeHost=true
druid.emitter.logging.logLevel=debug
druid.emitter.statsd.logLevel=debug

# Configure `statsd-emitter` endpoint
druid.emitter.statsd.hostname=127.0.0.1
druid.emitter.statsd.port=8125

# Configure `statsd-emitter` to use dogstatsd format. Must be set to true, otherwise tags are not reported correctly to Datadog.
druid.emitter.statsd.dogstatsd=true
druid.emitter.statsd.dogstatsdServiceAsTag=true

druid.javascript.enabled=true

#
# Security
#
druid.server.hiddenProperties=["druid.s3.accessKey","druid.s3.secretKey","druid.metadata.storage.connector.password", "password", "key", "token", "pwd"]


#
# SQL
#
druid.sql.enable=true

# Planning SQL query when there is aggregate distinct in the statement
druid.sql.planner.useGroupingSetForExactDistinct=true

#
# Lookups
#
druid.lookup.enableLookupSyncOnStartup=false

#
# Expression processing config
#
druid.expressions.useStrictBooleans=true

#
# Http client
#

$ cat query/broker/runtime.properties

# Broker Process Configs

druid.host=amp-druid-dev-query-1001.bdp.com
druid.service=amp-druid-dev/broker
druid.plaintextPort=8082

# Query configuration

## Query Routing

druid.broker.balancer.type=connectionCount
druid.broker.select.tier=highestPriority

## Query prioritization

druid.query.scheduler.numThreads=95
druid.query.scheduler.prioritization.strategy=threshold
druid.query.scheduler.prioritization.periodThreshold=P30D
druid.query.scheduler.prioritization.adjustment=10

## Query laning

druid.query.scheduler.laning.strategy=hilo
druid.query.scheduler.laning.maxLowPercent=25

# Server Configuration

druid.server.http.numThreads=100
druid.server.http.queueSize=100
druid.server.http.maxIdleTime=PT5M
druid.server.http.enableRequestLimit=true
druid.server.http.defaultQueryTimeout=120000
druid.server.http.maxQueryTimeout=150000

## Guardrails for materialization of subqueries

druid.server.http.maxSubqueryRows=2147483647

# druid.server.http.maxSubqueryBytes=auto
# This config is experimental and currently breaks WINDOW_FUNCTIONS

# Client Configuration

druid.broker.http.numConnections=32
druid.broker.http.numMaxThreads=80
druid.broker.http.maxQueuedBytes=500000000

# Retry Policy

druid.broker.retryPolicy.numTries=1

# Processing

druid.processing.buffer.sizeBytes=536870912
druid.processing.numMergeBuffers=10
druid.processing.fifo=true
druid.processing.merge.useParallelMergePool=true
druid.processing.numThreads=31
druid.processing.tmpDir=/tmp/druid/processing

# SQL

druid.sql.enable=true
druid.sql.avatica.maxConnections=100
druid.sql.avatica.maxStatementsPerConnection=5
druid.sql.http.enable=true

# Broker Caching

druid.broker.cache.useCache=false
druid.broker.cache.populateCache=false

# Group by
druid.query.groupBy.maxOnDiskStorage=10737418240

# Monitoring

druid.monitoring.monitors=["org.apache.druid.client.cache.CacheMonitor","org.apache.druid.java.util.metrics.CpuAcctDeltaMonitor","org.apache.druid.java.util.metrics.JvmMonitor","org.apache.druid.java.util.metrics.JvmThreadsMonitor","org.apache.druid.server.metrics.EventReceiverFirehoseMonitor","org.apache.druid.server.metrics.HistoricalMetricsMonitor","org.apache.druid.server.metrics.QueryCountStatsMonitor"]
druid.monitoring.emissionPeriod=PT15S

$ cat query/router/runtime.properties

# Router Process Configs

druid.host=amp-druid-dev-query-1001.bdp.com
druid.service=amp-druid-dev/router
druid.plaintextPort=8088

# Runtime Configuration

druid.router.defaultBrokerServiceName=amp-druid-dev/broker
druid.router.coordinatorServiceName=amp-druid-dev/coordinator
druid.router.tierToBrokerMap={"_default_tier":"amp-druid-dev/broker"}
druid.router.strategy=connectionCount
druid.router.strategy.connectionCount.loadBalancingCoefficient=1
druid.router.strategy.connectionCount.queryCapacityCoefficient=1
druid.router.avatica.balancer.type=consistentHash
druid.router.managementProxy.enabled=true
druid.router.http.numConnections=50
druid.router.http.eagerInitialization=true
druid.router.http.readTimeout=PT2M
druid.router.http.numMaxThreads=100
druid.router.sql.enable=true

# Monitoring

druid.monitoring.monitors=["org.apache.druid.client.cache.CacheMonitor","org.apache.druid.java.util.metrics.CpuAcctDeltaMonitor","org.apache.druid.java.util.metrics.JvmMonitor","org.apache.druid.java.util.metrics.JvmThreadsMonitor","org.apache.druid.server.metrics.EventReceiverFirehoseMonitor","org.apache.druid.server.metrics.QueryCountStatsMonitor"]
druid.monitoring.emissionPeriod=PT15S

$ cat data/historical/runtime.properties

# Historical Process Configuration

druid.host=amp-druid-dev-query-1001.bdp.com
druid.service=amp-druid-dev/historical
druid.plaintextPort=8083

# istorical General Configuration

druid.server.maxSize=3400000000000
druid.server.tier=_default_tier
druid.server.priority=0

# Storing Segments

druid.segmentCache.locations=[{"path":"/opt/druid-segment-cache","maxSize"\:3400000000000}]
druid.segmentCache.locationSelector.strategy=leastBytesUsed

# Historical Query Configs

druid.server.http.numThreads=300
druid.server.http.maxQueryTimeout=120000
druid.server.http.defaultQueryTimeout=120000

# Processing

druid.processing.buffer.sizeBytes=536870912
druid.processing.numMergeBuffers=4
druid.processing.numThreads=15
druid.processing.fifo=true

# Historical Caching

druid.historical.cache.useCache=true
druid.historical.cache.populateCache=true
druid.cache.type=caffeine

# druid.cache.sizeInBytes=20G bytes = 21_474_836_480 bytes; min(1GiB, Runtime.maxMemory / 10)
druid.cache.sizeInBytes=21474836480

# 7 Days
druid.cache.expireAfter=604800000
druid.cache.evictOnClose=true
druid.cache.cacheExecutorFactory=COMMON_FJP

# GroupBy Query Config

# druid.query.groupBy.maxOnDiskStorage=8G bytes = 8_000_000_000 bytes
druid.query.groupBy.maxOnDiskStorage=8000000000

# Monitoring

druid.monitoring.monitors=["org.apache.druid.client.cache.CacheMonitor","org.apache.druid.java.util.metrics.CpuAcctDeltaMonitor","org.apache.druid.java.util.metrics.JvmMonitor","org.apache.druid.java.util.metrics.JvmThreadsMonitor","org.apache.druid.server.metrics.EventReceiverFirehoseMonitor","org.apache.druid.server.metrics.HistoricalMetricsMonitor","org.apache.druid.serve
  • Steps to reproduce the problem

Ingest inline data using spec below.

{
  "type": "index_parallel",
  "spec": {
    "ioConfig": {
      "type": "index_parallel",
      "inputSource": {
        "type": "inline",
        "data": "{\"timestamp\":\"2019-08-25T00:00:00.031Z\",\"agent_type\":\"Browser\",\"lang1\":[\"es\",\"es-419\"],\"lang2\":[\"a\",\"b\"],\"session_length\":3}\n{\"timestamp\":\"2019-08-25T00:00:00.031Z\",\"agent_type\":\"Browser\",\"lang1\":[\"en\",\"en-us\"],\"lang2\":[\"c\",\"d\"],\"session_length\":5}\n{\"timestamp\":\"2019-08-25T00:00:00.031Z\",\"agent_type\":\"TV\",\"lang1\":[\"es\"],\"lang2\":[\"a\"],\"session_length\":7}\n{\"timestamp\":\"2019-08-25T00:00:00.031Z\",\"agent_type\":\"TV\",\"lang1\":[\"es\",\"es-419\"],\"lang2\":[\"a\",\"b\"],\"session_length\":1}"
      },
      "inputFormat": {
        "type": "json"
      }
    },
    "tuningConfig": {
      "type": "index_parallel",
      "partitionsSpec": {
        "type": "dynamic"
      }
    },
    "dataSchema": {
      "dataSource": "mv_debug_sample",
      "timestampSpec": {
        "column": "timestamp",
        "format": "iso"
      },
      "dimensionsSpec": {
        "dimensions": [
          "agent_type",
          "lang1",
          "lang2",
          {
            "type": "long",
            "name": "session_length"
          }
        ]
      },
      "granularitySpec": {
        "queryGranularity": "none",
        "rollup": false,
        "segmentGranularity": "day"
      }
    }
  }
}

Run query below from UI. Removing LIMIT will resolve the error.

WITH a AS (
  SELECT 
    "agent_type",
    CASE "agent_type" WHEN 'Browser' THEN MV_TO_ARRAY("lang1") ELSE MV_TO_ARRAY("lang2") END AS lang,
    "session_length"
  FROM mv_debug_sample
  LIMIT 5
)
SELECT * FROM a CROSS JOIN unnest(lang) AS t(elem)
  • The error message or stack traces encountered. Providing more context, such as nearby log messages or even entire logs, can be helpful.

Error message from UI:

Error: RUNTIME_FAILURE (OPERATOR)
null
java.lang.NullPointerException

Error in the log with debug=true:

024-03-10T00:11:12,891 INFO [sql[a88cf147-28fb-45ae-b692-0bf3f642afa5]] org.apache.druid.sql.calcite.planner.CalciteRulesManager - Start
LogicalSort(fetch=[1001:BIGINT]): rowcount = 5.0, cumulative cost = {inf}, id = 1543612
  LogicalProject(agent_type=[$0], lang=[$1], session_length=[$2], elem=[$3]): rowcount = 5.0, cumulative cost = {inf}, id = 1543606
    LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{1}]): rowcount = 5.0, cumulative cost = {inf}, id = 1543604
      LogicalSort(fetch=[5]): rowcount = 5.0, cumulative cost = {205.0 rows, 521.0 cpu, 0.0 io}, id = 1543595
        LogicalProject(agent_type=[$1], lang=[CASE(=($1, 'Browser'), MV_TO_ARRAY($2), MV_TO_ARRAY($3))], session_length=[$4]): rowcount = 100.0, cumulative cost = {200.0 rows, 401.0 cpu, 0.0 io}, id = 1543593
          LogicalTableScan(table=[[druid, mv_debug_sample]]): rowcount = 100.0, cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io}, id = 1543567
      LogicalProject(elem=[$0]): rowcount = 1.0, cumulative cost = {4.0 rows, 4.0 cpu, 0.0 io}, id = 1543602
        Uncollect: rowcount = 1.0, cumulative cost = {3.0 rows, 3.0 cpu, 0.0 io}, id = 1543600
          LogicalProject(lang=[$cor0.lang]): rowcount = 1.0, cumulative cost = {2.0 rows, 2.0 cpu, 0.0 io}, id = 1543598
            LogicalValues(tuples=[[{ 0 }]]): rowcount = 1.0, cumulative cost = {1.0 rows, 1.0 cpu, 0.0 io}, id = 1543570

2024-03-10T00:11:12,892 INFO [sql[a88cf147-28fb-45ae-b692-0bf3f642afa5]] org.apache.druid.sql.calcite.planner.CalciteRulesManager - After PreProgram
LogicalSort(fetch=[1001:BIGINT]): rowcount = 5.0, cumulative cost = {inf}, id = 1543688
  LogicalCorrelate(correlation=[$cor0], joinType=[inner], requiredColumns=[{1}]): rowcount = 5.0, cumulative cost = {inf}, id = 1543686
    LogicalProject(agent_type=[$1], lang=[CASE(=($1, 'Browser'), MV_TO_ARRAY($2), MV_TO_ARRAY($3))], session_length=[$4]): rowcount = 5.0, cumulative cost = {110.0 rows, 276.0 cpu, 0.0 io}, id = 1543693
      LogicalSort(fetch=[5]): rowcount = 5.0, cumulative cost = {105.0 rows, 261.0 cpu, 0.0 io}, id = 1543690
        LogicalTableScan(table=[[druid, mv_debug_sample]]): rowcount = 100.0, cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io}, id = 1543567
    Uncollect: rowcount = 1.0, cumulative cost = {3.0 rows, 3.0 cpu, 0.0 io}, id = 1543684
      LogicalProject(lang=[$cor0.lang]): rowcount = 1.0, cumulative cost = {2.0 rows, 2.0 cpu, 0.0 io}, id = 1543682
        LogicalValues(tuples=[[{ 0 }]]): rowcount = 1.0, cumulative cost = {1.0 rows, 1.0 cpu, 0.0 io}, id = 1543570

2024-03-10T00:11:12,894 INFO [sql[a88cf147-28fb-45ae-b692-0bf3f642afa5]] org.apache.druid.sql.calcite.planner.CalciteRulesManager - After volcano planner program
DruidCorrelateUnnestRel(correlation=[$cor0], joinType=[inner], requiredColumns=[{1}], query=[{"queryType":"scan","dataSource":{"type":"table","name":"__correlate_unnest__"},"intervals":{"type":"intervals","intervals":
["-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"]},"resultFormat":"compactedList","limit":1001,"columns":["$f0","agent_type","lang","session_length"],"legacy":false,"context":{"debug":true,"queryId":"a8
8cf147-28fb-45ae-b692-0bf3f642afa5","sqlOuterLimit":1001,"sqlQueryId":"a88cf147-28fb-45ae-b692-0bf3f642afa5"},"granularity":{"type":"all"}}], signature=[{agent_type:STRING, lang:ARRAY<STRING>, session_length:LONG, $f0
:STRING}]): rowcount = 1.0, cumulative cost = {100002.59999999999 rows, 1.0 cpu, 0.0 io}, id = 1543742
  DruidQueryRel(query=[{"queryType":"scan","dataSource":{"type":"table","name":"mv_debug_sample"},"intervals":{"type":"intervals","intervals":["-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"]},"virtualC
olumns":[{"type":"expression","name":"v0","expression":"case_searched((\"agent_type\" == 'Browser'),mv_to_array(\"lang1\"),mv_to_array(\"lang2\"))","outputType":"ARRAY<STRING>"}],"resultFormat":"compactedList","limit"
:5,"columns":["agent_type","session_length","v0"],"legacy":false,"context":{"debug":true,"queryId":"a88cf147-28fb-45ae-b692-0bf3f642afa5","sqlOuterLimit":1001,"sqlQueryId":"a88cf147-28fb-45ae-b692-0bf3f642afa5"},"granularity":{"type":"all"}}], signature=[{agent_type:STRING, v0:ARRAY<STRING>, session_length:LONG}]): rowcount = 1.0, cumulative cost = {0.9 rows, 0.0 cpu, 0.0 io}, id = 1543727
  DruidUnnestRel(expr=[$cor0.lang], filter=[null]): rowcount = 1.0, cumulative cost = {1.0 rows, 1.0 cpu, 0.0 io}, id = 1543721

024-03-10T00:11:12,894 WARN [sql[a88cf147-28fb-45ae-b692-0bf3f642afa5]] org.apache.druid.sql.http.SqlResource - Exception while processing sqlQueryId[a88cf147-28fb-45ae-b692-0bf3f642afa5]
org.apache.druid.error.DruidException: null
        at org.apache.druid.error.DruidException$DruidExceptionBuilder.build(DruidException.java:460) ~[druid-processing-28.0.1.jar:28.0.1]
        at org.apache.druid.error.QueryExceptionCompat.makeException(QueryExceptionCompat.java:50) ~[druid-processing-28.0.1.jar:28.0.1]
        at org.apache.druid.error.DruidException.fromFailure(DruidException.java:154) ~[druid-processing-28.0.1.jar:28.0.1]
        at org.apache.druid.server.QueryResultPusher.handleQueryException(QueryResultPusher.java:211) ~[druid-server-28.0.1.jar:28.0.1]
        at org.apache.druid.server.QueryResultPusher.push(QueryResultPusher.java:177) ~[druid-server-28.0.1.jar:28.0.1]
        at org.apache.druid.sql.http.SqlResource.doPost(SqlResource.java:121) ~[druid-sql-28.0.1.jar:28.0.1]
        at jdk.internal.reflect.GeneratedMethodAccessor137.invoke(Unknown Source) ~[?:?]
        at jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:?]
        at java.lang.reflect.Method.invoke(Method.java:566) ~[?:?]
        at com.sun.jersey.spi.container.JavaMethodInvokerFactory$1.invoke(JavaMethodInvokerFactory.java:60) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.model.method.dispatch.AbstractResourceMethodDispatchProvider$ResponseOutInvoker._dispatch(AbstractResourceMethodDispatchProvider.java:205) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.model.method.dispatch.ResourceJavaMethodDispatcher.dispatch(ResourceJavaMethodDispatcher.java:75) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.uri.rules.HttpMethodRule.accept(HttpMethodRule.java:302) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.uri.rules.ResourceClassRule.accept(ResourceClassRule.java:108) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.uri.rules.RightHandPathRule.accept(RightHandPathRule.java:147) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.uri.rules.RootResourceClassesRule.accept(RootResourceClassesRule.java:84) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.application.WebApplicationImpl._handleRequest(WebApplicationImpl.java:1542) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.application.WebApplicationImpl._handleRequest(WebApplicationImpl.java:1473) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.application.WebApplicationImpl.handleRequest(WebApplicationImpl.java:1419) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.server.impl.application.WebApplicationImpl.handleRequest(WebApplicationImpl.java:1409) ~[jersey-server-1.19.4.jar:1.19.4]
        at com.sun.jersey.spi.container.servlet.WebComponent.service(WebComponent.java:409) ~[jersey-servlet-1.19.4.jar:1.19.4]
        at com.sun.jersey.spi.container.servlet.ServletContainer.service(ServletContainer.java:558) ~[jersey-servlet-1.19.4.jar:1.19.4]
        at com.sun.jersey.spi.container.servlet.ServletContainer.service(ServletContainer.java:733) ~[jersey-servlet-1.19.4.jar:1.19.4]
        at javax.servlet.http.HttpServlet.service(HttpServlet.java:790) ~[javax.servlet-api-3.1.0.jar:3.1.0]
        at com.google.inject.servlet.ServletDefinition.doServiceImpl(ServletDefinition.java:286) ~[guice-servlet-4.1.0.jar:?]
        at com.google.inject.servlet.ServletDefinition.doService(ServletDefinition.java:276) ~[guice-servlet-4.1.0.jar:?]
        at com.google.inject.servlet.ServletDefinition.service(ServletDefinition.java:181) ~[guice-servlet-4.1.0.jar:?]
        at com.google.inject.servlet.ManagedServletPipeline.service(ManagedServletPipeline.java:91) ~[guice-servlet-4.1.0.jar:?]
        at com.google.inject.servlet.FilterChainInvocation.doFilter(FilterChainInvocation.java:85) ~[guice-servlet-4.1.0.jar:?]
        at com.google.inject.servlet.ManagedFilterPipeline.dispatch(ManagedFilterPipeline.java:120) ~[guice-servlet-4.1.0.jar:?]
        at com.google.inject.servlet.GuiceFilter.doFilter(GuiceFilter.java:135) ~[guice-servlet-4.1.0.jar:?]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.apache.druid.server.security.PreResponseAuthorizationCheckFilter.doFilter(PreResponseAuthorizationCheckFilter.java:84) ~[druid-server-28.0.1.jar:28.0.1]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.apache.druid.server.initialization.jetty.StandardResponseHeaderFilterHolder$StandardResponseHeaderFilter.doFilter(StandardResponseHeaderFilterHolder.java:164) ~[druid-server-28.0.1.jar:28.0.1]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.apache.druid.server.security.AllowHttpMethodsResourceFilter.doFilter(AllowHttpMethodsResourceFilter.java:78) ~[druid-server-28.0.1.jar:28.0.1]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.apache.druid.server.security.AllowOptionsResourceFilter.doFilter(AllowOptionsResourceFilter.java:74) ~[druid-server-28.0.1.jar:28.0.1]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.apache.druid.server.security.AllowAllAuthenticator$1.doFilter(AllowAllAuthenticator.java:84) ~[druid-server-28.0.1.jar:28.0.1]
        at org.apache.druid.server.security.AuthenticationWrappingFilter.doFilter(AuthenticationWrappingFilter.java:59) ~[druid-server-28.0.1.jar:28.0.1]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.apache.druid.server.security.AllowAllAuthenticator$1.doFilter(AllowAllAuthenticator.java:84) ~[druid-server-28.0.1.jar:28.0.1]
        at org.apache.druid.server.security.AuthenticationWrappingFilter.doFilter(AuthenticationWrappingFilter.java:59) ~[druid-server-28.0.1.jar:28.0.1]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.apache.druid.server.security.SecuritySanityCheckFilter.doFilter(SecuritySanityCheckFilter.java:77) ~[druid-server-28.0.1.jar:28.0.1]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.apache.druid.server.initialization.jetty.LimitRequestsFilter.doFilter(LimitRequestsFilter.java:64) ~[druid-server-28.0.1.jar:28.0.1]
        at org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:552) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:233) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1624) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:233) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1440) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:188) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:505) ~[jetty-servlet-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1594) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:186) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1355) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.gzip.GzipHandler.handle(GzipHandler.java:772) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.HandlerList.handle(HandlerList.java:59) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.StatisticsHandler.handle(StatisticsHandler.java:181) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.Server.handle(Server.java:516) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.HttpChannel.lambda$handle$1(HttpChannel.java:487) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.HttpChannel.dispatch(HttpChannel.java:732) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:479) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:277) ~[jetty-server-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311) ~[jetty-io-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:105) ~[jetty-io-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.io.ChannelEndPoint$1.run(ChannelEndPoint.java:104) ~[jetty-io-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:338) ~[jetty-util-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:315) ~[jetty-util-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:173) ~[jetty-util-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131) ~[jetty-util-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:409) ~[jetty-util-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:883) ~[jetty-util-9.4.53.v20231009.jar:9.4.53.v20231009]
        at org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1034) ~[jetty-util-9.4.53.v20231009.jar:9.4.53.v20231009]
        at java.lang.Thread.run(Thread.java:829) ~[?:?]
Caused by: org.apache.druid.query.QueryInterruptedException
        ... 82 more
Caused by: java.lang.NullPointerException
        at org.apache.druid.sql.calcite.expression.DruidExpression.getDirectColumn(DruidExpression.java:356) ~[druid-sql-28.0.1.jar:28.0.1]
        at org.apache.druid.sql.calcite.rel.DruidCorrelateUnnestRel.toDruidQuery(DruidCorrelateUnnestRel.java:212) ~[druid-sql-28.0.1.jar:28.0.1]
        at org.apache.druid.sql.calcite.rel.DruidRel.runQuery(DruidRel.java:55) ~[druid-sql-28.0.1.jar:28.0.1]
        at org.apache.druid.sql.calcite.planner.QueryHandler.lambda$planWithDruidConvention$6(QueryHandler.java:621) ~[druid-sql-28.0.1.jar:28.0.1]
        at org.apache.druid.sql.calcite.planner.PlannerResult.run(PlannerResult.java:62) ~[druid-sql-28.0.1.jar:28.0.1]
        at org.apache.druid.sql.DirectStatement$ResultSet.run(DirectStatement.java:109) ~[druid-sql-28.0.1.jar:28.0.1]
        at org.apache.druid.sql.http.SqlResource$SqlResourceQueryResultPusher$1.start(SqlResource.java:249) ~[druid-sql-28.0.1.jar:28.0.1]
        at org.apache.druid.server.QueryResultPusher.push(QueryResultPusher.java:121) ~[druid-server-28.0.1.jar:28.0.1]
        ... 81 more
  • Any debugging that you have already done

If I do not use CASE, the array column is null, as showing in query below.

WITH a AS (
  SELECT 
    "agent_type",
    MV_TO_ARRAY("lang1") as lang,
    "session_length"
  FROM mv_debug_sample
  LIMIT 5
)
SELECT * FROM a CROSS JOIN unnest(lang) AS t(elem)

Result:

agent_type	lang	session_length	elem
Browser	null	5	en
Browser	null	5	en-us
Browser	null	3	es
Browser	null	3	es-419
TV	null	7	es
TV	null	1	es
TV	null	1	es-419

This only occurs with LIMIT in subquery, and with UNNEST.
The explained native query is below, which misses mv_to_array as virtual column. So the root cause is native query translation.

{
  "queryType": "scan",
  "dataSource": {
    "type": "unnest",
    "base": {
      "type": "query",
      "query": {
        "queryType": "scan",
        "dataSource": {
          "type": "table",
          "name": "mv_debug_sample"
        },
        "intervals": {
          "type": "intervals",
          "intervals": [
            "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"
          ]
        },
        "resultFormat": "compactedList",
        "limit": 5,
        "columns": [
          "agent_type",
          "lang1",
          "session_length"
        ],
        "legacy": false,
        "context": {
          "debug": true,
          "queryId": "305f871b-941b-4102-b337-8e71c29a14a3",
          "sqlOuterLimit": 1001,
          "sqlQueryId": "305f871b-941b-4102-b337-8e71c29a14a3",
          "useNativeQueryExplain": true
        },
        "granularity": {
          "type": "all"
        }
      }
    },
    "virtualColumn": {
      "type": "expression",
      "name": "j0.unnest",
      "expression": "\"lang1\"",
      "outputType": "STRING"
    },
    "unnestFilter": null
  },
  "intervals": {
    "type": "intervals",
    "intervals": [
      "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"
    ]
  },
  "resultFormat": "compactedList",
  "limit": 1001,
  "columns": [
    "agent_type",
    "j0.unnest",
    "session_length",
    "v0"
  ],
  "legacy": false,
  "context": {
    "debug": true,
    "queryId": "305f871b-941b-4102-b337-8e71c29a14a3",
    "sqlOuterLimit": 1001,
    "sqlQueryId": "305f871b-941b-4102-b337-8e71c29a14a3",
    "useNativeQueryExplain": true
  },
  "granularity": {
    "type": "all"
  }
}

If I add virtual column with mv_to_array, then it works as expected as below.

{
  "queryType": "scan",
  "dataSource": {
    "type": "unnest",
    "base": {
      "type": "query",
      "query": {
        "queryType": "scan",
        "dataSource": {
          "type": "table",
          "name": "mv_debug_sample"
        },
        "intervals": {
          "type": "intervals",
          "intervals": [
            "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"
          ]
        },
        "virtualColumns": [
          {
            "type": "expression",
            "name": "v0",
            "expression": "mv_to_array(\"lang1\")",
            "outputType": "ARRAY<STRING>"
          }
        ],
        "resultFormat": "compactedList",
        "limit": 5,
        "columns": [
          "agent_type",
          "v0",
          "session_length"
        ],
        "legacy": false,
        "context": {
          "debug": true,
          "queryId": "305f871b-941b-4102-b337-8e71c29a14a3",
          "sqlOuterLimit": 1001,
          "sqlQueryId": "305f871b-941b-4102-b337-8e71c29a14a3",
          "useNativeQueryExplain": true
        },
        "granularity": {
          "type": "all"
        }
      }
    },
    "virtualColumn": {
      "type": "expression",
      "name": "j0.unnest",
      "expression": "\"v0\"",
      "outputType": "STRING"
    },
    "unnestFilter": null
  },
  "intervals": {
    "type": "intervals",
    "intervals": [
      "-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z"
    ]
  },
  "resultFormat": "compactedList",
  "limit": 1001,
  "columns": [
    "agent_type",
    "j0.unnest",
    "session_length",
    "v0"
  ],
  "legacy": false,
  "context": {
    "debug": true,
    "queryId": "305f871b-941b-4102-b337-8e71c29a14a3",
    "sqlOuterLimit": 1001,
    "sqlQueryId": "305f871b-941b-4102-b337-8e71c29a14a3",
    "useNativeQueryExplain": true
  },
  "granularity": {
    "type": "all"
  }
}
@somu-imply
Copy link
Contributor

Created #15891 to address this type of issue

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants