Skip to content

Commit

Permalink
map_from_arrays supports throw exception on duplicates
Browse files Browse the repository at this point in the history
  • Loading branch information
WangGuangxin committed Oct 9, 2024
1 parent acd5717 commit 18c74b8
Show file tree
Hide file tree
Showing 13 changed files with 435 additions and 363 deletions.
24 changes: 24 additions & 0 deletions velox/core/QueryConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@

namespace facebook::velox::core {

/// Strategy applied by Spark map-building functions when the same key occurs
/// more than once.
enum class SparkMapKeyDedupPolicy {
  /// The map key that is inserted last takes precedence.
  LAST_WIN = 0,
  /// The query fails if duplicated map keys are detected.
  EXCEPTION = 1,
};

/// A simple wrapper around velox::ConfigBase. Defines constants for query
/// config properties and accessor methods.
/// Create per query context. Does not have a singleton instance.
Expand Down Expand Up @@ -416,6 +421,13 @@ class QueryConfig {
/// Reads the boolean query property keyed by
/// kDebugDisableExpressionWithLazyInputs, passing false to get() as the
/// default value.
bool debugDisableExpressionsWithLazyInputs() const {
  constexpr bool kFallback = false;
  return get<bool>(kDebugDisableExpressionWithLazyInputs, kFallback);
}

/// Query property selecting how Spark builtin functions that build maps
/// (map, map_from_arrays, map_from_entries, str_to_map, map_concat, etc.)
/// handle duplicate keys. Accepted values:
///   - EXCEPTION: the query fails if duplicated map keys are detected.
///   - LAST_WIN: the map key that is inserted last takes precedence.
/// The sparkMapKeyDedupPolicy() accessor upper-cases the value before
/// comparing it and passes "EXCEPTION" to get() as the default.
static constexpr const char* kSparkMapKeyDedupPolicy =
"spark.map_key_dedup_policy";

uint64_t queryMaxMemoryPerNode() const {
return config::toCapacity(
Expand Down Expand Up @@ -815,6 +827,18 @@ class QueryConfig {
return get<int32_t>(kPrefixSortMinRows, 130);
}

/// Returns the duplicate-key policy configured under kSparkMapKeyDedupPolicy.
///
/// The raw value is upper-cased and then compared against "LAST_WIN" and
/// "EXCEPTION", so matching is case-insensitive. "EXCEPTION" is the default
/// value passed to get(). Any other value is reported through VELOX_FAIL.
SparkMapKeyDedupPolicy sparkMapKeyDedupPolicy() const {
  std::string policy = get<std::string>(kSparkMapKeyDedupPolicy, "EXCEPTION");
  // toupper() has undefined behavior when handed a negative char value (e.g.
  // a non-ASCII byte in the configured string), so each character is routed
  // through unsigned char before the call.
  std::transform(
      policy.begin(), policy.end(), policy.begin(), [](unsigned char c) {
        return static_cast<char>(::toupper(c));
      });
  if (policy == "LAST_WIN") {
    return SparkMapKeyDedupPolicy::LAST_WIN;
  }
  if (policy == "EXCEPTION") {
    return SparkMapKeyDedupPolicy::EXCEPTION;
  }
  VELOX_FAIL("Unknown mapKeyDedupPolicy: {}", policy);
}

template <typename T>
T get(const std::string& key, const T& defaultValue) const {
return config_->get<T>(key, defaultValue);
Expand Down
3 changes: 2 additions & 1 deletion velox/docs/functions/spark/map.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ Map Functions
.. spark:function:: map_from_arrays(array(K), array(V)) -> map(K,V)
Creates a map from the given pair of key and value arrays. The key array must not contain null elements.
If key size != value size will throw exception that key and value must have the same length.::
If the key and value arrays differ in size, an exception is thrown stating that key and value must have the same length.
Duplicate map keys are handled according to the ``spark.map_key_dedup_policy`` configuration property (``kSparkMapKeyDedupPolicy``). ::

SELECT map_from_arrays(array(1.0, 3.0), array('2', '4')); -- {1.0 -> 2, 3.0 -> 4}

Expand Down
Loading

0 comments on commit 18c74b8

Please sign in to comment.