From f16a81dd2225a32f01e5d5b4cb268bb6d3fa253f Mon Sep 17 00:00:00 2001 From: Wei He Date: Wed, 21 Dec 2022 10:31:32 -0800 Subject: [PATCH] Allow function signature templates to be used at top-level expression in fuzzer (#3540) Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/3540 The signatures of some functions contain type variables such as "T". When generating a random expression, fuzzer currently choose the top-level function only from function signatures that do not contain type variable. This is a limitation because sometimes we would like to test only a function that have type variables in its signature. This diff allows choosing function signatures that contain type variables as the top-level function. It also add an API ArgumentTypeFuzzer::fuzzReturnType() to return a concrete type that can bind to the formal return type of a function signature. This diff fixes the issue https://github.com/facebookincubator/velox/issues/3533. Reviewed By: bikramSingh91 Differential Revision: D42116543 fbshipit-source-id: 0a01926e044e1e1b7e87f6093136dbc7fe12d235 --- velox/exec/Aggregate.cpp | 4 +- velox/exec/WindowFunction.cpp | 4 +- velox/expression/FunctionRegistry.h | 4 +- velox/expression/FunctionSignature.cpp | 2 +- velox/expression/FunctionSignature.h | 2 +- velox/expression/VectorFunction.cpp | 6 +- velox/expression/tests/ArgumentTypeFuzzer.cpp | 17 ++++++ velox/expression/tests/ArgumentTypeFuzzer.h | 7 ++- velox/expression/tests/ExpressionFuzzer.cpp | 60 +++++++++++++------ 9 files changed, 76 insertions(+), 30 deletions(-) diff --git a/velox/exec/Aggregate.cpp b/velox/exec/Aggregate.cpp index 3534f2dfa764..46699d5f4611 100644 --- a/velox/exec/Aggregate.cpp +++ b/velox/exec/Aggregate.cpp @@ -39,7 +39,7 @@ AggregateFunctionMap& aggregateFunctions() { namespace { std::optional getAggregateFunctionEntry( const std::string& name) { - auto sanitizedName = sanitizeFunctionName(name); + auto sanitizedName = sanitizeName(name); auto& functionsMap = aggregateFunctions(); auto it = functionsMap.find(sanitizedName); @@ -55,7 +55,7 @@ bool registerAggregateFunction( const std::string& name, std::vector> signatures, AggregateFunctionFactory factory) { - auto sanitizedName = sanitizeFunctionName(name); + auto sanitizedName = sanitizeName(name); aggregateFunctions()[sanitizedName] = { std::move(signatures), std::move(factory)}; diff --git a/velox/exec/WindowFunction.cpp b/velox/exec/WindowFunction.cpp index 1d97281293b4..fc48442d0097 100644 --- a/velox/exec/WindowFunction.cpp +++ b/velox/exec/WindowFunction.cpp @@ -41,7 +41,7 @@ bool registerWindowFunction( const std::string& name, std::vector signatures, WindowFunctionFactory factory) { - auto sanitizedName = sanitizeFunctionName(name); + auto sanitizedName = sanitizeName(name); windowFunctions()[sanitizedName] = { std::move(signatures), std::move(factory)}; return true; @@ -49,7 +49,7 @@ bool registerWindowFunction( std::optional> getWindowFunctionSignatures( const std::string& name) { - auto sanitizedName = sanitizeFunctionName(name); + auto sanitizedName = sanitizeName(name); if (auto func = getWindowFunctionEntry(sanitizedName)) { return func.value()->signatures; } diff --git a/velox/expression/FunctionRegistry.h b/velox/expression/FunctionRegistry.h index 70c94834424c..3730f955eae5 100644 --- a/velox/expression/FunctionRegistry.h +++ b/velox/expression/FunctionRegistry.h @@ -133,7 +133,7 @@ class FunctionRegistry { const std::shared_ptr& metadata, const typename FunctionEntry::FunctionFactory& factory) { - const auto sanitizedName = sanitizeFunctionName(name); + const auto sanitizedName = sanitizeName(name); SignatureMap& signatureMap = registeredFunctions_[sanitizedName]; signatureMap[*metadata->signature()] = std::make_unique>( @@ -141,7 +141,7 @@ class FunctionRegistry { } const SignatureMap* getSignatureMap(const std::string& name) const { - const auto sanitizedName = sanitizeFunctionName(name); + const auto sanitizedName = sanitizeName(name); const auto it = registeredFunctions_.find(sanitizedName); return it != registeredFunctions_.end() ? &it->second : nullptr; } diff --git a/velox/expression/FunctionSignature.cpp b/velox/expression/FunctionSignature.cpp index cdc9384350df..2c6e4be740c3 100644 --- a/velox/expression/FunctionSignature.cpp +++ b/velox/expression/FunctionSignature.cpp @@ -22,7 +22,7 @@ namespace facebook::velox::exec { -std::string sanitizeFunctionName(const std::string& name) { +std::string sanitizeName(const std::string& name) { std::string sanitizedName; sanitizedName.resize(name.size()); std::transform( diff --git a/velox/expression/FunctionSignature.h b/velox/expression/FunctionSignature.h index d55f39e1c4d5..f54b373cd1fd 100644 --- a/velox/expression/FunctionSignature.h +++ b/velox/expression/FunctionSignature.h @@ -25,7 +25,7 @@ namespace facebook::velox::exec { -std::string sanitizeFunctionName(const std::string& name); +std::string sanitizeName(const std::string& name); inline bool isCommonDecimalName(const std::string& typeName) { return (typeName == "DECIMAL"); diff --git a/velox/expression/VectorFunction.cpp b/velox/expression/VectorFunction.cpp index 28837171e0ca..cb9b7b899e14 100644 --- a/velox/expression/VectorFunction.cpp +++ b/velox/expression/VectorFunction.cpp @@ -28,7 +28,7 @@ VectorFunctionMap& vectorFunctionFactories() { std::optional> getVectorFunctionSignatures( const std::string& name) { - auto sanitizedName = sanitizeFunctionName(name); + auto sanitizedName = sanitizeName(name); return vectorFunctionFactories() .withRLock([&sanitizedName](auto& functions) -> auto { @@ -58,7 +58,7 @@ std::shared_ptr getVectorFunction( const std::string& name, const std::vector& inputTypes, const std::vector& constantInputs) { - auto sanitizedName = sanitizeFunctionName(name); + auto sanitizedName = sanitizeName(name); if (!constantInputs.empty()) { VELOX_CHECK_EQ(inputTypes.size(), constantInputs.size()); @@ -96,7 +96,7 @@ bool registerStatefulVectorFunction( VectorFunctionFactory factory, VectorFunctionMetadata metadata, bool overwrite) { - auto sanitizedName = sanitizeFunctionName(name); + auto sanitizedName = sanitizeName(name); if (overwrite) { vectorFunctionFactories().withWLock([&](auto& functionMap) { diff --git a/velox/expression/tests/ArgumentTypeFuzzer.cpp b/velox/expression/tests/ArgumentTypeFuzzer.cpp index c74d24daa89f..eedc0d927c71 100644 --- a/velox/expression/tests/ArgumentTypeFuzzer.cpp +++ b/velox/expression/tests/ArgumentTypeFuzzer.cpp @@ -119,4 +119,21 @@ bool ArgumentTypeFuzzer::fuzzArgumentTypes(uint32_t maxVariadicArgs) { return true; } +TypePtr ArgumentTypeFuzzer::fuzzReturnType() { + VELOX_CHECK_EQ( + returnType_, + nullptr, + "Only fuzzing uninitialized return type is allowed."); + + determineUnboundedTypeVariables(); + if (signature_.returnType().baseName() == "any") { + return randomType(rng_); + } else { + auto actualType = exec::SignatureBinder::tryResolveType( + signature_.returnType(), variables(), bindings_); + VELOX_CHECK_NE(actualType, nullptr); + return actualType; + } +} + } // namespace facebook::velox::test diff --git a/velox/expression/tests/ArgumentTypeFuzzer.h b/velox/expression/tests/ArgumentTypeFuzzer.h index 2b631e72a816..657dea5a6d94 100644 --- a/velox/expression/tests/ArgumentTypeFuzzer.h +++ b/velox/expression/tests/ArgumentTypeFuzzer.h @@ -27,7 +27,8 @@ namespace facebook::velox::test { /// For function signatures using type variables, generates a list of /// arguments types. Optionally, allows to specify a desired return type. If /// specified, the return type acts as a constraint on the possible set of -/// argument types. +/// argument types. If no return type is specified, it also allows generate a +/// random type that can bind to the function's return type. class ArgumentTypeFuzzer { public: ArgumentTypeFuzzer( @@ -53,6 +54,10 @@ class ArgumentTypeFuzzer { return argumentTypes_; } + /// Return a random type that can bind to the function signature's return + /// type. This is only allowed when returnType_ is not initialized. + TypePtr fuzzReturnType(); + private: /// Return the variables in the signature. auto& variables() const { diff --git a/velox/expression/tests/ExpressionFuzzer.cpp b/velox/expression/tests/ExpressionFuzzer.cpp index 25d46225fd65..5d50fe029dbb 100644 --- a/velox/expression/tests/ExpressionFuzzer.cpp +++ b/velox/expression/tests/ExpressionFuzzer.cpp @@ -231,11 +231,13 @@ std::optional processSignature( return std::nullopt; } -// Determine whether type is or contains typeName. +// Determine whether type is or contains typeName. typeName should be in lower +// case. bool containTypeName( const exec::TypeSignature& type, const std::string& typeName) { - if (type.baseName() == typeName) { + auto sanitizedTypeName = exec::sanitizeName(type.baseName()); + if (sanitizedTypeName == typeName) { return true; } for (const auto& parameter : type.parameters()) { @@ -247,7 +249,7 @@ bool containTypeName( } // Determine whether the signature has an argument or return type that contains -// typeName. +// typeName. typeName should be in lower case. bool useTypeName( const exec::FunctionSignature& signature, const std::string& typeName) { @@ -262,6 +264,20 @@ bool useTypeName( return false; } +bool isSupportedSignature(const exec::FunctionSignature& signature) { + // Not supporting lambda functions, or functions using decimal and + // timestamp with time zone types. + return !( + useTypeName(signature, "function") || + useTypeName(signature, "long_decimal") || + useTypeName(signature, "short_decimal") || + useTypeName(signature, "decimal") || + useTypeName(signature, "timestamp with time zone") || + useTypeName(signature, "interval day to second") || + (FLAGS_velox_fuzzer_enable_complex_types && + useTypeName(signature, "unknown"))); +} + // Randomly pick columns from the input row vector to wrap in lazy. std::vector generateLazyColumnIds( const RowVectorPtr& rowVector, @@ -304,16 +320,7 @@ ExpressionFuzzer::ExpressionFuzzer( for (const auto& signature : function.second) { ++totalFunctionSignatures; - // Not supporting lambda functions, or functions using decimal and - // timestamp with time zone types. - if (useTypeName(*signature, "function") || - useTypeName(*signature, "long_decimal") || - useTypeName(*signature, "short_decimal") || - useTypeName(*signature, "decimal") || - useTypeName(*signature, "timestamp with time zone") || - useTypeName(*signature, "interval day to second") || - (FLAGS_velox_fuzzer_enable_complex_types && - useTypeName(*signature, "unknown"))) { + if (!isSupportedSignature(*signature)) { continue; } @@ -775,7 +782,6 @@ void ExpressionFuzzer::reset() { } void ExpressionFuzzer::go() { - VELOX_CHECK(!signatures_.empty(), "No function signatures available."); VELOX_CHECK( FLAGS_steps > 0 || FLAGS_duration_sec > 0, "Either --steps or --duration_sec needs to be greater than zero.") @@ -788,10 +794,28 @@ void ExpressionFuzzer::go() { << " (seed: " << currentSeed_ << ")"; reset(); - // Pick a random signature to choose the root return type. - size_t idx = boost::random::uniform_int_distribution( - 0, signatures_.size() - 1)(rng_); - const auto& rootType = signatures_[idx].returnType; + auto chooseFromConcreteSignatures = + boost::random::uniform_int_distribution(0, 1)(rng_); + chooseFromConcreteSignatures = + (chooseFromConcreteSignatures && !signatures_.empty()) || + (!chooseFromConcreteSignatures && signatureTemplates_.empty()); + TypePtr rootType; + if (chooseFromConcreteSignatures) { + // Pick a random signature to choose the root return type. + VELOX_CHECK(!signatures_.empty(), "No function signature available."); + size_t idx = boost::random::uniform_int_distribution( + 0, signatures_.size() - 1)(rng_); + rootType = signatures_[idx].returnType; + } else { + // Pick a random concrete return type that can bind to the return type of + // a chosen signature. + VELOX_CHECK( + !signatureTemplates_.empty(), "No function signature available."); + size_t idx = boost::random::uniform_int_distribution( + 0, signatureTemplates_.size() - 1)(rng_); + ArgumentTypeFuzzer typeFuzzer{*signatureTemplates_[idx].signature, rng_}; + rootType = typeFuzzer.fuzzReturnType(); + } // Generate expression tree and input data vectors. auto plan = generateExpression(rootType);