Skip to content

Commit

Permalink
Allow function signature templates to be used at top-level expression…
Browse files Browse the repository at this point in the history
… in fuzzer (facebookincubator#3540)

Summary:
Pull Request resolved: facebookincubator#3540

The signatures of some functions contain type variables such as "T". When generating
a random expression, fuzzer currently choose the top-level function only from function
signatures that do not contain type variable. This is a limitation because sometimes we
would like to test only a function that have type variables in its signature.

This diff allows choosing function signatures that contain type variables as the top-level
function. It also add an API  ArgumentTypeFuzzer::fuzzReturnType() to return a concrete
type that can bind to the formal return type of a function signature.

This diff fixes the issue facebookincubator#3533.

Differential Revision: https://internalfb.com/D42116543

fbshipit-source-id: fac259688cd1dc2fd33317965471634cb7c85962
  • Loading branch information
kagamiori authored and facebook-github-bot committed Dec 21, 2022
1 parent 882cba8 commit d762e43
Show file tree
Hide file tree
Showing 10 changed files with 77 additions and 30 deletions.
4 changes: 2 additions & 2 deletions velox/exec/Aggregate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ AggregateFunctionMap& aggregateFunctions() {
namespace {
std::optional<const AggregateFunctionEntry*> getAggregateFunctionEntry(
const std::string& name) {
auto sanitizedName = sanitizeFunctionName(name);
auto sanitizedName = sanitizeName(name);

auto& functionsMap = aggregateFunctions();
auto it = functionsMap.find(sanitizedName);
Expand All @@ -55,7 +55,7 @@ bool registerAggregateFunction(
const std::string& name,
std::vector<std::shared_ptr<AggregateFunctionSignature>> signatures,
AggregateFunctionFactory factory) {
auto sanitizedName = sanitizeFunctionName(name);
auto sanitizedName = sanitizeName(name);

aggregateFunctions()[sanitizedName] = {
std::move(signatures), std::move(factory)};
Expand Down
4 changes: 2 additions & 2 deletions velox/exec/WindowFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,15 @@ bool registerWindowFunction(
const std::string& name,
std::vector<FunctionSignaturePtr> signatures,
WindowFunctionFactory factory) {
auto sanitizedName = sanitizeFunctionName(name);
auto sanitizedName = sanitizeName(name);
windowFunctions()[sanitizedName] = {
std::move(signatures), std::move(factory)};
return true;
}

std::optional<std::vector<FunctionSignaturePtr>> getWindowFunctionSignatures(
const std::string& name) {
auto sanitizedName = sanitizeFunctionName(name);
auto sanitizedName = sanitizeName(name);
if (auto func = getWindowFunctionEntry(sanitizedName)) {
return func.value()->signatures;
}
Expand Down
4 changes: 2 additions & 2 deletions velox/expression/FunctionRegistry.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,15 +133,15 @@ class FunctionRegistry {
const std::shared_ptr<const Metadata>& metadata,
const typename FunctionEntry<Function, Metadata>::FunctionFactory&
factory) {
const auto sanitizedName = sanitizeFunctionName(name);
const auto sanitizedName = sanitizeName(name);
SignatureMap& signatureMap = registeredFunctions_[sanitizedName];
signatureMap[*metadata->signature()] =
std::make_unique<const FunctionEntry<Function, Metadata>>(
metadata, factory);
}

const SignatureMap* getSignatureMap(const std::string& name) const {
const auto sanitizedName = sanitizeFunctionName(name);
const auto sanitizedName = sanitizeName(name);
const auto it = registeredFunctions_.find(sanitizedName);
return it != registeredFunctions_.end() ? &it->second : nullptr;
}
Expand Down
2 changes: 1 addition & 1 deletion velox/expression/FunctionSignature.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

namespace facebook::velox::exec {

std::string sanitizeFunctionName(const std::string& name) {
std::string sanitizeName(const std::string& name) {
std::string sanitizedName;
sanitizedName.resize(name.size());
std::transform(
Expand Down
2 changes: 1 addition & 1 deletion velox/expression/FunctionSignature.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

namespace facebook::velox::exec {

std::string sanitizeFunctionName(const std::string& name);
std::string sanitizeName(const std::string& name);

inline bool isCommonDecimalName(const std::string& typeName) {
return (typeName == "DECIMAL");
Expand Down
6 changes: 3 additions & 3 deletions velox/expression/VectorFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ VectorFunctionMap& vectorFunctionFactories() {

std::optional<std::vector<FunctionSignaturePtr>> getVectorFunctionSignatures(
const std::string& name) {
auto sanitizedName = sanitizeFunctionName(name);
auto sanitizedName = sanitizeName(name);

return vectorFunctionFactories()
.withRLock([&sanitizedName](auto& functions) -> auto {
Expand Down Expand Up @@ -58,7 +58,7 @@ std::shared_ptr<VectorFunction> getVectorFunction(
const std::string& name,
const std::vector<TypePtr>& inputTypes,
const std::vector<VectorPtr>& constantInputs) {
auto sanitizedName = sanitizeFunctionName(name);
auto sanitizedName = sanitizeName(name);

if (!constantInputs.empty()) {
VELOX_CHECK_EQ(inputTypes.size(), constantInputs.size());
Expand Down Expand Up @@ -96,7 +96,7 @@ bool registerStatefulVectorFunction(
VectorFunctionFactory factory,
VectorFunctionMetadata metadata,
bool overwrite) {
auto sanitizedName = sanitizeFunctionName(name);
auto sanitizedName = sanitizeName(name);

if (overwrite) {
vectorFunctionFactories().withWLock([&](auto& functionMap) {
Expand Down
17 changes: 17 additions & 0 deletions velox/expression/tests/ArgumentTypeFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,4 +119,21 @@ bool ArgumentTypeFuzzer::fuzzArgumentTypes(uint32_t maxVariadicArgs) {
return true;
}

TypePtr ArgumentTypeFuzzer::fuzzReturnType() {
VELOX_CHECK_EQ(
returnType_,
nullptr,
"Only fuzzing uninitialized return type is allowed.");

determineUnboundedTypeVariables();
if (signature_.returnType().baseName() == "any") {
return randomType(rng_);
} else {
auto actualType = exec::SignatureBinder::tryResolveType(
signature_.returnType(), variables(), bindings_);
VELOX_CHECK_NE(actualType, nullptr);
return actualType;
}
}

} // namespace facebook::velox::test
7 changes: 6 additions & 1 deletion velox/expression/tests/ArgumentTypeFuzzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ namespace facebook::velox::test {
/// For function signatures using type variables, generates a list of
/// arguments types. Optionally, allows to specify a desired return type. If
/// specified, the return type acts as a constraint on the possible set of
/// argument types.
/// argument types. If no return type is specified, it also allows generate a
/// random type that can bind to the function's return type.
class ArgumentTypeFuzzer {
public:
ArgumentTypeFuzzer(
Expand All @@ -53,6 +54,10 @@ class ArgumentTypeFuzzer {
return argumentTypes_;
}

/// Return a random type that can bind to the function signature's return
/// type. This is only allowed when returnType_ is not initialized.
TypePtr fuzzReturnType();

private:
/// Return the variables in the signature.
auto& variables() const {
Expand Down
60 changes: 42 additions & 18 deletions velox/expression/tests/ExpressionFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,11 +231,13 @@ std::optional<CallableSignature> processSignature(
return std::nullopt;
}

// Determine whether type is or contains typeName.
// Determine whether type is or contains typeName. typeName should be in lower
// case.
bool containTypeName(
const exec::TypeSignature& type,
const std::string& typeName) {
if (type.baseName() == typeName) {
auto sanitizedTypeName = exec::sanitizeName(type.baseName());
if (sanitizedTypeName == typeName) {
return true;
}
for (const auto& parameter : type.parameters()) {
Expand All @@ -247,7 +249,7 @@ bool containTypeName(
}

// Determine whether the signature has an argument or return type that contains
// typeName.
// typeName. typeName should be in lower case.
bool useTypeName(
const exec::FunctionSignature& signature,
const std::string& typeName) {
Expand All @@ -262,6 +264,20 @@ bool useTypeName(
return false;
}

bool isSupportedSignature(const exec::FunctionSignature& signature) {
// Not supporting lambda functions, or functions using decimal and
// timestamp with time zone types.
return !(
useTypeName(signature, "function") ||
useTypeName(signature, "long_decimal") ||
useTypeName(signature, "short_decimal") ||
useTypeName(signature, "decimal") ||
useTypeName(signature, "timestamp with time zone") ||
useTypeName(signature, "interval day to second") ||
(FLAGS_velox_fuzzer_enable_complex_types &&
useTypeName(signature, "unknown")));
}

// Randomly pick columns from the input row vector to wrap in lazy.
std::vector<column_index_t> generateLazyColumnIds(
const RowVectorPtr& rowVector,
Expand Down Expand Up @@ -304,16 +320,7 @@ ExpressionFuzzer::ExpressionFuzzer(
for (const auto& signature : function.second) {
++totalFunctionSignatures;

// Not supporting lambda functions, or functions using decimal and
// timestamp with time zone types.
if (useTypeName(*signature, "function") ||
useTypeName(*signature, "long_decimal") ||
useTypeName(*signature, "short_decimal") ||
useTypeName(*signature, "decimal") ||
useTypeName(*signature, "timestamp with time zone") ||
useTypeName(*signature, "interval day to second") ||
(FLAGS_velox_fuzzer_enable_complex_types &&
useTypeName(*signature, "unknown"))) {
if (!isSupportedSignature(*signature)) {
continue;
}

Expand Down Expand Up @@ -775,7 +782,6 @@ void ExpressionFuzzer::reset() {
}

void ExpressionFuzzer::go() {
VELOX_CHECK(!signatures_.empty(), "No function signatures available.");
VELOX_CHECK(
FLAGS_steps > 0 || FLAGS_duration_sec > 0,
"Either --steps or --duration_sec needs to be greater than zero.")
Expand All @@ -788,10 +794,28 @@ void ExpressionFuzzer::go() {
<< " (seed: " << currentSeed_ << ")";
reset();

// Pick a random signature to choose the root return type.
size_t idx = boost::random::uniform_int_distribution<uint32_t>(
0, signatures_.size() - 1)(rng_);
const auto& rootType = signatures_[idx].returnType;
auto chooseFromConcreteSignatures =
boost::random::uniform_int_distribution<uint32_t>(0, 1)(rng_);
chooseFromConcreteSignatures =
(chooseFromConcreteSignatures && !signatures_.empty()) ||
(!chooseFromConcreteSignatures && signatureTemplates_.empty());
TypePtr rootType;
if (chooseFromConcreteSignatures) {
// Pick a random signature to choose the root return type.
VELOX_CHECK(!signatures_.empty(), "No function signature available.");
size_t idx = boost::random::uniform_int_distribution<uint32_t>(
0, signatures_.size() - 1)(rng_);
rootType = signatures_[idx].returnType;
} else {
// Pick a random concrete return type that can bind to the return type of
// a chosen signature.
VELOX_CHECK(
!signatureTemplates_.empty(), "No function signature available.");
size_t idx = boost::random::uniform_int_distribution<uint32_t>(
0, signatureTemplates_.size() - 1)(rng_);
ArgumentTypeFuzzer typeFuzzer{*signatureTemplates_[idx].signature, rng_};
rootType = typeFuzzer.fuzzReturnType();
}

// Generate expression tree and input data vectors.
auto plan = generateExpression(rootType);
Expand Down
1 change: 1 addition & 0 deletions velox/expression/tests/ExpressionFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ int main(int argc, char** argv) {
"element_at",
"width_bucket",
"array_intersect",
"zip", // https://github.com/facebookincubator/velox/issues/3542
};
size_t initialSeed = FLAGS_seed == 0 ? std::time(nullptr) : FLAGS_seed;
return FuzzerRunner::run(
Expand Down

0 comments on commit d762e43

Please sign in to comment.