Skip to content

Commit

Permalink
Improve infering start and stop arguments from gapfill query
Browse files Browse the repository at this point in the history
Infering start and stop parameter used to only work for top
level constraints. However even when constraints are at the
toplevel in the query they might not end up at the top-level
of the jointree depending on the plan shape. This patch
changes the gapfill code to traverse the jointree to find
valid start and stop arguments.
  • Loading branch information
svenklemm committed Jul 8, 2020
1 parent fa223ae commit c850fc6
Show file tree
Hide file tree
Showing 3 changed files with 148 additions and 11 deletions.
107 changes: 102 additions & 5 deletions tsl/src/nodes/gapfill/exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,99 @@ get_boundary_expr_value(GapFillState *state, GapFillBoundary boundary, Expr *exp
return gapfill_datum_get_internal(arg_value, state->gapfill_typid);
}

typedef struct collect_boundary_context
{
List *quals;
Var *ts_var;
} collect_boundary_context;

/*
* expression references our gapfill time column and could be
* a boundary expression, more thorough check is in
* infer_gapfill_boundary
*/
static bool
is_boundary_expr(Node *node, collect_boundary_context *context)
{
OpExpr *op;
Node *left, *right;

if (!IsA(node, OpExpr))
return false;

op = castNode(OpExpr, node);

if (op->args->length != 2)
return false;

left = linitial(op->args);
right = llast(op->args);

/* Var OP Var is not useful here because we are not yet at a point
* where we could evaluate them */
if (IsA(left, Var) && IsA(right, Var))
return false;

if (IsA(left, Var) && var_equal(castNode(Var, left), context->ts_var))
return true;

if (IsA(right, Var) && var_equal(castNode(Var, right), context->ts_var))
return true;

return false;
}

static bool
collect_boundary_walker(Node *node, collect_boundary_context *context)
{
Node *quals = NULL;

if (node == NULL)
return false;

if (IsA(node, FromExpr))
{
quals = castNode(FromExpr, node)->quals;
}
else if (IsA(node, JoinExpr))
{
JoinExpr *j = castNode(JoinExpr, node);

/* don't descdend into outer join */
if (IS_OUTER_JOIN(j->jointype))
return false;

quals = j->quals;
}

if (quals)
{
ListCell *lc;

foreach (lc, castNode(List, quals))
{
if (is_boundary_expr(lfirst(lc), context))
context->quals = lappend(context->quals, lfirst(lc));
}
}

return expression_tree_walker(node, collect_boundary_walker, context);
}

/*
* traverse jointree to look for expressions referencing
* the time column of our gapfill call
*/
static List *
collect_boundary_expressions(Node *node, Var *ts_var)
{
collect_boundary_context context = { .quals = NIL, .ts_var = ts_var };

collect_boundary_walker(node, &context);

return context.quals;
}

static int64
infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary)
{
Expand All @@ -385,6 +478,7 @@ infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary)
TypeCacheEntry *tce = lookup_type_cache(state->gapfill_typid, TYPECACHE_BTREE_OPFAMILY);
int strategy;
Oid lefttype, righttype;
List *quals;

int64 boundary_value = 0;
bool boundary_found = false;
Expand All @@ -403,18 +497,17 @@ infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary)

ts_var = castNode(Var, lsecond(func->args));

foreach (lc, (List *) jt->quals)
quals = collect_boundary_expressions((Node *) jt, ts_var);

foreach (lc, quals)
{
OpExpr *opexpr;
Var *var;
Expr *expr;
Oid op;
int64 value;

if (!IsA(lfirst(lc), OpExpr))
continue;

opexpr = lfirst(lc);
opexpr = lfirst_node(OpExpr, lc);

if (IsA(linitial(opexpr->args), Var))
{
Expand All @@ -429,7 +522,11 @@ infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary)
op = get_commutator(opexpr->opno);
}
else
{
/* collect_boundary_expressions has filtered those out already */
Assert(false);
continue;
}

if (!op_in_opfamily(op, tce->btree_opf))
continue;
Expand Down
31 changes: 28 additions & 3 deletions tsl/test/expected/gapfill.out
Original file line number Diff line number Diff line change
Expand Up @@ -1843,14 +1843,39 @@ FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time
WHERE m1.time >=0 AND m1.time < 2
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
-- test outer join with constraints in join condition
-- not usable as start/stop
SELECT
time_bucket_gapfill(1,m1.time)
FROM metrics_int m1 LEFT OUTER JOIN metrics_int m2 ON m1.time=m2.time AND m1.time >=0 AND m1.time < 2
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
\set ON_ERROR_STOP 1
-- test subqueries
-- subqueries will alter the shape of the plan and top-level constraints
-- might not end up in top-level of jointree
SELECT
time_bucket_gapfill(1,m1.time)
FROM metrics_int m1
WHERE m1.time >=0 AND m1.time < 2 AND device_id IN (SELECT device_id FROM metrics_int)
GROUP BY 1;
time_bucket_gapfill
---------------------
0
1
(2 rows)

-- test inner join with constraints in join condition
-- only toplevel where clause constraints are supported atm
SELECT
time_bucket_gapfill(1,m2.time)
FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time AND m2.time >=0 AND m2.time < 2
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
\set ON_ERROR_STOP 1
time_bucket_gapfill
---------------------
0
1
(2 rows)

-- int32 time_bucket_gapfill with no start/finish
SELECT
time_bucket_gapfill(1,t)
Expand Down
21 changes: 18 additions & 3 deletions tsl/test/sql/gapfill.sql
Original file line number Diff line number Diff line change
Expand Up @@ -1150,15 +1150,30 @@ FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time
WHERE m1.time >=0 AND m1.time < 2
GROUP BY 1;

-- test outer join with constraints in join condition
-- not usable as start/stop
SELECT
time_bucket_gapfill(1,m1.time)
FROM metrics_int m1 LEFT OUTER JOIN metrics_int m2 ON m1.time=m2.time AND m1.time >=0 AND m1.time < 2
GROUP BY 1;

\set ON_ERROR_STOP 1

-- test subqueries
-- subqueries will alter the shape of the plan and top-level constraints
-- might not end up in top-level of jointree
SELECT
time_bucket_gapfill(1,m1.time)
FROM metrics_int m1
WHERE m1.time >=0 AND m1.time < 2 AND device_id IN (SELECT device_id FROM metrics_int)
GROUP BY 1;

-- test inner join with constraints in join condition
-- only toplevel where clause constraints are supported atm
SELECT
time_bucket_gapfill(1,m2.time)
FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time AND m2.time >=0 AND m2.time < 2
GROUP BY 1;

\set ON_ERROR_STOP 1

-- int32 time_bucket_gapfill with no start/finish
SELECT
time_bucket_gapfill(1,t)
Expand Down

0 comments on commit c850fc6

Please sign in to comment.