From d5f03ddbc3b3e1aaeb3b3dee200268dfd6303fec Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Sat, 27 Jul 2024 12:10:24 +0200 Subject: [PATCH 01/12] Move chunkwise aggregation into TSL part Its purpose is to enable vectorized aggregation, and it has to interact with DecompressChunk to build proper plans, both of which are TSL-only. --- src/gapfill.h | 4 +- src/import/planner.h | 4 +- src/planner/partialize.c | 725 +--------------------------- src/planner/partialize.h | 3 - src/planner/planner.c | 20 +- test/expected/append-15.out | 124 ++--- test/expected/parallel-15.out | 127 +++-- test/expected/partitionwise-15.out | 126 ++--- tsl/src/CMakeLists.txt | 1 + tsl/src/chunkwise_agg.c | 734 +++++++++++++++++++++++++++++ tsl/src/planner.c | 8 + tsl/src/planner.h | 3 + 12 files changed, 901 insertions(+), 978 deletions(-) create mode 100644 tsl/src/chunkwise_agg.c diff --git a/src/gapfill.h b/src/gapfill.h index 7bcf3e45fa2..96568e8c943 100644 --- a/src/gapfill.h +++ b/src/gapfill.h @@ -5,6 +5,8 @@ */ #pragma once +#include "export.h" + #define GAPFILL_PATH_NAME "GapFill" -extern bool ts_is_gapfill_path(Path *path); +extern TSDLLEXPORT bool ts_is_gapfill_path(Path *path); diff --git a/src/import/planner.h b/src/import/planner.h index f580cc15edc..cc8b9b0feee 100644 --- a/src/import/planner.h +++ b/src/import/planner.h @@ -26,8 +26,8 @@ extern TSDLLEXPORT void ts_make_inh_translation_list(Relation oldrelation, Relation newrelation, Index newvarno, List **translated_vars); -extern struct PathTarget *ts_make_partial_grouping_target(struct PlannerInfo *root, - PathTarget *grouping_target); +extern TSDLLEXPORT struct PathTarget *ts_make_partial_grouping_target(struct PlannerInfo *root, + PathTarget *grouping_target); extern bool ts_get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, Datum *min, Datum *max); diff --git a/src/planner/partialize.c b/src/planner/partialize.c index ea13257d904..8eeaa61d345 100644 --- a/src/planner/partialize.c +++ b/src/planner/partialize.c @@ -4,20 +4,15 @@ * LICENSE-APACHE for a copy of the license. */ #include + #include #include #include #include #include -#include #include -#include #include -#include -#include #include -#include -#include #include #include @@ -25,12 +20,8 @@ #include "debug_assert.h" #include "estimate.h" #include "extension_constants.h" -#include "gapfill.h" -#include "import/planner.h" -#include "nodes/chunk_append/chunk_append.h" #include "nodes/print.h" #include "partialize.h" -#include "planner.h" #include "utils.h" #define TS_PARTIALFN "partialize_agg" @@ -198,720 +189,6 @@ partialize_agg_paths(RelOptInfo *rel) return has_combine; } -/* Helper function to find the first node of the provided type in the pathlist of the relation */ -static Node * -find_node(const RelOptInfo *relation, NodeTag type) -{ - ListCell *lc; - foreach (lc, relation->pathlist) - { - Node *node = lfirst(lc); - if (nodeTag(node) == type) - return node; - } - - return NULL; -} - -/* Check if the relation already has a min/max path */ -static bool -has_min_max_agg_path(const RelOptInfo *relation) -{ - return find_node(relation, T_MinMaxAggPath) != NULL; -} - -/* - * Get an an existing aggregation path for the given relation or NULL if no aggregation path exists. - */ -static AggPath * -get_existing_agg_path(const RelOptInfo *relation) -{ - Node *node = find_node(relation, T_AggPath); - return node ? 
castNode(AggPath, node) : NULL; -} - -/* - * Get all subpaths from a Append, MergeAppend, or ChunkAppend path - */ -static List * -get_subpaths_from_append_path(Path *path, bool handle_gather_path) -{ - if (IsA(path, AppendPath)) - { - AppendPath *append_path = castNode(AppendPath, path); - return append_path->subpaths; - } - else if (IsA(path, MergeAppendPath)) - { - MergeAppendPath *merge_append_path = castNode(MergeAppendPath, path); - return merge_append_path->subpaths; - } - else if (ts_is_chunk_append_path(path)) - { - CustomPath *custom_path = castNode(CustomPath, path); - return custom_path->custom_paths; - } - else if (handle_gather_path && IsA(path, GatherPath)) - { - return get_subpaths_from_append_path(castNode(GatherPath, path)->subpath, false); - } - - /* Aggregation push-down is not supported for other path types so far */ - return NIL; -} - -/* - * Copy an AppendPath and set new subpaths. - */ -static AppendPath * -copy_append_path(AppendPath *path, List *subpaths, PathTarget *pathtarget) -{ - AppendPath *newPath = makeNode(AppendPath); - memcpy(newPath, path, sizeof(AppendPath)); - newPath->subpaths = subpaths; - newPath->path.pathtarget = copy_pathtarget(pathtarget); - - cost_append(newPath); - - return newPath; -} - -/* - * Copy a MergeAppendPath and set new subpaths. - */ -static MergeAppendPath * -copy_merge_append_path(PlannerInfo *root, MergeAppendPath *path, List *subpaths, - PathTarget *pathtarget) -{ - MergeAppendPath *newPath = - create_merge_append_path(root, path->path.parent, subpaths, path->path.pathkeys, NULL); - - newPath->path.param_info = path->path.param_info; - newPath->path.pathtarget = copy_pathtarget(pathtarget); - - return newPath; -} - -/* - * Copy an append-like path and set new subpaths - */ -static Path * -copy_append_like_path(PlannerInfo *root, Path *path, List *new_subpaths, PathTarget *pathtarget) -{ - if (IsA(path, AppendPath)) - { - AppendPath *append_path = castNode(AppendPath, path); - AppendPath *new_append_path = copy_append_path(append_path, new_subpaths, pathtarget); - return &new_append_path->path; - } - else if (IsA(path, MergeAppendPath)) - { - MergeAppendPath *merge_append_path = castNode(MergeAppendPath, path); - MergeAppendPath *new_merge_append_path = - copy_merge_append_path(root, merge_append_path, new_subpaths, pathtarget); - return &new_merge_append_path->path; - } - else if (ts_is_chunk_append_path(path)) - { - CustomPath *custom_path = castNode(CustomPath, path); - ChunkAppendPath *chunk_append_path = (ChunkAppendPath *) custom_path; - ChunkAppendPath *new_chunk_append_path = - ts_chunk_append_path_copy(chunk_append_path, new_subpaths, pathtarget); - return &new_chunk_append_path->cpath.path; - } - - /* Should never happen, already checked by caller */ - Ensure(false, "unknown path type"); - pg_unreachable(); -} - -/* - * Generate a partially sorted aggregated agg path on top of a path - */ -static AggPath * -create_sorted_partial_agg_path(PlannerInfo *root, Path *path, PathTarget *target, - double d_num_groups, GroupPathExtraData *extra_data) -{ - Query *parse = root->parse; - - /* Determine costs for aggregations */ - AggClauseCosts *agg_partial_costs = &extra_data->agg_partial_costs; - - bool is_sorted = pathkeys_contained_in(root->group_pathkeys, path->pathkeys); - - if (!is_sorted) - { - path = (Path *) create_sort_path(root, path->parent, path, root->group_pathkeys, -1.0); - } - - AggPath *sorted_agg_path = create_agg_path(root, - path->parent, - path, - target, - parse->groupClause ? 
AGG_SORTED : AGG_PLAIN, - AGGSPLIT_INITIAL_SERIAL, -#if PG16_LT - parse->groupClause, -#else - root->processed_groupClause, -#endif - NIL, - agg_partial_costs, - d_num_groups); - - return sorted_agg_path; -} - -/* - * Generate a partially hashed aggregated add path on top of a path - */ -static AggPath * -create_hashed_partial_agg_path(PlannerInfo *root, Path *path, PathTarget *target, - double d_num_groups, GroupPathExtraData *extra_data) -{ - /* Determine costs for aggregations */ - AggClauseCosts *agg_partial_costs = &extra_data->agg_partial_costs; - - AggPath *hash_path = create_agg_path(root, - path->parent, - path, - target, - AGG_HASHED, - AGGSPLIT_INITIAL_SERIAL, -#if PG16_LT - root->parse->groupClause, -#else - root->processed_groupClause, -#endif - NIL, - agg_partial_costs, - d_num_groups); - return hash_path; -} - -/* - * Add partially aggregated subpath - */ -static void -add_partially_aggregated_subpaths(PlannerInfo *root, Path *parent_path, - PathTarget *partial_grouping_target, double d_num_groups, - GroupPathExtraData *extra_data, bool can_sort, bool can_hash, - Path *subpath, List **sorted_paths, List **hashed_paths) -{ - /* Translate targetlist for partition */ - AppendRelInfo *appinfo = ts_get_appendrelinfo(root, subpath->parent->relid, false); - PathTarget *chunktarget = copy_pathtarget(partial_grouping_target); - chunktarget->exprs = - castNode(List, adjust_appendrel_attrs(root, (Node *) chunktarget->exprs, 1, &appinfo)); - - /* In declarative partitioning planning, this is done by appy_scanjoin_target_to_path */ - Assert(list_length(subpath->pathtarget->exprs) == list_length(parent_path->pathtarget->exprs)); - subpath->pathtarget->sortgrouprefs = parent_path->pathtarget->sortgrouprefs; - - if (can_sort) - { - AggPath *agg_path = - create_sorted_partial_agg_path(root, subpath, chunktarget, d_num_groups, extra_data); - - *sorted_paths = lappend(*sorted_paths, (Path *) agg_path); - } - - if (can_hash) - { - AggPath *agg_path = - create_hashed_partial_agg_path(root, subpath, chunktarget, d_num_groups, extra_data); - - *hashed_paths = lappend(*hashed_paths, (Path *) agg_path); - } -} - -/* - * Generate a total aggregation path for partial aggregations. - * - * The generated paths contain partial aggregations (created by using AGGSPLIT_INITIAL_SERIAL). - * These aggregations need to be finished by the caller by adding a node that performs the - * AGGSPLIT_FINAL_DESERIAL step. - */ -static void -generate_agg_pushdown_path(PlannerInfo *root, Path *cheapest_total_path, RelOptInfo *output_rel, - RelOptInfo *partially_grouped_rel, PathTarget *grouping_target, - PathTarget *partial_grouping_target, bool can_sort, bool can_hash, - double d_num_groups, GroupPathExtraData *extra_data) -{ - /* Get subpaths */ - List *subpaths = get_subpaths_from_append_path(cheapest_total_path, false); - - /* No subpaths available or unsupported append node */ - if (subpaths == NIL) - return; - - if (list_length(subpaths) < 2) - { - /* - * Doesn't make sense to add per-chunk aggregation paths if there's - * only one chunk. - */ - return; - } - - /* Generate agg paths on top of the append children */ - List *sorted_subpaths = NIL; - List *hashed_subpaths = NIL; - - ListCell *lc; - foreach (lc, subpaths) - { - Path *subpath = lfirst(lc); - - /* Check if we have an append path under an append path (e.g., a partially compressed - * chunk. The first append path merges the chunk results. The second append path merges the - * uncompressed and the compressed part of the chunk). 
- * - * In this case, the partial aggregation needs to be pushed down below the lower - * append path. - */ - List *subsubpaths = get_subpaths_from_append_path(subpath, false); - - if (subsubpaths != NIL) - { - List *sorted_subsubpaths = NIL; - List *hashed_subsubpaths = NIL; - - ListCell *lc2; - foreach (lc2, subsubpaths) - { - Path *subsubpath = lfirst(lc2); - - add_partially_aggregated_subpaths(root, - cheapest_total_path, - partial_grouping_target, - d_num_groups, - extra_data, - can_sort, - can_hash, - subsubpath, - &sorted_subsubpaths /* Result path */, - &hashed_subsubpaths /* Result path */); - } - - if (can_sort) - { - sorted_subpaths = lappend(sorted_subpaths, - copy_append_like_path(root, - subpath, - sorted_subsubpaths, - subpath->pathtarget)); - } - - if (can_hash) - { - hashed_subpaths = lappend(hashed_subpaths, - copy_append_like_path(root, - subpath, - hashed_subsubpaths, - subpath->pathtarget)); - } - } - else - { - add_partially_aggregated_subpaths(root, - cheapest_total_path, - partial_grouping_target, - d_num_groups, - extra_data, - can_sort, - can_hash, - subpath, - &sorted_subpaths /* Result paths */, - &hashed_subpaths /* Result paths */); - } - } - - /* Create new append paths */ - if (sorted_subpaths != NIL) - { - add_path(partially_grouped_rel, - copy_append_like_path(root, - cheapest_total_path, - sorted_subpaths, - partial_grouping_target)); - } - - if (hashed_subpaths != NIL) - { - add_path(partially_grouped_rel, - copy_append_like_path(root, - cheapest_total_path, - hashed_subpaths, - partial_grouping_target)); - } -} - -/* - * Generate a partial aggregation path for chunk-wise partial aggregations. - - * This function does almost the same as generate_agg_pushdown_path(). In contrast, it processes a - * partial_path (paths that are usually used in parallel plans) of the input relation, pushes down - * the aggregation in this path and adds a gather node on top of the partial plan. Therefore, the - * push-down of the partial aggregates also works in parallel plans. - * - * Note: The PostgreSQL terminology can cause some confusion here. Partial paths are usually used by - * PostgreSQL to distribute work between parallel workers. This has nothing to do with the partial - * aggregation we are creating in the function. - */ -static void -generate_partial_agg_pushdown_path(PlannerInfo *root, Path *cheapest_partial_path, - RelOptInfo *output_rel, RelOptInfo *partially_grouped_rel, - PathTarget *grouping_target, PathTarget *partial_grouping_target, - bool can_sort, bool can_hash, double d_num_groups, - GroupPathExtraData *extra_data) -{ - /* Get subpaths */ - List *subpaths = get_subpaths_from_append_path(cheapest_partial_path, false); - - /* No subpaths available or unsupported append node */ - if (subpaths == NIL) - return; - - if (list_length(subpaths) < 2) - { - /* - * Doesn't make sense to add per-chunk aggregation paths if there's - * only one chunk. 
- */ - return; - } - /* Generate agg paths on top of the append children */ - ListCell *lc; - List *sorted_subpaths = NIL; - List *hashed_subpaths = NIL; - - foreach (lc, subpaths) - { - Path *subpath = lfirst(lc); - - Assert(subpath->parallel_safe); - - /* There should be no nested append paths in the partial paths to construct the upper - * relation */ - Assert(get_subpaths_from_append_path(subpath, false) == NIL); - - add_partially_aggregated_subpaths(root, - cheapest_partial_path, - partial_grouping_target, - d_num_groups, - extra_data, - can_sort, - can_hash, - subpath, - &sorted_subpaths /* Result paths */, - &hashed_subpaths /* Result paths */); - } - - /* Create new append paths */ - if (sorted_subpaths != NIL) - { - add_partial_path(partially_grouped_rel, - copy_append_like_path(root, - cheapest_partial_path, - sorted_subpaths, - partial_grouping_target)); - } - - if (hashed_subpaths != NIL) - { - add_partial_path(partially_grouped_rel, - copy_append_like_path(root, - cheapest_partial_path, - hashed_subpaths, - partial_grouping_target)); - } - - /* Finish the partial paths (just added by add_partial_path to partially_grouped_rel in this - * function) by adding a gather node and add this path to the partially_grouped_rel using - * add_path). */ - foreach (lc, partially_grouped_rel->partial_pathlist) - { - Path *append_path = lfirst(lc); - double total_groups = append_path->rows * append_path->parallel_workers; - - Path *gather_path = (Path *) create_gather_path(root, - partially_grouped_rel, - append_path, - partially_grouped_rel->reltarget, - NULL, - &total_groups); - add_path(partially_grouped_rel, (Path *) gather_path); - } -} - -/* - * Get the best total path for aggregation. Prefer chunk append paths if we have one, otherwise - * return the cheapest_total_path; - */ -static Path * -get_best_total_path(RelOptInfo *output_rel) -{ - ListCell *lc; - foreach (lc, output_rel->pathlist) - { - Path *path = lfirst(lc); - - if (ts_is_chunk_append_path(path)) - return path; - } - - return output_rel->cheapest_total_path; -} - -/* - Is the provided path a agg path that uses a sorted or plain agg strategy? -*/ -static bool pg_nodiscard -is_path_sorted_or_plain_agg_path(Path *path) -{ - AggPath *agg_path = castNode(AggPath, path); - Assert(agg_path->aggstrategy == AGG_SORTED || agg_path->aggstrategy == AGG_PLAIN || - agg_path->aggstrategy == AGG_HASHED); - return agg_path->aggstrategy == AGG_SORTED || agg_path->aggstrategy == AGG_PLAIN; -} - -/* - * Check if this path belongs to a plain or sorted aggregation - */ -static bool -contains_path_plain_or_sorted_agg(Path *path) -{ - List *subpaths = get_subpaths_from_append_path(path, true); - - Ensure(subpaths != NIL, "Unable to determine aggregation type"); - - ListCell *lc; - foreach (lc, subpaths) - { - Path *subpath = lfirst(lc); - - if (IsA(subpath, AggPath)) - return is_path_sorted_or_plain_agg_path(subpath); - } - - /* - * No dedicated aggregation nodes found directly underneath the append node. This could be - * due to two reasons. - * - * (1) Only vectorized aggregation is used and we don't have dedicated Aggregation nods. - * (2) The query plan uses multi-level appends to keep a certain sorting - * - ChunkAppend - * - Merge Append - * - Agg Chunk 1 - * - Agg Chunk 2 - * - Merge Append - * - Agg Chunk 3 - * - Agg Chunk 4 - * - * in both cases, we use a sorted aggregation node to finalize the partial aggregation and - * produce a proper sorting. 
- */ - return true; -} - -/* - * Replan the aggregation and create a partial aggregation at chunk level and finalize the - * aggregation on top of an append node. - * - * The functionality is inspired by PostgreSQL's create_partitionwise_grouping_paths() function - * - * Generated aggregation paths: - * - * Finalize Aggregate - * -> Append - * -> Partial Aggregation - * - Chunk 1 - * ... - * -> Append of partially compressed chunk 2 - * -> Partial Aggregation - * -> Scan on uncompressed part of chunk 2 - * -> Partial Aggregation - * -> Scan on compressed part of chunk 2 - * ... - * -> Partial Aggregation N - * - Chunk N - */ -void -ts_pushdown_partial_agg(PlannerInfo *root, Hypertable *ht, RelOptInfo *input_rel, - RelOptInfo *output_rel, void *extra) -{ - Query *parse = root->parse; - - /* We are only interested in hypertables */ - if (!ht) - return; - - /* Perform partial aggregation planning only if there is an aggregation is requested */ - if (!parse->hasAggs) - return; - - /* Grouping sets are not supported by the partial aggregation pushdown */ - if (parse->groupingSets) - return; - - /* Don't replan aggregation if we already have a MinMaxAggPath (e.g., created by - * ts_preprocess_first_last_aggregates) */ - if (has_min_max_agg_path(output_rel)) - return; - - /* Is sorting possible ? */ - bool can_sort = grouping_is_sortable(parse->groupClause) && ts_guc_enable_chunkwise_aggregation; - - /* Is hashing possible ? */ - bool can_hash = grouping_is_hashable(parse->groupClause) && - !ts_is_gapfill_path(linitial(output_rel->pathlist)) && enable_hashagg; - - Assert(extra != NULL); - GroupPathExtraData *extra_data = (GroupPathExtraData *) extra; - - /* Determine the number of groups from the already planned aggregation */ - AggPath *existing_agg_path = get_existing_agg_path(output_rel); - if (existing_agg_path == NULL) - return; - - /* Skip partial aggregations already created by _timescaledb_functions.partialize_agg */ - if (existing_agg_path->aggsplit == AGGSPLIT_INITIAL_SERIAL) - return; - - /* Don't replan aggregation if it contains already partials or non-serializable aggregates */ - if (root->hasNonPartialAggs || root->hasNonSerialAggs) - return; - - double d_num_groups = existing_agg_path->numGroups; - Assert(d_num_groups > 0); - - /* Construct partial group agg upper relation */ - RelOptInfo *partially_grouped_rel = - fetch_upper_rel(root, UPPERREL_PARTIAL_GROUP_AGG, input_rel->relids); - partially_grouped_rel->consider_parallel = input_rel->consider_parallel; - partially_grouped_rel->reloptkind = input_rel->reloptkind; - partially_grouped_rel->serverid = input_rel->serverid; - partially_grouped_rel->userid = input_rel->userid; - partially_grouped_rel->useridiscurrent = input_rel->useridiscurrent; - partially_grouped_rel->fdwroutine = input_rel->fdwroutine; - - /* Build target list for partial aggregate paths */ - PathTarget *grouping_target = output_rel->reltarget; - PathTarget *partial_grouping_target = ts_make_partial_grouping_target(root, grouping_target); - partially_grouped_rel->reltarget = partial_grouping_target; - - /* Calculate aggregation costs */ - if (!extra_data->partial_costs_set) - { - /* Init costs */ - MemSet(&extra_data->agg_partial_costs, 0, sizeof(AggClauseCosts)); - MemSet(&extra_data->agg_final_costs, 0, sizeof(AggClauseCosts)); - - /* partial phase */ - get_agg_clause_costs(root, AGGSPLIT_INITIAL_SERIAL, &extra_data->agg_partial_costs); - - /* final phase */ - get_agg_clause_costs(root, AGGSPLIT_FINAL_DESERIAL, &extra_data->agg_final_costs); - - 
extra_data->partial_costs_set = true; - } - - /* Generate the aggregation pushdown path */ - Path *cheapest_total_path = get_best_total_path(input_rel); - Assert(cheapest_total_path != NULL); - generate_agg_pushdown_path(root, - cheapest_total_path, - output_rel, - partially_grouped_rel, - grouping_target, - partial_grouping_target, - can_sort, - can_hash, - d_num_groups, - extra_data); - - /* The same as above but for partial paths */ - if (input_rel->partial_pathlist != NIL && input_rel->consider_parallel) - { - Path *cheapest_partial_path = linitial(input_rel->partial_pathlist); - generate_partial_agg_pushdown_path(root, - cheapest_partial_path, - output_rel, - partially_grouped_rel, - grouping_target, - partial_grouping_target, - can_sort, - can_hash, - d_num_groups, - extra_data); - } - - /* Replan aggregation if we were able to generate partially grouped rel paths */ - if (partially_grouped_rel->pathlist == NIL) - return; - - /* Prefer our paths */ - output_rel->pathlist = NIL; - output_rel->partial_pathlist = NIL; - - /* Finalize the created partially aggregated paths by adding a 'Finalize Aggregate' node on top - * of them. */ - AggClauseCosts *agg_final_costs = &extra_data->agg_final_costs; - ListCell *lc; - foreach (lc, partially_grouped_rel->pathlist) - { - Path *append_path = lfirst(lc); - - if (contains_path_plain_or_sorted_agg(append_path)) - { - bool is_sorted; - - is_sorted = pathkeys_contained_in(root->group_pathkeys, append_path->pathkeys); - - if (!is_sorted) - { - append_path = (Path *) - create_sort_path(root, output_rel, append_path, root->group_pathkeys, -1.0); - } - - add_path(output_rel, - (Path *) create_agg_path(root, - output_rel, - append_path, - grouping_target, - parse->groupClause ? AGG_SORTED : AGG_PLAIN, - AGGSPLIT_FINAL_DESERIAL, -#if PG16_LT - parse->groupClause, -#else - root->processed_groupClause, -#endif - (List *) parse->havingQual, - agg_final_costs, - d_num_groups)); - } - else - { - add_path(output_rel, - (Path *) create_agg_path(root, - output_rel, - append_path, - grouping_target, - AGG_HASHED, - AGGSPLIT_FINAL_DESERIAL, -#if PG16_LT - parse->groupClause, -#else - root->processed_groupClause, -#endif - (List *) parse->havingQual, - agg_final_costs, - d_num_groups)); - } - } -} - /* * Turn an aggregate relation into a partial aggregate relation if aggregates * are enclosed by the partialize_agg function. 
diff --git a/src/planner/partialize.h b/src/planner/partialize.h index aa5aca9d4d9..8cb4aaed228 100644 --- a/src/planner/partialize.h +++ b/src/planner/partialize.h @@ -10,6 +10,3 @@ #include #include "chunk.h" - -void ts_pushdown_partial_agg(PlannerInfo *root, Hypertable *ht, RelOptInfo *input_rel, - RelOptInfo *output_rel, void *extra); diff --git a/src/planner/planner.c b/src/planner/planner.c index bd215199286..13171446c5e 100644 --- a/src/planner/planner.c +++ b/src/planner/planner.c @@ -1583,10 +1583,6 @@ timescaledb_create_upper_paths_hook(PlannerInfo *root, UpperRelationKind stage, if (input_rel != NULL) reltype = ts_classify_relation(root, input_rel, &ht); - if (ts_cm_functions->create_upper_paths_hook != NULL) - ts_cm_functions - ->create_upper_paths_hook(root, stage, input_rel, output_rel, reltype, ht, extra); - if (output_rel != NULL) { /* Modify for INSERTs on a hypertable */ @@ -1603,23 +1599,19 @@ timescaledb_create_upper_paths_hook(PlannerInfo *root, UpperRelationKind stage, } } - if (!ts_guc_enable_optimizations || input_rel == NULL || IS_DUMMY_REL(input_rel)) - return; - - if (!involves_hypertable(root, input_rel)) - return; - - if (stage == UPPERREL_GROUP_AGG && output_rel != NULL) + if (stage == UPPERREL_GROUP_AGG && output_rel != NULL && ts_guc_enable_optimizations && + input_rel != NULL && !IS_DUMMY_REL(input_rel) && involves_hypertable(root, input_rel)) { if (parse->hasAggs) ts_preprocess_first_last_aggregates(root, root->processed_tlist); - if (ts_guc_enable_chunkwise_aggregation) - ts_pushdown_partial_agg(root, ht, input_rel, output_rel, extra); - if (!partials_found) ts_plan_add_hashagg(root, input_rel, output_rel); } + + if (ts_cm_functions->create_upper_paths_hook != NULL) + ts_cm_functions + ->create_upper_paths_hook(root, stage, input_rel, output_rel, reltype, ht, extra); } static bool diff --git a/test/expected/append-15.out b/test/expected/append-15.out index fbf1f113b4a..55f5ea6c74b 100644 --- a/test/expected/append-15.out +++ b/test/expected/append-15.out @@ -2039,95 +2039,67 @@ ORDER BY time DESC, device_id; -- aggregates should prevent pushdown :PREFIX SELECT count(*) FROM metrics_timestamptz LIMIT 1; - QUERY PLAN ----------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------- Limit (actual rows=1 loops=1) - -> Finalize Aggregate (actual rows=1 loops=1) - -> Append (actual rows=5 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_17_chunk (actual rows=4032 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_18_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_19_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_20_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_21_chunk (actual rows=4611 loops=1) -(13 rows) + -> Aggregate (actual rows=1 loops=1) + -> Append (actual rows=26787 loops=1) + -> Seq Scan on _hyper_5_17_chunk (actual rows=4032 loops=1) + -> Seq Scan on _hyper_5_18_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_19_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_20_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_21_chunk (actual rows=4611 loops=1) +(8 rows) :PREFIX SELECT count(*) FROM metrics_space LIMIT 1; - QUERY PLAN 
----------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------- Limit (actual rows=1 loops=1) - -> Finalize Aggregate (actual rows=1 loops=1) - -> Append (actual rows=9 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_22_chunk (actual rows=5376 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_23_chunk (actual rows=5376 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_24_chunk (actual rows=2688 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_25_chunk (actual rows=8064 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_26_chunk (actual rows=8064 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_27_chunk (actual rows=4032 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_28_chunk (actual rows=1540 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_29_chunk (actual rows=1540 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_30_chunk (actual rows=770 loops=1) -(21 rows) + -> Aggregate (actual rows=1 loops=1) + -> Append (actual rows=37450 loops=1) + -> Seq Scan on _hyper_6_22_chunk (actual rows=5376 loops=1) + -> Seq Scan on _hyper_6_23_chunk (actual rows=5376 loops=1) + -> Seq Scan on _hyper_6_24_chunk (actual rows=2688 loops=1) + -> Seq Scan on _hyper_6_25_chunk (actual rows=8064 loops=1) + -> Seq Scan on _hyper_6_26_chunk (actual rows=8064 loops=1) + -> Seq Scan on _hyper_6_27_chunk (actual rows=4032 loops=1) + -> Seq Scan on _hyper_6_28_chunk (actual rows=1540 loops=1) + -> Seq Scan on _hyper_6_29_chunk (actual rows=1540 loops=1) + -> Seq Scan on _hyper_6_30_chunk (actual rows=770 loops=1) +(12 rows) -- HAVING should prevent pushdown :PREFIX SELECT 1 FROM metrics_timestamptz HAVING count(*) > 1 LIMIT 1; - QUERY PLAN ----------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------- Limit (actual rows=1 loops=1) - -> Finalize Aggregate (actual rows=1 loops=1) + -> Aggregate (actual rows=1 loops=1) Filter: (count(*) > 1) - -> Append (actual rows=5 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_17_chunk (actual rows=4032 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_18_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_19_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_20_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_21_chunk (actual rows=4611 loops=1) -(14 rows) + -> Append (actual rows=26787 loops=1) + -> Seq Scan on _hyper_5_17_chunk (actual rows=4032 loops=1) + -> Seq Scan on _hyper_5_18_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_19_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_20_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_21_chunk (actual rows=4611 loops=1) +(9 rows) :PREFIX SELECT 1 FROM metrics_space HAVING count(*) > 1 LIMIT 1; - QUERY PLAN ----------------------------------------------------------------------------------- + QUERY PLAN 
+---------------------------------------------------------------------------- Limit (actual rows=1 loops=1) - -> Finalize Aggregate (actual rows=1 loops=1) + -> Aggregate (actual rows=1 loops=1) Filter: (count(*) > 1) - -> Append (actual rows=9 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_22_chunk (actual rows=5376 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_23_chunk (actual rows=5376 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_24_chunk (actual rows=2688 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_25_chunk (actual rows=8064 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_26_chunk (actual rows=8064 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_27_chunk (actual rows=4032 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_28_chunk (actual rows=1540 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_29_chunk (actual rows=1540 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_30_chunk (actual rows=770 loops=1) -(22 rows) + -> Append (actual rows=37450 loops=1) + -> Seq Scan on _hyper_6_22_chunk (actual rows=5376 loops=1) + -> Seq Scan on _hyper_6_23_chunk (actual rows=5376 loops=1) + -> Seq Scan on _hyper_6_24_chunk (actual rows=2688 loops=1) + -> Seq Scan on _hyper_6_25_chunk (actual rows=8064 loops=1) + -> Seq Scan on _hyper_6_26_chunk (actual rows=8064 loops=1) + -> Seq Scan on _hyper_6_27_chunk (actual rows=4032 loops=1) + -> Seq Scan on _hyper_6_28_chunk (actual rows=1540 loops=1) + -> Seq Scan on _hyper_6_29_chunk (actual rows=1540 loops=1) + -> Seq Scan on _hyper_6_30_chunk (actual rows=770 loops=1) +(13 rows) -- DISTINCT should prevent pushdown SET enable_hashagg TO false; diff --git a/test/expected/parallel-15.out b/test/expected/parallel-15.out index 177ed0cb02c..250282199c4 100644 --- a/test/expected/parallel-15.out +++ b/test/expected/parallel-15.out @@ -35,12 +35,11 @@ EXPLAIN (costs off) SELECT first(i, j) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT first(i, j) FROM "test"; first @@ -54,12 +53,11 @@ EXPLAIN (costs off) SELECT last(i, j) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT last(i, j) FROM "test"; last @@ -139,12 +137,11 @@ EXPLAIN (costs off) SELECT histogram(i, 1, 1000000, 2) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT histogram(i, 1, 1000000, 2) FROM "test"; histogram @@ -158,12 +155,11 @@ EXPLAIN (costs off) SELECT histogram(i, 1,1000001,10) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel 
Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT histogram(i, 1, 1000001, 10) FROM "test"; histogram @@ -177,12 +173,11 @@ EXPLAIN (costs off) SELECT histogram(i, 0,100000,5) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT histogram(i, 0, 100000, 5) FROM "test"; histogram @@ -196,12 +191,11 @@ EXPLAIN (costs off) SELECT histogram(i, 10,100000,5) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT histogram(i, 10, 100000, 5) FROM "test"; histogram @@ -215,14 +209,13 @@ EXPLAIN (costs off) SELECT histogram(NULL, 10,100000,5) FROM "test" WHERE i = c Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk Filter: ((i)::double precision = '-1'::double precision) - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk Filter: ((i)::double precision = '-1'::double precision) -(10 rows) +(9 rows) SELECT histogram(NULL, 10,100000,5) FROM "test" WHERE i = coalesce(-1,j); histogram @@ -255,21 +248,20 @@ SELECT histogram(NULL, 10,100000,5) FROM "test" WHERE i = coalesce(-1,j); Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_1_chunk Filter: (i > 1) - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_2_chunk Filter: (i > 1) -(17 rows) +(16 rows) SELECT count(*) FROM "test" WHERE i > 1 AND length(version()) > 0; count @@ -554,21 +546,20 @@ SET max_parallel_workers_per_gather TO 2; Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Parallel Index Only Scan using _hyper_1_1_chunk_test_i_idx on _hyper_1_1_chunk Index Cond: (i >= 400000) - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_2_chunk Filter: (i >= 400000) -(17 rows) +(16 rows) SELECT count(*) FROM "test" WHERE i >= 400000 AND length(version()) > 0; count @@ -584,21 +575,20 @@ SELECT count(*) FROM "test" WHERE i >= 400000 AND length(version()) > 0; Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + 
Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Parallel Index Only Scan using _hyper_1_2_chunk_test_i_idx on _hyper_1_2_chunk Index Cond: (i < 600000) - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_1_chunk Filter: (i < 600000) -(17 rows) +(16 rows) SELECT count(*) FROM "test" WHERE i < 600000 AND length(version()) > 0; count @@ -614,19 +604,18 @@ SET max_parallel_workers_per_gather TO 1; Finalize Aggregate -> Gather Workers Planned: 1 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_2_chunk -(15 rows) +(14 rows) SELECT count(*) FROM "test" WHERE length(version()) > 0; count @@ -670,21 +659,20 @@ ALTER TABLE :CHUNK2 SET (parallel_workers=2); Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Index Only Scan using _hyper_1_1_chunk_test_i_idx on _hyper_1_1_chunk Index Cond: (i > 400000) - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_2_chunk Filter: (i > 400000) -(17 rows) +(16 rows) ALTER TABLE :CHUNK1 SET (parallel_workers=2); ALTER TABLE :CHUNK2 SET (parallel_workers=0); @@ -694,21 +682,20 @@ ALTER TABLE :CHUNK2 SET (parallel_workers=0); Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Index Only Scan using _hyper_1_2_chunk_test_i_idx on _hyper_1_2_chunk Index Cond: (i < 600000) - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_1_chunk Filter: (i < 600000) -(17 rows) +(16 rows) ALTER TABLE :CHUNK1 RESET (parallel_workers); ALTER TABLE :CHUNK2 RESET (parallel_workers); diff --git a/test/expected/partitionwise-15.out b/test/expected/partitionwise-15.out index ea178ed5813..7baadd6c26b 100644 --- a/test/expected/partitionwise-15.out +++ b/test/expected/partitionwise-15.out @@ -339,36 +339,24 @@ SELECT device, avg(temp) FROM hyper GROUP BY 1 ORDER BY 1; - QUERY PLAN ------------------------------------------------------------------------------------------ + QUERY PLAN +---------------------------------------------------------------------------- Sort Output: _hyper_1_1_chunk.device, (avg(_hyper_1_1_chunk.temp)) Sort Key: _hyper_1_1_chunk.device - -> Finalize HashAggregate + -> HashAggregate Output: _hyper_1_1_chunk.device, avg(_hyper_1_1_chunk.temp) Group Key: 
_hyper_1_1_chunk.device -> Append - -> Partial HashAggregate - Output: _hyper_1_1_chunk.device, PARTIAL avg(_hyper_1_1_chunk.temp) - Group Key: _hyper_1_1_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_1_1_chunk - Output: _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp - -> Partial HashAggregate - Output: _hyper_1_2_chunk.device, PARTIAL avg(_hyper_1_2_chunk.temp) - Group Key: _hyper_1_2_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk - Output: _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp - -> Partial HashAggregate - Output: _hyper_1_3_chunk.device, PARTIAL avg(_hyper_1_3_chunk.temp) - Group Key: _hyper_1_3_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_1_3_chunk - Output: _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp - -> Partial HashAggregate - Output: _hyper_1_4_chunk.device, PARTIAL avg(_hyper_1_4_chunk.temp) - Group Key: _hyper_1_4_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_1_4_chunk - Output: _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp -(27 rows) + -> Seq Scan on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_3_chunk + Output: _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_4_chunk + Output: _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp +(15 rows) -- All partition keys covered (full partitionwise) SET timescaledb.enable_chunkwise_aggregation = 'off'; @@ -402,56 +390,24 @@ SELECT time, device, avg(temp) FROM hyper GROUP BY 1, 2 ORDER BY 1, 2; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------ - Finalize GroupAggregate - Output: hyper."time", hyper.device, avg(hyper.temp) - Group Key: hyper."time", hyper.device - -> Sort - Output: hyper."time", hyper.device, (PARTIAL avg(hyper.temp)) - Sort Key: hyper."time", hyper.device - -> Custom Scan (ChunkAppend) on public.hyper - Output: hyper."time", hyper.device, (PARTIAL avg(hyper.temp)) - Order: hyper."time" - Startup Exclusion: false - Runtime Exclusion: false - -> Merge Append - Sort Key: _hyper_1_1_chunk."time" - -> Partial GroupAggregate - Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, PARTIAL avg(_hyper_1_1_chunk.temp) - Group Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device - -> Sort - Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp - Sort Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device - -> Index Scan Backward using _hyper_1_1_chunk_hyper_time_idx on _timescaledb_internal._hyper_1_1_chunk - Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp - -> Partial GroupAggregate - Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, PARTIAL avg(_hyper_1_2_chunk.temp) - Group Key: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device - -> Sort - Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp - Sort Key: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device - -> Index Scan Backward using _hyper_1_2_chunk_hyper_time_idx on _timescaledb_internal._hyper_1_2_chunk - Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp - -> Merge Append - Sort Key: _hyper_1_3_chunk."time" - -> Partial GroupAggregate - Output: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device, PARTIAL avg(_hyper_1_3_chunk.temp) - Group 
Key: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device - -> Sort - Output: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp - Sort Key: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device - -> Index Scan Backward using _hyper_1_3_chunk_hyper_time_idx on _timescaledb_internal._hyper_1_3_chunk - Output: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp - -> Partial GroupAggregate - Output: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device, PARTIAL avg(_hyper_1_4_chunk.temp) - Group Key: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device - -> Sort - Output: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp - Sort Key: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device - -> Index Scan Backward using _hyper_1_4_chunk_hyper_time_idx on _timescaledb_internal._hyper_1_4_chunk - Output: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp -(47 rows) + QUERY PLAN +----------------------------------------------------------------------------------------------------- + Sort + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, (avg(_hyper_1_1_chunk.temp)) + Sort Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device + -> HashAggregate + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, avg(_hyper_1_1_chunk.temp) + Group Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device + -> Append + -> Seq Scan on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_3_chunk + Output: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_4_chunk + Output: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp +(15 rows) -- Partial aggregation since date_trunc(time) is not a partition key SET enable_partitionwise_aggregate = 'off'; @@ -800,28 +756,22 @@ FROM hyper_timepart GROUP BY 1, 2 ORDER BY 1, 2 LIMIT 10; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Limit Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, (avg(_hyper_3_7_chunk.temp)) -> Sort Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, (avg(_hyper_3_7_chunk.temp)) Sort Key: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device - -> Finalize HashAggregate + -> HashAggregate Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, avg(_hyper_3_7_chunk.temp) Group Key: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device -> Append - -> Partial HashAggregate - Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, PARTIAL avg(_hyper_3_7_chunk.temp) - Group Key: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_3_7_chunk - Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, _hyper_3_7_chunk.temp - -> Partial HashAggregate - Output: _hyper_3_8_chunk."time", _hyper_3_8_chunk.device, PARTIAL avg(_hyper_3_8_chunk.temp) - Group Key: _hyper_3_8_chunk."time", _hyper_3_8_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_3_8_chunk - Output: _hyper_3_8_chunk."time", _hyper_3_8_chunk.device, _hyper_3_8_chunk.temp -(19 rows) + -> Seq Scan on 
_timescaledb_internal._hyper_3_7_chunk + Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, _hyper_3_7_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_3_8_chunk + Output: _hyper_3_8_chunk."time", _hyper_3_8_chunk.device, _hyper_3_8_chunk.temp +(13 rows) -- Applying the time partitioning function should also allow push-down -- on open dimensions diff --git a/tsl/src/CMakeLists.txt b/tsl/src/CMakeLists.txt index e2524cc5a46..6cbfd4aec2e 100644 --- a/tsl/src/CMakeLists.txt +++ b/tsl/src/CMakeLists.txt @@ -1,6 +1,7 @@ set(SOURCES chunk_api.c chunk.c + chunkwise_agg.c init.c partialize_finalize.c planner.c diff --git a/tsl/src/chunkwise_agg.c b/tsl/src/chunkwise_agg.c new file mode 100644 index 00000000000..9188717330c --- /dev/null +++ b/tsl/src/chunkwise_agg.c @@ -0,0 +1,734 @@ +/* + * This file and its contents are licensed under the Apache License 2.0. + * Please see the included NOTICE for copyright information and + * LICENSE-APACHE for a copy of the license. + */ +#include + +#include +#include +#include +#include +#include +#include + +#include "gapfill.h" +#include "guc.h" +#include "import/planner.h" +#include "nodes/chunk_append/chunk_append.h" +#include "nodes/decompress_chunk/decompress_chunk.h" +#include "planner.h" + +/* Helper function to find the first node of the provided type in the pathlist of the relation */ +static Node * +find_node(const RelOptInfo *relation, NodeTag type) +{ + ListCell *lc; + foreach (lc, relation->pathlist) + { + Node *node = lfirst(lc); + if (nodeTag(node) == type) + return node; + } + + return NULL; +} + +/* Check if the relation already has a min/max path */ +static bool +has_min_max_agg_path(const RelOptInfo *relation) +{ + return find_node(relation, T_MinMaxAggPath) != NULL; +} + +/* + * Get an an existing aggregation path for the given relation or NULL if no aggregation path exists. + */ +static AggPath * +get_existing_agg_path(const RelOptInfo *relation) +{ + Node *node = find_node(relation, T_AggPath); + return node ? castNode(AggPath, node) : NULL; +} + +/* + * Get all subpaths from a Append, MergeAppend, or ChunkAppend path + */ +static List * +get_subpaths_from_append_path(Path *path, bool handle_gather_path) +{ + if (IsA(path, AppendPath)) + { + AppendPath *append_path = castNode(AppendPath, path); + return append_path->subpaths; + } + else if (IsA(path, MergeAppendPath)) + { + MergeAppendPath *merge_append_path = castNode(MergeAppendPath, path); + return merge_append_path->subpaths; + } + else if (ts_is_chunk_append_path(path)) + { + CustomPath *custom_path = castNode(CustomPath, path); + return custom_path->custom_paths; + } + else if (handle_gather_path && IsA(path, GatherPath)) + { + return get_subpaths_from_append_path(castNode(GatherPath, path)->subpath, false); + } + + /* Aggregation push-down is not supported for other path types so far */ + return NIL; +} + +/* + * Copy an AppendPath and set new subpaths. + */ +static AppendPath * +copy_append_path(AppendPath *path, List *subpaths, PathTarget *pathtarget) +{ + AppendPath *newPath = makeNode(AppendPath); + memcpy(newPath, path, sizeof(AppendPath)); + newPath->subpaths = subpaths; + newPath->path.pathtarget = copy_pathtarget(pathtarget); + + cost_append(newPath); + + return newPath; +} + +/* + * Copy a MergeAppendPath and set new subpaths. 
+ */ +static MergeAppendPath * +copy_merge_append_path(PlannerInfo *root, MergeAppendPath *path, List *subpaths, + PathTarget *pathtarget) +{ + MergeAppendPath *newPath = + create_merge_append_path(root, path->path.parent, subpaths, path->path.pathkeys, NULL); + + newPath->path.param_info = path->path.param_info; + newPath->path.pathtarget = copy_pathtarget(pathtarget); + + return newPath; +} + +/* + * Copy an append-like path and set new subpaths + */ +static Path * +copy_append_like_path(PlannerInfo *root, Path *path, List *new_subpaths, PathTarget *pathtarget) +{ + if (IsA(path, AppendPath)) + { + AppendPath *append_path = castNode(AppendPath, path); + AppendPath *new_append_path = copy_append_path(append_path, new_subpaths, pathtarget); + return &new_append_path->path; + } + else if (IsA(path, MergeAppendPath)) + { + MergeAppendPath *merge_append_path = castNode(MergeAppendPath, path); + MergeAppendPath *new_merge_append_path = + copy_merge_append_path(root, merge_append_path, new_subpaths, pathtarget); + return &new_merge_append_path->path; + } + else if (ts_is_chunk_append_path(path)) + { + CustomPath *custom_path = castNode(CustomPath, path); + ChunkAppendPath *chunk_append_path = (ChunkAppendPath *) custom_path; + ChunkAppendPath *new_chunk_append_path = + ts_chunk_append_path_copy(chunk_append_path, new_subpaths, pathtarget); + return &new_chunk_append_path->cpath.path; + } + + /* Should never happen, already checked by caller */ + Ensure(false, "unknown path type"); + pg_unreachable(); +} + +/* + * Generate a partially sorted aggregated agg path on top of a path + */ +static AggPath * +create_sorted_partial_agg_path(PlannerInfo *root, Path *path, PathTarget *target, + double d_num_groups, GroupPathExtraData *extra_data) +{ + Query *parse = root->parse; + + /* Determine costs for aggregations */ + AggClauseCosts *agg_partial_costs = &extra_data->agg_partial_costs; + + bool is_sorted = pathkeys_contained_in(root->group_pathkeys, path->pathkeys); + + if (!is_sorted) + { + path = (Path *) create_sort_path(root, path->parent, path, root->group_pathkeys, -1.0); + } + + AggPath *sorted_agg_path = create_agg_path(root, + path->parent, + path, + target, + parse->groupClause ? 
AGG_SORTED : AGG_PLAIN, + AGGSPLIT_INITIAL_SERIAL, +#if PG16_LT + parse->groupClause, +#else + root->processed_groupClause, +#endif + NIL, + agg_partial_costs, + d_num_groups); + + return sorted_agg_path; +} + +/* + * Generate a partially hashed aggregated add path on top of a path + */ +static AggPath * +create_hashed_partial_agg_path(PlannerInfo *root, Path *path, PathTarget *target, + double d_num_groups, GroupPathExtraData *extra_data) +{ + /* Determine costs for aggregations */ + AggClauseCosts *agg_partial_costs = &extra_data->agg_partial_costs; + + AggPath *hash_path = create_agg_path(root, + path->parent, + path, + target, + AGG_HASHED, + AGGSPLIT_INITIAL_SERIAL, +#if PG16_LT + root->parse->groupClause, +#else + root->processed_groupClause, +#endif + NIL, + agg_partial_costs, + d_num_groups); + return hash_path; +} + +/* + * Add partially aggregated subpath + */ +static void +add_partially_aggregated_subpaths(PlannerInfo *root, Path *parent_path, + PathTarget *partial_grouping_target, double d_num_groups, + GroupPathExtraData *extra_data, bool can_sort, bool can_hash, + Path *subpath, List **sorted_paths, List **hashed_paths) +{ + /* Translate targetlist for partition */ + AppendRelInfo *appinfo = ts_get_appendrelinfo(root, subpath->parent->relid, false); + PathTarget *chunktarget = copy_pathtarget(partial_grouping_target); + chunktarget->exprs = + castNode(List, adjust_appendrel_attrs(root, (Node *) chunktarget->exprs, 1, &appinfo)); + + /* In declarative partitioning planning, this is done by appy_scanjoin_target_to_path */ + Assert(list_length(subpath->pathtarget->exprs) == list_length(parent_path->pathtarget->exprs)); + subpath->pathtarget->sortgrouprefs = parent_path->pathtarget->sortgrouprefs; + + if (can_sort) + { + AggPath *agg_path = + create_sorted_partial_agg_path(root, subpath, chunktarget, d_num_groups, extra_data); + + *sorted_paths = lappend(*sorted_paths, (Path *) agg_path); + } + + if (can_hash) + { + AggPath *agg_path = + create_hashed_partial_agg_path(root, subpath, chunktarget, d_num_groups, extra_data); + + *hashed_paths = lappend(*hashed_paths, (Path *) agg_path); + } +} + +/* + * Generate a total aggregation path for partial aggregations. + * + * The generated paths contain partial aggregations (created by using AGGSPLIT_INITIAL_SERIAL). + * These aggregations need to be finished by the caller by adding a node that performs the + * AGGSPLIT_FINAL_DESERIAL step. + */ +static void +generate_agg_pushdown_path(PlannerInfo *root, Path *cheapest_total_path, RelOptInfo *output_rel, + RelOptInfo *partially_grouped_rel, PathTarget *grouping_target, + PathTarget *partial_grouping_target, bool can_sort, bool can_hash, + double d_num_groups, GroupPathExtraData *extra_data) +{ + /* Get subpaths */ + List *subpaths = get_subpaths_from_append_path(cheapest_total_path, false); + + /* No subpaths available or unsupported append node */ + if (subpaths == NIL) + return; + + if (list_length(subpaths) < 2) + { + /* + * Doesn't make sense to add per-chunk aggregation paths if there's + * only one chunk. + */ + return; + } + + /* Generate agg paths on top of the append children */ + List *sorted_subpaths = NIL; + List *hashed_subpaths = NIL; + + ListCell *lc; + foreach (lc, subpaths) + { + Path *subpath = lfirst(lc); + + /* Check if we have an append path under an append path (e.g., a partially compressed + * chunk. The first append path merges the chunk results. The second append path merges the + * uncompressed and the compressed part of the chunk). 
+ * + * In this case, the partial aggregation needs to be pushed down below the lower + * append path. + */ + List *subsubpaths = get_subpaths_from_append_path(subpath, false); + + if (subsubpaths != NIL) + { + List *sorted_subsubpaths = NIL; + List *hashed_subsubpaths = NIL; + + ListCell *lc2; + foreach (lc2, subsubpaths) + { + Path *subsubpath = lfirst(lc2); + + add_partially_aggregated_subpaths(root, + cheapest_total_path, + partial_grouping_target, + d_num_groups, + extra_data, + can_sort, + can_hash, + subsubpath, + &sorted_subsubpaths /* Result path */, + &hashed_subsubpaths /* Result path */); + } + + if (can_sort) + { + sorted_subpaths = lappend(sorted_subpaths, + copy_append_like_path(root, + subpath, + sorted_subsubpaths, + subpath->pathtarget)); + } + + if (can_hash) + { + hashed_subpaths = lappend(hashed_subpaths, + copy_append_like_path(root, + subpath, + hashed_subsubpaths, + subpath->pathtarget)); + } + } + else + { + add_partially_aggregated_subpaths(root, + cheapest_total_path, + partial_grouping_target, + d_num_groups, + extra_data, + can_sort, + can_hash, + subpath, + &sorted_subpaths /* Result paths */, + &hashed_subpaths /* Result paths */); + } + } + + /* Create new append paths */ + if (sorted_subpaths != NIL) + { + add_path(partially_grouped_rel, + copy_append_like_path(root, + cheapest_total_path, + sorted_subpaths, + partial_grouping_target)); + } + + if (hashed_subpaths != NIL) + { + add_path(partially_grouped_rel, + copy_append_like_path(root, + cheapest_total_path, + hashed_subpaths, + partial_grouping_target)); + } +} + +/* + * Generate a partial aggregation path for chunk-wise partial aggregations. + + * This function does almost the same as generate_agg_pushdown_path(). In contrast, it processes a + * partial_path (paths that are usually used in parallel plans) of the input relation, pushes down + * the aggregation in this path and adds a gather node on top of the partial plan. Therefore, the + * push-down of the partial aggregates also works in parallel plans. + * + * Note: The PostgreSQL terminology can cause some confusion here. Partial paths are usually used by + * PostgreSQL to distribute work between parallel workers. This has nothing to do with the partial + * aggregation we are creating in the function. + */ +static void +generate_partial_agg_pushdown_path(PlannerInfo *root, Path *cheapest_partial_path, + RelOptInfo *output_rel, RelOptInfo *partially_grouped_rel, + PathTarget *grouping_target, PathTarget *partial_grouping_target, + bool can_sort, bool can_hash, double d_num_groups, + GroupPathExtraData *extra_data) +{ + /* Get subpaths */ + List *subpaths = get_subpaths_from_append_path(cheapest_partial_path, false); + + /* No subpaths available or unsupported append node */ + if (subpaths == NIL) + return; + + if (list_length(subpaths) < 2) + { + /* + * Doesn't make sense to add per-chunk aggregation paths if there's + * only one chunk. 
+ */ + return; + } + /* Generate agg paths on top of the append children */ + ListCell *lc; + List *sorted_subpaths = NIL; + List *hashed_subpaths = NIL; + + foreach (lc, subpaths) + { + Path *subpath = lfirst(lc); + + Assert(subpath->parallel_safe); + + /* There should be no nested append paths in the partial paths to construct the upper + * relation */ + Assert(get_subpaths_from_append_path(subpath, false) == NIL); + + add_partially_aggregated_subpaths(root, + cheapest_partial_path, + partial_grouping_target, + d_num_groups, + extra_data, + can_sort, + can_hash, + subpath, + &sorted_subpaths /* Result paths */, + &hashed_subpaths /* Result paths */); + } + + /* Create new append paths */ + if (sorted_subpaths != NIL) + { + add_partial_path(partially_grouped_rel, + copy_append_like_path(root, + cheapest_partial_path, + sorted_subpaths, + partial_grouping_target)); + } + + if (hashed_subpaths != NIL) + { + add_partial_path(partially_grouped_rel, + copy_append_like_path(root, + cheapest_partial_path, + hashed_subpaths, + partial_grouping_target)); + } + + /* Finish the partial paths (just added by add_partial_path to partially_grouped_rel in this + * function) by adding a gather node and adding the path to the partially_grouped_rel using + * add_path(). */ + foreach (lc, partially_grouped_rel->partial_pathlist) + { + Path *append_path = lfirst(lc); + double total_groups = append_path->rows * append_path->parallel_workers; + + Path *gather_path = (Path *) create_gather_path(root, + partially_grouped_rel, + append_path, + partially_grouped_rel->reltarget, + NULL, + &total_groups); + add_path(partially_grouped_rel, (Path *) gather_path); + } +} + +/* + * Get the best total path for aggregation. Prefer chunk append paths if we have one, otherwise + * return the cheapest_total_path. + */ +static Path * +get_best_total_path(RelOptInfo *output_rel) +{ + ListCell *lc; + foreach (lc, output_rel->pathlist) + { + Path *path = lfirst(lc); + + if (ts_is_chunk_append_path(path)) + return path; + } + + return output_rel->cheapest_total_path; +} + +/* + Is the provided path an agg path that uses a sorted or plain agg strategy? +*/ +static bool pg_nodiscard +is_path_sorted_or_plain_agg_path(Path *path) +{ + AggPath *agg_path = castNode(AggPath, path); + Assert(agg_path->aggstrategy == AGG_SORTED || agg_path->aggstrategy == AGG_PLAIN || + agg_path->aggstrategy == AGG_HASHED); + return agg_path->aggstrategy == AGG_SORTED || agg_path->aggstrategy == AGG_PLAIN; +} + +/* + * Check if this path belongs to a plain or sorted aggregation + */ +static bool +contains_path_plain_or_sorted_agg(Path *path) +{ + List *subpaths = get_subpaths_from_append_path(path, true); + + Ensure(subpaths != NIL, "Unable to determine aggregation type"); + + ListCell *lc; + foreach (lc, subpaths) + { + Path *subpath = lfirst(lc); + + if (IsA(subpath, AggPath)) + return is_path_sorted_or_plain_agg_path(subpath); + } + + /* + * No dedicated aggregation nodes found directly underneath the append node. This could be + * due to two reasons. + * + * (1) Only vectorized aggregation is used and we don't have dedicated Aggregation nodes. + * (2) The query plan uses multi-level appends to keep a certain sorting + * - ChunkAppend + * - Merge Append + * - Agg Chunk 1 + * - Agg Chunk 2 + * - Merge Append + * - Agg Chunk 3 + * - Agg Chunk 4 + * + * In both cases, we use a sorted aggregation node to finalize the partial aggregation and + * produce a proper sorting. 
+ */ + return true; +} + +/* + * Replan the aggregation: create partial aggregations at chunk level and finalize the + * aggregation on top of an append node. + * + * The functionality is inspired by PostgreSQL's create_partitionwise_grouping_paths() function. + * + * Generated aggregation paths: + * + * Finalize Aggregate + * -> Append + * -> Partial Aggregation + * - Chunk 1 + * ... + * -> Append of partially compressed chunk 2 + * -> Partial Aggregation + * -> Scan on uncompressed part of chunk 2 + * -> Partial Aggregation + * -> Scan on compressed part of chunk 2 + * ... + * -> Partial Aggregation N + * - Chunk N + */ +void +ts_pushdown_partial_agg(PlannerInfo *root, Hypertable *ht, RelOptInfo *input_rel, + RelOptInfo *output_rel, void *extra) +{ + Query *parse = root->parse; + + /* We are only interested in hypertables */ + if (!ht) + return; + + /* Perform partial aggregation planning only if an aggregation is requested */ + if (!parse->hasAggs) + return; + + /* Grouping sets are not supported by the partial aggregation pushdown */ + if (parse->groupingSets) + return; + + /* Don't replan aggregation if we already have a MinMaxAggPath (e.g., created by + * ts_preprocess_first_last_aggregates) */ + if (has_min_max_agg_path(output_rel)) + return; + + /* Is sorting possible? */ + bool can_sort = grouping_is_sortable(parse->groupClause) && ts_guc_enable_chunkwise_aggregation; + + /* Is hashing possible? */ + bool can_hash = grouping_is_hashable(parse->groupClause) && + !ts_is_gapfill_path(linitial(output_rel->pathlist)) && enable_hashagg; + + Assert(extra != NULL); + GroupPathExtraData *extra_data = (GroupPathExtraData *) extra; + + /* Determine the number of groups from the already planned aggregation */ + AggPath *existing_agg_path = get_existing_agg_path(output_rel); + if (existing_agg_path == NULL) + return; + + /* Skip partial aggregations already created by _timescaledb_functions.partialize_agg */ + if (existing_agg_path->aggsplit == AGGSPLIT_INITIAL_SERIAL) + return; + + /* Don't replan the aggregation if it contains aggregates that cannot be partialized or serialized */ + if (root->hasNonPartialAggs || root->hasNonSerialAggs) + return; + + double d_num_groups = existing_agg_path->numGroups; + Assert(d_num_groups > 0); + + /* Construct partial group agg upper relation */ + RelOptInfo *partially_grouped_rel = + fetch_upper_rel(root, UPPERREL_PARTIAL_GROUP_AGG, input_rel->relids); + partially_grouped_rel->consider_parallel = input_rel->consider_parallel; + partially_grouped_rel->reloptkind = input_rel->reloptkind; + partially_grouped_rel->serverid = input_rel->serverid; + partially_grouped_rel->userid = input_rel->userid; + partially_grouped_rel->useridiscurrent = input_rel->useridiscurrent; + partially_grouped_rel->fdwroutine = input_rel->fdwroutine; + + /* Build target list for partial aggregate paths */ + PathTarget *grouping_target = output_rel->reltarget; + PathTarget *partial_grouping_target = ts_make_partial_grouping_target(root, grouping_target); + partially_grouped_rel->reltarget = partial_grouping_target; + + /* Calculate aggregation costs */ + if (!extra_data->partial_costs_set) + { + /* Init costs */ + MemSet(&extra_data->agg_partial_costs, 0, sizeof(AggClauseCosts)); + MemSet(&extra_data->agg_final_costs, 0, sizeof(AggClauseCosts)); + + /* partial phase */ + get_agg_clause_costs(root, AGGSPLIT_INITIAL_SERIAL, &extra_data->agg_partial_costs); + + /* final phase */ + get_agg_clause_costs(root, AGGSPLIT_FINAL_DESERIAL, &extra_data->agg_final_costs); + + 
extra_data->partial_costs_set = true; + } + + /* Generate the aggregation pushdown path */ + Path *cheapest_total_path = get_best_total_path(input_rel); + Assert(cheapest_total_path != NULL); + generate_agg_pushdown_path(root, + cheapest_total_path, + output_rel, + partially_grouped_rel, + grouping_target, + partial_grouping_target, + can_sort, + can_hash, + d_num_groups, + extra_data); + + /* The same as above but for partial paths */ + if (input_rel->partial_pathlist != NIL && input_rel->consider_parallel) + { + Path *cheapest_partial_path = linitial(input_rel->partial_pathlist); + generate_partial_agg_pushdown_path(root, + cheapest_partial_path, + output_rel, + partially_grouped_rel, + grouping_target, + partial_grouping_target, + can_sort, + can_hash, + d_num_groups, + extra_data); + } + + /* Replan aggregation if we were able to generate partially grouped rel paths */ + if (partially_grouped_rel->pathlist == NIL) + return; + + /* Prefer our paths */ + output_rel->pathlist = NIL; + output_rel->partial_pathlist = NIL; + + /* Finalize the created partially aggregated paths by adding a 'Finalize Aggregate' node on top + * of them. */ + AggClauseCosts *agg_final_costs = &extra_data->agg_final_costs; + ListCell *lc; + foreach (lc, partially_grouped_rel->pathlist) + { + Path *append_path = lfirst(lc); + + if (contains_path_plain_or_sorted_agg(append_path)) + { + bool is_sorted; + + is_sorted = pathkeys_contained_in(root->group_pathkeys, append_path->pathkeys); + + if (!is_sorted) + { + append_path = (Path *) + create_sort_path(root, output_rel, append_path, root->group_pathkeys, -1.0); + } + + add_path(output_rel, + (Path *) create_agg_path(root, + output_rel, + append_path, + grouping_target, + parse->groupClause ? AGG_SORTED : AGG_PLAIN, + AGGSPLIT_FINAL_DESERIAL, +#if PG16_LT + parse->groupClause, +#else + root->processed_groupClause, +#endif + (List *) parse->havingQual, + agg_final_costs, + d_num_groups)); + } + else + { + add_path(output_rel, + (Path *) create_agg_path(root, + output_rel, + append_path, + grouping_target, + AGG_HASHED, + AGGSPLIT_FINAL_DESERIAL, +#if PG16_LT + parse->groupClause, +#else + root->processed_groupClause, +#endif + (List *) parse->havingQual, + agg_final_costs, + d_num_groups)); + } + } +} diff --git a/tsl/src/planner.c b/tsl/src/planner.c index 798529486b3..e7473644d5c 100644 --- a/tsl/src/planner.c +++ b/tsl/src/planner.c @@ -24,6 +24,7 @@ #include "nodes/skip_scan/skip_scan.h" #include "nodes/vector_agg/plan.h" #include "planner.h" +#include "planner/partialize.h" #include @@ -51,7 +52,14 @@ tsl_create_upper_paths_hook(PlannerInfo *root, UpperRelationKind stage, RelOptIn { case UPPERREL_GROUP_AGG: if (input_reltype != TS_REL_HYPERTABLE_CHILD) + { plan_add_gapfill(root, output_rel); + } + + if (ts_guc_enable_chunkwise_aggregation) + { + ts_pushdown_partial_agg(root, ht, input_rel, output_rel, extra); + } break; case UPPERREL_WINDOW: if (IsA(linitial(input_rel->pathlist), CustomPath)) diff --git a/tsl/src/planner.h b/tsl/src/planner.h index f88d6c45fa6..df1243e8a1d 100644 --- a/tsl/src/planner.h +++ b/tsl/src/planner.h @@ -18,3 +18,6 @@ void tsl_set_rel_pathlist_dml(PlannerInfo *, RelOptInfo *, Index, RangeTblEntry void tsl_set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); void tsl_preprocess_query(Query *parse); void tsl_postprocess_plan(PlannedStmt *stmt); + +void TSDLLEXPORT ts_pushdown_partial_agg(PlannerInfo *root, Hypertable *ht, RelOptInfo *input_rel, + RelOptInfo *output_rel, void *extra); From 
545056eeb29cb4a7e7b69f2f65f27457516275fa Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Sat, 27 Jul 2024 12:12:29 +0200 Subject: [PATCH 02/12] license --- tsl/src/chunkwise_agg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tsl/src/chunkwise_agg.c b/tsl/src/chunkwise_agg.c index 9188717330c..737e59c8192 100644 --- a/tsl/src/chunkwise_agg.c +++ b/tsl/src/chunkwise_agg.c @@ -1,7 +1,7 @@ /* - * This file and its contents are licensed under the Apache License 2.0. + * This file and its contents are licensed under the Timescale License. * Please see the included NOTICE for copyright information and - * LICENSE-APACHE for a copy of the license. + * LICENSE-TIMESCALE for a copy of the license. */ #include From c14405e7dd183a2bc9bc824330a590db063750f7 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Sat, 27 Jul 2024 12:39:50 +0200 Subject: [PATCH 03/12] kinda fix --- src/planner/planner.h | 3 +- test/expected/agg_bookends.out | 368 +++++++++++---------------------- test/expected/sql_query.out | 38 ++-- tsl/src/planner.c | 32 ++- 4 files changed, 168 insertions(+), 273 deletions(-) diff --git a/src/planner/planner.h b/src/planner/planner.h index dbf3efe3179..c0f4e36c0b0 100644 --- a/src/planner/planner.h +++ b/src/planner/planner.h @@ -105,4 +105,5 @@ extern Node *ts_add_space_constraints(PlannerInfo *root, List *rtable, Node *nod extern TSDLLEXPORT void ts_add_baserel_cache_entry_for_chunk(Oid chunk_reloid, Hypertable *hypertable); -TsRelType ts_classify_relation(const PlannerInfo *root, const RelOptInfo *rel, Hypertable **ht); +TsRelType TSDLLEXPORT ts_classify_relation(const PlannerInfo *root, const RelOptInfo *rel, + Hypertable **ht); diff --git a/test/expected/agg_bookends.out b/test/expected/agg_bookends.out index e1927d3c926..f5118267c32 100644 --- a/test/expected/agg_bookends.out +++ b/test/expected/agg_bookends.out @@ -201,142 +201,82 @@ INSERT INTO btest VALUES('2019-01-20T09:00:43', '2018-01-20T09:00:55', 2, 30.5); --check null cmp element is skipped INSERT INTO btest VALUES('2018-01-20T09:00:43', NULL, 2, 32.3); :PREFIX SELECT last(temp, time_alt) FROM btest; - QUERY PLAN ------------------------------------------------------------------------- - Finalize HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Append (actual rows=3 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_1_chunk (actual rows=6 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_2_chunk (actual rows=2 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_3_chunk (actual rows=1 loops=1) -(12 rows) + QUERY PLAN +------------------------------------------------------------------ + Aggregate (actual rows=1 loops=1) + -> Append (actual rows=9 loops=1) + -> Seq Scan on _hyper_1_1_chunk (actual rows=6 loops=1) + -> Seq Scan on _hyper_1_2_chunk (actual rows=2 loops=1) + -> Seq Scan on _hyper_1_3_chunk (actual rows=1 loops=1) +(5 rows) -- fist returns NULL value :PREFIX SELECT first(temp, time_alt) FROM btest; - QUERY PLAN ------------------------------------------------------------------------- - Finalize HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Append (actual rows=3 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_1_chunk (actual rows=6 loops=1) - -> Partial HashAggregate (actual rows=1 
loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_2_chunk (actual rows=2 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_3_chunk (actual rows=1 loops=1) -(12 rows) + QUERY PLAN +------------------------------------------------------------------ + Aggregate (actual rows=1 loops=1) + -> Append (actual rows=9 loops=1) + -> Seq Scan on _hyper_1_1_chunk (actual rows=6 loops=1) + -> Seq Scan on _hyper_1_2_chunk (actual rows=2 loops=1) + -> Seq Scan on _hyper_1_3_chunk (actual rows=1 loops=1) +(5 rows) -- test first return non NULL value INSERT INTO btest VALUES('2016-01-20T09:00:00', '2016-01-20T09:00:00', 2, 36.5); :PREFIX SELECT first(temp, time_alt) FROM btest; - QUERY PLAN ------------------------------------------------------------------------- - Finalize HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Append (actual rows=4 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_1_chunk (actual rows=6 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_2_chunk (actual rows=2 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_3_chunk (actual rows=1 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_4_chunk (actual rows=1 loops=1) -(15 rows) + QUERY PLAN +------------------------------------------------------------------ + Aggregate (actual rows=1 loops=1) + -> Append (actual rows=10 loops=1) + -> Seq Scan on _hyper_1_1_chunk (actual rows=6 loops=1) + -> Seq Scan on _hyper_1_2_chunk (actual rows=2 loops=1) + -> Seq Scan on _hyper_1_3_chunk (actual rows=1 loops=1) + -> Seq Scan on _hyper_1_4_chunk (actual rows=1 loops=1) +(6 rows) --check non null cmp element insert after null cmp INSERT INTO btest VALUES('2020-01-20T09:00:43', '2020-01-20T09:00:43', 2, 35.3); :PREFIX SELECT last(temp, time_alt) FROM btest; - QUERY PLAN ------------------------------------------------------------------------- - Finalize HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Append (actual rows=5 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_1_chunk (actual rows=6 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_2_chunk (actual rows=2 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_3_chunk (actual rows=1 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_4_chunk (actual rows=1 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_5_chunk (actual rows=1 loops=1) -(18 rows) + QUERY PLAN +------------------------------------------------------------------ + Aggregate (actual rows=1 loops=1) + -> Append (actual rows=11 loops=1) + -> Seq Scan on _hyper_1_1_chunk (actual rows=6 loops=1) + -> Seq Scan on _hyper_1_2_chunk (actual rows=2 loops=1) + -> Seq Scan on _hyper_1_3_chunk (actual rows=1 loops=1) + -> Seq Scan on _hyper_1_4_chunk (actual rows=1 loops=1) + -> Seq Scan on _hyper_1_5_chunk (actual rows=1 loops=1) +(7 rows) :PREFIX SELECT first(temp, time_alt) FROM btest; - QUERY PLAN ------------------------------------------------------------------------- - Finalize HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Append (actual rows=5 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on 
_hyper_1_1_chunk (actual rows=6 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_2_chunk (actual rows=2 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_3_chunk (actual rows=1 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_4_chunk (actual rows=1 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_5_chunk (actual rows=1 loops=1) -(18 rows) + QUERY PLAN +------------------------------------------------------------------ + Aggregate (actual rows=1 loops=1) + -> Append (actual rows=11 loops=1) + -> Seq Scan on _hyper_1_1_chunk (actual rows=6 loops=1) + -> Seq Scan on _hyper_1_2_chunk (actual rows=2 loops=1) + -> Seq Scan on _hyper_1_3_chunk (actual rows=1 loops=1) + -> Seq Scan on _hyper_1_4_chunk (actual rows=1 loops=1) + -> Seq Scan on _hyper_1_5_chunk (actual rows=1 loops=1) +(7 rows) --cmp nulls should be ignored and not present in groups :PREFIX SELECT gp, last(temp, time_alt) FROM btest GROUP BY gp ORDER BY gp; - QUERY PLAN ------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------ Sort (actual rows=2 loops=1) Sort Key: _hyper_1_1_chunk.gp Sort Method: quicksort - -> Finalize HashAggregate (actual rows=2 loops=1) + -> HashAggregate (actual rows=2 loops=1) Group Key: _hyper_1_1_chunk.gp Batches: 1 - -> Append (actual rows=6 loops=1) - -> Partial HashAggregate (actual rows=2 loops=1) - Group Key: _hyper_1_1_chunk.gp - Batches: 1 - -> Seq Scan on _hyper_1_1_chunk (actual rows=6 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Group Key: _hyper_1_2_chunk.gp - Batches: 1 - -> Seq Scan on _hyper_1_2_chunk (actual rows=2 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Group Key: _hyper_1_3_chunk.gp - Batches: 1 - -> Seq Scan on _hyper_1_3_chunk (actual rows=1 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Group Key: _hyper_1_4_chunk.gp - Batches: 1 - -> Seq Scan on _hyper_1_4_chunk (actual rows=1 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Group Key: _hyper_1_5_chunk.gp - Batches: 1 - -> Seq Scan on _hyper_1_5_chunk (actual rows=1 loops=1) -(27 rows) + -> Append (actual rows=11 loops=1) + -> Seq Scan on _hyper_1_1_chunk (actual rows=6 loops=1) + -> Seq Scan on _hyper_1_2_chunk (actual rows=2 loops=1) + -> Seq Scan on _hyper_1_3_chunk (actual rows=1 loops=1) + -> Seq Scan on _hyper_1_4_chunk (actual rows=1 loops=1) + -> Seq Scan on _hyper_1_5_chunk (actual rows=1 loops=1) +(12 rows) --Previously, some bugs were found with NULLS and numeric types, so test that INSERT INTO btest_numeric VALUES ('2019-01-20T09:00:43', NULL); @@ -405,27 +345,16 @@ INSERT INTO btest_numeric VALUES('2020-01-20T09:00:43', 30.5); -- can't do index scan when ordering on non-index column :PREFIX SELECT first(temp, time_alt) FROM btest; - QUERY PLAN ------------------------------------------------------------------------- - Finalize HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Append (actual rows=5 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_1_chunk (actual rows=6 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_2_chunk (actual rows=2 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_3_chunk (actual 
rows=1 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_4_chunk (actual rows=1 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_5_chunk (actual rows=1 loops=1) -(18 rows) + QUERY PLAN +------------------------------------------------------------------ + Aggregate (actual rows=1 loops=1) + -> Append (actual rows=11 loops=1) + -> Seq Scan on _hyper_1_1_chunk (actual rows=6 loops=1) + -> Seq Scan on _hyper_1_2_chunk (actual rows=2 loops=1) + -> Seq Scan on _hyper_1_3_chunk (actual rows=1 loops=1) + -> Seq Scan on _hyper_1_4_chunk (actual rows=1 loops=1) + -> Seq Scan on _hyper_1_5_chunk (actual rows=1 loops=1) +(7 rows) -- do index scan for subquery :PREFIX SELECT * FROM (SELECT last(temp, time) FROM btest) last; @@ -564,54 +493,32 @@ INSERT INTO btest_numeric VALUES('2020-01-20T09:00:43', 30.5); -- can't do index scan for MAX and LAST combined (MinMax optimization fails when having different aggregate functions) :PREFIX SELECT max(time), last(temp, time) FROM btest; + QUERY PLAN +------------------------------------------------------------------ + Aggregate (actual rows=1 loops=1) + -> Append (actual rows=11 loops=1) + -> Seq Scan on _hyper_1_1_chunk (actual rows=6 loops=1) + -> Seq Scan on _hyper_1_2_chunk (actual rows=2 loops=1) + -> Seq Scan on _hyper_1_3_chunk (actual rows=1 loops=1) + -> Seq Scan on _hyper_1_4_chunk (actual rows=1 loops=1) + -> Seq Scan on _hyper_1_5_chunk (actual rows=1 loops=1) +(7 rows) + +-- can't do index scan when using FIRST/LAST in ORDER BY +:PREFIX SELECT last(temp, time) FROM btest ORDER BY last(temp, time); QUERY PLAN ------------------------------------------------------------------------ - Finalize HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Append (actual rows=5 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 + Sort (actual rows=1 loops=1) + Sort Key: (last(_hyper_1_1_chunk.temp, _hyper_1_1_chunk."time")) + Sort Method: quicksort + -> Aggregate (actual rows=1 loops=1) + -> Append (actual rows=11 loops=1) -> Seq Scan on _hyper_1_1_chunk (actual rows=6 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 -> Seq Scan on _hyper_1_2_chunk (actual rows=2 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 -> Seq Scan on _hyper_1_3_chunk (actual rows=1 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 -> Seq Scan on _hyper_1_4_chunk (actual rows=1 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 -> Seq Scan on _hyper_1_5_chunk (actual rows=1 loops=1) -(18 rows) - --- can't do index scan when using FIRST/LAST in ORDER BY -:PREFIX SELECT last(temp, time) FROM btest ORDER BY last(temp, time); - QUERY PLAN ------------------------------------------------------------------------------- - Sort (actual rows=1 loops=1) - Sort Key: (last(_hyper_1_1_chunk.temp, _hyper_1_1_chunk."time")) - Sort Method: quicksort - -> Finalize HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Append (actual rows=5 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_1_chunk (actual rows=6 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_2_chunk (actual rows=2 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_3_chunk (actual rows=1 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on 
_hyper_1_4_chunk (actual rows=1 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_5_chunk (actual rows=1 loops=1) -(21 rows) +(10 rows) -- do index scan :PREFIX SELECT last(temp, time) FROM btest WHERE temp < 30; @@ -739,30 +646,19 @@ CREATE INDEX btest_time_alt_idx ON btest(time_alt); -- test nested FIRST/LAST in ORDER BY - no optimization possible :PREFIX SELECT abs(last(temp, time)) FROM btest ORDER BY abs(last(temp,time)); - QUERY PLAN ------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------- Sort (actual rows=1 loops=1) Sort Key: (abs(last(_hyper_1_1_chunk.temp, _hyper_1_1_chunk."time"))) Sort Method: quicksort - -> Finalize HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Append (actual rows=5 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_1_chunk (actual rows=6 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_2_chunk (actual rows=2 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_3_chunk (actual rows=1 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_4_chunk (actual rows=1 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_1_5_chunk (actual rows=1 loops=1) -(21 rows) + -> Aggregate (actual rows=1 loops=1) + -> Append (actual rows=11 loops=1) + -> Seq Scan on _hyper_1_1_chunk (actual rows=6 loops=1) + -> Seq Scan on _hyper_1_2_chunk (actual rows=2 loops=1) + -> Seq Scan on _hyper_1_3_chunk (actual rows=1 loops=1) + -> Seq Scan on _hyper_1_4_chunk (actual rows=1 loops=1) + -> Seq Scan on _hyper_1_5_chunk (actual rows=1 loops=1) +(10 rows) ROLLBACK; -- Test with NULL numeric values @@ -851,32 +747,22 @@ INSERT INTO btest_numeric VALUES('2019-01-20T09:00:43', 2); (9 rows) :PREFIX SELECT first(time, quantity) FROM btest_numeric; - QUERY PLAN ------------------------------------------------------------------------- - Finalize HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Append (actual rows=2 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_2_8_chunk (actual rows=2 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_2_9_chunk (actual rows=2 loops=1) -(9 rows) + QUERY PLAN +------------------------------------------------------------------ + Aggregate (actual rows=1 loops=1) + -> Append (actual rows=4 loops=1) + -> Seq Scan on _hyper_2_8_chunk (actual rows=2 loops=1) + -> Seq Scan on _hyper_2_9_chunk (actual rows=2 loops=1) +(4 rows) :PREFIX SELECT last(time, quantity) FROM btest_numeric; - QUERY PLAN ------------------------------------------------------------------------- - Finalize HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Append (actual rows=2 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_2_8_chunk (actual rows=2 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_2_9_chunk (actual rows=2 loops=1) -(9 rows) + QUERY PLAN +------------------------------------------------------------------ + Aggregate (actual rows=1 loops=1) + -> Append (actual rows=4 loops=1) + -> Seq Scan on _hyper_2_8_chunk (actual rows=2 loops=1) + -> Seq Scan on _hyper_2_9_chunk (actual rows=2 loops=1) 
+(4 rows) TRUNCATE btest_numeric; -- non-NULL values followed by NULL values @@ -911,32 +797,22 @@ INSERT INTO btest_numeric VALUES('2018-01-20T09:00:43', NULL); (9 rows) :PREFIX SELECT first(time, quantity) FROM btest_numeric; - QUERY PLAN -------------------------------------------------------------------------- - Finalize HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Append (actual rows=2 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_2_10_chunk (actual rows=2 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_2_11_chunk (actual rows=2 loops=1) -(9 rows) + QUERY PLAN +------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Append (actual rows=4 loops=1) + -> Seq Scan on _hyper_2_10_chunk (actual rows=2 loops=1) + -> Seq Scan on _hyper_2_11_chunk (actual rows=2 loops=1) +(4 rows) :PREFIX SELECT last(time, quantity) FROM btest_numeric; - QUERY PLAN -------------------------------------------------------------------------- - Finalize HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Append (actual rows=2 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_2_10_chunk (actual rows=2 loops=1) - -> Partial HashAggregate (actual rows=1 loops=1) - Batches: 1 - -> Seq Scan on _hyper_2_11_chunk (actual rows=2 loops=1) -(9 rows) + QUERY PLAN +------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Append (actual rows=4 loops=1) + -> Seq Scan on _hyper_2_10_chunk (actual rows=2 loops=1) + -> Seq Scan on _hyper_2_11_chunk (actual rows=2 loops=1) +(4 rows) ROLLBACK; -- we want test results as part of the output too to make sure we produce correct output diff --git a/test/expected/sql_query.out b/test/expected/sql_query.out index 4adff1be1f8..26f9ef07d69 100644 --- a/test/expected/sql_query.out +++ b/test/expected/sql_query.out @@ -220,41 +220,29 @@ EXPLAIN (verbose ON, costs off)SELECT * FROM PUBLIC."two_Partitions" WHERE serie --note that without time transform things work too EXPLAIN (verbose ON, costs off)SELECT "timeCustom" t, min(series_0) FROM PUBLIC."two_Partitions" GROUP BY t ORDER BY t DESC NULLS LAST limit 2; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------ + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------------------------- Limit Output: "two_Partitions"."timeCustom", (min("two_Partitions".series_0)) - -> Finalize GroupAggregate + -> GroupAggregate Output: "two_Partitions"."timeCustom", min("two_Partitions".series_0) Group Key: "two_Partitions"."timeCustom" -> Custom Scan (ChunkAppend) on public."two_Partitions" - Output: "two_Partitions"."timeCustom", (PARTIAL min("two_Partitions".series_0)) + Output: "two_Partitions"."timeCustom", "two_Partitions".series_0 Order: "two_Partitions"."timeCustom" DESC NULLS LAST Startup Exclusion: false Runtime Exclusion: false - -> Partial GroupAggregate - Output: _hyper_1_3_chunk."timeCustom", PARTIAL min(_hyper_1_3_chunk.series_0) - Group Key: _hyper_1_3_chunk."timeCustom" - -> Index Scan using "_hyper_1_3_chunk_two_Partitions_timeCustom_device_id_idx" on _timescaledb_internal._hyper_1_3_chunk - Output: _hyper_1_3_chunk."timeCustom", _hyper_1_3_chunk.series_0 - -> Partial GroupAggregate - Output: 
_hyper_1_2_chunk."timeCustom", PARTIAL min(_hyper_1_2_chunk.series_0) - Group Key: _hyper_1_2_chunk."timeCustom" - -> Index Scan using "_hyper_1_2_chunk_two_Partitions_timeCustom_device_id_idx" on _timescaledb_internal._hyper_1_2_chunk - Output: _hyper_1_2_chunk."timeCustom", _hyper_1_2_chunk.series_0 + -> Index Scan using "_hyper_1_3_chunk_two_Partitions_timeCustom_device_id_idx" on _timescaledb_internal._hyper_1_3_chunk + Output: _hyper_1_3_chunk."timeCustom", _hyper_1_3_chunk.series_0 + -> Index Scan using "_hyper_1_2_chunk_two_Partitions_timeCustom_device_id_idx" on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk."timeCustom", _hyper_1_2_chunk.series_0 -> Merge Append Sort Key: _hyper_1_4_chunk."timeCustom" DESC NULLS LAST - -> Partial GroupAggregate - Output: _hyper_1_4_chunk."timeCustom", PARTIAL min(_hyper_1_4_chunk.series_0) - Group Key: _hyper_1_4_chunk."timeCustom" - -> Index Scan using "_hyper_1_4_chunk_two_Partitions_timeCustom_device_id_idx" on _timescaledb_internal._hyper_1_4_chunk - Output: _hyper_1_4_chunk."timeCustom", _hyper_1_4_chunk.series_0 - -> Partial GroupAggregate - Output: _hyper_1_1_chunk."timeCustom", PARTIAL min(_hyper_1_1_chunk.series_0) - Group Key: _hyper_1_1_chunk."timeCustom" - -> Index Scan using "_hyper_1_1_chunk_two_Partitions_timeCustom_device_id_idx" on _timescaledb_internal._hyper_1_1_chunk - Output: _hyper_1_1_chunk."timeCustom", _hyper_1_1_chunk.series_0 -(32 rows) + -> Index Scan using "_hyper_1_4_chunk_two_Partitions_timeCustom_device_id_idx" on _timescaledb_internal._hyper_1_4_chunk + Output: _hyper_1_4_chunk."timeCustom", _hyper_1_4_chunk.series_0 + -> Index Scan using "_hyper_1_1_chunk_two_Partitions_timeCustom_device_id_idx" on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk."timeCustom", _hyper_1_1_chunk.series_0 +(20 rows) --The query should still use the index on timeCustom, even though the GROUP BY/ORDER BY is on the transformed time 't'. --However, current query plans show that it does not. 
diff --git a/tsl/src/planner.c b/tsl/src/planner.c index e7473644d5c..ad67c68bc05 100644 --- a/tsl/src/planner.c +++ b/tsl/src/planner.c @@ -43,6 +43,34 @@ is_osm_present() return osm_present; } +static bool +join_involves_hypertable(const PlannerInfo *root, const RelOptInfo *rel) +{ + int relid = -1; + + while ((relid = bms_next_member(rel->relids, relid)) >= 0) + { + const RangeTblEntry *rte = planner_rt_fetch(relid, root); + + if (rte != NULL) + /* This might give a false positive for chunks in case of PostgreSQL + * expansion since the ctename is copied from the parent hypertable + * to the chunk */ + return ts_rte_is_marked_for_expansion(rte); + } + return false; +} + +static bool +involves_hypertable(PlannerInfo *root, RelOptInfo *rel) +{ + if (rel->reloptkind == RELOPT_JOINREL) + return join_involves_hypertable(root, rel); + + Hypertable *ht; + return ts_classify_relation(root, rel, &ht) == TS_REL_HYPERTABLE; +} + void tsl_create_upper_paths_hook(PlannerInfo *root, UpperRelationKind stage, RelOptInfo *input_rel, RelOptInfo *output_rel, TsRelType input_reltype, Hypertable *ht, @@ -56,7 +84,9 @@ tsl_create_upper_paths_hook(PlannerInfo *root, UpperRelationKind stage, RelOptIn plan_add_gapfill(root, output_rel); } - if (ts_guc_enable_chunkwise_aggregation) + if (ts_guc_enable_chunkwise_aggregation && input_rel != NULL && + !IS_DUMMY_REL(input_rel) && output_rel != NULL && + involves_hypertable(root, input_rel)) { ts_pushdown_partial_agg(root, ht, input_rel, output_rel, extra); } From 759ce361d49d400915a76716eee70c531a66f4fe Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Sun, 28 Jul 2024 11:24:58 +0200 Subject: [PATCH 04/12] simplify --- tsl/src/planner.c | 30 +++++++++--------------------- 1 file changed, 9 insertions(+), 21 deletions(-) diff --git a/tsl/src/planner.c b/tsl/src/planner.c index ad67c68bc05..84128235556 100644 --- a/tsl/src/planner.c +++ b/tsl/src/planner.c @@ -44,33 +44,21 @@ is_osm_present() } static bool -join_involves_hypertable(const PlannerInfo *root, const RelOptInfo *rel) +involves_hypertable(PlannerInfo *root, RelOptInfo *rel) { - int relid = -1; - - while ((relid = bms_next_member(rel->relids, relid)) >= 0) + for (int relid = bms_next_member(rel->relids, -1); relid > 0; + relid = bms_next_member(rel->relids, relid)) { - const RangeTblEntry *rte = planner_rt_fetch(relid, root); - - if (rte != NULL) - /* This might give a false positive for chunks in case of PostgreSQL - * expansion since the ctename is copied from the parent hypertable - * to the chunk */ - return ts_rte_is_marked_for_expansion(rte); + Hypertable *ht; + RelOptInfo *rel = root->simple_rel_array[relid]; + if (ts_classify_relation(root, rel, &ht) == TS_REL_HYPERTABLE) + { + return true; + } } return false; } -static bool -involves_hypertable(PlannerInfo *root, RelOptInfo *rel) -{ - if (rel->reloptkind == RELOPT_JOINREL) - return join_involves_hypertable(root, rel); - - Hypertable *ht; - return ts_classify_relation(root, rel, &ht) == TS_REL_HYPERTABLE; -} - void tsl_create_upper_paths_hook(PlannerInfo *root, UpperRelationKind stage, RelOptInfo *input_rel, RelOptInfo *output_rel, TsRelType input_reltype, Hypertable *ht, From 4d581a683a83fb40c14e0be9e025e0622a7c110f Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Sun, 28 Jul 2024 11:27:12 +0200 Subject: [PATCH 05/12] reference REL_16_3 append-* parallel-* partitionwise-* --- test/expected/append-16.out | 124 +++++++++++----------------- 
test/expected/parallel-16.out | 127 +++++++++++++---------------- test/expected/partitionwise-16.out | 126 +++++++++------------------- 3 files changed, 143 insertions(+), 234 deletions(-) diff --git a/test/expected/append-16.out b/test/expected/append-16.out index be3a4ccf1cc..f2fa0f74cbc 100644 --- a/test/expected/append-16.out +++ b/test/expected/append-16.out @@ -2039,95 +2039,67 @@ ORDER BY time DESC, device_id; -- aggregates should prevent pushdown :PREFIX SELECT count(*) FROM metrics_timestamptz LIMIT 1; - QUERY PLAN ----------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------- Limit (actual rows=1 loops=1) - -> Finalize Aggregate (actual rows=1 loops=1) - -> Append (actual rows=5 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_17_chunk (actual rows=4032 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_18_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_19_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_20_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_21_chunk (actual rows=4611 loops=1) -(13 rows) + -> Aggregate (actual rows=1 loops=1) + -> Append (actual rows=26787 loops=1) + -> Seq Scan on _hyper_5_17_chunk (actual rows=4032 loops=1) + -> Seq Scan on _hyper_5_18_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_19_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_20_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_21_chunk (actual rows=4611 loops=1) +(8 rows) :PREFIX SELECT count(*) FROM metrics_space LIMIT 1; - QUERY PLAN ----------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------- Limit (actual rows=1 loops=1) - -> Finalize Aggregate (actual rows=1 loops=1) - -> Append (actual rows=9 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_22_chunk (actual rows=5376 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_23_chunk (actual rows=5376 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_24_chunk (actual rows=2688 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_25_chunk (actual rows=8064 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_26_chunk (actual rows=8064 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_27_chunk (actual rows=4032 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_28_chunk (actual rows=1540 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_29_chunk (actual rows=1540 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_30_chunk (actual rows=770 loops=1) -(21 rows) + -> Aggregate (actual rows=1 loops=1) + -> Append (actual rows=37450 loops=1) + -> Seq Scan on _hyper_6_22_chunk (actual rows=5376 loops=1) + -> Seq Scan on _hyper_6_23_chunk (actual rows=5376 loops=1) + -> Seq Scan on _hyper_6_24_chunk (actual rows=2688 loops=1) + -> Seq Scan on _hyper_6_25_chunk (actual rows=8064 loops=1) + -> Seq Scan on _hyper_6_26_chunk (actual rows=8064 loops=1) + -> Seq Scan on _hyper_6_27_chunk (actual 
rows=4032 loops=1) + -> Seq Scan on _hyper_6_28_chunk (actual rows=1540 loops=1) + -> Seq Scan on _hyper_6_29_chunk (actual rows=1540 loops=1) + -> Seq Scan on _hyper_6_30_chunk (actual rows=770 loops=1) +(12 rows) -- HAVING should prevent pushdown :PREFIX SELECT 1 FROM metrics_timestamptz HAVING count(*) > 1 LIMIT 1; - QUERY PLAN ----------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------- Limit (actual rows=1 loops=1) - -> Finalize Aggregate (actual rows=1 loops=1) + -> Aggregate (actual rows=1 loops=1) Filter: (count(*) > 1) - -> Append (actual rows=5 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_17_chunk (actual rows=4032 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_18_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_19_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_20_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_21_chunk (actual rows=4611 loops=1) -(14 rows) + -> Append (actual rows=26787 loops=1) + -> Seq Scan on _hyper_5_17_chunk (actual rows=4032 loops=1) + -> Seq Scan on _hyper_5_18_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_19_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_20_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_21_chunk (actual rows=4611 loops=1) +(9 rows) :PREFIX SELECT 1 FROM metrics_space HAVING count(*) > 1 LIMIT 1; - QUERY PLAN ----------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------- Limit (actual rows=1 loops=1) - -> Finalize Aggregate (actual rows=1 loops=1) + -> Aggregate (actual rows=1 loops=1) Filter: (count(*) > 1) - -> Append (actual rows=9 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_22_chunk (actual rows=5376 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_23_chunk (actual rows=5376 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_24_chunk (actual rows=2688 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_25_chunk (actual rows=8064 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_26_chunk (actual rows=8064 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_27_chunk (actual rows=4032 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_28_chunk (actual rows=1540 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_29_chunk (actual rows=1540 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_30_chunk (actual rows=770 loops=1) -(22 rows) + -> Append (actual rows=37450 loops=1) + -> Seq Scan on _hyper_6_22_chunk (actual rows=5376 loops=1) + -> Seq Scan on _hyper_6_23_chunk (actual rows=5376 loops=1) + -> Seq Scan on _hyper_6_24_chunk (actual rows=2688 loops=1) + -> Seq Scan on _hyper_6_25_chunk (actual rows=8064 loops=1) + -> Seq Scan on _hyper_6_26_chunk (actual rows=8064 loops=1) + -> Seq Scan on _hyper_6_27_chunk (actual rows=4032 loops=1) + -> Seq Scan on _hyper_6_28_chunk (actual rows=1540 loops=1) + -> Seq Scan on _hyper_6_29_chunk (actual rows=1540 
loops=1) + -> Seq Scan on _hyper_6_30_chunk (actual rows=770 loops=1) +(13 rows) -- DISTINCT should prevent pushdown SET enable_hashagg TO false; diff --git a/test/expected/parallel-16.out b/test/expected/parallel-16.out index 01d899fe9f2..1e1f73815a9 100644 --- a/test/expected/parallel-16.out +++ b/test/expected/parallel-16.out @@ -35,12 +35,11 @@ EXPLAIN (costs off) SELECT first(i, j) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT first(i, j) FROM "test"; first @@ -54,12 +53,11 @@ EXPLAIN (costs off) SELECT last(i, j) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT last(i, j) FROM "test"; last @@ -139,12 +137,11 @@ EXPLAIN (costs off) SELECT histogram(i, 1, 1000000, 2) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT histogram(i, 1, 1000000, 2) FROM "test"; histogram @@ -158,12 +155,11 @@ EXPLAIN (costs off) SELECT histogram(i, 1,1000001,10) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT histogram(i, 1, 1000001, 10) FROM "test"; histogram @@ -177,12 +173,11 @@ EXPLAIN (costs off) SELECT histogram(i, 0,100000,5) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT histogram(i, 0, 100000, 5) FROM "test"; histogram @@ -196,12 +191,11 @@ EXPLAIN (costs off) SELECT histogram(i, 10,100000,5) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT histogram(i, 10, 100000, 5) FROM "test"; histogram @@ -215,14 +209,13 @@ EXPLAIN (costs off) SELECT histogram(NULL, 10,100000,5) FROM "test" WHERE i = c Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk Filter: ((i)::double precision = '-1'::double precision) - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk Filter: ((i)::double precision = '-1'::double precision) -(10 rows) +(9 rows) SELECT histogram(NULL, 10,100000,5) FROM "test" WHERE i = coalesce(-1,j); histogram @@ -255,21 +248,20 @@ SELECT histogram(NULL, 10,100000,5) FROM "test" WHERE i = coalesce(-1,j); Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> 
Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_1_chunk Filter: (i > 1) - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_2_chunk Filter: (i > 1) -(17 rows) +(16 rows) SELECT count(*) FROM "test" WHERE i > 1 AND length(version()) > 0; count @@ -555,21 +547,20 @@ SET max_parallel_workers_per_gather TO 2; Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Parallel Index Only Scan using _hyper_1_1_chunk_test_i_idx on _hyper_1_1_chunk Index Cond: (i >= 400000) - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_2_chunk Filter: (i >= 400000) -(17 rows) +(16 rows) SELECT count(*) FROM "test" WHERE i >= 400000 AND length(version()) > 0; count @@ -585,21 +576,20 @@ SELECT count(*) FROM "test" WHERE i >= 400000 AND length(version()) > 0; Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Parallel Index Only Scan using _hyper_1_2_chunk_test_i_idx on _hyper_1_2_chunk Index Cond: (i < 600000) - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_1_chunk Filter: (i < 600000) -(17 rows) +(16 rows) SELECT count(*) FROM "test" WHERE i < 600000 AND length(version()) > 0; count @@ -615,19 +605,18 @@ SET max_parallel_workers_per_gather TO 1; Finalize Aggregate -> Gather Workers Planned: 1 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_2_chunk -(15 rows) +(14 rows) SELECT count(*) FROM "test" WHERE length(version()) > 0; count @@ -671,21 +660,20 @@ ALTER TABLE :CHUNK2 SET (parallel_workers=2); Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Index Only Scan using _hyper_1_1_chunk_test_i_idx on _hyper_1_1_chunk Index Cond: (i > 400000) - -> Partial Aggregate -> 
Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_2_chunk Filter: (i > 400000) -(17 rows) +(16 rows) ALTER TABLE :CHUNK1 SET (parallel_workers=2); ALTER TABLE :CHUNK2 SET (parallel_workers=0); @@ -695,21 +683,20 @@ ALTER TABLE :CHUNK2 SET (parallel_workers=0); Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Index Only Scan using _hyper_1_2_chunk_test_i_idx on _hyper_1_2_chunk Index Cond: (i < 600000) - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_1_chunk Filter: (i < 600000) -(17 rows) +(16 rows) ALTER TABLE :CHUNK1 RESET (parallel_workers); ALTER TABLE :CHUNK2 RESET (parallel_workers); diff --git a/test/expected/partitionwise-16.out b/test/expected/partitionwise-16.out index 831f84d0e79..d1ad799cbf7 100644 --- a/test/expected/partitionwise-16.out +++ b/test/expected/partitionwise-16.out @@ -339,36 +339,24 @@ SELECT device, avg(temp) FROM hyper GROUP BY 1 ORDER BY 1; - QUERY PLAN ------------------------------------------------------------------------------------------ + QUERY PLAN +---------------------------------------------------------------------------- Sort Output: _hyper_1_1_chunk.device, (avg(_hyper_1_1_chunk.temp)) Sort Key: _hyper_1_1_chunk.device - -> Finalize HashAggregate + -> HashAggregate Output: _hyper_1_1_chunk.device, avg(_hyper_1_1_chunk.temp) Group Key: _hyper_1_1_chunk.device -> Append - -> Partial HashAggregate - Output: _hyper_1_1_chunk.device, PARTIAL avg(_hyper_1_1_chunk.temp) - Group Key: _hyper_1_1_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_1_1_chunk - Output: _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp - -> Partial HashAggregate - Output: _hyper_1_2_chunk.device, PARTIAL avg(_hyper_1_2_chunk.temp) - Group Key: _hyper_1_2_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk - Output: _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp - -> Partial HashAggregate - Output: _hyper_1_3_chunk.device, PARTIAL avg(_hyper_1_3_chunk.temp) - Group Key: _hyper_1_3_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_1_3_chunk - Output: _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp - -> Partial HashAggregate - Output: _hyper_1_4_chunk.device, PARTIAL avg(_hyper_1_4_chunk.temp) - Group Key: _hyper_1_4_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_1_4_chunk - Output: _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp -(27 rows) + -> Seq Scan on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_3_chunk + Output: _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_4_chunk + Output: _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp +(15 rows) -- All partition keys covered (full partitionwise) SET timescaledb.enable_chunkwise_aggregation = 'off'; @@ -402,56 +390,24 @@ SELECT time, device, avg(temp) FROM hyper GROUP BY 1, 2 ORDER BY 1, 2; - QUERY PLAN 
------------------------------------------------------------------------------------------------------------------------------------------ - Finalize GroupAggregate - Output: hyper."time", hyper.device, avg(hyper.temp) - Group Key: hyper."time", hyper.device - -> Sort - Output: hyper."time", hyper.device, (PARTIAL avg(hyper.temp)) - Sort Key: hyper."time", hyper.device - -> Custom Scan (ChunkAppend) on public.hyper - Output: hyper."time", hyper.device, (PARTIAL avg(hyper.temp)) - Order: hyper."time" - Startup Exclusion: false - Runtime Exclusion: false - -> Merge Append - Sort Key: _hyper_1_1_chunk."time" - -> Partial GroupAggregate - Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, PARTIAL avg(_hyper_1_1_chunk.temp) - Group Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device - -> Sort - Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp - Sort Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device - -> Index Scan Backward using _hyper_1_1_chunk_hyper_time_idx on _timescaledb_internal._hyper_1_1_chunk - Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp - -> Partial GroupAggregate - Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, PARTIAL avg(_hyper_1_2_chunk.temp) - Group Key: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device - -> Sort - Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp - Sort Key: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device - -> Index Scan Backward using _hyper_1_2_chunk_hyper_time_idx on _timescaledb_internal._hyper_1_2_chunk - Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp - -> Merge Append - Sort Key: _hyper_1_3_chunk."time" - -> Partial GroupAggregate - Output: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device, PARTIAL avg(_hyper_1_3_chunk.temp) - Group Key: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device - -> Sort - Output: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp - Sort Key: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device - -> Index Scan Backward using _hyper_1_3_chunk_hyper_time_idx on _timescaledb_internal._hyper_1_3_chunk - Output: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp - -> Partial GroupAggregate - Output: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device, PARTIAL avg(_hyper_1_4_chunk.temp) - Group Key: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device - -> Sort - Output: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp - Sort Key: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device - -> Index Scan Backward using _hyper_1_4_chunk_hyper_time_idx on _timescaledb_internal._hyper_1_4_chunk - Output: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp -(47 rows) + QUERY PLAN +----------------------------------------------------------------------------------------------------- + Sort + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, (avg(_hyper_1_1_chunk.temp)) + Sort Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device + -> HashAggregate + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, avg(_hyper_1_1_chunk.temp) + Group Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device + -> Append + -> Seq Scan on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp + -> Seq Scan on 
_timescaledb_internal._hyper_1_3_chunk + Output: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_4_chunk + Output: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp +(15 rows) -- Partial aggregation since date_trunc(time) is not a partition key SET enable_partitionwise_aggregate = 'off'; @@ -800,28 +756,22 @@ FROM hyper_timepart GROUP BY 1, 2 ORDER BY 1, 2 LIMIT 10; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Limit Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, (avg(_hyper_3_7_chunk.temp)) -> Sort Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, (avg(_hyper_3_7_chunk.temp)) Sort Key: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device - -> Finalize HashAggregate + -> HashAggregate Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, avg(_hyper_3_7_chunk.temp) Group Key: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device -> Append - -> Partial HashAggregate - Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, PARTIAL avg(_hyper_3_7_chunk.temp) - Group Key: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_3_7_chunk - Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, _hyper_3_7_chunk.temp - -> Partial HashAggregate - Output: _hyper_3_8_chunk."time", _hyper_3_8_chunk.device, PARTIAL avg(_hyper_3_8_chunk.temp) - Group Key: _hyper_3_8_chunk."time", _hyper_3_8_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_3_8_chunk - Output: _hyper_3_8_chunk."time", _hyper_3_8_chunk.device, _hyper_3_8_chunk.temp -(19 rows) + -> Seq Scan on _timescaledb_internal._hyper_3_7_chunk + Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, _hyper_3_7_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_3_8_chunk + Output: _hyper_3_8_chunk."time", _hyper_3_8_chunk.device, _hyper_3_8_chunk.temp +(13 rows) -- Applying the time partitioning function should also allow push-down -- on open dimensions From 806d953b86aa3609ff166dfeee334016ef538286 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Sun, 28 Jul 2024 11:28:03 +0200 Subject: [PATCH 06/12] reference REL_14_11 append-* parallel-* partitionwise-* --- test/expected/append-14.out | 124 +++++++++++----------------- test/expected/parallel-14.out | 127 +++++++++++++---------------- test/expected/partitionwise-14.out | 126 +++++++++------------------- 3 files changed, 143 insertions(+), 234 deletions(-) diff --git a/test/expected/append-14.out b/test/expected/append-14.out index a3928f14890..8e449cb917b 100644 --- a/test/expected/append-14.out +++ b/test/expected/append-14.out @@ -2035,95 +2035,67 @@ ORDER BY time DESC, device_id; -- aggregates should prevent pushdown :PREFIX SELECT count(*) FROM metrics_timestamptz LIMIT 1; - QUERY PLAN ----------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------- Limit (actual rows=1 loops=1) - -> Finalize Aggregate (actual rows=1 loops=1) - -> Append (actual rows=5 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_17_chunk (actual rows=4032 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_18_chunk (actual rows=6048 
loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_19_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_20_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_21_chunk (actual rows=4611 loops=1) -(13 rows) + -> Aggregate (actual rows=1 loops=1) + -> Append (actual rows=26787 loops=1) + -> Seq Scan on _hyper_5_17_chunk (actual rows=4032 loops=1) + -> Seq Scan on _hyper_5_18_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_19_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_20_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_21_chunk (actual rows=4611 loops=1) +(8 rows) :PREFIX SELECT count(*) FROM metrics_space LIMIT 1; - QUERY PLAN ----------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------- Limit (actual rows=1 loops=1) - -> Finalize Aggregate (actual rows=1 loops=1) - -> Append (actual rows=9 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_22_chunk (actual rows=5376 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_23_chunk (actual rows=5376 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_24_chunk (actual rows=2688 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_25_chunk (actual rows=8064 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_26_chunk (actual rows=8064 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_27_chunk (actual rows=4032 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_28_chunk (actual rows=1540 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_29_chunk (actual rows=1540 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_30_chunk (actual rows=770 loops=1) -(21 rows) + -> Aggregate (actual rows=1 loops=1) + -> Append (actual rows=37450 loops=1) + -> Seq Scan on _hyper_6_22_chunk (actual rows=5376 loops=1) + -> Seq Scan on _hyper_6_23_chunk (actual rows=5376 loops=1) + -> Seq Scan on _hyper_6_24_chunk (actual rows=2688 loops=1) + -> Seq Scan on _hyper_6_25_chunk (actual rows=8064 loops=1) + -> Seq Scan on _hyper_6_26_chunk (actual rows=8064 loops=1) + -> Seq Scan on _hyper_6_27_chunk (actual rows=4032 loops=1) + -> Seq Scan on _hyper_6_28_chunk (actual rows=1540 loops=1) + -> Seq Scan on _hyper_6_29_chunk (actual rows=1540 loops=1) + -> Seq Scan on _hyper_6_30_chunk (actual rows=770 loops=1) +(12 rows) -- HAVING should prevent pushdown :PREFIX SELECT 1 FROM metrics_timestamptz HAVING count(*) > 1 LIMIT 1; - QUERY PLAN ----------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------- Limit (actual rows=1 loops=1) - -> Finalize Aggregate (actual rows=1 loops=1) + -> Aggregate (actual rows=1 loops=1) Filter: (count(*) > 1) - -> Append (actual rows=5 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_17_chunk (actual rows=4032 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_18_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_19_chunk (actual rows=6048 loops=1) - -> Partial 
Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_20_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_21_chunk (actual rows=4611 loops=1) -(14 rows) + -> Append (actual rows=26787 loops=1) + -> Seq Scan on _hyper_5_17_chunk (actual rows=4032 loops=1) + -> Seq Scan on _hyper_5_18_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_19_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_20_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_21_chunk (actual rows=4611 loops=1) +(9 rows) :PREFIX SELECT 1 FROM metrics_space HAVING count(*) > 1 LIMIT 1; - QUERY PLAN ----------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------- Limit (actual rows=1 loops=1) - -> Finalize Aggregate (actual rows=1 loops=1) + -> Aggregate (actual rows=1 loops=1) Filter: (count(*) > 1) - -> Append (actual rows=9 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_22_chunk (actual rows=5376 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_23_chunk (actual rows=5376 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_24_chunk (actual rows=2688 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_25_chunk (actual rows=8064 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_26_chunk (actual rows=8064 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_27_chunk (actual rows=4032 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_28_chunk (actual rows=1540 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_29_chunk (actual rows=1540 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_30_chunk (actual rows=770 loops=1) -(22 rows) + -> Append (actual rows=37450 loops=1) + -> Seq Scan on _hyper_6_22_chunk (actual rows=5376 loops=1) + -> Seq Scan on _hyper_6_23_chunk (actual rows=5376 loops=1) + -> Seq Scan on _hyper_6_24_chunk (actual rows=2688 loops=1) + -> Seq Scan on _hyper_6_25_chunk (actual rows=8064 loops=1) + -> Seq Scan on _hyper_6_26_chunk (actual rows=8064 loops=1) + -> Seq Scan on _hyper_6_27_chunk (actual rows=4032 loops=1) + -> Seq Scan on _hyper_6_28_chunk (actual rows=1540 loops=1) + -> Seq Scan on _hyper_6_29_chunk (actual rows=1540 loops=1) + -> Seq Scan on _hyper_6_30_chunk (actual rows=770 loops=1) +(13 rows) -- DISTINCT should prevent pushdown SET enable_hashagg TO false; diff --git a/test/expected/parallel-14.out b/test/expected/parallel-14.out index 845fe60993f..40a80e2f52a 100644 --- a/test/expected/parallel-14.out +++ b/test/expected/parallel-14.out @@ -35,12 +35,11 @@ EXPLAIN (costs off) SELECT first(i, j) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT first(i, j) FROM "test"; first @@ -54,12 +53,11 @@ EXPLAIN (costs off) SELECT last(i, j) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 
rows) SELECT last(i, j) FROM "test"; last @@ -138,12 +136,11 @@ EXPLAIN (costs off) SELECT histogram(i, 1, 1000000, 2) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT histogram(i, 1, 1000000, 2) FROM "test"; histogram @@ -157,12 +154,11 @@ EXPLAIN (costs off) SELECT histogram(i, 1,1000001,10) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT histogram(i, 1, 1000001, 10) FROM "test"; histogram @@ -176,12 +172,11 @@ EXPLAIN (costs off) SELECT histogram(i, 0,100000,5) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT histogram(i, 0, 100000, 5) FROM "test"; histogram @@ -195,12 +190,11 @@ EXPLAIN (costs off) SELECT histogram(i, 10,100000,5) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT histogram(i, 10, 100000, 5) FROM "test"; histogram @@ -214,14 +208,13 @@ EXPLAIN (costs off) SELECT histogram(NULL, 10,100000,5) FROM "test" WHERE i = c Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk Filter: ((i)::double precision = '-1'::double precision) - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk Filter: ((i)::double precision = '-1'::double precision) -(10 rows) +(9 rows) SELECT histogram(NULL, 10,100000,5) FROM "test" WHERE i = coalesce(-1,j); histogram @@ -254,21 +247,20 @@ SELECT histogram(NULL, 10,100000,5) FROM "test" WHERE i = coalesce(-1,j); Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_1_chunk Filter: (i > 1) - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_2_chunk Filter: (i > 1) -(17 rows) +(16 rows) SELECT count(*) FROM "test" WHERE i > 1 AND length(version()) > 0; count @@ -553,21 +545,20 @@ SET max_parallel_workers_per_gather TO 2; Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Parallel Index Only Scan 
using _hyper_1_1_chunk_test_i_idx on _hyper_1_1_chunk Index Cond: (i >= 400000) - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_2_chunk Filter: (i >= 400000) -(17 rows) +(16 rows) SELECT count(*) FROM "test" WHERE i >= 400000 AND length(version()) > 0; count @@ -583,21 +574,20 @@ SELECT count(*) FROM "test" WHERE i >= 400000 AND length(version()) > 0; Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Parallel Index Only Scan using _hyper_1_2_chunk_test_i_idx on _hyper_1_2_chunk Index Cond: (i < 600000) - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_1_chunk Filter: (i < 600000) -(17 rows) +(16 rows) SELECT count(*) FROM "test" WHERE i < 600000 AND length(version()) > 0; count @@ -613,19 +603,18 @@ SET max_parallel_workers_per_gather TO 1; Finalize Aggregate -> Gather Workers Planned: 1 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_2_chunk -(15 rows) +(14 rows) SELECT count(*) FROM "test" WHERE length(version()) > 0; count @@ -669,21 +658,20 @@ ALTER TABLE :CHUNK2 SET (parallel_workers=2); Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Index Only Scan using _hyper_1_1_chunk_test_i_idx on _hyper_1_1_chunk Index Cond: (i > 400000) - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_2_chunk Filter: (i > 400000) -(17 rows) +(16 rows) ALTER TABLE :CHUNK1 SET (parallel_workers=2); ALTER TABLE :CHUNK2 SET (parallel_workers=0); @@ -693,21 +681,20 @@ ALTER TABLE :CHUNK2 SET (parallel_workers=0); Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Index Only Scan using _hyper_1_2_chunk_test_i_idx on _hyper_1_2_chunk Index Cond: (i < 600000) - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_1_chunk Filter: (i < 600000) -(17 rows) +(16 rows) ALTER TABLE :CHUNK1 RESET (parallel_workers); ALTER TABLE 
:CHUNK2 RESET (parallel_workers); diff --git a/test/expected/partitionwise-14.out b/test/expected/partitionwise-14.out index ea178ed5813..7baadd6c26b 100644 --- a/test/expected/partitionwise-14.out +++ b/test/expected/partitionwise-14.out @@ -339,36 +339,24 @@ SELECT device, avg(temp) FROM hyper GROUP BY 1 ORDER BY 1; - QUERY PLAN ------------------------------------------------------------------------------------------ + QUERY PLAN +---------------------------------------------------------------------------- Sort Output: _hyper_1_1_chunk.device, (avg(_hyper_1_1_chunk.temp)) Sort Key: _hyper_1_1_chunk.device - -> Finalize HashAggregate + -> HashAggregate Output: _hyper_1_1_chunk.device, avg(_hyper_1_1_chunk.temp) Group Key: _hyper_1_1_chunk.device -> Append - -> Partial HashAggregate - Output: _hyper_1_1_chunk.device, PARTIAL avg(_hyper_1_1_chunk.temp) - Group Key: _hyper_1_1_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_1_1_chunk - Output: _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp - -> Partial HashAggregate - Output: _hyper_1_2_chunk.device, PARTIAL avg(_hyper_1_2_chunk.temp) - Group Key: _hyper_1_2_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk - Output: _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp - -> Partial HashAggregate - Output: _hyper_1_3_chunk.device, PARTIAL avg(_hyper_1_3_chunk.temp) - Group Key: _hyper_1_3_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_1_3_chunk - Output: _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp - -> Partial HashAggregate - Output: _hyper_1_4_chunk.device, PARTIAL avg(_hyper_1_4_chunk.temp) - Group Key: _hyper_1_4_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_1_4_chunk - Output: _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp -(27 rows) + -> Seq Scan on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_3_chunk + Output: _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_4_chunk + Output: _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp +(15 rows) -- All partition keys covered (full partitionwise) SET timescaledb.enable_chunkwise_aggregation = 'off'; @@ -402,56 +390,24 @@ SELECT time, device, avg(temp) FROM hyper GROUP BY 1, 2 ORDER BY 1, 2; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------ - Finalize GroupAggregate - Output: hyper."time", hyper.device, avg(hyper.temp) - Group Key: hyper."time", hyper.device - -> Sort - Output: hyper."time", hyper.device, (PARTIAL avg(hyper.temp)) - Sort Key: hyper."time", hyper.device - -> Custom Scan (ChunkAppend) on public.hyper - Output: hyper."time", hyper.device, (PARTIAL avg(hyper.temp)) - Order: hyper."time" - Startup Exclusion: false - Runtime Exclusion: false - -> Merge Append - Sort Key: _hyper_1_1_chunk."time" - -> Partial GroupAggregate - Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, PARTIAL avg(_hyper_1_1_chunk.temp) - Group Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device - -> Sort - Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp - Sort Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device - -> Index Scan Backward using _hyper_1_1_chunk_hyper_time_idx on _timescaledb_internal._hyper_1_1_chunk - Output: _hyper_1_1_chunk."time", 
_hyper_1_1_chunk.device, _hyper_1_1_chunk.temp - -> Partial GroupAggregate - Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, PARTIAL avg(_hyper_1_2_chunk.temp) - Group Key: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device - -> Sort - Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp - Sort Key: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device - -> Index Scan Backward using _hyper_1_2_chunk_hyper_time_idx on _timescaledb_internal._hyper_1_2_chunk - Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp - -> Merge Append - Sort Key: _hyper_1_3_chunk."time" - -> Partial GroupAggregate - Output: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device, PARTIAL avg(_hyper_1_3_chunk.temp) - Group Key: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device - -> Sort - Output: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp - Sort Key: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device - -> Index Scan Backward using _hyper_1_3_chunk_hyper_time_idx on _timescaledb_internal._hyper_1_3_chunk - Output: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp - -> Partial GroupAggregate - Output: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device, PARTIAL avg(_hyper_1_4_chunk.temp) - Group Key: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device - -> Sort - Output: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp - Sort Key: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device - -> Index Scan Backward using _hyper_1_4_chunk_hyper_time_idx on _timescaledb_internal._hyper_1_4_chunk - Output: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp -(47 rows) + QUERY PLAN +----------------------------------------------------------------------------------------------------- + Sort + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, (avg(_hyper_1_1_chunk.temp)) + Sort Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device + -> HashAggregate + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, avg(_hyper_1_1_chunk.temp) + Group Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device + -> Append + -> Seq Scan on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_3_chunk + Output: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_4_chunk + Output: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp +(15 rows) -- Partial aggregation since date_trunc(time) is not a partition key SET enable_partitionwise_aggregate = 'off'; @@ -800,28 +756,22 @@ FROM hyper_timepart GROUP BY 1, 2 ORDER BY 1, 2 LIMIT 10; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Limit Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, (avg(_hyper_3_7_chunk.temp)) -> Sort Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, (avg(_hyper_3_7_chunk.temp)) Sort Key: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device - -> Finalize HashAggregate + -> HashAggregate Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, avg(_hyper_3_7_chunk.temp) Group Key: 
_hyper_3_7_chunk."time", _hyper_3_7_chunk.device -> Append - -> Partial HashAggregate - Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, PARTIAL avg(_hyper_3_7_chunk.temp) - Group Key: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_3_7_chunk - Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, _hyper_3_7_chunk.temp - -> Partial HashAggregate - Output: _hyper_3_8_chunk."time", _hyper_3_8_chunk.device, PARTIAL avg(_hyper_3_8_chunk.temp) - Group Key: _hyper_3_8_chunk."time", _hyper_3_8_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_3_8_chunk - Output: _hyper_3_8_chunk."time", _hyper_3_8_chunk.device, _hyper_3_8_chunk.temp -(19 rows) + -> Seq Scan on _timescaledb_internal._hyper_3_7_chunk + Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, _hyper_3_7_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_3_8_chunk + Output: _hyper_3_8_chunk."time", _hyper_3_8_chunk.device, _hyper_3_8_chunk.temp +(13 rows) -- Applying the time partitioning function should also allow push-down -- on open dimensions From d2f9f903d20e75b7a6c6948d418f788c6037b2d3 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Sun, 28 Jul 2024 11:37:43 +0200 Subject: [PATCH 07/12] windows --- tsl/src/planner.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tsl/src/planner.h b/tsl/src/planner.h index df1243e8a1d..f8d33a3f5a1 100644 --- a/tsl/src/planner.h +++ b/tsl/src/planner.h @@ -19,5 +19,6 @@ void tsl_set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTb void tsl_preprocess_query(Query *parse); void tsl_postprocess_plan(PlannedStmt *stmt); -void TSDLLEXPORT ts_pushdown_partial_agg(PlannerInfo *root, Hypertable *ht, RelOptInfo *input_rel, - RelOptInfo *output_rel, void *extra); +extern TSDLLEXPORT void ts_pushdown_partial_agg(PlannerInfo *root, Hypertable *ht, + RelOptInfo *input_rel, RelOptInfo *output_rel, + void *extra); From 3f66925096983bb457bff46befbaeec7f57dcf9b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Sun, 28 Jul 2024 11:38:32 +0200 Subject: [PATCH 08/12] reference REL_17_BETA2-151-g821fbd63ea append-* parallel-* partitionwise-* --- test/expected/append-17.out | 124 +++++++++++----------------- test/expected/parallel-17.out | 127 +++++++++++++---------------- test/expected/partitionwise-17.out | 126 +++++++++------------------- 3 files changed, 143 insertions(+), 234 deletions(-) diff --git a/test/expected/append-17.out b/test/expected/append-17.out index f06784a66dc..c98ab0765a1 100644 --- a/test/expected/append-17.out +++ b/test/expected/append-17.out @@ -2034,95 +2034,67 @@ ORDER BY time DESC, device_id; -- aggregates should prevent pushdown :PREFIX SELECT count(*) FROM metrics_timestamptz LIMIT 1; - QUERY PLAN ----------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------- Limit (actual rows=1 loops=1) - -> Finalize Aggregate (actual rows=1 loops=1) - -> Append (actual rows=5 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_17_chunk (actual rows=4032 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_18_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_19_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on 
_hyper_5_20_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_21_chunk (actual rows=4611 loops=1) -(13 rows) + -> Aggregate (actual rows=1 loops=1) + -> Append (actual rows=26787 loops=1) + -> Seq Scan on _hyper_5_17_chunk (actual rows=4032 loops=1) + -> Seq Scan on _hyper_5_18_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_19_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_20_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_21_chunk (actual rows=4611 loops=1) +(8 rows) :PREFIX SELECT count(*) FROM metrics_space LIMIT 1; - QUERY PLAN ----------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------- Limit (actual rows=1 loops=1) - -> Finalize Aggregate (actual rows=1 loops=1) - -> Append (actual rows=9 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_22_chunk (actual rows=5376 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_23_chunk (actual rows=5376 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_24_chunk (actual rows=2688 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_25_chunk (actual rows=8064 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_26_chunk (actual rows=8064 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_27_chunk (actual rows=4032 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_28_chunk (actual rows=1540 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_29_chunk (actual rows=1540 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_30_chunk (actual rows=770 loops=1) -(21 rows) + -> Aggregate (actual rows=1 loops=1) + -> Append (actual rows=37450 loops=1) + -> Seq Scan on _hyper_6_22_chunk (actual rows=5376 loops=1) + -> Seq Scan on _hyper_6_23_chunk (actual rows=5376 loops=1) + -> Seq Scan on _hyper_6_24_chunk (actual rows=2688 loops=1) + -> Seq Scan on _hyper_6_25_chunk (actual rows=8064 loops=1) + -> Seq Scan on _hyper_6_26_chunk (actual rows=8064 loops=1) + -> Seq Scan on _hyper_6_27_chunk (actual rows=4032 loops=1) + -> Seq Scan on _hyper_6_28_chunk (actual rows=1540 loops=1) + -> Seq Scan on _hyper_6_29_chunk (actual rows=1540 loops=1) + -> Seq Scan on _hyper_6_30_chunk (actual rows=770 loops=1) +(12 rows) -- HAVING should prevent pushdown :PREFIX SELECT 1 FROM metrics_timestamptz HAVING count(*) > 1 LIMIT 1; - QUERY PLAN ----------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------- Limit (actual rows=1 loops=1) - -> Finalize Aggregate (actual rows=1 loops=1) + -> Aggregate (actual rows=1 loops=1) Filter: (count(*) > 1) - -> Append (actual rows=5 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_17_chunk (actual rows=4032 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_18_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_19_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_20_chunk (actual rows=6048 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_5_21_chunk (actual 
rows=4611 loops=1) -(14 rows) + -> Append (actual rows=26787 loops=1) + -> Seq Scan on _hyper_5_17_chunk (actual rows=4032 loops=1) + -> Seq Scan on _hyper_5_18_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_19_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_20_chunk (actual rows=6048 loops=1) + -> Seq Scan on _hyper_5_21_chunk (actual rows=4611 loops=1) +(9 rows) :PREFIX SELECT 1 FROM metrics_space HAVING count(*) > 1 LIMIT 1; - QUERY PLAN ----------------------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------------------- Limit (actual rows=1 loops=1) - -> Finalize Aggregate (actual rows=1 loops=1) + -> Aggregate (actual rows=1 loops=1) Filter: (count(*) > 1) - -> Append (actual rows=9 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_22_chunk (actual rows=5376 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_23_chunk (actual rows=5376 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_24_chunk (actual rows=2688 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_25_chunk (actual rows=8064 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_26_chunk (actual rows=8064 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_27_chunk (actual rows=4032 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_28_chunk (actual rows=1540 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_29_chunk (actual rows=1540 loops=1) - -> Partial Aggregate (actual rows=1 loops=1) - -> Seq Scan on _hyper_6_30_chunk (actual rows=770 loops=1) -(22 rows) + -> Append (actual rows=37450 loops=1) + -> Seq Scan on _hyper_6_22_chunk (actual rows=5376 loops=1) + -> Seq Scan on _hyper_6_23_chunk (actual rows=5376 loops=1) + -> Seq Scan on _hyper_6_24_chunk (actual rows=2688 loops=1) + -> Seq Scan on _hyper_6_25_chunk (actual rows=8064 loops=1) + -> Seq Scan on _hyper_6_26_chunk (actual rows=8064 loops=1) + -> Seq Scan on _hyper_6_27_chunk (actual rows=4032 loops=1) + -> Seq Scan on _hyper_6_28_chunk (actual rows=1540 loops=1) + -> Seq Scan on _hyper_6_29_chunk (actual rows=1540 loops=1) + -> Seq Scan on _hyper_6_30_chunk (actual rows=770 loops=1) +(13 rows) -- DISTINCT should prevent pushdown SET enable_hashagg TO false; diff --git a/test/expected/parallel-17.out b/test/expected/parallel-17.out index 01d899fe9f2..1e1f73815a9 100644 --- a/test/expected/parallel-17.out +++ b/test/expected/parallel-17.out @@ -35,12 +35,11 @@ EXPLAIN (costs off) SELECT first(i, j) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT first(i, j) FROM "test"; first @@ -54,12 +53,11 @@ EXPLAIN (costs off) SELECT last(i, j) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT last(i, j) FROM "test"; last @@ -139,12 +137,11 @@ EXPLAIN (costs off) SELECT histogram(i, 1, 1000000, 2) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> 
Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT histogram(i, 1, 1000000, 2) FROM "test"; histogram @@ -158,12 +155,11 @@ EXPLAIN (costs off) SELECT histogram(i, 1,1000001,10) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT histogram(i, 1, 1000001, 10) FROM "test"; histogram @@ -177,12 +173,11 @@ EXPLAIN (costs off) SELECT histogram(i, 0,100000,5) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT histogram(i, 0, 100000, 5) FROM "test"; histogram @@ -196,12 +191,11 @@ EXPLAIN (costs off) SELECT histogram(i, 10,100000,5) FROM "test"; Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk -(8 rows) +(7 rows) SELECT histogram(i, 10, 100000, 5) FROM "test"; histogram @@ -215,14 +209,13 @@ EXPLAIN (costs off) SELECT histogram(NULL, 10,100000,5) FROM "test" WHERE i = c Finalize Aggregate -> Gather Workers Planned: 2 - -> Parallel Append - -> Partial Aggregate + -> Partial Aggregate + -> Parallel Append -> Parallel Seq Scan on _hyper_1_1_chunk Filter: ((i)::double precision = '-1'::double precision) - -> Partial Aggregate -> Parallel Seq Scan on _hyper_1_2_chunk Filter: ((i)::double precision = '-1'::double precision) -(10 rows) +(9 rows) SELECT histogram(NULL, 10,100000,5) FROM "test" WHERE i = coalesce(-1,j); histogram @@ -255,21 +248,20 @@ SELECT histogram(NULL, 10,100000,5) FROM "test" WHERE i = coalesce(-1,j); Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_1_chunk Filter: (i > 1) - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_2_chunk Filter: (i > 1) -(17 rows) +(16 rows) SELECT count(*) FROM "test" WHERE i > 1 AND length(version()) > 0; count @@ -555,21 +547,20 @@ SET max_parallel_workers_per_gather TO 2; Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Parallel Index Only Scan using _hyper_1_1_chunk_test_i_idx on _hyper_1_1_chunk Index Cond: (i >= 400000) - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on 
_hyper_1_2_chunk Filter: (i >= 400000) -(17 rows) +(16 rows) SELECT count(*) FROM "test" WHERE i >= 400000 AND length(version()) > 0; count @@ -585,21 +576,20 @@ SELECT count(*) FROM "test" WHERE i >= 400000 AND length(version()) > 0; Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Parallel Index Only Scan using _hyper_1_2_chunk_test_i_idx on _hyper_1_2_chunk Index Cond: (i < 600000) - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_1_chunk Filter: (i < 600000) -(17 rows) +(16 rows) SELECT count(*) FROM "test" WHERE i < 600000 AND length(version()) > 0; count @@ -615,19 +605,18 @@ SET max_parallel_workers_per_gather TO 1; Finalize Aggregate -> Gather Workers Planned: 1 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_1_chunk - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_2_chunk -(15 rows) +(14 rows) SELECT count(*) FROM "test" WHERE length(version()) > 0; count @@ -671,21 +660,20 @@ ALTER TABLE :CHUNK2 SET (parallel_workers=2); Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Index Only Scan using _hyper_1_1_chunk_test_i_idx on _hyper_1_1_chunk Index Cond: (i > 400000) - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_2_chunk Filter: (i > 400000) -(17 rows) +(16 rows) ALTER TABLE :CHUNK1 SET (parallel_workers=2); ALTER TABLE :CHUNK2 SET (parallel_workers=0); @@ -695,21 +683,20 @@ ALTER TABLE :CHUNK2 SET (parallel_workers=0); Finalize Aggregate -> Gather Workers Planned: 2 - -> Result - One-Time Filter: (length(version()) > 0) - -> Parallel Custom Scan (ChunkAppend) on test - Chunks excluded during startup: 0 - -> Partial Aggregate + -> Partial Aggregate + -> Result + One-Time Filter: (length(version()) > 0) + -> Parallel Custom Scan (ChunkAppend) on test + Chunks excluded during startup: 0 -> Result One-Time Filter: (length(version()) > 0) -> Index Only Scan using _hyper_1_2_chunk_test_i_idx on _hyper_1_2_chunk Index Cond: (i < 600000) - -> Partial Aggregate -> Result One-Time Filter: (length(version()) > 0) -> Parallel Seq Scan on _hyper_1_1_chunk Filter: (i < 600000) -(17 rows) +(16 rows) ALTER TABLE :CHUNK1 RESET (parallel_workers); ALTER TABLE :CHUNK2 RESET (parallel_workers); diff --git a/test/expected/partitionwise-17.out b/test/expected/partitionwise-17.out index 831f84d0e79..d1ad799cbf7 100644 --- 
a/test/expected/partitionwise-17.out +++ b/test/expected/partitionwise-17.out @@ -339,36 +339,24 @@ SELECT device, avg(temp) FROM hyper GROUP BY 1 ORDER BY 1; - QUERY PLAN ------------------------------------------------------------------------------------------ + QUERY PLAN +---------------------------------------------------------------------------- Sort Output: _hyper_1_1_chunk.device, (avg(_hyper_1_1_chunk.temp)) Sort Key: _hyper_1_1_chunk.device - -> Finalize HashAggregate + -> HashAggregate Output: _hyper_1_1_chunk.device, avg(_hyper_1_1_chunk.temp) Group Key: _hyper_1_1_chunk.device -> Append - -> Partial HashAggregate - Output: _hyper_1_1_chunk.device, PARTIAL avg(_hyper_1_1_chunk.temp) - Group Key: _hyper_1_1_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_1_1_chunk - Output: _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp - -> Partial HashAggregate - Output: _hyper_1_2_chunk.device, PARTIAL avg(_hyper_1_2_chunk.temp) - Group Key: _hyper_1_2_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk - Output: _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp - -> Partial HashAggregate - Output: _hyper_1_3_chunk.device, PARTIAL avg(_hyper_1_3_chunk.temp) - Group Key: _hyper_1_3_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_1_3_chunk - Output: _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp - -> Partial HashAggregate - Output: _hyper_1_4_chunk.device, PARTIAL avg(_hyper_1_4_chunk.temp) - Group Key: _hyper_1_4_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_1_4_chunk - Output: _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp -(27 rows) + -> Seq Scan on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_3_chunk + Output: _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_4_chunk + Output: _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp +(15 rows) -- All partition keys covered (full partitionwise) SET timescaledb.enable_chunkwise_aggregation = 'off'; @@ -402,56 +390,24 @@ SELECT time, device, avg(temp) FROM hyper GROUP BY 1, 2 ORDER BY 1, 2; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------ - Finalize GroupAggregate - Output: hyper."time", hyper.device, avg(hyper.temp) - Group Key: hyper."time", hyper.device - -> Sort - Output: hyper."time", hyper.device, (PARTIAL avg(hyper.temp)) - Sort Key: hyper."time", hyper.device - -> Custom Scan (ChunkAppend) on public.hyper - Output: hyper."time", hyper.device, (PARTIAL avg(hyper.temp)) - Order: hyper."time" - Startup Exclusion: false - Runtime Exclusion: false - -> Merge Append - Sort Key: _hyper_1_1_chunk."time" - -> Partial GroupAggregate - Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, PARTIAL avg(_hyper_1_1_chunk.temp) - Group Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device - -> Sort - Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp - Sort Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device - -> Index Scan Backward using _hyper_1_1_chunk_hyper_time_idx on _timescaledb_internal._hyper_1_1_chunk - Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp - -> Partial GroupAggregate - Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, PARTIAL avg(_hyper_1_2_chunk.temp) - Group Key: 
_hyper_1_2_chunk."time", _hyper_1_2_chunk.device - -> Sort - Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp - Sort Key: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device - -> Index Scan Backward using _hyper_1_2_chunk_hyper_time_idx on _timescaledb_internal._hyper_1_2_chunk - Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp - -> Merge Append - Sort Key: _hyper_1_3_chunk."time" - -> Partial GroupAggregate - Output: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device, PARTIAL avg(_hyper_1_3_chunk.temp) - Group Key: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device - -> Sort - Output: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp - Sort Key: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device - -> Index Scan Backward using _hyper_1_3_chunk_hyper_time_idx on _timescaledb_internal._hyper_1_3_chunk - Output: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp - -> Partial GroupAggregate - Output: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device, PARTIAL avg(_hyper_1_4_chunk.temp) - Group Key: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device - -> Sort - Output: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp - Sort Key: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device - -> Index Scan Backward using _hyper_1_4_chunk_hyper_time_idx on _timescaledb_internal._hyper_1_4_chunk - Output: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp -(47 rows) + QUERY PLAN +----------------------------------------------------------------------------------------------------- + Sort + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, (avg(_hyper_1_1_chunk.temp)) + Sort Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device + -> HashAggregate + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, avg(_hyper_1_1_chunk.temp) + Group Key: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device + -> Append + -> Seq Scan on _timescaledb_internal._hyper_1_1_chunk + Output: _hyper_1_1_chunk."time", _hyper_1_1_chunk.device, _hyper_1_1_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_2_chunk + Output: _hyper_1_2_chunk."time", _hyper_1_2_chunk.device, _hyper_1_2_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_3_chunk + Output: _hyper_1_3_chunk."time", _hyper_1_3_chunk.device, _hyper_1_3_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_1_4_chunk + Output: _hyper_1_4_chunk."time", _hyper_1_4_chunk.device, _hyper_1_4_chunk.temp +(15 rows) -- Partial aggregation since date_trunc(time) is not a partition key SET enable_partitionwise_aggregate = 'off'; @@ -800,28 +756,22 @@ FROM hyper_timepart GROUP BY 1, 2 ORDER BY 1, 2 LIMIT 10; - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------------------------------------- Limit Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, (avg(_hyper_3_7_chunk.temp)) -> Sort Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, (avg(_hyper_3_7_chunk.temp)) Sort Key: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device - -> Finalize HashAggregate + -> HashAggregate Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, avg(_hyper_3_7_chunk.temp) Group Key: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device -> Append - -> Partial HashAggregate - Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, PARTIAL avg(_hyper_3_7_chunk.temp) - Group Key: 
_hyper_3_7_chunk."time", _hyper_3_7_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_3_7_chunk - Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, _hyper_3_7_chunk.temp - -> Partial HashAggregate - Output: _hyper_3_8_chunk."time", _hyper_3_8_chunk.device, PARTIAL avg(_hyper_3_8_chunk.temp) - Group Key: _hyper_3_8_chunk."time", _hyper_3_8_chunk.device - -> Seq Scan on _timescaledb_internal._hyper_3_8_chunk - Output: _hyper_3_8_chunk."time", _hyper_3_8_chunk.device, _hyper_3_8_chunk.temp -(19 rows) + -> Seq Scan on _timescaledb_internal._hyper_3_7_chunk + Output: _hyper_3_7_chunk."time", _hyper_3_7_chunk.device, _hyper_3_7_chunk.temp + -> Seq Scan on _timescaledb_internal._hyper_3_8_chunk + Output: _hyper_3_8_chunk."time", _hyper_3_8_chunk.device, _hyper_3_8_chunk.temp +(13 rows) -- Applying the time partitioning function should also allow push-down -- on open dimensions From e67c05054032b0a7523b0f2f930bf8a8bfded17c Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Sun, 28 Jul 2024 17:01:44 +0200 Subject: [PATCH 09/12] fix --- tsl/src/planner.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tsl/src/planner.c b/tsl/src/planner.c index 84128235556..06e1c2d1a1b 100644 --- a/tsl/src/planner.c +++ b/tsl/src/planner.c @@ -44,14 +44,15 @@ is_osm_present() } static bool -involves_hypertable(PlannerInfo *root, RelOptInfo *rel) +involves_hypertable(PlannerInfo *root, RelOptInfo *parent) { - for (int relid = bms_next_member(rel->relids, -1); relid > 0; - relid = bms_next_member(rel->relids, relid)) + my_print(root); + for (int relid = bms_next_member(parent->relids, -1); relid > 0; + relid = bms_next_member(parent->relids, relid)) { Hypertable *ht; - RelOptInfo *rel = root->simple_rel_array[relid]; - if (ts_classify_relation(root, rel, &ht) == TS_REL_HYPERTABLE) + RelOptInfo *child = root->simple_rel_array[relid]; + if (child != NULL && ts_classify_relation(root, child, &ht) == TS_REL_HYPERTABLE) { return true; } From fe0457c20e6e8189ffa86fa919714a1dc3f58239 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 29 Jul 2024 12:24:58 +0200 Subject: [PATCH 10/12] fix --- tsl/src/planner.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tsl/src/planner.c b/tsl/src/planner.c index 06e1c2d1a1b..41f1273e60a 100644 --- a/tsl/src/planner.c +++ b/tsl/src/planner.c @@ -46,12 +46,15 @@ is_osm_present() static bool involves_hypertable(PlannerInfo *root, RelOptInfo *parent) { - my_print(root); for (int relid = bms_next_member(parent->relids, -1); relid > 0; relid = bms_next_member(parent->relids, relid)) { Hypertable *ht; RelOptInfo *child = root->simple_rel_array[relid]; + /* + * RelOptInfo can be null here for join RTEs on PG >= 16. This doesn't + * matter because we'll have all the baserels in relids bitmap as well. 
+ */ if (child != NULL && ts_classify_relation(root, child, &ht) == TS_REL_HYPERTABLE) { return true; From 10645230e3268f0bd0fb21a53181917ae09fab1b Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 29 Jul 2024 12:43:16 +0200 Subject: [PATCH 11/12] inconsistent dll linkage --- tsl/src/chunkwise_agg.c | 2 ++ tsl/src/chunkwise_agg.h | 17 +++++++++++++++++ tsl/src/planner.c | 2 ++ tsl/src/planner.h | 4 ---- 4 files changed, 21 insertions(+), 4 deletions(-) create mode 100644 tsl/src/chunkwise_agg.h diff --git a/tsl/src/chunkwise_agg.c b/tsl/src/chunkwise_agg.c index 737e59c8192..f5dae8e7b66 100644 --- a/tsl/src/chunkwise_agg.c +++ b/tsl/src/chunkwise_agg.c @@ -12,6 +12,8 @@ #include #include +#include "chunkwise_agg.h" + #include "gapfill.h" #include "guc.h" #include "import/planner.h" diff --git a/tsl/src/chunkwise_agg.h b/tsl/src/chunkwise_agg.h new file mode 100644 index 00000000000..2e9df7f0b6a --- /dev/null +++ b/tsl/src/chunkwise_agg.h @@ -0,0 +1,17 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. + */ +#pragma once + +#include + +#include + +#include "export.h" +#include "hypertable.h" + +extern TSDLLEXPORT void ts_pushdown_partial_agg(PlannerInfo *root, Hypertable *ht, + RelOptInfo *input_rel, RelOptInfo *output_rel, + void *extra); diff --git a/tsl/src/planner.c b/tsl/src/planner.c index 41f1273e60a..b6292533acc 100644 --- a/tsl/src/planner.c +++ b/tsl/src/planner.c @@ -13,7 +13,9 @@ #include #include "compat/compat.h" + #include "chunk.h" +#include "chunkwise_agg.h" #include "continuous_aggs/planner.h" #include "guc.h" #include "hypertable.h" diff --git a/tsl/src/planner.h b/tsl/src/planner.h index f8d33a3f5a1..f88d6c45fa6 100644 --- a/tsl/src/planner.h +++ b/tsl/src/planner.h @@ -18,7 +18,3 @@ void tsl_set_rel_pathlist_dml(PlannerInfo *, RelOptInfo *, Index, RangeTblEntry void tsl_set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); void tsl_preprocess_query(Query *parse); void tsl_postprocess_plan(PlannedStmt *stmt); - -extern TSDLLEXPORT void ts_pushdown_partial_agg(PlannerInfo *root, Hypertable *ht, - RelOptInfo *input_rel, RelOptInfo *output_rel, - void *extra); From fe95431e7859d6993ad27a4e33c7ce60e3fa24ad Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 31 Jul 2024 12:43:21 +0200 Subject: [PATCH 12/12] inconsistent dll linkage.... 
--- tsl/src/chunkwise_agg.c | 4 ++-- tsl/src/chunkwise_agg.h | 5 ++--- tsl/src/planner.c | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/tsl/src/chunkwise_agg.c b/tsl/src/chunkwise_agg.c index f5dae8e7b66..8395b97ab39 100644 --- a/tsl/src/chunkwise_agg.c +++ b/tsl/src/chunkwise_agg.c @@ -561,8 +561,8 @@ contains_path_plain_or_sorted_agg(Path *path) * - Chunk N */ void -ts_pushdown_partial_agg(PlannerInfo *root, Hypertable *ht, RelOptInfo *input_rel, - RelOptInfo *output_rel, void *extra) +tsl_pushdown_partial_agg(PlannerInfo *root, Hypertable *ht, RelOptInfo *input_rel, + RelOptInfo *output_rel, void *extra) { Query *parse = root->parse; diff --git a/tsl/src/chunkwise_agg.h b/tsl/src/chunkwise_agg.h index 2e9df7f0b6a..15eb42e8cfc 100644 --- a/tsl/src/chunkwise_agg.h +++ b/tsl/src/chunkwise_agg.h @@ -12,6 +12,5 @@ #include "export.h" #include "hypertable.h" -extern TSDLLEXPORT void ts_pushdown_partial_agg(PlannerInfo *root, Hypertable *ht, - RelOptInfo *input_rel, RelOptInfo *output_rel, - void *extra); +void tsl_pushdown_partial_agg(PlannerInfo *root, Hypertable *ht, RelOptInfo *input_rel, + RelOptInfo *output_rel, void *extra); diff --git a/tsl/src/planner.c b/tsl/src/planner.c index b6292533acc..5ca747cbae8 100644 --- a/tsl/src/planner.c +++ b/tsl/src/planner.c @@ -82,7 +82,7 @@ tsl_create_upper_paths_hook(PlannerInfo *root, UpperRelationKind stage, RelOptIn !IS_DUMMY_REL(input_rel) && output_rel != NULL && involves_hypertable(root, input_rel)) { - ts_pushdown_partial_agg(root, ht, input_rel, output_rel, extra); + tsl_pushdown_partial_agg(root, ht, input_rel, output_rel, extra); } break; case UPPERREL_WINDOW: