Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FEAT: Include only referenced data columns in chart specs #2586

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 37 additions & 1 deletion altair/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@


class DataTransformerRegistry(PluginRegistry[DataTransformerType]):
_global_settings = {"consolidate_datasets": True}
_global_settings = {
"consolidate_datasets": True,
}

@property
def consolidate_datasets(self):
Expand All @@ -31,6 +33,40 @@ def consolidate_datasets(self):
def consolidate_datasets(self, value):
self._global_settings["consolidate_datasets"] = value

def compress_datasets(self, enable=True, transformed_charts=False):
    """Configure removal of data fields that the chart spec never mentions.

    The removal is deliberately conservative: a field with a short or
    common name may survive simply because that name happens to appear
    somewhere else in the spec.

    Parameters
    ----------
    enable : bool
        Turn dataset compression on or off.

    transformed_charts : bool
        Also compress charts that contain transforms. This can break
        charts whose transforms reference a data field without the field
        name appearing literally in the chart spec, so a warning is
        emitted whenever this is requested.
    """
    if transformed_charts:
        message = (
            "Data compression can break charts with transforms where data"
            " fields are referenced without the field name occurring"
            " literally in the chart spec."
            " Please revert this option if you"
            " are observing unexpected behavior with transformed charts."
        )
        warnings.warn(message)

    # Stored on the instance, shadowing the class-level defaults.
    self._compress_datasets = enable
    self._compress_transformed_charts = transformed_charts

# Class-level defaults for the two flags that compress_datasets() overrides
# on the instance: compression itself starts enabled, while the opt-in for
# charts containing transforms starts disabled.
_compress_datasets = True
_compress_transformed_charts = False


# ==============================================================================
# Data model transformers
Expand Down
36 changes: 36 additions & 0 deletions altair/vegalite/v5/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,34 @@ def _prepare_data(data, context=None):
return data


def _compress_datasets(dct):
"""Remove data fields that do not occurr elsewhere in the chart spec

The approach taken here is conservative as it will
leave some of the data fields that should be removed because they
have a common or short name that happens to occur elsewhere in the
spec by coincidence.
It might incorreclty remove fields when advanced transforms are used.
Removing data fields properly is difficult as mentioned here
https://github.com/altair-viz/altair/issues/2428#issuecomment-798917267.

Parameters
----------
dct : dict
The input chart schema
"""
spec_as_string = [
val for key, val in dct.items() if key not in ["data", "datasets", "$schema"]
].__str__()
for dataset_name in dct["datasets"]:
for dict_row in dct["datasets"][dataset_name]:
# Must iterate over a list since the dict will change size
for field in list(dict_row.keys()):
if field not in spec_as_string:
del dict_row[field]
return dct


# ------------------------------------------------------------------------
# Aliases & specializations
Bin = core.BinParams
Expand Down Expand Up @@ -568,6 +596,14 @@ def to_dict(self, *args, **kwargs):
if context["datasets"]:
dct.setdefault("datasets", {}).update(context["datasets"])

# Remove data fields that do not occur elsewhere in the chart spec
if data_transformers._compress_datasets:
if (
not data_transformers._compress_transformed_charts
and "transform" not in dct.keys()
) or data_transformers._compress_transformed_charts:
if "datasets" in dct:
dct = _compress_datasets(dct)
return dct

def to_html(
Expand Down