Skip to content

Commit

Permalink
Warning specifying future change in to_tf_dataset behaviour (huggingface#5742)
Browse files Browse the repository at this point in the history

* Warning specifying future change in to_tf_dataset behaviour

* Only warn for single element lists
  • Loading branch information
amyeroberts committed Apr 21, 2023
1 parent f7265ca commit 649d5a3
Showing 1 changed file with 14 additions and 0 deletions.
14 changes: 14 additions & 0 deletions src/datasets/arrow_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,20 @@ def to_tf_dataset(
else:
raise ImportError("Called a Tensorflow-specific function but Tensorflow is not installed.")

if (isinstance(columns, list) and len(columns) == 1) or (
isinstance(label_cols, list) and len(label_cols) == 1
):
warnings.warn(
"The output of `to_tf_dataset` will change when a passing single element list for `labels` or "
"`columns` in the next datasets version. To return a tuple structure rather than dict, pass a "
"single string.\n"
"Old behaviour: columns=['a'], labels=['labels'] -> (tf.Tensor, tf.Tensor) \n"
" : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor) \n"
"New behaviour: columns=['a'],labels=['labels'] -> ({'a': tf.Tensor}, {'labels': tf.Tensor}) \n"
" : columns='a', labels='labels' -> (tf.Tensor, tf.Tensor) ",
FutureWarning,
)

if isinstance(tf.distribute.get_strategy(), tf.distribute.TPUStrategy):
logger.warning(
"Note that to_tf_dataset() loads the data with a generator rather than a full tf.data "
Expand Down

0 comments on commit 649d5a3

Please sign in to comment.