Skip to content

Commit

Permalink
Fix streaming for servers not supporting HTTP range requests (huggingface#3689)
Browse files Browse the repository at this point in the history

* Download file locally when range request not supported

* Revert "Download file locally when range request not supported"

This reverts commit 9f90bcb.

* Raise custom error when range request not supported
  • Loading branch information
albertvillanova committed Feb 10, 2022
1 parent 36db39c commit fc67b30
Showing 1 changed file with 14 additions and 1 deletion.
15 changes: 14 additions & 1 deletion src/datasets/utils/streaming_download_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@
)


class NonStreamableDatasetError(Exception):
    """Raised when a dataset cannot be loaded in streaming mode.

    ``xopen`` raises this error when opening a remote file fails with
    fsspec's ``ValueError("Cannot seek streaming HTTP file")``, i.e. when the
    data host server does not support HTTP range requests. The original
    ``ValueError`` is chained as the cause of this exception.
    """


def xjoin(a, *p):
"""
This function extends os.path.join to support the "::" hop separator. It supports both paths and urls.
Expand Down Expand Up @@ -383,7 +387,16 @@ def xopen(file: str, mode="r", *args, use_auth_token: Optional[Union[str, bool]]
else:
new_kwargs = {}
kwargs = {**kwargs, **new_kwargs}
file_obj = fsspec.open(file, mode=mode, *args, **kwargs).open()
try:
file_obj = fsspec.open(file, mode=mode, *args, **kwargs).open()
except ValueError as e:
if str(e) == "Cannot seek streaming HTTP file":
raise NonStreamableDatasetError(
"Streaming is not possible for this dataset because data host server doesn't support HTTP range "
"requests. You can still load this dataset in non-streaming mode by passing `streaming=False` (default)"
) from e
else:
raise
_add_retries_to_file_obj_read_method(file_obj)
return file_obj

Expand Down

0 comments on commit fc67b30

Please sign in to comment.