From abb74e53ae26578fbb2954dd59fd05eae4f35083 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Tue, 7 Sep 2021 14:50:54 -0700 Subject: [PATCH 1/3] Add no_sign_request for S3Client --- HISTORY.md | 4 ++++ cloudpathlib/s3/s3client.py | 45 +++++++++++++++++++++++++---------- tests/mock_clients/mock_s3.py | 4 ++-- tests/test_s3_specific.py | 22 +++++++++++++++++ 4 files changed, 61 insertions(+), 14 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 880c5f9d..59828930 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,9 @@ # cloudpathlib Changelog +## v0.5.1 (unreleased) + + - Fix #38 to allow passing `no_sign_request` to `S3Client` so that we can do anonymous requests for public resources on S3. + ## v0.5.0 (2021-08-31) - Added `boto3_transfer_config` parameter to `S3Client` instantiation, which allows passing a `boto3.s3.transfer.TransferConfig` object and is useful for controlling multipart and thread use in uploads and downloads. See [documentation](https://cloudpathlib.drivendata.org/api-reference/s3client/#cloudpathlib.s3.s3client.S3Client.__init__) for more details. ([#150](https://github.com/drivendataorg/cloudpathlib/pull/150)) diff --git a/cloudpathlib/s3/s3client.py b/cloudpathlib/s3/s3client.py index 6c00882d..d8f75324 100644 --- a/cloudpathlib/s3/s3client.py +++ b/cloudpathlib/s3/s3client.py @@ -8,8 +8,9 @@ try: from boto3.session import Session - import botocore.session from boto3.s3.transfer import TransferConfig + from botocore.config import Config + import botocore.session except ModuleNotFoundError: implementation_registry["s3"].dependencies_loaded = False @@ -25,6 +26,7 @@ def __init__( aws_access_key_id: Optional[str] = None, aws_secret_access_key: Optional[str] = None, aws_session_token: Optional[str] = None, + no_sign_request: Optional[bool] = False, botocore_session: Optional["botocore.session.Session"] = None, profile_name: Optional[str] = None, boto3_session: Optional["Session"] = None, @@ -46,6 +48,9 @@ def __init__( aws_secret_access_key (Optional[str]): AWS secret access key. aws_session_token (Optional[str]): Session key for your AWS account. This is only needed when you are using temporarycredentials. + no_sign_request: (Optional[bool]): If `True`, credentials are not looked for and we use unsigned + requests to fetch resources. This will only allow access to public resources. This is equivalent + to `--no-sign-request` in the AWS CLI (https://docs.aws.amazon.com/cli/latest/reference/). botocore_session (Optional[botocore.session.Session]): An already instantiated botocore Session. profile_name (Optional[str]): Profile name of a profile in a shared credentials file. @@ -57,18 +62,34 @@ def __init__( boto3_transfer_config (Optional[dict]): Instantiated TransferConfig for managing s3 transfers. (https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/s3.html#boto3.s3.transfer.TransferConfig) """ - if boto3_session is not None: - self.sess = boto3_session - else: - self.sess = Session( - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - aws_session_token=aws_session_token, - botocore_session=botocore_session, - profile_name=profile_name, + + if no_sign_request: + self.sess = Session() + self.s3 = self.sess.resource( + "s3", + endpoint_url=endpoint_url, + config=Config(signature_version=botocore.session.UNSIGNED), + ) + self.client = self.sess.client( + "s3", + endpoint_url=endpoint_url, + config=Config(signature_version=botocore.session.UNSIGNED), ) - self.s3 = self.sess.resource("s3", endpoint_url=endpoint_url) - self.client = self.sess.client("s3", endpoint_url=endpoint_url) + else: + if boto3_session is not None: + self.sess = boto3_session + else: + self.sess = Session( + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + aws_session_token=aws_session_token, + botocore_session=botocore_session, + profile_name=profile_name, + ) + + self.s3 = self.sess.resource("s3", endpoint_url=endpoint_url) + self.client = self.sess.client("s3", endpoint_url=endpoint_url) + self.boto3_transfer_config = boto3_transfer_config super().__init__(local_cache_dir=local_cache_dir) diff --git a/tests/mock_clients/mock_s3.py b/tests/mock_clients/mock_s3.py index f968ea9f..58baebf2 100644 --- a/tests/mock_clients/mock_s3.py +++ b/tests/mock_clients/mock_s3.py @@ -29,10 +29,10 @@ def __init__(self, *args, **kwargs): def __del__(self): self.tmp.cleanup() - def resource(self, item, endpoint_url): + def resource(self, item, endpoint_url, config=None): return MockBoto3Resource(self.tmp_path) - def client(self, item, endpoint_url): + def client(self, item, endpoint_url, config=None): return MockBoto3Client(self.tmp_path) return MockBoto3Session diff --git a/tests/test_s3_specific.py b/tests/test_s3_specific.py index 6c137035..cd2c5140 100644 --- a/tests/test_s3_specific.py +++ b/tests/test_s3_specific.py @@ -3,6 +3,7 @@ import pytest from boto3.s3.transfer import TransferConfig +import botocore from cloudpathlib import S3Path from cloudpathlib.local import LocalS3Path import psutil @@ -130,3 +131,24 @@ def _execute_on_subprocess_and_observe(use_threads): # usually ~15 threads are spun up whe use_threads is True assert _execute_on_subprocess_and_observe(use_threads=True) > 10 + + +def test_no_sign_request(s3_rig, tmp_path): + """Tests that boto3 receives and respects the transfer config + when working with a live backend. Does this by observing + if the `use_threads` parameter changes to number of threads + used by a child process that does a download. + """ + if s3_rig.live_server: + client = s3_rig.client_class(no_sign_request=True) + + # unsigned can access public path (part of AWS open data) + p = client.CloudPath( + "s3://ladi/Images/FEMA_CAP/2020/70349/DSC_0001_5a63d42e-27c6-448a-84f1-bfc632125b8e.jpg" + ) + assert p.exists() + + # unsigned raises for private S3 file that exists + p = client.CloudPath(f"s3://{s3_rig.drive}/dir_0/file0_to_download.txt") + with pytest.raises(botocore.exceptions.ClientError): + p.exists() From 51a9204a955b0b2b25985096d850682d4b3a75c7 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Tue, 7 Sep 2021 15:59:53 -0700 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: Jay Qi <2721979+jayqi@users.noreply.github.com> --- HISTORY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 59828930..f06d0f7b 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -2,7 +2,7 @@ ## v0.5.1 (unreleased) - - Fix #38 to allow passing `no_sign_request` to `S3Client` so that we can do anonymous requests for public resources on S3. + - Added `no_sign_request` parameter to `S3Client` instantiation for anonymous requests for public resources on S3. See [documentation](https://cloudpathlib.drivendata.org/stable/api-reference/s3client/#cloudpathlib.s3.s3client.S3Client.__init__) for more details. ([#164](https://github.com/drivendataorg/cloudpathlib/pull/164)) ## v0.5.0 (2021-08-31) From 4084423807ad35b4f966dca6826cfb6e238d6ec1 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Tue, 7 Sep 2021 16:52:35 -0700 Subject: [PATCH 3/3] Code review comments --- HISTORY.md | 4 ++-- cloudpathlib/s3/s3client.py | 22 ++++++++++------------ setup.py | 2 +- tests/test_s3_specific.py | 8 +++----- 4 files changed, 16 insertions(+), 20 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index f06d0f7b..8078249e 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,8 +1,8 @@ # cloudpathlib Changelog -## v0.5.1 (unreleased) +## v0.6.0 (2021-09-07) - - Added `no_sign_request` parameter to `S3Client` instantiation for anonymous requests for public resources on S3. See [documentation](https://cloudpathlib.drivendata.org/stable/api-reference/s3client/#cloudpathlib.s3.s3client.S3Client.__init__) for more details. ([#164](https://github.com/drivendataorg/cloudpathlib/pull/164)) +- Added `no_sign_request` parameter to `S3Client` instantiation for anonymous requests for public resources on S3. See [documentation](https://cloudpathlib.drivendata.org/stable/api-reference/s3client/#cloudpathlib.s3.s3client.S3Client.__init__) for more details. ([#164](https://github.com/drivendataorg/cloudpathlib/pull/164)) ## v0.5.0 (2021-08-31) diff --git a/cloudpathlib/s3/s3client.py b/cloudpathlib/s3/s3client.py index d8f75324..9a8c818c 100644 --- a/cloudpathlib/s3/s3client.py +++ b/cloudpathlib/s3/s3client.py @@ -62,9 +62,18 @@ def __init__( boto3_transfer_config (Optional[dict]): Instantiated TransferConfig for managing s3 transfers. (https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/s3.html#boto3.s3.transfer.TransferConfig) """ + if boto3_session is not None: + self.sess = boto3_session + else: + self.sess = Session( + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + aws_session_token=aws_session_token, + botocore_session=botocore_session, + profile_name=profile_name, + ) if no_sign_request: - self.sess = Session() self.s3 = self.sess.resource( "s3", endpoint_url=endpoint_url, @@ -76,17 +85,6 @@ def __init__( config=Config(signature_version=botocore.session.UNSIGNED), ) else: - if boto3_session is not None: - self.sess = boto3_session - else: - self.sess = Session( - aws_access_key_id=aws_access_key_id, - aws_secret_access_key=aws_secret_access_key, - aws_session_token=aws_session_token, - botocore_session=botocore_session, - profile_name=profile_name, - ) - self.s3 = self.sess.resource("s3", endpoint_url=endpoint_url) self.client = self.sess.client("s3", endpoint_url=endpoint_url) diff --git a/setup.py b/setup.py index 5f881484..1cf82f18 100644 --- a/setup.py +++ b/setup.py @@ -59,5 +59,5 @@ def load_requirements(path: Path): "Source Code": "https://github.com/drivendataorg/cloudpathlib", }, url="https://github.com/drivendataorg/cloudpathlib", - version="0.5.0", + version="0.6.0", ) diff --git a/tests/test_s3_specific.py b/tests/test_s3_specific.py index cd2c5140..09e058d9 100644 --- a/tests/test_s3_specific.py +++ b/tests/test_s3_specific.py @@ -133,11 +133,9 @@ def _execute_on_subprocess_and_observe(use_threads): assert _execute_on_subprocess_and_observe(use_threads=True) > 10 -def test_no_sign_request(s3_rig, tmp_path): - """Tests that boto3 receives and respects the transfer config - when working with a live backend. Does this by observing - if the `use_threads` parameter changes to number of threads - used by a child process that does a download. +def test_no_sign_request(s3_rig): + """Tests that we can pass no_sign_request to the S3Client and we will + be able to access public resources but not private ones. """ if s3_rig.live_server: client = s3_rig.client_class(no_sign_request=True)