Add possibility to parameterize S3 service URL #138

Merged: 11 commits, Apr 3, 2021
4 changes: 4 additions & 0 deletions .github/workflows/tests-live.yml
@@ -43,3 +43,7 @@ jobs:
LIVE_S3_BUCKET: ${{ secrets.LIVE_S3_BUCKET }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
CUSTOM_S3_BUCKET: ${{ secrets.CUSTOM_S3_BUCKET }}
CUSTOM_S3_KEY_ID: ${{ secrets.CUSTOM_S3_KEY_ID }}
CUSTOM_S3_SECRET_KEY: ${{ secrets.CUSTOM_S3_SECRET_KEY }}
CUSTOM_S3_ENDPOINT: ${{ secrets.CUSTOM_S3_ENDPOINT }}
7 changes: 5 additions & 2 deletions cloudpathlib/s3/s3client.py
@@ -28,6 +28,7 @@ def __init__(
profile_name: Optional[str] = None,
boto3_session: Optional["Session"] = None,
local_cache_dir: Optional[Union[str, os.PathLike]] = None,
endpoint_url: Optional[str] = None,
):
"""Class constructor. Sets up a boto3 [`Session`](
https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html).
@@ -49,6 +50,8 @@ def __init__(
boto3_session (Optional[Session]): An already instantiated boto3 Session.
local_cache_dir (Optional[Union[str, os.PathLike]]): Path to directory to use as cache
for downloaded files. If None, will use a temporary directory.
endpoint_url (Optional[str]): S3 server endpoint URL to use for the constructed boto3 S3 resource and client.
Set it to access a custom-deployed, S3-compatible object store such as MinIO or Ceph.
"""
if boto3_session is not None:
self.sess = boto3_session
@@ -60,8 +63,8 @@
botocore_session=botocore_session,
profile_name=profile_name,
)
self.s3 = self.sess.resource("s3")
self.client = self.sess.client("s3")
self.s3 = self.sess.resource("s3", endpoint_url=endpoint_url)
self.client = self.sess.client("s3", endpoint_url=endpoint_url)

super().__init__(local_cache_dir=local_cache_dir)

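For illustration, a minimal sketch of what the two boto3 calls above do once `endpoint_url` is set; the endpoint address, credentials, and bucket name below are hypothetical placeholders:

```python
import boto3

# hypothetical MinIO-style deployment; any S3-compatible endpoint behaves the same way
session = boto3.Session(
    aws_access_key_id="minio-access-key",      # placeholder credentials
    aws_secret_access_key="minio-secret-key",
)

# the same endpoint_url is passed to both the resource and the low-level client,
# so every bucket/object operation is routed to the custom server
s3 = session.resource("s3", endpoint_url="http://my.s3.server:1234")
client = session.client("s3", endpoint_url="http://my.s3.server:1234")

client.head_bucket(Bucket="cloudpathlib-test-bucket")  # raises if the bucket is unreachable
for obj in s3.Bucket("cloudpathlib-test-bucket").objects.limit(5):
    print(obj.key)
```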
25 changes: 24 additions & 1 deletion docs/docs/authentication.md
@@ -32,7 +32,7 @@ cloud_path.client

All subsequent instances of that service's cloud paths (in the example, all subsequent `S3Path` instances) will reference the same client instance.

You can also explicitly instantiate a client instance. You will need to do so if you want to authenticate using any option other than the environment variables from the table in the previous section. (To see what those options are, check out the API documentation pages linked to in the table above.) You can then use that client instance's cloud path factory method, or pass it into a cloud path instantiation
You can also explicitly instantiate a client instance. You will need to do so if you want to authenticate using any option other than the environment variables from the table in the previous section. (To see what those options are, check out the API documentation pages linked to in the table above.) You can then use that client instance's cloud path factory method, or pass it into a cloud path instantiation.

```python
from cloudpathlib import S3Client
@@ -59,3 +59,26 @@ If you need a reference to the default client:
S3Client.get_default_client()
#> <cloudpathlib.s3.s3client.S3Client at 0x7feac3d1fb90>
```

## Accessing custom S3-compatible object stores
You may need to access a custom-deployed, S3-compatible object store such as [MinIO](https://min.io/) or [Ceph](https://ceph.io/ceph-storage/object-storage/).
In that case, the service endpoint differs from the default AWS endpoints.
To point at a custom endpoint, instantiate `S3Client` manually with the `endpoint_url` parameter,
providing an http or https URL that includes the port.

```python
from cloudpathlib import S3Client, CloudPath

# create a client pointing to the endpoint
client = S3Client(endpoint_url="http://my.s3.server:1234")

# option 1: use the client to create paths
cp1 = client.CloudPath("s3://cloudpathlib-test-bucket/")

# option 2: pass the client as keyword argument
cp2 = CloudPath("s3://cloudpathlib-test-bucket/", client=client)

# option 3: set this client as the default so it is used in any future paths
client.set_as_default_client()
cp3 = CloudPath("s3://cloudpathlib-test-bucket/")
```
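As a sketch of how credentials for the custom store can be supplied (the key values and endpoint below are placeholders): the client builds a regular boto3 session, so the standard AWS environment variables are honored if they are set before the client is constructed.

```python
import os
from cloudpathlib import S3Client

# placeholder credentials for the custom store; boto3 reads these environment
# variables when the session inside S3Client resolves credentials
os.environ["AWS_ACCESS_KEY_ID"] = "custom-key-id"
os.environ["AWS_SECRET_ACCESS_KEY"] = "custom-secret-key"

client = S3Client(endpoint_url="http://my.s3.server:1234")
client.set_as_default_client()
```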
66 changes: 65 additions & 1 deletion tests/conftest.py
@@ -4,6 +4,7 @@

from azure.storage.blob import BlobServiceClient
import boto3
import botocore
from dotenv import find_dotenv, load_dotenv
from google.cloud import storage as google_storage
from pytest_cases import fixture, fixture_union
@@ -180,7 +181,8 @@ def s3_rig(request, monkeypatch, assets_dir):

if os.getenv("USE_LIVE_CLOUD") == "1":
# Set up test assets
bucket = boto3.resource("s3").Bucket(drive)
session = boto3.Session() # Fresh session to ensure isolation
bucket = session.resource("s3").Bucket(drive)
test_files = [
f for f in assets_dir.glob("**/*") if f.is_file() and f.name not in UPLOAD_IGNORE_LIST
]
@@ -212,6 +214,67 @@ def s3_rig(request, monkeypatch, assets_dir):
bucket.objects.filter(Prefix=test_dir).delete()


@fixture()
def custom_s3_rig(request, monkeypatch, assets_dir):
"""
Custom S3 rig used to test integration with non-AWS, S3-compatible object stores such as
- MinIO (https://min.io/)
- Ceph (https://ceph.io/ceph-storage/object-storage/)
- others
"""
drive = os.getenv("CUSTOM_S3_BUCKET", "bucket")
test_dir = create_test_dir_name(request)
custom_endpoint_url = os.getenv("CUSTOM_S3_ENDPOINT", "https://s3.us-west-1.drivendatabws.com")

if os.getenv("USE_LIVE_CLOUD") == "1":
monkeypatch.setenv("AWS_ACCESS_KEY_ID", os.getenv("CUSTOM_S3_KEY_ID"))
monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", os.getenv("CUSTOM_S3_SECRET_KEY"))

# Upload test assets
session = boto3.Session() # Fresh session to ensure isolation from AWS S3 auth
s3 = session.resource("s3", endpoint_url=custom_endpoint_url)

# bucket creation is idempotent, and our test server on Heroku only has ephemeral storage,
# so we try to create the bucket on each run
try:
s3.meta.client.head_bucket(Bucket=drive)
except botocore.exceptions.ClientError:
s3.create_bucket(Bucket=drive)

bucket = s3.Bucket(drive)

test_files = [
f for f in assets_dir.glob("**/*") if f.is_file() and f.name not in UPLOAD_IGNORE_LIST
]
for test_file in test_files:
bucket.upload_file(
str(test_file),
str(f"{test_dir}/{PurePosixPath(test_file.relative_to(assets_dir))}"),
)
else:
# Mock cloud SDK
monkeypatch.setattr(
cloudpathlib.s3.s3client,
"Session",
mocked_session_class_factory(test_dir),
)

rig = CloudProviderTestRig(
path_class=S3Path, client_class=S3Client, drive=drive, test_dir=test_dir
)

rig.client_class(
endpoint_url=custom_endpoint_url
).set_as_default_client() # set default client

yield rig

rig.client_class._default_client = None # reset default client

if os.getenv("USE_LIVE_CLOUD") == "1":
bucket.objects.filter(Prefix=test_dir).delete()


@fixture()
def local_azure_rig(request, monkeypatch, assets_dir):
drive = os.getenv("LIVE_AZURE_CONTAINER", "container")
@@ -294,6 +357,7 @@ def local_s3_rig(request, monkeypatch, assets_dir):
azure_rig,
gs_rig,
s3_rig,
custom_s3_rig,
local_azure_rig,
local_s3_rig,
local_gs_rig,
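For context, a minimal sketch (with illustrative fixture bodies, not the real rigs) of how the `pytest_cases` fixture union fans a single test out over every rig; this is why adding `custom_s3_rig` to the list above runs the whole suite against the custom endpoint as well:

```python
from pytest_cases import fixture, fixture_union

# illustrative stand-ins for the real rigs defined in conftest.py
@fixture()
def s3_rig():
    return "s3"

@fixture()
def custom_s3_rig():
    return "custom-s3"

# any test that requests `rig` is parametrized over every member of the union
rig = fixture_union("rig", [s3_rig, custom_s3_rig])

def test_runs_against_each_rig(rig):
    assert rig in {"s3", "custom-s3"}
```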
4 changes: 2 additions & 2 deletions tests/mock_clients/mock_s3.py
@@ -29,10 +29,10 @@ def __init__(self, *args, **kwargs):
def __del__(self):
self.tmp.cleanup()

def resource(self, item):
def resource(self, item, endpoint_url):
return MockBoto3Resource(self.tmp_path)

def client(self, item):
def client(self, item, endpoint_url):
return MockBoto3Client(self.tmp_path)

return MockBoto3Session
2 changes: 1 addition & 1 deletion tests/test_cloudpath_file_io.py
@@ -60,7 +60,7 @@ def test_file_read_writes(rig, tmp_path):

before_touch = datetime.now()
sleep(1)
p.touch()
p.touch() # I think touch does not change modified time on MinIO
assert datetime.fromtimestamp(p.stat().st_mtime) > before_touch

# no-op
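If `touch()` really does not advance the modification time on MinIO, one possible workaround (not part of this PR; the endpoint, bucket, and key below are hypothetical) is to copy the object onto itself with replaced metadata, which S3-compatible stores record as a new write:

```python
import boto3

client = boto3.client("s3", endpoint_url="http://my.s3.server:1234")  # hypothetical endpoint

# copying an object onto itself with MetadataDirective="REPLACE" rewrites it,
# which updates its LastModified timestamp
client.copy_object(
    Bucket="cloudpathlib-test-bucket",
    Key="dir_0/file0_0.txt",
    CopySource={"Bucket": "cloudpathlib-test-bucket", "Key": "dir_0/file0_0.txt"},
    MetadataDirective="REPLACE",
)
```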