Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: unify dependencies #155

Merged
merged 5 commits into from
Sep 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## \[Unreleased\]

### Added

- Check the Python version used before generating the Dockerfile ([#155])(<https://github.com/Substra/substrafl/pull/155>)).
- Python dependencies can be resolved using pip compile during function registration by setting `compile` to `True`
in the `Dependency` object ([#155])(<https://github.com/Substra/substrafl/pull/155>)).

```py
Dependency(
pypi_dependencies=["pytest", "numpy"],
compile=True,
)
```

- `Dependency` objects are now computed at initialization in a cache directory, accessible through the `copy_compute_dir` method. The cache directory is deleted at the `Dependency` object deletion. ([#155])(<https://github.com/Substra/substrafl/pull/155>))

### Changed

- BREAKING: Rename `generate_wheel.py` to `manage_dependencies.py` ([#156](https://github.com/Substra/substrafl/pull/156))
- BREAKING: Move `manage_dependencies.py` from `remote.register` to `dependency` ([#158](https://github.com/Substra/substrafl/pull/158))
- BREAKING: `local_dependencies` is renamed `local_installable_dependencies` ([#158](https://github.com/Substra/substrafl/pull/158))
- BREAKING: local_installable_dependencies are now limited to local modules or Python wheels (no support for bdist, sdist...)([#155])(<https://github.com/Substra/substrafl/pull/155>)).

### Fixed

Expand Down
1 change: 1 addition & 0 deletions benchmark/camelyon/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def substrafl_fed_avg(
local_code=[base / "common", base / "weldon_fedavg.py"],
# Keeping editable_mode=True to ensure nightly test benchmarks are ran against main substrafl git ref
editable_mode=True,
compile=True,
)

# Custom Strategy used for the data loading (from custom_torch_function.py file)
Expand Down
2 changes: 2 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,8 @@
("py:class", "substra.sdk.schemas.FunctionOutputSpec"),
("py:class", "substra.sdk.schemas.FunctionInputSpec"),
("py:class", "ComputePlan"),
("py:class", "Path"),
("py:class", "module"),
]

html_css_files = [
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"wheel",
"six",
"packaging",
"pip-tools",
],
extras_require={
"dev": [
Expand Down
1 change: 1 addition & 0 deletions substrafl/constants.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
TMP_SUBSTRAFL_PREFIX = "tmp_substrafl"
SUBSTRAFL_FOLDER = "substrafl_internal"
205 changes: 138 additions & 67 deletions substrafl/dependency/manage_dependencies.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,97 @@
"""
Generate wheels for the Substra algo.
Utility functions to manage dependencies (building wheels, compiling requirement...)
"""
import logging
import os
import re
import shutil
import subprocess
import sys
from pathlib import Path
from pathlib import PurePosixPath
from types import ModuleType
from typing import List
from typing import Union

from substrafl.exceptions import InvalidDependenciesError
from substrafl.exceptions import InvalidUserModuleError

logger = logging.getLogger(__name__)

LOCAL_WHEELS_FOLDER = Path.home() / ".substrafl"


def local_lib_wheels(lib_modules: List, *, operation_dir: Path, python_major_minor: str, dest_dir: str) -> str:
"""Prepares the private modules from lib_modules list to be installed in a Docker image and generates the
appropriated install command for a dockerfile. It first creates the wheel for each library. Each of the
libraries must be already installed in the correct version locally. Use command:
``pip install -e library-name`` in the directory of each library.
def build_user_dependency_wheel(lib_path: Path, dest_dir: Path) -> str:
"""Build the wheel for user dependencies passed as a local module.
Delete the local module when the build is done.

Args:
lib_path (Path): where the module is located.
dest_dir (Path): where the wheel needs to be copied.

Returns:
str: the filename of the wheel
"""
# sys.executable takes the current Python interpreter instead of the default one on the computer
try:
ret = subprocess.run(
[
sys.executable,
"-m",
"pip",
"wheel",
str(lib_path) + os.sep,
"--no-deps",
],
cwd=str(dest_dir),
check=True,
capture_output=True,
text=True,
)

except subprocess.CalledProcessError as e:
raise InvalidUserModuleError from e

finally:
# Delete the folder when the wheel is computed
shutil.rmtree(dest_dir / lib_path) # delete directory

wheel_name = re.findall(r"filename=(\S*)", ret.stdout)[0]
SdgJlbl marked this conversation as resolved.
Show resolved Hide resolved

return wheel_name


def local_lib_wheels(lib_modules: List[ModuleType], *, dest_dir: Path) -> List[str]:
"""Generate wheels for the private modules from lib_modules list and returns the list of names for each wheel.

It first creates the wheel for each library. Each of the libraries must be already installed in the correct
version locally. Use command: ``pip install -e library-name`` in the directory of each library.
Then it copies the wheels to the given directory.

This allows one user to use custom version of the passed modules.

Args:
lib_modules (list): list of modules to be installed.
operation_dir (pathlib.Path): Path to the operation directory
python_major_minor (str): version which is to be used in the dockerfile. Eg: '3.8'
dest_dir (str): relative directory where the wheels are saved

Returns:
str: dockerfile command for installing the given modules
List[str]: wheel names for the given modules
"""
install_cmds = []
wheels_dir = operation_dir / dest_dir
wheels_dir.mkdir(exist_ok=True, parents=True)
wheel_names = []
dest_dir.mkdir(exist_ok=True, parents=True)
for lib_module in lib_modules:
if not (Path(lib_module.__file__).parents[1] / "setup.py").exists():
lib_path = Path(lib_module.__file__).parents[1]
# this function is in practice only called on substra libraries, and we know we use a setup.py
if not (lib_path / "setup.py").exists():
msg = ", ".join([lib.__name__ for lib in lib_modules])
raise NotImplementedError(
f"You must install {msg} in editable mode.\n" "eg `pip install -e substra` in the substra directory"
)
lib_name = lib_module.__name__
lib_path = Path(lib_module.__file__).parents[1]
wheel_name = f"{lib_name}-{lib_module.__version__}-py3-none-any.whl"

wheel_path = LOCAL_WHEELS_FOLDER / wheel_name
# Recreate the wheel only if it does not exist
# Recreate the wheel only if it does not exist
SdgJlbl marked this conversation as resolved.
Show resolved Hide resolved
if wheel_path.exists():
logger.warning(
f"Existing wheel {wheel_path} will be used to build {lib_name}. "
Expand All @@ -54,14 +101,14 @@ def local_lib_wheels(lib_modules: List, *, operation_dir: Path, python_major_min
else:
# if the right version of substra or substratools is not found, it will search if they are already
# installed in 'dist' and take them from there.
# sys.executable takes the Python interpreter run by the code and not the default one on the computer
extra_args: list = list()
# sys.executable takes the current Python interpreter instead of the default one on the computer
extra_args: list = []
if lib_name == "substrafl":
extra_args = [
"--find-links",
operation_dir / "dist/substra",
dest_dir.parent / "substra",
"--find-links",
operation_dir / "dist/substratools",
dest_dir.parent / "substratools",
]
subprocess.check_output(
[
Expand All @@ -78,65 +125,89 @@ def local_lib_wheels(lib_modules: List, *, operation_dir: Path, python_major_min
cwd=str(lib_path),
)

shutil.copy(wheel_path, wheels_dir / wheel_name)

# Necessary command to install the wheel in the docker image
force_reinstall = "--force-reinstall " if lib_name in ["substratools", "substra"] else ""
install_cmd = (
f"COPY {dest_dir}/{wheel_name} . \n"
+ f"RUN python{python_major_minor} -m pip install {force_reinstall}{wheel_name}\n"
)
install_cmds.append(install_cmd)
shutil.copy(wheel_path, dest_dir / wheel_name)
wheel_names.append(wheel_name)

return "\n".join(install_cmds)
return wheel_names


def pypi_lib_wheels(lib_modules: List, *, operation_dir: Path, python_major_minor: str, dest_dir: str) -> str:
"""Retrieves lib_modules' wheels to be installed in a Docker image and generates
the appropriated install command for a dockerfile.
def get_pypi_dependencies_versions(lib_modules: List) -> List[str]:
"""Retrieve the version of the PyPI libraries installed to generate the dependency list

Args:
lib_modules (list): list of modules to be installed.
operation_dir (pathlib.Path): Path to the operation directory
python_major_minor (str): version which is to be used in the dockerfile. Eg: '3.8'
dest_dir (str): relative directory where the wheels are saved

Returns:
str: dockerfile command for installing the given modules
list(str): list of dependencies to install in the Docker container
"""
install_cmds = []
wheels_dir = operation_dir / dest_dir
wheels_dir.mkdir(exist_ok=True, parents=True)
return [f"{lib_module.__name__}=={lib_module.__version__}" for lib_module in lib_modules]

LOCAL_WHEELS_FOLDER.mkdir(exist_ok=True)

for lib_module in lib_modules:
wheel_name = f"{lib_module.__name__}-{lib_module.__version__}-py3-none-any.whl"
def write_requirements(dependency_list: List[Union[str, Path]], *, dest_dir: Path) -> None:
SdgJlbl marked this conversation as resolved.
Show resolved Hide resolved
"""Writes down a `requirements.txt` file with the list of explicit dependencies.

# Download only if exists
if not ((LOCAL_WHEELS_FOLDER / wheel_name).exists()):
subprocess.check_output(
[
sys.executable,
"-m",
"pip",
"download",
"--only-binary",
":all:",
"--python-version",
python_major_minor,
"--no-deps",
"--implementation",
"py",
"-d",
LOCAL_WHEELS_FOLDER,
f"{lib_module.__name__}=={lib_module.__version__}",
]
)
Args:
dependency_list: list of dependencies to install; acceptable formats are library names (eg "substrafl"),
any constraint expression accepted by pip("substrafl==0.36.0" or "substrafl < 1.0") or wheel names
("substrafl-0.36.0-py3-none-any.whl")
dest_dir: path to the directory where to write the ``requirements.txt``.
"""
requirements_txt = dest_dir / "requirements.txt"

_write_requirements_file(dependency_list=dependency_list, file=requirements_txt)


def compile_requirements(dependency_list: List[Union[str, Path]], *, dest_dir: Path) -> None:
"""Compile a list of requirements using pip-compile to generate a set of fully pinned third parties requirements

Writes down a `requirements.in` file with the list of explicit dependencies, then generates a `requirements.txt`
file using pip-compile. The `requirements.txt` file contains a set of fully pinned dependencies, including indirect
dependencies.

# Get wheel name based on current version
shutil.copy(LOCAL_WHEELS_FOLDER / wheel_name, wheels_dir / wheel_name)
install_cmd = f"COPY {dest_dir}/{wheel_name} . \n RUN python{python_major_minor} -m pip install {wheel_name}\n"
install_cmds.append(install_cmd)
Args:
dependency_list: list of dependencies to install; acceptable formats are library names (eg "substrafl"),
any constraint expression accepted by pip("substrafl==0.36.0" or "substrafl < 1.0") or wheel names
("substrafl-0.36.0-py3-none-any.whl")
dest_dir: path to the directory where to write the ``requirements.in`` and ``requirements.txt``.

Raises:
InvalidDependenciesError: if pip-compile does not find a set of compatible dependencies

return "\n".join(install_cmds)
"""
requirements_in = dest_dir / "requirements.in"

_write_requirements_file(dependency_list=dependency_list, file=requirements_in)

command = [
sys.executable,
"-m",
"piptools",
"compile",
"--resolver=backtracking",
"--no-emit-index-url",
str(requirements_in),
]
try:
logger.info("Compiling dependencies.")
subprocess.run(
command,
cwd=dest_dir,
check=True,
capture_output=True,
text=True,
)
except subprocess.CalledProcessError as e:
raise InvalidDependenciesError(
f"Error in command {' '.join(command)}\nstdout: {e.stdout}\nstderr: {e.stderr}"
) from e


def _write_requirements_file(dependency_list: List[Union[str, Path]], *, file: Path) -> None:
requirements = ""
for dependency in dependency_list:
if str(dependency).endswith(".whl"):
# Require '/', even on windows. The double conversion resolves that.
requirements += f"file:{PurePosixPath(Path(dependency))}\n"
else:
requirements += f"{dependency}\n"
file.write_text(requirements)
Loading
Loading