Skip to content

Commit

Permalink
feat: unify dependencies
Browse files Browse the repository at this point in the history
Signed-off-by: ThibaultFy <50656860+ThibaultFy@users.noreply.github.com>
  • Loading branch information
ThibaultFy committed Aug 16, 2023
1 parent c40a7b2 commit b6fa09f
Show file tree
Hide file tree
Showing 17 changed files with 637 additions and 208 deletions.
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## \[Unreleased\]

- Check the Python version used before generating the Dockerfile ([#155])(<https://github.com/Substra/substrafl/pull/155>)).
- Python dependencies can be resolved using pip compile during function registration by setting `compile` to `True`
in the `Dependency` object ([#155])(<https://github.com/Substra/substrafl/pull/155>)).

```py
Dependency(
pypi_dependencies=["pytest", "numpy"],
compile=True,
)
```

- `Dependency` objects are now computed at initialization in a cache directory, accessible through the `copy_compute_dir` method. The cache directory is deleted at the `Dependency` object deletion. ([#155])(<https://github.com/Substra/substrafl/pull/155>))
- BREAKING: local_installable_dependencies are now limited to local modules or Python wheels (no support for bdist, sdist...)([#155])(<https://github.com/Substra/substrafl/pull/155>)).

### Changed

- BREAKING: Rename `generate_wheel.py` to `manage_dependencies.py` ([#156](https://github.com/Substra/substrafl/pull/156))
Expand Down
1 change: 1 addition & 0 deletions benchmark/camelyon/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ def substrafl_fed_avg(
local_code=[base / "common", base / "weldon_fedavg.py"],
# Keeping editable_mode=True to ensure nightly test benchmarks are ran against main substrafl git ref
editable_mode=True,
compile=True,
)

# Custom Strategy used for the data loading (from custom_torch_function.py file)
Expand Down
2 changes: 2 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,8 @@
("py:class", "substra.sdk.schemas.FunctionOutputSpec"),
("py:class", "substra.sdk.schemas.FunctionInputSpec"),
("py:class", "ComputePlan"),
("py:class", "Path"),
("py:class", "module"),
]

html_css_files = [
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"wheel",
"six",
"packaging",
"pip-tools",
],
extras_require={
"dev": [
Expand Down
1 change: 1 addition & 0 deletions substrafl/constants.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
TMP_SUBSTRAFL_PREFIX = "tmp_substrafl"
SUBSTRAFL_FOLDER = "substrafl_internal"
201 changes: 134 additions & 67 deletions substrafl/dependency/manage_dependencies.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,94 @@
"""
Generate wheels for the Substra algo.
Utility functions to manage dependencies (building wheels, compiling requirement...)
"""
import logging
import os
import re
import shutil
import subprocess
import sys
from pathlib import Path
from pathlib import PurePosixPath
from types import ModuleType
from typing import List
from typing import Union

from substrafl.exceptions import InvalidDependenciesError
from substrafl.exceptions import InvalidUserModuleError

logger = logging.getLogger(__name__)

LOCAL_WHEELS_FOLDER = Path.home() / ".substrafl"


def local_lib_wheels(lib_modules: List, *, operation_dir: Path, python_major_minor: str, dest_dir: str) -> str:
"""Prepares the private modules from lib_modules list to be installed in a Docker image and generates the
appropriated install command for a dockerfile. It first creates the wheel for each library. Each of the
libraries must be already installed in the correct version locally. Use command:
``pip install -e library-name`` in the directory of each library.
def build_user_dependency_wheel(lib_path: Path, dest_dir: Path) -> str:
"""Build the wheel for user dependencies passed as a local module.
Delete the local module when the build is done.
Args:
lib_path (Path): where the module is located.
dest_dir (Path): where the wheel needs to be copied.
Returns:
str: the filename of the wheel
"""
# sys.executable takes the current Python interpreter instead of the default one on the computer
try:
ret = subprocess.run(
[
sys.executable,
"-m",
"pip",
"wheel",
str(lib_path) + os.sep,
"--no-deps",
],
cwd=str(dest_dir),
check=True,
capture_output=True,
text=True,
)

# Delete the folder when the wheel is computed
shutil.rmtree(dest_dir / lib_path) # delete directory
except subprocess.CalledProcessError as e:
raise InvalidUserModuleError from e
wheel_name = re.findall(r"filename=(\S*)", ret.stdout)[0]

return wheel_name


def local_lib_wheels(lib_modules: List[ModuleType], *, dest_dir: Path) -> List[str]:
"""Generate wheels for the private modules from lib_modules list and returns the list of names for each wheel.
It first creates the wheel for each library. Each of the libraries must be already installed in the correct
version locally. Use command: ``pip install -e library-name`` in the directory of each library.
Then it copies the wheels to the given directory.
This allows one user to use custom version of the passed modules.
Args:
lib_modules (list): list of modules to be installed.
operation_dir (pathlib.Path): Path to the operation directory
python_major_minor (str): version which is to be used in the dockerfile. Eg: '3.8'
dest_dir (str): relative directory where the wheels are saved
Returns:
str: dockerfile command for installing the given modules
List[str]: wheel names for the given modules
"""
install_cmds = []
wheels_dir = operation_dir / dest_dir
wheels_dir.mkdir(exist_ok=True, parents=True)
wheel_names = []
dest_dir.mkdir(exist_ok=True, parents=True)
for lib_module in lib_modules:
if not (Path(lib_module.__file__).parents[1] / "setup.py").exists():
lib_path = Path(lib_module.__file__).parents[1]
# this function is in practice only called on substra libraries, and we know we use a setup.py
if not (lib_path / "setup.py").exists():
msg = ", ".join([lib.__name__ for lib in lib_modules])
raise NotImplementedError(
f"You must install {msg} in editable mode.\n" "eg `pip install -e substra` in the substra directory"
)
lib_name = lib_module.__name__
lib_path = Path(lib_module.__file__).parents[1]
wheel_name = f"{lib_name}-{lib_module.__version__}-py3-none-any.whl"

wheel_path = LOCAL_WHEELS_FOLDER / wheel_name
# Recreate the wheel only if it does not exist
# Recreate the wheel only if it does not exist
if wheel_path.exists():
logger.warning(
f"Existing wheel {wheel_path} will be used to build {lib_name}. "
Expand All @@ -54,14 +98,14 @@ def local_lib_wheels(lib_modules: List, *, operation_dir: Path, python_major_min
else:
# if the right version of substra or substratools is not found, it will search if they are already
# installed in 'dist' and take them from there.
# sys.executable takes the Python interpreter run by the code and not the default one on the computer
extra_args: list = list()
# sys.executable takes the current Python interpreter instead of the default one on the computer
extra_args: list = []
if lib_name == "substrafl":
extra_args = [
"--find-links",
operation_dir / "dist/substra",
dest_dir.parent / "substra",
"--find-links",
operation_dir / "dist/substratools",
dest_dir.parent / "substratools",
]
subprocess.check_output(
[
Expand All @@ -78,65 +122,88 @@ def local_lib_wheels(lib_modules: List, *, operation_dir: Path, python_major_min
cwd=str(lib_path),
)

shutil.copy(wheel_path, wheels_dir / wheel_name)
shutil.copy(wheel_path, dest_dir / wheel_name)
wheel_names.append(wheel_name)

# Necessary command to install the wheel in the docker image
force_reinstall = "--force-reinstall " if lib_name in ["substratools", "substra"] else ""
install_cmd = (
f"COPY {dest_dir}/{wheel_name} . \n"
+ f"RUN python{python_major_minor} -m pip install {force_reinstall}{wheel_name}\n"
)
install_cmds.append(install_cmd)
return wheel_names

return "\n".join(install_cmds)


def pypi_lib_wheels(lib_modules: List, *, operation_dir: Path, python_major_minor: str, dest_dir: str) -> str:
"""Retrieves lib_modules' wheels to be installed in a Docker image and generates
the appropriated install command for a dockerfile.
def get_pypi_dependencies_versions(lib_modules: List) -> List[str]:
"""Retrieve the version of the PyPI libraries installed to generate the dependency list
Args:
lib_modules (list): list of modules to be installed.
operation_dir (pathlib.Path): Path to the operation directory
python_major_minor (str): version which is to be used in the dockerfile. Eg: '3.8'
dest_dir (str): relative directory where the wheels are saved
Returns:
str: dockerfile command for installing the given modules
list(str): list of dependencies to install in the Docker container
"""
install_cmds = []
wheels_dir = operation_dir / dest_dir
wheels_dir.mkdir(exist_ok=True, parents=True)
return [f"{lib_module.__name__}=={lib_module.__version__}" for lib_module in lib_modules]

LOCAL_WHEELS_FOLDER.mkdir(exist_ok=True)

for lib_module in lib_modules:
wheel_name = f"{lib_module.__name__}-{lib_module.__version__}-py3-none-any.whl"
def write_requirements(dependency_list: List[Union[str, Path]], *, dest_dir: Path) -> None:
"""Writes down a `requirements.txt` file with the list of explicit dependencies.
# Download only if exists
if not ((LOCAL_WHEELS_FOLDER / wheel_name).exists()):
subprocess.check_output(
[
sys.executable,
"-m",
"pip",
"download",
"--only-binary",
":all:",
"--python-version",
python_major_minor,
"--no-deps",
"--implementation",
"py",
"-d",
LOCAL_WHEELS_FOLDER,
f"{lib_module.__name__}=={lib_module.__version__}",
]
)
Args:
dependency_list: list of dependencies to install; acceptable formats are library names (eg "substrafl"),
any constraint expression accepted by pip("substrafl==0.36.0" or "substrafl < 1.0") or wheel names
("substrafl-0.36.0-py3-none-any.whl")
dest_dir: path to the directory where to write the ``requirements.txt``.
"""
requirements_txt = dest_dir / "requirements.txt"

# Get wheel name based on current version
shutil.copy(LOCAL_WHEELS_FOLDER / wheel_name, wheels_dir / wheel_name)
install_cmd = f"COPY {dest_dir}/{wheel_name} . \n RUN python{python_major_minor} -m pip install {wheel_name}\n"
install_cmds.append(install_cmd)
_write_requirements_file(dependency_list=dependency_list, file=requirements_txt)

return "\n".join(install_cmds)

def compile_requirements(dependency_list: List[Union[str, Path]], *, dest_dir: Path) -> None:
"""Compile a list of requirements using pip-compile to generate a set of fully pinned third parties requirements
Writes down a `requirements.in` file with the list of explicit dependencies, then generates a `requirements.txt`
file using pip-compile. The `requirements.txt` file contains a set of fully pinned dependencies, including indirect
dependencies.
Args:
dependency_list: list of dependencies to install; acceptable formats are library names (eg "substrafl"),
any constraint expression accepted by pip("substrafl==0.36.0" or "substrafl < 1.0") or wheel names
("substrafl-0.36.0-py3-none-any.whl")
dest_dir: path to the directory where to write the ``requirements.in`` and ``requirements.txt``.
Raises:
InvalidDependenciesError: if pip-compile does not find a set of compatible dependencies
"""
requirements_in = dest_dir / "requirements.in"

_write_requirements_file(dependency_list=dependency_list, file=requirements_in)

command = [
sys.executable,
"-m",
"piptools",
"compile",
"--resolver=backtracking",
str(requirements_in),
]
try:
logger.info("Compiling dependencies.")
subprocess.run(
command,
cwd=dest_dir,
check=True,
capture_output=True,
text=True,
)
except subprocess.CalledProcessError as e:
raise InvalidDependenciesError(
f"Error in command {' '.join(command)}\nstdout: {e.stdout}\nstderr: {e.stderr}"
) from e


def _write_requirements_file(dependency_list: List[Union[str, Path]], *, file: Path) -> None:
requirements = ""
for dependency in dependency_list:
if str(dependency).endswith(".whl"):
# Require '/', even on windows. The double conversion resolves that.
requirements += f"file:{PurePosixPath(Path(dependency))}\n"
else:
requirements += f"{dependency}\n"
file.write_text(requirements)
Loading

0 comments on commit b6fa09f

Please sign in to comment.