Skip to content

Commit

Permalink
Initial commit for python env in mlmd (#124)
Browse files Browse the repository at this point in the history
* Initial commit for python env in mlmd

* Adding python env in example-get-started code

* Adding some required changes

* Removing redundent code

* dropping Python_Env value in query output as it is very big in size most of the time

* addressed review changes

* made some changes for conda

* resolving issues in pushing conda env

* Fixed conda related issues

* Changes to fix conda output
  • Loading branch information
varkha-d-sharma authored Mar 26, 2024
1 parent cb6ce08 commit 14eb4fe
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 4 deletions.
9 changes: 7 additions & 2 deletions cmflib/cmf.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
link_execution_to_input_artifact,
)
from cmflib.utils.cmf_config import CmfConfig
from cmflib.utils.helper_functions import get_python_env
from cmflib.cmf_commands_wrapper import (
_metadata_push,
_metadata_pull,
Expand Down Expand Up @@ -351,6 +352,7 @@ def create_execution(
git_repo = git_get_repo()
git_start_commit = git_get_commit()
cmd = str(sys.argv) if cmd is None else cmd
python_env=get_python_env()
self.execution = create_new_execution_in_existing_run_context(
store=self.store,
# Type field when re-using executions
Expand All @@ -364,6 +366,7 @@ def create_execution(
pipeline_type=self.parent_context.name,
git_repo=git_repo,
git_start_commit=git_start_commit,
python_env=python_env,
custom_properties=custom_props,
create_new_execution=create_new_execution,
)
Expand Down Expand Up @@ -530,6 +533,7 @@ def merge_created_execution(
# print(custom_props)
git_repo = properties.get("Git_Repo", "")
git_start_commit = properties.get("Git_Start_Commit", "")
python_env = properties.get("Python_Env", "")
#name = properties.get("Name", "")
create_new_execution = True
execution_name = execution_type
Expand All @@ -550,6 +554,7 @@ def merge_created_execution(
pipeline_type=self.parent_context.name,
git_repo=git_repo,
git_start_commit=git_start_commit,
python_env=python_env,
custom_properties=custom_props,
create_new_execution=create_new_execution
)
Expand Down Expand Up @@ -1523,6 +1528,7 @@ def log_validation_output(
milliseconds_since_epoch=int(time.time() * 1000),
)


def update_existing_artifact(
self, artifact: mlpb.Artifact, custom_properties: t.Dict
):
Expand Down Expand Up @@ -2013,5 +2019,4 @@ def non_related_args(type:str,args:dict):
if repo ==type:
non_related_args=list(set(available_args)-set(dict_repository_args[repo]))
return non_related_args



6 changes: 5 additions & 1 deletion cmflib/metadata_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ def create_new_execution_in_existing_context(
EXECUTION_REPO = "Git_Repo"
EXECUTION_START_COMMIT = "Git_Start_Commit"
EXECUTION_END_COMMIT = "Git_End_Commit"
EXECUTION_PYTHON_ENV= "Python_Env"
EXECUTION_PIPELINE_TYPE = "Pipeline_Type"

EXECUTION_PIPELINE_ID = "Pipeline_id"
Expand Down Expand Up @@ -385,6 +386,7 @@ def create_new_execution_in_existing_run_context(
git_repo: str = None,
git_start_commit: str = None,
git_end_commit: str = "",
python_env: str = "",
custom_properties: {} = None,
create_new_execution:bool = True
) -> metadata_store_pb2.Execution:
Expand All @@ -409,6 +411,7 @@ def create_new_execution_in_existing_run_context(
EXECUTION_REPO: metadata_store_pb2.STRING,
EXECUTION_START_COMMIT: metadata_store_pb2.STRING,
EXECUTION_END_COMMIT: metadata_store_pb2.STRING,
EXECUTION_PYTHON_ENV: metadata_store_pb2.STRING,
},

properties={
Expand All @@ -422,7 +425,8 @@ def create_new_execution_in_existing_run_context(
EXECUTION_PIPELINE_ID: metadata_store_pb2.Value(int_value=pipeline_id),
EXECUTION_REPO: metadata_store_pb2.Value(string_value=git_repo),
EXECUTION_START_COMMIT: metadata_store_pb2.Value(string_value=git_start_commit),
EXECUTION_END_COMMIT: metadata_store_pb2.Value(string_value=git_end_commit)
EXECUTION_END_COMMIT: metadata_store_pb2.Value(string_value=git_end_commit),
EXECUTION_PYTHON_ENV: metadata_store_pb2.Value(string_value=python_env),
# should set to task ID, not component ID
},
custom_properties=mlmd_custom_properties,
Expand Down
50 changes: 50 additions & 0 deletions cmflib/utils/helper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
###

import os
import sys
import subprocess
import json

def is_git_repo():
git_dir = os.path.join(os.getcwd(), '.git')
Expand All @@ -24,3 +27,50 @@ def is_git_repo():
return f"A Git repository already exists in {git_dir}."
else:
return


def get_python_env()-> str:
installed_packages = ""
python_version = sys.version
packages = ""
# check if conda is installed
if is_conda_installed():
import conda
# List all installed packages and their versions
data = list_conda_packages_json()
transformed_result = [f"{entry['name']}=={entry['version']}" for entry in data]
installed_packages = transformed_result
packages = f"Conda: Python {python_version}: {installed_packages}"
else:
# pip
try:
from pip._internal.operations import freeze

# List all installed packages and their versions
installed_packages_generator = freeze.freeze()
installed_packages = list(installed_packages_generator)
packages = f"Python {python_version}: {installed_packages}"
except ImportError:
print("Pip is not installed.")
return packages

def is_conda_installed():
try:
import conda
# Run the 'conda --version' command and capture the output
subprocess.run(['conda', '--version'], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
return True
except subprocess.CalledProcessError:
return False
except ImportError:
return False


def list_conda_packages_json():
try:
result = subprocess.run(['conda', 'list', '--json'], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
return json.loads(result.stdout)
except subprocess.CalledProcessError as e:
return f"Error: {e.stderr}"


3 changes: 2 additions & 1 deletion examples/example-get-started/src/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ def _print_executions_in_stage(cmf_query: cmfquery.CmfQuery, stage_name: str) ->
print('\n')
print('\n')
df: pd.DataFrame = cmf_query.get_all_executions_in_stage(stage_name)
df.drop(columns=['Git_Start_Commit', 'Git_End_Commit'], inplace=True, axis=1)
# dropping Python_Env value in query output as it is very big in size most of the time
df.drop(columns=['Git_Start_Commit', 'Git_End_Commit', 'Python_Env'], inplace=True, axis=1)
print(tabulate(df, headers='keys', tablefmt='psql'))


Expand Down

0 comments on commit 14eb4fe

Please sign in to comment.