Skip to content

Commit

Permalink
Added support for staging and pushing external files (#180)
Browse files Browse the repository at this point in the history
* Added support for staging and pushing external files

* Fixed dvc wrapper
  • Loading branch information
rishabhsharma22 authored Jul 24, 2024
1 parent fcc78d0 commit 3046849
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 6 deletions.
4 changes: 4 additions & 0 deletions cmflib/bin/cmf_dvc_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def get_cmf_hierarchy(execution_lineage:str):
execution_exist : True if it exeist, False otherwise
metawrite: cmf object
"""
tracked = {} #Used to keep a record of files tracked by outs and therefore not needed to be tracked in deps
def ingest_metadata(execution_lineage:str, metadata:dict, metawriter:cmf.Cmf, command:str = "") :
pipeline_name, context_name, execution = get_cmf_hierarchy(execution_lineage)

Expand All @@ -77,9 +78,12 @@ def ingest_metadata(execution_lineage:str, metadata:dict, metawriter:cmf.Cmf, co
if k == "deps":
for dep in v:
metawriter.log_dataset_with_version(dep["path"], dep["md5"], "input")
if dep["path"] not in tracked:
metawriter.log_dataset(dep["path"], 'input')
if k == "outs":
for out in v:
metawriter.log_dataset_with_version(out["path"], out["md5"], "output")
tracked[out["path"]] = True

def find_location(string, elements):
for index, element in enumerate(elements):
Expand Down
24 changes: 18 additions & 6 deletions cmflib/dvc_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
###
# Copyright (2022) Hewlett Packard Enterprise Development LP
# Copyright (2024) Hewlett Packard Enterprise Development LP
#
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
Expand Down Expand Up @@ -74,6 +74,12 @@ def dvc_get_url(folder: str, retry: bool = False, repo: str = "") -> str:
url = dvc_get_url(folder, True)
else:
print(f"dvc.exceptions.PathMissingError Caught Unexpected {err}, {type(err)}")
except dvc.exceptions.OutputNotFoundError as err:
if not retry:
filename = folder.split('/')[-1]
folder = os.path.join(os.getcwd() , filename)
url = dvc_get_url(folder, True)

except Exception as err:
print(f"Unexpected {err}, {type(err)}")
return url
Expand Down Expand Up @@ -137,6 +143,7 @@ def git_checkout_new_branch(branch_name: str):
except Exception as err:
process.kill()
outs, errs = process.communicate()

print(f"Unexpected {err}, {type(err)}")
print(f"Unexpected {outs}")
print(f"Unexpected {errs}")
Expand Down Expand Up @@ -230,14 +237,19 @@ def commit_output(folder: str, execution_id: str) -> str:
commit = ""
process = ""
try:
process = subprocess.Popen(['dvc', 'add', folder],
stdout=subprocess.PIPE,
universal_newlines=True)

if os.path.exists(os.getcwd() + '/' + folder):
process = subprocess.Popen(['dvc', 'add', folder],
stdout=subprocess.PIPE,
universal_newlines=True)
else:
process = subprocess.Popen(['dvc', 'import-url', '--to-remote', folder],
stdout=subprocess.PIPE,
universal_newlines=True)

# To-Do : Parse the output and report if error
output, errs = process.communicate()
commit = output.strip()
process = subprocess.Popen(['git', 'add', folder + '.dvc'],
process = subprocess.Popen(['git', 'add', folder.split('/')[-1] + '.dvc'],
stdout=subprocess.PIPE,
universal_newlines=True)
# To-Do : Parse the output and report if error
Expand Down

0 comments on commit 3046849

Please sign in to comment.