diff --git a/README.md b/README.md index ec3c46d57..be24b54c7 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ QCEngine ======== -[![Travis build](https://img.shields.io/travis/MolSSI/QCEngine/master.svg?logo=linux&logoColor=white)](https://travis-ci.org/MolSSI/QCEngine) +[![CI](https://github.com/MolSSI/QCEngine/actions/workflows/CI.yml/badge.svg)](https://github.com/MolSSI/QCEngine/actions/workflows/CI.yml) [![codecov](https://img.shields.io/codecov/c/github/MolSSI/QCEngine.svg?logo=Codecov&logoColor=white)](https://codecov.io/gh/MolSSI/QCEngine) [![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/MolSSI/QCEngine.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/MolSSI/QCEngine/context:python) [![Documentation Status](https://readthedocs.org/projects/qcengine/badge/?version=latest)](https://qcengine.readthedocs.io/en/latest/?badge=latest) diff --git a/qcengine/tests/test_cli.py b/qcengine/tests/test_cli.py index 83f786260..ad8884ab6 100644 --- a/qcengine/tests/test_cli.py +++ b/qcengine/tests/test_cli.py @@ -36,7 +36,7 @@ def run_qcengine_cli(args: List[str], stdin: str = None) -> str: def test_no_args(): - """ Test for qcengine with no arguments """ + """Test for qcengine with no arguments""" try: run_qcengine_cli([]) except subprocess.CalledProcessError as e: diff --git a/qcengine/tests/test_utils.py b/qcengine/tests/test_utils.py index f1203da0b..38a6440f4 100644 --- a/qcengine/tests/test_utils.py +++ b/qcengine/tests/test_utils.py @@ -1,6 +1,7 @@ import os import sys import time +import pathlib import pytest from qcelemental.models import AtomicInput @@ -64,17 +65,28 @@ def test_tmpdir(): assert str(tmpdir).split(os.path.sep)[-1].endswith("this") -def test_disk_files(): +@pytest.mark.parametrize("outfiles_track", [[], ["thing*"], ["thing*", "other"]]) +def test_disk_files(outfiles_track): infiles = {"thing1": "hello", "thing2": "world", "other": "everyone"} outfiles = {"thing*": None, "other": None} with util.temporary_directory(suffix="this") as tmpdir: - with util.disk_files(infiles=infiles, outfiles=outfiles, cwd=tmpdir): + with util.disk_files(infiles=infiles, outfiles=outfiles, cwd=tmpdir, outfiles_track=outfiles_track): pass assert outfiles.keys() == {"thing*", "other"} - assert outfiles["thing*"]["thing1"] == "hello" - assert outfiles["other"] == "everyone" + for ofile, ofile_val in outfiles.items(): + if isinstance(ofile_val, dict): + if ofile in outfiles_track: + for fpath in ofile_val.values(): + assert isinstance(fpath, pathlib.PurePath) + else: + for key in ofile_val.keys(): + assert ofile_val[key] == infiles[key] + elif ofile in outfiles_track: + assert isinstance(ofile_val, pathlib.PurePath) + else: + assert ofile_val == infiles[ofile] def test_popen_tee_output(capsys): diff --git a/qcengine/util.py b/qcengine/util.py index 45177fede..0cf09f146 100644 --- a/qcengine/util.py +++ b/qcengine/util.py @@ -377,6 +377,7 @@ def execute( infiles: Optional[Dict[str, str]] = None, outfiles: Optional[List[str]] = None, *, + outfiles_track: Optional[List[str]] = None, as_binary: Optional[List[str]] = None, scratch_name: Optional[str] = None, scratch_directory: Optional[str] = None, @@ -404,6 +405,11 @@ def execute( outfiles : List[str] = None Output file names to be collected after execution into values. May be {}. + outfiles_track: List[str], optional + Keys of `outfiles` to keep track of without loading their contents in memory. + For specified filename in `outfiles_track`, the file path instead of contents + is stored in `outfiles`. To ensure tracked files are not deleted after execution, + you must set `scratch_messy=True`. as_binary : List[str] = None Keys of `infiles` or `outfiles` to be treated as bytes. scratch_name : str, optional @@ -428,7 +434,6 @@ def execute( Run command through the shell. exit_code: int, optional The exit code above which the process is considered failure. - Raises ------ FileExistsError @@ -471,7 +476,9 @@ def execute( ) as scrdir: popen_kwargs["cwd"] = scrdir popen_kwargs["shell"] = shell - with disk_files(infiles, outfiles, cwd=scrdir, as_binary=as_binary) as extrafiles: + with disk_files( + infiles, outfiles, cwd=scrdir, as_binary=as_binary, outfiles_track=outfiles_track + ) as extrafiles: with popen(command, popen_kwargs=popen_kwargs) as proc: # Wait for the subprocess to complete or the timeout to expire if interupt_after is None: @@ -562,7 +569,8 @@ def disk_files( *, cwd: Optional[str] = None, as_binary: Optional[List[str]] = None, -) -> Dict[str, Union[str, bytes]]: + outfiles_track: Optional[List[str]] = None, +) -> Dict[str, Union[str, bytes, Path]]: """Write and collect files. Parameters @@ -577,10 +585,13 @@ def disk_files( Directory to which to write and read files. as_binary : List[str] = None Keys in `infiles` (`outfiles`) to be written (read) as bytes, not decoded. - + outfiles_track: List[str], optional + Keys of `outfiles` to keep track of (i.e. file contents not loaded in memory). + For specified filename in `outfiles_track`, the file path instead of contents + is stored in `outfiles`. Yields ------ - Dict[str] = str + Dict[str, Union[str, bytes, Path]] outfiles with RHS filled in. """ @@ -592,6 +603,8 @@ def disk_files( as_binary = [] assert set(as_binary) <= (set(infiles) | set(outfiles)) + outfiles_track = outfiles_track or [] + try: for fl, content in infiles.items(): omode = "wb" if fl in as_binary else "w" @@ -603,20 +616,31 @@ def disk_files( yield outfiles finally: + outfiles_track = [ + fpath.name if "*" in track else track for track in outfiles_track for fpath in lwd.glob(track) + ] for fl in outfiles.keys(): + filename = lwd / fl omode = "rb" if fl in as_binary else "r" try: - filename = lwd / fl with open(filename, omode) as fp: - outfiles[fl] = fp.read() - LOGGER.info(f"... Writing ({omode}): {filename}") + if fl not in outfiles_track: + outfiles[fl] = fp.read() + LOGGER.info(f"... Reading ({omode}): {filename}") + else: + outfiles[fl] = filename + LOGGER.info(f"... Tracking: {filename}") except (OSError, FileNotFoundError): if "*" in fl: gfls = {} for gfl in lwd.glob(fl): with open(gfl, omode) as fp: - gfls[gfl.name] = fp.read() - LOGGER.info(f"... Writing ({omode}): {gfl}") + if gfl.name not in outfiles_track: + gfls[gfl.name] = fp.read() + LOGGER.info(f"... Reading ({omode}): {gfl}") + else: + gfls[gfl.name] = gfl + LOGGER.info(f"... Tracking: {gfl}") if not gfls: gfls = None outfiles[fl] = gfls