Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prevent util.execute from reading output files #296

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
QCEngine
========
[![Travis build](https://img.shields.io/travis/MolSSI/QCEngine/master.svg?logo=linux&logoColor=white)](https://travis-ci.org/MolSSI/QCEngine)
[![CI](https://github.com/MolSSI/QCEngine/actions/workflows/CI.yml/badge.svg)](https://github.com/MolSSI/QCEngine/actions/workflows/CI.yml)
[![codecov](https://img.shields.io/codecov/c/github/MolSSI/QCEngine.svg?logo=Codecov&logoColor=white)](https://codecov.io/gh/MolSSI/QCEngine)
[![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/MolSSI/QCEngine.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/MolSSI/QCEngine/context:python)
[![Documentation Status](https://readthedocs.org/projects/qcengine/badge/?version=latest)](https://qcengine.readthedocs.io/en/latest/?badge=latest)
Expand Down
2 changes: 1 addition & 1 deletion qcengine/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def run_qcengine_cli(args: List[str], stdin: str = None) -> str:


def test_no_args():
""" Test for qcengine with no arguments """
"""Test for qcengine with no arguments"""
try:
run_qcengine_cli([])
except subprocess.CalledProcessError as e:
Expand Down
20 changes: 16 additions & 4 deletions qcengine/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import sys
import time
import pathlib

import pytest
from qcelemental.models import AtomicInput
Expand Down Expand Up @@ -64,17 +65,28 @@ def test_tmpdir():
assert str(tmpdir).split(os.path.sep)[-1].endswith("this")


def test_disk_files():
@pytest.mark.parametrize("outfiles_track", [[], ["thing*"], ["thing*", "other"]])
def test_disk_files(outfiles_track):

infiles = {"thing1": "hello", "thing2": "world", "other": "everyone"}
outfiles = {"thing*": None, "other": None}
with util.temporary_directory(suffix="this") as tmpdir:
with util.disk_files(infiles=infiles, outfiles=outfiles, cwd=tmpdir):
with util.disk_files(infiles=infiles, outfiles=outfiles, cwd=tmpdir, outfiles_track=outfiles_track):
pass

assert outfiles.keys() == {"thing*", "other"}
assert outfiles["thing*"]["thing1"] == "hello"
assert outfiles["other"] == "everyone"
for ofile, ofile_val in outfiles.items():
if isinstance(ofile_val, dict):
if ofile in outfiles_track:
for fpath in ofile_val.values():
assert isinstance(fpath, pathlib.PurePath)
else:
for key in ofile_val.keys():
assert ofile_val[key] == infiles[key]
elif ofile in outfiles_track:
assert isinstance(ofile_val, pathlib.PurePath)
else:
assert ofile_val == infiles[ofile]


def test_popen_tee_output(capsys):
Expand Down
44 changes: 34 additions & 10 deletions qcengine/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,7 @@ def execute(
infiles: Optional[Dict[str, str]] = None,
outfiles: Optional[List[str]] = None,
*,
outfiles_track: Optional[List[str]] = None,
as_binary: Optional[List[str]] = None,
scratch_name: Optional[str] = None,
scratch_directory: Optional[str] = None,
Expand Down Expand Up @@ -404,6 +405,11 @@ def execute(
outfiles : List[str] = None
Output file names to be collected after execution into
values. May be {}.
outfiles_track: List[str], optional
Keys of `outfiles` to keep track of without loading their contents in memory.
For specified filename in `outfiles_track`, the file path instead of contents
is stored in `outfiles`. To ensure tracked files are not deleted after execution,
you must set `scratch_messy=True`.
as_binary : List[str] = None
Keys of `infiles` or `outfiles` to be treated as bytes.
scratch_name : str, optional
Expand All @@ -428,7 +434,6 @@ def execute(
Run command through the shell.
exit_code: int, optional
The exit code above which the process is considered failure.

Raises
------
FileExistsError
Expand Down Expand Up @@ -471,7 +476,9 @@ def execute(
) as scrdir:
popen_kwargs["cwd"] = scrdir
popen_kwargs["shell"] = shell
with disk_files(infiles, outfiles, cwd=scrdir, as_binary=as_binary) as extrafiles:
with disk_files(
infiles, outfiles, cwd=scrdir, as_binary=as_binary, outfiles_track=outfiles_track
) as extrafiles:
with popen(command, popen_kwargs=popen_kwargs) as proc:
# Wait for the subprocess to complete or the timeout to expire
if interupt_after is None:
Expand Down Expand Up @@ -562,7 +569,8 @@ def disk_files(
*,
cwd: Optional[str] = None,
as_binary: Optional[List[str]] = None,
) -> Dict[str, Union[str, bytes]]:
outfiles_track: Optional[List[str]] = None,
) -> Dict[str, Union[str, bytes, Path]]:
"""Write and collect files.

Parameters
Expand All @@ -577,10 +585,13 @@ def disk_files(
Directory to which to write and read files.
as_binary : List[str] = None
Keys in `infiles` (`outfiles`) to be written (read) as bytes, not decoded.

outfiles_track: List[str], optional
Keys of `outfiles` to keep track of (i.e. file contents not loaded in memory).
For specified filename in `outfiles_track`, the file path instead of contents
is stored in `outfiles`.
Yields
------
Dict[str] = str
Dict[str, Union[str, bytes, Path]]
outfiles with RHS filled in.

"""
Expand All @@ -592,6 +603,8 @@ def disk_files(
as_binary = []
assert set(as_binary) <= (set(infiles) | set(outfiles))

outfiles_track = outfiles_track or []

try:
for fl, content in infiles.items():
omode = "wb" if fl in as_binary else "w"
Expand All @@ -603,20 +616,31 @@ def disk_files(
yield outfiles

finally:
outfiles_track = [
fpath.name if "*" in track else track for track in outfiles_track for fpath in lwd.glob(track)
]
for fl in outfiles.keys():
filename = lwd / fl
omode = "rb" if fl in as_binary else "r"
try:
filename = lwd / fl
with open(filename, omode) as fp:
outfiles[fl] = fp.read()
LOGGER.info(f"... Writing ({omode}): {filename}")
if fl not in outfiles_track:
outfiles[fl] = fp.read()
LOGGER.info(f"... Reading ({omode}): {filename}")
else:
outfiles[fl] = filename
LOGGER.info(f"... Tracking: {filename}")
except (OSError, FileNotFoundError):
if "*" in fl:
gfls = {}
for gfl in lwd.glob(fl):
with open(gfl, omode) as fp:
gfls[gfl.name] = fp.read()
LOGGER.info(f"... Writing ({omode}): {gfl}")
if gfl.name not in outfiles_track:
gfls[gfl.name] = fp.read()
LOGGER.info(f"... Reading ({omode}): {gfl}")
else:
gfls[gfl.name] = gfl
LOGGER.info(f"... Tracking: {gfl}")
if not gfls:
gfls = None
outfiles[fl] = gfls
Expand Down