Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate and maybe prune interpreter cache run over run #7225

Merged
merged 5 commits into from
Feb 11, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 18 additions & 19 deletions src/python/pants/backend/python/interpreter_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,18 +115,19 @@ def select_interpreter_for_targets(self, targets):
# Return the lowest compatible interpreter.
return min(allowed_interpreters)

def _interpreter_from_path(self, path, filters=()):
try:
executable = os.readlink(os.path.join(path, 'python'))
if not os.path.exists(executable):
if os.path.dirname(path) == self._cache_dir:
def _interpreter_from_relpath(self, path, filters=()):
path = os.path.join(self._cache_dir, path)
if os.path.isdir(path):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can ditch this check, the os.readlink below will already raise OSError if any part of the path DNE.

try:
executable = os.readlink(os.path.join(path, 'python'))
if not os.path.exists(executable):
self._purge_interpreter(path)
return None
except OSError:
return None
except OSError:
return None
interpreter = PythonInterpreter.from_binary(executable, include_site_extras=False)
if self._matches(interpreter, filters=filters):
return self._resolve(interpreter)
interpreter = PythonInterpreter.from_binary(executable, include_site_extras=False)
if self._matches(interpreter, filters=filters):
return self._resolve(interpreter)
return None

def _setup_interpreter(self, interpreter, cache_target_path):
Expand All @@ -138,22 +139,20 @@ def _setup_interpreter(self, interpreter, cache_target_path):
def _setup_cached(self, filters=()):
"""Find all currently-cached interpreters."""
for interpreter_dir in os.listdir(self._cache_dir):
path = os.path.join(self._cache_dir, interpreter_dir)
if os.path.isdir(path):
pi = self._interpreter_from_path(path, filters=filters)
if pi:
logger.debug('Detected interpreter {}: {}'.format(pi.binary, str(pi.identity)))
yield pi
pi = self._interpreter_from_relpath(interpreter_dir, filters=filters)
if pi:
logger.debug('Detected interpreter {}: {}'.format(pi.binary, str(pi.identity)))
yield pi

def _setup_paths(self, paths, filters=()):
"""Find interpreters under paths, and cache them."""
for interpreter in self._matching(PythonInterpreter.all(paths), filters=filters):
identity_str = str(interpreter.identity)
cache_path = os.path.join(self._cache_dir, identity_str)
pi = self._interpreter_from_path(cache_path, filters=filters)
pi = self._interpreter_from_relpath(identity_str, filters=filters)
if pi is None:
cache_path = os.path.join(self._cache_dir, identity_str)
self._setup_interpreter(interpreter, cache_path)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be a bit more symmetric on the whole and less error-prone here to make _setup_interpreter take a relpath (identity_str) and have it do the final cache path construction.

pi = self._interpreter_from_path(cache_path, filters=filters)
pi = self._interpreter_from_relpath(identity_str, filters=filters)
if pi:
yield pi

Expand Down
6 changes: 2 additions & 4 deletions src/python/pants/backend/python/tasks/select_interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,8 @@ def _interpreter_path_file(self, target_set_id):
return os.path.join(self.workdir, target_set_id, 'interpreter.info')

def _detect_and_purge_invalid_interpreter(self, interpreter_path_file):
with open(interpreter_path_file, 'r') as infile:
lines = infile.readlines()
binary = lines[0].strip()
if not os.path.exists(binary):
interpreter = self._get_interpreter(interpreter_path_file)
if not os.path.exists(interpreter.binary):
self.context.log.info('Stale interpreter reference detected: {}, removing reference and '
'selecting a new interpreter.'.format(binary))
os.remove(interpreter_path_file)
Expand Down
31 changes: 31 additions & 0 deletions tests/python/pants_test/backend/python/test_interpreter_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import os
import shutil
from builtins import str
from contextlib import contextmanager

Expand All @@ -16,6 +17,7 @@
from pants.backend.python.interpreter_cache import PythonInterpreter, PythonInterpreterCache
from pants.subsystem.subsystem import Subsystem
from pants.util.contextutil import temporary_dir
from pants.util.dirutil import chmod_plus_x, safe_mkdir
from pants_test.backend.python.interpreter_selection_utils import (PY_27, PY_36,
python_interpreter_path,
skip_unless_python27_and_python36)
Expand Down Expand Up @@ -171,3 +173,32 @@ def test_setup_cached_warm(self):
def test_setup_cached_cold(self):
with self._setup_cache() as (cache, _):
self.assertEqual([], list(cache._setup_cached()))

def test_interpreter_from_relpath_purges_stale_interpreter(self):
"""
Simulates a stale interpreter cache and tests that _interpreter_from_relpath
properly detects it and removes the stale dist directory.

See https://github.com/pantsbuild/pants/issues/3416 for more info.
"""
with temporary_dir() as temp_dir:
# Setup a interpreter distribution that we can safely mutate.
test_interpreter_binary = os.path.join(temp_dir, 'python2.7')
src = '/usr/bin/python2.7'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It realy is more robust to os.path.realpath(sys.executable) - you know that oath must exist and points to the actual binary file and not a symlink.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to use the output of which python2.7 like above tests with py27 path, however only copying the binary for that path gave me issues with linking against the python runtime. I'll give this a shot.

Copy link
Contributor Author

@CMLivingston CMLivingston Feb 8, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am experiencing a different issue with sys.exe:

[tw-mbp-clivingston pants (clivingston/PY-135)]$ /var/folders/7n/x4k4tqh94k9gwm61g046m2340000gn/T/tmp9p8l3H/python2.7
Could not find platform independent libraries <prefix>
Could not find platform dependent libraries <exec_prefix>
Consider setting $PYTHONHOME to <prefix>[:<exec_prefix>]
ImportError: No module named site

(due to the venv I think)

I think that /usr/bin/python2.7 gets me PATH coherence for free which is why I did that. I'm happy to play around with PYTHONPATH or temp PATH setting.

shutil.copyfile(src, test_interpreter_binary)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shutil.copy2 would be a one-stop-shop: https://docs.python.org/2.7/library/shutil.html#shutil.copy2

chmod_plus_x(test_interpreter_binary)

with self._setup_cache(constraints=[]) as (cache, path):
# Setup cache for test interpreter distribution.
identity_str = str(PythonInterpreter.from_binary(test_interpreter_binary).identity)
cached_interpreter_dir = os.path.join(cache._cache_dir, identity_str)
safe_mkdir(cached_interpreter_dir)
cached_symlink = os.path.join(cached_interpreter_dir, 'python')
os.symlink(test_interpreter_binary, cached_symlink)

# Remove the test interpreter binary from filesystem and assert that the cache is purged.
os.remove(test_interpreter_binary)
self.assertEqual(os.path.exists(test_interpreter_binary), False)
self.assertEqual(os.path.exists(cached_interpreter_dir), True)
cache._interpreter_from_relpath(identity_str)
self.assertEqual(os.path.exists(cached_interpreter_dir), False)