From 874ce34973165dd044f7c08d17ccfccd6be2badc Mon Sep 17 00:00:00 2001 From: Chris Livingston Date: Mon, 11 Feb 2019 13:27:50 -0500 Subject: [PATCH] Validate and maybe prune interpreter cache run over run (#7225) * Purge stale interpreters from Interpreter Cache --- .../pants/backend/python/interpreter_cache.py | 37 +++++++++++++------ .../python/tasks/select_interpreter.py | 12 ++++++ .../backend/python/test_interpreter_cache.py | 36 +++++++++++++++++- 3 files changed, 72 insertions(+), 13 deletions(-) diff --git a/src/python/pants/backend/python/interpreter_cache.py b/src/python/pants/backend/python/interpreter_cache.py index e0126f74e13..95908ddbd75 100644 --- a/src/python/pants/backend/python/interpreter_cache.py +++ b/src/python/pants/backend/python/interpreter_cache.py @@ -115,9 +115,13 @@ def select_interpreter_for_targets(self, targets): # Return the lowest compatible interpreter. return min(allowed_interpreters) - def _interpreter_from_path(self, path, filters=()): + def _interpreter_from_relpath(self, path, filters=()): + path = os.path.join(self._cache_dir, path) try: executable = os.readlink(os.path.join(path, 'python')) + if not os.path.exists(executable): + self._purge_interpreter(path) + return None except OSError: return None interpreter = PythonInterpreter.from_binary(executable, include_site_extras=False) @@ -125,7 +129,8 @@ def _interpreter_from_path(self, path, filters=()): return self._resolve(interpreter) return None - def _setup_interpreter(self, interpreter, cache_target_path): + def _setup_interpreter(self, interpreter, identity_str): + cache_target_path = os.path.join(self._cache_dir, identity_str) with safe_concurrent_creation(cache_target_path) as safe_path: os.mkdir(safe_path) # Parent will already have been created by safe_concurrent_creation. os.symlink(interpreter.binary, os.path.join(safe_path, 'python')) @@ -134,22 +139,19 @@ def _setup_interpreter(self, interpreter, cache_target_path): def _setup_cached(self, filters=()): """Find all currently-cached interpreters.""" for interpreter_dir in os.listdir(self._cache_dir): - path = os.path.join(self._cache_dir, interpreter_dir) - if os.path.isdir(path): - pi = self._interpreter_from_path(path, filters=filters) - if pi: - logger.debug('Detected interpreter {}: {}'.format(pi.binary, str(pi.identity))) - yield pi + pi = self._interpreter_from_relpath(interpreter_dir, filters=filters) + if pi: + logger.debug('Detected interpreter {}: {}'.format(pi.binary, str(pi.identity))) + yield pi def _setup_paths(self, paths, filters=()): """Find interpreters under paths, and cache them.""" for interpreter in self._matching(PythonInterpreter.all(paths), filters=filters): identity_str = str(interpreter.identity) - cache_path = os.path.join(self._cache_dir, identity_str) - pi = self._interpreter_from_path(cache_path, filters=filters) + pi = self._interpreter_from_relpath(identity_str, filters=filters) if pi is None: - self._setup_interpreter(interpreter, cache_path) - pi = self._interpreter_from_path(cache_path, filters=filters) + self._setup_interpreter(interpreter, identity_str) + pi = self._interpreter_from_relpath(identity_str, filters=filters) if pi: yield pi @@ -251,3 +253,14 @@ def _resolve_and_link(self, interpreter, requirement, target_link): _safe_link(target_location, target_link) logger.debug(' installed {}'.format(target_location)) return Package.from_href(target_location) + + def _purge_interpreter(self, interpreter_dir): + try: + logger.info('Detected stale interpreter `{}` in the interpreter cache, purging.' + .format(interpreter_dir)) + shutil.rmtree(interpreter_dir, ignore_errors=True) + except Exception as e: + logger.warn( + 'Caught exception {!r} during interpreter purge. Please run `./pants clean-all`!' + .format(e) + ) diff --git a/src/python/pants/backend/python/tasks/select_interpreter.py b/src/python/pants/backend/python/tasks/select_interpreter.py index 8c0905a90e2..24a06809f47 100644 --- a/src/python/pants/backend/python/tasks/select_interpreter.py +++ b/src/python/pants/backend/python/tasks/select_interpreter.py @@ -78,6 +78,9 @@ def execute(self): interpreter_path_file = self._interpreter_path_file(target_set_id) if not os.path.exists(interpreter_path_file): self._create_interpreter_path_file(interpreter_path_file, python_tgts) + else: + if self._detect_and_purge_invalid_interpreter(interpreter_path_file): + self._create_interpreter_path_file(interpreter_path_file, python_tgts) interpreter = self._get_interpreter(interpreter_path_file) self.context.products.register_data(PythonInterpreter, interpreter) @@ -95,6 +98,15 @@ def _create_interpreter_path_file(self, interpreter_path_file, targets): def _interpreter_path_file(self, target_set_id): return os.path.join(self.workdir, target_set_id, 'interpreter.info') + def _detect_and_purge_invalid_interpreter(self, interpreter_path_file): + interpreter = self._get_interpreter(interpreter_path_file) + if not os.path.exists(interpreter.binary): + self.context.log.info('Stale interpreter reference detected: {}, removing reference and ' + 'selecting a new interpreter.'.format(interpreter.binary)) + os.remove(interpreter_path_file) + return True + return False + @staticmethod def _get_interpreter(interpreter_path_file): with open(interpreter_path_file, 'r') as infile: diff --git a/tests/python/pants_test/backend/python/test_interpreter_cache.py b/tests/python/pants_test/backend/python/test_interpreter_cache.py index 86196d710ed..84cf353cf2d 100644 --- a/tests/python/pants_test/backend/python/test_interpreter_cache.py +++ b/tests/python/pants_test/backend/python/test_interpreter_cache.py @@ -5,6 +5,8 @@ from __future__ import absolute_import, division, print_function, unicode_literals import os +import shutil +import sys from builtins import str from contextlib import contextmanager @@ -15,7 +17,8 @@ from pants.backend.python.interpreter_cache import PythonInterpreter, PythonInterpreterCache from pants.subsystem.subsystem import Subsystem -from pants.util.contextutil import temporary_dir +from pants.util.contextutil import environment_as, temporary_dir +from pants.util.dirutil import safe_mkdir from pants_test.backend.python.interpreter_selection_utils import (PY_27, PY_36, python_interpreter_path, skip_unless_python27_and_python36) @@ -171,3 +174,34 @@ def test_setup_cached_warm(self): def test_setup_cached_cold(self): with self._setup_cache() as (cache, _): self.assertEqual([], list(cache._setup_cached())) + + def test_interpreter_from_relpath_purges_stale_interpreter(self): + """ + Simulates a stale interpreter cache and tests that _interpreter_from_relpath + properly detects it and removes the stale dist directory. + + See https://github.com/pantsbuild/pants/issues/3416 for more info. + """ + with temporary_dir() as temp_dir: + # Setup a interpreter distribution that we can safely mutate. + test_interpreter_binary = os.path.join(temp_dir, 'python2.7') + src = os.path.realpath(sys.executable) + sys_exe_dist = os.path.dirname(os.path.dirname(src)) + shutil.copy2(src, test_interpreter_binary) + with environment_as( + PYTHONPATH='{}'.format(os.path.join(sys_exe_dist, 'lib/python2.7')) + ): + with self._setup_cache(constraints=[]) as (cache, path): + # Setup cache for test interpreter distribution. + identity_str = str(PythonInterpreter.from_binary(test_interpreter_binary).identity) + cached_interpreter_dir = os.path.join(cache._cache_dir, identity_str) + safe_mkdir(cached_interpreter_dir) + cached_symlink = os.path.join(cached_interpreter_dir, 'python') + os.symlink(test_interpreter_binary, cached_symlink) + + # Remove the test interpreter binary from filesystem and assert that the cache is purged. + os.remove(test_interpreter_binary) + self.assertEqual(os.path.exists(test_interpreter_binary), False) + self.assertEqual(os.path.exists(cached_interpreter_dir), True) + cache._interpreter_from_relpath(identity_str) + self.assertEqual(os.path.exists(cached_interpreter_dir), False)