Skip to content

Commit

Permalink
Significantly speed up file handling error paths (python#17920)
Browse files (browse the repository at this point in the history)
This can have a huge overall impact on mypy performance when search paths are long.
  • Loading branch information
hauntsaninja authored Oct 14, 2024
1 parent ca97d96 commit c32d11e
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 49 deletions.
17 changes: 8 additions & 9 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -736,8 +736,8 @@ def maybe_swap_for_shadow_path(self, path: str) -> str:
shadow_file = self.shadow_equivalence_map.get(path)
return shadow_file if shadow_file else path

def get_stat(self, path: str) -> os.stat_result:
return self.fscache.stat(self.maybe_swap_for_shadow_path(path))
def get_stat(self, path: str) -> os.stat_result | None:
return self.fscache.stat_or_none(self.maybe_swap_for_shadow_path(path))

def getmtime(self, path: str) -> int:
"""Return a file's mtime; but 0 in bazel mode.
Expand Down Expand Up @@ -1394,9 +1394,9 @@ def validate_meta(
if bazel:
# Normalize path under bazel to make sure it isn't absolute
path = normpath(path, manager.options)
try:
st = manager.get_stat(path)
except OSError:

st = manager.get_stat(path)
if st is None:
return None
if not stat.S_ISDIR(st.st_mode) and not stat.S_ISREG(st.st_mode):
manager.log(f"Metadata abandoned for {id}: file or directory {path} does not exist")
Expand Down Expand Up @@ -1572,10 +1572,9 @@ def write_cache(
plugin_data = manager.plugin.report_config_data(ReportConfigContext(id, path, is_check=False))

# Obtain and set up metadata
try:
st = manager.get_stat(path)
except OSError as err:
manager.log(f"Cannot get stat for {path}: {err}")
st = manager.get_stat(path)
if st is None:
manager.log(f"Cannot get stat for {path}")
# Remove apparently-invalid cache files.
# (This is purely an optimization.)
for filename in [data_json, meta_json]:
Expand Down
59 changes: 26 additions & 33 deletions mypy/fscache.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ def set_package_root(self, package_root: list[str]) -> None:

def flush(self) -> None:
"""Start another transaction and empty all caches."""
self.stat_cache: dict[str, os.stat_result] = {}
self.stat_error_cache: dict[str, OSError] = {}
self.stat_or_none_cache: dict[str, os.stat_result | None] = {}

self.listdir_cache: dict[str, list[str]] = {}
self.listdir_error_cache: dict[str, OSError] = {}
self.isfile_case_cache: dict[str, bool] = {}
Expand All @@ -62,24 +62,21 @@ def flush(self) -> None:
self.hash_cache: dict[str, str] = {}
self.fake_package_cache: set[str] = set()

def stat(self, path: str) -> os.stat_result:
if path in self.stat_cache:
return self.stat_cache[path]
if path in self.stat_error_cache:
raise copy_os_error(self.stat_error_cache[path])
def stat_or_none(self, path: str) -> os.stat_result | None:
if path in self.stat_or_none_cache:
return self.stat_or_none_cache[path]

st = None
try:
st = os.stat(path)
except OSError as err:
except OSError:
if self.init_under_package_root(path):
try:
return self._fake_init(path)
st = self._fake_init(path)
except OSError:
pass
# Take a copy to get rid of associated traceback and frame objects.
# Just assigning to __traceback__ doesn't free them.
self.stat_error_cache[path] = copy_os_error(err)
raise err
self.stat_cache[path] = st

self.stat_or_none_cache[path] = st
return st

def init_under_package_root(self, path: str) -> bool:
Expand Down Expand Up @@ -112,9 +109,9 @@ def init_under_package_root(self, path: str) -> bool:
if not os.path.basename(dirname).isidentifier():
# Can't put an __init__.py in a place that's not an identifier
return False
try:
st = self.stat(dirname)
except OSError:

st = self.stat_or_none(dirname)
if st is None:
return False
else:
if not stat.S_ISDIR(st.st_mode):
Expand Down Expand Up @@ -145,15 +142,14 @@ def _fake_init(self, path: str) -> os.stat_result:
assert basename == "__init__.py", path
assert not os.path.exists(path), path # Not cached!
dirname = os.path.normpath(dirname)
st = self.stat(dirname) # May raise OSError
st = os.stat(dirname) # May raise OSError
# Get stat result as a list so we can modify it.
seq: list[float] = list(st)
seq[stat.ST_MODE] = stat.S_IFREG | 0o444
seq[stat.ST_INO] = 1
seq[stat.ST_NLINK] = 1
seq[stat.ST_SIZE] = 0
st = os.stat_result(seq)
self.stat_cache[path] = st
# Make listdir() and read() also pretend this file exists.
self.fake_package_cache.add(dirname)
return st
Expand Down Expand Up @@ -181,9 +177,8 @@ def listdir(self, path: str) -> list[str]:
return results

def isfile(self, path: str) -> bool:
try:
st = self.stat(path)
except OSError:
st = self.stat_or_none(path)
if st is None:
return False
return stat.S_ISREG(st.st_mode)

Expand Down Expand Up @@ -248,18 +243,14 @@ def exists_case(self, path: str, prefix: str) -> bool:
return res

def isdir(self, path: str) -> bool:
try:
st = self.stat(path)
except OSError:
st = self.stat_or_none(path)
if st is None:
return False
return stat.S_ISDIR(st.st_mode)

def exists(self, path: str) -> bool:
try:
self.stat(path)
except FileNotFoundError:
return False
return True
st = self.stat_or_none(path)
return st is not None

def read(self, path: str) -> bytes:
if path in self.read_cache:
Expand All @@ -269,7 +260,7 @@ def read(self, path: str) -> bytes:

# Need to stat first so that the contents of file are from no
# earlier instant than the mtime reported by self.stat().
self.stat(path)
self.stat_or_none(path)

dirname, basename = os.path.split(path)
dirname = os.path.normpath(dirname)
Expand All @@ -294,8 +285,10 @@ def hash_digest(self, path: str) -> str:
return self.hash_cache[path]

def samefile(self, f1: str, f2: str) -> bool:
s1 = self.stat(f1)
s2 = self.stat(f2)
s1 = self.stat_or_none(f1)
s2 = self.stat_or_none(f2)
if s1 is None or s2 is None:
return False
return os.path.samestat(s1, s2)


Expand Down
13 changes: 6 additions & 7 deletions mypy/fswatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import os
from typing import AbstractSet, Iterable, NamedTuple

from mypy.fscache import FileSystemCache
Expand Down Expand Up @@ -56,18 +57,16 @@ def remove_watched_paths(self, paths: Iterable[str]) -> None:
del self._file_data[path]
self._paths -= set(paths)

def _update(self, path: str) -> None:
st = self.fs.stat(path)
def _update(self, path: str, st: os.stat_result) -> None:
hash_digest = self.fs.hash_digest(path)
self._file_data[path] = FileData(st.st_mtime, st.st_size, hash_digest)

def _find_changed(self, paths: Iterable[str]) -> AbstractSet[str]:
changed = set()
for path in paths:
old = self._file_data[path]
try:
st = self.fs.stat(path)
except FileNotFoundError:
st = self.fs.stat_or_none(path)
if st is None:
if old is not None:
# File was deleted.
changed.add(path)
Expand All @@ -76,13 +75,13 @@ def _find_changed(self, paths: Iterable[str]) -> AbstractSet[str]:
if old is None:
# File is new.
changed.add(path)
self._update(path)
self._update(path, st)
# Round mtimes down, to match the mtimes we write to meta files
elif st.st_size != old.st_size or int(st.st_mtime) != int(old.st_mtime):
# Only look for changes if size or mtime has changed as an
# optimization, since calculating hash is expensive.
new_hash = self.fs.hash_digest(path)
self._update(path)
self._update(path, st)
if st.st_size != old.st_size or new_hash != old.hash:
# Changed file.
changed.add(path)
Expand Down

0 comments on commit c32d11e

Please sign in to comment.