Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Permalink
issue warning instead of failing when lock can't be acquired on a res…
Browse files Browse the repository at this point in the history
…ource that exists in a read-only file system (#4867)

* allow FileLock to work on read-only file-system

* catch OSError too

* narrow-down the OSError

* fix
  • Loading branch information
epwalsh authored Jan 5, 2021
1 parent ec197c3 commit 1d21c75
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 6 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- Added a `FileLock` class to `common.file_utils`. This is just like the `FileLock` from the `filelock` library, except that
it adds an optional flag `read_only_ok: bool`, which when set to `True` changes the behavior so that a warning will be emitted
instead of an exception when lacking write permissions on an existing file lock.
This makes it possible to use the `FileLock` class on a read-only file system.
- Added a new learning rate scheduler: `CombinedLearningRateScheduler`. This can be used to combine different LR schedulers, using one after the other.
- Moving `ModelCard` and `TaskCard` abstractions into the main repository.

Expand All @@ -21,6 +25,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed

- Fixed typo with `LabelField` string representation: removed trailing apostrophe.
- `Vocabulary.from_files` and `cached_path` will issue a warning, instead of failing, when a lock on an existing resource
can't be acquired because the file system is read-only.


## [v1.3.0](https://github.com/allenai/allennlp/releases/tag/v1.3.0) - 2020-12-15
Expand Down
46 changes: 44 additions & 2 deletions allennlp/common/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,13 @@
import tarfile
import shutil
import time
import warnings

import boto3
import botocore
from botocore.exceptions import ClientError, EndpointConnectionError
from filelock import FileLock
from filelock import FileLock as _FileLock
from overrides import overrides
import requests
from requests.adapters import HTTPAdapter
from requests.exceptions import ConnectionError
Expand All @@ -63,6 +65,46 @@
)


class FileLock(_FileLock):
"""
This is just a subclass of the `FileLock` class from the `filelock` library, except that
it adds an additional argument to the `__init__` method: `read_only_ok`.
By default this flag is `False`, which an exception will be thrown when a lock
can't be acquired due to lack of write permissions.
But if this flag is set to `True`, a warning will be emitted instead of an error when
the lock already exists but the lock can't be acquired because write access is blocked.
"""

def __init__(
self, lock_file: Union[str, PathLike], timeout=-1, read_only_ok: bool = False
) -> None:
super().__init__(str(lock_file), timeout=timeout)
self._read_only_ok = read_only_ok

@overrides
def acquire(self, timeout=None, poll_interval=0.05):
try:
super().acquire(timeout=timeout, poll_intervall=poll_interval)
except OSError as err:
# OSError could be a lot of different things, but what we're looking
# for in particular are permission errors, such as:
# - errno 1 - EPERM - "Operation not permitted"
# - errno 13 - EACCES - "Permission denied"
# - errno 30 - EROFS - "Read-only file system"
if err.errno not in (1, 13, 30):
raise

if os.path.isfile(self._lock_file) and self._read_only_ok:
warnings.warn(
f"Lacking permissions required to obtain lock '{self._lock_file}'. "
"Race conditions are possible if other processes are writing to the same resource.",
UserWarning,
)
else:
raise


def _resource_to_filename(resource: str, etag: str = None) -> str:
"""
Convert a `resource` into a hashed filename in a repeatable way.
Expand Down Expand Up @@ -583,7 +625,7 @@ def get_from_cache(url: str, cache_dir: Union[str, Path] = None) -> str:
# on the call to `lock.acquire()` until the process currently holding the lock
# releases it.
logger.debug("waiting to acquire lock on %s", cache_path)
with FileLock(cache_path + ".lock"):
with FileLock(cache_path + ".lock", read_only_ok=True):
if os.path.exists(cache_path):
logger.info("cache of %s is up-to-date", url)
else:
Expand Down
6 changes: 2 additions & 4 deletions allennlp/data/vocabulary.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,8 @@
from collections import defaultdict
from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Union, TYPE_CHECKING

from filelock import FileLock

from allennlp.common import Registrable
from allennlp.common.file_utils import cached_path
from allennlp.common.file_utils import cached_path, FileLock
from allennlp.common.checks import ConfigurationError
from allennlp.common.tqdm import Tqdm
from allennlp.common.util import namespace_match
Expand Down Expand Up @@ -338,7 +336,7 @@ def from_files(

# We use a lock file to avoid race conditions where multiple processes
# might be reading/writing from/to the same vocab files at once.
with FileLock(os.path.join(directory, ".lock")):
with FileLock(os.path.join(directory, ".lock"), read_only_ok=True):
with codecs.open(
os.path.join(directory, NAMESPACE_PADDING_FILE), "r", "utf-8"
) as namespace_file:
Expand Down
38 changes: 38 additions & 0 deletions tests/common/file_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
import time
import shutil

from filelock import Timeout
import pytest
import responses
from requests.exceptions import ConnectionError

from allennlp.common import file_utils
from allennlp.common.file_utils import (
FileLock,
_resource_to_filename,
filename_to_url,
get_from_cache,
Expand Down Expand Up @@ -59,6 +61,42 @@ def head_callback(_):
responses.add_callback(responses.HEAD, url, callback=head_callback)


class TestFileLock(AllenNlpTestCase):
def setup_method(self):
super().setup_method()

# Set up a regular lock and a read-only lock.
open(self.TEST_DIR / "lock", "a").close()
open(self.TEST_DIR / "read_only_lock", "a").close()
os.chmod(self.TEST_DIR / "read_only_lock", 0o555)

# Also set up a read-only directory.
os.mkdir(self.TEST_DIR / "read_only_dir", 0o555)

def test_locking(self):
with FileLock(self.TEST_DIR / "lock"):
# Trying to acquire the lock again should fail.
with pytest.raises(Timeout):
with FileLock(self.TEST_DIR / "lock", timeout=0.1):
pass

# Trying to acquire a lock when lacking write permissions on the file should fail.
with pytest.raises(PermissionError):
with FileLock(self.TEST_DIR / "read_only_lock"):
pass

# But this should only issue a warning if we set the `read_only_ok` flag to `True`.
with pytest.warns(UserWarning, match="Lacking permissions"):
with FileLock(self.TEST_DIR / "read_only_lock", read_only_ok=True):
pass

# However this should always fail when we lack write permissions and the file lock
# doesn't exist yet.
with pytest.raises(PermissionError):
with FileLock(self.TEST_DIR / "read_only_dir" / "lock", read_only_ok=True):
pass


class TestFileUtils(AllenNlpTestCase):
def setup_method(self):
super().setup_method()
Expand Down

0 comments on commit 1d21c75

Please sign in to comment.