Skip to content

Commit

Permalink
Fix test-install by moving tests into module (allenai#1232)
Browse files Browse the repository at this point in the history
fixes allenai#1228

Goals of this PR:
1. [x] Prevent tests from being installed as a module by `python setup.py`. Currently, pip copies them to `site-packages`, which is definitely not intended behavior.

2. [x] Provide an installation-method-invariant way to load and run tests, especially those that depend on files outside of the module (e.g. `scripts` or `tutorials`). The issue is that files can be in different places depending on whether allennlp is installed with `pip` or `setup.py install`. This is done by moving the tests within the module and using `os.chdir` to make the paths in the fixtures properly resolve.
  • Loading branch information
nelson-liu authored May 25, 2018
1 parent 214c28f commit 96f5bb6
Show file tree
Hide file tree
Showing 315 changed files with 546 additions and 419 deletions.
1 change: 0 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ RUN ./scripts/build_demo.py

COPY scripts/ scripts/
COPY allennlp/ allennlp/
COPY tests/ tests/
COPY pytest.ini pytest.ini
COPY .pylintrc .pylintrc
COPY tutorials/ tutorials/
Expand Down
1 change: 0 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ include requirements.txt
include bin/allennlp
recursive-include allennlp *
recursive-include scripts *
recursive-include tests *
recursive-include training_config *.json
recursive-include tutorials *
recursive-exclude * __pycache__
20 changes: 10 additions & 10 deletions allennlp/commands/test_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
import argparse
import logging
import os
import pathlib

import pytest

import allennlp
from allennlp.commands.subcommand import Subcommand

logger = logging.getLogger(__name__) # pylint: disable=invalid-name
Expand All @@ -44,22 +46,20 @@ def add_subparser(self, name: str, parser: argparse._SubParsersAction) -> argpar
return subparser


def _get_project_root():
return os.path.abspath(
os.path.join(
os.path.dirname(os.path.realpath(__file__)),
os.pardir, os.pardir))
def _get_module_root():
    """Return the directory on disk containing the installed ``allennlp`` package.

    Works regardless of how the package was installed (pip vs. setup.py),
    since it derives the location from the imported module itself.
    """
    module_file = pathlib.Path(allennlp.__file__)
    return module_file.parent


def _run_test(args: argparse.Namespace):
initial_working_dir = os.getcwd()
project_root = _get_project_root()
logger.info("Changing directory to %s", project_root)
os.chdir(project_root)
test_dir = os.path.join(project_root, "tests")
module_root = _get_module_root()
logger.info("Changing directory to %s", module_root)
os.chdir(module_root)
test_dir = os.path.join(module_root, "tests")
logger.info("Running tests at %s", test_dir)
if args.run_all:
pytest.main([test_dir])
# TODO(nfliu): remove this when notebooks have been rewritten as markdown.
pytest.main([test_dir, '-k', 'not notebooks_test'])
else:
pytest.main([test_dir, '-k', 'not sniff_test and not notebooks_test'])
# Change back to original working directory after running tests
Expand Down
11 changes: 7 additions & 4 deletions allennlp/common/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
Utilities for working with the local dataset cache.
"""

from typing import Tuple
from typing import Tuple, Union
import os
from hashlib import sha256
import logging
from pathlib import Path
import shutil
import tempfile
from urllib.parse import urlparse
Expand All @@ -17,8 +18,8 @@

logger = logging.getLogger(__name__) # pylint: disable=invalid-name

CACHE_ROOT = os.getenv('ALLENNLP_CACHE_ROOT', os.path.expanduser(os.path.join('~', '.allennlp')))
DATASET_CACHE = os.path.join(CACHE_ROOT, "datasets")
CACHE_ROOT = Path(os.getenv('ALLENNLP_CACHE_ROOT', Path.home() / '.allennlp'))
DATASET_CACHE = str(CACHE_ROOT / "datasets")

def url_to_filename(url: str, etag: str = None) -> str:
"""
Expand Down Expand Up @@ -60,7 +61,7 @@ def filename_to_url(filename: str, cache_dir: str = None) -> Tuple[str, str]:

return url, etag

def cached_path(url_or_filename: str, cache_dir: str = None) -> str:
def cached_path(url_or_filename: Union[str, Path], cache_dir: str = None) -> str:
"""
Given something that might be a URL (or might be a local path),
determine which. If it's a URL, download the file and cache it, and
Expand All @@ -69,6 +70,8 @@ def cached_path(url_or_filename: str, cache_dir: str = None) -> str:
"""
if cache_dir is None:
cache_dir = DATASET_CACHE
if isinstance(url_or_filename, Path):
url_or_filename = str(url_or_filename)

parsed = urlparse(url_or_filename)

Expand Down
18 changes: 16 additions & 2 deletions allennlp/common/testing/model_test_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ class ModelTestCase(AllenNlpTestCase):
"""
def set_up_model(self, param_file, dataset_file):
# pylint: disable=attribute-defined-outside-init
initial_working_dir = os.getcwd()
# Change directory to module root.
os.chdir(self.MODULE_ROOT)

self.param_file = param_file
params = Params.from_file(self.param_file)

Expand All @@ -40,12 +44,19 @@ def set_up_model(self, param_file, dataset_file):
self.dataset = Batch(self.instances)
self.dataset.index_instances(self.vocab)

# Change directory back to what it was initially
os.chdir(initial_working_dir)

def ensure_model_can_train_save_and_load(self,
param_file: str,
tolerance: float = 1e-4,
cuda_device: int = -1):
save_dir = os.path.join(self.TEST_DIR, "save_and_load_test")
archive_file = os.path.join(save_dir, "model.tar.gz")
initial_working_dir = os.getcwd()
# Change directory to module root.
os.chdir(self.MODULE_ROOT)

save_dir = self.TEST_DIR / "save_and_load_test"
archive_file = save_dir / "model.tar.gz"
model = train_model_from_file(param_file, save_dir)
loaded_model = load_archive(archive_file, cuda_device=cuda_device).model
state_keys = model.state_dict().keys()
Expand Down Expand Up @@ -109,6 +120,9 @@ def ensure_model_can_train_save_and_load(self,
name=key,
tolerance=tolerance)

# Change directory back to what it was initially
os.chdir(initial_working_dir)

return model, loaded_model

def assert_fields_equal(self, field1, field2, name: str, tolerance: float = 1e-6) -> None:
Expand Down
10 changes: 9 additions & 1 deletion allennlp/common/testing/test_case.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# pylint: disable=invalid-name,protected-access
import logging
import os
import pathlib
import shutil
from unittest import TestCase

Expand All @@ -13,6 +14,12 @@ class AllenNlpTestCase(TestCase): # pylint: disable=too-many-public-methods
more verbose AllenNLP logging and that creates and destroys a temp directory
as a test fixture.
"""
PROJECT_ROOT = (pathlib.Path(__file__).parent / ".." / ".." / "..").resolve() # pylint: disable=no-member
MODULE_ROOT = PROJECT_ROOT / "allennlp"
TOOLS_ROOT = MODULE_ROOT / "tools"
TESTS_ROOT = MODULE_ROOT / "tests"
FIXTURES_ROOT = TESTS_ROOT / "fixtures"

def setUp(self):
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
level=logging.DEBUG)
Expand All @@ -23,7 +30,8 @@ def setUp(self):
logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)
log_pytorch_version_info()

self.TEST_DIR = "/tmp/allennlp_tests/"
self.TEST_DIR = pathlib.Path("/tmp/allennlp_tests/")

os.makedirs(self.TEST_DIR, exist_ok=True)

def tearDown(self):
Expand Down
15 changes: 13 additions & 2 deletions allennlp/service/predictors/wikitables_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
import pathlib
from subprocess import run
from typing import Tuple
import shutil

from overrides import overrides

Expand All @@ -18,7 +20,9 @@
DEFAULT_EXECUTOR_JAR = "https://s3-us-west-2.amazonaws.com/allennlp/misc/wikitables-executor-0.1.0.jar"
ABBREVIATIONS_FILE = "https://s3-us-west-2.amazonaws.com/allennlp/misc/wikitables-abbreviations.tsv"
GROW_FILE = "https://s3-us-west-2.amazonaws.com/allennlp/misc/wikitables-grow.grammar"
SEMPRE_DIR = 'data/'
SEMPRE_DIR = str(pathlib.Path('data/'))
SEMPRE_ABBREVIATIONS_PATH = os.path.join(SEMPRE_DIR, "abbreviations.tsv")
SEMPRE_GRAMMAR_PATH = os.path.join(SEMPRE_DIR, "grow.grammar")

@Predictor.register('wikitables-parser')
class WikiTablesParserPredictor(Predictor):
Expand Down Expand Up @@ -117,4 +121,11 @@ def _execute_logical_form_on_table(logical_form, table):
denotations_file = os.path.join(SEMPRE_DIR, 'logical_forms_denotations.tsv')
with open(denotations_file) as temp_file:
line = temp_file.readline().split('\t')
return line[1] if len(line) > 1 else line[0]

# Clean up all the temp files generated from this function.
# Take care to not remove the auxiliary sempre files
os.remove(logical_form_filename)
shutil.rmtree(table_dir)
os.remove(denotations_file)
os.remove(test_data_filename)
return line[1] if len(line) > 1 else line[0]
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ def setUp(self):
}
},
"dataset_reader": {"type": "sequence_tagging"},
"train_data_path": 'tests/fixtures/data/sequence_tagging.tsv',
"validation_data_path": 'tests/fixtures/data/sequence_tagging.tsv',
"train_data_path": self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv',
"validation_data_path": self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv',
"iterator": {"type": "basic", "batch_size": 2},
"trainer": {
"num_epochs": 2,
Expand All @@ -36,10 +36,10 @@ def setUp(self):
})

def test_dry_run_doesnt_overwrite_vocab(self):
vocab_path = os.path.join(self.TEST_DIR, 'pre-defined-vocab')
vocab_path = self.TEST_DIR / 'pre-defined-vocab'
os.mkdir(vocab_path)
# Put something in the vocab directory
with open(os.path.join(vocab_path, "test.txt"), "a+") as open_file:
with open(vocab_path / "test.txt", "a+") as open_file:
open_file.write("test")

self.params['vocabulary'] = {}
Expand All @@ -51,5 +51,5 @@ def test_dry_run_doesnt_overwrite_vocab(self):
predefined_vocab_files = os.listdir(vocab_path)
assert set(predefined_vocab_files) == {'test.txt'}
# But we should have written the created vocab to serialisation_dir/vocab:
new_vocab_files = os.listdir(os.path.join(self.TEST_DIR, 'vocabulary'))
new_vocab_files = os.listdir(self.TEST_DIR / 'vocabulary')
assert set(new_vocab_files) == {'tokens.txt', 'non_padded_namespaces.txt', 'labels.txt'}
Loading

0 comments on commit 96f5bb6

Please sign in to comment.