Skip to content

Commit

Permalink
Fix test-install by moving tests into module (allenai#1232)
Browse files Browse the repository at this point in the history
fixes allenai#1228

Goals of this PR:
1. [x] Prevent tests from being installed as a module by `python setup.py`. Currently, pip copies them to `site-packages`, which is definitely not intended behavior.

2. [x] Provide an installation-method-invariant way to load and run tests, especially those that depend on files outside of the module (e.g. `scripts` or `tutorials`). The issue is that files can be in different places depending on whether allennlp is installed with `pip` or `setup.py install`. This is done by moving the tests within the module and using `os.chdir` to make the paths in the fixtures properly resolve.
  • Loading branch information
nelson-liu authored May 25, 2018
1 parent 214c28f commit 96f5bb6
Show file tree
Hide file tree
Showing 315 changed files with 546 additions and 419 deletions.
1 change: 0 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ RUN ./scripts/build_demo.py

COPY scripts/ scripts/
COPY allennlp/ allennlp/
COPY tests/ tests/
COPY pytest.ini pytest.ini
COPY .pylintrc .pylintrc
COPY tutorials/ tutorials/
Expand Down
1 change: 0 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ include requirements.txt
include bin/allennlp
recursive-include allennlp *
recursive-include scripts *
recursive-include tests *
recursive-include training_config *.json
recursive-include tutorials *
recursive-exclude * __pycache__
20 changes: 10 additions & 10 deletions allennlp/commands/test_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
import argparse
import logging
import os
import pathlib

import pytest

import allennlp
from allennlp.commands.subcommand import Subcommand

logger = logging.getLogger(__name__) # pylint: disable=invalid-name
Expand All @@ -44,22 +46,20 @@ def add_subparser(self, name: str, parser: argparse._SubParsersAction) -> argpar
return subparser


def _get_project_root():
return os.path.abspath(
os.path.join(
os.path.dirname(os.path.realpath(__file__)),
os.pardir, os.pardir))
def _get_module_root():
    """Return the directory on disk containing the installed ``allennlp`` package.

    Works regardless of how the package was installed (pip vs. setup.py),
    since it derives the location from the imported module itself.
    """
    module_file = pathlib.Path(allennlp.__file__)
    return module_file.parent


def _run_test(args: argparse.Namespace):
initial_working_dir = os.getcwd()
project_root = _get_project_root()
logger.info("Changing directory to %s", project_root)
os.chdir(project_root)
test_dir = os.path.join(project_root, "tests")
module_root = _get_module_root()
logger.info("Changing directory to %s", module_root)
os.chdir(module_root)
test_dir = os.path.join(module_root, "tests")
logger.info("Running tests at %s", test_dir)
if args.run_all:
pytest.main([test_dir])
# TODO(nfliu): remove this when notebooks have been rewritten as markdown.
pytest.main([test_dir, '-k', 'not notebooks_test'])
else:
pytest.main([test_dir, '-k', 'not sniff_test and not notebooks_test'])
# Change back to original working directory after running tests
Expand Down
11 changes: 7 additions & 4 deletions allennlp/common/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
Utilities for working with the local dataset cache.
"""

from typing import Tuple
from typing import Tuple, Union
import os
from hashlib import sha256
import logging
from pathlib import Path
import shutil
import tempfile
from urllib.parse import urlparse
Expand All @@ -17,8 +18,8 @@

logger = logging.getLogger(__name__) # pylint: disable=invalid-name

CACHE_ROOT = os.getenv('ALLENNLP_CACHE_ROOT', os.path.expanduser(os.path.join('~', '.allennlp')))
DATASET_CACHE = os.path.join(CACHE_ROOT, "datasets")
CACHE_ROOT = Path(os.getenv('ALLENNLP_CACHE_ROOT', Path.home() / '.allennlp'))
DATASET_CACHE = str(CACHE_ROOT / "datasets")

def url_to_filename(url: str, etag: str = None) -> str:
"""
Expand Down Expand Up @@ -60,7 +61,7 @@ def filename_to_url(filename: str, cache_dir: str = None) -> Tuple[str, str]:

return url, etag

def cached_path(url_or_filename: str, cache_dir: str = None) -> str:
def cached_path(url_or_filename: Union[str, Path], cache_dir: str = None) -> str:
"""
Given something that might be a URL (or might be a local path),
determine which. If it's a URL, download the file and cache it, and
Expand All @@ -69,6 +70,8 @@ def cached_path(url_or_filename: str, cache_dir: str = None) -> str:
"""
if cache_dir is None:
cache_dir = DATASET_CACHE
if isinstance(url_or_filename, Path):
url_or_filename = str(url_or_filename)

parsed = urlparse(url_or_filename)

Expand Down
18 changes: 16 additions & 2 deletions allennlp/common/testing/model_test_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ class ModelTestCase(AllenNlpTestCase):
"""
def set_up_model(self, param_file, dataset_file):
# pylint: disable=attribute-defined-outside-init
initial_working_dir = os.getcwd()
# Change directory to module root.
os.chdir(self.MODULE_ROOT)

self.param_file = param_file
params = Params.from_file(self.param_file)

Expand All @@ -40,12 +44,19 @@ def set_up_model(self, param_file, dataset_file):
self.dataset = Batch(self.instances)
self.dataset.index_instances(self.vocab)

# Change directory back to what it was initially
os.chdir(initial_working_dir)

def ensure_model_can_train_save_and_load(self,
param_file: str,
tolerance: float = 1e-4,
cuda_device: int = -1):
save_dir = os.path.join(self.TEST_DIR, "save_and_load_test")
archive_file = os.path.join(save_dir, "model.tar.gz")
initial_working_dir = os.getcwd()
# Change directory to module root.
os.chdir(self.MODULE_ROOT)

save_dir = self.TEST_DIR / "save_and_load_test"
archive_file = save_dir / "model.tar.gz"
model = train_model_from_file(param_file, save_dir)
loaded_model = load_archive(archive_file, cuda_device=cuda_device).model
state_keys = model.state_dict().keys()
Expand Down Expand Up @@ -109,6 +120,9 @@ def ensure_model_can_train_save_and_load(self,
name=key,
tolerance=tolerance)

# Change directory back to what it was initially
os.chdir(initial_working_dir)

return model, loaded_model

def assert_fields_equal(self, field1, field2, name: str, tolerance: float = 1e-6) -> None:
Expand Down
10 changes: 9 additions & 1 deletion allennlp/common/testing/test_case.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# pylint: disable=invalid-name,protected-access
import logging
import os
import pathlib
import shutil
from unittest import TestCase

Expand All @@ -13,6 +14,12 @@ class AllenNlpTestCase(TestCase): # pylint: disable=too-many-public-methods
more verbose AllenNLP logging and that creates and destroys a temp directory
as a test fixture.
"""
PROJECT_ROOT = (pathlib.Path(__file__).parent / ".." / ".." / "..").resolve() # pylint: disable=no-member
MODULE_ROOT = PROJECT_ROOT / "allennlp"
TOOLS_ROOT = MODULE_ROOT / "tools"
TESTS_ROOT = MODULE_ROOT / "tests"
FIXTURES_ROOT = TESTS_ROOT / "fixtures"

def setUp(self):
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
level=logging.DEBUG)
Expand All @@ -23,7 +30,8 @@ def setUp(self):
logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)
log_pytorch_version_info()

self.TEST_DIR = "/tmp/allennlp_tests/"
self.TEST_DIR = pathlib.Path("/tmp/allennlp_tests/")

os.makedirs(self.TEST_DIR, exist_ok=True)

def tearDown(self):
Expand Down
15 changes: 13 additions & 2 deletions allennlp/service/predictors/wikitables_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
import pathlib
from subprocess import run
from typing import Tuple
import shutil

from overrides import overrides

Expand All @@ -18,7 +20,9 @@
DEFAULT_EXECUTOR_JAR = "https://s3-us-west-2.amazonaws.com/allennlp/misc/wikitables-executor-0.1.0.jar"
ABBREVIATIONS_FILE = "https://s3-us-west-2.amazonaws.com/allennlp/misc/wikitables-abbreviations.tsv"
GROW_FILE = "https://s3-us-west-2.amazonaws.com/allennlp/misc/wikitables-grow.grammar"
SEMPRE_DIR = 'data/'
SEMPRE_DIR = str(pathlib.Path('data/'))
SEMPRE_ABBREVIATIONS_PATH = os.path.join(SEMPRE_DIR, "abbreviations.tsv")
SEMPRE_GRAMMAR_PATH = os.path.join(SEMPRE_DIR, "grow.grammar")

@Predictor.register('wikitables-parser')
class WikiTablesParserPredictor(Predictor):
Expand Down Expand Up @@ -117,4 +121,11 @@ def _execute_logical_form_on_table(logical_form, table):
denotations_file = os.path.join(SEMPRE_DIR, 'logical_forms_denotations.tsv')
with open(denotations_file) as temp_file:
line = temp_file.readline().split('\t')
return line[1] if len(line) > 1 else line[0]

# Clean up all the temp files generated from this function.
# Take care to not remove the auxiliary sempre files
os.remove(logical_form_filename)
shutil.rmtree(table_dir)
os.remove(denotations_file)
os.remove(test_data_filename)
return line[1] if len(line) > 1 else line[0]
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ def setUp(self):
}
},
"dataset_reader": {"type": "sequence_tagging"},
"train_data_path": 'tests/fixtures/data/sequence_tagging.tsv',
"validation_data_path": 'tests/fixtures/data/sequence_tagging.tsv',
"train_data_path": self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv',
"validation_data_path": self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv',
"iterator": {"type": "basic", "batch_size": 2},
"trainer": {
"num_epochs": 2,
Expand All @@ -36,10 +36,10 @@ def setUp(self):
})

def test_dry_run_doesnt_overwrite_vocab(self):
vocab_path = os.path.join(self.TEST_DIR, 'pre-defined-vocab')
vocab_path = self.TEST_DIR / 'pre-defined-vocab'
os.mkdir(vocab_path)
# Put something in the vocab directory
with open(os.path.join(vocab_path, "test.txt"), "a+") as open_file:
with open(vocab_path / "test.txt", "a+") as open_file:
open_file.write("test")

self.params['vocabulary'] = {}
Expand All @@ -51,5 +51,5 @@ def test_dry_run_doesnt_overwrite_vocab(self):
predefined_vocab_files = os.listdir(vocab_path)
assert set(predefined_vocab_files) == {'test.txt'}
# But we should have written the created vocab to serialisation_dir/vocab:
new_vocab_files = os.listdir(os.path.join(self.TEST_DIR, 'vocabulary'))
new_vocab_files = os.listdir(self.TEST_DIR / 'vocabulary')
assert set(new_vocab_files) == {'tokens.txt', 'non_padded_namespaces.txt', 'labels.txt'}
Loading

0 comments on commit 96f5bb6

Please sign in to comment.