This repository has been archived by the owner on Dec 16, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2.2k
SRL elmo 5b #1310
Merged
Merged
SRL elmo 5b #1310
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
90e5590
update script to write predictions
0b06f92
add training config for new model
d6cfdb2
add new SRL model
0239d48
tweak sniff test
bd70fff
Merge branch 'master' into srl-elmo-5b
DeNeutoy 6ec4080
more tweaks
75a890e
Merge branch 'master' into srl-elmo-5b
DeNeutoy 94beb14
PR comments
492a2e2
Merge branch 'srl-elmo-5b' of https://github.com/DeNeutoy/allennlp into srl-elmo-5b
c3fb02b
Merge branch 'master' into srl-elmo-5b
DeNeutoy File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,73 +1,102 @@ | ||
import os | ||
import sys | ||
|
||
import torch | ||
|
||
sys.path.insert(0, os.path.dirname(os.path.abspath(os.path.join(__file__, os.pardir)))) | ||
import argparse | ||
from allennlp.common import Params | ||
|
||
import torch | ||
|
||
from allennlp.common.tqdm import Tqdm | ||
from allennlp.common import Params | ||
from allennlp.models.archival import load_archive | ||
from allennlp.data.iterators import BasicIterator | ||
from allennlp.data import DatasetReader | ||
from allennlp.models import Model | ||
from allennlp.models.semantic_role_labeler import write_to_conll_eval_file | ||
from allennlp.modules.elmo import Elmo | ||
|
||
|
||
def main(serialization_directory: str,
         device: int,
         data: str,
         prefix: str,
         domain: str = None):
    """
    Write CONLL-format SRL predictions and the matching gold annotations to
    file from a serialized model, so they can be scored with the official
    CoNLL perl evaluation script.

    Parameters
    ----------
    serialization_directory : str, required.
        The directory containing the serialized weights.
    device: int, default = -1
        The device to run the evaluation on.
    data: str, default = None
        The data to evaluate on. By default, we use the validation data from
        the original experiment.
    prefix: str, default=""
        The prefix to prepend to the generated gold and prediction files, to distinguish
        different models/data.
    domain: str, optional (default = None)
        If passed, filters the ontonotes evaluation/test dataset to only contain the
        specified domain. This overwrites the domain in the config file from the model,
        to allow evaluation on domains other than the one the model was trained on.
    """
    config = Params.from_file(os.path.join(serialization_directory, "config.json"))

    if domain is not None:
        # Hack to allow evaluation on different domains than the
        # model was trained on.
        config["dataset_reader"]["domain_identifier"] = domain
        prefix = f"{domain}_{prefix}"
    else:
        config["dataset_reader"].pop("domain_identifier", None)

    dataset_reader = DatasetReader.from_params(config["dataset_reader"])
    evaluation_data_path = data if data else config["validation_data_path"]

    archive = load_archive(os.path.join(serialization_directory, "model.tar.gz"),
                           cuda_device=device)
    model = archive.model
    model.eval()

    prediction_file_path = os.path.join(serialization_directory, prefix + "_predictions.txt")
    gold_file_path = os.path.join(serialization_directory, prefix + "_gold.txt")

    # Load the evaluation data and index it.
    print("reading evaluation data from {}".format(evaluation_data_path))
    instances = dataset_reader.read(evaluation_data_path)

    # `with` guarantees both output files are closed even if evaluation raises;
    # no_grad disables autograd bookkeeping, which is pure overhead at inference.
    with open(prediction_file_path, "w+") as prediction_file, \
         open(gold_file_path, "w+") as gold_file, \
         torch.autograd.no_grad():
        iterator = BasicIterator(batch_size=32)
        iterator.index_with(model.vocab)

        model_predictions = []
        batches = iterator(instances, num_epochs=1, shuffle=False, cuda_device=device)
        for batch in Tqdm.tqdm(batches):
            result = model(**batch)
            predictions = model.decode(result)
            model_predictions.extend(predictions["tags"])

        for instance, prediction in zip(instances, model_predictions):
            fields = instance.fields
            try:
                # Most sentences have a verbal predicate, but not all.
                verb_index = fields["verb_indicator"].labels.index(1)
            except ValueError:
                verb_index = None

            gold_tags = fields["tags"].labels
            # write_to_conll_eval_file expects raw strings, not Token objects.
            sentence = [x.text for x in fields["tokens"].tokens]

            write_to_conll_eval_file(prediction_file, gold_file,
                                     verb_index, sentence, prediction, gold_tags)
|
||
if __name__ == "__main__":

    parser = argparse.ArgumentParser(description="write conll format srl predictions"
                                                 " to file from a pretrained model.")
    # required: without a serialization directory there is nothing to evaluate,
    # and a missing value would otherwise surface as an opaque os.path.join error.
    parser.add_argument('--path', type=str, required=True, help='the serialization directory.')
    parser.add_argument('--device', type=int, default=-1, help='the device to load the model onto.')
    parser.add_argument('--data', type=str, default=None, help='A directory containing a dataset to evaluate on.')
    parser.add_argument('--prefix', type=str, default="", help='A prefix to distinguish model outputs.')
    parser.add_argument('--domain', type=str, default=None, help='An optional domain to filter by for producing results.')
    args = parser.parse_args()
    main(args.path, args.device, args.data, args.prefix, args.domain)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
{
  "dataset_reader": {
    "type": "srl",
    "token_indexers": {
      "elmo": {
        "type": "elmo_characters"
      }
    }
  },
  // Data paths are injected from the environment at experiment-load time.
  "train_data_path": ${SRL_TRAIN_DATA_PATH},
  "validation_data_path": ${SRL_VALIDATION_DATA_PATH},
  "model": {
    "type": "srl",
    "text_field_embedder": {
      "elmo": {
        "type": "elmo_token_embedder",
        "options_file": "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json",
        "weight_file": "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway_5.5B/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5",
        "do_layer_norm": false,
        "dropout": 0.1
      }
    },
    "initializer": [
      [
        "tag_projection_layer.*weight",
        {
          "type": "orthogonal"
        }
      ]
    ],
    // NOTE: This configuration is correct, but slow.
    // If you are interested in training the SRL model
    // from scratch, you should use the 'alternating_lstm_cuda'
    // encoder instead.
    "encoder": {
      "type": "alternating_lstm",
      "input_size": 1124,
      "hidden_size": 300,
      "num_layers": 8,
      "recurrent_dropout_probability": 0.1,
      "use_input_projection_bias": false
    },
    "binary_feature_dim": 100,
    "regularizer": [
      [
        ".*scalar_parameters.*",
        {
          "type": "l2",
          "alpha": 0.001
        }
      ]
    ]
  },
  "iterator": {
    "type": "bucket",
    "sorting_keys": [
      [
        "tokens",
        "num_tokens"
      ]
    ],
    "batch_size": 80
  },
  "trainer": {
    "num_epochs": 500,
    "grad_clipping": 1.0,
    "patience": 200,
    "num_serialized_models_to_keep": 10,
    "validation_metric": "+f1-measure-overall",
    "cuda_device": 0,
    "optimizer": {
      "type": "adadelta",
      "rho": 0.95
    }
  }
}
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's pretty cool that the new model picks up this case - I hadn't thought that the previous model was incorrect, but in this sentence, "will" is a light verb and doesn't really hold any semantic meaning (I think). Progress 🎉