Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Permalink
add back file-friendly-logging flag (#4509)
Browse files Browse the repository at this point in the history
  • Loading branch information
epwalsh authored Jul 31, 2020
1 parent 54e5c83 commit 2401282
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 17 deletions.
3 changes: 1 addition & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed

- Removed unnecessary warning about deadlocks in `DataLoader`.
- Use slower tqdm intervals when output is being piped or redirected.
- Fixed testing models that only return a loss when they are in training mode.
- Fixed a bug in `FromParams` that caused silent failure when the parameter type is `Optional[Union[...]]`.

### Added

- Added the option to specify `requires_grad: false` within an optimizer's parameter groups.

- Added the `file-friendly-logging` flag back to the `train` command. Also added this flag to the `predict`, `evaluate`, and `find-learning-rate` commands.

## [v1.1.0rc1](https://github.com/allenai/allennlp/releases/tag/v1.1.0rc1) - 2020-07-14

Expand Down
9 changes: 9 additions & 0 deletions allennlp/commands/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from overrides import overrides

from allennlp.commands.subcommand import Subcommand
from allennlp.common import logging as common_logging
from allennlp.common.util import dump_metrics, prepare_environment
from allennlp.data.dataset_readers.dataset_reader import DatasetReader
from allennlp.data import DataLoader
Expand Down Expand Up @@ -84,13 +85,21 @@ def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.Argument
"extended, we will try to use the original file paths used during training. If "
"they are not available we will use random vectors for embedding extension.",
)
subparser.add_argument(
"--file-friendly-logging",
action="store_true",
default=False,
help="outputs tqdm status on separate lines and slows tqdm refresh rate",
)

subparser.set_defaults(func=evaluate_from_args)

return subparser


def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
common_logging.FILE_FRIENDLY_LOGGING = args.file_friendly_logging

# Disable some of the more verbose logging statements
logging.getLogger("allennlp.common.params").disabled = True
logging.getLogger("allennlp.nn.initializers").disabled = True
Expand Down
8 changes: 8 additions & 0 deletions allennlp/commands/find_learning_rate.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from allennlp.commands.subcommand import Subcommand
from allennlp.common import Params, Tqdm
from allennlp.common import logging as common_logging
from allennlp.common.checks import check_for_gpu, ConfigurationError
from allennlp.common.util import prepare_environment
from allennlp.data import Vocabulary
Expand Down Expand Up @@ -86,6 +87,12 @@ def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.Argument
required=False,
help="overwrite the output directory if it exists",
)
subparser.add_argument(
"--file-friendly-logging",
action="store_true",
default=False,
help="outputs tqdm status on separate lines and slows tqdm refresh rate",
)

subparser.set_defaults(func=find_learning_rate_from_args)

Expand All @@ -96,6 +103,7 @@ def find_learning_rate_from_args(args: argparse.Namespace) -> None:
"""
Start learning rate finder for given args
"""
common_logging.FILE_FRIENDLY_LOGGING = args.file_friendly_logging
params = Params.from_file(args.param_path, args.overrides)
find_learning_rate_model(
params,
Expand Down
10 changes: 10 additions & 0 deletions allennlp/commands/predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from overrides import overrides

from allennlp.commands.subcommand import Subcommand
from allennlp.common import logging as common_logging
from allennlp.common.checks import check_for_gpu, ConfigurationError
from allennlp.common.file_utils import cached_path
from allennlp.common.util import lazy_groups_of
Expand Down Expand Up @@ -84,6 +85,13 @@ def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.Argument
"--predictor", type=str, help="optionally specify a specific predictor to use"
)

subparser.add_argument(
"--file-friendly-logging",
action="store_true",
default=False,
help="outputs tqdm status on separate lines and slows tqdm refresh rate",
)

subparser.set_defaults(func=_predict)

return subparser
Expand Down Expand Up @@ -194,6 +202,8 @@ def run(self) -> None:


def _predict(args: argparse.Namespace) -> None:
common_logging.FILE_FRIENDLY_LOGGING = args.file_friendly_logging

predictor = _get_predictor(args)

if args.silent and not args.output_file:
Expand Down
30 changes: 28 additions & 2 deletions allennlp/commands/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from allennlp.commands.subcommand import Subcommand
from allennlp.common import Params, Registrable, Lazy
from allennlp.common.checks import check_for_gpu, ConfigurationError
from allennlp.common.logging import prepare_global_logging
from allennlp.common import logging as common_logging
from allennlp.common import util as common_util
from allennlp.common.plugins import import_plugins
from allennlp.data import DatasetReader, Vocabulary
Expand Down Expand Up @@ -84,6 +84,12 @@ def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.Argument
help="do not train a model, but create a vocabulary, show dataset statistics and "
"other training information",
)
subparser.add_argument(
"--file-friendly-logging",
action="store_true",
default=False,
help="outputs tqdm status on separate lines and slows tqdm refresh rate",
)

subparser.set_defaults(func=train_model_from_args)

Expand All @@ -103,6 +109,7 @@ def train_model_from_args(args: argparse.Namespace):
node_rank=args.node_rank,
include_package=args.include_package,
dry_run=args.dry_run,
file_friendly_logging=args.file_friendly_logging,
)


Expand All @@ -115,6 +122,7 @@ def train_model_from_file(
node_rank: int = 0,
include_package: List[str] = None,
dry_run: bool = False,
file_friendly_logging: bool = False,
) -> Optional[Model]:
"""
A wrapper around [`train_model`](#train_model) which loads the params from a file.
Expand All @@ -141,6 +149,9 @@ def train_model_from_file(
dry_run : `bool`, optional (default=`False`)
Do not train a model, but create a vocabulary, show dataset statistics and other training
information.
file_friendly_logging : `bool`, optional (default=`False`)
If `True`, we add newlines to tqdm output, even on an interactive terminal, and we slow
down tqdm's output to only once every 10 seconds.
# Returns
Expand All @@ -157,6 +168,7 @@ def train_model_from_file(
node_rank=node_rank,
include_package=include_package,
dry_run=dry_run,
file_friendly_logging=file_friendly_logging,
)


Expand All @@ -168,6 +180,7 @@ def train_model(
node_rank: int = 0,
include_package: List[str] = None,
dry_run: bool = False,
file_friendly_logging: bool = False,
) -> Optional[Model]:
"""
Trains the model specified in the given [`Params`](../common/params.md#params) object, using the data
Expand All @@ -192,12 +205,17 @@ def train_model(
dry_run : `bool`, optional (default=`False`)
Do not train a model, but create a vocabulary, show dataset statistics and other training
information.
file_friendly_logging : `bool`, optional (default=`False`)
If `True`, we add newlines to tqdm output, even on an interactive terminal, and we slow
down tqdm's output to only once every 10 seconds.
# Returns
best_model : `Optional[Model]`
The model with the best epoch weights or `None` if in dry run.
"""
common_logging.FILE_FRIENDLY_LOGGING = file_friendly_logging

training_util.create_serialization_dir(params, serialization_dir, recover, force)
params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

Expand All @@ -211,6 +229,7 @@ def train_model(
serialization_dir=serialization_dir,
include_package=include_package,
dry_run=dry_run,
file_friendly_logging=file_friendly_logging,
)

if not dry_run:
Expand Down Expand Up @@ -274,6 +293,7 @@ def train_model(
master_port,
world_size,
device_ids,
file_friendly_logging,
),
nprocs=num_procs,
)
Expand All @@ -296,6 +316,7 @@ def _train_worker(
master_port: int = 29500,
world_size: int = 1,
distributed_device_ids: List[int] = None,
file_friendly_logging: bool = False,
) -> Optional[Model]:
"""
Helper to train the configured model/experiment. In distributed mode, this is spawned as a
Expand Down Expand Up @@ -327,13 +348,18 @@ def _train_worker(
The number of processes involved in distributed training.
distributed_device_ids: `List[str]`, optional
    IDs of the devices involved in distributed training.
file_friendly_logging : `bool`, optional (default=`False`)
If `True`, we add newlines to tqdm output, even on an interactive terminal, and we slow
down tqdm's output to only once every 10 seconds.
# Returns
best_model : `Optional[Model]`
The model with the best epoch weights or `None` if in distributed training or in dry run.
"""
prepare_global_logging(
common_logging.FILE_FRIENDLY_LOGGING = file_friendly_logging

common_logging.prepare_global_logging(
serialization_dir, rank=process_rank, world_size=world_size,
)
common_util.prepare_environment(params)
Expand Down
10 changes: 10 additions & 0 deletions allennlp/common/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,16 @@ def critical_once(self, msg, *args, **kwargs):


logging.setLoggerClass(AllenNlpLogger)
logger = logging.getLogger(__name__)


FILE_FRIENDLY_LOGGING: bool = False
"""
If this flag is set to `True`, we add newlines to tqdm output, even on an interactive terminal, and we slow
down tqdm's output to only once every 10 seconds.
By default, it is set to `False`.
"""


class ErrorFilter(Filter):
Expand Down
37 changes: 24 additions & 13 deletions allennlp/common/tqdm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
global defaults for certain tqdm parameters.
"""
import logging
from time import time

import sys
from time import time
from typing import Optional

try:
SHELL = str(type(get_ipython())) # type:ignore # noqa: F821
Expand All @@ -17,6 +17,9 @@
else:
from tqdm import tqdm as _tqdm

from allennlp.common import logging as common_logging


# This is necessary to stop tqdm from hanging
# when exceptions are raised inside iterators.
# It should have been fixed in 4.2.1, but it still
Expand Down Expand Up @@ -47,20 +50,23 @@ def replace_cr_with_newline(message: str) -> str:

class TqdmToLogsWriter(object):
def __init__(self):
self.last_message_written_time = 0
self.last_message_written_time = 0.0

def write(self, message):
if sys.stderr.isatty():
# We're running in a real terminal.
sys.stderr.write(message)
file_friendly_message: Optional[str] = None
if common_logging.FILE_FRIENDLY_LOGGING:
file_friendly_message = replace_cr_with_newline(message)
if file_friendly_message.strip():
sys.stderr.write(file_friendly_message)
else:
# The output is being piped or redirected.
sys.stderr.write(replace_cr_with_newline(message))
sys.stderr.write(message)

# Every 10 seconds we also log the message.
now = time()
if now - self.last_message_written_time >= 10 or "100%" in message:
message = replace_cr_with_newline(message)
for message in message.split("\n"):
if file_friendly_message is None:
file_friendly_message = replace_cr_with_newline(message)
for message in file_friendly_message.split("\n"):
message = message.strip()
if len(message) > 0:
logger.info(message)
Expand All @@ -73,8 +79,13 @@ def flush(self):
class Tqdm:
@staticmethod
def tqdm(*args, **kwargs):
# Use a slow interval if the output is being piped or redirected.
default_mininterval = 0.1 if sys.stderr.isatty() else 10.0
new_kwargs = {"file": TqdmToLogsWriter(), "mininterval": default_mininterval, **kwargs}
# Use a slower interval when FILE_FRIENDLY_LOGGING is set.
default_mininterval = 2.0 if common_logging.FILE_FRIENDLY_LOGGING else 0.1

new_kwargs = {
"file": TqdmToLogsWriter(),
"mininterval": default_mininterval,
**kwargs,
}

return _tqdm(*args, **new_kwargs)

0 comments on commit 2401282

Please sign in to comment.