From 6e2292c91524c061add3f8167a06cc9788ab38e4 Mon Sep 17 00:00:00 2001 From: don Date: Fri, 13 Apr 2018 12:35:14 -0700 Subject: [PATCH] Convert logging settings to a Python file, as part of #438. --- .gitignore | 1 + micall/core/FILTER_CONTAMINANTS.py | 17 ---- micall/core/aln2counts.py | 8 +- micall/core/filter_quality.py | 17 ++-- micall/core/miseq_logging.py | 119 --------------------------- micall/core/prelim_map.py | 7 +- micall/core/remap.py | 11 ++- micall/monitor/kive_download.py | 8 +- micall/monitor/kive_watcher.py | 3 + micall/utils/remap_fastq_simplify.py | 6 +- micall/utils/sam_g2p_simplify.py | 10 +-- micall_basespace.py | 4 +- micall_kive.py | 2 +- micall_kive_resistance.py | 2 +- micall_logging_config.py | 50 +++++++++++ micall_watcher.py | 26 +++--- 16 files changed, 97 insertions(+), 194 deletions(-) delete mode 100755 micall/core/FILTER_CONTAMINANTS.py delete mode 100755 micall/core/miseq_logging.py create mode 100644 micall_logging_config.py diff --git a/.gitignore b/.gitignore index ca6d38cbd..47a843da0 100755 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +/micall_logging_override.py /micall/settings.py /micall/monitor/HIV1_COM_2015_genome_DNA.csv /micall/monitor/compare_454_samples.csv diff --git a/micall/core/FILTER_CONTAMINANTS.py b/micall/core/FILTER_CONTAMINANTS.py deleted file mode 100755 index 241d4c812..000000000 --- a/micall/core/FILTER_CONTAMINANTS.py +++ /dev/null @@ -1,17 +0,0 @@ -import logging -import miseq_logging -import os -import miseq_modules -import sys - - -logger = miseq_logging.init_logging_console_only(logging.DEBUG) - -# arguments -root = sys.argv[1] # folder with data -qcut = int(sys.argv[2]) # base quality score cutoff -num_threads = int(sys.argv[3]) # for bowtie2 - -logger.info("pid {}: miseq_modules.filter_cross_contaminants({},{},{})".format(os.getpid(), root, qcut, num_threads)) -miseq_modules.filter_cross_contaminants(root=root, qcutoff=qcut, num_threads=num_threads) -logging.shutdown() diff --git a/micall/core/aln2counts.py b/micall/core/aln2counts.py index 88ce4bd97..195938db5 100755 --- a/micall/core/aln2counts.py +++ b/micall/core/aln2counts.py @@ -14,13 +14,11 @@ from collections import Counter, defaultdict, OrderedDict import csv from itertools import groupby -import logging from operator import itemgetter import os import gotoh -from micall.core import miseq_logging from micall.core.project_config import ProjectConfig, G2P_SEED_NAME from micall.utils.big_counter import BigCounter from micall.utils.translation import translate, ambig_dict @@ -30,6 +28,7 @@ GAP_OPEN_COORD = 40 GAP_EXTEND_COORD = 10 CONSENSUS_MIN_COVERAGE = 100 +MAX_CUTOFF = 'MAX' def parse_args(): @@ -70,11 +69,6 @@ def parse_args(): return parser.parse_args() -logger = miseq_logging.init_logging_console_only(logging.DEBUG) - -MAX_CUTOFF = 'MAX' - - class SequenceReport(object): """ Hold the data for several reports related to a sample's genetic sequence. diff --git a/micall/core/filter_quality.py b/micall/core/filter_quality.py index 3f26ad51d..3bc080610 100755 --- a/micall/core/filter_quality.py +++ b/micall/core/filter_quality.py @@ -1,19 +1,14 @@ -#! /usr/bin/env python3.4 - import argparse import csv import itertools -import logging import math from operator import itemgetter import os -from micall.core import miseq_logging - BAD_ERROR_RATE = 7.5 -def parseArgs(): +def parse_args(): parser = argparse.ArgumentParser( description='Post-processing of short-read alignments.') @@ -26,8 +21,6 @@ def parseArgs(): return parser.parse_args() -logger = miseq_logging.init_logging_console_only(logging.DEBUG) - def direction_grouper(cycle): return math.copysign(1, int(cycle['cycle'])) @@ -39,7 +32,9 @@ def report_bad_cycles(quality_csv, bad_cycles_csv, bad_tiles_csv=None): ['tile', 'cycle', 'errorrate'], lineterminator=os.linesep) writer.writeheader() - if bad_tiles_csv is not None: + if bad_tiles_csv is None: + tile_writer = None + else: tile_writer = csv.DictWriter(bad_tiles_csv, ['tile', 'bad_cycles'], lineterminator=os.linesep) @@ -58,12 +53,12 @@ def report_bad_cycles(quality_csv, bad_cycles_csv, bad_tiles_csv=None): if is_bad: writer.writerow(cycle) bad_cycle_count += 1 - if bad_tiles_csv is not None: + if tile_writer is not None: tile_writer.writerow(dict(tile=tile, bad_cycles=bad_cycle_count)) def main(): - args = parseArgs() + args = parse_args() with args.quality_csv, args.bad_cycles_csv: report_bad_cycles(args.quality_csv, args.bad_cycles_csv) diff --git a/micall/core/miseq_logging.py b/micall/core/miseq_logging.py deleted file mode 100755 index 07dade2fe..000000000 --- a/micall/core/miseq_logging.py +++ /dev/null @@ -1,119 +0,0 @@ -import logging - - -class Timestamp(logging.Formatter): - """ - Extended logging.Formatter to use DateTime instead of struct_time which doesn't support milliseconds. - http://stackoverflow.com/questions/6290739/python-logging-use-milliseconds-in-time-format - """ - import datetime - - converter = datetime.datetime.fromtimestamp - - def formatTime(self, record, datefmt=None): - ct = self.converter(record.created) - if datefmt: - s = ct.strftime(datefmt) - else: - t = ct.strftime("%Y-%m-%d %H:%M:%S") - s = "%s,%03d" % (t, record.msecs) - return s - - -def init_logging_console_only(log_level=logging.DEBUG): - import sys - logger = logging.getLogger() - - # Clear previous handlers - for handler in logger.handlers: - stream = getattr(handler, 'stream', None) - if stream not in (None, sys.stdout, sys.stderr): - stream.close() - logger.handlers = [] - - logger.setLevel(logging.DEBUG) - console_logger = logging.StreamHandler(sys.stdout) - console_logger.setLevel(log_level) - formatter = Timestamp('%(asctime)s - [%(levelname)s](%(name)s) %(message)s', datefmt="%Y-%m-%d %H:%M:%S.%f") - console_logger.setFormatter(formatter) - logger.addHandler(console_logger) - - # Quiet the urllib3 logger - connection_logger = logging.getLogger('requests.packages.urllib3.connectionpool') - connection_logger.setLevel(logging.ERROR) - return logger - - -def init_logging(logging_path, file_log_level=logging.DEBUG, console_log_level=logging.DEBUG): - """ - Creates a logger object which will log to the console and to the path specified. - Formatting of logging is performed by an extended version of logging.Formatter - - The logging_level is assumed to be at the DEBUG level, but can be set to: - logging.DEBUG, logging.INFO, logging.WARN, logging.ERROR, logging.CRITICAL - """ - import sys - - logger = logging.getLogger() - logger.handlers = [] # Clear previous handlers - logger.setLevel(logging.DEBUG) - - # Logging will go to 2 different places (Console + log file) - file_logger = logging.FileHandler(logging_path) - file_logger.setLevel(file_log_level) - console_logger = logging.StreamHandler(sys.stdout) - console_logger.setLevel(console_log_level) - - # Format the handlers - formatter = Timestamp('%(asctime)s - [%(levelname)s](%(name)s) %(message)s', - datefmt="%Y-%m-%d %H:%M:%S.%f") - console_logger.setFormatter(formatter) - file_logger.setFormatter(formatter) - logger.addHandler(console_logger) - logger.addHandler(file_logger) - - # Quiet the urllib3 logger - connection_logger = logging.getLogger('requests.packages.urllib3.connectionpool') - connection_logger.setLevel(logging.ERROR) - - return logger - - -def collate_logs(path, extension, final_log): - """ - Combine logs of a given extension into final_log in a sorted manner. - Delete original source logs. - """ - - import datetime - import os - from glob import glob - - logs = [] - malformed_lines = [] - - for log_path in glob("{}/*.{}".format(path, extension)): - with open(log_path, "r") as f: - for line in f: - try: - date_string = line.split(" - ")[0] - date_time = datetime.datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S.%f") - logs.append((date_time, line)) - except: - malformed_lines.append("Malformed line in {}: {}".format(log_path, line)) - os.remove(log_path) - - logs.sort() - with open("{}/{}".format(path, final_log), "w") as collated_logs: - for date_time, message in logs: - collated_logs.write(message) - - # This takes care of the scenario where the last run had some uncollatable - # entries, but this run has none. It will write an empty file, then erase - # it. - uncollatable_path = "{}/{}_UNCOLLATABLE.log".format(path, final_log) - with open(uncollatable_path, "w") as uncollatable_logs: - for message in malformed_lines: - uncollatable_logs.write(message) - if not malformed_lines: - os.remove(uncollatable_path) diff --git a/micall/core/prelim_map.py b/micall/core/prelim_map.py index 619caa74c..6e4708526 100755 --- a/micall/core/prelim_map.py +++ b/micall/core/prelim_map.py @@ -1,7 +1,5 @@ -#! /usr/bin/env python3.4 - """ -Shipyard-style bowtie2 +Kive-style bowtie2 Run bowtie2 on paired-end FASTQ data sets with user-supplied *.bt2 bowtie2 SAM format output to for redirection via subprocess.Popen Sort outputs by refname. @@ -14,7 +12,6 @@ import os import sys -from micall.core import miseq_logging from micall.core import project_config from micall.utils.externals import Bowtie2, Bowtie2Build, LineCounter @@ -28,7 +25,7 @@ REF_GAP_OPEN = 10 REF_GAP_EXTEND = 3 -logger = miseq_logging.init_logging_console_only(logging.DEBUG) +logger = logging.getLogger(__name__) line_counter = LineCounter() diff --git a/micall/core/remap.py b/micall/core/remap.py index 00e5d3d75..0fcc8fb41 100644 --- a/micall/core/remap.py +++ b/micall/core/remap.py @@ -1,5 +1,3 @@ -#! /usr/bin/env python3.4 - """ Takes preliminary SAM as CSV input. Iterative re-mapping of reads from original FASTQ files. @@ -11,7 +9,7 @@ from collections import Counter, defaultdict import csv from functools import partial -import logging +from logging import getLogger import os import re import shutil @@ -19,9 +17,10 @@ # noinspection PyUnresolvedReferences from gotoh import align_it + import Levenshtein -from micall.core import miseq_logging, project_config +from micall.core import project_config from micall.core.sam2aln import apply_cigar, merge_pairs, merge_inserts from micall.core.prelim_map import BOWTIE_BUILD_PATH, \ BOWTIE_PATH, BOWTIE_VERSION, READ_GAP_OPEN, READ_GAP_EXTEND, REF_GAP_OPEN, \ @@ -53,7 +52,7 @@ cigar_re = re.compile('[0-9]+[MIDNSHPX=]') # CIGAR token -logger = miseq_logging.init_logging_console_only(logging.DEBUG) +logger = getLogger(__name__) indel_re = re.compile('[+-][0-9]+') line_counter = LineCounter() @@ -262,7 +261,7 @@ def sam_to_conseqs(samfile, drifted_seeds.append((read_counts[name], name)) continue - other_seed = other_dist = None + other_seed = other_dist = seed_dist = None for seed_name in sorted(new_conseqs.keys()): seed_ref = original_seeds[seed_name] aligned_seed, aligned_conseq, _score = align_it(seed_ref, diff --git a/micall/monitor/kive_download.py b/micall/monitor/kive_download.py index 437fa6105..2c93f223f 100644 --- a/micall/monitor/kive_download.py +++ b/micall/monitor/kive_download.py @@ -16,9 +16,8 @@ KiveAPI = RunStatus = None from requests.adapters import HTTPAdapter -from micall.settings import kive_server_url, kive_user, kive_password, home, \ +from micall.settings import kive_server_url, kive_user, kive_password, \ kive_pipelines -from micall.core.miseq_logging import init_logging def parse_args(): @@ -200,9 +199,7 @@ def find_batch_runs(kive, batch_name, batch_size): def main(): - logger = init_logging(os.path.join(home, 'kive_download.log'), - file_log_level=logging.INFO, - console_log_level=logging.INFO) + logger = logging.getLogger(__name__) args = parse_args() logger.info('Starting.') @@ -232,5 +229,6 @@ def main(): download_results(runs, args.resultfolder, args.workfolder) logger.info('%d runs found (%d unfinished).', len(runs), unfinished_count) + if __name__ == '__main__': main() diff --git a/micall/monitor/kive_watcher.py b/micall/monitor/kive_watcher.py index e0aab0b7a..7a9f0d1c4 100644 --- a/micall/monitor/kive_watcher.py +++ b/micall/monitor/kive_watcher.py @@ -186,6 +186,9 @@ def is_full(self): for folder_watcher in self.folder_watchers.values()) return active_count >= self.config.max_active + def is_idle(self): + return not self.folder_watchers + def get_kive_pipeline(self, pipeline_id): self.check_session() kive_pipeline = self.pipelines.get(pipeline_id) diff --git a/micall/utils/remap_fastq_simplify.py b/micall/utils/remap_fastq_simplify.py index f509f20a3..41c4265ad 100644 --- a/micall/utils/remap_fastq_simplify.py +++ b/micall/utils/remap_fastq_simplify.py @@ -1,10 +1,8 @@ import csv from collections import defaultdict -from itertools import imap import logging import os -from micall.core.miseq_logging import init_logging_console_only from micall.core.remap import remap from micall.core.prelim_map import prelim_map from csv import DictReader @@ -159,7 +157,7 @@ def get_result(self, censored_map_count, trimmed_map_count): return DD.FAIL if diff >= 20 else DD.PASS def write_simple_fastq(self, filename1, read_indexes): - selected_reads = imap(self.reads.__getitem__, read_indexes) + selected_reads = map(self.reads.__getitem__, read_indexes) filename2 = get_reverse_filename(filename1) with open(filename1, 'w') as f1, open(filename2, 'w') as f2: for lines in selected_reads: @@ -219,7 +217,7 @@ def read_fastq(filename, reads): def main(): - logger = init_logging_console_only(logging.INFO) + logger = logging.getLogger(__name__) try: logger.info('Starting.') fname = 'censored1.fastq' diff --git a/micall/utils/sam_g2p_simplify.py b/micall/utils/sam_g2p_simplify.py index 4f2b2f958..e573e355d 100644 --- a/micall/utils/sam_g2p_simplify.py +++ b/micall/utils/sam_g2p_simplify.py @@ -5,9 +5,8 @@ from subprocess import check_call, call from tempfile import NamedTemporaryFile -from micall.core.miseq_logging import init_logging_console_only from micall.g2p.pssm_lib import Pssm -from micall.g2p.fastq_g2p import sam_g2p +from micall.g2p.fastq_g2p import fastq_g2p def write_simple_sam(samfile, sam_lines): @@ -37,8 +36,9 @@ def test(remap_lines, temp_prefix, pssm, ruby_script, delete_results=True): cwd=ruby_path) with open(nuc_filename, 'rU') as nuc_csv, \ open(python_out_filename, 'wb') as g2p_csv: - - sam_g2p(pssm, remap_file, nuc_csv, g2p_csv) + + # TODO: update this to the new arguments. + fastq_g2p(pssm, remap_file, nuc_csv, g2p_csv) with open(os.devnull, 'w') as devnull: is_diff = call(['diff', '-q', ruby_out_filename, python_out_filename], @@ -110,7 +110,7 @@ def compare_conseqs(txtfilename, ruby_script, pssm): args = parser.parse_args() - logger = init_logging_console_only(logging.INFO) + logger = logging.getLogger(__name__) pssm = Pssm(path_to_lookup='../g2p/g2p_fpr.txt', path_to_matrix='../g2p/g2p.matrix') for txtfilename in sorted(glob.glob(os.path.join(args.workdir, args.pattern))): diff --git a/micall_basespace.py b/micall_basespace.py index 45176c533..b9c929ab0 100644 --- a/micall_basespace.py +++ b/micall_basespace.py @@ -319,11 +319,11 @@ def link_samples(run_path, data_path): sample_groups = [] run_info_path = os.path.join(run_path, 'RunInfo.xml') - if not os.path.exists(run_info_path): + interop_path = os.path.join(run_path, 'InterOp') + if not (os.path.exists(run_info_path) and os.path.exists(interop_path)): read_sizes = None else: read_sizes = parse_read_sizes(run_info_path) - interop_path = os.path.join(run_path, 'Interop') run_info = RunInfo(sample_groups, reports=['PR_RT', 'IN', 'NS3', 'NS5a', 'NS5b'], interop_path=interop_path, diff --git a/micall_kive.py b/micall_kive.py index a11165a59..304725a27 100644 --- a/micall_kive.py +++ b/micall_kive.py @@ -8,7 +8,7 @@ from micall.drivers.sample import Sample from micall.g2p.pssm_lib import Pssm -logger = logging.getLogger('foo') +logger = logging.getLogger(__name__) def parse_args(): diff --git a/micall_kive_resistance.py b/micall_kive_resistance.py index 46be6ea8e..4d9b16b5f 100644 --- a/micall_kive_resistance.py +++ b/micall_kive_resistance.py @@ -8,7 +8,7 @@ from micall.drivers.sample import Sample from micall.drivers.sample_group import SampleGroup -logger = logging.getLogger('foo') +logger = logging.getLogger(__name__) def parse_args(): diff --git a/micall_logging_config.py b/micall_logging_config.py new file mode 100644 index 000000000..63cc1805c --- /dev/null +++ b/micall_logging_config.py @@ -0,0 +1,50 @@ +""" Logging configuration for the MiCall pipeline. + +You can override the settings in micall_logging_config.py by copying the whole +file to micall_logging_override.py. The original settings in +micall_logging_config.py should be useful defaults for developers, with extra +settings available for production use, but disabled. + +For a detailed description of the settings, see the Python documentation: +https://docs.python.org/3/library/logging.config.html#logging-config-dictschema + +Do not commit micall_logging_override.py to source control. +""" + +LOGGING = { + # This is the default logger. Probably want to switch console to mail. + 'root': {'handlers': ['console', 'file'], + 'level': 'INFO'}, + 'loggers': {"__main__": {"level": "INFO"}}, + + # This lets you call logging.getLogger() before the configuration is done. + 'disable_existing_loggers': False, + + 'version': 1, + 'formatters': {'basic': { + 'format': '%(asctime)s[%(levelname)s]%(name)s.%(funcName)s(): %(message)s', + 'datefmt': '%Y-%m-%d %H:%M:%S'}}, + 'filters': { + 'rate_limit': {'()': 'micall.utils.ratelimitingfilter.RateLimitingFilter', + 'rate': 1, + 'per': 300, + 'burst': 5} + }, + 'handlers': {'console': {'class': 'logging.StreamHandler', + 'level': 'DEBUG', + 'formatter': 'basic'}, + 'file': {'class': 'logging.handlers.RotatingFileHandler', + 'level': 'DEBUG', + 'formatter': 'basic', + 'filename': 'micall.log', + 'maxBytes': 1024*1024*15, # 15MB + 'backupCount': 10}, + 'mail': {'class': 'logging.handlers.SMTPHandler', + 'filters': ['rate_limit'], + 'level': 'WARN', + 'formatter': 'basic', + 'mailhost': 'mail.FILLINDOMAIN.com', + 'fromaddr': 'no.reply.micall.server@FILLINDOMAIN.com', + 'toaddrs': ['admin.team@FILLINDOMAIN.com'], + 'subject': 'Error logged in MiCall Watcher'}}, +} diff --git a/micall_watcher.py b/micall_watcher.py index fc1732513..f8c29c1b4 100644 --- a/micall_watcher.py +++ b/micall_watcher.py @@ -1,22 +1,22 @@ from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter, SUPPRESS - -import os from functools import partial - +import logging.config +import os from pathlib import Path from queue import Queue, Empty from threading import Thread from time import sleep from micall.monitor.kive_watcher import find_samples, KiveWatcher, FolderEventType - +from micall.monitor import update_qai try: - from micall.monitor import update_qai + from micall_logging_override import LOGGING except ImportError: - # Swallow import error to allow testing. Check again at start of main(). - update_qai = None + from micall_logging_config import LOGGING POLLING_DELAY = 10 # seconds between scans for new samples or finished runs +logging.config.dictConfig(LOGGING) +logger = logging.getLogger(__name__) def parse_args(argv=None): @@ -81,6 +81,10 @@ def parse_args(argv=None): type=int, default=os.environ.get('MICALL_MAX_ACTIVE', 5), help='maximum number of active jobs in Kive') + parser.add_argument( + '--quit', + action='store_true', + help='quit when all runs are processed') args = parser.parse_args(argv) if not hasattr(args, 'kive_password'): @@ -94,10 +98,8 @@ def parse_args(argv=None): def main(): - if update_qai is None: - raise RuntimeError( - 'Failed to import update_qai. Is the requests library installed?') args = parse_args() + logger.info('Starting up.') result_handler = partial(update_qai.process_folder, qai_server=args.qai_server, qai_user=args.qai_user, @@ -127,7 +129,9 @@ def main(): else: kive_watcher.finish_folder(folder_event.base_calls) except Empty: - pass + if args.quit and kive_watcher.is_idle(): + logger.info('Shutting down.') + break if __name__ == '__main__':