diff --git a/augur/align.py b/augur/align.py index c9f385e1a..83de03991 100644 --- a/augur/align.py +++ b/augur/align.py @@ -294,7 +294,7 @@ def analyse_insertions(aln, ungapped, insertion_csv): for insertion_seq, strains in i_data.items(): for strain in strains: strain_data[strain][idx] = insertion_seq - with open(insertion_csv, 'w') as fh: + with open(insertion_csv, 'w', encoding='utf-8') as fh: print(",".join(header), file=fh) for strain in strain_data: print("{},{}".format(strain, ",".join(strain_data[strain])), file=fh) diff --git a/augur/distance.py b/augur/distance.py index 37c6125d6..a24294407 100644 --- a/augur/distance.py +++ b/augur/distance.py @@ -166,7 +166,7 @@ def read_distance_map(map_file): [('default', 0.0), ('map', {'SigPep': {0: {('W', 'P'): -8.3}}})] """ # Load the JSON. - with open(map_file, "r") as fh: + with open(map_file, "r", encoding='utf-8') as fh: json_distance_map = json.load(fh) # Confirm that all required fields are present. diff --git a/augur/export_v2.py b/augur/export_v2.py index f3574ffed..b8358300d 100644 --- a/augur/export_v2.py +++ b/augur/export_v2.py @@ -783,7 +783,7 @@ def set_description(data_json, cmd_line_description_file): `meta.description` in *data_json* to the text provided. """ try: - with open(cmd_line_description_file) as description_file: + with open(cmd_line_description_file, encoding='utf-8') as description_file: markdown_text = description_file.read() data_json['meta']['description'] = markdown_text except FileNotFoundError: diff --git a/augur/filter.py b/augur/filter.py index 0f570f356..700bdff99 100644 --- a/augur/filter.py +++ b/augur/filter.py @@ -16,9 +16,9 @@ def read_vcf(filename): if filename.lower().endswith(".gz"): import gzip - file = gzip.open(filename, mode="rt") + file = gzip.open(filename, mode="rt", encoding='utf-8') else: - file = open(filename) + file = open(filename, encoding='utf-8') chrom_line = next(line for line in file if line.startswith("#C")) file.close() @@ -55,7 +55,7 @@ def write_vcf(input_filename, output_filename, dropped_samps): def read_priority_scores(fname): try: - with open(fname) as pfile: + with open(fname, encoding='utf-8') as pfile: return defaultdict(float, { elems[0]: float(elems[1]) for elems in (line.strip().split() for line in pfile.readlines()) @@ -169,7 +169,7 @@ def run(args): num_excluded_by_name = 0 if args.exclude: try: - with open(args.exclude, 'r') as ifile: + with open(args.exclude, 'r', encoding='utf-8') as ifile: to_exclude = set() for line in ifile: if line[0] != comment_char: @@ -326,7 +326,7 @@ def run(args): # Note that we are also not checking for existing meta data here num_included_by_name = 0 if args.include and os.path.isfile(args.include): - with open(args.include, 'r') as ifile: + with open(args.include, 'r', encoding='utf-8') as ifile: to_include = set( [ line.strip() diff --git a/augur/frequencies.py b/augur/frequencies.py index a1dd3b572..d616e92af 100644 --- a/augur/frequencies.py +++ b/augur/frequencies.py @@ -82,7 +82,7 @@ def run(args): if args.method == "kde": # Load weights if they have been provided. if args.weights: - with open(args.weights, "r") as fh: + with open(args.weights, "r", encoding='utf-8') as fh: weights = json.load(fh) weights_attribute = args.weights_attribute diff --git a/augur/import_beast.py b/augur/import_beast.py index 1cbfebbf7..5732c1528 100644 --- a/augur/import_beast.py +++ b/augur/import_beast.py @@ -231,7 +231,7 @@ def parse_nexus(tree_path, treestring_regex=r'tree [A-Za-z\_]+([0-9]+)', verbose if isinstance(tree_path,str): ## determine if path or handle was provided to function try: - handle=open(tree_path,'r') + handle=open(tree_path,'r', encoding='utf-8') except FileNotFoundError: print("FATAL: No such file {}".format(tree_path)) sys.exit(2) diff --git a/augur/lbi.py b/augur/lbi.py index 7d5a56034..e26538b92 100644 --- a/augur/lbi.py +++ b/augur/lbi.py @@ -94,7 +94,7 @@ def run(args): tree = Bio.Phylo.read(args.tree, "newick") # Load branch lengths. - with open(args.branch_lengths, "r") as json_fh: + with open(args.branch_lengths, "r", encoding='utf-8') as json_fh: branch_lengths = json.load(json_fh) # Annotate branch lengths and dates onto tree nodes. diff --git a/augur/parse.py b/augur/parse.py index 23b627ae0..6fc6bb2c3 100644 --- a/augur/parse.py +++ b/augur/parse.py @@ -92,7 +92,7 @@ def run(args): strain_key = args.fields[0] # loop over sequences, parse fasta header of each sequence - with open(args.output_sequences, 'w') as output: + with open(args.output_sequences, 'w', encoding='utf-8') as output: for seq in seqs: fields = map(str.strip, seq.description.split(args.separator)) tmp_meta = dict(zip(args.fields, fields)) diff --git a/augur/reconstruct_sequences.py b/augur/reconstruct_sequences.py index b5acb407e..981f835b8 100644 --- a/augur/reconstruct_sequences.py +++ b/augur/reconstruct_sequences.py @@ -73,7 +73,7 @@ def run(args): #if VCF, read in the reference seq for each gene, put on root if(is_vcf): node_data["nodes"][root_node]['aa_sequences'] = {} - with open(args.vcf_aa_reference) as handle: + with open(args.vcf_aa_reference, encoding='utf-8') as handle: for record in SeqIO.parse(handle, "fasta"): if record.id==args.gene: #'root' may not be same as 'reference', so apply any mutations at root here! diff --git a/augur/traits.py b/augur/traits.py index 185f26851..701303e58 100644 --- a/augur/traits.py +++ b/augur/traits.py @@ -145,7 +145,7 @@ def run(args): if args.weights: weight_dict = {c:{} for c in args.columns} sep = ',' if args.weights.endswith('csv') else '\t' - with open(args.weights, 'r') as fh: + with open(args.weights, 'r', encoding='utf-8') as fh: for line in fh: if line[0]=='#': continue @@ -187,7 +187,7 @@ def run(args): models[column]['transition_matrix'] = [list(x) for x in gtr.W] if gtr: - with open(out_prefix+'%s.mugration_model.txt'%column, 'w') as ofile: + with open(out_prefix+'%s.mugration_model.txt'%column, 'w', encoding='utf-8') as ofile: ofile.write('Map from character to field name\n') for k,v in alphabet.items(): ofile.write(k+':\t'+str(v)+'\n') diff --git a/augur/translate.py b/augur/translate.py index 5348429eb..244b86c12 100644 --- a/augur/translate.py +++ b/augur/translate.py @@ -285,7 +285,7 @@ def assign_aa_fasta(tree, translations): def get_genes_from_file(fname): genes = [] if os.path.isfile(fname): - with open(fname) as ifile: + with open(fname, encoding='utf-8') as ifile: for line in ifile: fields = line.strip().split('#') if fields[0].strip(): diff --git a/augur/tree.py b/augur/tree.py index a83336640..5c878a22d 100644 --- a/augur/tree.py +++ b/augur/tree.py @@ -136,13 +136,13 @@ def build_iqtree(aln_file, out_file, substitution_model="GTR", clean_up=True, nt aln_file file name of input aligment out_file file name to write tree to ''' - with open(aln_file) as ifile: + with open(aln_file, encoding='utf-8') as ifile: tmp_seqs = ifile.readlines() # IQ-tree messes with taxon names. Hence remove offending characters, reinstaniate later tmp_aln_file = aln_file.replace(".fasta", "-delim.fasta") log_file = tmp_aln_file.replace(".fasta", ".iqtree.log") - with open(tmp_aln_file, 'w') as ofile: + with open(tmp_aln_file, 'w', encoding='utf-8') as ofile: for line in tmp_seqs: ofile.write(line.replace('/', '_X_X_').replace('|','_Y_Y_').replace("(","_X_Y_").replace(")","_Y_X_")) @@ -256,7 +256,7 @@ def write_out_informative_fasta(compress_seq, alignment, stripFile=None): #If want a position map, print: if printPositionMap: - with open(fasta_file+".positions.txt", 'w') as the_file: + with open(fasta_file+".positions.txt", 'w', encoding='utf-8') as the_file: the_file.write("\n".join(pos)) return fasta_file @@ -294,7 +294,7 @@ def mask_sites_in_multiple_sequence_alignment(alignment_file, excluded_sites_fil # Write the masked alignment to disk one record at a time. alignment_file_path = Path(alignment_file) masked_alignment_file = str(alignment_file_path.parent / ("masked_%s" % alignment_file_path.name)) - with open(masked_alignment_file, "w") as oh: + with open(masked_alignment_file, "w", encoding='utf-8') as oh: for record in alignment: # Convert to a mutable sequence to enable masking with Ns. sequence = record.seq.tomutable() diff --git a/augur/utils.py b/augur/utils.py index b20e768c0..8aa917d5a 100644 --- a/augur/utils.py +++ b/augur/utils.py @@ -26,10 +26,10 @@ def open_file(fname, mode): if "t" not in mode: # For interoperability, gzip needs to open files in "text" mode mode = mode + "t" - with gzip.open(fname, mode) as fh: + with gzip.open(fname, mode, encoding='utf-8') as fh: yield fh else: - with open(fname, mode) as fh: + with open(fname, mode, encoding='utf-8') as fh: yield fh def is_vcf(fname): @@ -47,9 +47,9 @@ def is_vcf(fname): def myopen(fname, mode): if fname.endswith('.gz'): import gzip - return gzip.open(fname, mode) + return gzip.open(fname, mode, encoding='utf-8') else: - return open(fname, mode) + return open(fname, mode, encoding='utf-8') def get_json_name(args, default=None): if args.output_node_data: @@ -231,7 +231,7 @@ def read_node_data(fnames, tree=None): node_data = {"nodes": {}} for fname in fnames: if os.path.isfile(fname): - with open(fname) as jfile: + with open(fname, encoding='utf-8') as jfile: tmp_data = json.load(jfile) if tmp_data.get("annotations"): try: @@ -327,7 +327,7 @@ def write_json(data, file_name, indent=(None if os.environ.get("AUGUR_MINIFY_JSO if include_version: data["generated_by"] = {"program": "augur", "version": get_augur_version()} - with open(file_name, 'w') as handle: + with open(file_name, 'w', encoding='utf-8') as handle: json.dump(data, handle, indent=indent, sort_keys=True) @@ -348,7 +348,7 @@ def load_features(reference, feature_names=None): return None limit_info = dict( gff_type = ['gene'] ) - with open(reference) as in_handle: + with open(reference, encoding='utf-8') as in_handle: for rec in GFF.parse(in_handle, limit_info=limit_info): for feat in rec.features: if feature_names is not None: #check both tags; user may have used either @@ -468,7 +468,7 @@ def add_line(line): if overrides: if os.path.isfile(overrides): - with open(overrides) as fh: + with open(overrides, encoding='utf-8') as fh: for line in fh: add_line(line) else: @@ -497,7 +497,7 @@ def write_VCF_translation(prot_dict, vcf_file_name, ref_file_name): #prepare the header of the VCF & write out header=["#CHROM","POS","ID","REF","ALT","QUAL","FILTER","INFO","FORMAT"]+seqNames - with open(vcf_file_name, 'w') as the_file: + with open(vcf_file_name, 'w', encoding='utf-8') as the_file: the_file.write( "##fileformat=VCFv4.2\n"+ "##source=NextStrain_Protein_Translation\n"+ "##FORMAT=\n") @@ -552,10 +552,10 @@ def write_VCF_translation(prot_dict, vcf_file_name, ref_file_name): vcfWrite.append("\t".join(output)) #write it all out - with open(ref_file_name, 'w') as the_file: + with open(ref_file_name, 'w', encoding='utf-8') as the_file: the_file.write("\n".join(refWrite)) - with open(vcf_file_name, 'a') as the_file: + with open(vcf_file_name, 'a', encoding='utf-8') as the_file: the_file.write("\n".join(vcfWrite)) if vcf_file_name.lower().endswith('.gz'): @@ -869,7 +869,7 @@ def read_mask_file(mask_file): Sorted list of unique zero-indexed sites """ mask_sites = [] - with open(mask_file) as mf: + with open(mask_file, encoding='utf-8') as mf: for idx, line in enumerate(l.strip() for l in mf.readlines()): if "\t" in line: line = line.split("\t")[1]