From 567279a5047d6f3ba17d3a0475d78d9ed8af90fe Mon Sep 17 00:00:00 2001
From: Victor Lin <13424970+victorlin@users.noreply.github.com>
Date: Thu, 26 Sep 2024 16:05:10 -0700
Subject: [PATCH] Use only the 21L sequence from references

data/references_sequences.fasta contains both `Wuhan/Hu-1/2019` and
`21L`. The aligned input is expected to contain `Wuhan/Hu-1/2019` so the
merging with references_sequences.fasta is really only for 21L.
---
 nextstrain_profiles/nextstrain-gisaid-21L/prefilter.smk | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nextstrain_profiles/nextstrain-gisaid-21L/prefilter.smk b/nextstrain_profiles/nextstrain-gisaid-21L/prefilter.smk
index 23a38ee8d..01be8ad67 100644
--- a/nextstrain_profiles/nextstrain-gisaid-21L/prefilter.smk
+++ b/nextstrain_profiles/nextstrain-gisaid-21L/prefilter.smk
@@ -92,7 +92,8 @@ rule gisaid_21L_aligned:
         exec 2> {log:q}
 
         < {input.references:q} \
-            zstd \
+            seqkit grep --by-name --pattern 21L \
+          | zstd \
         > {output.aligned}
 
         < {input.aligned:q} \