diff --git a/Snakefile b/Snakefile index e3d8dd15..3d543488 100644 --- a/Snakefile +++ b/Snakefile @@ -107,7 +107,8 @@ rule filter: input: sequences = "data/sequences_{serotype}.fasta", metadata = "results/wrangled_metadata_{serotype}.tsv", - exclude = files.dropped_strains + exclude = files.dropped_strains, + include_strains = "config/include_{serotype}.txt" output: sequences = "results/filtered_{serotype}.fasta" params: @@ -120,6 +121,7 @@ rule filter: --sequences {input.sequences} \ --metadata {input.metadata} \ --exclude {input.exclude} \ + --include {input.include_strains} \ --output {output.sequences} \ --group-by {params.group_by} \ --sequences-per-group {params.sequences_per_group} \ diff --git a/config/include_all.txt b/config/include_all.txt new file mode 100644 index 00000000..2920968d --- /dev/null +++ b/config/include_all.txt @@ -0,0 +1,54 @@ +OR389282 # 1944-XX-XX DENV1/I +OR389293 # 1964-XX-XX DENV1/I +AY732483 # 1981-XX-XX DENV1/I +KM204119 # 1944-XX-XX DENV1/II +OR389281 # 1944-XX-XX DENV1/II +EU848545 # 1944-XX-XX DENV1/II +KF289073 # 1956-XX-XX DENV1/III +AY732474 # 1980-XX-XX DENV1/III +AY732476 # 1980-XX-XX DENV1/III +MW945952 # 1974-XX-XX DENV1/IV +OK469344 # 1975-XX-XX DENV1/IV +OK605753 # 1983-XX-XX DENV1/IV +JQ922544 # 1963-XX-XX DENV1/V +OR389283 # 1963-XX-XX DENV1/V +OR389284 # 1970-XX-XX DENV1/V +OK469346 # 1981-XX-XX DENV2/AA +KF955363 # 1986-XX-XX DENV2/AA +EU482568 # 1986-XX-XX DENV2/AA +NC_001474 # 1964-XX-XX DENV2/AI +GQ868591 # 1964-XX-XX DENV2/AI +AJ487271 # 1974-XX-XX DENV2/AI +KM204118 # 1944-XX-XX DENV2/AII +MW945433 # 1944-XX-XX DENV2/AII +EU854293 # 1944-XX-XX DENV2/AII +JX966380 # 1969-XX-XX DENV2/AM +GQ868600 # 1969-XX-XX DENV2/AM +HM582099 # 1971-XX-XX DENV2/AM +OR389318 # 1969-XX-XX DENV2/C +MW946478 # 1974-XX-XX DENV2/C +GQ398258 # 1975-XX-XX DENV2/C +OR389309 # 1959-XX-XX DENV2/S +JQ922552 # 1960-XX-XX DENV2/S +OR389310 # 1963-XX-XX DENV2/S +OM258630 # 1953-XX-XX DENV3/I +MW945427 # 1963-XX-XX DENV3/I +MW946955 # 1978-XX-XX DENV3/I +OK469355 # 1973-XX-XX DENV3/II +DQ863638 # 1973-XX-XX DENV3/II +GQ868593 # 1973-XX-XX DENV3/II +JQ922555 # 1966-XX-XX DENV3/III +OR389336 # 1966-XX-XX DENV3/III +OR389337 # 1970-XX-XX DENV3/III +KU050695 # 1956-XX-XX DENV3/IV +JQ922554 # 1963-XX-XX DENV3/IV +KM190937 # 1964-XX-XX DENV3/IV +KR011349 # 1956-08-28 DENV4/I +GQ868594 # 1956-XX-XX DENV4/I +OR389348 # 1961-XX-XX DENV4/I +KF907503 # 1953-XX-XX DENV4/II +OR389347 # 1961-XX-XX DENV4/II +MW945636 # 1973-XX-XX DENV4/II +MW945552 # 1973-XX-XX DENV4/S +OR389042 # 1973-XX-XX DENV4/S +JF262780 # 1973-XX-XX DENV4/S diff --git a/config/include_denv1.txt b/config/include_denv1.txt new file mode 100644 index 00000000..0e164799 --- /dev/null +++ b/config/include_denv1.txt @@ -0,0 +1,15 @@ +OR389282 # 1944-XX-XX DENV1/I +OR389293 # 1964-XX-XX DENV1/I +AY732483 # 1981-XX-XX DENV1/I +KM204119 # 1944-XX-XX DENV1/II +OR389281 # 1944-XX-XX DENV1/II +EU848545 # 1944-XX-XX DENV1/II +KF289073 # 1956-XX-XX DENV1/III +AY732474 # 1980-XX-XX DENV1/III +AY732476 # 1980-XX-XX DENV1/III +MW945952 # 1974-XX-XX DENV1/IV +OK469344 # 1975-XX-XX DENV1/IV +OK605753 # 1983-XX-XX DENV1/IV +JQ922544 # 1963-XX-XX DENV1/V +OR389283 # 1963-XX-XX DENV1/V +OR389284 # 1970-XX-XX DENV1/V diff --git a/config/include_denv2.txt b/config/include_denv2.txt new file mode 100644 index 00000000..7c11de92 --- /dev/null +++ b/config/include_denv2.txt @@ -0,0 +1,18 @@ +OK469346 # 1981-XX-XX DENV2/AA +KF955363 # 1986-XX-XX DENV2/AA +EU482568 # 1986-XX-XX DENV2/AA +NC_001474 # 1964-XX-XX DENV2/AI +GQ868591 # 1964-XX-XX DENV2/AI +AJ487271 # 1974-XX-XX DENV2/AI +KM204118 # 1944-XX-XX DENV2/AII +MW945433 # 1944-XX-XX DENV2/AII +EU854293 # 1944-XX-XX DENV2/AII +JX966380 # 1969-XX-XX DENV2/AM +GQ868600 # 1969-XX-XX DENV2/AM +HM582099 # 1971-XX-XX DENV2/AM +OR389318 # 1969-XX-XX DENV2/C +MW946478 # 1974-XX-XX DENV2/C +GQ398258 # 1975-XX-XX DENV2/C +OR389309 # 1959-XX-XX DENV2/S +JQ922552 # 1960-XX-XX DENV2/S +OR389310 # 1963-XX-XX DENV2/S diff --git a/config/include_denv3.txt b/config/include_denv3.txt new file mode 100644 index 00000000..6cad6d56 --- /dev/null +++ b/config/include_denv3.txt @@ -0,0 +1,12 @@ +OM258630 # 1953-XX-XX DENV3/I +MW945427 # 1963-XX-XX DENV3/I +MW946955 # 1978-XX-XX DENV3/I +OK469355 # 1973-XX-XX DENV3/II +DQ863638 # 1973-XX-XX DENV3/II +GQ868593 # 1973-XX-XX DENV3/II +JQ922555 # 1966-XX-XX DENV3/III +OR389336 # 1966-XX-XX DENV3/III +OR389337 # 1970-XX-XX DENV3/III +KU050695 # 1956-XX-XX DENV3/IV +JQ922554 # 1963-XX-XX DENV3/IV +KM190937 # 1964-XX-XX DENV3/IV diff --git a/config/include_denv4.txt b/config/include_denv4.txt new file mode 100644 index 00000000..53d17266 --- /dev/null +++ b/config/include_denv4.txt @@ -0,0 +1,9 @@ +KR011349 # 1956-08-28 DENV4/I +GQ868594 # 1956-XX-XX DENV4/I +OR389348 # 1961-XX-XX DENV4/I +KF907503 # 1953-XX-XX DENV4/II +OR389347 # 1961-XX-XX DENV4/II +MW945636 # 1973-XX-XX DENV4/II +MW945552 # 1973-XX-XX DENV4/S +OR389042 # 1973-XX-XX DENV4/S +JF262780 # 1973-XX-XX DENV4/S