From 5177fbc3471cf19f847e6e3607d50cd18e0bcad7 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Thu, 25 Apr 2024 11:15:27 -0700 Subject: [PATCH 1/5] Copy filtering/subsampling guide into this repo Future commits will modify the contents to fit this repo better. Copied from --- .gitignore | 1 - src/fetch-docs.py | 1 - .../filtering-and-subsampling.rst | 208 +++++++++++++++++- 3 files changed, 207 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 155988ba..a86f89b8 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,3 @@ src/guides/bioinformatics/translate_ref.md src/guides/bioinformatics/vcf_input.md src/guides/communicate/create-pdf.md src/guides/communicate/narratives-intro.md -src/snippets/filtering-and-subsampling.rst diff --git a/src/fetch-docs.py b/src/fetch-docs.py index f5d78df1..985212b0 100755 --- a/src/fetch-docs.py +++ b/src/fetch-docs.py @@ -22,7 +22,6 @@ f'{augur_url}faq/fasta_input.md': 'guides/bioinformatics/fasta_input.md', f'{augur_url}faq/seq_traits.md': 'guides/bioinformatics/seq_traits.md', f'{augur_url}examples/examples.rst': 'guides/bioinformatics/examples.rst', - f'{augur_url}usage/cli/snippets/filtering-and-subsampling.rst': 'snippets/filtering-and-subsampling.rst', } if __name__ == '__main__': diff --git a/src/guides/bioinformatics/filtering-and-subsampling.rst b/src/guides/bioinformatics/filtering-and-subsampling.rst index 4522e63a..22a63d1f 100644 --- a/src/guides/bioinformatics/filtering-and-subsampling.rst +++ b/src/guides/bioinformatics/filtering-and-subsampling.rst @@ -8,4 +8,210 @@ sample data. .. contents:: Table of Contents :local: -.. include:: ../../snippets/filtering-and-subsampling.rst +Filtering +--------- + +The filter command allows you to select various subsets of your input data for different types of analysis. +A simple example use of this command would be + +.. code-block:: bash + + augur filter \ + --sequences data/sequences.fasta \ + --metadata data/metadata.tsv \ + --min-date 2012 \ + --output-sequences filtered_sequences.fasta \ + --output-metadata filtered_metadata.tsv + +This command will select all sequences with collection date in 2012 or later. +The filter command has a large number of options that allow flexible filtering for many common situations. +One such use-case is the exclusion of sequences that are known to be outliers (e.g. because of sequencing errors, cell-culture adaptation, ...). +These can be specified in a separate text file (e.g. ``exclude.txt``): + +.. code-block:: + + BRA/2016/FC_DQ75D1 + COL/FLR_00034/2015 + ... + +To drop such strains, you can pass the filename to ``--exclude``: + +.. code-block:: bash + + augur filter \ + --sequences data/sequences.fasta \ + --metadata data/metadata.tsv \ + --min-date 2012 \ + --exclude exclude.txt \ + --output-sequences filtered_sequences.fasta \ + --output-metadata filtered_metadata.tsv + +Subsampling within ``augur filter`` +----------------------------------- + +Another common filtering operation is subsetting of data to a achieve a more even spatio-temporal distribution or to cut-down data set size to more manageable numbers. +The filter command allows you to select a specific number of sequences from specific groups, for example one sequence per month from each country: + +.. code-block:: bash + + augur filter \ + --sequences data/sequences.fasta \ + --metadata data/metadata.tsv \ + --min-date 2012 \ + --exclude exclude.txt \ + --group-by country year month \ + --sequences-per-group 1 \ + --output-sequences subsampled_sequences.fasta \ + --output-metadata subsampled_metadata.tsv + +Subsampling using multiple ``augur filter`` commands +---------------------------------------------------- + +There are some subsampling strategies in which a single call to ``augur filter`` +does not suffice. One such strategy is "tiered subsampling". In this strategy, +mutually exclusive sets of filters, each representing a "tier", are sampled with +different subsampling rules. This is commonly used to create geographic tiers. +Consider this subsampling scheme: + + Sample 100 sequences from Washington state and 50 sequences from the rest of the United States. + +This cannot be done in a single call to ``augur filter``. Instead, it can be +decomposed into multiple schemes, each handled by a single call to ``augur +filter``. Additionally, there is an extra step to combine the intermediate +samples. + + 1. Sample 100 sequences from Washington state. + 2. Sample 50 sequences from the rest of the United States. + 3. Combine the samples. + +Calling ``augur filter`` multiple times +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A basic approach is to run the ``augur filter`` commands directly. This works +well for ad-hoc analyses. + +.. code-block:: bash + + # 1. Sample 100 sequences from Washington state + augur filter \ + --sequences sequences.fasta \ + --metadata metadata.tsv \ + --query "state == 'WA'" \ + --subsample-max-sequences 100 \ + --output-strains sample_strains_state.txt + + # 2. Sample 50 sequences from the rest of the United States + augur filter \ + --sequences sequences.fasta \ + --metadata metadata.tsv \ + --query "state != 'WA' & country == 'USA'" \ + --subsample-max-sequences 50 \ + --output-strains sample_strains_country.txt + + # 3. Combine using augur filter + augur filter \ + --sequences sequences.fasta \ + --metadata metadata.tsv \ + --exclude-all \ + --include sample_strains_state.txt \ + sample_strains_country.txt \ + --output-sequences subsampled_sequences.fasta \ + --output-metadata subsampled_metadata.tsv + +Each intermediate sample is represented by a strain list file obtained from +``--output-strains``. The final step uses ``augur filter`` with ``--exclude-all`` +and ``--include`` to sample the data based on the intermediate strain list +files. If the same strain appears in both files, ``augur filter`` will only +write it once in each of the final outputs. + +Generalizing subsampling in a workflow +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The approach above can be cumbersome with more intermediate samples. To +generalize this process and allow for more flexibility, a workflow management +system can be used. The following examples use `Snakemake`_. + +1. Add a section in the `config file`_. + + .. code-block:: yaml + + subsampling: + state: --query "state == 'WA'" --subsample-max-sequences 100 + country: --query "state != 'WA' & country == 'USA'" --subsample-max-sequences 50 + +2. Add two rules in a `Snakefile`_. If you are building a standard Nextstrain + workflow, the output files should be used as input to sequence alignment. See + :doc:`docs.nextstrain.org:learn/parts` to learn more about the placement of + this step within a workflow. + + .. code-block:: python + + # 1. Sample 100 sequences from Washington state + # 2. Sample 50 sequences from the rest of the United States + rule intermediate_sample: + input: + metadata = "data/metadata.tsv", + output: + strains = "results/sample_strains_{sample_name}.txt", + params: + augur_filter_args = lambda wildcards: config.get("subsampling", {}).get(wildcards.sample_name, "") + shell: + """ + augur filter \ + --metadata {input.metadata} \ + {params.augur_filter_args} \ + --output-strains {output.strains} + """ + + # 3. Combine using augur filter + rule combine_intermediate_samples: + input: + sequences = "data/sequences.fasta", + metadata = "data/metadata.tsv", + intermediate_sample_strains = expand("results/sample_strains_{sample_name}.txt", sample_name=list(config.get("subsampling", {}).keys())) + output: + sequences = "results/subsampled_sequences.fasta", + metadata = "results/subsampled_metadata.tsv", + shell: + """ + augur filter \ + --sequences {input.sequences} \ + --metadata {input.metadata} \ + --exclude-all \ + --include {input.intermediate_sample_strains} \ + --output-sequences {output.sequences} \ + --output-metadata {output.metadata} + """ + +3. Run Snakemake targeting the second rule. + + .. code-block:: bash + + snakemake combine_intermediate_samples + +Explanation: + +- The configuration section consists of one entry per intermediate sample in the + format ``sample_name: ``. +- The first rule is run once per intermediate sample using `wildcards`_ and an + `input function`_. The output of each run is the sampled strain list. +- The second rule uses `expand()`_ to define input as all the intermediate + sampled strain lists, which are passed directly to ``--include`` as done in + the previous example. + +It is easy to add or remove intermediate samples. The configuration above can be +updated to add another tier in between state and country: + + .. code-block:: yaml + + subsampling: + state: --query "state == 'WA'" --subsample-max-sequences 100 + neighboring_states: --query "state in {'CA', 'ID', 'OR', 'NV'}" --subsample-max-sequences 75 + country: --query "country == 'USA' & state not in {'WA', 'CA', 'ID', 'OR', 'NV'}" --subsample-max-sequences 50 + +.. _Snakemake: https://snakemake.readthedocs.io/en/stable/index.html +.. _config file: https://snakemake.readthedocs.io/en/stable/snakefiles/configuration.html#snakefiles-standard-configuration +.. _Snakefile: https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html +.. _wildcards: https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#wildcards +.. _input function: https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#snakefiles-input-functions +.. _expand(): https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#the-expand-function From 7a50339b3105500d6d6815d3dc75e9d4f4d30eb0 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Thu, 25 Apr 2024 11:24:17 -0700 Subject: [PATCH 2/5] Use indent size of 3 spaces For consistency with other files in this project. Retain separate indent sizes for code blocks. --- .../filtering-and-subsampling.rst | 203 +++++++++--------- 1 file changed, 101 insertions(+), 102 deletions(-) diff --git a/src/guides/bioinformatics/filtering-and-subsampling.rst b/src/guides/bioinformatics/filtering-and-subsampling.rst index 22a63d1f..6a8c47b6 100644 --- a/src/guides/bioinformatics/filtering-and-subsampling.rst +++ b/src/guides/bioinformatics/filtering-and-subsampling.rst @@ -16,12 +16,12 @@ A simple example use of this command would be .. code-block:: bash - augur filter \ - --sequences data/sequences.fasta \ - --metadata data/metadata.tsv \ - --min-date 2012 \ - --output-sequences filtered_sequences.fasta \ - --output-metadata filtered_metadata.tsv + augur filter \ + --sequences data/sequences.fasta \ + --metadata data/metadata.tsv \ + --min-date 2012 \ + --output-sequences filtered_sequences.fasta \ + --output-metadata filtered_metadata.tsv This command will select all sequences with collection date in 2012 or later. The filter command has a large number of options that allow flexible filtering for many common situations. @@ -30,21 +30,21 @@ These can be specified in a separate text file (e.g. ``exclude.txt``): .. code-block:: - BRA/2016/FC_DQ75D1 - COL/FLR_00034/2015 - ... + BRA/2016/FC_DQ75D1 + COL/FLR_00034/2015 + ... To drop such strains, you can pass the filename to ``--exclude``: .. code-block:: bash - augur filter \ - --sequences data/sequences.fasta \ - --metadata data/metadata.tsv \ - --min-date 2012 \ - --exclude exclude.txt \ - --output-sequences filtered_sequences.fasta \ - --output-metadata filtered_metadata.tsv + augur filter \ + --sequences data/sequences.fasta \ + --metadata data/metadata.tsv \ + --min-date 2012 \ + --exclude exclude.txt \ + --output-sequences filtered_sequences.fasta \ + --output-metadata filtered_metadata.tsv Subsampling within ``augur filter`` ----------------------------------- @@ -54,15 +54,15 @@ The filter command allows you to select a specific number of sequences from spec .. code-block:: bash - augur filter \ - --sequences data/sequences.fasta \ - --metadata data/metadata.tsv \ - --min-date 2012 \ - --exclude exclude.txt \ - --group-by country year month \ - --sequences-per-group 1 \ - --output-sequences subsampled_sequences.fasta \ - --output-metadata subsampled_metadata.tsv + augur filter \ + --sequences data/sequences.fasta \ + --metadata data/metadata.tsv \ + --min-date 2012 \ + --exclude exclude.txt \ + --group-by country year month \ + --sequences-per-group 1 \ + --output-sequences subsampled_sequences.fasta \ + --output-metadata subsampled_metadata.tsv Subsampling using multiple ``augur filter`` commands ---------------------------------------------------- @@ -73,16 +73,16 @@ mutually exclusive sets of filters, each representing a "tier", are sampled with different subsampling rules. This is commonly used to create geographic tiers. Consider this subsampling scheme: - Sample 100 sequences from Washington state and 50 sequences from the rest of the United States. + Sample 100 sequences from Washington state and 50 sequences from the rest of the United States. This cannot be done in a single call to ``augur filter``. Instead, it can be decomposed into multiple schemes, each handled by a single call to ``augur filter``. Additionally, there is an extra step to combine the intermediate samples. - 1. Sample 100 sequences from Washington state. - 2. Sample 50 sequences from the rest of the United States. - 3. Combine the samples. + 1. Sample 100 sequences from Washington state. + 2. Sample 50 sequences from the rest of the United States. + 3. Combine the samples. Calling ``augur filter`` multiple times ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -92,31 +92,31 @@ well for ad-hoc analyses. .. code-block:: bash - # 1. Sample 100 sequences from Washington state - augur filter \ - --sequences sequences.fasta \ - --metadata metadata.tsv \ - --query "state == 'WA'" \ - --subsample-max-sequences 100 \ - --output-strains sample_strains_state.txt - - # 2. Sample 50 sequences from the rest of the United States - augur filter \ - --sequences sequences.fasta \ - --metadata metadata.tsv \ - --query "state != 'WA' & country == 'USA'" \ - --subsample-max-sequences 50 \ - --output-strains sample_strains_country.txt - - # 3. Combine using augur filter - augur filter \ - --sequences sequences.fasta \ - --metadata metadata.tsv \ - --exclude-all \ - --include sample_strains_state.txt \ - sample_strains_country.txt \ - --output-sequences subsampled_sequences.fasta \ - --output-metadata subsampled_metadata.tsv + # 1. Sample 100 sequences from Washington state + augur filter \ + --sequences sequences.fasta \ + --metadata metadata.tsv \ + --query "state == 'WA'" \ + --subsample-max-sequences 100 \ + --output-strains sample_strains_state.txt + + # 2. Sample 50 sequences from the rest of the United States + augur filter \ + --sequences sequences.fasta \ + --metadata metadata.tsv \ + --query "state != 'WA' & country == 'USA'" \ + --subsample-max-sequences 50 \ + --output-strains sample_strains_country.txt + + # 3. Combine using augur filter + augur filter \ + --sequences sequences.fasta \ + --metadata metadata.tsv \ + --exclude-all \ + --include sample_strains_state.txt \ + sample_strains_country.txt \ + --output-sequences subsampled_sequences.fasta \ + --output-metadata subsampled_metadata.tsv Each intermediate sample is represented by a strain list file obtained from ``--output-strains``. The final step uses ``augur filter`` with ``--exclude-all`` @@ -133,61 +133,60 @@ system can be used. The following examples use `Snakemake`_. 1. Add a section in the `config file`_. - .. code-block:: yaml + .. code-block:: yaml - subsampling: - state: --query "state == 'WA'" --subsample-max-sequences 100 - country: --query "state != 'WA' & country == 'USA'" --subsample-max-sequences 50 + subsampling: + state: --query "state == 'WA'" --subsample-max-sequences 100 + country: --query "state != 'WA' & country == 'USA'" --subsample-max-sequences 50 2. Add two rules in a `Snakefile`_. If you are building a standard Nextstrain workflow, the output files should be used as input to sequence alignment. See :doc:`docs.nextstrain.org:learn/parts` to learn more about the placement of this step within a workflow. - .. code-block:: python - - # 1. Sample 100 sequences from Washington state - # 2. Sample 50 sequences from the rest of the United States - rule intermediate_sample: - input: - metadata = "data/metadata.tsv", - output: - strains = "results/sample_strains_{sample_name}.txt", - params: - augur_filter_args = lambda wildcards: config.get("subsampling", {}).get(wildcards.sample_name, "") - shell: - """ - augur filter \ - --metadata {input.metadata} \ - {params.augur_filter_args} \ - --output-strains {output.strains} - """ - - # 3. Combine using augur filter - rule combine_intermediate_samples: - input: - sequences = "data/sequences.fasta", - metadata = "data/metadata.tsv", - intermediate_sample_strains = expand("results/sample_strains_{sample_name}.txt", sample_name=list(config.get("subsampling", {}).keys())) - output: - sequences = "results/subsampled_sequences.fasta", - metadata = "results/subsampled_metadata.tsv", - shell: - """ - augur filter \ - --sequences {input.sequences} \ - --metadata {input.metadata} \ - --exclude-all \ - --include {input.intermediate_sample_strains} \ - --output-sequences {output.sequences} \ - --output-metadata {output.metadata} - """ + .. code-block:: python + + # 1. Sample 100 sequences from Washington state + # 2. Sample 50 sequences from the rest of the United States + rule intermediate_sample: + input: + metadata = "data/metadata.tsv", + output: + strains = "results/sample_strains_{sample_name}.txt", + params: + augur_filter_args = lambda wildcards: config.get("subsampling", {}).get(wildcards.sample_name, "") + shell: + """ + augur filter \ + --metadata {input.metadata} \ + {params.augur_filter_args} \ + --output-strains {output.strains} + """ + # 3. Combine using augur filter + rule combine_intermediate_samples: + input: + sequences = "data/sequences.fasta", + metadata = "data/metadata.tsv", + intermediate_sample_strains = expand("results/sample_strains_{sample_name}.txt", sample_name=list(config.get("subsampling", {}).keys())) + output: + sequences = "results/subsampled_sequences.fasta", + metadata = "results/subsampled_metadata.tsv", + shell: + """ + augur filter \ + --sequences {input.sequences} \ + --metadata {input.metadata} \ + --exclude-all \ + --include {input.intermediate_sample_strains} \ + --output-sequences {output.sequences} \ + --output-metadata {output.metadata} + """ 3. Run Snakemake targeting the second rule. - .. code-block:: bash + .. code-block:: bash - snakemake combine_intermediate_samples + snakemake combine_intermediate_samples Explanation: @@ -204,10 +203,10 @@ updated to add another tier in between state and country: .. code-block:: yaml - subsampling: - state: --query "state == 'WA'" --subsample-max-sequences 100 - neighboring_states: --query "state in {'CA', 'ID', 'OR', 'NV'}" --subsample-max-sequences 75 - country: --query "country == 'USA' & state not in {'WA', 'CA', 'ID', 'OR', 'NV'}" --subsample-max-sequences 50 + subsampling: + state: --query "state == 'WA'" --subsample-max-sequences 100 + neighboring_states: --query "state in {'CA', 'ID', 'OR', 'NV'}" --subsample-max-sequences 75 + country: --query "country == 'USA' & state not in {'WA', 'CA', 'ID', 'OR', 'NV'}" --subsample-max-sequences 50 .. _Snakemake: https://snakemake.readthedocs.io/en/stable/index.html .. _config file: https://snakemake.readthedocs.io/en/stable/snakefiles/configuration.html#snakefiles-standard-configuration From a21e2cd6644a43e229e6f4c61afaad113e951769 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Thu, 25 Apr 2024 11:25:53 -0700 Subject: [PATCH 3/5] Word wrap at 80 characters For consistency within this file. --- .../filtering-and-subsampling.rst | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/guides/bioinformatics/filtering-and-subsampling.rst b/src/guides/bioinformatics/filtering-and-subsampling.rst index 6a8c47b6..751fa56e 100644 --- a/src/guides/bioinformatics/filtering-and-subsampling.rst +++ b/src/guides/bioinformatics/filtering-and-subsampling.rst @@ -11,8 +11,8 @@ sample data. Filtering --------- -The filter command allows you to select various subsets of your input data for different types of analysis. -A simple example use of this command would be +The filter command allows you to select various subsets of your input data for +different types of analysis. A simple example use of this command would be .. code-block:: bash @@ -24,9 +24,11 @@ A simple example use of this command would be --output-metadata filtered_metadata.tsv This command will select all sequences with collection date in 2012 or later. -The filter command has a large number of options that allow flexible filtering for many common situations. -One such use-case is the exclusion of sequences that are known to be outliers (e.g. because of sequencing errors, cell-culture adaptation, ...). -These can be specified in a separate text file (e.g. ``exclude.txt``): +The filter command has a large number of options that allow flexible filtering +for many common situations. One such use-case is the exclusion of sequences that +are known to be outliers (e.g. because of sequencing errors, cell-culture +adaptation, ...). These can be specified in a separate text file (e.g. +``exclude.txt``): .. code-block:: @@ -49,8 +51,11 @@ To drop such strains, you can pass the filename to ``--exclude``: Subsampling within ``augur filter`` ----------------------------------- -Another common filtering operation is subsetting of data to a achieve a more even spatio-temporal distribution or to cut-down data set size to more manageable numbers. -The filter command allows you to select a specific number of sequences from specific groups, for example one sequence per month from each country: +Another common filtering operation is subsetting of data to a achieve a more +even spatio-temporal distribution or to cut-down data set size to more +manageable numbers. The filter command allows you to select a specific number of +sequences from specific groups, for example one sequence per month from each +country: .. code-block:: bash From 5790eba38d282b5feaa3da53206baa571ad7f16d Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Thu, 25 Apr 2024 11:27:52 -0700 Subject: [PATCH 4/5] Update heading punctuation character For consistency with other files in this project. --- .../bioinformatics/filtering-and-subsampling.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/guides/bioinformatics/filtering-and-subsampling.rst b/src/guides/bioinformatics/filtering-and-subsampling.rst index 751fa56e..9eed64a8 100644 --- a/src/guides/bioinformatics/filtering-and-subsampling.rst +++ b/src/guides/bioinformatics/filtering-and-subsampling.rst @@ -9,7 +9,7 @@ sample data. :local: Filtering ---------- +========= The filter command allows you to select various subsets of your input data for different types of analysis. A simple example use of this command would be @@ -49,7 +49,7 @@ To drop such strains, you can pass the filename to ``--exclude``: --output-metadata filtered_metadata.tsv Subsampling within ``augur filter`` ------------------------------------ +=================================== Another common filtering operation is subsetting of data to a achieve a more even spatio-temporal distribution or to cut-down data set size to more @@ -70,7 +70,7 @@ country: --output-metadata subsampled_metadata.tsv Subsampling using multiple ``augur filter`` commands ----------------------------------------------------- +==================================================== There are some subsampling strategies in which a single call to ``augur filter`` does not suffice. One such strategy is "tiered subsampling". In this strategy, @@ -90,7 +90,7 @@ samples. 3. Combine the samples. Calling ``augur filter`` multiple times -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +--------------------------------------- A basic approach is to run the ``augur filter`` commands directly. This works well for ad-hoc analyses. @@ -130,7 +130,7 @@ files. If the same strain appears in both files, ``augur filter`` will only write it once in each of the final outputs. Generalizing subsampling in a workflow -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-------------------------------------- The approach above can be cumbersome with more intermediate samples. To generalize this process and allow for more flexibility, a workflow management From 1f77c01c3793904235f4929b0564ecb4536a8a69 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Thu, 25 Apr 2024 11:30:23 -0700 Subject: [PATCH 5/5] Reference internal page directly The external reference is no longer necessary now that the contents are in this project. --- src/conf.py | 2 -- src/guides/bioinformatics/filtering-and-subsampling.rst | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/conf.py b/src/conf.py index 25ef461d..15b73f5f 100644 --- a/src/conf.py +++ b/src/conf.py @@ -82,8 +82,6 @@ 'augur': ('https://docs.nextstrain.org/projects/augur/page/', None), 'auspice': ('https://docs.nextstrain.org/projects/auspice/page/', None), 'cli': ('https://docs.nextstrain.org/projects/cli/page/', None), - # For externally sourced snippets that reference this docs project - 'docs.nextstrain.org': ('https://docs.nextstrain.org/page/', None), 'nextclade': ('https://docs.nextstrain.org/projects/nextclade/page/', None), 'ncov': ('https://docs.nextstrain.org/projects/ncov/page/', None) } diff --git a/src/guides/bioinformatics/filtering-and-subsampling.rst b/src/guides/bioinformatics/filtering-and-subsampling.rst index 9eed64a8..f6fa8f77 100644 --- a/src/guides/bioinformatics/filtering-and-subsampling.rst +++ b/src/guides/bioinformatics/filtering-and-subsampling.rst @@ -146,7 +146,7 @@ system can be used. The following examples use `Snakemake`_. 2. Add two rules in a `Snakefile`_. If you are building a standard Nextstrain workflow, the output files should be used as input to sequence alignment. See - :doc:`docs.nextstrain.org:learn/parts` to learn more about the placement of + :doc:`../../learn/parts` to learn more about the placement of this step within a workflow. .. code-block:: python