Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

frequencies: Fix --regions flag #1424

Merged
merged 5 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
### Bug Fixes

* filter: Updated the help and report text of `--min-length` to explicitly state that the minimum length filter only counts standard nucleotide characters A, C, G, or T (case-insensitive). This has been the behavior since version 3.0.3.dev1, but has never been explicitly documented. [#1422][] (@joverlee521)
* frequencies: Fixed a bug introduced in 24.2.0 and 24.1.0 that prevented `--regions` from working when providing regions other than the default "global" region. [#1424]

[#1422]: https://github.com/nextstrain/augur/pull/1422
[#1424]: https://github.com/nextstrain/augur/pull/1424

## 24.2.2 (16 February 2024)

Expand Down
20 changes: 16 additions & 4 deletions augur/frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
from .io.metadata import DEFAULT_DELIMITERS, DEFAULT_ID_COLUMNS, METADATA_DATE_COLUMN, InvalidDelimiter, Metadata, read_metadata
from .utils import write_json

REGION_COLUMN = 'region'
DEFAULT_REGION = 'global'

def register_parser(parent_subparsers):
parser = parent_subparsers.add_parser("frequencies", help=__doc__)
Expand All @@ -26,8 +28,10 @@ def register_parser(parent_subparsers):
help="delimiters to accept when reading a metadata file. Only one delimiter will be inferred.")
parser.add_argument('--metadata-id-columns', default=DEFAULT_ID_COLUMNS, nargs="+",
help="names of possible metadata columns containing identifier information, ordered by priority. Only one ID column will be inferred.")
parser.add_argument('--regions', type=str, nargs='+', default=['global'],
help="region to subsample to")
parser.add_argument('--regions', type=str, nargs='+', default=[DEFAULT_REGION],
help="region to filter to. " \
f"Regions should match values in the {REGION_COLUMN!r} column of the metadata file " \
f"if specifying values other than the default {DEFAULT_REGION!r} region.")
parser.add_argument("--pivot-interval", type=int, default=3,
help="number of units between pivots")
parser.add_argument("--pivot-interval-units", type=str, default="months", choices=['months', 'weeks'],
Expand Down Expand Up @@ -97,6 +101,11 @@ def run(args):
columns_to_load = [metadata_object.id_column, METADATA_DATE_COLUMN]
if args.weights_attribute:
columns_to_load.append(args.weights_attribute)

filter_to_region = any(region != DEFAULT_REGION for region in args.regions)
if filter_to_region:
columns_to_load.append(REGION_COLUMN)

metadata = read_metadata(
args.metadata,
delimiters=[metadata_object.delimiter],
Expand Down Expand Up @@ -130,6 +139,9 @@ def run(args):
# Annotate tip with weight attribute.
tip.attr[weights_attribute] = metadata.loc[tip.name, weights_attribute]

if filter_to_region:
tip.attr[REGION_COLUMN] = metadata.loc[tip.name, REGION_COLUMN]

if args.method == "diffusion":
# estimate tree frequencies
pivots = get_pivots(tps, args.pivot_interval, args.min_date, args.max_date, args.pivot_interval_units)
Expand All @@ -139,10 +151,10 @@ def run(args):
for region in args.regions:
# Omit strains sampled prior to the first pivot from frequency calculations.
# (these tend to be reference strains included for phylogenetic context)
if region=='global':
if region==DEFAULT_REGION:
node_filter_func = lambda node: node.attr["num_date"] >= pivots[0]
joverlee521 marked this conversation as resolved.
Show resolved Hide resolved
else:
node_filter_func = lambda node: (node.attr["region"] == region
node_filter_func = lambda node: (node.attr[REGION_COLUMN] == region
and node.attr["num_date"] >= pivots[0])

tree_freqs = tree_frequencies(tree, pivots, method='SLSQP',
Expand Down
247 changes: 247 additions & 0 deletions tests/functional/frequencies/cram/diffusion-region.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
Setup

$ source "$TESTDIR"/_setup.sh

Calculate diffusion-based tip frequencies from a refined tree with `--regions`.

$ ${AUGUR} frequencies \
> --method diffusion \
> --tree "$TESTDIR/../data/tree.nwk" \
> --metadata "$TESTDIR/../data/metadata.tsv" \
> --regions "global" "North America" "South America" \
> --pivot-interval 3 \
> --output tip-frequencies.json > /dev/null

$ cat tip-frequencies.json
{
"BRA/2016/FC_6706": {
"North America": [
0.0,
0.0,
0.0,
0.0
],
"South America": [
0.2,
0.2,
0.2,
0.2
],
"global": [
0.1,
0.1,
0.1,
0.1
]
},
"COL/FLR_00008/2015": {
"North America": [
0.0,
0.0,
0.0,
0.0
],
"South America": [
0.2,
0.2,
0.2,
0.2
],
"global": [
0.1,
0.1,
0.1,
0.1
]
},
"Colombia/2016/ZC204Se": {
"North America": [
0.0,
0.0,
0.0,
0.0
],
"South America": [
0.2,
0.2,
0.2,
0.2
],
"global": [
0.1,
0.1,
0.1,
0.1
]
},
"DOM/2016/BB_0183": {
"North America": [
0.25,
0.25,
0.25,
0.25
],
"South America": [
0.0,
0.0,
0.0,
0.0
],
"global": [
0.1,
0.1,
0.1,
0.1
]
},
"EcEs062_16": {
"North America": [
0.0,
0.0,
0.0,
0.0
],
"South America": [
0.2,
0.2,
0.2,
0.2
],
"global": [
0.1,
0.1,
0.1,
0.1
]
},
"HND/2016/HU_ME59": {
"North America": [
0.25,
0.25,
0.25,
0.25
],
"South America": [
0.0,
0.0,
0.0,
0.0
],
"global": [
0.1,
0.1,
0.1,
0.1
]
},
"PAN/CDC_259359_V1_V3/2015": {
"North America": [
0.25,
0.25,
0.25,
0.25
],
"South America": [
0.0,
0.0,
0.0,
0.0
],
"global": [
0.1,
0.1,
0.1,
0.1
]
},
"PRVABC59": {
"North America": [
0.25,
0.25,
0.25,
0.25
],
"South America": [
0.0,
0.0,
0.0,
0.0
],
"global": [
0.1,
0.1,
0.1,
0.1
]
},
"VEN/UF_1/2016": {
"North America": [
0.0,
0.0,
0.0,
0.0
],
"South America": [
0.2,
0.2,
0.2,
0.2
],
"global": [
0.1,
0.1,
0.1,
0.1
]
},
"ZKC2/2016": {
"North America": [
0.0,
0.0,
0.0,
0.0
],
"South America": [
0.0,
0.0,
0.0,
0.0
],
"global": [
0.1,
0.1,
0.1,
0.1
]
},
"counts": {
"North America": [
0,
2,
2,
0
],
"South America": [
0,
2,
3,
0
],
"global": [
0,
5,
5,
0
]
},
"generated_by": {
"program": "augur",
"version": ".*" (re)
},
"pivots": [
2015.7521,
2016.0041,
2016.2527,
2016.5014
]
} (no-eol)
Loading