Skip to content

Commit

Permalink
Warn users that --probabilistic-sampling is deprecated
Browse files Browse the repository at this point in the history
Marks the flag as "(deprecated)" in its help text and also issues a warning
to users who explicitly pass the flag. Note that we remove the default `True`
value from the flag so that we can tell when users have explicitly requested this option.
  • Loading branch information
huddlej committed Jan 5, 2021
1 parent 9c52421 commit 9d21aad
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 2 deletions.
12 changes: 10 additions & 2 deletions augur/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

comment_char = '#'


def read_vcf(filename):
if filename.lower().endswith(".gz"):
import gzip
Expand Down Expand Up @@ -101,7 +100,7 @@ def register_arguments(parser):
subsample_group.add_argument('--subsample-max-sequences', type=int, help="subsample to no more than this number of sequences")
parser.add_argument('--group-by', nargs='+', help="categories with respect to subsample; two virtual fields, \"month\" and \"year\", are supported if they don't already exist as real fields but a \"date\" field does exist")
probabilistic_sampling_group = parser.add_mutually_exclusive_group()
probabilistic_sampling_group.add_argument('--probabilistic-sampling', action='store_true', default=True, help="Enable probabilistic sampling during subsampling. This is useful when there are more groups than requested sequences. This option only applies when `--subsample-max-sequences` is provided.")
probabilistic_sampling_group.add_argument('--probabilistic-sampling', action='store_true', help="(deprecated) Enable probabilistic sampling during subsampling. This is useful when there are more groups than requested sequences. This option only applies when `--subsample-max-sequences` is provided.")
probabilistic_sampling_group.add_argument('--no-probabilistic-sampling', action='store_true', help="Disable probabilistic sampling during subsampling, requiring that there must be no more subsampling groups than the requested maximum number of sequences. This option only applies when `--subsample-max-sequences` is provided.")
parser.add_argument('--subsample-seed', help="random number generator seed to allow reproducible sub-sampling (with same input data). Can be number or string.")
parser.add_argument('--exclude-where', nargs='+',
Expand Down Expand Up @@ -139,6 +138,15 @@ def run(args):
# user has explicitly requested so.
use_probabilistic_sampling = not args.no_probabilistic_sampling

# If the user requested probabilistic sampling, let them know the flag is
# deprecated.
if args.probabilistic_sampling:
print(
"WARNING: Probabilistic sampling is now the default behavior.",
"The `--probabilistic_sampling` is deprecated and will be removed in Augur version 12.",
file=sys.stderr
)

####Read in files

#If VCF, open and get sequence names
Expand Down
1 change: 1 addition & 0 deletions tests/functional/filter.t
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Explicitly use probabilistic subsampling to handle the case when there are more
> --subsample-seed 314159 \
> --probabilistic-sampling \
> --output "$TMP/filtered.fasta" > /dev/null
WARNING: Probabilistic sampling is now the default behavior. The `--probabilistic_sampling` is deprecated and will be removed in Augur version 12.
$ rm -f "$TMP/filtered.fasta"

Using the default probabilistic subsampling, should work the same as the previous case.
Expand Down

0 comments on commit 9d21aad

Please sign in to comment.