diff --git a/vendored/.github/workflows/ci.yaml b/vendored/.github/workflows/ci.yaml
index bbf40f72..c6a218a5 100644
--- a/vendored/.github/workflows/ci.yaml
+++ b/vendored/.github/workflows/ci.yaml
@@ -1,9 +1,11 @@
 name: CI
 
 on:
-  - push
-  - pull_request
-  - workflow_dispatch
+  push:
+    branches:
+      - main
+  pull_request:
+  workflow_dispatch:
 
 jobs:
   shellcheck:
@@ -18,4 +20,4 @@ jobs:
       - uses: actions/checkout@v3
       - uses: actions/setup-python@v4
       - run: pip install cram
-      - run: cram tests/
+      - run: cram tests/
\ No newline at end of file
diff --git a/vendored/.gitrepo b/vendored/.gitrepo
index fc2150f5..13a71698 100644
--- a/vendored/.gitrepo
+++ b/vendored/.gitrepo
@@ -6,7 +6,7 @@
 [subrepo]
 	remote = https://github.com/nextstrain/ingest
 	branch = main
-	commit = c97df238518171c2b1574bec0349a55855d1e7a7
-	parent = 6ef4dc097df037130845d002e54eb4b7338e3d5b
+	commit = 7617c39fae05e5882c5e6c065c5b47d500c998af
+	parent = 6c0a9cc7a1c3cfc6a055707a0eb661af56befeb6
 	method = merge
 	cmdver = 0.4.6
diff --git a/vendored/README.md b/vendored/README.md
index 533b39ad..fa918913 100644
--- a/vendored/README.md
+++ b/vendored/README.md
@@ -25,6 +25,31 @@ Any future updates of ingest scripts can be pulled in with:
 git subrepo pull ingest/vendored
 ```
 
+If you run into merge conflicts and would like to pull in a fresh copy of the
+latest ingest scripts, pull with the `--force` flag:
+
+```
+git subrepo pull ingest/vendored --force
+```
+
+> **Warning**
+> Beware of rebasing/dropping the parent commit of a `git subrepo` update
+
+`git subrepo` relies on metadata in the `ingest/vendored/.gitrepo` file,
+which includes the hash for the parent commit in the pathogen repos.
+If this hash no longer exists in the commit history, there will be errors when
+running future `git subrepo pull` commands.
+
+If you run into an error similar to the following:
+```
+$ git subrepo pull ingest/vendored
+git-subrepo: Command failed: 'git branch subrepo/ingest/vendored '.
+fatal: not a valid object name: ''
+```
+Check the parent commit hash in the `ingest/vendored/.gitrepo` file and make
+sure the commit exists in the commit history. Update to the appropriate parent
+commit hash if needed.
+
 ## History
 
 Much of this tooling originated in
@@ -72,10 +97,9 @@ Scripts for supporting ingest workflow automation that don’t really belong in
 NCBI interaction scripts that are useful for fetching public metadata and sequences.
 
 - [fetch-from-ncbi-entrez](fetch-from-ncbi-entrez) - Fetch metadata and nucleotide sequences from [NCBI Entrez](https://www.ncbi.nlm.nih.gov/books/NBK25501/) and output to a GenBank file.
-  Useful for pathogens with metadata and annotations in custom fields that are not part of the standard [NCBI Virus](https://www.ncbi.nlm.nih.gov/labs/virus/vssi/) or [NCBI Datasets](https://www.ncbi.nlm.nih.gov/datasets/) outputs.
-- [fetch-from-ncbi-virus](fetch-from-ncbi-virus) - Fetch metadata and nucleotide sequences from [NCBI Virus](https://www.ncbi.nlm.nih.gov/labs/virus/vssi/#/) and output NDJSON records to stdout.
-- [ncbi-virus-url](ncbi-virus-url) - Generates the URL to download metadata and sequences from NCBI Virus as a single CSV file.
-- [csv-to-ndjson](csv-to-ndjson) - Converts CSV file to NDJSON file with a hard-coded 200MiB field size limit to accommodate sequences in the NCBI Virus download.
+  Useful for pathogens with metadata and annotations in custom fields that are not part of the standard [NCBI Datasets](https://www.ncbi.nlm.nih.gov/datasets/) outputs.
+
+Historically, some pathogen repos used the undocumented NCBI Virus API through [fetch-from-ncbi-virus](https://github.com/nextstrain/ingest/blob/c97df238518171c2b1574bec0349a55855d1e7a7/fetch-from-ncbi-virus) to fetch data. However, we've opted to drop the NCBI Virus scripts due to https://github.com/nextstrain/ingest/issues/18.
 
 Potential Nextstrain CLI scripts
 
@@ -97,6 +121,7 @@ Potential augur curate scripts
 - [transform-authors](transform-authors) - Abbreviates full author lists to '<first author> et al.'
 - [transform-field-names](transform-field-names) - Rename fields of NDJSON records
 - [transform-genbank-location](transform-genbank-location) - Parses `location` field with the expected pattern `"<country_value>[:<region>][, <locality>]"` based on [GenBank's country field](https://www.ncbi.nlm.nih.gov/genbank/collab/country/)
+- [transform-strain-names](transform-strain-names) - Ordered search for strain names across several fields.
 
 ## Software requirements
 
diff --git a/vendored/csv-to-ndjson b/vendored/csv-to-ndjson
deleted file mode 100755
index 84befe08..00000000
--- a/vendored/csv-to-ndjson
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/bin/env python3
-"""
-Convert CSV on stdin to NDJSON on stdout.
-usage: `cat dummy.csv | ./csv-to-ndjson > dummy.ndjson`
-"""
-import csv
-import json
-from sys import stdin, stdout
-
-# 200 MiB; default is 128 KiB
-csv.field_size_limit(200 * 1024 * 1024)
-
-for row in csv.DictReader(stdin):
-    json.dump(row, stdout, allow_nan = False, indent = None, separators = ',:')
-    print()
diff --git a/vendored/docs/ncbi-virus-all-fields-example.json b/vendored/docs/ncbi-virus-all-fields-example.json
deleted file mode 100644
index bbf9a7f7..00000000
--- a/vendored/docs/ncbi-virus-all-fields-example.json
+++ /dev/null
@@ -1,292 +0,0 @@
-{
-    "ExportDate_dt": "2023-08-08T21:02:01.475Z",
-    "QualNum_i": 0,
-    "QualPct_d": 0.0,
-    "IncompleteCdsCnt_i": 0,
-    "gi_l": 1798174254,
-    "Host_s": "Homo sapiens",
-    "HostSpecies_s": "Homo sapiens (human), taxid:9606|",
-    "HostLineage_ss": [
-        "cellular organisms, taxid:131567| biota",
-        "Eukaryota (eucaryotes), taxid:2759| eukaryotes Eucarya Eucaryotae Eukarya Eukaryotae",
-        "Opisthokonta, taxid:33154| Fungi/Metazoa group opisthokonts",
-        "Metazoa (metazoans), taxid:33208| multicellular animals Animalia animals",
-        "Eumetazoa, taxid:6072|",
-        "Bilateria, taxid:33213|",
-        "Deuterostomia (deuterostomes), taxid:33511|",
-        "Chordata (chordates), taxid:7711|",
-        "Craniata, taxid:89593|",
-        "Vertebrata (vertebrates), taxid:7742|",
-        "Gnathostomata (jawed vertebrates), taxid:7776|",
-        "Teleostomi, taxid:117570|",
-        "Euteleostomi (bony vertebrates), taxid:117571|",
-        "Sarcopterygii, taxid:8287|",
-        "Dipnotetrapodomorpha, taxid:1338369|",
-        "Tetrapoda (tetrapods), taxid:32523|",
-        "Amniota (amniotes), taxid:32524|",
-        "Mammalia (mammals), taxid:40674|",
-        "Theria, taxid:32525|",
-        "Eutheria (placentals), taxid:9347| eutherian mammals placental mammals Placentalia",
-        "Boreoeutheria, taxid:1437010| Boreotheria",
-        "Euarchontoglires, taxid:314146|",
-        "Primates, taxid:9443| Primata primates",
-        "Haplorrhini, taxid:376913|",
-        "Simiiformes, taxid:314293| Anthropoidea",
-        "Catarrhini, taxid:9526|",
-        "Hominoidea (apes), taxid:314295| ape",
-        "Hominidae (great apes), taxid:9604| Pongidae",
-        "Homininae, taxid:207598| Homo/Pan/Gorilla group",
-        "Homo (humans), taxid:9605|",
-        "Homo sapiens (human), taxid:9606|"
-    ],
-    "HostLineageId_ss": [
-        "131567",
-        "2759",
-        "33154",
-        "33208",
-        "6072",
-        "33213",
-        "33511",
-        "7711",
-        "89593",
-        "7742",
-        "7776",
-        "117570",
-        "117571",
-        "8287",
-        "1338369",
-        "32523",
-        "32524",
-        "40674",
-        "32525",
-        "9347",
-        "1437010",
-        "314146",
-        "9443",
-        "376913",
-        "314293",
-        "9526",
-        "314295",
-        "9604",
-        "207598",
-        "9605",
-        "9606"
-    ],
-    "Locus_s": "NC_045512",
-    "OrgId_i": 2697049,
-    "VirusFamily_s": "Coronaviridae",
-    "VirusGenus_s": "Betacoronavirus",
-    "VirusSpecies_s": "Severe acute respiratory syndrome-related coronavirus",
-    "VirusSpeciesId_i": 694009,
-    "VirusLineage_ss": [
-        "Viruses, taxid:10239| Vira Viridae viruses",
-        "Riboviria (RNA viruses), taxid:2559587| RNA viruses and viroids",
-        "Orthornavirae, taxid:2732396|",
-        "Pisuviricota, taxid:2732408|",
-        "Pisoniviricetes, taxid:2732506|",
-        "Nidovirales, taxid:76804|",
-        "Cornidovirineae, taxid:2499399|",
-        "Coronaviridae, taxid:11118|",
-        "Orthocoronavirinae, taxid:2501931|",
-        "Betacoronavirus, taxid:694002| Coronavirus",
-        "Sarbecovirus, taxid:2509511|",
-        "Severe acute respiratory syndrome-related coronavirus, taxid:694009| HCoV-SARS SARS SARSr-CoV SARSrCoV",
-        "Severe acute respiratory syndrome coronavirus 2, taxid:2697049| SARS-CoV-2",
-        "RNA viruses"
-    ],
-    "VirusLineageId_ss": [
-        "10239",
-        "2559587",
-        "2732396",
-        "2732408",
-        "2732506",
-        "76804",
-        "2499399",
-        "11118",
-        "2501931",
-        "694002",
-        "2509511",
-        "694009",
-        "2697049"
-    ],
-    "VirusL0_s": "RNA viruses",
-    "VirusL1_s": "Orthornavirae, taxid:2732396",
-    "VirusL2_s": "Pisuviricota, taxid:2732408",
-    "VirusL3_s": "Pisoniviricetes, taxid:2732506",
-    "VirusL4_s": "Nidovirales, taxid:76804",
-    "VirusL5_s": "Cornidovirineae, taxid:2499399",
-    "VirusL6_s": "Coronaviridae, taxid:11118",
-    "VirusL7_s": "Orthocoronavirinae, taxid:2501931",
-    "VirusL8_s": "Betacoronavirus, taxid:694002",
-    "VirusL9_s": "Sarbecovirus, taxid:2509511",
-    "VirusL10_s": "Severe acute respiratory syndrome-related coronavirus, taxid:694009",
-    "ViralHost_ss": [
-        "human",
-        "vertebrates"
-    ],
-    "GenomicMoltype_s": "ssRNA(+)",
-    "SLen_i": 29903,
-    "Flags_ss": [
-        "refseq",
-        "complete"
-    ],
-    "Flags_csv": "refseq, complete",
-    "FlagsCount_i": 2,
-    "SetAcc_s": "GCF_009858895.2",
-    "Authors_ss": [
-        "Wu,F.",
-        "Zhao,S.",
-        "Yu,B.",
-        "Chen,Y.M.",
-        "Wang,W.",
-        "Song,Z.G.",
-        "Hu,Y.",
-        "Tao,Z.W.",
-        "Tian,J.H.",
-        "Pei,Y.Y.",
-        "Yuan,M.L.",
-        "Zhang,Y.L.",
-        "Dai,F.H.",
-        "Liu,Y.",
-        "Wang,Q.M.",
-        "Zheng,J.J.",
-        "Xu,L.",
-        "Holmes,E.C.",
-        "Zhang,Y.Z.",
-        "Baranov,P.V.",
-        "Henderson,C.M.",
-        "Anderson,C.B.",
-        "Gesteland,R.F.",
-        "Atkins,J.F.",
-        "Howard,M.T.",
-        "Robertson,M.P.",
-        "Igel,H.",
-        "Baertsch,R.",
-        "Haussler,D.",
-        "Ares,M. Jr.",
-        "Scott,W.G.",
-        "Williams,G.D.",
-        "Chang,R.Y.",
-        "Brian,D.A.",
-        "Chen,Y.-M.",
-        "Song,Z.-G.",
-        "Tao,Z.-W.",
-        "Tian,J.-H.",
-        "Pei,Y.-Y.",
-        "Zhang,Y.-L.",
-        "Dai,F.-H.",
-        "Wang,Q.-M.",
-        "Zheng,J.-J.",
-        "Zhang,Y.-Z."
-    ],
-    "Authors_csv": "Wu,F., Zhao,S., Yu,B., Chen,Y.M., Wang,W., Song,Z.G., Hu,Y., Tao,Z.W., Tian,J.H., Pei,Y.Y., Yuan,M.L., Zhang,Y.L., Dai,F.H., Liu,Y., Wang,Q.M., Zheng,J.J., Xu,L., Holmes,E.C., Zhang,Y.Z., Baranov,P.V., Henderson,C.M., Anderson,C.B., Gesteland,R.F., Atkins,J.F., Howard,M.T., Robertson,M.P., Igel,H., Baertsch,R., Haussler,D., Ares,M. Jr., Scott,W.G., Williams,G.D., Chang,R.Y., Brian,D.A., Chen,Y.-M., Song,Z.-G., Tao,Z.-W., Tian,J.-H., Pei,Y.-Y., Zhang,Y.-L., Dai,F.-H., Wang,Q.-M., Zheng,J.-J., Zhang,Y.-Z.",
-    "AuthorsCount_i": 44,
-    "Country_s": "China",
-    "Isolate_s": "Wuhan-Hu-1",
-    "Lineage_s": "B",
-    "Division_s": "VRL",
-    "Keywords_ss": [
-        "RefSeq"
-    ],
-    "KeywordsCount_i": 1,
-    "TaxName_s": "Severe acute respiratory syndrome coronavirus 2",
-    "Region_s": "Asia",
-    "ParentAcc_s": "set:NC_045512",
-    "SetPosition_i": 0,
-    "SourceDB_s": "RefSeq",
-    "Definition_s": "Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome",
-    "HostId_i": 9606,
-    "CreateDate_dt": "2020-01-13T00:00:00Z",
-    "CreateYear_i": 2020,
-    "Genome_js": "[{\"id\": \"NC_045512.2\", \"segment\": null, \"proteins\": [{\"id\": \"YP_009724389.1\", \"name\": \"ORF1ab polyprotein\", \"location\": \"join(266..13468,13468..21555)\"}, {\"id\": \"YP_009725295.1\", \"name\": \"ORF1a polyprotein\", \"location\": \"266..13483\"}, {\"id\": \"YP_009724390.1\", \"name\": \"surface glycoprotein\", \"location\": \"21563..25384\"}, {\"id\": \"YP_009724391.1\", \"name\": \"ORF3a protein\", \"location\": \"25393..26220\"}, {\"id\": \"YP_009724392.1\", \"name\": \"envelope protein\", \"location\": \"26245..26472\"}, {\"id\": \"YP_009724393.1\", \"name\": \"membrane glycoprotein\", \"location\": \"26523..27191\"}, {\"id\": \"YP_009724394.1\", \"name\": \"ORF6 protein\", \"location\": \"27202..27387\"}, {\"id\": \"YP_009724395.1\", \"name\": \"ORF7a protein\", \"location\": \"27394..27759\"}, {\"id\": \"YP_009725318.1\", \"name\": \"ORF7b\", \"location\": \"27756..27887\"}, {\"id\": \"YP_009724396.1\", \"name\": \"ORF8 protein\", \"location\": \"27894..28259\"}, {\"id\": \"YP_009724397.2\", \"name\": \"nucleocapsid phosphoprotein\", \"location\": \"28274..29533\"}, {\"id\": \"YP_009725255.1\", \"name\": \"ORF10 protein\", \"location\": \"29558..29674\"}]}]",
-    "MolType_s": "RNA",
-    "ProtAcc_ss": [
-        "YP_009724389",
-        "YP_009725295",
-        "YP_009724390",
-        "YP_009724391",
-        "YP_009724392",
-        "YP_009724393",
-        "YP_009724394",
-        "YP_009724395",
-        "YP_009725318",
-        "YP_009724396",
-        "YP_009724397",
-        "YP_009725255"
-    ],
-    "ProtAccCount_i": 12,
-    "UpdateDate_dt": "2020-07-18T00:00:00Z",
-    "UpdateYear_i": 2020,
-    "PubMed_ss": [
-        "32015508",
-        "15680415",
-        "15630477",
-        "10482585"
-    ],
-    "PubMed_csv": "32015508, 15680415, 15630477, 10482585",
-    "PubMedCount_i": 4,
-    "Completeness_s": "complete",
-    "CountryFull_s": "China",
-    "ProtNames_ss": [
-        "ORF1ab polyprotein",
-        "ORF1a polyprotein",
-        "surface glycoprotein",
-        "ORF3a protein",
-        "envelope protein",
-        "membrane glycoprotein",
-        "ORF6 protein",
-        "ORF7a protein",
-        "ORF7b protein",
-        "ORF8 protein",
-        "nucleocapsid phosphoprotein",
-        "ORF10 protein"
-    ],
-    "ProtNamesCount_i": 12,
-    "IsolateParsed_s": "Wuhan-Hu-1",
-    "NuclAcc_ss": [
-        "NC_045512"
-    ],
-    "NuclAccCount_i": 1,
-    "CollectionDate_dr": "2019-12",
-    "CollectionYear_i": 2019,
-    "SubmitterAffil_s": "National Center for Biotechnology Information, NIH",
-    "BioProject_ss": [
-        "PRJNA485481"
-    ],
-    "BioProject_csv": "PRJNA485481",
-    "BioProjectCount_i": 1,
-    "AccVer_s": "NC_045512.2",
-    "CollectionDate_s": "2019-12",
-    "SubmitterCountry_s": "USA",
-    "CollectionDate_dt": "2019-12-01T00:00:00Z",
-    "GenomeCompleteness_s": "complete",
-    "SubmitterAffilFull_s": "National Center for Biotechnology Information, NIH",
-    "BioProject_s": "PRJNA485481",
-    "AccNV_s": "NC_045512",
-    "id": "NC_045512",
-    "SeqType_s": "Nucleotide",
-    "FastaMD5_s": "4928f859a1822d291e0225206a0068c8",
-    "live_i": 1,
-    "ids_ss": [
-        "GCF_009858895",
-        "GCF_009858895.2",
-        "NC_045512",
-        "NC_045512.2",
-        "PRJNA485481",
-        "YP_009724389",
-        "YP_009724390",
-        "YP_009724391",
-        "YP_009724392",
-        "YP_009724393",
-        "YP_009724394",
-        "YP_009724395",
-        "YP_009724396",
-        "YP_009724397",
-        "YP_009725255",
-        "YP_009725295",
-        "YP_009725318",
-        "set:NC_045512"
-    ],
-    "gi_i": 1798174254,
-    "_version_": 1773711315042304000
-}
diff --git a/vendored/fetch-from-ncbi-virus b/vendored/fetch-from-ncbi-virus
deleted file mode 100755
index 39733e6f..00000000
--- a/vendored/fetch-from-ncbi-virus
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/bin/env bash
-# usage: fetch-from-ncbi-virus <ncbi_taxon_id> <github_repo> [options]
-#
-# Fetch metadata and nucleotide sequences from [NCBI Virus](https://www.ncbi.nlm.nih.gov/labs/virus/vssi/#/)
-# and output NDJSON records to stdout.
-#
-# [options] are passed directly to ncbi-virus-url. See that script for usage details.
-#
-# Originally copied from "bin/fetch-from-genbank" in nextstrain/ncov-ingest:
-#   https://github.com/nextstrain/ncov-ingest/blob/2a5f255329ee5bdf0cabc8b8827a700c92becbe4/bin/fetch-from-genbank
-#
-set -euo pipefail
-
-bin="$(dirname "$0")"
-
-
-main() {
-    local ncbi_taxon_id="${1:?NCBI taxon id is required.}"
-    local github_repo="${2:?A GitHub repository with owner and repository name is required as the second argument}"
-
-    # "${@:3}" represents all other options, if any.
-    ncbi_virus_url="$("$bin"/ncbi-virus-url --ncbi-taxon-id "$ncbi_taxon_id" "${@:3}")"
-
-    fetch "$ncbi_virus_url" "$github_repo" | "$bin"/csv-to-ndjson
-}
-
-fetch() {
-    curl "$1" \
-        --fail --silent --show-error --http1.1 \
-        --header "User-Agent: https://github.com/$2 (hello@nextstrain.org)"
-}
-
-main "$@"
diff --git a/vendored/ncbi-virus-url b/vendored/ncbi-virus-url
deleted file mode 100755
index 0dd116b8..00000000
--- a/vendored/ncbi-virus-url
+++ /dev/null
@@ -1,103 +0,0 @@
-#!/usr/bin/env python3
-"""
-Generate URL to download all virus sequences and their curated metadata for a
-specified NCBI Taxon ID from GenBank via NCBI Virus.
-
-The URL this program builds is based on the URL for SARS-CoV-2 constructed with
-
-    https://github.com/nextstrain/ncov-ingest/blob/2a5f255329ee5bdf0cabc8b8827a700c92becbe4/bin/genbank-url
-
-and observing the network activity at
-
-    https://www.ncbi.nlm.nih.gov/labs/virus/vssi/#/virus?SeqType_s=Nucleotide
-"""
-from urllib.parse import urlencode
-from typing import List, Optional
-import argparse
-
-def parse_args():
-    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument("--ncbi-taxon-id", required=True,
-        help="NCBI Taxon ID. Visit NCBI virus at " +
-             "https://www.ncbi.nlm.nih.gov/labs/virus/vssi/#/find-data/virus " +
-             "to search for supported taxon IDs.")
-    parser.add_argument("--filters", required=False, nargs="*",
-        help="Filter criteria to add as `fq` param values. " +
-             "Apply filters via the NCBI Virus UI and observe the network " +
-             "activity to find the desired filter string.")
-    parser.add_argument("--fields", required=False, nargs="*",
-        help="Metadata fields to add as `fl` param values. " +
-             "Expected to be formatted as <output_column_name>:<ncbi_virus_field_name>. " +
-             "See docs/ncbi-virus-all-fields-example.json for the available NCBI Virus fields.")
-    return parser.parse_args()
-
-def build_query_url(ncbi_taxon_id: str,
-                    filters: Optional[List[str]]=None,
-                    fields: Optional[List[str]]=None):
-    """
-    Generate URL to download all viral sequences and their curated metadata
-    from GenBank via NCBI Virus.
-    """
-    endpoint = "https://www.ncbi.nlm.nih.gov/genomes/VirusVariation/vvsearch2/"
-    params = {
-        # Search criteria
-        'fq': [
-            '{!tag=SeqType_s}SeqType_s:("Nucleotide")', # Nucleotide sequences (as opposed to protein)
-            f'VirusLineageId_ss:({ncbi_taxon_id})',
-            *(filters or []),
-        ],
-
-        # Unclear, but seems necessary.
-        'q': '*:*',
-
-        # Response format
-        'cmd': 'download',
-        'dlfmt': 'csv',
-        'fl': ','.join(
-            [':'.join(names) for names in [
-                # Pairs of (output column name, source data field).
-                ('genbank_accession',       'id'),
-                ('genbank_accession_rev',   'AccVer_s'),
-                ('database',                'SourceDB_s'),
-                ('strain',                  'Isolate_s'),
-                ('region',                  'Region_s'),
-                ('location',                'CountryFull_s'),
-                ('collected',               'CollectionDate_s'),
-                ('submitted',               'CreateDate_dt'),
-                ('updated',                 'UpdateDate_dt'),
-                ('length',                  'SLen_i'),
-                ('host',                    'Host_s'),
-                ('isolation_source',        'Isolation_csv'),
-                ('bioproject_accession',    'BioProject_s'),
-                ('biosample_accession',     'BioSample_s'),
-                ('sra_accession',           'SRALink_csv'),
-                ('title',                   'Definition_s'),
-                ('authors',                 'Authors_csv'),
-                ('submitting_organization', 'SubmitterAffilFull_s'),
-                ('publications',            'PubMed_csv'),
-                ('sequence',                'Nucleotide_seq'),
-            ]] + (fields or [])
-        ),
-
-        # Stable sort with GenBank accessions.
-        # Columns are source data fields, not our output columns.
-        'sort': 'id asc',
-
-        # This isn't Entrez, but include the same email parameter it requires just
-        # to be nice.
-        'email': 'hello@nextstrain.org',
-    }
-    query = urlencode(params, doseq = True, encoding = "utf-8")
-
-    print(f"{endpoint}?{query}")
-
-def main():
-    args = parse_args()
-    build_query_url(
-        ncbi_taxon_id=args.ncbi_taxon_id,
-        filters=args.filters,
-        fields=args.fields
-    )
-
-if __name__ == '__main__':
-    main()
diff --git a/vendored/tests/fetch-from-ncbi-virus/filter-and-fields.t b/vendored/tests/fetch-from-ncbi-virus/filter-and-fields.t
deleted file mode 100644
index 2fd7020f..00000000
--- a/vendored/tests/fetch-from-ncbi-virus/filter-and-fields.t
+++ /dev/null
@@ -1,18 +0,0 @@
-Get the virus lineage IDs for 4 early Dengue sequences, testing the options --filter and --field.
-
-  $ $TESTDIR/../../fetch-from-ncbi-virus 12637 nextstrain/ingest \
-  > --filters 'CreateDate_dt:([1987-11-29T00:00:00Z TO 1987-11-29T00:00:01Z])' \
-  > --fields 'viruslineage_ids:VirusLineageId_ss'
-  {"genbank_accession":"X05375","genbank_accession_rev":"X05375.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for envelope protein E N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"GTAACTTATGGGACGTGTACCACCACAGGAGAACACAGAAGAGAAAAAAGATCAGTGGCACTCGTTCCACATGTGGGAATGGGACTGGAGACACGAACTGAAACATGGATGTCATCAGAAGGGGCCTGGAAACATGCCCAGAGAATTGAAACTTGGATCTTGAGACATCCAGGCTTTACCATAATGGCAGCAATCCTGGCATACACCATAGGAACGACACATTTCCAAAGAGCCCTGATTTTCATCTTACTGACAGCTGTCGCTCCTTCAATGACAATGCGTTGCATAGGAATATCAAATAGAGACTTTGTAGAAGGGGTTTCAGGAGGAAGCTGGGTTGACATAGTCTTAGAACATGGA","viruslineage_ids":"10239,2559587,2732396,2732406,2732462,2732545,11050,11051,12637,11060"}
-  {"genbank_accession":"X05376","genbank_accession_rev":"X05376.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for NS1 protein N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"ACAACAATGAGGGGAGCGAAGAGAATGGCCATTTTAGGTGACACAGCTTGGGATTTTGGATCCCTGGGAGGAGTGTTTACATCTATAGGAAAGGCTCTCCACCAAGTTTTCGGAGCAATCTATGGGGCTGCCTTCAGTGGGGTCTCATGGACTATGAAAATCCTCATAGGAGTCATTATCACATGGATAGGAATGAATTCACGCAGCACCTCACTTTCTGTGTCACTAGTATTGGTGGGAGTCGTGACGCTGTATTTGGGAGTTATGGTGCAGGCCGATAGTGGTTGCGTTGTGAGCTGGAAAAACAAAGAACTGAAGTGTGGCAGTGGGATTTTCATCACAGACAACGTGCACACATGG","viruslineage_ids":"10239,2559587,2732396,2732406,2732462,2732545,11050,11051,12637,11060"}
-  {"genbank_accession":"X05377","genbank_accession_rev":"X05377.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for NS3 protein N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"CTCACTGTGTGCTACGTGCTCACTGGACGATCGGCCGATTTGGAACTGGAGAGAGCCGCCGATGTCAAATGGGAAGATCAGGCAGAGATATCAGGAAGCAGTCCAATCCTGTCAATAACAATATCAGAAGATGGTAGCATGTCGATAAAAAACGAAGAGGAAGAACAAACACTGACCATACTCATTAGAACAGGATTGCTGGTGATCTCAGGACTTTTTCCTGTATCAATACCAATCACGGCAGCAGCATGGTACCTGTGGGAAGTGAAGAAACAACGGGCTGGAGTATTGTGGGATGTCCCTTCACCCCCACCCGTGGGAAAGGCTGAACTGGAAGATGGAGCCTATAGAATCAAGCAA","viruslineage_ids":"10239,2559587,2732396,2732406,2732462,2732545,11050,11051,12637,11060"}
-  {"genbank_accession":"X05378","genbank_accession_rev":"X05378.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for NS5 protein N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"GATCCAATACCCTATGATCCAAAGTTTGAAAAGCAGTTGGGACAAGTAATGCTCCTAGTCCTCTGCGGGACTCAAGTGTTGATGATGAGGACTACATGGGCTCTGTGTGAGGCTTTAACCTTAGCGACCGGGCCTATCTCCACATTGTGGGAAGGAAATCCAGGGAGGTTTTGGAACACTACCATTGCAGTGTCAATGGCTAACATTTTTAGAGGGAGTTACTTGGCCGGAGCTGGACTTCTCTTTTCCATCATGAAGAACACAACCAACACGAGAAGGGGAACTGGCAACATAGGAGAGACGCTTGGAGAGAAATGGAAAAGCCGATTGAACGCATTGGGGAAAAGTGAATTCCAGATC","viruslineage_ids":"10239,2559587,2732396,2732406,2732462,2732545,11050,11051,12637,11060"}
-
-Do the same but without --field.
-
-  $ $TESTDIR/../../fetch-from-ncbi-virus 12637 nextstrain/ingest \
-  > --filters 'CreateDate_dt:([1987-11-29T00:00:00Z TO 1987-11-29T00:00:01Z])'
-  {"genbank_accession":"X05375","genbank_accession_rev":"X05375.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for envelope protein E N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"GTAACTTATGGGACGTGTACCACCACAGGAGAACACAGAAGAGAAAAAAGATCAGTGGCACTCGTTCCACATGTGGGAATGGGACTGGAGACACGAACTGAAACATGGATGTCATCAGAAGGGGCCTGGAAACATGCCCAGAGAATTGAAACTTGGATCTTGAGACATCCAGGCTTTACCATAATGGCAGCAATCCTGGCATACACCATAGGAACGACACATTTCCAAAGAGCCCTGATTTTCATCTTACTGACAGCTGTCGCTCCTTCAATGACAATGCGTTGCATAGGAATATCAAATAGAGACTTTGTAGAAGGGGTTTCAGGAGGAAGCTGGGTTGACATAGTCTTAGAACATGGA"}
-  {"genbank_accession":"X05376","genbank_accession_rev":"X05376.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for NS1 protein N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"ACAACAATGAGGGGAGCGAAGAGAATGGCCATTTTAGGTGACACAGCTTGGGATTTTGGATCCCTGGGAGGAGTGTTTACATCTATAGGAAAGGCTCTCCACCAAGTTTTCGGAGCAATCTATGGGGCTGCCTTCAGTGGGGTCTCATGGACTATGAAAATCCTCATAGGAGTCATTATCACATGGATAGGAATGAATTCACGCAGCACCTCACTTTCTGTGTCACTAGTATTGGTGGGAGTCGTGACGCTGTATTTGGGAGTTATGGTGCAGGCCGATAGTGGTTGCGTTGTGAGCTGGAAAAACAAAGAACTGAAGTGTGGCAGTGGGATTTTCATCACAGACAACGTGCACACATGG"}
-  {"genbank_accession":"X05377","genbank_accession_rev":"X05377.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for NS3 protein N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"CTCACTGTGTGCTACGTGCTCACTGGACGATCGGCCGATTTGGAACTGGAGAGAGCCGCCGATGTCAAATGGGAAGATCAGGCAGAGATATCAGGAAGCAGTCCAATCCTGTCAATAACAATATCAGAAGATGGTAGCATGTCGATAAAAAACGAAGAGGAAGAACAAACACTGACCATACTCATTAGAACAGGATTGCTGGTGATCTCAGGACTTTTTCCTGTATCAATACCAATCACGGCAGCAGCATGGTACCTGTGGGAAGTGAAGAAACAACGGGCTGGAGTATTGTGGGATGTCCCTTCACCCCCACCCGTGGGAAAGGCTGAACTGGAAGATGGAGCCTATAGAATCAAGCAA"}
-  {"genbank_accession":"X05378","genbank_accession_rev":"X05378.1","database":"GenBank","strain":"","region":"","location":"","collected":"","submitted":"1987-11-29T00:00:00Z","updated":"2016-07-26T00:00:00Z","length":"360","host":"","isolation_source":"","bioproject_accession":"","biosample_accession":"","sra_accession":"","title":"Dengue virus type 2 genomic RNA for NS5 protein N-term","authors":"Biedrzycka,A., Cauchi,M.R., Bartholomeusz,A., Gorman,J.J., Wright,P.J.","submitting_organization":"","publications":"2952760","sequence":"GATCCAATACCCTATGATCCAAAGTTTGAAAAGCAGTTGGGACAAGTAATGCTCCTAGTCCTCTGCGGGACTCAAGTGTTGATGATGAGGACTACATGGGCTCTGTGTGAGGCTTTAACCTTAGCGACCGGGCCTATCTCCACATTGTGGGAAGGAAATCCAGGGAGGTTTTGGAACACTACCATTGCAGTGTCAATGGCTAACATTTTTAGAGGGAGTTACTTGGCCGGAGCTGGACTTCTCTTTTCCATCATGAAGAACACAACCAACACGAGAAGGGGAACTGGCAACATAGGAGAGACGCTTGGAGAGAAATGGAAAAGCCGATTGAACGCATTGGGGAAAAGTGAATTCCAGATC"}
diff --git a/vendored/tests/fetch-from-ncbi-virus/invalid-taxon-id.t b/vendored/tests/fetch-from-ncbi-virus/invalid-taxon-id.t
deleted file mode 100644
index 7a0d5223..00000000
--- a/vendored/tests/fetch-from-ncbi-virus/invalid-taxon-id.t
+++ /dev/null
@@ -1,4 +0,0 @@
-Fetch from an invalid Taxon ID without any additional options.
-This should not error nor return any output.
-
-  $ $TESTDIR/../../fetch-from-ncbi-virus INVALID_TAXID nextstrain/ingest
diff --git a/vendored/tests/transform-strain-names/transform-strain-names.t b/vendored/tests/transform-strain-names/transform-strain-names.t
new file mode 100644
index 00000000..1c05df7b
--- /dev/null
+++ b/vendored/tests/transform-strain-names/transform-strain-names.t
@@ -0,0 +1,17 @@
+Look for strain name in "strain" or a list of backup fields.
+
+If strain entry exists, do not do anything.
+
+  $ echo '{"strain": "i/am/a/strain", "strain_s": "other"}' \
+  >   | $TESTDIR/../../transform-strain-names \
+  >       --strain-regex '^.+$' \
+  >       --backup-fields strain_s accession
+  {"strain":"i/am/a/strain","strain_s":"other"}
+
+If strain entry does not exist, search the backup fields.
+
+  $ echo '{"strain_s": "other"}' \
+  >   | $TESTDIR/../../transform-strain-names \
+  >       --strain-regex '^.+$' \
+  >       --backup-fields accession strain_s 
+  {"strain_s":"other","strain":"other"}
\ No newline at end of file
diff --git a/vendored/transform-strain-names b/vendored/transform-strain-names
new file mode 100755
index 00000000..d86c0e40
--- /dev/null
+++ b/vendored/transform-strain-names
@@ -0,0 +1,50 @@
+#!/usr/bin/env python3
+"""
+Verifies the 'strain' field of each NDJSON record from stdin against a regex.
+Missing or non-matching values are replaced by the first non-empty backup
+field, or by an empty string when no backup value is available.
+Outputs the modified records to stdout.
+"""
+import argparse
+import json
+import re
+from sys import stderr, stdin, stdout
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument("--strain-regex", default="^.+$",
+        help="Regex pattern for strain names. " +
+             "Strain names that do not match the pattern will be dropped.")
+    parser.add_argument("--backup-fields", nargs="*",
+        help="List of backup fields to use as strain name if the value in 'strain' " +
+             "does not match the strain regex pattern. " +
+             "If multiple fields are provided, will use the first field that has a non-empty string.")
+
+    args = parser.parse_args()
+
+    strain_name_pattern = re.compile(args.strain_regex)
+
+    for index, record in enumerate(stdin):
+        record = json.loads(record)
+
+        # Verify strain name matches the strain regex pattern. A null or
+        # non-string value can never match (re.match would raise TypeError on it)
+        strain = record.get('strain')
+        if not isinstance(strain, str) or strain_name_pattern.match(strain) is None:
+            # Default to empty string if not matching pattern
+            record['strain'] = ''
+            # Use the first non-empty backup field; --backup-fields is None when omitted
+            for field in args.backup_fields or []:
+                if record.get(field):
+                    record['strain'] = str(record[field])
+                    break
+
+        if record['strain'] == '':
+            print(f"WARNING: Record number {index} has an empty string as the strain name.", file=stderr)
+
+        json.dump(record, stdout, allow_nan=False, indent=None, separators=',:')
+        print()