From c0b9e509c544c8c87763c5beeb3039f48d82975b Mon Sep 17 00:00:00 2001 From: Jennifer Chang Date: Thu, 2 May 2024 15:59:33 -0700 Subject: [PATCH] ingest: fix csvtk quotes We can now use `csvtk fix-quotes` and `csvtk del-quotes` to work around quoting issues (e.g. internal quotes in the submitter.affiliation). Copied commit from Zika repo: * https://github.com/nextstrain/zika/pull/58 --- ingest/rules/fetch_from_ncbi.smk | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ingest/rules/fetch_from_ncbi.smk b/ingest/rules/fetch_from_ncbi.smk index 1c49b8c..2ee6714 100644 --- a/ingest/rules/fetch_from_ncbi.smk +++ b/ingest/rules/fetch_from_ncbi.smk @@ -96,9 +96,11 @@ rule format_ncbi_dataset_report: --package {input.dataset_package} \ --fields {params.ncbi_datasets_fields:q} \ --elide-header \ + | csvtk fix-quotes -Ht \ | csvtk add-header -t -l -n {params.ncbi_datasets_fields:q} \ | csvtk rename -t -f accession -n accession_version \ - | csvtk -tl mutate -f accession_version -n accession -p "^(.+?)\." \ + | csvtk -t mutate -f accession_version -n accession -p "^(.+?)\." \ + | csvtk del-quotes -t \ | tsv-select -H -f accession --rest last \ > {output.ncbi_dataset_tsv} """