nextstrain · joverlee521 · Dec 12, 2022 · Dec 10, 2022 · Dec 10, 2022 · Dec 10, 2022
diff --git a/CHANGES.md b/CHANGES.md
@@ -7,10 +7,12 @@
 * docs: Update the API documentation to reflect the latest state of things in the codebase. [#1087][] (@victorlin)
 * Fix support for Biopython version 1.80 which deprecated `Bio.Seq.Seq.ungap()`. [#1102][] (@victorlin)
 * export v2: Fixed a bug where colorings for zero values via `--colors` would not get applied to the exported Auspice JSON. [#1100][] (@joverlee521)
+* curate: Fixed a bug where metadata TSVs failed to parse if data within a column included comma-separated values. [#1110][] (@joverlee521)
 
 [#1087]: https://github.com/nextstrain/augur/pull/1087
 [#1100]: https://github.com/nextstrain/augur/pull/1100
 [#1102]: https://github.com/nextstrain/augur/pull/1102
+[#1110]: https://github.com/nextstrain/augur/pull/1110
 
 ## 18.2.0 (15 November 2022)
 

diff --git a/augur/io/metadata.py b/augur/io/metadata.py
@@ -138,7 +138,7 @@ def read_table_to_dict(table, duplicate_reporting=DataErrorMethod.ERROR_FIRST, i
     duplicate_ids = set()
     with open_file(table) as handle:
         # Get sample to determine delimiter
-        table_sample = handle.read(1024)
+        table_sample = handle.readline()
 
         if handle.seekable():
             handle.seek(0)

diff --git a/tests/functional/curate/cram/metadata-input.t b/tests/functional/curate/cram/metadata-input.t
@@ -9,28 +9,28 @@ Running the `passthru` subcommand since it does not do any data transformations.
 Create metadata TSV file for testing.
 
   $ cat >$TMP/metadata.tsv <<~~
-  > strain	country	date
-  > sequence_A	USA	2020-10-01
-  > sequence_B	USA	2020-10-02
-  > sequence_C	USA	2020-10-03
+  > strain	country	date	authors
+  > sequence_A	USA	2020-10-01	A,B,C,D,E,F,G,H,I,J,K
+  > sequence_B	USA	2020-10-02	A,B,C,D,E,F,G,H,I,J,K
+  > sequence_C	USA	2020-10-03	A,B,C,D,E,F,G,H,I,J,K
   > ~~
 
 Test TSV metadata input
 
   $ ${AUGUR} curate passthru \
   > --metadata $TMP/metadata.tsv
-  {"strain": "sequence_A", "country": "USA", "date": "2020-10-01"}
-  {"strain": "sequence_B", "country": "USA", "date": "2020-10-02"}
-  {"strain": "sequence_C", "country": "USA", "date": "2020-10-03"}
+  {"strain": "sequence_A", "country": "USA", "date": "2020-10-01", "authors": "A,B,C,D,E,F,G,H,I,J,K"}
+  {"strain": "sequence_B", "country": "USA", "date": "2020-10-02", "authors": "A,B,C,D,E,F,G,H,I,J,K"}
+  {"strain": "sequence_C", "country": "USA", "date": "2020-10-03", "authors": "A,B,C,D,E,F,G,H,I,J,K"}
 
 Test TSV metadata input from stdin
 
   $ cat $TMP/metadata.tsv \
   >   | ${AUGUR} curate normalize-strings \
   >     --metadata -
-  {"strain": "sequence_A", "country": "USA", "date": "2020-10-01"}
-  {"strain": "sequence_B", "country": "USA", "date": "2020-10-02"}
-  {"strain": "sequence_C", "country": "USA", "date": "2020-10-03"}
+  {"strain": "sequence_A", "country": "USA", "date": "2020-10-01", "authors": "A,B,C,D,E,F,G,H,I,J,K"}
+  {"strain": "sequence_B", "country": "USA", "date": "2020-10-02", "authors": "A,B,C,D,E,F,G,H,I,J,K"}
+  {"strain": "sequence_C", "country": "USA", "date": "2020-10-03", "authors": "A,B,C,D,E,F,G,H,I,J,K"}
 
 Create metadata CSV file for testing.