Skip to content

Commit

Permalink
[feat] Match fauna strain names
Browse files (browse the repository at this point in the history)
  • Loading branch information
j23414 committed Jul 3, 2023
1 parent 8b28622 commit 125ba00
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 4 deletions.
8 changes: 4 additions & 4 deletions ingest/bin/post_process_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,19 @@ def _set_strain_name(record, fixes):
strain_name = record["strain"]

if strain_name in fixes:
strain_name = fixes[strain_name]
return(fixes[strain_name])

strain_name = strain_name.replace('Zika_virus', '').replace('Zikavirus', '').replace('Zika virus', '').replace('Zika', '').replace('ZIKV', '')
strain_name = strain_name.replace('Human', '').replace('human', '').replace('H.sapiens_wt', '').replace('H.sapiens_tc', '').replace('Hsapiens_tc', '').replace('H.sapiens-tc', '').replace('Homo_sapiens', '').replace('Homo sapiens', '').replace('Hsapiens', '').replace('H.sapiens', '')
strain_name = strain_name.replace('Human', '').replace('human', '').replace('H.sapiens_wt', '').replace('H.sapiens-wt', '').replace('H.sapiens_tc', '').replace('Hsapiens_tc', '').replace('H.sapiens-tc', '').replace('Homo_sapiens', '').replace('Homo sapiens', '').replace('Hsapiens', '').replace('H.sapiens', '')
strain_name = strain_name.replace('/Hu/', '')
strain_name = strain_name.replace('_Asian', '').replace('_Asia', '').replace('_asian', '').replace('_asia', '')
strain_name = strain_name.replace('_URI', '').replace('_SER', '').replace('_PLA', '').replace('_MOS', '').replace('_SAL', '')
strain_name = strain_name.replace('_URI', '').replace('-URI', '').replace('_SER', '').replace('-SER', '').replace('_PLA', '').replace('-PLA', '').replace('_MOS', '').replace('_SAL', '')
strain_name = strain_name.replace('Aaegypti_wt', 'Aedes_aegypti').replace('Aedessp', 'Aedes_sp')
strain_name = strain_name.replace(' ', '').replace('\'', '').replace('(', '').replace(')', '').replace('//', '/').replace('__', '_').replace('.', '').replace(',', '')
strain_name = re.sub('^[\/\_\-]', '', strain_name)

if strain_name in fixes:
strain_name = fixes[strain_name]
return(fixes[strain_name])

try:
strain_name = 'V' + str(int(strain_name))
Expand Down
29 changes: 29 additions & 0 deletions ingest/source-data/annotations.tsv
Original file line number Diff line number Diff line change
@@ -1,3 +1,32 @@
KY014295 strain USA/2016/FL010
MT377503 strain V151144
MF988734 strain SG_EHI_/33164Y17
KU853013 strain Dominican_Republic/2016/PD2
KY785443 strain USA/2016/FL028
KX906952 strain 2016_HND_19563
KY120348 strain MEX_CIENI551
KX856011 strain Aedes_sp/MEX_I_44/2016
KY785421 strain USA/2016/FL019
KU527068 strain Natal_RGN
MF438286 strain Cuba_2017
KF993678 strain THA/PLCal_ZV/2013
KY631494 strain ENCB165P4
KY785440 strain USA/2016/FL035
KY785451 strain Martinique/2016/FL001
MF664436 strain Dominican_Republic/2016/ZB
KY648934 strain Aedes_aegypti/MEX/MEX_I_44/2016
KX879603 strain EC/Esmeraldas/062/2016
OL414716 strain Faranah/18
MN185326 strain French_Guiana_Aedes_aegypti_T1010
MN185328 strain French_Guiana_Aedes_aegypti_T1141
KX827268 strain USA/UT_1/2016
KU853012 strain Dominican_Republic/2016/PD1
MK028857 strain Puerto_Rico/2015/PRVABC59
KY785457 strain USA/2016/FL029
MH513600 strain BR/Sinop/H366_2P/2015
KY927808 strain ZZ_1
KX087102 strain COL/FLR/2015
KX879604 strain EC/Esmeraldas/089/2016
AF380138 country Democratic Republic of the Congo
AY741551 country Sierra Leone
DQ011153 country USA
Expand Down

0 comments on commit 125ba00

Please sign in to comment.