-
Notifications
You must be signed in to change notification settings - Fork 162
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Reinstate drag&drop CSV with additional traits
Reinstate important but somewhat undocumented functionality allowing one to drag & drop a CSV defining additional / private traits for strains and having these appear as additional color-bys. Provided an example CSV file for the "minimal" dataset.
- Loading branch information
1 parent
5aa0f79
commit a8b0a71
Showing
8 changed files
with
176 additions
and
86 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
strain,Random Colours | ||
NODE_0000031,x | ||
NODE_0000011,x | ||
Thailand/1610acTw,x | ||
NODE_0000012,x | ||
SG_018,x | ||
NODE_0000013,x | ||
SG_056,y | ||
SG_027,y | ||
SG_074,y | ||
NODE_0000009,y | ||
NODE_0000010,y | ||
ZKC2/2016,y | ||
NODE_0000022,y | ||
DOM/2016/BB_0433,y | ||
NODE_0000023,x | ||
DOM/2016/BB_0183,x | ||
DOM/2016/MA_WGS16_011,y | ||
NODE_0000025,z | ||
NODE_0000026,z | ||
USA/2016/FLUR022,y | ||
Aedes_aegypti/USA/2016/FL05,z | ||
NODE_0000027,z | ||
DOM/2016/BB_0059,z | ||
NODE_0000028,z | ||
USA/2016/FL022,z | ||
USA/2016/FLWB042,z | ||
Brazil/2016/ZBRC16,y | ||
NODE_0000018,y | ||
V8375,y | ||
NODE_0000032,x | ||
HND/2016/HU_ME59,x | ||
Nica1_16,x | ||
NODE_0000004,z | ||
Brazil/2015/ZBRC301,z | ||
NODE_0000003,z | ||
Brazil/2015/ZBRC303,w | ||
BRA/2016/FC_6706,w | ||
Colombia/2016/ZC204Se,w |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,82 +1,134 @@ | ||
import Papa from "papaparse"; | ||
import { errorNotification, successNotification, warningNotification } from "./notifications"; | ||
import { ADD_COLOR_BYS } from "./types"; | ||
import { turnAttrsIntoHeaderArray } from "../components/download/helperFunctions"; | ||
|
||
const csvCompleteCallback = (dispatch, getState, results, file) => { | ||
const { tree } = getState(); | ||
const strainKey = results.meta.fields[0]; | ||
const ignoreTheseFields = turnAttrsIntoHeaderArray(tree.attrs); /* these are in the downloaded strain metadata CSV */ | ||
const newColorBys = results.meta.fields.slice(1).filter((x) => ignoreTheseFields.indexOf(x) === -1); | ||
const excludedColorBys = results.meta.fields.slice(1).filter((x) => ignoreTheseFields.indexOf(x) !== -1); | ||
const csvTaxa = results.data.map((o) => o[strainKey]); | ||
const treeTaxa = tree.nodes.filter((n) => !n.hasChildren).map((n) => n.strain); | ||
const taxaMatchingTree = csvTaxa.filter((x) => treeTaxa.indexOf(x) !== -1); | ||
const csvTaxaToIgnore = csvTaxa.filter((x) => taxaMatchingTree.indexOf(x) === -1); | ||
if (csvTaxaToIgnore.length) { | ||
console.warn("Ignoring these taxa from the CSV as they don't appear in the tree:", csvTaxaToIgnore); | ||
} | ||
/* data structure: obj with keys of strain names and values an array in correspondence with newColorBys */ | ||
const data = {}; | ||
for (const o of results.data.filter((r) => taxaMatchingTree.indexOf(r[strainKey]) !== -1)) { | ||
data[o[strainKey]] = newColorBys.map((x) => o[x].length ? o[x] : undefined); | ||
/**
 * A promise-ified version of Papa.parse().
 *
 * A note on encoding: it will be common that people drop CSVs exported from
 * Microsoft Excel in here and, you guessed it, this causes all sorts of problems.
 * https://github.com/mholt/PapaParse/issues/169 suggests adding
 * encoding: "ISO-8859-1" to the config, which may work.
 *
 * @param {File} csvFile the dropped CSV file (one entry of a DataTransfer's FileList)
 * @returns {Promise<Object>} resolves with the results object Papa hands to `complete`
 *   (callers read `data`, `errors` and `meta` from it); rejects with whatever Papa
 *   hands to `error`
 */
const parseCsv = (csvFile) => new Promise((resolve, reject) => {
  Papa.parse(csvFile, {
    header: true, // first row supplies the field names (exposed as results.meta.fields)
    complete: (results) => resolve(results),
    error: (error) => reject(error),
    encoding: "UTF-8",
    comments: "#",
    delimiter: ",",
    skipEmptyLines: true,
    dynamicTyping: false // keep every parsed value as a string
  });
});
|
||
|
||
const handleDroppedCSV = async (dispatch, getState, file) => { | ||
|
||
let csvData, errors, csvMeta | ||
try { | ||
({data: csvData, errors, meta: csvMeta} = await parseCsv(file)) | ||
if (errors.length) { | ||
console.error("Encountered the following errors during CSV parsing:", errors); | ||
} | ||
} catch (err) { | ||
return dispatch(errorNotification({ | ||
message: `Parsing of ${file.name} failed`, | ||
details: err.message | ||
})); | ||
} | ||
/* edge cases where the CSV has no "real" info */ | ||
if (taxaMatchingTree.length === 0 || newColorBys.length === 0) { | ||
dispatch(errorNotification({ | ||
message: file.name + " had no (relevent) information", | ||
details: newColorBys.length === 0 ? "No columns to add as traits" : "No taxa which match those in the tree" | ||
|
||
const strainKey = csvMeta.fields[0]; | ||
const {controls, tree} = getState(); | ||
const newColorByNames = []; | ||
const colorBysIgnored = []; | ||
csvMeta.fields.slice(1).forEach((colorBy) => { | ||
controls.coloringsPresentOnTree.has(colorBy) ? colorBysIgnored.push(colorBy) : newColorByNames.push(colorBy); | ||
}); | ||
const strainsToProcess = new Set(); | ||
const dataToProcess = {}; | ||
const taxaInCsvButNotInTree = []; | ||
const allStrainNames = new Set(tree.nodes.map((n) => n.strain)); // can be internal nodes | ||
csvData.forEach((d) => { | ||
const strain = d[strainKey]; | ||
if (allStrainNames.has(strain)) { | ||
strainsToProcess.add(strain); | ||
dataToProcess[strain] = {}; | ||
newColorByNames.forEach((colorBy) => { | ||
if (d[colorBy]) { | ||
dataToProcess[strain][colorBy] = {value: d[colorBy]}; | ||
} | ||
}); | ||
} else { | ||
taxaInCsvButNotInTree.push(strain); | ||
} | ||
}); | ||
|
||
/* CHECK FOR ERRORS */ | ||
if (strainsToProcess.size === 0 || newColorByNames.length === 0) { | ||
return dispatch(errorNotification({ | ||
message: `${file.name} had no (relevent) information`, | ||
details: newColorByNames.length === 0 ? "No columns to add as colorings" : "No taxa which match those in the tree" | ||
})); | ||
return; | ||
} | ||
dispatch({type: ADD_COLOR_BYS, newColorBys, data, taxa: taxaMatchingTree}); | ||
dispatch(successNotification({ | ||
message: "Adding metadata from " + file.name, | ||
details: newColorBys.length + " fields for " + taxaMatchingTree.length + " / " + treeTaxa.length + " taxa" | ||
})); | ||
if (excludedColorBys.length) { | ||
|
||
/* DISPATCH APPROPRIATE WARNINGS */ | ||
if (taxaInCsvButNotInTree.length) { | ||
const n = taxaInCsvButNotInTree.length | ||
dispatch(warningNotification({ | ||
message: "Excluded " + excludedColorBys.length + " fields as they already exist", | ||
details: excludedColorBys.join(", ") | ||
message: `Ignoring ${n} taxa which ${n > 1 ? "don't" : "doesn't"} appear in the tree!`, | ||
details: taxaInCsvButNotInTree.join(", ") | ||
})); | ||
console.warn("Ignoring these taxa from the CSV as they don't appear in the tree:", taxaInCsvButNotInTree); | ||
} | ||
if (csvTaxaToIgnore.length) { | ||
if (colorBysIgnored.length) { | ||
dispatch(warningNotification({ | ||
message: "Excluded " + csvTaxaToIgnore.length + " taxa from the CSV as they aren't in the tree", | ||
details: csvTaxaToIgnore.join(", ") | ||
message: `Ignoring ${colorBysIgnored.length} CSV fields as they are already set as colorings`, | ||
details: colorBysIgnored.join(", ") | ||
})); | ||
} | ||
}; | ||
|
||
/**
 * Report a CSV parsing failure by dispatching an error notification.
 * @param {Function} dispatch called once with the action built by errorNotification
 * @param {*} error forwarded unchanged as the notification's `details`
 * @param {{name: string}} file the file that failed to parse; only `name` is read
 */
const csvError = (dispatch, error, file) => {
  const message = "Error parsing " + file.name;
  dispatch(errorNotification({ message, details: error }));
};
/* DISPATCH NEW COLORINGS & SUCCESS NOTIFICATION */ | ||
const newColorings = {} | ||
newColorByNames.forEach((title) => { | ||
// TODO -- let the CSV define the type | ||
newColorings[title] = {title, type: "categorical"}; | ||
}); | ||
dispatch({type: ADD_COLOR_BYS, newColorings, strains: strainsToProcess, traits: dataToProcess}); | ||
dispatch(successNotification({ | ||
message: "Adding metadata from " + file.name, | ||
details: `${newColorByNames.length} new field${newColorByNames.length > 1 ? "s" : ""} for ${strainsToProcess.size} node${strainsToProcess.size > 1 ? "s" : ""}` | ||
})); | ||
} | ||
|
||
/* A note on encoding here: it will be common that people drop CSVs from Microsoft Excel | ||
in here and, you guessed it, this causes all sorts of problems. | ||
https://github.com/mholt/PapaParse/issues/169 suggests adding encoding: "ISO-8859-1" to the Papa config - which seems to work | ||
*/ | ||
/** | ||
* A thunk to handle dropped files and take the appropriate action. | ||
* @param {*} files DataTransfer object's FileList | ||
*/ | ||
const handleFilesDropped = (files) => (dispatch, getState) => { | ||
|
||
const filesDropped = (files) => { | ||
return (dispatch, getState) => { | ||
for (const file of files) { | ||
if (file.type !== "text/csv") { | ||
dispatch(warningNotification({message: "Non-CSV File dropped", details: file.type + file.name})); | ||
} else { | ||
// http://papaparse.com/docs#config | ||
Papa.parse(file, { | ||
header: true, | ||
complete: csvCompleteCallback.bind(this, dispatch, getState), | ||
error: csvError.bind(this, dispatch), | ||
encoding: "UTF-8", | ||
comments: "#", | ||
delimiter: ",", | ||
skipEmptyLines: true, | ||
dynamicTyping: false | ||
}); | ||
} | ||
} | ||
}; | ||
if (files.length !== 1) { | ||
return dispatch(warningNotification({ | ||
message: "More than one file dropped", | ||
details: "Currently we only allow a single CSV to be used" | ||
})); | ||
} | ||
|
||
const file = files[0]; | ||
if (file.type === "text/csv") { | ||
handleDroppedCSV(dispatch, getState, file); | ||
} else { | ||
return dispatch(warningNotification({ | ||
message: "Non-CSV File dropped", | ||
details: `Currently only CSV files are allowed, not ${file.type}` | ||
})); | ||
} | ||
}; | ||
|
||
export default filesDropped; | ||
export default handleFilesDropped; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters