From 629ac310d7f95ffaded6c19e7a2fe3bd2f3e04b8 Mon Sep 17 00:00:00 2001 From: Sander van den Hoek Date: Thu, 13 Feb 2020 10:02:40 +0100 Subject: [PATCH 1/6] Removed data files from repo (is available through Zenodo). --- ...2020-ncbi-gene-ids-failed-HGNC-symbols.txt | 14 - cgd/CGD_4feb2020-ncbi-gene-ids-minimized.tsv | 3987 ----------------- 2 files changed, 4001 deletions(-) delete mode 100755 cgd/CGD_4feb2020-ncbi-gene-ids-failed-HGNC-symbols.txt delete mode 100644 cgd/CGD_4feb2020-ncbi-gene-ids-minimized.tsv diff --git a/cgd/CGD_4feb2020-ncbi-gene-ids-failed-HGNC-symbols.txt b/cgd/CGD_4feb2020-ncbi-gene-ids-failed-HGNC-symbols.txt deleted file mode 100755 index b8dbbf8..0000000 --- a/cgd/CGD_4feb2020-ncbi-gene-ids-failed-HGNC-symbols.txt +++ /dev/null @@ -1,14 +0,0 @@ -No replacement NCBI gene id could be found for AGMX2 (line 114), skipping line. -No replacement NCBI gene id could be found for AMCN (line 181), skipping line. -No replacement NCBI gene id could be found for CLAM (line 693), skipping line. -No replacement NCBI gene id could be found for CYTB (line 924), skipping line. -No replacement NCBI gene id could be found for DHS6S1 (line 974), skipping line. -No replacement NCBI gene id could be found for FCMTE2 (line 1262), skipping line. -No replacement NCBI gene id could be found for FSHMD1A (line 1343), skipping line. -No replacement NCBI gene id could be found for TRNH (line 3718), skipping line. -No replacement NCBI gene id could be found for TRNI (line 3719), skipping line. -No replacement NCBI gene id could be found for TRNK (line 3720), skipping line. -No replacement NCBI gene id could be found for TRNP (line 3721), skipping line. -No replacement NCBI gene id could be found for TRNQ (line 3722), skipping line. -No replacement NCBI gene id could be found for TRNS2 (line 3723), skipping line. -No replacement NCBI gene id could be found for TRNT (line 3724), skipping line. 
\ No newline at end of file diff --git a/cgd/CGD_4feb2020-ncbi-gene-ids-minimized.tsv b/cgd/CGD_4feb2020-ncbi-gene-ids-minimized.tsv deleted file mode 100644 index a723109..0000000 --- a/cgd/CGD_4feb2020-ncbi-gene-ids-minimized.tsv +++ /dev/null @@ -1,3987 +0,0 @@ -Gene -2 -144568 -53947 -8086 -79719 -16 -57505 -10157 -18 -19 -26154 -21 -24 -23461 -5243 -8647 -5244 -10058 -22 -85320 -1244 -368 -6833 -10060 -215 -5825 -5826 -9429 -64240 -64241 -26090 -51099 -25 -28 -27034 -28976 -34 -35 -36 -37 -176 -38 -65057 -1636 -55331 -43 -2532 -50 -51 -8309 -93650 -54 -197322 -2182 -58 -59 -60 -70 -71 -72 -51412 -87 -88 -89 -81 -90 -93 -94 -95 -100 -51816 -102 -6868 -53616 -8754 -81794 -11093 -170691 -170692 -9509 -9508 -9719 -54507 -103 -113179 -107 -109 -111 -112 -120 -30817 -9289 -10149 -57211 -84059 -132 -23394 -54936 -151 -152 -153 -154 -158 -165 -2334 -27125 -10939 -174 -175 -123624 -60509 -55750 -178 -10555 -8540 -375790 -183 -23287 -185 -186 -189 -64902 -191 -27245 -54806 -196 -57379 -9131 -9255 -7965 -9049 -23746 -326 -203 -204 -122481 -10142 -1646 -6718 -207 -208 -10000 -210 -212 -213 -5832 -220 -224 -8659 -7915 -4329 -501 -226 -229 -56052 -440138 -79087 -79868 -199857 -85365 -10195 -29929 -79053 -79796 -238 -91801 -7840 -242 -240 -59344 -57538 -249 -57679 -259173 -8092 -257 -60529 -23600 -258 -265 -139285 -268 -269 -9949 -81693 -270 -271 -272 -275 -401138 -64682 -283 -27329 -286 -287 -56172 -23141 -29123 -22852 -203286 -54443 -55129 -63982 -203859 -196527 -3730 -84168 -118429 -311 -162 -1174 -8905 -130340 -1173 -1175 -8546 -8120 -8943 -10717 -23431 -9179 -11154 -9907 -324 -10297 -147495 -335 -336 -116519 -338 -344 -345 -348 -351 -26060 -353 -54840 -358 -359 -360 -367 -372 -375 -10564 -383 -57514 -396 -9138 -9639 -23370 -9181 -9459 -23229 -8289 -57492 -196528 -200894 -23568 -403 -84100 -23204 -84071 -55130 -79798 -80210 -9915 -10095 -407 -410 -411 -415 -420 -64801 -170302 -427 -136371 -51008 -429 -55870 -434 -435 -440 -443 -444 -259266 -445 -171023 -55252 -80816 
-84896 -55210 -85300 -22926 -471 -51062 -25923 -472 -1822 -220202 -286410 -23400 -476 -477 -478 -487 -488 -492 -27032 -498 -514 -84833 -537 -10159 -23545 -50617 -523 -525 -526 -529 -538 -540 -51761 -5205 -91647 -545 -546 -6310 -25814 -6311 -4287 -6314 -724066 -6315 -549 -6795 -26053 -10677 -551 -554 -8312 -8313 -567 -8706 -148789 -126792 -26229 -145173 -2583 -2683 -11285 -11041 -27077 -80776 -570 -60468 -9531 -8815 -8314 -580 -92482 -582 -79738 -166379 -583 -585 -129880 -55212 -27241 -4059 -10134 -590 -593 -594 -10295 -8915 -53335 -64919 -53630 -54880 -63035 -613 -617 -627 -55814 -146227 -7439 -631 -8419 -633 -727857 -79365 -80114 -23299 -274 -640 -641 -29760 -388552 -26258 -644 -649 -9210 -650 -652 -168667 -657 -658 -659 -9790 -646 -54796 -388962 -669 -54928 -2186 -673 -221927 -672 -675 -676 -2972 -83990 -7862 -254065 -26580 -682 -7809 -686 -695 -701 -11149 -79703 -57102 -113246 -91574 -84529 -83636 -712 -713 -708 -714 -114902 -715 -716 -717 -26005 -718 -720 -721 -727 -729 -730 -731 -732 -157657 -735 -203228 -771 -760 -762 -763 -767 -51475 -57010 -773 -774 -775 -776 -777 -778 -8913 -8912 -779 -9254 -93589 -783 -785 -10369 -790 -801 -805 -125972 -815 -816 -818 -23261 -124583 -823 -825 -726 -84433 -79092 -64170 -146206 -79587 -8573 -843 -23581 -841 -844 -845 -846 -831 -847 -117144 -857 -859 -284119 -867 -2694 -875 -84733 -54862 -57545 -147372 -388389 -93233 -84317 -339230 -115948 -51244 -28952 -79140 -339829 -55036 -57003 -152137 -85478 -124093 -83987 -55704 -440193 -83605 -8838 -894 -8812 -10309 -92002 -22948 -977 -8763 -930 -50489 -919 -939 -23607 -51293 -948 -915 -916 -917 -920 -958 -959 -960 -4179 -1604 -966 -970 -973 -974 -975 -925 -10225 -146059 -8556 -998 -8318 -990 -79577 -83879 -999 -1009 -1013 -64072 -1001 -92211 -8558 -8621 -1019 -1020 -55755 -1021 -1024 -6792 -1027 -1028 -1029 -50937 -1041 -81620 -388551 -1050 -1053 -1056 -63036 -1062 -1063 -55835 -80152 -153241 -9662 -22995 -22897 -84984 -11190 -80184 -95681 -55165 -9702 -80254 -84131 -51134 -375298 
-10715 -204219 -10087 -1066 -1071 -56683 -755 -80217 -55779 -220136 -149465 -255101 -79846 -118491 -629 -55997 -1675 -3075 -3078 -10878 -10877 -81494 -3426 -1073 -5199 -1080 -283489 -1103 -400916 -51142 -1105 -1106 -1107 -1108 -55636 -57680 -11200 -1118 -1120 -1121 -5119 -25978 -128866 -1123 -11261 -91851 -1131 -1134 -1135 -1136 -1137 -1139 -1140 -1141 -1144 -1145 -1146 -50515 -113189 -9469 -4166 -64377 -22856 -1147 -10519 -10518 -137392 -23152 -63924 -4261 -22858 -493856 -11113 -10370 -25792 -150468 -23529 -1180 -1181 -1183 -1184 -1186 -1187 -1188 -9076 -9071 -23562 -10686 -149461 -64581 -1193 -53405 -79827 -1201 -1203 -54982 -2055 -10978 -81570 -8192 -10845 -7401 -1213 -7555 -1259 -1261 -1258 -54714 -22866 -54805 -26504 -23019 -4848 -4849 -10695 -1272 -6900 -8506 -26047 -28958 -493753 -388753 -65260 -84334 -80347 -1690 -9382 -22796 -25839 -10466 -57511 -91949 -84342 -1300 -1301 -1302 -1303 -1305 -1308 -80781 -1277 -1278 -84570 -85301 -1280 -1281 -1282 -1284 -1285 -1286 -1287 -1288 -1289 -1290 -1291 -1292 -1293 -1294 -1296 -1297 -1298 -1299 -10584 -78989 -79709 -8292 -1311 -1312 -1314 -9276 -27235 -51117 -51004 -10229 -56997 -79934 -57017 -10699 -11151 -1352 -84987 -1355 -116228 -84701 -1337 -1340 -1349 -1351 -1356 -57094 -27151 -65250 -10815 -1369 -1371 -1373 -1374 -126129 -1376 -1378 -1380 -8738 -1384 -23418 -286204 -51185 -90993 -1387 -78987 -9419 -9244 -729920 -10491 -1406 -1407 -1409 -1410 -1411 -1412 -1413 -1414 -1415 -1417 -1419 -1420 -1421 -1427 -1428 -1436 -1438 -1439 -1441 -1453 -1457 -1460 -79848 -8048 -1471 -1474 -1475 -1476 -1487 -80169 -10664 -9150 -1491 -115908 -1493 -1495 -1496 -29119 -1499 -1500 -1497 -1503 -5476 -1508 -1075 -1509 -8722 -1513 -348180 -8029 -8452 -8450 -9820 -1523 -23316 -10283 -55280 -7852 -63932 -1528 -1727 -1535 -1536 -1537 -54205 -26999 -1540 -1583 -1584 -1585 -1586 -1588 -1544 -1545 -1589 -1591 -340665 -1593 -1594 -1548 -1555 -1557 -1558 -1559 -1565 -120227 -113612 -1576 -1577 -8529 -126410 -285440 -9420 -728294 -1600 -51339 
-1605 -1615 -55157 -1621 -1629 -80067 -50717 -1630 -51473 -8642 -64421 -1634 -28960 -1639 -1641 -51181 -1643 -1644 -80821 -23259 -1650 -4921 -65992 -1663 -1654 -51428 -23586 -83479 -1656 -10522 -1666 -8560 -23258 -9681 -1674 -8694 -9993 -8526 -1716 -1718 -1717 -79947 -1719 -50846 -1723 -1725 -55526 -8449 -22907 -57647 -9785 -56616 -1729 -1730 -23405 -57609 -129563 -1736 -1737 -1738 -1741 -28514 -10683 -54567 -1743 -1747 -1748 -1749 -1756 -29958 -1758 -1760 -23312 -1763 -123872 -55172 -161582 -54919 -139212 -25981 -8701 -8632 -1767 -1770 -27019 -64446 -51726 -374407 -3300 -10049 -56521 -23317 -131118 -134218 -5611 -80331 -9829 -83544 -10126 -1776 -1759 -10059 -1785 -23268 -1786 -1788 -1789 -1794 -1795 -57572 -85440 -81704 -285489 -22845 -29980 -1798 -5977 -1801 -8813 -8818 -54344 -1804 -283417 -1806 -1807 -128338 -92749 -1813 -1824 -1825 -29940 -1828 -1829 -147409 -1832 -1834 -667 -25778 -1837 -84062 -50506 -405753 -1848 -100288687 -1855 -1857 -54808 -1778 -79659 -51626 -1859 -9149 -8291 -199221 -124454 -253738 -10682 -1889 -9427 -1892 -1893 -1896 -10913 -128178 -80153 -1906 -1908 -1909 -1910 -8726 -1917 -51603 -1938 -2202 -30008 -114327 -79631 -1947 -9343 -1950 -1956 -54583 -1959 -1962 -79813 -9451 -440275 -1967 -8892 -8891 -8890 -8893 -1968 -8665 -9775 -1977 -1981 -60528 -1991 -63916 -84173 -2006 -64834 -6785 -60481 -8518 -55250 -26610 -23065 -2010 -10436 -2009 -2013 -2018 -10117 -2022 -2027 -5167 -953 -285203 -2033 -2034 -2035 -2036 -2038 -4072 -57724 -1969 -2048 -2050 -2052 -7957 -2056 -2057 -2058 -2059 -64787 -8288 -26284 -2065 -2066 -2067 -2068 -2071 -2072 -2073 -2074 -375748 -1161 -2077 -57222 -10613 -11160 -114625 -55780 -90952 -157570 -83715 -2099 -2100 -54845 -2103 -2108 -2109 -2110 -23474 -2120 -2121 -132884 -23233 -23404 -51010 -11340 -23086 -2131 -2132 -2137 -2138 -2070 -346007 -2146 -2159 -2160 -2161 -2162 -2165 -2147 -2153 -2155 -2157 -2158 -79152 -729041 -8772 -2184 -63901 -374393 -84668 -84140 -54757 -56975 -286077 -22909 -2175 -2187 -2176 -2177 
-2178 -2188 -2189 -55215 -55120 -57697 -84188 -10667 -10056 -355 -356 -22868 -2196 -79633 -2192 -10516 -2200 -2201 -2203 -26224 -26235 -80204 -79791 -81545 -25793 -2214 -8547 -197258 -2222 -2224 -112812 -2232 -2235 -55612 -83706 -389549 -2243 -2244 -2245 -121512 -2255 -2257 -2259 -8823 -8822 -26281 -8074 -2248 -2250 -2253 -2254 -2260 -2263 -2261 -2266 -2271 -2273 -9158 -9896 -344018 -128486 -60681 -55033 -79147 -2218 -80308 -201163 -2312 -388698 -2313 -2316 -2317 -2318 -23767 -2324 -28982 -55640 -56776 -2328 -2332 -2335 -2348 -2296 -2303 -2304 -2301 -2294 -2290 -8928 -2299 -2302 -668 -8456 -27086 -93986 -50943 -55572 -80144 -158326 -341640 -55691 -90167 -9758 -23732 -25794 -2488 -2492 -401024 -10841 -2495 -2512 -79068 -24140 -2517 -2521 -2523 -2524 -2525 -2528 -2530 -80199 -2395 -486 -2533 -79443 -2535 -8322 -8323 -2538 -92579 -2539 -2548 -2549 -9568 -2554 -2555 -2558 -2560 -2561 -2562 -2566 -2571 -51083 -2581 -2582 -2584 -2588 -79695 -2591 -2592 -2593 -8139 -23193 -2617 -2619 -2622 -2623 -2624 -2625 -2626 -140628 -2627 -57798 -57459 -2628 -2629 -57704 -2632 -2639 -2643 -2645 -2729 -9247 -2651 -2653 -54332 -54834 -2657 -2658 -9573 -8200 -392255 -2661 -2664 -2668 -50628 -2670 -2671 -2672 -8328 -85476 -84340 -2673 -2677 -2678 -2688 -2690 -2692 -2693 -26058 -9837 -126326 -2697 -2700 -2702 -2703 -2705 -2706 -2707 -127534 -10804 -57165 -2710 -2717 -2720 -2731 -342035 -2733 -2735 -2736 -2737 -84662 -169792 -11146 -2741 -2743 -51218 -2744 -2746 -2752 -132158 -2760 -51053 -29926 -29925 -2767 -2773 -2774 -2775 -2778 -2779 -2780 -2782 -2784 -59345 -10681 -10020 -27232 -8443 -79158 -84572 -2796 -2798 -2799 -92344 -9570 -2806 -2811 -2812 -51206 -2815 -8733 -2719 -2239 -10082 -2819 -23171 -10243 -2821 -338328 -10457 -83550 -4935 -440435 -8111 -54112 -114928 -29899 -84706 -2879 -10750 -80000 -26585 -64388 -79977 -57822 -9380 -2892 -2893 -2895 -2898 -2900 -2902 -2903 -2904 -2906 -23426 -6011 -2911 -2916 -2896 -389207 -643226 -145258 -1687 -2934 -2936 -2937 -2954 -2961 -404672 
-54676 -84705 -2978 -2979 -2982 -2984 -3000 -60558 -2990 -2992 -2993 -2994 -2995 -2997 -2998 -64412 -3008 -283120 -9563 -23498 -3026 -57531 -3033 -3030 -3032 -3034 -57817 -3035 -23438 -84868 -10456 -3039 -3040 -3043 -3047 -3048 -3052 -3054 -348980 -10021 -3060 -9759 -10013 -55869 -3067 -57520 -3070 -220296 -341208 -8925 -8924 -84667 -8820 -3073 -3074 -3077 -164045 -3081 -3082 -138050 -26275 -51501 -3094 -3097 -148738 -3098 -3105 -3106 -3141 -3145 -83872 -3149 -3155 -3156 -3158 -3162 -3166 -6927 -6928 -3172 -3176 -3178 -3181 -9987 -3188 -3190 -3192 -112817 -9455 -3198 -3207 -3209 -3199 -3211 -3229 -3236 -3239 -3240 -3208 -3242 -3248 -3251 -3257 -84343 -89781 -11234 -79803 -60495 -55806 -3265 -3273 -9394 -90161 -3290 -3291 -3028 -3293 -3295 -3284 -80270 -3299 -3305 -3313 -3315 -8988 -26353 -3329 -3339 -3350 -3356 -5654 -27429 -3064 -10075 -3373 -54768 -219844 -10525 -3376 -55699 -200205 -3386 -29851 -3418 -3420 -3423 -3425 -51124 -64135 -387733 -3455 -3459 -3460 -282617 -55764 -9742 -26160 -11020 -112752 -51098 -55081 -80173 -57560 -28981 -8100 -3476 -3479 -3480 -3481 -3483 -3490 -3507 -3508 -3514 -3543 -3547 -3321 -3549 -3551 -8517 -10320 -3586 -3587 -3588 -3590 -3593 -3594 -112744 -23765 -84818 -54756 -10068 -11141 -3557 -59067 -50615 -3559 -3560 -3561 -133396 -26525 -3572 -3574 -3575 -286676 -3612 -3614 -3615 -3617 -50939 -64423 -56623 -51763 -3636 -3630 -3640 -3643 -26173 -27152 -27130 -253430 -9657 -23288 -9922 -23096 -51135 -3658 -359948 -64207 -3661 -3662 -3664 -3665 -3394 -10379 -8471 -81689 -122961 -23479 -9636 -83737 -3674 -3675 -3655 -3679 -8516 -3689 -3690 -3691 -3694 -3702 -9445 -3704 -3708 -3709 -3712 -389434 -182 -84522 -3717 -3718 -83700 -57158 -57338 -3728 -23189 -25959 -284058 -3735 -7994 -23522 -10300 -390594 -3736 -3737 -3739 -3741 -3746 -3748 -3752 -3753 -9992 -10008 -3756 -3757 -3758 -3766 -3767 -3769 -100134444 -3759 -3760 -3762 -3763 -338567 -3777 -50801 -51305 -3778 -3782 -3783 -3784 -10984 -3785 -3786 -9132 -56479 -57582 -343450 -169522 
-79734 -154881 -126695 -23028 -10765 -8242 -7403 -23135 -2531 -3792 -11081 -154288 -3795 -23247 -9786 -9851 -285600 -57670 -57498 -3832 -9928 -547 -23095 -10749 -55605 -3835 -3796 -24137 -3798 -3800 -374654 -26128 -84623 -3814 -84634 -3815 -4254 -55857 -9365 -64837 -10661 -8462 -200942 -317719 -80311 -54800 -26249 -131377 -10324 -55975 -9622 -3818 -100144748 -4297 -9757 -58508 -8085 -55904 -51111 -3827 -57082 -11133 -3845 -83999 -889 -3848 -3858 -3859 -3860 -3861 -3868 -3872 -3849 -147183 -3850 -3851 -3852 -3853 -3854 -286887 -112802 -121391 -9119 -3887 -3889 -3891 -3892 -3857 -339855 -8942 -3897 -79944 -8270 -284217 -3908 -3909 -3910 -3912 -3913 -3914 -3918 -10319 -3920 -28956 -9215 -51574 -51520 -23395 -81887 -27040 -3930 -167691 -3931 -3932 -3938 -11155 -3939 -3945 -197257 -3949 -26119 -7044 -221496 -23592 -3952 -3953 -3955 -9211 -163175 -3972 -3973 -222662 -8022 -89884 -11019 -3977 -3981 -3982 -51474 -55679 -84894 -55180 -3988 -3990 -3991 -200879 -149998 -643418 -51601 -387787 -9516 -3998 -81562 -64327 -55788 -64788 -4000 -4001 -84823 -56203 -4010 -80856 -9361 -4014 -4015 -125336 -4018 -10161 -23175 -9663 -4023 -9227 -987 -9860 -345193 -83938 -4035 -29967 -4036 -4038 -4041 -4040 -4043 -10128 -115399 -23639 -56262 -120892 -90678 -220074 -4047 -4053 -4054 -8425 -4056 -57128 -90624 -1130 -4069 -54585 -8216 -4081 -10586 -23499 -10459 -4094 -389692 -9935 -4099 -10916 -54551 -9863 -84061 -4117 -10892 -10046 -11253 -4125 -4126 -4128 -55262 -5604 -5605 -4214 -9020 -51776 -9064 -6885 -5602 -9479 -23162 -7867 -23005 -10982 -4137 -10299 -4140 -4141 -92935 -153562 -5648 -10747 -22983 -84930 -4143 -4148 -9782 -4149 -55777 -4153 -79143 -8720 -51360 -4157 -4158 -4159 -4160 -56922 -64087 -84693 -90411 -345643 -4171 -8888 -4173 -4174 -4175 -84515 -254394 -57192 -79648 -4191 -4193 -2122 -4204 -51102 -9968 -23389 -9440 -9439 -81857 -4208 -4210 -84466 -1954 -150365 -254528 -4212 -4221 -4222 -10461 -23184 -145873 -4233 -124512 -29081 -8076 -56947 -9927 -83552 -84879 -256471 -4247 
-92667 -4256 -57534 -125988 -10367 -4281 -11043 -4284 -4285 -406932 -407975 -406960 -406987 -407053 -4286 -8195 -7681 -54903 -23209 -4292 -27030 -79083 -23417 -166785 -326625 -25974 -27249 -4311 -4322 -4323 -4327 -4313 -9313 -118856 -4318 -4594 -4330 -3110 -55034 -4337 -4338 -4340 -7841 -22880 -51660 -9526 -8777 -4351 -80739 -4352 -136647 -4353 -4358 -4359 -10205 -3140 -56246 -112609 -22808 -4361 -29960 -11222 -65080 -63931 -51021 -51116 -56945 -65993 -51081 -931 -4436 -4437 -4439 -2956 -10943 -6307 -4478 -4481 -253827 -2660 -55154 -4487 -4488 -4508 -4509 -4512 -4513 -4514 -4535 -4536 -4537 -4538 -4539 -4540 -4541 -4549 -4550 -4511 -4556 -4558 -4567 -4574 -4507 -123263 -4522 -4524 -10588 -4534 -8898 -25821 -2475 -55149 -4548 -4552 -4547 -4582 -4593 -4595 -4597 -4598 -4604 -4607 -4613 -4615 -4617 -4618 -4629 -79784 -4620 -4621 -4624 -4625 -4626 -4627 -4632 -4633 -4634 -4635 -4638 -85366 -389827 -51168 -84700 -4640 -4643 -53904 -4644 -4645 -4646 -4647 -4649 -4653 -93649 -57462 -9499 -51778 -84665 -745 -23040 -8260 -80155 -112939 -133686 -4668 -4669 -162417 -259232 -340719 -54187 -79731 -9 -10 -339983 -55739 -128240 -51594 -23218 -4683 -9918 -23310 -54892 -23397 -653361 -4688 -4689 -23385 -54820 -4693 -10397 -3340 -4694 -4705 -126328 -55967 -51079 -4695 -4700 -4704 -51103 -91942 -25915 -29078 -79133 -137682 -54539 -4709 -4714 -4715 -4719 -4720 -4722 -4724 -4726 -374291 -4728 -4723 -4729 -4703 -25977 -5818 -81607 -23327 -4744 -4747 -4750 -4751 -284086 -91754 -4758 -4760 -4761 -50674 -340533 -91624 -4763 -4771 -23114 -4780 -4774 -4781 -4784 -4790 -4791 -4792 -27247 -4803 -55768 -79840 -378884 -374354 -55651 -4810 -51199 -123606 -348938 -25836 -7080 -1482 -137814 -579 -84504 -54413 -57502 -58484 -22861 -91662 -114548 -199713 -51314 -23530 -135935 -64127 -4838 -9241 -8996 -4841 -55505 -10528 -4851 -4853 -100996717 -4854 -4863 -4864 -29881 -10577 -4867 -27031 -261734 -4868 -7827 -4878 -4882 -10641 -8131 -190 -9971 -10002 -7025 -7026 -2908 -4306 -2516 -4893 -8204 -4901 
-9378 -64324 -50814 -286053 -56160 -26012 -22978 -51251 -4907 -4909 -4913 -9423 -84628 -4914 -4915 -80224 -55270 -57122 -55746 -9631 -23279 -23165 -8021 -79023 -23636 -79902 -4927 -116150 -101060691 -55998 -64359 -60506 -4942 -23363 -4948 -100506658 -4952 -152816 -8481 -8473 -4976 -80207 -4983 -26873 -5956 -2652 -611 -10133 -84876 -4998 -5000 -23594 -9885 -55644 -9180 -28962 -5009 -146183 -9381 -340990 -283310 -51633 -90268 -5015 -58495 -5019 -55074 -22953 -64805 -64175 -55214 -8974 -5034 -54681 -8106 -55690 -23241 -51702 -353238 -5048 -5053 -5058 -5063 -79728 -23022 -51025 -80025 -24145 -9060 -11315 -5073 -25973 -197135 -5075 -5076 -5077 -5078 -5079 -5080 -5081 -7849 -5083 -55193 -5087 -5091 -388939 -5092 -5095 -5096 -51294 -65217 -57526 -7703 -5105 -5111 -5116 -5122 -255738 -5130 -11235 -10846 -50940 -5137 -5139 -5144 -5145 -5158 -5146 -5147 -5148 -5149 -8622 -5155 -5156 -5159 -5160 -5162 -8050 -5165 -54704 -23590 -57107 -3651 -8566 -5173 -79955 -5184 -8864 -8863 -105371045 -100131801 -5189 -5192 -8799 -5193 -5194 -5195 -9409 -5824 -5828 -55670 -8504 -5830 -5190 -5191 -5213 -5216 -5224 -80055 -27315 -93210 -5230 -5236 -5238 -221692 -1911 -5251 -51317 -84295 -23133 -26227 -55023 -5255 -5256 -5257 -5261 -401 -8929 -5264 -85007 -5297 -10464 -9780 -63895 -5277 -9488 -5279 -54872 -5283 -9487 -93183 -23556 -84720 -51227 -9091 -51604 -128869 -55650 -284098 -84992 -5286 -5290 -5293 -5295 -5296 -23533 -200576 -65018 -23396 -83394 -5307 -5308 -5309 -494513 -5310 -168507 -5311 -5314 -5313 -5317 -5318 -5321 -5322 -8398 -7941 -9373 -5325 -5328 -23236 -5332 -5333 -51196 -5336 -89869 -5337 -23646 -5339 -64857 -57449 -9842 -5340 -5346 -10733 -5350 -5351 -5352 -8985 -5354 -11212 -5358 -83483 -83449 -5373 -5375 -5376 -23203 -9512 -5395 -10654 -11284 -5406 -4860 -285848 -57104 -10908 -50640 -55163 -87178 -25886 -282809 -79983 -23509 -56983 -23126 -5422 -5424 -5426 -5428 -11232 -5429 -25885 -9533 -51082 -11128 -55703 -5443 -55624 -84892 -84197 -51371 -10585 -29954 -5444 -10940 -5447 
-64840 -25913 -5449 -5455 -5456 -5459 -11281 -27068 -5468 -79717 -5479 -23262 -8493 -152926 -5498 -5500 -84919 -5506 -5515 -5518 -5521 -55012 -5528 -5530 -5538 -10084 -768206 -59335 -63976 -11107 -93166 -56978 -5052 -9581 -5551 -10216 -144165 -166336 -201973 -5566 -5568 -51422 -53632 -5573 -5580 -5582 -5589 -5587 -5591 -5592 -5071 -8575 -5618 -54496 -5621 -5624 -5625 -60675 -128674 -8842 -5626 -5627 -8858 -9129 -26121 -9128 -24148 -10594 -5961 -5631 -112476 -5396 -5644 -8492 -646960 -58497 -57716 -5660 -29968 -5663 -5664 -55851 -5684 -5692 -5696 -5698 -29893 -5718 -5723 -9051 -5727 -8643 -139411 -9791 -5728 -256297 -5741 -5745 -5744 -5781 -5784 -5788 -5792 -5800 -374462 -51651 -5805 -22827 -9698 -5813 -80324 -83480 -54517 -7837 -5831 -29920 -5836 -5837 -79912 -5859 -5860 -54870 -84074 -9230 -22931 -51715 -5873 -9364 -83452 -116442 -22930 -25782 -7879 -285282 -5879 -5880 -5881 -5885 -10111 -5888 -5889 -5892 -5894 -5896 -5897 -10743 -5903 -9693 -5913 -5915 -5917 -57038 -5921 -10125 -10235 -30062 -84839 -5925 -9821 -5932 -10616 -8241 -10137 -282996 -55131 -27316 -5949 -5950 -3516 -55213 -343035 -51109 -145226 -5959 -5962 -9401 -65055 -51308 -92840 -5970 -5971 -5649 -84957 -5972 -85021 -473 -5978 -5979 -54463 -5981 -91869 -55159 -5993 -222546 -8625 -5994 -5995 -8787 -388531 -6005 -79651 -6006 -6010 -387 -23221 -399 -22999 -54453 -60561 -8737 -54101 -9750 -134701 -6016 -6017 -51132 -55005 -6023 -246243 -10535 -79621 -84153 -6041 -8635 -7737 -54941 -11342 -84282 -11236 -165918 -81790 -57674 -54476 -54894 -55599 -100151683 -6092 -64221 -54538 -79641 -6094 -4919 -4920 -6095 -6096 -6097 -6101 -94137 -6102 -6100 -6121 -6103 -57096 -23322 -22934 -6134 -6135 -6137 -6138 -6141 -6144 -6154 -6155 -11224 -6165 -6125 -6204 -6210 -6218 -6223 -6228 -6229 -6231 -6232 -6234 -6235 -6197 -6201 -3921 -22800 -50484 -6247 -89765 -83861 -345895 -221421 -284654 -340419 -343637 -89970 -51319 -51750 -6253 -84816 -25914 -9711 -861 -860 -9853 -122042 -10329 -6261 -6262 -9294 -26278 -6295 -6299 
-6297 -57167 -401474 -54809 -219285 -25939 -51128 -6301 -54938 -9733 -23328 -163786 -23314 -51119 -6305 -81846 -6309 -49855 -950 -91179 -6336 -11280 -6323 -6324 -6326 -6327 -6328 -55800 -6329 -6330 -6331 -6334 -6335 -6337 -6338 -6340 -6341 -9997 -6342 -57410 -10806 -6389 -644096 -54949 -6390 -6391 -6392 -121214 -10484 -10483 -9871 -22872 -29927 -11231 -79048 -8991 -57190 -10371 -9723 -64218 -8482 -51091 -124404 -10801 -84947 -5265 -866 -6906 -5269 -8710 -5271 -462 -3053 -5054 -5176 -5345 -710 -871 -5274 -6418 -26040 -29072 -55209 -23064 -10262 -6424 -729238 -6439 -6440 -119559 -6442 -6443 -6444 -8910 -6445 -166929 -8879 -6448 -4068 -6452 -30011 -285590 -79628 -22941 -85358 -6469 -8036 -6473 -23729 -57477 -6476 -10280 -150094 -23387 -64374 -25942 -23094 -6495 -6496 -147912 -4990 -6497 -6499 -6555 -84068 -4891 -6557 -6559 -57468 -9990 -64849 -284111 -6563 -6566 -387700 -6567 -26503 -246213 -63910 -6571 -6572 -10560 -80704 -6505 -6506 -6507 -6509 -6575 -116085 -6584 -9187 -123041 -283652 -6576 -8402 -8604 -10165 -10166 -60386 -788 -79751 -29957 -115286 -5250 -81034 -54977 -291 -284439 -91137 -10861 -1836 -1811 -5172 -375611 -116369 -10999 -9154 -55315 -6513 -81031 -6514 -56606 -55532 -7780 -10463 -9197 -6569 -10568 -142680 -10559 -7355 -23443 -55343 -23169 -153201 -2542 -146167 -91252 -23516 -55630 -283375 -64116 -6519 -30061 -254428 -80736 -50651 -51151 -113235 -6521 -83959 -8671 -55065 -79581 -113278 -6523 -6524 -6528 -60482 -388662 -340024 -6530 -54716 -6531 -9152 -6535 -6536 -57709 -9056 -11136 -6548 -6550 -9368 -10479 -84679 -285195 -10599 -28234 -6578 -342618 -114798 -84189 -57152 -84464 -4088 -4089 -4091 -4093 -6595 -6597 -56916 -50485 -6598 -6601 -6603 -6605 -8243 -9126 -23347 -56006 -388588 -6606 -6607 -64093 -64094 -6609 -55627 -23676 -6611 -6591 -6616 -9342 -6622 -9627 -6620 -79753 -727676 -23020 -6628 -6635 -6640 -29887 -57231 -55084 -6647 -402381 -6651 -6654 -6655 -50964 -6663 -6664 -64321 -54345 -6657 -6658 -6659 -6660 -6662 -3431 -121340 -6678 -23111 
-6683 -83893 -166378 -55812 -23384 -79925 -10290 -80208 -51324 -6687 -6690 -6691 -11005 -10653 -124976 -6697 -161742 -10253 -81848 -6708 -6709 -6710 -6712 -57731 -10558 -9517 -84654 -8878 -6714 -10847 -6716 -79644 -6731 -27286 -6736 -6748 -6755 -6768 -6487 -8869 -246329 -10274 -10735 -10734 -10617 -6770 -56910 -6772 -6773 -6774 -6777 -55240 -6491 -6786 -340061 -6794 -6789 -79991 -64220 -92335 -161497 -412 -3703 -201595 -10273 -8676 -8675 -112755 -6812 -6813 -8803 -8802 -51684 -79783 -6820 -285362 -7341 -140732 -6821 -6834 -374969 -93426 -50511 -6853 -23345 -23224 -163183 -8831 -8867 -6855 -6857 -255928 -127833 -23334 -23118 -6866 -51204 -6870 -4070 -6872 -6884 -6873 -6875 -6878 -6888 -128989 -6890 -6891 -6892 -23435 -6897 -80222 -50833 -5726 -6898 -6901 -128637 -55773 -57465 -9882 -51256 -54885 -6904 -6905 -93627 -29110 -6907 -79718 -90665 -6908 -10716 -6899 -6913 -9096 -9095 -6909 -57057 -50945 -6926 -9496 -6910 -6911 -6915 -6916 -6862 -8557 -6938 -6942 -6929 -6925 -7062 -10312 -6948 -6949 -255758 -79600 -79867 -26123 -6997 -6999 -55775 -51567 -23424 -122402 -7003 -9895 -9524 -253017 -7007 -7010 -9894 -10178 -55714 -26011 -55603 -7012 -7015 -56159 -56155 -56154 -7018 -7019 -7020 -7021 -10342 -7036 -7037 -7038 -23483 -7040 -7042 -7043 -7045 -7046 -7048 -7050 -7051 -7053 -9333 -343641 -7054 -55145 -7056 -57187 -79228 -7066 -7067 -7068 -55901 -7072 -148022 -92609 -1678 -51300 -7078 -26277 -9414 -7084 -7086 -79816 -11011 -7092 -7098 -117531 -11322 -147138 -54499 -54664 -84314 -84233 -55863 -55654 -124842 -51524 -55858 -147007 -51259 -79583 -65062 -339453 -54916 -55151 -79188 -9725 -91147 -54968 -9772 -26022 -259236 -55217 -7112 -5651 -64699 -164656 -160418 -51075 -3371 -7128 -8795 -8792 -4982 -23495 -115650 -7132 -7293 -8600 -23043 -7134 -7136 -7137 -51086 -7138 -7139 -7140 -23534 -27327 -7148 -114034 -4796 -7156 -10210 -1861 -26092 -163590 -7157 -112858 -8626 -219931 -7167 -27010 -7168 -7169 -7170 -7172 -7173 -1200 -51002 -286262 -7177 -28755 -7187 -26146 -10758 
-10293 -22906 -10131 -60684 -51112 -6399 -51693 -122553 -83696 -10345 -11181 -54209 -11277 -7201 -23321 -22954 -55521 -4591 -54765 -131405 -7204 -11078 -9321 -9320 -9319 -9325 -54802 -55621 -93587 -54931 -57570 -55687 -51095 -8989 -7222 -7225 -4308 -54795 -140803 -7227 -162514 -59341 -8295 -7248 -7249 -116461 -80746 -79042 -283989 -10102 -80705 -7252 -7253 -10194 -23554 -7102 -54084 -7259 -90121 -146057 -54902 -79809 -83538 -83894 -9652 -57217 -123016 -80185 -23093 -7273 -7274 -7276 -7275 -7846 -113457 -7277 -51807 -203068 -81027 -7280 -347733 -10381 -10382 -10383 -84617 -347688 -7283 -10844 -27229 -85378 -7284 -7287 -7991 -7291 -117581 -56652 -25828 -10907 -10587 -7297 -1890 -7299 -7305 -7306 -7317 -79876 -51271 -7319 -29089 -7337 -89910 -29914 -29978 -197131 -7343 -7345 -51506 -51569 -55325 -7360 -54658 -7369 -7372 -9094 -201294 -146862 -285175 -81622 -7374 -51733 -65109 -7380 -84300 -790955 -7381 -7385 -27089 -131669 -7389 -7390 -79650 -10083 -124590 -7399 -11274 -389856 -85015 -8239 -8287 -84916 -57654 -55697 -6843 -81839 -57216 -9217 -7407 -57176 -11023 -1462 -7414 -7415 -7421 -7424 -7428 -7431 -63894 -79001 -7436 -203547 -55823 -23230 -157680 -54832 -55187 -65082 -55737 -137492 -23339 -11311 -738 -55275 -7443 -30813 -338917 -200403 -7450 -51322 -7453 -10352 -7454 -8936 -23325 -9897 -23558 -23001 -51057 -55717 -57728 -80232 -89891 -57539 -22884 -10785 -11152 -56270 -55112 -284403 -144406 -256764 -84942 -124997 -494551 -7466 -25861 -7456 -26100 -65125 -65266 -7471 -80326 -7480 -7482 -7473 -54361 -7474 -7476 -55135 -7486 -7490 -51741 -7498 -7499 -331 -7503 -7504 -7507 -7508 -7512 -63929 -9213 -7515 -7516 -7518 -64131 -64132 -10413 -8565 -51067 -55689 -10730 -7532 -7534 -7528 -55249 -7535 -27107 -7704 -10472 -26137 -9841 -100128927 -79882 -55906 -55596 -158866 -51114 -6935 -9839 -85446 -346171 -23414 -23503 -118813 -7545 -7546 -7547 -23619 -57178 -10269 -51364 -10771 -84225 -7700 -7701 -7707 -63925 -84905 -79797 -7592 -23090 -58499 -84627 -147657 -130557 -147929 
-9640 -84146 -641339 -57592 -7552 -79755 -347344 -9326 -22917 -7783 -7784 -57688 From b62da8d92a52429e7d23d25797d86bd611a458cc Mon Sep 17 00:00:00 2001 From: Sander van den Hoek Date: Thu, 13 Feb 2020 10:08:10 +0100 Subject: [PATCH 2/6] Removed GeneNetwork (GADO) benchmark scripts as this was replaced by the GADO stand-alone commandline tool. --- .../GeneNetworkBenchmarkFileGenerator.py | 55 -------- benchmarking/GeneNetworkBenchmarkRunner.py | 128 ------------------ 2 files changed, 183 deletions(-) delete mode 100644 benchmarking/GeneNetworkBenchmarkFileGenerator.py delete mode 100644 benchmarking/GeneNetworkBenchmarkRunner.py diff --git a/benchmarking/GeneNetworkBenchmarkFileGenerator.py b/benchmarking/GeneNetworkBenchmarkFileGenerator.py deleted file mode 100644 index ad6ea04..0000000 --- a/benchmarking/GeneNetworkBenchmarkFileGenerator.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/user/bin/env python3 -""" -Name: - GeneNetworkBenchmarkFileGenerator.py -Example: - GeneNetworkBenchmarkFileGenerator.py input/ output.tsv - -Description: - Processes the output from AmelieBenchmarkRunner.py for usage in R plots. -""" - -from BenchmarkGenerics import fileMergerParser -from BenchmarkGenerics import mergeFiles - - -def main(): - # Runs application processes. - args = fileMergerParser() - mergeFiles(processGeneNetworkFile, args.inDir, args.out) - - -def processGeneNetworkFile(fileWriter, filePath): - """ - Processes a single output file from the gene network benchmark. - :param fileWriter: the file to write the output to - :param filePath: the path to the file to be processed - :return: - """ - # Toggle for first line after the hashtag lines (lines should not be skipped anymore starting from the second - # non-hashtag line). - header = True - - # Stores the genes. - genes = [] - - # Writes the genes in order to file separated by a comma (with a newline at the end). - for line in open(filePath): - # Skips lines starting with a hashtag. 
- if line.startswith("#"): - continue - - # First line after hashtag lines is the header (describing columns). - if header: - header = False - continue - - # Collects genes. - genes.append(line.split("\t")[0]) - - # Writes genes to file. - fileWriter.write(",".join(genes)) - - -if __name__ == '__main__': - main() diff --git a/benchmarking/GeneNetworkBenchmarkRunner.py b/benchmarking/GeneNetworkBenchmarkRunner.py deleted file mode 100644 index 4f670d8..0000000 --- a/benchmarking/GeneNetworkBenchmarkRunner.py +++ /dev/null @@ -1,128 +0,0 @@ -#!/user/bin/env python3 -""" -Name: - GeneNetworkBenchmarkRunner.py -Example: - GeneNetworkBenchmarkRunner.py hp.obo benchmark_data.tsv output/ - -Description: - Retrieves data from the API https://www.genenetwork.nl/api/v1 and writes the genes with weightedZScore to files - (1 file per LOVD from the benchmark file). -""" - -from os.path import isfile -from os.path import isdir -from argparse import ArgumentParser -from time import time -from requests import post -from requests.exceptions import ConnectionError -from requests.exceptions import ReadTimeout -from requests.packages.urllib3 import disable_warnings -from requests.packages.urllib3.exceptions import InsecureRequestWarning -from requests import HTTPError -from BenchmarkGenerics import readPhenotypes -from BenchmarkGenerics import retrieveLovdPhenotypes -from BenchmarkGenerics import convertPhenotypeNamesToIds -from BenchmarkGenerics import waitTillElapsed - - -def main(): - # Disables InsecureRequestWarning. See also: https://urllib3.readthedocs.org/en/latest/security.html - disable_warnings(InsecureRequestWarning) - - # Runs application processes. 
- args = parseCommandLine() - phenotypeIdsByName = readPhenotypes(args.hpo) - lovdPhenotypes = retrieveLovdPhenotypes(args.tsv) - lovdPhenotypes = convertPhenotypeNamesToIds(lovdPhenotypes, phenotypeIdsByName) - retrieveGeneNetworkResults(lovdPhenotypes, args.out) - - -def parseCommandLine(): - """ - Processes the command line arguments. - :return: args - """ - - # Defines command line. - parser = ArgumentParser() - parser.add_argument("hpo", help="he HPO .obo file containing phenotype id's/names") - parser.add_argument("tsv", help="the benchmarking .tsv file where the first column is the sample ID and the 5th column 1 or more phenotypes (separated by a ';')") - parser.add_argument("out", help="the file to write output to") - - # Processes command line. - args = parser.parse_args() - - # Validates command line. - if not args.hpo.endswith(".obo"): - parser.error('"' + args.hpo.split('/')[-1] + '" is not an .obo file') - if not isfile(args.hpo): - parser.error('"' + args.hpo.split('/')[-1] + '" is not an existing file') - - if not args.tsv.endswith(".tsv"): - parser.error('"' + args.tsv.split('/')[-1] + '" is not a .tsv file') - if not isfile(args.tsv): - parser.error('"' + args.tsv.split('/')[-1] + '" is not an existing file') - - if not isdir(args.out): - parser.error('"' + args.out + '" is not a valid directory') - - return args - - -def retrieveGeneNetworkResults(lovdPhenotypes, outDir): - """ - Retrieves the results from gene network and writes these to files on a per-LOVD basis. If the output dir already - contains a file with the LOVD name (.tsv), that LOVD is skipped (allowing of continuing the benchmark later on - if stopped). - :param lovdPhenotypes: benchmark data with as key the LOVD and as value a list of HPO IDs - :param outDir: the directory to write the output files to (and used to check whether a benchmark for that LOVD was - already done) - :return: - """ - # Stores initial time as negative time() so that sleep is not triggered the first time. 
- requestTime = -time() - - # Goes through all LOVDs. - for lovd in lovdPhenotypes.keys(): - # Defines output file for this LOVD. - outFile = outDir + "/" + lovd + ".tsv" - - # Checks if output folder already contains a file for this LOVD, and if so, skips this LOVD. - if isfile(outFile): - print("# skipping: " + lovd) - continue - - print("# processing: " + lovd) - - # File to write output to. - fileWriter = open(outFile, 'w') - - # The request uri used to retrieve the results. - uri = "https://www.genenetwork.nl/api/v1/tabdelim" - data = {"what":"diagnosis", "terms":",".join(lovdPhenotypes.get(lovd))} - - # Waits till elapsed time exceeds 1 second. - waitTillElapsed(2, time() - requestTime) - - # Tries to make a request to the REST API with the JSON String. - # If an HTTPError is triggered, this is printed and then no further benchmarking data will be uploaded. - try: - response = post(uri, data, verify=False, timeout=(6, 12)) - response.raise_for_status() - except (ConnectionError, HTTPError, ReadTimeout) as e: - exit(e) - - # Stores the current time for managing time between requests. - requestTime = time() - - # Writes output to file. - fileWriter.write(response.text) - - # Closes file. - fileWriter.flush() - fileWriter.close() - - -if __name__ == '__main__': - main() From 55565c6f928346b23b70b036c98e4ac0fd9d1b42 Mon Sep 17 00:00:00 2001 From: Sander van den Hoek Date: Thu, 13 Feb 2020 10:11:51 +0100 Subject: [PATCH 3/6] Moved info in GADO readme file into main README. --- README.md | 12 ++---------- benchmarking/GADOBenchmarkReadme | 1 - 2 files changed, 2 insertions(+), 11 deletions(-) delete mode 100644 benchmarking/GADOBenchmarkReadme diff --git a/README.md b/README.md index 1dc488f..db0b3ca 100644 --- a/README.md +++ b/README.md @@ -86,17 +86,9 @@ There are several files used among these scripts. These include: python3 AmelieBenchmarkFileGenerator.py amelie_output/ amelie_results.tsv ``` -#### Gene Network +#### GADO -1. 
Run benchmark: - ``` - python3 GeneNetworkBenchmarkRunner.py hp.obo benchmark_data.tsv genenetwork_output/ - ``` - -2. Process benchmark output: - ``` - python3 GeneNetworkBenchmarkFileGenerator.py genenetwork_output/ genenetwork_results.tsv - ``` +We used the stand-alone commandline version GADO (v 1.0.1), available at: https://github.com/molgenis/systemsgenetics/wiki/GADO-Command-line. We accepted all automatically suggested alternative HPO terms in cases that the supplied HPO term could not be used. We have used the prediction matrix `hpo_predictions_sigOnly_spiked_01_02_2018`. #### Phenomizer diff --git a/benchmarking/GADOBenchmarkReadme b/benchmarking/GADOBenchmarkReadme deleted file mode 100644 index 0c2a3a9..0000000 --- a/benchmarking/GADOBenchmarkReadme +++ /dev/null @@ -1 +0,0 @@ -We used the stand-alone commandline version GADO (v 1.0.1), available at: https://github.com/molgenis/systemsgenetics/wiki/GADO-Command-line. We accepted all automatically suggested alternative HPO terms in cases that the supplied HPO term could not be used. We have used this prediction matrix hpo_predictions_sigOnly_spiked_01_02_2018. From ed3c22ce9674b2a500ad94bccd883e51a82b6f0b Mon Sep 17 00:00:00 2001 From: Sander van den Hoek Date: Thu, 13 Feb 2020 10:50:58 +0100 Subject: [PATCH 4/6] Removed obsolete bash script. --- README.md | 3 --- benchmarking/VibeSimpleOutputFilesMerger.sh | 22 --------------------- 2 files changed, 25 deletions(-) delete mode 100644 benchmarking/VibeSimpleOutputFilesMerger.sh diff --git a/README.md b/README.md index db0b3ca..870b372 100644 --- a/README.md +++ b/README.md @@ -56,9 +56,6 @@ such as for vibe where there is a `ParallelBashScriptsGenerator` instead. So ple information. * __Important:__ As each VIBE instance needs a separate database, please refer to the information in the script itself for how to prepare for the benchmarking correctly. 
-* __`VibeSimpleOutputFilesMerger.sh`__ - * __Info:__ Merges the output generated by the scripts which were created using - `VibeBenchmarkBashScriptsGenerator.py`. ### Data diff --git a/benchmarking/VibeSimpleOutputFilesMerger.sh b/benchmarking/VibeSimpleOutputFilesMerger.sh deleted file mode 100644 index 258bb68..0000000 --- a/benchmarking/VibeSimpleOutputFilesMerger.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash - -# Merges a directory containing VIBE output files (that were generated with "-l" enabled in the VIBE application). -# Usage: sh VibeSimpleOutputFilesMerger.sh ~/path/to/dir - -# Sets output file (tmp & final). -OUT_FILE_TMP="$1"/merged.tmp -OUT_FILE_FINAL="$1"/merged.tsv - -# printf: prints format of header for file -# >: writes this header to the output file -# -# grep: retrieves full line (each file contains a single line with genes belonging to the LOVD in the filename) -# --- /path/to/.tsv: -# sed: removes full path -# --- .tsv: -# sed: converts ".tsv:" to "\t" -# >>: adds the LOVD with their genes to the output file (format: "LOVD\tgenes" where the genes are separated using commas) -# -# mv: renames the output file (OUT_FILE_TMP -> OUT_FILE_FINAL so that output file would not interfere with the collection -# of results from the input tsv files) -printf 'lovd\tsuggested_genes\n' > ${OUT_FILE_TMP} && grep '' "$1"/*.tsv | sed "s#^$1/##" | sed $'s/.tsv:/\t/' >> ${OUT_FILE_TMP} && mv ${OUT_FILE_TMP} ${OUT_FILE_FINAL} From 3f01a21cdaf4fda273eae30dcd484d8afa541bfd Mon Sep 17 00:00:00 2001 From: Sander van den Hoek Date: Thu, 13 Feb 2020 11:10:49 +0100 Subject: [PATCH 5/6] Removed Phenotips benchmarking due to becoming obsolete. 
--- README.md | 10 +- benchmarking/PhenotipsBenchmarkRunner.py | 164 ----------------------- 2 files changed, 1 insertion(+), 173 deletions(-) delete mode 100644 benchmarking/PhenotipsBenchmarkRunner.py diff --git a/README.md b/README.md index 870b372..f544dbb 100644 --- a/README.md +++ b/README.md @@ -44,10 +44,6 @@ such as for vibe where there is a `ParallelBashScriptsGenerator` instead. So ple * __Info:__ Uses the [query_phenomizer][query_phenomizer] python tool to process all benchmark data. * __Important:__ [query_phenomizer][query_phenomizer] needs to be installed on the system. Additionally, an account is needed for running [query_phenomizer][query_phenomizer]. -* __`PhenotipsBenchmarkRunner.py`__ - * __Info:__ Uses the API of Phenotips to upload the benchmark dataset and then download the results. - * __Important:__ A phenotips instince to which can be connected is required. Please refer to the - [Phenotips download page][phenotips_download] for more information. * __`VibeBenchmarkFileGenerator.py`__ * __Info:__ Converts the output from `VibeBenchmarkParallelBashScriptsGenerator.py` for usage in `BenchmarkResultsProcessor.R`. * __`VibeBenchmarkParallelBashScriptsGenerator.py`__ @@ -108,12 +104,8 @@ We used the stand-alone commandline version GADO (v 1.0.1), available at: https: #### Phenotips -1. Install [phenotips][phenotips_download]. +**IMPORTANT**: Starting of January 2020, Phenotips does not offer a stand-alone downloadable solution anymore and requires a paid cloud subscription to be used, as can be read [here](https://phenotips.com/blog/new-year-new-website.html). Therefore, this benchmark has become obsolete. -2. 
Run benchmark: - ``` - python3 PhenotipsBenchmarkRunner.py http://localhost:8080/ username hp.obo benchmark_data.tsv phenotips_results.tsv - ``` #### Vibe diff --git a/benchmarking/PhenotipsBenchmarkRunner.py b/benchmarking/PhenotipsBenchmarkRunner.py deleted file mode 100644 index 6295c8d..0000000 --- a/benchmarking/PhenotipsBenchmarkRunner.py +++ /dev/null @@ -1,164 +0,0 @@ -#!/user/bin/env python3 -""" -Name: PhenotipsBenchmarkRunner.py - -Example: - PhenotipsBenchmarkRunner.py http://localhost:8080/ username hp.obo benchmark_data.tsv output.tsv - -Description: - Uploads benchmark data to Phenotips using HPO data (as an .obo file) and then downloads suggested genes. -""" - -from os.path import isfile -from argparse import ArgumentParser -from requests import post -from requests import get -from requests.exceptions import ConnectionError -from requests import HTTPError -from requests.auth import HTTPBasicAuth -from getpass import getpass -from re import match -from BenchmarkGenerics import readPhenotypes -from BenchmarkGenerics import retrieveLovdPhenotypes - - -def main(): - args = parseCommandLine() - phenotypeIdsByName = readPhenotypes(args.hpo) - lovdPhenotypes = retrieveLovdPhenotypes(args.tsv) - pwd = getpass("Phenotips password for " + args.username + ":") - uploadPhenotypes(args.url, args.username, pwd, lovdPhenotypes, phenotypeIdsByName) - downloadGenes(args.url, args.username, pwd, args.out, lovdPhenotypes.keys()) - - -def parseCommandLine(): - """ - Processes the command line arguments - :return: args - """ - - # Defines command line. 
- parser = ArgumentParser() - parser.add_argument("url", help="the url to the phenotips instance to upload to (") - parser.add_argument("username", help="the username for authentication to the phenotips server") - parser.add_argument("hpo", help="he HPO .obo file containing phenotype id's/names") - parser.add_argument("tsv", help="the benchmarking .tsv file where the first column is the sample ID and the 5th column 1 or more phenotypes (separated by a ';')") - parser.add_argument("out", help="the file to write output to") - - # Processes command line. - args = parser.parse_args() - - # Strips slash on the end of an url. - args.url = args.url.rstrip("/") - - # Validates command line. - if match("https?:\/\/[a-z0-9.\-]+(:[0-9]{4})?", args.url) is None: - parser.error("invalid url") - if match("^\w+$", args.username) is None: - parser.error("the username may only contain: greek characters (a-z A-Z), numbers (0-9) and underscores (_)") - if not args.hpo.endswith(".obo"): - parser.error('"' + args.hpo.split('/')[-1] + '" is not an .obo file') - if not isfile(args.hpo): - parser.error('"' + args.hpo.split('/')[-1] + '" is not an existing file') - if not args.tsv.endswith(".tsv"): - parser.error('"' + args.tsv.split('/')[-1] + '" is not a .tsv file') - if not isfile(args.tsv): - parser.error('"' + args.tsv.split('/')[-1] + '" is not an existing file') - if not args.out.endswith(".tsv"): - parser.error('"' + args.out.split('/')[-1] + '" is not a .tsv file') - if isfile(args.out): - parser.error('"' + args.out.split('/')[-1] + '" already exists') - - return args - - -def uploadPhenotypes(phenotipsUrl, username, password, lovdPhenotypes, phenotypeIdsByName): - """ - Uploads the benchmark data to phenotips. 
- :param phenotipsUrl: the url to upload the benchmark data to - :param username: the username for authentication - :param password: the password for authentication - :param lovdPhenotypes: benchmark data with as key the LOVD and as value a list of phenotype names - :param phenotypeIdsByName: dict with phenotype names as keys and their id as value - :return: list with all LOVDs that were uploaded - """ - - for lovd in lovdPhenotypes.keys(): - # Starts generating the JSON for the request. - requestString = '{"external_id":"' + lovd + '","features":[' - - # Goes through all phenotypes and adds these to the JSON String. - for i,phenotypeName in enumerate(lovdPhenotypes.get(lovd)): - if i > 0: - requestString += "," - requestString += '{"id":"' + phenotypeIdsByName.get(phenotypeName) + '","label":"' + phenotypeName + '","type":"phenotype","observed":"yes"}' - requestString += "]}" - - # Tries to make a request to the REST API with the JSON String. - # If an HTTPError is triggered, this is printed and then no further benchmarking data will be uploaded. - try: - response = post(phenotipsUrl + "/rest/patients", data=requestString, auth=HTTPBasicAuth(username, password)) - response.raise_for_status() - except (ConnectionError, HTTPError) as e: - exit(e) - - -def downloadGenes(phenotipsUrl, username, password, out, lovds): - """ - Downloads the suggested genes for each uploaded LOVD - :param phenotipsUrl: the url to upload the benchmark data to - :param username: the username for authentication - :param password: the password for authentication - :param out: the file to write the results to - :param lovds: list with the LOVDs for which the suggested genes should be retrieved - """ - - # File to write output to. - fileWriter = open(out, 'w') - - # Writes the header to the file. - fileWriter.write("lovd\tsuggested_genes\n") - - # Goes through all LOVDs. 
- for lovd in lovds: - # Writes the LOVD followed by a tab - fileWriter.write(lovd + "\t") - - # Tries to make a several request to the REST API for data retrieval. - # If an HTTPError is triggered, this is printed and then no further benchmarking data will be uploaded. - try: - # Retrieves internal ID based on external ID. - response = get(phenotipsUrl + "/rest/patients/eid/" + lovd, auth=HTTPBasicAuth(username, password)) - response.raise_for_status() - - # If external ID is only used once, directly retrieve internal ID. - try: - phenotipsId = response.json()['report_id'] - # Otherwise retrieve the internal ID from the first item. - # Any items that have the same external ID are assumed to be equal. - except KeyError: - phenotipsId = response.json()['patients'][0]['id'] - - # Retrieves the suggested genes for the LOVD. - response = get(phenotipsUrl + "/rest/patients/" + phenotipsId + "/suggested-gene-panels", auth=HTTPBasicAuth(username, password)) - response.raise_for_status() - except (ConnectionError, HTTPError) as e: - exit(e) - - # Goes through all suggested genes for a single LOVD. - for i, gene in enumerate(response.json()['genes']): - # If more than 1 gene found, adds separator to output between genes. - if i > 0: - fileWriter.write(',') - fileWriter.write((gene['gene_symbol'])) - - # After all suggested genes are processed, adds a newLine. - fileWriter.write('\n') - - # Flushes and closes writer. - fileWriter.flush() - fileWriter.close() - - -if __name__ == '__main__': - main() From a1ef6c60ac9d239a7480f5395d73e0b37503ef1c Mon Sep 17 00:00:00 2001 From: Sander van den Hoek Date: Thu, 13 Feb 2020 13:40:57 +0100 Subject: [PATCH 6/6] Updated README. 
--- README.md | 157 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 95 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index f544dbb..242bff3 100644 --- a/README.md +++ b/README.md @@ -1,69 +1,24 @@ # vibe-suppl -This repo contains supplemental files regarding the Java application found [here][vibe]. Note that these are in no way -needed to use the vibe tool, but were used to generate additional information (such as benchmarking). They were created -with the assumption that they are used exactly in the way they are meant to be used, so while certain checks/validations -might be present, using these scripts in the wrong way might result in weird behavior. +This repo contains supplemental files regarding the Java application found [here][vibe]. Note that these are in no way needed to use the vibe tool, but were used to generate additional information (such as benchmarking). They were created with the assumption that they are used exactly in the way they are meant to be used, so while certain checks/validations might be present, using these scripts in the wrong way might result in weird behavior. -## Benchmarking +## Paper + +Please refer to the `README.md` at https://zenodo.org/record/3662470 for the exact commits used for the benchmarking. There, all required files for [PaperPlots.R](benchmarking_results_processing/PaperPlots.R) can be found as well. -### Scripts - -There are several benchmarking scripts available with some generic code used by multiple benchmarks in a separate file. -An explanation on how to run the can be found below. In general, the `Runner` scripts runs the benchmark while the -`FileGenerator` script (if available) formats the `Runner` output to a more usable format. Some exceptions are present, -such as for vibe where there is a `ParallelBashScriptsGenerator` instead. So please refer to to -this section for more information regarding running the individual benchmarks. 
- -* __`AmelieApiOutputGenerator.py`__ - * __Info:__ Connects to `https://amelie.stanford.edu/api/` to retrieve the gene scores for each set of HPO terms - available in the benchmark data. As the genes of interest should be entered manually and there is a limit in the - number of entered genes, the [complete HGNC dataset][hgnc_complete] - is used and divided over multiple separate requests so that all genes get a score. As the scores are only sorted - per request, a sort on all genes is done prior to file writing. -* __`AmelieBenchmarkRunner.py`__ - * __Info:__ Converts the output from `AmelieApiOutputGenerator.py` for usage in `BenchmarkResultsProcessor.R`. -* __`BenchmarkFileHpoConverter.py`__ - * __Info:__ A script to convert a benchmark file containing HPO names in the fifth column to a benchmark file with - HPO codes in the fifth column. Should not be needed for running existing benchmarks, but is supplied as a - convenience script in case benchmarks are created that cannot use `BenchmarkGenerics.py` but do need HPO codes as - input. -* __`BenchmarkGenerics.py`__ - * __Info:__ Contains methods used in multiple scripts. - * __Important:__ This script should not be ran independently. If Python scripts are moved (for example to a server - to run the benchmarks there), be sure to include this file within the same directory. -* __`BenchmarkResultsProcessor.R`__ - * __Info:__ Creates plots from the benchmark data. -* __`GeneNetworkBenchmarkFileGenerator.py`__ - * __Info:__ Converts the output from `GeneNetworkBenchmarkRunner.py` for usage in `BenchmarkResultsProcessor.R`. -* __`GeneNetworkBenchmarkRunner.py`__ - * __Info:__ Connects to the API from `https://www.genenetwork.nl/` to retrieve the prioritized genes based on input - phenotypes. -* __`PhenomizerBenchmarkFileGenerator.py`__ - * __Info:__ Converts the output from `PhenomizerBenchmarkRunner.py` for usage in `BenchmarkResultsProcessor.R`. 
-* __`PhenomizerBenchmarkRunner.py`__ - * __Info:__ Uses the [query_phenomizer][query_phenomizer] python tool to process all benchmark data. - * __Important:__ [query_phenomizer][query_phenomizer] needs to be installed on the system. Additionally, an account - is needed for running [query_phenomizer][query_phenomizer]. -* __`VibeBenchmarkFileGenerator.py`__ - * __Info:__ Converts the output from `VibeBenchmarkParallelBashScriptsGenerator.py` for usage in `BenchmarkResultsProcessor.R`. -* __`VibeBenchmarkParallelBashScriptsGenerator.py`__ - * __Info:__ Generates bash files used for benchmarking (by using a limit of runs per file). Note that for each - created bash script a separate TDB is needed. Please refer to the documentation in the script itself for more - information. - * __Important:__ As each VIBE instance needs a separate database, please refer to the information in the script - itself for how to prepare for the benchmarking correctly. +## Benchmarking ### Data There are several files used among these scripts. These include: -* benchmark_data.tsv +* [benchmark_data.tsv](https://zenodo.org/record/3662470/files/benchmark_data-hgnc_symbol.tsv) * A dataset with the first column being an ID and the fourth column 1 or more phenotypes separated by a comma (the phenotype names should exist within the [Human Phenotype Ontology][hpo_obo]) . * [hp.obo][hpo_obo] - * The Human Phenotype Ontology used for combining/converting phenotype names with their HPO ID. + * The Human Phenotype Ontology used for combining/converting phenotype names with their HPO ID. Note that the `benchmark_data.tsv` was made compatible for release 2018-03-08 specifically. * [hgnc_complete_set.txt][hgnc_complete] - * The HUGO Gene Nomenclature Committee file containing information about genes (primarily used to generate a list - containing all genes). + * The HUGO Gene Nomenclature Committee file containing information about genes (primarily used to generate a list containing all genes). 
+* [benchmark_file_conversion_data.tsv](https://www.genenames.org/cgi-bin/download/custom?col=gd_hgnc_id&col=gd_app_sym&col=gd_prev_sym&col=md_eg_id&col=gd_pub_eg_id&status=Approved&status=Entry%20Withdrawn&hgnc_dbtag=on&order_by=gd_hgnc_id&format=text&submit=submit) + * A file generated through [genenames.org](https://www.genenames.org/) that contains HGNC gene symbols with their previous symbols and their NCBI gene IDs. ### Running the benchmarks @@ -79,9 +34,63 @@ There are several files used among these scripts. These include: python3 AmelieBenchmarkFileGenerator.py amelie_output/ amelie_results.tsv ``` +3. Convert the HGNC gene symbols to NCBI gene IDs: + + ``` + python3 BenchmarkFileGeneSymbolToIdConverter.py amelie_results.tsv benchmark_file_conversion_data.tsv 1> amelie.log 2> amelie.err + ``` + +#### Exomiser + +**IMPORTANT:** The Exomiser team supplied a custom `.jar` file to run this benchmark without requiring a `.vcf` file. Exomiser has not made a public release of it yet. This custom `.jar` is, however, based on the exomiser-rest-prioritiser module of the Exomiser open-source code (release 12.1.0). + +##### hiPHIVE + +1. Run benchmark: + + ``` + python3 ExomiserBenchmarkRunner.py hp.obo benchmark_data.tsv hiphive hiphive_output/ + ``` + +2. Process benchmark output: + + ``` + python3 ExomiserBenchmarkFileGenerator.py hiphive_output/ hiphive_results.tsv + ``` + +3. Convert the HGNC gene symbols to NCBI gene IDs: + + ``` + python3 BenchmarkFileGeneSymbolToIdConverter.py hiphive_results.tsv benchmark_file_conversion_data.tsv 1> hiphive.log 2> hiphive.err + ``` + +##### PhenIX + +1. Run benchmark: + + ``` + python3 ExomiserBenchmarkRunner.py hp.obo benchmark_data.tsv phenix phenix_output/ + ``` + +2. Process benchmark output: + + ``` + python3 ExomiserBenchmarkFileGenerator.py phenix_output/ phenix_results.tsv + ``` + +3. 
Convert the HGNC gene symbols to NCBI gene IDs: + + ``` + python3 BenchmarkFileGeneSymbolToIdConverter.py phenix_results.tsv benchmark_file_conversion_data.tsv 1> phenix.log 2> phenix.err + ``` + #### GADO -We used the stand-alone commandline version GADO (v 1.0.1), available at: https://github.com/molgenis/systemsgenetics/wiki/GADO-Command-line. We accepted all automatically suggested alternative HPO terms in cases that the supplied HPO term could not be used. We have used the prediction matrix `hpo_predictions_sigOnly_spiked_01_02_2018`. +We used the stand-alone commandline version GADO (v 1.0.1), available at: https://github.com/molgenis/systemsgenetics/wiki/GADO-Command-line. We accepted all automatically suggested alternative HPO terms in cases that the supplied HPO term could not be used. We have used the prediction matrix `hpo_predictions_sigOnly_spiked_01_02_2018`. The output was also converted to NCBI gene IDs through the following: + +``` +python3 BenchmarkFileGeneSymbolToIdConverter.py gado_results.tsv benchmark_file_conversion_data.tsv 1> gado.log 2> gado.err +``` #### Phenomizer @@ -101,11 +110,36 @@ We used the stand-alone commandline version GADO (v 1.0.1), available at: https: ``` python3 PhenomizerBenchmarkFileGenerator.py phenomizer_output/ phenomizer_results.tsv ``` + +4. Convert the HGNC gene symbols to NCBI gene IDs: + + ``` + python3 BenchmarkFileGeneSymbolToIdConverter.py phenomizer_results.tsv benchmark_file_conversion_data.tsv 1> phenomizer.log 2> phenomizer.err + ``` #### Phenotips -**IMPORTANT**: Starting of January 2020, Phenotips does not offer a stand-alone downloadable solution anymore and requires a paid cloud subscription to be used, as can be read [here](https://phenotips.com/blog/new-year-new-website.html). Therefore, this benchmark has become obsolete. 
+**IMPORTANT**: As of January 2020, Phenotips does not offer a stand-alone downloadable solution anymore and requires a paid cloud subscription to be used ([source](https://phenotips.com/blog/new-year-new-website.html)). While the [GitHub repo](https://github.com/phenotips/phenotips) is currently still online, it seems uncertain whether it will still be updated and the easy-to-use `.dmg` as offered on the old website is not available anymore. Therefore, this benchmark is deemed obsolete. +#### PubCaseFinder + +1. Run benchmark: + + ``` + python3 PubCaseFinderBenchmarkRunner.py hp.obo benchmark_data.tsv pubcasefinder_output/ + ``` + +2. Process benchmark output: + + ``` + python3 PubCaseFinderBenchmarkFileGenerator.py pubcasefinder_output/ pubcasefinder_results.tsv + ``` + +3. Convert the HGNC gene symbols to NCBI gene IDs: + + ``` + python3 BenchmarkFileGeneSymbolToIdConverter.py pubcasefinder_results.tsv benchmark_file_conversion_data.tsv 1> pubcasefinder.log 2> pubcasefinder.err + ``` #### Vibe @@ -145,16 +179,15 @@ We used the stand-alone commandline version GADO (v 1.0.1), available at: https: 7. 
Process benchmark output: ``` - python3 VibeBenchmarkFileGenerator.py results/ vibe_results.tsv + python3 VibeBenchmarkFileGenerator.py results/ vibe_results.tsv none ``` - - [vibe]:https://github.com/molgenis/vibe -[vibe_preperations]:https://github.com/molgenis/vibe/#preparations +[vibe_preperations]:https://github.com/molgenis/vibe/#quickstart [hgnc_complete]:http://ftp.ebi.ac.uk/pub/databases/genenames/new/tsv/hgnc_complete_set.txt [query_phenomizer]:https://github.com/svandenhoek/query_phenomizer [phenotips_download]:https://phenotips.org/Download [hpo_obo_current]:http://purl.obolibrary.org/obo/hp.obo -[hpo_obo]:https://raw.githubusercontent.com/obophenotype/human-phenotype-ontology/2f6309173883d5d342849388c74bd986a2c0092c/hp.obo \ No newline at end of file +[hpo_obo]:https://raw.githubusercontent.com/obophenotype/human-phenotype-ontology/2f6309173883d5d342849388c74bd986a2c0092c/hp.obo +