-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Handle nameIdentifiers as dict rather than array
This is similar to issue #51, but for another field. To check for similar errors, a sample containing a single file for each publisher was created by running the following commands: mkdir sample; cd sample; tar xzvf ../datacite.tar.gz $(awk -F/ '!s[$3] {print "./" $3 "/part_00000.jsonl"; s[$3]++}' <(tar tzvf ../datacite.tar.gz)); tar czvf ../sample.tar.gz .
- Loading branch information
1 parent
346eabf
commit f4c28e0
Showing
4 changed files
with
268 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,217 @@ | ||
|
||
common.py,527 | ||
class Alexandria3kError(44,1215 | ||
def __init__(50,1499 | ||
class Alexandria3kInternalError(55,1599 | ||
def __init__(60,1856 | ||
def is_unittest(65,1956 | ||
def warn(74,2198 | ||
def ensure_unlinked(86,2467 | ||
def query_result(97,2755 | ||
def table_exists(112,3112 | ||
def ensure_table_exists(129,3568 | ||
def set_fast_writing(148,4130 | ||
def log_sql(174,5176 | ||
def try_sql_execute(186,5488 | ||
def program_version(207,6245 | ||
def is_url(222,6710 | ||
def data_from_uri_provider(232,6868 | ||
def get_string_resource(259,7771 | ||
def remove_sqlite_comments(271,8106 | ||
|
||
csv_source.py,359 | ||
class VTSource:VTSource32,1103 | ||
def __init__(37,1293 | ||
def get_container_iterator(42,1466 | ||
def get_container_name(46,1621 | ||
def Create(50,1771 | ||
class CsvCursor:CsvCursor62,2190 | ||
def __init__(65,2256 | ||
def Eof(78,2645 | ||
def Rowid(82,2769 | ||
def Column(86,2884 | ||
def Filter(99,3283 | ||
def Next(113,3910 | ||
def Close(125,4297 | ||
|
||
data_source.py,2267 | ||
class StreamingTable:StreamingTable66,1988 | ||
def __init__(69,2086 | ||
def BestIndex(78,2391 | ||
def Disconnect(83,2569 | ||
def sample(88,2697 | ||
def get_table_meta_by_name(93,2908 | ||
def get_table_meta(97,3047 | ||
def get_data_source(101,3157 | ||
def cursor(105,3272 | ||
def Open(117,3858 | ||
def get_value_extractor_by_ordinal(121,3970 | ||
class StreamingCachedContainerTable(127,4237 | ||
def BestIndex(133,4532 | ||
class ElementsCursor:ElementsCursor174,6293 | ||
def __init__(180,6442 | ||
def Filter(192,6865 | ||
def Eof(199,7084 | ||
def Rowid(204,7232 | ||
def record_id(208,7331 | ||
def current_row_value(212,7457 | ||
def Next(217,7628 | ||
def container_id(221,7725 | ||
def Column(226,7917 | ||
def Close(237,8273 | ||
class ItemsCursor:ItemsCursor243,8412 | ||
def __init__(247,8542 | ||
def Rowid(258,8906 | ||
def current_row_value(262,9021 | ||
def Eof(266,9145 | ||
def Close(270,9269 | ||
class FilesCursor(275,9370 | ||
def __init__(279,9507 | ||
def debug_progress_bar(285,9741 | ||
def Filter(304,10390 | ||
def Next(318,10989 | ||
class _IndexManager:_IndexManager341,11737 | ||
def __init__(345,11853 | ||
def create_index(350,11996 | ||
def drop_indexes(365,12480 | ||
class DataSource:DataSource372,12700 | ||
def __init__(397,13621 | ||
def get_table_meta_by_name(451,15556 | ||
def tables_transitive_closure(458,15823 | ||
def get_virtual_db(470,16305 | ||
def add_column(475,16444 | ||
def set_query_columns(483,16715 | ||
def trace_query_columns(487,16872 | ||
def authorizer(492,17087 | ||
def tracer(503,17599 | ||
def query(523,18368 | ||
def get_query_column_names(604,21796 | ||
def populate(609,22026 | ||
def set_join_columns(646,23516 | ||
def query_and_population_tables(666,24458 | ||
def joined_tables(674,24766 | ||
def partition_condition(711,26591 | ||
def populate_only_root_table(721,26998 | ||
def populate_table(745,27819 | ||
def add_columns(779,29250 | ||
def create_database_schema(799,30069 | ||
def create_matched_tables(839,31626 | ||
def run_post_population_script(886,33760 | ||
class DataFiles:DataFiles943,36138 | ||
def __init__(946,36206 | ||
def get_file_array(970,37013 | ||
def get_container_iterator(974,37120 | ||
def get_container_name(978,37281 | ||
|
||
db_schema.py,780 | ||
class TableMeta:TableMeta22,839 | ||
def __init__(26,950 | ||
def table_schema(50,1884 | ||
def insert_statement(60,2410 | ||
def get_name(74,2918 | ||
def get_primary_key(78,3006 | ||
def get_foreign_key(83,3161 | ||
def get_extract_multiple(87,3303 | ||
def get_parent_extract_multiple(91,3442 | ||
def get_post_population_script(95,3617 | ||
def get_parent_name(99,3776 | ||
def get_cursor_class(103,3914 | ||
def get_columns(107,4036 | ||
def get_value_extractor_by_ordinal(111,4133 | ||
def get_value_extractor_by_name(115,4315 | ||
def get_column_definition_by_name(119,4500 | ||
class ColumnMeta:ColumnMeta124,4679 | ||
def __init__(127,4741 | ||
def get_name(134,5019 | ||
def get_definition(138,5104 | ||
def get_description(147,5519 | ||
def get_value_extractor(151,5633 | ||
|
||
debug.py,109 | ||
def set_output(43,1164 | ||
def get_output(56,1496 | ||
def set_flags(62,1611 | ||
def enabled(84,2247 | ||
def log(97,2484 | ||
|
||
file_cache.py,110 | ||
class FileCache:FileCache25,884 | ||
def __init__(33,1063 | ||
def read(37,1156 | ||
def get_file_cache(57,1730 | ||
|
||
file_pubmed_cache.py,110 | ||
class FileCache:FileCache25,899 | ||
def __init__(33,1077 | ||
def read(37,1170 | ||
def get_file_cache(55,1656 | ||
|
||
file_xml_cache.py,110 | ||
class FileCache:FileCache24,864 | ||
def __init__(30,1012 | ||
def read(34,1110 | ||
def get_file_cache(61,1857 | ||
|
||
__main__.py,1099 | ||
def module_get_attribute(43,1242 | ||
def module_name(49,1466 | ||
def class_name(55,1673 | ||
def facility_modules(63,2002 | ||
def facility_names(71,2330 | ||
def get_data_source_instance(78,2551 | ||
def populate(102,3570 | ||
def add_subcommand_populate(118,4020 | ||
def process(173,5623 | ||
def add_subcommand_process(182,5922 | ||
def add_subcommand_help(202,6578 | ||
def top_level_help(205,6684 | ||
def query(213,6913 | ||
def add_subcommand_query(240,7852 | ||
def get_tables(316,10083 | ||
def list_facility_schema(322,10287 | ||
def add_subcommand_list_complete_schema(336,10762 | ||
def list_complete_schema(339,10867 | ||
def add_subcommand_list_source_schema(352,11280 | ||
def list_source_schema(355,11381 | ||
def add_subcommand_list_process_schema(369,11831 | ||
def list_process_schema(372,11934 | ||
def list_facility_description(386,12384 | ||
def add_subcommand_list_processes(409,13314 | ||
def list_processes(412,13407 | ||
def add_subcommand_list_sources(422,13704 | ||
def list_sources(425,13793 | ||
def add_subcommand_version(435,14086 | ||
def show_version(438,14165 | ||
def get_cli_parser(446,14414 | ||
def error_raising_main(497,16147 | ||
def main(522,16766 | ||
|
||
perf.py,17 | ||
def log(34,1067 | ||
|
||
tsort.py,19 | ||
def tsort(41,1398 | ||
|
||
uspto_zip_cache.py,117 | ||
class UsptoZipCache:UsptoZipCache27,973 | ||
def __init__(33,1125 | ||
def read(38,1242 | ||
def get_zip_cache(84,2902 | ||
|
||
xml.py,342 | ||
def get_element(28,1092 | ||
def get_attribute(37,1306 | ||
def getter(48,1679 | ||
def agetter(54,1843 | ||
def all_getter(60,2021 | ||
def getter_by_attribute(65,2142 | ||
def fgetter(69,2346 | ||
def lower(79,2586 | ||
def lfunc(83,2705 | ||
def get_root_text(90,2827 | ||
class XMLCursor(95,2937 | ||
def __init__(99,3052 | ||
def Next(108,3448 | ||
def Rowid(134,4421 | ||
|
||
__init__.py,0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters