Skip to content

Commit

Permalink
feat(loader): process non-items-type objects
Browse files (browse the repository at this point in the history)
  • Loading branch information
pavellos21 committed Jul 17, 2020
1 parent b1125f6 commit db6cee0
Showing 1 changed file with 9 additions and 2 deletions.
11 changes: 9 additions & 2 deletions src/loaders/WikiDataWithContextLoader.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -45,8 +45,10 @@ def thread_fun(self, queue):
def resolve_ids(self):
for triplet in self._info['triplets']:
_, rlt, ent = triplet
triplet[1] = self._info['relations'][rlt]['identifier']
triplet[2] = self._info['entities'][ent]['identifier']
if rlt.startswith('P'):
triplet[1] = self._info['relations'][rlt]['identifier']
if ent.startswith('Q'):
triplet[2] = self._info['entities'][ent]['identifier']

temp_ent = self._info['entities']
self._info['entities'] = {}
Expand Down Expand Up @@ -80,6 +82,11 @@ def getEntity(self, entity, lang='en'):
if ent.startswith('Q'):
loading_queue.put([ent, 'entities'])
self._info['triplets'].append([page_title, rlt, ent])
else:
if set(re.findall(r'\.\w*$', ent)) & set(['.jpg', '.svg', '.png', '.map']):
continue
self._info['triplets'].append(
[page_title, rlt, '[%s]' % ent])

loading_queue.join()
self.resolve_ids()

0 comments on commit db6cee0

Please sign in to comment.