Skip to content

Commit

Permalink
Add run_sentence_recognition and format_prediction methods
Browse files Browse the repository at this point in the history
  • Loading branch information
lukehare committed Jul 25, 2023
1 parent 01e63e0 commit 2fa1eb3
Showing 1 changed file with 54 additions and 53 deletions.
107 changes: 54 additions & 53 deletions geoparser/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,17 +237,8 @@ def run_sentence(
candidates and ranks them, and finally links them to the Wikidata
ID.
"""
# Get predictions:
predictions = self.myner.ner_predict(sentence)

# Process predictions:
procpreds = [
[x["word"], x["entity"], "O", x["start"], x["end"], x["score"]]
for x in predictions
]

# Aggregate mentions:
mentions = ner.aggregate_mentions(procpreds, "pred")
mentions = self.run_sentence_recognition(sentence)

# List of mentions for the ranker:
rmentions = []
Expand All @@ -266,23 +257,7 @@ def run_sentence(
mentions_dataset = dict()
mentions_dataset["linking"] = []
for m in mentions:
prediction = dict()
prediction["mention"] = m["mention"]
prediction["context"] = context
prediction["candidates"] = []
prediction["gold"] = ["NONE"]
prediction["ner_score"] = m["ner_score"]
prediction["pos"] = m["start_char"]
prediction["sent_idx"] = sent_idx
prediction["end_pos"] = m["end_char"]
prediction["ngram"] = m["mention"]
prediction["conf_md"] = m["ner_score"]
prediction["tag"] = m["ner_label"]
prediction["sentence"] = sentence
prediction["string_match_candidates"] = wk_cands.get(m["mention"], dict())
prediction["candidates"] = wk_cands.get(m["mention"], dict())
prediction["place"] = place
prediction["place_wqid"] = place_wqid
prediction = self.format_prediction(m, sentence, wk_cands=wk_cands, context=context, sent_idx=sent_idx, place=place, place_wqid=place_wqid)
mentions_dataset["linking"].append(prediction)

# If the linking method is "reldisamb", rank and format candidates,
Expand Down Expand Up @@ -533,6 +508,55 @@ def run_text(

return document_dataset


def run_sentence_recognition(
self,
sentence
) -> List[dict]:
# Get predictions:
predictions = self.myner.ner_predict(sentence)

# Process predictions:
procpreds = [
[x["word"], x["entity"], "O", x["start"], x["end"], x["score"]]
for x in predictions
]

# Aggregate mentions:
mentions = ner.aggregate_mentions(procpreds, "pred")
return mentions


def format_prediction(self, mention,
sentence: str,
wk_cands: Optional[dict] = None,
context: Optional[Tuple[str, str]] = ("", ""),
sent_idx: Optional[int] = 0,
place: Optional[str] = "",
place_wqid: Optional[str] = ""
) -> dict:
prediction = dict()
prediction["mention"] = mention["mention"]
prediction["context"] = context
prediction["candidates"] = []
prediction["gold"] = ["NONE"]
prediction["ner_score"] = mention["ner_score"]
prediction["pos"] = mention["start_char"]
prediction["sent_idx"] = sent_idx
prediction["end_pos"] = mention["end_char"]
prediction["ngram"] = mention["mention"]
prediction["conf_md"] = mention["ner_score"]
prediction["tag"] = mention["ner_label"]
prediction["sentence"] = sentence
prediction["place"] = place
prediction["place_wqid"] = place_wqid
if wk_cands:
prediction["string_match_candidates"] = wk_cands.get(mention["mention"], dict())
prediction["candidates"] = wk_cands.get(mention["mention"], dict())
return prediction



def run_text_recognition(
self,
text: str,
Expand Down Expand Up @@ -599,35 +623,12 @@ def run_text_recognition(
if idx + 1 < len(sentences):
context[1] = sentences[idx + 1]

# Get predictions:
predictions = self.myner.ner_predict(sentence)

# Process predictions:
procpreds = [
[x["word"], x["entity"], "O", x["start"], x["end"], x["score"]]
for x in predictions
]

# Aggregate mentions:
mentions = ner.aggregate_mentions(procpreds, "pred")
mentions = self.run_sentence_recognition(sentence)

mentions_dataset = []
for m in mentions:
prediction = dict()
prediction["mention"] = m["mention"]
prediction["context"] = context
prediction["candidates"] = []
prediction["gold"] = ["NONE"]
prediction["ner_score"] = m["ner_score"]
prediction["pos"] = m["start_char"]
prediction["sent_idx"] = idx
prediction["end_pos"] = m["end_char"]
prediction["ngram"] = m["mention"]
prediction["conf_md"] = m["ner_score"]
prediction["tag"] = m["ner_label"]
prediction["sentence"] = sentence
prediction["place"] = place
prediction["place_wqid"] = place_wqid
prediction = self.format_prediction(m, sentence, wk_cands=None, context=context, sent_idx=idx, place=place, place_wqid=place_wqid)
# mentions_dataset["linking"].append(prediction)
if not len(m["mention"]) == 1 and not m["mention"].islower():
mentions_dataset.append(prediction)

Expand Down

0 comments on commit 2fa1eb3

Please sign in to comment.