Skip to content

Commit

Permalink
Alignment operator source adjust.
Browse files Browse the repository at this point in the history
  • Loading branch information
jan-niestadt committed Jun 3, 2024
1 parent 3628f9f commit 823f2c8
Showing 1 changed file with 44 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,12 @@ public String toString() {
/**
 * List of relations captured for current hit.
 *
 * accept() sorts this list and stores it in the match info, and
 * updateSourceStartEndWithCapturedRelations() reads it to widen the
 * reported hit span.
 */
private List<RelationInfo> capturedRelations = new ArrayList<>();

/**
 * Start of current (source) hit (covers all sources of captured relations).
 *
 * Set to the candidate's start position in accept(), then lowered to the
 * smallest source start among the captured relations. Returned by
 * startPosition() while positioned on a hit.
 */
private int adjustedStart;

/**
 * End of current (source) hit (covers all sources of captured relations).
 *
 * Set to the candidate's end position in accept(), then raised to the
 * largest source end among the captured relations. Returned by
 * endPosition() while positioned on a hit.
 */
private int adjustedEnd;

/**
* Construct a SpansCaptureRelationsBetweenSpans.
*
Expand All @@ -128,6 +134,24 @@ public SpansCaptureRelationsBetweenSpans(BLSpans source, List<Target> targets) {
this.targets = targets;
}

/**
 * Report where the current hit starts.
 *
 * While positioned on a hit this is the adjusted start, i.e. the start
 * widened to cover the sources of all captured relations, rather than
 * the raw start position.
 *
 * @return -1 while atFirstInCurrentDoc is set; startPos itself when it
 *         holds a sentinel (-1 or NO_MORE_POSITIONS); otherwise the
 *         adjusted start of the current hit
 */
@Override
public int startPosition() {
    if (atFirstInCurrentDoc) {
        return -1;
    }
    boolean onHit = startPos != -1 && startPos != NO_MORE_POSITIONS;
    // Off a hit, startPos already is the sentinel value we need to report.
    return onHit ? adjustedStart : startPos;
}

/**
 * Report where the current hit ends.
 *
 * While positioned on a hit this is the adjusted end, i.e. the end
 * widened to cover the sources of all captured relations, rather than
 * the raw end position.
 *
 * @return -1 while atFirstInCurrentDoc is set; startPos itself when it
 *         holds a sentinel (-1 or NO_MORE_POSITIONS); otherwise the
 *         adjusted end of the current hit
 */
@Override
public int endPosition() {
    if (atFirstInCurrentDoc) {
        return -1;
    }
    boolean onHit = startPos != -1 && startPos != NO_MORE_POSITIONS;
    // Off a hit, the sentinel stored in startPos doubles as the end position.
    return onHit ? adjustedEnd : startPos;
}

@Override
protected FilterSpans.AcceptStatus accept(BLSpans candidate) throws IOException {
// Prepare matchInfo so we can add captured relations to it
Expand All @@ -139,8 +163,12 @@ protected FilterSpans.AcceptStatus accept(BLSpans candidate) throws IOException
candidate.getMatchInfo(matchInfo);

// Find current source span
int sourceStart = startPosition();
int sourceEnd = endPosition();
int sourceStart = candidate.startPosition();
int sourceEnd = candidate.endPosition();

// Our final (source) span will cover all captured relations.
adjustedStart = sourceStart;
adjustedEnd = sourceEnd;

for (Target target: targets) {

Expand Down Expand Up @@ -198,6 +226,7 @@ protected FilterSpans.AcceptStatus accept(BLSpans candidate) throws IOException
target.targetField);
}

updateSourceStartEndWithCapturedRelations(); // update start/end to cover all captured relations
continue;
}

Expand Down Expand Up @@ -244,6 +273,8 @@ protected FilterSpans.AcceptStatus accept(BLSpans candidate) throws IOException
capturedRelations.sort(RelationInfo::compareTo);
matchInfo[target.captureAsIndex] = RelationListInfo.create(capturedRelations, getOverriddenField());
target.target.getMatchInfo(finalTargetIndex, matchInfo); // also perform captures on the target

updateSourceStartEndWithCapturedRelations(); // update start/end to cover all captured relations
} else {
// Target document has no matches. Reject this hit.
return FilterSpans.AcceptStatus.NO;
Expand All @@ -253,6 +284,17 @@ protected FilterSpans.AcceptStatus accept(BLSpans candidate) throws IOException
return FilterSpans.AcceptStatus.YES;
}

/**
 * Widen the adjusted hit span so it covers the source side of every
 * captured relation.
 *
 * The span only ever grows: adjustedStart can move down and adjustedEnd
 * can move up, never the other way. As a result, a query such as
 * "the" =sentence-alignment=>nl "de" yields the aligned sentences as
 * hits, not just the single matched words.
 */
private void updateSourceStartEndWithCapturedRelations() {
    capturedRelations.forEach(relation -> {
        // Keep the leftmost source start and the rightmost source end seen so far.
        adjustedStart = Math.min(adjustedStart, relation.getSourceStart());
        adjustedEnd = Math.max(adjustedEnd, relation.getSourceEnd());
    });
}

@Override
public String toString() {
return "==>(" + in + ", " + targets + ")";
Expand Down

0 comments on commit 823f2c8

Please sign in to comment.