Skip to content

Commit

Permalink
Parallel: include correct "foreign hits" spans in otherFields.
Browse files Browse the repository at this point in the history
Previously, the foreign hits were determined only by the target spans of
the matched relations, not by the matches for the spans to the right of the ==>
operator. Now we also autocapture those spans, so the foreign hits cover at
least those matches. We exclude these autocaptures from the BLS response (they
carry a special tag) because each one is just the target hit and would be superfluous there.
  • Loading branch information
jan-niestadt committed Jun 17, 2024
1 parent 9981195 commit b0a08db
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@
*/
public class SpanQueryCaptureRelationsBetweenSpans extends BLSpanQueryAbstract {

/** Suffix of the autogenerated capture name given to target hits.
* Captures whose name ends with this suffix are omitted from the response
* (each is just the target hit, so it doesn't need to be a separate match info). */
public static final String TAG_MATCHINFO_TARGET_HIT = "__@target";

/** Combination of relation and target queries */
public static class Target {

Expand Down Expand Up @@ -90,16 +95,30 @@ public static List<TargetWeight> createWeightTargets(List<Target> targets, Index
private TargetWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
BLSpanWeight relationsWeight = relations.createWeight(searcher, scoreMode, boost);
BLSpanWeight targetWeight = null;
String captureTargetAs = null;
//String targetField = null;
if (target instanceof SpanQueryCaptureGroup &&
BLSpanQuery.isAnyNGram(((SpanQueryCaptureGroup)target).getClause())) {
// Special case: target is e.g. A:[]*. Don't actually search for all n-grams, just ignore
// target while matching relations and capture the relation targets as A.
captureTargetAs = ((SpanQueryCaptureGroup)target).getCaptureName();
//targetField = target.getField(); // so we can set the field properly when we capture target
String captureTargetAs;

// Are we explicitly capturing target, without any edge adjustments?
if (target instanceof SpanQueryCaptureGroup && ((SpanQueryCaptureGroup) target).leftAdjust == 0 && ((SpanQueryCaptureGroup) target).rightAdjust == 0) {
// Yes; remember the capture name, and strip the capture from the clause
// (capturing will be done by SpansCaptureRelationsBetweenSpans)
captureTargetAs = ((SpanQueryCaptureGroup) target).getCaptureName();
BLSpanQuery targetClause = ((SpanQueryCaptureGroup) target).getClause();
if (BLSpanQuery.isAnyNGram(targetClause)) {
// Special case: target is e.g. A:[]*. Don't actually search for all n-grams, just ignore
// target while matching relations and capture the relation targets as A.
} else {
// Normal case: target is a real query. Create a weight for it.
targetWeight = targetClause.createWeight(searcher, scoreMode, boost);
}
} else {
targetWeight = target == null || BLSpanQuery.isAnyNGram(target) ? null : target.createWeight(searcher, scoreMode, boost);
// Not explicitly capturing target; just create a weight for it.

// tag this so it can be omitted from the response
// (the only reason we're capturing it is so we know the correct "foreign hit" to
// return in the otherFields section of the response)
captureTargetAs = captureAs + TAG_MATCHINFO_TARGET_HIT;
targetWeight = target == null || BLSpanQuery.isAnyNGram(target) ? null :
target.createWeight(searcher, scoreMode, boost);
}
return new TargetWeight(relationsWeight, targetWeight, captureAs, captureTargetAs, targetField);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import java.util.List;
import java.util.Objects;

import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.search.spans.FilterSpans;

/**
Expand All @@ -27,10 +26,10 @@ public static class Target {
private final BLSpans relations;

/** Match info name for the list of captured relations */
private final String captureAs;
private final String captureRelationsAs;

/** Group index of captureRelationsAs */
private int captureAsIndex = -1;
private int captureRelationsIndex = -1;

/** Span the relation targets must be inside of (or null if there are no hits or we don't care; in the latter
* case, {@link #hasTargetRestrictions} will be false) */
Expand All @@ -52,24 +51,22 @@ public static class Target {
/** Field used when capturing the target span as captureTargetAs (whether or not target is null), and for the target's hit query context. */
private final String targetField;

public Target(BLSpans relations, BLSpans target, boolean hasTargetRestrictions, String captureAs, String captureTargetAs, String targetField) {
public Target(BLSpans relations, BLSpans target, boolean hasTargetRestrictions, String captureRelationsAs, String captureTargetAs, String targetField) {
this.relations = relations;
this.captureAs = captureAs;
this.captureRelationsAs = captureRelationsAs;
this.target = target == null ? null : new SpansInBucketsPerDocument(target);
this.hasTargetRestrictions = hasTargetRestrictions;
this.captureTargetAs = captureTargetAs;
this.targetField = targetField;
if (target != null && !StringUtils.isEmpty(captureTargetAs))
throw new IllegalArgumentException("Can't specify captureTargetAs if target is not null");
assert captureTargetAs != null && !captureTargetAs.isEmpty();
}

void setContext(HitQueryContext context) {
relations.setHitQueryContext(context);
captureAsIndex = context.registerMatchInfo(captureAs, MatchInfo.Type.LIST_OF_RELATIONS);
captureRelationsIndex = context.registerMatchInfo(captureRelationsAs, MatchInfo.Type.LIST_OF_RELATIONS);

HitQueryContext targetContext = context.withField(targetField);
if (!StringUtils.isEmpty(captureTargetAs))
captureTargetAsIndex = targetContext.registerMatchInfo(captureTargetAs, MatchInfo.Type.SPAN);
captureTargetAsIndex = targetContext.registerMatchInfo(captureTargetAs, MatchInfo.Type.SPAN);
if (target != null)
target.setHitQueryContext(targetContext);
}
Expand All @@ -81,24 +78,24 @@ public boolean equals(Object o) {
if (o == null || getClass() != o.getClass())
return false;
Target target1 = (Target) o;
return captureAsIndex == target1.captureAsIndex && hasTargetRestrictions == target1.hasTargetRestrictions
return captureRelationsIndex == target1.captureRelationsIndex && hasTargetRestrictions == target1.hasTargetRestrictions
&& captureTargetAsIndex == target1.captureTargetAsIndex && Objects.equals(relations,
target1.relations) && Objects.equals(captureAs, target1.captureAs) && Objects.equals(
target1.relations) && Objects.equals(captureRelationsAs, target1.captureRelationsAs) && Objects.equals(
target, target1.target) && Objects.equals(captureTargetAs, target1.captureTargetAs)
&& Objects.equals(targetField, target1.targetField);
}

@Override
public int hashCode() {
return Objects.hash(relations, captureAs, captureAsIndex, target, hasTargetRestrictions, captureTargetAs,
return Objects.hash(relations, captureRelationsAs, captureRelationsIndex, target, hasTargetRestrictions, captureTargetAs,
captureTargetAsIndex, targetField);
}

@Override
public String toString() {
return "Target{" +
"relations=" + relations +
", captureAs='" + captureAs + '\'' +
", captureAs='" + captureRelationsAs + '\'' +
", captureTargetAs='" + captureTargetAs + '\'' +
", target=" + target +
", hasTargetRestrictions=" + hasTargetRestrictions +
Expand Down Expand Up @@ -217,14 +214,11 @@ protected FilterSpans.AcceptStatus accept(BLSpans candidate) throws IOException

if (!target.hasTargetRestrictions) {
// No target span specified (or e.g. A:[]* ); just accept the relations we captured.
matchInfo[target.captureAsIndex] = RelationListInfo.create(capturedRelations, getOverriddenField());
matchInfo[target.captureRelationsIndex] = RelationListInfo.create(capturedRelations, getOverriddenField());

// If target query was e.g. A:[]*, capture [targetPosMin, targetPosmax) into A.
if (target.captureTargetAsIndex >= 0) {

matchInfo[target.captureTargetAsIndex] = SpanInfo.create(targetPosMin, targetPosMax,
target.targetField);
}
// Capture target span
matchInfo[target.captureTargetAsIndex] = SpanInfo.create(targetPosMin, targetPosMax,
target.targetField);

updateSourceStartEndWithCapturedRelations(); // update start/end to cover all captured relations
continue;
Expand Down Expand Up @@ -271,7 +265,9 @@ protected FilterSpans.AcceptStatus accept(BLSpans candidate) throws IOException
capturedRelations.removeIf(r -> r.getTargetEnd() <= target.target.startPosition(finalTargetIndex)
|| r.getTargetStart() >= target.target.endPosition(finalTargetIndex));
capturedRelations.sort(RelationInfo::compareTo);
matchInfo[target.captureAsIndex] = RelationListInfo.create(capturedRelations, getOverriddenField());
matchInfo[target.captureRelationsIndex] = RelationListInfo.create(capturedRelations, getOverriddenField());
matchInfo[target.captureTargetAsIndex] = SpanInfo.create(target.target.startPosition(finalTargetIndex),
target.target.endPosition(finalTargetIndex), target.targetField);
target.target.getMatchInfo(finalTargetIndex, matchInfo); // also perform captures on the target

updateSourceStartEndWithCapturedRelations(); // update start/end to cover all captured relations
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ private static Map<String, int[]> updateMinMaxForMatchInfo(BlackLabIndex index,
if (!field.equals(defaultField)) { // foreign KWICs only
// By default, just use the match info span
// (which, in case of a cross-field relation, is the source span)
minMaxPerField = updateMinMaxPerField(minMaxPerField, field, mi.getSpanStart(), mi.getSpanEnd(), -1, -1);
minMaxPerField = updateMinMaxPerField(minMaxPerField, field, mi.getSpanStart(), mi.getSpanEnd(), mi.getSpanStart(), mi.getSpanEnd());
afisPerField.computeIfAbsent(field, k -> getAnnotationForwardIndexes(
index.forwardIndex(index.annotatedField(field))));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
import nl.inl.blacklab.search.lucene.MatchInfo;
import nl.inl.blacklab.search.lucene.RelationInfo;
import nl.inl.blacklab.search.lucene.RelationListInfo;
import nl.inl.blacklab.search.lucene.SpanQueryCaptureRelationsBetweenSpans;
import nl.inl.blacklab.search.results.ContextSize;
import nl.inl.blacklab.search.results.CorpusSize;
import nl.inl.blacklab.search.results.DocGroup;
Expand Down Expand Up @@ -709,9 +710,18 @@ private void outputHitOrSnippet(String docPid, Hit hit, String searchField, Map<

private void optMatchInfos(Map<String, MatchInfo> matchInfos, Predicate<MatchInfo> include) {
if (modernizeApi) {
if (matchInfos != null && !matchInfos.isEmpty()) {
Set<Map.Entry<String, MatchInfo>> filtered = matchInfos == null ? Collections.emptySet() :
matchInfos.entrySet().stream()
.filter(e ->
// don't include the autogenerated "foreign hit" match infos
!e.getKey().endsWith(SpanQueryCaptureRelationsBetweenSpans.TAG_MATCHINFO_TARGET_HIT) &&
// make sure we should include this match info here
e.getValue() != null && include.test(e.getValue()))
.collect(Collectors.toSet());

if (!filtered.isEmpty()) {
ds.startEntry(KEY_MATCH_INFOS).startMap();
for (Map.Entry<String, MatchInfo> e: matchInfos.entrySet()) {
for (Map.Entry<String, MatchInfo> e: filtered) {
MatchInfo matchInfo = e.getValue();
if (matchInfo != null && include.test(matchInfo)) {
ds.startElEntry(e.getKey());
Expand Down

0 comments on commit b0a08db

Please sign in to comment.