Skip to content

Commit

Permalink
Alignment operator includes "optional match" mode now.
Browse files Browse the repository at this point in the history
E.g. ==>nl? means "try to find alignments with Dutch, but report
all hits, whether or not any alignment was found".

On the other hand, ==>nl will ONLY find aligned hits and skip hits
where no alignment relations matched.
  • Loading branch information
jan-niestadt committed Jun 26, 2024
1 parent dee592f commit 93bd80c
Show file tree
Hide file tree
Showing 9 changed files with 177 additions and 129 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public class SpanQueryCaptureRelationsBetweenSpans extends BLSpanQueryAbstract {
public static class Target {

public static Target get(QueryInfo queryInfo, String relationFieldName, BLSpanQuery target, String targetField,
String captureRelsAs, String relationType) {
String captureRelsAs, String relationType, boolean optionalMatch) {

// Determine what relations to capture based on the class of the matching regex, if any
String relClass = RelationUtil.classFromFullType(relationType);
Expand All @@ -49,7 +49,7 @@ public static Target get(QueryInfo queryInfo, String relationFieldName, BLSpanQu

return new Target(getRelationsQuery(queryInfo, relationFieldName, relationType),
getRelationsQuery(queryInfo, relationFieldName, anyType), target, targetField,
captureRelsAs);
captureRelsAs, optionalMatch);
}

private static SpanQueryRelations getRelationsQuery(QueryInfo queryInfo, String relationFieldName,
Expand Down Expand Up @@ -79,12 +79,17 @@ private static SpanQueryRelations getRelationsQuery(QueryInfo queryInfo, String

private final String targetField;

private Target(BLSpanQuery matchRelations, BLSpanQuery captureRelations, BLSpanQuery target, String targetField, String captureAs) {
/** Should we include the hit on the left side of the relation even if there's no hit on the right side? */
private final boolean optionalMatch;

/**
 * Create a target from its already-built component queries.
 *
 * All arguments are stored as-is; no validation is done here.
 *
 * @param matchRelations   relations query used for matching (the factory method builds it
 *                         from the requested relation type)
 * @param captureRelations relations query used for capturing (the factory method builds it
 *                         from the "any type" variant of the relation type)
 * @param target           query for the target side; may be null (rewrite() passes null through)
 * @param targetField      name of the target field
 * @param captureAs        name to capture the relations as
 * @param optionalMatch    if true, the hit on the left side is kept even if there is no hit
 *                         on the right side
 */
private Target(BLSpanQuery matchRelations, BLSpanQuery captureRelations, BLSpanQuery target, String targetField,
String captureAs, boolean optionalMatch) {
this.matchRelations = matchRelations;
this.captureRelations = captureRelations;
this.target = target;
this.targetField = targetField;
this.captureAs = captureAs;
this.optionalMatch = optionalMatch;
}

public static List<Target> rewriteTargets(List<Target> targets, IndexReader reader) throws IOException {
Expand Down Expand Up @@ -139,15 +144,15 @@ private TargetWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, f
target.createWeight(searcher, scoreMode, boost);
}
return new TargetWeight(matchRelationsWeight, captureRelationsWeight, targetWeight, captureAs,
captureTargetAs, targetField);
captureTargetAs, targetField, optionalMatch);
}

private Target rewrite(IndexReader reader) throws IOException {
BLSpanQuery newMatchRelations = matchRelations.rewrite(reader);
BLSpanQuery newCaptureRelations = captureRelations.rewrite(reader);
BLSpanQuery newTarget = target == null ? null : target.rewrite(reader);
if (newMatchRelations != matchRelations || newCaptureRelations != captureRelations || newTarget != target) {
return new Target(newMatchRelations, newCaptureRelations, newTarget, targetField, captureAs);
return new Target(newMatchRelations, newCaptureRelations, newTarget, targetField, captureAs, optionalMatch);
}
return this;
}
Expand All @@ -159,15 +164,15 @@ public boolean equals(Object o) {
if (o == null || getClass() != o.getClass())
return false;
Target target1 = (Target) o;
return Objects.equals(matchRelations, target1.matchRelations) &&
Objects.equals(captureRelations, target1.captureRelations) &&
Objects.equals(captureAs, target1.captureAs) &&
Objects.equals(target, target1.target);
return optionalMatch == target1.optionalMatch && Objects.equals(matchRelations, target1.matchRelations)
&& Objects.equals(captureRelations, target1.captureRelations) && Objects.equals(
captureAs, target1.captureAs) && Objects.equals(target, target1.target)
&& Objects.equals(targetField, target1.targetField);
}

@Override
public int hashCode() {
return Objects.hash(matchRelations, captureRelations, captureAs, target);
return Objects.hash(matchRelations, captureRelations, captureAs, target, targetField, optionalMatch);
}

@Override
Expand Down Expand Up @@ -200,14 +205,18 @@ static class TargetWeight {
/** Span the relation targets must be inside of (or null if we don't care) */
private final BLSpanWeight target;

public TargetWeight(BLSpanWeight matchRelations, BLSpanWeight captureRelations, BLSpanWeight target, String captureAs, String captureTargetAs,
String targetField) {
/** Should we include the hit on the left side of the relation even if there's no hit on the right side? */
private final boolean optionalMatch;

/**
 * Create the weight counterpart of a target.
 *
 * All arguments are stored as-is; no validation is done here.
 *
 * @param matchRelations   weight for the relations query used for matching
 * @param captureRelations weight for the relations query used for capturing
 * @param target           span the relation targets must be inside of, or null if we don't care
 * @param captureAs        name to capture the relations as
 * @param captureTargetAs  name to capture the target as (presumably; confirm against callers)
 * @param targetField      name of the target field
 * @param optionalMatch    if true, the hit on the left side is kept even if there is no hit
 *                         on the right side
 */
public TargetWeight(BLSpanWeight matchRelations, BLSpanWeight captureRelations, BLSpanWeight target,
String captureAs, String captureTargetAs, String targetField, boolean optionalMatch) {
this.matchRelations = matchRelations;
this.captureRelations = captureRelations;
this.captureAs = captureAs;
this.captureTargetAs = captureTargetAs;
this.targetField = targetField;
this.target = target;
this.optionalMatch = optionalMatch;
}

public static void extractTermsFromTargets(List<TargetWeight> targets, Set<Term> terms) {
Expand Down Expand Up @@ -250,7 +259,7 @@ private SpansCaptureRelationsBetweenSpans.Target getSpans(LeafReaderContext cont
boolean hasTargetRestrictions = target != null;
BLSpans targetSpans = hasTargetRestrictions ? target.getSpans(context, requiredPostings) : null;
return new SpansCaptureRelationsBetweenSpans.Target(matchRelationsSpans, targetSpans, hasTargetRestrictions,
captureRelationsSpans, captureAs, captureTargetAs, targetField);
captureRelationsSpans, captureAs, captureTargetAs, targetField, optionalMatch);
}

private void extractTermStates(Map<Term, TermStates> contexts) {
Expand Down Expand Up @@ -278,16 +287,16 @@ public boolean equals(Object o) {
if (o == null || getClass() != o.getClass())
return false;
TargetWeight that = (TargetWeight) o;
return Objects.equals(matchRelations, that.matchRelations) &&
Objects.equals(captureRelations, that.captureRelations) &&
Objects.equals(captureAs, that.captureAs) &&
Objects.equals(captureTargetAs, that.captureTargetAs) &&
Objects.equals(target, that.target);
return optionalMatch == that.optionalMatch && Objects.equals(matchRelations, that.matchRelations)
&& Objects.equals(captureRelations, that.captureRelations) && Objects.equals(captureAs,
that.captureAs) && Objects.equals(captureTargetAs, that.captureTargetAs)
&& Objects.equals(targetField, that.targetField) && Objects.equals(target, that.target);
}

@Override
public int hashCode() {
return Objects.hash(matchRelations, captureRelations, captureAs, captureTargetAs, target);
return Objects.hash(matchRelations, captureRelations, captureAs, captureTargetAs, targetField, target,
optionalMatch);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;

import org.apache.lucene.search.spans.FilterSpans;

Expand Down Expand Up @@ -62,15 +61,20 @@ public static class Target {
/** If target == null and captureTargetAs is set, this gives the target field for capture. */
private final String targetField;

/** Should we include the hit on the left side of the relation even if there's no hit on the right side? */
private final boolean optionalMatch;

public Target(BLSpans matchRelations, BLSpans target, boolean hasTargetRestrictions,
BLSpans captureRelations, String captureRelationsAs, String captureTargetAs, String targetField) {
BLSpans captureRelations, String captureRelationsAs, String captureTargetAs, String targetField,
boolean optionalMatch) {
this.matchRelations = matchRelations;
this.captureRelations = captureRelations;
this.captureRelationsAs = captureRelationsAs;
this.target = target == null ? null : new SpansInBucketsPerDocument(target);
this.hasTargetRestrictions = hasTargetRestrictions;
this.captureTargetAs = captureTargetAs;
this.targetField = targetField;
this.optionalMatch = optionalMatch;
assert captureTargetAs != null && !captureTargetAs.isEmpty();
}

Expand All @@ -92,35 +96,40 @@ public boolean equals(Object o) {
if (o == null || getClass() != o.getClass())
return false;
Target target1 = (Target) o;
return captureRelationsIndex == target1.captureRelationsIndex &&
hasTargetRestrictions == target1.hasTargetRestrictions &&
captureTargetAsIndex == target1.captureTargetAsIndex &&
Objects.equals(matchRelations, target1.matchRelations) &&
Objects.equals(captureRelations, target1.captureRelations) &&
Objects.equals(captureRelationsAs, target1.captureRelationsAs) &&
Objects.equals(target, target1.target) &&
Objects.equals(captureTargetAs, target1.captureTargetAs) &&
Objects.equals(targetField, target1.targetField);
return captureRelationsIndex == target1.captureRelationsIndex
&& hasTargetRestrictions == target1.hasTargetRestrictions
&& captureTargetAsIndex == target1.captureTargetAsIndex && optionalMatch == target1.optionalMatch
&& Objects.equals(matchRelations, target1.matchRelations) && Objects.equals(
captureRelations, target1.captureRelations) && Objects.equals(captureRelationsAs,
target1.captureRelationsAs) && Objects.equals(target, target1.target) && Objects.equals(
captureTargetAs, target1.captureTargetAs) && Objects.equals(targetField, target1.targetField);
}

@Override
public int hashCode() {
return Objects.hash(matchRelations, captureRelations, captureRelationsAs,
captureRelationsIndex, target, hasTargetRestrictions, captureTargetAs,
captureTargetAsIndex, targetField);
return Objects.hash(matchRelations, captureRelations, captureRelationsAs, captureRelationsIndex, target,
hasTargetRestrictions, captureTargetAs, captureTargetAsIndex, targetField, optionalMatch);
}

@Override
public String toString() {
return "Target{" +
"matchRelations=" + matchRelations +
", captureRelations=" + matchRelations +
", captureAs='" + captureRelationsAs + '\'' +
", captureTargetAs='" + captureTargetAs + '\'' +
", captureRelations=" + captureRelations +
", captureRelationsAs='" + captureRelationsAs + '\'' +
", captureRelationsIndex=" + captureRelationsIndex +
", target=" + target +
", hasTargetRestrictions=" + hasTargetRestrictions +
", captureTargetAs='" + captureTargetAs + '\'' +
", captureTargetAsIndex=" + captureTargetAsIndex +
", targetField='" + targetField + '\'' +
", optionalMatch=" + optionalMatch +
'}';
}

/**
 * Should the hit on the left side of the relation be kept even if this target
 * produces no hit on the right side?
 *
 * @return true if this target is an optional match, false if a hit without a match
 *         for this target must be rejected
 */
public boolean isOptionalMatch() {
return optionalMatch;
}
}

private final List<Target> targets;
Expand Down Expand Up @@ -196,11 +205,6 @@ protected FilterSpans.AcceptStatus accept(BLSpans candidate) throws IOException
adjustedStart = sourceStart;
adjustedEnd = sourceEnd;

// If there's no match in the target field, should we reject this hit or accept it
// (and just return the source hit)?
FilterSpans.AcceptStatus returnValueIfNoTarget = REJECT_IF_NO_TARGET_MATCH ? FilterSpans.AcceptStatus.NO :
FilterSpans.AcceptStatus.YES;

for (Target target: targets) {

// Capture all relations with source overlapping this span.
Expand All @@ -209,11 +213,17 @@ protected FilterSpans.AcceptStatus accept(BLSpans candidate) throws IOException

if (matchingRelations.isEmpty()) {
// If no relations match, there is no match (unless this target is an optional match).
return returnValueIfNoTarget;
if (!target.isOptionalMatch())
return FilterSpans.AcceptStatus.NO;
else
continue; // check next target
}
if (target.hasTargetRestrictions && target.target == null) {
// There were target restrictions, but no hits (in this index segment); no match
return returnValueIfNoTarget;
if (!target.isOptionalMatch())
return FilterSpans.AcceptStatus.NO;
else
continue; // check next target
}

if (!target.hasTargetRestrictions) {
Expand Down Expand Up @@ -269,7 +279,10 @@ protected FilterSpans.AcceptStatus accept(BLSpans candidate) throws IOException
}
if (targetRelationsCovered == 0) {
// A valid hit must have at least one matching relation in each non-optional target.
return returnValueIfNoTarget;
if (!target.isOptionalMatch())
return FilterSpans.AcceptStatus.NO;
else
continue; // check next target
}

// Find relations to capture.
Expand All @@ -290,7 +303,8 @@ protected FilterSpans.AcceptStatus accept(BLSpans candidate) throws IOException
updateSourceStartEndWithCapturedRelations(); // update start/end to cover all captured relations
} else {
// Target document has no matches. Reject this hit, unless this target is an optional match.
return returnValueIfNoTarget;
if (!target.isOptionalMatch())
return FilterSpans.AcceptStatus.NO;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ public class RelationOperatorInfo {
* @return relation type and target version (interpret both as regexes)
*/
public static RelationOperatorInfo fromOperator(String op) {
// If relation operator ends with ?, hits on the left are included even if
// there's no match on the right. Right now only available for alignment operators (parallel corpora).
boolean optionalMatch = op.endsWith("?");
if (optionalMatch)
op = op.substring(0, op.length() - 1);

// Root operator?
// (this determines the relation directions we allow; direction is usually both (i.e. forward and backward),
// but root relations have a special "direction" because they have no source, so using that we ensure we'll
Expand Down Expand Up @@ -83,7 +89,7 @@ public static RelationOperatorInfo fromOperator(String op) {
typeRegex = RelationUtil.ANY_TYPE_REGEX; // any relation type

return new RelationOperatorInfo(typeRegex, direction, targetVersion, negate,
isAlignmentOperator);
isAlignmentOperator, optionalMatch);
}

/** Relation type regex. */
Expand All @@ -102,17 +108,24 @@ public static RelationOperatorInfo fromOperator(String op) {
* used for parallel corpora) */
private final boolean isAlignmentOperator;

/** Are hits on the left included even if there's no match on the right?
* Right now, this is only available for alignment operators (parallel corpora). */
private final boolean optionalMatch;

public RelationOperatorInfo(String typeRegex, SpanQueryRelations.Direction direction,
String targetVersion, boolean negate,
boolean isAlignmentOperator) {
boolean isAlignmentOperator, boolean optionalMatch) {
this.typeRegex = typeRegex;
this.direction = direction;
this.targetVersion = targetVersion == null || targetVersion.isEmpty() ? null : targetVersion;
this.negate = negate;
this.isAlignmentOperator = isAlignmentOperator;
this.optionalMatch = optionalMatch;

if (isAlignmentOperator && negate)
throw new RuntimeException("Alignment operator cannot be negated");
if (optionalMatch && !isAlignmentOperator)
throw new RuntimeException("Optional match operator can only be used with alignment operators");
}

public String getTypeRegex() {
Expand All @@ -135,21 +148,25 @@ public Boolean isAlignment() {
return isAlignmentOperator;
}

/**
 * Are hits on the left included even if there's no match on the right?
 *
 * This is set when the operator string ends with "?", and the constructor only
 * allows it for alignment operators.
 *
 * @return true if this operator is an optional match
 */
public boolean isOptionalMatch() {
return optionalMatch;
}

@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (o == null || getClass() != o.getClass())
return false;
RelationOperatorInfo that = (RelationOperatorInfo) o;
return negate == that.negate && isAlignmentOperator == that.isAlignmentOperator && Objects.equals(
typeRegex, that.typeRegex) && direction == that.direction && Objects.equals(targetVersion,
that.targetVersion);
return negate == that.negate && isAlignmentOperator == that.isAlignmentOperator
&& optionalMatch == that.optionalMatch && Objects.equals(typeRegex, that.typeRegex)
&& direction == that.direction && Objects.equals(targetVersion, that.targetVersion);
}

@Override
public int hashCode() {
return Objects.hash(typeRegex, direction, targetVersion, negate, isAlignmentOperator);
return Objects.hash(typeRegex, direction, targetVersion, negate, isAlignmentOperator, optionalMatch);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ private static SpanQueryCaptureRelationsBetweenSpans.Target alignmentTarget(Rela

return SpanQueryCaptureRelationsBetweenSpans.Target.get(
context.queryInfo(), context.withRelationAnnotation().luceneField(), targetQuery,
targetContext.field().name(), captureName, relationType);
targetContext.field().name(), captureName, relationType, opInfo.isOptionalMatch());
}

private BLSpanQuery createRelMatchQuery(QueryExecutionContext context, TextPattern parent, List<RelationTarget> children) throws InvalidQuery {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,8 @@ void serialize(TextPatternStruct pattern, StringBuilder b, boolean parenthesizeI
String opChar = operatorInfo.isAlignment() ? "=" : "-";
String optTargetVersion = operatorInfo.getTargetVersion() == null ? "" : operatorInfo.getTargetVersion();
b.append(isRoot ? "" : " ").append(optCapture).append(optOperatorPrefix).append(opChar).append(optRegex)
.append(opChar).append(">").append(optTargetVersion).append(" ");
.append(opChar).append(">").append(optTargetVersion).append(operatorInfo.isOptionalMatch() ? "?" : "")
.append(" ");
serialize(tp.getTarget(), b, true, insideTokenBrackets);
});

Expand Down
Loading

0 comments on commit 93bd80c

Please sign in to comment.