diff --git a/engine/src/main/java/nl/inl/blacklab/search/lucene/SpanQueryCaptureRelationsBetweenSpans.java b/engine/src/main/java/nl/inl/blacklab/search/lucene/SpanQueryCaptureRelationsBetweenSpans.java index d7ccd3345..d0145dae2 100644 --- a/engine/src/main/java/nl/inl/blacklab/search/lucene/SpanQueryCaptureRelationsBetweenSpans.java +++ b/engine/src/main/java/nl/inl/blacklab/search/lucene/SpanQueryCaptureRelationsBetweenSpans.java @@ -40,7 +40,7 @@ public class SpanQueryCaptureRelationsBetweenSpans extends BLSpanQueryAbstract { public static class Target { public static Target get(QueryInfo queryInfo, String relationFieldName, BLSpanQuery target, String targetField, - String captureRelsAs, String relationType) { + String captureRelsAs, String relationType, boolean optionalMatch) { // Determine what relations to capture based on the class of the matching regex, if any String relClass = RelationUtil.classFromFullType(relationType); @@ -49,7 +49,7 @@ public static Target get(QueryInfo queryInfo, String relationFieldName, BLSpanQu return new Target(getRelationsQuery(queryInfo, relationFieldName, relationType), getRelationsQuery(queryInfo, relationFieldName, anyType), target, targetField, - captureRelsAs); + captureRelsAs, optionalMatch); } private static SpanQueryRelations getRelationsQuery(QueryInfo queryInfo, String relationFieldName, @@ -79,12 +79,17 @@ private static SpanQueryRelations getRelationsQuery(QueryInfo queryInfo, String private final String targetField; - private Target(BLSpanQuery matchRelations, BLSpanQuery captureRelations, BLSpanQuery target, String targetField, String captureAs) { + /** Should we include the hit on the left side of the relation even if there's no hit on the right side? 
*/ + private final boolean optionalMatch; + + private Target(BLSpanQuery matchRelations, BLSpanQuery captureRelations, BLSpanQuery target, String targetField, + String captureAs, boolean optionalMatch) { this.matchRelations = matchRelations; this.captureRelations = captureRelations; this.target = target; this.targetField = targetField; this.captureAs = captureAs; + this.optionalMatch = optionalMatch; } public static List rewriteTargets(List targets, IndexReader reader) throws IOException { @@ -139,7 +144,7 @@ private TargetWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, f target.createWeight(searcher, scoreMode, boost); } return new TargetWeight(matchRelationsWeight, captureRelationsWeight, targetWeight, captureAs, - captureTargetAs, targetField); + captureTargetAs, targetField, optionalMatch); } private Target rewrite(IndexReader reader) throws IOException { @@ -147,7 +152,7 @@ private Target rewrite(IndexReader reader) throws IOException { BLSpanQuery newCaptureRelations = captureRelations.rewrite(reader); BLSpanQuery newTarget = target == null ? 
null : target.rewrite(reader); if (newMatchRelations != matchRelations || newCaptureRelations != captureRelations || newTarget != target) { - return new Target(newMatchRelations, newCaptureRelations, newTarget, targetField, captureAs); + return new Target(newMatchRelations, newCaptureRelations, newTarget, targetField, captureAs, optionalMatch); } return this; } @@ -159,15 +164,15 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; Target target1 = (Target) o; - return Objects.equals(matchRelations, target1.matchRelations) && - Objects.equals(captureRelations, target1.captureRelations) && - Objects.equals(captureAs, target1.captureAs) && - Objects.equals(target, target1.target); + return optionalMatch == target1.optionalMatch && Objects.equals(matchRelations, target1.matchRelations) + && Objects.equals(captureRelations, target1.captureRelations) && Objects.equals( + captureAs, target1.captureAs) && Objects.equals(target, target1.target) + && Objects.equals(targetField, target1.targetField); } @Override public int hashCode() { - return Objects.hash(matchRelations, captureRelations, captureAs, target); + return Objects.hash(matchRelations, captureRelations, captureAs, target, targetField, optionalMatch); } @Override @@ -200,14 +205,18 @@ static class TargetWeight { /** Span the relation targets must be inside of (or null if we don't care) */ private final BLSpanWeight target; - public TargetWeight(BLSpanWeight matchRelations, BLSpanWeight captureRelations, BLSpanWeight target, String captureAs, String captureTargetAs, - String targetField) { + /** Should we include the hit on the left side of the relation even if there's no hit on the right side? 
*/ + private final boolean optionalMatch; + + public TargetWeight(BLSpanWeight matchRelations, BLSpanWeight captureRelations, BLSpanWeight target, + String captureAs, String captureTargetAs, String targetField, boolean optionalMatch) { this.matchRelations = matchRelations; this.captureRelations = captureRelations; this.captureAs = captureAs; this.captureTargetAs = captureTargetAs; this.targetField = targetField; this.target = target; + this.optionalMatch = optionalMatch; } public static void extractTermsFromTargets(List targets, Set terms) { @@ -250,7 +259,7 @@ private SpansCaptureRelationsBetweenSpans.Target getSpans(LeafReaderContext cont boolean hasTargetRestrictions = target != null; BLSpans targetSpans = hasTargetRestrictions ? target.getSpans(context, requiredPostings) : null; return new SpansCaptureRelationsBetweenSpans.Target(matchRelationsSpans, targetSpans, hasTargetRestrictions, - captureRelationsSpans, captureAs, captureTargetAs, targetField); + captureRelationsSpans, captureAs, captureTargetAs, targetField, optionalMatch); } private void extractTermStates(Map contexts) { @@ -278,16 +287,16 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; TargetWeight that = (TargetWeight) o; - return Objects.equals(matchRelations, that.matchRelations) && - Objects.equals(captureRelations, that.captureRelations) && - Objects.equals(captureAs, that.captureAs) && - Objects.equals(captureTargetAs, that.captureTargetAs) && - Objects.equals(target, that.target); + return optionalMatch == that.optionalMatch && Objects.equals(matchRelations, that.matchRelations) + && Objects.equals(captureRelations, that.captureRelations) && Objects.equals(captureAs, + that.captureAs) && Objects.equals(captureTargetAs, that.captureTargetAs) + && Objects.equals(targetField, that.targetField) && Objects.equals(target, that.target); } @Override public int hashCode() { - return Objects.hash(matchRelations, captureRelations, captureAs, captureTargetAs, 
target); + return Objects.hash(matchRelations, captureRelations, captureAs, captureTargetAs, targetField, target, + optionalMatch); } } diff --git a/engine/src/main/java/nl/inl/blacklab/search/lucene/SpansCaptureRelationsBetweenSpans.java b/engine/src/main/java/nl/inl/blacklab/search/lucene/SpansCaptureRelationsBetweenSpans.java index c58b0efbb..bc4dc93d9 100644 --- a/engine/src/main/java/nl/inl/blacklab/search/lucene/SpansCaptureRelationsBetweenSpans.java +++ b/engine/src/main/java/nl/inl/blacklab/search/lucene/SpansCaptureRelationsBetweenSpans.java @@ -5,7 +5,6 @@ import java.util.Arrays; import java.util.List; import java.util.Objects; -import java.util.function.Function; import org.apache.lucene.search.spans.FilterSpans; @@ -62,8 +61,12 @@ public static class Target { /** If target == null and captureTargetAs is set, this gives the target field for capture. */ private final String targetField; + /** Should we include the hit on the left side of the relation even if there's no hit on the right side? 
*/ + private final boolean optionalMatch; + public Target(BLSpans matchRelations, BLSpans target, boolean hasTargetRestrictions, - BLSpans captureRelations, String captureRelationsAs, String captureTargetAs, String targetField) { + BLSpans captureRelations, String captureRelationsAs, String captureTargetAs, String targetField, + boolean optionalMatch) { this.matchRelations = matchRelations; this.captureRelations = captureRelations; this.captureRelationsAs = captureRelationsAs; @@ -71,6 +74,7 @@ public Target(BLSpans matchRelations, BLSpans target, boolean hasTargetRestricti this.hasTargetRestrictions = hasTargetRestrictions; this.captureTargetAs = captureTargetAs; this.targetField = targetField; + this.optionalMatch = optionalMatch; assert captureTargetAs != null && !captureTargetAs.isEmpty(); } @@ -92,35 +96,40 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; Target target1 = (Target) o; - return captureRelationsIndex == target1.captureRelationsIndex && - hasTargetRestrictions == target1.hasTargetRestrictions && - captureTargetAsIndex == target1.captureTargetAsIndex && - Objects.equals(matchRelations, target1.matchRelations) && - Objects.equals(captureRelations, target1.captureRelations) && - Objects.equals(captureRelationsAs, target1.captureRelationsAs) && - Objects.equals(target, target1.target) && - Objects.equals(captureTargetAs, target1.captureTargetAs) && - Objects.equals(targetField, target1.targetField); + return captureRelationsIndex == target1.captureRelationsIndex + && hasTargetRestrictions == target1.hasTargetRestrictions + && captureTargetAsIndex == target1.captureTargetAsIndex && optionalMatch == target1.optionalMatch + && Objects.equals(matchRelations, target1.matchRelations) && Objects.equals( + captureRelations, target1.captureRelations) && Objects.equals(captureRelationsAs, + target1.captureRelationsAs) && Objects.equals(target, target1.target) && Objects.equals( + captureTargetAs, 
target1.captureTargetAs) && Objects.equals(targetField, target1.targetField); } @Override public int hashCode() { - return Objects.hash(matchRelations, captureRelations, captureRelationsAs, - captureRelationsIndex, target, hasTargetRestrictions, captureTargetAs, - captureTargetAsIndex, targetField); + return Objects.hash(matchRelations, captureRelations, captureRelationsAs, captureRelationsIndex, target, + hasTargetRestrictions, captureTargetAs, captureTargetAsIndex, targetField, optionalMatch); } @Override public String toString() { return "Target{" + "matchRelations=" + matchRelations + - ", captureRelations=" + matchRelations + - ", captureAs='" + captureRelationsAs + '\'' + - ", captureTargetAs='" + captureTargetAs + '\'' + + ", captureRelations=" + captureRelations + + ", captureRelationsAs='" + captureRelationsAs + '\'' + + ", captureRelationsIndex=" + captureRelationsIndex + ", target=" + target + ", hasTargetRestrictions=" + hasTargetRestrictions + + ", captureTargetAs='" + captureTargetAs + '\'' + + ", captureTargetAsIndex=" + captureTargetAsIndex + + ", targetField='" + targetField + '\'' + + ", optionalMatch=" + optionalMatch + '}'; } + + public boolean isOptionalMatch() { + return optionalMatch; + } } private final List targets; @@ -196,11 +205,6 @@ protected FilterSpans.AcceptStatus accept(BLSpans candidate) throws IOException adjustedStart = sourceStart; adjustedEnd = sourceEnd; - // If there's no match in the target field, should we reject this hit or accept it - // (and just return the source hit)? - FilterSpans.AcceptStatus returnValueIfNoTarget = REJECT_IF_NO_TARGET_MATCH ? FilterSpans.AcceptStatus.NO : - FilterSpans.AcceptStatus.YES; - for (Target target: targets) { // Capture all relations with source overlapping this span. @@ -209,11 +213,17 @@ protected FilterSpans.AcceptStatus accept(BLSpans candidate) throws IOException if (matchingRelations.isEmpty()) { // If no relations match, there is no match. 
- return returnValueIfNoTarget; + if (!target.isOptionalMatch()) + return FilterSpans.AcceptStatus.NO; + else + continue; // check next target } if (target.hasTargetRestrictions && target.target == null) { // There were target restrictions, but no hits (in this index segment); no match - return returnValueIfNoTarget; + if (!target.isOptionalMatch()) + return FilterSpans.AcceptStatus.NO; + else + continue; // check next target } if (!target.hasTargetRestrictions) { @@ -269,7 +279,10 @@ protected FilterSpans.AcceptStatus accept(BLSpans candidate) throws IOException } if (targetRelationsCovered == 0) { // A valid hit must have at least one matching relation in each target. - return returnValueIfNoTarget; + if (!target.isOptionalMatch()) + return FilterSpans.AcceptStatus.NO; + else + continue; // check next target } // Find relations to capture. @@ -290,7 +303,8 @@ protected FilterSpans.AcceptStatus accept(BLSpans candidate) throws IOException updateSourceStartEndWithCapturedRelations(); // update start/end to cover all captured relations } else { // Target document has no matches. Reject this hit. - return returnValueIfNoTarget; + if (!target.isOptionalMatch()) + return FilterSpans.AcceptStatus.NO; } } diff --git a/engine/src/main/java/nl/inl/blacklab/search/textpattern/RelationOperatorInfo.java b/engine/src/main/java/nl/inl/blacklab/search/textpattern/RelationOperatorInfo.java index 02e59f567..2a270a487 100644 --- a/engine/src/main/java/nl/inl/blacklab/search/textpattern/RelationOperatorInfo.java +++ b/engine/src/main/java/nl/inl/blacklab/search/textpattern/RelationOperatorInfo.java @@ -45,6 +45,12 @@ public class RelationOperatorInfo { * @return relation type and target version (intepret both as regexes) */ public static RelationOperatorInfo fromOperator(String op) { + // If relation operator ends with ?, hits on the left are included even if + // there's no match on the right. Right now only available for alignment operators (parallel corpora). 
+ boolean optionalMatch = op.endsWith("?"); + if (optionalMatch) + op = op.substring(0, op.length() - 1); + // Root operator? // (this determines the relation directions we allow; direction is usually both (i.e. forward and backward), // but root relations have a special "direction" because they have no source, so using that we ensure we'll @@ -83,7 +89,7 @@ public static RelationOperatorInfo fromOperator(String op) { typeRegex = RelationUtil.ANY_TYPE_REGEX; // any relation type return new RelationOperatorInfo(typeRegex, direction, targetVersion, negate, - isAlignmentOperator); + isAlignmentOperator, optionalMatch); } /** Relation type regex. */ @@ -102,17 +108,24 @@ public static RelationOperatorInfo fromOperator(String op) { * used for parallel corpora) */ private final boolean isAlignmentOperator; + /** Are hits on the left included even if there's no match on the right? + * Right now, this is only available for alignment operators (parallel corpora). */ + private final boolean optionalMatch; + public RelationOperatorInfo(String typeRegex, SpanQueryRelations.Direction direction, String targetVersion, boolean negate, - boolean isAlignmentOperator) { + boolean isAlignmentOperator, boolean optionalMatch) { this.typeRegex = typeRegex; this.direction = direction; this.targetVersion = targetVersion == null || targetVersion.isEmpty() ? 
null : targetVersion; this.negate = negate; this.isAlignmentOperator = isAlignmentOperator; + this.optionalMatch = optionalMatch; if (isAlignmentOperator && negate) throw new RuntimeException("Alignment operator cannot be negated"); + if (optionalMatch && !isAlignmentOperator) + throw new RuntimeException("Optional match operator can only be used with alignment operators"); } public String getTypeRegex() { @@ -135,6 +148,10 @@ public Boolean isAlignment() { return isAlignmentOperator; } + public boolean isOptionalMatch() { + return optionalMatch; + } + @Override public boolean equals(Object o) { if (this == o) @@ -142,14 +159,14 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; RelationOperatorInfo that = (RelationOperatorInfo) o; - return negate == that.negate && isAlignmentOperator == that.isAlignmentOperator && Objects.equals( - typeRegex, that.typeRegex) && direction == that.direction && Objects.equals(targetVersion, - that.targetVersion); + return negate == that.negate && isAlignmentOperator == that.isAlignmentOperator + && optionalMatch == that.optionalMatch && Objects.equals(typeRegex, that.typeRegex) + && direction == that.direction && Objects.equals(targetVersion, that.targetVersion); } @Override public int hashCode() { - return Objects.hash(typeRegex, direction, targetVersion, negate, isAlignmentOperator); + return Objects.hash(typeRegex, direction, targetVersion, negate, isAlignmentOperator, optionalMatch); } /** diff --git a/engine/src/main/java/nl/inl/blacklab/search/textpattern/TextPatternRelationMatch.java b/engine/src/main/java/nl/inl/blacklab/search/textpattern/TextPatternRelationMatch.java index 8fe344140..81cc7e771 100644 --- a/engine/src/main/java/nl/inl/blacklab/search/textpattern/TextPatternRelationMatch.java +++ b/engine/src/main/java/nl/inl/blacklab/search/textpattern/TextPatternRelationMatch.java @@ -85,7 +85,7 @@ private static SpanQueryCaptureRelationsBetweenSpans.Target alignmentTarget(Rela 
return SpanQueryCaptureRelationsBetweenSpans.Target.get( context.queryInfo(), context.withRelationAnnotation().luceneField(), targetQuery, - targetContext.field().name(), captureName, relationType); + targetContext.field().name(), captureName, relationType, opInfo.isOptionalMatch()); } private BLSpanQuery createRelMatchQuery(QueryExecutionContext context, TextPattern parent, List children) throws InvalidQuery { diff --git a/engine/src/main/java/nl/inl/blacklab/search/textpattern/TextPatternSerializerCql.java b/engine/src/main/java/nl/inl/blacklab/search/textpattern/TextPatternSerializerCql.java index bfa267f2f..827564106 100644 --- a/engine/src/main/java/nl/inl/blacklab/search/textpattern/TextPatternSerializerCql.java +++ b/engine/src/main/java/nl/inl/blacklab/search/textpattern/TextPatternSerializerCql.java @@ -204,7 +204,8 @@ void serialize(TextPatternStruct pattern, StringBuilder b, boolean parenthesizeI String opChar = operatorInfo.isAlignment() ? "=" : "-"; String optTargetVersion = operatorInfo.getTargetVersion() == null ? "" : operatorInfo.getTargetVersion(); b.append(isRoot ? "" : " ").append(optCapture).append(optOperatorPrefix).append(opChar).append(optRegex) - .append(opChar).append(">").append(optTargetVersion).append(" "); + .append(opChar).append(">").append(optTargetVersion).append(operatorInfo.isOptionalMatch() ? "?" 
: "") + .append(" "); serialize(tp.getTarget(), b, true, insideTokenBrackets); }); diff --git a/engine/src/main/java/nl/inl/blacklab/search/textpattern/TextPatternSerializerJson.java b/engine/src/main/java/nl/inl/blacklab/search/textpattern/TextPatternSerializerJson.java index 75144348a..67342a75d 100644 --- a/engine/src/main/java/nl/inl/blacklab/search/textpattern/TextPatternSerializerJson.java +++ b/engine/src/main/java/nl/inl/blacklab/search/textpattern/TextPatternSerializerJson.java @@ -101,6 +101,7 @@ interface NodeSerializer { private static final String KEY_NAME = "name"; // annotation, function private static final String KEY_NEGATE = "negate"; private static final String KEY_OPERATION = "operation"; // posfilter, ngrams + private static final String KEY_OPTIONAL = "optional"; // relation target private static final String KEY_PARENT = "parent"; private static final String KEY_PRODUCER = "producer"; private static final String KEY_REL_SPAN_MODE = "spanMode"; @@ -240,7 +241,8 @@ KEY_REL_SPAN_MODE, nullIf(tp.getSpanMode().getCode(), "source"), KEY_DIRECTION, nullIf(operatorInfo.getDirection().getCode(), "both"), KEY_CAPTURE, nullIfEmpty(tp.getCaptureAs()), KEY_TARGET_VERSION, nullIfEmpty(operatorInfo.getTargetVersion()), - KEY_ALIGNMENT, nullIf(operatorInfo.isAlignment(), false)); + KEY_ALIGNMENT, nullIf(operatorInfo.isAlignment(), false), + KEY_OPTIONAL, nullIf(operatorInfo.isOptionalMatch(), false)); }); // Repetition @@ -473,7 +475,8 @@ public static TextPatternStruct deserialize(String nodeType, Map SpanQueryRelations.Direction.fromCode((String)args.getOrDefault(KEY_DIRECTION, "both")), (String) args.get(KEY_TARGET_VERSION), (boolean) args.getOrDefault(KEY_NEGATE, false), - (boolean) args.getOrDefault(KEY_ALIGNMENT, false)); // @@@ TODO + (boolean) args.getOrDefault(KEY_ALIGNMENT, false), + (boolean) args.getOrDefault(KEY_OPTIONAL, false)); return new RelationTarget( relOpInfo, (TextPattern) args.get(KEY_CLAUSE), diff --git 
a/query-parser/src/main/java/nl/inl/blacklab/queryParser/corpusql/GeneratedCorpusQueryLanguageParserTokenManager.java b/query-parser/src/main/java/nl/inl/blacklab/queryParser/corpusql/GeneratedCorpusQueryLanguageParserTokenManager.java index 03b8a5599..3d02aa80f 100644 --- a/query-parser/src/main/java/nl/inl/blacklab/queryParser/corpusql/GeneratedCorpusQueryLanguageParserTokenManager.java +++ b/query-parser/src/main/java/nl/inl/blacklab/queryParser/corpusql/GeneratedCorpusQueryLanguageParserTokenManager.java @@ -393,7 +393,7 @@ private int jjMoveNfa_0(int startState, int curPos) catch(java.io.IOException e) { throw new Error("Internal Error"); } curPos = 0; int startsAt = 0; - jjnewStateCnt = 62; + jjnewStateCnt = 63; int i = 1; jjstateSet[0] = startState; int kind = 0x7fffffff; @@ -413,16 +413,16 @@ private int jjMoveNfa_0(int startState, int curPos) { if (kind > 17) kind = 17; - { jjCheckNAdd(52); } + { jjCheckNAdd(53); } } else if (curChar == 37) - jjstateSet[jjnewStateCnt++] = 51; + jjstateSet[jjnewStateCnt++] = 52; else if (curChar == 39) { jjCheckNAddStates(0, 2); } else if (curChar == 34) { jjCheckNAddStates(3, 5); } else if (curChar == 61) - { jjCheckNAddTwoStates(31, 37); } + { jjCheckNAddTwoStates(31, 38); } else if (curChar == 45) { jjCheckNAddTwoStates(23, 29); } else if (curChar == 33) @@ -565,7 +565,7 @@ else if (curChar == 35) break; case 30: if (curChar == 61) - { jjCheckNAddTwoStates(31, 37); } + { jjCheckNAddTwoStates(31, 38); } break; case 31: if ((0xdfffffffffffffffL & l) != 0L) @@ -588,100 +588,104 @@ else if (curChar == 35) break; if (kind > 12) kind = 12; - { jjCheckNAdd(36); } + { jjCheckNAddTwoStates(36, 37); } break; case 36: if ((0x3ff200000000000L & l) == 0L) break; if (kind > 12) kind = 12; - { jjCheckNAdd(36); } + { jjCheckNAddTwoStates(36, 37); } break; case 37: + if (curChar == 63 && kind > 12) + kind = 12; + break; + case 38: if (curChar == 61) jjstateSet[jjnewStateCnt++] = 35; break; - case 38: + case 39: if (curChar == 34) { 
jjCheckNAddStates(3, 5); } break; - case 39: + case 40: if ((0xfffffffbffffffffL & l) != 0L) { jjCheckNAddStates(3, 5); } break; - case 41: + case 42: { jjCheckNAddStates(3, 5); } break; - case 42: + case 43: if (curChar == 34 && kind > 13) kind = 13; break; - case 43: + case 44: if (curChar == 39) { jjCheckNAddStates(0, 2); } break; - case 44: + case 45: if ((0xffffff7fffffffffL & l) != 0L) { jjCheckNAddStates(0, 2); } break; - case 46: + case 47: { jjCheckNAddStates(0, 2); } break; - case 47: + case 48: if (curChar == 39 && kind > 14) kind = 14; break; - case 49: + case 50: if ((0x3ff200000000000L & l) == 0L) break; if (kind > 15) kind = 15; - jjstateSet[jjnewStateCnt++] = 49; + jjstateSet[jjnewStateCnt++] = 50; break; - case 50: + case 51: if (curChar == 37) - jjstateSet[jjnewStateCnt++] = 51; + jjstateSet[jjnewStateCnt++] = 52; break; - case 52: + case 53: if ((0x3ff000000000000L & l) == 0L) break; if (kind > 17) kind = 17; - { jjCheckNAdd(52); } + { jjCheckNAdd(53); } break; - case 54: + case 55: if ((0x3ff000000000000L & l) != 0L) { jjAddStates(21, 22); } break; - case 55: + case 56: if (curChar == 61) - { jjCheckNAdd(56); } + { jjCheckNAdd(57); } break; - case 56: + case 57: if ((0x3ff000000000000L & l) == 0L) break; if (kind > 18) kind = 18; - { jjCheckNAddTwoStates(56, 57); } + { jjCheckNAddTwoStates(57, 58); } break; - case 57: + case 58: if (curChar == 44) - { jjCheckNAdd(58); } + { jjCheckNAdd(59); } break; - case 58: + case 59: if ((0x3ff000000000000L & l) != 0L) - { jjCheckNAddTwoStates(58, 59); } + { jjCheckNAddTwoStates(59, 60); } break; - case 59: + case 60: if (curChar == 61) - { jjCheckNAdd(60); } + { jjCheckNAdd(61); } break; - case 60: + case 61: if ((0x3ff000000000000L & l) == 0L) break; if (kind > 18) kind = 18; - { jjCheckNAddTwoStates(57, 60); } + { jjCheckNAddTwoStates(58, 61); } break; default : break; } @@ -699,10 +703,10 @@ else if (curChar < 128) { if (kind > 15) kind = 15; - { jjCheckNAdd(49); } + { jjCheckNAdd(50); } } else if 
(curChar == 64) - { jjCheckNAdd(54); } + { jjCheckNAdd(55); } else if (curChar == 94) jjstateSet[jjnewStateCnt++] = 12; if ((0x100000001000L & l) != 0L) @@ -764,72 +768,72 @@ else if (curChar == 94) break; if (kind > 12) kind = 12; - jjstateSet[jjnewStateCnt++] = 36; + { jjAddStates(25, 26); } break; - case 39: + case 40: if ((0xffffffffefffffffL & l) != 0L) { jjCheckNAddStates(3, 5); } break; - case 40: + case 41: if (curChar == 92) - jjstateSet[jjnewStateCnt++] = 41; + jjstateSet[jjnewStateCnt++] = 42; break; - case 41: + case 42: { jjCheckNAddStates(3, 5); } break; - case 44: + case 45: if ((0xffffffffefffffffL & l) != 0L) { jjCheckNAddStates(0, 2); } break; - case 45: + case 46: if (curChar == 92) - jjstateSet[jjnewStateCnt++] = 46; + jjstateSet[jjnewStateCnt++] = 47; break; - case 46: + case 47: { jjCheckNAddStates(0, 2); } break; - case 48: case 49: + case 50: if ((0x7fffffe87fffffeL & l) == 0L) break; if (kind > 15) kind = 15; - { jjCheckNAdd(49); } + { jjCheckNAdd(50); } break; - case 51: + case 52: if ((0x101800001018L & l) == 0L) break; if (kind > 16) kind = 16; - jjstateSet[jjnewStateCnt++] = 51; + jjstateSet[jjnewStateCnt++] = 52; break; - case 53: + case 54: if (curChar == 64) - { jjCheckNAdd(54); } + { jjCheckNAdd(55); } break; - case 54: + case 55: if ((0x7fffffe87fffffeL & l) != 0L) - { jjCheckNAddTwoStates(54, 55); } + { jjCheckNAddTwoStates(55, 56); } break; - case 56: + case 57: if ((0x7fffffe87fffffeL & l) == 0L) break; if (kind > 18) kind = 18; - { jjCheckNAddTwoStates(56, 57); } + { jjCheckNAddTwoStates(57, 58); } break; - case 58: + case 59: if ((0x7fffffe87fffffeL & l) != 0L) - { jjAddStates(25, 26); } + { jjAddStates(27, 28); } break; - case 60: + case 61: if ((0x7fffffe87fffffeL & l) == 0L) break; if (kind > 18) kind = 18; - { jjCheckNAddTwoStates(57, 60); } + { jjCheckNAddTwoStates(58, 61); } break; - case 61: + case 62: if ((0x100000001000L & l) != 0L) { jjAddStates(23, 24); } break; @@ -878,13 +882,13 @@ else if (curChar == 94) if 
(jjCanMove_0(hiByte, i1, i2, l1, l2)) { jjCheckNAddStates(18, 20); } break; - case 39: - case 41: + case 40: + case 42: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) { jjCheckNAddStates(3, 5); } break; - case 44: - case 46: + case 45: + case 47: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) { jjCheckNAddStates(0, 2); } break; @@ -899,7 +903,7 @@ else if (curChar == 94) kind = 0x7fffffff; } ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 62 - (jjnewStateCnt = startsAt))) + if ((i = jjnewStateCnt) == (startsAt = 63 - (jjnewStateCnt = startsAt))) break; try { curChar = input_stream.readChar(); } catch(java.io.IOException e) { break; } @@ -925,8 +929,8 @@ else if (jjmatchedPos == strPos && jjmatchedKind > strKind) return toRet; } static final int[] jjnextStates = { - 44, 45, 47, 39, 40, 42, 1, 2, 4, 6, 7, 10, 14, 15, 19, 24, - 25, 29, 32, 33, 37, 54, 55, 38, 43, 58, 59, + 45, 46, 48, 40, 41, 43, 1, 2, 4, 6, 7, 10, 14, 15, 19, 24, + 25, 29, 32, 33, 38, 55, 56, 39, 44, 36, 37, 59, 60, }; private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2) { @@ -1109,7 +1113,7 @@ private void ReInitRounds() { int i; jjround = 0x80000001; - for (i = 62; i-- > 0;) + for (i = 63; i-- > 0;) jjrounds[i] = 0x80000000; } @@ -1145,8 +1149,8 @@ public void SwitchTo(int lexState) }; protected JavaCharStream input_stream; - private final int[] jjrounds = new int[62]; - private final int[] jjstateSet = new int[2 * 62]; + private final int[] jjrounds = new int[63]; + private final int[] jjstateSet = new int[2 * 63]; protected int curChar; diff --git a/query-parser/src/main/javacc/nl/inl/blacklab/queryParser/corpusql/cql.jj b/query-parser/src/main/javacc/nl/inl/blacklab/queryParser/corpusql/cql.jj index 7ffb55ec6..8184db3c2 100644 --- a/query-parser/src/main/javacc/nl/inl/blacklab/queryParser/corpusql/cql.jj +++ b/query-parser/src/main/javacc/nl/inl/blacklab/queryParser/corpusql/cql.jj @@ -99,7 +99,7 @@ TOKEN [IGNORE_CASE]: /* valid are e.g. 
--> -root-> -.*-> -a-b-> (and same preceded by ^ or !) */ | ", " ", "'", "\"", "]"] )* )? "->" (["A"-"Z","a"-"z","_","-","0"-"9"])* > | ", " ", "'", "\"", "]"] )* )? "->" (["A"-"Z","a"-"z","_","-","0"-"9"])* > -| ", " ", "'", "\"", "]"] )* )? "=>" (["A"-"Z","a"-"z","_","-","0"-"9"])* > +| ", " ", "'", "\"", "]"] )* )? "=>" (["A"-"Z","a"-"z","_","-","0"-"9"])* ("?")? > | | | diff --git a/site/docs/guide/corpus-query-language.md b/site/docs/guide/corpus-query-language.md index 87eda7e11..c35b2efc2 100644 --- a/site/docs/guide/corpus-query-language.md +++ b/site/docs/guide/corpus-query-language.md @@ -534,11 +534,11 @@ Assuming your data has both sentence and word alignments, and you want to find a This should find aligning English and Dutch sentences, including any word alignments between words in those sentences. You can also filter by alignment type, as we'll show later. -::: details If no alignments are found +::: details Required versus optional alignment -The `==>` operator will try to find alignments, but won't skip a hit if it couldn't find any aligments for that hit. Instead, the hit will simply be reported without any alignment information. +The `==>` operator will _require_ that an alignment exists. If you wish to see all hits on the left side of `==>nl` regardless of whether any alignments to the right side can be found, use `==>nl?`. -For example, if you're searching for translations of `cat` to Dutch, you will see both English `cat` hits where the translation to Dutch was found, and `cat` hits where it wasn't. +For example, if you're searching for translations of `cat` to Dutch, with `==>nl` you will _only_ see instances where `cat` is aligned to a Dutch word; on the other hand, with `==>nl?` you will see both English `cat` hits where the translation to Dutch was found, and `cat` hits where it wasn't. :::