Remove lowercase_expanded_terms and locale from query-parser options. (#20208)

Lucene 6.2 introduces the new `Analyzer.normalize` API, which applies only
character-level normalization such as lowercasing or accent folding. This is
exactly what is needed to process queries that operate on partial terms such
as `prefix`, `wildcard` or `fuzzy` queries. As a consequence, the
`lowercase_expanded_terms` option is no longer necessary. Furthermore, the
`locale` option was only needed to know how to perform the lowercasing, so it
can be removed as well.

Closes #9978
jpountz committed Nov 2, 2016
1 parent dbdc9df commit 14d2b12
Showing 29 changed files with 193 additions and 470 deletions.
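
The change builds on Lucene 6.2's `Analyzer.normalize(String field, String text)`, which runs only the character-level filters of the analysis chain and returns a `BytesRef`. A minimal sketch of how it behaves (illustrative only; the `title` field name and the choice of `StandardAnalyzer`, whose normalization chain lowercases, are assumptions, not part of this commit):

```java
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.BytesRef;

public class NormalizeExample {
    public static void main(String[] args) {
        Analyzer analyzer = new StandardAnalyzer();
        // A partial term such as the "FOo" of the wildcard query "FOo*" cannot
        // be tokenized as a whole, but it can still be normalized:
        BytesRef normalized = analyzer.normalize("title", "FOo");
        System.out.println(normalized.utf8ToString()); // prints "foo"
    }
}
```

This is why the parser no longer needs a `locale` to lowercase expanded terms: normalization is delegated to the analyzer configured for the field.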
@@ -24,6 +24,7 @@
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.index.Term;
+import org.apache.lucene.queryparser.analyzing.AnalyzingQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
@@ -34,6 +35,7 @@
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.common.lucene.search.Queries;
@@ -42,6 +44,7 @@
import org.elasticsearch.index.mapper.LegacyDateFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.index.mapper.StringFieldType;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.query.support.QueryParsers;

@@ -63,7 +66,7 @@
* Also breaks fields with [type].[name] into a boolean query that must include the type
* as well as the query on the name.
*/
-public class MapperQueryParser extends QueryParser {
+public class MapperQueryParser extends AnalyzingQueryParser {

public static final Map<String, FieldQueryExtension> FIELD_QUERY_EXTENSIONS;

@@ -99,11 +102,10 @@ public void reset(QueryParserSettings settings) {
setAutoGeneratePhraseQueries(settings.autoGeneratePhraseQueries());
setMaxDeterminizedStates(settings.maxDeterminizedStates());
setAllowLeadingWildcard(settings.allowLeadingWildcard());
-setLowercaseExpandedTerms(settings.lowercaseExpandedTerms());
+setLowercaseExpandedTerms(false);
setPhraseSlop(settings.phraseSlop());
setDefaultOperator(settings.defaultOperator());
setFuzzyPrefixLength(settings.fuzzyPrefixLength());
-setLocale(settings.locale());
setSplitOnWhitespace(settings.splitOnWhitespace());
}

@@ -330,21 +332,20 @@ private Query getRangeQuerySingle(String field, String part1, String part2,
boolean startInclusive, boolean endInclusive, QueryShardContext context) {
currentFieldType = context.fieldMapper(field);
if (currentFieldType != null) {
-if (lowercaseExpandedTerms && currentFieldType.tokenized()) {
-part1 = part1 == null ? null : part1.toLowerCase(locale);
-part2 = part2 == null ? null : part2.toLowerCase(locale);
-}
-
try {
+BytesRef part1Binary = part1 == null ? null : getAnalyzer().normalize(field, part1);
+BytesRef part2Binary = part2 == null ? null : getAnalyzer().normalize(field, part2);
Query rangeQuery;
if (currentFieldType instanceof LegacyDateFieldMapper.DateFieldType && settings.timeZone() != null) {
LegacyDateFieldMapper.DateFieldType dateFieldType = (LegacyDateFieldMapper.DateFieldType) this.currentFieldType;
-rangeQuery = dateFieldType.rangeQuery(part1, part2, startInclusive, endInclusive, settings.timeZone(), null, context);
+rangeQuery = dateFieldType.rangeQuery(part1Binary, part2Binary,
+startInclusive, endInclusive, settings.timeZone(), null, context);
} else if (currentFieldType instanceof DateFieldMapper.DateFieldType && settings.timeZone() != null) {
DateFieldMapper.DateFieldType dateFieldType = (DateFieldMapper.DateFieldType) this.currentFieldType;
-rangeQuery = dateFieldType.rangeQuery(part1, part2, startInclusive, endInclusive, settings.timeZone(), null, context);
+rangeQuery = dateFieldType.rangeQuery(part1Binary, part2Binary,
+startInclusive, endInclusive, settings.timeZone(), null, context);
} else {
-rangeQuery = currentFieldType.rangeQuery(part1, part2, startInclusive, endInclusive, context);
+rangeQuery = currentFieldType.rangeQuery(part1Binary, part2Binary, startInclusive, endInclusive, context);
}
return rangeQuery;
} catch (RuntimeException e) {
@@ -358,9 +359,6 @@
}

protected Query getFuzzyQuery(String field, String termStr, String minSimilarity) throws ParseException {
-if (lowercaseExpandedTerms) {
-termStr = termStr.toLowerCase(locale);
-}
Collection<String> fields = extractMultiFields(field);
if (fields != null) {
if (fields.size() == 1) {
@@ -399,8 +397,9 @@ private Query getFuzzyQuerySingle(String field, String termStr, String minSimilarity) throws ParseException {
currentFieldType = context.fieldMapper(field);
if (currentFieldType != null) {
try {
-return currentFieldType.fuzzyQuery(termStr, Fuzziness.build(minSimilarity),
-fuzzyPrefixLength, settings.fuzzyMaxExpansions(), FuzzyQuery.defaultTranspositions);
+BytesRef term = termStr == null ? null : getAnalyzer().normalize(field, termStr);
+return currentFieldType.fuzzyQuery(term, Fuzziness.build(minSimilarity),
+getFuzzyPrefixLength(), settings.fuzzyMaxExpansions(), FuzzyQuery.defaultTranspositions);
} catch (RuntimeException e) {
if (settings.lenient()) {
return null;
@@ -423,9 +422,6 @@ protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {

@Override
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
-if (lowercaseExpandedTerms) {
-termStr = termStr.toLowerCase(locale);
-}
Collection<String> fields = extractMultiFields(field);
if (fields != null) {
if (fields.size() == 1) {
@@ -471,8 +467,8 @@ private Query getPrefixQuerySingle(String field, String termStr) throws ParseException {
setAnalyzer(context.getSearchAnalyzer(currentFieldType));
}
Query query = null;
-if (currentFieldType.tokenized() == false) {
-query = currentFieldType.prefixQuery(termStr, multiTermRewriteMethod, context);
+if (currentFieldType instanceof StringFieldType == false) {
+query = currentFieldType.prefixQuery(termStr, getMultiTermRewriteMethod(), context);
}
if (query == null) {
query = getPossiblyAnalyzedPrefixQuery(currentFieldType.name(), termStr);
@@ -590,9 +586,6 @@ protected Query getWildcardQuery(String field, String termStr) throws ParseException {
return FIELD_QUERY_EXTENSIONS.get(ExistsFieldQueryExtension.NAME).query(context, actualField);
}
}
-if (lowercaseExpandedTerms) {
-termStr = termStr.toLowerCase(locale);
-}
Collection<String> fields = extractMultiFields(field);
if (fields != null) {
if (fields.size() == 1) {
@@ -639,9 +632,8 @@ private Query getWildcardQuerySingle(String field, String termStr) throws ParseException {
setAnalyzer(context.getSearchAnalyzer(currentFieldType));
}
indexedNameField = currentFieldType.name();
-return getPossiblyAnalyzedWildcardQuery(indexedNameField, termStr);
}
-return getPossiblyAnalyzedWildcardQuery(indexedNameField, termStr);
+return super.getWildcardQuery(indexedNameField, termStr);
} catch (RuntimeException e) {
if (settings.lenient()) {
return null;
@@ -652,75 +644,8 @@ private Query getWildcardQuerySingle(String field, String termStr) throws ParseException {
}
}

-private Query getPossiblyAnalyzedWildcardQuery(String field, String termStr) throws ParseException {
-if (!settings.analyzeWildcard()) {
-return super.getWildcardQuery(field, termStr);
-}
-boolean isWithinToken = (!termStr.startsWith("?") && !termStr.startsWith("*"));
-StringBuilder aggStr = new StringBuilder();
-StringBuilder tmp = new StringBuilder();
-for (int i = 0; i < termStr.length(); i++) {
-char c = termStr.charAt(i);
-if (c == '?' || c == '*') {
-if (isWithinToken) {
-try (TokenStream source = getAnalyzer().tokenStream(field, tmp.toString())) {
-source.reset();
-CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
-if (source.incrementToken()) {
-String term = termAtt.toString();
-if (term.length() == 0) {
-// no tokens, just use what we have now
-aggStr.append(tmp);
-} else {
-aggStr.append(term);
-}
-} else {
-// no tokens, just use what we have now
-aggStr.append(tmp);
-}
-} catch (IOException e) {
-aggStr.append(tmp);
-}
-tmp.setLength(0);
-}
-isWithinToken = false;
-aggStr.append(c);
-} else {
-tmp.append(c);
-isWithinToken = true;
-}
-}
-if (isWithinToken) {
-try {
-try (TokenStream source = getAnalyzer().tokenStream(field, tmp.toString())) {
-source.reset();
-CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
-if (source.incrementToken()) {
-String term = termAtt.toString();
-if (term.length() == 0) {
-// no tokens, just use what we have now
-aggStr.append(tmp);
-} else {
-aggStr.append(term);
-}
-} else {
-// no tokens, just use what we have now
-aggStr.append(tmp);
-}
-}
-} catch (IOException e) {
-aggStr.append(tmp);
-}
-}
-
-return super.getWildcardQuery(field, aggStr.toString());
-}
-
@Override
protected Query getRegexpQuery(String field, String termStr) throws ParseException {
-if (lowercaseExpandedTerms) {
-termStr = termStr.toLowerCase(locale);
-}
Collection<String> fields = extractMultiFields(field);
if (fields != null) {
if (fields.size() == 1) {
@@ -768,7 +693,7 @@ private Query getRegexpQuerySingle(String field, String termStr) throws ParseException {
Query query = null;
if (currentFieldType.tokenized() == false) {
query = currentFieldType.regexpQuery(termStr, RegExp.ALL,
-maxDeterminizedStates, multiTermRewriteMethod, context);
+getMaxDeterminizedStates(), getMultiTermRewriteMethod(), context);
}
if (query == null) {
query = super.getRegexpQuery(field, termStr);
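
For reference, the `getPossiblyAnalyzedWildcardQuery` helper deleted above split the wildcard pattern at `*`/`?` and analyzed each literal chunk; extending `AnalyzingQueryParser` makes that hand-rolled logic unnecessary. A simplified standalone sketch of the same idea, expressed with the new normalize API (an illustration under assumed names, not the Lucene implementation):

```java
import org.apache.lucene.analysis.Analyzer;

class WildcardNormalizeSketch {
    // Normalize the literal chunks of a wildcard pattern while leaving the
    // '*' and '?' operators untouched, e.g. "FOO*Bar?" becomes "foo*bar?".
    static String normalizeWildcard(Analyzer analyzer, String field, String pattern) {
        StringBuilder out = new StringBuilder();
        StringBuilder chunk = new StringBuilder();
        for (int i = 0; i < pattern.length(); i++) {
            char c = pattern.charAt(i);
            if (c == '*' || c == '?') {
                if (chunk.length() > 0) { // flush the pending literal chunk
                    out.append(analyzer.normalize(field, chunk.toString()).utf8ToString());
                    chunk.setLength(0);
                }
                out.append(c); // keep the wildcard operator as-is
            } else {
                chunk.append(c);
            }
        }
        if (chunk.length() > 0) {
            out.append(analyzer.normalize(field, chunk.toString()).utf8ToString());
        }
        return out.toString();
    }
}
```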
@@ -24,7 +24,6 @@
import org.elasticsearch.common.unit.Fuzziness;
import org.joda.time.DateTimeZone;

-import java.util.Locale;
import java.util.Map;

/**
@@ -53,12 +52,8 @@ public class QueryParserSettings {

private boolean analyzeWildcard;

-private boolean lowercaseExpandedTerms;
-
private boolean enablePositionIncrements;

-private Locale locale;
-
private Fuzziness fuzziness;
private int fuzzyPrefixLength;
private int fuzzyMaxExpansions;
@@ -137,14 +132,6 @@ public void allowLeadingWildcard(boolean allowLeadingWildcard) {
this.allowLeadingWildcard = allowLeadingWildcard;
}

-public boolean lowercaseExpandedTerms() {
-return lowercaseExpandedTerms;
-}
-
-public void lowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
-this.lowercaseExpandedTerms = lowercaseExpandedTerms;
-}
-
public boolean enablePositionIncrements() {
return enablePositionIncrements;
}
@@ -269,14 +256,6 @@ public void useDisMax(boolean useDisMax) {
this.useDisMax = useDisMax;
}

-public void locale(Locale locale) {
-this.locale = locale;
-}
-
-public Locale locale() {
-return this.locale;
-}
-
public void timeZone(DateTimeZone timeZone) {
this.timeZone = timeZone;
}
@@ -54,7 +54,6 @@
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
-import java.util.function.LongSupplier;
import java.util.function.Supplier;
import java.util.stream.Collectors;
