Skip to content

Commit

Permalink
Change default index type to integrated.
Browse files Browse the repository at this point in the history
This new index type has now been thoroughly tested and should
work without issues. If necessary, you can still use the older "external files"
index type by passing --index-type external to IndexTool, or by setting the
environment variable BLACKLAB_FEATURE_defaultIndexType to "external".
  • Loading branch information
jan-niestadt committed Jul 1, 2024
1 parent d99ac39 commit 165b7f4
Show file tree
Hide file tree
Showing 10 changed files with 44 additions and 24 deletions.
24 changes: 18 additions & 6 deletions engine/src/main/java/nl/inl/blacklab/search/BlackLab.java
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,12 @@ public final class BlackLab {
/** Global settings are read from file and applied to the different parts of BL once. */
private static boolean globalSettingsApplied = false;

public static final String FEATURE_INTEGRATE_EXTERNAL_FILES = "integrateExternalFiles";
/** Name of the feature flag that controls BlackLab's default index type. If the flag is not
* set, the default is the new integrated index. Set it to 'external' (case-insensitive) to use
* the legacy index type with external forward index that was the default in BlackLab 3.x.
* Used for testing.
*/
public static final String FEATURE_DEFAULT_INDEX_TYPE = "defaultIndexType";

private static RuleBasedCollator fieldValueSortCollator = null;

/**
Expand Down Expand Up @@ -370,15 +375,22 @@ public synchronized static BlackLabConfig config() {
return blackLabConfig;
}

/**
* Get the value of a feature flag.
*
* Feature flags can be set in the environment (BLACKLAB_FEATURE_<flagName>) or in the
* blacklab[-server].yaml configuration file under the 'featureFlags' key.
*
* Used for testing both index types.
*
* @param name name of the feature flag
* @return value of the feature flag, or an empty string if not set
*/
public static String featureFlag(String name) {
String value = System.getenv("BLACKLAB_FEATURE_" + name);
if (value == null)
value = config().getFeatureFlags().get(name);
return value;
}

public static boolean isFeatureEnabled(String name) {
return Boolean.parseBoolean(featureFlag(name));
return value == null ? "" : value;
}

/**
Expand Down
12 changes: 8 additions & 4 deletions engine/src/main/java/nl/inl/blacklab/search/BlackLabEngine.java
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,10 @@ public synchronized void close() {
}
}

public BlackLabIndexWriter openForWriting(String indexName, IndexReader reader, ConfigInputFormat format) throws ErrorOpeningIndex {
public BlackLabIndexWriter openForWriting(String indexName, IndexReader reader, ConfigInputFormat format,
IndexType indexType) throws ErrorOpeningIndex {
if (indexType != IndexType.INTEGRATED)
throw new RuntimeException("This version of the method only works with integrated indexes");
return new BlackLabIndexIntegrated(indexName, this, reader, null, true, false, format);
}

Expand All @@ -280,9 +283,10 @@ public BlackLabIndexWriter openForWriting(String indexName, IndexReader reader,
* @return the default index type
*/
public IndexType getDefaultIndexType() {
return BlackLab.isFeatureEnabled(BlackLab.FEATURE_INTEGRATE_EXTERNAL_FILES) ?
IndexType.INTEGRATED :
IndexType.EXTERNAL_FILES;
String defaultIndexType = BlackLab.featureFlag(BlackLab.FEATURE_DEFAULT_INDEX_TYPE);
return defaultIndexType.equalsIgnoreCase("external") ?
IndexType.EXTERNAL_FILES :
IndexType.INTEGRATED;
}

public BlackLabIndex open(File indexDir) throws ErrorOpeningIndex {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,9 @@ public void testStandoffSpans() throws IOException, InvalidQuery {

@After
public void teardown() {
testIndex.close();
testDir.close();
if (testIndex != null)
testIndex.close();
if (testDir != null)
testDir.close();
}
}
4 changes: 2 additions & 2 deletions index-corpus.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ if [ $# -lt 3 ]; then
echo ' - BLACKLAB_VERSION (optional) the BlackLab Docker image to use. Defaults to "latest",'
echo ' but it is recommended to use a specific tag, e.g. "4-alpha1".'
echo ' - INDEXTOOL_OPTIONS (optional) options to pass to IndexTool.'
echo ' Defaults to "--threads 4 --index-type integrated".'
echo ' Defaults to "--threads 4".'
echo
echo 'By default, a Java heap size of 6G is used. If you need more, set the environment'
echo 'variable BL_JAVA_HEAP_MEM to the desired value (e.g. "10G").'
Expand All @@ -47,7 +47,7 @@ BL_CORPUS_INPUT_DIR=$(realpath $2)
BL_CORPUS_FORMAT="$3"
BL_CORPUS_FORMAT_FILE=$(realpath "$BL_CORPUS_FORMAT")
BL_VERSION="${4:-latest}"
BL_INDEXTOOL_OPTIONS="${5:---threads 4 --index-type integrated}"
BL_INDEXTOOL_OPTIONS="${5:---threads 4}"

# Base names to use inside the container
BL_CORPUS_NAME=$(basename $BL_CORPUS_TARGET_DIR)
Expand Down
4 changes: 2 additions & 2 deletions site/docs/guide/how-to-configure-indexing.md
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@ Indexing and searching relations will be supported from BlackLab 4.0 (and curren

It is also possible to index relations (such as dependency relations) using [standoff annotations](#standoff-annotations). Aside from using the built-in `conll-u` DocIndexer, or implementing your own DocIndexer, this is currently the only way to index relations in BlackLab. Standoff annotations make the most sense as relations don't just apply to a span of words, but connect two different words (or word groups).

Please note that you must use the new integrated index format to index relations. You can do this by passing `--index-type integrated` to `IndexTool`. Alternatively, you may set the environment variable `BLACKLAB_FEATURE_integrateExternalFiles` to `true` before indexing (this changes the default index type to integrated). Future versions of BlackLab will default to the integrated index type.
Please note that the relation features only work with the newer integrated index type. This is now the default index type, so you don't need to pass any extra options to BlackLab.

```xml
<doc>
Expand Down Expand Up @@ -653,7 +653,7 @@ The above would allow you to search for `_ -nsubj-> "I"` to find "I support", wi
### Indexing parallel corpora

::: tip Supported from v4.0
Indexing and searching parallel corpoora will be supported from BlackLab 4.0 (and current development snapshots).
Indexing and searching parallel corpora will be supported from BlackLab 4.0 (and current development snapshots).
:::

TODO: how to index parallel corpus
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import nl.inl.blacklab.indexers.config.ConfigInputFormat;
import nl.inl.blacklab.indexers.config.InputFormatReader;
import nl.inl.blacklab.search.BlackLab;
import nl.inl.blacklab.search.BlackLabIndex;
import nl.inl.blacklab.search.BlackLabIndexWriter;
import nl.inl.blacklab.search.indexmetadata.AnnotatedField;
import nl.inl.blacklab.search.indexmetadata.AnnotatedFieldNameUtil;
Expand Down Expand Up @@ -70,7 +71,8 @@ public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stre

String fileName = params.get("bl.filename");
String indexName = req.getCore().getName();
try (BlackLabIndexWriter index = BlackLab.implicitInstance().openForWriting(indexName, reader, formatConfig)) {
try (BlackLabIndexWriter index = BlackLab.implicitInstance().openForWriting(indexName, reader, formatConfig,
BlackLabIndex.IndexType.INTEGRATED)) {
Indexer indexer = Indexer.create(index, paramFormat);
InputStream is = stream.getStream();

Expand Down
2 changes: 1 addition & 1 deletion test/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ services:
#- JPDA_ADDRESS=*:5005
#- JPDA_TRANSPORT=dt_socket
# Pass overridden feature flag(s) directly to the container (for testing both values)
- BLACKLAB_FEATURE_integrateExternalFiles
- BLACKLAB_FEATURE_defaultIndexType

volumes:
# Test data to index
Expand Down
2 changes: 1 addition & 1 deletion test/only-integrated.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ export DOCKER_BUILDKIT=1
# Re-run to test the other index format as well
echo === Testing integrated index format...
$COMPOSE build testserver "$SERVICE_NAME"
export BLACKLAB_FEATURE_integrateExternalFiles=true
export BLACKLAB_FEATURE_defaultIndexType=integrated
export INDEX_TYPE=integrated
$COMPOSE up -d --force-recreate testserver # (--force-recreate to avoid error 'network not found')
$COMPOSE run --rm "$SERVICE_NAME"
Expand Down
4 changes: 2 additions & 2 deletions test/testrunner.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ export DOCKER_BUILDKIT=1
# Build and run BlackLab Server
echo === Testing classic index format...
$COMPOSE build testserver "$SERVICE_NAME"
export BLACKLAB_FEATURE_integrateExternalFiles=false
export BLACKLAB_FEATURE_defaultIndexType=external
$COMPOSE up -d --force-recreate testserver # (--force-recreate to avoid error 'network not found')
$COMPOSE run --rm "$SERVICE_NAME"
$COMPOSE stop testserver # (stop then rm -v instead of down -v, otherwise we get an error about the volume being in use)
Expand All @@ -42,7 +42,7 @@ $COMPOSE rm -fv testserver
#----------------------------------------------------------
# Re-run to test the other index format as well
echo === Testing integrated index format...
export BLACKLAB_FEATURE_integrateExternalFiles=true
export BLACKLAB_FEATURE_defaultIndexType=integrated
export INDEX_TYPE=integrated
$COMPOSE up -d testserver
$COMPOSE run --rm "$SERVICE_NAME"
Expand Down
6 changes: 3 additions & 3 deletions tools/src/main/java/nl/inl/blacklab/tools/IndexTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,15 @@ public static void main(String[] args) throws ErrorOpeningIndex, ParseException
switch (name) {
case "index-type":
if (i + 1 == args.length || !List.of("integrated", "external").contains(args[i + 1].toLowerCase())) {
System.err.println("--index-type needs a parameter: integrated or external.");
System.err.println("--index-type needs a parameter: integrated (the default) or external (legacy index type).");
usage();
return;
}
indexType = args[i + 1].equalsIgnoreCase("integrated") ? IndexType.INTEGRATED : IndexType.EXTERNAL_FILES;
i++;
break;
case "integrate-external-files":
// NOTE: deprecated, use --index-type integrated instead
// NOTE: deprecated; the integrated index type is now the default (use --index-type external for the legacy variant)
if (i + 1 == args.length || !List.of("true", "false").contains(args[i + 1].toLowerCase())) {
System.err.println("--integrate-external-files needs a parameter: true or false.");
usage();
Expand Down Expand Up @@ -435,7 +435,7 @@ private static void usage() {
+ " --format-dir <d> Look in directory <d> for formats (i.e. .blf.yaml files)\n"
+ " --nothreads Disable multithreaded indexing (enabled by default)\n"
+ " --threads <n> Number of threads to use\n"
+ " --index-type <t> Set the index type, external (old) or integrated (new)\n"
+ " --index-type <t> Set the index type, integrated (new, default) or external (legacy)\n"
+ " --create-empty Create an empty index (ignore inputdir param)\n"
+ "\n"
+ "Available input format configurations:");
Expand Down

0 comments on commit 165b7f4

Please sign in to comment.