Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EQL: Sequences will now support nano-timestamps #76953

Merged
merged 11 commits into from
Sep 1, 2021
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
*/
package org.elasticsearch.test.eql;

import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.instanceOf;
import static org.junit.Assert.assertThat;

Expand All @@ -18,6 +19,7 @@
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.function.Consumer;

import org.apache.http.HttpHost;
import org.apache.logging.log4j.LogManager;
Expand All @@ -44,20 +46,21 @@
* Loads EQL dataset into ES.
*
* Checks for predefined indices:
* - endgame-140 - for existing data
* - extra - additional data
* - endgame-140 - for existing data
* - endgame-140-nanos - same as endgame-140, but with nano-precision timestamps
* - extra - additional data
*
* While the loader could be made generic, the queries are bound to each index and generalizing that would make things way too complicated.
*/
public class DataLoader {
public static final String TEST_INDEX = "endgame-140";
public static final String TEST_EXTRA_INDEX = "extra";
public static final String DATE_NANOS_INDEX = "eql_date_nanos";
public static final String TEST_NANOS_INDEX = "endgame-140-nanos";

private static final Map<String, String[]> replacementPatterns = Collections.unmodifiableMap(getReplacementPatterns());

private static final long FILETIME_EPOCH_DIFF = 11644473600000L;
private static final long FILETIME_ONE_MILLISECOND = 10 * 1000;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 for the explanations

private static final long FILETIME_EPOCH_DIFF = 11644473600000L; // millis delta from the start of year 1601 (Windows filetime) to 1970
private static final long FILETIME_ONE_MILLISECOND = 10 * 1000; // Windows filetime is in 100-nanoseconds ticks

// runs as java main
private static boolean main = false;
Expand Down Expand Up @@ -86,33 +89,34 @@ public static void loadDatasetIntoEs(RestHighLevelClient client,
//
// Main Index
//
load(client, TEST_INDEX, true, p);
load(client, TEST_INDEX, null, DataLoader::timestampToUnixMillis, p);
//
// Aux Index
//
load(client, TEST_EXTRA_INDEX, false, p);
load(client, TEST_EXTRA_INDEX, null, null, p);
//
// Date_Nanos index
//
// The data for this index are identical to the endgame-140.data with only the values for @timestamp changed.
// The data for this index is loaded from the same endgame-140.data sample, only having the mapping for @timestamp changed: the
// chosen Windows filetime timestamps (2017+) can coincidentally also be readily used as nano-resolution unix timestamps (1973+).
// There are mixed values with and without nanos precision so that the filtering is properly tested for both cases.
load(client, DATE_NANOS_INDEX, false, p);
load(client, TEST_NANOS_INDEX, TEST_INDEX, DataLoader::timestampToUnixNanos, p);
}

private static void load(RestHighLevelClient client, String indexName, boolean winFileTime,
private static void load(RestHighLevelClient client, String indexName, String dataName, Consumer<Map<String, Object>> datasetTransform,
CheckedBiFunction<XContent, InputStream, XContentParser, IOException> p) throws IOException {
String name = "/data/" + indexName + ".mapping";
URL mapping = DataLoader.class.getResource(name);
if (mapping == null) {
throw new IllegalArgumentException("Cannot find resource " + name);
}
name = "/data/" + indexName + ".data";
name = "/data/" + (dataName != null ? dataName : indexName) + ".data";
URL data = DataLoader.class.getResource(name);
if (data == null) {
throw new IllegalArgumentException("Cannot find resource " + name);
}
createTestIndex(client, indexName, readMapping(mapping));
loadData(client, indexName, winFileTime, data, p);
loadData(client, indexName, datasetTransform, data, p);
}

private static void createTestIndex(RestHighLevelClient client, String indexName, String mapping) throws IOException {
Expand Down Expand Up @@ -147,8 +151,8 @@ private static CharSequence randomOf(String...values) {
}

@SuppressWarnings("unchecked")
private static void loadData(RestHighLevelClient client, String indexName, boolean winfileTime, URL resource,
CheckedBiFunction<XContent, InputStream, XContentParser, IOException> p)
private static void loadData(RestHighLevelClient client, String indexName, Consumer<Map<String, Object>> datasetTransform,
URL resource, CheckedBiFunction<XContent, InputStream, XContentParser, IOException> p)
throws IOException {
BulkRequest bulk = new BulkRequest();
bulk.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
Expand All @@ -158,8 +162,8 @@ private static void loadData(RestHighLevelClient client, String indexName, boole
for (Object item : list) {
assertThat(item, instanceOf(Map.class));
Map<String, Object> entry = (Map<String, Object>) item;
if (winfileTime) {
transformDataset(entry);
if (datasetTransform != null) {
datasetTransform.accept(entry);
}
bulk.add(new IndexRequest(indexName).source(entry, XContentType.JSON));
}
Expand All @@ -175,14 +179,28 @@ private static void loadData(RestHighLevelClient client, String indexName, boole
}
}

private static void transformDataset(Map<String, Object> entry) {
private static void timestampToUnixMillis(Map<String, Object> entry) {
Object object = entry.get("timestamp");
assertThat(object, instanceOf(Long.class));
Long ts = (Long) object;
// currently this is windows filetime
entry.put("@timestamp", winFileTimeToUnix(ts));
}

private static void timestampToUnixNanos(Map<String, Object> entry) {
Object object = entry.get("timestamp");
assertThat(object, instanceOf(Long.class));
// interpret the value as nanos since the unix epoch
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The chosen Windows filetime timestamps (2017+) can coincidently also be readily used as nano-resolution unix timestamps (1973+). Which warrants conveniently using the same endgame-140.data file also the nano tests.

String timestamp = object.toString();
assertThat(timestamp.length(), greaterThan(12));
// avoid double approximations and BigDecimal ops
String millis = timestamp.substring(0, 12);
String milliFraction = timestamp.substring(12);
// strip the fractions right away if not actually present
entry.put("@timestamp", milliFraction.equals("000000") ? millis : millis + "." + milliFraction);
entry.put("timestamp", ((long) object)/1_000_000L);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For date_nanos field type needs to either provide millis or an ISO9601 format. The value of the object is however holding the nanos.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant why do you need timestamp to hold the same value as it did before, but without the nanos in it. Is timestamp actually used in tests?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just like for the existing millis tests, timestamp is populated but not used in the qa tests. Having it with a meaningful value allows however quick millis vs nanos comparison tests though (used mostly for troubleshooting, only changing the timestamp_field).

}

public static long winFileTimeToUnix(final long filetime) {
long ts = (filetime / FILETIME_ONE_MILLISECOND);
return ts - FILETIME_EPOCH_DIFF;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,22 @@

package org.elasticsearch.test.eql;

import static org.elasticsearch.test.eql.DataLoader.DATE_NANOS_INDEX;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;

import java.util.HashSet;
import java.util.List;

import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import static org.elasticsearch.test.eql.DataLoader.TEST_NANOS_INDEX;

public abstract class EqlDateNanosSpecTestCase extends BaseEqlSpecTestCase {

@ParametersFactory(shuffle = false, argumentFormatting = PARAM_FORMATTING)
public static List<Object[]> readTestSpecs() throws Exception {
return asArray(EqlSpecLoader.load("/test_queries_date_nanos.toml", new HashSet<>()));
return asArray(EqlSpecLoader.load("/test_queries_date_nanos.toml", "/test_queries.toml"));
}

// constructor for "local" rest tests
public EqlDateNanosSpecTestCase(String query, String name, long[] eventIds) {
this(DATE_NANOS_INDEX, query, name, eventIds);
this(TEST_NANOS_INDEX, query, name, eventIds);
}

// constructor for multi-cluster tests
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;

import java.util.HashSet;
import java.util.List;

import static org.elasticsearch.test.eql.DataLoader.TEST_EXTRA_INDEX;
Expand All @@ -18,7 +17,7 @@ public abstract class EqlExtraSpecTestCase extends BaseEqlSpecTestCase {

@ParametersFactory(shuffle = false, argumentFormatting = PARAM_FORMATTING)
public static List<Object[]> readTestSpecs() throws Exception {
return asArray(EqlSpecLoader.load("/test_extra.toml", new HashSet<>()));
return asArray(EqlSpecLoader.load("/test_extra.toml"));
}

// constructor for "local" rest tests
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

Expand All @@ -29,6 +30,15 @@ public static List<EqlSpec> load(String path, Set<String> uniqueTestNames) throw
}
}

public static List<EqlSpec> load(String ...paths) throws Exception {
Set<String> uniqueTestNames = new HashSet<>();
List<EqlSpec> specs = new ArrayList<>();
for (String path: paths) {
specs.addAll(load(path, uniqueTestNames));
}
return specs;
}

private static void validateAndAddSpec(List<EqlSpec> specs, EqlSpec spec, Set<String> uniqueTestNames) {
if (Strings.isNullOrEmpty(spec.name())) {
throw new IllegalArgumentException("Read a test without a name value");
Expand All @@ -41,7 +51,7 @@ private static void validateAndAddSpec(List<EqlSpec> specs, EqlSpec spec, Set<St
if (spec.expectedEventIds() == null) {
throw new IllegalArgumentException("Read a test without a expected_event_ids value");
}
if (uniqueTestNames.contains(spec.name())) {
if (uniqueTestNames.contains(spec.name())) { // TODO: scope it per file?
throw new IllegalArgumentException("Found a test with the same name as another test: " + spec.name());
} else {
uniqueTestNames.add(spec.name());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@

import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;

import java.util.HashSet;
import java.util.List;
import java.util.Set;

import static org.elasticsearch.test.eql.DataLoader.TEST_INDEX;

Expand All @@ -21,12 +19,7 @@ public abstract class EqlSpecTestCase extends BaseEqlSpecTestCase {
public static List<Object[]> readTestSpecs() throws Exception {

// Load EQL validation specs
Set<String> uniqueTestNames = new HashSet<>();
List<EqlSpec> specs = EqlSpecLoader.load("/test_queries.toml", uniqueTestNames);
specs.addAll(EqlSpecLoader.load("/additional_test_queries.toml", uniqueTestNames));
specs.addAll(EqlSpecLoader.load("/test_queries_date.toml", uniqueTestNames));

return asArray(specs);
return asArray(EqlSpecLoader.load("/test_queries.toml", "/additional_test_queries.toml", "/test_queries_date.toml"));
}

@Override
Expand Down
Loading