Skip to content

Commit

Permalink
Java bindings for mixed semi and anti joins (#10040)
Browse files Browse the repository at this point in the history
This depends on #9941 and #10037.  Adds Java bindings for mixed left semi join and mixed left anti join.

Authors:
  - Jason Lowe (https://github.com/jlowe)

Approvers:
  - Alessandro Bellina (https://github.com/abellina)
  - Thomas Graves (https://github.com/tgravescs)
  - Jim Brennan (https://github.com/jbrennan333)

URL: #10040
  • Loading branch information
jlowe authored Jan 25, 2022
1 parent a552afb commit 52a61b7
Show file tree
Hide file tree
Showing 3 changed files with 532 additions and 5 deletions.
208 changes: 203 additions & 5 deletions java/src/main/java/ai/rapids/cudf/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -616,10 +616,6 @@ private static native long[] conditionalInnerJoinGatherMapsWithCount(long leftTa
// Native method for the conditional full join gather maps. All inputs are passed as longs
// (presumably native handles for the two tables and the compiled condition -- confirm against
// the callers). The layout of the returned long[] is not visible in this excerpt.
private static native long[] conditionalFullJoinGatherMaps(long leftTable, long rightTable,
long condition) throws CudfException;

// Variant of the conditional full join native method that additionally takes a rowCount
// (presumably a previously computed output row count -- confirm against the callers).
private static native long[] conditionalFullJoinGatherMapsWithCount(long leftTable, long rightTable,
long condition,
long rowCount) throws CudfException;

// Native method returning a single long; judging by its name this is the output row count of a
// conditional left semi join -- confirm against the native implementation.
private static native long conditionalLeftSemiJoinRowCount(long leftTable, long rightTable,
long condition) throws CudfException;

Expand Down Expand Up @@ -670,6 +666,32 @@ private static native long[] mixedFullJoinGatherMaps(long leftKeysTable, long ri
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual);

// Native size computation for a mixed left semi join. The Java wrapper of the same name passes
// the native views of the four tables, the native handle of the compiled condition and whether
// nulls compare as equal. It expects a two-element result: [0] is the output row count and
// [1] is a native column handle that the wrapper wraps in a ColumnVector.
private static native long[] mixedLeftSemiJoinSize(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual);

// Native gather map computation for a mixed left semi join. The returned array is handed to
// buildSemiJoinGatherMap, which reads [0] as bufferSize, [1] as leftAddr and [2] as leftHandle.
private static native long[] mixedLeftSemiJoinGatherMap(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual);

// Same as mixedLeftSemiJoinGatherMap, but also receives size information computed earlier:
// the wrapper supplies outputRowCount from MixedJoinSize.getOutputRowCount() and
// matchesColumnView from the native view of MixedJoinSize.getMatches().
private static native long[] mixedLeftSemiJoinGatherMapWithSize(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual,
long outputRowCount, long matchesColumnView);

// Native size computation for a mixed left anti join. The Java wrapper of the same name expects
// a two-element result: [0] is the output row count and [1] is a native column handle that the
// wrapper wraps in a ColumnVector.
private static native long[] mixedLeftAntiJoinSize(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual);

// Native gather map computation for a mixed left anti join. The returned array is handed to
// buildSemiJoinGatherMap, which reads [0] as bufferSize, [1] as leftAddr and [2] as leftHandle.
private static native long[] mixedLeftAntiJoinGatherMap(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual);

// Same as mixedLeftAntiJoinGatherMap, but also receives size information computed earlier:
// the wrapper supplies outputRowCount from MixedJoinSize.getOutputRowCount() and
// matchesColumnView from the native view of MixedJoinSize.getMatches().
private static native long[] mixedLeftAntiJoinGatherMapWithSize(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual,
long outputRowCount, long matchesColumnView);

// Native method taking two longs (presumably native table handles -- confirm against the
// callers). The layout of the returned long[] is not visible in this excerpt.
private static native long[] crossJoin(long leftTable, long rightTable) throws CudfException;

// Native method taking an array of longs (by its parameter name, native table pointers).
// The layout of the returned long[] is not visible in this excerpt.
private static native long[] concatenate(long[] cudfTablePointers) throws CudfException;
Expand Down Expand Up @@ -2853,7 +2875,7 @@ public static GatherMap[] mixedFullJoinGatherMaps(Table leftKeys, Table rightKey
return buildJoinGatherMaps(gatherMapData);
}

private GatherMap buildSemiJoinGatherMap(long[] gatherMapData) {
private static GatherMap buildSemiJoinGatherMap(long[] gatherMapData) {
long bufferSize = gatherMapData[0];
long leftAddr = gatherMapData[1];
long leftHandle = gatherMapData[2];
Expand Down Expand Up @@ -2939,6 +2961,94 @@ public GatherMap conditionalLeftSemiJoinGatherMap(Table rightTable,
return buildSemiJoinGatherMap(gatherMapData);
}

/**
* Computes output size information for a left semi join between two tables using a mix of
* equality and inequality conditions. The entire join condition is assumed to be a logical AND
* of the equality condition and inequality condition.
* NOTE: It is the responsibility of the caller to close the resulting size information object
* or native resources can be leaked!
* @param leftKeys the left table's key columns for the equality condition
* @param rightKeys the right table's key columns for the equality condition
* @param leftConditional the left table's columns needed to evaluate the inequality condition
* @param rightConditional the right table's columns needed to evaluate the inequality condition
* @param condition the inequality condition of the join
* @param nullEquality whether nulls should compare as equal
* @return size information for the join
*/
public static MixedJoinSize mixedLeftSemiJoinSize(Table leftKeys, Table rightKeys,
                                                  Table leftConditional, Table rightConditional,
                                                  CompiledExpression condition,
                                                  NullEquality nullEquality) {
  // Resolve every native argument up front, in the same order as the parameter list.
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftConditionalView = leftConditional.getNativeView();
  final long rightConditionalView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsEqual = nullEquality == NullEquality.EQUAL;

  final long[] sizeInfo = mixedLeftSemiJoinSize(leftKeysView, rightKeysView,
      leftConditionalView, rightConditionalView, conditionHandle, nullsEqual);
  assert sizeInfo.length == 2;

  // sizeInfo[0] is the output row count, sizeInfo[1] is the native handle of the matches column.
  final long rowCount = sizeInfo[0];
  final ColumnVector matches = new ColumnVector(sizeInfo[1]);
  return new MixedJoinSize(rowCount, matches);
}

/**
* Computes the gather map that can be used to manifest the result of a left semi join between
* two tables using a mix of equality and inequality conditions. The entire join condition is
* assumed to be a logical AND of the equality condition and inequality condition.
* A {@link GatherMap} instance will be returned that can be used to gather
* the left table to produce the result of the left semi join.
* It is the responsibility of the caller to close the resulting gather map instance.
* @param leftKeys the left table's key columns for the equality condition
* @param rightKeys the right table's key columns for the equality condition
* @param leftConditional the left table's columns needed to evaluate the inequality condition
* @param rightConditional the right table's columns needed to evaluate the inequality condition
* @param condition the inequality condition of the join
* @param nullEquality whether nulls should compare as equal
* @return gather map that can be used to gather the left table
*/
public static GatherMap mixedLeftSemiJoinGatherMap(Table leftKeys, Table rightKeys,
                                                   Table leftConditional, Table rightConditional,
                                                   CompiledExpression condition,
                                                   NullEquality nullEquality) {
  // Resolve every native argument up front, in the same order as the parameter list.
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftConditionalView = leftConditional.getNativeView();
  final long rightConditionalView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsEqual = nullEquality == NullEquality.EQUAL;

  // The raw native result is turned into a single left-side gather map by the shared helper.
  return buildSemiJoinGatherMap(mixedLeftSemiJoinGatherMap(leftKeysView, rightKeysView,
      leftConditionalView, rightConditionalView, conditionHandle, nullsEqual));
}

/**
* Computes the gather map that can be used to manifest the result of a left semi join between
* two tables using a mix of equality and inequality conditions. The entire join condition is
* assumed to be a logical AND of the equality condition and inequality condition.
* A {@link GatherMap} instance will be returned that can be used to gather
* the left table to produce the result of the left semi join.
* It is the responsibility of the caller to close the resulting gather map instance.
* This interface allows passing the size result from
* {@link #mixedLeftSemiJoinSize(Table, Table, Table, Table, CompiledExpression, NullEquality)}
* when the output size was computed previously.
* @param leftKeys the left table's key columns for the equality condition
* @param rightKeys the right table's key columns for the equality condition
* @param leftConditional the left table's columns needed to evaluate the inequality condition
* @param rightConditional the right table's columns needed to evaluate the inequality condition
* @param condition the inequality condition of the join
* @param nullEquality whether nulls should compare as equal
* @param joinSize mixed join size result
* @return gather map that can be used to gather the left table
*/
public static GatherMap mixedLeftSemiJoinGatherMap(Table leftKeys, Table rightKeys,
                                                   Table leftConditional, Table rightConditional,
                                                   CompiledExpression condition,
                                                   NullEquality nullEquality,
                                                   MixedJoinSize joinSize) {
  // Resolve every native argument up front, in the same order as the parameter list.
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftConditionalView = leftConditional.getNativeView();
  final long rightConditionalView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsEqual = nullEquality == NullEquality.EQUAL;

  // Size information computed earlier: the row count and the matches column's native view.
  final long rowCount = joinSize.getOutputRowCount();
  final long matchesView = joinSize.getMatches().getNativeView();

  return buildSemiJoinGatherMap(mixedLeftSemiJoinGatherMapWithSize(leftKeysView, rightKeysView,
      leftConditionalView, rightConditionalView, conditionHandle, nullsEqual,
      rowCount, matchesView));
}

/**
* Computes the gather map that can be used to manifest the result of a left anti-join between
* two tables. It is assumed this table instance holds the key columns from the left table, and
Expand Down Expand Up @@ -3018,6 +3128,94 @@ public GatherMap conditionalLeftAntiJoinGatherMap(Table rightTable,
return buildSemiJoinGatherMap(gatherMapData);
}

/**
* Computes output size information for a left anti join between two tables using a mix of
* equality and inequality conditions. The entire join condition is assumed to be a logical AND
* of the equality condition and inequality condition.
* NOTE: It is the responsibility of the caller to close the resulting size information object
* or native resources can be leaked!
* @param leftKeys the left table's key columns for the equality condition
* @param rightKeys the right table's key columns for the equality condition
* @param leftConditional the left table's columns needed to evaluate the inequality condition
* @param rightConditional the right table's columns needed to evaluate the inequality condition
* @param condition the inequality condition of the join
* @param nullEquality whether nulls should compare as equal
* @return size information for the join
*/
public static MixedJoinSize mixedLeftAntiJoinSize(Table leftKeys, Table rightKeys,
                                                  Table leftConditional, Table rightConditional,
                                                  CompiledExpression condition,
                                                  NullEquality nullEquality) {
  // Resolve every native argument up front, in the same order as the parameter list.
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftConditionalView = leftConditional.getNativeView();
  final long rightConditionalView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsEqual = nullEquality == NullEquality.EQUAL;

  final long[] sizeInfo = mixedLeftAntiJoinSize(leftKeysView, rightKeysView,
      leftConditionalView, rightConditionalView, conditionHandle, nullsEqual);
  assert sizeInfo.length == 2;

  // sizeInfo[0] is the output row count, sizeInfo[1] is the native handle of the matches column.
  final long rowCount = sizeInfo[0];
  final ColumnVector matches = new ColumnVector(sizeInfo[1]);
  return new MixedJoinSize(rowCount, matches);
}

/**
* Computes the gather map that can be used to manifest the result of a left anti join between
* two tables using a mix of equality and inequality conditions. The entire join condition is
* assumed to be a logical AND of the equality condition and inequality condition.
* A {@link GatherMap} instance will be returned that can be used to gather
* the left table to produce the result of the left anti join.
* It is the responsibility of the caller to close the resulting gather map instance.
* @param leftKeys the left table's key columns for the equality condition
* @param rightKeys the right table's key columns for the equality condition
* @param leftConditional the left table's columns needed to evaluate the inequality condition
* @param rightConditional the right table's columns needed to evaluate the inequality condition
* @param condition the inequality condition of the join
* @param nullEquality whether nulls should compare as equal
* @return gather map that can be used to gather the left table
*/
public static GatherMap mixedLeftAntiJoinGatherMap(Table leftKeys, Table rightKeys,
                                                   Table leftConditional, Table rightConditional,
                                                   CompiledExpression condition,
                                                   NullEquality nullEquality) {
  // Resolve every native argument up front, in the same order as the parameter list.
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftConditionalView = leftConditional.getNativeView();
  final long rightConditionalView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsEqual = nullEquality == NullEquality.EQUAL;

  // The raw native result is turned into a single left-side gather map by the shared helper.
  return buildSemiJoinGatherMap(mixedLeftAntiJoinGatherMap(leftKeysView, rightKeysView,
      leftConditionalView, rightConditionalView, conditionHandle, nullsEqual));
}

/**
* Computes the gather map that can be used to manifest the result of a left anti join between
* two tables using a mix of equality and inequality conditions. The entire join condition is
* assumed to be a logical AND of the equality condition and inequality condition.
* A {@link GatherMap} instance will be returned that can be used to gather
* the left table to produce the result of the left anti join.
* It is the responsibility of the caller to close the resulting gather map instance.
* This interface allows passing the size result from
* {@link #mixedLeftAntiJoinSize(Table, Table, Table, Table, CompiledExpression, NullEquality)}
* when the output size was computed previously.
* @param leftKeys the left table's key columns for the equality condition
* @param rightKeys the right table's key columns for the equality condition
* @param leftConditional the left table's columns needed to evaluate the inequality condition
* @param rightConditional the right table's columns needed to evaluate the inequality condition
* @param condition the inequality condition of the join
* @param nullEquality whether nulls should compare as equal
* @param joinSize mixed join size result
* @return gather map that can be used to gather the left table
*/
public static GatherMap mixedLeftAntiJoinGatherMap(Table leftKeys, Table rightKeys,
                                                   Table leftConditional, Table rightConditional,
                                                   CompiledExpression condition,
                                                   NullEquality nullEquality,
                                                   MixedJoinSize joinSize) {
  // Resolve every native argument up front, in the same order as the parameter list.
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftConditionalView = leftConditional.getNativeView();
  final long rightConditionalView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsEqual = nullEquality == NullEquality.EQUAL;

  // Size information computed earlier: the row count and the matches column's native view.
  final long rowCount = joinSize.getOutputRowCount();
  final long matchesView = joinSize.getMatches().getNativeView();

  return buildSemiJoinGatherMap(mixedLeftAntiJoinGatherMapWithSize(leftKeysView, rightKeysView,
      leftConditionalView, rightConditionalView, conditionHandle, nullsEqual,
      rowCount, matchesView));
}

/**
* For details about how this method functions refer to
* {@link #convertToRowsFixedWidthOptimized()}.
Expand Down
Loading

0 comments on commit 52a61b7

Please sign in to comment.