Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Java bindings for mixed left, inner, and full joins #9941

Merged
merged 7 commits into from
Jan 19, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions java/src/main/java/ai/rapids/cudf/MixedJoinSize.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package ai.rapids.cudf;

/**
 * This class tracks size information associated with a mixed table join.
 * <p>
 * An instance holds the number of rows the join would produce together with a
 * {@link ColumnVector} of match information. Closing the instance closes that
 * column vector, so the instance must be closed by whoever owns it.
 */
public final class MixedJoinSize implements AutoCloseable {
  private final long outputRowCount;
  // This is in flux, avoid exposing publicly until the dust settles.
  // Assigned once in the constructor and never rebound, hence final.
  private final ColumnVector matches;

  /**
   * Create a size result.
   * @param outputRowCount number of rows the join would produce
   * @param matches column of match information; it is closed when this object is closed
   */
  MixedJoinSize(long outputRowCount, ColumnVector matches) {
    this.outputRowCount = outputRowCount;
    this.matches = matches;
  }

  /** Return the number of output rows that would be generated from the mixed join */
  public long getOutputRowCount() {
    return outputRowCount;
  }

  /**
   * Return the match information column. Package-private on purpose, see the note on the
   * field. The returned column is still closed by {@link #close()} on this object.
   */
  ColumnVector getMatches() {
    return matches;
  }

  /** Close the match information column held by this object. */
  @Override
  public synchronized void close() {
    matches.close();
  }
}
235 changes: 234 additions & 1 deletion java/src/main/java/ai/rapids/cudf/Table.java
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,36 @@ private static native long[] conditionalLeftAntiJoinGatherMapWithCount(long left
long condition,
long rowCount) throws CudfException;

/**
 * Native entry point behind the public
 * {@link #mixedLeftJoinSize(Table, Table, Table, Table, CompiledExpression, NullEquality)}.
 * The public wrapper passes the native views of the four tables, the native handle of the
 * compiled condition, and {@code true} when nulls should compare as equal.
 * The wrapper expects the returned array to hold exactly two values: the output row count
 * at index 0 and a native column handle (wrapped in a ColumnVector) at index 1.
 */
private static native long[] mixedLeftJoinSize(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual);

/**
 * Native entry point behind the public mixed left join gather map method that takes no
 * precomputed size. The public wrapper passes the native views of the four tables, the
 * native handle of the compiled condition, and {@code true} when nulls should compare as
 * equal. The returned array is handed unchanged to buildJoinGatherMaps to be turned into
 * the left and right gather maps.
 */
private static native long[] mixedLeftJoinGatherMaps(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual);

/**
 * Native entry point behind the public mixed left join gather map method that accepts a
 * previously computed {@link MixedJoinSize}. In addition to the arguments of the variant
 * without size, the public wrapper passes the size object's output row count as
 * {@code outputRowCount} and the native view of its matches column as
 * {@code matchesColumnView}. The returned array is handed unchanged to
 * buildJoinGatherMaps.
 */
private static native long[] mixedLeftJoinGatherMapsWithSize(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual,
long outputRowCount, long matchesColumnView);

/**
 * Native entry point behind the public
 * {@link #mixedInnerJoinSize(Table, Table, Table, Table, CompiledExpression, NullEquality)}.
 * The public wrapper passes the native views of the four tables, the native handle of the
 * compiled condition, and {@code true} when nulls should compare as equal.
 * The wrapper expects the returned array to hold exactly two values: the output row count
 * at index 0 and a native column handle (wrapped in a ColumnVector) at index 1.
 */
private static native long[] mixedInnerJoinSize(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual);

/**
 * Native entry point behind the public mixed inner join gather map method that takes no
 * precomputed size. The public wrapper passes the native views of the four tables, the
 * native handle of the compiled condition, and {@code true} when nulls should compare as
 * equal. The returned array is handed unchanged to buildJoinGatherMaps to be turned into
 * the left and right gather maps.
 */
private static native long[] mixedInnerJoinGatherMaps(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual);

/**
 * Native entry point behind the public mixed inner join gather map method that accepts a
 * previously computed {@link MixedJoinSize}. In addition to the arguments of the variant
 * without size, the public wrapper passes the size object's output row count as
 * {@code outputRowCount} and the native view of its matches column as
 * {@code matchesColumnView}. The returned array is handed unchanged to
 * buildJoinGatherMaps.
 */
private static native long[] mixedInnerJoinGatherMapsWithSize(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual,
long outputRowCount, long matchesColumnView);

/**
 * Native entry point behind the public mixed full join gather map method. The public
 * wrapper passes the native views of the four tables, the native handle of the compiled
 * condition, and {@code true} when nulls should compare as equal. The returned array is
 * handed unchanged to buildJoinGatherMaps to be turned into the left and right gather maps.
 * No size-computing or size-accepting variant is declared alongside this one.
 */
private static native long[] mixedFullJoinGatherMaps(long leftKeysTable, long rightKeysTable,
long leftConditionTable, long rightConditionTable,
long condition, boolean compareNullsEqual);

// Native cross join binding. Both arguments are presumably native table view handles and
// the result presumably holds native column handles -- TODO confirm against the JNI side.
private static native long[] crossJoin(long leftTable, long rightTable) throws CudfException;

// Native concatenate binding taking an array of native table pointers. The meaning of the
// returned long[] is not visible here -- presumably native column handles; TODO confirm.
private static native long[] concatenate(long[] cudfTablePointers) throws CudfException;
Expand Down Expand Up @@ -2121,7 +2151,7 @@ public static Table scatter(Scalar[] source, ColumnView scatterMap, Table target
target.getNativeView(), checkBounds));
}

private GatherMap[] buildJoinGatherMaps(long[] gatherMapData) {
private static GatherMap[] buildJoinGatherMaps(long[] gatherMapData) {
long bufferSize = gatherMapData[0];
long leftAddr = gatherMapData[1];
long leftHandle = gatherMapData[2];
Expand Down Expand Up @@ -2274,6 +2304,94 @@ public GatherMap[] conditionalLeftJoinGatherMaps(Table rightTable,
return buildJoinGatherMaps(gatherMapData);
}

/**
 * Determines the output size of a left join whose condition combines an equality part
 * (on key columns) with an inequality part (an AST expression). The two parts are treated
 * as being joined by a logical AND.
 * NOTE: The returned size information object holds native resources. The caller must close
 * it or those resources can be leaked!
 * @param leftKeys the left table's key columns for the equality condition
 * @param rightKeys the right table's key columns for the equality condition
 * @param leftConditional the left table's columns needed to evaluate the inequality condition
 * @param rightConditional the right table's columns needed to evaluate the inequality condition
 * @param condition the inequality condition of the join
 * @param nullEquality whether nulls should compare as equal
 * @return size information for the join
 */
public static MixedJoinSize mixedLeftJoinSize(Table leftKeys, Table rightKeys,
                                              Table leftConditional, Table rightConditional,
                                              CompiledExpression condition,
                                              NullEquality nullEquality) {
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftCondView = leftConditional.getNativeView();
  final long rightCondView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsAreEqual = (nullEquality == NullEquality.EQUAL);

  final long[] sizeInfo = mixedLeftJoinSize(leftKeysView, rightKeysView,
      leftCondView, rightCondView, conditionHandle, nullsAreEqual);
  // Native side reports [row count, matches column handle].
  assert sizeInfo.length == 2;
  return new MixedJoinSize(sizeInfo[0], new ColumnVector(sizeInfo[1]));
}

/**
 * Builds the pair of gather maps describing the result of a left join whose condition
 * combines an equality part (on key columns) with an inequality part (an AST expression).
 * The two parts are treated as being joined by a logical AND.
 * The returned array holds two {@link GatherMap} instances: the first gathers the left
 * table and the second gathers the right table to produce the left join result.
 * It is the responsibility of the caller to close the resulting gather map instances.
 * @param leftKeys the left table's key columns for the equality condition
 * @param rightKeys the right table's key columns for the equality condition
 * @param leftConditional the left table's columns needed to evaluate the inequality condition
 * @param rightConditional the right table's columns needed to evaluate the inequality condition
 * @param condition the inequality condition of the join
 * @param nullEquality whether nulls should compare as equal
 * @return left and right table gather maps
 */
public static GatherMap[] mixedLeftJoinGatherMaps(Table leftKeys, Table rightKeys,
                                                  Table leftConditional, Table rightConditional,
                                                  CompiledExpression condition,
                                                  NullEquality nullEquality) {
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftCondView = leftConditional.getNativeView();
  final long rightCondView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsAreEqual = (nullEquality == NullEquality.EQUAL);

  return buildJoinGatherMaps(
      mixedLeftJoinGatherMaps(leftKeysView, rightKeysView,
          leftCondView, rightCondView, conditionHandle, nullsAreEqual));
}

/**
 * Builds the pair of gather maps describing the result of a left join whose condition
 * combines an equality part (on key columns) with an inequality part (an AST expression).
 * The two parts are treated as being joined by a logical AND.
 * The returned array holds two {@link GatherMap} instances: the first gathers the left
 * table and the second gathers the right table to produce the left join result.
 * It is the responsibility of the caller to close the resulting gather map instances.
 * Use this overload when the output size is already known from an earlier call to
 * {@link #mixedLeftJoinSize(Table, Table, Table, Table, CompiledExpression, NullEquality)}.
 * The supplied size object is only read here; this method does not close it.
 * @param leftKeys the left table's key columns for the equality condition
 * @param rightKeys the right table's key columns for the equality condition
 * @param leftConditional the left table's columns needed to evaluate the inequality condition
 * @param rightConditional the right table's columns needed to evaluate the inequality condition
 * @param condition the inequality condition of the join
 * @param nullEquality whether nulls should compare as equal
 * @param joinSize mixed join size result
 * @return left and right table gather maps
 */
public static GatherMap[] mixedLeftJoinGatherMaps(Table leftKeys, Table rightKeys,
                                                  Table leftConditional, Table rightConditional,
                                                  CompiledExpression condition,
                                                  NullEquality nullEquality,
                                                  MixedJoinSize joinSize) {
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftCondView = leftConditional.getNativeView();
  final long rightCondView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsAreEqual = (nullEquality == NullEquality.EQUAL);
  final long knownRowCount = joinSize.getOutputRowCount();
  final long matchesView = joinSize.getMatches().getNativeView();

  return buildJoinGatherMaps(
      mixedLeftJoinGatherMapsWithSize(leftKeysView, rightKeysView,
          leftCondView, rightCondView, conditionHandle, nullsAreEqual,
          knownRowCount, matchesView));
}

/**
* Computes the gather maps that can be used to manifest the result of an inner equi-join between
* two tables. It is assumed this table instance holds the key columns from the left table, and
Expand Down Expand Up @@ -2414,6 +2532,94 @@ public GatherMap[] conditionalInnerJoinGatherMaps(Table rightTable,
return buildJoinGatherMaps(gatherMapData);
}

/**
 * Determines the output size of an inner join whose condition combines an equality part
 * (on key columns) with an inequality part (an AST expression). The two parts are treated
 * as being joined by a logical AND.
 * NOTE: The returned size information object holds native resources. The caller must close
 * it or those resources can be leaked!
 * @param leftKeys the left table's key columns for the equality condition
 * @param rightKeys the right table's key columns for the equality condition
 * @param leftConditional the left table's columns needed to evaluate the inequality condition
 * @param rightConditional the right table's columns needed to evaluate the inequality condition
 * @param condition the inequality condition of the join
 * @param nullEquality whether nulls should compare as equal
 * @return size information for the join
 */
public static MixedJoinSize mixedInnerJoinSize(Table leftKeys, Table rightKeys,
                                               Table leftConditional, Table rightConditional,
                                               CompiledExpression condition,
                                               NullEquality nullEquality) {
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftCondView = leftConditional.getNativeView();
  final long rightCondView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsAreEqual = (nullEquality == NullEquality.EQUAL);

  final long[] sizeInfo = mixedInnerJoinSize(leftKeysView, rightKeysView,
      leftCondView, rightCondView, conditionHandle, nullsAreEqual);
  // Native side reports [row count, matches column handle].
  assert sizeInfo.length == 2;
  return new MixedJoinSize(sizeInfo[0], new ColumnVector(sizeInfo[1]));
}

/**
 * Builds the pair of gather maps describing the result of an inner join whose condition
 * combines an equality part (on key columns) with an inequality part (an AST expression).
 * The two parts are treated as being joined by a logical AND.
 * The returned array holds two {@link GatherMap} instances: the first gathers the left
 * table and the second gathers the right table to produce the inner join result.
 * It is the responsibility of the caller to close the resulting gather map instances.
 * @param leftKeys the left table's key columns for the equality condition
 * @param rightKeys the right table's key columns for the equality condition
 * @param leftConditional the left table's columns needed to evaluate the inequality condition
 * @param rightConditional the right table's columns needed to evaluate the inequality condition
 * @param condition the inequality condition of the join
 * @param nullEquality whether nulls should compare as equal
 * @return left and right table gather maps
 */
public static GatherMap[] mixedInnerJoinGatherMaps(Table leftKeys, Table rightKeys,
                                                   Table leftConditional, Table rightConditional,
                                                   CompiledExpression condition,
                                                   NullEquality nullEquality) {
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftCondView = leftConditional.getNativeView();
  final long rightCondView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsAreEqual = (nullEquality == NullEquality.EQUAL);

  return buildJoinGatherMaps(
      mixedInnerJoinGatherMaps(leftKeysView, rightKeysView,
          leftCondView, rightCondView, conditionHandle, nullsAreEqual));
}

/**
 * Builds the pair of gather maps describing the result of an inner join whose condition
 * combines an equality part (on key columns) with an inequality part (an AST expression).
 * The two parts are treated as being joined by a logical AND.
 * The returned array holds two {@link GatherMap} instances: the first gathers the left
 * table and the second gathers the right table to produce the inner join result.
 * It is the responsibility of the caller to close the resulting gather map instances.
 * Use this overload when the output size is already known from an earlier call to
 * {@link #mixedInnerJoinSize(Table, Table, Table, Table, CompiledExpression, NullEquality)}.
 * The supplied size object is only read here; this method does not close it.
 * @param leftKeys the left table's key columns for the equality condition
 * @param rightKeys the right table's key columns for the equality condition
 * @param leftConditional the left table's columns needed to evaluate the inequality condition
 * @param rightConditional the right table's columns needed to evaluate the inequality condition
 * @param condition the inequality condition of the join
 * @param nullEquality whether nulls should compare as equal
 * @param joinSize mixed join size result
 * @return left and right table gather maps
 */
public static GatherMap[] mixedInnerJoinGatherMaps(Table leftKeys, Table rightKeys,
                                                   Table leftConditional, Table rightConditional,
                                                   CompiledExpression condition,
                                                   NullEquality nullEquality,
                                                   MixedJoinSize joinSize) {
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftCondView = leftConditional.getNativeView();
  final long rightCondView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsAreEqual = (nullEquality == NullEquality.EQUAL);
  final long knownRowCount = joinSize.getOutputRowCount();
  final long matchesView = joinSize.getMatches().getNativeView();

  return buildJoinGatherMaps(
      mixedInnerJoinGatherMapsWithSize(leftKeysView, rightKeysView,
          leftCondView, rightCondView, conditionHandle, nullsAreEqual,
          knownRowCount, matchesView));
}

/**
* Computes the gather maps that can be used to manifest the result of a full equi-join between
* two tables. It is assumed this table instance holds the key columns from the left table, and
Expand Down Expand Up @@ -2520,6 +2726,33 @@ public GatherMap[] conditionalFullJoinGatherMaps(Table rightTable,
return buildJoinGatherMaps(gatherMapData);
}

/**
 * Builds the pair of gather maps describing the result of a full join whose condition
 * combines an equality part (on key columns) with an inequality part (an AST expression).
 * The two parts are treated as being joined by a logical AND.
 * The returned array holds two {@link GatherMap} instances: the first gathers the left
 * table and the second gathers the right table to produce the full join result.
 * It is the responsibility of the caller to close the resulting gather map instances.
 * @param leftKeys the left table's key columns for the equality condition
 * @param rightKeys the right table's key columns for the equality condition
 * @param leftConditional the left table's columns needed to evaluate the inequality condition
 * @param rightConditional the right table's columns needed to evaluate the inequality condition
 * @param condition the inequality condition of the join
 * @param nullEquality whether nulls should compare as equal
 * @return left and right table gather maps
 */
public static GatherMap[] mixedFullJoinGatherMaps(Table leftKeys, Table rightKeys,
                                                  Table leftConditional, Table rightConditional,
                                                  CompiledExpression condition,
                                                  NullEquality nullEquality) {
  final long leftKeysView = leftKeys.getNativeView();
  final long rightKeysView = rightKeys.getNativeView();
  final long leftCondView = leftConditional.getNativeView();
  final long rightCondView = rightConditional.getNativeView();
  final long conditionHandle = condition.getNativeHandle();
  final boolean nullsAreEqual = (nullEquality == NullEquality.EQUAL);

  return buildJoinGatherMaps(
      mixedFullJoinGatherMaps(leftKeysView, rightKeysView,
          leftCondView, rightCondView, conditionHandle, nullsAreEqual));
}

private GatherMap buildSemiJoinGatherMap(long[] gatherMapData) {
long bufferSize = gatherMapData[0];
long leftAddr = gatherMapData[1];
Expand Down
Loading