Skip to content

Commit

Permalink
Merge branch 'jdbc-to-arrow-config' into jdbc-column-metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
Mike Pigott committed Dec 8, 2018
2 parents a78c770 + df632e3 commit fe097c8
Show file tree
Hide file tree
Showing 9 changed files with 272 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,9 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, B
Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty");
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");

return sqlToArrow(connection, query, allocator,
Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT), false);
JdbcToArrowConfig config =
new JdbcToArrowConfig(allocator, Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT), false);
return sqlToArrow(connection, query, config);
}

/**
Expand All @@ -122,36 +123,30 @@ public static VectorSchemaRoot sqlToArrow(
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
Preconditions.checkNotNull(calendar, "Calendar object can not be null");

return sqlToArrow(connection, query, allocator, calendar, false);
return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, calendar));
}

/**
* For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects.
*
* @param connection Database connection to be used. This method will not close the passed connection object.
* Since the caller has passed the connection object it's the responsibility of the caller
* to close or return the connection to the pool.
* @param query The DB Query to fetch the data.
* @param allocator Memory allocator
* @param calendar Calendar object to use to handle Date, Time and Timestamp datasets.
* @param includeMetadata Whether to include column information in the schema field metadata.
* @param connection Database connection to be used. This method will not close the passed connection object.
* Since the caller has passed the connection object it's the responsibility of the caller
* to close or return the connection to the pool.
* @param query The DB Query to fetch the data.
* @param config Configuration
* @return Arrow Data Objects {@link VectorSchemaRoot}
* @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as
* ResultSet and Statement objects.
*/
public static VectorSchemaRoot sqlToArrow(
Connection connection,
String query,
BaseAllocator allocator,
Calendar calendar,
boolean includeMetadata) throws SQLException, IOException {
public static VectorSchemaRoot sqlToArrow(Connection connection, String query, JdbcToArrowConfig config)
throws SQLException, IOException {
Preconditions.checkNotNull(connection, "JDBC connection object can not be null");
Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty");
Preconditions.checkNotNull(allocator, "Memory allocator object can not be null");
Preconditions.checkNotNull(calendar, "Calendar object can not be null");
Preconditions.checkNotNull(config, "The configuration cannot be null");
Preconditions.checkArgument(config.isValid(), "The configuration must be valid");

try (Statement stmt = connection.createStatement()) {
return sqlToArrow(stmt.executeQuery(query), allocator, calendar, includeMetadata);
return sqlToArrow(stmt.executeQuery(query), config);
}
}

Expand Down Expand Up @@ -182,7 +177,9 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BaseAllocator all
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");

return sqlToArrow(resultSet, allocator, Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT));
JdbcToArrowConfig config =
new JdbcToArrowConfig(allocator, Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT));
return sqlToArrow(resultSet, config);
}

/**
Expand All @@ -197,10 +194,7 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
Preconditions.checkNotNull(calendar, "Calendar object can not be null");

RootAllocator rootAllocator = new RootAllocator(Integer.MAX_VALUE);
VectorSchemaRoot root = sqlToArrow(resultSet, rootAllocator, calendar, false);

return root;
return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar));
}

/**
Expand All @@ -221,32 +215,26 @@ public static VectorSchemaRoot sqlToArrow(
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");
Preconditions.checkNotNull(calendar, "Calendar object can not be null");

return sqlToArrow(resultSet, allocator, calendar, false);
return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar));
}

/**
* For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects.
*
* @param resultSet ResultSet to use to fetch the data from underlying database
* @param allocator Memory allocator to use.
* @param calendar Calendar instance to use for Date, Time and Timestamp datasets.
* @param includeMetadata Whether to include column information in the schema field metadata.
* @param resultSet ResultSet to use to fetch the data from underlying database
* @param config Configuration of the conversion from JDBC to Arrow.
* @return Arrow Data Objects {@link VectorSchemaRoot}
* @throws SQLException on error
*/
public static VectorSchemaRoot sqlToArrow(
ResultSet resultSet,
BaseAllocator allocator,
Calendar calendar,
boolean includeMetadata)
public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig config)
throws SQLException, IOException {
Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null");
Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null");
Preconditions.checkNotNull(calendar, "Calendar object can not be null");
Preconditions.checkNotNull(config, "The configuration cannot be null");
Preconditions.checkArgument(config.isValid(), "The configuration must be valid");

VectorSchemaRoot root = VectorSchemaRoot.create(
JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), calendar, includeMetadata), allocator);
JdbcToArrowUtils.jdbcToArrowVectors(resultSet, root, calendar);
JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config.getCalendar(), config.includeMetadata()), config.getAllocator());
JdbcToArrowUtils.jdbcToArrowVectors(resultSet, root, config.getCalendar());
return root;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.arrow.adapter.jdbc;

import java.util.Calendar;

import org.apache.arrow.memory.BaseAllocator;

import com.google.common.base.Preconditions;

/**
 * This class configures the JDBC-to-Arrow conversion process.
 * <p>
 * The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot},
 * and the calendar is used to define the time zone of any
 * {@link org.apache.arrow.vector.types.pojo.ArrowType.Timestamp}
 * fields that are created during the conversion. The <code>includeMetadata</code> flag
 * controls whether column information is included in the schema field metadata.
 * </p>
 * <p>
 * Neither the allocator nor the calendar may be <code>null</code>.
 * </p>
 */
public final class JdbcToArrowConfig {
  private Calendar calendar;
  private BaseAllocator allocator;
  // Fixed at construction time; there is no setter for this flag.
  private final boolean includeMetadata;

  /**
   * Constructs a new configuration from the provided allocator and calendar. The <code>allocator</code>
   * is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define
   * Arrow Timestamp fields, and to read time-based fields from the JDBC <code>ResultSet</code>.
   * Column metadata is not included in the schema when this constructor is used.
   *
   * @param allocator The memory allocator to construct the Arrow vectors with.
   * @param calendar  The calendar to use when constructing Timestamp fields and reading time-based results.
   * @exception NullPointerException if <code>allocator</code> or <code>calendar</code> is <code>null</code>.
   */
  public JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar) {
    this(allocator, calendar, false);
  }

  /**
   * Constructs a new configuration from the provided allocator, calendar and metadata flag.
   *
   * @param allocator       The memory allocator to construct the Arrow vectors with.
   * @param calendar        The calendar to use when constructing Timestamp fields and reading time-based results.
   * @param includeMetadata Whether to include column information in the schema field metadata.
   * @exception NullPointerException if <code>allocator</code> or <code>calendar</code> is <code>null</code>.
   */
  public JdbcToArrowConfig(BaseAllocator allocator, Calendar calendar, boolean includeMetadata) {
    // The allocator is validated first so the failure message for (null, null) is unchanged.
    Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
    Preconditions.checkNotNull(calendar, "Calendar object can not be null");

    this.allocator = allocator;
    this.calendar = calendar;
    this.includeMetadata = includeMetadata;
  }

  /**
   * The calendar to use when defining Arrow Timestamp fields
   * and retrieving time-based fields from the database.
   *
   * @return the calendar.
   */
  public Calendar getCalendar() {
    return calendar;
  }

  /**
   * Sets the {@link Calendar} to use when constructing timestamp fields in the
   * Arrow schema, and reading time-based fields from the JDBC <code>ResultSet</code>.
   *
   * @param calendar the calendar to set.
   * @return this configuration, to allow call chaining.
   * @exception NullPointerException if <code>calendar</code> is <code>null</code>.
   */
  public JdbcToArrowConfig setCalendar(Calendar calendar) {
    Preconditions.checkNotNull(calendar, "Calendar object can not be null");
    this.calendar = calendar;
    return this;
  }

  /**
   * The Arrow memory allocator.
   *
   * @return the allocator.
   */
  public BaseAllocator getAllocator() {
    return allocator;
  }

  /**
   * Sets the memory allocator to use when constructing the Arrow vectors from the ResultSet.
   *
   * @param allocator the allocator to set.
   * @return this configuration, to allow call chaining.
   * @exception NullPointerException if <code>allocator</code> is <code>null</code>.
   */
  public JdbcToArrowConfig setAllocator(BaseAllocator allocator) {
    Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
    this.allocator = allocator;
    return this;
  }

  /**
   * Whether to include column information in the schema field metadata.
   *
   * @return <code>true</code> if column information should be included, <code>false</code> otherwise.
   */
  public boolean includeMetadata() {
    return includeMetadata;
  }

  /**
   * Whether this configuration is valid. The configuration is valid when:
   * <ul>
   *  <li>A memory allocator is provided.</li>
   *  <li>A calendar is provided.</li>
   * </ul>
   * <p>
   * The constructors and setters of this class already reject <code>null</code> for both values.
   * </p>
   *
   * @return Whether this configuration is valid.
   */
  public boolean isValid() {
    return (calendar != null) && (allocator != null);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public abstract class AbstractJdbcToArrowTest {
* @return Table object
* @throws IOException on error
*/
protected static Table getTable(String ymlFilePath, Class clss) throws IOException {
protected static Table getTable(String ymlFilePath, @SuppressWarnings("rawtypes") Class clss) throws IOException {
return new ObjectMapper(new YAMLFactory()).readValue(
clss.getClassLoader().getResourceAsStream(ymlFilePath), Table.class);
}
Expand Down Expand Up @@ -94,7 +94,7 @@ public void destroy() throws SQLException {
* @throws ClassNotFoundException on error
* @throws IOException on error
*/
public static Object[][] prepareTestData(String[] testFiles, Class clss)
public static Object[][] prepareTestData(String[] testFiles, @SuppressWarnings("rawtypes") Class clss)
throws SQLException, ClassNotFoundException, IOException {
Object[][] tableArr = new Object[testFiles.length][];
int i = 0;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.arrow.adapter.jdbc;

import static org.junit.Assert.*;

import java.util.Calendar;
import java.util.Locale;
import java.util.TimeZone;

import org.apache.arrow.memory.BaseAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.junit.Test;

/**
 * Unit tests for {@link JdbcToArrowConfig}: null handling in the constructors and setters,
 * the accessors, and the <code>includeMetadata</code> flag.
 */
public class JdbcToArrowConfigTest {

  private static final RootAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
  private static final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT);

  @Test(expected = NullPointerException.class)
  public void testNullArguments() {
    new JdbcToArrowConfig(null, null);
  }

  @Test(expected = NullPointerException.class)
  public void testNullCalendar() {
    new JdbcToArrowConfig(allocator, null);
  }

  @Test(expected = NullPointerException.class)
  public void testNullAllocator() {
    new JdbcToArrowConfig(null, calendar);
  }

  @Test(expected = NullPointerException.class)
  public void testSetNullAllocator() {
    JdbcToArrowConfig config = new JdbcToArrowConfig(allocator, calendar);
    config.setAllocator(null);
  }

  @Test(expected = NullPointerException.class)
  public void testSetNullCalendar() {
    JdbcToArrowConfig config = new JdbcToArrowConfig(allocator, calendar);
    config.setCalendar(null);
  }

  @Test
  public void testConfig() {
    JdbcToArrowConfig config = new JdbcToArrowConfig(allocator, calendar);
    assertTrue(config.isValid());
    // assertSame checks reference identity and reports both objects on failure.
    assertSame(allocator, config.getAllocator());
    assertSame(calendar, config.getCalendar());

    Calendar newCalendar = Calendar.getInstance();
    BaseAllocator newAllocator = new RootAllocator(Integer.SIZE);

    // Setters return the configuration itself, so the calls can be chained.
    config.setAllocator(newAllocator).setCalendar(newCalendar);

    assertTrue(config.isValid());
    assertSame(newAllocator, config.getAllocator());
    assertSame(newCalendar, config.getCalendar());
  }

  @Test
  public void testIncludeMetadata() {
    // The two-argument constructor leaves metadata disabled.
    assertFalse(new JdbcToArrowConfig(allocator, calendar).includeMetadata());

    // The three-argument constructor stores the flag it is given.
    assertFalse(new JdbcToArrowConfig(allocator, calendar, false).includeMetadata());
    assertTrue(new JdbcToArrowConfig(allocator, calendar, true).includeMetadata());
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest;
import org.apache.arrow.adapter.jdbc.JdbcToArrow;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.Table;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VarCharVector;
Expand Down Expand Up @@ -116,6 +117,13 @@ public void testJdbcToArroValues() throws SQLException, IOException {
new RootAllocator(Integer.MAX_VALUE)));
testDataSets(JdbcToArrow.sqlToArrow(conn.createStatement().executeQuery(table.getQuery()),
Calendar.getInstance()));
testDataSets(JdbcToArrow.sqlToArrow(
conn.createStatement().executeQuery(table.getQuery()),
new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance())));
testDataSets(JdbcToArrow.sqlToArrow(
conn,
table.getQuery(),
new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance())));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest;
import org.apache.arrow.adapter.jdbc.JdbcToArrow;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.Table;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BigIntVector;
Expand Down Expand Up @@ -142,6 +143,13 @@ public void testJdbcToArroValues() throws SQLException, IOException {
testDataSets(JdbcToArrow.sqlToArrow(conn.createStatement().executeQuery(table.getQuery()),
new RootAllocator(Integer.MAX_VALUE)));
testDataSets(JdbcToArrow.sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()));
testDataSets(JdbcToArrow.sqlToArrow(
conn.createStatement().executeQuery(table.getQuery()),
new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance())));
testDataSets(JdbcToArrow.sqlToArrow(
conn,
table.getQuery(),
new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance())));
}

/**
Expand Down
Loading

0 comments on commit fe097c8

Please sign in to comment.