Skip to content

Commit

Permalink
Revert removal of skip_rows / num_rows options from the Parquet reade…
Browse files Browse the repository at this point in the history
  • Loading branch information
nvdbaranec authored Sep 8, 2022
1 parent d3e8f6d commit 37612ee
Show file tree
Hide file tree
Showing 6 changed files with 616 additions and 106 deletions.
74 changes: 74 additions & 0 deletions cpp/include/cudf/io/parquet.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ class parquet_reader_options {

// List of individual row groups to read (ignored if empty).
// The setters refuse to combine a non-empty list with a non-default
// `_skip_rows` / `_num_rows`, so the two selection modes are mutually exclusive.
std::vector<std::vector<size_type>> _row_groups;
// Number of rows to skip from the start; 0 (the default) skips nothing
size_type _skip_rows = 0;
// Number of rows to read; -1 (the default) is a sentinel meaning "all rows"
size_type _num_rows = -1;

// Whether to store string data as categorical type (off by default)
bool _convert_strings_to_categories = false;
Expand Down Expand Up @@ -127,6 +131,20 @@ class parquet_reader_options {
return _reader_column_schema;
}

/**
 * @brief Returns the configured number of rows to skip from the start.
 *
 * @return Number of rows to skip from the start (0 unless changed by the setter)
 */
[[nodiscard]] size_type get_skip_rows() const
{
  return _skip_rows;
}

/**
 * @brief Returns the configured number of rows to read.
 *
 * @return Number of rows to read; -1 (the default) means all rows
 */
[[nodiscard]] size_type get_num_rows() const
{
  return _num_rows;
}

/**
* @brief Returns names of columns to be read, if set.
*
Expand Down Expand Up @@ -162,6 +180,10 @@ class parquet_reader_options {
*/
void set_row_groups(std::vector<std::vector<size_type>> row_groups)
{
  // A row window is "active" when either option differs from its default
  // (skip_rows == 0, num_rows == -1).
  bool const has_row_window = (_skip_rows != 0) || (_num_rows != -1);

  // An explicit, non-empty row-group selection cannot be combined with a row
  // window; an empty list is always accepted.
  if (has_row_window && !row_groups.empty()) {
    CUDF_FAIL("row_groups can't be set along with skip_rows and num_rows");
  }

  // The argument was taken by value, so move it into place instead of copying.
  _row_groups = std::move(row_groups);
}

Expand Down Expand Up @@ -190,6 +212,34 @@ class parquet_reader_options {
_reader_column_schema = std::move(val);
}

/**
 * @brief Sets the number of rows to skip from the start.
 *
 * Calls `CUDF_FAIL` when a non-zero value is requested while a non-empty
 * list of row groups is already set; setting 0 is always accepted.
 *
 * @param val Number of rows to skip from start
 */
void set_skip_rows(size_type val)
{
  bool const selects_row_groups = !_row_groups.empty();
  if (selects_row_groups && (val != 0)) {
    CUDF_FAIL("skip_rows can't be set along with a non-empty row_groups");
  }
  _skip_rows = val;
}

/**
 * @brief Sets the number of rows to read.
 *
 * Calls `CUDF_FAIL` when a value other than -1 is requested while a non-empty
 * list of row groups is already set; setting -1 is always accepted.
 *
 * @param val Number of rows to read after skip; -1 means all rows
 */
void set_num_rows(size_type val)
{
  bool const selects_row_groups = !_row_groups.empty();
  if (selects_row_groups && (val != -1)) {
    CUDF_FAIL("num_rows can't be set along with a non-empty row_groups");
  }
  _num_rows = val;
}

/**
* @brief Sets timestamp_type used to cast timestamp columns.
*
Expand Down Expand Up @@ -279,6 +329,30 @@ class parquet_reader_options_builder {
return *this;
}

/**
 * @brief Sets the number of rows to skip on the options being built.
 *
 * Forwards to `set_skip_rows` on the wrapped options object.
 *
 * @param val Number of rows to skip from start
 * @return Reference to this builder, for chaining
 */
parquet_reader_options_builder& skip_rows(size_type val)
{
  this->options.set_skip_rows(val);
  return *this;
}

/**
 * @brief Sets the number of rows to read on the options being built.
 *
 * Forwards to `set_num_rows` on the wrapped options object.
 *
 * @param val Number of rows to read after skip
 * @return Reference to this builder, for chaining
 */
parquet_reader_options_builder& num_rows(size_type val)
{
  this->options.set_num_rows(val);
  return *this;
}

/**
* @brief timestamp_type used to cast timestamp columns.
*
Expand Down
Loading

0 comments on commit 37612ee

Please sign in to comment.