From 3fb08f7bd330e3b63938b7488d658dc41b93c409 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 2 Jan 2024 12:33:27 -0800 Subject: [PATCH 1/6] Change namespace Signed-off-by: Nghia Truong --- cpp/include/cudf/io/detail/orc.hpp | 16 ++++++---------- cpp/include/cudf/io/orc.hpp | 2 +- cpp/src/io/functions.cpp | 8 ++++---- cpp/src/io/orc/reader_impl.cu | 5 +++-- cpp/src/io/orc/reader_impl.hpp | 4 ++-- cpp/src/io/orc/writer_impl.cu | 10 ++-------- cpp/src/io/orc/writer_impl.hpp | 13 ++++--------- 7 files changed, 22 insertions(+), 36 deletions(-) diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index 623f402f9c9..06cbba902cc 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -28,16 +28,14 @@ #include -namespace cudf { -namespace io { +namespace cudf::io { // Forward declaration class orc_reader_options; class orc_writer_options; class chunked_orc_writer_options; -namespace detail { -namespace orc { +namespace orc::detail { /** * @brief Class to read ORC dataset data into columns. @@ -94,7 +92,7 @@ class writer { */ explicit writer(std::unique_ptr sink, orc_writer_options const& options, - single_write_mode mode, + cudf::io::detail::single_write_mode mode, rmm::cuda_stream_view stream); /** @@ -107,7 +105,7 @@ class writer { */ explicit writer(std::unique_ptr sink, chunked_orc_writer_options const& options, - single_write_mode mode, + cudf::io::detail::single_write_mode mode, rmm::cuda_stream_view stream); /** @@ -127,7 +125,5 @@ class writer { */ void close(); }; -} // namespace orc -} // namespace detail -} // namespace io -} // namespace cudf +} // namespace orc::detail +} // namespace cudf::io diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index c2762b05aa6..f9c0d501d2a 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -1304,7 +1304,7 @@ class orc_chunked_writer { void close(); /// Unique pointer to impl writer class - std::unique_ptr writer; + std::unique_ptr writer; }; /** @} */ // end of group diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 964e40e36cd..fa7e6fd814e 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -270,7 +270,7 @@ void write_csv(csv_writer_options const& options, mr); } -namespace detail_orc = cudf::io::detail::orc; +namespace orc_detail = cudf::io::orc::detail; raw_orc_statistics read_raw_orc_statistics(source_info const& src_info) { @@ -418,7 +418,7 @@ table_with_metadata read_orc(orc_reader_options const& options, rmm::mr::device_ CUDF_FUNC_RANGE(); auto datasources = make_datasources(options.get_source()); - auto reader = std::make_unique( + auto reader = std::make_unique( std::move(datasources), options, cudf::get_default_stream(), mr); return reader->read(options); @@ -436,7 +436,7 @@ void write_orc(orc_writer_options const& options) auto sinks = make_datasinks(options.get_sink()); CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for ORC writing"); - auto writer = std::make_unique( + auto writer = std::make_unique( std::move(sinks[0]), options, io_detail::single_write_mode::YES, cudf::get_default_stream()); writer->write(options.get_table()); @@ -452,7 +452,7 @@ orc_chunked_writer::orc_chunked_writer(chunked_orc_writer_options const& options auto sinks = make_datasinks(options.get_sink()); CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for ORC writing"); - writer = std::make_unique( + writer = std::make_unique( std::move(sinks[0]), options, io_detail::single_write_mode::NO, cudf::get_default_stream()); } diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 157269cf52e..dcad538a1aa 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -52,7 +52,8 @@ #include #include -namespace cudf::io::detail::orc { +namespace cudf::io::orc::detail { +using namespace cudf::io::detail; using namespace cudf::io::orc; namespace { @@ -1363,4 +1364,4 @@ table_with_metadata reader::read(orc_reader_options const& options) return _impl->read(options.get_skip_rows(), options.get_num_rows(), options.get_stripes()); } -} // namespace cudf::io::detail::orc +} // namespace cudf::io::orc::detail diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 7a576d61726..b54e142ea12 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -34,7 +34,7 @@ #include #include -namespace cudf::io::detail::orc { +namespace cudf::io::orc::detail { using namespace cudf::io::orc; namespace { @@ -86,4 +86,4 @@ class reader::impl { std::unique_ptr const _col_meta; // Track of orc mapping and child details }; -} // namespace cudf::io::detail::orc +} // namespace cudf::io::orc::detail diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 344fc6075a9..bbeea8dd091 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -68,10 +68,7 @@ #include #include -namespace cudf { -namespace io { -namespace detail { -namespace orc { +namespace cudf::io::orc::detail { using namespace cudf::io::orc; using namespace cudf::io; @@ -2741,7 +2738,4 @@ void writer::write(table_view const& table) { _impl->write(table); } // Forward to implementation void writer::close() { _impl->close(); } -} // namespace orc -} // namespace detail -} // namespace io -} // namespace cudf +} // namespace cudf::io::orc::detail diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index 0d1a83f3d85..88f5bb9933f 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -39,15 +39,13 @@ #include #include -namespace cudf { -namespace io { -namespace detail { -namespace orc { +namespace cudf::io::orc::detail { // Forward internal classes class orc_column_view; -using namespace cudf::io::orc; using namespace cudf::io; +using namespace cudf::io::detail; +using namespace cudf::io::orc; using cudf::detail::device_2dspan; using cudf::detail::host_2dspan; using cudf::detail::hostdevice_2dvector; @@ -364,7 +362,4 @@ class writer::impl { bool _closed = false; // To track if the output has been written to sink. }; -} // namespace orc -} // namespace detail -} // namespace io -} // namespace cudf +} // namespace cudf::io::orc::detail From 46cb03d8b577ddd41ef5d22d1f07faa5603787ae Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 2 Jan 2024 12:42:51 -0800 Subject: [PATCH 2/6] Update copyright year Signed-off-by: Nghia Truong --- cpp/include/cudf/io/detail/orc.hpp | 2 +- cpp/include/cudf/io/orc.hpp | 2 +- cpp/src/io/functions.cpp | 2 +- cpp/src/io/orc/reader_impl.cu | 2 +- cpp/src/io/orc/reader_impl.hpp | 2 +- cpp/src/io/orc/writer_impl.cu | 2 +- cpp/src/io/orc/writer_impl.hpp | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index 06cbba902cc..4ec8d2e8c2a 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index f9c0d501d2a..67dae3709d9 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index fa7e6fd814e..401e1ce47eb 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index dcad538a1aa..31f14682c33 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index b54e142ea12..178fd0f7a7d 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index bbeea8dd091..ef66f74e2d6 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index 88f5bb9933f..a880ff5524f 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From e8d482c55705120ffb6b4c1cf733daa079893742 Mon Sep 17 00:00:00 2001 From: Nghia Truong <7416935+ttnghia@users.noreply.github.com> Date: Tue, 2 Jan 2024 13:53:16 -0700 Subject: [PATCH 3/6] Update cpp/include/cudf/io/orc.hpp Co-authored-by: Vukasin Milovanovic --- cpp/include/cudf/io/orc.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 67dae3709d9..3ef356bed1b 100644 --- a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -1304,7 +1304,7 @@ class orc_chunked_writer { void close(); /// Unique pointer to impl writer class - std::unique_ptr writer; + std::unique_ptr writer; }; /** @} */ // end of group From 322ad6c07d4820b481ec889f6407ac47e5f3fe0c Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 2 Jan 2024 12:56:29 -0800 Subject: [PATCH 4/6] Remove redundant namespace import Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 1 - cpp/src/io/orc/reader_impl.hpp | 1 - cpp/src/io/orc/writer_impl.cu | 2 -- cpp/src/io/orc/writer_impl.hpp | 2 -- 4 files changed, 6 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 31f14682c33..83d6f4860df 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -54,7 +54,6 @@ namespace cudf::io::orc::detail { using namespace cudf::io::detail; -using namespace cudf::io::orc; namespace { diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 178fd0f7a7d..b92434f81fb 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -35,7 +35,6 @@ #include namespace cudf::io::orc::detail { -using namespace cudf::io::orc; namespace { struct reader_column_meta; diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index ef66f74e2d6..42eea37e695 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -69,8 +69,6 @@ #include namespace cudf::io::orc::detail { -using namespace cudf::io::orc; -using namespace cudf::io; template [[nodiscard]] constexpr int varint_size(T val) diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index a880ff5524f..6417afeb7e3 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -43,9 +43,7 @@ namespace cudf::io::orc::detail { // Forward internal classes class orc_column_view; -using namespace cudf::io; using namespace cudf::io::detail; -using namespace cudf::io::orc; using cudf::detail::device_2dspan; using cudf::detail::host_2dspan; using cudf::detail::hostdevice_2dvector; From 4212960655fcd356be6a1840f344b06fba4ba9e1 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Tue, 2 Jan 2024 14:59:17 -0800 Subject: [PATCH 5/6] Remove prefix namespace Signed-off-by: Nghia Truong --- cpp/src/io/orc/reader_impl.cu | 23 +++++++++++------------ cpp/src/io/orc/reader_impl.hpp | 4 ++-- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index 83d6f4860df..46f6861e789 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -622,7 +622,7 @@ void scan_null_counts(cudf::detail::hostdevice_2dvector const& * @brief Aggregate child metadata from parent column chunks. */ void aggregate_child_meta(std::size_t level, - cudf::io::orc::detail::column_hierarchy const& selected_columns, + column_hierarchy const& selected_columns, cudf::detail::host_2dspan chunks, cudf::detail::host_2dspan row_groups, host_span list_col, @@ -775,7 +775,7 @@ constexpr type_id to_cudf_type(orc::TypeKind kind, * @brief Determines cuDF type of an ORC Decimal column. */ type_id to_cudf_decimal_type(host_span decimal128_columns, - cudf::io::orc::detail::aggregate_orc_metadata const& metadata, + aggregate_orc_metadata const& metadata, int column_index) { if (metadata.get_col_type(column_index).kind != DECIMAL) { return type_id::EMPTY; } @@ -798,14 +798,13 @@ std::string get_map_child_col_name(std::size_t const idx) { return (idx == 0) ? /** * @brief Create empty columns and respective schema information from the buffer. */ -std::unique_ptr create_empty_column( - size_type orc_col_id, - cudf::io::orc::detail::aggregate_orc_metadata const& metadata, - host_span decimal128_columns, - bool use_np_dtypes, - data_type timestamp_type, - column_name_info& schema_info, - rmm::cuda_stream_view stream) +std::unique_ptr create_empty_column(size_type orc_col_id, + aggregate_orc_metadata const& metadata, + host_span decimal128_columns, + bool use_np_dtypes, + data_type timestamp_type, + column_name_info& schema_info, + rmm::cuda_stream_view stream) { schema_info.name = metadata.column_name(0, orc_col_id); auto const kind = metadata.get_col_type(orc_col_id).kind; @@ -891,8 +890,8 @@ std::unique_ptr create_empty_column( column_buffer assemble_buffer(size_type orc_col_id, std::size_t level, reader_column_meta const& col_meta, - cudf::io::orc::detail::aggregate_orc_metadata const& metadata, - cudf::io::orc::detail::column_hierarchy const& selected_columns, + aggregate_orc_metadata const& metadata, + column_hierarchy const& selected_columns, std::vector>& col_buffers, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index b92434f81fb..4a7771687f6 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -75,8 +75,8 @@ class reader::impl { rmm::mr::device_memory_resource* const _mr; std::vector> const _sources; // Unused but owns data for `_metadata` - cudf::io::orc::detail::aggregate_orc_metadata _metadata; - cudf::io::orc::detail::column_hierarchy const _selected_columns; // Need to be after _metadata + aggregate_orc_metadata _metadata; + column_hierarchy const _selected_columns; // Need to be after _metadata data_type const _timestamp_type; // Override output timestamp resolution bool const _use_index; // Enable or disable attempt to use row index for parsing From 94273cf3cdd3033562bbfd017e64cd2fae49e292 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Wed, 3 Jan 2024 13:45:54 -0800 Subject: [PATCH 6/6] Remove namespace prefix Signed-off-by: Nghia Truong --- cpp/src/io/functions.cpp | 16 ++++++---------- cpp/src/io/orc/writer_impl.hpp | 4 ++-- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 401e1ce47eb..dea8bdaef79 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -38,8 +38,7 @@ #include -namespace cudf { -namespace io { +namespace cudf::io { // Returns builder for csv_reader_options csv_reader_options_builder csv_reader_options::builder(source_info src) { @@ -270,8 +269,6 @@ void write_csv(csv_writer_options const& options, mr); } -namespace orc_detail = cudf::io::orc::detail; - raw_orc_statistics read_raw_orc_statistics(source_info const& src_info) { auto stream = cudf::get_default_stream(); @@ -322,7 +319,7 @@ raw_orc_statistics read_raw_orc_statistics(source_info const& src_info) return result; } -column_statistics::column_statistics(cudf::io::orc::column_statistics&& cs) +column_statistics::column_statistics(orc::column_statistics&& cs) { number_of_values = cs.number_of_values; has_null = cs.has_null; @@ -418,7 +415,7 @@ table_with_metadata read_orc(orc_reader_options const& options, rmm::mr::device_ CUDF_FUNC_RANGE(); auto datasources = make_datasources(options.get_source()); - auto reader = std::make_unique( + auto reader = std::make_unique( std::move(datasources), options, cudf::get_default_stream(), mr); return reader->read(options); @@ -436,7 +433,7 @@ void write_orc(orc_writer_options const& options) auto sinks = make_datasinks(options.get_sink()); CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for ORC writing"); - auto writer = std::make_unique( + auto writer = std::make_unique( std::move(sinks[0]), options, io_detail::single_write_mode::YES, cudf::get_default_stream()); writer->write(options.get_table()); @@ -452,7 +449,7 @@ orc_chunked_writer::orc_chunked_writer(chunked_orc_writer_options const& options auto sinks = make_datasinks(options.get_sink()); CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for ORC writing"); - writer = std::make_unique( + writer = std::make_unique( std::move(sinks[0]), options, io_detail::single_write_mode::NO, cudf::get_default_stream()); } @@ -896,5 +893,4 @@ chunked_parquet_writer_options_builder::max_page_fragment_size(size_type val) return *this; } -} // namespace io -} // namespace cudf +} // namespace cudf::io diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index 6417afeb7e3..5e5caa30873 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -354,8 +354,8 @@ class writer::impl { // Internal states, filled during `write()` and written to sink during `write` and `close()`. std::unique_ptr _table_meta; - cudf::io::orc::FileFooter _ffooter; - cudf::io::orc::Metadata _orc_meta; + FileFooter _ffooter; + Metadata _orc_meta; persisted_statistics _persisted_stripe_statistics; // Statistics data saved between calls. bool _closed = false; // To track if the output has been written to sink. };