diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 09d917a30c1..c14b8302ef0 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -191,6 +191,10 @@ else
     else
         "$WORKSPACE/build.sh" cudf dask_cudf cudf_kafka -l --ptds
     fi
+
+    # If the examples grow too large to build here, move this step to the CPU build job
+    gpuci_logger "Building libcudf examples"
+    "$WORKSPACE/cpp/examples/build.sh"
 fi
 
 # Both regular and Project Flash proceed here
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index a6154e3db85..4d7296eb060 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -55,4 +55,7 @@ sed_runner "s|\(TAGFILES.*librmm/\).*|\1${NEXT_SHORT_TAG}|" cpp/doxygen/Doxyfile
 
 # README.md update
 sed_runner "s/version == ${CURRENT_SHORT_TAG}/version == ${NEXT_SHORT_TAG}/g" README.md
-sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" README.md
\ No newline at end of file
+sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" README.md
+
+# Libcudf examples update: bump the cudf branch pinned by the basic example
+sed_runner "s/CUDF_TAG \"branch-${CURRENT_SHORT_TAG}\"/CUDF_TAG \"branch-${NEXT_SHORT_TAG}\"/" cpp/examples/basic/CMakeLists.txt
diff --git a/cpp/examples/README.md b/cpp/examples/README.md
new file mode 100644
index 00000000000..30b291d38f4
--- /dev/null
+++ b/cpp/examples/README.md
@@ -0,0 +1,8 @@
+# Libcudf Examples
+
+This folder contains examples to demonstrate libcudf use cases. Running `build.sh` builds all
+libcudf examples.
+
+Current examples:
+
+- Basic: demonstrates a basic libcudf use case and how to build a custom application with libcudf.
diff --git a/cpp/examples/basic/4stock_5day.csv b/cpp/examples/basic/4stock_5day.csv new file mode 100644 index 00000000000..471912a3295 --- /dev/null +++ b/cpp/examples/basic/4stock_5day.csv @@ -0,0 +1,21 @@ +Company,Date,Open,High,Low,Close,Volume +MSFT,2021-03-03,232.16000366210938,233.5800018310547,227.25999450683594,227.55999755859375,33950400.0 +MSFT,2021-03-04,226.74000549316406,232.49000549316406,224.25999450683594,226.72999572753906,44584200.0 +MSFT,2021-03-05,229.52000427246094,233.27000427246094,226.4600067138672,231.60000610351562,41842100.0 +MSFT,2021-03-08,231.3699951171875,233.3699951171875,227.1300048828125,227.38999938964844,35245900.0 +MSFT,2021-03-09,232.8800048828125,235.3800048828125,231.6699981689453,233.77999877929688,33034000.0 +GOOG,2021-03-03,2067.2099609375,2088.51806640625,2010.0,2026.7099609375,1483100.0 +GOOG,2021-03-04,2023.3699951171875,2089.239990234375,2020.27001953125,2049.090087890625,2116100.0 +GOOG,2021-03-05,2073.1201171875,2118.110107421875,2046.4150390625,2108.5400390625,2193800.0 +GOOG,2021-03-08,2101.1298828125,2128.81005859375,2021.6099853515625,2024.1700439453125,1646000.0 +GOOG,2021-03-09,2070.0,2078.0400390625,2047.8299560546875,2052.699951171875,1696400.0 +AMZN,2021-03-03,3081.179931640625,3107.780029296875,2995.0,3005.0,3967200.0 +AMZN,2021-03-04,3012.0,3058.1298828125,2945.429931640625,2977.570068359375,5458700.0 +AMZN,2021-03-05,3005.0,3009.0,2881.0,3000.4599609375,5383400.0 +AMZN,2021-03-08,3015.0,3064.590087890625,2951.31005859375,2951.949951171875,4178500.0 +AMZN,2021-03-09,3017.989990234375,3090.9599609375,3005.14990234375,3062.85009765625,4023500.0 +AAPL,2021-03-03,124.80999755859375,125.70999908447266,121.83999633789062,122.05999755859375,112430400.0 +AAPL,2021-03-04,121.75,123.5999984741211,118.62000274658203,120.12999725341797,177275300.0 +AAPL,2021-03-05,120.9800033569336,121.94000244140625,117.56999969482422,121.41999816894531,153590400.0 
+AAPL,2021-03-08,120.93000030517578,121.0,116.20999908447266,116.36000061035156,153918600.0
+AAPL,2021-03-09,119.02999877929688,122.05999755859375,118.79000091552734,121.08999633789062,129159600.0
diff --git a/cpp/examples/basic/CMakeLists.txt b/cpp/examples/basic/CMakeLists.txt
new file mode 100644
index 00000000000..4d444f88a16
--- /dev/null
+++ b/cpp/examples/basic/CMakeLists.txt
@@ -0,0 +1,35 @@
+cmake_minimum_required(VERSION 3.18)
+
+project(basic_example VERSION 0.0.1 LANGUAGES C CXX CUDA)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CUDA_ARCHITECTURES "")
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+set(CPM_DOWNLOAD_VERSION 0.27.2)
+set(CPM_DOWNLOAD_LOCATION "${CMAKE_BINARY_DIR}/cmake/CPM_${CPM_DOWNLOAD_VERSION}.cmake")
+
+# cudf branch fetched when libcudf has to be built from source.
+# Keep this exact form: ci/release/update-version.sh rewrites it with sed.
+set(CUDF_TAG "branch-21.08")
+
+# Fetch the CPM.cmake script only if it is not already in the build directory
+if(NOT (EXISTS "${CPM_DOWNLOAD_LOCATION}"))
+  message(STATUS "Downloading CPM.cmake")
+  file(DOWNLOAD https://github.com/TheLartians/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake "${CPM_DOWNLOAD_LOCATION}")
+endif()
+
+include("${CPM_DOWNLOAD_LOCATION}")
+
+# Use an existing cudf package when one can be found, otherwise build it from source
+CPMFindPackage(NAME cudf
+    GIT_REPOSITORY https://github.com/rapidsai/cudf
+    GIT_TAG ${CUDF_TAG}
+    GIT_SHALLOW TRUE
+    SOURCE_SUBDIR cpp
+)
+
+# Configure your project here
+add_executable(${PROJECT_NAME} "src/process_csv.cpp")
+target_link_libraries(${PROJECT_NAME} cudf::cudf)
diff --git a/cpp/examples/basic/README.md b/cpp/examples/basic/README.md
new file mode 100644
index 00000000000..75f16e54033
--- /dev/null
+++ b/cpp/examples/basic/README.md
@@ -0,0 +1,23 @@
+# Basic Standalone libcudf C++ application
+
+This C++ example demonstrates a basic libcudf use case and provides a minimal
+example of building your own application based on libcudf using CMake.
+
+The example source code loads a csv file that contains stock prices from 4
+companies spanning 5 days, computes the average of the closing price
+for each company and writes the result in csv format.
+
+## Compile and execute
+
+```bash
+# Configure project
+cmake -S . -B build/
+# Build
+cmake --build build/ --parallel $PARALLEL_LEVEL
+# Execute
+build/basic_example
+```
+
+If your machine does not come with a pre-built libcudf binary, expect the
+first build to take some time, because libcudf is then built on the host machine.
+The build may be sped up by setting `PARALLEL_LEVEL` to a suitable number.
diff --git a/cpp/examples/basic/src/process_csv.cpp b/cpp/examples/basic/src/process_csv.cpp
new file mode 100644
index 00000000000..2467c97393b
--- /dev/null
+++ b/cpp/examples/basic/src/process_csv.cpp
@@ -0,0 +1,72 @@
+#include <cudf/aggregation.hpp>
+#include <cudf/groupby.hpp>
+#include <cudf/io/csv.hpp>
+#include <cudf/table/table.hpp>
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+/// Reads the CSV file at `file_path` using default reader options.
+cudf::io::table_with_metadata read_csv(std::string const& file_path)
+{
+  auto source_info = cudf::io::source_info(file_path);
+  auto builder     = cudf::io::csv_reader_options::builder(source_info);
+  auto options     = builder.build();
+  return cudf::io::read_csv(options);
+}
+
+/// Writes `tbl_view` to `file_path` in CSV format using default writer options.
+void write_csv(cudf::table_view const& tbl_view, std::string const& file_path)
+{
+  auto sink_info = cudf::io::sink_info(file_path);
+  auto builder   = cudf::io::csv_writer_options::builder(sink_info, tbl_view);
+  auto options   = builder.build();
+  cudf::io::write_csv(options);
+}
+
+/// Builds a request list holding one request that applies `agg` to `value`.
+std::vector<cudf::groupby::aggregation_request> make_single_aggregation_request(
+  std::unique_ptr<cudf::aggregation>&& agg, cudf::column_view value)
+{
+  std::vector<cudf::groupby::aggregation_request> requests;
+  auto& request  = requests.emplace_back();
+  request.values = value;
+  request.aggregations.push_back(std::move(agg));
+  return requests;
+}
+
+/// Returns a two-column table: each company and the mean of its closing prices.
+std::unique_ptr<cudf::table> average_closing_price(cudf::table_view stock_info_table)
+{
+  // Schema: | Company | Date | Open | High | Low | Close | Volume |
+  auto keys = cudf::table_view{{stock_info_table.column(0)}};  // Company
+  auto val  = stock_info_table.column(5);                      // Close
+
+  // Compute the average of each company's closing price with entire column
+  cudf::groupby::groupby grpby_obj(keys);
+  auto requests = make_single_aggregation_request(cudf::make_mean_aggregation(), val);
+
+  auto agg_results = grpby_obj.aggregate(requests);
+
+  // Assemble the result
+  auto result_key = std::move(agg_results.first);
+  auto result_val = std::move(agg_results.second[0].results[0]);
+  std::vector<cudf::column_view> columns{result_key->get_column(0), *result_val};
+  return std::make_unique<cudf::table>(cudf::table_view(columns));
+}
+
+int main()
+{
+  // Read data
+  auto stock_table_with_metadata = read_csv("4stock_5day.csv");
+
+  // Process
+  auto result = average_closing_price(*stock_table_with_metadata.tbl);
+
+  // Write out result
+  write_csv(*result, "4stock_5day_avg_close.csv");
+
+  return 0;
+}
diff --git a/cpp/examples/build.sh b/cpp/examples/build.sh
new file mode 100755
index 00000000000..c3cf75a556f
--- /dev/null
+++ b/cpp/examples/build.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+# Copyright (c) 2021, NVIDIA CORPORATION.
+
+# libcudf examples build script
+
+# Add libcudf examples build scripts down below
+
+# Stop at the first failing command so a failed configure is not followed by a build
+set -euo pipefail
+
+# Parallelism control
+PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}
+
+# Resolve the examples directory from this script's own location so the script
+# also works when WORKSPACE is not set
+EXAMPLES_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+################################################################################
+# Basic example
+BASIC_EXAMPLE_DIR="${EXAMPLES_DIR}/basic"
+BASIC_EXAMPLE_BUILD_DIR="${BASIC_EXAMPLE_DIR}/build"
+
+# Configure
+cmake -S "${BASIC_EXAMPLE_DIR}" -B "${BASIC_EXAMPLE_BUILD_DIR}"
+# Build
+cmake --build "${BASIC_EXAMPLE_BUILD_DIR}" -j"${PARALLEL_LEVEL}"