-
Notifications
You must be signed in to change notification settings - Fork 96
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
98dd414
commit 67da239
Showing
7 changed files
with
397 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
#pragma once | ||
#ifndef FRI_H | ||
#define FRI_H | ||
|
||
#include <cuda_runtime.h> | ||
|
||
#include "gpu-utils/device_context.cuh" | ||
|
||
namespace fri { | ||
|
||
struct FriConfig { | ||
device_context::DeviceContext ctx; | ||
bool are_evals_on_device; | ||
bool are_domain_elements_on_device; | ||
bool are_results_on_device; | ||
bool is_async; | ||
} | ||
|
||
/** | ||
* @brief Folds a layer's evaluation into a degree d/2 evaluation using the provided folding factor alpha. | ||
* | ||
* @param evals Pointer to the array of evaluation in the current FRI layer. | ||
* @param domain_xs Pointer to a subset of line domain values. | ||
* @param alpha The folding factor used in the FRI protocol. | ||
* @param folded_evals Pointer to the array where the folded evaluations will be stored. | ||
* @param n The number of evaluations in the original layer (before folding). | ||
* | ||
* @tparam S The scalar field type used for domain_xs. | ||
* @tparam E The evaluation type, typically the same as the field element type. | ||
* | ||
* @note The size of the output array 'folded_evals' should be half of 'n', as folding reduces the number of evaluations by half. | ||
*/ | ||
template <typename S, typename E> | ||
cudaError_t fold_line(E* eval, S* domain_xs, E alpha, E* folded_evals, int n, FriConfig cfg); | ||
|
||
/** | ||
* @brief Folds a layer of FRI evaluations from a circle into a line. | ||
* | ||
* This function performs the folding operation in the FRI (Fast Reed-Solomon IOP of Proximity) protocol, | ||
* specifically for evaluations on a circle domain. It takes a layer of evaluations on a circle and folds | ||
* them into a line using the provided folding factor alpha. | ||
* | ||
* @param evals Pointer to the array of evaluations in the current FRI layer, representing points on a circle. | ||
* @param domain_ys Pointer to the array of y-coordinates of the circle points in the domain of the circle that evals represents. | ||
* @param alpha The folding factor used in the FRI protocol. | ||
* @param folded_evals Pointer to the array where the folded evaluations (now on a line) will be stored. | ||
* @param n The number of evaluations in the original layer (before folding). | ||
* | ||
* @tparam S The scalar field type used for alpha and domain_ys. | ||
* @tparam E The evaluation type, typically the same as the field element type. | ||
* | ||
* @note The size of the output array 'folded_evals' should be half of 'n', as folding reduces the number of evaluations by half. | ||
* @note This function is specifically designed for folding evaluations from a circular domain to a linear domain. | ||
*/ | ||
|
||
template <typename S, typename E> | ||
cudaError_t fold_circle_into_line(E* evals, S* domain_ys, S alpha, E* folded_evals, int n, FriConfig cfg); | ||
|
||
} // namespace fri |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#include "fri.cuh" | ||
#include "gpu-utils/device_context.cuh" | ||
|
||
namespace fri { | ||
/** | ||
* Extern "C" version of [fold_line](@ref fold_line) function with the following values of | ||
* template parameters (where the field is given by `-DFIELD` env variable during build): | ||
* - `E` is the extension field type used for evaluations and alpha | ||
* - `S` is the scalar field type used for domain elements | ||
* @param line_eval Pointer to the array of evaluations on the line | ||
* @param domain_elements Pointer to the array of domain elements | ||
* @param alpha The folding factor | ||
* @param folded_evals Pointer to the array where folded evaluations will be stored | ||
* @param n The number of evaluations | ||
* @param ctx The device context; if the stream is not 0, then everything is run async | ||
* @return `cudaSuccess` if the execution was successful and an error code otherwise. | ||
*/ | ||
extern "C" cudaError_t CONCAT_EXPAND(CURVE, fold_line)(extension_t* line_eval, scalar_t* domain_elements, extension_t alpha, extension_t* folded_evals, int n, FriConfig cfg) { | ||
return fold_line(line_eval, domain_elements, alpha, folded_evals, n, cfg); | ||
}; | ||
|
||
/** | ||
* Extern "C" version of [fold_circle_into_line](@ref fold_circle_into_line) function with the following values of | ||
* template parameters (where the field is given by `-DFIELD` env variable during build): | ||
* - `E` is the extension field type used for evaluations and alpha | ||
* - `S` is the scalar field type used for domain elements | ||
* @param circle_evals Pointer to the array of evaluations on the circle | ||
* @param domain_elements Pointer to the array of domain elements | ||
* @param alpha The folding factor | ||
* @param folded_line_evals Pointer to the array where folded evaluations will be stored | ||
* @param n The number of evaluations | ||
* @param ctx The device context; if the stream is not 0, then everything is run async | ||
* @return `cudaSuccess` if the execution was successful and an error code otherwise. | ||
*/ | ||
extern "C" cudaError_t CONCAT_EXPAND(CURVE, fold_circle_into_line)(extension_t* circle_evals, scalar_t* domain_elements, extension_t alpha, extension_t* folded_line_evals, int n, FriConfig cfg) { | ||
return fold_circle_into_line(circle_evals, domain_elements, alpha, folded_line_evals, n, cfg); | ||
}; | ||
} // namespace fri |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
#include <cuda_runtime.h> | ||
|
||
#include "fri.h" | ||
|
||
#include "fields/field.cuh" | ||
#include "gpu-utils/error_handler.cuh" | ||
#include "gpu-utils/device_context.cuh" | ||
|
||
namespace fri { | ||
|
||
namespace { | ||
template <typename S, typename E> | ||
__device__ void ibutterfly(E& v0, E& v1, const S& itwid) { | ||
E tmp = v0; | ||
v0 = tmp + v1; | ||
v1 = (tmp - v1) * itwid; | ||
} | ||
|
||
template <typename S, typename E> | ||
__global__ void fold_line_kernel(E* evals, S* domain_xs, E alpha, E* folded_evals, int n) { | ||
int idx = blockIdx.x * blockDim.x + threadIdx.x; | ||
if (idx % 2 == 0 && idx < n) { | ||
E f_x = evals[idx]; // even | ||
E f_x_neg = evals[idx + 1]; // odd | ||
ibutterfly(f_x, f_x_neg, domain_xs[idx / 2].inverse()); | ||
auto folded_evals_idx = idx / 2; | ||
folded_evals[folded_evals_idx] = f_x + alpha * f_x_neg; | ||
} | ||
} | ||
|
||
template <typename S, typename E> | ||
__global__ void fold_circle_into_line_kernel(E* eval, S* domain_ys, S alpha, S alpha_sq, E* folded_evals, int n) { | ||
int idx = blockIdx.x * blockDim.x + threadIdx.x; | ||
if (idx % 2 == 0 && idx < n) { | ||
E f0_px = eval[idx]; | ||
E f1_px = eval[idx + 1]; | ||
ibutterfly(f0_px, f1_px, domain_ys[idx / 2].inverse()); | ||
E f_prime = f0_px + alpha * f1_px; | ||
auto folded_evals_idx = idx / 2; | ||
folded_evals[folded_evals_idx] = folded_evals[folded_evals_idx] * alpha_sq + f_prime; | ||
} | ||
} | ||
} | ||
|
||
template <typename S, typename E> | ||
cudaError_t fold_line(E* evals, S* domain_xs, E alpha, E* folded_evals, int n, FriConfig cfg) { | ||
CHK_INIT_IF_RETURN(); | ||
|
||
cudaStream_t stream = cfg.ctx.stream; | ||
// Allocate and move line domain evals to device if necessary | ||
E* d_evals; | ||
if (!cfg.are_evals_on_device) { | ||
auto data_size = sizeof(E) * n; | ||
CHK_IF_RETURN(cudaMallocAsync(&d_evals, data_size, stream)); | ||
CHK_IF_RETURN(cudaMemcpyAsync(d_evals, evals, data_size, cudaMemcpyHostToDevice, stream)); | ||
} else { | ||
d_evals = evals; | ||
} | ||
|
||
// Allocate and move domain's elements to device if necessary | ||
S* d_domain_ys; | ||
if (!cfg.are_domain_elements_on_device) { | ||
auto data_size = sizeof(S) * n/2; | ||
CHK_IF_RETURN(cudaMallocAsync(&d_domain_ys, data_size, stream)); | ||
CHK_IF_RETURN(cudaMemcpyAsync(d_domain_ys, domain_ys, data_size, cudaMemcpyHostToDevice, stream)); | ||
} else { | ||
d_domain_ys = domain_ys; | ||
} | ||
|
||
// Allocate folded_evals if pointer is not a device pointer | ||
E* d_folded_evals; | ||
if (!cfg.are_results_on_device) { | ||
CHK_IF_RETURN(cudaMallocAsync(&d_folded_evals, sizeof(E) * n/2, stream)); | ||
} else { | ||
d_out = out; | ||
} | ||
|
||
int block_size = 256; | ||
int num_blocks = (n / 2 + block_size - 1) / block_size; | ||
fold_line_kernel<<<num_blocks, block_size, 0 stream>>>(eval, domain_xs, alpha, folded_evals, n); | ||
|
||
// Move folded_evals back to host if requested | ||
if (!cfg.are_results_on_device) { | ||
CHK_IF_RETURN(cudaMemcpyAsync(folded_evals, d_folded_evals, sizeof(E) * n/2, cudaMemcpyDeviceToHost, stream)); | ||
} | ||
|
||
// Sync if stream is default stream | ||
if (stream == 0) CHK_IF_RETURN(cudaStreamSynchronize(stream)); | ||
|
||
return CHK_LAST(); | ||
} | ||
|
||
template <typename S, typename E> | ||
cudaError_t fold_circle_into_line(E* evals, S* domain_ys, S alpha, E* folded_evals, int n, FriConfig cfg) { | ||
CHK_INIT_IF_RETURN(); | ||
|
||
cudaStream_t stream = cfg.ctx.stream; | ||
// Allocate and move circle domain evals to device if necessary | ||
E* d_evals; | ||
if (!cfg.are_evals_on_device) { | ||
auto data_size = sizeof(E) * n; | ||
CHK_IF_RETURN(cudaMallocAsync(&d_evals, data_size, stream)); | ||
CHK_IF_RETURN(cudaMemcpyAsync(d_evals, evals, data_size, cudaMemcpyHostToDevice, stream)); | ||
} else { | ||
d_evals = evals; | ||
} | ||
|
||
// Allocate and move domain's elements to device if necessary | ||
S* d_domain_ys; | ||
if (!cfg.are_domain_elements_on_device) { | ||
auto data_size = sizeof(S) * n/2; | ||
CHK_IF_RETURN(cudaMallocAsync(&d_domain_ys, data_size, stream)); | ||
CHK_IF_RETURN(cudaMemcpyAsync(d_domain_ys, domain_ys, data_size, cudaMemcpyHostToDevice, stream)); | ||
} else { | ||
d_domain_ys = domain_ys; | ||
} | ||
|
||
// Allocate folded_evals if pointer is not a device pointer | ||
E* d_folded_evals; | ||
bool folded_evals_on_host = is_host_ptr(folded_evals); | ||
if (!cfg.are_results_on_device) { | ||
CHK_IF_RETURN(cudaMallocAsync(&d_folded_evals, sizeof(E) * n/2, stream)); | ||
} else { | ||
d_out = out; | ||
} | ||
|
||
S alpha_sq = alpha * alpha; | ||
int block_size = 256; | ||
int num_blocks = (n / 2 + block_size - 1) / block_size; | ||
fold_circle_into_line_kernel<<<num_blocks, block_size, 0 stream>>>(d_evals, d_domain_ys, alpha, alpha_sq, d_folded_evals, n); | ||
|
||
// Move folded_evals back to host if requested | ||
if (!cfg.are_results_on_device) { | ||
CHK_IF_RETURN(cudaMemcpyAsync(folded_evals, d_folded_evals, sizeof(E) * n/2, cudaMemcpyDeviceToHost, stream)); | ||
} | ||
|
||
// Sync if stream is default stream | ||
if (stream == 0) CHK_IF_RETURN(cudaStreamSynchronize(stream)); | ||
|
||
return CHK_LAST(); | ||
} | ||
} // namespace fri |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,3 +17,4 @@ cmake = "0.1.50" | |
[features] | ||
default = [] | ||
devmode = ["icicle-core/devmode"] | ||
fri = [] |
Oops, something went wrong.