forked from ROCm/rocFFT
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request ROCm#117 from bragadeesh/develop
bluestein fft initial
- Loading branch information
Showing
16 changed files
with
867 additions
and
85 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
|
||
/******************************************************************************* | ||
* Copyright (C) 2016 Advanced Micro Devices, Inc. All rights reserved. | ||
******************************************************************************/ | ||
|
||
#include <iostream> | ||
#include "kernel_launch.h" | ||
#include "rocfft_hip.h" | ||
#include "bluestein.h" | ||
|
||
|
||
template<typename T> | ||
rocfft_status chirp_launch(size_t N, size_t M, T* B, void *twiddles_large, int twl, int dir, hipStream_t rocfft_stream) | ||
{ | ||
dim3 grid((M-N)/64 + 1); | ||
dim3 threads(64); | ||
|
||
hipLaunchKernelGGL(HIP_KERNEL_NAME(chirp_device<T>), dim3(grid), dim3(threads), 0, rocfft_stream, N, M, B, (T *)twiddles_large, twl, dir); | ||
|
||
return rocfft_status_success; | ||
} | ||
|
||
|
||
void rocfft_internal_chirp(const void *data_p, void *back_p) | ||
{ | ||
DeviceCallIn *data = (DeviceCallIn *)data_p; | ||
|
||
size_t N = data->node->length[0]; | ||
size_t M = data->node->lengthBlue; | ||
|
||
int twl = 0; | ||
|
||
if(data->node->large1D > (size_t)256*256*256*256) printf("large1D twiddle size too large error"); | ||
else if(data->node->large1D > (size_t)256*256*256) twl = 4; | ||
else if(data->node->large1D > (size_t)256*256) twl = 3; | ||
else if(data->node->large1D > (size_t)256) twl = 2; | ||
else twl = 1; | ||
|
||
int dir = data->node->direction; | ||
|
||
hipStream_t rocfft_stream = data->rocfft_stream; | ||
|
||
if( data->node->precision == rocfft_precision_single) | ||
chirp_launch<float2>(N, M, (float2 *)data->bufOut[0], data->node->twiddles_large, twl, dir, rocfft_stream); | ||
else | ||
chirp_launch<double2>(N, M, (double2 *)data->bufOut[0], data->node->twiddles_large, twl, dir, rocfft_stream); | ||
} | ||
|
||
template<typename T> | ||
rocfft_status mul_launch(size_t numof, size_t totalWI, size_t N, size_t M, const T* A, T* B, size_t dim, size_t *lengths, size_t *stride_in, size_t *stride_out, int dir, int scheme, hipStream_t rocfft_stream) | ||
{ | ||
|
||
dim3 grid((totalWI-1)/64 + 1); | ||
dim3 threads(64); | ||
|
||
hipLaunchKernelGGL(HIP_KERNEL_NAME(mul_device<T>), dim3(grid), dim3(threads), 0, rocfft_stream, | ||
numof, totalWI, N, M, A, B, dim, lengths, stride_in, stride_out, dir, scheme); | ||
|
||
return rocfft_status_success; | ||
|
||
} | ||
|
||
void rocfft_internal_mul(const void *data_p, void *back_p) | ||
{ | ||
DeviceCallIn *data = (DeviceCallIn *)data_p; | ||
|
||
size_t N = data->node->length[0]; | ||
size_t M = data->node->lengthBlue; | ||
|
||
|
||
int scheme = 0; // fft mul | ||
if(data->node->scheme == CS_KERNEL_PAD_MUL) | ||
{ | ||
scheme = 1; // pad mul | ||
} | ||
else if(data->node->scheme == CS_KERNEL_RES_MUL) | ||
{ | ||
scheme = 2; // res mul | ||
} | ||
|
||
size_t cBytes; | ||
if(data->node->precision == rocfft_precision_single) | ||
{ | ||
cBytes = sizeof(float)*2; | ||
} | ||
else | ||
{ | ||
cBytes = sizeof(double)*2; | ||
} | ||
|
||
void *bufIn = data->bufIn[0]; | ||
void *bufOut = data->bufOut[0]; | ||
|
||
size_t numof = 0; | ||
if(scheme == 0) | ||
{ | ||
bufIn = ((char *)bufIn + M*cBytes); | ||
bufOut = ((char *)bufOut + 2*M*cBytes); | ||
|
||
numof = M; | ||
} | ||
else if(scheme == 1) | ||
{ | ||
bufOut = ((char *)bufOut + M*cBytes); | ||
|
||
numof = M; | ||
} | ||
else if(scheme == 2) | ||
{ | ||
numof = N; | ||
} | ||
|
||
size_t count = data->node->batch; | ||
for(size_t i=1; i<data->node->length.size(); i++) count *= data->node->length[i]; | ||
count *= numof; | ||
|
||
int dir = data->node->direction; | ||
|
||
hipStream_t rocfft_stream = data->rocfft_stream; | ||
|
||
|
||
if( data->node->precision == rocfft_precision_single) | ||
mul_launch<float2>(numof, count, N, M, (const float2 *)bufIn, (float2 *)bufOut, data->node->length.size(), | ||
data->node->devKernArg, data->node->devKernArg + 1*KERN_ARGS_ARRAY_WIDTH, data->node->devKernArg + 2*KERN_ARGS_ARRAY_WIDTH, dir, scheme, rocfft_stream); | ||
else | ||
mul_launch<double2>(numof, count, N, M, (const double2 *)bufIn, (double2 *)bufOut, data->node->length.size(), | ||
data->node->devKernArg, data->node->devKernArg + 1*KERN_ARGS_ARRAY_WIDTH, data->node->devKernArg + 2*KERN_ARGS_ARRAY_WIDTH, dir, scheme, rocfft_stream); | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
#ifndef BLUESTEIN_H | ||
#define BLUESTEIN_H | ||
|
||
#include "rocfft_hip.h" | ||
#include "common.h" | ||
|
||
|
||
template<typename T> | ||
__global__ void | ||
chirp_device(const size_t N, const size_t M, T* output, T *twiddles_large, const int twl, const int dir) | ||
{ | ||
size_t tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; | ||
|
||
T val = lib_make_vector2<T>(0, 0); | ||
|
||
if(twl == 1) | ||
val = TWLstep1(twiddles_large, (tx*tx)%(2*N)); | ||
else if(twl == 2) | ||
val = TWLstep2(twiddles_large, (tx*tx)%(2*N)); | ||
else if(twl == 3) | ||
val = TWLstep3(twiddles_large, (tx*tx)%(2*N)); | ||
else if(twl == 4) | ||
val = TWLstep4(twiddles_large, (tx*tx)%(2*N)); | ||
|
||
val.y *= (real_type_t<T>)(dir); | ||
|
||
if(tx == 0) | ||
{ | ||
output[tx] = val; | ||
output[tx + M] = val; | ||
} | ||
else if(tx < N) | ||
{ | ||
output[tx] = val; | ||
output[tx + M] = val; | ||
|
||
output[M - tx] = val; | ||
output[M - tx + M] = val; | ||
} | ||
else if(tx <= (M-N)) | ||
{ | ||
output[tx] = lib_make_vector2<T>(0, 0); | ||
output[tx + M] = lib_make_vector2<T>(0, 0); | ||
} | ||
} | ||
|
||
|
||
template<typename T> | ||
__global__ void | ||
mul_device(const size_t numof, const size_t totalWI, const size_t N, const size_t M, const T* input, T* output, const size_t dim, const size_t *lengths, | ||
const size_t *stride_in, const size_t *stride_out, const int dir, const int scheme) | ||
{ | ||
size_t tx = hipThreadIdx_x + hipBlockIdx_x * hipBlockDim_x; | ||
|
||
if(tx >= totalWI) | ||
return; | ||
|
||
size_t iOffset = 0; | ||
size_t oOffset = 0; | ||
|
||
size_t counter_mod = tx/numof; | ||
|
||
for(size_t i = dim; i>1; i--){ | ||
size_t currentLength = 1; | ||
for(size_t j=1; j<i; j++){ | ||
currentLength *= lengths[j]; | ||
} | ||
|
||
iOffset += (counter_mod / currentLength)*stride_in[i]; | ||
oOffset += (counter_mod / currentLength)*stride_out[i]; | ||
counter_mod = counter_mod % currentLength; | ||
} | ||
iOffset += counter_mod * stride_in[1]; | ||
oOffset += counter_mod * stride_out[1]; | ||
|
||
tx = tx%numof; | ||
if(scheme == 0) | ||
{ | ||
output += oOffset; | ||
|
||
T out = output[tx]; | ||
output[tx].x = input[tx].x * out.x - input[tx].y * out.y; | ||
output[tx].y = input[tx].x * out.y + input[tx].y * out.x; | ||
} | ||
else if(scheme == 1) | ||
{ | ||
T *chirp = output; | ||
|
||
input += iOffset; | ||
|
||
output += M; | ||
output += oOffset; | ||
|
||
if(tx < N) | ||
{ | ||
output[tx].x = input[tx].x * chirp[tx].x + input[tx].y * chirp[tx].y; | ||
output[tx].y = -input[tx].x * chirp[tx].y + input[tx].y * chirp[tx].x; | ||
} | ||
else | ||
{ | ||
output[tx] = lib_make_vector2<T>(0, 0); | ||
} | ||
} | ||
else if(scheme == 2) | ||
{ | ||
const T *chirp = input; | ||
|
||
input += 2*M; | ||
input += iOffset; | ||
|
||
output += oOffset; | ||
|
||
real_type_t<T> MI = 1.0 / (real_type_t<T>)M; | ||
output[tx].x = MI * ( input[tx].x * chirp[tx].x + input[tx].y * chirp[tx].y); | ||
output[tx].y = MI * (-input[tx].x * chirp[tx].y + input[tx].y * chirp[tx].x); | ||
|
||
} | ||
} | ||
|
||
|
||
#endif // BLUESTEIN_H | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.