50 LOG(
"%s\n", __FUNCTION__);
51 LOG(
"\tMemory Allocation : ");
57 bool bSupportDouble =
false;
75 const long long int pnXY = pnX * pnY;
86 Vertex* device_pc_data =
nullptr;
94 ulonglong gridSize2 = (pnXY + blockSize - 1) / blockSize;
95 ulonglong gridSize3 = (pnXY * 4 + blockSize - 1) / blockSize;
96 cuDoubleComplex* device_dst =
nullptr;
98 HANDLE_ERROR(cudaMalloc((
void**)&device_dst, pnXY *
sizeof(cuDoubleComplex)));
100 cuDoubleComplex* src;
101 cufftDoubleComplex *fftsrc;
102 cufftDoubleComplex *fftdst;
103 HANDLE_ERROR(cudaMalloc((
void**)&src, pnXY * 4 *
sizeof(cuDoubleComplex)));
104 HANDLE_ERROR(cudaMalloc((
void**)&fftsrc, pnXY * 4 *
sizeof(cufftDoubleComplex)));
105 HANDLE_ERROR(cudaMalloc((
void**)&fftdst, pnXY * 4 *
sizeof(cufftDoubleComplex)));
115 for (
uint ch = 0; ch < nChannel; ch++)
117 LOG(
"\tCUDA Gen WRP <<<%llu, %d>>> : ", gridSize, blockSize);
118 HANDLE_ERROR(cudaMemset(src, 0, pnXY * 4 *
sizeof(cuDoubleComplex)));
119 HANDLE_ERROR(cudaMemset(fftsrc, 0, pnXY * 4 *
sizeof(cufftDoubleComplex)));
120 HANDLE_ERROR(cudaMemset(fftdst, 0, pnXY * 4 *
sizeof(cufftDoubleComplex)));
134 k, lambda, bRandomPhase, nAdd
137 HANDLE_ERROR(cudaMemset(device_dst, 0., pnXY *
sizeof(cuDoubleComplex)));
145 cudaError error = cudaGetLastError();
146 if (error != cudaSuccess) {
147 LOG(
"cudaGetLastError(): %s\n", cudaGetErrorName(error));
148 if (error == cudaErrorLaunchOutOfResources) {
151 gridSize = (
n_points + blockSize - 1) / blockSize;
152 gridSize2 = (pnXY + blockSize - 1) / blockSize;
153 gridSize3 = (pnXY * 4 + blockSize - 1) / blockSize;
159 LOG(
"\tCUDA FresnelPropagation <<<%llu, %d>>> : ", gridSize2, blockSize);
162 HANDLE_ERROR(cudaMemcpy(
complex_H[ch], device_dst,
sizeof(cuDoubleComplex) * pnXY, cudaMemcpyDeviceToHost));
166 error = cudaGetLastError();
167 if (error != cudaSuccess) {
168 LOG(
"cudaGetLastError(): %s\n", cudaGetErrorName(error));
void cudaGenWRP(const int &nBlocks, const int &nThreads, const int &n_pts_per_stream, Vertex *cuda_pc_data, cuDoubleComplex *cuda_dst, const WRPGpuConst *cuda_config)
void cudaFresnelPropagationWRP(const int &nBlocks, const int &nBlocks2, const int &nThreads, const int &nx, const int &ny, cuDoubleComplex *src, cuDoubleComplex *dst, cufftDoubleComplex *fftsrc, cufftDoubleComplex *fftdst, const WRPGpuConst *cuda_config)
ulonglong n_points
Number of points.
#define HANDLE_ERROR(err)
static cudaWrapper * getInstance()
OphPointCloudData obj_
Input Pointcloud Data.
bool GetRandomPhase()
Function for getting the random phase.
unsigned long long ulonglong
int getMaxThreads(int idx)
Real wrp_location
Location distance of WRP.
OphWRPConfig wrp_config_
Structure variable for WRP hologram configuration.
struct KernelConst WRPGpuConst
#define ELAPSED_TIME(x, y)
void setCurThreads(int thread)
int n_colors
Number of color channels.
int n_points
Number of points.
void calculateWRPGPU(void)
Real propagation_distance
Distance of Hologram plane.
Complex< Real > ** complex_H