1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install, copy or use the software.
10 // For Open Source Digital Holographic Library
12 // Openholo library is free software;
13 // you can redistribute it and/or modify it under the terms of the BSD 2-Clause license.
15 // Copyright (C) 2017-2024, Korea Electronics Technology Institute. All rights reserved.
16 // E-mail : contact.openholo@gmail.com
17 // Web : http://www.openholo.org
19 // Redistribution and use in source and binary forms, with or without modification,
20 // are permitted provided that the following conditions are met:
22 // 1. Redistribution's of source code must retain the above copyright notice,
23 // this list of conditions and the following disclaimer.
25 // 2. Redistribution's in binary form must reproduce the above copyright notice,
26 // this list of conditions and the following disclaimer in the documentation
27 // and/or other materials provided with the distribution.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the copyright holder or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
40 // This software contains opensource software released under GNU Generic Public License,
41 // NVDIA Software License Agreement, or CUDA supplement to Software License Agreement.
42 // Check whether software you use contains licensed software.
46 #ifndef ophLFKernel_cu__
47 #define ophLFKernel_cu__
49 #include "ophKernel.cuh"
50 #include "ophLightField_GPU.h"
51 #include <curand_kernel.h>
53 __global__ void cudaKernel_CalcDataLF(cufftDoubleComplex *src, const LFGpuConst* config)
55 ulonglong tid = blockIdx.x * blockDim.x + threadIdx.x;
56 __shared__ double s_ppX;
57 __shared__ double s_ppY;
60 __shared__ int s_pnXY;
61 __shared__ double s_ssX;
62 __shared__ double s_ssY;
63 __shared__ double s_z;
64 __shared__ double s_v;
65 __shared__ double s_lambda;
66 __shared__ double s_distance;
67 __shared__ double s_pi2;
75 s_pnXY = s_pnX * s_pnY;
76 s_ssX = s_pnX * s_ppX * 2;
77 s_ssY = s_pnY * s_ppY * 2;
78 s_lambda = config->lambda;
79 s_distance = config->distance;
81 s_z = s_distance * s_pi2;
82 s_v = 1 / (s_lambda * s_lambda);
93 double fy = (-s_pnY + h) / s_ssY;
95 double fx = (-s_pnX + w) / s_ssX;
97 double sqrtpart = sqrt(s_v - fxx - fyy);
101 prop.y = s_z * sqrtpart;
103 exponent_complex(&prop);
109 cuDoubleComplex val2 = cuCmul(val, prop);
116 __global__ void cudaKernel_MoveDataPostLF(cufftDoubleComplex *src, cuDoubleComplex *dst, const LFGpuConst* config)
118 ulonglong tid = blockIdx.x * blockDim.x + threadIdx.x;
122 __shared__ ulonglong pnXY;
124 if (threadIdx.x == 0)
136 ulonglong iSrc = pnX * 2 * (pnY / 2 + h) + pnX / 2;
138 dst[tid] = src[iSrc + w];
142 __global__ void cudaKernel_MoveDataPreLF(cuDoubleComplex *src, cufftDoubleComplex *dst, const LFGpuConst* config)
144 ulonglong tid = blockIdx.x * blockDim.x + threadIdx.x;
148 __shared__ ulonglong pnXY;
150 if (threadIdx.x == 0)
162 ulonglong iDst = pnX * 2 * (pnY / 2 + h) + pnX / 2;
163 dst[iDst + w] = src[tid];
167 #if false // use constant
168 __global__ void cudaKernel_convertLF2ComplexField(/*const LFGpuConst *config, */uchar1** LF, cufftDoubleComplex* output)
170 int tid = threadIdx.x + blockIdx.x * blockDim.x;
172 if (tid < img_resolution[0])
174 int c = tid % img_resolution[1];
175 int r = tid / img_resolution[1];
176 int iWidth = c * channel_info[0] + channel_info[1];
177 int cWidth = (img_resolution[1] * channel_info[0] + 3) & ~3;
179 int src = r * cWidth + iWidth;
180 int dst = (r * img_resolution[1] + c) * img_number[0];
181 for (int k = 0; k < img_number[0]; k++)
183 output[dst + k] = make_cuDoubleComplex((double)LF[k][src].x, 0);
188 __global__ void cudaKernel_convertLF2ComplexField(const LFGpuConst *config, uchar1** LF, cufftDoubleComplex* output)
190 int tid = threadIdx.x + blockIdx.x * blockDim.x;
197 int nChannel = config->nChannel;
198 int iAmplitude = config->iAmp;
204 int iWidth = c * nChannel + iAmplitude;
205 int cWidth = (rX * nChannel + 3) & ~3;
207 int src = r * cWidth + iWidth;
208 int dst = (r * rX + c) * N;
209 for (int k = 0; k < N; k++)
211 output[dst + k] = make_cuDoubleComplex((double)LF[k][src].x, 0);
216 __global__ void cudaKernel_MultiplyPhase(const LFGpuConst *config, cufftDoubleComplex* in, cufftDoubleComplex* output)
218 int tid = threadIdx.x + blockIdx.x * blockDim.x;
220 __shared__ double s_pi2;
227 __shared__ bool s_bRandomPhase;
228 __shared__ int s_iAmp;
230 if (threadIdx.x == 0)
239 s_bRandomPhase = config->randomPhase;
240 s_iAmp = config->iAmp;
253 curand_init(s_N * s_R * (s_iAmp + 1), 0, 0, &state);
256 int src = (r * s_rX + c) * s_N;
257 int dst = c * s_nX + r * s_rX * s_N;
259 for (int n = 0; n < s_N; n++)
261 double randomData = s_bRandomPhase ? curand_uniform_double(&state) : 1.0;
263 cufftDoubleComplex phase = make_cuDoubleComplex(0, randomData * s_pi2);
264 exponent_complex(&phase);
266 cufftDoubleComplex val = in[src + n];
267 int cc = n % s_nX; // 0 ~ 9
268 int rr = n / s_nX; // 0 ~ 9
269 output[dst + cc + rr * s_nX * s_rX] = cuCmul(val, phase);
277 void cudaConvertLF2ComplexField_Kernel(CUstream_st* stream, const int &nBlocks, const int &nThreads, const LFGpuConst *config, uchar1** LF, cufftDoubleComplex* output)
279 //cudaKernel_convertLF2ComplexField << <nBlocks, nThreads, 0, stream >> > (config, LF, output);
280 cudaKernel_convertLF2ComplexField << < nBlocks, nThreads >> > (config, LF, output);
282 if (cudaDeviceSynchronize() != cudaSuccess)
286 void cudaFFT_LF(cufftHandle *plan, CUstream_st* stream, const int &nBlocks, const int &nThreads, const int &nx, const int &ny, cufftDoubleComplex* in_field, cufftDoubleComplex* output_field, const int &direction)
288 //cudaFFT(nullptr, nx, ny, in_field, output_field, CUFFT_FORWARD, false);
291 //fftShift << <nBlocks, nThreads, 0, stream >> > (N, nx, ny, in_field, output_field, false);
292 fftShift << < nBlocks, nThreads >> > (N, nx, ny, in_field, output_field, false);
296 result = cufftExecZ2Z(*plan, output_field, in_field, CUFFT_FORWARD);
298 result = cufftExecZ2Z(*plan, output_field, in_field, CUFFT_INVERSE);
300 if (result != CUFFT_SUCCESS)
303 if (cudaDeviceSynchronize() != cudaSuccess)
306 //fftShift << < nBlocks, nThreads, 0, stream >> > (N, nx, ny, in_field, output_field, false);
307 fftShift << < nBlocks, nThreads >> > (N, nx, ny, in_field, output_field, false);
310 void procMultiplyPhase(CUstream_st* stream, const int &nBlocks, const int &nThreads, const LFGpuConst *config, cufftDoubleComplex* in, cufftDoubleComplex* out)
312 //cudaKernel_MultiplyPhase << <nBlocks, nThreads, 0, stream >> > (config, in, output);
313 cudaKernel_MultiplyPhase << <nBlocks, nThreads >> > (config, in, out);
315 if (cudaDeviceSynchronize() != cudaSuccess)
319 void cudaFresnelPropagationLF(
320 const int &nBlocks, const int&nBlocks2, const int &nThreads, const int &nx, const int &ny,
321 cufftDoubleComplex *src, cufftDoubleComplex *tmp, cufftDoubleComplex *tmp2, cufftDoubleComplex *dst,
322 const LFGpuConst* cuda_config)
325 cudaKernel_MoveDataPreLF << <nBlocks, nThreads >> > (src, tmp, cuda_config);
326 error = cudaDeviceSynchronize();
327 if (error != cudaSuccess)
329 LOG("cudaDeviceSynchronize(%d) : Failed\n", __LINE__);
331 cudaFFT(nullptr, nx * 2, ny * 2, tmp, tmp2, CUFFT_FORWARD, false);
333 cudaKernel_CalcDataLF << <nBlocks2, nThreads >> > (tmp2, cuda_config);
334 error = cudaDeviceSynchronize();
335 if (error != cudaSuccess)
337 LOG("cudaDeviceSynchronize(%d) : Failed\n", __LINE__);
339 cudaFFT(nullptr, nx * 2, ny * 2, tmp2, tmp, CUFFT_INVERSE, true);
341 cudaKernel_MoveDataPostLF << <nBlocks, nThreads >> > (tmp, dst, cuda_config);
342 error = cudaDeviceSynchronize();
343 if (error != cudaSuccess)
345 LOG("cudaDeviceSynchronize(%d) : Failed\n", __LINE__);
350 #endif // !ophLFKernel_cu__