1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install, copy or use the software.
10 // For Open Source Digital Holographic Library
12 // Openholo library is free software;
13 // you can redistribute it and/or modify it under the terms of the BSD 2-Clause license.
15 // Copyright (C) 2017-2024, Korea Electronics Technology Institute. All rights reserved.
16 // E-mail : contact.openholo@gmail.com
17 // Web : http://www.openholo.org
19 // Redistribution and use in source and binary forms, with or without modification,
20 // are permitted provided that the following conditions are met:
22 // 1. Redistribution's of source code must retain the above copyright notice,
23 // this list of conditions and the following disclaimer.
25 // 2. Redistribution's in binary form must reproduce the above copyright notice,
26 // this list of conditions and the following disclaimer in the documentation
27 // and/or other materials provided with the distribution.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the copyright holder or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
40 // This software contains opensource software released under GNU Generic Public License,
41 // NVDIA Software License Agreement, or CUDA supplement to Software License Agreement.
42 // Check whether software you use contains licensed software.
46 #ifndef ophWRPKernel_cu__
47 #define ophWRPKernel_cu__
48 #include "ophKernel.cuh"
49 #include <cuda_runtime_api.h>
51 #include <curand_kernel.h>
52 #include <curand_uniform.h>
53 #include <device_launch_parameters.h>
54 #include "ophWRP_GPU.h"
56 __global__ void cudaKernel_CalcDataWRP(cufftDoubleComplex *src, const WRPGpuConst* config)
58 ulonglong tid = blockIdx.x * blockDim.x + threadIdx.x;
59 __shared__ double ppX;
60 __shared__ double ppY;
64 __shared__ double ssX;
65 __shared__ double ssY;
68 __shared__ double lambda;
69 __shared__ double distance;
70 __shared__ double pi2;
81 lambda = config->lambda;
82 distance = config->propa_d;
85 v = 1 / (lambda * lambda);
96 double fy = (-pnY + h) / ssY;
98 double fx = (-pnX + w) / ssX;
100 double sqrtpart = sqrt(v - fxx - fyy);
102 cuDoubleComplex prop;
104 prop.y = z * sqrtpart;
106 exponent_complex(&prop);
112 cuDoubleComplex val2 = cuCmul(val, prop);
118 __global__ void cudaKernel_MoveDataPostWRP(cuDoubleComplex *src, cuDoubleComplex *dst, const WRPGpuConst* config)
120 ulonglong tid = blockIdx.x * blockDim.x + threadIdx.x;
124 __shared__ ulonglong pnXY;
126 if (threadIdx.x == 0)
138 ulonglong iSrc = pnX * 2 * (pnY / 2 + h) + pnX / 2;
140 dst[tid] = src[iSrc + w];
144 __global__ void cudaKernel_MoveDataPreWRP(cuDoubleComplex *src, cuDoubleComplex *dst, const WRPGpuConst* config)
146 ulonglong tid = blockIdx.x * blockDim.x + threadIdx.x;
150 __shared__ ulonglong pnXY;
152 if (threadIdx.x == 0)
164 ulonglong iDst = pnX * 2 * (pnY / 2 + h) + pnX / 2;
165 dst[iDst + w] = src[tid];
169 //__global__ void cudaKernel_GenWRP(Real* pc_dst, Real* amp_dst, const WRPGpuConst* config, const int n_points_stream, cuDoubleComplex* dst)
171 void cudaKernel_GenWRP(Vertex* pc_dst, const WRPGpuConst* config, const int n_points_stream, cuDoubleComplex* dst)
173 ulonglong tid = blockIdx.x * blockDim.x + threadIdx.x;
175 if (tid < n_points_stream)
177 __shared__ double ppX;
178 __shared__ double ppY;
181 __shared__ double dz;
182 __shared__ double dzz;
183 __shared__ double pi2;
185 __shared__ double lambda;
186 __shared__ bool random_phase;
187 __shared__ bool sign;
189 if (threadIdx.x == 0) {
194 dz = config->wrp_d - config->zmax;
197 lambda = config->lambda;
199 random_phase = config->bRandomPhase;
200 sign = dz > 0.0 ? true : false;
205 double x = pc_dst[tid].point.pos[_X];
206 double y = pc_dst[tid].point.pos[_Y];
207 double z = pc_dst[tid].point.pos[_Z];
208 double amp = pc_dst[tid].color.color[_R + config->iAmplitude];
212 double ppXX = ppX * ppX * 2;
213 //double dz = config->wrp_d - config->zmax;
214 double tw = fabs(lambda * dz / ppXX) * 2;
217 int tx = (int)(x / ppX) + hpnX;
218 int ty = (int)(y / ppY) + hpnY;
223 curand_init(4 * w * w, 0, 0, &state);
226 for (int wy = -w; wy < w; wy++)
228 double dy = wy * ppY;
229 double dyy = dy * dy;
231 int baseY = tmpY * pnX;
233 for (int wx = -w; wx < w; wx++) //WRP coordinate
237 if (tmpX >= 0 && tmpX < pnX && tmpY >= 0 && tmpY < pnY) {
238 int iDst = tmpX + baseY;
240 double dx = wx * ppX;
242 double r = sign ? sqrt(dx * dx + dyy + dzz) : -sqrt(dx * dx + dyy + dzz);
243 double randomData = random_phase ? curand_uniform_double(&state) : 1.0;
244 double randVal = randomData * pi2;
247 tmp.x = (amp * cos(k*r) * cos(randVal)) / r;
248 tmp.y = (-amp * sin(k*r) * sin(randVal)) / r;
250 #if defined(__cplusplus) && defined(__CUDACC__)
251 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
252 dst[iDst].x = atomicAdd(&dst[iDst].x, tmp.x);
253 dst[iDst].y = atomicAdd(&dst[iDst].y, tmp.y);
256 dst[iDst].x += tmp.x; // <-- sync problem
257 dst[iDst].y += tmp.y; // <-- sync problem
260 dst[iDst].x += tmp.x; // <-- sync problem
261 dst[iDst].y += tmp.y; // <-- sync problem
271 void cudaFresnelPropagationWRP(
272 const int &nBlocks, const int&nBlocks2, const int &nThreads, const int &nx, const int &ny,
273 cuDoubleComplex *src, cuDoubleComplex *dst, cufftDoubleComplex *fftsrc, cufftDoubleComplex *fftdst,
274 const WRPGpuConst* cuda_config)
276 cudaKernel_MoveDataPreWRP << <nBlocks, nThreads >> > (src, dst, cuda_config);
278 cudaFFT(nullptr, nx * 2, ny * 2, dst, fftsrc, CUFFT_FORWARD, false);
280 cudaKernel_CalcDataWRP << <nBlocks2, nThreads >> > (fftsrc, cuda_config);
282 cudaFFT(nullptr, nx * 2, ny * 2, fftsrc, fftdst, CUFFT_INVERSE, true);
284 cudaKernel_MoveDataPostWRP << <nBlocks, nThreads >> > (fftdst, src, cuda_config);
288 const int &nBlocks, const int &nThreads, const int &n_pts_per_stream,
289 //Real* cuda_pc_data, Real* cuda_amp_data,
290 Vertex* cuda_pc_data,
291 cuDoubleComplex* cuda_dst, const WRPGpuConst* cuda_config)
293 //cudaKernel_GenWRP << <nBlocks, nThreads >> > (cuda_pc_data, cuda_amp_data, cuda_config, n_pts_per_stream, cuda_dst);
294 cudaKernel_GenWRP << <nBlocks, nThreads >> > (cuda_pc_data, cuda_config, n_pts_per_stream, cuda_dst);
298 #endif // !OphWRPKernel_cu__