Openholo  v4.0
Open Source Digital Holographic Library
ophLFKernel.cu
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install, copy or use the software.
//
//
//                           License Agreement
//                For Open Source Digital Holographic Library
//
// Openholo library is free software;
// you can redistribute it and/or modify it under the terms of the BSD 2-Clause license.
//
// Copyright (C) 2017-2024, Korea Electronics Technology Institute. All rights reserved.
// E-mail : contact.openholo@gmail.com
// Web : http://www.openholo.org
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the copyright holder or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
// This software contains open source software released under the GNU General Public License,
// the NVIDIA Software License Agreement, or the CUDA Supplement to Software License Agreement.
// Check whether the software you use contains such licensed software.
//
//M*/

#ifndef ophLFKernel_cu__
#define ophLFKernel_cu__

#include "ophKernel.cuh"
#include "ophLightField_GPU.h"
#include <curand_kernel.h>

// Multiplies each sample of the 2x zero-padded spectrum by the angular-spectrum
// free-space transfer function for the configured propagation distance.
__global__ void cudaKernel_CalcDataLF(cufftDoubleComplex *src, const LFGpuConst* config)
{
    ulonglong tid = blockIdx.x * blockDim.x + threadIdx.x;
    __shared__ double s_ppX;
    __shared__ double s_ppY;
    __shared__ int s_pnX;
    __shared__ int s_pnY;
    __shared__ int s_pnXY;
    __shared__ double s_ssX;
    __shared__ double s_ssY;
    __shared__ double s_z;
    __shared__ double s_v;
    __shared__ double s_lambda;
    __shared__ double s_distance;
    __shared__ double s_pi2;

    if (threadIdx.x == 0)
    {
        s_ppX = config->ppX;
        s_ppY = config->ppY;
        s_pnX = config->pnX;
        s_pnY = config->pnY;
        s_pnXY = s_pnX * s_pnY;
        s_ssX = s_pnX * s_ppX * 2;   // physical size of the zero-padded (2x) grid
        s_ssY = s_pnY * s_ppY * 2;
        s_lambda = config->lambda;
        s_distance = config->distance;
        s_pi2 = config->pi2;
        s_z = s_distance * s_pi2;
        s_v = 1 / (s_lambda * s_lambda);
    }
    __syncthreads();

    if (tid < s_pnXY * 4)
    {
        int pnX2 = s_pnX * 2;

        int w = tid % pnX2;
        int h = tid / pnX2;

        // spatial frequencies on the padded grid
        double fy = (-s_pnY + h) / s_ssY;
        double fyy = fy * fy;
        double fx = (-s_pnX + w) / s_ssX;
        double fxx = fx * fx;
        double sqrtpart = sqrt(s_v - fxx - fyy);

        // transfer function: exp(i * 2*pi*z * sqrt(1/lambda^2 - fx^2 - fy^2))
        cuDoubleComplex prop;
        prop.x = 0;
        prop.y = s_z * sqrtpart;

        exponent_complex(&prop);

        cuDoubleComplex val;
        val.x = src[tid].x;
        val.y = src[tid].y;

        cuDoubleComplex val2 = cuCmul(val, prop);

        src[tid].x = val2.x;
        src[tid].y = val2.y;
    }
}
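
/*
 * The multiply above applies the angular-spectrum transfer function
 *     H(fx, fy) = exp(i * 2*pi * z * sqrt(1/lambda^2 - fx^2 - fy^2)),
 * with z = config->distance and (fx, fy) sampled on the 2x zero-padded grid. A
 * minimal host-side sketch of the same per-sample update is given below for
 * reference; the helper name and the explicit handling of evanescent frequencies
 * (arg < 0) are assumptions of the sketch, not behavior of the kernel.
 */
#if 0   // illustrative sketch only, not compiled
#include <cmath>
#include <complex>

static std::complex<double> propagate_sample(std::complex<double> u,
                                             double fx, double fy,
                                             double lambda, double z)
{
    const double kPi = 3.14159265358979323846;
    double arg = 1.0 / (lambda * lambda) - fx * fx - fy * fy;
    if (arg < 0.0)
        return std::complex<double>(0.0, 0.0);   // drop evanescent components
    double phase = 2.0 * kPi * z * std::sqrt(arg);
    return u * std::exp(std::complex<double>(0.0, phase));
}
#endif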

// Crops the centered pnX x pnY region out of the 2x zero-padded buffer into dst.
__global__ void cudaKernel_MoveDataPostLF(cufftDoubleComplex *src, cuDoubleComplex *dst, const LFGpuConst* config)
{
    ulonglong tid = blockIdx.x * blockDim.x + threadIdx.x;

    __shared__ int pnX;
    __shared__ int pnY;
    __shared__ ulonglong pnXY;

    if (threadIdx.x == 0)
    {
        pnX = config->pnX;
        pnY = config->pnY;
        pnXY = pnX * pnY;
    }
    __syncthreads();

    if (tid < pnXY)
    {
        int w = tid % pnX;
        int h = tid / pnX;
        ulonglong iSrc = pnX * 2 * (pnY / 2 + h) + pnX / 2;

        dst[tid] = src[iSrc + w];
    }
}

// Embeds the pnX x pnY field src into the center of the 2x zero-padded buffer dst.
__global__ void cudaKernel_MoveDataPreLF(cuDoubleComplex *src, cufftDoubleComplex *dst, const LFGpuConst* config)
{
    ulonglong tid = blockIdx.x * blockDim.x + threadIdx.x;

    __shared__ int pnX;
    __shared__ int pnY;
    __shared__ ulonglong pnXY;

    if (threadIdx.x == 0)
    {
        pnX = config->pnX;
        pnY = config->pnY;
        pnXY = pnX * pnY;
    }
    __syncthreads();

    if (tid < pnXY)
    {
        int w = tid % pnX;
        int h = tid / pnX;
        ulonglong iDst = pnX * 2 * (pnY / 2 + h) + pnX / 2;
        dst[iDst + w] = src[tid];
    }
}
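
/*
 * Index layout shared by the two move kernels: with padded width pnX * 2, row h of
 * the original field starts at padded offset pnX * 2 * (pnY / 2 + h) + pnX / 2, so
 * the field sits centered in a (2 * pnX) x (2 * pnY) canvas. Worked example with
 * pnX = pnY = 4 (values chosen purely for illustration): (h, w) = (0, 0) maps to
 * 8 * (2 + 0) + 2 = 18, i.e. padded row 2, column 2, the top-left corner of the
 * centered 4 x 4 block.
 */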

#if false // use constant
// Disabled variant that reads the image geometry from __constant__ memory
// (img_resolution, channel_info, img_number) instead of the LFGpuConst argument.
__global__ void cudaKernel_convertLF2ComplexField(/*const LFGpuConst *config, */uchar1** LF, cufftDoubleComplex* output)
{
    int tid = threadIdx.x + blockIdx.x * blockDim.x;

    if (tid < img_resolution[0])
    {
        int c = tid % img_resolution[1];
        int r = tid / img_resolution[1];
        int iWidth = c * channel_info[0] + channel_info[1];
        int cWidth = (img_resolution[1] * channel_info[0] + 3) & ~3;

        int src = r * cWidth + iWidth;
        int dst = (r * img_resolution[1] + c) * img_number[0];
        for (int k = 0; k < img_number[0]; k++)
        {
            output[dst + k] = make_cuDoubleComplex((double)LF[k][src].x, 0);
        }
    }
}
#endif

// Rearranges the stack of light-field images into a complex field: for each ray
// position (r, c), the N = nX * nY view samples (channel iAmp of each image) are
// stored contiguously.
__global__ void cudaKernel_convertLF2ComplexField(const LFGpuConst *config, uchar1** LF, cufftDoubleComplex* output)
{
    int tid = threadIdx.x + blockIdx.x * blockDim.x;
    int rX = config->rX;
    int rY = config->rY;
    int nX = config->nX;
    int nY = config->nY;
    int N = nX * nY;
    int R = rX * rY;
    int nChannel = config->nChannel;
    int iAmplitude = config->iAmp;

    if (tid < R)
    {
        int c = tid % rX;
        int r = tid / rX;
        int iWidth = c * nChannel + iAmplitude;
        int cWidth = (rX * nChannel + 3) & ~3;   // source row pitch, padded to a multiple of 4 bytes

        int src = r * cWidth + iWidth;
        int dst = (r * rX + c) * N;
        for (int k = 0; k < N; k++)
        {
            output[dst + k] = make_cuDoubleComplex((double)LF[k][src].x, 0);
        }
    }
}
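
/*
 * Resulting memory layout (stated for illustration, assuming nothing beyond the
 * indexing above): output consists of R = rX * rY groups of N = nX * nY samples,
 * ray position (r, c) occupies output[(r * rX + c) * N] .. output[(r * rX + c) * N + N - 1],
 * and element k of a group is the amplitude of that ray taken from light-field
 * image k.
 */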

// For each ray position (r, c), multiplies its N = nX * nY view samples by an
// (optionally random) unit-magnitude phase factor and writes them as an nX x nY
// tile of the (nX * rX) x (nY * rY) output field.
__global__ void cudaKernel_MultiplyPhase(const LFGpuConst *config, cufftDoubleComplex* in, cufftDoubleComplex* output)
{
    int tid = threadIdx.x + blockIdx.x * blockDim.x;

    __shared__ double s_pi2;
    __shared__ int s_R;
    __shared__ int s_N;
    __shared__ int s_rX;
    __shared__ int s_rY;
    __shared__ int s_nX;
    __shared__ int s_nY;
    __shared__ bool s_bRandomPhase;
    __shared__ int s_iAmp;

    if (threadIdx.x == 0)
    {
        s_pi2 = config->pi2;
        s_rX = config->rX;
        s_rY = config->rY;
        s_nX = config->nX;
        s_nY = config->nY;
        s_N = s_nX * s_nY;
        s_R = s_rX * s_rY;
        s_bRandomPhase = config->randomPhase;
        s_iAmp = config->iAmp;
    }

    __syncthreads();

    if (tid < s_R)
    {
        int c = tid % s_rX;
        int r = tid / s_rX;
        curandState state;
        if (s_bRandomPhase)
        {
            // The seed depends only on the configuration, not on tid, so every
            // thread draws the same pseudo-random sequence.
            curand_init(s_N * s_R * (s_iAmp + 1), 0, 0, &state);
        }

        int src = (r * s_rX + c) * s_N;
        int dst = c * s_nX + r * s_rX * s_N;

        for (int n = 0; n < s_N; n++)
        {
            double randomData = s_bRandomPhase ? curand_uniform_double(&state) : 1.0;

            cufftDoubleComplex phase = make_cuDoubleComplex(0, randomData * s_pi2);
            exponent_complex(&phase);

            cufftDoubleComplex val = in[src + n];
            int cc = n % s_nX;   // column of the sample within the nX x nY view grid
            int rr = n / s_nX;   // row of the sample within the nX x nY view grid
            output[dst + cc + rr * s_nX * s_rX] = cuCmul(val, phase);
        }
    }
}
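
/*
 * Output indexing used above, written out: with output viewed as a 2-D field of
 * width nX * rX and height nY * rY,
 *     dst + cc + rr * nX * rX = (r * nY + rr) * (nX * rX) + (c * nX + cc),
 * so sample n = rr * nX + cc of ray position (r, c) lands at row r * nY + rr,
 * column c * nX + cc; each ray position therefore fills an nX x nY tile. The phase
 * factor is exp(i * 2*pi * u) with u drawn uniformly from (0, 1) when random phase
 * is enabled and u = 1.0 otherwise; because curand_init() is seeded identically in
 * every thread, the same random sequence is reused at every ray position.
 */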

extern "C"
{
    void cudaConvertLF2ComplexField_Kernel(CUstream_st* stream, const int &nBlocks, const int &nThreads, const LFGpuConst *config, uchar1** LF, cufftDoubleComplex* output)
    {
        //cudaKernel_convertLF2ComplexField <<<nBlocks, nThreads, 0, stream>>> (config, LF, output);
        cudaKernel_convertLF2ComplexField <<<nBlocks, nThreads>>> (config, LF, output);

        if (cudaDeviceSynchronize() != cudaSuccess)
            return;
    }

    void cudaFFT_LF(cufftHandle *plan, CUstream_st* stream, const int &nBlocks, const int &nThreads, const int &nx, const int &ny, cufftDoubleComplex* in_field, cufftDoubleComplex* output_field, const int &direction)
    {
        //cudaFFT(nullptr, nx, ny, in_field, output_field, CUFFT_FORWARD, false);
        int N = nx * ny;

        //fftShift <<<nBlocks, nThreads, 0, stream>>> (N, nx, ny, in_field, output_field, false);
        fftShift <<<nBlocks, nThreads>>> (N, nx, ny, in_field, output_field, false);

        cufftResult result;
        if (direction == -1)
            result = cufftExecZ2Z(*plan, output_field, in_field, CUFFT_FORWARD);
        else
            result = cufftExecZ2Z(*plan, output_field, in_field, CUFFT_INVERSE);

        if (result != CUFFT_SUCCESS)
            return;

        if (cudaDeviceSynchronize() != cudaSuccess)
            return;

        //fftShift <<<nBlocks, nThreads, 0, stream>>> (N, nx, ny, in_field, output_field, false);
        fftShift <<<nBlocks, nThreads>>> (N, nx, ny, in_field, output_field, false);
    }
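
    /*
     * cudaFFT_LF follows the usual centered-FFT pattern: fftShift() writes a
     * shifted copy of in_field into output_field, cufftExecZ2Z() then transforms
     * output_field back into in_field (CUFFT_FORWARD when direction == -1,
     * CUFFT_INVERSE otherwise), and the final fftShift() re-centers that result
     * into output_field, which is where the caller reads it.
     */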

    void procMultiplyPhase(CUstream_st* stream, const int &nBlocks, const int &nThreads, const LFGpuConst *config, cufftDoubleComplex* in, cufftDoubleComplex* out)
    {
        //cudaKernel_MultiplyPhase <<<nBlocks, nThreads, 0, stream>>> (config, in, output);
        cudaKernel_MultiplyPhase <<<nBlocks, nThreads>>> (config, in, out);

        if (cudaDeviceSynchronize() != cudaSuccess)
            return;
    }

    void cudaFresnelPropagationLF(
        const int &nBlocks, const int &nBlocks2, const int &nThreads, const int &nx, const int &ny,
        cufftDoubleComplex *src, cufftDoubleComplex *tmp, cufftDoubleComplex *tmp2, cufftDoubleComplex *dst,
        const LFGpuConst* cuda_config)
    {
        cudaError_t error;

        // Embed src into the center of the 2x zero-padded buffer.
        cudaKernel_MoveDataPreLF <<<nBlocks, nThreads>>> (src, tmp, cuda_config);
        error = cudaDeviceSynchronize();
        if (error != cudaSuccess)
        {
            LOG("cudaDeviceSynchronize(%d) : Failed\n", __LINE__);
        }

        // Forward FFT, multiply by the propagation transfer function, inverse FFT.
        cudaFFT(nullptr, nx * 2, ny * 2, tmp, tmp2, CUFFT_FORWARD, false);

        cudaKernel_CalcDataLF <<<nBlocks2, nThreads>>> (tmp2, cuda_config);
        error = cudaDeviceSynchronize();
        if (error != cudaSuccess)
        {
            LOG("cudaDeviceSynchronize(%d) : Failed\n", __LINE__);
        }
        cudaFFT(nullptr, nx * 2, ny * 2, tmp2, tmp, CUFFT_INVERSE, true);

        // Crop the central nx x ny region back out of the padded buffer.
        cudaKernel_MoveDataPostLF <<<nBlocks, nThreads>>> (tmp, dst, cuda_config);
        error = cudaDeviceSynchronize();
        if (error != cudaSuccess)
        {
            LOG("cudaDeviceSynchronize(%d) : Failed\n", __LINE__);
        }
    }
}
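
/*
 * Typical host-side call order, as suggested by the wrappers above (a sketch only;
 * the stream/plan setup, buffer names, and grid dimensions shown here are
 * assumptions for illustration and are not defined in this file):
 *
 *   cudaConvertLF2ComplexField_Kernel(stream, nBlocks, nThreads, dev_config, dev_LF, dev_field);
 *   cudaFFT_LF(&plan, stream, nBlocks, nThreads, nX, nY, dev_field, dev_fft, -1);
 *   procMultiplyPhase(stream, nBlocks, nThreads, dev_config, dev_fft, dev_tiled);
 *   cudaFresnelPropagationLF(nBlocks, nBlocks2, nThreads, pnX, pnY, dev_tiled, dev_pad, dev_pad2, dev_dst, dev_config);
 */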

#endif // !ophLFKernel_cu__