Openholo  v4.0
Open Source Digital Holographic Library
ophDMKernel.cu
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install, copy or use the software.
//
//
//                          License Agreement
//                For Open Source Digital Holographic Library
//
// Openholo library is free software;
// you can redistribute it and/or modify it under the terms of the BSD 2-Clause license.
//
// Copyright (C) 2017-2024, Korea Electronics Technology Institute. All rights reserved.
// E-mail : contact.openholo@gmail.com
// Web : http://www.openholo.org
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the copyright holder or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
// This software contains open source software released under the GNU General Public License,
// the NVIDIA Software License Agreement, or the CUDA Supplement to the Software License Agreement.
// Check whether the software you use contains such licensed software.
//
//M*/
#ifndef ophDMKernel_cu__
#define ophDMKernel_cu__

#include "ophDepthMap_GPU.h"
#include "ophKernel.cuh"
#include "typedef.h"
#include <define.h>

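// Builds the complex object field for depth layer 'dtr' in double precision:
// each pixel of the 8-bit intensity image is normalized to [0, 1], kept only if
// its quantized depth index equals 'dtr', and multiplied by the combined random
// phase and carrier phase delay. The imaginary part of u_o_gpu is evidently
// expected to be zero on entry (e.g., cleared by the caller before the launch).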
__global__
void cudaKernel_double_get_kernel(cufftDoubleComplex* u_o_gpu, unsigned char* img_src_gpu, unsigned char* dimg_src_gpu, double* depth_index_gpu,
    int dtr, cuDoubleComplex rand_phase, cuDoubleComplex carrier_phase_delay, int pnx, int pny,
    int change_depth_quantization, unsigned int default_depth_quantization)
{
    int tid = threadIdx.x + blockIdx.x * blockDim.x;

    if (tid < pnx * pny) {

        double img = ((double)img_src_gpu[tid]) / 255.0;
        double depth_idx;
        if (change_depth_quantization == 1)
            depth_idx = depth_index_gpu[tid];
        else
            depth_idx = (double)default_depth_quantization - (double)dimg_src_gpu[tid];

        double alpha_map = ((double)img_src_gpu[tid] > 0.0 ? 1.0 : 0.0);

        u_o_gpu[tid].x = img * alpha_map * (depth_idx == (double)dtr ? 1.0 : 0.0);

        cuDoubleComplex tmp1 = cuCmul(rand_phase, carrier_phase_delay);
        u_o_gpu[tid] = cuCmul(u_o_gpu[tid], tmp1);
    }
}

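// Single-precision variant of the kernel above: the phase factors are passed as
// cuComplex and the arithmetic is done in float before the result is stored in
// the double-precision output buffer.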
__global__
void cudaKernel_single_get_kernel(cufftDoubleComplex* u_o_gpu, unsigned char* img_src_gpu, unsigned char* dimg_src_gpu, double* depth_index_gpu,
    int dtr, cuComplex rand_phase, cuComplex carrier_phase_delay, int pnx, int pny,
    int change_depth_quantization, unsigned int default_depth_quantization)
{
    int tid = threadIdx.x + blockIdx.x * blockDim.x;

    if (tid < pnx * pny) {

        float img = ((float)img_src_gpu[tid]) / 255.0f;
        float depth_idx;
        if (change_depth_quantization == 1)
            depth_idx = depth_index_gpu[tid];
        else
            depth_idx = (float)default_depth_quantization - (float)dimg_src_gpu[tid];

        float alpha_map = ((float)img_src_gpu[tid] > 0.0f ? 1.0f : 0.0f);

        u_o_gpu[tid].x = img * alpha_map * (depth_idx == (float)dtr ? 1.0f : 0.0f);

        cuComplex tmp1 = cuCmulf(rand_phase, carrier_phase_delay);

        // Complex multiplication by the combined phase; copy both components into
        // locals first so the imaginary part is computed from the original real part.
        float re = (float)u_o_gpu[tid].x;
        float im = (float)u_o_gpu[tid].y;
        u_o_gpu[tid].x = (re * tmp1.x) - (im * tmp1.y);
        u_o_gpu[tid].y = (re * tmp1.y) + (im * tmp1.x);
    }
}

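// Angular spectrum (ASM) propagation with single-precision frequency-grid
// arithmetic: every spectral sample inside the band-limit mask is multiplied by
// exp(i * k * d * sqrt(1 - (lambda*fx)^2 - (lambda*fy)^2)) and accumulated into
// u_complex; masked-out samples contribute nothing.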
__global__
void cudaKernel_single_ASM_Propagation(cufftDoubleComplex* input_d, cufftDoubleComplex* u_complex, const DMKernelConfig* config, double propagation_dist)
{
    int tid = threadIdx.x + blockIdx.x * blockDim.x;
    if (tid < config->pn_X * config->pn_Y)
    {
        int x = tid % config->pn_X;
        int y = tid / config->pn_X;

        float fxx = (-1.0f / (2.0f * config->pp_X)) + (1.0f / config->ss_X) * (float)x;
        float fyy = (1.0f / (2.0f * config->pp_Y)) - (1.0f / config->ss_Y) - (1.0f / config->ss_Y) * (float)y;

        float sval = sqrtf(1 - (config->lambda * fxx) * (config->lambda * fxx) -
            (config->lambda * fyy) * (config->lambda * fyy));
        sval *= config->k * propagation_dist;

        int prop_mask = ((fxx * fxx + fyy * fyy) < (config->k * config->k)) ? 1 : 0;

        cuDoubleComplex kernel = make_cuDoubleComplex(0, sval);
        exponent_complex(&kernel);

        cuDoubleComplex u_frequency = make_cuDoubleComplex(0, 0);
        if (prop_mask == 1)
            u_frequency = cuCmul(kernel, input_d[tid]);

        u_complex[tid] = cuCadd(u_complex[tid], u_frequency);
    }
}

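// Double-precision counterpart of the ASM propagation kernel above.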
__global__
void cudaKernel_double_ASM_Propagation(cufftDoubleComplex* input_d, cufftDoubleComplex* u_complex, const DMKernelConfig* config, double propagation_dist)
{
    int tid = threadIdx.x + blockIdx.x * blockDim.x;
    if (tid < config->pn_X * config->pn_Y)
    {
        int x = tid % config->pn_X;
        int y = tid / config->pn_X;

        double fxx = (-1.0 / (2.0 * config->pp_X)) + (1.0 / config->ss_X) * (double)x;
        double fyy = (1.0 / (2.0 * config->pp_Y)) - (1.0 / config->ss_Y) - (1.0 / config->ss_Y) * (double)y;

        double sval = sqrt(1 - (config->lambda * fxx) * (config->lambda * fxx) -
            (config->lambda * fyy) * (config->lambda * fyy));
        sval *= config->k * propagation_dist;

        int prop_mask = ((fxx * fxx + fyy * fyy) < (config->k * config->k)) ? 1 : 0;

        cuDoubleComplex kernel = make_cuDoubleComplex(0, sval);
        exponent_complex(&kernel);

        cuDoubleComplex u_frequency = make_cuDoubleComplex(0, 0);
        if (prop_mask == 1)
            u_frequency = cuCmul(kernel, input_d[tid]);

        u_complex[tid] = cuCadd(u_complex[tid], u_frequency);
    }
}

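// Copies the samples inside the rectangular window [cropx1, cropx2] x [cropy1, cropy2]
// from in_field to out_field; samples outside the window are left untouched, so the
// output buffer is evidently expected to be cleared by the caller beforehand.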
__global__
void cropFringe(int nx, int ny, cufftDoubleComplex* in_field, cufftDoubleComplex* out_field, int cropx1, int cropx2, int cropy1, int cropy2)
{
    __shared__ int s_pnX, s_pnY, s_cropx1, s_cropx2, s_cropy1, s_cropy2;

    if (threadIdx.x == 0)
    {
        s_pnX = nx;
        s_pnY = ny;
        s_cropx1 = cropx1;
        s_cropx2 = cropx2;
        s_cropy1 = cropy1;
        s_cropy2 = cropy2;
    }
    __syncthreads();

    int tid = threadIdx.x + blockIdx.x * blockDim.x;

    if (tid < s_pnX * s_pnY)
    {
        int x = tid % s_pnX;
        int y = tid / s_pnX;

        if (x >= s_cropx1 && x <= s_cropx2 && y >= s_cropy1 && y <= s_cropy2)
            out_field[tid] = in_field[tid];
    }
}

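// Multiplies the field by a linear phase ramp selected by sig_locationx / sig_locationy,
// i.e. an off-axis carrier shift of one quarter of the spatial sampling bandwidth along
// the requested direction(s).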
__global__
void getFringe(int nx, int ny, cufftDoubleComplex* in_field, cufftDoubleComplex* out_field, int sig_locationx, int sig_locationy,
    double ssx, double ssy, double ppx, double ppy, double pi)
{
    int tid = threadIdx.x + blockIdx.x * blockDim.x;

    if (tid < nx * ny)
    {
        cuDoubleComplex shift_phase = make_cuDoubleComplex(1, 0);

        if (sig_locationy != 0)
        {
            int r = tid / nx;
            double yy = (ssy / 2.0) - (ppy) * (double)r - ppy;

            cuDoubleComplex val = make_cuDoubleComplex(0, 0);
            if (sig_locationy == 1)
                val.y = 2.0 * pi * (yy / (4.0 * ppy));
            else
                val.y = 2.0 * pi * (-yy / (4.0 * ppy));

            exponent_complex(&val);

            shift_phase = cuCmul(shift_phase, val);
        }

        if (sig_locationx != 0)
        {
            int c = tid % nx;
            double xx = (-ssx / 2.0) - (ppx) * (double)c - ppx;

            cuDoubleComplex val = make_cuDoubleComplex(0, 0);
            if (sig_locationx == -1)
                val.y = 2.0 * pi * (-xx / (4.0 * ppx));
            else
                val.y = 2.0 * pi * (xx / (4.0 * ppx));

            exponent_complex(&val);
            shift_phase = cuCmul(shift_phase, val);
        }

        out_field[tid] = cuCmul(in_field[tid], shift_phase);
    }
}

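// For depth layer 'dtr': maps the 8-bit depth image linearly between near_depth and
// far_depth, flags pixels whose physical depth lies in [d1, d2) (the last layer also
// includes d2), and accumulates the layer index (dtr + 1) into depth_index_gpu.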
__global__
void change_depth_quan_kernel(double* depth_index_gpu, unsigned char* dimg_src_gpu, int pnx, int pny,
    int dtr, double d1, double d2, double num_depth, double far_depth, double near_depth)
{
    int tid = threadIdx.x + blockIdx.x * blockDim.x;

    if (tid < pnx * pny) {

        int tdepth;
        double dmap_src = double(dimg_src_gpu[tid]) / 255.0;
        double dmap = (1.0 - dmap_src) * (far_depth - near_depth) + near_depth;

        if (dtr < num_depth - 1)
            tdepth = (dmap >= d1 ? 1 : 0) * (dmap < d2 ? 1 : 0);
        else
            tdepth = (dmap >= d1 ? 1 : 0) * (dmap <= d2 ? 1 : 0);

        depth_index_gpu[tid] = depth_index_gpu[tid] + (double)(tdepth * (dtr + 1));
    }
}

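// Host-callable wrappers: they set up the launch geometry and dispatch the kernels
// above on the CUDA stream supplied by the caller.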
extern "C"
{
    void cudaDepthHoloKernel(CUstream_st* stream, int pnx, int pny, cufftDoubleComplex* u_o_gpu, unsigned char* img_src_gpu, unsigned char* dimg_src_gpu, double* depth_index_gpu,
        int dtr, cuDoubleComplex rand_phase_val, cuDoubleComplex carrier_phase_delay, int flag_change_depth_quan, unsigned int default_depth_quan, const unsigned int& mode)
    {
        dim3 grid((pnx * pny + kBlockThreads - 1) / kBlockThreads, 1, 1);

        if (mode & MODE_FLOAT)
        {
            //if (mode & MODE_FASTMATH)
            //	//cudaKernel_single_FastMath_RS_Diffraction <<< nBlocks, nThreads >>> (iChannel, cuda_vertex_data, cuda_config, n_pts_per_stream, cuda_dst);
            //else
            cudaKernel_single_get_kernel <<<grid, kBlockThreads, 0, stream>>> (u_o_gpu, img_src_gpu, dimg_src_gpu, depth_index_gpu,
                dtr, make_cuComplex((float)rand_phase_val.x, (float)rand_phase_val.y),
                make_cuComplex((float)carrier_phase_delay.x, (float)carrier_phase_delay.y), pnx, pny, flag_change_depth_quan, default_depth_quan);
        }
        else
        {
            //if (mode & MODE_FASTMATH)
            //	//cudaKernel_double_FastMath_RS_Diffraction <<< nBlocks, nThreads >>> (iChannel, cuda_vertex_data, cuda_config, n_pts_per_stream, cuda_dst);
            //else
            cudaKernel_double_get_kernel <<<grid, kBlockThreads, 0, stream>>> (u_o_gpu, img_src_gpu, dimg_src_gpu, depth_index_gpu,
                dtr, rand_phase_val, carrier_phase_delay, pnx, pny, flag_change_depth_quan, default_depth_quan);
        }
    }

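    // Angular spectrum propagation wrapper; the launch geometry (nBlocks, nThreads)
    // is chosen by the caller.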
    void cudaPropagation_AngularSpKernel(
        const int& nBlocks, const int& nThreads,
        CUstream_st* stream, cufftDoubleComplex* input_d, cufftDoubleComplex* u_complex,
        const DMKernelConfig* cuda_config, double propagation_dist)
    {
        // Launch on the caller-provided stream, as the other wrappers in this file do.
        cudaKernel_double_ASM_Propagation <<<nBlocks, nThreads, 0, stream>>> (input_d, u_complex, cuda_config, propagation_dist);
    }

    void cudaCropFringe(CUstream_st* stream, int nx, int ny, cufftDoubleComplex* in_field, cufftDoubleComplex* out_field, int cropx1, int cropx2, int cropy1, int cropy2)
    {
        unsigned int nblocks = (nx * ny + kBlockThreads - 1) / kBlockThreads;

        cropFringe <<<nblocks, kBlockThreads, 0, stream>>> (nx, ny, in_field, out_field, cropx1, cropx2, cropy1, cropy2);
    }

    void cudaGetFringe(CUstream_st* stream, int pnx, int pny, cufftDoubleComplex* in_field, cufftDoubleComplex* out_field, int sig_locationx, int sig_locationy,
        double ssx, double ssy, double ppx, double ppy, double PI)
    {
        unsigned int nblocks = (pnx * pny + kBlockThreads - 1) / kBlockThreads;

        getFringe <<<nblocks, kBlockThreads, 0, stream>>> (pnx, pny, in_field, out_field, sig_locationx, sig_locationy, ssx, ssy, ppx, ppy, PI);
    }

    void cudaChangeDepthQuanKernel(CUstream_st* stream, int pnx, int pny, double* depth_index_gpu, unsigned char* dimg_src_gpu,
        int dtr, double d1, double d2, double params_num_of_depth, double params_far_depthmap, double params_near_depthmap)
    {
        dim3 grid((pnx * pny + kBlockThreads - 1) / kBlockThreads, 1, 1);
        change_depth_quan_kernel <<<grid, kBlockThreads, 0, stream>>> (depth_index_gpu, dimg_src_gpu, pnx, pny,
            dtr, d1, d2, params_num_of_depth, params_far_depthmap, params_near_depthmap);
    }
}
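/*
    Illustrative host-side sketch (kept inside a comment, not compiled): it shows one
    way the cudaDepthHoloKernel wrapper above can be driven for a single depth layer.
    The buffer names, image size and parameter values below are hypothetical, and the
    image, depth and index buffers are assumed to be filled elsewhere.

        int pnx = 1920, pny = 1080;
        size_t n = (size_t)pnx * pny;

        cufftDoubleComplex* d_field;   // complex field for the current layer
        unsigned char* d_img;          // 8-bit intensity image
        unsigned char* d_dimg;         // 8-bit depth image
        double* d_depth_index;         // quantized depth indices

        cudaMalloc(&d_field, n * sizeof(cufftDoubleComplex));
        cudaMalloc(&d_img, n);
        cudaMalloc(&d_dimg, n);
        cudaMalloc(&d_depth_index, n * sizeof(double));
        cudaMemset(d_field, 0, n * sizeof(cufftDoubleComplex));

        cudaStream_t stream;
        cudaStreamCreate(&stream);

        cuDoubleComplex rand_phase = make_cuDoubleComplex(1.0, 0.0);
        cuDoubleComplex carrier_phase_delay = make_cuDoubleComplex(1.0, 0.0);

        // dtr = 0 (first layer), no runtime depth re-quantization, 256 default levels,
        // and a mode word without MODE_FLOAT so the double-precision path is taken.
        cudaDepthHoloKernel(stream, pnx, pny, d_field, d_img, d_dimg, d_depth_index,
            0, rand_phase, carrier_phase_delay, 0, 256, 0u);
*/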
#endif // !ophDMKernel_cu__