1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install, copy or use the software.
10 // For Open Source Digital Holographic Library
12 // Openholo library is free software;
13 // you can redistribute it and/or modify it under the terms of the BSD 2-Clause license.
15 // Copyright (C) 2017-2024, Korea Electronics Technology Institute. All rights reserved.
16 // E-mail : contact.openholo@gmail.com
17 // Web : http://www.openholo.org
19 // Redistribution and use in source and binary forms, with or without modification,
20 // are permitted provided that the following conditions are met:
22 // 1. Redistribution's of source code must retain the above copyright notice,
23 // this list of conditions and the following disclaimer.
25 // 2. Redistribution's in binary form must reproduce the above copyright notice,
26 // this list of conditions and the following disclaimer in the documentation
27 // and/or other materials provided with the distribution.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the copyright holder or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
40 // This software contains opensource software released under GNU Generic Public License,
41 // NVDIA Software License Agreement, or CUDA supplement to Software License Agreement.
42 // Check whether software you use contains licensed software.
45 #ifndef ophDMKernel_cu__
46 #define ophDMKernel_cu__
48 #include "ophDepthMap_GPU.h"
49 #include "ophKernel.cuh"
55 void cudaKernel_double_get_kernel(cufftDoubleComplex* u_o_gpu, unsigned char* img_src_gpu, unsigned char* dimg_src_gpu, double* depth_index_gpu,
56 int dtr, cuDoubleComplex rand_phase, cuDoubleComplex carrier_phase_delay, int pnx, int pny,
57 int change_depth_quantization, unsigned int default_depth_quantization)
59 int tid = threadIdx.x + blockIdx.x * blockDim.x;
63 double img = ((double)img_src_gpu[tid]) / 255.0;
65 if (change_depth_quantization == 1)
66 depth_idx = depth_index_gpu[tid];
68 depth_idx = (double)default_depth_quantization - (double)dimg_src_gpu[tid];
70 double alpha_map = ((double)img_src_gpu[tid] > 0.0 ? 1.0 : 0.0);
72 u_o_gpu[tid].x = img * alpha_map * (depth_idx == (double)dtr ? 1.0 : 0.0);
74 cuDoubleComplex tmp1 = cuCmul(rand_phase, carrier_phase_delay);
75 u_o_gpu[tid] = cuCmul(u_o_gpu[tid], tmp1);
81 void cudaKernel_single_get_kernel(cufftDoubleComplex* u_o_gpu, unsigned char* img_src_gpu, unsigned char* dimg_src_gpu, double* depth_index_gpu,
82 int dtr, cuComplex rand_phase, cuComplex carrier_phase_delay, int pnx, int pny,
83 int change_depth_quantization, unsigned int default_depth_quantization)
85 int tid = threadIdx.x + blockIdx.x * blockDim.x;
87 if (tid < pnx * pny) {
89 float img = ((float)img_src_gpu[tid]) / 255.0f;
91 if (change_depth_quantization == 1)
92 depth_idx = depth_index_gpu[tid];
94 depth_idx = (float)default_depth_quantization - (float)dimg_src_gpu[tid];
96 float alpha_map = ((float)img_src_gpu[tid] > 0.0f ? 1.0f : 0.0f);
98 u_o_gpu[tid].x = img * alpha_map * (depth_idx == (float)dtr ? 1.0f : 0.0f);
100 cuComplex tmp1 = cuCmulf(rand_phase, carrier_phase_delay);
102 u_o_gpu[tid].x = (u_o_gpu[tid].x * tmp1.x) - (u_o_gpu[tid].y * tmp1.y);
103 u_o_gpu[tid].y = (u_o_gpu[tid].x * tmp1.y) + (u_o_gpu[tid].y * tmp1.x);
109 void cudaKernel_single_ASM_Propagation(cufftDoubleComplex* input_d, cufftDoubleComplex* u_complex, const DMKernelConfig* config, double propagation_dist)
111 int tid = threadIdx.x + blockIdx.x * blockDim.x;
112 if (tid < config->pn_X * config->pn_Y)
114 int x = tid % config->pn_X;
115 int y = tid / config->pn_X;
117 float fxx = (-1.0f / (2.0f * config->pp_X)) + (1.0f / config->ss_X) * (float)x;
118 float fyy = (1.0f / (2.0f * config->pp_Y)) - (1.0f / config->ss_Y) - (1.0f / config->ss_Y) * (float)y;
121 float sval = sqrtf(1 - (config->lambda * fxx) * (config->lambda * fxx) -
122 (config->lambda * fyy) * (config->lambda * fyy));
123 sval *= config->k * propagation_dist;
125 int prop_mask = ((fxx * fxx + fyy * fyy) < (config->k * config->k)) ? 1 : 0;
127 cuDoubleComplex kernel = make_cuDoubleComplex(0, sval);
128 exponent_complex(&kernel);
130 cuDoubleComplex u_frequency = make_cuDoubleComplex(0, 0);
132 u_frequency = cuCmul(kernel, input_d[tid]);
134 u_complex[tid] = cuCadd(u_complex[tid], u_frequency);
140 void cudaKernel_double_ASM_Propagation(cufftDoubleComplex* input_d, cufftDoubleComplex* u_complex, const DMKernelConfig* config, double propagation_dist)
142 int tid = threadIdx.x + blockIdx.x * blockDim.x;
143 if (tid < config->pn_X * config->pn_Y)
145 int x = tid % config->pn_X;
146 int y = tid / config->pn_X;
148 double fxx = (-1.0 / (2.0 * config->pp_X)) + (1.0 / config->ss_X) * (double)x;
149 double fyy = (1.0 / (2.0 * config->pp_Y)) - (1.0 / config->ss_Y) - (1.0 / config->ss_Y) * (double)y;
152 double sval = sqrt(1 - (config->lambda * fxx) * (config->lambda * fxx) -
153 (config->lambda * fyy) * (config->lambda * fyy));
154 sval *= config->k * propagation_dist;
156 int prop_mask = ((fxx * fxx + fyy * fyy) < (config->k * config->k)) ? 1 : 0;
158 cuDoubleComplex kernel = make_cuDoubleComplex(0, sval);
159 exponent_complex(&kernel);
161 cuDoubleComplex u_frequency = make_cuDoubleComplex(0, 0);
163 u_frequency = cuCmul(kernel, input_d[tid]);
165 u_complex[tid] = cuCadd(u_complex[tid], u_frequency);
171 void cropFringe(int nx, int ny, cufftDoubleComplex* in_filed, cufftDoubleComplex* out_filed, int cropx1, int cropx2, int cropy1, int cropy2)
173 __shared__ int s_pnX, s_pnY, s_cropx1, s_cropx2, s_cropy1, s_cropy2;
175 if (threadIdx.x == 0)
186 int tid = threadIdx.x + blockIdx.x * blockDim.x;
188 if (tid < s_pnX * s_pnY)
193 if (x >= s_cropx1 && x <= s_cropx2 && y >= s_cropy1 && y <= s_cropy2)
194 out_filed[tid] = in_filed[tid];
200 void getFringe(int nx, int ny, cufftDoubleComplex* in_filed, cufftDoubleComplex* out_filed, int sig_locationx, int sig_locationy,
201 double ssx, double ssy, double ppx, double ppy, double pi)
203 int tid = threadIdx.x + blockIdx.x*blockDim.x;
207 cuDoubleComplex shift_phase = make_cuDoubleComplex(1, 0);
209 if (sig_locationy != 0)
212 double yy = (ssy / 2.0) - (ppy)*(double)r - ppy;
214 cuDoubleComplex val = make_cuDoubleComplex(0, 0);
215 if (sig_locationy == 1)
216 val.y = 2.0 * pi * (yy / (4.0 * ppy));
218 val.y = 2.0 * pi * (-yy / (4.0 * ppy));
220 exponent_complex(&val);
222 shift_phase = cuCmul(shift_phase, val);
225 if (sig_locationx != 0)
228 double xx = (-ssx / 2.0) - (ppx)*(double)c - ppx;
230 cuDoubleComplex val = make_cuDoubleComplex(0, 0);
231 if (sig_locationx == -1)
232 val.y = 2.0 * pi * (-xx / (4.0 * ppx));
234 val.y = 2.0 * pi * (xx / (4.0 * ppx));
236 exponent_complex(&val);
237 shift_phase = cuCmul(shift_phase, val);
240 out_filed[tid] = cuCmul(in_filed[tid], shift_phase);
246 void change_depth_quan_kernel(double* depth_index_gpu, unsigned char* dimg_src_gpu, int pnx, int pny,
247 int dtr, double d1, double d2, double num_depth, double far_depth, double near_depth)
249 int tid = threadIdx.x + blockIdx.x * blockDim.x;
251 if (tid < pnx * pny) {
254 double dmap_src = double(dimg_src_gpu[tid]) / 255.0;
255 double dmap = (1.0 - dmap_src)*(far_depth - near_depth) + near_depth;
257 if (dtr < num_depth - 1)
258 tdepth = (dmap >= d1 ? 1 : 0) * (dmap < d2 ? 1 : 0);
260 tdepth = (dmap >= d1 ? 1 : 0) * (dmap <= d2 ? 1 : 0);
262 depth_index_gpu[tid] = depth_index_gpu[tid] + (double)(tdepth * (dtr + 1));
268 void cudaDepthHoloKernel(CUstream_st* stream, int pnx, int pny, cufftDoubleComplex* u_o_gpu, unsigned char* img_src_gpu, unsigned char* dimg_src_gpu, double* depth_index_gpu,
269 int dtr, cuDoubleComplex rand_phase_val, cuDoubleComplex carrier_phase_delay, int flag_change_depth_quan, unsigned int default_depth_quan, const unsigned int& mode)
271 dim3 grid((pnx * pny + kBlockThreads - 1) / kBlockThreads, 1, 1);
273 if (mode & MODE_FLOAT)
275 //if (mode & MODE_FASTMATH)
276 // //cudaKernel_single_FastMath_RS_Diffraction << < nBlocks, nThreads >> > (iChannel, cuda_vertex_data, cuda_config, n_pts_per_stream, cuda_dst);
278 cudaKernel_single_get_kernel << <grid, kBlockThreads, 0, stream >> > (u_o_gpu, img_src_gpu, dimg_src_gpu, depth_index_gpu,
279 dtr, make_cuComplex((float)rand_phase_val.x, (float)rand_phase_val.y),
280 make_cuComplex((float)carrier_phase_delay.x, (float)carrier_phase_delay.y), pnx, pny, flag_change_depth_quan, default_depth_quan);
284 //if (mode & MODE_FASTMATH)
285 // //cudaKernel_double_FastMath_RS_Diffraction << < nBlocks, nThreads >> > (iChannel, cuda_vertex_data, cuda_config, n_pts_per_stream, cuda_dst);
287 cudaKernel_double_get_kernel << <grid, kBlockThreads, 0, stream >> > (u_o_gpu, img_src_gpu, dimg_src_gpu, depth_index_gpu,
288 dtr, rand_phase_val, carrier_phase_delay, pnx, pny, flag_change_depth_quan, default_depth_quan);
294 void cudaPropagation_AngularSpKernel(
295 const int& nBlocks, const int& nThreads,
296 CUstream_st* stream, cufftDoubleComplex* input_d, cufftDoubleComplex* u_complex,
297 const DMKernelConfig*cuda_config, double propagation_dist)
299 cudaKernel_double_ASM_Propagation << <nBlocks, nThreads >> > (input_d, u_complex, cuda_config, propagation_dist);
302 void cudaCropFringe(CUstream_st* stream, int nx, int ny, cufftDoubleComplex* in_field, cufftDoubleComplex* out_field, int cropx1, int cropx2, int cropy1, int cropy2)
304 unsigned int nblocks = (nx * ny + kBlockThreads - 1) / kBlockThreads;
306 cropFringe << < nblocks, kBlockThreads, 0, stream >> > (nx, ny, in_field, out_field, cropx1, cropx2, cropy1, cropy2);
309 void cudaGetFringe(CUstream_st* stream, int pnx, int pny, cufftDoubleComplex* in_field, cufftDoubleComplex* out_field, int sig_locationx, int sig_locationy,
310 double ssx, double ssy, double ppx, double ppy, double PI)
312 unsigned int nblocks = (pnx * pny + kBlockThreads - 1) / kBlockThreads;
314 getFringe << < nblocks, kBlockThreads, 0, stream >> > (pnx, pny, in_field, out_field, sig_locationx, sig_locationy, ssx, ssy, ppx, ppy, PI);
317 void cudaChangeDepthQuanKernel(CUstream_st* stream, int pnx, int pny, double* depth_index_gpu, unsigned char* dimg_src_gpu,
318 int dtr, double d1, double d2, double params_num_of_depth, double params_far_depthmap, double params_near_depthmap)
320 dim3 grid((pnx * pny + kBlockThreads - 1) / kBlockThreads, 1, 1);
321 change_depth_quan_kernel << <grid, kBlockThreads, 0, stream >> > (depth_index_gpu, dimg_src_gpu, pnx, pny,
322 dtr, d1, d2, params_num_of_depth, params_far_depthmap, params_near_depthmap);
326 #endif // !ophDMKernel_cu__