Openholo  v4.0
Open Source Digital Holographic Library
ophDepthMap_GPU.cpp
Go to the documentation of this file.
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install, copy or use the software.
7 //
8 //
9 // License Agreement
10 // For Open Source Digital Holographic Library
11 //
12 // Openholo library is free software;
13 // you can redistribute it and/or modify it under the terms of the BSD 2-Clause license.
14 //
15 // Copyright (C) 2017-2024, Korea Electronics Technology Institute. All rights reserved.
16 // E-mail : contact.openholo@gmail.com
17 // Web : http://www.openholo.org
18 //
19 // Redistribution and use in source and binary forms, with or without modification,
20 // are permitted provided that the following conditions are met:
21 //
22 // 1. Redistribution's of source code must retain the above copyright notice,
23 // this list of conditions and the following disclaimer.
24 //
25 // 2. Redistribution's in binary form must reproduce the above copyright notice,
26 // this list of conditions and the following disclaimer in the documentation
27 // and/or other materials provided with the distribution.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the copyright holder or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 // This software contains opensource software released under GNU Generic Public License,
41 // NVDIA Software License Agreement, or CUDA supplement to Software License Agreement.
42 // Check whether software you use contains licensed software.
43 //
44 //M*/
45 
46 #include "ophDepthMap.h"
47 #include "ophDepthMap_GPU.h"
48 #include <cuComplex.h>
49 #include <sys.h>
50 #include "CUDA.h"
51 
52 extern "C"
53 {
66  void cudaFFT(CUstream_st* stream, int nx, int ny, cufftDoubleComplex* in_filed, cufftDoubleComplex* output_field, int direction, bool bNormalized);
67 }
68 
69 using namespace oph;
70 
71 void ophDepthMap::initGPU()
72 {
73  const int pnX = context_.pixel_number[_X];
74  const int pnY = context_.pixel_number[_Y];
75  const int N = pnX * pnY;
76 
77  dlevel.clear();
78 
79  if (!stream_)
80  cudaStreamCreate(&stream_);
81 
82  if (img_src_gpu) cudaFree(img_src_gpu);
83  HANDLE_ERROR(cudaMalloc((void**)&img_src_gpu, sizeof(uchar1)*N));
84 
85  if (dimg_src_gpu) cudaFree(dimg_src_gpu);
86  HANDLE_ERROR(cudaMalloc((void**)&dimg_src_gpu, sizeof(uchar1)*N));
87 
88  if (depth_index_gpu) cudaFree(depth_index_gpu);
89  if (dm_config_.change_depth_quantization == 1)
90  HANDLE_ERROR(cudaMalloc((void**)&depth_index_gpu, sizeof(Real)*N));
91 
92  if (u_o_gpu_) cudaFree(u_o_gpu_);
93  if (u_complex_gpu_) cudaFree(u_complex_gpu_);
94 
95  HANDLE_ERROR(cudaMalloc((void**)&u_o_gpu_, sizeof(cufftDoubleComplex)*N));
96  HANDLE_ERROR(cudaMalloc((void**)&u_complex_gpu_, sizeof(cufftDoubleComplex)*N));
97 
98  if (k_temp_d_) cudaFree(k_temp_d_);
99  HANDLE_ERROR(cudaMalloc((void**)&k_temp_d_, sizeof(cufftDoubleComplex)*N));
100 }
101 
102 bool ophDepthMap::prepareInputdataGPU()
103 {
104  auto begin = CUR_TIME;
105  const int N = context_.pixel_number[_X] * context_.pixel_number[_Y];
106 
107  // 2022-09-23
108  if (depth_img == nullptr) // not used depth
109  {
110  depth_img = new uchar[N];
111  memset(depth_img, 0, N);
112  }
113  HANDLE_ERROR(cudaMemcpyAsync(dimg_src_gpu, depth_img, sizeof(uchar1) * N, cudaMemcpyHostToDevice, stream_));
114 
115  LOG("%s : %.5lf (sec)\n", __FUNCTION__, ELAPSED_TIME(begin, CUR_TIME));
116  return true;
117 }
118 
119 void ophDepthMap::changeDepthQuanGPU()
120 {
121  const int pnX = context_.pixel_number[_X];
122  const int pnY = context_.pixel_number[_Y];
123  const int N = pnX * pnY;
124 
125  HANDLE_ERROR(cudaMemsetAsync(depth_index_gpu, 0, sizeof(Real) * N, stream_));
126 
127  for (uint dtr = 0; dtr < dm_config_.num_of_depth; dtr++)
128  {
129  Real temp_depth = dlevel[dtr];
130  Real d1 = temp_depth - dstep / 2.0;
131  Real d2 = temp_depth + dstep / 2.0;
132 
133  cudaChangeDepthQuanKernel(stream_, pnX, pnY, depth_index_gpu, dimg_src_gpu,
134  dtr, d1, d2, dm_config_.num_of_depth, dm_config_.far_depthmap, dm_config_.near_depthmap);
135  }
136 }
137 
138 void ophDepthMap::calcHoloGPU()
139 {
140  auto begin = CUR_TIME;
141 
142  if (!stream_)
143  cudaStreamCreate(&stream_);
144 
145  const int pnX = context_.pixel_number[_X];
146  const int pnY = context_.pixel_number[_Y];
147  const Real ppX = context_.pixel_pitch[_X];
148  const Real ppY = context_.pixel_pitch[_Y];
149  const Real ssX = context_.ss[_X] = pnX * ppX;
150  const Real ssY = context_.ss[_Y] = pnY * ppY;
151  const uint N = pnX * pnY;
152  const uint nChannel = context_.waveNum;
153 
154  size_t depth_sz = dm_config_.render_depth.size();
155 
156  const bool bRandomPhase = GetRandomPhase();
157  DMKernelConfig* device_config = nullptr;
158  HANDLE_ERROR(cudaMalloc((void**)&device_config, sizeof(DMKernelConfig)));
159 
160 
161  int blockSize = CUDA::getInstance()->getMaxThreads() >> 1; //n_threads // blockSize < devProp.maxThreadsPerBlock
162  ulonglong gridSize = (N + blockSize - 1) / blockSize; //n_blocks
163 
164  vector<double>* pSrc = is_ViewingWindow ? &dlevel_transform : &dlevel;
165 
166  for (uint ch = 0; ch < nChannel; ch++)
167  {
168  HANDLE_ERROR(cudaMemsetAsync(u_complex_gpu_, 0, sizeof(cufftDoubleComplex) * N, stream_));
169  HANDLE_ERROR(cudaMemcpyAsync(img_src_gpu, m_vecRGB[ch], sizeof(uchar1) * N, cudaMemcpyHostToDevice, stream_));
170  Real lambda = context_.wave_length[ch];
171  Real k = context_.k = (2 * M_PI / lambda);
172 
173  DMKernelConfig* host_config = new DMKernelConfig(
174  context_.pixel_number,
175  context_.pixel_pitch,
176  context_.ss,
177  context_.k,
178  context_.wave_length[ch]
179  );
180 
181  HANDLE_ERROR(cudaMemcpyAsync(device_config, host_config, sizeof(DMKernelConfig), cudaMemcpyHostToDevice));
182 
183 
184  for (size_t p = 0; p < depth_sz; ++p)
185  {
186  Complex<Real> randPhase;
187  cuDoubleComplex rand_phase, carrier_phase_delay;
188  GetRandomPhaseValue(randPhase, bRandomPhase);
189  memcpy(&rand_phase, &randPhase, sizeof(Complex<Real>));
190 
191  int dtr = dm_config_.render_depth[p];
192  Real temp_depth = pSrc->at(dtr - 1);
193 
194  Complex<Real> carrierPhaseDelay(0, k * -temp_depth);
195  carrierPhaseDelay.exp();
196  memcpy(&carrier_phase_delay, &carrierPhaseDelay, sizeof(Complex<Real>));
197 
198  HANDLE_ERROR(cudaMemsetAsync(u_o_gpu_, 0, sizeof(cufftDoubleComplex) * N, stream_));
199 
200  cudaDepthHoloKernel(stream_, pnX, pnY, u_o_gpu_, img_src_gpu, dimg_src_gpu, depth_index_gpu,
201  dtr, rand_phase, carrier_phase_delay,
202  dm_config_.change_depth_quantization, dm_config_.default_depth_quantization, m_mode);
203 
204  HANDLE_ERROR(cudaMemsetAsync(k_temp_d_, 0, sizeof(cufftDoubleComplex) * N, stream_));
205 
206  cudaFFT(stream_, pnX, pnY, u_o_gpu_, k_temp_d_, -1, false);
207 
208  cudaPropagation_AngularSpKernel(gridSize, blockSize, stream_,k_temp_d_, u_complex_gpu_, device_config, temp_depth);
209 
210  m_nProgress = (int)((Real)(ch * depth_sz + p + 1) * 100 / ((Real)depth_sz * nChannel));
211  }
212  cudaMemcpy(complex_H[ch], u_complex_gpu_, sizeof(cufftDoubleComplex)* N, cudaMemcpyDeviceToHost);
213 
214  //cudaFFT(stream_, pnX, pnY, u_complex_gpu_, k_temp_d_, 1, true);
215  //cudaMemcpy(complex_H[ch], k_temp_d_, sizeof(cufftDoubleComplex) * N, cudaMemcpyDeviceToHost);
216 
217  delete host_config;
218  }
219  HANDLE_ERROR(cudaFree(device_config));
220  LOG("%s : %.5lf (sec)\n", __FUNCTION__, ELAPSED_TIME(begin, CUR_TIME));
221 }
222 
224 {
225  if (u_o_gpu_) cudaFree(u_o_gpu_);
226  if (u_complex_gpu_) cudaFree(u_complex_gpu_);
227  if (k_temp_d_) cudaFree(k_temp_d_);
228 }
static CUDA * getInstance()
Definition: CUDA.h:17
#define HANDLE_ERROR(err)
Definition: CUDA.cpp:13
int getMaxThreads()
Definition: CUDA.h:34
unsigned char uchar
Definition: typedef.h:64
void cudaChangeDepthQuanKernel(CUstream_st *stream_, int pnx, int pny, Real *depth_index_gpu, unsigned char *dimg_src_gpu, int dtr, Real d1, Real d2, Real params_num_of_depth, Real params_far_depthmap, Real params_near_depthmap)
Quantize depth map on the GPU, only when the number of depth quantization is not the default value (i...
void free_gpu(void)
float Real
Definition: typedef.h:55
#define CUR_TIME
Definition: function.h:58
void cudaFFT(CUstream_st *stream, int nx, int ny, cufftDoubleComplex *in_filed, cufftDoubleComplex *output_field, int direction, bool bNormalized)
Convert data from the spatial domain to the frequency domain using 2D FFT on GPU. ...
void cudaPropagation_AngularSpKernel(const int &nBlocks, const int &nThreads, CUstream_st *stream_, cufftDoubleComplex *input_d, cufftDoubleComplex *u_complex, const DMKernelConfig *cuda_config, Real propagation_dist)
Angular spectrum propagation method for GPU implementation.
#define _Y
Definition: define.h:96
unsigned long long ulonglong
Definition: typedef.h:67
#define _X
Definition: define.h:92
struct DMKernelConfig DMKernelConfig
#define ELAPSED_TIME(x, y)
Definition: function.h:59
Openholo Point Cloud based CGH generation with CUDA GPGPU.
void cudaDepthHoloKernel(CUstream_st *stream, int nx, int ny, cufftDoubleComplex *u_o_gpu_, unsigned char *img_src_gpu, unsigned char *dimg_src_gpu, Real *depth_index_gpu, int dtr, cuDoubleComplex rand_phase_val, cuDoubleComplex carrier_phase_delay, int flag_change_depth_quan, unsigned int default_depth_quan, const unsigned int &mode)
Convert data from the spatial domain to the frequency domain using 2D FFT on GPU. ...
Definition: Bitmap.h:49
unsigned int uint
Definition: typedef.h:62
#define M_PI
Definition: define.h:52