// Forward declaration of the GPU 2D FFT helper used by this file.
// calcHoloGPU() calls it with direction = -1 and bNormalized = false, reading from
// in_filed and writing to output_field on the given stream.
// NOTE(review): the meaning of other `direction` values and of `bNormalized` is not
// visible in this listing — confirm against the helper's definition.
48 #include <cuComplex.h> 66 void cudaFFT(CUstream_st* stream,
int nx,
int ny, cufftDoubleComplex* in_filed, cufftDoubleComplex* output_field,
int direction,
bool bNormalized);
// Sets up GPU resources for the depth-map path: creates stream_ and (re)allocates
// the per-pixel device buffers, where N = pnX * pnY.
// NOTE(review): this is an extracted listing; the embedded line numbers skip, so braces
// and some statements of the original function are not visible here.
71 void ophDepthMap::initGPU()
73 const int pnX = context_.pixel_number[
_X];
74 const int pnY = context_.pixel_number[
_Y];
75 const int N = pnX * pnY;
// The return value of cudaStreamCreate is not checked, unlike the cudaMalloc calls below.
80 cudaStreamCreate(&stream_);
// Each buffer is released first when its pointer is non-null, then allocated again.
// img_src_gpu: N elements of uchar1.
82 if (img_src_gpu) cudaFree(img_src_gpu);
83 HANDLE_ERROR(cudaMalloc((
void**)&img_src_gpu,
sizeof(uchar1)*N));
// dimg_src_gpu: N elements of uchar1 (prepareInputdataGPU copies depth_img into it).
85 if (dimg_src_gpu) cudaFree(dimg_src_gpu);
86 HANDLE_ERROR(cudaMalloc((
void**)&dimg_src_gpu,
sizeof(uchar1)*N));
// depth_index_gpu is released here; the statement controlled by the following `if`
// is not visible in this listing (presumably its reallocation — TODO confirm).
88 if (depth_index_gpu) cudaFree(depth_index_gpu);
89 if (dm_config_.change_depth_quantization == 1)
// u_o_gpu_ and u_complex_gpu_: N elements of cufftDoubleComplex each.
92 if (u_o_gpu_) cudaFree(u_o_gpu_);
93 if (u_complex_gpu_) cudaFree(u_complex_gpu_);
95 HANDLE_ERROR(cudaMalloc((
void**)&u_o_gpu_,
sizeof(cufftDoubleComplex)*N));
96 HANDLE_ERROR(cudaMalloc((
void**)&u_complex_gpu_,
sizeof(cufftDoubleComplex)*N));
// k_temp_d_: N elements of cufftDoubleComplex (used as the FFT output in calcHoloGPU).
98 if (k_temp_d_) cudaFree(k_temp_d_);
99 HANDLE_ERROR(cudaMalloc((
void**)&k_temp_d_,
sizeof(cufftDoubleComplex)*N));
// Uploads the host depth image (depth_img) to the device buffer dimg_src_gpu.
// NOTE(review): the function is declared to return bool, but no return statement is
// visible in this listing (the embedded line numbers skip) — confirm the returned value.
102 bool ophDepthMap::prepareInputdataGPU()
// N is the number of pixels (X count times Y count).
105 const int N = context_.pixel_number[
_X] * context_.pixel_number[
_Y];
// When no depth image is present, substitute a zero-filled host buffer of N bytes.
// The braces around the two statements below are not visible in this listing.
108 if (depth_img ==
nullptr)
110 depth_img =
new uchar[N];
111 memset(depth_img, 0, N);
// Asynchronous host-to-device copy of N bytes, enqueued on stream_; depth_img must stay
// valid until that copy has completed.
113 HANDLE_ERROR(cudaMemcpyAsync(dimg_src_gpu, depth_img,
sizeof(uchar1) * N, cudaMemcpyHostToDevice, stream_));
// Re-quantizes the depth map on the GPU: clears depth_index_gpu, then processes each of
// the dm_config_.num_of_depth levels in turn.
// NOTE(review): extracted listing; braces and the head of the per-level call are missing.
119 void ophDepthMap::changeDepthQuanGPU()
121 const int pnX = context_.pixel_number[
_X];
122 const int pnY = context_.pixel_number[
_Y];
123 const int N = pnX * pnY;
// Zero sizeof(Real) * N bytes of depth_index_gpu on stream_ before the per-level passes.
125 HANDLE_ERROR(cudaMemsetAsync(depth_index_gpu, 0,
sizeof(
Real) * N, stream_));
127 for (
uint dtr = 0; dtr < dm_config_.num_of_depth; dtr++)
// d1 and d2 are the lower and upper bounds of an interval of width dstep centred on
// dlevel[dtr].
129 Real temp_depth = dlevel[dtr];
130 Real d1 = temp_depth - dstep / 2.0;
131 Real d2 = temp_depth + dstep / 2.0;
// Trailing arguments of a call whose first line is not visible in this listing
// (presumably cudaChangeDepthQuanKernel on stream_ — TODO confirm).
134 dtr, d1, d2, dm_config_.num_of_depth, dm_config_.far_depthmap, dm_config_.near_depthmap);
// Computes the hologram on the GPU: for every wavelength channel and every depth layer
// listed in dm_config_.render_depth, builds a field in u_o_gpu_, transforms it with
// cudaFFT into k_temp_d_, and finally copies u_complex_gpu_ back to complex_H[ch].
// NOTE(review): extracted listing; the embedded line numbers skip, so braces, the
// kernel-config setup, the propagation step and part of the cleanup are not visible.
138 void ophDepthMap::calcHoloGPU()
// NOTE(review): initGPU() also creates a stream into stream_, and no cudaStreamDestroy is
// visible in this listing — confirm the earlier stream is not leaked. The return value is
// not checked.
143 cudaStreamCreate(&stream_);
145 const int pnX = context_.pixel_number[
_X];
146 const int pnY = context_.pixel_number[
_Y];
147 const Real ppX = context_.pixel_pitch[
_X];
148 const Real ppY = context_.pixel_pitch[
_Y];
// Side effect: context_.ss[_X] and context_.ss[_Y] are overwritten with pixel count times
// pixel pitch while initialising ssX / ssY.
149 const Real ssX = context_.ss[
_X] = pnX * ppX;
150 const Real ssY = context_.ss[
_Y] = pnY * ppY;
151 const uint N = pnX * pnY;
152 const uint nChannel = context_.waveNum;
// Number of depth layers that will be rendered.
154 size_t depth_sz = dm_config_.render_depth.size();
156 const bool bRandomPhase = GetRandomPhase();
// Ceiling division of N by blockSize; blockSize is defined on a line not shown here.
162 ulonglong gridSize = (N + blockSize - 1) / blockSize;
// Depth values come from dlevel_transform when is_ViewingWindow is set, otherwise dlevel.
164 vector<double>* pSrc = is_ViewingWindow ? &dlevel_transform : &dlevel;
166 for (
uint ch = 0; ch < nChannel; ch++)
// Per channel: clear the accumulated field and upload this channel's source image.
168 HANDLE_ERROR(cudaMemsetAsync(u_complex_gpu_, 0,
sizeof(cufftDoubleComplex) * N, stream_));
169 HANDLE_ERROR(cudaMemcpyAsync(img_src_gpu, m_vecRGB[ch],
sizeof(uchar1) * N, cudaMemcpyHostToDevice, stream_));
// k = 2*pi / lambda; the value is also stored into context_.k.
170 Real lambda = context_.wave_length[ch];
171 Real k = context_.k = (2 *
M_PI / lambda);
// Arguments of an expression whose beginning is not visible in this listing
// (presumably the DMKernelConfig construction — TODO confirm).
174 context_.pixel_number,
175 context_.pixel_pitch,
178 context_.wave_length[ch]
184 for (
size_t p = 0; p < depth_sz; ++p)
// randPhase and carrierPhaseDelay are declared on lines not shown in this listing.
187 cuDoubleComplex rand_phase, carrier_phase_delay;
188 GetRandomPhaseValue(randPhase, bRandomPhase);
// render_depth entries are used as 1-based indices into *pSrc (hence dtr - 1);
// vector::at() is bounds-checked.
191 int dtr = dm_config_.render_depth[p];
192 Real temp_depth = pSrc->at(dtr - 1);
// Raw byte copy of the host complex value into the CUDA complex type.
// assumes Complex<Real> and cuDoubleComplex have identical size and layout — TODO confirm
195 carrierPhaseDelay.exp();
196 memcpy(&carrier_phase_delay, &carrierPhaseDelay,
sizeof(
Complex<Real>));
// Clear the per-layer field before the kernel writes into it.
198 HANDLE_ERROR(cudaMemsetAsync(u_o_gpu_, 0,
sizeof(cufftDoubleComplex) * N, stream_));
200 cudaDepthHoloKernel(stream_, pnX, pnY, u_o_gpu_, img_src_gpu, dimg_src_gpu, depth_index_gpu,
201 dtr, rand_phase, carrier_phase_delay,
202 dm_config_.change_depth_quantization, dm_config_.default_depth_quantization, m_mode);
// Clear the FFT output buffer, then transform u_o_gpu_ into k_temp_d_ with
// direction = -1 and no normalisation.
204 HANDLE_ERROR(cudaMemsetAsync(k_temp_d_, 0,
sizeof(cufftDoubleComplex) * N, stream_));
206 cudaFFT(stream_, pnX, pnY, u_o_gpu_, k_temp_d_, -1,
false);
// Progress in percent: layers finished so far over (depth_sz * nChannel) layers in total.
210 m_nProgress = (int)((
Real)(ch * depth_sz + p + 1) * 100 / ((
Real)depth_sz * nChannel));
// Blocking device-to-host copy of the accumulated field for this channel.
// The return value is not checked (not wrapped in HANDLE_ERROR like the other calls).
212 cudaMemcpy(complex_H[ch], u_complex_gpu_,
sizeof(cufftDoubleComplex)* N, cudaMemcpyDeviceToHost);
// Release the working buffers. The pointers are not reset to null on any visible line.
// NOTE(review): initGPU() frees these same pointers when they are non-null — confirm they
// are nulled elsewhere, otherwise a later initGPU() call would free them twice.
225 if (u_o_gpu_) cudaFree(u_o_gpu_);
226 if (u_complex_gpu_) cudaFree(u_complex_gpu_);
227 if (k_temp_d_) cudaFree(k_temp_d_);
#define HANDLE_ERROR(err)
void cudaChangeDepthQuanKernel(CUstream_st *stream_, int pnx, int pny, Real *depth_index_gpu, unsigned char *dimg_src_gpu, int dtr, Real d1, Real d2, Real params_num_of_depth, Real params_far_depthmap, Real params_near_depthmap)
Quantize depth map on the GPU, only when the number of depth quantization is not the default value (i...
static cudaWrapper * getInstance()
void cudaFFT(CUstream_st *stream, int nx, int ny, cufftDoubleComplex *in_filed, cufftDoubleComplex *output_field, int direction, bool bNormalized)
Convert data from the spatial domain to the frequency domain using 2D FFT on GPU. ...
void cudaPropagation_AngularSpKernel(const int &nBlocks, const int &nThreads, CUstream_st *stream_, cufftDoubleComplex *input_d, cufftDoubleComplex *u_complex, const DMKernelConfig *cuda_config, Real propagation_dist)
Angular spectrum propagation method for GPU implementation.
unsigned long long ulonglong
int getMaxThreads(int idx)
struct DMKernelConfig DMKernelConfig
#define ELAPSED_TIME(x, y)
Openholo Point Cloud based CGH generation with CUDA GPGPU.
void cudaDepthHoloKernel(CUstream_st *stream, int nx, int ny, cufftDoubleComplex *u_o_gpu_, unsigned char *img_src_gpu, unsigned char *dimg_src_gpu, Real *depth_index_gpu, int dtr, cuDoubleComplex rand_phase_val, cuDoubleComplex carrier_phase_delay, int flag_change_depth_quan, unsigned int default_depth_quan, const unsigned int &mode)
Convert data from the spatial domain to the frequency domain using 2D FFT on GPU. ...