Openholo  v4.2
Open Source Digital Holographic Library
ophWRP_GPU.cpp
Go to the documentation of this file.
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install, copy or use the software.
7 //
8 //
9 // License Agreement
10 // For Open Source Digital Holographic Library
11 //
12 // Openholo library is free software;
13 // you can redistribute it and/or modify it under the terms of the BSD 2-Clause license.
14 //
15 // Copyright (C) 2017-2024, Korea Electronics Technology Institute. All rights reserved.
16 // E-mail : contact.openholo@gmail.com
17 // Web : http://www.openholo.org
18 //
19 // Redistribution and use in source and binary forms, with or without modification,
20 // are permitted provided that the following conditions are met:
21 //
22 // 1. Redistributions of source code must retain the above copyright notice,
23 // this list of conditions and the following disclaimer.
24 //
25 // 2. Redistributions in binary form must reproduce the above copyright notice,
26 // this list of conditions and the following disclaimer in the documentation
27 // and/or other materials provided with the distribution.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the copyright holder or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 // This software contains open-source software released under the GNU General Public License,
41 // NVIDIA Software License Agreement, or CUDA supplement to Software License Agreement.
42 //M*/
43 #include "ophWRP.h"
44 #include "ophWRP_GPU.h"
45 #include "sys.h"
46 #include "CUDA.h"
47 
49 {
50  LOG("%s\n", __FUNCTION__);
51  LOG("\tMemory Allocation : ");
52  auto begin = CUR_TIME;
53  auto step = CUR_TIME;
54 
55  CUDA *cuda = CUDA::getInstance();
56 
57  bool bSupportDouble = false;
58 
60 
61 
62  int blockSize = cuda->getMaxThreads(); //n_threads
63 
64  ulonglong gridSize = (n_points + blockSize - 1) / blockSize; //n_blocks
65 
66  //threads number
67 
68  //Host Memory Location
69  const int n_colors = obj_.n_colors;
70  //Real* host_pc_data = scaledVertex;//obj_.vertex;
71  Vertex* host_pc_data = scaledVertex;//obj_.vertex;
72  //Real* host_amp_data = obj_.color;
73  const uint pnX = context_.pixel_number[_X];
74  const uint pnY = context_.pixel_number[_Y];
75  const long long int pnXY = pnX * pnY;
76  const Real ppX = context_.pixel_pitch[_X];
77  const Real ppY = context_.pixel_pitch[_Y];
78  const Real distance = wrp_config_.propagation_distance;
79  const uint nChannel = context_.waveNum;
80 
81  //Device(GPU) Memory Location
82  //Real* device_pc_data;
83  //HANDLE_ERROR(cudaMalloc((void**)&device_pc_data, n_points * 3 * sizeof(Real)));
84  //Real* device_amp_data;
85  //HANDLE_ERROR(cudaMalloc((void**)&device_amp_data, n_points * n_colors * sizeof(Real)));
86  Vertex* device_pc_data = nullptr;
87  HANDLE_ERROR(cudaMalloc((void**)&device_pc_data, n_points * sizeof(Vertex)));
88  WRPGpuConst* device_config = nullptr;
89  HANDLE_ERROR(cudaMalloc((void**)&device_config, sizeof(WRPGpuConst)));
90 
91  //cuda obj dst
92  const ulonglong bufferSize = pnXY * sizeof(Real);
93 
94  ulonglong gridSize2 = (pnXY + blockSize - 1) / blockSize; //n_blocks
95  ulonglong gridSize3 = (pnXY * 4 + blockSize - 1) / blockSize;
96  cuDoubleComplex* device_dst = nullptr;
97  //Real* device_dst;
98  HANDLE_ERROR(cudaMalloc((void**)&device_dst, pnXY * sizeof(cuDoubleComplex)));
99 
100  cuDoubleComplex* src;
101  cufftDoubleComplex *fftsrc;
102  cufftDoubleComplex *fftdst;
103  HANDLE_ERROR(cudaMalloc((void**)&src, pnXY * 4 * sizeof(cuDoubleComplex)));
104  HANDLE_ERROR(cudaMalloc((void**)&fftsrc, pnXY * 4 * sizeof(cufftDoubleComplex)));
105  HANDLE_ERROR(cudaMalloc((void**)&fftdst, pnXY * 4 * sizeof(cufftDoubleComplex)));
106  //HANDLE_ERROR(cudaMemcpy(device_pc_data, host_pc_data, n_points * 3 * sizeof(Real), cudaMemcpyHostToDevice));
107  //HANDLE_ERROR(cudaMemcpy(device_amp_data, host_amp_data, n_points * n_colors * sizeof(Real), cudaMemcpyHostToDevice));
108  HANDLE_ERROR(cudaMemcpy(device_pc_data, host_pc_data, n_points * sizeof(Vertex), cudaMemcpyHostToDevice));
109  bool bRandomPhase = GetRandomPhase();
110 
111  LOG("%lf (s)\n", ELAPSED_TIME(step, CUR_TIME));
112 
113 
114  //Real wz = wrp_config_.wrp_location - zmax_;
115  for (uint ch = 0; ch < nChannel; ch++)
116  {
117  LOG("\tCUDA Gen WRP <<<%llu, %d>>> : ", gridSize, blockSize);
118  HANDLE_ERROR(cudaMemset(src, 0, pnXY * 4 * sizeof(cuDoubleComplex)));
119  HANDLE_ERROR(cudaMemset(fftsrc, 0, pnXY * 4 * sizeof(cufftDoubleComplex)));
120  HANDLE_ERROR(cudaMemset(fftdst, 0, pnXY * 4 * sizeof(cufftDoubleComplex)));
121 
122  step = CUR_TIME;
123 
124  Real lambda = context_.wave_length[ch];
125  Real k = context_.k = (2 * M_PI / lambda);
126  int nAdd = ch;
127 
128  WRPGpuConst* host_config = new WRPGpuConst(
129  obj_.n_points, n_colors, 1,
134  k, lambda, bRandomPhase, nAdd
135  );
136  HANDLE_ERROR(cudaMemcpy(device_config, host_config, sizeof(WRPGpuConst), cudaMemcpyHostToDevice));
137  HANDLE_ERROR(cudaMemset(device_dst, 0., pnXY * sizeof(cuDoubleComplex)));
138 
139  // cuda WRP
140  cudaGenWRP(gridSize, blockSize, n_points, device_pc_data, device_dst, (WRPGpuConst*)device_config);
141 
142  LOG("%lf (s)\n", ELAPSED_TIME(step, CUR_TIME));
143 
144  // 20200824_mwnam_
145  cudaError error = cudaGetLastError();
146  if (error != cudaSuccess) {
147  LOG("cudaGetLastError(): %s\n", cudaGetErrorName(error));
148  if (error == cudaErrorLaunchOutOfResources) {
149  ch--;
150  blockSize /= 2;
151  gridSize = (n_points + blockSize - 1) / blockSize;
152  gridSize2 = (pnXY + blockSize - 1) / blockSize;
153  gridSize3 = (pnXY * 4 + blockSize - 1) / blockSize;
154  cuda->setCurThreads(blockSize);
155  delete host_config;
156  continue;
157  }
158  }
159  LOG("\tCUDA FresnelPropagation <<<%llu, %d>>> : ", gridSize2, blockSize);
160  step = CUR_TIME;
161  cudaFresnelPropagationWRP(gridSize2, gridSize3, blockSize, pnX, pnY, device_dst, src, fftsrc, fftdst, (WRPGpuConst*)device_config);
162  HANDLE_ERROR(cudaMemcpy(complex_H[ch], device_dst, sizeof(cuDoubleComplex) * pnXY, cudaMemcpyDeviceToHost));
163 
164  LOG("%lf (s)\n", ELAPSED_TIME(step, CUR_TIME));
165  // 20200824_mwnam_
166  error = cudaGetLastError();
167  if (error != cudaSuccess) {
168  LOG("cudaGetLastError(): %s\n", cudaGetErrorName(error));
169  }
170  delete host_config;
171  }
172 
173  //free memory
174  HANDLE_ERROR(cudaFree(src));
175  HANDLE_ERROR(cudaFree(fftsrc));
176  HANDLE_ERROR(cudaFree(fftdst));
177  HANDLE_ERROR(cudaFree(device_dst));
178  HANDLE_ERROR(cudaFree(device_pc_data));
179  //HANDLE_ERROR(cudaFree(device_amp_data));
180  HANDLE_ERROR(cudaFree(device_config));
181  LOG("Total : %lf (s)\n", ELAPSED_TIME(begin, CUR_TIME));
182 }
void cudaGenWRP(const int &nBlocks, const int &nThreads, const int &n_pts_per_stream, Vertex *cuda_pc_data, cuDoubleComplex *cuda_dst, const WRPGpuConst *cuda_config)
static CUDA * getInstance()
Definition: CUDA.h:17
Vertex * scaledVertex
Definition: ophWRP.h:226
#define M_PI
Definition: define.h:52
Real k
Definition: Openholo.h:103
int getMaxThreads()
Definition: CUDA.h:34
ulonglong n_points
Number of points.
Definition: ophGen.h:584
void cudaFresnelPropagationWRP(const int &nBlocks, const int &nBlocks2, const int &nThreads, const int &nx, const int &ny, cuDoubleComplex *src, cuDoubleComplex *dst, cufftDoubleComplex *fftsrc, cufftDoubleComplex *fftdst, const WRPGpuConst *cuda_config)
struct KernelConst WRPGpuConst
OphPointCloudData obj_
Input Pointcloud Data.
Definition: ophWRP.h:225
void setCurThreads(int thread)
Definition: CUDA.h:32
#define HANDLE_ERROR(err)
float Real
Definition: typedef.h:55
def k(wvl)
Definition: Depthmap.py:16
bool GetRandomPhase()
Function for getting the random phase.
Definition: ophGen.h:537
vec2 pixel_pitch
Definition: Openholo.h:101
unsigned long long ulonglong
Definition: typedef.h:67
#define _X
Definition: define.h:92
Real wrp_location
Location distance of WRP.
Definition: ophGen.h:663
OphWRPConfig wrp_config_
structure variable for WRP hologram configuration
Definition: ophWRP.h:227
Definition: CUDA.h:7
Definition: struct.h:102
uint waveNum
Definition: Openholo.h:105
int n_colors
Number of color channel.
Definition: ophGen.h:586
ivec2 pixel_number
Definition: Openholo.h:99
#define _Y
Definition: define.h:96
int n_points
numbers of points
Definition: ophWRP.h:221
Complex< Real > ** complex_H
Definition: Openholo.h:489
void calculateWRPGPU(void)
Definition: ophWRP_GPU.cpp:48
Real propagation_distance
Distance of Hologram plane.
Definition: ophGen.h:665
#define ELAPSED_TIME(x, y)
Definition: function.h:59
OphConfig context_
Definition: Openholo.h:485
Real * wave_length
Definition: Openholo.h:106
unsigned int uint
Definition: typedef.h:62
#define CUR_TIME
Definition: function.h:58