Openholo  v4.1
Open Source Digital Holographic Library
ophPointCloud_GPU.cpp
Go to the documentation of this file.
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install, copy or use the software.
7 //
8 //
9 // License Agreement
10 // For Open Source Digital Holographic Library
11 //
12 // Openholo library is free software;
13 // you can redistribute it and/or modify it under the terms of the BSD 2-Clause license.
14 //
15 // Copyright (C) 2017-2024, Korea Electronics Technology Institute. All rights reserved.
16 // E-mail : contact.openholo@gmail.com
17 // Web : http://www.openholo.org
18 //
19 // Redistribution and use in source and binary forms, with or without modification,
20 // are permitted provided that the following conditions are met:
21 //
22 // 1. Redistribution's of source code must retain the above copyright notice,
23 // this list of conditions and the following disclaimer.
24 //
25 // 2. Redistribution's in binary form must reproduce the above copyright notice,
26 // this list of conditions and the following disclaimer in the documentation
27 // and/or other materials provided with the distribution.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the copyright holder or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 // This software contains opensource software released under GNU Generic Public License,
41 // NVDIA Software License Agreement, or CUDA supplement to Software License Agreement.
42 // Check whether software you use contains licensed software.
43 //
44 //M*/
45 
46 #include "ophPointCloud.h"
47 #include "ophPointCloud_GPU.h"
48 #include "CUDA.h"
49 
50 #ifdef _USE_OPENCL
51 #include "OpenCL.h"
52 
53 void ophPointCloud::genCghPointCloudGPU(uint diff_flag)
54 {
55  int nErr;
56  auto begin = CUR_TIME;
58 
59  cl_context context = cl->getContext();
60  cl_command_queue commands = cl->getCommand();
61  cl_mem device_pc_data;
62  cl_mem device_amp_data;
63  cl_mem device_result;
64  cl_mem device_config;
65 
66  //threads number
68  const ulonglong bufferSize = pnXY * sizeof(Real);
69 
70  //Host Memory Location
71  const int n_colors = pc_data_.n_colors;
72  Real* host_pc_data = nullptr;
73  Real* host_amp_data = pc_data_.color;
74  Real* host_dst = nullptr;
75 
76  // Keep original buffer
77  if (is_ViewingWindow) {
78  host_pc_data = new Real[n_points * 3];
79  transVW(n_points * 3, host_pc_data, pc_data_.vertex);
80  }
81  else {
82  host_pc_data = pc_data_.vertex;
83  }
84 
85  uint nChannel = context_.waveNum;
86  bool bIsGrayScale = n_colors == 1 ? true : false;
87 
88  cl->LoadKernel();
89 
90  cl_kernel* kernel = cl->getKernel();
91 
92  cl_kernel* current_kernel = nullptr;
93  if ((diff_flag == PC_DIFF_RS) || (diff_flag == PC_DIFF_FRESNEL)) {
94  host_dst = new Real[pnXY * 2];
95  memset(host_dst, 0., bufferSize * 2);
96 
97  current_kernel = diff_flag == PC_DIFF_RS ? &kernel[0] : &kernel[1];
98 
99  device_pc_data = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(Real) * n_points * 3, nullptr, &nErr);
100  device_amp_data = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(Real) * n_points * n_colors, nullptr, &nErr);
101  nErr = clEnqueueWriteBuffer(commands, device_pc_data, CL_TRUE, 0, sizeof(Real) * n_points * 3, host_pc_data, 0, nullptr, nullptr);
102  nErr = clEnqueueWriteBuffer(commands, device_amp_data, CL_TRUE, 0, sizeof(Real) * n_points * n_colors, host_amp_data, 0, nullptr, nullptr);
103 
104  device_result = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(Real) * pnXY * 2, nullptr, &nErr);
105 
106 
107  size_t global[2] = { context_.pixel_number[_X], context_.pixel_number[_Y] };
108  size_t local[2] = { 32, 32 };
109 
110  clSetKernelArg(*current_kernel, 1, sizeof(cl_mem), &device_pc_data);
111  clSetKernelArg(*current_kernel, 2, sizeof(cl_mem), &device_amp_data);
112  clSetKernelArg(*current_kernel, 4, sizeof(uint), &n_points);
113  for (uint ch = 0; ch < nChannel; ch++)
114  {
115  uint nAdd = bIsGrayScale ? 0 : ch;
116  context_.k = (2 * M_PI) / context_.wave_length[ch];
117  Real ratio = 1.0; //context_.wave_length[nChannel - 1] / context_.wave_length[ch];
118 
119  GpuConst* host_config = new GpuConst(
120  n_points, n_colors, 1,
121  pc_config_.scale, pc_config_.distance,
124  context_.ss,
125  context_.k,
126  context_.wave_length[ch],
127  ratio
128  );
129 
130  if (diff_flag == PC_DIFF_RS)
131  {
132  host_config = new GpuConstNERS(*host_config);
133  device_config = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(GpuConstNERS), nullptr, &nErr);
134 
135  nErr = clEnqueueWriteBuffer(commands, device_result, CL_TRUE, 0, sizeof(Real) * pnXY * 2, host_dst, 0, nullptr, nullptr);
136  nErr = clEnqueueWriteBuffer(commands, device_config, CL_TRUE, 0, sizeof(GpuConstNERS), host_config, 0, nullptr, nullptr);
137  }
138  else if (diff_flag == PC_DIFF_FRESNEL)
139  {
140  host_config = new GpuConstNEFR(*host_config);
141  device_config = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(GpuConstNEFR), nullptr, &nErr);
142 
143  nErr = clEnqueueWriteBuffer(commands, device_result, CL_TRUE, 0, sizeof(Real) * pnXY * 2, host_dst, 0, nullptr, nullptr);
144  nErr = clEnqueueWriteBuffer(commands, device_config, CL_TRUE, 0, sizeof(GpuConstNEFR), host_config, 0, nullptr, nullptr);
145  }
146 
147  clSetKernelArg(*current_kernel, 0, sizeof(cl_mem), &device_result);
148  clSetKernelArg(*current_kernel, 3, sizeof(cl_mem), &device_config);
149  clSetKernelArg(*current_kernel, 5, sizeof(uint), &ch);
150 
151  nErr = clEnqueueNDRangeKernel(commands, *current_kernel, 2, nullptr, global, nullptr/*local*/, 0, nullptr, nullptr);
152 
153 
154  //nErr = clFlush(commands);
155  nErr = clFinish(commands);
156 
157  if (nErr != CL_SUCCESS) cl->errorCheck(nErr, "Check", __FILE__, __LINE__);
158 
159  nErr = clEnqueueReadBuffer(commands, device_result, CL_TRUE, 0, sizeof(Real) * pnXY * 2, complex_H[ch], 0, nullptr, nullptr);
160 
161  delete host_config;
162 
163  m_nProgress = (ch + 1) * 100 / nChannel;
164  }
165 
166  clReleaseMemObject(device_result);
167  clReleaseMemObject(device_amp_data);
168  clReleaseMemObject(device_pc_data);
169  if (host_dst) delete[] host_dst;
170  if (is_ViewingWindow && host_pc_data) delete[] host_pc_data;
171  }
172 
173  LOG("%s : %.5lf (sec)\n", __FUNCTION__, ELAPSED_TIME(begin, CUR_TIME));
174 }
175 #endif
176 
177 using namespace oph;
178 void ophPointCloud::genCghPointCloudGPU(uint diff_flag)
179 {
180  if ((diff_flag != PC_DIFF_RS) && (diff_flag != PC_DIFF_FRESNEL))
181  {
182  LOG("<FAILED> Wrong parameters.");
183  return;
184  }
185  CUDA* pCuda = CUDA::getInstance();
186 
187  auto begin = CUR_TIME;
188  const ulonglong pnXY = context_.pixel_number[_X] * context_.pixel_number[_Y];
189  int blockSize = pCuda->getMaxThreads(); //n_threads // blockSize < devProp.maxThreadsPerBlock
190  ulonglong gridSize = (pnXY + blockSize - 1) / blockSize; //n_blocks
191 
192  cout << ">>> All " << blockSize * gridSize << " threads in CUDA" << endl;
193  cout << ">>> " << blockSize << " threads/block, " << gridSize << " blocks/grid" << endl;
194 
195 
196  //Host Memory Location
197  Vertex* host_vertex_data = nullptr;
198  if (!is_ViewingWindow)
199  host_vertex_data = pc_data_.vertices;
200  else
201  {
202  host_vertex_data = new Vertex[pc_data_.n_points];
203  std::memcpy(host_vertex_data, pc_data_.vertices, sizeof(Vertex) * pc_data_.n_points);
204  transVW(pc_data_.n_points, host_vertex_data, host_vertex_data);
205  }
206 
207  Vertex* device_vertex_data;
208  HANDLE_ERROR(cudaMalloc((void**)&device_vertex_data, pc_data_.n_points * sizeof(Vertex)));
209 
210  //threads number
211  const ulonglong bufferSize = pnXY * sizeof(cuDoubleComplex);
212  cuDoubleComplex* device_dst = nullptr;
213  HANDLE_ERROR(cudaMalloc((void**)&device_dst, bufferSize));
214  HANDLE_ERROR(cudaMemsetAsync(device_dst, 0., bufferSize));
215 
216  uint nChannel = context_.waveNum;
217  size_t free, total;
218  cudaMemGetInfo(&free, &total);
219  pCuda->printMemoryInfo(total, free);
220 
221  CudaPointCloudConfig* host_config = new CudaPointCloudConfig(
222  pc_data_.n_points,
223  pc_config_.scale,
224  pc_config_.distance,
225  context_.pixel_number,
226  context_.offset,
227  context_.pixel_pitch,
228  context_.ss,
229  context_.k,
230  context_.wave_length[0]
231  );
232 
233  HANDLE_ERROR(cudaMemcpy(device_vertex_data, host_vertex_data, pc_data_.n_points * sizeof(Vertex), cudaMemcpyHostToDevice));
234 
235  for (uint ch = 0; ch < nChannel; ch++)
236  {
237  host_config->k = context_.k = (2 * M_PI) / context_.wave_length[ch];
238  host_config->lambda = context_.wave_length[ch];
239 
240  CudaPointCloudConfig* device_config = nullptr;
241  switch (diff_flag) {
242  case PC_DIFF_RS: {
243  host_config = new CudaPointCloudConfigRS(*host_config);
244  HANDLE_ERROR(cudaMalloc((void**)&device_config, sizeof(CudaPointCloudConfigRS)));
245  HANDLE_ERROR(cudaMemcpy(device_config, host_config, sizeof(CudaPointCloudConfigRS), cudaMemcpyHostToDevice));
246  cudaPointCloud_RS(gridSize, blockSize, device_vertex_data, device_dst, (CudaPointCloudConfigRS*)device_config, ch, m_mode);
247  break;
248  }
249  case PC_DIFF_FRESNEL: {
250  host_config = new CudaPointCloudConfigFresnel(*host_config);
251  HANDLE_ERROR(cudaMalloc((void**)&device_config, sizeof(CudaPointCloudConfigFresnel)));
252  HANDLE_ERROR(cudaMemcpy(device_config, host_config, sizeof(CudaPointCloudConfigFresnel), cudaMemcpyHostToDevice));
253  cudaPointCloud_Fresnel(gridSize, blockSize, device_vertex_data, device_dst, (CudaPointCloudConfigFresnel*)device_config, ch, m_mode);
254  break;
255  }
256 }
257 
258  cudaError error = cudaGetLastError();
259  if (error != cudaSuccess) {
260  LOG("cudaGetLastError(): %s\n", cudaGetErrorName(error));
261  if (error == cudaErrorLaunchOutOfResources) {
262  ch--;
263  blockSize /= 2;
264  gridSize *= 2;
265  continue;
266  }
267  }
268  HANDLE_ERROR(cudaMemcpy(complex_H[ch], device_dst, bufferSize, cudaMemcpyDeviceToHost));
269  HANDLE_ERROR(cudaMemset(device_dst, 0., bufferSize));
270  m_nProgress = (ch + 1) * 100 / nChannel;
271 
272  HANDLE_ERROR(cudaFree(device_config));
273  }
274  delete host_config;
275  HANDLE_ERROR(cudaFree(device_vertex_data));
276  HANDLE_ERROR(cudaFree(device_dst));
277 
278  if (is_ViewingWindow) {
279  delete[] host_vertex_data;
280  }
281 
282  LOG("%s : %.5lf (sec)\n", __FUNCTION__, ELAPSED_TIME(begin, CUR_TIME));
283 }
static CUDA * getInstance()
Definition: CUDA.h:17
void errorCheck(cl_int err, const char *operation, char *filename, int line)
Definition: OpenCL.cpp:150
bool LoadKernel()
Definition: OpenCL.cpp:162
Real k
Definition: Openholo.h:103
#define HANDLE_ERROR(err)
Definition: CUDA.cpp:16
double lambda
Wave Number = (2 * PI) / lambda;.
Real distance
Offset value of point cloud.
Definition: ophGen.h:559
void printMemoryInfo(uint64_t total, uint64_t free)
Definition: CUDA.cpp:78
int getMaxThreads()
Definition: CUDA.h:34
_CudaPointCloudConfigRS CudaPointCloudConfigRS
void cudaPointCloud_RS(const int &nBlocks, const int &nThreads, Vertex *cuda_vertex_data, cuDoubleComplex *cuda_dst, const CudaPointCloudConfigRS *cuda_config, const uint &iColor, const uint &mode)
cl_context & getContext()
Definition: OpenCL.h:45
vec2 ss
Definition: Openholo.h:104
#define _Y
Definition: define.h:96
vec2 pixel_pitch
Definition: Openholo.h:101
vec3 scale
Scaling factor of coordinate of point cloud.
Definition: ophGen.h:557
unsigned long long ulonglong
Definition: typedef.h:67
double k
(pixel_y * ny) / 2
return true
Definition: Openholo.cpp:434
void cudaPointCloud_Fresnel(const int &nBlocks, const int &nThreads, Vertex *cuda_vertex_data, cuDoubleComplex *cuda_dst, const CudaPointCloudConfigFresnel *cuda_config, const uint &iColor, const uint &mode)
struct _CudaPointCloudConfig CudaPointCloudConfig
#define CUR_TIME
Definition: function.h:58
static OpenCL * getInstance()
Definition: OpenCL.h:17
#define ELAPSED_TIME(x, y)
Definition: function.h:59
cl_kernel * getKernel()
Definition: OpenCL.h:48
Definition: CUDA.h:7
Definition: struct.h:102
uint waveNum
Definition: Openholo.h:105
int n_colors
Number of color channel.
Definition: ophGen.h:586
ivec2 pixel_number
Definition: Openholo.h:99
#define M_PI
Definition: define.h:52
_CudaPointCloudConfigFresnel CudaPointCloudConfigFresnel
cl_command_queue & getCommand()
Definition: OpenCL.h:46
Complex< Real > ** complex_H
Definition: Openholo.h:489
float Real
Definition: typedef.h:55
#define _X
Definition: define.h:92
OphConfig context_
Definition: Openholo.h:485
Definition: Bitmap.h:49
void transVW(int nVertex, Vertex *dst, Vertex *src)
Definition: ophGen.cpp:2072
Real * wave_length
Definition: Openholo.h:106
unsigned int uint
Definition: typedef.h:62
Definition: OpenCL.h:8