// OpenCL variant of ophPointCloud::genCghPointCloudGPU.
// NOTE: this listing is an excerpt. The embedded original line numbers skip, so braces, some
// declarations (pnXY, bufferSize, global, kernel, nChannel, device_result, device_config, ...)
// and some branches are not visible here.
// Visible flow: stage the point-cloud vertex and color arrays in device buffers, choose one of
// two kernels from diff_flag, then for every channel upload a config struct, launch the kernel
// and read the result back into complex_H[ch].
// @param diff_flag compared against PC_DIFF_RS to choose the kernel.
53 void ophPointCloud::genCghPointCloudGPU(
uint diff_flag)
// Device-side buffers for the vertex data and the color data.
61 cl_mem device_pc_data;
62 cl_mem device_amp_data;
71 const int n_colors = pc_data_.
n_colors;
// Host-side pointers; host_amp_data aliases pc_data_.color (no copy is made).
72 Real* host_pc_data =
nullptr;
73 Real* host_amp_data = pc_data_.color;
74 Real* host_dst =
nullptr;
// With a viewing window, the vertices are passed through transVW into a freshly allocated
// array of n_points * 3 Reals; this array is the one deleted at the end of the function.
77 if (is_ViewingWindow) {
78 host_pc_data =
new Real[n_points * 3];
79 transVW(n_points * 3, host_pc_data, pc_data_.vertex);
// Presumably the else branch (its opening is not visible): use the stored vertices directly.
82 host_pc_data = pc_data_.vertex;
// A single color channel is treated as grayscale.
86 bool bIsGrayScale = n_colors == 1 ?
true :
false;
92 cl_kernel* current_kernel =
nullptr;
// Host staging buffer of pnXY * 2 Reals, zero-filled.
// NOTE(review): the buffer holds sizeof(Real) * pnXY * 2 bytes but memset clears
// bufferSize * 2 bytes; bufferSize is defined outside this excerpt - confirm the two sizes
// agree. The fill value is written as the double literal 0. where memset takes an int.
94 host_dst =
new Real[pnXY * 2];
95 memset(host_dst, 0., bufferSize * 2);
// PC_DIFF_RS selects kernel[0]; any other diff_flag value selects kernel[1].
97 current_kernel = diff_flag ==
PC_DIFF_RS ? &kernel[0] : &kernel[1];
// Read-only device buffers sized for n_points * 3 vertex values and n_points * n_colors
// color values.
// NOTE(review): nErr from each clCreateBuffer is overwritten by the next call; no check is
// visible in this excerpt.
99 device_pc_data = clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(
Real) * n_points * 3,
nullptr, &nErr);
100 device_amp_data = clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(
Real) * n_points * n_colors,
nullptr, &nErr);
// Blocking (CL_TRUE) uploads of the vertex and color arrays.
101 nErr = clEnqueueWriteBuffer(commands, device_pc_data, CL_TRUE, 0,
sizeof(
Real) * n_points * 3, host_pc_data, 0,
nullptr,
nullptr);
102 nErr = clEnqueueWriteBuffer(commands, device_amp_data, CL_TRUE, 0,
sizeof(
Real) * n_points * n_colors, host_amp_data, 0,
nullptr,
nullptr);
// Result buffer sized for pnXY * 2 Reals.
104 device_result = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
sizeof(
Real) * pnXY * 2,
nullptr, &nErr);
// NOTE(review): local is not passed to the visible clEnqueueNDRangeKernel call below (its
// local-work-size argument is nullptr); confirm whether it is used in the omitted lines.
108 size_t local[2] = { 32, 32 };
// Kernel arguments 1, 2 and 4 do not depend on the channel, so they are set once here.
110 clSetKernelArg(*current_kernel, 1,
sizeof(cl_mem), &device_pc_data);
111 clSetKernelArg(*current_kernel, 2,
sizeof(cl_mem), &device_amp_data);
112 clSetKernelArg(*current_kernel, 4,
sizeof(
uint), &n_points);
// One pass per channel.
113 for (
uint ch = 0; ch < nChannel; ch++)
// Grayscale input always uses offset 0; otherwise the offset is the channel index.
115 uint nAdd = bIsGrayScale ? 0 : ch;
// Base configuration object (the remaining constructor arguments are not visible here).
119 GpuConst* host_config =
new GpuConst(
120 n_points, n_colors, 1,
// host_config is replaced by a GpuConstNERS built from the base config.
// NOTE(review): the previous pointer is overwritten and no delete is visible in this excerpt -
// check for a leak. Presumably this is the PC_DIFF_RS branch; the condition is not visible.
132 host_config =
new GpuConstNERS(*host_config);
133 device_config = clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(GpuConstNERS),
nullptr, &nErr);
// Upload host_dst into the result buffer, then upload the config struct (both blocking).
// host_dst was zero-filled above, so this presumably clears the result for this channel.
135 nErr = clEnqueueWriteBuffer(commands, device_result, CL_TRUE, 0,
sizeof(
Real) * pnXY * 2, host_dst, 0,
nullptr,
nullptr);
136 nErr = clEnqueueWriteBuffer(commands, device_config, CL_TRUE, 0,
sizeof(GpuConstNERS), host_config, 0,
nullptr,
nullptr);
// Same sequence with GpuConstNEFR; presumably the other diff_flag branch (condition not visible).
// NOTE(review): the same pointer-overwrite concern applies here.
140 host_config =
new GpuConstNEFR(*host_config);
141 device_config = clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(GpuConstNEFR),
nullptr, &nErr);
143 nErr = clEnqueueWriteBuffer(commands, device_result, CL_TRUE, 0,
sizeof(
Real) * pnXY * 2, host_dst, 0,
nullptr,
nullptr);
144 nErr = clEnqueueWriteBuffer(commands, device_config, CL_TRUE, 0,
sizeof(GpuConstNEFR), host_config, 0,
nullptr,
nullptr);
// Per-channel kernel arguments: 0 = result buffer, 3 = config buffer, 5 = channel index.
147 clSetKernelArg(*current_kernel, 0,
sizeof(cl_mem), &device_result);
148 clSetKernelArg(*current_kernel, 3,
sizeof(cl_mem), &device_config);
149 clSetKernelArg(*current_kernel, 5,
sizeof(
uint), &ch);
// 2-dimensional launch over global; no explicit local work size is supplied.
151 nErr = clEnqueueNDRangeKernel(commands, *current_kernel, 2,
nullptr, global,
nullptr, 0,
nullptr,
nullptr);
// Wait for the queue to drain. Only the status returned by clFinish reaches the check below;
// the launch status assigned just above is overwritten first.
155 nErr = clFinish(commands);
157 if (nErr != CL_SUCCESS) cl->
errorCheck(nErr,
"Check", __FILE__, __LINE__);
// Blocking read of pnXY * 2 Reals into this channel's output field.
159 nErr = clEnqueueReadBuffer(commands, device_result, CL_TRUE, 0,
sizeof(
Real) * pnXY * 2,
complex_H[ch], 0,
nullptr,
nullptr);
// Progress as an integer percentage of completed channels.
163 m_nProgress = (ch + 1) * 100 / nChannel;
// Cleanup of the device buffers and host arrays allocated above.
// NOTE(review): no release of device_config and no delete of host_config is visible in this
// excerpt - confirm they are handled in the omitted lines.
166 clReleaseMemObject(device_result);
167 clReleaseMemObject(device_amp_data);
168 clReleaseMemObject(device_pc_data);
169 if (host_dst)
delete[] host_dst;
// host_pc_data is owned by this function only when it was allocated for the viewing window.
170 if (is_ViewingWindow && host_pc_data)
delete[] host_pc_data;
// CUDA variant of ophPointCloud::genCghPointCloudGPU (same signature as the OpenCL listing
// earlier in this excerpt; presumably it comes from a different source file or build path).
// NOTE: this listing is an excerpt. The embedded original line numbers skip, so braces, the
// config construction, the switch with the kernel launches and part of the cleanup are not
// visible here.
// Visible flow: validate diff_flag, derive the launch size from the pixel count, upload the
// vertex array, then for every channel set the wavelength-dependent config fields, check for a
// launch error and copy the result into complex_H[ch].
// @param diff_flag expected to be PC_DIFF_RS or PC_DIFF_FRESNEL.
178 void ophPointCloud::genCghPointCloudGPU(
uint diff_flag)
// Any other diff_flag value is reported as a failure (what happens next is not visible here).
180 if ((diff_flag != PC_DIFF_RS) && (diff_flag != PC_DIFF_FRESNEL))
182 LOG(
"<FAILED> Wrong parameters.");
// Total pixel count of the hologram plane (X * Y).
188 const ulonglong pnXY = context_.pixel_number[
_X] * context_.pixel_number[
_Y];
// Ceiling division so that gridSize * blockSize covers every pixel; blockSize is defined
// outside this excerpt.
190 ulonglong gridSize = (pnXY + blockSize - 1) / blockSize;
// Print the launch configuration.
192 cout <<
">>> All " << blockSize * gridSize <<
" threads in CUDA" << endl;
193 cout <<
">>> " << blockSize <<
" threads/block, " << gridSize <<
" blocks/grid" << endl;
// Without a viewing window the stored vertices are used directly; otherwise they are copied
// into a new array and transformed in place (transVW is called with dst == src).
197 Vertex* host_vertex_data =
nullptr;
198 if (!is_ViewingWindow)
199 host_vertex_data = pc_data_.vertices;
202 host_vertex_data =
new Vertex[pc_data_.n_points];
203 std::memcpy(host_vertex_data, pc_data_.vertices,
sizeof(
Vertex) * pc_data_.n_points);
204 transVW(pc_data_.n_points, host_vertex_data, host_vertex_data);
// Device allocation for the vertex array.
207 Vertex* device_vertex_data;
208 HANDLE_ERROR(cudaMalloc((
void**)&device_vertex_data, pc_data_.n_points *
sizeof(
Vertex)));
// Device output buffer: one cuDoubleComplex per pixel, zero-filled.
// NOTE(review): the fill value is written as the double literal 0. where an int is expected.
211 const ulonglong bufferSize = pnXY *
sizeof(cuDoubleComplex);
212 cuDoubleComplex* device_dst =
nullptr;
213 HANDLE_ERROR(cudaMalloc((
void**)&device_dst, bufferSize));
214 HANDLE_ERROR(cudaMemsetAsync(device_dst, 0., bufferSize));
// Number of channels to process.
216 uint nChannel = context_.waveNum;
// NOTE(review): this return value is not wrapped in HANDLE_ERROR, unlike the neighbouring
// CUDA calls; free and total are declared outside this excerpt.
218 cudaMemGetInfo(&free, &total);
// Fragments of an argument list (the enclosing call or constructor is not visible here).
225 context_.pixel_number,
227 context_.pixel_pitch,
230 context_.wave_length[0]
// Upload the (possibly transformed) vertex array to the device.
233 HANDLE_ERROR(cudaMemcpy(device_vertex_data, host_vertex_data, pc_data_.n_points *
sizeof(
Vertex), cudaMemcpyHostToDevice));
// One pass per channel.
235 for (
uint ch = 0; ch < nChannel; ch++)
// Wave number for this channel's wavelength; the value is also stored in context_.k.
237 host_config->
k = context_.k = (2 *
M_PI) / context_.wave_length[ch];
238 host_config->
lambda = context_.wave_length[ch];
// The enclosing switch and the kernel launches are not visible in this excerpt.
249 case PC_DIFF_FRESNEL: {
// Check for an error after the launch and log its name; cudaErrorLaunchOutOfResources is
// singled out (its handling is not visible here).
258 cudaError error = cudaGetLastError();
259 if (error != cudaSuccess) {
260 LOG(
"cudaGetLastError(): %s\n", cudaGetErrorName(error));
261 if (error == cudaErrorLaunchOutOfResources) {
// Copy this channel's result from the device into complex_H[ch].
// NOTE(review): device_dst is zero-filled once before the loop in the visible code; confirm
// whether it must be cleared again per channel.
268 HANDLE_ERROR(cudaMemcpy(complex_H[ch], device_dst, bufferSize, cudaMemcpyDeviceToHost));
// Progress as an integer percentage of completed channels.
270 m_nProgress = (ch + 1) * 100 / nChannel;
// The host vertex copy is owned by this function only when it was allocated for the viewing
// window.
// NOTE(review): no cudaFree for device_vertex_data or device_dst is visible in this excerpt.
278 if (is_ViewingWindow) {
279 delete[] host_vertex_data;
static CUDA * getInstance()
void errorCheck(cl_int err, const char *operation, char *filename, int line)
double lambda
Wave number = (2 * PI) / lambda.
Real distance
Offset value of point cloud.
void printMemoryInfo(uint64_t total, uint64_t free)
cl_context & getContext()
void cudaPointCloud_Fresnel(const int &nBlocks, const int &nThreads, Vertex *cuda_vertex_data, cuDoubleComplex *cuda_dst, const CudaPointCloudConfigFresnel *cuda_config, const uint &iColor, const uint &mode)
#define HANDLE_ERROR(err)
vec3 scale
Scaling factor of coordinate of point cloud.
unsigned long long ulonglong
double k
(pixel_y * ny) / 2
_CudaPointCloudConfigRS CudaPointCloudConfigRS
static OpenCL * getInstance()
void cudaPointCloud_RS(const int &nBlocks, const int &nThreads, Vertex *cuda_vertex_data, cuDoubleComplex *cuda_dst, const CudaPointCloudConfigRS *cuda_config, const uint &iColor, const uint &mode)
int n_colors
Number of color channel.
cl_command_queue & getCommand()
Complex< Real > ** complex_H
#define ELAPSED_TIME(x, y)
_CudaPointCloudConfigFresnel CudaPointCloudConfigFresnel
void transVW(int nVertex, Vertex *dst, Vertex *src)
struct _CudaPointCloudConfig CudaPointCloudConfig