Openholo  v4.2
Open Source Digital Holographic Library
ophTriMesh_GPU.cpp
Go to the documentation of this file.
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install, copy or use the software.
7 //
8 //
9 // License Agreement
10 // For Open Source Digital Holographic Library
11 //
12 // Openholo library is free software;
13 // you can redistribute it and/or modify it under the terms of the BSD 2-Clause license.
14 //
15 // Copyright (C) 2017-2024, Korea Electronics Technology Institute. All rights reserved.
16 // E-mail : contact.openholo@gmail.com
17 // Web : http://www.openholo.org
18 //
19 // Redistribution and use in source and binary forms, with or without modification,
20 // are permitted provided that the following conditions are met:
21 //
22 // 1. Redistribution's of source code must retain the above copyright notice,
23 // this list of conditions and the following disclaimer.
24 //
25 // 2. Redistribution's in binary form must reproduce the above copyright notice,
26 // this list of conditions and the following disclaimer in the documentation
27 // and/or other materials provided with the distribution.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the copyright holder or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 // This software contains open-source software released under the GNU General Public License,
41 // the NVIDIA Software License Agreement, or the CUDA supplement to the Software License Agreement.
42 // Check whether software you use contains licensed software.
43 //
44 //M*/
45 
46 #include "ophTriMesh_GPU.h"
47 #include "CUDA.h"
48 
49 using namespace oph;
50 
51 void ophTri::initialize_GPU()
52 {
53  const long long int pnXY = context_.pixel_number[_X] * context_.pixel_number[_Y];
54  const int N = meshData->n_faces;
55 
56  if (scaledMeshData) {
57  delete[] scaledMeshData;
58  scaledMeshData = nullptr;
59  }
60 
61  //scaledMeshData = new Real[N * 9];
62  //memset(scaledMeshData, 0, sizeof(Real) * N * 9);
63  scaledMeshData = new Face[N];
64  memset(scaledMeshData, 0, sizeof(Face) * N);
65 
66  if (no) {
67  delete[] no;
68  no = nullptr;
69  }
70  no = new vec3[N];
71  memset(no, 0, sizeof(vec3) * N);
72 
73 
74  if (na) {
75  delete[] na;
76  na = nullptr;
77  }
78  na = new vec3[N];
79  memset(na, 0, sizeof(vec3) * N);
80 
81 
82  if (nv) {
83  delete[] nv;
84  nv = nullptr;
85  }
86  nv = new vec3[N * 3];
87  memset(nv, 0, sizeof(vec3) * N * 3);
88 
89  if (!streamTriMesh)
90  cudaStreamCreate(&streamTriMesh);
91 
92 
93  if (angularSpectrum_GPU) cudaFree(angularSpectrum_GPU);
94  HANDLE_ERROR(cudaMalloc((void**)&angularSpectrum_GPU, sizeof(cufftDoubleComplex) * pnXY));
95 
96  if (ffttemp) cudaFree(ffttemp);
97  HANDLE_ERROR(cudaMalloc((void**)&ffttemp, sizeof(cufftDoubleComplex) * pnXY));
98 }
99 
100 void ophTri::generateAS_GPU(uint SHADING_FLAG)
101 {
102  const uint pnX = context_.pixel_number[_X];
103  const uint pnY = context_.pixel_number[_Y];
104  const long long int pnXY = context_.pixel_number[_X] * context_.pixel_number[_Y];
105  int N = meshData->n_faces;
106  uint nChannel = context_.waveNum;
107 
108  cufftDoubleComplex* output = new cufftDoubleComplex[pnXY];
109 
110  findNormals(SHADING_FLAG);
111 
112  MeshKernelConfig* device_config = nullptr;
113  cudaMalloc((void**)&device_config, sizeof(MeshKernelConfig));
114  geometric* device_geom = nullptr;
115  cudaMalloc((void**)&device_geom, sizeof(geometric));
116 
117  int nBlockThreads = CUDA::getInstance()->getMaxThreads() >> 1;
118  int nBlocks = (pnX * pnY + nBlockThreads - 1) / nBlockThreads;
119 
120  for (uint ch = 0; ch < nChannel; ch++) {
121 
122  HANDLE_ERROR(cudaMemsetAsync(angularSpectrum_GPU, 0, sizeof(cufftDoubleComplex) * pnXY, streamTriMesh));
123  geometric geom;
124 
125  MeshKernelConfig* host_config = new MeshKernelConfig(
126  context_.pixel_number,
127  context_.pixel_pitch,
128  context_.wave_length[ch],
129  SHADING_FLAG
130  );
131 
132  cudaMemcpy(device_config, host_config, sizeof(MeshKernelConfig), cudaMemcpyHostToDevice);
133 
134  for (int j = 0; j < N; j++)
135  {
136  if (!checkValidity(no[j])) // Ignore Invalid
137  continue;
138 
139  if (!findGeometricalRelations(scaledMeshData[j], no[j], geom))
140  continue;
141 
142  cudaMemcpy(device_geom, (void*)&geom, sizeof(geometric), cudaMemcpyHostToDevice);
143 
144  Real shadingFactor = 0;
145  vec3 av(0, 0, 0);
146 
147  if (SHADING_FLAG == SHADING_FLAT)
148  {
149  vec3 no_ = no[j];
150  vec3 n = no_ / norm(no_);
151  if (illumination[_X] == 0 && illumination[_Y] == 0 && illumination[_Z] == 0) {
152  shadingFactor = 1;
153  }
154  else {
155  vec3 normIllu = illumination / norm(illumination);
156  shadingFactor = 2 * (n[_X] * normIllu[_X] + n[_Y] * normIllu[_Y] + n[_Z] * normIllu[_Z]) + 0.3;
157  if (shadingFactor < 0) shadingFactor = 0;
158  }
159 
160  cudaMesh_Flat(nBlocks, nBlockThreads, angularSpectrum_GPU, device_config, shadingFactor,
161  device_geom, carrierWave[_X], carrierWave[_Y], carrierWave[_Z], streamTriMesh);
162  }
163  else if (SHADING_FLAG == SHADING_CONTINUOUS)
164  {
165  av[0] = nv[3 * j + 0][0] * illumination[0] + nv[3 * j + 0][1] * illumination[1] + nv[3 * j + 0][2] * illumination[2] + 0.1;
166  av[2] = nv[3 * j + 1][0] * illumination[0] + nv[3 * j + 1][1] * illumination[1] + nv[3 * j + 1][2] * illumination[2] + 0.1;
167  av[1] = nv[3 * j + 2][0] * illumination[0] + nv[3 * j + 2][1] * illumination[1] + nv[3 * j + 2][2] * illumination[2] + 0.1;
168 
169  cudaMesh_Continuous(nBlocks, nBlockThreads, angularSpectrum_GPU, device_config,
170  device_geom, av[0], av[1], av[2], carrierWave[_X], carrierWave[_Y], carrierWave[_Z], streamTriMesh);
171  }
172  m_nProgress = (int)((Real)(ch * N + j + 1) * 50 / ((Real)N * nChannel));
173  }
174 
175  HANDLE_ERROR(cudaMemcpyAsync(output, angularSpectrum_GPU, sizeof(cufftDoubleComplex) * pnXY, cudaMemcpyDeviceToHost, streamTriMesh));
176 
177  for (int i = 0; i < pnXY; ++i)
178  {
179  complex_H[ch][i][_RE] = output[i].x;
180  complex_H[ch][i][_IM] = output[i].y;
181  }
182 
183  delete host_config;
184  }
185 
186  cudaFree(device_geom);
187  cudaFree(device_config);
188  cudaFree(angularSpectrum_GPU);
189  cudaFree(fftTemp);
190 
191  cudaStreamDestroy(streamTriMesh);
192  streamTriMesh = nullptr;
193 
194  m_nProgress = 100;
195  if (output != nullptr)
196  {
197  delete[] output;
198  output = nullptr;
199  }
200  if (scaledMeshData != nullptr)
201  {
202  delete[] scaledMeshData;
203  scaledMeshData = nullptr;
204  }
205  if (no != nullptr)
206  {
207  delete[] no;
208  no = nullptr;
209  }
210  if (na != nullptr)
211  {
212  delete[] na;
213  na = nullptr;
214  }
215  if (nv != nullptr)
216  {
217  delete[] nv;
218  nv = nullptr;
219  }
220 }
static CUDA * getInstance()
Definition: CUDA.h:17
void cudaMesh_Continuous(const int &nBlocks, const int &nThreads, cufftDoubleComplex *output, const MeshKernelConfig *config, const geometric *geom, double av0, double av1, double av2, double carrierWaveX, double carrierWaveY, double carrierWaveZ, CUstream_st *stream)
int getMaxThreads()
Definition: CUDA.h:34
Real norm(const vec2 &a)
Definition: vec.h:417
#define HANDLE_ERROR(err)
float Real
Definition: typedef.h:55
#define _IM
Definition: complex.h:58
struct MeshKernelConfig MeshKernelConfig
#define _X
Definition: define.h:92
geometrical relations
Definition: ophTriMesh.h:62
void cudaMesh_Flat(const int &nBlocks, const int &nThreads, cufftDoubleComplex *output, const MeshKernelConfig *config, double shading_factor, const geometric *geom, double carrierWaveX, double carrierWaveY, double carrierWaveZ, CUstream_st *stream)
#define _RE
Definition: complex.h:55
structure for 3-dimensional Real type vector and its arithmetic.
Definition: vec.h:466
Definition: struct.h:115
#define _Y
Definition: define.h:96
#define _Z
Definition: define.h:100
Definition: Bitmap.h:49
unsigned int uint
Definition: typedef.h:62