// ophTri::initialize_GPU
// Resets the host-side per-face work buffers and (re)allocates the device
// buffers and the CUDA stream that generateAS_GPU uses.
// NOTE(review): this listing is non-contiguous (the embedded source line
// numbers skip), so braces and some statements between the visible ones are
// not shown here.
51 void ophTri::initialize_GPU()
// Total number of pixels: pixel_number[_X] * pixel_number[_Y].
// NOTE(review): the product is formed from the two pixel_number elements
// before it is stored in a long long; if those elements are 32-bit ints the
// multiplication could overflow first — confirm the element type.
53 const long long int pnXY = context_.pixel_number[
_X] * context_.pixel_number[
_Y];
// Number of faces in the mesh; every per-face buffer below is sized from it.
54 const int N = meshData->n_faces;
// Release any scaled-mesh buffer left from a previous call.
57 delete[] scaledMeshData;
58 scaledMeshData =
nullptr;
// Allocate a fresh array of N faces and zero-fill it.
63 scaledMeshData =
new Face[N];
64 memset(scaledMeshData, 0,
sizeof(
Face) * N);
// Zero N vec3 entries of `no` (its allocation is not visible in this listing;
// presumably per-face normals — confirm).
71 memset(no, 0,
sizeof(
vec3) * N);
// Zero N vec3 entries of `na` (allocation not visible in this listing).
79 memset(na, 0,
sizeof(
vec3) * N);
// Zero 3 * N vec3 entries of `nv`; generateAS_GPU indexes it as nv[3 * j + k],
// i.e. three entries per face.
87 memset(nv, 0,
sizeof(
vec3) * N * 3);
// Create the stream that generateAS_GPU passes to its async memset, kernel
// wrappers and async copy.
// NOTE(review): the return status is not checked here, unlike the cudaMalloc
// calls below that are wrapped in HANDLE_ERROR.
90 cudaStreamCreate(&streamTriMesh);
// Free a previously held device buffer, then allocate pnXY cufftDoubleComplex
// elements for the angular spectrum.
// NOTE(review): generateAS_GPU calls cudaFree(angularSpectrum_GPU) in its
// cleanup and no reset to nullptr is visible there; if the pointer is left
// dangling this `if` would free it a second time — confirm.
93 if (angularSpectrum_GPU) cudaFree(angularSpectrum_GPU);
94 HANDLE_ERROR(cudaMalloc((
void**)&angularSpectrum_GPU,
sizeof(cufftDoubleComplex) * pnXY));
// Same free-then-allocate pattern for the pnXY-element `ffttemp` device buffer.
96 if (ffttemp) cudaFree(ffttemp);
97 HANDLE_ERROR(cudaMalloc((
void**)&ffttemp,
sizeof(cufftDoubleComplex) * pnXY));
// ophTri::generateAS_GPU
// For every wavelength channel: clears angularSpectrum_GPU, uploads the kernel
// configuration, walks the mesh faces and calls the flat- or continuous-shading
// kernel wrapper for each one, then copies the device result into complex_H[ch].
// Device buffers, the stream and host scratch buffers are released at the end.
//
// SHADING_FLAG: compared against SHADING_FLAT and SHADING_CONTINUOUS to choose
//               the per-face shading path; also forwarded to findNormals().
//
// NOTE(review): this listing is non-contiguous (the embedded source line
// numbers skip), so braces, branch bodies and some statements are not shown.
100 void ophTri::generateAS_GPU(
uint SHADING_FLAG)
// Pixel counts along X and Y.
102 const uint pnX = context_.pixel_number[
_X];
103 const uint pnY = context_.pixel_number[
_Y];
// Total number of pixels.
// NOTE(review): the product is formed from the two pixel_number elements
// before widening to long long — confirm it cannot overflow.
104 const long long int pnXY = context_.pixel_number[
_X] * context_.pixel_number[
_Y];
// Number of mesh faces and number of wavelength channels.
105 int N = meshData->n_faces;
106 uint nChannel = context_.waveNum;
// Host staging buffer that receives the device-to-host copy of the result.
108 cufftDoubleComplex* output =
new cufftDoubleComplex[pnXY];
// Delegates to findNormals with the shading flag (defined elsewhere;
// presumably fills no / na / nv — confirm).
110 findNormals(SHADING_FLAG);
// Device storage for a single `geometric` record; it is overwritten per face
// by the cudaMemcpy inside the face loop.
// NOTE(review): return status is not checked.
115 cudaMalloc((
void**)&device_geom,
sizeof(
geometric));
// Ceiling division: number of blocks of nBlockThreads threads needed to cover
// pnX * pnY pixels (the product is evaluated in uint arithmetic).
118 int nBlocks = (pnX * pnY + nBlockThreads - 1) / nBlockThreads;
// One pass per wavelength channel.
120 for (
uint ch = 0; ch < nChannel; ch++) {
// Zero the device output buffer for this channel, queued on streamTriMesh.
122 HANDLE_ERROR(cudaMemsetAsync(angularSpectrum_GPU, 0,
sizeof(cufftDoubleComplex) * pnXY, streamTriMesh));
// (fragment) arguments of a statement whose first line is not visible in this
// listing — presumably the construction of host_config for this channel.
126 context_.pixel_number,
127 context_.pixel_pitch,
128 context_.wave_length[ch],
// Upload the kernel configuration to the device with a plain cudaMemcpy.
// NOTE(review): return status is not checked.
132 cudaMemcpy(device_config, host_config,
sizeof(
MeshKernelConfig), cudaMemcpyHostToDevice);
// Visit every face of the mesh.
134 for (
int j = 0; j < N; j++)
// Guard on the face's `no` entry; the action taken when the check fails is
// not visible in this listing.
136 if (!checkValidity(no[j]))
// Derive `geom` from the scaled face and its `no` entry; the action taken on
// failure is not visible in this listing.
139 if (!findGeometricalRelations(scaledMeshData[j], no[j], geom))
// Upload this face's `geom` record to the device.
// NOTE(review): return status is not checked.
142 cudaMemcpy(device_geom, (
void*)&geom,
sizeof(
geometric), cudaMemcpyHostToDevice);
// Shading factor for the flat path; starts at 0.
144 Real shadingFactor = 0;
// Flat shading: a single factor per face.
147 if (SHADING_FLAG == SHADING_FLAT)
// Special case for an all-zero illumination vector (branch body not visible).
151 if (illumination[
_X] == 0 && illumination[
_Y] == 0 && illumination[
_Z] == 0) {
// Unit-length illumination direction.
155 vec3 normIllu = illumination /
norm(illumination);
// shadingFactor = 2 * dot(n, normIllu) + 0.3 (`n` is defined in lines that are
// not visible in this listing).
156 shadingFactor = 2 * (n[
_X] * normIllu[
_X] + n[
_Y] * normIllu[
_Y] + n[
_Z] * normIllu[
_Z]) + 0.3;
// Clamp negative factors to 0.
157 if (shadingFactor < 0) shadingFactor = 0;
// Invoke the flat-shading kernel wrapper for this face on streamTriMesh.
160 cudaMesh_Flat(nBlocks, nBlockThreads, angularSpectrum_GPU, device_config, shadingFactor,
161 device_geom, carrierWave[
_X], carrierWave[
_Y], carrierWave[
_Z], streamTriMesh);
// Continuous shading: three values per face, one from each nv entry.
163 else if (SHADING_FLAG == SHADING_CONTINUOUS)
// Each av value is dot(nv entry, illumination) + 0.1.
// NOTE(review): nv[3 * j + 1] is written to av[2] and nv[3 * j + 2] to av[1],
// i.e. indices 1 and 2 are crossed relative to the nv order — confirm this
// ordering is intentional.
165 av[0] = nv[3 * j + 0][0] * illumination[0] + nv[3 * j + 0][1] * illumination[1] + nv[3 * j + 0][2] * illumination[2] + 0.1;
166 av[2] = nv[3 * j + 1][0] * illumination[0] + nv[3 * j + 1][1] * illumination[1] + nv[3 * j + 1][2] * illumination[2] + 0.1;
167 av[1] = nv[3 * j + 2][0] * illumination[0] + nv[3 * j + 2][1] * illumination[1] + nv[3 * j + 2][2] * illumination[2] + 0.1;
// (fragment) trailing arguments of a call whose first line is not visible in
// this listing — presumably cudaMesh_Continuous.
170 device_geom, av[0], av[1], av[2], carrierWave[
_X], carrierWave[
_Y], carrierWave[
_Z], streamTriMesh);
// Progress value: (faces processed so far) * 50 / (N * nChannel), so it reaches
// 50 after the last face of the last channel.
172 m_nProgress = (int)((
Real)(ch * N + j + 1) * 50 / ((
Real)N * nChannel));
// Queue the device-to-host copy of this channel's result on streamTriMesh.
// NOTE(review): the copy is asynchronous, so `output` must not be read until
// the stream has been synchronized; no synchronization is visible between this
// call and the loop below (listing line 176 is not shown) — confirm.
175 HANDLE_ERROR(cudaMemcpyAsync(output, angularSpectrum_GPU,
sizeof(cufftDoubleComplex) * pnXY, cudaMemcpyDeviceToHost, streamTriMesh));
// Unpack the staged result into complex_H for this channel.
177 for (
int i = 0; i < pnXY; ++i)
// .x holds the real part, .y the imaginary part.
179 complex_H[ch][i][
_RE] = output[i].x;
180 complex_H[ch][i][
_IM] = output[i].y;
// Release the device buffers.
// NOTE(review): angularSpectrum_GPU is freed here and no reset to nullptr is
// visible; initialize_GPU frees it again whenever it is non-null — confirm the
// pointer is cleared.
186 cudaFree(device_geom);
187 cudaFree(device_config);
188 cudaFree(angularSpectrum_GPU);
// Destroy the stream and clear the handle.
191 cudaStreamDestroy(streamTriMesh);
192 streamTriMesh =
nullptr;
// Release the host staging buffer (branch body not visible in this listing).
195 if (output !=
nullptr)
// Release the scaled-mesh array and clear the pointer.
200 if (scaledMeshData !=
nullptr)
202 delete[] scaledMeshData;
203 scaledMeshData =
nullptr;
#define HANDLE_ERROR(err)
static cudaWrapper * getInstance()
void cudaMesh_Flat(const int &nBlocks, const int &nThreads, cufftDoubleComplex *output, const MeshKernelConfig *config, double shading_factor, const geometric *geom, double carrierWaveX, double carrierWaveY, double carrierWaveZ, CUstream_st *stream)
int getMaxThreads(int idx)
struct MeshKernelConfig MeshKernelConfig
structure for 3-dimensional Real type vector and its arithmetic.
void cudaMesh_Continuous(const int &nBlocks, const int &nThreads, cufftDoubleComplex *output, const MeshKernelConfig *config, const geometric *geom, double av0, double av1, double av2, double carrierWaveX, double carrierWaveY, double carrierWaveZ, CUstream_st *stream)