Openholo  v4.2
Open Source Digital Holographic Library
ophTriMeshKernel.cu
Go to the documentation of this file.
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install, copy or use the software.
7 //
8 //
9 // License Agreement
10 // For Open Source Digital Holographic Library
11 //
12 // Openholo library is free software;
13 // you can redistribute it and/or modify it under the terms of the BSD 2-Clause license.
14 //
15 // Copyright (C) 2017-2024, Korea Electronics Technology Institute. All rights reserved.
16 // E-mail : contact.openholo@gmail.com
17 // Web : http://www.openholo.org
18 //
19 // Redistribution and use in source and binary forms, with or without modification,
20 // are permitted provided that the following conditions are met:
21 //
22 // 1. Redistribution's of source code must retain the above copyright notice,
23 // this list of conditions and the following disclaimer.
24 //
25 // 2. Redistribution's in binary form must reproduce the above copyright notice,
26 // this list of conditions and the following disclaimer in the documentation
27 // and/or other materials provided with the distribution.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the copyright holder or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 // This software contains opensource software released under GNU Generic Public License,
41 // NVDIA Software License Agreement, or CUDA supplement to Software License Agreement.
42 // Check whether software you use contains licensed software.
43 //
44 //M*/
45 #pragma once
46 #ifndef ophTriMeshKernel_cu__
47 #define ophTriMeshKernel_cu__
48 #include "ophKernel.cuh"
49 #include "ophTriMesh_GPU.h"
50 #include <cuda_runtime_api.h>
51 #include <cuComplex.h>
52 #include <cuda.h>
53 #include <device_launch_parameters.h>
54 #include <cuda_runtime_api.h>
55 #include <cufft.h>
56 #include <vector>
57 
58 __device__ void exponent_complex_mesh(cuDoubleComplex* val)
59 {
60  double exp_val = exp(val->x);
61  double cos_v;
62  double sin_v;
63  sincos(val->y, &sin_v, &cos_v);
64 
65  val->x = exp_val * cos_v;
66  val->y = exp_val * sin_v;
67 }
68 
69 __device__ void exponent_complex_meshf(cuFloatComplex* val)
70 {
71  float exp_val = expf(val->x);
72  float cos_v;
73  float sin_v;
74  sincosf(val->y, &sin_v, &cos_v);
75 
76  val->x = exp_val * cos_v;
77  val->y = exp_val * sin_v;
78 }
79 
80 
81 void cudaFFT_Mesh(CUstream_st* stream, int nx, int ny, cufftDoubleComplex* in_field, cufftDoubleComplex* output_field, int direction)
82 {
83  unsigned int nblocks = (nx * ny + kBlockThreads - 1) / kBlockThreads;
84  int N = nx * ny;
85  fftShift << <nblocks, kBlockThreads, 0, stream >> > (N, nx, ny, in_field, output_field, false);
86 
87  cufftHandle plan;
88 
89  // fft
90  if (cufftPlan2d(&plan, ny, nx, CUFFT_Z2Z) != CUFFT_SUCCESS)
91  {
92  //LOG("FAIL in creating cufft plan");
93  return;
94  };
95 
96  cufftResult result;
97 
98  if (direction == -1)
99  result = cufftExecZ2Z(plan, output_field, in_field, CUFFT_FORWARD);
100  else
101  result = cufftExecZ2Z(plan, output_field, in_field, CUFFT_INVERSE);
102 
103  if (result != CUFFT_SUCCESS)
104  {
105  //LOG("------------------FAIL: execute cufft, code=%s", result);
106  return;
107  }
108 
109  if (cudaDeviceSynchronize() != cudaSuccess) {
110  //LOG("Cuda error: Failed to synchronize\n");
111  return;
112  }
113 
114  fftShift << < nblocks, kBlockThreads, 0, stream >> > (N, nx, ny, in_field, output_field, false);
115 
116  cufftDestroy(plan);
117 }
118 
119 void cudaFFT_Meshf(CUstream_st* stream, int nx, int ny, cufftComplex* in_field, cufftComplex* output_field, int direction)
120 {
121  unsigned int nblocks = (nx * ny + kBlockThreads - 1) / kBlockThreads;
122  int N = nx * ny;
123  fftShiftf << <nblocks, kBlockThreads, 0, stream >> > (N, nx, ny, in_field, output_field, false);
124 
125  cufftHandle plan;
126 
127  // fft
128  if (cufftPlan2d(&plan, ny, nx, CUFFT_C2C) != CUFFT_SUCCESS)
129  {
130  //LOG("FAIL in creating cufft plan");
131  return;
132  };
133 
134  cufftResult result;
135 
136  if (direction == -1)
137  result = cufftExecC2C(plan, output_field, in_field, CUFFT_FORWARD);
138  else
139  result = cufftExecC2C(plan, output_field, in_field, CUFFT_INVERSE);
140 
141  if (result != CUFFT_SUCCESS)
142  {
143  //LOG("------------------FAIL: execute cufft, code=%s", result);
144  return;
145  }
146 
147  if (cudaDeviceSynchronize() != cudaSuccess) {
148  //LOG("Cuda error: Failed to synchronize\n");
149  return;
150  }
151 
152  fftShiftf << < nblocks, kBlockThreads, 0, stream >> > (N, nx, ny, in_field, output_field, false);
153 
154  cufftDestroy(plan);
155 }
156 
157 __global__
158 void cudaKernel_double_RefAS_flat(cufftDoubleComplex* output, const MeshKernelConfig* config,
159  double shadingFactor, const geometric* geom, double carrierWaveX, double carrierWaveY, double carrierWaveZ)
160 {
161  int tid = threadIdx.x + blockIdx.x * blockDim.x;
162 
163  if (tid < config->pn_X * config->pn_Y) {
164 
165  int col = tid % config->pn_X;
166  int row = tid / config->pn_X;
167 
168  double flx, fly, flz, fx, fy, fz, flxShifted, flyShifted, freqTermX, freqTermY;
169 
170  double det = geom->loRot[0] * geom->loRot[3] - geom->loRot[1] * geom->loRot[2];
171  if (det == 0)
172  return;
173 
174  double a = 1 / det;
175  double invLoRot[4];
176  invLoRot[0] = a * geom->loRot[3];
177  invLoRot[1] = -a * geom->loRot[2];
178  invLoRot[2] = -a * geom->loRot[1];
179  invLoRot[3] = a * geom->loRot[0];
180 
181  cuDoubleComplex refTerm1 = make_cuDoubleComplex(0, 0);
182  cuDoubleComplex refTerm2 = make_cuDoubleComplex(0, 0);
183  cuDoubleComplex refTerm3 = make_cuDoubleComplex(0, 0);
184  cuDoubleComplex refAS = make_cuDoubleComplex(0, 0);
185  cuDoubleComplex term1 = make_cuDoubleComplex(0, 0);
186  cuDoubleComplex term2 = make_cuDoubleComplex(0, 0);
187 
188  term1.y = -config->pi2 / config->lambda * (
189  carrierWaveX * (geom->glRot[0] * geom->glShift[0] + geom->glRot[3] * geom->glShift[1] + geom->glRot[6] * geom->glShift[2])
190  + carrierWaveY * (geom->glRot[1] * geom->glShift[0] + geom->glRot[4] * geom->glShift[1] + geom->glRot[7] * geom->glShift[2])
191  + carrierWaveZ * (geom->glRot[2] * geom->glShift[0] + geom->glRot[5] * geom->glShift[1] + geom->glRot[8] * geom->glShift[2]));
192 
193 
194  // calculate frequency term =======================================================================
195  int idxFx = -config->pn_X / 2 + col;
196  int idxFy = config->pn_X / 2 - row;
197  double w = 1.0 / config->lambda;
198 
199  fx = (double)idxFx * config->dfx;
200  fy = (double)idxFy * config->dfy;
201  fz = sqrt(w * w - fx * fx - fy * fy);
202 
203  flx = geom->glRot[0] * fx + geom->glRot[1] * fy + geom->glRot[2] * fz;
204  fly = geom->glRot[3] * fx + geom->glRot[4] * fy + geom->glRot[5] * fz;
205  flz = sqrt(w * w - flx * flx - fly * fly);
206 
207 
208  flxShifted = flx - w * (geom->glRot[0] * carrierWaveX + geom->glRot[1] * carrierWaveY + geom->glRot[2] * carrierWaveZ);
209  flyShifted = fly - w * (geom->glRot[3] * carrierWaveX + geom->glRot[4] * carrierWaveY + geom->glRot[5] * carrierWaveZ);
210  freqTermX = invLoRot[0] * flxShifted + invLoRot[1] * flyShifted;
211  freqTermY = invLoRot[2] * flxShifted + invLoRot[3] * flyShifted;
212 
213  double sqFreqTermX = freqTermX * freqTermX;
214  double cuFreqTermX = sqFreqTermX * freqTermX;
215  double sqFreqTermY = freqTermY * freqTermY;
216  double cuFreqTermY = sqFreqTermY * freqTermY;
217 
218  //if (freqTermX == -freqTermY && freqTermY != 0) {
219  if (abs(freqTermX - freqTermY) <= config->tolerence && abs(freqTermY) > config->tolerence) {
220  refTerm1.y = config->pi2 * freqTermY;
221  refTerm2.y = 1;
222 
223  //refAS = shadingFactor * (((Complex<Real>)1 - exp(refTerm1)) / (4 * pi*pi*freqTermY * freqTermY) + refTerm2 / (2 * pi*freqTermY));
224  exponent_complex_mesh(&refTerm1);
225  cuDoubleComplex value1 = make_cuDoubleComplex(1, 0);
226  cuDoubleComplex value2 = cuCsub(value1, refTerm1);
227  double value3 = config->square_pi2 * freqTermY * freqTermY;
228  cuDoubleComplex value4 = cuCdiv(value2, make_cuDoubleComplex(value3, 0));
229  cuDoubleComplex value5 = cuCdiv(refTerm2, make_cuDoubleComplex(config->pi2 * freqTermY, 0));
230  cuDoubleComplex value6 = cuCadd(value4, value5);
231  refAS = cuCmul(value6, make_cuDoubleComplex(shadingFactor, 0));
232 
233  //}else if (freqTermX == freqTermY && freqTermX == 0) {
234  }
235  else if (abs(freqTermX - freqTermY) <= config->tolerence && abs(freqTermX) <= config->tolerence) {
236 
237  //refAS = shadingFactor * 1 / 2;
238  refAS = make_cuDoubleComplex(shadingFactor * 0.5, 0);
239 
240  //} else if (freqTermX != 0 && freqTermY == 0) {
241  }
242  else if (abs(freqTermX) > config->tolerence && abs(freqTermY) <= config->tolerence) {
243 
244  refTerm1.y = -config->pi2 * freqTermX;
245  refTerm2.y = 1;
246 
247  //refAS = shadingFactor * ((exp(refTerm1) - (Complex<Real>)1) / (2 * M_PI*freqTermX * 2 * M_PI*freqTermX) + (refTerm2 * exp(refTerm1)) / (2 * M_PI*freqTermX));
248  exponent_complex_mesh(&refTerm1);
249  cuDoubleComplex value1 = make_cuDoubleComplex(1, 0);
250  cuDoubleComplex value2 = cuCsub(refTerm1, value1);
251  double value3 = config->square_pi2 * sqFreqTermX;
252  cuDoubleComplex value4 = cuCdiv(value2, make_cuDoubleComplex(value3, 0));
253 
254  cuDoubleComplex value5 = cuCmul(refTerm2, refTerm1);
255  cuDoubleComplex value6 = cuCdiv(value5, make_cuDoubleComplex(config->pi2 * freqTermX, 0));
256 
257  cuDoubleComplex value7 = cuCadd(value4, value6);
258  refAS = cuCmul(value7, make_cuDoubleComplex(shadingFactor, 0));
259 
260  //} else if (freqTermX == 0 && freqTermY != 0) {
261  }
262  else if (abs(freqTermX) <= config->tolerence && abs(freqTermY) > config->tolerence) {
263 
264  refTerm1.y = config->pi2 * freqTermY;
265  refTerm2.y = 1;
266 
267  //refAS = shadingFactor * (((Complex<Real>)1 - exp(refTerm1)) / (4 * M_PI*M_PI*freqTermY * freqTermY) - refTerm2 / (2 * M_PI*freqTermY));
268  exponent_complex_mesh(&refTerm1);
269  cuDoubleComplex value1 = make_cuDoubleComplex(1, 0);
270  cuDoubleComplex value2 = cuCsub(value1, refTerm1);
271  double value3 = config->square_pi2 * sqFreqTermY;
272  cuDoubleComplex value4 = cuCdiv(value2, make_cuDoubleComplex(value3, 0));
273  cuDoubleComplex value5 = cuCdiv(refTerm2, make_cuDoubleComplex(config->pi2 * freqTermY, 0));
274  cuDoubleComplex value6 = cuCsub(value4, value5);
275  refAS = cuCmul(value6, make_cuDoubleComplex(shadingFactor, 0));
276 
277  }
278  else {
279 
280  refTerm1.y = -config->pi2 * freqTermX;
281  refTerm2.y = -config->pi2 * (freqTermX + freqTermY);
282 
283  //refAS = shadingFactor * ((exp(refTerm1) - (Complex<Real>)1) / (4 * M_PI*M_PI*freqTermX * freqTermY) + ((Complex<Real>)1 - exp(refTerm2)) / (4 * M_PI*M_PI*freqTermY * (freqTermX + freqTermY)));
284  exponent_complex_mesh(&refTerm1);
285  cuDoubleComplex value1 = make_cuDoubleComplex(1, 0);
286  cuDoubleComplex value2 = cuCsub(refTerm1, value1);
287  double value3 = config->square_pi2 * freqTermX * freqTermY;
288  cuDoubleComplex value4 = cuCdiv(value2, make_cuDoubleComplex(value3, 0));
289 
290  exponent_complex_mesh(&refTerm2);
291  cuDoubleComplex value5 = cuCsub(make_cuDoubleComplex(1, 0), refTerm2);
292  double value6 = config->square_pi2 * freqTermY * (freqTermX + freqTermY);
293  cuDoubleComplex value7 = cuCdiv(value5, make_cuDoubleComplex(value6, 0));
294 
295  cuDoubleComplex value8 = cuCadd(value4, value7);
296  refAS = cuCmul(value8, make_cuDoubleComplex(shadingFactor, 0));
297  }
298 
299  cuDoubleComplex temp;
300  if (abs(fz) <= config->tolerence)
301  temp = make_cuDoubleComplex(0, 0);
302  else {
303  term2.y = config->pi2 * (flx * geom->glShift[0] + fly * geom->glShift[1] + flz * geom->glShift[2]);
304 
305  //temp = refAS / det * exp(term1)* flz / fz * exp(term2);
306 
307  exponent_complex_mesh(&term1);
308  exponent_complex_mesh(&term2);
309 
310  cuDoubleComplex tmp1 = cuCdiv(refAS, make_cuDoubleComplex(det, 0));
311  cuDoubleComplex tmp2 = cuCmul(tmp1, term1);
312  cuDoubleComplex tmp3 = cuCmul(tmp2, make_cuDoubleComplex(flz, 0));
313  cuDoubleComplex tmp4 = cuCdiv(tmp3, make_cuDoubleComplex(fz, 0));
314  temp = cuCmul(tmp4, term2);
315 
316  }
317 
318  double absval = sqrt((temp.x * temp.x) + (temp.y * temp.y));
319  if (absval > config->min_double)
320  {
321  }
322  else {
323  temp = make_cuDoubleComplex(0, 0);
324  }
325 
326  //cuDoubleComplex addtmp = output[col + row * config->pn_X];
327  //output[col+row*config->pn_X] = cuCadd(addtmp,temp);
328 
329  output[tid].x += temp.x;
330  output[tid].y += temp.y;
331  }
332 }
333 
334 __global__
335 void cudaKernel_double_RefAS_continuous(cufftDoubleComplex* output, const MeshKernelConfig* config,
336  const geometric* geom, double av0, double av1, double av2, double carrierWaveX, double carrierWaveY, double carrierWaveZ)
337 {
338  int tid = threadIdx.x + blockIdx.x * blockDim.x;
339 
340  if (tid < config->pn_X * config->pn_Y) {
341 
342  int col = tid % config->pn_X;
343  int row = tid / config->pn_X;
344 
345  double flx, fly, flz, fx, fy, fz, flxShifted, flyShifted, freqTermX, freqTermY;
346 
347  double det = geom->loRot[0] * geom->loRot[3] - geom->loRot[1] * geom->loRot[2];
348  if (det == 0)
349  return;
350 
351  double a = 1 / det;
352  double invLoRot[4];
353  invLoRot[0] = a * geom->loRot[3];
354  invLoRot[1] = -a * geom->loRot[2];
355  invLoRot[2] = -a * geom->loRot[1];
356  invLoRot[3] = a * geom->loRot[0];
357 
358  cuDoubleComplex refTerm1 = make_cuDoubleComplex(0, 0);
359  cuDoubleComplex refTerm2 = make_cuDoubleComplex(0, 0);
360  cuDoubleComplex refTerm3 = make_cuDoubleComplex(0, 0);
361  cuDoubleComplex refAS = make_cuDoubleComplex(0, 0);
362  cuDoubleComplex term1 = make_cuDoubleComplex(0, 0);
363  cuDoubleComplex term2 = make_cuDoubleComplex(0, 0);
364 
365  term1.y = -config->pi2 / config->lambda * (
366  carrierWaveX * (geom->glRot[0] * geom->glShift[0] + geom->glRot[3] * geom->glShift[1] + geom->glRot[6] * geom->glShift[2])
367  + carrierWaveY * (geom->glRot[1] * geom->glShift[0] + geom->glRot[4] * geom->glShift[1] + geom->glRot[7] * geom->glShift[2])
368  + carrierWaveZ * (geom->glRot[2] * geom->glShift[0] + geom->glRot[5] * geom->glShift[1] + geom->glRot[8] * geom->glShift[2]));
369 
370 
371  // calculate frequency term =======================================================================
372  int idxFx = -config->pn_X / 2 + col;
373  int idxFy = config->pn_X / 2 - row;
374  double w = 1.0 / config->lambda;
375 
376  fx = (double)idxFx * config->dfx;
377  fy = (double)idxFy * config->dfy;
378  fz = sqrt(w * w - fx * fx - fy * fy);
379 
380  flx = geom->glRot[0] * fx + geom->glRot[1] * fy + geom->glRot[2] * fz;
381  fly = geom->glRot[3] * fx + geom->glRot[4] * fy + geom->glRot[5] * fz;
382  flz = sqrt(w * w - flx * flx - fly * fly);
383 
384 
385  flxShifted = flx - w * (geom->glRot[0] * carrierWaveX + geom->glRot[1] * carrierWaveY + geom->glRot[2] * carrierWaveZ);
386  flyShifted = fly - w * (geom->glRot[3] * carrierWaveX + geom->glRot[4] * carrierWaveY + geom->glRot[5] * carrierWaveZ);
387  freqTermX = invLoRot[0] * flxShifted + invLoRot[1] * flyShifted;
388  freqTermY = invLoRot[2] * flxShifted + invLoRot[3] * flyShifted;
389 
390  double sqFreqTermX = freqTermX * freqTermX;
391  double cuFreqTermX = sqFreqTermX * freqTermX;
392  double sqFreqTermY = freqTermY * freqTermY;
393  double cuFreqTermY = sqFreqTermY * freqTermY;
394 
395  cuDoubleComplex D1 = make_cuDoubleComplex(0, 0);
396  cuDoubleComplex D2 = make_cuDoubleComplex(0, 0);
397  cuDoubleComplex D3 = make_cuDoubleComplex(0, 0);
398 
399  //if (freqTermX == 0.0 && freqTermY == 0.0) {
400  if (abs(freqTermX) <= config->tolerence && abs(freqTermY) <= config->tolerence) {
401 
402  D1.x = (double)1.0 / (double)3.0;
403  D2.x = (double)1.0 / (double)5.0;
404  D3.x = (double)1.0 / (double)2.0;
405 
406  //}else if (freqTermX == 0.0 && freqTermY != 0.0) {
407  }
408  else if (abs(freqTermX) <= config->tolerence && abs(freqTermY) > config->tolerence) {
409 
410  refTerm1.y = -config->pi2 * freqTermY;
411  refTerm2.y = 1;
412 
413  //D1 = (refTerm1 - (Real)1)*refTerm1.exp() / (8 * M_PI*M_PI*M_PI*freqTermY * freqTermY * freqTermY)
414  // - refTerm1 / (4 * M_PI*M_PI*M_PI*freqTermY * freqTermY * freqTermY);
415 
416  cuDoubleComplex refTerm1_exp = make_cuDoubleComplex(refTerm1.x, refTerm1.y);
417  exponent_complex_mesh(&refTerm1_exp);
418  cuDoubleComplex value1 = make_cuDoubleComplex(1, 0);
419  cuDoubleComplex value2 = cuCsub(refTerm1, value1);
420  cuDoubleComplex value3 = cuCmul(value2, refTerm1_exp);
421  cuDoubleComplex value4 = cuCdiv(value3, make_cuDoubleComplex(config->cube_pi2 * cuFreqTermY, 0));
422  cuDoubleComplex value5 = cuCdiv(refTerm1, make_cuDoubleComplex(config->square_pi2 * config->pi * cuFreqTermY, 0));
423 
424  D1 = cuCsub(value4, value5);
425 
426  //D2 = -(M_PI*freqTermY + refTerm2) / (4 * M_PI*M_PI*M_PI*freqTermY * freqTermY * freqTermY)*exp(refTerm1)
427  // + refTerm1 / (8 * M_PI*M_PI*M_PI*freqTermY * freqTermY * freqTermY);
428  cuDoubleComplex value6 = cuCadd(make_cuDoubleComplex(config->pi * freqTermY, 0), refTerm2);
429  cuDoubleComplex value7 = cuCmul(make_cuDoubleComplex(-1, 0), value6);
430  cuDoubleComplex value8 = cuCdiv(value7, make_cuDoubleComplex(config->square_pi2 * config->pi * cuFreqTermY, 0));
431  cuDoubleComplex value9 = cuCmul(value8, refTerm1_exp);
432  cuDoubleComplex value10 = cuCdiv(refTerm1, make_cuDoubleComplex(config->cube_pi2 * cuFreqTermY, 0));
433  D2 = cuCadd(value9, value10);
434 
435  //D3 = exp(refTerm1) / (2 * M_PI*freqTermY) + ((Real)1 - refTerm2) / (2 * M_PI*freqTermY);
436  cuDoubleComplex value11 = cuCdiv(refTerm1_exp, make_cuDoubleComplex(config->pi2 * freqTermY, 0));
437  cuDoubleComplex value12 = cuCsub(make_cuDoubleComplex(1, 0), refTerm2);
438  cuDoubleComplex value13 = cuCdiv(value12, make_cuDoubleComplex(config->pi2 * freqTermY, 0));
439 
440  D3 = cuCadd(value11, value13);
441 
442  //} else if (freqTermX != 0.0 && freqTermY == 0.0) {
443  }
444  else if (abs(freqTermX) > config->tolerence && abs(freqTermY) <= config->tolerence) {
445 
446  refTerm1.y = config->square_pi2 * freqTermX * freqTermX;
447  refTerm2.y = 1;
448  refTerm3.y = config->pi2 * freqTermX;
449 
450  //D1 = (refTerm1 + 4 * M_PI*freqTermX - (Real)2 * refTerm2) / (8 * M_PI*M_PI*M_PI*freqTermY * freqTermY * freqTermY)*exp(-refTerm3)
451  // + refTerm2 / (4 * M_PI*M_PI*M_PI*freqTermX * freqTermX * freqTermX);
452 
453  cuDoubleComplex refTerm3_exp = make_cuDoubleComplex(refTerm3.x, refTerm3.y);
454  exponent_complex_mesh(&refTerm3_exp);
455 
456  cuDoubleComplex value1 = cuCadd(refTerm1, make_cuDoubleComplex(4 * config->pi * freqTermX, 0));
457  cuDoubleComplex value2 = cuCmul(make_cuDoubleComplex(2, 0), refTerm2);
458  cuDoubleComplex value3 = cuCsub(value1, value2);
459  cuDoubleComplex value4 = cuCdiv(value3, make_cuDoubleComplex(config->cube_pi2 * cuFreqTermY, 0));
460  cuDoubleComplex value5 = cuCmul(value4, refTerm3_exp);
461  cuDoubleComplex value6 = cuCdiv(refTerm2, make_cuDoubleComplex(config->square_pi2 * config->pi * cuFreqTermX, 0));
462 
463  D1 = cuCadd(value5, value6);
464 
465  //D2 = (Real)1 / (Real)2 * D1;
466  D2 = cuCmul(make_cuDoubleComplex(1.0 / 2.0, 0), D1);
467 
468  //D3 = ((refTerm3 + (Real)1)*exp(-refTerm3) - (Real)1) / (4 * M_PI*M_PI*freqTermX * freqTermX);
469  cuDoubleComplex value7 = cuCadd(refTerm3, make_cuDoubleComplex(1.0, 0));
470  cuDoubleComplex value8 = cuCmul(refTerm3, make_cuDoubleComplex(-1.0, 0));
471  exponent_complex_mesh(&value8);
472  cuDoubleComplex value9 = cuCmul(value7, value8);
473  cuDoubleComplex value10 = cuCsub(value9, make_cuDoubleComplex(1.0, 0));
474  D3 = cuCdiv(value10, make_cuDoubleComplex(config->square_pi2 * sqFreqTermX, 0));
475 
476  //} else if (freqTermX == -freqTermY) {
477  }
478  else if (abs(freqTermX + freqTermY) <= config->tolerence) {
479 
480  refTerm1.y = 1;
481  refTerm2.y = config->pi2 * freqTermX;
482  refTerm3.y = config->pi2 * config->pi * freqTermX * freqTermX;
483 
484  //D1 = (-2 * M_PI*freqTermX + refTerm1) / (8 * M_PI*M_PI*M_PI*freqTermX * freqTermX * freqTermX)*exp(-refTerm2)
485  // - (refTerm3 + refTerm1) / (8 * M_PI*M_PI*M_PI*freqTermX * freqTermX * freqTermX);
486 
487  cuDoubleComplex value1 = cuCadd(make_cuDoubleComplex(-config->pi2 * freqTermX, 0), refTerm1);
488  cuDoubleComplex value2 = cuCdiv(value1, make_cuDoubleComplex(config->cube_pi2 * cuFreqTermX, 0));
489  cuDoubleComplex value3 = cuCmul(refTerm2, make_cuDoubleComplex(-1.0, 0));
490  exponent_complex_mesh(&value3);
491  cuDoubleComplex value4 = cuCmul(value2, value3);
492 
493  cuDoubleComplex value5 = cuCadd(refTerm3, refTerm1);
494  cuDoubleComplex value6 = cuCdiv(value5, make_cuDoubleComplex(config->cube_pi2 * cuFreqTermX, 0));
495 
496  D1 = cuCsub(value4, value6);
497 
498  //D2 = (-refTerm1) / (8 * M_PI*M_PI*M_PI*freqTermX * freqTermX * freqTermX)*exp(-refTerm2)
499  // + (-refTerm3 + refTerm1 + 2 * M_PI*freqTermX) / (8 * M_PI*M_PI*M_PI*freqTermX * freqTermX * freqTermX);
500 
501  cuDoubleComplex value7 = cuCmul(refTerm1, make_cuDoubleComplex(-1.0, 0));
502  cuDoubleComplex value8 = cuCdiv(value7, make_cuDoubleComplex(config->cube_pi2 * cuFreqTermX, 0));
503  cuDoubleComplex value9 = cuCmul(value8, value3);
504 
505  cuDoubleComplex value10 = cuCmul(refTerm3, make_cuDoubleComplex(-1.0, 0));
506  cuDoubleComplex value11 = cuCadd(value10, refTerm1);
507  cuDoubleComplex value12 = cuCadd(value11, make_cuDoubleComplex(config->pi2 * freqTermX, 0));
508  cuDoubleComplex value13 = cuCdiv(value12, make_cuDoubleComplex(config->cube_pi2 * cuFreqTermX, 0));
509 
510  D2 = cuCadd(value9, value13);
511 
512  //D3 = (-refTerm1) / (4 * M_PI*M_PI*freqTermX * freqTermX)*exp(-refTerm2)
513  // + (-refTerm2 + (Real)1) / (4 * M_PI*M_PI*freqTermX * freqTermX);
514 
515  cuDoubleComplex value14 = cuCdiv(value7, make_cuDoubleComplex(config->square_pi2 * sqFreqTermX, 0));
516  cuDoubleComplex value15 = cuCmul(value14, value3);
517 
518  cuDoubleComplex value16 = cuCmul(refTerm2, make_cuDoubleComplex(-1.0, 0));
519  cuDoubleComplex value17 = cuCadd(value16, make_cuDoubleComplex(1.0, 0));
520  cuDoubleComplex value18 = cuCdiv(value17, make_cuDoubleComplex(config->square_pi2 * sqFreqTermX, 0));
521 
522  D3 = cuCadd(value15, value18);
523 
524  }
525  else {
526 
527  refTerm1.y = -config->pi2 * (freqTermX + freqTermY);
528  refTerm2.y = 1.0;
529  refTerm3.y = -config->pi2 * freqTermX;
530 
531  //D1 = exp(refTerm1)*(refTerm2 - 2 * M_PI*(freqTermX + freqTermY)) / (8 * M_PI*M_PI*M_PI*freqTermY * (freqTermX + freqTermY)*(freqTermX + freqTermY))
532  // + exp(refTerm3)*(2 * M_PI*freqTermX - refTerm2) / (8 * M_PI*M_PI*M_PI*freqTermX * freqTermX * freqTermY)
533  // + ((2 * freqTermX + freqTermY)*refTerm2) / (8 * M_PI*M_PI*M_PI*freqTermX * freqTermX * (freqTermX + freqTermY)*(freqTermX + freqTermY));
534 
535  cuDoubleComplex refTerm1_exp = make_cuDoubleComplex(refTerm1.x, refTerm1.y);
536  exponent_complex_mesh(&refTerm1_exp);
537 
538  double val1 = config->pi2 * (freqTermX + freqTermY);
539  cuDoubleComplex value1 = cuCsub(refTerm2, make_cuDoubleComplex(val1, 0));
540  cuDoubleComplex value2 = cuCmul(refTerm1_exp, value1);
541 
542  double val2 = config->cube_pi2 * freqTermY * (freqTermX + freqTermY) * (freqTermX + freqTermY);
543  cuDoubleComplex value3 = cuCdiv(value2, make_cuDoubleComplex(val2, 0));
544 
545  cuDoubleComplex refTerm3_exp = make_cuDoubleComplex(refTerm3.x, refTerm3.y);
546  exponent_complex_mesh(&refTerm3_exp);
547 
548  double val3 = config->pi2 * freqTermX;
549  cuDoubleComplex value4 = cuCsub(make_cuDoubleComplex(val3, 0), refTerm2);
550  cuDoubleComplex value5 = cuCmul(refTerm3_exp, value4);
551  double val4 = config->cube_pi2 * sqFreqTermX * freqTermY;
552  cuDoubleComplex value6 = cuCdiv(value5, make_cuDoubleComplex(val4, 0));
553 
554  double val5 = 2.0 * freqTermX + freqTermY;
555  cuDoubleComplex value7 = cuCmul(make_cuDoubleComplex(val5, 0), refTerm2);
556  double val6 = config->cube_pi2 * sqFreqTermX * (freqTermX + freqTermY) * (freqTermX + freqTermY);
557  cuDoubleComplex value8 = cuCdiv(value7, make_cuDoubleComplex(val6, 0));
558 
559  cuDoubleComplex value9 = cuCadd(value3, value6);
560  D1 = cuCadd(value9, value8);
561 
562  //D2 = exp(refTerm1)*(refTerm2*(freqTermX + 2 * freqTermY) - 2 * M_PI*freqTermY * (freqTermX + freqTermY)) / (8 * M_PI*M_PI*M_PI*freqTermY * freqTermY * (freqTermX + freqTermY)*(freqTermX + freqTermY))
563  // + exp(refTerm3)*(-refTerm2) / (8 * M_PI*M_PI*M_PI*freqTermX * freqTermY * freqTermY)
564  // + refTerm2 / (8 * M_PI*M_PI*M_PI*freqTermX * (freqTermX + freqTermY)* (freqTermX + freqTermY));
565 
566  double val7 = freqTermX + 2.0 * freqTermY;
567  cuDoubleComplex value10 = cuCmul(refTerm2, make_cuDoubleComplex(val7, 0));
568  double val8 = config->pi2 * freqTermY * (freqTermX + freqTermY);
569  cuDoubleComplex value11 = cuCsub(value10, make_cuDoubleComplex(val8, 0));
570  cuDoubleComplex value12 = cuCmul(refTerm1_exp, value11);
571  double val9 = config->cube_pi2 * sqFreqTermY * (freqTermX + freqTermY) * (freqTermX + freqTermY);
572  cuDoubleComplex value13 = cuCdiv(value12, make_cuDoubleComplex(val9, 0));
573 
574  cuDoubleComplex value14 = cuCmul(refTerm2, make_cuDoubleComplex(-1.0, 0));
575  cuDoubleComplex value15 = cuCmul(refTerm3_exp, value14);
576  double val10 = config->cube_pi2 * freqTermX * sqFreqTermY;
577  cuDoubleComplex value16 = cuCdiv(value15, make_cuDoubleComplex(val10, 0));
578 
579  double val11 = config->cube_pi2 * freqTermX * (freqTermX + freqTermY) * (freqTermX + freqTermY);
580  cuDoubleComplex value17 = cuCdiv(refTerm2, make_cuDoubleComplex(val11, 0));
581 
582  cuDoubleComplex value18 = cuCadd(value13, value16);
583  D2 = cuCadd(value18, value17);
584 
585  //D3 = -exp(refTerm1) / (4 * M_PI*M_PI*freqTermY * (freqTermX + freqTermY))
586  // + exp(refTerm3) / (4 * M_PI*M_PI*freqTermX * freqTermY)
587  // - (Real)1 / (4 * M_PI*M_PI*freqTermX * (freqTermX + freqTermY));
588 
589  cuDoubleComplex value19 = cuCmul(refTerm1_exp, make_cuDoubleComplex(-1.0, 0));
590  double val12 = config->square_pi2 * freqTermY * (freqTermX + freqTermY);
591  cuDoubleComplex value20 = cuCdiv(value19, make_cuDoubleComplex(val12, 0));
592 
593  double val13 = config->square_pi2 * freqTermX * freqTermY;
594  cuDoubleComplex value21 = cuCdiv(refTerm3_exp, make_cuDoubleComplex(val13, 0));
595 
596  double val14 = 1.0 / (config->square_pi2 * freqTermX * (freqTermX + freqTermY));
597  cuDoubleComplex value22 = make_cuDoubleComplex(val14, 0);
598 
599  cuDoubleComplex value23 = cuCadd(value20, value21);
600  D3 = cuCsub(value23, value22);
601 
602  }
603 
604  //refAS = (av1 - av0)*D1 + (av2 - av1)*D2 + av0 * D3;
605 
606  double t1 = av1 - av0;
607  double t2 = av2 - av1;
608  cuDoubleComplex value_temp1 = cuCmul(make_cuDoubleComplex(t1, 0), D1);
609  cuDoubleComplex value_temp2 = cuCmul(make_cuDoubleComplex(t2, 0), D2);
610  cuDoubleComplex value_temp3 = cuCmul(make_cuDoubleComplex(av0, 0), D3);
611 
612  cuDoubleComplex valeF = cuCadd(value_temp1, value_temp2);
613  refAS = cuCadd(valeF, value_temp3);
614 
615  cuDoubleComplex temp;
616  if (abs(fz) <= config->tolerence)
617  temp = make_cuDoubleComplex(0, 0);
618  else {
619  term2.y = config->pi2 * (flx * geom->glShift[0] + fly * geom->glShift[1] + flz * geom->glShift[2]);
620 
621  //temp = refAS / det * exp(term1)* flz / fz * exp(term2);
622 
623  exponent_complex_mesh(&term1);
624  exponent_complex_mesh(&term2);
625 
626  cuDoubleComplex tmp1 = cuCdiv(refAS, make_cuDoubleComplex(det, 0));
627  cuDoubleComplex tmp2 = cuCmul(tmp1, term1);
628  cuDoubleComplex tmp3 = cuCmul(tmp2, make_cuDoubleComplex(flz, 0));
629  cuDoubleComplex tmp4 = cuCdiv(tmp3, make_cuDoubleComplex(fz, 0));
630  temp = cuCmul(tmp4, term2);
631 
632  }
633 
634  double absval = sqrt((temp.x * temp.x) + (temp.y * temp.y));
635  if (absval > config->min_double)
636  {
637  }
638  else {
639  temp = make_cuDoubleComplex(0, 0);
640  }
641 
642  //cuDoubleComplex addtmp = output[col + row * config->pn_X];
643  //output[col+row*config->pn_X] = cuCadd(addtmp,temp);
644 
645  output[tid].x += temp.x;
646  output[tid].y += temp.y;
647  }
648 }
649 
650 extern "C"
651 {
652  void cudaMesh_Flat(
653  const int& nBlocks, const int& nThreads, cufftDoubleComplex* output,
654  const MeshKernelConfig* config, double shading_factor, const geometric* geom,
655  double carrierWaveX, double carrierWaveY, double carrierWaveZ, CUstream_st* stream)
656  {
657  cudaKernel_double_RefAS_flat << <nBlocks, nThreads, 0, stream >> > (output, config, shading_factor,
658  geom, carrierWaveX, carrierWaveY, carrierWaveZ);
659  }
660 
661  void cudaMesh_Continuous(
662  const int& nBlocks, const int& nThreads, cufftDoubleComplex* output,
663  const MeshKernelConfig* config, const geometric* geom, double av0, double av1, double av2,
664  double carrierWaveX, double carrierWaveY, double carrierWaveZ, CUstream_st* stream)
665  {
666  cudaKernel_double_RefAS_continuous << <nBlocks, nThreads, 0, stream >> > (output, config,
667  geom, av0, av1, av2, carrierWaveX, carrierWaveY, carrierWaveZ);
668  }
669 
670 
671  void call_fftGPU(int nx, int ny, cufftDoubleComplex* input, cufftDoubleComplex* output, CUstream_st* streamTriMesh)
672  {
673  cudaFFT_Mesh(streamTriMesh, nx, ny, input, output, 1);
674  }
675 
676  void call_fftGPUf(int nx, int ny, cuFloatComplex* input, cuFloatComplex* output, CUstream_st* streamTriMesh)
677  {
678  cudaFFT_Meshf(streamTriMesh, nx, ny, input, output, 1);
679  }
680 }
681 
682 #endif // !ophTriMeshKernel_cu__