1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install, copy or use the software.
10 // For Open Source Digital Holographic Library
12 // Openholo library is free software;
13 // you can redistribute it and/or modify it under the terms of the BSD 2-Clause license.
15 // Copyright (C) 2017-2024, Korea Electronics Technology Institute. All rights reserved.
16 // E-mail : contact.openholo@gmail.com
17 // Web : http://www.openholo.org
19 // Redistribution and use in source and binary forms, with or without modification,
20 // are permitted provided that the following conditions are met:
22 // 1. Redistribution's of source code must retain the above copyright notice,
23 // this list of conditions and the following disclaimer.
25 // 2. Redistribution's in binary form must reproduce the above copyright notice,
26 // this list of conditions and the following disclaimer in the documentation
27 // and/or other materials provided with the distribution.
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the copyright holder or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
40 // This software contains opensource software released under GNU Generic Public License,
41 // NVDIA Software License Agreement, or CUDA supplement to Software License Agreement.
42 // Check whether software you use contains licensed software.
46 #ifndef ophTriMeshKernel_cu__
47 #define ophTriMeshKernel_cu__
48 #include "ophKernel.cuh"
49 #include "ophTriMesh_GPU.h"
50 #include <cuda_runtime_api.h>
51 #include <cuComplex.h>
53 #include <device_launch_parameters.h>
54 #include <cuda_runtime_api.h>
58 __device__ void exponent_complex_mesh(cuDoubleComplex* val)
60 double exp_val = exp(val->x);
63 sincos(val->y, &sin_v, &cos_v);
65 val->x = exp_val * cos_v;
66 val->y = exp_val * sin_v;
69 __device__ void exponent_complex_meshf(cuFloatComplex* val)
71 float exp_val = expf(val->x);
74 sincosf(val->y, &sin_v, &cos_v);
76 val->x = exp_val * cos_v;
77 val->y = exp_val * sin_v;
81 void cudaFFT_Mesh(CUstream_st* stream, int nx, int ny, cufftDoubleComplex* in_field, cufftDoubleComplex* output_field, int direction)
83 unsigned int nblocks = (nx * ny + kBlockThreads - 1) / kBlockThreads;
85 fftShift << <nblocks, kBlockThreads, 0, stream >> > (N, nx, ny, in_field, output_field, false);
90 if (cufftPlan2d(&plan, ny, nx, CUFFT_Z2Z) != CUFFT_SUCCESS)
92 //LOG("FAIL in creating cufft plan");
99 result = cufftExecZ2Z(plan, output_field, in_field, CUFFT_FORWARD);
101 result = cufftExecZ2Z(plan, output_field, in_field, CUFFT_INVERSE);
103 if (result != CUFFT_SUCCESS)
105 //LOG("------------------FAIL: execute cufft, code=%s", result);
109 if (cudaDeviceSynchronize() != cudaSuccess) {
110 //LOG("Cuda error: Failed to synchronize\n");
114 fftShift << < nblocks, kBlockThreads, 0, stream >> > (N, nx, ny, in_field, output_field, false);
119 void cudaFFT_Meshf(CUstream_st* stream, int nx, int ny, cufftComplex* in_field, cufftComplex* output_field, int direction)
121 unsigned int nblocks = (nx * ny + kBlockThreads - 1) / kBlockThreads;
123 fftShiftf << <nblocks, kBlockThreads, 0, stream >> > (N, nx, ny, in_field, output_field, false);
128 if (cufftPlan2d(&plan, ny, nx, CUFFT_C2C) != CUFFT_SUCCESS)
130 //LOG("FAIL in creating cufft plan");
137 result = cufftExecC2C(plan, output_field, in_field, CUFFT_FORWARD);
139 result = cufftExecC2C(plan, output_field, in_field, CUFFT_INVERSE);
141 if (result != CUFFT_SUCCESS)
143 //LOG("------------------FAIL: execute cufft, code=%s", result);
147 if (cudaDeviceSynchronize() != cudaSuccess) {
148 //LOG("Cuda error: Failed to synchronize\n");
152 fftShiftf << < nblocks, kBlockThreads, 0, stream >> > (N, nx, ny, in_field, output_field, false);
158 void cudaKernel_double_RefAS_flat(cufftDoubleComplex* output, const MeshKernelConfig* config,
159 double shadingFactor, const geometric* geom, double carrierWaveX, double carrierWaveY, double carrierWaveZ)
161 int tid = threadIdx.x + blockIdx.x * blockDim.x;
163 if (tid < config->pn_X * config->pn_Y) {
165 int col = tid % config->pn_X;
166 int row = tid / config->pn_X;
168 double flx, fly, flz, fx, fy, fz, flxShifted, flyShifted, freqTermX, freqTermY;
170 double det = geom->loRot[0] * geom->loRot[3] - geom->loRot[1] * geom->loRot[2];
176 invLoRot[0] = a * geom->loRot[3];
177 invLoRot[1] = -a * geom->loRot[2];
178 invLoRot[2] = -a * geom->loRot[1];
179 invLoRot[3] = a * geom->loRot[0];
181 cuDoubleComplex refTerm1 = make_cuDoubleComplex(0, 0);
182 cuDoubleComplex refTerm2 = make_cuDoubleComplex(0, 0);
183 cuDoubleComplex refTerm3 = make_cuDoubleComplex(0, 0);
184 cuDoubleComplex refAS = make_cuDoubleComplex(0, 0);
185 cuDoubleComplex term1 = make_cuDoubleComplex(0, 0);
186 cuDoubleComplex term2 = make_cuDoubleComplex(0, 0);
188 term1.y = -config->pi2 / config->lambda * (
189 carrierWaveX * (geom->glRot[0] * geom->glShift[0] + geom->glRot[3] * geom->glShift[1] + geom->glRot[6] * geom->glShift[2])
190 + carrierWaveY * (geom->glRot[1] * geom->glShift[0] + geom->glRot[4] * geom->glShift[1] + geom->glRot[7] * geom->glShift[2])
191 + carrierWaveZ * (geom->glRot[2] * geom->glShift[0] + geom->glRot[5] * geom->glShift[1] + geom->glRot[8] * geom->glShift[2]));
194 // calculate frequency term =======================================================================
195 int idxFx = -config->pn_X / 2 + col;
196 int idxFy = config->pn_X / 2 - row;
197 double w = 1.0 / config->lambda;
199 fx = (double)idxFx * config->dfx;
200 fy = (double)idxFy * config->dfy;
201 fz = sqrt(w * w - fx * fx - fy * fy);
203 flx = geom->glRot[0] * fx + geom->glRot[1] * fy + geom->glRot[2] * fz;
204 fly = geom->glRot[3] * fx + geom->glRot[4] * fy + geom->glRot[5] * fz;
205 flz = sqrt(w * w - flx * flx - fly * fly);
208 flxShifted = flx - w * (geom->glRot[0] * carrierWaveX + geom->glRot[1] * carrierWaveY + geom->glRot[2] * carrierWaveZ);
209 flyShifted = fly - w * (geom->glRot[3] * carrierWaveX + geom->glRot[4] * carrierWaveY + geom->glRot[5] * carrierWaveZ);
210 freqTermX = invLoRot[0] * flxShifted + invLoRot[1] * flyShifted;
211 freqTermY = invLoRot[2] * flxShifted + invLoRot[3] * flyShifted;
213 double sqFreqTermX = freqTermX * freqTermX;
214 double cuFreqTermX = sqFreqTermX * freqTermX;
215 double sqFreqTermY = freqTermY * freqTermY;
216 double cuFreqTermY = sqFreqTermY * freqTermY;
218 //if (freqTermX == -freqTermY && freqTermY != 0) {
219 if (abs(freqTermX - freqTermY) <= config->tolerence && abs(freqTermY) > config->tolerence) {
220 refTerm1.y = config->pi2 * freqTermY;
223 //refAS = shadingFactor * (((Complex<Real>)1 - exp(refTerm1)) / (4 * pi*pi*freqTermY * freqTermY) + refTerm2 / (2 * pi*freqTermY));
224 exponent_complex_mesh(&refTerm1);
225 cuDoubleComplex value1 = make_cuDoubleComplex(1, 0);
226 cuDoubleComplex value2 = cuCsub(value1, refTerm1);
227 double value3 = config->square_pi2 * freqTermY * freqTermY;
228 cuDoubleComplex value4 = cuCdiv(value2, make_cuDoubleComplex(value3, 0));
229 cuDoubleComplex value5 = cuCdiv(refTerm2, make_cuDoubleComplex(config->pi2 * freqTermY, 0));
230 cuDoubleComplex value6 = cuCadd(value4, value5);
231 refAS = cuCmul(value6, make_cuDoubleComplex(shadingFactor, 0));
233 //}else if (freqTermX == freqTermY && freqTermX == 0) {
235 else if (abs(freqTermX - freqTermY) <= config->tolerence && abs(freqTermX) <= config->tolerence) {
237 //refAS = shadingFactor * 1 / 2;
238 refAS = make_cuDoubleComplex(shadingFactor * 0.5, 0);
240 //} else if (freqTermX != 0 && freqTermY == 0) {
242 else if (abs(freqTermX) > config->tolerence && abs(freqTermY) <= config->tolerence) {
244 refTerm1.y = -config->pi2 * freqTermX;
247 //refAS = shadingFactor * ((exp(refTerm1) - (Complex<Real>)1) / (2 * M_PI*freqTermX * 2 * M_PI*freqTermX) + (refTerm2 * exp(refTerm1)) / (2 * M_PI*freqTermX));
248 exponent_complex_mesh(&refTerm1);
249 cuDoubleComplex value1 = make_cuDoubleComplex(1, 0);
250 cuDoubleComplex value2 = cuCsub(refTerm1, value1);
251 double value3 = config->square_pi2 * sqFreqTermX;
252 cuDoubleComplex value4 = cuCdiv(value2, make_cuDoubleComplex(value3, 0));
254 cuDoubleComplex value5 = cuCmul(refTerm2, refTerm1);
255 cuDoubleComplex value6 = cuCdiv(value5, make_cuDoubleComplex(config->pi2 * freqTermX, 0));
257 cuDoubleComplex value7 = cuCadd(value4, value6);
258 refAS = cuCmul(value7, make_cuDoubleComplex(shadingFactor, 0));
260 //} else if (freqTermX == 0 && freqTermY != 0) {
262 else if (abs(freqTermX) <= config->tolerence && abs(freqTermY) > config->tolerence) {
264 refTerm1.y = config->pi2 * freqTermY;
267 //refAS = shadingFactor * (((Complex<Real>)1 - exp(refTerm1)) / (4 * M_PI*M_PI*freqTermY * freqTermY) - refTerm2 / (2 * M_PI*freqTermY));
268 exponent_complex_mesh(&refTerm1);
269 cuDoubleComplex value1 = make_cuDoubleComplex(1, 0);
270 cuDoubleComplex value2 = cuCsub(value1, refTerm1);
271 double value3 = config->square_pi2 * sqFreqTermY;
272 cuDoubleComplex value4 = cuCdiv(value2, make_cuDoubleComplex(value3, 0));
273 cuDoubleComplex value5 = cuCdiv(refTerm2, make_cuDoubleComplex(config->pi2 * freqTermY, 0));
274 cuDoubleComplex value6 = cuCsub(value4, value5);
275 refAS = cuCmul(value6, make_cuDoubleComplex(shadingFactor, 0));
280 refTerm1.y = -config->pi2 * freqTermX;
281 refTerm2.y = -config->pi2 * (freqTermX + freqTermY);
283 //refAS = shadingFactor * ((exp(refTerm1) - (Complex<Real>)1) / (4 * M_PI*M_PI*freqTermX * freqTermY) + ((Complex<Real>)1 - exp(refTerm2)) / (4 * M_PI*M_PI*freqTermY * (freqTermX + freqTermY)));
284 exponent_complex_mesh(&refTerm1);
285 cuDoubleComplex value1 = make_cuDoubleComplex(1, 0);
286 cuDoubleComplex value2 = cuCsub(refTerm1, value1);
287 double value3 = config->square_pi2 * freqTermX * freqTermY;
288 cuDoubleComplex value4 = cuCdiv(value2, make_cuDoubleComplex(value3, 0));
290 exponent_complex_mesh(&refTerm2);
291 cuDoubleComplex value5 = cuCsub(make_cuDoubleComplex(1, 0), refTerm2);
292 double value6 = config->square_pi2 * freqTermY * (freqTermX + freqTermY);
293 cuDoubleComplex value7 = cuCdiv(value5, make_cuDoubleComplex(value6, 0));
295 cuDoubleComplex value8 = cuCadd(value4, value7);
296 refAS = cuCmul(value8, make_cuDoubleComplex(shadingFactor, 0));
299 cuDoubleComplex temp;
300 if (abs(fz) <= config->tolerence)
301 temp = make_cuDoubleComplex(0, 0);
303 term2.y = config->pi2 * (flx * geom->glShift[0] + fly * geom->glShift[1] + flz * geom->glShift[2]);
305 //temp = refAS / det * exp(term1)* flz / fz * exp(term2);
307 exponent_complex_mesh(&term1);
308 exponent_complex_mesh(&term2);
310 cuDoubleComplex tmp1 = cuCdiv(refAS, make_cuDoubleComplex(det, 0));
311 cuDoubleComplex tmp2 = cuCmul(tmp1, term1);
312 cuDoubleComplex tmp3 = cuCmul(tmp2, make_cuDoubleComplex(flz, 0));
313 cuDoubleComplex tmp4 = cuCdiv(tmp3, make_cuDoubleComplex(fz, 0));
314 temp = cuCmul(tmp4, term2);
318 double absval = sqrt((temp.x * temp.x) + (temp.y * temp.y));
319 if (absval > config->min_double)
323 temp = make_cuDoubleComplex(0, 0);
326 //cuDoubleComplex addtmp = output[col + row * config->pn_X];
327 //output[col+row*config->pn_X] = cuCadd(addtmp,temp);
329 output[tid].x += temp.x;
330 output[tid].y += temp.y;
335 void cudaKernel_double_RefAS_continuous(cufftDoubleComplex* output, const MeshKernelConfig* config,
336 const geometric* geom, double av0, double av1, double av2, double carrierWaveX, double carrierWaveY, double carrierWaveZ)
338 int tid = threadIdx.x + blockIdx.x * blockDim.x;
340 if (tid < config->pn_X * config->pn_Y) {
342 int col = tid % config->pn_X;
343 int row = tid / config->pn_X;
345 double flx, fly, flz, fx, fy, fz, flxShifted, flyShifted, freqTermX, freqTermY;
347 double det = geom->loRot[0] * geom->loRot[3] - geom->loRot[1] * geom->loRot[2];
353 invLoRot[0] = a * geom->loRot[3];
354 invLoRot[1] = -a * geom->loRot[2];
355 invLoRot[2] = -a * geom->loRot[1];
356 invLoRot[3] = a * geom->loRot[0];
358 cuDoubleComplex refTerm1 = make_cuDoubleComplex(0, 0);
359 cuDoubleComplex refTerm2 = make_cuDoubleComplex(0, 0);
360 cuDoubleComplex refTerm3 = make_cuDoubleComplex(0, 0);
361 cuDoubleComplex refAS = make_cuDoubleComplex(0, 0);
362 cuDoubleComplex term1 = make_cuDoubleComplex(0, 0);
363 cuDoubleComplex term2 = make_cuDoubleComplex(0, 0);
365 term1.y = -config->pi2 / config->lambda * (
366 carrierWaveX * (geom->glRot[0] * geom->glShift[0] + geom->glRot[3] * geom->glShift[1] + geom->glRot[6] * geom->glShift[2])
367 + carrierWaveY * (geom->glRot[1] * geom->glShift[0] + geom->glRot[4] * geom->glShift[1] + geom->glRot[7] * geom->glShift[2])
368 + carrierWaveZ * (geom->glRot[2] * geom->glShift[0] + geom->glRot[5] * geom->glShift[1] + geom->glRot[8] * geom->glShift[2]));
371 // calculate frequency term =======================================================================
372 int idxFx = -config->pn_X / 2 + col;
373 int idxFy = config->pn_X / 2 - row;
374 double w = 1.0 / config->lambda;
376 fx = (double)idxFx * config->dfx;
377 fy = (double)idxFy * config->dfy;
378 fz = sqrt(w * w - fx * fx - fy * fy);
380 flx = geom->glRot[0] * fx + geom->glRot[1] * fy + geom->glRot[2] * fz;
381 fly = geom->glRot[3] * fx + geom->glRot[4] * fy + geom->glRot[5] * fz;
382 flz = sqrt(w * w - flx * flx - fly * fly);
385 flxShifted = flx - w * (geom->glRot[0] * carrierWaveX + geom->glRot[1] * carrierWaveY + geom->glRot[2] * carrierWaveZ);
386 flyShifted = fly - w * (geom->glRot[3] * carrierWaveX + geom->glRot[4] * carrierWaveY + geom->glRot[5] * carrierWaveZ);
387 freqTermX = invLoRot[0] * flxShifted + invLoRot[1] * flyShifted;
388 freqTermY = invLoRot[2] * flxShifted + invLoRot[3] * flyShifted;
390 double sqFreqTermX = freqTermX * freqTermX;
391 double cuFreqTermX = sqFreqTermX * freqTermX;
392 double sqFreqTermY = freqTermY * freqTermY;
393 double cuFreqTermY = sqFreqTermY * freqTermY;
395 cuDoubleComplex D1 = make_cuDoubleComplex(0, 0);
396 cuDoubleComplex D2 = make_cuDoubleComplex(0, 0);
397 cuDoubleComplex D3 = make_cuDoubleComplex(0, 0);
399 //if (freqTermX == 0.0 && freqTermY == 0.0) {
400 if (abs(freqTermX) <= config->tolerence && abs(freqTermY) <= config->tolerence) {
402 D1.x = (double)1.0 / (double)3.0;
403 D2.x = (double)1.0 / (double)5.0;
404 D3.x = (double)1.0 / (double)2.0;
406 //}else if (freqTermX == 0.0 && freqTermY != 0.0) {
408 else if (abs(freqTermX) <= config->tolerence && abs(freqTermY) > config->tolerence) {
410 refTerm1.y = -config->pi2 * freqTermY;
413 //D1 = (refTerm1 - (Real)1)*refTerm1.exp() / (8 * M_PI*M_PI*M_PI*freqTermY * freqTermY * freqTermY)
414 // - refTerm1 / (4 * M_PI*M_PI*M_PI*freqTermY * freqTermY * freqTermY);
416 cuDoubleComplex refTerm1_exp = make_cuDoubleComplex(refTerm1.x, refTerm1.y);
417 exponent_complex_mesh(&refTerm1_exp);
418 cuDoubleComplex value1 = make_cuDoubleComplex(1, 0);
419 cuDoubleComplex value2 = cuCsub(refTerm1, value1);
420 cuDoubleComplex value3 = cuCmul(value2, refTerm1_exp);
421 cuDoubleComplex value4 = cuCdiv(value3, make_cuDoubleComplex(config->cube_pi2 * cuFreqTermY, 0));
422 cuDoubleComplex value5 = cuCdiv(refTerm1, make_cuDoubleComplex(config->square_pi2 * config->pi * cuFreqTermY, 0));
424 D1 = cuCsub(value4, value5);
426 //D2 = -(M_PI*freqTermY + refTerm2) / (4 * M_PI*M_PI*M_PI*freqTermY * freqTermY * freqTermY)*exp(refTerm1)
427 // + refTerm1 / (8 * M_PI*M_PI*M_PI*freqTermY * freqTermY * freqTermY);
428 cuDoubleComplex value6 = cuCadd(make_cuDoubleComplex(config->pi * freqTermY, 0), refTerm2);
429 cuDoubleComplex value7 = cuCmul(make_cuDoubleComplex(-1, 0), value6);
430 cuDoubleComplex value8 = cuCdiv(value7, make_cuDoubleComplex(config->square_pi2 * config->pi * cuFreqTermY, 0));
431 cuDoubleComplex value9 = cuCmul(value8, refTerm1_exp);
432 cuDoubleComplex value10 = cuCdiv(refTerm1, make_cuDoubleComplex(config->cube_pi2 * cuFreqTermY, 0));
433 D2 = cuCadd(value9, value10);
435 //D3 = exp(refTerm1) / (2 * M_PI*freqTermY) + ((Real)1 - refTerm2) / (2 * M_PI*freqTermY);
436 cuDoubleComplex value11 = cuCdiv(refTerm1_exp, make_cuDoubleComplex(config->pi2 * freqTermY, 0));
437 cuDoubleComplex value12 = cuCsub(make_cuDoubleComplex(1, 0), refTerm2);
438 cuDoubleComplex value13 = cuCdiv(value12, make_cuDoubleComplex(config->pi2 * freqTermY, 0));
440 D3 = cuCadd(value11, value13);
442 //} else if (freqTermX != 0.0 && freqTermY == 0.0) {
444 else if (abs(freqTermX) > config->tolerence && abs(freqTermY) <= config->tolerence) {
446 refTerm1.y = config->square_pi2 * freqTermX * freqTermX;
448 refTerm3.y = config->pi2 * freqTermX;
450 //D1 = (refTerm1 + 4 * M_PI*freqTermX - (Real)2 * refTerm2) / (8 * M_PI*M_PI*M_PI*freqTermY * freqTermY * freqTermY)*exp(-refTerm3)
451 // + refTerm2 / (4 * M_PI*M_PI*M_PI*freqTermX * freqTermX * freqTermX);
453 cuDoubleComplex refTerm3_exp = make_cuDoubleComplex(refTerm3.x, refTerm3.y);
454 exponent_complex_mesh(&refTerm3_exp);
456 cuDoubleComplex value1 = cuCadd(refTerm1, make_cuDoubleComplex(4 * config->pi * freqTermX, 0));
457 cuDoubleComplex value2 = cuCmul(make_cuDoubleComplex(2, 0), refTerm2);
458 cuDoubleComplex value3 = cuCsub(value1, value2);
459 cuDoubleComplex value4 = cuCdiv(value3, make_cuDoubleComplex(config->cube_pi2 * cuFreqTermY, 0));
460 cuDoubleComplex value5 = cuCmul(value4, refTerm3_exp);
461 cuDoubleComplex value6 = cuCdiv(refTerm2, make_cuDoubleComplex(config->square_pi2 * config->pi * cuFreqTermX, 0));
463 D1 = cuCadd(value5, value6);
465 //D2 = (Real)1 / (Real)2 * D1;
466 D2 = cuCmul(make_cuDoubleComplex(1.0 / 2.0, 0), D1);
468 //D3 = ((refTerm3 + (Real)1)*exp(-refTerm3) - (Real)1) / (4 * M_PI*M_PI*freqTermX * freqTermX);
469 cuDoubleComplex value7 = cuCadd(refTerm3, make_cuDoubleComplex(1.0, 0));
470 cuDoubleComplex value8 = cuCmul(refTerm3, make_cuDoubleComplex(-1.0, 0));
471 exponent_complex_mesh(&value8);
472 cuDoubleComplex value9 = cuCmul(value7, value8);
473 cuDoubleComplex value10 = cuCsub(value9, make_cuDoubleComplex(1.0, 0));
474 D3 = cuCdiv(value10, make_cuDoubleComplex(config->square_pi2 * sqFreqTermX, 0));
476 //} else if (freqTermX == -freqTermY) {
478 else if (abs(freqTermX + freqTermY) <= config->tolerence) {
481 refTerm2.y = config->pi2 * freqTermX;
482 refTerm3.y = config->pi2 * config->pi * freqTermX * freqTermX;
484 //D1 = (-2 * M_PI*freqTermX + refTerm1) / (8 * M_PI*M_PI*M_PI*freqTermX * freqTermX * freqTermX)*exp(-refTerm2)
485 // - (refTerm3 + refTerm1) / (8 * M_PI*M_PI*M_PI*freqTermX * freqTermX * freqTermX);
487 cuDoubleComplex value1 = cuCadd(make_cuDoubleComplex(-config->pi2 * freqTermX, 0), refTerm1);
488 cuDoubleComplex value2 = cuCdiv(value1, make_cuDoubleComplex(config->cube_pi2 * cuFreqTermX, 0));
489 cuDoubleComplex value3 = cuCmul(refTerm2, make_cuDoubleComplex(-1.0, 0));
490 exponent_complex_mesh(&value3);
491 cuDoubleComplex value4 = cuCmul(value2, value3);
493 cuDoubleComplex value5 = cuCadd(refTerm3, refTerm1);
494 cuDoubleComplex value6 = cuCdiv(value5, make_cuDoubleComplex(config->cube_pi2 * cuFreqTermX, 0));
496 D1 = cuCsub(value4, value6);
498 //D2 = (-refTerm1) / (8 * M_PI*M_PI*M_PI*freqTermX * freqTermX * freqTermX)*exp(-refTerm2)
499 // + (-refTerm3 + refTerm1 + 2 * M_PI*freqTermX) / (8 * M_PI*M_PI*M_PI*freqTermX * freqTermX * freqTermX);
501 cuDoubleComplex value7 = cuCmul(refTerm1, make_cuDoubleComplex(-1.0, 0));
502 cuDoubleComplex value8 = cuCdiv(value7, make_cuDoubleComplex(config->cube_pi2 * cuFreqTermX, 0));
503 cuDoubleComplex value9 = cuCmul(value8, value3);
505 cuDoubleComplex value10 = cuCmul(refTerm3, make_cuDoubleComplex(-1.0, 0));
506 cuDoubleComplex value11 = cuCadd(value10, refTerm1);
507 cuDoubleComplex value12 = cuCadd(value11, make_cuDoubleComplex(config->pi2 * freqTermX, 0));
508 cuDoubleComplex value13 = cuCdiv(value12, make_cuDoubleComplex(config->cube_pi2 * cuFreqTermX, 0));
510 D2 = cuCadd(value9, value13);
512 //D3 = (-refTerm1) / (4 * M_PI*M_PI*freqTermX * freqTermX)*exp(-refTerm2)
513 // + (-refTerm2 + (Real)1) / (4 * M_PI*M_PI*freqTermX * freqTermX);
515 cuDoubleComplex value14 = cuCdiv(value7, make_cuDoubleComplex(config->square_pi2 * sqFreqTermX, 0));
516 cuDoubleComplex value15 = cuCmul(value14, value3);
518 cuDoubleComplex value16 = cuCmul(refTerm2, make_cuDoubleComplex(-1.0, 0));
519 cuDoubleComplex value17 = cuCadd(value16, make_cuDoubleComplex(1.0, 0));
520 cuDoubleComplex value18 = cuCdiv(value17, make_cuDoubleComplex(config->square_pi2 * sqFreqTermX, 0));
522 D3 = cuCadd(value15, value18);
527 refTerm1.y = -config->pi2 * (freqTermX + freqTermY);
529 refTerm3.y = -config->pi2 * freqTermX;
531 //D1 = exp(refTerm1)*(refTerm2 - 2 * M_PI*(freqTermX + freqTermY)) / (8 * M_PI*M_PI*M_PI*freqTermY * (freqTermX + freqTermY)*(freqTermX + freqTermY))
532 // + exp(refTerm3)*(2 * M_PI*freqTermX - refTerm2) / (8 * M_PI*M_PI*M_PI*freqTermX * freqTermX * freqTermY)
533 // + ((2 * freqTermX + freqTermY)*refTerm2) / (8 * M_PI*M_PI*M_PI*freqTermX * freqTermX * (freqTermX + freqTermY)*(freqTermX + freqTermY));
535 cuDoubleComplex refTerm1_exp = make_cuDoubleComplex(refTerm1.x, refTerm1.y);
536 exponent_complex_mesh(&refTerm1_exp);
538 double val1 = config->pi2 * (freqTermX + freqTermY);
539 cuDoubleComplex value1 = cuCsub(refTerm2, make_cuDoubleComplex(val1, 0));
540 cuDoubleComplex value2 = cuCmul(refTerm1_exp, value1);
542 double val2 = config->cube_pi2 * freqTermY * (freqTermX + freqTermY) * (freqTermX + freqTermY);
543 cuDoubleComplex value3 = cuCdiv(value2, make_cuDoubleComplex(val2, 0));
545 cuDoubleComplex refTerm3_exp = make_cuDoubleComplex(refTerm3.x, refTerm3.y);
546 exponent_complex_mesh(&refTerm3_exp);
548 double val3 = config->pi2 * freqTermX;
549 cuDoubleComplex value4 = cuCsub(make_cuDoubleComplex(val3, 0), refTerm2);
550 cuDoubleComplex value5 = cuCmul(refTerm3_exp, value4);
551 double val4 = config->cube_pi2 * sqFreqTermX * freqTermY;
552 cuDoubleComplex value6 = cuCdiv(value5, make_cuDoubleComplex(val4, 0));
554 double val5 = 2.0 * freqTermX + freqTermY;
555 cuDoubleComplex value7 = cuCmul(make_cuDoubleComplex(val5, 0), refTerm2);
556 double val6 = config->cube_pi2 * sqFreqTermX * (freqTermX + freqTermY) * (freqTermX + freqTermY);
557 cuDoubleComplex value8 = cuCdiv(value7, make_cuDoubleComplex(val6, 0));
559 cuDoubleComplex value9 = cuCadd(value3, value6);
560 D1 = cuCadd(value9, value8);
562 //D2 = exp(refTerm1)*(refTerm2*(freqTermX + 2 * freqTermY) - 2 * M_PI*freqTermY * (freqTermX + freqTermY)) / (8 * M_PI*M_PI*M_PI*freqTermY * freqTermY * (freqTermX + freqTermY)*(freqTermX + freqTermY))
563 // + exp(refTerm3)*(-refTerm2) / (8 * M_PI*M_PI*M_PI*freqTermX * freqTermY * freqTermY)
564 // + refTerm2 / (8 * M_PI*M_PI*M_PI*freqTermX * (freqTermX + freqTermY)* (freqTermX + freqTermY));
566 double val7 = freqTermX + 2.0 * freqTermY;
567 cuDoubleComplex value10 = cuCmul(refTerm2, make_cuDoubleComplex(val7, 0));
568 double val8 = config->pi2 * freqTermY * (freqTermX + freqTermY);
569 cuDoubleComplex value11 = cuCsub(value10, make_cuDoubleComplex(val8, 0));
570 cuDoubleComplex value12 = cuCmul(refTerm1_exp, value11);
571 double val9 = config->cube_pi2 * sqFreqTermY * (freqTermX + freqTermY) * (freqTermX + freqTermY);
572 cuDoubleComplex value13 = cuCdiv(value12, make_cuDoubleComplex(val9, 0));
574 cuDoubleComplex value14 = cuCmul(refTerm2, make_cuDoubleComplex(-1.0, 0));
575 cuDoubleComplex value15 = cuCmul(refTerm3_exp, value14);
576 double val10 = config->cube_pi2 * freqTermX * sqFreqTermY;
577 cuDoubleComplex value16 = cuCdiv(value15, make_cuDoubleComplex(val10, 0));
579 double val11 = config->cube_pi2 * freqTermX * (freqTermX + freqTermY) * (freqTermX + freqTermY);
580 cuDoubleComplex value17 = cuCdiv(refTerm2, make_cuDoubleComplex(val11, 0));
582 cuDoubleComplex value18 = cuCadd(value13, value16);
583 D2 = cuCadd(value18, value17);
585 //D3 = -exp(refTerm1) / (4 * M_PI*M_PI*freqTermY * (freqTermX + freqTermY))
586 // + exp(refTerm3) / (4 * M_PI*M_PI*freqTermX * freqTermY)
587 // - (Real)1 / (4 * M_PI*M_PI*freqTermX * (freqTermX + freqTermY));
589 cuDoubleComplex value19 = cuCmul(refTerm1_exp, make_cuDoubleComplex(-1.0, 0));
590 double val12 = config->square_pi2 * freqTermY * (freqTermX + freqTermY);
591 cuDoubleComplex value20 = cuCdiv(value19, make_cuDoubleComplex(val12, 0));
593 double val13 = config->square_pi2 * freqTermX * freqTermY;
594 cuDoubleComplex value21 = cuCdiv(refTerm3_exp, make_cuDoubleComplex(val13, 0));
596 double val14 = 1.0 / (config->square_pi2 * freqTermX * (freqTermX + freqTermY));
597 cuDoubleComplex value22 = make_cuDoubleComplex(val14, 0);
599 cuDoubleComplex value23 = cuCadd(value20, value21);
600 D3 = cuCsub(value23, value22);
604 //refAS = (av1 - av0)*D1 + (av2 - av1)*D2 + av0 * D3;
606 double t1 = av1 - av0;
607 double t2 = av2 - av1;
608 cuDoubleComplex value_temp1 = cuCmul(make_cuDoubleComplex(t1, 0), D1);
609 cuDoubleComplex value_temp2 = cuCmul(make_cuDoubleComplex(t2, 0), D2);
610 cuDoubleComplex value_temp3 = cuCmul(make_cuDoubleComplex(av0, 0), D3);
612 cuDoubleComplex valeF = cuCadd(value_temp1, value_temp2);
613 refAS = cuCadd(valeF, value_temp3);
615 cuDoubleComplex temp;
616 if (abs(fz) <= config->tolerence)
617 temp = make_cuDoubleComplex(0, 0);
619 term2.y = config->pi2 * (flx * geom->glShift[0] + fly * geom->glShift[1] + flz * geom->glShift[2]);
621 //temp = refAS / det * exp(term1)* flz / fz * exp(term2);
623 exponent_complex_mesh(&term1);
624 exponent_complex_mesh(&term2);
626 cuDoubleComplex tmp1 = cuCdiv(refAS, make_cuDoubleComplex(det, 0));
627 cuDoubleComplex tmp2 = cuCmul(tmp1, term1);
628 cuDoubleComplex tmp3 = cuCmul(tmp2, make_cuDoubleComplex(flz, 0));
629 cuDoubleComplex tmp4 = cuCdiv(tmp3, make_cuDoubleComplex(fz, 0));
630 temp = cuCmul(tmp4, term2);
634 double absval = sqrt((temp.x * temp.x) + (temp.y * temp.y));
635 if (absval > config->min_double)
639 temp = make_cuDoubleComplex(0, 0);
642 //cuDoubleComplex addtmp = output[col + row * config->pn_X];
643 //output[col+row*config->pn_X] = cuCadd(addtmp,temp);
645 output[tid].x += temp.x;
646 output[tid].y += temp.y;
653 const int& nBlocks, const int& nThreads, cufftDoubleComplex* output,
654 const MeshKernelConfig* config, double shading_factor, const geometric* geom,
655 double carrierWaveX, double carrierWaveY, double carrierWaveZ, CUstream_st* stream)
657 cudaKernel_double_RefAS_flat << <nBlocks, nThreads, 0, stream >> > (output, config, shading_factor,
658 geom, carrierWaveX, carrierWaveY, carrierWaveZ);
661 void cudaMesh_Continuous(
662 const int& nBlocks, const int& nThreads, cufftDoubleComplex* output,
663 const MeshKernelConfig* config, const geometric* geom, double av0, double av1, double av2,
664 double carrierWaveX, double carrierWaveY, double carrierWaveZ, CUstream_st* stream)
666 cudaKernel_double_RefAS_continuous << <nBlocks, nThreads, 0, stream >> > (output, config,
667 geom, av0, av1, av2, carrierWaveX, carrierWaveY, carrierWaveZ);
671 void call_fftGPU(int nx, int ny, cufftDoubleComplex* input, cufftDoubleComplex* output, CUstream_st* streamTriMesh)
673 cudaFFT_Mesh(streamTriMesh, nx, ny, input, output, 1);
676 void call_fftGPUf(int nx, int ny, cuFloatComplex* input, cuFloatComplex* output, CUstream_st* streamTriMesh)
678 cudaFFT_Meshf(streamTriMesh, nx, ny, input, output, 1);
682 #endif // !ophTriMeshKernel_cu__