Openholo  v4.0
Open Source Digital Holographic Library
ophSig_GPU.cpp
Go to the documentation of this file.
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install, copy or use the software.
7 //
8 //
9 // License Agreement
10 // For Open Source Digital Holographic Library
11 //
12 // Openholo library is free software;
13 // you can redistribute it and/or modify it under the terms of the BSD 2-Clause license.
14 //
15 // Copyright (C) 2017-2024, Korea Electronics Technology Institute. All rights reserved.
16 // E-mail : contact.openholo@gmail.com
17 // Web : http://www.openholo.org
18 //
19 // Redistribution and use in source and binary forms, with or without modification,
20 // are permitted provided that the following conditions are met:
21 //
22 // 1. Redistribution's of source code must retain the above copyright notice,
23 // this list of conditions and the following disclaimer.
24 //
25 // 2. Redistribution's in binary form must reproduce the above copyright notice,
26 // this list of conditions and the following disclaimer in the documentation
27 // and/or other materials provided with the distribution.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the copyright holder or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 // This software contains opensource software released under GNU Generic Public License,
41 // NVDIA Software License Agreement, or CUDA supplement to Software License Agreement.
42 // Check whether software you use contains licensed software.
43 //
44 //M*/
45 
46 #include "ophSig.h"
47 #include "ophSig_GPU.h"
48 
49 
50 
51 static void HandleError(cudaError_t err,
52  const char* file,
53  int line) {
54  if (err != cudaSuccess) {
55  printf("%s in %s at line %d\n", cudaGetErrorString(err),
56  file, line);
57  exit(EXIT_FAILURE);
58  }
59 }
60 #define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
61 
62 
63 
64 void ophSig::cvtOffaxis_GPU(Real angleX, Real angleY)
65 {
66  int nx = context_.pixel_number[_X];
67  int ny = context_.pixel_number[_Y];
69  Complex<Real> *host_data;
70 
71  cField2Buffer(*ComplexH, &host_data, nx, ny);
72 
73  cudaStreamCreate(&streamLF);
74 
75  cuDoubleComplex* dev_src_data;
76  Real *device_angle = new Real[2];
77  Real *temp_angle = new Real[2];
78  temp_angle[0] = angleX;
79  temp_angle[1] = angleY;
80 
81  //
82  Real *dev_dst_data;
83  cuDoubleComplex* F;
84  ophSigConfig *device_config = nullptr;
85  //Malloc
86 
87  size_t nxy = sizeof(cuDoubleComplex) * nx * ny;
88 
89  cudaMalloc((void**)&dev_src_data, nxy);
90  cudaMalloc((void**)&dev_dst_data, sizeof(Real)*nx*ny);
91  cudaMalloc((void**)&F, nxy);
92  cudaMalloc((void**)&device_config, sizeof(ophSigConfig));
93  cudaMalloc((void**)&device_angle, sizeof(Real) * 2);
94 
95  //memcpy
96  cudaMemcpy(dev_src_data, host_data, nxy, cudaMemcpyHostToDevice);
97  cudaMemcpy(dev_dst_data, 0, sizeof(Real)*nx*ny, cudaMemcpyHostToDevice);
98  cudaMemcpy(F, 0, nxy, cudaMemcpyHostToDevice);
99  cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice);
100  cudaMemcpy(device_angle, temp_angle, sizeof(Real)*2, cudaMemcpyHostToDevice);
101 
102  //start
103 
104  cudaCvtOFF(dev_src_data, dev_dst_data, device_config, nx, ny,wl, F, device_angle);
105  //end
106  cudaMemcpy(host_data, dev_src_data, nxy, cudaMemcpyDeviceToHost);
107  ivec2 size(nx, ny);
108  Buffer2Field(host_data, *ComplexH, size);
109 
110  cudaFree(dev_src_data);
111  cudaFree(dev_dst_data);
112  cudaFree(F);
113  cudaFree(device_config);
114  cudaFree(device_angle);
115 
116  delete[] host_data;
117 }
118 
120 {
121  int nx = context_.pixel_number[_X];
122  int ny = context_.pixel_number[_Y];
123  Complex<Real> *host_data;
124  cufftDoubleComplex *fft_temp_data,*out_data, *F;
125  cufftHandle fftplan;
126  ophSigConfig *device_config = nullptr;
127  if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS)
128  {
129  LOG("FAIL in creating cufft plan");
130  return false;
131  };
132  if (!streamLF)
133  cudaStreamCreate(&streamLF);
134 
135  cField2Buffer(*ComplexH, &host_data, nx, ny);
136 
137  size_t nxy = sizeof(cufftDoubleComplex)* nx* ny;
138 
139  cudaMalloc((void**)&fft_temp_data, nxy);
140  cudaMalloc((void**)&out_data, nxy);
141  cudaMalloc((void**)&F, nxy);
142  cudaMalloc((void**)&device_config, sizeof(ophSigConfig));
143 
144  cudaMemcpy(fft_temp_data, host_data, nxy, cudaMemcpyHostToDevice);
145  cudaMemcpy(out_data, 0, nxy, cudaMemcpyHostToDevice);
146  cudaMemcpy(F, 0, nxy, cudaMemcpyHostToDevice);
147  cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice);
148 
149  cudaCuFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_FORWARD);
150 
151  // µ¥ÀÌÅÍ °è»ê Real wl = *context_.wave_length; Real NA = _cfgSig.NA; Real_t NA_g = NA * redRate; Real Rephase = -(1 / (4 * M_PI)*pow((wl / NA_g), 2)); Real Imphase = ((1 / (4 * M_PI))*depth*wl); cudaCvtHPO(streamLF,out_data,fft_temp_data,device_config,F,nx, ny,Rephase,Imphase); cudaCuIFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_INVERSE); cudaMemcpy(host_data, fft_temp_data, nxy, cudaMemcpyDeviceToHost); ivec2 size(nx, ny); Buffer2Field(host_data, *ComplexH, size); // cudaFree(F); cudaFree(device_config); cudaFree(out_data); cudaFree(fft_temp_data); cufftDestroy(fftplan); delete[] host_data; return true; } bool ophSig::sigConvertCAC_GPU(double red, double green, double blue) { int nx = context_.pixel_number[_X]; int ny = context_.pixel_number[_Y]; Complex<Real>* host_data; cufftDoubleComplex *fft_temp_data, *out_data, *F; cufftHandle fftplan; ophSigConfig *device_config = nullptr; Real radius = _radius; if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS) { LOG("FAIL in creating cufft plan"); return false; }; size_t nxy = sizeof(cufftDoubleComplex) * nx * ny; ColorField2Buffer(ComplexH[0], &host_data, nx, ny); cudaMalloc((void**)&fft_temp_data, nxy); cudaMalloc((void**)&out_data, nxy); cudaMalloc((void**)&F, nxy); cudaMalloc((void**)&device_config, sizeof(ophSigConfig)); cudaMemcpy(F, 0, nxy, cudaMemcpyHostToDevice); cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice); cudaMemcpy(fft_temp_data, 0, nxy, cudaMemcpyHostToDevice); cudaMemcpy(out_data, 0, nxy, cudaMemcpyHostToDevice); //blue cudaMemcpy(out_data, host_data, nxy, cudaMemcpyHostToDevice); //cudaCvtFieldToCuFFT(temp_data, out_data, nx, ny); cudaCuFFT(&fftplan, out_data, fft_temp_data, nx, ny, CUFFT_FORWARD); double sigmaf = ((_foc[2] - _foc[0]) * blue) / (4 * M_PI); cudaCvtCAC(fft_temp_data, out_data,F,device_config,nx, ny,sigmaf,radius); cudaCuIFFT(&fftplan, out_data, fft_temp_data, nx, ny, CUFFT_INVERSE); //cudaCvtCuFFTToField(out_data, temp_data, nx, ny); cudaMemcpy(host_data, out_data, nxy, cudaMemcpyDeviceToHost); ivec2 size(nx, ny); Buffer2Field(host_data, ComplexH[0], size); // green ColorField2Buffer(ComplexH[1], &host_data, nx, ny); cudaMemcpy(out_data, host_data, nxy, cudaMemcpyHostToDevice); //cudaCvtFieldToCuFFT(temp_data, out_data, nx, ny); cudaCuFFT(&fftplan, out_data, fft_temp_data, nx, ny, CUFFT_FORWARD); sigmaf = ((_foc[2] - _foc[1]) * green) / (4 * M_PI); cudaCvtCAC(fft_temp_data, out_data, F, device_config, nx, ny, sigmaf, radius); cudaCuIFFT(&fftplan, out_data, fft_temp_data, nx, ny, CUFFT_INVERSE); //cudaCvtCuFFTToField(out_data, temp_data, nx, ny); cudaMemcpy(host_data, out_data, nxy, cudaMemcpyDeviceToHost); Buffer2Field(host_data, ComplexH[1], size); //free cudaFree(F); cudaFree(device_config); //cudaFree(temp_data); cudaFree(out_data); cudaFree(fft_temp_data); cufftDestroy(fftplan); delete[] host_data; return true; } bool ophSig::propagationHolo_GPU(float depth) { int nx = context_.pixel_number[_X]; int ny = context_.pixel_number[_Y]; Complex<Real> *host_data; cufftDoubleComplex *fft_temp_data, *out_data, *F; cufftHandle fftplan; ophSigConfig *device_config = nullptr; if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS) { LOG("FAIL in creating cufft plan"); return false; }; cField2Buffer(*ComplexH, &host_data, nx, ny); size_t nxy = sizeof(cufftDoubleComplex) * nx * ny; cudaMalloc((void**)&fft_temp_data, nxy); cudaMalloc((void**)&out_data, nxy); cudaMalloc((void**)&F, nxy); cudaMalloc((void**)&device_config, sizeof(ophSigConfig)); cudaMemcpy(fft_temp_data, host_data, nxy, cudaMemcpyHostToDevice); cudaMemcpy(out_data, 0, nxy, cudaMemcpyHostToDevice); cudaMemcpy(F, 0, nxy, cudaMemcpyHostToDevice); cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice); //cudaCvtFieldToCuFFT(temp_data, fft_temp_data, nx, ny); cudaCuFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_FORWARD); Real wl = *context_.wave_length; Real_t sigmaf = (depth*wl) / (4 * M_PI); cudaPropagation(out_data, fft_temp_data, F, device_config, nx, ny, sigmaf); cudaCuIFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_INVERSE); //cudaCvtCuFFTToField(fft_temp_data, temp_data, nx, ny); cudaMemcpy(host_data, fft_temp_data, nx*ny * sizeof(Complex<Real>), cudaMemcpyDeviceToHost); ivec2 size(nx, ny); Buffer2Field(host_data, *ComplexH, size); // cudaFree(F); cudaFree(device_config); cudaFree(out_data); cudaFree(fft_temp_data); cufftDestroy(fftplan); delete[] host_data; return true; } bool ophSig::Color_propagationHolo_GPU(float depth) { int nx = context_.pixel_number[_X]; int ny = context_.pixel_number[_Y]; Complex<Real> *host_data; cufftDoubleComplex *fft_temp_data, *out_data, *F; cufftHandle fftplan; ophSigConfig *device_config = nullptr; if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS) { LOG("FAIL in creating cufft plan"); return false; }; cField2Buffer(*ComplexH, &host_data, nx, ny); size_t nxy = sizeof(cufftDoubleComplex) * nx * ny; cudaMalloc((void**)&fft_temp_data, nxy); cudaMalloc((void**)&out_data, nxy); cudaMalloc((void**)&F, nxy); cudaMalloc((void**)&device_config, sizeof(ophSigConfig)); cudaMemcpy(fft_temp_data, 0, nxy, cudaMemcpyHostToDevice); cudaMemcpy(out_data, 0, nxy, cudaMemcpyHostToDevice); cudaMemcpy(F, 0, nxy, cudaMemcpyHostToDevice); cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice); Real wl = 0; Real_t sigmaf = 0; /////////////// wl = 0.000000473; sigmaf = (depth*wl) / (4 * M_PI); ColorField2Buffer(ComplexH[0], &host_data, nx, ny); cudaMemcpy(fft_temp_data, host_data, nxy, cudaMemcpyHostToDevice); //cudaCvtFieldToCuFFT(temp_data, fft_temp_data, nx, ny); cudaCuFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_FORWARD); cudaPropagation(out_data, fft_temp_data, F, device_config, nx, ny, sigmaf); cudaCuIFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_INVERSE); //cudaCvtCuFFTToField(out_data, temp_data, nx, ny); cudaMemcpy(host_data, fft_temp_data, nxy, cudaMemcpyDeviceToHost); ivec2 size(nx, ny); Buffer2Field(host_data, ComplexH[0], size); // wl = 0.000000532; sigmaf = (depth*wl) / (4 * M_PI); ColorField2Buffer(ComplexH[1], &host_data, nx, ny); cudaMemcpy(fft_temp_data, host_data, nxy, cudaMemcpyHostToDevice); //cudaCvtFieldToCuFFT(temp_data, fft_temp_data, nx, ny); cudaCuFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_FORWARD); cudaPropagation(out_data, fft_temp_data, F, device_config, nx, ny, sigmaf); cudaCuIFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_INVERSE); //cudaCvtCuFFTToField(out_data, temp_data, nx, ny); cudaMemcpy(host_data, fft_temp_data, nxy, cudaMemcpyDeviceToHost); Buffer2Field(host_data, ComplexH[1], size); // wl = 0.000000633; sigmaf = (depth*wl) / (4 * M_PI); ColorField2Buffer(ComplexH[2], &host_data, nx, ny); cudaMemcpy(fft_temp_data, host_data, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice); //cudaCvtFieldToCuFFT(temp_data, fft_temp_data, nx, ny); cudaCuFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_FORWARD); cudaPropagation(out_data, fft_temp_data, F, device_config, nx, ny, sigmaf); cudaCuIFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_INVERSE); //cudaCvtCuFFTToField(out_data, temp_data, nx, ny); cudaMemcpy(host_data, fft_temp_data, nx*ny * sizeof(Complex<Real>), cudaMemcpyDeviceToHost); Buffer2Field(host_data, ComplexH[2], size); // cudaFree(F); cudaFree(device_config); cudaFree(out_data); cudaFree(fft_temp_data); cufftDestroy(fftplan); delete[] host_data; return true; } double ophSig::sigGetParamSF_GPU(float zMax, float zMin, int sampN, float th) { int nx = context_.pixel_number[_X]; int ny = context_.pixel_number[_Y]; Complex<Real> *host_data; cufftDoubleComplex *fft_temp_data, *out_data, *Ftemp_data, *FH; cufftHandle fftplan; //cufftResult a; Real wl = *context_.wave_length; Real depth; Real *f; ophSigConfig *device_config = nullptr; size_t nxy = sizeof(cufftDoubleComplex) * nx * ny; cudaMalloc((void**)&f, sizeof(Real)*nx*ny); cudaMalloc((void**)&FH, nxy); cudaMalloc((void**)&device_config, sizeof(ophSigConfig)); cudaMalloc((void**)&fft_temp_data, nxy); cudaMalloc((void**)&Ftemp_data, nxy); cudaMalloc((void**)&out_data, nxy); if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS) { LOG("FAIL in creating cufft plan"); return false; }; cField2Buffer(*ComplexH, &host_data, nx, ny); cudaMemcpy(fft_temp_data, host_data, nxy, cudaMemcpyHostToDevice); cudaMemcpy(out_data, 0, nxy, cudaMemcpyHostToDevice); //cudaCvtFieldToCuFFT(temp_data, fft_temp_data, nx, ny); cudaCuFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_FORWARD); cudaMemcpy(FH, 0, nxy, cudaMemcpyHostToDevice); cudaMemcpy(f, 0, nxy, cudaMemcpyHostToDevice); cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice); depth = cudaGetParamSF(&fftplan, out_data, Ftemp_data, fft_temp_data, f, FH, device_config, nx, ny, zMax, zMin, sampN, th, wl); /*cudaCvtCuFFTToField(out_data, temp_data, nx, ny); cudaMemcpy(host_data, temp_data, nx*ny * sizeof(Complex<Real>), cudaMemcpyDeviceToHost); ivec2 size(nx, ny);*/ //Buffer2Field(host_data, *ComplexH, size); // cudaFree(FH); cudaFree(device_config); cudaFree(Ftemp_data); cudaFree(out_data); cudaFree(fft_temp_data); cudaFree(f); cufftDestroy(fftplan); delete[] host_data; return depth; } double ophSig::sigGetParamAT_GPU() { Real index; int nx = context_.pixel_number[_X]; int ny = context_.pixel_number[_Y]; int tid = 0; ivec2 size(nx, ny); Real_t NA_g = (Real_t)0.025; Real wl = *context_.wave_length; Real max = 0; Complex<Real>* host_data; cuDoubleComplex* Flr, * Fli, * G, *tmp1, *tmp2; cufftDoubleComplex *fft_data, *out_data; OphComplexField Fo_temp(nx, ny); OphComplexField Fon, yn, Ab_yn; OphRealField Ab_yn_half; ophSigConfig *device_config = nullptr; cufftHandle fftplan; vector<Real> t, tn; //cufftResult a; //a = cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z); //cout << a << endl; if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS) { LOG("FAIL in creating cufft plan"); return 0; }; cField2Buffer(*ComplexH, &host_data, nx, ny); size_t nxy = sizeof(cufftDoubleComplex) * nx * ny; cudaMalloc((void**)&Flr, nxy); cudaMalloc((void**)&Fli, nxy); cudaMalloc((void**)&tmp1, nxy); cudaMalloc((void**)&tmp2, nxy); cudaMalloc((void**)&G, nxy); cudaMalloc((void**)&device_config, sizeof(ophSigConfig)); cudaMalloc((void**)&fft_data, nxy); cudaMalloc((void**)&out_data, nxy); cudaMemcpy(fft_data, host_data, nxy, cudaMemcpyHostToDevice); cudaMemcpy(Flr, 0, nxy, cudaMemcpyHostToDevice); cudaMemcpy(Fli, 0, nxy, cudaMemcpyHostToDevice); cudaMemcpy(G, 0, nxy, cudaMemcpyHostToDevice); cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice); cudaMemcpy(out_data, 0, nxy, cudaMemcpyHostToDevice); cudaMemcpy(tmp1, 0, nxy, cudaMemcpyHostToDevice); cudaMemcpy(tmp2, 0, nxy, cudaMemcpyHostToDevice); cudaGetParamAT1(fft_data, Flr, Fli, G, device_config, nx, ny, NA_g, wl); //cudaCvtFieldToCuFFT(Flr, fft_data, nx, ny); cudaCuFFT(&fftplan, Flr, tmp1, nx, ny, CUFFT_FORWARD); //cudaCvtCuFFTToField(out_data, Flr, nx, ny); //cudaCvtFieldToCuFFT(Fli, fft_data, nx, ny); cudaCuFFT(&fftplan, Fli, tmp2, nx, ny, CUFFT_FORWARD); //cudaCvtCuFFTToField(out_data, Fli, nx, ny); cudaGetParamAT2(tmp1, tmp2, G, out_data, nx, ny); cudaMemcpy(host_data, out_data, nxy, cudaMemcpyDeviceToHost); Buffer2Field(host_data, Fo_temp, size); cudaFree(Flr); cudaFree(Fli); cudaFree(device_config); cudaFree(G); cudaFree(out_data); cudaFree(fft_data); cufftDestroy(fftplan); delete[] host_data; t = linspace(0., 1., nx / 2 + 1); tn.resize(t.size()); Fon.resize(1, t.size()); for (int i = 0; i < tn.size(); i++) { tn.at(i) = pow(t.at(i), 0.5); Fon(0, i)[_RE] = Fo_temp(nx / 2 - 1, nx / 2 - 1 + i)[_RE]; Fon(0, i)[_IM] = 0; } yn.resize(1, tn.size()); linInterp(t, Fon, tn, yn); fft1(yn, yn); Ab_yn.resize(yn.size[_X], yn.size[_Y]); absMat(yn, Ab_yn); Ab_yn_half.resize(1, nx / 4 + 1); for (int i = 0; i < nx / 4 + 1; i++) { Ab_yn_half(0, i) = Ab_yn(0, nx / 4 + i - 1)[_RE]; if (i == 0) max = Ab_yn_half(0, 0); else { if (Ab_yn_half(0, i) > max) { max = Ab_yn_half(0, i); index = i; } } } index = -(((index + 1) - 120) / 10) / 140 + 0.1; return index; }
152  Real wl = *context_.wave_length;
153  Real NA = _cfgSig.NA;
154  Real_t NA_g = NA * redRate;
155  Real Rephase = -(1 / (4 * M_PI)*pow((wl / NA_g), 2));
156  Real Imphase = ((1 / (4 * M_PI))*depth*wl);
157 
158  cudaCvtHPO(streamLF,out_data,fft_temp_data,device_config,F,nx, ny,Rephase,Imphase);
159 
160  cudaCuIFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_INVERSE);
161 
162  cudaMemcpy(host_data, fft_temp_data, nxy, cudaMemcpyDeviceToHost);
163  ivec2 size(nx, ny);
164  Buffer2Field(host_data, *ComplexH, size);
165  //
166  cudaFree(F);
167  cudaFree(device_config);
168  cudaFree(out_data);
169  cudaFree(fft_temp_data);
170  cufftDestroy(fftplan);
171 
172  delete[] host_data;
173 
174 
175  return true;
176 }
177 
178 
179 
180 
181 bool ophSig::sigConvertCAC_GPU(double red, double green, double blue)
182 {
183  int nx = context_.pixel_number[_X];
184  int ny = context_.pixel_number[_Y];
185 
186  Complex<Real>* host_data;
187  cufftDoubleComplex *fft_temp_data, *out_data, *F;
188  cufftHandle fftplan;
189  ophSigConfig *device_config = nullptr;
190  Real radius = _radius;
191 
192  if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS)
193  {
194  LOG("FAIL in creating cufft plan");
195  return false;
196  };
197 
198  size_t nxy = sizeof(cufftDoubleComplex) * nx * ny;
199 
200  ColorField2Buffer(ComplexH[0], &host_data, nx, ny);
201  cudaMalloc((void**)&fft_temp_data, nxy);
202  cudaMalloc((void**)&out_data, nxy);
203  cudaMalloc((void**)&F, nxy);
204  cudaMalloc((void**)&device_config, sizeof(ophSigConfig));
205 
206  cudaMemcpy(F, 0, nxy, cudaMemcpyHostToDevice);
207  cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice);
208  cudaMemcpy(fft_temp_data, 0, nxy, cudaMemcpyHostToDevice);
209  cudaMemcpy(out_data, 0, nxy, cudaMemcpyHostToDevice);
210 
211  //blue
212 
213  cudaMemcpy(out_data, host_data, nxy, cudaMemcpyHostToDevice);
214  //cudaCvtFieldToCuFFT(temp_data, out_data, nx, ny);
215  cudaCuFFT(&fftplan, out_data, fft_temp_data, nx, ny, CUFFT_FORWARD);
216 
217  double sigmaf = ((_foc[2] - _foc[0]) * blue) / (4 * M_PI);
218  cudaCvtCAC(fft_temp_data, out_data,F,device_config,nx, ny,sigmaf,radius);
219 
220  cudaCuIFFT(&fftplan, out_data, fft_temp_data, nx, ny, CUFFT_INVERSE);
221 
222  //cudaCvtCuFFTToField(out_data, temp_data, nx, ny);
223  cudaMemcpy(host_data, out_data, nxy, cudaMemcpyDeviceToHost);
224  ivec2 size(nx, ny);
225  Buffer2Field(host_data, ComplexH[0], size);
226 
227  // green
228  ColorField2Buffer(ComplexH[1], &host_data, nx, ny);
229  cudaMemcpy(out_data, host_data, nxy, cudaMemcpyHostToDevice);
230  //cudaCvtFieldToCuFFT(temp_data, out_data, nx, ny);
231  cudaCuFFT(&fftplan, out_data, fft_temp_data, nx, ny, CUFFT_FORWARD);
232 
233  sigmaf = ((_foc[2] - _foc[1]) * green) / (4 * M_PI);
234  cudaCvtCAC(fft_temp_data, out_data, F, device_config, nx, ny, sigmaf, radius);
235 
236  cudaCuIFFT(&fftplan, out_data, fft_temp_data, nx, ny, CUFFT_INVERSE);
237 
238  //cudaCvtCuFFTToField(out_data, temp_data, nx, ny);
239  cudaMemcpy(host_data, out_data, nxy, cudaMemcpyDeviceToHost);
240  Buffer2Field(host_data, ComplexH[1], size);
241 
242  //free
243  cudaFree(F);
244  cudaFree(device_config);
245  //cudaFree(temp_data);
246  cudaFree(out_data);
247  cudaFree(fft_temp_data);
248  cufftDestroy(fftplan);
249 
250  delete[] host_data;
251 
252  return true;
253 }
254 
256 {
257  int nx = context_.pixel_number[_X];
258  int ny = context_.pixel_number[_Y];
259  Complex<Real> *host_data;
260  cufftDoubleComplex *fft_temp_data, *out_data, *F;
261  cufftHandle fftplan;
262 
263  ophSigConfig *device_config = nullptr;
264 
265  if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS)
266  {
267  LOG("FAIL in creating cufft plan");
268  return false;
269  };
270 
271  cField2Buffer(*ComplexH, &host_data, nx, ny);
272 
273  size_t nxy = sizeof(cufftDoubleComplex) * nx * ny;
274  cudaMalloc((void**)&fft_temp_data, nxy);
275  cudaMalloc((void**)&out_data, nxy);
276  cudaMalloc((void**)&F, nxy);
277  cudaMalloc((void**)&device_config, sizeof(ophSigConfig));
278 
279  cudaMemcpy(fft_temp_data, host_data, nxy, cudaMemcpyHostToDevice);
280  cudaMemcpy(out_data, 0, nxy, cudaMemcpyHostToDevice);
281  cudaMemcpy(F, 0, nxy, cudaMemcpyHostToDevice);
282  cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice);
283 
284  //cudaCvtFieldToCuFFT(temp_data, fft_temp_data, nx, ny);
285  cudaCuFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_FORWARD);
286 
287  Real wl = *context_.wave_length;
288  Real_t sigmaf = (depth*wl) / (4 * M_PI);
289 
290  cudaPropagation(out_data, fft_temp_data, F, device_config, nx, ny, sigmaf);
291 
292  cudaCuIFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_INVERSE);
293 
294 
295  //cudaCvtCuFFTToField(fft_temp_data, temp_data, nx, ny);
296  cudaMemcpy(host_data, fft_temp_data, nx*ny * sizeof(Complex<Real>), cudaMemcpyDeviceToHost);
297  ivec2 size(nx, ny);
298  Buffer2Field(host_data, *ComplexH, size);
299  //
300  cudaFree(F);
301  cudaFree(device_config);
302  cudaFree(out_data);
303  cudaFree(fft_temp_data);
304  cufftDestroy(fftplan);
305 
306  delete[] host_data;
307 
308  return true;
309 }
310 
312 {
313  int nx = context_.pixel_number[_X];
314  int ny = context_.pixel_number[_Y];
315  Complex<Real> *host_data;
316  cufftDoubleComplex *fft_temp_data, *out_data, *F;
317  cufftHandle fftplan;
318 
319  ophSigConfig *device_config = nullptr;
320 
321  if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS)
322  {
323  LOG("FAIL in creating cufft plan");
324  return false;
325  };
326 
327  cField2Buffer(*ComplexH, &host_data, nx, ny);
328 
329  size_t nxy = sizeof(cufftDoubleComplex) * nx * ny;
330  cudaMalloc((void**)&fft_temp_data, nxy);
331  cudaMalloc((void**)&out_data, nxy);
332  cudaMalloc((void**)&F, nxy);
333  cudaMalloc((void**)&device_config, sizeof(ophSigConfig));
334 
335  cudaMemcpy(fft_temp_data, 0, nxy, cudaMemcpyHostToDevice);
336  cudaMemcpy(out_data, 0, nxy, cudaMemcpyHostToDevice);
337  cudaMemcpy(F, 0, nxy, cudaMemcpyHostToDevice);
338  cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice);
339 
340  Real wl = 0;
341  Real_t sigmaf = 0;
342 
344  wl = 0.000000473;
345  sigmaf = (depth*wl) / (4 * M_PI);
346  ColorField2Buffer(ComplexH[0], &host_data, nx, ny);
347  cudaMemcpy(fft_temp_data, host_data, nxy, cudaMemcpyHostToDevice);
348  //cudaCvtFieldToCuFFT(temp_data, fft_temp_data, nx, ny);
349  cudaCuFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_FORWARD);
350  cudaPropagation(out_data, fft_temp_data, F, device_config, nx, ny, sigmaf);
351  cudaCuIFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_INVERSE);
352  //cudaCvtCuFFTToField(out_data, temp_data, nx, ny);
353  cudaMemcpy(host_data, fft_temp_data, nxy, cudaMemcpyDeviceToHost);
354  ivec2 size(nx, ny);
355  Buffer2Field(host_data, ComplexH[0], size);
356  //
357  wl = 0.000000532;
358  sigmaf = (depth*wl) / (4 * M_PI);
359  ColorField2Buffer(ComplexH[1], &host_data, nx, ny);
360  cudaMemcpy(fft_temp_data, host_data, nxy, cudaMemcpyHostToDevice);
361  //cudaCvtFieldToCuFFT(temp_data, fft_temp_data, nx, ny);
362  cudaCuFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_FORWARD);
363 
364  cudaPropagation(out_data, fft_temp_data, F, device_config, nx, ny, sigmaf);
365  cudaCuIFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_INVERSE);
366  //cudaCvtCuFFTToField(out_data, temp_data, nx, ny);
367  cudaMemcpy(host_data, fft_temp_data, nxy, cudaMemcpyDeviceToHost);
368  Buffer2Field(host_data, ComplexH[1], size);
369  //
370  wl = 0.000000633;
371  sigmaf = (depth*wl) / (4 * M_PI);
372  ColorField2Buffer(ComplexH[2], &host_data, nx, ny);
373  cudaMemcpy(fft_temp_data, host_data, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice);
374  //cudaCvtFieldToCuFFT(temp_data, fft_temp_data, nx, ny);
375  cudaCuFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_FORWARD);
376  cudaPropagation(out_data, fft_temp_data, F, device_config, nx, ny, sigmaf);
377  cudaCuIFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_INVERSE);
378  //cudaCvtCuFFTToField(out_data, temp_data, nx, ny);
379  cudaMemcpy(host_data, fft_temp_data, nx*ny * sizeof(Complex<Real>), cudaMemcpyDeviceToHost);
380  Buffer2Field(host_data, ComplexH[2], size);
381  //
382  cudaFree(F);
383  cudaFree(device_config);
384  cudaFree(out_data);
385  cudaFree(fft_temp_data);
386  cufftDestroy(fftplan);
387 
388  delete[] host_data;
389 
390  return true;
391 }
392 
393 double ophSig::sigGetParamSF_GPU(float zMax, float zMin, int sampN, float th)
394 {
395  int nx = context_.pixel_number[_X];
396  int ny = context_.pixel_number[_Y];
397 
398  Complex<Real> *host_data;
399  cufftDoubleComplex *fft_temp_data, *out_data, *Ftemp_data, *FH;
400  cufftHandle fftplan;
401  //cufftResult a;
402  Real wl = *context_.wave_length;
403  Real depth;
404  Real *f;
405  ophSigConfig *device_config = nullptr;
406 
407  size_t nxy = sizeof(cufftDoubleComplex) * nx * ny;
408 
409  cudaMalloc((void**)&f, sizeof(Real)*nx*ny);
410  cudaMalloc((void**)&FH, nxy);
411  cudaMalloc((void**)&device_config, sizeof(ophSigConfig));
412  cudaMalloc((void**)&fft_temp_data, nxy);
413  cudaMalloc((void**)&Ftemp_data, nxy);
414  cudaMalloc((void**)&out_data, nxy);
415 
416  if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS)
417  {
418  LOG("FAIL in creating cufft plan");
419  return false;
420  };
421 
422  cField2Buffer(*ComplexH, &host_data, nx, ny);
423 
424  cudaMemcpy(fft_temp_data, host_data, nxy, cudaMemcpyHostToDevice);
425  cudaMemcpy(out_data, 0, nxy, cudaMemcpyHostToDevice);
426 
427 
428  //cudaCvtFieldToCuFFT(temp_data, fft_temp_data, nx, ny);
429  cudaCuFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_FORWARD);
430 
431  cudaMemcpy(FH, 0, nxy, cudaMemcpyHostToDevice);
432  cudaMemcpy(f, 0, nxy, cudaMemcpyHostToDevice);
433  cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice);
434 
435  depth = cudaGetParamSF(&fftplan, out_data, Ftemp_data, fft_temp_data, f, FH, device_config, nx, ny, zMax, zMin, sampN, th, wl);
436 
437 
438  /*cudaCvtCuFFTToField(out_data, temp_data, nx, ny);
439  cudaMemcpy(host_data, temp_data, nx*ny * sizeof(Complex<Real>), cudaMemcpyDeviceToHost);
440  ivec2 size(nx, ny);*/
441  //Buffer2Field(host_data, *ComplexH, size);
442  //
443  cudaFree(FH);
444  cudaFree(device_config);
445  cudaFree(Ftemp_data);
446  cudaFree(out_data);
447  cudaFree(fft_temp_data);
448  cudaFree(f);
449  cufftDestroy(fftplan);
450 
451  delete[] host_data;
452 
453  return depth;
454 }
455 
457 {
458 
459  Real index;
460  int nx = context_.pixel_number[_X];
461  int ny = context_.pixel_number[_Y];
462  int tid = 0;
463  ivec2 size(nx, ny);
464  Real_t NA_g = (Real_t)0.025;
465  Real wl = *context_.wave_length;
466  Real max = 0;
467  Complex<Real>* host_data;
468  cuDoubleComplex* Flr, * Fli, * G, *tmp1, *tmp2;
469  cufftDoubleComplex *fft_data, *out_data;
470 
471  OphComplexField Fo_temp(nx, ny);
472  OphComplexField Fon, yn, Ab_yn;
473  OphRealField Ab_yn_half;
474  ophSigConfig *device_config = nullptr;
475  cufftHandle fftplan;
476  vector<Real> t, tn;
477 
478  //cufftResult a;
479  //a = cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z);
480  //cout << a << endl;
481  if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS)
482  {
483  LOG("FAIL in creating cufft plan");
484  return 0;
485  };
486 
487  cField2Buffer(*ComplexH, &host_data, nx, ny);
488 
489  size_t nxy = sizeof(cufftDoubleComplex) * nx * ny;
490  cudaMalloc((void**)&Flr, nxy);
491  cudaMalloc((void**)&Fli, nxy);
492  cudaMalloc((void**)&tmp1, nxy);
493  cudaMalloc((void**)&tmp2, nxy);
494  cudaMalloc((void**)&G, nxy);
495  cudaMalloc((void**)&device_config, sizeof(ophSigConfig));
496 
497  cudaMalloc((void**)&fft_data, nxy);
498  cudaMalloc((void**)&out_data, nxy);
499 
500 
501  cudaMemcpy(fft_data, host_data, nxy, cudaMemcpyHostToDevice);
502  cudaMemcpy(Flr, 0, nxy, cudaMemcpyHostToDevice);
503  cudaMemcpy(Fli, 0, nxy, cudaMemcpyHostToDevice);
504  cudaMemcpy(G, 0, nxy, cudaMemcpyHostToDevice);
505  cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice);
506 
507  cudaMemcpy(out_data, 0, nxy, cudaMemcpyHostToDevice);
508  cudaMemcpy(tmp1, 0, nxy, cudaMemcpyHostToDevice);
509  cudaMemcpy(tmp2, 0, nxy, cudaMemcpyHostToDevice);
510 
511  cudaGetParamAT1(fft_data, Flr, Fli, G, device_config, nx, ny, NA_g, wl);
512 
513  //cudaCvtFieldToCuFFT(Flr, fft_data, nx, ny);
514  cudaCuFFT(&fftplan, Flr, tmp1, nx, ny, CUFFT_FORWARD);
515  //cudaCvtCuFFTToField(out_data, Flr, nx, ny);
516 
517  //cudaCvtFieldToCuFFT(Fli, fft_data, nx, ny);
518  cudaCuFFT(&fftplan, Fli, tmp2, nx, ny, CUFFT_FORWARD);
519  //cudaCvtCuFFTToField(out_data, Fli, nx, ny);
520 
521 
522  cudaGetParamAT2(tmp1, tmp2, G, out_data, nx, ny);
523 
524  cudaMemcpy(host_data, out_data, nxy, cudaMemcpyDeviceToHost);
525  Buffer2Field(host_data, Fo_temp, size);
526 
527  cudaFree(Flr);
528  cudaFree(Fli);
529  cudaFree(device_config);
530  cudaFree(G);
531  cudaFree(out_data);
532  cudaFree(fft_data);
533  cufftDestroy(fftplan);
534 
535  delete[] host_data;
536 
537  t = linspace(0., 1., nx / 2 + 1);
538  tn.resize(t.size());
539  Fon.resize(1, t.size());
540 
541  for (int i = 0; i < tn.size(); i++)
542  {
543  tn.at(i) = pow(t.at(i), 0.5);
544  Fon(0, i)[_RE] = Fo_temp(nx / 2 - 1, nx / 2 - 1 + i)[_RE];
545  Fon(0, i)[_IM] = 0;
546  }
547 
548  yn.resize(1, tn.size());
549  linInterp(t, Fon, tn, yn);
550  fft1(yn, yn);
551  Ab_yn.resize(yn.size[_X], yn.size[_Y]);
552  absMat(yn, Ab_yn);
553  Ab_yn_half.resize(1, nx / 4 + 1);
554 
555  for (int i = 0; i < nx / 4 + 1; i++)
556  {
557  Ab_yn_half(0, i) = Ab_yn(0, nx / 4 + i - 1)[_RE];
558  if (i == 0) max = Ab_yn_half(0, 0);
559  else
560  {
561  if (Ab_yn_half(0, i) > max)
562  {
563  max = Ab_yn_half(0, i);
564  index = i;
565  }
566  }
567  }
568 
569  index = -(((index + 1) - 120) / 10) / 140 + 0.1;
570 
571  return index;
572 
573 
574 }
575 
void cField2Buffer(matrix< Complex< Real >> &src, Complex< Real > **dst, int nx, int ny)
Function for move data from matrix<Complex<Real>> to Complex<Real>
Definition: ophSig.cpp:56
Real_t _radius
Definition: ophSig.h:491
Real * wave_length
Definition: Openholo.h:70
void ColorField2Buffer(matrix< Complex< Real >> &src, Complex< Real > **dst, int nx, int ny)
Function for move Color data from matrix<Complex<Real>> to Complex<Real>
Definition: ophSig.cpp:71
double sigGetParamAT_GPU()
Extraction of distance parameter using axis transfomation by using GPU.
Definition: ophSig_GPU.cpp:456
float Real
Definition: typedef.h:55
void fft1(matrix< Complex< T >> &src, matrix< Complex< T >> &dst, int sign=OPH_FORWARD, uint flag=OPH_ESTIMATE)
Function for Fast Fourier transform 1D.
Definition: ophSig.cpp:113
void cudaPropagation(cufftDoubleComplex *src_data, cufftDoubleComplex *dst_data, cuDoubleComplex *FH, ophSigConfig *device_config, int nx, int ny, Real sigmaf)
bool sigConvertHPO_GPU(Real depth, Real_t redRate)
Function for convert complex hologram to horizontal parallax only hologram by using GPU...
Definition: ophSig_GPU.cpp:119
structure for 2-dimensional integer vector and its arithmetic.
Definition: ivec.h:66
matrix< T > & resize(int x, int y)
Definition: mat.h:117
void cudaCvtCAC(cufftDoubleComplex *src_data, cufftDoubleComplex *dst_data, cuDoubleComplex *FFZP, ophSigConfig *device_config, int nx, int ny, Real sigmaf, Real radius)
void cudaCvtHPO(CUstream_st *stream, cufftDoubleComplex *src_data, cufftDoubleComplex *dst_data, ophSigConfig *device_config, cuDoubleComplex *F, int nx, int ny, Real Rephase, Real Imphase)
cudaStream_t streamLF
Definition: ophSig_GPU.h:57
Real_t _foc[3]
Definition: ophSig.h:492
#define _Y
Definition: define.h:96
#define _IM
Definition: complex.h:58
void cudaCuIFFT(cufftHandle *plan, cufftDoubleComplex *src_data, cufftDoubleComplex *dst_data, int nx, int ny, int direction)
ophSigConfig _cfgSig
Definition: ophSig.h:486
void cudaCvtOFF(cuDoubleComplex *src_data, Real *dst_data, ophSigConfig *device_config, int nx, int ny, Real wl, cuDoubleComplex *F, Real *angle)
#define _X
Definition: define.h:92
bool Color_propagationHolo_GPU(float depth)
Definition: ophSig_GPU.cpp:311
Real_t NA
Definition: ophSig.h:73
OphComplexField * ComplexH
Definition: ophSig.h:487
bool sigConvertCAC_GPU(double red, double green, double blue)
Function for Chromatic aberration compensation filter by using GPU.
Definition: ophSig_GPU.cpp:181
ivec2 size
Definition: mat.h:71
#define _RE
Definition: complex.h:55
double cudaGetParamSF(cufftHandle *fftplan, cufftDoubleComplex *src_data, cufftDoubleComplex *temp_data, cufftDoubleComplex *dst_data, Real *f, cuDoubleComplex *FH, ophSigConfig *device_config, int nx, int ny, float zMax, float zMin, int sampN, float th, Real wl)
double Real_t
Definition: typedef.h:56
void absMat(matrix< Complex< T >> &src, matrix< T > &dst)
Function for extracts Complex absolute value.
Definition: ophSig.h:657
ivec2 pixel_number
Definition: Openholo.h:63
bool propagationHolo_GPU(float depth)
Function for propagation hologram by using GPU.
Definition: ophSig_GPU.cpp:255
void linInterp(vector< T > &X, matrix< Complex< T >> &src, vector< T > &Xq, matrix< Complex< T >> &dst)
Linear interpolation.
Definition: ophSig.cpp:90
void cudaCuFFT(cufftHandle *plan, cufftDoubleComplex *src_data, cufftDoubleComplex *dst_data, int nx, int ny, int direction)
void cudaGetParamAT2(cuDoubleComplex *Flr, cuDoubleComplex *Fli, cuDoubleComplex *G, cuDoubleComplex *temp_data, int nx, int ny)
void Buffer2Field(const T *src, matrix< T > &dst, const ivec2 buffer_size)
Definition: function.h:350
double sigGetParamSF_GPU(float zMax, float zMin, int sampN, float th)
Extraction of distance parameter using sharpness functions by using GPU.
Definition: ophSig_GPU.cpp:393
OphConfig context_
Definition: Openholo.h:449
void cvtOffaxis_GPU(Real angleX, Real angleY)
Definition: ophSig_GPU.cpp:64
vector< T > linspace(T first, T last, int len)
Generate linearly spaced vector.
Definition: ophSig.h:645
#define M_PI
Definition: define.h:52
void cudaGetParamAT1(cuDoubleComplex *src_data, cuDoubleComplex *Flr, cuDoubleComplex *Fli, cuDoubleComplex *G, ophSigConfig *device_config, int nx, int ny, Real_t NA_g, Real wl)