Openholo  v5.0
Open Source Digital Holographic Library
ophLightField_GPU.cpp
Go to the documentation of this file.
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install, copy or use the software.
7 //
8 //
9 // License Agreement
10 // For Open Source Digital Holographic Library
11 //
12 // Openholo library is free software;
13 // you can redistribute it and/or modify it under the terms of the BSD 2-Clause license.
14 //
15 // Copyright (C) 2017-2024, Korea Electronics Technology Institute. All rights reserved.
16 // E-mail : contact.openholo@gmail.com
17 // Web : http://www.openholo.org
18 //
19 // Redistribution and use in source and binary forms, with or without modification,
20 // are permitted provided that the following conditions are met:
21 //
22 // 1. Redistribution's of source code must retain the above copyright notice,
23 // this list of conditions and the following disclaimer.
24 //
25 // 2. Redistribution's in binary form must reproduce the above copyright notice,
26 // this list of conditions and the following disclaimer in the documentation
27 // and/or other materials provided with the distribution.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the copyright holder or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 // This software contains opensource software released under GNU Generic Public License,
41 // NVDIA Software License Agreement, or CUDA supplement to Software License Agreement.
42 // Check whether software you use contains licensed software.
43 //
44 //M*/
45 
46 #include "ophLightField_GPU.h"
47 #include "cudaWrapper.h"
48 #include "sys.h"
49 
51 {
52  auto begin = CUR_TIME;
53 
54  cudaWrapper *pCudaWrapper = cudaWrapper::getInstance();
55  const uint pnX = context_.pixel_number[_X];
56  const uint pnY = context_.pixel_number[_Y];
57  const long long int pnXY = pnX * pnY;
58  const Real ppX = context_.pixel_pitch[_X];
59  const Real ppY = context_.pixel_pitch[_Y];
60  const int nX = num_image[_X];
61  const int nY = num_image[_Y];
62  const int N = nX * nY;
63  const int rX = resolution_image[_X];
64  const int rY = resolution_image[_Y];
65  const long long int R = rX * rY;
66  const long long int NR = N * R;
67  const Real distance = distanceRS2Holo;
68  const uint nWave = context_.waveNum;
69  bool bRandomPhase = GetRandomPhase();
70 
71  // device
72  uchar1** device_LF = nullptr;
73  uchar** device_LFData = nullptr;
74  cufftDoubleComplex* device_FFT_src = nullptr;
75  cufftDoubleComplex* device_FFT_dst = nullptr;
76  cufftDoubleComplex *device_dst = nullptr;
77  cufftDoubleComplex *device_FFT_tmp = nullptr;
78  cufftDoubleComplex *device_FFT_tmp2 = nullptr;
79  cufftDoubleComplex *device_FFT_tmp3 = nullptr;
80  LFGpuConst* device_config = nullptr;
81 
82  Complex<Real>* host_FFT_tmp = new Complex<Real>[pnXY];
83  auto step = CUR_TIME;
84  LOG("%s (Memory Allocation) : ", __FUNCTION__);
85 
86  HANDLE_ERROR(cudaMalloc((void **)& device_LF, sizeof(uchar1*) * N));
87  device_LFData = new uchar*[N];
88 
89  // LF Image to GPU Memory
90  for (int i = 0; i < N; i++)
91  {
92  int size = m_vecImgSize[i];
93  HANDLE_ERROR(cudaMalloc((void**)&device_LFData[i], sizeof(uchar1) * size));
94  HANDLE_ERROR(cudaMemcpy(device_LFData[i], m_vecImages[i], sizeof(uchar) * size, cudaMemcpyHostToDevice));
95  }
96  HANDLE_ERROR(cudaMemcpy(device_LF, device_LFData, sizeof(uchar*) * N, cudaMemcpyHostToDevice));
97 
98  HANDLE_ERROR(cudaMalloc((void**)&device_config, sizeof(LFGpuConst)));
99  HANDLE_ERROR(cudaMalloc((void**)&device_FFT_src, sizeof(cufftDoubleComplex) * NR));
100  HANDLE_ERROR(cudaMalloc((void**)&device_FFT_dst, sizeof(cufftDoubleComplex) * NR));
101  HANDLE_ERROR(cudaMalloc((void **)&device_dst, sizeof(cufftDoubleComplex) * NR));
102 
103  HANDLE_ERROR(cudaMemset(device_FFT_src, 0, sizeof(cufftDoubleComplex) * NR));// , streamLF));
104  HANDLE_ERROR(cudaMemset(device_FFT_dst, 0, sizeof(cufftDoubleComplex) * NR));//, streamLF));
105 
106  HANDLE_ERROR(cudaMalloc((void**)&device_FFT_tmp, sizeof(cufftDoubleComplex) * pnXY));
107  HANDLE_ERROR(cudaMalloc((void**)&device_FFT_tmp2, sizeof(cufftDoubleComplex) * pnXY * 4));
108  HANDLE_ERROR(cudaMalloc((void**)&device_FFT_tmp3, sizeof(cufftDoubleComplex) * pnXY * 4));
109 
110  LOG("%lf (s)\n", ELAPSED_TIME(step, CUR_TIME));
111 
112  int nThreads = pCudaWrapper->getMaxThreads(0);
113  int nBlocks = (R + nThreads - 1) / nThreads;
114  int nBlocks2 = (NR + nThreads - 1) / nThreads;
115  int nBlocks3 = (NR * 4 + nThreads - 1) / nThreads;
116  int nBlocks4 = (N + nThreads - 1) / nThreads;
117 
118  Real pi2 = M_PI * 2;
119  for (uint ch = 0; ch < nWave; ch++)
120  {
121  HANDLE_ERROR(cudaMemset(device_dst, 0, sizeof(cuDoubleComplex) * NR));//, streamLF));
122  HANDLE_ERROR(cudaMemset(device_FFT_tmp, 0, sizeof(cuDoubleComplex) * pnXY));//, streamLF));
123  HANDLE_ERROR(cudaMemset(device_FFT_tmp2, 0, sizeof(cuDoubleComplex) * pnXY * 4));//, streamLF));
124  HANDLE_ERROR(cudaMemset(device_FFT_tmp3, 0, sizeof(cuDoubleComplex) * pnXY * 4));//, streamLF));
125 
126  Real lambda = context_.wave_length[ch];
127 
128  LFGpuConst* host_config = new LFGpuConst(
129  nWave, nWave - 1 - ch, pnX, pnY, ppX, ppY, nX, nY, rX, rY, distance, pi2 / lambda, lambda, bRandomPhase
130  );
131 
132  HANDLE_ERROR(cudaMemcpy(device_config, host_config, sizeof(LFGpuConst), cudaMemcpyHostToDevice));
133 
134  cudaConvertLF2ComplexField_Kernel(0, nBlocks, nThreads, device_config, device_LF, device_FFT_src);
135 
136  //char fname[FILENAME_MAX] = { 0, };
137  //sprintf(fname, "d:\\lf_data_gpu_%d.dat", ch);
138  //FILE* fp = fopen(fname, "wb");
139  //if (fp != nullptr)
140  //{
141  // cufftDoubleComplex* host = new cufftDoubleComplex[NR];
142  // HANDLE_ERROR(cudaMemcpy(host, device_FFT_src, sizeof(cufftDoubleComplex) * NR, cudaMemcpyDeviceToHost));
143  // LOG("wrote: %llu\n", fwrite(host, sizeof(cufftDoubleComplex), NR, fp));
144  // delete[] host;
145  // fclose(fp);
146  //}
147 
148  // 20200824_mwnam_
149  cudaError error = cudaGetLastError();
150  if (error != cudaSuccess) {
151  LOG("cudaGetLastError(): %s\n", cudaGetErrorName(error));
152  if (error == cudaErrorLaunchOutOfResources) {
153  ch--;
154  nThreads /= 2;
155  nBlocks = (R + nThreads - 1) / nThreads;
156  nBlocks2 = (NR + nThreads - 1) / nThreads;
157  nBlocks3 = (NR * 4 + nThreads - 1) / nThreads;
158  nBlocks4 = (N * 4 + nThreads - 1) / nThreads;
159  delete host_config;
160  continue;
161  }
162  }
163 
164 
165  cufftHandle plan;
166  cufftResult result;
167  // fft
168  result = cufftPlan2d(&plan, nY, nX, CUFFT_Z2Z);
169  if (result != CUFFT_SUCCESS)
170  {
171  LOG("<FAILED> cufftPlan2d (%d)\n", result);
172  return;
173  };
174 
175  cufftDoubleComplex* in, *out;
176  for (int r = 0; r < R; r++)
177  {
178  int offset = N * r;
179  in = &device_FFT_src[offset];
180  out = &device_FFT_dst[offset];
181  cudaFFT_LF(&plan, 0, nBlocks4, nThreads, nX, nY, in, out, -1);
182  }
183  if (cudaDeviceSynchronize() != cudaSuccess)
184  LOG("<FAILED> Synchronize\n");
185 
186  cufftDestroy(plan);
187  procMultiplyPhase(0, nBlocks, nThreads, device_config, device_FFT_dst, device_FFT_tmp);
188  cudaFresnelPropagationLF(nBlocks2, nBlocks3, nThreads, pnX, pnY, device_FFT_tmp, device_FFT_tmp2, device_FFT_tmp3, device_dst, device_config);
189 
190  // this problem
191  HANDLE_ERROR(cudaMemcpy(complex_H[ch], device_dst, sizeof(cuDoubleComplex) * pnXY, cudaMemcpyDeviceToHost));
192 
193  delete host_config;
194  }
195 
196  delete[] host_FFT_tmp;
197  cudaFree(device_LF);
198  for (int i = 0; i < N; i++)
199  cudaFree(device_LFData[i]);
200  delete[] device_LFData;
201 
202  cudaFree(device_config);
203  cudaFree(device_FFT_src);
204  cudaFree(device_FFT_dst);
205  cudaFree(device_FFT_tmp);
206  cudaFree(device_FFT_tmp2);
207  cudaFree(device_FFT_tmp3);
208  cudaFree(device_dst);
209  LOG("%s : %.5lf (sec)\n", __FUNCTION__, ELAPSED_TIME(begin, CUR_TIME));
210 }
Real * wave_length
Definition: Openholo.h:106
unsigned char uchar
Definition: typedef.h:64
#define HANDLE_ERROR(err)
Definition: cudaWrapper.cpp:13
static cudaWrapper * getInstance()
Definition: cudaWrapper.h:50
float Real
Definition: typedef.h:55
void cudaConvertLF2ComplexField_Kernel(CUstream_st *stream, const int &nBlocks, const int &nThreads, const LFGpuConst *config, uchar1 **LF, cufftDoubleComplex *output)
#define CUR_TIME
Definition: function.h:58
void convertLF2ComplexField_GPU()
bool GetRandomPhase()
Function for getting the random phase.
Definition: ophGen.h:528
#define _Y
Definition: define.h:96
vec2 pixel_pitch
Definition: Openholo.h:101
#define _X
Definition: define.h:92
void cudaFFT_LF(cufftHandle *plan, CUstream_st *stream, const int &nBlocks, const int &nThreads, const int &nx, const int &ny, cufftDoubleComplex *in_field, cufftDoubleComplex *output_field, const int &direction)
struct KernelConst LFGpuConst
int getMaxThreads(int idx)
Definition: cudaWrapper.h:70
void cudaFresnelPropagationLF(const int &nBlocks, const int &nBlocks2, const int &nThreads, const int &nx, const int &ny, cufftDoubleComplex *src, cufftDoubleComplex *tmp, cufftDoubleComplex *tmp2, cufftDoubleComplex *dst, const LFGpuConst *cuda_config)
#define ELAPSED_TIME(x, y)
Definition: function.h:59
uint waveNum
Definition: Openholo.h:105
void procMultiplyPhase(CUstream_st *stream, const int &nBlocks, const int &nThreads, const LFGpuConst *config, cufftDoubleComplex *in, cufftDoubleComplex *output)
ivec2 pixel_number
Definition: Openholo.h:99
OphConfig context_
Definition: Openholo.h:486
Complex< Real > ** complex_H
Definition: Openholo.h:490
unsigned int uint
Definition: typedef.h:62
#define M_PI
Definition: define.h:52