Openholo  v4.2
Open Source Digital Holographic Library
ophPAS_GPU.cpp
Go to the documentation of this file.
1 #define OPH_DM_EXPORT
2 
3 #include "ophPAS_GPU.h"
4 #include <fstream>
5 #include <iostream>
6 #include <string>
7 #include <iomanip>
8 #include <cooperative_groups.h>
9 #include "sys.h"
10 #include <cuda.h>
11 
12 #include <cuda_device_runtime_api.h>
13 #include "tinyxml2.h"
14 #include "PLYparser.h"
15 #include <cuda_runtime_api.h>
16 #include <device_launch_parameters.h>
17 #include <device_functions.h>
18 #include <math_constants.h>
19 
20 //CGHEnvironmentData CONF; // config
21 
22 using namespace std;
23 using namespace oph;
24 
26  : ophGen()
27 {
28 }
29 
31 {
32 
33 }
34 
35 
43 bool ophPAS_GPU::readConfig(const char* fname) {
44  LOG("Reading....%s...", fname);
45 
46  auto start = CUR_TIME;
47 
48  /*XML parsing*/
49  tinyxml2::XMLDocument xml_doc;
50  tinyxml2::XMLNode *xml_node;
51 
52 
53  if (!checkExtension(fname, ".xml"))
54  {
55  LOG("file's extension is not 'xml'\n");
56  return false;
57  }
58  auto ret = xml_doc.LoadFile(fname);
59  LOG("%d", ret);
60  if (ret != tinyxml2::XML_SUCCESS)
61  {
62  LOG("Failed to load file \"%s\"\n", fname);
63  return false;
64  }
65 
66  xml_node = xml_doc.FirstChild();
67 
68  int nWave = 1;
69  auto next = xml_node->FirstChildElement("ScaleX");
70  if (!next || tinyxml2::XML_SUCCESS != next->QueryDoubleText(&pc_config.scale[_X]))
71  return false;
72  next = xml_node->FirstChildElement("ScaleY");
73  if (!next || tinyxml2::XML_SUCCESS != next->QueryDoubleText(&pc_config.scale[_Y]))
74  return false;
75  next = xml_node->FirstChildElement("ScaleZ");
76  if (!next || tinyxml2::XML_SUCCESS != next->QueryDoubleText(&pc_config.scale[_Z]))
77  return false;
78  next = xml_node->FirstChildElement("Distance");
79  if (!next || tinyxml2::XML_SUCCESS != next->QueryDoubleText(&pc_config.distance))
80  return false;
81 
82 
83 
84 
85  next = xml_node->FirstChildElement("SLM_WaveNum"); // OffsetInDepth
86  if (!next || tinyxml2::XML_SUCCESS != next->QueryIntText(&nWave))
87  return false;
88 
89  context_.waveNum = nWave;
91  context_.wave_length = new Real[nWave];
92 
93  char szNodeName[32] = { 0, };
94  for (int i = 1; i <= nWave; i++) {
95  sprintf(szNodeName, "SLM_WaveLength_%d", i);
96  next = xml_node->FirstChildElement(szNodeName);
97  if (!next || tinyxml2::XML_SUCCESS != next->QueryDoubleText(&context_.wave_length[i - 1]))
98  return false;
99  }
100  next = xml_node->FirstChildElement("SLM_PixelNumX");
101  if (!next || tinyxml2::XML_SUCCESS != next->QueryIntText(&context_.pixel_number[_X]))
102  return false;
103  next = xml_node->FirstChildElement("SLM_PixelNumY");
104  if (!next || tinyxml2::XML_SUCCESS != next->QueryIntText(&context_.pixel_number[_Y]))
105  return false;
106  next = xml_node->FirstChildElement("SLM_PixelPitchX");
107  if (!next || tinyxml2::XML_SUCCESS != next->QueryDoubleText(&context_.pixel_pitch[_X]))
108  return false;
109  next = xml_node->FirstChildElement("SLM_PixelPitchY");
110  if (!next || tinyxml2::XML_SUCCESS != next->QueryDoubleText(&context_.pixel_pitch[_Y]))
111  return false;
112  next = xml_node->FirstChildElement("IMG_Rotation");
113  if (!next || tinyxml2::XML_SUCCESS != next->QueryBoolText(&imgCfg.rotate))
114  imgCfg.rotate = false;
115  next = xml_node->FirstChildElement("IMG_Merge");
116  if (!next || tinyxml2::XML_SUCCESS != next->QueryBoolText(&imgCfg.merge))
117  imgCfg.merge = false;
118  next = xml_node->FirstChildElement("IMG_Flip");
119  if (!next || tinyxml2::XML_SUCCESS != next->QueryIntText(&imgCfg.flip))
120  imgCfg.flip = 0;
121  next = xml_node->FirstChildElement("DoublePrecision");
122  if (!next || tinyxml2::XML_SUCCESS != next->QueryBoolText(&context_.bUseDP))
123  context_.bUseDP = true;
124  next = xml_node->FirstChildElement("ShiftX");
125  if (!next || tinyxml2::XML_SUCCESS != next->QueryDoubleText(&context_.shift[_X]))
126  context_.shift[_X] = 0.0;
127  next = xml_node->FirstChildElement("ShiftY");
128  if (!next || tinyxml2::XML_SUCCESS != next->QueryDoubleText(&context_.shift[_Y]))
129  context_.shift[_Y] = 0.0;
130  next = xml_node->FirstChildElement("ShiftZ");
131  if (!next || tinyxml2::XML_SUCCESS != next->QueryDoubleText(&context_.shift[_Z]))
132  context_.shift[_Z] = 0.0;
133  next = xml_node->FirstChildElement("FieldLength");
134  if (!next || tinyxml2::XML_SUCCESS != next->QueryDoubleText(&m_dFieldLength))
135  m_dFieldLength = 0.0;
136  next = xml_node->FirstChildElement("NumOfStream");
137  if (!next || tinyxml2::XML_SUCCESS != next->QueryIntText(&m_nStream))
138  m_nStream = 1;
139 
142 
145 
147  for (int i = 0; i < nWave; i++)
149 
150  auto end = CUR_TIME;
151 
152  auto during = ((std::chrono::duration<Real>)(end - start)).count();
153 
154  LOG("%.5lfsec...done\n", during);
155  initialize();
156  return true;
157 }
158 
166 int ophPAS_GPU::loadPoint(const char* _filename)
167 {
168  n_points = ophGen::loadPointCloud(_filename, &pc_data);
169  return n_points;
170 }
171 
172 
173 
174 
175 int ophPAS_GPU::save(const char * fname, uint8_t bitsperpixel, uchar* src, uint px, uint py)
176 {
177  if (fname == nullptr) return -1;
178 
179  uchar* source = src;
180  ivec2 p(px, py);
181 
182  if (src == nullptr)
183  source = m_lpNormalized[0];
184  if (px == 0 && py == 0)
186 
187  if (checkExtension(fname, ".bmp")) // when the extension is bmp
188  return Openholo::saveAsImg(fname, bitsperpixel, source, p[_X], p[_Y]);
189  else { // when extension is not .ohf, .bmp - force bmp
190  char buf[256];
191  memset(buf, 0x00, sizeof(char) * 256);
192  sprintf(buf, "%s.bmp", fname);
193 
194  return Openholo::saveAsImg(buf, bitsperpixel, source, p[_X], p[_Y]);
195  }
196 }
197 
204 void ophPAS_GPU::save(const char * fname)
205 {
207  delete[] cgh_fringe;
208 }
209 
210 /*
211 int ophPAS::saveAsImg(const char * fname, uint8_t bitsperpixel, void * src, int pic_width, int pic_height)
212 {
213 LOG("Saving...%s...", fname);
214 auto start = CUR_TIME;
215 
216 int _width = pic_width, _height = pic_height;
217 
218 int _pixelbytesize = _height * _width * bitsperpixel / 8;
219 int _filesize = _pixelbytesize + sizeof(bitmap);
220 
221 FILE *fp;
222 fopen_s(&fp, fname, "wb");
223 if (fp == nullptr) return -1;
224 
225 bitmap *pbitmap = (bitmap*)calloc(1, sizeof(bitmap));
226 memset(pbitmap, 0x00, sizeof(bitmap));
227 
228 pbitmap->fileheader.signature[0] = 'B';
229 pbitmap->fileheader.signature[1] = 'M';
230 pbitmap->fileheader.filesize = _filesize;
231 pbitmap->fileheader.fileoffset_to_pixelarray = sizeof(bitmap);
232 
233 for (int i = 0; i < 256; i++) {
234 pbitmap->rgbquad[i].rgbBlue = i;
235 pbitmap->rgbquad[i].rgbGreen = i;
236 pbitmap->rgbquad[i].rgbRed = i;
237 }
238 
239 pbitmap->bitmapinfoheader.dibheadersize = sizeof(bitmapinfoheader);
240 pbitmap->bitmapinfoheader.width = _width;
241 pbitmap->bitmapinfoheader.height = _height;
242 //pbitmap->bitmapinfoheader.planes = _planes;
243 pbitmap->bitmapinfoheader.bitsperpixel = bitsperpixel;
244 //pbitmap->bitmapinfoheader.compression = _compression;
245 pbitmap->bitmapinfoheader.imagesize = _pixelbytesize;
246 //pbitmap->bitmapinfoheader.ypixelpermeter = _ypixelpermeter;
247 //pbitmap->bitmapinfoheader.xpixelpermeter = _xpixelpermeter;
248 pbitmap->bitmapinfoheader.numcolorspallette = 256;
249 fwrite(pbitmap, 1, sizeof(bitmap), fp);
250 
251 fwrite(src, 1, _pixelbytesize, fp);
252 fclose(fp);
253 free(pbitmap);
254 
255 auto end = CUR_TIME;
256 
257 auto during = ((std::chrono::duration<Real>)(end - start)).count();
258 
259 LOG("%.5lfsec...done\n", during);
260 
261 return 0;
262 }
263 */
264 
265 // ¹®ÀÚ¿­ ¿ìÃø °ø¹é¹®ÀÚ »èÁ¦ ÇÔ¼öchar* ophPAS_GPU::rtrim(char* s) { char t[MAX_STR_LEN]; char *end; strcpy(t, s); end = t + strlen(t) - 1; while (end != t && isspace(*end)) end--; *(end + 1) = '\0'; s = t; return s; } // ¹®ÀÚ¿­ ÁÂÃø °ø¹é¹®ÀÚ »èÁ¦ ÇÔ¼ö char* ophPAS_GPU::ltrim(char* s) { char* begin; begin = s; while (*begin != '\0') { if (isspace(*begin)) begin++; else { s = begin; break; } } return s; } // ¹®ÀÚ¿­ ¾ÕµÚ °ø¹é ¸ðµÎ »èÁ¦ ÇÔ¼ö char* ophPAS_GPU::trim(char* s) { return rtrim(ltrim(s)); } /** @fn void DataInit(OphPointCloudConfig &conf) @brief PAS¾Ë°í¸®Áò ¼öÇàÀ» À§ÇÑ µ¥ÀÌÅÍ ÃʱâÈ­ ÇÔ¼ö @return ¾øÀ½ @param conf: OpenHolo Config¸¦ À§ÇÑ ±¸Á¶Ã¼ */ void ophPAS_GPU::DataInit(OphPointCloudConfig &conf) { m_pHologram = new double[getContext().pixel_number[_X] * getContext().pixel_number[_Y]]; memset(m_pHologram, 0x00, sizeof(double)*getContext().pixel_number[_X] * getContext().pixel_number[_Y]); //for (int i = 0; i < NUMTBL; i++) { // float theta = (float)M2_PI * (float)(i + i - 1) / (float)(2 * NUMTBL); // m_COStbl[i] = (float)cos(theta); // m_SINtbl[i] = (float)sin(theta); //}// -> gpu } /* void ophPAS::PASCalcuation(long voxnum, unsigned char * cghfringe, VoxelStruct * h_vox, CGHEnvironmentData * _CGHE) { long i, j; double Max = -1E9, Min = 1E9; double myBuffer; int cghwidth = _CGHE->CghWidth; int cghheight = _CGHE->CghHeight; DataInit(_CGHE); //PAS // PAS(voxnum, h_vox, m_pHologram, _CGHE); // for (i = 0; i<cghheight; i++) { for (j = 0; j<cghwidth; j++) { if (Max < m_pHologram[i*cghwidth + j]) Max = m_pHologram[i*cghwidth + j]; if (Min > m_pHologram[i*cghwidth + j]) Min = m_pHologram[i*cghwidth + j]; } } for (i = 0; i<cghheight; i++) { for (j = 0; j<cghwidth; j++) { myBuffer = 1.0*(((m_pHologram[i*cghwidth + j] - Min) / (Max - Min))*255. + 0.5); if (myBuffer >= 255.0) cghfringe[i*cghwidth + j] = 255; else cghfringe[i*cghwidth + j] = (unsigned char)(myBuffer); } } } */ /** @fn void PASCalculation(long voxnum, unsigned char * cghfringe, OphPointCloudData *data, OphPointCloudConfig& conf) @brief PAS¾Ë°í¸®Áò ¼öÇà ÇÔ¼ö @return ¾øÀ½ @param voxnum: vertex °³¼ö cghfringe: À̹ÌÁö·Î ÀúÀåÇÒ ¹è¿­ data: OpenHolo Data°ü·Ã ±¸Á¶Ã¼ conf: OpenHolo Config°ü·Ã ±¸Á¶Ã¼ */ void ophPAS_GPU::PASCalculation(long voxnum, unsigned char * cghfringe, OphPointCloudData *data, OphPointCloudConfig& conf) { long i, j; double Max = -1E9, Min = 1E9; double myBuffer; int cghwidth = getContext().pixel_number[_X]; int cghheight = getContext().pixel_number[_Y]; //DataInit(_CGHE); DataInit(conf); //PAS // //PAS(voxnum, h_vox, m_pHologram, _CGHE); PAS(voxnum, data, m_pHologram, conf); // /* °í¼ÓÈ­ ÇÊ¿äÇÑ º¯¼ö m_pHologram cghfringe º¯¼ö´Â unified memory »ç¿ëÇÏ´Â ¹æÇâÀ¸·Î */ for (i = 0; i < cghheight; i++) { for (j = 0; j < cghwidth; j++) { if (Max < m_pHologram[i*cghwidth + j]) Max = m_pHologram[i*cghwidth + j]; if (Min > m_pHologram[i*cghwidth + j]) Min = m_pHologram[i*cghwidth + j]; } } for (i = 0; i < cghheight; i++) { for (j = 0; j < cghwidth; j++) { myBuffer = 1.0*(((m_pHologram[i*cghwidth + j] - Min) / (Max - Min))*255. + 0.5); if (myBuffer >= 255.0) cghfringe[i*cghwidth + j] = 255; else cghfringe[i*cghwidth + j] = (unsigned char)(myBuffer); } } } /* void ophPAS::PAS(long voxelnum, VoxelStruct * voxel, double * m_pHologram, CGHEnvironmentData* _CGHE) { float xiInterval = _CGHE->xiInterval; float etaInterval = _CGHE->etaInterval; float cghScale = _CGHE->CGHScale; float defaultDepth = _CGHE->DefaultDepth; DataInit(_CGHE->fftSegmentationSize, _CGHE->CghWidth, _CGHE->CghHeight, xiInterval, etaInterval); int no; // voxel Number float X, Y, Z; ; // x, y, real distance float Amplitude; float sf_base = 1.0 / (xiInterval*_CGHE->fftSegmentationSize); //CString mm; clock_t start, finish; double duration; start = clock(); // Iteration according to the point number for (no = 0; no<voxelnum; no++) { // point coordinate X = (voxel[no].x) * cghScale; Y = (voxel[no].y) * cghScale; Z = voxel[no].z * cghScale - defaultDepth; Amplitude = voxel[no].r; CalcSpatialFrequency(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_SFrequency_cx, m_SFrequency_cy , m_PickPoint_cx, m_PickPoint_cy , m_Coefficient_cx, m_Coefficient_cy , xiInterval, etaInterval,_CGHE); CalcCompensatedPhase(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_Coefficient_cx, m_Coefficient_cy , m_COStbl, m_SINtbl , m_inRe, m_inIm,_CGHE); } RunFFTW(m_segNumx, m_segNumy , m_segSize, m_hsegSize , m_inRe, m_inIm , m_in, m_out , &m_plan, m_pHologram,_CGHE); finish = clock(); duration = (double)(finish - start) / CLOCKS_PER_SEC; //mm.Format("%f", duration); //AfxMessageBox(mm); cout << duration << endl; MemoryRelease(); } */ /** @fn void PAS(long voxnum, OphPointCloudData *data, double* m_pHologram,OphPointCloudConfig& conf) @brief implementation of the PAS @return ¾øÀ½ @param voxnum: vertex °³¼ö data: OpenHolo Data°ü·Ã ±¸Á¶Ã¼ m_pHologram: fftw °á°ú¸¦ ³ÖÀ» º¯¼ö conf: OpenHolo Config°ü·Ã ±¸Á¶Ã¼ */ void ophPAS_GPU::PAS(long voxelnum, OphPointCloudData *data, double * m_pHologram, OphPointCloudConfig& conf) { float xiInterval = getContext().pixel_pitch[_X];//_CGHE->xiInterval; float etaInterval = getContext().pixel_pitch[_Y];//_CGHE->etaInterval; float cghScale = conf.scale[_X];// _CGHE->CGHScale; float defaultDepth = conf.distance;//_CGHE->DefaultDepth; DataInit(FFT_SEGMENT_SIZE, getContext().pixel_number[_X], getContext().pixel_number[_Y], xiInterval, etaInterval); long no; // voxel Number float X, Y, Z; ; // x, y, real distance float Amplitude; float sf_base = 1.0 / (xiInterval* FFT_SEGMENT_SIZE); //CString mm; clock_t start, finish; double duration; start = clock(); // Iteration according to the point number for (no = 0; no < voxelnum * 3; no += 3) { // point coordinate X = (data->vertices[no].point.pos[_X]) * cghScale; Y = (data->vertices[no].point.pos[_Y]) * cghScale; Z = (data->vertices[no].point.pos[_Z]) * cghScale - defaultDepth; Amplitude = data->vertices[no].phase; std::cout << "X: " << X << ", Y: " << Y << ", Z: " << Z << ", Amp: " << Amplitude << endl; //c_x = X; //c_y = Y; //c_z = Z; //amplitude = Amplitude; /* CalcSpatialFrequency(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_SFrequency_cx, m_SFrequency_cy , m_PickPoint_cx, m_PickPoint_cy , m_Coefficient_cx, m_Coefficient_cy , xiInterval, etaInterval, _CGHE); */ CalcSpatialFrequency(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_SFrequency_cx, m_SFrequency_cy , m_PickPoint_cx, m_PickPoint_cy , m_Coefficient_cx, m_Coefficient_cy , xiInterval, etaInterval, conf); /* CalcCompensatedPhase(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_Coefficient_cx, m_Coefficient_cy , m_COStbl, m_SINtbl , m_inRe, m_inIm, _CGHE); */ CalcCompensatedPhase(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_Coefficient_cx, m_Coefficient_cy , m_COStbl, m_SINtbl , m_inRe, m_inIm, conf); } /* RunFFTW(m_segNumx, m_segNumy , m_segSize, m_hsegSize , m_inRe, m_inIm , m_in, m_out , &m_plan, m_pHologram, _CGHE); */ RunFFTW(m_segNumx, m_segNumy , m_segSize, m_hsegSize , m_inRe, m_inIm , m_in, m_out , &m_plan, m_pHologram, conf); finish = clock(); duration = (double)(finish - start) / CLOCKS_PER_SEC; //mm.Format("%f", duration); //AfxMessageBox(mm); cout << duration << endl; MemoryRelease(); } /** @fn void DataInit(int segsize, int cghwidth, int cghheight, float xiinter, float etainter) @brief PAS ¾Ë°í¸®Áò ¼öÇàÀ» À§ÇÑ µ¥ÀÌÅÍ ÃʱâÈ­ ÇÔ¼ö @return ¾øÀ½ @param segsize: cghwidth: cghheight: xiinter: etainter: */ void ophPAS_GPU::DataInit(int segsize, int cghwidth, int cghheight, float xiinter, float etainter) { int i; // size m_segSize = segsize; m_hsegSize = (int)(m_segSize / 2); m_dsegSize = m_segSize*m_segSize; m_segNumx = (int)(cghwidth / m_segSize); m_segNumy = (int)(cghheight / m_segSize); m_hsegNumx = (int)(m_segNumx / 2); m_hsegNumy = (int)(m_segNumy / 2); cudaMallocHost((void**)&m_inRe_h, sizeof(float)*m_segNumy * m_segNumx * m_segSize * m_segSize); cudaMallocHost((void**)&m_inIm_h, sizeof(float)*m_segNumy * m_segNumx * m_segSize * m_segSize); cudaMallocHost((void**)&m_Coefficient_cx, sizeof(int)*m_segNumx); cudaMallocHost((void**)&m_Coefficient_cy, sizeof(int)*m_segNumy); cudaMallocHost((void**)&m_xc, sizeof(float)*m_segNumx); cudaMallocHost((void**)&m_yc, sizeof(float)*m_segNumy); cudaMallocHost((void**)&m_COStbl, sizeof(float)*NUMTBL); cudaMallocHost((void**)&m_SINtbl, sizeof(float)*NUMTBL); /*m_inRe_h = new float[m_segNumy * m_segNumx * m_segSize * m_segSize]{ 0 }; m_inIm_h = new float[m_segNumy * m_segNumx * m_segSize * m_segSize]{ 0 }; m_COStbl = new float[NUMTBL]; m_SINtbl = new float[NUMTBL]; m_Coefficient_cx = new int[m_segNumx]; m_Coefficient_cy = new int[m_segNumy]; m_xc = new float[m_segNumx]; m_yc = new float[m_segNumy];*/ // calculation components m_SFrequency_cx = new float[m_segNumx]; m_SFrequency_cy = new float[m_segNumy]; m_PickPoint_cx = new int[m_segNumx]; m_PickPoint_cy = new int[m_segNumy]; for (int i = 0; i<NUMTBL; i++) { float theta = (float)M2_PI * (float)(i + i - 1) / (float)(2 * NUMTBL); m_COStbl[i] = (float)cos(theta); m_SINtbl[i] = (float)sin(theta); } // base spatial frequency m_sf_base = (float)(1.0 / (xiinter*m_segSize)); /* for (i = 0; i<m_segNumy; i++) { for (j = 0; j<m_segNumx; j++) { m_inRe[i*m_segNumx + j] = new float[m_segSize * m_segSize]; m_inIm[i*m_segNumx + j] = new float[m_segSize * m_segSize]; memset(m_inRe[i*m_segNumx + j], 0x00, sizeof(float) * m_segSize * m_segSize); memset(m_inIm[i*m_segNumx + j], 0x00, sizeof(float) * m_segSize * m_segSize); } } */ m_in = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * m_segSize * m_segSize); m_out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * m_segSize * m_segSize); memset(m_in, 0x00, sizeof(fftw_complex) * m_segSize * m_segSize); // segmentation center point calculation for (i = 0; i<m_segNumy; i++) m_yc[i] = ((i - m_hsegNumy) * m_segSize + m_hsegSize) * etainter; for (i = 0; i<m_segNumx; i++) m_xc[i] = ((i - m_hsegNumx) * m_segSize + m_hsegSize) * xiinter; m_plan = fftw_plan_dft_2d(m_segSize, m_segSize, m_in, m_out, FFTW_BACKWARD, FFTW_ESTIMATE); //sex = m_segNumx; //sey = m_segNumy; //sen = segsize; } void ophPAS_GPU::MemoryRelease(void) { cudaFree(&se); fftw_destroy_plan(m_plan); fftw_free(m_in); fftw_free(m_out); delete[] m_SFrequency_cx; delete[] m_SFrequency_cy; delete[] m_PickPoint_cx; delete[] m_PickPoint_cy; /*delete[] m_Coefficient_cx; delete[] m_Coefficient_cy; delete[] m_xc; delete[] m_yc; delete[] m_COStbl; delete[] m_SINtbl; delete[] m_inRe_h; delete[] m_inIm_h;*/ cudaFreeHost(m_Coefficient_cx); cudaFreeHost(m_Coefficient_cy); cudaFreeHost(m_xc); cudaFreeHost(m_yc); cudaFreeHost(m_COStbl); cudaFreeHost(m_SINtbl); cudaFreeHost(m_inRe_h); cudaFreeHost(m_inIm_h); /* for (i = 0; i<m_segNumy; i++) { for (j = 0; j<m_segNumx; j++) { delete[] m_inRe[i*m_segNumx + j]; delete[] m_inIm[i*m_segNumx + j]; } } */ } void ophPAS_GPU::generateHologram() { auto begin = CUR_TIME; cgh_fringe = new unsigned char[context_.pixel_number[_X] * context_.pixel_number[_Y]]; PASCalculation(n_points, cgh_fringe, &pc_data, pc_config); auto end = CUR_TIME; m_elapsedTime = ((std::chrono::duration<Real>)(end - begin)).count(); LOG("Total Elapsed Time: %lf (s)\n", m_elapsedTime); } void ophPAS_GPU::PASCalculation_GPU(long voxnum, unsigned char * cghfringe, OphPointCloudData * data, OphPointCloudConfig & conf) { } void ophPAS_GPU::CalcSpatialFrequency(float cx, float cy, float cz, float amp, int segnumx, int segnumy, int segsize, int hsegsize, float sf_base, float * xc, float * yc, float * sf_cx, float * sf_cy, int * pp_cx, int * pp_cy, int * cf_cx, int * cf_cy, float xiint, float etaint, OphPointCloudConfig& conf) { int segx, segy; // coordinate in a Segment float theta_cx, theta_cy; float rWaveLength = getContext().wave_length[0];//_CGHE->rWaveLength; float thetaX = 0.0;// _CGHE->ThetaX; float thetaY = 0.0;// _CGHE->ThetaY; for (segx = 0; segx < segnumx; segx++) { theta_cx = (xc[segx] - cx) / cz; sf_cx[segx] = (float)((theta_cx + thetaX) / rWaveLength); (sf_cx[segx] >= 0) ? pp_cx[segx] = (int)(sf_cx[segx] / sf_base + 0.5) : pp_cx[segx] = (int)(sf_cx[segx] / sf_base - 0.5); (abs(pp_cx[segx]) < hsegsize) ? cf_cx[segx] = ((segsize - pp_cx[segx]) % segsize) : cf_cx[segx] = 0; } for (segy = 0; segy < segnumy; segy++) { theta_cy = (yc[segy] - cy) / cz; sf_cy[segy] = (float)((theta_cy + thetaY) / rWaveLength); (sf_cy[segy] >= 0) ? pp_cy[segy] = (int)(sf_cy[segy] / sf_base + 0.5) : pp_cy[segy] = (int)(sf_cy[segy] / sf_base - 0.5); (abs(pp_cy[segy]) < hsegsize) ? cf_cy[segy] = ((segsize - pp_cy[segy]) % segsize) : cf_cy[segy] = 0; } } void ophPAS_GPU::CalcCompensatedPhase(float cx, float cy, float cz, float amp , int segNumx, int segNumy , int segsize, int hsegsize, float sf_base , float *xc, float *yc , int *cf_cx, int *cf_cy , float *COStbl, float *SINtbl , float **inRe, float **inIm, OphPointCloudConfig& conf) { /* CUDA 󸮰úÁ¤ÀÌ ¼øÂ÷ÀûÀ¸·Î 󸮰¡ µÇ±â ¶§¹®¿¡, È£½ºÆ®¿¡¼­ µð¹ÙÀ̽º·Î µ¥ÀÌÅ͸¦ º¹»çÇÏ´Â °úÁ¤¿¡¼­ GPU´Â ´ë±âÇÏ°Ô µÈ´Ù. µû¶ó¼­ 󸮰úÁ¤À» ´ÙÀ½°ú °°ÀÌ Á¤ÇÑ´Ù. 1. cudaMallocHost¸¦ ÅëÇØ È£½ºÆ®Äڵ带 °­Á¦·Î pinned memory·Î »ç¿ë(GPU·Î µ¥ÀÌÅÍ Àü¼ÛÀÌ »¡¶óÁú ¼ö ÀÖÀ½) 2. cudaMemcpyAsync·Î µ¥ÀÌÅÍ Àü¼Û ¼Óµµ up(cudaMemcpy´Â µ¿±âÈ­¹æ½Ä) 3. cudastream »ç¿ëÀ¸·Î º´·Äó¸® ±Ø´ëÈ­ ¼º´Éºñ±³´Â ´ÙÀ½°ú °°ÀÌ ÇÑ´Ù. 1. ÀϹÝÀûÀÎ CUDA Programming 2. °³¼±µÈ CUDA Programming(pinned memory »ç¿ë, cudastream »ç¿ë) 3. CPU ÄÚµå */ constValue d_const; int num_x = segNumx*segNumy; int num_y = segsize*segsize; float* inRe_d; float* inIm_d; /*cudaMalloc((void**)&inRe_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&inIm_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&d_const.cf_cx, sizeof(int)*segNumx); cudaMalloc((void**)&d_const.cf_cy, sizeof(int)*segNumy); cudaMalloc((void**)&d_const.xc, sizeof(float)*segNumx); cudaMalloc((void**)&d_const.yc, sizeof(float)*segNumy); cudaMalloc((void**)&d_const.costbl, sizeof(float)*NUMTBL); cudaMalloc((void**)&d_const.sintbl, sizeof(float)*NUMTBL);*/ cudaMalloc((void**)&inRe_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&inIm_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&d_const.cf_cx, sizeof(int)*segNumx); cudaMalloc((void**)&d_const.cf_cy, sizeof(int)*segNumy); cudaMalloc((void**)&d_const.xc, sizeof(float)*segNumx); cudaMalloc((void**)&d_const.yc, sizeof(float)*segNumy); cudaMalloc((void**)&d_const.costbl, sizeof(float)*NUMTBL); cudaMalloc((void**)&d_const.sintbl, sizeof(float)*NUMTBL); cudaMemcpyAsync(inRe_d, m_inRe_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpyAsync(inIm_d, m_inIm_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.cf_cx, cf_cx , sizeof(int)*segNumx, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.cf_cy, cf_cy , sizeof(int)*segNumy, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.xc, xc, sizeof(float)*segNumx, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.yc, yc, sizeof(float)*segNumy, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.costbl, COStbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.sintbl, SINtbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice); ivec3 v(segNumx, segNumy, segsize); cuda_Wrapper_phaseCalc(inRe_d, inIm_d, d_const, cx, cy, cz, amp, v); //phaseCalc << <blockSize, gridSize >> >(inRe_d, inIm_d, d_const); cudaMemcpyAsync(m_inRe_h, inRe_d , sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); cudaMemcpyAsync(m_inIm_h, inIm_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); /*cudaMemcpy(inRe_d, m_inRe_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpy(inIm_d, m_inIm_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpy(d_const.cf_cx, cf_cx, sizeof(int)*segNumx, cudaMemcpyHostToDevice); cudaMemcpy(d_const.cf_cy, cf_cy, sizeof(int)*segNumy, cudaMemcpyHostToDevice); cudaMemcpy(d_const.xc, xc, sizeof(float)*segNumx, cudaMemcpyHostToDevice); cudaMemcpy(d_const.yc, yc, sizeof(float)*segNumy, cudaMemcpyHostToDevice); cudaMemcpy(d_const.costbl, COStbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice); cudaMemcpy(d_const.sintbl, SINtbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice);*/ /*cudaMemcpyAsync(m_inRe_h, inRe_d, sizeof(float)*i*j, cudaMemcpyDeviceToHost); cudaMemcpyAsync(m_inIm_h, inIm_d, sizeof(float)*i*j, cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); cudaFreeHost(d_const.cf_cx); cudaFreeHost(d_const.cf_cy); cudaFreeHost(d_const.xc); cudaFreeHost(d_const.yc); cudaFreeHost(d_const.costbl); cudaFreeHost(d_const.sintbl); cudaFreeHost(inRe_d); cudaFreeHost(inIm_d); */ /*for (int i = 0; i < nStreams; i++) cudaStreamDestroy(streams[i]);*/ cudaFree(d_const.cf_cx); cudaFree(d_const.cf_cy); cudaFree(d_const.xc); cudaFree(d_const.yc); cudaFree(d_const.costbl); cudaFree(d_const.sintbl); cudaFree(inRe_d); cudaFree(inIm_d); /*cudaFreeHost(cf_cx); cudaFreeHost(cf_cy); cudaFreeHost(xc); cudaFreeHost(yc); cudaFreeHost(COStbl); cudaFreeHost(SINtbl); cudaFreeHost(m_inRe_h); cudaFreeHost(m_inIm_h);*/ /*phaseCalc << <blockSize, gridSize >> >(inRe_d, inIm_d, d_const); cudaMemcpy(m_inRe_h, inRe_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); cudaMemcpy(m_inIm_h, inIm_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); cudaFree(d_const.cf_cx); cudaFree(d_const.cf_cy); cudaFree(d_const.xc); cudaFree(d_const.yc); cudaFree(d_const.costbl); cudaFree(d_const.sintbl); cudaFree(inRe_d); cudaFree(inIm_d);*/ } /** @fn void RunFFTW(int segnumx, int segnumy, int segsize, int hsegsize, float ** inRe, float ** inIm, fftw_complex * in, fftw_complex * out, fftw_plan * plan, double * pHologram, OphPointCloudConfig& conf) @brief Ǫ¸®¿¡ º¯È¯ ¼öÇà ÇÔ¼ö @return ¾øÀ½ @param voxnum: vertex °³¼ö cghfringe: À̹ÌÁö·Î ÀúÀåÇÒ ¹è¿­ data: OpenHolo Data°ü·Ã ±¸Á¶Ã¼ conf: OpenHolo Config°ü·Ã ±¸Á¶Ã¼ */ void ophPAS_GPU::RunFFTW(int segnumx, int segnumy, int segsize, int hsegsize, float ** inRe, float ** inIm, fftw_complex * in, fftw_complex * out, fftw_plan * plan, double * pHologram, OphPointCloudConfig& conf) { int i, j; int segx, segy; // coordinate in a Segment int segxx, segyy; int cghWidth = getContext().pixel_number[_X]; int rows = m_segNumy; int cols = m_segNumx; for (segy = 0; segy < segnumy; segy++) { for (segx = 0; segx < segnumx; segx++) { segyy = segy * segnumx + segx; memset(in, 0x00, sizeof(fftw_complex) * segsize * segsize); for (i = 0; i < segsize; i++) { for (j = 0; j < segsize; j++) { segxx = i * segsize + j; in[i*segsize + j][0] = m_inRe_h[segyy*segsize*segsize+segxx]; in[i*segsize + j][1] = m_inIm_h[segyy*segsize*segsize+segxx]; //inIm_h°ªÀÌ ´Ù¸§(x) ´Ù °°À½ } } fftw_execute(*plan); for (i = 0; i < segsize; i++) { for (j = 0; j < segsize; j++) { pHologram[(segy*segsize + i)*cghWidth + (segx*segsize + j)] = out[i * segsize + j][0];// - out[l * SEGSIZE + m][1]; } } } } } void ophPAS_GPU::encodeHologram(const vec2 band_limit, const vec2 spectrum_shift) { if (complex_H == nullptr) { LOG("Not found diffracted data."); return; } LOG("Single Side Band Encoding.."); const uint nChannel = context_.waveNum; const uint pnX = context_.pixel_number[_X]; const uint pnY = context_.pixel_number[_Y]; const Real ppX = context_.pixel_pitch[_X]; const Real ppY = context_.pixel_pitch[_Y]; const long long int pnXY = pnX * pnY; m_vecEncodeSize = ivec2(pnX, pnY); context_.ss[_X] = pnX * ppX; context_.ss[_Y] = pnY * ppY; vec2 ss = context_.ss; Real cropx = floor(pnX * band_limit[_X]); Real cropx1 = cropx - floor(cropx / 2); Real cropx2 = cropx1 + cropx - 1; Real cropy = floor(pnY * band_limit[_Y]); Real cropy1 = cropy - floor(cropy / 2); Real cropy2 = cropy1 + cropy - 1; Real* x_o = new Real[pnX]; Real* y_o = new Real[pnY]; for (uint i = 0; i < pnX; i++) x_o[i] = (-ss[_X] / 2) + (ppX * i) + (ppX / 2); for (uint i = 0; i < pnY; i++) y_o[i] = (ss[_Y] - ppY) - (ppY * i); Real* xx_o = new Real[pnXY]; Real* yy_o = new Real[pnXY]; for (uint i = 0; i < pnXY; i++) xx_o[i] = x_o[i % pnX]; for (uint i = 0; i < pnX; i++) for (uint j = 0; j < pnY; j++) yy_o[i + j * pnX] = y_o[j]; Complex<Real>* h = new Complex<Real>[pnXY]; for (uint ch = 0; ch < nChannel; ch++) { fft2(complex_H[ch], h, pnX, pnY, OPH_FORWARD); fft2(ivec2(pnX, pnY), h, OPH_FORWARD); fftExecute(h); fft2(h, h, pnX, pnY, OPH_BACKWARD); fft2(h, h, pnX, pnY, OPH_FORWARD); fft2(ivec2(pnX, pnY), h, OPH_BACKWARD); fftExecute(h); fft2(h, h, pnX, pnY, OPH_BACKWARD); for (int i = 0; i < pnXY; i++) { Complex<Real> shift_phase(1.0, 0.0); int r = i / pnX; int c = i % pnX; Real X = (M_PI * xx_o[i] * spectrum_shift[_X]) / ppX; Real Y = (M_PI * yy_o[i] * spectrum_shift[_Y]) / ppY; shift_phase[_RE] = shift_phase[_RE] * (cos(X) * cos(Y) - sin(X) * sin(Y)); m_lpEncoded[ch][i] = (h[i] * shift_phase).real(); } } delete[] h; delete[] x_o; delete[] xx_o; delete[] y_o; delete[] yy_o; LOG("Done.\n"); } /** @fn void encoding(unsigned int ENCODE_FLAG) @brief abstract function of ophGen::encoding @return ¾øÀ½ @param ENCODE_FLAG: ¾Ïȣȭ ¸Þ¼­µå */ void ophPAS_GPU::encoding(unsigned int ENCODE_FLAG) { ophGen::encoding(ENCODE_FLAG); }
266 char* ophPAS_GPU::rtrim(char* s)
267 {
268  char t[MAX_STR_LEN];
269  char *end;
270 
271  strcpy(t, s);
272  end = t + strlen(t) - 1;
273  while (end != t && isspace(*end))
274  end--;
275  *(end + 1) = '\0';
276  s = t;
277 
278  return s;
279 }
280 
281 // ¹®ÀÚ¿­ ÁÂÃø °ø¹é¹®ÀÚ »èÁ¦ ÇÔ¼öchar* ophPAS_GPU::ltrim(char* s) { char* begin; begin = s; while (*begin != '\0') { if (isspace(*begin)) begin++; else { s = begin; break; } } return s; } // ¹®ÀÚ¿­ ¾ÕµÚ °ø¹é ¸ðµÎ »èÁ¦ ÇÔ¼ö char* ophPAS_GPU::trim(char* s) { return rtrim(ltrim(s)); } /** @fn void DataInit(OphPointCloudConfig &conf) @brief PAS¾Ë°í¸®Áò ¼öÇàÀ» À§ÇÑ µ¥ÀÌÅÍ ÃʱâÈ­ ÇÔ¼ö @return ¾øÀ½ @param conf: OpenHolo Config¸¦ À§ÇÑ ±¸Á¶Ã¼ */ void ophPAS_GPU::DataInit(OphPointCloudConfig &conf) { m_pHologram = new double[getContext().pixel_number[_X] * getContext().pixel_number[_Y]]; memset(m_pHologram, 0x00, sizeof(double)*getContext().pixel_number[_X] * getContext().pixel_number[_Y]); //for (int i = 0; i < NUMTBL; i++) { // float theta = (float)M2_PI * (float)(i + i - 1) / (float)(2 * NUMTBL); // m_COStbl[i] = (float)cos(theta); // m_SINtbl[i] = (float)sin(theta); //}// -> gpu } /* void ophPAS::PASCalcuation(long voxnum, unsigned char * cghfringe, VoxelStruct * h_vox, CGHEnvironmentData * _CGHE) { long i, j; double Max = -1E9, Min = 1E9; double myBuffer; int cghwidth = _CGHE->CghWidth; int cghheight = _CGHE->CghHeight; DataInit(_CGHE); //PAS // PAS(voxnum, h_vox, m_pHologram, _CGHE); // for (i = 0; i<cghheight; i++) { for (j = 0; j<cghwidth; j++) { if (Max < m_pHologram[i*cghwidth + j]) Max = m_pHologram[i*cghwidth + j]; if (Min > m_pHologram[i*cghwidth + j]) Min = m_pHologram[i*cghwidth + j]; } } for (i = 0; i<cghheight; i++) { for (j = 0; j<cghwidth; j++) { myBuffer = 1.0*(((m_pHologram[i*cghwidth + j] - Min) / (Max - Min))*255. + 0.5); if (myBuffer >= 255.0) cghfringe[i*cghwidth + j] = 255; else cghfringe[i*cghwidth + j] = (unsigned char)(myBuffer); } } } */ /** @fn void PASCalculation(long voxnum, unsigned char * cghfringe, OphPointCloudData *data, OphPointCloudConfig& conf) @brief PAS¾Ë°í¸®Áò ¼öÇà ÇÔ¼ö @return ¾øÀ½ @param voxnum: vertex °³¼ö cghfringe: À̹ÌÁö·Î ÀúÀåÇÒ ¹è¿­ data: OpenHolo Data°ü·Ã ±¸Á¶Ã¼ conf: OpenHolo Config°ü·Ã ±¸Á¶Ã¼ */ void ophPAS_GPU::PASCalculation(long voxnum, unsigned char * cghfringe, OphPointCloudData *data, OphPointCloudConfig& conf) { long i, j; double Max = -1E9, Min = 1E9; double myBuffer; int cghwidth = getContext().pixel_number[_X]; int cghheight = getContext().pixel_number[_Y]; //DataInit(_CGHE); DataInit(conf); //PAS // //PAS(voxnum, h_vox, m_pHologram, _CGHE); PAS(voxnum, data, m_pHologram, conf); // /* °í¼ÓÈ­ ÇÊ¿äÇÑ º¯¼ö m_pHologram cghfringe º¯¼ö´Â unified memory »ç¿ëÇÏ´Â ¹æÇâÀ¸·Î */ for (i = 0; i < cghheight; i++) { for (j = 0; j < cghwidth; j++) { if (Max < m_pHologram[i*cghwidth + j]) Max = m_pHologram[i*cghwidth + j]; if (Min > m_pHologram[i*cghwidth + j]) Min = m_pHologram[i*cghwidth + j]; } } for (i = 0; i < cghheight; i++) { for (j = 0; j < cghwidth; j++) { myBuffer = 1.0*(((m_pHologram[i*cghwidth + j] - Min) / (Max - Min))*255. + 0.5); if (myBuffer >= 255.0) cghfringe[i*cghwidth + j] = 255; else cghfringe[i*cghwidth + j] = (unsigned char)(myBuffer); } } } /* void ophPAS::PAS(long voxelnum, VoxelStruct * voxel, double * m_pHologram, CGHEnvironmentData* _CGHE) { float xiInterval = _CGHE->xiInterval; float etaInterval = _CGHE->etaInterval; float cghScale = _CGHE->CGHScale; float defaultDepth = _CGHE->DefaultDepth; DataInit(_CGHE->fftSegmentationSize, _CGHE->CghWidth, _CGHE->CghHeight, xiInterval, etaInterval); int no; // voxel Number float X, Y, Z; ; // x, y, real distance float Amplitude; float sf_base = 1.0 / (xiInterval*_CGHE->fftSegmentationSize); //CString mm; clock_t start, finish; double duration; start = clock(); // Iteration according to the point number for (no = 0; no<voxelnum; no++) { // point coordinate X = (voxel[no].x) * cghScale; Y = (voxel[no].y) * cghScale; Z = voxel[no].z * cghScale - defaultDepth; Amplitude = voxel[no].r; CalcSpatialFrequency(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_SFrequency_cx, m_SFrequency_cy , m_PickPoint_cx, m_PickPoint_cy , m_Coefficient_cx, m_Coefficient_cy , xiInterval, etaInterval,_CGHE); CalcCompensatedPhase(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_Coefficient_cx, m_Coefficient_cy , m_COStbl, m_SINtbl , m_inRe, m_inIm,_CGHE); } RunFFTW(m_segNumx, m_segNumy , m_segSize, m_hsegSize , m_inRe, m_inIm , m_in, m_out , &m_plan, m_pHologram,_CGHE); finish = clock(); duration = (double)(finish - start) / CLOCKS_PER_SEC; //mm.Format("%f", duration); //AfxMessageBox(mm); cout << duration << endl; MemoryRelease(); } */ /** @fn void PAS(long voxnum, OphPointCloudData *data, double* m_pHologram,OphPointCloudConfig& conf) @brief implementation of the PAS @return ¾øÀ½ @param voxnum: vertex °³¼ö data: OpenHolo Data°ü·Ã ±¸Á¶Ã¼ m_pHologram: fftw °á°ú¸¦ ³ÖÀ» º¯¼ö conf: OpenHolo Config°ü·Ã ±¸Á¶Ã¼ */ void ophPAS_GPU::PAS(long voxelnum, OphPointCloudData *data, double * m_pHologram, OphPointCloudConfig& conf) { float xiInterval = getContext().pixel_pitch[_X];//_CGHE->xiInterval; float etaInterval = getContext().pixel_pitch[_Y];//_CGHE->etaInterval; float cghScale = conf.scale[_X];// _CGHE->CGHScale; float defaultDepth = conf.distance;//_CGHE->DefaultDepth; DataInit(FFT_SEGMENT_SIZE, getContext().pixel_number[_X], getContext().pixel_number[_Y], xiInterval, etaInterval); long no; // voxel Number float X, Y, Z; ; // x, y, real distance float Amplitude; float sf_base = 1.0 / (xiInterval* FFT_SEGMENT_SIZE); //CString mm; clock_t start, finish; double duration; start = clock(); // Iteration according to the point number for (no = 0; no < voxelnum * 3; no += 3) { // point coordinate X = (data->vertices[no].point.pos[_X]) * cghScale; Y = (data->vertices[no].point.pos[_Y]) * cghScale; Z = (data->vertices[no].point.pos[_Z]) * cghScale - defaultDepth; Amplitude = data->vertices[no].phase; std::cout << "X: " << X << ", Y: " << Y << ", Z: " << Z << ", Amp: " << Amplitude << endl; //c_x = X; //c_y = Y; //c_z = Z; //amplitude = Amplitude; /* CalcSpatialFrequency(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_SFrequency_cx, m_SFrequency_cy , m_PickPoint_cx, m_PickPoint_cy , m_Coefficient_cx, m_Coefficient_cy , xiInterval, etaInterval, _CGHE); */ CalcSpatialFrequency(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_SFrequency_cx, m_SFrequency_cy , m_PickPoint_cx, m_PickPoint_cy , m_Coefficient_cx, m_Coefficient_cy , xiInterval, etaInterval, conf); /* CalcCompensatedPhase(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_Coefficient_cx, m_Coefficient_cy , m_COStbl, m_SINtbl , m_inRe, m_inIm, _CGHE); */ CalcCompensatedPhase(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_Coefficient_cx, m_Coefficient_cy , m_COStbl, m_SINtbl , m_inRe, m_inIm, conf); } /* RunFFTW(m_segNumx, m_segNumy , m_segSize, m_hsegSize , m_inRe, m_inIm , m_in, m_out , &m_plan, m_pHologram, _CGHE); */ RunFFTW(m_segNumx, m_segNumy , m_segSize, m_hsegSize , m_inRe, m_inIm , m_in, m_out , &m_plan, m_pHologram, conf); finish = clock(); duration = (double)(finish - start) / CLOCKS_PER_SEC; //mm.Format("%f", duration); //AfxMessageBox(mm); cout << duration << endl; MemoryRelease(); } /** @fn void DataInit(int segsize, int cghwidth, int cghheight, float xiinter, float etainter) @brief PAS ¾Ë°í¸®Áò ¼öÇàÀ» À§ÇÑ µ¥ÀÌÅÍ ÃʱâÈ­ ÇÔ¼ö @return ¾øÀ½ @param segsize: cghwidth: cghheight: xiinter: etainter: */ void ophPAS_GPU::DataInit(int segsize, int cghwidth, int cghheight, float xiinter, float etainter) { int i; // size m_segSize = segsize; m_hsegSize = (int)(m_segSize / 2); m_dsegSize = m_segSize*m_segSize; m_segNumx = (int)(cghwidth / m_segSize); m_segNumy = (int)(cghheight / m_segSize); m_hsegNumx = (int)(m_segNumx / 2); m_hsegNumy = (int)(m_segNumy / 2); cudaMallocHost((void**)&m_inRe_h, sizeof(float)*m_segNumy * m_segNumx * m_segSize * m_segSize); cudaMallocHost((void**)&m_inIm_h, sizeof(float)*m_segNumy * m_segNumx * m_segSize * m_segSize); cudaMallocHost((void**)&m_Coefficient_cx, sizeof(int)*m_segNumx); cudaMallocHost((void**)&m_Coefficient_cy, sizeof(int)*m_segNumy); cudaMallocHost((void**)&m_xc, sizeof(float)*m_segNumx); cudaMallocHost((void**)&m_yc, sizeof(float)*m_segNumy); cudaMallocHost((void**)&m_COStbl, sizeof(float)*NUMTBL); cudaMallocHost((void**)&m_SINtbl, sizeof(float)*NUMTBL); /*m_inRe_h = new float[m_segNumy * m_segNumx * m_segSize * m_segSize]{ 0 }; m_inIm_h = new float[m_segNumy * m_segNumx * m_segSize * m_segSize]{ 0 }; m_COStbl = new float[NUMTBL]; m_SINtbl = new float[NUMTBL]; m_Coefficient_cx = new int[m_segNumx]; m_Coefficient_cy = new int[m_segNumy]; m_xc = new float[m_segNumx]; m_yc = new float[m_segNumy];*/ // calculation components m_SFrequency_cx = new float[m_segNumx]; m_SFrequency_cy = new float[m_segNumy]; m_PickPoint_cx = new int[m_segNumx]; m_PickPoint_cy = new int[m_segNumy]; for (int i = 0; i<NUMTBL; i++) { float theta = (float)M2_PI * (float)(i + i - 1) / (float)(2 * NUMTBL); m_COStbl[i] = (float)cos(theta); m_SINtbl[i] = (float)sin(theta); } // base spatial frequency m_sf_base = (float)(1.0 / (xiinter*m_segSize)); /* for (i = 0; i<m_segNumy; i++) { for (j = 0; j<m_segNumx; j++) { m_inRe[i*m_segNumx + j] = new float[m_segSize * m_segSize]; m_inIm[i*m_segNumx + j] = new float[m_segSize * m_segSize]; memset(m_inRe[i*m_segNumx + j], 0x00, sizeof(float) * m_segSize * m_segSize); memset(m_inIm[i*m_segNumx + j], 0x00, sizeof(float) * m_segSize * m_segSize); } } */ m_in = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * m_segSize * m_segSize); m_out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * m_segSize * m_segSize); memset(m_in, 0x00, sizeof(fftw_complex) * m_segSize * m_segSize); // segmentation center point calculation for (i = 0; i<m_segNumy; i++) m_yc[i] = ((i - m_hsegNumy) * m_segSize + m_hsegSize) * etainter; for (i = 0; i<m_segNumx; i++) m_xc[i] = ((i - m_hsegNumx) * m_segSize + m_hsegSize) * xiinter; m_plan = fftw_plan_dft_2d(m_segSize, m_segSize, m_in, m_out, FFTW_BACKWARD, FFTW_ESTIMATE); //sex = m_segNumx; //sey = m_segNumy; //sen = segsize; } void ophPAS_GPU::MemoryRelease(void) { cudaFree(&se); fftw_destroy_plan(m_plan); fftw_free(m_in); fftw_free(m_out); delete[] m_SFrequency_cx; delete[] m_SFrequency_cy; delete[] m_PickPoint_cx; delete[] m_PickPoint_cy; /*delete[] m_Coefficient_cx; delete[] m_Coefficient_cy; delete[] m_xc; delete[] m_yc; delete[] m_COStbl; delete[] m_SINtbl; delete[] m_inRe_h; delete[] m_inIm_h;*/ cudaFreeHost(m_Coefficient_cx); cudaFreeHost(m_Coefficient_cy); cudaFreeHost(m_xc); cudaFreeHost(m_yc); cudaFreeHost(m_COStbl); cudaFreeHost(m_SINtbl); cudaFreeHost(m_inRe_h); cudaFreeHost(m_inIm_h); /* for (i = 0; i<m_segNumy; i++) { for (j = 0; j<m_segNumx; j++) { delete[] m_inRe[i*m_segNumx + j]; delete[] m_inIm[i*m_segNumx + j]; } } */ } void ophPAS_GPU::generateHologram() { auto begin = CUR_TIME; cgh_fringe = new unsigned char[context_.pixel_number[_X] * context_.pixel_number[_Y]]; PASCalculation(n_points, cgh_fringe, &pc_data, pc_config); auto end = CUR_TIME; m_elapsedTime = ((std::chrono::duration<Real>)(end - begin)).count(); LOG("Total Elapsed Time: %lf (s)\n", m_elapsedTime); } void ophPAS_GPU::PASCalculation_GPU(long voxnum, unsigned char * cghfringe, OphPointCloudData * data, OphPointCloudConfig & conf) { } void ophPAS_GPU::CalcSpatialFrequency(float cx, float cy, float cz, float amp, int segnumx, int segnumy, int segsize, int hsegsize, float sf_base, float * xc, float * yc, float * sf_cx, float * sf_cy, int * pp_cx, int * pp_cy, int * cf_cx, int * cf_cy, float xiint, float etaint, OphPointCloudConfig& conf) { int segx, segy; // coordinate in a Segment float theta_cx, theta_cy; float rWaveLength = getContext().wave_length[0];//_CGHE->rWaveLength; float thetaX = 0.0;// _CGHE->ThetaX; float thetaY = 0.0;// _CGHE->ThetaY; for (segx = 0; segx < segnumx; segx++) { theta_cx = (xc[segx] - cx) / cz; sf_cx[segx] = (float)((theta_cx + thetaX) / rWaveLength); (sf_cx[segx] >= 0) ? pp_cx[segx] = (int)(sf_cx[segx] / sf_base + 0.5) : pp_cx[segx] = (int)(sf_cx[segx] / sf_base - 0.5); (abs(pp_cx[segx]) < hsegsize) ? cf_cx[segx] = ((segsize - pp_cx[segx]) % segsize) : cf_cx[segx] = 0; } for (segy = 0; segy < segnumy; segy++) { theta_cy = (yc[segy] - cy) / cz; sf_cy[segy] = (float)((theta_cy + thetaY) / rWaveLength); (sf_cy[segy] >= 0) ? pp_cy[segy] = (int)(sf_cy[segy] / sf_base + 0.5) : pp_cy[segy] = (int)(sf_cy[segy] / sf_base - 0.5); (abs(pp_cy[segy]) < hsegsize) ? cf_cy[segy] = ((segsize - pp_cy[segy]) % segsize) : cf_cy[segy] = 0; } } void ophPAS_GPU::CalcCompensatedPhase(float cx, float cy, float cz, float amp , int segNumx, int segNumy , int segsize, int hsegsize, float sf_base , float *xc, float *yc , int *cf_cx, int *cf_cy , float *COStbl, float *SINtbl , float **inRe, float **inIm, OphPointCloudConfig& conf) { /* CUDA 󸮰úÁ¤ÀÌ ¼øÂ÷ÀûÀ¸·Î 󸮰¡ µÇ±â ¶§¹®¿¡, È£½ºÆ®¿¡¼­ µð¹ÙÀ̽º·Î µ¥ÀÌÅ͸¦ º¹»çÇÏ´Â °úÁ¤¿¡¼­ GPU´Â ´ë±âÇÏ°Ô µÈ´Ù. µû¶ó¼­ 󸮰úÁ¤À» ´ÙÀ½°ú °°ÀÌ Á¤ÇÑ´Ù. 1. cudaMallocHost¸¦ ÅëÇØ È£½ºÆ®Äڵ带 °­Á¦·Î pinned memory·Î »ç¿ë(GPU·Î µ¥ÀÌÅÍ Àü¼ÛÀÌ »¡¶óÁú ¼ö ÀÖÀ½) 2. cudaMemcpyAsync·Î µ¥ÀÌÅÍ Àü¼Û ¼Óµµ up(cudaMemcpy´Â µ¿±âÈ­¹æ½Ä) 3. cudastream »ç¿ëÀ¸·Î º´·Äó¸® ±Ø´ëÈ­ ¼º´Éºñ±³´Â ´ÙÀ½°ú °°ÀÌ ÇÑ´Ù. 1. ÀϹÝÀûÀÎ CUDA Programming 2. °³¼±µÈ CUDA Programming(pinned memory »ç¿ë, cudastream »ç¿ë) 3. CPU ÄÚµå */ constValue d_const; int num_x = segNumx*segNumy; int num_y = segsize*segsize; float* inRe_d; float* inIm_d; /*cudaMalloc((void**)&inRe_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&inIm_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&d_const.cf_cx, sizeof(int)*segNumx); cudaMalloc((void**)&d_const.cf_cy, sizeof(int)*segNumy); cudaMalloc((void**)&d_const.xc, sizeof(float)*segNumx); cudaMalloc((void**)&d_const.yc, sizeof(float)*segNumy); cudaMalloc((void**)&d_const.costbl, sizeof(float)*NUMTBL); cudaMalloc((void**)&d_const.sintbl, sizeof(float)*NUMTBL);*/ cudaMalloc((void**)&inRe_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&inIm_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&d_const.cf_cx, sizeof(int)*segNumx); cudaMalloc((void**)&d_const.cf_cy, sizeof(int)*segNumy); cudaMalloc((void**)&d_const.xc, sizeof(float)*segNumx); cudaMalloc((void**)&d_const.yc, sizeof(float)*segNumy); cudaMalloc((void**)&d_const.costbl, sizeof(float)*NUMTBL); cudaMalloc((void**)&d_const.sintbl, sizeof(float)*NUMTBL); cudaMemcpyAsync(inRe_d, m_inRe_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpyAsync(inIm_d, m_inIm_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.cf_cx, cf_cx , sizeof(int)*segNumx, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.cf_cy, cf_cy , sizeof(int)*segNumy, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.xc, xc, sizeof(float)*segNumx, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.yc, yc, sizeof(float)*segNumy, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.costbl, COStbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.sintbl, SINtbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice); ivec3 v(segNumx, segNumy, segsize); cuda_Wrapper_phaseCalc(inRe_d, inIm_d, d_const, cx, cy, cz, amp, v); //phaseCalc << <blockSize, gridSize >> >(inRe_d, inIm_d, d_const); cudaMemcpyAsync(m_inRe_h, inRe_d , sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); cudaMemcpyAsync(m_inIm_h, inIm_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); /*cudaMemcpy(inRe_d, m_inRe_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpy(inIm_d, m_inIm_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpy(d_const.cf_cx, cf_cx, sizeof(int)*segNumx, cudaMemcpyHostToDevice); cudaMemcpy(d_const.cf_cy, cf_cy, sizeof(int)*segNumy, cudaMemcpyHostToDevice); cudaMemcpy(d_const.xc, xc, sizeof(float)*segNumx, cudaMemcpyHostToDevice); cudaMemcpy(d_const.yc, yc, sizeof(float)*segNumy, cudaMemcpyHostToDevice); cudaMemcpy(d_const.costbl, COStbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice); cudaMemcpy(d_const.sintbl, SINtbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice);*/ /*cudaMemcpyAsync(m_inRe_h, inRe_d, sizeof(float)*i*j, cudaMemcpyDeviceToHost); cudaMemcpyAsync(m_inIm_h, inIm_d, sizeof(float)*i*j, cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); cudaFreeHost(d_const.cf_cx); cudaFreeHost(d_const.cf_cy); cudaFreeHost(d_const.xc); cudaFreeHost(d_const.yc); cudaFreeHost(d_const.costbl); cudaFreeHost(d_const.sintbl); cudaFreeHost(inRe_d); cudaFreeHost(inIm_d); */ /*for (int i = 0; i < nStreams; i++) cudaStreamDestroy(streams[i]);*/ cudaFree(d_const.cf_cx); cudaFree(d_const.cf_cy); cudaFree(d_const.xc); cudaFree(d_const.yc); cudaFree(d_const.costbl); cudaFree(d_const.sintbl); cudaFree(inRe_d); cudaFree(inIm_d); /*cudaFreeHost(cf_cx); cudaFreeHost(cf_cy); cudaFreeHost(xc); cudaFreeHost(yc); cudaFreeHost(COStbl); cudaFreeHost(SINtbl); cudaFreeHost(m_inRe_h); cudaFreeHost(m_inIm_h);*/ /*phaseCalc << <blockSize, gridSize >> >(inRe_d, inIm_d, d_const); cudaMemcpy(m_inRe_h, inRe_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); cudaMemcpy(m_inIm_h, inIm_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); cudaFree(d_const.cf_cx); cudaFree(d_const.cf_cy); cudaFree(d_const.xc); cudaFree(d_const.yc); cudaFree(d_const.costbl); cudaFree(d_const.sintbl); cudaFree(inRe_d); cudaFree(inIm_d);*/ } /** @fn void RunFFTW(int segnumx, int segnumy, int segsize, int hsegsize, float ** inRe, float ** inIm, fftw_complex * in, fftw_complex * out, fftw_plan * plan, double * pHologram, OphPointCloudConfig& conf) @brief Ǫ¸®¿¡ º¯È¯ ¼öÇà ÇÔ¼ö @return ¾øÀ½ @param voxnum: vertex °³¼ö cghfringe: À̹ÌÁö·Î ÀúÀåÇÒ ¹è¿­ data: OpenHolo Data°ü·Ã ±¸Á¶Ã¼ conf: OpenHolo Config°ü·Ã ±¸Á¶Ã¼ */ void ophPAS_GPU::RunFFTW(int segnumx, int segnumy, int segsize, int hsegsize, float ** inRe, float ** inIm, fftw_complex * in, fftw_complex * out, fftw_plan * plan, double * pHologram, OphPointCloudConfig& conf) { int i, j; int segx, segy; // coordinate in a Segment int segxx, segyy; int cghWidth = getContext().pixel_number[_X]; int rows = m_segNumy; int cols = m_segNumx; for (segy = 0; segy < segnumy; segy++) { for (segx = 0; segx < segnumx; segx++) { segyy = segy * segnumx + segx; memset(in, 0x00, sizeof(fftw_complex) * segsize * segsize); for (i = 0; i < segsize; i++) { for (j = 0; j < segsize; j++) { segxx = i * segsize + j; in[i*segsize + j][0] = m_inRe_h[segyy*segsize*segsize+segxx]; in[i*segsize + j][1] = m_inIm_h[segyy*segsize*segsize+segxx]; //inIm_h°ªÀÌ ´Ù¸§(x) ´Ù °°À½ } } fftw_execute(*plan); for (i = 0; i < segsize; i++) { for (j = 0; j < segsize; j++) { pHologram[(segy*segsize + i)*cghWidth + (segx*segsize + j)] = out[i * segsize + j][0];// - out[l * SEGSIZE + m][1]; } } } } } void ophPAS_GPU::encodeHologram(const vec2 band_limit, const vec2 spectrum_shift) { if (complex_H == nullptr) { LOG("Not found diffracted data."); return; } LOG("Single Side Band Encoding.."); const uint nChannel = context_.waveNum; const uint pnX = context_.pixel_number[_X]; const uint pnY = context_.pixel_number[_Y]; const Real ppX = context_.pixel_pitch[_X]; const Real ppY = context_.pixel_pitch[_Y]; const long long int pnXY = pnX * pnY; m_vecEncodeSize = ivec2(pnX, pnY); context_.ss[_X] = pnX * ppX; context_.ss[_Y] = pnY * ppY; vec2 ss = context_.ss; Real cropx = floor(pnX * band_limit[_X]); Real cropx1 = cropx - floor(cropx / 2); Real cropx2 = cropx1 + cropx - 1; Real cropy = floor(pnY * band_limit[_Y]); Real cropy1 = cropy - floor(cropy / 2); Real cropy2 = cropy1 + cropy - 1; Real* x_o = new Real[pnX]; Real* y_o = new Real[pnY]; for (uint i = 0; i < pnX; i++) x_o[i] = (-ss[_X] / 2) + (ppX * i) + (ppX / 2); for (uint i = 0; i < pnY; i++) y_o[i] = (ss[_Y] - ppY) - (ppY * i); Real* xx_o = new Real[pnXY]; Real* yy_o = new Real[pnXY]; for (uint i = 0; i < pnXY; i++) xx_o[i] = x_o[i % pnX]; for (uint i = 0; i < pnX; i++) for (uint j = 0; j < pnY; j++) yy_o[i + j * pnX] = y_o[j]; Complex<Real>* h = new Complex<Real>[pnXY]; for (uint ch = 0; ch < nChannel; ch++) { fft2(complex_H[ch], h, pnX, pnY, OPH_FORWARD); fft2(ivec2(pnX, pnY), h, OPH_FORWARD); fftExecute(h); fft2(h, h, pnX, pnY, OPH_BACKWARD); fft2(h, h, pnX, pnY, OPH_FORWARD); fft2(ivec2(pnX, pnY), h, OPH_BACKWARD); fftExecute(h); fft2(h, h, pnX, pnY, OPH_BACKWARD); for (int i = 0; i < pnXY; i++) { Complex<Real> shift_phase(1.0, 0.0); int r = i / pnX; int c = i % pnX; Real X = (M_PI * xx_o[i] * spectrum_shift[_X]) / ppX; Real Y = (M_PI * yy_o[i] * spectrum_shift[_Y]) / ppY; shift_phase[_RE] = shift_phase[_RE] * (cos(X) * cos(Y) - sin(X) * sin(Y)); m_lpEncoded[ch][i] = (h[i] * shift_phase).real(); } } delete[] h; delete[] x_o; delete[] xx_o; delete[] y_o; delete[] yy_o; LOG("Done.\n"); } /** @fn void encoding(unsigned int ENCODE_FLAG) @brief abstract function of ophGen::encoding @return ¾øÀ½ @param ENCODE_FLAG: ¾Ïȣȭ ¸Þ¼­µå */ void ophPAS_GPU::encoding(unsigned int ENCODE_FLAG) { ophGen::encoding(ENCODE_FLAG); }
282 char* ophPAS_GPU::ltrim(char* s)
283 {
284  char* begin;
285  begin = s;
286 
287  while (*begin != '\0') {
288  if (isspace(*begin))
289  begin++;
290  else {
291  s = begin;
292  break;
293  }
294  }
295 
296  return s;
297 }
298 
299 
300 // ¹®ÀÚ¿­ ¾ÕµÚ °ø¹é ¸ðµÎ »èÁ¦ ÇÔ¼öchar* ophPAS_GPU::trim(char* s) { return rtrim(ltrim(s)); } /** @fn void DataInit(OphPointCloudConfig &conf) @brief PAS¾Ë°í¸®Áò ¼öÇàÀ» À§ÇÑ µ¥ÀÌÅÍ ÃʱâÈ­ ÇÔ¼ö @return ¾øÀ½ @param conf: OpenHolo Config¸¦ À§ÇÑ ±¸Á¶Ã¼ */ void ophPAS_GPU::DataInit(OphPointCloudConfig &conf) { m_pHologram = new double[getContext().pixel_number[_X] * getContext().pixel_number[_Y]]; memset(m_pHologram, 0x00, sizeof(double)*getContext().pixel_number[_X] * getContext().pixel_number[_Y]); //for (int i = 0; i < NUMTBL; i++) { // float theta = (float)M2_PI * (float)(i + i - 1) / (float)(2 * NUMTBL); // m_COStbl[i] = (float)cos(theta); // m_SINtbl[i] = (float)sin(theta); //}// -> gpu } /* void ophPAS::PASCalcuation(long voxnum, unsigned char * cghfringe, VoxelStruct * h_vox, CGHEnvironmentData * _CGHE) { long i, j; double Max = -1E9, Min = 1E9; double myBuffer; int cghwidth = _CGHE->CghWidth; int cghheight = _CGHE->CghHeight; DataInit(_CGHE); //PAS // PAS(voxnum, h_vox, m_pHologram, _CGHE); // for (i = 0; i<cghheight; i++) { for (j = 0; j<cghwidth; j++) { if (Max < m_pHologram[i*cghwidth + j]) Max = m_pHologram[i*cghwidth + j]; if (Min > m_pHologram[i*cghwidth + j]) Min = m_pHologram[i*cghwidth + j]; } } for (i = 0; i<cghheight; i++) { for (j = 0; j<cghwidth; j++) { myBuffer = 1.0*(((m_pHologram[i*cghwidth + j] - Min) / (Max - Min))*255. + 0.5); if (myBuffer >= 255.0) cghfringe[i*cghwidth + j] = 255; else cghfringe[i*cghwidth + j] = (unsigned char)(myBuffer); } } } */ /** @fn void PASCalculation(long voxnum, unsigned char * cghfringe, OphPointCloudData *data, OphPointCloudConfig& conf) @brief PAS¾Ë°í¸®Áò ¼öÇà ÇÔ¼ö @return ¾øÀ½ @param voxnum: vertex °³¼ö cghfringe: À̹ÌÁö·Î ÀúÀåÇÒ ¹è¿­ data: OpenHolo Data°ü·Ã ±¸Á¶Ã¼ conf: OpenHolo Config°ü·Ã ±¸Á¶Ã¼ */ void ophPAS_GPU::PASCalculation(long voxnum, unsigned char * cghfringe, OphPointCloudData *data, OphPointCloudConfig& conf) { long i, j; double Max = -1E9, Min = 1E9; double myBuffer; int cghwidth = getContext().pixel_number[_X]; int cghheight = getContext().pixel_number[_Y]; //DataInit(_CGHE); DataInit(conf); //PAS // //PAS(voxnum, h_vox, m_pHologram, _CGHE); PAS(voxnum, data, m_pHologram, conf); // /* °í¼ÓÈ­ ÇÊ¿äÇÑ º¯¼ö m_pHologram cghfringe º¯¼ö´Â unified memory »ç¿ëÇÏ´Â ¹æÇâÀ¸·Î */ for (i = 0; i < cghheight; i++) { for (j = 0; j < cghwidth; j++) { if (Max < m_pHologram[i*cghwidth + j]) Max = m_pHologram[i*cghwidth + j]; if (Min > m_pHologram[i*cghwidth + j]) Min = m_pHologram[i*cghwidth + j]; } } for (i = 0; i < cghheight; i++) { for (j = 0; j < cghwidth; j++) { myBuffer = 1.0*(((m_pHologram[i*cghwidth + j] - Min) / (Max - Min))*255. + 0.5); if (myBuffer >= 255.0) cghfringe[i*cghwidth + j] = 255; else cghfringe[i*cghwidth + j] = (unsigned char)(myBuffer); } } } /* void ophPAS::PAS(long voxelnum, VoxelStruct * voxel, double * m_pHologram, CGHEnvironmentData* _CGHE) { float xiInterval = _CGHE->xiInterval; float etaInterval = _CGHE->etaInterval; float cghScale = _CGHE->CGHScale; float defaultDepth = _CGHE->DefaultDepth; DataInit(_CGHE->fftSegmentationSize, _CGHE->CghWidth, _CGHE->CghHeight, xiInterval, etaInterval); int no; // voxel Number float X, Y, Z; ; // x, y, real distance float Amplitude; float sf_base = 1.0 / (xiInterval*_CGHE->fftSegmentationSize); //CString mm; clock_t start, finish; double duration; start = clock(); // Iteration according to the point number for (no = 0; no<voxelnum; no++) { // point coordinate X = (voxel[no].x) * cghScale; Y = (voxel[no].y) * cghScale; Z = voxel[no].z * cghScale - defaultDepth; Amplitude = voxel[no].r; CalcSpatialFrequency(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_SFrequency_cx, m_SFrequency_cy , m_PickPoint_cx, m_PickPoint_cy , m_Coefficient_cx, m_Coefficient_cy , xiInterval, etaInterval,_CGHE); CalcCompensatedPhase(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_Coefficient_cx, m_Coefficient_cy , m_COStbl, m_SINtbl , m_inRe, m_inIm,_CGHE); } RunFFTW(m_segNumx, m_segNumy , m_segSize, m_hsegSize , m_inRe, m_inIm , m_in, m_out , &m_plan, m_pHologram,_CGHE); finish = clock(); duration = (double)(finish - start) / CLOCKS_PER_SEC; //mm.Format("%f", duration); //AfxMessageBox(mm); cout << duration << endl; MemoryRelease(); } */ /** @fn void PAS(long voxnum, OphPointCloudData *data, double* m_pHologram,OphPointCloudConfig& conf) @brief implementation of the PAS @return ¾øÀ½ @param voxnum: vertex °³¼ö data: OpenHolo Data°ü·Ã ±¸Á¶Ã¼ m_pHologram: fftw °á°ú¸¦ ³ÖÀ» º¯¼ö conf: OpenHolo Config°ü·Ã ±¸Á¶Ã¼ */ void ophPAS_GPU::PAS(long voxelnum, OphPointCloudData *data, double * m_pHologram, OphPointCloudConfig& conf) { float xiInterval = getContext().pixel_pitch[_X];//_CGHE->xiInterval; float etaInterval = getContext().pixel_pitch[_Y];//_CGHE->etaInterval; float cghScale = conf.scale[_X];// _CGHE->CGHScale; float defaultDepth = conf.distance;//_CGHE->DefaultDepth; DataInit(FFT_SEGMENT_SIZE, getContext().pixel_number[_X], getContext().pixel_number[_Y], xiInterval, etaInterval); long no; // voxel Number float X, Y, Z; ; // x, y, real distance float Amplitude; float sf_base = 1.0 / (xiInterval* FFT_SEGMENT_SIZE); //CString mm; clock_t start, finish; double duration; start = clock(); // Iteration according to the point number for (no = 0; no < voxelnum * 3; no += 3) { // point coordinate X = (data->vertices[no].point.pos[_X]) * cghScale; Y = (data->vertices[no].point.pos[_Y]) * cghScale; Z = (data->vertices[no].point.pos[_Z]) * cghScale - defaultDepth; Amplitude = data->vertices[no].phase; std::cout << "X: " << X << ", Y: " << Y << ", Z: " << Z << ", Amp: " << Amplitude << endl; //c_x = X; //c_y = Y; //c_z = Z; //amplitude = Amplitude; /* CalcSpatialFrequency(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_SFrequency_cx, m_SFrequency_cy , m_PickPoint_cx, m_PickPoint_cy , m_Coefficient_cx, m_Coefficient_cy , xiInterval, etaInterval, _CGHE); */ CalcSpatialFrequency(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_SFrequency_cx, m_SFrequency_cy , m_PickPoint_cx, m_PickPoint_cy , m_Coefficient_cx, m_Coefficient_cy , xiInterval, etaInterval, conf); /* CalcCompensatedPhase(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_Coefficient_cx, m_Coefficient_cy , m_COStbl, m_SINtbl , m_inRe, m_inIm, _CGHE); */ CalcCompensatedPhase(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_Coefficient_cx, m_Coefficient_cy , m_COStbl, m_SINtbl , m_inRe, m_inIm, conf); } /* RunFFTW(m_segNumx, m_segNumy , m_segSize, m_hsegSize , m_inRe, m_inIm , m_in, m_out , &m_plan, m_pHologram, _CGHE); */ RunFFTW(m_segNumx, m_segNumy , m_segSize, m_hsegSize , m_inRe, m_inIm , m_in, m_out , &m_plan, m_pHologram, conf); finish = clock(); duration = (double)(finish - start) / CLOCKS_PER_SEC; //mm.Format("%f", duration); //AfxMessageBox(mm); cout << duration << endl; MemoryRelease(); } /** @fn void DataInit(int segsize, int cghwidth, int cghheight, float xiinter, float etainter) @brief PAS ¾Ë°í¸®Áò ¼öÇàÀ» À§ÇÑ µ¥ÀÌÅÍ ÃʱâÈ­ ÇÔ¼ö @return ¾øÀ½ @param segsize: cghwidth: cghheight: xiinter: etainter: */ void ophPAS_GPU::DataInit(int segsize, int cghwidth, int cghheight, float xiinter, float etainter) { int i; // size m_segSize = segsize; m_hsegSize = (int)(m_segSize / 2); m_dsegSize = m_segSize*m_segSize; m_segNumx = (int)(cghwidth / m_segSize); m_segNumy = (int)(cghheight / m_segSize); m_hsegNumx = (int)(m_segNumx / 2); m_hsegNumy = (int)(m_segNumy / 2); cudaMallocHost((void**)&m_inRe_h, sizeof(float)*m_segNumy * m_segNumx * m_segSize * m_segSize); cudaMallocHost((void**)&m_inIm_h, sizeof(float)*m_segNumy * m_segNumx * m_segSize * m_segSize); cudaMallocHost((void**)&m_Coefficient_cx, sizeof(int)*m_segNumx); cudaMallocHost((void**)&m_Coefficient_cy, sizeof(int)*m_segNumy); cudaMallocHost((void**)&m_xc, sizeof(float)*m_segNumx); cudaMallocHost((void**)&m_yc, sizeof(float)*m_segNumy); cudaMallocHost((void**)&m_COStbl, sizeof(float)*NUMTBL); cudaMallocHost((void**)&m_SINtbl, sizeof(float)*NUMTBL); /*m_inRe_h = new float[m_segNumy * m_segNumx * m_segSize * m_segSize]{ 0 }; m_inIm_h = new float[m_segNumy * m_segNumx * m_segSize * m_segSize]{ 0 }; m_COStbl = new float[NUMTBL]; m_SINtbl = new float[NUMTBL]; m_Coefficient_cx = new int[m_segNumx]; m_Coefficient_cy = new int[m_segNumy]; m_xc = new float[m_segNumx]; m_yc = new float[m_segNumy];*/ // calculation components m_SFrequency_cx = new float[m_segNumx]; m_SFrequency_cy = new float[m_segNumy]; m_PickPoint_cx = new int[m_segNumx]; m_PickPoint_cy = new int[m_segNumy]; for (int i = 0; i<NUMTBL; i++) { float theta = (float)M2_PI * (float)(i + i - 1) / (float)(2 * NUMTBL); m_COStbl[i] = (float)cos(theta); m_SINtbl[i] = (float)sin(theta); } // base spatial frequency m_sf_base = (float)(1.0 / (xiinter*m_segSize)); /* for (i = 0; i<m_segNumy; i++) { for (j = 0; j<m_segNumx; j++) { m_inRe[i*m_segNumx + j] = new float[m_segSize * m_segSize]; m_inIm[i*m_segNumx + j] = new float[m_segSize * m_segSize]; memset(m_inRe[i*m_segNumx + j], 0x00, sizeof(float) * m_segSize * m_segSize); memset(m_inIm[i*m_segNumx + j], 0x00, sizeof(float) * m_segSize * m_segSize); } } */ m_in = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * m_segSize * m_segSize); m_out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * m_segSize * m_segSize); memset(m_in, 0x00, sizeof(fftw_complex) * m_segSize * m_segSize); // segmentation center point calculation for (i = 0; i<m_segNumy; i++) m_yc[i] = ((i - m_hsegNumy) * m_segSize + m_hsegSize) * etainter; for (i = 0; i<m_segNumx; i++) m_xc[i] = ((i - m_hsegNumx) * m_segSize + m_hsegSize) * xiinter; m_plan = fftw_plan_dft_2d(m_segSize, m_segSize, m_in, m_out, FFTW_BACKWARD, FFTW_ESTIMATE); //sex = m_segNumx; //sey = m_segNumy; //sen = segsize; } void ophPAS_GPU::MemoryRelease(void) { cudaFree(&se); fftw_destroy_plan(m_plan); fftw_free(m_in); fftw_free(m_out); delete[] m_SFrequency_cx; delete[] m_SFrequency_cy; delete[] m_PickPoint_cx; delete[] m_PickPoint_cy; /*delete[] m_Coefficient_cx; delete[] m_Coefficient_cy; delete[] m_xc; delete[] m_yc; delete[] m_COStbl; delete[] m_SINtbl; delete[] m_inRe_h; delete[] m_inIm_h;*/ cudaFreeHost(m_Coefficient_cx); cudaFreeHost(m_Coefficient_cy); cudaFreeHost(m_xc); cudaFreeHost(m_yc); cudaFreeHost(m_COStbl); cudaFreeHost(m_SINtbl); cudaFreeHost(m_inRe_h); cudaFreeHost(m_inIm_h); /* for (i = 0; i<m_segNumy; i++) { for (j = 0; j<m_segNumx; j++) { delete[] m_inRe[i*m_segNumx + j]; delete[] m_inIm[i*m_segNumx + j]; } } */ } void ophPAS_GPU::generateHologram() { auto begin = CUR_TIME; cgh_fringe = new unsigned char[context_.pixel_number[_X] * context_.pixel_number[_Y]]; PASCalculation(n_points, cgh_fringe, &pc_data, pc_config); auto end = CUR_TIME; m_elapsedTime = ((std::chrono::duration<Real>)(end - begin)).count(); LOG("Total Elapsed Time: %lf (s)\n", m_elapsedTime); } void ophPAS_GPU::PASCalculation_GPU(long voxnum, unsigned char * cghfringe, OphPointCloudData * data, OphPointCloudConfig & conf) { } void ophPAS_GPU::CalcSpatialFrequency(float cx, float cy, float cz, float amp, int segnumx, int segnumy, int segsize, int hsegsize, float sf_base, float * xc, float * yc, float * sf_cx, float * sf_cy, int * pp_cx, int * pp_cy, int * cf_cx, int * cf_cy, float xiint, float etaint, OphPointCloudConfig& conf) { int segx, segy; // coordinate in a Segment float theta_cx, theta_cy; float rWaveLength = getContext().wave_length[0];//_CGHE->rWaveLength; float thetaX = 0.0;// _CGHE->ThetaX; float thetaY = 0.0;// _CGHE->ThetaY; for (segx = 0; segx < segnumx; segx++) { theta_cx = (xc[segx] - cx) / cz; sf_cx[segx] = (float)((theta_cx + thetaX) / rWaveLength); (sf_cx[segx] >= 0) ? pp_cx[segx] = (int)(sf_cx[segx] / sf_base + 0.5) : pp_cx[segx] = (int)(sf_cx[segx] / sf_base - 0.5); (abs(pp_cx[segx]) < hsegsize) ? cf_cx[segx] = ((segsize - pp_cx[segx]) % segsize) : cf_cx[segx] = 0; } for (segy = 0; segy < segnumy; segy++) { theta_cy = (yc[segy] - cy) / cz; sf_cy[segy] = (float)((theta_cy + thetaY) / rWaveLength); (sf_cy[segy] >= 0) ? pp_cy[segy] = (int)(sf_cy[segy] / sf_base + 0.5) : pp_cy[segy] = (int)(sf_cy[segy] / sf_base - 0.5); (abs(pp_cy[segy]) < hsegsize) ? cf_cy[segy] = ((segsize - pp_cy[segy]) % segsize) : cf_cy[segy] = 0; } } void ophPAS_GPU::CalcCompensatedPhase(float cx, float cy, float cz, float amp , int segNumx, int segNumy , int segsize, int hsegsize, float sf_base , float *xc, float *yc , int *cf_cx, int *cf_cy , float *COStbl, float *SINtbl , float **inRe, float **inIm, OphPointCloudConfig& conf) { /* CUDA 󸮰úÁ¤ÀÌ ¼øÂ÷ÀûÀ¸·Î 󸮰¡ µÇ±â ¶§¹®¿¡, È£½ºÆ®¿¡¼­ µð¹ÙÀ̽º·Î µ¥ÀÌÅ͸¦ º¹»çÇÏ´Â °úÁ¤¿¡¼­ GPU´Â ´ë±âÇÏ°Ô µÈ´Ù. µû¶ó¼­ 󸮰úÁ¤À» ´ÙÀ½°ú °°ÀÌ Á¤ÇÑ´Ù. 1. cudaMallocHost¸¦ ÅëÇØ È£½ºÆ®Äڵ带 °­Á¦·Î pinned memory·Î »ç¿ë(GPU·Î µ¥ÀÌÅÍ Àü¼ÛÀÌ »¡¶óÁú ¼ö ÀÖÀ½) 2. cudaMemcpyAsync·Î µ¥ÀÌÅÍ Àü¼Û ¼Óµµ up(cudaMemcpy´Â µ¿±âÈ­¹æ½Ä) 3. cudastream »ç¿ëÀ¸·Î º´·Äó¸® ±Ø´ëÈ­ ¼º´Éºñ±³´Â ´ÙÀ½°ú °°ÀÌ ÇÑ´Ù. 1. ÀϹÝÀûÀÎ CUDA Programming 2. °³¼±µÈ CUDA Programming(pinned memory »ç¿ë, cudastream »ç¿ë) 3. CPU ÄÚµå */ constValue d_const; int num_x = segNumx*segNumy; int num_y = segsize*segsize; float* inRe_d; float* inIm_d; /*cudaMalloc((void**)&inRe_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&inIm_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&d_const.cf_cx, sizeof(int)*segNumx); cudaMalloc((void**)&d_const.cf_cy, sizeof(int)*segNumy); cudaMalloc((void**)&d_const.xc, sizeof(float)*segNumx); cudaMalloc((void**)&d_const.yc, sizeof(float)*segNumy); cudaMalloc((void**)&d_const.costbl, sizeof(float)*NUMTBL); cudaMalloc((void**)&d_const.sintbl, sizeof(float)*NUMTBL);*/ cudaMalloc((void**)&inRe_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&inIm_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&d_const.cf_cx, sizeof(int)*segNumx); cudaMalloc((void**)&d_const.cf_cy, sizeof(int)*segNumy); cudaMalloc((void**)&d_const.xc, sizeof(float)*segNumx); cudaMalloc((void**)&d_const.yc, sizeof(float)*segNumy); cudaMalloc((void**)&d_const.costbl, sizeof(float)*NUMTBL); cudaMalloc((void**)&d_const.sintbl, sizeof(float)*NUMTBL); cudaMemcpyAsync(inRe_d, m_inRe_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpyAsync(inIm_d, m_inIm_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.cf_cx, cf_cx , sizeof(int)*segNumx, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.cf_cy, cf_cy , sizeof(int)*segNumy, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.xc, xc, sizeof(float)*segNumx, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.yc, yc, sizeof(float)*segNumy, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.costbl, COStbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.sintbl, SINtbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice); ivec3 v(segNumx, segNumy, segsize); cuda_Wrapper_phaseCalc(inRe_d, inIm_d, d_const, cx, cy, cz, amp, v); //phaseCalc << <blockSize, gridSize >> >(inRe_d, inIm_d, d_const); cudaMemcpyAsync(m_inRe_h, inRe_d , sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); cudaMemcpyAsync(m_inIm_h, inIm_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); /*cudaMemcpy(inRe_d, m_inRe_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpy(inIm_d, m_inIm_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpy(d_const.cf_cx, cf_cx, sizeof(int)*segNumx, cudaMemcpyHostToDevice); cudaMemcpy(d_const.cf_cy, cf_cy, sizeof(int)*segNumy, cudaMemcpyHostToDevice); cudaMemcpy(d_const.xc, xc, sizeof(float)*segNumx, cudaMemcpyHostToDevice); cudaMemcpy(d_const.yc, yc, sizeof(float)*segNumy, cudaMemcpyHostToDevice); cudaMemcpy(d_const.costbl, COStbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice); cudaMemcpy(d_const.sintbl, SINtbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice);*/ /*cudaMemcpyAsync(m_inRe_h, inRe_d, sizeof(float)*i*j, cudaMemcpyDeviceToHost); cudaMemcpyAsync(m_inIm_h, inIm_d, sizeof(float)*i*j, cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); cudaFreeHost(d_const.cf_cx); cudaFreeHost(d_const.cf_cy); cudaFreeHost(d_const.xc); cudaFreeHost(d_const.yc); cudaFreeHost(d_const.costbl); cudaFreeHost(d_const.sintbl); cudaFreeHost(inRe_d); cudaFreeHost(inIm_d); */ /*for (int i = 0; i < nStreams; i++) cudaStreamDestroy(streams[i]);*/ cudaFree(d_const.cf_cx); cudaFree(d_const.cf_cy); cudaFree(d_const.xc); cudaFree(d_const.yc); cudaFree(d_const.costbl); cudaFree(d_const.sintbl); cudaFree(inRe_d); cudaFree(inIm_d); /*cudaFreeHost(cf_cx); cudaFreeHost(cf_cy); cudaFreeHost(xc); cudaFreeHost(yc); cudaFreeHost(COStbl); cudaFreeHost(SINtbl); cudaFreeHost(m_inRe_h); cudaFreeHost(m_inIm_h);*/ /*phaseCalc << <blockSize, gridSize >> >(inRe_d, inIm_d, d_const); cudaMemcpy(m_inRe_h, inRe_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); cudaMemcpy(m_inIm_h, inIm_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); cudaFree(d_const.cf_cx); cudaFree(d_const.cf_cy); cudaFree(d_const.xc); cudaFree(d_const.yc); cudaFree(d_const.costbl); cudaFree(d_const.sintbl); cudaFree(inRe_d); cudaFree(inIm_d);*/ } /** @fn void RunFFTW(int segnumx, int segnumy, int segsize, int hsegsize, float ** inRe, float ** inIm, fftw_complex * in, fftw_complex * out, fftw_plan * plan, double * pHologram, OphPointCloudConfig& conf) @brief Ǫ¸®¿¡ º¯È¯ ¼öÇà ÇÔ¼ö @return ¾øÀ½ @param voxnum: vertex °³¼ö cghfringe: À̹ÌÁö·Î ÀúÀåÇÒ ¹è¿­ data: OpenHolo Data°ü·Ã ±¸Á¶Ã¼ conf: OpenHolo Config°ü·Ã ±¸Á¶Ã¼ */ void ophPAS_GPU::RunFFTW(int segnumx, int segnumy, int segsize, int hsegsize, float ** inRe, float ** inIm, fftw_complex * in, fftw_complex * out, fftw_plan * plan, double * pHologram, OphPointCloudConfig& conf) { int i, j; int segx, segy; // coordinate in a Segment int segxx, segyy; int cghWidth = getContext().pixel_number[_X]; int rows = m_segNumy; int cols = m_segNumx; for (segy = 0; segy < segnumy; segy++) { for (segx = 0; segx < segnumx; segx++) { segyy = segy * segnumx + segx; memset(in, 0x00, sizeof(fftw_complex) * segsize * segsize); for (i = 0; i < segsize; i++) { for (j = 0; j < segsize; j++) { segxx = i * segsize + j; in[i*segsize + j][0] = m_inRe_h[segyy*segsize*segsize+segxx]; in[i*segsize + j][1] = m_inIm_h[segyy*segsize*segsize+segxx]; //inIm_h°ªÀÌ ´Ù¸§(x) ´Ù °°À½ } } fftw_execute(*plan); for (i = 0; i < segsize; i++) { for (j = 0; j < segsize; j++) { pHologram[(segy*segsize + i)*cghWidth + (segx*segsize + j)] = out[i * segsize + j][0];// - out[l * SEGSIZE + m][1]; } } } } } void ophPAS_GPU::encodeHologram(const vec2 band_limit, const vec2 spectrum_shift) { if (complex_H == nullptr) { LOG("Not found diffracted data."); return; } LOG("Single Side Band Encoding.."); const uint nChannel = context_.waveNum; const uint pnX = context_.pixel_number[_X]; const uint pnY = context_.pixel_number[_Y]; const Real ppX = context_.pixel_pitch[_X]; const Real ppY = context_.pixel_pitch[_Y]; const long long int pnXY = pnX * pnY; m_vecEncodeSize = ivec2(pnX, pnY); context_.ss[_X] = pnX * ppX; context_.ss[_Y] = pnY * ppY; vec2 ss = context_.ss; Real cropx = floor(pnX * band_limit[_X]); Real cropx1 = cropx - floor(cropx / 2); Real cropx2 = cropx1 + cropx - 1; Real cropy = floor(pnY * band_limit[_Y]); Real cropy1 = cropy - floor(cropy / 2); Real cropy2 = cropy1 + cropy - 1; Real* x_o = new Real[pnX]; Real* y_o = new Real[pnY]; for (uint i = 0; i < pnX; i++) x_o[i] = (-ss[_X] / 2) + (ppX * i) + (ppX / 2); for (uint i = 0; i < pnY; i++) y_o[i] = (ss[_Y] - ppY) - (ppY * i); Real* xx_o = new Real[pnXY]; Real* yy_o = new Real[pnXY]; for (uint i = 0; i < pnXY; i++) xx_o[i] = x_o[i % pnX]; for (uint i = 0; i < pnX; i++) for (uint j = 0; j < pnY; j++) yy_o[i + j * pnX] = y_o[j]; Complex<Real>* h = new Complex<Real>[pnXY]; for (uint ch = 0; ch < nChannel; ch++) { fft2(complex_H[ch], h, pnX, pnY, OPH_FORWARD); fft2(ivec2(pnX, pnY), h, OPH_FORWARD); fftExecute(h); fft2(h, h, pnX, pnY, OPH_BACKWARD); fft2(h, h, pnX, pnY, OPH_FORWARD); fft2(ivec2(pnX, pnY), h, OPH_BACKWARD); fftExecute(h); fft2(h, h, pnX, pnY, OPH_BACKWARD); for (int i = 0; i < pnXY; i++) { Complex<Real> shift_phase(1.0, 0.0); int r = i / pnX; int c = i % pnX; Real X = (M_PI * xx_o[i] * spectrum_shift[_X]) / ppX; Real Y = (M_PI * yy_o[i] * spectrum_shift[_Y]) / ppY; shift_phase[_RE] = shift_phase[_RE] * (cos(X) * cos(Y) - sin(X) * sin(Y)); m_lpEncoded[ch][i] = (h[i] * shift_phase).real(); } } delete[] h; delete[] x_o; delete[] xx_o; delete[] y_o; delete[] yy_o; LOG("Done.\n"); } /** @fn void encoding(unsigned int ENCODE_FLAG) @brief abstract function of ophGen::encoding @return ¾øÀ½ @param ENCODE_FLAG: ¾Ïȣȭ ¸Þ¼­µå */ void ophPAS_GPU::encoding(unsigned int ENCODE_FLAG) { ophGen::encoding(ENCODE_FLAG); }
301 char* ophPAS_GPU::trim(char* s)
302 {
303  return rtrim(ltrim(s));
304 }
305 
306 
315 {
317  memset(m_pHologram, 0x00, sizeof(double)*getContext().pixel_number[_X] * getContext().pixel_number[_Y]);
318 
319  //for (int i = 0; i < NUMTBL; i++) {
320  // float theta = (float)M2_PI * (float)(i + i - 1) / (float)(2 * NUMTBL);
321  // m_COStbl[i] = (float)cos(theta);
322  // m_SINtbl[i] = (float)sin(theta);
323  //}// -> gpu
324 }
325 
326 
327 
328 /*
329 void ophPAS::PASCalcuation(long voxnum, unsigned char * cghfringe, VoxelStruct * h_vox, CGHEnvironmentData * _CGHE)
330 {
331 long i, j;
332 
333 double Max = -1E9, Min = 1E9;
334 double myBuffer;
335 int cghwidth = _CGHE->CghWidth;
336 int cghheight = _CGHE->CghHeight;
337 
338 DataInit(_CGHE);
339 
340 //PAS
341 //
342 PAS(voxnum, h_vox, m_pHologram, _CGHE);
343 //
344 
345 for (i = 0; i<cghheight; i++) {
346 for (j = 0; j<cghwidth; j++) {
347 if (Max < m_pHologram[i*cghwidth + j]) Max = m_pHologram[i*cghwidth + j];
348 if (Min > m_pHologram[i*cghwidth + j]) Min = m_pHologram[i*cghwidth + j];
349 }
350 }
351 
352 for (i = 0; i<cghheight; i++) {
353 for (j = 0; j<cghwidth; j++) {
354 myBuffer = 1.0*(((m_pHologram[i*cghwidth + j] - Min) / (Max - Min))*255. + 0.5);
355 if (myBuffer >= 255.0) cghfringe[i*cghwidth + j] = 255;
356 else cghfringe[i*cghwidth + j] = (unsigned char)(myBuffer);
357 }
358 }
359 
360 }
361 */
362 
373 void ophPAS_GPU::PASCalculation(long voxnum, unsigned char * cghfringe, OphPointCloudData *data, OphPointCloudConfig& conf) {
374  long i, j;
375 
376  double Max = -1E9, Min = 1E9;
377  double myBuffer;
378  int cghwidth = getContext().pixel_number[_X];
379  int cghheight = getContext().pixel_number[_Y];
380 
381 
382 
383  //DataInit(_CGHE);
384  DataInit(conf);
385 
386  //PAS
387  //
388  //PAS(voxnum, h_vox, m_pHologram, _CGHE);
389  PAS(voxnum, data, m_pHologram, conf);
390  //
391  /*
392  °í¼ÓÈ­ ÇÊ¿äÇÑ º¯¼ö m_pHologram cghfringe º¯¼ö´Â unified memory »ç¿ëÇÏ´Â ¹æÇâÀ¸·Î */ for (i = 0; i < cghheight; i++) { for (j = 0; j < cghwidth; j++) { if (Max < m_pHologram[i*cghwidth + j]) Max = m_pHologram[i*cghwidth + j]; if (Min > m_pHologram[i*cghwidth + j]) Min = m_pHologram[i*cghwidth + j]; } } for (i = 0; i < cghheight; i++) { for (j = 0; j < cghwidth; j++) { myBuffer = 1.0*(((m_pHologram[i*cghwidth + j] - Min) / (Max - Min))*255. + 0.5); if (myBuffer >= 255.0) cghfringe[i*cghwidth + j] = 255; else cghfringe[i*cghwidth + j] = (unsigned char)(myBuffer); } } } /* void ophPAS::PAS(long voxelnum, VoxelStruct * voxel, double * m_pHologram, CGHEnvironmentData* _CGHE) { float xiInterval = _CGHE->xiInterval; float etaInterval = _CGHE->etaInterval; float cghScale = _CGHE->CGHScale; float defaultDepth = _CGHE->DefaultDepth; DataInit(_CGHE->fftSegmentationSize, _CGHE->CghWidth, _CGHE->CghHeight, xiInterval, etaInterval); int no; // voxel Number float X, Y, Z; ; // x, y, real distance float Amplitude; float sf_base = 1.0 / (xiInterval*_CGHE->fftSegmentationSize); //CString mm; clock_t start, finish; double duration; start = clock(); // Iteration according to the point number for (no = 0; no<voxelnum; no++) { // point coordinate X = (voxel[no].x) * cghScale; Y = (voxel[no].y) * cghScale; Z = voxel[no].z * cghScale - defaultDepth; Amplitude = voxel[no].r; CalcSpatialFrequency(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_SFrequency_cx, m_SFrequency_cy , m_PickPoint_cx, m_PickPoint_cy , m_Coefficient_cx, m_Coefficient_cy , xiInterval, etaInterval,_CGHE); CalcCompensatedPhase(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_Coefficient_cx, m_Coefficient_cy , m_COStbl, m_SINtbl , m_inRe, m_inIm,_CGHE); } RunFFTW(m_segNumx, m_segNumy , m_segSize, m_hsegSize , m_inRe, m_inIm , m_in, m_out , &m_plan, m_pHologram,_CGHE); finish = clock(); duration = (double)(finish - start) / CLOCKS_PER_SEC; //mm.Format("%f", duration); //AfxMessageBox(mm); cout << duration << endl; MemoryRelease(); } */ /** @fn void PAS(long voxnum, OphPointCloudData *data, double* m_pHologram,OphPointCloudConfig& conf) @brief implementation of the PAS @return ¾øÀ½ @param voxnum: vertex °³¼ö data: OpenHolo Data°ü·Ã ±¸Á¶Ã¼ m_pHologram: fftw °á°ú¸¦ ³ÖÀ» º¯¼ö conf: OpenHolo Config°ü·Ã ±¸Á¶Ã¼ */ void ophPAS_GPU::PAS(long voxelnum, OphPointCloudData *data, double * m_pHologram, OphPointCloudConfig& conf) { float xiInterval = getContext().pixel_pitch[_X];//_CGHE->xiInterval; float etaInterval = getContext().pixel_pitch[_Y];//_CGHE->etaInterval; float cghScale = conf.scale[_X];// _CGHE->CGHScale; float defaultDepth = conf.distance;//_CGHE->DefaultDepth; DataInit(FFT_SEGMENT_SIZE, getContext().pixel_number[_X], getContext().pixel_number[_Y], xiInterval, etaInterval); long no; // voxel Number float X, Y, Z; ; // x, y, real distance float Amplitude; float sf_base = 1.0 / (xiInterval* FFT_SEGMENT_SIZE); //CString mm; clock_t start, finish; double duration; start = clock(); // Iteration according to the point number for (no = 0; no < voxelnum * 3; no += 3) { // point coordinate X = (data->vertices[no].point.pos[_X]) * cghScale; Y = (data->vertices[no].point.pos[_Y]) * cghScale; Z = (data->vertices[no].point.pos[_Z]) * cghScale - defaultDepth; Amplitude = data->vertices[no].phase; std::cout << "X: " << X << ", Y: " << Y << ", Z: " << Z << ", Amp: " << Amplitude << endl; //c_x = X; //c_y = Y; //c_z = Z; //amplitude = Amplitude; /* CalcSpatialFrequency(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_SFrequency_cx, m_SFrequency_cy , m_PickPoint_cx, m_PickPoint_cy , m_Coefficient_cx, m_Coefficient_cy , xiInterval, etaInterval, _CGHE); */ CalcSpatialFrequency(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_SFrequency_cx, m_SFrequency_cy , m_PickPoint_cx, m_PickPoint_cy , m_Coefficient_cx, m_Coefficient_cy , xiInterval, etaInterval, conf); /* CalcCompensatedPhase(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_Coefficient_cx, m_Coefficient_cy , m_COStbl, m_SINtbl , m_inRe, m_inIm, _CGHE); */ CalcCompensatedPhase(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_Coefficient_cx, m_Coefficient_cy , m_COStbl, m_SINtbl , m_inRe, m_inIm, conf); } /* RunFFTW(m_segNumx, m_segNumy , m_segSize, m_hsegSize , m_inRe, m_inIm , m_in, m_out , &m_plan, m_pHologram, _CGHE); */ RunFFTW(m_segNumx, m_segNumy , m_segSize, m_hsegSize , m_inRe, m_inIm , m_in, m_out , &m_plan, m_pHologram, conf); finish = clock(); duration = (double)(finish - start) / CLOCKS_PER_SEC; //mm.Format("%f", duration); //AfxMessageBox(mm); cout << duration << endl; MemoryRelease(); } /** @fn void DataInit(int segsize, int cghwidth, int cghheight, float xiinter, float etainter) @brief PAS ¾Ë°í¸®Áò ¼öÇàÀ» À§ÇÑ µ¥ÀÌÅÍ ÃʱâÈ­ ÇÔ¼ö @return ¾øÀ½ @param segsize: cghwidth: cghheight: xiinter: etainter: */ void ophPAS_GPU::DataInit(int segsize, int cghwidth, int cghheight, float xiinter, float etainter) { int i; // size m_segSize = segsize; m_hsegSize = (int)(m_segSize / 2); m_dsegSize = m_segSize*m_segSize; m_segNumx = (int)(cghwidth / m_segSize); m_segNumy = (int)(cghheight / m_segSize); m_hsegNumx = (int)(m_segNumx / 2); m_hsegNumy = (int)(m_segNumy / 2); cudaMallocHost((void**)&m_inRe_h, sizeof(float)*m_segNumy * m_segNumx * m_segSize * m_segSize); cudaMallocHost((void**)&m_inIm_h, sizeof(float)*m_segNumy * m_segNumx * m_segSize * m_segSize); cudaMallocHost((void**)&m_Coefficient_cx, sizeof(int)*m_segNumx); cudaMallocHost((void**)&m_Coefficient_cy, sizeof(int)*m_segNumy); cudaMallocHost((void**)&m_xc, sizeof(float)*m_segNumx); cudaMallocHost((void**)&m_yc, sizeof(float)*m_segNumy); cudaMallocHost((void**)&m_COStbl, sizeof(float)*NUMTBL); cudaMallocHost((void**)&m_SINtbl, sizeof(float)*NUMTBL); /*m_inRe_h = new float[m_segNumy * m_segNumx * m_segSize * m_segSize]{ 0 }; m_inIm_h = new float[m_segNumy * m_segNumx * m_segSize * m_segSize]{ 0 }; m_COStbl = new float[NUMTBL]; m_SINtbl = new float[NUMTBL]; m_Coefficient_cx = new int[m_segNumx]; m_Coefficient_cy = new int[m_segNumy]; m_xc = new float[m_segNumx]; m_yc = new float[m_segNumy];*/ // calculation components m_SFrequency_cx = new float[m_segNumx]; m_SFrequency_cy = new float[m_segNumy]; m_PickPoint_cx = new int[m_segNumx]; m_PickPoint_cy = new int[m_segNumy]; for (int i = 0; i<NUMTBL; i++) { float theta = (float)M2_PI * (float)(i + i - 1) / (float)(2 * NUMTBL); m_COStbl[i] = (float)cos(theta); m_SINtbl[i] = (float)sin(theta); } // base spatial frequency m_sf_base = (float)(1.0 / (xiinter*m_segSize)); /* for (i = 0; i<m_segNumy; i++) { for (j = 0; j<m_segNumx; j++) { m_inRe[i*m_segNumx + j] = new float[m_segSize * m_segSize]; m_inIm[i*m_segNumx + j] = new float[m_segSize * m_segSize]; memset(m_inRe[i*m_segNumx + j], 0x00, sizeof(float) * m_segSize * m_segSize); memset(m_inIm[i*m_segNumx + j], 0x00, sizeof(float) * m_segSize * m_segSize); } } */ m_in = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * m_segSize * m_segSize); m_out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * m_segSize * m_segSize); memset(m_in, 0x00, sizeof(fftw_complex) * m_segSize * m_segSize); // segmentation center point calculation for (i = 0; i<m_segNumy; i++) m_yc[i] = ((i - m_hsegNumy) * m_segSize + m_hsegSize) * etainter; for (i = 0; i<m_segNumx; i++) m_xc[i] = ((i - m_hsegNumx) * m_segSize + m_hsegSize) * xiinter; m_plan = fftw_plan_dft_2d(m_segSize, m_segSize, m_in, m_out, FFTW_BACKWARD, FFTW_ESTIMATE); //sex = m_segNumx; //sey = m_segNumy; //sen = segsize; } void ophPAS_GPU::MemoryRelease(void) { cudaFree(&se); fftw_destroy_plan(m_plan); fftw_free(m_in); fftw_free(m_out); delete[] m_SFrequency_cx; delete[] m_SFrequency_cy; delete[] m_PickPoint_cx; delete[] m_PickPoint_cy; /*delete[] m_Coefficient_cx; delete[] m_Coefficient_cy; delete[] m_xc; delete[] m_yc; delete[] m_COStbl; delete[] m_SINtbl; delete[] m_inRe_h; delete[] m_inIm_h;*/ cudaFreeHost(m_Coefficient_cx); cudaFreeHost(m_Coefficient_cy); cudaFreeHost(m_xc); cudaFreeHost(m_yc); cudaFreeHost(m_COStbl); cudaFreeHost(m_SINtbl); cudaFreeHost(m_inRe_h); cudaFreeHost(m_inIm_h); /* for (i = 0; i<m_segNumy; i++) { for (j = 0; j<m_segNumx; j++) { delete[] m_inRe[i*m_segNumx + j]; delete[] m_inIm[i*m_segNumx + j]; } } */ } void ophPAS_GPU::generateHologram() { auto begin = CUR_TIME; cgh_fringe = new unsigned char[context_.pixel_number[_X] * context_.pixel_number[_Y]]; PASCalculation(n_points, cgh_fringe, &pc_data, pc_config); auto end = CUR_TIME; m_elapsedTime = ((std::chrono::duration<Real>)(end - begin)).count(); LOG("Total Elapsed Time: %lf (s)\n", m_elapsedTime); } void ophPAS_GPU::PASCalculation_GPU(long voxnum, unsigned char * cghfringe, OphPointCloudData * data, OphPointCloudConfig & conf) { } void ophPAS_GPU::CalcSpatialFrequency(float cx, float cy, float cz, float amp, int segnumx, int segnumy, int segsize, int hsegsize, float sf_base, float * xc, float * yc, float * sf_cx, float * sf_cy, int * pp_cx, int * pp_cy, int * cf_cx, int * cf_cy, float xiint, float etaint, OphPointCloudConfig& conf) { int segx, segy; // coordinate in a Segment float theta_cx, theta_cy; float rWaveLength = getContext().wave_length[0];//_CGHE->rWaveLength; float thetaX = 0.0;// _CGHE->ThetaX; float thetaY = 0.0;// _CGHE->ThetaY; for (segx = 0; segx < segnumx; segx++) { theta_cx = (xc[segx] - cx) / cz; sf_cx[segx] = (float)((theta_cx + thetaX) / rWaveLength); (sf_cx[segx] >= 0) ? pp_cx[segx] = (int)(sf_cx[segx] / sf_base + 0.5) : pp_cx[segx] = (int)(sf_cx[segx] / sf_base - 0.5); (abs(pp_cx[segx]) < hsegsize) ? cf_cx[segx] = ((segsize - pp_cx[segx]) % segsize) : cf_cx[segx] = 0; } for (segy = 0; segy < segnumy; segy++) { theta_cy = (yc[segy] - cy) / cz; sf_cy[segy] = (float)((theta_cy + thetaY) / rWaveLength); (sf_cy[segy] >= 0) ? pp_cy[segy] = (int)(sf_cy[segy] / sf_base + 0.5) : pp_cy[segy] = (int)(sf_cy[segy] / sf_base - 0.5); (abs(pp_cy[segy]) < hsegsize) ? cf_cy[segy] = ((segsize - pp_cy[segy]) % segsize) : cf_cy[segy] = 0; } } void ophPAS_GPU::CalcCompensatedPhase(float cx, float cy, float cz, float amp , int segNumx, int segNumy , int segsize, int hsegsize, float sf_base , float *xc, float *yc , int *cf_cx, int *cf_cy , float *COStbl, float *SINtbl , float **inRe, float **inIm, OphPointCloudConfig& conf) { /* CUDA 󸮰úÁ¤ÀÌ ¼øÂ÷ÀûÀ¸·Î 󸮰¡ µÇ±â ¶§¹®¿¡, È£½ºÆ®¿¡¼­ µð¹ÙÀ̽º·Î µ¥ÀÌÅ͸¦ º¹»çÇÏ´Â °úÁ¤¿¡¼­ GPU´Â ´ë±âÇÏ°Ô µÈ´Ù. µû¶ó¼­ 󸮰úÁ¤À» ´ÙÀ½°ú °°ÀÌ Á¤ÇÑ´Ù. 1. cudaMallocHost¸¦ ÅëÇØ È£½ºÆ®Äڵ带 °­Á¦·Î pinned memory·Î »ç¿ë(GPU·Î µ¥ÀÌÅÍ Àü¼ÛÀÌ »¡¶óÁú ¼ö ÀÖÀ½) 2. cudaMemcpyAsync·Î µ¥ÀÌÅÍ Àü¼Û ¼Óµµ up(cudaMemcpy´Â µ¿±âÈ­¹æ½Ä) 3. cudastream »ç¿ëÀ¸·Î º´·Äó¸® ±Ø´ëÈ­ ¼º´Éºñ±³´Â ´ÙÀ½°ú °°ÀÌ ÇÑ´Ù. 1. ÀϹÝÀûÀÎ CUDA Programming 2. °³¼±µÈ CUDA Programming(pinned memory »ç¿ë, cudastream »ç¿ë) 3. CPU ÄÚµå */ constValue d_const; int num_x = segNumx*segNumy; int num_y = segsize*segsize; float* inRe_d; float* inIm_d; /*cudaMalloc((void**)&inRe_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&inIm_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&d_const.cf_cx, sizeof(int)*segNumx); cudaMalloc((void**)&d_const.cf_cy, sizeof(int)*segNumy); cudaMalloc((void**)&d_const.xc, sizeof(float)*segNumx); cudaMalloc((void**)&d_const.yc, sizeof(float)*segNumy); cudaMalloc((void**)&d_const.costbl, sizeof(float)*NUMTBL); cudaMalloc((void**)&d_const.sintbl, sizeof(float)*NUMTBL);*/ cudaMalloc((void**)&inRe_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&inIm_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&d_const.cf_cx, sizeof(int)*segNumx); cudaMalloc((void**)&d_const.cf_cy, sizeof(int)*segNumy); cudaMalloc((void**)&d_const.xc, sizeof(float)*segNumx); cudaMalloc((void**)&d_const.yc, sizeof(float)*segNumy); cudaMalloc((void**)&d_const.costbl, sizeof(float)*NUMTBL); cudaMalloc((void**)&d_const.sintbl, sizeof(float)*NUMTBL); cudaMemcpyAsync(inRe_d, m_inRe_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpyAsync(inIm_d, m_inIm_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.cf_cx, cf_cx , sizeof(int)*segNumx, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.cf_cy, cf_cy , sizeof(int)*segNumy, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.xc, xc, sizeof(float)*segNumx, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.yc, yc, sizeof(float)*segNumy, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.costbl, COStbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.sintbl, SINtbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice); ivec3 v(segNumx, segNumy, segsize); cuda_Wrapper_phaseCalc(inRe_d, inIm_d, d_const, cx, cy, cz, amp, v); //phaseCalc << <blockSize, gridSize >> >(inRe_d, inIm_d, d_const); cudaMemcpyAsync(m_inRe_h, inRe_d , sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); cudaMemcpyAsync(m_inIm_h, inIm_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); /*cudaMemcpy(inRe_d, m_inRe_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpy(inIm_d, m_inIm_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpy(d_const.cf_cx, cf_cx, sizeof(int)*segNumx, cudaMemcpyHostToDevice); cudaMemcpy(d_const.cf_cy, cf_cy, sizeof(int)*segNumy, cudaMemcpyHostToDevice); cudaMemcpy(d_const.xc, xc, sizeof(float)*segNumx, cudaMemcpyHostToDevice); cudaMemcpy(d_const.yc, yc, sizeof(float)*segNumy, cudaMemcpyHostToDevice); cudaMemcpy(d_const.costbl, COStbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice); cudaMemcpy(d_const.sintbl, SINtbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice);*/ /*cudaMemcpyAsync(m_inRe_h, inRe_d, sizeof(float)*i*j, cudaMemcpyDeviceToHost); cudaMemcpyAsync(m_inIm_h, inIm_d, sizeof(float)*i*j, cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); cudaFreeHost(d_const.cf_cx); cudaFreeHost(d_const.cf_cy); cudaFreeHost(d_const.xc); cudaFreeHost(d_const.yc); cudaFreeHost(d_const.costbl); cudaFreeHost(d_const.sintbl); cudaFreeHost(inRe_d); cudaFreeHost(inIm_d); */ /*for (int i = 0; i < nStreams; i++) cudaStreamDestroy(streams[i]);*/ cudaFree(d_const.cf_cx); cudaFree(d_const.cf_cy); cudaFree(d_const.xc); cudaFree(d_const.yc); cudaFree(d_const.costbl); cudaFree(d_const.sintbl); cudaFree(inRe_d); cudaFree(inIm_d); /*cudaFreeHost(cf_cx); cudaFreeHost(cf_cy); cudaFreeHost(xc); cudaFreeHost(yc); cudaFreeHost(COStbl); cudaFreeHost(SINtbl); cudaFreeHost(m_inRe_h); cudaFreeHost(m_inIm_h);*/ /*phaseCalc << <blockSize, gridSize >> >(inRe_d, inIm_d, d_const); cudaMemcpy(m_inRe_h, inRe_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); cudaMemcpy(m_inIm_h, inIm_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); cudaFree(d_const.cf_cx); cudaFree(d_const.cf_cy); cudaFree(d_const.xc); cudaFree(d_const.yc); cudaFree(d_const.costbl); cudaFree(d_const.sintbl); cudaFree(inRe_d); cudaFree(inIm_d);*/ } /** @fn void RunFFTW(int segnumx, int segnumy, int segsize, int hsegsize, float ** inRe, float ** inIm, fftw_complex * in, fftw_complex * out, fftw_plan * plan, double * pHologram, OphPointCloudConfig& conf) @brief Ǫ¸®¿¡ º¯È¯ ¼öÇà ÇÔ¼ö @return ¾øÀ½ @param voxnum: vertex °³¼ö cghfringe: À̹ÌÁö·Î ÀúÀåÇÒ ¹è¿­ data: OpenHolo Data°ü·Ã ±¸Á¶Ã¼ conf: OpenHolo Config°ü·Ã ±¸Á¶Ã¼ */ void ophPAS_GPU::RunFFTW(int segnumx, int segnumy, int segsize, int hsegsize, float ** inRe, float ** inIm, fftw_complex * in, fftw_complex * out, fftw_plan * plan, double * pHologram, OphPointCloudConfig& conf) { int i, j; int segx, segy; // coordinate in a Segment int segxx, segyy; int cghWidth = getContext().pixel_number[_X]; int rows = m_segNumy; int cols = m_segNumx; for (segy = 0; segy < segnumy; segy++) { for (segx = 0; segx < segnumx; segx++) { segyy = segy * segnumx + segx; memset(in, 0x00, sizeof(fftw_complex) * segsize * segsize); for (i = 0; i < segsize; i++) { for (j = 0; j < segsize; j++) { segxx = i * segsize + j; in[i*segsize + j][0] = m_inRe_h[segyy*segsize*segsize+segxx]; in[i*segsize + j][1] = m_inIm_h[segyy*segsize*segsize+segxx]; //inIm_h°ªÀÌ ´Ù¸§(x) ´Ù °°À½ } } fftw_execute(*plan); for (i = 0; i < segsize; i++) { for (j = 0; j < segsize; j++) { pHologram[(segy*segsize + i)*cghWidth + (segx*segsize + j)] = out[i * segsize + j][0];// - out[l * SEGSIZE + m][1]; } } } } } void ophPAS_GPU::encodeHologram(const vec2 band_limit, const vec2 spectrum_shift) { if (complex_H == nullptr) { LOG("Not found diffracted data."); return; } LOG("Single Side Band Encoding.."); const uint nChannel = context_.waveNum; const uint pnX = context_.pixel_number[_X]; const uint pnY = context_.pixel_number[_Y]; const Real ppX = context_.pixel_pitch[_X]; const Real ppY = context_.pixel_pitch[_Y]; const long long int pnXY = pnX * pnY; m_vecEncodeSize = ivec2(pnX, pnY); context_.ss[_X] = pnX * ppX; context_.ss[_Y] = pnY * ppY; vec2 ss = context_.ss; Real cropx = floor(pnX * band_limit[_X]); Real cropx1 = cropx - floor(cropx / 2); Real cropx2 = cropx1 + cropx - 1; Real cropy = floor(pnY * band_limit[_Y]); Real cropy1 = cropy - floor(cropy / 2); Real cropy2 = cropy1 + cropy - 1; Real* x_o = new Real[pnX]; Real* y_o = new Real[pnY]; for (uint i = 0; i < pnX; i++) x_o[i] = (-ss[_X] / 2) + (ppX * i) + (ppX / 2); for (uint i = 0; i < pnY; i++) y_o[i] = (ss[_Y] - ppY) - (ppY * i); Real* xx_o = new Real[pnXY]; Real* yy_o = new Real[pnXY]; for (uint i = 0; i < pnXY; i++) xx_o[i] = x_o[i % pnX]; for (uint i = 0; i < pnX; i++) for (uint j = 0; j < pnY; j++) yy_o[i + j * pnX] = y_o[j]; Complex<Real>* h = new Complex<Real>[pnXY]; for (uint ch = 0; ch < nChannel; ch++) { fft2(complex_H[ch], h, pnX, pnY, OPH_FORWARD); fft2(ivec2(pnX, pnY), h, OPH_FORWARD); fftExecute(h); fft2(h, h, pnX, pnY, OPH_BACKWARD); fft2(h, h, pnX, pnY, OPH_FORWARD); fft2(ivec2(pnX, pnY), h, OPH_BACKWARD); fftExecute(h); fft2(h, h, pnX, pnY, OPH_BACKWARD); for (int i = 0; i < pnXY; i++) { Complex<Real> shift_phase(1.0, 0.0); int r = i / pnX; int c = i % pnX; Real X = (M_PI * xx_o[i] * spectrum_shift[_X]) / ppX; Real Y = (M_PI * yy_o[i] * spectrum_shift[_Y]) / ppY; shift_phase[_RE] = shift_phase[_RE] * (cos(X) * cos(Y) - sin(X) * sin(Y)); m_lpEncoded[ch][i] = (h[i] * shift_phase).real(); } } delete[] h; delete[] x_o; delete[] xx_o; delete[] y_o; delete[] yy_o; LOG("Done.\n"); } /** @fn void encoding(unsigned int ENCODE_FLAG) @brief abstract function of ophGen::encoding @return ¾øÀ½ @param ENCODE_FLAG: ¾Ïȣȭ ¸Þ¼­µå */ void ophPAS_GPU::encoding(unsigned int ENCODE_FLAG) { ophGen::encoding(ENCODE_FLAG); }
393  m_pHologram
394  cghfringe
395  º¯¼ö´Â unified memory »ç¿ëÇÏ´Â ¹æÇâÀ¸·Î */ for (i = 0; i < cghheight; i++) { for (j = 0; j < cghwidth; j++) { if (Max < m_pHologram[i*cghwidth + j]) Max = m_pHologram[i*cghwidth + j]; if (Min > m_pHologram[i*cghwidth + j]) Min = m_pHologram[i*cghwidth + j]; } } for (i = 0; i < cghheight; i++) { for (j = 0; j < cghwidth; j++) { myBuffer = 1.0*(((m_pHologram[i*cghwidth + j] - Min) / (Max - Min))*255. + 0.5); if (myBuffer >= 255.0) cghfringe[i*cghwidth + j] = 255; else cghfringe[i*cghwidth + j] = (unsigned char)(myBuffer); } } } /* void ophPAS::PAS(long voxelnum, VoxelStruct * voxel, double * m_pHologram, CGHEnvironmentData* _CGHE) { float xiInterval = _CGHE->xiInterval; float etaInterval = _CGHE->etaInterval; float cghScale = _CGHE->CGHScale; float defaultDepth = _CGHE->DefaultDepth; DataInit(_CGHE->fftSegmentationSize, _CGHE->CghWidth, _CGHE->CghHeight, xiInterval, etaInterval); int no; // voxel Number float X, Y, Z; ; // x, y, real distance float Amplitude; float sf_base = 1.0 / (xiInterval*_CGHE->fftSegmentationSize); //CString mm; clock_t start, finish; double duration; start = clock(); // Iteration according to the point number for (no = 0; no<voxelnum; no++) { // point coordinate X = (voxel[no].x) * cghScale; Y = (voxel[no].y) * cghScale; Z = voxel[no].z * cghScale - defaultDepth; Amplitude = voxel[no].r; CalcSpatialFrequency(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_SFrequency_cx, m_SFrequency_cy , m_PickPoint_cx, m_PickPoint_cy , m_Coefficient_cx, m_Coefficient_cy , xiInterval, etaInterval,_CGHE); CalcCompensatedPhase(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_Coefficient_cx, m_Coefficient_cy , m_COStbl, m_SINtbl , m_inRe, m_inIm,_CGHE); } RunFFTW(m_segNumx, m_segNumy , m_segSize, m_hsegSize , m_inRe, m_inIm , m_in, m_out , &m_plan, m_pHologram,_CGHE); finish = clock(); duration = (double)(finish - start) / CLOCKS_PER_SEC; //mm.Format("%f", duration); //AfxMessageBox(mm); cout << duration << endl; MemoryRelease(); } */ /** @fn void PAS(long voxnum, OphPointCloudData *data, double* m_pHologram,OphPointCloudConfig& conf) @brief implementation of the PAS @return ¾øÀ½ @param voxnum: vertex °³¼ö data: OpenHolo Data°ü·Ã ±¸Á¶Ã¼ m_pHologram: fftw °á°ú¸¦ ³ÖÀ» º¯¼ö conf: OpenHolo Config°ü·Ã ±¸Á¶Ã¼ */ void ophPAS_GPU::PAS(long voxelnum, OphPointCloudData *data, double * m_pHologram, OphPointCloudConfig& conf) { float xiInterval = getContext().pixel_pitch[_X];//_CGHE->xiInterval; float etaInterval = getContext().pixel_pitch[_Y];//_CGHE->etaInterval; float cghScale = conf.scale[_X];// _CGHE->CGHScale; float defaultDepth = conf.distance;//_CGHE->DefaultDepth; DataInit(FFT_SEGMENT_SIZE, getContext().pixel_number[_X], getContext().pixel_number[_Y], xiInterval, etaInterval); long no; // voxel Number float X, Y, Z; ; // x, y, real distance float Amplitude; float sf_base = 1.0 / (xiInterval* FFT_SEGMENT_SIZE); //CString mm; clock_t start, finish; double duration; start = clock(); // Iteration according to the point number for (no = 0; no < voxelnum * 3; no += 3) { // point coordinate X = (data->vertices[no].point.pos[_X]) * cghScale; Y = (data->vertices[no].point.pos[_Y]) * cghScale; Z = (data->vertices[no].point.pos[_Z]) * cghScale - defaultDepth; Amplitude = data->vertices[no].phase; std::cout << "X: " << X << ", Y: " << Y << ", Z: " << Z << ", Amp: " << Amplitude << endl; //c_x = X; //c_y = Y; //c_z = Z; //amplitude = Amplitude; /* CalcSpatialFrequency(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_SFrequency_cx, m_SFrequency_cy , m_PickPoint_cx, m_PickPoint_cy , m_Coefficient_cx, m_Coefficient_cy , xiInterval, etaInterval, _CGHE); */ CalcSpatialFrequency(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_SFrequency_cx, m_SFrequency_cy , m_PickPoint_cx, m_PickPoint_cy , m_Coefficient_cx, m_Coefficient_cy , xiInterval, etaInterval, conf); /* CalcCompensatedPhase(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_Coefficient_cx, m_Coefficient_cy , m_COStbl, m_SINtbl , m_inRe, m_inIm, _CGHE); */ CalcCompensatedPhase(X, Y, Z, Amplitude , m_segNumx, m_segNumy , m_segSize, m_hsegSize, m_sf_base , m_xc, m_yc , m_Coefficient_cx, m_Coefficient_cy , m_COStbl, m_SINtbl , m_inRe, m_inIm, conf); } /* RunFFTW(m_segNumx, m_segNumy , m_segSize, m_hsegSize , m_inRe, m_inIm , m_in, m_out , &m_plan, m_pHologram, _CGHE); */ RunFFTW(m_segNumx, m_segNumy , m_segSize, m_hsegSize , m_inRe, m_inIm , m_in, m_out , &m_plan, m_pHologram, conf); finish = clock(); duration = (double)(finish - start) / CLOCKS_PER_SEC; //mm.Format("%f", duration); //AfxMessageBox(mm); cout << duration << endl; MemoryRelease(); } /** @fn void DataInit(int segsize, int cghwidth, int cghheight, float xiinter, float etainter) @brief PAS ¾Ë°í¸®Áò ¼öÇàÀ» À§ÇÑ µ¥ÀÌÅÍ ÃʱâÈ­ ÇÔ¼ö @return ¾øÀ½ @param segsize: cghwidth: cghheight: xiinter: etainter: */ void ophPAS_GPU::DataInit(int segsize, int cghwidth, int cghheight, float xiinter, float etainter) { int i; // size m_segSize = segsize; m_hsegSize = (int)(m_segSize / 2); m_dsegSize = m_segSize*m_segSize; m_segNumx = (int)(cghwidth / m_segSize); m_segNumy = (int)(cghheight / m_segSize); m_hsegNumx = (int)(m_segNumx / 2); m_hsegNumy = (int)(m_segNumy / 2); cudaMallocHost((void**)&m_inRe_h, sizeof(float)*m_segNumy * m_segNumx * m_segSize * m_segSize); cudaMallocHost((void**)&m_inIm_h, sizeof(float)*m_segNumy * m_segNumx * m_segSize * m_segSize); cudaMallocHost((void**)&m_Coefficient_cx, sizeof(int)*m_segNumx); cudaMallocHost((void**)&m_Coefficient_cy, sizeof(int)*m_segNumy); cudaMallocHost((void**)&m_xc, sizeof(float)*m_segNumx); cudaMallocHost((void**)&m_yc, sizeof(float)*m_segNumy); cudaMallocHost((void**)&m_COStbl, sizeof(float)*NUMTBL); cudaMallocHost((void**)&m_SINtbl, sizeof(float)*NUMTBL); /*m_inRe_h = new float[m_segNumy * m_segNumx * m_segSize * m_segSize]{ 0 }; m_inIm_h = new float[m_segNumy * m_segNumx * m_segSize * m_segSize]{ 0 }; m_COStbl = new float[NUMTBL]; m_SINtbl = new float[NUMTBL]; m_Coefficient_cx = new int[m_segNumx]; m_Coefficient_cy = new int[m_segNumy]; m_xc = new float[m_segNumx]; m_yc = new float[m_segNumy];*/ // calculation components m_SFrequency_cx = new float[m_segNumx]; m_SFrequency_cy = new float[m_segNumy]; m_PickPoint_cx = new int[m_segNumx]; m_PickPoint_cy = new int[m_segNumy]; for (int i = 0; i<NUMTBL; i++) { float theta = (float)M2_PI * (float)(i + i - 1) / (float)(2 * NUMTBL); m_COStbl[i] = (float)cos(theta); m_SINtbl[i] = (float)sin(theta); } // base spatial frequency m_sf_base = (float)(1.0 / (xiinter*m_segSize)); /* for (i = 0; i<m_segNumy; i++) { for (j = 0; j<m_segNumx; j++) { m_inRe[i*m_segNumx + j] = new float[m_segSize * m_segSize]; m_inIm[i*m_segNumx + j] = new float[m_segSize * m_segSize]; memset(m_inRe[i*m_segNumx + j], 0x00, sizeof(float) * m_segSize * m_segSize); memset(m_inIm[i*m_segNumx + j], 0x00, sizeof(float) * m_segSize * m_segSize); } } */ m_in = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * m_segSize * m_segSize); m_out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * m_segSize * m_segSize); memset(m_in, 0x00, sizeof(fftw_complex) * m_segSize * m_segSize); // segmentation center point calculation for (i = 0; i<m_segNumy; i++) m_yc[i] = ((i - m_hsegNumy) * m_segSize + m_hsegSize) * etainter; for (i = 0; i<m_segNumx; i++) m_xc[i] = ((i - m_hsegNumx) * m_segSize + m_hsegSize) * xiinter; m_plan = fftw_plan_dft_2d(m_segSize, m_segSize, m_in, m_out, FFTW_BACKWARD, FFTW_ESTIMATE); //sex = m_segNumx; //sey = m_segNumy; //sen = segsize; } void ophPAS_GPU::MemoryRelease(void) { cudaFree(&se); fftw_destroy_plan(m_plan); fftw_free(m_in); fftw_free(m_out); delete[] m_SFrequency_cx; delete[] m_SFrequency_cy; delete[] m_PickPoint_cx; delete[] m_PickPoint_cy; /*delete[] m_Coefficient_cx; delete[] m_Coefficient_cy; delete[] m_xc; delete[] m_yc; delete[] m_COStbl; delete[] m_SINtbl; delete[] m_inRe_h; delete[] m_inIm_h;*/ cudaFreeHost(m_Coefficient_cx); cudaFreeHost(m_Coefficient_cy); cudaFreeHost(m_xc); cudaFreeHost(m_yc); cudaFreeHost(m_COStbl); cudaFreeHost(m_SINtbl); cudaFreeHost(m_inRe_h); cudaFreeHost(m_inIm_h); /* for (i = 0; i<m_segNumy; i++) { for (j = 0; j<m_segNumx; j++) { delete[] m_inRe[i*m_segNumx + j]; delete[] m_inIm[i*m_segNumx + j]; } } */ } void ophPAS_GPU::generateHologram() { auto begin = CUR_TIME; cgh_fringe = new unsigned char[context_.pixel_number[_X] * context_.pixel_number[_Y]]; PASCalculation(n_points, cgh_fringe, &pc_data, pc_config); auto end = CUR_TIME; m_elapsedTime = ((std::chrono::duration<Real>)(end - begin)).count(); LOG("Total Elapsed Time: %lf (s)\n", m_elapsedTime); } void ophPAS_GPU::PASCalculation_GPU(long voxnum, unsigned char * cghfringe, OphPointCloudData * data, OphPointCloudConfig & conf) { } void ophPAS_GPU::CalcSpatialFrequency(float cx, float cy, float cz, float amp, int segnumx, int segnumy, int segsize, int hsegsize, float sf_base, float * xc, float * yc, float * sf_cx, float * sf_cy, int * pp_cx, int * pp_cy, int * cf_cx, int * cf_cy, float xiint, float etaint, OphPointCloudConfig& conf) { int segx, segy; // coordinate in a Segment float theta_cx, theta_cy; float rWaveLength = getContext().wave_length[0];//_CGHE->rWaveLength; float thetaX = 0.0;// _CGHE->ThetaX; float thetaY = 0.0;// _CGHE->ThetaY; for (segx = 0; segx < segnumx; segx++) { theta_cx = (xc[segx] - cx) / cz; sf_cx[segx] = (float)((theta_cx + thetaX) / rWaveLength); (sf_cx[segx] >= 0) ? pp_cx[segx] = (int)(sf_cx[segx] / sf_base + 0.5) : pp_cx[segx] = (int)(sf_cx[segx] / sf_base - 0.5); (abs(pp_cx[segx]) < hsegsize) ? cf_cx[segx] = ((segsize - pp_cx[segx]) % segsize) : cf_cx[segx] = 0; } for (segy = 0; segy < segnumy; segy++) { theta_cy = (yc[segy] - cy) / cz; sf_cy[segy] = (float)((theta_cy + thetaY) / rWaveLength); (sf_cy[segy] >= 0) ? pp_cy[segy] = (int)(sf_cy[segy] / sf_base + 0.5) : pp_cy[segy] = (int)(sf_cy[segy] / sf_base - 0.5); (abs(pp_cy[segy]) < hsegsize) ? cf_cy[segy] = ((segsize - pp_cy[segy]) % segsize) : cf_cy[segy] = 0; } } void ophPAS_GPU::CalcCompensatedPhase(float cx, float cy, float cz, float amp , int segNumx, int segNumy , int segsize, int hsegsize, float sf_base , float *xc, float *yc , int *cf_cx, int *cf_cy , float *COStbl, float *SINtbl , float **inRe, float **inIm, OphPointCloudConfig& conf) { /* CUDA 󸮰úÁ¤ÀÌ ¼øÂ÷ÀûÀ¸·Î 󸮰¡ µÇ±â ¶§¹®¿¡, È£½ºÆ®¿¡¼­ µð¹ÙÀ̽º·Î µ¥ÀÌÅ͸¦ º¹»çÇÏ´Â °úÁ¤¿¡¼­ GPU´Â ´ë±âÇÏ°Ô µÈ´Ù. µû¶ó¼­ 󸮰úÁ¤À» ´ÙÀ½°ú °°ÀÌ Á¤ÇÑ´Ù. 1. cudaMallocHost¸¦ ÅëÇØ È£½ºÆ®Äڵ带 °­Á¦·Î pinned memory·Î »ç¿ë(GPU·Î µ¥ÀÌÅÍ Àü¼ÛÀÌ »¡¶óÁú ¼ö ÀÖÀ½) 2. cudaMemcpyAsync·Î µ¥ÀÌÅÍ Àü¼Û ¼Óµµ up(cudaMemcpy´Â µ¿±âÈ­¹æ½Ä) 3. cudastream »ç¿ëÀ¸·Î º´·Äó¸® ±Ø´ëÈ­ ¼º´Éºñ±³´Â ´ÙÀ½°ú °°ÀÌ ÇÑ´Ù. 1. ÀϹÝÀûÀÎ CUDA Programming 2. °³¼±µÈ CUDA Programming(pinned memory »ç¿ë, cudastream »ç¿ë) 3. CPU ÄÚµå */ constValue d_const; int num_x = segNumx*segNumy; int num_y = segsize*segsize; float* inRe_d; float* inIm_d; /*cudaMalloc((void**)&inRe_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&inIm_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&d_const.cf_cx, sizeof(int)*segNumx); cudaMalloc((void**)&d_const.cf_cy, sizeof(int)*segNumy); cudaMalloc((void**)&d_const.xc, sizeof(float)*segNumx); cudaMalloc((void**)&d_const.yc, sizeof(float)*segNumy); cudaMalloc((void**)&d_const.costbl, sizeof(float)*NUMTBL); cudaMalloc((void**)&d_const.sintbl, sizeof(float)*NUMTBL);*/ cudaMalloc((void**)&inRe_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&inIm_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&d_const.cf_cx, sizeof(int)*segNumx); cudaMalloc((void**)&d_const.cf_cy, sizeof(int)*segNumy); cudaMalloc((void**)&d_const.xc, sizeof(float)*segNumx); cudaMalloc((void**)&d_const.yc, sizeof(float)*segNumy); cudaMalloc((void**)&d_const.costbl, sizeof(float)*NUMTBL); cudaMalloc((void**)&d_const.sintbl, sizeof(float)*NUMTBL); cudaMemcpyAsync(inRe_d, m_inRe_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpyAsync(inIm_d, m_inIm_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.cf_cx, cf_cx , sizeof(int)*segNumx, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.cf_cy, cf_cy , sizeof(int)*segNumy, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.xc, xc, sizeof(float)*segNumx, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.yc, yc, sizeof(float)*segNumy, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.costbl, COStbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.sintbl, SINtbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice); ivec3 v(segNumx, segNumy, segsize); cuda_Wrapper_phaseCalc(inRe_d, inIm_d, d_const, cx, cy, cz, amp, v); //phaseCalc << <blockSize, gridSize >> >(inRe_d, inIm_d, d_const); cudaMemcpyAsync(m_inRe_h, inRe_d , sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); cudaMemcpyAsync(m_inIm_h, inIm_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); /*cudaMemcpy(inRe_d, m_inRe_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpy(inIm_d, m_inIm_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpy(d_const.cf_cx, cf_cx, sizeof(int)*segNumx, cudaMemcpyHostToDevice); cudaMemcpy(d_const.cf_cy, cf_cy, sizeof(int)*segNumy, cudaMemcpyHostToDevice); cudaMemcpy(d_const.xc, xc, sizeof(float)*segNumx, cudaMemcpyHostToDevice); cudaMemcpy(d_const.yc, yc, sizeof(float)*segNumy, cudaMemcpyHostToDevice); cudaMemcpy(d_const.costbl, COStbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice); cudaMemcpy(d_const.sintbl, SINtbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice);*/ /*cudaMemcpyAsync(m_inRe_h, inRe_d, sizeof(float)*i*j, cudaMemcpyDeviceToHost); cudaMemcpyAsync(m_inIm_h, inIm_d, sizeof(float)*i*j, cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); cudaFreeHost(d_const.cf_cx); cudaFreeHost(d_const.cf_cy); cudaFreeHost(d_const.xc); cudaFreeHost(d_const.yc); cudaFreeHost(d_const.costbl); cudaFreeHost(d_const.sintbl); cudaFreeHost(inRe_d); cudaFreeHost(inIm_d); */ /*for (int i = 0; i < nStreams; i++) cudaStreamDestroy(streams[i]);*/ cudaFree(d_const.cf_cx); cudaFree(d_const.cf_cy); cudaFree(d_const.xc); cudaFree(d_const.yc); cudaFree(d_const.costbl); cudaFree(d_const.sintbl); cudaFree(inRe_d); cudaFree(inIm_d); /*cudaFreeHost(cf_cx); cudaFreeHost(cf_cy); cudaFreeHost(xc); cudaFreeHost(yc); cudaFreeHost(COStbl); cudaFreeHost(SINtbl); cudaFreeHost(m_inRe_h); cudaFreeHost(m_inIm_h);*/ /*phaseCalc << <blockSize, gridSize >> >(inRe_d, inIm_d, d_const); cudaMemcpy(m_inRe_h, inRe_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); cudaMemcpy(m_inIm_h, inIm_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); cudaFree(d_const.cf_cx); cudaFree(d_const.cf_cy); cudaFree(d_const.xc); cudaFree(d_const.yc); cudaFree(d_const.costbl); cudaFree(d_const.sintbl); cudaFree(inRe_d); cudaFree(inIm_d);*/ } /** @fn void RunFFTW(int segnumx, int segnumy, int segsize, int hsegsize, float ** inRe, float ** inIm, fftw_complex * in, fftw_complex * out, fftw_plan * plan, double * pHologram, OphPointCloudConfig& conf) @brief Ǫ¸®¿¡ º¯È¯ ¼öÇà ÇÔ¼ö @return ¾øÀ½ @param voxnum: vertex °³¼ö cghfringe: À̹ÌÁö·Î ÀúÀåÇÒ ¹è¿­ data: OpenHolo Data°ü·Ã ±¸Á¶Ã¼ conf: OpenHolo Config°ü·Ã ±¸Á¶Ã¼ */ void ophPAS_GPU::RunFFTW(int segnumx, int segnumy, int segsize, int hsegsize, float ** inRe, float ** inIm, fftw_complex * in, fftw_complex * out, fftw_plan * plan, double * pHologram, OphPointCloudConfig& conf) { int i, j; int segx, segy; // coordinate in a Segment int segxx, segyy; int cghWidth = getContext().pixel_number[_X]; int rows = m_segNumy; int cols = m_segNumx; for (segy = 0; segy < segnumy; segy++) { for (segx = 0; segx < segnumx; segx++) { segyy = segy * segnumx + segx; memset(in, 0x00, sizeof(fftw_complex) * segsize * segsize); for (i = 0; i < segsize; i++) { for (j = 0; j < segsize; j++) { segxx = i * segsize + j; in[i*segsize + j][0] = m_inRe_h[segyy*segsize*segsize+segxx]; in[i*segsize + j][1] = m_inIm_h[segyy*segsize*segsize+segxx]; //inIm_h°ªÀÌ ´Ù¸§(x) ´Ù °°À½ } } fftw_execute(*plan); for (i = 0; i < segsize; i++) { for (j = 0; j < segsize; j++) { pHologram[(segy*segsize + i)*cghWidth + (segx*segsize + j)] = out[i * segsize + j][0];// - out[l * SEGSIZE + m][1]; } } } } } void ophPAS_GPU::encodeHologram(const vec2 band_limit, const vec2 spectrum_shift) { if (complex_H == nullptr) { LOG("Not found diffracted data."); return; } LOG("Single Side Band Encoding.."); const uint nChannel = context_.waveNum; const uint pnX = context_.pixel_number[_X]; const uint pnY = context_.pixel_number[_Y]; const Real ppX = context_.pixel_pitch[_X]; const Real ppY = context_.pixel_pitch[_Y]; const long long int pnXY = pnX * pnY; m_vecEncodeSize = ivec2(pnX, pnY); context_.ss[_X] = pnX * ppX; context_.ss[_Y] = pnY * ppY; vec2 ss = context_.ss; Real cropx = floor(pnX * band_limit[_X]); Real cropx1 = cropx - floor(cropx / 2); Real cropx2 = cropx1 + cropx - 1; Real cropy = floor(pnY * band_limit[_Y]); Real cropy1 = cropy - floor(cropy / 2); Real cropy2 = cropy1 + cropy - 1; Real* x_o = new Real[pnX]; Real* y_o = new Real[pnY]; for (uint i = 0; i < pnX; i++) x_o[i] = (-ss[_X] / 2) + (ppX * i) + (ppX / 2); for (uint i = 0; i < pnY; i++) y_o[i] = (ss[_Y] - ppY) - (ppY * i); Real* xx_o = new Real[pnXY]; Real* yy_o = new Real[pnXY]; for (uint i = 0; i < pnXY; i++) xx_o[i] = x_o[i % pnX]; for (uint i = 0; i < pnX; i++) for (uint j = 0; j < pnY; j++) yy_o[i + j * pnX] = y_o[j]; Complex<Real>* h = new Complex<Real>[pnXY]; for (uint ch = 0; ch < nChannel; ch++) { fft2(complex_H[ch], h, pnX, pnY, OPH_FORWARD); fft2(ivec2(pnX, pnY), h, OPH_FORWARD); fftExecute(h); fft2(h, h, pnX, pnY, OPH_BACKWARD); fft2(h, h, pnX, pnY, OPH_FORWARD); fft2(ivec2(pnX, pnY), h, OPH_BACKWARD); fftExecute(h); fft2(h, h, pnX, pnY, OPH_BACKWARD); for (int i = 0; i < pnXY; i++) { Complex<Real> shift_phase(1.0, 0.0); int r = i / pnX; int c = i % pnX; Real X = (M_PI * xx_o[i] * spectrum_shift[_X]) / ppX; Real Y = (M_PI * yy_o[i] * spectrum_shift[_Y]) / ppY; shift_phase[_RE] = shift_phase[_RE] * (cos(X) * cos(Y) - sin(X) * sin(Y)); m_lpEncoded[ch][i] = (h[i] * shift_phase).real(); } } delete[] h; delete[] x_o; delete[] xx_o; delete[] y_o; delete[] yy_o; LOG("Done.\n"); } /** @fn void encoding(unsigned int ENCODE_FLAG) @brief abstract function of ophGen::encoding @return ¾øÀ½ @param ENCODE_FLAG: ¾Ïȣȭ ¸Þ¼­µå */ void ophPAS_GPU::encoding(unsigned int ENCODE_FLAG) { ophGen::encoding(ENCODE_FLAG); }
396  */
397 
398 
399  for (i = 0; i < cghheight; i++) {
400  for (j = 0; j < cghwidth; j++) {
401  if (Max < m_pHologram[i*cghwidth + j]) Max = m_pHologram[i*cghwidth + j];
402  if (Min > m_pHologram[i*cghwidth + j]) Min = m_pHologram[i*cghwidth + j];
403  }
404  }
405 
406  for (i = 0; i < cghheight; i++) {
407  for (j = 0; j < cghwidth; j++) {
408  myBuffer = 1.0*(((m_pHologram[i*cghwidth + j] - Min) / (Max - Min))*255. + 0.5);
409  if (myBuffer >= 255.0) cghfringe[i*cghwidth + j] = 255;
410  else cghfringe[i*cghwidth + j] = (unsigned char)(myBuffer);
411  }
412  }
413 
414 
415 }
416 
417 /*
418 void ophPAS::PAS(long voxelnum, VoxelStruct * voxel, double * m_pHologram, CGHEnvironmentData* _CGHE)
419 {
420 float xiInterval = _CGHE->xiInterval;
421 float etaInterval = _CGHE->etaInterval;
422 float cghScale = _CGHE->CGHScale;
423 float defaultDepth = _CGHE->DefaultDepth;
424 
425 DataInit(_CGHE->fftSegmentationSize, _CGHE->CghWidth, _CGHE->CghHeight, xiInterval, etaInterval);
426 
427 int no; // voxel Number
428 
429 
430 float X, Y, Z; ; // x, y, real distance
431 float Amplitude;
432 float sf_base = 1.0 / (xiInterval*_CGHE->fftSegmentationSize);
433 
434 
435 //CString mm;
436 clock_t start, finish;
437 double duration;
438 start = clock();
439 
440 // Iteration according to the point number
441 for (no = 0; no<voxelnum; no++)
442 {
443 // point coordinate
444 X = (voxel[no].x) * cghScale;
445 Y = (voxel[no].y) * cghScale;
446 Z = voxel[no].z * cghScale - defaultDepth;
447 Amplitude = voxel[no].r;
448 
449 CalcSpatialFrequency(X, Y, Z, Amplitude
450 , m_segNumx, m_segNumy
451 , m_segSize, m_hsegSize, m_sf_base
452 , m_xc, m_yc
453 , m_SFrequency_cx, m_SFrequency_cy
454 , m_PickPoint_cx, m_PickPoint_cy
455 , m_Coefficient_cx, m_Coefficient_cy
456 , xiInterval, etaInterval,_CGHE);
457 
458 CalcCompensatedPhase(X, Y, Z, Amplitude
459 , m_segNumx, m_segNumy
460 , m_segSize, m_hsegSize, m_sf_base
461 , m_xc, m_yc
462 , m_Coefficient_cx, m_Coefficient_cy
463 , m_COStbl, m_SINtbl
464 , m_inRe, m_inIm,_CGHE);
465 
466 }
467 
468 RunFFTW(m_segNumx, m_segNumy
469 , m_segSize, m_hsegSize
470 , m_inRe, m_inIm
471 , m_in, m_out
472 , &m_plan, m_pHologram,_CGHE);
473 
474 finish = clock();
475 
476 duration = (double)(finish - start) / CLOCKS_PER_SEC;
477 //mm.Format("%f", duration);
478 //AfxMessageBox(mm);
479 cout << duration << endl;
480 MemoryRelease();
481 }
482 */
483 
494 void ophPAS_GPU::PAS(long voxelnum, OphPointCloudData *data, double * m_pHologram, OphPointCloudConfig& conf)
495 {
496  float xiInterval = getContext().pixel_pitch[_X];//_CGHE->xiInterval;
497  float etaInterval = getContext().pixel_pitch[_Y];//_CGHE->etaInterval;
498  float cghScale = conf.scale[_X];// _CGHE->CGHScale;
499  float defaultDepth = conf.distance;//_CGHE->DefaultDepth;
500 
501  DataInit(FFT_SEGMENT_SIZE, getContext().pixel_number[_X], getContext().pixel_number[_Y], xiInterval, etaInterval);
502 
503  long no; // voxel Number
504 
505 
506  float X, Y, Z; ; // x, y, real distance
507  float Amplitude;
508  float sf_base = 1.0 / (xiInterval* FFT_SEGMENT_SIZE);
509 
510  //CString mm;
511  clock_t start, finish;
512  double duration;
513  start = clock();
514 
515  // Iteration according to the point number
516  for (no = 0; no < voxelnum * 3; no += 3)
517  {
518  // point coordinate
519  X = (data->vertices[no].point.pos[_X]) * cghScale;
520  Y = (data->vertices[no].point.pos[_Y]) * cghScale;
521  Z = (data->vertices[no].point.pos[_Z]) * cghScale - defaultDepth;
522  Amplitude = data->vertices[no].phase;
523 
524  std::cout << "X: " << X << ", Y: " << Y << ", Z: " << Z << ", Amp: " << Amplitude << endl;
525 
526  //c_x = X;
527  //c_y = Y;
528  //c_z = Z;
529  //amplitude = Amplitude;
530  /*
531  CalcSpatialFrequency(X, Y, Z, Amplitude
532  , m_segNumx, m_segNumy
533  , m_segSize, m_hsegSize, m_sf_base
534  , m_xc, m_yc
535  , m_SFrequency_cx, m_SFrequency_cy
536  , m_PickPoint_cx, m_PickPoint_cy
537  , m_Coefficient_cx, m_Coefficient_cy
538  , xiInterval, etaInterval, _CGHE);
539  */
543  , m_xc, m_yc
547  , xiInterval, etaInterval, conf);
548 
549  /*
550  CalcCompensatedPhase(X, Y, Z, Amplitude
551  , m_segNumx, m_segNumy
552  , m_segSize, m_hsegSize, m_sf_base
553  , m_xc, m_yc
554  , m_Coefficient_cx, m_Coefficient_cy
555  , m_COStbl, m_SINtbl
556  , m_inRe, m_inIm, _CGHE);
557  */
561  , m_xc, m_yc
563  , m_COStbl, m_SINtbl
564  , m_inRe, m_inIm, conf);
565 
566  }
567 
568  /*
569  RunFFTW(m_segNumx, m_segNumy
570  , m_segSize, m_hsegSize
571  , m_inRe, m_inIm
572  , m_in, m_out
573  , &m_plan, m_pHologram, _CGHE);
574  */
577  , m_inRe, m_inIm
578  , m_in, m_out
579  , &m_plan, m_pHologram, conf);
580 
581  finish = clock();
582 
583  duration = (double)(finish - start) / CLOCKS_PER_SEC;
584  //mm.Format("%f", duration);
585  //AfxMessageBox(mm);
586  cout << duration << endl;
587  MemoryRelease();
588 }
589 
602 void ophPAS_GPU::DataInit(int segsize, int cghwidth, int cghheight, float xiinter, float etainter)
603 {
604  int i;
605 
606 
607 
608  // size
609  m_segSize = segsize;
610  m_hsegSize = (int)(m_segSize / 2);
612  m_segNumx = (int)(cghwidth / m_segSize);
613  m_segNumy = (int)(cghheight / m_segSize);
614  m_hsegNumx = (int)(m_segNumx / 2);
615  m_hsegNumy = (int)(m_segNumy / 2);
616 
617 
618 
619  cudaMallocHost((void**)&m_inRe_h, sizeof(float)*m_segNumy * m_segNumx * m_segSize * m_segSize);
620  cudaMallocHost((void**)&m_inIm_h, sizeof(float)*m_segNumy * m_segNumx * m_segSize * m_segSize);
621  cudaMallocHost((void**)&m_Coefficient_cx, sizeof(int)*m_segNumx);
622  cudaMallocHost((void**)&m_Coefficient_cy, sizeof(int)*m_segNumy);
623  cudaMallocHost((void**)&m_xc, sizeof(float)*m_segNumx);
624  cudaMallocHost((void**)&m_yc, sizeof(float)*m_segNumy);
625  cudaMallocHost((void**)&m_COStbl, sizeof(float)*NUMTBL);
626  cudaMallocHost((void**)&m_SINtbl, sizeof(float)*NUMTBL);
627 
628  /*m_inRe_h = new float[m_segNumy * m_segNumx * m_segSize * m_segSize]{ 0 };
629  m_inIm_h = new float[m_segNumy * m_segNumx * m_segSize * m_segSize]{ 0 };
630  m_COStbl = new float[NUMTBL];
631  m_SINtbl = new float[NUMTBL];
632 
633 
634  m_Coefficient_cx = new int[m_segNumx];
635  m_Coefficient_cy = new int[m_segNumy];
636  m_xc = new float[m_segNumx];
637  m_yc = new float[m_segNumy];*/
638  // calculation components
639  m_SFrequency_cx = new float[m_segNumx];
640  m_SFrequency_cy = new float[m_segNumy];
641  m_PickPoint_cx = new int[m_segNumx];
642  m_PickPoint_cy = new int[m_segNumy];
643 
644 
645  for (int i = 0; i<NUMTBL; i++) {
646  float theta = (float)M2_PI * (float)(i + i - 1) / (float)(2 * NUMTBL);
647  m_COStbl[i] = (float)cos(theta);
648  m_SINtbl[i] = (float)sin(theta);
649  }
650 
651 
652 
653 
654  // base spatial frequency
655  m_sf_base = (float)(1.0 / (xiinter*m_segSize));
656 
657 
658 
659 
660 
661 
662  /*
663  for (i = 0; i<m_segNumy; i++) {
664  for (j = 0; j<m_segNumx; j++) {
665  m_inRe[i*m_segNumx + j] = new float[m_segSize * m_segSize];
666  m_inIm[i*m_segNumx + j] = new float[m_segSize * m_segSize];
667  memset(m_inRe[i*m_segNumx + j], 0x00, sizeof(float) * m_segSize * m_segSize);
668  memset(m_inIm[i*m_segNumx + j], 0x00, sizeof(float) * m_segSize * m_segSize);
669  }
670  }
671  */
672  m_in = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * m_segSize * m_segSize);
673  m_out = (fftw_complex *)fftw_malloc(sizeof(fftw_complex) * m_segSize * m_segSize);
674  memset(m_in, 0x00, sizeof(fftw_complex) * m_segSize * m_segSize);
675 
676  // segmentation center point calculation
677  for (i = 0; i<m_segNumy; i++)
678  m_yc[i] = ((i - m_hsegNumy) * m_segSize + m_hsegSize) * etainter;
679  for (i = 0; i<m_segNumx; i++)
680  m_xc[i] = ((i - m_hsegNumx) * m_segSize + m_hsegSize) * xiinter;
681 
683 
684  //sex = m_segNumx;
685  //sey = m_segNumy;
686  //sen = segsize;
687 
688 }
689 
690 void ophPAS_GPU::MemoryRelease(void)
691 {
692  cudaFree(&se);
693 
694  fftw_destroy_plan(m_plan);
695  fftw_free(m_in);
696  fftw_free(m_out);
697 
698  delete[] m_SFrequency_cx;
699  delete[] m_SFrequency_cy;
700  delete[] m_PickPoint_cx;
701  delete[] m_PickPoint_cy;
702 
703  /*delete[] m_Coefficient_cx;
704  delete[] m_Coefficient_cy;
705  delete[] m_xc;
706  delete[] m_yc;
707  delete[] m_COStbl;
708  delete[] m_SINtbl;
709  delete[] m_inRe_h;
710  delete[] m_inIm_h;*/
711 
712  cudaFreeHost(m_Coefficient_cx);
713  cudaFreeHost(m_Coefficient_cy);
714  cudaFreeHost(m_xc);
715  cudaFreeHost(m_yc);
716  cudaFreeHost(m_COStbl);
717  cudaFreeHost(m_SINtbl);
718  cudaFreeHost(m_inRe_h);
719  cudaFreeHost(m_inIm_h);
720 
721  /*
722  for (i = 0; i<m_segNumy; i++) {
723  for (j = 0; j<m_segNumx; j++) {
724  delete[] m_inRe[i*m_segNumx + j];
725  delete[] m_inIm[i*m_segNumx + j];
726  }
727  }
728  */
729 
730 
731 }
732 
734 {
735 
736  auto begin = CUR_TIME;
737  cgh_fringe = new unsigned char[context_.pixel_number[_X] * context_.pixel_number[_Y]];
738 
739  PASCalculation(n_points, cgh_fringe, &pc_data, pc_config);
740  auto end = CUR_TIME;
741  m_elapsedTime = ((std::chrono::duration<Real>)(end - begin)).count();
742  LOG("Total Elapsed Time: %lf (s)\n", m_elapsedTime);
743 
744 }
745 
746 void ophPAS_GPU::PASCalculation_GPU(long voxnum, unsigned char * cghfringe, OphPointCloudData * data, OphPointCloudConfig & conf)
747 {
748 
749 }
750 
751 
752 
753 
754 
755 
756 void ophPAS_GPU::CalcSpatialFrequency(float cx, float cy, float cz, float amp, int segnumx, int segnumy, int segsize, int hsegsize, float sf_base, float * xc, float * yc, float * sf_cx, float * sf_cy, int * pp_cx, int * pp_cy, int * cf_cx, int * cf_cy, float xiint, float etaint, OphPointCloudConfig& conf)
757 {
758  int segx, segy; // coordinate in a Segment
759  float theta_cx, theta_cy;
760 
761  float rWaveLength = getContext().wave_length[0];//_CGHE->rWaveLength;
762  float thetaX = 0.0;// _CGHE->ThetaX;
763  float thetaY = 0.0;// _CGHE->ThetaY;
764 
765 
766  for (segx = 0; segx < segnumx; segx++)
767  {
768  theta_cx = (xc[segx] - cx) / cz;
769  sf_cx[segx] = (float)((theta_cx + thetaX) / rWaveLength);
770  (sf_cx[segx] >= 0) ? pp_cx[segx] = (int)(sf_cx[segx] / sf_base + 0.5)
771  : pp_cx[segx] = (int)(sf_cx[segx] / sf_base - 0.5);
772  (abs(pp_cx[segx]) < hsegsize) ? cf_cx[segx] = ((segsize - pp_cx[segx]) % segsize)
773  : cf_cx[segx] = 0;
774 
775  }
776 
777  for (segy = 0; segy < segnumy; segy++)
778  {
779  theta_cy = (yc[segy] - cy) / cz;
780  sf_cy[segy] = (float)((theta_cy + thetaY) / rWaveLength);
781  (sf_cy[segy] >= 0) ? pp_cy[segy] = (int)(sf_cy[segy] / sf_base + 0.5)
782  : pp_cy[segy] = (int)(sf_cy[segy] / sf_base - 0.5);
783  (abs(pp_cy[segy]) < hsegsize) ? cf_cy[segy] = ((segsize - pp_cy[segy]) % segsize)
784  : cf_cy[segy] = 0;
785  }
786 }
787 
788 
789 
790 void ophPAS_GPU::CalcCompensatedPhase(float cx, float cy, float cz, float amp
791  , int segNumx, int segNumy
792  , int segsize, int hsegsize, float sf_base
793  , float *xc, float *yc
794  , int *cf_cx, int *cf_cy
795  , float *COStbl, float *SINtbl
796  , float **inRe, float **inIm, OphPointCloudConfig& conf)
797 {
798  /*
799  CUDA 󸮰úÁ¤ÀÌ ¼øÂ÷ÀûÀ¸·Î 󸮰¡ µÇ±â ¶§¹®¿¡, È£½ºÆ®¿¡¼­ µð¹ÙÀ̽º·Î µ¥ÀÌÅ͸¦ º¹»çÇÏ´Â °úÁ¤¿¡¼­ GPU´Â ´ë±âÇÏ°Ô µÈ´Ù.
800  µû¶ó¼­ 󸮰úÁ¤À» ´ÙÀ½°ú °°ÀÌ Á¤ÇÑ´Ù.
801  1. cudaMallocHost¸¦ ÅëÇØ È£½ºÆ®Äڵ带 °­Á¦·Î pinned memory·Î »ç¿ë(GPU·Î µ¥ÀÌÅÍ Àü¼ÛÀÌ »¡¶óÁú ¼ö ÀÖÀ½)
802  2. cudaMemcpyAsync·Î µ¥ÀÌÅÍ Àü¼Û ¼Óµµ up(cudaMemcpy´Â µ¿±âÈ­¹æ½Ä)
803  3. cudastream »ç¿ëÀ¸·Î º´·Äó¸® ±Ø´ëÈ­
804 
805  ¼º´Éºñ±³´Â ´ÙÀ½°ú °°ÀÌ ÇÑ´Ù.
806  1. ÀϹÝÀûÀÎ CUDA Programming
807  2. °³¼±µÈ CUDA Programming(pinned memory »ç¿ë, cudastream »ç¿ë)
808  3. CPU ÄÚµå */ constValue d_const; int num_x = segNumx*segNumy; int num_y = segsize*segsize; float* inRe_d; float* inIm_d; /*cudaMalloc((void**)&inRe_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&inIm_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&d_const.cf_cx, sizeof(int)*segNumx); cudaMalloc((void**)&d_const.cf_cy, sizeof(int)*segNumy); cudaMalloc((void**)&d_const.xc, sizeof(float)*segNumx); cudaMalloc((void**)&d_const.yc, sizeof(float)*segNumy); cudaMalloc((void**)&d_const.costbl, sizeof(float)*NUMTBL); cudaMalloc((void**)&d_const.sintbl, sizeof(float)*NUMTBL);*/ cudaMalloc((void**)&inRe_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&inIm_d, sizeof(float)*num_x*num_y); cudaMalloc((void**)&d_const.cf_cx, sizeof(int)*segNumx); cudaMalloc((void**)&d_const.cf_cy, sizeof(int)*segNumy); cudaMalloc((void**)&d_const.xc, sizeof(float)*segNumx); cudaMalloc((void**)&d_const.yc, sizeof(float)*segNumy); cudaMalloc((void**)&d_const.costbl, sizeof(float)*NUMTBL); cudaMalloc((void**)&d_const.sintbl, sizeof(float)*NUMTBL); cudaMemcpyAsync(inRe_d, m_inRe_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpyAsync(inIm_d, m_inIm_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.cf_cx, cf_cx , sizeof(int)*segNumx, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.cf_cy, cf_cy , sizeof(int)*segNumy, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.xc, xc, sizeof(float)*segNumx, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.yc, yc, sizeof(float)*segNumy, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.costbl, COStbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice); cudaMemcpyAsync(d_const.sintbl, SINtbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice); ivec3 v(segNumx, segNumy, segsize); cuda_Wrapper_phaseCalc(inRe_d, inIm_d, d_const, cx, cy, cz, amp, v); //phaseCalc << <blockSize, gridSize >> >(inRe_d, inIm_d, d_const); cudaMemcpyAsync(m_inRe_h, inRe_d , sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); cudaMemcpyAsync(m_inIm_h, inIm_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); /*cudaMemcpy(inRe_d, m_inRe_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpy(inIm_d, m_inIm_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice); cudaMemcpy(d_const.cf_cx, cf_cx, sizeof(int)*segNumx, cudaMemcpyHostToDevice); cudaMemcpy(d_const.cf_cy, cf_cy, sizeof(int)*segNumy, cudaMemcpyHostToDevice); cudaMemcpy(d_const.xc, xc, sizeof(float)*segNumx, cudaMemcpyHostToDevice); cudaMemcpy(d_const.yc, yc, sizeof(float)*segNumy, cudaMemcpyHostToDevice); cudaMemcpy(d_const.costbl, COStbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice); cudaMemcpy(d_const.sintbl, SINtbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice);*/ /*cudaMemcpyAsync(m_inRe_h, inRe_d, sizeof(float)*i*j, cudaMemcpyDeviceToHost); cudaMemcpyAsync(m_inIm_h, inIm_d, sizeof(float)*i*j, cudaMemcpyDeviceToHost); cudaDeviceSynchronize(); cudaFreeHost(d_const.cf_cx); cudaFreeHost(d_const.cf_cy); cudaFreeHost(d_const.xc); cudaFreeHost(d_const.yc); cudaFreeHost(d_const.costbl); cudaFreeHost(d_const.sintbl); cudaFreeHost(inRe_d); cudaFreeHost(inIm_d); */ /*for (int i = 0; i < nStreams; i++) cudaStreamDestroy(streams[i]);*/ cudaFree(d_const.cf_cx); cudaFree(d_const.cf_cy); cudaFree(d_const.xc); cudaFree(d_const.yc); cudaFree(d_const.costbl); cudaFree(d_const.sintbl); cudaFree(inRe_d); cudaFree(inIm_d); /*cudaFreeHost(cf_cx); cudaFreeHost(cf_cy); cudaFreeHost(xc); cudaFreeHost(yc); cudaFreeHost(COStbl); cudaFreeHost(SINtbl); cudaFreeHost(m_inRe_h); cudaFreeHost(m_inIm_h);*/ /*phaseCalc << <blockSize, gridSize >> >(inRe_d, inIm_d, d_const); cudaMemcpy(m_inRe_h, inRe_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); cudaMemcpy(m_inIm_h, inIm_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost); cudaFree(d_const.cf_cx); cudaFree(d_const.cf_cy); cudaFree(d_const.xc); cudaFree(d_const.yc); cudaFree(d_const.costbl); cudaFree(d_const.sintbl); cudaFree(inRe_d); cudaFree(inIm_d);*/ } /** @fn void RunFFTW(int segnumx, int segnumy, int segsize, int hsegsize, float ** inRe, float ** inIm, fftw_complex * in, fftw_complex * out, fftw_plan * plan, double * pHologram, OphPointCloudConfig& conf) @brief Ǫ¸®¿¡ º¯È¯ ¼öÇà ÇÔ¼ö @return ¾øÀ½ @param voxnum: vertex °³¼ö cghfringe: À̹ÌÁö·Î ÀúÀåÇÒ ¹è¿­ data: OpenHolo Data°ü·Ã ±¸Á¶Ã¼ conf: OpenHolo Config°ü·Ã ±¸Á¶Ã¼ */ void ophPAS_GPU::RunFFTW(int segnumx, int segnumy, int segsize, int hsegsize, float ** inRe, float ** inIm, fftw_complex * in, fftw_complex * out, fftw_plan * plan, double * pHologram, OphPointCloudConfig& conf) { int i, j; int segx, segy; // coordinate in a Segment int segxx, segyy; int cghWidth = getContext().pixel_number[_X]; int rows = m_segNumy; int cols = m_segNumx; for (segy = 0; segy < segnumy; segy++) { for (segx = 0; segx < segnumx; segx++) { segyy = segy * segnumx + segx; memset(in, 0x00, sizeof(fftw_complex) * segsize * segsize); for (i = 0; i < segsize; i++) { for (j = 0; j < segsize; j++) { segxx = i * segsize + j; in[i*segsize + j][0] = m_inRe_h[segyy*segsize*segsize+segxx]; in[i*segsize + j][1] = m_inIm_h[segyy*segsize*segsize+segxx]; //inIm_h°ªÀÌ ´Ù¸§(x) ´Ù °°À½ } } fftw_execute(*plan); for (i = 0; i < segsize; i++) { for (j = 0; j < segsize; j++) { pHologram[(segy*segsize + i)*cghWidth + (segx*segsize + j)] = out[i * segsize + j][0];// - out[l * SEGSIZE + m][1]; } } } } } void ophPAS_GPU::encodeHologram(const vec2 band_limit, const vec2 spectrum_shift) { if (complex_H == nullptr) { LOG("Not found diffracted data."); return; } LOG("Single Side Band Encoding.."); const uint nChannel = context_.waveNum; const uint pnX = context_.pixel_number[_X]; const uint pnY = context_.pixel_number[_Y]; const Real ppX = context_.pixel_pitch[_X]; const Real ppY = context_.pixel_pitch[_Y]; const long long int pnXY = pnX * pnY; m_vecEncodeSize = ivec2(pnX, pnY); context_.ss[_X] = pnX * ppX; context_.ss[_Y] = pnY * ppY; vec2 ss = context_.ss; Real cropx = floor(pnX * band_limit[_X]); Real cropx1 = cropx - floor(cropx / 2); Real cropx2 = cropx1 + cropx - 1; Real cropy = floor(pnY * band_limit[_Y]); Real cropy1 = cropy - floor(cropy / 2); Real cropy2 = cropy1 + cropy - 1; Real* x_o = new Real[pnX]; Real* y_o = new Real[pnY]; for (uint i = 0; i < pnX; i++) x_o[i] = (-ss[_X] / 2) + (ppX * i) + (ppX / 2); for (uint i = 0; i < pnY; i++) y_o[i] = (ss[_Y] - ppY) - (ppY * i); Real* xx_o = new Real[pnXY]; Real* yy_o = new Real[pnXY]; for (uint i = 0; i < pnXY; i++) xx_o[i] = x_o[i % pnX]; for (uint i = 0; i < pnX; i++) for (uint j = 0; j < pnY; j++) yy_o[i + j * pnX] = y_o[j]; Complex<Real>* h = new Complex<Real>[pnXY]; for (uint ch = 0; ch < nChannel; ch++) { fft2(complex_H[ch], h, pnX, pnY, OPH_FORWARD); fft2(ivec2(pnX, pnY), h, OPH_FORWARD); fftExecute(h); fft2(h, h, pnX, pnY, OPH_BACKWARD); fft2(h, h, pnX, pnY, OPH_FORWARD); fft2(ivec2(pnX, pnY), h, OPH_BACKWARD); fftExecute(h); fft2(h, h, pnX, pnY, OPH_BACKWARD); for (int i = 0; i < pnXY; i++) { Complex<Real> shift_phase(1.0, 0.0); int r = i / pnX; int c = i % pnX; Real X = (M_PI * xx_o[i] * spectrum_shift[_X]) / ppX; Real Y = (M_PI * yy_o[i] * spectrum_shift[_Y]) / ppY; shift_phase[_RE] = shift_phase[_RE] * (cos(X) * cos(Y) - sin(X) * sin(Y)); m_lpEncoded[ch][i] = (h[i] * shift_phase).real(); } } delete[] h; delete[] x_o; delete[] xx_o; delete[] y_o; delete[] yy_o; LOG("Done.\n"); } /** @fn void encoding(unsigned int ENCODE_FLAG) @brief abstract function of ophGen::encoding @return ¾øÀ½ @param ENCODE_FLAG: ¾Ïȣȭ ¸Þ¼­µå */ void ophPAS_GPU::encoding(unsigned int ENCODE_FLAG) { ophGen::encoding(ENCODE_FLAG); }
809 
810 
811  */
812 
813 
814  constValue d_const;
815  int num_x = segNumx*segNumy;
816  int num_y = segsize*segsize;
817  float* inRe_d;
818  float* inIm_d;
819 
820 
821 
822 
823 
824 
825 
826  /*cudaMalloc((void**)&inRe_d, sizeof(float)*num_x*num_y);
827  cudaMalloc((void**)&inIm_d, sizeof(float)*num_x*num_y);
828  cudaMalloc((void**)&d_const.cf_cx, sizeof(int)*segNumx);
829  cudaMalloc((void**)&d_const.cf_cy, sizeof(int)*segNumy);
830  cudaMalloc((void**)&d_const.xc, sizeof(float)*segNumx);
831  cudaMalloc((void**)&d_const.yc, sizeof(float)*segNumy);
832  cudaMalloc((void**)&d_const.costbl, sizeof(float)*NUMTBL);
833  cudaMalloc((void**)&d_const.sintbl, sizeof(float)*NUMTBL);*/
834 
835  cudaMalloc((void**)&inRe_d, sizeof(float)*num_x*num_y);
836  cudaMalloc((void**)&inIm_d, sizeof(float)*num_x*num_y);
837  cudaMalloc((void**)&d_const.cf_cx, sizeof(int)*segNumx);
838  cudaMalloc((void**)&d_const.cf_cy, sizeof(int)*segNumy);
839  cudaMalloc((void**)&d_const.xc, sizeof(float)*segNumx);
840  cudaMalloc((void**)&d_const.yc, sizeof(float)*segNumy);
841  cudaMalloc((void**)&d_const.costbl, sizeof(float)*NUMTBL);
842  cudaMalloc((void**)&d_const.sintbl, sizeof(float)*NUMTBL);
843 
844 
845  cudaMemcpyAsync(inRe_d, m_inRe_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice);
846  cudaMemcpyAsync(inIm_d, m_inIm_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice);
847  cudaMemcpyAsync(d_const.cf_cx, cf_cx , sizeof(int)*segNumx, cudaMemcpyHostToDevice);
848  cudaMemcpyAsync(d_const.cf_cy, cf_cy , sizeof(int)*segNumy, cudaMemcpyHostToDevice);
849  cudaMemcpyAsync(d_const.xc, xc, sizeof(float)*segNumx, cudaMemcpyHostToDevice);
850  cudaMemcpyAsync(d_const.yc, yc, sizeof(float)*segNumy, cudaMemcpyHostToDevice);
851  cudaMemcpyAsync(d_const.costbl, COStbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice);
852  cudaMemcpyAsync(d_const.sintbl, SINtbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice);
853 
854 
855  ivec3 v(segNumx, segNumy, segsize);
856  cuda_Wrapper_phaseCalc(inRe_d, inIm_d, d_const, cx, cy, cz, amp, v);
857  //phaseCalc << <blockSize, gridSize >> >(inRe_d, inIm_d, d_const);
858 
859 
860 
861 
862  cudaMemcpyAsync(m_inRe_h, inRe_d , sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost);
863  cudaMemcpyAsync(m_inIm_h, inIm_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost);
864 
865 
866  /*cudaMemcpy(inRe_d, m_inRe_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice);
867  cudaMemcpy(inIm_d, m_inIm_h, sizeof(float)*num_x*num_y, cudaMemcpyHostToDevice);
868  cudaMemcpy(d_const.cf_cx, cf_cx, sizeof(int)*segNumx, cudaMemcpyHostToDevice);
869  cudaMemcpy(d_const.cf_cy, cf_cy, sizeof(int)*segNumy, cudaMemcpyHostToDevice);
870  cudaMemcpy(d_const.xc, xc, sizeof(float)*segNumx, cudaMemcpyHostToDevice);
871  cudaMemcpy(d_const.yc, yc, sizeof(float)*segNumy, cudaMemcpyHostToDevice);
872  cudaMemcpy(d_const.costbl, COStbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice);
873  cudaMemcpy(d_const.sintbl, SINtbl, sizeof(float)*NUMTBL, cudaMemcpyHostToDevice);*/
874 
875 
876 
877  /*cudaMemcpyAsync(m_inRe_h, inRe_d, sizeof(float)*i*j, cudaMemcpyDeviceToHost);
878  cudaMemcpyAsync(m_inIm_h, inIm_d, sizeof(float)*i*j, cudaMemcpyDeviceToHost);
879  cudaDeviceSynchronize();
880  cudaFreeHost(d_const.cf_cx);
881  cudaFreeHost(d_const.cf_cy);
882  cudaFreeHost(d_const.xc);
883  cudaFreeHost(d_const.yc);
884  cudaFreeHost(d_const.costbl);
885  cudaFreeHost(d_const.sintbl);
886  cudaFreeHost(inRe_d);
887  cudaFreeHost(inIm_d);
888  */
889  /*for (int i = 0; i < nStreams; i++)
890  cudaStreamDestroy(streams[i]);*/
891 
892  cudaFree(d_const.cf_cx);
893  cudaFree(d_const.cf_cy);
894  cudaFree(d_const.xc);
895  cudaFree(d_const.yc);
896  cudaFree(d_const.costbl);
897  cudaFree(d_const.sintbl);
898  cudaFree(inRe_d);
899  cudaFree(inIm_d);
900 
901  /*cudaFreeHost(cf_cx);
902  cudaFreeHost(cf_cy);
903  cudaFreeHost(xc);
904  cudaFreeHost(yc);
905  cudaFreeHost(COStbl);
906  cudaFreeHost(SINtbl);
907  cudaFreeHost(m_inRe_h);
908  cudaFreeHost(m_inIm_h);*/
909 
910 
911  /*phaseCalc << <blockSize, gridSize >> >(inRe_d, inIm_d, d_const);
912  cudaMemcpy(m_inRe_h, inRe_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost);
913  cudaMemcpy(m_inIm_h, inIm_d, sizeof(float)*num_x*num_y, cudaMemcpyDeviceToHost);
914 
915  cudaFree(d_const.cf_cx);
916  cudaFree(d_const.cf_cy);
917  cudaFree(d_const.xc);
918  cudaFree(d_const.yc);
919  cudaFree(d_const.costbl);
920  cudaFree(d_const.sintbl);
921  cudaFree(inRe_d);
922  cudaFree(inIm_d);*/
923 }
924 
925 
936 void ophPAS_GPU::RunFFTW(int segnumx, int segnumy, int segsize, int hsegsize, float ** inRe, float ** inIm, fftw_complex * in, fftw_complex * out, fftw_plan * plan, double * pHologram, OphPointCloudConfig& conf)
937 {
938  int i, j;
939  int segx, segy; // coordinate in a Segment
940  int segxx, segyy;
941 
942  int cghWidth = getContext().pixel_number[_X];
943  int rows = m_segNumy;
944  int cols = m_segNumx;
945 
946 
947  for (segy = 0; segy < segnumy; segy++) {
948  for (segx = 0; segx < segnumx; segx++) {
949  segyy = segy * segnumx + segx;
950  memset(in, 0x00, sizeof(fftw_complex) * segsize * segsize);
951  for (i = 0; i < segsize; i++) {
952  for (j = 0; j < segsize; j++) {
953  segxx = i * segsize + j;
954  in[i*segsize + j][0] = m_inRe_h[segyy*segsize*segsize+segxx];
955  in[i*segsize + j][1] = m_inIm_h[segyy*segsize*segsize+segxx];
956  //inIm_h°ªÀÌ ´Ù¸§(x) ´Ù °°À½
957 
958  }
959  }
960  fftw_execute(*plan);
961  for (i = 0; i < segsize; i++) {
962  for (j = 0; j < segsize; j++) {
963  pHologram[(segy*segsize + i)*cghWidth + (segx*segsize + j)] = out[i * segsize + j][0];// - out[l * SEGSIZE + m][1];
964 
965  }
966  }
967  }
968 
969  }
970 
971 }
972 
973 
974 void ophPAS_GPU::encodeHologram(const vec2 band_limit, const vec2 spectrum_shift)
975 {
976  if (complex_H == nullptr) {
977  LOG("Not found diffracted data.");
978  return;
979  }
980 
981  LOG("Single Side Band Encoding..");
982  const uint nChannel = context_.waveNum;
983  const uint pnX = context_.pixel_number[_X];
984  const uint pnY = context_.pixel_number[_Y];
985  const Real ppX = context_.pixel_pitch[_X];
986  const Real ppY = context_.pixel_pitch[_Y];
987  const long long int pnXY = pnX * pnY;
988 
989  m_vecEncodeSize = ivec2(pnX, pnY);
990  context_.ss[_X] = pnX * ppX;
991  context_.ss[_Y] = pnY * ppY;
992  vec2 ss = context_.ss;
993 
994  Real cropx = floor(pnX * band_limit[_X]);
995  Real cropx1 = cropx - floor(cropx / 2);
996  Real cropx2 = cropx1 + cropx - 1;
997 
998  Real cropy = floor(pnY * band_limit[_Y]);
999  Real cropy1 = cropy - floor(cropy / 2);
1000  Real cropy2 = cropy1 + cropy - 1;
1001 
1002  Real* x_o = new Real[pnX];
1003  Real* y_o = new Real[pnY];
1004 
1005  for (uint i = 0; i < pnX; i++)
1006  x_o[i] = (-ss[_X] / 2) + (ppX * i) + (ppX / 2);
1007 
1008  for (uint i = 0; i < pnY; i++)
1009  y_o[i] = (ss[_Y] - ppY) - (ppY * i);
1010 
1011  Real* xx_o = new Real[pnXY];
1012  Real* yy_o = new Real[pnXY];
1013 
1014  for (uint i = 0; i < pnXY; i++)
1015  xx_o[i] = x_o[i % pnX];
1016 
1017 
1018  for (uint i = 0; i < pnX; i++)
1019  for (uint j = 0; j < pnY; j++)
1020  yy_o[i + j * pnX] = y_o[j];
1021 
1022  Complex<Real>* h = new Complex<Real>[pnXY];
1023 
1024  for (uint ch = 0; ch < nChannel; ch++) {
1025  fft2(complex_H[ch], h, pnX, pnY, OPH_FORWARD);
1026  fft2(ivec2(pnX, pnY), h, OPH_FORWARD);
1027  fftExecute(h);
1028  fft2(h, h, pnX, pnY, OPH_BACKWARD);
1029 
1030  fft2(h, h, pnX, pnY, OPH_FORWARD);
1031  fft2(ivec2(pnX, pnY), h, OPH_BACKWARD);
1032  fftExecute(h);
1033  fft2(h, h, pnX, pnY, OPH_BACKWARD);
1034 
1035  for (int i = 0; i < pnXY; i++) {
1036  Complex<Real> shift_phase(1.0, 0.0);
1037  int r = i / pnX;
1038  int c = i % pnX;
1039 
1040  Real X = (M_PI * xx_o[i] * spectrum_shift[_X]) / ppX;
1041  Real Y = (M_PI * yy_o[i] * spectrum_shift[_Y]) / ppY;
1042 
1043  shift_phase[_RE] = shift_phase[_RE] * (cos(X) * cos(Y) - sin(X) * sin(Y));
1044 
1045  m_lpEncoded[ch][i] = (h[i] * shift_phase).real();
1046  }
1047  }
1048  delete[] h;
1049  delete[] x_o;
1050  delete[] xx_o;
1051  delete[] y_o;
1052  delete[] yy_o;
1053 
1054  LOG("Done.\n");
1055 }
1056 
1064 void ophPAS_GPU::encoding(unsigned int ENCODE_FLAG)
1065 {
1067 }
1068 
1069 
1070 
1071 
1072 
int m_hsegNumy
Definition: ophPAS_GPU.h:103
ENCODE_FLAG
Definition: ophGen.h:84
void abs(const oph::Complex< T > &src, oph::Complex< T > &dst)
Definition: function.h:113
OphConfig & getContext(void)
Function for getting the current context.
Definition: Openholo.h:229
void generateHologram()
Definition: ophPAS_GPU.cpp:733
char * rtrim(char *s)
Definition: ophPAS_GPU.cpp:266
ivec3 se
Definition: ophPAS_GPU.h:131
float * m_COStbl
Definition: ophPAS_GPU.h:94
#define M_PI
Definition: define.h:52
Point point
Definition: struct.h:103
#define OPH_FORWARD
Definition: define.h:66
virtual bool saveAsImg(const char *fname, uint8_t bitsperpixel, uchar *src, int width, int height)
Function for creating image files.
Definition: Openholo.cpp:135
float * xc
Definition: ophPAS_GPU.h:26
void CalcCompensatedPhase(float cx, float cy, float cz, float amp, int segnumx, int segnumy, int segsize, int hsegsize, float sf_base, float *xc, float *yc, int *cf_cx, int *cf_cy, float *COStbl, float *SINtbl, float **inRe, float **inIm, OphPointCloudConfig &conf)
Definition: ophPAS_GPU.cpp:790
float * m_SFrequency_cx
Definition: ophPAS_GPU.h:105
vec3 shift
Definition: Openholo.h:102
double * m_pHologram
Definition: ophPAS_GPU.h:92
Real distance
Offset value of point cloud.
Definition: ophGen.h:559
void setPixelNumberOHC(const ivec2 pixel_number)
getter/setter for OHC file read and write
Definition: Openholo.h:501
float * m_inIm_h
Definition: ophPAS_GPU.h:124
int * cf_cx
Definition: ophPAS_GPU.h:24
unsigned char uchar
Definition: typedef.h:64
char * ltrim(char *s)
Definition: ophPAS_GPU.cpp:282
Real m_dFieldLength
Definition: ophGen.h:348
#define NUMTBL
Definition: ophACPAS.h:15
int m_segNumy
Definition: ophPAS_GPU.h:101
float * m_yc
Definition: ophPAS_GPU.h:112
void initialize(void)
Initialize variables for Hologram complex field, encoded data, normalized data.
Definition: ophGen.cpp:145
void setPixelPitchOHC(const vec2 pixel_pitch)
Definition: Openholo.h:504
int * m_Coefficient_cy
Definition: ophPAS_GPU.h:110
vec2 ss
Definition: Openholo.h:104
int m_hsegNumx
Definition: ophPAS_GPU.h:102
float Real
Definition: typedef.h:55
bool checkExtension(const char *fname, const char *ext)
Functions for extension checking.
Definition: Openholo.cpp:86
#define M2_PI
Definition: ophACPAS.h:10
virtual ~ophPAS_GPU()
Definition: ophPAS_GPU.cpp:30
structure for 2-dimensional integer vector and its arithmetic.
Definition: ivec.h:66
fftw_plan m_plan
Definition: ophPAS_GPU.h:119
h
Definition: Openholo.cpp:430
const XMLNode * FirstChild() const
Get the first child node, or null if none exists.
Definition: tinyxml2.h:761
char * trim(char *s)
Definition: ophPAS_GPU.cpp:301
int m_dsegSize
Definition: ophPAS_GPU.h:99
int save(const char *fname, uint8_t bitsperpixel, uchar *src, uint px, uint py)
Definition: ophPAS_GPU.cpp:175
void PAS(long voxelnum, OphPointCloudData *data, double *m_pHologram, OphPointCloudConfig &conf)
Definition: ophPAS_GPU.cpp:494
#define FFTW_BACKWARD
Definition: fftw3.h:380
bool rotate
Definition: Openholo.h:124
Real phase
Definition: struct.h:105
ImageConfig imgCfg
Definition: Openholo.h:487
void setWavelengthOHC(const Real wavelength, const LenUnit wavelength_unit)
Definition: Openholo.h:507
vec2 pixel_pitch
Definition: Openholo.h:101
int * m_PickPoint_cy
Definition: ophPAS_GPU.h:108
#define MAX_STR_LEN
Definition: ophACPAS.h:17
vec3 scale
Scaling factor of coordinate of point cloud.
Definition: ophGen.h:557
Real m_elapsedTime
Elapsed time of generate hologram.
Definition: ophGen.h:336
ivec2 m_vecEncodeSize
Encoded hologram size, varied from encoding type.
Definition: ophGen.h:330
float m_sf_base
Definition: ophPAS_GPU.h:115
#define _X
Definition: define.h:92
void MemoryRelease(void)
Definition: ophPAS_GPU.cpp:690
int * m_Coefficient_cx
Definition: ophPAS_GPU.h:109
void CalcSpatialFrequency(float cx, float cy, float cz, float amp, int segnumx, int segnumy, int segsize, int hsegsize, float sf_base, float *xc, float *yc, float *sf_cx, float *sf_cy, int *pp_cx, int *pp_cy, int *cf_cx, int *cf_cy, float xiint, float etaint, OphPointCloudConfig &conf)
Definition: ophPAS_GPU.cpp:756
float ** m_inIm
Definition: ophPAS_GPU.h:122
fftw_complex * m_in
Definition: ophPAS_GPU.h:118
#define FFT_SEGMENT_SIZE
Definition: ophPAS.h:19
float * yc
Definition: ophPAS_GPU.h:27
Real ** m_lpEncoded
buffer to encoded.
Definition: ophGen.h:338
void PASCalculation_GPU(long voxnum, unsigned char *cghfringe, OphPointCloudData *data, OphPointCloudConfig &conf)
Definition: ophPAS_GPU.cpp:746
#define _RE
Definition: complex.h:55
void fftExecute(Complex< Real > *out, bool bReverse=false)
Execution functions to be called after fft1, fft2, and fft3.
Definition: Openholo.cpp:623
void fft2(ivec2 n, Complex< Real > *in, int sign=OPH_FORWARD, uint flag=OPH_ESTIMATE)
Functions for performing fftw 2-dimension operations inside Openholo.
Definition: Openholo.cpp:559
int loadPoint(const char *_filename)
Definition: ophPAS_GPU.cpp:166
structure for 2-dimensional Real type vector and its arithmetic.
Definition: vec.h:66
uint waveNum
Definition: Openholo.h:105
void Amplitude(Complex< T > *holo, T *encoded, const int size)
unsigned char * cgh_fringe
Definition: ophPAS_GPU.h:113
float * sintbl
Definition: ophPAS_GPU.h:30
void encodeHologram(const vec2 band_limit, const vec2 spectrum_shift)
Definition: ophPAS_GPU.cpp:974
ivec2 pixel_number
Definition: Openholo.h:99
float ** m_inRe
Definition: ophPAS_GPU.h:121
float * m_inRe_h
Definition: ophPAS_GPU.h:123
#define _Y
Definition: define.h:96
XMLError LoadFile(const char *filename)
Definition: tinyxml2.cpp:2150
bool bUseDP
Definition: Openholo.h:98
Configuration for Point Cloud.
Definition: ophGen.h:555
void PASCalculation(long voxnum, unsigned char *cghfringe, OphPointCloudData *data, OphPointCloudConfig &conf)
Definition: ophPAS_GPU.cpp:373
Vertex * vertices
Data of point clouds.
Definition: ophGen.h:588
float * costbl
Definition: ophPAS_GPU.h:29
structure for 3-dimensional integer vector and its arithmetic.
Definition: ivec.h:386
Complex< Real > ** complex_H
Definition: Openholo.h:489
Data for Point Cloud.
Definition: ophGen.h:582
fftw_complex * m_out
Definition: ophPAS_GPU.h:118
float * m_SINtbl
Definition: ophPAS_GPU.h:95
uchar ** m_lpNormalized
buffer to normalized.
Definition: ophGen.h:340
void encoding()
Definition: ophGen.cpp:985
float * m_xc
Definition: ophPAS_GPU.h:111
int * m_PickPoint_cx
Definition: ophPAS_GPU.h:107
#define _Z
Definition: define.h:100
int m_nStream
Definition: ophGen.h:349
ImgEncoderOhc * OHC_encoder
OHC file format Variables for read and write.
Definition: Openholo.h:494
void cuda_Wrapper_phaseCalc(float *inRe, float *inIm, constValue val, float &cx, float &cy, float &cz, float &amp, ivec3 &seg)
OphConfig context_
Definition: Openholo.h:485
#define OPH_BACKWARD
Definition: define.h:67
Definition: Bitmap.h:49
float * m_SFrequency_cy
Definition: ophPAS_GPU.h:106
void DataInit(int segsize, int cghwidth, int cghheight, float xiinter, float etainter)
Definition: ophPAS_GPU.cpp:602
Real * wave_length
Definition: Openholo.h:106
int loadPointCloud(const char *pc_file, OphPointCloudData *pc_data_)
load to point cloud data.
Definition: ophGen.cpp:206
int * cf_cy
Definition: ophPAS_GPU.h:25
const XMLElement * FirstChildElement(const char *name=0) const
Definition: tinyxml2.cpp:940
void RunFFTW(int segnumx, int segnumy, int segsize, int hsegsize, float **inRe, float **inIm, fftw_complex *in, fftw_complex *out, fftw_plan *plan, double *pHologram, OphPointCloudConfig &conf)
Definition: ophPAS_GPU.cpp:936
unsigned int uint
Definition: typedef.h:62
#define CUR_TIME
Definition: function.h:58
bool readConfig(const char *fname)
Definition: ophPAS_GPU.cpp:43
bool merge
Definition: Openholo.h:125
int m_hsegSize
Definition: ophPAS_GPU.h:98
Definition: ophGen.h:76
int m_segSize
Definition: ophPAS_GPU.h:97
#define FFTW_ESTIMATE
Definition: fftw3.h:392
Real pos[3]
Definition: struct.h:87
int m_segNumx
Definition: ophPAS_GPU.h:100