51 LOG(
"%s\n", __FUNCTION__);
52 LOG(
"\tMemory Allocation : ");
60 const int N = pnX * pnY;
69 const Real simGap = (simStep > 1) ? (simTo - simFrom) / (simStep - 1) : 0;
71 const Real tx = 1 / ppX;
72 const Real ty = 1 / ppY;
73 const Real dx = tx / pnX;
74 const Real dy = ty / pnY;
76 const Real htx = tx / 2;
77 const Real hty = ty / 2;
78 const Real hdx = dx / 2;
79 const Real hdy = dy / 2;
80 const Real baseX = -htx + hdx;
81 const Real baseY = -hty + hdy;
88 cuDoubleComplex *device_src;
89 cuDoubleComplex *device_dst;
91 HANDLE_ERROR(cudaMalloc((
void**)&device_src, N *
sizeof(cuDoubleComplex)));
92 HANDLE_ERROR(cudaMalloc((
void**)&device_dst, N *
sizeof(cuDoubleComplex)));
95 bool bRandomPhase =
true;
97 int nBlock = (N + nThread - 1) / nThread;
104 unsigned char* nppMaxBuffer;
106 nppsSumGetBufferSize_64f(N, &nBuffer);
107 HANDLE_ERROR(cudaMalloc((
void**)&nppMaxBuffer, nBuffer));
111 for (
int step = 0; step < simStep; step++)
115 for (
uint ch = 0; ch < nChannel; ch++)
121 nChannel, 1, pnX, pnY, ppX, ppY,
122 simFrom + (step * simGap), k, lambda, bRandomPhase
128 cudaASMPropagation(nBlock, nThread, pnX, pnY, device_src, device_dst, device_encode, device_config);
132 HANDLE_ERROR(cudaMemcpy(encode, device_encode,
sizeof(
Real) * N, cudaMemcpyDeviceToHost));
136 nppsMax_64f(device_encode, N, max_device, nppMaxBuffer);
137 nppsMin_64f(device_encode, N, min_device, nppMaxBuffer);
139 cudaMemcpy(&locmax, max_device,
sizeof(
Real), cudaMemcpyDeviceToHost);
140 cudaMemcpy(&locmin, min_device,
sizeof(
Real), cudaMemcpyDeviceToHost);
142 if (min > locmin) min = locmin;
143 if (max < locmax) max = locmax;
145 m_vecEncoded.push_back(encode);
146 m_vecNormalized.push_back(normal);
151 LOG(
"step: %d => max: %e / min: %e\n", step, max, min);
154 for (
int ch = 0; ch < nWave; ch++)
156 int idx = step * nWave + ch;
157 normalize(m_vecEncoded[idx], m_vecNormalized[idx], pnX, pnY, max, min);
161 normalize(m_vecEncoded[step], m_vecNormalized[step], pnX, pnY);
#define HANDLE_ERROR(err)
void cudaASMPropagation(const int &nBlocks, const int &nThreads, const int &nx, const int &ny, cuDoubleComplex *src, cuDoubleComplex *dst, Real *encode, const RecGpuConst *cuda_config)
#define ELAPSED_TIME(x, y)
void ASM_Propagation_GPU()
Complex< Real > ** complex_H
void normalize(T *src, uchar *dst, int x, int y)
struct KernelConst RecGpuConst