00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include "CUDA/CudaImage.H"
00039 #include "Util/Assert.H"
00040 #include "CUDA/cudadefs.h"
00041 #include "CudaMathOps.H"
00042 #include "CudaDevices.H"
00043 #include "wrap_c_cuda.h"
00044
00045
00046 void cudaGetMin(const CudaImage<float>& src, CudaImage<float>& minim, CudaImage<float> *buf)
00047 {
00048 MemoryPolicy mp = src.getMemoryPolicy();
00049 const int dev = src.getMemoryDevice();
00050
00051
00052 ASSERT(src.initialized());
00053
00054 ASSERT(mp != HOST_MEMORY);
00055
00056 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00057 CudaImage<float> tmp;
00058 if(minim.size() != 1 || minim.getMemoryDevice() != dev || minim.getMemoryPolicy() != mp)
00059 minim = CudaImage<float>(1,1,NO_INIT,mp, dev);
00060
00061 if(buf == 0)
00062 {
00063
00064 tmp = CudaImage<float>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, src.getMemoryPolicy(), dev);
00065 buf = &tmp;
00066 }
00067
00068 cuda_c_getMin(src.getCudaArrayPtr(), minim.getCudaArrayPtr(), buf->getCudaArrayPtr(), tile.sz(), src.size());
00069
00070 }
00071
00072 void cudaGetMax(const CudaImage<float>& src, CudaImage<float>& maxim, CudaImage<float> *buf)
00073 {
00074 MemoryPolicy mp = src.getMemoryPolicy();
00075 const int dev = src.getMemoryDevice();
00076
00077
00078 ASSERT(src.initialized());
00079
00080 ASSERT(mp != HOST_MEMORY);
00081
00082 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00083 CudaImage<float> tmp;
00084 if(maxim.size() != 1 || maxim.getMemoryDevice() != dev || maxim.getMemoryPolicy() != mp)
00085 maxim = CudaImage<float>(1,1,NO_INIT,mp, dev);
00086
00087 if(buf == 0)
00088 {
00089
00090 tmp = CudaImage<float>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, src.getMemoryPolicy(), dev);
00091 buf = &tmp;
00092 }
00093
00094
00095 cuda_c_getMax(src.getCudaArrayPtr(), maxim.getCudaArrayPtr(),buf->getCudaArrayPtr(), tile.sz(), src.size());
00096
00097 }
00098
00099 void cudaGetAvg(const CudaImage<float>& src, CudaImage<float>& avgim, CudaImage<float> *buf)
00100 {
00101 MemoryPolicy mp = src.getMemoryPolicy();
00102 const int dev = src.getMemoryDevice();
00103
00104
00105 ASSERT(src.initialized());
00106
00107 ASSERT(mp != HOST_MEMORY);
00108
00109 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00110 CudaImage<float> tmp;
00111 if(avgim.size() != 1 || avgim.getMemoryDevice() != dev || avgim.getMemoryPolicy() != mp)
00112 avgim = CudaImage<float>(1,1,NO_INIT,mp, dev);
00113
00114 if(buf == 0)
00115 {
00116
00117 tmp = CudaImage<float>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, src.getMemoryPolicy(), dev);
00118 buf = &tmp;
00119 }
00120
00121
00122 cuda_c_getAvg(src.getCudaArrayPtr(), avgim.getCudaArrayPtr(), buf->getCudaArrayPtr(), tile.sz(), src.size());
00123
00124 }
00125
00126 CudaImage<float> cudaGetAvg(const CudaImage<float>& src)
00127 {
00128 MemoryPolicy mp = src.getMemoryPolicy();
00129 const int dev = src.getMemoryDevice();
00130
00131
00132 ASSERT(src.initialized());
00133
00134 ASSERT(mp != HOST_MEMORY);
00135
00136 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00137 CudaImage<float> avgim = CudaImage<float>(1,1,NO_INIT,mp, dev);
00138
00139
00140 CudaImage<float> buf = CudaImage<float>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, src.getMemoryPolicy(), dev);
00141
00142
00143 cuda_c_getAvg(src.getCudaArrayPtr(), avgim.getCudaArrayPtr(), buf.getCudaArrayPtr(), tile.sz(), src.size());
00144 return avgim;
00145 }
00146
00147
00148 CudaImage<float> cudaGetSum(const CudaImage<float>& src)
00149 {
00150 MemoryPolicy mp = src.getMemoryPolicy();
00151 const int dev = src.getMemoryDevice();
00152
00153
00154 ASSERT(src.initialized());
00155
00156 ASSERT(mp != HOST_MEMORY);
00157
00158 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00159 CudaImage<float> sumim = CudaImage<float>(1,1,NO_INIT,mp, dev);
00160
00161
00162 CudaImage<float> buf = CudaImage<float>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, src.getMemoryPolicy(), dev);
00163
00164
00165 cuda_c_getSum(src.getCudaArrayPtr(), sumim.getCudaArrayPtr(), buf.getCudaArrayPtr(), tile.sz(), src.size());
00166 return sumim;
00167 }
00168
00169 CudaImage<float> cudaSquared(const CudaImage<float>& src)
00170 {
00171 MemoryPolicy mp = src.getMemoryPolicy();
00172 const int dev = src.getMemoryDevice();
00173
00174
00175 ASSERT(src.initialized());
00176
00177 ASSERT(mp != HOST_MEMORY);
00178
00179 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00180 CudaImage<float> res = CudaImage<float>(src.getDims(),NO_INIT,mp,dev);
00181
00182
00183 cuda_c_squared(src.getCudaArrayPtr(), res.getCudaArrayPtr(), tile.sz(), src.size());
00184 return res;
00185 }
00186
00187 CudaImage<float> cudaSqrt(const CudaImage<float>& src)
00188 {
00189 MemoryPolicy mp = src.getMemoryPolicy();
00190 const int dev = src.getMemoryDevice();
00191
00192
00193 ASSERT(src.initialized());
00194
00195 ASSERT(mp != HOST_MEMORY);
00196
00197 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00198 CudaImage<float> res = CudaImage<float>(src.getDims(),NO_INIT,mp,dev);
00199
00200
00201 cuda_c_sqrt(src.getCudaArrayPtr(), res.getCudaArrayPtr(), tile.sz(), src.size());
00202 return res;
00203 }
00204
00205 void cudaGetMinMax(const CudaImage<float>& src, CudaImage<float>& minim, CudaImage<float>& maxim, CudaImage<float> *buf)
00206 {
00207 const MemoryPolicy mp = src.getMemoryPolicy();
00208 const int dev = src.getMemoryDevice();
00209
00210 ASSERT(src.initialized());
00211 ASSERT(mp != HOST_MEMORY);
00212 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00213 CudaImage<float> tmp;
00214 if(buf == 0)
00215 {
00216
00217 tmp = CudaImage<float>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, mp, dev);
00218 buf = &tmp;
00219 }
00220 cudaGetMin(src,minim,buf);
00221 cudaGetMax(src,maxim,buf);
00222 }
00223
00224
00225 void cudaGetMinMaxAvg(const CudaImage<float>& src, CudaImage<float>& minim, CudaImage<float>& maxim, CudaImage<float>& avgim, CudaImage<float> *buf)
00226 {
00227 const int dev = src.getMemoryDevice();
00228 const MemoryPolicy mp = src.getMemoryPolicy();
00229
00230 ASSERT(src.initialized());
00231 ASSERT(mp != HOST_MEMORY);
00232
00233 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00234 CudaImage<float> tmp;
00235 if(buf == 0)
00236 {
00237
00238 tmp = CudaImage<float>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, mp, dev);
00239 buf = &tmp;
00240 }
00241 cudaGetMin(src,minim,buf);
00242 cudaGetMax(src,maxim,buf);
00243 cudaGetAvg(src,avgim,buf);
00244 }
00245
00246
00247 template <class T> T cudaGetScalar(const CudaImage<T>& src)
00248 {
00249 ASSERT(src.size() == 1);
00250 Image<T> im = src.exportToImage();
00251 return im.getVal(0,0);
00252 }
00253
00254 template float cudaGetScalar(const CudaImage<float>& src);
00255 template PixRGB<float> cudaGetScalar(const CudaImage<PixRGB<float> >& src);
00256 template int cudaGetScalar(const CudaImage<int>& src);
00257
00258 void cudaFindMin(const CudaImage<float>& src, Point2D<int>& p, float& val)
00259 {
00260 const int dev = src.getMemoryDevice();
00261 const MemoryPolicy mp = src.getMemoryPolicy();
00262 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00263
00264 ASSERT(src.initialized());
00265 ASSERT(mp != HOST_MEMORY);
00266 CudaImage<int> tmp = CudaImage<int>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, mp, dev);
00267 CudaImage<float> buf = CudaImage<float>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, src.getMemoryPolicy(), dev);
00268 cuda_c_findMin(src.getCudaArrayPtr(),buf.getCudaArrayPtr(),tmp.getCudaArrayPtr(),tile.sz(),src.size());
00269 Image<int> resLoc = tmp.exportToImage();
00270 Image<float> res = buf.exportToImage();
00271 int idx = resLoc.getVal(0,0);
00272 val = res.getVal(0,0);
00273 int x,y;
00274 y = idx / src.getWidth();
00275 x = idx % src.getWidth();
00276 p = Point2D<int>(x,y);
00277 }
00278
00279 void cudaFindMax(const CudaImage<float>& src, Point2D<int>& p, float& val)
00280 {
00281 const int dev = src.getMemoryDevice();
00282 const MemoryPolicy mp = src.getMemoryPolicy();
00283 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00284
00285 ASSERT(src.initialized());
00286 ASSERT(mp != HOST_MEMORY);
00287 CudaImage<int> tmp = CudaImage<int>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, mp, dev);
00288 CudaImage<float> buf = CudaImage<float>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, src.getMemoryPolicy(), dev);
00289 cuda_c_findMax(src.getCudaArrayPtr(),buf.getCudaArrayPtr(),tmp.getCudaArrayPtr(),tile.sz(),src.size());
00290 Image<int> resLoc = tmp.exportToImage();
00291 Image<float> res = buf.exportToImage();
00292 int idx = resLoc.getVal(0,0);
00293 val = res.getVal(0,0);
00294 int x,y;
00295 y = idx / src.getWidth();
00296 x = idx % src.getWidth();
00297 p = Point2D<int>(x,y);
00298 }
00299
00300
00301
00302 void cudaInplaceNormalize(CudaImage<float>& dst, const float nmin, const float nmax)
00303 {
00304 ASSERT(dst.initialized());
00305 const int dev = dst.getMemoryDevice();
00306 const MemoryPolicy mp = dst.getMemoryPolicy();
00307 ASSERT(mp != HOST_MEMORY);
00308 if (!dst.initialized()) return;
00309 CudaImage<float> oldmin,oldmax;
00310 cudaGetMin(dst, oldmin);
00311 cudaGetMax(dst, oldmax);
00312 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00313 cuda_c_inplaceNormalize(dst.getCudaArrayPtr(), oldmin.getCudaArrayPtr(), oldmax.getCudaArrayPtr(), nmin, nmax,tile.sz(),dst.size());
00314 }
00315
00316 void cudaInplaceRectify(CudaImage<float>& dst)
00317 {
00318 ASSERT(dst.initialized());
00319 if (!dst.initialized()) return;
00320 const int dev = dst.getMemoryDevice();
00321 const MemoryPolicy mp = dst.getMemoryPolicy();
00322 ASSERT(mp != HOST_MEMORY);
00323 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00324 cuda_c_inplaceRectify(dst.getCudaArrayPtr(), tile.sz(),dst.size());
00325 }
00326
00327 void cudaInplaceClamp(CudaImage<float>& dst, const float cmin, const float cmax)
00328 {
00329 ASSERT(dst.initialized());
00330 if (!dst.initialized()) return;
00331 const int dev = dst.getMemoryDevice();
00332 const MemoryPolicy mp = dst.getMemoryPolicy();
00333 ASSERT(mp != HOST_MEMORY);
00334 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00335 cuda_c_inplaceClamp(dst.getCudaArrayPtr(),cmin,cmax,tile.sz(),dst.size());
00336 }
00337
00338 void cudaClear(CudaImage<float>& dst, const float val)
00339 {
00340 const int dev = dst.getMemoryDevice();
00341 const MemoryPolicy mp = dst.getMemoryPolicy();
00342 ASSERT(mp != HOST_MEMORY);
00343 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00344 cuda_c_clear(dst.getCudaArrayPtr(),val,tile.sz(),dst.size());
00345 }
00346
00347 void cudaAbs(CudaImage<float>& src)
00348 {
00349 const int dev = src.getMemoryDevice();
00350 const MemoryPolicy mp = src.getMemoryPolicy();
00351 ASSERT(mp != HOST_MEMORY);
00352 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00353 cuda_c_abs(src.getCudaArrayPtr(),tile.sz(),src.size());
00354 }
00355
00356 void cudaInplaceAddScalar(CudaImage<float>& dst, const CudaImage<float>& offset)
00357 {
00358 ASSERT(dst.initialized());
00359 const MemoryPolicy mp = dst.getMemoryPolicy();
00360 ASSERT(mp != HOST_MEMORY);
00361 const int dev = dst.getMemoryDevice();
00362 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00363 cuda_c_inplaceAddScalar(dst.getCudaArrayPtr(), offset.getCudaArrayPtr(), tile.sz(), dst.size());
00364 }
00365
00366 void cudaInplaceSubtractScalar(CudaImage<float>& dst, const CudaImage<float>& offset)
00367 {
00368 ASSERT(dst.initialized());
00369 const int dev = dst.getMemoryDevice();
00370 const MemoryPolicy mp = dst.getMemoryPolicy();
00371 ASSERT(mp != HOST_MEMORY);
00372 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00373 cuda_c_inplaceSubtractScalar(dst.getCudaArrayPtr(), offset.getCudaArrayPtr(), tile.sz(), dst.size());
00374 }
00375
00376 void cudaInplaceMultiplyScalar(CudaImage<float>& dst, const CudaImage<float>& offset)
00377 {
00378 ASSERT(dst.initialized());
00379 const int dev = dst.getMemoryDevice();
00380 const MemoryPolicy mp = dst.getMemoryPolicy();
00381 ASSERT(mp != HOST_MEMORY);
00382 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00383 cuda_c_inplaceMultiplyScalar(dst.getCudaArrayPtr(), offset.getCudaArrayPtr(), tile.sz(), dst.size());
00384 }
00385
00386 void cudaInplaceDivideScalar(CudaImage<float>& dst, const CudaImage<float>& offset)
00387 {
00388 ASSERT(dst.initialized());
00389 const int dev = dst.getMemoryDevice();
00390 const MemoryPolicy mp = dst.getMemoryPolicy();
00391 ASSERT(mp != HOST_MEMORY);
00392 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00393 cuda_c_inplaceDivideScalar(dst.getCudaArrayPtr(), offset.getCudaArrayPtr(), tile.sz(), dst.size());
00394 }
00395
00396 void cudaInplaceAddImages(CudaImage<float>& im1, const CudaImage<float>& im2)
00397 {
00398 ASSERT(im1.initialized() && im2.initialized());
00399 ASSERT(im1.getMemoryDevice() == im2.getMemoryDevice());
00400 ASSERT(im1.size() == im2.size());
00401 const int dev = im1.getMemoryDevice();
00402 const MemoryPolicy mp = im1.getMemoryPolicy();
00403 ASSERT(mp != HOST_MEMORY);
00404 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00405 cuda_c_inplaceAddImages(im1.getCudaArrayPtr(), im2.getCudaArrayPtr(), tile.sz(), im1.size());
00406 }
00407
00408 void cudaInplaceSubtractImages(CudaImage<float>& im1, const CudaImage<float>& im2)
00409 {
00410 ASSERT(im1.initialized() && im2.initialized());
00411 ASSERT(im1.getMemoryDevice() == im2.getMemoryDevice());
00412 ASSERT(im1.size() == im2.size());
00413 const int dev = im1.getMemoryDevice();
00414 const MemoryPolicy mp = im1.getMemoryPolicy();
00415 ASSERT(mp != HOST_MEMORY);
00416 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00417 cuda_c_inplaceSubtractImages(im1.getCudaArrayPtr(), im2.getCudaArrayPtr(), tile.sz(), im1.size());
00418 }
00419
00420 void cudaInplaceMultiplyImages(CudaImage<float>& im1, const CudaImage<float>& im2)
00421 {
00422 ASSERT(im1.initialized() && im2.initialized());
00423 ASSERT(im1.getMemoryDevice() == im2.getMemoryDevice());
00424 ASSERT(im1.size() == im2.size());
00425 const int dev = im1.getMemoryDevice();
00426 const MemoryPolicy mp = im1.getMemoryPolicy();
00427 ASSERT(mp != HOST_MEMORY);
00428 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00429 cuda_c_inplaceMultiplyImages(im1.getCudaArrayPtr(), im2.getCudaArrayPtr(), tile.sz(), im1.size());
00430 }
00431
00432 void cudaInplaceDivideImages(CudaImage<float>& im1, const CudaImage<float>& im2)
00433 {
00434 ASSERT(im1.initialized() && im2.initialized());
00435 ASSERT(im1.getMemoryDevice() == im2.getMemoryDevice());
00436 ASSERT(im1.size() == im2.size());
00437 const int dev = im1.getMemoryDevice();
00438 const MemoryPolicy mp = im1.getMemoryPolicy();
00439 ASSERT(mp != HOST_MEMORY);
00440 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00441 cuda_c_inplaceDivideImages(im1.getCudaArrayPtr(), im2.getCudaArrayPtr(), tile.sz(), im1.size());
00442 }
00443
00444 CudaImage<float> cudaAddImages(const CudaImage<float>& im1, const CudaImage<float>& im2)
00445 {
00446 ASSERT(im1.initialized() && im2.initialized());
00447 ASSERT(im1.getMemoryDevice() == im2.getMemoryDevice());
00448 ASSERT(im1.size() == im2.size());
00449 MemoryPolicy mp = im1.getMemoryPolicy();
00450 const int dev = im1.getMemoryDevice();
00451 ASSERT(mp != HOST_MEMORY);
00452 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00453 CudaImage<float> res = CudaImage<float>(im1.getDims(),NO_INIT,mp,dev);
00454 cuda_c_addImages(im1.getCudaArrayPtr(), im2.getCudaArrayPtr(), res.getCudaArrayPtr(), tile.sz(), im1.size());
00455 return res;
00456 }
00457
00458 CudaImage<float> cudaSubtractImages(const CudaImage<float>& im1, const CudaImage<float>& im2)
00459 {
00460 ASSERT(im1.initialized() && im2.initialized());
00461 ASSERT(im1.getMemoryDevice() == im2.getMemoryDevice());
00462 ASSERT(im1.size() == im2.size());
00463 MemoryPolicy mp = im1.getMemoryPolicy();
00464 const int dev = im1.getMemoryDevice();
00465 ASSERT(mp != HOST_MEMORY);
00466 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00467 CudaImage<float> res = CudaImage<float>(im1.getDims(),NO_INIT,mp,dev);
00468 cuda_c_subtractImages(im1.getCudaArrayPtr(), im2.getCudaArrayPtr(), res.getCudaArrayPtr(), tile.sz(), im1.size());
00469 return res;
00470 }
00471
00472 CudaImage<float> cudaMultiplyImages(const CudaImage<float>& im1, const CudaImage<float>& im2)
00473 {
00474 ASSERT(im1.initialized() && im2.initialized());
00475 ASSERT(im1.getMemoryDevice() == im2.getMemoryDevice());
00476 ASSERT(im1.size() == im2.size());
00477 MemoryPolicy mp = im1.getMemoryPolicy();
00478 const int dev = im1.getMemoryDevice();
00479 ASSERT(mp != HOST_MEMORY);
00480 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00481 CudaImage<float> res = CudaImage<float>(im1.getDims(),NO_INIT,mp,dev);
00482 cuda_c_multiplyImages(im1.getCudaArrayPtr(), im2.getCudaArrayPtr(), res.getCudaArrayPtr(), tile.sz(), im1.size());
00483 return res;
00484 }
00485
00486 CudaImage<float> cudaDivideImages(const CudaImage<float>& im1, const CudaImage<float>& im2)
00487 {
00488 ASSERT(im1.initialized() && im2.initialized());
00489 ASSERT(im1.getMemoryDevice() == im2.getMemoryDevice());
00490 ASSERT(im1.size() == im2.size());
00491 MemoryPolicy mp = im1.getMemoryPolicy();
00492 const int dev = im1.getMemoryDevice();
00493 ASSERT(mp != HOST_MEMORY);
00494 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00495 CudaImage<float> res = CudaImage<float>(im1.getDims(),NO_INIT,mp,dev);
00496 cuda_c_divideImages(im1.getCudaArrayPtr(), im2.getCudaArrayPtr(), res.getCudaArrayPtr(), tile.sz(), im1.size());
00497 return res;
00498 }
00499
00500 CudaImage<float> cudaTakeMax(const CudaImage<float>& im1, const CudaImage<float>& im2)
00501 {
00502 ASSERT(im1.initialized() && im2.initialized());
00503 ASSERT(im1.getMemoryDevice() == im2.getMemoryDevice());
00504 ASSERT(im1.size() == im2.size());
00505 MemoryPolicy mp = im1.getMemoryPolicy();
00506 const int dev = im1.getMemoryDevice();
00507 ASSERT(mp != HOST_MEMORY);
00508 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00509 CudaImage<float> res = CudaImage<float>(im1.getDims(),NO_INIT,mp,dev);
00510 cuda_c_takeMax(im1.getCudaArrayPtr(), im2.getCudaArrayPtr(), res.getCudaArrayPtr(), tile.sz(), im1.size());
00511 return res;
00512 }
00513
00514
00515
00516 CudaImage<float> cudaQuadEnergy(const CudaImage<float>& real, const CudaImage<float>& imag)
00517 {
00518 ASSERT(real.initialized() && imag.initialized());
00519 ASSERT(real.getMemoryDevice() == imag.getMemoryDevice());
00520 ASSERT(real.isSameSize(imag));
00521 MemoryPolicy mp = real.getMemoryPolicy();
00522 const int dev = real.getMemoryDevice();
00523 ASSERT(mp != HOST_MEMORY);
00524 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00525 CudaImage<float> out(real.getDims(), NO_INIT,mp,dev);
00526 cuda_c_quadEnergy(real.getCudaArrayPtr(), imag.getCudaArrayPtr(), out.getCudaArrayPtr(), tile.sz(), real.size());
00527
00528 return out;
00529 }
00530
00531
00532 void cudaInplaceAttenuateBorders(CudaImage<float>& a, int size)
00533 {
00534 ASSERT(a.initialized());
00535
00536 Dims dims = a.getDims();
00537
00538 if (size * 2 > dims.w()) size = dims.w() / 2;
00539 if (size * 2 > dims.h()) size = dims.h() / 2;
00540 if (size < 1) return;
00541 const int dev = a.getMemoryDevice();
00542 MemoryPolicy mp = a.getMemoryPolicy();
00543 ASSERT(mp != HOST_MEMORY);
00544 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00545
00546 cuda_c_inplaceAttenuateBorders(a.getCudaArrayPtr(), size, tile.sz(), a.getWidth(), a.getHeight());
00547 }
00548
00549