00001 /*!@file CUDA/CudaMathOps.C C++ wrapper for CUDA Math operations */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005 // 00005 // by the University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/CUDA/CudaMathOps.C $ 00035 // $Id: CudaMathOps.C 12962 2010-03-06 02:13:53Z irock $ 00036 // 00037 00038 #include "CUDA/CudaImage.H" 00039 #include "Util/Assert.H" 00040 #include "CUDA/cudadefs.h" 00041 #include "CudaMathOps.H" 00042 #include "CudaDevices.H" 00043 #include "wrap_c_cuda.h" 00044 00045 00046 void cudaGetMin(const CudaImage<float>& src, CudaImage<float>& minim, CudaImage<float> *buf) 00047 { 00048 MemoryPolicy mp = src.getMemoryPolicy(); 00049 const int dev = src.getMemoryDevice(); 00050 00051 // Ensure that the data is valid 00052 ASSERT(src.initialized()); 00053 // Ensure that we are on a CUDA device 00054 ASSERT(mp != HOST_MEMORY); 00055 00056 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00057 CudaImage<float> tmp; 00058 if(minim.size() != 1 || minim.getMemoryDevice() != dev || minim.getMemoryPolicy() != mp) 00059 minim = CudaImage<float>(1,1,NO_INIT,mp, dev); 00060 00061 if(buf == 0) 00062 { 00063 // Set up output image memory 00064 tmp = CudaImage<float>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, src.getMemoryPolicy(), dev); 00065 buf = &tmp; 00066 } 00067 // Call CUDA implementation 00068 cuda_c_getMin(src.getCudaArrayPtr(), minim.getCudaArrayPtr(), buf->getCudaArrayPtr(), tile.sz(), src.size()); 00069 00070 } 00071 00072 void cudaGetMax(const CudaImage<float>& src, CudaImage<float>& maxim, CudaImage<float> *buf) 00073 { 00074 MemoryPolicy mp = src.getMemoryPolicy(); 00075 const int dev = src.getMemoryDevice(); 00076 00077 // Ensure that the data is valid 00078 ASSERT(src.initialized()); 00079 // Ensure that we are on a CUDA device 00080 ASSERT(mp != HOST_MEMORY); 00081 00082 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00083 CudaImage<float> tmp; 00084 if(maxim.size() != 1 || maxim.getMemoryDevice() != dev || maxim.getMemoryPolicy() != mp) 00085 maxim = CudaImage<float>(1,1,NO_INIT,mp, dev); 00086 00087 if(buf == 0) 00088 { 00089 // Set up output image memory 00090 tmp = CudaImage<float>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, src.getMemoryPolicy(), dev); 00091 buf = &tmp; 00092 } 00093 00094 // Call CUDA implementation 00095 cuda_c_getMax(src.getCudaArrayPtr(), maxim.getCudaArrayPtr(),buf->getCudaArrayPtr(), tile.sz(), src.size()); 00096 00097 } 00098 00099 void cudaGetAvg(const CudaImage<float>& src, CudaImage<float>& avgim, CudaImage<float> *buf) 00100 { 00101 MemoryPolicy mp = src.getMemoryPolicy(); 00102 const int dev = src.getMemoryDevice(); 00103 00104 // Ensure that the data is valid 00105 ASSERT(src.initialized()); 00106 // Ensure that we are on a CUDA device 00107 ASSERT(mp != HOST_MEMORY); 00108 00109 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00110 CudaImage<float> tmp; 00111 if(avgim.size() != 1 || avgim.getMemoryDevice() != dev || avgim.getMemoryPolicy() != mp) 00112 avgim = CudaImage<float>(1,1,NO_INIT,mp, dev); 00113 00114 if(buf == 0) 00115 { 00116 // Set up output image memory 00117 tmp = CudaImage<float>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, src.getMemoryPolicy(), dev); 00118 buf = &tmp; 00119 } 00120 00121 // Call CUDA implementation 00122 cuda_c_getAvg(src.getCudaArrayPtr(), avgim.getCudaArrayPtr(), buf->getCudaArrayPtr(), tile.sz(), src.size()); 00123 00124 } 00125 00126 CudaImage<float> cudaGetAvg(const CudaImage<float>& src) 00127 { 00128 MemoryPolicy mp = src.getMemoryPolicy(); 00129 const int dev = src.getMemoryDevice(); 00130 00131 // Ensure that the data is valid 00132 ASSERT(src.initialized()); 00133 // Ensure that we are on a CUDA device 00134 ASSERT(mp != HOST_MEMORY); 00135 00136 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00137 CudaImage<float> avgim = CudaImage<float>(1,1,NO_INIT,mp, dev); 00138 00139 // Set up output image memory 00140 CudaImage<float> buf = CudaImage<float>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, src.getMemoryPolicy(), dev); 00141 00142 // Call CUDA implementation 00143 cuda_c_getAvg(src.getCudaArrayPtr(), avgim.getCudaArrayPtr(), buf.getCudaArrayPtr(), tile.sz(), src.size()); 00144 return avgim; 00145 } 00146 00147 00148 CudaImage<float> cudaGetSum(const CudaImage<float>& src) 00149 { 00150 MemoryPolicy mp = src.getMemoryPolicy(); 00151 const int dev = src.getMemoryDevice(); 00152 00153 // Ensure that the data is valid 00154 ASSERT(src.initialized()); 00155 // Ensure that we are on a CUDA device 00156 ASSERT(mp != HOST_MEMORY); 00157 00158 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00159 CudaImage<float> sumim = CudaImage<float>(1,1,NO_INIT,mp, dev); 00160 00161 // Set up output image memory 00162 CudaImage<float> buf = CudaImage<float>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, src.getMemoryPolicy(), dev); 00163 00164 // Call CUDA implementation 00165 cuda_c_getSum(src.getCudaArrayPtr(), sumim.getCudaArrayPtr(), buf.getCudaArrayPtr(), tile.sz(), src.size()); 00166 return sumim; 00167 } 00168 00169 CudaImage<float> cudaSquared(const CudaImage<float>& src) 00170 { 00171 MemoryPolicy mp = src.getMemoryPolicy(); 00172 const int dev = src.getMemoryDevice(); 00173 00174 // Ensure that the data is valid 00175 ASSERT(src.initialized()); 00176 // Ensure that we are on a CUDA device 00177 ASSERT(mp != HOST_MEMORY); 00178 00179 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00180 CudaImage<float> res = CudaImage<float>(src.getDims(),NO_INIT,mp,dev); 00181 00182 // Call CUDA implementation 00183 cuda_c_squared(src.getCudaArrayPtr(), res.getCudaArrayPtr(), tile.sz(), src.size()); 00184 return res; 00185 } 00186 00187 CudaImage<float> cudaSqrt(const CudaImage<float>& src) 00188 { 00189 MemoryPolicy mp = src.getMemoryPolicy(); 00190 const int dev = src.getMemoryDevice(); 00191 00192 // Ensure that the data is valid 00193 ASSERT(src.initialized()); 00194 // Ensure that we are on a CUDA device 00195 ASSERT(mp != HOST_MEMORY); 00196 00197 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00198 CudaImage<float> res = CudaImage<float>(src.getDims(),NO_INIT,mp,dev); 00199 00200 // Call CUDA implementation 00201 cuda_c_sqrt(src.getCudaArrayPtr(), res.getCudaArrayPtr(), tile.sz(), src.size()); 00202 return res; 00203 } 00204 00205 void cudaGetMinMax(const CudaImage<float>& src, CudaImage<float>& minim, CudaImage<float>& maxim, CudaImage<float> *buf) 00206 { 00207 const MemoryPolicy mp = src.getMemoryPolicy(); 00208 const int dev = src.getMemoryDevice(); 00209 // Ensure that the data is valid 00210 ASSERT(src.initialized()); 00211 ASSERT(mp != HOST_MEMORY); 00212 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00213 CudaImage<float> tmp; 00214 if(buf == 0) 00215 { 00216 // Save time by only allocating mem once 00217 tmp = CudaImage<float>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, mp, dev); 00218 buf = &tmp; 00219 } 00220 cudaGetMin(src,minim,buf); 00221 cudaGetMax(src,maxim,buf); 00222 } 00223 00224 00225 void cudaGetMinMaxAvg(const CudaImage<float>& src, CudaImage<float>& minim, CudaImage<float>& maxim, CudaImage<float>& avgim, CudaImage<float> *buf) 00226 { 00227 const int dev = src.getMemoryDevice(); 00228 const MemoryPolicy mp = src.getMemoryPolicy(); 00229 // Ensure that the data is valid 00230 ASSERT(src.initialized()); 00231 ASSERT(mp != HOST_MEMORY); 00232 00233 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00234 CudaImage<float> tmp; 00235 if(buf == 0) 00236 { 00237 // Save time by only allocating mem once 00238 tmp = CudaImage<float>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, mp, dev); 00239 buf = &tmp; 00240 } 00241 cudaGetMin(src,minim,buf); 00242 cudaGetMax(src,maxim,buf); 00243 cudaGetAvg(src,avgim,buf); 00244 } 00245 00246 // Extract a single value from a 1x1 CudaImage 00247 template <class T> T cudaGetScalar(const CudaImage<T>& src) 00248 { 00249 ASSERT(src.size() == 1); 00250 Image<T> im = src.exportToImage(); 00251 return im.getVal(0,0); 00252 } 00253 00254 template float cudaGetScalar(const CudaImage<float>& src); 00255 template PixRGB<float> cudaGetScalar(const CudaImage<PixRGB<float> >& src); 00256 template int cudaGetScalar(const CudaImage<int>& src); 00257 00258 void cudaFindMin(const CudaImage<float>& src, Point2D<int>& p, float& val) 00259 { 00260 const int dev = src.getMemoryDevice(); 00261 const MemoryPolicy mp = src.getMemoryPolicy(); 00262 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00263 // Ensure that the data is valid 00264 ASSERT(src.initialized()); 00265 ASSERT(mp != HOST_MEMORY); 00266 CudaImage<int> tmp = CudaImage<int>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, mp, dev); 00267 CudaImage<float> buf = CudaImage<float>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, src.getMemoryPolicy(), dev); 00268 cuda_c_findMin(src.getCudaArrayPtr(),buf.getCudaArrayPtr(),tmp.getCudaArrayPtr(),tile.sz(),src.size()); 00269 Image<int> resLoc = tmp.exportToImage(); 00270 Image<float> res = buf.exportToImage(); 00271 int idx = resLoc.getVal(0,0); 00272 val = res.getVal(0,0); 00273 int x,y; 00274 y = idx / src.getWidth(); 00275 x = idx % src.getWidth(); 00276 p = Point2D<int>(x,y); 00277 } 00278 00279 void cudaFindMax(const CudaImage<float>& src, Point2D<int>& p, float& val) 00280 { 00281 const int dev = src.getMemoryDevice(); 00282 const MemoryPolicy mp = src.getMemoryPolicy(); 00283 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00284 // Ensure that the data is valid 00285 ASSERT(src.initialized()); 00286 ASSERT(mp != HOST_MEMORY); 00287 CudaImage<int> tmp = CudaImage<int>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, mp, dev); 00288 CudaImage<float> buf = CudaImage<float>(Dims(iDivUp(src.size(),tile.sz()),1), NO_INIT, src.getMemoryPolicy(), dev); 00289 cuda_c_findMax(src.getCudaArrayPtr(),buf.getCudaArrayPtr(),tmp.getCudaArrayPtr(),tile.sz(),src.size()); 00290 Image<int> resLoc = tmp.exportToImage(); 00291 Image<float> res = buf.exportToImage(); 00292 int idx = resLoc.getVal(0,0); 00293 val = res.getVal(0,0); 00294 int x,y; 00295 y = idx / src.getWidth(); 00296 x = idx % src.getWidth(); 00297 p = Point2D<int>(x,y); 00298 } 00299 00300 00301 // ###################################################################### 00302 void cudaInplaceNormalize(CudaImage<float>& dst, const float nmin, const float nmax) 00303 { 00304 ASSERT(dst.initialized()); 00305 const int dev = dst.getMemoryDevice(); 00306 const MemoryPolicy mp = dst.getMemoryPolicy(); 00307 ASSERT(mp != HOST_MEMORY); 00308 if (!dst.initialized()) return; 00309 CudaImage<float> oldmin,oldmax; 00310 cudaGetMin(dst, oldmin); 00311 cudaGetMax(dst, oldmax); 00312 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00313 cuda_c_inplaceNormalize(dst.getCudaArrayPtr(), oldmin.getCudaArrayPtr(), oldmax.getCudaArrayPtr(), nmin, nmax,tile.sz(),dst.size()); 00314 } 00315 00316 void cudaInplaceRectify(CudaImage<float>& dst) 00317 { 00318 ASSERT(dst.initialized()); 00319 if (!dst.initialized()) return; 00320 const int dev = dst.getMemoryDevice(); 00321 const MemoryPolicy mp = dst.getMemoryPolicy(); 00322 ASSERT(mp != HOST_MEMORY); 00323 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00324 cuda_c_inplaceRectify(dst.getCudaArrayPtr(), tile.sz(),dst.size()); 00325 } 00326 00327 void cudaInplaceClamp(CudaImage<float>& dst, const float cmin, const float cmax) 00328 { 00329 ASSERT(dst.initialized()); 00330 if (!dst.initialized()) return; 00331 const int dev = dst.getMemoryDevice(); 00332 const MemoryPolicy mp = dst.getMemoryPolicy(); 00333 ASSERT(mp != HOST_MEMORY); 00334 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00335 cuda_c_inplaceClamp(dst.getCudaArrayPtr(),cmin,cmax,tile.sz(),dst.size()); 00336 } 00337 00338 void cudaClear(CudaImage<float>& dst, const float val) 00339 { 00340 const int dev = dst.getMemoryDevice(); 00341 const MemoryPolicy mp = dst.getMemoryPolicy(); 00342 ASSERT(mp != HOST_MEMORY); 00343 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00344 cuda_c_clear(dst.getCudaArrayPtr(),val,tile.sz(),dst.size()); 00345 } 00346 00347 void cudaAbs(CudaImage<float>& src) 00348 { 00349 const int dev = src.getMemoryDevice(); 00350 const MemoryPolicy mp = src.getMemoryPolicy(); 00351 ASSERT(mp != HOST_MEMORY); 00352 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00353 cuda_c_abs(src.getCudaArrayPtr(),tile.sz(),src.size()); 00354 } 00355 00356 void cudaInplaceAddScalar(CudaImage<float>& dst, const CudaImage<float>& offset) 00357 { 00358 ASSERT(dst.initialized()); 00359 const MemoryPolicy mp = dst.getMemoryPolicy(); 00360 ASSERT(mp != HOST_MEMORY); 00361 const int dev = dst.getMemoryDevice(); 00362 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00363 cuda_c_inplaceAddScalar(dst.getCudaArrayPtr(), offset.getCudaArrayPtr(), tile.sz(), dst.size()); 00364 } 00365 00366 void cudaInplaceSubtractScalar(CudaImage<float>& dst, const CudaImage<float>& offset) 00367 { 00368 ASSERT(dst.initialized()); 00369 const int dev = dst.getMemoryDevice(); 00370 const MemoryPolicy mp = dst.getMemoryPolicy(); 00371 ASSERT(mp != HOST_MEMORY); 00372 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00373 cuda_c_inplaceSubtractScalar(dst.getCudaArrayPtr(), offset.getCudaArrayPtr(), tile.sz(), dst.size()); 00374 } 00375 00376 void cudaInplaceMultiplyScalar(CudaImage<float>& dst, const CudaImage<float>& offset) 00377 { 00378 ASSERT(dst.initialized()); 00379 const int dev = dst.getMemoryDevice(); 00380 const MemoryPolicy mp = dst.getMemoryPolicy(); 00381 ASSERT(mp != HOST_MEMORY); 00382 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00383 cuda_c_inplaceMultiplyScalar(dst.getCudaArrayPtr(), offset.getCudaArrayPtr(), tile.sz(), dst.size()); 00384 } 00385 00386 void cudaInplaceDivideScalar(CudaImage<float>& dst, const CudaImage<float>& offset) 00387 { 00388 ASSERT(dst.initialized()); 00389 const int dev = dst.getMemoryDevice(); 00390 const MemoryPolicy mp = dst.getMemoryPolicy(); 00391 ASSERT(mp != HOST_MEMORY); 00392 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00393 cuda_c_inplaceDivideScalar(dst.getCudaArrayPtr(), offset.getCudaArrayPtr(), tile.sz(), dst.size()); 00394 } 00395 00396 void cudaInplaceAddImages(CudaImage<float>& im1, const CudaImage<float>& im2) 00397 { 00398 ASSERT(im1.initialized() && im2.initialized()); 00399 ASSERT(im1.getMemoryDevice() == im2.getMemoryDevice()); 00400 ASSERT(im1.size() == im2.size()); 00401 const int dev = im1.getMemoryDevice(); 00402 const MemoryPolicy mp = im1.getMemoryPolicy(); 00403 ASSERT(mp != HOST_MEMORY); 00404 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00405 cuda_c_inplaceAddImages(im1.getCudaArrayPtr(), im2.getCudaArrayPtr(), tile.sz(), im1.size()); 00406 } 00407 00408 void cudaInplaceSubtractImages(CudaImage<float>& im1, const CudaImage<float>& im2) 00409 { 00410 ASSERT(im1.initialized() && im2.initialized()); 00411 ASSERT(im1.getMemoryDevice() == im2.getMemoryDevice()); 00412 ASSERT(im1.size() == im2.size()); 00413 const int dev = im1.getMemoryDevice(); 00414 const MemoryPolicy mp = im1.getMemoryPolicy(); 00415 ASSERT(mp != HOST_MEMORY); 00416 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00417 cuda_c_inplaceSubtractImages(im1.getCudaArrayPtr(), im2.getCudaArrayPtr(), tile.sz(), im1.size()); 00418 } 00419 00420 void cudaInplaceMultiplyImages(CudaImage<float>& im1, const CudaImage<float>& im2) 00421 { 00422 ASSERT(im1.initialized() && im2.initialized()); 00423 ASSERT(im1.getMemoryDevice() == im2.getMemoryDevice()); 00424 ASSERT(im1.size() == im2.size()); 00425 const int dev = im1.getMemoryDevice(); 00426 const MemoryPolicy mp = im1.getMemoryPolicy(); 00427 ASSERT(mp != HOST_MEMORY); 00428 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00429 cuda_c_inplaceMultiplyImages(im1.getCudaArrayPtr(), im2.getCudaArrayPtr(), tile.sz(), im1.size()); 00430 } 00431 00432 void cudaInplaceDivideImages(CudaImage<float>& im1, const CudaImage<float>& im2) 00433 { 00434 ASSERT(im1.initialized() && im2.initialized()); 00435 ASSERT(im1.getMemoryDevice() == im2.getMemoryDevice()); 00436 ASSERT(im1.size() == im2.size()); 00437 const int dev = im1.getMemoryDevice(); 00438 const MemoryPolicy mp = im1.getMemoryPolicy(); 00439 ASSERT(mp != HOST_MEMORY); 00440 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00441 cuda_c_inplaceDivideImages(im1.getCudaArrayPtr(), im2.getCudaArrayPtr(), tile.sz(), im1.size()); 00442 } 00443 00444 CudaImage<float> cudaAddImages(const CudaImage<float>& im1, const CudaImage<float>& im2) 00445 { 00446 ASSERT(im1.initialized() && im2.initialized()); 00447 ASSERT(im1.getMemoryDevice() == im2.getMemoryDevice()); 00448 ASSERT(im1.size() == im2.size()); 00449 MemoryPolicy mp = im1.getMemoryPolicy(); 00450 const int dev = im1.getMemoryDevice(); 00451 ASSERT(mp != HOST_MEMORY); 00452 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00453 CudaImage<float> res = CudaImage<float>(im1.getDims(),NO_INIT,mp,dev); 00454 cuda_c_addImages(im1.getCudaArrayPtr(), im2.getCudaArrayPtr(), res.getCudaArrayPtr(), tile.sz(), im1.size()); 00455 return res; 00456 } 00457 00458 CudaImage<float> cudaSubtractImages(const CudaImage<float>& im1, const CudaImage<float>& im2) 00459 { 00460 ASSERT(im1.initialized() && im2.initialized()); 00461 ASSERT(im1.getMemoryDevice() == im2.getMemoryDevice()); 00462 ASSERT(im1.size() == im2.size()); 00463 MemoryPolicy mp = im1.getMemoryPolicy(); 00464 const int dev = im1.getMemoryDevice(); 00465 ASSERT(mp != HOST_MEMORY); 00466 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00467 CudaImage<float> res = CudaImage<float>(im1.getDims(),NO_INIT,mp,dev); 00468 cuda_c_subtractImages(im1.getCudaArrayPtr(), im2.getCudaArrayPtr(), res.getCudaArrayPtr(), tile.sz(), im1.size()); 00469 return res; 00470 } 00471 00472 CudaImage<float> cudaMultiplyImages(const CudaImage<float>& im1, const CudaImage<float>& im2) 00473 { 00474 ASSERT(im1.initialized() && im2.initialized()); 00475 ASSERT(im1.getMemoryDevice() == im2.getMemoryDevice()); 00476 ASSERT(im1.size() == im2.size()); 00477 MemoryPolicy mp = im1.getMemoryPolicy(); 00478 const int dev = im1.getMemoryDevice(); 00479 ASSERT(mp != HOST_MEMORY); 00480 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00481 CudaImage<float> res = CudaImage<float>(im1.getDims(),NO_INIT,mp,dev); 00482 cuda_c_multiplyImages(im1.getCudaArrayPtr(), im2.getCudaArrayPtr(), res.getCudaArrayPtr(), tile.sz(), im1.size()); 00483 return res; 00484 } 00485 00486 CudaImage<float> cudaDivideImages(const CudaImage<float>& im1, const CudaImage<float>& im2) 00487 { 00488 ASSERT(im1.initialized() && im2.initialized()); 00489 ASSERT(im1.getMemoryDevice() == im2.getMemoryDevice()); 00490 ASSERT(im1.size() == im2.size()); 00491 MemoryPolicy mp = im1.getMemoryPolicy(); 00492 const int dev = im1.getMemoryDevice(); 00493 ASSERT(mp != HOST_MEMORY); 00494 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00495 CudaImage<float> res = CudaImage<float>(im1.getDims(),NO_INIT,mp,dev); 00496 cuda_c_divideImages(im1.getCudaArrayPtr(), im2.getCudaArrayPtr(), res.getCudaArrayPtr(), tile.sz(), im1.size()); 00497 return res; 00498 } 00499 00500 CudaImage<float> cudaTakeMax(const CudaImage<float>& im1, const CudaImage<float>& im2) 00501 { 00502 ASSERT(im1.initialized() && im2.initialized()); 00503 ASSERT(im1.getMemoryDevice() == im2.getMemoryDevice()); 00504 ASSERT(im1.size() == im2.size()); 00505 MemoryPolicy mp = im1.getMemoryPolicy(); 00506 const int dev = im1.getMemoryDevice(); 00507 ASSERT(mp != HOST_MEMORY); 00508 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00509 CudaImage<float> res = CudaImage<float>(im1.getDims(),NO_INIT,mp,dev); 00510 cuda_c_takeMax(im1.getCudaArrayPtr(), im2.getCudaArrayPtr(), res.getCudaArrayPtr(), tile.sz(), im1.size()); 00511 return res; 00512 } 00513 00514 00515 // ###################################################################### 00516 CudaImage<float> cudaQuadEnergy(const CudaImage<float>& real, const CudaImage<float>& imag) 00517 { 00518 ASSERT(real.initialized() && imag.initialized()); 00519 ASSERT(real.getMemoryDevice() == imag.getMemoryDevice()); 00520 ASSERT(real.isSameSize(imag)); 00521 MemoryPolicy mp = real.getMemoryPolicy(); 00522 const int dev = real.getMemoryDevice(); 00523 ASSERT(mp != HOST_MEMORY); 00524 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00525 CudaImage<float> out(real.getDims(), NO_INIT,mp,dev); 00526 cuda_c_quadEnergy(real.getCudaArrayPtr(), imag.getCudaArrayPtr(), out.getCudaArrayPtr(), tile.sz(), real.size()); 00527 00528 return out; 00529 } 00530 00531 00532 void cudaInplaceAttenuateBorders(CudaImage<float>& a, int size) 00533 { 00534 ASSERT(a.initialized()); 00535 00536 Dims dims = a.getDims(); 00537 00538 if (size * 2 > dims.w()) size = dims.w() / 2; 00539 if (size * 2 > dims.h()) size = dims.h() / 2; 00540 if (size < 1) return; // forget it 00541 const int dev = a.getMemoryDevice(); 00542 MemoryPolicy mp = a.getMemoryPolicy(); 00543 ASSERT(mp != HOST_MEMORY); 00544 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00545 00546 cuda_c_inplaceAttenuateBorders(a.getCudaArrayPtr(), size, tile.sz(), a.getWidth(), a.getHeight()); 00547 } 00548 00549