00001 /*!@file CUDA/CudaImage.H An image template class meant for interacting with 00002 CUDA devices */ 00003 00004 // //////////////////////////////////////////////////////////////////// // 00005 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the // 00006 // University of Southern California (USC) and the iLab at USC. // 00007 // See http://iLab.usc.edu for information about this project. // 00008 // //////////////////////////////////////////////////////////////////// // 00009 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00010 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00011 // in Visual Environments, and Applications'' by Christof Koch and // 00012 // Laurent Itti, California Institute of Technology, 2001 (patent // 00013 // pending; application number 09/912,225 filed July 23, 2001; see // 00014 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00015 // //////////////////////////////////////////////////////////////////// // 00016 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00017 // // 00018 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00019 // redistribute it and/or modify it under the terms of the GNU General // 00020 // Public License as published by the Free Software Foundation; either // 00021 // version 2 of the License, or (at your option) any later version. // 00022 // // 00023 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00024 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00025 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00026 // PURPOSE. See the GNU General Public License for more details. // 00027 // // 00028 // You should have received a copy of the GNU General Public License // 00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00031 // Boston, MA 02111-1307 USA. // 00032 // //////////////////////////////////////////////////////////////////// // 00033 // 00034 // Primary maintainer for this file: Laurent Itti <itti@usc.edu> 00035 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/CUDA/CudaImage.H $ 00036 // $Id: CudaImage.H 12962 2010-03-06 02:13:53Z irock $ 00037 // 00038 00039 #ifndef CUDAIMAGE_H_DEFINED 00040 #define CUDAIMAGE_H_DEFINED 00041 00042 #include "Util/Assert.H" 00043 #include "Util/Promotions.H" 00044 #include "Image/ArrayData.H" 00045 #include "Image/Point2D.H" 00046 #include "Image/Rectangle.H" 00047 #include "Image/Image.H" 00048 #include "Image/Pixels.H" 00049 #include "CUDA/wrap_c_cuda.h" 00050 #include <algorithm> // for std::min 00051 00052 //! CudaImage template class 00053 /*! This is a image template class for CUDA devices that can handle grayscale 00054 as well as color or multispectral images. CudaImage methods should 00055 be instantiable for any type T that has the basic arithmetic 00056 operators. However, our CUDA processing is inherently float based, so 00057 conversions may occur. Note that some CudaImage functions will only 00058 work with float types, and other will only work for composite types 00059 such as PixRGB. 00060 */ 00061 00062 template <class T> 00063 class CudaImage 00064 { 00065 public: 00066 00067 // ############################################################ 00068 /*! @name Constructors, destructors, assignment */ 00069 //@{ 00070 00071 //! Construct from C array using memory policy 00072 /*! Build from C array; an internal copy of the C array will be 00073 allocated, so the C array can (and should) be freed without 00074 affecting the Image. */ 00075 inline CudaImage(const T* inarray, int width, int height, const MemoryPolicy mp, const int dev, const MemoryPolicy srcmp, const int srcdev); 00076 00077 //! Construct from C array using memory policy 00078 /*! Build from C array; an internal copy of the C array will be 00079 allocated, so the C array can (and should) be freed without 00080 affecting the CudaImage. */ 00081 inline CudaImage(const T* inarray, const Dims& dims, const MemoryPolicy mp, const int dev, const MemoryPolicy srcmp, const int srcdev); 00082 00083 //! Allocates memory for given size, optionally zero-clear that memory, and set the memory policy 00084 inline CudaImage(int width, int height, InitPolicy init, const MemoryPolicy mp, const int dev); 00085 00086 //! Constructor that only allocates memory for given size 00087 inline explicit CudaImage(const Dims& dims, const MemoryPolicy mp, const int dev); 00088 00089 //! Constructor that only allocates memory for given size and init type 00090 inline explicit CudaImage(const Dims& dims, InitPolicy init, const MemoryPolicy mp, const int dev); 00091 00092 //! Construct an empty (0-by-0) image (useful for arrays of CudaImages). 00093 inline CudaImage(); 00094 00095 //! Copy constructor 00096 /*! e.g.: 00097 \code 00098 CudaImage<byte> im(other); 00099 // or 00100 CudaImage<byte> im = other; // with other also of type CudaImage<byte> 00101 \endcode 00102 */ 00103 inline CudaImage(const CudaImage<T>& A); 00104 00105 //! Copy constructor to particular memory 00106 /*! e.g.: 00107 \code 00108 CudaImage<byte> im(other); 00109 // or 00110 CudaImage<byte> im = other; // with other also of type CudaImage<byte> 00111 \endcode 00112 */ 00113 inline CudaImage(const CudaImage<T>& A, const MemoryPolicy mp, const int dev); 00114 00115 //! Import copy constructor 00116 /*! e.g.: 00117 \code 00118 CudaImage<byte> im(other); 00119 // or 00120 CudaImage<byte> im = other; // with other of type CudaImage<float> 00121 \endcode 00122 */ 00123 inline CudaImage(const Image<T>& A, const MemoryPolicy mp, const int dev); 00124 00125 //! Assigment operator. 00126 /*! e.g.: 00127 \code 00128 CudaImage<byte> im1, im2; im2 = im1; 00129 \endcode 00130 */ 00131 inline CudaImage<T>& operator=(const CudaImage<T>& A); 00132 00133 //! Destructor 00134 inline ~CudaImage(); 00135 00136 //! Free memory and switch to uninitialized state. 00137 /*! Note that it is \b NOT necessary to call this function to ensure 00138 proper cleanup, that will be done in the destructor by 00139 default. Rather, freeMem() is offered just as a performance 00140 optimization, to allow you to release a potentially large chunk 00141 of memory when you are finished using it. */ 00142 inline void freeMem(); 00143 00144 //@} 00145 00146 // ############################################################ 00147 /*! @name Math operators */ 00148 //! Math operators 00149 //@{ 00150 00151 //! Addition inplace by a host provided scalar 00152 inline CudaImage<float>& operator+=(const float val); 00153 00154 //! Subtraction inplace by a host provided scalar 00155 inline CudaImage<float>& operator-=(const float val); 00156 00157 //! Multiplication inplace by a host provided scalar 00158 inline CudaImage<float>& operator*=(const float val); 00159 00160 //! Division inplace by a host provided scalar 00161 inline CudaImage<float>& operator/=(const float val); 00162 00163 //! Addition inplace by a device image or scalar (a 1x1 CudaImage is treated as a scalar) 00164 inline CudaImage<float>& operator+=(const CudaImage<float>& im); 00165 00166 //! Subtraction inplace by a device image or scalar (a 1x1 CudaImage is treated as a scalar) 00167 inline CudaImage<float>& operator-=(const CudaImage<float>& im); 00168 00169 //! Multiplication inplace by a device image or scalar (a 1x1 CudaImage is treated as a scalar) 00170 inline CudaImage<float>& operator*=(const CudaImage<float>& im); 00171 00172 //! Division inplace by a device image or scalar (a 1x1 CudaImage is treated as a scalar) 00173 inline CudaImage<float>& operator/=(const CudaImage<float>& im); 00174 00175 //! Addition by a host provided scalar 00176 inline CudaImage<float> operator+(const float val) const; 00177 00178 //! Subtraction by a host provided scalar 00179 inline CudaImage<float> operator-(const float val) const; 00180 00181 //! Multiplcation by a host provided scalar 00182 inline CudaImage<float> operator*(const float val) const; 00183 00184 //! Division by a host provided scalar 00185 inline CudaImage<float> operator/(const float val) const; 00186 00187 //! Addition by a device image or scalar (a 1x1 CudaImage is treated as a scalar) 00188 inline CudaImage<float> operator+(const CudaImage<float>& im) const; 00189 00190 //! Subtraction by a device image or scalar (a 1x1 CudaImage is treated as a scalar) 00191 inline CudaImage<float> operator-(const CudaImage<float>& im) const; 00192 00193 //! Multiplication by a device image or scalar (a 1x1 CudaImage is treated as a scalar) 00194 inline CudaImage<float> operator*(const CudaImage<float>& im) const; 00195 00196 //! Division by a device image or scalar (a 1x1 CudaImage is treated as a scalar) 00197 inline CudaImage<float> operator/(const CudaImage<float>& im) const; 00198 //@} 00199 00200 00201 // ############################################################ 00202 /*! @name Memory management functions */ 00203 //@{ 00204 00205 //! Swap the contents of two images 00206 inline void swap(CudaImage<T>& other); 00207 00208 00209 //! Return a new image object with a deep copy of the underlying data 00210 /*! This function is necessary for safe use of 00211 attach()/detach(). That is, unfortunately attach()/detach() are 00212 not safe for use with shared image objects -- consider the 00213 following code: 00214 00215 \code 00216 double d[4] = { 0.0, 1.0, 2.0, 3.0}; 00217 00218 // create an CudaImage that is attach()'ed to the double array 00219 CudaImage<double> a; 00220 a.attach(&d[0], 2, 2); 00221 00222 const CudaImage<double> b = a; 00223 // now 'b' thinks it has a safe lock on some const values: 00224 00225 d[0] = -1.0; 00226 // OOPS! By changing values in the 'd' array directly, we'll now 00227 // have changed things to that b[0] == -1.0, even though 'b' was 00228 // declared as 'const' 00229 \endcode 00230 00231 The solution to this problem is to prohibit the copy done in 00232 'b=a' above (this triggers an LFATAL() in 00233 ArrayData::acquire()). That assures us that any ArrayData that 00234 has a StoragePolicy of WRITE_THRU will be un-shareable. 00235 00236 So, back to the point -- the correct way to write the code above 00237 would be to use deepcopy(): 00238 00239 \code 00240 double d[4] = { 0.0, 1.0, 2.0, 3.0}; 00241 00242 CudaImage<double> a; 00243 a.attach(&d[0], 2, 2); 00244 00245 const CudaImage<double> b = a.deepcopy(); 00246 00247 d[0] = -1.0; 00248 // Now, 'b' is insulated from any changes to 'd' since we've 00249 // done a deep copy, so even now we'll still have b[0]==0.0 00250 \endcode 00251 */ 00252 inline CudaImage<T> deepcopy() const; 00253 00254 //! Return a new image w/ deep copy of underlying data using memory policy 00255 /*! This function is needed to allow new image generation to any 00256 memory area in the system, which is going to have to be a deep copy 00257 */ 00258 inline CudaImage<T> deepcopy(const MemoryPolicy mp, const int dev) const; 00259 00260 inline Image<T> exportToImage() const; 00261 00262 //! Free mem and realloc new array (array contents are lost). 00263 /*! Use rescale() instead if you want to preserve image contents. */ 00264 inline void resize(const Dims& dims, const bool clear = false); 00265 00266 //! Free mem and realloc new array (array contents are lost). 00267 /*! Use rescale() instead if you want to preserve image contents. */ 00268 inline void resize(const int width, const int height, 00269 const bool clear = false); 00270 00271 //@} 00272 00273 // ############################################################ 00274 /*! @name Access functions*/ 00275 //@{ 00276 00277 //! Check whether image is non-empty (i.e., non-zero height and width). 00278 inline bool initialized() const; 00279 00280 //! Get image size (width * height) 00281 inline int getSize() const; 00282 00283 //! Get image size (width * height) 00284 inline uint size() const; 00285 00286 //! Get image width 00287 inline int getWidth() const; 00288 00289 //! Get image height 00290 inline int getHeight() const; 00291 00292 //! Get image width+height in Dims struct 00293 inline const Dims& getDims() const; 00294 00295 //! Get image bounds as a rectangle with upper-left point at (0,0) and dims matching the image dims 00296 inline Rectangle getBounds() const; 00297 00298 //! Check if *this is the same size as the other thing 00299 /*! The other thing can be any type that exposes getHeight() and 00300 getWidth() */ 00301 template <class C> 00302 inline bool isSameSize(const C& other) const; 00303 00304 //! Check if the image is 1D, i.e., width == 1 or height == 1 00305 inline bool is1D() const; 00306 00307 //! Check if the image is a vector, i.e., width == 1 00308 inline bool isVector() const; 00309 00310 //! Check if the image is a transposed vector, i.e., height == 1 00311 inline bool isTransposedVector() const; 00312 00313 //! Check if the image is square, i.e., width == height 00314 inline bool isSquare() const; 00315 00316 //! Returns read-only (const) pointer to internal image array 00317 inline const T* getCudaArrayPtr() const; 00318 00319 //! Returns read/write (non-const) pointer to internal image array 00320 inline T* getCudaArrayPtr(); 00321 00322 //! Test whether point falls inside array boundaries 00323 inline bool coordsOk(const Point2D<int>& P) const; 00324 00325 //! Test whether point falls inside array boundaries 00326 inline bool coordsOk(const int i, const int j) const; 00327 00328 //! Test whether point falls inside array boundaries 00329 /*! This test is intended to be used before you attempt a getValInterp() */ 00330 inline bool coordsOk(const Point2D<float>& p) const; 00331 00332 //! Test whether point falls inside array boundaries 00333 /*! This test is intended to be used before you attempt a getValInterp() */ 00334 inline bool coordsOk(const float i, const float j) const; 00335 00336 //! Test whether rectangle fits in image 00337 inline bool rectangleOk(const Rectangle& rect) const; 00338 00339 //! Return memory policy of underlying array data 00340 inline MemoryPolicy getMemoryPolicy() const; 00341 00342 //! Return memory policy of underlying array data 00343 inline int getMemoryDevice() const; 00344 00345 //@} 00346 00347 //! clear contents (or set to given value) 00348 inline void clear(const char& val = 0); 00349 00350 //@} 00351 00352 // ############################################################ 00353 /*! @name Functions for testing/debugging only */ 00354 //@{ 00355 00356 //! For testing/debugging only. 00357 bool hasSameData(const CudaImage<T>& b) const; 00358 00359 //! For testing/debugging only. 00360 /*! Returns the current reference count. */ 00361 long refCount() const throw(); 00362 00363 //! For testing/debugging only. 00364 /*! Check if the ArrayHandle is shared. */ 00365 bool isShared() const throw(); 00366 00367 //@} 00368 00369 private: 00370 // ############################################################ 00371 // ##### Data: 00372 // ############################################################ 00373 ArrayHandle<T> itsHdl; 00374 inline const ArrayData<T>& impl() const; 00375 inline ArrayData<T>& uniq(); 00376 00377 }; 00378 00379 // ###################################################################### 00380 // ###################################################################### 00381 // ###################################################################### 00382 // FREE FUNCTIONS: 00383 // ###################################################################### 00384 // ###################################################################### 00385 // ###################################################################### 00386 00387 //! Return a new Image with the same data, but interpreted as a different shape 00388 /*! NOTE that this is very different from resize()! With reshape(), we 00389 are not changing the number of elements in the Image; rather we 00390 are just changing the way that the internal 1-D memory array is 00391 mapped to a logical 2-D Image. For example, you could reshape a 00392 10x5 Image to a 50x1 Image, or a 25x2 Image, or vice versa. You 00393 need to be aware the fact that Image uses a row-major storage 00394 format internally (i.e., as you step through the memory, you first 00395 traverse all the pixels in row 1, then all the pixels in row 2, 00396 etc.). So if you reshape a WxH Image to a WHx1 Image, your new 1-D 00397 Image will appear to have the rows of the original Image arranged 00398 end-to-end. 00399 00400 Programmer note: In principle, we could support a reshape() 00401 operation that wouldn't require copying any data; in order to do 00402 that we'd need to split the Dims management out of ArrayData and 00403 ArrayHandle, so that a single ArrayData object could be used in 00404 multiple Image objects, each with potentially different Dims (but 00405 all with the same total number of pixels). However, for now we 00406 just use a simpler implementation which copies the data into the 00407 new Image. 00408 */ 00409 template <class T> 00410 inline CudaImage<T> reshape(const CudaImage<T>& orig, const Dims& newdims) 00411 { 00412 ASSERT(orig.getDims().sz() == newdims.sz()); 00413 return CudaImage<T>(orig.getCudaArrayPtr(), newdims, orig.getMemoryPolicy(), orig.getMemoryDevice(), orig.getMemoryPolicy(), orig.getMemoryDevice()); 00414 } 00415 00416 // ###################################################################### 00417 // ###################################################################### 00418 // ###################################################################### 00419 // INLINE FUNCTIONS: 00420 // ###################################################################### 00421 // ###################################################################### 00422 // ###################################################################### 00423 00424 // ###################################################################### 00425 // ##### Constructors & Destructors: 00426 // ###################################################################### 00427 00428 // ###################################################################### 00429 template <class T> inline 00430 CudaImage<T>::CudaImage(const T* inarray, int width, int height, MemoryPolicy trgmp, int trgdev, const MemoryPolicy srcmp, const int srcdev) : 00431 itsHdl(new ArrayData<T>(Dims(width, height), inarray, trgmp, trgdev, srcmp, srcdev)) 00432 {} 00433 00434 // ###################################################################### 00435 template <class T> inline 00436 CudaImage<T>::CudaImage(const T* inarray, const Dims& dims, MemoryPolicy trgmp, int trgdev, const MemoryPolicy srcmp, const int srcdev) : 00437 itsHdl(new ArrayData<T>(dims, inarray, trgmp, trgdev, srcmp, srcdev)) 00438 {} 00439 00440 // ###################################################################### 00441 template <class T> inline 00442 CudaImage<T>::CudaImage(int width, int height, InitPolicy init, MemoryPolicy mp, int dev) : 00443 itsHdl(new ArrayData<T>(Dims(width, height), init, mp, dev)) 00444 {} 00445 00446 // ###################################################################### 00447 template <class T> inline 00448 CudaImage<T>::CudaImage(const Dims& dims, const MemoryPolicy mp, const int dev) : 00449 itsHdl(new ArrayData<T>(dims, NO_INIT, mp, dev)) 00450 {} 00451 00452 // ###################################################################### 00453 template <class T> inline 00454 CudaImage<T>::CudaImage(const Dims& dims, InitPolicy init, const MemoryPolicy mp, const int dev) : 00455 itsHdl(new ArrayData<T>(dims, init, mp, dev)) 00456 {} 00457 00458 // ###################################################################### 00459 template <class T> inline 00460 CudaImage<T>::CudaImage() : 00461 itsHdl(new ArrayData<T>()) 00462 {} 00463 00464 // ###################################################################### 00465 template <class T> inline 00466 CudaImage<T>::CudaImage(const CudaImage<T>& A) : 00467 itsHdl(A.itsHdl) 00468 {} 00469 00470 // ###################################################################### 00471 template <class T> inline 00472 CudaImage<T>::CudaImage(const CudaImage<T>& A, const MemoryPolicy mp, const int dev) : 00473 itsHdl(new ArrayData<T>(A.getDims(), NO_INIT, mp,dev)) 00474 { 00475 // Find out where src is sitting 00476 const MemoryPolicy srcmp = A.getMemoryPolicy(); 00477 const int srcdev = A.getMemoryDevice(); 00478 // How should we get the actual number of elements? dist from stop to aptr or size()? 00479 //int n = stop-aptr; 00480 int n = size(); 00481 ArrayHelper<T>::copy_initialize(getCudaArrayPtr(),n,A.getCudaArrayPtr(),mp,dev,srcmp,srcdev); 00482 } 00483 00484 template <class T> inline 00485 CudaImage<T>::CudaImage(const Image<T>& A, const MemoryPolicy mp, const int dev) : 00486 itsHdl(new ArrayData<T>(A.getDims(), NO_INIT, mp,dev)) 00487 { 00488 // How should we get the actual number of elements? dist from stop to aptr or size()? 00489 //int n = stop-aptr; 00490 int n = size(); 00491 ArrayHelper<T>::copy_initialize(getCudaArrayPtr(),n,A.getArrayPtr(),mp,dev,HOST_MEMORY,CUDA_HOST_DEVICE_NUM); 00492 } 00493 00494 // ###################################################################### 00495 template <class T> inline 00496 CudaImage<T>& CudaImage<T>::operator=(const CudaImage<T>& A) 00497 { 00498 CudaImage<T> A_copy( A ); 00499 this->swap(A_copy); 00500 return *this; 00501 } 00502 00503 template <class T> inline 00504 CudaImage<float>& CudaImage<T>::operator+=(const float val) 00505 { 00506 const int dev = this->getMemoryDevice(); 00507 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00508 00509 cuda_c_inplaceAddHostScalar(this->getCudaArrayPtr(), val, tile.sz(), this->size()); 00510 return *this; 00511 } 00512 00513 template <class T> inline 00514 CudaImage<float>& CudaImage<T>::operator-=(const float val) 00515 { 00516 const int dev = this->getMemoryDevice(); 00517 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00518 00519 cuda_c_inplaceSubtractHostScalar(this->getCudaArrayPtr(), val, tile.sz(), this->size()); 00520 return *this; 00521 } 00522 00523 template <class T> inline 00524 CudaImage<float>& CudaImage<T>::operator*=(const float val) 00525 { 00526 const int dev = this->getMemoryDevice(); 00527 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00528 00529 cuda_c_inplaceMultiplyHostScalar(this->getCudaArrayPtr(), val, tile.sz(), this->size()); 00530 return *this; 00531 } 00532 00533 template <class T> inline 00534 CudaImage<float>& CudaImage<T>::operator/=(const float val) 00535 { 00536 const int dev = this->getMemoryDevice(); 00537 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00538 00539 cuda_c_inplaceDivideHostScalar(this->getCudaArrayPtr(), val, tile.sz(), this->size()); 00540 return *this; 00541 } 00542 00543 // For math operations with CudaImage arguments, size()==1 CudaImages are treated as scalars to allow for 00544 // device side values to be applied as scalars 00545 00546 template <class T> inline 00547 CudaImage<float>& CudaImage<T>::operator+=(const CudaImage<float>& im) 00548 { 00549 ASSERT(im.initialized()); 00550 ASSERT(this->getMemoryDevice() == im.getMemoryDevice()); 00551 const int dev = this->getMemoryDevice(); 00552 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00553 00554 if(im.getSize() == 1) 00555 { 00556 cuda_c_inplaceAddScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size()); 00557 } 00558 else 00559 { 00560 ASSERT(this->isSameSize(im)); 00561 cuda_c_inplaceAddImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size()); 00562 } 00563 return *this; 00564 } 00565 00566 template <class T> inline 00567 CudaImage<float>& CudaImage<T>::operator-=(const CudaImage<float>& im) 00568 { 00569 ASSERT(im.initialized()); 00570 ASSERT(this->getMemoryDevice() == im.getMemoryDevice()); 00571 const int dev = this->getMemoryDevice(); 00572 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00573 00574 if(im.getSize() == 1) 00575 { 00576 cuda_c_inplaceSubtractScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size()); 00577 } 00578 else 00579 { 00580 ASSERT(this->isSameSize(im)); 00581 cuda_c_inplaceSubtractImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size()); 00582 } 00583 return *this; 00584 } 00585 00586 template <class T> inline 00587 CudaImage<float>& CudaImage<T>::operator*=(const CudaImage<float>& im) 00588 { 00589 ASSERT(im.initialized()); 00590 ASSERT(this->getMemoryDevice() == im.getMemoryDevice()); 00591 const int dev = this->getMemoryDevice(); 00592 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00593 00594 if(im.getSize() == 1) 00595 { 00596 cuda_c_inplaceMultiplyScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size()); 00597 } 00598 else 00599 { 00600 ASSERT(this->isSameSize(im)); 00601 cuda_c_inplaceMultiplyImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size()); 00602 } 00603 return *this; 00604 } 00605 00606 template <class T> inline 00607 CudaImage<float>& CudaImage<T>::operator/=(const CudaImage<float>& im) 00608 { 00609 ASSERT(im.initialized()); 00610 ASSERT(this->getMemoryDevice() == im.getMemoryDevice()); 00611 const int dev = this->getMemoryDevice(); 00612 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00613 00614 if(im.getSize() == 1) 00615 { 00616 cuda_c_inplaceDivideScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size()); 00617 } 00618 else 00619 { 00620 ASSERT(this->isSameSize(im)); 00621 cuda_c_inplaceDivideImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size()); 00622 } 00623 return *this; 00624 } 00625 00626 00627 template <class T> inline 00628 CudaImage<float> CudaImage<T>::operator+(const float val) const 00629 { 00630 const int dev = this->getMemoryDevice(); 00631 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00632 CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev); 00633 00634 cuda_c_addHostScalar(this->getCudaArrayPtr(), val, result.getCudaArrayPtr(), tile.sz(), this->size()); 00635 00636 return result; 00637 } 00638 00639 template <class T> inline 00640 CudaImage<float> CudaImage<T>::operator-(const float val) const 00641 { 00642 const int dev = this->getMemoryDevice(); 00643 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00644 CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev); 00645 00646 cuda_c_subtractHostScalar(this->getCudaArrayPtr(), val, result.getCudaArrayPtr(), tile.sz(), this->size()); 00647 00648 return result; 00649 } 00650 00651 template <class T> inline 00652 CudaImage<float> CudaImage<T>::operator*(const float val) const 00653 { 00654 const int dev = this->getMemoryDevice(); 00655 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00656 CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev); 00657 00658 cuda_c_multiplyHostScalar(this->getCudaArrayPtr(), val, result.getCudaArrayPtr(), tile.sz(), this->size()); 00659 00660 return result; 00661 } 00662 00663 template <class T> inline 00664 CudaImage<float> CudaImage<T>::operator/(const float val) const 00665 { 00666 const int dev = this->getMemoryDevice(); 00667 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00668 CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev); 00669 00670 cuda_c_divideHostScalar(this->getCudaArrayPtr(), val, result.getCudaArrayPtr(), tile.sz(), this->size()); 00671 00672 return result; 00673 } 00674 00675 00676 // For math operations with CudaImage arguments, size()==1 CudaImages are treated as scalars to allow for 00677 // device side values to be applied as scalars 00678 00679 template <class T> inline 00680 CudaImage<float> CudaImage<T>::operator+(const CudaImage<float>& im) const 00681 { 00682 ASSERT(im.initialized()); 00683 ASSERT(this->getMemoryDevice() == im.getMemoryDevice()); 00684 const int dev = this->getMemoryDevice(); 00685 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00686 CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev); 00687 if(im.getSize() == 1) 00688 { 00689 cuda_c_addScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size()); 00690 } 00691 else 00692 { 00693 ASSERT(this->isSameSize(im)); 00694 cuda_c_addImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size()); 00695 } 00696 return result; 00697 } 00698 00699 template <class T> inline 00700 CudaImage<float> CudaImage<T>::operator-(const CudaImage<float>& im) const 00701 { 00702 ASSERT(im.initialized()); 00703 ASSERT(this->getMemoryDevice() == im.getMemoryDevice()); 00704 const int dev = this->getMemoryDevice(); 00705 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00706 CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev); 00707 if(im.getSize() == 1) 00708 { 00709 cuda_c_subtractScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size()); 00710 } 00711 else 00712 { 00713 ASSERT(this->isSameSize(im)); 00714 cuda_c_subtractImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size()); 00715 } 00716 return result; 00717 } 00718 00719 template <class T> inline 00720 CudaImage<float> CudaImage<T>::operator*(const CudaImage<float>& im) const 00721 { 00722 ASSERT(im.initialized()); 00723 ASSERT(this->getMemoryDevice() == im.getMemoryDevice()); 00724 const int dev = this->getMemoryDevice(); 00725 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00726 CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev); 00727 if(im.getSize() == 1) 00728 { 00729 cuda_c_multiplyScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size()); 00730 } 00731 else 00732 { 00733 ASSERT(this->isSameSize(im)); 00734 cuda_c_multiplyImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size()); 00735 } 00736 return result; 00737 } 00738 00739 template <class T> inline 00740 CudaImage<float> CudaImage<T>::operator/(const CudaImage<float>& im) const 00741 { 00742 ASSERT(im.initialized()); 00743 ASSERT(this->getMemoryDevice() == im.getMemoryDevice()); 00744 const int dev = this->getMemoryDevice(); 00745 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00746 CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev); 00747 if(im.getSize() == 1) 00748 { 00749 cuda_c_divideScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size()); 00750 } 00751 else 00752 { 00753 ASSERT(this->isSameSize(im)); 00754 cuda_c_divideImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size()); 00755 } 00756 return result; 00757 } 00758 00759 // ###################################################################### 00760 template <class T> inline 00761 CudaImage<T>::~CudaImage() 00762 { /* memory deallocation is handled by ArrayData's destructor */ } 00763 00764 // ###################################################################### 00765 template <class T> inline 00766 void CudaImage<T>::freeMem() 00767 { 00768 CudaImage<T> empty; 00769 this->swap(empty); 00770 } 00771 00772 // ###################################################################### 00773 // ##### Memory management functions: 00774 // ###################################################################### 00775 00776 // ###################################################################### 00777 template <class T> inline 00778 void CudaImage<T>::swap(CudaImage<T>& other) 00779 { 00780 itsHdl.swap(other.itsHdl); 00781 } 00782 00783 // ###################################################################### 00784 template <class T> inline 00785 CudaImage<T> CudaImage<T>::deepcopy(const MemoryPolicy mp, const int dev) const 00786 { 00787 // see comment in class definition for why deepcopy() might be 00788 // needed 00789 return CudaImage<T>(this->getCudaArrayPtr(), this->getDims(), mp, dev, this->getMemoryPolicy(), this->getMemoryDevice()); 00790 } 00791 00792 00793 template <class T> inline 00794 Image<T> CudaImage<T>::exportToImage() const 00795 { 00796 if(getMemoryPolicy() == HOST_MEMORY) 00797 return Image<T>(getCudaArrayPtr(), getDims()); 00798 else 00799 { 00800 Image<T> ret(getDims(),NO_INIT); 00801 ArrayHelper<T>::copy_initialize(ret.getArrayPtr(),size(),getCudaArrayPtr(),HOST_MEMORY,CUDA_HOST_DEVICE_NUM,getMemoryPolicy(),getMemoryDevice()); 00802 return ret; 00803 } 00804 } 00805 00806 // ###################################################################### 00807 template <class T> inline 00808 void CudaImage<T>::resize(const Dims& dims, const bool do_clear) 00809 { 00810 // This algorithm is slightly more convoluted than may appear necessary 00811 // at first, in order to be most efficient in the context of 00812 // copy-on-write. If the requested size is different than the current 00813 // size, it's very simple: we just make a new image of the right size, 00814 // clearing it if so requested. Otherwise, if we're keeping the same 00815 // size, we don't necessarily have to make a new data block; instead we 00816 // just delegate to clear(), which does the right thing depending on 00817 // whether we currently have a shared or unshared data block. 00818 00819 if (dims != getDims()) 00820 { 00821 ArrayHandle<T> resized(new ArrayData<T>(dims, do_clear ? ZEROS:NO_INIT, this->getMemoryPolicy(), this->getMemoryDevice())); 00822 itsHdl.swap(resized); 00823 } 00824 else // we're keeping the same size, so just clear() if necessary 00825 { 00826 if (do_clear) clear( 0 ); 00827 } 00828 } 00829 00830 // ###################################################################### 00831 template <class T> inline 00832 void CudaImage<T>::resize(const int width, const int height, const bool do_clear) 00833 { 00834 resize(Dims(width, height), do_clear); 00835 } 00836 00837 // ###################################################################### 00838 // ##### Access functions: 00839 // ###################################################################### 00840 00841 // ###################################################################### 00842 template <class T> inline 00843 bool CudaImage<T>::initialized() const 00844 { return getWidth() > 0 && getHeight() > 0; } 00845 00846 // ###################################################################### 00847 template <class T> inline 00848 int CudaImage<T>::getSize() const 00849 { return getDims().sz(); } 00850 00851 // ###################################################################### 00852 template <class T> inline 00853 uint CudaImage<T>::size() const 00854 { return getDims().sz(); } 00855 00856 // ###################################################################### 00857 template <class T> inline 00858 int CudaImage<T>::getWidth() const 00859 { return itsHdl.get().w(); } 00860 00861 // ###################################################################### 00862 template <class T> inline 00863 int CudaImage<T>::getHeight() const 00864 { return itsHdl.get().h(); } 00865 00866 // ###################################################################### 00867 template <class T> inline 00868 const Dims& CudaImage<T>::getDims() const 00869 { return itsHdl.get().dims(); } 00870 00871 // ###################################################################### 00872 template <class T> inline 00873 Rectangle CudaImage<T>::getBounds() const 00874 { return Rectangle(Point2D<int>(0,0), itsHdl.get().dims()); } 00875 00876 // ###################################################################### 00877 template <class T> template <class C> inline 00878 bool CudaImage<T>::isSameSize(const C& other) const 00879 { return getWidth() == other.getWidth() && getHeight() == other.getHeight(); } 00880 00881 // ###################################################################### 00882 template <class T> inline 00883 bool CudaImage<T>::is1D() const 00884 { return (getWidth() == 1) || (getHeight() == 1); } 00885 00886 // ###################################################################### 00887 template <class T> inline 00888 bool CudaImage<T>::isVector() const 00889 { return (getWidth() == 1); } 00890 00891 // ###################################################################### 00892 template <class T> inline 00893 bool CudaImage<T>::isTransposedVector() const 00894 { return (getHeight() == 1); } 00895 00896 // ###################################################################### 00897 template <class T> inline 00898 bool CudaImage<T>::isSquare() const 00899 { return (getWidth() == getHeight()); } 00900 00901 // ###################################################################### 00902 template <class T> inline 00903 const T* CudaImage<T>::getCudaArrayPtr() const 00904 { 00905 return impl().data(); 00906 } 00907 00908 // ###################################################################### 00909 template <class T> inline 00910 T* CudaImage<T>::getCudaArrayPtr() 00911 { 00912 return uniq().dataw(); 00913 } 00914 00915 // ###################################################################### 00916 template <class T> inline 00917 bool CudaImage<T>::coordsOk(const Point2D<int>& P) const 00918 { 00919 return (P.i >= 0 && P.j >= 0 && P.i < getWidth() && P.j < getHeight()); 00920 } 00921 00922 // ###################################################################### 00923 template <class T> inline 00924 bool CudaImage<T>::coordsOk(const int i, const int j) const 00925 { 00926 return (i >= 0 && j >= 0 && i < getWidth() && j < getHeight()); 00927 } 00928 00929 // ###################################################################### 00930 template <class T> inline 00931 bool CudaImage<T>::coordsOk(const Point2D<float>& p) const 00932 { 00933 return this->coordsOk(p.i, p.j); 00934 } 00935 00936 // ###################################################################### 00937 template <class T> inline 00938 bool CudaImage<T>::coordsOk(const float i, const float j) const 00939 { 00940 return (i >= 0.0F && j >= 0.0F && 00941 i < float(getWidth() - 1) && j < float(getHeight() - 1)); 00942 } 00943 00944 // ###################################################################### 00945 template <class T> inline 00946 bool CudaImage<T>::rectangleOk(const Rectangle& rect) const 00947 { 00948 return (rect.left() < getWidth() && rect.rightI() < getWidth() && 00949 rect.top() < getHeight() && rect.bottomI() < getHeight() && 00950 rect.left() >= 0 && rect.rightI() >= 0 && 00951 rect.top() >= 0 && rect.bottomI() >= 0); 00952 } 00953 00954 // ###################################################################### 00955 template <class T> inline 00956 MemoryPolicy CudaImage<T>::getMemoryPolicy() const 00957 { 00958 return impl().getMemoryPolicy(); 00959 } 00960 00961 // ###################################################################### 00962 template <class T> inline 00963 int CudaImage<T>::getMemoryDevice() const 00964 { 00965 return impl().getMemoryDevice(); 00966 } 00967 00968 00969 // ###################################################################### 00970 template <class T> inline 00971 void CudaImage<T>::clear(const char &val) 00972 { 00973 // Check if we have a shared implementation... if yes, then for 00974 // efficiency we should release our copy before doing the iterative 00975 // loop, which would otherwise unnecessarily make a unique copy of 00976 // the shared data for us, which we would then ceremoniously ignore 00977 // as we clear it to a new value. 00978 if (itsHdl.isShared()) 00979 *this = CudaImage<T>(getDims(), NO_INIT, this->getMemoryPolicy(), this->getMemoryDevice()); 00980 00981 CudaDevices::memset(this->getCudaArrayPtr(),val,size()*sizeof(T),this->getMemoryDevice()); 00982 } 00983 00984 // ###################################################################### 00985 // ##### Functions for testing/debugging only: 00986 // ###################################################################### 00987 00988 template <class T> inline 00989 bool CudaImage<T>::hasSameData(const CudaImage<T>& b) const 00990 { return itsHdl.hasSameData(b.itsHdl); } 00991 00992 // ###################################################################### 00993 template <class T> inline 00994 long CudaImage<T>::refCount() const throw() { return itsHdl.refCount(); } 00995 00996 // ###################################################################### 00997 template <class T> inline 00998 bool CudaImage<T>::isShared() const throw() { return itsHdl.isShared(); } 00999 01000 // ###################################################################### 01001 // ##### Private methods: 01002 // ###################################################################### 01003 01004 template <class T> inline 01005 const ArrayData<T>& CudaImage<T>::impl() const 01006 { return itsHdl.get(); } 01007 01008 // ###################################################################### 01009 template <class T> inline 01010 ArrayData<T>& CudaImage<T>::uniq() 01011 { return itsHdl.uniq(this->getMemoryPolicy(),this->getMemoryDevice()); } 01012 01013 // ###################################################################### 01014 /* So things look consistent in everyone's emacs... */ 01015 /* Local Variables: */ 01016 /* indent-tabs-mode: nil */ 01017 /* End: */ 01018 01019 #endif