CudaImage.H

Go to the documentation of this file.
00001 /*!@file CUDA/CudaImage.H An image template class meant for interacting with
00002   CUDA devices */
00003 
00004 // //////////////////////////////////////////////////////////////////// //
00005 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
00006 // University of Southern California (USC) and the iLab at USC.         //
00007 // See http://iLab.usc.edu for information about this project.          //
00008 // //////////////////////////////////////////////////////////////////// //
00009 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00010 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00011 // in Visual Environments, and Applications'' by Christof Koch and      //
00012 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00013 // pending; application number 09/912,225 filed July 23, 2001; see      //
00014 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00015 // //////////////////////////////////////////////////////////////////// //
00016 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00017 //                                                                      //
00018 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00019 // redistribute it and/or modify it under the terms of the GNU General  //
00020 // Public License as published by the Free Software Foundation; either  //
00021 // version 2 of the License, or (at your option) any later version.     //
00022 //                                                                      //
00023 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00024 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00025 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00026 // PURPOSE.  See the GNU General Public License for more details.       //
00027 //                                                                      //
00028 // You should have received a copy of the GNU General Public License    //
00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00031 // Boston, MA 02111-1307 USA.                                           //
00032 // //////////////////////////////////////////////////////////////////// //
00033 //
00034 // Primary maintainer for this file: Laurent Itti <itti@usc.edu>
00035 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/CUDA/CudaImage.H $
00036 // $Id: CudaImage.H 12962 2010-03-06 02:13:53Z irock $
00037 //
00038 
00039 #ifndef CUDAIMAGE_H_DEFINED
00040 #define CUDAIMAGE_H_DEFINED
00041 
00042 #include "Util/Assert.H"
00043 #include "Util/Promotions.H"
00044 #include "Image/ArrayData.H"
00045 #include "Image/Point2D.H"
00046 #include "Image/Rectangle.H"
00047 #include "Image/Image.H"
00048 #include "Image/Pixels.H"
00049 #include "CUDA/wrap_c_cuda.h"
00050 #include <algorithm> // for std::min
00051 
00052 //! CudaImage template class
00053 /*! This is an image template class for CUDA devices that can handle grayscale
00054     as well as color or multispectral images. CudaImage methods should
00055     be instantiable for any type T that has the basic arithmetic
00056     operators. However, our CUDA processing is inherently float based, so
00057     conversions may occur.  Note that some CudaImage functions will only
00058     work with float types, and others will only work for composite types
00059     such as PixRGB.
00060 */
00061 
00062 template <class T>
00063 class CudaImage
00064 {
00065 public:
00066
00067   // ############################################################
00068   /*! @name Constructors, destructors, assignment */
00069   //@{
00070
00071   //! Construct from C array (width and height given separately) using memory policy
00072   /*! Build from C array; an internal copy of the C array will be
00073       allocated using (mp, dev), reading inarray from the memory described by
00074       (srcmp, srcdev), so the C array can (and should) be freed without affecting the CudaImage. */
00075   inline CudaImage(const T* inarray, int width, int height, const MemoryPolicy mp, const int dev, const MemoryPolicy srcmp, const int srcdev);
00076
00077   //! Construct from C array (size given as Dims) using memory policy
00078   /*! Build from C array; an internal copy of the C array will be
00079       allocated using (mp, dev), reading inarray from the memory described by
00080       (srcmp, srcdev), so the C array can (and should) be freed without affecting the CudaImage. */
00081   inline CudaImage(const T* inarray, const Dims& dims, const MemoryPolicy mp, const int dev, const MemoryPolicy srcmp, const int srcdev);
00082
00083   //! Allocates memory for given size, optionally zero-clear that memory, and set the memory policy
00084   inline CudaImage(int width, int height, InitPolicy init, const MemoryPolicy mp, const int dev);
00085
00086   //! Constructor that only allocates memory for given size (the array is created with NO_INIT)
00087   inline explicit CudaImage(const Dims& dims, const MemoryPolicy mp, const int dev);
00088
00089   //! Constructor that only allocates memory for given size and init type
00090   inline explicit CudaImage(const Dims& dims, InitPolicy init, const MemoryPolicy mp, const int dev);
00091
00092   //! Construct an empty (0-by-0) image (useful for arrays of CudaImages).
00093   inline CudaImage();
00094
00095   //! Copy constructor
00096   /*! e.g.:
00097       \code
00098       CudaImage<byte> im(other);
00099       // or
00100       CudaImage<byte> im = other; // with other also of type CudaImage<byte>
00101       \endcode
00102   */
00103   inline CudaImage(const CudaImage<T>& A);
00104
00105   //! Copy constructor to particular memory
00106   /*! e.g.:
00107       \code
00108       CudaImage<byte> im(other, mp, dev);
00109       // with other also of type CudaImage<byte>; a new array is allocated using
00110       // (mp, dev) and the pixels of other are copied into it
00111       \endcode
00112   */
00113   inline CudaImage(const CudaImage<T>& A, const MemoryPolicy mp, const int dev);
00114
00115   //! Import copy constructor
00116   /*! e.g.:
00117       \code
00118       CudaImage<byte> im(other, mp, dev);
00119       // with other of type Image<byte>; a new array is allocated using
00120       // (mp, dev) and the pixels of other are copied into it from host memory
00121       \endcode
00122   */
00123   inline CudaImage(const Image<T>& A, const MemoryPolicy mp, const int dev);
00124
00125   //! Assignment operator.
00126   /*! e.g.:
00127       \code
00128       CudaImage<byte> im1, im2; im2 = im1;
00129       \endcode
00130   */
00131   inline CudaImage<T>& operator=(const CudaImage<T>& A);
00132
00133   //! Destructor
00134   inline ~CudaImage();
00135
00136   //! Free memory and switch to uninitialized state.
00137   /*! Note that it is \b NOT necessary to call this function to ensure
00138       proper cleanup, that will be done in the destructor by
00139       default. Rather, freeMem() is offered just as a performance
00140       optimization, to allow you to release a potentially large chunk
00141       of memory when you are finished using it. */
00142   inline void freeMem();
00143
00144   //@}
00145
00146   // ############################################################
00147   /*! @name Math operators */
00148   //! Math operators (all declared in terms of CudaImage<float>, regardless of T)
00149   //@{
00150
00151   //! Addition inplace by a host provided scalar
00152   inline CudaImage<float>& operator+=(const float val);
00153
00154   //! Subtraction inplace by a host provided scalar
00155   inline CudaImage<float>& operator-=(const float val);
00156
00157   //! Multiplication inplace by a host provided scalar
00158   inline CudaImage<float>& operator*=(const float val);
00159
00160   //! Division inplace by a host provided scalar
00161   inline CudaImage<float>& operator/=(const float val);
00162
00163   //! Addition inplace by a device image or scalar (a 1x1 CudaImage is treated as a scalar)
00164   inline CudaImage<float>& operator+=(const CudaImage<float>& im);
00165
00166   //! Subtraction inplace by a device image or scalar (a 1x1 CudaImage is treated as a scalar)
00167   inline CudaImage<float>& operator-=(const CudaImage<float>& im);
00168
00169   //! Multiplication inplace by a device image or scalar (a 1x1 CudaImage is treated as a scalar)
00170   inline CudaImage<float>& operator*=(const CudaImage<float>& im);
00171
00172   //! Division inplace by a device image or scalar (a 1x1 CudaImage is treated as a scalar)
00173   inline CudaImage<float>& operator/=(const CudaImage<float>& im);
00174
00175   //! Addition by a host provided scalar
00176   inline CudaImage<float> operator+(const float val) const;
00177
00178   //! Subtraction by a host provided scalar
00179   inline CudaImage<float> operator-(const float val) const;
00180
00181   //! Multiplication by a host provided scalar
00182   inline CudaImage<float> operator*(const float val) const;
00183
00184   //! Division by a host provided scalar
00185   inline CudaImage<float> operator/(const float val) const;
00186
00187   //! Addition by a device image or scalar (a 1x1 CudaImage is treated as a scalar)
00188   inline CudaImage<float> operator+(const CudaImage<float>& im) const;
00189
00190   //! Subtraction by a device image or scalar (a 1x1 CudaImage is treated as a scalar)
00191   inline CudaImage<float> operator-(const CudaImage<float>& im) const;
00192
00193   //! Multiplication by a device image or scalar (a 1x1 CudaImage is treated as a scalar)
00194   inline CudaImage<float> operator*(const CudaImage<float>& im) const;
00195
00196   //! Division by a device image or scalar (a 1x1 CudaImage is treated as a scalar)
00197   inline CudaImage<float> operator/(const CudaImage<float>& im) const;
00198   //@}
00199
00200
00201   // ############################################################
00202   /*! @name Memory management functions */
00203   //@{
00204
00205   //! Swap the contents of two images
00206   inline void swap(CudaImage<T>& other);
00207
00208
00209   //! Return a new image object with a deep copy of the underlying data
00210   /*! This function is necessary for safe use of
00211       attach()/detach(). That is, unfortunately attach()/detach() are
00212       not safe for use with shared image objects -- consider the
00213       following code:
00214
00215       \code
00216       double d[4] = { 0.0, 1.0, 2.0, 3.0};
00217
00218       // create an CudaImage that is attach()'ed to the double array
00219       CudaImage<double> a;
00220       a.attach(&d[0], 2, 2);
00221
00222       const CudaImage<double> b = a;
00223       // now 'b' thinks it has a safe lock on some const values:
00224
00225       d[0] = -1.0;
00226       // OOPS! By changing values in the 'd' array directly, we'll now
00227       // have changed things so that b[0] == -1.0, even though 'b' was
00228       // declared as 'const'
00229       \endcode
00230
00231       The solution to this problem is to prohibit the copy done in
00232       'b=a' above (this triggers an LFATAL() in
00233       ArrayData::acquire()). That assures us that any ArrayData that
00234       has a StoragePolicy of WRITE_THRU will be un-shareable.
00235
00236       So, back to the point -- the correct way to write the code above
00237       would be to use deepcopy():
00238
00239       \code
00240       double d[4] = { 0.0, 1.0, 2.0, 3.0};
00241
00242       CudaImage<double> a;
00243       a.attach(&d[0], 2, 2);
00244
00245       const CudaImage<double> b = a.deepcopy();
00246
00247       d[0] = -1.0;
00248       // Now, 'b' is insulated from any changes to 'd' since we've
00249       // done a deep copy, so even now we'll still have b[0]==0.0
00250       \endcode
00251       NOTE(review): CudaImage declares no attach()/detach(); this text looks carried over from Image -- confirm. */
00252   inline CudaImage<T> deepcopy() const;
00253
00254   //! Return a new image w/ deep copy of underlying data using memory policy
00255   /*! This function is needed to allow new image generation to any
00256       memory area in the system, which is going to have to be a deep copy
00257    */
00258   inline CudaImage<T> deepcopy(const MemoryPolicy mp, const int dev) const;
00259   //! Return the contents as an Image<T> (NOTE(review): presumably copied to host memory -- confirm)
00260   inline Image<T> exportToImage() const;
00261
00262   //! Free mem and realloc new array (array contents are lost).
00263   /*! Use rescale() instead if you want to preserve image contents. */
00264   inline void resize(const Dims& dims, const bool clear = false);
00265
00266   //! Free mem and realloc new array (array contents are lost).
00267   /*! Use rescale() instead if you want to preserve image contents. */
00268   inline void resize(const int width, const int height,
00269                      const bool clear = false);
00270
00271   //@}
00272
00273   // ############################################################
00274   /*! @name Access functions*/
00275   //@{
00276
00277   //! Check whether image is non-empty (i.e., non-zero height and width).
00278   inline bool initialized() const;
00279
00280   //! Get image size (width * height) as an int
00281   inline int getSize() const;
00282
00283   //! Get image size (width * height) as an unsigned int
00284   inline uint size() const;
00285
00286   //! Get image width
00287   inline int getWidth() const;
00288
00289   //! Get image height
00290   inline int getHeight() const;
00291
00292   //! Get image width+height in Dims struct
00293   inline const Dims& getDims() const;
00294
00295   //! Get image bounds as a rectangle with upper-left point at (0,0) and dims matching the image dims
00296   inline Rectangle getBounds() const;
00297
00298   //! Check if *this is the same size as the other thing
00299   /*! The other thing can be any type that exposes getHeight() and
00300       getWidth() */
00301   template <class C>
00302   inline bool isSameSize(const C& other) const;
00303
00304   //! Check if the image is 1D, i.e., width == 1 or height == 1
00305   inline bool is1D() const;
00306
00307   //! Check if the image is a vector, i.e., width == 1
00308   inline bool isVector() const;
00309
00310   //! Check if the image is a transposed vector, i.e., height == 1
00311   inline bool isTransposedVector() const;
00312
00313   //! Check if the image is square, i.e., width == height
00314   inline bool isSquare() const;
00315
00316   //! Returns read-only (const) pointer to internal image array
00317   inline const T* getCudaArrayPtr() const;
00318
00319   //! Returns read/write (non-const) pointer to internal image array
00320   inline T* getCudaArrayPtr();
00321
00322   //! Test whether point falls inside array boundaries
00323   inline bool coordsOk(const Point2D<int>& P) const;
00324
00325   //! Test whether point falls inside array boundaries
00326   inline bool coordsOk(const int i, const int j) const;
00327
00328   //! Test whether point falls inside array boundaries
00329   /*! This test is intended to be used before you attempt a getValInterp() */
00330   inline bool coordsOk(const Point2D<float>& p) const;
00331
00332   //! Test whether point falls inside array boundaries
00333   /*! This test is intended to be used before you attempt a getValInterp() */
00334   inline bool coordsOk(const float i, const float j) const;
00335
00336   //! Test whether rectangle fits in image
00337   inline bool rectangleOk(const Rectangle& rect) const;
00338
00339   //! Return memory policy of underlying array data
00340   inline MemoryPolicy getMemoryPolicy() const;
00341
00342   //! Return memory device number of underlying array data
00343   inline int getMemoryDevice() const;
00344
00345   //@}
00346
00347   //! Clear contents (or set to given value)
00348   inline void clear(const char& val = 0);
00349
00350
00351
00352   // ############################################################
00353   /*! @name Functions for testing/debugging only */
00354   //@{
00355
00356   //! For testing/debugging only.
00357   bool hasSameData(const CudaImage<T>& b) const;
00358
00359   //! For testing/debugging only.
00360   /*! Returns the current reference count. */
00361   long refCount() const throw();
00362
00363   //! For testing/debugging only.
00364   /*! Check if the ArrayHandle is shared. */
00365   bool isShared() const throw();
00366
00367   //@}
00368
00369 private:
00370   // ############################################################
00371   // ##### Data (itsHdl) and private accessors to the ArrayData behind it:
00372   // ############################################################
00373   ArrayHandle<T> itsHdl;
00374   inline const ArrayData<T>& impl() const;
00375   inline ArrayData<T>& uniq();
00376
00377 };
00378 
00379 // ######################################################################
00380 // ######################################################################
00381 // ######################################################################
00382 // FREE FUNCTIONS:
00383 // ######################################################################
00384 // ######################################################################
00385 // ######################################################################
00386 
00387 //! Return a new Image with the same data, but interpreted as a different shape
00388 /*! NOTE that this is very different from resize()! With reshape(), we
00389     are not changing the number of elements in the Image; rather we
00390     are just changing the way that the internal 1-D memory array is
00391     mapped to a logical 2-D Image. For example, you could reshape a
00392     10x5 Image to a 50x1 Image, or a 25x2 Image, or vice versa. You
00393     need to be aware the fact that Image uses a row-major storage
00394     format internally (i.e., as you step through the memory, you first
00395     traverse all the pixels in row 1, then all the pixels in row 2,
00396     etc.). So if you reshape a WxH Image to a WHx1 Image, your new 1-D
00397     Image will appear to have the rows of the original Image arranged
00398     end-to-end.
00399 
00400     Programmer note: In principle, we could support a reshape()
00401     operation that wouldn't require copying any data; in order to do
00402     that we'd need to split the Dims management out of ArrayData and
00403     ArrayHandle, so that a single ArrayData object could be used in
00404     multiple Image objects, each with potentially different Dims (but
00405     all with the same total number of pixels). However, for now we
00406     just use a simpler implementation which copies the data into the
00407     new Image.
00408 */
00409 template <class T>
00410 inline CudaImage<T> reshape(const CudaImage<T>& orig, const Dims& newdims)
00411 {
00412   ASSERT(orig.getDims().sz() == newdims.sz());
00413   return CudaImage<T>(orig.getCudaArrayPtr(), newdims, orig.getMemoryPolicy(), orig.getMemoryDevice(), orig.getMemoryPolicy(), orig.getMemoryDevice());
00414 }
00415 
00416 // ######################################################################
00417 // ######################################################################
00418 // ######################################################################
00419 // INLINE FUNCTIONS:
00420 // ######################################################################
00421 // ######################################################################
00422 // ######################################################################
00423 
00424 // ######################################################################
00425 // ##### Constructors & Destructors:
00426 // ######################################################################
00427 
00428 // ######################################################################
00429 template <class T> inline
00430 CudaImage<T>::CudaImage(const T* inarray, int width, int height, MemoryPolicy trgmp, int trgdev, const MemoryPolicy srcmp, const int srcdev) :
00431   itsHdl(new ArrayData<T>(Dims(width, height), inarray, trgmp, trgdev, srcmp, srcdev))
00432 {}
00433 
00434 // ######################################################################
00435 template <class T> inline
00436 CudaImage<T>::CudaImage(const T* inarray, const Dims& dims, MemoryPolicy trgmp, int trgdev, const MemoryPolicy srcmp, const int srcdev) :
00437   itsHdl(new ArrayData<T>(dims, inarray, trgmp, trgdev, srcmp, srcdev))
00438 {}
00439 
00440 // ######################################################################
00441 template <class T> inline
00442 CudaImage<T>::CudaImage(int width, int height, InitPolicy init, MemoryPolicy mp, int dev) :
00443   itsHdl(new ArrayData<T>(Dims(width, height), init, mp, dev))
00444 {}
00445 
00446 // ######################################################################
00447 template <class T> inline
00448 CudaImage<T>::CudaImage(const Dims& dims, const MemoryPolicy mp, const int dev) :
00449   itsHdl(new ArrayData<T>(dims, NO_INIT, mp, dev))
00450 {}
00451 
00452 // ######################################################################
00453 template <class T> inline
00454 CudaImage<T>::CudaImage(const Dims& dims, InitPolicy init,  const MemoryPolicy mp, const int dev) :
00455   itsHdl(new ArrayData<T>(dims, init, mp, dev))
00456 {}
00457 
00458 // ######################################################################
00459 template <class T> inline
00460 CudaImage<T>::CudaImage() :
00461   itsHdl(new ArrayData<T>())
00462 {}
00463 
00464 // ######################################################################
00465 template <class T> inline
00466 CudaImage<T>::CudaImage(const CudaImage<T>& A) :
00467   itsHdl(A.itsHdl)
00468 {}
00469 
00470 // ######################################################################
00471 template <class T> inline
00472 CudaImage<T>::CudaImage(const CudaImage<T>& A, const MemoryPolicy mp, const int dev) :
00473   itsHdl(new ArrayData<T>(A.getDims(), NO_INIT, mp,dev))
00474 {
00475   // Find out where src is sitting
00476   const MemoryPolicy srcmp = A.getMemoryPolicy();
00477   const int srcdev = A.getMemoryDevice();
00478   // How should we get the actual number of elements? dist from stop to aptr or size()?
00479   //int n = stop-aptr;
00480   int n = size();
00481   ArrayHelper<T>::copy_initialize(getCudaArrayPtr(),n,A.getCudaArrayPtr(),mp,dev,srcmp,srcdev);
00482 }
00483 
00484 template <class T> inline
00485 CudaImage<T>::CudaImage(const Image<T>& A, const MemoryPolicy mp, const int dev) :
00486   itsHdl(new ArrayData<T>(A.getDims(), NO_INIT, mp,dev))
00487 {
00488   // How should we get the actual number of elements? dist from stop to aptr or size()?
00489   //int n = stop-aptr;
00490   int n = size();
00491   ArrayHelper<T>::copy_initialize(getCudaArrayPtr(),n,A.getArrayPtr(),mp,dev,HOST_MEMORY,CUDA_HOST_DEVICE_NUM);
00492 }
00493 
00494 // ######################################################################
00495 template <class T> inline
00496 CudaImage<T>& CudaImage<T>::operator=(const CudaImage<T>& A)
00497 {
00498   CudaImage<T> A_copy( A );
00499   this->swap(A_copy);
00500   return *this;
00501 }
00502 
00503 template <class T> inline
00504 CudaImage<float>& CudaImage<T>::operator+=(const float val)
00505 {
00506   const int dev = this->getMemoryDevice();
00507   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00508 
00509   cuda_c_inplaceAddHostScalar(this->getCudaArrayPtr(), val, tile.sz(), this->size());
00510   return *this;
00511 }
00512 
00513 template <class T> inline
00514 CudaImage<float>& CudaImage<T>::operator-=(const float val)
00515 {
00516   const int dev = this->getMemoryDevice();
00517   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00518 
00519   cuda_c_inplaceSubtractHostScalar(this->getCudaArrayPtr(), val, tile.sz(), this->size());
00520   return *this;
00521 }
00522 
00523 template <class T> inline
00524 CudaImage<float>& CudaImage<T>::operator*=(const float val)
00525 {
00526   const int dev = this->getMemoryDevice();
00527   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00528 
00529   cuda_c_inplaceMultiplyHostScalar(this->getCudaArrayPtr(), val, tile.sz(), this->size());
00530   return *this;
00531 }
00532 
00533 template <class T> inline
00534 CudaImage<float>& CudaImage<T>::operator/=(const float val)
00535 {
00536   const int dev = this->getMemoryDevice();
00537   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00538 
00539   cuda_c_inplaceDivideHostScalar(this->getCudaArrayPtr(), val, tile.sz(), this->size());
00540   return *this;
00541 }
00542 
00543 // For math operations with CudaImage arguments, size()==1 CudaImages are treated as scalars to allow for
00544 // device side values to be applied as scalars
00545 
00546 template <class T> inline
00547 CudaImage<float>& CudaImage<T>::operator+=(const CudaImage<float>& im)
00548 {
00549   ASSERT(im.initialized());
00550   ASSERT(this->getMemoryDevice() == im.getMemoryDevice());
00551   const int dev = this->getMemoryDevice();
00552   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00553 
00554   if(im.getSize() == 1)
00555   {
00556     cuda_c_inplaceAddScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size());
00557   }
00558   else
00559   {
00560     ASSERT(this->isSameSize(im));
00561     cuda_c_inplaceAddImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size());
00562   }
00563   return *this;
00564 }
00565 
00566 template <class T> inline
00567 CudaImage<float>& CudaImage<T>::operator-=(const CudaImage<float>& im)
00568 {
00569   ASSERT(im.initialized());
00570   ASSERT(this->getMemoryDevice() == im.getMemoryDevice());
00571   const int dev = this->getMemoryDevice();
00572   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00573 
00574   if(im.getSize() == 1)
00575   {
00576     cuda_c_inplaceSubtractScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size());
00577   }
00578   else
00579   {
00580     ASSERT(this->isSameSize(im));
00581     cuda_c_inplaceSubtractImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size());
00582   }
00583   return *this;
00584 }
00585 
00586 template <class T> inline
00587 CudaImage<float>& CudaImage<T>::operator*=(const CudaImage<float>& im)
00588 {
00589   ASSERT(im.initialized());
00590   ASSERT(this->getMemoryDevice() == im.getMemoryDevice());
00591   const int dev = this->getMemoryDevice();
00592   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00593 
00594   if(im.getSize() == 1)
00595   {
00596     cuda_c_inplaceMultiplyScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size());
00597   }
00598   else
00599   {
00600     ASSERT(this->isSameSize(im));
00601     cuda_c_inplaceMultiplyImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size());
00602   }
00603   return *this;
00604 }
00605 
00606 template <class T> inline
00607 CudaImage<float>& CudaImage<T>::operator/=(const CudaImage<float>& im)
00608 {
00609   ASSERT(im.initialized());
00610   ASSERT(this->getMemoryDevice() == im.getMemoryDevice());
00611   const int dev = this->getMemoryDevice();
00612   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00613 
00614   if(im.getSize() == 1)
00615   {
00616     cuda_c_inplaceDivideScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size());
00617   }
00618   else
00619   {
00620     ASSERT(this->isSameSize(im));
00621     cuda_c_inplaceDivideImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size());
00622   }
00623   return *this;
00624 }
00625 
00626 
00627 template <class T> inline
00628 CudaImage<float> CudaImage<T>::operator+(const float val) const
00629 {
00630   const int dev = this->getMemoryDevice();
00631   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00632   CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev);
00633 
00634   cuda_c_addHostScalar(this->getCudaArrayPtr(), val, result.getCudaArrayPtr(), tile.sz(), this->size());
00635 
00636   return result;
00637 }
00638 
00639 template <class T> inline
00640 CudaImage<float> CudaImage<T>::operator-(const float val) const
00641 {
00642   const int dev = this->getMemoryDevice();
00643   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00644   CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev);
00645 
00646   cuda_c_subtractHostScalar(this->getCudaArrayPtr(), val, result.getCudaArrayPtr(), tile.sz(), this->size());
00647 
00648   return result;
00649 }
00650 
00651 template <class T> inline
00652 CudaImage<float> CudaImage<T>::operator*(const float val) const
00653 {
00654   const int dev = this->getMemoryDevice();
00655   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00656   CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev);
00657 
00658   cuda_c_multiplyHostScalar(this->getCudaArrayPtr(), val, result.getCudaArrayPtr(), tile.sz(), this->size());
00659 
00660   return result;
00661 }
00662 
00663 template <class T> inline
00664 CudaImage<float> CudaImage<T>::operator/(const float val) const
00665 {
00666   const int dev = this->getMemoryDevice();
00667   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00668   CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev);
00669 
00670   cuda_c_divideHostScalar(this->getCudaArrayPtr(), val, result.getCudaArrayPtr(), tile.sz(), this->size());
00671 
00672   return result;
00673 }
00674 
00675 
00676 // For math operations with CudaImage arguments, size()==1 CudaImages are treated as scalars to allow for
00677 // device side values to be applied as scalars
00678 
00679 template <class T> inline
00680 CudaImage<float> CudaImage<T>::operator+(const CudaImage<float>& im) const
00681 {
00682   ASSERT(im.initialized());
00683   ASSERT(this->getMemoryDevice() == im.getMemoryDevice());
00684   const int dev = this->getMemoryDevice();
00685   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00686   CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev);
00687   if(im.getSize() == 1)
00688   {
00689     cuda_c_addScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size());
00690   }
00691   else
00692   {
00693     ASSERT(this->isSameSize(im));
00694     cuda_c_addImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size());
00695   }
00696   return result;
00697 }
00698 
00699 template <class T> inline
00700 CudaImage<float> CudaImage<T>::operator-(const CudaImage<float>& im) const
00701 {
00702   ASSERT(im.initialized());
00703   ASSERT(this->getMemoryDevice() == im.getMemoryDevice());
00704   const int dev = this->getMemoryDevice();
00705   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00706   CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev);
00707   if(im.getSize() == 1)
00708   {
00709     cuda_c_subtractScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size());
00710   }
00711   else
00712   {
00713     ASSERT(this->isSameSize(im));
00714     cuda_c_subtractImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size());
00715   }
00716   return result;
00717 }
00718 
00719 template <class T> inline
00720 CudaImage<float> CudaImage<T>::operator*(const CudaImage<float>& im) const
00721 {
00722   ASSERT(im.initialized());
00723   ASSERT(this->getMemoryDevice() == im.getMemoryDevice());
00724   const int dev = this->getMemoryDevice();
00725   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00726   CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev);
00727   if(im.getSize() == 1)
00728   {
00729     cuda_c_multiplyScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size());
00730   }
00731   else
00732   {
00733     ASSERT(this->isSameSize(im));
00734     cuda_c_multiplyImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size());
00735   }
00736   return result;
00737 }
00738 
00739 template <class T> inline
00740 CudaImage<float> CudaImage<T>::operator/(const CudaImage<float>& im) const
00741 {
00742   ASSERT(im.initialized());
00743   ASSERT(this->getMemoryDevice() == im.getMemoryDevice());
00744   const int dev = this->getMemoryDevice();
00745   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00746   CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev);
00747   if(im.getSize() == 1)
00748   {
00749     cuda_c_divideScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size());
00750   }
00751   else
00752   {
00753     ASSERT(this->isSameSize(im));
00754     cuda_c_divideImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size());
00755   }
00756   return result;
00757 }
00758 
00759 // ######################################################################
00760 template <class T> inline
00761 CudaImage<T>::~CudaImage()
00762 { /* memory deallocation is handled by ArrayData's destructor */ }
00763 
00764 // ######################################################################
00765 template <class T> inline
00766 void CudaImage<T>::freeMem()
00767 {
00768   CudaImage<T> empty;
00769   this->swap(empty);
00770 }
00771 
00772 // ######################################################################
00773 // ##### Memory management functions:
00774 // ######################################################################
00775 
00776 // ######################################################################
00777 template <class T> inline
00778 void CudaImage<T>::swap(CudaImage<T>& other)
00779 {
00780   itsHdl.swap(other.itsHdl);
00781 }
00782 
00783 // ######################################################################
00784 template <class T> inline
00785 CudaImage<T> CudaImage<T>::deepcopy(const MemoryPolicy mp, const int dev) const
00786 {
00787   // see comment in class definition for why deepcopy() might be
00788   // needed
00789   return CudaImage<T>(this->getCudaArrayPtr(), this->getDims(), mp, dev, this->getMemoryPolicy(), this->getMemoryDevice());
00790 }
00791 
00792 
00793 template <class T> inline
00794 Image<T> CudaImage<T>::exportToImage() const
00795 {
00796   if(getMemoryPolicy() == HOST_MEMORY)
00797     return Image<T>(getCudaArrayPtr(), getDims());
00798   else
00799   {
00800     Image<T> ret(getDims(),NO_INIT);
00801     ArrayHelper<T>::copy_initialize(ret.getArrayPtr(),size(),getCudaArrayPtr(),HOST_MEMORY,CUDA_HOST_DEVICE_NUM,getMemoryPolicy(),getMemoryDevice());
00802     return ret;
00803   }
00804 }
00805 
00806 // ######################################################################
00807 template <class T> inline
00808 void CudaImage<T>::resize(const Dims& dims, const bool do_clear)
00809 {
00810   // This algorithm is slightly more convoluted than may appear necessary
00811   // at first, in order to be most efficient in the context of
00812   // copy-on-write. If the requested size is different than the current
00813   // size, it's very simple: we just make a new image of the right size,
00814   // clearing it if so requested. Otherwise, if we're keeping the same
00815   // size, we don't necessarily have to make a new data block; instead we
00816   // just delegate to clear(), which does the right thing depending on
00817   // whether we currently have a shared or unshared data block.
00818 
00819   if (dims != getDims())
00820     {
00821       ArrayHandle<T> resized(new ArrayData<T>(dims, do_clear ? ZEROS:NO_INIT, this->getMemoryPolicy(), this->getMemoryDevice()));
00822       itsHdl.swap(resized);
00823     }
00824   else // we're keeping the same size, so just clear() if necessary
00825     {
00826       if (do_clear) clear( 0 );
00827     }
00828 }
00829 
00830 // ######################################################################
00831 template <class T> inline
00832 void CudaImage<T>::resize(const int width, const int height, const bool do_clear)
00833 {
00834   resize(Dims(width, height), do_clear);
00835 }
00836 
00837 // ######################################################################
00838 // ##### Access functions:
00839 // ######################################################################
00840 
00841 // ######################################################################
00842 template <class T> inline
00843 bool CudaImage<T>::initialized() const
00844 { return getWidth() > 0 && getHeight() > 0; }
00845 
00846 // ######################################################################
00847 template <class T> inline
00848 int CudaImage<T>::getSize() const
00849 { return getDims().sz(); }
00850 
00851 // ######################################################################
00852 template <class T> inline
00853 uint CudaImage<T>::size() const
00854 { return getDims().sz(); }
00855 
00856 // ######################################################################
00857 template <class T> inline
00858 int CudaImage<T>::getWidth() const
00859 { return itsHdl.get().w(); }
00860 
00861 // ######################################################################
00862 template <class T> inline
00863 int CudaImage<T>::getHeight() const
00864 { return itsHdl.get().h(); }
00865 
00866 // ######################################################################
00867 template <class T> inline
00868 const Dims& CudaImage<T>::getDims() const
00869 { return itsHdl.get().dims(); }
00870 
00871 // ######################################################################
00872 template <class T> inline
00873 Rectangle CudaImage<T>::getBounds() const
00874 { return Rectangle(Point2D<int>(0,0), itsHdl.get().dims()); }
00875 
00876 // ######################################################################
00877 template <class T> template <class C> inline
00878 bool CudaImage<T>::isSameSize(const C& other) const
00879 { return getWidth() == other.getWidth() && getHeight() == other.getHeight(); }
00880 
00881 // ######################################################################
00882 template <class T> inline
00883 bool CudaImage<T>::is1D() const
00884 { return (getWidth() == 1) || (getHeight() == 1); }
00885 
00886 // ######################################################################
00887 template <class T> inline
00888 bool CudaImage<T>::isVector() const
00889 { return (getWidth() == 1); }
00890 
00891 // ######################################################################
00892 template <class T> inline
00893 bool CudaImage<T>::isTransposedVector() const
00894 { return (getHeight() == 1); }
00895 
00896 // ######################################################################
00897 template <class T> inline
00898 bool CudaImage<T>::isSquare() const
00899 { return (getWidth() == getHeight()); }
00900 
00901 // ######################################################################
00902 template <class T> inline
00903 const T* CudaImage<T>::getCudaArrayPtr() const
00904 {
00905   return impl().data();
00906 }
00907 
00908 // ######################################################################
00909 template <class T> inline
00910 T* CudaImage<T>::getCudaArrayPtr()
00911 {
00912   return uniq().dataw();
00913 }
00914 
00915 // ######################################################################
00916 template <class T> inline
00917 bool CudaImage<T>::coordsOk(const Point2D<int>& P) const
00918 {
00919   return (P.i >= 0 && P.j >= 0 && P.i < getWidth() && P.j < getHeight());
00920 }
00921 
00922 // ######################################################################
00923 template <class T> inline
00924 bool CudaImage<T>::coordsOk(const int i, const int j) const
00925 {
00926   return (i >= 0 && j >= 0 && i < getWidth() && j < getHeight());
00927 }
00928 
00929 // ######################################################################
00930 template <class T> inline
00931 bool CudaImage<T>::coordsOk(const Point2D<float>& p) const
00932 {
00933   return this->coordsOk(p.i, p.j);
00934 }
00935 
00936 // ######################################################################
00937 template <class T> inline
00938 bool CudaImage<T>::coordsOk(const float i, const float j) const
00939 {
00940   return (i >= 0.0F && j >= 0.0F &&
00941           i < float(getWidth() - 1) && j < float(getHeight() - 1));
00942 }
00943 
00944 // ######################################################################
00945 template <class T> inline
00946 bool CudaImage<T>::rectangleOk(const Rectangle& rect) const
00947 {
00948   return (rect.left() < getWidth() && rect.rightI() < getWidth() &&
00949           rect.top() < getHeight() && rect.bottomI() < getHeight() &&
00950           rect.left() >= 0 && rect.rightI() >= 0 &&
00951           rect.top() >= 0 && rect.bottomI() >= 0);
00952 }
00953 
00954 // ######################################################################
00955 template <class T> inline
00956 MemoryPolicy CudaImage<T>::getMemoryPolicy() const
00957 {
00958   return impl().getMemoryPolicy();
00959 }
00960 
00961 // ######################################################################
00962 template <class T> inline
00963 int CudaImage<T>::getMemoryDevice() const
00964 {
00965   return impl().getMemoryDevice();
00966 }
00967 
00968 
00969 // ######################################################################
00970 template <class T> inline
00971 void CudaImage<T>::clear(const char &val)
00972 {
00973   // Check if we have a shared implementation... if yes, then for
00974   // efficiency we should release our copy before doing the iterative
00975   // loop, which would otherwise unnecessarily make a unique copy of
00976   // the shared data for us, which we would then ceremoniously ignore
00977   // as we clear it to a new value.
00978   if (itsHdl.isShared())
00979     *this = CudaImage<T>(getDims(), NO_INIT, this->getMemoryPolicy(), this->getMemoryDevice());
00980 
00981   CudaDevices::memset(this->getCudaArrayPtr(),val,size()*sizeof(T),this->getMemoryDevice());
00982 }
00983 
00984 // ######################################################################
00985 // ##### Functions for testing/debugging only:
00986 // ######################################################################
00987 
00988 template <class T> inline
00989 bool CudaImage<T>::hasSameData(const CudaImage<T>& b) const
00990 { return itsHdl.hasSameData(b.itsHdl); }
00991 
00992 // ######################################################################
00993 template <class T> inline
00994 long CudaImage<T>::refCount() const throw() { return itsHdl.refCount(); }
00995 
00996 // ######################################################################
00997 template <class T> inline
00998 bool CudaImage<T>::isShared() const throw() { return itsHdl.isShared(); }
00999 
01000 // ######################################################################
01001 // ##### Private methods:
01002 // ######################################################################
01003 
01004 template <class T> inline
01005 const ArrayData<T>& CudaImage<T>::impl() const
01006 { return itsHdl.get(); }
01007 
01008 // ######################################################################
01009 template <class T> inline
01010 ArrayData<T>& CudaImage<T>::uniq()
01011 { return itsHdl.uniq(this->getMemoryPolicy(),this->getMemoryDevice()); }
01012 
01013 // ######################################################################
01014 /* So things look consistent in everyone's emacs... */
01015 /* Local Variables: */
01016 /* indent-tabs-mode: nil */
01017 /* End: */
01018 
01019 #endif