Main Page | Modules | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

CudaImage.H

Go to the documentation of this file.
00001 /*!@file CUDA/CudaImage.H An image template class meant for interacting with
00002   CUDA devices */
00003 
00004 // //////////////////////////////////////////////////////////////////// //
00005 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
00006 // University of Southern California (USC) and the iLab at USC.         //
00007 // See http://iLab.usc.edu for information about this project.          //
00008 // //////////////////////////////////////////////////////////////////// //
00009 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00010 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00011 // in Visual Environments, and Applications'' by Christof Koch and      //
00012 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00013 // pending; application number 09/912,225 filed July 23, 2001; see      //
00014 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00015 // //////////////////////////////////////////////////////////////////// //
00016 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00017 //                                                                      //
00018 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00019 // redistribute it and/or modify it under the terms of the GNU General  //
00020 // Public License as published by the Free Software Foundation; either  //
00021 // version 2 of the License, or (at your option) any later version.     //
00022 //                                                                      //
00023 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00024 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00025 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00026 // PURPOSE.  See the GNU General Public License for more details.       //
00027 //                                                                      //
00028 // You should have received a copy of the GNU General Public License    //
00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00031 // Boston, MA 02111-1307 USA.                                           //
00032 // //////////////////////////////////////////////////////////////////// //
00033 //
00034 // Primary maintainer for this file: Laurent Itti <itti@usc.edu>
00035 // $HeadURL$
00036 // $Id$
00037 //
00038 
00039 #ifndef CUDAIMAGE_H_DEFINED
00040 #define CUDAIMAGE_H_DEFINED
00041 
00042 #include "Util/Assert.H"
00043 #include "Util/Promotions.H"
00044 #include "Image/ArrayData.H"
00045 #include "Image/Point2D.H"
00046 #include "Image/Rectangle.H"
00047 #include "Image/Image.H"
00048 #include "CUDA/wrap_c_cuda.h"
00049 #include <algorithm> // for std::min
00050 
00051 //! CudaImage template class
00052 /*! This is an image template class for CUDA devices that can handle grayscale
00053     as well as color or multispectral images. CudaImage methods should
00054     be instantiable for any type T that has the basic arithmetic
00055     operators. However, our CUDA processing is inherently float based, so 
00056     conversions may occur.  Note that some CudaImage functions will only 
00057     work with float types, and others will only work for composite types 
00058     such as PixRGB.
00059 */
00060 
00061 template <class T>
00062 class CudaImage
00063 {
00064 public:
00065 
00066   // ############################################################
00067   /*! @name Constructors, destructors, assignment */
00068   //@{
00069 
00070   //! Construct from C array (width and height given separately) using memory policy
00071   /*! Build from C array; an internal copy of the C array will be
00072       allocated, so the C array can (and should) be freed without
00073       affecting the CudaImage. */
00074   inline CudaImage(const T* inarray, int width, int height, const MemoryPolicy mp, const int dev, const MemoryPolicy srcmp, const int srcdev);
00075 
00076   //! Construct from C array (size given as Dims) using memory policy
00077   /*! Build from C array; an internal copy of the C array will be
00078       allocated, so the C array can (and should) be freed without
00079       affecting the CudaImage. */
00080   inline CudaImage(const T* inarray, const Dims& dims, const MemoryPolicy mp, const int dev, const MemoryPolicy srcmp, const int srcdev);
00081 
00082   //! Allocates memory for given size, optionally zero-clear that memory, and set the memory policy
00083   inline CudaImage(int width, int height, InitPolicy init, const MemoryPolicy mp, const int dev);
00084 
00085   //! Constructor that only allocates memory for given size (uses NO_INIT)
00086   inline explicit CudaImage(const Dims& dims, const MemoryPolicy mp, const int dev);
00087 
00088   //! Constructor that only allocates memory for given size and init type
00089   inline explicit CudaImage(const Dims& dims, InitPolicy init, const MemoryPolicy mp, const int dev);
00090 
00091   //! Construct an empty (0-by-0) image (useful for arrays of CudaImages).
00092   inline CudaImage();
00093 
00094   //! Copy constructor
00095   /*! e.g.:
00096       \code
00097       CudaImage<byte> im(other);
00098       // or
00099       CudaImage<byte> im = other; // with other also of type CudaImage<byte>
00100       \endcode
00101   */
00102   inline CudaImage(const CudaImage<T>& A);
00103 
00104   //! Copy constructor to particular memory
00105   /*! e.g.:
00106       \code
00107       // other is a CudaImage<byte>; mp and dev select the target
00108       // memory policy and device for the new image
00109       CudaImage<byte> im(other, mp, dev);
00110       \endcode
00111   */
00112   inline CudaImage(const CudaImage<T>& A, const MemoryPolicy mp, const int dev);
00113 
00114   //! Import copy constructor
00115   /*! Builds a CudaImage from a host-side Image of the same pixel type, e.g.:
00116       \code
00117       // other is an Image<byte>; mp and dev select the target
00118       // memory policy and device for the new image
00119       CudaImage<byte> im(other, mp, dev);
00120       \endcode
00121   */
00122   inline CudaImage(const Image<T>& A, const MemoryPolicy mp, const int dev);
00123 
00124   //! Assignment operator.
00125   /*! e.g.:
00126       \code
00127       CudaImage<byte> im1, im2; im2 = im1;
00128       \endcode
00129   */
00130   inline CudaImage<T>& operator=(const CudaImage<T>& A);
00131 
00132   //! Destructor
00133   inline ~CudaImage();
00134 
00135   //! Free memory and switch to uninitialized state.
00136   /*! Note that it is \b NOT necessary to call this function to ensure
00137       proper cleanup, that will be done in the destructor by
00138       default. Rather, freeMem() is offered just as a performance
00139       optimization, to allow you to release a potentially large chunk
00140       of memory when you are finished using it. */
00141   inline void freeMem();
00142 
00143   //@}
00144 
00145   // ############################################################
00146   /*! @name Math operators */
00147   //! Math operators
00148   //@{
      // NOTE(review): every operator in this group is declared in terms of
      // CudaImage<float>, and the in-place definitions return *this, so they
      // appear to be usable only when T is float -- confirm before
      // instantiating them for other pixel types.
00149 
00150   //! Addition inplace by a host provided scalar
00151   inline CudaImage<float>& operator+=(const float val);
00152 
00153   //! Subtraction inplace by a host provided scalar
00154   inline CudaImage<float>& operator-=(const float val);
00155 
00156   //! Multiplication inplace by a host provided scalar
00157   inline CudaImage<float>& operator*=(const float val);
00158 
00159   //! Division inplace by a host provided scalar
00160   inline CudaImage<float>& operator/=(const float val);
00161 
00162   //! Addition inplace by a device image or scalar (a 1x1 CudaImage is treated as a scalar)
00163   inline CudaImage<float>& operator+=(const CudaImage<float>& im);
00164 
00165   //! Subtraction inplace by a device image or scalar (a 1x1 CudaImage is treated as a scalar)
00166   inline CudaImage<float>& operator-=(const CudaImage<float>& im);
00167 
00168   //! Multiplication inplace by a device image or scalar (a 1x1 CudaImage is treated as a scalar)
00169   inline CudaImage<float>& operator*=(const CudaImage<float>& im);
00170 
00171   //! Division inplace by a device image or scalar (a 1x1 CudaImage is treated as a scalar)
00172   inline CudaImage<float>& operator/=(const CudaImage<float>& im);
00173 
00174   //! Addition by a host provided scalar
00175   inline CudaImage<float> operator+(const float val) const;
00176 
00177   //! Subtraction by a host provided scalar
00178   inline CudaImage<float> operator-(const float val) const;
00179 
00180   //! Multiplication by a host provided scalar
00181   inline CudaImage<float> operator*(const float val) const;
00182 
00183   //! Division by a host provided scalar
00184   inline CudaImage<float> operator/(const float val) const;
00185 
00186   //! Addition by a device image or scalar (a 1x1 CudaImage is treated as a scalar)
00187   inline CudaImage<float> operator+(const CudaImage<float>& im) const;
00188 
00189   //! Subtraction by a device image or scalar (a 1x1 CudaImage is treated as a scalar)
00190   inline CudaImage<float> operator-(const CudaImage<float>& im) const;
00191 
00192   //! Multiplication by a device image or scalar (a 1x1 CudaImage is treated as a scalar)
00193   inline CudaImage<float> operator*(const CudaImage<float>& im) const;
00194 
00195   //! Division by a device image or scalar (a 1x1 CudaImage is treated as a scalar)
00196   inline CudaImage<float> operator/(const CudaImage<float>& im) const;
00197   //@}
00198 
00199 
00200   // ############################################################
00201   /*! @name Memory management functions */
00202   //@{
00203 
00204   //! Swap the contents of two images
00205   inline void swap(CudaImage<T>& other);
00206 
00207 
00208   //! Return a new image object with a deep copy of the underlying data
00209   /*! This function is necessary for safe use of
00210       attach()/detach(). That is, unfortunately attach()/detach() are
00211       not safe for use with shared image objects -- consider the
00212       following code:
00213 
00214       \code
00215       double d[4] = { 0.0, 1.0, 2.0, 3.0};
00216 
00217       // create a CudaImage that is attach()'ed to the double array
00218       CudaImage<double> a;
00219       a.attach(&d[0], 2, 2);
00220 
00221       const CudaImage<double> b = a;
00222       // now 'b' thinks it has a safe lock on some const values:
00223 
00224       d[0] = -1.0;
00225       // OOPS! By changing values in the 'd' array directly, we'll now
00226       // have changed things so that b[0] == -1.0, even though 'b' was
00227       // declared as 'const'
00228       \endcode
00229 
00230       The solution to this problem is to prohibit the copy done in
00231       'b=a' above (this triggers an LFATAL() in
00232       ArrayData::acquire()). That assures us that any ArrayData that
00233       has a StoragePolicy of WRITE_THRU will be un-shareable.
00234 
00235       So, back to the point -- the correct way to write the code above
00236       would be to use deepcopy():
00237 
00238       \code
00239       double d[4] = { 0.0, 1.0, 2.0, 3.0};
00240 
00241       CudaImage<double> a;
00242       a.attach(&d[0], 2, 2);
00243 
00244       const CudaImage<double> b = a.deepcopy();
00245 
00246       d[0] = -1.0;
00247       // Now, 'b' is insulated from any changes to 'd' since we've
00248       // done a deep copy, so even now we'll still have b[0]==0.0
00249       \endcode
00250 
      NOTE(review): no attach()/detach() members are declared in this
      class, so the example above looks inherited from the Image
      documentation -- confirm whether it still applies to CudaImage.
  */
00251   inline CudaImage<T> deepcopy() const;
00252 
00253   //! Return a new image w/ deep copy of underlying data using memory policy
00254   /*! This function is needed to allow new image generation to any
00255       memory area in the system, which is going to have to be a deep copy
00256    */
00257   inline CudaImage<T> deepcopy(const MemoryPolicy mp, const int dev) const;
00258 
      //! Return the image contents as an Image<T>
00259   inline Image<T> exportToImage() const;
00260 
00261   //! Free mem and realloc new array (array contents are lost).
00262   /*! Use rescale() instead if you want to preserve image contents. */
00263   inline void resize(const Dims& dims, const bool clear = false);
00264 
00265   //! Free mem and realloc new array (array contents are lost).
00266   /*! Use rescale() instead if you want to preserve image contents. */
00267   inline void resize(const int width, const int height,
00268                      const bool clear = false);
00269 
00270   //@}
00271 
00272   // ############################################################
00273   /*! @name Access functions*/
00274   //@{
00275 
00276   //! Check whether image is non-empty (i.e., non-zero height and width).
00277   inline bool initialized() const;
00278 
00279   //! Get image size (width * height) as a signed int
00280   inline int getSize() const;
00281 
00282   //! Get image size (width * height) as an unsigned int
00283   inline uint size() const;
00284 
00285   //! Get image width
00286   inline int getWidth() const;
00287 
00288   //! Get image height
00289   inline int getHeight() const;
00290 
00291   //! Get image width+height in Dims struct
00292   inline const Dims& getDims() const;
00293 
00294   //! Get image bounds as a rectangle with upper-left point at (0,0) and dims matching the image dims
00295   inline Rectangle getBounds() const;
00296 
00297   //! Check if *this is the same size as the other thing
00298   /*! The other thing can be any type that exposes getHeight() and
00299       getWidth() */
00300   template <class C>
00301   inline bool isSameSize(const C& other) const;
00302 
00303   //! Check if the image is 1D, i.e., width == 1 or height == 1
00304   inline bool is1D() const;
00305 
00306   //! Check if the image is a vector, i.e., width == 1
00307   inline bool isVector() const;
00308 
00309   //! Check if the image is a transposed vector, i.e., height == 1
00310   inline bool isTransposedVector() const;
00311 
00312   //! Check if the image is square, i.e., width == height
00313   inline bool isSquare() const;
00314 
00315   //! Returns read-only (const) pointer to internal image array
00316   inline const T* getCudaArrayPtr() const;
00317 
00318   //! Returns read/write (non-const) pointer to internal image array
00319   inline T* getCudaArrayPtr();
00320 
00321   //! Test whether point falls inside array boundaries
00322   inline bool coordsOk(const Point2D<int>& P) const;
00323 
00324   //! Test whether point falls inside array boundaries
00325   inline bool coordsOk(const int i, const int j) const;
00326 
00327   //! Test whether point falls inside array boundaries
00328   /*! This test is intended to be used before you attempt a getValInterp() */
00329   inline bool coordsOk(const Point2D<float>& p) const;
00330 
00331   //! Test whether point falls inside array boundaries
00332   /*! This test is intended to be used before you attempt a getValInterp() */
00333   inline bool coordsOk(const float i, const float j) const;
00334 
00335   //! Test whether rectangle fits in image
00336   inline bool rectangleOk(const Rectangle& rect) const;
00337 
00338   //! Return memory policy of underlying array data
00339   inline MemoryPolicy getMemoryPolicy() const;
00340 
00341   //! Return memory device number of underlying array data
00342   inline int getMemoryDevice() const;
00343 
00344   // (the "Access functions" group is closed below, after clear())
00345 
00346   //! clear contents (or set to given value)
00347   inline void clear(const char& val = 0);
00348 
00349   //@}
00350 
00351   // ############################################################
00352   /*! @name Functions for testing/debugging only */
00353   //@{
00354 
00355   //! For testing/debugging only.
00356   bool hasSameData(const CudaImage<T>& b) const;
00357 
00358   //! For testing/debugging only.
00359   /*! Returns the current reference count. */
00360   long refCount() const throw();
00361 
00362   //! For testing/debugging only.
00363   /*! Check if the ArrayHandle is shared. */
00364   bool isShared() const throw();
00365 
00366   //@}
00367 
00368 private:
00369   // ############################################################
00370   // ##### Data:
00371   // ############################################################
00372   ArrayHandle<T> itsHdl; // handle through which all image storage is reached
00373   inline const ArrayData<T>& impl() const; // read-only view of the ArrayData (presumably via itsHdl -- definition not shown here)
00374   inline ArrayData<T>& uniq(); // writable ArrayData; NOTE(review): name suggests un-sharing before write -- confirm
00375 
00376 };
00377 
00378 // ######################################################################
00379 // ######################################################################
00380 // ######################################################################
00381 // FREE FUNCTIONS:
00382 // ######################################################################
00383 // ######################################################################
00384 // ######################################################################
00385 
00386 //! Return a new Image with the same data, but interpreted as a different shape
00387 /*! NOTE that this is very different from resize()! With reshape(), we
00388     are not changing the number of elements in the Image; rather we
00389     are just changing the way that the internal 1-D memory array is
00390     mapped to a logical 2-D Image. For example, you could reshape a
00391     10x5 Image to a 50x1 Image, or a 25x2 Image, or vice versa. You
00392     need to be aware of the fact that Image uses a row-major storage
00393     format internally (i.e., as you step through the memory, you first
00394     traverse all the pixels in row 1, then all the pixels in row 2,
00395     etc.). So if you reshape a WxH Image to a WHx1 Image, your new 1-D
00396     Image will appear to have the rows of the original Image arranged
00397     end-to-end.
00398 
00399     Programmer note: In principle, we could support a reshape()
00400     operation that wouldn't require copying any data; in order to do
00401     that we'd need to split the Dims management out of ArrayData and
00402     ArrayHandle, so that a single ArrayData object could be used in
00403     multiple Image objects, each with potentially different Dims (but
00404     all with the same total number of pixels). However, for now we
00405     just use a simpler implementation which copies the data into the
00406     new Image.
00407 */
00408 template <class T>
00409 inline CudaImage<T> reshape(const CudaImage<T>& orig, const Dims& newdims)
00410 {
00411   ASSERT(orig.getDims().sz() == newdims.sz());
00412   return CudaImage<T>(orig.getCudaArrayPtr(), newdims, orig.getMemoryPolicy(), orig.getMemoryDevice(), orig.getMemoryPolicy(), orig.getMemoryDevice());
00413 }
00414 
00415 // ######################################################################
00416 // ######################################################################
00417 // ######################################################################
00418 // INLINE FUNCTIONS:
00419 // ######################################################################
00420 // ######################################################################
00421 // ######################################################################
00422 
00423 // ######################################################################
00424 // ##### Constructors & Destructors:
00425 // ######################################################################
00426 
00427 // ######################################################################
00428 template <class T> inline
00429 CudaImage<T>::CudaImage(const T* inarray, int width, int height, MemoryPolicy trgmp, int trgdev, const MemoryPolicy srcmp, const int srcdev) :
00430   itsHdl(new ArrayData<T>(Dims(width, height), inarray, trgmp, trgdev, srcmp, srcdev))
00431 {}
00432 
00433 // ######################################################################
00434 template <class T> inline
00435 CudaImage<T>::CudaImage(const T* inarray, const Dims& dims, MemoryPolicy trgmp, int trgdev, const MemoryPolicy srcmp, const int srcdev) :
00436   itsHdl(new ArrayData<T>(dims, inarray, trgmp, trgdev, srcmp, srcdev))
00437 {}
00438 
00439 // ######################################################################
00440 template <class T> inline
00441 CudaImage<T>::CudaImage(int width, int height, InitPolicy init, MemoryPolicy mp, int dev) :
00442   itsHdl(new ArrayData<T>(Dims(width, height), init, mp, dev))
00443 {}
00444 
00445 // ######################################################################
00446 template <class T> inline
00447 CudaImage<T>::CudaImage(const Dims& dims, const MemoryPolicy mp, const int dev) :
00448   itsHdl(new ArrayData<T>(dims, NO_INIT, mp, dev))
00449 {}
00450 
00451 // ######################################################################
00452 template <class T> inline
00453 CudaImage<T>::CudaImage(const Dims& dims, InitPolicy init,  const MemoryPolicy mp, const int dev) :
00454   itsHdl(new ArrayData<T>(dims, init, mp, dev))
00455 {}
00456 
00457 // ######################################################################
00458 template <class T> inline
00459 CudaImage<T>::CudaImage() :
00460   itsHdl(new ArrayData<T>())
00461 {}
00462 
00463 // ######################################################################
00464 template <class T> inline
00465 CudaImage<T>::CudaImage(const CudaImage<T>& A) :
00466   itsHdl(A.itsHdl)
00467 {}
00468 
00469 // ######################################################################
00470 template <class T> inline
00471 CudaImage<T>::CudaImage(const CudaImage<T>& A, const MemoryPolicy mp, const int dev) :
00472   itsHdl(new ArrayData<T>(A.getDims(), NO_INIT, mp,dev))
00473 {
00474   // Find out where src is sitting
00475   const MemoryPolicy srcmp = A.getMemoryPolicy();
00476   const int srcdev = A.getMemoryDevice();
00477   // How should we get the actual number of elements? dist from stop to aptr or size()?
00478   //int n = stop-aptr;
00479   int n = size();
00480   ArrayHelper<T>::copy_initialize(getCudaArrayPtr(),n,A.getCudaArrayPtr(),mp,dev,srcmp,srcdev);
00481 }
00482 
00483 template <class T> inline 
00484 CudaImage<T>::CudaImage(const Image<T>& A, const MemoryPolicy mp, const int dev) :
00485   itsHdl(new ArrayData<T>(A.getDims(), NO_INIT, mp,dev))
00486 {
00487   // How should we get the actual number of elements? dist from stop to aptr or size()?
00488   //int n = stop-aptr;
00489   int n = size();
00490   ArrayHelper<T>::copy_initialize(getCudaArrayPtr(),n,A.getArrayPtr(),mp,dev,HOST_MEMORY,CUDA_HOST_DEVICE_NUM);
00491 }
00492 
00493 // ######################################################################
00494 template <class T> inline
00495 CudaImage<T>& CudaImage<T>::operator=(const CudaImage<T>& A)
00496 {
00497   CudaImage<T> A_copy( A );
00498   this->swap(A_copy);
00499   return *this;
00500 }
00501 
00502 template <class T> inline
00503 CudaImage<float>& CudaImage<T>::operator+=(const float val)
00504 {
00505   const int dev = this->getMemoryDevice();
00506   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00507 
00508   cuda_c_inplaceAddHostScalar(this->getCudaArrayPtr(), val, tile.sz(), this->size());
00509   return *this;
00510 }
00511 
00512 template <class T> inline
00513 CudaImage<float>& CudaImage<T>::operator-=(const float val)
00514 {
00515   const int dev = this->getMemoryDevice();
00516   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00517 
00518   cuda_c_inplaceSubtractHostScalar(this->getCudaArrayPtr(), val, tile.sz(), this->size());
00519   return *this;
00520 }
00521 
00522 template <class T> inline
00523 CudaImage<float>& CudaImage<T>::operator*=(const float val)
00524 {
00525   const int dev = this->getMemoryDevice();
00526   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00527 
00528   cuda_c_inplaceMultiplyHostScalar(this->getCudaArrayPtr(), val, tile.sz(), this->size());
00529   return *this;
00530 }
00531 
00532 template <class T> inline
00533 CudaImage<float>& CudaImage<T>::operator/=(const float val)
00534 {
00535   const int dev = this->getMemoryDevice();
00536   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00537 
00538   cuda_c_inplaceDivideHostScalar(this->getCudaArrayPtr(), val, tile.sz(), this->size());
00539   return *this;
00540 }
00541 
00542 // For math operations with CudaImage arguments, size()==1 CudaImages are treated as scalars to allow for 
00543 // device side values to be applied as scalars
00544 
00545 template <class T> inline
00546 CudaImage<float>& CudaImage<T>::operator+=(const CudaImage<float>& im)
00547 {
00548   ASSERT(im.initialized());
00549   ASSERT(this->getMemoryDevice() == im.getMemoryDevice());
00550   const int dev = this->getMemoryDevice();
00551   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00552 
00553   if(im.getSize() == 1)
00554   {
00555     cuda_c_inplaceAddScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size());
00556   }
00557   else
00558   {
00559     ASSERT(this->isSameSize(im));
00560     cuda_c_inplaceAddImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size());      
00561   }
00562   return *this;
00563 }
00564 
00565 template <class T> inline
00566 CudaImage<float>& CudaImage<T>::operator-=(const CudaImage<float>& im)
00567 {
00568   ASSERT(im.initialized());
00569   ASSERT(this->getMemoryDevice() == im.getMemoryDevice());
00570   const int dev = this->getMemoryDevice();
00571   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00572 
00573   if(im.getSize() == 1)
00574   {
00575     cuda_c_inplaceSubtractScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size());
00576   }
00577   else
00578   {
00579     ASSERT(this->isSameSize(im));
00580     cuda_c_inplaceSubtractImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size());      
00581   }
00582   return *this;
00583 }
00584 
00585 template <class T> inline
00586 CudaImage<float>& CudaImage<T>::operator*=(const CudaImage<float>& im)
00587 {
00588   ASSERT(im.initialized());
00589   ASSERT(this->getMemoryDevice() == im.getMemoryDevice());
00590   const int dev = this->getMemoryDevice();
00591   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00592 
00593   if(im.getSize() == 1)
00594   {
00595     cuda_c_inplaceMultiplyScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size());
00596   }
00597   else
00598   {
00599     ASSERT(this->isSameSize(im));
00600     cuda_c_inplaceMultiplyImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size());      
00601   }
00602   return *this;
00603 }
00604 
00605 template <class T> inline
00606 CudaImage<float>& CudaImage<T>::operator/=(const CudaImage<float>& im)
00607 {
00608   ASSERT(im.initialized());
00609   ASSERT(this->getMemoryDevice() == im.getMemoryDevice());
00610   const int dev = this->getMemoryDevice();
00611   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00612 
00613   if(im.getSize() == 1)
00614   {
00615     cuda_c_inplaceDivideScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size());
00616   }
00617   else
00618   {
00619     ASSERT(this->isSameSize(im));
00620     cuda_c_inplaceDivideImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), tile.sz(), this->size());      
00621   }
00622   return *this;
00623 }
00624 
00625 
00626 template <class T> inline
00627 CudaImage<float> CudaImage<T>::operator+(const float val) const
00628 {
00629   const int dev = this->getMemoryDevice();
00630   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00631   CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev);
00632 
00633   cuda_c_addHostScalar(this->getCudaArrayPtr(), val, result.getCudaArrayPtr(), tile.sz(), this->size());
00634 
00635   return result;
00636 }
00637 
00638 template <class T> inline
00639 CudaImage<float> CudaImage<T>::operator-(const float val) const
00640 {
00641   const int dev = this->getMemoryDevice();
00642   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00643   CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev);
00644 
00645   cuda_c_subtractHostScalar(this->getCudaArrayPtr(), val, result.getCudaArrayPtr(), tile.sz(), this->size());
00646 
00647   return result;
00648 }
00649 
00650 template <class T> inline
00651 CudaImage<float> CudaImage<T>::operator*(const float val) const
00652 {
00653   const int dev = this->getMemoryDevice();
00654   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00655   CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev);
00656 
00657   cuda_c_multiplyHostScalar(this->getCudaArrayPtr(), val, result.getCudaArrayPtr(), tile.sz(), this->size());
00658 
00659   return result;
00660 }
00661 
00662 template <class T> inline
00663 CudaImage<float> CudaImage<T>::operator/(const float val) const
00664 {
00665   const int dev = this->getMemoryDevice();
00666   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00667   CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev);
00668 
00669   cuda_c_divideHostScalar(this->getCudaArrayPtr(), val, result.getCudaArrayPtr(), tile.sz(), this->size());
00670 
00671   return result;
00672 }
00673 
00674 
00675 // For math operations with CudaImage arguments, size()==1 CudaImages are treated as scalars to allow for 
00676 // device side values to be applied as scalars
00677 
00678 template <class T> inline
00679 CudaImage<float> CudaImage<T>::operator+(const CudaImage<float>& im) const
00680 {
00681   ASSERT(im.initialized());
00682   ASSERT(this->getMemoryDevice() == im.getMemoryDevice());
00683   const int dev = this->getMemoryDevice();
00684   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00685   CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev);
00686   if(im.getSize() == 1)
00687   {
00688     cuda_c_addScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size());
00689   }
00690   else
00691   {
00692     ASSERT(this->isSameSize(im));
00693     cuda_c_addImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size());      
00694   }
00695   return result;
00696 }
00697 
00698 template <class T> inline
00699 CudaImage<float> CudaImage<T>::operator-(const CudaImage<float>& im) const
00700 {
00701   ASSERT(im.initialized());
00702   ASSERT(this->getMemoryDevice() == im.getMemoryDevice());
00703   const int dev = this->getMemoryDevice();
00704   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00705   CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev);
00706   if(im.getSize() == 1)
00707   {
00708     cuda_c_subtractScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size());
00709   }
00710   else
00711   {
00712     ASSERT(this->isSameSize(im));
00713     cuda_c_subtractImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size());      
00714   }
00715   return result;
00716 }
00717 
00718 template <class T> inline
00719 CudaImage<float> CudaImage<T>::operator*(const CudaImage<float>& im) const
00720 {
00721   ASSERT(im.initialized());
00722   ASSERT(this->getMemoryDevice() == im.getMemoryDevice());
00723   const int dev = this->getMemoryDevice();
00724   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00725   CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev);
00726   if(im.getSize() == 1)
00727   {
00728     cuda_c_multiplyScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size());
00729   }
00730   else
00731   {
00732     ASSERT(this->isSameSize(im));
00733     cuda_c_multiplyImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size());      
00734   }
00735   return result;
00736 }
00737 
00738 template <class T> inline
00739 CudaImage<float> CudaImage<T>::operator/(const CudaImage<float>& im) const
00740 {
00741   ASSERT(im.initialized());
00742   ASSERT(this->getMemoryDevice() == im.getMemoryDevice());
00743   const int dev = this->getMemoryDevice();
00744   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00745   CudaImage<float> result(this->getDims(),NO_INIT,this->getMemoryPolicy(),dev);
00746   if(im.getSize() == 1)
00747   {
00748     cuda_c_divideScalar(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size());
00749   }
00750   else
00751   {
00752     ASSERT(this->isSameSize(im));
00753     cuda_c_divideImages(this->getCudaArrayPtr(), im.getCudaArrayPtr(), result.getCudaArrayPtr(), tile.sz(), this->size());      
00754   }
00755   return result;
00756 }
00757 
00758 // ######################################################################
00759 template <class T> inline
00760 CudaImage<T>::~CudaImage()
00761 { /* memory deallocation is handled by ArrayData's destructor */ }
00762 
00763 // ######################################################################
00764 template <class T> inline
00765 void CudaImage<T>::freeMem()
00766 {
00767   CudaImage<T> empty;
00768   this->swap(empty);
00769 }
00770 
00771 // ######################################################################
00772 // ##### Memory management functions:
00773 // ######################################################################
00774 
00775 // ######################################################################
00776 template <class T> inline
00777 void CudaImage<T>::swap(CudaImage<T>& other)
00778 {
00779   itsHdl.swap(other.itsHdl);
00780 }
00781 
00782 // ######################################################################
00783 template <class T> inline
00784 CudaImage<T> CudaImage<T>::deepcopy(const MemoryPolicy mp, const int dev) const
00785 {
00786   // see comment in class definition for why deepcopy() might be
00787   // needed
00788   return CudaImage<T>(this->getCudaArrayPtr(), this->getDims(), mp, dev, this->getMemoryPolicy(), this->getMemoryDevice());
00789 }
00790 
00791 
00792 template <class T> inline 
00793 Image<T> CudaImage<T>::exportToImage() const
00794 {
00795   if(getMemoryPolicy() == HOST_MEMORY)
00796     return Image<T>(getCudaArrayPtr(), getDims());
00797   else
00798   {
00799     Image<T> ret(getDims(),NO_INIT);
00800     ArrayHelper<T>::copy_initialize(ret.getArrayPtr(),size(),getCudaArrayPtr(),HOST_MEMORY,CUDA_HOST_DEVICE_NUM,getMemoryPolicy(),getMemoryDevice());
00801     return ret;
00802   }
00803 }
00804 
00805 // ######################################################################
00806 template <class T> inline
00807 void CudaImage<T>::resize(const Dims& dims, const bool do_clear)
00808 {
00809   // This algorithm is slightly more convoluted than may appear necessary
00810   // at first, in order to be most efficient in the context of
00811   // copy-on-write. If the requested size is different than the current
00812   // size, it's very simple: we just make a new image of the right size,
00813   // clearing it if so requested. Otherwise, if we're keeping the same
00814   // size, we don't necessarily have to make a new data block; instead we
00815   // just delegate to clear(), which does the right thing depending on
00816   // whether we currently have a shared or unshared data block.
00817 
00818   if (dims != getDims())
00819     {
00820       ArrayHandle<T> resized(new ArrayData<T>(dims, do_clear ? ZEROS:NO_INIT, this->getMemoryPolicy(), this->getMemoryDevice()));
00821       itsHdl.swap(resized);
00822     }
00823   else // we're keeping the same size, so just clear() if necessary
00824     {
00825       if (do_clear) clear( 0 );
00826     }
00827 }
00828 
00829 // ######################################################################
00830 template <class T> inline
00831 void CudaImage<T>::resize(const int width, const int height, const bool do_clear)
00832 {
00833   resize(Dims(width, height), do_clear);
00834 }
00835 
00836 // ######################################################################
00837 // ##### Access functions:
00838 // ######################################################################
00839 
00840 // ######################################################################
00841 template <class T> inline
00842 bool CudaImage<T>::initialized() const
00843 { return getWidth() > 0 && getHeight() > 0; }
00844 
00845 // ######################################################################
00846 template <class T> inline
00847 int CudaImage<T>::getSize() const
00848 { return getDims().sz(); }
00849 
00850 // ######################################################################
00851 template <class T> inline
00852 uint CudaImage<T>::size() const
00853 { return getDims().sz(); }
00854 
00855 // ######################################################################
00856 template <class T> inline
00857 int CudaImage<T>::getWidth() const
00858 { return itsHdl.get().w(); }
00859 
00860 // ######################################################################
00861 template <class T> inline
00862 int CudaImage<T>::getHeight() const
00863 { return itsHdl.get().h(); }
00864 
00865 // ######################################################################
00866 template <class T> inline
00867 const Dims& CudaImage<T>::getDims() const
00868 { return itsHdl.get().dims(); }
00869 
00870 // ######################################################################
00871 template <class T> inline
00872 Rectangle CudaImage<T>::getBounds() const
00873 { return Rectangle(Point2D<int>(0,0), itsHdl.get().dims()); }
00874 
00875 // ######################################################################
00876 template <class T> template <class C> inline
00877 bool CudaImage<T>::isSameSize(const C& other) const
00878 { return getWidth() == other.getWidth() && getHeight() == other.getHeight(); }
00879 
00880 // ######################################################################
00881 template <class T> inline
00882 bool CudaImage<T>::is1D() const
00883 { return (getWidth() == 1) || (getHeight() == 1); }
00884 
00885 // ######################################################################
00886 template <class T> inline
00887 bool CudaImage<T>::isVector() const
00888 { return (getWidth() == 1); }
00889 
00890 // ######################################################################
00891 template <class T> inline
00892 bool CudaImage<T>::isTransposedVector() const
00893 { return (getHeight() == 1); }
00894 
00895 // ######################################################################
00896 template <class T> inline
00897 bool CudaImage<T>::isSquare() const
00898 { return (getWidth() == getHeight()); }
00899 
00900 // ######################################################################
00901 template <class T> inline
00902 const T* CudaImage<T>::getCudaArrayPtr() const
00903 {
00904   return impl().data();
00905 }
00906 
00907 // ######################################################################
00908 template <class T> inline
00909 T* CudaImage<T>::getCudaArrayPtr()
00910 {
00911   return uniq().dataw();
00912 }
00913 
00914 // ######################################################################
00915 template <class T> inline
00916 bool CudaImage<T>::coordsOk(const Point2D<int>& P) const
00917 {
00918   return (P.i >= 0 && P.j >= 0 && P.i < getWidth() && P.j < getHeight());
00919 }
00920 
00921 // ######################################################################
00922 template <class T> inline
00923 bool CudaImage<T>::coordsOk(const int i, const int j) const
00924 {
00925   return (i >= 0 && j >= 0 && i < getWidth() && j < getHeight());
00926 }
00927 
00928 // ######################################################################
00929 template <class T> inline
00930 bool CudaImage<T>::coordsOk(const Point2D<float>& p) const
00931 {
00932   return this->coordsOk(p.i, p.j);
00933 }
00934 
00935 // ######################################################################
00936 template <class T> inline
00937 bool CudaImage<T>::coordsOk(const float i, const float j) const
00938 {
00939   return (i >= 0.0F && j >= 0.0F &&
00940           i < float(getWidth() - 1) && j < float(getHeight() - 1));
00941 }
00942 
00943 // ######################################################################
00944 template <class T> inline
00945 bool CudaImage<T>::rectangleOk(const Rectangle& rect) const
00946 {
00947   return (rect.left() < getWidth() && rect.rightI() < getWidth() &&
00948           rect.top() < getHeight() && rect.bottomI() < getHeight() &&
00949           rect.left() >= 0 && rect.rightI() >= 0 &&
00950           rect.top() >= 0 && rect.bottomI() >= 0);
00951 }
00952 
00953 // ######################################################################
00954 template <class T> inline
00955 MemoryPolicy CudaImage<T>::getMemoryPolicy() const
00956 {
00957   return impl().getMemoryPolicy();
00958 }
00959 
00960 // ######################################################################
00961 template <class T> inline
00962 int CudaImage<T>::getMemoryDevice() const
00963 {
00964   return impl().getMemoryDevice();
00965 }
00966 
00967 
00968 // ######################################################################
00969 template <class T> inline
00970 void CudaImage<T>::clear(const char &val)
00971 {
00972   // Check if we have a shared implementation... if yes, then for
00973   // efficiency we should release our copy before doing the iterative
00974   // loop, which would otherwise unnecessarily make a unique copy of
00975   // the shared data for us, which we would then ceremoniously ignore
00976   // as we clear it to a new value.
00977   if (itsHdl.isShared())
00978     *this = CudaImage<T>(getDims(), NO_INIT, this->getMemoryPolicy(), this->getMemoryDevice());
00979 
00980   CudaDevices::memset(this->getCudaArrayPtr(),val,size()*sizeof(T),this->getMemoryDevice());
00981 }
00982 
00983 // ######################################################################
00984 // ##### Functions for testing/debugging only:
00985 // ######################################################################
00986 
00987 template <class T> inline
00988 bool CudaImage<T>::hasSameData(const CudaImage<T>& b) const
00989 { return itsHdl.hasSameData(b.itsHdl); }
00990 
00991 // ######################################################################
00992 template <class T> inline
00993 long CudaImage<T>::refCount() const throw() { return itsHdl.refCount(); }
00994 
00995 // ######################################################################
00996 template <class T> inline
00997 bool CudaImage<T>::isShared() const throw() { return itsHdl.isShared(); }
00998 
00999 // ######################################################################
01000 // ##### Private methods:
01001 // ######################################################################
01002 
01003 template <class T> inline
01004 const ArrayData<T>& CudaImage<T>::impl() const
01005 { return itsHdl.get(); }
01006 
01007 // ######################################################################
01008 template <class T> inline
01009 ArrayData<T>& CudaImage<T>::uniq()
01010 { return itsHdl.uniq(this->getMemoryPolicy(),this->getMemoryDevice()); }
01011 
01012 // ######################################################################
01013 /* So things look consistent in everyone's emacs... */
01014 /* Local Variables: */
01015 /* indent-tabs-mode: nil */
01016 /* End: */
01017 
01018 #endif

Generated on Mon Nov 23 15:45:37 2009 for iLab Neuromorphic Vision Toolkit by  doxygen 1.4.4