ArrayData.H

Go to the documentation of this file.
00001 /*!@file Image/ArrayData.H The guts of the 2D ref-counted copy-on-write array
00002     implementation */
00003 
00004 // //////////////////////////////////////////////////////////////////// //
00005 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
00006 // University of Southern California (USC) and the iLab at USC.         //
00007 // See http://iLab.usc.edu for information about this project.          //
00008 // //////////////////////////////////////////////////////////////////// //
00009 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00010 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00011 // in Visual Environments, and Applications'' by Christof Koch and      //
00012 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00013 // pending; application number 09/912,225 filed July 23, 2001; see      //
00014 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00015 // //////////////////////////////////////////////////////////////////// //
00016 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00017 //                                                                      //
00018 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00019 // redistribute it and/or modify it under the terms of the GNU General  //
00020 // Public License as published by the Free Software Foundation; either  //
00021 // version 2 of the License, or (at your option) any later version.     //
00022 //                                                                      //
00023 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00024 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00025 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00026 // PURPOSE.  See the GNU General Public License for more details.       //
00027 //                                                                      //
00028 // You should have received a copy of the GNU General Public License    //
00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00031 // Boston, MA 02111-1307 USA.                                           //
00032 // //////////////////////////////////////////////////////////////////// //
00033 //
00034 // Primary maintainer for this file: Rob Peters <rjpeters@klab.caltech.edu>
00035 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Image/ArrayData.H $
00036 // $Id: ArrayData.H 13229 2010-04-15 01:56:52Z dparks $
00037 //
00038 
00039 #ifndef ARRAYDATA_H_DEFINED
00040 #define ARRAYDATA_H_DEFINED
00041 
00042 #include "Image/Dims.H"
00043 #include "Util/Alloc.H" // for invt_allocate()/invt_deallocate()
00044 #include "Util/TypeTraits.H"
00045 #include "Util/log.H"
00046 #include "rutz/atomic.h"
00047 
00048 #include <cstring> // for memset() and memcpy()
00049 #include <new> // for placement new, in ArrayData's ctors
00050 
00051 #ifdef INVT_USE_CUDA
00052 #include "CUDA/CudaAlloc.H"
00053 #include "CUDA/CudaDevices.H"
00054 #endif
00055 
//! How an ArrayData treats the T* buffer it is initialized from.
enum StoragePolicy
  {
    //! Copy the incoming data once up front, then copy-on-write as usual.
    MAKE_OWN_COPY = 0,

    //! Borrow the caller's buffer instead of copying it (write-through).
    /*! Writes to the Array modify the very T[] that was used to
        initialize it. */
    WRITE_THRU = 1
  };
00066 
//! Selects how the pixels of a freshly created image are initialized.
enum InitPolicy
  {
    //! Every element is set to the pixel type's zero or default value.
    ZEROS = 0,

    //! Do only the minimum initialization the pixel type requires.
    /*! Trivial types are left completely uninitialized; non-trivial
        user-defined types still have each pixel copy-constructed
        from the type's default value. */
    NO_INIT = 1
  };
00080 
//! Identifies the kind of memory (on a given device) that holds the data.
enum MemoryPolicy
  {
    //! GPU global memory (global to a set of kernels on a single device)
    GLOBAL_DEVICE_MEMORY = 0,
    //! GPU texture memory
    TEXTURE_DEVICE_MEMORY = 1,
    //! Ordinary CPU/host memory
    HOST_MEMORY = 2
  };
00091 
//! Device id for the host, to distinguish from CUDA devices
/*! The expansion is parenthesized so that the negative value stays a
    single operand wherever the macro is pasted into an expression. */
#define CUDA_HOST_DEVICE_NUM (-1)
00094 
00095 
00096 //! Function to wrap use of invt_allocate to allow for CUDA memory policies
00097 inline void* wrap_invt_allocate(size_t user_nbytes, const MemoryPolicy mp, const int dev)
00098   {
00099 #ifdef INVT_USE_CUDA
00100     switch(mp)
00101     {
00102     case GLOBAL_DEVICE_MEMORY:
00103     case TEXTURE_DEVICE_MEMORY:
00104       return cuda_invt_allocate(user_nbytes,dev);
00105     case HOST_MEMORY:
00106       return invt_allocate(user_nbytes);
00107     default:
00108       LFATAL("Invalid memory policy %d",mp);
00109       return NULL;
00110     }
00111     LFATAL("Should be impossible to get here");
00112     return NULL;
00113 #else
00114     // Optimization, don't look at memory policy
00115     return invt_allocate(user_nbytes);
00116 #endif
00117   }
00118 
00119 //! Function to wrap use of invt_deallocate to allow for CUDA memory policies
00120 inline void wrap_invt_deallocate(void *mem, const MemoryPolicy mp, const int dev, const size_t nBytes)
00121   {
00122 #ifdef INVT_USE_CUDA
00123     switch(mp)
00124     {
00125     case GLOBAL_DEVICE_MEMORY:
00126     case TEXTURE_DEVICE_MEMORY:
00127       cuda_invt_deallocate(mem, dev,nBytes);
00128       return;
00129     case HOST_MEMORY:
00130       invt_deallocate(mem);
00131       return;
00132     }
00133 #else
00134     // Optimization, don't look at memory policy
00135     invt_deallocate(mem);
00136 #endif
00137   }
00138 
00139 
//! Declared here; the definition is not part of this section of the header.
/*! NOTE(review): judging only by its name and parameters, this is
    presumably a sanity check performed when a reference is acquired,
    given the storage policy and the current reference count --
    confirm against the definition. Declared non-throwing. */
void check_acquisition(StoragePolicy s, int count) throw();
00141 
00142 
00143 
//! Implements a ref-counted 2-D array, should only be used via ArrayHandle.
/*! ArrayData implements a 2-D array, by storing:
      - a Dims representing the width+height of the array,
      - a pointer to the dynamically allocated memory where the array
        data are stored,
      - a reference count,
      - a StoragePolicy, indicating whether the ArrayData object
        "owns" the memory and should release it eventually (in the
        case of MAKE_OWN_COPY), or whether someone else "owns" the
        memory and will eventually release it themselves (in the case
        of WRITE_THRU), and
      - a MemoryPolicy plus a device id, describing where that memory
        lives (host, or a CUDA device).

    All of these data members are immutable, with the exception of the
    reference count. This reflects the semantics that an ArrayData
    object is a fixed-size block of data; if a higher-level client
    wants to resize(), it does so by creating a new ArrayData of the
    desired size, and releasing the old ArrayData. This simplifies the
    memory management: within ArrayData, memory is allocated only in
    the constructor(s) (through wrap_invt_allocate()) and is meant to
    be released only in the destructor.

    ArrayData provides the basics needed to use STL-style iterator
    algorithms.  In particular it offers two pairs of functions to get
    half-open iterator ranges:

      - data() and end() for read-only iteration, and
      - dataw() and endw() for read-write iteration (where the "w"
        means "write" or "writable").
*/
template <class T>
class ArrayData
{
private:
  ArrayData(const ArrayData&); // not allowed; use clone() instead
  ArrayData& operator=(const ArrayData&); // not allowed; use clone() instead

public:

  // ############################################################
  // ##### Constructors
  // ############################################################

  //! Default construct an empty (i.e. 0-by-0) array.
  inline ArrayData();

  //! Construct with the specified dimensions and InitPolicy.
  /*! The data are placed in host memory. */
  inline ArrayData(const Dims& dims, const InitPolicy ip = NO_INIT);

  //! Construct with the specified dimensions, InitPolicy and MemoryPolicy.
  /*! \a device is the device on which memory of kind \a mp is allocated. */
  inline ArrayData(const Dims& dims, const InitPolicy ip, const MemoryPolicy mp, const int device);

  //! Construct with the specified dimensions, copying elements from \a aa.
  /*! The new object lives in host memory and owns its own copy.
      NOTE(review): callers should pass a non-null \a aa holding at
      least dims.sz() elements; verify null handling against the
      constructor definition. */
  inline ArrayData(const Dims& dims, const T* aa);

  //! Construct with the specified dimensions, input data, and memory policy
  /*! The elements of \a aa (which live in memory of kind \a srcmp on
      \a srcdevice) are copied into newly allocated memory of kind
      \a mp on \a device.
      NOTE(review): callers should pass a non-null \a aa; verify null
      handling against the constructor definition. */
  inline ArrayData(const Dims& dims, const T* aa, const MemoryPolicy mp, const int device, const MemoryPolicy srcmp, const int srcdevice);

  //! Construct with the specified dimensions and storage policy.
  /*! If the passed-in T* array is non-null, then if the storage
      policy is MAKE_OWN_COPY we make a copy of the data, but if the
      storage policy is WRITE_THRU then we "borrow" the data. */
  inline ArrayData(const Dims& dims, T* aa, const StoragePolicy s);

  //! Construct with the specified dimensions, storage and memory policies.
  /*! If the passed-in T* array is non-null, then if the storage
      policy is MAKE_OWN_COPY we make a copy of the data, but if the
      storage policy is WRITE_THRU then we "borrow" the data. With
      WRITE_THRU, the source and destination memory policy and device
      must be identical. */
  inline ArrayData(const Dims& dims, T* aa, const StoragePolicy s, const MemoryPolicy mp, const int device, const MemoryPolicy srcmp, const int srcdevice);

  //! Return a new'ed copy of *this.
  /*! The cloned copy will always be MAKE_OWN_COPY, even if *this is
      WRITE_THRU. */
  inline ArrayData* clone() const;

  //! Return a new'ed copy of *this using the given memory policy
  /*! The cloned copy will always be MAKE_OWN_COPY, even if *this is
      WRITE_THRU. */
  inline ArrayData* clone(const MemoryPolicy mp, const int device) const;

  //! Release the pointed-to data if our storage policy is MAKE_OWN_COPY.
  inline ~ArrayData() throw();

  // ############################################################
  // ##### Accessors
  // ############################################################

  //! Returns a read-only pointer to the data array.
  /*! With end(), forms an STL-style half-open const_iterator range. */
  inline const T* data() const throw();

  //! Returns a read-only pointer to one-past-the-end of the data array.
  /*! With data(), forms an STL-style half-open const_iterator range. */
  inline const T* end() const throw();

  //! Returns a read-write pointer to the data array (the "w" means "write").
  /*! With endw(), forms an STL-style half-open iterator range. */
  inline T* dataw() throw();

  //! Returns a read-write pointer to one-past-the-end of the data array.
  /*! With dataw(), forms an STL-style half-open iterator range. */
  inline T* endw() throw();

  //! Returns the dimensions (i.e. width, height) of the 2-D data array.
  inline const Dims& dims() const throw();

  //! Returns the width of the data array (i.e. the number of columns).
  inline int w() const throw();

  //! Returns the height of the data array (i.e. the number of rows).
  inline int h() const throw();

  // ############################################################
  // ##### Reference count manipulation
  // ############################################################

  //! Increment the reference count.
  inline void acquire() throw();

  //! Decrement the reference count and delete self if the count goes to zero.
  inline void release() throw();

  //! Query whether this object is shared (i.e. ref count > 1).
  inline bool isShared() const throw();

  //! For debugging/testing only.
  /*! Returns the current reference count. */
  inline int refCount() const throw();

  //! Return memory policy of underlying array data
  inline MemoryPolicy getMemoryPolicy() const;

  //! Return the device (a CUDA device, or the host) of the underlying array data
  inline int getMemoryDevice() const;


private:
  // NOTE: members are initialized in declaration order, and the
  // constructors' itsData initializer reads itsMemoryPolicy, itsDevice
  // and itsDims -- keep those declared before itsData.
  rutz::atomic_int_t itsRefCount;      // reference count; the only mutable member
  MemoryPolicy const itsMemoryPolicy;  // Type of memory that is being used (global,texture,host)
  StoragePolicy const itsStoragePolicy; // whether we own itsData or merely borrow it
  int const itsDevice;          // CUDA device (or host) where memory is located

  Dims const itsDims;           // width+height of data array
  T* const itsData;             // data array
};
00291 
00292 
//! This class provides ref-counting and copy-on-write for ArrayData's.
/*! NOTE: Most clients will not need to use ArrayHandle<T> directly,
    but can rather use Image<T>, which wraps and expands the
    ArrayHandle interface. Nevertheless, these notes may be of
    interest because the ref-counting and copy-on-write semantics that
    ArrayHandle<T> offers are transferred through to Image<T>.

    As long as clients access ArrayData<T> through an ArrayHandle<T>,
    the proper copy-on-write semantics are guaranteed. This is by
    virtue of the fact that the only way to extract a non-const
    ArrayData<T>& from an ArrayHandle<T> is to call uniq(), which in
    turn checks to see if the ArrayData is shared (i.e. ref count >
    1). If it is shared, then that ArrayData is clone()'d before
    returning a reference to it.

    The only caveat: you should not store non-const references to an
    ArrayData or to the elements of an ArrayData object for "long"
    periods of time. In this case, "long" means "longer than you are
    sure you have a unique reference", which in practice means "you
    can keep such references up until a new copy of the ArrayHandle<T>
    is made". This goes along with sensible coding practices anyway; a
    typical usage pattern would be:

      -# you have an ArrayHandle<T> (or an Image<T>, which holds an
         ArrayHandle<T>) that you want to modify
      -# get a pair of iterators for that object so that you can
         perform your series of write operations
      -# don't use those iterators anymore, and then you can safely
         make copies of the freshly-modified ArrayHandle<T> or
         Image<T> object (for example to return a copy of it as the
         return value from a function)
*/
template<class T>
class ArrayHandle
{
public:
  //! Default construct with an empty array (i.e. 0-by-0).
  inline ArrayHandle();

  //! Construct with a new ArrayData, which the ArrayHandle now owns.
  inline explicit ArrayHandle(const ArrayData<T>* p) throw();

  //! Construct with a new ArrayData, which the ArrayHandle now owns.
  /*! NOTE(review): the role of \a mp and \a dev cannot be determined
      from this declaration alone; presumably they name the memory
      policy and device associated with \a p -- confirm against the
      definition. */
  inline explicit ArrayHandle(const ArrayData<T>* p, const MemoryPolicy mp, const int dev) throw();

  //! Copy construct; no deep-copy is incurred, just bumps the ref-count.
  inline ArrayHandle(const ArrayHandle& r) throw();

  //! Destruct; deletes the ArrayData<T> if its ref count goes to zero.
  inline ~ArrayHandle() throw();

  //! Swap contents with another ArrayHandle.
  inline void swap(ArrayHandle<T>& other) throw();

  //! Assignment; no deep-copy is incurred, just bumps the ref-count.
  inline ArrayHandle& operator=(const ArrayHandle& r) throw();

  //! Get a read-only reference to the ArrayData object.
  /*! This is a "cheap" operation, since the ArrayData will never be
      copied in this operation. */
  inline const ArrayData<T>& get() const throw();

  //! Get a unique read-write reference to the ArrayData object, for a given memory policy and device.
  /*! This is a potentially "expensive" operation, since in order to
      get a unique ArrayData object, the current one may have to be
      clone()'d if it is shared (i.e. ref-count > 1).
      NOTE(review): how \a mp and \a dev influence the result is not
      visible from this declaration -- confirm against the definition. */
  inline ArrayData<T>& uniq(const MemoryPolicy mp, const int dev);

  //! Get a unique read-write reference to the ArrayData object.
  /*! This is a potentially "expensive" operation, since in order to
      get a unique ArrayData object, the current one may have to be
      clone()'d if it is shared (i.e. ref-count > 1). */
  inline ArrayData<T>& uniq();

  //! See if the ArrayData object is shared (i.e. ref-count > 1).
  inline bool isShared() const throw();

  //! For testing/debugging only.
  /*! See if we are pointing to the same ArrayData<T> as is the other
      ArrayHandle<T>. */
  inline bool hasSameData(const ArrayHandle<T>& b) const throw();

  //! For testing/debugging only.
  /*! Returns the current reference count of our ArrayData<T>. */
  inline int refCount() const throw();

private:
  ArrayData<T>* px;     // contained pointer
};
00382 
00383 
00384 
00385 // ######################################################################
00386 // ######################################################################
00387 // INLINE FUNCTIONS for ArrayData:
00388 // ######################################################################
00389 // ######################################################################
00390 
00391 //! This is a helper template struct for ArrayData's constructors.
00392 /*! It exposes functions for initializing raw memory in different
00393     ways. These functions can then be specialized for builtin or
00394     trivial types (i.e., types for which TypeTraits<T>::isTrivial is
00395     true) for maximum efficiency. We get this specialization by having
00396     an extra bool isTrivial template parameter for ArrayHelper, which
00397     allows us to select at compile-time based on the complexity of
00398     T. This is much better than using a run-time "if" statement in the
00399     constructors, since that incurs both a time cost (to evaluate an
00400     "if" statement) and a code space cost (since we're then including
00401     code that won't be used). */
00402 template <class T, bool isTrivial = TypeTraits<T>::isTrivial>
00403 struct ArrayHelper
00404 {
00405   //! Construct \a n copies of \a val at \a space using placement new.
00406   /*! For general types, the minimal initialization that we can do is
00407       to zero_initialize() (i.e., unlike for builtin or trivial types,
00408       we can't just leave the memory sitting as it is,
00409       uninitialized). */
00410   static void minimal_initialize(T* trg, int n)
00411   {
00412     zero_initialize(trg, n);
00413   }
00414 
00415   static void minimal_initialize(T* trg, int n, const MemoryPolicy mp, const int dev)
00416   {
00417     zero_initialize(trg, n, mp, dev);
00418   }
00419 
00420   //! Initialize the memory to T's default or zero value.
00421   /*! For general types, the best way to do this is to make one
00422       default T object, and then copy construct each element of the
00423       array from that object using placement new. */
00424   static inline void zero_initialize(T* trg, int n)
00425   {
00426     T zero = T();
00427     T* cur = trg;
00428     try
00429       {
00430         for (; n > 0; --n, ++cur)
00431           new (cur) T(zero); // placement new
00432       }
00433     // if a constructor call throws, we have to clean up manually:
00434     catch (...)
00435       {
00436         while (cur != trg) (cur--)->~T(); // explicit destructor call
00437         invt_deallocate(trg);
00438         throw; // rethrow the exception
00439       }
00440   }
00441 
00442   static void zero_initialize(T* trg, int n, const MemoryPolicy mp, const int dev)
00443   {
00444 #ifdef INVT_USE_CUDA
00445     T* tmp;
00446     switch(mp)
00447     {
00448     case GLOBAL_DEVICE_MEMORY:
00449     case TEXTURE_DEVICE_MEMORY:
00450       // Is there a better way to do this?
00451       tmp = new T[n];
00452       CudaDevices::memcpyHostToDevice(trg,tmp,n*sizeof(T),dev);
00453       delete tmp;
00454       break;
00455     case HOST_MEMORY:
00456       zero_initialize(trg,n);
00457       break;
00458     }
00459 #else
00460     // Optimization, just assume the memory policy
00461     zero_initialize(trg,n);
00462 #endif
00463 
00464   }
00465 
00466   //! Initialize the memory by copying elements from the \a src array.
00467   /*! For general types, we just copy construct each element in the
00468       target array from the corresponding element in the \a src array,
00469       using placement new. */
00470   static inline void copy_initialize(T* trg, int n, const T*src)
00471   {
00472         T* cur = trg;
00473         try
00474         {
00475           for (; n > 0; --n, ++cur)
00476             new (cur) T(*src++); // placement new
00477         }
00478         // if a constructor call throws, we have to clean up manually:
00479         catch (...)
00480         {
00481           while (cur != trg) (cur--)->~T(); // explicit destructor call
00482           invt_deallocate(trg);
00483           throw; // rethrow the exception
00484         }
00485   }
00486 
00487   // THIS IS AN UNSAFE DESIGN!  Complex types that allocate/deallocate resources in their constructors/destructors
00488   // are not properly handled when sent/received from CUDA devices under this framework.
00489   // The reason that non-primitive types are supported for CUDA right now, is because PixRGB and its ilk
00490   // are very useful, non-primitive type that will work with CUDA.
00491   // Maybe we should add a complext template or some other intelligence to demarcate the line between simple and complex classes
00492   // to fix this problem.  -- DFP 082009
00493   static void copy_initialize(T* trg, int n, const T*src, const MemoryPolicy trgmp, const int trgdev,
00494                               const MemoryPolicy srcmp, const int srcdev)
00495   {
00496 #ifdef INVT_USE_CUDA
00497     T *tmp;
00498     switch(srcmp)
00499     {
00500     case GLOBAL_DEVICE_MEMORY:
00501     case TEXTURE_DEVICE_MEMORY:
00502       switch(trgmp)
00503       {
00504       case GLOBAL_DEVICE_MEMORY:
00505       case TEXTURE_DEVICE_MEMORY:
00506         // These types *SHOULD* be semi-trivial since they are going to the CUDA device
00507         CudaDevices::memcpyDeviceToDevice(trg,src,n*sizeof(T),trgdev,srcdev);
00508         break;
00509       case HOST_MEMORY:
00510         // Very inefficient, have to copy whole block from device to host, and then
00511         // call the constructor for each item in array using a new block of mem
00512         tmp = new T[n];
00513         CudaDevices::memcpyDeviceToHost(tmp,src,n*sizeof(T),srcdev);
00514         copy_initialize(trg,n,tmp);
00515         delete tmp;
00516         break;
00517       }
00518 
00519       break;
00520     case HOST_MEMORY:
00521       switch(trgmp)
00522       {
00523       case GLOBAL_DEVICE_MEMORY:
00524       case TEXTURE_DEVICE_MEMORY:
00525         CudaDevices::memcpyHostToDevice(trg,src,n*sizeof(T),trgdev);
00526         break;
00527       case HOST_MEMORY:
00528         copy_initialize(trg,n,src);
00529         break;
00530       }
00531       break;
00532     }
00533 #else
00534     // Optimization, just assume the memory policy
00535     copy_initialize(trg,n,src);
00536 #endif
00537   }
00538 
00539 
00540 
00541   //! Call destructors on the \a n objects stored in \a space.
00542   static inline void destruct(T* trg, int n)
00543   {
00544     while (--n >= 0)
00545       (trg+n)->~T(); // explicit destructor call
00546   }
00547 
00548   static void destruct(T* trg, int n, const MemoryPolicy mp, const int dev)
00549   {
00550 #ifdef INVT_USE_CUDA
00551     switch(mp)
00552     {
00553     case GLOBAL_DEVICE_MEMORY:
00554     case TEXTURE_DEVICE_MEMORY:
00555       // What can we do here?
00556       break;
00557     case HOST_MEMORY:
00558       destruct(trg,n);
00559       break;
00560     }
00561 #else
00562     // Optimization, just assume the memory policy
00563     destruct(trg,n);
00564 #endif
00565   }
00566 
00567 };
00568 
00569 //! Specialization of ArrayHelper for builtin/trivial types.
00570 /*! We can do things much faster with these kinds of types, using
00571     e.g. memset() and memcpy(), and we don't have to do default
00572     initialization at all. */
00573 template <class T>
00574 struct ArrayHelper<T, true>
00575 {
00576   //! For trivial types, we don't have to do anything here.
00577   static void minimal_initialize(T* /*trg*/, int /*n*/) {}
00578 
00579   //! For trivial types, we don't have to do anything here.
00580   static void minimal_initialize(T* /*trg*/, int /*n*/, const MemoryPolicy /*mp*/, const int /*dev*/) {}
00581 
00582   //! For trivial types, we just memset() all the memory to zero.
00583   static inline void zero_initialize(T* trg, int n)
00584   {
00585     memset(trg, 0, n * sizeof(T));
00586   }
00587 
00588   static void zero_initialize(T* trg, int n, const MemoryPolicy mp, const int device)
00589   {
00590 #ifdef INVT_USE_CUDA
00591     switch(mp)
00592     {
00593     case GLOBAL_DEVICE_MEMORY:
00594     case TEXTURE_DEVICE_MEMORY:
00595       CudaDevices::memset(trg,0,n*sizeof(T),device);
00596       break;
00597     case HOST_MEMORY:
00598       zero_initialize(trg, n);
00599       break;
00600     }
00601 #else
00602     // Optimization, just assume the memory policy
00603     zero_initialize(trg, n);
00604 #endif
00605   }
00606 
00607   //! For trivial types, we just memcpy() into the destination.
00608   static inline void copy_initialize(T* space, int n, const T* src)
00609   {
00610     memcpy(space, src, n * sizeof(T));
00611   }
00612 
00613   static void copy_initialize(T* trg, int n, const T*src, const MemoryPolicy trgmp, const int trgdev,
00614                               const MemoryPolicy srcmp, const int srcdev)
00615   {
00616 #ifdef INVT_USE_CUDA
00617     switch(srcmp)
00618     {
00619     case GLOBAL_DEVICE_MEMORY:
00620     case TEXTURE_DEVICE_MEMORY:
00621       switch(trgmp)
00622       {
00623       case GLOBAL_DEVICE_MEMORY:
00624       case TEXTURE_DEVICE_MEMORY:
00625         CudaDevices::memcpyDeviceToDevice(trg,src,n*sizeof(T),trgdev,srcdev);
00626         break;
00627       case HOST_MEMORY:
00628         CudaDevices::memcpyDeviceToHost(trg,src,n*sizeof(T),srcdev);
00629         break;
00630       }
00631       break;
00632     case HOST_MEMORY:
00633       switch(trgmp)
00634       {
00635       case GLOBAL_DEVICE_MEMORY:
00636       case TEXTURE_DEVICE_MEMORY:
00637         CudaDevices::memcpyHostToDevice(trg,src,n*sizeof(T),trgdev);
00638         break;
00639       case HOST_MEMORY:
00640         copy_initialize(trg, n, src);
00641         break;
00642       }
00643       break;
00644     }
00645 #else
00646     // Optimization, just assume the memory policy
00647     copy_initialize(trg, n, src);
00648 #endif
00649   }
00650 
00651   //! For trivial types, no destruction is needed.
00652   static void destruct(T* /*space*/, int /*n*/) {}
00653 
00654   //! For trivial types, no destruction is needed.
00655   static void destruct(T* /*space*/, int /*n*/, const MemoryPolicy /*mp*/, const int /*dev*/) {}
00656 
00657 };
00658 
00659 // ######################################################################
00660 template <class T> inline
00661 ArrayData<T>::ArrayData() :
00662   itsRefCount(),
00663   itsMemoryPolicy(HOST_MEMORY),
00664   itsStoragePolicy(MAKE_OWN_COPY),
00665   itsDevice(CUDA_HOST_DEVICE_NUM),
00666   itsDims(0, 0),
00667   itsData(static_cast<T*>(wrap_invt_allocate(1,itsMemoryPolicy,itsDevice))) // just ask for one byte
00668 {
00669   itsRefCount.atomic_set(0);
00670 }
00671 
00672 // ######################################################################
00673 template <class T> inline
00674 ArrayData<T>::ArrayData(const Dims& d, const InitPolicy ip) :
00675   itsRefCount(),
00676   itsMemoryPolicy(HOST_MEMORY),
00677   itsStoragePolicy(MAKE_OWN_COPY),
00678   itsDevice(CUDA_HOST_DEVICE_NUM),
00679   itsDims(d),
00680   itsData(static_cast<T*>(wrap_invt_allocate(itsDims.sz() * sizeof(T),itsMemoryPolicy,itsDevice)))
00681 {
00682   if (ip == ZEROS)
00683     ArrayHelper<T>::zero_initialize(itsData, itsDims.sz(), itsMemoryPolicy, itsDevice);
00684   else
00685     ArrayHelper<T>::minimal_initialize(itsData, itsDims.sz(), itsMemoryPolicy, itsDevice);
00686   itsRefCount.atomic_set(0);
00687 }
00688 
00689 
00690 // ######################################################################
00691 template <class T> inline
00692 ArrayData<T>::ArrayData(const Dims& d, const InitPolicy ip, const MemoryPolicy mp, const int device) :
00693   itsRefCount(),
00694   itsMemoryPolicy(mp),
00695   itsStoragePolicy(MAKE_OWN_COPY),
00696   itsDevice(device),
00697   itsDims(d),
00698   itsData(static_cast<T*>(wrap_invt_allocate(itsDims.sz() * sizeof(T),itsMemoryPolicy,itsDevice)))
00699 {
00700   if (ip == ZEROS)
00701     ArrayHelper<T>::zero_initialize(itsData, itsDims.sz(), itsMemoryPolicy, itsDevice);
00702   else
00703     ArrayHelper<T>::minimal_initialize(itsData, itsDims.sz(), itsMemoryPolicy, itsDevice);
00704   itsRefCount.atomic_set(0);
00705 }
00706 
00707 // ######################################################################
00708 template <class T> inline
00709 ArrayData<T>::ArrayData(const Dims& d, const T* aa) :
00710   itsRefCount(),
00711   itsMemoryPolicy(HOST_MEMORY),
00712   itsStoragePolicy(MAKE_OWN_COPY),
00713   itsDevice(CUDA_HOST_DEVICE_NUM),
00714   itsDims(d),
00715   itsData(static_cast<T*>(wrap_invt_allocate(itsDims.sz() * sizeof(T),itsMemoryPolicy,itsDevice)))
00716 {
00717   ArrayHelper<T>::copy_initialize(itsData, itsDims.sz(), aa);
00718 
00719   itsRefCount.atomic_set(0);
00720 }
00721 
00722 // ######################################################################
00723 template <class T> inline
00724 ArrayData<T>::ArrayData(const Dims& d, const T* aa, const MemoryPolicy mp, const int device, const MemoryPolicy srcmp, const int srcdevice) :
00725   itsRefCount(),
00726   itsMemoryPolicy(mp),
00727   itsStoragePolicy(MAKE_OWN_COPY),
00728   itsDevice(device),
00729   itsDims(d),
00730   itsData(static_cast<T*>(wrap_invt_allocate(itsDims.sz() * sizeof(T),itsMemoryPolicy,itsDevice)))
00731 {
00732   ArrayHelper<T>::copy_initialize(itsData, itsDims.sz(), aa, itsMemoryPolicy, itsDevice, srcmp, srcdevice);
00733 
00734   itsRefCount.atomic_set(0);
00735 }
00736 
00737 // ######################################################################
00738 template <class T> inline
00739 ArrayData<T>::ArrayData(const Dims& d, T* aa, const StoragePolicy s) :
00740   itsRefCount(),
00741   itsMemoryPolicy(HOST_MEMORY),
00742   itsStoragePolicy(s),
00743   itsDevice(CUDA_HOST_DEVICE_NUM),
00744   itsDims(d),
00745   itsData(s == MAKE_OWN_COPY
00746           ? static_cast<T*>(wrap_invt_allocate(itsDims.sz() * sizeof(T),itsMemoryPolicy,itsDevice))
00747           : aa)
00748 {
00749   if (s == MAKE_OWN_COPY)
00750     {
00751       if (itsDims.sz() != 0 && aa != 0)
00752         ArrayHelper<T>::copy_initialize(itsData, itsDims.sz(), aa);
00753       else
00754         ArrayHelper<T>::minimal_initialize(itsData, itsDims.sz());
00755     }
00756 
00757   itsRefCount.atomic_set(0);
00758 }
00759 
00760 
00761 // ######################################################################
00762 template <class T> inline
00763 ArrayData<T>::ArrayData(const Dims& d, T* aa, const StoragePolicy s, const MemoryPolicy mp, const int device, const MemoryPolicy srcmp, const int srcdevice) :
00764   itsRefCount(),
00765   itsMemoryPolicy(mp),
00766   itsStoragePolicy(s),
00767   itsDevice(device),
00768   itsDims(d),
00769   itsData((s == MAKE_OWN_COPY)
00770           ? static_cast<T*>(wrap_invt_allocate(itsDims.sz() * sizeof(T),itsMemoryPolicy,itsDevice))
00771           : aa)
00772 {
00773   if (s == MAKE_OWN_COPY)
00774     {
00775       if (itsDims.sz() != 0 && aa != 0)
00776         ArrayHelper<T>::copy_initialize(itsData, itsDims.sz(), aa, itsMemoryPolicy, itsDevice, srcmp, srcdevice);
00777       else
00778         ArrayHelper<T>::minimal_initialize(itsData, itsDims.sz(), itsMemoryPolicy, itsDevice);
00779     }
00780   else
00781     {
00782       if(itsMemoryPolicy!=srcmp || itsDevice!=srcdevice)
00783         LFATAL("WriteThru for ArrayData must be done using the same memory policy on the same device");
00784     }
00785 
00786   itsRefCount.atomic_set(0);
00787 }
00788 
00789 // ######################################################################
00790 template <class T> inline
00791 ArrayData<T>* ArrayData<T>::clone() const
00792 {
00793   // When the new memory policy is not specified, we should either default to keeping it
00794   // in the same location, or default to the host. Currently, all memcpy's have to go
00795   // from dev->host->dev even if it is to/from the same device so it seems like
00796   // defaulting to the host makes sense for now
00797   return new ArrayData(itsDims, itsData, HOST_MEMORY, CUDA_HOST_DEVICE_NUM, itsMemoryPolicy, itsDevice);
00798 }
00799 
00800 
00801 // ######################################################################
00802 template <class T> inline
00803 ArrayData<T>* ArrayData<T>::clone(const MemoryPolicy mp, const int dev) const
00804 {
00805   return new ArrayData(itsDims, itsData, mp, dev, itsMemoryPolicy, itsDevice);
00806 }
00807 
00808 // ######################################################################
00809 template <class T> inline
00810 ArrayData<T>::~ArrayData() throw()
00811 {
00812   if (itsStoragePolicy == MAKE_OWN_COPY)
00813     {
00814       ArrayHelper<T>::destruct(itsData, itsDims.sz());
00815       wrap_invt_deallocate(itsData,itsMemoryPolicy,itsDevice,itsDims.sz()*sizeof(T));
00816     }
00817 }
00818 
00819 // ######################################################################
00820 template <class T> inline
00821 const T* ArrayData<T>::data() const throw()
00822 { return itsData; }
00823 
00824 // ######################################################################
00825 template <class T> inline
00826 const T* ArrayData<T>::end() const throw()
00827 { return itsData + itsDims.sz(); }
00828 
00829 // ######################################################################
00830 template <class T> inline
00831 T* ArrayData<T>::dataw() throw()
00832 { return itsData; }
00833 
00834 // ######################################################################
00835 template <class T> inline
00836 T* ArrayData<T>::endw() throw()
00837 { return itsData + itsDims.sz(); }
00838 
00839 // ######################################################################
00840 template <class T> inline
00841 const Dims& ArrayData<T>::dims() const throw()
00842 { return itsDims; }
00843 
00844 // ######################################################################
00845 template <class T> inline
00846 int ArrayData<T>::w() const throw()
00847 { return itsDims.w(); }
00848 
00849 // ######################################################################
00850 template <class T> inline
00851 int ArrayData<T>::h() const throw()
00852 { return itsDims.h(); }
00853 
00854 // ######################################################################
00855 template <class T> inline
00856 void ArrayData<T>::acquire() throw()
00857 {
00858   itsRefCount.atomic_incr();
00859 
00860   check_acquisition(itsStoragePolicy, itsRefCount.atomic_get());
00861 }
00862 
00863 // ######################################################################
00864 template <class T> inline
00865 void ArrayData<T>::release() throw()
00866 {
00867   if ( itsRefCount.atomic_decr_test_zero() ) delete this;
00868 }
00869 
00870 // ######################################################################
00871 template <class T> inline
00872 bool ArrayData<T>::isShared() const throw()
00873 {
00874   return (itsRefCount.atomic_get() > 1);
00875 }
00876 
00877 // ######################################################################
00878 template <class T> inline
00879 int ArrayData<T>::refCount() const throw()
00880 {
00881   return itsRefCount.atomic_get();
00882 }
00883 
00884 // ######################################################################
00885 template <class T> inline
00886 MemoryPolicy ArrayData<T>::getMemoryPolicy() const
00887 {
00888   return itsMemoryPolicy;
00889 }
00890 
00891 // ######################################################################
00892 template <class T> inline
00893 int ArrayData<T>::getMemoryDevice() const
00894 {
00895   return itsDevice;
00896 }
00897 
00898 
00899 // ######################################################################
00900 // ######################################################################
00901 // INLINE FUNCTIONS for ArrayHandle:
00902 // ######################################################################
00903 // ######################################################################
00904 
00905 // ######################################################################
00906 template <class T> inline
00907 ArrayHandle<T>::ArrayHandle() : px(new ArrayData<T>)
00908 {
00909   px->acquire();
00910 }
00911 
00912 
00913 // ######################################################################
00914 template <class T> inline
00915 ArrayHandle<T>::ArrayHandle(const ArrayData<T>* p) throw() :
00916   // this cast is OK because we maintain const-correctness through
00917   // copy-on-write
00918   px(const_cast<ArrayData<T>*>(p))
00919 {
00920   px->acquire();
00921 }
00922 
00923 // ######################################################################
00924 template <class T> inline
00925 ArrayHandle<T>::ArrayHandle(const ArrayData<T>* p, const MemoryPolicy mp, const int dev) throw() :
00926   // this cast is OK because we maintain const-correctness through
00927   // copy-on-write
00928   px(const_cast<ArrayData<T>*>(p->clone(mp,dev)))
00929 {
00930   px->acquire();
00931 }
00932 
00933 
00934 // ######################################################################
00935 template <class T> inline
00936 ArrayHandle<T>::ArrayHandle(const ArrayHandle& r) throw() :
00937   px(r.px)
00938 {
00939   px->acquire();
00940 }
00941 
00942 // ######################################################################
00943 template <class T> inline
00944 ArrayHandle<T>::~ArrayHandle() throw()
00945 {
00946   px->release();
00947 }
00948 
00949 // ######################################################################
00950 template <class T> inline
00951 void ArrayHandle<T>::swap(ArrayHandle<T>& other) throw()
00952 {
00953   ArrayData<T>* other_px = other.px;
00954   other.px = this->px;
00955   this->px = other_px;
00956 }
00957 
00958 // ######################################################################
00959 template <class T> inline
00960 ArrayHandle<T>& ArrayHandle<T>::operator=(const ArrayHandle& r) throw()
00961 {
00962   ArrayHandle tmp(r); this->swap(tmp); return *this;
00963 }
00964 
00965 // ######################################################################
00966 template <class T> inline
00967 const ArrayData<T>& ArrayHandle<T>::get() const throw()
00968 { return *px; }
00969 
00970 // ######################################################################
00971 template <class T> inline
00972 ArrayData<T>& ArrayHandle<T>::uniq(const MemoryPolicy mp, const int dev)
00973 {
00974   if (px->isShared())
00975     {
00976       ArrayHandle copy(px->clone(mp,dev));
00977       this->swap(copy);
00978     }
00979 
00980   return *px;
00981 }
00982 
00983 // ######################################################################
00984 template <class T> inline
00985 ArrayData<T>& ArrayHandle<T>::uniq()
00986 {
00987   if (px->isShared())
00988     {
00989       ArrayHandle copy(px->clone());
00990       this->swap(copy);
00991     }
00992 
00993   return *px;
00994 }
00995 
00996 // ######################################################################
00997 template <class T> inline
00998 bool ArrayHandle<T>::isShared() const throw()
00999 { return px->isShared(); }
01000 
01001 // ######################################################################
01002 template <class T> inline
01003 bool ArrayHandle<T>::hasSameData(const ArrayHandle<T>& b) const throw()
01004 { return px==b.px; }
01005 
01006 // ######################################################################
01007 template <class T> inline
01008 int ArrayHandle<T>::refCount() const throw()
01009 { return px->refCount(); }
01010 
01011 // ######################################################################
01012 /* So things look consistent in everyone's emacs... */
01013 /* Local Variables: */
01014 /* indent-tabs-mode: nil */
01015 /* End: */
01016 
01017 #endif // !ARRAY_IMPL_H_DEFINED