00001 /*!@file Image/ArrayData.H The guts of the 2D ref-counted copy-on-write array 00002 implementation */ 00003 00004 // //////////////////////////////////////////////////////////////////// // 00005 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the // 00006 // University of Southern California (USC) and the iLab at USC. // 00007 // See http://iLab.usc.edu for information about this project. // 00008 // //////////////////////////////////////////////////////////////////// // 00009 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00010 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00011 // in Visual Environments, and Applications'' by Christof Koch and // 00012 // Laurent Itti, California Institute of Technology, 2001 (patent // 00013 // pending; application number 09/912,225 filed July 23, 2001; see // 00014 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00015 // //////////////////////////////////////////////////////////////////// // 00016 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00017 // // 00018 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00019 // redistribute it and/or modify it under the terms of the GNU General // 00020 // Public License as published by the Free Software Foundation; either // 00021 // version 2 of the License, or (at your option) any later version. // 00022 // // 00023 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00024 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00025 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00026 // PURPOSE. See the GNU General Public License for more details. // 00027 // // 00028 // You should have received a copy of the GNU General Public License // 00029 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00030 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00031 // Boston, MA 02111-1307 USA. // 00032 // //////////////////////////////////////////////////////////////////// // 00033 // 00034 // Primary maintainer for this file: Rob Peters <rjpeters@klab.caltech.edu> 00035 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Image/ArrayData.H $ 00036 // $Id: ArrayData.H 13229 2010-04-15 01:56:52Z dparks $ 00037 // 00038 00039 #ifndef ARRAYDATA_H_DEFINED 00040 #define ARRAYDATA_H_DEFINED 00041 00042 #include "Image/Dims.H" 00043 #include "Util/Alloc.H" // for invt_allocate()/invt_deallocate() 00044 #include "Util/TypeTraits.H" 00045 #include "Util/log.H" 00046 #include "rutz/atomic.h" 00047 00048 #include <cstring> // for memset() and memcpy() 00049 #include <new> // for placement new, in ArrayData's ctors 00050 00051 #ifdef INVT_USE_CUDA 00052 #include "CUDA/CudaAlloc.H" 00053 #include "CUDA/CudaDevices.H" 00054 #endif 00055 00056 //! Specifies what to do with the T* used to init an ArrayData. 00057 enum StoragePolicy 00058 { 00059 //! Make an initial copy of the data, then copy-on-write as usual. 00060 MAKE_OWN_COPY, 00061 00062 //! Borrow the data, with write-through. 00063 /*! Writing to the Array will modify the T[] that initialized it. */ 00064 WRITE_THRU 00065 }; 00066 00067 //! Specifies how to initialize the pixels of a new image. 00068 enum InitPolicy 00069 { 00070 //! All elements are initialized to the pixel type's zero or default value. 00071 ZEROS, 00072 00073 //! Pixels are only minimally initialized. 00074 /*! For trivial types, this means the pixel values are not 00075 initialized at all; for non-trivial user-defined types, this 00076 means each pixel is copy-constructed from the type's default 00077 value. */ 00078 NO_INIT 00079 }; 00080 00081 //! Specifies where on a particular device the memory is stored 00082 enum MemoryPolicy 00083 { 00084 //! On GPU, global (to a particular set of kernels on a single device) 00085 GLOBAL_DEVICE_MEMORY, 00086 //! On GPU, located in texture memory 00087 TEXTURE_DEVICE_MEMORY, 00088 //! On CPU/Host 00089 HOST_MEMORY, 00090 }; 00091 00092 //! Device id for the host, to distinguish from CUDA devices 00093 #define CUDA_HOST_DEVICE_NUM -1 00094 00095 00096 //! Function to wrap use of invt_allocate to allow for CUDA memory policies 00097 inline void* wrap_invt_allocate(size_t user_nbytes, const MemoryPolicy mp, const int dev) 00098 { 00099 #ifdef INVT_USE_CUDA 00100 switch(mp) 00101 { 00102 case GLOBAL_DEVICE_MEMORY: 00103 case TEXTURE_DEVICE_MEMORY: 00104 return cuda_invt_allocate(user_nbytes,dev); 00105 case HOST_MEMORY: 00106 return invt_allocate(user_nbytes); 00107 default: 00108 LFATAL("Invalid memory policy %d",mp); 00109 return NULL; 00110 } 00111 LFATAL("Should be impossible to get here"); 00112 return NULL; 00113 #else 00114 // Optimization, don't look at memory policy 00115 return invt_allocate(user_nbytes); 00116 #endif 00117 } 00118 00119 //! Function to wrap use of invt_deallocate to allow for CUDA memory policies 00120 inline void wrap_invt_deallocate(void *mem, const MemoryPolicy mp, const int dev, const size_t nBytes) 00121 { 00122 #ifdef INVT_USE_CUDA 00123 switch(mp) 00124 { 00125 case GLOBAL_DEVICE_MEMORY: 00126 case TEXTURE_DEVICE_MEMORY: 00127 cuda_invt_deallocate(mem, dev,nBytes); 00128 return; 00129 case HOST_MEMORY: 00130 invt_deallocate(mem); 00131 return; 00132 } 00133 #else 00134 // Optimization, don't look at memory policy 00135 invt_deallocate(mem); 00136 #endif 00137 } 00138 00139 00140 void check_acquisition(StoragePolicy s, int count) throw(); 00141 00142 00143 00144 //! Implements a ref-counted 2-D array, should only be used via ArrayHandle. 00145 /*! ArrayData implements a 2-D array, by storing: 00146 - a Dims representing the width+height of the array, 00147 - a pointer to the dynamically allocated memory where the array 00148 data are stored, 00149 - a reference count, and 00150 - a StoragePolicy, indicating whether the ArrayData object 00151 "owns" the memory and should delete[] it eventually (in the 00152 case of MAKE_OWN_COPY), or whether someone else "owns" the 00153 memory and will eventually delete[] it themselves (in the case 00154 of WRITE_THRU) 00155 00156 All of these data members are immutable, with the exception of the 00157 reference count. This reflects the semantics that an ArrayData 00158 object is a fixed-size block of data; if a higher-level clients 00159 wants to resize(), it does so by creating a new ArrayData of the 00160 desired size, and releasing the old ArrayData. This simplifies the 00161 memory management: within ArrayData, memory is new[]'ed only in 00162 the constructor(s), and delete[]'ed only in the destructor. 00163 00164 ArrayData provides the basics needed to use STL-style iterator 00165 algorithms. In particular it offers two pairs of functions to get 00166 half-open iterator ranges: 00167 00168 - data() and end() for read-only iteration, and 00169 - dataw() and endw() for read-write iteration (where the "w" 00170 means "write" or "writable"). 00171 */ 00172 template <class T> 00173 class ArrayData 00174 { 00175 private: 00176 ArrayData(const ArrayData&); // not allowed; use clone() instead 00177 ArrayData& operator=(const ArrayData&); // not allowed; use clone() instead 00178 00179 public: 00180 00181 // ############################################################ 00182 // ##### Constructors 00183 // ############################################################ 00184 00185 //! Default construct an empty (i.e. 0-by-0) array. 00186 inline ArrayData(); 00187 00188 //! Construct with the specified dimensions and InitPolicy. 00189 inline ArrayData(const Dims& dims, const InitPolicy ip = NO_INIT); 00190 00191 //! Construct with the specified dimensions and InitPolicy and Memory Policy 00192 inline ArrayData(const Dims& dims, const InitPolicy ip, const MemoryPolicy mp, const int device); 00193 00194 //! Construct with the specified dimensions. 00195 /*! If the passed-in T* array is non-null, copy that data into the 00196 constructed object. */ 00197 inline ArrayData(const Dims& dims, const T* aa); 00198 00199 //! Construct with the specified dimensions, input data, and memory policy 00200 /*! If the passed-in T* array is non-null, copy that data into the 00201 constructed object. Use the given memory policy */ 00202 inline ArrayData(const Dims& dims, const T* aa, const MemoryPolicy mp, const int device, const MemoryPolicy srcmp, const int srcdevice); 00203 00204 //! Construct with the specified dimensions and storage policy. 00205 /*! If the passed-in T* array is non-null, then if the storage 00206 policy is MAKE_OWN_COPY we make a copy of the data, but if the 00207 storage policy is WRITE_THRU then we "borrow" the data. */ 00208 inline ArrayData(const Dims& dims, T* aa, const StoragePolicy s); 00209 00210 //! Construct with the specified dimensions, storage and memory policies. 00211 /*! If the passed-in T* array is non-null, then if the storage 00212 policy is MAKE_OWN_COPY we make a copy of the data, but if the 00213 storage policy is WRITE_THRU then we "borrow" the data. */ 00214 inline ArrayData(const Dims& dims, T* aa, const StoragePolicy s, const MemoryPolicy mp, const int device, const MemoryPolicy srcmp, const int srcdevice); 00215 00216 //! Return a new'ed copy of *this. 00217 /*! The cloned copy will always be MAKE_OWN_COPY, even if *this is 00218 WRITE_THRU. */ 00219 inline ArrayData* clone() const; 00220 00221 //! Return a new'ed copy of *this using the given memory policy 00222 /*! The cloned copy will always be MAKE_OWN_COPY, even if *this is 00223 WRITE_THRU. */ 00224 inline ArrayData* clone(const MemoryPolicy mp, const int device) const; 00225 00226 //! Release the pointed-to data if our storage policy is MAKE_OWN_COPY. 00227 inline ~ArrayData() throw(); 00228 00229 // ############################################################ 00230 // ##### Accessors 00231 // ############################################################ 00232 00233 //! Returns a read-only pointer to the data array. 00234 /*! With end(), forms an STL-style half-open const_iterator range. */ 00235 inline const T* data() const throw(); 00236 00237 //! Returns a read-only pointer to one-past-the-end of the data array. 00238 /*! With data(), forms an STL-style half-open const_iterator range. */ 00239 inline const T* end() const throw(); 00240 00241 //! Returns a read-write pointer to the data array (the "w" means "write"). 00242 /*! With endw(), forms an STL-style half-open iterator range. */ 00243 inline T* dataw() throw(); 00244 00245 //! Returns a read-write pointer to one-past-the-end of the data array. 00246 /*! With dataw(), forms an STL-style half-open iterator range. */ 00247 inline T* endw() throw(); 00248 00249 //! Returns the dimensions (i.e. width, height) of the 2-D data array. 00250 inline const Dims& dims() const throw(); 00251 00252 //! Returns the width of the data array (i.e. the number of columns). 00253 inline int w() const throw(); 00254 00255 //! Returns the height of the data array (i.e. the number of rows). 00256 inline int h() const throw(); 00257 00258 // ############################################################ 00259 // ##### Reference count manipulation 00260 // ############################################################ 00261 00262 //! Increment the reference count. 00263 inline void acquire() throw(); 00264 00265 //! Decrement the reference count and delete self if the count goes to zero. 00266 inline void release() throw(); 00267 00268 //! Query whether this object is shared (i.e. ref count > 1). 00269 inline bool isShared() const throw(); 00270 00271 //! For debugging/testing only. 00272 /*! Returns the current reference count. */ 00273 inline int refCount() const throw(); 00274 00275 //! Return memory policy of underlying array data 00276 inline MemoryPolicy getMemoryPolicy() const; 00277 00278 //! Return memory policy of underlying array data 00279 inline int getMemoryDevice() const; 00280 00281 00282 private: 00283 rutz::atomic_int_t itsRefCount; 00284 MemoryPolicy const itsMemoryPolicy; // Type of memory that is being used (global,texture,host) 00285 StoragePolicy const itsStoragePolicy; 00286 int const itsDevice; // CUDA device (or host) where memory is located 00287 00288 Dims const itsDims; // width+height of data array 00289 T* const itsData; // data array 00290 }; 00291 00292 00293 //! This class provides ref-counting and copy-on-write for ArrayData's. 00294 /*! NOTE: Most clients will not need to use ArrayHandle<T> directly, 00295 but can rather use Image<T>, which wraps and expands the 00296 ArrayHandle interface. Nevertheless, these notes may be of 00297 interest because the ref-counting and copy-on-write semantics that 00298 ArrayHandle<T> offers are transferred through to Image<T>. 00299 00300 As long as clients access ArrayData<T> through an ArrayHandle<T>, 00301 the proper copy-on-write semantics are guaranteed. This is by 00302 virtue of the fact that the only way to extract a non-const 00303 ArrayData<T>& from an ArrayHandle<T> is to call uniq(), which in 00304 turn checks to see if the ArrayData is shared (i.e. ref count > 00305 1). If it is shared, then that ArrayData is clone()'d before 00306 returning a reference to it. 00307 00308 The only caveat: you should not store non-const references to an 00309 ArrayData or to the elements of an ArrayData object for "long" 00310 periods of time. In this case, "long" means "longer than you are 00311 sure you have a unique reference", which in practice means "you 00312 can keep such references up until a new copy of the ArrayHandle<T> 00313 is made". This goes along with sensible coding practices anyway; a 00314 typical usage pattern would be: 00315 00316 -# you have an ArrayHandle<T> (or an Image<T>, which holds an 00317 ArrayHandle<T>) that you want to modify 00318 -# get a pair of iterator's for that object so that you can 00319 perform your series of write operations 00320 -# don't use those iterators anymore, and then you can safely 00321 make copies of the freshly-modified ArrayHandle<T> or 00322 Image<T> object (for example to return a copy of it as the 00323 return value from a function) 00324 */ 00325 template<class T> 00326 class ArrayHandle 00327 { 00328 public: 00329 //! Default construct with an empty array (i.e. 0-by-0). 00330 inline ArrayHandle(); 00331 00332 //! Construct with a new ArrayData, which the ArrayHandle now owns. 00333 inline explicit ArrayHandle(const ArrayData<T>* p) throw(); 00334 00335 //! Construct with a new ArrayData, which the ArrayHandle now owns. 00336 inline explicit ArrayHandle(const ArrayData<T>* p, const MemoryPolicy mp, const int dev) throw(); 00337 00338 //! Copy construct; no deep-copy is incurred, just bumps the ref-count. 00339 inline ArrayHandle(const ArrayHandle& r) throw(); 00340 00341 //! Destruct; delete's the ArrayData<T> if its ref count goes to zero. 00342 inline ~ArrayHandle() throw(); 00343 00344 //! Swap contents with another ArrayHandle. 00345 inline void swap(ArrayHandle<T>& other) throw(); 00346 00347 //! Assignment; no deep-copy is incurred, just bumps the ref-count. 00348 inline ArrayHandle& operator=(const ArrayHandle& r) throw(); 00349 00350 //! Get a read-only reference to the ArrayData object. 00351 /*! This is a "cheap" operation, since the ArrayData will never be 00352 copied in this operation. */ 00353 inline const ArrayData<T>& get() const throw(); 00354 00355 //! Get a unique read-write reference to the ArrayData object. 00356 /*! This is a potentially "expensive" operation, since in order to 00357 get a unique ArrayData object, the current one may have to be 00358 clone()'d if it is shared (i.e. ref-count > 1). */ 00359 inline ArrayData<T>& uniq(const MemoryPolicy mp, const int dev); 00360 00361 //! Get a unique read-write reference to the ArrayData object. 00362 /*! This is a potentially "expensive" operation, since in order to 00363 get a unique ArrayData object, the current one may have to be 00364 clone()'d if it is shared (i.e. ref-count > 1). */ 00365 inline ArrayData<T>& uniq(); 00366 00367 //! See if the ArrayData object is shared (i.e. ref-count > 1). 00368 inline bool isShared() const throw(); 00369 00370 //! For testing/debugging only. 00371 /*! See if we are pointing to the same ArrayData<T> as is the other 00372 ArrayHandle<T>. */ 00373 inline bool hasSameData(const ArrayHandle<T>& b) const throw(); 00374 00375 //! For testing/debugging only. 00376 /*! Returns the current reference count of the our ArrayData<T>. */ 00377 inline int refCount() const throw(); 00378 00379 private: 00380 ArrayData<T>* px; // contained pointer 00381 }; 00382 00383 00384 00385 // ###################################################################### 00386 // ###################################################################### 00387 // INLINE FUNCTIONS for ArrayData: 00388 // ###################################################################### 00389 // ###################################################################### 00390 00391 //! This is a helper template struct for ArrayData's constructors. 00392 /*! It exposes functions for initializing raw memory in different 00393 ways. These functions can then be specialized for builtin or 00394 trivial types (i.e., types for which TypeTraits<T>::isTrivial is 00395 true) for maximum efficiency. We get this specialization by having 00396 an extra bool isTrivial template parameter for ArrayHelper, which 00397 allows us to select at compile-time based on the complexity of 00398 T. This is much better than using a run-time "if" statement in the 00399 constructors, since that incurs both a time cost (to evaluate an 00400 "if" statement) and a code space cost (since we're then including 00401 code that won't be used). */ 00402 template <class T, bool isTrivial = TypeTraits<T>::isTrivial> 00403 struct ArrayHelper 00404 { 00405 //! Construct \a n copies of \a val at \a space using placement new. 00406 /*! For general types, the minimal initialization that we can do is 00407 to zero_initialize() (i.e., unlike for builtin or trivial types, 00408 we can't just leave the memory sitting as it is, 00409 uninitialized). */ 00410 static void minimal_initialize(T* trg, int n) 00411 { 00412 zero_initialize(trg, n); 00413 } 00414 00415 static void minimal_initialize(T* trg, int n, const MemoryPolicy mp, const int dev) 00416 { 00417 zero_initialize(trg, n, mp, dev); 00418 } 00419 00420 //! Initialize the memory to T's default or zero value. 00421 /*! For general types, the best way to do this is to make one 00422 default T object, and then copy construct each element of the 00423 array from that object using placement new. */ 00424 static inline void zero_initialize(T* trg, int n) 00425 { 00426 T zero = T(); 00427 T* cur = trg; 00428 try 00429 { 00430 for (; n > 0; --n, ++cur) 00431 new (cur) T(zero); // placement new 00432 } 00433 // if a constructor call throws, we have to clean up manually: 00434 catch (...) 00435 { 00436 while (cur != trg) (cur--)->~T(); // explicit destructor call 00437 invt_deallocate(trg); 00438 throw; // rethrow the exception 00439 } 00440 } 00441 00442 static void zero_initialize(T* trg, int n, const MemoryPolicy mp, const int dev) 00443 { 00444 #ifdef INVT_USE_CUDA 00445 T* tmp; 00446 switch(mp) 00447 { 00448 case GLOBAL_DEVICE_MEMORY: 00449 case TEXTURE_DEVICE_MEMORY: 00450 // Is there a better way to do this? 00451 tmp = new T[n]; 00452 CudaDevices::memcpyHostToDevice(trg,tmp,n*sizeof(T),dev); 00453 delete tmp; 00454 break; 00455 case HOST_MEMORY: 00456 zero_initialize(trg,n); 00457 break; 00458 } 00459 #else 00460 // Optimization, just assume the memory policy 00461 zero_initialize(trg,n); 00462 #endif 00463 00464 } 00465 00466 //! Initialize the memory by copying elements from the \a src array. 00467 /*! For general types, we just copy construct each element in the 00468 target array from the corresponding element in the \a src array, 00469 using placement new. */ 00470 static inline void copy_initialize(T* trg, int n, const T*src) 00471 { 00472 T* cur = trg; 00473 try 00474 { 00475 for (; n > 0; --n, ++cur) 00476 new (cur) T(*src++); // placement new 00477 } 00478 // if a constructor call throws, we have to clean up manually: 00479 catch (...) 00480 { 00481 while (cur != trg) (cur--)->~T(); // explicit destructor call 00482 invt_deallocate(trg); 00483 throw; // rethrow the exception 00484 } 00485 } 00486 00487 // THIS IS AN UNSAFE DESIGN! Complex types that allocate/deallocate resources in their constructors/destructors 00488 // are not properly handled when sent/received from CUDA devices under this framework. 00489 // The reason that non-primitive types are supported for CUDA right now, is because PixRGB and its ilk 00490 // are very useful, non-primitive type that will work with CUDA. 00491 // Maybe we should add a complext template or some other intelligence to demarcate the line between simple and complex classes 00492 // to fix this problem. -- DFP 082009 00493 static void copy_initialize(T* trg, int n, const T*src, const MemoryPolicy trgmp, const int trgdev, 00494 const MemoryPolicy srcmp, const int srcdev) 00495 { 00496 #ifdef INVT_USE_CUDA 00497 T *tmp; 00498 switch(srcmp) 00499 { 00500 case GLOBAL_DEVICE_MEMORY: 00501 case TEXTURE_DEVICE_MEMORY: 00502 switch(trgmp) 00503 { 00504 case GLOBAL_DEVICE_MEMORY: 00505 case TEXTURE_DEVICE_MEMORY: 00506 // These types *SHOULD* be semi-trivial since they are going to the CUDA device 00507 CudaDevices::memcpyDeviceToDevice(trg,src,n*sizeof(T),trgdev,srcdev); 00508 break; 00509 case HOST_MEMORY: 00510 // Very inefficient, have to copy whole block from device to host, and then 00511 // call the constructor for each item in array using a new block of mem 00512 tmp = new T[n]; 00513 CudaDevices::memcpyDeviceToHost(tmp,src,n*sizeof(T),srcdev); 00514 copy_initialize(trg,n,tmp); 00515 delete tmp; 00516 break; 00517 } 00518 00519 break; 00520 case HOST_MEMORY: 00521 switch(trgmp) 00522 { 00523 case GLOBAL_DEVICE_MEMORY: 00524 case TEXTURE_DEVICE_MEMORY: 00525 CudaDevices::memcpyHostToDevice(trg,src,n*sizeof(T),trgdev); 00526 break; 00527 case HOST_MEMORY: 00528 copy_initialize(trg,n,src); 00529 break; 00530 } 00531 break; 00532 } 00533 #else 00534 // Optimization, just assume the memory policy 00535 copy_initialize(trg,n,src); 00536 #endif 00537 } 00538 00539 00540 00541 //! Call destructors on the \a n objects stored in \a space. 00542 static inline void destruct(T* trg, int n) 00543 { 00544 while (--n >= 0) 00545 (trg+n)->~T(); // explicit destructor call 00546 } 00547 00548 static void destruct(T* trg, int n, const MemoryPolicy mp, const int dev) 00549 { 00550 #ifdef INVT_USE_CUDA 00551 switch(mp) 00552 { 00553 case GLOBAL_DEVICE_MEMORY: 00554 case TEXTURE_DEVICE_MEMORY: 00555 // What can we do here? 00556 break; 00557 case HOST_MEMORY: 00558 destruct(trg,n); 00559 break; 00560 } 00561 #else 00562 // Optimization, just assume the memory policy 00563 destruct(trg,n); 00564 #endif 00565 } 00566 00567 }; 00568 00569 //! Specialization of ArrayHelper for builtin/trivial types. 00570 /*! We can do things much faster with these kinds of types, using 00571 e.g. memset() and memcpy(), and we don't have to do default 00572 initialization at all. */ 00573 template <class T> 00574 struct ArrayHelper<T, true> 00575 { 00576 //! For trivial types, we don't have to do anything here. 00577 static void minimal_initialize(T* /*trg*/, int /*n*/) {} 00578 00579 //! For trivial types, we don't have to do anything here. 00580 static void minimal_initialize(T* /*trg*/, int /*n*/, const MemoryPolicy /*mp*/, const int /*dev*/) {} 00581 00582 //! For trivial types, we just memset() all the memory to zero. 00583 static inline void zero_initialize(T* trg, int n) 00584 { 00585 memset(trg, 0, n * sizeof(T)); 00586 } 00587 00588 static void zero_initialize(T* trg, int n, const MemoryPolicy mp, const int device) 00589 { 00590 #ifdef INVT_USE_CUDA 00591 switch(mp) 00592 { 00593 case GLOBAL_DEVICE_MEMORY: 00594 case TEXTURE_DEVICE_MEMORY: 00595 CudaDevices::memset(trg,0,n*sizeof(T),device); 00596 break; 00597 case HOST_MEMORY: 00598 zero_initialize(trg, n); 00599 break; 00600 } 00601 #else 00602 // Optimization, just assume the memory policy 00603 zero_initialize(trg, n); 00604 #endif 00605 } 00606 00607 //! For trivial types, we just memcpy() into the destination. 00608 static inline void copy_initialize(T* space, int n, const T* src) 00609 { 00610 memcpy(space, src, n * sizeof(T)); 00611 } 00612 00613 static void copy_initialize(T* trg, int n, const T*src, const MemoryPolicy trgmp, const int trgdev, 00614 const MemoryPolicy srcmp, const int srcdev) 00615 { 00616 #ifdef INVT_USE_CUDA 00617 switch(srcmp) 00618 { 00619 case GLOBAL_DEVICE_MEMORY: 00620 case TEXTURE_DEVICE_MEMORY: 00621 switch(trgmp) 00622 { 00623 case GLOBAL_DEVICE_MEMORY: 00624 case TEXTURE_DEVICE_MEMORY: 00625 CudaDevices::memcpyDeviceToDevice(trg,src,n*sizeof(T),trgdev,srcdev); 00626 break; 00627 case HOST_MEMORY: 00628 CudaDevices::memcpyDeviceToHost(trg,src,n*sizeof(T),srcdev); 00629 break; 00630 } 00631 break; 00632 case HOST_MEMORY: 00633 switch(trgmp) 00634 { 00635 case GLOBAL_DEVICE_MEMORY: 00636 case TEXTURE_DEVICE_MEMORY: 00637 CudaDevices::memcpyHostToDevice(trg,src,n*sizeof(T),trgdev); 00638 break; 00639 case HOST_MEMORY: 00640 copy_initialize(trg, n, src); 00641 break; 00642 } 00643 break; 00644 } 00645 #else 00646 // Optimization, just assume the memory policy 00647 copy_initialize(trg, n, src); 00648 #endif 00649 } 00650 00651 //! For trivial types, no destruction is needed. 00652 static void destruct(T* /*space*/, int /*n*/) {} 00653 00654 //! For trivial types, no destruction is needed. 00655 static void destruct(T* /*space*/, int /*n*/, const MemoryPolicy /*mp*/, const int /*dev*/) {} 00656 00657 }; 00658 00659 // ###################################################################### 00660 template <class T> inline 00661 ArrayData<T>::ArrayData() : 00662 itsRefCount(), 00663 itsMemoryPolicy(HOST_MEMORY), 00664 itsStoragePolicy(MAKE_OWN_COPY), 00665 itsDevice(CUDA_HOST_DEVICE_NUM), 00666 itsDims(0, 0), 00667 itsData(static_cast<T*>(wrap_invt_allocate(1,itsMemoryPolicy,itsDevice))) // just ask for one byte 00668 { 00669 itsRefCount.atomic_set(0); 00670 } 00671 00672 // ###################################################################### 00673 template <class T> inline 00674 ArrayData<T>::ArrayData(const Dims& d, const InitPolicy ip) : 00675 itsRefCount(), 00676 itsMemoryPolicy(HOST_MEMORY), 00677 itsStoragePolicy(MAKE_OWN_COPY), 00678 itsDevice(CUDA_HOST_DEVICE_NUM), 00679 itsDims(d), 00680 itsData(static_cast<T*>(wrap_invt_allocate(itsDims.sz() * sizeof(T),itsMemoryPolicy,itsDevice))) 00681 { 00682 if (ip == ZEROS) 00683 ArrayHelper<T>::zero_initialize(itsData, itsDims.sz(), itsMemoryPolicy, itsDevice); 00684 else 00685 ArrayHelper<T>::minimal_initialize(itsData, itsDims.sz(), itsMemoryPolicy, itsDevice); 00686 itsRefCount.atomic_set(0); 00687 } 00688 00689 00690 // ###################################################################### 00691 template <class T> inline 00692 ArrayData<T>::ArrayData(const Dims& d, const InitPolicy ip, const MemoryPolicy mp, const int device) : 00693 itsRefCount(), 00694 itsMemoryPolicy(mp), 00695 itsStoragePolicy(MAKE_OWN_COPY), 00696 itsDevice(device), 00697 itsDims(d), 00698 itsData(static_cast<T*>(wrap_invt_allocate(itsDims.sz() * sizeof(T),itsMemoryPolicy,itsDevice))) 00699 { 00700 if (ip == ZEROS) 00701 ArrayHelper<T>::zero_initialize(itsData, itsDims.sz(), itsMemoryPolicy, itsDevice); 00702 else 00703 ArrayHelper<T>::minimal_initialize(itsData, itsDims.sz(), itsMemoryPolicy, itsDevice); 00704 itsRefCount.atomic_set(0); 00705 } 00706 00707 // ###################################################################### 00708 template <class T> inline 00709 ArrayData<T>::ArrayData(const Dims& d, const T* aa) : 00710 itsRefCount(), 00711 itsMemoryPolicy(HOST_MEMORY), 00712 itsStoragePolicy(MAKE_OWN_COPY), 00713 itsDevice(CUDA_HOST_DEVICE_NUM), 00714 itsDims(d), 00715 itsData(static_cast<T*>(wrap_invt_allocate(itsDims.sz() * sizeof(T),itsMemoryPolicy,itsDevice))) 00716 { 00717 ArrayHelper<T>::copy_initialize(itsData, itsDims.sz(), aa); 00718 00719 itsRefCount.atomic_set(0); 00720 } 00721 00722 // ###################################################################### 00723 template <class T> inline 00724 ArrayData<T>::ArrayData(const Dims& d, const T* aa, const MemoryPolicy mp, const int device, const MemoryPolicy srcmp, const int srcdevice) : 00725 itsRefCount(), 00726 itsMemoryPolicy(mp), 00727 itsStoragePolicy(MAKE_OWN_COPY), 00728 itsDevice(device), 00729 itsDims(d), 00730 itsData(static_cast<T*>(wrap_invt_allocate(itsDims.sz() * sizeof(T),itsMemoryPolicy,itsDevice))) 00731 { 00732 ArrayHelper<T>::copy_initialize(itsData, itsDims.sz(), aa, itsMemoryPolicy, itsDevice, srcmp, srcdevice); 00733 00734 itsRefCount.atomic_set(0); 00735 } 00736 00737 // ###################################################################### 00738 template <class T> inline 00739 ArrayData<T>::ArrayData(const Dims& d, T* aa, const StoragePolicy s) : 00740 itsRefCount(), 00741 itsMemoryPolicy(HOST_MEMORY), 00742 itsStoragePolicy(s), 00743 itsDevice(CUDA_HOST_DEVICE_NUM), 00744 itsDims(d), 00745 itsData(s == MAKE_OWN_COPY 00746 ? static_cast<T*>(wrap_invt_allocate(itsDims.sz() * sizeof(T),itsMemoryPolicy,itsDevice)) 00747 : aa) 00748 { 00749 if (s == MAKE_OWN_COPY) 00750 { 00751 if (itsDims.sz() != 0 && aa != 0) 00752 ArrayHelper<T>::copy_initialize(itsData, itsDims.sz(), aa); 00753 else 00754 ArrayHelper<T>::minimal_initialize(itsData, itsDims.sz()); 00755 } 00756 00757 itsRefCount.atomic_set(0); 00758 } 00759 00760 00761 // ###################################################################### 00762 template <class T> inline 00763 ArrayData<T>::ArrayData(const Dims& d, T* aa, const StoragePolicy s, const MemoryPolicy mp, const int device, const MemoryPolicy srcmp, const int srcdevice) : 00764 itsRefCount(), 00765 itsMemoryPolicy(mp), 00766 itsStoragePolicy(s), 00767 itsDevice(device), 00768 itsDims(d), 00769 itsData((s == MAKE_OWN_COPY) 00770 ? static_cast<T*>(wrap_invt_allocate(itsDims.sz() * sizeof(T),itsMemoryPolicy,itsDevice)) 00771 : aa) 00772 { 00773 if (s == MAKE_OWN_COPY) 00774 { 00775 if (itsDims.sz() != 0 && aa != 0) 00776 ArrayHelper<T>::copy_initialize(itsData, itsDims.sz(), aa, itsMemoryPolicy, itsDevice, srcmp, srcdevice); 00777 else 00778 ArrayHelper<T>::minimal_initialize(itsData, itsDims.sz(), itsMemoryPolicy, itsDevice); 00779 } 00780 else 00781 { 00782 if(itsMemoryPolicy!=srcmp || itsDevice!=srcdevice) 00783 LFATAL("WriteThru for ArrayData must be done using the same memory policy on the same device"); 00784 } 00785 00786 itsRefCount.atomic_set(0); 00787 } 00788 00789 // ###################################################################### 00790 template <class T> inline 00791 ArrayData<T>* ArrayData<T>::clone() const 00792 { 00793 // When the new memory policy is not specified, we should either default to keeping it 00794 // in the same location, or default to the host. Currently, all memcpy's have to go 00795 // from dev->host->dev even if it is to/from the same device so it seems like 00796 // defaulting to the host makes sense for now 00797 return new ArrayData(itsDims, itsData, HOST_MEMORY, CUDA_HOST_DEVICE_NUM, itsMemoryPolicy, itsDevice); 00798 } 00799 00800 00801 // ###################################################################### 00802 template <class T> inline 00803 ArrayData<T>* ArrayData<T>::clone(const MemoryPolicy mp, const int dev) const 00804 { 00805 return new ArrayData(itsDims, itsData, mp, dev, itsMemoryPolicy, itsDevice); 00806 } 00807 00808 // ###################################################################### 00809 template <class T> inline 00810 ArrayData<T>::~ArrayData() throw() 00811 { 00812 if (itsStoragePolicy == MAKE_OWN_COPY) 00813 { 00814 ArrayHelper<T>::destruct(itsData, itsDims.sz()); 00815 wrap_invt_deallocate(itsData,itsMemoryPolicy,itsDevice,itsDims.sz()*sizeof(T)); 00816 } 00817 } 00818 00819 // ###################################################################### 00820 template <class T> inline 00821 const T* ArrayData<T>::data() const throw() 00822 { return itsData; } 00823 00824 // ###################################################################### 00825 template <class T> inline 00826 const T* ArrayData<T>::end() const throw() 00827 { return itsData + itsDims.sz(); } 00828 00829 // ###################################################################### 00830 template <class T> inline 00831 T* ArrayData<T>::dataw() throw() 00832 { return itsData; } 00833 00834 // ###################################################################### 00835 template <class T> inline 00836 T* ArrayData<T>::endw() throw() 00837 { return itsData + itsDims.sz(); } 00838 00839 // ###################################################################### 00840 template <class T> inline 00841 const Dims& ArrayData<T>::dims() const throw() 00842 { return itsDims; } 00843 00844 // ###################################################################### 00845 template <class T> inline 00846 int ArrayData<T>::w() const throw() 00847 { return itsDims.w(); } 00848 00849 // ###################################################################### 00850 template <class T> inline 00851 int ArrayData<T>::h() const throw() 00852 { return itsDims.h(); } 00853 00854 // ###################################################################### 00855 template <class T> inline 00856 void ArrayData<T>::acquire() throw() 00857 { 00858 itsRefCount.atomic_incr(); 00859 00860 check_acquisition(itsStoragePolicy, itsRefCount.atomic_get()); 00861 } 00862 00863 // ###################################################################### 00864 template <class T> inline 00865 void ArrayData<T>::release() throw() 00866 { 00867 if ( itsRefCount.atomic_decr_test_zero() ) delete this; 00868 } 00869 00870 // ###################################################################### 00871 template <class T> inline 00872 bool ArrayData<T>::isShared() const throw() 00873 { 00874 return (itsRefCount.atomic_get() > 1); 00875 } 00876 00877 // ###################################################################### 00878 template <class T> inline 00879 int ArrayData<T>::refCount() const throw() 00880 { 00881 return itsRefCount.atomic_get(); 00882 } 00883 00884 // ###################################################################### 00885 template <class T> inline 00886 MemoryPolicy ArrayData<T>::getMemoryPolicy() const 00887 { 00888 return itsMemoryPolicy; 00889 } 00890 00891 // ###################################################################### 00892 template <class T> inline 00893 int ArrayData<T>::getMemoryDevice() const 00894 { 00895 return itsDevice; 00896 } 00897 00898 00899 // ###################################################################### 00900 // ###################################################################### 00901 // INLINE FUNCTIONS for ArrayHandle: 00902 // ###################################################################### 00903 // ###################################################################### 00904 00905 // ###################################################################### 00906 template <class T> inline 00907 ArrayHandle<T>::ArrayHandle() : px(new ArrayData<T>) 00908 { 00909 px->acquire(); 00910 } 00911 00912 00913 // ###################################################################### 00914 template <class T> inline 00915 ArrayHandle<T>::ArrayHandle(const ArrayData<T>* p) throw() : 00916 // this cast is OK because we maintain const-correctness through 00917 // copy-on-write 00918 px(const_cast<ArrayData<T>*>(p)) 00919 { 00920 px->acquire(); 00921 } 00922 00923 // ###################################################################### 00924 template <class T> inline 00925 ArrayHandle<T>::ArrayHandle(const ArrayData<T>* p, const MemoryPolicy mp, const int dev) throw() : 00926 // this cast is OK because we maintain const-correctness through 00927 // copy-on-write 00928 px(const_cast<ArrayData<T>*>(p->clone(mp,dev))) 00929 { 00930 px->acquire(); 00931 } 00932 00933 00934 // ###################################################################### 00935 template <class T> inline 00936 ArrayHandle<T>::ArrayHandle(const ArrayHandle& r) throw() : 00937 px(r.px) 00938 { 00939 px->acquire(); 00940 } 00941 00942 // ###################################################################### 00943 template <class T> inline 00944 ArrayHandle<T>::~ArrayHandle() throw() 00945 { 00946 px->release(); 00947 } 00948 00949 // ###################################################################### 00950 template <class T> inline 00951 void ArrayHandle<T>::swap(ArrayHandle<T>& other) throw() 00952 { 00953 ArrayData<T>* other_px = other.px; 00954 other.px = this->px; 00955 this->px = other_px; 00956 } 00957 00958 // ###################################################################### 00959 template <class T> inline 00960 ArrayHandle<T>& ArrayHandle<T>::operator=(const ArrayHandle& r) throw() 00961 { 00962 ArrayHandle tmp(r); this->swap(tmp); return *this; 00963 } 00964 00965 // ###################################################################### 00966 template <class T> inline 00967 const ArrayData<T>& ArrayHandle<T>::get() const throw() 00968 { return *px; } 00969 00970 // ###################################################################### 00971 template <class T> inline 00972 ArrayData<T>& ArrayHandle<T>::uniq(const MemoryPolicy mp, const int dev) 00973 { 00974 if (px->isShared()) 00975 { 00976 ArrayHandle copy(px->clone(mp,dev)); 00977 this->swap(copy); 00978 } 00979 00980 return *px; 00981 } 00982 00983 // ###################################################################### 00984 template <class T> inline 00985 ArrayData<T>& ArrayHandle<T>::uniq() 00986 { 00987 if (px->isShared()) 00988 { 00989 ArrayHandle copy(px->clone()); 00990 this->swap(copy); 00991 } 00992 00993 return *px; 00994 } 00995 00996 // ###################################################################### 00997 template <class T> inline 00998 bool ArrayHandle<T>::isShared() const throw() 00999 { return px->isShared(); } 01000 01001 // ###################################################################### 01002 template <class T> inline 01003 bool ArrayHandle<T>::hasSameData(const ArrayHandle<T>& b) const throw() 01004 { return px==b.px; } 01005 01006 // ###################################################################### 01007 template <class T> inline 01008 int ArrayHandle<T>::refCount() const throw() 01009 { return px->refCount(); } 01010 01011 // ###################################################################### 01012 /* So things look consistent in everyone's emacs... */ 01013 /* Local Variables: */ 01014 /* indent-tabs-mode: nil */ 01015 /* End: */ 01016 01017 #endif // !ARRAY_IMPL_H_DEFINED