00001 /*!@file Util/AllocAux.C memory allocation routines for 16-byte alignment */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005 // 00005 // by the University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Rob Peters <rjpeters at usc dot edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Util/AllocAux.C $ 00035 // $Id: AllocAux.C 9602 2008-04-10 00:16:46Z rjpeters $ 00036 // 00037 00038 #ifndef UTIL_ALLOCAUX_C_DEFINED 00039 #define UTIL_ALLOCAUX_C_DEFINED 00040 00041 #include "Util/AllocAux.H" 00042 00043 #include "Util/Assert.H" 00044 #include "Util/log.H" 00045 #include "Util/sformat.H" 00046 #include "rutz/freelist.h" 00047 #include "rutz/mutex.h" 00048 #include "rutz/trace.h" 00049 00050 #include <map> 00051 #include <pthread.h> 00052 00053 namespace 00054 { 00055 //! Trivial allocator that just calls operator new() and operator delete() 00056 struct trivial_alloc 00057 { 00058 void set_debug(bool /*do_debug*/) 00059 { 00060 // no debug settings for this allocator type 00061 } 00062 00063 void set_allow_caching(bool /*on*/) 00064 { 00065 // no caching here in any case 00066 } 00067 00068 void show_stats(int /*verbosity*/, const char* /*pfx*/, 00069 const size_t block_size, const size_t overhead) const 00070 { 00071 // nothing to do here 00072 } 00073 00074 void* allocate(size_t nbytes, rutz::free_list_base** source = 0) 00075 { 00076 if (source != 0) 00077 *source = 0; 00078 return ::operator new(nbytes); 00079 } 00080 00081 void deallocate(void* space, rutz::free_list_base* source = 0) 00082 { 00083 ASSERT(source == 0); 00084 ::operator delete(space); 00085 } 00086 00087 void release_free_mem() 00088 { 00089 // nothing to do here 00090 } 00091 }; 00092 00093 //! Caching allocator with free lists for common allocation sizes 00094 template <size_t cache_size> 00095 struct fastcache_alloc 00096 { 00097 rutz::free_list_base* cache[cache_size]; 00098 mutable size_t num_alloc[cache_size]; 00099 bool allow_caching; 00100 00101 fastcache_alloc() 00102 : 00103 allow_caching(true) 00104 { 00105 for (size_t i = 0; i < cache_size; ++i) 00106 { 00107 this->cache[i] = 0; 00108 this->num_alloc[i] = 0; 00109 } 00110 } 00111 00112 void set_debug(bool /*do_debug*/) 00113 { 00114 // no debug settings for this allocator type 00115 } 00116 00117 void set_allow_caching(bool on) 00118 { 00119 if (!on && this->allow_caching) 00120 { 00121 // if we are currently caching but are being asked to turn 00122 // off caching, then let's first free any existing caches 00123 00124 this->release_free_mem(); 00125 } 00126 this->allow_caching = on; 00127 } 00128 00129 void show_stats(int verbosity, const char* pfx, 00130 const size_t block_size, const size_t overhead) const 00131 { 00132 size_t nused = 0; 00133 size_t bytes_allocated = 0; 00134 00135 std::map<size_t, std::string> msgs; 00136 00137 for (size_t i = 0; i < cache_size; ++i) 00138 if (this->cache[i] != 0) 00139 { 00140 ++nused; 00141 const size_t nb = (this->cache[i]->num_allocations() 00142 * this->cache[i]->alloc_size()); 00143 00144 const size_t extra = (this->cache[i]->num_allocations() 00145 - this->num_alloc[i]); 00146 00147 this->num_alloc[i] = this->cache[i]->num_allocations(); 00148 00149 bytes_allocated += nb; 00150 00151 if (verbosity <= 0) 00152 continue; 00153 00154 std::string msg = 00155 sformat("%s%sfastcache[%02"ZU"/%02"ZU"]: " 00156 "%10.4fMB in %4"ZU" allocations of %10.4fkB", 00157 pfx ? pfx : "", pfx ? ": " : "", 00158 i, cache_size, nb / (1024.0*1024.0), 00159 this->cache[i]->num_allocations(), 00160 this->cache[i]->alloc_size() / 1024.0); 00161 00162 if (block_size > 0) 00163 { 00164 if (this->cache[i]->alloc_size() - overhead >= block_size 00165 || this->cache[i]->alloc_size() - overhead <= 1) 00166 msg += sformat(" (%.2fkB * %7.1f + %"ZU"B)", 00167 block_size / 1024.0, 00168 (double(this->cache[i]->alloc_size() - overhead) 00169 / double(block_size)), 00170 overhead); 00171 else 00172 msg += sformat(" (%.2fkB / %7.1f + %"ZU"B)", 00173 block_size / 1024.0, 00174 (double(block_size) 00175 / double(this->cache[i]->alloc_size() - overhead)), 00176 overhead); 00177 } 00178 00179 if (extra > 0) 00180 msg += sformat(" (+%"ZU" new)", extra); 00181 00182 msgs[this->cache[i]->alloc_size()] = msg; 00183 } 00184 00185 for (std::map<size_t, std::string>::const_iterator 00186 itr = msgs.begin(), stop = msgs.end(); 00187 itr != stop; ++itr) 00188 LINFO("%s", (*itr).second.c_str()); 00189 00190 00191 std::string msg = 00192 sformat("%s%sfastcache_alloc<%"ZU">: %"ZU"/%"ZU" cache table " 00193 "entries in use, %fMB total allocated", 00194 pfx ? pfx : "", pfx ? ": " : "", 00195 cache_size, nused, cache_size, 00196 bytes_allocated / (1024.0*1024.0)); 00197 00198 if (block_size > 0) 00199 msg += sformat(" (%.2fkB * %7.1f)", 00200 block_size / 1024.0, 00201 double(bytes_allocated) / double(block_size)); 00202 00203 LINFO("%s", msg.c_str()); 00204 } 00205 00206 // allocate memory block of size nbytes; also return the address 00207 // of the rutz::free_list_base, if any, that was used for 00208 // allocation 00209 void* allocate(size_t nbytes, rutz::free_list_base** source) 00210 { 00211 if (this->allow_caching && source != 0) 00212 for (size_t i = 0; i < cache_size; ++i) 00213 { 00214 if (this->cache[i] != 0) 00215 { 00216 // we found a filled slot, let's see if it matches our 00217 // requested size 00218 if (this->cache[i]->alloc_size() == nbytes) 00219 { 00220 *source = this->cache[i]; 00221 return this->cache[i]->allocate(nbytes); 00222 } 00223 // else, continue 00224 } 00225 else // this->cache[i] == 0 00226 { 00227 // we found an empty slot, let's set up a new free 00228 // list for our requested size: 00229 this->cache[i] = new rutz::free_list_base(nbytes); 00230 *source = this->cache[i]; 00231 return this->cache[i]->allocate(nbytes); 00232 } 00233 } 00234 00235 *source = 0; 00236 return ::operator new(nbytes); 00237 } 00238 00239 // deallocate memory from the given rutz::free_list_base, 00240 // otherwise free it globally 00241 void deallocate(void* space, rutz::free_list_base* source) 00242 { 00243 if (source != 0) 00244 { 00245 source->deallocate(space); 00246 if (!this->allow_caching) 00247 source->release_free_nodes(); 00248 } 00249 else 00250 { 00251 ::operator delete(space); 00252 } 00253 } 00254 00255 void release_free_mem() 00256 { 00257 for (size_t i = 0; i < cache_size; ++i) 00258 if (this->cache[i] != 0) 00259 this->cache[i]->release_free_nodes(); 00260 } 00261 }; 00262 00263 //! Auxiliary information about an aligned memory allocation 00264 template <size_t N> 00265 struct alloc_info 00266 { 00267 struct data_s 00268 { 00269 // address of ALLOCATED memory, not the address returned to 00270 // the user: 00271 void* alloc_addr; 00272 00273 // number of bytes ALLOCATED, not the number requested by the 00274 // user: 00275 size_t alloc_nbytes; 00276 00277 // the freelist from which this memory was allocated, or else 00278 // null: 00279 rutz::free_list_base* source; 00280 00281 size_t user_nbytes() const 00282 { 00283 return alloc_nbytes - 2*N; 00284 } 00285 00286 unsigned long align() const 00287 { 00288 return reinterpret_cast<unsigned long>(this->alloc_addr) % N; 00289 } 00290 00291 unsigned long adjust() const 00292 { 00293 return N-this->align(); 00294 } 00295 00296 void* user_addr() const 00297 { 00298 return 00299 static_cast<char*>(this->alloc_addr) 00300 +this->adjust() 00301 +N; 00302 } 00303 00304 unsigned long user_align() const 00305 { 00306 return reinterpret_cast<unsigned long>(this->user_addr()) % N; 00307 } 00308 00309 void print() const 00310 { 00311 LINFO("alloc: internal=[%"ZU" bytes @ %p (align%%%zu=%lu)], " 00312 "user=[%"ZU" bytes @ %p (align%%%"ZU"=%lu)]", 00313 this->alloc_nbytes, this->alloc_addr, N, this->align(), 00314 this->user_nbytes(), this->user_addr(), N, this->user_align()); 00315 } 00316 00317 }; 00318 data_s data; 00319 char pad[N-sizeof(data_s)]; 00320 }; 00321 00322 //! Allocate memory that is aligned on an N-byte boundary 00323 template <class src_type, size_t N> 00324 struct aligned_alloc 00325 { 00326 src_type src_alloc; 00327 00328 bool do_debug_printing; 00329 double nbytes_allocated; 00330 size_t nallocations; 00331 size_t nbytes_current; 00332 size_t nallocations_current; 00333 00334 struct assertions 00335 { 00336 // this dummy array is here as a compile-time assertion about 00337 // sizeof(alloc_info<N>) -- if it doesn't match our 00338 // expectations, then dummy_array gets a negative array size, 00339 // which triggers a compile error: 00340 char assert_sizeof_alloc_info_must_be_N[(sizeof(alloc_info<N>) 00341 == N) ? 1 : -1]; 00342 }; 00343 00344 aligned_alloc() 00345 : 00346 do_debug_printing(false), 00347 nbytes_allocated(0.0), 00348 nallocations(0), 00349 nbytes_current(0), 00350 nallocations_current(0) 00351 {} 00352 00353 void set_debug(bool do_debug) 00354 { 00355 this->do_debug_printing = do_debug; 00356 00357 // also call set_debug() on our child object 00358 src_alloc.set_debug(do_debug); 00359 } 00360 00361 void set_allow_caching(bool on) 00362 { 00363 src_alloc.set_allow_caching(on); 00364 } 00365 00366 void show_stats(int verbosity, const char* pfx, 00367 const size_t block_size, const size_t overhead) const 00368 { 00369 LINFO("%s%saligned_alloc<%"ZU">: " 00370 "all-time: [%fMB in %"ZU" allocations], " 00371 "current: [%fMB in %"ZU" allocations]", 00372 pfx ? pfx : "", pfx ? ": " : "", 00373 N, 00374 this->nbytes_allocated/(1024.0*1024.0), this->nallocations, 00375 this->nbytes_current/(1024.0*1024.0), this->nallocations_current); 00376 00377 // also show stats for our child object 00378 src_alloc.show_stats(verbosity, pfx, block_size, overhead+2*N); 00379 } 00380 00381 void* allocate(size_t user_nbytes) 00382 { 00383 GVX_TRACE(__PRETTY_FUNCTION__); 00384 00385 alloc_info<N> info; 00386 info.data.source = 0; 00387 00388 // We request extra space beyond what the user wants -- N extra 00389 // bytes allow us to return an address to the user that is 00390 // aligned to a N-byte boundary, and sizeof(alloc_info<N>) extra 00391 // bytes allow us to stick some auxiliary info just ahead of the 00392 // address that we return to the user. Note that for convenience 00393 // we have set things up so that sizeof(alloc_info<N>)==N, so 00394 // the number of extra bytes that we need is just 2*N. 00395 info.data.alloc_nbytes = user_nbytes+2*N; 00396 00397 info.data.alloc_addr = 00398 this->src_alloc.allocate(info.data.alloc_nbytes, &info.data.source); 00399 00400 /* We will set things up like this: 00401 00402 +- address = return value of src_alloc.allocate() 00403 | 00404 | +- address = return value 00405 | | of this function 00406 v v 00407 00408 +------------------+-------------------+------------------------+ 00409 | len==adjust | len==N | len==user_nbytes | 00410 | contents: | contents: | contents: | 00411 | unused | alloc_info<N> | empty space for user | 00412 | alignment: | alignment: | alignment: | 00413 | unknown | N-byte boundary | N-byte boundary | 00414 +------------------+-------------------+------------------------+ 00415 00416 */ 00417 00418 void* const user_addr = info.data.user_addr(); 00419 00420 static_cast<alloc_info<N>*>(user_addr)[-1].data = info.data; 00421 00422 this->nbytes_allocated += info.data.alloc_nbytes; 00423 ++this->nallocations; 00424 this->nbytes_current += info.data.alloc_nbytes; 00425 ++this->nallocations_current; 00426 00427 if (this->do_debug_printing) 00428 { 00429 info.data.print(); 00430 this->show_stats(0, 0, 0, 0); 00431 } 00432 00433 return user_addr; 00434 } 00435 00436 void deallocate(void* user_addr) 00437 { 00438 GVX_TRACE(__PRETTY_FUNCTION__); 00439 00440 const alloc_info<N>* const info = 00441 static_cast<alloc_info<N>*>(user_addr) - 1; 00442 00443 this->nbytes_current -= info->data.alloc_nbytes; 00444 --this->nallocations_current; 00445 00446 if (this->do_debug_printing) 00447 { 00448 info->data.print(); 00449 this->show_stats(0, 0, 0, 0); 00450 } 00451 00452 this->src_alloc.deallocate(info->data.alloc_addr, info->data.source); 00453 } 00454 00455 void release_free_mem() 00456 { 00457 this->src_alloc.release_free_mem(); 00458 } 00459 }; 00460 00461 /* Here are the various macros that you can twiddle if you need to 00462 change the allocation strategy. Basically you can have aligned 00463 allocation (DO_ALIGN) at an arbitrary N-byte boundary (NALIGN), 00464 with optional freelist caching (DO_FASTCACHE) of (NCACHE) 00465 commonly-requested memory sizes. 00466 00467 If you turn off both DO_ALIGN and DO_FASTCACHE, you will end up 00468 using trivial_alloc, which is just a bare wrapper around operator 00469 new() and operator delete(). By default, malloc() returns 8-byte 00470 aligned memory on gnu/linux/x86 machines. 00471 00472 Note that certain libraries (fftw [see FourierEngine] in 00473 particular) require greater than 8-byte alignment, so if you are 00474 going to be using those parts of the code, then you'll need to 00475 leave DO_ALIGN set, with NALIGN>=16. Also note that NALIGN must 00476 be at least 4*sizeof(void*) -- in particular, 16 will be too 00477 small on 64-bit systems for which sizeof(void*) is 8; for those 00478 systems we'll need NALIGN>=32. 00479 00480 DO_FASTCACHE is here primarily for performance; since our memory 00481 usage pattern tends to involve many many allocations of Image 00482 objects with only a few different Dims shapes, it helps to cache 00483 those memory allocations in a freelist. Profiling tests showed 00484 that this can give a 15-20% speedup. 00485 */ 00486 00487 #define DO_ALIGN 00488 #define DO_FASTCACHE 00489 #define NALIGN ( (4*sizeof(void*)) > 16 ? (4*sizeof(void*)) : 16 ) 00490 #define NCACHE 64 00491 00492 #ifdef DO_ALIGN 00493 # ifdef DO_FASTCACHE 00494 typedef aligned_alloc<fastcache_alloc<NCACHE>, NALIGN> g_alloc_type; 00495 # else 00496 typedef aligned_alloc<trivial_alloc, NALIGN> g_alloc_type; 00497 # endif 00498 #else // !DO_ALIGN 00499 typedef trivial_alloc g_alloc_type; 00500 #endif 00501 00502 // Here is our global allocator object, whose type is determined by 00503 // the various macro settings abovve, and a corresponding mutex. For 00504 // now, we use a heavy-handed approach and just use the mutex to 00505 // lock the entire structure during each call to any of the public 00506 // functions. If this turns out to be a performance problem, we 00507 // could turn to finer-grained locking within the various allocator 00508 // classes themselves. 00509 g_alloc_type g_alloc; 00510 pthread_mutex_t g_alloc_mutex = PTHREAD_MUTEX_INITIALIZER; 00511 00512 size_t g_stats_units = 0; 00513 } 00514 00515 void* invt_allocate_aux(size_t user_nbytes) 00516 { 00517 GVX_MUTEX_LOCK(&g_alloc_mutex); 00518 return g_alloc.allocate(user_nbytes); 00519 } 00520 00521 void invt_deallocate_aux(void* mem) 00522 { 00523 GVX_MUTEX_LOCK(&g_alloc_mutex); 00524 g_alloc.deallocate(mem); 00525 } 00526 00527 void invt_allocation_release_free_mem() 00528 { 00529 GVX_MUTEX_LOCK(&g_alloc_mutex); 00530 g_alloc.release_free_mem(); 00531 } 00532 00533 void invt_allocation_allow_caching(bool on) 00534 { 00535 GVX_MUTEX_LOCK(&g_alloc_mutex); 00536 g_alloc.set_allow_caching(on); 00537 } 00538 00539 void invt_allocation_debug_print(bool do_debug) 00540 { 00541 GVX_MUTEX_LOCK(&g_alloc_mutex); 00542 g_alloc.set_debug(do_debug); 00543 } 00544 00545 void invt_allocation_show_stats(int verbosity, const char* pfx, 00546 const size_t block_size) 00547 { 00548 GVX_MUTEX_LOCK(&g_alloc_mutex); 00549 g_alloc.show_stats(verbosity, pfx, 00550 block_size ? block_size : g_stats_units, 0); 00551 } 00552 00553 void invt_allocation_set_stats_units(const size_t units) 00554 { 00555 g_stats_units = units; 00556 } 00557 00558 // ###################################################################### 00559 /* So things look consistent in everyone's emacs... */ 00560 /* Local Variables: */ 00561 /* indent-tabs-mode: nil */ 00562 /* End: */ 00563 00564 #endif // UTIL_ALLOCAUX_C_DEFINED