AllocAux.C

Go to the documentation of this file.
00001 /*!@file Util/AllocAux.C memory allocation routines for 16-byte alignment */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00005 // by the University of Southern California (USC) and the iLab at USC.  //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Rob Peters <rjpeters at usc dot edu>
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/Util/AllocAux.C $
00035 // $Id: AllocAux.C 9602 2008-04-10 00:16:46Z rjpeters $
00036 //
00037 
00038 #ifndef UTIL_ALLOCAUX_C_DEFINED
00039 #define UTIL_ALLOCAUX_C_DEFINED
00040 
00041 #include "Util/AllocAux.H"
00042 
00043 #include "Util/Assert.H"
00044 #include "Util/log.H"
00045 #include "Util/sformat.H"
00046 #include "rutz/freelist.h"
00047 #include "rutz/mutex.h"
00048 #include "rutz/trace.h"
00049 
00050 #include <map>
00051 #include <pthread.h>
00052 
00053 namespace
00054 {
00055   //! Trivial allocator that just calls operator new() and operator delete()
00056   struct trivial_alloc
00057   {
00058     void set_debug(bool /*do_debug*/)
00059     {
00060       // no debug settings for this allocator type
00061     }
00062 
00063     void set_allow_caching(bool /*on*/)
00064     {
00065       // no caching here in any case
00066     }
00067 
00068     void show_stats(int /*verbosity*/, const char* /*pfx*/,
00069                     const size_t block_size, const size_t overhead) const
00070     {
00071       // nothing to do here
00072     }
00073 
00074     void* allocate(size_t nbytes, rutz::free_list_base** source = 0)
00075     {
00076       if (source != 0)
00077         *source = 0;
00078       return ::operator new(nbytes);
00079     }
00080 
00081     void deallocate(void* space, rutz::free_list_base* source = 0)
00082     {
00083       ASSERT(source == 0);
00084       ::operator delete(space);
00085     }
00086 
00087     void release_free_mem()
00088     {
00089       // nothing to do here
00090     }
00091   };
00092 
00093   //! Caching allocator with free lists for common allocation sizes
00094   template <size_t cache_size>
00095   struct fastcache_alloc
00096   {
00097     rutz::free_list_base* cache[cache_size];
00098     mutable size_t num_alloc[cache_size];
00099     bool allow_caching;
00100 
00101     fastcache_alloc()
00102       :
00103       allow_caching(true)
00104     {
00105       for (size_t i = 0; i < cache_size; ++i)
00106         {
00107           this->cache[i] = 0;
00108           this->num_alloc[i] = 0;
00109         }
00110     }
00111 
00112     void set_debug(bool /*do_debug*/)
00113     {
00114       // no debug settings for this allocator type
00115     }
00116 
00117     void set_allow_caching(bool on)
00118     {
00119       if (!on && this->allow_caching)
00120         {
00121           // if we are currently caching but are being asked to turn
00122           // off caching, then let's first free any existing caches
00123 
00124           this->release_free_mem();
00125         }
00126       this->allow_caching = on;
00127     }
00128 
00129     void show_stats(int verbosity, const char* pfx,
00130                     const size_t block_size, const size_t overhead) const
00131     {
00132       size_t nused = 0;
00133       size_t bytes_allocated = 0;
00134 
00135       std::map<size_t, std::string> msgs;
00136 
00137       for (size_t i = 0; i < cache_size; ++i)
00138         if (this->cache[i] != 0)
00139           {
00140             ++nused;
00141             const size_t nb = (this->cache[i]->num_allocations()
00142                                * this->cache[i]->alloc_size());
00143 
00144             const size_t extra = (this->cache[i]->num_allocations()
00145                                   - this->num_alloc[i]);
00146 
00147             this->num_alloc[i] = this->cache[i]->num_allocations();
00148 
00149             bytes_allocated += nb;
00150 
00151             if (verbosity <= 0)
00152               continue;
00153 
00154             std::string msg =
00155               sformat("%s%sfastcache[%02"ZU"/%02"ZU"]: "
00156                       "%10.4fMB in %4"ZU" allocations of %10.4fkB",
00157                       pfx ? pfx : "", pfx ? ": " : "",
00158                       i, cache_size, nb / (1024.0*1024.0),
00159                       this->cache[i]->num_allocations(),
00160                       this->cache[i]->alloc_size() / 1024.0);
00161 
00162             if (block_size > 0)
00163               {
00164                 if (this->cache[i]->alloc_size() - overhead >= block_size
00165                     || this->cache[i]->alloc_size() - overhead <= 1)
00166                   msg += sformat(" (%.2fkB * %7.1f + %"ZU"B)",
00167                                  block_size / 1024.0,
00168                                  (double(this->cache[i]->alloc_size() - overhead)
00169                                   / double(block_size)),
00170                                  overhead);
00171                 else
00172                   msg += sformat(" (%.2fkB / %7.1f + %"ZU"B)",
00173                                  block_size / 1024.0,
00174                                  (double(block_size)
00175                                   / double(this->cache[i]->alloc_size() - overhead)),
00176                                  overhead);
00177               }
00178 
00179             if (extra > 0)
00180               msg += sformat(" (+%"ZU" new)", extra);
00181 
00182             msgs[this->cache[i]->alloc_size()] = msg;
00183           }
00184 
00185       for (std::map<size_t, std::string>::const_iterator
00186              itr = msgs.begin(), stop = msgs.end();
00187            itr != stop; ++itr)
00188         LINFO("%s", (*itr).second.c_str());
00189 
00190 
00191       std::string msg =
00192         sformat("%s%sfastcache_alloc<%"ZU">: %"ZU"/%"ZU" cache table "
00193                 "entries in use, %fMB total allocated",
00194                 pfx ? pfx : "", pfx ? ": " : "",
00195                 cache_size, nused, cache_size,
00196                 bytes_allocated / (1024.0*1024.0));
00197 
00198       if (block_size > 0)
00199         msg += sformat(" (%.2fkB * %7.1f)",
00200                        block_size / 1024.0,
00201                        double(bytes_allocated) / double(block_size));
00202 
00203       LINFO("%s", msg.c_str());
00204     }
00205 
00206     // allocate memory block of size nbytes; also return the address
00207     // of the rutz::free_list_base, if any, that was used for
00208     // allocation
00209     void* allocate(size_t nbytes, rutz::free_list_base** source)
00210     {
00211       if (this->allow_caching && source != 0)
00212         for (size_t i = 0; i < cache_size; ++i)
00213           {
00214             if (this->cache[i] != 0)
00215               {
00216                 // we found a filled slot, let's see if it matches our
00217                 // requested size
00218                 if (this->cache[i]->alloc_size() == nbytes)
00219                   {
00220                     *source = this->cache[i];
00221                     return this->cache[i]->allocate(nbytes);
00222                   }
00223                 // else, continue
00224               }
00225             else // this->cache[i] == 0
00226               {
00227                 // we found an empty slot, let's set up a new free
00228                 // list for our requested size:
00229                 this->cache[i] = new rutz::free_list_base(nbytes);
00230                 *source = this->cache[i];
00231                 return this->cache[i]->allocate(nbytes);
00232               }
00233           }
00234 
00235       *source = 0;
00236       return ::operator new(nbytes);
00237     }
00238 
00239     // deallocate memory from the given rutz::free_list_base,
00240     // otherwise free it globally
00241     void deallocate(void* space, rutz::free_list_base* source)
00242     {
00243       if (source != 0)
00244         {
00245           source->deallocate(space);
00246           if (!this->allow_caching)
00247             source->release_free_nodes();
00248         }
00249       else
00250         {
00251           ::operator delete(space);
00252         }
00253     }
00254 
00255     void release_free_mem()
00256     {
00257       for (size_t i = 0; i < cache_size; ++i)
00258         if (this->cache[i] != 0)
00259           this->cache[i]->release_free_nodes();
00260     }
00261   };
00262 
00263   //! Auxiliary information about an aligned memory allocation
00264   template <size_t N>
00265   struct alloc_info
00266   {
00267     struct data_s
00268     {
00269       // address of ALLOCATED memory, not the address returned to
00270       // the user:
00271       void* alloc_addr;
00272 
00273       // number of bytes ALLOCATED, not the number requested by the
00274       // user:
00275       size_t alloc_nbytes;
00276 
00277       // the freelist from which this memory was allocated, or else
00278       // null:
00279       rutz::free_list_base* source;
00280 
00281       size_t user_nbytes() const
00282       {
00283         return alloc_nbytes - 2*N;
00284       }
00285 
00286       unsigned long align() const
00287       {
00288         return reinterpret_cast<unsigned long>(this->alloc_addr) % N;
00289       }
00290 
00291       unsigned long adjust() const
00292       {
00293         return N-this->align();
00294       }
00295 
00296       void* user_addr() const
00297       {
00298         return
00299           static_cast<char*>(this->alloc_addr)
00300           +this->adjust()
00301           +N;
00302       }
00303 
00304       unsigned long user_align() const
00305       {
00306         return reinterpret_cast<unsigned long>(this->user_addr()) % N;
00307       }
00308 
00309       void print() const
00310       {
00311         LINFO("alloc: internal=[%"ZU" bytes @ %p (align%%%zu=%lu)], "
00312               "user=[%"ZU" bytes @ %p (align%%%"ZU"=%lu)]",
00313               this->alloc_nbytes, this->alloc_addr, N, this->align(),
00314               this->user_nbytes(), this->user_addr(), N, this->user_align());
00315       }
00316 
00317     };
00318     data_s data;
00319     char pad[N-sizeof(data_s)];
00320   };
00321 
00322   //! Allocate memory that is aligned on an N-byte boundary
00323   template <class src_type, size_t N>
00324   struct aligned_alloc
00325   {
00326     src_type src_alloc;
00327 
00328     bool do_debug_printing;
00329     double nbytes_allocated;
00330     size_t nallocations;
00331     size_t nbytes_current;
00332     size_t nallocations_current;
00333 
00334     struct assertions
00335     {
00336       // this dummy array is here as a compile-time assertion about
00337       // sizeof(alloc_info<N>) -- if it doesn't match our
00338       // expectations, then dummy_array gets a negative array size,
00339       // which triggers a compile error:
00340       char assert_sizeof_alloc_info_must_be_N[(sizeof(alloc_info<N>)
00341                                                == N) ? 1 : -1];
00342     };
00343 
00344     aligned_alloc()
00345       :
00346       do_debug_printing(false),
00347       nbytes_allocated(0.0),
00348       nallocations(0),
00349       nbytes_current(0),
00350       nallocations_current(0)
00351     {}
00352 
00353     void set_debug(bool do_debug)
00354     {
00355       this->do_debug_printing = do_debug;
00356 
00357       // also call set_debug() on our child object
00358       src_alloc.set_debug(do_debug);
00359     }
00360 
00361     void set_allow_caching(bool on)
00362     {
00363       src_alloc.set_allow_caching(on);
00364     }
00365 
00366     void show_stats(int verbosity, const char* pfx,
00367                     const size_t block_size, const size_t overhead) const
00368     {
00369       LINFO("%s%saligned_alloc<%"ZU">: "
00370             "all-time: [%fMB in %"ZU" allocations], "
00371             "current: [%fMB in %"ZU" allocations]",
00372             pfx ? pfx : "", pfx ? ": " : "",
00373             N,
00374             this->nbytes_allocated/(1024.0*1024.0), this->nallocations,
00375             this->nbytes_current/(1024.0*1024.0), this->nallocations_current);
00376 
00377       // also show stats for our child object
00378       src_alloc.show_stats(verbosity, pfx, block_size, overhead+2*N);
00379     }
00380 
00381     void* allocate(size_t user_nbytes)
00382     {
00383     GVX_TRACE(__PRETTY_FUNCTION__);
00384 
00385       alloc_info<N> info;
00386       info.data.source = 0;
00387 
00388       // We request extra space beyond what the user wants -- N extra
00389       // bytes allow us to return an address to the user that is
00390       // aligned to a N-byte boundary, and sizeof(alloc_info<N>) extra
00391       // bytes allow us to stick some auxiliary info just ahead of the
00392       // address that we return to the user. Note that for convenience
00393       // we have set things up so that sizeof(alloc_info<N>)==N, so
00394       // the number of extra bytes that we need is just 2*N.
00395       info.data.alloc_nbytes = user_nbytes+2*N;
00396 
00397       info.data.alloc_addr =
00398         this->src_alloc.allocate(info.data.alloc_nbytes, &info.data.source);
00399 
00400       /* We will set things up like this:
00401 
00402       +- address = return value of src_alloc.allocate()
00403       |
00404       |                                      +- address = return value
00405       |                                      |     of this function
00406       v                                      v
00407 
00408       +------------------+-------------------+------------------------+
00409       | len==adjust      | len==N            | len==user_nbytes       |
00410       | contents:        | contents:         | contents:              |
00411       |   unused         |   alloc_info<N>   |   empty space for user |
00412       | alignment:       | alignment:        | alignment:             |
00413       |   unknown        |   N-byte boundary |   N-byte boundary      |
00414       +------------------+-------------------+------------------------+
00415 
00416       */
00417 
00418       void* const user_addr = info.data.user_addr();
00419 
00420       static_cast<alloc_info<N>*>(user_addr)[-1].data = info.data;
00421 
00422       this->nbytes_allocated += info.data.alloc_nbytes;
00423       ++this->nallocations;
00424       this->nbytes_current += info.data.alloc_nbytes;
00425       ++this->nallocations_current;
00426 
00427       if (this->do_debug_printing)
00428         {
00429           info.data.print();
00430           this->show_stats(0, 0, 0, 0);
00431         }
00432 
00433       return user_addr;
00434     }
00435 
00436     void deallocate(void* user_addr)
00437     {
00438     GVX_TRACE(__PRETTY_FUNCTION__);
00439 
00440       const alloc_info<N>* const info =
00441         static_cast<alloc_info<N>*>(user_addr) - 1;
00442 
00443       this->nbytes_current -= info->data.alloc_nbytes;
00444       --this->nallocations_current;
00445 
00446       if (this->do_debug_printing)
00447         {
00448           info->data.print();
00449           this->show_stats(0, 0, 0, 0);
00450         }
00451 
00452       this->src_alloc.deallocate(info->data.alloc_addr, info->data.source);
00453     }
00454 
00455     void release_free_mem()
00456     {
00457       this->src_alloc.release_free_mem();
00458     }
00459   };
00460 
00461   /* Here are the various macros that you can twiddle if you need to
00462      change the allocation strategy. Basically you can have aligned
00463      allocation (DO_ALIGN) at an arbitrary N-byte boundary (NALIGN),
00464      with optional freelist caching (DO_FASTCACHE) of (NCACHE)
00465      commonly-requested memory sizes.
00466 
00467      If you turn off both DO_ALIGN and DO_FASTCACHE, you will end up
00468      using trivial_alloc, which is just a bare wrapper around operator
00469      new() and operator delete(). By default, malloc() returns 8-byte
00470      aligned memory on gnu/linux/x86 machines.
00471 
00472      Note that certain libraries (fftw [see FourierEngine] in
00473      particular) require greater than 8-byte alignment, so if you are
00474      going to be using those parts of the code, then you'll need to
00475      leave DO_ALIGN set, with NALIGN>=16. Also note that NALIGN must
00476      be at least 4*sizeof(void*) -- in particular, 16 will be too
00477      small on 64-bit systems for which sizeof(void*) is 8; for those
00478      systems we'll need NALIGN>=32.
00479 
00480      DO_FASTCACHE is here primarily for performance; since our memory
00481      usage pattern tends to involve many many allocations of Image
00482      objects with only a few different Dims shapes, it helps to cache
00483      those memory allocations in a freelist. Profiling tests showed
00484      that this can give a 15-20% speedup.
00485   */
00486 
00487 #define DO_ALIGN
00488 #define DO_FASTCACHE
00489 #define NALIGN        ( (4*sizeof(void*)) > 16 ? (4*sizeof(void*)) : 16 )
00490 #define NCACHE        64
00491 
00492 #ifdef DO_ALIGN
00493 #  ifdef DO_FASTCACHE
00494   typedef aligned_alloc<fastcache_alloc<NCACHE>, NALIGN> g_alloc_type;
00495 #  else
00496   typedef aligned_alloc<trivial_alloc, NALIGN>           g_alloc_type;
00497 #  endif
00498 #else // !DO_ALIGN
00499   typedef trivial_alloc                                  g_alloc_type;
00500 #endif
00501 
00502   // Here is our global allocator object, whose type is determined by
00503   // the various macro settings abovve, and a corresponding mutex. For
00504   // now, we use a heavy-handed approach and just use the mutex to
00505   // lock the entire structure during each call to any of the public
00506   // functions. If this turns out to be a performance problem, we
00507   // could turn to finer-grained locking within the various allocator
00508   // classes themselves.
00509   g_alloc_type    g_alloc;
00510   pthread_mutex_t g_alloc_mutex = PTHREAD_MUTEX_INITIALIZER;
00511 
00512   size_t          g_stats_units = 0;
00513 }
00514 
00515 void* invt_allocate_aux(size_t user_nbytes)
00516 {
00517   GVX_MUTEX_LOCK(&g_alloc_mutex);
00518   return g_alloc.allocate(user_nbytes);
00519 }
00520 
00521 void invt_deallocate_aux(void* mem)
00522 {
00523   GVX_MUTEX_LOCK(&g_alloc_mutex);
00524   g_alloc.deallocate(mem);
00525 }
00526 
00527 void invt_allocation_release_free_mem()
00528 {
00529   GVX_MUTEX_LOCK(&g_alloc_mutex);
00530   g_alloc.release_free_mem();
00531 }
00532 
00533 void invt_allocation_allow_caching(bool on)
00534 {
00535   GVX_MUTEX_LOCK(&g_alloc_mutex);
00536   g_alloc.set_allow_caching(on);
00537 }
00538 
00539 void invt_allocation_debug_print(bool do_debug)
00540 {
00541   GVX_MUTEX_LOCK(&g_alloc_mutex);
00542   g_alloc.set_debug(do_debug);
00543 }
00544 
00545 void invt_allocation_show_stats(int verbosity, const char* pfx,
00546                                 const size_t block_size)
00547 {
00548   GVX_MUTEX_LOCK(&g_alloc_mutex);
00549   g_alloc.show_stats(verbosity, pfx,
00550                      block_size ? block_size : g_stats_units, 0);
00551 }
00552 
00553 void invt_allocation_set_stats_units(const size_t units)
00554 {
00555   g_stats_units = units;
00556 }
00557 
00558 // ######################################################################
00559 /* So things look consistent in everyone's emacs... */
00560 /* Local Variables: */
00561 /* indent-tabs-mode: nil */
00562 /* End: */
00563 
00564 #endif // UTIL_ALLOCAUX_C_DEFINED
Generated on Sun May 8 08:42:24 2011 for iLab Neuromorphic Vision Toolkit by  doxygen 1.6.3