00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #ifndef UTIL_ALLOCAUX_C_DEFINED
00039 #define UTIL_ALLOCAUX_C_DEFINED
00040
00041 #include "Util/AllocAux.H"
00042
00043 #include "Util/Assert.H"
00044 #include "Util/log.H"
00045 #include "Util/sformat.H"
00046 #include "rutz/freelist.h"
00047 #include "rutz/mutex.h"
00048 #include "rutz/trace.h"
00049
00050 #include <map>
00051 #include <pthread.h>
00052
00053 namespace
00054 {
00055
00056 struct trivial_alloc
00057 {
00058 void set_debug(bool )
00059 {
00060
00061 }
00062
00063 void set_allow_caching(bool )
00064 {
00065
00066 }
00067
00068 void show_stats(int , const char* ,
00069 const size_t block_size, const size_t overhead) const
00070 {
00071
00072 }
00073
00074 void* allocate(size_t nbytes, rutz::free_list_base** source = 0)
00075 {
00076 if (source != 0)
00077 *source = 0;
00078 return ::operator new(nbytes);
00079 }
00080
00081 void deallocate(void* space, rutz::free_list_base* source = 0)
00082 {
00083 ASSERT(source == 0);
00084 ::operator delete(space);
00085 }
00086
00087 void release_free_mem()
00088 {
00089
00090 }
00091 };
00092
00093
00094 template <size_t cache_size>
00095 struct fastcache_alloc
00096 {
00097 rutz::free_list_base* cache[cache_size];
00098 mutable size_t num_alloc[cache_size];
00099 bool allow_caching;
00100
00101 fastcache_alloc()
00102 :
00103 allow_caching(true)
00104 {
00105 for (size_t i = 0; i < cache_size; ++i)
00106 {
00107 this->cache[i] = 0;
00108 this->num_alloc[i] = 0;
00109 }
00110 }
00111
00112 void set_debug(bool )
00113 {
00114
00115 }
00116
00117 void set_allow_caching(bool on)
00118 {
00119 if (!on && this->allow_caching)
00120 {
00121
00122
00123
00124 this->release_free_mem();
00125 }
00126 this->allow_caching = on;
00127 }
00128
00129 void show_stats(int verbosity, const char* pfx,
00130 const size_t block_size, const size_t overhead) const
00131 {
00132 size_t nused = 0;
00133 size_t bytes_allocated = 0;
00134
00135 std::map<size_t, std::string> msgs;
00136
00137 for (size_t i = 0; i < cache_size; ++i)
00138 if (this->cache[i] != 0)
00139 {
00140 ++nused;
00141 const size_t nb = (this->cache[i]->num_allocations()
00142 * this->cache[i]->alloc_size());
00143
00144 const size_t extra = (this->cache[i]->num_allocations()
00145 - this->num_alloc[i]);
00146
00147 this->num_alloc[i] = this->cache[i]->num_allocations();
00148
00149 bytes_allocated += nb;
00150
00151 if (verbosity <= 0)
00152 continue;
00153
00154 std::string msg =
00155 sformat("%s%sfastcache[%02"ZU"/%02"ZU"]: "
00156 "%10.4fMB in %4"ZU" allocations of %10.4fkB",
00157 pfx ? pfx : "", pfx ? ": " : "",
00158 i, cache_size, nb / (1024.0*1024.0),
00159 this->cache[i]->num_allocations(),
00160 this->cache[i]->alloc_size() / 1024.0);
00161
00162 if (block_size > 0)
00163 {
00164 if (this->cache[i]->alloc_size() - overhead >= block_size
00165 || this->cache[i]->alloc_size() - overhead <= 1)
00166 msg += sformat(" (%.2fkB * %7.1f + %"ZU"B)",
00167 block_size / 1024.0,
00168 (double(this->cache[i]->alloc_size() - overhead)
00169 / double(block_size)),
00170 overhead);
00171 else
00172 msg += sformat(" (%.2fkB / %7.1f + %"ZU"B)",
00173 block_size / 1024.0,
00174 (double(block_size)
00175 / double(this->cache[i]->alloc_size() - overhead)),
00176 overhead);
00177 }
00178
00179 if (extra > 0)
00180 msg += sformat(" (+%"ZU" new)", extra);
00181
00182 msgs[this->cache[i]->alloc_size()] = msg;
00183 }
00184
00185 for (std::map<size_t, std::string>::const_iterator
00186 itr = msgs.begin(), stop = msgs.end();
00187 itr != stop; ++itr)
00188 LINFO("%s", (*itr).second.c_str());
00189
00190
00191 std::string msg =
00192 sformat("%s%sfastcache_alloc<%"ZU">: %"ZU"/%"ZU" cache table "
00193 "entries in use, %fMB total allocated",
00194 pfx ? pfx : "", pfx ? ": " : "",
00195 cache_size, nused, cache_size,
00196 bytes_allocated / (1024.0*1024.0));
00197
00198 if (block_size > 0)
00199 msg += sformat(" (%.2fkB * %7.1f)",
00200 block_size / 1024.0,
00201 double(bytes_allocated) / double(block_size));
00202
00203 LINFO("%s", msg.c_str());
00204 }
00205
00206
00207
00208
00209 void* allocate(size_t nbytes, rutz::free_list_base** source)
00210 {
00211 if (this->allow_caching && source != 0)
00212 for (size_t i = 0; i < cache_size; ++i)
00213 {
00214 if (this->cache[i] != 0)
00215 {
00216
00217
00218 if (this->cache[i]->alloc_size() == nbytes)
00219 {
00220 *source = this->cache[i];
00221 return this->cache[i]->allocate(nbytes);
00222 }
00223
00224 }
00225 else
00226 {
00227
00228
00229 this->cache[i] = new rutz::free_list_base(nbytes);
00230 *source = this->cache[i];
00231 return this->cache[i]->allocate(nbytes);
00232 }
00233 }
00234
00235 *source = 0;
00236 return ::operator new(nbytes);
00237 }
00238
00239
00240
00241 void deallocate(void* space, rutz::free_list_base* source)
00242 {
00243 if (source != 0)
00244 {
00245 source->deallocate(space);
00246 if (!this->allow_caching)
00247 source->release_free_nodes();
00248 }
00249 else
00250 {
00251 ::operator delete(space);
00252 }
00253 }
00254
00255 void release_free_mem()
00256 {
00257 for (size_t i = 0; i < cache_size; ++i)
00258 if (this->cache[i] != 0)
00259 this->cache[i]->release_free_nodes();
00260 }
00261 };
00262
00263
00264 template <size_t N>
00265 struct alloc_info
00266 {
00267 struct data_s
00268 {
00269
00270
00271 void* alloc_addr;
00272
00273
00274
00275 size_t alloc_nbytes;
00276
00277
00278
00279 rutz::free_list_base* source;
00280
00281 size_t user_nbytes() const
00282 {
00283 return alloc_nbytes - 2*N;
00284 }
00285
00286 unsigned long align() const
00287 {
00288 return reinterpret_cast<unsigned long>(this->alloc_addr) % N;
00289 }
00290
00291 unsigned long adjust() const
00292 {
00293 return N-this->align();
00294 }
00295
00296 void* user_addr() const
00297 {
00298 return
00299 static_cast<char*>(this->alloc_addr)
00300 +this->adjust()
00301 +N;
00302 }
00303
00304 unsigned long user_align() const
00305 {
00306 return reinterpret_cast<unsigned long>(this->user_addr()) % N;
00307 }
00308
00309 void print() const
00310 {
00311 LINFO("alloc: internal=[%"ZU" bytes @ %p (align%%%zu=%lu)], "
00312 "user=[%"ZU" bytes @ %p (align%%%"ZU"=%lu)]",
00313 this->alloc_nbytes, this->alloc_addr, N, this->align(),
00314 this->user_nbytes(), this->user_addr(), N, this->user_align());
00315 }
00316
00317 };
00318 data_s data;
00319 char pad[N-sizeof(data_s)];
00320 };
00321
00322
00323 template <class src_type, size_t N>
00324 struct aligned_alloc
00325 {
00326 src_type src_alloc;
00327
00328 bool do_debug_printing;
00329 double nbytes_allocated;
00330 size_t nallocations;
00331 size_t nbytes_current;
00332 size_t nallocations_current;
00333
00334 struct assertions
00335 {
00336
00337
00338
00339
00340 char assert_sizeof_alloc_info_must_be_N[(sizeof(alloc_info<N>)
00341 == N) ? 1 : -1];
00342 };
00343
00344 aligned_alloc()
00345 :
00346 do_debug_printing(false),
00347 nbytes_allocated(0.0),
00348 nallocations(0),
00349 nbytes_current(0),
00350 nallocations_current(0)
00351 {}
00352
00353 void set_debug(bool do_debug)
00354 {
00355 this->do_debug_printing = do_debug;
00356
00357
00358 src_alloc.set_debug(do_debug);
00359 }
00360
00361 void set_allow_caching(bool on)
00362 {
00363 src_alloc.set_allow_caching(on);
00364 }
00365
00366 void show_stats(int verbosity, const char* pfx,
00367 const size_t block_size, const size_t overhead) const
00368 {
00369 LINFO("%s%saligned_alloc<%"ZU">: "
00370 "all-time: [%fMB in %"ZU" allocations], "
00371 "current: [%fMB in %"ZU" allocations]",
00372 pfx ? pfx : "", pfx ? ": " : "",
00373 N,
00374 this->nbytes_allocated/(1024.0*1024.0), this->nallocations,
00375 this->nbytes_current/(1024.0*1024.0), this->nallocations_current);
00376
00377
00378 src_alloc.show_stats(verbosity, pfx, block_size, overhead+2*N);
00379 }
00380
00381 void* allocate(size_t user_nbytes)
00382 {
00383 GVX_TRACE(__PRETTY_FUNCTION__);
00384
00385 alloc_info<N> info;
00386 info.data.source = 0;
00387
00388
00389
00390
00391
00392
00393
00394
00395 info.data.alloc_nbytes = user_nbytes+2*N;
00396
00397 info.data.alloc_addr =
00398 this->src_alloc.allocate(info.data.alloc_nbytes, &info.data.source);
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418 void* const user_addr = info.data.user_addr();
00419
00420 static_cast<alloc_info<N>*>(user_addr)[-1].data = info.data;
00421
00422 this->nbytes_allocated += info.data.alloc_nbytes;
00423 ++this->nallocations;
00424 this->nbytes_current += info.data.alloc_nbytes;
00425 ++this->nallocations_current;
00426
00427 if (this->do_debug_printing)
00428 {
00429 info.data.print();
00430 this->show_stats(0, 0, 0, 0);
00431 }
00432
00433 return user_addr;
00434 }
00435
00436 void deallocate(void* user_addr)
00437 {
00438 GVX_TRACE(__PRETTY_FUNCTION__);
00439
00440 const alloc_info<N>* const info =
00441 static_cast<alloc_info<N>*>(user_addr) - 1;
00442
00443 this->nbytes_current -= info->data.alloc_nbytes;
00444 --this->nallocations_current;
00445
00446 if (this->do_debug_printing)
00447 {
00448 info->data.print();
00449 this->show_stats(0, 0, 0, 0);
00450 }
00451
00452 this->src_alloc.deallocate(info->data.alloc_addr, info->data.source);
00453 }
00454
00455 void release_free_mem()
00456 {
00457 this->src_alloc.release_free_mem();
00458 }
00459 };
00460
00461
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472
00473
00474
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486
00487 #define DO_ALIGN
00488 #define DO_FASTCACHE
00489 #define NALIGN ( (4*sizeof(void*)) > 16 ? (4*sizeof(void*)) : 16 )
00490 #define NCACHE 64
00491
00492 #ifdef DO_ALIGN
00493 # ifdef DO_FASTCACHE
00494 typedef aligned_alloc<fastcache_alloc<NCACHE>, NALIGN> g_alloc_type;
00495 # else
00496 typedef aligned_alloc<trivial_alloc, NALIGN> g_alloc_type;
00497 # endif
00498 #else // !DO_ALIGN
00499 typedef trivial_alloc g_alloc_type;
00500 #endif
00501
00502
00503
00504
00505
00506
00507
00508
00509 g_alloc_type g_alloc;
00510 pthread_mutex_t g_alloc_mutex = PTHREAD_MUTEX_INITIALIZER;
00511
00512 size_t g_stats_units = 0;
00513 }
00514
00515 void* invt_allocate_aux(size_t user_nbytes)
00516 {
00517 GVX_MUTEX_LOCK(&g_alloc_mutex);
00518 return g_alloc.allocate(user_nbytes);
00519 }
00520
00521 void invt_deallocate_aux(void* mem)
00522 {
00523 GVX_MUTEX_LOCK(&g_alloc_mutex);
00524 g_alloc.deallocate(mem);
00525 }
00526
00527 void invt_allocation_release_free_mem()
00528 {
00529 GVX_MUTEX_LOCK(&g_alloc_mutex);
00530 g_alloc.release_free_mem();
00531 }
00532
00533 void invt_allocation_allow_caching(bool on)
00534 {
00535 GVX_MUTEX_LOCK(&g_alloc_mutex);
00536 g_alloc.set_allow_caching(on);
00537 }
00538
00539 void invt_allocation_debug_print(bool do_debug)
00540 {
00541 GVX_MUTEX_LOCK(&g_alloc_mutex);
00542 g_alloc.set_debug(do_debug);
00543 }
00544
00545 void invt_allocation_show_stats(int verbosity, const char* pfx,
00546 const size_t block_size)
00547 {
00548 GVX_MUTEX_LOCK(&g_alloc_mutex);
00549 g_alloc.show_stats(verbosity, pfx,
00550 block_size ? block_size : g_stats_units, 0);
00551 }
00552
00553 void invt_allocation_set_stats_units(const size_t units)
00554 {
00555 g_stats_units = units;
00556 }
00557
00558
00559
00560
00561
00562
00563
00564 #endif // UTIL_ALLOCAUX_C_DEFINED