CudaDevices.H

Go to the documentation of this file.
00001 /*!@file CUDA/CudaDevices.H Static class to keep track of status and properties of CUDA devices */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00005 // by the University of Southern California (USC) and the iLab at USC.  //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file:
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/CUDA/CudaDevices.H $
00035 // $Id: CudaDevices.H 12962 2010-03-06 02:13:53Z irock $
00036 //
00037 
00038 
00039 #ifndef CUDADEVICES_H_DEFINED
00040 #define CUDADEVICES_H_DEFINED
00041 
00042 #include <map>
00043 #include <list>
00044 #include <cmath>
00045 #include "Image/Dims.H"
00046 #include "CUDA/cutil.h"
00047 #include <cuda_runtime_api.h>
00048 #include "Util/log.H"
00049 #include <stdlib.h>
00050 #include <stdio.h>
00051 
00052 using std::map;
00053 using std::list;
00054 
00055 class CudaDevices
00056 {
00057  public:
00058   static void initDevice(const int deviceNum)
00059   {
00060     // Optimization, avoid repeated initializations
00061     if(isValid(deviceNum))
00062       return;
00063     // Initializing CUDA devices should not happen often, we can be thorough
00064     int deviceCount;
00065     CUDA_SAFE_CALL_NO_SYNC(cudaGetDeviceCount(&deviceCount));
00066 
00067     if (deviceCount == 0)
00068       LFATAL("No devices supporting CUDA");
00069 
00070     if (deviceNum > deviceCount-1)
00071       LFATAL("Device [%d] out of valid device range [0-%d]",deviceNum,deviceCount-1);
00072 
00073     cudaDeviceProp deviceProp;
00074     CUDA_SAFE_CALL_NO_SYNC(cudaGetDeviceProperties(&deviceProp, deviceNum));
00075 
00076     if (deviceProp.major < 1)
00077       LFATAL("Device [%d] does not support CUDA",deviceNum);
00078 
00079     // Determine the optimal tile size for this device
00080     // Take the square root of the maximum num of threads per block and then find
00081     // the next smallest power of 2
00082     int sqSize = nextpow2down((int) sqrt(deviceProp.maxThreadsPerBlock));
00083     Dims tileSize = Dims(sqSize,sqSize);
00084     int smSize = deviceProp.sharedMemPerBlock;
00085     setDeviceTileSize(deviceNum,tileSize);
00086     setDeviceSharedMemorySize(deviceNum,smSize);
00087   }
00088 
00089   static inline int nextpow2down (int x)
00090   {
00091     if (x < 0)
00092       return 0;
00093     --x;
00094     x |= x >> 1;
00095     x |= x >> 2;
00096     x |= x >> 4;
00097     x |= x >> 8;
00098     x |= x >> 16;
00099     x = x>>1;
00100     return x+1;
00101   }
00102 
00103   static void memcpyHostToDevice(void *trg, const void *src, const int len, const int deviceNum)
00104   {
00105     setCurrentDevice(deviceNum);
00106     CUDA_SAFE_CALL( cudaMemcpy(trg, src, len, cudaMemcpyHostToDevice));
00107   }
00108 
00109   static void memcpyDeviceToHost(void *trg, const void *src, const int len, const int deviceNum)
00110   {
00111     setCurrentDevice(deviceNum);
00112     CUDA_SAFE_CALL( cudaMemcpy(trg, src, len, cudaMemcpyDeviceToHost));
00113   }
00114 
00115   static void memcpyDeviceToDevice(void *trg, const void *src, const int len, const int trgDev, const int srcDev)
00116   {
00117     if(trgDev == srcDev)
00118     {
00119       setCurrentDevice(srcDev);
00120       CUDA_SAFE_CALL( cudaMemcpy(trg, src, len, cudaMemcpyDeviceToDevice));
00121     }
00122     else
00123     {
00124       char *tmp;
00125       tmp = (char *) std::malloc(len);
00126       memcpyDeviceToHost(tmp,src,len,srcDev);
00127       memcpyHostToDevice(trg,tmp,len,trgDev);
00128       std::free(tmp);
00129     }
00130   }
00131 
00132   static void malloc(void **ret, const int sz, const int deviceNum)
00133   {
00134     setCurrentDevice(deviceNum);
00135     cudaError err = cudaMalloc(ret,sz);
00136     if( cudaSuccess != err) {
00137         fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n",
00138                 __FILE__, __LINE__, cudaGetErrorString( err) );
00139         printf("Attempted to malloc %d bytes on cuda device\n",sz);
00140         abort();
00141     }
00142   //CUDA_SAFE_CALL(cudaMalloc(ret, sz) );
00143     if(*ret == NULL && sz > 0)
00144       {
00145         LFATAL("CUDA Device [%d] failed to allocate %d bytes of memory",deviceNum,sz);
00146       }
00147   }
00148 
00149   static void free(void *mem, const int deviceNum)
00150   {
00151     setCurrentDevice(deviceNum);
00152     cudaError err = cudaFree(mem);
00153     if( cudaSuccess != err) {
00154         fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n",
00155                 __FILE__, __LINE__, cudaGetErrorString( err) );
00156         printf("Attempted to free %p address on cuda device\n",mem);
00157         abort();
00158     }
00159 
00160   //CUDA_SAFE_CALL(cudaFree(mem) );
00161   }
00162 
00163   static void memset(void *mem, const char val, const int sz, const int deviceNum)
00164   {
00165     setCurrentDevice(deviceNum);
00166     CUDA_SAFE_CALL(cudaMemset(mem, val, sz) );
00167   }
00168 
00169   static inline Dims getDeviceTileSize(const int deviceNum)
00170   {
00171     return deviceTileSizes[deviceNum];
00172   }
00173 
00174   static inline int getDeviceSharedMemorySize(const int deviceNum)
00175   {
00176     return deviceSharedMemorySizes[deviceNum];
00177   }
00178 
00179 
00180   static inline Dims getDeviceTileSize1D(const int deviceNum)
00181   {
00182     Dims tile1D = Dims(deviceTileSizes[deviceNum].sz(),1);
00183     return tile1D;
00184   }
00185 
00186   static inline bool isValid(const int deviceNum)
00187   {
00188     if(deviceTileSizes[deviceNum].isEmpty())
00189       return false;
00190     return true;
00191   }
00192 
00193   static void displayProperties( const int deviceNum )
00194   {
00195     setCurrentDevice(deviceNum);
00196     cudaDeviceProp deviceProp;
00197     CUDA_SAFE_CALL_NO_SYNC(cudaGetDeviceProperties(&deviceProp, deviceNum));
00198     if (deviceProp.major < 1)
00199     {
00200       LINFO("Video Device [%d] does not support CUDA",deviceNum);
00201       return;
00202     }
00203     printf( "\nCUDA Device Name \t - %s ", deviceProp.name );
00204     printf( "\n**************************************");
00205     printf( "\nTotal Global Memory\t\t -%lu KB", deviceProp.totalGlobalMem/1024 );
00206     printf( "\nShared memory available per block \t - %lu KB", deviceProp.sharedMemPerBlock/1024 );
00207     printf( "\nNumber of registers per thread block \t - %d", deviceProp.regsPerBlock );
00208     printf( "\nWarp size in threads \t - %d", deviceProp.warpSize );
00209     printf( "\nMemory Pitch \t - %d bytes", (int) deviceProp.memPitch );
00210     printf( "\nMaximum threads per block \t - %d",  deviceProp.maxThreadsPerBlock );
00211     printf( "\nMaximum Thread Dimension (block) \t - %d %d %d", deviceProp.maxThreadsDim[0], deviceProp.maxThreadsDim[1], deviceProp.maxThreadsDim[2] );
00212     printf( "\nMaximum Thread Dimension (grid) \t - %d %d %d", deviceProp.maxGridSize[0], deviceProp.maxGridSize[1], deviceProp.maxGridSize[2] );
00213     printf( "\nTotal constant memory \t - %d bytes", (int) deviceProp.totalConstMem );
00214     printf( "\nCUDA ver \t - %d.%d", deviceProp.major, deviceProp.minor );
00215     printf( "\nClock rate \t - %d KHz", deviceProp.clockRate );
00216     printf( "\nTexture Alignment \t - %d bytes", (int) deviceProp.textureAlignment );
00217     printf( "\nDevice Overlap \t - %s", deviceProp. deviceOverlap?"Allowed":"Not Allowed" );
00218     printf( "\nNumber of Multi processors \t - %d\n", deviceProp.multiProcessorCount );
00219   }
00220 
00221 
00222   static inline void setCurrentDevice(const int deviceNum)
00223   {
00224     if(!isValid(deviceNum)) initDevice(deviceNum);
00225     _setCurrentDevice(deviceNum);
00226   }
00227 
00228 
00229  private:
00230   // Don't allow objects of CudaDevices
00231   CudaDevices();
00232 
00233   // Don't check if our setup is valid
00234   static inline void _setCurrentDevice(const int deviceNum)
00235   {
00236     // Only change if it's different
00237     if(deviceNum != currentDevice)
00238       {
00239         if(deviceNum < 0)
00240           {
00241             fprintf(stderr,"Attempting to set device to %d from %d in file '%s' in line %i.\n",deviceNum,currentDevice,
00242                     __FILE__, __LINE__ );
00243             abort();
00244           }
00245 
00246         CUDA_SAFE_CALL(cudaSetDevice(deviceNum));
00247         currentDevice = deviceNum;
00248       }
00249   }
00250 
00251   static inline void setDeviceTileSize(const int deviceNum, const Dims tileSize)
00252   {
00253     deviceTileSizes[deviceNum] = tileSize;
00254   }
00255 
00256   static inline void setDeviceSharedMemorySize(const int deviceNum, const int smSize)
00257   {
00258     deviceSharedMemorySizes[deviceNum] = smSize;
00259   }
00260 
00261   static map<int,Dims> deviceTileSizes;
00262   static map<int,int> deviceSharedMemorySizes;
00263   static int currentDevice;
00264 };
00265 
00266 #endif