00001 /*!@file CUDA/CudaMathOps.C C++ wrapper for CUDA Math operations */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005 // 00005 // by the University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/CUDA/CudaFilterOps.C $ 00035 // $Id: CudaFilterOps.C 12962 2010-03-06 02:13:53Z irock $ 00036 // 00037 00038 #include "Image/Rectangle.H" 00039 #include "CUDA/CudaImage.H" 00040 #include "Util/Assert.H" 00041 #include "CUDA/cudadefs.h" 00042 #include "CUDA/CudaMathOps.H" 00043 #include "CUDA/CudaLowPass.H" 00044 #include "CUDA/CudaShapeOps.H" 00045 #include "CUDA/CudaCutPaste.H" 00046 #include "CUDA/CudaKernels.H" 00047 #include "CUDA/CudaConvolutions.H" 00048 #include "CUDA/CudaNorm.H" 00049 #include "CudaFilterOps.H" 00050 #include "CudaDevices.H" 00051 #include "Util/Timer.H" 00052 #include "wrap_c_cuda.h" 00053 00054 // ###################################################################### 00055 CudaImage<float> cudaOrientedFilter(const CudaImage<float>& src, const float k, 00056 const float theta, const float intensity) 00057 { 00058 double kx = double(k) * cos((theta + 90.0) * M_PI / 180.0); 00059 double ky = double(k) * sin((theta + 90.0) * M_PI / 180.0); 00060 MemoryPolicy mp = src.getMemoryPolicy(); 00061 int dev = src.getMemoryDevice(); 00062 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00063 CudaImage<float> re(src.getDims(), NO_INIT, mp, dev); 00064 CudaImage<float> im(src.getDims(), NO_INIT, mp, dev); 00065 00066 cuda_c_orientedFilter(src.getCudaArrayPtr(),re.getCudaArrayPtr(),im.getCudaArrayPtr(),(float)kx,(float)ky,intensity,src.getWidth(),src.getHeight(),tile.sz()); 00067 00068 re = cudaLowPass9(re); 00069 im = cudaLowPass9(im); 00070 00071 return cudaQuadEnergy(re, im); 00072 } 00073 00074 00075 // ###################################################################### 00076 CudaImage<float> cudaCenterSurround(const CudaImage<float>& center, const CudaImage<float>& surround, 00077 const bool absol) 00078 { 00079 ASSERT(center.initialized() && surround.initialized()); 00080 ASSERT(center.getMemoryDevice() == surround.getMemoryDevice()); 00081 00082 const int lw = center.getWidth(), lh = center.getHeight(); 00083 const int sw = surround.getWidth(), sh = surround.getHeight(); 00084 00085 if (sw > lw || sh > lh) LFATAL("center must be larger than surround"); 00086 00087 MemoryPolicy mp = center.getMemoryPolicy(); 00088 int dev = center.getMemoryDevice(); 00089 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00090 00091 // result has the size of the larger image: 00092 CudaImage<float> result(center.getDims(), NO_INIT, mp, dev); 00093 // scan large image and subtract corresponding pixel from small image: 00094 00095 if(absol) 00096 cuda_c_centerSurroundAbs(center.getCudaArrayPtr(), surround.getCudaArrayPtr(), result.getCudaArrayPtr(), lw, lh, sw, sh, tile.sz()); 00097 else 00098 cuda_c_centerSurroundClamped(center.getCudaArrayPtr(), surround.getCudaArrayPtr(), result.getCudaArrayPtr(), lw, lh, sw, sh, tile.sz()); 00099 00100 // attenuate borders: 00101 cudaInplaceAttenuateBorders(result, result.getDims().max() / 20); 00102 00103 00104 return result; 00105 00106 } 00107 00108 00109 void cudaCenterSurround(const CudaImage<float>& center, const CudaImage<float>& surround, 00110 CudaImage<float>& pos, CudaImage<float>& neg) 00111 { 00112 00113 const int lw = center.getWidth(), lh = center.getHeight(); 00114 const int sw = surround.getWidth(), sh = surround.getHeight(); 00115 00116 if (sw > lw || sh > lh) LFATAL("center must be larger than surround"); 00117 00118 MemoryPolicy mp = center.getMemoryPolicy(); 00119 int dev = center.getMemoryDevice(); 00120 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00121 00122 // result has the size of the larger image: 00123 pos = CudaImage<float>(center.getDims(), NO_INIT,mp,dev); 00124 neg = CudaImage<float>(center.getDims(), NO_INIT,mp,dev); 00125 00126 cuda_c_centerSurroundDirectional(center.getCudaArrayPtr(), surround.getCudaArrayPtr(), pos.getCudaArrayPtr(), neg.getCudaArrayPtr(), 00127 lw,lh,sw,sh,tile.sz()); 00128 // attenuate borders: 00129 cudaInplaceAttenuateBorders(pos, pos.getDims().max() / 20); 00130 cudaInplaceAttenuateBorders(neg, neg.getDims().max() / 20); 00131 } 00132 00133 00134 // ###################################################################### 00135 CudaImage<float> cudaDoubleOpp(const CudaImage<float>& cplus, const CudaImage<float>& cminus, 00136 const CudaImage<float>& splus, const CudaImage<float>& sminus) 00137 { 00138 ASSERT(cplus.isSameSize(cminus)); ASSERT(splus.isSameSize(sminus)); 00139 00140 00141 // compute difference between both center arrays 00142 CudaImage<float> cdiff = cplus; 00143 cdiff -= cminus; 00144 00145 // compute difference between both surround arrays 00146 CudaImage<float> sdiff = splus; 00147 sdiff -= sminus; 00148 00149 // compute center-surround cdiff - sdiff = (cp - cm) [-] (sp - sm) 00150 return cudaCenterSurround(cdiff, sdiff, true); // take abs 00151 } 00152 00153 00154 // ###################################################################### 00155 CudaImage<float> cudaCenterSurroundAbsDownScaleNormalize(const CudaImage<float>& center, const CudaImage<float>& surround, 00156 const bool absol, int newWidth, int newHeight, float mi, float ma, int nIter, float weakness) 00157 { 00158 00159 00160 ASSERT(center.initialized() && surround.initialized()); 00161 ASSERT(center.getMemoryDevice() == surround.getMemoryDevice()); 00162 00163 const int lw = center.getWidth(), lh = center.getHeight(); 00164 const int sw = surround.getWidth(), sh = surround.getHeight(); 00165 00166 if (sw > lw || sh > lh) LFATAL("center must be larger than surround"); 00167 00168 MemoryPolicy mp = center.getMemoryPolicy(); 00169 int dev = center.getMemoryDevice(); 00170 Dims tile = CudaDevices::getDeviceTileSize(dev); 00171 00172 00173 // result has the size of the larger image: 00174 CudaImage<float> result(center.getDims(), NO_INIT, mp, dev); 00175 00176 // Attenuate at the edge of the image of this border size 00177 const int attBorder = std::max(lw, lh)/20; 00178 //T zero = T(); 00179 cuda_c_centerSurroundAbsAttenuate(center.getCudaArrayPtr(), surround.getCudaArrayPtr(), result.getCudaArrayPtr(), lw, lh, sw, sh, attBorder, tile.w(), tile.h()); 00180 00181 result = cudaDownSize(result, newWidth, newHeight); 00182 cudaInplaceRectify(result); 00183 00184 // then, normalize between mi and ma if not zero 00185 if (mi != 0.0F || ma != 0.0F) cudaInplaceNormalize(result, mi, ma); 00186 00187 // Normalize using fancy normalization: multiple iterations of 00188 // filtering by a DoG 00189 00190 const int w = result.getWidth(); 00191 const int h = result.getHeight(); 00192 int siz = std::max(w, h); 00193 int maxhw = std::max(0, std::min(w, h) / 2 - 1); 00194 00195 // Allocate max buffer so that it can be reused 00196 CudaImage<float> maxim = CudaImage<float>(1,1,NO_INIT,mp,dev); 00197 // first clamp negative values to zero 00198 00199 00200 // build separable Gaussians for DoG computation: 00201 float esig = (float(siz) * FANCYESIG) * 0.01F; 00202 float isig = (float(siz) * FANCYISIG) * 0.01F; 00203 CudaImage<float> gExc = cudaGaussian(mp,dev,FANCYCOEX/(esig*sqrt(2.0*M_PI)) * weakness, esig, maxhw); 00204 CudaImage<float> gInh = cudaGaussian(mp,dev,FANCYCOIN/(isig*sqrt(2.0*M_PI)) * weakness, isig, maxhw); 00205 00206 for (int i = 0; i < nIter; ++i) 00207 { 00208 00209 CudaImage<float> excit = cudaSepFilter(result, gExc, gExc, CONV_BOUNDARY_CLEAN); // excitatory part 00210 CudaImage<float> inhib = cudaSepFilter(result, gInh, gInh, CONV_BOUNDARY_CLEAN); // inhibitory part 00211 00212 // result = result + excit - inhib + maxim*(-0.01F*FANCYINHI) 00213 excit -= inhib; 00214 cudaGetMax(result, maxim); 00215 00216 result += excit; // we get broad inhibition from everywhere 00217 00218 result += cudaGetScalar(maxim)*(-0.01F * FANCYINHI) ; // we get fixed global inhibition 00219 00220 cudaInplaceRectify(result); 00221 00222 // sigmoid(FANCYG, FANCYH, FANCYS); 00223 } 00224 00225 00226 return result; 00227 00228 } 00229 00230 00231 // ###################################################################### 00232 CudaImage<float> cudaSpatialPoolMax(const CudaImage<float>& src, const int si, const int sj, 00233 const int sti, const int stj) 00234 { 00235 const int w = src.getWidth(), h = src.getHeight(); 00236 MemoryPolicy mp = src.getMemoryPolicy(); 00237 int dev = src.getMemoryDevice(); 00238 Dims tile = CudaDevices::getDeviceTileSize(dev); 00239 00240 // This is confusing... we need to oversize the result, so that the intermediate results can be stored 00241 // in between cuda kernels 00242 CudaImage<float> buf1(src.getSize(),1, NO_INIT,mp,dev); 00243 CudaImage<float> buf2(src.getSize(),1, NO_INIT,mp,dev); 00244 int res_w = (int)ceil((float)w / (float)si); 00245 int res_h = (int)ceil((float)h / (float)sj); 00246 CudaImage<float> result(res_w,res_h,NO_INIT,mp,dev); 00247 cuda_c_spatialPoolMax(src.getCudaArrayPtr(),result.getCudaArrayPtr(),buf1.getCudaArrayPtr(),buf2.getCudaArrayPtr(),w,h,si,sj,sti,stj,tile.w(),tile.h()); 00248 return result; 00249 } 00250 00251