CudaFilterOps.C

00001 /*!@file CUDA/CudaMathOps.C C++ wrapper for CUDA Math operations */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00005 // by the University of Southern California (USC) and the iLab at USC.  //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file:
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/CUDA/CudaFilterOps.C $
00035 // $Id: CudaFilterOps.C 12962 2010-03-06 02:13:53Z irock $
00036 //
00037 
00038 #include "Image/Rectangle.H"
00039 #include "CUDA/CudaImage.H"
00040 #include "Util/Assert.H"
00041 #include "CUDA/cudadefs.h"
00042 #include "CUDA/CudaMathOps.H"
00043 #include "CUDA/CudaLowPass.H"
00044 #include "CUDA/CudaShapeOps.H"
00045 #include "CUDA/CudaCutPaste.H"
00046 #include "CUDA/CudaKernels.H"
00047 #include "CUDA/CudaConvolutions.H"
00048 #include "CUDA/CudaNorm.H"
00049 #include "CudaFilterOps.H"
00050 #include "CudaDevices.H"
00051 #include "Util/Timer.H"
00052 #include "wrap_c_cuda.h"
00053 
00054 // ######################################################################
00055 CudaImage<float> cudaOrientedFilter(const CudaImage<float>& src, const float k,
00056                                     const float theta, const float intensity)
00057 {
00058   double kx = double(k) * cos((theta + 90.0) * M_PI / 180.0);
00059   double ky = double(k) * sin((theta + 90.0) * M_PI / 180.0);
00060   MemoryPolicy mp = src.getMemoryPolicy();
00061   int dev = src.getMemoryDevice();
00062   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00063   CudaImage<float> re(src.getDims(), NO_INIT, mp, dev);
00064   CudaImage<float> im(src.getDims(), NO_INIT, mp, dev);
00065 
00066   cuda_c_orientedFilter(src.getCudaArrayPtr(),re.getCudaArrayPtr(),im.getCudaArrayPtr(),(float)kx,(float)ky,intensity,src.getWidth(),src.getHeight(),tile.sz());
00067 
00068   re = cudaLowPass9(re);
00069   im = cudaLowPass9(im);
00070 
00071   return cudaQuadEnergy(re, im);
00072 }
00073 
00074 
00075 // ######################################################################
00076 CudaImage<float> cudaCenterSurround(const CudaImage<float>& center, const CudaImage<float>& surround,
00077                                     const bool absol)
00078 {
00079   ASSERT(center.initialized() && surround.initialized());
00080   ASSERT(center.getMemoryDevice() == surround.getMemoryDevice());
00081 
00082   const int lw = center.getWidth(), lh = center.getHeight();
00083   const int sw = surround.getWidth(), sh = surround.getHeight();
00084 
00085   if (sw > lw || sh > lh) LFATAL("center must be larger than surround");
00086 
00087   MemoryPolicy mp = center.getMemoryPolicy();
00088   int dev = center.getMemoryDevice();
00089   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00090 
00091   // result has the size of the larger image:
00092   CudaImage<float> result(center.getDims(), NO_INIT, mp, dev);
00093   // scan large image and subtract corresponding pixel from small image:
00094 
00095   if(absol)
00096     cuda_c_centerSurroundAbs(center.getCudaArrayPtr(), surround.getCudaArrayPtr(), result.getCudaArrayPtr(), lw, lh, sw, sh, tile.sz());
00097   else
00098     cuda_c_centerSurroundClamped(center.getCudaArrayPtr(), surround.getCudaArrayPtr(), result.getCudaArrayPtr(), lw, lh, sw, sh, tile.sz());
00099 
00100   // attenuate borders:
00101   cudaInplaceAttenuateBorders(result, result.getDims().max() / 20);
00102 
00103 
00104   return result;
00105 
00106 }
00107 
00108 
00109 void cudaCenterSurround(const CudaImage<float>& center, const CudaImage<float>& surround,
00110                     CudaImage<float>& pos, CudaImage<float>& neg)
00111 {
00112 
00113   const int lw = center.getWidth(), lh = center.getHeight();
00114   const int sw = surround.getWidth(), sh = surround.getHeight();
00115 
00116   if (sw > lw || sh > lh) LFATAL("center must be larger than surround");
00117 
00118   MemoryPolicy mp = center.getMemoryPolicy();
00119   int dev = center.getMemoryDevice();
00120   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00121 
00122   // result has the size of the larger image:
00123   pos = CudaImage<float>(center.getDims(), NO_INIT,mp,dev);
00124   neg = CudaImage<float>(center.getDims(), NO_INIT,mp,dev);
00125 
00126   cuda_c_centerSurroundDirectional(center.getCudaArrayPtr(), surround.getCudaArrayPtr(), pos.getCudaArrayPtr(), neg.getCudaArrayPtr(),
00127                                    lw,lh,sw,sh,tile.sz());
00128   // attenuate borders:
00129   cudaInplaceAttenuateBorders(pos, pos.getDims().max() / 20);
00130   cudaInplaceAttenuateBorders(neg, neg.getDims().max() / 20);
00131 }
00132 
00133 
00134 // ######################################################################
00135 CudaImage<float> cudaDoubleOpp(const CudaImage<float>& cplus, const CudaImage<float>& cminus,
00136                                const CudaImage<float>& splus, const CudaImage<float>& sminus)
00137 {
00138   ASSERT(cplus.isSameSize(cminus)); ASSERT(splus.isSameSize(sminus));
00139 
00140 
00141   // compute difference between both center arrays
00142   CudaImage<float> cdiff = cplus;
00143   cdiff -= cminus;
00144 
00145   // compute difference between both surround arrays
00146   CudaImage<float> sdiff = splus;
00147   sdiff -= sminus;
00148 
00149   // compute center-surround cdiff - sdiff = (cp - cm) [-] (sp - sm)
00150   return cudaCenterSurround(cdiff, sdiff, true);  // take abs
00151 }
00152 
00153 
00154 // ######################################################################
00155 CudaImage<float> cudaCenterSurroundAbsDownScaleNormalize(const CudaImage<float>& center, const CudaImage<float>& surround,
00156                                                          const bool absol, int newWidth, int newHeight, float mi, float ma, int nIter, float weakness)
00157 {
00158 
00159 
00160   ASSERT(center.initialized() && surround.initialized());
00161   ASSERT(center.getMemoryDevice() == surround.getMemoryDevice());
00162 
00163   const int lw = center.getWidth(), lh = center.getHeight();
00164   const int sw = surround.getWidth(), sh = surround.getHeight();
00165 
00166   if (sw > lw || sh > lh) LFATAL("center must be larger than surround");
00167 
00168   MemoryPolicy mp = center.getMemoryPolicy();
00169   int dev = center.getMemoryDevice();
00170   Dims tile = CudaDevices::getDeviceTileSize(dev);
00171 
00172 
00173   // result has the size of the larger image:
00174   CudaImage<float> result(center.getDims(), NO_INIT, mp, dev);
00175 
00176   // Attenuate at the edge of the image of this border size
00177   const int attBorder = std::max(lw, lh)/20;
00178   //T zero = T();
00179   cuda_c_centerSurroundAbsAttenuate(center.getCudaArrayPtr(), surround.getCudaArrayPtr(), result.getCudaArrayPtr(), lw, lh, sw, sh, attBorder, tile.w(), tile.h());
00180 
00181   result = cudaDownSize(result, newWidth, newHeight);
00182   cudaInplaceRectify(result);
00183 
00184   // then, normalize between mi and ma if not zero
00185   if (mi != 0.0F || ma != 0.0F) cudaInplaceNormalize(result, mi, ma);
00186 
00187   // Normalize using fancy normalization: multiple iterations of
00188   // filtering by a DoG
00189 
00190   const int w = result.getWidth();
00191   const int h = result.getHeight();
00192   int siz = std::max(w, h);
00193   int maxhw = std::max(0, std::min(w, h) / 2 - 1);
00194 
00195   // Allocate max buffer so that it can be reused
00196   CudaImage<float> maxim = CudaImage<float>(1,1,NO_INIT,mp,dev);
00197   // first clamp negative values to zero
00198 
00199 
00200   // build separable Gaussians for DoG computation:
00201   float esig = (float(siz) * FANCYESIG) * 0.01F;
00202   float isig = (float(siz) * FANCYISIG) * 0.01F;
00203   CudaImage<float> gExc = cudaGaussian(mp,dev,FANCYCOEX/(esig*sqrt(2.0*M_PI)) * weakness, esig, maxhw);
00204   CudaImage<float> gInh = cudaGaussian(mp,dev,FANCYCOIN/(isig*sqrt(2.0*M_PI)) * weakness, isig, maxhw);
00205 
00206   for (int i = 0; i < nIter; ++i)
00207     {
00208 
00209       CudaImage<float> excit = cudaSepFilter(result, gExc, gExc, CONV_BOUNDARY_CLEAN); // excitatory part
00210       CudaImage<float> inhib = cudaSepFilter(result, gInh, gInh, CONV_BOUNDARY_CLEAN); // inhibitory part
00211 
00212       // result = result + excit - inhib + maxim*(-0.01F*FANCYINHI)
00213       excit -= inhib;
00214       cudaGetMax(result, maxim);
00215 
00216       result += excit;        // we get broad inhibition from everywhere
00217 
00218       result += cudaGetScalar(maxim)*(-0.01F * FANCYINHI) ;    // we get fixed global inhibition
00219 
00220       cudaInplaceRectify(result);
00221 
00222       //      sigmoid(FANCYG, FANCYH, FANCYS);
00223     }
00224 
00225 
00226   return result;
00227 
00228 }
00229 
00230 
00231 // ######################################################################
00232 CudaImage<float> cudaSpatialPoolMax(const CudaImage<float>& src, const int si, const int sj,
00233                const int sti, const int stj)
00234 {
00235   const int w = src.getWidth(), h = src.getHeight();
00236   MemoryPolicy mp = src.getMemoryPolicy();
00237   int dev = src.getMemoryDevice();
00238   Dims tile = CudaDevices::getDeviceTileSize(dev);
00239 
00240   // This is confusing... we need to oversize the result, so that the intermediate results can be stored
00241   // in between cuda kernels
00242   CudaImage<float> buf1(src.getSize(),1, NO_INIT,mp,dev);
00243   CudaImage<float> buf2(src.getSize(),1, NO_INIT,mp,dev);
00244   int res_w = (int)ceil((float)w / (float)si);
00245   int res_h =  (int)ceil((float)h / (float)sj);
00246   CudaImage<float> result(res_w,res_h,NO_INIT,mp,dev);
00247   cuda_c_spatialPoolMax(src.getCudaArrayPtr(),result.getCudaArrayPtr(),buf1.getCudaArrayPtr(),buf2.getCudaArrayPtr(),w,h,si,sj,sti,stj,tile.w(),tile.h());
00248   return result;
00249 }
00250 
00251