cuda_kernels.h

Go to the documentation of this file.
00001 /*!@file CUDA/cuda_kernels.h CUDA/GPU convolution kernel generation code  */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00005 // by the University of Southern California (USC) and the iLab at USC.  //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file:
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/CUDA/cuda_kernels.h $
00035 // $Id: cuda_kernels.h 12962 2010-03-06 02:13:53Z irock $
00036 //
00037 
00038 
00039 #ifndef CUDA_KERNELS_H_DEFINED
00040 #define CUDA_KERNELS_H_DEFINED
00041 
00042 #include <cuda.h>
00043 #include "CUDA/cutil.h"
00044 #include "cudadefs.h"
00045 
00046 
//! Fill a square, row-major filter buffer with an oriented kernel that is
//! zeroed outside a centered disc.
/*! One thread writes one element. For an element at column xpos, row ypos,
    the coordinates relative to the filter center are rotated by theta and
    the stored value is

      exp(-(rtX^2 + gamma^2 * rtY^2) / (2 * sigma^2)) * cos(2 * pi * rtX / lambda)

    with lambda = 2 * size / div and sigma = 0.8 * lambda. Elements whose
    distance from the center exceeds size / 2 are set to 0.

    All arithmetic is done in single precision: the inputs and the output
    are float, so nothing is gained by promoting through a double pi.

    @param dest        output buffer; element (xpos, ypos) is written at
                       index ypos * size + xpos, so it must hold at least
                       size * size floats
    @param theta       rotation angle in degrees
    @param gamma       scale applied to the rotated second coordinate
                       inside the exponential
    @param size        side length of the square filter (total size is
                       size * size)
    @param div         divisor used to derive lambda from size; the formula
                       divides by it, so it is expected to be nonzero
    @param tile_width  x step between consecutive blocks -- presumably
                       equal to blockDim.x; confirm against the launch site
    @param tile_height y step between consecutive blocks -- presumably
                       equal to blockDim.y; confirm against the launch site */
__global__ void cuda_global_dogFilterHmax(float *dest, const float theta, const float gamma, const int size, const float div, const int tile_width, const int tile_height)
{
  // Single-precision pi keeps every expression below in float.
  const float pi = 3.14159265358979323846F;

  const int ypos = IMUL(blockIdx.y,tile_height) + threadIdx.y;
  const int xpos = IMUL(blockIdx.x,tile_width) + threadIdx.x;

  // Threads that fall outside the filter have nothing to write.
  if (xpos >= size || ypos >= size)
    return;

  const int dst_idx = IMUL(ypos,size) + xpos;

  // Coordinates relative to the filter center.
  const int center    = (int)ceilf(size/2.0F);
  const int filtSizeL = center-1;
  const int x = xpos - filtSizeL;
  const int y = ypos - filtSizeL;

  // Circular support: everything farther than size/2 from the center is 0.
  if (sqrtf((float)(IMUL(x,x)+IMUL(y,y))) > size/2.0F)
    {
      dest[dst_idx] = 0.0F;
      return;
    }

  // Rotation angle in radians; evaluate its sine and cosine only once.
  const float thetaRads = pi / 180.0F * theta;
  const float cosTheta = cosf(thetaRads);
  const float sinTheta = sinf(thetaRads);

  // Wavelength and envelope width derived from the filter size.
  const float lambda = size*2.0F/div;
  const float sigma = lambda*0.8F;
  const float sigq = sigma*sigma;

  const float rtX = y * cosTheta - x * sinTheta;
  const float rtY = y * sinTheta + x * cosTheta;

  dest[dst_idx] = expf(-(rtX*rtX + gamma*gamma*rtY*rtY)/(2.0F*sigq)) *
    cosf(2.0F*pi*rtX/lambda);
}
00086 
//! Fill a square, row-major filter buffer with an oriented kernel built
//! from a Gaussian of standard deviation stddev.
/*! One thread writes one element. For an element at column xpos, row ypos,
    the offsets x = xpos - half_size and y = ypos - half_size are rotated by
    theta and the stored value is

      (rtX^2 / stddev^2 - 1) / stddev^2 * exp(-(rtX^2 + rtY^2) / (2 * stddev^2))

    All arithmetic is done in single precision: the inputs and the output
    are float, so nothing is gained by promoting through a double pi.

    @param dest        output buffer; element (xpos, ypos) is written at
                       index ypos * size + xpos, so it must hold at least
                       size * size floats
    @param stddev      standard deviation used in the formula above; the
                       formula divides by its square, so it is expected to
                       be nonzero
    @param theta       rotation angle in degrees
    @param half_size   offset subtracted from xpos/ypos to center the
                       coordinates
    @param size        side length of the square filter
    @param tile_width  x step between consecutive blocks -- presumably
                       equal to blockDim.x; confirm against the launch site
    @param tile_height y step between consecutive blocks -- presumably
                       equal to blockDim.y; confirm against the launch site */
__global__ void cuda_global_dogFilter(float *dest, float stddev, float theta, int half_size, int size, int tile_width, int tile_height)
{
  // Single-precision pi keeps every expression below in float.
  const float pi = 3.14159265358979323846F;

  const int ypos = IMUL(blockIdx.y,tile_height) + threadIdx.y;
  const int xpos = IMUL(blockIdx.x,tile_width) + threadIdx.x;

  // Threads that fall outside the filter have nothing to write.
  if (xpos >= size || ypos >= size)
    return;

  // Rotation angle in radians; evaluate its sine and cosine only once.
  const float thetaRads = pi / 180.0F * theta;
  const float cosTheta = cosf(thetaRads);
  const float sinTheta = sinf(thetaRads);

  const float sigq = stddev * stddev;

  // Coordinates relative to the filter center, then rotated by theta.
  const int x = xpos - half_size;
  const int y = ypos - half_size;
  const float rtX =  x * cosTheta + y * sinTheta;
  const float rtY = -x * sinTheta + y * cosTheta;

  dest[IMUL(ypos,size) + xpos] = (rtX*rtX/sigq - 1.0F)/sigq *
    expf(-(rtX*rtX + rtY*rtY)/(2.0F*sigq));
}
00112 
00113 
00114 
//! Fill a flat buffer holding a (2*size+1) x (2*size+1) Gabor kernel.
/*! One thread writes one element. The flat index src_idx is split into
    y = src_idx / (2*size+1) - size and x = src_idx % (2*size+1) - size, so
    both coordinates run over [-size, size] exactly as in the equivalent
    nested loops

      for (int y = -size; y <= size; ++y)
        for (int x = -size; x <= size; ++x)

    The stored value is

      cos(omega * major + psi) * exp(-major^2 / (2 * major_stddev^2))
                               * exp(-minor^2 / (2 * minor_stddev^2))

    with major = x*cos(theta) + y*sin(theta), minor = x*sin(theta) -
    y*cos(theta) and omega = 2*pi / period.

    The row is obtained with plain integer division: src_idx is never
    negative here, so the quotient is already the floor and needs no
    detour through float. All arithmetic is done in single precision.

    @param kern         output buffer of at least sz floats
    @param major_stddev standard deviation of the envelope along the major
                        axis
    @param minor_stddev standard deviation of the envelope along the minor
                        axis
    @param period       period of the cosine carrier; the formula divides
                        by it, so it is expected to be nonzero
    @param phase        phase of the cosine carrier, in degrees
    @param theta        rotation angle, in degrees
    @param size         half-width of the kernel; each side has 2*size+1
                        elements
    @param tile_len     index step between consecutive blocks -- presumably
                        equal to blockDim.x; confirm against the launch site
    @param sz           number of elements to write -- presumably
                        (2*size+1)^2; confirm against the caller */
__global__ void cuda_global_gaborFilter3(float *kern, const float major_stddev, const float minor_stddev,
                                         const float period, const float phase,
                                         const float theta, const int size, const int tile_len, const int sz)
{
  const int src_idx = blockIdx.x*tile_len + threadIdx.x;

  // Threads past the end of the kernel have nothing to write.
  if (src_idx >= sz)
    return;

  // Single-precision pi keeps every expression below in float.
  const float pi = 3.14159265358979323846F;

  // change the angles in degrees to those in radians:
  const float psi = pi / 180.0F * phase;
  const float rtDeg = pi / 180.0F * theta;

  // calculate constants:
  const float omega = (2.0F * pi) / period;
  const float co = cosf(rtDeg), si = sinf(rtDeg);
  const float major_sigq = 2.0F * major_stddev * major_stddev;
  const float minor_sigq = 2.0F * minor_stddev * minor_stddev;

  // recover the 2D position, centered on the middle of the kernel:
  const int side = size*2+1;
  const int y = src_idx / side - size;
  const int x = src_idx % side - size;

  // compute gabor:
  const float major = x*co + y*si;
  const float minor = x*si - y*co;
  kern[src_idx] = cosf(omega * major + psi)
                  * expf(-(major*major) / major_sigq)
                  * expf(-(minor*minor) / minor_sigq);
}
00146 
//! Fill a 1D buffer with samples of a scaled exponential of a squared offset.
/*! One thread writes one element: for every index i below sz,

      res[i] = c * exp((i - hw)^2 * sig22)

    @param res      output buffer of at least sz floats
    @param c        multiplicative factor applied to every sample
    @param sig22    factor multiplying the squared offset inside the
                    exponential -- presumably -1 / (2 * sigma^2), computed
                    by the caller; confirm against the launch site
    @param hw       index at which the offset is zero
    @param tile_len index step between consecutive blocks -- presumably
                    equal to blockDim.x; confirm against the launch site
    @param sz       number of elements to write */
__global__ void cuda_global_gaussian(float *res, float c, float sig22, int hw, int tile_len, int sz)
{
  const int sample = blockIdx.x*tile_len + threadIdx.x;

  // Threads past the end of the buffer have nothing to write.
  if (sample >= sz)
    return;

  // Signed distance of this sample from the zero-offset index.
  const float offset = float(sample - hw);
  res[sample] = c*exp(offset*offset*sig22);
}
00158 
00159 
00160 #endif