/*!@file CUDA/cuda_kernels.h CUDA/GPU convolution kernel generation code */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
// by the University of Southern California (USC) and the iLab at USC.  //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE.  See the GNU General Public License for more details.       //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file:
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/CUDA/cuda_kernels.h $
// $Id: cuda_kernels.h 12962 2010-03-06 02:13:53Z irock $
//


#ifndef CUDA_KERNELS_H_DEFINED
#define CUDA_KERNELS_H_DEFINED

#include <cuda.h>
#include "CUDA/cutil.h"
#include "cudadefs.h"

// NOTE(review): the kernels below are defined (not merely declared) in this
// header without 'static'/'inline'; presumably it is included from a single
// translation unit only -- confirm before including it elsewhere.

// All kernels in this file work in single precision.  M_PI is a double
// constant, so it is narrowed to float once per kernel to keep the
// arithmetic from being silently promoted to double.


//! Fill a square size x size filter for the HMAX model: a cosine carrier
//! under a Gaussian envelope, zeroed outside a disc of radius size/2.
/*! One thread writes one element of dest (row-major, row stride = size).
    @param dest        output buffer; element (xpos,ypos) is stored at
                       index ypos*size + xpos
    @param theta       rotation angle, in degrees
    @param gamma       scale factor applied to the rotated y coordinate
                       inside the Gaussian envelope
    @param size        side length of the square filter (total size*size)
    @param div         divisor used to derive the wavelength:
                       lambda = size*2/div
    @param tile_width  x extent of the tile handled by one block
    @param tile_height y extent of the tile handled by one block
    NOTE(review): tile_width/tile_height are presumably equal to the block
    dimensions used at launch -- confirm against the host wrapper. */
__global__ void cuda_global_dogFilterHmax(float *dest, const float theta, const float gamma, const int size, const float div, const int tile_width, const int tile_height)
{
  // Element of the filter handled by this thread
  const int ypos = IMUL(blockIdx.y, tile_height) + threadIdx.y;
  const int xpos = IMUL(blockIdx.x, tile_width) + threadIdx.x;

  // Threads that fall outside the filter have nothing to do
  if (xpos >= size || ypos >= size)
    return;

  const float pi = (float)M_PI;

  // convert the rotation angle from degrees to radians
  const float thetaRads = pi / 180.0F * theta;

  // calculate constants
  const float lambda = size * 2.0F / div;
  const float sigma = lambda * 0.8F;
  const float sigq = sigma * sigma;
  const int center = (int)ceilf(size / 2.0F);
  const int filtSizeL = center - 1;
  const int dst_idx = IMUL(ypos, size) + xpos;

  // coordinates relative to the filter center
  const int x = xpos - filtSizeL;
  const int y = ypos - filtSizeL;

  if (sqrtf((float)(IMUL(x, x) + IMUL(y, y))) > size / 2.0F)
    {
      // outside the disc inscribed in the square filter
      dest[dst_idx] = 0.0F;
    }
  else
    {
      const float co = cosf(thetaRads);
      const float si = sinf(thetaRads);

      // rotated coordinates; only rtX drives the cosine carrier, which is
      // what gives the filter its orientation
      const float rtX = y * co - x * si;
      const float rtY = y * si + x * co;

      dest[dst_idx] = expf(-(rtX*rtX + gamma*gamma*rtY*rtY) / (2.0F*sigq)) *
        cosf(2.0F * pi * rtX / lambda);
    }
}

//! Fill a square size x size oriented filter:
//! (rtX^2/sigq - 1)/sigq * exp(-(rtX^2 + rtY^2)/(2*sigq)), sigq = stddev^2.
/*! One thread writes one element of dest (row-major, row stride = size).
    @param dest        output buffer; element (xpos,ypos) is stored at
                       index ypos*size + xpos
    @param stddev      standard deviation of the Gaussian
    @param theta       rotation angle, in degrees
    @param half_size   offset subtracted from xpos/ypos to obtain
                       coordinates relative to the filter center
    @param size        side length of the square filter
    @param tile_width  x extent of the tile handled by one block
    @param tile_height y extent of the tile handled by one block
    NOTE(review): tile_width/tile_height are presumably equal to the block
    dimensions used at launch -- confirm against the host wrapper. */
__global__ void cuda_global_dogFilter(float *dest, float stddev, float theta, int half_size, int size, int tile_width, int tile_height)
{
  // Element of the filter handled by this thread
  const int ypos = IMUL(blockIdx.y, tile_height) + threadIdx.y;
  const int xpos = IMUL(blockIdx.x, tile_width) + threadIdx.x;

  // Threads that fall outside the filter have nothing to do
  if (xpos >= size || ypos >= size)
    return;

  // convert the rotation angle from degrees to radians
  const float thetaRads = (float)M_PI / 180.0F * theta;
  const float co = cosf(thetaRads);
  const float si = sinf(thetaRads);

  // calculate constants
  const float sigq = stddev * stddev;
  const int dst_idx = IMUL(ypos, size) + xpos;

  // coordinates relative to the filter center
  const int x = xpos - half_size;
  const int y = ypos - half_size;

  // rotated coordinates; the polynomial factor below depends on rtX only
  // (the y-directional component is omitted), which orients the filter
  const float rtX = x * co + y * si;
  const float rtY = -x * si + y * co;

  dest[dst_idx] = (rtX*rtX/sigq - 1.0F)/sigq *
    expf(-(rtX*rtX + rtY*rtY) / (2.0F*sigq));
}



//! Fill a (2*size+1) x (2*size+1) Gabor kernel: a cosine along the major
//! axis multiplied by separate Gaussians along the major and minor axes.
/*! One thread writes one element of kern, addressed by a flat index.
    @param kern          output buffer, row-major with row length 2*size+1
    @param major_stddev  standard deviation along the major axis
    @param minor_stddev  standard deviation along the minor axis
    @param period        period of the cosine carrier
    @param phase         phase of the cosine carrier, in degrees
    @param theta         rotation angle, in degrees
    @param size          half-width: x and y each span [-size, size]
    @param tile_len      number of elements handled by one block
    @param sz            number of elements to write; indices >= sz are
                         skipped
    NOTE(review): tile_len is presumably equal to blockDim.x and sz to
    (2*size+1)^2 -- confirm against the host wrapper. */
__global__ void cuda_global_gaborFilter3(float *kern, const float major_stddev, const float minor_stddev,
                                         const float period, const float phase,
                                         const float theta, const int size, const int tile_len, const int sz)
{
  const int src_idx = blockIdx.x*tile_len + threadIdx.x;

  // Threads past the end of the kernel have nothing to do
  if (src_idx >= sz)
    return;

  const float pi = (float)M_PI;

  // convert the angles from degrees to radians:
  const float psi = pi / 180.0F * phase;
  const float rtDeg = pi / 180.0F * theta;

  // calculate constants:
  const float omega = (2.0F * pi) / period;
  const float co = cosf(rtDeg), si = sinf(rtDeg);
  const float major_sigq = 2.0F * major_stddev * major_stddev;
  const float minor_sigq = 2.0F * minor_stddev * minor_stddev;

  // Unflatten the index; equivalent to the sequential loops
  //   for (int y = -size; y <= size; ++y)
  //     for (int x = -size; x <= size; ++x)
  // Plain integer division/modulo is enough here: no float round trip.
  const int width = size*2 + 1;
  const int y = src_idx / width - size;
  const int x = src_idx % width - size;

  // compute gabor:
  const float major = x*co + y*si;
  const float minor = x*si - y*co;
  kern[src_idx] = cosf(omega * major + psi)
    * expf(-(major*major) / major_sigq)
    * expf(-(minor*minor) / minor_sigq);
}

//! Fill a 1D kernel with res[idx] = c * exp((idx-hw)^2 * sig22).
/*! One thread writes one element of res.
    @param res       output buffer
    @param c         multiplicative coefficient
    @param sig22     factor multiplying x*x inside the exponential
                     (presumably -1/(2*sigma^2) -- confirm with the caller)
    @param hw        index of the kernel center (x = idx - hw)
    @param tile_len  number of elements handled by one block
    @param sz        number of elements to write; indices >= sz are skipped
    NOTE(review): tile_len is presumably equal to blockDim.x -- confirm
    against the host wrapper. */
__global__ void cuda_global_gaussian(float *res, float c, float sig22, int hw, int tile_len, int sz)
{
  const int idx = blockIdx.x*tile_len + threadIdx.x;

  if (idx < sz)
    {
      const float x = float(idx - hw);
      res[idx] = c * expf(x*x*sig22);
    }
}


#endif