cuda_kernels.h
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039 #ifndef CUDA_KERNELS_H_DEFINED
00040 #define CUDA_KERNELS_H_DEFINED
00041
00042 #include <cuda.h>
00043 #include "CUDA/cutil.h"
00044 #include "cudadefs.h"
00045
00046
// Fills a size x size, row-major filter kernel in `dest`.
//
// Each thread produces one element, located at
//   column = blockIdx.x * tile_width  + threadIdx.x
//   row    = blockIdx.y * tile_height + threadIdx.y
// Threads that land outside the size x size square do nothing.
// NOTE(review): tile_width / tile_height presumably equal the launch's
// blockDim.x / blockDim.y -- confirm against the host-side launcher.
//
// Parameters:
//   dest        output buffer, written at index row * size + column
//   theta       orientation in degrees (converted to radians below)
//   gamma       weight applied to the rotated Y coordinate in the envelope
//   size        edge length of the square filter, in elements
//   div         divisor used to derive the wavelength: lambda = 2 * size / div
//   tile_width  per-block column stride used for indexing
//   tile_height per-block row stride used for indexing
//
// Elements farther than size/2 from the filter centre are set to zero; the
// rest receive a Gaussian envelope multiplied by a cosine carrier.
//
// All arithmetic is kept in single precision: M_PI is a double constant, so it
// is narrowed once here to avoid promoting the per-pixel cosine and the angle
// conversion to double-precision math.
// IMUL comes from cudadefs.h (not visible in this file) and is used as an
// integer multiply.
__global__ void cuda_global_dogFilterHmax(float *dest, const float theta, const float gamma, const int size, const float div, const int tile_width, const int tile_height)
{
  const int ypos = IMUL(blockIdx.y, tile_height) + threadIdx.y;
  const int xpos = IMUL(blockIdx.x, tile_width) + threadIdx.x;

  // Grid tail: nothing to write for threads outside the filter.
  if (xpos >= size || ypos >= size)
    return;

  const float pi = (float)M_PI;

  // Degrees -> radians.
  const float thetaRads = pi / 180.0F * theta;

  // Wavelength and envelope width are derived from the filter size.
  const float lambda = size * 2.0F / div;
  const float sigma = lambda * 0.8F;
  const float sigq = sigma * sigma;

  // Coordinates relative to the filter centre.
  const int center = (int)ceilf(size / 2.0F);
  const int filtSizeL = center - 1;
  const int x = xpos - filtSizeL;
  const int y = ypos - filtSizeL;

  const int dst_idx = IMUL(ypos, size) + xpos;

  // Circular support: zero everything outside radius size/2.
  if (sqrtf((float)(IMUL(x, x) + IMUL(y, y))) > size / 2.0F)
  {
    dest[dst_idx] = 0.0F;
    return;
  }

  // Evaluate the rotation terms once; they are shared by rtX and rtY.
  const float co = cosf(thetaRads);
  const float si = sinf(thetaRads);
  const float rtX = y * co - x * si;
  const float rtY = y * si + x * co;

  dest[dst_idx] = expf(-(rtX * rtX + gamma * gamma * rtY * rtY) / (2.0F * sigq)) *
                  cosf(2.0F * pi * rtX / lambda);
}
00086
// Fills a size x size, row-major filter kernel in `dest`.
//
// One thread per element: column = blockIdx.x * tile_width + threadIdx.x,
// row = blockIdx.y * tile_height + threadIdx.y. Threads whose column or row
// is >= size return without writing.
// NOTE(review): tile_width / tile_height presumably match the launch block
// dimensions -- confirm against the host-side launcher.
//
// Parameters:
//   dest        output buffer, written at index row * size + column
//   stddev      standard deviation; only its square is used
//   theta       orientation in degrees
//   half_size   offset subtracted from column/row to centre the coordinates
//   size        edge length of the square filter, in elements
//   tile_width  per-block column stride used for indexing
//   tile_height per-block row stride used for indexing
//
// Each element is ((rx^2 / s^2) - 1) / s^2 * exp(-(rx^2 + ry^2) / (2 s^2)),
// where (rx, ry) is the centred coordinate rotated by theta and s = stddev.
// IMUL comes from cudadefs.h (not visible in this file) and is used as an
// integer multiply.
__global__ void cuda_global_dogFilter(float *dest, float stddev, float theta, int half_size, int size, int tile_width, int tile_height)
{
  const int row = IMUL(blockIdx.y, tile_height) + threadIdx.y;
  const int col = IMUL(blockIdx.x, tile_width) + threadIdx.x;

  // Grid tail: skip threads that fall outside the filter.
  if (!(col < size && row < size))
    return;

  // Degrees -> radians.
  const float angle = M_PI / 180.0F * theta;
  const float cosA = cos(angle);
  const float sinA = sin(angle);

  const float variance = stddev * stddev;

  // Centred integer coordinates of this element.
  const int x = col - half_size;
  const int y = row - half_size;

  // Rotate into the filter's own frame.
  const float rtX = x * cosA + y * sinA;
  const float rtY = -x * sinA + y * cosA;

  const float shape = (rtX*rtX/variance - 1.0F)/variance;
  const float envelope = exp(-(rtX*rtX + rtY*rtY)/(2.0F*variance));

  dest[IMUL(row, size) + col] = shape * envelope;
}
00112
00113
00114
// Fills a flattened Gabor-style kernel in `kern`, one element per thread.
//
// The flat element index is blockIdx.x * tile_len + threadIdx.x; threads with
// an index >= sz return without writing. The index is decomposed assuming
// (2 * size + 1) elements per row, and both coordinates are shifted by -size
// so that they are centred on zero.
// NOTE(review): sz is presumably (2 * size + 1)^2 and tile_len presumably
// equals blockDim.x -- confirm against the host-side launcher.
//
// Parameters:
//   kern          output buffer, written at the flat element index
//   major_stddev  standard deviation along the rotated "major" axis
//   minor_stddev  standard deviation along the rotated "minor" axis
//   period        period of the cosine carrier (omega = 2 * pi / period)
//   phase         carrier phase offset in degrees
//   theta         orientation in degrees
//   size          half-width; each row has 2 * size + 1 elements
//   tile_len      per-block stride used for indexing
//   sz            number of elements to write
//
// Each element is cos(omega * major + psi) * exp(-major^2 / (2 s_major^2))
//                                         * exp(-minor^2 / (2 s_minor^2)).
//
// All arithmetic stays in single precision: M_PI is a double constant, so it
// is narrowed once to keep the angle and frequency setup out of double math.
__global__ void cuda_global_gaborFilter3(float *kern, const float major_stddev, const float minor_stddev,
                                         const float period, const float phase,
                                         const float theta, const int size, const int tile_len, const int sz)
{
  const int src_idx = blockIdx.x * tile_len + threadIdx.x;

  // Grid tail: nothing to compute for threads past the end of the kernel.
  if (src_idx >= sz)
    return;

  const float pi = (float)M_PI;

  // Degrees -> radians for the phase offset and the orientation.
  const float psi = pi / 180.0F * phase;
  const float rtDeg = pi / 180.0F * theta;

  const float omega = (2.0F * pi) / period;
  const float co = cosf(rtDeg), si = sinf(rtDeg);
  const float major_sigq = 2.0F * major_stddev * major_stddev;
  const float minor_sigq = 2.0F * minor_stddev * minor_stddev;

  // Row/column from the flat index. src_idx is non-negative, so integer
  // division already yields the floor; no float round trip is needed (and
  // it would stop being exact for quotients above 2^24).
  const int row_len = size * 2 + 1;
  const int y = src_idx / row_len - size;
  const int x = src_idx % row_len - size;

  // Rotate into the filter's major/minor axes.
  const float major = x * co + y * si;
  const float minor = x * si - y * co;

  kern[src_idx] = cosf(omega * major + psi)
                * expf(-(major * major) / major_sigq)
                * expf(-(minor * minor) / minor_sigq);
}
00146
// Fills `res` with a 1-D Gaussian-shaped profile, one element per thread.
//
// The element index is blockIdx.x * tile_len + threadIdx.x; threads with an
// index >= sz return without writing.
// NOTE(review): tile_len presumably equals blockDim.x -- confirm against the
// host-side launcher.
//
// Parameters:
//   res       output buffer, written at the element index
//   c         scale factor applied to every element
//   sig22     factor multiplying the squared offset inside exp()
//             (presumably -1 / (2 * sigma^2) -- verify against the caller)
//   hw        index treated as the centre; offset = index - hw
//   tile_len  per-block stride used for indexing
//   sz        number of elements to write
//
// res[i] = c * exp((i - hw)^2 * sig22)
__global__ void cuda_global_gaussian(float *res, float c, float sig22, int hw, int tile_len, int sz)
{
  const int elem = blockIdx.x * tile_len + threadIdx.x;

  // Grid tail: skip threads past the end of the output.
  if (elem >= sz)
    return;

  // Signed distance from the centre sample.
  const float offset = float(elem - hw);
  res[elem] = c * exp(offset * offset * sig22);
}
00158
00159
00160 #endif