00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039 #ifndef CUDA_FILTEROPS_H_DEFINED
00040 #define CUDA_FILTEROPS_H_DEFINED
00041
00042 #include <cuda.h>
00043 #include "CUDA/cutil.h"
00044 #include "cudadefs.h"
00045 #include <float.h>
00046
00047
/**
 * Multiplies an image by a complex plane wave.
 *
 * Each thread handles one pixel: column = blockIdx.x * tile_width + threadIdx.x,
 * row = blockIdx.y. For an in-range pixel the kernel writes
 *   re = src * intensity * cos(kx * x + ky * y)
 *   im = src * intensity * sin(kx * x + ky * y)
 * where (x, y) are the pixel coordinates measured from (w >> 1, h >> 1).
 *
 * @param src        input image, indexed as row * w + column
 * @param re         output, real part (same indexing as src)
 * @param im         output, imaginary part (same indexing as src)
 * @param kx, ky     wave-vector components applied to the centred coordinates
 * @param intensity  scale factor applied to every source value
 * @param w, h       image width and height in pixels
 * @param tile_width number of columns covered by one block in x
 *
 * NOTE(review): presumably launched with blockDim.x == tile_width and
 * gridDim.y >= h -- confirm against the host wrapper.
 */
__global__ void cuda_global_orientedFilter(const float *src, float *re, float *im, const float kx, const float ky, const float intensity, const int w, const int h, const int tile_width)
{
  const int x_idx = IMUL(blockIdx.x, tile_width) + threadIdx.x;
  const int y_idx = blockIdx.y;

  // Reject threads outside the image BEFORE touching memory. The grid rarely
  // divides the image evenly, and loading src[idx] first would read out of
  // bounds for the overhanging threads.
  if (x_idx >= w || y_idx >= h)
    return;

  const int idx = IMUL(y_idx, w) + x_idx;

  // Coordinates relative to the image centre.
  const int x_pos = x_idx - (w >> 1);
  const int y_pos = y_idx - (h >> 1);

  const float val = src[idx] * intensity;
  const float arg = kx * x_pos + ky * y_pos;

  // One call yields both the sine and the cosine of the phase.
  float sinarg, cosarg;
  sincosf(arg, &sinarg, &cosarg);

  re[idx] = val * cosarg;
  im[idx] = val * sinarg;
}
00081
00082
/**
 * Absolute center-surround difference: res = |center - surround|.
 *
 * One thread per center pixel (column = blockIdx.x * tile_width + threadIdx.x,
 * row = blockIdx.y). The matching surround sample is found by integer division
 * of the center coordinates by (scalex, scaley); it is then moved back by one
 * column when x_pos > remx and by one row (sw elements) when y_pos > remy.
 *
 * @param center   center image, lw x lh, row-major
 * @param surround surround image with a row length of sw elements
 * @param res      output, same indexing as center
 * @param sh       not used by this kernel
 */
__global__ void cuda_global_centerSurroundAbs(const float *center, const float *surround, float *res, int lw, int lh, int sw, int sh, int scalex, int scaley, int remx, int remy, int tile_width )
{
  const int x_pos = IMUL(blockIdx.x, tile_width) + threadIdx.x;
  const int y_pos = blockIdx.y;

  // Threads that fall outside the center image have nothing to do.
  if (x_pos >= lw || y_pos >= lh)
    return;

  const int center_idx = IMUL(y_pos, lw) + x_pos;

  // Surround index: scaled-down position, minus the remainder corrections.
  const int surround_base = IMUL(y_pos / scaley, sw) + x_pos / scalex;
  const int correction = ((x_pos > remx) ? 1 : 0) + ((y_pos > remy) ? sw : 0);
  const int surround_idx = surround_base - correction;

  const float c = center[center_idx];
  const float s = surround[surround_idx];

  // Always subtract the smaller from the larger value.
  res[center_idx] = (c > s) ? (c - s) : (s - c);
}
00110
/**
 * Clamped center-surround difference: res = center - surround where the
 * center value is larger, otherwise 0.
 *
 * One thread per center pixel (column = blockIdx.x * tile_width + threadIdx.x,
 * row = blockIdx.y). The matching surround sample is found by integer division
 * of the center coordinates by (scalex, scaley); it is then moved back by one
 * column when x_pos > remx and by one row (sw elements) when y_pos > remy.
 *
 * @param center   center image, lw x lh, row-major
 * @param surround surround image with a row length of sw elements
 * @param res      output, same indexing as center
 * @param sh       not used by this kernel
 */
__global__ void cuda_global_centerSurroundClamped(const float *center, const float *surround, float *res, int lw, int lh, int sw, int sh, int scalex, int scaley, int remx, int remy, int tile_width )
{
  const int x_pos = IMUL(blockIdx.x, tile_width) + threadIdx.x;
  const int y_pos = blockIdx.y;

  // Threads that fall outside the center image have nothing to do.
  if (x_pos >= lw || y_pos >= lh)
    return;

  const int center_idx = IMUL(y_pos, lw) + x_pos;

  // Surround index: scaled-down position, minus the remainder corrections.
  const int surround_base = IMUL(y_pos / scaley, sw) + x_pos / scalex;
  const int correction = ((x_pos > remx) ? 1 : 0) + ((y_pos > remy) ? sw : 0);
  const int surround_idx = surround_base - correction;

  const float c = center[center_idx];
  const float s = surround[surround_idx];

  // Keep only the positive part of the difference.
  res[center_idx] = (c > s) ? (c - s) : 0;
}
00138
/**
 * Directional center-surround difference, split into two outputs:
 *   center >  surround : pos = center - surround, neg = 0
 *   otherwise          : pos = 0,                 neg = surround - center
 *
 * One thread per center pixel (column = blockIdx.x * tile_width + threadIdx.x,
 * row = blockIdx.y). The matching surround sample is found by integer division
 * of the center coordinates by (scalex, scaley); it is then moved back by one
 * column when x_pos > remx and by one row (sw elements) when y_pos > remy.
 *
 * @param center   center image, lw x lh, row-major
 * @param surround surround image with a row length of sw elements
 * @param pos      output for the positive part, same indexing as center
 * @param neg      output for the negative part, same indexing as center
 * @param sh       not used by this kernel
 */
__global__ void cuda_global_centerSurroundDirectional(const float *center, const float *surround, float *pos, float *neg, int lw, int lh, int sw, int sh, int scalex, int scaley, int remx, int remy, int tile_width )
{
  const int x_pos = IMUL(blockIdx.x, tile_width) + threadIdx.x;
  const int y_pos = blockIdx.y;

  // Threads that fall outside the center image have nothing to do.
  if (x_pos >= lw || y_pos >= lh)
    return;

  const int center_idx = IMUL(y_pos, lw) + x_pos;

  // Surround index: scaled-down position, minus the remainder corrections.
  const int surround_base = IMUL(y_pos / scaley, sw) + x_pos / scalex;
  const int correction = ((x_pos > remx) ? 1 : 0) + ((y_pos > remy) ? sw : 0);
  const int surround_idx = surround_base - correction;

  const float c = center[center_idx];
  const float s = surround[surround_idx];

  // Exactly one of the two outputs receives the magnitude; the other gets 0.
  const bool center_wins = (c > s);
  pos[center_idx] = center_wins ? (c - s) : 0.0F;
  neg[center_idx] = center_wins ? 0.0F : (s - c);
}
00168
00169
00170
/**
 * Absolute center-surround difference with linear border attenuation:
 * res = |center - surround| * rowCoeff * colCoeff.
 *
 * One thread per center pixel (column = blockIdx.x * tile_width + threadIdx.x,
 * row = blockIdx.y). The matching surround sample is found by integer division
 * of the center coordinates by (scalex, scaley); it is then moved back by one
 * column when x_pos > remx and by one row (sw elements) when y_pos > remy.
 *
 * Attenuation: within borderSize pixels of an edge the value is scaled by
 * k / (borderSize + 1), where k is the 1-based distance from that edge
 * (k = 1 on the outermost row/column, k = borderSize on the innermost border
 * row/column). Row and column factors multiply, so corners are attenuated
 * twice. Interior pixels are left unscaled.
 *
 * Fixes relative to the previous revision:
 *  - the result is now actually stored to res (it was computed and dropped);
 *  - the bottom/right ramps depend on the pixel position and mirror the
 *    top/left ramps (they used the negative constants borderSize - lh and
 *    borderSize - lw);
 *  - the right-border test uses '>' (it used '<', which matched interior
 *    pixels instead of the right border);
 *  - the reciprocal uses a float literal, avoiding a double-precision divide.
 *
 * @param center      center image, lw x lh, row-major
 * @param surround    surround image with a row length of sw elements
 * @param res         output, same indexing as center
 * @param borderSize  width of the attenuated border in pixels
 * @param sh, tile_height  not used by this kernel
 */
__global__ void cuda_global_centerSurroundAbsAttenuate(const float *center, const float *surround, float *res, int lw, int lh, int sw, int sh, int borderSize, int scalex, int scaley, int remx, int remy, int tile_width, int tile_height)
{
  const int x_pos = IMUL(blockIdx.x, tile_width) + threadIdx.x;
  const int y_pos = blockIdx.y;

  // Threads that fall outside the center image have nothing to do.
  if (x_pos >= lw || y_pos >= lh)
    return;

  const int cidx = IMUL(y_pos, lw) + x_pos;

  // Surround index: scaled-down position, minus the remainder corrections.
  int sidx = IMUL(y_pos / scaley, sw) + x_pos / scalex;
  if (x_pos > remx) sidx--;
  if (y_pos > remy) sidx -= sw;

  const float cval = center[cidx];
  const float sval = surround[sidx];
  float result = (cval > sval) ? (cval - sval) : (sval - cval);

  // Step of the linear ramp: borderSize steps lead from ~0 up to ~1.
  const float increment = 1.0f / (float)(borderSize + 1);

  if (y_pos < borderSize)
  {
    // Top border: row 0 gets 1 * increment, growing towards the interior.
    result *= increment * (y_pos + 1);
  }
  else if (y_pos > lh - borderSize - 1)
  {
    // Bottom border: last row (lh - 1) gets 1 * increment.
    result *= increment * (lh - y_pos);
  }

  if (x_pos < borderSize)
  {
    // Left border: column 0 gets 1 * increment.
    result *= increment * (x_pos + 1);
  }
  else if (x_pos > lw - borderSize - 1)
  {
    // Right border: last column (lw - 1) gets 1 * increment.
    result *= increment * (lw - x_pos);
  }

  res[cidx] = result;
}
00227
00228
/**
 * Per-tile maximum for spatial max pooling.
 *
 * The grid is laid out as regions x tiles-per-region: blockIdx.x / y are split
 * into a region index (quotient) and a tile index inside that region
 * (remainder), with IDIVUP(reg_w, tile_width) x IDIVUP(reg_h, tile_height)
 * tiles per region. Region (rx, ry) starts at source pixel
 * (rx * skip_w, ry * skip_h) and spans reg_w x reg_h pixels.
 *
 * Every thread loads one source value into shared memory (or -FLT_MAX when
 * its pixel lies outside the source image or outside the region), the block
 * reduces the tile to its maximum, and thread 0 writes that maximum to
 * res[blockIdx.y * gridDim.x + blockIdx.x]. Each block therefore yields one
 * value per tile, not per region.
 *
 * Requirements visible from the indexing:
 *  - shared_data (declared outside this block) must provide at least
 *    tile_width * tile_height floats;
 *  - NOTE(review): assumes blockDim == (tile_width, tile_height) -- confirm
 *    against the host wrapper.
 *
 * Fix relative to the previous revision: the reduction loop, which contains
 * __syncthreads(), used to be wrapped in `if (y_pos < src_h && x_pos < src_w)`.
 * That condition can differ between threads of one block, which makes the
 * barrier undefined behaviour and could also drop the partial maxima held by
 * in-image partners of out-of-image threads. Out-of-range slots already hold
 * -FLT_MAX, so all threads can safely take part in the reduction.
 */
__global__ void cuda_global_spatialPoolMax(const float *src, float *res, const int src_w, const int src_h, int skip_w, int skip_h, int reg_w, int reg_h, int tile_width, int tile_height)
{
  // How many tiles are needed to cover one region in each direction.
  const int tilesperregion_w = IDIVUP(reg_w, tile_width);
  const int tilesperregion_h = IDIVUP(reg_h, tile_height);

  // Which region this block belongs to ...
  const int reg_x = blockIdx.x / tilesperregion_w;
  const int reg_y = blockIdx.y / tilesperregion_h;

  // ... and which tile inside that region it covers.
  const int tile_x = blockIdx.x % tilesperregion_w;
  const int tile_y = blockIdx.y % tilesperregion_h;

  // Position inside the region, then inside the source image.
  const int reg_x_pos = IMUL(tile_x, tile_width) + threadIdx.x;
  const int reg_y_pos = IMUL(tile_y, tile_height) + threadIdx.y;
  const int x_pos = IMUL(reg_x, skip_w) + reg_x_pos;
  const int y_pos = IMUL(reg_y, skip_h) + reg_y_pos;

  const int ld_idx = IMUL(threadIdx.y, tile_width) + threadIdx.x;
  const int src_idx = IMUL(y_pos, src_w) + x_pos;

  float *data = (float *) shared_data;
  const int tile_sz = IMUL(tile_height, tile_width);

  // Stage the tile in shared memory; pad with the identity element of max
  // wherever the thread maps outside the image or outside the region.
  const bool in_range = (y_pos < src_h && x_pos < src_w &&
                         reg_x_pos < reg_w && reg_y_pos < reg_h);
  data[ld_idx] = in_range ? src[src_idx] : -FLT_MAX;

  __syncthreads();

  // Tree reduction with a doubling stride. Every thread of the block executes
  // every iteration so that the barrier is reached uniformly.
  for (int incr = 1; incr < tile_sz; incr *= 2)
  {
    const int mod = incr * 2;
    if (ld_idx % mod == 0 && ld_idx + incr < tile_sz)
    {
      if (data[ld_idx] < data[ld_idx + incr])
        data[ld_idx] = data[ld_idx + incr];
    }
    __syncthreads();
  }

  // Slot 0 now holds the tile maximum.
  if (ld_idx == 0)
  {
    const int res_idx = IMUL(blockIdx.y, gridDim.x) + blockIdx.x;
    res[res_idx] = data[ld_idx];
  }
}
00288
00289
00290
00291 #endif