00001 /*!@file CUDA/cuda_debayer.h CUDA/GPU optimized color operations code */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005 // 00005 // by the University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/CUDA/cuda_debayer.h $ 00035 // $Id: cuda_debayer.h 13228 2010-04-15 01:49:10Z itti $ 00036 // 00037 #ifndef CUDA_DEBAYER_H_DEFINED 00038 #define CUDA_DEBAYER_H_DEFINED 00039 00040 #include <cuda.h> 00041 #include "CUDA/cutil.h" 00042 #include "cudadefs.h" 00043 00044 00045 00046 //###################################################################### 00047 __global__ void cuda_debayer_kernel_Nearest_Neighbour(float *src,float3_t *dptr,int w,int h,int tile_width,int tile_height) 00048 { 00049 00050 int Row = blockIdx.y * tile_height + (threadIdx.y); 00051 int Col = blockIdx.x * tile_width + (threadIdx.x); 00052 if((Row*w+Col)<w*h) 00053 { 00054 if(Row%2==0 && Col%2==0) 00055 { 00056 dptr[Row*w+Col].p[0]= src[ Col + ((Row+1) * w) ]; 00057 dptr[Row*w+Col].p[1]= src[ Col + ( (Row) * w ) ]; 00058 dptr[Row*w+Col].p[2]= src[ Col + 1 + Row * w ]; 00059 } 00060 if(Row%2==0 && Col%2==1) 00061 { 00062 dptr[Row*w+Col].p[0]= src[Col -1 + ( (Row+1) * w ) ]; 00063 dptr[Row*w+Col].p[1]= src[Col + (Row+1) * w]; 00064 dptr[Row*w+Col].p[2]= src[Col + ( (Row) * w ) ]; 00065 } 00066 if(Row%2==1 && Col%2==0) 00067 { 00068 dptr[Row*w+Col].p[0]= src[Col + (Row * w) ]; 00069 dptr[Row*w+Col].p[1]= src[Col + Row * w ]; 00070 dptr[Row*w+Col].p[2]= src[Col +1 + ( (Row-1) * w) ]; 00071 } 00072 if(Row%2==1 && Col%2==1) 00073 { 00074 dptr[Row*w+Col].p[0]= src[Col -1 + Row * w ]; 00075 dptr[Row*w+Col].p[1]= src[Col + Row * w]; 00076 dptr[Row*w+Col].p[2]= src[Col + ( (Row-1) * w) ]; 00077 } 00078 } 00079 00080 } 00081 00082 //###################################################################### 00083 __global__ void cuda_debayer_kernel_MHC_optimised(float *src,float3_t *dptr,int w,int h,int tile_width,int tile_height) 00084 { 00085 //Create the shared memory space 00086 //float *Mds = (float*) shared_data; 00087 //Calculate necessary indexes 00088 //Create border 00089 /* 00090 float *border = (float *) &data[tile_width]; 00091 // Load the border always and this will speed up the computation 00092 // and get rid of all this symbol table crap 00093 00094 //Loading the border 00095 int width_border = tile_width + 4; 00096 int height_border = tile_height + 4; 00097 int shared_index = sy*width_border+sx; //How to adjust the border 00098 00099 00100 Mds[shared_index]=src[global_index]; 00101 00102 */ 00103 00104 const int sts_y = blockIdx.y * tile_height; 00105 const int sts_x = blockIdx.x * tile_width; 00106 00107 const int sx = threadIdx.x; // x source pixel within source tile 00108 const int sy = threadIdx.y; // y source pixel within source tile 00109 const int Row = sts_y + sy; 00110 const int Col = sts_x + sx; 00111 00112 00113 float *data = (float*) shared_data; 00114 00115 00116 //In the shared memory 00117 00118 data[(sy*tile_width)+sx]=src[(Row*w)+Col]; 00119 00120 __syncthreads(); 00121 if(Row<h&&Col<w) 00122 { 00123 //Threads synced 00124 00125 //Global row column conditions 00126 /* 00127 int Row_min_1=Row-1; 00128 int Row_min_2=Row-2; 00129 int Row_pls_1=Row+1; 00130 int Row_pls_2=Row+2; 00131 int Col_min_1=Col-1; 00132 int Col_min_2=Col-2; 00133 int Col_pls_1=Col+1; 00134 int Col_pls_2=Col+2; 00135 int sx_min_1=sx-1; 00136 int sx_min_2=sx-2; 00137 int sx_pls_1=sx+1; 00138 int sx_pls_2=sx+2; 00139 int sy_min_1=sy-1; 00140 int sy_min_2=sy-2; 00141 int sy_pls_1=sy+1; 00142 int sy_pls_2=sy+2; 00143 00144 bool bulk = (sy>1)&&(sy<tile_height-2)&&(sx>1)&&(sx<tile_width-2); 00145 */ 00146 /* 00147 if(!bulk) 00148 { 00149 if(sts_y<2||sts_y>h-3) 00150 { 00151 switch(sts_y) //Global row 00152 { 00153 case 0: Row_min_1=Row; Row_min_2 = Row; break; 00154 case 1: Row_min_2=Row;break; 00155 00156 } 00157 switch(sts_y-h) //Global row 00158 { 00159 case -2: Row_pls_2 =Row;break; 00160 case -1: Row_pls_2= Row; Row_pls_1 =Row;break; 00161 } 00162 } 00163 if(sts_x<2||sts_x>w-3) 00164 { 00165 switch(sts_x) //Global column 00166 { 00167 case 0: Col_min_1=Col; Col_min_2 = Col; break; 00168 case 1: Col_min_2=Col;break; 00169 00170 } 00171 switch(sts_x-w) //Global row 00172 { 00173 case -2: Col_pls_2 =Col;break; 00174 case -1: Col_pls_2= Col; Col_pls_1 =Col;break; 00175 } 00176 } 00177 //Local row and column conditions 00178 00179 if(sy<2||sy>tile_height-3) 00180 { 00181 switch(sy) //Local row sy 00182 { 00183 case 0: sy_min_1 = tile_height+2; sy_min_2 = tile_height+3; break; 00184 case 1: sy_min_2 =tile_height+2;break; 00185 00186 00187 } 00188 switch(sy-tile_height) 00189 { 00190 case -2:sy_pls_2=tile_height;break; 00191 case -1:sy_pls_1=tile_height;sy_pls_2=tile_height+1;break; 00192 } 00193 } 00194 if(sx<2||sx>tile_width-3) 00195 { 00196 switch(sx) //Local column sx 00197 { 00198 case 0: sx_min_1 = tile_width+2; sx_min_2 = tile_width+3; break; 00199 case 1: sx_min_2 =tile_width+2;break; 00200 00201 00202 00203 } 00204 switch(sx-tile_width) 00205 { 00206 case -2:sx_pls_2=tile_width;break; 00207 case -1:sx_pls_1=tile_width;sx_pls_2=tile_width+1;break; 00208 } 00209 } 00210 //Assigning new values at those border points 00211 if(sy<2||sy>tile_height-3) 00212 { 00213 switch(sy) 00214 { 00215 case 0: 00216 { 00217 Mds[sy_min_1*tile_width+sx_min_1] = src[Col_min_1 + ((Row_min_1) * w)]; 00218 Mds[sy_min_1*tile_width+sx_pls_1] = src[Col_pls_1 + ((Row_min_1) * w)]; 00219 Mds[sy_min_1*tile_width+sx] = src[Col + ((Row_min_1) * w)]; 00220 Mds[sy_min_2*tile_width+sx] = src[Col + ((Row_min_2) * w)];break; 00221 } 00222 case 1: 00223 Mds[sy_min_2*tile_width+sx] = src[Col + ((Row_min_2) * w)]; break; 00224 00225 } 00226 switch(sy-tile_height) 00227 { case -2: 00228 Mds[sy_pls_2*tile_width+sx] = src[Col + ((Row_pls_2) * w)]; break; 00229 case -1: 00230 { 00231 Mds[sy_pls_1*tile_width+sx_min_1] = src[Col_min_1 + ((Row_pls_1) * w)]; 00232 Mds[sy_pls_1*tile_width+sx_pls_1] = src[Col_pls_1 + ((Row_pls_1) * w)]; 00233 Mds[sy_pls_1*tile_width+sx] = src[Col + ((Row_pls_1) * w)]; 00234 Mds[sy_pls_2*tile_width+sx] = src[Col + ((Row_pls_2) * w)]; break; 00235 } 00236 } 00237 } 00238 if(sx<2||sx>tile_width-3) 00239 { 00240 switch(sx) 00241 { 00242 case 0: 00243 { 00244 Mds[sy_min_1*tile_width+sx_min_1] = src[Col_min_1 + ((Row_min_1) * w)]; 00245 Mds[sy_pls_1*tile_width+sx_min_1] = src[Col_min_1 + ((Row_pls_1) * w)]; 00246 Mds[sy*tile_width+sx_min_1] = src[Col_min_1 + ((Row) * w)]; 00247 Mds[sy*tile_width+sx_min_2] = src[Col_min_2 + ((Row) * w)]; break; 00248 } 00249 case 1: 00250 Mds[sy*tile_width+sx_min_2] = src[Col_min_2 + ((Row) * w)]; break; 00251 00252 } 00253 switch(sx-tile_width) 00254 { 00255 case -2: 00256 Mds[sy*tile_width+sx_pls_2] = src[Col_pls_2 + ((Row) * w)]; break; 00257 case -1: 00258 { 00259 Mds[sy_min_1*tile_width+sx_pls_1] = src[Col_pls_1 + ((Row_min_1) * w)]; 00260 Mds[sy_pls_1*tile_width+sx_pls_1] = src[Col_pls_1 + ((Row_pls_1) * w)]; 00261 Mds[sy*tile_width+sx_pls_1] = src[Col_pls_1 + ((Row) * w)]; 00262 Mds[sy*tile_width+sx_pls_2] = src[Col_pls_2 + ((Row) * w)]; break; 00263 } 00264 } 00265 } 00266 }*/ 00267 //Calculating constants 00268 00269 { 00270 int A,B,C,D,E,F; 00271 00272 A = data[(sy-1)*(tile_width)+sx-1]+ 00273 data[(sy-1)*(tile_width)+sx+1]+ 00274 data[(sy+1)*(tile_width)+sx-1]+ 00275 data[(sy+1)*(tile_width)+sx+1]; 00276 00277 00278 B = data[(sy-1)*(tile_width)+sx]+ 00279 data[(sy+1)*(tile_width)+sx]; 00280 00281 00282 C = data[(sy-2)*(tile_width)+sx]+ 00283 data[(sy+2)*(tile_width)+sx]; 00284 00285 D = data[(sy)*(tile_width)+sx-1]+ 00286 data[(sy)*(tile_width)+sx+1]; 00287 00288 E = data[(sy)*(tile_width)+sx-2]+ 00289 data[(sy)*(tile_width)+sx+2]; 00290 00291 F= data[(sy)*(tile_width)+sx]; 00292 00293 /* 00294 A = src[(sy_min_1)*(tile_width)+sx_min_1]+ 00295 src[(sy_min_1)*(tile_width)+sx_pls_1]+ 00296 src[(sy_pls_1)*(tile_width)+sx_min_1]+ 00297 src[(sy_pls_1)*(tile_width)+sx_pls_1]; 00298 00299 00300 B = src[(sy_min_1)*(tile_width)+sx]+ 00301 src[(sy_pls_1)*(tile_width)+sx]; 00302 00303 00304 C = src[(sy_min_2)*(tile_width)+sx]+ 00305 src[(sy_pls_2)*(tile_width)+sx]; 00306 00307 D = src[(sy)*(tile_width)+sx_min_1]+ 00308 src[(sy)*(tile_width)+sx_pls_1]; 00309 00310 E = src[(sy)*(tile_width)+sx_min_2]+ 00311 src[(sy)*(tile_width)+sx_pls_2]; 00312 00313 F= src[(sy)*(tile_width)+sx]; 00314 00315 *//* 00316 A = src[(Row_min_1)*(w)+Col_min_1]+ 00317 src[(Row_min_1)*(w)+Col_pls_1]+ 00318 src[(Row_pls_1)*(w)+Col_min_1]+ 00319 src[(Row_pls_1)*(w)+Col_pls_1]; 00320 00321 00322 B = src[(Row_min_1)*(w)+Col]+ 00323 src[(Row_pls_1)*(w)+Col]; 00324 00325 00326 C = src[(Row_min_2)*(w)+Col]+ 00327 src[(Row_pls_2)*(w)+Col]; 00328 00329 D = src[(Row)*(w)+Col_min_1]+ 00330 src[(Row)*(w)+Col_pls_1]; 00331 00332 E = src[(Row)*(w)+Col_min_2]+ 00333 src[(Row)*(w)+Col_pls_2]; 00334 00335 F= src[(Row)*(w)+Col]; 00336 */ 00337 //Odd Even Row Col conditions 00338 /* int sit_3; 00339 if(Row%2==0) 00340 sit_3 = 0; 00341 else 00342 sit_3 = 1; 00343 if(Col%2==0) 00344 sit_3 = sit_3 + 2; 00345 else 00346 sit_3 = sit_3 + 4;*/ 00347 if(Row%2==0 && Col%2==0) 00348 { 00349 dptr[Row*w+Col].p[0]= ((5*F) -(1 * (A + C)) + (4 * B) + ((1/2) * E))/8 ; 00350 dptr[Row*w+Col].p[1]= F; 00351 dptr[Row*w+Col].p[2]= ((5*F) -(1 * (A + E)) + (4 * D) + ((1/2) * C))/8 ; 00352 00353 } 00354 if(Row%2==1 && Col%2==0){ 00355 dptr[Row*w+Col].p[0]= F ; 00356 dptr[Row*w+Col].p[1]= ((4*F) -(1 * (C + E)) + (2 * (D+B)))/8; 00357 dptr[Row*w+Col].p[2]= ((6*F) -((3/2) * (E + C)) + (2 * A)) /8 ; 00358 00359 } 00360 if(Row%2==0 && Col%2==1){ 00361 dptr[Row*w+Col].p[0]= ((6*F) -((3/2) * (E + C)) + (2 * A)) /8 ; 00362 dptr[Row*w+Col].p[1]= ((4*F) -(1 * (C + E)) + (2 * (D+B)))/8; 00363 dptr[Row*w+Col].p[2]= F ; 00364 } 00365 if(Row%2==1 && Col%2==1){ 00366 dptr[Row*w+Col].p[0]= ((5*F) -(1 * (A + E)) + (4 * D) + ((1/2) * C))/8 ; 00367 dptr[Row*w+Col].p[1]= F; 00368 dptr[Row*w+Col].p[2]= ((5*F) -(1 * (A + C)) + (4 * B) + ((1/2) * E))/8; 00369 } 00370 } 00371 /* 00372 switch(sit_3) 00373 { 00374 case 2: dptr[Row*w+Col].p[0]= ((5*F) -(1 * (A + C)) + (4 * B) + ((1/2) * E))/8 ; 00375 dptr[Row*w+Col].p[1]= F; 00376 dptr[Row*w+Col].p[2]= ((5*F) -(1 * (A + E)) + (4 * D) + ((1/2) * C))/8 ; 00377 break; 00378 case 3: dptr[Row*w+Col].p[0]= F ; 00379 dptr[Row*w+Col].p[1]= ((4*F) -(1 * (C + E)) + (2 * (D+B)))/8; 00380 dptr[Row*w+Col].p[2]= ((6*F) -((3/2) * (E + C)) + (2 * A)) /8 ; 00381 00382 break; 00383 case 4: dptr[Row*w+Col].p[0]= ((6*F) -((3/2) * (E + C)) + (2 * A)) /8 ; 00384 dptr[Row*w+Col].p[1]= ((4*F) -(1 * (C + E)) + (2 * (D+B)))/8; 00385 dptr[Row*w+Col].p[2]= F ; 00386 break; 00387 case 5: dptr[Row*w+Col].p[0]= ((5*F) -(1 * (A + E)) + (4 * D) + ((1/2) * C))/8 ; 00388 dptr[Row*w+Col].p[1]= F; 00389 dptr[Row*w+Col].p[2]= ((5*F) -(1 * (A + C)) + (4 * B) + ((1/2) * E))/8; 00390 break; 00391 00392 } */ 00393 } 00394 00395 } 00396 00397 00398 __global__ void cuda_kernel_debayer(float *src,float3_t *dptr,int w,int h,int tile_width,int tile_height) 00399 { 00400 00401 00402 int Row = blockIdx.y * tile_height + (threadIdx.y); 00403 int Col = blockIdx.x * tile_width + (threadIdx.x); 00404 int A,B,C,D,E,F; 00405 00406 00407 if((Row>1||Row<(w-2))&&(Col>1||Col<(h-2))&&((Row*w+Col)<=w*h)) 00408 { 00409 A = src[ Col-1 + ((Row-1) * w) ]+ 00410 src[ Col+1 + ((Row-1) * w) ]+ 00411 src[ Col-1 + ((Row+1) * w) ]+ 00412 src[ Col+1 + ((Row+1) * w) ]; 00413 00414 B = src[ Col + ((Row+1) * w) ] + 00415 src[ Col + ((Row-1) * w) ]; 00416 00417 C= src[ Col + ((Row+2) * w) ]+ 00418 src[ Col + ((Row-2) * w) ]; 00419 00420 D= src[ Col-1 + ((Row) * w) ]+ 00421 src[ Col+1 + ((Row) * w) ]; 00422 00423 E= src[ Col-2 + ((Row) * w) ]+ 00424 src[ Col+2 + ((Row) * w) ]; 00425 00426 F= src[ Col + ((Row) * w) ]; 00427 00428 if(Row%2==0 && Col%2==0) 00429 { 00430 00431 00432 dptr[Row*w+Col].p[0]= ((5*F) -(1 * (A + C)) + (4 * B) + ((1/2) * E))/8 ; 00433 dptr[Row*w+Col].p[1]= F; 00434 dptr[Row*w+Col].p[2]= ((5*F) -(1 * (A + E)) + (4 * D) + ((1/2) * C))/8 ; 00435 } 00436 if(Row%2==0 && Col%2==1) 00437 { 00438 00439 00440 dptr[Row*w+Col].p[0]= ((6*F) -((3/2) * (E + C)) + (2 * A)) /8 ; 00441 dptr[Row*w+Col].p[1]= ((4*F) -(1 * (C + E)) + (2 * (D+B)))/8; 00442 dptr[Row*w+Col].p[2]= F ; 00443 } 00444 if(Row%2==1 && Col%2==0) 00445 { 00446 00447 00448 00449 dptr[Row*w+Col].p[0]= F ; 00450 dptr[Row*w+Col].p[1]= ((4*F) -(1 * (C + E)) + (2 * (D+B)))/8; 00451 dptr[Row*w+Col].p[2]= ((6*F) -((3/2) * (E + C)) + (2 * A)) /8 ; 00452 00453 } 00454 if(Row%2==1 && Col%2==1) 00455 { 00456 00457 00458 dptr[Row*w+Col].p[0]= ((5*F) -(1 * (A + E)) + (4 * D) + ((1/2) * C))/8 ; 00459 dptr[Row*w+Col].p[1]= F; 00460 dptr[Row*w+Col].p[2]= ((5*F) -(1 * (A + C)) + (4 * B) + ((1/2) * E))/8; 00461 } 00462 00463 if (dptr[Row*w+Col].p[0] < 0) 00464 dptr[Row*w+Col].p[0] = 0; 00465 else if(dptr[Row*w+Col].p[0] > 255) 00466 dptr[Row*w+Col].p[0] = 255; 00467 if (dptr[Row*w+Col].p[1] < 0) 00468 dptr[Row*w+Col].p[1] = 0; 00469 else if(dptr[Row*w+Col].p[1] > 255) 00470 dptr[Row*w+Col].p[1] = 255; 00471 if (dptr[Row*w+Col].p[2] < 0) 00472 dptr[Row*w+Col].p[2] = 0; 00473 else if(dptr[Row*w+Col].p[2] > 255) 00474 dptr[Row*w+Col].p[2] = 255; 00475 } 00476 } 00477 00478 #endif 00479