cuda_debayer.h

Go to the documentation of this file.
00001 /*!@file CUDA/cuda_debayer.h CUDA/GPU optimized color operations code */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00005 // by the University of Southern California (USC) and the iLab at USC.  //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file:
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/CUDA/cuda_debayer.h $
00035 // $Id: cuda_debayer.h 13228 2010-04-15 01:49:10Z itti $
00036 //
00037 #ifndef CUDA_DEBAYER_H_DEFINED
00038 #define CUDA_DEBAYER_H_DEFINED
00039 
00040 #include <cuda.h>
00041 #include "CUDA/cutil.h"
00042 #include "cudadefs.h"
00043 
00044 
00045 
00046 //######################################################################
00047 __global__ void cuda_debayer_kernel_Nearest_Neighbour(float *src,float3_t  *dptr,int w,int h,int tile_width,int tile_height)
00048 {
00049 
00050   int Row = blockIdx.y * tile_height + (threadIdx.y);
00051   int Col = blockIdx.x * tile_width + (threadIdx.x);
00052   if((Row*w+Col)<w*h)
00053     {
00054       if(Row%2==0 && Col%2==0)
00055         {
00056           dptr[Row*w+Col].p[0]= src[ Col + ((Row+1) * w) ];
00057           dptr[Row*w+Col].p[1]= src[ Col + ( (Row) * w ) ];
00058           dptr[Row*w+Col].p[2]= src[ Col + 1 + Row * w ];
00059         }
00060       if(Row%2==0 && Col%2==1)
00061         {
00062           dptr[Row*w+Col].p[0]= src[Col -1 + ( (Row+1) * w ) ];
00063           dptr[Row*w+Col].p[1]= src[Col + (Row+1) * w];
00064           dptr[Row*w+Col].p[2]= src[Col  + ( (Row) * w ) ];
00065         }
00066       if(Row%2==1 && Col%2==0)
00067         {
00068           dptr[Row*w+Col].p[0]= src[Col  + (Row * w)  ];
00069           dptr[Row*w+Col].p[1]= src[Col +  Row * w ];
00070           dptr[Row*w+Col].p[2]= src[Col +1 + ( (Row-1) * w) ];
00071         }
00072       if(Row%2==1 && Col%2==1)
00073         {
00074           dptr[Row*w+Col].p[0]= src[Col -1 + Row * w ];
00075           dptr[Row*w+Col].p[1]= src[Col  + Row * w];
00076           dptr[Row*w+Col].p[2]= src[Col  + ( (Row-1) * w) ];
00077         }
00078     }
00079 
00080 }
00081 
00082 //######################################################################
00083 __global__ void cuda_debayer_kernel_MHC_optimised(float *src,float3_t  *dptr,int w,int h,int tile_width,int tile_height)
00084 {
00085   //Create the shared memory space
00086   //float *Mds = (float*) shared_data;
00087   //Calculate necessary indexes
00088   //Create border
00089   /*
00090   float *border = (float *) &data[tile_width];
00091   // Load the border always and this will speed up the computation
00092   // and get rid of all this symbol table crap
00093 
00094   //Loading the border
00095   int width_border = tile_width + 4;
00096   int height_border = tile_height + 4;
00097   int shared_index = sy*width_border+sx; //How to adjust the border
00098 
00099 
00100   Mds[shared_index]=src[global_index];
00101 
00102   */
00103 
00104   const int sts_y = blockIdx.y * tile_height;
00105   const int sts_x = blockIdx.x * tile_width;
00106 
00107   const int sx = threadIdx.x;                          // x source pixel within source tile
00108   const int sy = threadIdx.y;                           // y source pixel within source tile
00109   const int Row = sts_y + sy;
00110   const int Col = sts_x + sx;
00111 
00112 
00113    float *data = (float*) shared_data;
00114 
00115 
00116   //In the shared memory
00117 
00118    data[(sy*tile_width)+sx]=src[(Row*w)+Col];
00119 
00120    __syncthreads();
00121   if(Row<h&&Col<w)
00122      {
00123        //Threads synced
00124 
00125        //Global row column conditions
00126        /*
00127        int Row_min_1=Row-1;
00128        int Row_min_2=Row-2;
00129        int Row_pls_1=Row+1;
00130        int Row_pls_2=Row+2;
00131        int Col_min_1=Col-1;
00132        int Col_min_2=Col-2;
00133        int Col_pls_1=Col+1;
00134        int Col_pls_2=Col+2;
00135        int sx_min_1=sx-1;
00136        int sx_min_2=sx-2;
00137        int sx_pls_1=sx+1;
00138        int sx_pls_2=sx+2;
00139        int sy_min_1=sy-1;
00140        int sy_min_2=sy-2;
00141        int sy_pls_1=sy+1;
00142        int sy_pls_2=sy+2;
00143 
00144        bool bulk = (sy>1)&&(sy<tile_height-2)&&(sx>1)&&(sx<tile_width-2);
00145        */
00146        /*
00147        if(!bulk)
00148          {
00149            if(sts_y<2||sts_y>h-3)
00150              {
00151                switch(sts_y) //Global row
00152                  {
00153                  case 0: Row_min_1=Row; Row_min_2 = Row; break;
00154                  case 1: Row_min_2=Row;break;
00155 
00156                  }
00157                switch(sts_y-h) //Global row
00158                  {
00159                  case -2: Row_pls_2 =Row;break;
00160                  case -1: Row_pls_2= Row; Row_pls_1 =Row;break;
00161                  }
00162              }
00163            if(sts_x<2||sts_x>w-3)
00164              {
00165                switch(sts_x) //Global column
00166                  {
00167                  case 0: Col_min_1=Col; Col_min_2 = Col; break;
00168                  case 1: Col_min_2=Col;break;
00169 
00170                  }
00171                switch(sts_x-w) //Global row
00172                  {
00173                  case -2: Col_pls_2 =Col;break;
00174                  case -1: Col_pls_2= Col; Col_pls_1 =Col;break;
00175                  }
00176              }
00177            //Local row and column conditions
00178 
00179            if(sy<2||sy>tile_height-3)
00180              {
00181                switch(sy) //Local row sy
00182                  {
00183                  case 0: sy_min_1 = tile_height+2; sy_min_2 = tile_height+3; break;
00184                  case 1: sy_min_2 =tile_height+2;break;
00185 
00186 
00187                  }
00188                switch(sy-tile_height)
00189                  {
00190                  case -2:sy_pls_2=tile_height;break;
00191                  case -1:sy_pls_1=tile_height;sy_pls_2=tile_height+1;break;
00192                  }
00193              }
00194            if(sx<2||sx>tile_width-3)
00195              {
00196                switch(sx) //Local column sx
00197                  {
00198                  case 0: sx_min_1 = tile_width+2; sx_min_2 = tile_width+3; break;
00199                  case 1: sx_min_2 =tile_width+2;break;
00200 
00201 
00202 
00203                  }
00204                switch(sx-tile_width)
00205                  {
00206                  case -2:sx_pls_2=tile_width;break;
00207                  case -1:sx_pls_1=tile_width;sx_pls_2=tile_width+1;break;
00208                  }
00209              }
00210            //Assigning new values at those border points
00211            if(sy<2||sy>tile_height-3)
00212              {
00213                switch(sy)
00214                  {
00215                  case 0:
00216                    {
00217                      Mds[sy_min_1*tile_width+sx_min_1] = src[Col_min_1 + ((Row_min_1) * w)];
00218                      Mds[sy_min_1*tile_width+sx_pls_1] = src[Col_pls_1 + ((Row_min_1) * w)];
00219                      Mds[sy_min_1*tile_width+sx] = src[Col + ((Row_min_1) * w)];
00220                      Mds[sy_min_2*tile_width+sx] = src[Col + ((Row_min_2) * w)];break;
00221                    }
00222                  case 1:
00223                    Mds[sy_min_2*tile_width+sx] = src[Col + ((Row_min_2) * w)]; break;
00224 
00225                  }
00226                switch(sy-tile_height)
00227                  {   case -2:
00228                      Mds[sy_pls_2*tile_width+sx] =  src[Col + ((Row_pls_2) * w)]; break;
00229                  case -1:
00230                    {
00231                      Mds[sy_pls_1*tile_width+sx_min_1]  =  src[Col_min_1 + ((Row_pls_1) * w)];
00232                      Mds[sy_pls_1*tile_width+sx_pls_1]  =  src[Col_pls_1 + ((Row_pls_1) * w)];
00233                      Mds[sy_pls_1*tile_width+sx] =  src[Col + ((Row_pls_1) * w)];
00234                      Mds[sy_pls_2*tile_width+sx] =  src[Col + ((Row_pls_2) * w)]; break;
00235                    }
00236                  }
00237              }
00238            if(sx<2||sx>tile_width-3)
00239              {
00240                switch(sx)
00241                  {
00242                  case 0:
00243                    {
00244                      Mds[sy_min_1*tile_width+sx_min_1]  = src[Col_min_1 + ((Row_min_1) * w)];
00245                      Mds[sy_pls_1*tile_width+sx_min_1]  = src[Col_min_1 + ((Row_pls_1) * w)];
00246                      Mds[sy*tile_width+sx_min_1]  = src[Col_min_1 + ((Row) * w)];
00247                      Mds[sy*tile_width+sx_min_2] = src[Col_min_2 + ((Row) * w)]; break;
00248                    }
00249                  case 1:
00250                    Mds[sy*tile_width+sx_min_2]  = src[Col_min_2 + ((Row) * w)]; break;
00251 
00252                  }
00253                switch(sx-tile_width)
00254                  {
00255                  case -2:
00256                    Mds[sy*tile_width+sx_pls_2]  =  src[Col_pls_2 + ((Row) * w)]; break;
00257                  case -1:
00258                    {
00259                      Mds[sy_min_1*tile_width+sx_pls_1]  =  src[Col_pls_1 + ((Row_min_1) * w)];
00260                      Mds[sy_pls_1*tile_width+sx_pls_1]  =  src[Col_pls_1 + ((Row_pls_1) * w)];
00261                      Mds[sy*tile_width+sx_pls_1]  =  src[Col_pls_1 + ((Row) * w)];
00262                      Mds[sy*tile_width+sx_pls_2]  =  src[Col_pls_2 + ((Row) * w)]; break;
00263                    }
00264                  }
00265              }
00266          }*/
00267        //Calculating constants
00268 
00269          {
00270        int A,B,C,D,E,F;
00271 
00272        A = data[(sy-1)*(tile_width)+sx-1]+
00273          data[(sy-1)*(tile_width)+sx+1]+
00274          data[(sy+1)*(tile_width)+sx-1]+
00275          data[(sy+1)*(tile_width)+sx+1];
00276 
00277 
00278        B = data[(sy-1)*(tile_width)+sx]+
00279          data[(sy+1)*(tile_width)+sx];
00280 
00281 
00282        C = data[(sy-2)*(tile_width)+sx]+
00283          data[(sy+2)*(tile_width)+sx];
00284 
00285        D = data[(sy)*(tile_width)+sx-1]+
00286          data[(sy)*(tile_width)+sx+1];
00287 
00288        E = data[(sy)*(tile_width)+sx-2]+
00289          data[(sy)*(tile_width)+sx+2];
00290 
00291        F=   data[(sy)*(tile_width)+sx];
00292 
00293  /*
00294        A = src[(sy_min_1)*(tile_width)+sx_min_1]+
00295          src[(sy_min_1)*(tile_width)+sx_pls_1]+
00296          src[(sy_pls_1)*(tile_width)+sx_min_1]+
00297          src[(sy_pls_1)*(tile_width)+sx_pls_1];
00298 
00299 
00300        B = src[(sy_min_1)*(tile_width)+sx]+
00301          src[(sy_pls_1)*(tile_width)+sx];
00302 
00303 
00304        C = src[(sy_min_2)*(tile_width)+sx]+
00305          src[(sy_pls_2)*(tile_width)+sx];
00306 
00307        D = src[(sy)*(tile_width)+sx_min_1]+
00308          src[(sy)*(tile_width)+sx_pls_1];
00309 
00310        E = src[(sy)*(tile_width)+sx_min_2]+
00311          src[(sy)*(tile_width)+sx_pls_2];
00312 
00313        F=   src[(sy)*(tile_width)+sx];
00314 
00315    *//*
00316        A = src[(Row_min_1)*(w)+Col_min_1]+
00317          src[(Row_min_1)*(w)+Col_pls_1]+
00318          src[(Row_pls_1)*(w)+Col_min_1]+
00319          src[(Row_pls_1)*(w)+Col_pls_1];
00320 
00321 
00322        B = src[(Row_min_1)*(w)+Col]+
00323          src[(Row_pls_1)*(w)+Col];
00324 
00325 
00326        C = src[(Row_min_2)*(w)+Col]+
00327          src[(Row_pls_2)*(w)+Col];
00328 
00329        D = src[(Row)*(w)+Col_min_1]+
00330          src[(Row)*(w)+Col_pls_1];
00331 
00332        E = src[(Row)*(w)+Col_min_2]+
00333          src[(Row)*(w)+Col_pls_2];
00334 
00335        F=   src[(Row)*(w)+Col];
00336      */
00337        //Odd Even Row Col conditions
00338        /* int sit_3;
00339           if(Row%2==0)
00340           sit_3 = 0;
00341           else
00342           sit_3 = 1;
00343           if(Col%2==0)
00344           sit_3 = sit_3 + 2;
00345           else
00346           sit_3 = sit_3 + 4;*/
00347        if(Row%2==0 && Col%2==0)
00348          {
00349            dptr[Row*w+Col].p[0]=  ((5*F) -(1 * (A + C)) + (4 * B) + ((1/2) * E))/8 ;
00350            dptr[Row*w+Col].p[1]=   F;
00351            dptr[Row*w+Col].p[2]=  ((5*F) -(1 * (A + E)) + (4 * D) + ((1/2) * C))/8 ;
00352 
00353          }
00354         if(Row%2==1 && Col%2==0){
00355          dptr[Row*w+Col].p[0]=  F ;
00356          dptr[Row*w+Col].p[1]=  ((4*F) -(1 * (C + E)) + (2 * (D+B)))/8;
00357          dptr[Row*w+Col].p[2]=  ((6*F) -((3/2) * (E + C)) + (2 * A)) /8 ;
00358 
00359        }
00360        if(Row%2==0 && Col%2==1){
00361          dptr[Row*w+Col].p[0]=  ((6*F) -((3/2) * (E + C)) + (2 * A)) /8 ;
00362          dptr[Row*w+Col].p[1]=  ((4*F) -(1 * (C + E)) + (2 * (D+B)))/8;
00363          dptr[Row*w+Col].p[2]=  F ;
00364        }
00365        if(Row%2==1 && Col%2==1){
00366          dptr[Row*w+Col].p[0]=  ((5*F) -(1 * (A + E)) + (4 * D) + ((1/2) * C))/8 ;
00367          dptr[Row*w+Col].p[1]=   F;
00368          dptr[Row*w+Col].p[2]=  ((5*F) -(1 * (A + C)) + (4 * B) + ((1/2) * E))/8;
00369        }
00370          }
00371        /*
00372          switch(sit_3)
00373          {
00374          case 2: dptr[Row*w+Col].p[0]=  ((5*F) -(1 * (A + C)) + (4 * B) + ((1/2) * E))/8 ;
00375          dptr[Row*w+Col].p[1]=   F;
00376          dptr[Row*w+Col].p[2]=  ((5*F) -(1 * (A + E)) + (4 * D) + ((1/2) * C))/8 ;
00377          break;
00378          case 3: dptr[Row*w+Col].p[0]=  F ;
00379          dptr[Row*w+Col].p[1]=  ((4*F) -(1 * (C + E)) + (2 * (D+B)))/8;
00380          dptr[Row*w+Col].p[2]=  ((6*F) -((3/2) * (E + C)) + (2 * A)) /8 ;
00381 
00382          break;
00383          case 4: dptr[Row*w+Col].p[0]=  ((6*F) -((3/2) * (E + C)) + (2 * A)) /8 ;
00384          dptr[Row*w+Col].p[1]=  ((4*F) -(1 * (C + E)) + (2 * (D+B)))/8;
00385          dptr[Row*w+Col].p[2]=  F ;
00386          break;
00387          case 5: dptr[Row*w+Col].p[0]=  ((5*F) -(1 * (A + E)) + (4 * D) + ((1/2) * C))/8 ;
00388          dptr[Row*w+Col].p[1]=   F;
00389          dptr[Row*w+Col].p[2]=  ((5*F) -(1 * (A + C)) + (4 * B) + ((1/2) * E))/8;
00390          break;
00391 
00392          } */
00393      }
00394 
00395 }
00396 
00397 
00398 __global__ void cuda_kernel_debayer(float *src,float3_t  *dptr,int w,int h,int tile_width,int tile_height)
00399 {
00400 
00401 
00402     int Row = blockIdx.y * tile_height + (threadIdx.y);
00403     int Col = blockIdx.x * tile_width  + (threadIdx.x);
00404     int A,B,C,D,E,F;
00405 
00406 
00407   if((Row>1||Row<(w-2))&&(Col>1||Col<(h-2))&&((Row*w+Col)<=w*h))
00408     {
00409       A =         src[ Col-1 + ((Row-1) * w) ]+
00410             src[ Col+1 + ((Row-1) * w) ]+
00411             src[ Col-1 + ((Row+1) * w) ]+
00412             src[ Col+1 + ((Row+1) * w) ];
00413 
00414           B =     src[ Col + ((Row+1) * w) ] +
00415             src[ Col + ((Row-1) * w) ];
00416 
00417           C=      src[ Col + ((Row+2) * w) ]+
00418             src[ Col + ((Row-2) * w) ];
00419 
00420           D=      src[ Col-1 + ((Row) * w) ]+
00421             src[ Col+1 + ((Row) * w) ];
00422 
00423           E=      src[ Col-2 + ((Row) * w) ]+
00424             src[ Col+2 + ((Row) * w) ];
00425 
00426           F=      src[ Col + ((Row) * w) ];
00427 
00428       if(Row%2==0 && Col%2==0)
00429         {
00430 
00431 
00432           dptr[Row*w+Col].p[0]=  ((5*F) -(1 * (A + C)) + (4 * B) + ((1/2) * E))/8 ;
00433           dptr[Row*w+Col].p[1]=   F;
00434           dptr[Row*w+Col].p[2]=  ((5*F) -(1 * (A + E)) + (4 * D) + ((1/2) * C))/8 ;
00435         }
00436       if(Row%2==0 && Col%2==1)
00437         {
00438 
00439 
00440           dptr[Row*w+Col].p[0]=  ((6*F) -((3/2) * (E + C)) + (2 * A)) /8 ;
00441           dptr[Row*w+Col].p[1]=  ((4*F) -(1 * (C + E)) + (2 * (D+B)))/8;
00442           dptr[Row*w+Col].p[2]=  F ;
00443         }
00444       if(Row%2==1 && Col%2==0)
00445         {
00446 
00447 
00448 
00449           dptr[Row*w+Col].p[0]=  F ;
00450           dptr[Row*w+Col].p[1]=  ((4*F) -(1 * (C + E)) + (2 * (D+B)))/8;
00451           dptr[Row*w+Col].p[2]=  ((6*F) -((3/2) * (E + C)) + (2 * A)) /8 ;
00452 
00453         }
00454       if(Row%2==1 && Col%2==1)
00455         {
00456 
00457 
00458           dptr[Row*w+Col].p[0]=  ((5*F) -(1 * (A + E)) + (4 * D) + ((1/2) * C))/8 ;
00459           dptr[Row*w+Col].p[1]=   F;
00460           dptr[Row*w+Col].p[2]=  ((5*F) -(1 * (A + C)) + (4 * B) + ((1/2) * E))/8;
00461         }
00462 
00463        if (dptr[Row*w+Col].p[0] < 0)
00464                 dptr[Row*w+Col].p[0] = 0;
00465        else if(dptr[Row*w+Col].p[0] > 255)
00466                 dptr[Row*w+Col].p[0] = 255;
00467        if (dptr[Row*w+Col].p[1] < 0)
00468                 dptr[Row*w+Col].p[1] = 0;
00469        else if(dptr[Row*w+Col].p[1] > 255)
00470                 dptr[Row*w+Col].p[1] = 255;
00471         if (dptr[Row*w+Col].p[2] < 0)
00472                 dptr[Row*w+Col].p[2] = 0;
00473        else if(dptr[Row*w+Col].p[2] > 255)
00474                 dptr[Row*w+Col].p[2] = 255;
00475     }
00476 }
00477 
00478 #endif
00479