cuda_colorops.h

00001 /*!@file CUDA/cuda_colorops.h CUDA/GPU optimized color operations code */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00005 // by the University of Southern California (USC) and the iLab at USC.  //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file:
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/CUDA/cuda_colorops.h $
00035 // $Id: cuda_colorops.h 12962 2010-03-06 02:13:53Z irock $
00036 //
00037 
00038 #ifndef CUDA_COLOROPS_H_DEFINED
00039 #define CUDA_COLOROPS_H_DEFINED
00040 
00041 #include <cuda.h>
00042 #include "CUDA/cutil.h"
00043 #include "cudadefs.h"
00044 
00045 // General note, all device functions are inlined automatically
00046 
00047 // Get double color opponency maps
00048 __global__ void cuda_global_getRGBY(const float3_t *src, float *rgptr, float *byptr, const float thresh, const float min_range, const float max_range, const int w, const int h, const int tile_width, const int tile_height)
00049 {
00050   const int x_pos = IMUL(blockIdx.x,tile_width) + threadIdx.x;
00051   const int y_pos = IMUL(blockIdx.y,tile_height) + threadIdx.y;
00052   const int idx = IMUL(y_pos,w) + x_pos;
00053   float thresh3 = 3.0F * thresh;
00054 
00055   if(x_pos < w && y_pos < h)
00056   {
00057     float r = src[idx].p[0], g = src[idx].p[1], b = src[idx].p[2];
00058 
00059     // first do the luminanceNormalization:
00060     float lum = r + g + b;
00061     if (lum < thresh3)  // too dark... no response from anybody
00062       {
00063         rgptr[idx] = min_range;
00064         byptr[idx] = min_range;
00065       }
00066     else
00067       {
00068         // normalize chroma by luminance:
00069         float fac = (max_range-min_range) / lum;
00070         r *= fac; g *= fac; b *= fac;
00071 
00072         // red = [r - (g+b)/2]        [.] = clamp between 0 and 255
00073         // green = [g - (r+b)/2]
00074         // blue = [b - (r+g)/2]
00075         // yellow = [2*((r+g)/2 - |r-g| - b)]
00076 
00077         // now compute color opponencies:
00078         // yellow gets a factor 2 to compensate for its previous attenuation
00079         // by luminanceNormalize():
00080         float red = r - 0.5f * (g + b), green = g - 0.5f * (r + b);
00081         float blue = b - 0.5f * (r + g), yellow = -2.0f * (blue + fabs(r-g));
00082 
00083         if (red < min_range) red = min_range;
00084         else if (red > max_range) red = max_range;
00085         if (green < min_range) green = min_range;
00086         else if (green > max_range) green = max_range;
00087         if (blue < min_range) blue = min_range;
00088         else if (blue > max_range) blue = max_range;
00089         if (yellow < min_range) yellow=min_range;
00090         else if (yellow > max_range) yellow=max_range;
00091 
00092         rgptr[idx] = red - green;
00093         byptr[idx] = blue - yellow;
00094       }
00095   }
00096 }
00097 
00098 
00099 // Get double color opponency maps
00100 __global__ void cuda_global_toRGB(float3_t *dst, const float *src, const int sz, const int tile_len)
00101 {
00102   const int idx = IMUL(blockIdx.x,tile_len) + threadIdx.x;
00103   if(idx < sz)
00104   {
00105     float val = src[idx];
00106     dst[idx].p[0] = val;
00107     dst[idx].p[1] = val;
00108     dst[idx].p[2] = val;
00109   }
00110 }
00111 
00112 
00113 // Actual CUDA Implementation, set up as a __device__ function to allow it to be called
00114 //  from other CUDA functions
00115 __device__ void cuda_device_getComponents(const float3_t *aptr, float *rptr, float *gptr, float *bptr,
00116                                           const int w, const int h, const int idx)
00117 {
00118   if(idx < w*h)
00119   {
00120     rptr[idx] = aptr[idx].p[0];
00121     gptr[idx] = aptr[idx].p[1];
00122     bptr[idx] = aptr[idx].p[2];
00123   }
00124 }
00125 
00126 
00127 
00128 
00129 // Wrap the device function in a global wrapper so it is also callable from the host
00130 __global__ void cuda_global_getComponents(const float3_t *aptr, float *rptr, float *gptr, float *bptr, int w, int h, int tile_width, int tile_height)
00131 {
00132   // Optimization, as this will be frequently calculated across many functions, why don't we just pass it along?
00133   const int idx = blockIdx.y*tile_height*w + threadIdx.y*w + blockIdx.x*tile_width + threadIdx.x;
00134   cuda_device_getComponents(aptr,rptr,gptr,bptr,w,h,idx);
00135 }
00136 
00137 
00138 // Actual CUDA Implementation, set up as a __device__ function to allow it to be called
00139 //  from other CUDA functions
00140 __device__ void cuda_device_luminance(const float3_t *aptr, float *dptr, const int w, const int h, const int idx, const int secidx)
00141 {
00142   if(idx < w*h)
00143     dptr[idx] = (aptr[idx].p[0] + aptr[idx].p[1] + aptr[idx].p[2])/3.0F;
00144   // Second index is used for a border pixel if we are layering filters together
00145   // For calls not using this layer, it is a 1 boolean comparision charge
00146   // For calls using this, however, it allows simple/complex filters to be stacked (as long as the array size doesn't change)
00147   if(secidx >= 0 && secidx < w*h)
00148     dptr[secidx] = (aptr[secidx].p[0] + aptr[secidx].p[1] + aptr[secidx].p[2])/3.0F;
00149 }
00150 
00151 
00152 // Wrap the device function in a global wrapper so it is also callable from the host
00153 __global__ void cuda_global_luminance(const float3_t *aptr, float *dptr, const int w, const int h, const int tile_width, const int tile_height)
00154 {
00155   // Optimization, as this will be frequently calculated across many functions, why don't we just pass it along?
00156   const int idx = blockIdx.y*tile_height*w + threadIdx.y*w + blockIdx.x*tile_width + threadIdx.x;
00157   // Secidx is set to -1 because we are not doing a border with this direct call
00158   cuda_device_luminance(aptr,dptr,w,h,idx,-1);
00159 }
00160 
00161 
00162 // Actual CUDA Implementation, set up as a __device__ function to allow it to be called
00163 //  from other CUDA functions
00164 __device__ void cuda_device_luminanceNTSC(const float3_t *aptr, float *dptr, const int w, const int h, const int idx, const int secidx)
00165 {
00166   //Taken from Matlab's rgb2gray() function
00167   // T = inv([1.0 0.956 0.621; 1.0 -0.272 -0.647; 1.0 -1.106 1.703]);
00168   // coef = T(1,:)';
00169   const float coef1 =  0.298936F,coef2 = 0.587043F, coef3 = 0.114021F;;
00170   if(idx < w*h)
00171     dptr[idx] = roundf(aptr[idx].p[0]*coef1 + aptr[idx].p[1]*coef2 + aptr[idx].p[2]*coef3);
00172   // Second index is used for a border pixel if we are layering filters together
00173   // For calls not using this layer, it is a 1 boolean comparision charge
00174   // For calls using this, however, it allows simple/complex filters to be stacked (as long as the array size doesn't change)
00175   if(secidx >= 0 && secidx < w*h)
00176     dptr[secidx] = roundf(aptr[secidx].p[0]*coef1 + aptr[secidx].p[1]*coef2 + aptr[secidx].p[2]*coef3);
00177 }
00178 
00179 
00180 // Wrap the device function in a global wrapper so it is also callable from the host
00181 __global__ void cuda_global_luminanceNTSC(const float3_t *aptr, float *dptr, const int w, const int h, const int tile_width, const int tile_height)
00182 {
00183   // Optimization, as this will be frequently calculated across many functions, why don't we just pass it along?
00184   const int idx = blockIdx.y*tile_height*w + threadIdx.y*w + blockIdx.x*tile_width + threadIdx.x;
00185   // Secidx is set to -1 because we are not doing a border with this direct call
00186   cuda_device_luminanceNTSC(aptr,dptr,w,h,idx,-1);
00187 }
00188 
00189 #endif