/*!@file CUDA/cuda-colorops.h CUDA/GPU optimized color operations code */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
// by the University of Southern California (USC) and the iLab at USC.  //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE. See the GNU General Public License for more details.        //
//                                                                      //
// You should have received a copy of the GNU General Public License    //
// along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
// to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
// Boston, MA 02111-1307 USA.                                           //
// //////////////////////////////////////////////////////////////////// //
//
// Primary maintainer for this file:
// $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/CUDA/cuda_colorops.h $
// $Id: cuda_colorops.h 12962 2010-03-06 02:13:53Z irock $
//

#ifndef CUDA_COLOROPS_H_DEFINED
#define CUDA_COLOROPS_H_DEFINED

#include <cuda.h>
#include "CUDA/cutil.h"
#include "cudadefs.h"

// General note, all device functions are inlined automatically

// Get double color opponency maps (red-green and blue-yellow).
//
//   src                     input color pixels, read at index y*w + x
//   rgptr, byptr            outputs: (red - green) and (blue - yellow), same indexing
//   thresh                  pixels whose r+g+b is below 3*thresh get the "too dark" response
//   min_range, max_range    clamp bounds for the four opponent channels; their
//                           difference is also the luminance-normalization span
//   w, h                    image size; threads outside it do nothing
//   tile_width, tile_height multiplied by blockIdx.x / blockIdx.y to locate the block's
//                           first pixel (presumably equal to the block dimensions --
//                           confirm at the launch site)
//
// NOTE(review): the "too dark" branch stores min_range, whereas the difference of two
// channels both clamped to min_range would be 0. The two agree only when
// min_range == 0 -- confirm that this is the intended contract.
__global__ void cuda_global_getRGBY(const float3_t *src, float *rgptr, float *byptr, const float thresh, const float min_range, const float max_range, const int w, const int h, const int tile_width, const int tile_height)
{
  const int x_pos = IMUL(blockIdx.x,tile_width) + threadIdx.x;
  const int y_pos = IMUL(blockIdx.y,tile_height) + threadIdx.y;
  const int idx = IMUL(y_pos,w) + x_pos;
  const float thresh3 = 3.0F * thresh;

  if(x_pos < w && y_pos < h)
    {
      float r = src[idx].p[0], g = src[idx].p[1], b = src[idx].p[2];

      // first do the luminanceNormalization:
      float lum = r + g + b;

      // too dark... no response from anybody
      // The explicit zero test keeps a black pixel from dividing by zero (and
      // writing NaN into both maps) when thresh is not strictly positive.
      if (lum < thresh3 || lum == 0.0F)
        {
          rgptr[idx] = min_range;
          byptr[idx] = min_range;
        }
      else
        {
          // normalize chroma by luminance:
          float fac = (max_range-min_range) / lum;
          r *= fac; g *= fac; b *= fac;

          // red = [r - (g+b)/2]        [.] = clamp between min_range and max_range
          // green = [g - (r+b)/2]
          // blue = [b - (r+g)/2]
          // yellow = [2*((r+g)/2 - |r-g| - b)]

          // now compute color opponencies:
          // yellow gets a factor 2 to compensate for its previous attenuation
          // by luminanceNormalize():
          float red = r - 0.5f * (g + b), green = g - 0.5f * (r + b);
          float blue = b - 0.5f * (r + g), yellow = -2.0f * (blue + fabsf(r-g));

          if (red < min_range) red = min_range;
          else if (red > max_range) red = max_range;
          if (green < min_range) green = min_range;
          else if (green > max_range) green = max_range;
          if (blue < min_range) blue = min_range;
          else if (blue > max_range) blue = max_range;
          if (yellow < min_range) yellow=min_range;
          else if (yellow > max_range) yellow=max_range;

          rgptr[idx] = red - green;
          byptr[idx] = blue - yellow;
        }
    }
}


// Expand a single-channel image into a three-channel one: every element
// src[idx] with idx < sz is copied into all three components of dst[idx].
// The index is blockIdx.x*tile_len + threadIdx.x (tile_len is presumably the
// block length -- confirm at the launch site).
__global__ void cuda_global_toRGB(float3_t *dst, const float *src, const int sz, const int tile_len)
{
  const int idx = IMUL(blockIdx.x,tile_len) + threadIdx.x;
  if(idx < sz)
    {
      float val = src[idx];
      dst[idx].p[0] = val;
      dst[idx].p[1] = val;
      dst[idx].p[2] = val;
    }
}


// Actual CUDA Implementation, set up as a __device__ function to allow it to be called
// from other CUDA functions
//
// Splits pixel idx of aptr into its three components, stored at the same index
// in rptr, gptr and bptr. Does nothing when idx is not below w*h.
__device__ void cuda_device_getComponents(const float3_t *aptr, float *rptr, float *gptr, float *bptr,
                                          const int w, const int h, const int idx)
{
  if(idx < w*h)
    {
      rptr[idx] = aptr[idx].p[0];
      gptr[idx] = aptr[idx].p[1];
      bptr[idx] = aptr[idx].p[2];
    }
}


// Wrap the device function in a global wrapper so it is also callable from the host
//
// Only the flat index is range-checked (inside the device function), so a thread
// whose column lies past w lands on a pixel of a following row and stores the
// same values that pixel's own thread stores.
__global__ void cuda_global_getComponents(const float3_t *aptr, float *rptr, float *gptr, float *bptr, int w, int h, int tile_width, int tile_height)
{
  // Optimization, as this will be frequently calculated across many functions, why don't we just pass it along?
  const int idx = blockIdx.y*tile_height*w + threadIdx.y*w + blockIdx.x*tile_width + threadIdx.x;
  cuda_device_getComponents(aptr,rptr,gptr,bptr,w,h,idx);
}


// Actual CUDA Implementation, set up as a __device__ function to allow it to be called
// from other CUDA functions
//
// Writes the plain average of the three components of aptr[idx] to dptr[idx]
// when idx is below w*h, and does the same for secidx when it is in [0, w*h).
__device__ void cuda_device_luminance(const float3_t *aptr, float *dptr, const int w, const int h, const int idx, const int secidx)
{
  if(idx < w*h)
    dptr[idx] = (aptr[idx].p[0] + aptr[idx].p[1] + aptr[idx].p[2])/3.0F;
  // Second index is used for a border pixel if we are layering filters together
  // For calls not using this layer, it is a 1 boolean comparison charge
  // For calls using this, however, it allows simple/complex filters to be stacked (as long as the array size doesn't change)
  if(secidx >= 0 && secidx < w*h)
    dptr[secidx] = (aptr[secidx].p[0] + aptr[secidx].p[1] + aptr[secidx].p[2])/3.0F;
}


// Wrap the device function in a global wrapper so it is also callable from the host
//
// Only the flat index is range-checked (inside the device function), so a thread
// whose column lies past w lands on a pixel of a following row and stores the
// same value that pixel's own thread stores.
__global__ void cuda_global_luminance(const float3_t *aptr, float *dptr, const int w, const int h, const int tile_width, const int tile_height)
{
  // Optimization, as this will be frequently calculated across many functions, why don't we just pass it along?
  const int idx = blockIdx.y*tile_height*w + threadIdx.y*w + blockIdx.x*tile_width + threadIdx.x;
  // Secidx is set to -1 because we are not doing a border with this direct call
  cuda_device_luminance(aptr,dptr,w,h,idx,-1);
}


// Actual CUDA Implementation, set up as a __device__ function to allow it to be called
// from other CUDA functions
//
// Writes the weighted sum of the three components of aptr[idx], rounded with
// roundf(), to dptr[idx] when idx is below w*h, and does the same for secidx
// when it is in [0, w*h).
__device__ void cuda_device_luminanceNTSC(const float3_t *aptr, float *dptr, const int w, const int h, const int idx, const int secidx)
{
  //Taken from Matlab's rgb2gray() function
  // T = inv([1.0 0.956 0.621; 1.0 -0.272 -0.647; 1.0 -1.106 1.703]);
  // coef = T(1,:)';
  const float coef1 = 0.298936F,coef2 = 0.587043F, coef3 = 0.114021F;
  if(idx < w*h)
    dptr[idx] = roundf(aptr[idx].p[0]*coef1 + aptr[idx].p[1]*coef2 + aptr[idx].p[2]*coef3);
  // Second index is used for a border pixel if we are layering filters together
  // For calls not using this layer, it is a 1 boolean comparison charge
  // For calls using this, however, it allows simple/complex filters to be stacked (as long as the array size doesn't change)
  if(secidx >= 0 && secidx < w*h)
    dptr[secidx] = roundf(aptr[secidx].p[0]*coef1 + aptr[secidx].p[1]*coef2 + aptr[secidx].p[2]*coef3);
}


// Wrap the device function in a global wrapper so it is also callable from the host
//
// Only the flat index is range-checked (inside the device function), so a thread
// whose column lies past w lands on a pixel of a following row and stores the
// same value that pixel's own thread stores.
__global__ void cuda_global_luminanceNTSC(const float3_t *aptr, float *dptr, const int w, const int h, const int tile_width, const int tile_height)
{
  // Optimization, as this will be frequently calculated across many functions, why don't we just pass it along?
  const int idx = blockIdx.y*tile_height*w + threadIdx.y*w + blockIdx.x*tile_width + threadIdx.x;
  // Secidx is set to -1 because we are not doing a border with this direct call
  cuda_device_luminanceNTSC(aptr,dptr,w,h,idx,-1);
}

#endif