00001 /*!@file CUDA/CudaLowPass.C C++ wrapper for CUDA Low pass operations */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005 // 00005 // by the University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/CUDA/CudaLowPass.C $ 00035 // $Id: CudaLowPass.C 12962 2010-03-06 02:13:53Z irock $ 00036 // 00037 00038 #include "CUDA/CudaImage.H" 00039 #include "CUDA/CudaDevices.H" 00040 #include "CUDA/CudaLowPass.H" 00041 #include "wrap_c_cuda.h" 00042 00043 00044 CudaImage<float> cudaLowPass5xDec(const CudaImage<float>&src) 00045 { 00046 // Ensure that the data is valid 00047 ASSERT(src.initialized()); 00048 // Ensure that we are on a CUDA device 00049 ASSERT(src.getMemoryPolicy() != HOST_MEMORY); 00050 00051 const int dev = src.getMemoryDevice(); 00052 // Set up output image memory 00053 00054 CudaImage<float> result = CudaImage<float>(src.getWidth()/2, src.getHeight(), NO_INIT, src.getMemoryPolicy(), dev); 00055 00056 const Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00057 00058 cuda_c_lowpass_5_x_dec_x(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),tile.w()); 00059 00060 return result; 00061 } 00062 00063 00064 CudaImage<float> cudaLowPass5yDec(const CudaImage<float>&src) 00065 { 00066 // Ensure that the data is valid 00067 ASSERT(src.initialized()); 00068 // Ensure that we are on a CUDA device 00069 ASSERT(src.getMemoryPolicy() != HOST_MEMORY); 00070 00071 const int dev = src.getMemoryDevice(); 00072 // Set up output image memory 00073 00074 CudaImage<float> result = CudaImage<float>(src.getWidth(), src.getHeight()/2, NO_INIT, src.getMemoryPolicy(), dev); 00075 00076 const Dims tile = CudaDevices::getDeviceTileSize(dev); 00077 00078 cuda_c_lowpass_5_y_dec_y(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),tile.w(),tile.h()); 00079 return result; 00080 } 00081 00082 CudaImage<float> cudaLowPass5Dec(const CudaImage<float>& src, const bool go_x, const bool go_y) 00083 { 00084 CudaImage<float> result = src; 00085 if(go_x) result = cudaLowPass5xDec(result); 00086 if(go_y) result = cudaLowPass5yDec(result); 00087 return result; 00088 } 00089 00090 CudaImage<float> cudaLowPass9xDec(const CudaImage<float>&src) 00091 { 00092 // Ensure that the data is valid 00093 ASSERT(src.initialized()); 00094 // Ensure that we are on a CUDA device 00095 ASSERT(src.getMemoryPolicy() != HOST_MEMORY); 00096 00097 const int dev = src.getMemoryDevice(); 00098 // Set up output image memory 00099 00100 const int rw = src.getWidth()/2; 00101 const int rh = src.getHeight(); 00102 00103 CudaImage<float> result = CudaImage<float>(rw,rh, NO_INIT, src.getMemoryPolicy(), dev); 00104 00105 const Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00106 00107 cuda_c_lowpass_9_x_dec_x(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),rw,rh,tile.w()); 00108 00109 return result; 00110 } 00111 00112 00113 CudaImage<float> cudaLowPass9yDec(const CudaImage<float>&src) 00114 { 00115 // Ensure that the data is valid 00116 ASSERT(src.initialized()); 00117 // Ensure that we are on a CUDA device 00118 ASSERT(src.getMemoryPolicy() != HOST_MEMORY); 00119 00120 const int dev = src.getMemoryDevice(); 00121 // Set up output image memory 00122 00123 const int rw = src.getWidth(); 00124 const int rh = src.getHeight()/2; 00125 00126 CudaImage<float> result = CudaImage<float>(rw,rh, NO_INIT, src.getMemoryPolicy(), dev); 00127 00128 const Dims tile = CudaDevices::getDeviceTileSize(dev); 00129 00130 cuda_c_lowpass_9_y_dec_y(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),rw,rh,tile.w(),tile.h()); 00131 return result; 00132 } 00133 00134 CudaImage<float> cudaLowPass9Dec(const CudaImage<float>& src, const bool go_x, const bool go_y) 00135 { 00136 CudaImage<float> result = src; 00137 if(go_x) result = cudaLowPass9xDec(result); 00138 if(go_y) result = cudaLowPass9yDec(result); 00139 return result; 00140 } 00141 00142 00143 00144 CudaImage<float> cudaLowPass9x(const CudaImage<float>&src) 00145 { 00146 // Ensure that the data is valid 00147 ASSERT(src.initialized()); 00148 // Ensure that we are on a CUDA device 00149 ASSERT(src.getMemoryPolicy() != HOST_MEMORY); 00150 00151 const int dev = src.getMemoryDevice(); 00152 // Set up output image memory 00153 00154 CudaImage<float> result = CudaImage<float>(src.getWidth(), src.getHeight(), NO_INIT, src.getMemoryPolicy(), dev); 00155 00156 const Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00157 00158 cuda_c_lowpass_9_x(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),tile.w()); 00159 00160 return result; 00161 } 00162 00163 00164 CudaImage<float> cudaLowPass9xyDec(const CudaImage<float>&src) 00165 { 00166 // Ensure that the data is valid 00167 ASSERT(src.initialized()); 00168 // Ensure that we are on a CUDA device 00169 ASSERT(src.getMemoryPolicy() != HOST_MEMORY); 00170 00171 const int dev = src.getMemoryDevice(); 00172 // Set up output image memory 00173 const int rw = src.getWidth()/2; 00174 const int rh = src.getHeight()/2; 00175 CudaImage<float> tmp = CudaImage<float>(rw, src.getHeight(), ZEROS, src.getMemoryPolicy(), dev); 00176 00177 const Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00178 cuda_c_lowpass_texture_9_x_dec_x(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),tmp.getCudaArrayPtr(),rw,src.getHeight(),tile.sz(),1); 00179 00180 CudaImage<float> res = CudaImage<float>(rw, rh, ZEROS, src.getMemoryPolicy(), dev); 00181 cuda_c_lowpass_texture_9_y_dec_y(tmp.getCudaArrayPtr(),tmp.getWidth(),tmp.getHeight(),res.getCudaArrayPtr(),rw,rh,1,tile.sz()); 00182 return res; 00183 } 00184 00185 00186 00187 CudaImage<float> cudaLowPass9y(const CudaImage<float>&src) 00188 { 00189 // Ensure that the data is valid 00190 ASSERT(src.initialized()); 00191 // Ensure that we are on a CUDA device 00192 ASSERT(src.getMemoryPolicy() != HOST_MEMORY); 00193 00194 const int dev = src.getMemoryDevice(); 00195 // Set up output image memory 00196 00197 CudaImage<float> result = CudaImage<float>(src.getWidth(), src.getHeight(), NO_INIT, src.getMemoryPolicy(), dev); 00198 00199 const Dims tile = CudaDevices::getDeviceTileSize(dev); 00200 00201 cuda_c_lowpass_9_y(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),tile.w(),tile.h()); 00202 return result; 00203 } 00204 00205 CudaImage<float> cudaLowPass5x(const CudaImage<float>&src) 00206 { 00207 // Ensure that the data is valid 00208 ASSERT(src.initialized()); 00209 // Ensure that we are on a CUDA device 00210 ASSERT(src.getMemoryPolicy() != HOST_MEMORY); 00211 00212 const int dev = src.getMemoryDevice(); 00213 // Set up output image memory 00214 00215 CudaImage<float> result = CudaImage<float>(src.getWidth(), src.getHeight(), NO_INIT, src.getMemoryPolicy(), dev); 00216 00217 const Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00218 00219 cuda_c_lowpass_5_x(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),tile.w()); 00220 00221 return result; 00222 } 00223 00224 00225 CudaImage<float> cudaLowPass5y(const CudaImage<float>&src) 00226 { 00227 // Ensure that the data is valid 00228 ASSERT(src.initialized()); 00229 // Ensure that we are on a CUDA device 00230 ASSERT(src.getMemoryPolicy() != HOST_MEMORY); 00231 00232 const int dev = src.getMemoryDevice(); 00233 // Set up output image memory 00234 00235 CudaImage<float> result = CudaImage<float>(src.getWidth(), src.getHeight(), NO_INIT, src.getMemoryPolicy(), dev); 00236 00237 const Dims tile = CudaDevices::getDeviceTileSize(dev); 00238 00239 cuda_c_lowpass_5_y(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),tile.w(),tile.h()); 00240 return result; 00241 } 00242 00243 CudaImage<float> cudaLowPass3x(const CudaImage<float>&src) 00244 { 00245 // Ensure that the data is valid 00246 ASSERT(src.initialized()); 00247 // Ensure that we are on a CUDA device 00248 ASSERT(src.getMemoryPolicy() != HOST_MEMORY); 00249 00250 const int dev = src.getMemoryDevice(); 00251 // Set up output image memory 00252 00253 CudaImage<float> result = CudaImage<float>(src.getWidth(), src.getHeight(), NO_INIT, src.getMemoryPolicy(), dev); 00254 00255 const Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00256 00257 cuda_c_lowpass_3_x(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),tile.w()); 00258 00259 return result; 00260 } 00261 00262 00263 CudaImage<float> cudaLowPass3y(const CudaImage<float>&src) 00264 { 00265 // Ensure that the data is valid 00266 ASSERT(src.initialized()); 00267 // Ensure that we are on a CUDA device 00268 ASSERT(src.getMemoryPolicy() != HOST_MEMORY); 00269 00270 const int dev = src.getMemoryDevice(); 00271 // Set up output image memory 00272 00273 CudaImage<float> result = CudaImage<float>(src.getWidth(), src.getHeight(), NO_INIT, src.getMemoryPolicy(), dev); 00274 00275 const Dims tile = CudaDevices::getDeviceTileSize(dev); 00276 00277 cuda_c_lowpass_3_y(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),tile.w(),tile.h()); 00278 return result; 00279 } 00280 00281 CudaImage<float> cudaLowPass9(const CudaImage<float>& src, const bool go_x, const bool go_y) 00282 { 00283 CudaImage<float> result = src; 00284 if(go_x) result = cudaLowPass9x(result); 00285 if(go_y) result = cudaLowPass9y(result); 00286 return result; 00287 } 00288 00289 // ###################################################################### 00290 CudaImage<float> cudaLowPass(const int N, const CudaImage<float>& src, const bool go_x, const bool go_y) 00291 { 00292 CudaImage<float> result = src; 00293 if (go_x) result = cudaLowPassX(N,result); 00294 if (go_y) result = cudaLowPassY(N,result); 00295 return result; 00296 } 00297 00298 // ###################################################################### 00299 CudaImage<float> cudaLowPassX(const int N, const CudaImage<float>& src) 00300 { 00301 switch (N) 00302 { 00303 case 3: return cudaLowPass3x(src); 00304 case 5: return cudaLowPass5x(src); 00305 case 9: return cudaLowPass9x(src); 00306 default: 00307 LERROR("Only 3,5, and 9 tap kernels implemented"); 00308 return CudaImage<float>(); 00309 break; 00310 } 00311 // const Image<float> kern = binomialKernel(N); 00312 // ASSERT(kern.getWidth() == N); 00313 // ASSERT(kern.getHeight() == 1); 00314 // return sepFilter(src, kern.getArrayPtr(), NULL, N, 0, 00315 // CONV_BOUNDARY_CLEAN); 00316 } 00317 00318 // ###################################################################### 00319 CudaImage<float> cudaLowPassY(const int N, const CudaImage<float>& src) 00320 { 00321 switch (N) 00322 { 00323 case 3: return cudaLowPass3y(src); 00324 case 5: return cudaLowPass5y(src); 00325 case 9: return cudaLowPass9y(src); 00326 default: 00327 LERROR("Only 3,5, and 9 tap kernels implemented"); 00328 return CudaImage<float>(); 00329 break; 00330 } 00331 00332 // const Image<float> kern = binomialKernel(N); 00333 // ASSERT(kern.getWidth() == N); 00334 // ASSERT(kern.getHeight() == 1); 00335 // return sepFilter(src, NULL, kern.getArrayPtr(), 0, N, 00336 // CONV_BOUNDARY_CLEAN); 00337 }