/*!@file CUDA/CudaShapeOps.C C++ wrapper for CUDA shape operations */

// //////////////////////////////////////////////////////////////////// //
// The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005  //
// by the University of Southern California (USC) and the iLab at USC.  //
// See http://iLab.usc.edu for information about this project.          //
// //////////////////////////////////////////////////////////////////// //
// Major portions of the iLab Neuromorphic Vision Toolkit are protected //
// under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
// in Visual Environments, and Applications'' by Christof Koch and      //
// Laurent Itti, California Institute of Technology, 2001 (patent       //
// pending; application number 09/912,225 filed July 23, 2001; see      //
// http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
// //////////////////////////////////////////////////////////////////// //
// This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is free software; you can  //
// redistribute it and/or modify it under the terms of the GNU General  //
// Public License as published by the Free Software Foundation; either  //
// version 2 of the License, or (at your option) any later version.     //
//                                                                      //
// The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope //
// that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
// implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
// PURPOSE. See the GNU General Public License for more details.        //
// 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/CUDA/CudaShapeOps.C $ 00035 // $Id: CudaShapeOps.C 12962 2010-03-06 02:13:53Z irock $ 00036 // 00037 00038 #include "CUDA/CudaImage.H" 00039 #include "Util/Assert.H" 00040 #include "CUDA/cudadefs.h" 00041 #include "CudaShapeOps.H" 00042 #include "CUDA/CudaLowPass.H" 00043 #include "CudaDevices.H" 00044 #include "wrap_c_cuda.h" 00045 00046 #include <cmath> 00047 00048 // ###################################################################### 00049 CudaImage<float> cudaQuickLocalAvg(const CudaImage<float>& array, const int scale) 00050 { 00051 const MemoryPolicy mp = array.getMemoryPolicy(); 00052 const int dev = array.getMemoryDevice(); 00053 ASSERT(array.initialized()); 00054 ASSERT(mp != HOST_MEMORY); 00055 int lw = array.getWidth(), lh = array.getHeight(); 00056 int sw = std::max(1, lw / scale), sh = std::max(1, lh / scale); 00057 00058 Dims tile = CudaDevices::getDeviceTileSize(dev); 00059 00060 CudaImage<float> result(sw, sh, NO_INIT, mp, dev); 00061 00062 float fac = 1.0f / float(scale * scale); 00063 00064 cuda_c_quickLocalAvg(array.getCudaArrayPtr(),result.getCudaArrayPtr(),fac,lw,lh,sw,sh,tile.w(),tile.h()); 00065 00066 return result; 00067 00068 } 00069 00070 // ###################################################################### 00071 CudaImage<float> cudaQuickLocalAvg2x2(const CudaImage<float>& array) 00072 { 00073 const MemoryPolicy mp = array.getMemoryPolicy(); 00074 const int dev = array.getMemoryDevice(); 00075 ASSERT(array.initialized()); 00076 ASSERT(mp != 
HOST_MEMORY); 00077 00078 int lw = array.getWidth(), lh = array.getHeight(); 00079 int sw = lw / 2, sh = lh / 2; 00080 00081 // Just do default averaging if this is smaller than 2 along a side 00082 if(lw < 2 || lh < 2) 00083 return cudaQuickLocalAvg(array,2); 00084 00085 Dims tile = CudaDevices::getDeviceTileSize(dev); 00086 00087 CudaImage<float> result(sw, sh, NO_INIT, mp, dev); 00088 00089 cuda_c_quickLocalAvg2x2(array.getCudaArrayPtr(),result.getCudaArrayPtr(),lw,lh,sw,sh,tile.w(),tile.h()); 00090 return result; 00091 } 00092 00093 CudaImage<float> cudaQuickLocalMax(const CudaImage<float>& array, const int scale) 00094 { 00095 00096 ASSERT(array.initialized()); 00097 int lw = array.getWidth(), lh = array.getHeight(); 00098 int sw = std::max(1, lw / scale), sh = std::max(1, lh / scale); 00099 00100 const MemoryPolicy mp = array.getMemoryPolicy(); 00101 const int dev = array.getMemoryDevice(); 00102 Dims tile = CudaDevices::getDeviceTileSize(dev); 00103 CudaImage<float> result(sw, sh, NO_INIT,mp,dev); 00104 00105 cuda_c_quickLocalMax(array.getCudaArrayPtr(),result.getCudaArrayPtr(),lw,lh,sw,sh,tile.w(),tile.h()); 00106 return result; 00107 } 00108 00109 00110 CudaImage<float> cudaDecXY(const CudaImage<float>& src, const int xfactor, const int yfactor_raw) 00111 { 00112 // Ensure that the data is valid 00113 ASSERT(src.initialized()); 00114 // Ensure that we are on a CUDA device 00115 ASSERT(src.getMemoryPolicy() != HOST_MEMORY); 00116 00117 const int yfactor = yfactor_raw >= 0 ? 
yfactor_raw : xfactor; 00118 00119 const int dev = src.getMemoryDevice(); 00120 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00121 00122 const int w = src.getWidth(); 00123 const int h = src.getHeight(); 00124 // Set up output image memory 00125 CudaImage<float> res = CudaImage<float>(Dims(w/xfactor,h/yfactor), NO_INIT, src.getMemoryPolicy(), dev); 00126 00127 // Call CUDA implementation 00128 cuda_c_dec_xy(src.getCudaArrayPtr(), res.getCudaArrayPtr(), xfactor, yfactor, w, h, tile.sz()); 00129 return res; 00130 } 00131 00132 CudaImage<float> cudaDecX(const CudaImage<float>& src, const int xfactor) 00133 { 00134 // Ensure that the data is valid 00135 ASSERT(src.initialized()); 00136 // Ensure that we are on a CUDA device 00137 ASSERT(src.getMemoryPolicy() != HOST_MEMORY); 00138 00139 const int dev = src.getMemoryDevice(); 00140 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00141 00142 const int w = src.getWidth(); 00143 const int h = src.getHeight(); 00144 // Set up output image memory 00145 CudaImage<float> res = CudaImage<float>(Dims(w/xfactor,h), NO_INIT, src.getMemoryPolicy(), dev); 00146 00147 // Call CUDA implementation 00148 cuda_c_dec_x(src.getCudaArrayPtr(), res.getCudaArrayPtr(), xfactor, w, h, tile.sz()); 00149 return res; 00150 } 00151 00152 CudaImage<float> cudaDecY(const CudaImage<float>& src, const int yfactor) 00153 { 00154 // Ensure that the data is valid 00155 ASSERT(src.initialized()); 00156 // Ensure that we are on a CUDA device 00157 ASSERT(src.getMemoryPolicy() != HOST_MEMORY); 00158 00159 const int dev = src.getMemoryDevice(); 00160 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00161 00162 const int w = src.getWidth(); 00163 const int h = src.getHeight(); 00164 // Set up output image memory 00165 CudaImage<float> res = CudaImage<float>(Dims(w,h/yfactor), NO_INIT, src.getMemoryPolicy(), dev); 00166 00167 // Call CUDA implementation 00168 cuda_c_dec_y(src.getCudaArrayPtr(), res.getCudaArrayPtr(), yfactor, w, h, tile.sz()); 00169 
return res; 00170 } 00171 00172 // ###################################################################### 00173 CudaImage<float> cudaDownSize(const CudaImage<float>& src, const Dims& dims, 00174 const int filterWidth) 00175 { 00176 return cudaDownSize(src, dims.w(), dims.h(), filterWidth); 00177 } 00178 00179 // ###################################################################### 00180 CudaImage<float> cudaDownSize(const CudaImage<float>& src, const int new_w, const int new_h, 00181 const int filterWidth) 00182 { 00183 00184 if (src.getWidth() == new_w && src.getHeight() == new_h) return src; 00185 00186 ASSERT(src.getWidth() / new_w > 1 && src.getHeight() / new_h > 1); 00187 00188 const int wdepth = int(0.5+log(double(src.getWidth() / new_w)) / M_LN2); 00189 const int hdepth = int(0.5+log(double(src.getHeight() / new_h)) / M_LN2); 00190 00191 if (wdepth != hdepth) 00192 LFATAL("arrays must have same proportions"); 00193 00194 CudaImage<float> result = src; 00195 for (int i = 0; i < wdepth; ++i) 00196 { 00197 switch(filterWidth) 00198 { 00199 case 5: 00200 result = cudaLowPass5Dec(result,true,true); 00201 break; 00202 case 9: 00203 result = cudaLowPass9Dec(result,true,true); 00204 break; 00205 default: 00206 result = cudaDecX(cudaLowPassX(filterWidth, result)); 00207 result = cudaDecY(cudaLowPassY(filterWidth, result)); 00208 break; 00209 } 00210 } 00211 return result; 00212 } 00213 00214 // ###################################################################### 00215 CudaImage<float> cudaDownSizeClean(const CudaImage<float>& src, const Dims& new_dims, 00216 const int filterWidth) 00217 { 00218 00219 if (src.getDims() == new_dims) return src; 00220 00221 ASSERT(new_dims.isNonEmpty()); 00222 ASSERT(filterWidth >= 1); 00223 00224 CudaImage<float> result = src; 00225 00226 while (result.getWidth() > new_dims.w() * 2 && 00227 result.getHeight() > new_dims.h() * 2) 00228 { 00229 if (filterWidth == 1) 00230 { 00231 result = cudaDecX(result); 00232 result = 
cudaDecY(result); 00233 } 00234 else if (filterWidth == 2) 00235 { 00236 result = cudaQuickLocalAvg2x2(result); 00237 } 00238 else 00239 { 00240 result = cudaDecX(cudaLowPassX(filterWidth, result)); 00241 result = cudaDecY(cudaLowPassY(filterWidth, result)); 00242 } 00243 } 00244 00245 return cudaRescaleBilinear(result, new_dims); 00246 } 00247 00248 00249 // ###################################################################### 00250 template <class T> CudaImage<T> cudaRescaleBilinear(const CudaImage<T>& src, const Dims& dims) 00251 { 00252 return cudaRescaleBilinear(src, dims.w(), dims.h()); 00253 } 00254 00255 // ###################################################################### 00256 CudaImage<float> cudaRescaleBilinear(const CudaImage<float>& src, const int new_w, const int new_h) 00257 { 00258 00259 const int dev = src.getMemoryDevice(); 00260 const MemoryPolicy mp = src.getMemoryPolicy(); 00261 ASSERT(src.initialized()); ASSERT(new_w > 0 && new_h > 0); 00262 ASSERT(mp != HOST_MEMORY); 00263 const int orig_w = src.getWidth(); 00264 const int orig_h = src.getHeight(); 00265 00266 // check if same size already 00267 if (new_w == orig_w && new_h == orig_h) return src; 00268 00269 const float sw = float(orig_w) / float(new_w); 00270 const float sh = float(orig_h) / float(new_h); 00271 00272 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00273 CudaImage<float> result(new_w, new_h, NO_INIT, mp, dev); 00274 cuda_c_rescaleBilinear(src.getCudaArrayPtr(),result.getCudaArrayPtr(),sw,sh,orig_w,orig_h,new_w,new_h,tile.w(),tile.h()); 00275 return result; 00276 } 00277 00278 // ###################################################################### 00279 CudaImage<PixRGB<float> > cudaRescaleBilinear(const CudaImage<PixRGB<float> >& src, const int new_w, const int new_h) 00280 { 00281 00282 const int dev = src.getMemoryDevice(); 00283 const MemoryPolicy mp = src.getMemoryPolicy(); 00284 ASSERT(src.initialized()); ASSERT(new_w > 0 && new_h > 0); 00285 ASSERT(mp != 
HOST_MEMORY); 00286 const int orig_w = src.getWidth(); 00287 const int orig_h = src.getHeight(); 00288 00289 // check if same size already 00290 if (new_w == orig_w && new_h == orig_h) return src; 00291 00292 const float sw = float(orig_w) / float(new_w); 00293 const float sh = float(orig_h) / float(new_h); 00294 00295 Dims tile = CudaDevices::getDeviceTileSize1D(dev); 00296 CudaImage<PixRGB<float> > result(new_w, new_h, NO_INIT, mp, dev); 00297 cuda_c_rescaleBilinearRGB((float3_t *)src.getCudaArrayPtr(),(float3_t *)result.getCudaArrayPtr(),sw,sh,orig_w,orig_h,new_w,new_h,tile.w(),tile.h()); 00298 return result; 00299 } 00300 00301 00302 // ###################################################################### 00303 template <class T> CudaImage<T> cudaRescale(const CudaImage<T>& src, const Dims& newdims, 00304 RescaleType ftype) 00305 { 00306 switch (ftype) 00307 { 00308 case RESCALE_SIMPLE_BILINEAR: return cudaRescaleBilinear(src, newdims); 00309 default: LFATAL("unhandled ftype '%c'", ftype); 00310 } 00311 ASSERT(0); 00312 /* never reached */ return CudaImage<T>(); 00313 } 00314 00315 // ###################################################################### 00316 template <class T> CudaImage<T> cudaRescale(const CudaImage<T>& src, const int width, const int height, 00317 RescaleType ftype) 00318 { 00319 return cudaRescale(src, Dims(width, height), ftype); 00320 } 00321 00322 // Explicit template instantiations 00323 template CudaImage<float> cudaRescale(const CudaImage<float>& src, const Dims& newdims, 00324 RescaleType ftype = RESCALE_SIMPLE_BILINEAR); 00325 template CudaImage<float> cudaRescale(const CudaImage<float>& src, const int width, const int height, 00326 RescaleType ftype = RESCALE_SIMPLE_BILINEAR); 00327 template CudaImage<float> cudaRescaleBilinear(const CudaImage<float>& src, const Dims& dims); 00328 template CudaImage<PixRGB<float> > cudaRescale(const CudaImage<PixRGB<float> >& src, const Dims& newdims, 00329 RescaleType ftype = 
RESCALE_SIMPLE_BILINEAR); 00330 template CudaImage<PixRGB<float> > cudaRescale(const CudaImage<PixRGB<float> >& src, const int width, const int height, 00331 RescaleType ftype = RESCALE_SIMPLE_BILINEAR); 00332 template CudaImage<PixRGB<float> > cudaRescaleBilinear(const CudaImage<PixRGB<float> >& src, const Dims& dims);