CudaShapeOps.C

00001 /*!@file CUDA/CudaShapeOps.C C++ wrapper for CUDA shape operations */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00005 // by the University of Southern California (USC) and the iLab at USC.  //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file:
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/CUDA/CudaShapeOps.C $
00035 // $Id: CudaShapeOps.C 12962 2010-03-06 02:13:53Z irock $
00036 //
00037 
00038 #include "CUDA/CudaImage.H"
00039 #include "Util/Assert.H"
00040 #include "CUDA/cudadefs.h"
00041 #include "CudaShapeOps.H"
00042 #include "CUDA/CudaLowPass.H"
00043 #include "CudaDevices.H"
00044 #include "wrap_c_cuda.h"
00045 
00046 #include <cmath>
00047 
00048 // ######################################################################
00049 CudaImage<float> cudaQuickLocalAvg(const CudaImage<float>& array, const int scale)
00050 {
00051   const MemoryPolicy mp = array.getMemoryPolicy();
00052   const int dev = array.getMemoryDevice();
00053   ASSERT(array.initialized());
00054   ASSERT(mp != HOST_MEMORY);
00055   int lw = array.getWidth(), lh = array.getHeight();
00056   int sw = std::max(1, lw / scale), sh = std::max(1, lh / scale);
00057 
00058   Dims tile = CudaDevices::getDeviceTileSize(dev);
00059 
00060   CudaImage<float> result(sw, sh, NO_INIT, mp, dev);
00061 
00062   float fac = 1.0f / float(scale * scale);
00063 
00064   cuda_c_quickLocalAvg(array.getCudaArrayPtr(),result.getCudaArrayPtr(),fac,lw,lh,sw,sh,tile.w(),tile.h());
00065 
00066   return result;
00067 
00068 }
00069 
00070 // ######################################################################
00071 CudaImage<float> cudaQuickLocalAvg2x2(const CudaImage<float>& array)
00072 {
00073   const MemoryPolicy mp = array.getMemoryPolicy();
00074   const int dev = array.getMemoryDevice();
00075   ASSERT(array.initialized());
00076   ASSERT(mp != HOST_MEMORY);
00077 
00078   int lw = array.getWidth(), lh = array.getHeight();
00079   int sw = lw / 2, sh = lh / 2;
00080 
00081   // Just do default averaging if this is smaller than 2 along a side
00082   if(lw < 2 || lh < 2)
00083     return cudaQuickLocalAvg(array,2);
00084 
00085   Dims tile = CudaDevices::getDeviceTileSize(dev);
00086 
00087   CudaImage<float> result(sw, sh, NO_INIT, mp, dev);
00088 
00089   cuda_c_quickLocalAvg2x2(array.getCudaArrayPtr(),result.getCudaArrayPtr(),lw,lh,sw,sh,tile.w(),tile.h());
00090   return result;
00091 }
00092 
00093 CudaImage<float> cudaQuickLocalMax(const CudaImage<float>& array, const int scale)
00094 {
00095 
00096   ASSERT(array.initialized());
00097   int lw = array.getWidth(), lh = array.getHeight();
00098   int sw = std::max(1, lw / scale), sh = std::max(1, lh / scale);
00099 
00100   const MemoryPolicy mp = array.getMemoryPolicy();
00101   const int dev = array.getMemoryDevice();
00102   Dims tile = CudaDevices::getDeviceTileSize(dev);
00103   CudaImage<float> result(sw, sh, NO_INIT,mp,dev);
00104 
00105   cuda_c_quickLocalMax(array.getCudaArrayPtr(),result.getCudaArrayPtr(),lw,lh,sw,sh,tile.w(),tile.h());
00106   return result;
00107 }
00108 
00109 
00110 CudaImage<float> cudaDecXY(const CudaImage<float>& src, const int xfactor, const int yfactor_raw)
00111 {
00112   // Ensure that the data is valid
00113   ASSERT(src.initialized());
00114   // Ensure that we are on a CUDA device
00115   ASSERT(src.getMemoryPolicy() != HOST_MEMORY);
00116 
00117   const int yfactor = yfactor_raw >= 0 ? yfactor_raw : xfactor;
00118 
00119   const int dev = src.getMemoryDevice();
00120   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00121 
00122   const int w = src.getWidth();
00123   const int h = src.getHeight();
00124   // Set up output image memory
00125   CudaImage<float> res = CudaImage<float>(Dims(w/xfactor,h/yfactor), NO_INIT, src.getMemoryPolicy(), dev);
00126 
00127   // Call CUDA implementation
00128   cuda_c_dec_xy(src.getCudaArrayPtr(), res.getCudaArrayPtr(), xfactor, yfactor, w, h, tile.sz());
00129   return res;
00130 }
00131 
00132 CudaImage<float> cudaDecX(const CudaImage<float>& src, const int xfactor)
00133 {
00134   // Ensure that the data is valid
00135   ASSERT(src.initialized());
00136   // Ensure that we are on a CUDA device
00137   ASSERT(src.getMemoryPolicy() != HOST_MEMORY);
00138 
00139   const int dev = src.getMemoryDevice();
00140   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00141 
00142   const int w = src.getWidth();
00143   const int h = src.getHeight();
00144   // Set up output image memory
00145   CudaImage<float> res = CudaImage<float>(Dims(w/xfactor,h), NO_INIT, src.getMemoryPolicy(), dev);
00146 
00147   // Call CUDA implementation
00148   cuda_c_dec_x(src.getCudaArrayPtr(), res.getCudaArrayPtr(), xfactor, w, h, tile.sz());
00149   return res;
00150 }
00151 
00152 CudaImage<float> cudaDecY(const CudaImage<float>& src, const int yfactor)
00153 {
00154   // Ensure that the data is valid
00155   ASSERT(src.initialized());
00156   // Ensure that we are on a CUDA device
00157   ASSERT(src.getMemoryPolicy() != HOST_MEMORY);
00158 
00159   const int dev = src.getMemoryDevice();
00160   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00161 
00162   const int w = src.getWidth();
00163   const int h = src.getHeight();
00164   // Set up output image memory
00165   CudaImage<float> res = CudaImage<float>(Dims(w,h/yfactor), NO_INIT, src.getMemoryPolicy(), dev);
00166 
00167   // Call CUDA implementation
00168   cuda_c_dec_y(src.getCudaArrayPtr(), res.getCudaArrayPtr(), yfactor, w, h, tile.sz());
00169   return res;
00170 }
00171 
00172 // ######################################################################
00173 CudaImage<float> cudaDownSize(const CudaImage<float>& src, const Dims& dims,
00174                   const int filterWidth)
00175 {
00176   return cudaDownSize(src, dims.w(), dims.h(), filterWidth);
00177 }
00178 
00179 // ######################################################################
00180 CudaImage<float> cudaDownSize(const CudaImage<float>& src, const int new_w, const int new_h,
00181                   const int filterWidth)
00182 {
00183 
00184   if (src.getWidth() == new_w && src.getHeight() == new_h) return src;
00185 
00186   ASSERT(src.getWidth() / new_w > 1 && src.getHeight() / new_h > 1);
00187 
00188   const int wdepth = int(0.5+log(double(src.getWidth() / new_w)) / M_LN2);
00189   const int hdepth = int(0.5+log(double(src.getHeight() / new_h)) / M_LN2);
00190 
00191   if (wdepth != hdepth)
00192     LFATAL("arrays must have same proportions");
00193 
00194   CudaImage<float> result = src;
00195   for (int i = 0; i < wdepth; ++i)
00196     {
00197       switch(filterWidth)
00198         {
00199         case 5:
00200           result = cudaLowPass5Dec(result,true,true);
00201           break;
00202         case 9:
00203           result = cudaLowPass9Dec(result,true,true);
00204           break;
00205         default:
00206           result = cudaDecX(cudaLowPassX(filterWidth, result));
00207           result = cudaDecY(cudaLowPassY(filterWidth, result));
00208           break;
00209         }
00210     }
00211   return result;
00212 }
00213 
00214 // ######################################################################
00215 CudaImage<float> cudaDownSizeClean(const CudaImage<float>& src, const Dims& new_dims,
00216                            const int filterWidth)
00217 {
00218 
00219   if (src.getDims() == new_dims) return src;
00220 
00221   ASSERT(new_dims.isNonEmpty());
00222   ASSERT(filterWidth >= 1);
00223 
00224   CudaImage<float> result = src;
00225 
00226   while (result.getWidth() > new_dims.w() * 2 &&
00227          result.getHeight() > new_dims.h() * 2)
00228     {
00229       if (filterWidth == 1)
00230         {
00231           result = cudaDecX(result);
00232           result = cudaDecY(result);
00233         }
00234       else if (filterWidth == 2)
00235         {
00236           result = cudaQuickLocalAvg2x2(result);
00237         }
00238       else
00239         {
00240           result = cudaDecX(cudaLowPassX(filterWidth, result));
00241           result = cudaDecY(cudaLowPassY(filterWidth, result));
00242         }
00243     }
00244 
00245   return cudaRescaleBilinear(result, new_dims);
00246 }
00247 
00248 
00249 // ######################################################################
00250 template <class T> CudaImage<T> cudaRescaleBilinear(const CudaImage<T>& src, const Dims& dims)
00251 {
00252   return cudaRescaleBilinear(src, dims.w(), dims.h());
00253 }
00254 
00255 // ######################################################################
00256 CudaImage<float> cudaRescaleBilinear(const CudaImage<float>& src, const int new_w, const int new_h)
00257 {
00258 
00259   const int dev = src.getMemoryDevice();
00260   const MemoryPolicy mp = src.getMemoryPolicy();
00261   ASSERT(src.initialized()); ASSERT(new_w > 0 && new_h > 0);
00262   ASSERT(mp != HOST_MEMORY);
00263   const int orig_w = src.getWidth();
00264   const int orig_h = src.getHeight();
00265 
00266   // check if same size already
00267   if (new_w == orig_w && new_h == orig_h) return src;
00268 
00269   const float sw = float(orig_w) / float(new_w);
00270   const float sh = float(orig_h) / float(new_h);
00271 
00272   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00273   CudaImage<float> result(new_w, new_h, NO_INIT, mp, dev);
00274   cuda_c_rescaleBilinear(src.getCudaArrayPtr(),result.getCudaArrayPtr(),sw,sh,orig_w,orig_h,new_w,new_h,tile.w(),tile.h());
00275   return result;
00276 }
00277 
00278 // ######################################################################
00279 CudaImage<PixRGB<float> > cudaRescaleBilinear(const CudaImage<PixRGB<float> >& src, const int new_w, const int new_h)
00280 {
00281 
00282   const int dev = src.getMemoryDevice();
00283   const MemoryPolicy mp = src.getMemoryPolicy();
00284   ASSERT(src.initialized()); ASSERT(new_w > 0 && new_h > 0);
00285   ASSERT(mp != HOST_MEMORY);
00286   const int orig_w = src.getWidth();
00287   const int orig_h = src.getHeight();
00288 
00289   // check if same size already
00290   if (new_w == orig_w && new_h == orig_h) return src;
00291 
00292   const float sw = float(orig_w) / float(new_w);
00293   const float sh = float(orig_h) / float(new_h);
00294 
00295   Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00296   CudaImage<PixRGB<float> > result(new_w, new_h, NO_INIT, mp, dev);
00297   cuda_c_rescaleBilinearRGB((float3_t *)src.getCudaArrayPtr(),(float3_t *)result.getCudaArrayPtr(),sw,sh,orig_w,orig_h,new_w,new_h,tile.w(),tile.h());
00298   return result;
00299 }
00300 
00301 
00302 // ######################################################################
00303 template <class T> CudaImage<T> cudaRescale(const CudaImage<T>& src, const Dims& newdims,
00304                  RescaleType ftype)
00305 {
00306   switch (ftype)
00307     {
00308     case RESCALE_SIMPLE_BILINEAR: return cudaRescaleBilinear(src, newdims);
00309     default: LFATAL("unhandled ftype '%c'", ftype);
00310     }
00311   ASSERT(0);
00312   /* never reached */ return CudaImage<T>();
00313 }
00314 
00315 // ######################################################################
00316 template <class T> CudaImage<T> cudaRescale(const CudaImage<T>& src, const int width, const int height,
00317                  RescaleType ftype)
00318 {
00319   return cudaRescale(src, Dims(width, height), ftype);
00320 }
00321 
00322 // Explicit template instantiations
00323 template CudaImage<float> cudaRescale(const CudaImage<float>& src, const Dims& newdims,
00324                              RescaleType ftype = RESCALE_SIMPLE_BILINEAR);
00325 template CudaImage<float> cudaRescale(const CudaImage<float>& src, const int width, const int height,
00326                              RescaleType ftype = RESCALE_SIMPLE_BILINEAR);
00327 template CudaImage<float> cudaRescaleBilinear(const CudaImage<float>& src, const Dims& dims);
00328 template CudaImage<PixRGB<float> > cudaRescale(const CudaImage<PixRGB<float> >& src, const Dims& newdims,
00329                              RescaleType ftype = RESCALE_SIMPLE_BILINEAR);
00330 template CudaImage<PixRGB<float> > cudaRescale(const CudaImage<PixRGB<float> >& src, const int width, const int height,
00331                              RescaleType ftype = RESCALE_SIMPLE_BILINEAR);
00332 template CudaImage<PixRGB<float> > cudaRescaleBilinear(const CudaImage<PixRGB<float> >& src, const Dims& dims);