00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include "CUDA/CudaImage.H"
00039 #include "Util/Assert.H"
00040 #include "CUDA/cudadefs.h"
00041 #include "CudaShapeOps.H"
00042 #include "CUDA/CudaLowPass.H"
00043 #include "CudaDevices.H"
00044 #include "wrap_c_cuda.h"
00045
00046 #include <cmath>
00047
00048
00049 CudaImage<float> cudaQuickLocalAvg(const CudaImage<float>& array, const int scale)
00050 {
00051 const MemoryPolicy mp = array.getMemoryPolicy();
00052 const int dev = array.getMemoryDevice();
00053 ASSERT(array.initialized());
00054 ASSERT(mp != HOST_MEMORY);
00055 int lw = array.getWidth(), lh = array.getHeight();
00056 int sw = std::max(1, lw / scale), sh = std::max(1, lh / scale);
00057
00058 Dims tile = CudaDevices::getDeviceTileSize(dev);
00059
00060 CudaImage<float> result(sw, sh, NO_INIT, mp, dev);
00061
00062 float fac = 1.0f / float(scale * scale);
00063
00064 cuda_c_quickLocalAvg(array.getCudaArrayPtr(),result.getCudaArrayPtr(),fac,lw,lh,sw,sh,tile.w(),tile.h());
00065
00066 return result;
00067
00068 }
00069
00070
00071 CudaImage<float> cudaQuickLocalAvg2x2(const CudaImage<float>& array)
00072 {
00073 const MemoryPolicy mp = array.getMemoryPolicy();
00074 const int dev = array.getMemoryDevice();
00075 ASSERT(array.initialized());
00076 ASSERT(mp != HOST_MEMORY);
00077
00078 int lw = array.getWidth(), lh = array.getHeight();
00079 int sw = lw / 2, sh = lh / 2;
00080
00081
00082 if(lw < 2 || lh < 2)
00083 return cudaQuickLocalAvg(array,2);
00084
00085 Dims tile = CudaDevices::getDeviceTileSize(dev);
00086
00087 CudaImage<float> result(sw, sh, NO_INIT, mp, dev);
00088
00089 cuda_c_quickLocalAvg2x2(array.getCudaArrayPtr(),result.getCudaArrayPtr(),lw,lh,sw,sh,tile.w(),tile.h());
00090 return result;
00091 }
00092
00093 CudaImage<float> cudaQuickLocalMax(const CudaImage<float>& array, const int scale)
00094 {
00095
00096 ASSERT(array.initialized());
00097 int lw = array.getWidth(), lh = array.getHeight();
00098 int sw = std::max(1, lw / scale), sh = std::max(1, lh / scale);
00099
00100 const MemoryPolicy mp = array.getMemoryPolicy();
00101 const int dev = array.getMemoryDevice();
00102 Dims tile = CudaDevices::getDeviceTileSize(dev);
00103 CudaImage<float> result(sw, sh, NO_INIT,mp,dev);
00104
00105 cuda_c_quickLocalMax(array.getCudaArrayPtr(),result.getCudaArrayPtr(),lw,lh,sw,sh,tile.w(),tile.h());
00106 return result;
00107 }
00108
00109
00110 CudaImage<float> cudaDecXY(const CudaImage<float>& src, const int xfactor, const int yfactor_raw)
00111 {
00112
00113 ASSERT(src.initialized());
00114
00115 ASSERT(src.getMemoryPolicy() != HOST_MEMORY);
00116
00117 const int yfactor = yfactor_raw >= 0 ? yfactor_raw : xfactor;
00118
00119 const int dev = src.getMemoryDevice();
00120 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00121
00122 const int w = src.getWidth();
00123 const int h = src.getHeight();
00124
00125 CudaImage<float> res = CudaImage<float>(Dims(w/xfactor,h/yfactor), NO_INIT, src.getMemoryPolicy(), dev);
00126
00127
00128 cuda_c_dec_xy(src.getCudaArrayPtr(), res.getCudaArrayPtr(), xfactor, yfactor, w, h, tile.sz());
00129 return res;
00130 }
00131
00132 CudaImage<float> cudaDecX(const CudaImage<float>& src, const int xfactor)
00133 {
00134
00135 ASSERT(src.initialized());
00136
00137 ASSERT(src.getMemoryPolicy() != HOST_MEMORY);
00138
00139 const int dev = src.getMemoryDevice();
00140 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00141
00142 const int w = src.getWidth();
00143 const int h = src.getHeight();
00144
00145 CudaImage<float> res = CudaImage<float>(Dims(w/xfactor,h), NO_INIT, src.getMemoryPolicy(), dev);
00146
00147
00148 cuda_c_dec_x(src.getCudaArrayPtr(), res.getCudaArrayPtr(), xfactor, w, h, tile.sz());
00149 return res;
00150 }
00151
00152 CudaImage<float> cudaDecY(const CudaImage<float>& src, const int yfactor)
00153 {
00154
00155 ASSERT(src.initialized());
00156
00157 ASSERT(src.getMemoryPolicy() != HOST_MEMORY);
00158
00159 const int dev = src.getMemoryDevice();
00160 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00161
00162 const int w = src.getWidth();
00163 const int h = src.getHeight();
00164
00165 CudaImage<float> res = CudaImage<float>(Dims(w,h/yfactor), NO_INIT, src.getMemoryPolicy(), dev);
00166
00167
00168 cuda_c_dec_y(src.getCudaArrayPtr(), res.getCudaArrayPtr(), yfactor, w, h, tile.sz());
00169 return res;
00170 }
00171
00172
00173 CudaImage<float> cudaDownSize(const CudaImage<float>& src, const Dims& dims,
00174 const int filterWidth)
00175 {
00176 return cudaDownSize(src, dims.w(), dims.h(), filterWidth);
00177 }
00178
00179
00180 CudaImage<float> cudaDownSize(const CudaImage<float>& src, const int new_w, const int new_h,
00181 const int filterWidth)
00182 {
00183
00184 if (src.getWidth() == new_w && src.getHeight() == new_h) return src;
00185
00186 ASSERT(src.getWidth() / new_w > 1 && src.getHeight() / new_h > 1);
00187
00188 const int wdepth = int(0.5+log(double(src.getWidth() / new_w)) / M_LN2);
00189 const int hdepth = int(0.5+log(double(src.getHeight() / new_h)) / M_LN2);
00190
00191 if (wdepth != hdepth)
00192 LFATAL("arrays must have same proportions");
00193
00194 CudaImage<float> result = src;
00195 for (int i = 0; i < wdepth; ++i)
00196 {
00197 switch(filterWidth)
00198 {
00199 case 5:
00200 result = cudaLowPass5Dec(result,true,true);
00201 break;
00202 case 9:
00203 result = cudaLowPass9Dec(result,true,true);
00204 break;
00205 default:
00206 result = cudaDecX(cudaLowPassX(filterWidth, result));
00207 result = cudaDecY(cudaLowPassY(filterWidth, result));
00208 break;
00209 }
00210 }
00211 return result;
00212 }
00213
00214
00215 CudaImage<float> cudaDownSizeClean(const CudaImage<float>& src, const Dims& new_dims,
00216 const int filterWidth)
00217 {
00218
00219 if (src.getDims() == new_dims) return src;
00220
00221 ASSERT(new_dims.isNonEmpty());
00222 ASSERT(filterWidth >= 1);
00223
00224 CudaImage<float> result = src;
00225
00226 while (result.getWidth() > new_dims.w() * 2 &&
00227 result.getHeight() > new_dims.h() * 2)
00228 {
00229 if (filterWidth == 1)
00230 {
00231 result = cudaDecX(result);
00232 result = cudaDecY(result);
00233 }
00234 else if (filterWidth == 2)
00235 {
00236 result = cudaQuickLocalAvg2x2(result);
00237 }
00238 else
00239 {
00240 result = cudaDecX(cudaLowPassX(filterWidth, result));
00241 result = cudaDecY(cudaLowPassY(filterWidth, result));
00242 }
00243 }
00244
00245 return cudaRescaleBilinear(result, new_dims);
00246 }
00247
00248
00249
00250 template <class T> CudaImage<T> cudaRescaleBilinear(const CudaImage<T>& src, const Dims& dims)
00251 {
00252 return cudaRescaleBilinear(src, dims.w(), dims.h());
00253 }
00254
00255
00256 CudaImage<float> cudaRescaleBilinear(const CudaImage<float>& src, const int new_w, const int new_h)
00257 {
00258
00259 const int dev = src.getMemoryDevice();
00260 const MemoryPolicy mp = src.getMemoryPolicy();
00261 ASSERT(src.initialized()); ASSERT(new_w > 0 && new_h > 0);
00262 ASSERT(mp != HOST_MEMORY);
00263 const int orig_w = src.getWidth();
00264 const int orig_h = src.getHeight();
00265
00266
00267 if (new_w == orig_w && new_h == orig_h) return src;
00268
00269 const float sw = float(orig_w) / float(new_w);
00270 const float sh = float(orig_h) / float(new_h);
00271
00272 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00273 CudaImage<float> result(new_w, new_h, NO_INIT, mp, dev);
00274 cuda_c_rescaleBilinear(src.getCudaArrayPtr(),result.getCudaArrayPtr(),sw,sh,orig_w,orig_h,new_w,new_h,tile.w(),tile.h());
00275 return result;
00276 }
00277
00278
00279 CudaImage<PixRGB<float> > cudaRescaleBilinear(const CudaImage<PixRGB<float> >& src, const int new_w, const int new_h)
00280 {
00281
00282 const int dev = src.getMemoryDevice();
00283 const MemoryPolicy mp = src.getMemoryPolicy();
00284 ASSERT(src.initialized()); ASSERT(new_w > 0 && new_h > 0);
00285 ASSERT(mp != HOST_MEMORY);
00286 const int orig_w = src.getWidth();
00287 const int orig_h = src.getHeight();
00288
00289
00290 if (new_w == orig_w && new_h == orig_h) return src;
00291
00292 const float sw = float(orig_w) / float(new_w);
00293 const float sh = float(orig_h) / float(new_h);
00294
00295 Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00296 CudaImage<PixRGB<float> > result(new_w, new_h, NO_INIT, mp, dev);
00297 cuda_c_rescaleBilinearRGB((float3_t *)src.getCudaArrayPtr(),(float3_t *)result.getCudaArrayPtr(),sw,sh,orig_w,orig_h,new_w,new_h,tile.w(),tile.h());
00298 return result;
00299 }
00300
00301
00302
00303 template <class T> CudaImage<T> cudaRescale(const CudaImage<T>& src, const Dims& newdims,
00304 RescaleType ftype)
00305 {
00306 switch (ftype)
00307 {
00308 case RESCALE_SIMPLE_BILINEAR: return cudaRescaleBilinear(src, newdims);
00309 default: LFATAL("unhandled ftype '%c'", ftype);
00310 }
00311 ASSERT(0);
00312 return CudaImage<T>();
00313 }
00314
00315
00316 template <class T> CudaImage<T> cudaRescale(const CudaImage<T>& src, const int width, const int height,
00317 RescaleType ftype)
00318 {
00319 return cudaRescale(src, Dims(width, height), ftype);
00320 }
00321
00322
00323 template CudaImage<float> cudaRescale(const CudaImage<float>& src, const Dims& newdims,
00324 RescaleType ftype = RESCALE_SIMPLE_BILINEAR);
00325 template CudaImage<float> cudaRescale(const CudaImage<float>& src, const int width, const int height,
00326 RescaleType ftype = RESCALE_SIMPLE_BILINEAR);
00327 template CudaImage<float> cudaRescaleBilinear(const CudaImage<float>& src, const Dims& dims);
00328 template CudaImage<PixRGB<float> > cudaRescale(const CudaImage<PixRGB<float> >& src, const Dims& newdims,
00329 RescaleType ftype = RESCALE_SIMPLE_BILINEAR);
00330 template CudaImage<PixRGB<float> > cudaRescale(const CudaImage<PixRGB<float> >& src, const int width, const int height,
00331 RescaleType ftype = RESCALE_SIMPLE_BILINEAR);
00332 template CudaImage<PixRGB<float> > cudaRescaleBilinear(const CudaImage<PixRGB<float> >& src, const Dims& dims);