00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include "CUDA/CudaImage.H"
00039 #include "CUDA/CudaDevices.H"
00040 #include "CUDA/CudaLowPass.H"
00041 #include "wrap_c_cuda.h"
00042
00043
00044 CudaImage<float> cudaLowPass5xDec(const CudaImage<float>&src)
00045 {
00046
00047 ASSERT(src.initialized());
00048
00049 ASSERT(src.getMemoryPolicy() != HOST_MEMORY);
00050
00051 const int dev = src.getMemoryDevice();
00052
00053
00054 CudaImage<float> result = CudaImage<float>(src.getWidth()/2, src.getHeight(), NO_INIT, src.getMemoryPolicy(), dev);
00055
00056 const Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00057
00058 cuda_c_lowpass_5_x_dec_x(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),tile.w());
00059
00060 return result;
00061 }
00062
00063
00064 CudaImage<float> cudaLowPass5yDec(const CudaImage<float>&src)
00065 {
00066
00067 ASSERT(src.initialized());
00068
00069 ASSERT(src.getMemoryPolicy() != HOST_MEMORY);
00070
00071 const int dev = src.getMemoryDevice();
00072
00073
00074 CudaImage<float> result = CudaImage<float>(src.getWidth(), src.getHeight()/2, NO_INIT, src.getMemoryPolicy(), dev);
00075
00076 const Dims tile = CudaDevices::getDeviceTileSize(dev);
00077
00078 cuda_c_lowpass_5_y_dec_y(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),tile.w(),tile.h());
00079 return result;
00080 }
00081
00082 CudaImage<float> cudaLowPass5Dec(const CudaImage<float>& src, const bool go_x, const bool go_y)
00083 {
00084 CudaImage<float> result = src;
00085 if(go_x) result = cudaLowPass5xDec(result);
00086 if(go_y) result = cudaLowPass5yDec(result);
00087 return result;
00088 }
00089
00090 CudaImage<float> cudaLowPass9xDec(const CudaImage<float>&src)
00091 {
00092
00093 ASSERT(src.initialized());
00094
00095 ASSERT(src.getMemoryPolicy() != HOST_MEMORY);
00096
00097 const int dev = src.getMemoryDevice();
00098
00099
00100 const int rw = src.getWidth()/2;
00101 const int rh = src.getHeight();
00102
00103 CudaImage<float> result = CudaImage<float>(rw,rh, NO_INIT, src.getMemoryPolicy(), dev);
00104
00105 const Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00106
00107 cuda_c_lowpass_9_x_dec_x(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),rw,rh,tile.w());
00108
00109 return result;
00110 }
00111
00112
00113 CudaImage<float> cudaLowPass9yDec(const CudaImage<float>&src)
00114 {
00115
00116 ASSERT(src.initialized());
00117
00118 ASSERT(src.getMemoryPolicy() != HOST_MEMORY);
00119
00120 const int dev = src.getMemoryDevice();
00121
00122
00123 const int rw = src.getWidth();
00124 const int rh = src.getHeight()/2;
00125
00126 CudaImage<float> result = CudaImage<float>(rw,rh, NO_INIT, src.getMemoryPolicy(), dev);
00127
00128 const Dims tile = CudaDevices::getDeviceTileSize(dev);
00129
00130 cuda_c_lowpass_9_y_dec_y(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),rw,rh,tile.w(),tile.h());
00131 return result;
00132 }
00133
00134 CudaImage<float> cudaLowPass9Dec(const CudaImage<float>& src, const bool go_x, const bool go_y)
00135 {
00136 CudaImage<float> result = src;
00137 if(go_x) result = cudaLowPass9xDec(result);
00138 if(go_y) result = cudaLowPass9yDec(result);
00139 return result;
00140 }
00141
00142
00143
00144 CudaImage<float> cudaLowPass9x(const CudaImage<float>&src)
00145 {
00146
00147 ASSERT(src.initialized());
00148
00149 ASSERT(src.getMemoryPolicy() != HOST_MEMORY);
00150
00151 const int dev = src.getMemoryDevice();
00152
00153
00154 CudaImage<float> result = CudaImage<float>(src.getWidth(), src.getHeight(), NO_INIT, src.getMemoryPolicy(), dev);
00155
00156 const Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00157
00158 cuda_c_lowpass_9_x(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),tile.w());
00159
00160 return result;
00161 }
00162
00163
00164 CudaImage<float> cudaLowPass9xyDec(const CudaImage<float>&src)
00165 {
00166
00167 ASSERT(src.initialized());
00168
00169 ASSERT(src.getMemoryPolicy() != HOST_MEMORY);
00170
00171 const int dev = src.getMemoryDevice();
00172
00173 const int rw = src.getWidth()/2;
00174 const int rh = src.getHeight()/2;
00175 CudaImage<float> tmp = CudaImage<float>(rw, src.getHeight(), ZEROS, src.getMemoryPolicy(), dev);
00176
00177 const Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00178 cuda_c_lowpass_texture_9_x_dec_x(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),tmp.getCudaArrayPtr(),rw,src.getHeight(),tile.sz(),1);
00179
00180 CudaImage<float> res = CudaImage<float>(rw, rh, ZEROS, src.getMemoryPolicy(), dev);
00181 cuda_c_lowpass_texture_9_y_dec_y(tmp.getCudaArrayPtr(),tmp.getWidth(),tmp.getHeight(),res.getCudaArrayPtr(),rw,rh,1,tile.sz());
00182 return res;
00183 }
00184
00185
00186
00187 CudaImage<float> cudaLowPass9y(const CudaImage<float>&src)
00188 {
00189
00190 ASSERT(src.initialized());
00191
00192 ASSERT(src.getMemoryPolicy() != HOST_MEMORY);
00193
00194 const int dev = src.getMemoryDevice();
00195
00196
00197 CudaImage<float> result = CudaImage<float>(src.getWidth(), src.getHeight(), NO_INIT, src.getMemoryPolicy(), dev);
00198
00199 const Dims tile = CudaDevices::getDeviceTileSize(dev);
00200
00201 cuda_c_lowpass_9_y(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),tile.w(),tile.h());
00202 return result;
00203 }
00204
00205 CudaImage<float> cudaLowPass5x(const CudaImage<float>&src)
00206 {
00207
00208 ASSERT(src.initialized());
00209
00210 ASSERT(src.getMemoryPolicy() != HOST_MEMORY);
00211
00212 const int dev = src.getMemoryDevice();
00213
00214
00215 CudaImage<float> result = CudaImage<float>(src.getWidth(), src.getHeight(), NO_INIT, src.getMemoryPolicy(), dev);
00216
00217 const Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00218
00219 cuda_c_lowpass_5_x(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),tile.w());
00220
00221 return result;
00222 }
00223
00224
00225 CudaImage<float> cudaLowPass5y(const CudaImage<float>&src)
00226 {
00227
00228 ASSERT(src.initialized());
00229
00230 ASSERT(src.getMemoryPolicy() != HOST_MEMORY);
00231
00232 const int dev = src.getMemoryDevice();
00233
00234
00235 CudaImage<float> result = CudaImage<float>(src.getWidth(), src.getHeight(), NO_INIT, src.getMemoryPolicy(), dev);
00236
00237 const Dims tile = CudaDevices::getDeviceTileSize(dev);
00238
00239 cuda_c_lowpass_5_y(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),tile.w(),tile.h());
00240 return result;
00241 }
00242
00243 CudaImage<float> cudaLowPass3x(const CudaImage<float>&src)
00244 {
00245
00246 ASSERT(src.initialized());
00247
00248 ASSERT(src.getMemoryPolicy() != HOST_MEMORY);
00249
00250 const int dev = src.getMemoryDevice();
00251
00252
00253 CudaImage<float> result = CudaImage<float>(src.getWidth(), src.getHeight(), NO_INIT, src.getMemoryPolicy(), dev);
00254
00255 const Dims tile = CudaDevices::getDeviceTileSize1D(dev);
00256
00257 cuda_c_lowpass_3_x(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),tile.w());
00258
00259 return result;
00260 }
00261
00262
00263 CudaImage<float> cudaLowPass3y(const CudaImage<float>&src)
00264 {
00265
00266 ASSERT(src.initialized());
00267
00268 ASSERT(src.getMemoryPolicy() != HOST_MEMORY);
00269
00270 const int dev = src.getMemoryDevice();
00271
00272
00273 CudaImage<float> result = CudaImage<float>(src.getWidth(), src.getHeight(), NO_INIT, src.getMemoryPolicy(), dev);
00274
00275 const Dims tile = CudaDevices::getDeviceTileSize(dev);
00276
00277 cuda_c_lowpass_3_y(src.getCudaArrayPtr(),src.getWidth(),src.getHeight(),result.getCudaArrayPtr(),tile.w(),tile.h());
00278 return result;
00279 }
00280
00281 CudaImage<float> cudaLowPass9(const CudaImage<float>& src, const bool go_x, const bool go_y)
00282 {
00283 CudaImage<float> result = src;
00284 if(go_x) result = cudaLowPass9x(result);
00285 if(go_y) result = cudaLowPass9y(result);
00286 return result;
00287 }
00288
00289
00290 CudaImage<float> cudaLowPass(const int N, const CudaImage<float>& src, const bool go_x, const bool go_y)
00291 {
00292 CudaImage<float> result = src;
00293 if (go_x) result = cudaLowPassX(N,result);
00294 if (go_y) result = cudaLowPassY(N,result);
00295 return result;
00296 }
00297
00298
00299 CudaImage<float> cudaLowPassX(const int N, const CudaImage<float>& src)
00300 {
00301 switch (N)
00302 {
00303 case 3: return cudaLowPass3x(src);
00304 case 5: return cudaLowPass5x(src);
00305 case 9: return cudaLowPass9x(src);
00306 default:
00307 LERROR("Only 3,5, and 9 tap kernels implemented");
00308 return CudaImage<float>();
00309 break;
00310 }
00311
00312
00313
00314
00315
00316 }
00317
00318
00319 CudaImage<float> cudaLowPassY(const int N, const CudaImage<float>& src)
00320 {
00321 switch (N)
00322 {
00323 case 3: return cudaLowPass3y(src);
00324 case 5: return cudaLowPass5y(src);
00325 case 9: return cudaLowPass9y(src);
00326 default:
00327 LERROR("Only 3,5, and 9 tap kernels implemented");
00328 return CudaImage<float>();
00329 break;
00330 }
00331
00332
00333
00334
00335
00336
00337 }