#ifndef CUDASIFTH_DEVICE_H
#define CUDASIFTH_DEVICE_H

#include <cmath>
#include <cstdio>

#include "cudaSiftD.h"
#include "cudaImage.h"

//********************************************************//
// CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
//********************************************************//

////////////////////////////////////////////////////////////////////
// Host-side SIFT pipeline stages (declarations only; the
// definitions live outside this header).
// NOTE(review): every stage returns a double; judging from the
// disabled timer code in SeparableFilter below this is presumably
// an elapsed time in milliseconds -- confirm against definitions.
////////////////////////////////////////////////////////////////////
double Find3DMinMax(CudaArray *minmax, CudaImage *data1, CudaImage *data2,
                    CudaImage *data3, float thresh, int maxPts);
double UnpackPointers(CudaArray *minmax, int maxPts, int *ptrs, int *numPts);
double ComputePositions(CudaImage *data1, CudaImage *data2, CudaImage *data3,
                        int *h_ptrs, CudaArray *sift, int numPts, int maxPts,
                        float scale, float factor);
double RemoveEdgePoints(CudaArray *sift, int *initNumPts, int maxPts,
                        float edgeLimit);
double ComputeOrientations(CudaImage *img, int *h_ptrs, CudaArray *sift,
                           int numPts, int maxPts);
double SecondOrientations(CudaArray *sift, int *initNumPts, int maxPts);
double ExtractSiftDescriptors(CudaImage *img, CudaArray *sift,
                              CudaArray *desc, int numPts, int maxPts);
double AddSiftData(SiftData *data, float *d_sift, float *d_desc,
                   int numPts, int maxPts, float subsampling);

////////////////////////////////////////////////////////////////////
// Templated filter functions
////////////////////////////////////////////////////////////////////

// Applies a separable filter with 2*RADIUS+1 taps: ConvRowGPU<RADIUS>
// reads dataA and writes temp, then ConvColGPU<RADIUS> reads temp and
// writes dataB.
//
//   dataA    - source image; its width/height size both launches
//   dataB    - destination image
//   temp     - intermediate image holding the row-pass result
//   h_Kernel - host array of 2*RADIUS+1 floats, copied to the device
//              symbol d_Kernel before the launches
//
// NOTE(review): dataB and temp are assumed to be at least as large
// as dataA -- nothing here verifies it; confirm at the call sites.
//
// Returns 0.0 in all cases (the cutil-timer based measurement that
// used to produce the return value is disabled). If any image, any
// device buffer or h_Kernel is NULL, a message is printed and no
// kernel is launched.
template<int RADIUS>
double SeparableFilter(CudaImage *dataA, CudaImage *dataB,
                       CudaImage *temp, float *h_Kernel)
{
  // Check the host-side pointers first so that the member accesses
  // below cannot dereference NULL.
  if (dataA==NULL || dataB==NULL || temp==NULL || h_Kernel==NULL) {
    printf("SeparableFilter: missing data\n");
    return 0.0;
  }
  unsigned int width = dataA->width;
  unsigned int height = dataA->height;
  float *d_DataA = dataA->d_data;
  float *d_DataB = dataB->d_data;
  float *d_Temp = temp->d_data;
  if (d_DataA==NULL || d_DataB==NULL || d_Temp==NULL) {
    printf("SeparableFilter: missing data\n");
    return 0.0;
  }

  // Upload the filter taps to the device symbol read by both kernels.
  const unsigned int kernelSize = (2*RADIUS+1)*sizeof(float);
  CUDA_SAFE_CALL(cudaMemcpyToSymbol(d_Kernel, h_Kernel, kernelSize));

  // NOTE(review): cudaThreadSynchronize is deprecated in current CUDA
  // toolkits in favour of cudaDeviceSynchronize; it is kept here to
  // stay compatible with the cutil-era toolchain this file targets.

  // Row pass: one block row per image row, iDivUp(width, ROW_TILE_W)
  // blocks across, WARP_SIZE + ROW_TILE_W + RADIUS threads per block.
  dim3 blockGridRows(iDivUp(width, ROW_TILE_W), height);
  dim3 threadBlockRows(WARP_SIZE + ROW_TILE_W + RADIUS);
  ConvRowGPU<RADIUS><<<blockGridRows, threadBlockRows>>>(d_Temp,
    d_DataA, width, height);
  CUT_CHECK_ERROR("ConvRowGPU() execution failed\n");
  CUDA_SAFE_CALL(cudaThreadSynchronize());

  // Column pass: COLUMN_TILE_W x COLUMN_TILE_H tiles, each handled by
  // a COLUMN_TILE_W x columnBlockRows thread block.
  // NOTE(review): the last two arguments are presumably the tile and
  // image strides for one columnBlockRows-row step -- confirm against
  // the ConvColGPU definition.
  const int columnBlockRows = 8;
  dim3 blockGridColumns(iDivUp(width, COLUMN_TILE_W),
    iDivUp(height, COLUMN_TILE_H));
  dim3 threadBlockColumns(COLUMN_TILE_W, columnBlockRows);
  ConvColGPU<RADIUS><<<blockGridColumns, threadBlockColumns>>>(d_DataB,
    d_Temp, width, height, COLUMN_TILE_W*columnBlockRows,
    width*columnBlockRows);
  CUT_CHECK_ERROR("ConvColGPU() execution failed\n");
  CUDA_SAFE_CALL(cudaThreadSynchronize());

  return 0.0;
}

// Low-pass filters dataA into dataB with a normalized Gaussian of
// variance var, sampled at the integer offsets -RADIUS..RADIUS, by
// delegating to SeparableFilter<RADIUS>.
//
//   dataB - destination image (note: destination comes first)
//   dataA - source image
//   temp  - intermediate image for the row-pass result
//   var   - Gaussian variance; must be > 0
//
// Returns whatever SeparableFilter returns (currently always 0.0).
// If var is zero, negative or NaN the Gaussian is undefined (var == 0
// would yield a NaN centre tap), so a message is printed, nothing is
// filtered and 0.0 is returned.
template<int RADIUS>
double LowPass(CudaImage *dataB, CudaImage *dataA, CudaImage *temp, double var)
{
  if (!(var > 0.0)) {  // written this way so that NaN is rejected too
    printf("LowPass: variance must be positive\n");
    return 0.0;
  }
  float kernel[2*RADIUS+1];
  float kernelSum = 0.0f;
  for (int j=-RADIUS;j<=RADIUS;j++) {
    // The exponent is evaluated in double and narrowed once to float.
    kernel[j+RADIUS] = expf((float)(-(double)j*j/2.0/var));
    kernelSum += kernel[j+RADIUS];
  }
  // Normalize so the taps sum to one.
  for (int j=-RADIUS;j<=RADIUS;j++)
    kernel[j+RADIUS] /= kernelSum;
  return SeparableFilter<RADIUS>(dataA, dataB, temp, kernel);
}

#endif