00001 //********************************************************// 00002 // CUDA SIFT extractor by Marten Bjorkman aka Celebrandil // 00003 //********************************************************// 00004 00005 #include <stdio.h> 00006 #include "CUDA/cutil.h" 00007 #include "cudaImage.h" 00008 00009 00010 int iDivUp(int a, int b) { 00011 return (a % b != 0) ? (a / b + 1) : (a / b); 00012 } 00013 00014 int iDivDown(int a, int b) { 00015 return a / b; 00016 } 00017 00018 int iAlignUp(int a, int b) { 00019 return (a % b != 0) ? (a - a % b + b) : a; 00020 } 00021 00022 int iAlignDown(int a, int b) { 00023 return a - a % b; 00024 } 00025 00026 double AllocCudaImage(CudaImage *img, int w, int h, int p, bool host, bool dev) 00027 { 00028 int sz = sizeof(float)*p*h; 00029 img->width = w; 00030 img->height = h; 00031 img->pitch = p; 00032 img->h_data = NULL; 00033 if (host) { 00034 #ifdef VERBOSE 00035 printf("Allocating host data...\n"); 00036 #endif 00037 //img->h_data = (float *)malloc(sz); 00038 CUDA_SAFE_CALL(cudaMallocHost((void **)&img->h_data, sz)); 00039 } 00040 img->d_data = NULL; 00041 if (dev) { 00042 #ifdef VERBOSE 00043 printf("Allocating device data...\n"); 00044 #endif 00045 CUDA_SAFE_CALL(cudaMalloc((void **)&img->d_data, sz)); 00046 if (img->d_data==NULL) 00047 printf("Failed to allocate device data\n"); 00048 } 00049 img->t_data = NULL; 00050 return 0.0; 00051 } 00052 00053 double FreeCudaImage(CudaImage *img) 00054 { 00055 if (img->d_data!=NULL) { 00056 #ifdef VERBOSE 00057 printf("Freeing device data...\n"); 00058 #endif 00059 CUDA_SAFE_CALL(cudaFree(img->d_data)); 00060 } 00061 img->d_data = NULL; 00062 if (img->h_data!=NULL) { 00063 #ifdef VERBOSE 00064 printf("Freeing host data...\n"); 00065 #endif 00066 //free(img->h_data); 00067 CUDA_SAFE_CALL(cudaFreeHost(img->h_data)); 00068 } 00069 img->h_data = NULL; 00070 if (img->t_data!=NULL) { 00071 #ifdef VERBOSE 00072 printf("Freeing texture data...\n"); 00073 #endif 00074 CUDA_SAFE_CALL(cudaFreeArray((cudaArray *)img->t_data)); 00075 } 00076 img->t_data = NULL; 00077 return 0; 00078 } 00079 00080 double Download(CudaImage *img) 00081 { 00082 if (img->d_data!=NULL && img->h_data!=NULL) 00083 CUDA_SAFE_CALL(cudaMemcpy(img->d_data, img->h_data, 00084 sizeof(float)*img->pitch*img->height, cudaMemcpyHostToDevice)); 00085 return 0; 00086 } 00087 00088 double Readback(CudaImage *img, int w, int h) 00089 { 00090 int p = sizeof(float)*img->pitch; 00091 w = sizeof(float)*(w<0 ? img->width : w); 00092 h = (h<0 ? img->height : h); 00093 CUDA_SAFE_CALL(cudaMemcpy2D(img->h_data, p, img->d_data, p, 00094 w, h, cudaMemcpyDeviceToHost)); 00095 //CUDA_SAFE_CALL(cudaMemcpy(img->h_data, img->d_data, 00096 // sizeof(float)*img->pitch*img->height, cudaMemcpyDeviceToHost)); 00097 return 0; 00098 } 00099 00100 double InitTexture(CudaImage *img) 00101 { 00102 cudaChannelFormatDesc t_desc = cudaCreateChannelDesc<float>(); 00103 CUDA_SAFE_CALL(cudaMallocArray((cudaArray **)&img->t_data, &t_desc, 00104 img->pitch, img->height)); 00105 #ifdef VERBOSE 00106 printf("InitTexture(%d, %d)\n", img->pitch, img->height); 00107 #endif 00108 if (img->t_data==NULL) 00109 printf("Failed to allocated texture data\n"); 00110 return 0; 00111 } 00112 00113 double CopyToTexture(CudaImage *src, CudaImage *dst, bool host) 00114 { 00115 if (dst->t_data==NULL) { 00116 printf("Error CopyToTexture: No texture data\n"); 00117 return 0.0; 00118 } 00119 if ((!host || src->h_data==NULL) && (host || src->d_data==NULL)) { 00120 printf("Error CopyToTexture: No source data\n"); 00121 return 0.0; 00122 } 00123 if (host) 00124 {CUDA_SAFE_CALL(cudaMemcpyToArray((cudaArray *)dst->t_data, 0, 0, 00125 src->h_data, sizeof(float)*src->pitch*dst->height, 00126 cudaMemcpyHostToDevice));} 00127 else 00128 {CUDA_SAFE_CALL(cudaMemcpyToArray((cudaArray *)dst->t_data, 0, 0, 00129 src->d_data, sizeof(float)*src->pitch*dst->height, 00130 cudaMemcpyDeviceToDevice));} 00131 CUDA_SAFE_CALL(cudaThreadSynchronize()); 00132 return 0; 00133 }