cudaImage.cu

00001 //********************************************************//
00002 // CUDA SIFT extractor by Marten Bjorkman aka Celebrandil //
00003 //********************************************************//  
00004 
00005 #include <stdio.h>
00006 #include "CUDA/cutil.h"
00007 #include "cudaImage.h"
00008 
00009 
// Returns a / b, plus one when the division leaves a remainder
// (i.e. ceiling division for positive operands).
int iDivUp(int a, int b) {
  const int quotient = a / b;
  if (a % b == 0)
    return quotient;
  return quotient + 1;
}
00013 
// Returns the truncating integer quotient of a and b.
int iDivDown(int a, int b) {
  const int quotient = a / b;
  return quotient;
}
00017 
// Returns a unchanged when it is already a multiple of b; otherwise
// returns a - (a % b) + b (the next multiple of b for positive operands).
int iAlignUp(int a, int b) {
  const int remainder = a % b;
  if (remainder == 0)
    return a;
  return a - remainder + b;
}
00021 
// Returns a with its remainder modulo b removed
// (the previous multiple of b for positive operands).
int iAlignDown(int a, int b) {
  const int remainder = a % b;
  return a - remainder;
}
00025 
// Initialises an image descriptor and optionally allocates its buffers.
//   img  - descriptor to fill in; h_data, d_data and t_data are all reset
//   w, h - width and height stored in the descriptor
//   p    - row pitch in floats; each allocated buffer holds p*h floats
//   host - when true, allocate the host buffer with cudaMallocHost
//   dev  - when true, allocate the device buffer with cudaMalloc
// Always returns 0.0.
double AllocCudaImage(CudaImage *img, int w, int h, int p, bool host, bool dev)
{
  // Keep the byte count in size_t: storing it in an int truncates (or goes
  // negative) once the buffer reaches 2 GB, corrupting the allocation size.
  const size_t sz = sizeof(float)*(size_t)p*(size_t)h;
  img->width = w;
  img->height = h;
  img->pitch = p;
  img->h_data = NULL;
  if (host) {
#ifdef VERBOSE
    printf("Allocating host data...\n");
#endif
    // cudaMallocHost is used instead of plain malloc for the host buffer
    CUDA_SAFE_CALL(cudaMallocHost((void **)&img->h_data, sz));
  }
  img->d_data = NULL;
  if (dev) {
#ifdef VERBOSE
    printf("Allocating device data...\n");
#endif
    CUDA_SAFE_CALL(cudaMalloc((void **)&img->d_data, sz));
    // d_data was reset to NULL above, so it stays NULL if nothing was stored
    if (img->d_data==NULL)
      printf("Failed to allocate device data\n");
  }
  // The texture array is not allocated here; see InitTexture
  img->t_data = NULL;
  return 0.0;
}
00052 
// Releases whichever of the device, host and texture buffers of img are
// allocated (in that order) and leaves all three pointers set to NULL.
// Always returns 0.
double FreeCudaImage(CudaImage *img)
{
  // Device buffer
  if (img->d_data!=NULL) {
#ifdef VERBOSE
    printf("Freeing device data...\n");
#endif
    CUDA_SAFE_CALL(cudaFree(img->d_data));
    img->d_data = NULL;
  }

  // Host buffer, released with cudaFreeHost rather than plain free
  if (img->h_data!=NULL) {
#ifdef VERBOSE
    printf("Freeing host data...\n");
#endif
    CUDA_SAFE_CALL(cudaFreeHost(img->h_data));
    img->h_data = NULL;
  }

  // Texture array
  if (img->t_data!=NULL) {
#ifdef VERBOSE
    printf("Freeing texture data...\n");
#endif
    CUDA_SAFE_CALL(cudaFreeArray((cudaArray *)img->t_data));
    img->t_data = NULL;
  }
  return 0.0;
}
00079 
// Copies the whole image (pitch*height floats) from the host buffer to the
// device buffer. Does nothing unless both buffers are allocated.
// Always returns 0.
double Download(CudaImage *img)
{
  if (img->d_data==NULL || img->h_data==NULL)
    return 0;
  const size_t numBytes = sizeof(float)*img->pitch*img->height;
  CUDA_SAFE_CALL(cudaMemcpy(img->d_data, img->h_data, numBytes,
                            cudaMemcpyHostToDevice));
  return 0;
}
00087 
// Copies a w-by-h region of floats from the device buffer back to the host
// buffer with cudaMemcpy2D, using the image pitch for both sides.
//   w - region width in floats; a negative value selects img->width
//   h - region height in rows; a negative value selects img->height
// Always returns 0.
double Readback(CudaImage *img, int w, int h)
{
  const int pitchBytes = sizeof(float)*img->pitch;
  const int rowBytes = sizeof(float)*(w<0 ? img->width : w);
  const int numRows = (h<0 ? img->height : h);
  CUDA_SAFE_CALL(cudaMemcpy2D(img->h_data, pitchBytes, img->d_data, pitchBytes,
                              rowBytes, numRows, cudaMemcpyDeviceToHost));
  return 0;
}
00099 
// Allocates a float CUDA array of pitch x height elements and stores it in
// img->t_data.
// NOTE: any array already held in img->t_data is overwritten without being
// freed here.
// Always returns 0.
double InitTexture(CudaImage *img)
{
  cudaChannelFormatDesc t_desc = cudaCreateChannelDesc<float>();
  CUDA_SAFE_CALL(cudaMallocArray((cudaArray **)&img->t_data, &t_desc,
                                 img->pitch, img->height));
#ifdef VERBOSE
  printf("InitTexture(%d, %d)\n", img->pitch, img->height);
#endif
  if (img->t_data==NULL)
    printf("Failed to allocate texture data\n");
  return 0;
}
00112  
// Copies src->pitch * dst->height floats from src into the texture array of
// dst, then synchronises with cudaThreadSynchronize.
//   src  - image supplying the data
//   dst  - image whose t_data array receives the data
//   host - true: read src->h_data (host to device copy);
//          false: read src->d_data (device to device copy)
// Prints an error and returns early when the texture array or the selected
// source buffer is missing. Always returns 0.
double CopyToTexture(CudaImage *src, CudaImage *dst, bool host)
{
  if (dst->t_data==NULL) {
    printf("Error CopyToTexture: No texture data\n");
    return 0.0;
  }

  // Select the source buffer and the matching copy direction in one place
  const void *srcData = NULL;
  cudaMemcpyKind copyKind;
  if (host) {
    srcData = src->h_data;
    copyKind = cudaMemcpyHostToDevice;
  } else {
    srcData = src->d_data;
    copyKind = cudaMemcpyDeviceToDevice;
  }
  if (srcData==NULL) {
    printf("Error CopyToTexture: No source data\n");
    return 0.0;
  }

  const size_t numBytes = sizeof(float)*src->pitch*dst->height;
  CUDA_SAFE_CALL(cudaMemcpyToArray((cudaArray *)dst->t_data, 0, 0,
                                   srcData, numBytes, copyKind));
  CUDA_SAFE_CALL(cudaThreadSynchronize());
  return 0;
}
Generated on Sun May 8 08:40:37 2011 for iLab Neuromorphic Vision Toolkit by  doxygen 1.6.3