cudaImage.cu
00001
00002
00003
00004
00005 #include <stdio.h>
00006 #include "CUDA/cutil.h"
00007 #include "cudaImage.h"
00008
00009
/**
 * Divide a by b and add one to the quotient whenever the division leaves a
 * remainder. For non-negative a and positive b this is the ceiling of a / b.
 */
int iDivUp(int a, int b) {
    int quotient = a / b;
    if (a % b != 0) {
        quotient += 1;
    }
    return quotient;
}
00013
/**
 * Plain integer division of a by b (the fractional part is discarded).
 */
int iDivDown(int a, int b) {
    const int quotient = a / b;
    return quotient;
}
00017
/**
 * Return a unchanged when it is already a multiple of b; otherwise strip the
 * remainder and add b. For non-negative a and positive b this is the next
 * multiple of b at or above a.
 */
int iAlignUp(int a, int b) {
    const int remainder = a % b;
    if (remainder == 0) {
        return a;
    }
    return a - remainder + b;
}
00021
/**
 * Subtract the remainder of a / b from a. For non-negative a and positive b
 * this is the largest multiple of b that does not exceed a.
 */
int iAlignDown(int a, int b) {
    const int remainder = a % b;
    return a - remainder;
}
00025
/**
 * Initialise a CudaImage descriptor and optionally allocate its buffers.
 *
 * The width, height and pitch fields are overwritten with w, h and p, and the
 * h_data, d_data and t_data pointers are reset to NULL before any allocation
 * (previous contents are not freed here). Each allocated buffer is
 * sizeof(float) * p * h bytes.
 *
 * @param img   descriptor to fill in
 * @param w     stored as img->width
 * @param h     stored as img->height; also the row count used for sizing
 * @param p     stored as img->pitch; row length in floats used for sizing
 * @param host  when true, allocate host memory with cudaMallocHost
 * @param dev   when true, allocate device memory with cudaMalloc
 * @return always 0.0
 */
double AllocCudaImage(CudaImage *img, int w, int h, int p, bool host, bool dev)
{
    // Keep the byte count in size_t: narrowing it to int would corrupt the
    // size for buffers of 2 GiB or more before it reaches the allocators.
    const size_t sz = sizeof(float) * (size_t)p * (size_t)h;

    img->width = w;
    img->height = h;
    img->pitch = p;

    img->h_data = NULL;
    if (host) {
#ifdef VERBOSE
        printf("Allocating host data...\n");
#endif
        CUDA_SAFE_CALL(cudaMallocHost((void **)&img->h_data, sz));
        // Report a failed host allocation the same way as a device failure.
        if (img->h_data == NULL)
            printf("Failed to allocate host data\n");
    }

    img->d_data = NULL;
    if (dev) {
#ifdef VERBOSE
        printf("Allocating device data...\n");
#endif
        CUDA_SAFE_CALL(cudaMalloc((void **)&img->d_data, sz));
        if (img->d_data == NULL)
            printf("Failed to allocate device data\n");
    }

    // The texture array is created separately.
    img->t_data = NULL;
    return 0.0;
}
00052
/**
 * Release every buffer attached to a CudaImage and reset its pointers.
 *
 * The device buffer (cudaFree), the host buffer (cudaFreeHost) and the texture
 * array (cudaFreeArray) are released in that order; pointers that are already
 * NULL are skipped. All three pointers are NULL on return.
 *
 * @param img  image whose buffers are released
 * @return always 0
 */
double FreeCudaImage(CudaImage *img)
{
    // Device buffer.
    if (img->d_data != NULL) {
#ifdef VERBOSE
        printf("Freeing device data...\n");
#endif
        CUDA_SAFE_CALL(cudaFree(img->d_data));
        img->d_data = NULL;
    }

    // Host buffer.
    if (img->h_data != NULL) {
#ifdef VERBOSE
        printf("Freeing host data...\n");
#endif
        CUDA_SAFE_CALL(cudaFreeHost(img->h_data));
        img->h_data = NULL;
    }

    // Texture array.
    if (img->t_data != NULL) {
#ifdef VERBOSE
        printf("Freeing texture data...\n");
#endif
        CUDA_SAFE_CALL(cudaFreeArray((cudaArray *)img->t_data));
        img->t_data = NULL;
    }

    return 0;
}
00079
/**
 * Copy the image's host buffer into its device buffer.
 *
 * Transfers sizeof(float) * pitch * height bytes with a host-to-device
 * cudaMemcpy. Nothing is done when either buffer pointer is NULL.
 *
 * @param img  image whose h_data is copied to d_data
 * @return always 0
 */
double Download(CudaImage *img)
{
    // Both ends of the transfer must exist; otherwise this is a no-op.
    if (img->d_data == NULL || img->h_data == NULL) {
        return 0;
    }

    const size_t numBytes = sizeof(float) * img->pitch * img->height;
    CUDA_SAFE_CALL(cudaMemcpy(img->d_data, img->h_data, numBytes,
                              cudaMemcpyHostToDevice));
    return 0;
}
00087
/**
 * Copy a region of the image's device buffer back into its host buffer.
 *
 * Uses cudaMemcpy2D with the same row pitch (sizeof(float) * img->pitch bytes)
 * for source and destination.
 *
 * @param img  image whose d_data is copied to h_data
 * @param w    number of floats to copy per row; a negative value selects
 *             img->width
 * @param h    number of rows to copy; a negative value selects img->height
 * @return always 0
 */
double Readback(CudaImage *img, int w, int h)
{
    // Refuse to copy when either buffer is missing instead of handing a NULL
    // pointer to the runtime.
    if (img->d_data == NULL || img->h_data == NULL) {
        printf("Error Readback: No host or device data\n");
        return 0.0;
    }

    // Byte quantities stay in size_t so large rows are not narrowed to int.
    const size_t pitchBytes = sizeof(float) * img->pitch;
    const size_t widthBytes = sizeof(float) * (w < 0 ? img->width : w);
    const int rows = (h < 0 ? img->height : h);

    CUDA_SAFE_CALL(cudaMemcpy2D(img->h_data, pitchBytes, img->d_data, pitchBytes,
                                widthBytes, rows, cudaMemcpyDeviceToHost));
    return 0;
}
00099
/**
 * Allocate a CUDA array for the image and store it in img->t_data.
 *
 * The array uses a single-float channel format and is img->pitch elements wide
 * by img->height rows. A message is printed if t_data is NULL afterwards.
 *
 * NOTE(review): any array already referenced by img->t_data is overwritten
 * without being freed here - confirm callers invoke this at most once per
 * image, or free the previous array first.
 *
 * @param img  image that receives the array
 * @return always 0
 */
double InitTexture(CudaImage *img)
{
    cudaChannelFormatDesc t_desc = cudaCreateChannelDesc<float>();
    CUDA_SAFE_CALL(cudaMallocArray((cudaArray **)&img->t_data, &t_desc,
                                   img->pitch, img->height));
#ifdef VERBOSE
    printf("InitTexture(%d, %d)\n", img->pitch, img->height);
#endif
    if (img->t_data == NULL)
        printf("Failed to allocate texture data\n");
    return 0;
}
00112
/**
 * Copy image data from src into the CUDA array held by dst.
 *
 * The source is src->h_data when host is true and src->d_data otherwise.
 * sizeof(float) * src->pitch * dst->height bytes are copied to offset (0, 0)
 * of dst->t_data, followed by cudaThreadSynchronize(). An error is printed and
 * nothing is copied when dst has no array or the selected source buffer is
 * NULL.
 *
 * @param src   image supplying the data
 * @param dst   image whose t_data array receives the data
 * @param host  true to read from src's host buffer, false for its device buffer
 * @return always 0
 */
double CopyToTexture(CudaImage *src, CudaImage *dst, bool host)
{
    if (dst->t_data == NULL) {
        printf("Error CopyToTexture: No texture data\n");
        return 0.0;
    }

    // Select the source buffer and the matching copy direction once.
    const void *srcData = host ? (const void *)src->h_data
                               : (const void *)src->d_data;
    if (srcData == NULL) {
        printf("Error CopyToTexture: No source data\n");
        return 0.0;
    }
    const cudaMemcpyKind copyKind =
        host ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToDevice;

    const size_t numBytes = sizeof(float) * src->pitch * dst->height;
    CUDA_SAFE_CALL(cudaMemcpyToArray((cudaArray *)dst->t_data, 0, 0,
                                     srcData, numBytes, copyKind));
    CUDA_SAFE_CALL(cudaThreadSynchronize());
    return 0;
}