CudaImageDisplay.cu

00001 /*
00002 Simple image display kernel that uses CUDA and OpenGL
00003  */
00004 
00005 #include <stdio.h>
00006 #include <stdlib.h>
00007 #include <string.h>
00008 #include "CudaImageDisplay.h"
00009 //#include "CUDA/CudaImage.H"
00010 ////////////////////////////////////////////////////////////////////////////////
00011 // Helper functions
00012 ////////////////////////////////////////////////////////////////////////////////
00013 float Max(float x, float y){
00014   return (x > y) ? x : y;
00015 }
00016 
00017 float Min(float x, float y){
00018   return (x < y) ? x : y;
00019 }
00020 
00021 
00022 __device__ float lerpf(float a, float b, float c){
00023   return a + (b - a) * c;
00024 }
00025 
00026 __device__ float vecLen(float4 a, float4 b){
00027   return (
00028           (b.x - a.x) * (b.x - a.x) +
00029           (b.y - a.y) * (b.y - a.y) +
00030           (b.z - a.z) * (b.z - a.z)
00031           );
00032 }
00033 
00034 
00035 __device__ unsigned int make_color(unsigned char r, unsigned char g, unsigned char b, unsigned char a){
00036   return (unsigned int) (
00037     ((int)(a) << 24) |
00038     ((int)(b) << 16) |
00039     ((int)(g) <<  8) |
00040     ((int)(r) <<  0));
00041 }
00042 
00043 __device__ unsigned int make_color(float r, float g, float b, float a){
00044   return (unsigned int) (
00045     ((int)(a) << 24) |
00046     ((int)(b) << 16) |
00047     ((int)(g) <<  8) |
00048     ((int)(r) <<  0));
00049 }
00050 
00051 
00052 
00053 ////////////////////////////////////////////////////////////////////////////////
00054 // Global data handlers and parameters
00055 ////////////////////////////////////////////////////////////////////////////////
00056 //Texture reference and channel descriptor for image texture
00057 texture<uchar4, 2, cudaReadModeElementType> texImage;
00058 //texture<float4, 2, cudaReadModeElementType> copyImage;
00059 cudaChannelFormatDesc uchar4tex = cudaCreateChannelDesc<uchar4>();
00060 //cudaChannelFormatDesc float3tex = cudaCreateChannelDesc(32,32,32,0,cudaChannelFormatKindFloat);
00061 
00062 //CUDA array descriptor
00063 cudaArray *a_Src1;
00064 cudaArray *a_Src2;
00065 // We have dptr in cuda memory and we need a way to map dptr data to a_Src
00066 // Need to convert memory to array and then map to texture or directly 
00067 ////////////////////////////////////////////////////////////////////////////////
00068 // Filtering kernels
00069 ////////////////////////////////////////////////////////////////////////////////
00070 __global__ void Copy(unsigned int *dst, int imageW, int imageH){
00071   const int ix = blockDim.x * blockIdx.x + threadIdx.x;
00072   const int iy = blockDim.y * blockIdx.y + threadIdx.y;
00073   //Add half of a texel to always address exact texel centers
00074   const float x = (float)ix + 0.5f;
00075   const float y = (float)iy + 0.5f;
00076  
00077   if(ix < imageW && iy < imageH){
00078     
00079     uchar4 result;
00080     result = tex2D(texImage,x,y);
00081     dst[imageW * iy + ix] = make_color(result.x, result.y, result.z, 255.0);
00082   }
00083 }
00084 
00085 
00086 extern "C" 
00087 void cuda_Copy(unsigned int *d_dst, int imageW, int imageH)
00088 {
00089   dim3 threads(BLOCKDIM_X, BLOCKDIM_Y);
00090   dim3 grid(iDivUp(imageW, BLOCKDIM_X), iDivUp(imageH, BLOCKDIM_Y));
00091 
00092   Copy<<<grid, threads>>>(d_dst, imageW, imageH);
00093 }
00094 
00095 
00096 
00097 extern "C"
00098 cudaError_t CUDA_Bind2TextureArray(int index)
00099 { 
00100   cudaError_t error;
00101   if(index==0)
00102   error = cudaBindTextureToArray(texImage,a_Src1);
00103   if(index==1)
00104   error = cudaBindTextureToArray(texImage,a_Src2);
00105   return error;
00106 }
00107 
00108 extern "C"
00109 cudaError_t CUDA_UnbindTexture(int index)
00110 {
00111   cudaError_t error;
00112   if(index==0)
00113   error =  cudaUnbindTexture(texImage);
00114   if(index==1)
00115   error =  cudaUnbindTexture(texImage);
00116   return error;
00117 }
00118 
00119 extern "C"
00120 cudaError_t CUDA_MallocArray(unsigned int * src, int imageW, int imageH,int index)
00121 {  
00122   cudaError_t error;
00123    if(index==0)
00124     { 
00125    error = cudaMallocArray(&a_Src1, &uchar4tex, imageW, imageH);
00126 
00127    error = cudaMemcpyToArray(a_Src1,0,0,
00128                             src, imageW * imageH * sizeof(unsigned int), cudaMemcpyDeviceToDevice
00129                             );
00130     }  
00131    if(index==1)
00132     {
00133       error = cudaMallocArray(&a_Src2, &uchar4tex, imageW, imageH);
00134 
00135       error = cudaMemcpyToArray(a_Src2,0,0,
00136                             src, imageW * imageH * sizeof(unsigned int), cudaMemcpyDeviceToDevice
00137                             );
00138     } 
00139  return error;
00140 }
00141 
00142 
00143 extern "C"
00144 cudaError_t CUDA_UpdateArray(unsigned int * src, int imageW, int imageH,int index)
00145 {  
00146   cudaError_t error;
00147    if(index==0)
00148     { 
00149       error = cudaMemcpyToArray(a_Src1,0,0,
00150                             src, imageW * imageH * sizeof(unsigned int), cudaMemcpyDeviceToDevice
00151                             );
00152     }  
00153    if(index==1)
00154     {
00155 
00156       error = cudaMemcpyToArray(a_Src2,0,0,
00157                             src, imageW * imageH * sizeof(unsigned int), cudaMemcpyDeviceToDevice
00158                             );
00159     } 
00160  return error;
00161 }
00162 
00163 
00164 __global__ void change_float_uint(float3_t* src,unsigned int *dst,int tile_length,int size)
00165 {
00166   
00167   
00168    const int ix =  blockIdx.x * tile_length + threadIdx.x;
00169    if(ix<size)
00170      {
00171         dst[ix] = (unsigned int) make_color(src[ix].p[0],src[ix].p[1],src[ix].p[2],255.0F);
00172      }
00173 }
00174 
00175 extern "C"
00176 void CUDA_convert_float_uint(float3_t* src,unsigned int *dst,int tile_length,int size)
00177 {
00178   dim3 threads(tile_length);
00179   dim3 grid(iDivUp(size,tile_length));
00180   change_float_uint<<<grid, threads>>>(src,dst,tile_length,size);
00181 }
00182 
00183 
00184 extern "C"
00185 cudaError_t CUDA_FreeArray()
00186 { 
00187   cudaError_t error;
00188   error = cudaFreeArray(a_Src1);
00189   error = cudaFreeArray(a_Src2);
00190   return error;     
00191 }
00192