00001 /* 00002 Simple image display kernel that uses CUDA and OpenGL 00003 */ 00004 00005 #include <stdio.h> 00006 #include <stdlib.h> 00007 #include <string.h> 00008 #include "CudaImageDisplay.h" 00009 //#include "CUDA/CudaImage.H" 00010 //////////////////////////////////////////////////////////////////////////////// 00011 // Helper functions 00012 //////////////////////////////////////////////////////////////////////////////// 00013 float Max(float x, float y){ 00014 return (x > y) ? x : y; 00015 } 00016 00017 float Min(float x, float y){ 00018 return (x < y) ? x : y; 00019 } 00020 00021 00022 __device__ float lerpf(float a, float b, float c){ 00023 return a + (b - a) * c; 00024 } 00025 00026 __device__ float vecLen(float4 a, float4 b){ 00027 return ( 00028 (b.x - a.x) * (b.x - a.x) + 00029 (b.y - a.y) * (b.y - a.y) + 00030 (b.z - a.z) * (b.z - a.z) 00031 ); 00032 } 00033 00034 00035 __device__ unsigned int make_color(unsigned char r, unsigned char g, unsigned char b, unsigned char a){ 00036 return (unsigned int) ( 00037 ((int)(a) << 24) | 00038 ((int)(b) << 16) | 00039 ((int)(g) << 8) | 00040 ((int)(r) << 0)); 00041 } 00042 00043 __device__ unsigned int make_color(float r, float g, float b, float a){ 00044 return (unsigned int) ( 00045 ((int)(a) << 24) | 00046 ((int)(b) << 16) | 00047 ((int)(g) << 8) | 00048 ((int)(r) << 0)); 00049 } 00050 00051 00052 00053 //////////////////////////////////////////////////////////////////////////////// 00054 // Global data handlers and parameters 00055 //////////////////////////////////////////////////////////////////////////////// 00056 //Texture reference and channel descriptor for image texture 00057 texture<uchar4, 2, cudaReadModeElementType> texImage; 00058 //texture<float4, 2, cudaReadModeElementType> copyImage; 00059 cudaChannelFormatDesc uchar4tex = cudaCreateChannelDesc<uchar4>(); 00060 //cudaChannelFormatDesc float3tex = cudaCreateChannelDesc(32,32,32,0,cudaChannelFormatKindFloat); 00061 00062 //CUDA array descriptor 00063 cudaArray *a_Src1; 00064 cudaArray *a_Src2; 00065 // We have dptr in cuda memory and we need a way to map dptr data to a_Src 00066 // Need to convert memory to array and then map to texture or directly 00067 //////////////////////////////////////////////////////////////////////////////// 00068 // Filtering kernels 00069 //////////////////////////////////////////////////////////////////////////////// 00070 __global__ void Copy(unsigned int *dst, int imageW, int imageH){ 00071 const int ix = blockDim.x * blockIdx.x + threadIdx.x; 00072 const int iy = blockDim.y * blockIdx.y + threadIdx.y; 00073 //Add half of a texel to always address exact texel centers 00074 const float x = (float)ix + 0.5f; 00075 const float y = (float)iy + 0.5f; 00076 00077 if(ix < imageW && iy < imageH){ 00078 00079 uchar4 result; 00080 result = tex2D(texImage,x,y); 00081 dst[imageW * iy + ix] = make_color(result.x, result.y, result.z, 255.0); 00082 } 00083 } 00084 00085 00086 extern "C" 00087 void cuda_Copy(unsigned int *d_dst, int imageW, int imageH) 00088 { 00089 dim3 threads(BLOCKDIM_X, BLOCKDIM_Y); 00090 dim3 grid(iDivUp(imageW, BLOCKDIM_X), iDivUp(imageH, BLOCKDIM_Y)); 00091 00092 Copy<<<grid, threads>>>(d_dst, imageW, imageH); 00093 } 00094 00095 00096 00097 extern "C" 00098 cudaError_t CUDA_Bind2TextureArray(int index) 00099 { 00100 cudaError_t error; 00101 if(index==0) 00102 error = cudaBindTextureToArray(texImage,a_Src1); 00103 if(index==1) 00104 error = cudaBindTextureToArray(texImage,a_Src2); 00105 return error; 00106 } 00107 00108 extern "C" 00109 cudaError_t CUDA_UnbindTexture(int index) 00110 { 00111 cudaError_t error; 00112 if(index==0) 00113 error = cudaUnbindTexture(texImage); 00114 if(index==1) 00115 error = cudaUnbindTexture(texImage); 00116 return error; 00117 } 00118 00119 extern "C" 00120 cudaError_t CUDA_MallocArray(unsigned int * src, int imageW, int imageH,int index) 00121 { 00122 cudaError_t error; 00123 if(index==0) 00124 { 00125 error = cudaMallocArray(&a_Src1, &uchar4tex, imageW, imageH); 00126 00127 error = cudaMemcpyToArray(a_Src1,0,0, 00128 src, imageW * imageH * sizeof(unsigned int), cudaMemcpyDeviceToDevice 00129 ); 00130 } 00131 if(index==1) 00132 { 00133 error = cudaMallocArray(&a_Src2, &uchar4tex, imageW, imageH); 00134 00135 error = cudaMemcpyToArray(a_Src2,0,0, 00136 src, imageW * imageH * sizeof(unsigned int), cudaMemcpyDeviceToDevice 00137 ); 00138 } 00139 return error; 00140 } 00141 00142 00143 extern "C" 00144 cudaError_t CUDA_UpdateArray(unsigned int * src, int imageW, int imageH,int index) 00145 { 00146 cudaError_t error; 00147 if(index==0) 00148 { 00149 error = cudaMemcpyToArray(a_Src1,0,0, 00150 src, imageW * imageH * sizeof(unsigned int), cudaMemcpyDeviceToDevice 00151 ); 00152 } 00153 if(index==1) 00154 { 00155 00156 error = cudaMemcpyToArray(a_Src2,0,0, 00157 src, imageW * imageH * sizeof(unsigned int), cudaMemcpyDeviceToDevice 00158 ); 00159 } 00160 return error; 00161 } 00162 00163 00164 __global__ void change_float_uint(float3_t* src,unsigned int *dst,int tile_length,int size) 00165 { 00166 00167 00168 const int ix = blockIdx.x * tile_length + threadIdx.x; 00169 if(ix<size) 00170 { 00171 dst[ix] = (unsigned int) make_color(src[ix].p[0],src[ix].p[1],src[ix].p[2],255.0F); 00172 } 00173 } 00174 00175 extern "C" 00176 void CUDA_convert_float_uint(float3_t* src,unsigned int *dst,int tile_length,int size) 00177 { 00178 dim3 threads(tile_length); 00179 dim3 grid(iDivUp(size,tile_length)); 00180 change_float_uint<<<grid, threads>>>(src,dst,tile_length,size); 00181 } 00182 00183 00184 extern "C" 00185 cudaError_t CUDA_FreeArray() 00186 { 00187 cudaError_t error; 00188 error = cudaFreeArray(a_Src1); 00189 error = cudaFreeArray(a_Src2); 00190 return error; 00191 } 00192