00001
00002
00003
00004
00005 #include <stdio.h>
00006 #include <stdlib.h>
00007 #include <string.h>
00008 #include "CudaImageDisplay.h"
00009
00010
00011
00012
00013 float Max(float x, float y){
00014 return (x > y) ? x : y;
00015 }
00016
00017 float Min(float x, float y){
00018 return (x < y) ? x : y;
00019 }
00020
00021
00022 __device__ float lerpf(float a, float b, float c){
00023 return a + (b - a) * c;
00024 }
00025
00026 __device__ float vecLen(float4 a, float4 b){
00027 return (
00028 (b.x - a.x) * (b.x - a.x) +
00029 (b.y - a.y) * (b.y - a.y) +
00030 (b.z - a.z) * (b.z - a.z)
00031 );
00032 }
00033
00034
00035 __device__ unsigned int make_color(unsigned char r, unsigned char g, unsigned char b, unsigned char a){
00036 return (unsigned int) (
00037 ((int)(a) << 24) |
00038 ((int)(b) << 16) |
00039 ((int)(g) << 8) |
00040 ((int)(r) << 0));
00041 }
00042
00043 __device__ unsigned int make_color(float r, float g, float b, float a){
00044 return (unsigned int) (
00045 ((int)(a) << 24) |
00046 ((int)(b) << 16) |
00047 ((int)(g) << 8) |
00048 ((int)(r) << 0));
00049 }
00050
00051
00052
00053
00054
00055
00056
00057 texture<uchar4, 2, cudaReadModeElementType> texImage;
00058
00059 cudaChannelFormatDesc uchar4tex = cudaCreateChannelDesc<uchar4>();
00060
00061
00062
00063 cudaArray *a_Src1;
00064 cudaArray *a_Src2;
00065
00066
00067
00068
00069
00070 __global__ void Copy(unsigned int *dst, int imageW, int imageH){
00071 const int ix = blockDim.x * blockIdx.x + threadIdx.x;
00072 const int iy = blockDim.y * blockIdx.y + threadIdx.y;
00073
00074 const float x = (float)ix + 0.5f;
00075 const float y = (float)iy + 0.5f;
00076
00077 if(ix < imageW && iy < imageH){
00078
00079 uchar4 result;
00080 result = tex2D(texImage,x,y);
00081 dst[imageW * iy + ix] = make_color(result.x, result.y, result.z, 255.0);
00082 }
00083 }
00084
00085
00086 extern "C"
00087 void cuda_Copy(unsigned int *d_dst, int imageW, int imageH)
00088 {
00089 dim3 threads(BLOCKDIM_X, BLOCKDIM_Y);
00090 dim3 grid(iDivUp(imageW, BLOCKDIM_X), iDivUp(imageH, BLOCKDIM_Y));
00091
00092 Copy<<<grid, threads>>>(d_dst, imageW, imageH);
00093 }
00094
00095
00096
00097 extern "C"
00098 cudaError_t CUDA_Bind2TextureArray(int index)
00099 {
00100 cudaError_t error;
00101 if(index==0)
00102 error = cudaBindTextureToArray(texImage,a_Src1);
00103 if(index==1)
00104 error = cudaBindTextureToArray(texImage,a_Src2);
00105 return error;
00106 }
00107
00108 extern "C"
00109 cudaError_t CUDA_UnbindTexture(int index)
00110 {
00111 cudaError_t error;
00112 if(index==0)
00113 error = cudaUnbindTexture(texImage);
00114 if(index==1)
00115 error = cudaUnbindTexture(texImage);
00116 return error;
00117 }
00118
00119 extern "C"
00120 cudaError_t CUDA_MallocArray(unsigned int * src, int imageW, int imageH,int index)
00121 {
00122 cudaError_t error;
00123 if(index==0)
00124 {
00125 error = cudaMallocArray(&a_Src1, &uchar4tex, imageW, imageH);
00126
00127 error = cudaMemcpyToArray(a_Src1,0,0,
00128 src, imageW * imageH * sizeof(unsigned int), cudaMemcpyDeviceToDevice
00129 );
00130 }
00131 if(index==1)
00132 {
00133 error = cudaMallocArray(&a_Src2, &uchar4tex, imageW, imageH);
00134
00135 error = cudaMemcpyToArray(a_Src2,0,0,
00136 src, imageW * imageH * sizeof(unsigned int), cudaMemcpyDeviceToDevice
00137 );
00138 }
00139 return error;
00140 }
00141
00142
00143 extern "C"
00144 cudaError_t CUDA_UpdateArray(unsigned int * src, int imageW, int imageH,int index)
00145 {
00146 cudaError_t error;
00147 if(index==0)
00148 {
00149 error = cudaMemcpyToArray(a_Src1,0,0,
00150 src, imageW * imageH * sizeof(unsigned int), cudaMemcpyDeviceToDevice
00151 );
00152 }
00153 if(index==1)
00154 {
00155
00156 error = cudaMemcpyToArray(a_Src2,0,0,
00157 src, imageW * imageH * sizeof(unsigned int), cudaMemcpyDeviceToDevice
00158 );
00159 }
00160 return error;
00161 }
00162
00163
00164 __global__ void change_float_uint(float3_t* src,unsigned int *dst,int tile_length,int size)
00165 {
00166
00167
00168 const int ix = blockIdx.x * tile_length + threadIdx.x;
00169 if(ix<size)
00170 {
00171 dst[ix] = (unsigned int) make_color(src[ix].p[0],src[ix].p[1],src[ix].p[2],255.0F);
00172 }
00173 }
00174
00175 extern "C"
00176 void CUDA_convert_float_uint(float3_t* src,unsigned int *dst,int tile_length,int size)
00177 {
00178 dim3 threads(tile_length);
00179 dim3 grid(iDivUp(size,tile_length));
00180 change_float_uint<<<grid, threads>>>(src,dst,tile_length,size);
00181 }
00182
00183
00184 extern "C"
00185 cudaError_t CUDA_FreeArray()
00186 {
00187 cudaError_t error;
00188 error = cudaFreeArray(a_Src1);
00189 error = cudaFreeArray(a_Src2);
00190 return error;
00191 }
00192