00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include <stdio.h>
00039 #include <stdint.h>
00040
00041 #include "Util/log.H"
00042 #include "Raster/DeBayer.H"
00043 #include "Raster/DeBayerREG.H"
00044
00045 #ifdef INVT_USE_SSEDB
00046 #include <emmintrin.h>
00047 #include "Raster/DeBayerSSE2.H"
00048 #ifdef INVT_USE_SSE3
00049 #include <pmmintrin.h>
00050 #include "Raster/DeBayerSSE3.H"
00051 #endif
00052 #endif
00053
00054 #include "Image/Image.H"
00055 #include "Image/Pixels.H"
00056 #include "Util/Types.H"
00057
00058 template <class T>
00059 Image<PixRGB<T> > deBayer(const Image<T>& src, BayerFormat format)
00060 {
00061 Image<PixRGB<T> > res(src.getDims(), NO_INIT);
00062 int bitDepth = 8*sizeof(T);
00063
00064 if(bitDepth == 8)
00065 {
00066
00067
00068
00069 #ifdef INVT_USE_SSEDB
00070
00071 #ifdef INVT_USE_SSE3
00072 res = debayerSSE3(src,format);
00073 #else
00074 res = debayerSSE2(src,format);
00075 #endif
00076
00077 #else
00078 res = debayerREG(src,format);
00079 #endif
00080 }
00081 else
00082 res = debayerREG(src, format);
00083 return res;
00084 }
00085
00086 template Image<PixRGB<byte> > deBayer(const Image<byte>& src, BayerFormat format);
00087 template Image<PixRGB<uint16> > deBayer(const Image<uint16>& src, BayerFormat format);
00088
00089
/* Replicate the edge pixels of an 8-bit image into a one-pixel border.
 *
 * src points at pixel (0,0) of a width x height image whose rows are
 * sstride bytes apart.  The function WRITES OUTSIDE that rectangle: one
 * full row above, one full row below, and one byte to the left and right
 * of every row from -1 to height (corners included).  The caller must
 * own that memory.
 *
 * Returns 0 on success, or -1 (touching nothing) if src is NULL or
 * width/height is not positive.
 */
int
replicateBorder_8u (uint8_t * src, int sstride, int width, int height)
{
  /* A negative width would become a huge size_t inside memcpy, and a
   * non-positive height would make us copy rows that do not exist. */
  if (src == NULL || width <= 0 || height <= 0)
    return -1;

  /* Row offsets are computed in intptr_t so that row * stride cannot
   * overflow int on large buffers. */
  uint8_t *last_row = src + (intptr_t) (height - 1) * sstride;

  /* Top and bottom borders: duplicate the first and last image rows. */
  memcpy (src - sstride, src, (size_t) width);
  memcpy (last_row + sstride, last_row, (size_t) width);

  /* Left and right borders.  Rows -1 and height were filled just above,
   * so including them here also produces the four corner pixels. */
  int i;
  for (i = -1; i < height + 1; i++) {
    uint8_t *row = src + (intptr_t) i * sstride;
    row[-1] = row[0];
    row[width] = row[width - 1];
  }
  return 0;
}
00104
00105
/* Copy a rectangle of pixels between two byte-addressed images.
 *
 * src, sstride   source buffer and its row pitch in bytes
 * dst, dstride   destination buffer and its row pitch in bytes
 * src_x, src_y   top-left corner of the rectangle in the source (pixels)
 * dst_x, dst_y   top-left corner of the rectangle in the destination
 * width, height  rectangle size in pixels
 * bits_per_pixel pixel size; must be a multiple of 8
 *
 * Returns 0 on success (including when there is nothing to copy) and -1
 * when bits_per_pixel is not a whole number of bytes or when width or
 * bits_per_pixel is negative.  Rows are copied with memcpy, so the source
 * and destination rectangles must not overlap.
 */
int
copy_8u_generic (const uint8_t *src, int sstride,
                 uint8_t *dst, int dstride,
                 int src_x, int src_y,
                 int dst_x, int dst_y,
                 int width, int height,
                 int bits_per_pixel)
{
  if (bits_per_pixel % 8) return -1;

  /* No rows requested: nothing to do. */
  if (height <= 0) return 0;

  /* A negative size would turn into an enormous size_t in memcpy. */
  if (width < 0 || bits_per_pixel < 0) return -1;

  const intptr_t bytes_per_pixel = bits_per_pixel / 8;
  const size_t row_bytes = (size_t) width * (size_t) bytes_per_pixel;
  if (row_bytes == 0) return 0;

  /* Offsets are computed in intptr_t so that row * stride and
   * x * bytes_per_pixel cannot overflow int on large images. */
  const intptr_t src_col = src_x * bytes_per_pixel;
  const intptr_t dst_col = dst_x * bytes_per_pixel;

  int i;
  for (i = 0; i < height; i++) {
    const uint8_t *src_row = src + ((intptr_t) src_y + i) * sstride;
    uint8_t *dst_row = dst + ((intptr_t) dst_y + i) * dstride;

    memcpy (dst_row + dst_col, src_row + src_col, row_bytes);
  }
  return 0;
}
00128
00129
00130
00131
00132
/* De-interleave an 8-bit Bayer mosaic into its four 2x2-cell planes (SSE2).
 *
 * For output row i and output column j:
 *   dst[0] receives src byte (row 2i,   column 2j)
 *   dst[1] receives src byte (row 2i,   column 2j+1)
 *   dst[2] receives src byte (row 2i+1, column 2j)
 *   dst[3] receives src byte (row 2i+1, column 2j+1)
 *
 * dst, dstride   four output planes sharing one row pitch (bytes)
 * src, sstride   interleaved input and its row pitch (bytes)
 * width, height  size of EACH OUTPUT PLANE; the input read is
 *                2*width bytes wide and 2*height rows tall
 *
 * All loads and stores are aligned 128-bit accesses, so every base
 * pointer and both strides must be 16-byte aligned.  The main loop works
 * in steps of 16 output columns, so rows are expected to be padded
 * accordingly.
 *
 * Returns 0 on success, -1 if SSE2 support was not compiled in or if an
 * alignment requirement is violated (after reporting through LFATAL).
 */
int
splitBayerPlanes_8u (uint8_t *dst[4], int dstride,
                     const uint8_t * src, int sstride,
                     int width, int height)
{
#ifndef INVT_USE_SSEDB
  LINFO("you need to have sse2 support");
  return -1;
#else
  __m128i mask;
  int i, j;

  /* _mm_load_si128/_mm_store_si128 fault on misaligned addresses, so
   * check the base pointers themselves and not only the strides. */
  for (i = 0; i < 4; i++) {
    if (((uintptr_t) dst[i] & 0xF) != 0 || !IS_ALIGNED16(dstride)) {
      LFATAL("splitBayerPlanes_8u: dst[%d] is not "
             "16-byte aligned\n",i);
      return -1;
    }
  }
  if (((uintptr_t) src & 0xF) != 0 || !IS_ALIGNED16(sstride)) {
    LFATAL("splitBayerPlanes_8u: src is not 16-byte "
           "aligned\n");
    return -1;
  }

  /* If the source stride fails the IS_ALIGNED32 test, the last 8 output
   * columns are left out of the main loop and handled by the fix-up
   * pass at the end. */
  if (!IS_ALIGNED32 (sstride))
    width -= 8;

  /* 0x00ff in every 16-bit lane: selects the low (even-position) byte. */
  mask = _mm_set1_epi16 (0xff);
  for (i = 0; i < height; i++) {
    uint8_t * drow1 = dst[0] + i * dstride;
    uint8_t * drow2 = dst[1] + i * dstride;
    uint8_t * drow3 = dst[2] + i * dstride;
    uint8_t * drow4 = dst[3] + i * dstride;
    const uint8_t * srow = src + 2*i*sstride;
    for (j = 0; j < width; j += 16) {
      __m128i s1, s2, t1, t2, out;

      /* Even source row: 32 input bytes -> 16 bytes for each plane. */
      s1 = _mm_load_si128 ((__m128i *)(srow + 2*j));
      s2 = _mm_load_si128 ((__m128i *)(srow + 2*j + 16));

      /* Even-position bytes. */
      t1 = _mm_and_si128 (s1, mask);
      t2 = _mm_and_si128 (s2, mask);

      out = _mm_packus_epi16 (t1, t2);
      _mm_store_si128 ((__m128i *)(drow1 + j), out);

      /* Odd-position bytes. */
      t1 = _mm_srli_epi16 (s1, 8);
      t2 = _mm_srli_epi16 (s2, 8);

      out = _mm_packus_epi16 (t1, t2);
      _mm_store_si128 ((__m128i *)(drow2 + j), out);

      /* Odd source row, same treatment. */
      s1 = _mm_load_si128 ((__m128i *)(srow + sstride + 2*j));
      s2 = _mm_load_si128 ((__m128i *)(srow + sstride + 2*j + 16));

      t1 = _mm_and_si128 (s1, mask);
      t2 = _mm_and_si128 (s2, mask);

      out = _mm_packus_epi16 (t1, t2);
      _mm_store_si128 ((__m128i *)(drow3 + j), out);

      t1 = _mm_srli_epi16 (s1, 8);
      t2 = _mm_srli_epi16 (s2, 8);

      out = _mm_packus_epi16 (t1, t2);
      _mm_store_si128 ((__m128i *)(drow4 + j), out);
    }
  }
  if (IS_ALIGNED32 (sstride))
    return 0;

  /* Fix-up pass for the 8 columns removed above: one 16-byte load per
   * source row yields 8 bytes per plane.  Packing against a zero vector
   * means each 16-byte store writes those 8 bytes followed by 8 zeros. */
  const uint8_t * scol1 = src + 2*width;
  const uint8_t * scol2 = src + 2*width + sstride;
  uint8_t * dcol1 = dst[0] + width;
  uint8_t * dcol2 = dst[1] + width;
  uint8_t * dcol3 = dst[2] + width;
  uint8_t * dcol4 = dst[3] + width;
  __m128i t2 = _mm_set1_epi16 (0);
  for (i = 0; i < height; i++) {
    __m128i s1, t1, out;

    /* Even source row. */
    s1 = _mm_load_si128 ((__m128i *)(scol1 + 2*i*sstride));
    t1 = _mm_and_si128 (s1, mask);

    out = _mm_packus_epi16 (t1, t2);
    _mm_store_si128 ((__m128i *)(dcol1 + i*dstride), out);

    t1 = _mm_srli_epi16 (s1, 8);

    out = _mm_packus_epi16 (t1, t2);
    _mm_store_si128 ((__m128i *)(dcol2 + i*dstride), out);

    /* Odd source row. */
    s1 = _mm_load_si128 ((__m128i *)(scol2 + 2*i*sstride));
    t1 = _mm_and_si128 (s1, mask);

    out = _mm_packus_epi16 (t1, t2);
    _mm_store_si128 ((__m128i *)(dcol3 + i*dstride), out);

    t1 = _mm_srli_epi16 (s1, 8);

    out = _mm_packus_epi16 (t1, t2);
    _mm_store_si128 ((__m128i *)(dcol4 + i*dstride), out);
  }
  return 0;
#endif
}
00241
00242
00243
00244
00245
00246