00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include <string.h>
00034 #include <stdio.h>
00035 #include <stdlib.h>
00036 #include <cerrno>
00037 #include <fcntl.h>
00038 #include <sys/ioctl.h>
00039 #include <sys/mman.h>
00040 #include <sys/stat.h>
00041 #include <unistd.h>
00042 #include <sys/time.h>
00043
00044 #include <linux/videodev.h>
00045 #include <linux/videodev2.h>
00046
00047 #include <linux/fb.h>
00048 #include <asm-arm/arch-omap/omapfb.h>
00049 #include <X11/Xlib.h>
00050 #include <X11/Xutil.h>
00051
00052 #define INT_IS_32_BITS 1
00053 #define LONG_IS_32_BITS 1
00054 #include "Envision/env_config.h"
00055 #include "Envision/env_alloc.h"
00056 #include "Envision/env_c_math_ops.h"
00057 #include "Envision/env_stdio_interface.h"
00058 #include "Envision/env_image.h"
00059 #include "Envision/env_image_ops.h"
00060 #include "Envision/env_log.h"
00061 #include "Envision/env_visual_cortex.h"
00062 #include "Envision/env_params.h"
00063
00064 #include <pthread.h>
00065
00066 #include "Envision/env_alloc.c"
00067 #include "Envision/env_c_math_ops.c"
00068 #include "Envision/env_stdio_interface.c"
00069 #include "Envision/env_image.c"
00070 #include "Envision/env_image_ops.c"
00071 #include "Envision/env_log.c"
00072 #include "Envision/env_visual_cortex.c"
00073 #include "Envision/env_params.c"
00074 #include "Envision/env_channel.c"
00075 #include "Envision/env_math.c"
00076 #include "Envision/env_motion_channel.c"
00077 #include "Envision/env_pyr.c"
00078
00079 #include "Image/font6x10.h"
00080 #define FONTW 6
00081 #define FONTH 10
00082
00083 #include <libosso.h>
00084
00085 #define LINFO printf("\n")&&printf
00086 #define LFATAL printf("\n")&&printf
00087 #define PLFATAL printf("\n")&&printf
00088 #define PLERROR printf("\n")&&printf
00089
00090
00091 #define GRABDEV "/dev/video0"
00092 #define GRABNBUF 1
00093 #define GRABW 320
00094 #define GRABH 240
00095
00096
00097 #define HRES 800
00098 #define VRES 480
00099 #define BPP 16
00100
00101 #define BCOL intg16(0x3FFF)
00102 #define TCOL intg16(0x3F3F)
00103 #define TCOL2 intg16(0xFF3F)
00104 #define SCOL intg16(0x3F30)
00105
00106 #define OMAPFB_FORMAT_FLAG_TEARSYNC 0x0200
00107 #define OMAPFB_FORMAT_FLAG_FORCE_VSYNC 0x0400
00108
00109
00110
00111
00112
00113
00114
00115 static void* malloc_thunk(env_size_t n)
00116 { return malloc(n); }
00117
00118
00119 void draw_rect(intg16 *buf, const int x, const int y, const int w, const int h, const intg16 col)
00120 {
00121 intg16 *b = buf + x + y * HRES;
00122
00123 const int offy = (h-1) * HRES;
00124 for (int xx = 0; xx < w; ++xx) { b[xx] = col; b[xx + offy] = col; }
00125
00126 const int offx = w-1;
00127 for (int yy = 0; yy < h * HRES; yy += HRES) { b[yy] = col; b[yy + offx] = col; }
00128 }
00129
00130
00131 void draw_filled_rect(intg16 *buf, const int x, const int y, const int w, const int h, const intg16 col)
00132 {
00133 intg16 *b = buf + x + y * HRES;
00134
00135 for (int yy = 0; yy < h; ++yy) {
00136 for (int xx = 0; xx < w; ++xx) *b++ = col;
00137 b += HRES - w;
00138 }
00139 }
00140
00141
00142 void write_text(intg16 *buf, const char *txt, int x0, int y0, const intg16 col)
00143 {
00144 const int len = int(strlen(txt));
00145
00146
00147 if (x0 == -1) x0 = (HRES - FONTW * len) / 2;
00148
00149 for (int i = 0; i < len; i ++)
00150 {
00151 const unsigned char *ptr = ((const unsigned char *)font6x10) + (txt[i] - 32) * FONTW * FONTH;
00152
00153 for (int y = 0; y < FONTH; y ++)
00154 for (int x = 0; x < FONTW; x ++)
00155 if (!ptr[y * FONTW + x]) buf[x0 + x + HRES * (y0 + y)] = col; else buf[x0 + x + HRES * (y0 + y)] = 0;
00156 x0 += FONTW;
00157 }
00158 }
00159
00160
00161 void draw_map(intg16 *buf, const env_image *img, const int xoff, const int yoff, const env_size_t scale)
00162 {
00163 intg16 *d = buf + xoff + yoff * HRES;;
00164 intg32 *s = img->pixels;
00165
00166 const env_size_t w = img->dims.w, h = img->dims.h;
00167 const env_size_t ws = w * scale;
00168
00169 for (env_size_t jj = 0; jj < h; ++jj) {
00170 const intg16 *dd = d;
00171 for (env_size_t ii = 0; ii < w; ++ii) {
00172 const intg16 val = intg16( (*s++) >> 3 );
00173 for (env_size_t k = 0; k < scale; ++k) *d++ = val;
00174 }
00175 d += HRES - ws;
00176 for (env_size_t k = 1; k < scale; ++k) { memcpy(d, dd, ws * 2); d += HRES; }
00177 }
00178 draw_rect(buf, xoff, yoff, scale * w, scale * h, BCOL);
00179 }
00180
00181
00182 void print_help(intg16 *buf, const bool doit) {
00183 draw_filled_rect(buf, 0, 450, 800, 30, intg16(0));
00184 if (doit) {
00185 write_text(buf, "saliency - Copyright (c) 2009 by Laurent Itti and the iLab team - See http://iLab.usc.edu for more info about visual saliency", -1, 450, TCOL2);
00186 write_text(buf, "This program analyzes the input video to determine which point is most likely to attract human visual attention and gaze", -1, 460, TCOL2);
00187 write_text(buf, "Press <SPACE> for more info. Press <ESC> or square key at center of cursor pad to exit application", -1, 470, TCOL2);
00188 }
00189 }
00190
00191
00192 void print_help2(intg16 *buf, const bool doit) {
00193 draw_filled_rect(buf, 0, 300, 800, 180, intg16(0));
00194 if (doit) {
00195
00196 write_text(buf, "saliency - Copyright (c) 2009 by Laurent Itti and the iLab team - See http://iLab.usc.edu for more info about visual saliency", -1, 310, TCOL2);
00197 write_text(buf, "This program analyzes the input video to determine which point is most likely to attract human visual attention and gaze.", -1, 320, TCOL2);
00198
00199 write_text(buf, "In this biologically-inspired system (Itti, Koch & Niebur, IEEE Transactions on Pattern Analysis and Machine Intelligence, 1998),", -1, 340, TCOL2);
00200 write_text(buf, "each input image is decomposed into a set of multiscale neural ``feature maps,'' which extract local spatial discontinuities in the", -1, 350, TCOL2);
00201 write_text(buf, "modalities of color, intensity, orientation, flicker and motion. Each feature map is endowed with non-linear spatially competitive", -1, 360, TCOL2);
00202 write_text(buf, "dynamics, so that the response of a neuron at a given location in a map is modulated by the activity of neighboring neurons. Such", -1, 370, TCOL2);
00203 write_text(buf, "contextual modulation, inspired by recent neurobiological findings, enhances salient targets from cluttered backgrounds. All feature", -1, 380, TCOL2);
00204 write_text(buf, "maps are then combined into a unique scalar saliency map which encodes for the salience of a location in the scene, irrespectively", -1, 390, TCOL2);
00205 write_text(buf, "of the particular feature which detected this location as conspicuous. A winner-take-all neural network then detects the point of", -1, 400, TCOL2);
00206 write_text(buf, "highest salience in the map at any given time, and draws the focus of attention towards this location (small cyan square marker).", -1, 410, TCOL2);
00207 write_text(buf, "", -1, 420, TCOL2);
00208 write_text(buf, "This Maemo program uses a fast integer-only saliency algorithm implemented by Robert J. Peters, see Peters & Itti, ACM Transactions", -1, 430, TCOL2);
00209 write_text(buf, "on Applied Perception, 2008.", -1, 440, TCOL2);
00210
00211
00212 write_text(buf, "Press <SPACE> to switch back to full display mode.", -1, 470, TCOL2);
00213 }
00214 }
00215
00216
00217 int main(int argc, char **argv)
00218 {
00219
00220 osso_context_t *osso = osso_initialize("saliency", "1.00", true, 0);
00221 if (osso == NULL) LFATAL("Cannot initialize OSSO");
00222
00223
00224
00225
00226 int gfd = open(GRABDEV, O_RDWR);
00227 if (gfd == -1) PLFATAL("Cannot open V4L2 device %s", GRABDEV);
00228
00229
00230 struct v4l2_format fmt;
00231 memset(&fmt, 0, sizeof(fmt));
00232 fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
00233 fmt.fmt.pix.width = GRABW;
00234 fmt.fmt.pix.height = GRABH;
00235 fmt.fmt.pix.field = V4L2_FIELD_INTERLACED;
00236 fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_RGB565;
00237
00238 if (ioctl(gfd, VIDIOC_S_FMT, &fmt) == -1) PLFATAL("Cannot set requested video mode/resolution");
00239
00240
00241 const int nbuf = GRABNBUF;
00242 struct v4l2_requestbuffers req;
00243 memset(&req, 0, sizeof(req));
00244 req.count = nbuf;
00245 req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
00246 req.memory = V4L2_MEMORY_MMAP;
00247
00248 if (ioctl(gfd, VIDIOC_REQBUFS, &req) == -1) PLFATAL("Cannot allocate %d mmap'ed video frame buffers", nbuf);
00249 if (int(req.count) != nbuf) LFATAL("Hardware only supports %d video buffers (vs. %d requested)", req.count, nbuf);
00250
00251 byte **itsMmapBuf = new byte*[req.count];
00252 int *itsMmapBufSize = new int[req.count];
00253
00254 for (uint i = 0; i < req.count; ++i) {
00255 struct v4l2_buffer buf;
00256 memset(&buf, 0, sizeof(buf));
00257 buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
00258 buf.memory = V4L2_MEMORY_MMAP;
00259 buf.index = i;
00260 if (ioctl(gfd, VIDIOC_QUERYBUF, &buf) == -1) PLFATAL("Could not query for MMAP buffer");
00261
00262 itsMmapBufSize[i] = buf.length;
00263 itsMmapBuf[i] = static_cast<byte*>(mmap(NULL, buf.length, PROT_READ | PROT_WRITE, MAP_SHARED, gfd, buf.m.offset));
00264 if (itsMmapBuf[i] == MAP_FAILED) PLFATAL("Error MMAP'ing video buffer number %d", i);
00265 }
00266
00267
00268 int itsCurrentFrame = 0;
00269 bool *itsGrabbing = new bool[nbuf];
00270 for (int i = 0; i < nbuf; ++i) itsGrabbing[i] = false;
00271
00272
00273 struct v4l2_buffer buf;
00274 memset(&buf, 0, sizeof(buf));
00275 buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
00276 buf.memory = V4L2_MEMORY_MMAP;
00277
00278 for (int i = 0; i < nbuf; ++i)
00279 if (itsGrabbing[i] == true) {
00280 buf.index = i;
00281 if (ioctl(gfd, VIDIOC_DQBUF, &buf) == -1) PLFATAL("VIDIOC_DQBUF (frame %d)", i);
00282 itsGrabbing[i] = false;
00283 }
00284
00285 for (int i = 0; i < nbuf; ++i) {
00286
00287 buf.index = i;
00288 if (ioctl(gfd, VIDIOC_QBUF, &buf)) PLFATAL("VIDIOC_QBUF (frame %d)", i);
00289 itsGrabbing[i] = true;
00290 }
00291
00292
00293 enum v4l2_buf_type typ = V4L2_BUF_TYPE_VIDEO_CAPTURE;
00294 if (ioctl(gfd, VIDIOC_STREAMON, &typ)) PLFATAL("VIDIOC_STREAMON");
00295
00296
00297
00298
00299
00300
00301 Display *display = XOpenDisplay(getenv ("DISPLAY"));
00302 if (display == NULL) LFATAL("cannot open X display");
00303 int screen_num = DefaultScreen(display);
00304
00305 Window win = XCreateSimpleWindow(display, RootWindow(display, screen_num), 0, 0, HRES, VRES, 0,
00306 WhitePixel(display, screen_num), BlackPixel (display, screen_num));
00307 XMapWindow(display, win);
00308 XSelectInput(display, win, ExposureMask | KeyPressMask);
00309 XFlush(display);
00310
00311 XEvent xev;
00312 XWindowEvent(display, win, ExposureMask, &xev);
00313
00314
00315 xev.xclient.type = ClientMessage;
00316 xev.xclient.serial = 0;
00317 xev.xclient.send_event = True;
00318 xev.xclient.message_type = XInternAtom (display, "_NET_WM_STATE", False);
00319 xev.xclient.window = win;
00320 xev.xclient.format = 32;
00321 xev.xclient.data.l[0] = 1;
00322 xev.xclient.data.l[1] = XInternAtom(display, "_NET_WM_STATE_FULLSCREEN", False);
00323 xev.xclient.data.l[2] = 0;
00324 xev.xclient.data.l[3] = 0;
00325 xev.xclient.data.l[4] = 0;
00326
00327 if (!XSendEvent(display, DefaultRootWindow(display), False, SubstructureRedirectMask | SubstructureNotifyMask, &xev))
00328 LFATAL("cannot bring X window to fullscreen");
00329 XSync(display, False);
00330
00331
00332 int fbfd = open("/dev/fb0", O_RDWR);
00333 if (!fbfd) LFATAL("cannot open framebuffer device");
00334
00335 size_t ssize = HRES * BPP / 8 * VRES;
00336
00337
00338 char* fbp = (char*)mmap(0, ssize, PROT_READ | PROT_WRITE, MAP_SHARED, fbfd, 0);
00339 if ((int)fbp == -1) LFATAL("failed to memory map framebuffer");
00340 intg16 *fbp16 = (intg16 *)fbp;
00341
00342
00343 struct omapfb_update_window update;
00344
00345
00346 update.x = 0;
00347 update.y = 0;
00348 update.width = HRES;
00349 update.height = VRES;
00350
00351
00352 update.format = OMAPFB_COLOR_RGB565 | OMAPFB_FORMAT_FLAG_TEARSYNC;
00353
00354
00355
00356
00357
00358
00359
00360 struct env_params envp;
00361 env_params_set_defaults(&envp);
00362
00363 envp.maxnorm_type = ENV_VCXNORM_MAXNORM;
00364 envp.scale_bits = 16;
00365 env_allocation_init(&malloc_thunk, &free);
00366
00367 struct env_visual_cortex ivc;
00368 env_visual_cortex_init(&ivc, &envp);
00369 struct env_dims indims; indims.w = GRABW; indims.h = GRABH;
00370 struct env_rgb_pixel* input = (struct env_rgb_pixel*)env_allocate(GRABW * GRABH * sizeof(struct env_rgb_pixel));
00371
00372
00373 int helpmode = 30; print_help(fbp16, true);
00374 bool helpmode2 = false; bool refresh = true;
00375
00376
00377
00378
00379 for (int fram = 0; ; ++fram)
00380 {
00381
00382 intg16* result = (intg16*)(itsMmapBuf[itsCurrentFrame]);
00383
00384 struct v4l2_buffer buf;
00385 memset(&buf, 0, sizeof(buf));
00386 buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
00387 buf.memory = V4L2_MEMORY_MMAP;
00388 buf.index = itsCurrentFrame;
00389
00390
00391
00392 if (itsGrabbing[itsCurrentFrame] == false) {
00393 if (ioctl(gfd, VIDIOC_QBUF, &buf)) PLFATAL("VIDIOC_QBUF (frame %d)", itsCurrentFrame);
00394 itsGrabbing[itsCurrentFrame] = true;
00395 }
00396
00397
00398 if (ioctl(gfd, VIDIOC_DQBUF, &buf) == -1) PLFATAL("VIDIOC_DQBUF (frame %d)", itsCurrentFrame);
00399 itsGrabbing[itsCurrentFrame] = false;
00400
00401
00402 itsGrabbing[itsCurrentFrame] = true;
00403 if (ioctl(gfd, VIDIOC_QBUF, &buf) < 0) PLFATAL("VIDIOC_QBUF (frame %d)", itsCurrentFrame);
00404
00405
00406 ++itsCurrentFrame;
00407 if (itsCurrentFrame >= nbuf) itsCurrentFrame = 0;
00408
00409
00410
00411
00412 unsigned char *in = ((unsigned char *)input) + 2;
00413 intg16 *data = result;
00414 for (uint ii = 0; ii < GRABW * GRABH; ++ii) {
00415 intg16 x = *data++;
00416 *in-- = ((byte(x)) << 3) & byte(0xF8);
00417 x >>= 3; *in-- = (byte(x)) & byte(0xFC);
00418 x >>= 5; *in = (byte(x)) & byte(0xF8);
00419 in += 5;
00420 }
00421
00422 struct timeval real1, real2;
00423 gettimeofday(&real1, 0);
00424
00425
00426 struct env_image ivcout = env_img_initializer;
00427 struct env_image intens = env_img_initializer;
00428 struct env_image color = env_img_initializer;
00429 struct env_image ori = env_img_initializer;
00430 #ifdef ENV_WITH_DYNAMIC_CHANNELS
00431 struct env_image flicker = env_img_initializer;
00432 struct env_image motion = env_img_initializer;
00433 #endif
00434
00435 env_visual_cortex_input(&ivc, &envp, "visualcortex", input, 0, indims, 0, 0, &ivcout, &intens, &color, &ori
00436 #ifdef ENV_WITH_DYNAMIC_CHANNELS
00437 , &flicker, &motion
00438 #endif
00439 );
00440
00441 env_visual_cortex_rescale_ranges(&ivcout, &intens, &color, &ori
00442 #ifdef ENV_WITH_DYNAMIC_CHANNELS
00443 , &flicker, &motion
00444 #endif
00445 );
00446
00447 gettimeofday(&real2, 0);
00448 const double real_secs = (real2.tv_sec - real1.tv_sec) + (real2.tv_usec - real1.tv_usec) / 1000000.0;
00449
00450 if (helpmode2 == false) {
00451 if (helpmode == 0) {
00452 char msg[255];
00453 sprintf(msg, "frame %06d, %0.2ffps", fram, 1.0 / real_secs);
00454 write_text(fbp16, msg, -1 , 464, TCOL);
00455 } else { if (--helpmode == 0) print_help(fbp16, false); }
00456 }
00457
00458
00459
00460
00461 intg16 *bf = fbp16;
00462 intg16 *img = result;
00463 for (uint jj = 0; jj < GRABH/4; ++jj) {
00464
00465 for (uint ii = 0; ii < GRABW/4; ++ii)
00466 { *bf++ = *img; *bf++ = *img++; *bf++ = *img++; *bf++ = *img++; *bf++ = *img++; } bf+=HRES-400; img-=GRABW;
00467 for (uint ii = 0; ii < GRABW/4; ++ii)
00468 { *bf++ = *img; *bf++ = *img++; *bf++ = *img++; *bf++ = *img++; *bf++ = *img++; } bf += HRES - 400;
00469
00470 for (uint ii = 0; ii < GRABW/4; ++ii)
00471 { *bf++ = *img; *bf++ = *img++; *bf++ = *img++; *bf++ = *img++; *bf++ = *img++; } bf += HRES - 400;
00472 for (uint ii = 0; ii < GRABW/4; ++ii)
00473 { *bf++ = *img; *bf++ = *img++; *bf++ = *img++; *bf++ = *img++; *bf++ = *img++; } bf += HRES - 400;
00474 for (uint ii = 0; ii < GRABW/4; ++ii)
00475 { *bf++ = *img; *bf++ = *img++; *bf++ = *img++; *bf++ = *img++; *bf++ = *img++; } bf += HRES - 400;
00476 }
00477
00478 draw_rect(fbp16, 0, 0, 400, 300, BCOL);
00479
00480
00481 draw_map(fbp16, &ivcout, 400, 0, 20);
00482 write_text(fbp16, "saliency map", 800 - 12*6-3, 3, TCOL);
00483 if (helpmode2 == false) {
00484 draw_map(fbp16, &intens, 0, 325, 8);
00485 draw_map(fbp16, &color, GRABW/2, 325, 8);
00486 draw_map(fbp16, &ori, GRABW, 325, 8);
00487 draw_map(fbp16, &flicker, (GRABW*3)/2, 325, 8);
00488 draw_map(fbp16, &motion, GRABW*2, 325, 8);
00489 }
00490
00491
00492 intg32 *sm = ivcout.pixels;
00493 env_size_t mx = 0, my = 0;
00494 intg32 mv = *sm;
00495 const env_size_t smw = ivcout.dims.w, smh = ivcout.dims.h;
00496 for (env_size_t j = 0; j < smh; ++j)
00497 for (env_size_t i = 0; i < smw; ++i)
00498 if (*sm > mv) { mv = *sm++; mx = i; my = j; } else ++sm;
00499 draw_filled_rect(fbp16, mx * 20+6, my * 20 + 6, 8, 8, SCOL);
00500 draw_rect(fbp16, mx * 20+5, my * 20 + 5, 10, 10, 0);
00501 draw_filled_rect(fbp16, mx * 20+6 + 400, my * 20 + 6, 8, 8, SCOL);
00502
00503
00504
00505 if (refresh) {
00506 write_text(fbp16, "intensity", 80 - 30, 314, TCOL);
00507 write_text(fbp16, "color", 240 - 15, 314, TCOL);
00508 write_text(fbp16, "orientation", 400 - 33, 314, TCOL);
00509 write_text(fbp16, "flicker", 560 - 18, 314, TCOL);
00510 write_text(fbp16, "motion", 720 - 15, 314, TCOL);
00511 }
00512
00513 env_img_make_empty(&ivcout);
00514 env_img_make_empty(&intens);
00515 env_img_make_empty(&color);
00516 env_img_make_empty(&ori);
00517 env_img_make_empty(&flicker);
00518 env_img_make_empty(&motion);
00519
00520
00521 XEvent event; refresh = false;
00522 if (XCheckWindowEvent(display, win, KeyPressMask, &event) == True && event.type == KeyPress) {
00523 if (event.xkey.keycode == 0x68 || event.xkey.keycode == 0x9) break;
00524 else if (event.xkey.keycode == 0x41) { helpmode2 = !helpmode2; print_help2(fbp16, helpmode2); helpmode = 0; if (helpmode2 == false) refresh = true; }
00525 else if (helpmode2 == false) { helpmode = 20; print_help(fbp16, true); }
00526 }
00527
00528
00529 ioctl(fbfd, OMAPFB_SYNC_GFX);
00530
00531
00532 ioctl (fbfd, OMAPFB_VSYNC);
00533
00534
00535 ioctl (fbfd, OMAPFB_UPDATE_WINDOW, &update);
00536
00537
00538 if (fram % 30 == 0) osso_display_blanking_pause(osso);
00539 }
00540
00541
00542 env_deallocate(input);
00543 env_visual_cortex_destroy(&ivc);
00544 env_allocation_cleanup();
00545
00546 munmap(fbp, ssize); close(fbfd);
00547
00548 typ = V4L2_BUF_TYPE_VIDEO_CAPTURE;
00549 if (ioctl(gfd, VIDIOC_STREAMOFF, &typ)) PLERROR("VIDIOC_STREAMOFF");
00550
00551 for (int i = 0; i < nbuf; i ++) munmap(itsMmapBuf[i], itsMmapBufSize[i]);
00552 delete [] itsMmapBuf; itsMmapBuf = NULL;
00553 delete [] itsMmapBufSize; itsMmapBufSize = NULL;
00554 close(gfd);
00555 delete [] itsGrabbing;
00556
00557
00558 XCloseDisplay (display);
00559
00560 osso_deinitialize(osso);
00561
00562 return 0;
00563 }