00001 /*!@file NeovisionII/objRec-ServerPCA.C */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005 // 00005 // by the University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Lior Elazary <elazary@usc.edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/NeovisionII/objRec-serverPCA.C $ 00035 // $Id: objRec-serverPCA.C 13901 2010-09-09 15:12:26Z lior $ 00036 // 00037 00038 #ifndef OBJREC_SERVERPCA_C_DEFINED 00039 #define OBJREC_SERVERPCA_C_DEFINED 00040 00041 #include "Image/OpenCVUtil.H" 00042 #include <stdlib.h> 00043 #include <stdio.h> 00044 #include <signal.h> 00045 #include "Component/ModelManager.H" 00046 #include "Image/Image.H" 00047 #include "Image/ImageSet.H" 00048 #include "Image/ShapeOps.H" 00049 #include "Image/CutPaste.H" 00050 #include "Image/DrawOps.H" 00051 #include "Image/FilterOps.H" 00052 #include "Image/ColorOps.H" 00053 #include "Image/Transforms.H" 00054 #include "Image/MathOps.H" 00055 #include "Learn/Bayes.H" 00056 #include "GUI/DebugWin.H" 00057 #include "SIFT/ScaleSpace.H" 00058 #include "SIFT/VisualObject.H" 00059 #include "SIFT/Keypoint.H" 00060 #include "SIFT/VisualObjectDB.H" 00061 #include "NeovisionII/nv2_common.h" 00062 #include "NeovisionII/nv2_label_server.h" 00063 #include "rutz/fstring.h" 00064 #include "rutz/time.h" 00065 #include "rutz/timeformat.h" 00066 #include "Media/FrameSeries.H" 00067 #include "Transport/FrameInfo.H" 00068 #include "Raster/GenericFrame.H" 00069 #include "GUI/XWinManaged.H" 00070 #include "GUI/ImageDisplayStream.H" 00071 00072 #include <iostream> // for std::cin 00073 00074 const bool USECOLOR = false; 00075 00076 bool terminate = false; 00077 00078 void terminateProc(int s) 00079 { 00080 terminate = true; 00081 } 00082 00083 std::string getBestLabel(const std::deque<std::string>& labels, 00084 const size_t mincount) 00085 { 00086 if (labels.size() == 0) 00087 return std::string(); 00088 00089 std::map<std::string, size_t> counts; 00090 00091 size_t bestcount = 0; 00092 size_t bestpos = 0; 00093 00094 for (size_t i = 0; i < labels.size(); ++i) 00095 { 00096 const size_t c = ++(counts[labels[i]]); 00097 00098 if (c >= bestcount) 00099 { 00100 bestcount = c; 00101 bestpos = i; 00102 } 00103 } 00104 00105 if (bestcount >= mincount) 00106 return labels[bestpos]; 00107 00108 return std::string(); 00109 } 00110 00111 namespace 00112 { 00113 void fillRegion(Image<PixRGB<byte> >& img, PixRGB<byte> col, 00114 const int x0, const int x1, 00115 const int y0, const int y1) 00116 { 00117 for (int x = x0; x < x1; ++x) 00118 for (int y = y0; y < y1; ++y) 00119 img.setVal(x, y, col); 00120 } 00121 00122 Image<PixRGB<byte> > makeColorbars(const int w, const int h) 00123 { 00124 Image<PixRGB<byte> > result = Image<PixRGB<byte> >(w, h, ZEROS); 00125 00126 const PixRGB<byte> cols[] = 00127 { 00128 PixRGB<byte>(255, 255, 255), // white 00129 PixRGB<byte>(255, 255, 0), // yellow 00130 PixRGB<byte>(0, 255, 255), // cyan 00131 PixRGB<byte>(0, 255, 0), // green 00132 PixRGB<byte>(255, 0, 255), // magenta 00133 PixRGB<byte>(255, 0, 0), // red 00134 PixRGB<byte>(0, 0, 255) // blue 00135 }; 00136 00137 int x1 = 0; 00138 for (int i = 0; i < 7; ++i) 00139 { 00140 const int x0 = x1+1; 00141 x1 = int(double(w)*(i+1)/7.0 + 0.5); 00142 fillRegion(result, cols[i], 00143 x0, x1, 00144 0, int(h*2.0/3.0)); 00145 } 00146 00147 x1 = 0; 00148 for (int i = 0; i < 16; ++i) 00149 { 00150 const int x0 = x1; 00151 x1 = int(double(w)*(i+1)/16.0 + 0.5); 00152 const int gray = int(255.0*i/15.0 + 0.5); 00153 fillRegion(result, PixRGB<byte>(gray, gray, gray), 00154 x0, x1, 00155 int(h*2.0/3.0)+1, int(h*5.0/6.0)); 00156 } 00157 00158 fillRegion(result, PixRGB<byte>(255, 0, 0), 00159 0, w, 00160 int(h*5.0/6.0)+1, h); 00161 00162 writeText(result, Point2D<int>(1, int(h*5.0/6.0)+2), 00163 "iLab Neuromorphic Vision", 00164 PixRGB<byte>(0, 0, 0), PixRGB<byte>(255, 0, 0), 00165 SimpleFont::FIXED(10)); 00166 00167 return result; 00168 } 00169 00170 Image<PixRGB<byte> > addLabels(const Image<PixRGB<byte> >& templ, 00171 const int fnum) 00172 { 00173 Image<PixRGB<byte> > result = templ; 00174 00175 std::string fnumstr = sformat("%06d", fnum); 00176 writeText(result, Point2D<int>(1, 1), 00177 fnumstr.c_str(), 00178 PixRGB<byte>(0, 0, 0), PixRGB<byte>(255, 255, 255), 00179 SimpleFont::FIXED(10)); 00180 00181 rutz::time t = rutz::time::wall_clock_now(); 00182 00183 writeText(result, Point2D<int>(1, result.getHeight() - 14), 00184 rutz::format_time(t).c_str(), 00185 PixRGB<byte>(32, 32, 32), PixRGB<byte>(255, 0, 0), 00186 SimpleFont::FIXED(6)); 00187 00188 return result; 00189 } 00190 } 00191 00192 void trainPCA(ImageSet<byte> images) 00193 { 00194 int imagesCollected = 10; 00195 IplImage* input[imagesCollected]; 00196 00197 //IplImage* input = img2ipl(images[0]); 00198 CvMat* pcaInputs = cvCreateMat(imagesCollected, (input[0]->width * input[0]->height), CV_8UC1); 00199 CvMat* average = cvCreateMat(1, (input[0]->width * input[0]->height), CV_32FC1); 00200 CvMat* eigenValues = cvCreateMat(1, std::min(pcaInputs->rows, pcaInputs->cols), CV_32FC1); 00201 CvMat* eigens = cvCreateMat(imagesCollected, (input[0]->width * input[0]->height), CV_32FC1); 00202 CvMat* coefficients = cvCreateMat(imagesCollected, eigens->rows, CV_32FC1); 00203 00204 // construct required structures for later recognition 00205 00206 CvMat* recogniseCoeffs = cvCreateMat(1, eigens->rows, CV_32FC1); 00207 CvMat* recognise = cvCreateMat(1, input[0]->width * input[0]->height, CV_8UC1); 00208 00209 for (int i = 0; i < imagesCollected; i++){ 00210 for (int j = 0; j < (input[0]->width * input[0]->height); j++){ 00211 CV_MAT_ELEM(*pcaInputs, uchar, i, j) = (input[i])->imageData[(j)]; 00212 } 00213 } 00214 00215 // compute eigen image representation 00216 00217 cvCalcPCA(pcaInputs, average, eigenValues, eigens, CV_PCA_DATA_AS_ROW); 00218 00219 // compute eigen. co-efficients for all sample images and store 00220 00221 cvProjectPCA(pcaInputs, average, eigens, coefficients); 00222 00223 for (int i = 0; i < imagesCollected; i++){cvReleaseImage( &(input[i]));} 00224 00225 // release matrix objects 00226 00227 cvReleaseMat( &pcaInputs); 00228 cvReleaseMat( &average ); 00229 cvReleaseMat( &eigenValues ); 00230 cvReleaseMat( &eigens ); 00231 cvReleaseMat( &coefficients ); 00232 cvReleaseMat( &recogniseCoeffs ); 00233 cvReleaseMat( &recognise ); 00234 00235 } 00236 00237 std::string recogPCA(Image<byte> img) 00238 { 00239 00240 // project image to eigen space 00241 00242 00243 // for (int j = 0; j < (input[0]->width * input[0]->height); j++){ 00244 // CV_MAT_ELEM(*recognise, uchar, 0, j) = (grayImg)->imageData[(j)]; 00245 // } 00246 // 00247 // cvProjectPCA(recognise, average, eigens, recogniseCoeffs); 00248 // 00249 // // check which set of stored sample co-efficients it is 00250 // // closest too and then display the corresponding image 00251 // 00252 // double closestCoeffDistance = HUGE; 00253 // int closestImage = 0; 00254 // 00255 // for (int i = 0; i < imagesCollected; i++) 00256 // { 00257 // double diff = 0; 00258 // for(int j = 0; j < recogniseCoeffs->cols; j++) 00259 // { 00260 // diff += fabs(CV_MAT_ELEM(*coefficients, float, i, j) 00261 // - CV_MAT_ELEM(*recogniseCoeffs, float, 0, j)); 00262 // } 00263 // if (diff < closestCoeffDistance){ 00264 // closestCoeffDistance = diff; 00265 // closestImage = i; 00266 // 00267 // } 00268 // } 00269 00270 return std::string("nomatch"); 00271 } 00272 00273 00274 Point2D<int> getMouseClick(nub::ref<OutputFrameSeries> &ofs, const char* wname) 00275 { 00276 const nub::soft_ref<ImageDisplayStream> ids = 00277 ofs->findFrameDestType<ImageDisplayStream>(); 00278 00279 const rutz::shared_ptr<XWinManaged> uiwin = 00280 ids.is_valid() 00281 ? ids->getWindow(wname) 00282 : rutz::shared_ptr<XWinManaged>(); 00283 00284 if (uiwin.is_valid()) 00285 return uiwin->getLastMouseClick(); 00286 else 00287 return Point2D<int>(-1,-1); 00288 } 00289 00290 int main(const int argc, const char **argv) 00291 { 00292 00293 MYLOGVERB = LOG_INFO; 00294 ModelManager mgr("Test ObjRec"); 00295 00296 nub::ref<OutputFrameSeries> ofs(new OutputFrameSeries(mgr)); 00297 mgr.addSubComponent(ofs); 00298 00299 nub::ref<InputFrameSeries> ifs(new InputFrameSeries(mgr)); 00300 mgr.addSubComponent(ifs); 00301 00302 00303 00304 00305 if (mgr.parseCommandLine(argc, argv, "<vdb file> <server ip>", 2, 2) == false) 00306 return 1; 00307 00308 mgr.start(); 00309 00310 // catch signals and redirect them to terminate for clean exit: 00311 signal(SIGHUP, terminateProc); signal(SIGINT, terminateProc); 00312 signal(SIGQUIT, terminateProc); signal(SIGTERM, terminateProc); 00313 signal(SIGALRM, terminateProc); 00314 00315 //get command line options 00316 const std::string vdbFile = mgr.getExtraArg(0); 00317 const std::string server_ip = mgr.getExtraArg(1); 00318 bool train = false; 00319 00320 struct nv2_label_server* labelServer = 00321 nv2_label_server_create(9930, 00322 server_ip.c_str(), 00323 9931); 00324 00325 nv2_label_server_set_verbosity(labelServer,1); //allow warnings 00326 00327 00328 const size_t max_label_history = 1; 00329 std::deque<std::string> recent_labels; 00330 00331 Image<PixRGB<byte> > colorbars = makeColorbars(256, 256); 00332 00333 bool getImgFromFile = true; 00334 while (!terminate) 00335 { 00336 00337 Image<PixRGB<byte> > inputImg; 00338 struct nv2_image_patch p; 00339 00340 if (getImgFromFile) 00341 { 00342 00343 const FrameState is = ifs->updateNext(); 00344 if (is == FRAME_COMPLETE) return 0; 00345 GenericFrame input = ifs->readFrame(); 00346 inputImg = input.asRgb(); 00347 } else { 00348 00349 const enum nv2_image_patch_result res = 00350 nv2_label_server_get_current_patch(labelServer, &p); 00351 00352 std::string objName; 00353 if (res == NV2_IMAGE_PATCH_END) 00354 { 00355 LINFO("ok, quitting"); 00356 break; 00357 } 00358 else if (res == NV2_IMAGE_PATCH_NONE) 00359 { 00360 usleep(10000); 00361 continue; 00362 } 00363 else if (res == NV2_IMAGE_PATCH_VALID) 00364 { 00365 if (p.type != NV2_PIXEL_TYPE_RGB24) 00366 { 00367 LINFO("got a non-rgb24 patch; ignoring %i", p.type); 00368 continue; 00369 } 00370 00371 if (p.width * p.height == 1) 00372 { 00373 //xwin.drawImage(addLabels(colorbars, p.id)); 00374 continue; 00375 } 00376 00377 Image<PixRGB<byte> > img(p.width, p.height, NO_INIT); 00378 memcpy(img.getArrayPtr(), p.data, p.width*p.height*3); 00379 00380 inputImg = rescale(img, 256, 256); 00381 } 00382 } 00383 00384 float score = 0; 00385 std::string objName = "nomatch"; 00386 00387 if (inputImg.initialized()) 00388 { 00389 ofs->writeRGB(inputImg, "object", FrameInfo("object", SRC_POS)); 00390 getchar(); 00391 ofs->updateNext(); 00392 00393 Point2D<int> clickLoc = getMouseClick(ofs, "object"); 00394 if (clickLoc.isValid()) 00395 train = !train; 00396 00397 00398 if (objName == "nomatch") 00399 { 00400 recent_labels.resize(0); 00401 00402 if (train) 00403 { 00404 printf("Enter a label for this object:\n"); 00405 std::getline(std::cin, objName); 00406 printf("You typed '%s'\n", objName.c_str()); 00407 00408 if (objName == "exit") 00409 break; 00410 else if (objName != "") 00411 { 00412 //Train object with objName 00413 } 00414 } 00415 } 00416 } 00417 00418 if (objName != "nomatch" && !getImgFromFile) 00419 { 00420 recent_labels.push_back(objName); 00421 while (recent_labels.size() > max_label_history) 00422 recent_labels.pop_front(); 00423 00424 const std::string bestObjName = 00425 getBestLabel(recent_labels, 1); 00426 00427 if (bestObjName.size() > 0) 00428 { 00429 struct nv2_patch_label l; 00430 l.protocol_version = NV2_LABEL_PROTOCOL_VERSION; 00431 l.patch_id = p.id; 00432 l.confidence = (int)(score*100.0F); 00433 snprintf(l.source, sizeof(l.source), "%s", 00434 "ObjRec"); 00435 snprintf(l.name, sizeof(l.name), "%s", 00436 objName.c_str()); 00437 snprintf(l.extra_info, sizeof(l.extra_info), 00438 "%ux%u #%u", 00439 (unsigned int) p.width, 00440 (unsigned int) p.height, 00441 (unsigned int) p.id); 00442 00443 nv2_label_server_send_label(labelServer, &l); 00444 00445 LINFO("sent label '%s (%s)'\n", l.name, l.extra_info); 00446 } 00447 nv2_image_patch_destroy(&p); 00448 } 00449 } 00450 00451 if (terminate) 00452 LINFO("Ending application because a signal was caught"); 00453 00454 nv2_label_server_destroy(labelServer); 00455 } 00456 00457 00458 // ###################################################################### 00459 /* So things look consistent in everyone's emacs... */ 00460 /* Local Variables: */ 00461 /* indent-tabs-mode: nil */ 00462 /* End: */ 00463 00464 #endif