00001 /*!@file NeovisionII/objRec-ServerSift.C */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005 // 00005 // by the University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: Lior Elazary <elazary@usc.edu> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/NeovisionII/objRec-serverSift.C $ 00035 // $Id: objRec-serverSift.C 13901 2010-09-09 15:12:26Z lior $ 00036 // 00037 00038 #ifndef OBJREC_SERVERSIFT_C_DEFINED 00039 #define OBJREC_SERVERSIFT_C_DEFINED 00040 00041 #include <stdlib.h> 00042 #include <stdio.h> 00043 #include <signal.h> 00044 #include "Component/ModelManager.H" 00045 #include "Image/Image.H" 00046 #include "Image/ImageSet.H" 00047 #include "Image/ShapeOps.H" 00048 #include "Image/CutPaste.H" 00049 #include "Image/DrawOps.H" 00050 #include "Image/FilterOps.H" 00051 #include "Image/ColorOps.H" 00052 #include "Image/Transforms.H" 00053 #include "Image/MathOps.H" 00054 #include "Learn/Bayes.H" 00055 #include "GUI/DebugWin.H" 00056 #include "SIFT/ScaleSpace.H" 00057 #include "SIFT/VisualObject.H" 00058 #include "SIFT/Keypoint.H" 00059 #include "SIFT/VisualObjectDB.H" 00060 #include "NeovisionII/nv2_common.h" 00061 #include "NeovisionII/nv2_label_server.h" 00062 #include "rutz/fstring.h" 00063 #include "rutz/time.h" 00064 #include "rutz/timeformat.h" 00065 00066 #include <iostream> // for std::cin 00067 00068 const bool USECOLOR = false; 00069 00070 bool terminate = false; 00071 00072 void terminateProc(int s) 00073 { 00074 terminate = true; 00075 } 00076 00077 std::string matchObject(Image<PixRGB<byte> > &ima, VisualObjectDB& vdb, float &score) 00078 { 00079 //find object in the database 00080 std::vector< rutz::shared_ptr<VisualObjectMatch> > matches; 00081 rutz::shared_ptr<VisualObject> 00082 vo(new VisualObject("PIC", "PIC", ima, 00083 Point2D<int>(-1,-1), 00084 std::vector<float>(), 00085 std::vector< rutz::shared_ptr<Keypoint> >(), 00086 USECOLOR)); 00087 00088 const uint nmatches = vdb.getObjectMatches(vo, matches, VOMA_SIMPLE, 00089 100U, //max objs to return 00090 0.5F, //keypoint distance score default 0.5F 00091 0.5F, //affine distance score default 0.5F 00092 1.0F, //minscore default 1.0F 00093 3U, //min # of keypoint match 00094 100U, //keypoint selection thershold 00095 false //sort by preattentive 00096 ); 00097 00098 score = 0; 00099 float avgScore = 0, affineAvgDist = 0; 00100 int nkeyp = 0; 00101 int objId = -1; 00102 if (nmatches > 0) 00103 { 00104 rutz::shared_ptr<VisualObject> obj; //so we will have a ref to the last matches obj 00105 rutz::shared_ptr<VisualObjectMatch> vom; 00106 //for(unsigned int i=0; i< nmatches; i++){ 00107 for (unsigned int i = 0; i < 1; ++i) 00108 { 00109 vom = matches[i]; 00110 obj = vom->getVoTest(); 00111 score = vom->getScore(); 00112 nkeyp = vom->size(); 00113 avgScore = vom->getKeypointAvgDist(); 00114 affineAvgDist = vom->getAffineAvgDist(); 00115 00116 objId = atoi(obj->getName().c_str()+3); 00117 00118 return obj->getName(); 00119 LINFO("### Object match with '%s' score=%f ID:%i", 00120 obj->getName().c_str(), vom->getScore(), objId); 00121 00122 //calculate the actual distance (location of keypoints) between 00123 //keypoints. If the same patch was found, then the distance should 00124 //be close to 0 00125 double dist = 0; 00126 for (int keyp=0; keyp<nkeyp; keyp++) 00127 { 00128 const KeypointMatch kpm = vom->getKeypointMatch(keyp); 00129 00130 float refX = kpm.refkp->getX(); 00131 float refY = kpm.refkp->getY(); 00132 00133 float tstX = kpm.tstkp->getX(); 00134 float tstY = kpm.tstkp->getY(); 00135 dist += (refX-tstX) * (refX-tstX); 00136 dist += (refY-tstY) * (refY-tstY); 00137 } 00138 00139 // printf("%i:%s %i %f %i %f %f %f\n", objNum, obj->getName().c_str(), 00140 // nmatches, score, nkeyp, avgScore, affineAvgDist, sqrt(dist)); 00141 00142 //analizeImage(); 00143 } 00144 00145 } 00146 00147 return std::string("nomatch"); 00148 } 00149 00150 std::string getBestLabel(const std::deque<std::string>& labels, 00151 const size_t mincount) 00152 { 00153 if (labels.size() == 0) 00154 return std::string(); 00155 00156 std::map<std::string, size_t> counts; 00157 00158 size_t bestcount = 0; 00159 size_t bestpos = 0; 00160 00161 for (size_t i = 0; i < labels.size(); ++i) 00162 { 00163 const size_t c = ++(counts[labels[i]]); 00164 00165 if (c >= bestcount) 00166 { 00167 bestcount = c; 00168 bestpos = i; 00169 } 00170 } 00171 00172 if (bestcount >= mincount) 00173 return labels[bestpos]; 00174 00175 return std::string(); 00176 } 00177 00178 namespace 00179 { 00180 void fillRegion(Image<PixRGB<byte> >& img, PixRGB<byte> col, 00181 const int x0, const int x1, 00182 const int y0, const int y1) 00183 { 00184 for (int x = x0; x < x1; ++x) 00185 for (int y = y0; y < y1; ++y) 00186 img.setVal(x, y, col); 00187 } 00188 00189 Image<PixRGB<byte> > makeColorbars(const int w, const int h) 00190 { 00191 Image<PixRGB<byte> > result = Image<PixRGB<byte> >(w, h, ZEROS); 00192 00193 const PixRGB<byte> cols[] = 00194 { 00195 PixRGB<byte>(255, 255, 255), // white 00196 PixRGB<byte>(255, 255, 0), // yellow 00197 PixRGB<byte>(0, 255, 255), // cyan 00198 PixRGB<byte>(0, 255, 0), // green 00199 PixRGB<byte>(255, 0, 255), // magenta 00200 PixRGB<byte>(255, 0, 0), // red 00201 PixRGB<byte>(0, 0, 255) // blue 00202 }; 00203 00204 int x1 = 0; 00205 for (int i = 0; i < 7; ++i) 00206 { 00207 const int x0 = x1+1; 00208 x1 = int(double(w)*(i+1)/7.0 + 0.5); 00209 fillRegion(result, cols[i], 00210 x0, x1, 00211 0, int(h*2.0/3.0)); 00212 } 00213 00214 x1 = 0; 00215 for (int i = 0; i < 16; ++i) 00216 { 00217 const int x0 = x1; 00218 x1 = int(double(w)*(i+1)/16.0 + 0.5); 00219 const int gray = int(255.0*i/15.0 + 0.5); 00220 fillRegion(result, PixRGB<byte>(gray, gray, gray), 00221 x0, x1, 00222 int(h*2.0/3.0)+1, int(h*5.0/6.0)); 00223 } 00224 00225 fillRegion(result, PixRGB<byte>(255, 0, 0), 00226 0, w, 00227 int(h*5.0/6.0)+1, h); 00228 00229 writeText(result, Point2D<int>(1, int(h*5.0/6.0)+2), 00230 "iLab Neuromorphic Vision", 00231 PixRGB<byte>(0, 0, 0), PixRGB<byte>(255, 0, 0), 00232 SimpleFont::FIXED(10)); 00233 00234 return result; 00235 } 00236 00237 Image<PixRGB<byte> > addLabels(const Image<PixRGB<byte> >& templ, 00238 const int fnum) 00239 { 00240 Image<PixRGB<byte> > result = templ; 00241 00242 std::string fnumstr = sformat("%06d", fnum); 00243 writeText(result, Point2D<int>(1, 1), 00244 fnumstr.c_str(), 00245 PixRGB<byte>(0, 0, 0), PixRGB<byte>(255, 255, 255), 00246 SimpleFont::FIXED(10)); 00247 00248 rutz::time t = rutz::time::wall_clock_now(); 00249 00250 writeText(result, Point2D<int>(1, result.getHeight() - 14), 00251 rutz::format_time(t).c_str(), 00252 PixRGB<byte>(32, 32, 32), PixRGB<byte>(255, 0, 0), 00253 SimpleFont::FIXED(6)); 00254 00255 return result; 00256 } 00257 } 00258 00259 int main(const int argc, const char **argv) 00260 { 00261 00262 MYLOGVERB = LOG_INFO; 00263 ModelManager mgr("Test ObjRec"); 00264 00265 if (mgr.parseCommandLine(argc, argv, "<vdb file> <server ip>", 2, 2) == false) 00266 return 1; 00267 00268 mgr.start(); 00269 00270 // catch signals and redirect them to terminate for clean exit: 00271 signal(SIGHUP, terminateProc); signal(SIGINT, terminateProc); 00272 signal(SIGQUIT, terminateProc); signal(SIGTERM, terminateProc); 00273 signal(SIGALRM, terminateProc); 00274 00275 //get command line options 00276 const std::string vdbFile = mgr.getExtraArg(0); 00277 const std::string server_ip = mgr.getExtraArg(1); 00278 bool train = false; 00279 00280 LINFO("Loading db from %s\n", vdbFile.c_str()); 00281 VisualObjectDB vdb; 00282 vdb.loadFrom(vdbFile); 00283 00284 XWinManaged xwin(Dims(256,256), 00285 -1, -1, "ILab Robot Head Demo"); 00286 00287 00288 struct nv2_label_server* labelServer = 00289 nv2_label_server_create(9930, 00290 server_ip.c_str(), 00291 9931); 00292 00293 nv2_label_server_set_verbosity(labelServer,1); //allow warnings 00294 00295 00296 const size_t max_label_history = 1; 00297 std::deque<std::string> recent_labels; 00298 00299 Image<PixRGB<byte> > colorbars = makeColorbars(256, 256); 00300 00301 while (!terminate) 00302 { 00303 Point2D<int> clickLoc = xwin.getLastMouseClick(); 00304 if (clickLoc.isValid()) 00305 train = !train; 00306 00307 struct nv2_image_patch p; 00308 const enum nv2_image_patch_result res = 00309 nv2_label_server_get_current_patch(labelServer, &p); 00310 00311 std::string objName; 00312 if (res == NV2_IMAGE_PATCH_END) 00313 { 00314 LINFO("ok, quitting"); 00315 break; 00316 } 00317 else if (res == NV2_IMAGE_PATCH_NONE) 00318 { 00319 usleep(10000); 00320 continue; 00321 } 00322 else if (res == NV2_IMAGE_PATCH_VALID) 00323 { 00324 if (p.type != NV2_PIXEL_TYPE_RGB24) 00325 { 00326 LINFO("got a non-rgb24 patch; ignoring %i", p.type); 00327 continue; 00328 } 00329 00330 if (p.width * p.height == 1) 00331 { 00332 xwin.drawImage(addLabels(colorbars, p.id)); 00333 continue; 00334 } 00335 00336 Image<PixRGB<byte> > img(p.width, p.height, NO_INIT); 00337 memcpy(img.getArrayPtr(), p.data, p.width*p.height*3); 00338 00339 Image<PixRGB<byte> > inputImg = rescale(img, 256, 256); 00340 00341 xwin.drawImage(inputImg); 00342 float score = 0; 00343 std::string objName = matchObject(inputImg, vdb, score); 00344 00345 if (objName == "nomatch") 00346 { 00347 recent_labels.resize(0); 00348 00349 if (train) 00350 { 00351 LINFO("Enter a label for this object:\n"); 00352 std::getline(std::cin, objName); 00353 LINFO("You typed '%s'\n", objName.c_str()); 00354 00355 if (objName == "exit") 00356 break; 00357 else if (objName != "") 00358 { 00359 rutz::shared_ptr<VisualObject> 00360 vo(new VisualObject(objName.c_str(), "NULL", inputImg, 00361 Point2D<int>(-1,-1), 00362 std::vector<float>(), 00363 std::vector< rutz::shared_ptr<Keypoint> >(), 00364 USECOLOR)); 00365 vdb.addObject(vo); 00366 vdb.saveTo(vdbFile); 00367 } 00368 } 00369 } 00370 else 00371 { 00372 recent_labels.push_back(objName); 00373 while (recent_labels.size() > max_label_history) 00374 recent_labels.pop_front(); 00375 00376 const std::string bestObjName = 00377 getBestLabel(recent_labels, 1); 00378 00379 if (bestObjName.size() > 0) 00380 { 00381 struct nv2_patch_label l; 00382 l.protocol_version = NV2_LABEL_PROTOCOL_VERSION; 00383 l.patch_id = p.id; 00384 l.confidence = (int)(score*100.0F); 00385 snprintf(l.source, sizeof(l.source), "%s", 00386 "ObjRec"); 00387 snprintf(l.name, sizeof(l.name), "%s", 00388 objName.c_str()); 00389 snprintf(l.extra_info, sizeof(l.extra_info), 00390 "%ux%u #%u", 00391 (unsigned int) p.width, 00392 (unsigned int) p.height, 00393 (unsigned int) p.id); 00394 00395 nv2_label_server_send_label(labelServer, &l); 00396 00397 LINFO("sent label '%s (%s)'\n", l.name, l.extra_info); 00398 } 00399 } 00400 00401 nv2_image_patch_destroy(&p); 00402 } 00403 00404 } 00405 00406 if (terminate) 00407 LINFO("Ending application because a signal was caught"); 00408 00409 nv2_label_server_destroy(labelServer); 00410 } 00411 00412 00413 // ###################################################################### 00414 /* So things look consistent in everyone's emacs... */ 00415 /* Local Variables: */ 00416 /* indent-tabs-mode: nil */ 00417 /* End: */ 00418 00419 #endif