00001 /*!@file NeovisionII/objRec-ServerSift.C */ 00002 00003 // //////////////////////////////////////////////////////////////////// // 00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005 // 00005 // by the University of Southern California (USC) and the iLab at USC. // 00006 // See http://iLab.usc.edu for information about this project. // 00007 // //////////////////////////////////////////////////////////////////// // 00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected // 00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency // 00010 // in Visual Environments, and Applications'' by Christof Koch and // 00011 // Laurent Itti, California Institute of Technology, 2001 (patent // 00012 // pending; application number 09/912,225 filed July 23, 2001; see // 00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status). // 00014 // //////////////////////////////////////////////////////////////////// // 00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit. // 00016 // // 00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can // 00018 // redistribute it and/or modify it under the terms of the GNU General // 00019 // Public License as published by the Free Software Foundation; either // 00020 // version 2 of the License, or (at your option) any later version. // 00021 // // 00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope // 00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the // 00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // 00025 // PURPOSE. See the GNU General Public License for more details. // 00026 // // 00027 // You should have received a copy of the GNU General Public License // 00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write // 00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, // 00030 // Boston, MA 02111-1307 USA. // 00031 // //////////////////////////////////////////////////////////////////// // 00032 // 00033 // Primary maintainer for this file: John McInerney <jmcinerney6@gmail.com> 00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/CUDASIFT/cudasift-server.C $ 00035 // $Id: cudasift-server.C 14295 2010-12-02 20:02:32Z itti $ 00036 // 00037 00038 00039 #ifndef OBJREC_CUDASIFTSERVER_C_DEFINED 00040 #define OBJREC_CUDASIFTSERVER_C_DEFINED 00041 00042 #include <signal.h> 00043 #include "Component/ModelManager.H" 00044 #include "Image/Image.H" 00045 #include "Image/ImageSet.H" 00046 #include "Image/ShapeOps.H" 00047 #include "Image/CutPaste.H" 00048 #include "Image/DrawOps.H" 00049 #include "Image/FilterOps.H" 00050 #include "Image/ColorOps.H" 00051 #include "Image/Transforms.H" 00052 #include "Image/MathOps.H" 00053 #include "Learn/Bayes.H" 00054 #include "GUI/DebugWin.H" 00055 #include "SIFT/ScaleSpace.H" 00056 #include "SIFT/VisualObject.H" 00057 #include "SIFT/Keypoint.H" 00058 #include "SIFT/VisualObjectDB.H" 00059 //#include "CUDASIFT/CUDAVisualObjectDB.H" 00060 #include "CUDASIFT/CUDAVisualObject.H" 00061 #include "NeovisionII/nv2_common.h" 00062 #include "NeovisionII/nv2_label_server.h" 00063 #include "rutz/fstring.h" 00064 #include "rutz/time.h" 00065 #include "rutz/timeformat.h" 00066 00067 #include "CUDASIFT/tpimageutil.h" 00068 #include "CUDASIFT/tpimage.h" 00069 #include "CUDASIFT/cudaImage.h" 00070 #include "CUDASIFT/cudaSift.h" 00071 #include "CUDASIFT/cudaSiftH.h" //This one is an addition and null 00072 00073 #include <iostream> // for std::cin 00074 00075 const bool USECOLOR = false; 00076 00077 bool terminate = false; 00078 00079 void terminateProc(int s) 00080 { 00081 terminate = true; 00082 } 00083 00084 std::string matchObject(Image<PixRGB<byte> > &ima, VisualObjectDB& vdb, float &score) 00085 { 00086 std::vector< rutz::shared_ptr<VisualObjectMatch> > matches; 00087 #ifdef GPUSIFT 00088 rutz::shared_ptr<CUDAVisualObject> 00089 vo(new CUDAVisualObject("PIC", "PIC", ima, 00090 Point2D<int>(-1,-1), 00091 std::vector<float>(), 00092 std::vector< rutz::shared_ptr<Keypoint> >(), 00093 false,true)); 00094 #else 00095 rutz::shared_ptr<VisualObject> 00096 vo(new VisualObject("PIC", "PIC", ima, 00097 Point2D<int>(-1,-1), 00098 std::vector<float>(), 00099 std::vector< rutz::shared_ptr<Keypoint> >(), 00100 false,true)); 00101 #endif 00102 00103 const uint nmatches = vdb.getObjectMatches(vo, matches, VOMA_SIMPLE, 00104 100U, //max objs to return 00105 0.5F, //keypoint distance score default 0.5F 00106 0.5F, //affine distance score default 0.5F 00107 1.0F, //minscore default 1.0F 00108 3U, //min # of keypoint match 00109 100U, //keypoint selection threshold 00110 false //sort by preattentive 00111 ); 00112 score = 0; 00113 float avgScore = 0, affineAvgDist = 0; 00114 int nkeyp = 0; 00115 int objId = -1; 00116 //rutz::shared_ptr<VisualObject> bestobj; 00117 //double bestdist = 100000000.0; 00118 //int bestobjId = -1; 00119 if (nmatches > 0) 00120 { 00121 //Shouldn't this be a CUDAVisualObject? Make computkeypoints virtual? 00122 rutz::shared_ptr<VisualObject> obj; 00123 rutz::shared_ptr<VisualObjectMatch> vom; 00124 //for (unsigned int i = 0; i < nmatches; ++i) 00125 for (unsigned int i = 0; i < 1; ++i) //Loop just once, sorted? 00126 { 00127 vom = matches[i]; 00128 obj = vom->getVoTest(); 00129 score = vom->getScore(); 00130 nkeyp = vom->size(); 00131 avgScore = vom->getKeypointAvgDist(); 00132 affineAvgDist = vom->getAffineAvgDist(); 00133 00134 objId = atoi(obj->getName().c_str()+3); 00135 // Pick off the prototype name from full path 00136 std::string fullpath = obj->getName(); 00137 std::string::size_type spos = fullpath.find_last_of('/'); 00138 std::string protoname = fullpath.substr(0,spos); 00139 spos = protoname.find_last_of('/'); 00140 protoname = protoname.substr(spos+1); 00141 std::cout << "protoname = " << protoname << std::endl; 00142 00143 LINFO("### Object match with '%s' score=%f ID:%i", 00144 obj->getName().c_str(), vom->getScore(), objId); 00145 return protoname; 00146 } 00147 } 00148 00149 return std::string("nomatch"); 00150 } 00151 00152 std::string getBestLabel(const std::deque<std::string>& labels, 00153 const size_t mincount) 00154 { 00155 if (labels.size() == 0) 00156 return std::string(); 00157 00158 std::map<std::string, size_t> counts; 00159 00160 size_t bestcount = 0; 00161 size_t bestpos = 0; 00162 00163 for (size_t i = 0; i < labels.size(); ++i) 00164 { 00165 const size_t c = ++(counts[labels[i]]); 00166 00167 if (c >= bestcount) 00168 { 00169 bestcount = c; 00170 bestpos = i; 00171 } 00172 } 00173 00174 if (bestcount >= mincount) 00175 return labels[bestpos]; 00176 00177 return std::string(); 00178 } 00179 00180 namespace 00181 { 00182 void fillRegion(Image<PixRGB<byte> >& img, PixRGB<byte> col, 00183 const int x0, const int x1, 00184 const int y0, const int y1) 00185 { 00186 for (int x = x0; x < x1; ++x) 00187 for (int y = y0; y < y1; ++y) 00188 img.setVal(x, y, col); 00189 } 00190 00191 Image<PixRGB<byte> > makeColorbars(const int w, const int h) 00192 { 00193 Image<PixRGB<byte> > result = Image<PixRGB<byte> >(w, h, ZEROS); 00194 00195 const PixRGB<byte> cols[] = 00196 { 00197 PixRGB<byte>(255, 255, 255), // white 00198 PixRGB<byte>(255, 255, 0), // yellow 00199 PixRGB<byte>(0, 255, 255), // cyan 00200 PixRGB<byte>(0, 255, 0), // green 00201 PixRGB<byte>(255, 0, 255), // magenta 00202 PixRGB<byte>(255, 0, 0), // red 00203 PixRGB<byte>(0, 0, 255) // blue 00204 }; 00205 00206 int x1 = 0; 00207 for (int i = 0; i < 7; ++i) 00208 { 00209 const int x0 = x1+1; 00210 x1 = int(double(w)*(i+1)/7.0 + 0.5); 00211 fillRegion(result, cols[i], 00212 x0, x1, 00213 0, int(h*2.0/3.0)); 00214 } 00215 00216 x1 = 0; 00217 for (int i = 0; i < 16; ++i) 00218 { 00219 const int x0 = x1; 00220 x1 = int(double(w)*(i+1)/16.0 + 0.5); 00221 const int gray = int(255.0*i/15.0 + 0.5); 00222 fillRegion(result, PixRGB<byte>(gray, gray, gray), 00223 x0, x1, 00224 int(h*2.0/3.0)+1, int(h*5.0/6.0)); 00225 } 00226 00227 fillRegion(result, PixRGB<byte>(255, 0, 0), 00228 0, w, 00229 int(h*5.0/6.0)+1, h); 00230 00231 writeText(result, Point2D<int>(1, int(h*5.0/6.0)+2), 00232 "iLab Neuromorphic Vision", 00233 PixRGB<byte>(0, 0, 0), PixRGB<byte>(255, 0, 0), 00234 SimpleFont::FIXED(10)); 00235 00236 return result; 00237 } 00238 00239 Image<PixRGB<byte> > addLabels(const Image<PixRGB<byte> >& templ, 00240 const int fnum) 00241 { 00242 Image<PixRGB<byte> > result = templ; 00243 00244 std::string fnumstr = sformat("%06d", fnum); 00245 writeText(result, Point2D<int>(1, 1), 00246 fnumstr.c_str(), 00247 PixRGB<byte>(0, 0, 0), PixRGB<byte>(255, 255, 255), 00248 SimpleFont::FIXED(10)); 00249 00250 rutz::time t = rutz::time::wall_clock_now(); 00251 00252 writeText(result, Point2D<int>(1, result.getHeight() - 14), 00253 rutz::format_time(t).c_str(), 00254 PixRGB<byte>(32, 32, 32), PixRGB<byte>(255, 0, 0), 00255 SimpleFont::FIXED(6)); 00256 00257 return result; 00258 } 00259 } 00260 00261 int main(const int argc, const char **argv) 00262 { 00263 //CudaImage cfimage; 00264 00265 MYLOGVERB = LOG_INFO; 00266 ModelManager mgr("Test ObjRec"); 00267 00268 if (mgr.parseCommandLine(argc, argv, "<cudadev> <vdb file> <localport> <server ip> <serverport>", 5, 5) == false) 00269 return 1; 00270 00271 mgr.start(); 00272 00273 // catch signals and redirect them to terminate for clean exit: 00274 signal(SIGHUP, terminateProc); signal(SIGINT, terminateProc); 00275 signal(SIGQUIT, terminateProc); signal(SIGTERM, terminateProc); 00276 signal(SIGALRM, terminateProc); 00277 00278 //get command line options 00279 const std::string devArg = mgr.getExtraArg(0); 00280 const std::string vdbFile = mgr.getExtraArg(1); 00281 const std::string localPortStr = mgr.getExtraArg(2); 00282 const std::string serverIP = mgr.getExtraArg(3); 00283 const std::string serverPortStr = mgr.getExtraArg(4); 00284 00285 bool train = false; 00286 00287 int dev = strtol(devArg.c_str(),NULL,0); 00288 std::cout << "device = " << dev << std::endl; 00289 cudaSetDevice(dev); 00290 //InitCuda(argc,argv); 00291 00292 LINFO("Loading db from %s\n", vdbFile.c_str()); 00293 VisualObjectDB vdb; 00294 vdb.loadFrom(vdbFile,false); 00295 00296 //XWinManaged xwin(Dims(640,280), 00297 //XWinManaged xwin(Dims(532,532), 00298 XWinManaged xwin(Dims(256,256), 00299 -1, -1, "ILab NeoVision2 CUDASIFT Demo"); 00300 00301 int serverPort = strtol(serverPortStr.c_str(),NULL,0); 00302 int localPort = strtol(localPortStr.c_str(),NULL,0); 00303 00304 struct nv2_label_server* labelServer = 00305 nv2_label_server_create(localPort, 00306 serverIP.c_str(), 00307 serverPort); 00308 00309 nv2_label_server_set_verbosity(labelServer,1); //allow warnings 00310 00311 00312 const size_t max_label_history = 1; 00313 std::deque<std::string> recent_labels; 00314 00315 Image<PixRGB<byte> > colorbars = makeColorbars(256, 256); 00316 00317 while (!terminate) 00318 { 00319 Point2D<int> clickLoc = xwin.getLastMouseClick(); 00320 if (clickLoc.isValid()) 00321 train = !train; 00322 00323 struct nv2_image_patch p; 00324 const enum nv2_image_patch_result res = 00325 nv2_label_server_get_current_patch(labelServer, &p); 00326 00327 std::string objName; 00328 if (res == NV2_IMAGE_PATCH_END) 00329 { 00330 LINFO("ok, quitting"); 00331 break; 00332 } 00333 else if (res == NV2_IMAGE_PATCH_NONE) 00334 { 00335 usleep(10000); 00336 continue; 00337 } 00338 else if (res == NV2_IMAGE_PATCH_VALID) 00339 { 00340 if (p.type != NV2_PIXEL_TYPE_RGB24) 00341 { 00342 LINFO("got a non-rgb24 patch; ignoring %i", p.type); 00343 continue; 00344 } 00345 00346 if (p.width * p.height == 1) 00347 { 00348 xwin.drawImage(addLabels(colorbars, p.id)); 00349 continue; 00350 } 00351 00352 Image<PixRGB<byte> > bimage(p.width, p.height, NO_INIT); 00353 memcpy(bimage.getArrayPtr(), p.data, p.width*p.height*3); 00354 Image<PixRGB<byte> > inputImage = bimage; //works, default 00355 printf("inputImage w=%d, h=%d\n",inputImage.getWidth(),inputImage.getHeight()); 00356 00357 xwin.drawImage(inputImage); 00358 float score = 0.0; 00359 std::string objName = matchObject(inputImage, vdb, score); 00360 //printf("File %s, Number %d\n",__FILE__,__LINE__); 00361 //printf("objName=%s\n",objName.c_str()); 00362 if (objName == "nomatch") 00363 { 00364 recent_labels.resize(0); 00365 // train = true; //Every image that doesn't match is training 00366 if (train) 00367 { 00368 printf("Enter a label for this object:\n"); 00369 std::getline(std::cin, objName); 00370 printf("You typed '%s'\n", objName.c_str()); 00371 00372 if (objName == "exit") 00373 break; 00374 else if (objName != "") 00375 { 00376 #ifdef GPUSIFT 00377 rutz::shared_ptr<CUDAVisualObject> 00378 vo(new CUDAVisualObject(objName.c_str(), "NULL", inputImage, 00379 Point2D<int>(-1,-1), 00380 std::vector<float>(), 00381 std::vector< rutz::shared_ptr<Keypoint> >(), 00382 false,true)); 00383 #else 00384 rutz::shared_ptr<VisualObject> 00385 vo(new VisualObject(objName.c_str(), "NULL", inputImage, 00386 Point2D<int>(-1,-1), 00387 std::vector<float>(), 00388 std::vector< rutz::shared_ptr<Keypoint> >(), 00389 false,true)); 00390 #endif 00391 vdb.addObject(vo); 00392 vdb.saveTo(vdbFile); 00393 } 00394 } 00395 } 00396 else 00397 { 00398 recent_labels.push_back(objName); 00399 while (recent_labels.size() > max_label_history) 00400 recent_labels.pop_front(); 00401 00402 const std::string bestObjName = 00403 getBestLabel(recent_labels, 1); 00404 00405 if (bestObjName.size() > 0) 00406 { 00407 struct nv2_patch_label l; 00408 l.protocol_version = NV2_LABEL_PROTOCOL_VERSION; 00409 l.patch_id = p.id; 00410 //printf("File %s, Number %d\n",__FILE__,__LINE__); 00411 //printf("objName=%s, score=%f\n",objName.c_str(),score); 00412 l.confidence = (int)(score*10000.0F); 00413 snprintf(l.source, sizeof(l.source), "%s", 00414 "ObjRec"); 00415 snprintf(l.name, sizeof(l.name), "%s", 00416 objName.c_str()); 00417 snprintf(l.extra_info, sizeof(l.extra_info), 00418 "%ux%u #%u", 00419 (unsigned int) p.width, 00420 (unsigned int) p.height, 00421 (unsigned int) p.id); 00422 00423 nv2_label_server_send_label(labelServer, &l); 00424 00425 LINFO("sent label '%s (%s)'\n", l.name, l.extra_info); 00426 } 00427 } 00428 00429 nv2_image_patch_destroy(&p); 00430 } 00431 00432 } 00433 00434 if (terminate) 00435 LINFO("Ending application because a signal was caught"); 00436 00437 // This seems to lock up the server at shutdown 00438 //nv2_label_server_destroy(labelServer); 00439 } 00440 00441 // ###################################################################### 00442 /* So things look consistent in everyone's emacs... */ 00443 /* Local Variables: */ 00444 /* indent-tabs-mode: nil */ 00445 /* End: */ 00446 00447 #endif