hmaxfl-server.C

Go to the documentation of this file.
00001 /*!@file HMAX/hmaxfl-server.C Hmax Label Server class */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2001 by the //
00005 // University of Southern California (USC) and the iLab at USC.         //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Dan Parks <danielfp@usc.edu>
00034 // $HeadURL$
00035 // $Id$
00036 //
00037 
#include "Component/ModelManager.H"
#include "Learn/Bayes.H"
#include "GUI/DebugWin.H"
#include "NeovisionII/nv2_common.h"
#include "NeovisionII/nv2_label_server.h"
#include "HMAX/HmaxFL.H"
#include "GUI/XWindow.H"
#include "Image/Image.H"
#include "Image/ImageSet.H"
#include "Image/ColorOps.H"
#include "Image/ShapeOps.H"
#include "Image/CutPaste.H"
#include "Image/FilterOps.H"
#include "Image/Rectangle.H"
#include "Image/MathOps.H"
#include "Image/DrawOps.H"
#include "Image/MatrixOps.H"
#include "Image/Transforms.H"
#include "Image/Convolutions.H"
#include "Learn/SVMClassifier.H"
#include "Media/FrameSeries.H"
#include "nub/ref.h"
#include "Raster/GenericFrame.H"
#include "Raster/Raster.H"
#include "Util/Types.H"
#include "Util/log.H"

#include <signal.h>

#include "rutz/fstring.h"
#include "rutz/time.h"
#include "rutz/timeformat.h"


#include <fstream>
#include <map>
#include <vector>
#include <deque>
#include <utility>
#include <iostream>
#include <iomanip>
#include <string>
#include <unistd.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>
00081 
00082 
// number of orientations to use in HmaxFL
#define NORI 4
// row length of each C2 response buffer allocated in main(); also passed
// as the second dimension to the SVM predictor and to writeFeatures()
#define NUM_PATCHES_PER_SIZE 250


// not referenced anywhere in the visible part of this file
const bool USECOLOR = false;

//! Shutdown request flag, set by terminateProc() and polled by main()
/*! The flag is written from a signal handler, so it is declared as
    volatile sig_atomic_t: this is the object type the language
    guarantees can be safely assigned from a handler, and volatile keeps
    the compiler from caching the value across iterations of the
    polling loop. It is still usable in boolean context. */
volatile sig_atomic_t terminate = 0;

//! Signal handler that asks the main loop to stop
/*! @param s number of the caught signal (unused) */
void terminateProc(int s)
{
  (void) s;
  terminate = 1;
}
00096 
//! Pick the most frequent label out of a history of labels
/*! Walks the history front to back while counting occurrences of each
    label. Whenever the running count of the current label reaches or
    exceeds the best count seen so far, that position becomes the
    winner, so ties are resolved in favour of the label seen later.

    @param labels   history of labels to vote over
    @param mincount minimum number of occurrences the winner must have
    @return the winning label, or an empty string if the history is
            empty or the winner occurs fewer than mincount times */
std::string getBestLabel(const std::deque<std::string>& labels,
                         const size_t mincount)
{
  typedef std::deque<std::string>::const_iterator LabelIter;

  std::map<std::string, size_t> tally;
  size_t topCount = 0;
  LabelIter winner = labels.end();

  for (LabelIter it = labels.begin(); it != labels.end(); ++it)
    {
      const size_t seen = ++tally[*it];

      // a tie still moves the winner to the more recent entry
      if (seen < topCount)
        continue;

      topCount = seen;
      winner = it;
    }

  if (winner == labels.end() || topCount < mincount)
    return std::string();

  return *winner;
}
00124 
00125 namespace
00126 {
00127   void fillRegion(Image<PixRGB<byte> >& img, PixRGB<byte> col,
00128                   const int x0, const int x1,
00129                   const int y0, const int y1)
00130   {
00131     for (int x = x0; x < x1; ++x)
00132       for (int y = y0; y < y1; ++y)
00133         img.setVal(x, y, col);
00134   }
00135 
00136   Image<PixRGB<byte> > makeColorbars(const int w, const int h)
00137   {
00138     Image<PixRGB<byte> > result = Image<PixRGB<byte> >(w, h, ZEROS);
00139 
00140     const PixRGB<byte> cols[] =
00141       {
00142         PixRGB<byte>(255, 255, 255), // white
00143         PixRGB<byte>(255, 255, 0),   // yellow
00144         PixRGB<byte>(0,   255, 255), // cyan
00145         PixRGB<byte>(0,   255, 0),   // green
00146         PixRGB<byte>(255, 0,   255), // magenta
00147         PixRGB<byte>(255, 0,   0),   // red
00148         PixRGB<byte>(0,   0,   255)  // blue
00149       };
00150 
00151     int x1 = 0;
00152     for (int i = 0; i < 7; ++i)
00153       {
00154         const int x0 = x1+1;
00155         x1 = int(double(w)*(i+1)/7.0 + 0.5);
00156         fillRegion(result, cols[i],
00157                    x0, x1,
00158                    0, int(h*2.0/3.0));
00159       }
00160 
00161     x1 = 0;
00162     for (int i = 0; i < 16; ++i)
00163       {
00164         const int x0 = x1;
00165         x1 = int(double(w)*(i+1)/16.0 + 0.5);
00166         const int gray = int(255.0*i/15.0 + 0.5);
00167         fillRegion(result, PixRGB<byte>(gray, gray, gray),
00168                    x0, x1,
00169                    int(h*2.0/3.0)+1, int(h*5.0/6.0));
00170       }
00171 
00172     fillRegion(result, PixRGB<byte>(255, 0, 0),
00173                0, w,
00174                int(h*5.0/6.0)+1, h);
00175 
00176     writeText(result, Point2D<int>(1, int(h*5.0/6.0)+2),
00177               "iLab Neuromorphic Vision",
00178               PixRGB<byte>(0, 0, 0), PixRGB<byte>(255, 0, 0),
00179               SimpleFont::FIXED(10));
00180 
00181     return result;
00182   }
00183 
00184   Image<PixRGB<byte> > addLabels(const Image<PixRGB<byte> >& templ,
00185                                  const int fnum)
00186   {
00187     Image<PixRGB<byte> > result = templ;
00188 
00189     std::string fnumstr = sformat("%06d", fnum);
00190     writeText(result, Point2D<int>(1, 1),
00191               fnumstr.c_str(),
00192               PixRGB<byte>(0, 0, 0), PixRGB<byte>(255, 255, 255),
00193               SimpleFont::FIXED(10));
00194 
00195     rutz::time t = rutz::time::wall_clock_now();
00196 
00197     writeText(result, Point2D<int>(1, result.getHeight() - 14),
00198               rutz::format_time(t).c_str(),
00199               PixRGB<byte>(32, 32, 32), PixRGB<byte>(255, 0, 0),
00200               SimpleFont::FIXED(6));
00201 
00202     return result;
00203   }
00204 
00205   int maxKey(std::map<int, std::string> m)
00206   {
00207     std::map<int, std::string>::iterator cur,end;
00208     cur = m.begin(); end = m.end();
00209     int mKey=-1;
00210     while(cur!=end)
00211       {
00212         if(cur->first > mKey)
00213           mKey = cur->first;
00214         cur++;
00215       }
00216     return mKey;
00217   }
00218 
00219   std::map<int, std::string> loadLabels(std::string labelFile)
00220   {
00221     std::map<int, std::string> labels;
00222     FILE *fp = fopen(labelFile.c_str(),"r");
00223     int ret;
00224     if(fp==NULL) return labels;
00225     while(1)
00226       {
00227         int id; char clabel[80];
00228         ret = fscanf(fp,"%d ",&id);
00229         if(ret != 1)
00230         {
00231           fprintf(stderr,"fscanf failed with %d\n",ret);
00232           break;
00233         }
00234         ret = fscanf(fp,"%80s",clabel);
00235         if(ret != 1)
00236           {
00237             fprintf(stderr,"fscanf failed with %d\n",ret);
00238             break;
00239           }
00240         printf("loaded label %d %s\n",id,clabel);
00241         std::string label = std::string(clabel);
00242         labels.insert(std::pair<int, std::string>(id,label));
00243       }
00244     fclose(fp);
00245     return labels;
00246   }
00247 
00248   void writeLabels(std::string labelFile, std::map<int, std::string> labels)
00249   {
00250     FILE *fp = fopen(labelFile.c_str(),"w");
00251     if(fp==NULL) return;
00252     std::map<int, std::string>::iterator cur,end;
00253     cur = labels.begin(); end = labels.end();
00254     while(cur!=end)
00255       {
00256         fprintf(fp,"%d %80s\n",cur->first, (cur->second).c_str());
00257         cur++;
00258       }
00259     fclose(fp);
00260   }
00261 
00262   int findLabel(std::string label, std::map<int, std::string> labels)
00263   {
00264     std::map<int, std::string>::iterator cur,end;
00265     cur = labels.begin(); end = labels.end();
00266     while(cur!=end)
00267       {
00268         if(cur->second.compare(label)==0)
00269           return cur->first;
00270         cur++;
00271       }
00272     return -1;
00273   }
00274 
00275 
00276   int addLabel(std::string label, std::map<int, std::string> &labels)
00277   {
00278     int id = maxKey(labels)+1;
00279     labels.insert(std::pair<int, std::string>(id,label));
00280     return id;
00281   }
00282 
00283   bool idExists(int id, std::map<int, std::string> &labels)
00284   {
00285     if(labels.find(id) == labels.end())
00286       return false;
00287     else
00288       return true;
00289   }
00290 
00291   void writeFeatures(std::string trainingFileName, int id, float **features, int dim1, int dim2)
00292   {
00293     std::ofstream trainFile;
00294     trainFile.open(trainingFileName.c_str(),std::ios::app);
00295 
00296     if (trainFile.is_open())
00297       {
00298         trainFile << id << " ";
00299         for(int i=0;i<dim1;i++)
00300           {
00301             for(int j=0;j<dim2;j++)
00302               {
00303                 trainFile << std::setiosflags(std::ios::fixed) << std::setprecision(4) <<
00304                   (i*dim2+j+1) << ":" << features[i][j] << " ";
00305               }
00306           }
00307         trainFile << std::endl;
00308       }
00309 
00310     trainFile.close();
00311   }
00312 
00313 }
00314 
00315 
00316 int main(const int argc, const char **argv)
00317 {
00318 
00319   MYLOGVERB = LOG_INFO;
00320 
00321   ModelManager *mgr = new ModelManager("Hmax with Feature Learning Server");
00322 
00323 
00324   mgr->exportOptions(MC_RECURSE);
00325 
00326 
00327   if (mgr->parseCommandLine(
00328                             (const int)argc, (const char**)argv, "<labelFile> <c1patchesDir> <featuresFile> <localport> <server_ip> <serverport> <svmModelFile> <svmRangeFile> ", 6, 8) == false)
00329     return 1;
00330 
00331   std::string devArg, serverIP,serverPortStr,localPortStr;
00332   std::string c1PatchesBaseDir;
00333   std::string svmModelFileName, svmRangeFileName;
00334   std::string c2FileName;
00335   std::string labelFileName, featuresFileName;
00336   std::string trainPosName; // Directory where positive images are
00337 
00338   // Load the SVM Classifier Model and Range in
00339   SVMClassifier svm;
00340 
00341   // Whether we are in training mode
00342   bool train = false;
00343 
00344 
00345   // Now we run
00346   mgr->start();
00347 
00348   // catch signals and redirect them to terminate for clean exit:
00349   signal(SIGHUP, terminateProc); signal(SIGINT, terminateProc);
00350   signal(SIGQUIT, terminateProc); signal(SIGTERM, terminateProc);
00351   signal(SIGALRM, terminateProc);
00352 
00353 
00354   labelFileName = mgr->getExtraArg(0);
00355   c1PatchesBaseDir = mgr->getExtraArg(1);
00356   featuresFileName = mgr->getExtraArg(2);
00357   localPortStr = mgr->getExtraArg(3);
00358   serverIP = mgr->getExtraArg(4);
00359   serverPortStr = mgr->getExtraArg(5);
00360   // If we are given the SVM info, load it in
00361   if(mgr->numExtraArgs() > 6)
00362   {
00363     svmModelFileName = mgr->getExtraArg(7);
00364     svm.readModel(svmModelFileName);
00365     if(mgr->numExtraArgs() == 8)
00366       {    
00367         svmRangeFileName = mgr->getExtraArg(8);
00368         svm.readRange(svmRangeFileName);
00369       }
00370   }
00371   else
00372   {
00373     // With no SVM data, we should be in training mode
00374     train = true;
00375   }
00376   std::map<int,std::string> labels = loadLabels(labelFileName);
00377 
00378   // Get an HmaxFL object:
00379   std::vector<int> scss(9);
00380   scss[0] = 1; scss[1] = 3; scss[2] = 5; scss[3] = 7; scss[4] = 9;
00381   scss[5] = 11; scss[6] = 13; scss[7] = 15; scss[8] = 17;
00382   std::vector<int> spss(8);
00383   spss[0] = 8; spss[1] = 10; spss[2] = 12; spss[3] = 14;
00384   spss[4] = 16; spss[5] = 18; spss[6] = 20; spss[7] = 22;
00385   // std::vector<int> scss(4);
00386   // scss[0] = 3; scss[1] = 7; scss[2] = 11; scss[3] = 15;
00387   // std::vector<int> spss(4);
00388   // spss[0] = 10; spss[1] = 14; spss[2] = 18; spss[3] = 22;
00389 
00390   HmaxFL hmax(NORI, spss, scss);
00391 
00392   // Read the C1 Patches from file
00393   hmax.readInC1Patches(c1PatchesBaseDir);
00394 
00395   std::vector<int> patchSizes = hmax.getC1PatchSizes();
00396 
00397   // Allocate memory for C2 vectors based on the number of patch sizes and patches per size
00398   float **c2Res = new float*[patchSizes.size()];
00399   for(unsigned int i=0;i<patchSizes.size();i++) {
00400     c2Res[i] = new float[NUM_PATCHES_PER_SIZE];
00401   }
00402 
00403 
00404   XWinManaged xwin(Dims(256,256),
00405                    -1, -1, "ILab Robot Head Demo");
00406 
00407   int serverPort = strtol(serverPortStr.c_str(),NULL,0);
00408   int localPort = strtol(localPortStr.c_str(),NULL,0);
00409   struct nv2_label_server* labelServer =
00410     nv2_label_server_create(localPort,
00411                             serverIP.c_str(),
00412                             serverPort);
00413 
00414   nv2_label_server_set_verbosity(labelServer,1); //allow warnings
00415 
00416 
00417   const size_t max_label_history = 1;
00418   std::deque<std::string> recent_labels;
00419 
00420   Image<PixRGB<byte> > colorbars = makeColorbars(256, 256);
00421 
00422 
00423   while(!terminate)
00424     {
00425       Point2D<int> clickLoc = xwin.getLastMouseClick();
00426       if (clickLoc.isValid())
00427         train = !train;
00428 
00429       struct nv2_image_patch p;
00430       const enum nv2_image_patch_result res =
00431         nv2_label_server_get_current_patch(labelServer, &p);
00432 
00433       std::string objName;
00434       if (res == NV2_IMAGE_PATCH_END)
00435         {
00436           LINFO("ok, quitting");
00437           break;
00438         }
00439       else if (res == NV2_IMAGE_PATCH_NONE)
00440         {
00441           usleep(10000);
00442           continue;
00443         }
00444       else if (res == NV2_IMAGE_PATCH_VALID)
00445         {
00446           if (p.type != NV2_PIXEL_TYPE_RGB24)
00447             {
00448               LINFO("got a non-rgb24 patch; ignoring %i", p.type);
00449               continue;
00450             }
00451 
00452           if (p.width * p.height == 1)
00453             {
00454               xwin.drawImage(addLabels(colorbars, p.id));
00455               continue;
00456             }
00457 
00458           Image<PixRGB<byte> > img(p.width, p.height, NO_INIT);
00459           // Get the test image from the socket
00460           memcpy(img.getArrayPtr(), p.data, p.width*p.height*3);
00461 
00462           Image<PixRGB<byte> > inputImg = rescale(img, 256, 256);
00463 
00464           xwin.drawImage(inputImg);
00465 
00466           Image<float> inputf = luminanceNTSC(inputImg);
00467 
00468           // Get the C2 Layer Response
00469           hmax.getC2(inputf,c2Res);
00470           if(!train)
00471             {
00472               // Output the c2 responses into a libsvm
00473               double pred = svm.predict(c2Res,patchSizes.size(),NUM_PATCHES_PER_SIZE);
00474               printf("Prediction is %f\n",pred);
00475               int predId = (int) pred;
00476               bool knowObject = idExists(predId,labels);
00477               if(knowObject)
00478               {
00479                 printf("Known object %d\n",predId);
00480                 objName = labels[predId];
00481               }
00482               else
00483               {
00484                 printf("Unknown object %d\n",predId);
00485                 char tmp[200];
00486                 sprintf(tmp,"Unknown-%d",predId);
00487                 objName = std::string(tmp);
00488               }
00489               recent_labels.push_back(objName);
00490               while (recent_labels.size() > max_label_history)
00491                 recent_labels.pop_front();
00492 
00493               struct nv2_patch_label l;
00494               l.protocol_version = NV2_LABEL_PROTOCOL_VERSION;
00495               l.patch_id = p.id;
00496               // FIX ME -- SVMClassifier has no confidence score
00497               l.confidence = (int)(100.0F);
00498               snprintf(l.source, sizeof(l.source), "%s",
00499                        "HmaxFL");
00500               snprintf(l.name, sizeof(l.name), "%s",
00501                        objName.c_str());
00502               snprintf(l.extra_info, sizeof(l.extra_info),
00503                        "%ux%u #%u",
00504                        (unsigned int) p.width,
00505                        (unsigned int) p.height,
00506                        (unsigned int) p.id);
00507 
00508               nv2_label_server_send_label(labelServer, &l);
00509               LINFO("sent label '%s (%s)'\n", l.name, l.extra_info);
00510             }
00511           // Determine what the object is
00512           else
00513             {
00514               printf("Enter a label for this object:\n");
00515               std::getline(std::cin, objName);
00516               printf("You typed '%s'\n", objName.c_str());
00517 
00518               if (objName == "exit")
00519                 break;
00520               else if (objName != "")
00521                 {
00522                   int newId = findLabel(objName,labels);
00523                   if(newId == -1)
00524                     {
00525                       newId = addLabel(objName,labels);
00526                       printf("No existing label found, adding [%s]\n",objName.c_str());
00527                     }
00528                   else
00529                     {
00530                       printf("Found existing label\n");
00531                     }
00532                   writeFeatures(featuresFileName,newId,c2Res,patchSizes.size(),NUM_PATCHES_PER_SIZE);
00533                 }
00534             }
00535 
00536           nv2_image_patch_destroy(&p);
00537         }
00538     }
00539 
00540   writeLabels(labelFileName,labels);
00541   // Free memory
00542   for(unsigned int i=0;i<patchSizes.size();i++) {
00543     delete[] c2Res[i];
00544   }
00545   delete [] c2Res;
00546 
00547   if (terminate)
00548     LINFO("Ending application because a signal was caught");
00549 
00550   //nv2_label_server_destroy(labelServer);
00551   LINFO("Got Here");
00552 
00553   return 0;
00554 }
00555 
00556 
00557 
00558 
00559 // ######################################################################
00560 /* So things look consistent in everyone's emacs... */
00561 /* Local Variables: */
00562 /* indent-tabs-mode: nil */
00563 /* End: */
Generated on Sun May 8 08:40:41 2011 for iLab Neuromorphic Vision Toolkit by  doxygen 1.6.3