objRec-serverPCA.C

/*!@file NeovisionII/objRec-serverPCA.C Object-recognition label server
  with a (currently stubbed) PCA recognizer */
00002 
00003 // //////////////////////////////////////////////////////////////////// //
00004 // The iLab Neuromorphic Vision C++ Toolkit - Copyright (C) 2000-2005   //
00005 // by the University of Southern California (USC) and the iLab at USC.  //
00006 // See http://iLab.usc.edu for information about this project.          //
00007 // //////////////////////////////////////////////////////////////////// //
00008 // Major portions of the iLab Neuromorphic Vision Toolkit are protected //
00009 // under the U.S. patent ``Computation of Intrinsic Perceptual Saliency //
00010 // in Visual Environments, and Applications'' by Christof Koch and      //
00011 // Laurent Itti, California Institute of Technology, 2001 (patent       //
00012 // pending; application number 09/912,225 filed July 23, 2001; see      //
00013 // http://pair.uspto.gov/cgi-bin/final/home.pl for current status).     //
00014 // //////////////////////////////////////////////////////////////////// //
00015 // This file is part of the iLab Neuromorphic Vision C++ Toolkit.       //
00016 //                                                                      //
00017 // The iLab Neuromorphic Vision C++ Toolkit is free software; you can   //
00018 // redistribute it and/or modify it under the terms of the GNU General  //
00019 // Public License as published by the Free Software Foundation; either  //
00020 // version 2 of the License, or (at your option) any later version.     //
00021 //                                                                      //
00022 // The iLab Neuromorphic Vision C++ Toolkit is distributed in the hope  //
00023 // that it will be useful, but WITHOUT ANY WARRANTY; without even the   //
00024 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR      //
00025 // PURPOSE.  See the GNU General Public License for more details.       //
00026 //                                                                      //
00027 // You should have received a copy of the GNU General Public License    //
00028 // along with the iLab Neuromorphic Vision C++ Toolkit; if not, write   //
00029 // to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,   //
00030 // Boston, MA 02111-1307 USA.                                           //
00031 // //////////////////////////////////////////////////////////////////// //
00032 //
00033 // Primary maintainer for this file: Lior Elazary <elazary@usc.edu>
00034 // $HeadURL: svn://isvn.usc.edu/software/invt/trunk/saliency/src/NeovisionII/objRec-serverPCA.C $
00035 // $Id: objRec-serverPCA.C 13901 2010-09-09 15:12:26Z lior $
00036 //
00037 
00038 #ifndef OBJREC_SERVERPCA_C_DEFINED
00039 #define OBJREC_SERVERPCA_C_DEFINED
00040 
00041 #include "Image/OpenCVUtil.H"
00042 #include <stdlib.h>
00043 #include <stdio.h>
00044 #include <signal.h>
00045 #include "Component/ModelManager.H"
00046 #include "Image/Image.H"
00047 #include "Image/ImageSet.H"
00048 #include "Image/ShapeOps.H"
00049 #include "Image/CutPaste.H"
00050 #include "Image/DrawOps.H"
00051 #include "Image/FilterOps.H"
00052 #include "Image/ColorOps.H"
00053 #include "Image/Transforms.H"
00054 #include "Image/MathOps.H"
00055 #include "Learn/Bayes.H"
00056 #include "GUI/DebugWin.H"
00057 #include "SIFT/ScaleSpace.H"
00058 #include "SIFT/VisualObject.H"
00059 #include "SIFT/Keypoint.H"
00060 #include "SIFT/VisualObjectDB.H"
00061 #include "NeovisionII/nv2_common.h"
00062 #include "NeovisionII/nv2_label_server.h"
00063 #include "rutz/fstring.h"
00064 #include "rutz/time.h"
00065 #include "rutz/timeformat.h"
00066 #include "Media/FrameSeries.H"
00067 #include "Transport/FrameInfo.H"
00068 #include "Raster/GenericFrame.H"
00069 #include "GUI/XWinManaged.H"
00070 #include "GUI/ImageDisplayStream.H"
00071 
00072 #include <iostream> // for std::cin
00073 
//! Compile-time color switch; not referenced by the code visible in this file.
const bool USECOLOR = false;

//! Shutdown request flag: set by terminateProc(), polled by main()'s loop.
/*! Declared as volatile sig_atomic_t because that is the only object
    type a signal handler may portably write to; volatile also keeps
    the polling loop from caching the value across iterations. */
volatile sig_atomic_t terminate = 0;

//! Signal handler that asks the main loop to exit cleanly.
/*! @param s number of the signal that was caught (ignored). */
void terminateProc(int s)
{
  (void)s; // every handled signal results in the same request
  terminate = 1;
}
00082 
//! Pick the most frequent label out of a history of labels.
/*! A running count is kept per label while walking the history from
    front to back. A label becomes the current winner whenever its
    running count reaches or exceeds the best count seen so far, so on
    ties the label that got there last wins.

    @param labels   label history, oldest entry first
    @param mincount minimum number of occurrences the winner must have
    @return the winning label, or an empty string if the history is
            empty or the winner occurs fewer than mincount times */
std::string getBestLabel(const std::deque<std::string>& labels,
                         const size_t mincount)
{
  std::map<std::string, size_t> tally;

  const std::string* winner = 0;
  size_t winnerVotes = 0;

  for (std::deque<std::string>::const_iterator it = labels.begin();
       it != labels.end(); ++it)
    {
      const size_t votes = ++tally[*it];

      if (votes < winnerVotes)
        continue;

      // ties deliberately move the winner to the later entry
      winnerVotes = votes;
      winner = &(*it);
    }

  if (winner == 0 || winnerVotes < mincount)
    return std::string();

  return *winner;
}
00110 
00111 namespace
00112 {
00113   void fillRegion(Image<PixRGB<byte> >& img, PixRGB<byte> col,
00114                   const int x0, const int x1,
00115                   const int y0, const int y1)
00116   {
00117     for (int x = x0; x < x1; ++x)
00118       for (int y = y0; y < y1; ++y)
00119         img.setVal(x, y, col);
00120   }
00121 
00122   Image<PixRGB<byte> > makeColorbars(const int w, const int h)
00123   {
00124     Image<PixRGB<byte> > result = Image<PixRGB<byte> >(w, h, ZEROS);
00125 
00126     const PixRGB<byte> cols[] =
00127       {
00128         PixRGB<byte>(255, 255, 255), // white
00129         PixRGB<byte>(255, 255, 0),   // yellow
00130         PixRGB<byte>(0,   255, 255), // cyan
00131         PixRGB<byte>(0,   255, 0),   // green
00132         PixRGB<byte>(255, 0,   255), // magenta
00133         PixRGB<byte>(255, 0,   0),   // red
00134         PixRGB<byte>(0,   0,   255)  // blue
00135       };
00136 
00137     int x1 = 0;
00138     for (int i = 0; i < 7; ++i)
00139       {
00140         const int x0 = x1+1;
00141         x1 = int(double(w)*(i+1)/7.0 + 0.5);
00142         fillRegion(result, cols[i],
00143                    x0, x1,
00144                    0, int(h*2.0/3.0));
00145       }
00146 
00147     x1 = 0;
00148     for (int i = 0; i < 16; ++i)
00149       {
00150         const int x0 = x1;
00151         x1 = int(double(w)*(i+1)/16.0 + 0.5);
00152         const int gray = int(255.0*i/15.0 + 0.5);
00153         fillRegion(result, PixRGB<byte>(gray, gray, gray),
00154                    x0, x1,
00155                    int(h*2.0/3.0)+1, int(h*5.0/6.0));
00156       }
00157 
00158     fillRegion(result, PixRGB<byte>(255, 0, 0),
00159                0, w,
00160                int(h*5.0/6.0)+1, h);
00161 
00162     writeText(result, Point2D<int>(1, int(h*5.0/6.0)+2),
00163               "iLab Neuromorphic Vision",
00164               PixRGB<byte>(0, 0, 0), PixRGB<byte>(255, 0, 0),
00165               SimpleFont::FIXED(10));
00166 
00167     return result;
00168   }
00169 
00170   Image<PixRGB<byte> > addLabels(const Image<PixRGB<byte> >& templ,
00171                                  const int fnum)
00172   {
00173     Image<PixRGB<byte> > result = templ;
00174 
00175     std::string fnumstr = sformat("%06d", fnum);
00176     writeText(result, Point2D<int>(1, 1),
00177               fnumstr.c_str(),
00178               PixRGB<byte>(0, 0, 0), PixRGB<byte>(255, 255, 255),
00179               SimpleFont::FIXED(10));
00180 
00181     rutz::time t = rutz::time::wall_clock_now();
00182 
00183     writeText(result, Point2D<int>(1, result.getHeight() - 14),
00184               rutz::format_time(t).c_str(),
00185               PixRGB<byte>(32, 32, 32), PixRGB<byte>(255, 0, 0),
00186               SimpleFont::FIXED(6));
00187 
00188     return result;
00189   }
00190 }
00191 
00192 void trainPCA(ImageSet<byte> images)
00193 {
00194   int imagesCollected = 10;
00195   IplImage* input[imagesCollected];
00196 
00197   //IplImage* input = img2ipl(images[0]);
00198   CvMat* pcaInputs = cvCreateMat(imagesCollected, (input[0]->width * input[0]->height), CV_8UC1);
00199   CvMat* average = cvCreateMat(1, (input[0]->width * input[0]->height), CV_32FC1);
00200   CvMat* eigenValues = cvCreateMat(1, std::min(pcaInputs->rows, pcaInputs->cols), CV_32FC1);
00201   CvMat* eigens = cvCreateMat(imagesCollected, (input[0]->width * input[0]->height), CV_32FC1);
00202   CvMat* coefficients = cvCreateMat(imagesCollected, eigens->rows, CV_32FC1);
00203 
00204   // construct required structures for later recognition
00205 
00206   CvMat* recogniseCoeffs = cvCreateMat(1, eigens->rows, CV_32FC1);
00207   CvMat* recognise = cvCreateMat(1, input[0]->width * input[0]->height, CV_8UC1);
00208 
00209   for (int i = 0; i < imagesCollected; i++){
00210     for (int j = 0; j < (input[0]->width * input[0]->height); j++){
00211       CV_MAT_ELEM(*pcaInputs, uchar, i, j) = (input[i])->imageData[(j)];
00212     }
00213   }
00214 
00215   // compute eigen image representation
00216 
00217   cvCalcPCA(pcaInputs, average, eigenValues, eigens, CV_PCA_DATA_AS_ROW);
00218 
00219   // compute eigen. co-efficients for all sample images and store
00220 
00221   cvProjectPCA(pcaInputs, average, eigens, coefficients);
00222 
00223   for (int i = 0; i < imagesCollected; i++){cvReleaseImage( &(input[i]));}
00224 
00225   // release matrix objects
00226 
00227   cvReleaseMat( &pcaInputs);
00228   cvReleaseMat( &average );
00229   cvReleaseMat( &eigenValues );
00230   cvReleaseMat( &eigens );
00231   cvReleaseMat( &coefficients );
00232   cvReleaseMat( &recogniseCoeffs );
00233   cvReleaseMat( &recognise );
00234 
00235 }
00236 
00237 std::string recogPCA(Image<byte> img)
00238 {
00239 
00240   // project image to eigen space
00241 
00242 
00243 //  for (int j = 0; j < (input[0]->width * input[0]->height); j++){
00244 //    CV_MAT_ELEM(*recognise, uchar, 0, j) = (grayImg)->imageData[(j)];
00245 //  }
00246 //
00247 //  cvProjectPCA(recognise, average, eigens, recogniseCoeffs);
00248 //
00249 //  // check which set of stored sample co-efficients it is
00250 //  // closest too and then display the corresponding image
00251 //
00252 //  double closestCoeffDistance = HUGE;
00253 //  int closestImage = 0;
00254 //
00255 //  for (int i = 0; i < imagesCollected; i++)
00256 //  {
00257 //    double diff = 0;
00258 //    for(int j = 0; j < recogniseCoeffs->cols; j++)
00259 //    {
00260 //      diff += fabs(CV_MAT_ELEM(*coefficients, float, i, j)
00261 //          - CV_MAT_ELEM(*recogniseCoeffs, float, 0, j));
00262 //    }
00263 //    if (diff < closestCoeffDistance){
00264 //      closestCoeffDistance = diff;
00265 //      closestImage = i;
00266 //
00267 //    }
00268 //  }
00269 
00270   return std::string("nomatch");
00271 }
00272 
00273 
00274 Point2D<int> getMouseClick(nub::ref<OutputFrameSeries> &ofs, const char* wname)
00275 {
00276   const nub::soft_ref<ImageDisplayStream> ids =
00277     ofs->findFrameDestType<ImageDisplayStream>();
00278 
00279   const rutz::shared_ptr<XWinManaged> uiwin =
00280     ids.is_valid()
00281     ? ids->getWindow(wname)
00282     : rutz::shared_ptr<XWinManaged>();
00283 
00284   if (uiwin.is_valid())
00285     return uiwin->getLastMouseClick();
00286   else
00287     return Point2D<int>(-1,-1);
00288 }
00289 
00290 int main(const int argc, const char **argv)
00291 {
00292 
00293   MYLOGVERB = LOG_INFO;
00294   ModelManager mgr("Test ObjRec");
00295 
00296   nub::ref<OutputFrameSeries> ofs(new OutputFrameSeries(mgr));
00297   mgr.addSubComponent(ofs);
00298 
00299   nub::ref<InputFrameSeries> ifs(new InputFrameSeries(mgr));
00300   mgr.addSubComponent(ifs);
00301 
00302 
00303 
00304 
00305   if (mgr.parseCommandLine(argc, argv, "<vdb file> <server ip>", 2, 2) == false)
00306     return 1;
00307 
00308   mgr.start();
00309 
00310   // catch signals and redirect them to terminate for clean exit:
00311   signal(SIGHUP, terminateProc); signal(SIGINT, terminateProc);
00312   signal(SIGQUIT, terminateProc); signal(SIGTERM, terminateProc);
00313   signal(SIGALRM, terminateProc);
00314 
00315   //get command line options
00316   const std::string vdbFile = mgr.getExtraArg(0);
00317   const std::string server_ip = mgr.getExtraArg(1);
00318   bool train = false;
00319 
00320   struct nv2_label_server* labelServer =
00321     nv2_label_server_create(9930,
00322                             server_ip.c_str(),
00323                             9931);
00324 
00325   nv2_label_server_set_verbosity(labelServer,1); //allow warnings
00326 
00327 
00328   const size_t max_label_history = 1;
00329   std::deque<std::string> recent_labels;
00330 
00331   Image<PixRGB<byte> > colorbars = makeColorbars(256, 256);
00332 
00333   bool getImgFromFile = true;
00334   while (!terminate)
00335   {
00336 
00337     Image<PixRGB<byte> > inputImg;
00338     struct nv2_image_patch p;
00339 
00340     if (getImgFromFile)
00341     {
00342 
00343       const FrameState is = ifs->updateNext();
00344       if (is == FRAME_COMPLETE) return 0;
00345       GenericFrame input = ifs->readFrame();
00346       inputImg = input.asRgb();
00347     } else {
00348 
00349       const enum nv2_image_patch_result res =
00350         nv2_label_server_get_current_patch(labelServer, &p);
00351 
00352       std::string objName;
00353       if (res == NV2_IMAGE_PATCH_END)
00354       {
00355         LINFO("ok, quitting");
00356         break;
00357       }
00358       else if (res == NV2_IMAGE_PATCH_NONE)
00359       {
00360         usleep(10000);
00361         continue;
00362       }
00363       else if (res == NV2_IMAGE_PATCH_VALID)
00364       {
00365         if (p.type != NV2_PIXEL_TYPE_RGB24)
00366         {
00367           LINFO("got a non-rgb24 patch; ignoring %i", p.type);
00368           continue;
00369         }
00370 
00371         if (p.width * p.height == 1)
00372         {
00373           //xwin.drawImage(addLabels(colorbars, p.id));
00374           continue;
00375         }
00376 
00377         Image<PixRGB<byte> > img(p.width, p.height, NO_INIT);
00378         memcpy(img.getArrayPtr(), p.data, p.width*p.height*3);
00379 
00380         inputImg = rescale(img, 256, 256);
00381       }
00382     }
00383 
00384     float score = 0;
00385     std::string objName = "nomatch";
00386 
00387     if (inputImg.initialized())
00388     {
00389       ofs->writeRGB(inputImg, "object", FrameInfo("object", SRC_POS));
00390       getchar();
00391       ofs->updateNext();
00392 
00393       Point2D<int> clickLoc = getMouseClick(ofs, "object");
00394       if (clickLoc.isValid())
00395         train = !train;
00396 
00397 
00398       if (objName == "nomatch")
00399       {
00400         recent_labels.resize(0);
00401 
00402         if (train)
00403         {
00404           printf("Enter a label for this object:\n");
00405           std::getline(std::cin, objName);
00406           printf("You typed '%s'\n", objName.c_str());
00407 
00408           if (objName == "exit")
00409             break;
00410           else if (objName != "")
00411           {
00412             //Train object with objName
00413           }
00414         }
00415       }
00416     }
00417 
00418     if (objName != "nomatch" && !getImgFromFile)
00419     {
00420       recent_labels.push_back(objName);
00421       while (recent_labels.size() > max_label_history)
00422         recent_labels.pop_front();
00423 
00424       const std::string bestObjName =
00425         getBestLabel(recent_labels, 1);
00426 
00427       if (bestObjName.size() > 0)
00428       {
00429         struct nv2_patch_label l;
00430         l.protocol_version = NV2_LABEL_PROTOCOL_VERSION;
00431         l.patch_id = p.id;
00432         l.confidence = (int)(score*100.0F);
00433         snprintf(l.source, sizeof(l.source), "%s",
00434             "ObjRec");
00435         snprintf(l.name, sizeof(l.name), "%s",
00436             objName.c_str());
00437         snprintf(l.extra_info, sizeof(l.extra_info),
00438             "%ux%u #%u",
00439             (unsigned int) p.width,
00440             (unsigned int) p.height,
00441             (unsigned int) p.id);
00442 
00443         nv2_label_server_send_label(labelServer, &l);
00444 
00445         LINFO("sent label '%s (%s)'\n", l.name, l.extra_info);
00446       }
00447       nv2_image_patch_destroy(&p);
00448     }
00449   }
00450 
00451   if (terminate)
00452     LINFO("Ending application because a signal was caught");
00453 
00454   nv2_label_server_destroy(labelServer);
00455 }
00456 
00457 
00458 // ######################################################################
00459 /* So things look consistent in everyone's emacs... */
00460 /* Local Variables: */
00461 /* indent-tabs-mode: nil */
00462 /* End: */
00463 
00464 #endif
Generated on Sun May 8 08:41:02 2011 for iLab Neuromorphic Vision Toolkit by  doxygen 1.6.3