00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
#include "BPnnet/BPnnet.H"

#include "Image/MathOps.H"
#include "Util/Assert.H"
#include "Util/MathFunctions.H"
#include "Util/log.H"

#include <cstdlib>
#include <cstring>
#include <fstream>
#include <limits>
#include <string>
#include <vector>
00048
00049
00050 BPnnet::BPnnet(const int numInput, const int numHidden,
00051 const KnowledgeBase *kb )
00052 {
00053 numInputUnits = numInput;
00054
00055
00056
00057 numHiddenUnits = numHidden;
00058
00059
00060 itsKb = kb;
00061 numOutputUnits = itsKb->getSize();
00062
00063
00064
00065
00066
00067 weightFromInput.resize(numInputUnits, numHiddenUnits, true);
00068 weightToOutput.resize(numHiddenUnits, numOutputUnits, true);
00069
00070
00071 inputLayer.resize(numInputUnits);
00072 hiddenLayer.resize(numHiddenUnits);
00073 outputLayer.resize(numOutputUnits);
00074 }
00075
00076
00077 BPnnet::~BPnnet()
00078 {
00079 weightFromInput.freeMem();
00080 weightToOutput.freeMem();
00081 }
00082
00083
00084 void BPnnet::randomizeWeights(void)
00085 {
00086 for (int y = 0; y < numHiddenUnits; y++)
00087 for (int x = 0; x < numInputUnits; x++)
00088 weightFromInput.setVal(x, y, (randomDouble() - 0.5) * 0.0001);
00089
00090 for (int y = 0; y < numOutputUnits; y++)
00091 for (int x = 0; x < numHiddenUnits; x++)
00092 weightToOutput.setVal(x, y, (randomDouble() - 0.5) * 0.0001);
00093
00094 normalizeWeights();
00095 }
00096
00097
00098 void BPnnet::normalizeWeights(void)
00099 {
00100 double mi1, ma1, mi2, ma2;
00101 getMinMax(weightFromInput, mi1, ma1);
00102 getMinMax(weightToOutput, mi2, ma2);
00103
00104 inplaceClamp(weightFromInput, -10000.0, 10000.0);
00105 inplaceClamp(weightToOutput, -10000.0, 10000.0);
00106
00107
00108 }
00109
00110
00111 double BPnnet::train(const Image<float> &in, const SimpleVisualObject& target,
00112 const double learnRate)
00113 {
00114
00115
00116
00117
00118
00119 int targetNeuron = itsKb->findSimpleVisualObjectIndex(target.getName());
00120 if (targetNeuron == -1 && strcmp(target.getName(), "unknown"))
00121 LFATAL("Cannot train on unknown object '%s'", target.getName());
00122
00123
00124 double expectedOutput[numOutputUnits];
00125 for (int i = 0; i < numOutputUnits; i++) expectedOutput[i] = 0.0;
00126 if (targetNeuron >= 0) expectedOutput[targetNeuron] = 1.0;
00127
00128
00129 forwardProp(in);
00130
00131 double rms1 = 0.0;
00132
00133 for (int i = 0; i < numOutputUnits; i++)
00134 {
00135 double rms0 = outputLayer[i].calcOutputDelta(expectedOutput[i]);
00136 rms1 += rms0 * rms0;
00137 }
00138 rms1 /= numOutputUnits;
00139
00140
00141 for (int j = 0; j < numHiddenUnits; j++)
00142 {
00143
00144
00145
00146 double weightedDeltaSum = 0.0;
00147 for (int k = 0; k < numOutputUnits; k++)
00148 {
00149 double delta_k = outputLayer[k].getDelta();
00150 double w_kj = weightToOutput.getVal(j, k);
00151 weightedDeltaSum += delta_k * w_kj;
00152 }
00153
00154
00155 hiddenLayer[j].calcHiddenDelta(weightedDeltaSum);
00156 }
00157
00158
00159 for (int j = 0; j < numOutputUnits; j++)
00160 for (int i = 0; i < numHiddenUnits; i++)
00161 {
00162
00163
00164
00165 double delta_j = outputLayer[j].getDelta();
00166 double a_i = hiddenLayer[i].getActivationLevel();
00167 double weightChange = learnRate * delta_j * a_i;
00168
00169 weightToOutput.setVal(i, j,
00170 weightToOutput.getVal(i, j) + weightChange);
00171
00172 }
00173
00174
00175 for (int j = 0; j < numHiddenUnits; j++)
00176 for (int i = 0; i < numInputUnits; i++)
00177 {
00178
00179
00180
00181 double delta_j = hiddenLayer[j].getDelta();
00182 double a_i = inputLayer[i].getActivationLevel();
00183 double weightChange = learnRate * delta_j * a_i;
00184
00185 weightFromInput.setVal(i, j,
00186 weightFromInput.getVal(i, j) + weightChange);
00187 }
00188
00189 return rms1;
00190
00191
00192
00193 }
00194
00195
00196 bool BPnnet::recognize(const Image<float> &in, SimpleVisualObject& vo)
00197 {
00198
00199 forwardProp(in);
00200
00201
00202 double maxOutput = - std::numeric_limits<double>::max();
00203 double meanOutput = 0.0, maxOutput2 = maxOutput;
00204 int maxNeuron = -1, maxNeuron2 = -1;
00205 for (int n = 0; n < numOutputUnits; n++)
00206 {
00207 double thisOutput = outputLayer[n].getActivationLevel();
00208 meanOutput += thisOutput;
00209 if (thisOutput > maxOutput)
00210 { maxOutput = thisOutput; maxNeuron = n; }
00211 }
00212 for (int n = 0; n < numOutputUnits; n++)
00213 {
00214 double thisOutput = outputLayer[n].getActivationLevel();
00215 if (n != maxNeuron && thisOutput > maxOutput2)
00216 { maxOutput2 = thisOutput; maxNeuron2 = n; }
00217 }
00218 meanOutput /= (double)numOutputUnits;
00219 LINFO("max for '%s' (%.3f), max2 for '%s' (%.3f), mean=%.3f",
00220 itsKb->getSimpleVisualObject(maxNeuron).getName(), maxOutput,
00221 itsKb->getSimpleVisualObject(maxNeuron2).getName(), maxOutput2, meanOutput);
00222
00223 if (maxOutput > 0.25 &&
00224 maxOutput > 1.75 * meanOutput &&
00225 maxOutput > 1.25 * maxOutput2)
00226 { vo = itsKb->getSimpleVisualObject(maxNeuron); return true; }
00227
00228 return false;
00229 }
00230
00231
00232 bool BPnnet::save(const char* filename) const
00233 {
00234 char fname[256]; strcpy(fname, filename); strcat(fname, "_w1.raw");
00235 std::ofstream s(fname, std::ofstream::binary);
00236 if (s.is_open() == false) { LERROR("Cannot write %s", fname); return false; }
00237 s.write((char *)(weightFromInput.getArrayPtr()),
00238 weightFromInput.getSize() * sizeof(double));
00239 s.close();
00240
00241 strcpy(fname, filename); strcat(fname, "_w2.raw");
00242 s.open(fname, std::ofstream::binary);
00243 if (s.is_open() == false) { LERROR("Cannot write %s", fname); return false; }
00244 s.write((char *)(weightToOutput.getArrayPtr()),
00245 weightToOutput.getSize() * sizeof(double));
00246 s.close();
00247
00248 return true;
00249 }
00250
00251
00252 bool BPnnet::load(const char* filename)
00253 {
00254 char fname[256]; strcpy(fname, filename); strcat(fname, "_w1.raw");
00255 std::ifstream s(fname, std::ifstream::binary);
00256 if (s.is_open() == false) { LERROR("Cannot read %s", fname); return false; }
00257 s.read((char *)(weightFromInput.getArrayPtr()),
00258 weightFromInput.getSize() * sizeof(double));
00259 s.close();
00260
00261 strcpy(fname, filename); strcat(fname, "_w2.raw");
00262 s.open(fname, std::ifstream::binary);
00263 if (s.is_open() == false) { LERROR("Cannot read %s", fname); return false; }
00264 s.read((char *)(weightToOutput.getArrayPtr()),
00265 weightToOutput.getSize() * sizeof(double));
00266 s.close();
00267
00268 return true;
00269 }
00270
00271
00272 void BPnnet::forwardProp( const Image<float> &in)
00273 {
00274 ASSERT(in.getSize() == numInputUnits);
00275
00276
00277 for (int i = 0; i < numInputUnits; i ++)
00278 inputLayer[i].assignInput(in.getVal(i));
00279
00280
00281 double weightedInputSum_h = 0.0;
00282 for (int n = 0; n < numHiddenUnits; n ++)
00283 {
00284
00285 for (int m = 0; m < numInputUnits; m ++)
00286 {
00287 double iOutput = inputLayer[m].getActivationLevel();
00288 double weightedInput = iOutput * weightFromInput.getVal(m, n);
00289 weightedInputSum_h += weightedInput;
00290 }
00291 hiddenLayer[n].assignInput(weightedInputSum_h);
00292 }
00293
00294
00295 double weightedInputSum_o = 0.0;
00296 for (int n = 0; n < numOutputUnits; n ++)
00297 {
00298
00299 for (int m = 0; m < numHiddenUnits; m ++)
00300 {
00301 double hOutput = hiddenLayer[m].getActivationLevel();
00302 double weightedInput = hOutput * weightToOutput.getVal(m, n);
00303 weightedInputSum_o += weightedInput;
00304 }
00305 outputLayer[n].assignInput(weightedInputSum_o);
00306 }
00307 }
00308
00309
00310
00311
00312
00313