00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
#include "BPnnet/BPnnet.H"

#include "Image/MathOps.H"
#include "Util/Assert.H"
#include "Util/MathFunctions.H"
#include "Util/log.H"

#include <cstdlib>
#include <cstring>
#include <fstream>
#include <limits>
#include <string>
#include <vector>
00048
00049
00050 BPnnet::BPnnet(const int numInput, const int numHidden,
00051 const KnowledgeBase *kb )
00052 {
00053 numInputUnits = numInput;
00054
00055
00056
00057 numHiddenUnits = numHidden;
00058
00059
00060 itsKb = kb;
00061 numOutputUnits = itsKb->getSize();
00062
00063
00064
00065
00066
00067 weightFromInput.resize(numInputUnits, numHiddenUnits, true);
00068 weightToOutput.resize(numHiddenUnits, numOutputUnits, true);
00069
00070
00071 inputLayer.resize(numInputUnits);
00072 hiddenLayer.resize(numHiddenUnits);
00073 outputLayer.resize(numOutputUnits);
00074 }
00075
00076
00077 BPnnet::~BPnnet()
00078 {
00079 weightFromInput.freeMem();
00080 weightToOutput.freeMem();
00081 }
00082
00083
00084 void BPnnet::randomizeWeights(void)
00085 {
00086 for (int y = 0; y < numHiddenUnits; y++)
00087 for (int x = 0; x < numInputUnits; x++)
00088 weightFromInput.setVal(x, y, (randomDouble() - 0.5) * 0.0001);
00089
00090 for (int y = 0; y < numOutputUnits; y++)
00091 for (int x = 0; x < numHiddenUnits; x++)
00092 weightToOutput.setVal(x, y, (randomDouble() - 0.5) * 0.0001);
00093
00094 normalizeWeights();
00095 }
00096
00097
00098 void BPnnet::normalizeWeights(void)
00099 {
00100 double mi1, ma1, mi2, ma2;
00101 getMinMax(weightFromInput, mi1, ma1);
00102 getMinMax(weightToOutput, mi2, ma2);
00103
00104 inplaceClamp(weightFromInput, -10000.0, 10000.0);
00105 inplaceClamp(weightToOutput, -10000.0, 10000.0);
00106
00107
00108 }
00109
00110
00111 double BPnnet::train(const Image<float> &in, const SimpleVisualObject& target,
00112 const double learnRate)
00113 {
00114
00115
00116
00117
00118
00119 int targetNeuron = itsKb->findSimpleVisualObjectIndex(target.getName());
00120 if (targetNeuron == -1 && strcmp(target.getName(), "unknown"))
00121 LFATAL("Cannot train on unknown object '%s'", target.getName());
00122
00123
00124 double expectedOutput[numOutputUnits];
00125 for (int i = 0; i < numOutputUnits; i++) expectedOutput[i] = 0.0;
00126 if (targetNeuron >= 0) expectedOutput[targetNeuron] = 1.0;
00127
00128
00129 forwardProp(in);
00130
00131 double rms1 = 0.0;
00132
00133 for (int i = 0; i < numOutputUnits; i++)
00134 {
00135 double rms0 = outputLayer[i].calcOutputDelta(expectedOutput[i]);
00136 rms1 += rms0 * rms0;
00137 }
00138 rms1 /= numOutputUnits;
00139
00140
00141 for (int j = 0; j < numHiddenUnits; j++)
00142 {
00143
00144
00145
00146 double weightedDeltaSum = 0.0;
00147 for (int k = 0; k < numOutputUnits; k++)
00148 {
00149 double delta_k = outputLayer[k].getDelta();
00150 double w_kj = weightToOutput.getVal(j, k);
00151 weightedDeltaSum += delta_k * w_kj;
00152 }
00153
00154
00155 hiddenLayer[j].calcHiddenDelta(weightedDeltaSum);
00156 }
00157
00158
00159 for (int j = 0; j < numOutputUnits; j++)
00160 for (int i = 0; i < numHiddenUnits; i++)
00161 {
00162
00163
00164
00165 double delta_j = outputLayer[j].getDelta();
00166 double a_i = hiddenLayer[i].getActivationLevel();
00167 double weightChange = learnRate * delta_j * a_i;
00168
00169 weightToOutput.setVal(i, j,
00170 weightToOutput.getVal(i, j) + weightChange);
00171
00172 }
00173
00174
00175 for (int j = 0; j < numHiddenUnits; j++)
00176 for (int i = 0; i < numInputUnits; i++)
00177 {
00178
00179
00180
00181 double delta_j = hiddenLayer[j].getDelta();
00182 double a_i = inputLayer[i].getActivationLevel();
00183 double weightChange = learnRate * delta_j * a_i;
00184
00185 weightFromInput.setVal(i, j,
00186 weightFromInput.getVal(i, j) + weightChange);
00187 }
00188
00189 return rms1;
00190
00191
00192
00193 }
00194
00195
00196 bool BPnnet::recognize(const Image<float> &in, SimpleVisualObject& vo)
00197 {
00198
00199 forwardProp(in);
00200
00201
00202 double maxOutput = - std::numeric_limits<double>::max();
00203 double meanOutput = 0.0, maxOutput2 = maxOutput;
00204 int maxNeuron = -1, maxNeuron2 = -1;
00205 for (int n = 0; n < numOutputUnits; n++)
00206 {
00207 double thisOutput = outputLayer[n].getActivationLevel();
00208 meanOutput += thisOutput;
00209 if (thisOutput > maxOutput)
00210 { maxOutput = thisOutput; maxNeuron = n; }
00211 }
00212 for (int n = 0; n < numOutputUnits; n++)
00213 {
00214 double thisOutput = outputLayer[n].getActivationLevel();
00215 if (n != maxNeuron && thisOutput > maxOutput2)
00216 { maxOutput2 = thisOutput; maxNeuron2 = n; }
00217 }
00218 meanOutput /= (double)numOutputUnits;
00219 LINFO("max for '%s' (%.3f), max2 for '%s' (%.3f), mean=%.3f",
00220 itsKb->getSimpleVisualObject(maxNeuron).getName(), maxOutput,
00221 itsKb->getSimpleVisualObject(maxNeuron2).getName(), maxOutput2, meanOutput);
00222
00223 if (maxOutput > 0.25 &&
00224 maxOutput > 1.75 * meanOutput &&
00225 maxOutput > 1.25 * maxOutput2)
00226 { vo = itsKb->getSimpleVisualObject(maxNeuron); return true; }
00227
00228 return false;
00229 }
00230
00231
00232 bool BPnnet::save(const char* filename) const
00233 {
00234 char fname[256]; strcpy(fname, filename); strcat(fname, "_w1.raw");
00235 std::ofstream s(fname, std::ofstream::binary);
00236 if (s.is_open() == false) { LERROR("Cannot write %s", fname); return false; }
00237 s.write((char *)(weightFromInput.getArrayPtr()),
00238 weightFromInput.getSize() * sizeof(double));
00239 s.close();
00240
00241 strcpy(fname, filename); strcat(fname, "_w2.raw");
00242 s.open(fname, std::ofstream::binary);
00243 if (s.is_open() == false) { LERROR("Cannot write %s", fname); return false; }
00244 s.write((char *)(weightToOutput.getArrayPtr()),
00245 weightToOutput.getSize() * sizeof(double));
00246 s.close();
00247
00248 return true;
00249 }
00250
00251
00252 bool BPnnet::load(const char* filename)
00253 {
00254 char fname[256]; strcpy(fname, filename); strcat(fname, "_w1.raw");
00255 std::ifstream s(fname, std::ifstream::binary);
00256 if (s.is_open() == false) { LERROR("Cannot read %s", fname); return false; }
00257 s.read((char *)(weightFromInput.getArrayPtr()),
00258 weightFromInput.getSize() * sizeof(double));
00259 s.close();
00260
00261 strcpy(fname, filename); strcat(fname, "_w2.raw");
00262 s.open(fname, std::ifstream::binary);
00263 if (s.is_open() == false) { LERROR("Cannot read %s", fname); return false; }
00264 s.read((char *)(weightToOutput.getArrayPtr()),
00265 weightToOutput.getSize() * sizeof(double));
00266 s.close();
00267
00268 return true;
00269 }
00270
00271
00272 void BPnnet::forwardProp( const Image<float> &in)
00273 {
00274 ASSERT(in.getSize() == numInputUnits);
00275
00276
00277 for (int i = 0; i < numInputUnits; i ++)
00278 inputLayer[i].assignInput(in.getVal(i));
00279
00280
00281 double weightedInputSum_h = 0.0;
00282 for (int n = 0; n < numHiddenUnits; n ++)
00283 {
00284
00285 for (int m = 0; m < numInputUnits; m ++)
00286 {
00287 double iOutput = inputLayer[m].getActivationLevel();
00288 double weightedInput = iOutput * weightFromInput.getVal(m, n);
00289 weightedInputSum_h += weightedInput;
00290 }
00291 hiddenLayer[n].assignInput(weightedInputSum_h);
00292 }
00293
00294
00295 double weightedInputSum_o = 0.0;
00296 for (int n = 0; n < numOutputUnits; n ++)
00297 {
00298
00299 for (int m = 0; m < numHiddenUnits; m ++)
00300 {
00301 double hOutput = hiddenLayer[m].getActivationLevel();
00302 double weightedInput = hOutput * weightToOutput.getVal(m, n);
00303 weightedInputSum_o += weightedInput;
00304 }
00305 outputLayer[n].assignInput(weightedInputSum_o);
00306 }
00307 }
00308
00309
00310
00311
00312
00313