00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include "Component/ModelManager.H"
00039 #include "Learn/GentleBoost.H"
00040 #include "rutz/rand.h"
00041 #include "rutz/trace.h"
00042 #include "Util/SortUtil.H"
00043 #include "Util/Assert.H"
00044 #include <math.h>
00045 #include <fcntl.h>
00046 #include <limits>
00047 #include <string>
00048 #include <stdio.h>
00049
00050
00051 void makeData(const int numCategories, const uint sampleDim, std::vector<std::vector<float> >& data, std::vector<int>& labels, bool printData);
00052
00053 int main(const int argc, const char **argv)
00054 {
00055
00056 MYLOGVERB = LOG_INFO;
00057 ModelManager manager("Test Decision Tree");
00058
00059
00060
00061 uint nDim=4;
00062 int numCategories=3;
00063 int maxIters=1;
00064 int maxTreeSize = 4;
00065 GentleBoost gb(maxTreeSize);
00066 std::string saveDataFile("tmp.dat");
00067 std::string compareDataFile("tmp.cmp.dat");
00068
00069 if (manager.parseCommandLine(
00070 (const int)argc, (const char**)argv, "", 0, 0) == false)
00071 return 0;
00072
00073 manager.start();
00074 std::vector<std::vector<float> > traindata(nDim);
00075 std::vector<int> trainlabels;
00076 std::vector<float> dimMeanIn(nDim), dimMeanOut(nDim), dimVarIn(nDim,1.0F), dimVarOut(nDim,1.0F);
00077 for(uint i=0;i<nDim;i++)
00078 {
00079 dimMeanIn[i] = nDim-i;
00080 dimMeanOut[i] = -(nDim-i);
00081 }
00082 makeData(numCategories,1000,traindata,trainlabels,false);
00083
00084 gb.train(traindata,trainlabels,maxIters);
00085 gb.save(saveDataFile);
00086
00087 GentleBoost tmpGB;
00088 tmpGB.load(saveDataFile);
00089 tmpGB.save(compareDataFile);
00090
00091 std::map<int,std::vector<float> > trainPDF = gb.predictPDF(traindata);
00092 std::vector<int> trainResults = gb.getMostLikelyClass(trainPDF);
00093
00094 int numCorrect=0;
00095 for(uint i=0;i<trainlabels.size();i++)
00096 {
00097 if(trainResults[i]==trainlabels[i]) numCorrect++;
00098
00099 }
00100 printf("Training Accuracy:[Correct/Total]=[%d/%Zu]:%f\n",numCorrect,trainlabels.size(),numCorrect/float(trainlabels.size()));
00101 gb.printAllTrees();
00102 std::vector<std::vector<float> > testdata(nDim);
00103 std::vector<int> testlabels;
00104
00105 makeData(numCategories,10,testdata,testlabels,true);
00106
00107 std::map<int,std::vector<float> > testPDF = gb.predictPDF(testdata);
00108 std::vector<int> testResults = gb.getMostLikelyClass(testPDF);
00109 numCorrect=0;
00110 for(uint i=0;i<testlabels.size();i++)
00111 {
00112 if(testResults[i]==testlabels[i]) numCorrect++;
00113 std::map<int,std::vector<float> >::iterator litr;
00114 printf("Guess %d [",testResults[i]);
00115 for(litr=testPDF.begin();litr!=testPDF.end();litr++)
00116 {
00117 printf("(%d)%f, ",litr->first,litr->second[i]);
00118 }
00119 printf("] *** Ground Truth %d\n",testlabels[i]);
00120 }
00121 printf("Accuracy:[Correct/Total]=[%d/%Zu]:%f\n",numCorrect,testlabels.size(),numCorrect/float(testlabels.size()));
00122 manager.stop();
00123
00124 }
00125
00126 void makeData(const int numCategories, const uint sampleDim, std::vector<std::vector<float> >& data, std::vector<int>& labels, bool printData)
00127 {
00128
00129 rutz::urand rgen(time((time_t*)0)+getpid());
00130 ASSERT(data.size()>0);
00131
00132 const uint dataDim=(uint) data.size();
00133
00134 for(uint i=0;i<sampleDim;i++)
00135 {
00136 int l=rgen.idraw(numCategories)+1;
00137 if(printData) printf("data[][%u]: l=%d; ",i,l);
00138 for(uint j=0;j<dataDim;j++)
00139 {
00140 data[j].push_back(rgen.fdraw_range(l-0.75,l+0.75));
00141 if(printData) printf("%f, ",data[j][i]);
00142 }
00143 if(printData) printf("\n");
00144 labels.push_back(l);
00145 }
00146 }
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156