00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include "Learn/HMM.H"
00039 #include "Util/Assert.H"
00040 #include "Util/log.H"
00041 #include <math.h>
00042 #include <fcntl.h>
00043 #include <limits>
00044 #include <string>
00045
00046 #include <cstdio>
00047
00048
00049 template <class T>
00050 HMM<T>::HMM(const std::vector<T>& states, const std::vector<T>& observations, const std::string& name) :
00051 itsName(name),
00052 itsStates(states),
00053 itsObservations(observations)
00054 {
00055 itsStateTransitions.resize(states.size(), states.size());
00056 itsStateTransitions.clear(0);
00057
00058 itsStateEmissions.resize(states.size(), observations.size());
00059 itsStateEmissions.clear(1.0F/(float)observations.size());
00060
00061 for(uint i=0; i<itsStates.size(); i++)
00062 itsStatesMap[itsStates[i]] = i;
00063
00064 for(uint i=0; i<itsObservations.size(); i++)
00065 itsObservationsMap[itsObservations[i]] = i;
00066
00067 itsCurrentPath.resize(states.size());
00068
00069
00070 }
00071
00072
00073 template <class T>
00074 HMM<T>::~HMM()
00075 {
00076 }
00077
00078
00079
00080 template <class T>
00081 void HMM<T>::setStateTransition(const T fromState, const T toState, double prob)
00082 {
00083 size_t fromStateIdx = itsStatesMap[fromState];
00084 size_t toStateIdx = itsStatesMap[toState];
00085
00086 itsStateTransitions.setVal(fromStateIdx, toStateIdx, prob);
00087
00088 }
00089
00090
00091 template <class T>
00092 void HMM<T>::setStateEmission(const T state, const T emission, double prob)
00093 {
00094 size_t stateIdx = itsStatesMap[state];
00095 size_t observationIdx = itsObservationsMap[emission];
00096
00097 itsStateEmissions.setVal(stateIdx, observationIdx, prob);
00098
00099 }
00100
00101
00102 template <class T>
00103 void HMM<T>::setCurrentState(const T state, double prob)
00104 {
00105 size_t stateIdx = itsStatesMap[state];
00106
00107 itsCurrentPath[stateIdx].prob = prob;
00108 itsCurrentPath[stateIdx].path.clear();
00109 itsCurrentPath[stateIdx].path.push_back(stateIdx);
00110 }
00111
00112
00113 template <class T>
00114 void HMM<T>::show()
00115 {
00116 printf("HMM: %s", itsName.c_str());
00117
00118 printf("Current State\n");
00119 for(size_t i=0; i<itsCurrentPath.size(); i++)
00120 printf("%zu\t\t", i);
00121 printf("\n");
00122
00123 for(size_t i=0; i<itsCurrentPath.size(); i++)
00124 printf("%f\t", itsCurrentPath[i].prob);
00125 printf("\n\n");
00126
00127 printf("State Transitions\n");
00128 printf("\t");
00129 for(int i=0; i<itsStateTransitions.getWidth(); i++)
00130 printf("%i\t\t", i);
00131 printf("\n");
00132
00133 for(int i=0; i<itsStateTransitions.getWidth(); i++)
00134 {
00135 printf("%i\t", i);
00136 for(int j=0; j<itsStateTransitions.getHeight(); j++)
00137 printf("%f\t", itsStateTransitions.getVal(i,j));
00138 printf("\n");
00139 }
00140 printf("\n\n");
00141
00142 printf("State Emissions\n");
00143 printf("\t");
00144 for(int i=0; i<itsStateEmissions.getWidth(); i++)
00145 printf("%i\t\t", i);
00146 printf("\n");
00147
00148 for(int j=0; j<itsStateEmissions.getHeight(); j++)
00149 {
00150 printf("%i\t", j);
00151 for(int i=0; i<itsStateEmissions.getWidth(); i++)
00152 printf("%f\t", itsStateEmissions.getVal(i,j));
00153 printf("\n");
00154 }
00155
00156
00157 }
00158
00159
00160
00161 template <class T>
00162 std::vector<T> HMM<T>::getLikelyStates(const std::vector<T> observations,
00163 double& maxPathProb)
00164 {
00165 if (observations.size() < 1)
00166 return std::vector<T>();
00167
00168 for(uint i=0; i<observations.size(); i++)
00169 iteratePath(observations[i]);
00170
00171
00172 return getMaxPath(maxPathProb);
00173 }
00174
00175
00176
00177 template <class T>
00178 std::vector<T> HMM<T>::getMaxPath(double& maxPathProb)
00179 {
00180
00181 double maxProb = 0;
00182 std::vector<size_t> maxPath;
00183 for(uint i=0; i<itsStates.size(); i++)
00184 if (itsCurrentPath[i].prob > maxProb)
00185 {
00186 maxProb = itsCurrentPath[i].prob;
00187 maxPath = itsCurrentPath[i].path;
00188 }
00189
00190 std::vector<T> maxPathType;
00191 for(uint i=0; i<maxPath.size(); i++)
00192 maxPathType.push_back(itsStates[maxPath[i]]);
00193
00194 maxPathProb = maxProb;
00195
00196 return maxPathType;
00197 }
00198
00199
00200 template <class T>
00201 void HMM<T>::iteratePath(const T observation)
00202 {
00203 std::vector<Path> U(itsStates.size());
00204
00205 for(uint nextState=0; nextState<itsStates.size(); nextState++)
00206 {
00207 Path nextPath;
00208 nextPath.prob = 0;
00209
00210
00211 double maxProb = 0;
00212 for(uint sourceState=0; sourceState<itsStates.size(); sourceState++)
00213 {
00214 size_t observationIdx = itsObservationsMap[observation];
00215
00216 double p = itsStateEmissions.getVal(sourceState,observationIdx) *
00217 itsStateTransitions.getVal(sourceState,nextState);
00218
00219 double prob = itsCurrentPath[sourceState].prob * p;
00220
00221 if (prob > maxProb)
00222 {
00223 nextPath.path = itsCurrentPath[sourceState].path;
00224 nextPath.path.push_back(nextState);
00225 nextPath.prob = prob;
00226 maxProb = prob;
00227 }
00228 }
00229 U[nextState] = nextPath;
00230 }
00231 itsCurrentPath = U;
00232
00233 }
00234
00235
00236 template <class T>
00237 double HMM<T>::forward(const std::vector<T> observations)
00238 {
00239 if (observations.size() < 1)
00240 return 0;
00241
00242 double logProb = 0;
00243
00244 double sum;
00245
00246
00247 itsSeqInfo.alpha.clear();
00248
00249
00250 itsSeqInfo.alpha.resize(observations.size());
00251 itsSeqInfo.scale.resize(observations.size());
00252
00253 for(size_t t = 0; t<observations.size(); t++)
00254 {
00255 size_t observationIdx = itsObservationsMap[observations[t] ];
00256
00257
00258 if(t == 0){
00259
00260 logProb = 0.0;
00261 itsSeqInfo.scale[0] = 0.0;
00262 itsSeqInfo.alpha[0].resize(itsStates.size());
00263
00264
00265 for(size_t state = 0; state < itsStates.size(); state++) {
00266 itsSeqInfo.alpha[0][state] = itsCurrentPath[state].prob * itsStateEmissions.getVal(state, observationIdx);
00267 itsSeqInfo.scale[0] += itsSeqInfo.alpha[0][state];
00268 }
00269 for(size_t state = 0; state < itsStates.size(); state++)
00270 itsSeqInfo.alpha[0][state] /= itsSeqInfo.scale[0];
00271
00272 }else{
00273 itsSeqInfo.scale[t] = 0.0;
00274 itsSeqInfo.alpha[t].resize(itsStates.size());
00275 for(size_t j = 0; j < itsStates.size(); j++) {
00276 sum = 0.0;
00277 for(size_t i = 0; i < itsStates.size(); i++)
00278 sum += itsSeqInfo.alpha[t-1][i] *itsStateTransitions.getVal(i,j);
00279
00280 itsSeqInfo.alpha[t][j] = sum * itsStateEmissions.getVal(j, observationIdx);
00281 itsSeqInfo.scale[t] += itsSeqInfo.alpha[t][j];
00282 }
00283 for(size_t j = 0; j < itsStates.size(); j++)
00284 itsSeqInfo.alpha[t][j] /= itsSeqInfo.scale[t];
00285 }
00286 }
00287
00288
00289
00290
00291 for(size_t t = 0; t<observations.size(); t++)
00292 logProb += log(itsSeqInfo.scale[t]);
00293
00294
00295 return logProb;
00296 }
00297
00298 template <class T>
00299 double HMM<T>::backward(const std::vector<T> observations)
00300 {
00301 if (observations.size() < 1)
00302 return 0;
00303
00304
00305
00306
00307
00308 for(size_t i = 0; i < itsStates.size(); i++)
00309 itsSeqInfo.beta[observations.size()-1][i] = 1.0/itsSeqInfo.scale[observations.size()-1];
00310
00311
00312
00313
00314 for(size_t t = observations.size() - 2; ; t--)
00315 {
00316 for(size_t i = 0; i < itsStates.size(); i++) {
00317 double sum = 0.0;
00318 for(size_t j = 0; j < itsStates.size(); j++)
00319 {
00320 size_t observationIdx = itsObservationsMap[observations[t+1] ];
00321 sum += itsStateTransitions.getVal(i,j)
00322 * itsStateEmissions.getVal(j,observationIdx)
00323 * itsSeqInfo.beta[t+1][j];
00324 }
00325 itsSeqInfo.beta[t][i] = sum/itsSeqInfo.scale[t];
00326 }
00327 if(t==0)
00328 break;
00329 }
00330
00331 return 0;
00332 }
00333
00334
00335 template <class T>
00336 void HMM<T>::train(const std::vector<T> observations, size_t numIterations)
00337 {
00338 if (observations.size() < 1)
00339 return;
00340
00341
00342 size_t loopCount = 0;
00343
00344 double numeratorA, denominatorA;
00345 double numeratorB, denominatorB;
00346
00347
00348 double logProb_prev;
00349
00350 double delta;
00351
00352
00353 itsSeqInfo.beta.resize(observations.size());
00354 itsSeqInfo.gamma.resize(observations.size());
00355 itsSeqInfo.xi.resize(observations.size());
00356
00357 for(size_t t=0; t<observations.size(); t++){
00358 itsSeqInfo.beta[t].resize(itsStates.size());
00359 itsSeqInfo.gamma[t].resize(itsStates.size());
00360 itsSeqInfo.xi[t].resize(itsStates.size());
00361 for(size_t i=0; i<itsStates.size(); i++)
00362 itsSeqInfo.xi[t][i].resize(itsStates.size());
00363 }
00364
00365
00366
00367 double logProb = forward(observations);
00368 backward(observations);
00369 computeGamma(observations);
00370 computeXi(observations);
00371
00372 logProb_prev = logProb;
00373
00374 do{
00375
00376
00377
00378
00379
00380 for(size_t i = 0; i < itsStates.size(); i++) {
00381 denominatorA = 0.0;
00382 for(size_t t = 0; t < observations.size() - 1; t++)
00383 denominatorA += itsSeqInfo.gamma[t][i];
00384
00385 for(size_t j = 0; j < itsStates.size(); j++) {
00386 numeratorA = 0.0;
00387 for(size_t t = 0; t < observations.size() - 1; t++)
00388 numeratorA += itsSeqInfo.xi[t][i][j];
00389 itsStateTransitions.setVal(i, j, 0.0001 + 0.9999*numeratorA/denominatorA);
00390 }
00391
00392 denominatorB = denominatorA + itsSeqInfo.gamma[observations.size()-1][i];
00393 for(size_t k = 0; k < itsObservations.size(); k++) {
00394 numeratorB = 0.0;
00395 for(size_t t = 0; t < observations.size(); t++) {
00396 size_t observationIdx = itsObservationsMap[observations[t] ];
00397 if(observationIdx == k)
00398 numeratorB += itsSeqInfo.gamma[t][i];
00399 }
00400 itsStateEmissions.setVal(i,k, 0.0001 + 0.9999*numeratorB/denominatorB);
00401 }
00402 }
00403
00404
00405 logProb = forward(observations);
00406 backward(observations);
00407 computeGamma(observations);
00408 computeXi(observations);
00409
00410
00411 delta = logProb - logProb_prev;
00412 logProb_prev = logProb;
00413 loopCount++;
00414 } while(loopCount < numIterations);
00415
00416
00417 }
00418
00419
00420 template <class T>
00421 void HMM<T>::train(const std::vector< std::vector<T> > observations, size_t numIterations)
00422 {
00423
00424 if (observations.size() < 1)
00425 return;
00426
00427
00428 if (observations.size() == 1)
00429 {
00430 train(observations[0], numIterations);
00431 return;
00432 }
00433
00434 size_t loopCount = 0;
00435
00436
00437 itsSeqInfo.beta.resize(observations[0].size());
00438 itsSeqInfo.gamma.resize(observations[0].size());
00439 itsSeqInfo.xi.resize(observations[0].size());
00440
00441 for(size_t t=0; t<observations[0].size(); t++){
00442 itsSeqInfo.beta[t].resize(itsStates.size());
00443 itsSeqInfo.gamma[t].resize(itsStates.size());
00444 itsSeqInfo.xi[t].resize(itsStates.size());
00445 for(size_t i=0; i<itsStates.size(); i++)
00446 itsSeqInfo.xi[t][i].resize(itsStates.size());
00447 }
00448
00449 double numeratorA, numeratorA_partial, denominatorA, denominatorA_partial;
00450 double numeratorB, numeratorB_partial, denominatorB, denominatorB_partial;
00451
00452 double logProbSum_prev = 0.0;
00453 double logProbSum = 0.0;
00454 double delta;
00455
00456
00457
00458
00459 std::vector<SeqInfo> seqsInfo(observations.size());
00460
00461 for(size_t seq = 0; seq < observations.size(); seq++)
00462 {
00463 double logProb = forward(observations[seq]);
00464 backward(observations[seq]);
00465 computeGamma(observations[seq]);
00466 computeXi(observations[seq]);
00467 logProbSum_prev += logProb;
00468
00469 seqsInfo[seq].prob = logProb;
00470 seqsInfo[seq].gamma = itsSeqInfo.gamma;
00471 seqsInfo[seq].xi = itsSeqInfo.xi;
00472
00473 }
00474
00475
00476 do{
00477
00478
00479
00480
00481
00482 for(size_t i = 0; i < itsStates.size(); i++) {
00483 denominatorA = 0.0;
00484 denominatorB = 0.0;
00485
00486 for(size_t seq = 0; seq < observations.size(); seq++)
00487 {
00488 double logProb = seqsInfo[seq].prob;
00489 std::vector< std::vector<double> >& gamma = seqsInfo[seq].gamma;
00490 denominatorA_partial = 0.0;
00491 for(size_t t = 0; t < observations[seq].size() - 1; t++)
00492 denominatorA_partial += gamma[t][i];
00493 denominatorB_partial = denominatorA_partial + gamma[observations[seq].size()-1][i];
00494 denominatorA += denominatorA_partial/exp(logProb);
00495 denominatorB += denominatorB_partial/exp(logProb);
00496 }
00497
00498 for(size_t j = 0; j < itsStates.size(); j++) {
00499 numeratorA = 0.0;
00500 for(size_t seq = 0; seq < observations.size(); seq++)
00501 {
00502 std::vector< std::vector< std::vector<double> > >& xi = seqsInfo[seq].xi;
00503 numeratorA_partial = 0.0;
00504 for(size_t t = 0; t < observations[seq].size() - 1; t++)
00505 numeratorA_partial += xi[t][i][j];
00506 numeratorA += numeratorA_partial/exp(seqsInfo[seq].prob);
00507 }
00508 itsStateTransitions.setVal(i, j, 0.0001 + 0.9999*numeratorA/denominatorA);
00509 }
00510
00511 for(size_t k = 0; k < itsObservations.size(); k++) {
00512 numeratorB = 0.0;
00513 for(size_t seq = 0; seq < observations.size(); seq++)
00514 {
00515 std::vector< std::vector<double> >& gamma = seqsInfo[seq].gamma;
00516 numeratorB_partial = 0.0;
00517 for(size_t t = 0; t < observations[seq].size(); t++)
00518 {
00519 size_t observationIdx = itsObservationsMap[observations[seq][t] ];
00520 if(observationIdx == k)
00521 numeratorB_partial += gamma[t][i];
00522 }
00523 numeratorB += numeratorB_partial/exp(seqsInfo[seq].prob);
00524 }
00525 itsStateEmissions.setVal(i,k, 0.0001 + 0.9999*numeratorB/denominatorB);
00526 }
00527 }
00528
00529
00530 for(size_t seq = 0; seq < observations.size(); seq++)
00531 {
00532 double logProb = forward(observations[seq]);
00533 backward(observations[seq]);
00534 computeGamma(observations[seq]);
00535 computeXi(observations[seq]);
00536 logProbSum += logProb;
00537
00538 seqsInfo[seq].prob = logProb;
00539 seqsInfo[seq].gamma = itsSeqInfo.gamma;
00540 seqsInfo[seq].xi = itsSeqInfo.xi;
00541 }
00542
00543
00544 delta = logProbSum - logProbSum_prev;
00545 logProbSum_prev = logProbSum;
00546 loopCount++;
00547 }while(loopCount < numIterations);
00548
00549 }
00550
00551
00552 template <class T>
00553 void HMM<T>::computeGamma(const std::vector<T> observations)
00554 {
00555
00556 for(size_t t = 0; t < observations.size(); t++) {
00557 double denominator = 0.0;
00558 for(size_t j = 0; j < itsStates.size(); j++) {
00559 itsSeqInfo.gamma[t][j] = itsSeqInfo.alpha[t][j] * itsSeqInfo.beta[t][j];
00560 denominator += itsSeqInfo.gamma[t][j];
00561 }
00562
00563 for(size_t i = 0; i < itsStates.size(); i++)
00564 itsSeqInfo.gamma[t][i] = itsSeqInfo.gamma[t][i]/denominator;
00565 }
00566
00567 }
00568
00569
00570 template <class T>
00571 void HMM<T>::computeXi(const std::vector<T> observations)
00572 {
00573 for(size_t t = 0; t < observations.size()-1; t++) {
00574 double denominator = 0.0;
00575 for(size_t i = 0; i < itsStates.size(); i++)
00576 for(size_t j = 0; j < itsStates.size(); j++) {
00577 size_t observationIdx = itsObservationsMap[observations[t+1] ];
00578 itsSeqInfo.xi[t][i][j] = itsSeqInfo.alpha[t][i] * itsSeqInfo.beta[t+1][j] *
00579 itsStateTransitions.getVal(i,j) * itsStateEmissions.getVal(j, observationIdx);
00580 denominator += itsSeqInfo.xi[t][i][j];
00581 }
00582
00583 for(size_t i = 0; i < itsStates.size(); i++)
00584 for(size_t j = 0; j < itsStates.size(); j++)
00585 itsSeqInfo.xi[t][i][j] /= denominator;
00586 }
00587
00588 }
00589
00590
00591
00592 template class HMM<std::string>;
00593 template class HMM<uint>;
00594
00595
00596
00597
00598
00599
00600