00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include "GMMClassifier.h"
00020 #include "NumericLib.h"
00021
00022
00023 using std::ostringstream;
00024 using std::vector;
00025
00026 using namespace Marsyas;
00027
00028 GMMClassifier::GMMClassifier(mrs_string name):MarSystem("GMMClassifier",name)
00029 {
00030 prev_mode_= "predict";
00031 classSize_ = -1;
00032 featSize_ = -1;
00033 nMixtures_ = -1;
00034 addControls();
00035 }
00036
00037
00038 GMMClassifier::GMMClassifier(const GMMClassifier& a):MarSystem(a)
00039 {
00040 ctrl_mode_ = getctrl("mrs_string/mode");
00041 ctrl_nClasses_ = getctrl("mrs_natural/nClasses");
00042 ctrl_nMixtures_ = getctrl("mrs_natural/nMixtures");
00043 ctrl_iterations_ = getctrl("mrs_natural/iterations");
00044 ctrl_kiterations_ = getctrl("mrs_natural/kiterations");
00045 ctrl_eiterations_ = getctrl("mrs_natural/eiterations");
00046
00047 prev_mode_ = "predict";
00048 classSize_ = -1;
00049 featSize_ = -1;
00050 nMixtures_ = -1;
00051 }
00052
00053
00054 GMMClassifier::~GMMClassifier()
00055 {
00056 }
00057
00058
00059 MarSystem*
00060 GMMClassifier::clone() const
00061 {
00062 return new GMMClassifier(*this);
00063 }
00064
00065 void
00066 GMMClassifier::addControls()
00067 {
00068 addctrl("mrs_string/mode", "train", ctrl_mode_);
00069 ctrl_mode_->setState(true);
00070
00071 addctrl("mrs_natural/nClasses", -1, ctrl_nClasses_);
00072 ctrl_nClasses_->setState(true);
00073
00074 addctrl("mrs_natural/nMixtures", -1, ctrl_nMixtures_);
00075 ctrl_nMixtures_->setState(true);
00076
00077 addctrl("mrs_natural/iterations", 200, ctrl_iterations_);
00078 addctrl("mrs_natural/kiterations", 100, ctrl_kiterations_);
00079 addctrl("mrs_natural/eiterations", 20, ctrl_eiterations_);
00080 }
00081
00082 void
00083 GMMClassifier::initialize()
00084 {
00085 mrs_natural trainSize = trainMatrix_.getCols();
00086
00087 realvec temp(featSize_);
00088 realvec randstep(featSize_);
00089
00090 mrs_natural count;
00091 mrs_natural seedSize = 5;
00092 mrs_real rind;
00093 rind = ((mrs_real)rand() / (mrs_real)(RAND_MAX))*trainSize;
00094
00095 for (mrs_natural cl=0; cl < classSize_; cl++)
00096 {
00097 for (mrs_natural k=0; k < nMixtures_; k++)
00098 {
00100
00102 temp.setval(0.0);
00103 count = 0;
00104 for (mrs_natural c=0; c < seedSize; ++c)
00105 {
00106
00107 rind = ((mrs_real)rand() / (mrs_real)(RAND_MAX))*trainSize;
00108 while (trainMatrix_(labelRow_,(mrs_natural)rind)!= cl)
00109 rind = ((mrs_real)rand() / (mrs_real)(RAND_MAX))*trainSize;
00110
00111
00112 for(mrs_natural f=0; f < featSize_; ++f)
00113 temp(f) += trainMatrix_(f, (mrs_natural)rind);
00114 }
00115 temp /= seedSize;
00116
00117 for(mrs_natural f=0; f < featSize_; ++f)
00118 means_[cl](f, k) = temp(f);
00119
00121
00123
00124 mrs_natural classExamples = 0;
00125 vector<mrs_natural> classCols;
00126 for(mrs_natural c=0; c < trainSize; ++c)
00127 if(trainMatrix_(labelRow_, c) == cl)
00128 {
00129 classExamples++;
00130 classCols.push_back(c);
00131 }
00132
00133
00134
00135 realvec classFeatMatrix(featSize_, classExamples);
00136 for(mrs_natural c=0; c < classExamples; ++c)
00137 {
00138 for(mrs_natural f=0; f < featSize_; ++f)
00139 classFeatMatrix(f, c) = trainMatrix_(f, classCols[c]);
00140 }
00141
00142
00143 classFeatMatrix.varObs(temp);
00144
00145
00146 for(mrs_natural f=0; f < featSize_; ++f)
00147 vars_[cl](f, k) = temp(f);
00148 }
00149
00151
00153 for (mrs_natural k=0; k < nMixtures_; k++)
00154 for (mrs_natural f=0; f < featSize_; f++)
00155 {
00156 if (vars_[cl](f,k) != 0.0)
00157 covars_[cl](f,k) = 1.0 / vars_[cl](f,k);
00158 else
00159 covars_[cl](f,k) = 0.0;
00160 }
00161
00163
00165 weights_[cl].setval(1.0 / nMixtures_);
00166 }
00167
00169
00171 mrs_real dist = 0.0;
00172 mrs_natural min_k = 0;
00173
00174 likelihoods_.create(classSize_, nMixtures_);
00175
00176 for (mrs_natural i=0; i < kiterations_; ++i)
00177 {
00178 likelihoods_.setval(0.0);
00179
00180
00181 for (mrs_natural cl = 0; cl < classSize_; cl++)
00182 for (mrs_natural k=0; k < nMixtures_; k++)
00183 for (mrs_natural f=0; f < featSize_; f++)
00184 {
00185 omeans_[cl](f,k) = means_[cl](f,k);
00186 }
00187
00188
00189 for (mrs_natural cl=0; cl < classSize_; cl++)
00190 {
00191 means_[cl].setval(0.0);
00192 }
00193
00194
00195 for (mrs_natural c=0; c < trainSize; ++c)
00196 {
00197 mrs_real min = 100000000;
00198
00199
00200 mrs_natural cl = (mrs_natural)trainMatrix_(labelRow_, c);
00201 trainMatrix_.getCol(c, temp);
00202
00203
00204
00205 for (mrs_natural k=0; k < nMixtures_; k++)
00206 {
00207
00208 realvec omean;
00209 omeans_[cl].getCol(k, omean);
00210 realvec covar;
00211 covars_[cl].getCol(k, covar);
00212
00213
00214 dist = NumericLib::mahalanobisDistance(temp, omean, covar);
00215
00216 if (dist < min)
00217 {
00218 min = dist;
00219 min_k = k;
00220 }
00221 }
00222
00223
00224 for (mrs_natural f=0; f < featSize_; f++)
00225 {
00226 means_[cl](f, min_k) += temp(f);
00227 }
00228
00229
00230 likelihoods_(cl,min_k)++;
00231 }
00232
00233
00234 for (mrs_natural cl=0; cl < classSize_; cl++)
00235 {
00236 for (mrs_natural k=0; k < nMixtures_; k++)
00237 for (mrs_natural f=0; f < featSize_; f++)
00238 {
00239 if (likelihoods_(cl,k) != 0.0)
00240 means_[cl](f,k) /= likelihoods_(cl, k);
00241 }
00242
00243
00244 }
00245 }
00246
00247 classSizes_.create(classSize_);
00248 sum_.create(classSize_);
00249 likelihoods_.create(classSize_, nMixtures_);
00250 accumVec_.create(featSize_);
00251 temp_.create(featSize_);
00252 sprobs_.create(classSize_,nMixtures_);
00253
00254 probs_.reserve(classSize_);
00255 ssprobs_.reserve(classSize_);
00256 for (mrs_natural cl=0; cl < classSize_; ++cl)
00257 {
00258 probs_.push_back(realvec(trainSize, nMixtures_));
00259 ssprobs_.push_back(realvec(featSize_, nMixtures_));
00260 }
00261 }
00262
00263 mrs_real
00264 GMMClassifier::gaussian(mrs_natural cl, mrs_natural k, realvec& vec)
00265 {
00266 mrs_real res;
00267 mrs_real oldres;
00268 mrs_real temp;
00269 mrs_real det = 1.0;
00270
00271 for (mrs_natural f=0; f < featSize_; f++)
00272 det *= (vars_[cl])(f,k);
00273
00274 res = 1 / (factor_ * det);
00275 oldres = res;
00276
00277 realvec mean;
00278 means_[cl].getCol(k, mean);
00279 realvec covar;
00280 covars_[cl].getCol(k, covar);
00281 temp = NumericLib::mahalanobisDistance(vec, mean, covar);
00282
00283 res *= exp(-temp*0.5);
00284
00285 return res;
00286 }
00287
00288 void
00289 GMMClassifier::doEM()
00290 {
00291 realvec featVec;
00292 mrs_natural cl;
00293
00294
00295 classSizes_.setval(0.0);
00296 sum_.setval(0.0);
00297 sprobs_.setval(0.0);
00298 accumVec_.setval(0.0);
00299 for (cl=0; cl < classSize_; cl++)
00300 ssprobs_[cl].setval(0.0);
00301
00302 mrs_natural trainSize = trainMatrix_.getCols();
00303 mrs_real prob;
00304 mrs_real sum;
00305
00306
00307 for (mrs_natural c=0; c < trainSize; ++c)
00308 {
00309
00310 cl = (mrs_natural)trainMatrix_(labelRow_, c);
00311 classSizes_(cl)++;
00312 sum = 0.0;
00313
00314
00315 trainMatrix_.getCol(c, featVec);
00316
00317
00318
00319 for (mrs_natural k=0; k < nMixtures_; k++)
00320 {
00321
00322 likelihoods_(cl,k) = gaussian(cl,k, featVec);
00323
00324 sum += likelihoods_(cl,k);
00325 }
00326
00327
00328 for (mrs_natural k=0; k < nMixtures_; k++)
00329 {
00330
00331
00332 if (sum != 0.0)
00333 prob = likelihoods_(cl,k) / sum;
00334 else
00335 {
00336 prob = 0.0000000001;
00337 }
00338
00339 probs_[cl](c,k) = prob;
00340
00341
00342 sprobs_(cl,k) += prob;
00343
00344
00345 temp_ = featVec;
00346 temp_ *= prob;
00347
00348
00349 ssprobs_[cl].getCol(k, accumVec_);
00350 accumVec_ += temp_;
00351
00352
00353 for(mrs_natural f=0; f < featSize_; ++f)
00354 ssprobs_[cl](f, k) = accumVec_(f);
00355 }
00356 }
00357
00358 for (cl = 0; cl < classSize_; cl++)
00359 for (mrs_natural k=0; k < nMixtures_; k++)
00360 {
00361 weights_[cl](k) = sprobs_(cl,k) / classSizes_(cl);
00362 ssprobs_[cl].getCol(k, temp_);
00363 if (sprobs_(cl,k) != 0.0)
00364 {
00365
00366 temp_ /= sprobs_(cl,k);
00367
00368 for(mrs_natural f=0; f < featSize_; ++f)
00369 means_[cl](f,k) = temp_(f);
00370 }
00371 }
00372
00373 for (cl=0; cl < classSize_; cl++)
00374 ssprobs_[cl].setval(0.0);
00375
00376
00377
00378 for (mrs_natural t=0; t < trainSize; t++)
00379 {
00380
00381 cl = (mrs_natural)trainMatrix_(labelRow_, t);
00382
00383
00384 trainMatrix_.getCol(t, featVec);
00385
00386
00387
00388 for (mrs_natural k=0; k < nMixtures_; k++)
00389 {
00390 prob = (probs_[cl])(t,k);
00391 temp_ = featVec;
00392 realvec means;
00393 means_[cl].getCol(k, means);
00394 temp_ -= means;
00395 temp_.sqr();
00396 temp_ *= prob;
00397
00398 ssprobs_[cl].getCol(k, accumVec_);
00399 accumVec_ += temp_;
00400
00401
00402 for(mrs_natural f=0; f < featSize_; ++f)
00403 ssprobs_[cl](f, k) = accumVec_(f);
00404 }
00405 }
00406
00407 for (cl = 0; cl < classSize_; cl++)
00408 {
00409 for (mrs_natural k=0; k < nMixtures_; k++)
00410 {
00411 ssprobs_[cl].getCol(k, temp_);
00412 temp_ *= (1.0 / (sprobs_(cl,k)));
00413
00414
00415 for(mrs_natural f=0; f < featSize_; ++f)
00416 vars_[cl](f, k) = temp_(f);
00417 }
00418
00419 for (mrs_natural k=0; k < nMixtures_; k++)
00420 for (mrs_natural f=0; f < featSize_; f++)
00421 {
00422 if (vars_[cl](f, k) > 0.0)
00423 covars_[cl](f, k) = 1.0 / (vars_[cl](f, k));
00424 else
00425 {
00426 covars_[cl](f, k) = 10000000.0;
00427 vars_[cl](f, k) = 0.0000001;
00428 }
00429 }
00430 }
00431 }
00432
00433 void
00434 GMMClassifier::myUpdate(MarControlPtr sender)
00435 {
00436 (void) sender;
00437 MRSDIAG("GMMClassifier.cpp - GMMClassifier:myUpdate");
00438
00439 setctrl("mrs_natural/onSamples", getctrl("mrs_natural/inSamples"));
00440 setctrl("mrs_natural/onObservations", (mrs_natural)2);
00441 setctrl("mrs_real/osrate", getctrl("mrs_real/israte"));
00442 setctrl("mrs_string/onObsNames", "GT_label, Predicted_label,");
00443
00444 mrs_string mode = getctrl("mrs_string/mode")->to<mrs_string>();
00445
00446 mrs_natural classSize = getctrl("mrs_natural/nClasses")->to<mrs_natural>();
00447 mrs_natural nMixtures = getctrl("mrs_natural/nMixtures")->to<mrs_natural>();
00448 mrs_natural featSize = inObservations_-1;
00449
00450
00451
00452
00453 if((classSize != classSize_) || (nMixtures != nMixtures_) ||
00454 (featSize != featSize_))
00455 {
00456 classSize_ = classSize;
00457 nMixtures_ = nMixtures;
00458 featSize_ = featSize;
00459 labelRow_ = featSize_;
00460
00461 factor_ = pow(sqrt(TWOPI), (mrs_real)featSize_);
00462
00463
00464 means_.clear();
00465 omeans_.clear();
00466 vars_.clear();
00467 covars_.clear();
00468 weights_.clear();
00469 means_.reserve(classSize_);
00470 omeans_.reserve(classSize_);
00471 vars_.reserve(classSize_);
00472 covars_.reserve(classSize_);
00473 weights_.reserve(classSize_);
00474
00475
00476 for (mrs_natural cl=0; cl < classSize_; cl++)
00477 {
00478 realvec cmeans(featSize_, nMixtures_);
00479 realvec ocmeans(featSize_, nMixtures_);
00480 realvec cvars(featSize_, nMixtures_);
00481 realvec ccovars(featSize_, nMixtures_);
00482 realvec cweights(nMixtures_);
00483
00484
00485 means_.push_back(cmeans);
00486 omeans_.push_back(ocmeans);
00487 vars_.push_back(cvars);
00488 covars_.push_back(ccovars);
00489 weights_.push_back(cweights);
00490 }
00491 }
00492
00493
00494 if ((prev_mode_ == "train") && (mode == "predict"))
00495 {
00496 initialize();
00497
00498 for (mrs_natural i=0; i < iterations_ ; ++i)
00499 {
00500 doEM();
00501 }
00502
00503 prev_mode_ = mode;
00504 }
00505 }
00506
00507 void
00508 GMMClassifier::myProcess(realvec& in, realvec& out)
00509 {
00510 mrs_string mode = ctrl_mode_->to<mrs_string>();
00511
00512
00513 if ((prev_mode_ == "predict") && (mode == "train"))
00514 {
00515
00516
00517 trainMatrix_ = in;
00518 }
00519
00520 if (mode == "train")
00521 {
00522 MRSASSERT(trainMatrix_.getRows() == inObservations_);
00523
00524
00525 mrs_natural storedFeatVecs = trainMatrix_.getCols();
00526 trainMatrix_.stretch(inObservations_, storedFeatVecs + inSamples_);
00527
00528
00529 for(mrs_natural c=0; c < inSamples_; ++c)
00530 for(mrs_natural r = 0; r < inObservations_; ++r)
00531 trainMatrix_(r, c+storedFeatVecs) = in(r,c);
00532 }
00533
00534 if (mode == "predict")
00535 {
00536 mrs_real maxProb = 0.0;
00537 mrs_natural maxClass = 0;
00538 mrs_real prob;
00539 mrs_real dist;
00540 realvec vec;
00541 realvec means;
00542 realvec covars;
00543
00544 MRSASSERT(trainMatrix_.getRows() == inObservations_);
00545
00546 for(mrs_natural t=0; t < inSamples_; ++t)
00547 {
00548
00549 in.getCol(t, vec);
00550
00551 for (mrs_natural cl=0; cl < classSize_; cl++)
00552 {
00553 for (mrs_natural k=0; k < nMixtures_; k++)
00554 {
00555 means_[cl].getCol(k, means);
00556 covars_[cl].getCol(k, covars);
00557 dist = NumericLib::mahalanobisDistance(vec, means, covars);
00558 likelihoods_(cl,k) = weights_[cl](k) / dist;
00559 }
00560 prob = 0.0;
00561 for (mrs_natural k=0; k < nMixtures_; k++)
00562 {
00563 prob += likelihoods_(cl,k);
00564 }
00565 if (prob > maxProb)
00566 {
00567 maxProb = prob;
00568 maxClass = cl;
00569 }
00570 }
00571 out(0,t) = in(labelRow_, t);
00572 out(1,t) = (mrs_real)maxClass;
00573 }
00574 }
00575
00576 prev_mode_ = mode;
00577 }