00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include "common.h"
00020 #include "PeakClusterSelect.h"
00021
00022 using std::min;
00023 using std::max;
00024 using std::ostringstream;
00025 using namespace Marsyas;
00026
00027 static std::ofstream outTextFile;
00028
00029
00030 #ifdef DBG_FILE_OUT
00031 static const std::string outFileName("d:/temp/peakcluster.txt");
00032 #endif
00033
00034
00035 PeakClusterSelect::PeakClusterSelect(mrs_string name):MarSystem("PeakClusterSelect", name)
00036 {
00037 addControls();
00038 }
00039
00040 PeakClusterSelect::PeakClusterSelect(const PeakClusterSelect& a) : MarSystem(a)
00041 {
00042 ctrl_numClustersToKeep_ = getctrl("mrs_natural/numClustersToKeep");
00043 }
00044
00045 PeakClusterSelect::~PeakClusterSelect()
00046 {
00047 #ifdef DBG_FILE_OUT
00048 outTextFile.close ();
00049 #endif
00050 }
00051
00052 MarSystem*
00053 PeakClusterSelect::clone() const
00054 {
00055 return new PeakClusterSelect(*this);
00056 }
00057
00058 void
00059 PeakClusterSelect::addControls()
00060 {
00061 addctrl("mrs_natural/numClustersToKeep", 1, ctrl_numClustersToKeep_);
00062 ctrl_numClustersToKeep_->setState(false);
00063 }
00064
00065 void
00066 PeakClusterSelect::myUpdate(MarControlPtr sender)
00067 {
00068 (void) sender;
00069 MRSDIAG("PeakClusterSelect.cpp - PeakClusterSelect:myUpdate");
00070
00071 ctrl_onObservations_->setValue(1, NOUPDATE);
00072 ctrl_onSamples_->setValue(ctrl_inSamples_, NOUPDATE);
00073 ctrl_osrate_->setValue(ctrl_israte_, NOUPDATE);
00074 ctrl_onObsNames_->setValue("peakLabels", NOUPDATE);
00075
00076 #ifdef DBG_FILE_OUT
00077 if (!outTextFile.good () || ! outTextFile.is_open ())
00078 outTextFile.open(outFileName.c_str ());
00079 #endif
00080 }
00081
00082 void
00083 PeakClusterSelect::sort(realvec& rv, mrs_natural dimension, mrs_natural left, mrs_natural right, mrs_bool sortColumns)
00084 {
00085 if( left < right )
00086 {
00087 int part = partition(rv, dimension, left, right, sortColumns);
00088 sort(rv, dimension, left, part-1, sortColumns);
00089 sort(rv, dimension, part+1, right, sortColumns);
00090 }
00091 }
00092
00093 int
00094 PeakClusterSelect::partition(realvec& rv, mrs_natural dimension, mrs_natural left, mrs_natural right, mrs_bool sortColumns)
00095 {
00096
00097 int pivot_i = rand()%(right-left+1) + left;
00098
00099 swap(rv, pivot_i, right, sortColumns);
00100
00101 mrs_real pivot_val;
00102 if( sortColumns )
00103 pivot_val = rv(dimension,right);
00104 else
00105 pivot_val = rv(right,dimension);
00106
00107 int i = left-1;
00108
00109 if( sortColumns )
00110 {
00111 for( int j=left ; j<right ; j++ )
00112 {
00113 if( rv(dimension,j) <= pivot_val )
00114 {
00115 ++i;
00116 swap(rv, i, j, sortColumns);
00117 }
00118 }
00119
00120 swap(rv, i+1, right, sortColumns);
00121 }
00122 else
00123 {
00124 for( int j=left ; j<right ; j++ )
00125 {
00126 if( rv(j,dimension) <= pivot_val )
00127 {
00128 ++i;
00129 swap(rv, i, j, sortColumns);
00130 }
00131 }
00132
00133 swap(rv, i+1, right, sortColumns);
00134 }
00135
00136 return i+1;
00137 }
00138
00139 void
00140 PeakClusterSelect::swap(realvec& rv, mrs_natural sample1, mrs_natural sample2, mrs_bool swapColumns)
00141 {
00142 if( swapColumns )
00143 {
00144 int rows = rv.getRows();
00145 mrs_real tmp;
00146
00147 for( int i=0 ; i<rows ; ++i )
00148 {
00149 tmp = rv(i, sample1);
00150 rv(i,sample1) = rv(i,sample2);
00151 rv(i,sample2) = tmp;
00152 }
00153 }
00154 else
00155 {
00156 int cols = rv.getCols();
00157 mrs_real tmp;
00158
00159 for( int i=0 ; i<cols ; ++i )
00160 {
00161 tmp = rv(sample1,i);
00162 rv(sample1,i) = rv(sample2,i);
00163 rv(sample2,i) = tmp;
00164 }
00165 }
00166 }
00167
00168 void
00169 PeakClusterSelect::myProcess(realvec& in, realvec& out)
00170 {
00171 mrs_natural t;
00172 mrs_natural numClustersToKeep = ctrl_numClustersToKeep_->to<mrs_natural>();
00173 mrs_natural curNumClusters=-1, i, j, k, curClusterLabel;
00174 mrs_natural numPeaks = ctrl_inSamples_->to<mrs_natural>();
00175
00176
00177 for (t = 0; t < inSamples_; t++)
00178 if( in(0,t) > curNumClusters )
00179 curNumClusters = (mrs_natural)in(0,t);
00180 curNumClusters += 1;
00181
00182 mrs_realvec intraClusterSimilarities(3, curNumClusters);
00183 mrs_realvec clusterSimilarity (curNumClusters, curNumClusters);
00184 mrs_realvec clusterNorm (curNumClusters, curNumClusters);
00185 mrs_realvec keepMe(curNumClusters);
00186 mrs_real intraSimKeepThresh = .5;
00187
00188 clusterSimilarity.setval (0.);
00189 clusterNorm.setval (0.);
00190 keepMe.setval (1.);
00191
00192 for( i=0 ; i<curNumClusters ; ++i )
00193 {
00194
00195
00196
00197 intraClusterSimilarities(0,i) = i;
00198
00199 intraClusterSimilarities(1,i) = 0;
00200
00201 intraClusterSimilarities(2,i) = 0;
00202 }
00203
00204
00205
00206
00207 for( i=0 ; i<numPeaks ; ++i )
00208 {
00209 mrs_realvec numAcc(curNumClusters);
00210 mrs_realvec similarity(curNumClusters);
00211
00212 numAcc.setval (0.);
00213 similarity.setval (0.);
00214 for( j=0 ; j<numPeaks ; j++ )
00215 {
00216 if (i==j)
00217 continue;
00218 similarity((mrs_natural)(in(0,j)+.1)) += in (i+1, j);
00219 clusterNorm ((mrs_natural)(in(0,i)+.1),(mrs_natural)(in(0,j)+.1)) += 1;
00220 }
00221 for (k = 0; k < curNumClusters; k++)
00222 clusterSimilarity ((mrs_natural)(in(0,i)+.1), k) += similarity(k);
00223 }
00224
00225 for (i = 0; i < curNumClusters; i++)
00226 for (j = 0; j < curNumClusters; j++)
00227 clusterSimilarity(i,j) /= (clusterNorm(i,j)>0)? clusterNorm(i,j) : 1.;
00228
00229
00230 mrs_realvec silhouetteCoeffs (curNumClusters);
00231 silhouetteCoeffs.setval(0.);
00232
00233 for (k = 0; k < curNumClusters; k++)
00234 {
00235 mrs_real selfSim, maximum, minSim = 0;
00236 selfSim = clusterSimilarity(k,k);
00237 for (i = 0; i < curNumClusters; i++)
00238 {
00239 if (i == k)
00240 continue;
00241
00242 minSim += clusterSimilarity(k,i);
00243 }
00244 minSim /= (curNumClusters-1);
00245
00246 if ((maximum = max(selfSim, minSim)) != 0)
00247 silhouetteCoeffs(k) = (selfSim - minSim)/ maximum;
00248 }
00249
00250
00251 for (k = 0; k < curNumClusters; k++)
00252 intraClusterSimilarities(2,k) = clusterSimilarity(k,k);
00253
00254 #ifdef MARSYAS_MATLAB
00255 #ifdef MTLB_DBG_LOG
00256
00257
00258
00259
00260
00261 #endif
00262 #endif
00263
00264
00265
00266
00267 sort( intraClusterSimilarities, 2, 0, curNumClusters-1 );
00268
00269
00270 #ifdef DBG_FILE_OUT
00271 if (outTextFile.good ())
00272 {
00273 if (curNumClusters < 6)
00274 {
00275 for (k = 0; k < 6-curNumClusters; k++)
00276 outTextFile << 0 <<"\t";
00277 }
00278 for (k = 0; k < curNumClusters; k++)
00279 outTextFile << silhouetteCoeffs(k) <<"\t"; ;
00280
00281
00282 outTextFile << std::endl;
00283 }
00284 #endif
00285
00286
00287 if (numClustersToKeep == 0)
00288 {
00289 const mrs_real intraThreshBounds[2] = {.3,.6};
00290 const mrs_real silThreshBound = 1./curNumClusters;
00291 intraSimKeepThresh = max(intraThreshBounds[0],min(intraThreshBounds[1],.5*(intraClusterSimilarities(2,0) + intraClusterSimilarities(2, curNumClusters-1))));
00292 for (k = 0; k < curNumClusters; k++)
00293 {
00294
00295 if (intraClusterSimilarities(2,k) < intraSimKeepThresh)
00296 keepMe(k) = 0;
00297
00298
00299 if (silhouetteCoeffs((mrs_natural)(intraClusterSimilarities(0,k)+.1)) < silThreshBound)
00300 keepMe(k) = 0;
00301 }
00302 numClustersToKeep = (mrs_natural)(keepMe.sum () +.1);
00303
00304 if (numClustersToKeep == curNumClusters)
00305 keepMe(0) = 0;
00306
00307 }
00308 else
00309 {
00310 for (k = 0; k < (curNumClusters - numClustersToKeep); k++)
00311 keepMe(k) = 0;
00312 }
00313
00314 for (t = 0; t < inSamples_; t++)
00315 {
00316 curClusterLabel = (mrs_natural)in(0,t);
00317 out(0,t) = curClusterLabel;
00318
00319 for( k=0 ; k < curNumClusters; k++)
00320 {
00321 if( curClusterLabel == intraClusterSimilarities(0,k) )
00322 {
00323 if (keepMe(k) < .5)
00324 out(0,t) = (curClusterLabel)? -curClusterLabel : -1;
00325 break;
00326 }
00327 }
00328 }
00329 }