00001 #include "WekaData.h"
00002
00003 using namespace std;
00004 using namespace Marsyas;
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 WekaData::WekaData():cols_(0),rows_(0), isFold_(false)
00015 {
00016 }
00017
00018 WekaData::~WekaData()
00019 {
00020
00021
00022
00023
00024
00025
00026 if (!isFold_)
00027 Clear();
00028 }
00029
00030 void
00031 WekaData::setFold(bool isFold)
00032 {
00033 isFold_ = isFold;
00034 }
00035
00036
00037
00038 void WekaData::Create(mrs_natural cols)
00039 {
00040 MRSASSERT(cols>=0);
00041 this->Clear();
00042 cols_ = cols;
00043 rows_ = 0;
00044 }
00045
00046
00047
00048 void WekaData::Clear()
00049 {
00050 if (rows_ > 0) {
00051 vector<vector<mrs_real>*>::iterator iter = this->begin();
00052 while (iter != this->end()) {
00053 delete (*iter);
00054 this->erase(iter);
00055 }
00056 }
00057 this->clear();
00058 filenames_.clear();
00059
00060 }
00061
00062
00063 void
00064 WekaData::NormMaxMinRow(realvec& in)
00065 {
00066 int ii;
00067 for(ii=0; ii<(int)in.getSize()-1; ++ii)
00068 {
00069 in(ii) = (in(ii) - minimums_(ii)) / (maximums_(ii) - minimums_(ii));
00070 }
00071 }
00072
00073 void
00074 WekaData::NormMaxMin()
00075 {
00076 minimums_.create(cols_-1);
00077 maximums_.create(cols_-1);
00078 maximums_.setval(DBL_MIN);
00079 minimums_.setval(DBL_MAX);
00080
00081
00082 for(vector<vector<mrs_real>*>::const_iterator citer = this->begin(); citer!=this->end(); citer++)
00083 {
00084 const vector<mrs_real> *row = (*citer);
00085 int ii;
00086 for(ii=0; ii<(int)row->size()-1; ++ii)
00087 {
00088 if (row->at(ii) > maximums_(ii))
00089 maximums_(ii) = row->at(ii);
00090 if (row->at(ii) < minimums_(ii))
00091 minimums_(ii) = row->at(ii);
00092 }
00093 }
00094
00095
00096
00097 for(vector<vector<mrs_real>*>::const_iterator citer = this->begin(); citer!=this->end(); citer++)
00098 {
00099 vector<mrs_real> *row = (*citer);
00100 int ii;
00101 for(ii=0; ii<(int)row->size()-1; ++ii)
00102 {
00103
00104 if (maximums_(ii) - minimums_(ii) == 0)
00105 row->at(ii) = 0;
00106 else
00107 row->at(ii) = ((row->at(ii) - minimums_(ii)) / (maximums_(ii) - minimums_(ii)));
00108 }
00109 }
00110
00111
00112
00113
00114 }
00115
00116 mrs_realvec WekaData::GetMinimums() const
00117 {
00118 return minimums_;
00119 }
00120
00121 mrs_realvec WekaData::GetMaximums() const
00122 {
00123 return maximums_;
00124 }
00125
00126
00127
00128
00129 void WekaData::Shuffle()
00130 {
00131 srand(0);
00132
00133 mrs_natural size = this->size()-1;
00134 for (mrs_natural ii=0; ii<size; ++ii)
00135 {
00136 mrs_natural rind = (mrs_natural)(((mrs_real)rand() / (mrs_real)(RAND_MAX))*size);
00137
00138 swapRows(ii, rind);
00139 }
00140 }
00141
00142
00143
00144 void WekaData::swapRows(mrs_natural l, mrs_natural r)
00145 {
00146 vector<mrs_real> *temp = this->at(l);
00147 this->at(l) = this->at(r);
00148 this->at(r) = temp;
00149 }
00150
00151 mrs_natural WekaData::partition(mrs_natural attIndex, mrs_natural l, mrs_natural r)
00152 {
00153 mrs_real pivot = this->at((l+r)/2)->at(attIndex);
00154 while (l < r)
00155 {
00156 while ((this->at(l)->at(attIndex) < pivot) && (l < r))
00157 {
00158 l++;
00159 }
00160
00161 while ((this->at(r)->at(attIndex) > pivot) && (l < r))
00162 {
00163 r--;
00164 }
00165
00166 if (l < r)
00167 {
00168 swapRows(l, r);
00169 l++;
00170 r--;
00171 }
00172 }
00173 if ((l == r) && (this->at(r)->at(attIndex) > pivot))
00174 {
00175 r--;
00176 }
00177
00178 return r;
00179 }
00180
00189
00190
00191
00192 void WekaData::quickSort(mrs_natural attIndex, mrs_natural left, mrs_natural right)
00193 {
00194 if (left < right)
00195 {
00196 int middle = partition(attIndex, left, right);
00197 quickSort(attIndex, left, middle);
00198 quickSort(attIndex, middle + 1, right);
00199 }
00200 }
00201
00202
00203
00204
00205 void WekaData::Sort(mrs_natural attr)
00206 {
00207 MRSASSERT(attr>=0&&attr<cols_);
00208 quickSort(attr, 0, this->size()-1);
00209 }
00210
00211
00212 void WekaData::Append(const realvec& in)
00213 {
00214 MRSASSERT(in.getRows()==cols_);
00215
00216
00217 if (in(in.getRows()-1, 0) >=0)
00218 {
00219 data_ = new vector<mrs_real>(cols_);
00220 for(mrs_natural ii=0; ii<in.getRows(); ++ii)
00221 {
00222 data_->at(ii) = in(ii, 0);
00223 }
00224 Append(data_);
00225 }
00226
00227 }
00228
00229
00230
00231
00232
00233 void WekaData::Append(vector<mrs_real> *data)
00234 {
00235 MRSASSERT(data!=NULL && (int)data->size()==cols_);
00236 rows_++;
00237
00238 this->push_back(data);
00239 }
00240
00241
00242
00243 void WekaData::AppendFilename(mrs_string fname)
00244 {
00245 filenames_.push_back(fname);
00246 }
00247
00248 mrs_string WekaData::GetFilename(mrs_natural row) const
00249 {
00250 return (mrs_string)filenames_.at(row);
00251 }
00252
00253
00254
00255 mrs_natural WekaData::GetClass(mrs_natural row) const
00256 {
00257 return (mrs_natural)this->at(row)->at(cols_-1);
00258 }
00259
00260
00261 void WekaData::Dump(const mrs_string& filename, const vector<mrs_string>& classNames) const
00262 {
00263 char buffer[32];
00264
00265 ofstream *mis = new ofstream;
00266
00267 mis->open(filename.c_str(), ios_base::out | ios_base::trunc );
00268 MRSASSERT( mis->is_open() );
00269
00270 for(vector<vector<mrs_real>*>::const_iterator citer = this->begin(); citer!=this->end(); citer++)
00271 {
00272 bool first = true;
00273 const vector<mrs_real> *row = (*citer);
00274 int ii;
00275 for(ii=0; ii<(int)row->size()-1; ++ii)
00276 {
00277 if(!first)
00278 mis->write(", ", 2);
00279 first = false;
00280
00281 sprintf(buffer, "%09.4f", row->at(ii));
00282 mis->write(buffer, strlen(buffer));
00283 }
00284 mis->write(", ", 2);
00285 mrs_natural classIndex = (mrs_natural)row->at(ii);
00286 mis->write(classNames[classIndex].c_str(), strlen(classNames[classIndex].c_str()));
00287 mis->write("\n", 1);
00288 }
00289
00290 mis->close();
00291 delete mis;
00292 }