Marsyas  0.5.0-beta1
/Users/jleben/code/marsyas/src/marsyas/Collection.cpp
Go to the documentation of this file.
00001 /*
00002 ** Copyright (C) 1998-2010 George Tzanetakis <gtzan@cs.uvic.ca>
00003 **
00004 ** This program is free software; you can redistribute it and/or modify
00005 ** it under the terms of the GNU General Public License as published by
00006 ** the Free Software Foundation; either version 2 of the License, or
00007 ** (at your option) any later version.
00008 **
00009 ** This program is distributed in the hope that it will be useful,
00010 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
00011 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012 ** GNU General Public License for more details.
00013 **
00014 ** You should have received a copy of the GNU General Public License
00015 ** along with this program; if not, write to the Free Software
00016 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00017 */
00018 
00019 #include <marsyas/common_source.h>
00020 #include <marsyas/Collection.h>
00021 #include <algorithm>
00022 #include <iterator>
00023 #include <time.h>  // for srand(time(NULL))
00024 
00025 
00026 
00027 using std::ostringstream;
00028 using std::vector;
00029 using std::ifstream;
00030 using std::ofstream;
00031 using std::ostream_iterator;
00032 using std::endl;
00033 using std::ostream;
00034 using std::istream;
00035 using std::string;
00036 
// Value of the MARSYAS_DATADIR environment variable, captured once during
// static initialization. Holds "" when the variable is not set.
static std::string marsyas_datadir_(
  std::getenv("MARSYAS_DATADIR") != NULL ?
  std::getenv("MARSYAS_DATADIR") : "");
00041 
00042 namespace Marsyas
00043 {
00044 
00045 // Utility function. Should move this somewhere publicly accessible for code re-use.
00046 mrs_string join(const vector<mrs_string>& v, const mrs_string delim)
00047 {
00048   ostringstream os;
00049   copy(v.begin(), v.end(), ostream_iterator<mrs_string>(os, delim.c_str()));
00050 
00051   return os.str();
00052 }
00053 
00054 Collection::Collection()
00055 {
00056   collectionList_.reserve(1024);
00057   hasLabels_ = false;
00058   store_labels_ = true;
00059   // initialize random number generation.
00060   srand( (unsigned int) time( NULL) );
00061 }
00062 
00063 Collection::~Collection()
00064 {
00065 }
00066 
00067 void
00068 Collection::setName(mrs_string name)
00069 {
00070   name_ = name;
00071 }
00072 
00073 void
00074 Collection::store_labels(mrs_bool store)
00075 {
00076   store_labels_ = store;
00077 }
00078 
00079 
00080 void
00081 Collection::read(mrs_string filename)
00082 {
00083   ifstream is(filename.c_str());
00084   name_ = filename.substr(0, filename.rfind(".", filename.length()));
00085 
00086   is >> (*this);
00087 }
00088 
00089 
00090 void
00091 Collection::write(mrs_string filename)
00092 {
00093   ofstream os(filename.c_str());
00094   os << (*this) << endl;
00095 }
00096 
00097 void
00098 Collection::labelAll(mrs_string label)
00099 {
00100   if (hasLabels_ == false)
00101   {
00102     hasLabels_ = true;
00103     labelList_.reserve(collectionList_.size());
00104     for (mrs_natural i = 0; i < (mrs_natural)collectionList_.size(); ++i)
00105       labelList_.push_back(label);
00106   }
00107   else
00108   {
00109     for (mrs_natural i=0; i < (mrs_natural)collectionList_.size(); ++i)
00110       labelList_[i] = label;
00111   }
00112 }
00113 
00114 ostream&
00115 operator<<(ostream& o, const Collection& l)
00116 {
00117   // o << "# MARSYAS Collection " << endl;
00118   // o << "# name = " << l.name_ << endl << endl;
00119   for (mrs_natural i=0; i < (mrs_natural)l.collectionList_.size(); ++i)
00120   {
00121     o << l.collectionList_[i];
00122     if (l.hasLabels_)
00123       o << "\t" << l.labelList_[i];
00124     o << endl;
00125   }
00126   //o << endl;
00127   return o;
00128 }
00129 
00130 
00131 
00132 mrs_natural
00133 Collection::size()
00134 {
00135   return (mrs_natural) collectionList_.size();
00136 }
00137 
00138 mrs_natural
00139 Collection::getSize()
00140 {
00141   return (mrs_natural) collectionList_.size();
00142 }
00143 
00144 
00145 mrs_string
00146 Collection::name()
00147 {
00148   return name_;
00149 }
00150 
00151 void
00152 Collection::add(mrs_string entry)
00153 {
00154   collectionList_.push_back(entry);
00155   hasLabels_ = false;
00156 }
00157 
00158 
00159 
00160 void
00161 Collection::clear()
00162 {
00163   collectionList_.clear();
00164   labelList_.clear();
00165 
00166   // Do not clear labelNames so that multiple collections
00167   // can share the same label set
00168   // maybe at some point make this behavior controllable
00169 }
00170 
00171 
00172 void
00173 Collection::add(mrs_string entry, mrs_string label)
00174 {
00175 
00176   collectionList_.push_back(entry);
00177   hasLabels_ = true;
00178   labelList_.push_back(label);
00179 
00180   if (store_labels_) {
00181     if (find(labelNames_.begin(), labelNames_.end(), label) == labelNames_.end()) {
00182       labelNames_.push_back(label);
00183     }
00184     sort(labelNames_.begin(), labelNames_.end());
00185   }
00186 
00187 }
00188 
00189 
00190 
00191 
00192 mrs_natural
00193 Collection::getNumLabels()
00194 {
00195   return (mrs_natural) labelNames_.size();
00196 }
00197 
00198 mrs_string
00199 Collection::labelName(mrs_natural i)
00200 {
00201   if (i >= 0 && i < (mrs_natural)labelNames_.size())
00202     return labelNames_[i];
00203 
00204   return EMPTYSTRING;
00205 }
00206 
00207 mrs_string
00208 Collection::getLabelNames()
00209 {
00210   return join(labelNames_, ",");
00211 }
00212 
00213 mrs_bool
00214 Collection::hasLabels()
00215 {
00216   return hasLabels_;
00217 }
00218 
00219 void
00220 Collection::shuffle()
00221 {
00222   // Use a Fisher-Yates shuffle
00223   // http://en.wikipedia.org/wiki/Fisher-Yates_shuffle
00224   mrs_natural n = (mrs_natural)collectionList_.size();
00225   while (n > 1)
00226   {
00227     // Generate a random index in the range [0, n).
00228     mrs_natural k = (mrs_natural)(n * ((mrs_real)rand() / ((mrs_real)(RAND_MAX) + (mrs_real)1)));
00229 
00230     n--;
00231     swap(collectionList_[n], collectionList_[k]);
00232     if (hasLabels_)
00233       swap(labelList_[n], labelList_[k]);
00234   }
00235 }
00236 
00237 mrs_string
00238 Collection::toLongString()
00239 {
00240   return join(collectionList_, ",");
00241 }
00242 
00243 mrs_natural
00244 Collection::labelNum(mrs_string label)
00245 {
00246 
00247   vector<mrs_string>::iterator it = find(labelNames_.begin(), labelNames_.end(), label);
00248   if (it == labelNames_.end())
00249     return -1;
00250 
00251   return (mrs_natural) distance(labelNames_.begin(), it);
00252 
00253 }
00254 mrs_real
00255 Collection::regression_label(mrs_natural i)
00256 {
00257   if (hasLabels_ && i >= 0 && i < (mrs_natural)labelList_.size()) {
00258     return (mrs_real) atof(labelList_[i].c_str());
00259   }
00260   return 0.0;
00261 }
00262 
00263 mrs_string
00264 Collection::labelEntry(mrs_natural i)
00265 {
00266   if (hasLabels_ && i >= 0 && i < (mrs_natural)labelList_.size())
00267       return labelList_[i];
00268 
00269   return "No label";
00270 }
00271 
00272 mrs_string
00273 Collection::entry(mrs_natural i)
00274 {
00275   if (i >= 0 && i < (mrs_natural)collectionList_.size())
00276     return collectionList_[i];
00277 
00278   return mrs_string();
00279 }
00280 
00281 
00282 void
00283 Collection::concatenate(vector<Collection> cls)
00284 {
00285   for (mrs_natural cj = 0; cj < (mrs_natural)cls.size(); cj++)
00286   {
00287     Collection l = cls[cj];
00288     if (l.hasLabels_)
00289       hasLabels_ = true;
00290 
00291     for (mrs_natural i = 0; i < l.size(); ++i)
00292       add(l.entry(i), l.labelEntry(i));
00293   }
00294 }
00295 
00296 
/* Replaces the first occurrence of `from` inside `str` with `to`, in place.
 Returns true when a replacement was made and false when `from` does not
 occur in `str` (in which case `str` is left unchanged).
 Adapted from
 http://stackoverflow.com/questions/3418231/c-replace-part-of-a-string-with-another-string
 -gp */
bool replace(std::string& str, const std::string& from, const std::string& to) {
  const string::size_type hit = str.find(from);
  const bool found = (hit != std::string::npos);
  if (found)
    str.replace(hit, from.length(), to);
  return found;
}
00307 
00308 
00309 istream&
00310 operator>>(istream& i, Collection& l)
00311 {
00312   MRSDIAG("Collection.cpp - operator>>");
00313 
00314   mrs_string fileEntry;
00315   while (getline(i, fileEntry))
00316   {
00317     // Skip blank lines.
00318     if (fileEntry.empty())
00319       continue;
00320 
00321     // Skip comment lines.
00322     if (fileEntry[0] == '#')
00323       continue;
00324 
00325     // Check to see if there is a label. Could use rfind for efficiency
00326     // if we were sure there weren't tabs after the label.
00327     if (marsyas_datadir_.length() > 0) {
00328       replace(fileEntry, "MARSYAS_DATADIR", marsyas_datadir_);
00329     }
00330     mrs_string::size_type loc = fileEntry.find('\t', 0);
00331     if (loc != mrs_string::npos)
00332     {
00333       mrs_string file = fileEntry.substr(0, loc);
00334       mrs_string label = fileEntry.substr(loc+1, fileEntry.size());
00335       l.add(file, label);
00336     }
00337     else {
00338       l.add(fileEntry);
00339     }
00340   }
00341 
00342   return i;
00343 }
00344 }