BICO  1.1
 All Classes Namespaces Files Functions Variables Typedefs Pages
main.cpp
Go to the documentation of this file.
1 #include <iostream>
2 #include <sstream>
3 #include <fstream>
4 #include <random>
5 #include <ctime>
6 #include <time.h>
7 
8 #include <boost/algorithm/string.hpp>
9 
10 #include "src/point/l2metric.h"
12 #include "src/point/point.h"
14 #include "src/clustering/bico.h"
15 #include "src/misc/randomness.h"
21 
22 int main(int argc, char **argv)
23 {
24  using namespace CluE;
25 
26  time_t starttime, endtime;
27  double difference;
28 
29  if (argc < 8)
30  {
31  std::cout << "Usage: input n k d space output projections [seed]" << std::endl;
32  std::cout << " input = path to input file" << std::endl;
33  std::cout << " n = number of input points" << std::endl;
34  std::cout << " k = number of desired centers" << std::endl;
35  std::cout << " d = dimension of an input point" << std::endl;
36  std::cout << " space = coreset size" << std::endl;
37  std::cout << " projections = number of random projections used for nearest neighbour search" << std::endl;
38  std::cout << " in first level" << std::endl;
39  std::cout << " seed = random seed (optional)" << std::endl;
40  std::cout << std::endl;
41  std::cout << "7 arguments expected, got " << argc - 1 << ":" << std::endl;
42  for (int i = 1; i < argc; ++i)
43  std::cout << i << ".: " << argv[i] << std::endl;
44  return 1;
45  }
46 
47  // Read arguments
48  std::ifstream filestream(argv[1], std::ifstream::in);
49  int n = atoi(argv[2]);
50  int k = atoi(argv[3]);
51  int d = atoi(argv[4]);
52  int space = atoi(argv[5]);
53  std::ofstream outputstream(argv[6], std::ifstream::out);
54  int p = atoi(argv[7]);
55  if (argc >= 9)
56  Randomness::initialize(atoi(argv[8]));
57 
58  time(&starttime);
59 
60  // Initialize BICO
61  Bico<Point> bico(d, n, k, p, space, new SquaredL2Metric(), new PointWeightModifier());
62 
63  int pos = 0;
64  while (filestream.good())
65  {
66  // Read line and construct point
67  std::string line;
68  std::getline(filestream, line);
69  std::vector<std::string> stringcoords;
70  boost::split(stringcoords, line, boost::is_any_of(" "));
71 
72  std::vector<double> coords;
73  coords.reserve(stringcoords.size());
74  for (size_t i = 0; i < stringcoords.size(); ++i)
75  coords.push_back(atof(stringcoords[i].c_str()));
76  Point p(coords);
77 
78  if (p.dimension() != d)
79  {
80  std::clog << "Line skipped because line dimension is " << p.dimension() << " instead of " << d << std::endl;
81  continue;
82  }
83 
84  // Call BICO point update
85  bico << p;
86  }
87 
88  // Retrieve coreset
89  ProxySolution<Point>* sol = bico.compute();
90 
91  // Output coreset size
92  outputstream << sol->proxysets[0].size() << "\n";
93 
94  // Output coreset points
95  for (size_t i = 0; i < sol->proxysets[0].size(); ++i)
96  {
97  // Output weight
98  outputstream << sol->proxysets[0][i].getWeight() << " ";
99  // Output center of gravity
100  for (size_t j = 0; j < sol->proxysets[0][i].dimension(); ++j)
101  {
102  outputstream << sol->proxysets[0][i][j];
103  if (j < sol->proxysets[0][i].dimension() - 1)
104  outputstream << " ";
105  }
106  outputstream << "\n";
107  }
108  outputstream.close();
109 
110  return 0;
111 }
static void initialize(uint_fast32_t seed)
Definition: randomness.h:28
Squared L2 metric for Point.
int main(int argc, char **argv)
Definition: main.cpp:22
Fast computation of k-means coresets in a data stream.
Definition: bico.h:43
size_t dimension() const
Definition: point.h:87
Modifies the weight of a Point.
Data structure for proxies.
Definition: proxysolution.h:19
Weighted point of arbitrary dimension.
Definition: point.h:17
virtual ProxySolution< T > * compute()
Returns a coreset of all points read so far.
Definition: bico.h:642