BICO  1.1
 All Classes Namespaces Files Functions Variables Typedefs Pages
bico.h
Go to the documentation of this file.
1 #ifndef BICO_H
2 #define BICO_H
3 
4 #include <array>
5 #include <cmath>
6 #include <memory>
7 #include <random>
8 #include <vector>
9 
10 #include "../base/streamingalgorithm.h"
11 #include "../base/dissimilaritymeasure.h"
12 #include "../base/solutionprovider.h"
13 #include "../base/weightmodifier.h"
14 #include "../base/partitionprovider.h"
15 #include "../clustering/cfrentry.h"
16 #include "../datastructure/proxysolution.h"
17 #include "../evaluation/kmeansevaluator.h"
18 #include "../exception/invalidruntimeconfigurationexception.h"
19 #include "../misc/randomness.h"
20 
21 namespace CluE
22 {
23 
43 template<typename T> class Bico : public StreamingAlgorithm<T>
44 {
45 private:
46 
50  class BicoNode
51  {
52  public:
53  typedef std::pair<CFREntry<T>, BicoNode*> FeaturePair;
54  typedef std::list<FeaturePair> FeatureList;
55 
61  objectId(outer.nodeIdCounter),
62  outer(outer),
63  features()
64  {
65  ++outer.nodeIdCounter;
66  }
67 
71  void clear()
72  {
73  for (auto it = features.begin(); it != features.end(); ++it)
74  delete it->second;
75  }
76 
82  typename FeatureList::iterator insert(CFREntry<T> const & feature)
83  {
84  return features.insert(features.end(),
85  FeaturePair(feature, new BicoNode(outer)));
86  }
87 
92  typename FeatureList::iterator begin()
93  {
94  return features.begin();
95  }
96 
101  typename FeatureList::iterator end()
102  {
103  return features.end();
104  }
105 
110  size_t size()
111  {
112  return features.size();
113  }
114 
119  bool empty()
120  {
121  return features.empty();
122  }
123 
131  typename FeatureList::iterator nearest(T const & element, int level)
132  {
133  typename FeatureList::iterator minIt = features.end();
134  // Nearest neighbour search based on projections in level 1
135  if (level == 1)
136  {
137  // Project point and calculate projection bucket number
138  double val = outer.project(element, 0);
139  int bucket_number = outer.calcBucketNumber(0, val);
140  int mini = 0;
141  int bucket_min = bucket_number;
142  int mins;
143 
144  if ((bucket_number < 0) || (bucket_number > outer.buckets[0].size() - 1))
145  {
146  // The bucket does not exist (yet)
147  mins = 0;
148  }
149  else
150  {
151  // Search for the projection with smallest bucket size
152  mins = outer.buckets[mini][bucket_min].size();
153  for (int i = 1; i < outer.L; i++)
154  {
155  val = outer.project(element, i);
156  bucket_number = outer.calcBucketNumber(i, val);
157  if ((bucket_number >= 0) & (bucket_number <= outer.buckets[i].size() - 1))
158  {
159  int s = outer.buckets[i][bucket_number].size();
160  if (s < mins)
161  {
162  mins = s;
163  bucket_min = bucket_number;
164  mini = i;
165  }
166  }
167  else
168  {
169  mins = 0;
170  bucket_min = bucket_number;
171  mini = i;
172  break;
173  }
174  }
175 
176  }
177 
178  bucket_number = bucket_min;
179  int rnd = mini;
180 
181  if (bucket_number < 0)
182  {
183  // Bucket does not exist => create one
184  outer.allocateBucket(rnd, true);
185  }
186  else if (bucket_number > outer.buckets[rnd].size() - 1)
187  {
188  // Bucket does not exist => create one
189  outer.allocateBucket(rnd, false);
190  }
191  else
192  {
193  // Bucket does exist => search nearest point in bucket
194  double minDist = -1;
195 
196  for (auto it = outer.buckets[rnd][bucket_number].begin(); it != outer.buckets[rnd][bucket_number].end(); ++it)
197  {
198  double tmpDist = outer.measure->dissimilarity((*it)->first.representative, element);
199  if (tmpDist < minDist || minDist == -1)
200  {
201  minDist = tmpDist;
202  minIt = (*it);
203  }
204  }
205 
206  }
207  }
208  // Simple nearest neighbour search in all other levels
209  else
210  {
211  double minDist = -1;
212  for (auto it = features.begin(); it != features.end(); ++it)
213  {
214  double tmpDist = outer.measure->dissimilarity(it->first.representative, element);
215  if (tmpDist < minDist || minDist == -1)
216  {
217  minDist = tmpDist;
218  minIt = it;
219  }
220  }
221  }
222 
223  return minIt;
224  }
225 
230  void erase(typename FeatureList::iterator pos)
231  {
232  features.erase(pos);
233  }
234 
240  void spliceAllTo(BicoNode* to, typename FeatureList::iterator pos)
241  {
242  to->features.splice(pos, features);
243  }
244 
251  void spliceElementTo(typename FeatureList::iterator it, BicoNode* to, typename FeatureList::iterator pos)
252  {
253  to->features.splice(pos, features, it);
254  }
255 
260  int id()
261  {
262  return objectId;
263  }
264 
265  private:
269  int objectId;
270 
275 
280  };
281 
282 public:
297  Bico(size_t dimension, size_t n, size_t k, size_t p, size_t nMax,
299 
304  virtual ProxySolution<T>* compute();
305 
313  virtual Bico<T>& operator<<(T const & element);
314 
319  void print(std::ostream& os);
320 
321 private:
328  void insert(BicoNode* node, int level, T const & element);
329 
334  void insertIntoNN(typename BicoNode::FeatureList::iterator iteratorElement);
335 
339  void initializeNN();
340 
346  void allocateBucket(int bucket, bool left);
347 
354  int calcBucketNumber(int rnd, double val);
355 
362  double project(T point, int i);
363 
367  void rebuild();
368 
374  void rebuildFirstLevel(BicoNode* parent, BicoNode* child);
375 
381  void rebuildTraversMerge(BicoNode* node, int level);
382 
388  void computeTraverse(BicoNode* node, ProxySolution<T>* solution);
389 
395  double getT(int level);
396 
402  double getR(int level);
403 
409  void print(std::ostream& os, BicoNode* node);
410 
414  size_t k;
418  size_t L;
419 
423  std::vector<std::vector<double >> rndprojections;
424 
428  std::vector<std::vector<std::vector<typename BicoNode::FeatureList::iterator >> > buckets;
432  std::vector<std::pair<double, double >> borders;
436  std::vector<double> bucket_radius;
437 
442 
446  std::unique_ptr<DissimilarityMeasure<T >> measure;
447 
451  std::unique_ptr<WeightModifier<T >> weightModifier;
452 
456  size_t maxNumOfCFs;
457 
461  size_t curNumOfCFs;
462 
466  size_t dimension;
467 
471  double optEst;
472 
476  std::vector<double> maxVal;
477 
482 
487 
491  std::vector<T> buffer;
492 
496  std::vector<std::pair<double, T const *>> projection_buffer;
497 
501  double minDist;
502 
507 
512 };
513 
514 template<template <typename> class P = std::less> struct comparePairFirst
515 {
516  template<class T1, class T2> bool operator()(std::pair<T1, T2> const & left, std::pair<T1, T2> const & right)
517  {
518  return P<T1>()(left.first, right.first);
519  }
520 };
521 
522 template<typename T> Bico<T>::Bico(size_t dim, size_t n, size_t k, size_t p, size_t nMax,
523  DissimilarityMeasure<T>* measure, WeightModifier<T>* weightModifier) :
524 nodeIdCounter(0),
525 measure(measure->clone()),
526 weightModifier(weightModifier->clone()),
527 maxNumOfCFs(nMax),
528 curNumOfCFs(0),
529 k(k),
530 L(p),
531 optEst(-1),
532 root(new BicoNode(*this)),
533 bufferPhase(true),
534 numOfRebuilds(0),
535 buffer(),
536 projection_buffer(),
537 minDist(std::numeric_limits<double>::infinity()),
538 pairwise_different(0),
539 dimension(dim)
540 {
542  std::vector<double> rndpoint(dimension);
543  rndprojections.resize(L);
544  bucket_radius.resize(L);
545  maxVal.resize(L);
546  double norm;
547  std::normal_distribution<double> realDist(0.0, 1.0);
548  for (int i = 0; i < L; i++)
549  {
550  maxVal[i] = -1;
551  norm = 0.0;
552  for (int j = 0; j < dimension; j++)
553  {
554  rndpoint[j] = realDist(rg);
555  norm += rndpoint[j] * rndpoint[j];
556  }
557  norm = std::sqrt(norm);
558  for (int j = 0; j < dimension; j++)
559  {
560  rndpoint[j] /= norm;
561  }
562  rndprojections[i] = rndpoint;
563  }
564  buckets.resize(L);
565  buffer.reserve(maxNumOfCFs + 1);
566  projection_buffer.reserve(maxNumOfCFs + 1);
567 }
568 
569 template<typename T> int Bico<T>::calcBucketNumber(int rnd, double val)
570 {
571  return (int) floor((val - borders[rnd].first) / bucket_radius[rnd]);
572 }
573 
574 template<typename T> void Bico<T>::initializeNN()
575 {
576  double maxBuckets = 10000;
577  double Size = 0;
578  for (int i = 0; i < L; i++)
579  {
580  // Compute new bucket size
581  if (buckets[i].size() == 1)
582  {
583  Size = 1;
584  }
585  else
586  {
587  bucket_radius[i] = (long long int) ceil(sqrt(getR(1)));
588  Size = (int) ceil((borders[i].second - borders[i].first) / (double) bucket_radius[i]);
589  if(Size < 0 || Size > maxBuckets)
590  {
591  bucket_radius[i] = (borders[i].second - borders[i].first) / maxBuckets;
592  Size = (int) ceil((borders[i].second - borders[i].first) / (double) bucket_radius[i]);
593  }
594  }
595  for (int l = 0; l < buckets[i].size(); l++) buckets[i][l].clear();
596  // Create new buckets
597  buckets[i].clear();
598  buckets[i].resize((int) ceil(Size));
599  }
600 }
601 
602 template<typename T> void Bico<T>::allocateBucket(int bucket, bool left)
603 {
604  if (left)
605  {
606  // Push front bucket
607  borders[bucket].first = 2 * borders[bucket].first - borders[bucket].second;
608  std::vector < std::vector<typename BicoNode::FeatureList::iterator >> a(2 * buckets[bucket].size());
609  for (int i = 0; i < buckets[bucket].size(); i++)
610  {
611  a[buckets[bucket].size() + i] = buckets[bucket][i];
612  }
613  for (int l = 0; l < buckets[bucket].size(); l++) buckets[bucket][l].clear();
614  buckets[bucket].clear();
615  buckets[bucket] = a;
616  }
617  else
618  {
619  // Push back bucket
620  borders[bucket].second = 2 * borders[bucket].second - borders[bucket].first;
621  std::vector < std::vector<typename BicoNode::FeatureList::iterator >> a(2 * buckets[bucket].size());
622  for (int i = 0; i < buckets[bucket].size(); i++)
623  {
624  a[i] = buckets[bucket][i];
625  }
626  for (int l = 0; l < buckets[bucket].size(); l++) buckets[bucket][l].clear();
627  buckets[bucket].clear();
628  buckets[bucket] = a;
629  }
630 }
631 
632 template<typename T> double Bico<T>::project(T point, int i)
633 {
634  double ip = 0.0;
635  for (int j = 0; j < dimension; j++)
636  {
637  ip += point[j]*(rndprojections[i][j]);
638  }
639  return ip;
640 }
641 
642 template<typename T> ProxySolution<T>* Bico<T>::compute()
643 {
644  ProxySolution<T>* result = new ProxySolution<T>();
645  if(bufferPhase)
646  {
647  result->proxysets.push_back(buffer);
648  }
649  else
650  {
651  result->proxysets.push_back(std::vector<T>());
652  result->proxysets[0].reserve(curNumOfCFs);
653  computeTraverse(root, result);
654  }
655  return result;
656 }
657 
658 template<typename T> void Bico<T>::computeTraverse(BicoNode* node, ProxySolution<T>* solution)
659 {
660  for (auto it = node->begin(); it != node->end(); ++it)
661  {
662  T point(it->first.cog());
663  weightModifier->setWeight(point, it->first.number);
664  solution->proxysets[0].push_back(point);
665  computeTraverse(it->second, solution);
666  }
667 }
668 
669 template<typename T> Bico<T>& Bico<T>::operator<<(T const & element)
670 {
671  if (bufferPhase)
672  {
673  // Update bucket configuration
674  for (int i = 0; i < L; i++)
675  {
676  double val = std::abs(project(element, i));
677  if (val > maxVal[i] || maxVal[i] == -1)
678  {
679  maxVal[i] = val;
680  }
681  }
682 
683  // Insert point into buffer
684  buffer.push_back(element);
685  ++pairwise_different;
686 
687  // Project point for find nearest neighbor
688  double projected = project(element, 0);
689  projection_buffer.push_back(std::pair<double, T const *>(projected, &buffer.back()));
690 
691  // Enough pairwise different elements to estimate optimal cost?
692  if (pairwise_different >= maxNumOfCFs + 1)
693  {
694  // Sort projection values and determine smallest distance on projection line
695  std::sort(projection_buffer.begin(), projection_buffer.end(), comparePairFirst<>());
696  double minProjDist = std::numeric_limits<double>::infinity();
697  double minProjRealDist = std::numeric_limits<double>::infinity();
698  for(int i = 0; i < pairwise_different-2; ++i)
699  {
700  double tmpDist = projection_buffer[i+1].first - projection_buffer[i].first;
701  if(tmpDist < minProjRealDist)
702  {
703  minProjDist = tmpDist;
704  double tmpMinProjRealDist = measure->dissimilarity(*projection_buffer[i].second, *projection_buffer[i+1].second);
705  if(tmpMinProjRealDist > 0)
706  {
707  minProjRealDist = tmpMinProjRealDist;
708  }
709  }
710  }
711 
712  // Compute approximate minimum pairwise distance
713  int lowerIndex = 0;
714  int upperIndex = 0;
715  double lowerEnd = projection_buffer[0].first;
716  double upperEnd = lowerEnd + minProjRealDist;
717  double minDist = minProjRealDist;
718  for(int i = 0; i < pairwise_different-1; ++i)
719  {
720  if(projection_buffer[i].first >= upperEnd)
721  {
722  upperIndex = i;
723 
724  for(int j = lowerIndex; j < upperIndex; ++j)
725  {
726  for(int k = j+1; k < upperIndex; ++k)
727  {
728  //std::cout << "Bulk " << i << ": Distance calculation point " << j << "," << k << std::endl;
729  double tmpDist = measure->dissimilarity(*projection_buffer[j].second, *projection_buffer[k].second);
730  if(tmpDist < minDist && tmpDist > 0)
731  {
732  minDist = tmpDist;
733  }
734  }
735  }
736 
737  lowerIndex = i;
738  lowerEnd = projection_buffer[i].first;
739  upperEnd = lowerEnd + minProjRealDist;
740  }
741  }
742 
743  // Construct buckets
744  optEst = 16.0 * minDist;
745  //std::cout << "minDist = " << minDist << std::endl;
746  //std::cout << "optEst = " << minDist << std::endl;
747  long long int radius = (long long int) ceil(sqrt(getR(1)));
748  borders.resize(L);
749  for (int i = 0; i < L; i++)
750  {
751  borders[i].first = -maxVal[i];
752  borders[i].second = maxVal[i];
753  bucket_radius[i] = radius;
754  }
755  initializeNN();
756 
757  // Insert buffered elements into tree
758  for (auto it = buffer.begin(); it != buffer.end(); ++it)
759  insert(root, 1, *it);
760  buffer.resize(0);
761  bufferPhase = false;
762  }
763  }
764  else
765  insert(root, 1, element);
766  return *this;
767 }
768 
769 template<typename T> void Bico<T>::insertIntoNN(typename BicoNode::FeatureList::iterator iteratorElement)
770 {
771  for (int i = 0; i < L; i++)
772  {
773  double val = project(iteratorElement->first.representative, i);
774  int bucket_number = calcBucketNumber(i, val);
775 
776  if (bucket_number < 0)
777  {
778  while (bucket_number < 0)
779  {
780  allocateBucket(i, true);
781  bucket_number = calcBucketNumber(i, val);
782  }
783  }
784  else if (bucket_number > buckets[i].size() - 1)
785  {
786  while (bucket_number > buckets[i].size() - 1)
787  {
788  allocateBucket(i, false);
789  bucket_number = calcBucketNumber(i, val);
790  }
791  }
792  buckets[i][bucket_number].push_back(iteratorElement);
793  }
794 }
795 
796 template<typename T> void Bico<T>::insert(BicoNode* node, int level, T const & element)
797 {
798 
799  if (optEst < 0)
800  throw (InvalidRuntimeConfigurationException(0, "Estimated optimal cost invalid"));
801 
802  // Determine nearest clustering feature in current node
803  typename BicoNode::FeatureList::iterator nearest(node->nearest(element, level));
804 
805 
806  // Construct new clustering feature if element is too far away from
807  // nearest clustering feature or insert element into nearest
808  if (node->empty() || nearest == node->end()
809  || measure->dissimilarity(nearest->first.representative, element) > getR(level))
810  {
811  CFREntry<T> feature(1, element, element * element, element);
812  typename BicoNode::FeatureList::iterator itele = node->insert(feature);
813 
814  if (level == 1)
815  {
816  insertIntoNN(itele);
817  }
818 
819  ++curNumOfCFs;
820  }
821  else
822  {
823  T center(nearest->first.cog());
824  // Insert element into (a copy of) nearest and compute cost
825  // for insertion at current level
826  CFEntry<T> testFeature(nearest->first);
827  testFeature.insert(element);
828  double tfCost = testFeature.kMeansCost(center);
829 
830  // Insert element either to current level (if cost remains small)
831  // or to higher level
832  if (tfCost < getT(level))
833  {
834  nearest->first.insert(element);
835  }
836  else
837  {
838  insert(nearest->second, level + 1, element);
839  }
840  }
841 
842  // Rebuild?
843  while (curNumOfCFs > maxNumOfCFs)
844  {
845  rebuild();
846  }
847 }
848 
849 template<typename T> void Bico<T>::rebuild()
850 {
851  // Rebuild first level
852  BicoNode * oldRoot(this->root);
853  this->root = new BicoNode(*this);
854  rebuildFirstLevel(this->root, oldRoot);
855  oldRoot->clear();
856  delete oldRoot;
857 
858  // Traverse through tree and merge
859  rebuildTraversMerge(this->root, 1);
860 }
861 
862 template<typename T> void Bico<T>::rebuildFirstLevel(BicoNode* parent, BicoNode* child)
863 {
864  optEst *= 2.0;
865  ++numOfRebuilds;
866 
867  initializeNN();
868 
869  // The current element it may be spliced around the tree, so nextIt
870  // will maintain an iterator pointing at the next element in child
871  auto nextIt = child->begin();
872  for (auto it = child->begin(); it != child->end(); it = nextIt)
873  {
874  ++nextIt;
875  // Determine clustering feature in parent that is nearest to child
876  typename BicoNode::FeatureList::iterator nearest(parent->nearest(it->first.representative, 1));
877  if (parent->empty() || nearest == parent->end()
878  || measure->dissimilarity(nearest->first.representative, it->first.representative) > getR(1))
879  {
880  // Move it from child to parent
881  child->spliceElementTo(it, parent, parent->end());
882 
883  insertIntoNN(it);
884  }
885  else
886  {
887  CFEntry<T> testFeature(it->first);
888  testFeature += nearest->first;
889  if (testFeature.kMeansCost(nearest->first.representative) <= getT(1))
890  {
891  // Insert it into nearest
892  nearest->first += it->first;
893  // Make children of it children of nearest
894  it->second->spliceAllTo(nearest->second, nearest->second->end());
895  // Delete (now) empty child node of it
896  it->second->clear();
897  delete it->second;
898  // Remove it from tree and delete it
899  child->erase(it);
900  --curNumOfCFs;
901  }
902  else
903  {
904  // Make it a child of nearest
905  child->spliceElementTo(it, nearest->second, nearest->second->end());
906  }
907  }
908  }
909 }
910 
911 template<typename T> void Bico<T>::rebuildTraversMerge(BicoNode* node, int level)
912 {
913  for (auto parentIt = node->begin(); parentIt != node->end(); ++parentIt)
914  {
915  if (!parentIt->second->empty())
916  {
917  auto nextIt = parentIt->second->begin();
918  for (auto childIt = parentIt->second->begin(); childIt != parentIt->second->end(); childIt = nextIt)
919  {
920  ++nextIt;
921 
922  T center(parentIt->first.cog());
923  // Insert element into (a copy of) nearest and compute cost
924  // for insertion at current level
925  CFEntry<T> testFeature(parentIt->first + childIt->first);
926  double tfCost = testFeature.kMeansCost(center);
927 
928  // Merge if possible
929  if (tfCost < getT(level))
930  {
931  parentIt->first += childIt->first;
932  childIt->second->spliceAllTo(parentIt->second, parentIt->second->end());
933  childIt->second->clear();
934  delete childIt->second;
935  parentIt->second->erase(childIt);
936  --curNumOfCFs;
937  }
938  else
939  {
940  rebuildTraversMerge(childIt->second, level + 1);
941  }
942  }
943  }
944  }
945 }
946 
947 template<typename T> double Bico<T>::getT(int level)
948 {
949  return optEst;
950 }
951 
952 template<typename T> double Bico<T>::getR(int level)
953 {
954  return getT(level) / (double(1 << (3 + level)));
955 }
956 
957 template<typename T> void Bico<T>::print(std::ostream& os)
958 {
959  os << "digraph G{\n";
960  os << "node [shape=record];\n";
961  print(os, root);
962  os << "}\n";
963 }
964 
965 template<typename T> void Bico<T>::print(std::ostream& os, BicoNode* node)
966 {
967  int id = node->id();
968 
969  os << "node" << id << "[label=\"";
970  int fvalue = 0;
971  os << node->id() << "|";
972  for (auto it = node->begin(); it != node->end(); ++it)
973  {
974  if (fvalue > 0)
975  os << "|";
976  os << "<f" << fvalue << "> " << it->first.number << "," << it->first.representative
977  << "\\n" << it->first.LS << "," << it->first.SS;
978  fvalue++;
979  }
980  os << "\"];\n";
981 
982  fvalue = 0;
983  for (auto it = node->begin(); it != node->end(); ++it)
984  {
985  print(os, it->second);
986  os << "node" << id << ":f" << fvalue << " -> node" << it->second->id() << ";\n";
987  fvalue++;
988  }
989 }
990 
991 }
992 
993 #endif
void computeTraverse(BicoNode *node, ProxySolution< T > *solution)
Recursive computation of the coreset.
Definition: bico.h:658
void initializeNN()
Initialize nearest neighbour data structure.
Definition: bico.h:574
Encapsulates an STL random generator.
double getR(int level)
Returns the radius for a given level.
Definition: bico.h:952
size_t maxNumOfCFs
Maximum coreset size.
Definition: bico.h:456
void clear()
Delete all nodes.
Definition: bico.h:71
double getT(int level)
Returns the threshold for a given level.
Definition: bico.h:947
std::vector< std::vector< T > > proxysets
Definition: proxysolution.h:37
double kMeansCost(T const &center)
1-means clustering cost
Definition: cfentry.h:134
void spliceElementTo(typename FeatureList::iterator it, BicoNode *to, typename FeatureList::iterator pos)
Definition: bico.h:251
std::unique_ptr< WeightModifier< T > > weightModifier
Buffer for WeightModifier.
Definition: bico.h:451
void erase(typename FeatureList::iterator pos)
Definition: bico.h:230
std::unique_ptr< DissimilarityMeasure< T > > measure
Buffer for DissimilarityMeasure.
Definition: bico.h:446
size_t curNumOfCFs
Current coreset size.
Definition: bico.h:461
int numOfRebuilds
Current number of rebuilding.
Definition: bico.h:511
void insertIntoNN(typename BicoNode::FeatureList::iterator iteratorElement)
Inserts an element into the nearest neighbour data structure.
Definition: bico.h:769
void spliceAllTo(BicoNode *to, typename FeatureList::iterator pos)
Definition: bico.h:240
FeatureList::iterator insert(CFREntry< T > const &feature)
Definition: bico.h:82
Fast computation of k-means coresets in a data stream.
Definition: bico.h:43
FeatureList::iterator begin()
Definition: bico.h:92
std::vector< T > buffer
Buffer phase's buffer.
Definition: bico.h:491
void rebuild()
Rebuilds the tree.
Definition: bico.h:849
size_t pairwise_different
Number of unique elements read in buffer phase.
Definition: bico.h:506
bool operator()(std::pair< T1, T2 > const &left, std::pair< T1, T2 > const &right)
Definition: bico.h:516
void print(std::ostream &os)
Write the tree as GraphViz source into a stream.
Definition: bico.h:957
std::vector< std::vector< std::vector< typename BicoNode::FeatureList::iterator > > > buckets
Buckets for nearest neighbour search in first level.
Definition: bico.h:428
std::vector< double > maxVal
Extreme values used for constructing the nearest neighbour buckets.
Definition: bico.h:476
int calcBucketNumber(int rnd, double val)
Definition: bico.h:569
void rebuildFirstLevel(BicoNode *parent, BicoNode *child)
Definition: bico.h:862
Bico< T > & outer
Parent BICO instance.
Definition: bico.h:274
BicoNode * root
Root node of BICO's tree.
Definition: bico.h:481
int nodeIdCounter
Counter for unique BicoNode object ids.
Definition: bico.h:441
std::vector< std::pair< double, T const * > > projection_buffer
Buffer phase's buffer for projected buffer points.
Definition: bico.h:496
BicoNode(Bico< T > &outer)
Definition: bico.h:60
double minDist
Minimum pair distance of two points read in buffer phase.
Definition: bico.h:501
Abstract base class to modify the weight of weighted objects.
static RandomGenerator getRandomGenerator()
Definition: randomness.h:23
size_t size()
Definition: bico.h:110
std::vector< double > bucket_radius
Width of buckets.
Definition: bico.h:436
double project(T point, int i)
Projects a point onto a projection line.
Definition: bico.h:632
Clustering feature with representation point.
Definition: cfrentry.h:17
void insert(BicoNode *node, int level, T const &element)
Inserts an element into a BicoNode at a certain level.
Definition: bico.h:796
Class representing a node in BICO's tree.
Definition: bico.h:50
size_t k
Number of centers.
Definition: bico.h:414
std::pair< CFREntry< T >, BicoNode * > FeaturePair
Definition: bico.h:53
Data structure for proxies.
Definition: proxysolution.h:19
int objectId
Unique object id.
Definition: bico.h:269
FeatureList::iterator nearest(T const &element, int level)
Definition: bico.h:131
virtual Bico< T > & operator<<(T const &element)
Read a point Insert the point into BICO's tree.
Definition: bico.h:669
Abstract base class for streaming algorithms.
std::vector< std::pair< double, double > > borders
Bucket borders.
Definition: bico.h:432
std::list< FeaturePair > FeatureList
Definition: bico.h:54
double optEst
Current estimation of the optimal clustering cost.
Definition: bico.h:471
Abstract base class for dissimilarity measurement.
size_t dimension
Dimension of the input points.
Definition: bico.h:466
bool bufferPhase
Buffer phase indicator.
Definition: bico.h:486
FeatureList features
Definition: bico.h:279
virtual ProxySolution< T > * compute()
Returns a coreset of all point read so far.
Definition: bico.h:642
Clustering feature tree entry.
Definition: cfentry.h:22
std::vector< std::vector< double > > rndprojections
Random projection vectors.
Definition: bico.h:423
size_t L
Number of projections.
Definition: bico.h:418
Indicates that a computation entered an invalid configuration state.
FeatureList::iterator end()
Definition: bico.h:101
void allocateBucket(int bucket, bool left)
Allocates a new bucket.
Definition: bico.h:602
Bico(size_t dimension, size_t n, size_t k, size_t p, size_t nMax, DissimilarityMeasure< T > *measure, WeightModifier< T > *weightModifier)
Constructs BICO for points of type T T can be an arbitrary type but it has to fulfil the requirements...
Definition: bico.h:522
void rebuildTraversMerge(BicoNode *node, int level)
Definition: bico.h:911