10 #include "../base/streamingalgorithm.h"
11 #include "../base/dissimilaritymeasure.h"
12 #include "../base/solutionprovider.h"
13 #include "../base/weightmodifier.h"
14 #include "../base/partitionprovider.h"
15 #include "../clustering/cfrentry.h"
16 #include "../datastructure/proxysolution.h"
17 #include "../evaluation/kmeansevaluator.h"
18 #include "../exception/invalidruntimeconfigurationexception.h"
19 #include "../misc/randomness.h"
91 typename FeatureList::iterator
begin()
100 typename FeatureList::iterator
end()
130 typename FeatureList::iterator
nearest(T
const & element,
int level)
132 typename FeatureList::iterator minIt =
features.end();
137 double val =
outer.project(element, 0);
138 int bucket_number =
outer.calcBucketNumber(0, val);
140 int bucket_min = bucket_number;
143 if ((bucket_number < 0) || (bucket_number >
outer.buckets[0].size() - 1))
151 mins =
outer.buckets[mini][bucket_min].size();
152 for (
int i = 1; i <
outer.L; i++)
154 val =
outer.project(element, i);
155 bucket_number =
outer.calcBucketNumber(i, val);
156 if ((bucket_number >= 0) & (bucket_number <=
outer.buckets[i].size() - 1))
158 int s =
outer.buckets[i][bucket_number].size();
162 bucket_min = bucket_number;
169 bucket_min = bucket_number;
177 bucket_number = bucket_min;
180 if (bucket_number < 0)
183 outer.allocateBucket(rnd,
true);
185 else if (bucket_number >
outer.buckets[rnd].size() - 1)
188 outer.allocateBucket(rnd,
false);
195 for (
auto it =
outer.buckets[rnd][bucket_number].begin(); it !=
outer.buckets[rnd][bucket_number].end(); ++it)
197 double tmpDist =
outer.measure->dissimilarity((*it)->first.representative, element);
198 if (tmpDist < minDist || minDist == -1)
213 double tmpDist =
outer.measure->dissimilarity(it->first.representative, element);
214 if (tmpDist < minDist || minDist == -1)
229 void erase(
typename FeatureList::iterator pos)
318 void print(std::ostream& os);
355 double project(T point,
int i);
388 double getT(
int level);
395 double getR(
int level);
421 std::vector<std::vector<std::vector<typename BicoNode::FeatureList::iterator >> >
buckets;
425 std::vector<std::pair<double, double >>
borders;
439 std::unique_ptr<DissimilarityMeasure<T >>
measure;
492 template<
typename T>
Bico<T>::Bico(
size_t dim,
size_t n,
size_t k,
size_t p,
size_t nMax,
495 measure(measure->clone()),
496 weightModifier(weightModifier->clone()),
514 std::normal_distribution<double> realDist(0.0, 1.0);
515 for (
int i = 0; i <
L; i++)
521 rndpoint[j] = realDist(rg);
522 norm += rndpoint[j] * rndpoint[j];
524 norm = std::sqrt(norm);
537 return (
int) floor((val - borders[rnd].first) / bucket_radius[rnd]);
543 for (
int i = 0; i < L; i++)
546 if (buckets[i].size() == 1)
552 bucket_radius[i] = (int) ceil(sqrt(getR(1)));
553 Size = (int) ceil((borders[i].second - borders[i].first) / (double) bucket_radius[i]);
555 for (
int l = 0; l < buckets[i].size(); l++) buckets[i][l].clear();
558 buckets[i].resize((
int) ceil(Size));
567 borders[bucket].first = 2 * borders[bucket].first - borders[bucket].second;
568 std::vector < std::vector<typename BicoNode::FeatureList::iterator >> a(2 * buckets[bucket].size());
569 for (
int i = 0; i < buckets[bucket].size(); i++)
571 a[buckets[bucket].size() + i] = buckets[bucket][i];
573 for (
int l = 0; l < buckets[bucket].size(); l++) buckets[bucket][l].clear();
574 buckets[bucket].clear();
580 borders[bucket].second = 2 * borders[bucket].second - borders[bucket].first;
581 std::vector < std::vector<typename BicoNode::FeatureList::iterator >> a(2 * buckets[bucket].size());
582 for (
int i = 0; i < buckets[bucket].size(); i++)
584 a[i] = buckets[bucket][i];
586 for (
int l = 0; l < buckets[bucket].size(); l++) buckets[bucket][l].clear();
587 buckets[bucket].clear();
595 for (
int j = 0; j < dimension; j++)
597 ip += point[j]*(rndprojections[i][j]);
605 result->
proxysets.push_back(std::vector<T>());
606 result->
proxysets[0].reserve(curNumOfCFs);
607 computeTraverse(root, result);
613 for (
auto it = node->
begin(); it != node->
end(); ++it)
615 T point(it->first.cog());
616 weightModifier->setWeight(point, it->first.number);
618 computeTraverse(it->second, solution);
626 static double minDist = std::numeric_limits<double>::infinity();
627 static size_t pairwise_different = 0;
629 for (
size_t i = 0; i < buffer.size(); ++i)
631 double tmpDist = measure->dissimilarity(buffer[i], element);
634 ++pairwise_different;
635 if (tmpDist < minDist)
638 for (
int i = 0; i < L; i++)
640 double val = std::abs(project(element, i));
641 if (val > maxVal[i] || maxVal[i] == -1)
648 buffer.push_back(element);
651 if (pairwise_different >= maxNumOfCFs + 1)
653 optEst = 16.0 * minDist;
654 int radius = (int) ceil(sqrt(getR(1)));
656 for (
int i = 0; i < L; i++)
658 borders[i].first = -maxVal[i];
659 borders[i].second = maxVal[i];
660 bucket_radius[i] = radius;
665 for (
auto it = buffer.begin(); it != buffer.end(); ++it)
666 insert(root, 1, *it);
672 insert(root, 1, element);
683 typename BicoNode::FeatureList::iterator nearest(node->
nearest(element, level));
688 if (node->
empty() || nearest == node->
end()
689 || measure->dissimilarity(nearest->first.representative, element) > getR(level))
691 CFREntry<T> feature(1, element, element * element, element);
692 typename BicoNode::FeatureList::iterator itele = node->
insert(feature);
696 for (
int i = 0; i < L; i++)
698 double val = project(element, i);
699 int bucket_number = calcBucketNumber(i, val);
701 if (bucket_number < 0)
703 while (bucket_number < 0)
705 allocateBucket(i,
true);
706 bucket_number = calcBucketNumber(i, val);
709 else if (bucket_number > buckets[i].size() - 1)
711 while (bucket_number > buckets[i].size() - 1)
713 allocateBucket(i,
false);
714 bucket_number = calcBucketNumber(i, val);
717 buckets[i][bucket_number].push_back(itele);
725 T center(nearest->first.cog());
729 testFeature.insert(element);
730 double tfCost = testFeature.kMeansCost(center);
734 if (tfCost < getT(level))
736 nearest->first.insert(element);
740 insert(nearest->second, level + 1, element);
745 while (curNumOfCFs > maxNumOfCFs)
756 rebuildFirstLevel(this->root, oldRoot);
761 for (
auto it = this->root->begin(); it != this->root->end(); ++it)
762 rebuildTraversMerge(it->second, 2);
774 auto nextIt = child->
begin();
775 for (
auto it = child->
begin(); it != child->
end(); it = nextIt)
779 typename BicoNode::FeatureList::iterator nearest(parent->
nearest(it->first.representative, 1));
780 if (parent->
empty() || nearest == parent->
end()
781 || measure->dissimilarity(nearest->first.representative, it->first.representative) > getR(1))
786 for (
int i = 0; i < L; i++)
788 double val = project(it->first.representative, i);
789 int bucket_number = calcBucketNumber(i, val);
790 if (bucket_number < 0)
792 while (bucket_number < 0)
794 allocateBucket(i,
true);
795 bucket_number = calcBucketNumber(i, val);
798 else if (bucket_number > buckets[i].size() - 1)
800 while (bucket_number > buckets[i].size() - 1)
802 allocateBucket(i,
false);
803 bucket_number = calcBucketNumber(i, val);
806 buckets[i][bucket_number].push_back(it);
812 testFeature += nearest->first;
813 if (testFeature.
kMeansCost(nearest->first.representative) <= getT(1))
816 nearest->first += it->first;
818 it->second->spliceAllTo(nearest->second, nearest->second->end());
837 for (
auto parentIt = node->
begin(); parentIt != node->
end(); ++parentIt)
839 if (!parentIt->second->empty())
841 auto nextIt = parentIt->second->begin();
842 for (
auto childIt = parentIt->second->begin(); childIt != parentIt->second->end(); childIt = nextIt)
846 T center(parentIt->first.cog());
849 CFEntry<T> testFeature(parentIt->first + childIt->first);
850 double tfCost = testFeature.kMeansCost(center);
853 if (tfCost < getT(level))
855 parentIt->first += childIt->first;
856 childIt->second->spliceAllTo(parentIt->second, parentIt->second->end());
857 childIt->second->clear();
858 delete childIt->second;
859 parentIt->second->erase(childIt);
864 rebuildTraversMerge(childIt->second, level + 1);
878 return getT(level) / (double(1 << (3 + level)));
883 os <<
"digraph G{\n";
884 os <<
"node [shape=record];\n";
893 os <<
"node" <<
id <<
"[label=\"";
895 os << node->
id() <<
"|";
896 for (
auto it = node->
begin(); it != node->
end(); ++it)
900 os <<
"<f" << fvalue <<
"> " << it->first.number <<
"," << it->first.representative
901 <<
"\\n" << it->first.LS <<
"," << it->first.SS;
907 for (
auto it = node->
begin(); it != node->
end(); ++it)
909 print(os, it->second);
910 os <<
"node" <<
id <<
":f" << fvalue <<
" -> node" << it->second->id() <<
";\n";