Returns vector mapping previously allocated node IDs to a smarter allocation based on the points-to sets in pta accessed through keys. The second part of the keys pairs are the number of (potential) occurrences of that points-to set or a subset, depending on the client's wish. TODO: interfaces are getting unwieldy, an initialised object may be better. TODO: kind of sucks pta can't be const here because getPts isn't.
189 assert(pta !=
nullptr &&
"Clusterer::cluster: given null BVDataPTAImpl");
190 assert(
Options::NodeAllocStrat() == Strategy::DENSE &&
"Clusterer::cluster: only dense allocation clustering currently supported");
192 Map<std::string, std::string> overallStats;
193 double fastClusterTime = 0.0;
194 double distanceMatrixTime = 0.0;
195 double dendrogramTraversalTime = 0.0;
196 double regioningTime = 0.0;
197 double evalTime = 0.0;
200 Map<std::pair<NodeID, NodeID>, std::pair<unsigned, unsigned>> distances;
205 Map<PointsTo, unsigned> pointsToSets;
208 Map<NodeID, Set<NodeID>> coPointeeGraph;
209 for (
const std::pair<NodeID, unsigned> &keyOcc : keys)
211 const PointsTo &pts = pta->getPts(keyOcc.first);
212 const size_t oldSize = pointsToSets.size();
213 pointsToSets[pts] += keyOcc.second;;
217 if (oldSize != pointsToSets.size())
219 NodeID firstO = !pts.empty() ? *(pts.begin()) : 0;
220 Set<NodeID> &firstOsNeighbours = coPointeeGraph[firstO];
221 for (
const NodeID o : pts)
225 firstOsNeighbours.insert(o);
226 coPointeeGraph[o].insert(firstO);
235 size_t numRegions = 0;
236 std::vector<unsigned> objectsRegion;
244 objectsRegion.insert(objectsRegion.end(),
numObjects, 0);
253 std::vector<OrderedSet<NodeID>> regionsObjects(numRegions);
254 for (
NodeID o = 0; o <
numObjects; ++o) regionsObjects[objectsRegion[o]].insert(o);
262 std::vector<std::vector<NodeID>> regionMappings(numRegions);
264 std::vector<Map<NodeID, unsigned>> regionReverseMappings(numRegions);
266 for (
unsigned region = 0; region < numRegions; ++region)
270 for (
NodeID o : regionsObjects[region])
273 regionMappings[region].push_back(o);
274 regionReverseMappings[region][o] = curr++;
289 std::vector<std::vector<std::pair<const PointsTo *, unsigned>>> regionsPointsTos(numRegions);
290 for (
const Map<PointsTo, unsigned>::value_type &ptocc : pointsToSets)
292 const PointsTo &pt = ptocc.first;
293 const unsigned occ = ptocc.second;
294 if (pt.empty())
continue;
297 unsigned region = objectsRegion[*(pt.begin())];
300 regionsPointsTos[region].push_back(std::make_pair(&pt, occ));
306 overallStats[
NumRegions] = std::to_string(numRegions);
308 std::vector<hclust_fast_methods> methods;
322 std::vector<NodeID> nodeMap(
numObjects, UINT_MAX);
324 unsigned numGtIntRegions = 0;
325 unsigned largestRegion = 0;
326 unsigned nonTrivialRegionObjects = 0;
327 unsigned allocCounter = 0;
328 for (
unsigned region = 0; region < numRegions; ++region)
330 const size_t regionNumObjects = regionsObjects[region].size();
339 if (regionNumObjects > largestRegion) largestRegion = regionNumObjects;
345 for (
NodeID o : regionsObjects[region]) nodeMap[o] = allocCounter++;
350 nonTrivialRegionObjects += regionNumObjects;
352 double *distMatrix =
getDistanceMatrix(regionsPointsTos[region], regionNumObjects,
353 regionReverseMappings[region], distanceMatrixTime);
356 int *dendrogram =
new int[2 * (regionNumObjects - 1)];
357 double *height =
new double[regionNumObjects - 1];
358 hclust_fast(regionNumObjects, distMatrix, method, dendrogram, height);
367 visited, regionNumObjects - 1, regionMappings[region]);
370 dendrogramTraversalTime += (clkEnd - clkStart) /
TIMEINTERVAL;
373 candidates.push_back(std::make_pair(method, nodeMap));
382 std::pair<hclust_fast_methods, std::vector<NodeID>> bestMapping =
determineBestMapping(candidates, pointsToSets,
383 evalSubtitle, evalTime);
388 overallStats[
EvalTime] = std::to_string(evalTime);
389 overallStats[
TotalTime] = std::to_string(distanceMatrixTime + dendrogramTraversalTime + fastClusterTime + regioningTime + evalTime);
392 printStats(evalSubtitle +
": overall", overallStats);
394 return bestMapping.second;
#define NATIVE_INT_SIZE
Size of native integer that we'll use for bit vectors, in bits.
static const std::string DistanceMatrixTime
static const std::string LargestRegion
static const std::string NumNonTrivialRegionObjects
static const std::string EvalTime
static const std::string BestCandidate
static std::pair< hclust_fast_methods, std::vector< NodeID > > determineBestMapping(const std::vector< std::pair< hclust_fast_methods, std::vector< NodeID >>> &candidates, Map< PointsTo, unsigned > pointsToSets, const std::string &evalSubtitle, double &evalTime)
static const std::string DendrogramTraversalTime
static std::vector< unsigned > regionObjects(const Map< NodeID, Set< NodeID >> &graph, size_t numObjects, size_t &numLabels)
static void traverseDendrogram(std::vector< NodeID > &nodeMap, const int *dendrogram, const size_t numObjects, unsigned &allocCounter, Set< int > &visited, const int index, const std::vector< NodeID > ®ionNodeMap)
static void printStats(std::string title, Map< std::string, std::string > &stats)
static const std::string NumRegions
static const std::string RegioningTime
static const std::string NumGtIntRegions
static double * getDistanceMatrix(const std::vector< std::pair< const PointsTo *, unsigned >> pointsToSets, const size_t numObjects, const Map< NodeID, unsigned > &nodeMap, double &distanceMatrixTime)
static const std::string FastClusterTime
static const std::string NumObjects
static const std::string TotalTime
static NodeIDAllocator * get(void)
Return (singleton) allocator.
static const OptionMap< SVF::NodeIDAllocator::Strategy > NodeAllocStrat
static const Option< bool > RegionAlign
Align identifiers in each region to a word.
static const Option< bool > RegionedClustering
Cluster partitions separately.
static const OptionMap< enum hclust_fast_methods > ClusterMethod
static double getClk(bool mark=false)
int hclust_fast(int n, double *distmat, int method, int *merge, double *height)
std::string hclustMethodToString(hclust_fast_methods method)
Returns a string representation of a hclust method.