47 : numObjects(4), numValues(4), numSymbols(4), numNodes(4), strategy(
Options::NodeAllocStrat())
58 else if (
strategy == Strategy::REVERSE_DENSE)
77 assert(
false &&
"NodeIDAllocator::allocateObjectId: unimplemented node allocation strategy.");
82 assert(
id != 0 &&
"NodeIDAllocator::allocateObjectId: ID not allocated");
94 else if (
strategy == Strategy::REVERSE_DENSE)
103 else if (
strategy == Strategy::DBUG)
110 NodeID gepMultiplier = pow(10, ceil(log10(
114 id = (
offset + 1) * gepMultiplier + base;
115 assert(
id >
numSymbols &&
"NodeIDAllocator::allocateGepObjectId: GEP allocation clashing with other nodes");
119 assert(
false &&
"NodeIDAllocator::allocateGepObjectId: unimplemented node allocation strategy");
124 assert(
id != 0 &&
"NodeIDAllocator::allocateGepObjectId: ID not allocated");
138 else if (
strategy == Strategy::REVERSE_DENSE)
147 else if (
strategy == Strategy::DBUG)
153 assert(
false &&
"NodeIDAllocator::allocateValueId: unimplemented node allocation strategy");
159 assert(
id != 0 &&
"NodeIDAllocator::allocateValueId: ID not allocated");
189 assert(pta !=
nullptr &&
"Clusterer::cluster: given null BVDataPTAImpl");
190 assert(
Options::NodeAllocStrat() == Strategy::DENSE &&
"Clusterer::cluster: only dense allocation clustering currently supported");
193 double fastClusterTime = 0.0;
194 double distanceMatrixTime = 0.0;
195 double dendrogramTraversalTime = 0.0;
196 double regioningTime = 0.0;
197 double evalTime = 0.0;
209 for (
const std::pair<NodeID, unsigned> &keyOcc : keys)
212 const size_t oldSize = pointsToSets.size();
213 pointsToSets[pts] += keyOcc.second;;
217 if (oldSize != pointsToSets.size())
220 Set<NodeID> &firstOsNeighbours = coPointeeGraph[firstO];
221 for (
const NodeID o : pts)
225 firstOsNeighbours.insert(o);
226 coPointeeGraph[o].insert(firstO);
235 size_t numRegions = 0;
236 std::vector<unsigned> objectsRegion;
244 objectsRegion.insert(objectsRegion.end(),
numObjects, 0);
253 std::vector<OrderedSet<NodeID>> regionsObjects(numRegions);
254 for (
NodeID o = 0; o <
numObjects; ++o) regionsObjects[objectsRegion[o]].insert(o);
262 std::vector<std::vector<NodeID>> regionMappings(numRegions);
264 std::vector<Map<NodeID, unsigned>> regionReverseMappings(numRegions);
266 for (
unsigned region = 0; region < numRegions; ++region)
270 for (
NodeID o : regionsObjects[region])
273 regionMappings[region].push_back(o);
274 regionReverseMappings[region][o] = curr++;
289 std::vector<std::vector<std::pair<const PointsTo *, unsigned>>> regionsPointsTos(numRegions);
293 const unsigned occ = ptocc.second;
294 if (pt.
empty())
continue;
297 unsigned region = objectsRegion[*(pt.
begin())];
300 regionsPointsTos[region].push_back(std::make_pair(&pt, occ));
306 overallStats[
NumRegions] = std::to_string(numRegions);
308 std::vector<hclust_fast_methods> methods;
322 std::vector<NodeID> nodeMap(
numObjects, UINT_MAX);
324 unsigned numGtIntRegions = 0;
325 unsigned largestRegion = 0;
326 unsigned nonTrivialRegionObjects = 0;
327 unsigned allocCounter = 0;
328 for (
unsigned region = 0; region < numRegions; ++region)
330 const size_t regionNumObjects = regionsObjects[region].size();
339 if (regionNumObjects > largestRegion) largestRegion = regionNumObjects;
345 for (
NodeID o : regionsObjects[region]) nodeMap[o] = allocCounter++;
350 nonTrivialRegionObjects += regionNumObjects;
352 double *distMatrix =
getDistanceMatrix(regionsPointsTos[region], regionNumObjects,
353 regionReverseMappings[region], distanceMatrixTime);
356 int *dendrogram =
new int[2 * (regionNumObjects - 1)];
357 double *height =
new double[regionNumObjects - 1];
358 hclust_fast(regionNumObjects, distMatrix, method, dendrogram, height);
367 visited, regionNumObjects - 1, regionMappings[region]);
370 dendrogramTraversalTime += (clkEnd - clkStart) /
TIMEINTERVAL;
373 candidates.push_back(std::make_pair(method, nodeMap));
382 std::pair<hclust_fast_methods, std::vector<NodeID>> bestMapping =
determineBestMapping(candidates, pointsToSets,
383 evalSubtitle, evalTime);
388 overallStats[
EvalTime] = std::to_string(evalTime);
389 overallStats[
TotalTime] = std::to_string(distanceMatrixTime + dendrogramTraversalTime + fastClusterTime + regioningTime + evalTime);
392 printStats(evalSubtitle +
": overall", overallStats);
394 return bestMapping.second;
400 std::vector<NodeID> reverseNodeMapping(nodeMapping.size(), UINT_MAX);
401 for (
size_t i = 0; i < nodeMapping.size(); ++i)
403 const NodeID mapsTo = nodeMapping.at(i);
404 if (mapsTo >= reverseNodeMapping.size()) reverseNodeMapping.resize(mapsTo + 1, UINT_MAX);
405 reverseNodeMapping.at(mapsTo) = i;
408 return reverseNodeMapping;
414 return n*(
n-1)/2 - (
n-i)*(
n-i-1)/2 + j - i - 1;
419 return requiredBits(pts.
count());
424 if (
n == 0)
return 0;
432 double &distanceMatrixTime)
436 double *distMatrix =
new double[condensedSize];
442 double occurrenceEpsilon = 0.0001;
444 for (
const std::pair<const PointsTo *, unsigned> &ptsOcc : pointsToSets)
447 assert(pts !=
nullptr);
448 const unsigned occ = ptsOcc.second;
454 std::vector<NodeID> ptsVec;
455 for (
const NodeID o : *pts) ptsVec.push_back(o);
456 for (
size_t i = 0; i < ptsVec.size(); ++i)
458 const NodeID oi = ptsVec[i];
460 assert(moi != nodeMap.end());
461 for (
size_t j = i + 1; j < ptsVec.size(); ++j)
463 const NodeID oj = ptsVec[j];
465 assert(moj != nodeMap.end());
466 double &existingDistance = distMatrix[condensedIndex(
numObjects, moi->second, moj->second)];
470 if (distance < existingDistance) existingDistance = distance - occurrenceEpsilon;
472 if (distance == std::ceil(existingDistance))
480 if (existingDistance - occ * occurrenceEpsilon > std::floor(existingDistance))
482 existingDistance -= occ * occurrenceEpsilon;
487 existingDistance = std::floor(existingDistance) + occurrenceEpsilon;
496 distanceMatrixTime += (clkEnd - clkStart) /
TIMEINTERVAL;
503 if (visited.find(
index) != visited.end())
return;
504 visited.insert(
index);
506 int left = dendrogram[
index - 1];
511 nodeMap[regionNodeMap[std::abs(left) - 1]] = allocCounter;
516 traverseDendrogram(nodeMap, dendrogram,
numObjects, allocCounter, visited, left, regionNodeMap);
523 nodeMap[regionNodeMap[std::abs(right) - 1]] = allocCounter;
528 traverseDendrogram(nodeMap, dendrogram,
numObjects, allocCounter, visited, right, regionNodeMap);
534 unsigned label = UINT_MAX;
535 std::vector<NodeID> labels(
numObjects, UINT_MAX);
539 const NodeID o = oos.first;
540 if (labels[o] != UINT_MAX)
continue;
541 std::queue<NodeID> bfsQueue;
544 while (!bfsQueue.empty())
546 const NodeID o = bfsQueue.front();
548 if (labels[o] != UINT_MAX)
550 assert(labels[o] == label);
556 assert(neighboursIt != graph.end());
557 for (
const NodeID neighbour : neighboursIt->second) bfsQueue.push(neighbour);
564 if (labels[o] == UINT_MAX) labels[o] = ++label;
567 numLabels = label + 1;
574 u64_t totalTheoretical = 0;
575 u64_t totalOriginalSbv = 0;
576 u64_t totalOriginalBv = 0;
577 u64_t totalNewSbv = 0;
578 u64_t totalNewBv = 0;
583 const unsigned occ = ptsOcc.second;
584 if (pts.
count() == 0)
continue;
587 if (accountForOcc) theoretical *= occ;
592 for (
const NodeID o : pts) words.insert(o / 128);
593 u64_t originalSbv = words.size() * 2;
594 if (accountForOcc) originalSbv *= occ;
601 if (o < min) min = o;
602 if (o > max) max = o;
609 u64_t originalBv = words.size();
610 if (accountForOcc) originalBv *= occ;
615 for (
const NodeID o : pts) words.insert(nodeMap[o] / 128);
616 u64_t newSbv = words.size() * 2;
617 if (accountForOcc) newSbv *= occ;
622 for (
const NodeID o : pts)
624 const NodeID mappedO = nodeMap[o];
625 if (mappedO < min) min = mappedO;
626 if (mappedO > max) max = mappedO;
632 u64_t newBv = words.size();
633 if (accountForOcc) newBv *= occ;
635 totalTheoretical += theoretical;
636 totalOriginalSbv += originalSbv;
637 totalOriginalBv += originalBv;
638 totalNewSbv += newSbv;
642 stats[TheoreticalNumWords] = std::to_string(totalTheoretical);
643 stats[OriginalSbvNumWords] = std::to_string(totalOriginalSbv);
644 stats[OriginalBvNumWords] = std::to_string(totalOriginalBv);
645 stats[NewSbvNumWords] = std::to_string(totalNewSbv);
646 stats[NewBvNumWords] = std::to_string(totalNewBv);
655 std::pair<hclust_fast_methods, std::vector<NodeID>> bestMapping = candidates[0];
657 size_t bestWords = std::numeric_limits<size_t>::max();
665 std::vector<NodeID> candidateMapping = candidate.second;
669 evaluate(candidateMapping, pointsToSets, candidateStats,
true);
672 printStats(evalSubtitle +
": candidate " + candidateMethodName, candidateStats);
674 size_t candidateWords = 0;
677 else assert(
false &&
"Clusterer::cluster: unsupported BV type for clustering.");
679 if (candidateWords < bestWords)
681 bestWords = candidateWords;
682 bestMapping = candidate;
695 NumObjects, TheoreticalNumWords, OriginalSbvNumWords, OriginalBvNumWords,
696 NewSbvNumWords, NewBvNumWords, NumRegions, NumGtIntRegions,
697 NumNonTrivialRegionObjects, LargestRegion, RegioningTime,
698 DistanceMatrixTime, FastClusterTime, DendrogramTraversalTime,
699 EvalTime, TotalTime, BestCandidate
702 const unsigned fieldWidth = 20;
704 SVFUtil::outs() <<
"****Clusterer Statistics: " << subtitle <<
"****\n";
708 if (stat != stats.end())
710 SVFUtil::outs() << std::setw(fieldWidth) << statKey <<
" " << stat->second <<
"\n";
#define NATIVE_INT_SIZE
Size of native integer that we'll use for bit vectors, in bits.
const PointsTo & getPts(NodeID id) override
static const std::string DistanceMatrixTime
static const std::string LargestRegion
static const std::string NumNonTrivialRegionObjects
static const std::string EvalTime
static const std::string TheoreticalNumWords
static std::vector< NodeID > cluster(BVDataPTAImpl *pta, const std::vector< std::pair< NodeID, unsigned >> keys, std::vector< std::pair< hclust_fast_methods, std::vector< NodeID >>> &candidates, std::string evalSubtitle="")
static const std::string BestCandidate
static std::vector< NodeID > getReverseNodeMapping(const std::vector< NodeID > &nodeMapping)
static std::pair< hclust_fast_methods, std::vector< NodeID > > determineBestMapping(const std::vector< std::pair< hclust_fast_methods, std::vector< NodeID >>> &candidates, Map< PointsTo, unsigned > pointsToSets, const std::string &evalSubtitle, double &evalTime)
static const std::string OriginalSbvNumWords
static const std::string DendrogramTraversalTime
static const std::string NewSbvNumWords
static std::vector< unsigned > regionObjects(const Map< NodeID, Set< NodeID >> &graph, size_t numObjects, size_t &numLabels)
static unsigned requiredBits(const PointsTo &pts)
Returns the minimum number of bits required to represent pts in a perfect world.
static void traverseDendrogram(std::vector< NodeID > &nodeMap, const int *dendrogram, const size_t numObjects, unsigned &allocCounter, Set< int > &visited, const int index, const std::vector< NodeID > ®ionNodeMap)
static void printStats(std::string title, Map< std::string, std::string > &stats)
static const std::string NumRegions
static size_t condensedIndex(size_t n, size_t i, size_t j)
static void evaluate(const std::vector< NodeID > &nodeMap, const Map< PointsTo, unsigned > pointsToSets, Map< std::string, std::string > &stats, bool accountForOcc)
Fills in *NumWords statistics in stats..
static const std::string RegioningTime
static const std::string NumGtIntRegions
static double * getDistanceMatrix(const std::vector< std::pair< const PointsTo *, unsigned >> pointsToSets, const size_t numObjects, const Map< NodeID, unsigned > &nodeMap, double &distanceMatrixTime)
static const std::string FastClusterTime
static const std::string OriginalBvNumWords
static const std::string NewBvNumWords
static const std::string NumObjects
static const std::string TotalTime
NodeID numValues
Number of values allocated, including specials.
static const NodeID nullPointerId
NodeID allocateValueId(void)
Allocate a value ID as determined by the strategy.
static NodeIDAllocator * get(void)
Return (singleton) allocator.
NodeID numSymbols
Number of explicit symbols allocated (e.g., llvm::Values), including specials.
enum Strategy strategy
Strategy to allocate with.
static const NodeID blackHolePointerId
static NodeIDAllocator * allocator
Single allocator.
static const NodeID constantObjectId
NodeID allocateObjectId(void)
Allocate an object ID as determined by the strategy.
static const NodeID blackHoleObjectId
NodeID numNodes
Total number of objects and values allocated.
NodeID endSymbolAllocation(void)
Notify the allocator that all symbols have had IDs allocated.
void increaseNumOfObjAndNodes()
NodeID allocateGepObjectId(NodeID base, u32_t offset, u32_t maxFieldLimit)
NodeIDAllocator(void)
Builds a node ID allocator with the strategy specified on the command line.
static void unset(void)
Deletes the (singleton) allocator.
Carries around command line options.
static const OptionMap< SVF::NodeIDAllocator::Strategy > NodeAllocStrat
static const OptionMap< PointsTo::Type > PtType
Type of points-to set to use for all analyses.
static const Option< bool > RegionAlign
Align identifiers in each region to a word.
static const Option< bool > RegionedClustering
Cluster partitions separately.
static const OptionMap< enum hclust_fast_methods > ClusterMethod
bool empty() const
Returns true if set is empty.
u32_t count() const
Returns number of elements.
const_iterator begin() const
static double getClk(bool mark=false)
int hclust_fast(int n, double *distmat, int method, int *merge, double *height)
std::string hclustMethodToString(hclust_fast_methods method)
Returns a string representation of a hclust method.
std::ostream & outs()
Overwrite llvm::outs()
std::unordered_map< Key, Value, Hash, KeyEqual, Allocator > Map
std::unordered_set< Key, Hash, KeyEqual, Allocator > Set