Static Value-Flow Analysis
NodeIDAllocator.cpp
Go to the documentation of this file.
1 //===- NodeIDAllocator.cpp -- Allocates node IDs on request ------------------------//
2 
3 #include <iomanip>
4 #include <iostream>
5 #include <queue>
6 #include <cmath>
7 
10 #include "Util/PTAStat.h"
11 #include "Util/NodeIDAllocator.h"
12 #include "SVFIR/SVFValue.h"
13 #include "SVFIR/SVFType.h"
14 #include "Util/SVFUtil.h"
15 #include "Util/Options.h"
16 
17 namespace SVF
18 {
23 
24 NodeIDAllocator *NodeIDAllocator::allocator = nullptr;
25 
27 {
28  if (allocator == nullptr)
29  {
30  allocator = new NodeIDAllocator();
31  }
32 
33  return allocator;
34 }
35 
37 {
38  if (allocator != nullptr)
39  {
40  delete allocator;
41  allocator = nullptr;
42  }
43 }
44 
45 // Initialise counts to 4 because that's how many special nodes we have.
47  : numObjects(4), numValues(4), numSymbols(4), numNodes(4), strategy(Options::NodeAllocStrat())
48 { }
49 
51 {
52  NodeID id = 0;
53  if (strategy == Strategy::DENSE)
54  {
55  // We allocate objects from 0(-ish, considering the special nodes) to # of objects.
56  id = numObjects;
57  }
58  else if (strategy == Strategy::REVERSE_DENSE)
59  {
60  id = UINT_MAX - numObjects;
61  }
62  else if (strategy == Strategy::SEQ)
63  {
64  // Everything is sequential and intermixed.
65  id = numNodes;
66  }
67  else if (strategy == Strategy::DBUG)
68  {
69  // Non-GEPs just grab the next available ID.
70  // We may have "holes" because GEPs increment the total
71  // but allocate far away. This is not a problem because
72  // we don't care about the relative distances between nodes.
73  id = numNodes;
74  }
75  else
76  {
77  assert(false && "NodeIDAllocator::allocateObjectId: unimplemented node allocation strategy.");
78  }
79 
81 
82  assert(id != 0 && "NodeIDAllocator::allocateObjectId: ID not allocated");
83  return id;
84 }
85 
87 {
88  NodeID id = 0;
89  if (strategy == Strategy::DENSE)
90  {
91  // Nothing different to the other case.
92  id = numObjects;
93  }
94  else if (strategy == Strategy::REVERSE_DENSE)
95  {
96  id = UINT_MAX - numObjects;
97  }
98  else if (strategy == Strategy::SEQ)
99  {
100  // Everything is sequential and intermixed.
101  id = numNodes;
102  }
103  else if (strategy == Strategy::DBUG)
104  {
105  // For a gep id, base id is set at lower bits, and offset is set at higher bits
106  // e.g., 1100050 denotes base=50 and offset=10
107  // The offset is 10, not 11, because we add 1 to the offset to ensure that the
108  // high bits are never 0. For example, we do not want the gep id to be 50 when
109  // the base is 50 and the offset is 0.
110  NodeID gepMultiplier = pow(10, ceil(log10(
111  numSymbols > maxFieldLimit ?
112  numSymbols : maxFieldLimit
113  )));
114  id = (offset + 1) * gepMultiplier + base;
115  assert(id > numSymbols && "NodeIDAllocator::allocateGepObjectId: GEP allocation clashing with other nodes");
116  }
117  else
118  {
119  assert(false && "NodeIDAllocator::allocateGepObjectId: unimplemented node allocation strategy");
120  }
121 
123 
124  assert(id != 0 && "NodeIDAllocator::allocateGepObjectId: ID not allocated");
125  return id;
126 }
127 
129 {
130  NodeID id = 0;
131  if (strategy == Strategy::DENSE)
132  {
133  // We allocate values from UINT_MAX to UINT_MAX - # of values.
134  // TODO: UINT_MAX does not allow for an easily changeable type
135  // of NodeID (though it is already in use elsewhere).
136  id = UINT_MAX - numValues;
137  }
138  else if (strategy == Strategy::REVERSE_DENSE)
139  {
140  id = numValues;
141  }
142  else if (strategy == Strategy::SEQ)
143  {
144  // Everything is sequential and intermixed.
145  id = numNodes;
146  }
147  else if (strategy == Strategy::DBUG)
148  {
149  id = numNodes;
150  }
151  else
152  {
153  assert(false && "NodeIDAllocator::allocateValueId: unimplemented node allocation strategy");
154  }
155 
156  ++numValues;
157  ++numNodes;
158 
159  assert(id != 0 && "NodeIDAllocator::allocateValueId: ID not allocated");
160  return id;
161 }
162 
164 {
166  return numSymbols;
167 }
168 
186 
187 std::vector<NodeID> NodeIDAllocator::Clusterer::cluster(BVDataPTAImpl *pta, const std::vector<std::pair<NodeID, unsigned>> keys, std::vector<std::pair<hclust_fast_methods, std::vector<NodeID>>> &candidates, std::string evalSubtitle)
188 {
189  assert(pta != nullptr && "Clusterer::cluster: given null BVDataPTAImpl");
190  assert(Options::NodeAllocStrat() == Strategy::DENSE && "Clusterer::cluster: only dense allocation clustering currently supported");
191 
192  Map<std::string, std::string> overallStats;
193  double fastClusterTime = 0.0;
194  double distanceMatrixTime = 0.0;
195  double dendrogramTraversalTime = 0.0;
196  double regioningTime = 0.0;
197  double evalTime = 0.0;
198 
199  // Pair of nodes to their (minimum) distance and the number of occurrences of that distance.
200  Map<std::pair<NodeID, NodeID>, std::pair<unsigned, unsigned>> distances;
201 
202  double clkStart = PTAStat::getClk(true);
203 
204  // Map points-to sets to occurrences.
205  Map<PointsTo, unsigned> pointsToSets;
206 
207  // Objects each object shares at least a points-to set with.
208  Map<NodeID, Set<NodeID>> coPointeeGraph;
209  for (const std::pair<NodeID, unsigned> &keyOcc : keys)
210  {
211  const PointsTo &pts = pta->getPts(keyOcc.first);
212  const size_t oldSize = pointsToSets.size();
213  pointsToSets[pts] += keyOcc.second;;
214 
215  // Edges in this graph have no weight or uniqueness, so we only need to
216  // do this for each points-to set once.
217  if (oldSize != pointsToSets.size())
218  {
219  NodeID firstO = !pts.empty() ? *(pts.begin()) : 0;
220  Set<NodeID> &firstOsNeighbours = coPointeeGraph[firstO];
221  for (const NodeID o : pts)
222  {
223  if (o != firstO)
224  {
225  firstOsNeighbours.insert(o);
226  coPointeeGraph[o].insert(firstO);
227  }
228  }
229  }
230  }
231 
233  overallStats[NumObjects] = std::to_string(numObjects);
234 
235  size_t numRegions = 0;
236  std::vector<unsigned> objectsRegion;
238  {
239  objectsRegion = regionObjects(coPointeeGraph, numObjects, numRegions);
240  }
241  else
242  {
243  // Just a single big region (0).
244  objectsRegion.insert(objectsRegion.end(), numObjects, 0);
245  numRegions = 1;
246  }
247 
248  // Set needs to be ordered because getDistanceMatrix, in its n^2 iteration, expects
249  // sets to be ordered (we are building a condensed matrix, not a full matrix, so it
250  // matters). In getDistanceMatrix, doing regionReverseMapping for oi and oj, where
251  // oi < oj, and getting a result moi > moj gives incorrect results.
252  // In the condensed matrix, [b][a] where b >= a, is incorrect.
253  std::vector<OrderedSet<NodeID>> regionsObjects(numRegions);
254  for (NodeID o = 0; o < numObjects; ++o) regionsObjects[objectsRegion[o]].insert(o);
255 
256  // Size of the return node mapping. It is potentially larger than the number of
257  // objects because we align each region to NATIVE_INT_SIZE.
258  // size_t numMappings = 0;
259 
260  // Maps a region to a mapping which maps 0 to n to all objects
261  // in that region.
262  std::vector<std::vector<NodeID>> regionMappings(numRegions);
263  // The reverse: region to mapping of objects to a 0 to n from above.
264  std::vector<Map<NodeID, unsigned>> regionReverseMappings(numRegions);
265  // We can thus use 0 to n for each region to create smaller distance matrices.
266  for (unsigned region = 0; region < numRegions; ++region)
267  {
268  size_t curr = 0;
269  // With the OrderedSet above, o1 < o2 => map[o1] < map[o2].
270  for (NodeID o : regionsObjects[region])
271  {
272  // push_back here is just like p...[region][curr] = o.
273  regionMappings[region].push_back(o);
274  regionReverseMappings[region][o] = curr++;
275  }
276 
277  // curr is the number of objects. A region with no objects makes no sense.
278  assert(curr != 0);
279 
280  // Number of bits needed for this region if we were
281  // to start assigning from 0 rounded up to the fewest needed
282  // native ints. This is added to the number of mappings since
283  // we align each region to a native int.
284  // numMappings += requiredBits(regionsObjects[region].size());
285  }
286 
287  // Points-to sets which are relevant to a region, i.e., those whose elements
288  // belong to that region. Pair is for occurrences.
289  std::vector<std::vector<std::pair<const PointsTo *, unsigned>>> regionsPointsTos(numRegions);
290  for (const Map<PointsTo, unsigned>::value_type &ptocc : pointsToSets)
291  {
292  const PointsTo &pt = ptocc.first;
293  const unsigned occ = ptocc.second;
294  if (pt.empty()) continue;
295  // Guaranteed that begin() != end() because of the continue above. All objects in pt
296  // will be relevant to the same region.
297  unsigned region = objectsRegion[*(pt.begin())];
298  // In our "graph", objects in the same points-to set have an edge between them,
299  // so they are all in the same connected component/region.
300  regionsPointsTos[region].push_back(std::make_pair(&pt, occ));
301  }
302 
303  double clkEnd = PTAStat::getClk(true);
304  regioningTime = (clkEnd - clkStart) / TIMEINTERVAL;
305  overallStats[RegioningTime] = std::to_string(regioningTime);
306  overallStats[NumRegions] = std::to_string(numRegions);
307 
308  std::vector<hclust_fast_methods> methods;
310  {
311  methods.push_back(HCLUST_METHOD_SINGLE);
312  methods.push_back(HCLUST_METHOD_COMPLETE);
313  methods.push_back(HCLUST_METHOD_AVERAGE);
314  }
315  else
316  {
317  methods.push_back(Options::ClusterMethod());
318  }
319 
320  for (const hclust_fast_methods method : methods)
321  {
322  std::vector<NodeID> nodeMap(numObjects, UINT_MAX);
323 
324  unsigned numGtIntRegions = 0;
325  unsigned largestRegion = 0;
326  unsigned nonTrivialRegionObjects = 0;
327  unsigned allocCounter = 0;
328  for (unsigned region = 0; region < numRegions; ++region)
329  {
330  const size_t regionNumObjects = regionsObjects[region].size();
331  // Round up to next Word: ceiling of current allocation to get how
332  // many words and multiply to get the number of bits; if we're aligning.
333  if (Options::RegionAlign())
334  {
335  allocCounter =
336  ((allocCounter + NATIVE_INT_SIZE - 1) / NATIVE_INT_SIZE) * NATIVE_INT_SIZE;
337  }
338 
339  if (regionNumObjects > largestRegion) largestRegion = regionNumObjects;
340 
341  // For regions with fewer than 64 objects, we can just allocate them
342  // however as they will be in the one int regardless..
343  if (regionNumObjects < NATIVE_INT_SIZE)
344  {
345  for (NodeID o : regionsObjects[region]) nodeMap[o] = allocCounter++;
346  continue;
347  }
348 
349  ++numGtIntRegions;
350  nonTrivialRegionObjects += regionNumObjects;
351 
352  double *distMatrix = getDistanceMatrix(regionsPointsTos[region], regionNumObjects,
353  regionReverseMappings[region], distanceMatrixTime);
354 
355  clkStart = PTAStat::getClk(true);
356  int *dendrogram = new int[2 * (regionNumObjects - 1)];
357  double *height = new double[regionNumObjects - 1];
358  hclust_fast(regionNumObjects, distMatrix, method, dendrogram, height);
359  delete[] distMatrix;
360  delete[] height;
361  clkEnd = PTAStat::getClk(true);
362  fastClusterTime += (clkEnd - clkStart) / TIMEINTERVAL;
363 
364  clkStart = PTAStat::getClk(true);
365  Set<int> visited;
366  traverseDendrogram(nodeMap, dendrogram, regionNumObjects, allocCounter,
367  visited, regionNumObjects - 1, regionMappings[region]);
368  delete[] dendrogram;
369  clkEnd = PTAStat::getClk(true);
370  dendrogramTraversalTime += (clkEnd - clkStart) / TIMEINTERVAL;
371  }
372 
373  candidates.push_back(std::make_pair(method, nodeMap));
374 
375  // Though we "update" these in the loop, they will be the same every iteration.
376  overallStats[NumGtIntRegions] = std::to_string(numGtIntRegions);
377  overallStats[LargestRegion] = std::to_string(largestRegion);
378  overallStats[NumNonTrivialRegionObjects] = std::to_string(nonTrivialRegionObjects);
379  }
380 
381  // Work out which of the mappings we generated looks best.
382  std::pair<hclust_fast_methods, std::vector<NodeID>> bestMapping = determineBestMapping(candidates, pointsToSets,
383  evalSubtitle, evalTime);
384 
385  overallStats[DistanceMatrixTime] = std::to_string(distanceMatrixTime);
386  overallStats[DendrogramTraversalTime] = std::to_string(dendrogramTraversalTime);
387  overallStats[FastClusterTime] = std::to_string(fastClusterTime);
388  overallStats[EvalTime] = std::to_string(evalTime);
389  overallStats[TotalTime] = std::to_string(distanceMatrixTime + dendrogramTraversalTime + fastClusterTime + regioningTime + evalTime);
390 
391  overallStats[BestCandidate] = SVFUtil::hclustMethodToString(bestMapping.first);
392  printStats(evalSubtitle + ": overall", overallStats);
393 
394  return bestMapping.second;
395 }
396 
397 std::vector<NodeID> NodeIDAllocator::Clusterer::getReverseNodeMapping(const std::vector<NodeID> &nodeMapping)
398 {
399  // nodeMapping.size() may not be big enough because we leave some gaps, but it's a start.
400  std::vector<NodeID> reverseNodeMapping(nodeMapping.size(), UINT_MAX);
401  for (size_t i = 0; i < nodeMapping.size(); ++i)
402  {
403  const NodeID mapsTo = nodeMapping.at(i);
404  if (mapsTo >= reverseNodeMapping.size()) reverseNodeMapping.resize(mapsTo + 1, UINT_MAX);
405  reverseNodeMapping.at(mapsTo) = i;
406  }
407 
408  return reverseNodeMapping;
409 }
410 
411 size_t NodeIDAllocator::Clusterer::condensedIndex(size_t n, size_t i, size_t j)
412 {
413  // From https://stackoverflow.com/a/14839010
414  return n*(n-1)/2 - (n-i)*(n-i-1)/2 + j - i - 1;
415 }
416 
418 {
419  return requiredBits(pts.count());
420 }
421 
423 {
424  if (n == 0) return 0;
425  // Ceiling of number of bits amongst each native integer gives needed native ints,
426  // so we then multiply again by the number of bits in each native int.
427  return ((n - 1) / NATIVE_INT_SIZE + 1) * NATIVE_INT_SIZE;
428 }
429 
430 double *NodeIDAllocator::Clusterer::getDistanceMatrix(const std::vector<std::pair<const PointsTo *, unsigned>> pointsToSets,
431  const size_t numObjects, const Map<NodeID, unsigned> &nodeMap,
432  double &distanceMatrixTime)
433 {
434  const double clkStart = PTAStat::getClk(true);
435  size_t condensedSize = (numObjects * (numObjects - 1)) / 2;
436  double *distMatrix = new double[condensedSize];
437  for (size_t i = 0; i < condensedSize; ++i) distMatrix[i] = numObjects * numObjects;
438 
439  // TODO: maybe use machine epsilon?
440  // For reducing distance due to extra occurrences.
441  // Can differentiate ~9999 occurrences.
442  double occurrenceEpsilon = 0.0001;
443 
444  for (const std::pair<const PointsTo *, unsigned> &ptsOcc : pointsToSets)
445  {
446  const PointsTo *pts = ptsOcc.first;
447  assert(pts != nullptr);
448  const unsigned occ = ptsOcc.second;
449 
450  // Distance between each element of pts.
451  unsigned distance = requiredBits(*pts) / NATIVE_INT_SIZE;
452 
453  // Use a vector so we can index into pts.
454  std::vector<NodeID> ptsVec;
455  for (const NodeID o : *pts) ptsVec.push_back(o);
456  for (size_t i = 0; i < ptsVec.size(); ++i)
457  {
458  const NodeID oi = ptsVec[i];
459  const Map<NodeID, unsigned>::const_iterator moi = nodeMap.find(oi);
460  assert(moi != nodeMap.end());
461  for (size_t j = i + 1; j < ptsVec.size(); ++j)
462  {
463  const NodeID oj = ptsVec[j];
464  const Map<NodeID, unsigned>::const_iterator moj = nodeMap.find(oj);
465  assert(moj != nodeMap.end());
466  double &existingDistance = distMatrix[condensedIndex(numObjects, moi->second, moj->second)];
467 
468  // Subtract extra occurrenceEpsilon to make upcoming logic simpler.
469  // When existingDistance is never whole, it is always between two distances.
470  if (distance < existingDistance) existingDistance = distance - occurrenceEpsilon;
471 
472  if (distance == std::ceil(existingDistance))
473  {
474  // We have something like distance == x, existingDistance == x - e, for some e < 1
475  // (potentially even set during this iteration).
476  // So, the new distance is an occurrence the existingDistance being tracked, it just
477  // had some reductions because of multiple occurrences.
478  // If there is not room within this distance to reduce more (increase priority),
479  // just ignore it. TODO: maybe warn?
480  if (existingDistance - occ * occurrenceEpsilon > std::floor(existingDistance))
481  {
482  existingDistance -= occ * occurrenceEpsilon;
483  }
484  else
485  {
486  // Reached minimum.
487  existingDistance = std::floor(existingDistance) + occurrenceEpsilon;
488  }
489  }
490  }
491  }
492 
493  }
494 
495  const double clkEnd = PTAStat::getClk(true);
496  distanceMatrixTime += (clkEnd - clkStart) / TIMEINTERVAL;
497 
498  return distMatrix;
499 }
500 
501 void NodeIDAllocator::Clusterer::traverseDendrogram(std::vector<NodeID> &nodeMap, const int *dendrogram, const size_t numObjects, unsigned &allocCounter, Set<int> &visited, const int index, const std::vector<NodeID> &regionNodeMap)
502 {
503  if (visited.find(index) != visited.end()) return;
504  visited.insert(index);
505 
506  int left = dendrogram[index - 1];
507  if (left < 0)
508  {
509  // Reached a leaf.
510  // -1 because the items start from 1 per fastcluster (TODO).
511  nodeMap[regionNodeMap[std::abs(left) - 1]] = allocCounter;
512  ++allocCounter;
513  }
514  else
515  {
516  traverseDendrogram(nodeMap, dendrogram, numObjects, allocCounter, visited, left, regionNodeMap);
517  }
518 
519  // Repeat for the right child.
520  int right = dendrogram[(numObjects - 1) + index - 1];
521  if (right < 0)
522  {
523  nodeMap[regionNodeMap[std::abs(right) - 1]] = allocCounter;
524  ++allocCounter;
525  }
526  else
527  {
528  traverseDendrogram(nodeMap, dendrogram, numObjects, allocCounter, visited, right, regionNodeMap);
529  }
530 }
531 
532 std::vector<NodeID> NodeIDAllocator::Clusterer::regionObjects(const Map<NodeID, Set<NodeID>> &graph, size_t numObjects, size_t &numLabels)
533 {
534  unsigned label = UINT_MAX;
535  std::vector<NodeID> labels(numObjects, UINT_MAX);
536  Set<NodeID> labelled;
537  for (const Map<NodeID, Set<NodeID>>::value_type &oos : graph)
538  {
539  const NodeID o = oos.first;
540  if (labels[o] != UINT_MAX) continue;
541  std::queue<NodeID> bfsQueue;
542  bfsQueue.push(o);
543  ++label;
544  while (!bfsQueue.empty())
545  {
546  const NodeID o = bfsQueue.front();
547  bfsQueue.pop();
548  if (labels[o] != UINT_MAX)
549  {
550  assert(labels[o] == label);
551  continue;
552  }
553 
554  labels[o] = label;
555  Map<NodeID, Set<NodeID>>::const_iterator neighboursIt = graph.find(o);
556  assert(neighboursIt != graph.end());
557  for (const NodeID neighbour : neighboursIt->second) bfsQueue.push(neighbour);
558  }
559  }
560 
561  // The remaining objects have no relation with others: they get their own label.
562  for (size_t o = 0; o < numObjects; ++o)
563  {
564  if (labels[o] == UINT_MAX) labels[o] = ++label;
565  }
566 
567  numLabels = label + 1;
568 
569  return labels;
570 }
571 
572 void NodeIDAllocator::Clusterer::evaluate(const std::vector<NodeID> &nodeMap, const Map<PointsTo, unsigned> pointsToSets, Map<std::string, std::string> &stats, bool accountForOcc)
573 {
574  u64_t totalTheoretical = 0;
575  u64_t totalOriginalSbv = 0;
576  u64_t totalOriginalBv = 0;
577  u64_t totalNewSbv = 0;
578  u64_t totalNewBv = 0;
579 
580  for (const Map<PointsTo, unsigned>::value_type &ptsOcc : pointsToSets)
581  {
582  const PointsTo &pts = ptsOcc.first;
583  const unsigned occ = ptsOcc.second;
584  if (pts.count() == 0) continue;
585 
586  u64_t theoretical = requiredBits(pts) / NATIVE_INT_SIZE;
587  if (accountForOcc) theoretical *= occ;
588 
589  // Check number of words for original SBV.
590  Set<unsigned> words;
591  // TODO: nasty hardcoding.
592  for (const NodeID o : pts) words.insert(o / 128);
593  u64_t originalSbv = words.size() * 2;
594  if (accountForOcc) originalSbv *= occ;
595 
596  // Check number of words for original BV.
597  NodeID min = UINT_MAX;
598  NodeID max = 0;
599  for (NodeID o : pts)
600  {
601  if (o < min) min = o;
602  if (o > max) max = o;
603  }
604  words.clear();
605  for (NodeID b = min; b <= max; ++b)
606  {
607  words.insert(b / NATIVE_INT_SIZE);
608  }
609  u64_t originalBv = words.size();
610  if (accountForOcc) originalBv *= occ;
611 
612  // Check number of words for new SBV.
613  words.clear();
614  // TODO: nasty hardcoding.
615  for (const NodeID o : pts) words.insert(nodeMap[o] / 128);
616  u64_t newSbv = words.size() * 2;
617  if (accountForOcc) newSbv *= occ;
618 
619  // Check number of words for new BV.
620  min = UINT_MAX;
621  max = 0;
622  for (const NodeID o : pts)
623  {
624  const NodeID mappedO = nodeMap[o];
625  if (mappedO < min) min = mappedO;
626  if (mappedO > max) max = mappedO;
627  }
628 
629  words.clear();
630  // No nodeMap[b] because min and max and from nodeMap.
631  for (NodeID b = min; b <= max; ++b) words.insert(b / NATIVE_INT_SIZE);
632  u64_t newBv = words.size();
633  if (accountForOcc) newBv *= occ;
634 
635  totalTheoretical += theoretical;
636  totalOriginalSbv += originalSbv;
637  totalOriginalBv += originalBv;
638  totalNewSbv += newSbv;
639  totalNewBv += newBv;
640  }
641 
642  stats[TheoreticalNumWords] = std::to_string(totalTheoretical);
643  stats[OriginalSbvNumWords] = std::to_string(totalOriginalSbv);
644  stats[OriginalBvNumWords] = std::to_string(totalOriginalBv);
645  stats[NewSbvNumWords] = std::to_string(totalNewSbv);
646  stats[NewBvNumWords] = std::to_string(totalNewBv);
647 }
648 
649 // Work out which of the mappings we generated looks best.
650 std::pair<hclust_fast_methods, std::vector<NodeID>> NodeIDAllocator::Clusterer::determineBestMapping(
651  const std::vector<std::pair<hclust_fast_methods, std::vector<NodeID>>> &candidates,
652  Map<PointsTo, unsigned> pointsToSets, const std::string &evalSubtitle, double &evalTime)
653 {
654  // In case we're not comparing anything, set to first "candidate".
655  std::pair<hclust_fast_methods, std::vector<NodeID>> bestMapping = candidates[0];
656  // Number of bits required for the best candidate.
657  size_t bestWords = std::numeric_limits<size_t>::max();
658  if (evalSubtitle != "" || Options::ClusterMethod() == HCLUST_METHOD_SVF_BEST)
659  {
660  for (const std::pair<hclust_fast_methods, std::vector<NodeID>> &candidate : candidates)
661  {
662  Map<std::string, std::string> candidateStats;
663  hclust_fast_methods candidateMethod = candidate.first;
664  std::string candidateMethodName = SVFUtil::hclustMethodToString(candidateMethod);
665  std::vector<NodeID> candidateMapping = candidate.second;
666 
667  // TODO: parameterise final arg.
668  const double clkStart = PTAStat::getClk(true);
669  evaluate(candidateMapping, pointsToSets, candidateStats, true);
670  const double clkEnd = PTAStat::getClk(true);
671  evalTime += (clkEnd - clkStart) / TIMEINTERVAL;
672  printStats(evalSubtitle + ": candidate " + candidateMethodName, candidateStats);
673 
674  size_t candidateWords = 0;
675  if (Options::PtType() == PointsTo::SBV) candidateWords = std::stoull(candidateStats[NewSbvNumWords]);
676  else if (Options::PtType() == PointsTo::CBV) candidateWords = std::stoull(candidateStats[NewBvNumWords]);
677  else assert(false && "Clusterer::cluster: unsupported BV type for clustering.");
678 
679  if (candidateWords < bestWords)
680  {
681  bestWords = candidateWords;
682  bestMapping = candidate;
683  }
684  }
685  }
686 
687  return bestMapping;
688 }
689 
691 {
692  // When not in order, it is too hard to compare original/new SBV/BV words, so this array forces an order.
693  static const std::string statKeys[] =
694  {
695  NumObjects, TheoreticalNumWords, OriginalSbvNumWords, OriginalBvNumWords,
696  NewSbvNumWords, NewBvNumWords, NumRegions, NumGtIntRegions,
697  NumNonTrivialRegionObjects, LargestRegion, RegioningTime,
698  DistanceMatrixTime, FastClusterTime, DendrogramTraversalTime,
699  EvalTime, TotalTime, BestCandidate
700  };
701 
702  const unsigned fieldWidth = 20;
703  SVFUtil::outs().flags(std::ios::left);
704  SVFUtil::outs() << "****Clusterer Statistics: " << subtitle << "****\n";
705  for (const std::string& statKey : statKeys)
706  {
707  Map<std::string, std::string>::const_iterator stat = stats.find(statKey);
708  if (stat != stats.end())
709  {
710  SVFUtil::outs() << std::setw(fieldWidth) << statKey << " " << stat->second << "\n";
711  }
712  }
713 
714  SVFUtil::outs().flush();
715 }
716 
717 }; // namespace SVF.
#define TIMEINTERVAL
Definition: SVFType.h:512
#define NATIVE_INT_SIZE
Size of native integer that we'll use for bit vectors, in bits.
Definition: SVFType.h:516
buffer offset
Definition: cJSON.cpp:1113
cJSON * n
Definition: cJSON.cpp:2558
const cJSON *const b
Definition: cJSON.h:255
int index
Definition: cJSON.h:170
const char *const string
Definition: cJSON.h:172
const PointsTo & getPts(NodeID id) override
static const std::string DistanceMatrixTime
static const std::string LargestRegion
static const std::string NumNonTrivialRegionObjects
static const std::string EvalTime
static const std::string TheoreticalNumWords
static std::vector< NodeID > cluster(BVDataPTAImpl *pta, const std::vector< std::pair< NodeID, unsigned >> keys, std::vector< std::pair< hclust_fast_methods, std::vector< NodeID >>> &candidates, std::string evalSubtitle="")
static const std::string BestCandidate
static std::vector< NodeID > getReverseNodeMapping(const std::vector< NodeID > &nodeMapping)
static std::pair< hclust_fast_methods, std::vector< NodeID > > determineBestMapping(const std::vector< std::pair< hclust_fast_methods, std::vector< NodeID >>> &candidates, Map< PointsTo, unsigned > pointsToSets, const std::string &evalSubtitle, double &evalTime)
static const std::string OriginalSbvNumWords
static const std::string DendrogramTraversalTime
static const std::string NewSbvNumWords
static std::vector< unsigned > regionObjects(const Map< NodeID, Set< NodeID >> &graph, size_t numObjects, size_t &numLabels)
static unsigned requiredBits(const PointsTo &pts)
Returns the minimum number of bits required to represent pts in a perfect world.
static void traverseDendrogram(std::vector< NodeID > &nodeMap, const int *dendrogram, const size_t numObjects, unsigned &allocCounter, Set< int > &visited, const int index, const std::vector< NodeID > &regionNodeMap)
static void printStats(std::string title, Map< std::string, std::string > &stats)
static const std::string NumRegions
static size_t condensedIndex(size_t n, size_t i, size_t j)
static void evaluate(const std::vector< NodeID > &nodeMap, const Map< PointsTo, unsigned > pointsToSets, Map< std::string, std::string > &stats, bool accountForOcc)
Fills in *NumWords statistics in stats..
static const std::string RegioningTime
static const std::string NumGtIntRegions
static double * getDistanceMatrix(const std::vector< std::pair< const PointsTo *, unsigned >> pointsToSets, const size_t numObjects, const Map< NodeID, unsigned > &nodeMap, double &distanceMatrixTime)
static const std::string FastClusterTime
static const std::string OriginalBvNumWords
static const std::string NewBvNumWords
static const std::string NumObjects
static const std::string TotalTime
NodeID numValues
Number of values allocated, including specials.
static const NodeID nullPointerId
NodeID allocateValueId(void)
Allocate a value ID as determined by the strategy.
static NodeIDAllocator * get(void)
Return (singleton) allocator.
NodeID numSymbols
Number of explicit symbols allocated (e.g., llvm::Values), including specials.
enum Strategy strategy
Strategy to allocate with.
static const NodeID blackHolePointerId
static NodeIDAllocator * allocator
Single allocator.
static const NodeID constantObjectId
NodeID allocateObjectId(void)
Allocate an object ID as determined by the strategy.
static const NodeID blackHoleObjectId
NodeID numNodes
Total number of objects and values allocated.
NodeID endSymbolAllocation(void)
Notify the allocator that all symbols have had IDs allocated.
NodeID allocateGepObjectId(NodeID base, u32_t offset, u32_t maxFieldLimit)
NodeIDAllocator(void)
Builds a node ID allocator with the strategy specified on the command line.
static void unset(void)
Deletes the (singleton) allocator.
Carries around command line options.
Definition: Options.h:20
static const OptionMap< SVF::NodeIDAllocator::Strategy > NodeAllocStrat
Definition: Options.h:35
static const OptionMap< PointsTo::Type > PtType
Type of points-to set to use for all analyses.
Definition: Options.h:50
static const Option< bool > RegionAlign
Align identifiers in each region to a word.
Definition: Options.h:62
static const Option< bool > RegionedClustering
Cluster partitions separately.
Definition: Options.h:59
static const OptionMap< enum hclust_fast_methods > ClusterMethod
Definition: Options.h:56
bool empty() const
Returns true if set is empty.
Definition: PointsTo.cpp:98
u32_t count() const
Returns number of elements.
Definition: PointsTo.cpp:111
const_iterator begin() const
Definition: PointsTo.h:128
static double getClk(bool mark=false)
Definition: SVFStat.cpp:47
hclust_fast_methods
Definition: fastcluster.h:66
@ HCLUST_METHOD_AVERAGE
Definition: fastcluster.h:72
@ HCLUST_METHOD_COMPLETE
Definition: fastcluster.h:70
@ HCLUST_METHOD_SVF_BEST
Definition: fastcluster.h:76
@ HCLUST_METHOD_SINGLE
Definition: fastcluster.h:68
int hclust_fast(int n, double *distmat, int method, int *merge, double *height)
std::string hclustMethodToString(hclust_fast_methods method)
Returns a string representation of a hclust method.
Definition: SVFUtil.cpp:260
std::ostream & outs()
Overwrite llvm::outs()
Definition: SVFUtil.h:50
for isBitcode
Definition: BasicTypes.h:68
unsigned long long u64_t
Definition: GeneralType.h:48
u32_t NodeID
Definition: GeneralType.h:55
std::unordered_map< Key, Value, Hash, KeyEqual, Allocator > Map
Definition: GeneralType.h:101
unsigned u32_t
Definition: GeneralType.h:46
std::unordered_set< Key, Hash, KeyEqual, Allocator > Set
Definition: GeneralType.h:96