Static Value-Flow Analysis
ObjTypeInference.cpp
Go to the documentation of this file.
1 //===- ObjTypeInference.cpp -- Type inference----------------------------//
2 //
3 // SVF: Static Value-Flow Analysis
4 //
5 // Copyright (C) <2013-> <Yulei Sui>
6 //
7 
8 // This program is free software: you can redistribute it and/or modify
9 // it under the terms of the GNU Affero General Public License as published by
10 // the Free Software Foundation, either version 3 of the License, or
11 // (at your option) any later version.
12 
13 // This program is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 // GNU Affero General Public License for more details.
17 
18 // You should have received a copy of the GNU Affero General Public License
19 // along with this program. If not, see <http://www.gnu.org/licenses/>.
20 //
21 //===----------------------------------------------------------------------===//
22 
23 /*
24  * ObjTypeInference.cpp
25  *
26  * Created by Xiao Cheng on 10/01/24.
27  *
28  */
29 
31 #include "SVF-LLVM/BasicTypes.h"
32 #include "SVF-LLVM/LLVMModule.h"
33 #include "SVF-LLVM/LLVMUtil.h"
34 #include "SVF-LLVM/CppUtil.h"
35 #include "Util/Casting.h"
36 
37 #define TYPE_DEBUG 0 /* Turn this on if you're debugging type inference */
38 #define ERR_MSG(msg) \
39  do \
40  { \
41  SVFUtil::errs() << SVFUtil::errMsg("Error ") << __FILE__ << ':' \
42  << __LINE__ << ": " << (msg) << '\n'; \
43  } while (0)
44 #define ABORT_MSG(msg) \
45  do \
46  { \
47  ERR_MSG(msg); \
48  abort(); \
49  } while (0)
50 #define ABORT_IFNOT(condition, msg) \
51  do \
52  { \
53  if (!(condition)) \
54  ABORT_MSG(msg); \
55  } while (0)
56 
57 #if TYPE_DEBUG
58 #define WARN_MSG(msg) \
59  do \
60  { \
61  SVFUtil::outs() << SVFUtil::wrnMsg("Warning ") << __FILE__ << ':' \
62  << __LINE__ << ": " << msg << '\n'; \
63  } while (0)
64 #define WARN_IFNOT(condition, msg) \
65  do \
66  { \
67  if (!(condition)) \
68  WARN_MSG(msg); \
69  } while (0)
70 #else
71 #define WARN_MSG(msg)
72 #define WARN_IFNOT(condition, msg)
73 #endif
74 
75 using namespace SVF;
76 using namespace SVFUtil;
77 using namespace LLVMUtil;
78 using namespace cppUtil;
79 
80 
81 const std::string TYPEMALLOC = "TYPE_MALLOC";
82 
85 const Type *infersiteToType(const Value *val)
86 {
87  assert(val && "value cannot be empty");
88  if (SVFUtil::isa<LoadInst, StoreInst>(val))
89  {
90  return llvm::getLoadStoreType(const_cast<Value *>(val));
91  }
92  else if (const auto *gepInst = SVFUtil::dyn_cast<GetElementPtrInst>(val))
93  {
94  return gepInst->getSourceElementType();
95  }
96  else if (const auto *call = SVFUtil::dyn_cast<CallBase>(val))
97  {
98  return call->getFunctionType();
99  }
100  else if (const auto *allocaInst = SVFUtil::dyn_cast<AllocaInst>(val))
101  {
102  return allocaInst->getAllocatedType();
103  }
104  else if (const auto *globalValue = SVFUtil::dyn_cast<GlobalValue>(val))
105  {
106  return globalValue->getValueType();
107  }
108  else
109  {
110  ABORT_MSG("unknown value:" + dumpValueAndDbgInfo(val));
111  }
112 }
113 
115 {
116  ABORT_IFNOT(val, "val cannot be null");
117  // heap has a default type of 8-bit integer type
118  if (SVFUtil::isa<Instruction>(val) && LLVMUtil::isHeapAllocExtCallViaRet(
119  SVFUtil::cast<Instruction>(val)))
120  return int8Type();
121  // otherwise we return a pointer type in the default address space
122  return ptrType();
123 }
124 
126 {
128 }
129 
137 {
138  if (isAlloc(var)) return fwInferObjType(var);
139  Set<const Value *> &sources = bwfindAllocOfVar(var);
140  Set<const Type *> types;
141  if (sources.empty())
142  {
143  // cannot find allocation, try to fw infer starting from var
144  types.insert(fwInferObjType(var));
145  }
146  else
147  {
148  for (const auto &source: sources)
149  {
150  types.insert(fwInferObjType(source));
151  }
152  }
153  const Type *largestTy = selectLargestSizedType(types);
154  ABORT_IFNOT(largestTy, "return type cannot be null");
155  return largestTy;
156 }
157 
163 {
164  if (const AllocaInst *allocaInst = SVFUtil::dyn_cast<AllocaInst>(var))
165  {
166  // stack object
167  return infersiteToType(allocaInst);
168  }
169  else if (const GlobalValue *global = SVFUtil::dyn_cast<GlobalValue>(var))
170  {
171  // global object
172  return infersiteToType(global);
173  }
174  else
175  {
176  // for heap or static object, we forward infer its type
177 
178  // consult cache
179  auto tIt = _valueToType.find(var);
180  if (tIt != _valueToType.end())
181  {
182  return tIt->second ? tIt->second : defaultType(var);
183  }
184 
185  // simulate the call stack, the second element indicates whether we should update valueTypes for current value
187  Set<ValueBoolPair> visited;
188  workList.push({var, false});
189 
190  while (!workList.empty())
191  {
192  auto curPair = workList.pop();
193  if (visited.count(curPair))
194  continue;
195  visited.insert(curPair);
196  const Value* curValue = curPair.first;
197  bool canUpdate = curPair.second;
198  Set<const Value*> infersites;
199 
200  auto insertInferSite = [&infersites,
201  &canUpdate](const Value* infersite)
202  {
203  if (canUpdate)
204  infersites.insert(infersite);
205  };
206  auto insertInferSitesOrPushWorklist =
207  [this, &infersites, &workList, &canUpdate](const auto& pUser)
208  {
209  auto vIt = _valueToInferSites.find(pUser);
210  if (canUpdate)
211  {
212  if (vIt != _valueToInferSites.end())
213  {
214  infersites.insert(vIt->second.begin(),
215  vIt->second.end());
216  }
217  }
218  else
219  {
220  if (vIt == _valueToInferSites.end())
221  workList.push({pUser, false});
222  }
223  };
224  if (!canUpdate && !_valueToInferSites.count(curValue))
225  {
226  workList.push({curValue, true});
227  }
228  if (const auto* gepInst =
229  SVFUtil::dyn_cast<GetElementPtrInst>(curValue))
230  insertInferSite(gepInst);
231  for (const auto it : curValue->users())
232  {
233  if (const auto* loadInst = SVFUtil::dyn_cast<LoadInst>(it))
234  {
235  /*
236  * infer based on load, e.g.,
237  %call = call i8* malloc()
238  %1 = bitcast i8* %call to %struct.MyStruct*
239  %q = load %struct.MyStruct, %struct.MyStruct* %1
240  */
241  insertInferSite(loadInst);
242  }
243  else if (const auto* storeInst =
244  SVFUtil::dyn_cast<StoreInst>(it))
245  {
246  if (storeInst->getPointerOperand() == curValue)
247  {
248  /*
249  * infer based on store (pointer operand), e.g.,
250  %call = call i8* malloc()
251  %1 = bitcast i8* %call to %struct.MyStruct*
252  store %struct.MyStruct .., %struct.MyStruct* %1
253  */
254  insertInferSite(storeInst);
255  }
256  else
257  {
258  for (const auto nit :
259  storeInst->getPointerOperand()->users())
260  {
261  /*
262  * propagate across store (value operand) and load
263  %call = call i8* malloc()
264  store i8* %call, i8** %p
265  %q = load i8*, i8** %p
266  ..infer based on %q..
267  */
268  if (SVFUtil::isa<LoadInst>(nit))
269  insertInferSitesOrPushWorklist(nit);
270  }
271  /*
272  * infer based on store (value operand) <- gep (result element)
273  */
274  if (const auto* gepInst =
275  SVFUtil::dyn_cast<GetElementPtrInst>(
276  storeInst->getPointerOperand()))
277  {
278  /*
279  %call1 = call i8* @TYPE_MALLOC(i32 noundef 16, i32
280  noundef 2), !dbg !39 %2 = bitcast i8* %call1 to
281  %struct.MyStruct*, !dbg !41 %3 = load
282  %struct.MyStruct*, %struct.MyStruct** %p, align 8,
283  !dbg !42 %next = getelementptr inbounds
284  %struct.MyStruct, %struct.MyStruct* %3, i32 0, i32
285  1, !dbg !43 store %struct.MyStruct* %2,
286  %struct.MyStruct** %next, align 8, !dbg !44 %5 =
287  load %struct.MyStruct*, %struct.MyStruct** %p,
288  align 8, !dbg !48 %next3 = getelementptr inbounds
289  %struct.MyStruct, %struct.MyStruct* %5, i32 0, i32
290  1, !dbg !49 %6 = load %struct.MyStruct*,
291  %struct.MyStruct** %next3, align 8, !dbg !49 infer
292  site -> %f1 = getelementptr inbounds
293  %struct.MyStruct, %struct.MyStruct* %6, i32 0, i32
294  0, !dbg !50
295  */
296  const Value* gepBase = gepInst->getPointerOperand();
297  if (const auto* load =
298  SVFUtil::dyn_cast<LoadInst>(gepBase))
299  {
300  for (const auto loadUse :
301  load->getPointerOperand()->users())
302  {
303  if (loadUse == load ||
304  !SVFUtil::isa<LoadInst>(loadUse))
305  continue;
306  for (const auto gepUse : loadUse->users())
307  {
308  if (!SVFUtil::isa<GetElementPtrInst>(
309  gepUse))
310  continue;
311  for (const auto loadUse2 :
312  gepUse->users())
313  {
314  if (SVFUtil::isa<LoadInst>(
315  loadUse2))
316  {
317  insertInferSitesOrPushWorklist(
318  loadUse2);
319  }
320  }
321  }
322  }
323  }
324  else if (const auto* alloc =
325  SVFUtil::dyn_cast<AllocaInst>(gepBase))
326  {
327  /*
328  %2 = alloca %struct.ll, align 8
329  store i32 0, ptr %1, align 4
330  %3 = call noalias noundef nonnull ptr
331  @_Znwm(i64 noundef 16) #2 %4 = getelementptr
332  inbounds %struct.ll, ptr %2, i32 0, i32 1
333  store ptr %3, ptr %4, align 8
334  %5 = getelementptr inbounds %struct.ll, ptr
335  %2, i32 0, i32 1 %6 = load ptr, ptr %5, align
336  8 %7 = getelementptr inbounds %struct.ll, ptr
337  %6, i32 0, i32 0
338  */
339  for (const auto gepUse : alloc->users())
340  {
341  if (!SVFUtil::isa<GetElementPtrInst>(
342  gepUse))
343  continue;
344  for (const auto loadUse2 : gepUse->users())
345  {
346  if (SVFUtil::isa<LoadInst>(loadUse2))
347  {
348  insertInferSitesOrPushWorklist(
349  loadUse2);
350  }
351  }
352  }
353  }
354  }
355  }
356  }
357  else if (const auto* gepInst =
358  SVFUtil::dyn_cast<GetElementPtrInst>(it))
359  {
360  /*
361  * infer based on gep (pointer operand)
362  %call = call i8* malloc()
363  %1 = bitcast i8* %call to %struct.MyStruct*
364  %next = getelementptr inbounds %struct.MyStruct,
365  %struct.MyStruct* %1, i32 0..
366  */
367  if (gepInst->getPointerOperand() == curValue)
368  insertInferSite(gepInst);
369  }
370  else if (const auto* bitcast =
371  SVFUtil::dyn_cast<BitCastInst>(it))
372  {
373  // continue on bitcast
374  insertInferSitesOrPushWorklist(bitcast);
375  }
376  else if (const auto* phiNode = SVFUtil::dyn_cast<PHINode>(it))
377  {
378  // continue on bitcast
379  insertInferSitesOrPushWorklist(phiNode);
380  }
381  else if (const auto* retInst =
382  SVFUtil::dyn_cast<ReturnInst>(it))
383  {
384  /*
385  * propagate from return to caller
386  Function Attrs: noinline nounwind optnone uwtable
387  define dso_local i8* @malloc_wrapper() #0 !dbg !22 {
388  entry:
389  %call = call i8* @malloc(i32 noundef 16), !dbg !25
390  ret i8* %call, !dbg !26
391  }
392  %call = call i8* @malloc_wrapper()
393  ..infer based on %call..
394  */
395  for (const auto callsite : retInst->getFunction()->users())
396  {
397  if (const auto* callBase =
398  SVFUtil::dyn_cast<CallBase>(callsite))
399  {
400  // skip function as parameter
401  // e.g., call void @foo(%struct.ssl_ctx_st* %9, i32 (i8*, i32, i32, i8*)* @passwd_callback)
402  if (callBase->getCalledFunction() !=
403  retInst->getFunction())
404  continue;
405  insertInferSitesOrPushWorklist(callBase);
406  }
407  }
408  }
409  else if (const auto* callBase = SVFUtil::dyn_cast<CallBase>(it))
410  {
411  /*
412  * propagate from callsite to callee
413  %call = call i8* @malloc(i32 noundef 16)
414  %0 = bitcast i8* %call to %struct.Node*, !dbg !43
415  call void @foo(%struct.Node* noundef %0), !dbg !45
416 
417  define dso_local void @foo(%struct.Node* noundef %param)
418  #0 !dbg !22 {...}
419  ..infer based on the formal param %param..
420  */
421  // skip global function value -> callsite
422  // e.g., def @foo() -> call @foo()
423  // we don't skip function as parameter, e.g., def @foo() -> call @bar(..., @foo)
424  if (SVFUtil::isa<Function>(curValue) &&
425  curValue == callBase->getCalledFunction())
426  continue;
427  // skip indirect call
428  // e.g., %0 = ... -> call %0(...)
429  if (!callBase->hasArgument(curValue))
430  continue;
431  if (Function* calleeFunc = callBase->getCalledFunction())
432  {
433  u32_t pos = getArgPosInCall(callBase, curValue);
434  // for varargs function, we cannot directly get the value-flow between actual and formal args e.g., consider the following vararg function @callee 1: call void @callee(%arg) 2: define dso_local i32 @callee(...) #0 !dbg !17 { 3: ....... 4: %5 = load i32, ptr %vaarg.addr, align 4, !dbg !55 5: .......
435  // 6: }
436  // it is challenging to precisely identify the forward value-flow of %arg (Line 2) because the function definition of callee (Line 2) does not have any formal args related to the actual arg %arg therefore we track all possible instructions like ``load i32, ptr %vaarg.addr''
437  if (calleeFunc->isVarArg())
438  {
439  // conservatively track all var args
440  for (auto& I : instructions(calleeFunc))
441  {
442  if (auto* load =
443  llvm::dyn_cast<llvm::LoadInst>(&I))
444  {
445  llvm::Value* loadPointer =
446  load->getPointerOperand();
447  if (loadPointer->getName().compare(
448  "vaarg.addr") == 0)
449  {
450  insertInferSitesOrPushWorklist(load);
451  }
452  }
453  }
454  }
455  else if (!calleeFunc->isDeclaration())
456  {
457  insertInferSitesOrPushWorklist(
458  calleeFunc->getArg(pos));
459  }
460  }
461  }
462  }
463  if (canUpdate)
464  {
465  Set<const Type*> types;
466  std::transform(infersites.begin(), infersites.end(),
467  std::inserter(types, types.begin()),
469  _valueToInferSites[curValue] = SVFUtil::move(infersites);
470  _valueToType[curValue] = selectLargestSizedType(types);
471  }
472  }
473  const Type* type = _valueToType[var];
474  if (type == nullptr)
475  {
476  type = defaultType(var);
477  WARN_MSG("Using default type, trace ID is " +
478  std::to_string(traceId) + ":" + dumpValueAndDbgInfo(var));
479  }
480  ABORT_IFNOT(type, "type cannot be a null ptr");
481  return type;
482  }
483 }
484 
491 {
492 
493  // consult cache
494  auto tIt = _valueToAllocs.find(var);
495  if (tIt != _valueToAllocs.end())
496  {
497  return tIt->second;
498  }
499 
500  // simulate the call stack, the second element indicates whether we should update sources for current value
502  Set<ValueBoolPair> visited;
503  workList.push({var, false});
504  while (!workList.empty())
505  {
506  auto curPair = workList.pop();
507  if (visited.count(curPair)) continue;
508  visited.insert(curPair);
509  const Value *curValue = curPair.first;
510  bool canUpdate = curPair.second;
511 
512  Set<const Value *> sources;
513  auto insertAllocs = [&sources, &canUpdate](const Value *source)
514  {
515  if (canUpdate) sources.insert(source);
516  };
517  auto insertAllocsOrPushWorklist = [this, &sources, &workList, &canUpdate](const auto &pUser)
518  {
519  auto vIt = _valueToAllocs.find(pUser);
520  if (canUpdate)
521  {
522  if (vIt != _valueToAllocs.end())
523  {
524  sources.insert(vIt->second.begin(), vIt->second.end());
525  }
526  }
527  else
528  {
529  if (vIt == _valueToAllocs.end()) workList.push({pUser, false});
530  }
531  };
532 
533  if (!canUpdate && !_valueToAllocs.count(curValue))
534  {
535  workList.push({curValue, true});
536  }
537 
538  if (isAlloc(curValue))
539  {
540  insertAllocs(curValue);
541  }
542  else if (const auto *bitCastInst = SVFUtil::dyn_cast<BitCastInst>(curValue))
543  {
544  Value *prevVal = bitCastInst->getOperand(0);
545  insertAllocsOrPushWorklist(prevVal);
546  }
547  else if (const auto *phiNode = SVFUtil::dyn_cast<PHINode>(curValue))
548  {
549  for (u32_t i = 0; i < phiNode->getNumOperands(); ++i)
550  {
551  insertAllocsOrPushWorklist(phiNode->getOperand(i));
552  }
553  }
554  else if (const auto *loadInst = SVFUtil::dyn_cast<LoadInst>(curValue))
555  {
556  for (const auto use: loadInst->getPointerOperand()->users())
557  {
558  if (const StoreInst *storeInst = SVFUtil::dyn_cast<StoreInst>(use))
559  {
560  if (storeInst->getPointerOperand() == loadInst->getPointerOperand())
561  {
562  insertAllocsOrPushWorklist(storeInst->getValueOperand());
563  }
564  }
565  }
566  }
567  else if (const auto *argument = SVFUtil::dyn_cast<Argument>(curValue))
568  {
569  for (const auto use: argument->getParent()->users())
570  {
571  if (const CallBase *callBase = SVFUtil::dyn_cast<CallBase>(use))
572  {
573  // skip function as parameter
574  // e.g., call void @foo(%struct.ssl_ctx_st* %9, i32 (i8*, i32, i32, i8*)* @passwd_callback)
575  if (callBase->getCalledFunction() != argument->getParent()) continue;
576  u32_t pos = argument->getParent()->isVarArg() ? 0 : argument->getArgNo();
577  insertAllocsOrPushWorklist(callBase->getArgOperand(pos));
578  }
579  }
580  }
581  else if (const auto *callBase = SVFUtil::dyn_cast<CallBase>(curValue))
582  {
583  ABORT_IFNOT(!callBase->doesNotReturn(), "callbase does not return:" + dumpValueAndDbgInfo(callBase));
584  if (Function *callee = callBase->getCalledFunction())
585  {
586  if (!callee->isDeclaration())
587  {
588  const SVFFunction *svfFunc = LLVMModuleSet::getLLVMModuleSet()->getSVFFunction(callee);
589  const BasicBlock* exitBB = SVFUtil::cast<BasicBlock>(LLVMModuleSet::getLLVMModuleSet()->getLLVMValue(
590  svfFunc->getExitBB()));
591  const Value *pValue = &exitBB->back();
592  const auto *retInst = SVFUtil::dyn_cast<ReturnInst>(pValue);
593  ABORT_IFNOT(retInst && retInst->getReturnValue(), "not return inst?");
594  insertAllocsOrPushWorklist(retInst->getReturnValue());
595  }
596  }
597  }
598  if (canUpdate)
599  {
600  _valueToAllocs[curValue] = SVFUtil::move(sources);
601  }
602  }
603  Set<const Value *> &srcs = _valueToAllocs[var];
604  if (srcs.empty())
605  {
606  WARN_MSG("Cannot find allocation: " + dumpValueAndDbgInfo(var));
607  }
608  return srcs;
609 }
610 
612 {
613  return LLVMUtil::isObject(val);
614 }
615 
621 {
622  if (const Function *func = cs->getCalledFunction())
623  {
624  if (func->getName().find(TYPEMALLOC) != std::string::npos)
625  {
626  const Type *objType = fwInferObjType(cs);
627  const auto *pInt =
628  SVFUtil::dyn_cast<llvm::ConstantInt>(cs->getOperand(1));
629  assert(pInt && "the second argument is a integer");
630  u32_t iTyNum = objTyToNumFields(objType);
631  if (iTyNum >= pInt->getZExtValue())
632  SVFUtil::outs() << SVFUtil::sucMsg("\t SUCCESS :") << dumpValueAndDbgInfo(cs)
633  << SVFUtil::pasMsg(" TYPE: ")
634  << dumpType(objType) << "\n";
635  else
636  {
637  SVFUtil::errs() << SVFUtil::errMsg("\t FAILURE :") << ":" << dumpValueAndDbgInfo(cs) << " TYPE: "
638  << dumpType(objType) << "\n";
639  abort();
640  }
641  }
642  }
643 }
644 
645 void ObjTypeInference::typeSizeDiffTest(const PointerType *oPTy, const Type *iTy, const Value *val)
646 {
647 #if TYPE_DEBUG
648  Type *oTy = getPtrElementType(oPTy);
649  u32_t iTyNum = objTyToNumFields(iTy);
650  if (getNumOfElements(oTy) > iTyNum)
651  {
652  ERR_MSG("original type is:" + dumpType(oTy));
653  ERR_MSG("infered type is:" + dumpType(iTy));
654  ABORT_MSG("wrong type, trace ID is " + std::to_string(traceId) + ":" + dumpValueAndDbgInfo(val));
655  }
656 #endif
657 }
658 
660 {
661  assert(callBase->hasArgument(arg) && "callInst does not have argument arg?");
662  auto it = std::find(callBase->arg_begin(), callBase->arg_end(), arg);
663  assert(it != callBase->arg_end() && "Didn't find argument?");
664  return std::distance(callBase->arg_begin(), it);
665 }
666 
667 
669 {
670  if (objTys.empty()) return nullptr;
671  // map type size to types from with key in descending order
672  OrderedMap<u32_t, OrderedSet<const Type *>, std::greater<int>> typeSzToTypes;
673  for (const Type *ty: objTys)
674  {
675  typeSzToTypes[objTyToNumFields(ty)].insert(ty);
676  }
677  assert(!typeSzToTypes.empty() && "typeSzToTypes cannot be empty");
678  OrderedSet<const Type *> largestTypes;
679  std::tie(std::ignore, largestTypes) = *typeSzToTypes.begin();
680  assert(!largestTypes.empty() && "largest element cannot be empty");
681  return *largestTypes.begin();
682 }
683 
685 {
687  if (SVFUtil::isa<ArrayType>(objTy))
688  num = getNumOfElements(objTy);
689  else if (const auto *st = SVFUtil::dyn_cast<StructType>(objTy))
690  {
693  if (!classTyHasVTable(st))
694  num = getNumOfElements(st);
695  }
696  return num;
697 }
698 
699 
710 {
711  auto it = _thisPtrClassNames.find(thisPtr);
712  if (it != _thisPtrClassNames.end()) return it->second;
713 
714  Set<std::string> names;
715 
716  // Lambda for checking a function is a valid name source & extracting a class name from it
717  auto addNamesFromFunc = [&names](const Function *func) -> void
718  {
719  ABORT_IFNOT(isClsNameSource(func), "Func is invalid class name source: " + dumpValueAndDbgInfo(func));
720  for (const auto &name : extractClsNamesFromFunc(func)) names.insert(name);
721  };
722 
723  // Lambda for getting callee & extracting class name for calls to constructors/destructors/template funcs
724  auto addNamesFromCall = [&names, &addNamesFromFunc](const CallBase *call) -> void
725  {
726  ABORT_IFNOT(isClsNameSource(call), "Call is invalid class name source: " + dumpValueAndDbgInfo(call));
727 
728  const auto *func = call->getCalledFunction();
729  if (isDynCast(func)) names.insert(extractClsNameFromDynCast(call));
730  else addNamesFromFunc(func);
731  };
732 
733  // Walk backwards to find all valid source sites for the pointer (e.g. stack/global/heap variables)
734  for (const auto &val: bwFindAllocOrClsNameSources(thisPtr))
735  {
736  // A source site is either a constructor/destructor/template function from which the class name can be
737  // extracted; a call to a C++ constructor/destructor/template function from which the class name can be
738  // extracted; or an allocation site of an object (i.e. a stack/global/heap variable), from which a
739  // forward walk can be performed to find calls to C++ constructor/destructor/template functions from
740  // which the class' name can then be extracted; skip starting pointer
741  if (val == thisPtr) continue;
742 
743  if (const auto *func = SVFUtil::dyn_cast<Function>(val))
744  {
745  // Constructor/destructor/template func; extract name from func directly
746  addNamesFromFunc(func);
747  }
748  else if (isClsNameSource(val))
749  {
750  // Call to constructor/destructor/template func; get callee; extract name from callee
751  ABORT_IFNOT(SVFUtil::isa<CallBase>(val), "Call source site is not a callbase: " + dumpValueAndDbgInfo(val));
752  addNamesFromCall(SVFUtil::cast<CallBase>(val));
753  }
754  else if (isAlloc(val))
755  {
756  // Stack/global/heap allocation site; walk forward; find constructor/destructor/template calls
757  ABORT_IFNOT((SVFUtil::isa<AllocaInst, CallBase, GlobalVariable>(val)),
758  "Alloc site source is not a stack/heap/global variable: " + dumpValueAndDbgInfo(val));
759  for (const auto *src : fwFindClsNameSources(val))
760  {
761  if (const auto *func = SVFUtil::dyn_cast<Function>(src)) addNamesFromFunc(func);
762  else if (const auto *call = SVFUtil::dyn_cast<CallBase>(src)) addNamesFromCall(call);
763  else ABORT_MSG("Source site from forward walk is invalid: " + dumpValueAndDbgInfo(src));
764  }
765  }
766  else
767  {
768  ERR_MSG("Unsupported source type found:" + dumpValueAndDbgInfo(val));
769  }
770  }
771 
772  return _thisPtrClassNames[thisPtr] = names;
773 }
774 
783 {
784 
785  // consult cache
786  auto tIt = _valueToAllocOrClsNameSources.find(startValue);
787  if (tIt != _valueToAllocOrClsNameSources.end())
788  {
789  return tIt->second;
790  }
791 
792  // simulate the call stack, the second element indicates whether we should update sources for current value
794  Set<ValueBoolPair> visited;
795  workList.push({startValue, false});
796  while (!workList.empty())
797  {
798  auto curPair = workList.pop();
799  if (visited.count(curPair)) continue;
800  visited.insert(curPair);
801  const Value *curValue = curPair.first;
802  bool canUpdate = curPair.second;
803 
804  Set<const Value *> sources;
805  auto insertSource = [&sources, &canUpdate](const Value *source)
806  {
807  if (canUpdate) sources.insert(source);
808  };
809  auto insertSourcesOrPushWorklist = [this, &sources, &workList, &canUpdate](const auto &pUser)
810  {
811  auto vIt = _valueToAllocOrClsNameSources.find(pUser);
812  if (canUpdate)
813  {
814  if (vIt != _valueToAllocOrClsNameSources.end() && !vIt->second.empty())
815  {
816  sources.insert(vIt->second.begin(), vIt->second.end());
817  }
818  }
819  else
820  {
821  if (vIt == _valueToAllocOrClsNameSources.end()) workList.push({pUser, false});
822  }
823  };
824 
825  if (!canUpdate && !_valueToAllocOrClsNameSources.count(curValue))
826  {
827  workList.push({curValue, true});
828  }
829 
830  // If current value is an instruction inside a constructor/destructor/template, use it as a source
831  if (const auto *inst = SVFUtil::dyn_cast<Instruction>(curValue))
832  {
833  if (const auto *parent = inst->getFunction())
834  {
835  if (isClsNameSource(parent)) insertSource(parent);
836  }
837  }
838 
839  // If the current value is an object (global, heap, stack, etc) or name source (constructor/destructor,
840  // a C++ dynamic cast, or a template function), use it as a source
841  if (isAlloc(curValue) || isClsNameSource(curValue))
842  {
843  insertSource(curValue);
844  }
845 
846  // Explore the current value further depending on the type of the value; use cached values if possible
847  if (const auto *getElementPtrInst = SVFUtil::dyn_cast<GetElementPtrInst>(curValue))
848  {
849  insertSourcesOrPushWorklist(getElementPtrInst->getPointerOperand());
850  }
851  else if (const auto *bitCastInst = SVFUtil::dyn_cast<BitCastInst>(curValue))
852  {
853  insertSourcesOrPushWorklist(bitCastInst->getOperand(0));
854  }
855  else if (const auto *phiNode = SVFUtil::dyn_cast<PHINode>(curValue))
856  {
857  for (const auto *op : phiNode->operand_values())
858  {
859  insertSourcesOrPushWorklist(op);
860  }
861  }
862  else if (const auto *loadInst = SVFUtil::dyn_cast<LoadInst>(curValue))
863  {
864  for (const auto *user : loadInst->getPointerOperand()->users())
865  {
866  if (const auto *storeInst = SVFUtil::dyn_cast<StoreInst>(user))
867  {
868  if (storeInst->getPointerOperand() == loadInst->getPointerOperand())
869  {
870  insertSourcesOrPushWorklist(storeInst->getValueOperand());
871  }
872  }
873  }
874  }
875  else if (const auto *argument = SVFUtil::dyn_cast<Argument>(curValue))
876  {
877  for (const auto *user: argument->getParent()->users())
878  {
879  if (const auto *callBase = SVFUtil::dyn_cast<CallBase>(user))
880  {
881  // skip function as parameter
882  // e.g., call void @foo(%struct.ssl_ctx_st* %9, i32 (i8*, i32, i32, i8*)* @passwd_callback)
883  if (callBase->getCalledFunction() != argument->getParent()) continue;
884  u32_t pos = argument->getParent()->isVarArg() ? 0 : argument->getArgNo();
885  insertSourcesOrPushWorklist(callBase->getArgOperand(pos));
886  }
887  }
888  }
889  else if (const auto *callBase = SVFUtil::dyn_cast<CallBase>(curValue))
890  {
891  ABORT_IFNOT(!callBase->doesNotReturn(), "callbase does not return:" + dumpValueAndDbgInfo(callBase));
892  if (const auto *callee = callBase->getCalledFunction())
893  {
894  if (!callee->isDeclaration())
895  {
896  const SVFFunction *svfFunc = LLVMModuleSet::getLLVMModuleSet()->getSVFFunction(callee);
897  const BasicBlock* exitBB = SVFUtil::cast<BasicBlock>(LLVMModuleSet::getLLVMModuleSet()->getLLVMValue(
898  svfFunc->getExitBB()));
899  const Value *pValue = &exitBB->back();
900  const auto *retInst = SVFUtil::dyn_cast<ReturnInst>(pValue);
901  ABORT_IFNOT(retInst && retInst->getReturnValue(), "not return inst?");
902  insertSourcesOrPushWorklist(retInst->getReturnValue());
903  }
904  }
905  }
906 
907  // If updating is allowed; store the gathered sources as sources for the current value in the cache
908  if (canUpdate)
909  {
910  _valueToAllocOrClsNameSources[curValue] = sources;
911  }
912  }
913 
914  return _valueToAllocOrClsNameSources[startValue];
915 }
916 
918 {
919  assert(startValue && "startValue was null?");
920 
921  // consult cache
922  auto tIt = _objToClsNameSources.find(startValue);
923  if (tIt != _objToClsNameSources.end())
924  {
925  return tIt->second;
926  }
927 
928  Set<const CallBase *> sources;
929 
930  // Lambda for adding a callee to the sources iff it is a constructor/destructor/template/dyncast
931  auto inferViaCppCall = [&sources](const CallBase *caller)
932  {
933  if (!caller) return;
934  if (isClsNameSource(caller)) sources.insert(caller);
935  };
936 
937  // Find all calls of starting val (or through cast); add as potential source iff applicable
938  for (const auto *user : startValue->users())
939  {
940  if (const auto *caller = SVFUtil::dyn_cast<CallBase>(user))
941  {
942  inferViaCppCall(caller);
943  }
944  else if (const auto *bitcast = SVFUtil::dyn_cast<BitCastInst>(user))
945  {
946  for (const auto *cast_user : bitcast->users())
947  {
948  if (const auto *caller = SVFUtil::dyn_cast<CallBase>(cast_user))
949  {
950  inferViaCppCall(caller);
951  }
952  }
953  }
954  }
955 
956  // Store sources in cache for starting value & return the found sources
957  return _objToClsNameSources[startValue] = SVFUtil::move(sources);
958 }
const Type * infersiteToType(const Value *val)
#define ABORT_MSG(msg)
const std::string TYPEMALLOC
#define ABORT_IFNOT(condition, msg)
#define ERR_MSG(msg)
#define WARN_MSG(msg)
newitem type
Definition: cJSON.cpp:2739
const char *const name
Definition: cJSON.h:264
const char *const string
Definition: cJSON.h:172
bool empty() const
Definition: WorkList.h:238
bool push(const Data &data)
Definition: WorkList.h:257
SVFFunction * getSVFFunction(const Function *fun) const
Definition: LLVMModule.h:230
LLVMContext & getContext() const
Definition: LLVMModule.h:349
static LLVMModuleSet * getLLVMModuleSet()
Definition: LLVMModule.h:118
LLVMContext & getLLVMCtx()
const Type * selectLargestSizedType(Set< const Type * > &objTys)
select the largest (conservative) type from all types
Set< const Value * > & bwfindAllocOfVar(const Value *var)
backward collect all possible allocation sites (stack, static, heap) of var
u32_t objTyToNumFields(const Type *objTy)
bool isAlloc(const SVF::Value *val)
is allocation (stack, static, heap)
u32_t getArgPosInCall(const CallBase *callBase, const Value *arg)
Set< const Value * > & bwFindAllocOrClsNameSources(const Value *startValue)
Set< std::string > & inferThisPtrClsName(const Value *thisPtr)
get or infer the class names of thisptr
void typeSizeDiffTest(const PointerType *oPTy, const Type *iTy, const Value *val)
const Type * fwInferObjType(const Value *var)
forward infer the type of the object pointed by var
const Type * inferObjType(const Value *var)
get or infer the type of the object pointed by the value
const Type * defaultType(const Value *val)
default type
ObjToClsNameSources _objToClsNameSources
Set< const CallBase * > & fwFindClsNameSources(const Value *startValue)
forward find class name sources starting from an allocation
void validateTypeCheck(const CallBase *cs)
validate type inference
static const Option< u32_t > MaxFieldLimit
Maximum number of field derivations for an object.
Definition: Options.h:38
const SVFBasicBlock * getExitBB() const
Definition: SVFValue.cpp:186
bool isHeapAllocExtCallViaRet(const Instruction *inst)
Definition: LLVMUtil.cpp:618
std::string dumpType(const Type *type)
Definition: LLVMUtil.cpp:596
std::string dumpValueAndDbgInfo(const Value *val)
Definition: LLVMUtil.cpp:607
u32_t getNumOfElements(const Type *ety)
Return size of this object based on LLVM value.
Definition: LLVMUtil.cpp:297
static Type * getPtrElementType(const PointerType *pty)
Definition: LLVMUtil.h:96
bool isObject(const Value *ref)
Return true if this value refers to a object.
Definition: LLVMUtil.cpp:59
std::string sucMsg(const std::string &msg)
Returns successful message by converting a string into green string output.
Definition: SVFUtil.cpp:53
std::string pasMsg(const std::string &msg)
Print each pass/phase message by converting a string into blue string output.
Definition: SVFUtil.cpp:99
constexpr std::remove_reference< T >::type && move(T &&t) noexcept
Definition: SVFUtil.h:447
std::string errMsg(const std::string &msg)
Print error message by converting a string into red string output.
Definition: SVFUtil.cpp:76
std::ostream & errs()
Overwrite llvm::errs()
Definition: SVFUtil.h:56
std::ostream & outs()
Overwrite llvm::outs()
Definition: SVFUtil.h:50
std::string extractClsNameFromDynCast(const CallBase *callBase)
extract class name from cpp dyncast function
Definition: CppUtil.cpp:921
bool classTyHasVTable(const StructType *ty)
Definition: CppUtil.cpp:569
bool isClsNameSource(const Value *val)
Definition: CppUtil.cpp:861
Set< std::string > extractClsNamesFromFunc(const Function *foo)
extract class name from the c++ function name, e.g., constructor/destructors
Definition: CppUtil.cpp:706
bool isDynCast(const Function *foo)
whether foo is a cpp dyncast function
Definition: CppUtil.cpp:910
for isBitcode
Definition: BasicTypes.h:68
llvm::Type Type
Definition: BasicTypes.h:83
llvm::CallBase CallBase
Definition: BasicTypes.h:146
llvm::BasicBlock BasicBlock
Definition: BasicTypes.h:86
llvm::AllocaInst AllocaInst
Definition: BasicTypes.h:150
std::set< Key, Compare, Allocator > OrderedSet
Definition: GeneralType.h:105
llvm::Function Function
Definition: BasicTypes.h:85
llvm::GlobalValue GlobalValue
Definition: BasicTypes.h:88
llvm::Value Value
LLVM Basic classes.
Definition: BasicTypes.h:82
llvm::PointerType PointerType
Definition: BasicTypes.h:96
llvm::StoreInst StoreInst
Definition: BasicTypes.h:148
unsigned u32_t
Definition: GeneralType.h:46
std::map< Key, Value, Compare, Allocator > OrderedMap
Definition: GeneralType.h:109
std::unordered_set< Key, Hash, KeyEqual, Allocator > Set
Definition: GeneralType.h:96
llvm::LLVMContext LLVMContext
Definition: BasicTypes.h:70