Static Value-Flow Analysis
Public Types | Public Member Functions | Protected Member Functions | Private Member Functions | Private Attributes | List of all members
SVF::ObjTypeInference Class Reference

#include <ObjTypeInference.h>

Public Types

typedef Set< const Value * > ValueSet
 
typedef Map< const Value *, ValueSetValueToValueSet
 
typedef ValueToValueSet ValueToInferSites
 
typedef ValueToValueSet ValueToSources
 
typedef Map< const Value *, const Type * > ValueToType
 
typedef std::pair< const Value *, bool > ValueBoolPair
 
typedef Map< const Value *, Set< std::string > > ValueToClassNames
 
typedef Map< const Value *, Set< const CallBase * > > ObjToClsNameSources
 

Public Member Functions

 ObjTypeInference ()=default
 
 ~ObjTypeInference ()=default
 
const TypeinferObjType (const Value *var)
 get or infer the type of the object pointed by the value More...
 
void validateTypeCheck (const CallBase *cs)
 validate type inference More...
 
void typeSizeDiffTest (const PointerType *oPTy, const Type *iTy, const Value *val)
 
const TypedefaultType (const Value *val)
 default type More...
 
const TypeptrType ()
 pointer type More...
 
const IntegerTypeint8Type ()
 int8 type More...
 
LLVMContextgetLLVMCtx ()
 
const TypeselectLargestSizedType (Set< const Type * > &objTys)
 select the largest (conservative) type from all types More...
 
u32_t objTyToNumFields (const Type *objTy)
 
u32_t getArgPosInCall (const CallBase *callBase, const Value *arg)
 
Set< std::string > & inferThisPtrClsName (const Value *thisPtr)
 get or infer the class names of thisptr More...
 

Protected Member Functions

Set< const Value * > & bwFindAllocOrClsNameSources (const Value *startValue)
 
Set< const CallBase * > & fwFindClsNameSources (const Value *startValue)
 forward find class name sources starting from an allocation More...
 

Private Member Functions

const TypefwInferObjType (const Value *var)
 forward infer the type of the object pointed by var More...
 
Set< const Value * > & bwfindAllocOfVar (const Value *var)
 backward collect all possible allocation sites (stack, static, heap) of var More...
 
bool isAlloc (const SVF::Value *val)
 is allocation (stack, static, heap) More...
 

Private Attributes

ValueToInferSites _valueToInferSites
 
ValueToType _valueToType
 
ValueToSources _valueToAllocs
 
ValueToClassNames _thisPtrClassNames
 
ValueToSources _valueToAllocOrClsNameSources
 
ObjToClsNameSources _objToClsNameSources
 

Detailed Description

Definition at line 40 of file ObjTypeInference.h.

Member Typedef Documentation

◆ ObjToClsNameSources

Definition at line 51 of file ObjTypeInference.h.

◆ ValueBoolPair

typedef std::pair<const Value *, bool> SVF::ObjTypeInference::ValueBoolPair

Definition at line 49 of file ObjTypeInference.h.

◆ ValueSet

Definition at line 44 of file ObjTypeInference.h.

◆ ValueToClassNames

Definition at line 50 of file ObjTypeInference.h.

◆ ValueToInferSites

Definition at line 46 of file ObjTypeInference.h.

◆ ValueToSources

Definition at line 47 of file ObjTypeInference.h.

◆ ValueToType

typedef Map<const Value *, const Type *> SVF::ObjTypeInference::ValueToType

Definition at line 48 of file ObjTypeInference.h.

◆ ValueToValueSet

Definition at line 45 of file ObjTypeInference.h.

Constructor & Destructor Documentation

◆ ObjTypeInference()

SVF::ObjTypeInference::ObjTypeInference ( )
explicitdefault

◆ ~ObjTypeInference()

SVF::ObjTypeInference::~ObjTypeInference ( )
default

Member Function Documentation

◆ bwfindAllocOfVar()

Set< const Value * > & ObjTypeInference::bwfindAllocOfVar ( const Value var)
private

backward collect all possible allocation sites (stack, static, heap) of var

backward collect all possible allocation sites (stack, static, heap) of var

Parameters
var
Returns

Definition at line 490 of file ObjTypeInference.cpp.

491 {
492 
493  // consult cache
494  auto tIt = _valueToAllocs.find(var);
495  if (tIt != _valueToAllocs.end())
496  {
497  return tIt->second;
498  }
499 
500  // simulate the call stack, the second element indicates whether we should update sources for current value
502  Set<ValueBoolPair> visited;
503  workList.push({var, false});
504  while (!workList.empty())
505  {
506  auto curPair = workList.pop();
507  if (visited.count(curPair)) continue;
508  visited.insert(curPair);
509  const Value *curValue = curPair.first;
510  bool canUpdate = curPair.second;
511 
512  Set<const Value *> sources;
513  auto insertAllocs = [&sources, &canUpdate](const Value *source)
514  {
515  if (canUpdate) sources.insert(source);
516  };
517  auto insertAllocsOrPushWorklist = [this, &sources, &workList, &canUpdate](const auto &pUser)
518  {
519  auto vIt = _valueToAllocs.find(pUser);
520  if (canUpdate)
521  {
522  if (vIt != _valueToAllocs.end())
523  {
524  sources.insert(vIt->second.begin(), vIt->second.end());
525  }
526  }
527  else
528  {
529  if (vIt == _valueToAllocs.end()) workList.push({pUser, false});
530  }
531  };
532 
533  if (!canUpdate && !_valueToAllocs.count(curValue))
534  {
535  workList.push({curValue, true});
536  }
537 
538  if (isAlloc(curValue))
539  {
540  insertAllocs(curValue);
541  }
542  else if (const auto *bitCastInst = SVFUtil::dyn_cast<BitCastInst>(curValue))
543  {
544  Value *prevVal = bitCastInst->getOperand(0);
545  insertAllocsOrPushWorklist(prevVal);
546  }
547  else if (const auto *phiNode = SVFUtil::dyn_cast<PHINode>(curValue))
548  {
549  for (u32_t i = 0; i < phiNode->getNumOperands(); ++i)
550  {
551  insertAllocsOrPushWorklist(phiNode->getOperand(i));
552  }
553  }
554  else if (const auto *loadInst = SVFUtil::dyn_cast<LoadInst>(curValue))
555  {
556  for (const auto use: loadInst->getPointerOperand()->users())
557  {
558  if (const StoreInst *storeInst = SVFUtil::dyn_cast<StoreInst>(use))
559  {
560  if (storeInst->getPointerOperand() == loadInst->getPointerOperand())
561  {
562  insertAllocsOrPushWorklist(storeInst->getValueOperand());
563  }
564  }
565  }
566  }
567  else if (const auto *argument = SVFUtil::dyn_cast<Argument>(curValue))
568  {
569  for (const auto use: argument->getParent()->users())
570  {
571  if (const CallBase *callBase = SVFUtil::dyn_cast<CallBase>(use))
572  {
573  // skip function as parameter
574  // e.g., call void @foo(%struct.ssl_ctx_st* %9, i32 (i8*, i32, i32, i8*)* @passwd_callback)
575  if (callBase->getCalledFunction() != argument->getParent()) continue;
576  u32_t pos = argument->getParent()->isVarArg() ? 0 : argument->getArgNo();
577  insertAllocsOrPushWorklist(callBase->getArgOperand(pos));
578  }
579  }
580  }
581  else if (const auto *callBase = SVFUtil::dyn_cast<CallBase>(curValue))
582  {
583  ABORT_IFNOT(!callBase->doesNotReturn(), "callbase does not return:" + dumpValueAndDbgInfo(callBase));
584  if (Function *callee = callBase->getCalledFunction())
585  {
586  if (!callee->isDeclaration())
587  {
588  const SVFFunction *svfFunc = LLVMModuleSet::getLLVMModuleSet()->getSVFFunction(callee);
589  const BasicBlock* exitBB = SVFUtil::cast<BasicBlock>(LLVMModuleSet::getLLVMModuleSet()->getLLVMValue(
590  svfFunc->getExitBB()));
591  const Value *pValue = &exitBB->back();
592  const auto *retInst = SVFUtil::dyn_cast<ReturnInst>(pValue);
593  ABORT_IFNOT(retInst && retInst->getReturnValue(), "not return inst?");
594  insertAllocsOrPushWorklist(retInst->getReturnValue());
595  }
596  }
597  }
598  if (canUpdate)
599  {
600  _valueToAllocs[curValue] = SVFUtil::move(sources);
601  }
602  }
603  Set<const Value *> &srcs = _valueToAllocs[var];
604  if (srcs.empty())
605  {
606  WARN_MSG("Cannot find allocation: " + dumpValueAndDbgInfo(var));
607  }
608  return srcs;
609 }
unsigned u32_t
Definition: CommandLine.h:18
#define ABORT_IFNOT(condition, msg)
#define WARN_MSG(msg)
bool empty() const
Definition: WorkList.h:238
bool push(const Data &data)
Definition: WorkList.h:257
SVFFunction * getSVFFunction(const Function *fun) const
Definition: LLVMModule.h:230
static LLVMModuleSet * getLLVMModuleSet()
Definition: LLVMModule.h:118
ValueToSources _valueToAllocs
bool isAlloc(const SVF::Value *val)
is allocation (stack, static, heap)
const SVFBasicBlock * getExitBB() const
Definition: SVFValue.cpp:186
std::string dumpValueAndDbgInfo(const Value *val)
Definition: LLVMUtil.cpp:607
constexpr std::remove_reference< T >::type && move(T &&t) noexcept
Definition: SVFUtil.h:447
llvm::CallBase CallBase
Definition: BasicTypes.h:146
llvm::BasicBlock BasicBlock
Definition: BasicTypes.h:86
llvm::Function Function
Definition: BasicTypes.h:85
llvm::Value Value
LLVM Basic classes.
Definition: BasicTypes.h:82
llvm::StoreInst StoreInst
Definition: BasicTypes.h:148
std::unordered_set< Key, Hash, KeyEqual, Allocator > Set
Definition: GeneralType.h:96

◆ bwFindAllocOrClsNameSources()

Set< const Value * > & ObjTypeInference::bwFindAllocOrClsNameSources ( const Value startValue)
protected

find all possible allocations or class name sources (e.g., constructors/destructors or template functions) starting from a value

find all possible allocations or class name sources (e.g., constructors/destructors or template functions) if we already find class name sources, we don't need to find the allocations and forward find class name sources

Parameters
startValue
Returns

Definition at line 782 of file ObjTypeInference.cpp.

783 {
784 
785  // consult cache
786  auto tIt = _valueToAllocOrClsNameSources.find(startValue);
787  if (tIt != _valueToAllocOrClsNameSources.end())
788  {
789  return tIt->second;
790  }
791 
792  // simulate the call stack, the second element indicates whether we should update sources for current value
794  Set<ValueBoolPair> visited;
795  workList.push({startValue, false});
796  while (!workList.empty())
797  {
798  auto curPair = workList.pop();
799  if (visited.count(curPair)) continue;
800  visited.insert(curPair);
801  const Value *curValue = curPair.first;
802  bool canUpdate = curPair.second;
803 
804  Set<const Value *> sources;
805  auto insertSource = [&sources, &canUpdate](const Value *source)
806  {
807  if (canUpdate) sources.insert(source);
808  };
809  auto insertSourcesOrPushWorklist = [this, &sources, &workList, &canUpdate](const auto &pUser)
810  {
811  auto vIt = _valueToAllocOrClsNameSources.find(pUser);
812  if (canUpdate)
813  {
814  if (vIt != _valueToAllocOrClsNameSources.end() && !vIt->second.empty())
815  {
816  sources.insert(vIt->second.begin(), vIt->second.end());
817  }
818  }
819  else
820  {
821  if (vIt == _valueToAllocOrClsNameSources.end()) workList.push({pUser, false});
822  }
823  };
824 
825  if (!canUpdate && !_valueToAllocOrClsNameSources.count(curValue))
826  {
827  workList.push({curValue, true});
828  }
829 
830  // If current value is an instruction inside a constructor/destructor/template, use it as a source
831  if (const auto *inst = SVFUtil::dyn_cast<Instruction>(curValue))
832  {
833  if (const auto *parent = inst->getFunction())
834  {
835  if (isClsNameSource(parent)) insertSource(parent);
836  }
837  }
838 
839  // If the current value is an object (global, heap, stack, etc) or name source (constructor/destructor,
840  // a C++ dynamic cast, or a template function), use it as a source
841  if (isAlloc(curValue) || isClsNameSource(curValue))
842  {
843  insertSource(curValue);
844  }
845 
846  // Explore the current value further depending on the type of the value; use cached values if possible
847  if (const auto *getElementPtrInst = SVFUtil::dyn_cast<GetElementPtrInst>(curValue))
848  {
849  insertSourcesOrPushWorklist(getElementPtrInst->getPointerOperand());
850  }
851  else if (const auto *bitCastInst = SVFUtil::dyn_cast<BitCastInst>(curValue))
852  {
853  insertSourcesOrPushWorklist(bitCastInst->getOperand(0));
854  }
855  else if (const auto *phiNode = SVFUtil::dyn_cast<PHINode>(curValue))
856  {
857  for (const auto *op : phiNode->operand_values())
858  {
859  insertSourcesOrPushWorklist(op);
860  }
861  }
862  else if (const auto *loadInst = SVFUtil::dyn_cast<LoadInst>(curValue))
863  {
864  for (const auto *user : loadInst->getPointerOperand()->users())
865  {
866  if (const auto *storeInst = SVFUtil::dyn_cast<StoreInst>(user))
867  {
868  if (storeInst->getPointerOperand() == loadInst->getPointerOperand())
869  {
870  insertSourcesOrPushWorklist(storeInst->getValueOperand());
871  }
872  }
873  }
874  }
875  else if (const auto *argument = SVFUtil::dyn_cast<Argument>(curValue))
876  {
877  for (const auto *user: argument->getParent()->users())
878  {
879  if (const auto *callBase = SVFUtil::dyn_cast<CallBase>(user))
880  {
881  // skip function as parameter
882  // e.g., call void @foo(%struct.ssl_ctx_st* %9, i32 (i8*, i32, i32, i8*)* @passwd_callback)
883  if (callBase->getCalledFunction() != argument->getParent()) continue;
884  u32_t pos = argument->getParent()->isVarArg() ? 0 : argument->getArgNo();
885  insertSourcesOrPushWorklist(callBase->getArgOperand(pos));
886  }
887  }
888  }
889  else if (const auto *callBase = SVFUtil::dyn_cast<CallBase>(curValue))
890  {
891  ABORT_IFNOT(!callBase->doesNotReturn(), "callbase does not return:" + dumpValueAndDbgInfo(callBase));
892  if (const auto *callee = callBase->getCalledFunction())
893  {
894  if (!callee->isDeclaration())
895  {
896  const SVFFunction *svfFunc = LLVMModuleSet::getLLVMModuleSet()->getSVFFunction(callee);
897  const BasicBlock* exitBB = SVFUtil::cast<BasicBlock>(LLVMModuleSet::getLLVMModuleSet()->getLLVMValue(
898  svfFunc->getExitBB()));
899  const Value *pValue = &exitBB->back();
900  const auto *retInst = SVFUtil::dyn_cast<ReturnInst>(pValue);
901  ABORT_IFNOT(retInst && retInst->getReturnValue(), "not return inst?");
902  insertSourcesOrPushWorklist(retInst->getReturnValue());
903  }
904  }
905  }
906 
907  // If updating is allowed; store the gathered sources as sources for the current value in the cache
908  if (canUpdate)
909  {
910  _valueToAllocOrClsNameSources[curValue] = sources;
911  }
912  }
913 
914  return _valueToAllocOrClsNameSources[startValue];
915 }
ValueToSources _valueToAllocOrClsNameSources
bool isClsNameSource(const Value *val)
Definition: CppUtil.cpp:861

◆ defaultType()

const Type * ObjTypeInference::defaultType ( const Value val)

default type

Definition at line 114 of file ObjTypeInference.cpp.

115 {
116  ABORT_IFNOT(val, "val cannot be null");
117  // heap has a default type of 8-bit integer type
118  if (SVFUtil::isa<Instruction>(val) && LLVMUtil::isHeapAllocExtCallViaRet(
119  SVFUtil::cast<Instruction>(val)))
120  return int8Type();
121  // otherwise we return a pointer type in the default address space
122  return ptrType();
123 }
const IntegerType * int8Type()
int8 type
const Type * ptrType()
pointer type
bool isHeapAllocExtCallViaRet(const Instruction *inst)
Definition: LLVMUtil.cpp:618

◆ fwFindClsNameSources()

Set< const CallBase * > & ObjTypeInference::fwFindClsNameSources ( const Value startValue)
protected

forward find class name sources starting from an allocation

Definition at line 917 of file ObjTypeInference.cpp.

918 {
919  assert(startValue && "startValue was null?");
920 
921  // consult cache
922  auto tIt = _objToClsNameSources.find(startValue);
923  if (tIt != _objToClsNameSources.end())
924  {
925  return tIt->second;
926  }
927 
928  Set<const CallBase *> sources;
929 
930  // Lambda for adding a callee to the sources iff it is a constructor/destructor/template/dyncast
931  auto inferViaCppCall = [&sources](const CallBase *caller)
932  {
933  if (!caller) return;
934  if (isClsNameSource(caller)) sources.insert(caller);
935  };
936 
937  // Find all calls of starting val (or through cast); add as potential source iff applicable
938  for (const auto *user : startValue->users())
939  {
940  if (const auto *caller = SVFUtil::dyn_cast<CallBase>(user))
941  {
942  inferViaCppCall(caller);
943  }
944  else if (const auto *bitcast = SVFUtil::dyn_cast<BitCastInst>(user))
945  {
946  for (const auto *cast_user : bitcast->users())
947  {
948  if (const auto *caller = SVFUtil::dyn_cast<CallBase>(cast_user))
949  {
950  inferViaCppCall(caller);
951  }
952  }
953  }
954  }
955 
956  // Store sources in cache for starting value & return the found sources
957  return _objToClsNameSources[startValue] = SVFUtil::move(sources);
958 }
ObjToClsNameSources _objToClsNameSources

◆ fwInferObjType()

const Type * ObjTypeInference::fwInferObjType ( const Value var)
private

forward infer the type of the object pointed by var

forward infer the type of the object pointed by var

Parameters
var

Definition at line 162 of file ObjTypeInference.cpp.

163 {
164  if (const AllocaInst *allocaInst = SVFUtil::dyn_cast<AllocaInst>(var))
165  {
166  // stack object
167  return infersiteToType(allocaInst);
168  }
169  else if (const GlobalValue *global = SVFUtil::dyn_cast<GlobalValue>(var))
170  {
171  // global object
172  return infersiteToType(global);
173  }
174  else
175  {
176  // for heap or static object, we forward infer its type
177 
178  // consult cache
179  auto tIt = _valueToType.find(var);
180  if (tIt != _valueToType.end())
181  {
182  return tIt->second ? tIt->second : defaultType(var);
183  }
184 
185  // simulate the call stack, the second element indicates whether we should update valueTypes for current value
187  Set<ValueBoolPair> visited;
188  workList.push({var, false});
189 
190  while (!workList.empty())
191  {
192  auto curPair = workList.pop();
193  if (visited.count(curPair))
194  continue;
195  visited.insert(curPair);
196  const Value* curValue = curPair.first;
197  bool canUpdate = curPair.second;
198  Set<const Value*> infersites;
199 
200  auto insertInferSite = [&infersites,
201  &canUpdate](const Value* infersite)
202  {
203  if (canUpdate)
204  infersites.insert(infersite);
205  };
206  auto insertInferSitesOrPushWorklist =
207  [this, &infersites, &workList, &canUpdate](const auto& pUser)
208  {
209  auto vIt = _valueToInferSites.find(pUser);
210  if (canUpdate)
211  {
212  if (vIt != _valueToInferSites.end())
213  {
214  infersites.insert(vIt->second.begin(),
215  vIt->second.end());
216  }
217  }
218  else
219  {
220  if (vIt == _valueToInferSites.end())
221  workList.push({pUser, false});
222  }
223  };
224  if (!canUpdate && !_valueToInferSites.count(curValue))
225  {
226  workList.push({curValue, true});
227  }
228  if (const auto* gepInst =
229  SVFUtil::dyn_cast<GetElementPtrInst>(curValue))
230  insertInferSite(gepInst);
231  for (const auto it : curValue->users())
232  {
233  if (const auto* loadInst = SVFUtil::dyn_cast<LoadInst>(it))
234  {
235  /*
236  * infer based on load, e.g.,
237  %call = call i8* malloc()
238  %1 = bitcast i8* %call to %struct.MyStruct*
239  %q = load %struct.MyStruct, %struct.MyStruct* %1
240  */
241  insertInferSite(loadInst);
242  }
243  else if (const auto* storeInst =
244  SVFUtil::dyn_cast<StoreInst>(it))
245  {
246  if (storeInst->getPointerOperand() == curValue)
247  {
248  /*
249  * infer based on store (pointer operand), e.g.,
250  %call = call i8* malloc()
251  %1 = bitcast i8* %call to %struct.MyStruct*
252  store %struct.MyStruct .., %struct.MyStruct* %1
253  */
254  insertInferSite(storeInst);
255  }
256  else
257  {
258  for (const auto nit :
259  storeInst->getPointerOperand()->users())
260  {
261  /*
262  * propagate across store (value operand) and load
263  %call = call i8* malloc()
264  store i8* %call, i8** %p
265  %q = load i8*, i8** %p
266  ..infer based on %q..
267  */
268  if (SVFUtil::isa<LoadInst>(nit))
269  insertInferSitesOrPushWorklist(nit);
270  }
271  /*
272  * infer based on store (value operand) <- gep (result element)
273  */
274  if (const auto* gepInst =
275  SVFUtil::dyn_cast<GetElementPtrInst>(
276  storeInst->getPointerOperand()))
277  {
278  /*
279  %call1 = call i8* @TYPE_MALLOC(i32 noundef 16, i32
280  noundef 2), !dbg !39 %2 = bitcast i8* %call1 to
281  %struct.MyStruct*, !dbg !41 %3 = load
282  %struct.MyStruct*, %struct.MyStruct** %p, align 8,
283  !dbg !42 %next = getelementptr inbounds
284  %struct.MyStruct, %struct.MyStruct* %3, i32 0, i32
285  1, !dbg !43 store %struct.MyStruct* %2,
286  %struct.MyStruct** %next, align 8, !dbg !44 %5 =
287  load %struct.MyStruct*, %struct.MyStruct** %p,
288  align 8, !dbg !48 %next3 = getelementptr inbounds
289  %struct.MyStruct, %struct.MyStruct* %5, i32 0, i32
290  1, !dbg !49 %6 = load %struct.MyStruct*,
291  %struct.MyStruct** %next3, align 8, !dbg !49 infer
292  site -> %f1 = getelementptr inbounds
293  %struct.MyStruct, %struct.MyStruct* %6, i32 0, i32
294  0, !dbg !50
295  */
296  const Value* gepBase = gepInst->getPointerOperand();
297  if (const auto* load =
298  SVFUtil::dyn_cast<LoadInst>(gepBase))
299  {
300  for (const auto loadUse :
301  load->getPointerOperand()->users())
302  {
303  if (loadUse == load ||
304  !SVFUtil::isa<LoadInst>(loadUse))
305  continue;
306  for (const auto gepUse : loadUse->users())
307  {
308  if (!SVFUtil::isa<GetElementPtrInst>(
309  gepUse))
310  continue;
311  for (const auto loadUse2 :
312  gepUse->users())
313  {
314  if (SVFUtil::isa<LoadInst>(
315  loadUse2))
316  {
317  insertInferSitesOrPushWorklist(
318  loadUse2);
319  }
320  }
321  }
322  }
323  }
324  else if (const auto* alloc =
325  SVFUtil::dyn_cast<AllocaInst>(gepBase))
326  {
327  /*
328  %2 = alloca %struct.ll, align 8
329  store i32 0, ptr %1, align 4
330  %3 = call noalias noundef nonnull ptr
331  @_Znwm(i64 noundef 16) #2 %4 = getelementptr
332  inbounds %struct.ll, ptr %2, i32 0, i32 1
333  store ptr %3, ptr %4, align 8
334  %5 = getelementptr inbounds %struct.ll, ptr
335  %2, i32 0, i32 1 %6 = load ptr, ptr %5, align
336  8 %7 = getelementptr inbounds %struct.ll, ptr
337  %6, i32 0, i32 0
338  */
339  for (const auto gepUse : alloc->users())
340  {
341  if (!SVFUtil::isa<GetElementPtrInst>(
342  gepUse))
343  continue;
344  for (const auto loadUse2 : gepUse->users())
345  {
346  if (SVFUtil::isa<LoadInst>(loadUse2))
347  {
348  insertInferSitesOrPushWorklist(
349  loadUse2);
350  }
351  }
352  }
353  }
354  }
355  }
356  }
357  else if (const auto* gepInst =
358  SVFUtil::dyn_cast<GetElementPtrInst>(it))
359  {
360  /*
361  * infer based on gep (pointer operand)
362  %call = call i8* malloc()
363  %1 = bitcast i8* %call to %struct.MyStruct*
364  %next = getelementptr inbounds %struct.MyStruct,
365  %struct.MyStruct* %1, i32 0..
366  */
367  if (gepInst->getPointerOperand() == curValue)
368  insertInferSite(gepInst);
369  }
370  else if (const auto* bitcast =
371  SVFUtil::dyn_cast<BitCastInst>(it))
372  {
373  // continue on bitcast
374  insertInferSitesOrPushWorklist(bitcast);
375  }
376  else if (const auto* phiNode = SVFUtil::dyn_cast<PHINode>(it))
377  {
378  // continue on bitcast
379  insertInferSitesOrPushWorklist(phiNode);
380  }
381  else if (const auto* retInst =
382  SVFUtil::dyn_cast<ReturnInst>(it))
383  {
384  /*
385  * propagate from return to caller
386  Function Attrs: noinline nounwind optnone uwtable
387  define dso_local i8* @malloc_wrapper() #0 !dbg !22 {
388  entry:
389  %call = call i8* @malloc(i32 noundef 16), !dbg !25
390  ret i8* %call, !dbg !26
391  }
392  %call = call i8* @malloc_wrapper()
393  ..infer based on %call..
394  */
395  for (const auto callsite : retInst->getFunction()->users())
396  {
397  if (const auto* callBase =
398  SVFUtil::dyn_cast<CallBase>(callsite))
399  {
400  // skip function as parameter
401  // e.g., call void @foo(%struct.ssl_ctx_st* %9, i32 (i8*, i32, i32, i8*)* @passwd_callback)
402  if (callBase->getCalledFunction() !=
403  retInst->getFunction())
404  continue;
405  insertInferSitesOrPushWorklist(callBase);
406  }
407  }
408  }
409  else if (const auto* callBase = SVFUtil::dyn_cast<CallBase>(it))
410  {
411  /*
412  * propagate from callsite to callee
413  %call = call i8* @malloc(i32 noundef 16)
414  %0 = bitcast i8* %call to %struct.Node*, !dbg !43
415  call void @foo(%struct.Node* noundef %0), !dbg !45
416 
417  define dso_local void @foo(%struct.Node* noundef %param)
418  #0 !dbg !22 {...}
419  ..infer based on the formal param %param..
420  */
421  // skip global function value -> callsite
422  // e.g., def @foo() -> call @foo()
423  // we don't skip function as parameter, e.g., def @foo() -> call @bar(..., @foo)
424  if (SVFUtil::isa<Function>(curValue) &&
425  curValue == callBase->getCalledFunction())
426  continue;
427  // skip indirect call
428  // e.g., %0 = ... -> call %0(...)
429  if (!callBase->hasArgument(curValue))
430  continue;
431  if (Function* calleeFunc = callBase->getCalledFunction())
432  {
433  u32_t pos = getArgPosInCall(callBase, curValue);
434  // for varargs function, we cannot directly get the value-flow between actual and formal args e.g., consider the following vararg function @callee 1: call void @callee(%arg) 2: define dso_local i32 @callee(...) #0 !dbg !17 { 3: ....... 4: %5 = load i32, ptr %vaarg.addr, align 4, !dbg !55 5: .......
435  // 6: }
436  // it is challenging to precisely identify the forward value-flow of %arg (Line 2) because the function definition of callee (Line 2) does not have any formal args related to the actual arg %arg therefore we track all possible instructions like ``load i32, ptr %vaarg.addr''
437  if (calleeFunc->isVarArg())
438  {
439  // conservatively track all var args
440  for (auto& I : instructions(calleeFunc))
441  {
442  if (auto* load =
443  llvm::dyn_cast<llvm::LoadInst>(&I))
444  {
445  llvm::Value* loadPointer =
446  load->getPointerOperand();
447  if (loadPointer->getName().compare(
448  "vaarg.addr") == 0)
449  {
450  insertInferSitesOrPushWorklist(load);
451  }
452  }
453  }
454  }
455  else if (!calleeFunc->isDeclaration())
456  {
457  insertInferSitesOrPushWorklist(
458  calleeFunc->getArg(pos));
459  }
460  }
461  }
462  }
463  if (canUpdate)
464  {
465  Set<const Type*> types;
466  std::transform(infersites.begin(), infersites.end(),
467  std::inserter(types, types.begin()),
469  _valueToInferSites[curValue] = SVFUtil::move(infersites);
470  _valueToType[curValue] = selectLargestSizedType(types);
471  }
472  }
473  const Type* type = _valueToType[var];
474  if (type == nullptr)
475  {
476  type = defaultType(var);
477  WARN_MSG("Using default type, trace ID is " +
478  std::to_string(traceId) + ":" + dumpValueAndDbgInfo(var));
479  }
480  ABORT_IFNOT(type, "type cannot be a null ptr");
481  return type;
482  }
483 }
const Type * infersiteToType(const Value *val)
newitem type
Definition: cJSON.cpp:2739
ValueToInferSites _valueToInferSites
const Type * selectLargestSizedType(Set< const Type * > &objTys)
select the largest (conservative) type from all types
u32_t getArgPosInCall(const CallBase *callBase, const Value *arg)
const Type * defaultType(const Value *val)
default type
llvm::Type Type
Definition: BasicTypes.h:83
llvm::AllocaInst AllocaInst
Definition: BasicTypes.h:150
llvm::GlobalValue GlobalValue
Definition: BasicTypes.h:88

◆ getArgPosInCall()

u32_t ObjTypeInference::getArgPosInCall ( const CallBase callBase,
const Value arg 
)

Definition at line 659 of file ObjTypeInference.cpp.

660 {
661  assert(callBase->hasArgument(arg) && "callInst does not have argument arg?");
662  auto it = std::find(callBase->arg_begin(), callBase->arg_end(), arg);
663  assert(it != callBase->arg_end() && "Didn't find argument?");
664  return std::distance(callBase->arg_begin(), it);
665 }

◆ getLLVMCtx()

LLVMContext & ObjTypeInference::getLLVMCtx ( )

Definition at line 125 of file ObjTypeInference.cpp.

126 {
128 }
LLVMContext & getContext() const
Definition: LLVMModule.h:349

◆ inferObjType()

const Type * ObjTypeInference::inferObjType ( const Value var)

get or infer the type of the object pointed by the value

get or infer the type of the object pointed by var if the start value is a source (alloc/global, heap, static), call fwInferObjType if not, find allocations and then forward get or infer types

Parameters
val

Definition at line 136 of file ObjTypeInference.cpp.

137 {
138  if (isAlloc(var)) return fwInferObjType(var);
139  Set<const Value *> &sources = bwfindAllocOfVar(var);
140  Set<const Type *> types;
141  if (sources.empty())
142  {
143  // cannot find allocation, try to fw infer starting from var
144  types.insert(fwInferObjType(var));
145  }
146  else
147  {
148  for (const auto &source: sources)
149  {
150  types.insert(fwInferObjType(source));
151  }
152  }
153  const Type *largestTy = selectLargestSizedType(types);
154  ABORT_IFNOT(largestTy, "return type cannot be null");
155  return largestTy;
156 }
Set< const Value * > & bwfindAllocOfVar(const Value *var)
backward collect all possible allocation sites (stack, static, heap) of var
const Type * fwInferObjType(const Value *var)
forward infer the type of the object pointed by var

◆ inferThisPtrClsName()

Set< std::string > & ObjTypeInference::inferThisPtrClsName ( const Value thisPtr)

get or infer the class names of thisptr

get or infer the class names of thisptr; starting from :param:thisPtr, will walk backwards to find all potential sources for the class name. Valid sources include global or stack variables, heap allocations, or C++ dynamic casts/constructors/destructors. If the source site is a global/stack/heap variable, find the corresponding constructor/destructor to extract the class' name from (since the type of the variable is not reliable but the demangled name is)

Parameters
thisPtr
Returns
a set of all possible type names that :param:thisPtr could point to

Definition at line 709 of file ObjTypeInference.cpp.

710 {
711  auto it = _thisPtrClassNames.find(thisPtr);
712  if (it != _thisPtrClassNames.end()) return it->second;
713 
714  Set<std::string> names;
715 
716  // Lambda for checking a function is a valid name source & extracting a class name from it
717  auto addNamesFromFunc = [&names](const Function *func) -> void
718  {
719  ABORT_IFNOT(isClsNameSource(func), "Func is invalid class name source: " + dumpValueAndDbgInfo(func));
720  for (const auto &name : extractClsNamesFromFunc(func)) names.insert(name);
721  };
722 
723  // Lambda for getting callee & extracting class name for calls to constructors/destructors/template funcs
724  auto addNamesFromCall = [&names, &addNamesFromFunc](const CallBase *call) -> void
725  {
726  ABORT_IFNOT(isClsNameSource(call), "Call is invalid class name source: " + dumpValueAndDbgInfo(call));
727 
728  const auto *func = call->getCalledFunction();
729  if (isDynCast(func)) names.insert(extractClsNameFromDynCast(call));
730  else addNamesFromFunc(func);
731  };
732 
733  // Walk backwards to find all valid source sites for the pointer (e.g. stack/global/heap variables)
734  for (const auto &val: bwFindAllocOrClsNameSources(thisPtr))
735  {
736  // A source site is either a constructor/destructor/template function from which the class name can be
737  // extracted; a call to a C++ constructor/destructor/template function from which the class name can be
738  // extracted; or an allocation site of an object (i.e. a stack/global/heap variable), from which a
739  // forward walk can be performed to find calls to C++ constructor/destructor/template functions from
740  // which the class' name can then be extracted; skip starting pointer
741  if (val == thisPtr) continue;
742 
743  if (const auto *func = SVFUtil::dyn_cast<Function>(val))
744  {
745  // Constructor/destructor/template func; extract name from func directly
746  addNamesFromFunc(func);
747  }
748  else if (isClsNameSource(val))
749  {
750  // Call to constructor/destructor/template func; get callee; extract name from callee
751  ABORT_IFNOT(SVFUtil::isa<CallBase>(val), "Call source site is not a callbase: " + dumpValueAndDbgInfo(val));
752  addNamesFromCall(SVFUtil::cast<CallBase>(val));
753  }
754  else if (isAlloc(val))
755  {
756  // Stack/global/heap allocation site; walk forward; find constructor/destructor/template calls
757  ABORT_IFNOT((SVFUtil::isa<AllocaInst, CallBase, GlobalVariable>(val)),
758  "Alloc site source is not a stack/heap/global variable: " + dumpValueAndDbgInfo(val));
759  for (const auto *src : fwFindClsNameSources(val))
760  {
761  if (const auto *func = SVFUtil::dyn_cast<Function>(src)) addNamesFromFunc(func);
762  else if (const auto *call = SVFUtil::dyn_cast<CallBase>(src)) addNamesFromCall(call);
763  else ABORT_MSG("Source site from forward walk is invalid: " + dumpValueAndDbgInfo(src));
764  }
765  }
766  else
767  {
768  ERR_MSG("Unsupported source type found:" + dumpValueAndDbgInfo(val));
769  }
770  }
771 
772  return _thisPtrClassNames[thisPtr] = names;
773 }
#define ABORT_MSG(msg)
#define ERR_MSG(msg)
const char *const name
Definition: cJSON.h:264
Set< const Value * > & bwFindAllocOrClsNameSources(const Value *startValue)
ValueToClassNames _thisPtrClassNames
Set< const CallBase * > & fwFindClsNameSources(const Value *startValue)
forward find class name sources starting from an allocation
std::string extractClsNameFromDynCast(const CallBase *callBase)
extract class name from cpp dyncast function
Definition: CppUtil.cpp:921
Set< std::string > extractClsNamesFromFunc(const Function *foo)
extract class name from the c++ function name, e.g., constructor/destructors
Definition: CppUtil.cpp:706
bool isDynCast(const Function *foo)
whether foo is a cpp dyncast function
Definition: CppUtil.cpp:910

◆ int8Type()

const IntegerType* SVF::ObjTypeInference::int8Type ( )
inline

int8 type

Definition at line 88 of file ObjTypeInference.h.

89  {
90  return Type::getInt8Ty(getLLVMCtx());
91  }
LLVMContext & getLLVMCtx()

◆ isAlloc()

bool ObjTypeInference::isAlloc ( const SVF::Value val)
private

is allocation (stack, static, heap)

Definition at line 611 of file ObjTypeInference.cpp.

612 {
613  return LLVMUtil::isObject(val);
614 }
bool isObject(const Value *ref)
Return true if this value refers to a object.
Definition: LLVMUtil.cpp:59

◆ objTyToNumFields()

u32_t ObjTypeInference::objTyToNumFields ( const Type objTy)

For an C++ class, it can have variant elements depending on the vtable size, Hence we only handle non-cpp-class object, the type of the cpp class is treated as default PointerType

Definition at line 684 of file ObjTypeInference.cpp.

685 {
687  if (SVFUtil::isa<ArrayType>(objTy))
688  num = getNumOfElements(objTy);
689  else if (const auto *st = SVFUtil::dyn_cast<StructType>(objTy))
690  {
693  if (!classTyHasVTable(st))
694  num = getNumOfElements(st);
695  }
696  return num;
697 }
static const Option< u32_t > MaxFieldLimit
Maximum number of field derivations for an object.
Definition: Options.h:38
u32_t getNumOfElements(const Type *ety)
Return size of this object based on LLVM value.
Definition: LLVMUtil.cpp:297
bool classTyHasVTable(const StructType *ty)
Definition: CppUtil.cpp:569

◆ ptrType()

const Type* SVF::ObjTypeInference::ptrType ( )
inline

pointer type

Definition at line 82 of file ObjTypeInference.h.

83  {
84  return PointerType::getUnqual(getLLVMCtx());
85  }

◆ selectLargestSizedType()

const Type * ObjTypeInference::selectLargestSizedType ( Set< const Type * > &  objTys)

select the largest (conservative) type from all types

Definition at line 668 of file ObjTypeInference.cpp.

669 {
670  if (objTys.empty()) return nullptr;
671  // map type size to types from with key in descending order
672  OrderedMap<u32_t, OrderedSet<const Type *>, std::greater<int>> typeSzToTypes;
673  for (const Type *ty: objTys)
674  {
675  typeSzToTypes[objTyToNumFields(ty)].insert(ty);
676  }
677  assert(!typeSzToTypes.empty() && "typeSzToTypes cannot be empty");
678  OrderedSet<const Type *> largestTypes;
679  std::tie(std::ignore, largestTypes) = *typeSzToTypes.begin();
680  assert(!largestTypes.empty() && "largest element cannot be empty");
681  return *largestTypes.begin();
682 }
u32_t objTyToNumFields(const Type *objTy)
std::set< Key, Compare, Allocator > OrderedSet
Definition: GeneralType.h:105
std::map< Key, Value, Compare, Allocator > OrderedMap
Definition: GeneralType.h:109

◆ typeSizeDiffTest()

void ObjTypeInference::typeSizeDiffTest ( const PointerType oPTy,
const Type iTy,
const Value val 
)

Definition at line 645 of file ObjTypeInference.cpp.

646 {
647 #if TYPE_DEBUG
648  Type *oTy = getPtrElementType(oPTy);
649  u32_t iTyNum = objTyToNumFields(iTy);
650  if (getNumOfElements(oTy) > iTyNum)
651  {
652  ERR_MSG("original type is:" + dumpType(oTy));
653  ERR_MSG("infered type is:" + dumpType(iTy));
654  ABORT_MSG("wrong type, trace ID is " + std::to_string(traceId) + ":" + dumpValueAndDbgInfo(val));
655  }
656 #endif
657 }
std::string dumpType(const Type *type)
Definition: LLVMUtil.cpp:596
static Type * getPtrElementType(const PointerType *pty)
Definition: LLVMUtil.h:96

◆ validateTypeCheck()

void ObjTypeInference::validateTypeCheck ( const CallBase cs)

validate type inference

validate type inference

Parameters
cs: stub malloc function with element number label

Definition at line 620 of file ObjTypeInference.cpp.

621 {
622  if (const Function *func = cs->getCalledFunction())
623  {
624  if (func->getName().find(TYPEMALLOC) != std::string::npos)
625  {
626  const Type *objType = fwInferObjType(cs);
627  const auto *pInt =
628  SVFUtil::dyn_cast<llvm::ConstantInt>(cs->getOperand(1));
629  assert(pInt && "the second argument is a integer");
630  u32_t iTyNum = objTyToNumFields(objType);
631  if (iTyNum >= pInt->getZExtValue())
632  SVFUtil::outs() << SVFUtil::sucMsg("\t SUCCESS :") << dumpValueAndDbgInfo(cs)
633  << SVFUtil::pasMsg(" TYPE: ")
634  << dumpType(objType) << "\n";
635  else
636  {
637  SVFUtil::errs() << SVFUtil::errMsg("\t FAILURE :") << ":" << dumpValueAndDbgInfo(cs) << " TYPE: "
638  << dumpType(objType) << "\n";
639  abort();
640  }
641  }
642  }
643 }
const std::string TYPEMALLOC
std::string sucMsg(const std::string &msg)
Returns successful message by converting a string into green string output.
Definition: SVFUtil.cpp:53
std::string pasMsg(const std::string &msg)
Print each pass/phase message by converting a string into blue string output.
Definition: SVFUtil.cpp:99
std::string errMsg(const std::string &msg)
Print error message by converting a string into red string output.
Definition: SVFUtil.cpp:76
std::ostream & errs()
Overwrite llvm::errs()
Definition: SVFUtil.h:56
std::ostream & outs()
Overwrite llvm::outs()
Definition: SVFUtil.h:50

Member Data Documentation

◆ _objToClsNameSources

ObjToClsNameSources SVF::ObjTypeInference::_objToClsNameSources
private

Definition at line 60 of file ObjTypeInference.h.

◆ _thisPtrClassNames

ValueToClassNames SVF::ObjTypeInference::_thisPtrClassNames
private

Definition at line 58 of file ObjTypeInference.h.

◆ _valueToAllocOrClsNameSources

ValueToSources SVF::ObjTypeInference::_valueToAllocOrClsNameSources
private

Definition at line 59 of file ObjTypeInference.h.

◆ _valueToAllocs

ValueToSources SVF::ObjTypeInference::_valueToAllocs
private

Definition at line 57 of file ObjTypeInference.h.

◆ _valueToInferSites

ValueToInferSites SVF::ObjTypeInference::_valueToInferSites
private

Definition at line 55 of file ObjTypeInference.h.

◆ _valueToType

ValueToType SVF::ObjTypeInference::_valueToType
private

Definition at line 56 of file ObjTypeInference.h.


The documentation for this class was generated from the following files: