Static Value-Flow Analysis
Loading...
Searching...
No Matches
SVFIRExtAPI.cpp
Go to the documentation of this file.
1//===- SVFIRExtAPI.cpp -- External function IR of SVF ---------------------------------------------//
2//
3// SVF: Static Value-Flow Analysis
4//
5// Copyright (C) <2013-> <Yulei Sui>
6//
7
8// This program is free software: you can redistribute it and/or modify
9// it under the terms of the GNU Affero General Public License as published by
10// the Free Software Foundation, either version 3 of the License, or
11// (at your option) any later version.
12
13// This program is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16// GNU Affero General Public License for more details.
17
18// You should have received a copy of the GNU Affero General Public License
19// along with this program. If not, see <http://www.gnu.org/licenses/>.
20//
21//===----------------------------------------------------------------------===//
22
23/*
24 * SVFIRExtAPI.cpp
25 *
26 * Created on: 18, 5, 2023
27 * Author: Shuangxiang Kan
28 */
29
31#include "Util/SVFUtil.h"
34#include "Graphs/CallGraph.h"
35
36using namespace std;
37using namespace SVF;
38using namespace SVFUtil;
39using namespace LLVMUtil;
40
41namespace
42{
43
44struct MemcpyField // One flattened field record collected for modelling a memcpy-like copy by byte layout.
45{
46 APOffset byteOffset; // Byte offset of the field; used as the key when pairing destination fields with source fields.
47 AccessPath accessPath; // Access path of the field, passed to getGepValVar to obtain the field's GEP node.
48 const SVFType* elementType; // SVF type of the field, passed to getGepValVar as the element type.
49};
50
52 const Type* llvmType,
53 const SVFType* svfType,
54 const DataLayout& dl,
55 IRGraph* pag,
56 std::vector<MemcpyField>& fields,
59{
60 if (llvmType == nullptr || svfType == nullptr)
61 return;
62
63 if (svfType->isPointerTy())
64 {
65 fields.push_back({baseByteOffset, AccessPath(baseFldIdx), svfType});
66 return;
67 }
68
69 if (const auto* structType = SVFUtil::dyn_cast<StructType>(llvmType))
70 {
71 const StructLayout* layout = dl.getStructLayout(const_cast<StructType*>(structType));
72 for (u32_t i = 0; i < structType->getNumElements(); ++i)
73 {
74 const Type* elemLLVMType = structType->getElementType(i);
76 if (elemSVFType == nullptr)
77 return;
78 APOffset elemByteOffset = baseByteOffset + static_cast<APOffset>(layout->getElementOffset(i));
81 }
82 return;
83 }
84
85 if (const auto* arrayType = SVFUtil::dyn_cast<ArrayType>(llvmType))
86 {
87 const Type* elemLLVMType = arrayType->getElementType();
89 if (elemSVFType == nullptr)
90 return;
91 const APOffset elemByteSize = static_cast<APOffset>(dl.getTypeAllocSize(const_cast<Type*>(elemLLVMType)));
92 for (u32_t i = 0; i < arrayType->getNumElements(); ++i)
93 {
97 }
98 }
99}
100
101std::vector<MemcpyField> getMemcpyFields(const Value* value, const Type* llvmType, const SVFType* svfType)
102{
103 std::vector<MemcpyField> fields;
105 auto* pag = PAG::getPAG();
106 const DataLayout& dl = mset->getMainLLVMModule()->getDataLayout();
107 collectMemcpyFields(llvmType, svfType, dl, pag, fields);
108 return fields;
109}
110
112{
113 if (const auto* allocaInst = llvm::dyn_cast_or_null<AllocaInst>(baseValue))
114 return allocaInst->getAllocatedType();
115
116 if (const auto* global = llvm::dyn_cast_or_null<GlobalVariable>(baseValue))
117 return global->getValueType();
118
119 return fallbackType;
120}
121
122}
123
127const Type* SVFIRBuilder::getBaseTypeAndFlattenedFields(const Value* V, std::vector<AccessPath> &fields, const Value* szValue)
128{
129 assert(V);
130 const Value* value = getBaseValueForExtArg(V);
134 if(szValue && SVFUtil::isa<ConstantInt>(szValue))
135 {
136 auto szIntVal = LLVMUtil::getIntegerValue(SVFUtil::cast<ConstantInt>(szValue));
137 numOfElems = (numOfElems > szIntVal.first) ? szIntVal.first : numOfElems;
138 }
139
141 for(u32_t ei = 0; ei < numOfElems; ei++)
142 {
144 // make a ConstantInt and create char for the content type due to byte-wise copy
145 const ConstantInt* offset = ConstantInt::get(context, llvm::APInt(32, ei));
146 if (!llvmModuleSet()->hasValueNode(offset))
147 {
149 builder.collectSym(offset);
151 pag->addConstantIntValNode(id, LLVMUtil::getIntegerValue(offset), nullptr, llvmModuleSet()->getSVFType(offset->getType()));
153 pag->getGNode(id));
154 }
155 ls.addOffsetVarAndGepTypePair(getPAG()->getValVar(llvmModuleSet()->getValueNode(offset)), nullptr);
156 fields.push_back(ls);
157 }
158 return objType;
159}
160
166{
167 assert(D && S);
169 if(!vnD || !vnS)
170 return;
171
172 std::vector<AccessPath> fields;
173
174 //Get the max possible size of the copy, unless it was provided.
175 std::vector<AccessPath> srcFields;
176 std::vector<AccessPath> dstFields;
179 if(srcFields.size() > dstFields.size())
180 fields = dstFields;
181 else
182 fields = srcFields;
183
185 u32_t sz = fields.size();
186
187 if (fields.size() == 1 && (LLVMUtil::isConstDataOrAggData(D) || LLVMUtil::isConstDataOrAggData(S)))
188 {
192 return;
193 }
194
201 const bool hasRemappedGlobalBase =
202 (dstFieldBase != D && SVFUtil::isa<GlobalVariable>(dstFieldBase)) ||
203 (srcFieldBase != S && SVFUtil::isa<GlobalVariable>(srcFieldBase));
204 const bool useByteLayoutMemcpy =
207 {
210 std::vector<MemcpyField> dstMemcpyFields = getMemcpyFields(D, dstLayoutType, dstSVFType);
211 std::vector<MemcpyField> srcMemcpyFields = getMemcpyFields(S, srcLayoutType, srcSVFType);
212 if (dstMemcpyFields.empty() || srcMemcpyFields.empty())
214
215 std::unordered_map<APOffset, MemcpyField> srcFieldsByByteOffset;
216 for (const auto& field : srcMemcpyFields)
217 srcFieldsByByteOffset.emplace(field.byteOffset, field);
218
219 const DataLayout& dl = llvmModuleSet()->getMainLLVMModule()->getDataLayout();
220 APOffset copyBytes = std::min<APOffset>(
221 static_cast<APOffset>(dl.getTypeAllocSize(const_cast<Type*>(dstLayoutType))),
222 static_cast<APOffset>(dl.getTypeAllocSize(const_cast<Type*>(srcLayoutType))));
223 if (szValue && SVFUtil::isa<ConstantInt>(szValue))
224 {
225 auto szIntVal = LLVMUtil::getIntegerValue(SVFUtil::cast<ConstantInt>(szValue));
226 copyBytes = std::min(copyBytes, static_cast<APOffset>(szIntVal.first));
227 }
228
229 for (const auto& dstField : dstMemcpyFields)
230 {
231 if (dstField.byteOffset >= copyBytes)
232 continue;
233 auto it = srcFieldsByByteOffset.find(dstField.byteOffset);
234 if (it == srcFieldsByByteOffset.end())
235 continue;
236
237 NodeID dField = getGepValVar(dstFieldBase, dstField.accessPath, dstField.elementType);
238 NodeID sField = getGepValVar(srcFieldBase, it->second.accessPath, it->second.elementType);
242 }
243 return;
244 }
245
247 //For each field (i), add (Ti = *S + i) and (*D + i = Ti).
248 for (u32_t index = 0; index < sz; index++)
249 {
252 fields[index].getConstantStructFldIdx());
254 fields[index].getConstantStructFldIdx());
260 }
261}
262
264{
266
268 {
272 }
274 {
276 Value* arg = cs->getArgOperand(arg_pos);
277 if (cs->getArgOperand(arg_pos)->getType()->isPointerTy())
278 {
281 NodeID obj = pag->addDummyObjNode(llvmModuleSet()->getSVFType(cs->getArgOperand(arg_pos)->getType()));
282 if (vnArg && dummy && obj)
283 {
286 }
287 }
288 else
289 {
290 writeWrnMsg("Arg receiving new object must be pointer type");
291 }
292 }
294 {
295 // Side-effects similar to void *memcpy(void *dest, const void * src, size_t n)
296 // which copies n characters from memory area 'src' to memory area 'dest'.
297 if(callee->getName().find("iconv") != std::string::npos)
298 addComplexConsForExt(cs->getArgOperand(3), cs->getArgOperand(1), nullptr);
299 else if(callee->getName().find("bcopy") != std::string::npos)
300 addComplexConsForExt(cs->getArgOperand(1), cs->getArgOperand(0), cs->getArgOperand(2));
301 if(cs->arg_size() == 3)
302 addComplexConsForExt(cs->getArgOperand(0), cs->getArgOperand(1), cs->getArgOperand(2));
303 else
304 addComplexConsForExt(cs->getArgOperand(0), cs->getArgOperand(1), nullptr);
305 if(SVFUtil::isa<PointerType>(cs->getType()))
306 addCopyEdge(getValueNode(cs->getArgOperand(0)), getValueNode(cs), CopyStmt::COPYVAL);
307 }
309 {
310 // Side-effects similar to memset(void *str, int c, size_t n)
311 // which copies the character c (an unsigned char) to the first n characters of the string pointed to by the argument str
312 std::vector<AccessPath> dstFields;
313 const Type *dtype = getBaseTypeAndFlattenedFields(cs->getArgOperand(0), dstFields, cs->getArgOperand(2));
314 u32_t sz = dstFields.size();
315 //For each field (i), add store edge *(arg0 + i) = arg1
316 for (u32_t index = 0; index < sz; index++)
317 {
320 dstFields[index].getConstantStructFldIdx());
321 NodeID dField = getGepValVar(cs->getArgOperand(0), dstFields[index], dElementType);
322 addStoreEdge(getValueNode(cs->getArgOperand(1)),dField);
323 }
324 if(SVFUtil::isa<PointerType>(cs->getType()))
325 addCopyEdge(getValueNode(cs->getArgOperand(0)), getValueNode(cs), CopyStmt::COPYVAL);
326 }
327 else if(callee->getName().compare("dlsym") == 0)
328 {
329 /*
330 Side-effects of void* dlsym( void* handle, const char* funName),
331 Locate the function with the name "funName," then add a "copy" edge between the callsite and that function.
332 dlsym() example:
333 int main() {
334 // Open the shared library
335 void* handle = dlopen("./my_shared_library.so", RTLD_LAZY);
336 // Find the function address
337 void (*myFunctionPtr)() = (void (*)())dlsym(handle, "myFunction");
338 // Call the function
339 myFunctionPtr();
340 }
341 */
342 const Value* src = cs->getArgOperand(1);
343 if(const GetElementPtrInst* gep = SVFUtil::dyn_cast<GetElementPtrInst>(src))
344 src = stripConstantCasts(gep->getPointerOperand());
345
346 auto getHookFn = [](const Value* src)->const Function*
347 {
348 if (!SVFUtil::isa<GlobalVariable>(src))
349 return nullptr;
350
351 auto *glob = SVFUtil::cast<GlobalVariable>(src);
352 if (!glob->hasInitializer() || !SVFUtil::isa<ConstantDataArray>(glob->getInitializer()))
353 return nullptr;
354
355 auto *constarray = SVFUtil::cast<ConstantDataArray>(glob->getInitializer());
356 return LLVMUtil::getProgFunction(constarray->getAsCString().str());
357 };
358
359 if (const Function *fn = getHookFn(src))
360 {
363 }
364 }
365 else if(callee->getName().find("_ZSt29_Rb_tree_insert_and_rebalancebPSt18_Rb_tree_node_baseS0_RS_") != std::string::npos)
366 {
367 // The purpose of this function is to insert a new node into the red-black tree and then rebalance the tree to ensure that the red-black tree properties are maintained.
368 assert(cs->arg_size() == 4 && "_Rb_tree_insert_and_rebalance should have 4 arguments.\n");
369
370 // vArg3 points to the entry of _Rb_tree_node_base { color; parent; left; right; }.
371 // Now we calculate the offset from base to vArg3
372 NodeID vnArg3 = llvmModuleSet()->getValueNode(cs->getArgOperand(3));
375
376 // We get all flattened fields of base
378
379 // We summarize the side effects: arg3->parent = arg1, arg3->left = arg1, arg3->right = arg1
380 // Note that arg0 is aligned with "offset".
381 for (APOffset i = offset + 1; i <= offset + 3; ++i)
382 {
383 if((u32_t)i >= fields.size())
384 break;
385 const SVFType* elementType = pag->getFlatternedElemType(pag->getTypeLocSetsMap(vnArg3).first,
386 fields[i].getConstantStructFldIdx());
387 NodeID vnD = getGepValVar(cs->getArgOperand(3), fields[i], elementType);
388 NodeID vnS = llvmModuleSet()->getValueNode(cs->getArgOperand(1));
389 if(vnD && vnS)
391 }
392 }
393
395 {
397 if (const FunValVar* funcValVar = SVFUtil::dyn_cast<FunValVar>(valVar))
398 {
403 assert((forkedFun->arg_size() <= 2) && "Size of formal parameter of start routine should be one");
404 if (forkedFun->arg_size() <= 2 && forkedFun->arg_size() >= 1)
405 {
406 const ArgValVar* formalParm = forkedFun->getArg(0);
408 if (actualParm->isPointer() && formalParm->getType()->isPointerTy())
409 {
411 addThreadForkEdge(actualParm->getId(), formalParm->getId(), callICFGNode, entry);
412 }
413 }
414 }
415 else
416 {
421 }
425 }
426
428}
unsigned u32_t
Definition CommandLine.h:18
buffer offset
Definition cJSON.cpp:1113
int index
Definition cJSON.h:170
APOffset getConstantStructFldIdx() const
Get methods.
Definition AccessPath.h:98
Class representing a function argument variable in the SVFIR.
virtual const FunObjVar * getFunction() const
Get containing function, or null for globals/constants.
const FunObjVar * getDefFunForMultipleModule() const
NodeType * getGNode(NodeID id) const
Get a node.
FunEntryICFGNode * getFunEntryICFGNode(const FunObjVar *fun)
Add a function entry node.
Definition ICFG.cpp:242
u32_t getFlattenedElemIdx(const SVFType *T, u32_t origId)
Flattened element idx of an array or struct by considering stride.
Definition IRGraph.cpp:144
u32_t getNumOfFlattenElements(const SVFType *T)
Definition IRGraph.cpp:169
const SVFType * getFlatternedElemType(const SVFType *baseType, u32_t flatten_idx)
Return the type of a flattened element given a flattened index.
Definition IRGraph.cpp:123
const SVFType * getOriginalElemType(const SVFType *baseType, u32_t origId) const
Definition IRGraph.cpp:139
NodeID getValueNode(const Value *V)
Module * getMainLLVMModule() const
Definition LLVMModule.h:366
static LLVMModuleSet * getLLVMModuleSet()
Definition LLVMModule.h:131
void addToSVFVar2LLVMValueMap(const Value *val, SVFValue *svfBaseNode)
SVFType * getSVFType(const Type *T)
Get or create SVFType and typeinfo.
CallICFGNode * getCallICFGNode(const Instruction *cs)
get a call node
NodeID getObjectNode(const Value *V)
LLVMContext & getContext() const
Definition LLVMModule.h:381
ObjTypeInference * getTypeInference()
const Type * inferObjType(const Value *var)
get or infer the type of the object pointed by the value
SVFIR * getPAG() const
Return SVFIR.
void addStoreEdge(NodeID src, NodeID dst)
Add Store edge.
void addLoadEdge(NodeID src, NodeID dst)
Add Load edge.
virtual void handleExtCall(const CallBase *cs, const Function *callee)
AddrStmt * addAddrWithHeapSz(NodeID src, NodeID dst, const CallBase *cs)
Add Address edge from ext call with args like "%5 = call i8* @malloc(i64 noundef 5)".
LLVMModuleSet * llvmModuleSet()
void addThreadForkEdge(NodeID src, NodeID dst, const CallICFGNode *cs, const FunEntryICFGNode *entry)
Add Thread fork edge for parameter passing.
AccessPath getAccessPathFromBaseNode(NodeID nodeId)
const Value * getBaseValueForExtArg(const Value *V)
Get the base value of (i8* src and i8* dst) for external argument (e.g. memcpy(i8* dst,...
virtual const Type * getBaseTypeAndFlattenedFields(const Value *V, std::vector< AccessPath > &fields, const Value *szValue)
Handle external call.
CopyStmt * addCopyEdge(NodeID src, NodeID dst, CopyStmt::CopyKind kind)
NodeID getValueNode(const Value *V)
Get different kinds of node.
virtual void addComplexConsForExt(Value *D, Value *S, const Value *sz)
NodeID getGepValVar(const Value *val, const AccessPath &ap, const SVFType *elementType)
NodeID addConstantIntValNode(NodeID i, const std::pair< s64_t, u64_t > &intValue, const ICFGNode *icfgNode, const SVFType *type)
Definition SVFIR.h:689
ICFG * getICFG() const
Definition SVFIR.h:229
NodeID addDummyValNode()
Definition SVFIR.h:564
static SVFIR * getPAG(bool buildFromFile=false)
Singleton design here to make sure we only have one instance during any analysis.
Definition SVFIR.h:118
SVFTypeLocSetsPair & getTypeLocSetsMap(NodeID argId)
Given an arg NodeId, get its base SVFType* and all its field location sets.
Definition SVFIR.h:339
NodeID addDummyObjNode(const SVFType *type)
Definition SVFIR.h:568
const Function * getProgFunction(const std::string &funName)
Get program entry function from module.
Definition LLVMUtil.cpp:40
const Value * stripConstantCasts(const Value *val)
Strip off the constant casts.
Definition LLVMUtil.cpp:219
bool isHeapAllocExtCallViaRet(const Instruction *inst)
Definition LLVMUtil.cpp:638
bool isMemcpyExtFun(const Function *fun)
Definition LLVMUtil.cpp:389
std::pair< s64_t, u64_t > getIntegerValue(const ConstantInt *intValue)
Definition LLVMUtil.h:83
bool isConstDataOrAggData(const Value *val)
Return true if the value refers to constant data, e.g., i32 0.
Definition LLVMUtil.h:374
bool isHeapAllocExtCallViaArg(const Instruction *inst)
Definition LLVMUtil.cpp:653
bool isMemsetExtFun(const Function *fun)
Definition LLVMUtil.cpp:395
bool isObject(const Value *ref)
Return true if this value refers to a object.
Definition LLVMUtil.cpp:60
u32_t getHeapAllocHoldingArgPosition(const Function *fun)
Definition LLVMUtil.cpp:401
bool isThreadForkCall(const CallICFGNode *inst)
Definition SVFUtil.h:360
void writeWrnMsg(const std::string &msg)
Writes a message run through wrnMsg.
Definition SVFUtil.cpp:68
const ValVar * getActualParmAtForkSite(const CallICFGNode *cs)
Return sole argument of the thread routine.
Definition SVFUtil.h:408
const ValVar * getForkedFun(const CallICFGNode *inst)
Return thread fork function.
Definition SVFUtil.h:331
for isBitcode
Definition BasicTypes.h:70
llvm::DataLayout DataLayout
Definition BasicTypes.h:112
llvm::Type Type
Definition BasicTypes.h:87
llvm::CallBase CallBase
Definition BasicTypes.h:153
llvm::StructType StructType
LLVM types.
Definition BasicTypes.h:98
u32_t NodeID
Definition GeneralType.h:56
llvm::StructLayout StructLayout
Definition BasicTypes.h:109
s64_t APOffset
Definition GeneralType.h:60
llvm::Function Function
Definition BasicTypes.h:89
llvm::Value Value
LLVM Basic classes.
Definition BasicTypes.h:86
llvm::IRBuilder IRBuilder
Definition BasicTypes.h:76
llvm::GetElementPtrInst GetElementPtrInst
Definition BasicTypes.h:169
unsigned u32_t
Definition GeneralType.h:47
llvm::ConstantInt ConstantInt
Definition BasicTypes.h:129
llvm::LLVMContext LLVMContext
Definition BasicTypes.h:72