Static Value-Flow Analysis
Loading...
Searching...
No Matches
SVFIRExtAPI.cpp
Go to the documentation of this file.
1//===- SVFIRExtAPI.cpp -- External function IR of SVF ---------------------------------------------//
2//
3// SVF: Static Value-Flow Analysis
4//
5// Copyright (C) <2013-> <Yulei Sui>
6//
7
8// This program is free software: you can redistribute it and/or modify
9// it under the terms of the GNU Affero General Public License as published by
10// the Free Software Foundation, either version 3 of the License, or
11// (at your option) any later version.
12
13// This program is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16// GNU Affero General Public License for more details.
17
18// You should have received a copy of the GNU Affero General Public License
19// along with this program. If not, see <http://www.gnu.org/licenses/>.
20//
21//===----------------------------------------------------------------------===//
22
23/*
24 * SVFIRExtAPI.cpp
25 *
26 * Created on: 18, 5, 2023
27 * Author: Shuangxiang Kan
28 */
29
31#include "Util/SVFUtil.h"
34#include "Graphs/CallGraph.h"
35#include "Util/ExtAPI.h"
36
37using namespace std;
38using namespace SVF;
39using namespace SVFUtil;
40using namespace LLVMUtil;
41
42namespace
43{
44
// One pointer-typed leaf recorded while walking a type for memcpy modelling.
// Instances are created by brace-initialisation in field order
// (byte offset, access path, element type), so the member order matters.
45struct MemcpyField
46{
// Byte offset of the leaf from the start of the walked type.
47 APOffset byteOffset;
// Access path used later to obtain the leaf's GEP value variable.
48 AccessPath accessPath;
// SVF type of the leaf (a pointer type at the point where it is recorded).
49 const SVFType* elementType;
50};
51
53{
// Searches `annotation` for the marker "STORE_TOP:Arg", reads the decimal
// number that directly follows it into `firstArg`, and returns true only when
// that number is immediately followed by '+'.
// Returns false without touching `firstArg` when the marker is absent or is
// not followed by a digit; when digits are present but no '+' follows,
// `firstArg` has already been overwritten although false is returned.
// NOTE(review): the signature (original line 52) is missing from this listing;
// parameter types are not visible here.
54 const std::string prefix = "STORE_TOP:Arg";
55 const size_t start = annotation.find(prefix);
56 if (start == std::string::npos)
57 return false;
58
// First character after the marker must be a decimal digit.
59 size_t idx = start + prefix.size();
60 if (idx >= annotation.size() || annotation[idx] < '0' || annotation[idx] > '9')
61 return false;
62
// Accumulate the run of digits as a base-10 number.
// NOTE(review): there is no overflow guard; callers visible in this file pass
// a u32_t, so an over-long digit run would wrap - confirm this is acceptable.
63 firstArg = 0;
64 do
65 {
66 firstArg = firstArg * 10 + static_cast<u32_t>(annotation[idx] - '0');
67 ++idx;
68 }
69 while (idx < annotation.size() && annotation[idx] >= '0' && annotation[idx] <= '9');
70
// The number must be terminated by '+'.
71 return idx < annotation.size() && annotation[idx] == '+';
72}
73
75{
// Returns false when the call node has no called function; otherwise walks
// the annotations that ExtAPI reports for that function and returns true from
// inside the loop, or false once the loop finishes.
// NOTE(review): the signature (original line 74) and the statement guarding
// `return true` (original line 84) are missing from this listing; presumably
// the guard tests each annotation for the STORE_TOP marker - confirm against
// the full source.
76 const FunObjVar* extFun = callICFGNode->getCalledFunction();
77 if (extFun == nullptr)
78 return false;
79
80 for (const std::string& annotation :
81 ExtAPI::getExtAPI()->getExtFuncAnnotations(extFun))
82 {
83 u32_t firstArg = 0;
85 return true;
86 }
87 return false;
88}
89
91 const Type* llvmType,
92 const SVFType* svfType,
93 const DataLayout& dl,
94 IRGraph* pag,
95 std::vector<MemcpyField>& fields,
98{
// Appends to `fields` the pointer-typed leaves found in (llvmType, svfType),
// using `dl` to compute byte offsets.
// NOTE(review): the line carrying the function name, the trailing parameters
// (baseByteOffset and baseFldIdx are used below) and the per-element steps
// inside both loops are missing from this listing; presumably the loops
// recurse into each element - confirm against the full source.
// Nothing is collected when either type is null.
99 if (llvmType == nullptr || svfType == nullptr)
100 return;
101
// Leaf case: a pointer type is recorded with the current byte offset and an
// access path built from baseFldIdx.
102 if (svfType->isPointerTy())
103 {
104 fields.push_back({baseByteOffset, AccessPath(baseFldIdx), svfType});
105 return;
106 }
107
// Struct case: element byte offsets come from the target's StructLayout.
108 if (const auto* structType = SVFUtil::dyn_cast<StructType>(llvmType))
109 {
110 const StructLayout* layout = dl.getStructLayout(const_cast<StructType*>(structType));
111 for (u32_t i = 0; i < structType->getNumElements(); ++i)
112 {
113 const Type* elemLLVMType = structType->getElementType(i);
// The whole walk is abandoned (not just this element) when the element's
// SVF type is null.
115 if (elemSVFType == nullptr)
116 return;
117 APOffset elemByteOffset = baseByteOffset + static_cast<APOffset>(layout->getElementOffset(i));
120 }
121 return;
122 }
123
// Array case: the element's alloc size from the DataLayout is computed once
// before the per-element loop.
124 if (const auto* arrayType = SVFUtil::dyn_cast<ArrayType>(llvmType))
125 {
126 const Type* elemLLVMType = arrayType->getElementType();
128 if (elemSVFType == nullptr)
129 return;
130 const APOffset elemByteSize = static_cast<APOffset>(dl.getTypeAllocSize(const_cast<Type*>(elemLLVMType)));
131 for (u32_t i = 0; i < arrayType->getNumElements(); ++i)
132 {
136 }
137 }
138}
139
// Returns the list of MemcpyField entries that collectMemcpyFields gathers for
// the given LLVM/SVF type pair, using the DataLayout of the main LLVM module.
// `value` is not referenced on any line visible in this listing.
// NOTE(review): `mset` is defined on a line missing from this listing
// (original line 143); presumably it is the LLVMModuleSet singleton - confirm.
140std::vector<MemcpyField> getMemcpyFields(const Value* value, const Type* llvmType, const SVFType* svfType)
141{
142 std::vector<MemcpyField> fields;
144 auto* pag = PAG::getPAG();
145 const DataLayout& dl = mset->getMainLLVMModule()->getDataLayout();
146 collectMemcpyFields(llvmType, svfType, dl, pag, fields);
147 return fields;
148}
149
151{
// Chooses a type for `baseValue`: the allocated type when it is an alloca,
// the value type when it is a global variable, otherwise `fallbackType`.
// The *_or_null casts are used, so a null `baseValue` falls through to the
// fallback.
// NOTE(review): the signature (original line 150) is missing from this
// listing; the function name and parameter types are not visible here.
152 if (const auto* allocaInst = llvm::dyn_cast_or_null<AllocaInst>(baseValue))
153 return allocaInst->getAllocatedType();
154
155 if (const auto* global = llvm::dyn_cast_or_null<GlobalVariable>(baseValue))
156 return global->getValueType();
157
158 return fallbackType;
159}
160
161}
162
// Appends one AccessPath per element index in [0, numOfElems) to `fields` and
// returns objType. V must be non-null (asserted). When szValue is a
// ConstantInt, numOfElems is reduced to that constant if the constant is
// smaller.
// NOTE(review): the definitions of objType, numOfElems, context, builder, id
// and ls are on lines missing from this listing (original lines 170-172, 179,
// 182, 187, 189, 191); how they are derived cannot be stated from here.
166const Type* SVFIRBuilder::getBaseTypeAndFlattenedFields(const Value* V, std::vector<AccessPath> &fields, const Value* szValue)
167{
168 assert(V);
169 const Value* value = getBaseValueForExtArg(V);
// Cap the element count by a constant size argument, if one was supplied.
173 if(szValue && SVFUtil::isa<ConstantInt>(szValue))
174 {
175 auto szIntVal = LLVMUtil::getIntegerValue(SVFUtil::cast<ConstantInt>(szValue));
176 numOfElems = (numOfElems > szIntVal.first) ? szIntVal.first : numOfElems;
177 }
178
180 for(u32_t ei = 0; ei < numOfElems; ei++)
181 {
183 // make a ConstantInt and create char for the content type due to byte-wise copy
184 const ConstantInt* offset = ConstantInt::get(context, llvm::APInt(32, ei));
// Register the constant with the module set / PAG only if it has no value
// node yet.
185 if (!llvmModuleSet()->hasValueNode(offset))
186 {
188 builder.collectSym(offset);
190 pag->addConstantIntValNode(id, LLVMUtil::getIntegerValue(offset), nullptr, llvmModuleSet()->getSVFType(offset->getType()));
192 pag->getGNode(id));
193 }
// Record the offset variable on the access path (with a null GEP type) and
// emit the path.
194 ls.addOffsetVarAndGepTypePair(getPAG()->getValVar(llvmModuleSet()->getValueNode(offset)), nullptr);
195 fields.push_back(ls);
196 }
197 return objType;
198}
199
205{
// Models a copy from the memory referenced by S to the memory referenced by D,
// field by field. D and S must be non-null (asserted).
// NOTE(review): the signature and many statements are missing from this
// listing (e.g. the definitions of vnD, vnS, dstFieldBase, srcFieldBase,
// dstLayoutType, srcLayoutType, dstSVFType, srcSVFType, and the statements
// that actually add load/store edges); comments below describe only what is
// visible.
206 assert(D && S);
// Nothing to do when either side has no node.
208 if(!vnD || !vnS)
209 return;
210
211 std::vector<AccessPath> fields;
212
213 //Get the max possible size of the copy, unless it was provided.
214 std::vector<AccessPath> srcFields;
215 std::vector<AccessPath> dstFields;
// Use whichever side has fewer fields (the source list on a tie).
218 if(srcFields.size() > dstFields.size())
219 fields = dstFields;
220 else
221 fields = srcFields;
222
224 u32_t sz = fields.size();
225
// Special case: a single field where either operand is constant data; the
// handling itself is on lines missing from this listing.
226 if (fields.size() == 1 && (LLVMUtil::isConstDataOrAggData(D) || LLVMUtil::isConstDataOrAggData(S)))
227 {
231 return;
232 }
233
// True when either side's field base differs from the original operand and is
// a global variable.
240 const bool hasRemappedGlobalBase =
241 (dstFieldBase != D && SVFUtil::isa<GlobalVariable>(dstFieldBase)) ||
242 (srcFieldBase != S && SVFUtil::isa<GlobalVariable>(srcFieldBase));
243 const bool useByteLayoutMemcpy =
246 {
// Byte-layout path: pair destination and source pointer fields that sit at
// the same byte offset.
249 std::vector<MemcpyField> dstMemcpyFields = getMemcpyFields(D, dstLayoutType, dstSVFType);
250 std::vector<MemcpyField> srcMemcpyFields = getMemcpyFields(S, srcLayoutType, srcSVFType);
251 if (!dstMemcpyFields.empty() && !srcMemcpyFields.empty())
252 {
// Index source fields by byte offset; emplace keeps the first entry for a
// given offset.
253 std::unordered_map<APOffset, MemcpyField> srcFieldsByByteOffset;
254 for (const auto& field : srcMemcpyFields)
255 srcFieldsByByteOffset.emplace(field.byteOffset, field);
256
// Copy length = min(alloc size of dst type, alloc size of src type), further
// limited by szValue when it is a constant.
257 const DataLayout& dl = llvmModuleSet()->getMainLLVMModule()->getDataLayout();
258 APOffset copyBytes = std::min<APOffset>(
259 static_cast<APOffset>(dl.getTypeAllocSize(const_cast<Type*>(dstLayoutType))),
260 static_cast<APOffset>(dl.getTypeAllocSize(const_cast<Type*>(srcLayoutType))));
261 if (szValue && SVFUtil::isa<ConstantInt>(szValue))
262 {
263 auto szIntVal = LLVMUtil::getIntegerValue(SVFUtil::cast<ConstantInt>(szValue));
264 copyBytes = std::min(copyBytes, static_cast<APOffset>(szIntVal.first));
265 }
266
267 for (const auto& dstField : dstMemcpyFields)
268 {
// Skip destination fields that start at or beyond the copied range, and
// those with no source field at the same byte offset.
269 if (dstField.byteOffset >= copyBytes)
270 continue;
271 auto it = srcFieldsByByteOffset.find(dstField.byteOffset);
272 if (it == srcFieldsByByteOffset.end())
273 continue;
274
275 NodeID dField = getGepValVar(dstFieldBase, dstField.accessPath, dstField.elementType);
276 NodeID sField = getGepValVar(srcFieldBase, it->second.accessPath, it->second.elementType);
280 }
281 return;
282 }
283 }
284
285 //For each field (i), add (Ti = *S + i) and (*D + i = Ti).
286 for (u32_t index = 0; index < sz; index++)
287 {
290 fields[index].getConstantStructFldIdx());
292 fields[index].getConstantStructFldIdx());
298 }
299}
300
302{
// Step 1: for each annotation of the called external function, argument
// indices from `firstArg` up to the last call argument are inserted into
// storeTopArgs; annotations whose firstArg is out of range are skipped.
// Step 2: for each pointer-typed argument visited by the second loop, a store
// edge from the node returned by pag->getBlkPtr() to the argument's node is
// added when both ids are non-zero.
// NOTE(review): the signature, the declaration of storeTopArgs, the annotation
// source, the test that precedes the first `continue`, the header of the
// second loop and the condition guarding `dst = fieldZero` are on lines
// missing from this listing - confirm details against the full source.
304 const FunObjVar* extFun = callICFGNode->getCalledFunction();
305 if (extFun)
306 {
307 for (const std::string& annotation :
309 {
310 u32_t firstArg = 0;
312 continue;
// Ignore annotations that name an argument position the call does not have.
313 if (firstArg >= cs->arg_size())
314 continue;
315
316 for (u32_t argIdx = firstArg; argIdx < cs->arg_size(); ++argIdx)
317 storeTopArgs.insert(argIdx);
318 }
319 }
320
322 {
323 const Value* arg = cs->getArgOperand(argIdx);
// Only pointer arguments can be stored through.
324 if (!arg->getType()->isPointerTy())
325 continue;
326
327 const Type* storedType =
329 NodeID src = pag->getBlkPtr();
330 NodeID dst = getValueNode(arg);
332 dst = fieldZero;
333 if (src && dst)
334 addStoreEdge(src, dst);
335 }
336}
337
339{
// Dispatches on the kind of external callee and adds the corresponding
// edges (copy, store, thread-fork, ...) for the call site `cs`.
// NOTE(review): the signature and most branch conditions are on lines missing
// from this listing (original lines 338, 340, 342, 346, 352, 372, 387, 473);
// the per-branch comments below describe only the visible statements.
341
343 {
345 }
347 {
351 }
353 {
// Branch for callees that return a new object through an argument: the
// argument at arg_pos must be a pointer, otherwise a warning is written.
355 Value* arg = cs->getArgOperand(arg_pos);
356 if (cs->getArgOperand(arg_pos)->getType()->isPointerTy())
357 {
360 NodeID obj = pag->addDummyObjNode(llvmModuleSet()->getSVFType(cs->getArgOperand(arg_pos)->getType()));
361 if (vnArg && dummy && obj)
362 {
365 }
366 }
367 else
368 {
369 writeWrnMsg("Arg receiving new object must be pointer type");
370 }
371 }
373 {
374 // Side-effects similar to void *memcpy(void *dest, const void * src, size_t n)
375 // which copies n characters from memory area 'src' to memory area 'dest'.
376 if(callee->getName().find("iconv") != std::string::npos)
377 addComplexConsForExt(cs->getArgOperand(3), cs->getArgOperand(1), nullptr);
378 else if(callee->getName().find("bcopy") != std::string::npos)
379 addComplexConsForExt(cs->getArgOperand(1), cs->getArgOperand(0), cs->getArgOperand(2));
// NOTE(review): this `if` is not chained with `else` to the iconv/bcopy test
// above, so for those callees a second constraint with (dst=arg0, src=arg1)
// is added as well - confirm whether that is intended or an `else` is missing.
380 if(cs->arg_size() == 3)
381 addComplexConsForExt(cs->getArgOperand(0), cs->getArgOperand(1), cs->getArgOperand(2));
382 else
383 addComplexConsForExt(cs->getArgOperand(0), cs->getArgOperand(1), nullptr);
// When the call itself yields a pointer, it is modelled as a copy of arg0.
384 if(SVFUtil::isa<PointerType>(cs->getType()))
385 addCopyEdge(getValueNode(cs->getArgOperand(0)), getValueNode(cs), CopyStmt::COPYVAL);
386 }
388 {
389 // Side-effects similar to memset(void *str, int c, size_t n)
390 // which copies the character c (an unsigned char) to the first n characters of the string pointed to, by the argument str
391 std::vector<AccessPath> dstFields;
392 const Type *dtype = getBaseTypeAndFlattenedFields(cs->getArgOperand(0), dstFields, cs->getArgOperand(2));
393 u32_t sz = dstFields.size();
394 //For each field (i), add store edge *(arg0 + i) = arg1
395 for (u32_t index = 0; index < sz; index++)
396 {
399 dstFields[index].getConstantStructFldIdx());
400 NodeID dField = getGepValVar(cs->getArgOperand(0), dstFields[index], dElementType);
401 addStoreEdge(getValueNode(cs->getArgOperand(1)),dField);
402 }
// When the call itself yields a pointer, it is modelled as a copy of arg0.
403 if(SVFUtil::isa<PointerType>(cs->getType()))
404 addCopyEdge(getValueNode(cs->getArgOperand(0)), getValueNode(cs), CopyStmt::COPYVAL);
405 }
406 else if(callee->getName().compare("dlsym") == 0)
407 {
408 /*
409 Side-effects of void* dlsym( void* handle, const char* funName),
410 Locate the function with the name "funName," then add a "copy" edge between the callsite and that function.
411 dlsym() example:
412 int main() {
413 // Open the shared library
414 void* handle = dlopen("./my_shared_library.so", RTLD_LAZY);
415 // Find the function address
416 void (*myFunctionPtr)() = (void (*)())dlsym(handle, "myFunction");
417 // Call the function
418 myFunctionPtr();
419 }
420 */
// The symbol name is arg1; if it is given through a GEP instruction, look at
// the GEP's pointer operand with constant casts stripped.
421 const Value* src = cs->getArgOperand(1);
422 if(const GetElementPtrInst* gep = SVFUtil::dyn_cast<GetElementPtrInst>(src))
423 src = stripConstantCasts(gep->getPointerOperand());
424
// Resolves the name to a program function only when `src` is a global
// variable whose initializer is a ConstantDataArray; otherwise yields nullptr.
425 auto getHookFn = [](const Value* src)->const Function*
426 {
427 if (!SVFUtil::isa<GlobalVariable>(src))
428 return nullptr;
429
430 auto *glob = SVFUtil::cast<GlobalVariable>(src);
431 if (!glob->hasInitializer() || !SVFUtil::isa<ConstantDataArray>(glob->getInitializer()))
432 return nullptr;
433
434 auto *constarray = SVFUtil::cast<ConstantDataArray>(glob->getInitializer());
435 return LLVMUtil::getProgFunction(constarray->getAsCString().str());
436 };
437
438 if (const Function *fn = getHookFn(src))
439 {
442 }
443 }
444 else if(callee->getName().find("_ZSt29_Rb_tree_insert_and_rebalancebPSt18_Rb_tree_node_baseS0_RS_") != std::string::npos)
445 {
446 // The purpose of this function is to insert a new node into the red-black tree and then rebalance the tree to ensure that the red-black tree properties are maintained.
447 assert(cs->arg_size() == 4 && "_Rb_tree_insert_and_rebalance should have 4 arguments.\n");
448
449 // We have vArg3 points to the entry of _Rb_tree_node_base { color; parent; left; right; }.
450 // Now we calculate the offset from base to vArg3
451 NodeID vnArg3 = llvmModuleSet()->getValueNode(cs->getArgOperand(3));
454
455 // We get all flattened fields of base
457
458 // We summarize the side effects: arg3->parent = arg1, arg3->left = arg1, arg3->right = arg1
459 // Note that arg0 is aligned with "offset".
// Visits the three fields after `offset`; stops early if the flattened field
// list is shorter than that.
460 for (APOffset i = offset + 1; i <= offset + 3; ++i)
461 {
462 if((u32_t)i >= fields.size())
463 break;
464 const SVFType* elementType = pag->getFlatternedElemType(pag->getTypeLocSetsMap(vnArg3).first,
465 fields[i].getConstantStructFldIdx());
466 NodeID vnD = getGepValVar(cs->getArgOperand(3), fields[i], elementType);
467 NodeID vnS = llvmModuleSet()->getValueNode(cs->getArgOperand(1));
468 if(vnD && vnS)
470 }
471 }
472
474 {
// Thread-fork handling: when the forked routine resolves to a function value,
// connect the actual argument at the fork site to the routine's first formal
// parameter, provided both are pointers.
476 if (const FunValVar* funcValVar = SVFUtil::dyn_cast<FunValVar>(valVar))
477 {
// NOTE(review): the assert message says the formal parameter count "should be
// one", but the asserted condition accepts up to 2 - confirm which is meant.
482 assert((forkedFun->arg_size() <= 2) && "Size of formal parameter of start routine should be one");
483 if (forkedFun->arg_size() <= 2 && forkedFun->arg_size() >= 1)
484 {
485 const ArgValVar* formalParm = forkedFun->getArg(0);
487 if (actualParm->isPointer() && formalParm->getType()->isPointerTy())
488 {
490 addThreadForkEdge(actualParm->getId(), formalParm->getId(), callICFGNode, entry);
491 }
492 }
493 }
494 else
495 {
500 }
504 }
505
507}
unsigned u32_t
Definition CommandLine.h:18
buffer offset
Definition cJSON.cpp:1113
int index
Definition cJSON.h:170
APOffset getConstantStructFldIdx() const
Get methods.
Definition AccessPath.h:98
Class representing a function argument variable in the SVFIR.
static ExtAPI * getExtAPI()
Definition ExtAPI.cpp:44
const std::vector< std::string > & getExtFuncAnnotations(const FunObjVar *fun)
Definition ExtAPI.cpp:256
virtual const FunObjVar * getFunction() const
Get containing function, or null for globals/constants.
const FunObjVar * getDefFunForMultipleModule() const
NodeType * getGNode(NodeID id) const
Get a node.
FunEntryICFGNode * getFunEntryICFGNode(const FunObjVar *fun)
Add a function entry node.
Definition ICFG.cpp:242
u32_t getFlattenedElemIdx(const SVFType *T, u32_t origId)
Flattened element idx of an array or struct by considering stride.
Definition IRGraph.cpp:144
u32_t getNumOfFlattenElements(const SVFType *T)
Definition IRGraph.cpp:169
NodeID getBlkPtr() const
Definition IRGraph.h:255
const SVFType * getFlatternedElemType(const SVFType *baseType, u32_t flatten_idx)
Return the type of a flattened element given a flattened index.
Definition IRGraph.cpp:123
const SVFType * getOriginalElemType(const SVFType *baseType, u32_t origId) const
Definition IRGraph.cpp:139
NodeID getValueNode(const Value *V)
Module * getMainLLVMModule() const
Definition LLVMModule.h:366
static LLVMModuleSet * getLLVMModuleSet()
Definition LLVMModule.h:131
void addToSVFVar2LLVMValueMap(const Value *val, SVFValue *svfBaseNode)
SVFType * getSVFType(const Type *T)
Get or create SVFType and typeinfo.
CallICFGNode * getCallICFGNode(const Instruction *cs)
get a call node
NodeID getObjectNode(const Value *V)
LLVMContext & getContext() const
Definition LLVMModule.h:381
ObjTypeInference * getTypeInference()
const Type * inferObjType(const Value *var)
get or infer the type of the object pointed by the value
SVFIR * getPAG() const
Return SVFIR.
void addStoreEdge(NodeID src, NodeID dst)
Add Store edge.
void addLoadEdge(NodeID src, NodeID dst)
Add Load edge.
virtual void handleExtCall(const CallBase *cs, const Function *callee)
AddrStmt * addAddrWithHeapSz(NodeID src, NodeID dst, const CallBase *cs)
Add Address edge from ext call with args like "%5 = call i8* @malloc(i64 noundef 5)".
LLVMModuleSet * llvmModuleSet()
void addThreadForkEdge(NodeID src, NodeID dst, const CallICFGNode *cs, const FunEntryICFGNode *entry)
Add Thread fork edge for parameter passing.
AccessPath getAccessPathFromBaseNode(NodeID nodeId)
const Value * getBaseValueForExtArg(const Value *V)
Get the base value of (i8* src and i8* dst) for external argument (e.g. memcpy(i8* dst,...
virtual void handleNondetArgStoreAtExtCall(const CallBase *cs, const CallICFGNode *callICFGNode)
NodeID getDirectAccessFieldZeroValVar(const Value *ptr, const Type *accessTy)
virtual const Type * getBaseTypeAndFlattenedFields(const Value *V, std::vector< AccessPath > &fields, const Value *szValue)
Handle external call.
CopyStmt * addCopyEdge(NodeID src, NodeID dst, CopyStmt::CopyKind kind)
NodeID getValueNode(const Value *V)
Get different kinds of node.
virtual void addComplexConsForExt(Value *D, Value *S, const Value *sz)
NodeID getGepValVar(const Value *val, const AccessPath &ap, const SVFType *elementType)
NodeID addConstantIntValNode(NodeID i, const std::pair< s64_t, u64_t > &intValue, const ICFGNode *icfgNode, const SVFType *type)
Definition SVFIR.h:689
ICFG * getICFG() const
Definition SVFIR.h:229
NodeID addDummyValNode()
Definition SVFIR.h:564
static SVFIR * getPAG(bool buildFromFile=false)
Singleton design here to make sure we only have one instance during any analysis.
Definition SVFIR.h:118
SVFTypeLocSetsPair & getTypeLocSetsMap(NodeID argId)
Given an arg NodeId, get its base SVFType* and all its field location sets.
Definition SVFIR.h:339
NodeID addDummyObjNode(const SVFType *type)
Definition SVFIR.h:568
const Function * getProgFunction(const std::string &funName)
Get program entry function from module.
Definition LLVMUtil.cpp:40
const Value * stripConstantCasts(const Value *val)
Strip off the constant casts.
Definition LLVMUtil.cpp:219
bool isHeapAllocExtCallViaRet(const Instruction *inst)
Definition LLVMUtil.cpp:638
bool isMemcpyExtFun(const Function *fun)
Definition LLVMUtil.cpp:389
std::pair< s64_t, u64_t > getIntegerValue(const ConstantInt *intValue)
Definition LLVMUtil.h:83
bool isConstDataOrAggData(const Value *val)
Return true if the value refers to constant data, e.g., i32 0.
Definition LLVMUtil.h:374
bool isHeapAllocExtCallViaArg(const Instruction *inst)
Definition LLVMUtil.cpp:653
bool isMemsetExtFun(const Function *fun)
Definition LLVMUtil.cpp:395
bool isObject(const Value *ref)
Return true if this value refers to a object.
Definition LLVMUtil.cpp:60
u32_t getHeapAllocHoldingArgPosition(const Function *fun)
Definition LLVMUtil.cpp:401
bool isThreadForkCall(const CallICFGNode *inst)
Definition SVFUtil.h:360
void writeWrnMsg(const std::string &msg)
Writes a message run through wrnMsg.
Definition SVFUtil.cpp:68
const ValVar * getActualParmAtForkSite(const CallICFGNode *cs)
Return sole argument of the thread routine.
Definition SVFUtil.h:408
const ValVar * getForkedFun(const CallICFGNode *inst)
Return thread fork function.
Definition SVFUtil.h:331
for isBitcode
Definition BasicTypes.h:70
llvm::DataLayout DataLayout
Definition BasicTypes.h:112
llvm::Type Type
Definition BasicTypes.h:87
llvm::CallBase CallBase
Definition BasicTypes.h:153
llvm::StructType StructType
LLVM types.
Definition BasicTypes.h:98
u32_t NodeID
Definition GeneralType.h:56
llvm::StructLayout StructLayout
Definition BasicTypes.h:109
s64_t APOffset
Definition GeneralType.h:60
llvm::Function Function
Definition BasicTypes.h:89
llvm::Value Value
LLVM Basic classes.
Definition BasicTypes.h:86
llvm::IRBuilder IRBuilder
Definition BasicTypes.h:76
llvm::GetElementPtrInst GetElementPtrInst
Definition BasicTypes.h:169
unsigned u32_t
Definition GeneralType.h:47
llvm::ConstantInt ConstantInt
Definition BasicTypes.h:129
llvm::LLVMContext LLVMContext
Definition BasicTypes.h:72