Static Value-Flow Analysis
Loading...
Searching...
No Matches
CFGNormalizer.cpp
Go to the documentation of this file.
1//===----- CFGNormalizer.cpp -- Context Free Grammar Normalizer--------------//
2//
3// SVF: Static Value-Flow Analysis
4//
5// Copyright (C) <2013-> <Yulei Sui>
6//
7
8// This program is free software: you can redistribute it and/or modify
9// it under the terms of the GNU Affero General Public License as published by
10// the Free Software Foundation, either version 3 of the License, or
11// (at your option) any later version.
12
13// This program is distributed in the hope that it will be useful,
14// but WITHOUT ANY WARRANTY; without even the implied warranty of
15// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16// GNU Affero General Public License for more details.
17
18// You should have received a copy of the GNU Affero General Public License
19// along with this program. If not, see <http://www.gnu.org/licenses/>.
20//
21//===----------------------------------------------------------------------===//
22
23/*
24 * CFGNormalizer.cpp
25 *
26 * Created on: April 19 , 2022
27 * Author: Pei Xu
28 */
29
30#include "CFL/CFGNormalizer.h"
31#include "Util/SVFUtil.h"
32#include "Util/WorkList.h"
33#include "SVFIR/SVFValue.h"
34#include <string>
35#include <fstream>
36#include <sstream>
37#include <iostream>
38
39using namespace SVF;
40
42{
43 CFGrammar *grammar = new CFGrammar();
44 grammar->setStartKind(generalGrammar->getStartKind());
45 grammar->setTerminals(generalGrammar->getTerminals());
46 grammar->setNonterminals(generalGrammar->getNonterminals());
47 grammar->setEBNFSigns(generalGrammar->getEBNFSigns());
48 grammar->setTotalKind(generalGrammar->getTotalKind());
49 grammar->setAttributeKinds(generalGrammar->getAttrSyms());
50 grammar->setKindToAttrsMap(generalGrammar->getKindToAttrsMap());
51 grammar->setRawProductions(generalGrammar->getRawProductions());
52 barReplace(grammar);
53 ebnfSignReplace('*', grammar);
54 ebnfSignReplace('?', grammar);
55 ebnf_bin(grammar);
56 fillAttribute(grammar, grammar->getKindToAttrsMap());
57 return grammar;
58}
59
60
62{
63 NodeSet nodeSet = {};
64 for (auto pair: kindToAttrsMap)
65 {
66 for (auto attri: pair.second)
67 {
68 nodeSet.insert(attri);
69 }
70 }
71 for(auto symProdsPair: grammar->getRawProductions())
72 {
73 for(auto prod: symProdsPair.second)
74 {
78 tempP.insert(tempP.begin(), symProdsPair.first);
80 getFilledProductions(tempP, nodeSet, grammar, normalProds);
81 for (auto filledProd : normalProds)
82 {
84 }
85 }
86 }
87
88 return grammar;
89}
90
92{
94 std::string tempStr = "";
95 removeFirstSymbol(grammar);
96
97 auto rawProductions = grammar->getRawProductions();
98
99 for(auto itr : rawProductions)
100 {
101 auto head = *(grammar->getRawProductions().find(itr.first));
102 for(auto rule: head.second)
103 {
104 if (rule.size() < 3) continue;
105
107 GrammarBase::Production long_run(rule.begin() + 1, rule.end());
108 auto it = grammar->getRawProductions()[head.first].find(rule);
109 grammar->getRawProductions()[head.first].erase(it);
111 if (X == u32_t(-1))
112 {
113 X = check_head(grammar->getRawProductions(), long_run);
114 }
115 if ((X == u32_t(-1)) == false)
116 {
117 rule = {first, X};
118 grammar->getRawProductions()[head.first].insert(rule);
119 }
120 else
121 {
122 tempStr = "X";
123 std::ostringstream ss;
124 ss << grammar->num_generator();
125 tempStr.append(ss.str());
131 for (unsigned i = 0; i < long_run.size(); i++)
132 {
133 GrammarBase::VariableAttribute variableAttribute = long_run[i].variableAttribute;
134 if ( variableAttribute != 0)
135 {
136 variableAttributeSet.insert(variableAttribute);
137 }
138 }
139 if ( variableAttributeSet.size() == 1)
140 {
141 tempStr += "_";
142 tempStr += char(*variableAttributeSet.begin());
143 }
145 rule = {first, tempSym};
146 grammar->getRawProductions()[head.first].insert(rule);
147 X = tempSym;
148 }
149 new_grammar[X] = {};
152 if (long_run.size() ==2)
153 {
154 new_grammar[X].insert(temp_p);
155 long_run.clear();
156 }
157 else
158 {
159 new_grammar[X].insert(long_run);
160 RHX = X;
161 }
162 while (long_run.size() > 2)
163 {
164 first = long_run[0];
166 long_run.erase(long_run.begin());
167
168 X = RHX;
170
172 if (RHX == u32_t(-1))
173 {
175 }
176 if(RHX == u32_t(-1))
177 {
178 tempStr = "X";
179 std::ostringstream ss;
180 ss << grammar->num_generator();
181 tempStr.append(ss.str());
183 for (unsigned i = 0; i < long_run.size(); i++)
184 {
185 GrammarBase::VariableAttribute variableAttribute = long_run[i].variableAttribute;
186 if ( variableAttribute != 0)
187 {
188 variableAttributeSet.insert(variableAttribute);
189 }
190 }
191 if ( variableAttributeSet.size() == 1)
192 {
193 tempStr += "_";
194 tempStr += char(*variableAttributeSet.begin());
195 }
197 auto it = new_grammar[X].find(prev_rule);
198 new_grammar[X].erase(it);
199 new_grammar[X].insert({first, tempSym});
201 RHX = tempSym;
202 }
203 }
204 }
205 }
206 for (auto new_head : new_grammar)
207 {
208 for (auto prod : new_head.second)
209 {
210 auto it = grammar->getRawProductions()[new_head.first].find(prod);
211 if (it == grammar->getRawProductions()[new_head.first].end())
212 {
213 grammar->getRawProductions()[new_head.first].insert(prod);
214 }
215 }
216 }
217}
218
224{
225 normalProds.clear();
227 worklist.push(prod);
228 while( worklist.empty() == false )
229 {
233 // GrammarBase::Kind baseKind;
235 {
236 if ( currentVariableAttribute == 0 )
237 {
238 currentVariableAttribute = symbol.variableAttribute;
239 // baseKind = symbol.kind;
240 }
241 }
242 if ( currentVariableAttribute == 0)
243 {
245 continue;
246 }
247 //*(kindToAttriMap.find(baseKind));
248 //for (auto attribute : nodeSet.second)
249 for (auto attribute : nodeSet)
250 {
253 {
254 if ( symbol.variableAttribute == currentVariableAttribute)
255 {
256 symbol.attribute = attribute;
257 symbol.variableAttribute = 0;
258 }
259 }
261 bool continueToFill = false;
263 {
264 if ( symbol.variableAttribute != 0 )
265 {
266 continueToFill = true;
267 }
268 }
269 if ( continueToFill == false)
270 {
272 }
273 else
274 {
275 worklist.push(fillingProduction);
276 }
277 }
278 }
279}
280
282{
283 int index = i;
284 while (index >= 0)
285 {
286 if (grammar->kindToStr(prod[index].kind) == "(")
287 {
288 return index;
289 }
290 index--;
291 }
292 return 0;
293}
294
296{
297 for (auto &symbolToProductionsPair : grammar->getRawProductions())
298 {
300 //GrammarBase::Productions Originalproductions = symbolToProductionsPair.second;
301 for (auto ebnfProduction : symbolToProductionsPair.second)
302 {
303 size_t i = 1;
304 size_t j = 1;
305 while (i < ebnfProduction.size())
306 {
307 if (grammar->kindToStr(ebnfProduction[i].kind) == "|")
308 {
310 tempPro.insert(tempPro.begin(), symbolToProductionsPair.first );
311 productions.insert(tempPro);
312 j = i+1;
313 }
314 i++;
315 }
317 tempPro.insert(tempPro.begin(), symbolToProductionsPair.first );
318 productions.insert(tempPro);
319 }
320 symbolToProductionsPair.second.clear();
322 }
323}
324
326{
330 std::string tempNonterminal = "X";
331
332 for (auto &symbolToProductionsPair : grammar->getRawProductions())
333 {
335 for (auto ebnfProduction : symbolToProductionsPair.second)
336 {
337 size_t i = 1;
338 while (i < ebnfProduction.size())
339 {
341 if (grammar->kindToStr(ebnfProduction[i].kind) == std::string(1, sign))
342 {
344 assert(i != 1 && "sign in grammar associate with no symbol");
345 if (grammar->kindToStr(ebnfProduction[i - 1].kind) != std::string(1, ')'))
346 {
347 signGroupStart = i - 1;
348 }
350 else
351 {
353 }
354 std::string groupString = "";
355 for (size_t j = signGroupStart; j < i; j++)
356 {
357 groupString.append(grammar->kindToStr(ebnfProduction[j].kind));
358 groupString.append(" ");
359 }
360 groupString.append(grammar->kindToStr(ebnfProduction[i].kind));
361 if (newProductions.find(groupString) != newProductions.end())
362 {
364 ebnfProduction.erase(ebnfProduction.begin() + signGroupStart, ebnfProduction.begin() + i + 1);
367 }
368 else if ( (signGroupStart == 1) && (i == ebnfProduction.size() -1))
369 {
372 ebnfProduction.erase(ebnfProduction.begin() + signGroupStart, ebnfProduction.begin() + i + 1);
373
374 }
375 else
376 {
377 tempNonterminal = "X";
378 std::ostringstream ss;
379 ss << grammar->num_generator();
380 tempNonterminal.append(ss.str());
383 ebnfProduction.erase(ebnfProduction.begin() + signGroupStart, ebnfProduction.begin() + i + 1);
387 }
388
390 }
391 i++;
392 }
393 }
395 }
396 for(auto rep: newProductions)
397 {
399 std::string new_nonterminal = rep.second;
401 grammar->getRawProductions()[grammar->strToSymbol(new_nonterminal)].insert(temp_list);
404 if (sign == '*' || sign == '?')
405 {
408 strTrans(rep.first, grammar, normalProd);
410 if (sign == '*')
411 {
412 for (auto &word : normalProd)
413 {
414 if (word != grammar->strToSymbol("*") && word != grammar->strToSymbol("(") && word != grammar->strToSymbol(")"))
415 {
416 withoutSign.push_back(word);
417 }
418 }
419 withoutSign.push_back(grammar->strToSymbol(rep.second));
420 }
421 if (sign == '?')
422 {
423 for (auto &word : normalProd)
424 {
425 if (word != grammar->strToSymbol("?") && word != grammar->strToSymbol("(") && word != grammar->strToSymbol(")"))
426 {
427 withoutSign.push_back(word);
428 }
429 }
430 }
431 temp_list.insert(temp_list.end(), withoutSign.begin(), withoutSign.end());
432 }
433 grammar->getRawProductions()[grammar->strToSymbol(new_nonterminal)].insert(temp_list);
434 }
435}
436
438{
439 // Find the position of the first non-whitespace character
440 size_t start = LHS.find_first_not_of(" \t\n\r");
441 // If the string contains non-whitespace characters, remove leading spaces
442 if (start != std::string::npos)
443 {
444 LHS = LHS.substr(start);
445 }
446 else
447 {
448 // If the string contains only spaces, clear it
449 LHS.clear();
450 }
451
452 std::string delimiter;
453 size_t pos;
454 std::string word;
455
456 delimiter = " ";
457 while ((pos = LHS.find(delimiter)) != std::string::npos)
458 {
459 word = LHS.substr(0, pos);
460 LHS.erase(0, pos + delimiter.length());
461 normalProd.push_back(grammar->strToSymbol(word));
462 }
463 normalProd.push_back(grammar->strToSymbol(LHS));
464}
465
466
468{
469 for(auto symProdPair: grammar)
470 {
471 for(auto prod: symProdPair.second)
472 {
473 if (rule == prod)
474 {
475 return symProdPair.first;
476 }
477 }
478 }
480 return symbol;
481}
482
485{
486 if (prod.size() == 2)
487 {
488 if ((std::find(prod.begin(), prod.end(), grammar->strToKind("epsilon")) != prod.end()))
489 {
490 if (std::find(grammar->getEpsilonProds().begin(), grammar->getEpsilonProds().end(), prod) == grammar->getEpsilonProds().end())
491 {
492 grammar->getEpsilonProds().insert(prod);
493 }
494 }
495 else
496 {
497 grammar->getSingleRHSToProds()[prod[1]].insert(prod);
498 }
499 }
500 if (prod.size() == 3)
501 {
502 grammar->getFirstRHSToProds()[prod[1]].insert(prod);
503 grammar->getSecondRHSToProds()[prod[2]].insert(prod);
504 }
505}
506
508{
509 // Remove First Terminal
510 for(auto head : grammar->getRawProductions())
511 {
512 for(auto rule: head.second)
513 {
514
516 long_run.erase(long_run.begin());
517 auto it = grammar->getRawProductions().at(head.first).find(rule);
518 grammar->getRawProductions().at(head.first).erase(it);
519 grammar->getRawProductions()[head.first].insert(long_run);
520 }
521 }
522}
int index
Definition cJSON.h:170
void barReplace(CFGrammar *grammar)
void ebnf_bin(CFGrammar *grammar)
Add nonterminal to tranfer long rules to binary rules.
void ebnfSignReplace(char sign, CFGrammar *grammar)
void insertToCFLGrammar(CFGrammar *grammar, GrammarBase::Production &prod)
Based on prod size to add on suitable member field of grammar.
void removeFirstSymbol(CFGrammar *grammar)
CFGrammar * fillAttribute(CFGrammar *grammar, const Map< CFGrammar::Kind, Set< CFGrammar::Attribute > > &kindToAttrsMap)
Expand every variable attribute in rawProductions of grammarbase.
CFGrammar * normalize(GrammarBase *generalGrammar)
Binary Normal Form(BNF) normalization with variable attribute expanded.
int ebnfBracketMatch(GrammarBase::Production &prod, int i, CFGrammar *grammar)
void getFilledProductions(GrammarBase::Production &prod, const NodeSet &nodeSet, CFGrammar *grammar, GrammarBase::Productions &normalProds)
void strTrans(std::string strPro, CFGrammar *grammar, GrammarBase::Production &normalProd)
GrammarBase::Symbol check_head(GrammarBase::SymbolMap< GrammarBase::Symbol, GrammarBase::Productions > &grammar, GrammarBase::Production &rule)
SymbolMap< Symbol, Productions > & getFirstRHSToProds()
Definition CFGrammar.h:318
SymbolMap< Symbol, Productions > & getSingleRHSToProds()
Definition CFGrammar.h:313
const u32_t num_generator()
Definition CFGrammar.h:388
SymbolMap< Symbol, Productions > & getSecondRHSToProds()
Definition CFGrammar.h:323
Productions & getEpsilonProds()
Definition CFGrammar.h:308
bool empty() const
Definition CFGrammar.h:416
bool push(Data data)
Definition CFGrammar.h:429
void setStartKind(Kind startKind)
Definition CFGrammar.h:210
void setAttributeKinds(const Set< Kind > &attributeKind)
Definition CFGrammar.cpp:50
const Map< Kind, Set< Attribute > > & getKindToAttrsMap() const
Definition CFGrammar.h:195
void setRawProductions(SymbolMap< Symbol, Productions > &rawProductions)
Definition CFGrammar.cpp:40
void setTotalKind(Kind totalKind)
Definition CFGrammar.h:215
Kind strToKind(std::string str) const
Definition CFGrammar.cpp:55
std::unordered_map< Key, Value, Hash, KeyEqual, Allocator > SymbolMap
Definition CFGrammar.h:150
Symbol strToSymbol(const std::string str) const
Definition CFGrammar.cpp:74
Symbol insertNonTerminalSymbol(std::string strLit)
SymbolMap< Symbol, Productions > & getRawProductions()
Definition CFGrammar.h:190
u32_t VariableAttribute
Definition CFGrammar.h:41
SymbolSet< Production > Productions
Definition CFGrammar.h:157
void setKindToAttrsMap(const Map< Kind, Set< Attribute > > &kindToAttrsMap)
Definition CFGrammar.cpp:45
std::string kindToStr(Kind kind) const
void setEBNFSigns(Map< std::string, Kind > &EBNFSigns)
Definition CFGrammar.h:185
void setTerminals(Map< std::string, Kind > &terminals)
Definition CFGrammar.h:175
std::vector< Symbol > Production
Definition CFGrammar.h:156
void setNonterminals(Map< std::string, Kind > &nonterminals)
Definition CFGrammar.h:165
for isBitcode
Definition BasicTypes.h:68
Set< NodeID > NodeSet
std::unordered_map< Key, Value, Hash, KeyEqual, Allocator > Map
llvm::IRBuilder IRBuilder
Definition BasicTypes.h:74
signed s32_t
Definition GeneralType.h:47
unsigned u32_t
Definition GeneralType.h:46