Static Value-Flow Analysis
Loading...
Searching...
No Matches
Public Member Functions | Private Member Functions | List of all members
SVF::CFGNormalizer Class Reference

#include <CFGNormalizer.h>

Public Member Functions

 CFGNormalizer ()
 
CFGrammarnormalize (GrammarBase *generalGrammar)
 Binary Normal Form(BNF) normalization with variable attribute expanded.
 
CFGrammarfillAttribute (CFGrammar *grammar, const Map< CFGrammar::Kind, Set< CFGrammar::Attribute > > &kindToAttrsMap)
 Expand every variable attribute in rawProductions of grammarbase.
 

Private Member Functions

void ebnf_bin (CFGrammar *grammar)
 Add nonterminal to tranfer long rules to binary rules.
 
void ebnfSignReplace (char sign, CFGrammar *grammar)
 
void barReplace (CFGrammar *grammar)
 
void insertToCFLGrammar (CFGrammar *grammar, GrammarBase::Production &prod)
 Based on prod size to add on suitable member field of grammar.
 
int ebnfBracketMatch (GrammarBase::Production &prod, int i, CFGrammar *grammar)
 
GrammarBase::Symbol check_head (GrammarBase::SymbolMap< GrammarBase::Symbol, GrammarBase::Productions > &grammar, GrammarBase::Production &rule)
 
void strTrans (std::string strPro, CFGrammar *grammar, GrammarBase::Production &normalProd)
 
void getFilledProductions (GrammarBase::Production &prod, const NodeSet &nodeSet, CFGrammar *grammar, GrammarBase::Productions &normalProds)
 
void removeFirstSymbol (CFGrammar *grammar)
 

Detailed Description

Generate Normalized Grammar (backus naur form) from a grammarbase (Extended extended Backus–Naur form )

To Do: Error Notice for ill formed production, e.g. not end with ';' and '*' not preceding with '()' and extra space before ';' '|' sign support

Definition at line 47 of file CFGNormalizer.h.

Constructor & Destructor Documentation

◆ CFGNormalizer()

SVF::CFGNormalizer::CFGNormalizer ( )
inline

Definition at line 51 of file CFGNormalizer.h.

52 {
53 }

Member Function Documentation

◆ barReplace()

void CFGNormalizer::barReplace ( CFGrammar grammar)
private

Definition at line 295 of file CFGNormalizer.cpp.

296{
297 for (auto &symbolToProductionsPair : grammar->getRawProductions())
298 {
300 //GrammarBase::Productions Originalproductions = symbolToProductionsPair.second;
302 {
303 size_t i = 1;
304 size_t j = 1;
305 while (i < ebnfProduction.size())
306 {
307 if (grammar->kindToStr(ebnfProduction[i].kind) == "|")
308 {
310 tempPro.insert(tempPro.begin(), symbolToProductionsPair.first );
311 productions.insert(tempPro);
312 j = i+1;
313 }
314 i++;
315 }
317 tempPro.insert(tempPro.begin(), symbolToProductionsPair.first );
318 productions.insert(tempPro);
319 }
320 symbolToProductionsPair.second.clear();
322 }
323}
SymbolSet< Production > Productions
Definition CFGrammar.h:157
std::string kindToStr(Kind kind) const
std::vector< Symbol > Production
Definition CFGrammar.h:156
llvm::IRBuilder IRBuilder
Definition BasicTypes.h:74

◆ check_head()

GrammarBase::Symbol CFGNormalizer::check_head ( GrammarBase::SymbolMap< GrammarBase::Symbol, GrammarBase::Productions > &  grammar,
GrammarBase::Production rule 
)
private

Definition at line 467 of file CFGNormalizer.cpp.

468{
469 for(auto symProdPair: grammar)
470 {
471 for(auto prod: symProdPair.second)
472 {
473 if (rule == prod)
474 {
475 return symProdPair.first;
476 }
477 }
478 }
480 return symbol;
481}
unsigned u32_t
Definition GeneralType.h:46

◆ ebnf_bin()

void CFGNormalizer::ebnf_bin ( CFGrammar grammar)
private

Add nonterminal to tranfer long rules to binary rules.

Assign _attribute if target portion of the production contain more than 1 variable then X add no variable attribute if target only contain one variable attribute X share the same variable attribute

Definition at line 91 of file CFGNormalizer.cpp.

92{
94 std::string tempStr = "";
95 removeFirstSymbol(grammar);
96
97 auto rawProductions = grammar->getRawProductions();
98
99 for(auto itr : rawProductions)
100 {
101 auto head = *(grammar->getRawProductions().find(itr.first));
102 for(auto rule: head.second)
103 {
104 if (rule.size() < 3) continue;
105
107 GrammarBase::Production long_run(rule.begin() + 1, rule.end());
108 auto it = grammar->getRawProductions()[head.first].find(rule);
109 grammar->getRawProductions()[head.first].erase(it);
111 if (X == u32_t(-1))
112 {
113 X = check_head(grammar->getRawProductions(), long_run);
114 }
115 if ((X == u32_t(-1)) == false)
116 {
117 rule = {first, X};
118 grammar->getRawProductions()[head.first].insert(rule);
119 }
120 else
121 {
122 tempStr = "X";
123 std::ostringstream ss;
124 ss << grammar->num_generator();
125 tempStr.append(ss.str());
131 for (unsigned i = 0; i < long_run.size(); i++)
132 {
133 GrammarBase::VariableAttribute variableAttribute = long_run[i].variableAttribute;
134 if ( variableAttribute != 0)
135 {
136 variableAttributeSet.insert(variableAttribute);
137 }
138 }
139 if ( variableAttributeSet.size() == 1)
140 {
141 tempStr += "_";
142 tempStr += char(*variableAttributeSet.begin());
143 }
145 rule = {first, tempSym};
146 grammar->getRawProductions()[head.first].insert(rule);
147 X = tempSym;
148 }
149 new_grammar[X] = {};
152 if (long_run.size() ==2)
153 {
154 new_grammar[X].insert(temp_p);
155 long_run.clear();
156 }
157 else
158 {
159 new_grammar[X].insert(long_run);
160 RHX = X;
161 }
162 while (long_run.size() > 2)
163 {
164 first = long_run[0];
166 long_run.erase(long_run.begin());
167
168 X = RHX;
170
172 if (RHX == u32_t(-1))
173 {
175 }
176 if(RHX == u32_t(-1))
177 {
178 tempStr = "X";
179 std::ostringstream ss;
180 ss << grammar->num_generator();
181 tempStr.append(ss.str());
183 for (unsigned i = 0; i < long_run.size(); i++)
184 {
185 GrammarBase::VariableAttribute variableAttribute = long_run[i].variableAttribute;
186 if ( variableAttribute != 0)
187 {
188 variableAttributeSet.insert(variableAttribute);
189 }
190 }
191 if ( variableAttributeSet.size() == 1)
192 {
193 tempStr += "_";
194 tempStr += char(*variableAttributeSet.begin());
195 }
197 auto it = new_grammar[X].find(prev_rule);
198 new_grammar[X].erase(it);
199 new_grammar[X].insert({first, tempSym});
201 RHX = tempSym;
202 }
203 }
204 }
205 }
206 for (auto new_head : new_grammar)
207 {
208 for (auto prod : new_head.second)
209 {
210 auto it = grammar->getRawProductions()[new_head.first].find(prod);
211 if (it == grammar->getRawProductions()[new_head.first].end())
212 {
213 grammar->getRawProductions()[new_head.first].insert(prod);
214 }
215 }
216 }
217}
void removeFirstSymbol(CFGrammar *grammar)
GrammarBase::Symbol check_head(GrammarBase::SymbolMap< GrammarBase::Symbol, GrammarBase::Productions > &grammar, GrammarBase::Production &rule)
const u32_t num_generator()
Definition CFGrammar.h:388
std::unordered_map< Key, Value, Hash, KeyEqual, Allocator > SymbolMap
Definition CFGrammar.h:150
Symbol insertNonTerminalSymbol(std::string strLit)
SymbolMap< Symbol, Productions > & getRawProductions()
Definition CFGrammar.h:190
u32_t VariableAttribute
Definition CFGrammar.h:41

◆ ebnfBracketMatch()

int CFGNormalizer::ebnfBracketMatch ( GrammarBase::Production prod,
int  i,
CFGrammar grammar 
)
private

Definition at line 281 of file CFGNormalizer.cpp.

282{
283 int index = i;
284 while (index >= 0)
285 {
286 if (grammar->kindToStr(prod[index].kind) == "(")
287 {
288 return index;
289 }
290 index--;
291 }
292 return 0;
293}
int index
Definition cJSON.h:170

◆ ebnfSignReplace()

void CFGNormalizer::ebnfSignReplace ( char  sign,
CFGrammar grammar 
)
private

Replace Sign Group With tempNonterminal 'X' And load the replace in newProductions

If sign associate without group e.i with single symbol

sign associate with group of symbol by brace pair

For Both * and ? need to insert epsilon rule

insert second rule for '*' X -> X E for '+' X -> E

Insert Back the Group

Definition at line 325 of file CFGNormalizer.cpp.

326{
330 std::string tempNonterminal = "X";
331
332 for (auto &symbolToProductionsPair : grammar->getRawProductions())
333 {
336 {
337 size_t i = 1;
338 while (i < ebnfProduction.size())
339 {
341 if (grammar->kindToStr(ebnfProduction[i].kind) == std::string(1, sign))
342 {
344 assert(i != 1 && "sign in grammar associate with no symbol");
345 if (grammar->kindToStr(ebnfProduction[i - 1].kind) != std::string(1, ')'))
346 {
347 signGroupStart = i - 1;
348 }
350 else
351 {
353 }
354 std::string groupString = "";
355 for (size_t j = signGroupStart; j < i; j++)
356 {
357 groupString.append(grammar->kindToStr(ebnfProduction[j].kind));
358 groupString.append(" ");
359 }
360 groupString.append(grammar->kindToStr(ebnfProduction[i].kind));
361 if (newProductions.find(groupString) != newProductions.end())
362 {
364 ebnfProduction.erase(ebnfProduction.begin() + signGroupStart, ebnfProduction.begin() + i + 1);
367 }
368 else if ( (signGroupStart == 1) && (i == ebnfProduction.size() -1))
369 {
372 ebnfProduction.erase(ebnfProduction.begin() + signGroupStart, ebnfProduction.begin() + i + 1);
373
374 }
375 else
376 {
377 tempNonterminal = "X";
378 std::ostringstream ss;
379 ss << grammar->num_generator();
380 tempNonterminal.append(ss.str());
383 ebnfProduction.erase(ebnfProduction.begin() + signGroupStart, ebnfProduction.begin() + i + 1);
387 }
388
390 }
391 i++;
392 }
393 }
395 }
396 for(auto rep: newProductions)
397 {
399 std::string new_nonterminal = rep.second;
401 grammar->getRawProductions()[grammar->strToSymbol(new_nonterminal)].insert(temp_list);
404 if (sign == '*' || sign == '?')
405 {
408 strTrans(rep.first, grammar, normalProd);
410 if (sign == '*')
411 {
412 for (auto &word : normalProd)
413 {
414 if (word != grammar->strToSymbol("*") && word != grammar->strToSymbol("(") && word != grammar->strToSymbol(")"))
415 {
416 withoutSign.push_back(word);
417 }
418 }
419 withoutSign.push_back(grammar->strToSymbol(rep.second));
420 }
421 if (sign == '?')
422 {
423 for (auto &word : normalProd)
424 {
425 if (word != grammar->strToSymbol("?") && word != grammar->strToSymbol("(") && word != grammar->strToSymbol(")"))
426 {
427 withoutSign.push_back(word);
428 }
429 }
430 }
431 temp_list.insert(temp_list.end(), withoutSign.begin(), withoutSign.end());
432 }
433 grammar->getRawProductions()[grammar->strToSymbol(new_nonterminal)].insert(temp_list);
434 }
435}
int ebnfBracketMatch(GrammarBase::Production &prod, int i, CFGrammar *grammar)
void strTrans(std::string strPro, CFGrammar *grammar, GrammarBase::Production &normalProd)
Symbol strToSymbol(const std::string str) const
Definition CFGrammar.cpp:74
signed s32_t
Definition GeneralType.h:47

◆ fillAttribute()

CFGrammar * CFGNormalizer::fillAttribute ( CFGrammar grammar,
const Map< CFGrammar::Kind, Set< CFGrammar::Attribute > > &  kindToAttrsMap 
)

Expand every variable attribute in rawProductions of grammarbase.

rawProductions production does not include lhs so append to the begin of the production

Definition at line 61 of file CFGNormalizer.cpp.

62{
63 NodeSet nodeSet = {};
64 for (auto pair: kindToAttrsMap)
65 {
66 for (auto attri: pair.second)
67 {
68 nodeSet.insert(attri);
69 }
70 }
71 for(auto symProdsPair: grammar->getRawProductions())
72 {
73 for(auto prod: symProdsPair.second)
74 {
78 tempP.insert(tempP.begin(), symProdsPair.first);
80 getFilledProductions(tempP, nodeSet, grammar, normalProds);
81 for (auto filledProd : normalProds)
82 {
84 }
85 }
86 }
87
88 return grammar;
89}
void insertToCFLGrammar(CFGrammar *grammar, GrammarBase::Production &prod)
Based on prod size to add on suitable member field of grammar.
void getFilledProductions(GrammarBase::Production &prod, const NodeSet &nodeSet, CFGrammar *grammar, GrammarBase::Productions &normalProds)
Set< NodeID > NodeSet

◆ getFilledProductions()

void CFGNormalizer::getFilledProductions ( GrammarBase::Production prod,
const NodeSet nodeSet,
CFGrammar grammar,
GrammarBase::Productions normalProds 
)
private

Loop through provided production based on existence of attribute of attribute variable and expand to productions set e.g Xi -> Y Zi with Xi i = 0, 1, Yi i = 0,2 Will get {X0 -> Y Z0, X1 -> Y Z1, X2 -> Y Z2}

Get the first encounter variable attribute to expand

Check whether all symbol expanded

Definition at line 223 of file CFGNormalizer.cpp.

224{
225 normalProds.clear();
227 worklist.push(prod);
228 while( worklist.empty() == false )
229 {
233 // GrammarBase::Kind baseKind;
235 {
236 if ( currentVariableAttribute == 0 )
237 {
238 currentVariableAttribute = symbol.variableAttribute;
239 // baseKind = symbol.kind;
240 }
241 }
242 if ( currentVariableAttribute == 0)
243 {
245 continue;
246 }
247 //*(kindToAttriMap.find(baseKind));
248 //for (auto attribute : nodeSet.second)
249 for (auto attribute : nodeSet)
250 {
253 {
254 if ( symbol.variableAttribute == currentVariableAttribute)
255 {
256 symbol.attribute = attribute;
257 symbol.variableAttribute = 0;
258 }
259 }
261 bool continueToFill = false;
263 {
264 if ( symbol.variableAttribute != 0 )
265 {
266 continueToFill = true;
267 }
268 }
269 if ( continueToFill == false)
270 {
272 }
273 else
274 {
275 worklist.push(fillingProduction);
276 }
277 }
278 }
279}
bool empty() const
Definition CFGrammar.h:416
bool push(Data data)
Definition CFGrammar.h:429

◆ insertToCFLGrammar()

void CFGNormalizer::insertToCFLGrammar ( CFGrammar grammar,
GrammarBase::Production prod 
)
private

Based on prod size to add on suitable member field of grammar.

Definition at line 484 of file CFGNormalizer.cpp.

485{
486 if (prod.size() == 2)
487 {
488 if ((std::find(prod.begin(), prod.end(), grammar->strToKind("epsilon")) != prod.end()))
489 {
490 if (std::find(grammar->getEpsilonProds().begin(), grammar->getEpsilonProds().end(), prod) == grammar->getEpsilonProds().end())
491 {
492 grammar->getEpsilonProds().insert(prod);
493 }
494 }
495 else
496 {
497 grammar->getSingleRHSToProds()[prod[1]].insert(prod);
498 }
499 }
500 if (prod.size() == 3)
501 {
502 grammar->getFirstRHSToProds()[prod[1]].insert(prod);
503 grammar->getSecondRHSToProds()[prod[2]].insert(prod);
504 }
505}
SymbolMap< Symbol, Productions > & getFirstRHSToProds()
Definition CFGrammar.h:318
SymbolMap< Symbol, Productions > & getSingleRHSToProds()
Definition CFGrammar.h:313
SymbolMap< Symbol, Productions > & getSecondRHSToProds()
Definition CFGrammar.h:323
Productions & getEpsilonProds()
Definition CFGrammar.h:308
Kind strToKind(std::string str) const
Definition CFGrammar.cpp:55

◆ normalize()

CFGrammar * CFGNormalizer::normalize ( GrammarBase generalGrammar)

Binary Normal Form(BNF) normalization with variable attribute expanded.

Definition at line 41 of file CFGNormalizer.cpp.

42{
43 CFGrammar *grammar = new CFGrammar();
44 grammar->setStartKind(generalGrammar->getStartKind());
45 grammar->setTerminals(generalGrammar->getTerminals());
46 grammar->setNonterminals(generalGrammar->getNonterminals());
47 grammar->setEBNFSigns(generalGrammar->getEBNFSigns());
48 grammar->setTotalKind(generalGrammar->getTotalKind());
49 grammar->setAttributeKinds(generalGrammar->getAttrSyms());
50 grammar->setKindToAttrsMap(generalGrammar->getKindToAttrsMap());
51 grammar->setRawProductions(generalGrammar->getRawProductions());
52 barReplace(grammar);
53 ebnfSignReplace('*', grammar);
54 ebnfSignReplace('?', grammar);
55 ebnf_bin(grammar);
56 fillAttribute(grammar, grammar->getKindToAttrsMap());
57 return grammar;
58}
void barReplace(CFGrammar *grammar)
void ebnf_bin(CFGrammar *grammar)
Add nonterminal to tranfer long rules to binary rules.
void ebnfSignReplace(char sign, CFGrammar *grammar)
CFGrammar * fillAttribute(CFGrammar *grammar, const Map< CFGrammar::Kind, Set< CFGrammar::Attribute > > &kindToAttrsMap)
Expand every variable attribute in rawProductions of grammarbase.
void setStartKind(Kind startKind)
Definition CFGrammar.h:210
void setAttributeKinds(const Set< Kind > &attributeKind)
Definition CFGrammar.cpp:50
const Map< Kind, Set< Attribute > > & getKindToAttrsMap() const
Definition CFGrammar.h:195
void setRawProductions(SymbolMap< Symbol, Productions > &rawProductions)
Definition CFGrammar.cpp:40
void setTotalKind(Kind totalKind)
Definition CFGrammar.h:215
void setKindToAttrsMap(const Map< Kind, Set< Attribute > > &kindToAttrsMap)
Definition CFGrammar.cpp:45
void setEBNFSigns(Map< std::string, Kind > &EBNFSigns)
Definition CFGrammar.h:185
void setTerminals(Map< std::string, Kind > &terminals)
Definition CFGrammar.h:175
void setNonterminals(Map< std::string, Kind > &nonterminals)
Definition CFGrammar.h:165

◆ removeFirstSymbol()

void CFGNormalizer::removeFirstSymbol ( CFGrammar grammar)
private

Definition at line 507 of file CFGNormalizer.cpp.

508{
509 // Remove First Terminal
510 for(auto head : grammar->getRawProductions())
511 {
512 for(auto rule: head.second)
513 {
514
516 long_run.erase(long_run.begin());
517 auto it = grammar->getRawProductions().at(head.first).find(rule);
518 grammar->getRawProductions().at(head.first).erase(it);
519 grammar->getRawProductions()[head.first].insert(long_run);
520 }
521 }
522}

◆ strTrans()

void CFGNormalizer::strTrans ( std::string  strPro,
CFGrammar grammar,
GrammarBase::Production normalProd 
)
private

Definition at line 437 of file CFGNormalizer.cpp.

438{
439 // Find the position of the first non-whitespace character
440 size_t start = LHS.find_first_not_of(" \t\n\r");
441 // If the string contains non-whitespace characters, remove leading spaces
442 if (start != std::string::npos)
443 {
444 LHS = LHS.substr(start);
445 }
446 else
447 {
448 // If the string contains only spaces, clear it
449 LHS.clear();
450 }
451
452 std::string delimiter;
453 size_t pos;
454 std::string word;
455
456 delimiter = " ";
457 while ((pos = LHS.find(delimiter)) != std::string::npos)
458 {
459 word = LHS.substr(0, pos);
460 LHS.erase(0, pos + delimiter.length());
461 normalProd.push_back(grammar->strToSymbol(word));
462 }
463 normalProd.push_back(grammar->strToSymbol(LHS));
464}

The documentation for this class was generated from the following files: