Static Value-Flow Analysis
Public Member Functions | Private Member Functions | List of all members
SVF::CFGNormalizer Class Reference

#include <CFGNormalizer.h>

Public Member Functions

 CFGNormalizer ()
 
CFGrammarnormalize (GrammarBase *generalGrammar)
 Binary Normal Form(BNF) normalization with variable attribute expanded. More...
 
CFGrammarfillAttribute (CFGrammar *grammar, const Map< CFGrammar::Kind, Set< CFGrammar::Attribute >> &kindToAttrsMap)
 Expand every variable attribute in rawProductions of grammarbase. More...
 

Private Member Functions

void ebnf_bin (CFGrammar *grammar)
 Add nonterminal to tranfer long rules to binary rules. More...
 
void ebnfSignReplace (char sign, CFGrammar *grammar)
 
void barReplace (CFGrammar *grammar)
 
void insertToCFLGrammar (CFGrammar *grammar, GrammarBase::Production &prod)
 Based on prod size to add on suitable member field of grammar. More...
 
int ebnfBracketMatch (GrammarBase::Production &prod, int i, CFGrammar *grammar)
 
GrammarBase::Symbol check_head (GrammarBase::SymbolMap< GrammarBase::Symbol, GrammarBase::Productions > &grammar, GrammarBase::Production &rule)
 
void strTrans (std::string strPro, CFGrammar *grammar, GrammarBase::Production &normalProd)
 
void getFilledProductions (GrammarBase::Production &prod, const NodeSet &nodeSet, CFGrammar *grammar, GrammarBase::Productions &normalProds)
 
void removeFirstSymbol (CFGrammar *grammar)
 

Detailed Description

Generate Normalized Grammar (backus naur form) from a grammarbase (Extended extended Backus–Naur form )

To Do: Error Notice for ill formed production, e.g. not end with ';' and '*' not preceding with '()' and extra space before ';' '|' sign support

Definition at line 47 of file CFGNormalizer.h.

Constructor & Destructor Documentation

◆ CFGNormalizer()

SVF::CFGNormalizer::CFGNormalizer ( )
inline

Definition at line 51 of file CFGNormalizer.h.

52  {
53  }

Member Function Documentation

◆ barReplace()

void CFGNormalizer::barReplace ( CFGrammar grammar)
private

Definition at line 296 of file CFGNormalizer.cpp.

297 {
298  for (auto &symbolToProductionsPair : grammar->getRawProductions())
299  {
300  GrammarBase::Productions productions;
301  //GrammarBase::Productions Originalproductions = symbolToProductionsPair.second;
302  for (auto ebnfProduction : symbolToProductionsPair.second)
303  {
304  size_t i = 1;
305  size_t j = 1;
306  while (i < ebnfProduction.size())
307  {
308  if (grammar->kindToStr(ebnfProduction[i].kind) == "|")
309  {
310  GrammarBase::Production tempPro(ebnfProduction.begin()+j, ebnfProduction.begin()+i);
311  tempPro.insert(tempPro.begin(), symbolToProductionsPair.first );
312  productions.insert(tempPro);
313  j = i+1;
314  }
315  i++;
316  }
317  GrammarBase::Production tempPro(ebnfProduction.begin()+j, ebnfProduction.begin()+i);
318  tempPro.insert(tempPro.begin(), symbolToProductionsPair.first );
319  productions.insert(tempPro);
320  }
321  symbolToProductionsPair.second.clear();
322  symbolToProductionsPair.second = productions;
323  }
324 }
SymbolMap< Symbol, Productions > & getRawProductions()
Definition: CFGrammar.h:190
SymbolSet< Production > Productions
Definition: CFGrammar.h:157
std::string kindToStr(Kind kind) const
Definition: CFGrammar.cpp:103
std::vector< Symbol > Production
Definition: CFGrammar.h:156

◆ check_head()

GrammarBase::Symbol CFGNormalizer::check_head ( GrammarBase::SymbolMap< GrammarBase::Symbol, GrammarBase::Productions > &  grammar,
GrammarBase::Production rule 
)
private

Definition at line 457 of file CFGNormalizer.cpp.

458 {
459  for(auto symProdPair: grammar)
460  {
461  for(auto prod: symProdPair.second)
462  {
463  if (rule == prod)
464  {
465  return symProdPair.first;
466  }
467  }
468  }
469  GrammarBase::Symbol symbol = u32_t(-1);
470  return symbol;
471 }
unsigned u32_t
Definition: GeneralType.h:46

◆ ebnf_bin()

void CFGNormalizer::ebnf_bin ( CFGrammar grammar)
private

Add nonterminal to tranfer long rules to binary rules.

Assign _attribute if target portion of the production contain more than 1 variable then X add no variable attribute if target only contain one variable attribute X share the same variable attribute

Definition at line 92 of file CFGNormalizer.cpp.

93 {
95  std::string tempStr = "";
96  removeFirstSymbol(grammar);
97 
98  auto rawProductions = grammar->getRawProductions();
99 
100  for(auto itr : rawProductions)
101  {
102  auto head = *(grammar->getRawProductions().find(itr.first));
103  for(auto rule: head.second)
104  {
105  if (rule.size() < 3) continue;
106 
107  GrammarBase::Symbol first = rule[0];
108  GrammarBase::Production long_run(rule.begin() + 1, rule.end());
109  auto it = grammar->getRawProductions()[head.first].find(rule);
110  grammar->getRawProductions()[head.first].erase(it);
111  GrammarBase::Symbol X = check_head(new_grammar, long_run);
112  if (X == u32_t(-1))
113  {
114  X = check_head(grammar->getRawProductions(), long_run);
115  }
116  if ((X == u32_t(-1)) == false)
117  {
118  rule = {first, X};
119  grammar->getRawProductions()[head.first].insert(rule);
120  }
121  else
122  {
123  tempStr = "X";
124  std::ostringstream ss;
125  ss << grammar->num_generator();
126  tempStr.append(ss.str());
131  Set<GrammarBase::VariableAttribute> variableAttributeSet = {};
132  for (unsigned i = 0; i < long_run.size(); i++)
133  {
134  GrammarBase::VariableAttribute variableAttribute = long_run[i].variableAttribute;
135  if ( variableAttribute != 0)
136  {
137  variableAttributeSet.insert(variableAttribute);
138  }
139  }
140  if ( variableAttributeSet.size() == 1)
141  {
142  tempStr += "_";
143  tempStr += char(*variableAttributeSet.begin());
144  }
145  GrammarBase::Symbol tempSym = grammar->insertNonTerminalSymbol(tempStr);
146  rule = {first, tempSym};
147  grammar->getRawProductions()[head.first].insert(rule);
148  X = tempSym;
149  }
150  new_grammar[X] = {};
151  GrammarBase::Production temp_p = long_run;
153  if (long_run.size() ==2)
154  {
155  new_grammar[X].insert(temp_p);
156  long_run.clear();
157  }
158  else
159  {
160  new_grammar[X].insert(long_run);
161  RHX = X;
162  }
163  while (long_run.size() > 2)
164  {
165  first = long_run[0];
166  GrammarBase::Production prev_rule = long_run;
167  long_run.erase(long_run.begin());
168 
169  X = RHX;
170  temp_p = long_run;
171 
172  RHX = check_head(new_grammar, long_run);
173  if (RHX == u32_t(-1))
174  {
175  RHX = check_head(grammar->getRawProductions(), long_run);
176  }
177  if(RHX == u32_t(-1))
178  {
179  tempStr = "X";
180  std::ostringstream ss;
181  ss << grammar->num_generator();
182  tempStr.append(ss.str());
183  Set<GrammarBase::VariableAttribute> variableAttributeSet = {};
184  for (unsigned i = 0; i < long_run.size(); i++)
185  {
186  GrammarBase::VariableAttribute variableAttribute = long_run[i].variableAttribute;
187  if ( variableAttribute != 0)
188  {
189  variableAttributeSet.insert(variableAttribute);
190  }
191  }
192  if ( variableAttributeSet.size() == 1)
193  {
194  tempStr += "_";
195  tempStr += char(*variableAttributeSet.begin());
196  }
197  GrammarBase::Symbol tempSym = grammar->insertNonTerminalSymbol(tempStr);
198  auto it = new_grammar[X].find(prev_rule);
199  new_grammar[X].erase(it);
200  new_grammar[X].insert({first, tempSym});
201  new_grammar[tempSym].insert(long_run);
202  RHX = tempSym;
203  }
204  }
205  }
206  }
207  for (auto new_head : new_grammar)
208  {
209  for (auto prod : new_head.second)
210  {
211  auto it = grammar->getRawProductions()[new_head.first].find(prod);
212  if (it == grammar->getRawProductions()[new_head.first].end())
213  {
214  grammar->getRawProductions()[new_head.first].insert(prod);
215  }
216  }
217  }
218 }
const char *const string
Definition: cJSON.h:172
void removeFirstSymbol(CFGrammar *grammar)
GrammarBase::Symbol check_head(GrammarBase::SymbolMap< GrammarBase::Symbol, GrammarBase::Productions > &grammar, GrammarBase::Production &rule)
const u32_t num_generator()
Definition: CFGrammar.h:388
std::unordered_map< Key, Value, Hash, KeyEqual, Allocator > SymbolMap
Definition: CFGrammar.h:150
Symbol insertNonTerminalSymbol(std::string strLit)
Definition: CFGrammar.cpp:249
u32_t VariableAttribute
Definition: CFGrammar.h:41
std::unordered_set< Key, Hash, KeyEqual, Allocator > Set
Definition: GeneralType.h:96

◆ ebnfBracketMatch()

int CFGNormalizer::ebnfBracketMatch ( GrammarBase::Production prod,
int  i,
CFGrammar grammar 
)
private

Definition at line 282 of file CFGNormalizer.cpp.

283 {
284  int index = i;
285  while (index >= 0)
286  {
287  if (grammar->kindToStr(prod[index].kind) == "(")
288  {
289  return index;
290  }
291  index--;
292  }
293  return 0;
294 }
int index
Definition: cJSON.h:170

◆ ebnfSignReplace()

void CFGNormalizer::ebnfSignReplace ( char  sign,
CFGrammar grammar 
)
private

Replace Sign Group With tempNonterminal 'X' And load the replace in newProductions

If sign associate without group e.i with single symbol

sign associate with group of symbol by brace pair

For Both * and ? need to insert epsilon rule

insert second rule for '*' X -> X E for '+' X -> E

Insert Back the Group

Definition at line 326 of file CFGNormalizer.cpp.

327 {
330  SVF::Map<std::string, std::string> newProductions;
331  std::string tempNonterminal = "X";
332 
333  for (auto &symbolToProductionsPair : grammar->getRawProductions())
334  {
335  GrammarBase::Productions productions = symbolToProductionsPair.second;
336  for (auto ebnfProduction : symbolToProductionsPair.second)
337  {
338  size_t i = 1;
339  while (i < ebnfProduction.size())
340  {
341  s32_t signGroupStart = -1;
342  if (grammar->kindToStr(ebnfProduction[i].kind) == std::string(1, sign))
343  {
345  assert(i != 1 && "sign in grammar associate with no symbol");
346  if (grammar->kindToStr(ebnfProduction[i - 1].kind) != std::string(1, ')'))
347  {
348  signGroupStart = i - 1;
349  }
351  else
352  {
353  signGroupStart = ebnfBracketMatch(ebnfProduction, i, grammar);
354  }
355  std::string groupString = "";
356  for (size_t j = signGroupStart; j < i; j++)
357  {
358  groupString.append(grammar->kindToStr(ebnfProduction[j].kind));
359  groupString.append(" ");
360  }
361  groupString.append(grammar->kindToStr(ebnfProduction[i].kind));
362  if (newProductions.find(groupString) != newProductions.end())
363  {
364  productions.erase(ebnfProduction);
365  ebnfProduction.erase(ebnfProduction.begin() + signGroupStart, ebnfProduction.begin() + i + 1);
366  ebnfProduction.insert(ebnfProduction.begin() + signGroupStart, grammar->strToSymbol(newProductions[groupString]));
367  productions.insert(ebnfProduction);
368  }
369  else if ( (signGroupStart == 1) && (i == ebnfProduction.size() -1))
370  {
371  newProductions[groupString] = grammar->kindToStr(ebnfProduction[0].kind);
372  productions.erase(ebnfProduction);
373  ebnfProduction.erase(ebnfProduction.begin() + signGroupStart, ebnfProduction.begin() + i + 1);
374 
375  }
376  else
377  {
378  tempNonterminal = "X";
379  std::ostringstream ss;
380  ss << grammar->num_generator();
381  tempNonterminal.append(ss.str());
382  GrammarBase::Symbol tempSym = grammar->insertNonTerminalSymbol(tempNonterminal);
383  productions.erase(ebnfProduction);
384  ebnfProduction.erase(ebnfProduction.begin() + signGroupStart, ebnfProduction.begin() + i + 1);
385  ebnfProduction.insert(ebnfProduction.begin() + signGroupStart, tempSym);
386  newProductions[groupString] = tempNonterminal;
387  productions.insert(ebnfProduction);
388  }
389 
390  i = signGroupStart;
391  }
392  i++;
393  }
394  }
395  symbolToProductionsPair.second = productions;
396  }
397  for(auto rep: newProductions)
398  {
400  std::string new_nonterminal = rep.second;
401  GrammarBase::Production temp_list = {grammar->strToSymbol(new_nonterminal), grammar->strToSymbol("epsilon")};
402  grammar->getRawProductions()[grammar->strToSymbol(new_nonterminal)].insert(temp_list);
404  temp_list = {grammar->strToSymbol(new_nonterminal)};
405  if (sign == '*' || sign == '?')
406  {
408  GrammarBase::Production normalProd;
409  strTrans(rep.first, grammar, normalProd);
410  GrammarBase::Production withoutSign = {};
411  if (sign == '*')
412  {
413  for (auto &word : normalProd)
414  {
415  if (word != grammar->strToSymbol("*") && word != grammar->strToSymbol("(") && word != grammar->strToSymbol(")"))
416  {
417  withoutSign.push_back(word);
418  }
419  }
420  withoutSign.push_back(grammar->strToSymbol(rep.second));
421  }
422  if (sign == '?')
423  {
424  for (auto &word : normalProd)
425  {
426  if (word != grammar->strToSymbol("?") && word != grammar->strToSymbol("(") && word != grammar->strToSymbol(")"))
427  {
428  withoutSign.push_back(word);
429  }
430  }
431  }
432  temp_list.insert(temp_list.end(), withoutSign.begin(), withoutSign.end());
433  }
434  grammar->getRawProductions()[grammar->strToSymbol(new_nonterminal)].insert(temp_list);
435  }
436 }
int ebnfBracketMatch(GrammarBase::Production &prod, int i, CFGrammar *grammar)
void strTrans(std::string strPro, CFGrammar *grammar, GrammarBase::Production &normalProd)
Symbol strToSymbol(const std::string str) const
Definition: CFGrammar.cpp:74
std::unordered_map< Key, Value, Hash, KeyEqual, Allocator > Map
Definition: GeneralType.h:101
signed s32_t
Definition: GeneralType.h:47

◆ fillAttribute()

CFGrammar * CFGNormalizer::fillAttribute ( CFGrammar grammar,
const Map< CFGrammar::Kind, Set< CFGrammar::Attribute >> &  kindToAttrsMap 
)

Expand every variable attribute in rawProductions of grammarbase.

rawProductions production does not include lhs so append to the begin of the production

Definition at line 62 of file CFGNormalizer.cpp.

63 {
64  NodeSet nodeSet = {};
65  for (auto pair: kindToAttrsMap)
66  {
67  for (auto attri: pair.second)
68  {
69  nodeSet.insert(attri);
70  }
71  }
72  for(auto symProdsPair: grammar->getRawProductions())
73  {
74  for(auto prod: symProdsPair.second)
75  {
78  GrammarBase::Production tempP = prod;
79  tempP.insert(tempP.begin(), symProdsPair.first);
80  GrammarBase::Productions normalProds;
81  getFilledProductions(tempP, nodeSet, grammar, normalProds);
82  for (auto filledProd : normalProds)
83  {
84  insertToCFLGrammar(grammar, filledProd);
85  }
86  }
87  }
88 
89  return grammar;
90 }
void insertToCFLGrammar(CFGrammar *grammar, GrammarBase::Production &prod)
Based on prod size to add on suitable member field of grammar.
void getFilledProductions(GrammarBase::Production &prod, const NodeSet &nodeSet, CFGrammar *grammar, GrammarBase::Productions &normalProds)
Set< NodeID > NodeSet
Definition: GeneralType.h:113

◆ getFilledProductions()

void CFGNormalizer::getFilledProductions ( GrammarBase::Production prod,
const NodeSet nodeSet,
CFGrammar grammar,
GrammarBase::Productions normalProds 
)
private

Loop through provided production based on existence of attribute of attribute variable and expand to productions set e.g Xi -> Y Zi with Xi i = 0, 1, Yi i = 0,2 Will get {X0 -> Y Z0, X1 -> Y Z1, X2 -> Y Z2}

Get the first encounter variable attribute to expand

Check whether all symbol expanded

Definition at line 224 of file CFGNormalizer.cpp.

225 {
226  normalProds.clear();
228  worklist.push(prod);
229  while( worklist.empty() == false )
230  {
231  GrammarBase::Production currentProduction = worklist.pop();
233  GrammarBase::VariableAttribute currentVariableAttribute = 0;
234  // GrammarBase::Kind baseKind;
235  for ( GrammarBase::Symbol &symbol : currentProduction )
236  {
237  if ( currentVariableAttribute == 0 )
238  {
239  currentVariableAttribute = symbol.variableAttribute;
240  // baseKind = symbol.kind;
241  }
242  }
243  if ( currentVariableAttribute == 0)
244  {
245  normalProds.insert(currentProduction);
246  continue;
247  }
248  //*(kindToAttriMap.find(baseKind));
249  //for (auto attribute : nodeSet.second)
250  for (auto attribute : nodeSet)
251  {
252  GrammarBase::Production fillingProduction = currentProduction;
253  for ( GrammarBase::Symbol &symbol : fillingProduction )
254  {
255  if ( symbol.variableAttribute == currentVariableAttribute)
256  {
257  symbol.attribute = attribute;
258  symbol.variableAttribute = 0;
259  }
260  }
262  bool continueToFill = false;
263  for ( GrammarBase::Symbol &symbol : fillingProduction )
264  {
265  if ( symbol.variableAttribute != 0 )
266  {
267  continueToFill = true;
268  }
269  }
270  if ( continueToFill == false)
271  {
272  normalProds.insert(fillingProduction);
273  }
274  else
275  {
276  worklist.push(fillingProduction);
277  }
278  }
279  }
280 }
bool empty() const
Definition: CFGrammar.h:416
bool push(Data data)
Definition: CFGrammar.h:429

◆ insertToCFLGrammar()

void CFGNormalizer::insertToCFLGrammar ( CFGrammar grammar,
GrammarBase::Production prod 
)
private

Based on prod size to add on suitable member field of grammar.

Definition at line 474 of file CFGNormalizer.cpp.

475 {
476  if (prod.size() == 2)
477  {
478  if ((std::find(prod.begin(), prod.end(), grammar->strToKind("epsilon")) != prod.end()))
479  {
480  if (std::find(grammar->getEpsilonProds().begin(), grammar->getEpsilonProds().end(), prod) == grammar->getEpsilonProds().end())
481  {
482  grammar->getEpsilonProds().insert(prod);
483  }
484  }
485  else
486  {
487  grammar->getSingleRHSToProds()[prod[1]].insert(prod);
488  }
489  }
490  if (prod.size() == 3)
491  {
492  grammar->getFirstRHSToProds()[prod[1]].insert(prod);
493  grammar->getSecondRHSToProds()[prod[2]].insert(prod);
494  }
495 }
SymbolMap< Symbol, Productions > & getSecondRHSToProds()
Definition: CFGrammar.h:323
Productions & getEpsilonProds()
Definition: CFGrammar.h:308
SymbolMap< Symbol, Productions > & getSingleRHSToProds()
Definition: CFGrammar.h:313
SymbolMap< Symbol, Productions > & getFirstRHSToProds()
Definition: CFGrammar.h:318
Kind strToKind(std::string str) const
Definition: CFGrammar.cpp:55

◆ normalize()

CFGrammar * CFGNormalizer::normalize ( GrammarBase generalGrammar)

Binary Normal Form(BNF) normalization with variable attribute expanded.

Definition at line 42 of file CFGNormalizer.cpp.

43 {
44  CFGrammar *grammar = new CFGrammar();
45  grammar->setStartKind(generalGrammar->getStartKind());
46  grammar->setTerminals(generalGrammar->getTerminals());
47  grammar->setNonterminals(generalGrammar->getNonterminals());
48  grammar->setEBNFSigns(generalGrammar->getEBNFSigns());
49  grammar->setTotalKind(generalGrammar->getTotalKind());
50  grammar->setAttributeKinds(generalGrammar->getAttrSyms());
51  grammar->setKindToAttrsMap(generalGrammar->getKindToAttrsMap());
52  grammar->setRawProductions(generalGrammar->getRawProductions());
53  barReplace(grammar);
54  ebnfSignReplace('*', grammar);
55  ebnfSignReplace('?', grammar);
56  ebnf_bin(grammar);
57  fillAttribute(grammar, grammar->getKindToAttrsMap());
58  return grammar;
59 }
void barReplace(CFGrammar *grammar)
void ebnf_bin(CFGrammar *grammar)
Add nonterminal to tranfer long rules to binary rules.
void ebnfSignReplace(char sign, CFGrammar *grammar)
CFGrammar * fillAttribute(CFGrammar *grammar, const Map< CFGrammar::Kind, Set< CFGrammar::Attribute >> &kindToAttrsMap)
Expand every variable attribute in rawProductions of grammarbase.
Map< std::string, Kind > & getNonterminals()
Definition: CFGrammar.h:160
void setKindToAttrsMap(const Map< Kind, Set< Attribute >> &kindToAttrsMap)
Definition: CFGrammar.cpp:45
void setStartKind(Kind startKind)
Definition: CFGrammar.h:210
void setAttributeKinds(const Set< Kind > &attributeKind)
Definition: CFGrammar.cpp:50
const Set< Kind > & getAttrSyms() const
Definition: CFGrammar.h:243
void setRawProductions(SymbolMap< Symbol, Productions > &rawProductions)
Definition: CFGrammar.cpp:40
void setTotalKind(Kind totalKind)
Definition: CFGrammar.h:215
Map< std::string, Kind > & getTerminals()
Definition: CFGrammar.h:170
Map< std::string, Kind > & getEBNFSigns()
Definition: CFGrammar.h:180
Kind getTotalKind()
Definition: CFGrammar.h:200
Kind getStartKind()
Definition: CFGrammar.h:205
const Map< Kind, Set< Attribute > > & getKindToAttrsMap() const
Definition: CFGrammar.h:195
void setEBNFSigns(Map< std::string, Kind > &EBNFSigns)
Definition: CFGrammar.h:185
void setTerminals(Map< std::string, Kind > &terminals)
Definition: CFGrammar.h:175
void setNonterminals(Map< std::string, Kind > &nonterminals)
Definition: CFGrammar.h:165

◆ removeFirstSymbol()

void CFGNormalizer::removeFirstSymbol ( CFGrammar grammar)
private

Definition at line 497 of file CFGNormalizer.cpp.

498 {
499  // Remove First Terminal
500  for(auto head : grammar->getRawProductions())
501  {
502  for(auto rule: head.second)
503  {
504 
505  GrammarBase::Production long_run = rule;
506  long_run.erase(long_run.begin());
507  auto it = grammar->getRawProductions().at(head.first).find(rule);
508  grammar->getRawProductions().at(head.first).erase(it);
509  grammar->getRawProductions()[head.first].insert(long_run);
510  }
511  }
512 }

◆ strTrans()

void CFGNormalizer::strTrans ( std::string  strPro,
CFGrammar grammar,
GrammarBase::Production normalProd 
)
private

Definition at line 438 of file CFGNormalizer.cpp.

439 {
440  std::smatch matches;
441  std::regex LHSReg("\\s*(.*)");
442  std::string delimiter;
443  size_t pos;
444  std::string word;
445  std::regex_search(LHS, matches, LHSReg);
446  LHS = matches.str(1);
447  delimiter = " ";
448  while ((pos = LHS.find(delimiter)) != std::string::npos)
449  {
450  word = LHS.substr(0, pos);
451  LHS.erase(0, pos + delimiter.length());
452  normalProd.push_back(grammar->strToSymbol(word));
453  }
454  normalProd.push_back(grammar->strToSymbol(LHS));
455 }

The documentation for this class was generated from the following files: