Static Value-Flow Analysis
CFGNormalizer.cpp
Go to the documentation of this file.
1 //===----- CFGNormalizer.cpp -- Context Free Grammar Normalizer--------------//
2 //
3 // SVF: Static Value-Flow Analysis
4 //
5 // Copyright (C) <2013-> <Yulei Sui>
6 //
7 
8 // This program is free software: you can redistribute it and/or modify
9 // it under the terms of the GNU Affero General Public License as published by
10 // the Free Software Foundation, either version 3 of the License, or
11 // (at your option) any later version.
12 
13 // This program is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 // GNU Affero General Public License for more details.
17 
18 // You should have received a copy of the GNU Affero General Public License
19 // along with this program. If not, see <http://www.gnu.org/licenses/>.
20 //
21 //===----------------------------------------------------------------------===//
22 
23 /*
24  * CFGNormalizer.cpp
25  *
26  * Created on: April 19 , 2022
27  * Author: Pei Xu
28  */
29 
30 #include "CFL/CFGNormalizer.h"
31 #include "Util/SVFUtil.h"
32 #include "Util/WorkList.h"
33 #include "SVFIR/SVFValue.h"
34 #include <string>
35 #include <regex>
36 #include <fstream>
37 #include <sstream>
38 #include <iostream>
39 
40 using namespace SVF;
41 
43 {
44  CFGrammar *grammar = new CFGrammar();
45  grammar->setStartKind(generalGrammar->getStartKind());
46  grammar->setTerminals(generalGrammar->getTerminals());
47  grammar->setNonterminals(generalGrammar->getNonterminals());
48  grammar->setEBNFSigns(generalGrammar->getEBNFSigns());
49  grammar->setTotalKind(generalGrammar->getTotalKind());
50  grammar->setAttributeKinds(generalGrammar->getAttrSyms());
51  grammar->setKindToAttrsMap(generalGrammar->getKindToAttrsMap());
52  grammar->setRawProductions(generalGrammar->getRawProductions());
53  barReplace(grammar);
54  ebnfSignReplace('*', grammar);
55  ebnfSignReplace('?', grammar);
56  ebnf_bin(grammar);
57  fillAttribute(grammar, grammar->getKindToAttrsMap());
58  return grammar;
59 }
60 
61 
63 {
64  NodeSet nodeSet = {};
65  for (auto pair: kindToAttrsMap)
66  {
67  for (auto attri: pair.second)
68  {
69  nodeSet.insert(attri);
70  }
71  }
72  for(auto symProdsPair: grammar->getRawProductions())
73  {
74  for(auto prod: symProdsPair.second)
75  {
78  GrammarBase::Production tempP = prod;
79  tempP.insert(tempP.begin(), symProdsPair.first);
80  GrammarBase::Productions normalProds;
81  getFilledProductions(tempP, nodeSet, grammar, normalProds);
82  for (auto filledProd : normalProds)
83  {
84  insertToCFLGrammar(grammar, filledProd);
85  }
86  }
87  }
88 
89  return grammar;
90 }
91 
93 {
95  std::string tempStr = "";
96  removeFirstSymbol(grammar);
97 
98  auto rawProductions = grammar->getRawProductions();
99 
100  for(auto itr : rawProductions)
101  {
102  auto head = *(grammar->getRawProductions().find(itr.first));
103  for(auto rule: head.second)
104  {
105  if (rule.size() < 3) continue;
106 
107  GrammarBase::Symbol first = rule[0];
108  GrammarBase::Production long_run(rule.begin() + 1, rule.end());
109  auto it = grammar->getRawProductions()[head.first].find(rule);
110  grammar->getRawProductions()[head.first].erase(it);
111  GrammarBase::Symbol X = check_head(new_grammar, long_run);
112  if (X == u32_t(-1))
113  {
114  X = check_head(grammar->getRawProductions(), long_run);
115  }
116  if ((X == u32_t(-1)) == false)
117  {
118  rule = {first, X};
119  grammar->getRawProductions()[head.first].insert(rule);
120  }
121  else
122  {
123  tempStr = "X";
124  std::ostringstream ss;
125  ss << grammar->num_generator();
126  tempStr.append(ss.str());
131  Set<GrammarBase::VariableAttribute> variableAttributeSet = {};
132  for (unsigned i = 0; i < long_run.size(); i++)
133  {
134  GrammarBase::VariableAttribute variableAttribute = long_run[i].variableAttribute;
135  if ( variableAttribute != 0)
136  {
137  variableAttributeSet.insert(variableAttribute);
138  }
139  }
140  if ( variableAttributeSet.size() == 1)
141  {
142  tempStr += "_";
143  tempStr += char(*variableAttributeSet.begin());
144  }
145  GrammarBase::Symbol tempSym = grammar->insertNonTerminalSymbol(tempStr);
146  rule = {first, tempSym};
147  grammar->getRawProductions()[head.first].insert(rule);
148  X = tempSym;
149  }
150  new_grammar[X] = {};
151  GrammarBase::Production temp_p = long_run;
153  if (long_run.size() ==2)
154  {
155  new_grammar[X].insert(temp_p);
156  long_run.clear();
157  }
158  else
159  {
160  new_grammar[X].insert(long_run);
161  RHX = X;
162  }
163  while (long_run.size() > 2)
164  {
165  first = long_run[0];
166  GrammarBase::Production prev_rule = long_run;
167  long_run.erase(long_run.begin());
168 
169  X = RHX;
170  temp_p = long_run;
171 
172  RHX = check_head(new_grammar, long_run);
173  if (RHX == u32_t(-1))
174  {
175  RHX = check_head(grammar->getRawProductions(), long_run);
176  }
177  if(RHX == u32_t(-1))
178  {
179  tempStr = "X";
180  std::ostringstream ss;
181  ss << grammar->num_generator();
182  tempStr.append(ss.str());
183  Set<GrammarBase::VariableAttribute> variableAttributeSet = {};
184  for (unsigned i = 0; i < long_run.size(); i++)
185  {
186  GrammarBase::VariableAttribute variableAttribute = long_run[i].variableAttribute;
187  if ( variableAttribute != 0)
188  {
189  variableAttributeSet.insert(variableAttribute);
190  }
191  }
192  if ( variableAttributeSet.size() == 1)
193  {
194  tempStr += "_";
195  tempStr += char(*variableAttributeSet.begin());
196  }
197  GrammarBase::Symbol tempSym = grammar->insertNonTerminalSymbol(tempStr);
198  auto it = new_grammar[X].find(prev_rule);
199  new_grammar[X].erase(it);
200  new_grammar[X].insert({first, tempSym});
201  new_grammar[tempSym].insert(long_run);
202  RHX = tempSym;
203  }
204  }
205  }
206  }
207  for (auto new_head : new_grammar)
208  {
209  for (auto prod : new_head.second)
210  {
211  auto it = grammar->getRawProductions()[new_head.first].find(prod);
212  if (it == grammar->getRawProductions()[new_head.first].end())
213  {
214  grammar->getRawProductions()[new_head.first].insert(prod);
215  }
216  }
217  }
218 }
219 
225 {
226  normalProds.clear();
228  worklist.push(prod);
229  while( worklist.empty() == false )
230  {
231  GrammarBase::Production currentProduction = worklist.pop();
233  GrammarBase::VariableAttribute currentVariableAttribute = 0;
234  // GrammarBase::Kind baseKind;
235  for ( GrammarBase::Symbol &symbol : currentProduction )
236  {
237  if ( currentVariableAttribute == 0 )
238  {
239  currentVariableAttribute = symbol.variableAttribute;
240  // baseKind = symbol.kind;
241  }
242  }
243  if ( currentVariableAttribute == 0)
244  {
245  normalProds.insert(currentProduction);
246  continue;
247  }
248  //*(kindToAttriMap.find(baseKind));
249  //for (auto attribute : nodeSet.second)
250  for (auto attribute : nodeSet)
251  {
252  GrammarBase::Production fillingProduction = currentProduction;
253  for ( GrammarBase::Symbol &symbol : fillingProduction )
254  {
255  if ( symbol.variableAttribute == currentVariableAttribute)
256  {
257  symbol.attribute = attribute;
258  symbol.variableAttribute = 0;
259  }
260  }
262  bool continueToFill = false;
263  for ( GrammarBase::Symbol &symbol : fillingProduction )
264  {
265  if ( symbol.variableAttribute != 0 )
266  {
267  continueToFill = true;
268  }
269  }
270  if ( continueToFill == false)
271  {
272  normalProds.insert(fillingProduction);
273  }
274  else
275  {
276  worklist.push(fillingProduction);
277  }
278  }
279  }
280 }
281 
283 {
284  int index = i;
285  while (index >= 0)
286  {
287  if (grammar->kindToStr(prod[index].kind) == "(")
288  {
289  return index;
290  }
291  index--;
292  }
293  return 0;
294 }
295 
297 {
298  for (auto &symbolToProductionsPair : grammar->getRawProductions())
299  {
300  GrammarBase::Productions productions;
301  //GrammarBase::Productions Originalproductions = symbolToProductionsPair.second;
302  for (auto ebnfProduction : symbolToProductionsPair.second)
303  {
304  size_t i = 1;
305  size_t j = 1;
306  while (i < ebnfProduction.size())
307  {
308  if (grammar->kindToStr(ebnfProduction[i].kind) == "|")
309  {
310  GrammarBase::Production tempPro(ebnfProduction.begin()+j, ebnfProduction.begin()+i);
311  tempPro.insert(tempPro.begin(), symbolToProductionsPair.first );
312  productions.insert(tempPro);
313  j = i+1;
314  }
315  i++;
316  }
317  GrammarBase::Production tempPro(ebnfProduction.begin()+j, ebnfProduction.begin()+i);
318  tempPro.insert(tempPro.begin(), symbolToProductionsPair.first );
319  productions.insert(tempPro);
320  }
321  symbolToProductionsPair.second.clear();
322  symbolToProductionsPair.second = productions;
323  }
324 }
325 
326 void CFGNormalizer::ebnfSignReplace(char sign, CFGrammar *grammar)
327 {
330  SVF::Map<std::string, std::string> newProductions;
331  std::string tempNonterminal = "X";
332 
333  for (auto &symbolToProductionsPair : grammar->getRawProductions())
334  {
335  GrammarBase::Productions productions = symbolToProductionsPair.second;
336  for (auto ebnfProduction : symbolToProductionsPair.second)
337  {
338  size_t i = 1;
339  while (i < ebnfProduction.size())
340  {
341  s32_t signGroupStart = -1;
342  if (grammar->kindToStr(ebnfProduction[i].kind) == std::string(1, sign))
343  {
345  assert(i != 1 && "sign in grammar associate with no symbol");
346  if (grammar->kindToStr(ebnfProduction[i - 1].kind) != std::string(1, ')'))
347  {
348  signGroupStart = i - 1;
349  }
351  else
352  {
353  signGroupStart = ebnfBracketMatch(ebnfProduction, i, grammar);
354  }
355  std::string groupString = "";
356  for (size_t j = signGroupStart; j < i; j++)
357  {
358  groupString.append(grammar->kindToStr(ebnfProduction[j].kind));
359  groupString.append(" ");
360  }
361  groupString.append(grammar->kindToStr(ebnfProduction[i].kind));
362  if (newProductions.find(groupString) != newProductions.end())
363  {
364  productions.erase(ebnfProduction);
365  ebnfProduction.erase(ebnfProduction.begin() + signGroupStart, ebnfProduction.begin() + i + 1);
366  ebnfProduction.insert(ebnfProduction.begin() + signGroupStart, grammar->strToSymbol(newProductions[groupString]));
367  productions.insert(ebnfProduction);
368  }
369  else if ( (signGroupStart == 1) && (i == ebnfProduction.size() -1))
370  {
371  newProductions[groupString] = grammar->kindToStr(ebnfProduction[0].kind);
372  productions.erase(ebnfProduction);
373  ebnfProduction.erase(ebnfProduction.begin() + signGroupStart, ebnfProduction.begin() + i + 1);
374 
375  }
376  else
377  {
378  tempNonterminal = "X";
379  std::ostringstream ss;
380  ss << grammar->num_generator();
381  tempNonterminal.append(ss.str());
382  GrammarBase::Symbol tempSym = grammar->insertNonTerminalSymbol(tempNonterminal);
383  productions.erase(ebnfProduction);
384  ebnfProduction.erase(ebnfProduction.begin() + signGroupStart, ebnfProduction.begin() + i + 1);
385  ebnfProduction.insert(ebnfProduction.begin() + signGroupStart, tempSym);
386  newProductions[groupString] = tempNonterminal;
387  productions.insert(ebnfProduction);
388  }
389 
390  i = signGroupStart;
391  }
392  i++;
393  }
394  }
395  symbolToProductionsPair.second = productions;
396  }
397  for(auto rep: newProductions)
398  {
400  std::string new_nonterminal = rep.second;
401  GrammarBase::Production temp_list = {grammar->strToSymbol(new_nonterminal), grammar->strToSymbol("epsilon")};
402  grammar->getRawProductions()[grammar->strToSymbol(new_nonterminal)].insert(temp_list);
404  temp_list = {grammar->strToSymbol(new_nonterminal)};
405  if (sign == '*' || sign == '?')
406  {
408  GrammarBase::Production normalProd;
409  strTrans(rep.first, grammar, normalProd);
410  GrammarBase::Production withoutSign = {};
411  if (sign == '*')
412  {
413  for (auto &word : normalProd)
414  {
415  if (word != grammar->strToSymbol("*") && word != grammar->strToSymbol("(") && word != grammar->strToSymbol(")"))
416  {
417  withoutSign.push_back(word);
418  }
419  }
420  withoutSign.push_back(grammar->strToSymbol(rep.second));
421  }
422  if (sign == '?')
423  {
424  for (auto &word : normalProd)
425  {
426  if (word != grammar->strToSymbol("?") && word != grammar->strToSymbol("(") && word != grammar->strToSymbol(")"))
427  {
428  withoutSign.push_back(word);
429  }
430  }
431  }
432  temp_list.insert(temp_list.end(), withoutSign.begin(), withoutSign.end());
433  }
434  grammar->getRawProductions()[grammar->strToSymbol(new_nonterminal)].insert(temp_list);
435  }
436 }
437 
439 {
440  std::smatch matches;
441  std::regex LHSReg("\\s*(.*)");
442  std::string delimiter;
443  size_t pos;
444  std::string word;
445  std::regex_search(LHS, matches, LHSReg);
446  LHS = matches.str(1);
447  delimiter = " ";
448  while ((pos = LHS.find(delimiter)) != std::string::npos)
449  {
450  word = LHS.substr(0, pos);
451  LHS.erase(0, pos + delimiter.length());
452  normalProd.push_back(grammar->strToSymbol(word));
453  }
454  normalProd.push_back(grammar->strToSymbol(LHS));
455 }
456 
458 {
459  for(auto symProdPair: grammar)
460  {
461  for(auto prod: symProdPair.second)
462  {
463  if (rule == prod)
464  {
465  return symProdPair.first;
466  }
467  }
468  }
469  GrammarBase::Symbol symbol = u32_t(-1);
470  return symbol;
471 }
472 
475 {
476  if (prod.size() == 2)
477  {
478  if ((std::find(prod.begin(), prod.end(), grammar->strToKind("epsilon")) != prod.end()))
479  {
480  if (std::find(grammar->getEpsilonProds().begin(), grammar->getEpsilonProds().end(), prod) == grammar->getEpsilonProds().end())
481  {
482  grammar->getEpsilonProds().insert(prod);
483  }
484  }
485  else
486  {
487  grammar->getSingleRHSToProds()[prod[1]].insert(prod);
488  }
489  }
490  if (prod.size() == 3)
491  {
492  grammar->getFirstRHSToProds()[prod[1]].insert(prod);
493  grammar->getSecondRHSToProds()[prod[2]].insert(prod);
494  }
495 }
496 
498 {
499  // Remove First Terminal
500  for(auto head : grammar->getRawProductions())
501  {
502  for(auto rule: head.second)
503  {
504 
505  GrammarBase::Production long_run = rule;
506  long_run.erase(long_run.begin());
507  auto it = grammar->getRawProductions().at(head.first).find(rule);
508  grammar->getRawProductions().at(head.first).erase(it);
509  grammar->getRawProductions()[head.first].insert(long_run);
510  }
511  }
512 }
int index
Definition: cJSON.h:170
const char *const string
Definition: cJSON.h:172
void barReplace(CFGrammar *grammar)
void ebnf_bin(CFGrammar *grammar)
Add nonterminal to tranfer long rules to binary rules.
void ebnfSignReplace(char sign, CFGrammar *grammar)
void insertToCFLGrammar(CFGrammar *grammar, GrammarBase::Production &prod)
Based on prod size to add on suitable member field of grammar.
void removeFirstSymbol(CFGrammar *grammar)
CFGrammar * normalize(GrammarBase *generalGrammar)
Binary Normal Form(BNF) normalization with variable attribute expanded.
int ebnfBracketMatch(GrammarBase::Production &prod, int i, CFGrammar *grammar)
CFGrammar * fillAttribute(CFGrammar *grammar, const Map< CFGrammar::Kind, Set< CFGrammar::Attribute >> &kindToAttrsMap)
Expand every variable attribute in rawProductions of grammarbase.
void getFilledProductions(GrammarBase::Production &prod, const NodeSet &nodeSet, CFGrammar *grammar, GrammarBase::Productions &normalProds)
void strTrans(std::string strPro, CFGrammar *grammar, GrammarBase::Production &normalProd)
GrammarBase::Symbol check_head(GrammarBase::SymbolMap< GrammarBase::Symbol, GrammarBase::Productions > &grammar, GrammarBase::Production &rule)
SymbolMap< Symbol, Productions > & getSecondRHSToProds()
Definition: CFGrammar.h:323
const u32_t num_generator()
Definition: CFGrammar.h:388
Productions & getEpsilonProds()
Definition: CFGrammar.h:308
SymbolMap< Symbol, Productions > & getSingleRHSToProds()
Definition: CFGrammar.h:313
SymbolMap< Symbol, Productions > & getFirstRHSToProds()
Definition: CFGrammar.h:318
bool empty() const
Definition: CFGrammar.h:416
bool push(Data data)
Definition: CFGrammar.h:429
Map< std::string, Kind > & getNonterminals()
Definition: CFGrammar.h:160
void setKindToAttrsMap(const Map< Kind, Set< Attribute >> &kindToAttrsMap)
Definition: CFGrammar.cpp:45
void setStartKind(Kind startKind)
Definition: CFGrammar.h:210
void setAttributeKinds(const Set< Kind > &attributeKind)
Definition: CFGrammar.cpp:50
const Set< Kind > & getAttrSyms() const
Definition: CFGrammar.h:243
void setRawProductions(SymbolMap< Symbol, Productions > &rawProductions)
Definition: CFGrammar.cpp:40
void setTotalKind(Kind totalKind)
Definition: CFGrammar.h:215
Kind strToKind(std::string str) const
Definition: CFGrammar.cpp:55
std::unordered_map< Key, Value, Hash, KeyEqual, Allocator > SymbolMap
Definition: CFGrammar.h:150
Map< std::string, Kind > & getTerminals()
Definition: CFGrammar.h:170
SymbolMap< Symbol, Productions > & getRawProductions()
Definition: CFGrammar.h:190
Symbol strToSymbol(const std::string str) const
Definition: CFGrammar.cpp:74
Map< std::string, Kind > & getEBNFSigns()
Definition: CFGrammar.h:180
Symbol insertNonTerminalSymbol(std::string strLit)
Definition: CFGrammar.cpp:249
u32_t VariableAttribute
Definition: CFGrammar.h:41
SymbolSet< Production > Productions
Definition: CFGrammar.h:157
Kind getTotalKind()
Definition: CFGrammar.h:200
Kind getStartKind()
Definition: CFGrammar.h:205
std::string kindToStr(Kind kind) const
Definition: CFGrammar.cpp:103
const Map< Kind, Set< Attribute > > & getKindToAttrsMap() const
Definition: CFGrammar.h:195
void setEBNFSigns(Map< std::string, Kind > &EBNFSigns)
Definition: CFGrammar.h:185
void setTerminals(Map< std::string, Kind > &terminals)
Definition: CFGrammar.h:175
std::vector< Symbol > Production
Definition: CFGrammar.h:156
void setNonterminals(Map< std::string, Kind > &nonterminals)
Definition: CFGrammar.h:165
for isBitcode
Definition: BasicTypes.h:68
Set< NodeID > NodeSet
Definition: GeneralType.h:113
std::unordered_map< Key, Value, Hash, KeyEqual, Allocator > Map
Definition: GeneralType.h:101
signed s32_t
Definition: GeneralType.h:47
unsigned u32_t
Definition: GeneralType.h:46
std::unordered_set< Key, Hash, KeyEqual, Allocator > Set
Definition: GeneralType.h:96