1 /*------------------------------------------------------------------------- 2 ParserGen.cs -- Generation of the Recursive Descent Parser 3 Compiler Generator Coco/R, 4 Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz 5 extended by M. Loeberbauer & A. Woess, Univ. of Linz 6 with improvements by Pat Terry, Rhodes University 7 8 This program is free software; you can redistribute it and/or modify it 9 under the terms of the GNU General Public License as published by the 10 Free Software Foundation; either version 2, or (at your option) any 11 later version. 12 13 This program is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16 for more details. 17 18 You should have received a copy of the GNU General Public License along 19 with this program; if not, write to the Free Software Foundation, Inc., 20 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 21 22 As an exception, it is allowed to write an extension of Coco/R that is 23 used as a plugin in non-free software. 24 25 If not otherwise stated, any source code generated by Coco/R (other than 26 Coco/R itself) does not fall under the GNU General Public License. 27 -------------------------------------------------------------------------*/ 28 using System; 29 using System.IO; 30 using System.Collections; 31 using System.Text; 32 33 namespace at.jku.ssw.Coco { 34 35 public class ParserGen { 36 37 const int maxTerm = 3; // sets of size < maxTerm are enumerated 38 const char CR = ' '; 39 const char LF = ' '; 40 const int EOF = -1; 41 42 const int tErr = 0; // error codes 43 const int altErr = 1; 44 const int syncErr = 2; 45 46 public Position usingPos; // "using" definitions from the attributed grammar 47 48 int errorNr; // highest parser error number 49 Symbol curSy; // symbol whose production is currently generated 50 FileStream fram; // parser frame file 51 StreamWriter gen; // generated parser source file 52 StringWriter err; // generated parser error messages 53 ArrayList symSet = new ArrayList(); 54 55 Tab tab; // other Coco objects 56 TextWriter trace; 57 Errors errors; 58 Buffer buffer; 59 60 void Indent (int n) { 61 for (int i = 1; i <= n; i++) gen.Write(' '); 62 } 63 64 65 bool Overlaps(BitArray s1, BitArray s2) { 66 int len = s1.Count; 67 for (int i = 0; i < len; ++i) { 68 if (s1[i] && s2[i]) { 69 return true; 70 } 71 } 72 return false; 73 } 74 75 // use a switch if more than 5 alternatives and none starts with a resolver, and no LL1 warning 76 bool UseSwitch (Node p) { 77 BitArray s1, s2; 78 if (p.typ != Node.alt) return false; 79 int nAlts = 0; 80 s1 = new BitArray(tab.terminals.Count); 81 while (p != null) { 82 s2 = tab.Expected0(p.sub, curSy); 83 // must not optimize with switch statement, if there are ll1 warnings 84 if (Overlaps(s1, s2)) { return false; } 85 s1.Or(s2); 86 ++nAlts; 87 // must not optimize with switch-statement, if alt uses a resolver expression 88 if (p.sub.typ == Node.rslv) return false; 89 p = p.down; 90 } 91 return nAlts > 5; 92 } 93 94 void CopySourcePart (Position pos, int indent) { 95 // Copy text described by pos from atg to gen 96 int ch, i; 97 if (pos != null) { 98 buffer.Pos = pos.beg; ch = buffer.Read(); 99 if (tab.emitLines) { 100 gen.WriteLine(); 101 gen.WriteLine("#line {0} "{1}"", pos.line, tab.srcName); 102 } 103 Indent(indent); 104 while (buffer.Pos <= pos.end) { 105 while (ch == CR || ch == LF) { // eol is either CR or CRLF or LF 106 gen.WriteLine(); Indent(indent); 107 if (ch == CR) ch = buffer.Read(); // skip CR 108 if (ch == LF) ch = buffer.Read(); // skip LF 109 for (i = 1; i <= pos.col && (ch == ' ' || ch == ' '); i++) { 110 // skip blanks at beginning of line 111 ch = buffer.Read(); 112 } 113 if (buffer.Pos > pos.end) goto done; 114 } 115 gen.Write((char)ch); 116 ch = buffer.Read(); 117 } 118 done: 119 if (indent > 0) gen.WriteLine(); 120 } 121 } 122 123 void GenErrorMsg (int errTyp, Symbol sym) { 124 errorNr++; 125 err.Write(" case " + errorNr + ": s = ""); 126 switch (errTyp) { 127 case tErr: 128 if (sym.name[0] == '"') err.Write(tab.Escape(sym.name) + " expected"); 129 else err.Write(sym.name + " expected"); 130 break; 131 case altErr: err.Write("invalid " + sym.name); break; 132 case syncErr: err.Write("this symbol not expected in " + sym.name); break; 133 } 134 err.WriteLine(""; break;"); 135 } 136 137 int NewCondSet (BitArray s) { 138 for (int i = 1; i < symSet.Count; i++) // skip symSet[0] (reserved for union of SYNC sets) 139 if (Sets.Equals(s, (BitArray)symSet[i])) return i; 140 symSet.Add(s.Clone()); 141 return symSet.Count - 1; 142 } 143 144 void GenCond (BitArray s, Node p) { 145 if (p.typ == Node.rslv) CopySourcePart(p.pos, 0); 146 else { 147 int n = Sets.Elements(s); 148 if (n == 0) gen.Write("false"); // happens if an ANY set matches no symbol 149 else if (n <= maxTerm) 150 foreach (Symbol sym in tab.terminals) { 151 if (s[sym.n]) { 152 gen.Write("la.kind == {0}", sym.n); 153 --n; 154 if (n > 0) gen.Write(" || "); 155 } 156 } 157 else 158 gen.Write("StartOf({0})", NewCondSet(s)); 159 } 160 } 161 162 void PutCaseLabels (BitArray s) { 163 foreach (Symbol sym in tab.terminals) 164 if (s[sym.n]) gen.Write("case {0}: ", sym.n); 165 } 166 167 void GenCode (Node p, int indent, BitArray isChecked) { 168 Node p2; 169 BitArray s1, s2; 170 while (p != null) { 171 switch (p.typ) { 172 case Node.nt: { 173 Indent(indent); 174 gen.Write(p.sym.name + "("); 175 CopySourcePart(p.pos, 0); 176 gen.WriteLine(");"); 177 break; 178 } 179 case Node.t: { 180 Indent(indent); 181 // assert: if isChecked[p.sym.n] is true, then isChecked contains only p.sym.n 182 if (isChecked[p.sym.n]) gen.WriteLine("Get();"); 183 else gen.WriteLine("Expect({0});", p.sym.n); 184 break; 185 } 186 case Node.wt: { 187 Indent(indent); 188 s1 = tab.Expected(p.next, curSy); 189 s1.Or(tab.allSyncSets); 190 gen.WriteLine("ExpectWeak({0}, {1});", p.sym.n, NewCondSet(s1)); 191 break; 192 } 193 case Node.any: { 194 Indent(indent); 195 int acc = Sets.Elements(p.set); 196 if (tab.terminals.Count == (acc + 1) || (acc > 0 && Sets.Equals(p.set, isChecked))) { 197 // either this ANY accepts any terminal (the + 1 = end of file), or exactly what's allowed here 198 gen.WriteLine("Get();"); 199 } else { 200 GenErrorMsg(altErr, curSy); 201 if (acc > 0) { 202 gen.Write("if ("); GenCond(p.set, p); gen.WriteLine(") Get(); else SynErr({0});", errorNr); 203 } else gen.WriteLine("SynErr({0}); // ANY node that matches no symbol", errorNr); 204 } 205 break; 206 } 207 case Node.eps: break; // nothing 208 case Node.rslv: break; // nothing 209 case Node.sem: { 210 CopySourcePart(p.pos, indent); 211 break; 212 } 213 case Node.sync: { 214 Indent(indent); 215 GenErrorMsg(syncErr, curSy); 216 s1 = (BitArray)p.set.Clone(); 217 gen.Write("while (!("); GenCond(s1, p); gen.Write(")) {"); 218 gen.Write("SynErr({0}); Get();", errorNr); gen.WriteLine("}"); 219 break; 220 } 221 case Node.alt: { 222 s1 = tab.First(p); 223 bool equal = Sets.Equals(s1, isChecked); 224 bool useSwitch = UseSwitch(p); 225 if (useSwitch) { Indent(indent); gen.WriteLine("switch (la.kind) {"); } 226 p2 = p; 227 while (p2 != null) { 228 s1 = tab.Expected(p2.sub, curSy); 229 Indent(indent); 230 if (useSwitch) { 231 PutCaseLabels(s1); gen.WriteLine("{"); 232 } else if (p2 == p) { 233 gen.Write("if ("); GenCond(s1, p2.sub); gen.WriteLine(") {"); 234 } else if (p2.down == null && equal) { gen.WriteLine("} else {"); 235 } else { 236 gen.Write("} else if ("); GenCond(s1, p2.sub); gen.WriteLine(") {"); 237 } 238 GenCode(p2.sub, indent + 1, s1); 239 if (useSwitch) { 240 Indent(indent); gen.WriteLine(" break;"); 241 Indent(indent); gen.WriteLine("}"); 242 } 243 p2 = p2.down; 244 } 245 Indent(indent); 246 if (equal) { 247 gen.WriteLine("}"); 248 } else { 249 GenErrorMsg(altErr, curSy); 250 if (useSwitch) { 251 gen.WriteLine("default: SynErr({0}); break;", errorNr); 252 Indent(indent); gen.WriteLine("}"); 253 } else { 254 gen.Write("} "); gen.WriteLine("else SynErr({0});", errorNr); 255 } 256 } 257 break; 258 } 259 case Node.iter: { 260 Indent(indent); 261 p2 = p.sub; 262 gen.Write("while ("); 263 if (p2.typ == Node.wt) { 264 s1 = tab.Expected(p2.next, curSy); 265 s2 = tab.Expected(p.next, curSy); 266 gen.Write("WeakSeparator({0},{1},{2}) ", p2.sym.n, NewCondSet(s1), NewCondSet(s2)); 267 s1 = new BitArray(tab.terminals.Count); // for inner structure 268 if (p2.up || p2.next == null) p2 = null; else p2 = p2.next; 269 } else { 270 s1 = tab.First(p2); 271 GenCond(s1, p2); 272 } 273 gen.WriteLine(") {"); 274 GenCode(p2, indent + 1, s1); 275 Indent(indent); gen.WriteLine("}"); 276 break; 277 } 278 case Node.opt: 279 s1 = tab.First(p.sub); 280 Indent(indent); 281 gen.Write("if ("); GenCond(s1, p.sub); gen.WriteLine(") {"); 282 GenCode(p.sub, indent + 1, s1); 283 Indent(indent); gen.WriteLine("}"); 284 break; 285 } 286 if (p.typ != Node.eps && p.typ != Node.sem && p.typ != Node.sync) 287 isChecked.SetAll(false); // = new BitArray(tab.terminals.Count); 288 if (p.up) break; 289 p = p.next; 290 } 291 } 292 293 void GenTokens() { 294 foreach (Symbol sym in tab.terminals) { 295 if (Char.IsLetter(sym.name[0])) 296 gen.WriteLine(" public const int _{0} = {1};", sym.name, sym.n); 297 } 298 } 299 300 void GenPragmas() { 301 foreach (Symbol sym in tab.pragmas) { 302 gen.WriteLine(" public const int _{0} = {1};", sym.name, sym.n); 303 } 304 } 305 306 void GenCodePragmas() { 307 foreach (Symbol sym in tab.pragmas) { 308 gen.WriteLine(" if (la.kind == {0}) {{", sym.n); 309 CopySourcePart(sym.semPos, 4); 310 gen.WriteLine(" }"); 311 } 312 } 313 314 void GenProductions() { 315 foreach (Symbol sym in tab.nonterminals) { 316 curSy = sym; 317 gen.Write(" void {0}(", sym.name); 318 CopySourcePart(sym.attrPos, 0); 319 gen.WriteLine(") {"); 320 CopySourcePart(sym.semPos, 2); 321 GenCode(sym.graph, 2, new BitArray(tab.terminals.Count)); 322 gen.WriteLine(" }"); gen.WriteLine(); 323 } 324 } 325 326 void InitSets() { 327 for (int i = 0; i < symSet.Count; i++) { 328 BitArray s = (BitArray)symSet[i]; 329 gen.Write(" {"); 330 int j = 0; 331 foreach (Symbol sym in tab.terminals) { 332 if (s[sym.n]) gen.Write("_T,"); else gen.Write("_x,"); 333 ++j; 334 if (j%4 == 0) gen.Write(" "); 335 } 336 if (i == symSet.Count-1) gen.WriteLine("_x}"); else gen.WriteLine("_x},"); 337 } 338 } 339 340 public void WriteParser () { 341 Generator g = new Generator(tab); 342 int oldPos = buffer.Pos; // Pos is modified by CopySourcePart 343 symSet.Add(tab.allSyncSets); 344 345 fram = g.OpenFrame("Parser.frame"); 346 gen = g.OpenGen("Parser.cs"); 347 err = new StringWriter(); 348 foreach (Symbol sym in tab.terminals) GenErrorMsg(tErr, sym); 349 350 g.GenCopyright(); 351 g.SkipFramePart("-->begin"); 352 353 if (usingPos != null) { CopySourcePart(usingPos, 0); gen.WriteLine(); } 354 g.CopyFramePart("-->namespace"); 355 /* AW open namespace, if it exists */ 356 if (tab.nsName != null && tab.nsName.Length > 0) { 357 gen.WriteLine("namespace {0} {{", tab.nsName); 358 gen.WriteLine(); 359 } 360 g.CopyFramePart("-->constants"); 361 GenTokens(); /* ML 2002/09/07 write the token kinds */ 362 gen.WriteLine(" public const int maxT = {0};", tab.terminals.Count-1); 363 GenPragmas(); /* ML 2005/09/23 write the pragma kinds */ 364 g.CopyFramePart("-->declarations"); CopySourcePart(tab.semDeclPos, 0); 365 g.CopyFramePart("-->pragmas"); GenCodePragmas(); 366 g.CopyFramePart("-->productions"); GenProductions(); 367 g.CopyFramePart("-->parseRoot"); gen.WriteLine(" {0}();", tab.gramSy.name); if (tab.checkEOF) gen.WriteLine(" Expect(0);"); 368 g.CopyFramePart("-->initialization"); InitSets(); 369 g.CopyFramePart("-->errors"); gen.Write(err.ToString()); 370 g.CopyFramePart(null); 371 /* AW 2002-12-20 close namespace, if it exists */ 372 if (tab.nsName != null && tab.nsName.Length > 0) gen.Write("}"); 373 gen.Close(); 374 buffer.Pos = oldPos; 375 } 376 377 public void WriteStatistics () { 378 trace.WriteLine(); 379 trace.WriteLine("{0} terminals", tab.terminals.Count); 380 trace.WriteLine("{0} symbols", tab.terminals.Count + tab.pragmas.Count + 381 tab.nonterminals.Count); 382 trace.WriteLine("{0} nodes", tab.nodes.Count); 383 trace.WriteLine("{0} sets", symSet.Count); 384 } 385 386 public ParserGen (Parser parser) { 387 tab = parser.tab; 388 errors = parser.errors; 389 trace = parser.trace; 390 buffer = parser.scanner.buffer; 391 errorNr = -1; 392 usingPos = null; 393 } 394 395 } // end ParserGen 396 397 } // end namespace