CR.mod 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. (* CR Main Module of Coco/R
  2. == =====================
  3. This is a compiler generator that produces a scanner and a parser
  4. from an attributed grammar, and optionally a complete small compiler.
  5. Original code in Oberon by Hanspeter Moessenboeck, ETH Zurich
  6. Usage:
  7. COCOR [-options] GrammarName[.atg] [$options]
  8. Input:
  9. attributed grammar input grammar
  10. scanner.frm frame file
  11. parser.frm frame file
  12. compiler.frm frame file (optional)
  13. (Frame files must be in the same directory as the grammar, or may be
  14. found on a path specified by DOS environment variable CRFRAMES).
  15. Output:
  16. <GrammarName>S.def + mod generated scanner
  17. <GrammarName>P.def + mod generated parser
  18. <GrammarName>.err error numbers and corresponding error messages
  19. <GrammarName>.lst source listing with error messages and trace output
  20. Optionally
  21. <GrammarName>G.def + mod generated symbolic names
  22. <GrammarName>.mod generated compiler main module
  23. Implementation restrictions
  24. 1 too many nodes in graph (>1500) CRT.NewNode
  25. 2 too many symbols (>500) CRT.NewSym, MovePragmas
  26. 3 too many sets (>256 ANY-syms or SYNC syms) CRT.NewSet,
  27. 4 too many character classes (>250) CRT.NewClass
  28. 5 too many conditions in generated code (>100) CRX.NewCondSet
  29. 6 too many token names in "NAMES" (>100) CRT.NewName
  30. 7 too many states in automata (>500) CRA.NewState
  31. Trace output
  32. (To activate a trace switch, write "${letter}" in the input grammar, or
  33. invoke Coco with a second command line parameter)
  34. A Prints states of automaton
  35. C Generates complete compiler module
  36. D Suppresses Def Mod generation
  37. F Prints start symbols and followers of nonterminals.
  38. G Prints the top-down graph.
  39. I Trace of start symbol set computation.
  40. L Forces a listing (otherwise a listing is only printed if errors are found).
  41. M Suppresses FORWARD declarations in parser (for multipass compilers).
  42. N Uses default names for symbol value constants. This generates an
  43. extra module <grammar name>G, and corresponding import statements
  44. using constant names instead of numbers for symbols in parser and
  45. scanner.
  46. The constants are used unqualified and hence all needed constants
  47. have to be imported; so a complete import list for these constants
  48. is generated.
  49. There is no decision whether a constant is actually needed.
  50. The default conventions are (only terminals or pragmas can have names):
  51. single character --> <ASCII name (lowercase)>Sym
  52. eg. "+" --> plusSym
  53. character string --> <string>Sym
  54. eg. "PROGRAM" --> PROGRAMSym
  55. scanner token --> <token name>Sym
  56. eg. ident --> identSym
  57. O Trace of follow set computation (not yet implemented).
  58. P Generates parser only
  59. S Prints the symbol list.
  60. T Suppresses generation of def and mod files (grammar tests only).
  61. X Prints a cross reference list.
  62. ==========================================================================*)
  63. MODULE CR;
  64. FROM CRS IMPORT lst, src, errors, directory, Error, CharAt;
  65. FROM CRP IMPORT Parse;
  66. IMPORT CRC, CRT, CRA, CRP, CRS, CRX, FileIO, Storage;
  67. IMPORT SYSTEM (* for TSIZE only *);
  68. CONST
  69. ATGExt = ".atg";
  70. LSTExt = ".lst";
  71. Version = "1.53";
  72. ReleaseDate = "17 September 2002";
  73. TYPE
  74. INT32 = FileIO.INT32;
  75. VAR
  76. Options,
  77. GrammarName,
  78. ATGFileName,
  79. lstFileName: ARRAY [0 .. 63] OF CHAR;
  80. ll1: BOOLEAN; (* TRUE, if grammar is LL(1) *)
  81. ok: BOOLEAN; (* TRUE, if grammar tests ok so far *)
  MODULE ListHandler;
  (* ------------------- Source Listing and Error handler --------------
     Collects the errors reported during parsing in a singly linked list
     (StoreError) and later writes a numbered source listing to lst in
     which each error is shown below the line it belongs to
     (PrintListing). *)
    IMPORT FileIO, Storage, SYSTEM;
    IMPORT lst, CharAt, errors, INT32;
    EXPORT StoreError, PrintListing;

    TYPE
      Err = POINTER TO ErrDesc;
      ErrDesc = RECORD
        nr, line, col: INTEGER;   (* error number and source position *)
        next: Err                 (* next error in order of arrival *)
      END;

    CONST
      tab = 11C;

    VAR
      firstErr, lastErr: Err;     (* head and tail of the error list *)
      Extra: INTEGER;             (* added to the error column when the caret
                                     is positioned; starts at 1 and is set to
                                     0 once a CR LF line end has been read *)

    PROCEDURE StoreError (nr, line, col: INTEGER; pos: INT32);
    (* Store an error message for later printing.
       nr        error number, decoded by PrintErr
       line, col source position of the error
       pos       not used here; part of the handler signature
       Appends to the error list and increments the error counter. *)
      VAR
        nextErr: Err;
      BEGIN
        Storage.ALLOCATE(nextErr, SYSTEM.TSIZE(ErrDesc));
        nextErr^.nr := nr; nextErr^.line := line; nextErr^.col := col;
        nextErr^.next := NIL;
        IF firstErr = NIL
          THEN firstErr := nextErr
          ELSE lastErr^.next := nextErr
        END;
        lastErr := nextErr;
        INC(errors)
      END StoreError;

    PROCEDURE GetLine (VAR pos: INT32;
                       VAR line: ARRAY OF CHAR;
                       VAR eof: BOOLEAN);
    (* Read a source line. Return empty line if eof.
       pos   position of the next character to fetch with CharAt; on
             return it has been advanced past the line and its line end
       line  receives the text, always terminated by 0C; a line that does
             not fit is truncated to HIGH(line) characters
       eof   TRUE only if end of file was met before any character of a
             line had been read *)
      VAR
        ch: CHAR;
        i: CARDINAL;        (* number of characters stored in line *)
        empty: BOOLEAN;     (* TRUE until the line yields a character *)
      BEGIN
        i := 0; empty := TRUE; ch := CharAt(pos); INC(pos);
        WHILE (ch # FileIO.CR) & (ch # FileIO.LF) & (ch # FileIO.EOF) DO
          empty := FALSE;
          (* keep one element free for the terminator; excess characters
             are consumed but not stored *)
          IF i < HIGH(line) THEN line[i] := ch; INC(i) END;
          ch := CharAt(pos); INC(pos)
        END;
        eof := empty & (ch = FileIO.EOF); line[i] := 0C;
        IF ch = FileIO.CR THEN (* check for MsDos *)
          ch := CharAt(pos);
          IF ch = FileIO.LF THEN INC(pos); Extra := 0 END
        END
      END GetLine;

    PROCEDURE PrintErr (line: ARRAY OF CHAR; nr, col: INTEGER);
    (* Print an error message.
       line  text of the source line the error refers to
       nr    error number selecting the message text
       col   column of the error, used to position the caret *)

      PROCEDURE Msg (s: ARRAY OF CHAR);
      (* Write s to the listing file *)
        BEGIN
          FileIO.WriteString(lst, s)
        END Msg;

      PROCEDURE Pointer;
      (* Write the error marker and a caret below the error column.  Tabs
         of the source line are repeated so that the caret lines up with
         the listed text; positions beyond the end of the text are padded
         with blanks and line is not inspected there. *)
        VAR
          i: INTEGER;
          k: CARDINAL;        (* index into line, in step with i *)
          inText: BOOLEAN;    (* TRUE while line[k] is a character of the text *)
        BEGIN
          FileIO.WriteString(lst, "***** ");
          i := 0; k := 0; inText := TRUE;
          WHILE i < col + Extra - 2 DO
            IF inText & ((k > HIGH(line)) OR (line[k] = 0C)) THEN
              inText := FALSE
            END;
            IF inText & (line[k] = tab)
              THEN FileIO.Write(lst, tab)
              ELSE FileIO.Write(lst, ' ')
            END;
            INC(i);
            IF inText THEN INC(k) END
          END;
          FileIO.WriteString(lst, "^ ")
        END Pointer;

      BEGIN
        Pointer;
        CASE nr OF
            0: Msg("EOF expected")
        |   1: Msg("ident expected")
        |   2: Msg("string expected")
        |   3: Msg("badstring expected")
        |   4: Msg("number expected")
        |   5: Msg("'COMPILER' expected")
        |   6: Msg("'PRODUCTIONS' expected")
        |   7: Msg("'=' expected")
        |   8: Msg("'.' expected")
        |   9: Msg("'END' expected")
        |  10: Msg("'CHARACTERS' expected")
        |  11: Msg("'TOKENS' expected")
        |  12: Msg("'NAMES' expected")
        |  13: Msg("'PRAGMAS' expected")
        |  14: Msg("'COMMENTS' expected")
        |  15: Msg("'FROM' expected")
        |  16: Msg("'TO' expected")
        |  17: Msg("'NESTED' expected")
        |  18: Msg("'IGNORE' expected")
        |  19: Msg("'CASE' expected")
        |  20: Msg("'+' expected")
        |  21: Msg("'-' expected")
        |  22: Msg("'..' expected")
        |  23: Msg("'ANY' expected")
        |  24: Msg("'CHR' expected")
        |  25: Msg("'(' expected")
        |  26: Msg("')' expected")
        |  27: Msg("'|' expected")
        |  28: Msg("'WEAK' expected")
        |  29: Msg("'[' expected")
        |  30: Msg("']' expected")
        |  31: Msg("'{' expected")
        |  32: Msg("'}' expected")
        |  33: Msg("'SYNC' expected")
        |  34: Msg("'CONTEXT' expected")
        |  35: Msg("'<' expected")
        |  36: Msg("'>' expected")
        |  37: Msg("'<.' expected")
        |  38: Msg("'.>' expected")
        |  39: Msg("'(.' expected")
        |  40: Msg("'.)' expected")
        |  41: Msg("not expected")
        |  42: Msg("invalid TokenFactor")
        |  43: Msg("invalid Factor")
        |  44: Msg("invalid Factor")
        |  45: Msg("invalid Term")
        |  46: Msg("invalid Symbol")
        |  47: Msg("invalid SingleChar")
        |  48: Msg("invalid SimSet")
        |  49: Msg("invalid NameDecl")
        |  50: Msg("this symbol not expected in TokenDecl")
        |  51: Msg("invalid TokenDecl")
        |  52: Msg("invalid Attribs")
        |  53: Msg("invalid Declaration")
        |  54: Msg("invalid Declaration")
        |  55: Msg("invalid Declaration")
        |  56: Msg("this symbol not expected in CR")
        |  57: Msg("invalid CR")
        | 101: Msg("character set may not be empty")
        | 102: Msg("string literal may not extend over line end")
        | 103: Msg("a literal must not have attributes")
        | 104: Msg("this symbol kind not allowed in production")
        | 105: Msg("attribute mismatch between declaration and use")
        | 106: Msg("undefined string in production")
        | 107: Msg("name declared twice")
        | 108: Msg("this type not allowed on left side of production")
        | 109: Msg("earlier semantic action was not terminated")
        | 111: Msg("no production found for grammar name")
        | 112: Msg("grammar symbol must not have attributes")
        | 113: Msg("a literal must not be declared with a structure")
        | 114: Msg("semantic action not allowed here")
        | 115: Msg("undefined name")
        | 116: Msg("attributes not allowed in token declaration")
        | 117: Msg("name does not match grammar name")
        | 118: Msg("unacceptable constant value")
        | 119: Msg("may not ignore CHR(0)")
        | 120: Msg("token might be empty")
        | 121: Msg("token must not start with an iteration")
        | 122: Msg("comment delimiters may not be structured")
        | 123: Msg("only terminals may be weak")
        | 124: Msg("literal tokens may not contain white space")
        | 125: Msg("comment delimiter must be 1 or 2 characters long")
        | 126: Msg("character set contains more than one character")
        | 127: Msg("could not make deterministic automaton")
        | 128: Msg("semantic action text too long - please split it")
        | 129: Msg("literal tokens may not be empty")
        | 130: Msg("IGNORE CASE must appear earlier")
        ELSE Msg("Error: "); FileIO.WriteInt(lst, nr, 1);
        END;
        FileIO.WriteLn(lst)
      END PrintErr;

    PROCEDURE PrintListing;
    (* Print a source listing with error messages.
       Every source line is written with its line number; the stored
       errors whose line matches are printed directly below it.  Errors
       left over after the last line are printed at the end, followed by
       the total number of errors printed. *)
      VAR
        nextErr: Err;                     (* next error still to be printed *)
        eof: BOOLEAN;
        lnr, errC: INTEGER;               (* current line number, errors printed *)
        srcPos: INT32;                    (* read position in the source *)
        line: ARRAY [0 .. 255] OF CHAR;
      BEGIN
        FileIO.WriteString(lst, "Listing:");
        FileIO.WriteLn(lst); FileIO.WriteLn(lst);
        srcPos := FileIO.Long0; nextErr := firstErr;
        GetLine(srcPos, line, eof); lnr := 1; errC := 0;
        WHILE ~ eof DO
          FileIO.WriteInt(lst, lnr, 5); FileIO.WriteString(lst, " ");
          FileIO.WriteString(lst, line); FileIO.WriteLn(lst);
          WHILE (nextErr # NIL) & (nextErr^.line = lnr) DO
            PrintErr(line, nextErr^.nr, nextErr^.col); INC(errC);
            nextErr := nextErr^.next
          END;
          GetLine(srcPos, line, eof); INC(lnr);
        END;
        IF nextErr # NIL THEN
          (* errors that were not matched to any listed line *)
          FileIO.WriteInt(lst, lnr, 5); FileIO.WriteLn(lst);
          WHILE nextErr # NIL DO
            PrintErr(line, nextErr^.nr, nextErr^.col); INC(errC);
            nextErr := nextErr^.next
          END
        END;
        FileIO.WriteLn(lst);
        FileIO.WriteInt(lst, errC, 5); FileIO.WriteString(lst, " error");
        IF errC # 1 THEN FileIO.Write(lst, "s") END;
        FileIO.WriteLn(lst); FileIO.WriteLn(lst); FileIO.WriteLn(lst);
      END PrintListing;

    BEGIN
      (* empty error list; lastErr is only read once firstErr is set *)
      firstErr := NIL; Extra := 1;
    END ListHandler;
  PROCEDURE SetOption (s: ARRAY OF CHAR);
  (* Set compiler options.
     s holds an option string whose first character is skipped, whatever
     it is.  Every following letter, in either case, switches on the
     entry of CRT.ddt for the corresponding capital letter; all other
     characters are ignored.  Scanning stops at the terminating 0C or at
     the end of the array, whichever comes first, and an empty string
     sets nothing. *)
    VAR
      i: CARDINAL;
      ch: CHAR;
    BEGIN
      IF s[0] # 0C THEN
        i := 1;
        WHILE (i <= HIGH(s)) & (s[i] # 0C) DO
          ch := CAP(s[i]);
          IF (ch >= "A") AND (ch <= "Z") THEN CRT.ddt[ch] := TRUE END;
          INC(i)
        END
      END
    END SetOption;
  PROCEDURE Msg (S: ARRAY OF CHAR);
  (* Write S to standard output and finish the line *)
    BEGIN
      FileIO.WriteString(FileIO.StdOut, S);
      FileIO.WriteLn(FileIO.StdOut)
    END Msg;
  (* --------------------------- Help ------------------------------- *)

  PROCEDURE Help;
  (* Write the usage summary and the list of option letters to standard
     output, one line per call of Line *)

    PROCEDURE Line (text: ARRAY OF CHAR);
    (* Write text followed by a line break to standard output *)
      BEGIN
        FileIO.WriteString(FileIO.StdOut, text);
        FileIO.WriteLn(FileIO.StdOut)
      END Line;

    BEGIN
      (* invocation *)
      Line("Usage: COCOR [-Options] [Grammar[.atg]] [-Options]");
      Line("Example: COCOR -mcs Test");
      Line("");

      (* option letters *)
      Line("Options are");
      Line("a - Trace automaton");
      Line("c - Generate compiler module");
      Line("d - Suppress generation of Definition Modules");
      Line("f - Give Start and Follower sets");
      Line("g - Print top-down graph");
      Line("i - Trace start set computations");
      Line("l - Force listing");
      Line("m - (Multipass) Suppress FORWARD declarations");
      Line("n - Generate symbolic names");
      Line("p - Generate parser only");
      Line("s - Print symbol table");
      Line("t - Grammar tests only - no code generated");
      Line("x - Print cross reference list");

      (* where the frame files are looked for *)
      Line("COMPILER.FRM, SCANNER.FRM and PARSER.FRM must be in the working directory,");
      Line("or on the path specified by the environment variable CRFRAMES")
    END Help;
  BEGIN (* CR *)
    (* Banner on standard output *)
    FileIO.WriteString(FileIO.StdOut, "Coco/R (WinTel) - Compiler-Compiler V");
    FileIO.WriteString(FileIO.StdOut, Version);
    FileIO.WriteLn(FileIO.StdOut);
    FileIO.WriteString(FileIO.StdOut, "Released by Pat Terry ");
    FileIO.WriteString(FileIO.StdOut, ReleaseDate);
    FileIO.WriteLn(FileIO.StdOut);

    (* First parameter: help request, options, or the grammar name *)
    FileIO.NextParameter(GrammarName);
    IF (GrammarName[0] = "?")
       OR (GrammarName[0] = "/") AND (GrammarName[1] = "?") THEN
      Help; FileIO.QuitExecution
    END;
    IF GrammarName[0] = 0C THEN
      FileIO.WriteString(FileIO.StdOut, "(COCOR ? gives short help screen)");
      FileIO.WriteLn(FileIO.StdOut);
    END;
    WHILE (GrammarName[0] = "-") OR (GrammarName[0] = "/") DO
      (* accept options before filename *)
      SetOption(GrammarName); FileIO.NextParameter(GrammarName)
    END;

    (* Open the grammar; ask for a name until a file can be opened *)
    ok := GrammarName[0] # 0C;
    REPEAT
      IF ~ ok THEN
        FileIO.WriteString(FileIO.StdOut, "Grammar[.atg] ? : ");
        FileIO.ReadString(FileIO.StdIn, GrammarName);
        IF ~ FileIO.Okay THEN FileIO.QuitExecution END;
        FileIO.ReadLn(FileIO.StdIn);
      END;
      FileIO.AppendExtension(GrammarName, ATGExt, ATGFileName);
      GrammarName := ATGFileName;
      FileIO.Open(src, GrammarName, FALSE);
      ok := FileIO.Okay;
      IF ~ ok THEN
        FileIO.WriteString(FileIO.StdOut, "File <");
        FileIO.WriteString(FileIO.StdOut, GrammarName);
        FileIO.WriteString(FileIO.StdOut, "> not found.");
        FileIO.WriteLn(FileIO.StdOut);
      END
    UNTIL ok;

    (* Options may also follow the grammar name *)
    FileIO.NextParameter(Options);
    IF Options[0] # 0C THEN SetOption(Options) END;

    (* The listing file takes the grammar's name with the .lst extension *)
    FileIO.ExtractDirectory(GrammarName, directory);
    FileIO.ChangeExtension(GrammarName, LSTExt, lstFileName);
    FileIO.Open(lst, lstFileName, TRUE);
    IF ~ FileIO.Okay THEN
      (* Tested the same way as after opening the grammar file: without a
         listing file there is nowhere to report errors or traces, so
         give up here instead of writing to a file that is not open. *)
      FileIO.WriteString(FileIO.StdOut, "Cannot create listing file <");
      FileIO.WriteString(FileIO.StdOut, lstFileName);
      FileIO.WriteString(FileIO.StdOut, ">.");
      FileIO.WriteLn(FileIO.StdOut);
      FileIO.Close(src);
      FileIO.QuitExecution
    END;
    FileIO.WriteString(lst, "Coco/R - Compiler-Compiler V");
    FileIO.WriteString(lst, Version);
    FileIO.WriteLn(lst);
    FileIO.WriteString(lst, "Released by Pat Terry ");
    FileIO.WriteString(lst, ReleaseDate);
    FileIO.WriteLn(lst);
    FileIO.WriteString(lst, "Source file: ");
    FileIO.WriteString(lst, GrammarName);
    FileIO.WriteLn(lst); FileIO.WriteLn(lst);

    (* Parse the grammar; errors are collected through StoreError *)
    FileIO.WriteLn(FileIO.StdOut);
    FileIO.WriteString(FileIO.StdOut, "parsing file ");
    FileIO.WriteString(FileIO.StdOut, GrammarName);
    FileIO.WriteLn(FileIO.StdOut);
    CRS.Error := StoreError;
    CRP.Parse;

    IF errors = 0 THEN
      (* Grammar tests; each one runs only if the previous ones passed *)
      Msg("testing grammar");
      FileIO.WriteString(lst, "Grammar Tests:");
      FileIO.WriteLn(lst); FileIO.WriteLn(lst);
      CRT.CompSymbolSets;
      CRT.TestCompleteness(ok);
      IF ok THEN CRT.TestIfAllNtReached(ok) END;
      IF ok THEN CRT.FindCircularProductions(ok) END;
      IF ok THEN CRT.TestIfNtToTerm(ok) END;
      IF ok THEN CRT.LL1Test(ll1) END;
      FileIO.WriteLn(lst);
      (* ll1 is assigned only when ok is TRUE; it is read here and below
         only after ~ ok has been found FALSE *)
      IF ~ ok OR ~ ll1 OR CRT.ddt["L"] OR CRT.ddt["X"] THEN
        Msg("listing");
        PrintListing; IF CRT.ddt["X"] THEN CRT.XRef; END;
      END;
      IF CRT.ddt["N"] OR CRT.symNames THEN
        Msg("symbol name assignment");
        CRT.AssignSymNames(CRT.ddt["N"], CRT.symNames);
      END;

      (* Code generation, unless the tests failed or only tests were asked for *)
      IF ok AND ~ CRT.ddt["T"] THEN
        Msg("generating parser");
        CRX.GenCompiler;
        IF CRT.genScanner AND ~ CRT.ddt["P"] THEN
          Msg("generating scanner");
          CRA.WriteScanner(ok);
          IF CRT.ddt["A"] THEN CRA.PrintStates END;
        END;
        IF CRT.ddt["C"] THEN
          Msg("generating compiler");
          CRC.WriteDriver;
        END;
        CRX.WriteStatistics;
      END;
      IF ~ ok THEN Msg("Compilation ended with errors in grammar tests.");
      ELSIF ~ ll1 THEN Msg("Compilation ended with LL(1) errors.");
      ELSE Msg("Compilation completed. No errors detected.");
      END;
    ELSE
      (* Syntax errors in the grammar: list them and generate nothing *)
      Msg("listing");
      PrintListing; IF CRT.ddt["X"] THEN CRT.XRef END;
      Msg("*** errors detected ***");
    END;

    (* Optional dumps, then close both files *)
    IF CRT.ddt["G"] THEN CRT.PrintGraph END;
    IF CRT.ddt["S"] THEN CRT.PrintSymbolTable END;
    FileIO.Close(lst); FileIO.Close(src);
  END CR.