1 #!/usr/bin/env dub
2 /+ dub.sdl:
3     name "regenerate_parser"
4     version "1.0.0"
5     license "public domain"
6     dependency "pegged" version="~>0.4.5"
7 +/
8 
// This script fetches the official TOML ABNF file and converts it
// into `source/toml_foolery/decode/peg_grammar.d`. Along the way, it
// needs to patch both the ABNF and the generated PEG.
12 //
13 // It receives a single argument corresponding to a TOML version
14 // (specifically, a branch or tag name in the official TOML GitHub
15 // repo (https://github.com/toml-lang/toml)).
16 //
17 // The script performs the following steps:
18 //
19 // 1. Fetch the ABNF from GitHub, and save it to `tmp/toml.abnf`.
20 //
21 //
22 // 2. Patch `tmp/toml.abnf` using `toml_abnf.patch`. That patch file
23 //    makes the following changes:
24 //
25 //    - Replace all instances of `=/` with `/` for compatibility with
26 //      abnf2peg.
27 //
28 //    - Rename rules whose names are D keywords (float, string, etc.)
29 //      so that they end in a hyphen (e.g., "float" becomes "float-"),
30 //      which later gets translated to an underscore in PEG and D.
31 //
32 //
// 3. Translate `tmp/toml.abnf` into `tmp/toml.peg` using a custom
34 //    version of abnf2peg.
35 //
36 //    - Original abnf2peg repo:
37 //      https://github.com/sanjayss/abnf2peg
38 //
39 //    - The fork, included here as a submodule:
40 //      https://github.com/andrejp88/abnf2peg
41 //
42 //
43 // 4. Patch `tmp/toml.peg` using `toml_peg.patch`. That patch file
44 //    makes the following changes:
45 //
46 //   - Add `TomlGrammar:` before the first rule.
47 //
48 //   - Add `:eoi` at the end of the `toml` rule.
49 //
50 //   - Reverse the order of the alternations in the `expression` rule
51 //     so that it checks the "easiest" case (tables) first and the
52 //     empty line case last (it can always succeed, so it must not be
53 //     the first attempt).
54 //
55 //   - Reverse the order of alternations in the `key` rule so that
56 //     `dotted_key` is attempted before `simple_key`.
57 //
58 //   - Add `!quotation_mark` to the end of the `mlb_quotes` rule to
59 //     prevent it from matching two out of three closing quotation
60 //     marks.
61 //
62 //   - Add `!apostrophe` to the end of the `mll_quotes` rule to
63 //     prevent it from matching two out of three closing apostrophes.
64 //
65 //   - Rearrange the order of alternations in the `integer` rule so
66 //     that `dec_int` is attempted last.
67 //
68 //   - Reverse the order of alternations in the `unsigned_dec_int`
69 //     rule so that `DIGIT` is attempted last.
70 //
71 //   - Add lowercase letters to the `HEXDIG` rule. ABNF is
72 //     case-insensitive which is why they aren't there to begin with.
73 //
74 //
75 // 5. Convert tmp/toml.peg into a Pegged module at
76 //    `source/toml_foolery/decode/peg_grammar.d` using the `asModule`
77 //    function from `pegged.grammar`.
78 
79 
80 module regenerate_parser;
81 
82 import std.conv : to;
83 import std.stdio : File, writeln;
84 
85 int main(string[] args)
86 {
87     if (args.length != 2)
88     {
89         import std.stdio : stderr;
90         stderr.writeln(
91             "regenerate_parser expects an argument corresponding to a TOML version.\n" ~
92             "The argument must be a valid branch or tag name in the GitHub TOML repository at\n" ~
93             "https://github.com/toml-lang/toml\n" ~
94             "\n" ~
95             "Examples:\n" ~
96             "    ./regenerate_parser 1.0.0\n" ~
97             "    ./regenerate_parser master\n"
98         );
99 
100         return 1;
101     }
102 
103     string tomlVersion = args[1];
104 
105     fetchAbnf(tomlVersion);
106     patchAbnf();
107     convertAbnfToPeg();
108     patchPeg();
109     convertPegToD();
110 
111     writeln("Done");
112     return 0;
113 }
114 
115 void fetchAbnf(string tomlVersion)
116 {
117     import std.file : mkdirRecurse, write;
118     import std.net.curl : get, HTTPStatusException;
119     import std.path : buildPath;
120 
121     string url = "https://raw.githubusercontent.com/toml-lang/toml/" ~ tomlVersion ~ "/toml.abnf";
122 
123     writeln("Fetching " ~ url);
124     string abnf = get(url).to!string;
125 
126     writeln("Saving to tmp/toml.abnf");
127     mkdirRecurse("tmp");
128     write(buildPath("tmp", "toml.abnf"), abnf);
129 }
130 
131 void patchAbnf()
132 {
133     import std.process : Pid, spawnProcess, wait;
134     Pid patchPid = spawnProcess(["patch", "tmp/toml.abnf"], File("toml_abnf.patch"));
135     wait(patchPid);
136 }
137 
138 void convertAbnfToPeg()
139 {
140     import std.process : Config, Pid, spawnProcess, wait;
141     import std.stdio : stderr, stdin, stdout;
142 
143     Pid dubBuildPid = spawnProcess(["dub", "build"], stdin, File("/dev/null"), stderr, null, Config.none, "abnf2peg");
144     wait(dubBuildPid);
145 
146     writeln("Converting tmp/toml.abnf to tmp/toml.peg");
147     Pid abnf2pegPid = spawnProcess(["./abnf2peg/abnf2peg", "tmp/toml.abnf"], stdin, File("tmp/toml.peg", "w"));
148     wait(abnf2pegPid);
149 }
150 
151 void patchPeg()
152 {
153     import std.process : Pid, spawnProcess, wait;
154     Pid patchPid = spawnProcess(["patch", "tmp/toml.peg"], File("toml_peg.patch"));
155     wait(patchPid);
156 }
157 
158 void convertPegToD()
159 {
160     import pegged.grammar : asModule;
161     import std.file : readText;
162 
163     writeln("Converting tmp/toml.peg into a Pegged module at source/toml_foolery/decode/peg_grammar.d");
164     string peg = readText("tmp/toml.peg");
165     asModule("toml_foolery.decode.peg_grammar", "source/toml_foolery/decode/peg_grammar", peg);
166 }