#!/usr/bin/env dub
/+ dub.sdl:
    name "regenerate_parser"
    version "1.0.0"
    license "public domain"
    dependency "pegged" version="~>0.4.5"
+/

// This script fetches the official TOML ABNF file and converts it
// into `source/toml_foolery/decode/peg_grammar.d`. Along the way, it
// needs to patch both the ABNF and the generated PEG.
//
// It receives a single argument corresponding to a TOML version
// (specifically, a branch or tag name in the official TOML GitHub
// repo (https://github.com/toml-lang/toml)).
//
// The script performs the following steps:
//
// 1. Fetch the ABNF from GitHub, and save it to `tmp/toml.abnf`.
//
//
// 2. Patch `tmp/toml.abnf` using `toml_abnf.patch`. That patch file
//    makes the following changes:
//
//    - Replace all instances of `=/` with `/` for compatibility with
//      abnf2peg.
//
//    - Rename rules whose names are D keywords (float, string, etc.)
//      so that they end in a hyphen (e.g., "float" becomes "float-"),
//      which later gets translated to an underscore in PEG and D.
//
//
// 3. Translate `tmp/toml.abnf` into `tmp/toml.peg` using a custom
//    version of abnf2peg.
//
//    - Original abnf2peg repo:
//      https://github.com/sanjayss/abnf2peg
//
//    - The fork, included here as a submodule:
//      https://github.com/andrejp88/abnf2peg
//
//
// 4. Patch `tmp/toml.peg` using `toml_peg.patch`. That patch file
//    makes the following changes:
//
//    - Add `TomlGrammar:` before the first rule.
//
//    - Add `:eoi` at the end of the `toml` rule.
//
//    - Reverse the order of the alternations in the `expression` rule
//      so that it checks the "easiest" case (tables) first and the
//      empty line case last (it can always succeed, so it must not be
//      the first attempt).
//
//    - Reverse the order of alternations in the `key` rule so that
//      `dotted_key` is attempted before `simple_key`.
//
//    - Add `!quotation_mark` to the end of the `mlb_quotes` rule to
//      prevent it from matching two out of three closing quotation
//      marks.
//
//    - Add `!apostrophe` to the end of the `mll_quotes` rule to
//      prevent it from matching two out of three closing apostrophes.
//
//    - Rearrange the order of alternations in the `integer` rule so
//      that `dec_int` is attempted last.
//
//    - Reverse the order of alternations in the `unsigned_dec_int`
//      rule so that `DIGIT` is attempted last.
//
//    - Add lowercase letters to the `HEXDIG` rule. ABNF is
//      case-insensitive which is why they aren't there to begin with.
//
//
// 5. Convert tmp/toml.peg into a Pegged module at
//    `source/toml_foolery/decode/peg_grammar.d` using the `asModule`
//    function from `pegged.grammar`.


module regenerate_parser;

import std.conv : to;
import std.stdio : File, writeln;

/// Entry point.
///
/// Expects exactly one command-line argument: a branch or tag name in the
/// toml-lang/toml GitHub repository. Runs the five pipeline steps in order.
///
/// Returns: 0 on success; 1 if the argument count is wrong or any step
/// throws (the exception message is printed to stderr).
int main(string[] args)
{
    import std.stdio : stderr;

    if (args.length != 2)
    {
        stderr.writeln(
            "regenerate_parser expects an argument corresponding to a TOML version.\n" ~
            "The argument must be a valid branch or tag name in the GitHub TOML repository at\n" ~
            "https://github.com/toml-lang/toml\n" ~
            "\n" ~
            "Examples:\n" ~
            "    ./regenerate_parser 1.0.0\n" ~
            "    ./regenerate_parser master\n"
        );

        return 1;
    }

    string tomlVersion = args[1];

    try
    {
        fetchAbnf(tomlVersion);
        patchAbnf();
        convertAbnfToPeg();
        patchPeg();
        convertPegToD();
    }
    catch (Exception e)
    {
        // Each later step consumes the previous step's output, so stop at
        // the first failure and report it through the exit code.
        stderr.writeln("regenerate_parser failed: ", e.msg);
        return 1;
    }

    writeln("Done");
    return 0;
}

/// Throws an `Exception` naming `command` if `status` is non-zero.
///
/// Params:
///     status  = exit status returned by `std.process.wait`
///     command = human-readable description of the command, used in the message
private void enforceSuccess(int status, string command)
{
    import std.exception : enforce;
    enforce(status == 0, "`" ~ command ~ "` exited with status " ~ status.to!string);
}

/// Downloads `toml.abnf` for the given branch or tag of toml-lang/toml and
/// saves it as `tmp/toml.abnf`, creating the `tmp` directory if needed.
///
/// Params:
///     tomlVersion = branch or tag name inserted into the raw GitHub URL
///
/// Any exception raised by the download or the file write propagates to
/// the caller.
void fetchAbnf(string tomlVersion)
{
    import std.file : mkdirRecurse, write;
    import std.net.curl : get;
    import std.path : buildPath;

    string url = "https://raw.githubusercontent.com/toml-lang/toml/" ~ tomlVersion ~ "/toml.abnf";

    writeln("Fetching " ~ url);
    string abnf = get(url).to!string;

    writeln("Saving to tmp/toml.abnf");
    mkdirRecurse("tmp");
    write(buildPath("tmp", "toml.abnf"), abnf);
}

/// Applies `toml_abnf.patch` to `tmp/toml.abnf` by feeding the patch file
/// to the external `patch` program on its standard input.
///
/// Throws: `Exception` if `patch` exits with a non-zero status.
void patchAbnf()
{
    import std.process : Pid, spawnProcess, wait;

    Pid patchPid = spawnProcess(["patch", "tmp/toml.abnf"], File("toml_abnf.patch"));
    enforceSuccess(wait(patchPid), "patch tmp/toml.abnf");
}

/// Builds the abnf2peg tool with `dub build` (run inside the `abnf2peg`
/// directory) and then runs it on `tmp/toml.abnf`, redirecting its
/// standard output into `tmp/toml.peg`.
///
/// Throws: `Exception` if either child process exits with a non-zero status.
void convertAbnfToPeg()
{
    import std.process : Config, Pid, spawnProcess, wait;
    import std.stdio : stderr, stdin;

    // The build's stdout is discarded. The null device must be opened for
    // writing: a read-only handle would make the child's writes fail.
    Pid dubBuildPid = spawnProcess(
        ["dub", "build"],
        stdin,
        File("/dev/null", "w"),
        stderr,
        null,
        Config.none,
        "abnf2peg"
    );
    enforceSuccess(wait(dubBuildPid), "dub build (in abnf2peg)");

    writeln("Converting tmp/toml.abnf to tmp/toml.peg");
    Pid abnf2pegPid = spawnProcess(
        ["./abnf2peg/abnf2peg", "tmp/toml.abnf"],
        stdin,
        File("tmp/toml.peg", "w")
    );
    enforceSuccess(wait(abnf2pegPid), "./abnf2peg/abnf2peg tmp/toml.abnf");
}

/// Applies `toml_peg.patch` to `tmp/toml.peg` by feeding the patch file
/// to the external `patch` program on its standard input.
///
/// Throws: `Exception` if `patch` exits with a non-zero status.
void patchPeg()
{
    import std.process : Pid, spawnProcess, wait;

    Pid patchPid = spawnProcess(["patch", "tmp/toml.peg"], File("toml_peg.patch"));
    enforceSuccess(wait(patchPid), "patch tmp/toml.peg");
}

/// Reads `tmp/toml.peg` and passes it to Pegged's `asModule` with the
/// module name `toml_foolery.decode.peg_grammar` and the output path
/// `source/toml_foolery/decode/peg_grammar`.
void convertPegToD()
{
    import pegged.grammar : asModule;
    import std.file : readText;

    writeln("Converting tmp/toml.peg into a Pegged module at source/toml_foolery/decode/peg_grammar.d");
    string peg = readText("tmp/toml.peg");
    asModule("toml_foolery.decode.peg_grammar", "source/toml_foolery/decode/peg_grammar", peg);
}