1 module toml_foolery.decode.parse_toml;
2 
3 import std.algorithm;
4 import std.array;
5 import std.conv;
6 import std.datetime;
7 import std.exception : enforce;
8 import std.range;
9 import std.range.primitives;
10 import std.traits;
11 
12 version(unittest) import dshould;
13 
14 import toml_foolery.decode.set_data;
15 import toml_foolery.decode.exceptions;
16 import toml_foolery.decode.types.datetime;
17 import toml_foolery.decode.types.floating_point;
18 import toml_foolery.decode.types.integer;
19 import toml_foolery.decode.types.string;
20 
21 import toml_foolery.decode.peg_grammar;
22 // If you're working on the toml.peg file, comment the previous import and uncomment this:
23 // import pegged.grammar; mixin(grammar(import("toml.peg")));
24 // To turn it into a D module again, run the following code once:
25 // import pegged.grammar : asModule; asModule("toml_foolery.decode.peg_grammar", "source/toml_foolery/decode/peg_grammar", import("toml.peg"));
26 
27 
/**
 *  Decodes a TOML string
 *
 *  Params:
 *      toml = A string containing TOML data.
 *
 *      dest = The struct into which the parsed TOML data should be placed.
 *
 *      T =    The type of struct to populate (two-argument overload) or to
 *             create and return (single-argument overload).
 *
 *
 *  Returns:
 *      The two-argument overload returns nothing; it fills `dest` in place.
 *      The single-argument overload returns a `T` populated with data read
 *      from `toml`.
 *
 *  Throws:
 *      TomlSyntaxException if the given data is invalid TOML.
 *      TomlDuplicateNameException if the given data contains duplicate key or table names.
 *      TomlUnsupportedException if the given data contains TOML features not yet supported by the library.
 *      TomlInvalidValueException if the given data contains invalid values (e.g. a date with an invalid month).
 *      TomlTypeException if a declared key's value does not match the destination value.
 *
 */
public void parseToml(T)(string toml, ref T dest)
if (is(T == struct))
{
    // version tracer is for debugging a grammar, it comes from pegged.
    version (tracer)
    {
        import std.experimental.logger : sharedLog;
        sharedLog = new TraceLogger("TraceLog " ~ __TIMESTAMP__ ~ ".txt");
        traceAll();
    }

    ParseTree tree = TomlGrammar(toml);

    // Sanity checks on the shape of the tree produced by the grammar.
    assert(
        tree.name == "TomlGrammar",
        "Expected root of tree to be TomlGrammar, but got: " ~ tree.name
    );

    assert(
        tree.children.length == 1,
        "Expected root of tree to have exactly one child, but got: " ~ tree.children.length.to!string
    );

    // The message reports the child's name (the thing being checked), not the root's.
    assert(
        tree.children[0].name == "TomlGrammar.toml",
        "Expected only child of tree root to be TomlGrammar.toml, but got: " ~ tree.children[0].name
    );

    ParseTree[] lines = tree.children[0].children;

    // Every key and (non-array) table address that has been declared so far.
    bool[string[]] seenSoFar;

    // Address of the table that subsequent key/value pairs belong to.
    string[] tableAddress;

    // Given a dotted key representing an array of tables, how many times has it appeared so far?
    size_t[string[]] tableArrayCounts;

    foreach (ParseTree line; lines)
    {
        if(line.name != "TomlGrammar.expression")
        {
            throw new TomlSyntaxException(
                "Invalid TOML data. Expected a TomlGrammar.expression, but got: " ~
                line.name ~ "\n Full tree:\n" ~ tree.toString()
            );
        }

        lineLoop:
        foreach (ParseTree partOfLine; line.children)
        {
            switch (partOfLine.name)
            {
                case "TomlGrammar.keyval":
                    processTomlKeyval(partOfLine, dest, tableAddress, seenSoFar);
                    break;

                case "TomlGrammar.table":

                    // A table header replaces the current table address with its own key.
                    tableAddress =
                        partOfLine
                        .children[0]
                        .children
                        .find!(e => e.name == "TomlGrammar.key")[0]
                        .splitDottedKey;

                    if (partOfLine.children[0].name == "TomlGrammar.array_table")
                    {
                        // First occurrence gets index 0; each later one gets the next index.
                        if (tableAddress !in tableArrayCounts)
                        {
                            tableArrayCounts[tableAddress.idup] = 0;
                        }
                        else
                        {
                            tableArrayCounts[tableAddress.idup]++;
                        }

                        // The element index becomes the last component of the address.
                        tableAddress ~= tableArrayCounts[tableAddress].to!string;
                    }
                    else
                    {
                        if (tableAddress in seenSoFar)
                        {
                            throw new TomlDuplicateNameException(
                                `Key/table "` ~ tableAddress.join('.') ~ `" has been declared twice.`
                            );
                        }

                        seenSoFar[tableAddress.idup] = true;
                    }

                    break;

                default:
                    // Any other part of an expression carries no data to store.
                    continue lineLoop;
            }
        }
    }

    version (tracer)
    {
        // Does not need to be in version(tracer) necessarily, but I figure if you
        // want the tracer, you want the HTML.
        // Be warned that toHTML breaks when encountering non-ASCII UTF-8 codepoints.
        import pegged.tohtml : toHTML, Expand;
        toHTML!
            (Expand.ifNotMatch,".comment", ".simple_key", ".basic_string", ".literal_string", ".expression")
            (tree, "hard_example_toml.html");
    }
}
157 
/// ditto
public T parseToml(T)(string toml)
if (is(T == struct))
{
    // Start from a default-initialized T and let the two-argument overload fill it in.
    T dest;
    parseToml(toml, dest);
    return dest;
}
165 
/// A simple example of `parseToml` with an array of tables.
@("A simple example of `parseToml` with an array of tables.")
unittest
{
    struct Configuration
    {
        struct Account
        {
            string username;
            ulong id;
        }

        string serverAddress;
        int port;
        Account[] accounts;
    }

    alias Account = Configuration.Account;

    // Two top-level keys followed by two [[accounts]] entries.
    string data = `

    serverAddress = "127.0.0.1"
    port = 11000

    [[accounts]]
    username = "Tom"
    id = 0x827e7b52

    [[accounts]]
    username = "Jerry"
    id = 0x99134cce

    `;

    // Build the expected result field by field.
    Configuration expected;
    expected.serverAddress = "127.0.0.1";
    expected.port = 11_000;
    expected.accounts = [
        Account("Tom", 0x827e7b52),
        Account("Jerry", 0x99134cce)
    ];

    Configuration actual = parseToml!Configuration(data);

    actual.should.equal(expected);
}
211 
/// Syntactically invalid TOML results in an exception.
@("Syntactically invalid TOML results in an exception.")
unittest
{
    struct S {}

    // Record whether the decoder rejected the input, then assert on that.
    bool rejected = false;

    try
    {
        parseToml!S(`[[[bad`);
    }
    catch (TomlDecodingException)
    {
        rejected = true;
    }

    assert(rejected, "Expected a TomlDecodingException to be thrown.");
}
228 
/// Duplicate key names result in an exception.
@("Duplicate key names result in an exception.")
unittest
{
    struct S
    {
        int x;
    }

    // Record whether the decoder rejected the input, then assert on that.
    bool rejected = false;

    try
    {
        parseToml!S(`
            x = 5
            x = 10
        `);
    }
    catch (TomlDecodingException)
    {
        rejected = true;
    }

    assert(rejected, "Expected a TomlDecodingException to be thrown.");
}
251 
/// Duplicate table names result in an exception.
@("Duplicate table names result in an exception.")
unittest
{
    struct S
    {
        struct Fruit { string apple; string orange; }

        Fruit fruit;
    }

    // Record whether the decoder rejected the input, then assert on that.
    bool rejected = false;

    try
    {
        parseToml!S(`
            [fruit]
            apple = "red"

            [fruit]
            orange = "orange"
        `);
    }
    catch (TomlDecodingException)
    {
        rejected = true;
    }

    assert(rejected, "Expected a TomlDecodingException to be thrown.");
}
279 
/**
 * Handles a single `TomlGrammar.keyval` node.
 *
 * The key (child 0) is split into its dotted components and appended to
 * `tableAddress`; the value (child 2) is then handed to `processTomlVal`
 * together with that full address.
 */
private void processTomlKeyval(S)(
    ParseTree pt,
    ref S dest,
    string[] tableAddress,
    ref bool[string[]] seenSoFar
)
in (pt.name == "TomlGrammar.keyval")
{
    ParseTree keyNode = pt.children[0];
    ParseTree valNode = pt.children[2];

    string[] fullAddress = tableAddress ~ splitDottedKey(keyNode);

    processTomlVal(valNode, dest, fullAddress, seenSoFar);
}
290 
/**
 * Handles a single `TomlGrammar.val` node located at `address`.
 *
 * The address is first registered in `seenSoFar`; the value is then
 * dispatched on the name of the node's first child. Scalars are converted
 * and passed to `setData`, while date-times, arrays and inline tables are
 * delegated to their dedicated handlers.
 *
 * Throws:
 *      TomlDuplicateNameException if `address` is already in `seenSoFar`.
 */
private void processTomlVal(S)(ParseTree pt, ref S dest, string[] address, ref bool[string[]] seenSoFar)
in (pt.name == "TomlGrammar.val")
{
    // The exception argument is lazy, so it is only built when the check fails.
    enforce(
        address !in seenSoFar,
        new TomlDuplicateNameException(`Duplicate key: "` ~ address.join('.') ~ `"`)
    );

    seenSoFar[address.idup] = true;

    // Text matched by the whole value, and the node naming its concrete type.
    string rawText = pt.input[pt.begin .. pt.end];
    ParseTree typedNode = pt.children[0];

    switch (typedNode.name)
    {
        // Compound values are handled by dedicated functions.
        case "TomlGrammar.array":
            processTomlArray(typedNode, dest, address, seenSoFar);
            break;

        case "TomlGrammar.inline_table":
            processTomlInlineTable(typedNode, dest, address, seenSoFar);
            break;

        case "TomlGrammar.date_time":
            processTomlDateTime(typedNode, dest, address);
            break;

        // Scalar values are converted from their matched text and stored directly.
        case "TomlGrammar.string_":
            setData(dest, address, parseTomlString(rawText));
            break;

        case "TomlGrammar.boolean":
            setData(dest, address, rawText.to!bool);
            break;

        case "TomlGrammar.integer":
            setData(dest, address, parseTomlInteger(rawText));
            break;

        case "TomlGrammar.float_":
            setData(dest, address, parseTomlFloat(rawText));
            break;

        default:
            // Debug builds fail loudly on an unknown rule; other builds ignore it.
            debug { assert(false, "Unsupported TomlGrammar rule: \"" ~ typedNode.name ~ "\""); }
            else { break; }
    }
}
340 
341 
/**
 * Handles a `TomlGrammar.date_time` node located at `address`.
 *
 * The name of the node's first child selects which of the four date/time
 * parsers is applied to the matched text; the result is passed to `setData`.
 *
 * Throws:
 *      TomlInvalidValueException (wrapping the original `TimeException`) if a
 *      `TimeException` is raised while parsing or storing the value.
 */
private void processTomlDateTime(S)(ParseTree pt, ref S dest, string[] address)
in (pt.name == "TomlGrammar.date_time")
{
    import core.time : TimeException;

    string rawText = pt.input[pt.begin .. pt.end];
    string subType = pt.children[0].name;

    try
    {
        if (subType == "TomlGrammar.offset_date_time")
        {
            setData(dest, address, parseTomlOffsetDateTime(rawText));
        }
        else if (subType == "TomlGrammar.local_date_time")
        {
            setData(dest, address, parseTomlLocalDateTime(rawText));
        }
        else if (subType == "TomlGrammar.local_date")
        {
            setData(dest, address, parseTomlLocalDate(rawText));
        }
        else if (subType == "TomlGrammar.local_time")
        {
            setData(dest, address, parseTomlLocalTime(rawText));
        }
        else
        {
            assert(false, "Unsupported TOML date_time sub-type: " ~ subType);
        }
    }
    catch (TimeException cause)
    {
        // Re-throw as a decoding error, keeping the original as the cause.
        throw new TomlInvalidValueException(
            "Invalid date/time: " ~ rawText, cause
        );
    }
}
381 
382 
/**
 * Handles a `TomlGrammar.inline_table` node located at `address`.
 *
 * Key/value pairs are stored in a chain of nested
 * `TomlGrammar.inline_table_keyvals` nodes: each one holds a `keyval` and,
 * when more pairs follow, another `inline_table_keyvals`. The chain is
 * walked from the outside in, and every pair is passed to
 * `processTomlKeyval` with `address` as its table address. A table with no
 * `inline_table_keyvals` child stores nothing.
 */
private void processTomlInlineTable(S)(ParseTree pt, ref S dest, string[] address, ref bool[string[]] seenSoFar)
in (pt.name == "TomlGrammar.inline_table", `Expected "TomlGrammar.inline_table" but got "` ~ pt.name ~ `".`)
{
    ParseTree[] remaining = pt.children.find!(e => e.name == "TomlGrammar.inline_table_keyvals");

    while (!remaining.empty)
    {
        ParseTree keyvalsNode = remaining[0];

        ParseTree keyvalNode = keyvalsNode.children.find!(e => e.name == "TomlGrammar.keyval")[0];
        processTomlKeyval(keyvalNode, dest, address, seenSoFar);

        // Step into the nested node that holds the rest of the pairs, if any.
        remaining = keyvalsNode.children.find!(e => e.name == "TomlGrammar.inline_table_keyvals");
    }
}
399 
400 
/**
 * Handles a `TomlGrammar.array` node located at `address`.
 *
 * Array elements are stored in a chain of nested `TomlGrammar.array_values`
 * nodes: each one holds a `val` and, when more elements follow, another
 * `array_values`. The chain is walked first to collect every element and to
 * verify that all elements share the same type rules; only then is each
 * element passed to `processTomlVal`, with its index appended to `address`.
 * An array with no `array_values` child stores nothing.
 *
 * Throws:
 *      TomlUnsupportedException if the elements do not all share the same type rules.
 */
private void processTomlArray(S)(ParseTree pt, ref S dest, string[] address, ref bool[string[]] seenSoFar)
in (pt.name == "TomlGrammar.array", `Expected "TomlGrammar.array" but got "` ~ pt.name ~ `".`)
{
    // Whether a grammar rule name is one of those that identify a value's type.
    static bool isTypeRule(string ruleName)
    {
        switch (ruleName)
        {
            case "TomlGrammar.string_",
                 "TomlGrammar.boolean",
                 "TomlGrammar.array",
                 "TomlGrammar.inline_table",
                 "TomlGrammar.date_time",
                 "TomlGrammar.float_",
                 "TomlGrammar.integer",
                 "TomlGrammar.offset_date_time",
                 "TomlGrammar.local_date_time",
                 "TomlGrammar.local_date",
                 "TomlGrammar.local_time":
                return true;

            default:
                return false;
        }
    }

    // Follows the first-child chain below a `val` node, collecting rule names
    // for as long as each node matches the same text as the `val` itself and
    // is one of the type rules.
    static string[] typeRulesOf(ParseTree valPT)
    {
        string wholeMatch = valPT.input[valPT.begin .. valPT.end];
        string[] rules;

        ParseTree node = valPT.children[0];
        while (node.input[node.begin .. node.end] == wholeMatch && isTypeRule(node.name))
        {
            rules ~= node.name;
            node = node.children[0];
        }

        return rules;
    }

    // Type rules of the first element; every later element must match them.
    string[] typeRules;

    // Elements in source order.
    ParseTree[] valuePTs;

    auto pending = pt.children.find!(e => e.name == "TomlGrammar.array_values");
    while (pending.length > 0)
    {
        ParseTree arrayValuesPT = pending[0];

        auto foundVal = arrayValuesPT.children.find!(e => e.name == "TomlGrammar.val");
        assert(
            foundVal.length != 0,
            `Expected array to have a "TomlGrammar.val" child, but found: ` ~ arrayValuesPT.children.to!string
        );
        ParseTree currentValPT = foundVal[0];
        assert(
            currentValPT.name == "TomlGrammar.val",
            `Expected array to have a "TomlGrammar.val" child at index 1, but found "` ~ currentValPT.name ~ `".`
        );

        string[] currTypeRules = typeRulesOf(currentValPT);
        if (typeRules.length == 0)
        {
            typeRules = currTypeRules;
        }
        else if (typeRules != currTypeRules)
        {
            throw new TomlUnsupportedException(
                `Mixed-type arrays not yet supported. Array started with "` ~
                typeRules.to!string ~ `" but also contains "` ~ currTypeRules.to!string ~ `".`
            );
        }

        valuePTs ~= currentValPT;

        // Step into the nested node that holds the rest of the elements, if any.
        pending = arrayValuesPT.children.find!(e => e.name == "TomlGrammar.array_values");
    }

    // Values are stored only after the whole array has passed the type check.
    foreach (size_t i, ParseTree valuePT; valuePTs)
    {
        processTomlVal(valuePT, dest, address ~ i.to!string, seenSoFar);
    }
}
496 
/**
 * Splits a `TomlGrammar.key` node into its individual key components.
 *
 * For a dotted key, one component is produced per `TomlGrammar.simple_key`
 * child. Otherwise a single component is produced from the text matched by
 * the key node itself. In both cases a quoted key loses its first and last
 * matched character (the surrounding quotes).
 */
private string[] splitDottedKey(ParseTree pt)
pure
in (pt.name == "TomlGrammar.key")
{
    ParseTree inner = pt.children[0];

    if (inner.name != "TomlGrammar.dotted_key")
    {
        // Single key: the text is sliced from the key node itself.
        if (inner.children[0].name == "TomlGrammar.quoted_key")
        {
            return [ pt.input[pt.begin + 1 .. pt.end - 1] ];
        }
        return [ pt.input[pt.begin .. pt.end] ];
    }

    string[] components;
    foreach (ParseTree part; inner.children)
    {
        // Skip anything between the simple keys.
        if (part.name != "TomlGrammar.simple_key")
        {
            continue;
        }

        if (part.children[0].name == "TomlGrammar.quoted_key")
        {
            components ~= part.input[part.begin + 1 .. part.end - 1];
        }
        else
        {
            components ~= part.input[part.begin .. part.end];
        }
    }
    return components;
}