1 module toml_foolery.decode.parse_toml;
2 
3 import std.algorithm;
4 import std.array;
5 import std.conv;
6 import std.datetime;
7 import std.exception : enforce;
8 import std.range;
9 import std.range.primitives;
10 import std.traits;
11 
12 version(unittest) import exceeds_expectations;
13 
// Project-local modules used by the decoder.
import toml_foolery.decode.set_data;
import toml_foolery.decode.exceptions;
import toml_foolery.decode.types.datetime;
import toml_foolery.decode.types.floating_point;
import toml_foolery.decode.types.integer;
import toml_foolery.decode.types.string;
20 
21 import toml_foolery.decode.peg_grammar;
22 
23 
/**
 *  Decodes a TOML string
 *
 *  Params:
 *      toml = A string containing TOML data.
 *
 *      dest = The struct into which the parsed TOML data should be placed.
 *             It is taken by `ref` and filled in place.
 *
 *      T =    The type of struct to populate (or, for the overload without
 *             a `dest` parameter, to create and return).
 *
 *
 *  Returns:
 *      The overload that takes `dest` returns nothing; `dest` itself is
 *      populated with data read from `toml`. The overload without `dest`
 *      returns a new `T` populated with data read from `toml`.
 *
 *  Throws:
 *      TomlSyntaxException if the given data is invalid TOML.
 *      TomlDuplicateNameException if the given data contains duplicate key or table names.
 *      TomlUnsupportedException if the given data contains TOML features not yet supported by the library.
 *      TomlInvalidValueException if the given data contains invalid values (e.g. a date with an invalid month).
 *      TomlTypeException if a declared key's value does not match the destination value.
 *
 */
public void parseToml(T)(string toml, ref T dest)
if (is(T == struct))
{
    // version tracer is for debugging a grammar, it comes from pegged.
    version (tracer)
    {
        import pegged.peg : setTraceConditionFunction;

        bool cond(string ruleName, const ref ParseTree p)
        {
            static startTrace = false;
            if (ruleName.startsWith("Eris.Function"))
                startTrace = true;
            return  /* startTrace &&  */ ruleName.startsWith("TomlGrammar");
        }

        setTraceConditionFunction(&cond);
    }

    ParseTree tree = TomlGrammar(toml);

    if (!tree.successful)
    {
        throw new TomlSyntaxException(
            "Failed to parse TOML data"
        );
    }

    // Sanity checks on the shape of the parse tree produced by the grammar.
    assert(
        tree.name == "TomlGrammar",
        "Expected root of tree to be TomlGrammar, but got: " ~ tree.name
    );

    assert(
        tree.children.length == 1,
        "Expected root of tree to have exactly one child, but got: " ~ tree.children.length.to!string
    );

    // Report the name of the child that was actually inspected, not the
    // name of the root node.
    assert(
        tree.children[0].name == "TomlGrammar.toml",
        "Expected only child of tree root to be TomlGrammar.toml, but got: " ~ tree.children[0].name
    );

    ParseTree[] lines = tree.children[0].children;

    // Fully-qualified key names are added to `keysCompleted` once
    // they are written-to. The only exception is the names of
    // array-of-tables, because we won't know that an array-of-tables
    // has been completed until we reach the end of the document.
    // Instead, we add the names of arrays-of-tables to
    // `tableArrayCounts`.
    bool[string[]] keysCompleted;
    size_t[string[]] tableArrayCounts;

    // A set containing all things that are known to be tables, even
    // if they haven't been completed yet, and even if they have only
    // been implicitly declared.
    bool[string[]] tables;

    // Address of the table that subsequent key/value pairs belong to.
    // Empty until the first table header is seen.
    string[] tableAddress;

    foreach (ParseTree line; lines)
    {
        if(line.name != "TomlGrammar.expression")
        {
            throw new TomlSyntaxException(
                "Invalid TOML data. Expected a TomlGrammar.expression, but got: " ~
                line.name ~ "\n Full tree:\n" ~ tree.toString()
            );
        }

        lineLoop:
        foreach (ParseTree partOfLine; line.children)
        {
            switch (partOfLine.name)
            {
                case "TomlGrammar.keyval":
                    processTomlKeyval(partOfLine, dest, tableAddress, keysCompleted);
                    break;

                case "TomlGrammar.table":

                    // Add the previous table to the keys-completed
                    // set. Even if it was an array of tables, the key
                    // contains the index, so it was still completed.
                    if (tableAddress != tableAddress.init && tableAddress !in tableArrayCounts)
                    {
                        keysCompleted[tableAddress.idup] = true;
                    }

                    // The header's key, split into its dotted segments,
                    // becomes the new current table address.
                    tableAddress =
                        partOfLine
                        .children[0]
                        .children
                        .find!(e => e.name == "TomlGrammar.key")[0]
                        .splitDottedKey;

                    // Add all the implicitly defined regular tables.
                    foreach (string[] parentTable; getAllParentTables(tableAddress))
                    {
                        tables[parentTable.idup] = true;
                    }

                    if (partOfLine.children[0].name == "TomlGrammar.array_table")
                    {
                        if (tableAddress in tables)
                        {
                            throw new TomlDuplicateNameException(
                                "Attempt to re-define table `" ~ tableAddress.join(".") ~ "` as an array-of-tables."
                            );
                        }

                        if (tableAddress in keysCompleted && tableAddress !in tableArrayCounts)
                        {
                            throw new TomlDuplicateNameException(
                                "Attempt to re-define table `" ~ tableAddress.join(".") ~ "` as an array-of-tables."
                            );
                        }

                        if (tableAddress !in tableArrayCounts)
                        {
                            tableArrayCounts[tableAddress.idup] = 1;
                        }
                        else
                        {
                            tableArrayCounts[tableAddress.idup]++;
                        }

                        // The zero-based index of this entry is appended to
                        // the address, so each entry has its own address.
                        size_t tableArrayCurrentIndex = (tableArrayCounts[tableAddress] - 1);
                        tableAddress ~= tableArrayCurrentIndex.to!string;

                    }
                    else if (partOfLine.children[0].name == "TomlGrammar.std_table")
                    {
                        tables[tableAddress.idup] = true;

                        if (tableAddress in tableArrayCounts)
                        {
                            throw new TomlDuplicateNameException(
                                `The name "` ~ tableAddress.join('.') ~
                                `" was already used for an array-of-tables, but is now being used for a table.`
                            );
                        }
                    }
                    else assert(false, "Unknown table type: " ~ partOfLine.children[0].name);

                    if (tableAddress in keysCompleted)
                    {
                        throw new TomlDuplicateNameException(
                            `Table "` ~ tableAddress.join('.') ~ `" has been declared twice.`
                        );
                    }

                    break;

                default:
                    // Any other child of an expression is skipped.
                    continue lineLoop;
            }
        }
    }

    version (tracer)
    {
        // Does not need to be in version(tracer) necessarily, but I figure if you
        // want the tracer, you want the HTML.
        // Be warned that toHTML breaks when encountering non-ASCII UTF-8 codepoints.
        import pegged.tohtml : toHTML, Expand;
        toHTML!
            (Expand.ifNotMatch,".comment", ".simple_key", ".basic_string", ".literal_string", ".expression")
            (tree, "hard_example_toml.html");
    }
}
218 
/// ditto
public T parseToml(T)(string toml)
if (is(T == struct))
{
    // Same constraint as the `ref` overload, so that an unsupported `T`
    // is rejected at the call site instead of inside this body.
    T dest;
    parseToml(toml, dest);
    return dest;
}
226 
/// A simple example of `parseToml` with an array of tables.
@("A simple example of `parseToml` with an array of tables.")
unittest
{
    struct Configuration
    {
        struct Account
        {
            string username;
            ulong id;
        }

        string serverAddress;
        int port;
        Account[] accounts;
    }

    alias Account = Configuration.Account;

    // Two top-level keys followed by two `[[accounts]]` entries.
    string document = `

    serverAddress = "127.0.0.1"
    port = 11000

    [[accounts]]
    username = "Tom"
    id = 0x827e7b52

    [[accounts]]
    username = "Jerry"
    id = 0x99134cce

    `;

    Configuration expected;
    expected.serverAddress = "127.0.0.1";
    expected.port = 11_000;
    expected.accounts = [
        Account("Tom", 0x827e7b52),
        Account("Jerry", 0x99134cce)
    ];

    Configuration actual = parseToml!Configuration(document);

    expect(actual).toEqual(expected);
}
272 
/// Syntactically invalid TOML results in an exception.
@("Syntactically invalid TOML results in an exception.")
unittest
{
    struct S {}

    // Set once the decoder rejects the malformed input.
    bool rejected = false;

    try
    {
        parseToml!S(`[[[bad`);
    }
    catch (TomlDecodingException)
    {
        rejected = true;
    }

    assert(rejected, "Expected a TomlDecodingException to be thrown.");
}
289 
/// Duplicate key names result in an exception.
@("Duplicate key names result in an exception.")
unittest
{
    struct S
    {
        int x;
    }

    // Set once the decoder rejects the second assignment to `x`.
    bool rejected = false;

    try
    {
        parseToml!S(`
            x = 5
            x = 10
        `);
    }
    catch (TomlDecodingException)
    {
        rejected = true;
    }

    assert(rejected, "Expected a TomlDecodingException to be thrown.");
}
312 
/// Duplicate table names result in an exception.
@("Duplicate table names result in an exception.")
unittest
{
    struct S
    {
        struct Fruit { string apple; string orange; }

        Fruit fruit;
    }

    // Set once the decoder rejects the second `[fruit]` header.
    bool rejected = false;

    try
    {
        parseToml!S(`
            [fruit]
            apple = "red"

            [fruit]
            orange = "orange"
        `);
    }
    catch (TomlDecodingException)
    {
        rejected = true;
    }

    assert(rejected, "Expected a TomlDecodingException to be thrown.");
}
340 
/**
 *  Handles one `TomlGrammar.keyval` node: splits its key into dotted
 *  segments and forwards its value node to `processTomlVal`.
 *
 *  Params:
 *      pt =            The `TomlGrammar.keyval` parse-tree node.
 *      dest =          The struct being populated.
 *      tableAddress =  Address of the table the pair belongs to.
 *      keysCompleted = Set of addresses that have already been written.
 */
private void processTomlKeyval(S)(
    ParseTree pt,
    ref S dest,
    string[] tableAddress,
    ref bool[string[]] keysCompleted,
)
in (pt.name == "TomlGrammar.keyval")
{
    // Child 0 is used as the key and child 2 as the value.
    ParseTree keyNode = pt.children[0];
    ParseTree valueNode = pt.children[2];

    string[] keyAddressInTable = splitDottedKey(keyNode);

    processTomlVal(valueNode, dest, tableAddress, keyAddressInTable, keysCompleted);
}
351 
/**
 *  Handles one `TomlGrammar.val` node: checks the target address for
 *  duplicates and table injection, stores the value in `dest` according
 *  to its grammar rule, then records the address as completed.
 *
 *  Params:
 *      pt =                The `TomlGrammar.val` parse-tree node.
 *      dest =              The struct being populated.
 *      tableAddress =      Address of the enclosing table.
 *      keyAddressInTable = Address of the key relative to that table.
 *      keysCompleted =     Set of addresses that have already been written.
 *
 *  Throws:
 *      TomlDuplicateNameException if the full address was already written.
 *      TomlDecodingException if a parent of the address was already completed.
 */
private void processTomlVal(S)(
    ParseTree pt,
    ref S dest,
    string[] tableAddress,
    string[] keyAddressInTable,
    ref bool[string[]] keysCompleted,
)
in (pt.name == "TomlGrammar.val")
{
    string[] address = tableAddress ~ keyAddressInTable;

    if (address in keysCompleted)
    {
        throw new TomlDuplicateNameException(`Duplicate key: "` ~ address.join('.') ~ `"`);
    }

    // For compliance with:
    // - validator/toml-test/tests/invalid/table/injection-1.toml
    // - validator/toml-test/tests/invalid/table/injection-2.toml
    //
    // e.g. for the address "a.b.c.d", check whether a table named "a",
    // "a.b" or "a.b.c" has already been completed. The full address
    // itself was checked just above.
    foreach (string[] ancestor; getAllParentTables(address))
    {
        if (ancestor !in keysCompleted)
        {
            continue;
        }

        throw new TomlDecodingException(
            "Attempted injection into table `" ~ ancestor.join(".") ~
            "` via key `" ~ address.join(".") ~ "`."
        );
    }

    // Raw source text covered by the value node.
    string value = pt.input[pt.begin .. pt.end];

    // The single child identifies which kind of value this is.
    ParseTree typedValPT = pt.children[0];

    switch (typedValPT.name)
    {
        case "TomlGrammar.string_":
            setData(dest, address, parseTomlString(value));
            break;

        case "TomlGrammar.integer":
            setData(dest, address, parseTomlInteger(value));
            break;

        case "TomlGrammar.float_":
            setData(dest, address, parseTomlFloat(value));
            break;

        case "TomlGrammar.boolean":
            setData(dest, address, value.to!bool);
            break;

        case "TomlGrammar.date_time":
            processTomlDateTime(typedValPT, dest, address);
            break;

        case "TomlGrammar.inline_table":
            processTomlInlineTable(typedValPT, dest, address, keysCompleted);
            break;

        case "TomlGrammar.array":
            processTomlArray(typedValPT, dest, address, keysCompleted);
            break;

        default:
            // Unknown rules fail loudly in debug builds and are ignored otherwise.
            debug { assert(false, "Unsupported TomlGrammar rule: \"" ~ typedValPT.name ~ "\""); }
            else { break; }
    }

    keysCompleted[address.idup] = true;
}
428 
429 
/**
 *  Handles one `TomlGrammar.date_time` node: parses its text with the
 *  parser matching its sub-rule and stores the result in `dest`.
 *
 *  Params:
 *      pt =      The `TomlGrammar.date_time` parse-tree node.
 *      dest =    The struct being populated.
 *      address = Full address of the key being written.
 *
 *  Throws:
 *      TomlInvalidValueException if a `TimeException` is raised while
 *      parsing or storing the value.
 */
private void processTomlDateTime(S)(ParseTree pt, ref S dest, string[] address)
in (pt.name == "TomlGrammar.date_time")
{
    import core.time : TimeException;

    // Raw source text covered by the date/time node.
    string value = pt.input[pt.begin .. pt.end];

    try
    {
        string subRule = pt.children[0].name;

        switch (subRule)
        {
            case "TomlGrammar.local_time":
                setData(dest, address, parseTomlLocalTime(value));
                break;

            case "TomlGrammar.local_date":
                setData(dest, address, parseTomlLocalDate(value));
                break;

            case "TomlGrammar.local_date_time":
                setData(dest, address, parseTomlLocalDateTime(value));
                break;

            case "TomlGrammar.offset_date_time":
                setData(dest, address, parseTomlOffsetDateTime(value));
                break;

            default:
                assert(false, "Unsupported TOML date_time sub-type: " ~ subRule);
        }
    }
    catch (TimeException cause)
    {
        // Re-thrown as the library's own exception type, keeping the cause.
        throw new TomlInvalidValueException(
            "Invalid date/time: " ~ value, cause
        );
    }
}
469 
470 
/**
 *  Handles one `TomlGrammar.inline_table` node by processing every
 *  key/value pair it contains, using `address` as the table address.
 *
 *  The pairs are stored as a chain: each `TomlGrammar.inline_table_keyvals`
 *  node holds one `TomlGrammar.keyval` and, optionally, a further
 *  `TomlGrammar.inline_table_keyvals` node with the remaining pairs.
 *
 *  Params:
 *      pt =            The `TomlGrammar.inline_table` parse-tree node.
 *      dest =          The struct being populated.
 *      address =       Full address of the inline table.
 *      keysCompleted = Set of addresses that have already been written.
 */
private void processTomlInlineTable(S)(
    ParseTree pt,
    ref S dest,
    string[] address,
    ref bool[string[]] keysCompleted,
)
in (pt.name == "TomlGrammar.inline_table", `Expected "TomlGrammar.inline_table" but got "` ~ pt.name ~ `".`)
{
    // Walk the chain link by link; an empty search result ends the walk.
    ParseTree[] remaining = pt.children.find!(e => e.name == "TomlGrammar.inline_table_keyvals");

    while (!remaining.empty)
    {
        ParseTree link = remaining[0];

        processTomlKeyval(
            link.children.find!(e => e.name == "TomlGrammar.keyval")[0],
            dest,
            address,
            keysCompleted
        );

        remaining = link.children.find!(e => e.name == "TomlGrammar.inline_table_keyvals");
    }
}
497 
498 
/**
 *  Handles one `TomlGrammar.array` node: collects its element nodes,
 *  checks that they all share the same chain of type rules, then
 *  processes each element as a value whose key is its index.
 *
 *  The elements are stored as a chain: each `TomlGrammar.array_values`
 *  node holds one `TomlGrammar.val` and, optionally, a further
 *  `TomlGrammar.array_values` node with the remaining elements.
 *
 *  Params:
 *      pt =            The `TomlGrammar.array` parse-tree node.
 *      dest =          The struct being populated.
 *      address =       Full address of the array.
 *      keysCompleted = Set of addresses that have already been written.
 *
 *  Throws:
 *      TomlUnsupportedException if the elements do not all have the same type rules.
 */
private void processTomlArray(S)(
    ParseTree pt,
    ref S dest,
    string[] address,
    ref bool[string[]] keysCompleted,
)
in (pt.name == "TomlGrammar.array", `Expected "TomlGrammar.array" but got "` ~ pt.name ~ `".`)
{
    // Lists the type rules of a value node, outermost first. Descends
    // through first children for as long as the node spans the whole
    // value text and is one of the recognised type rules.
    static string[] typeRulesOf(ParseTree valPT)
    {
        string[] recognised = [
            "TomlGrammar.string_",
            "TomlGrammar.boolean",
            "TomlGrammar.array",
            "TomlGrammar.inline_table",
            "TomlGrammar.date_time",
            "TomlGrammar.float_",
            "TomlGrammar.integer",
            "TomlGrammar.offset_date_time",
            "TomlGrammar.local_date_time",
            "TomlGrammar.local_date",
            "TomlGrammar.local_time",
        ];

        string fullMatch = valPT.input[valPT.begin .. valPT.end];
        string[] rules;
        ParseTree[] level = valPT.children;

        while (
            level[0].input[level[0].begin .. level[0].end] == fullMatch &&
            recognised.canFind(level[0].name)
        )
        {
            rules ~= level[0].name;
            level = level[0].children;
        }

        return rules;
    }

    ParseTree[] pending = pt.children.find!(e => e.name == "TomlGrammar.array_values");
    if (pending.length == 0)
    {
        // Empty array: nothing to store.
        return;
    }

    // Type rules of the first element; every later element must match.
    string[] typeRules;
    ParseTree[] valuePTs;

    // Collect and type-check every element before storing any of them.
    while (pending.length > 0)
    {
        ParseTree arrayValuesPT = pending[0];

        auto foundVal = arrayValuesPT.children.find!(e => e.name == "TomlGrammar.val");
        assert(
            foundVal.length != 0,
            `Expected array to have a "TomlGrammar.val" child, but found: ` ~ arrayValuesPT.children.to!string
        );

        ParseTree elementPT = foundVal[0];
        assert(
            elementPT.name == "TomlGrammar.val",
            `Expected array to have a "TomlGrammar.val" child at index 1, but found "` ~ elementPT.name ~ `".`
        );

        string[] currTypeRules = typeRulesOf(elementPT);
        if (typeRules.length == 0)
        {
            typeRules = currTypeRules;
        }
        else if (typeRules != currTypeRules)
        {
            throw new TomlUnsupportedException(
                `Mixed-type arrays not yet supported. Array started with "` ~
                typeRules.to!string ~ `" but also contains "` ~ currTypeRules.to!string ~ `".`
            );
        }

        valuePTs ~= elementPT;
        pending = arrayValuesPT.children.find!(e => e.name == "TomlGrammar.array_values");
    }

    // Each element is stored under the array's address plus its index.
    foreach (size_t i, ParseTree valuePT; valuePTs)
    {
        processTomlVal(valuePT, dest, address, [i.to!string], keysCompleted);
    }
}
599 
/**
 *  Splits a `TomlGrammar.key` node into its segments.
 *
 *  A dotted key yields one entry per `TomlGrammar.simple_key` child;
 *  any other key yields a single entry. For a segment whose first child
 *  is a `TomlGrammar.quoted_key`, the first and last characters of its
 *  text are dropped.
 *
 *  Params:
 *      pt = The `TomlGrammar.key` parse-tree node.
 *
 *  Returns:
 *      The key's segments, in source order.
 */
private string[] splitDottedKey(ParseTree pt)
pure
in (pt.name == "TomlGrammar.key")
{
    ParseTree keyBody = pt.children[0];

    if (keyBody.name != "TomlGrammar.dotted_key")
    {
        // Single-segment key: the text is taken from the key node itself.
        if (keyBody.children[0].name == "TomlGrammar.quoted_key")
        {
            return [ pt.input[pt.begin + 1 .. pt.end - 1] ];
        }

        return [ pt.input[pt.begin .. pt.end] ];
    }

    string[] segments;

    foreach (ParseTree part; keyBody.children)
    {
        if (part.name != "TomlGrammar.simple_key")
        {
            continue;
        }

        if (part.children[0].name == "TomlGrammar.quoted_key")
        {
            segments ~= part.input[part.begin + 1 .. part.end - 1];
        }
        else
        {
            segments ~= part.input[part.begin .. part.end];
        }
    }

    return segments;
}
623 
/**
 *  Lists every proper prefix of a table address, shortest first.
 *
 *  For example, `["a", "b", "c"]` yields `[["a"], ["a", "b"]]`. The
 *  address itself is not included.
 *
 *  Params:
 *      table = The address whose parents are wanted.
 *
 *  Returns:
 *      The parent addresses, or an empty array when `table` has fewer
 *      than two segments (including when it is empty).
 */
private string[][] getAllParentTables(string[] table)
{
    // An empty or single-segment address has no parents. Guarding here
    // also avoids slicing an empty array with `$ - 1`.
    if (table.length < 2)
    {
        return [];
    }

    string[][] parents;
    parents.reserve(table.length - 1);

    foreach (size_t depth; 1 .. table.length)
    {
        // Copy the prefix so the result does not alias the caller's array.
        parents ~= table[0 .. depth].dup;
    }

    return parents;
}