module toml_foolery.decode.parse_toml;

import std.algorithm;
import std.array;
import std.conv;
import std.datetime;
import std.exception : enforce;
import std.range;
import std.range.primitives;
import std.traits;

version(unittest) import exceeds_expectations;

import toml_foolery.decode.set_data;
import toml_foolery.decode.exceptions;
import toml_foolery.decode.types.datetime;
import toml_foolery.decode.types.floating_point;
import toml_foolery.decode.types.integer;
import toml_foolery.decode.types.string;

import toml_foolery.decode.peg_grammar;


/**
 *  Decodes a TOML string into a struct.
 *
 *  The input is parsed with `TomlGrammar`, and the resulting parse tree is
 *  walked one expression at a time: key/value pairs are written into `dest`,
 *  and table headers (`[name]` and `[[name]]`) change the address under which
 *  the following key/value pairs are stored.
 *
 *  Params:
 *      toml =  A string containing TOML data.
 *
 *      dest =  The struct into which the parsed TOML data should be placed.
 *
 *      T =     The type of struct to create and return.
 *
 *
 *  Returns:
 *      The `dest` parameter, populated with data read from `toml`.
 *
 *  Throws:
 *      TomlSyntaxException if the given data is invalid TOML.
 *      TomlDuplicateNameException if the given data contains duplicate key or table names.
 *      TomlUnsupportedException if the given data contains TOML features not yet supported by the library.
 *      TomlInvalidValueException if the given data contains invalid values (e.g. a date with an invalid month).
 *      TomlTypeException if a declared key's value does not match the destination value.
 *
 */
public void parseToml(T)(string toml, ref T dest)
if (is(T == struct))
{
    // version tracer is for debugging a grammar, it comes from pegged.
    version (tracer)
    {
        import pegged.peg : setTraceConditionFunction;

        bool cond(string ruleName, const ref ParseTree p)
        {
            static startTrace = false;
            if (ruleName.startsWith("Eris.Function"))
                startTrace = true;
            return  /* startTrace && */ ruleName.startsWith("TomlGrammar");
        }

        setTraceConditionFunction(&cond);
    }

    ParseTree tree = TomlGrammar(toml);

    if (!tree.successful)
    {
        throw new TomlSyntaxException(
            "Failed to parse TOML data"
        );
    }

    assert(
        tree.name == "TomlGrammar",
        "Expected root of tree to be TomlGrammar, but got: " ~ tree.name
    );

    assert(
        tree.children.length == 1,
        "Expected root of tree to have exactly one child, but got: " ~ tree.children.length.to!string
    );

    // Report the child's own name: this assertion is about the child, not the root.
    assert(
        tree.children[0].name == "TomlGrammar.toml",
        "Expected only child of tree root to be TomlGrammar.toml, but got: " ~ tree.children[0].name
    );

    ParseTree[] lines = tree.children[0].children;

    // Fully-qualified key names are added to `keysCompleted` once
    // they are written-to. The only exception is the names of
    // array-of-tables, because we won't know that an array-of-tables
    // has been completed until we reach the end of the document.
    // Instead, we add the names of arrays-of-tables to
    // `tableArrayCounts`.
    bool[string[]] keysCompleted;
    size_t[string[]] tableArrayCounts;

    // A set containing all things that are known to be tables, even
    // if they haven't been completed yet, and even if they have only
    // been implicitly declared.
    bool[string[]] tables;

    // Address of the table that the key/value pairs currently being read
    // belong to. Empty while still in the root table.
    string[] tableAddress;

    foreach (ParseTree line; lines)
    {
        if(line.name != "TomlGrammar.expression")
        {
            throw new TomlSyntaxException(
                "Invalid TOML data. Expected a TomlGrammar.expression, but got: " ~
                line.name ~ "\n Full tree:\n" ~ tree.toString()
            );
        }

        lineLoop:
        foreach (ParseTree partOfLine; line.children)
        {
            switch (partOfLine.name)
            {
                case "TomlGrammar.keyval":
                    processTomlKeyval(partOfLine, dest, tableAddress, keysCompleted);
                    break;

                case "TomlGrammar.table":

                    // Add the previous table to the keys-completed
                    // set. Even if it was an array of tables, the key
                    // contains the index, so it was still completed.
                    if (tableAddress != tableAddress.init && tableAddress !in tableArrayCounts)
                    {
                        keysCompleted[tableAddress.idup] = true;
                    }

                    tableAddress =
                        partOfLine
                        .children[0]
                        .children
                        .find!(e => e.name == "TomlGrammar.key")[0]
                        .splitDottedKey;

                    // Add all the implicitly defined regular tables.
                    // A parent that is already known to be an array-of-tables
                    // is not a regular table, so it must not be recorded here:
                    // otherwise `[[a]]`, `[[a.b]]`, `[[a]]` (valid TOML) would be
                    // rejected below as a re-definition of table `a`.
                    foreach (string[] parentTable; getAllParentTables(tableAddress))
                    {
                        if (parentTable !in tableArrayCounts)
                        {
                            tables[parentTable.idup] = true;
                        }
                    }

                    if (partOfLine.children[0].name == "TomlGrammar.array_table")
                    {
                        if (tableAddress in tables)
                        {
                            throw new TomlDuplicateNameException(
                                "Attempt to re-define table `" ~ tableAddress.join(".") ~ "` as an array-of-tables."
                            );
                        }

                        if (tableAddress in keysCompleted && tableAddress !in tableArrayCounts)
                        {
                            throw new TomlDuplicateNameException(
                                "Attempt to re-define table `" ~ tableAddress.join(".") ~ "` as an array-of-tables."
                            );
                        }

                        if (tableAddress !in tableArrayCounts)
                        {
                            tableArrayCounts[tableAddress.idup] = 1;
                        }
                        else
                        {
                            tableArrayCounts[tableAddress.idup]++;
                        }

                        // The element's index becomes the last component of the
                        // address used for the key/value pairs that follow.
                        size_t tableArrayCurrentIndex = (tableArrayCounts[tableAddress] - 1);
                        tableAddress ~= tableArrayCurrentIndex.to!string;

                    }
                    else if (partOfLine.children[0].name == "TomlGrammar.std_table")
                    {
                        tables[tableAddress.idup] = true;

                        if (tableAddress in tableArrayCounts)
                        {
                            throw new TomlDuplicateNameException(
                                `The name "` ~ tableAddress.join('.') ~
                                `" was already used for an array-of-tables, but is now being used for a table.`
                            );
                        }
                    }
                    else assert(false, "Unknown table type: " ~ partOfLine.children[0].name);

                    if (tableAddress in keysCompleted)
                    {
                        throw new TomlDuplicateNameException(
                            `Table "` ~ tableAddress.join('.') ~ `" has been declared twice.`
                        );
                    }

                    break;

                default:
                    continue lineLoop;
            }
        }
    }

    version (tracer)
    {
        // Does not need to be in version(tracer) necessarily, but I figure if you
        // want the tracer, you want the HTML.
        // Be warned that toHTML breaks when encountering non-ASCII UTF-8 codepoints.
        import pegged.tohtml : toHTML, Expand;
        toHTML!
            (Expand.ifNotMatch,".comment", ".simple_key", ".basic_string", ".literal_string", ".expression")
            (tree, "hard_example_toml.html");
    }
}

/// ditto
public T parseToml(T)(string toml)
{
    T dest;
    parseToml(toml, dest);
    return dest;
}

/// A simple example of `parseToml` with an array of tables.
@("A simple example of `parseToml` with an array of tables.")
unittest
{
    struct Configuration
    {
        struct Account
        {
            string username;
            ulong id;
        }

        string serverAddress;
        int port;
        Account[] accounts;
    }

    string data = `

    serverAddress = "127.0.0.1"
    port = 11000

    [[accounts]]
    username = "Tom"
    id = 0x827e7b52

    [[accounts]]
    username = "Jerry"
    id = 0x99134cce

    `;

    Configuration config = parseToml!Configuration(data);

    expect(config).toEqual(
        Configuration(
            "127.0.0.1",
            11_000,
            [
                Configuration.Account("Tom", 0x827e7b52),
                Configuration.Account("Jerry", 0x99134cce)
            ]
        )
    );
}

/// Syntactically invalid TOML results in an exception.
@("Syntactically invalid TOML results in an exception.")
unittest
{
    struct S {}

    try
    {
        parseToml!S(`[[[bad`);
        assert(false, "Expected a TomlDecodingException to be thrown.");
    }
    catch (TomlDecodingException e)
    {
        // As expected.
    }
}

/// Duplicate key names result in an exception.
@("Duplicate key names result in an exception.")
unittest
{
    struct S
    {
        int x;
    }

    try
    {
        S s = parseToml!S(`
            x = 5
            x = 10
        `);
        assert(false, "Expected a TomlDecodingException to be thrown.");
    }
    catch (TomlDecodingException e)
    {
        // As expected
    }
}

/// Duplicate table names result in an exception.
@("Duplicate table names result in an exception.")
unittest
{
    struct S
    {
        struct Fruit { string apple; string orange; }

        Fruit fruit;
    }

    try
    {
        S s = parseToml!S(`
            [fruit]
            apple = "red"

            [fruit]
            orange = "orange"
        `);
        assert(false, "Expected a TomlDecodingException to be thrown.");
    }
    catch (TomlDecodingException e)
    {
        // As expected
    }
}

/**
 *  Handles a single `TomlGrammar.keyval` node.
 *
 *  The node's first child is split into key components with `splitDottedKey`,
 *  and its third child (the value) is forwarded to `processTomlVal`.
 *
 *  Params:
 *      pt =            The `TomlGrammar.keyval` parse-tree node.
 *      dest =          The struct being populated.
 *      tableAddress =  Address of the table that contains this key/value pair.
 *      keysCompleted = Set of addresses that have already been written.
 */
private void processTomlKeyval(S)(
    ParseTree pt,
    ref S dest,
    string[] tableAddress,
    ref bool[string[]] keysCompleted,
)
in (pt.name == "TomlGrammar.keyval")
{
    processTomlVal(pt.children[2], dest, tableAddress, splitDottedKey(pt.children[0]), keysCompleted);
}

/**
 *  Handles a single `TomlGrammar.val` node: validates its address against the
 *  keys written so far, dispatches on the kind of value, and finally records
 *  the address in `keysCompleted`.
 *
 *  Params:
 *      pt =                The `TomlGrammar.val` parse-tree node.
 *      dest =              The struct being populated.
 *      tableAddress =      Address of the table (or array / inline table) that
 *                          contains this value.
 *      keyAddressInTable = The key's components, relative to `tableAddress`.
 *      keysCompleted =     Set of addresses that have already been written.
 *
 *  Throws:
 *      TomlDuplicateNameException if the full address was already written.
 *      TomlDecodingException if a dotted key reaches into a table that has
 *      already been completed.
 */
private void processTomlVal(S)(
    ParseTree pt,
    ref S dest,
    string[] tableAddress,
    string[] keyAddressInTable,
    ref bool[string[]] keysCompleted,
)
in (pt.name == "TomlGrammar.val")
{
    string[] address = tableAddress ~ keyAddressInTable;

    if (address in keysCompleted)
    {
        throw new TomlDuplicateNameException(`Duplicate key: "` ~ address.join('.') ~ `"`);
    }

    // For compliance with:
    // - validator/toml-test/tests/invalid/table/injection-1.toml
    // - validator/toml-test/tests/invalid/table/injection-2.toml
    //
    // e.g. for the dotted key "b.c.d" inside table "a", check if we've
    // already completed a table named "a.b" or "a.b.c".
    //
    // Only prefixes that are longer than `tableAddress` are checked: those
    // are the tables that the dotted key itself reaches into. The shorter
    // prefixes are ancestors of the current table, which are legitimately
    // marked as completed when a sub-table header such as `[a.b]` follows
    // `[a]`; treating them as injections would reject valid TOML.
    string[][] tableAddressesToCheckFor = getAllParentTables(address);

    foreach (string[] tableAddressToCheckFor; tableAddressesToCheckFor)
    {
        if (tableAddressToCheckFor.length <= tableAddress.length)
        {
            continue;
        }

        if (tableAddressToCheckFor in keysCompleted)
        {
            throw new TomlDecodingException(
                "Attempted injection into table `" ~ tableAddressToCheckFor.join(".") ~
                "` via key `" ~ address.join(".") ~ "`."
            );
        }
    }

    // The exact source text matched by this value node.
    string value = pt.input[pt.begin .. pt.end];

    ParseTree typedValPT = pt.children[0];

    switch (typedValPT.name)
    {
        case "TomlGrammar.integer":
            setData(dest, address, parseTomlInteger(value));
            break;

        case "TomlGrammar.float_":
            setData(dest, address, parseTomlFloat(value));
            break;

        case "TomlGrammar.boolean":
            setData(dest, address, value.to!bool);
            break;

        case "TomlGrammar.string_":
            setData(dest, address, parseTomlString(value));
            break;

        case "TomlGrammar.date_time":
            processTomlDateTime(typedValPT, dest, address);
            break;

        case "TomlGrammar.array":
            processTomlArray(typedValPT, dest, address, keysCompleted);
            break;

        case "TomlGrammar.inline_table":
            processTomlInlineTable(typedValPT, dest, address, keysCompleted);
            break;

        default:
            // Unknown rules abort in debug builds and are skipped otherwise.
            debug { assert(false, "Unsupported TomlGrammar rule: \"" ~ pt.children[0].name ~ "\""); }
            else { break; }
    }

    keysCompleted[address.idup] = true;
}


/**
 *  Handles a `TomlGrammar.date_time` node by dispatching on its first child
 *  (offset date-time, local date-time, local date or local time), parsing the
 *  matched text with the corresponding `parseToml...` function, and passing
 *  the result to `setData`.
 *
 *  Throws:
 *      TomlInvalidValueException if parsing raises a `TimeException`.
 */
private void processTomlDateTime(S)(ParseTree pt, ref S dest, string[] address)
in (pt.name == "TomlGrammar.date_time")
{
    import core.time : TimeException;

    string value = pt.input[pt.begin .. pt.end];

    try
    {
        string dateTimeType = pt.children[0].name;
        switch (dateTimeType)
        {
            case "TomlGrammar.offset_date_time":
                setData(dest, address, parseTomlOffsetDateTime(value));
                break;

            case "TomlGrammar.local_date_time":
                setData(dest, address, parseTomlLocalDateTime(value));
                break;

            case "TomlGrammar.local_date":
                setData(dest, address, parseTomlLocalDate(value));
                break;

            case "TomlGrammar.local_time":
                setData(dest, address, parseTomlLocalTime(value));
                break;

            default:
                assert(false, "Unsupported TOML date_time sub-type: " ~ dateTimeType);
        }
    }
    catch (TimeException e)
    {
        // Keep the original exception as the cause.
        throw new TomlInvalidValueException(
            "Invalid date/time: " ~ value, e
        );
    }
}


/**
 *  Handles a `TomlGrammar.inline_table` node.
 *
 *  Every key/value pair inside the inline table is processed with
 *  `processTomlKeyval`, using `address` (the inline table's own address) as
 *  the containing table. An inline table with no
 *  `TomlGrammar.inline_table_keyvals` child is a no-op.
 */
private void processTomlInlineTable(S)(
    ParseTree pt,
    ref S dest,
    string[] address,
    ref bool[string[]] keysCompleted,
)
in (pt.name == "TomlGrammar.inline_table", `Expected "TomlGrammar.inline_table" but got "` ~ pt.name ~ `".`)
{
    // Processes the first keyval of an `inline_table_keyvals` node, then
    // recurses into the nested `inline_table_keyvals` child, if there is one.
    void processTomlInlineTableKeyvals(S)(
        ParseTree pt,
        ref S dest,
        string[] address,
        ref bool[string[]] keysCompleted,
    )
    in (pt.name == "TomlGrammar.inline_table_keyvals")
    {
        processTomlKeyval(pt.children.find!(e => e.name == "TomlGrammar.keyval")[0], dest, address, keysCompleted);
        ParseTree[] keyvals = pt.children.find!(e => e.name == "TomlGrammar.inline_table_keyvals");
        if (keyvals.empty) return;
        processTomlInlineTableKeyvals(keyvals[0], dest, address, keysCompleted);
    }

    ParseTree[] keyvals = pt.children.find!(e => e.name == "TomlGrammar.inline_table_keyvals");
    if (keyvals.empty) return;
    processTomlInlineTableKeyvals(keyvals[0], dest, address, keysCompleted);
}


/**
 *  Handles a `TomlGrammar.array` node.
 *
 *  The array's `TomlGrammar.val` nodes are collected first (checking along the
 *  way that they all have the same type), and then each one is processed with
 *  `processTomlVal`, using its index (as a string) as the key within `address`.
 *  An array with no `TomlGrammar.array_values` child is a no-op.
 *
 *  Throws:
 *      TomlUnsupportedException if the array's elements are not all of the
 *      same type.
 */
private void processTomlArray(S)(
    ParseTree pt,
    ref S dest,
    string[] address,
    ref bool[string[]] keysCompleted,
)
in (pt.name == "TomlGrammar.array", `Expected "TomlGrammar.array" but got "` ~ pt.name ~ `".`)
{
    // Type rules of the first element seen; all later elements must match.
    string[] typeRules;

    // Walks the nested `array_values` nodes, appending each node's `val`
    // child to `acc`, and returns the accumulated list.
    ParseTree[] consumeArrayValues(ParseTree arrayValuesPT, ParseTree[] acc)
    in (arrayValuesPT.name == "TomlGrammar.array_values")
    in (acc.all!(e => e.name == "TomlGrammar.val" ))
    out (ret; ret.all!(e => e.name == "TomlGrammar.val"))
    {
        // Returns the chain of type-describing rule names found by descending
        // through first children, for as long as each node still matches the
        // value's whole text and is one of the recognised type rules.
        static string[] getTypeRules(ParseTree valPT)
        {
            static string[] _getTypeRules(ParseTree valPT, string fullMatch, string[] acc)
            {
                if (
                    valPT.input[valPT.begin .. valPT.end] != fullMatch ||
                    !([
                        "TomlGrammar.string_",
                        "TomlGrammar.boolean",
                        "TomlGrammar.array",
                        "TomlGrammar.inline_table",
                        "TomlGrammar.date_time",
                        "TomlGrammar.float_",
                        "TomlGrammar.integer",
                        "TomlGrammar.offset_date_time",
                        "TomlGrammar.local_date_time",
                        "TomlGrammar.local_date",
                        "TomlGrammar.local_time",
                    ].canFind(valPT.name))
                )
                {
                    return acc;
                }
                else
                {
                    return _getTypeRules(valPT.children[0], fullMatch, acc ~ valPT.name);
                }
            }

            // Trampoline
            return _getTypeRules(valPT.children[0], valPT.input[valPT.begin .. valPT.end], []);
        }

        auto foundVal = arrayValuesPT.children.find!(e => e.name == "TomlGrammar.val");
        assert(
            foundVal.length != 0,
            `Expected array to have a "TomlGrammar.val" child, but found: ` ~ arrayValuesPT.children.to!string
        );
        ParseTree firstValPT = foundVal[0];
        assert(
            firstValPT.name == "TomlGrammar.val",
            `Expected array to have a "TomlGrammar.val" child at index 1, but found "` ~ firstValPT.name ~ `".`
        );

        string[] currTypeRules = getTypeRules(firstValPT);
        if (typeRules.length == 0)
        {
            typeRules = currTypeRules;
        }
        else if (typeRules != currTypeRules)
        {
            throw new TomlUnsupportedException(
                `Mixed-type arrays not yet supported. Array started with "` ~
                typeRules.to!string ~ `" but also contains "` ~ currTypeRules.to!string ~ `".`
            );
        }

        auto restFindResult = arrayValuesPT.children.find!(e => e.name == "TomlGrammar.array_values");

        if (restFindResult.length > 0)
        {
            ParseTree restValPT = restFindResult[0];
            return consumeArrayValues(
                restValPT,
                acc ~ firstValPT
            );
        }
        else
        {
            return acc ~ firstValPT;
        }
    }

    auto findResult = pt.children.find!(e => e.name == "TomlGrammar.array_values");
    if (findResult.length == 0)
    {
        return;
    }

    ParseTree[] valuePTs = consumeArrayValues(findResult[0], []);
    foreach (size_t i, ParseTree valuePT; valuePTs)
    {
        processTomlVal(valuePT, dest, address, [i.to!string], keysCompleted);
    }
}

/**
 *  Splits a `TomlGrammar.key` node into its individual key components.
 *
 *  For a dotted key, one component is produced per `TomlGrammar.simple_key`
 *  child. For quoted keys, the first and last characters of the matched text
 *  (the quotes) are dropped.
 */
private string[] splitDottedKey(ParseTree pt)
pure
in (pt.name == "TomlGrammar.key")
{
    return pt.children[0].name == "TomlGrammar.dotted_key" ?
        (
            pt.children[0]
                .children
                .filter!(e => e.name == "TomlGrammar.simple_key")
                .map!(e =>
                    e.children[0].name == "TomlGrammar.quoted_key" ?
                    e.input[e.begin + 1 .. e.end - 1] :
                    e.input[e.begin .. e.end]
                )
                .array
        )
        :
        (
            pt.children[0].children[0].name == "TomlGrammar.quoted_key" ?
            [ pt.input[pt.begin + 1 .. pt.end - 1] ] :
            [ pt.input[pt.begin .. pt.end] ]
        );
}

/**
 *  Returns every proper, non-empty prefix of `table`, shortest first.
 *
 *  For example, `["a", "b", "c"]` yields `[["a"], ["a", "b"]]`. An address
 *  with zero or one components has no parents, so the result is empty.
 */
private string[][] getAllParentTables(string[] table)
{
    // `table[0 .. $-1]` is out of range for an empty array.
    if (table.length == 0)
    {
        return [];
    }

    return cumulativeFold!((string[] previous, string current) => previous ~ current)
        (table[0 .. $-1], cast(string[])[])
        .array;
}