// SDLang-D
// Written in the D programming language.

module sdlang.parser;

import std.concurrency;
import std.file;

import taggedalgebraic;

import sdlang.ast;
import sdlang.exception;
import sdlang.lexer;
import sdlang.symbol;
import sdlang.token;
import sdlang.util;

/// Reads the file `filename` in full and parses it as an SDLang document.
/// The file's bytes are cast to `string` as-is and handed to `parseSource`.
///
/// Returns root tag.
Tag parseFile(string filename)
{
	auto source = cast(string)read(filename);
	return parseSource(source, filename);
}

/// Returns root tag. The optional `filename` parameter can be included
/// so that the SDLang document's filename (if any) can be displayed with
/// any syntax error messages.
Tag parseSource(string source, string filename=null)
{
	auto lexer = new Lexer(source, filename);
	auto parser = DOMParser(lexer);
	return parser.parseRoot();
}

/++
Parses an SDL document using StAX/Pull-style. Returns an InputRange with
element type ParserEvent.

The pullParseFile version reads a file and parses it, while pullParseSource
parses a string passed in. The optional `filename` parameter in pullParseSource
can be included so that the SDLang document's filename (if any) can be displayed
with any syntax error messages.

Note: The old FileStartEvent and FileEndEvent events
$(LINK2 https://github.com/Abscissa/SDLang-D/issues/17, were deemed unnecessary)
and removed as of SDLang-D v0.10.0.

Note: Previously, in SDLang-D v0.9.x, ParserEvent was a
$(LINK2 http://dlang.org/phobos/std_variant.html#.Algebraic, std.variant.Algebraic).
As of SDLang-D v0.10.0, it is now a
$(LINK2 https://github.com/s-ludwig/taggedalgebraic, TaggedAlgebraic),
so usage has changed somewhat.

Example:
------------------
parent 12 attr="q" {
	childA 34
	childB 56
}
lastTag
------------------

The ParserEvent sequence emitted for that SDL document would be as
follows (indented for readability):
------------------
TagStartEvent (parent)
	ValueEvent (12)
	AttributeEvent (attr, "q")
	TagStartEvent (childA)
		ValueEvent (34)
	TagEndEvent
	TagStartEvent (childB)
		ValueEvent (56)
	TagEndEvent
TagEndEvent
TagStartEvent (lastTag)
TagEndEvent
------------------
+/
auto pullParseFile(string filename)
{
	auto source = cast(string)read(filename);

	// Must delegate to the pull-style entry point: parseSource would build
	// and return the DOM root Tag instead of the documented event range.
	return pullParseSource(source, filename);
}

///ditto
auto pullParseSource(string source, string filename=null)
{
	auto lexer = new Lexer(source, filename);
	auto parser = PullParser(lexer);

	// The parser runs inside a std.concurrency Generator; each emitted
	// ParserEvent is yielded to whoever iterates the returned range.
	return new Generator!ParserEvent({ parser.parseRoot; });
}

///
@("pullParseFile/pullParseSource example")
unittest
{
	// stuff.sdl
	immutable stuffSdl = `
		name "sdlang-d"
		description "An SDL (Simple Declarative Language) library for D."
		homepage "http://github.com/Abscissa/SDLang-D"

		configuration "library" {
			targetType "library"
		}
	`;

	import std.stdio;

	foreach(event; pullParseSource(stuffSdl))
	final switch(event.kind)
	{
	case ParserEvent.Kind.tagStart:
		auto e = cast(TagStartEvent) event;
		writeln("TagStartEvent: ", e.namespace, ":", e.name, " @ ", e.location);
		break;

	case ParserEvent.Kind.tagEnd:
		auto e = cast(TagEndEvent) event;
		writeln("TagEndEvent");
		break;

	case ParserEvent.Kind.value:
		auto e = cast(ValueEvent) event;
		writeln("ValueEvent: ", e.value);
		break;

	case ParserEvent.Kind.attribute:
		auto e = cast(AttributeEvent) event;
		writeln("AttributeEvent: ", e.namespace, ":", e.name, "=", e.value);
		break;
	}
}

private union ParserEventUnion
{
	TagStartEvent  tagStart;
	TagEndEvent    tagEnd;
	ValueEvent     value;
	AttributeEvent attribute;
}

/++
The element of the InputRange returned by pullParseFile and pullParseSource.

This is a tagged union, built from the following:
-------
alias ParserEvent = TaggedAlgebraic!ParserEventUnion;
private union ParserEventUnion
{
	TagStartEvent  tagStart;
	TagEndEvent    tagEnd;
	ValueEvent     value;
	AttributeEvent attribute;
}
-------

Note: The old FileStartEvent and FileEndEvent events
$(LINK2 https://github.com/Abscissa/SDLang-D/issues/17, were deemed unnecessary)
and removed as of SDLang-D v0.10.0.

Note: Previously, in SDLang-D v0.9.x, ParserEvent was a
$(LINK2 http://dlang.org/phobos/std_variant.html#.Algebraic, std.variant.Algebraic).
As of SDLang-D v0.10.0, it is now a
$(LINK2 https://github.com/s-ludwig/taggedalgebraic, TaggedAlgebraic),
so usage has changed somewhat.
+/
alias ParserEvent = TaggedAlgebraic!ParserEventUnion;

///
@("ParserEvent example")
unittest
{
	// Create
	ParserEvent event1 = TagStartEvent();
	ParserEvent event2 = TagEndEvent();
	ParserEvent event3 = ValueEvent();
	ParserEvent event4 = AttributeEvent();

	// Check type
	assert(event1.kind == ParserEvent.Kind.tagStart);
	assert(event2.kind == ParserEvent.Kind.tagEnd);
	assert(event3.kind == ParserEvent.Kind.value);
	assert(event4.kind == ParserEvent.Kind.attribute);

	// Cast to base type
	auto e1 = cast(TagStartEvent) event1;
	auto e2 = cast(TagEndEvent) event2;
	auto e3 = cast(ValueEvent) event3;
	auto e4 = cast(AttributeEvent) event4;
	//auto noGood = cast(AttributeEvent) event1; // AssertError: event1 is a TagStartEvent, not AttributeEvent.

	// Use as base type.
	// In many cases, no casting is even needed.
	event1.name = "foo";
	//auto noGood = event3.name; // AssertError: ValueEvent doesn't have a member 'name'.

	// Final switch is supported:
	final switch(event1.kind)
	{
		case ParserEvent.Kind.tagStart:  break;
		case ParserEvent.Kind.tagEnd:    break;
		case ParserEvent.Kind.value:     break;
		case ParserEvent.Kind.attribute: break;
	}
}

/// Event: Start of tag
struct TagStartEvent
{
	Location location;
	string namespace;
	string name;
}

/// Event: End of tag
struct TagEndEvent
{
	//Location location;
}

/// Event: Found a Value in the current tag
struct ValueEvent
{
	Location location;
	Value value;
}

/// Event: Found an Attribute in the current tag
struct AttributeEvent
{
	Location location;
	string namespace;
	string name;
	Value value;
}

// The actual pull parser
//
// A recursive-descent parser over the Lexer's token range. Each parseXxx
// method corresponds to one grammar production (given in its doc comment)
// and reports its results through emit() rather than by building a tree.
private struct PullParser
{
	private Lexer lexer;

	/// A (possibly namespaced) identifier. `namespace` is "" when the
	/// identifier was written without a "namespace:" prefix.
	private struct IDFull
	{
		string namespace;
		string name;
	}

	/// Throws a ParseException located at the lexer's current token.
	private void error(string msg)
	{
		error(lexer.front.location, msg);
	}

	/// Throws a ParseException at `loc`; the message is prefixed with "Error: ".
	private void error(Location loc, string msg)
	{
		throw new ParseException(loc, "Error: "~msg);
	}

	/// Wraps `event` in a ParserEvent and yields it (std.concurrency.yield),
	/// so this parser is meant to be run from within a Generator.
	private void emit(Event)(Event event)
	{
		yield( ParserEvent(event) );
	}

	/// <Root> ::= <Tags> EOF  (Lookaheads: Anything)
	private void parseRoot()
	{
		//trace("Starting parse of file: ", lexer.filename);
		//trace(__FUNCTION__, ": <Root> ::= <Tags> EOF  (Lookaheads: Anything)");

		parseTags();

		// Whatever stopped <Tags> must be EOF. A stray ':' gets a dedicated
		// message since it usually means a tag name with an empty namespace.
		auto token = lexer.front;
		if(token.matches!":"())
		{
			lexer.popFront();
			token = lexer.front;
			if(token.matches!"Ident"())
			{
				error("Missing namespace. If you don't wish to use a namespace, then say '"~token.data~"', not ':"~token.data~"'");
				assert(0);
			}
			else
			{
				error("Missing namespace. If you don't wish to use a namespace, then omit the ':'");
				assert(0);
			}
		}
		else if(!token.matches!"EOF"())
			error("Expected a tag or end-of-file, not " ~ token.symbol.name);
	}

	/// <Tags> ::= <Tag> <Tags>  (Lookaheads: Ident Value)
	///        |   EOL   <Tags>  (Lookaheads: EOL)
	///        |   {empty}       (Lookaheads: Anything else, except '{')
	void parseTags()
	{
		//trace("Enter ", __FUNCTION__);
		while(true)
		{
			auto token = lexer.front;
			if(token.matches!"Ident"() || token.matches!"Value"())
			{
				//trace(__FUNCTION__, ": <Tags> ::= <Tag> <Tags>  (Lookaheads: Ident Value)");
				parseTag();
				continue;
			}
			else if(token.matches!"EOL"())
			{
				//trace(__FUNCTION__, ": <Tags> ::= EOL <Tags>  (Lookaheads: EOL)");
				lexer.popFront();
				continue;
			}
			else if(token.matches!"{"())
			{
				// error() always throws, so this branch never loops.
				error("Found start of child block, but no tag name. If you intended an anonymous "~
				"tag, you must have at least one value before any attributes or child tags.");
			}
			else
			{
				//trace(__FUNCTION__, ": <Tags> ::= {empty}  (Lookaheads: Anything else, except '{')");
				break;
			}
		}
	}

	/// <Tag>
	///     ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Ident)
	///     |   <Value>  <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Value)
	void parseTag()
	{
		auto token = lexer.front;
		if(token.matches!"Ident"())
		{
			//trace(__FUNCTION__, ": <Tag> ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Ident)");
			//trace("Found tag named: ", tag.fullName);
			auto id = parseIDFull();
			emit( TagStartEvent(token.location, id.namespace, id.name) );
		}
		else if(token.matches!"Value"())
		{
			//trace(__FUNCTION__, ": <Tag> ::= <Value>  <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Value)");
			//trace("Found anonymous tag.");

			// Anonymous tag: the value token is left in place so that
			// parseValues() below emits it as the tag's first value.
			emit( TagStartEvent(token.location, null, null) );
		}
		else
			error("Expected tag name or value, not " ~ token.symbol.name);

		if(lexer.front.matches!"="())
			error("Found attribute, but no tag name. If you intended an anonymous "~
			"tag, you must have at least one value before any attributes.");

		parseValues();
		parseAttributes();
		parseOptChild();
		parseTagTerminator();

		emit( TagEndEvent() );
	}

	/// <IDFull> ::= Ident <IDSuffix>  (Lookaheads: Ident)
	IDFull parseIDFull()
	{
		auto token = lexer.front;
		if(token.matches!"Ident"())
		{
			//trace(__FUNCTION__, ": <IDFull> ::= Ident <IDSuffix>  (Lookaheads: Ident)");
			lexer.popFront();
			return parseIDSuffix(token.data);
		}
		else
		{
			error("Expected namespace or identifier, not " ~ token.symbol.name);
			assert(0);
		}
	}

	/// <IDSuffix>
	///     ::= ':' Ident  (Lookaheads: ':')
	///     ::= {empty}    (Lookaheads: Anything else)
	IDFull parseIDSuffix(string firstIdent)
	{
		auto token = lexer.front;
		if(token.matches!":"())
		{
			//trace(__FUNCTION__, ": <IDSuffix> ::= ':' Ident  (Lookaheads: ':')");
			lexer.popFront();
			token = lexer.front;
			if(token.matches!"Ident"())
			{
				// "firstIdent:name" -- firstIdent was the namespace.
				lexer.popFront();
				return IDFull(firstIdent, token.data);
			}
			else
			{
				error("Expected name, not " ~ token.symbol.name);
				assert(0);
			}
		}
		else
		{
			//trace(__FUNCTION__, ": <IDSuffix> ::= {empty}  (Lookaheads: Anything else)");
			return IDFull("", firstIdent);
		}
	}

	/// <Values>
	///     ::= Value <Values>  (Lookaheads: Value)
	///     |   {empty}         (Lookaheads: Anything else)
	void parseValues()
	{
		while(true)
		{
			auto token = lexer.front;
			if(token.matches!"Value"())
			{
				//trace(__FUNCTION__, ": <Values> ::= Value <Values>  (Lookaheads: Value)");
				parseValue();
				continue;
			}
			else
			{
				//trace(__FUNCTION__, ": <Values> ::= {empty}  (Lookaheads: Anything else)");
				break;
			}
		}
	}

	/// Handle Value terminals that aren't part of an attribute
	void parseValue()
	{
		auto token = lexer.front;
		if(token.matches!"Value"())
		{
			//trace(__FUNCTION__, ": (Handle Value terminals that aren't part of an attribute)");
			auto value = token.value;
			//trace("In tag '", parent.fullName, "', found value: ", value);
			emit( ValueEvent(token.location, value) );

			lexer.popFront();
		}
		else
			error("Expected value, not "~token.symbol.name);
	}

	/// <Attributes>
	///     ::= <Attribute> <Attributes>  (Lookaheads: Ident)
	///     |   {empty}                   (Lookaheads: Anything else)
	void parseAttributes()
	{
		while(true)
		{
			auto token = lexer.front;
			if(token.matches!"Ident"())
			{
				//trace(__FUNCTION__, ": <Attributes> ::= <Attribute> <Attributes>  (Lookaheads: Ident)");
				parseAttribute();
				continue;
			}
			else
			{
				//trace(__FUNCTION__, ": <Attributes> ::= {empty}  (Lookaheads: Anything else)");
				break;
			}
		}
	}

	/// <Attribute> ::= <IDFull> '=' Value  (Lookaheads: Ident)
	void parseAttribute()
	{
		//trace(__FUNCTION__, ": <Attribute> ::= <IDFull> '=' Value  (Lookaheads: Ident)");
		auto token = lexer.front;
		if(!token.matches!"Ident"())
			error("Expected attribute name, not "~token.symbol.name);

		auto id = parseIDFull();

		token = lexer.front;
		if(!token.matches!"="())
			error("Expected '=' after attribute name, not "~token.symbol.name);

		lexer.popFront();
		token = lexer.front;
		if(!token.matches!"Value"())
			error("Expected attribute value, not "~token.symbol.name);

		//trace("In tag '", parent.fullName, "', found attribute '", attr.fullName, "'");
		// Note: the event's location is that of the value token.
		emit( AttributeEvent(token.location, id.namespace, id.name, token.value) );

		lexer.popFront();
	}

	/// <OptChild>
	///      ::= '{' EOL <Tags> '}'  (Lookaheads: '{')
	///      |   {empty}             (Lookaheads: Anything else)
	void parseOptChild()
	{
		auto token = lexer.front;
		if(token.matches!"{")
		{
			//trace(__FUNCTION__, ": <OptChild> ::= '{' EOL <Tags> '}'  (Lookaheads: '{')");
			lexer.popFront();
			token = lexer.front;
			if(!token.matches!"EOL"())
				error("Expected newline or semicolon after '{', not "~token.symbol.name);

			lexer.popFront();
			parseTags();

			token = lexer.front;
			if(!token.matches!"}"())
				error("Expected '}' after child tags, not "~token.symbol.name);
			lexer.popFront();
		}
		else
		{
			//trace(__FUNCTION__, ": <OptChild> ::= {empty}  (Lookaheads: Anything else)");
			// Do nothing, no error.
		}
	}

	/// <TagTerminator>
	///     ::= EOL      (Lookahead: EOL)
	///     |   {empty}  (Lookahead: EOF)
	void parseTagTerminator()
	{
		auto token = lexer.front;
		if(token.matches!"EOL")
		{
			//trace(__FUNCTION__, ": <TagTerminator> ::= EOL  (Lookahead: EOL)");
			lexer.popFront();
		}
		else if(token.matches!"EOF")
		{
			//trace(__FUNCTION__, ": <TagTerminator> ::= {empty}  (Lookahead: EOF)");
			// Do nothing
		}
		else
			error("Expected end of tag (newline, semicolon or end-of-file), not " ~ token.symbol.name);
	}
}

// Builds the DOM (a tree of Tag objects) by consuming the events produced
// by a PullParser running over the same lexer.
private struct DOMParser
{
	Lexer lexer;

	/// Parses the whole document and returns the tag that is current once all
	/// events have been consumed; for balanced start/end events that is the
	/// synthetic tag named "root" created here.
	Tag parseRoot()
	{
		auto currTag = new Tag(null, null, "root");
		currTag.location = Location(lexer.filename, 0, 0, 0);

		auto parser = PullParser(lexer);
		auto eventRange = new Generator!ParserEvent({ parser.parseRoot; });

		foreach(event; eventRange)
		final switch(event.kind)
		{
		case ParserEvent.Kind.tagStart:
			// Descend: the new tag is constructed with the current tag as
			// its parent and then becomes the current tag.
			auto newTag = new Tag(currTag, event.namespace, event.name);
			newTag.location = event.location;

			currTag = newTag;
			break;

		case ParserEvent.Kind.tagEnd:
			// Ascend. Running out of parents means more end events than
			// start events were received.
			currTag = currTag.parent;

			if(!currTag)
				parser.error("Internal Error: Received an extra TagEndEvent");
			break;

		case ParserEvent.Kind.value:
			currTag.add((cast(ValueEvent)event).value);
			break;

		case ParserEvent.Kind.attribute:
			auto e = cast(AttributeEvent) event;
			auto attr = new Attribute(e.namespace, e.name, e.value, e.location);
			currTag.add(attr);
			break;
		}

		return currTag;
	}
}

// Other parser tests are part of the AST's tests over in the ast module.

// Regression test, issue #13: https://github.com/Abscissa/SDLang-D/issues/13
// "Incorrectly accepts ":tagname" (blank namespace, tagname prefixed with colon)"
@("parser: Regression test issue #13")
unittest
{
	import std.exception;
	assertThrown!ParseException(parseSource(`:test`));
	assertThrown!ParseException(parseSource(`:4`));
}

// Regression test, issue #16: https://github.com/Abscissa/SDLang-D/issues/16
@("parser: Regression test issue #16")
unittest
{
	// Shouldn't crash
	foreach(event; pullParseSource(`tag "data"`))
	{
		if(event.kind == ParserEvent.Kind.tagStart)
			auto e = cast(TagStartEvent) event;
	}
}

// Regression test, issue #31: https://github.com/Abscissa/SDLang-D/issues/31
// "Escape sequence results in range violation error"
@("parser: Regression test issue #31")
unittest
{
	// Shouldn't get a Range violation
	parseSource(`test "\"foo\""`);
}