1 // SDLang-D
2 // Written in the D programming language.
3 
4 module sdlang.parser;
5 
6 import std.concurrency;
7 import std.file;
8 
9 import taggedalgebraic;
10 
11 import sdlang.ast;
12 import sdlang.exception;
13 import sdlang.lexer;
14 import sdlang.symbol;
15 import sdlang.token;
16 import sdlang.util;
17 
/// Reads the file `filename` and parses its contents as an SDLang document.
/// Returns root tag.
Tag parseFile(string filename)
{
	// The file's bytes are reinterpreted as a string via a cast; `filename`
	// is passed along so it can be shown with any syntax error messages.
	return parseSource(cast(string) read(filename), filename);
}
24 
/// Parses the SDLang document held in `source` and returns its root tag.
/// The optional `filename` parameter can be included so that the SDLang
/// document's filename (if any) can be displayed with any syntax error
/// messages.
Tag parseSource(string source, string filename=null)
{
	// The DOM parser drives a pull parser over this lexer's tokens.
	auto domParser = DOMParser(new Lexer(source, filename));
	return domParser.parseRoot();
}
34 
35 /++
36 Parses an SDL document using StAX/Pull-style. Returns an InputRange with
37 element type ParserEvent.
38 
39 The pullParseFile version reads a file and parses it, while pullParseSource
40 parses a string passed in. The optional `filename` parameter in pullParseSource
41 can be included so that the SDLang document's filename (if any) can be displayed
42 with any syntax error messages.
43 
44 Note: The old FileStartEvent and FileEndEvent events
$(LINK2 https://github.com/Abscissa/SDLang-D/issues/17, were deemed unnecessary)
46 and removed as of SDLang-D v0.10.0.
47 
48 Note: Previously, in SDLang-D v0.9.x, ParserEvent was a
49 $(LINK2 http://dlang.org/phobos/std_variant.html#.Algebraic, std.variant.Algebraic).
50 As of SDLang-D v0.10.0, it is now a
51 $(LINK2 https://github.com/s-ludwig/taggedalgebraic, TaggedAlgebraic),
52 so usage has changed somewhat.
53 
54 Example:
55 ------------------
56 parent 12 attr="q" {
57 	childA 34
58 	childB 56
59 }
60 lastTag
61 ------------------
62 
63 The ParserEvent sequence emitted for that SDL document would be as
64 follows (indented for readability):
65 ------------------
66 TagStartEvent (parent)
67 	ValueEvent (12)
68 	AttributeEvent (attr, "q")
69 	TagStartEvent (childA)
70 		ValueEvent (34)
71 	TagEndEvent
72 	TagStartEvent (childB)
73 		ValueEvent (56)
74 	TagEndEvent
75 TagEndEvent
76 TagStartEvent (lastTag)
77 TagEndEvent
78 ------------------
79 +/
auto pullParseFile(string filename)
{
	// The file's bytes are reinterpreted as a string via a cast.
	auto source = cast(string)read(filename);

	// Delegate to pullParseSource (not parseSource): this function is
	// documented to return a range of ParserEvent, whereas parseSource
	// would build and return a DOM root Tag instead.
	return pullParseSource(source, filename);
}
85 
///ditto
auto pullParseSource(string source, string filename=null)
{
	auto pull = PullParser(new Lexer(source, filename));

	// The generator runs the parse; every event the parser yields becomes
	// an element of the returned range.
	return new Generator!ParserEvent({ pull.parseRoot(); });
}
93 
///
@("pullParseFile/pullParseSource example")
unittest
{
	import std.stdio;

	alias Kind = ParserEvent.Kind;

	// stuff.sdl
	immutable stuffSdl = `
		name "sdlang-d"
		description "An SDL (Simple Declarative Language) library for D."
		homepage "http://github.com/Abscissa/SDLang-D"
		
		configuration "library" {
			targetType "library"
		}
	`;

	// Walk the event stream, printing one line per event.
	foreach(event; pullParseSource(stuffSdl))
	{
		final switch(event.kind)
		{
		case Kind.tagStart:
			auto started = cast(TagStartEvent) event;
			writeln("TagStartEvent: ", started.namespace, ":", started.name, " @ ", started.location);
			break;

		case Kind.tagEnd:
			auto ended = cast(TagEndEvent) event;
			writeln("TagEndEvent");
			break;

		case Kind.value:
			auto found = cast(ValueEvent) event;
			writeln("ValueEvent: ", found.value);
			break;

		case Kind.attribute:
			auto attr = cast(AttributeEvent) event;
			writeln("AttributeEvent: ", attr.namespace, ":", attr.name, "=", attr.value);
			break;
		}
	}
}
135 
// Backing union for ParserEvent: one member per kind of event the pull
// parser emits. The member names are what ParserEvent.Kind is referred to
// by elsewhere in this module (tagStart, tagEnd, value, attribute).
private union ParserEventUnion
{
	TagStartEvent  tagStart;
	TagEndEvent    tagEnd;
	ValueEvent     value;
	AttributeEvent attribute;
}
143 
144 /++
145 The element of the InputRange returned by pullParseFile and pullParseSource.
146 
147 This is a tagged union, built from the following:
148 -------
149 alias ParserEvent = TaggedAlgebraic!ParserEventUnion;
150 private union ParserEventUnion
151 {
152 	TagStartEvent  tagStart;
153 	TagEndEvent    tagEnd;
154 	ValueEvent     value;
155 	AttributeEvent attribute;
156 }
157 -------
158 
159 Note: The old FileStartEvent and FileEndEvent events
$(LINK2 https://github.com/Abscissa/SDLang-D/issues/17, were deemed unnecessary)
161 and removed as of SDLang-D v0.10.0.
162 
163 Note: Previously, in SDLang-D v0.9.x, ParserEvent was a
164 $(LINK2 http://dlang.org/phobos/std_variant.html#.Algebraic, std.variant.Algebraic).
165 As of SDLang-D v0.10.0, it is now a
166 $(LINK2 https://github.com/s-ludwig/taggedalgebraic, TaggedAlgebraic),
167 so usage has changed somewhat.
168 +/
169 alias ParserEvent = TaggedAlgebraic!ParserEventUnion;
170 
///
@("ParserEvent example")
unittest
{
	alias Kind = ParserEvent.Kind;

	// Create
	ParserEvent startEvent = TagStartEvent();
	ParserEvent endEvent   = TagEndEvent();
	ParserEvent valEvent   = ValueEvent();
	ParserEvent attrEvent  = AttributeEvent();

	// Check type
	assert(startEvent.kind == Kind.tagStart);
	assert(endEvent.kind   == Kind.tagEnd);
	assert(valEvent.kind   == Kind.value);
	assert(attrEvent.kind  == Kind.attribute);

	// Cast to base type
	auto asStart = cast(TagStartEvent) startEvent;
	auto asEnd   = cast(TagEndEvent) endEvent;
	auto asValue = cast(ValueEvent) valEvent;
	auto asAttr  = cast(AttributeEvent) attrEvent;
	//auto noGood = cast(AttributeEvent) startEvent; // AssertError: startEvent is a TagStartEvent, not AttributeEvent.

	// Use as base type.
	// In many cases, no casting is even needed.
	startEvent.name = "foo";
	//auto noGood = valEvent.name; // AssertError: ValueEvent doesn't have a member 'name'.

	// Final switch is supported:
	final switch(startEvent.kind)
	{
		case Kind.tagStart:  break;
		case Kind.tagEnd:    break;
		case Kind.value:     break;
		case Kind.attribute: break;
	}
}
208 
/// Event: Start of tag
struct TagStartEvent
{
	/// Location of the token that begins the tag (its name, or its first
	/// value for an anonymous tag).
	Location location;
	/// Namespace of the tag: "" when a named tag has no namespace prefix,
	/// null for an anonymous tag.
	string namespace;
	/// Name of the tag; null for an anonymous tag.
	string name;
}
216 
/// Event: End of tag
///
/// Carries no data. The parser emits it once the tag's values, attributes,
/// optional child block and terminator have all been parsed.
struct TagEndEvent
{
	//Location location;
}
222 
/// Event: Found a Value in the current tag
struct ValueEvent
{
	/// Location of the value token.
	Location location;
	/// The value carried by that token.
	Value value;
}
229 
/// Event: Found an Attribute in the current tag
struct AttributeEvent
{
	/// Location of the attribute's value token (the token after the '='),
	/// not of the attribute's name.
	Location location;
	/// Namespace of the attribute; "" when the name has no namespace prefix.
	string namespace;
	/// Name of the attribute.
	string name;
	/// The attribute's value.
	Value value;
}
238 
// The actual pull parser.
//
// A recursive-descent parser: each parseXxx method handles the grammar
// production given in its doc comment, consuming tokens from `lexer` and
// reporting what it finds through emit(). Syntax errors are raised as
// ParseException through error().
private struct PullParser
{
	private Lexer lexer;
	
	/// An identifier split into namespace and name.
	/// `namespace` is "" when the identifier had no "namespace:" prefix.
	private struct IDFull
	{
		string namespace;
		string name;
	}
	
	/// Raises a syntax error located at the current token.
	/// Always throws ParseException.
	private void error(string msg)
	{
		error(lexer.front.location, msg);
	}

	/// Raises a syntax error located at `loc`; the message is prefixed
	/// with "Error: ". Always throws ParseException.
	private void error(Location loc, string msg)
	{
		throw new ParseException(loc, "Error: "~msg);
	}
	
	/// Wraps `event` in a ParserEvent and yields it to whoever is
	/// consuming the parse (see the Generator set up by the callers).
	private void emit(Event)(Event event)
	{
		yield( ParserEvent(event) );
	}
	
	/// <Root> ::= <Tags> EOF  (Lookaheads: Anything)
	///
	/// Entry point: parses every top-level tag, then requires end-of-file.
	private void parseRoot()
	{
		parseTags();
		
		auto token = lexer.front;
		if(token.matches!":"())
		{
			// A leading ':' means a namespace separator with no namespace
			// in front of it. Look one token ahead only to pick the more
			// helpful of the two error messages.
			lexer.popFront();
			token = lexer.front;
			if(token.matches!"Ident"())
			{
				error("Missing namespace. If you don't wish to use a namespace, then say '"~token.data~"', not ':"~token.data~"'");
				assert(0);
			}
			else
			{
				error("Missing namespace. If you don't wish to use a namespace, then omit the ':'");
				assert(0);
			}
		}
		else if(!token.matches!"EOF"())
			error("Expected a tag or end-of-file, not " ~ token.symbol.name);
	}

	/// <Tags> ::= <Tag> <Tags>  (Lookaheads: Ident Value)
	///        |   EOL   <Tags>  (Lookaheads: EOL)
	///        |   {empty}       (Lookaheads: Anything else, except '{')
	///
	/// Parses tags and skips blank lines until a token that can neither
	/// start a tag nor separate tags is reached. A '{' here is an error.
	void parseTags()
	{
		while(true)
		{
			auto token = lexer.front;
			if(token.matches!"Ident"() || token.matches!"Value"())
			{
				// <Tags> ::= <Tag> <Tags>
				parseTag();
			}
			else if(token.matches!"EOL"())
			{
				// <Tags> ::= EOL <Tags>
				lexer.popFront();
			}
			else if(token.matches!"{"())
			{
				error("Found start of child block, but no tag name. If you intended an anonymous "~
				"tag, you must have at least one value before any attributes or child tags.");
			}
			else
			{
				// <Tags> ::= {empty}
				break;
			}
		}
	}

	/// <Tag>
	///     ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Ident)
	///     |   <Value>  <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Value)
	///
	/// Emits a TagStartEvent, then the events for the tag's contents, then
	/// a TagEndEvent.
	void parseTag()
	{
		auto token = lexer.front;
		if(token.matches!"Ident"())
		{
			// Named tag: the identifier (with optional namespace) is consumed.
			auto id = parseIDFull();
			emit( TagStartEvent(token.location, id.namespace, id.name) );
		}
		else if(token.matches!"Value"())
		{
			// Anonymous tag: namespace and name are null. The value token
			// is NOT consumed here; parseValues() below picks it up.
			emit( TagStartEvent(token.location, null, null) );
		}
		else
			error("Expected tag name or value, not " ~ token.symbol.name);

		if(lexer.front.matches!"="())
			error("Found attribute, but no tag name. If you intended an anonymous "~
			"tag, you must have at least one value before any attributes.");

		parseValues();
		parseAttributes();
		parseOptChild();
		parseTagTerminator();
		
		emit( TagEndEvent() );
	}

	/// <IDFull> ::= Ident <IDSuffix>  (Lookaheads: Ident)
	///
	/// Consumes an identifier with an optional "namespace:" prefix.
	IDFull parseIDFull()
	{
		auto token = lexer.front;
		if(token.matches!"Ident"())
		{
			lexer.popFront();
			return parseIDSuffix(token.data);
		}
		else
		{
			error("Expected namespace or identifier, not " ~ token.symbol.name);
			assert(0);
		}
	}

	/// <IDSuffix>
	///     ::= ':' Ident  (Lookaheads: ':')
	///     ::= {empty}    (Lookaheads: Anything else)
	///
	/// `firstIdent` is the identifier already consumed by parseIDFull. If a
	/// ':' follows, `firstIdent` is the namespace and the next identifier is
	/// the name; otherwise `firstIdent` is the name and the namespace is "".
	IDFull parseIDSuffix(string firstIdent)
	{
		auto token = lexer.front;
		if(token.matches!":"())
		{
			lexer.popFront();
			token = lexer.front;
			if(token.matches!"Ident"())
			{
				lexer.popFront();
				return IDFull(firstIdent, token.data);
			}
			else
			{
				error("Expected name, not " ~ token.symbol.name);
				assert(0);
			}
		}
		else
		{
			return IDFull("", firstIdent);
		}
	}

	/// <Values>
	///     ::= Value <Values>  (Lookaheads: Value)
	///     |   {empty}         (Lookaheads: Anything else)
	void parseValues()
	{
		while(lexer.front.matches!"Value"())
			parseValue();
	}

	/// Handle Value terminals that aren't part of an attribute
	///
	/// Emits a ValueEvent for the current token and consumes it.
	void parseValue()
	{
		auto token = lexer.front;
		if(token.matches!"Value"())
		{
			auto value = token.value;
			emit( ValueEvent(token.location, value) );
			
			lexer.popFront();
		}
		else
			error("Expected value, not "~token.symbol.name);
	}

	/// <Attributes>
	///     ::= <Attribute> <Attributes>  (Lookaheads: Ident)
	///     |   {empty}                   (Lookaheads: Anything else)
	void parseAttributes()
	{
		while(lexer.front.matches!"Ident"())
			parseAttribute();
	}

	/// <Attribute> ::= <IDFull> '=' Value  (Lookaheads: Ident)
	///
	/// Emits an AttributeEvent whose location is that of the value token.
	void parseAttribute()
	{
		auto token = lexer.front;
		if(!token.matches!"Ident"())
			error("Expected attribute name, not "~token.symbol.name);
		
		auto id = parseIDFull();
		
		token = lexer.front;
		if(!token.matches!"="())
			error("Expected '=' after attribute name, not "~token.symbol.name);
		
		lexer.popFront();
		token = lexer.front;
		if(!token.matches!"Value"())
			error("Expected attribute value, not "~token.symbol.name);
		
		emit( AttributeEvent(token.location, id.namespace, id.name, token.value) );
		
		lexer.popFront();
	}

	/// <OptChild>
	///      ::= '{' EOL <Tags> '}'  (Lookaheads: '{')
	///      |   {empty}             (Lookaheads: Anything else)
	void parseOptChild()
	{
		auto token = lexer.front;
		if(token.matches!"{"())
		{
			lexer.popFront();
			token = lexer.front;
			if(!token.matches!"EOL"())
				error("Expected newline or semicolon after '{', not "~token.symbol.name);
			
			lexer.popFront();
			parseTags();
			
			token = lexer.front;
			if(!token.matches!"}"())
				error("Expected '}' after child tags, not "~token.symbol.name);
			lexer.popFront();
		}
		else
		{
			// <OptChild> ::= {empty}
			// Do nothing, no error.
		}
	}
	
	/// <TagTerminator>
	///     ::= EOL      (Lookahead: EOL)
	///     |   {empty}  (Lookahead: EOF)
	void parseTagTerminator()
	{
		auto token = lexer.front;
		if(token.matches!"EOL"())
		{
			lexer.popFront();
		}
		else if(token.matches!"EOF"())
		{
			// Do nothing: EOF is left in place for parseRoot to see.
		}
		else
			error("Expected end of tag (newline, semicolon or end-of-file), not " ~ token.symbol.name);
	}
}
544 
// Builds a tree of Tag objects from the event stream of a PullParser.
private struct DOMParser
{
	Lexer lexer;
	
	/// Parses the whole document and returns the tag named "root" that
	/// all top-level tags are attached to.
	Tag parseRoot()
	{
		alias Kind = ParserEvent.Kind;

		// `current` is the tag presently being filled in; it starts out
		// as the root tag.
		auto current = new Tag(null, null, "root");
		current.location = Location(lexer.filename, 0, 0, 0);
		
		auto pull = PullParser(lexer);
		auto events = new Generator!ParserEvent({ pull.parseRoot(); });
		
		foreach(event; events)
		{
			final switch(event.kind)
			{
			case Kind.tagStart:
				// Descend: the new tag becomes a child of `current`.
				auto started = cast(TagStartEvent) event;
				auto child = new Tag(current, started.namespace, started.name);
				child.location = started.location;
				current = child;
				break;

			case Kind.tagEnd:
				// Ascend back to the parent tag.
				current = current.parent;
				if(current is null)
					pull.error("Internal Error: Received an extra TagEndEvent");
				break;

			case Kind.value:
				auto found = cast(ValueEvent) event;
				current.add(found.value);
				break;

			case Kind.attribute:
				auto a = cast(AttributeEvent) event;
				current.add(new Attribute(a.namespace, a.name, a.value, a.location));
				break;
			}
		}
		
		return current;
	}
}
588 
589 // Other parser tests are part of the AST's tests over in the ast module.
590 
// Regression test, issue #13: https://github.com/Abscissa/SDLang-D/issues/13
// "Incorrectly accepts ":tagname" (blank namespace, tagname prefixed with colon)"
@("parser: Regression test issue #13")
unittest
{
	import std.exception : assertThrown;

	// A leading ':' with no namespace before it must be rejected.
	foreach(malformed; [`:test`, `:4`])
		assertThrown!ParseException(parseSource(malformed));
}
600 
// Regression test, issue #16: https://github.com/Abscissa/SDLang-D/issues/16
@("parser: Regression test issue #16")
unittest
{
	// Shouldn't crash
	auto events = pullParseSource(`tag "data"`);
	foreach(event; events)
	{
		if(event.kind != ParserEvent.Kind.tagStart)
			continue;

		auto started = cast(TagStartEvent) event;
	}
}
612 
// Regression test, issue #31: https://github.com/Abscissa/SDLang-D/issues/31
// "Escape sequence results in range violation error"
@("parser: Regression test issue #31")
unittest
{
	// A string value containing escaped quotes.
	immutable escapedQuotes = `test "\"foo\""`;

	// Shouldn't get a Range violation
	parseSource(escapedQuotes);
}