// SDLang-D
// Written in the D programming language.

module sdlang.lexer;

import std.algorithm;
import std.array;
static import std.ascii;
import std.base64;
import std.bigint;
import std.conv;
import std.datetime;
import std.file;
import std.format;
import std.traits;
import std.typecons;
import std.uni;
import std.utf;
import std.variant;

import sdlang.exception;
import sdlang.symbol;
import sdlang.token;
import sdlang.util;

alias sdlang.util.startsWith startsWith;

/// Lex an entire SDLang file from disk into an array of tokens.
Token[] lexFile(string filename)
{
	auto source = cast(string)read(filename);
	return lexSource(source, filename);
}

/// Lex SDLang source text into an array of tokens.
/// 'filename' is used only for error reporting / token locations.
Token[] lexSource(string source, string filename=null)
{
	auto lexer = scoped!Lexer(source, filename);

	// Can't use 'std.array.array(Range)' because 'lexer' is scoped
	// and therefore cannot have its reference copied.
	Appender!(Token[]) tokens;
	foreach(tok; lexer)
		tokens.put(tok);

	return tokens.data;
}

// Kind of a poor-man's yield, but fast.
// Only to be used inside Lexer.popFront (and Lexer.this).
//
// Each 'accept' instantiation expands (via mixin) to a statement block
// that fills in '_front' and returns from the enclosing function.
private template accept(string symbolName)
{
	static assert(symbolName != "Value", "Value symbols must also take a value.");
	enum accept = acceptImpl!(symbolName, "null");
}
private template accept(string symbolName, string value)
{
	static assert(symbolName == "Value", "Only a Value symbol can take a value.");
	enum accept = acceptImpl!(symbolName, value);
}
// Variant that overrides the token's location/data span. Empty
// startLocation/endLocation default to the current token's own span.
private template accept(string symbolName, string value, string startLocation, string endLocation)
{
	static assert(symbolName == "Value", "Only a Value symbol can take a value.");
	enum accept = ("
		{
			_front = makeToken!"~symbolName.stringof~";
			_front.value = "~value~";
			_front.location = "~(startLocation==""? "tokenStart" : startLocation)~";
			_front.data = source[
				"~(startLocation==""? "tokenStart.index" : startLocation)~"
				..
				"~(endLocation==""? "location.index" : endLocation)~"
			];
			return;
		}
	").replace("\n", "");
}
private template acceptImpl(string symbolName, string value)
{
	enum acceptImpl = ("
		{
			_front = makeToken!"~symbolName.stringof~";
			_front.value = "~value~";
			return;
		}
	").replace("\n", "");
}

class Lexer
{
	string source;
	string filename;
	Location location; /// Location of current character in source

	private dchar ch;         // Current character
	private dchar nextCh;     // Lookahead character
	private size_t nextPos;   // Position of lookahead character (an index into source)
	private bool hasNextCh;   // If false, then there's no more lookahead, just EOF
	private size_t posAfterLookahead; // Position after lookahead character (an index into source)

	private Location tokenStart; // The starting location of the token being lexed

	// Length so far of the token being lexed, not including current char
	private size_t tokenLength;   // Length in UTF-8 code units
	private size_t tokenLength32; // Length in UTF-32 code units

	// Slight kludge:
	// If a numeric fragment is found after a Date (separated by arbitrary
	// whitespace), it could be the "hours" part of a DateTime, or it could
	// be a separate numeric literal that simply follows a plain Date. If the
	// latter, then the Date must be emitted, but the numeric fragment that was
	// found after it needs to be saved for the lexer's next iteration.
	//
	// It's a slight kludge, and could instead be implemented as a slightly
	// kludgey parser hack, but it's the only situation where SDLang's lexing
	// needs to lookahead more than one character, so this is good enough.
	// Saved numeric fragment (see the kludge comment above): filled in by
	// lexDate when a fragment after a Date turns out not to be a DateTime.
	private struct LookaheadTokenInfo
	{
		bool exists            = false;
		string numericFragment = "";
		bool isNegative        = false;
		Location tokenStart;
	}
	private LookaheadTokenInfo lookaheadTokenInfo;

	this(string source=null, string filename=null)
	{
		this.filename = filename;
		this.source = source;

		_front = Token(symbol!"Error", Location());
		lookaheadTokenInfo = LookaheadTokenInfo.init;

		// Skip a leading UTF-8 BOM, if present.
		if( source.startsWith( ByteOrderMarks[BOM.UTF8] ) )
		{
			source = source[ ByteOrderMarks[BOM.UTF8].length .. $ ];
			this.source = source;
		}

		// Any other BOM indicates an unsupported encoding.
		foreach(bom; ByteOrderMarks)
		if( source.startsWith(bom) )
			error(Location(filename,0,0,0), "SDL spec only supports UTF-8, not UTF-16 or UTF-32");

		if(source == "")
			mixin(accept!"EOF");

		// Prime everything: load the lookahead, shift it into 'ch',
		// then lex the first token.
		hasNextCh = true;
		nextCh = source.decode(posAfterLookahead);
		advanceChar(ErrorOnEOF.Yes);
		location = Location(filename, 0, 0, 0);
		popFront();
	}

	@property bool empty()
	{
		return _front.symbol == symbol!"EOF";
	}

	Token _front;
	@property Token front()
	{
		return _front;
	}

	@property bool isEOF()
	{
		return location.index == source.length && !lookaheadTokenInfo.exists;
	}

	private void error(string msg)
	{
		error(location, msg);
	}

	//TODO: Take varargs and use output range sink.
	private void error(Location loc, string msg)
	{
		throw new ParseException(loc, "Error: "~msg);
	}

	private Token makeToken(string symbolName)()
	{
		auto tok = Token(symbol!symbolName, tokenStart);
		tok.data = tokenData;
		return tok;
	}

	// Slice of source covered by the token currently being lexed.
	private @property string tokenData()
	{
		return source[ tokenStart.index .. location.index ];
	}

	/// Check the lookahead character
	private bool lookahead(dchar ch)
	{
		return hasNextCh && nextCh == ch;
	}

	private bool lookahead(bool function(dchar) condition)
	{
		return hasNextCh && condition(nextCh);
	}

	private static bool isNewline(dchar ch)
	{
		return ch == '\n' || ch == '\r' || ch == lineSep || ch == paraSep;
	}

	/// Returns the length of the newline sequence, or zero if the current
	/// character is not a newline
	///
	/// Note that there are only single character sequences and the two
	/// character sequence `\r\n` as used on Windows.
	private size_t isAtNewline()
	{
		if(ch == '\n' || ch == lineSep || ch == paraSep) return 1;
		else if(ch == '\r') return lookahead('\n') ? 2 : 1;
		else return 0;
	}

	/// Is 'ch' a valid base 64 character?
	private bool isBase64(dchar ch)
	{
		if(ch >= 'A' && ch <= 'Z')
			return true;

		if(ch >= 'a' && ch <= 'z')
			return true;

		if(ch >= '0' && ch <= '9')
			return true;

		return ch == '+' || ch == '/' || ch == '=';
	}

	/// Is the current character one that's allowed
	/// immediately *after* an int/float literal?
	private bool isEndOfNumber()
	{
		if(isEOF)
			return true;

		return !isDigit(ch) && ch != ':' && ch != '_' && !isAlpha(ch);
	}

	/// Is current character the last one in an ident?
	// Cached because it's queried repeatedly while scanning keywords.
	private bool isEndOfIdentCached = false;
	private bool _isEndOfIdent;
	private bool isEndOfIdent()
	{
		if(!isEndOfIdentCached)
		{
			if(!hasNextCh)
				_isEndOfIdent = true;
			else
				_isEndOfIdent = !isIdentChar(nextCh);

			isEndOfIdentCached = true;
		}

		return _isEndOfIdent;
	}

	/// Is 'ch' a character that's allowed *somewhere* in an identifier?
264 private bool isIdentChar(dchar ch) 265 { 266 if(isAlpha(ch)) 267 return true; 268 269 else if(isNumber(ch)) 270 return true; 271 272 else 273 return 274 ch == '-' || 275 ch == '_' || 276 ch == '.' || 277 ch == '$'; 278 } 279 280 private bool isDigit(dchar ch) 281 { 282 return ch >= '0' && ch <= '9'; 283 } 284 285 private enum KeywordResult 286 { 287 Accept, // Keyword is matched 288 Continue, // Keyword is not matched *yet* 289 Failed, // Keyword doesn't match 290 } 291 private KeywordResult checkKeyword(dstring keyword32) 292 { 293 // Still within length of keyword 294 if(tokenLength32 < keyword32.length) 295 { 296 if(ch == keyword32[tokenLength32]) 297 return KeywordResult.Continue; 298 else 299 return KeywordResult.Failed; 300 } 301 302 // At position after keyword 303 else if(tokenLength32 == keyword32.length) 304 { 305 if(isEOF || !isIdentChar(ch)) 306 { 307 debug assert(tokenData == to!string(keyword32)); 308 return KeywordResult.Accept; 309 } 310 else 311 return KeywordResult.Failed; 312 } 313 314 assert(0, "Fell off end of keyword to check"); 315 } 316 317 enum ErrorOnEOF { No, Yes } 318 319 /// Advance one code point. 
320 private void advanceChar(ErrorOnEOF errorOnEOF) 321 { 322 if(auto cnt = isAtNewline()) 323 { 324 if (cnt == 1) 325 location.line++; 326 location.col = 0; 327 } 328 else 329 location.col++; 330 331 location.index = nextPos; 332 333 nextPos = posAfterLookahead; 334 ch = nextCh; 335 336 if(!hasNextCh) 337 { 338 if(errorOnEOF == ErrorOnEOF.Yes) 339 error("Unexpected end of file"); 340 341 return; 342 } 343 344 tokenLength32++; 345 tokenLength = location.index - tokenStart.index; 346 347 if(nextPos == source.length) 348 { 349 nextCh = dchar.init; 350 hasNextCh = false; 351 return; 352 } 353 354 nextCh = source.decode(posAfterLookahead); 355 isEndOfIdentCached = false; 356 } 357 358 /// Advances the specified amount of characters 359 private void advanceChar(size_t count, ErrorOnEOF errorOnEOF) 360 { 361 while(count-- > 0) 362 advanceChar(errorOnEOF); 363 } 364 365 void popFront() 366 { 367 // -- Main Lexer ------------- 368 369 eatWhite(); 370 371 if(isEOF) 372 mixin(accept!"EOF"); 373 374 tokenStart = location; 375 tokenLength = 0; 376 tokenLength32 = 0; 377 isEndOfIdentCached = false; 378 379 if(lookaheadTokenInfo.exists) 380 { 381 tokenStart = lookaheadTokenInfo.tokenStart; 382 383 auto prevLATokenInfo = lookaheadTokenInfo; 384 lookaheadTokenInfo = LookaheadTokenInfo.init; 385 lexNumeric(prevLATokenInfo); 386 return; 387 } 388 389 if(ch == '=') 390 { 391 advanceChar(ErrorOnEOF.No); 392 mixin(accept!"="); 393 } 394 395 else if(ch == '{') 396 { 397 advanceChar(ErrorOnEOF.No); 398 mixin(accept!"{"); 399 } 400 401 else if(ch == '}') 402 { 403 advanceChar(ErrorOnEOF.No); 404 mixin(accept!"}"); 405 } 406 407 else if(ch == ':') 408 { 409 advanceChar(ErrorOnEOF.No); 410 mixin(accept!":"); 411 } 412 413 else if(ch == ';') 414 { 415 advanceChar(ErrorOnEOF.No); 416 mixin(accept!"EOL"); 417 } 418 419 else if(auto cnt = isAtNewline()) 420 { 421 advanceChar(cnt, ErrorOnEOF.No); 422 mixin(accept!"EOL"); 423 } 424 425 else if(isAlpha(ch) || ch == '_') 426 lexIdentKeyword(); 427 
428 else if(ch == '"') 429 lexRegularString(); 430 431 else if(ch == '`') 432 lexRawString(); 433 434 else if(ch == '\'') 435 lexCharacter(); 436 437 else if(ch == '[') 438 lexBinary(); 439 440 else if(ch == '-' || ch == '.' || isDigit(ch)) 441 lexNumeric(); 442 443 else 444 { 445 if(ch == ',') 446 error("Unexpected comma: SDLang is not a comma-separated format."); 447 else if(std.ascii.isPrintable(ch)) 448 error(text("Unexpected: ", ch)); 449 else 450 error("Unexpected character code 0x%02X".format(ch)); 451 452 advanceChar(ErrorOnEOF.No); 453 } 454 } 455 456 /// Lex Ident or Keyword 457 private void lexIdentKeyword() 458 { 459 assert(isAlpha(ch) || ch == '_'); 460 461 // Keyword 462 struct Key 463 { 464 dstring name; 465 Value value; 466 bool failed = false; 467 } 468 static Key[5] keywords; 469 static keywordsInited = false; 470 if(!keywordsInited) 471 { 472 // Value (as a std.variant-based type) can't be statically inited 473 keywords[0] = Key("true", Value(true )); 474 keywords[1] = Key("false", Value(false)); 475 keywords[2] = Key("on", Value(true )); 476 keywords[3] = Key("off", Value(false)); 477 keywords[4] = Key("null", Value(null )); 478 keywordsInited = true; 479 } 480 481 foreach(ref key; keywords) 482 key.failed = false; 483 484 auto numKeys = keywords.length; 485 486 do 487 { 488 foreach(ref key; keywords) 489 if(!key.failed) 490 { 491 final switch(checkKeyword(key.name)) 492 { 493 case KeywordResult.Accept: 494 mixin(accept!("Value", "key.value")); 495 496 case KeywordResult.Continue: 497 break; 498 499 case KeywordResult.Failed: 500 key.failed = true; 501 numKeys--; 502 break; 503 } 504 } 505 506 if(numKeys == 0) 507 { 508 lexIdent(); 509 return; 510 } 511 512 advanceChar(ErrorOnEOF.No); 513 514 } while(!isEOF); 515 516 foreach(ref key; keywords) 517 if(!key.failed) 518 if(key.name.length == tokenLength32+1) 519 mixin(accept!("Value", "key.value")); 520 521 mixin(accept!"Ident"); 522 } 523 524 /// Lex Ident 525 private void lexIdent() 526 { 527 
if(tokenLength == 0) 528 assert(isAlpha(ch) || ch == '_'); 529 530 while(!isEOF && isIdentChar(ch)) 531 advanceChar(ErrorOnEOF.No); 532 533 mixin(accept!"Ident"); 534 } 535 536 /// Lex regular string 537 private void lexRegularString() 538 { 539 assert(ch == '"'); 540 541 Appender!string buf; 542 size_t spanStart = nextPos; 543 544 // Doesn't include current character 545 void updateBuf() 546 { 547 if(location.index == spanStart) 548 return; 549 550 buf.put( source[spanStart..location.index] ); 551 } 552 553 advanceChar(ErrorOnEOF.Yes); 554 while(ch != '"') 555 { 556 if(ch == '\\') 557 { 558 updateBuf(); 559 560 bool wasEscSequence = true; 561 if(hasNextCh) 562 { 563 switch(nextCh) 564 { 565 case 'n': buf.put('\n'); break; 566 case 'r': buf.put('\r'); break; 567 case 't': buf.put('\t'); break; 568 case '"': buf.put('\"'); break; 569 case '\\': buf.put('\\'); break; 570 default: wasEscSequence = false; break; 571 } 572 } 573 574 if(wasEscSequence) 575 { 576 advanceChar(ErrorOnEOF.Yes); 577 spanStart = nextPos; 578 } 579 else 580 { 581 eatWhite(false); 582 spanStart = location.index; 583 } 584 } 585 586 else if(isNewline(ch)) 587 error("Unescaped newlines are only allowed in raw strings, not regular strings."); 588 589 advanceChar(ErrorOnEOF.Yes); 590 } 591 592 updateBuf(); 593 advanceChar(ErrorOnEOF.No); // Skip closing double-quote 594 mixin(accept!("Value", "buf.data")); 595 } 596 597 /// Lex raw string 598 private void lexRawString() 599 { 600 assert(ch == '`'); 601 602 do 603 advanceChar(ErrorOnEOF.Yes); 604 while(ch != '`'); 605 606 advanceChar(ErrorOnEOF.No); // Skip closing back-tick 607 mixin(accept!("Value", "tokenData[1..$-1]")); 608 } 609 610 /// Lex character literal 611 private void lexCharacter() 612 { 613 assert(ch == '\''); 614 advanceChar(ErrorOnEOF.Yes); // Skip opening single-quote 615 616 dchar value; 617 if(ch == '\\') 618 { 619 advanceChar(ErrorOnEOF.Yes); // Skip escape backslash 620 switch(ch) 621 { 622 case 'n': value = '\n'; break; 623 case 
'r': value = '\r'; break; 624 case 't': value = '\t'; break; 625 case '\'': value = '\''; break; 626 case '\\': value = '\\'; break; 627 default: error("Invalid escape sequence."); 628 } 629 } 630 else if(isNewline(ch)) 631 error("Newline not alowed in character literal."); 632 else 633 value = ch; 634 advanceChar(ErrorOnEOF.Yes); // Skip the character itself 635 636 if(ch == '\'') 637 advanceChar(ErrorOnEOF.No); // Skip closing single-quote 638 else 639 error("Expected closing single-quote."); 640 641 mixin(accept!("Value", "value")); 642 } 643 644 /// Lex base64 binary literal 645 private void lexBinary() 646 { 647 assert(ch == '['); 648 advanceChar(ErrorOnEOF.Yes); 649 650 void eatBase64Whitespace() 651 { 652 while(!isEOF && isWhite(ch)) 653 { 654 if(isNewline(ch)) 655 advanceChar(ErrorOnEOF.Yes); 656 657 if(!isEOF && isWhite(ch)) 658 eatWhite(); 659 } 660 } 661 662 eatBase64Whitespace(); 663 664 // Iterates all valid base64 characters, ending at ']'. 665 // Skips all whitespace. Throws on invalid chars. 666 struct Base64InputRange 667 { 668 Lexer lexer; 669 private bool isInited = false; 670 private int numInputCharsMod4 = 0; 671 672 @property bool empty() 673 { 674 if(lexer.ch == ']') 675 { 676 if(numInputCharsMod4 != 0) 677 lexer.error("Length of Base64 encoding must be a multiple of 4. 
("~to!string(numInputCharsMod4)~")"); 678 679 return true; 680 } 681 682 return false; 683 } 684 685 @property dchar front() 686 { 687 return lexer.ch; 688 } 689 690 void popFront() 691 { 692 auto lex = lexer; 693 694 if(!isInited) 695 { 696 if(lexer.isBase64(lexer.ch)) 697 { 698 numInputCharsMod4++; 699 numInputCharsMod4 %= 4; 700 } 701 702 isInited = true; 703 } 704 705 lex.advanceChar(lex.ErrorOnEOF.Yes); 706 707 eatBase64Whitespace(); 708 709 if(lex.isEOF) 710 lex.error("Unexpected end of file."); 711 712 if(lex.ch != ']') 713 { 714 if(!lex.isBase64(lex.ch)) 715 lex.error("Invalid character in base64 binary literal."); 716 717 numInputCharsMod4++; 718 numInputCharsMod4 %= 4; 719 } 720 } 721 } 722 723 // This is a slow ugly hack. It's necessary because Base64.decode 724 // currently requires the source to have known length. 725 //TODO: Remove this when DMD issue #9543 is fixed. 726 dchar[] tmpBuf = array(Base64InputRange(this)); 727 728 Appender!(ubyte[]) outputBuf; 729 // Ugly workaround for DMD issue #9102 730 //TODO: Remove this when DMD #9102 is fixed 731 struct OutputBuf 732 { 733 void put(ubyte ch) 734 { 735 outputBuf.put(ch); 736 } 737 } 738 739 try 740 //Base64.decode(Base64InputRange(this), OutputBuf()); 741 Base64.decode(tmpBuf, OutputBuf()); 742 743 catch(Base64Exception e) 744 error("Invalid character in base64 binary literal."); 745 746 advanceChar(ErrorOnEOF.No); // Skip ']' 747 mixin(accept!("Value", "outputBuf.data")); 748 } 749 750 private BigInt toBigInt(bool isNegative, string absValue) 751 { 752 auto num = BigInt(absValue); 753 assert(num >= 0); 754 755 if(isNegative) 756 num = -num; 757 758 return num; 759 } 760 761 /// Lex [0-9]+, but without emitting a token. 762 /// This is used by the other numeric parsing functions. 
763 private string lexNumericFragment() 764 { 765 if(!isDigit(ch)) 766 error("Expected a digit 0-9."); 767 768 auto spanStart = location.index; 769 770 do 771 { 772 advanceChar(ErrorOnEOF.No); 773 } while(!isEOF && isDigit(ch)); 774 775 return source[spanStart..location.index]; 776 } 777 778 /// Lex anything that starts with 0-9 or '-'. Ints, floats, dates, etc. 779 private void lexNumeric(LookaheadTokenInfo laTokenInfo = LookaheadTokenInfo.init) 780 { 781 bool isNegative; 782 string firstFragment; 783 if(laTokenInfo.exists) 784 { 785 firstFragment = laTokenInfo.numericFragment; 786 isNegative = laTokenInfo.isNegative; 787 } 788 else 789 { 790 assert(ch == '-' || ch == '.' || isDigit(ch)); 791 792 // Check for negative 793 isNegative = ch == '-'; 794 if(isNegative) 795 advanceChar(ErrorOnEOF.Yes); 796 797 // Some floating point with omitted leading zero? 798 if(ch == '.') 799 { 800 lexFloatingPoint(""); 801 return; 802 } 803 804 firstFragment = lexNumericFragment(); 805 } 806 807 // Long integer (64-bit signed)? 808 if(ch == 'L' || ch == 'l') 809 { 810 advanceChar(ErrorOnEOF.No); 811 812 // BigInt(long.min) is a workaround for DMD issue #9548 813 auto num = toBigInt(isNegative, firstFragment); 814 if(num < BigInt(long.min) || num > long.max) 815 error(tokenStart, "Value doesn't fit in 64-bit signed long integer: "~to!string(num)); 816 817 mixin(accept!("Value", "num.toLong()")); 818 } 819 820 // Float (32-bit signed)? 821 else if(ch == 'F' || ch == 'f') 822 { 823 auto value = to!float(tokenData); 824 advanceChar(ErrorOnEOF.No); 825 mixin(accept!("Value", "value")); 826 } 827 828 // Double float (64-bit signed) with suffix? 829 else if((ch == 'D' || ch == 'd') && !lookahead(':') 830 ) 831 { 832 auto value = to!double(tokenData); 833 advanceChar(ErrorOnEOF.No); 834 mixin(accept!("Value", "value")); 835 } 836 837 // Decimal (128+ bits signed)? 
838 else if( 839 (ch == 'B' || ch == 'b') && 840 (lookahead('D') || lookahead('d')) 841 ) 842 { 843 auto value = to!real(tokenData); 844 advanceChar(ErrorOnEOF.No); 845 advanceChar(ErrorOnEOF.No); 846 mixin(accept!("Value", "value")); 847 } 848 849 // Some floating point? 850 else if(ch == '.') 851 lexFloatingPoint(firstFragment); 852 853 // Some date? 854 else if(ch == '/' && hasNextCh && isDigit(nextCh)) 855 lexDate(isNegative, firstFragment); 856 857 // Some time span? 858 else if(ch == ':' || ch == 'd') 859 lexTimeSpan(isNegative, firstFragment); 860 861 // Integer (32-bit signed)? 862 else if(isEndOfNumber()) 863 { 864 auto num = toBigInt(isNegative, firstFragment); 865 if(num < int.min || num > int.max) 866 error(tokenStart, "Value doesn't fit in 32-bit signed integer: "~to!string(num)); 867 868 mixin(accept!("Value", "num.toInt()")); 869 } 870 871 // Invalid suffix 872 else 873 error("Invalid integer suffix."); 874 } 875 876 /// Lex any floating-point literal (after the initial numeric fragment was lexed) 877 private void lexFloatingPoint(string firstPart) 878 { 879 assert(ch == '.'); 880 advanceChar(ErrorOnEOF.No); 881 882 auto secondPart = lexNumericFragment(); 883 884 try 885 { 886 // Double float (64-bit signed) with suffix? 887 if(ch == 'D' || ch == 'd') 888 { 889 auto value = to!double(tokenData); 890 advanceChar(ErrorOnEOF.No); 891 mixin(accept!("Value", "value")); 892 } 893 894 // Float (32-bit signed)? 895 else if(ch == 'F' || ch == 'f') 896 { 897 auto value = to!float(tokenData); 898 advanceChar(ErrorOnEOF.No); 899 mixin(accept!("Value", "value")); 900 } 901 902 // Decimal (128+ bits signed)? 
903 else if(ch == 'B' || ch == 'b') 904 { 905 auto value = to!real(tokenData); 906 advanceChar(ErrorOnEOF.Yes); 907 908 if(!isEOF && (ch == 'D' || ch == 'd')) 909 { 910 advanceChar(ErrorOnEOF.No); 911 if(isEndOfNumber()) 912 mixin(accept!("Value", "value")); 913 } 914 915 error("Invalid floating point suffix."); 916 } 917 918 // Double float (64-bit signed) without suffix? 919 else if(isEOF || !isIdentChar(ch)) 920 { 921 auto value = to!double(tokenData); 922 mixin(accept!("Value", "value")); 923 } 924 925 // Invalid suffix 926 else 927 error("Invalid floating point suffix."); 928 } 929 catch(ConvException e) 930 error("Invalid floating point literal."); 931 } 932 933 private Date makeDate(bool isNegative, string yearStr, string monthStr, string dayStr) 934 { 935 BigInt biTmp; 936 937 biTmp = BigInt(yearStr); 938 if(isNegative) 939 biTmp = -biTmp; 940 if(biTmp < int.min || biTmp > int.max) 941 error(tokenStart, "Date's year is out of range. (Must fit within a 32-bit signed int.)"); 942 auto year = biTmp.toInt(); 943 944 biTmp = BigInt(monthStr); 945 if(biTmp < 1 || biTmp > 12) 946 error(tokenStart, "Date's month is out of range."); 947 auto month = biTmp.toInt(); 948 949 biTmp = BigInt(dayStr); 950 if(biTmp < 1 || biTmp > 31) 951 error(tokenStart, "Date's month is out of range."); 952 auto day = biTmp.toInt(); 953 954 return Date(year, month, day); 955 } 956 957 private DateTimeFrac makeDateTimeFrac( 958 bool isNegative, Date date, string hourStr, string minuteStr, 959 string secondStr, string millisecondStr 960 ) 961 { 962 BigInt biTmp; 963 964 biTmp = BigInt(hourStr); 965 if(biTmp < int.min || biTmp > int.max) 966 error(tokenStart, "Datetime's hour is out of range."); 967 auto numHours = biTmp.toInt(); 968 969 biTmp = BigInt(minuteStr); 970 if(biTmp < 0 || biTmp > int.max) 971 error(tokenStart, "Datetime's minute is out of range."); 972 auto numMinutes = biTmp.toInt(); 973 974 int numSeconds = 0; 975 if(secondStr != "") 976 { 977 biTmp = BigInt(secondStr); 978 
if(biTmp < 0 || biTmp > int.max) 979 error(tokenStart, "Datetime's second is out of range."); 980 numSeconds = biTmp.toInt(); 981 } 982 983 int millisecond = 0; 984 if(millisecondStr != "") 985 { 986 biTmp = BigInt(millisecondStr); 987 if(biTmp < 0 || biTmp > int.max) 988 error(tokenStart, "Datetime's millisecond is out of range."); 989 millisecond = biTmp.toInt(); 990 991 if(millisecondStr.length == 1) 992 millisecond *= 100; 993 else if(millisecondStr.length == 2) 994 millisecond *= 10; 995 } 996 997 Duration fracSecs = millisecond.msecs; 998 999 auto offset = hours(numHours) + minutes(numMinutes) + seconds(numSeconds); 1000 1001 if(isNegative) 1002 { 1003 offset = -offset; 1004 fracSecs = -fracSecs; 1005 } 1006 1007 return DateTimeFrac(DateTime(date) + offset, fracSecs); 1008 } 1009 1010 private Duration makeDuration( 1011 bool isNegative, string dayStr, 1012 string hourStr, string minuteStr, string secondStr, 1013 string millisecondStr 1014 ) 1015 { 1016 BigInt biTmp; 1017 1018 long day = 0; 1019 if(dayStr != "") 1020 { 1021 biTmp = BigInt(dayStr); 1022 if(biTmp < long.min || biTmp > long.max) 1023 error(tokenStart, "Time span's day is out of range."); 1024 day = biTmp.toLong(); 1025 } 1026 1027 biTmp = BigInt(hourStr); 1028 if(biTmp < long.min || biTmp > long.max) 1029 error(tokenStart, "Time span's hour is out of range."); 1030 auto hour = biTmp.toLong(); 1031 1032 biTmp = BigInt(minuteStr); 1033 if(biTmp < long.min || biTmp > long.max) 1034 error(tokenStart, "Time span's minute is out of range."); 1035 auto minute = biTmp.toLong(); 1036 1037 biTmp = BigInt(secondStr); 1038 if(biTmp < long.min || biTmp > long.max) 1039 error(tokenStart, "Time span's second is out of range."); 1040 auto second = biTmp.toLong(); 1041 1042 long millisecond = 0; 1043 if(millisecondStr != "") 1044 { 1045 biTmp = BigInt(millisecondStr); 1046 if(biTmp < long.min || biTmp > long.max) 1047 error(tokenStart, "Time span's millisecond is out of range."); 1048 millisecond = 
biTmp.toLong(); 1049 1050 if(millisecondStr.length == 1) 1051 millisecond *= 100; 1052 else if(millisecondStr.length == 2) 1053 millisecond *= 10; 1054 } 1055 1056 auto duration = 1057 dur!"days" (day) + 1058 dur!"hours" (hour) + 1059 dur!"minutes"(minute) + 1060 dur!"seconds"(second) + 1061 dur!"msecs" (millisecond); 1062 1063 if(isNegative) 1064 duration = -duration; 1065 1066 return duration; 1067 } 1068 1069 // This has to reproduce some weird corner case behaviors from the 1070 // original Java version of SDL. So some of this may seem weird. 1071 private Nullable!Duration getTimeZoneOffset(string str) 1072 { 1073 if(str.length < 2) 1074 return Nullable!Duration(); // Unknown timezone 1075 1076 if(str[0] != '+' && str[0] != '-') 1077 return Nullable!Duration(); // Unknown timezone 1078 1079 auto isNegative = str[0] == '-'; 1080 1081 string numHoursStr; 1082 string numMinutesStr; 1083 if(str[1] == ':') 1084 { 1085 numMinutesStr = str[1..$]; 1086 numHoursStr = ""; 1087 } 1088 else 1089 { 1090 numMinutesStr = str.find(':'); 1091 numHoursStr = str[1 .. 
$-numMinutesStr.length]; 1092 } 1093 1094 long numHours = 0; 1095 long numMinutes = 0; 1096 bool isUnknown = false; 1097 try 1098 { 1099 switch(numHoursStr.length) 1100 { 1101 case 0: 1102 if(numMinutesStr.length == 3) 1103 { 1104 numHours = 0; 1105 numMinutes = to!long(numMinutesStr[1..$]); 1106 } 1107 else 1108 isUnknown = true; 1109 break; 1110 1111 case 1: 1112 case 2: 1113 if(numMinutesStr.length == 0) 1114 { 1115 numHours = to!long(numHoursStr); 1116 numMinutes = 0; 1117 } 1118 else if(numMinutesStr.length == 3) 1119 { 1120 numHours = to!long(numHoursStr); 1121 numMinutes = to!long(numMinutesStr[1..$]); 1122 } 1123 else 1124 isUnknown = true; 1125 break; 1126 1127 default: 1128 if(numMinutesStr.length == 0) 1129 { 1130 // Yes, this is correct 1131 numHours = 0; 1132 numMinutes = to!long(numHoursStr[1..$]); 1133 } 1134 else 1135 isUnknown = true; 1136 break; 1137 } 1138 } 1139 catch(ConvException e) 1140 isUnknown = true; 1141 1142 if(isUnknown) 1143 return Nullable!Duration(); // Unknown timezone 1144 1145 auto timeZoneOffset = hours(numHours) + minutes(numMinutes); 1146 if(isNegative) 1147 timeZoneOffset = -timeZoneOffset; 1148 1149 // Timezone valid 1150 return Nullable!Duration(timeZoneOffset); 1151 } 1152 1153 /// Lex date or datetime (after the initial numeric fragment was lexed) 1154 private void lexDate(bool isDateNegative, string yearStr) 1155 { 1156 assert(ch == '/'); 1157 1158 // Lex months 1159 advanceChar(ErrorOnEOF.Yes); // Skip '/' 1160 auto monthStr = lexNumericFragment(); 1161 1162 // Lex days 1163 if(ch != '/') 1164 error("Invalid date format: Missing days."); 1165 advanceChar(ErrorOnEOF.Yes); // Skip '/' 1166 auto dayStr = lexNumericFragment(); 1167 1168 auto date = makeDate(isDateNegative, yearStr, monthStr, dayStr); 1169 1170 if(!isEndOfNumber() && ch != '/') 1171 error("Dates cannot have suffixes."); 1172 1173 // Date? 
1174 if(isEOF) 1175 mixin(accept!("Value", "date")); 1176 1177 auto endOfDate = location; 1178 1179 while( 1180 !isEOF && 1181 ( ch == '\\' || ch == '/' || (isWhite(ch) && !isNewline(ch)) ) 1182 ) 1183 { 1184 if(ch == '\\' && hasNextCh && isNewline(nextCh)) 1185 { 1186 advanceChar(ErrorOnEOF.Yes); 1187 if(isAtNewline()) 1188 advanceChar(ErrorOnEOF.Yes); 1189 advanceChar(ErrorOnEOF.No); 1190 } 1191 1192 eatWhite(); 1193 } 1194 1195 // Date? 1196 if(isEOF || (!isDigit(ch) && ch != '-')) 1197 mixin(accept!("Value", "date", "", "endOfDate.index")); 1198 1199 auto startOfTime = location; 1200 1201 // Is time negative? 1202 bool isTimeNegative = ch == '-'; 1203 if(isTimeNegative) 1204 advanceChar(ErrorOnEOF.Yes); 1205 1206 // Lex hours 1207 auto hourStr = ch == '.'? "" : lexNumericFragment(); 1208 1209 // Lex minutes 1210 if(ch != ':') 1211 { 1212 // No minutes found. Therefore we had a plain Date followed 1213 // by a numeric literal, not a DateTime. 1214 lookaheadTokenInfo.exists = true; 1215 lookaheadTokenInfo.numericFragment = hourStr; 1216 lookaheadTokenInfo.isNegative = isTimeNegative; 1217 lookaheadTokenInfo.tokenStart = startOfTime; 1218 mixin(accept!("Value", "date", "", "endOfDate.index")); 1219 } 1220 advanceChar(ErrorOnEOF.Yes); // Skip ':' 1221 auto minuteStr = lexNumericFragment(); 1222 1223 // Lex seconds, if exists 1224 string secondStr; 1225 if(ch == ':') 1226 { 1227 advanceChar(ErrorOnEOF.Yes); // Skip ':' 1228 secondStr = lexNumericFragment(); 1229 } 1230 1231 // Lex milliseconds, if exists 1232 string millisecondStr; 1233 if(ch == '.') 1234 { 1235 advanceChar(ErrorOnEOF.Yes); // Skip '.' 
			millisecondStr = lexNumericFragment();
		}

		auto dateTimeFrac = makeDateTimeFrac(isTimeNegative, date, hourStr, minuteStr, secondStr, millisecondStr);

		// Lex zone, if exists
		if(ch == '-')
		{
			advanceChar(ErrorOnEOF.Yes); // Skip '-'
			auto timezoneStart = location;

			if(!isAlpha(ch))
				error("Invalid timezone format.");

			// Consume everything up to the next whitespace (or EOF) as the zone name.
			while(!isEOF && !isWhite(ch))
				advanceChar(ErrorOnEOF.No);

			auto timezoneStr = source[timezoneStart.index..location.index];
			if(timezoneStr.startsWith("GMT"))
			{
				// "GMT±HH:MM"-style offset. Both branches below emit a token
				// and return (the accept mixin contains a 'return').
				auto isoPart = timezoneStr["GMT".length..$];
				auto offset = getTimeZoneOffset(isoPart);

				if(offset.isNull())
				{
					// Unknown time zone
					mixin(accept!("Value", "DateTimeFracUnknownZone(dateTimeFrac.dateTime, dateTimeFrac.fracSecs, timezoneStr)"));
				}
				else
				{
					auto timezone = new immutable SimpleTimeZone(offset.get());
					mixin(accept!("Value", "SysTime(dateTimeFrac.dateTime, dateTimeFrac.fracSecs, timezone)"));
				}
			}

			// Not a GMT offset: try to resolve the name as an installed time zone.
			try
			{
				auto timezone = TimeZone.getTimeZone(timezoneStr);
				if(timezone)
					mixin(accept!("Value", "SysTime(dateTimeFrac.dateTime, dateTimeFrac.fracSecs, timezone)"));
			}
			catch(TimeException e)
			{
				// Time zone not found. So just move along to "Unknown time zone" below.
			}

			// Unknown time zone
			mixin(accept!("Value", "DateTimeFracUnknownZone(dateTimeFrac.dateTime, dateTimeFrac.fracSecs, timezoneStr)"));
		}

		// Only reached when no timezone suffix was present.
		if(!isEndOfNumber())
			error("Date-Times cannot have suffixes.");

		mixin(accept!("Value", "dateTimeFrac"));
	}

	/// Lex time span (after the initial numeric fragment was lexed)
	///
	/// Accepts "HH:MM:SS[.fff]" or "Dd:HH:MM:SS[.fff]" (current char must be
	/// ':' or 'd'). 'firstPart' is the already-lexed leading numeric fragment:
	/// the days if followed by 'd', otherwise the hours. 'isNegative' is
	/// whether a leading '-' was consumed by the caller.
	private void lexTimeSpan(bool isNegative, string firstPart)
	{
		assert(ch == ':' || ch == 'd');

		string dayStr = "";
		string hourStr;

		// Lexed days?
		bool hasDays = ch == 'd';
		if(hasDays)
		{
			dayStr = firstPart;
			advanceChar(ErrorOnEOF.Yes); // Skip 'd'

			// Lex hours
			if(ch != ':')
				error("Invalid time span format: Missing hours.");
			advanceChar(ErrorOnEOF.Yes); // Skip ':'
			hourStr = lexNumericFragment();
		}
		else
			hourStr = firstPart;

		// Lex minutes
		if(ch != ':')
			error("Invalid time span format: Missing minutes.");
		advanceChar(ErrorOnEOF.Yes); // Skip ':'
		auto minuteStr = lexNumericFragment();

		// Lex seconds
		if(ch != ':')
			error("Invalid time span format: Missing seconds.");
		advanceChar(ErrorOnEOF.Yes); // Skip ':'
		auto secondStr = lexNumericFragment();

		// Lex milliseconds, if exists
		string millisecondStr = "";
		if(ch == '.')
		{
			advanceChar(ErrorOnEOF.Yes); // Skip '.'
			millisecondStr = lexNumericFragment();
		}

		if(!isEndOfNumber())
			error("Time spans cannot have suffixes.");

		auto duration = makeDuration(isNegative, dayStr, hourStr, minuteStr, secondStr, millisecondStr);
		mixin(accept!("Value", "duration"));
	}

	/// Advances past whitespace and comments
	///
	/// Also handles line-continuation backslashes: a '\' consumes exactly the
	/// whitespace, newline and following whitespace after it (anything else
	/// before the newline is an error). If 'allowComments' is false, stops
	/// (returns) at the start of any comment instead of consuming it.
	private void eatWhite(bool allowComments=true)
	{
		// -- Comment/Whitespace Lexer -------------

		enum State
		{
			normal,
			lineComment,  // Got "#" or "//" or "--", Eating everything until newline
			blockComment, // Got "/*", Eating everything until "*/"
		}

		if(isEOF)
			return;

		Location commentStart;
		State state = State.normal;
		bool consumeNewlines = false;     // Inside a line-continuation ('\' seen)?
		bool hasConsumedNewline = false;  // Newline seen since the last '\'?
		while(true)
		{
			final switch(state)
			{
			case State.normal:

				if(ch == '\\')
				{
					commentStart = location;
					consumeNewlines = true;
					hasConsumedNewline = false;
				}

				else if(ch == '#')
				{
					if(!allowComments)
						return;

					commentStart = location;
					state = State.lineComment;
					continue;
				}

				else if(ch == '/' || ch == '-')
				{
					commentStart = location;
					// lookahead(ch) checks for a doubled char: "//" or "--".
					if(lookahead(ch))
					{
						if(!allowComments)
							return;

						advanceChar(ErrorOnEOF.No);
						state = State.lineComment;
						continue;
					}
					else if(ch == '/' && lookahead('*'))
					{
						if(!allowComments)
							return;

						advanceChar(ErrorOnEOF.No);
						state = State.blockComment;
						continue;
					}
					else
						return; // Done
				}
				else if(isAtNewline())
				{
					if(consumeNewlines)
						hasConsumedNewline = true;
					else
						return; // Done
				}
				else if(!isWhite(ch))
				{
					if(consumeNewlines)
					{
						if(hasConsumedNewline)
							return; // Done
						else
							error("Only whitespace can come between a line-continuation backslash and the following newline.");
					}
					else
						return; // Done
				}

				break;

			case State.lineComment:
				if(lookahead(&isNewline))
					state = State.normal;
				break;

			case State.blockComment:
				if(ch == '*' && lookahead('/'))
				{
					advanceChar(ErrorOnEOF.No);
					state = State.normal;
				}
				break;
			}

			advanceChar(ErrorOnEOF.No);
			if(isEOF)
			{
				// Reached EOF

				if(consumeNewlines && !hasConsumedNewline)
					error("Missing newline after line-continuation backslash.");

				else if(state == State.blockComment)
					error(commentStart, "Unterminated block comment.");

				else
					return; // Done, reached EOF
			}
		}
	}
}

version(unittest)
{
	import std.stdio;

	version(Have_unit_threaded) import unit_threaded;
	else { enum DontTest; }

	private auto loc = Location("filename", 0, 0, 0);
	private auto loc2 = Location("a", 1, 1, 1);

	@("lexer: EOL")
	unittest
	{
		assert([Token(symbol!"EOL",loc) ] == [Token(symbol!"EOL",loc) ] );
		// Note: Token equality deliberately ignores location and data, so
		// these two tokens compare equal despite differing in both.
		assert([Token(symbol!"EOL",loc,Value(7),"A")] == [Token(symbol!"EOL",loc2,Value(7),"B")] );
	}

	private int numErrors = 0;

	/// Lexes 'source' and compares the result against 'expected', reporting
	/// any mismatch (or unexpected ParseException) to stderr and bumping
	/// 'numErrors'. Token locations are only compared when 'test_locations'
	/// is set, since Token equality itself ignores them.
	@DontTest
	private void testLex(string source, Token[] expected, bool test_locations = false, string file=__FILE__, size_t line=__LINE__)
	{
		Token[] actual;
		try
			actual = lexSource(source, "filename");
		catch(ParseException e)
		{
			numErrors++;
			stderr.writeln(file, "(", line, "): testLex failed on: ", source);
			stderr.writeln(" Expected:");
			stderr.writeln(" ", expected);
			stderr.writeln(" Actual: ParseException thrown:");
			stderr.writeln(" ", e.msg);
			return;
		}

		bool is_same = actual == expected;
		if (is_same && test_locations) {
			is_same = actual.map!(t => t.location).equal(expected.map!(t => t.location));
		}

		if(!is_same)
		{
			numErrors++;
			stderr.writeln(file, "(", line, "): testLex failed on: ", source);
			stderr.writeln(" Expected:");
			stderr.writeln(" ", expected);
			stderr.writeln(" Actual:");
			stderr.writeln(" ", actual);

			if(expected.length > 1 || actual.length > 1)
			{
				stderr.writeln(" expected.length: ", expected.length);
				stderr.writeln(" actual.length: ", actual.length);

				if(actual.length == expected.length)
				foreach(i; 0..actual.length)
				if(actual[i] != expected[i])
				{
					stderr.writeln(" Unequal at index #", i, ":");
					stderr.writeln(" Expected:");
					stderr.writeln(" ", expected[i]);
					stderr.writeln(" Actual:");
					stderr.writeln(" ", actual[i]);
				}
			}
		}
	}

	/// Asserts that lexing 'source' throws a ParseException; reports to
	/// stderr and bumps 'numErrors' otherwise.
	private void testLexThrows(string file=__FILE__, size_t line=__LINE__)(string source)
	{
		bool hadException = false;
		Token[] actual;
		try
			actual = lexSource(source, "filename");
		catch(ParseException e)
			hadException = true;

		if(!hadException)
		{
			numErrors++;
			stderr.writeln(file, "(", line, "): testLex failed on: ", source);
			stderr.writeln(" Expected ParseException");
			stderr.writeln(" Actual:");
			stderr.writeln(" ", actual);
		}
	}
}

@("sdlang lexer")
unittest
{
	testLex("", []);
	testLex(" ", []);
	testLex("\\\n", []);
	testLex("/*foo*/", []);
	testLex("/* multiline \n comment */", []);
	testLex("/* * */", []);
	testLexThrows("/* ");

	testLex(":", [ Token(symbol!":", loc) ]);
	testLex("=", [ Token(symbol!"=", loc) ]);
	testLex("{", [ Token(symbol!"{", loc) ]);
	testLex("}", [ Token(symbol!"}", loc) ]);
	testLex(";", [ Token(symbol!"EOL",loc) ]);
	testLex("\n", [ Token(symbol!"EOL",loc) ]);

	testLex("foo", [ Token(symbol!"Ident",loc,Value(null),"foo") ]);
	testLex("_foo", [ Token(symbol!"Ident",loc,Value(null),"_foo") ]);
	testLex("foo.bar", [ Token(symbol!"Ident",loc,Value(null),"foo.bar") ]);
	testLex("foo-bar", [ Token(symbol!"Ident",loc,Value(null),"foo-bar") ]);
	testLex("foo.", [ Token(symbol!"Ident",loc,Value(null),"foo.") ]);
	testLex("foo-", [ Token(symbol!"Ident",loc,Value(null),"foo-") ]);
	testLexThrows(".foo");

	testLex("foo bar", [
		Token(symbol!"Ident",loc,Value(null),"foo"),
		Token(symbol!"Ident",loc,Value(null),"bar"),
	]);
	// Line continuations between tokens
	testLex("foo \\ \n \n bar", [
		Token(symbol!"Ident",loc,Value(null),"foo"),
		Token(symbol!"Ident",loc,Value(null),"bar"),
	]);
	testLex("foo \\ \n \\ \n bar", [
		Token(symbol!"Ident",loc,Value(null),"foo"),
		Token(symbol!"Ident",loc,Value(null),"bar"),
	]);
	testLexThrows("foo \\ ");
	testLexThrows("foo \\ bar");
	testLexThrows("foo \\ \n \\ ");
	testLexThrows("foo \\ \n \\ bar");

	testLex("foo : = { } ; \n bar \n", [
		Token(symbol!"Ident",loc,Value(null),"foo"),
		Token(symbol!":",loc),
		Token(symbol!"=",loc),
		Token(symbol!"{",loc),
		Token(symbol!"}",loc),
		Token(symbol!"EOL",loc),
		Token(symbol!"EOL",loc),
		Token(symbol!"Ident",loc,Value(null),"bar"),
		Token(symbol!"EOL",loc),
	]);

	testLexThrows("<");
	testLexThrows("*");
	testLexThrows(`\`);

	// Integers
	testLex( "7", [ Token(symbol!"Value",loc,Value(cast( int) 7)) ]);
	testLex( "-7", [ Token(symbol!"Value",loc,Value(cast( int)-7)) ]);
	testLex( "7L", [ Token(symbol!"Value",loc,Value(cast(long) 7)) ]);
	testLex( "7l", [ Token(symbol!"Value",loc,Value(cast(long) 7)) ]);
	testLex("-7L", [ Token(symbol!"Value",loc,Value(cast(long)-7)) ]);
	testLex( "0", [ Token(symbol!"Value",loc,Value(cast( int) 0)) ]);
	testLex( "-0", [ Token(symbol!"Value",loc,Value(cast( int) 0)) ]);

	testLex("7/**/", [ Token(symbol!"Value",loc,Value(cast( int) 7)) ]);
	testLex("7#", [ Token(symbol!"Value",loc,Value(cast( int) 7)) ]);

	testLex("7 A", [
		Token(symbol!"Value",loc,Value(cast(int)7)),
		Token(symbol!"Ident",loc,Value( null),"A"),
	]);
	testLexThrows("7A");
	testLexThrows("-A");
	testLexThrows(`-""`);

	testLex("7;", [
		Token(symbol!"Value",loc,Value(cast(int)7)),
		Token(symbol!"EOL",loc),
	]);

	// Floats
	testLex("1.2F" , [ Token(symbol!"Value",loc,Value(cast( float)1.2)) ]);
	testLex("1.2f" , [ Token(symbol!"Value",loc,Value(cast( float)1.2)) ]);
	testLex("1.2" , [ Token(symbol!"Value",loc,Value(cast(double)1.2)) ]);
	testLex("1.2D" , [ Token(symbol!"Value",loc,Value(cast(double)1.2)) ]);
	testLex("1.2d" , [ Token(symbol!"Value",loc,Value(cast(double)1.2)) ]);
	testLex("1.2BD", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]);
	testLex("1.2bd", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]);
	testLex("1.2Bd", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]);
	testLex("1.2bD", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]);

	testLex(".2F" , [ Token(symbol!"Value",loc,Value(cast( float)0.2)) ]);
	testLex(".2" , [ Token(symbol!"Value",loc,Value(cast(double)0.2)) ]);
	testLex(".2D" , [ Token(symbol!"Value",loc,Value(cast(double)0.2)) ]);
	testLex(".2BD", [ Token(symbol!"Value",loc,Value(cast( real)0.2)) ]);

	testLex("-1.2F" , [ Token(symbol!"Value",loc,Value(cast( float)-1.2)) ]);
	testLex("-1.2" , [ Token(symbol!"Value",loc,Value(cast(double)-1.2)) ]);
	testLex("-1.2D" , [ Token(symbol!"Value",loc,Value(cast(double)-1.2)) ]);
	testLex("-1.2BD", [ Token(symbol!"Value",loc,Value(cast( real)-1.2)) ]);

	testLex("-.2F" , [ Token(symbol!"Value",loc,Value(cast( float)-0.2)) ]);
	testLex("-.2" , [ Token(symbol!"Value",loc,Value(cast(double)-0.2)) ]);
	testLex("-.2D" , [ Token(symbol!"Value",loc,Value(cast(double)-0.2)) ]);
	testLex("-.2BD", [ Token(symbol!"Value",loc,Value(cast( real)-0.2)) ]);

	testLex( "0.0" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]);
	testLex( "0.0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]);
	testLex( "0.0BD", [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]);
	testLex("-0.0" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]);
	testLex("-0.0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]);
	testLex("-0.0BD", [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]);
	testLex( "7F" , [ Token(symbol!"Value",loc,Value(cast( float)7.0)) ]);
	testLex( "7D" , [ Token(symbol!"Value",loc,Value(cast(double)7.0)) ]);
	testLex( "7BD" , [ Token(symbol!"Value",loc,Value(cast( real)7.0)) ]);
	testLex( "0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]);
	testLex( "0D" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]);
	testLex( "0BD" , [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]);
	testLex("-0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]);
	testLex("-0D" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]);
	testLex("-0BD" , [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]);

	testLex("1.2 F", [
		Token(symbol!"Value",loc,Value(cast(double)1.2)),
		Token(symbol!"Ident",loc,Value( null),"F"),
	]);
	testLexThrows("1.2A");
	testLexThrows("1.2B");
	testLexThrows("1.2BDF");

	testLex("1.2;", [
		Token(symbol!"Value",loc,Value(cast(double)1.2)),
		Token(symbol!"EOL",loc),
	]);

	testLex("1.2F;", [
		Token(symbol!"Value",loc,Value(cast(float)1.2)),
		Token(symbol!"EOL",loc),
	]);

	testLex("1.2BD;", [
		Token(symbol!"Value",loc,Value(cast(real)1.2)),
		Token(symbol!"EOL",loc),
	]);

	// Booleans and null
	testLex("true", [ Token(symbol!"Value",loc,Value( true)) ]);
	testLex("false", [ Token(symbol!"Value",loc,Value(false)) ]);
	testLex("on", [ Token(symbol!"Value",loc,Value( true)) ]);
	testLex("off", [ Token(symbol!"Value",loc,Value(false)) ]);
	testLex("null", [ Token(symbol!"Value",loc,Value( null)) ]);

	// Keywords are case-sensitive; near-misses are plain identifiers.
	testLex("TRUE", [ Token(symbol!"Ident",loc,Value(null),"TRUE") ]);
	testLex("true ", [ Token(symbol!"Value",loc,Value(true)) ]);
	testLex("true ", [ Token(symbol!"Value",loc,Value(true)) ]);
	testLex("tru", [ Token(symbol!"Ident",loc,Value(null),"tru") ]);
	testLex("truX", [ Token(symbol!"Ident",loc,Value(null),"truX") ]);
	testLex("trueX", [ Token(symbol!"Ident",loc,Value(null),"trueX") ]);

	// Raw Backtick Strings
	testLex("`hello world`", [ Token(symbol!"Value",loc,Value(`hello world` )) ]);
	testLex("` hello world `", [ Token(symbol!"Value",loc,Value(` hello world ` )) ]);
	testLex("`hello \\t world`", [ Token(symbol!"Value",loc,Value(`hello \t world`)) ]);
	testLex("`hello \\n world`", [ Token(symbol!"Value",loc,Value(`hello \n world`)) ]);
	testLex("`hello \n world`", [ Token(symbol!"Value",loc,Value("hello \n world")) ]);
	testLex("`hello \r\n world`", [ Token(symbol!"Value",loc,Value("hello \r\n world")) ]);
	testLex("`hello \"world\"`", [ Token(symbol!"Value",loc,Value(`hello "world"` )) ]);

	testLexThrows("`foo");
	testLexThrows("`");

	// Double-Quote Strings
	testLex(`"hello world"`, [ Token(symbol!"Value",loc,Value("hello world" )) ]);
	testLex(`" hello world "`, [ Token(symbol!"Value",loc,Value(" hello world " )) ]);
	testLex(`"hello \t world"`, [ Token(symbol!"Value",loc,Value("hello \t world")) ]);
	testLex(`"hello \n world"`, [ Token(symbol!"Value",loc,Value("hello \n world")) ]);
	testLex("\"hello \\\n world\"", [ Token(symbol!"Value",loc,Value("hello world" )) ]);
	testLex("\"hello \\ \n world\"", [ Token(symbol!"Value",loc,Value("hello world" )) ]);
	testLex("\"hello \\ \n\n world\"", [ Token(symbol!"Value",loc,Value("hello world" )) ]);
	testLex(`"\"hello world\""`, [ Token(symbol!"Value",loc,Value(`"hello world"` )) ]);
	testLex(`""`, [ Token(symbol!"Value",loc,Value("" )) ]); // issue #34

	testLexThrows("\"hello \n world\"");
	testLexThrows(`"foo`);
	testLexThrows(`"`);

	// Characters
	testLex("'a'", [ Token(symbol!"Value",loc,Value(cast(dchar) 'a')) ]);
	testLex("'\\n'", [ Token(symbol!"Value",loc,Value(cast(dchar)'\n')) ]);
	testLex("'\\t'", [ Token(symbol!"Value",loc,Value(cast(dchar)'\t')) ]);
	testLex("'\t'", [ Token(symbol!"Value",loc,Value(cast(dchar)'\t')) ]);
	testLex("'\\''", [ Token(symbol!"Value",loc,Value(cast(dchar)'\'')) ]);
	testLex(`'\\'`, [ Token(symbol!"Value",loc,Value(cast(dchar)'\\')) ]);

	testLexThrows("'a");
	testLexThrows("'aa'");
	testLexThrows("''");
	testLexThrows("'\\\n'");
	testLexThrows("'\n'");
	testLexThrows(`'\`);
	testLexThrows(`'\'`);
	testLexThrows("'");

	// Unicode
	testLex("日本語", [ Token(symbol!"Ident",loc,Value(null), "日本語") ]);
	testLex("`おはよう、日本。`", [ Token(symbol!"Value",loc,Value(`おはよう、日本。`)) ]);
	testLex(`"おはよう、日本。"`, [ Token(symbol!"Value",loc,Value(`おはよう、日本。`)) ]);
	testLex("'月'", [ Token(symbol!"Value",loc,Value("月"d.dup[0])) ]);

	// Base64 Binary
	testLex("[aGVsbG8gd29ybGQ=]", [ Token(symbol!"Value",loc,Value(cast(ubyte[])"hello world".dup))]);
	testLex("[ aGVsbG8gd29ybGQ= ]", [ Token(symbol!"Value",loc,Value(cast(ubyte[])"hello world".dup))]);
	testLex("[\n aGVsbG8g \n \n d29ybGQ= \n]", [ Token(symbol!"Value",loc,Value(cast(ubyte[])"hello world".dup))]);

	testLexThrows("[aGVsbG8gd29ybGQ]"); // Ie: Not multiple of 4
	testLexThrows("[ aGVsbG8gd29ybGQ ]");

	// Date
	testLex( "1999/12/5", [ Token(symbol!"Value",loc,Value(Date( 1999, 12, 5))) ]);
	testLex( "2013/2/22", [ Token(symbol!"Value",loc,Value(Date( 2013, 2, 22))) ]);
	testLex("-2013/2/22", [ Token(symbol!"Value",loc,Value(Date(-2013, 2, 22))) ]);

	testLexThrows("7/");
	testLexThrows("2013/2/22a");
	testLexThrows("2013/2/22f");

	testLex("1999/12/5\n", [
		Token(symbol!"Value",loc,Value(Date(1999, 12, 5))),
		Token(symbol!"EOL",loc),
	]);

	// DateTime, no timezone
	testLex( "2013/2/22 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
	testLex( "2013/2/22 \t 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
	testLex( "2013/2/22/*foo*/07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
	testLex( "2013/2/22 /*foo*/ \\\n /*bar*/ 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
	testLex( "2013/2/22 /*foo*/ \\\n\n \n /*bar*/ 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
	testLex( "2013/2/22 /*foo*/ \\\n\\\n \\\n /*bar*/ 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
	testLex( "2013/2/22/*foo*/\\\n/*bar*/07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]);
	testLex("-2013/2/22 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime(-2013, 2, 22, 7, 53, 0)))) ]);
	testLex( "2013/2/22 -07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53)))) ]);
	testLex("-2013/2/22 -07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime(-2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53)))) ]);
	testLex( "2013/2/22 07:53:34", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34)))) ]);
	testLex( "2013/2/22 07:53:34.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs))) ]);
	testLex( "2013/2/22 07:53:34.12", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34), 120.msecs))) ]);
	testLex( "2013/2/22 07:53:34.1", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34), 100.msecs))) ]);
	testLex( "2013/2/22 07:53.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs))) ]);

	// Out-of-range hour/minute/second fields are accepted and carried over.
	testLex( "2013/2/22 34:65", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) + hours(34) + minutes(65) + seconds( 0)))) ]);
	testLex( "2013/2/22 34:65:77.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) + hours(34) + minutes(65) + seconds(77), 123.msecs))) ]);
	testLex( "2013/2/22 34:65.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) + hours(34) + minutes(65) + seconds( 0), 123.msecs))) ]);

	testLex( "2013/2/22 -34:65", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds( 0)))) ]);
	testLex( "2013/2/22 -34:65:77.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds(77), -123.msecs))) ]);
	testLex( "2013/2/22 -34:65.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds( 0), -123.msecs))) ]);

	testLexThrows("2013/2/22 07:53a");
	testLexThrows("2013/2/22 07:53f");
	testLexThrows("2013/2/22 07:53:34.123a");
	testLexThrows("2013/2/22 07:53:34.123f");
	testLexThrows("2013/2/22a 07:53");

	testLex(`2013/2/22 "foo"`, [
		Token(symbol!"Value",loc,Value(Date(2013, 2, 22))),
		Token(symbol!"Value",loc,Value("foo")),
	]);

	testLex("2013/2/22 07", [
		Token(symbol!"Value",loc,Value(Date(2013, 2, 22))),
		Token(symbol!"Value",loc,Value(cast(int)7)),
	]);

	testLex("2013/2/22 1.2F", [
		Token(symbol!"Value",loc,Value(Date(2013, 2, 22))),
		Token(symbol!"Value",loc,Value(cast(float)1.2)),
	]);

	testLex("2013/2/22 .2F", [
		Token(symbol!"Value",loc,Value(Date(2013, 2, 22))),
		Token(symbol!"Value",loc,Value(cast(float)0.2)),
	]);

	testLex("2013/2/22 -1.2F", [
		Token(symbol!"Value",loc,Value(Date(2013, 2, 22))),
		Token(symbol!"Value",loc,Value(cast(float)-1.2)),
	]);

	testLex("2013/2/22 -.2F", [
		Token(symbol!"Value",loc,Value(Date(2013, 2, 22))),
		Token(symbol!"Value",loc,Value(cast(float)-0.2)),
	]);

	// DateTime, with known timezone
	testLex( "2013/2/22 07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone( hours(0) )))) ]);
	testLex("-2013/2/22 07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime(-2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone( hours(0) )))) ]);
	testLex( "2013/2/22 -07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), new immutable SimpleTimeZone( hours(0) )))) ]);
	testLex("-2013/2/22 -07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime(-2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), new immutable SimpleTimeZone( hours(0) )))) ]);
	testLex( "2013/2/22 07:53-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]);
	testLex( "2013/2/22 07:53-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]);
	testLex( "2013/2/22 07:53:34-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), new immutable SimpleTimeZone( hours(0) )))) ]);
	testLex( "2013/2/22 07:53:34-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]);
	testLex( "2013/2/22 07:53:34-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]);
	testLex( "2013/2/22 07:53:34.123-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs, new immutable SimpleTimeZone( hours(0) )))) ]);
	testLex( "2013/2/22 07:53:34.123-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs, new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]);
	testLex( "2013/2/22 07:53:34.123-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs, new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]);
	testLex( "2013/2/22 07:53.123-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs, new immutable SimpleTimeZone( hours(0) )))) ]);
	testLex( "2013/2/22 07:53.123-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs, new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]);
	testLex( "2013/2/22 07:53.123-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs, new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]);

	testLex( "2013/2/22 -34:65-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds( 0), new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]);

	// DateTime, with Java SDLang's occasionally weird interpretation of some
	// "not quite ISO" variations of the "GMT with offset" timezone strings.
	Token testTokenSimpleTimeZone(Duration d)
	{
		auto dateTime = DateTime(2013, 2, 22, 7, 53, 0);
		auto tz = new immutable SimpleTimeZone(d);
		return Token( symbol!"Value", loc, Value(SysTime(dateTime,tz)) );
	}
	Token testTokenUnknownTimeZone(string tzName)
	{
		auto dateTime = DateTime(2013, 2, 22, 7, 53, 0);
		auto frac = 0.msecs;
		return Token( symbol!"Value", loc, Value(DateTimeFracUnknownZone(dateTime,frac,tzName)) );
	}
	testLex("2013/2/22 07:53-GMT+", [ testTokenUnknownTimeZone("GMT+") ]);
	testLex("2013/2/22 07:53-GMT+:", [ testTokenUnknownTimeZone("GMT+:") ]);
	testLex("2013/2/22 07:53-GMT+:3", [ testTokenUnknownTimeZone("GMT+:3") ]);
	testLex("2013/2/22 07:53-GMT+:03", [ testTokenSimpleTimeZone(minutes(3)) ]);
	testLex("2013/2/22 07:53-GMT+:003", [ testTokenUnknownTimeZone("GMT+:003") ]);

	testLex("2013/2/22 07:53-GMT+4", [ testTokenSimpleTimeZone(hours(4)) ]);
	testLex("2013/2/22 07:53-GMT+4:", [ testTokenUnknownTimeZone("GMT+4:") ]);
	testLex("2013/2/22 07:53-GMT+4:3", [ testTokenUnknownTimeZone("GMT+4:3") ]);
	testLex("2013/2/22 07:53-GMT+4:03", [ testTokenSimpleTimeZone(hours(4)+minutes(3)) ]);
	testLex("2013/2/22 07:53-GMT+4:003", [ testTokenUnknownTimeZone("GMT+4:003") ]);

	testLex("2013/2/22 07:53-GMT+04", [ testTokenSimpleTimeZone(hours(4)) ]);
	testLex("2013/2/22 07:53-GMT+04:", [ testTokenUnknownTimeZone("GMT+04:") ]);
	testLex("2013/2/22 07:53-GMT+04:3", [ testTokenUnknownTimeZone("GMT+04:3") ]);
	testLex("2013/2/22 07:53-GMT+04:03", [ testTokenSimpleTimeZone(hours(4)+minutes(3)) ]);
	testLex("2013/2/22 07:53-GMT+04:03abc", [ testTokenUnknownTimeZone("GMT+04:03abc") ]);
	testLex("2013/2/22 07:53-GMT+04:003", [ testTokenUnknownTimeZone("GMT+04:003") ]);

	testLex("2013/2/22 07:53-GMT+004", [ testTokenSimpleTimeZone(minutes(4)) ]);
	testLex("2013/2/22 07:53-GMT+004:", [ testTokenUnknownTimeZone("GMT+004:") ]);
	testLex("2013/2/22 07:53-GMT+004:3", [ testTokenUnknownTimeZone("GMT+004:3") ]);
	testLex("2013/2/22 07:53-GMT+004:03", [ testTokenUnknownTimeZone("GMT+004:03") ]);
	testLex("2013/2/22 07:53-GMT+004:003", [ testTokenUnknownTimeZone("GMT+004:003") ]);

	testLex("2013/2/22 07:53-GMT+0004", [ testTokenSimpleTimeZone(minutes(4)) ]);
	testLex("2013/2/22 07:53-GMT+0004:", [ testTokenUnknownTimeZone("GMT+0004:") ]);
	testLex("2013/2/22 07:53-GMT+0004:3", [ testTokenUnknownTimeZone("GMT+0004:3") ]);
	testLex("2013/2/22 07:53-GMT+0004:03", [ testTokenUnknownTimeZone("GMT+0004:03") ]);
	testLex("2013/2/22 07:53-GMT+0004:003", [ testTokenUnknownTimeZone("GMT+0004:003") ]);

	testLex("2013/2/22 07:53-GMT+00004", [ testTokenSimpleTimeZone(minutes(4)) ]);
	testLex("2013/2/22 07:53-GMT+00004:", [ testTokenUnknownTimeZone("GMT+00004:") ]);
	testLex("2013/2/22 07:53-GMT+00004:3", [ testTokenUnknownTimeZone("GMT+00004:3") ]);
	testLex("2013/2/22 07:53-GMT+00004:03", [ testTokenUnknownTimeZone("GMT+00004:03") ]);
	testLex("2013/2/22 07:53-GMT+00004:003", [ testTokenUnknownTimeZone("GMT+00004:003") ]);

	// DateTime, with unknown timezone
	testLex( "2013/2/22 07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 0), 0.msecs, "Bogus/Foo")), "2013/2/22 07:53-Bogus/Foo") ]);
	testLex("-2013/2/22 07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime(-2013, 2, 22, 7, 53, 0), 0.msecs, "Bogus/Foo"))) ]);
	testLex( "2013/2/22 -07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), 0.msecs, "Bogus/Foo"))) ]);
	testLex("-2013/2/22 -07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime(-2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), 0.msecs, "Bogus/Foo"))) ]);
	testLex( "2013/2/22 07:53:34-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 34), 0.msecs, "Bogus/Foo"))) ]);
	testLex( "2013/2/22 07:53:34.123-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs, "Bogus/Foo"))) ]);
	testLex( "2013/2/22 07:53.123-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs, "Bogus/Foo"))) ]);

	// Time Span
	testLex( "12:14:42", [ Token(symbol!"Value",loc,Value( days( 0)+hours(12)+minutes(14)+seconds(42)+msecs( 0))) ]);
	testLex("-12:14:42", [ Token(symbol!"Value",loc,Value(-days( 0)-hours(12)-minutes(14)-seconds(42)-msecs( 0))) ]);
	testLex( "00:09:12", [ Token(symbol!"Value",loc,Value( days( 0)+hours( 0)+minutes( 9)+seconds(12)+msecs( 0))) ]);
	testLex( "00:00:01.023", [ Token(symbol!"Value",loc,Value( days( 0)+hours( 0)+minutes( 0)+seconds( 1)+msecs( 23))) ]);
	testLex( "23d:05:21:23.532", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs(532))) ]);
	testLex( "23d:05:21:23.53", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs(530))) ]);
	testLex( "23d:05:21:23.5", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs(500))) ]);
	testLex("-23d:05:21:23.532", [ Token(symbol!"Value",loc,Value(-days(23)-hours( 5)-minutes(21)-seconds(23)-msecs(532))) ]);
	testLex("-23d:05:21:23.5", [ Token(symbol!"Value",loc,Value(-days(23)-hours( 5)-minutes(21)-seconds(23)-msecs(500))) ]);
	testLex( "23d:05:21:23", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs( 0))) ]);

	testLexThrows("12:14:42a");
	testLexThrows("23d:05:21:23.532a");
	testLexThrows("23d:05:21:23.532f");

	// Combination
	testLex("foo. 7", [
		Token(symbol!"Ident",loc,Value( null),"foo."),
		Token(symbol!"Value",loc,Value(cast(int)7))
	]);

	testLex(`
		namespace:person "foo" "bar" 1 23L name.first="ひとみ" name.last="Smith" {
			namespace:age 37; namespace:favorite_color "blue" // comment
			somedate 2013/2/22 07:53 -- comment

			inventory /* comment */ {
				socks
			}
		}
	`,
	[
		Token(symbol!"EOL",loc,Value(null),"\n"),

		Token(symbol!"Ident", loc, Value( null ), "namespace"),
		Token(symbol!":", loc, Value( null ), ":"),
		Token(symbol!"Ident", loc, Value( null ), "person"),
		Token(symbol!"Value", loc, Value( "foo" ), `"foo"`),
		Token(symbol!"Value", loc, Value( "bar" ), `"bar"`),
		Token(symbol!"Value", loc, Value( cast( int) 1 ), "1"),
		Token(symbol!"Value", loc, Value( cast(long)23 ), "23L"),
		Token(symbol!"Ident", loc, Value( null ), "name.first"),
		Token(symbol!"=", loc, Value( null ), "="),
		Token(symbol!"Value", loc, Value( "ひとみ" ), `"ひとみ"`),
		Token(symbol!"Ident", loc, Value( null ), "name.last"),
		Token(symbol!"=", loc, Value( null ), "="),
		Token(symbol!"Value", loc, Value( "Smith" ), `"Smith"`),
		Token(symbol!"{", loc, Value( null ), "{"),
		Token(symbol!"EOL", loc, Value( null ), "\n"),

		Token(symbol!"Ident", loc, Value( null ), "namespace"),
		Token(symbol!":", loc, Value( null ), ":"),
		Token(symbol!"Ident", loc, Value( null ), "age"),
		Token(symbol!"Value", loc, Value( cast(int)37 ), "37"),
		Token(symbol!"EOL", loc, Value( null ), ";"),
		Token(symbol!"Ident", loc, Value( null ), "namespace"),
		Token(symbol!":", loc, Value( null ), ":"),
		Token(symbol!"Ident", loc, Value( null ), "favorite_color"),
		Token(symbol!"Value", loc, Value( "blue" ), `"blue"`),
		Token(symbol!"EOL", loc, Value( null ), "\n"),

		Token(symbol!"Ident", loc, Value( null ), "somedate"),
		Token(symbol!"Value", loc, Value( DateTimeFrac(DateTime(2013, 2, 22, 7, 53, 0)) ), "2013/2/22 07:53"),
		Token(symbol!"EOL", loc, Value( null ), "\n"),
		Token(symbol!"EOL", loc, Value( null ), "\n"),

		Token(symbol!"Ident", loc, Value(null), "inventory"),
		Token(symbol!"{", loc, Value(null), "{"),
		Token(symbol!"EOL", loc, Value(null), "\n"),

		Token(symbol!"Ident", loc, Value(null), "socks"),
		Token(symbol!"EOL", loc, Value(null), "\n"),

		Token(symbol!"}", loc, Value(null), "}"),
		Token(symbol!"EOL", loc, Value(null), "\n"),

		Token(symbol!"}", loc, Value(null), "}"),
		Token(symbol!"EOL", loc, Value(null), "\n"),
	]);

	if(numErrors > 0)
		stderr.writeln(numErrors, " failed test(s)");
}

@("lexer: Regression test issue #8")
unittest
{
	testLex(`"\n \n"`, [ Token(symbol!"Value",loc,Value("\n \n"),`"\n \n"`) ]);
	testLex(`"\t\t"`, [ Token(symbol!"Value",loc,Value("\t\t"),`"\t\t"`) ]);
	testLex(`"\n\n"`, [ Token(symbol!"Value",loc,Value("\n\n"),`"\n\n"`) ]);
}

@("lexer: Regression test issue #11")
unittest
{
	// A comment on the first line must still produce that line's EOL token.
	void test(string input)
	{
		testLex(
			input,
			[
				Token(symbol!"EOL", loc, Value(null), "\n"),
				Token(symbol!"Ident",loc,Value(null), "a")
			]
		);
	}

	test("//X\na");
	test("//\na");
	test("--\na");
	test("#\na");
}

@("ast: Regression test issue #28")
unittest
{
	enum offset = 1; // workaround for an off-by-one error for line numbers
	testLex("test", [
		Token(symbol!"Ident", Location("filename", 0, 0, 0), Value(null), "test")
	], true);
	testLex("\ntest", [
		Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\n"),
		Token(symbol!"Ident", Location("filename", 1, 0, 1), Value(null), "test")
	], true);
	testLex("\rtest", [
		Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r"),
		Token(symbol!"Ident", Location("filename", 1, 0, 1), Value(null), "test")
	], true);
	testLex("\r\ntest", [
		Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r\n"),
		Token(symbol!"Ident", Location("filename", 1, 0, 2), Value(null), "test")
	], true);
	testLex("\r\n\ntest", [
		Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r\n"),
		Token(symbol!"EOL", Location("filename", 1, 0, 2), Value(null), "\n"),
		Token(symbol!"Ident", Location("filename", 2, 0, 3), Value(null), "test")
	], true);
	testLex("\r\r\ntest", [
		// Lone '\r' and '\r\n' are distinct EOLs here.
		Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r"),
		Token(symbol!"EOL", Location("filename", 1, 0, 1), Value(null), "\r\n"),
		Token(symbol!"Ident", Location("filename", 2, 0, 3), Value(null), "test")
	], true);
}