1 // This file is part of Visual D 2 // 3 // Visual D integrates the D programming language into Visual Studio 4 // Copyright (c) 2010-2011 by Rainer Schuetze, All Rights Reserved 5 // 6 // Distributed under the Boost Software License, Version 1.0. 7 // See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt 8 9 module vdc.parser.engine; 10 11 import std.exception; 12 import std.stdio; 13 import std.string; 14 import std.conv; 15 import std.utf; 16 17 import stdext.util; 18 19 import core.bitop; 20 21 import vdc.util; 22 import vdc.lexer; 23 import vdc.parser.expr; 24 import vdc.parser.mod; 25 import vdc.parser.stmt; 26 27 import vdc.ast.node; 28 import vdc.ast.writer; 29 30 // debug version = TraceParser; 31 // version = recoverError; 32 33 class ParseException : Exception 34 { 35 this(TextSpan _span, string msg) 36 { 37 super(msg); 38 span = _span; 39 } 40 41 this() 42 { 43 super("syntax error"); 44 } 45 46 TextSpan span; 47 } 48 49 alias Action function (Parser p) State; 50 51 enum { Forward, Accept, Reject } 52 53 alias int Action; 54 alias TokenId Info; 55 56 struct Stack(T) 57 { 58 int depth; 59 T[] stack; // use Appender instead? 60 61 ref T top() 62 { 63 assert(depth > 0); 64 return stack[depth-1]; 65 } 66 67 void push(T t) 68 { 69 static if(is(T == Token)) 70 { 71 if(depth >= stack.length) 72 { 73 stack ~= new T; 74 } 75 stack[depth].copy(t); 76 } 77 else 78 { 79 if(depth >= stack.length) 80 stack ~= t; 81 else 82 stack[depth] = t; 83 } 84 depth++; 85 } 86 87 T pop() 88 { 89 assert(depth > 0); 90 auto s = stack[--depth]; 91 return s; 92 } 93 94 void copyTo(ref Stack!T other) 95 { 96 other.depth = depth; 97 other.stack.length = depth; 98 other.stack[] = stack[0..depth]; 99 } 100 101 bool compare(ref Stack!T other, int maxdepth) 102 { 103 if(maxdepth > depth) 104 maxdepth = depth; 105 if(other.depth < maxdepth) 106 return false; 107 for(int i = 0; i < maxdepth; i++) 108 if(stack[i] !is other.stack[i]) 109 return false; 110 return true; 111 } 112 } 113 114 struct Snapshot 115 { 116 int stateStackDepth; 117 int nodeStackDepth; 118 int tokenStackDepth; 119 120 int tokenPos; 121 State rollbackState; 122 } 123 124 struct ParseError 125 { 126 TextSpan span; 127 string msg; 128 } 129 130 class Parser 131 { 132 Stack!State stateStack; 133 Stack!Node nodeStack; 134 Stack!Token tokenStack; 135 136 Stack!Snapshot rollbackStack; // for backtracking parsing 137 Stack!Snapshot recoverStack; // to continue after errors 138 139 Stack!Token tokenHistory; 140 int tokenHistoryStart; 141 142 Stack!Token redoTokens; 143 144 string filename; 145 int lineno; 146 int tokenPos; 147 Token lookaheadToken; 148 Token tok; 149 Token lexerTok; 150 string partialString; 151 TextSpan partialStringSpan; 152 153 int countErrors; 154 int lastErrorTokenPos; 155 string lastError; 156 TextSpan lastErrorSpan; 157 158 version(recoverError) 159 { 160 Stack!State errStateStack; 161 Stack!Node errNodeStack; 162 Stack!Token errTokenStack; 163 } 164 165 version(TraceParser) State[] traceState; 166 version(TraceParser) string[] traceToken; 167 168 bool recovering; 169 bool abort; 170 bool saveErrors; 171 ParseError[] errors; 172 State lastState; 173 174 this() 175 { 176 lexerTok = new Token; 177 } 178 179 // node stack ////////////////// 180 @property T topNode(T = Node)() 181 { 182 Node n = nodeStack.top(); 183 return static_cast!T(n); 184 } 185 186 void pushNode(Node n) 187 { 188 nodeStack.push(n); 189 } 190 191 T popNode(T = Node)() 192 { 193 Node n = nodeStack.pop(); 194 nodeStack.stack[nodeStack.depth] = null; 195 return static_cast!T(n); 196 } 197 198 // replace item on the node stack, appending old top as child 199 void appendReplaceTopNode(Node n) 200 { 201 auto node = popNode(); 202 n.addMember(node); 203 pushNode(n); 204 } 205 206 // pop top item from the node stack and add it to the members of the new top item 207 void popAppendTopNode(T = Node, P = Node)() 208 { 209 auto node = popNode(); 210 if(!__ctfe) assert(cast(P) node); 211 if(!__ctfe) assert(cast(T) topNode()); 212 topNode().addMember(node); 213 } 214 215 // extend the full psan of the node on top of the node stack 216 void extendTopNode(Token tok) 217 { 218 if(nodeStack.depth > 0) 219 topNode().extendSpan(tok.span); 220 } 221 222 // state stack ////////////////// 223 void pushState(State fn) 224 { 225 stateStack.push(fn); 226 } 227 228 State popState() 229 { 230 State s = stateStack.pop(); 231 stateStack.stack[stateStack.depth] = null; 232 return s; 233 } 234 235 // token stack ////////////////// 236 Token topToken() 237 { 238 return tokenStack.top(); 239 } 240 241 void pushToken(Token token) 242 { 243 tokenStack.push(token); 244 } 245 246 Token popToken() 247 { 248 return tokenStack.pop(); 249 } 250 251 // error handling ////////////////// 252 string createError(string msg) 253 { 254 string where; 255 if(filename.length) 256 where = filename ~ "(" ~ text(lineno) ~ "): '" ~ tok.txt ~ "' - "; 257 else 258 where = "line " ~ text(lineno) ~ " '" ~ tok.txt ~ "': "; 259 return where ~ msg; 260 } 261 262 Action parseError(string msg) 263 { 264 if(tokenPos < lastErrorTokenPos || recovering) 265 return Reject; 266 267 lastErrorTokenPos = tokenPos; 268 lastError = createError(msg); 269 lastErrorSpan = tok.span; 270 271 version(recoverError) 272 { 273 stateStack.copyTo(errStateStack); 274 nodeStack.copyTo(errNodeStack); 275 tokenStack.copyTo(errTokenStack); 276 errStateStack.push(lastState); 277 } 278 279 return Reject; 280 } 281 282 void writeError(ref const(TextSpan) errorSpan, string msg) 283 { 284 if(saveErrors) 285 errors ~= ParseError(errorSpan, msg); 286 else 287 writeln(msg); 288 countErrors++; 289 } 290 291 void writeError(string msg) 292 { 293 writeError(tok.span, msg); 294 } 295 296 Action notImplementedError(string what = "") 297 { 298 return parseError("not implemented: " ~ what); 299 } 300 301 // backtrace parsing 302 void pushRollback(State rollbackState) 303 { 304 Snapshot ss; 305 ss.stateStackDepth = stateStack.depth; 306 ss.nodeStackDepth = nodeStack.depth; 307 ss.tokenStackDepth = tokenStack.depth; 308 ss.tokenPos = tokenHistory.depth; 309 ss.rollbackState = rollbackState; 310 311 rollbackStack.push(ss); 312 } 313 314 void popRollback() 315 { 316 rollbackStack.pop(); 317 if(rollbackStack.depth == 0) 318 tokenHistory.depth = 0; 319 } 320 321 void rollback() 322 { 323 Snapshot ss = rollbackStack.pop(); 324 325 assert(stateStack.depth >= ss.stateStackDepth); 326 assert(nodeStack.depth >= ss.nodeStackDepth); 327 assert(tokenStack.depth >= ss.tokenStackDepth); 328 assert(ss.tokenPos < tokenHistory.depth); 329 330 stateStack.depth = ss.stateStackDepth; 331 nodeStack.depth = ss.nodeStackDepth; 332 tokenStack.depth = ss.tokenStackDepth; 333 334 while(ss.tokenPos < tokenHistory.depth) 335 { 336 Token token = tokenHistory.pop(); 337 redoTokens.push(token); 338 tokenPos--; 339 } 340 341 pushState(ss.rollbackState); 342 } 343 344 version(recoverError) 345 void recoverNode(ref Snapshot ss) 346 { 347 assert(stateStack.compare(errStateStack, ss.stateStackDepth)); 348 assert(nodeStack.compare(errNodeStack, ss.nodeStackDepth)); 349 assert(tokenStack.compare(errTokenStack, ss.tokenStackDepth)); 350 351 Token _tok = new Token; 352 _tok.copy(lexerTok); 353 _tok.id = TOK_RECOVER; 354 _tok.txt = "__recover" ~ to!string(countErrors); 355 _tok.span.end = tok.span.start; 356 tok = _tok; 357 358 recovering = true; 359 scope(exit) recovering = false; 360 361 errStateStack.copyTo(stateStack); 362 errNodeStack.copyTo(nodeStack); 363 errTokenStack.copyTo(tokenStack); 364 365 Action act = Forward; 366 while(stateStack.depth > ss.stateStackDepth && 367 nodeStack.depth >= ss.nodeStackDepth && 368 tokenStack.depth >= ss.tokenStackDepth && 369 act == Forward) 370 { 371 State fn = popState(); 372 act = fn(this); 373 } 374 } 375 376 // recover from error 377 void pushRecoverState(State recoverState) 378 { 379 Snapshot ss; 380 ss.stateStackDepth = stateStack.depth; 381 ss.nodeStackDepth = nodeStack.depth; 382 ss.tokenStackDepth = tokenStack.depth; 383 ss.rollbackState = recoverState; 384 385 recoverStack.push(ss); 386 } 387 388 void popRecoverState() 389 { 390 recoverStack.pop(); 391 } 392 393 void recover() 394 { 395 Snapshot ss = recoverStack.top(); 396 397 assert(stateStack.depth >= ss.stateStackDepth); 398 assert(nodeStack.depth >= ss.nodeStackDepth); 399 assert(tokenStack.depth >= ss.tokenStackDepth); 400 401 version(recoverError) 402 recoverNode(ss); 403 404 stateStack.depth = ss.stateStackDepth; 405 nodeStack.depth = ss.nodeStackDepth; 406 tokenStack.depth = ss.tokenStackDepth; 407 408 pushState(ss.rollbackState); 409 } 410 411 412 static Action keepRecover(Parser p) 413 { 414 // can be inserted into the state stack to avoid implicitely removing an entry of the recover stack 415 return Forward; 416 } 417 static Action recoverSemiCurly(Parser p) 418 { 419 switch(p.tok.id) 420 { 421 case TOK_semicolon: 422 return Accept; 423 case TOK_EOF: 424 case TOK_rcurly: 425 return Forward; 426 case TOK_lcurly: 427 // stop after closing curly (if not nested in some other braces 428 p.pushState(&recoverBlock!TOK_rcurly); 429 return Accept; 430 case TOK_lparen: 431 p.pushState(&recoverSemiCurly); 432 p.pushState(&recoverBlock!TOK_rparen); 433 return Accept; 434 case TOK_lbracket: 435 p.pushState(&recoverSemiCurly); 436 p.pushState(&recoverBlock!TOK_rbracket); 437 return Accept; 438 default: 439 p.pushState(&recoverSemiCurly); 440 return Accept; 441 } 442 } 443 // skip over nested brace blocks 444 static Action recoverBlock(TokenId id)(Parser p) 445 { 446 switch(p.tok.id) 447 { 448 case TOK_EOF: 449 return Forward; 450 451 case TOK_rcurly: 452 case TOK_rparen: 453 case TOK_rbracket: 454 return p.tok.id == id ? Accept : Forward; 455 456 case TOK_lparen: 457 p.pushState(&recoverBlock!id); 458 p.pushState(&recoverBlock!TOK_rparen); 459 return Accept; 460 case TOK_lbracket: 461 p.pushState(&recoverBlock!id); 462 p.pushState(&recoverBlock!TOK_rbracket); 463 return Accept; 464 case TOK_lcurly: 465 p.pushState(&recoverBlock!id); 466 p.pushState(&recoverBlock!TOK_rcurly); 467 return Accept; 468 default: 469 p.pushState(&recoverBlock!id); 470 return Accept; 471 } 472 } 473 474 /////////////////////////////////////////////////////////// 475 void verifyAttributes(Attribute attr, ref Attribute newAttr, Attribute mask) 476 { 477 if((newAttr & mask) && (attr & mask) && (newAttr & mask) != (attr & mask)) 478 { 479 string txt; 480 writeError(createError("conflicting attributes " ~ 481 attrToString(attr & mask) ~ " and " ~ 482 attrToString(newAttr & mask))); 483 newAttr &= ~mask; 484 } 485 } 486 487 void combineAttributes(ref Attribute attr, Attribute newAttr) 488 { 489 if(newAttr & attr) 490 { 491 string txt; 492 DCodeWriter writer = new DCodeWriter(getStringSink(txt)); 493 writer.writeAttributes(newAttr & attr); 494 writeError(createError("multiple specification of " ~ txt)); 495 } 496 verifyAttributes(attr, newAttr, Attr_AlignMask); 497 verifyAttributes(attr, newAttr, Attr_CallMask); 498 verifyAttributes(attr, newAttr, Attr_ShareMask); 499 500 attr = attr | newAttr; 501 } 502 503 void verifyAnnotations(Annotation annot, ref Annotation newAnnot, Annotation mask) 504 { 505 if((newAnnot & mask) && (annot & mask) && (newAnnot & mask) != (annot & mask)) 506 { 507 string txt; 508 writeError(createError("conflicting attributes " ~ 509 annotationToString(annot & mask) ~ " and " ~ 510 annotationToString(newAnnot & mask))); 511 newAnnot &= ~mask; 512 } 513 } 514 515 void combineAnnotations(ref Annotation annot, Annotation newAnnot) 516 { 517 if(newAnnot & annot) 518 { 519 string txt; 520 DCodeWriter writer = new DCodeWriter(getStringSink(txt)); 521 writer.writeAnnotations(newAnnot & annot); 522 writeError(createError("multiple specification of " ~ txt)); 523 } 524 verifyAttributes(annot, newAnnot, Annotation_ProtectionMask); 525 verifyAttributes(annot, newAnnot, Annotation_SafeMask); 526 527 annot = annot | newAnnot; 528 } 529 530 /////////////////////////////////////////////////////////// 531 // skip over nested parenthesis blocks 532 static Action lookaheadParen(Parser p) 533 { 534 switch(p.tok.id) 535 { 536 case TOK_EOF: 537 return Forward; 538 case TOK_rparen: 539 return Accept; 540 case TOK_lparen: 541 p.pushState(&lookaheadParen); 542 p.pushState(&lookaheadParen); 543 return Accept; 544 default: 545 p.pushState(&lookaheadParen); 546 return Accept; 547 } 548 } 549 550 static Action finishLookaheadParen(Parser p) 551 { 552 p.lookaheadToken = p.tok; 553 return Reject; 554 } 555 556 static Action rollbackPeekAfterParen(Parser p) 557 { 558 assert(p.tok.id == TOK_lparen); 559 return Forward; 560 } 561 562 // look ahead after closing paren and return token after ')' on token stack 563 Action peekAfterParen(Parser p, State fn) 564 { 565 assert(tok.id == TOK_lparen); 566 p.pushState(fn); 567 p.pushRollback(&rollbackPeekAfterParen); 568 p.pushState(&finishLookaheadParen); 569 return lookaheadParen(p); 570 } 571 572 // parsing ////////////////// 573 static Action forward(Parser p) 574 { 575 return Forward; 576 } 577 578 static Action popForward(Parser p) 579 { 580 p.popAppendTopNode!()(); 581 return Forward; 582 } 583 584 static Action accept(Parser p) 585 { 586 return Accept; 587 } 588 589 Action shiftOne(Token _tok) 590 { 591 tok = _tok; 592 Action act; 593 do 594 { 595 assert(stateStack.depth > 0); 596 597 State fn = popState(); 598 lastState = fn; 599 600 while(recoverStack.depth > 0 && stateStack.depth <= recoverStack.top().stateStackDepth) 601 popRecoverState(); 602 603 version(TraceParser) 604 { 605 traceState ~= fn; 606 traceToken ~= tok.txt; 607 if(traceState.length > 200) 608 { 609 traceState = traceState[$-100 .. $]; 610 traceToken = traceToken[$-100 .. $]; 611 } 612 } 613 614 act = fn(this); 615 if(act == Accept) 616 extendTopNode(tok); 617 } 618 while(act == Forward); 619 return act; 620 } 621 622 bool shift(Token _tok) 623 { 624 Action act; 625 redoTokens.push(_tok); 626 627 while(redoTokens.depth > 0) 628 { 629 Token t = redoTokens.pop(); 630 tokenPos++; 631 retryToken: 632 act = shiftOne(t); 633 if(rollbackStack.depth > 0) 634 tokenHistory.push(tok); 635 636 if(act == Reject) 637 { 638 if(rollbackStack.depth > 0) 639 { 640 rollback(); 641 continue; 642 } 643 if(recoverStack.depth > 0) 644 { 645 writeError(lastErrorSpan, lastError); 646 recover(); 647 goto retryToken; 648 } 649 throw new ParseException(lastErrorSpan, lastError); 650 } 651 } 652 653 return act == Accept; 654 } 655 656 bool shiftEOF() 657 { 658 lexerTok.id = TOK_EOF; 659 lexerTok.txt = ""; 660 if(!shift(lexerTok)) 661 return false; 662 663 if (nodeStack.depth > 1) 664 return parseError("parsing unfinished before end of file") == Accept; 665 if (nodeStack.depth == 0) 666 return false; 667 return true; 668 } 669 670 void parseLine(S)(ref int state, S line, int lno) 671 { 672 version(log) writeln(line); 673 674 if(partialString.length) 675 partialString ~= "\n"; 676 677 lineno = lno; 678 for(uint pos = 0; pos < line.length && !abort; ) 679 { 680 int tokid; 681 uint prevpos = pos; 682 TokenCat type = cast(TokenCat) Lexer.scan(state, line, pos, tokid); 683 684 if(tokid != TOK_Space && tokid != TOK_Comment) 685 { 686 string txt = line[prevpos .. pos]; 687 lexerTok.span.start.line = lexerTok.span.end.line = lineno; 688 lexerTok.span.start.index = prevpos; 689 lexerTok.span.end.index = pos; 690 691 if(tokid == TOK_StringLiteral) 692 { 693 if(Lexer.scanState(state) != Lexer.State.kWhite || 694 Lexer.tokenStringLevel(state) > 0) 695 { 696 if(partialString.length == 0) 697 partialStringSpan = lexerTok.span; 698 partialString ~= txt; 699 continue; 700 } 701 else 702 { 703 if(partialString.length) 704 { 705 lexerTok.span.start.line = partialStringSpan.start.line; 706 lexerTok.span.start.index = partialStringSpan.start.index; 707 txt = partialString ~ txt; 708 partialString = partialString.init; 709 } 710 } 711 } 712 713 lexerTok.txt = txt; 714 lexerTok.id = tokid; 715 shift(lexerTok); 716 } 717 } 718 } 719 720 void parseText(S)(S text) 721 { 722 int state = 0; 723 724 version(all) 725 { 726 Lexer lex; 727 lex.mTokenizeTokenString = false; 728 lineno = 1; 729 size_t linepos = 0; // position after last line break 730 int tokid; 731 for(size_t pos = 0; pos < text.length && !abort; ) 732 { 733 int prevlineno = lineno; 734 size_t prevlinepos = linepos; 735 size_t prevpos = pos; 736 TokenCat type = cast(TokenCat) lex.scan(state, text, pos, tokid); 737 738 if(tokid == TOK_Space || tokid == TOK_Comment || tokid == TOK_StringLiteral || tokid == TOK_CharacterLiteral) 739 { 740 for(size_t lpos = prevpos; lpos < pos; lpos++) 741 if(text[lpos] == '\n') 742 { 743 lineno++; 744 linepos = lpos + 1; 745 } 746 } 747 if(tokid != TOK_Space && tokid != TOK_Comment) 748 { 749 lexerTok.txt = text[prevpos .. pos]; 750 lexerTok.id = tokid; 751 lexerTok.span.start.line = prevlineno; 752 lexerTok.span.end.line = lineno; 753 lexerTok.span.start.index = cast(int) (prevpos - prevlinepos); 754 lexerTok.span.end.index = cast(int) (pos - linepos); 755 shift(lexerTok); 756 } 757 } 758 } 759 else 760 { 761 S[] lines = splitLines(text); 762 foreach(lno, line; lines) 763 parseLine(state, line, lno + 1); 764 } 765 } 766 767 Node parseModule(S)(S text) 768 { 769 reinit(); 770 pushState(&Module.enter); 771 772 parseText(text); 773 774 if(abort || !shiftEOF()) 775 return null; 776 return popNode(); 777 } 778 779 Node[] parseCurlyBlock(S)(S text, TextSpan mixinSpan) 780 { 781 lexerTok.txt = "{"; 782 lexerTok.id = TOK_lcurly; 783 lexerTok.span = mixinSpan; 784 lexerTok.span.end.index = mixinSpan.end.index + 1; 785 if(!shift(lexerTok)) 786 return null; 787 788 parseText(text); 789 790 lexerTok.txt = "}"; 791 lexerTok.id = TOK_rcurly; 792 if(abort || !shift(lexerTok)) 793 return null; 794 if (nodeStack.depth > 1) 795 { 796 parseError("parsing unfinished before end of mixin"); 797 return null; 798 } 799 return popNode().members; 800 } 801 802 Node[] parseDeclarations(S)(S text, TextSpan mixinSpan) 803 { 804 reinit(); 805 pushState(&DeclarationBlock.enter); 806 807 return parseCurlyBlock(text, mixinSpan); 808 } 809 810 Node[] parseStatements(S)(S text, TextSpan mixinSpan) 811 { 812 reinit(); 813 pushState(&BlockStatement.enter); 814 815 return parseCurlyBlock(text, mixinSpan); 816 } 817 818 Node parseExpression(S)(S text, TextSpan mixinSpan) 819 { 820 reinit(); 821 pushState(&Expression.enter); 822 823 lexerTok.txt = "("; 824 lexerTok.id = TOK_lparen; 825 lexerTok.span = mixinSpan; 826 lexerTok.span.end.index = mixinSpan.end.index + 1; 827 if(!shift(lexerTok)) 828 return null; 829 830 parseText(text); 831 832 lexerTok.txt = ")"; 833 lexerTok.id = TOK_rparen; 834 if(abort || !shift(lexerTok)) 835 return null; 836 if (nodeStack.depth > 1) 837 { 838 parseError("parsing unfinished before end of mixin"); 839 return null; 840 } 841 return popNode(); 842 } 843 844 void reinit() 845 { 846 stateStack = stateStack.init; 847 nodeStack = nodeStack.init; 848 tokenStack = tokenStack.init; 849 version(recoverError) 850 { 851 errStateStack = errStateStack.init; 852 errNodeStack = errNodeStack.init; 853 errTokenStack = errTokenStack.init; 854 } 855 lastErrorTokenPos = 0; 856 lastError = ""; 857 errors = errors.init; 858 countErrors = 0; 859 abort = false; 860 recovering = false; 861 lastState = null; 862 } 863 } 864 865 string readUtf8(string fname) 866 { 867 /* Convert all non-UTF-8 formats to UTF-8. 868 * BOM : http://www.unicode.org/faq/utf_bom.html 869 * 00 00 FE FF UTF-32BE, big-endian 870 * FF FE 00 00 UTF-32LE, little-endian 871 * FE FF UTF-16BE, big-endian 872 * FF FE UTF-16LE, little-endian 873 * EF BB BF UTF-8 874 */ 875 static const ubyte[4] bomUTF32BE = [ 0x00, 0x00, 0xFE, 0xFF ]; // UTF-32, big-endian 876 static const ubyte[4] bomUTF32LE = [ 0xFF, 0xFE, 0x00, 0x00 ]; // UTF-32, little-endian 877 static const ubyte[2] bomUTF16BE = [ 0xFE, 0xFF ]; // UTF-16, big-endian 878 static const ubyte[2] bomUTF16LE = [ 0xFF, 0xFE ]; // UTF-16, little-endian 879 static const ubyte[3] bomUTF8 = [ 0xEF, 0xBB, 0xBF ]; // UTF-8 880 881 import std.file : read; 882 ubyte[] data = cast(ubyte[]) read(fname); 883 if(data.length >= 4 && data[0..4] == bomUTF32BE[]) 884 foreach(ref d; cast(uint[]) data) 885 d = bswap(d); 886 if(data.length >= 2 && data[0..2] == bomUTF16BE[]) 887 foreach(ref d; cast(ushort[]) data) 888 d = bswap(d) >> 16; 889 890 if(data.length >= 4 && data[0..4] == bomUTF32LE[]) 891 return toUTF8(cast(dchar[]) data[4..$]); 892 if(data.length >= 2 && data[0..2] == bomUTF16LE[]) 893 return toUTF8(cast(wchar[]) data[2..$]); 894 if(data.length >= 3 && data[0..3] == bomUTF8[]) 895 return toUTF8(cast(string) data[3..$]); 896 897 return cast(string)data; 898 } 899 900 //////////////////////////////////////////////////////////////// 901 902 bool isInOps(ops...)(TokenId tok) 903 { 904 foreach(o; ops) 905 if(tok == o) 906 return true; 907 return false; 908 } 909 910 class NoASTNode {} 911 912 // always adds a node with an array of ASTNodeType nodes, even if empty 913 // if trailingSeparator, sep at the end is allowed, but closing bracket is expected afterwards 914 mixin template ListNode(ASTNodeType, SubType, TokenId sep, bool trailingSeparator = false, bool allowEmpty = false) 915 { 916 static Action enter(Parser p) 917 { 918 static if(!is(ASTNodeType == NoASTNode)) 919 p.pushNode(new ASTNodeType(p.tok)); 920 921 static if(allowEmpty) 922 switch(p.tok.id) 923 { 924 case TOK_rparen: 925 case TOK_rbracket: 926 case TOK_rcurly: 927 return Forward; 928 default: 929 } 930 931 p.pushState(&shift); 932 return SubType.enter(p); 933 } 934 935 static Action shift(Parser p) 936 { 937 p.popAppendTopNode!ASTNodeType(); 938 if(p.tok.id != sep) 939 return Forward; 940 941 static if(trailingSeparator) 942 { 943 p.pushState(&shiftSeparator); 944 } 945 else 946 { 947 p.pushState(&shift); 948 p.pushState(&SubType.enter); 949 } 950 return Accept; 951 } 952 953 static if(trailingSeparator) 954 static Action shiftSeparator(Parser p) 955 { 956 switch(p.tok.id) 957 { 958 case TOK_rparen: 959 case TOK_rbracket: 960 case TOK_rcurly: 961 return Forward; 962 default: 963 p.pushState(&shift); 964 return SubType.enter(p); 965 } 966 } 967 } 968 969 // binary operator, left/right/no recursion 970 // 971 //BinaryNode: 972 // SubType 973 // R: SubType op BinaryNode 974 // L: BinaryNode op SubType 975 // N: SubType op SubType 976 mixin template BinaryNode(ASTNodeType, string recursion, SubType, ops...) 977 { 978 static assert(recursion == "L" || recursion == "R" || recursion == "N"); 979 980 static Action enter(Parser p) 981 { 982 p.pushState(&shift); 983 return SubType.enter(p); 984 } 985 986 static Action shift(Parser p) 987 { 988 if(!isInOps!(ops)(p.tok.id)) 989 return Forward; 990 991 p.appendReplaceTopNode(new ASTNodeType(p.tok)); 992 p.pushState(&shiftNext); 993 994 static if(recursion == "L" || recursion == "N") 995 p.pushState(&SubType.enter); 996 else 997 p.pushState(&enter); 998 return Accept; 999 } 1000 1001 static Action shiftNext(Parser p) 1002 { 1003 p.popAppendTopNode!ASTNodeType(); 1004 1005 static if(recursion == "L") 1006 return shift(p); 1007 else 1008 return Forward; 1009 } 1010 } 1011 1012 // ternary operator, right recursion 1013 // 1014 //TernaryNode: 1015 // SubType1 1016 // SubType1 op1 SubType2 op2 TernaryNode 1017 mixin template TernaryNode(ASTNodeType, SubType1, TokenId op1, SubType2, TokenId op2) 1018 { 1019 static Action enter(Parser p) 1020 { 1021 p.pushState(&shift); 1022 return SubType1.enter(p); 1023 } 1024 1025 static Action shift(Parser p) 1026 { 1027 if(p.tok.id != op1) 1028 return Forward; 1029 1030 p.appendReplaceTopNode(new ASTNodeType(p.tok)); 1031 p.pushState(&shiftNext); 1032 p.pushState(&SubType2.enter); 1033 return Accept; 1034 } 1035 1036 static Action shiftNext(Parser p) 1037 { 1038 if(p.tok.id != op2) 1039 return p.parseError("second operator '" ~ tokenString(op2) ~ "'in ternary expression expected"); 1040 1041 p.popAppendTopNode!ASTNodeType(); 1042 1043 p.pushState(&shiftLast); 1044 p.pushState(&enter); 1045 return Accept; 1046 } 1047 1048 static Action shiftLast(Parser p) 1049 { 1050 p.popAppendTopNode!ASTNodeType(); 1051 return Forward; 1052 } 1053 } 1054 1055 //OptionalNode: 1056 // SubType1 1057 // SubType1 op SubType2 1058 mixin template OptionalNode(ASTNodeType, SubType1, TokenId op, SubType2) 1059 { 1060 static Action enter(Parser p) 1061 { 1062 p.pushState(&shiftSubType1); 1063 return SubType1.enter(p); 1064 } 1065 1066 static Action shiftSubType1(Parser p) 1067 { 1068 if(p.tok.id != op) 1069 return Forward; 1070 1071 p.appendReplaceTopNode(new ASTNodeType(p.tok)); 1072 p.pushState(&shiftSubType2); 1073 p.pushState(&SubType2.enter); 1074 return Accept; 1075 } 1076 1077 static Action shiftSubType2(Parser p) 1078 { 1079 p.popAppendTopNode!ASTNodeType(); 1080 return Forward; 1081 } 1082 } 1083 1084 //SequenceNode: 1085 // SubType1/Token1 SubType2/Token2 SubType3/Token3 SubType4/Token4 1086 mixin template SequenceNode(ASTNodeType, T...) 1087 { 1088 static Action enter(Parser p) 1089 { 1090 static if(!is(ASTNodeType == NoASTNode)) 1091 p.pushNode(new ASTNodeType(p.tok)); 1092 1093 return shift0.next(p); 1094 } 1095 1096 mixin template ShiftState(int n, alias shiftFn, alias nextFn) 1097 { 1098 static if(n < T.length) 1099 { 1100 static Action shift(Parser p) 1101 { 1102 static if(is(T[n-1] == class)) 1103 { 1104 static if(!__traits(compiles,T[n-1].doNotPopNode)) 1105 static if(!is(ASTNodeType == NoASTNode)) 1106 p.popAppendTopNode!ASTNodeType(); 1107 } 1108 return next(p); 1109 } 1110 1111 static Action next(Parser p) 1112 { 1113 static if (is(typeof(& T[n]) U : U*) && is(U == function)) 1114 { 1115 return T[n](p); 1116 } 1117 else 1118 { 1119 static if(__traits(compiles,T[n].startsWithOp(p.tok.id))) 1120 if(!T[n].startsWithOp(p.tok.id)) 1121 return nextFn(p); 1122 1123 static if(n < T.length-1) 1124 p.pushState(&shiftFn); 1125 1126 static if(is(T[n] == class)) 1127 { 1128 static if(n == T.length-1) 1129 p.pushState(&reduce); 1130 1131 return T[n].enter(p); 1132 } 1133 else 1134 { 1135 if(p.tok.id != T[n]) 1136 return p.parseError("'" ~ tokenString(T[n]) ~ "' expected"); 1137 return Accept; 1138 } 1139 } 1140 } 1141 } 1142 else 1143 { 1144 static Action shift(Parser p) { return Forward; } 1145 static Action next(Parser p) { return Forward; } 1146 } 1147 } 1148 1149 mixin ShiftState!(9, reduce, reduce) shift9; 1150 mixin ShiftState!(8, shift9.shift, shift9.next) shift8; 1151 mixin ShiftState!(7, shift8.shift, shift8.next) shift7; 1152 mixin ShiftState!(6, shift7.shift, shift7.next) shift6; 1153 mixin ShiftState!(5, shift6.shift, shift6.next) shift5; 1154 mixin ShiftState!(4, shift5.shift, shift5.next) shift4; 1155 mixin ShiftState!(3, shift4.shift, shift4.next) shift3; 1156 mixin ShiftState!(2, shift3.shift, shift3.next) shift2; 1157 mixin ShiftState!(1, shift2.shift, shift2.next) shift1; 1158 mixin ShiftState!(0, shift1.shift, shift1.next) shift0; 1159 1160 static Action reduce(Parser p) 1161 { 1162 static if(!is(ASTNodeType == NoASTNode)) 1163 p.popAppendTopNode!ASTNodeType(); 1164 return Forward; 1165 } 1166 } 1167 1168 class Opt(NT, ops...) 1169 { 1170 static bool startsWithOp(TokenId tok) 1171 { 1172 foreach(o; ops) 1173 if(tok == o) 1174 return true; 1175 return false; 1176 } 1177 static Action enter(Parser p) 1178 { 1179 return NT.enter(p); 1180 } 1181 } 1182 1183 //////////////////////////////////////////////////////////////// 1184 1185 // unfortunately, states have to be in reverse order because of unresolved forward references 1186 1187 mixin template stateEnterToken(TokenId id, ASTType, alias newstate) 1188 { 1189 static Action enter(Parser p) 1190 { 1191 switch(p.tok.id) 1192 { 1193 case id: 1194 static if(!is(ASTType : NoASTNode)) 1195 p.pushNode(new ASTType(p.tok)); 1196 p.pushState(&newstate); 1197 return Accept; 1198 default: 1199 return p.parseError(tokenString(id) ~ " expected"); 1200 } 1201 } 1202 } 1203 1204 mixin template stateEnterClass(SubType, ASTType, alias newstate) 1205 { 1206 static Action enter(Parser p) 1207 { 1208 static if(!is(ASTType : NoASTNode)) 1209 p.pushNode(new ASTType(p.tok)); 1210 p.pushState(&enterReduce); 1211 return SubType.enter(p); 1212 } 1213 static Action enterReduce(Parser p) 1214 { 1215 static if(!is(ASTType : NoASTNode)) 1216 p.popAppendTopNode!(); 1217 return newstate(p); 1218 } 1219 } 1220 1221 mixin template stateShiftToken(TokenId id1, alias newstate1, 1222 TokenId id2 = 0, alias newstate2 = Parser.forward, 1223 TokenId id3 = 0, alias newstate3 = Parser.forward, 1224 TokenId id4 = 0, alias newstate4 = Parser.forward) 1225 { 1226 static Action shift(Parser p) 1227 { 1228 switch(p.tok.id) 1229 { 1230 static if(id1 > 0) 1231 { 1232 case id1: 1233 static if(&newstate1 !is &Parser.forward) 1234 p.pushState(&newstate1); 1235 return Accept; 1236 } 1237 static if(id2 > 0) 1238 { 1239 case id2: 1240 static if(&newstate2 !is &Parser.forward) 1241 p.pushState(&newstate2); 1242 return Accept; 1243 } 1244 static if(id3 > 0) 1245 { 1246 case id3: 1247 static if(&newstate3 !is &Parser.forward) 1248 p.pushState(&newstate3); 1249 return Accept; 1250 } 1251 static if(id4 > 0) 1252 { 1253 case id4: 1254 static if(&newstate4 !is &Parser.forward) 1255 p.pushState(&newstate4); 1256 return Accept; 1257 } 1258 default: 1259 static if(id1 == -1) 1260 static if(&newstate1 is &Parser.forward) 1261 return Forward; 1262 else 1263 return newstate1(p); 1264 1265 else static if(id2 == -1) 1266 static if(&newstate2 is &Parser.forward) 1267 return Forward; 1268 else 1269 return newstate2(p); 1270 1271 else static if(id3 == -1) 1272 static if(&newstate3 is &Parser.forward) 1273 return Forward; 1274 else 1275 return newstate3(p); 1276 1277 else static if(id4 == -1) 1278 static if(&newstate4 is &Parser.forward) 1279 return Forward; 1280 else 1281 return newstate4(p); 1282 1283 else 1284 { 1285 string msg = tokenString(id1); 1286 static if(id2 != 0) msg ~= " or " ~ tokenString(id2); 1287 static if(id3 != 0) msg ~= " or " ~ tokenString(id3); 1288 static if(id4 != 0) msg ~= " or " ~ tokenString(id4); 1289 return p.parseError(tokenString(id1) ~ " expected"); 1290 } 1291 } 1292 } 1293 } 1294 1295 mixin template stateAppendClass(C, alias newstate) 1296 { 1297 static Action shift(Parser p) 1298 { 1299 p.pushState(&reduce); 1300 return C.enter(p); 1301 } 1302 static Action reduce(Parser p) 1303 { 1304 p.popAppendTopNode!(); 1305 return newstate(p); 1306 } 1307 }