1 // This file is part of Visual D
2 //
3 // Visual D integrates the D programming language into Visual Studio
4 // Copyright (c) 2010-2011 by Rainer Schuetze, All Rights Reserved
5 //
6 // Distributed under the Boost Software License, Version 1.0.
7 // See accompanying file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt
8 
9 module vdc.parser.engine;
10 
11 import std.exception;
12 import std.stdio;
13 import std.string;
14 import std.conv;
15 import std.utf;
16 
17 import stdext.util;
18 
19 import core.bitop;
20 
21 import vdc.util;
22 import vdc.lexer;
23 import vdc.parser.expr;
24 import vdc.parser.mod;
25 import vdc.parser.stmt;
26 
27 import vdc.ast.node;
28 import vdc.ast.writer;
29 
30 // debug version = TraceParser;
31 // version = recoverError;
32 
33 class ParseException : Exception
34 {
35     this(TextSpan _span, string msg)
36     {
37         super(msg);
38         span = _span;
39     }
40 
41     this()
42     {
43         super("syntax error");
44     }
45 
46     TextSpan span;
47 }
48 
49 alias Action function (Parser p) State;
50 
51 enum { Forward, Accept, Reject }
52 
53 alias int Action;
54 alias TokenId Info;
55 
56 struct Stack(T)
57 {
58     int depth;
59     T[] stack; // use Appender instead?
60 
61     ref T top()
62     {
63         assert(depth > 0);
64         return stack[depth-1];
65     }
66 
67     void push(T t)
68     {
69         static if(is(T == Token))
70         {
71             if(depth >= stack.length)
72             {
73                 stack ~= new T;
74             }
75             stack[depth].copy(t);
76         }
77         else
78         {
79             if(depth >= stack.length)
80                 stack ~= t;
81             else
82                 stack[depth] = t;
83         }
84         depth++;
85     }
86 
87     T pop()
88     {
89         assert(depth > 0);
90         auto s = stack[--depth];
91         return s;
92     }
93 
94     void copyTo(ref Stack!T other)
95     {
96         other.depth = depth;
97         other.stack.length = depth;
98         other.stack[] = stack[0..depth];
99     }
100 
101     bool compare(ref Stack!T other, int maxdepth)
102     {
103         if(maxdepth > depth)
104             maxdepth = depth;
105         if(other.depth < maxdepth)
106             return false;
107         for(int i = 0; i < maxdepth; i++)
108             if(stack[i] !is other.stack[i])
109                 return false;
110         return true;
111     }
112 }
113 
114 struct Snapshot
115 {
116     int stateStackDepth;
117     int nodeStackDepth;
118     int tokenStackDepth;
119 
120     int tokenPos;
121     State rollbackState;
122 }
123 
124 struct ParseError
125 {
126     TextSpan span;
127     string msg;
128 }
129 
130 class Parser
131 {
132     Stack!State stateStack;
133     Stack!Node  nodeStack;
134     Stack!Token tokenStack;
135 
136     Stack!Snapshot rollbackStack;  // for backtracking parsing
137     Stack!Snapshot recoverStack;   // to continue after errors
138 
139     Stack!Token tokenHistory;
140     int tokenHistoryStart;
141 
142     Stack!Token redoTokens;
143 
144     string filename;
145     int lineno;
146     int tokenPos;
147     Token lookaheadToken;
148     Token tok;
149     Token lexerTok;
150     string partialString;
151     TextSpan partialStringSpan;
152 
153     int countErrors;
154     int lastErrorTokenPos;
155     string lastError;
156     TextSpan lastErrorSpan;
157 
158     version(recoverError)
159     {
160         Stack!State errStateStack;
161         Stack!Node  errNodeStack;
162         Stack!Token errTokenStack;
163     }
164 
165     version(TraceParser) State[] traceState;
166     version(TraceParser) string[] traceToken;
167 
168     bool recovering;
169     bool abort;
170     bool saveErrors;
171     ParseError[] errors;
172     State lastState;
173 
174     this()
175     {
176         lexerTok = new Token;
177     }
178 
179     // node stack //////////////////
180     @property T topNode(T = Node)()
181     {
182         Node n = nodeStack.top();
183         return static_cast!T(n);
184     }
185 
186     void pushNode(Node n)
187     {
188         nodeStack.push(n);
189     }
190 
191     T popNode(T = Node)()
192     {
193         Node n = nodeStack.pop();
194         nodeStack.stack[nodeStack.depth] = null;
195         return static_cast!T(n);
196     }
197 
198     // replace item on the node stack, appending old top as child
199     void appendReplaceTopNode(Node n)
200     {
201         auto node = popNode();
202         n.addMember(node);
203         pushNode(n);
204     }
205 
206     // pop top item from the node stack and add it to the members of the new top item
207     void popAppendTopNode(T = Node, P = Node)()
208     {
209         auto node = popNode();
210         if(!__ctfe) assert(cast(P) node);
211         if(!__ctfe) assert(cast(T) topNode());
212         topNode().addMember(node);
213     }
214 
215     // extend the full psan of the node on top of the node stack
216     void extendTopNode(Token tok)
217     {
218         if(nodeStack.depth > 0)
219             topNode().extendSpan(tok.span);
220     }
221 
222     // state stack //////////////////
223     void pushState(State fn)
224     {
225         stateStack.push(fn);
226     }
227 
228     State popState()
229     {
230         State s = stateStack.pop();
231         stateStack.stack[stateStack.depth] = null;
232         return s;
233     }
234 
235     // token stack //////////////////
236     Token topToken()
237     {
238         return tokenStack.top();
239     }
240 
241     void pushToken(Token token)
242     {
243         tokenStack.push(token);
244     }
245 
246     Token popToken()
247     {
248         return tokenStack.pop();
249     }
250 
251     // error handling //////////////////
252     string createError(string msg)
253     {
254         string where;
255         if(filename.length)
256             where = filename ~ "(" ~ text(lineno) ~ "): '" ~ tok.txt ~ "' - ";
257         else
258             where = "line " ~ text(lineno) ~ " '" ~ tok.txt ~ "': ";
259         return where ~ msg;
260     }
261 
262     Action parseError(string msg)
263     {
264         if(tokenPos < lastErrorTokenPos || recovering)
265             return Reject;
266 
267         lastErrorTokenPos = tokenPos;
268         lastError = createError(msg);
269         lastErrorSpan = tok.span;
270 
271         version(recoverError)
272         {
273             stateStack.copyTo(errStateStack);
274             nodeStack.copyTo(errNodeStack);
275             tokenStack.copyTo(errTokenStack);
276             errStateStack.push(lastState);
277         }
278 
279         return Reject;
280     }
281 
282     void writeError(ref const(TextSpan) errorSpan, string msg)
283     {
284         if(saveErrors)
285             errors ~= ParseError(errorSpan, msg);
286         else
287             writeln(msg);
288         countErrors++;
289     }
290 
291     void writeError(string msg)
292     {
293         writeError(tok.span, msg);
294     }
295 
296     Action notImplementedError(string what = "")
297     {
298         return parseError("not implemented: " ~ what);
299     }
300 
301     // backtrace parsing
302     void pushRollback(State rollbackState)
303     {
304         Snapshot ss;
305         ss.stateStackDepth = stateStack.depth;
306         ss.nodeStackDepth = nodeStack.depth;
307         ss.tokenStackDepth = tokenStack.depth;
308         ss.tokenPos = tokenHistory.depth;
309         ss.rollbackState = rollbackState;
310 
311         rollbackStack.push(ss);
312     }
313 
314     void popRollback()
315     {
316         rollbackStack.pop();
317         if(rollbackStack.depth == 0)
318             tokenHistory.depth = 0;
319     }
320 
321     void rollback()
322     {
323         Snapshot ss = rollbackStack.pop();
324 
325         assert(stateStack.depth >= ss.stateStackDepth);
326         assert(nodeStack.depth >= ss.nodeStackDepth);
327         assert(tokenStack.depth >= ss.tokenStackDepth);
328         assert(ss.tokenPos < tokenHistory.depth);
329 
330         stateStack.depth = ss.stateStackDepth;
331         nodeStack.depth = ss.nodeStackDepth;
332         tokenStack.depth = ss.tokenStackDepth;
333 
334         while(ss.tokenPos < tokenHistory.depth)
335         {
336             Token token = tokenHistory.pop();
337             redoTokens.push(token);
338             tokenPos--;
339         }
340 
341         pushState(ss.rollbackState);
342     }
343 
344     version(recoverError)
345     void recoverNode(ref Snapshot ss)
346     {
347         assert(stateStack.compare(errStateStack, ss.stateStackDepth));
348         assert(nodeStack.compare(errNodeStack, ss.nodeStackDepth));
349         assert(tokenStack.compare(errTokenStack, ss.tokenStackDepth));
350 
351         Token _tok = new Token;
352         _tok.copy(lexerTok);
353         _tok.id = TOK_RECOVER;
354         _tok.txt = "__recover" ~ to!string(countErrors);
355         _tok.span.end = tok.span.start;
356         tok = _tok;
357 
358         recovering = true;
359         scope(exit) recovering = false;
360 
361         errStateStack.copyTo(stateStack);
362         errNodeStack.copyTo(nodeStack);
363         errTokenStack.copyTo(tokenStack);
364 
365         Action act = Forward;
366         while(stateStack.depth > ss.stateStackDepth &&
367               nodeStack.depth >= ss.nodeStackDepth &&
368               tokenStack.depth >= ss.tokenStackDepth &&
369               act == Forward)
370         {
371             State fn = popState();
372             act = fn(this);
373         }
374     }
375 
376     // recover from error
377     void pushRecoverState(State recoverState)
378     {
379         Snapshot ss;
380         ss.stateStackDepth = stateStack.depth;
381         ss.nodeStackDepth = nodeStack.depth;
382         ss.tokenStackDepth = tokenStack.depth;
383         ss.rollbackState = recoverState;
384 
385         recoverStack.push(ss);
386     }
387 
388     void popRecoverState()
389     {
390         recoverStack.pop();
391     }
392 
393     void recover()
394     {
395         Snapshot ss = recoverStack.top();
396 
397         assert(stateStack.depth >= ss.stateStackDepth);
398         assert(nodeStack.depth >= ss.nodeStackDepth);
399         assert(tokenStack.depth >= ss.tokenStackDepth);
400 
401         version(recoverError)
402             recoverNode(ss);
403 
404         stateStack.depth = ss.stateStackDepth;
405         nodeStack.depth = ss.nodeStackDepth;
406         tokenStack.depth = ss.tokenStackDepth;
407 
408         pushState(ss.rollbackState);
409     }
410 
411 
412     static Action keepRecover(Parser p)
413     {
414         // can be inserted into the state stack to avoid implicitely removing an entry of the recover stack
415         return Forward;
416     }
417     static Action recoverSemiCurly(Parser p)
418     {
419         switch(p.tok.id)
420         {
421             case TOK_semicolon:
422                 return Accept;
423             case TOK_EOF:
424             case TOK_rcurly:
425                 return Forward;
426             case TOK_lcurly:
427                 // stop after closing curly (if not nested in some other braces
428                 p.pushState(&recoverBlock!TOK_rcurly);
429                 return Accept;
430             case TOK_lparen:
431                 p.pushState(&recoverSemiCurly);
432                 p.pushState(&recoverBlock!TOK_rparen);
433                 return Accept;
434             case TOK_lbracket:
435                 p.pushState(&recoverSemiCurly);
436                 p.pushState(&recoverBlock!TOK_rbracket);
437                 return Accept;
438             default:
439                 p.pushState(&recoverSemiCurly);
440                 return Accept;
441         }
442     }
443     // skip over nested brace blocks
444     static Action recoverBlock(TokenId id)(Parser p)
445     {
446         switch(p.tok.id)
447         {
448             case TOK_EOF:
449                 return Forward;
450 
451             case TOK_rcurly:
452             case TOK_rparen:
453             case TOK_rbracket:
454                 return p.tok.id == id ? Accept : Forward;
455 
456             case TOK_lparen:
457                 p.pushState(&recoverBlock!id);
458                 p.pushState(&recoverBlock!TOK_rparen);
459                 return Accept;
460             case TOK_lbracket:
461                 p.pushState(&recoverBlock!id);
462                 p.pushState(&recoverBlock!TOK_rbracket);
463                 return Accept;
464             case TOK_lcurly:
465                 p.pushState(&recoverBlock!id);
466                 p.pushState(&recoverBlock!TOK_rcurly);
467                 return Accept;
468             default:
469                 p.pushState(&recoverBlock!id);
470                 return Accept;
471         }
472     }
473 
474     ///////////////////////////////////////////////////////////
475     void verifyAttributes(Attribute attr, ref Attribute newAttr, Attribute mask)
476     {
477         if((newAttr & mask) && (attr & mask) && (newAttr & mask) != (attr & mask))
478         {
479             string txt;
480             writeError(createError("conflicting attributes " ~
481                                    attrToString(attr & mask) ~ " and " ~
482                                    attrToString(newAttr & mask)));
483             newAttr &= ~mask;
484         }
485     }
486 
487     void combineAttributes(ref Attribute attr, Attribute newAttr)
488     {
489         if(newAttr & attr)
490         {
491             string txt;
492             DCodeWriter writer = new DCodeWriter(getStringSink(txt));
493             writer.writeAttributes(newAttr & attr);
494             writeError(createError("multiple specification of " ~ txt));
495         }
496         verifyAttributes(attr, newAttr, Attr_AlignMask);
497         verifyAttributes(attr, newAttr, Attr_CallMask);
498         verifyAttributes(attr, newAttr, Attr_ShareMask);
499 
500         attr = attr | newAttr;
501     }
502 
503     void verifyAnnotations(Annotation annot, ref Annotation newAnnot, Annotation mask)
504     {
505         if((newAnnot & mask) && (annot & mask) && (newAnnot & mask) != (annot & mask))
506         {
507             string txt;
508             writeError(createError("conflicting attributes " ~
509                                    annotationToString(annot & mask) ~ " and " ~
510                                    annotationToString(newAnnot & mask)));
511             newAnnot &= ~mask;
512         }
513     }
514 
515     void combineAnnotations(ref Annotation annot, Annotation newAnnot)
516     {
517         if(newAnnot & annot)
518         {
519             string txt;
520             DCodeWriter writer = new DCodeWriter(getStringSink(txt));
521             writer.writeAnnotations(newAnnot & annot);
522             writeError(createError("multiple specification of " ~ txt));
523         }
524         verifyAttributes(annot, newAnnot, Annotation_ProtectionMask);
525         verifyAttributes(annot, newAnnot, Annotation_SafeMask);
526 
527         annot = annot | newAnnot;
528     }
529 
530     ///////////////////////////////////////////////////////////
531     // skip over nested parenthesis blocks
532     static Action lookaheadParen(Parser p)
533     {
534         switch(p.tok.id)
535         {
536             case TOK_EOF:
537                 return Forward;
538             case TOK_rparen:
539                 return Accept;
540             case TOK_lparen:
541                 p.pushState(&lookaheadParen);
542                 p.pushState(&lookaheadParen);
543                 return Accept;
544             default:
545                 p.pushState(&lookaheadParen);
546                 return Accept;
547         }
548     }
549 
550     static Action finishLookaheadParen(Parser p)
551     {
552         p.lookaheadToken = p.tok;
553         return Reject;
554     }
555 
556     static Action rollbackPeekAfterParen(Parser p)
557     {
558         assert(p.tok.id == TOK_lparen);
559         return Forward;
560     }
561 
562     // look ahead after closing paren and return token after ')' on token stack
563     Action peekAfterParen(Parser p, State fn)
564     {
565         assert(tok.id == TOK_lparen);
566         p.pushState(fn);
567         p.pushRollback(&rollbackPeekAfterParen);
568         p.pushState(&finishLookaheadParen);
569         return lookaheadParen(p);
570     }
571 
572     // parsing //////////////////
573     static Action forward(Parser p)
574     {
575         return Forward;
576     }
577 
578     static Action popForward(Parser p)
579     {
580         p.popAppendTopNode!()();
581         return Forward;
582     }
583 
584     static Action accept(Parser p)
585     {
586         return Accept;
587     }
588 
589     Action shiftOne(Token _tok)
590     {
591         tok = _tok;
592         Action act;
593         do
594         {
595             assert(stateStack.depth > 0);
596 
597             State fn = popState();
598             lastState = fn;
599 
600             while(recoverStack.depth > 0 && stateStack.depth <= recoverStack.top().stateStackDepth)
601                 popRecoverState();
602 
603             version(TraceParser)
604             {
605                 traceState ~= fn;
606                 traceToken ~= tok.txt;
607                 if(traceState.length > 200)
608                 {
609                     traceState = traceState[$-100 .. $];
610                     traceToken = traceToken[$-100 .. $];
611                 }
612             }
613 
614             act = fn(this);
615             if(act == Accept)
616                 extendTopNode(tok);
617         }
618         while(act == Forward);
619         return act;
620     }
621 
622     bool shift(Token _tok)
623     {
624         Action act;
625         redoTokens.push(_tok);
626 
627         while(redoTokens.depth > 0)
628         {
629             Token t = redoTokens.pop();
630             tokenPos++;
631         retryToken:
632             act = shiftOne(t);
633             if(rollbackStack.depth > 0)
634                 tokenHistory.push(tok);
635 
636             if(act == Reject)
637             {
638                 if(rollbackStack.depth > 0)
639                 {
640                     rollback();
641                     continue;
642                 }
643                 if(recoverStack.depth > 0)
644                 {
645                     writeError(lastErrorSpan, lastError);
646                     recover();
647                     goto retryToken;
648                 }
649                 throw new ParseException(lastErrorSpan, lastError);
650             }
651         }
652 
653         return act == Accept;
654     }
655 
656     bool shiftEOF()
657     {
658         lexerTok.id = TOK_EOF;
659         lexerTok.txt = "";
660         if(!shift(lexerTok))
661             return false;
662 
663         if (nodeStack.depth > 1)
664             return parseError("parsing unfinished before end of file") == Accept;
665         if (nodeStack.depth == 0)
666             return false;
667         return true;
668     }
669 
670     void parseLine(S)(ref int state, S line, int lno)
671     {
672         version(log) writeln(line);
673 
674         if(partialString.length)
675             partialString ~= "\n";
676 
677         lineno = lno;
678         for(uint pos = 0; pos < line.length && !abort; )
679         {
680             int tokid;
681             uint prevpos = pos;
682             TokenCat type = cast(TokenCat) Lexer.scan(state, line, pos, tokid);
683 
684             if(tokid != TOK_Space && tokid != TOK_Comment)
685             {
686                 string txt = line[prevpos .. pos];
687                 lexerTok.span.start.line = lexerTok.span.end.line = lineno;
688                 lexerTok.span.start.index = prevpos;
689                 lexerTok.span.end.index = pos;
690 
691                 if(tokid == TOK_StringLiteral)
692                 {
693                     if(Lexer.scanState(state) != Lexer.State.kWhite ||
694                        Lexer.tokenStringLevel(state) > 0)
695                     {
696                         if(partialString.length == 0)
697                             partialStringSpan = lexerTok.span;
698                         partialString ~= txt;
699                         continue;
700                     }
701                     else
702                     {
703                         if(partialString.length)
704                         {
705                             lexerTok.span.start.line = partialStringSpan.start.line;
706                             lexerTok.span.start.index = partialStringSpan.start.index;
707                             txt = partialString ~ txt;
708                             partialString = partialString.init;
709                         }
710                     }
711                 }
712 
713                 lexerTok.txt = txt;
714                 lexerTok.id = tokid;
715                 shift(lexerTok);
716             }
717         }
718     }
719 
720     void parseText(S)(S text)
721     {
722         int state = 0;
723 
724     version(all)
725     {
726         Lexer lex;
727         lex.mTokenizeTokenString = false;
728         lineno = 1;
729         size_t linepos = 0; // position after last line break
730         int tokid;
731         for(size_t pos = 0; pos < text.length && !abort; )
732         {
733             int prevlineno = lineno;
734             size_t prevlinepos = linepos;
735             size_t prevpos = pos;
736             TokenCat type = cast(TokenCat) lex.scan(state, text, pos, tokid);
737 
738             if(tokid == TOK_Space || tokid == TOK_Comment || tokid == TOK_StringLiteral || tokid == TOK_CharacterLiteral)
739             {
740                 for(size_t lpos = prevpos; lpos < pos; lpos++)
741                     if(text[lpos] == '\n')
742                     {
743                         lineno++;
744                         linepos = lpos + 1;
745                     }
746             }
747             if(tokid != TOK_Space && tokid != TOK_Comment)
748             {
749                 lexerTok.txt = text[prevpos .. pos];
750                 lexerTok.id = tokid;
751                 lexerTok.span.start.line = prevlineno;
752                 lexerTok.span.end.line = lineno;
753                 lexerTok.span.start.index = cast(int) (prevpos - prevlinepos);
754                 lexerTok.span.end.index   = cast(int) (pos - linepos);
755                 shift(lexerTok);
756             }
757         }
758     }
759     else
760     {
761         S[] lines = splitLines(text);
762         foreach(lno, line; lines)
763             parseLine(state, line, lno + 1);
764     }
765     }
766 
767     Node parseModule(S)(S text)
768     {
769         reinit();
770         pushState(&Module.enter);
771 
772         parseText(text);
773 
774         if(abort || !shiftEOF())
775             return null;
776         return popNode();
777     }
778 
779     Node[] parseCurlyBlock(S)(S text, TextSpan mixinSpan)
780     {
781         lexerTok.txt = "{";
782         lexerTok.id = TOK_lcurly;
783         lexerTok.span = mixinSpan;
784         lexerTok.span.end.index = mixinSpan.end.index + 1;
785         if(!shift(lexerTok))
786             return null;
787 
788         parseText(text);
789 
790         lexerTok.txt = "}";
791         lexerTok.id = TOK_rcurly;
792         if(abort || !shift(lexerTok))
793             return null;
794         if (nodeStack.depth > 1)
795         {
796             parseError("parsing unfinished before end of mixin");
797             return null;
798         }
799         return popNode().members;
800     }
801 
802     Node[] parseDeclarations(S)(S text, TextSpan mixinSpan)
803     {
804         reinit();
805         pushState(&DeclarationBlock.enter);
806 
807         return parseCurlyBlock(text, mixinSpan);
808     }
809 
810     Node[] parseStatements(S)(S text, TextSpan mixinSpan)
811     {
812         reinit();
813         pushState(&BlockStatement.enter);
814 
815         return parseCurlyBlock(text, mixinSpan);
816     }
817 
818     Node parseExpression(S)(S text, TextSpan mixinSpan)
819     {
820         reinit();
821         pushState(&Expression.enter);
822 
823         lexerTok.txt = "(";
824         lexerTok.id = TOK_lparen;
825         lexerTok.span = mixinSpan;
826         lexerTok.span.end.index = mixinSpan.end.index + 1;
827         if(!shift(lexerTok))
828             return null;
829 
830         parseText(text);
831 
832         lexerTok.txt = ")";
833         lexerTok.id = TOK_rparen;
834         if(abort || !shift(lexerTok))
835             return null;
836         if (nodeStack.depth > 1)
837         {
838             parseError("parsing unfinished before end of mixin");
839             return null;
840         }
841         return popNode();
842     }
843 
844     void reinit()
845     {
846         stateStack = stateStack.init;
847         nodeStack  = nodeStack.init;
848         tokenStack = tokenStack.init;
849         version(recoverError)
850         {
851             errStateStack = errStateStack.init;
852             errNodeStack  = errNodeStack.init;
853             errTokenStack = errTokenStack.init;
854         }
855         lastErrorTokenPos = 0;
856         lastError = "";
857         errors = errors.init;
858         countErrors = 0;
859         abort = false;
860         recovering = false;
861         lastState = null;
862     }
863 }
864 
865 string readUtf8(string fname)
866 {
867     /* Convert all non-UTF-8 formats to UTF-8.
868      * BOM : http://www.unicode.org/faq/utf_bom.html
869      * 00 00 FE FF  UTF-32BE, big-endian
870      * FF FE 00 00  UTF-32LE, little-endian
871      * FE FF        UTF-16BE, big-endian
872      * FF FE        UTF-16LE, little-endian
873      * EF BB BF     UTF-8
874      */
875     static const ubyte[4] bomUTF32BE = [ 0x00, 0x00, 0xFE, 0xFF ]; // UTF-32, big-endian
876     static const ubyte[4] bomUTF32LE = [ 0xFF, 0xFE, 0x00, 0x00 ]; // UTF-32, little-endian
877     static const ubyte[2] bomUTF16BE = [ 0xFE, 0xFF ];             // UTF-16, big-endian
878     static const ubyte[2] bomUTF16LE = [ 0xFF, 0xFE ];             // UTF-16, little-endian
879     static const ubyte[3] bomUTF8    = [ 0xEF, 0xBB, 0xBF ];       // UTF-8
880 
881     import std.file : read;
882     ubyte[] data = cast(ubyte[]) read(fname);
883     if(data.length >= 4 && data[0..4] == bomUTF32BE[])
884         foreach(ref d; cast(uint[]) data)
885             d = bswap(d);
886     if(data.length >= 2 && data[0..2] == bomUTF16BE[])
887         foreach(ref d; cast(ushort[]) data)
888             d = bswap(d) >> 16;
889 
890     if(data.length >= 4 && data[0..4] == bomUTF32LE[])
891         return toUTF8(cast(dchar[]) data[4..$]);
892     if(data.length >= 2 && data[0..2] == bomUTF16LE[])
893         return toUTF8(cast(wchar[]) data[2..$]);
894     if(data.length >= 3 && data[0..3] == bomUTF8[])
895         return toUTF8(cast(string) data[3..$]);
896 
897     return cast(string)data;
898 }
899 
900 ////////////////////////////////////////////////////////////////
901 
902 bool isInOps(ops...)(TokenId tok)
903 {
904     foreach(o; ops)
905         if(tok == o)
906             return true;
907     return false;
908 }
909 
910 class NoASTNode {}
911 
912 // always adds a node with an array of ASTNodeType nodes, even if empty
913 // if trailingSeparator, sep at the end is allowed, but closing bracket is expected afterwards
914 mixin template ListNode(ASTNodeType, SubType, TokenId sep, bool trailingSeparator = false, bool allowEmpty = false)
915 {
916     static Action enter(Parser p)
917     {
918         static if(!is(ASTNodeType == NoASTNode))
919             p.pushNode(new ASTNodeType(p.tok));
920 
921         static if(allowEmpty)
922             switch(p.tok.id)
923             {
924                 case TOK_rparen:
925                 case TOK_rbracket:
926                 case TOK_rcurly:
927                     return Forward;
928                 default:
929             }
930 
931         p.pushState(&shift);
932         return SubType.enter(p);
933     }
934 
935     static Action shift(Parser p)
936     {
937         p.popAppendTopNode!ASTNodeType();
938         if(p.tok.id != sep)
939             return Forward;
940 
941         static if(trailingSeparator)
942         {
943             p.pushState(&shiftSeparator);
944         }
945         else
946         {
947             p.pushState(&shift);
948             p.pushState(&SubType.enter);
949         }
950         return Accept;
951     }
952 
953     static if(trailingSeparator)
954     static Action shiftSeparator(Parser p)
955     {
956         switch(p.tok.id)
957         {
958             case TOK_rparen:
959             case TOK_rbracket:
960             case TOK_rcurly:
961                 return Forward;
962             default:
963                 p.pushState(&shift);
964                 return SubType.enter(p);
965         }
966     }
967 }
968 
969 // binary operator, left/right/no recursion
970 //
971 //BinaryNode:
972 //    SubType
973 // R: SubType    op BinaryNode
974 // L: BinaryNode op SubType
975 // N: SubType    op SubType
976 mixin template BinaryNode(ASTNodeType, string recursion, SubType, ops...)
977 {
978     static assert(recursion == "L" || recursion == "R" || recursion == "N");
979 
980     static Action enter(Parser p)
981     {
982         p.pushState(&shift);
983         return SubType.enter(p);
984     }
985 
986     static Action shift(Parser p)
987     {
988         if(!isInOps!(ops)(p.tok.id))
989             return Forward;
990 
991         p.appendReplaceTopNode(new ASTNodeType(p.tok));
992         p.pushState(&shiftNext);
993 
994         static if(recursion == "L" || recursion == "N")
995             p.pushState(&SubType.enter);
996         else
997             p.pushState(&enter);
998         return Accept;
999     }
1000 
1001     static Action shiftNext(Parser p)
1002     {
1003         p.popAppendTopNode!ASTNodeType();
1004 
1005         static if(recursion == "L")
1006             return shift(p);
1007         else
1008             return Forward;
1009     }
1010 }
1011 
1012 // ternary operator, right recursion
1013 //
1014 //TernaryNode:
1015 //    SubType1
1016 //    SubType1 op1 SubType2 op2 TernaryNode
1017 mixin template TernaryNode(ASTNodeType, SubType1, TokenId op1, SubType2, TokenId op2)
1018 {
1019     static Action enter(Parser p)
1020     {
1021         p.pushState(&shift);
1022         return SubType1.enter(p);
1023     }
1024 
1025     static Action shift(Parser p)
1026     {
1027         if(p.tok.id != op1)
1028             return Forward;
1029 
1030         p.appendReplaceTopNode(new ASTNodeType(p.tok));
1031         p.pushState(&shiftNext);
1032         p.pushState(&SubType2.enter);
1033         return Accept;
1034     }
1035 
1036     static Action shiftNext(Parser p)
1037     {
1038         if(p.tok.id != op2)
1039             return p.parseError("second operator '" ~ tokenString(op2) ~ "'in ternary expression expected");
1040 
1041         p.popAppendTopNode!ASTNodeType();
1042 
1043         p.pushState(&shiftLast);
1044         p.pushState(&enter);
1045         return Accept;
1046     }
1047 
1048     static Action shiftLast(Parser p)
1049     {
1050         p.popAppendTopNode!ASTNodeType();
1051         return Forward;
1052     }
1053 }
1054 
1055 //OptionalNode:
1056 //    SubType1
1057 //    SubType1 op SubType2
1058 mixin template OptionalNode(ASTNodeType, SubType1, TokenId op, SubType2)
1059 {
1060     static Action enter(Parser p)
1061     {
1062         p.pushState(&shiftSubType1);
1063         return SubType1.enter(p);
1064     }
1065 
1066     static Action shiftSubType1(Parser p)
1067     {
1068         if(p.tok.id != op)
1069             return Forward;
1070 
1071         p.appendReplaceTopNode(new ASTNodeType(p.tok));
1072         p.pushState(&shiftSubType2);
1073         p.pushState(&SubType2.enter);
1074         return Accept;
1075     }
1076 
1077     static Action shiftSubType2(Parser p)
1078     {
1079         p.popAppendTopNode!ASTNodeType();
1080         return Forward;
1081     }
1082 }
1083 
1084 //SequenceNode:
1085 //    SubType1/Token1 SubType2/Token2 SubType3/Token3 SubType4/Token4
1086 mixin template SequenceNode(ASTNodeType, T...)
1087 {
1088     static Action enter(Parser p)
1089     {
1090         static if(!is(ASTNodeType == NoASTNode))
1091             p.pushNode(new ASTNodeType(p.tok));
1092 
1093         return shift0.next(p);
1094     }
1095 
1096     mixin template ShiftState(int n, alias shiftFn, alias nextFn)
1097     {
1098         static if(n < T.length)
1099         {
1100             static Action shift(Parser p)
1101             {
1102                 static if(is(T[n-1] == class))
1103                 {
1104                     static if(!__traits(compiles,T[n-1].doNotPopNode))
1105                         static if(!is(ASTNodeType == NoASTNode))
1106                             p.popAppendTopNode!ASTNodeType();
1107                 }
1108                 return next(p);
1109             }
1110 
1111             static Action next(Parser p)
1112             {
1113                 static if (is(typeof(& T[n]) U : U*) && is(U == function))
1114                 {
1115                     return T[n](p);
1116                 }
1117                 else
1118                 {
1119                     static if(__traits(compiles,T[n].startsWithOp(p.tok.id)))
1120                         if(!T[n].startsWithOp(p.tok.id))
1121                             return nextFn(p);
1122 
1123                     static if(n < T.length-1)
1124                         p.pushState(&shiftFn);
1125 
1126                     static if(is(T[n] == class))
1127                     {
1128                         static if(n == T.length-1)
1129                             p.pushState(&reduce);
1130 
1131                         return T[n].enter(p);
1132                     }
1133                     else
1134                     {
1135                         if(p.tok.id != T[n])
1136                             return p.parseError("'" ~ tokenString(T[n]) ~ "' expected");
1137                         return Accept;
1138                     }
1139                 }
1140             }
1141         }
1142         else
1143         {
1144             static Action shift(Parser p) { return Forward; }
1145             static Action next(Parser p) { return Forward; }
1146         }
1147     }
1148 
1149     mixin ShiftState!(9, reduce,       reduce)      shift9;
1150     mixin ShiftState!(8, shift9.shift, shift9.next) shift8;
1151     mixin ShiftState!(7, shift8.shift, shift8.next) shift7;
1152     mixin ShiftState!(6, shift7.shift, shift7.next) shift6;
1153     mixin ShiftState!(5, shift6.shift, shift6.next) shift5;
1154     mixin ShiftState!(4, shift5.shift, shift5.next) shift4;
1155     mixin ShiftState!(3, shift4.shift, shift4.next) shift3;
1156     mixin ShiftState!(2, shift3.shift, shift3.next) shift2;
1157     mixin ShiftState!(1, shift2.shift, shift2.next) shift1;
1158     mixin ShiftState!(0, shift1.shift, shift1.next) shift0;
1159 
1160     static Action reduce(Parser p)
1161     {
1162         static if(!is(ASTNodeType == NoASTNode))
1163             p.popAppendTopNode!ASTNodeType();
1164         return Forward;
1165     }
1166 }
1167 
1168 class Opt(NT, ops...)
1169 {
1170     static bool startsWithOp(TokenId tok)
1171     {
1172         foreach(o; ops)
1173             if(tok == o)
1174                 return true;
1175         return false;
1176     }
1177     static Action enter(Parser p)
1178     {
1179         return NT.enter(p);
1180     }
1181 }
1182 
1183 ////////////////////////////////////////////////////////////////
1184 
1185 // unfortunately, states have to be in reverse order because of unresolved forward references
1186 
1187 mixin template stateEnterToken(TokenId id, ASTType, alias newstate)
1188 {
1189     static Action enter(Parser p)
1190     {
1191         switch(p.tok.id)
1192         {
1193             case id:
1194                 static if(!is(ASTType : NoASTNode))
1195                     p.pushNode(new ASTType(p.tok));
1196                 p.pushState(&newstate);
1197                 return Accept;
1198             default:
1199                 return p.parseError(tokenString(id) ~ " expected");
1200         }
1201     }
1202 }
1203 
1204 mixin template stateEnterClass(SubType, ASTType, alias newstate)
1205 {
1206     static Action enter(Parser p)
1207     {
1208         static if(!is(ASTType : NoASTNode))
1209             p.pushNode(new ASTType(p.tok));
1210         p.pushState(&enterReduce);
1211         return SubType.enter(p);
1212     }
1213     static Action enterReduce(Parser p)
1214     {
1215         static if(!is(ASTType : NoASTNode))
1216             p.popAppendTopNode!();
1217         return newstate(p);
1218     }
1219 }
1220 
1221 mixin template stateShiftToken(TokenId id1, alias newstate1,
1222                                TokenId id2 = 0, alias newstate2 = Parser.forward,
1223                                TokenId id3 = 0, alias newstate3 = Parser.forward,
1224                                TokenId id4 = 0, alias newstate4 = Parser.forward)
1225 {
1226     static Action shift(Parser p)
1227     {
1228         switch(p.tok.id)
1229         {
1230             static if(id1 > 0)
1231             {
1232                 case id1:
1233                     static if(&newstate1 !is &Parser.forward)
1234                         p.pushState(&newstate1);
1235                     return Accept;
1236             }
1237             static if(id2 > 0)
1238             {
1239                 case id2:
1240                     static if(&newstate2 !is &Parser.forward)
1241                         p.pushState(&newstate2);
1242                     return Accept;
1243             }
1244             static if(id3 > 0)
1245             {
1246                 case id3:
1247                     static if(&newstate3 !is &Parser.forward)
1248                         p.pushState(&newstate3);
1249                     return Accept;
1250             }
1251             static if(id4 > 0)
1252             {
1253                 case id4:
1254                     static if(&newstate4 !is &Parser.forward)
1255                         p.pushState(&newstate4);
1256                     return Accept;
1257             }
1258             default:
1259                 static if(id1 == -1)
1260                     static if(&newstate1 is &Parser.forward)
1261                         return Forward;
1262                     else
1263                         return newstate1(p);
1264 
1265                 else static if(id2 == -1)
1266                     static if(&newstate2 is &Parser.forward)
1267                         return Forward;
1268                     else
1269                         return newstate2(p);
1270 
1271                 else static if(id3 == -1)
1272                     static if(&newstate3 is &Parser.forward)
1273                         return Forward;
1274                     else
1275                         return newstate3(p);
1276 
1277                 else static if(id4 == -1)
1278                     static if(&newstate4 is &Parser.forward)
1279                         return Forward;
1280                     else
1281                         return newstate4(p);
1282 
1283                 else
1284                 {
1285                     string msg = tokenString(id1);
1286                     static if(id2 != 0) msg ~= " or " ~ tokenString(id2);
1287                     static if(id3 != 0) msg ~= " or " ~ tokenString(id3);
1288                     static if(id4 != 0) msg ~= " or " ~ tokenString(id4);
1289                     return p.parseError(tokenString(id1) ~ " expected");
1290                 }
1291         }
1292     }
1293 }
1294 
1295 mixin template stateAppendClass(C, alias newstate)
1296 {
1297     static Action shift(Parser p)
1298     {
1299         p.pushState(&reduce);
1300         return C.enter(p);
1301     }
1302     static Action reduce(Parser p)
1303     {
1304         p.popAppendTopNode!();
1305         return newstate(p);
1306     }
1307 }