1 /**
2  * Generates a human-readable stack-trace on POSIX targets using DWARF
3  *
4  * The common use case for printing a stack trace is when `toString` is called
5  * on a `Throwable` (see `object.d`). It will iterate on what is likely to be
6  * the default trace handler (see `core.runtime : defaultTraceHandler`).
7  * The class returned by `defaultTraceHandler` is what ends up calling into
8  * this module, through the use of `core.internal.traits : externDFunc`.
9  *
10  * The entry point of this module is `traceHandlerOpApplyImpl`,
11  * and the only really "public" symbol (since all `rt` symbols are private).
12  * In the future, this implementation should probably be made idiomatic,
13  * so that it can for example work with attributes.
14  *
15  * Resilience:
 * As this module is used for diagnostics, it should handle failures
17  * as gracefully as possible. Having the runtime error out on printing
18  * the stack trace one is trying to debug would be quite a terrible UX.
19  * For this reason, this module works on a "best effort" basis and will
20  * sometimes print mangled symbols, or "???" when it cannot do anything
21  * more useful.
22  *
23  * Source_of_data:
24  * This module uses two main sources for generating human-readable data.
25  * First, it uses `backtrace_symbols` to obtain the name of the symbols
26  * (functions or methods) associated with the addresses.
27  * Since the names are mangled, it will also call into `core.demangle`,
28  * and doesn't need to use any DWARF information for this,
 * however a future extension could make use of the call frame information
30  * (See DWARF4 "6.4 Call Frame Information", PDF page 126).
31  *
 * The other piece of data used is the DWARF `.debug_line` section,
 * which contains the line information of a program, necessary to associate
 * the instruction address with its (file, line) information.
 *
 * Since debug line information is quite large, it is encoded using a
 * program that is to be fed to a finite state machine.
38  * See `runStateMachine` and `readLineNumberProgram` for more details.
39  *
40  * DWARF_Version:
41  * This module only supports DWARF 3, 4 and 5.
42  *
43  * Reference: http://www.dwarfstd.org/
44  * Copyright: Copyright Digital Mars 2015 - 2015.
45  * License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
46  * Authors:   Yazan Dabain, Sean Kelly
 * Source: $(DRUNTIMESRC core/internal/backtrace/dwarf.d)
48  */
49 
50 module core.internal.backtrace.dwarf;
51 
52 import core.internal.execinfo;
53 import core.internal.string;
54 
55 version (Posix):
56 
57 version (OSX)
58     version = Darwin;
59 else version (iOS)
60     version = Darwin;
61 else version (TVOS)
62     version = Darwin;
63 else version (WatchOS)
64     version = Darwin;
65 
66 version (Darwin)
67     import core.internal.backtrace.macho;
68 else
69     import core.internal.backtrace.elf;
70 
71 import core.internal.container.array;
72 import core.stdc.string : strlen, memcpy;
73 
74 //debug = DwarfDebugMachine;
75 debug(DwarfDebugMachine) import core.stdc.stdio : printf;
76 
/**
 * One resolved entry of a stack trace.
 *
 * Only `address` and `procedure` are filled in by the code in this module
 * that builds the array of locations; `file`, `directory` and `line` are
 * set later if matching debug line information is found.
 */
struct Location
{
    /// Address of the instruction this location describes.
    const(void)* address;

    /// Name of the procedure (function) that contains `address`.
    /// `toString` passes it through `demangle` before printing.
    const(char)[] procedure;

    /**
     * Path of the source file, relative to `directory`.
     *
     * Depending on the implementation that produced the debug info this can
     * be a bare file name, a relative path, or an absolute path.
     * It may be `null` when no debug info is present.
     */
    const(char)[] file;

    /**
     * Directory in which `file` resides.
     *
     * May be `null`, either because there is no debug info or because the
     * compiler implementation does not use this feature (e.g. DMD).
     */
    const(char)[] directory;

    /**
     * Line in `file` corresponding to this location.
     *
     * Consumers must expect `0` and `-1` in addition to positive values:
     * `0` means the code cannot be attributed to a specific line
     * (e.g. module-specific generated code), `-1` means that no debug info
     * could be found.
     */
    int line = -1;

    /**
     * Writes a human-readable rendering of this location to `sink`.
     *
     * The output has the shape `<dir>/<file>:<line> <symbol> [0x<address>]`,
     * with `??` standing in for a missing file and `:?` for a negative line.
     *
     * Params:
     *   sink = Receives the pieces of the output in order; it may be called
     *          with an empty slice
     */
    void toString (scope void delegate(scope const char[]) sink) const
    {
        import core.demangle;

        if (this.file.length == 0)
        {
            // Most likely there is no debug information at all. Any
            // directory without a file is deliberately ignored.
            sink("??");
        }
        else
        {
            // The sink is expected to cope with an empty directory
            sink(this.directory);
            // POSIX separator only: this module is not used on Windows
            const needsSeparator =
                this.directory.length && this.directory[$ - 1] != '/';
            if (needsSeparator)
                sink("/");
            sink(this.file);
        }

        if (this.line < 0)
        {
            // No debug info for the line either
            sink(":?");
        }
        else if (this.line != 0)
        {
            // A line of 0 (e.g. generated code) prints nothing at all
            sink(":");
            sink(signedToTempString(this.line));
        }

        // With execinfo-style input the procedure can be null when the
        // line could not be parsed; this is not expected to be common.
        if (this.procedure.length)
        {
            char[1024] symbolBuffer = void;
            sink(" ");
            sink(demangle(this.procedure, symbolBuffer, getCXXDemangler()));
        }

        sink(" [0x");
        sink(unsignedToTempString!16(cast(size_t) this.address));
        sink("]");
    }
}
161 
/**
 * Entry point of this module: formats a captured call stack frame by frame.
 *
 * Params:
 *   numFrames = Number of frames in the captured stack
 *   getNthAddress = Returns the instruction address of the frame at an index
 *   getNthFuncName = Returns the procedure name of the frame at an index
 *   dg = Called with an index and the formatted text of each printed frame;
 *        a non-zero return value stops the iteration
 *
 * Returns:
 *   Whatever the processing of the call stack returned, i.e. the first
 *   non-zero value returned by `dg`, or `0`.
 */
int traceHandlerOpApplyImpl(size_t numFrames,
                            scope const(void)* delegate(size_t) getNthAddress,
                            scope const(char)[] delegate(size_t) getNthFuncName,
                            scope int delegate(ref size_t, ref const(char[])) dg)
{
    // NOTE: The first few frames with the current implementation are
    //       inside core.runtime and the object code, so they are dropped
    //       for readability. Their number might depend on build parameters,
    //       hence the lookup by name instead of a fixed frame count.
    version (LDC) enum throwEntryPoint = "_d_throw_exception";
    else          enum throwEntryPoint = "_d_throwdwarf";

    auto image = Image.openSelf();

    Array!Location locations;
    locations.length = numFrames;

    size_t firstShownFrame = 0;
    for (size_t i = 0; i < numFrames; ++i)
    {
        locations[i].address = getNthAddress(i);
        locations[i].procedure = getNthFuncName(i);

        // Only the first occurrence of the throw function is considered
        if (firstShownFrame == 0 && locations[i].procedure == throwEntryPoint)
            firstShownFrame = i + 1;
    }

    auto shownFrames = locations[firstShownFrame .. $];

    if (image.isValid())
    {
        // find address -> file, line mapping using dwarf debug_line
        return image.processDebugLineSectionData(
            (line) => shownFrames.processCallstack(line, image.baseAddress, dg));
    }

    // No usable image: print the frames without file / line information
    return shownFrames.processCallstack(null, 0, dg);
}
196 
/**
 * Fixed-size text buffer used to format one stack trace line.
 *
 * Data that does not fit is dropped, and the last three characters of the
 * buffer are overwritten with `...` to signal the truncation.
 */
struct TraceInfoBuffer
{
    private char[1536] buf = void;
    // Number of characters written; `buf.length + 1` once truncated
    private size_t position;

    // BUG: https://issues.dlang.org/show_bug.cgi?id=21285
    @safe pure nothrow @nogc
    {
        /// Returns: The text written so far (the whole buffer if truncated)
        inout(char)[] opSlice() inout return
        {
            const written =
                this.position > this.buf.length ? this.buf.length : this.position;
            return this.buf[0 .. written];
        }

        /// Discards the content so that the buffer can be reused
        void reset()
        {
            this.position = 0;
        }
    }

    /// Appends `data`; used as `sink` argument to `Location.toString`
    void put(scope const char[] data)
    {
        const capacity = this.buf.length;

        // Already truncated: nothing more can be written
        if (this.position > capacity)
            return;

        const end = this.position + data.length;
        if (end <= capacity)
        {
            this.buf[this.position .. end] = data;
            this.position = end;
            return;
        }

        // Not enough room: keep what fits, then mark the truncation
        const room = capacity - this.position;
        this.buf[this.position .. $] = data[0 .. room];
        this.buf[$ - 3 .. $] = "...";
        // One past the capacity records that '...' was written. Without
        // it, data filling the buffer exactly would cause anything that
        // follows to be discarded without printing the '...'.
        this.position = capacity + 1;
    }
}
240 
241 private:
242 
/**
 * Resolves (when possible) and prints every location of a call stack.
 *
 * Params:
 *   locations = Frames to print; file / line data is filled in here
 *   debugLineSectionData = Content of `.debug_line`, or `null` to skip
 *                          the file / line resolution
 *   baseAddress = Offset forwarded to `resolveAddresses`
 *   dg = Receives the index and formatted text of each frame
 *
 * Returns:
 *   The first non-zero value returned by `dg`, `0` otherwise.
 */
int processCallstack(Location[] locations, const(ubyte)[] debugLineSectionData,
                     size_t baseAddress, scope int delegate(ref size_t, ref const(char[])) dg)
{
    if (debugLineSectionData)
        resolveAddresses(debugLineSectionData, locations, baseAddress);

    TraceInfoBuffer lineBuffer;
    for (size_t i = 0; i < locations.length; ++i)
    {
        const(Location)* frame = &locations[i];

        lineBuffer.reset();
        frame.toString(&lineBuffer.put);

        // `dg` takes its arguments by reference, so hand it copies
        size_t frameIndex = i;
        auto text = lineBuffer[];
        const status = dg(frameIndex, text);
        if (status != 0)
            return status;

        // Nothing below the D main function is printed
        if (frame.procedure == "_Dmain")
            break;
    }

    return 0;
}
265 
/**
 * Resolve the addresses of `locations` using `debugLineSectionData`
 *
 * Runs the DWARF state machine on `debugLineSectionData`,
 * assuming it represents a debugging program describing the addresses
 * in a continuous and increasing manner.
 *
 * After this function completes, the entries of `locations` for which a
 * match was found contain file / line information. Entries without a
 * match, or whose match refers to a file index that does not exist in the
 * line number program, are left untouched (`line == -1`).
 *
 * Note that the lifetime of the `Location` data is bound to the lifetime
 * of `debugLineSectionData`.
 *
 * Params:
 *   debugLineSectionData = A DWARF program to feed the state machine
 *   locations = The locations to resolve
 *   baseAddress = The offset to apply to every address
 */
void resolveAddresses(const(ubyte)[] debugLineSectionData, Location[] locations, size_t baseAddress) @nogc nothrow
{
    debug(DwarfDebugMachine) import core.stdc.stdio;

    size_t numberOfLocationsFound = 0;

    // The section is a concatenation of line number programs;
    // `readLineNumberProgram` advances `dbg` past the one it reads.
    const(ubyte)[] dbg = debugLineSectionData;
    while (dbg.length > 0)
    {
        debug(DwarfDebugMachine) printf("new debug program\n");
        const lp = readLineNumberProgram(dbg);

        // Previous row emitted by the state machine in the current sequence
        LocationInfo lastLoc = LocationInfo(-1, -1);
        const(void)* lastAddress;

        debug(DwarfDebugMachine) printf("program:\n");
        runStateMachine(lp,
            (const(void)* address, LocationInfo locInfo, bool isEndSequence)
            {
                // adjust to ASLR offset
                address += baseAddress;
                debug (DwarfDebugMachine)
                    printf("-- offsetting %p to %p\n", address - baseAddress, address);

                foreach (ref loc; locations)
                {
                    // If loc.line != -1, then it has been set previously.
                    // Some implementations (eg. dmd) write an address to
                    // the debug data multiple times, but so far the first
                    // occurrence has been found to be the correct one.
                    if (loc.line != -1)
                        continue;

                    // Can be called with either `locInfo` or `lastLoc`
                    void update(const ref LocationInfo match)
                    {
                        // File indices are 1-based for DWARF < 5
                        const fileIndex = match.file - (lp.dwarfVersion < 5 ? 1 : 0);

                        // Best effort: the index comes straight from the
                        // debug data, so never trust it to be in range.
                        // An unresolvable row is simply skipped, which
                        // leaves the location available for a later match.
                        if (fileIndex < 0 ||
                            cast(size_t) fileIndex >= lp.sourceFiles.length)
                        {
                            debug (DwarfDebugMachine)
                                printf("-- ignoring invalid file index %d\n", match.file);
                            return;
                        }

                        const sourceFile = lp.sourceFiles[fileIndex];
                        debug (DwarfDebugMachine)
                        {
                            printf("-- found for [%p]:\n", loc.address);
                            printf("--   file: %.*s\n",
                                   cast(int) sourceFile.file.length, sourceFile.file.ptr);
                            printf("--   line: %d\n", match.line);
                        }
                        // DMD emits entries with FQN, but other implementations
                        // (e.g. LDC) make use of directories
                        // See https://github.com/dlang/druntime/pull/2945
                        // An out-of-range directory index is ignored rather
                        // than dereferenced; the file name is still useful.
                        if (sourceFile.dirIndex != 0 &&
                            sourceFile.dirIndex <= lp.includeDirectories.length)
                            loc.directory = lp.includeDirectories[sourceFile.dirIndex - 1];

                        loc.file = sourceFile.file;
                        loc.line = match.line;
                        numberOfLocationsFound++;
                    }

                    // The state machine will not contain an entry for each
                    // address, as consecutive addresses with the same file/line
                    // are merged together to save on space, so we need to
                    // check if our address is within two addresses we get
                    // called with.
                    //
                    // Specs (DWARF v4, Section 6.2, PDF p.109) says:
                    // "We shrink it with two techniques. First, we delete from
                    // the matrix each row whose file, line, source column and
                    // discriminator information is identical with that of its
                    // predecessors.
                    if (loc.address == address)
                        update(locInfo);
                    else if (lastAddress &&
                             loc.address > lastAddress && loc.address < address)
                        update(lastLoc);
                }

                if (isEndSequence)
                {
                    // The next row starts a new sequence, so it must not be
                    // paired with the last row of this one
                    lastAddress = null;
                }
                else
                {
                    lastAddress = address;
                    lastLoc = locInfo;
                }

                // Returning `false` stops the state machine early
                return numberOfLocationsFound < locations.length;
            }
        );

        if (numberOfLocationsFound == locations.length) return;
    }
}
376 
/**
 * A callback type for `runStateMachine`
 *
 * The callback is called when certain specific opcodes are encountered
 * (a.k.a. when a complete `LocationInfo` is available): `runStateMachine`
 * invokes it for the `copy` standard opcode, for every special opcode,
 * and for the `endSequence` extended opcode.
 * See `runStateMachine` implementation and the DWARF specs for more detail.
 *
 * Params:
 *   address = The address that the `LocationInfo` describes
 *   info = The `LocationInfo` itself, describing `address`
 *   isEndSequence = Whether the end of a sequence has been reached
 *
 * Returns:
 *   `true` to keep the state machine running, `false` to make
 *   `runStateMachine` stop processing the program immediately.
 */
alias RunStateMachineCallback =
    bool delegate(const(void)* address, LocationInfo info, bool isEndSequence)
    @nogc nothrow;
392 
/**
 * Run the state machine to generate line number matrix
 *
 * Line number information generated by the compiler is stored in the
 * `.debug_line` section. Conceptually, it can be seen as a large matrix,
 * with columns such as "file", "line", "column", "is_statement", etc...
 * However such a matrix would be too big to store in an object file,
 * so DWARF instead generates this matrix using bytecode fed to a state machine.
 *
 * Note:
 * Each compilation unit can have its own line number program.
 *
 * See_Also:
 * - DWARF v4, Section 6.2: Line Number Information
 *
 * Params:
 *   lp = Program to execute
 *   callback = Delegate to call whenever a LocationInfo is completed.
 *              If it returns `false`, execution stops and this function
 *              returns `true` (stopping early is not an error).
 *
 * Returns:
 *   `false` if an unknown standard opcode was encountered,
 *   `true` otherwise (program exhausted or stopped by `callback`).
 */
bool runStateMachine(ref const(LineNumberProgram) lp, scope RunStateMachineCallback callback) @nogc nothrow
{
    StateMachine machine;
    machine.isStatement = lp.defaultIsStatement;

    const(ubyte)[] program = lp.program;
    while (program.length > 0)
    {
        // Advances `machine.address` and `machine.operationIndex` by
        // `operationAdvance` operations and returns the number of bytes
        // the address was moved by.
        size_t advanceAddressAndOpIndex(size_t operationAdvance)
        {
            const addressIncrement = lp.minimumInstructionLength * ((machine.operationIndex + operationAdvance) / lp.maximumOperationsPerInstruction);
            machine.address += addressIncrement;
            machine.operationIndex = (machine.operationIndex + operationAdvance) % lp.maximumOperationsPerInstruction;
            return addressIncrement;
        }

        ubyte opcode = program.read!ubyte();
        // Opcodes below `opcodeBase` are standard (or extended) opcodes,
        // everything else is a special opcode (handled in the `else`).
        if (opcode < lp.opcodeBase)
        {
            switch (opcode) with (StandardOpcode)
            {
                case extendedOp:
                    // `len` counts the extended opcode byte itself, which is
                    // why unhandled opcodes below skip `len - 1` bytes.
                    size_t len = cast(size_t) program.readULEB128();
                    ubyte eopcode = program.read!ubyte();

                    switch (eopcode) with (ExtendedOpcode)
                    {
                        case endSequence:
                            machine.isEndSequence = true;
                            debug(DwarfDebugMachine) printf("endSequence %p\n", machine.address);
                            if (!callback(machine.address, LocationInfo(machine.fileIndex, machine.line), true)) return true;
                            // A sequence ended: start over from the initial state
                            machine = StateMachine.init;
                            machine.isStatement = lp.defaultIsStatement;
                            break;

                        case setAddress:
                            // NOTE(review): reads a native pointer-sized value;
                            // neither `len` nor `lp.addressSize` is consulted.
                            const address = program.read!(void*)();
                            debug(DwarfDebugMachine) printf("setAddress %p\n", address);
                            machine.address = address;
                            machine.operationIndex = 0;
                            break;

                        case defineFile: // TODO: add proper implementation
                            debug(DwarfDebugMachine) printf("defineFile\n");
                            // The operands are skipped without being decoded
                            program = program[len - 1 .. $];
                            break;

                        case setDiscriminator:
                            const discriminator = cast(uint) program.readULEB128();
                            debug(DwarfDebugMachine) printf("setDiscriminator %d\n", discriminator);
                            machine.discriminator = discriminator;
                            break;

                        default:
                            // unknown opcode: skip its operands and carry on
                            debug(DwarfDebugMachine) printf("unknown extended opcode %d\n", cast(int) eopcode);
                            program = program[len - 1 .. $];
                            break;
                    }

                    break;

                case copy:
                    // Emits a row for the current state, then clears the
                    // per-row flags and the discriminator.
                    debug(DwarfDebugMachine) printf("copy %p\n", machine.address);
                    if (!callback(machine.address, LocationInfo(machine.fileIndex, machine.line), false)) return true;
                    machine.isBasicBlock = false;
                    machine.isPrologueEnd = false;
                    machine.isEpilogueBegin = false;
                    machine.discriminator = 0;
                    break;

                case advancePC:
                    const operationAdvance = cast(size_t) readULEB128(program);
                    advanceAddressAndOpIndex(operationAdvance);
                    debug(DwarfDebugMachine) printf("advancePC %d to %p\n", cast(int) operationAdvance, machine.address);
                    break;

                case advanceLine:
                    // The operand is signed: the line can move backwards
                    long ad = readSLEB128(program);
                    machine.line += ad;
                    debug(DwarfDebugMachine) printf("advanceLine %d to %d\n", cast(int) ad, cast(int) machine.line);
                    break;

                case setFile:
                    uint index = cast(uint) readULEB128(program);
                    debug(DwarfDebugMachine) printf("setFile to %d\n", cast(int) index);
                    machine.fileIndex = index;
                    break;

                case setColumn:
                    uint col = cast(uint) readULEB128(program);
                    debug(DwarfDebugMachine) printf("setColumn %d\n", cast(int) col);
                    machine.column = col;
                    break;

                case negateStatement:
                    debug(DwarfDebugMachine) printf("negateStatement\n");
                    machine.isStatement = !machine.isStatement;
                    break;

                case setBasicBlock:
                    debug(DwarfDebugMachine) printf("setBasicBlock\n");
                    machine.isBasicBlock = true;
                    break;

                case constAddPC:
                    // Same operation advance as the one computed for a
                    // special opcode of value 255 (see the `else` branch),
                    // but without emitting a row or touching the line.
                    const operationAdvance = (255 - lp.opcodeBase) / lp.lineRange;
                    advanceAddressAndOpIndex(operationAdvance);
                    debug(DwarfDebugMachine) printf("constAddPC %p\n", machine.address);
                    break;

                case fixedAdvancePC:
                    // The operand is a fixed-size `ushort` added to the
                    // address as-is (not scaled by `minimumInstructionLength`)
                    const add = program.read!ushort();
                    machine.address += add;
                    machine.operationIndex = 0;
                    debug(DwarfDebugMachine) printf("fixedAdvancePC %d to %p\n", cast(int) add, machine.address);
                    break;

                case setPrologueEnd:
                    machine.isPrologueEnd = true;
                    debug(DwarfDebugMachine) printf("setPrologueEnd\n");
                    break;

                case setEpilogueBegin:
                    machine.isEpilogueBegin = true;
                    debug(DwarfDebugMachine) printf("setEpilogueBegin\n");
                    break;

                case setISA:
                    machine.isa = cast(uint) readULEB128(program);
                    debug(DwarfDebugMachine) printf("setISA %d\n", cast(int) machine.isa);
                    break;

                default:
                    // Unknown standard opcode: its operands cannot be
                    // skipped here, so execution is aborted.
                    debug(DwarfDebugMachine) printf("unknown opcode %d\n", cast(int) opcode);
                    return false;
            }
        }
        else
        {
            // Special opcode: a single byte encodes both an address advance
            // and a line advance, and emits a row.
            opcode -= lp.opcodeBase;
            const operationAdvance = opcode / lp.lineRange;
            const addressIncrement = advanceAddressAndOpIndex(operationAdvance);
            const lineIncrement = lp.lineBase + (opcode % lp.lineRange);
            machine.line += lineIncrement;

            debug (DwarfDebugMachine)
                printf("special %d %d to %p line %d\n", cast(int) addressIncrement,
                       cast(int) lineIncrement, machine.address, machine.line);

            if (!callback(machine.address, LocationInfo(machine.fileIndex, machine.line), false)) return true;

            // Same per-row reset as for the `copy` opcode
            machine.isBasicBlock = false;
            machine.isPrologueEnd = false;
            machine.isEpilogueBegin = false;
            machine.discriminator = 0;
        }
    }

    return true;
}
576 
/**
 * Reads a value of type `T` from the front of `buffer`.
 *
 * The bytes are interpreted in their in-memory representation; no
 * endianness conversion is performed.
 *
 * Params:
 *   buffer = Input to read from; advanced by `T.sizeof` bytes
 *
 * Returns: The value stored in the first `T.sizeof` bytes of `buffer`
 */
T read(T)(ref const(ubyte)[] buffer) @nogc nothrow
{
    // Only on these targets is a direct load from a possibly
    // misaligned pointer used; every other target goes through `memcpy`.
    version (X86)
        enum canLoadUnaligned = true;
    else version (X86_64)
        enum canLoadUnaligned = true;
    else
        enum canLoadUnaligned = false;

    static if (canLoadUnaligned || T.alignof == 1)
    {
        T value = *(cast(T*) buffer.ptr);
    }
    else
    {
        T value = void;
        memcpy(&value, buffer.ptr, T.sizeof);
    }

    // Consume the bytes that were just decoded
    buffer = buffer[T.sizeof .. $];
    return value;
}
596 
/**
 * Reads a null-terminated string from `buffer`.
 *
 * The search for the terminator never leaves `buffer`: if no terminator
 * is present (truncated or malformed data), the whole remaining buffer
 * is returned as the string and `buffer` becomes empty.
 *
 * Params:
 *   buffer = Input to read from; advanced past the string and its
 *            terminator
 *
 * Returns:
 *   A slice of the original buffer holding the string, without its
 *   terminator. No copy is made.
 */
const(char)[] readStringz(ref const(ubyte)[] buffer) @nogc nothrow
{
    // Bounded scan: an unbounded `strlen` would read past the end of the
    // section when the terminator is missing.
    size_t len = 0;
    while (len < buffer.length && buffer[len] != 0)
        ++len;

    const str = cast(const(char)[]) buffer[0 .. len];
    // Skip the terminator too, when there is one
    const consumed = len < buffer.length ? len + 1 : len;
    buffer = buffer[consumed .. $];
    return str;
}
605 
/**
 * Reads an unsigned LEB128-encoded integer from `buffer`.
 *
 * Each input byte contributes its low 7 bits, least significant group
 * first; a clear high bit marks the last byte.
 *
 * Params:
 *   buffer = Input to read from; advanced past the encoded value
 *
 * Returns:
 *   The decoded value. Bits that do not fit in 64 bits are discarded.
 */
ulong readULEB128(ref const(ubyte)[] buffer) @nogc nothrow
{
    ulong val = 0;
    uint shift = 0;

    while (true)
    {
        const ubyte b = buffer.read!ubyte();

        // The payload must be widened *before* shifting: as an `int`,
        // bit 31 would be sign-extended into the upper half of `val` and
        // shifts of 32 or more would not produce the intended result.
        // Over-long encodings are still consumed, but groups that lie
        // entirely beyond bit 63 are ignored.
        if (shift < 64)
            val |= cast(ulong) (b & 0x7f) << shift;
        if ((b & 0x80) == 0) break;
        shift += 7;
    }

    return val;
}

unittest
{
    const(ubyte)[] data = [0xe5, 0x8e, 0x26, 0xDE, 0xAD, 0xBE, 0xEF];
    assert(readULEB128(data) == 624_485);
    assert(data[] == [0xDE, 0xAD, 0xBE, 0xEF]);

    // Bit 31 set: must not be sign-extended
    const(ubyte)[] bit31 = [0x80, 0x80, 0x80, 0x80, 0x08];
    assert(readULEB128(bit31) == 0x8000_0000UL);
    assert(bit31.length == 0);

    // Value that needs more than 32 bits
    const(ubyte)[] wide = [0x80, 0x80, 0x80, 0x80, 0x80, 0x01, 0x2A];
    assert(readULEB128(wide) == 1UL << 35);
    assert(wide[] == [0x2A]);
}
629 
/**
 * Reads a signed LEB128-encoded integer from `buffer`.
 *
 * Each input byte contributes its low 7 bits, least significant group
 * first; a clear high bit marks the last byte, and bit 6 of that last
 * byte is the sign of the value.
 *
 * Params:
 *   buffer = Input to read from; advanced past the encoded value
 *
 * Returns:
 *   The decoded value. Bits that do not fit in 64 bits are discarded.
 */
long readSLEB128(ref const(ubyte)[] buffer) @nogc nothrow
{
    enum bitWidth = long.sizeof * 8;

    long val = 0;
    uint shift = 0;
    ubyte b;

    while (true)
    {
        b = buffer.read!ubyte();
        // Widen before shifting, otherwise the shift is performed on a
        // 32-bit `int` and anything at or above bit 31 is mangled.
        if (shift < bitWidth)
            val |= cast(long) (b & 0x7f) << shift;
        shift += 7;
        if ((b & 0x80) == 0)
            break;
    }

    // Sign-extend from the last decoded group; the mask has to be built
    // with 64-bit arithmetic for the same reason as above.
    if (shift < bitWidth && (b & 0x40) != 0)
        val |= -(1L << shift);

    return val;
}

unittest
{
    const(ubyte)[] data = [0x02, 0x7f, 0x80, 0x7f,
                           0x80, 0x80, 0x80, 0x80, 0x08,
                           0x80, 0x80, 0x80, 0x80, 0x80, 0x7f,
                           0xAA];
    assert(readSLEB128(data) == 2);
    assert(readSLEB128(data) == -1);
    assert(readSLEB128(data) == -128);
    // Bit 31 set on a positive value
    assert(readSLEB128(data) == 2_147_483_648L);
    // Negative value that needs more than 32 bits
    assert(readSLEB128(data) == -34_359_738_368L);
    assert(data[] == [0xAA]);
}
651 
/**
 * Content type codes of a DWARF v5 directory / file name entry format.
 *
 * `readEntryFormat` decodes these as the first half of each
 * `EntryFormatPair`. Of these, `readLineNumberProgram` gives special
 * treatment to `path` and `directoryIndex`.
 */
enum DW_LNCT : ushort
{
    path = 1,
    directoryIndex = 2,
    timestamp = 3,
    size = 4,
    md5 = 5,
    // Bounds of a user range, going by the names
    // NOTE(review): presumably the DWARF vendor-extension range; confirm
    loUser = 0x2000,
    hiUser = 0x3fff,
}
662 
/**
 * Form codes describing how a value is encoded in the debug data.
 *
 * `readEntryFormat` decodes these as the second half of each
 * `EntryFormatPair`. In this module only a subset is interpreted
 * (`string_` for paths; `data1`, `data2` and `udata` for directory
 * indices); `readLineNumberProgram` skips over several others
 * without decoding them.
 */
enum DW_FORM : ubyte
{
    addr = 1,
    // Note: there is no member with value 2
    block2 = 3,
    block4 = 4,
    data2 = 5,
    data4 = 6,
    data8 = 7,
    string_ = 8,
    block = 9,
    block1 = 10,
    data1 = 11,
    flag = 12,
    sdata = 13,
    strp = 14,
    udata = 15,
    ref_addr = 16,
    ref1 = 17,
    ref2 = 18,
    ref4 = 19,
    ref8 = 20,
    ref_udata = 21,
    indirect = 22,
    sec_offset = 23,
    exprloc = 24,
    flag_present = 25,
    strx = 26,
    addrx = 27,
    ref_sup4 = 28,
    strp_sup = 29,
    data16 = 30,
    line_strp = 31,
    ref_sig8 = 32,
    implicit_const = 33,
    loclistx = 34,
    rnglistx = 35,
    ref_sup8 = 36,
    strx1 = 37,
    strx2 = 38,
    strx3 = 39,
    strx4 = 40,
    addrx1 = 41,
    addrx2 = 42,
    addrx3 = 43,
    addrx4 = 44,
}
709 
/// One element of an entry format, as decoded by `readEntryFormat`:
/// which piece of information is stored and how it is encoded.
struct EntryFormatPair
{
    /// What the field describes (path, directory index, ...)
    DW_LNCT type;
    /// How the field is encoded
    DW_FORM form;
}
715 
/**
 * Reads a DWARF v5 directory/file name entry format.
 *
 * The data consumed is a one-byte pair count followed by that many
 * (content type, form) pairs, each value being ULEB128-encoded.
 *
 * Params:
 *   buffer = Input positioned on the pair count; advanced past the pairs
 *
 * Returns: The decoded pairs, in the order in which they were read
 */
Array!EntryFormatPair readEntryFormat(ref const(ubyte)[] buffer) @nogc nothrow
{
    Array!EntryFormatPair formats;
    formats.length = buffer.read!ubyte();

    foreach (i; 0 .. formats.length)
    {
        // The type always precedes the form in the encoded data
        const rawType = buffer.readULEB128();
        const rawForm = buffer.readULEB128();
        formats[i].type = cast(DW_LNCT) rawType;
        formats[i].form = cast(DW_FORM) rawForm;
    }

    debug (DwarfDebugMachine)
    {
        printf("entryFormat: (%d)\n", cast(int) formats.length);
        foreach (i; 0 .. formats.length)
            printf("\t- type: %d, form: %d\n", cast(int) formats[i].type, cast(int) formats[i].form);
    }

    return formats;
}
739 
/**
 * Standard opcodes of a line number program, as handled by
 * `runStateMachine`. Any other value below the program's `opcodeBase`
 * makes `runStateMachine` return `false`.
 */
enum StandardOpcode : ubyte
{
    /// Introduces an extended opcode (see `ExtendedOpcode`)
    extendedOp = 0,
    /// Emits a row for the current state
    copy = 1,
    /// Advances the address by a ULEB128 operation count
    advancePC = 2,
    /// Adds a SLEB128 value to the line
    advanceLine = 3,
    /// Sets the file index from a ULEB128 operand
    setFile = 4,
    /// Sets the column from a ULEB128 operand
    setColumn = 5,
    /// Toggles `isStatement`
    negateStatement = 6,
    /// Sets `isBasicBlock`
    setBasicBlock = 7,
    /// Advances the address by a constant derived from the header
    constAddPC = 8,
    /// Adds a `ushort` operand to the address
    fixedAdvancePC = 9,
    /// Sets `isPrologueEnd`
    setPrologueEnd = 10,
    /// Sets `isEpilogueBegin`
    setEpilogueBegin = 11,
    /// Sets the ISA from a ULEB128 operand
    setISA = 12,
}
756 
/**
 * Extended opcodes, i.e. the byte following `StandardOpcode.extendedOp`
 * and its length. Unknown values are skipped by `runStateMachine`.
 */
enum ExtendedOpcode : ubyte
{
    /// Emits a final row, then resets the state machine
    endSequence = 1,
    /// Sets the address from a pointer-sized operand
    setAddress = 2,
    /// Currently skipped by `runStateMachine` (not implemented)
    defineFile = 3,
    /// Sets the discriminator from a ULEB128 operand
    setDiscriminator = 4,
}
764 
/**
 * Registers of the line number state machine run by `runStateMachine`.
 *
 * The default values below are the state at the start of every sequence:
 * `runStateMachine` resets to `StateMachine.init` after an `endSequence`
 * and then applies the program's `defaultIsStatement`.
 */
struct StateMachine
{
    /// Address of the current row
    const(void)* address;
    /// Index of the operation within the instruction at `address`
    uint operationIndex = 0;
    /// File index of the current row; starts at 1
    uint fileIndex = 1;
    /// Line of the current row; starts at 1
    uint line = 1;
    uint column = 0;
    uint isa = 0;
    /// Cleared after every emitted row that is not an end of sequence
    uint discriminator = 0;
    /// Initialized from `LineNumberProgram.defaultIsStatement`
    bool isStatement;
    // The three flags below, other than `isEndSequence`, are cleared
    // after every emitted row that is not an end of sequence.
    bool isBasicBlock = false;
    /// Set right before the row of an `endSequence` opcode is emitted
    bool isEndSequence = false;
    bool isPrologueEnd = false;
    bool isEpilogueBegin = false;
}
780 
/// The part of a state machine row that is handed to the
/// `RunStateMachineCallback`: a file index and a line.
struct LocationInfo
{
    /// File index as held by the state machine (not yet adjusted for the
    /// 1-based indexing of DWARF < 5); `-1` is used as "not set"
    int file;
    /// Line number; `-1` is used as "not set"
    int line;
}
786 
/**
 * A decoded line number program header, together with the bytecode it
 * applies to. Filled in by `readLineNumberProgram`.
 */
struct LineNumberProgram
{
    /// Length field of the unit (32-bit, or 64-bit for 64-bit DWARF)
    ulong unitLength;
    ushort dwarfVersion;
    /// Only read from the data for DWARF >= 5
    ubyte addressSize;
    /// Only read from the data for DWARF >= 5
    ubyte segmentSelectorSize;
    ulong headerLength;
    /// Scale factor applied to address advances
    ubyte minimumInstructionLength;
    /// Read from the data for DWARF >= 4, otherwise set to 1
    ubyte maximumOperationsPerInstruction;
    /// Initial value of `StateMachine.isStatement`
    bool defaultIsStatement;
    /// Smallest line increment a special opcode can encode
    byte lineBase;
    /// Number of distinct line increments a special opcode can encode
    ubyte lineRange;
    /// Value of the first special opcode
    ubyte opcodeBase;
    /// `opcodeBase - 1` bytes, one per standard opcode
    const(ubyte)[] standardOpcodeLengths;
    Array!(const(char)[]) includeDirectories;
    Array!SourceFile sourceFiles;
    /// Bytecode executed by `runStateMachine`
    const(ubyte)[] program;
}
805 
/// A file entry of a line number program
struct SourceFile
{
    /// File name or path, as stored in the debug data
    const(char)[] file;
    /// 1-based index into `LineNumberProgram.includeDirectories`;
    /// `0` means that no directory is attached to the file
    size_t dirIndex; // 1-based
}
811 
812 LineNumberProgram readLineNumberProgram(ref const(ubyte)[] data) @nogc nothrow
813 {
814     const originalData = data;
815 
816     LineNumberProgram lp;
817 
818     bool is64bitDwarf = false;
819     lp.unitLength = data.read!uint();
820     if (lp.unitLength == uint.max)
821     {
822         is64bitDwarf = true;
823         lp.unitLength = data.read!ulong();
824     }
825 
826     const dwarfVersionFieldOffset = cast(size_t) (data.ptr - originalData.ptr);
827     lp.dwarfVersion = data.read!ushort();
828     assert(lp.dwarfVersion < 6, "DWARF v6+ not supported yet");
829 
830     if (lp.dwarfVersion >= 5)
831     {
832         lp.addressSize = data.read!ubyte();
833         lp.segmentSelectorSize = data.read!ubyte();
834     }
835 
836     lp.headerLength = (is64bitDwarf ? data.read!ulong() : data.read!uint());
837 
838     const minimumInstructionLengthFieldOffset = cast(size_t) (data.ptr - originalData.ptr);
839     lp.minimumInstructionLength = data.read!ubyte();
840 
841     lp.maximumOperationsPerInstruction = (lp.dwarfVersion >= 4 ? data.read!ubyte() : 1);
842     lp.defaultIsStatement = (data.read!ubyte() != 0);
843     lp.lineBase = data.read!byte();
844     lp.lineRange = data.read!ubyte();
845     lp.opcodeBase = data.read!ubyte();
846 
847     lp.standardOpcodeLengths = data[0 .. lp.opcodeBase - 1];
848     data = data[lp.opcodeBase - 1 .. $];
849 
850     if (lp.dwarfVersion >= 5)
851     {
852         static void consumeGenericForm(ref const(ubyte)[] data, DW_FORM form, bool is64bitDwarf)
853         {
854             with (DW_FORM) switch (form)
855             {
856                 case strp, strp_sup, line_strp:
857                     data = data[is64bitDwarf ? 8 : 4 .. $]; break;
858                 case data1, strx1:
859                     data = data[1 .. $]; break;
860                 case data2, strx2:
861                     data = data[2 .. $]; break;
862                 case strx3:
863                     data = data[3 .. $]; break;
864                 case data4, strx4:
865                     data = data[4 .. $]; break;
866                 case data8:
867                     data = data[8 .. $]; break;
868                 case data16:
869                     data = data[16 .. $]; break;
870                 case udata, strx:
871                     data.readULEB128(); break;
872                 case block:
873                     const length = cast(size_t) data.readULEB128();
874                     data = data[length .. $];
875                     break;
876                 default:
877                     assert(0); // TODO: support other forms for vendor extensions
878             }
879         }
880 
881         const dirFormat = data.readEntryFormat();
882         lp.includeDirectories.length = cast(size_t) data.readULEB128();
883         foreach (ref dir; lp.includeDirectories)
884         {
885             dir = "<unknown dir>"; // fallback
886             foreach (ref pair; dirFormat)
887             {
888                 if (pair.type == DW_LNCT.path &&
889                     // TODO: support other forms too (offsets in other sections)
890                     pair.form == DW_FORM.string_)
891                 {
892                     dir = data.readStringz();
893                 }
894                 else // uninteresting type
895                     consumeGenericForm(data, pair.form, is64bitDwarf);
896             }
897         }
898 
899         const fileFormat = data.readEntryFormat();
900         lp.sourceFiles.length = cast(size_t) data.readULEB128();
901         foreach (ref sf; lp.sourceFiles)
902         {
903             sf.file = "<unknown file>"; // fallback
904             foreach (ref pair; fileFormat)
905             {
906                 if (pair.type == DW_LNCT.path &&
907                     // TODO: support other forms too (offsets in other sections)
908                     pair.form == DW_FORM.string_)
909                 {
910                     sf.file = data.readStringz();
911                 }
912                 else if (pair.type == DW_LNCT.directoryIndex)
913                 {
914                     if (pair.form == DW_FORM.data1)
915                         sf.dirIndex = data.read!ubyte();
916                     else if (pair.form == DW_FORM.data2)
917                         sf.dirIndex = data.read!ushort();
918                     else if (pair.form == DW_FORM.udata)
919                         sf.dirIndex = cast(size_t) data.readULEB128();
920                     else
921                         assert(0); // not allowed by DWARF 5 spec
922                     sf.dirIndex++; // DWARF v5 indices are 0-based
923                 }
924                 else // uninteresting type
925                     consumeGenericForm(data, pair.form, is64bitDwarf);
926             }
927         }
928     }
929     else
930     {
931         // A sequence ends with a null-byte.
932         static auto readSequence(alias ReadEntry)(ref const(ubyte)[] data)
933         {
934             alias ResultType = typeof(ReadEntry(data));
935 
936             static size_t count(const(ubyte)[] data)
937             {
938                 size_t count = 0;
939                 while (data.length && data[0] != 0)
940                 {
941                     ReadEntry(data);
942                     ++count;
943                 }
944                 return count;
945             }
946 
947             const numEntries = count(data);
948 
949             Array!ResultType result;
950             result.length = numEntries;
951 
952             foreach (i; 0 .. numEntries)
953                 result[i] = ReadEntry(data);
954 
955             data = data[1 .. $]; // skip over sequence-terminating null
956 
957             return result;
958         }
959 
960         /// Directories are simply a sequence of NUL-terminated strings
961         static const(char)[] readIncludeDirectoryEntry(ref const(ubyte)[] data)
962         {
963             return data.readStringz();
964         }
965         lp.includeDirectories = readSequence!readIncludeDirectoryEntry(data);
966 
967         static SourceFile readFileNameEntry(ref const(ubyte)[] data)
968         {
969             const file = data.readStringz();
970             const dirIndex = cast(size_t) data.readULEB128();
971             data.readULEB128(); // last mod
972             data.readULEB128(); // file len
973 
974             return SourceFile(
975                 file,
976                 dirIndex,
977             );
978         }
979         lp.sourceFiles = readSequence!readFileNameEntry(data);
980     }
981 
982     debug (DwarfDebugMachine)
983     {
984         printf("include_directories: (%d)\n", cast(int) lp.includeDirectories.length);
985         foreach (dir; lp.includeDirectories)
986             printf("\t- %.*s\n", cast(int) dir.length, dir.ptr);
987         printf("source_files: (%d)\n", cast(int) lp.sourceFiles.length);
988         foreach (ref sf; lp.sourceFiles)
989         {
990             if (sf.dirIndex > lp.includeDirectories.length)
991                 printf("\t- Out of bound directory! (%llu): %.*s\n",
992                        sf.dirIndex, cast(int) sf.file.length, sf.file.ptr);
993             else if (sf.dirIndex > 0)
994             {
995                 const dir = lp.includeDirectories[sf.dirIndex - 1];
996                 printf("\t- (Dir:%llu:%.*s/)%.*s\n", sf.dirIndex,
997                        cast(int) dir.length, dir.ptr,
998                        cast(int) sf.file.length, sf.file.ptr);
999             }
1000             else
1001                 printf("\t- %.*s\n", cast(int) sf.file.length, sf.file.ptr);
1002         }
1003     }
1004 
1005     const programStart = cast(size_t) (minimumInstructionLengthFieldOffset + lp.headerLength);
1006     const programEnd = cast(size_t) (dwarfVersionFieldOffset + lp.unitLength);
1007     lp.program = originalData[programStart .. programEnd];
1008 
1009     data = originalData[programEnd .. $];
1010 
1011     return lp;
1012 }