/**
 * Generates a human-readable stack-trace on POSIX targets using DWARF
 *
 * The common use case for printing a stack trace is when `toString` is called
 * on a `Throwable` (see `object.d`). It will iterate on what is likely to be
 * the default trace handler (see `core.runtime : defaultTraceHandler`).
 * The class returned by `defaultTraceHandler` is what ends up calling into
 * this module, through the use of `core.internal.traits : externDFunc`.
 *
 * The entry point of this module is `traceHandlerOpApplyImpl`,
 * and the only really "public" symbol (since all `rt` symbols are private).
 * In the future, this implementation should probably be made idiomatic,
 * so that it can for example work with attributes.
 *
 * Resilience:
 * As this module is used for diagnostics, it should handle failures
 * as gracefully as possible. Having the runtime error out on printing
 * the stack trace one is trying to debug would be quite a terrible UX.
 * For this reason, this module works on a "best effort" basis and will
 * sometimes print mangled symbols, or "???" when it cannot do anything
 * more useful.
 *
 * Source_of_data:
 * This module uses two main sources for generating human-readable data.
 * First, it uses `backtrace_symbols` to obtain the name of the symbols
 * (functions or methods) associated with the addresses.
 * Since the names are mangled, it will also call into `core.demangle`,
 * and doesn't need to use any DWARF information for this,
 * however a future extension could make use of the call frame information
 * (See DWARF4 "6.4 Call Frame Information", PDF page 126).
 *
 * The other piece of data used is the DWARF `.debug_line` section,
 * which contains the line information of a program, necessary to associate
 * the instruction address with its (file, line) information.
 *
 * Since debug line information is quite large, it is encoded using a
 * program that is to be fed to a finite state machine.
 * See `runStateMachine` and `readLineNumberProgram` for more details.
 *
 * DWARF_Version:
 * This module only supports DWARF 3, 4 and 5.
 *
 * Reference: http://www.dwarfstd.org/
 * Copyright: Copyright Digital Mars 2015 - 2015.
 * License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
 * Authors:   Yazan Dabain, Sean Kelly
 * Source: $(DRUNTIMESRC rt/backtrace/dwarf.d)
 */

module core.internal.backtrace.dwarf;

import core.internal.execinfo;
import core.internal.string;

version (Posix):

version (OSX)
    version = Darwin;
else version (iOS)
    version = Darwin;
else version (TVOS)
    version = Darwin;
else version (WatchOS)
    version = Darwin;

version (Darwin)
    import core.internal.backtrace.macho;
else
    import core.internal.backtrace.elf;

import core.internal.container.array;
import core.stdc.string : strlen, memcpy;

//debug = DwarfDebugMachine;
debug(DwarfDebugMachine) import core.stdc.stdio : printf;

/// A single resolved frame of a stack trace
struct Location
{
    /**
     * Address of the instruction for which this location is for.
     */
    const(void)* address;

    /**
     * The name of the procedure, or function, this address is in.
     */
    const(char)[] procedure;

    /**
     * Path to the file this location references, relative to `directory`
     *
     * Note that depending on implementation, this could be just a name,
     * a relative path, or an absolute path.
     *
     * If no debug info is present, this may be `null`.
     */
    const(char)[] file;

    /**
     * Directory where `file` resides
     *
     * This may be `null`, either if there is no debug info,
     * or if the compiler implementation doesn't use this feature (e.g. DMD).
     */
    const(char)[] directory;

    /**
     * Line within the file that corresponds to this `location`.
     *
     * Note that in addition to a positive value, the values `0` and `-1`
     * are to be expected by consumers. A value of `0` means that the code
     * is not attributable to a specific line in the file, e.g. module-specific
     * generated code, and `-1` means that no debug info could be found.
     */
    int line = -1;

    /// Format this location into a human-readable string
    void toString (scope void delegate(scope const char[]) sink) const
    {
        import core.demangle;

        // If there's no file information, there shouldn't be any directory
        // information. If there is we will simply ignore it.
        if (this.file.length)
        {
            // Note: Sink needs to handle empty data
            sink(this.directory);
            // Only POSIX path because this module is not used on Windows
            if (this.directory.length && this.directory[$ - 1] != '/')
                sink("/");
            sink(this.file);
        }
        else
            // Most likely, no debug information
            sink("??");

        // Also no debug infos
        if (this.line < 0)
            sink(":?");
        // Line can be 0, e.g. if the frame is in generated code
        else if (this.line)
        {
            sink(":");
            sink(signedToTempString(this.line));
        }

        char[1024] symbolBuffer = void;
        // When execinfo style is used, procedure can be null if the format
        // of the line cannot be read, but it generally should not happen
        if (this.procedure.length)
        {
            sink(" ");
            sink(demangle(this.procedure, symbolBuffer, getCXXDemangler()));
        }

        sink(" [0x");
        sink(unsignedToTempString!16(cast(size_t) this.address));
        sink("]");
    }
}

/**
 * Entry point of this module: formats every frame of a stack trace
 *
 * Collects the address and symbol name of each frame, drops the leading
 * frames up to and including the runtime's exception-throwing function,
 * resolves file / line information from the `.debug_line` section of the
 * current image when it is available, and hands each formatted frame to `dg`.
 *
 * Params:
 *   numFrames = Number of frames in the trace
 *   getNthAddress = Returns the address of the frame at the given index
 *   getNthFuncName = Returns the (mangled) symbol name of the frame
 *                    at the given index
 *   dg = `opApply`-style delegate, called with the frame index (relative to
 *        the first frame kept) and the formatted text of the frame
 *
 * Returns:
 *   The first non-zero value returned by `dg`, or the value produced by
 *   the image's debug line section processing otherwise.
 */
int traceHandlerOpApplyImpl(size_t numFrames,
                            scope const(void)* delegate(size_t) getNthAddress,
                            scope const(char)[] delegate(size_t) getNthFuncName,
                            scope int delegate(ref size_t, ref const(char[])) dg)
{
    auto image = Image.openSelf();

    Array!Location locations;
    locations.length = numFrames;
    size_t startIdx;
    foreach (idx; 0 .. numFrames)
    {
        locations[idx].address = getNthAddress(idx);
        locations[idx].procedure = getNthFuncName(idx);

        // NOTE: The first few frames with the current implementation are
        //       inside core.runtime and the object code, so eliminate
        //       these for readability.
        //       They also might depend on build parameters, which would make
        //       using a fixed number of frames otherwise brittle.
        version (LDC) enum BaseExceptionFunctionName = "_d_throw_exception";
        else          enum BaseExceptionFunctionName = "_d_throwdwarf";
        if (!startIdx && locations[idx].procedure == BaseExceptionFunctionName)
            startIdx = idx + 1;
    }


    if (!image.isValid())
        return locations[startIdx .. $].processCallstack(null, 0, dg);

    // find address -> file, line mapping using dwarf debug_line
    return image.processDebugLineSectionData(
        (line) => locations[startIdx .. $].processCallstack(line, image.baseAddress, dg));
}

/**
 * Fixed-size text buffer used to format a single frame without allocating
 *
 * Data that does not fit is truncated and the last three characters of the
 * buffer are replaced by `...`.
 */
struct TraceInfoBuffer
{
    private char[1536] buf = void;
    private size_t position;

    // BUG: https://issues.dlang.org/show_bug.cgi?id=21285
    @safe pure nothrow @nogc
    {
        /// Returns: The text written so far (at most the whole buffer)
        inout(char)[] opSlice() inout return
        {
            // `position` is one past the buffer length once truncation happened
            return this.buf[0 .. this.position > $ ? $ : this.position];
        }

        /// Discards the content so the buffer can be reused
        void reset()
        {
            this.position = 0;
        }
    }

    /// Used as `sink` argument to `Location.toString`
    void put(scope const char[] data)
    {
        // We cannot write anymore
        if (this.position > this.buf.length)
            return;

        if (this.position + data.length > this.buf.length)
        {
            this.buf[this.position .. $] = data[0 .. this.buf.length - this.position];
            this.buf[$ - 3 .. $] = "...";
            // +1 is a marker for the '...', otherwise if the symbol
            // name was to exactly fill the buffer,
            // we'd discard anything else without printing the '...'.
            this.position = this.buf.length + 1;
            return;
        }

        this.buf[this.position .. this.position + data.length] = data;
        this.position += data.length;
    }
}

private:

/**
 * Formats each location and forwards it to `dg`
 *
 * Params:
 *   locations = The frames to print; file / line are filled in place
 *               when `debugLineSectionData` is provided
 *   debugLineSectionData = Content of the `.debug_line` section,
 *                          or `null` if not available
 *   baseAddress = The offset to apply to every address of the debug data
 *   dg = Delegate receiving the index and formatted text of each frame
 *
 * Returns:
 *   The first non-zero value returned by `dg`, `0` otherwise.
 *   Iteration stops after the frame whose procedure is `_Dmain`.
 */
int processCallstack(Location[] locations, const(ubyte)[] debugLineSectionData,
                     size_t baseAddress, scope int delegate(ref size_t, ref const(char[])) dg)
{
    if (debugLineSectionData)
        resolveAddresses(debugLineSectionData, locations, baseAddress);

    TraceInfoBuffer buffer;
    foreach (idx, const ref loc; locations)
    {
        buffer.reset();
        loc.toString(&buffer.put);

        // `dg` takes its arguments by `ref`, hence the named lvalue
        auto lvalue = buffer[];
        if (auto ret = dg(idx, lvalue))
            return ret;

        if (loc.procedure == "_Dmain")
            break;
    }

    return 0;
}

/**
 * Resolve the addresses of `locations` using `debugLineSectionData`
 *
 * Runs the DWARF state machine on `debugLineSectionData`,
 * assuming it represents a debugging program describing the addresses
 * in a continuous and increasing manner.
 *
 * After this function successfully completes, `locations` will contain
 * file / line information.
 *
 * A row of the line table that references a file index outside of the
 * program's file table is ignored, and a directory index outside of the
 * directory table leaves `Location.directory` untouched, so that malformed
 * debug data cannot cause an out-of-bounds access.
 *
 * Note that the lifetime of the `Location` data is bound to the lifetime
 * of `debugLineSectionData`.
 *
 * Params:
 *   debugLineSectionData = A DWARF program to feed the state machine
 *   locations = The locations to resolve
 *   baseAddress = The offset to apply to every address
 */
void resolveAddresses(const(ubyte)[] debugLineSectionData, Location[] locations, size_t baseAddress) @nogc nothrow
{
    debug(DwarfDebugMachine) import core.stdc.stdio;

    size_t numberOfLocationsFound = 0;

    const(ubyte)[] dbg = debugLineSectionData;
    while (dbg.length > 0)
    {
        debug(DwarfDebugMachine) printf("new debug program\n");
        const lp = readLineNumberProgram(dbg);

        LocationInfo lastLoc = LocationInfo(-1, -1);
        const(void)* lastAddress;

        debug(DwarfDebugMachine) printf("program:\n");
        runStateMachine(lp,
            (const(void)* address, LocationInfo locInfo, bool isEndSequence)
            {
                // adjust to ASLR offset
                address += baseAddress;
                debug (DwarfDebugMachine)
                    printf("-- offsetting %p to %p\n", address - baseAddress, address);

                foreach (ref loc; locations)
                {
                    // If loc.line != -1, then it has been set previously.
                    // Some implementations (eg. dmd) write an address to
                    // the debug data multiple times, but so far I have found
                    // that the first occurrence to be the correct one.
                    if (loc.line != -1)
                        continue;

                    // Can be called with either `locInfo` or `lastLoc`
                    void update(const ref LocationInfo match)
                    {
                        // File indices are 1-based for DWARF < 5
                        const fileIndex = match.file - (lp.dwarfVersion < 5 ? 1 : 0);
                        // Best effort: do not index the file table with an
                        // index the debug data does not actually provide.
                        if (fileIndex < 0 || cast(size_t) fileIndex >= lp.sourceFiles.length)
                            return;
                        const sourceFile = lp.sourceFiles[fileIndex];
                        debug (DwarfDebugMachine)
                        {
                            printf("-- found for [%p]:\n", loc.address);
                            printf("--   file: %.*s\n",
                                   cast(int) sourceFile.file.length, sourceFile.file.ptr);
                            printf("--   line: %d\n", match.line);
                        }
                        // DMD emits entries with FQN, but other implementations
                        // (e.g. LDC) make use of directories
                        // See https://github.com/dlang/druntime/pull/2945
                        // `dirIndex` is 1-based here, `0` meaning "no directory"
                        if (sourceFile.dirIndex != 0 &&
                            sourceFile.dirIndex <= lp.includeDirectories.length)
                            loc.directory = lp.includeDirectories[sourceFile.dirIndex - 1];

                        loc.file = sourceFile.file;
                        loc.line = match.line;
                        numberOfLocationsFound++;
                    }

                    // The state machine will not contain an entry for each
                    // address, as consecutive addresses with the same file/line
                    // are merged together to save on space, so we need to
                    // check if our address is within two addresses we get
                    // called with.
                    //
                    // Specs (DWARF v4, Section 6.2, PDF p.109) says:
                    // "We shrink it with two techniques. First, we delete from
                    // the matrix each row whose file, line, source column and
                    // discriminator information is identical with that of its
                    // predecessors."
                    if (loc.address == address)
                        update(locInfo);
                    else if (lastAddress &&
                             loc.address > lastAddress && loc.address < address)
                        update(lastLoc);
                }

                if (isEndSequence)
                {
                    // Addresses of the next sequence are unrelated to this one
                    lastAddress = null;
                }
                else
                {
                    lastAddress = address;
                    lastLoc = locInfo;
                }

                // Returning `false` stops the state machine early
                return numberOfLocationsFound < locations.length;
            }
        );

        if (numberOfLocationsFound == locations.length) return;
    }
}

/**
 * A callback type for `runStateMachine`
 *
 * The callback is called when certain specific opcode are encountered
 * (a.k.a when a complete `LocationInfo` is encountered).
 * See `runStateMachine` implementation and the DWARF specs for more detail.
 *
 * Params:
 *   address = The address that the `LocationInfo` describes
 *   info = The `LocationInfo` itself, describing `address`
 *   isEndSequence = Whether the end of a sequence has been reached
 *
 * Returns:
 *   `false` to make `runStateMachine` stop, `true` to continue
 */
alias RunStateMachineCallback =
    bool delegate(const(void)* address, LocationInfo info, bool isEndSequence)
    @nogc nothrow;

/**
 * Run the state machine to generate line number matrix
 *
 * Line number information generated by the compiler is stored in the
 * `.debug_line` section. Conceptually, it can be seen as a large matrix,
 * with row such as "file", "line", "column", "is_statement", etc...
 * However such a matrix would be too big to store in an object file,
 * so DWARF instead generates this matrix using bytecode fed to a state machine.
 *
 * Note:
 * Each compilation unit can have its own line number program.
 *
 * See_Also:
 * - DWARF v4, Section 6.2: Line Number Information
 *
 * Params:
 *   lp = Program to execute
 *   callback = Delegate to call whenever a LocationInfo is completed
 *
 * Returns:
 *   `false` if an error happened (e.g. unknown opcode)
 */
bool runStateMachine(ref const(LineNumberProgram) lp, scope RunStateMachineCallback callback) @nogc nothrow
{
    StateMachine machine;
    machine.isStatement = lp.defaultIsStatement;

    const(ubyte)[] program = lp.program;
    while (program.length > 0)
    {
        // Advances `address` and `operationIndex` by `operationAdvance`
        // operations and returns by how much the address was incremented
        size_t advanceAddressAndOpIndex(size_t operationAdvance)
        {
            const addressIncrement = lp.minimumInstructionLength * ((machine.operationIndex + operationAdvance) / lp.maximumOperationsPerInstruction);
            machine.address += addressIncrement;
            machine.operationIndex = (machine.operationIndex + operationAdvance) % lp.maximumOperationsPerInstruction;
            return addressIncrement;
        }

        ubyte opcode = program.read!ubyte();
        if (opcode < lp.opcodeBase)
        {
            switch (opcode) with (StandardOpcode)
            {
                case extendedOp:
                    // `len` counts the extended opcode byte and its operands
                    size_t len = cast(size_t) program.readULEB128();
                    ubyte eopcode = program.read!ubyte();

                    switch (eopcode) with (ExtendedOpcode)
                    {
                        case endSequence:
                            machine.isEndSequence = true;
                            debug(DwarfDebugMachine) printf("endSequence %p\n", machine.address);
                            if (!callback(machine.address, LocationInfo(machine.fileIndex, machine.line), true)) return true;
                            machine = StateMachine.init;
                            machine.isStatement = lp.defaultIsStatement;
                            break;

                        case setAddress:
                            const address = program.read!(void*)();
                            debug(DwarfDebugMachine) printf("setAddress %p\n", address);
                            machine.address = address;
                            machine.operationIndex = 0;
                            break;

                        case defineFile: // TODO: add proper implementation
                            debug(DwarfDebugMachine) printf("defineFile\n");
                            // Skip the operands (the opcode byte was already read)
                            program = program[len - 1 .. $];
                            break;

                        case setDiscriminator:
                            const discriminator = cast(uint) program.readULEB128();
                            debug(DwarfDebugMachine) printf("setDiscriminator %d\n", discriminator);
                            machine.discriminator = discriminator;
                            break;

                        default:
                            // unknown opcode
                            debug(DwarfDebugMachine) printf("unknown extended opcode %d\n", cast(int) eopcode);
                            program = program[len - 1 .. $];
                            break;
                    }

                    break;

                case copy:
                    debug(DwarfDebugMachine) printf("copy %p\n", machine.address);
                    if (!callback(machine.address, LocationInfo(machine.fileIndex, machine.line), false)) return true;
                    machine.isBasicBlock = false;
                    machine.isPrologueEnd = false;
                    machine.isEpilogueBegin = false;
                    machine.discriminator = 0;
                    break;

                case advancePC:
                    const operationAdvance = cast(size_t) readULEB128(program);
                    advanceAddressAndOpIndex(operationAdvance);
                    debug(DwarfDebugMachine) printf("advancePC %d to %p\n", cast(int) operationAdvance, machine.address);
                    break;

                case advanceLine:
                    long ad = readSLEB128(program);
                    machine.line += ad;
                    debug(DwarfDebugMachine) printf("advanceLine %d to %d\n", cast(int) ad, cast(int) machine.line);
                    break;

                case setFile:
                    uint index = cast(uint) readULEB128(program);
                    debug(DwarfDebugMachine) printf("setFile to %d\n", cast(int) index);
                    machine.fileIndex = index;
                    break;

                case setColumn:
                    uint col = cast(uint) readULEB128(program);
                    debug(DwarfDebugMachine) printf("setColumn %d\n", cast(int) col);
                    machine.column = col;
                    break;

                case negateStatement:
                    debug(DwarfDebugMachine) printf("negateStatement\n");
                    machine.isStatement = !machine.isStatement;
                    break;

                case setBasicBlock:
                    debug(DwarfDebugMachine) printf("setBasicBlock\n");
                    machine.isBasicBlock = true;
                    break;

                case constAddPC:
                    // Same address advance as special opcode 255
                    const operationAdvance = (255 - lp.opcodeBase) / lp.lineRange;
                    advanceAddressAndOpIndex(operationAdvance);
                    debug(DwarfDebugMachine) printf("constAddPC %p\n", machine.address);
                    break;

                case fixedAdvancePC:
                    const add = program.read!ushort();
                    machine.address += add;
                    machine.operationIndex = 0;
                    debug(DwarfDebugMachine) printf("fixedAdvancePC %d to %p\n", cast(int) add, machine.address);
                    break;

                case setPrologueEnd:
                    machine.isPrologueEnd = true;
                    debug(DwarfDebugMachine) printf("setPrologueEnd\n");
                    break;

                case setEpilogueBegin:
                    machine.isEpilogueBegin = true;
                    debug(DwarfDebugMachine) printf("setEpilogueBegin\n");
                    break;

                case setISA:
                    machine.isa = cast(uint) readULEB128(program);
                    debug(DwarfDebugMachine) printf("setISA %d\n", cast(int) machine.isa);
                    break;

                default:
                    debug(DwarfDebugMachine) printf("unknown opcode %d\n", cast(int) opcode);
                    return false;
            }
        }
        else
        {
            // Special opcode: advances both the address and the line,
            // then emits a row
            opcode -= lp.opcodeBase;
            const operationAdvance = opcode / lp.lineRange;
            const addressIncrement = advanceAddressAndOpIndex(operationAdvance);
            const lineIncrement = lp.lineBase + (opcode % lp.lineRange);
            machine.line += lineIncrement;

            debug (DwarfDebugMachine)
                printf("special %d %d to %p line %d\n", cast(int) addressIncrement,
                       cast(int) lineIncrement, machine.address, machine.line);

            if (!callback(machine.address, LocationInfo(machine.fileIndex, machine.line), false)) return true;

            machine.isBasicBlock = false;
            machine.isPrologueEnd = false;
            machine.isEpilogueBegin = false;
            machine.discriminator = 0;
        }
    }

    return true;
}

/**
 * Reads a value of type `T` from the front of `buffer` and advances it
 *
 * No length check is performed before reading `T.sizeof` bytes from
 * `buffer.ptr`.
 *
 * Params:
 *   buffer = Data to read from; shortened by `T.sizeof` bytes
 *
 * Returns:
 *   The bytes at the front of `buffer`, reinterpreted as a `T`
 */
T read(T)(ref const(ubyte)[] buffer) @nogc nothrow
{
    version (X86)         enum hasUnalignedLoads = true;
    else version (X86_64) enum hasUnalignedLoads = true;
    else                  enum hasUnalignedLoads = false;

    static if (hasUnalignedLoads || T.alignof == 1)
    {
        T result = *(cast(T*) buffer.ptr);
    }
    else
    {
        // Go through `memcpy` where a direct load could be misaligned
        T result = void;
        memcpy(&result, buffer.ptr, T.sizeof);
    }

    buffer = buffer[T.sizeof .. $];
    return result;
}

// Reads a null-terminated string from `buffer`.
// `buffer` is advanced past the terminating null byte,
// which is not part of the returned slice.
const(char)[] readStringz(ref const(ubyte)[] buffer) @nogc nothrow
{
    const p = cast(char*) buffer.ptr;
    const str = p[0 .. strlen(p)];
    buffer = buffer[str.length+1 .. $];
    return str;
}

/**
 * Reads an unsigned LEB128-encoded integer from the front of `buffer`
 *
 * Each byte contributes its 7 low bits, least significant group first;
 * the high bit of a byte tells whether another byte follows.
 *
 * Params:
 *   buffer = Data to read from; advanced past the encoded value
 *
 * Returns:
 *   The decoded value. Bits that do not fit in 64 bits are discarded.
 */
ulong readULEB128(ref const(ubyte)[] buffer) @nogc nothrow
{
    ulong val = 0;
    uint shift = 0;

    while (true)
    {
        ubyte b = buffer.read!ubyte();

        // The group must be widened before shifting: `(b & 0x7f) << shift`
        // would be computed as a 32-bit `int`, corrupting any value that
        // needs bit 31 or above.
        // Groups at or beyond bit 64 cannot be represented and are dropped.
        if (shift < 64)
            val |= (cast(ulong) (b & 0x7f)) << shift;
        if ((b & 0x80) == 0) break;
        shift += 7;
    }

    return val;
}

unittest
{
    const(ubyte)[] data = [0xe5, 0x8e, 0x26, 0xDE, 0xAD, 0xBE, 0xEF];
    assert(readULEB128(data) == 624_485);
    assert(data[] == [0xDE, 0xAD, 0xBE, 0xEF]);

    // Values using bit 31 must not be sign-extended
    const(ubyte)[] bit31 = [0xFF, 0xFF, 0xFF, 0xFF, 0x0F];
    assert(readULEB128(bit31) == 0xFFFF_FFFFUL);
    assert(bit31.length == 0);

    // Values wider than 32 bits must not be truncated
    const(ubyte)[] wide = [0x80, 0x80, 0x80, 0x80, 0x80, 0x01];
    assert(readULEB128(wide) == (1UL << 35));
    assert(wide.length == 0);
}

/**
 * Reads a signed LEB128-encoded integer from the front of `buffer`
 *
 * Same encoding as `readULEB128`, except that bit 6 of the last byte is
 * the sign bit and is extended to the full width of the result.
 *
 * Params:
 *   buffer = Data to read from; advanced past the encoded value
 *
 * Returns:
 *   The decoded value. Bits that do not fit in 64 bits are discarded.
 */
long readSLEB128(ref const(ubyte)[] buffer) @nogc nothrow
{
    enum uint size = 64; // width of the result, in bits

    long val = 0;
    uint shift = 0;
    ubyte b;

    while (true)
    {
        b = buffer.read!ubyte();
        // Widen before shifting, see `readULEB128`
        if (shift < size)
            val |= (cast(long) (b & 0x7f)) << shift;
        shift += 7;
        if ((b & 0x80) == 0)
            break;
    }

    // Sign-extend using 64-bit arithmetic (`1 << shift` would be an `int`)
    if (shift < size && (b & 0x40) != 0)
        val |= -(1L << shift);

    return val;
}

unittest
{
    const(ubyte)[] small = [0x7e, 0xAA];
    assert(readSLEB128(small) == -2);
    assert(small[] == [0xAA]);

    const(ubyte)[] positive = [0x80, 0x80, 0x80, 0x80, 0x10];
    assert(readSLEB128(positive) == (1L << 32));

    const(ubyte)[] negative = [0x80, 0x80, 0x80, 0x80, 0x80, 0x7f];
    assert(readSLEB128(negative) == -(1L << 35));
}

/// Content type codes of a DWARF v5 directory / file name entry
enum DW_LNCT : ushort
{
    path = 1,
    directoryIndex = 2,
    timestamp = 3,
    size = 4,
    md5 = 5,
    loUser = 0x2000,
    hiUser = 0x3fff,
}

/// Attribute form codes, describing how a value is encoded
enum DW_FORM : ubyte
{
    addr = 1,
    block2 = 3,
    block4 = 4,
    data2 = 5,
    data4 = 6,
    data8 = 7,
    string_ = 8,
    block = 9,
    block1 = 10,
    data1 = 11,
    flag = 12,
    sdata = 13,
    strp = 14,
    udata = 15,
    ref_addr = 16,
    ref1 = 17,
    ref2 = 18,
    ref4 = 19,
    ref8 = 20,
    ref_udata = 21,
    indirect = 22,
    sec_offset = 23,
    exprloc = 24,
    flag_present = 25,
    strx = 26,
    addrx = 27,
    ref_sup4 = 28,
    strp_sup = 29,
    data16 = 30,
    line_strp = 31,
    ref_sig8 = 32,
    implicit_const = 33,
    loclistx = 34,
    rnglistx = 35,
    ref_sup8 = 36,
    strx1 = 37,
    strx2 = 38,
    strx3 = 39,
    strx4 = 40,
    addrx1 = 41,
    addrx2 = 42,
    addrx3 = 43,
    addrx4 = 44,
}

/// One (content type, form) pair of a DWARF v5 entry format
struct EntryFormatPair
{
    DW_LNCT type;
    DW_FORM form;
}

/// Reads a DWARF v5 directory/file name entry format.
Array!EntryFormatPair readEntryFormat(ref const(ubyte)[] buffer) @nogc nothrow
{
    // A single byte holds the number of pairs that follow
    const numPairs = buffer.read!ubyte();

    Array!EntryFormatPair pairs;
    pairs.length = numPairs;

    foreach (ref pair; pairs)
    {
        pair.type = cast(DW_LNCT) buffer.readULEB128();
        pair.form = cast(DW_FORM) buffer.readULEB128();
    }

    debug (DwarfDebugMachine)
    {
        printf("entryFormat: (%d)\n", cast(int) pairs.length);
        foreach (ref pair; pairs)
            printf("\t- type: %d, form: %d\n", cast(int) pair.type, cast(int) pair.form);
    }

    return pairs;
}

/// Standard opcodes of a line number program
enum StandardOpcode : ubyte
{
    extendedOp = 0,
    copy = 1,
    advancePC = 2,
    advanceLine = 3,
    setFile = 4,
    setColumn = 5,
    negateStatement = 6,
    setBasicBlock = 7,
    constAddPC = 8,
    fixedAdvancePC = 9,
    setPrologueEnd = 10,
    setEpilogueBegin = 11,
    setISA = 12,
}

/// Extended opcodes, introduced by `StandardOpcode.extendedOp`
enum ExtendedOpcode : ubyte
{
    endSequence = 1,
    setAddress = 2,
    defineFile = 3,
    setDiscriminator = 4,
}

/// Registers of the line number state machine (see `runStateMachine`)
struct StateMachine
{
    const(void)* address;
    uint operationIndex = 0;
    uint fileIndex = 1;
    uint line = 1;
    uint column = 0;
    uint isa = 0;
    uint discriminator = 0;
    bool isStatement;
    bool isBasicBlock = false;
    bool isEndSequence = false;
    bool isPrologueEnd = false;
    bool isEpilogueBegin = false;
}

/// A (file index, line) pair, as handed to `RunStateMachineCallback`
struct LocationInfo
{
    int file;
    int line;
}

/// Decoded header of a line number program, plus the program itself
struct LineNumberProgram
{
    ulong unitLength;
    ushort dwarfVersion;
    ubyte addressSize;
    ubyte segmentSelectorSize;
    ulong headerLength;
    ubyte minimumInstructionLength;
    ubyte maximumOperationsPerInstruction;
    bool defaultIsStatement;
    byte lineBase;
    ubyte lineRange;
    ubyte opcodeBase;
    const(ubyte)[] standardOpcodeLengths;
    Array!(const(char)[]) includeDirectories;
    Array!SourceFile sourceFiles;
    const(ubyte)[] program;
}

/// An entry of the file table of a line number program
struct SourceFile
{
    const(char)[] file;
    size_t dirIndex; // 1-based
}

/**
 * Reads the header of the line number program at the front of `data`
 *
 * Params:
 *   data = Content of the `.debug_line` section, starting at the unit to
 *          read; advanced to the end of that unit
 *
 * Returns:
 *   The decoded header. Its `program` member, as well as the directory and
 *   file names, are slices of `data`.
 */
LineNumberProgram readLineNumberProgram(ref const(ubyte)[] data) @nogc nothrow
{
    const originalData = data;

    LineNumberProgram lp;

    bool is64bitDwarf = false;
    lp.unitLength = data.read!uint();
    // An initial length of `uint.max` is followed by the 64-bit unit length
    if (lp.unitLength == uint.max)
    {
        is64bitDwarf = true;
        lp.unitLength = data.read!ulong();
    }

    // `unitLength` is counted from the field following it
    const dwarfVersionFieldOffset = cast(size_t) (data.ptr - originalData.ptr);
    lp.dwarfVersion = data.read!ushort();
    assert(lp.dwarfVersion < 6, "DWARF v6+ not supported yet");

    if (lp.dwarfVersion >= 5)
    {
        lp.addressSize = data.read!ubyte();
        lp.segmentSelectorSize = data.read!ubyte();
    }

    lp.headerLength = (is64bitDwarf ? data.read!ulong() : data.read!uint());

    // `headerLength` is counted from the field following it
    const minimumInstructionLengthFieldOffset = cast(size_t) (data.ptr - originalData.ptr);
    lp.minimumInstructionLength = data.read!ubyte();

    lp.maximumOperationsPerInstruction = (lp.dwarfVersion >= 4 ? data.read!ubyte() : 1);
    lp.defaultIsStatement = (data.read!ubyte() != 0);
    lp.lineBase = data.read!byte();
    lp.lineRange = data.read!ubyte();
    lp.opcodeBase = data.read!ubyte();

    lp.standardOpcodeLengths = data[0 .. lp.opcodeBase - 1];
    data = data[lp.opcodeBase - 1 .. $];

    if (lp.dwarfVersion >= 5)
    {
        // Skips over a value encoded with `form`
        static void consumeGenericForm(ref const(ubyte)[] data, DW_FORM form, bool is64bitDwarf)
        {
            with (DW_FORM) switch (form)
            {
                case strp, strp_sup, line_strp:
                    data = data[is64bitDwarf ? 8 : 4 .. $]; break;
                case data1, strx1:
                    data = data[1 .. $]; break;
                case data2, strx2:
                    data = data[2 .. $]; break;
                case strx3:
                    data = data[3 .. $]; break;
                case data4, strx4:
                    data = data[4 .. $]; break;
                case data8:
                    data = data[8 .. $]; break;
                case data16:
                    data = data[16 .. $]; break;
                case udata, strx:
                    data.readULEB128(); break;
                case block:
                    const length = cast(size_t) data.readULEB128();
                    data = data[length .. $];
                    break;
                default:
                    assert(0); // TODO: support other forms for vendor extensions
            }
        }

        const dirFormat = data.readEntryFormat();
        lp.includeDirectories.length = cast(size_t) data.readULEB128();
        foreach (ref dir; lp.includeDirectories)
        {
            dir = "<unknown dir>"; // fallback
            foreach (ref pair; dirFormat)
            {
                if (pair.type == DW_LNCT.path &&
                    // TODO: support other forms too (offsets in other sections)
                    pair.form == DW_FORM.string_)
                {
                    dir = data.readStringz();
                }
                else // uninteresting type
                    consumeGenericForm(data, pair.form, is64bitDwarf);
            }
        }

        const fileFormat = data.readEntryFormat();
        lp.sourceFiles.length = cast(size_t) data.readULEB128();
        foreach (ref sf; lp.sourceFiles)
        {
            sf.file = "<unknown file>"; // fallback
            foreach (ref pair; fileFormat)
            {
                if (pair.type == DW_LNCT.path &&
                    // TODO: support other forms too (offsets in other sections)
                    pair.form == DW_FORM.string_)
                {
                    sf.file = data.readStringz();
                }
                else if (pair.type == DW_LNCT.directoryIndex)
                {
                    if (pair.form == DW_FORM.data1)
                        sf.dirIndex = data.read!ubyte();
                    else if (pair.form == DW_FORM.data2)
                        sf.dirIndex = data.read!ushort();
                    else if (pair.form == DW_FORM.udata)
                        sf.dirIndex = cast(size_t) data.readULEB128();
                    else
                        assert(0); // not allowed by DWARF 5 spec
                    sf.dirIndex++; // DWARF v5 indices are 0-based
                }
                else // uninteresting type
                    consumeGenericForm(data, pair.form, is64bitDwarf);
            }
        }
    }
    else
    {
        // A sequence ends with a null-byte.
        static auto readSequence(alias ReadEntry)(ref const(ubyte)[] data)
        {
            alias ResultType = typeof(ReadEntry(data));

            // First pass, on a copy of the slice: count the entries
            static size_t count(const(ubyte)[] data)
            {
                size_t count = 0;
                while (data.length && data[0] != 0)
                {
                    ReadEntry(data);
                    ++count;
                }
                return count;
            }

            const numEntries = count(data);

            Array!ResultType result;
            result.length = numEntries;

            // Second pass: actually read the entries
            foreach (i; 0 .. numEntries)
                result[i] = ReadEntry(data);

            data = data[1 .. $]; // skip over sequence-terminating null

            return result;
        }

        /// Directories are simply a sequence of NUL-terminated strings
        static const(char)[] readIncludeDirectoryEntry(ref const(ubyte)[] data)
        {
            return data.readStringz();
        }
        lp.includeDirectories = readSequence!readIncludeDirectoryEntry(data);

        static SourceFile readFileNameEntry(ref const(ubyte)[] data)
        {
            const file = data.readStringz();
            const dirIndex = cast(size_t) data.readULEB128();
            data.readULEB128(); // last mod
            data.readULEB128(); // file len

            return SourceFile(
                file,
                dirIndex,
            );
        }
        lp.sourceFiles = readSequence!readFileNameEntry(data);
    }

    debug (DwarfDebugMachine)
    {
        printf("include_directories: (%d)\n", cast(int) lp.includeDirectories.length);
        foreach (dir; lp.includeDirectories)
            printf("\t- %.*s\n", cast(int) dir.length, dir.ptr);
        printf("source_files: (%d)\n", cast(int) lp.sourceFiles.length);
        foreach (ref sf; lp.sourceFiles)
        {
            if (sf.dirIndex > lp.includeDirectories.length)
                printf("\t- Out of bound directory! (%llu): %.*s\n",
                       sf.dirIndex, cast(int) sf.file.length, sf.file.ptr);
            else if (sf.dirIndex > 0)
            {
                const dir = lp.includeDirectories[sf.dirIndex - 1];
                printf("\t- (Dir:%llu:%.*s/)%.*s\n", sf.dirIndex,
                       cast(int) dir.length, dir.ptr,
                       cast(int) sf.file.length, sf.file.ptr);
            }
            else
                printf("\t- %.*s\n", cast(int) sf.file.length, sf.file.ptr);
        }
    }

    const programStart = cast(size_t) (minimumInstructionLengthFieldOffset + lp.headerLength);
    const programEnd = cast(size_t) (dwarfVersionFieldOffset + lp.unitLength);
    lp.program = originalData[programStart .. programEnd];

    // Leave `data` at the start of the next unit, if any
    data = originalData[programEnd .. $];

    return lp;
}