/**
 * Extract symbols from a Mach-O object file.
 *
 * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/scanmach.d, _scanmach.d)
 * Documentation:  https://dlang.org/phobos/dmd_scanmach.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/scanmach.d
 */

module dmd.scanmach;

import core.stdc.string;
import core.stdc.stdint;
import dmd.globals;
import dmd.errors;
import dmd.location;

//import core.sys.darwin.mach.loader;
import dmd.backend.mach;

private enum LOG = false;

/*****************************************
 * Reads an object module from base[] and passes the names
 * of any exported symbols to (*pAddSymbol)().
 *
 * Every offset and size read from the file is validated against
 * `base.length` before it is dereferenced; a file that fails validation is
 * reported through `error()` as a corrupt module and scanning stops.
 *
 * Params:
 *      pAddSymbol =  function to pass the names to
 *      base =        array of contents of object module
 *      module_name = name of the object module (used for error messages)
 *      loc =         location to use for error printing
 */
void scanMachObjModule(void delegate(const(char)[] name, int pickAny) pAddSymbol,
        const(ubyte)[] base, const(char)* module_name, Loc loc)
{
    static if (LOG)
    {
        printf("scanMachObjModule(%s)\n", module_name);
    }

    // `reason` is the source line of the check that failed
    void corrupt(int reason)
    {
        error(loc, "corrupt Mach-O object module `%s` %d", module_name, reason);
    }

    const buf = base.ptr;
    const size_t buflen = base.length;

    /* First do sanity checks on object file
     */
    if (buflen < mach_header.sizeof)
        return corrupt(__LINE__);

    const header = cast(const(mach_header)*)buf;
    const magic = header.magic;
    const bool is64 = magic == MH_MAGIC_64;

    size_t headerSize;  // size of the mach_header variant in use
    ulong cmdsEndWide;  // header size + sizeofcmds, computed in 64 bits so it cannot wrap
    uint32_t ncmds;

    if (magic == MH_MAGIC)
    {
        if (header.cputype != CPU_TYPE_I386)
        {
            error(loc, "Mach-O object module `%s` has cputype = %d, should be %d", module_name, header.cputype, CPU_TYPE_I386);
            return;
        }
        if (header.filetype != MH_OBJECT)
        {
            error(loc, "Mach-O object module `%s` has file type = %d, should be %d", module_name, header.filetype, MH_OBJECT);
            return;
        }
        headerSize = mach_header.sizeof;
        cmdsEndWide = cast(ulong)mach_header.sizeof + header.sizeofcmds;
        ncmds = header.ncmds;
    }
    else if (is64)
    {
        if (buflen < mach_header_64.sizeof)
            return corrupt(__LINE__);
        const header64 = cast(const(mach_header_64)*)buf;
        if (header64.cputype != CPU_TYPE_X86_64)
        {
            error(loc, "Mach-O object module `%s` has cputype = %d, should be %d", module_name, header64.cputype, CPU_TYPE_X86_64);
            return;
        }
        if (header64.filetype != MH_OBJECT)
        {
            error(loc, "Mach-O object module `%s` has file type = %d, should be %d", module_name, header64.filetype, MH_OBJECT);
            return;
        }
        headerSize = mach_header_64.sizeof;
        cmdsEndWide = cast(ulong)mach_header_64.sizeof + header64.sizeofcmds;
        ncmds = header64.ncmds;
    }
    else
        return corrupt(__LINE__);

    if (buflen < cmdsEndWide)
        return corrupt(__LINE__);
    const size_t cmdsEnd = cast(size_t)cmdsEndWide; // fits: cmdsEndWide <= buflen

    /* Walk the load commands, which immediately follow the header.
     * Invariant: offset <= cmdsEnd, so `cmdsEnd - offset` never underflows.
     * If several LC_SYMTAB commands are present the last one wins.
     */
    const(symtab_command)* symtabCmd = null;
    size_t offset = headerSize;
    foreach (i; 0 .. ncmds)
    {
        if (cmdsEnd - offset < load_command.sizeof)
            return corrupt(__LINE__);
        const command = cast(const(load_command)*)(buf + offset);
        //printf("cmd = 0x%02x, cmdsize = %u\n", command.cmd, command.cmdsize);
        const size_t cmdsize = command.cmdsize;
        if (cmdsize < load_command.sizeof || cmdsize > cmdsEnd - offset)
            return corrupt(__LINE__);
        if (command.cmd == LC_SYMTAB)
        {
            // the command must be large enough to hold every field read below
            if (cmdsize < symtab_command.sizeof)
                return corrupt(__LINE__);
            symtabCmd = cast(const(symtab_command)*)command;
        }
        offset += cmdsize;
    }

    if (!symtabCmd)
        return;

    // Get the string table; the sum is widened so that stroff + strsize cannot wrap
    if (buflen < cast(ulong)symtabCmd.stroff + symtabCmd.strsize)
        return corrupt(__LINE__);
    const size_t stroff = symtabCmd.stroff;
    const(char)[] strtab = (cast(const(char)*)buf)[stroff .. stroff + symtabCmd.strsize];

    const reason = is64
        ? scanSymbols!nlist_64(pAddSymbol, base, symtabCmd.symoff, symtabCmd.nsyms, strtab)
        : scanSymbols!nlist(pAddSymbol, base, symtabCmd.symoff, symtabCmd.nsyms, strtab);
    if (reason)
        corrupt(reason);
}

/*****************************************
 * Walks a Mach-O symbol table and reports each exported symbol.
 *
 * A symbol is reported when it is not a debugging (N_STAB) entry and is
 * either an external N_SECT symbol or an external N_UNDF symbol with a
 * non-zero value (a common definition).
 *
 * Params:
 *      NList =      symbol table entry type, `nlist` or `nlist_64`
 *      pAddSymbol = function to pass the names to
 *      base =       array of contents of object module
 *      symoff =     file offset of the symbol table
 *      nsyms =      number of entries in the symbol table
 *      strtab =     the string table, already bounds-checked by the caller
 * Returns:
 *      0 on success, otherwise the source line number of the validation
 *      check that failed (used as the `reason` in the error message)
 */
private int scanSymbols(NList)(void delegate(const(char)[] name, int pickAny) pAddSymbol,
        const(ubyte)[] base, uint32_t symoff, uint32_t nsyms, const(char)[] strtab)
{
    // Get the symbol table; 64-bit arithmetic so the product and sum cannot wrap
    const ulong count = nsyms;
    if (base.length < symoff + count * NList.sizeof)
        return __LINE__;
    const(NList)[] syms = (cast(const(NList)*)(base.ptr + symoff))[0 .. cast(size_t)count];

    // For each symbol
    foreach (ref sym; syms)
    {
        if (sym.n_type & N_STAB)
        {
            // values in /usr/include/mach-o/stab.h
            continue;
        }

        bool exported;
        switch (sym.n_type & N_TYPE)
        {
            case N_UNDF:
                exported = (sym.n_type & N_EXT) && sym.n_value != 0; // comdef
                break;
            case N_SECT:
                exported = (sym.n_type & N_EXT) != 0; /*&& !(sym.n_desc & N_REF_TO_WEAK)*/
                break;
            default: // N_ABS, N_PBUD, N_INDR and anything else: not exported
                exported = false;
                break;
        }
        if (!exported)
            continue;

        /* Only names that are actually reported get looked up. n_strx is
         * treated as unsigned, so a negative index in a 32-bit `nlist`
         * shows up as out of range rather than pointing before the table.
         */
        const size_t strx = cast(uint)sym.n_strx;
        if (strx >= strtab.length)
            return __LINE__;
        const(char)[] tail = strtab[strx .. $];
        // the terminating 0 must lie inside the string table
        const terminator = cast(const(char)*)memchr(tail.ptr, 0, tail.length);
        if (!terminator)
            return __LINE__;
        pAddSymbol(tail[0 .. cast(size_t)(terminator - tail.ptr)], 1);
    }
    return 0;
}

private: // for the remainder of this module

enum CPU_TYPE_I386 = 7;
enum CPU_TYPE_X86_64 = CPU_TYPE_I386 | 0x1000000;

enum MH_OBJECT = 0x1;

struct segment_command
{
    uint32_t cmd;
    uint32_t cmdsize;
    char[16] segname;
    uint32_t vmaddr;
    uint32_t vmsize;
    uint32_t fileoff;
    uint32_t filesize;
    int32_t maxprot;
    int32_t initprot;
    uint32_t nsects;
    uint32_t flags;
}

struct segment_command_64
{
    uint32_t cmd;
    uint32_t cmdsize;
    char[16] segname;
    uint64_t vmaddr;
    uint64_t vmsize;
    uint64_t fileoff;
    uint64_t filesize;
    int32_t maxprot;
    int32_t initprot;
    uint32_t nsects;
    uint32_t flags;
}

struct symtab_command
{
    uint32_t cmd;
    uint32_t cmdsize;
    uint32_t symoff;
    uint32_t nsyms;
    uint32_t stroff;
    uint32_t strsize;
}

struct dysymtab_command
{
    uint32_t cmd;
    uint32_t cmdsize;
    uint32_t ilocalsym;
    uint32_t nlocalsym;
    uint32_t iextdefsym;
    uint32_t nextdefsym;
    uint32_t iundefsym;
    uint32_t nundefsym;
    uint32_t tocoff;
    uint32_t ntoc;
    uint32_t modtaboff;
    uint32_t nmodtab;
    uint32_t extrefsymoff;
    uint32_t nextrefsyms;
    uint32_t indirectsymoff;
    uint32_t nindirectsyms;
    uint32_t extreloff;
    uint32_t nextrel;
    uint32_t locreloff;
    uint32_t nlocrel;
}

enum LC_SEGMENT = 1;
enum LC_SYMTAB = 2;
enum LC_DYSYMTAB = 11;
enum LC_SEGMENT_64 = 0x19;

struct load_command
{
    uint32_t cmd;
    uint32_t cmdsize;
}

enum N_EXT = 1;
enum N_STAB = 0xE0;
enum N_PEXT = 0x10;
enum N_TYPE = 0x0E;
enum N_UNDF = 0;
enum N_ABS = 2;
enum N_INDR = 10;
enum N_PBUD = 12;
enum N_SECT = 14;

struct nlist
{
    int32_t n_strx;
    uint8_t n_type;
    uint8_t n_sect;
    int16_t n_desc;
    uint32_t n_value;
}

struct nlist_64
{
    uint32_t n_strx;
    uint8_t n_type;
    uint8_t n_sect;
    uint16_t n_desc;
    uint64_t n_value;
}