/**
 * Extract symbols from a Mach-O object file.
 *
 * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/scanmach.d, _scanmach.d)
 * Documentation:  https://dlang.org/phobos/dmd_scanmach.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/scanmach.d
 */

module dmd.scanmach;

import core.stdc.string;
import core.stdc.stdint;

import dmd.errorsink;
import dmd.location;

//import core.sys.darwin.mach.loader;
import dmd.backend.mach;

private enum LOG = false;

/*****************************************
 * Reads an object module from base[] and passes the names
 * of any exported symbols to (*pAddSymbol)().
 *
 * Every offset and size taken from the file is validated against base[]
 * before it is dereferenced. A malformed module is reported through eSink
 * as "corrupt" (with the source line of the failed check as the reason)
 * instead of being read out of bounds. Symbols that precede a malformed
 * symbol entry may already have been passed to pAddSymbol when the error
 * is reported.
 *
 * Params:
 *      pAddSymbol =  function to pass the names to
 *      base =        array of contents of object module
 *      module_name = name of the object module (used for error messages)
 *      loc =         location to use for error printing
 *      eSink =       where the error messages go
 */
void scanMachObjModule(void delegate(const(char)[] name, int pickAny) pAddSymbol,
        const ubyte[] base, const char* module_name, Loc loc, ErrorSink eSink)
{
    static if (LOG)
    {
        import core.stdc.stdio : printf;
        printf("scanMachObjModule(%s)\n", module_name);
    }

    void corrupt(int reason)
    {
        eSink.error(loc, "corrupt Mach-O object module `%s` %d", module_name, reason);
    }

    const buf = base.ptr;
    const size_t buflen = base.length;

    /* First do sanity checks on object file
     */
    if (buflen < mach_header.sizeof)
        return corrupt(__LINE__);

    const header = cast(const(mach_header)*)buf;
    const bool is64 = header.magic == MH_MAGIC_64;

    size_t headerSize;      // size of the (32 or 64 bit) file header
    uint32_t ncmds;         // number of load commands
    uint32_t sizeofcmds;    // total size in bytes of all load commands

    if (header.magic == MH_MAGIC)
    {
        if (header.cputype != CPU_TYPE_I386)
        {
            eSink.error(loc, "Mach-O object module `%s` has cputype = %d, should be %d", module_name, header.cputype, CPU_TYPE_I386);
            return;
        }
        if (header.filetype != MH_OBJECT)
        {
            eSink.error(loc, "Mach-O object module `%s` has file type = %d, should be %d", module_name, header.filetype, MH_OBJECT);
            return;
        }
        headerSize = mach_header.sizeof;
        ncmds = header.ncmds;
        sizeofcmds = header.sizeofcmds;
    }
    else if (is64)
    {
        // The 64 bit header is larger; make sure all of it is present
        // before looking at any of its fields.
        if (buflen < mach_header_64.sizeof)
            return corrupt(__LINE__);

        const header64 = cast(const(mach_header_64)*)buf;
        if (header64.cputype != CPU_TYPE_X86_64)
        {
            eSink.error(loc, "Mach-O object module `%s` has cputype = %d, should be %d", module_name, header64.cputype, CPU_TYPE_X86_64);
            return;
        }
        if (header64.filetype != MH_OBJECT)
        {
            eSink.error(loc, "Mach-O object module `%s` has file type = %d, should be %d", module_name, header64.filetype, MH_OBJECT);
            return;
        }
        headerSize = mach_header_64.sizeof;
        ncmds = header64.ncmds;
        sizeofcmds = header64.sizeofcmds;
    }
    else
        return corrupt(__LINE__);

    // Written as a subtraction so the test cannot wrap on 32 bit hosts.
    if (sizeofcmds > buflen - headerSize)
        return corrupt(__LINE__);

    /* Commands immediately follow the header. Walk them looking for the
     * symbol table command; if there are several, the last one wins.
     * Each command must lie entirely inside the command area, and must be
     * at least as large as a load_command so that the walk always advances.
     */
    const size_t cmdsEnd = headerSize + sizeofcmds;
    size_t cmdOff = headerSize;
    const(symtab_command)* symtab_commands = null;
    foreach (i; 0 .. ncmds)
    {
        const size_t room = cmdsEnd - cmdOff;
        if (room < load_command.sizeof)
            return corrupt(__LINE__);

        const command = cast(const(load_command)*)(buf + cmdOff);
        //printf("cmd = 0x%02x, cmdsize = %u\n", command.cmd, command.cmdsize);
        if (command.cmdsize < load_command.sizeof || command.cmdsize > room)
            return corrupt(__LINE__);

        if (command.cmd == LC_SYMTAB)
        {
            // All symtab_command fields must be inside the command
            if (command.cmdsize < symtab_command.sizeof)
                return corrupt(__LINE__);
            symtab_commands = cast(const(symtab_command)*)command;
        }
        cmdOff += command.cmdsize;
    }

    if (!symtab_commands)
        return;

    // Get the string table, checking its extent without 32 bit wraparound
    const size_t stroff = symtab_commands.stroff;
    const size_t strsize = symtab_commands.strsize;
    if (stroff > buflen || strsize > buflen - stroff)
        return corrupt(__LINE__);
    const(char)[] strtab = (cast(const(char)*)buf)[stroff .. stroff + strsize];

    // Get the symbol table; dividing avoids overflow of nsyms * entrySize
    const size_t symoff = symtab_commands.symoff;
    const size_t nsyms = symtab_commands.nsyms;
    const size_t entrySize = is64 ? nlist_64.sizeof : nlist.sizeof;
    if (symoff > buflen || nsyms > (buflen - symoff) / entrySize)
        return corrupt(__LINE__);

    const bool ok = is64
        ? addExportedSymbols!nlist_64(pAddSymbol,
            (cast(const(nlist_64)*)(buf + symoff))[0 .. nsyms], strtab)
        : addExportedSymbols!nlist(pAddSymbol,
            (cast(const(nlist)*)(buf + symoff))[0 .. nsyms], strtab);
    if (!ok)
        return corrupt(__LINE__);
}

private: // for the remainder of this module

/*****************************************
 * Passes the name of each exported symbol in symtab[] to pAddSymbol.
 *
 * A symbol is passed on when it is not a STAB (debugging) entry, has
 * N_EXT set, and is either N_SECT or an N_UNDF entry with a non-zero
 * n_value (a common definition). pickAny is always passed as 1.
 *
 * Params:
 *      NList =      symbol table entry type, `nlist` or `nlist_64`
 *      pAddSymbol = function to pass the names to
 *      symtab =     the symbol table entries
 *      strtab =     the string table the entries' n_strx index into
 * Returns:
 *      false if the name of a symbol to be passed on does not lie, NUL
 *      terminator included, inside strtab[]; true otherwise
 */
bool addExportedSymbols(NList)(void delegate(const(char)[] name, int pickAny) pAddSymbol,
        const(NList)[] symtab, const(char)[] strtab)
{
    foreach (ref s; symtab)
    {
        if (s.n_type & N_STAB)
        {
            // values in /usr/include/mach-o/stab.h
            continue;
        }

        if (!(s.n_type & N_EXT))
            continue;

        switch (s.n_type & N_TYPE)
        {
            case N_UNDF:
                if (s.n_value == 0)     // plain undefined reference, not a comdef
                    continue;
                break;

            case N_SECT:                /*&& !(s.n_desc & N_REF_TO_WEAK)*/
                break;

            default:                    // N_ABS, N_PBUD, N_INDR and anything else
                continue;
        }

        // Only now is the name needed. nlist.n_strx is signed, so widen it
        // first and reject negative indices along with too large ones.
        const long strx = s.n_strx;
        if (strx < 0 || strx >= strtab.length)
            return false;

        const(char)[] tail = strtab[cast(size_t)strx .. $];
        const nul = cast(const(char)*)memchr(tail.ptr, 0, tail.length);
        if (!nul)
            return false;               // name is not terminated inside the table

        pAddSymbol(tail[0 .. cast(size_t)(nul - tail.ptr)], 1);
    }
    return true;
}

enum CPU_TYPE_I386 = 7;
enum CPU_TYPE_X86_64 = CPU_TYPE_I386 | 0x1000000;

enum MH_OBJECT = 0x1;

struct segment_command
{
    uint32_t cmd;
    uint32_t cmdsize;
    char[16] segname;
    uint32_t vmaddr;
    uint32_t vmsize;
    uint32_t fileoff;
    uint32_t filesize;
    int32_t maxprot;
    int32_t initprot;
    uint32_t nsects;
    uint32_t flags;
}

struct segment_command_64
{
    uint32_t cmd;
    uint32_t cmdsize;
    char[16] segname;
    uint64_t vmaddr;
    uint64_t vmsize;
    uint64_t fileoff;
    uint64_t filesize;
    int32_t maxprot;
    int32_t initprot;
    uint32_t nsects;
    uint32_t flags;
}

struct symtab_command
{
    uint32_t cmd;
    uint32_t cmdsize;
    uint32_t symoff;        // file offset of the symbol table
    uint32_t nsyms;         // number of symbol table entries
    uint32_t stroff;        // file offset of the string table
    uint32_t strsize;       // size in bytes of the string table
}

struct dysymtab_command
{
    uint32_t cmd;
    uint32_t cmdsize;
    uint32_t ilocalsym;
    uint32_t nlocalsym;
    uint32_t iextdefsym;
    uint32_t nextdefsym;
    uint32_t iundefsym;
    uint32_t nundefsym;
    uint32_t tocoff;
    uint32_t ntoc;
    uint32_t modtaboff;
    uint32_t nmodtab;
    uint32_t extrefsymoff;
    uint32_t nextrefsyms;
    uint32_t indirectsymoff;
    uint32_t nindirectsyms;
    uint32_t extreloff;
    uint32_t nextrel;
    uint32_t locreloff;
    uint32_t nlocrel;
}

enum LC_SEGMENT = 1;
enum LC_SYMTAB = 2;
enum LC_DYSYMTAB = 11;
enum LC_SEGMENT_64 = 0x19;

struct load_command
{
    uint32_t cmd;
    uint32_t cmdsize;
}

enum N_EXT = 1;
enum N_STAB = 0xE0;
enum N_PEXT = 0x10;
enum N_TYPE = 0x0E;
enum N_UNDF = 0;
enum N_ABS = 2;
enum N_INDR = 10;
enum N_PBUD = 12;
enum N_SECT = 14;

struct nlist
{
    int32_t n_strx;
    uint8_t n_type;
    uint8_t n_sect;
    int16_t n_desc;
    uint32_t n_value;
}

struct nlist_64
{
    uint32_t n_strx;
    uint8_t n_type;
    uint8_t n_sect;
    uint16_t n_desc;
    uint64_t n_value;
}