/**
 * A library in the Mach-O format, used on macOS.
 *
 * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/libmach.d, _libmach.d)
 * Documentation:  https://dlang.org/phobos/dmd_libmach.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/libmach.d
 */

module dmd.libmach;

import core.stdc.time;
import core.stdc.string;
import core.stdc.stdlib;
import core.stdc.stdio;
import core.stdc.config;

version (Posix)
{
    import core.sys.posix.sys.stat;
    import core.sys.posix.unistd;
}
version (Windows)
{
    import core.sys.windows.stat;
}

import dmd.globals;
import dmd.lib;
import dmd.location;
import dmd.utils;

import dmd.root.array;
import dmd.root.file;
import dmd.root.filename;
import dmd.common.outbuffer;
import dmd.root.port;
import dmd.root.rmem;
import dmd.root.string;
import dmd.root.stringtable;

import dmd.scanmach;

// Entry point (only public symbol in this module).
/// Returns: a newly allocated, empty `LibMach`.
public extern (C++) Library LibMach_factory()
{
    return new LibMach();
}

private: // for the remainder of this module

enum LOG = false;

/// One entry of the library's symbol dictionary: a symbol and the module defining it.
struct MachObjSymbol
{
    const(char)[] name;         // still has a terminating 0
    MachObjModule* om;          // module that `name` was recorded for
}

alias MachObjModules = Array!(MachObjModule*);
alias MachObjSymbols = Array!(MachObjSymbol*);

/// Archive member names of the symbol table, without the blank padding of the header field.
enum SYMDEF_NAME = "__.SYMDEF";
/// ditto
enum SYMDEF_SORTED_NAME = "__.SYMDEF SORTED";

/**
 * Compare a fixed-width, blank-padded archive header field against `name`.
 *
 * The comparison is done on the name plus explicit padding check rather than
 * against a pre-padded literal, so it does not depend on the exact number of
 * spaces typed into a string literal.
 *
 * Params:
 *      field = header field contents (e.g. `header.object_name[]`)
 *      name = expected text, without padding
 * Returns: true if `field` starts with `name` and every remaining byte is a space
 */
bool memberNameIs(const(char)[] field, const(char)[] name)
{
    if (name.length > field.length)
        return false;
    if (memcmp(field.ptr, name.ptr, name.length) != 0)
        return false;
    foreach (c; field[name.length .. $])
    {
        if (c != ' ')
            return false;
    }
    return true;
}

/**
 * Store `text` left-justified into the fixed-width header field `field`
 * and pad the remainder with spaces. No terminating 0 is written.
 *
 * Params:
 *      field = destination header field
 *      text = characters to store; must not be longer than `field`
 */
void setHeaderField(char[] field, const(char)[] text)
{
    assert(text.length <= field.length);
    memcpy(field.ptr, text.ptr, text.length);
    memset(field.ptr + text.length, ' ', field.length - text.length);
}

/// In-memory representation of a Mach-O static library (an `ar` archive).
final class LibMach : Library
{
    MachObjModules objmodules; // MachObjModule[]
    MachObjSymbols objsymbols; // MachObjSymbol[]
    StringTable!(MachObjSymbol*) tab;

    extern (D) this()
    {
        tab._init(14_000);
    }

    /***************************************
     * Add object module or library to the library.
     * Examine the buffer to see which it is.
     * If the buffer is NULL, use module_name as the file name
     * and load the file.
     *
     * A buffer starting with `!<arch>\n` is treated as an archive: every member
     * other than the symbol table is appended to `objmodules` (not rescanned),
     * and the archive's own symbol table is imported via `addSymbol`.
     * Anything else is treated as a single object module that will be scanned
     * for symbols when the library is written.
     *
     * Malformed input is reported through `error` ("corrupt Mach object module")
     * and the function returns early.
     *
     * Params:
     *      module_name = file name of the object module or library
     *      buffer = contents of the file, or null to read `module_name` from disk
     */
    override void addObject(const(char)[] module_name, const ubyte[] buffer)
    {
        static if (LOG)
        {
            printf("LibMach::addObject(%.*s)\n",
                   cast(int)module_name.length, module_name.ptr);
        }

        // `reason` is the source line of the failed check, for diagnostics only
        void corrupt(int reason)
        {
            error("corrupt Mach object module %.*s %d",
                  cast(int)module_name.length, module_name.ptr, reason);
        }

        int fromfile = 0;
        auto buf = buffer.ptr;
        auto buflen = buffer.length;
        if (!buf)
        {
            assert(module_name[0]);
            // read file and take buffer ownership
            auto data = readFile(Loc.initial, module_name).extractSlice();
            buf = data.ptr;
            buflen = data.length;
            fromfile = 1;
        }
        if (buflen < 16)
        {
            static if (LOG)
            {
                printf("buf = %p, buflen = %d\n", buf, buflen);
            }
            return corrupt(__LINE__);
        }
        if (memcmp(buf, "!<arch>\n".ptr, 8) == 0)
        {
            /* Library file.
             * Pull each object module out of the library and add it
             * to the object module array.
             */
            static if (LOG)
            {
                printf("archive, buf = %p, buflen = %d\n", buf, buflen);
            }
            uint offset = 8;
            char* symtab = null;
            uint symtab_size = 0;
            uint mstart = cast(uint)objmodules.length;
            while (offset < buflen)
            {
                if (offset + MachLibHeader.sizeof >= buflen)
                    return corrupt(__LINE__);
                MachLibHeader* header = cast(MachLibHeader*)(cast(ubyte*)buf + offset);
                offset += MachLibHeader.sizeof;
                char* endptr = null;
                uint size = cast(uint)strtoul(header.file_size.ptr, &endptr, 10);
                if (endptr >= header.file_size.ptr + 10 || *endptr != ' ')
                    return corrupt(__LINE__);
                // offset < buflen here, so the subtraction cannot wrap; comparing this
                // way also avoids the uint overflow of `offset + size`
                if (size > buflen - offset)
                    return corrupt(__LINE__);
                if (memberNameIs(header.object_name[], SYMDEF_NAME) ||
                    memberNameIs(header.object_name[], SYMDEF_SORTED_NAME))
                {
                    /* Instead of rescanning the object modules we pull from a
                     * library, just use the already created symbol table.
                     */
                    if (symtab)
                        return corrupt(__LINE__);
                    symtab = cast(char*)buf + offset;
                    symtab_size = size;
                    if (size < 4)
                        return corrupt(__LINE__);
                }
                else
                {
                    auto om = new MachObjModule();
                    om.base = cast(ubyte*)buf + offset - MachLibHeader.sizeof;
                    om.length = cast(uint)(size + MachLibHeader.sizeof);
                    om.offset = 0;
                    // the member name is read from the bytes directly after the header
                    const n = cast(const(char)*)(om.base + MachLibHeader.sizeof);
                    om.name = n.toDString();
                    om.file_time = cast(uint)strtoul(header.file_time.ptr, &endptr, 10);
                    om.user_id = cast(uint)strtoul(header.user_id.ptr, &endptr, 10);
                    om.group_id = cast(uint)strtoul(header.group_id.ptr, &endptr, 10);
                    om.file_mode = cast(uint)strtoul(header.file_mode.ptr, &endptr, 8);
                    om.scan = 0; // don't scan object module for symbols
                    objmodules.push(om);
                }
                offset += (size + 1) & ~1; // members start on even offsets
            }
            if (offset != buflen)
                return corrupt(__LINE__);

            /* Scan the library's symbol table, and insert it into our own.
             * We use this instead of rescanning the object module, because
             * the library's creator may have a different idea of what symbols
             * go into the symbol table than we do.
             * This is also probably faster.
             */
            if (!symtab)
                return corrupt(__LINE__); // archive has no symbol table member

            /* Layout of the symbol table member, as read below:
             *   4 bytes             size in bytes of the entry array (8 bytes per entry)
             *   nsymbols * 8 bytes  entries: { string offset, module offset }
             *   4 bytes             (skipped)
             *   remaining bytes     0-terminated symbol names
             */
            const size_t nsymbols = Port.readlongLE(symtab) / 8;
            // computed in 64 bits so that a huge entry count cannot wrap around
            const ulong stringsStart = 4 + cast(ulong)nsymbols * 8 + 4;
            if (stringsStart > symtab_size)
                return corrupt(__LINE__);
            const(char)* s = symtab + cast(size_t)stringsStart;
            const size_t stringsLen = symtab_size - cast(size_t)stringsStart;
            for (size_t i = 0; i < nsymbols; i++)
            {
                const uint soff = Port.readlongLE(symtab + 4 + i * 8);
                if (soff >= stringsLen)
                    return corrupt(__LINE__);
                const(char)* name = s + soff;
                // bounded search: the name must be terminated inside the symbol table
                const terminator = cast(const(char)*)memchr(name, 0, stringsLen - soff);
                if (!terminator)
                    return corrupt(__LINE__);
                const size_t namelen = cast(size_t)(terminator - name);
                //printf("soff = x%x name = %s\n", soff, name);
                uint moff = Port.readlongLE(symtab + 4 + i * 8 + 4);
                //printf("symtab[%d] moff = x%x  x%x, name = %s\n", i, moff, moff + MachLibHeader.sizeof, name);
                // find the module (among those added from this archive) whose header
                // sits at file offset `moff`
                for (uint m = mstart; 1; m++)
                {
                    if (m == objmodules.length)
                        return corrupt(__LINE__); // didn't find it
                    MachObjModule* om = objmodules[m];
                    //printf("\tom offset = x%x\n", cast(char *)om.base - cast(char *)buf);
                    if (moff == cast(char*)om.base - cast(char*)buf)
                    {
                        addSymbol(om, name[0 .. namelen], 1);
                        //if (mstart == m)
                        //    mstart++;
                        break;
                    }
                }
            }
            return;
        }
        /* It's an object module
         */
        auto om = new MachObjModule();
        om.base = cast(ubyte*)buf;
        om.length = cast(uint)buflen;
        om.offset = 0;
        const n = FileName.name(module_name); // remove path, but not extension
        om.name = n;
        om.scan = 1;
        if (fromfile)
        {
            version (Posix)
                stat_t statbuf;
            version (Windows)
                struct_stat statbuf;
            int i = module_name.toCStringThen!(slice => stat(slice.ptr, &statbuf));
            if (i == -1) // error, errno is set
                return corrupt(__LINE__);
            om.file_time = statbuf.st_ctime;
            om.user_id = statbuf.st_uid;
            om.group_id = statbuf.st_gid;
            om.file_mode = statbuf.st_mode;
        }
        else
        {
            /* Mock things up for the object module file that never was
             * actually written out.
             */
            version (Posix)
            {
                // query the ids once and reuse them for later calls
                __gshared uid_t uid;
                __gshared gid_t gid;
                __gshared int _init;
                if (!_init)
                {
                    _init = 1;
                    uid = getuid();
                    gid = getgid();
                }
                om.user_id = uid;
                om.group_id = gid;
            }
            version (Windows)
            {
                om.user_id = 0;  // meaningless on Windows
                om.group_id = 0; // meaningless on Windows
            }
            time(&om.file_time);
            om.file_mode = (1 << 15) | (6 << 6) | (4 << 3) | (4 << 0); // 0100644
        }
        objmodules.push(om);
    }

    /*****************************************************************************/

    /**
     * Record that module `om` defines symbol `name`.
     *
     * Duplicate detection is compiled out (`version (none)`); every call appends
     * a new entry to `objsymbols` and duplicates are left to the linker.
     *
     * Params:
     *      om = module the symbol belongs to
     *      name = symbol name; a copy is stored
     *      pickAny = only consulted by the disabled duplicate check
     */
    void addSymbol(MachObjModule* om, const(char)[] name, int pickAny = 0)
    {
        static if (LOG)
        {
            printf("LibMach::addSymbol(%s, %s, %d)\n", om.name.ptr, name.ptr, pickAny);
        }
        version (none)
        {
            // let linker sort out duplicates
            StringValue* s = tab.insert(name.ptr, name.length, null);
            if (!s)
            {
                // already in table
                if (!pickAny)
                {
                    s = tab.lookup(name.ptr, name.length);
                    assert(s);
                    MachObjSymbol* os = cast(MachObjSymbol*)s.ptrvalue;
                    error("multiple definition of %s: %s and %s: %s", om.name.ptr, name.ptr, os.om.name.ptr, os.name.ptr);
                }
            }
            else
            {
                auto os = new MachObjSymbol();
                os.name = xarraydup(name);
                os.om = om;
                s.ptrvalue = cast(void*)os;
                objsymbols.push(os);
            }
        }
        else
        {
            auto os = new MachObjSymbol();
            os.name = xarraydup(name);
            os.om = om;
            objsymbols.push(os);
        }
    }

private:
    /************************************
     * Scan single object module for dictionary symbols.
     * Send those symbols to LibMach::addSymbol().
     */
    void scanObjModule(MachObjModule* om)
    {
        static if (LOG)
        {
            printf("LibMach::scanObjModule(%s)\n", om.name.ptr);
        }

        // adapter binding `om` so the scanner only has to supply name and pickAny
        extern (D) void addSymbol(const(char)[] name, int pickAny)
        {
            this.addSymbol(om, name, pickAny);
        }

        scanMachObjModule(&addSymbol, om.base[0 .. om.length], om.name.ptr, loc);
    }

    /*****************************************************************************/
    /*****************************************************************************/
    /**********************************************
     * Create and write library to libbuf.
     * The library consists of:
     *      !<arch>\n
     *      header
     *      dictionary
     *      object modules...
     *
     * Module offsets are computed first (they are needed by the dictionary),
     * then the data is written and the resulting positions are asserted to
     * match the precomputed ones.
     */
    protected override void WriteLibToBuffer(OutBuffer* libbuf)
    {
        static if (LOG)
        {
            printf("LibMach::WriteLibToBuffer()\n");
        }
        __gshared char* pad = [0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A];
        /************* Scan Object Modules for Symbols ******************/
        for (size_t i = 0; i < objmodules.length; i++)
        {
            MachObjModule* om = objmodules[i];
            if (om.scan)
            {
                scanObjModule(om);
            }
        }
        /************* Determine module offsets ******************/
        // signature + dictionary header + entry-array size + string-table size
        uint moffset = 8 + MachLibHeader.sizeof + 4 + 4;
        for (size_t i = 0; i < objsymbols.length; i++)
        {
            MachObjSymbol* os = objsymbols[i];
            moffset += 8 + os.name.length + 1; // entry + name + terminating 0
        }
        moffset = (moffset + 3) & ~3;
        //if (moffset & 4)
        //    moffset += 4;
        uint hoffset = moffset; // end of the dictionary == start of the first module
        static if (LOG)
        {
            printf("\tmoffset = x%x\n", moffset);
        }
        for (size_t i = 0; i < objmodules.length; i++)
        {
            MachObjModule* om = objmodules[i];
            moffset += moffset & 1;
            om.offset = moffset;
            if (om.scan)
            {
                // freshly added object: header + padded name + contents padded to 8 bytes
                const slen = om.name.length;
                int nzeros = 8 - ((slen + 4) & 7);
                if (nzeros < 4)
                    nzeros += 8; // emulate mysterious behavior of ar
                int filesize = om.length;
                filesize = (filesize + 7) & ~7;
                moffset += MachLibHeader.sizeof + slen + nzeros + filesize;
            }
            else
            {
                // member taken from an existing archive: copied verbatim, header included
                moffset += om.length;
            }
        }
        libbuf.reserve(moffset);
        /************* Write the library ******************/
        libbuf.write("!<arch>\n");
        MachObjModule om;
        om.base = null;
        om.length = cast(uint)(hoffset - (8 + MachLibHeader.sizeof));
        om.offset = 8;
        om.name = "";
        .time(&om.file_time);
        version (Posix)
        {
            om.user_id = getuid();
            om.group_id = getgid();
        }
        version (Windows)
        {
            om.user_id = 0;
            om.group_id = 0;
        }
        om.file_mode = (1 << 15) | (6 << 6) | (4 << 3) | (4 << 0); // 0100644
        MachLibHeader h;
        MachOmToHeader(&h, &om);
        // the dictionary member uses a plain name and its exact size, so
        // overwrite the two fields MachOmToHeader filled in for a regular module
        memcpy(h.object_name.ptr, SYMDEF_NAME.ptr, SYMDEF_NAME.length);
        // format into a scratch buffer: snprintf directly into the field would
        // truncate a value that fills the field and store a 0 byte in the header
        char[32] sizetext = void;
        const len = snprintf(sizetext.ptr, sizetext.length, "%u", om.length);
        assert(len >= 0 && len <= MACH_FILE_SIZE_SIZE);
        setHeaderField(h.file_size[], sizetext[0 .. len]);
        libbuf.write((&h)[0 .. 1]);
        char[4] buf;
        Port.writelongLE(cast(uint)(objsymbols.length * 8), buf.ptr);
        libbuf.write(buf[0 .. 4]);
        int stringoff = 0;
        for (size_t i = 0; i < objsymbols.length; i++)
        {
            MachObjSymbol* os = objsymbols[i];
            Port.writelongLE(stringoff, buf.ptr);
            libbuf.write(buf[0 .. 4]);
            Port.writelongLE(os.om.offset, buf.ptr);
            libbuf.write(buf[0 .. 4]);
            stringoff += os.name.length + 1;
        }
        Port.writelongLE(stringoff, buf.ptr);
        libbuf.write(buf[0 .. 4]);
        for (size_t i = 0; i < objsymbols.length; i++)
        {
            MachObjSymbol* os = objsymbols[i];
            libbuf.writestring(os.name);
            libbuf.writeByte(0);
        }
        while (libbuf.length & 3)
            libbuf.writeByte(0);
        //if (libbuf.length & 4)
        //    libbuf.write(pad[0 .. 4]);
        static if (LOG)
        {
            printf("\tlibbuf.moffset = x%x\n", libbuf.length);
        }
        assert(libbuf.length == hoffset);
        /* Write out each of the object modules
         */
        for (size_t i = 0; i < objmodules.length; i++)
        {
            MachObjModule* om2 = objmodules[i];
            if (libbuf.length & 1)
                libbuf.writeByte('\n'); // module alignment
            assert(libbuf.length == om2.offset);
            if (om2.scan)
            {
                MachOmToHeader(&h, om2);
                libbuf.write((&h)[0 .. 1]); // module header
                libbuf.write(om2.name.ptr[0 .. om2.name.length]);
                int nzeros = 8 - ((om2.name.length + 4) & 7);
                if (nzeros < 4)
                    nzeros += 8; // emulate mysterious behavior of ar
                libbuf.fill0(nzeros);
                libbuf.write(om2.base[0 .. om2.length]); // module contents
                // obj modules are padded out to 8 bytes in length with 0x0A
                int filealign = om2.length & 7;
                if (filealign)
                {
                    libbuf.write(pad[0 .. 8 - filealign]);
                }
            }
            else
            {
                libbuf.write(om2.base[0 .. om2.length]); // module contents
            }
        }
        static if (LOG)
        {
            printf("moffset = x%x, libbuf.length = x%x\n", moffset, libbuf.length);
        }
        assert(libbuf.length == moffset);
    }
}

/*****************************************************************************/
/*****************************************************************************/
/// Bookkeeping for one member of the library.
struct MachObjModule
{
    ubyte* base; // where are we holding it in memory
    uint length; // in bytes
    uint offset; // offset from start of library
    const(char)[] name; // module name (file name) with terminating 0
    c_long file_time; // file time
    uint user_id;
    uint group_id;
    uint file_mode;
    int scan; // 1 means scan for symbols
}

// Widths in bytes of the fields of an archive member header
enum MACH_OBJECT_NAME_SIZE = 16;
enum MACH_FILE_TIME_SIZE = 12;
enum MACH_USER_ID_SIZE = 6;
enum MACH_GROUP_ID_SIZE = 6;
enum MACH_FILE_MODE_SIZE = 8;
enum MACH_FILE_SIZE_SIZE = 10;
enum MACH_TRAILER_SIZE = 2;

/// Archive member header; all fields are text, left-justified and blank-padded.
struct MachLibHeader
{
    char[MACH_OBJECT_NAME_SIZE] object_name;
    char[MACH_FILE_TIME_SIZE] file_time;
    char[MACH_USER_ID_SIZE] user_id;
    char[MACH_GROUP_ID_SIZE] group_id;
    char[MACH_FILE_MODE_SIZE] file_mode; // in octal
    char[MACH_FILE_SIZE_SIZE] file_size;
    char[MACH_TRAILER_SIZE] trailer;
}

/**
 * Fill in the archive member header `h` for module `om`.
 *
 * The name field is written as `#1/<n>`, where `<n>` is the length of the
 * module name plus its zero padding; the size field covers that padded name
 * plus the module contents rounded up to a multiple of 8 bytes.
 *
 * Each number is formatted into a scratch buffer and then copied, so a value
 * that exactly fills its field (e.g. a 6 digit user id) is stored in full and
 * no 0 byte ends up in the header.
 *
 * Params:
 *      h = header to fill in
 *      om = module to describe; `user_id`/`group_id` are reset to 0 if they
 *           exceed 999_999 and so cannot fit their 6 character fields
 */
extern (C++) void MachOmToHeader(MachLibHeader* h, MachObjModule* om)
{
    const slen = om.name.length;
    int nzeros = 8 - ((slen + 4) & 7);
    if (nzeros < 4)
        nzeros += 8; // emulate mysterious behavior of ar

    char[32] text = void; // large enough for any 64 bit number plus prefix

    int len = snprintf(text.ptr, text.length, "#1/%lld", cast(long)(slen + nzeros));
    assert(len >= 0 && len <= MACH_OBJECT_NAME_SIZE);
    setHeaderField(h.object_name[], text[0 .. len]);

    len = snprintf(text.ptr, text.length, "%llu", cast(long)om.file_time);
    assert(len >= 0 && len <= MACH_FILE_TIME_SIZE);
    setHeaderField(h.file_time[], text[0 .. len]);

    if (om.user_id > 999_999) // yes, it happens
        om.user_id = 0; // don't really know what to do here
    len = snprintf(text.ptr, text.length, "%u", om.user_id);
    assert(len >= 0 && len <= MACH_USER_ID_SIZE);
    setHeaderField(h.user_id[], text[0 .. len]);

    if (om.group_id > 999_999) // yes, it happens
        om.group_id = 0; // don't really know what to do here
    len = snprintf(text.ptr, text.length, "%u", om.group_id);
    assert(len >= 0 && len <= MACH_GROUP_ID_SIZE);
    setHeaderField(h.group_id[], text[0 .. len]);

    len = snprintf(text.ptr, text.length, "%o", om.file_mode);
    assert(len >= 0 && len <= MACH_FILE_MODE_SIZE);
    setHeaderField(h.file_mode[], text[0 .. len]);

    int filesize = om.length;
    filesize = (filesize + 7) & ~7;
    len = snprintf(text.ptr, text.length, "%llu", cast(ulong)(slen + nzeros + filesize));
    assert(len >= 0 && len <= MACH_FILE_SIZE_SIZE);
    setHeaderField(h.file_size[], text[0 .. len]);

    h.trailer[0] = '`';
    h.trailer[1] = '\n';
}