/**
 * A library in the Mach-O format, used on macOS.
 *
 * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/libmach.d, _libmach.d)
 * Documentation:  https://dlang.org/phobos/dmd_libmach.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/libmach.d
 */

module dmd.libmach;

import core.stdc.time;
import core.stdc.string;
import core.stdc.stdlib;
import core.stdc.stdio;
import core.stdc.config;

version (Posix)
{
    import core.sys.posix.sys.stat;
    import core.sys.posix.unistd;
}
version (Windows)
{
    import core.sys.windows.stat;
}

import dmd.lib;
import dmd.location;
import dmd.utils;

import dmd.root.array;
import dmd.root.filename;
import dmd.common.outbuffer;
import dmd.root.port;
import dmd.root.rmem;
import dmd.root.string;
import dmd.root.stringtable;

import dmd.scanmach;

// Entry point (only public symbol in this module).
public extern (C++) Library LibMach_factory()
{
    return new LibMach();
}

private: // for the remainder of this module

enum LOG = false;

/// One entry of the library's symbol dictionary: a symbol and the module defining it.
struct MachObjSymbol
{
    const(char)[] name;         // still has a terminating 0
    MachObjModule* om;
}

alias MachObjModules = Array!(MachObjModule*);
alias MachObjSymbols = Array!(MachObjSymbol*);

/// Collects Mach-O object modules and their symbols, and writes them out as an `ar` archive.
final class LibMach : Library
{
    MachObjModules objmodules;  // MachObjModule[]
    MachObjSymbols objsymbols;  // MachObjSymbol[]
    StringTable!(MachObjSymbol*) tab;

    extern (D) this()
    {
        tab._init(14_000);
    }

    /***************************************
     * Add object module or library to the library.
     * Examine the buffer to see which it is.
     * If the buffer is NULL, use module_name as the file name
     * and load the file.
     *
     * Anything that fails a consistency check is reported through
     * `eSink.error` as a "corrupt Mach object module", together with the
     * source line of the failed check, and nothing further is added.
     *
     * Params:
     *  module_name = name of the object module or library file
     *  buffer = contents of the file, or null to read `module_name` from disk
     */
    override void addObject(const(char)[] module_name, const ubyte[] buffer)
    {
        static if (LOG)
        {
            printf("LibMach::addObject(%.*s)\n",
                   cast(int)module_name.length, module_name.ptr);
        }

        void corrupt(int reason)
        {
            eSink.error(loc, "corrupt Mach object module %.*s %d",
                        cast(int)module_name.length, module_name.ptr, reason);
        }

        int fromfile = 0;
        auto buf = buffer.ptr;
        auto buflen = buffer.length;
        if (!buf)
        {
            assert(module_name[0]);
            // read file and take buffer ownership
            auto data = readFile(Loc.initial, module_name).extractSlice();
            buf = data.ptr;
            buflen = data.length;
            fromfile = 1;
        }
        if (buflen < 16)
        {
            static if (LOG)
            {
                printf("buf = %p, buflen = %d\n", buf, buflen);
            }
            return corrupt(__LINE__);
        }
        if (memcmp(buf, "!<arch>\n".ptr, 8) == 0)
        {
            /* Library file.
             * Pull each object module out of the library and add it
             * to the object module array.
             */
            static if (LOG)
            {
                printf("archive, buf = %p, buflen = %d\n", buf, buflen);
            }

            // Member names of the symbol table. Both are compared over the
            // whole object_name field, so both must be exactly that long.
            enum symdefName = "__.SYMDEF       ";
            enum symdefSortedName = "__.SYMDEF SORTED";
            static assert(symdefName.length == MACH_OBJECT_NAME_SIZE);
            static assert(symdefSortedName.length == MACH_OBJECT_NAME_SIZE);

            uint offset = 8;
            char* symtab = null;
            uint symtab_size = 0;
            uint mstart = cast(uint)objmodules.length;
            while (offset < buflen)
            {
                if (offset + MachLibHeader.sizeof >= buflen)
                    return corrupt(__LINE__);
                MachLibHeader* header = cast(MachLibHeader*)(cast(ubyte*)buf + offset);
                offset += MachLibHeader.sizeof;
                char* endptr = null;
                uint size = cast(uint)strtoul(header.file_size.ptr, &endptr, 10);
                if (endptr >= header.file_size.ptr + 10 || *endptr != ' ')
                    return corrupt(__LINE__);
                // offset < buflen is guaranteed by the header check above, so
                // this subtraction cannot underflow, whereas `offset + size`
                // could wrap around in 32 bits.
                if (size > buflen - offset)
                    return corrupt(__LINE__);
                if (memcmp(header.object_name.ptr, symdefName.ptr, MACH_OBJECT_NAME_SIZE) == 0 ||
                    memcmp(header.object_name.ptr, symdefSortedName.ptr, MACH_OBJECT_NAME_SIZE) == 0)
                {
                    /* Instead of rescanning the object modules we pull from a
                     * library, just use the already created symbol table.
                     */
                    if (symtab)
                        return corrupt(__LINE__);
                    symtab = cast(char*)buf + offset;
                    symtab_size = size;
                    if (size < 4)
                        return corrupt(__LINE__);
                }
                else
                {
                    auto om = new MachObjModule();
                    // base/length cover the member header as well as its data
                    om.base = cast(ubyte*)buf + offset - MachLibHeader.sizeof;
                    om.length = cast(uint)(size + MachLibHeader.sizeof);
                    om.offset = 0;
                    // the name is read from the bytes immediately following the header
                    const n = cast(const(char)*)(om.base + MachLibHeader.sizeof);
                    om.name = n.toDString();
                    om.file_time = cast(uint)strtoul(header.file_time.ptr, &endptr, 10);
                    om.user_id = cast(uint)strtoul(header.user_id.ptr, &endptr, 10);
                    om.group_id = cast(uint)strtoul(header.group_id.ptr, &endptr, 10);
                    om.file_mode = cast(uint)strtoul(header.file_mode.ptr, &endptr, 8);
                    om.scan = 0; // don't scan object module for symbols
                    objmodules.push(om);
                }
                offset += (size + 1) & ~1;
            }
            if (offset != buflen)
                return corrupt(__LINE__);
            /* Scan the library's symbol table, and insert it into our own.
             * We use this instead of rescanning the object module, because
             * the library's creator may have a different idea of what symbols
             * go into the symbol table than we do.
             * This is also probably faster.
             */
            if (!symtab)
                return corrupt(__LINE__); // archive has no symbol table member
            const uint nsymbols = Port.readlongLE(symtab) / 8;
            // Layout: count word, nsymbols 8-byte entries, string table size
            // word, string table. Computed in 64 bits so it cannot wrap.
            const ulong strtabOffset = 4 + cast(ulong)nsymbols * 8 + 4;
            if (strtabOffset > symtab_size)
                return corrupt(__LINE__);
            char* s = symtab + cast(size_t)strtabOffset;
            const size_t strtabSize = cast(size_t)(symtab_size - strtabOffset);
            for (uint i = 0; i < nsymbols; i++)
            {
                uint soff = Port.readlongLE(symtab + 4 + i * 8);
                if (soff >= strtabSize)
                    return corrupt(__LINE__);
                const(char)* name = s + soff;
                // Look for the terminator without reading past the symbol table
                const(char)* nameEnd = cast(const(char)*)memchr(name, 0, strtabSize - soff);
                if (!nameEnd)
                    return corrupt(__LINE__);
                size_t namelen = nameEnd - name;
                //printf("soff = x%x name = %s\n", soff, name);
                uint moff = Port.readlongLE(symtab + 4 + i * 8 + 4);
                //printf("symtab[%d] moff = x%x  x%x, name = %s\n", i, moff, moff + MachLibHeader.sizeof, name);
                for (uint m = mstart; 1; m++)
                {
                    if (m == objmodules.length)
                        return corrupt(__LINE__); // didn't find it
                    MachObjModule* om = objmodules[m];
                    //printf("\tom offset = x%x\n", cast(char *)om.base - cast(char *)buf);
                    if (moff == cast(char*)om.base - cast(char*)buf)
                    {
                        addSymbol(om, name[0 .. namelen], 1);
                        //if (mstart == m)
                        //    mstart++;
                        break;
                    }
                }
            }
            return;
        }
        /* It's an object module
         */
        auto om = new MachObjModule();
        om.base = cast(ubyte*)buf;
        om.length = cast(uint)buflen;
        om.offset = 0;
        const n = FileName.name(module_name); // remove path, but not extension
        om.name = n;
        om.scan = 1;
        if (fromfile)
        {
            version (Posix)
                stat_t statbuf;
            version (Windows)
                struct_stat statbuf;
            int i = module_name.toCStringThen!(slice => stat(slice.ptr, &statbuf));
            if (i == -1) // error, errno is set
                return corrupt(__LINE__);
            om.file_time = statbuf.st_ctime;
            om.user_id = statbuf.st_uid;
            om.group_id = statbuf.st_gid;
            om.file_mode = statbuf.st_mode;
        }
        else
        {
            /* Mock things up for the object module file that never was
             * actually written out.
             */
            version (Posix)
            {
                __gshared uid_t uid;
                __gshared gid_t gid;
                __gshared int _init;
                if (!_init)
                {
                    _init = 1;
                    uid = getuid();
                    gid = getgid();
                }
                om.user_id = uid;
                om.group_id = gid;
            }
            version (Windows)
            {
                om.user_id = 0;  // meaningless on Windows
                om.group_id = 0; // meaningless on Windows
            }
            time(&om.file_time);
            om.file_mode = (1 << 15) | (6 << 6) | (4 << 3) | (4 << 0); // 0100644
        }
        objmodules.push(om);
    }

    /*****************************************************************************/

    /***************************************
     * Record that object module `om` defines symbol `name`.
     * Duplicates are not diagnosed here; the duplicate-detecting variant
     * is compiled out with `version (none)`.
     *
     * Params:
     *  om = module defining the symbol
     *  name = symbol name; a copy is stored
     *  pickAny = only consulted by the compiled-out variant
     */
    void addSymbol(MachObjModule* om, const(char)[] name, int pickAny = 0)
    {
        static if (LOG)
        {
            printf("LibMach::addSymbol(%s, %s, %d)\n", om.name.ptr, name.ptr, pickAny);
        }
        version (none)
        {
            // let linker sort out duplicates
            StringValue* s = tab.insert(name.ptr, name.length, null);
            if (!s)
            {
                // already in table
                if (!pickAny)
                {
                    s = tab.lookup(name.ptr, name.length);
                    assert(s);
                    MachObjSymbol* os = cast(MachObjSymbol*)s.ptrvalue;
                    eSink.error(loc, "multiple definition of %s: %s and %s: %s", om.name.ptr, name.ptr, os.om.name.ptr, os.name.ptr);
                }
            }
            else
            {
                auto os = new MachObjSymbol();
                os.name = xarraydup(name);
                os.om = om;
                s.ptrvalue = cast(void*)os;
                objsymbols.push(os);
            }
        }
        else
        {
            auto os = new MachObjSymbol();
            os.name = xarraydup(name);
            os.om = om;
            objsymbols.push(os);
        }
    }

private:
    /************************************
     * Scan single object module for dictionary symbols.
     * Send those symbols to LibMach::addSymbol().
     */
    void scanObjModule(MachObjModule* om)
    {
        static if (LOG)
        {
            printf("LibMach::scanObjModule(%s)\n", om.name.ptr);
        }

        extern (D) void addSymbol(const(char)[] name, int pickAny)
        {
            this.addSymbol(om, name, pickAny);
        }

        scanMachObjModule(&addSymbol, om.base[0 .. om.length], om.name.ptr, loc, eSink);
    }

    /*****************************************************************************/
    /*****************************************************************************/
    /**********************************************
     * Create and write library to libbuf.
     * The library consists of:
     *      !<arch>\n
     *      header
     *      dictionary
     *      object modules...
     */
    protected override void writeLibToBuffer(ref OutBuffer libbuf)
    {
        static if (LOG)
        {
            printf("LibMach::WriteLibToBuffer()\n");
        }
        __gshared char* pad = [0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A];
        /************* Scan Object Modules for Symbols ******************/
        for (size_t i = 0; i < objmodules.length; i++)
        {
            MachObjModule* om = objmodules[i];
            if (om.scan)
            {
                scanObjModule(om);
            }
        }
        /************* Determine module offsets ******************/
        // magic + dictionary header + count word + string table size word
        uint moffset = 8 + MachLibHeader.sizeof + 4 + 4;
        for (size_t i = 0; i < objsymbols.length; i++)
        {
            MachObjSymbol* os = objsymbols[i];
            // one 8-byte dictionary entry plus the 0-terminated name
            moffset += 8 + os.name.length + 1;
        }
        moffset = (moffset + 3) & ~3;
        //if (moffset & 4)
        //    moffset += 4;
        uint hoffset = moffset;
        static if (LOG)
        {
            printf("\tmoffset = x%x\n", moffset);
        }
        for (size_t i = 0; i < objmodules.length; i++)
        {
            MachObjModule* om = objmodules[i];
            moffset += moffset & 1;
            om.offset = moffset;
            if (om.scan)
            {
                const slen = om.name.length;
                int nzeros = 8 - ((slen + 4) & 7);
                if (nzeros < 4)
                    nzeros += 8; // emulate mysterious behavior of ar
                int filesize = om.length;
                filesize = (filesize + 7) & ~7;
                moffset += MachLibHeader.sizeof + slen + nzeros + filesize;
            }
            else
            {
                // already carries its own header and name, see addObject()
                moffset += om.length;
            }
        }
        libbuf.reserve(moffset);
        /************* Write the library ******************/
        libbuf.write("!<arch>\n");
        MachObjModule om;
        om.base = null;
        om.length = cast(uint)(hoffset - (8 + MachLibHeader.sizeof));
        om.offset = 8;
        om.name = "";
        .time(&om.file_time);
        version (Posix)
        {
            om.user_id = getuid();
            om.group_id = getgid();
        }
        version (Windows)
        {
            om.user_id = 0;
            om.group_id = 0;
        }
        om.file_mode = (1 << 15) | (6 << 6) | (4 << 3) | (4 << 0); // 0100644
        MachLibHeader h;
        MachOmToHeader(&h, &om);
        memcpy(h.object_name.ptr, "__.SYMDEF".ptr, 9);
        // Format into a scratch buffer: snprintf() straight into the field
        // would cut off the last digit of a value that fills it completely.
        char[32] tmp = void;
        const len = snprintf(tmp.ptr, tmp.length, "%u", om.length);
        assert(len >= 0);
        fillHeaderField(h.file_size[], tmp[0 .. len]);
        libbuf.write((&h)[0 .. 1]);
        char[4] buf;
        Port.writelongLE(cast(uint)(objsymbols.length * 8), buf.ptr);
        libbuf.write(buf[0 .. 4]);
        int stringoff = 0;
        for (size_t i = 0; i < objsymbols.length; i++)
        {
            MachObjSymbol* os = objsymbols[i];
            Port.writelongLE(stringoff, buf.ptr);
            libbuf.write(buf[0 .. 4]);
            Port.writelongLE(os.om.offset, buf.ptr);
            libbuf.write(buf[0 .. 4]);
            stringoff += os.name.length + 1;
        }
        Port.writelongLE(stringoff, buf.ptr);
        libbuf.write(buf[0 .. 4]);
        for (size_t i = 0; i < objsymbols.length; i++)
        {
            MachObjSymbol* os = objsymbols[i];
            libbuf.writestring(os.name);
            libbuf.writeByte(0);
        }
        while (libbuf.length & 3)
            libbuf.writeByte(0);
        //if (libbuf.length & 4)
        //    libbuf.write(pad[0 .. 4]);
        static if (LOG)
        {
            printf("\tlibbuf.moffset = x%x\n", libbuf.length);
        }
        assert(libbuf.length == hoffset);
        /* Write out each of the object modules
         */
        for (size_t i = 0; i < objmodules.length; i++)
        {
            MachObjModule* om2 = objmodules[i];
            if (libbuf.length & 1)
                libbuf.writeByte('\n'); // module alignment
            assert(libbuf.length == om2.offset);
            if (om2.scan)
            {
                MachOmToHeader(&h, om2);
                libbuf.write((&h)[0 .. 1]); // module header
                libbuf.write(om2.name.ptr[0 .. om2.name.length]);
                int nzeros = 8 - ((om2.name.length + 4) & 7);
                if (nzeros < 4)
                    nzeros += 8; // emulate mysterious behavior of ar
                libbuf.fill0(nzeros);
                libbuf.write(om2.base[0 .. om2.length]); // module contents
                // obj modules are padded out to 8 bytes in length with 0x0A
                int filealign = om2.length & 7;
                if (filealign)
                {
                    libbuf.write(pad[0 .. 8 - filealign]);
                }
            }
            else
            {
                libbuf.write(om2.base[0 .. om2.length]); // module contents
            }
        }
        static if (LOG)
        {
            printf("moffset = x%x, libbuf.length = x%x\n", moffset, libbuf.length);
        }
        assert(libbuf.length == moffset);
    }
}

/*****************************************************************************/
/*****************************************************************************/
/// An object module held in memory, plus the metadata written to its archive header.
struct MachObjModule
{
    ubyte* base;        // where are we holding it in memory
    uint length;        // in bytes
    uint offset;        // offset from start of library
    const(char)[] name; // module name (file name) with terminating 0
    c_long file_time;   // file time
    uint user_id;
    uint group_id;
    uint file_mode;
    int scan;           // 1 means scan for symbols
}

enum MACH_OBJECT_NAME_SIZE = 16;
enum MACH_FILE_TIME_SIZE = 12;
enum MACH_USER_ID_SIZE = 6;
enum MACH_GROUP_ID_SIZE = 6;
enum MACH_FILE_MODE_SIZE = 8;
enum MACH_FILE_SIZE_SIZE = 10;
enum MACH_TRAILER_SIZE = 2;

/// Archive member header; MachOmToHeader() fills every field as space-padded text.
struct MachLibHeader
{
    char[MACH_OBJECT_NAME_SIZE] object_name;
    char[MACH_FILE_TIME_SIZE] file_time;
    char[MACH_USER_ID_SIZE] user_id;
    char[MACH_GROUP_ID_SIZE] group_id;
    char[MACH_FILE_MODE_SIZE] file_mode; // in octal
    char[MACH_FILE_SIZE_SIZE] file_size;
    char[MACH_TRAILER_SIZE] trailer;
}

/**
 * Store `text` left-justified in `field` and pad the rest with spaces.
 * No terminating 0 is written.
 *
 * Params:
 *  field = header field to fill completely
 *  text = characters to store; must not be longer than `field`
 */
void fillHeaderField(char[] field, const(char)[] text)
{
    assert(text.length <= field.length);
    // Clamp so that a build without asserts can never write past the field
    const n = text.length < field.length ? text.length : field.length;
    field[0 .. n] = text[0 .. n];
    field[n .. $] = ' ';
}

/**
 * Fill in the archive member header `h` from the object module `om`.
 *
 * Each value is formatted into a scratch buffer first and then copied into
 * its field, so a value that exactly fills the field is stored whole.
 *
 * Params:
 *  h = header to fill in
 *  om = module to describe; user_id and group_id are reset to 0 when they
 *       need more than 6 digits
 */
extern (C++) void MachOmToHeader(MachLibHeader* h, MachObjModule* om)
{
    const slen = om.name.length;
    int nzeros = 8 - ((slen + 4) & 7);
    if (nzeros < 4)
        nzeros += 8; // emulate mysterious behavior of ar

    char[32] tmp = void; // large enough for "#1/" followed by any 64 bit number
    int len;

    len = snprintf(tmp.ptr, tmp.length, "#1/%lld", cast(long)(slen + nzeros));
    assert(len >= 0);
    fillHeaderField(h.object_name[], tmp[0 .. len]);

    len = snprintf(tmp.ptr, tmp.length, "%llu", cast(ulong)om.file_time);
    assert(len >= 0);
    fillHeaderField(h.file_time[], tmp[0 .. len]);

    if (om.user_id > 999_999) // yes, it happens
        om.user_id = 0; // don't really know what to do here
    len = snprintf(tmp.ptr, tmp.length, "%u", om.user_id);
    assert(len >= 0);
    fillHeaderField(h.user_id[], tmp[0 .. len]);

    if (om.group_id > 999_999) // yes, it happens
        om.group_id = 0; // don't really know what to do here
    len = snprintf(tmp.ptr, tmp.length, "%u", om.group_id);
    assert(len >= 0);
    fillHeaderField(h.group_id[], tmp[0 .. len]);

    len = snprintf(tmp.ptr, tmp.length, "%o", om.file_mode);
    assert(len >= 0);
    fillHeaderField(h.file_mode[], tmp[0 .. len]);

    int filesize = om.length;
    filesize = (filesize + 7) & ~7;
    len = snprintf(tmp.ptr, tmp.length, "%llu", cast(ulong)(slen + nzeros + filesize));
    assert(len >= 0);
    fillHeaderField(h.file_size[], tmp[0 .. len]);

    h.trailer[0] = '`';
    h.trailer[1] = '\n';
}