1 /**
2  * A library in the Mach-O format, used on macOS.
3  *
4  * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/libmach.d, _libmach.d)
8  * Documentation:  https://dlang.org/phobos/dmd_libmach.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/libmach.d
10  */
11 
12 module dmd.libmach;
13 
14 import core.stdc.time;
15 import core.stdc.string;
16 import core.stdc.stdlib;
17 import core.stdc.stdio;
18 import core.stdc.config;
19 
20 version (Posix)
21 {
22     import core.sys.posix.sys.stat;
23     import core.sys.posix.unistd;
24 }
25 version (Windows)
26 {
27     import core.sys.windows.stat;
28 }
29 
30 import dmd.globals;
31 import dmd.lib;
32 import dmd.location;
33 import dmd.utils;
34 
35 import dmd.root.array;
36 import dmd.root.file;
37 import dmd.root.filename;
38 import dmd.common.outbuffer;
39 import dmd.root.port;
40 import dmd.root.rmem;
41 import dmd.root.string;
42 import dmd.root.stringtable;
43 
44 import dmd.scanmach;
45 
/**
 * Create the Mach-O flavour of `Library`.
 *
 * This is the module's entry point and the only symbol it exposes publicly.
 *
 * Returns: a newly allocated `LibMach`
 */
public extern (C++) Library LibMach_factory()
{
    Library result = new LibMach();
    return result;
}
51 
private: // for the remainder of this module

// Compile-time switch guarding the `static if (LOG)` tracing printf blocks
// in this module; the blocks are compiled out while this is false.
enum LOG = false;

/// One entry destined for the archive's symbol table: a symbol name and the
/// object module it was added for (see LibMach.addSymbol).
struct MachObjSymbol
{
    const(char)[] name;         // still has a terminating 0
    MachObjModule* om;          // module passed to addSymbol together with this name
}

// Growable arrays of pointers to the modules / symbols collected so far.
alias MachObjModules = Array!(MachObjModule*);
alias MachObjSymbols = Array!(MachObjSymbol*);
64 
/**
 * `Library` implementation that reads and writes `!<arch>` archives of
 * Mach-O object modules.
 *
 * Object modules and existing archives are collected with `addObject`;
 * `WriteLibToBuffer` then scans the collected modules for symbols, lays
 * out the archive and emits it.
 */
final class LibMach : Library
{
    MachObjModules objmodules; // MachObjModule[]
    MachObjSymbols objsymbols; // MachObjSymbol[]
    StringTable!(MachObjSymbol*) tab;

    extern (D) this()
    {
        tab._init(14_000);
    }

    /***************************************
     * Add object module or library to the library.
     * Examine the buffer to see which it is.
     * If the buffer is NULL, use module_name as the file name
     * and load the file.
     *
     * Malformed input is reported through `error` ("corrupt Mach object
     * module ...", tagged with the source line of the failed check) and
     * the call returns without adding anything further.
     *
     * Params:
     *      module_name = file name of the object module or archive
     *      buffer = contents of the file, or null to read `module_name`
     */
    override void addObject(const(char)[] module_name, const ubyte[] buffer)
    {
        static if (LOG)
        {
            printf("LibMach::addObject(%.*s)\n",
                   cast(int)module_name.length, module_name.ptr);
        }

        void corrupt(int reason)
        {
            error("corrupt Mach object module %.*s %d",
                  cast(int)module_name.length, module_name.ptr, reason);
        }

        int fromfile = 0;
        auto buf = buffer.ptr;
        auto buflen = buffer.length;
        if (!buf)
        {
            assert(module_name[0]);
            // read file and take buffer ownership
            auto data = readFile(Loc.initial, module_name).extractSlice();
            buf = data.ptr;
            buflen = data.length;
            fromfile = 1;
        }
        if (buflen < 16)
        {
            static if (LOG)
            {
                printf("buf = %p, buflen = %d\n", buf, buflen);
            }
            return corrupt(__LINE__);
        }
        if (memcmp(buf, "!<arch>\n".ptr, 8) == 0)
        {
            /* Library file.
             * Pull each object module out of the library and add it
             * to the object module array.
             */
            static if (LOG)
            {
                printf("archive, buf = %p, buflen = %d\n", buf, buflen);
            }
            // Kept in size_t so that none of the range checks below can be
            // defeated by 32-bit wrap-around on a hostile size field.
            size_t offset = 8;
            char* symtab = null;
            uint symtab_size = 0;
            uint mstart = cast(uint)objmodules.length;
            while (offset < buflen)
            {
                if (offset + MachLibHeader.sizeof >= buflen)
                    return corrupt(__LINE__);
                MachLibHeader* header = cast(MachLibHeader*)(cast(ubyte*)buf + offset);
                offset += MachLibHeader.sizeof;
                char* endptr = null;
                uint size = cast(uint)strtoul(header.file_size.ptr, &endptr, 10);
                if (endptr >= header.file_size.ptr + 10 || *endptr != ' ')
                    return corrupt(__LINE__);
                // offset < buflen is guaranteed by the header check above,
                // so the subtraction cannot wrap.
                if (size > buflen - offset)
                    return corrupt(__LINE__);
                if (memcmp(header.object_name.ptr, "__.SYMDEF       ".ptr, 16) == 0 ||
                    memcmp(header.object_name.ptr, "__.SYMDEF SORTED".ptr, 16) == 0)
                {
                    /* Instead of rescanning the object modules we pull from a
                     * library, just use the already created symbol table.
                     */
                    if (symtab)
                        return corrupt(__LINE__);
                    symtab = cast(char*)buf + offset;
                    symtab_size = size;
                    if (size < 4)
                        return corrupt(__LINE__);
                }
                else
                {
                    auto om = new MachObjModule();
                    // base/length cover the member's archive header as well,
                    // so the member can later be copied out verbatim.
                    om.base = cast(ubyte*)buf + offset - MachLibHeader.sizeof;
                    om.length = cast(uint)(size + MachLibHeader.sizeof);
                    om.offset = 0;
                    const n = cast(const(char)*)(om.base + MachLibHeader.sizeof);
                    om.name = n.toDString();
                    om.file_time = cast(uint)strtoul(header.file_time.ptr, &endptr, 10);
                    om.user_id = cast(uint)strtoul(header.user_id.ptr, &endptr, 10);
                    om.group_id = cast(uint)strtoul(header.group_id.ptr, &endptr, 10);
                    om.file_mode = cast(uint)strtoul(header.file_mode.ptr, &endptr, 8);
                    om.scan = 0; // don't scan object module for symbols
                    objmodules.push(om);
                }
                offset += (size + 1) & ~1; // members start on even offsets
            }
            if (offset != buflen)
                return corrupt(__LINE__);
            /* The symbols of the extracted members come exclusively from the
             * archive's own symbol table, so an archive without one cannot be
             * processed; report it rather than dereferencing a null pointer.
             */
            if (!symtab)
                return corrupt(__LINE__);
            /* Scan the library's symbol table, and insert it into our own.
             * We use this instead of rescanning the object module, because
             * the library's creator may have a different idea of what symbols
             * go into the symbol table than we do.
             * This is also probably faster.
             *
             * Layout read here:
             *      4 bytes         byte size of the entry array (nsymbols * 8)
             *      nsymbols * 8    entries: string offset, module offset
             *      4 bytes         (skipped)
             *      ...             0-terminated symbol names
             */
            const uint nsymbols = Port.readlongLE(symtab) / 8;
            // 64-bit arithmetic: nsymbols * 8 + 8 can exceed 32 bits
            const ulong tableSize = 4 + cast(ulong)nsymbols * 8 + 4;
            if (tableSize > symtab_size)
                return corrupt(__LINE__);
            char* s = symtab + cast(size_t)tableSize;
            const size_t stringsSize = cast(size_t)(symtab_size - tableSize);
            for (uint i = 0; i < nsymbols; i++)
            {
                const uint soff = Port.readlongLE(symtab + 4 + i * 8);
                if (soff >= stringsSize)
                    return corrupt(__LINE__);
                const(char)* name = s + soff;
                // Find the terminator without ever reading past the end of
                // the symbol table member.
                const terminator = cast(const(char)*)memchr(name, 0, stringsSize - soff);
                if (!terminator)
                    return corrupt(__LINE__);
                const size_t namelen = cast(size_t)(terminator - name);
                //printf("soff = x%x name = %s\n", soff, name);
                uint moff = Port.readlongLE(symtab + 4 + i * 8 + 4);
                //printf("symtab[%d] moff = x%x  x%x, name = %s\n", i, moff, moff + MachLibHeader.sizeof, name);
                // Locate the module (among those added from this archive)
                // whose header sits at file offset moff.
                for (uint m = mstart; 1; m++)
                {
                    if (m == objmodules.length)
                        return corrupt(__LINE__);       // didn't find it
                    MachObjModule* om = objmodules[m];
                    //printf("\tom offset = x%x\n", cast(char *)om.base - cast(char *)buf);
                    if (moff == cast(char*)om.base - cast(char*)buf)
                    {
                        addSymbol(om, name[0 .. namelen], 1);
                        //if (mstart == m)
                        //    mstart++;
                        break;
                    }
                }
            }
            return;
        }
        /* It's an object module
         */
        auto om = new MachObjModule();
        om.base = cast(ubyte*)buf;
        om.length = cast(uint)buflen;
        om.offset = 0;
        const n = FileName.name(module_name); // remove path, but not extension
        om.name = n;
        om.scan = 1;
        if (fromfile)
        {
            // Take the archive header fields from the file on disk
            version (Posix)
                stat_t statbuf;
            version (Windows)
                struct_stat statbuf;
            int i = module_name.toCStringThen!(slice => stat(slice.ptr, &statbuf));
            if (i == -1) // error, errno is set
                return corrupt(__LINE__);
            om.file_time = statbuf.st_ctime;
            om.user_id = statbuf.st_uid;
            om.group_id = statbuf.st_gid;
            om.file_mode = statbuf.st_mode;
        }
        else
        {
            /* Mock things up for the object module file that never was
             * actually written out.
             */
            version (Posix)
            {
                // uid/gid are looked up once and cached for later calls
                __gshared uid_t uid;
                __gshared gid_t gid;
                __gshared int _init;
                if (!_init)
                {
                    _init = 1;
                    uid = getuid();
                    gid = getgid();
                }
                om.user_id = uid;
                om.group_id = gid;
            }
            version (Windows)
            {
                om.user_id = 0; // meaningless on Windows
                om.group_id = 0;        // meaningless on Windows
            }
            time(&om.file_time);
            om.file_mode = (1 << 15) | (6 << 6) | (4 << 3) | (4 << 0); // 0100644
        }
        objmodules.push(om);
    }

    /*****************************************************************************/

    /**
     * Record `name` as a symbol provided by `om`.
     *
     * A copy of the name is stored. Duplicates are not detected here; the
     * duplicate-checking variant is disabled with `version (none)`, which
     * also makes `pickAny` unused in the active code path.
     *
     * Params:
     *      om = object module the symbol belongs to
     *      name = symbol name
     *      pickAny = only consulted by the disabled duplicate check
     */
    void addSymbol(MachObjModule* om, const(char)[] name, int pickAny = 0)
    {
        static if (LOG)
        {
            printf("LibMach::addSymbol(%s, %s, %d)\n", om.name.ptr, name.ptr, pickAny);
        }
        version (none)
        {
            // let linker sort out duplicates
            StringValue* s = tab.insert(name.ptr, name.length, null);
            if (!s)
            {
                // already in table
                if (!pickAny)
                {
                    s = tab.lookup(name.ptr, name.length);
                    assert(s);
                    MachObjSymbol* os = cast(MachObjSymbol*)s.ptrvalue;
                    error("multiple definition of %s: %s and %s: %s", om.name.ptr, name.ptr, os.om.name.ptr, os.name.ptr);
                }
            }
            else
            {
                auto os = new MachObjSymbol();
                os.name = xarraydup(name);
                os.om = om;
                s.ptrvalue = cast(void*)os;
                objsymbols.push(os);
            }
        }
        else
        {
            auto os = new MachObjSymbol();
            os.name = xarraydup(name);
            os.om = om;
            objsymbols.push(os);
        }
    }

private:
    /************************************
     * Scan single object module for dictionary symbols.
     * Send those symbols to LibMach::addSymbol().
     *
     * Params:
     *      om = module whose contents (`om.base[0 .. om.length]`) are scanned
     */
    void scanObjModule(MachObjModule* om)
    {
        static if (LOG)
        {
            printf("LibMach::scanObjModule(%s)\n", om.name.ptr);
        }

        // Adapter binding `om` so the scanner only has to supply name/pickAny
        extern (D) void addSymbol(const(char)[] name, int pickAny)
        {
            this.addSymbol(om, name, pickAny);
        }

        scanMachObjModule(&addSymbol, om.base[0 .. om.length], om.name.ptr, loc);
    }

    /*****************************************************************************/
    /*****************************************************************************/
    /**********************************************
     * Create and write library to libbuf.
     * The library consists of:
     *      !<arch>\n
     *      header
     *      dictionary
     *      object modules...
     *
     * The work is done in three passes: scan the modules flagged for
     * scanning, compute every module's offset (which the dictionary needs),
     * then emit the bytes. Asserts cross-check that the emitted sizes match
     * the precomputed offsets.
     *
     * Params:
     *      libbuf = buffer the archive image is appended to
     */
    protected override void WriteLibToBuffer(OutBuffer* libbuf)
    {
        static if (LOG)
        {
            printf("LibMach::WriteLibToBuffer()\n");
        }
        __gshared char* pad = [0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A];
        /************* Scan Object Modules for Symbols ******************/
        for (size_t i = 0; i < objmodules.length; i++)
        {
            MachObjModule* om = objmodules[i];
            if (om.scan)
            {
                scanObjModule(om);
            }
        }
        /************* Determine module offsets ******************/
        // signature + dictionary header + the dictionary's two 4-byte size fields
        uint moffset = 8 + MachLibHeader.sizeof + 4 + 4;
        for (size_t i = 0; i < objsymbols.length; i++)
        {
            MachObjSymbol* os = objsymbols[i];
            // one 8-byte entry plus the 0-terminated name
            moffset += 8 + os.name.length + 1;
        }
        moffset = (moffset + 3) & ~3;
        //if (moffset & 4)
        //    moffset += 4;
        uint hoffset = moffset; // end of the dictionary == start of first module
        static if (LOG)
        {
            printf("\tmoffset = x%x\n", moffset);
        }
        for (size_t i = 0; i < objmodules.length; i++)
        {
            MachObjModule* om = objmodules[i];
            moffset += moffset & 1; // modules start on even offsets
            om.offset = moffset;
            if (om.scan)
            {
                // Fresh module: header + padded name + contents padded to 8
                const slen = om.name.length;
                int nzeros = 8 - ((slen + 4) & 7);
                if (nzeros < 4)
                    nzeros += 8; // emulate mysterious behavior of ar
                int filesize = om.length;
                filesize = (filesize + 7) & ~7;
                moffset += MachLibHeader.sizeof + slen + nzeros + filesize;
            }
            else
            {
                // Taken from an archive: om.length already includes its header
                moffset += om.length;
            }
        }
        libbuf.reserve(moffset);
        /************* Write the library ******************/
        libbuf.write("!<arch>\n");
        // Pseudo module describing the dictionary, used only to build its header
        MachObjModule om;
        om.base = null;
        om.length = cast(uint)(hoffset - (8 + MachLibHeader.sizeof));
        om.offset = 8;
        om.name = "";
        .time(&om.file_time);
        version (Posix)
        {
            om.user_id = getuid();
            om.group_id = getgid();
        }
        version (Windows)
        {
            om.user_id = 0;
            om.group_id = 0;
        }
        om.file_mode = (1 << 15) | (6 << 6) | (4 << 3) | (4 << 0); // 0100644
        MachLibHeader h;
        MachOmToHeader(&h, &om);
        memcpy(h.object_name.ptr, "__.SYMDEF".ptr, 9);
        /* Overwrite the size field with the dictionary's exact size.
         * Format into scratch space first: snprintf always reserves a byte
         * for the terminating 0, so formatting straight into the 10 byte
         * field would truncate a 10 digit value and leave a 0 in the header.
         */
        char[16] sizeText = void;
        const int len = snprintf(sizeText.ptr, sizeText.length, "%u", om.length);
        assert(len > 0 && len <= MACH_FILE_SIZE_SIZE);
        memset(h.file_size.ptr, ' ', MACH_FILE_SIZE_SIZE);
        memcpy(h.file_size.ptr, sizeText.ptr, len);
        libbuf.write((&h)[0 .. 1]);
        char[4] buf;
        Port.writelongLE(cast(uint)(objsymbols.length * 8), buf.ptr);
        libbuf.write(buf[0 .. 4]);
        // Dictionary entries: (offset of name in string table, offset of module)
        int stringoff = 0;
        for (size_t i = 0; i < objsymbols.length; i++)
        {
            MachObjSymbol* os = objsymbols[i];
            Port.writelongLE(stringoff, buf.ptr);
            libbuf.write(buf[0 .. 4]);
            Port.writelongLE(os.om.offset, buf.ptr);
            libbuf.write(buf[0 .. 4]);
            stringoff += os.name.length + 1;
        }
        // Total size of the string table, followed by the strings themselves
        Port.writelongLE(stringoff, buf.ptr);
        libbuf.write(buf[0 .. 4]);
        for (size_t i = 0; i < objsymbols.length; i++)
        {
            MachObjSymbol* os = objsymbols[i];
            libbuf.writestring(os.name);
            libbuf.writeByte(0);
        }
        while (libbuf.length & 3)
            libbuf.writeByte(0);
        //if (libbuf.length & 4)
        //    libbuf.write(pad[0 .. 4]);
        static if (LOG)
        {
            printf("\tlibbuf.moffset = x%x\n", libbuf.length);
        }
        assert(libbuf.length == hoffset);
        /* Write out each of the object modules
         */
        for (size_t i = 0; i < objmodules.length; i++)
        {
            MachObjModule* om2 = objmodules[i];
            if (libbuf.length & 1)
                libbuf.writeByte('\n'); // module alignment
            assert(libbuf.length == om2.offset);
            if (om2.scan)
            {
                MachOmToHeader(&h, om2);
                libbuf.write((&h)[0 .. 1]); // module header
                libbuf.write(om2.name.ptr[0 .. om2.name.length]);
                int nzeros = 8 - ((om2.name.length + 4) & 7);
                if (nzeros < 4)
                    nzeros += 8; // emulate mysterious behavior of ar
                libbuf.fill0(nzeros);
                libbuf.write(om2.base[0 .. om2.length]); // module contents
                // obj modules are padded out to 8 bytes in length with 0x0A
                int filealign = om2.length & 7;
                if (filealign)
                {
                    libbuf.write(pad[0 .. 8 - filealign]);
                }
            }
            else
            {
                libbuf.write(om2.base[0 .. om2.length]); // module contents
            }
        }
        static if (LOG)
        {
            printf("moffset = x%x, libbuf.length = x%x\n", moffset, libbuf.length);
        }
        assert(libbuf.length == moffset);
    }
}
480 
481 /*****************************************************************************/
482 /*****************************************************************************/
/// Bookkeeping for one object module that will become a member of the archive.
///
/// For modules pulled out of an existing archive, LibMach.addObject sets
/// `base`/`length` to cover the member's archive header as well as its data
/// and leaves `scan` at 0; for stand-alone object files they cover just the
/// file contents and `scan` is 1.
struct MachObjModule
{
    ubyte* base; // where are we holding it in memory
    uint length; // in bytes
    uint offset; // offset from start of library
    const(char)[] name; // module name (file name) with terminating 0
    c_long file_time; // file time
    uint user_id; // written to the member header; reset to 0 by MachOmToHeader if > 999_999
    uint group_id; // written to the member header; reset to 0 by MachOmToHeader if > 999_999
    uint file_mode; // written to the member header in octal
    int scan; // 1 means scan for symbols
}
495 
// Widths, in bytes, of the fixed-size text fields of MachLibHeader
// (one constant per field, in field order).
enum MACH_OBJECT_NAME_SIZE = 16;
enum MACH_FILE_TIME_SIZE = 12;
enum MACH_USER_ID_SIZE = 6;
enum MACH_GROUP_ID_SIZE = 6;
enum MACH_FILE_MODE_SIZE = 8;
enum MACH_FILE_SIZE_SIZE = 10;
enum MACH_TRAILER_SIZE = 2;
503 
/// Header that precedes every member in the archive, exactly as it appears
/// in the file: LibMach.addObject overlays it on the raw bytes and
/// WriteLibToBuffer writes it out verbatim. All fields are text; the
/// numeric ones are written left justified and space padded by
/// MachOmToHeader.
struct MachLibHeader
{
    char[MACH_OBJECT_NAME_SIZE] object_name; // "#1/<n>" as written by MachOmToHeader, or "__.SYMDEF"
    char[MACH_FILE_TIME_SIZE] file_time; // decimal
    char[MACH_USER_ID_SIZE] user_id; // decimal
    char[MACH_GROUP_ID_SIZE] group_id; // decimal
    char[MACH_FILE_MODE_SIZE] file_mode; // in octal
    char[MACH_FILE_SIZE_SIZE] file_size; // decimal
    char[MACH_TRAILER_SIZE] trailer; // MachOmToHeader stores '`' followed by '\n'
}
514 
/**
 * Fill in the archive member header for an object module.
 *
 * The member name is stored out of line: `object_name` receives `#1/<n>`
 * where `<n>` is the length of the name plus its zero padding, and
 * `file_size` accounts for that padded name plus the module contents
 * rounded up to a multiple of 8. Every numeric field is written left
 * justified and padded with spaces to its full width, without a
 * terminating 0.
 *
 * Params:
 *      h = header to fill in; every field is overwritten
 *      om = module to describe; `user_id` / `group_id` larger than 999_999
 *           do not fit their fields and are reset to 0 in `om` itself
 */
extern (C++) void MachOmToHeader(MachLibHeader* h, MachObjModule* om)
{
    /* Format `value` and store it in the `width` bytes at `field`.
     *
     * The text is produced in scratch space first because snprintf always
     * reserves one byte for the terminating 0: formatting directly into the
     * header would truncate a value that exactly fills its field (e.g. a
     * six digit user id) and leave a 0 byte in the header.
     * `format` must consume exactly one 64-bit integer argument.
     */
    extern (D) static void putField(char* field, size_t width, const(char)* format, ulong value)
    {
        char[32] text = void; // ample for "#1/" plus any 64-bit number, decimal or octal
        const int len = snprintf(text.ptr, text.length, format, value);
        assert(len > 0 && len <= width);
        memset(field, ' ', width);
        memcpy(field, text.ptr, len);
    }

    const slen = om.name.length;
    int nzeros = 8 - ((slen + 4) & 7);
    if (nzeros < 4)
        nzeros += 8; // emulate mysterious behavior of ar
    putField(h.object_name.ptr, MACH_OBJECT_NAME_SIZE, "#1/%llu", cast(ulong)(slen + nzeros));
    putField(h.file_time.ptr, MACH_FILE_TIME_SIZE, "%llu", cast(ulong)cast(long)om.file_time);
    if (om.user_id > 999_999) // yes, it happens
        om.user_id = 0; // don't really know what to do here
    putField(h.user_id.ptr, MACH_USER_ID_SIZE, "%llu", cast(ulong)om.user_id);
    if (om.group_id > 999_999) // yes, it happens
        om.group_id = 0; // don't really know what to do here
    putField(h.group_id.ptr, MACH_GROUP_ID_SIZE, "%llu", cast(ulong)om.group_id);
    putField(h.file_mode.ptr, MACH_FILE_MODE_SIZE, "%llo", cast(ulong)om.file_mode);
    int filesize = om.length;
    filesize = (filesize + 7) & ~7; // contents are padded to a multiple of 8
    putField(h.file_size.ptr, MACH_FILE_SIZE_SIZE, "%llu", cast(ulong)(slen + nzeros + filesize));
    h.trailer[0] = '`';
    h.trailer[1] = '\n';
}