1 /**
2  * Extract symbols from a Mach-O object file.
3  *
4  * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/scanmach.d, _scanmach.d)
8  * Documentation:  https://dlang.org/phobos/dmd_scanmach.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/scanmach.d
10  */
11 
12 module dmd.scanmach;
13 
14 import core.stdc.string;
15 import core.stdc.stdint;
16 import dmd.globals;
17 import dmd.errors;
18 import dmd.location;
19 
20 //import core.sys.darwin.mach.loader;
21 import dmd.backend.mach;
22 
23 private enum LOG = false;
24 
/*****************************************
 * Reads an object module from base[] and passes the names
 * of any exported symbols to pAddSymbol.
 *
 * Only 32-bit (MH_MAGIC, CPU_TYPE_I386) and 64-bit (MH_MAGIC_64,
 * CPU_TYPE_X86_64) modules of file type MH_OBJECT are accepted.
 * Every offset, count and size taken from the file is validated against
 * base.length before it is used, so a truncated or malformed module is
 * reported through error() instead of being read out of bounds.
 * Symbols are taken from the last LC_SYMTAB load command; a module without
 * one contributes no symbols and is not treated as an error.
 *
 * Params:
 *      pAddSymbol =  delegate the exported names are passed to
 *                    (pickAny is always 1)
 *      base =        array of contents of object module
 *      module_name = name of the object module (used for error messages)
 *      loc =         location to use for error printing
 */
void scanMachObjModule(void delegate(const(char)[] name, int pickAny) pAddSymbol,
        const(ubyte)[] base, const(char)* module_name, Loc loc)
{
    static if (LOG)
    {
        import core.stdc.stdio : printf;
        printf("scanMachObjModule(%s)\n", module_name);
    }

    // `reason` is the source line of the failed check, to tell the checks apart
    void corrupt(int reason)
    {
        error(loc, "corrupt Mach-O object module `%s` %d", module_name, reason);
    }

    const buf = base.ptr;
    const size_t buflen = base.length;

    /* First do sanity checks on object file
     */
    if (buflen < mach_header.sizeof)
        return corrupt(__LINE__);

    auto header = cast(const(mach_header)*)buf;
    bool is64;          // true for an MH_MAGIC_64 module
    uint32_t ncmds;     // number of load commands announced by the header
    size_t cmdsBegin;   // offset of the first load command
    size_t cmdsEnd;     // offset one past the load command area, <= buflen

    if (header.magic == MH_MAGIC)
    {
        if (header.cputype != CPU_TYPE_I386)
        {
            error(loc, "Mach-O object module `%s` has cputype = %d, should be %d", module_name, header.cputype, CPU_TYPE_I386);
            return;
        }
        if (header.filetype != MH_OBJECT)
        {
            error(loc, "Mach-O object module `%s` has file type = %d, should be %d", module_name, header.filetype, MH_OBJECT);
            return;
        }
        // written as a subtraction so the check cannot overflow
        if (header.sizeofcmds > buflen - mach_header.sizeof)
            return corrupt(__LINE__);
        ncmds = header.ncmds;
        cmdsBegin = mach_header.sizeof;
        cmdsEnd = cmdsBegin + header.sizeofcmds;
    }
    else if (header.magic == MH_MAGIC_64)
    {
        if (buflen < mach_header_64.sizeof)
            return corrupt(__LINE__);
        auto header64 = cast(const(mach_header_64)*)buf;
        if (header64.cputype != CPU_TYPE_X86_64)
        {
            error(loc, "Mach-O object module `%s` has cputype = %d, should be %d", module_name, header64.cputype, CPU_TYPE_X86_64);
            return;
        }
        if (header64.filetype != MH_OBJECT)
        {
            error(loc, "Mach-O object module `%s` has file type = %d, should be %d", module_name, header64.filetype, MH_OBJECT);
            return;
        }
        if (header64.sizeofcmds > buflen - mach_header_64.sizeof)
            return corrupt(__LINE__);
        is64 = true;
        ncmds = header64.ncmds;
        cmdsBegin = mach_header_64.sizeof;
        cmdsEnd = cmdsBegin + header64.sizeofcmds;
    }
    else
        return corrupt(__LINE__);

    // Commands immediately follow the header. Walk them, never stepping
    // outside [cmdsBegin, cmdsEnd), and remember the last LC_SYMTAB.
    const(symtab_command)* symtabCmd = null;
    size_t off = cmdsBegin;
    foreach (i; 0 .. ncmds)
    {
        if (cmdsEnd - off < load_command.sizeof)
            return corrupt(__LINE__);
        auto command = cast(const(load_command)*)(buf + off);
        //printf("cmd = 0x%02x, cmdsize = %u\n", command.cmd, command.cmdsize);
        if (command.cmdsize < load_command.sizeof || command.cmdsize > cmdsEnd - off)
            return corrupt(__LINE__);
        if (command.cmd == LC_SYMTAB)
        {
            // the command must be large enough to hold the fields read below
            if (command.cmdsize < symtab_command.sizeof)
                return corrupt(__LINE__);
            symtabCmd = cast(const(symtab_command)*)command;
        }
        off += command.cmdsize;
    }

    if (!symtabCmd)
        return;

    // Widen the 32-bit file fields first so sums and products cannot wrap
    const size_t stroff = symtabCmd.stroff;
    const size_t strsize = symtabCmd.strsize;
    const size_t symoff = symtabCmd.symoff;
    const size_t nsyms = symtabCmd.nsyms;

    // Get the string table
    if (stroff > buflen || strsize > buflen - stroff)
        return corrupt(__LINE__);
    const(char)[] strtab = (cast(const(char)*)buf)[stroff .. stroff + strsize];

    // Get the symbol table
    const size_t entrySize = is64 ? nlist_64.sizeof : nlist.sizeof;
    if (symoff > buflen || nsyms > (buflen - symoff) / entrySize)
        return corrupt(__LINE__);
    const symbase = buf + symoff;

    const ok = is64
        ? addExportedSymbols((cast(const(nlist_64)*)symbase)[0 .. nsyms], strtab, pAddSymbol)
        : addExportedSymbols((cast(const(nlist)*)symbase)[0 .. nsyms], strtab, pAddSymbol);
    if (!ok)
        return corrupt(__LINE__);
}

/*****************************************
 * Passes the name of every exported symbol in syms[] to pAddSymbol.
 *
 * A symbol is exported when it is not a stab entry, has N_EXT set, and is
 * either defined in a section (N_SECT) or undefined with a non-zero
 * n_value (comdef). Names of other symbols are neither read nor validated.
 *
 * Params:
 *      Nlist =      symbol table entry type, nlist or nlist_64
 *      syms =       the symbol table entries
 *      strtab =     the string table the n_strx fields index into
 *      pAddSymbol = delegate the exported names are passed to
 * Returns:
 *      false if the name of an exported symbol does not lie within strtab[]
 *      as a NUL-terminated string (scanning stops there), true otherwise
 */
private bool addExportedSymbols(Nlist)(const(Nlist)[] syms, const(char)[] strtab,
        void delegate(const(char)[] name, int pickAny) pAddSymbol)
{
    foreach (ref s; syms)
    {
        if (s.n_type & N_STAB)
            continue; // stab entry, values in /usr/include/mach-o/stab.h

        const isExternal = (s.n_type & N_EXT) != 0;
        bool isExported;
        switch (s.n_type & N_TYPE)
        {
        case N_UNDF:
            isExported = isExternal && s.n_value != 0; // comdef
            break;
        case N_SECT:
            isExported = isExternal; /*&& !(s.n_desc & N_REF_TO_WEAK)*/
            break;
        default: // N_ABS, N_PBUD, N_INDR and anything else
            isExported = false;
            break;
        }
        if (!isExported)
            continue;

        // n_strx is signed in the 32-bit nlist; treat it as unsigned so a
        // negative value fails the range check instead of indexing backwards
        const size_t strx = cast(uint32_t)s.n_strx;
        if (strx >= strtab.length)
            return false;
        const(char)[] tail = strtab[strx .. $];
        // bounded search: the terminator must be inside the string table
        auto nul = cast(const(char)*)memchr(tail.ptr, 0, tail.length);
        if (nul is null)
            return false;
        pAddSymbol(tail[0 .. cast(size_t)(nul - tail.ptr)], 1);
    }
    return true;
}
217 
private: // for the remainder of this module

/// Values the header's `cputype` field is compared against:
/// CPU_TYPE_I386 for MH_MAGIC modules, CPU_TYPE_X86_64 for MH_MAGIC_64 ones.
enum
{
    CPU_TYPE_I386   = 7,
    CPU_TYPE_X86_64 = CPU_TYPE_I386 | 0x1000000,
}

/// The only header `filetype` value scanMachObjModule accepts.
enum MH_OBJECT = 0x1;
224 
/**
 * 32-bit segment load command record (56 bytes).
 * NOTE(review): not referenced by the code visible in this module; the layout
 * presumably mirrors `segment_command` in <mach-o/loader.h> - confirm.
 */
struct segment_command
{
    uint     cmd, cmdsize;
    char[16] segname;
    uint     vmaddr, vmsize;
    uint     fileoff, filesize;
    int      maxprot, initprot;
    uint     nsects, flags;
}
239 
/**
 * 64-bit segment load command record (72 bytes); same fields as
 * segment_command but with 64-bit vmaddr/vmsize/fileoff/filesize.
 * NOTE(review): not referenced by the code visible in this module; the layout
 * presumably mirrors `segment_command_64` in <mach-o/loader.h> - confirm.
 */
struct segment_command_64
{
    uint     cmd, cmdsize;
    char[16] segname;
    ulong    vmaddr, vmsize;
    ulong    fileoff, filesize;
    int      maxprot, initprot;
    uint     nsects, flags;
}
254 
/**
 * The LC_SYMTAB load command (24 bytes). scanMachObjModule uses it to
 * locate the symbol table and the string table inside the module.
 */
struct symtab_command
{
    uint cmd, cmdsize;
    uint symoff, nsyms;     // symbol table: byte offset from module start, entry count
    uint stroff, strsize;   // string table: byte offset from module start, size in bytes
}
264 
/**
 * Dynamic symbol table load command record (80 bytes, twenty 32-bit fields).
 * NOTE(review): not referenced by the code visible in this module; the layout
 * presumably mirrors `dysymtab_command` in <mach-o/loader.h> - confirm.
 */
struct dysymtab_command
{
    uint cmd, cmdsize;
    uint ilocalsym, nlocalsym;
    uint iextdefsym, nextdefsym;
    uint iundefsym, nundefsym;
    uint tocoff, ntoc;
    uint modtaboff, nmodtab;
    uint extrefsymoff, nextrefsyms;
    uint indirectsymoff, nindirectsyms;
    uint extreloff, nextrel;
    uint locreloff, nlocrel;
}
288 
/// Values of load_command.cmd. scanMachObjModule looks only for LC_SYMTAB;
/// the others are not referenced by the code visible in this module.
enum
{
    LC_SEGMENT    = 0x01,
    LC_SYMTAB     = 0x02,
    LC_DYSYMTAB   = 0x0B,
    LC_SEGMENT_64 = 0x19,
}
293 
/**
 * Common prefix of every load command (8 bytes). scanMachObjModule compares
 * `cmd` against LC_SYMTAB and advances by `cmdsize` bytes to reach the next
 * command.
 */
struct load_command
{
    uint cmd, cmdsize;
}
299 
/// Bit masks applied to the `n_type` field of a symbol table entry.
enum
{
    N_STAB = 0xE0,  // any bit set: entry is skipped by scanMachObjModule
    N_PEXT = 0x10,  // referenced only from disabled (version (none)) code
    N_TYPE = 0x0E,  // selects the type value switched on below
    N_EXT  = 0x01,  // must be set for a symbol to be passed on
}

/// Values of `n_type & N_TYPE`. Only N_UNDF and N_SECT symbols are ever
/// passed to pAddSymbol; the other cases are ignored.
enum
{
    N_UNDF = 0x0,
    N_ABS  = 0x2,
    N_INDR = 0xA,
    N_PBUD = 0xC,
    N_SECT = 0xE,
}
309 
/**
 * Symbol table entry of a 32-bit (MH_MAGIC) module, 12 bytes.
 */
struct nlist
{
    int    n_strx;          // position of the symbol's name in the string table
    ubyte  n_type, n_sect;  // n_type is tested against the N_* constants; n_sect is not read here
    short  n_desc;          // not read by the code visible in this module
    uint   n_value;         // tested for non-zero on undefined external symbols
}
318 
/**
 * Symbol table entry of a 64-bit (MH_MAGIC_64) module, 16 bytes.
 */
struct nlist_64
{
    uint   n_strx;          // position of the symbol's name in the string table
    ubyte  n_type, n_sect;  // n_type is tested against the N_* constants; n_sect is not read here
    ushort n_desc;          // not read by the code visible in this module
    ulong  n_value;         // tested for non-zero on undefined external symbols
}