1 /**
2  * Extract symbols from a Mach-O object file.
3  *
4  * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/scanmach.d, _scanmach.d)
8  * Documentation:  https://dlang.org/phobos/dmd_scanmach.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/scanmach.d
10  */
11 
12 module dmd.scanmach;
13 
14 import core.stdc.string;
15 import core.stdc.stdint;
16 
17 import dmd.errorsink;
18 import dmd.location;
19 
20 //import core.sys.darwin.mach.loader;
21 import dmd.backend.mach;
22 
23 private enum LOG = false;
24 
/*****************************************
 * Reads an object module from base[] and passes the names
 * of any exported symbols to (*pAddSymbol)().
 *
 * Every offset, size and count read from the file is checked against
 * base.length before it is used; a violation is reported through eSink
 * as a corrupt object module and scanning stops. Symbols already passed
 * to pAddSymbol before the corruption was detected are not retracted.
 *
 * Params:
 *      pAddSymbol =  function to pass the names to
 *      base =        array of contents of object module
 *      module_name = name of the object module (used for error messages)
 *      loc =         location to use for error printing
 *      eSink =       where the error messages go
 */
void scanMachObjModule(void delegate(const(char)[] name, int pickAny) pAddSymbol,
        const ubyte[] base, const char* module_name, Loc loc, ErrorSink eSink)
{
    static if (LOG)
    {
        printf("scanMachObjModule(%s)\n", module_name);
    }

    // Reports the module as corrupt; `reason` is the source line of the failed check
    void corrupt(int reason)
    {
        eSink.error(loc, "corrupt Mach-O object module `%s` %d", module_name, reason);
    }

    const buf = base.ptr;
    const size_t buflen = base.length;

    /* First do sanity checks on object file
     */
    if (buflen < mach_header.sizeof)
        return corrupt(__LINE__);

    auto header = cast(const(mach_header)*)buf;
    const bool is64 = header.magic == MH_MAGIC_64;
    size_t headerSize;      // size of the (32 or 64 bit) header preceding the load commands
    uint32_t ncmds;         // number of load commands
    uint32_t sizeofcmds;    // total size in bytes of all load commands

    if (header.magic == MH_MAGIC)
    {
        if (header.cputype != CPU_TYPE_I386)
        {
            eSink.error(loc, "Mach-O object module `%s` has cputype = %d, should be %d", module_name, header.cputype, CPU_TYPE_I386);
            return;
        }
        if (header.filetype != MH_OBJECT)
        {
            eSink.error(loc, "Mach-O object module `%s` has file type = %d, should be %d", module_name, header.filetype, MH_OBJECT);
            return;
        }
        // subtract rather than add so the comparison cannot wrap around
        if (buflen - mach_header.sizeof < header.sizeofcmds)
            return corrupt(__LINE__);
        headerSize = mach_header.sizeof;
        ncmds = header.ncmds;
        sizeofcmds = header.sizeofcmds;
    }
    else if (is64)
    {
        if (buflen < mach_header_64.sizeof)
            return corrupt(__LINE__);
        auto header64 = cast(const(mach_header_64)*)buf;
        if (header64.cputype != CPU_TYPE_X86_64)
        {
            eSink.error(loc, "Mach-O object module `%s` has cputype = %d, should be %d", module_name, header64.cputype, CPU_TYPE_X86_64);
            return;
        }
        if (header64.filetype != MH_OBJECT)
        {
            eSink.error(loc, "Mach-O object module `%s` has file type = %d, should be %d", module_name, header64.filetype, MH_OBJECT);
            return;
        }
        if (buflen - mach_header_64.sizeof < header64.sizeofcmds)
            return corrupt(__LINE__);
        headerSize = mach_header_64.sizeof;
        ncmds = header64.ncmds;
        sizeofcmds = header64.sizeofcmds;
    }
    else
        return corrupt(__LINE__);

    /* Walk the load commands, which immediately follow the header, looking
     * for the symbol table command. If there are several, the last one wins.
     * Each command must lie entirely inside the sizeofcmds area, which was
     * verified above to lie inside the buffer.
     */
    const(symtab_command)* symtab_commands;
    const commands = buf + headerSize;
    size_t cmdOffset = 0;
    foreach (i; 0 .. ncmds)
    {
        const size_t remaining = sizeofcmds - cmdOffset;
        if (remaining < load_command.sizeof)
            return corrupt(__LINE__);

        auto command = cast(const(load_command)*)(commands + cmdOffset);
        //printf("cmd = 0x%02x, cmdsize = %u\n", command.cmd, command.cmdsize);
        const uint32_t cmdsize = command.cmdsize;
        if (cmdsize < load_command.sizeof || cmdsize > remaining)
            return corrupt(__LINE__);

        if (command.cmd == LC_SYMTAB)
        {
            if (cmdsize < symtab_command.sizeof)
                return corrupt(__LINE__);
            symtab_commands = cast(const(symtab_command)*)command;
        }
        cmdOffset += cmdsize;
    }

    if (!symtab_commands)
        return;

    // Get pointer to string table, making sure all of it is inside the buffer
    const size_t stroff = symtab_commands.stroff;
    const size_t strsize = symtab_commands.strsize;
    if (stroff > buflen || strsize > buflen - stroff)
        return corrupt(__LINE__);
    auto strtab = cast(const(char)*)buf + stroff;

    /* Passes the name of one symbol table entry to pAddSymbol() if the entry
     * is an external definition or a common symbol.
     * Params:
     *      n_strx =  offset of the symbol's name in the string table
     *      n_type =  type byte of the symbol
     *      n_value = value of the symbol
     * Returns:
     *      false if a name is needed but is not a 0-terminated string
     *      lying entirely inside the string table
     */
    bool addSymbol(uint32_t n_strx, uint n_type, uint64_t n_value)
    {
        if (n_type & N_STAB)
            return true;            // values in /usr/include/mach-o/stab.h

        bool wanted;
        switch (n_type & N_TYPE)
        {
            case N_UNDF:
                wanted = (n_type & N_EXT) && n_value != 0; // comdef
                break;
            case N_SECT:
                wanted = (n_type & N_EXT) != 0; /*&& !(s.n_desc & N_REF_TO_WEAK)*/
                break;
            default:                // N_ABS, N_PBUD, N_INDR and anything else
                wanted = false;
                break;
        }
        if (!wanted)
            return true;

        // Only names that are actually used get validated and measured
        if (n_strx >= strsize)
            return false;
        const(char)* name = strtab + n_strx;
        auto terminator = cast(const(char)*)memchr(name, 0, strsize - n_strx);
        if (!terminator)
            return false;

        pAddSymbol(name[0 .. cast(size_t)(terminator - name)], 1);
        return true;
    }

    // Get pointer to symbol table, making sure all nsyms entries are inside the buffer
    const size_t symoff = symtab_commands.symoff;
    const uint32_t nsyms = symtab_commands.nsyms;
    if (symoff > buflen)
        return corrupt(__LINE__);

    if (is64)
    {
        if (nsyms > (buflen - symoff) / nlist_64.sizeof)
            return corrupt(__LINE__);
        auto symtab = cast(const(nlist_64)*)(buf + symoff);

        // For each symbol
        foreach (i; 0 .. nsyms)
        {
            const s = symtab + i;
            if (!addSymbol(s.n_strx, s.n_type, s.n_value))
                return corrupt(__LINE__);
        }
    }
    else
    {
        if (nsyms > (buflen - symoff) / nlist.sizeof)
            return corrupt(__LINE__);
        auto symtab = cast(const(nlist)*)(buf + symoff);

        // For each symbol
        foreach (i; 0 .. nsyms)
        {
            const s = symtab + i;
            // the string table index is treated as unsigned
            if (!addSymbol(cast(uint32_t)s.n_strx, s.n_type, s.n_value))
                return corrupt(__LINE__);
        }
    }
}
218 
219 private: // for the remainder of this module
220 
/// Values compared against the `cputype` and `filetype` header fields
/// by scanMachObjModule().
enum
{
    CPU_TYPE_I386   = 7,
    CPU_TYPE_X86_64 = CPU_TYPE_I386 | 0x1000000,   // the I386 value with bit 0x1000000 set

    MH_OBJECT       = 0x1,                         // the only file type accepted
}
225 
/// On-disk record layout; field order and sizes must not change.
/// NOTE(review): presumably the body of an `LC_SEGMENT` load command -
/// not referenced by the code visible in this file.
struct segment_command
{
    uint32_t cmd, cmdsize;
    char[16] segname;
    uint32_t vmaddr, vmsize;
    uint32_t fileoff, filesize;
    int32_t  maxprot, initprot;
    uint32_t nsects, flags;
}
240 
/// On-disk record layout; field order and sizes must not change.
/// Same fields as `segment_command`, but the address/offset/size
/// fields are 64 bits wide.
/// NOTE(review): presumably the body of an `LC_SEGMENT_64` load command -
/// not referenced by the code visible in this file.
struct segment_command_64
{
    uint32_t cmd, cmdsize;
    char[16] segname;
    uint64_t vmaddr, vmsize;
    uint64_t fileoff, filesize;
    int32_t  maxprot, initprot;
    uint32_t nsects, flags;
}
255 
/// Load command (cmd == `LC_SYMTAB`) locating the symbol and string tables.
/// On-disk record layout; field order and sizes must not change.
struct symtab_command
{
    uint32_t cmd, cmdsize;
    uint32_t symoff, nsyms;     // symbol table: offset from start of module, number of entries
    uint32_t stroff, strsize;   // string table: offset from start of module, size in bytes
}
265 
/// On-disk record layout; field order and sizes must not change.
/// NOTE(review): presumably the body of an `LC_DYSYMTAB` load command -
/// not referenced by the code visible in this file.
struct dysymtab_command
{
    uint32_t cmd, cmdsize;
    uint32_t ilocalsym, nlocalsym;
    uint32_t iextdefsym, nextdefsym;
    uint32_t iundefsym, nundefsym;
    uint32_t tocoff, ntoc;
    uint32_t modtaboff, nmodtab;
    uint32_t extrefsymoff, nextrefsyms;
    uint32_t indirectsymoff, nindirectsyms;
    uint32_t extreloff, nextrel;
    uint32_t locreloff, nlocrel;
}
289 
/// Values for the `cmd` field of a load command.
/// Only `LC_SYMTAB` is tested for by scanMachObjModule().
enum
{
    LC_SEGMENT    = 1,
    LC_SYMTAB     = 2,
    LC_DYSYMTAB   = 11,
    LC_SEGMENT_64 = 0x19,
}
294 
/// Leading fields of each load command following the Mach-O header.
/// On-disk record layout; field order and sizes must not change.
struct load_command
{
    uint32_t cmd, cmdsize;  // command type (LC_xxx); byte distance to the next command
}
300 
/// Bit masks and type values for the `n_type` field of a symbol table entry.
enum
{
    // masks
    N_EXT  = 1,     // must be set for a symbol to be passed on by scanMachObjModule()
    N_STAB = 0xE0,  // entries with any of these bits set are skipped
    N_PEXT = 0x10,
    N_TYPE = 0x0E,  // selects the type value below

    // values of (n_type & N_TYPE)
    N_UNDF = 0,
    N_ABS  = 2,
    N_INDR = 10,
    N_PBUD = 12,
    N_SECT = 14,
}
310 
/// Symbol table entry of a 32-bit Mach-O object module.
/// On-disk record layout; field order and sizes must not change.
struct nlist
{
    /* Offset of the symbol's name in the string table. This is an unsigned
     * index (as it is in nlist_64); declaring it signed would turn values
     * with the top bit set into negative pointer offsets.
     */
    uint32_t n_strx;
    uint8_t n_type;     // N_xxx mask bits and type value
    uint8_t n_sect;
    int16_t n_desc;
    uint32_t n_value;
}
319 
/// Symbol table entry of a 64-bit Mach-O object module.
/// On-disk record layout; field order and sizes must not change.
struct nlist_64
{
    uint32_t n_strx;            // offset of the symbol's name in the string table
    uint8_t  n_type, n_sect;    // n_type holds the N_xxx mask bits and type value
    uint16_t n_desc;
    uint64_t n_value;
}