1 /**
2  * Extract symbols from a COFF object file.
3  *
4  * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/scanmscoff.d, _scanmscoff.d)
8  * Documentation:  https://dlang.org/phobos/dmd_scanmscoff.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/scanmscoff.d
10  */
11 
12 module dmd.scanmscoff;
13 
14 import core.stdc.stdio;
15 import core.stdc.string;
16 
17 import dmd.root.string;
18 
19 import dmd.errorsink;
20 import dmd.location;
21 
22 private enum LOG = false;
23 
/*****************************************
 * Reads an object module from base[] and passes the names
 * of any exported symbols to pAddSymbol().
 *
 * Only symbols with storage class IMAGE_SYM_CLASS_EXTERNAL are reported.
 * Debug symbols, the absolute `@comp.id` symbol and undefined symbols with a
 * zero Value are skipped. Auxiliary records following a symbol are skipped
 * as well. Malformed input is reported through eSink and scanning stops;
 * names already passed to pAddSymbol() are not retracted.
 *
 * Params:
 *      pAddSymbol =  delegate to pass the names to; always called with pickAny == 1.
 *                    The name slice may refer to a temporary buffer, so it is
 *                    only valid for the duration of the call.
 *      base =        array of contents of object module
 *      module_name = name of the object module (used for error messages)
 *      loc =         location to use for error printing
 *      eSink =       where the error messages go
 */
void scanMSCoffObjModule(void delegate(const(char)[] name, int pickAny) pAddSymbol,
        scope const ubyte[] base, const char* module_name, Loc loc, ErrorSink eSink)
{
    static if (LOG)
    {
        printf("scanMSCoffObjModule(%s)\n", module_name);
    }

    // The reason code is the source line of the failed check.
    void corrupt(int reason)
    {
        eSink.error(loc, "corrupt MS-Coff object module `%s` %d", module_name, reason);
    }

    const buflen = base.length;
    /* First do sanity checks on object file
     */
    if (buflen < BIGOBJ_HEADER.sizeof)
        return corrupt(__LINE__);

    // Only take the pointer once the buffer is known to be non-empty.
    const buf = base.ptr;

    BIGOBJ_HEADER* header = cast(BIGOBJ_HEADER*)buf;
    bool is_old_coff = false;
    BIGOBJ_HEADER bigobj_header = void;
    // NOTE(review): the file is treated as old-style COFF only when BOTH
    // fields differ from the bigobj values; confirm `&&` (not `||`) is intended.
    if (header.Sig2 != 0xFFFF && header.Version != 2)
    {
        // Normalize the old-style header into the bigobj layout so the rest
        // of the function only deals with one header type.
        is_old_coff = true;
        IMAGE_FILE_HEADER header_old = *cast(IMAGE_FILE_HEADER*)buf;
        bigobj_header = BIGOBJ_HEADER.init;
        header = &bigobj_header;
        header.Machine              = header_old.Machine;
        header.NumberOfSections     = header_old.NumberOfSections;
        header.TimeDateStamp        = header_old.TimeDateStamp;
        header.PointerToSymbolTable = header_old.PointerToSymbolTable;
        header.NumberOfSymbols      = header_old.NumberOfSymbols;
    }
    switch (header.Machine)
    {
    case IMAGE_FILE_MACHINE_UNKNOWN:
    case IMAGE_FILE_MACHINE_I386:
    case IMAGE_FILE_MACHINE_AMD64:
        break;
    default:
        if (buf[0] == 0x80)
            eSink.error(loc, "object module `%s` is 32 bit OMF, but it should be 64 bit MS-Coff", module_name);
        else
            eSink.error(loc, "MS-Coff object module `%s` has magic = %x, should be %x", module_name, header.Machine, IMAGE_FILE_MACHINE_AMD64);
        return;
    }

    // Get string table:  string_table[0..string_len]
    // It starts right after the last symbol table record.
    const size_t symtab = header.PointerToSymbolTable;
    if (symtab == 0)
    {
        eSink.error(loc, "MS-Coff object module `%s` has no string table", module_name);
        return;
    }
    const size_t symbolSize = is_old_coff ? SymbolTable.sizeof : SymbolTable32.sizeof;
    // Written as subtractions/divisions so the checks cannot wrap around.
    if (symtab > buflen || header.NumberOfSymbols > (buflen - symtab) / symbolSize)
        return corrupt(__LINE__);

    size_t off = symtab + header.NumberOfSymbols * symbolSize;
    if (buflen - off < 4)
        return corrupt(__LINE__);

    // The leading 4-byte size field counts itself.
    size_t string_len = *cast(const(uint)*)(buf + off);
    const(char)* string_table = cast(const(char)*)(buf + off + 4);
    if (string_len > buflen - off)
        return corrupt(__LINE__);

    // A size below 4 leaves no room for strings; treat it as an empty table
    // instead of letting the subtraction wrap around.
    string_len = string_len >= 4 ? string_len - 4 : 0;

    // An explicit `for` loop is required here: the index is advanced inside
    // the body to step over auxiliary records, which a by-value foreach
    // index would not propagate to the iteration.
    for (size_t i = 0; i < header.NumberOfSymbols; i++)
    {
        static if (LOG)
        {
            printf("Symbol %d:\n", cast(int)i);
        }
        // In bounds: the whole symbol table extent was validated above.
        off = symtab + i * symbolSize;

        auto n = cast(SymbolTable32*)(buf + off);
        SymbolTable32 st32 = void;
        if (is_old_coff)
        {
            // Widen the old record into the bigobj record layout.
            SymbolTable n2 = *cast(SymbolTable*)n;
            st32 = SymbolTable32.init;
            n = &st32;
            n.Name               = n2.Name;
            n.Value              = n2.Value;
            // Sign-extend so that 0xFFFF / 0xFFFE compare equal to
            // IMAGE_SYM_ABSOLUTE (-1) / IMAGE_SYM_DEBUG (-2) below.
            n.SectionNumber      = cast(DWORD)cast(short)n2.SectionNumber;
            n.Type               = n2.Type;
            n.StorageClass       = n2.StorageClass;
            n.NumberOfAuxSymbols = n2.NumberOfAuxSymbols;
        }

        char[SYMNMLEN + 1] s = void;
        const(char)* p;
        size_t namelen;
        if (n.Zeros)
        {
            // Short name stored inline; it is not necessarily terminated,
            // so copy it and append a terminator.
            s[0 .. SYMNMLEN] = n.Name;
            s[SYMNMLEN] = 0;
            p = &s[0];
            namelen = strlen(p);
        }
        else
        {
            // Long name: Offset is relative to the start of the size field,
            // so the first valid value is 4.
            if (n.Offset < 4 || n.Offset - 4 >= string_len)
                return corrupt(__LINE__);

            const size_t nameOff = n.Offset - 4;
            p = string_table + nameOff;
            // The name must be terminated inside the string table.
            const terminator = cast(const(char)*)memchr(p, 0, string_len - nameOff);
            if (terminator is null)
                return corrupt(__LINE__);

            namelen = cast(size_t)(terminator - p);
        }
        // Step over this symbol's auxiliary records (done before any
        // `continue` below so skipped symbols advance too).
        i += n.NumberOfAuxSymbols;
        static if (LOG)
        {
            printf("n_name    = '%s'\n", p);
            printf("n_value   = x%08x\n", n.Value);
            printf("n_scnum   = %d\n", n.SectionNumber);
            printf("n_type    = x%04x\n", n.Type);
            printf("n_sclass  = %d\n", n.StorageClass);
            printf("n_numaux  = %d\n", n.NumberOfAuxSymbols);
        }
        switch (n.SectionNumber)
        {
        case IMAGE_SYM_DEBUG:
            continue;
        case IMAGE_SYM_ABSOLUTE:
            if (strcmp(p, "@comp.id") == 0)
                continue;
            break;
        case IMAGE_SYM_UNDEFINED:
            // A non-zero value indicates a common block
            if (n.Value)
                break;
            continue;
        default:
            break;
        }
        switch (n.StorageClass)
        {
        case IMAGE_SYM_CLASS_EXTERNAL:
            break;
        // Everything that is not external (section names, file-local
        // symbols, function/file/label records, unknown classes) is skipped.
        case IMAGE_SYM_CLASS_STATIC:
        case IMAGE_SYM_CLASS_FUNCTION:
        case IMAGE_SYM_CLASS_FILE:
        case IMAGE_SYM_CLASS_LABEL:
        default:
            continue;
        }
        pAddSymbol(p[0 .. namelen], 1);
    }
}
178 
179 private: // for the remainder of this module
180 
// Fixed-width unsigned integer aliases used by the structure
// declarations in the remainder of this module.
alias BYTE  = ubyte;   // 8 bits
alias WORD  = ushort;  // 16 bits
alias DWORD = uint;    // 32 bits
184 
/**
 * File header that scanMSCoffObjModule overlays on the start of the object
 * module buffer. When the file is detected as old-style COFF, the relevant
 * IMAGE_FILE_HEADER fields are copied into an instance of this struct so the
 * rest of the scan works on a single header type.
 *
 * The buffer must be at least BIGOBJ_HEADER.sizeof bytes long to be scanned.
 */
align(1)
struct BIGOBJ_HEADER
{
    WORD Sig1;                  // IMAGE_FILE_MACHINE_UNKNOWN
    WORD Sig2;                  // 0xFFFF
    WORD Version;               // 2
    WORD Machine;               // identifies type of target machine
    DWORD TimeDateStamp;        // creation date, number of seconds since 1970
    BYTE[16]  UUID;             //  { '\xc7', '\xa1', '\xba', '\xd1', '\xee', '\xba', '\xa9', '\x4b',
                                //    '\xaf', '\x20', '\xfa', '\xf6', '\x6a', '\xa4', '\xdc', '\xb8' };
    DWORD[4] unused;            // { 0, 0, 0, 0 }
    DWORD NumberOfSections;     // number of sections
    DWORD PointerToSymbolTable; // file offset of symbol table
    DWORD NumberOfSymbols;      // number of entries in the symbol table
}
200 
/**
 * Old-style COFF file header. scanMSCoffObjModule reads it from the start of
 * the buffer when the file is not recognized as a bigobj file, and copies
 * Machine, NumberOfSections, TimeDateStamp, PointerToSymbolTable and
 * NumberOfSymbols into a BIGOBJ_HEADER. The remaining fields are not used
 * by this module.
 */
align(1)
struct IMAGE_FILE_HEADER
{
    WORD  Machine;              // checked against the IMAGE_FILE_MACHINE_* values
    WORD  NumberOfSections;
    DWORD TimeDateStamp;
    DWORD PointerToSymbolTable; // file offset of the symbol table; 0 is reported as an error
    DWORD NumberOfSymbols;      // number of records in the symbol table
    WORD  SizeOfOptionalHeader; // not read by this module
    WORD  Characteristics;      // not read by this module
}
212 
// Size in bytes of the inline Name field of a symbol table record.
// A name that fills the field completely has no terminating zero.
enum SYMNMLEN = 8;
214 
// Machine values accepted by scanMSCoffObjModule; any other value in the
// header's Machine field is reported as an error.
enum IMAGE_FILE_MACHINE_UNKNOWN = 0;            // applies to any machine type
enum IMAGE_FILE_MACHINE_I386    = 0x14C;        // x86
enum IMAGE_FILE_MACHINE_AMD64   = 0x8664;       // x86_64
218 
// Special values of a symbol's SectionNumber, as tested by scanMSCoffObjModule.
// The two negative values are compared against an unsigned DWORD field.
enum IMAGE_SYM_DEBUG     = -2;  // symbol is always skipped
enum IMAGE_SYM_ABSOLUTE  = -1;  // skipped when the name is "@comp.id"
enum IMAGE_SYM_UNDEFINED = 0;   // skipped unless Value is non-zero (common block)
222 
// Values of a symbol's StorageClass, as tested by scanMSCoffObjModule.
// Only external symbols are passed on to the caller's delegate.
enum IMAGE_SYM_CLASS_EXTERNAL = 2;      // reported
enum IMAGE_SYM_CLASS_STATIC   = 3;      // skipped
enum IMAGE_SYM_CLASS_LABEL    = 6;      // skipped
enum IMAGE_SYM_CLASS_FUNCTION = 101;    // skipped
enum IMAGE_SYM_CLASS_FILE     = 103;    // skipped
228 
/**
 * Symbol table record with a 32-bit SectionNumber. scanMSCoffObjModule uses
 * it directly for files it does not treat as old-style COFF, and as the
 * common in-memory form that old-style SymbolTable records are copied into.
 * SymbolTable32.sizeof is the record stride in the symbol table for the
 * non-old-style case.
 */
align(1) struct SymbolTable32
{
    // The name is either stored inline or located in the string table.
    union
    {
        char[SYMNMLEN] Name;    // inline name, used when Zeros is non-zero
        struct
        {
            DWORD Zeros;        // zero means the name lives in the string table
            DWORD Offset;       // position of the name in the string table; the scanner
                                // subtracts 4 because its table pointer starts after the
                                // 4-byte length field
        }
    }

    DWORD Value;                // non-zero on an undefined symbol marks a common block
    DWORD SectionNumber;        // compared against the IMAGE_SYM_* values
    WORD Type;
    BYTE StorageClass;          // compared against the IMAGE_SYM_CLASS_* values
    BYTE NumberOfAuxSymbols;    // number of auxiliary records following this one
}
247 
/**
 * Symbol table record of an old-style COFF file; differs from SymbolTable32
 * in having a 16-bit SectionNumber. scanMSCoffObjModule copies each record
 * field by field into a SymbolTable32 before examining it, and uses
 * SymbolTable.sizeof as the record stride in the symbol table.
 */
align(1) struct SymbolTable
{
    char[SYMNMLEN] Name;        // copied as-is into the SymbolTable32 name union
    DWORD Value;
    WORD SectionNumber;         // 16 bits wide here, widened to DWORD on conversion
    WORD Type;
    BYTE StorageClass;
    BYTE NumberOfAuxSymbols;
}