/**
 * Extract symbols from an OMF object file.
 *
 * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/scanomf.d, _scanomf.d)
 * Documentation:  https://dlang.org/phobos/dmd_scanomf.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/scanomf.d
 */

module dmd.scanomf;

import core.stdc.string;
import core.stdc.stdlib;

import dmd.arraytypes;
import dmd.common.outbuffer;
import dmd.errorsink;
import dmd.location;
import dmd.root.rmem;
import dmd.root.string;

private enum LOG = false;

/*****************************************
 * Reads an object module from base[] and passes the names
 * of any exported symbols to (*pAddSymbol)().
 *
 * Scanning stops at the first MODEND/M386END record. A record whose header
 * or body does not fit inside base[], or whose length field is 0, is
 * reported through eSink and ends the scan.
 *
 * Params:
 *      pAddSymbol =  function to pass the names to; its second argument is 1
 *                    for COMDEF symbols and for COMDATs that allow multiple
 *                    instances, 0 otherwise
 *      base =        array of contents of object module
 *      module_name = name of the object module (used for error messages)
 *      loc =         location to use for error printing
 *      eSink =       where the error messages go
 */
void scanOmfObjModule(void delegate(const(char)[] name, int pickAny) pAddSymbol,
        scope const ubyte[] base, scope const char* module_name, Loc loc, ErrorSink eSink)
{
    static if (LOG)
    {
        printf("scanOmfObjModule(%s)\n", module_name);
    }
    char[LIBIDMAX + 1] name = void;
    Strings names;
    scope(exit)
        for (size_t u = 1; u < names.length; u++)
            free(cast(void*)names[u]);
    names.push(null); // don't use index 0
    bool easyomf = false; // assume not EASY-OMF
    const pend = base.ptr + base.length;
    const(ubyte)* pnext;
    for (auto p = base.ptr; 1; p = pnext)
    {
        // Every record is: type byte, 16 bit little-endian length, then
        // `length` bytes, the last of which is the checksum. Validate that
        // all of it lies inside base[] before touching it.
        const size_t avail = pend - p;
        const uint recSize = avail >= 3 ? (p[1] | (p[2] << 8)) : 0;
        if (recSize == 0 || recSize > avail - 3)
        {
            eSink.error(loc, "corrupt OMF object module %s",
                module_name ? module_name : "<unknown>".ptr);
            return;
        }
        ubyte recTyp = p[0];
        ushort recLen = cast(ushort)(recSize - 1); // forget the checksum
        p += 3;
        pnext = p + recSize;
        switch (recTyp)
        {
        case LNAMES:
        case LLNAMES:
            // Remember every name; COMDAT records refer to them by index
            while (p + 1 < pnext)
            {
                parseName(p, name.ptr);
                char* copy = cast(char*)Mem.check(strdup(name.ptr));
                names.push(copy);
            }
            break;
        case PUBDEF:
            if (easyomf)
                recTyp = PUB386; // convert to MS format
            goto case;
        case PUB386:
            // Both indices are always parsed; the extra 2 bytes are only
            // present when both of them are 0
            if (!(parseIdx(p) | parseIdx(p)))
                p += 2; // skip seg, grp, frame
            while (p + 1 < pnext)
            {
                parseName(p, name.ptr);
                p += (recTyp == PUBDEF) ? 2 : 4; // skip offset
                parseIdx(p); // skip type index
                pAddSymbol(name[0 .. strlen(name.ptr)], 0);
            }
            break;
        case COMDAT:
            if (easyomf)
                recTyp = COMDAT + 1; // convert to MS format
            goto case;
        case COMDAT + 1:
            {
                int pickAny = 0;
                if (*p++ & 5) // if continuation or local comdat
                    break;
                ubyte attr = *p++;
                if (attr & 0xF0) // attr: if multiple instances allowed
                    pickAny = 1;
                p++; // align
                p += 2; // enum data offset
                if (recTyp == COMDAT + 1)
                    p += 2; // enum data offset
                parseIdx(p); // type index
                if ((attr & 0x0F) == 0) // if explicit allocation
                {
                    parseIdx(p); // base group
                    parseIdx(p); // base segment
                }
                uint idx = parseIdx(p); // public name index
                if (idx == 0 || idx >= names.length)
                {
                    //debug(printf("[s] name idx=%d, uCntNames=%d\n", idx, uCntNames));
                    eSink.error(loc, "corrupt COMDAT");
                    return;
                }
                //printf("[s] name='%s'\n",name);
                const(char)* n = names[idx];
                pAddSymbol(n.toDString(), pickAny);
                break;
            }
        case COMDEF:
            {
                while (p + 1 < pnext)
                {
                    parseName(p, name.ptr);
                    parseIdx(p); // type index
                    skipDataType(p); // data type
                    pAddSymbol(name[0 .. strlen(name.ptr)], 1);
                }
                break;
            }
        case ALIAS:
            while (p + 1 < pnext)
            {
                // Only the first name of each pair is reported
                parseName(p, name.ptr);
                pAddSymbol(name[0 .. strlen(name.ptr)], 0);
                parseName(p, name.ptr);
            }
            break;
        case MODEND:
        case M386END:
            return;
        case COMENT:
            {
                // These must be static arrays, not pointers, so that
                // `.length` is the number of signature bytes rather than
                // the size of a pointer.

                // Contents of the Phar Lap EASY-OMF marker comment
                static immutable ubyte[7] omfstr1 = [0x80, 0xAA, '8', '0', '3', '8', '6'];
                // Leading bytes of an .IMPDEF Import Definition Record;
                // the first byte is not compared
                static immutable ubyte[3] omfstr2 = [0, 0xA0, 1];

                // Recognize Phar Lap EASY-OMF format.
                // The comparison does not move p, so a mismatch leaves it
                // intact for the IMPDEF check below.
                if (recLen == omfstr1.length &&
                    memcmp(p, omfstr1.ptr, omfstr1.length) == 0)
                {
                    easyomf = true;
                    break;
                }
                // Recognize .IMPDEF Import Definition Records
                if (recLen >= 7 &&
                    memcmp(p + 1, omfstr2.ptr + 1, omfstr2.length - 1) == 0)
                {
                    p += omfstr2.length + 1; // skip matched bytes and OrdFlag field
                    parseName(p, name.ptr);
                    pAddSymbol(name[0 .. strlen(name.ptr)], 0);
                }
                break;
            }
        default:
            // ignore
        }
    }
}

/*************************************************
 * Scan a block of memory buf[0..buflen], pulling out each
 * OMF object module in it and sending the info in it to (*pAddObjModule).
 *
 * A module starts at a THEADR/LHEADR record and ends with a MODEND/M386END
 * record; after each module end the scan resumes at the next multiple of
 * pagesize (measured from buf).
 *
 * Params:
 *      pAddObjModule = called once per module with the name parsed from its
 *                      header record, a pointer to its first byte and its
 *                      length in bytes
 *      buf =           start of the memory to scan
 *      buflen =        number of bytes at buf
 *      pagesize =      module alignment; the rounding uses a bit mask, so it
 *                      must be a nonzero power of two
 * Returns:
 *      true for corrupt OMF data
 */
bool scanOmfLib(void delegate(char* name, void* base, size_t length) pAddObjModule, scope void* buf, size_t buflen, uint pagesize)
{
    /* Split up the buffer buf[0..buflen] into multiple object modules,
     * each aligned on a pagesize boundary.
     */
    const(ubyte)* base = null;
    char[LIBIDMAX + 1] name = void;
    auto p = cast(const(ubyte)*)buf;
    auto pend = p + buflen;
    const(ubyte)* pnext;
    for (; p < pend; p = pnext) // for each OMF record
    {
        if (p + 3 >= pend)
            return true; // corrupt
        const ubyte recTyp = p[0];
        // 16 bit little-endian record length, read bytewise so that no
        // unaligned ushort load is needed
        const ushort recLen = cast(ushort)(p[1] | (p[2] << 8));
        pnext = p + 3 + recLen;
        if (pnext > pend)
            return true; // corrupt
        switch (recTyp)
        {
        case LHEADR:
        case THEADR:
            if (!base)
            {
                base = p;
                p += 3;
                parseName(p, name.ptr);
                if (name[0] == 'C' && name[1] == 0) // old C compilers did this
                    base = pnext; // skip past THEADR
            }
            break;
        case MODEND:
        case M386END:
            {
                if (base)
                {
                    pAddObjModule(name.ptr, cast(ubyte*)base, pnext - base);
                    base = null;
                }
                // Round up to next page
                uint t = cast(uint)(pnext - cast(const(ubyte)*)buf);
                t = (t + pagesize - 1) & ~cast(uint)(pagesize - 1);
                pnext = cast(const(ubyte)*)buf + t;
                break;
            }
        default:
            // ignore
        }
    }
    return (base !is null); // missing MODEND record
}

/*************************************************
 * Compute the number of bytes writeOMFObj() emits for an object module.
 *
 * Params:
 *      base =   first byte of the object module
 *      length = number of bytes in the object module
 *      name =   0-terminated module name, at most LIBIDMAX characters
 * Returns:
 *      length, plus the size of a THEADR record holding name when the
 *      module does not already begin with a THEADR or LHEADR record
 */
uint OMFObjSize(scope const void* base, size_t length, scope const char* name)
{
    ubyte c = *cast(const(ubyte)*)base;
    if (c != THEADR && c != LHEADR)
    {
        size_t len = strlen(name);
        assert(len <= LIBIDMAX);
        // type(1) + record length(2) + name length(1) + name + checksum(1)
        length += len + 5;
    }
    return cast(uint)length;
}

/*************************************************
 * Append an object module to buf, first synthesizing a THEADR record from
 * name when the module does not already begin with a THEADR or LHEADR record.
 *
 * Params:
 *      buf =    buffer to append to
 *      base =   first byte of the object module
 *      length = number of bytes in the object module
 *      name =   0-terminated module name; must fit in the one byte length
 *               fields of the THEADR record
 */
void writeOMFObj(ref OutBuffer buf, scope const void* base, size_t length, scope const char* name)
{
    ubyte c = *cast(const(ubyte)*)base;
    if (c != THEADR && c != LHEADR)
    {
        const len = strlen(name);
        assert(len <= LIBIDMAX);
        assert(len <= 0xFF - 2); // both length bytes below must not truncate
        ubyte[4 + LIBIDMAX + 1] header = void;
        header[0] = THEADR;
        header[1] = cast(ubyte)(2 + len); // record length: name length byte + name + checksum
        header[2] = 0;
        header[3] = cast(ubyte)len;
        memcpy(4 + header.ptr, name, len);
        // Compute and store record checksum: all bytes of the record,
        // checksum included, sum to 0 modulo 256
        uint n = cast(uint)(len + 4);
        ubyte checksum = 0;
        ubyte* p = header.ptr;
        while (n--)
        {
            checksum -= *p;
            p++;
        }
        *p = checksum;
        buf.write(header.ptr[0 .. len + 5]);
    }
    buf.write(base[0 .. length]);
}

private: // for the remainder of this module

/**************************
 * Record types:
 */
enum RHEADR = 0x6E;
enum REGINT = 0x70;
enum REDATA = 0x72;
enum RIDATA = 0x74;
enum OVLDEF = 0x76;
enum ENDREC = 0x78;
enum BLKDEF = 0x7A;
enum BLKEND = 0x7C;
enum DEBSYM = 0x7E;
enum THEADR = 0x80;
enum LHEADR = 0x82;
enum PEDATA = 0x84;
enum PIDATA = 0x86;
enum COMENT = 0x88;
enum MODEND = 0x8A;
enum M386END = 0x8B; /* 32 bit module end record */
enum EXTDEF = 0x8C;
enum TYPDEF = 0x8E;
enum PUBDEF = 0x90;
enum PUB386 = 0x91;
enum LOCSYM = 0x92;
enum LINNUM = 0x94;
enum LNAMES = 0x96;
enum SEGDEF = 0x98;
enum GRPDEF = 0x9A;
enum FIXUPP = 0x9C;
/*#define (none)        0x9E    */
enum LEDATA = 0xA0;
enum LIDATA = 0xA2;
enum LIBHED = 0xA4;
enum LIBNAM = 0xA6;
enum LIBLOC = 0xA8;
enum LIBDIC = 0xAA;
enum COMDEF = 0xB0;
enum LEXTDEF = 0xB4;
enum LPUBDEF = 0xB6;
enum LCOMDEF = 0xB8;
enum CEXTDEF = 0xBC;
enum COMDAT = 0xC2;
enum LINSYM = 0xC4;
enum ALIAS = 0xC6;
enum LLNAMES = 0xCA;

/// Max size of a name that will fit in dictionary
enum LIBIDMAX = (512 - 0x25 - 3 - 4);

/**************************
 * Read a length-prefixed name and store it 0-terminated in name.
 *
 * A name is normally one length byte followed by that many characters.
 * When the length byte is 0xFF and the next byte is 0, the two bytes after
 * that hold a 16 bit little-endian length instead.
 *
 * Params:
 *      pp =   points at the length byte; advanced past the whole name
 *      name = destination, must have room for LIBIDMAX + 1 characters
 */
void parseName(ref scope const(ubyte)* pp, char* name)
{
    auto p = pp;
    uint len = *p++;
    if (len == 0xFF && *p == 0) // if long name
    {
        len = p[1] & 0xFF;
        len |= cast(uint)p[2] << 8;
        p += 3;
        assert(len <= LIBIDMAX);
    }
    // Never write more than the destination holds, even when the assert
    // above is compiled out; the cursor still skips the complete name.
    const size_t ncopy = len <= LIBIDMAX ? len : LIBIDMAX;
    memcpy(name, p, ncopy);
    name[ncopy] = 0;
    pp = p + len;
}

/**************************
 * Read an index field.
 *
 * If the high bit of the first byte is clear the index is that byte;
 * otherwise the low 7 bits are the high byte of the index and the
 * following byte is its low byte.
 *
 * Params:
 *      pp = points at the index field; advanced past it
 * Returns:
 *      the index
 */
ushort parseIdx(ref scope const(ubyte)* pp)
{
    auto p = pp;
    const c = *p++;
    ushort idx = (0x80 & c) ? ((0x7F & c) << 8) + *p++ : c;
    pp = p;
    return idx;
}

// skip numeric field of a data type of a COMDEF record
// A leading byte of 0x81, 0x84 or 0x88 is followed by 2, 3 or 4 more bytes;
// any other accepted leading byte (<= 0x80) is the whole field.
void skipNumericField(ref scope const(ubyte)* pp)
{
    const(ubyte)* p = pp;
    const c = *p++;
    if (c == 0x81)
        p += 2;
    else if (c == 0x84)
        p += 3;
    else if (c == 0x88)
        p += 4;
    else
        assert(c <= 0x80);
    pp = p;
}

// skip data type of a COMDEF record
// FAR data (0x61) is followed by two numeric fields, NEAR data (0x62) by one;
// the remaining accepted values are a single byte.
void skipDataType(ref scope const(ubyte)* pp)
{
    auto p = pp;
    const c = *p++;
    if (c == 0x61)
    {
        // FAR data
        skipNumericField(p);
        skipNumericField(p);
    }
    else if (c == 0x62)
    {
        // NEAR data
        skipNumericField(p);
    }
    else
    {
        assert(1 <= c && c <= 0x5f); // Borland segment indices
    }
    pp = p;
}