1 /**
2  * A library in the OMF format, a legacy format for 32-bit Windows.
3  *
4  * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/libomf.d, _libomf.d)
8  * Documentation:  https://dlang.org/phobos/dmd_libomf.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/libomf.d
10  */
11 
12 module dmd.libomf;
13 
14 import core.stdc.stdio;
15 import core.stdc.string;
16 import core.stdc.stdlib;
17 import core.bitop;
18 
19 import dmd.utils;
20 import dmd.lib;
21 import dmd.location;
22 
23 import dmd.root.array;
24 import dmd.root.filename;
25 import dmd.root.rmem;
26 import dmd.common.outbuffer;
27 import dmd.root.string;
28 import dmd.root.stringtable;
29 
30 import dmd.scanomf;
31 
/**
 * Entry point (only public symbol in this module).
 * Returns:
 *      a freshly constructed `LibOMF`, typed as its `Library` base class
 */
extern (C++) Library LibOMF_factory()
{
    Library lib = new LibOMF();
    return lib;
}
37 
38 private: // for the remainder of this module
39 
/// Compile-time switch: when `true`, the `printf` traces guarded by `static if (LOG)` are compiled in.
enum LOG = false;
41 
/// A symbol name paired with the object module it was found in.
struct OmfObjSymbol
{
    char* name;       // 0-terminated symbol name
    OmfObjModule* om; // module the symbol belongs to

    /// Predicate for `Array.sort`: orders two symbols by their names using `strcmp`
    static int name_pred (scope const OmfObjSymbol** ppe1, scope const OmfObjSymbol** ppe2) nothrow @nogc pure
    {
        const lhs = *ppe1;
        const rhs = *ppe2;
        return strcmp(lhs.name, rhs.name);
    }
}
53 
alias OmfObjModules = Array!(OmfObjModule*); // list of pointers to object modules
alias OmfObjSymbols = Array!(OmfObjSymbol*); // list of pointers to symbols
56 
57 final class LibOMF : Library
58 {
    OmfObjModules objmodules; // OmfObjModule[]: modules pushed by addObject
    OmfObjSymbols objsymbols; // OmfObjSymbol[]: one entry per symbol accepted by addSymbol
    StringTable!(OmfObjSymbol*) tab; // symbol name -> OmfObjSymbol, used by addSymbol to detect duplicates
62 
63     extern (D) this()
64     {
65         tab._init(14_000);
66     }
67 
68     /***************************************
69      * Add object module or library to the library.
70      * Examine the buffer to see which it is.
71      * If the buffer is NULL, use module_name as the file name
72      * and load the file.
73      */
74     override void addObject(const(char)[] module_name, const ubyte[] buffer)
75     {
76         static if (LOG)
77         {
78             printf("LibOMF::addObject(%.*s)\n", cast(int)module_name.length,
79                    module_name.ptr);
80         }
81 
82         void corrupt(int reason)
83         {
84             eSink.error(loc, "corrupt OMF object module %.*s %d",
85                   cast(int)module_name.length, module_name.ptr, reason);
86         }
87 
88         auto buf = buffer.ptr;
89         auto buflen = buffer.length;
90         if (!buf)
91         {
92             assert(module_name.length, "No module nor buffer provided to `addObject`");
93             // read file and take buffer ownership
94             auto data = readFile(Loc.initial, module_name).extractSlice();
95             buf = data.ptr;
96             buflen = data.length;
97         }
98         uint g_page_size;
99         ubyte* pstart = cast(ubyte*)buf;
100         bool islibrary = false;
101         /* See if it's an OMF library.
102          * Don't go by file extension.
103          */
104         struct LibHeader
105         {
106         align(1):
107             ubyte recTyp; // 0xF0
108             ushort pagesize;
109             uint lSymSeek;
110             ushort ndicpages;
111         }
112 
113         /* Determine if it is an OMF library, an OMF object module,
114          * or something else.
115          */
116         if (buflen < (LibHeader).sizeof)
117             return corrupt(__LINE__);
118         const lh = cast(const(LibHeader)*)buf;
119         if (lh.recTyp == 0xF0)
120         {
121             /* OMF library
122              * The modules are all at buf[g_page_size .. lh.lSymSeek]
123              */
124             islibrary = 1;
125             g_page_size = lh.pagesize + 3;
126             buf = cast(ubyte*)(pstart + g_page_size);
127             if (lh.lSymSeek > buflen || g_page_size > buflen)
128                 return corrupt(__LINE__);
129             buflen = lh.lSymSeek - g_page_size;
130         }
131         else if (lh.recTyp == '!' && memcmp(lh, "!<arch>\n".ptr, 8) == 0)
132         {
133             eSink.error(loc, "COFF libraries not supported");
134             return;
135         }
136         else
137         {
138             // Not a library, assume OMF object module
139             g_page_size = 16;
140         }
141         bool firstmodule = true;
142 
143         void addOmfObjModule(char* name, void* base, size_t length)
144         {
145             auto om = new OmfObjModule();
146             om.base = cast(ubyte*)base;
147             om.page = cast(ushort)((om.base - pstart) / g_page_size);
148             om.length = cast(uint)length;
149             /* Determine the name of the module
150              */
151             if (firstmodule && module_name && !islibrary)
152             {
153                 // Remove path and extension
154                 om.name = FileName.removeExt(FileName.name(module_name));
155             }
156             else
157             {
158                 /* Use THEADR name as module name,
159                  * removing path and extension.
160                  */
161                 om.name = FileName.removeExt(FileName.name(name.toDString()));
162             }
163             firstmodule = false;
164             this.objmodules.push(om);
165         }
166 
167         if (scanOmfLib(&addOmfObjModule, cast(void*)buf, buflen, g_page_size))
168             return corrupt(__LINE__);
169     }
170 
171     /*****************************************************************************/
172 
173     void addSymbol(OmfObjModule* om, const(char)[] name, int pickAny = 0)
174     {
175         assert(name.length == strlen(name.ptr));
176         static if (LOG)
177         {
178             printf("LibOMF::addSymbol(%.*s, %.*s, %d)\n",
179                 cast(int)om.name.length, om.name.ptr,
180                 cast(int)name.length, name.ptr, pickAny);
181         }
182         if (auto s = tab.insert(name, null))
183         {
184             auto os = new OmfObjSymbol();
185             os.name = cast(char*)Mem.check(strdup(name.ptr));
186             os.om = om;
187             s.value = os;
188             objsymbols.push(os);
189         }
190         else
191         {
192             // already in table
193             if (!pickAny)
194             {
195                 const s2 = tab.lookup(name);
196                 assert(s2);
197                 const os = s2.value;
198                 eSink.error(loc, "multiple definition of %.*s: %.*s and %.*s: %s",
199                     cast(int)om.name.length, om.name.ptr,
200                     cast(int)name.length, name.ptr,
201                     cast(int)os.om.name.length, os.om.name.ptr, os.name);
202             }
203         }
204     }
205 
206 private:
207     /************************************
208      * Scan single object module for dictionary symbols.
209      * Send those symbols to LibOMF::addSymbol().
210      */
211     void scanObjModule(OmfObjModule* om)
212     {
213         static if (LOG)
214         {
215             printf("LibMSCoff::scanObjModule(%s)\n", om.name.ptr);
216         }
217 
218         extern (D) void addSymbol(const(char)[] name, int pickAny)
219         {
220             this.addSymbol(om, name, pickAny);
221         }
222 
223         scanOmfObjModule(&addSymbol, om.base[0 .. om.length], om.name.ptr, loc, eSink);
224     }
225 
226     /***********************************
227      * Calculates number of pages needed for dictionary
228      * Returns:
229      *      number of pages
230      */
231     ushort numDictPages(uint padding)
232     {
233         ushort ndicpages;
234         ushort bucksForHash;
235         ushort bucksForSize;
236         uint symSize = 0;
237         foreach (s; objsymbols)
238         {
239             symSize += (strlen(s.name) + 4) & ~1;
240         }
241         foreach (om; objmodules)
242         {
243             size_t len = om.name.length;
244             if (len > 0xFF)
245                 len += 2; // Digital Mars long name extension
246             symSize += (len + 4 + 1) & ~1;
247         }
248         bucksForHash = cast(ushort)((objsymbols.length + objmodules.length + HASHMOD - 3) / (HASHMOD - 2));
249         bucksForSize = cast(ushort)((symSize + BUCKETSIZE - padding - padding - 1) / (BUCKETSIZE - padding));
250         ndicpages = (bucksForHash > bucksForSize) ? bucksForHash : bucksForSize;
251         //printf("ndicpages = %u\n",ndicpages);
252         // Find prime number greater than ndicpages
253         __gshared uint* primes =
254         [
255             1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43,
256             47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103,
257             107, 109, 113, 127, 131, 137, 139, 149, 151, 157,
258             163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
259             223, 227, 229, 233, 239, 241, 251, 257, 263, 269,
260             271, 277, 281, 283, 293, 307, 311, 313, 317, 331,
261             337, 347, 349, 353, 359, 367, 373, 379, 383, 389,
262             397, 401, 409, 419, 421, 431, 433, 439, 443, 449,
263             457, 461, 463, 467, 479, 487, 491, 499, 503, 509,
264             //521,523,541,547,
265             0
266         ];
267         for (size_t i = 0; 1; i++)
268         {
269             if (primes[i] == 0)
270             {
271                 // Quick and easy way is out.
272                 // Now try and find first prime number > ndicpages
273                 uint prime;
274                 for (prime = (ndicpages + 1) | 1; 1; prime += 2)
275                 {
276                     // Determine if prime is prime
277                     for (uint u = 3; u < prime / 2; u += 2)
278                     {
279                         if ((prime / u) * u == prime)
280                             goto L1;
281                     }
282                     break;
283                 L1:
284                 }
285                 ndicpages = cast(ushort)prime;
286                 break;
287             }
288             if (primes[i] > ndicpages)
289             {
290                 ndicpages = cast(ushort)primes[i];
291                 break;
292             }
293         }
294         return ndicpages;
295     }
296 
297     /*******************************************
298      * Write the module and symbol names to the dictionary.
299      * Returns:
300      *      false   failure
301      */
302     bool FillDict(ubyte* bucketsP, ushort ndicpages)
303     {
304         // max size that will fit in dictionary
305         enum LIBIDMAX = (512 - 0x25 - 3 - 4);
306         ubyte[4 + LIBIDMAX + 2 + 1] entry;
307         //printf("FillDict()\n");
308         // Add each of the module names
309         foreach (om; objmodules)
310         {
311             ushort n = cast(ushort)om.name.length;
312             if (n > 255)
313             {
314                 entry[0] = 0xFF;
315                 entry[1] = 0;
316                 *cast(ushort*)(entry.ptr + 2) = cast(ushort)(n + 1);
317                 memcpy(entry.ptr + 4, om.name.ptr, n);
318                 n += 3;
319             }
320             else
321             {
322                 entry[0] = cast(ubyte)(1 + n);
323                 memcpy(entry.ptr + 1, om.name.ptr, n);
324             }
325             entry[n + 1] = '!';
326             *(cast(ushort*)(n + 2 + entry.ptr)) = om.page;
327             if (n & 1)
328                 entry[n + 2 + 2] = 0;
329             if (!EnterDict(bucketsP, ndicpages, entry.ptr, n + 1))
330                 return false;
331         }
332         // Sort the symbols
333         objsymbols.sort!(OmfObjSymbol.name_pred);
334         // Add each of the symbols
335         foreach (os; objsymbols)
336         {
337             ushort n = cast(ushort)strlen(os.name);
338             if (n > 255)
339             {
340                 entry[0] = 0xFF;
341                 entry[1] = 0;
342                 *cast(ushort*)(entry.ptr + 2) = n;
343                 memcpy(entry.ptr + 4, os.name, n);
344                 n += 3;
345             }
346             else
347             {
348                 entry[0] = cast(ubyte)n;
349                 memcpy(entry.ptr + 1, os.name, n);
350             }
351             *(cast(ushort*)(n + 1 + entry.ptr)) = os.om.page;
352             if ((n & 1) == 0)
353                 entry[n + 3] = 0;
354             if (!EnterDict(bucketsP, ndicpages, entry.ptr, n))
355             {
356                 return false;
357             }
358         }
359         return true;
360     }
361 
362     /**********************************************
363      * Create and write library to libbuf.
364      * The library consists of:
365      *      library header
366      *      object modules...
367      *      dictionary header
368      *      dictionary pages...
369      */
370     protected override void writeLibToBuffer(ref OutBuffer libbuf)
371     {
372         /* Scan each of the object modules for symbols
373          * to go into the dictionary
374          */
375         foreach (om; objmodules)
376         {
377             scanObjModule(om);
378         }
379         uint g_page_size = 16;
380         /* Calculate page size so that the number of pages
381          * fits in 16 bits. This is because object modules
382          * are indexed by page number, stored as an unsigned short.
383          */
384         while (1)
385         {
386         Lagain:
387             static if (LOG)
388             {
389                 printf("g_page_size = %d\n", g_page_size);
390             }
391             uint offset = g_page_size;
392             foreach (om; objmodules)
393             {
394                 uint page = offset / g_page_size;
395                 if (page > 0xFFFF)
396                 {
397                     // Page size is too small, double it and try again
398                     g_page_size *= 2;
399                     goto Lagain;
400                 }
401                 offset += OMFObjSize(om.base, om.length, om.name.ptr);
402                 // Round the size of the file up to the next page size
403                 // by filling with 0s
404                 uint n = (g_page_size - 1) & offset;
405                 if (n)
406                     offset += g_page_size - n;
407             }
408             break;
409         }
410         /* Leave one page of 0s at start as a dummy library header.
411          * Fill it in later with the real data.
412          */
413         libbuf.fill0(g_page_size);
414         /* Write each object module into the library
415          */
416         foreach (om; objmodules)
417         {
418             uint page = cast(uint)(libbuf.length / g_page_size);
419             assert(page <= 0xFFFF);
420             om.page = cast(ushort)page;
421             // Write out the object module om
422             writeOMFObj(libbuf, om.base, om.length, om.name.ptr);
423             // Round the size of the file up to the next page size
424             // by filling with 0s
425             uint n = (g_page_size - 1) & libbuf.length;
426             if (n)
427                 libbuf.fill0(g_page_size - n);
428         }
429         // File offset of start of dictionary
430         uint offset = cast(uint)libbuf.length;
431         // Write dictionary header, then round it to a BUCKETPAGE boundary
432         ushort size = (BUCKETPAGE - (cast(short)offset + 3)) & (BUCKETPAGE - 1);
433         libbuf.writeByte(0xF1);
434         libbuf.writeword(size);
435         libbuf.fill0(size);
436         // Create dictionary
437         ubyte* bucketsP = null;
438         ushort ndicpages;
439         ushort padding = 32;
440         for (;;)
441         {
442             ndicpages = numDictPages(padding);
443             static if (LOG)
444             {
445                 printf("ndicpages = %d\n", ndicpages);
446             }
447             // Allocate dictionary
448             if (bucketsP)
449                 bucketsP = cast(ubyte*)Mem.check(realloc(bucketsP, ndicpages * BUCKETPAGE));
450             else
451                 bucketsP = cast(ubyte*)Mem.check(malloc(ndicpages * BUCKETPAGE));
452             memset(bucketsP, 0, ndicpages * BUCKETPAGE);
453             for (uint u = 0; u < ndicpages; u++)
454             {
455                 // 'next available' slot
456                 bucketsP[u * BUCKETPAGE + HASHMOD] = (HASHMOD + 1) >> 1;
457             }
458             if (FillDict(bucketsP, ndicpages))
459                 break;
460             padding += 16; // try again with more margins
461         }
462         // Write dictionary
463         libbuf.write(bucketsP[0 .. ndicpages * BUCKETPAGE]);
464         if (bucketsP)
465             free(bucketsP);
466         // Create library header
467         struct Libheader
468         {
469         align(1):
470             ubyte recTyp;
471             ushort recLen;
472             uint trailerPosn;
473             ushort ndicpages;
474             ubyte flags;
475             uint filler;
476         }
477 
478         Libheader libHeader;
479         memset(&libHeader, 0, (Libheader).sizeof);
480         libHeader.recTyp = 0xF0;
481         libHeader.recLen = 0x0D;
482         libHeader.trailerPosn = offset + (3 + size);
483         libHeader.recLen = cast(ushort)(g_page_size - 3);
484         libHeader.ndicpages = ndicpages;
485         libHeader.flags = 1; // always case sensitive
486         // Write library header at start of buffer
487         memcpy(cast(void*)libbuf[].ptr, &libHeader, (libHeader).sizeof);
488     }
489 }
490 
491 /*****************************************************************************/
492 /*****************************************************************************/
/// One object module kept in memory until the library is written out.
struct OmfObjModule
{
    ubyte* base; // where are we holding it in memory
    uint length; // in bytes
    ushort page; // page module starts in output file
    const(char)[] name; // module name, with terminating 0
}
500 
// Layout of one dictionary page, as used by EnterDict:
enum HASHMOD = 0x25; // number of hash slots at the start of a page; byte [HASHMOD] holds the page's next free position / 2
enum BUCKETPAGE = 512; // size of one dictionary page in bytes
enum BUCKETSIZE = (BUCKETPAGE - HASHMOD - 1); // bytes of a page that follow the hash slots and the next-free byte
504 
/*******************************************
 * Write a single entry into dictionary.
 *
 * Four hash values are derived from the entry bytes (case-folded by OR-ing
 * with 0x20): a starting page and page step, and a starting slot and slot
 * step. Pages are probed in that order; in each page the slots are probed
 * until a free one is found or the probe returns to where it started.
 * Params:
 *      bucketsP = the dictionary pages, `ndicpages * BUCKETPAGE` bytes
 *      ndicpages = number of dictionary pages
 *      entry = entry bytes, starting with the length byte
 *      entrylen = length used for hashing and for computing the stored size
 * Returns:
 *      false   failure
 */
bool EnterDict(ubyte* bucketsP, ushort ndicpages, ubyte* entry, uint entrylen)
{
    ushort startPage = 0;
    ushort pageStep = 0;
    ushort startSlot = 0;
    ushort slotStep = 0;
    // Walk forwards from entry[0] and backwards from entry[entrylen] in lockstep
    foreach (i; 0 .. entrylen)
    {
        startPage = rol!(ushort)(startPage, 2) ^ (entry[i] | 0x20);
        slotStep  = ror!(ushort)(slotStep, 2)  ^ (entry[i] | 0x20);
        startSlot = ror!(ushort)(startSlot, 2) ^ (entry[entrylen - i] | 0x20);
        pageStep  = rol!(ushort)(pageStep, 2)  ^ (entry[entrylen - i] | 0x20);
    }
    startPage %= ndicpages;
    pageStep %= ndicpages;
    if (pageStep == 0)
        pageStep = 1; // a zero step would never leave the starting page
    startSlot %= HASHMOD;
    slotStep %= HASHMOD;
    if (slotStep == 0)
        slotStep = 1; // a zero step would never leave the starting slot

    // number of bytes in entry
    uint nbytes = 1 + entrylen + 2;
    if (entrylen > 255)
        nbytes += 2;

    ushort page = startPage;
    ushort slot = startSlot;
    do
    {
        ubyte* bucket = &bucketsP[page * BUCKETPAGE];
        const ushort firstSlot = slot; // probing of this page stops when we come back here
        do
        {
            if (bucket[slot] == 0)
            {
                // next available position in this page
                const ushort freePos = bucket[HASHMOD] << 1;
                assert(freePos > HASHMOD);
                if (freePos + nbytes > BUCKETPAGE)
                {
                    // off end of this page: mark it full and go to the next page
                    bucket[HASHMOD] = 0xFF;
                    break;
                }
                bucket[slot] = cast(ubyte)(freePos >> 1);
                memcpy(bucket + freePos, entry, nbytes);
                bucket[HASHMOD] += (nbytes + 1) >> 1;
                if (bucket[HASHMOD] == 0)
                    bucket[HASHMOD] = 0xFF; // counter wrapped: treat the page as full
                return true;
            }
            slot += slotStep;
            slot %= HASHMOD;
        } while (slot != firstSlot);

        page += pageStep;
        if (page >= ndicpages)
            page -= ndicpages;
    } while (page != startPage);
    return false;
}