1 /**
2  * A library in the ELF format, used on Unix.
3  *
4  * Copyright:   Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/libelf.d, _libelf.d)
8  * Documentation:  https://dlang.org/phobos/dmd_libelf.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/libelf.d
10  */
11 
12 module dmd.libelf;
13 
14 import core.stdc.time;
15 import core.stdc.string;
16 import core.stdc.stdlib;
17 import core.stdc.stdio;
18 version (Posix)
19 {
20     import core.sys.posix.sys.stat;
21     import core.sys.posix.unistd;
22 }
23 version (Windows)
24 {
25     import core.sys.windows.stat;
26 }
27 
28 import dmd.globals;
29 import dmd.lib;
30 import dmd.location;
31 import dmd.utils;
32 
33 import dmd.root.array;
34 import dmd.root.file;
35 import dmd.root.filename;
36 import dmd.common.outbuffer;
37 import dmd.root.port;
38 import dmd.root.rmem;
39 import dmd.root.string;
40 import dmd.root.stringtable;
41 
42 import dmd.scanelf;
43 
/**
 * Entry point (only public symbol in this module).
 *
 * Returns: a freshly constructed `LibElf`, typed as its `Library` base class.
 */
public extern (C++) Library LibElf_factory()
{
    Library lib = new LibElf();
    return lib;
}
49 
50 private: // for the remainder of this module
51 
/// Compile-time switch guarding the `static if (LOG)` tracing printfs in this module.
enum LOG = false;

/// One entry of the library's symbol dictionary: a name and the module recorded for it.
struct ElfObjSymbol
{
    const(char)[] name; // symbol name (LibElf.addSymbol stores a private copy here)
    ElfObjModule* om;   // module the symbol was registered for
}

alias ElfObjModules = Array!(ElfObjModule*); // list of object modules
alias ElfObjSymbols = Array!(ElfObjSymbol*); // list of dictionary symbols
62 
63 final class LibElf : Library
64 {
65     ElfObjModules objmodules; // ElfObjModule[]
66     ElfObjSymbols objsymbols; // ElfObjSymbol[]
67     StringTable!(ElfObjSymbol*) tab;
68 
69     extern (D) this()
70     {
71         tab._init(14_000);
72     }
73 
74     /***************************************
75      * Add object module or library to the library.
76      * Examine the buffer to see which it is.
77      * If the buffer is NULL, use module_name as the file name
78      * and load the file.
79      */
80     override void addObject(const(char)[] module_name, const ubyte[] buffer)
81     {
82         static if (LOG)
83         {
84             printf("LibElf::addObject(%.*s)\n",
85                    cast(int)module_name.length, module_name.ptr);
86         }
87 
88         void corrupt(int reason)
89         {
90             error("corrupt ELF object module %.*s %d",
91                   cast(int)module_name.length, module_name.ptr, reason);
92         }
93 
94         int fromfile = 0;
95         auto buf = buffer.ptr;
96         auto buflen = buffer.length;
97         if (!buf)
98         {
99             assert(module_name.length);
100             // read file and take buffer ownership
101             auto data = readFile(Loc.initial, module_name).extractSlice();
102             buf = data.ptr;
103             buflen = data.length;
104             fromfile = 1;
105         }
106         if (buflen < 16)
107         {
108             static if (LOG)
109             {
110                 printf("buf = %p, buflen = %d\n", buf, buflen);
111             }
112             return corrupt(__LINE__);
113         }
114         if (memcmp(buf, "!<arch>\n".ptr, 8) == 0)
115         {
116             /* Library file.
117              * Pull each object module out of the library and add it
118              * to the object module array.
119              */
120             static if (LOG)
121             {
122                 printf("archive, buf = %p, buflen = %d\n", buf, buflen);
123             }
124             uint offset = 8;
125             char* symtab = null;
126             uint symtab_size = 0;
127             char* filenametab = null;
128             uint filenametab_size = 0;
129             uint mstart = cast(uint)objmodules.length;
130             while (offset < buflen)
131             {
132                 if (offset + ElfLibHeader.sizeof >= buflen)
133                     return corrupt(__LINE__);
134                 ElfLibHeader* header = cast(ElfLibHeader*)(cast(ubyte*)buf + offset);
135                 offset += ElfLibHeader.sizeof;
136                 char* endptr = null;
137                 uint size = cast(uint)strtoul(header.file_size.ptr, &endptr, 10);
138                 if (endptr >= header.file_size.ptr + 10 || *endptr != ' ')
139                     return corrupt(__LINE__);
140                 if (offset + size > buflen)
141                     return corrupt(__LINE__);
142                 if (header.object_name[0] == '/' && header.object_name[1] == ' ')
143                 {
144                     /* Instead of rescanning the object modules we pull from a
145                      * library, just use the already created symbol table.
146                      */
147                     if (symtab)
148                         return corrupt(__LINE__);
149                     symtab = cast(char*)buf + offset;
150                     symtab_size = size;
151                     if (size < 4)
152                         return corrupt(__LINE__);
153                 }
154                 else if (header.object_name[0] == '/' && header.object_name[1] == '/')
155                 {
156                     /* This is the file name table, save it for later.
157                      */
158                     if (filenametab)
159                         return corrupt(__LINE__);
160                     filenametab = cast(char*)buf + offset;
161                     filenametab_size = size;
162                 }
163                 else
164                 {
165                     auto om = new ElfObjModule();
166                     om.base = cast(ubyte*)buf + offset; /*- sizeof(ElfLibHeader)*/
167                     om.length = size;
168                     om.offset = 0;
169                     if (header.object_name[0] == '/')
170                     {
171                         /* Pick long name out of file name table
172                          */
173                         uint foff = cast(uint)strtoul(header.object_name.ptr + 1, &endptr, 10);
174                         uint i;
175                         for (i = 0; 1; i++)
176                         {
177                             if (foff + i >= filenametab_size)
178                                 return corrupt(__LINE__);
179                             char c = filenametab[foff + i];
180                             if (c == '/')
181                                 break;
182                         }
183                         auto n = cast(char*)Mem.check(malloc(i + 1));
184                         memcpy(n, filenametab + foff, i);
185                         n[i] = 0;
186                         om.name = n[0 .. i];
187                     }
188                     else
189                     {
190                         /* Pick short name out of header
191                          */
192                         auto n = cast(char*)Mem.check(malloc(ELF_OBJECT_NAME_SIZE));
193                         for (int i = 0; 1; i++)
194                         {
195                             if (i == ELF_OBJECT_NAME_SIZE)
196                                 return corrupt(__LINE__);
197                             char c = header.object_name[i];
198                             if (c == '/')
199                             {
200                                 n[i] = 0;
201                                 om.name = n[0 .. i];
202                                 break;
203                             }
204                             n[i] = c;
205                         }
206                     }
207                     om.name_offset = -1;
208                     om.file_time = strtoul(header.file_time.ptr, &endptr, 10);
209                     om.user_id = cast(uint)strtoul(header.user_id.ptr, &endptr, 10);
210                     om.group_id = cast(uint)strtoul(header.group_id.ptr, &endptr, 10);
211                     om.file_mode = cast(uint)strtoul(header.file_mode.ptr, &endptr, 8);
212                     om.scan = 0; // don't scan object module for symbols
213                     objmodules.push(om);
214                 }
215                 offset += (size + 1) & ~1;
216             }
217             if (offset != buflen)
218                 return corrupt(__LINE__);
219             /* Scan the library's symbol table, and insert it into our own.
220              * We use this instead of rescanning the object module, because
221              * the library's creator may have a different idea of what symbols
222              * go into the symbol table than we do.
223              * This is also probably faster.
224              */
225             uint nsymbols = Port.readlongBE(symtab);
226             char* s = symtab + 4 + nsymbols * 4;
227             if (4 + nsymbols * (4 + 1) > symtab_size)
228                 return corrupt(__LINE__);
229             for (uint i = 0; i < nsymbols; i++)
230             {
231                 const(char)[] name = s.toDString();
232                 s += name.length + 1;
233                 if (s - symtab > symtab_size)
234                     return corrupt(__LINE__);
235                 uint moff = Port.readlongBE(symtab + 4 + i * 4);
236                 //printf("symtab[%d] moff = %x  %x, name = %s\n", i, moff, moff + ElfLibHeader.sizeof, name.ptr);
237                 for (uint m = mstart; 1; m++)
238                 {
239                     if (m == objmodules.length)
240                         return corrupt(__LINE__);  // didn't find it
241                     ElfObjModule* om = objmodules[m];
242                     //printf("\t%x\n", cast(char *)om.base - cast(char *)buf);
243                     if (moff + ElfLibHeader.sizeof == cast(char*)om.base - cast(char*)buf)
244                     {
245                         addSymbol(om, name, 1);
246                         //if (mstart == m)
247                         //    mstart++;
248                         break;
249                     }
250                 }
251             }
252             return;
253         }
254         /* It's an object module
255          */
256         auto om = new ElfObjModule();
257         om.base = cast(ubyte*)buf;
258         om.length = cast(uint)buflen;
259         om.offset = 0;
260         // remove path, but not extension
261         om.name = FileName.name(module_name);
262         om.name_offset = -1;
263         om.scan = 1;
264         if (fromfile)
265         {
266             version (Posix)
267                 stat_t statbuf;
268             version (Windows)
269                 struct_stat statbuf;
270             int i = module_name.toCStringThen!(name => stat(name.ptr, &statbuf));
271             if (i == -1) // error, errno is set
272                 return corrupt(__LINE__);
273             om.file_time = statbuf.st_ctime;
274             om.user_id = statbuf.st_uid;
275             om.group_id = statbuf.st_gid;
276             om.file_mode = statbuf.st_mode;
277         }
278         else
279         {
280             /* Mock things up for the object module file that never was
281              * actually written out.
282              */
283             version (Posix)
284             {
285                 __gshared uid_t uid;
286                 __gshared gid_t gid;
287                 __gshared int _init;
288                 if (!_init)
289                 {
290                     _init = 1;
291                     uid = getuid();
292                     gid = getgid();
293                 }
294                 om.user_id = uid;
295                 om.group_id = gid;
296             }
297             version (Windows)
298             {
299                 om.user_id = 0;  // meaningless on Windows
300                 om.group_id = 0; // meaningless on Windows
301             }
302             time_t file_time = 0;
303             time(&file_time);
304             om.file_time = cast(long)file_time;
305             om.file_mode = (1 << 15) | (6 << 6) | (4 << 3) | (4 << 0); // 0100644
306         }
307         objmodules.push(om);
308     }
309 
310     /*****************************************************************************/
311 
312     void addSymbol(ElfObjModule* om, const(char)[] name, int pickAny = 0)
313     {
314         static if (LOG)
315         {
316             printf("LibElf::addSymbol(%s, %s, %d)\n", om.name.ptr, name.ptr, pickAny);
317         }
318         auto s = tab.insert(name.ptr, name.length, null);
319         if (!s)
320         {
321             // already in table
322             if (!pickAny)
323             {
324                 s = tab.lookup(name.ptr, name.length);
325                 assert(s);
326                 ElfObjSymbol* os = s.value;
327                 error("multiple definition of %s: %s and %s: %s", om.name.ptr, name.ptr, os.om.name.ptr, os.name.ptr);
328             }
329         }
330         else
331         {
332             auto os = new ElfObjSymbol();
333             os.name = xarraydup(name);
334             os.om = om;
335             s.value = os;
336             objsymbols.push(os);
337         }
338     }
339 
340 private:
341     /************************************
342      * Scan single object module for dictionary symbols.
343      * Send those symbols to LibElf::addSymbol().
344      */
345     void scanObjModule(ElfObjModule* om)
346     {
347         static if (LOG)
348         {
349             printf("LibElf::scanObjModule(%s)\n", om.name.ptr);
350         }
351 
352         extern (D) void addSymbol(const(char)[] name, int pickAny)
353         {
354             this.addSymbol(om, name, pickAny);
355         }
356 
357         scanElfObjModule(&addSymbol, om.base[0 .. om.length], om.name.ptr, loc);
358     }
359 
360     /*****************************************************************************/
361     /*****************************************************************************/
362     /**********************************************
363      * Create and write library to libbuf.
364      * The library consists of:
365      *      !<arch>\n
366      *      header
367      *      dictionary
368      *      object modules...
369      */
370     protected override void WriteLibToBuffer(OutBuffer* libbuf)
371     {
372         static if (LOG)
373         {
374             printf("LibElf::WriteLibToBuffer()\n");
375         }
376         /************* Scan Object Modules for Symbols ******************/
377         foreach (om; objmodules)
378         {
379             if (om.scan)
380             {
381                 scanObjModule(om);
382             }
383         }
384         /************* Determine string section ******************/
385         /* The string section is where we store long file names.
386          */
387         uint noffset = 0;
388         foreach (om; objmodules)
389         {
390             size_t len = om.name.length;
391             if (len >= ELF_OBJECT_NAME_SIZE)
392             {
393                 om.name_offset = noffset;
394                 noffset += len + 2;
395             }
396             else
397                 om.name_offset = -1;
398         }
399         static if (LOG)
400         {
401             printf("\tnoffset = x%x\n", noffset);
402         }
403         /************* Determine module offsets ******************/
404         uint moffset = 8 + ElfLibHeader.sizeof + 4;
405         foreach (os; objsymbols)
406         {
407             moffset += 4 + os.name.length + 1;
408         }
409         uint hoffset = moffset;
410         static if (LOG)
411         {
412             printf("\tmoffset = x%x\n", moffset);
413         }
414         moffset += moffset & 1;
415         if (noffset)
416             moffset += ElfLibHeader.sizeof + noffset;
417         foreach (om; objmodules)
418         {
419             moffset += moffset & 1;
420             om.offset = moffset;
421             moffset += ElfLibHeader.sizeof + om.length;
422         }
423         libbuf.reserve(moffset);
424         /************* Write the library ******************/
425         libbuf.write("!<arch>\n");
426         ElfObjModule om;
427         om.name_offset = -1;
428         om.base = null;
429         om.length = cast(uint)(hoffset - (8 + ElfLibHeader.sizeof));
430         om.offset = 8;
431         om.name = "";
432         .time(&om.file_time);
433         om.user_id = 0;
434         om.group_id = 0;
435         om.file_mode = 0;
436         ElfLibHeader h;
437         ElfOmToHeader(&h, &om);
438         libbuf.write((&h)[0 .. 1]);
439         char[4] buf;
440         Port.writelongBE(cast(uint)objsymbols.length, buf.ptr);
441         libbuf.write(buf[0 .. 4]);
442         foreach (os; objsymbols)
443         {
444             Port.writelongBE(os.om.offset, buf.ptr);
445             libbuf.write(buf[0 .. 4]);
446         }
447         foreach (os; objsymbols)
448         {
449             libbuf.writestring(os.name);
450             libbuf.writeByte(0);
451         }
452         static if (LOG)
453         {
454             printf("\tlibbuf.moffset = x%x\n", libbuf.length);
455         }
456         /* Write out the string section
457          */
458         if (noffset)
459         {
460             if (libbuf.length & 1)
461                 libbuf.writeByte('\n');
462             // header
463             memset(&h, ' ', ElfLibHeader.sizeof);
464             h.object_name[0] = '/';
465             h.object_name[1] = '/';
466             size_t len = snprintf(h.file_size.ptr, ELF_FILE_SIZE_SIZE, "%u", noffset);
467             assert(len < 10);
468             h.file_size[len] = ' ';
469             h.trailer[0] = '`';
470             h.trailer[1] = '\n';
471             libbuf.write((&h)[0 .. 1]);
472             foreach (om2; objmodules)
473             {
474                 if (om2.name_offset >= 0)
475                 {
476                     libbuf.writestring(om2.name);
477                     libbuf.writeByte('/');
478                     libbuf.writeByte('\n');
479                 }
480             }
481         }
482         /* Write out each of the object modules
483          */
484         foreach (om2; objmodules)
485         {
486             if (libbuf.length & 1)
487                 libbuf.writeByte('\n'); // module alignment
488             assert(libbuf.length == om2.offset);
489             ElfOmToHeader(&h, om2);
490             libbuf.write((&h)[0 .. 1]); // module header
491             libbuf.write(om2.base[0 .. om2.length]); // module contents
492         }
493         static if (LOG)
494         {
495             printf("moffset = x%x, libbuf.length = x%x\n", moffset, libbuf.length);
496         }
497         assert(libbuf.length == moffset);
498     }
499 }
500 
501 /*****************************************************************************/
502 /*****************************************************************************/
/// Bookkeeping for one archive member: where its bytes are held in memory
/// and the metadata that goes into its archive header.
struct ElfObjModule
{
    ubyte* base; // where are we holding it in memory
    uint length; // in bytes
    uint offset; // offset from start of library (assigned when the library is written)
    const(char)[] name; // module name (file name) with terminating 0
    int name_offset; // if not -1, offset into string table of name
    time_t file_time; // file time
    uint user_id; // owner id written to the header
    uint group_id; // group id written to the header
    uint file_mode; // file mode bits, written to the header in octal
    int scan; // 1 means scan for symbols
}
516 
/// Widths, in characters, of the fixed-size text fields of an archive member header.
enum
{
    ELF_OBJECT_NAME_SIZE = 16,
    ELF_FILE_TIME_SIZE = 12,
    ELF_USER_ID_SIZE = 6,
    ELF_GROUP_ID_SIZE = 6,
    ELF_FILE_MODE_SIZE = 8,
    ELF_FILE_SIZE_SIZE = 10,
    ELF_TRAILER_SIZE = 2,
}

/// Archive member header: a record made up solely of fixed-width character fields.
struct ElfLibHeader
{
    /// member name, or a reference into the file name table
    char[ELF_OBJECT_NAME_SIZE] object_name;
    /// file time
    char[ELF_FILE_TIME_SIZE] file_time;
    /// owner id
    char[ELF_USER_ID_SIZE] user_id;
    /// group id
    char[ELF_GROUP_ID_SIZE] group_id;
    /// file mode, in octal
    char[ELF_FILE_MODE_SIZE] file_mode;
    /// size of the member data in bytes
    char[ELF_FILE_SIZE_SIZE] file_size;
    /// end-of-header marker
    char[ELF_TRAILER_SIZE] trailer;
}
535 
/**
 * Render the archive header for `om` into `h`.
 *
 * The whole header is produced by a single snprintf over the raw bytes of `h`;
 * the 0 that snprintf appends lands on the last byte and is replaced by '\n'.
 * As a side effect, user and group ids above 999999 are reset to 0 in `om`.
 *
 * Params:
 *      h = header to fill in
 *      om = module to describe; a `name_offset` of -1 means the name itself is
 *           written into the header, otherwise "/" followed by `name_offset` is
 */
extern (C++) void ElfOmToHeader(ElfLibHeader* h, ElfObjModule* om)
{
    // user_id and group_id are padded on 6 characters in Header struct.
    // Squashing to 0 if more than 999999.
    enum uint largestId = 999_999;
    if (om.user_id > largestId)
        om.user_id = 0;
    if (om.group_id > largestId)
        om.group_id = 0;

    char* dst = cast(char*)h;
    size_t written;
    if (om.name_offset != -1)
    {
        // "/162007         1423563789  5000  5000  100640  3068      `\n"
        //  |^^^^^^^^^^^^^^^|^^^^^^^^^^^|^^^^^|^^^^^|^^^^^^^|^^^^^^^^^|^^
        //     name_offset   file_time   u_id gr_id  fmode    fsize   trailer
        written = snprintf(dst, ElfLibHeader.sizeof, "/%-15d%-12llu%-6u%-6u%-8o%-10u`", om.name_offset, cast(long)om.file_time, om.user_id, om.group_id, om.file_mode, om.length);
    }
    else
    {
        // "name/           1423563789  5000  5000  100640  3068      `\n"
        //  |^^^^^^^^^^^^^^^|^^^^^^^^^^^|^^^^^|^^^^^|^^^^^^^|^^^^^^^^^|^^
        //        name       file_time   u_id gr_id  fmode    fsize   trailer
        written = snprintf(dst, ElfLibHeader.sizeof, "%-16s%-12llu%-6u%-6u%-8o%-10u`", om.name.ptr, cast(long)om.file_time, om.user_id, om.group_id, om.file_mode, om.length);
        // the name is terminated by a '/' placed directly behind it
        const size_t nameLen = om.name.length;
        assert(nameLen < ELF_OBJECT_NAME_SIZE);
        dst[nameLen] = '/';
    }
    assert(ElfLibHeader.sizeof > 0 && written == ElfLibHeader.sizeof - 1);
    // replace trailing \0 with \n
    dst[written] = '\n';
}