1 /**
2  * This module provides OS specific helper function for DLL support
3  *
4  * Copyright: Copyright Digital Mars 2010 - 2012.
5  * License: Distributed under the
6  *      $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0).
7  *    (See accompanying file LICENSE)
8  * Authors:   Rainer Schuetze
9  * Source: $(DRUNTIMESRC core/sys/windows/_dll.d)
10  */
11 
12 module core.sys.windows.dll;
13 version (Windows):
14 
15 import core.sys.windows.winbase;
16 import core.sys.windows.winnt;
17 import core.stdc.string;
18 import core.runtime;
19 
20 public import core.sys.windows.threadaux;
21 
22 ///////////////////////////////////////////////////////////////////
23 // support fixing implicit TLS for dynamically loaded DLLs on Windows XP
24 
25 // in this special case, we have to treat _tlsstart and _tlsend as non-TLS variables
26 //  as they are used to simulate TLS when it is not set up under XP. In this case we must
27 //  not access tls_array[tls_index] as needed for thread local _tlsstart and _tlsend
// Symbols that delimit this image's TLS section and name its TLS index.
// They are declared __gshared on purpose: they must be read as ordinary
// globals, because accessing them as thread-local variables would go through
// tls_array[tls_index], which may not be set up yet (see the comment above).
extern (C)
{
    version (Win32)
    {
        version (CRuntime_DigitalMars)
        {
            extern __gshared byte  _tlsstart;
            extern __gshared byte  _tlsend;
            extern __gshared void* _tls_callbacks_a;
        }
        else version (CRuntime_Microsoft)
        {
            // the Microsoft runtime uses different symbol names; alias them to
            // the names used by the rest of this module
            extern __gshared byte  _tls_start;
            extern __gshared byte  _tls_end;
            extern __gshared void*  __xl_a;
            alias _tls_start _tlsstart;
            alias _tls_end   _tlsend;
            alias __xl_a     _tls_callbacks_a;
        }
        // declared for both C runtimes
        extern __gshared int   _tls_index;
    }
}
50 
// Hooks declared here and implemented elsewhere (rt.minfo). In this module they
// are called by dll_thread_attach (ctor) and dll_thread_detach (dtor).
extern (C) // rt.minfo
{
    void rt_moduleTlsCtor();
    void rt_moduleTlsDtor();
}
56 
57 private:
58 struct dll_aux
59 {
60     // don't let symbols leak into other modules
61 version (Win32)
62 {
    // Node of the doubly linked TLS list that addTlsListEntry splices a new
    // element into.
    // NOTE(review): the field order presumably has to match the loader's own
    //  (undocumented) entry layout - do not reorder or resize without verifying.
    struct LdrpTlsListEntry
    {
        LdrpTlsListEntry* next;
        LdrpTlsListEntry* prev;
        void* tlsstart;     // start of the module's TLS template data
        void* tlsend;       // end of the module's TLS template data
        void* ptr_tlsindex; // address of the module's tls index variable
        void* callbacks;    // TLS callback array
        void* zerofill;     // set to null by addTlsListEntry
        int   tlsindex;     // index assigned to the module
    }
74 
    // Signature used to call "RtlAllocateHeap" after looking it up in NTDLL
    // with GetProcAddress (see addTlsListEntry and addTlsData).
    alias fnRtlAllocateHeap = extern(Windows)
    void* function(void* HeapHandle, uint Flags, size_t Size) nothrow;
77 
78     // find a code sequence and return the address after the sequence
79     static void* findCodeSequence( void* adr, int len, ref ubyte[] pattern ) nothrow
80     {
81         if ( !adr )
82             return null;
83 
84         ubyte* code = cast(ubyte*) adr;
85         for ( int p = 0; p < len; p++ )
86         {
87             if ( code[ p .. p + pattern.length ] == pattern[ 0 .. $ ] )
88             {
89                 ubyte* padr = code + p + pattern.length;
90                 return padr;
91             }
92         }
93         return null;
94     }
95 
96     // find a code sequence and return the (relative) address that follows
97     static void* findCodeReference( void* adr, int len, ref ubyte[] pattern, bool relative ) nothrow
98     {
99         if ( !adr )
100             return null;
101 
102         ubyte* padr = cast(ubyte*) findCodeSequence( adr, len, pattern );
103         if ( padr )
104         {
105             if ( relative )
106                 return padr + 4 + *cast(int*) padr;
107             return *cast(void**) padr;
108         }
109         return null;
110     }
111 
    // crawl through ntdll to find function _LdrpAllocateTls@0 and references
    //  to _LdrpNumberOfTlsEntries, _NtdllBaseTag and _LdrpTlsList
    // LdrInitializeThunk
    // -> _LdrpInitialize@12
    // -> _LdrpInitializeThread@4
    // -> _LdrpAllocateTls@0
    // -> je chunk
    //     _LdrpNumberOfTlsEntries - number of entries in TlsList
    //     _NtdllBaseTag           - tag used for RtlAllocateHeap
    //     _LdrpTlsList            - root of the double linked list with TlsList entries
    //
    // The byte patterns below are the instruction prefixes searched for at each
    //  step by addTlsListEntry; the operand following a match is decoded by
    //  findCodeReference. The *_xp64 / *_svr03 / *_srv03 variants are only tried
    //  when the primary pattern is not found.

    static __gshared int* pNtdllBaseTag; // set by addTlsListEntry, remembered for reuse in addTlsData

    static __gshared ubyte[] jmp_LdrpInitialize = [ 0x33, 0xED, 0xE9 ]; // xor ebp,ebp; jmp _LdrpInitialize
    static __gshared ubyte[] jmp__LdrpInitialize = [ 0x5D, 0xE9 ]; // pop ebp; jmp __LdrpInitialize
    static __gshared ubyte[] jmp__LdrpInitialize_xp64 = [ 0x5D, 0x90, 0x90, 0x90, 0x90, 0x90 ]; // pop ebp; nop; nop; nop; nop; nop;
    static __gshared ubyte[] call_LdrpInitializeThread = [ 0xFF, 0x75, 0x08, 0xE8 ]; // push [ebp+8]; call _LdrpInitializeThread
    static __gshared ubyte[] call_LdrpAllocateTls = [ 0x00, 0x00, 0xE8 ]; // jne 0xc3; call _LdrpAllocateTls
    static __gshared ubyte[] call_LdrpAllocateTls_svr03 = [ 0x65, 0xfc, 0x00, 0xE8 ]; // and [ebp+fc], 0; call _LdrpAllocateTls
    static __gshared ubyte[] jne_LdrpAllocateTls = [ 0x0f, 0x85 ]; // jne body_LdrpAllocateTls
    static __gshared ubyte[] mov_LdrpNumberOfTlsEntries = [ 0x8B, 0x0D ]; // mov ecx, _LdrpNumberOfTlsEntries
    static __gshared ubyte[] mov_NtdllBaseTag = [ 0x51, 0x8B, 0x0D ]; // push ecx; mov ecx, _NtdllBaseTag
    static __gshared ubyte[] mov_NtdllBaseTag_srv03 = [ 0x50, 0xA1 ]; // push eax; mov eax, _NtdllBaseTag
    static __gshared ubyte[] mov_LdrpTlsList = [ 0x8B, 0x3D ]; // mov edi, _LdrpTlsList
136 
    // Register this module's TLS section with the loader's internal TLS list.
    //
    // The internal loader variables are not exported, so they are located by
    //  pattern matching the machine code reachable from LdrInitializeThunk.
    //
    // Params:
    //   peb             = process environment block; peb[6] is used as the heap handle
    //   tlsstart        = start of the TLS template data
    //   tlsend          = end of the TLS template data
    //   tls_callbacks_a = TLS callback array stored in the new entry
    //   tlsindex        = receives the TLS index assigned to the module
    // Returns:
    //   the newly allocated and linked list entry, or null if any of the loader
    //   variables or RtlAllocateHeap could not be found, or the allocation failed.
    //   On failure *tlsindex is left untouched.
    // Side effects:
    //   sets pNtdllBaseTag (also on some failure paths), increments the
    //   loader's TLS entry counter on success.
    static LdrpTlsListEntry* addTlsListEntry( void** peb, void* tlsstart, void* tlsend, void* tls_callbacks_a, int* tlsindex ) nothrow
    {
        HANDLE hnd = GetModuleHandleA( "NTDLL" );
        assert( hnd, "cannot get module handle for ntdll" );
        ubyte* fn = cast(ubyte*) GetProcAddress( hnd, "LdrInitializeThunk" );
        assert( fn, "cannot find LdrInitializeThunk in ntdll" );

        // follow the call chain described above; every helper returns null for a
        //  null start address, so a failed step propagates to the checks below
        void* pLdrpInitialize = findCodeReference( fn, 20, jmp_LdrpInitialize, true );
        void* p_LdrpInitialize = findCodeReference( pLdrpInitialize, 40, jmp__LdrpInitialize, true );
        if ( !p_LdrpInitialize )
            // xp64 variant: no jump, the code continues right after the nop padding
            p_LdrpInitialize = findCodeSequence( pLdrpInitialize, 40, jmp__LdrpInitialize_xp64 );
        void* pLdrpInitializeThread = findCodeReference( p_LdrpInitialize, 200, call_LdrpInitializeThread, true );
        void* pLdrpAllocateTls = findCodeReference( pLdrpInitializeThread, 40, call_LdrpAllocateTls, true );
        if (!pLdrpAllocateTls)
            pLdrpAllocateTls = findCodeReference( pLdrpInitializeThread, 100, call_LdrpAllocateTls_svr03, true );
        void* pBodyAllocateTls = findCodeReference( pLdrpAllocateTls, 40, jne_LdrpAllocateTls, true );

        // absolute addresses of the loader variables referenced by _LdrpAllocateTls
        int* pLdrpNumberOfTlsEntries = cast(int*) findCodeReference( pBodyAllocateTls, 60, mov_LdrpNumberOfTlsEntries, false );
        pNtdllBaseTag = cast(int*) findCodeReference( pBodyAllocateTls, 30, mov_NtdllBaseTag, false );
        if (!pNtdllBaseTag)
            pNtdllBaseTag = cast(int*) findCodeReference( pBodyAllocateTls, 30, mov_NtdllBaseTag_srv03, false );
        LdrpTlsListEntry* pLdrpTlsList = cast(LdrpTlsListEntry*)findCodeReference( pBodyAllocateTls, 80, mov_LdrpTlsList, false );

        if ( !pLdrpNumberOfTlsEntries || !pNtdllBaseTag || !pLdrpTlsList )
            return null;

        fnRtlAllocateHeap fnAlloc = cast(fnRtlAllocateHeap) GetProcAddress( hnd, "RtlAllocateHeap" );
        if ( !fnAlloc )
            return null;

        // allocate new TlsList entry (adding 0xC0000 to the tag is obviously a flag also used by
        //  the nt-loader, could be the result of HEAP_MAKE_TAG_FLAGS(0,HEAP_NO_SERIALIZE|HEAP_GROWABLE)
        //  but this is not documented in the msdn entry for RtlAllocateHeap
        void* heap = peb[6];
        LdrpTlsListEntry* entry = cast(LdrpTlsListEntry*) (*fnAlloc)( heap, *pNtdllBaseTag | 0xc0000, LdrpTlsListEntry.sizeof );
        if ( !entry )
            return null;

        // fill entry
        entry.tlsstart = tlsstart;
        entry.tlsend = tlsend;
        entry.ptr_tlsindex = tlsindex;
        entry.callbacks = tls_callbacks_a;
        entry.zerofill = null;
        entry.tlsindex = *pLdrpNumberOfTlsEntries;

        // and add it to the end of TlsList
        //  (the list is circular: inserting before the root appends at the tail)
        *tlsindex = *pLdrpNumberOfTlsEntries;
        entry.next = pLdrpTlsList;
        entry.prev = pLdrpTlsList.prev;
        pLdrpTlsList.prev.next = entry;
        pLdrpTlsList.prev = entry;
        (*pLdrpNumberOfTlsEntries)++;

        return entry;
    }
193 
194     // reallocate TLS array and create a copy of the TLS data section
195     static bool addTlsData( void** teb, void* tlsstart, void* tlsend, int tlsindex ) nothrow
196     {
197         HANDLE hnd = GetModuleHandleA( "NTDLL" );
198         assert( hnd, "cannot get module handle for ntdll" );
199 
200         fnRtlAllocateHeap fnAlloc = cast(fnRtlAllocateHeap) GetProcAddress( hnd, "RtlAllocateHeap" );
201         if ( !fnAlloc || !pNtdllBaseTag )
202             return false;
203 
204         void** peb = cast(void**) teb[12];
205         void* heap = peb[6];
206 
207         auto sz = tlsend - tlsstart;
208         void* tlsdata = cast(void*) (*fnAlloc)( heap, *pNtdllBaseTag | 0xc0000, sz );
209         if ( !tlsdata )
210             return false;
211 
212         // no relocations! not even self-relocations. Windows does not do them.
213         core.stdc..string.memcpy( tlsdata, tlsstart, sz );
214 
215         // create copy of tls pointer array
216         void** array = cast(void**) (*fnAlloc)( heap, *pNtdllBaseTag | 0xc0000, (tlsindex + 1) * (void*).sizeof );
217         if ( !array )
218             return false;
219 
220         if ( tlsindex > 0 && teb[11] )
221             core.stdc..string.memcpy( array, teb[11], tlsindex * (void*).sizeof);
222         array[tlsindex] = tlsdata;
223         teb[11] = cast(void*) array;
224 
225         // let the old array leak, in case a oncurrent thread is still relying on it
226         return true;
227     }
228 } // Win32
229 
    // one byte boolean, used by PEB_LDR_DATA.Initialized below
    alias bool BOOLEAN;
231 
    // Counted wide string as embedded in LDR_MODULE (FullDllName, BaseDllName).
    // NOTE(review): presumably mirrors the Windows UNICODE_STRING, where both
    //  lengths are byte counts - confirm before relying on the field values.
    struct UNICODE_STRING
    {
        short Length;
        short MaximumLength;
        wchar* Buffer;
    }
238 
    // Link pair of a doubly linked list. findLdrModule walks such a list via
    //  `next` until it arrives back at the list head.
    struct LIST_ENTRY
    {
        LIST_ENTRY* next;
        LIST_ENTRY* prev;
    }
244 
    // the following structures can be found here:
    // https://www.geoffchappell.com/studies/windows/win32/ntdll/structs/ldr_data_table_entry.htm
    // perhaps this should be same as LDR_DATA_TABLE_ENTRY, which is introduced with PEB_LDR_DATA
    //
    // Per-module record of the loader. InLoadOrderModuleList has to stay the
    //  first member: findLdrModule casts the LIST_ENTRY pointers of that list
    //  directly to LDR_MODULE pointers.
    struct LDR_MODULE
    {
        LIST_ENTRY      InLoadOrderModuleList;
        LIST_ENTRY      InMemoryOrderModuleList;
        LIST_ENTRY      InInitializationOrderModuleList;
        PVOID           BaseAddress; // compared against the HINSTANCE in findLdrModule
        PVOID           EntryPoint;
        SIZE_T          SizeOfImage;
        UNICODE_STRING  FullDllName;
        UNICODE_STRING  BaseDllName;
        ULONG           Flags;
        SHORT           LoadCount; // obsolete after Version 6.1
        SHORT           TlsIndex;  // set to -1 by this module to flag TLS usage
        LIST_ENTRY      HashTableEntry;
        ULONG           TimeDateStamp;
        PVOID           EntryPointActivationContext;
        PVOID           PatchInformation;
        LDR_DDAG_NODE  *DdagNode; // starting with Version 6.2
    }
267 
    // Node referenced by LDR_MODULE.DdagNode. dll_getRefCount reads LoadCount
    //  from here when isWindows8OrLater() is true.
    struct LDR_DDAG_NODE
    {
        LIST_ENTRY Modules;
        void* ServiceTagList;  // LDR_SERVICE_TAG_RECORD
        ULONG LoadCount;
        ULONG ReferenceCount;  // Version 10: ULONG LoadWhileUnloadingCount;
        ULONG DependencyCount; // Version 10: ULONG LowestLink;
    }
276 
    // Loader data of the process; findLdrModule obtains it from peb[3] and
    //  uses InLoadOrderModuleList as the head of the module list.
    struct PEB_LDR_DATA
    {
        ULONG           Length;
        BOOLEAN         Initialized;
        PVOID           SsHandle;
        LIST_ENTRY      InLoadOrderModuleList;
        LIST_ENTRY      InMemoryOrderModuleList;
        LIST_ENTRY      InInitializationOrderModuleList;
    }
286 
287     static LDR_MODULE* findLdrModule( HINSTANCE hInstance, void** peb ) nothrow @nogc
288     {
289         PEB_LDR_DATA* ldrData = cast(PEB_LDR_DATA*) peb[3];
290         LIST_ENTRY* root = &ldrData.InLoadOrderModuleList;
291         for (LIST_ENTRY* entry = root.next; entry != root; entry = entry.next)
292         {
293             LDR_MODULE *ldrMod = cast(LDR_MODULE*) entry;
294             if (ldrMod.BaseAddress == hInstance)
295                 return ldrMod;
296         }
297         return null;
298     }
299 
300     static bool setDllTlsUsage( HINSTANCE hInstance, void** peb ) nothrow
301     {
302         LDR_MODULE *thisMod = findLdrModule( hInstance, peb );
303         if ( !thisMod )
304             return false;
305 
306         thisMod.TlsIndex = -1;  // uses TLS (not the index itself)
307         thisMod.LoadCount = -1; // never unload
308         return true;
309     }
310 }
311 
312 public:
/* *****************************************************
 * Fix implicit thread local storage for the case when a DLL is loaded
 * dynamically after process initialization.
 * The link time variables are passed to allow placing this function into
 * an RTL DLL itself.
 * The problem is described in Bugzilla 3342 and
 * http://www.nynaeve.net/?p=187, to quote from the latter:
 *
 * "When a DLL using implicit TLS is loaded, because the loader doesn't process the TLS
 *  directory, the _tls_index value is not initialized by the loader, nor is there space
 *  allocated for module's TLS data in the ThreadLocalStoragePointer arrays of running
 *  threads. The DLL continues to load, however, and things will appear to work... until the
 *  first access to a __declspec(thread) variable occurs, that is."
 *
 * _tls_index is initialized by the compiler to 0, so we can use this as a test.
 *
 * Params:
 *   hInstance       = handle (base address) of the DLL to fix
 *   tlsstart        = start of the DLL's TLS template data
 *   tlsend          = end of the DLL's TLS template data
 *   tls_callbacks_a = TLS callback array of the DLL
 *   tlsindex        = address of the DLL's TLS index; written on success
 * Returns:
 *   true if nothing had to be done or the fix was applied; false if the module
 *   is not in the loader's module list, the loader's TLS list could not be
 *   extended, or patching the threads failed.
 *   Always true for GNU_EMUTLS and Win64 builds.
 */
bool dll_fixTLS( HINSTANCE hInstance, void* tlsstart, void* tlsend, void* tls_callbacks_a, int* tlsindex ) nothrow
{
    version (GNU_EMUTLS)
        return true;
    else version (Win64)
        return true;                // fixed
    else version (Win32)
    {
    /* If the OS has allocated a TLS slot for us, we don't have to do anything
     * tls_index 0 means: the OS has not done anything, or it has allocated slot 0
     * Vista and later Windows systems should do this correctly and not need
     * this function.
     */
    if ( *tlsindex != 0 )
        return true;

    // fetch the PEB pointer stored at FS:[0x30]
    void** peb;
    asm pure nothrow @nogc
    {
        mov EAX,FS:[0x30];
        mov peb, EAX;
    }
    dll_aux.LDR_MODULE *ldrMod = dll_aux.findLdrModule( hInstance, peb );
    if ( !ldrMod )
        return false; // not in module list, bail out
    if ( ldrMod.TlsIndex != 0 )
        return true;  // the OS has already setup TLS

    // register the TLS section with the loader; this also assigns *tlsindex
    dll_aux.LdrpTlsListEntry* entry = dll_aux.addTlsListEntry( peb, tlsstart, tlsend, tls_callbacks_a, tlsindex );
    if ( !entry )
        return false;

    scope (failure) assert(0); // enforce nothrow, Bugzilla 13561

    // give every thread reported by enumProcessThreads its own copy of the TLS
    //  data; the list entry is passed through the context pointer
    if ( !enumProcessThreads(
        function (uint id, void* context) nothrow {
            dll_aux.LdrpTlsListEntry* entry = cast(dll_aux.LdrpTlsListEntry*) context;
            return dll_aux.addTlsData( getTEB( id ), entry.tlsstart, entry.tlsend, entry.tlsindex );
        }, entry ) )
        return false;

    ldrMod.TlsIndex = -1;  // flag TLS usage (not the index itself)
    ldrMod.LoadCount = -1; // prevent unloading of the DLL,
                           // since XP does not keep track of used TLS entries
    return true;
    }
}
376 
private extern (Windows) ULONGLONG VerSetConditionMask(ULONGLONG, DWORD, BYTE) nothrow @nogc;

// Returns true if VerifyVersionInfoW reports that the running OS version is
// greater than or equal to 6.2 with service pack major 0.
private bool isWindows8OrLater() nothrow @nogc
{
    enum checkedFields = VER_MAJORVERSION | VER_MINORVERSION | VER_SERVICEPACKMAJOR;

    // every checked field has to be >= the reference value
    DWORDLONG mask = VerSetConditionMask( 0, VER_MAJORVERSION, VER_GREATER_EQUAL );
    mask = VerSetConditionMask( mask, VER_MINORVERSION, VER_GREATER_EQUAL );
    mask = VerSetConditionMask( mask, VER_SERVICEPACKMAJOR, VER_GREATER_EQUAL );

    OSVERSIONINFOEXW reference;
    reference.dwOSVersionInfoSize = reference.sizeof;
    reference.dwMajorVersion = 6;
    reference.dwMinorVersion = 2;
    reference.wServicePackMajor = 0;

    return VerifyVersionInfoW( &reference, checkedFields, mask ) != FALSE;
}
396 
/* *****************************************************
 * Get the process reference count for the given DLL handle
 * Params:
 *   hInstance = DLL instance handle
 * Returns:
 *   the reference count for the DLL in the current process,
 *   -1 if the DLL is implicitly loaded with the process
 *   or -2 if the DLL handle is invalid
 */
int dll_getRefCount( HINSTANCE hInstance ) nothrow @nogc
{
    // fetch the PEB pointer: stored at GS:[0x60] on Win64 and FS:[0x30] on Win32
    void** peb;
    version (Win64)
    {
        asm pure nothrow @nogc
        {
            mov RAX, 0x60;
            mov RAX,GS:[RAX];
            mov peb, RAX;
        }
    }
    else version (Win32)
    {
        asm pure nothrow @nogc
        {
            mov EAX,FS:[0x30];
            mov peb, EAX;
        }
    }
    dll_aux.LDR_MODULE *ldrMod = dll_aux.findLdrModule( hInstance, peb );
    if ( !ldrMod )
        return -2; // not in module list, bail out
    // from Windows 8 on the count is taken from the DDAG node instead of the
    //  module record itself (LDR_MODULE.LoadCount is marked obsolete there)
    if (isWindows8OrLater())
        return ldrMod.DdagNode.LoadCount;
    return ldrMod.LoadCount;
}
433 
// Fix up TLS storage, initialize the runtime and optionally attach to the
// threads that already exist in the process.
// To be called from DllMain with reason DLL_PROCESS_ATTACH.
//
// Returns false if the TLS fixup fails (Win32 only); true if attach_threads is
// false; otherwise the result of enumProcessThreads.
bool dll_process_attach( HINSTANCE hInstance, bool attach_threads,
                         void* tlsstart, void* tlsend, void* tls_callbacks_a, int* tlsindex )
{
    version (Win32)
    {
        const bool tlsFixed = dll_fixTLS( hInstance, tlsstart, tlsend, tls_callbacks_a, tlsindex );
        if ( !tlsFixed )
            return false;
    }

    Runtime.initialize();

    if ( attach_threads )
    {
        // attach to all other threads
        return enumProcessThreads(
            function (uint threadId, void* context) {
                // threads that are already known need no further work
                if ( thread_findByAddr( threadId ) || findLowLevelThread( threadId ) )
                    return true;

                // if the OS has not prepared TLS for us, don't attach to the thread
                if ( !GetTlsDataAddress( threadId ) )
                    return true;

                thread_attachByAddr( threadId );
                thread_moduleTlsCtor( threadId );
                return true;
            }, null );
    }
    return true;
}
465 
// Convenience overload of the function above that passes this image's own TLS
// symbols; only usable if druntime is linked statically.
// On Win64 no TLS information is needed, so nulls are passed instead.
bool dll_process_attach( HINSTANCE hInstance, bool attach_threads = true )
{
    version (Win64)
        return dll_process_attach( hInstance, attach_threads, null, null, null, null );
    else version (Win32)
        return dll_process_attach( hInstance, attach_threads,
                                   &_tlsstart, &_tlsend, &_tls_callbacks_a, &_tls_index );
}
480 
// Optionally detach from all threads other than the calling one, then
// terminate the runtime.
// To be called from DllMain with reason DLL_PROCESS_DETACH.
void dll_process_detach( HINSTANCE hInstance, bool detach_threads = true )
{
    // notify core.thread.joinLowLevelThread that the DLL is about to be unloaded
    thread_DLLProcessDetaching = true;

    if ( detach_threads )
    {
        // detach from all other threads
        enumProcessThreads(
            function (uint threadId, void* context)
            {
                if ( threadId == GetCurrentThreadId() )
                    return true;

                auto known = thread_findByAddr( threadId );
                if ( known is null )
                    return true;

                thread_moduleTlsDtor( threadId );
                if ( !known.isMainThread() )
                    thread_detachByAddr( threadId );
                return true;
            }, null );
    }

    Runtime.terminate();
}
506 
/* Make sure that tlsCtorRun is itself a tls variable
 *
 * The flag is set by this module's thread-local constructor and cleared by its
 * thread-local destructor. dll_thread_attach and dll_thread_detach read it to
 * avoid running the module TLS ctors/dtors a second time.
 */
static bool tlsCtorRun;
static this() { tlsCtorRun = true; }
static ~this() { tlsCtorRun = false; }
512 
// Attach the calling thread to the runtime and run its module TLS constructors.
// To be called from DllMain with reason DLL_THREAD_ATTACH.
//
// Returns false if the OS has not prepared TLS for this thread, true otherwise.
bool dll_thread_attach( bool attach_thread = true, bool initTls = true )
{
    // if the OS has not prepared TLS for us, don't attach to the thread
    //  (happened when running under x64 OS)
    const currentId = GetCurrentThreadId();
    if ( !GetTlsDataAddress( currentId ) )
        return false;

    // a thread that is already in the thread list (e.g. created by "new Thread")
    //  must neither be attached nor initialized again
    if ( thread_findByAddr( currentId ) || findLowLevelThread( currentId ) )
        return true;

    if ( attach_thread )
        thread_attachThis();
    if ( initTls && !tlsCtorRun ) // avoid duplicate calls
        rt_moduleTlsCtor();
    return true;
}
531 
// Run the module TLS destructors of the calling thread and detach it from the
// runtime. To be called from DllMain with reason DLL_THREAD_DETACH.
//
// Returns false if the OS has not prepared TLS for this thread, true otherwise.
bool dll_thread_detach( bool detach_thread = true, bool exitTls = true )
{
    const currentId = GetCurrentThreadId();

    // if the OS has not prepared TLS for us, we did not attach to the thread
    if ( !GetTlsDataAddress( currentId ) )
        return false;

    // nothing to undo for threads the runtime does not know about
    if ( !thread_findByAddr( currentId ) )
        return true;

    if ( exitTls && tlsCtorRun ) // avoid dtors to be run twice
        rt_moduleTlsDtor();
    if ( detach_thread )
        thread_detachThis();
    return true;
}
547 
/// Mixin template that supplies a ready-made $(D DllMain) forwarding each
/// notification to the matching runtime attach/detach function of this module.
///
/// Use it in place of a hand-written $(D DllMain):
///
/// ---
/// mixin SimpleDllMain;
/// ---
mixin template SimpleDllMain()
{
    import core.sys.windows.windef : HINSTANCE, BOOL, DWORD, LPVOID;

    extern(Windows)
    BOOL DllMain(HINSTANCE hInstance, DWORD ulReason, LPVOID reserved)
    {
        import core.sys.windows.winnt;
        import core.sys.windows.dll :
            dll_process_attach, dll_process_detach,
            dll_thread_attach, dll_thread_detach;

        switch (ulReason)
        {
            case DLL_PROCESS_ATTACH:
                return dll_process_attach( hInstance, true );

            case DLL_THREAD_ATTACH:
                return dll_thread_attach( true, true );

            case DLL_THREAD_DETACH:
                return dll_thread_detach( true, true );

            case DLL_PROCESS_DETACH:
                dll_process_detach( hInstance, true );
                return true;

            // any other notification code is unexpected
            default:
                assert(0);
        }
    }
}