From fe52f1eeacc1c29886c25902635b4aa6194bdf54 Mon Sep 17 00:00:00 2001 From: Scott Ehlert Date: Sat, 31 May 2014 03:09:17 -0500 Subject: [PATCH 1/3] Don't bother saving and restoring eax in hamsandwich trampolines. --- dlls/hamsandwich/Trampolines.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/dlls/hamsandwich/Trampolines.h b/dlls/hamsandwich/Trampolines.h index bca62dc4..ee97a6b3 100644 --- a/dlls/hamsandwich/Trampolines.h +++ b/dlls/hamsandwich/Trampolines.h @@ -78,7 +78,6 @@ namespace Trampolines const unsigned char codeVoidPrologue[] = { 0x55, // push ebp 0x89, 0xE5, // mov ebp, esp - 0x50, // push eax }; /** @@ -184,17 +183,15 @@ namespace Trampolines * Epilogue of a void return function */ const unsigned char codeVoidEpilogue[] = { - 0x58, // pop eax 0x5D, // pop ebp 0xC3 // ret }; const unsigned char codeVoidEpilogueN[] = { - 0x58, // pop eax 0x5D, // pop ebp 0xC2, 0xCD, 0xAB // retn 0xABCD }; - const int codeVoidEpilogueNReplace = 3; + const int codeVoidEpilogueNReplace = 2; From 3987085572b846e089ab11c2225f4afae0a0abf8 Mon Sep 17 00:00:00 2001 From: Scott Ehlert Date: Sat, 31 May 2014 04:00:25 -0500 Subject: [PATCH 2/3] Combine prologue and epilogue code for void and non-void functions in hamsandwich trampolines. 
--- dlls/hamsandwich/Trampolines.h | 189 +++++++-------------------------- 1 file changed, 36 insertions(+), 153 deletions(-) diff --git a/dlls/hamsandwich/Trampolines.h b/dlls/hamsandwich/Trampolines.h index ee97a6b3..8a1d65e1 100644 --- a/dlls/hamsandwich/Trampolines.h +++ b/dlls/hamsandwich/Trampolines.h @@ -72,28 +72,13 @@ namespace Trampolines namespace Bytecode { /** - * Prologue for a void function - * Clobbers EBX and EAX + * Prologue for a function */ - const unsigned char codeVoidPrologue[] = { + const unsigned char codePrologue[] = { 0x55, // push ebp 0x89, 0xE5, // mov ebp, esp }; - /** - * Prologue for a function that returns - * Clobbers EBX, EAX too but not after call - */ - const unsigned char codeReturnPrologue[] = { - 0x55, // push ebp - 0x89, 0xE5, // mov ebp, esp - }; - const unsigned char codeThisReturnPrologue[] = { - 0x55, // push ebp - 0x89, 0xE5, // mov ebp, esp - }; - - /** * Takes a paramter from the trampoline's stack * and pushes it onto the target's stack. @@ -166,34 +151,17 @@ namespace Trampolines const unsigned int codeFreeStackReplace = 2; /** - * Epilogue of a simple return function + * Epilogue of a simple function */ - const unsigned char codeReturnEpilogue[] = { + const unsigned char codeEpilogue[] = { 0x5D, // pop ebp 0xC3 // ret }; - const unsigned char codeReturnEpilogueN[] = { + const unsigned char codeEpilogueN[] = { 0x5D, // pop ebp - 0xC2, 0xCD, 0xAB // retn 0xABCD + 0xC2, 0xCD, 0xAB // retn 0xABCD }; - const int codeReturnEpilogueNReplace = 2; - - - /** - * Epilogue of a void return function - */ - const unsigned char codeVoidEpilogue[] = { - 0x5D, // pop ebp - 0xC3 // ret - }; - - const unsigned char codeVoidEpilogueN[] = { - 0x5D, // pop ebp - 0xC2, 0xCD, 0xAB // retn 0xABCD - }; - const int codeVoidEpilogueNReplace = 2; - - + const int codeEpilogueNReplace = 2; const unsigned char codeBreakpoint[] = { 0xCC // int 3 @@ -269,65 +237,50 @@ namespace Trampolines }; /** - * Adds the "return prologue", pushes registers and 
prepares stack + * Adds the prologue, pushes registers, prepares the stack */ - void ReturnPrologue() + void Prologue() { - Append(&::Trampolines::Bytecode::codeReturnPrologue[0],sizeof(::Trampolines::Bytecode::codeReturnPrologue)); - m_paramstart=0; - m_thiscall=0; - }; - void ThisReturnPrologue() - { - this->ReturnPrologue(); - m_thiscall=1; - }; - - /** - * Adds the void prologue pushes registers, prepares the stack - */ - void VoidPrologue() - { - Append(&::Trampolines::Bytecode::codeVoidPrologue[0],sizeof(::Trampolines::Bytecode::codeVoidPrologue)); + Append(&::Trampolines::Bytecode::codePrologue[0],sizeof(::Trampolines::Bytecode::codePrologue)); m_paramstart=0; m_thiscall=0; }; /** - * Flags this trampoline as a thiscall trampoline, and prepares the void prologue. + * Flags this trampoline as a thiscall trampoline, and prepares the prologue. */ - void ThisVoidPrologue() + void ThisPrologue() { - this->VoidPrologue(); + this->Prologue(); m_thiscall=1; }; + /** - * Epilogue for a returning function pops registers but does not free any more of the stack! + * Epilogue for a function pops registers but does not free any more of the stack! */ - void ReturnEpilogue() + void Epilogue() { - Append(&::Trampolines::Bytecode::codeReturnEpilogue[0],sizeof(::Trampolines::Bytecode::codeReturnEpilogue)); + Append(&::Trampolines::Bytecode::codeEpilogue[0],sizeof(::Trampolines::Bytecode::codeEpilogue)); }; /** * Epilogue that also frees it's estimated stack usage. Useful for stdcall/thiscall/fastcall. */ - void ReturnEpilogueAndFree() + void EpilogueAndFree() { - this->ReturnEpilogue(m_mystack); + this->Epilogue(m_mystack); }; /** - * Return epilogue. Pops registers, and frees given amount of data from the stack. + * Epilogue. Pops registers, and frees given amount of data from the stack. * * @param howmuch How many bytes to free from the stack. 
*/ - void ReturnEpilogue(int howmuch) + void Epilogue(int howmuch) { + unsigned char code[sizeof(::Trampolines::Bytecode::codeEpilogueN)]; - unsigned char code[sizeof(::Trampolines::Bytecode::codeReturnEpilogueN)]; - - memcpy(&code[0],&::Trampolines::Bytecode::codeReturnEpilogueN[0],sizeof(::Trampolines::Bytecode::codeReturnEpilogueN)); + memcpy(&code[0],&::Trampolines::Bytecode::codeEpilogueN[0],sizeof(::Trampolines::Bytecode::codeEpilogueN)); unsigned char *c=&code[0]; @@ -340,57 +293,11 @@ namespace Trampolines bi.i=howmuch; - c+=::Trampolines::Bytecode::codeReturnEpilogueNReplace; + c+=::Trampolines::Bytecode::codeEpilogueNReplace; *c++=bi.b[0]; *c++=bi.b[1]; - Append(&code[0],sizeof(::Trampolines::Bytecode::codeReturnEpilogueN)); - //Append(&::Trampolines::Bytecode::codeReturnEpilogueN[0],sizeof(::Trampolines::Bytecode::codeReturnEpilogueN)); - }; - - /** - * Void epilogue, pops registers and frees the estimated stack usage of the trampoline. - */ - void VoidEpilogueAndFree() - { - this->VoidEpilogue(m_mystack); - }; - /** - * Void epilogue, pops registers, nothing else done with stack. - */ - void VoidEpilogue() - { - Append(&::Trampolines::Bytecode::codeVoidEpilogue[0],sizeof(::Trampolines::Bytecode::codeVoidEpilogue)); - }; - /** - * Void epilogue, pops registers, frees given amount of data off of the stack. - * - * @param howmuch How many bytes to free from the stack. 
- */ - void VoidEpilogue(int howmuch) - { - - unsigned char code[sizeof(::Trampolines::Bytecode::codeVoidEpilogueN)]; - - memcpy(&code[0],&::Trampolines::Bytecode::codeVoidEpilogueN[0],sizeof(::Trampolines::Bytecode::codeVoidEpilogueN)); - - - unsigned char *c=&code[0]; - - union - { - int i; - unsigned char b[4]; - } bi; - - bi.i=howmuch; - - c+=::Trampolines::Bytecode::codeVoidEpilogueNReplace; - *c++=bi.b[0]; - *c++=bi.b[1]; - - Append(&code[0],sizeof(::Trampolines::Bytecode::codeVoidEpilogueN)); - Append(&::Trampolines::Bytecode::codeVoidEpilogueN[0],sizeof(::Trampolines::Bytecode::codeVoidEpilogueN)); + Append(&code[0],sizeof(::Trampolines::Bytecode::codeEpilogueN)); }; /** @@ -642,27 +549,13 @@ inline void *CreateGenericTrampoline(bool thiscall, bool voidcall, bool retbuf, { Trampolines::TrampolineMaker tramp; - if (voidcall) + if (thiscall) { - if (thiscall) - { - tramp.ThisVoidPrologue(); - } - else - { - tramp.VoidPrologue(); - } + tramp.ThisPrologue(); } else { - if (thiscall) - { - tramp.ThisReturnPrologue(); - } - else - { - tramp.ReturnPrologue(); - } + tramp.Prologue(); } while (paramcount) @@ -676,31 +569,21 @@ inline void *CreateGenericTrampoline(bool thiscall, bool voidcall, bool retbuf, tramp.PushNum(reinterpret_cast(extraptr)); tramp.Call(callee); tramp.FreeTargetStack(); - if (voidcall) - { + #if defined(_WIN32) - tramp.VoidEpilogueAndFree(); + tramp.EpilogueAndFree(); #elif defined(__linux__) || defined(__APPLE__) - if (retbuf) - { - tramp.VoidEpilogue(4); - } - else - { - tramp.VoidEpilogue(); - } -#endif + if (retbuf) + { + tramp.Epilogue(4); } else { -#if defined(_WIN32) - tramp.ReturnEpilogueAndFree(); -#elif defined(__linux__) || defined(__APPLE__) - tramp.ReturnEpilogue(); -#endif + tramp.Epilogue(); } - return tramp.Finish(NULL); +#endif + return tramp.Finish(NULL); }; From 423eb0499b5ea93ee315909f69465529b9f83fcb Mon Sep 17 00:00:00 2001 From: Scott Ehlert Date: Sat, 31 May 2014 05:33:50 -0500 Subject: [PATCH 3/3] Align stack on 16 byte 
boundary in hamsandwich trampolines. --- dlls/hamsandwich/Trampolines.h | 63 +++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/dlls/hamsandwich/Trampolines.h b/dlls/hamsandwich/Trampolines.h index 8a1d65e1..816019fa 100644 --- a/dlls/hamsandwich/Trampolines.h +++ b/dlls/hamsandwich/Trampolines.h @@ -57,6 +57,7 @@ #include // memalign #include +#include namespace Trampolines { @@ -79,6 +80,26 @@ namespace Trampolines 0x89, 0xE5, // mov ebp, esp }; + /** + * Align stack on 16 byte boundary + */ + const unsigned char codeAlignStack16[] = { + 0x83, 0xE4, 0xF0, // and esp, 0xFFFFFFF0 + }; + + /** + * Allocate stack space (8-bit) by subtracting from ESP + */ + const unsigned char codeAllocStack[] = { + 0x83, 0xEC, 0xFF, // sub esp, 0xFF + }; + + /** + * Offset of codeAllocStack to modify at runtime + * to contain amount of stack space to allocate. + */ + const unsigned int codeAllocStackReplace = 2; + /** * Takes a paramter from the trampoline's stack * and pushes it onto the target's stack. @@ -154,14 +175,16 @@ namespace Trampolines * Epilogue of a simple function */ const unsigned char codeEpilogue[] = { + 0x89, 0xEC, // mov esp, ebp 0x5D, // pop ebp 0xC3 // ret }; const unsigned char codeEpilogueN[] = { + 0x89, 0xEC, // mov esp, ebp 0x5D, // pop ebp 0xC2, 0xCD, 0xAB // retn 0xABCD }; - const int codeEpilogueNReplace = 2; + const int codeEpilogueNReplace = 4; const unsigned char codeBreakpoint[] = { 0xCC // int 3 @@ -300,6 +323,42 @@ namespace Trampolines Append(&code[0],sizeof(::Trampolines::Bytecode::codeEpilogueN)); }; + /** + * Aligns stack on 16 byte boundary for functions that use aligned SSE instructions. + * This also allocates extra stack space to allow the specified number of slots to be used + * for function parameters that will be pushed onto the stack. 
+ */ + void AlignStack16(int slots) + { + const size_t stackNeeded = slots * sizeof(void *); + const size_t stackReserve = ke::Align(stackNeeded, 16); + const size_t stackExtra = stackReserve - stackNeeded; + + // Stack space should fit in a byte + assert(stackExtra <= 0xFF); + + const size_t codeAlignStackSize = sizeof(::Trampolines::Bytecode::codeAlignStack16); + const size_t codeAllocStackSize = sizeof(::Trampolines::Bytecode::codeAllocStack); + unsigned char code[codeAlignStackSize + codeAllocStackSize]; + + memcpy(&code[0], &::Trampolines::Bytecode::codeAlignStack16[0], codeAlignStackSize); + + if (stackExtra > 0) + { + unsigned char *c = &code[codeAlignStackSize]; + memcpy(c, &::Trampolines::Bytecode::codeAllocStack[0], codeAllocStackSize); + + c += ::Trampolines::Bytecode::codeAllocStackReplace; + *c = (unsigned char)stackExtra; + + Append(&code[0], codeAlignStackSize + codeAllocStackSize); + } + else + { + Append(&code[0], codeAlignStackSize); + } + } + /** * Pushes the "this" pointer onto the callee stack. Pushes ECX for MSVC, and param0 on GCC. */ @@ -552,10 +611,12 @@ inline void *CreateGenericTrampoline(bool thiscall, bool voidcall, bool retbuf, if (thiscall) { tramp.ThisPrologue(); + tramp.AlignStack16(paramcount + 2); // Param count + this ptr + extra ptr } else { tramp.Prologue(); + tramp.AlignStack16(paramcount + 1); // Param count + extra ptr } while (paramcount)