From 1960a815e1e3571ac51ca2f5e2c5d0b9ebeecd9b Mon Sep 17 00:00:00 2001 From: Peter Veenstra Date: Fri, 16 Nov 2018 12:40:37 +0000 Subject: [PATCH] Improvements for compiling dynrec core in 64 bit mode on windows. Thanks for the help PgrAm and kjliew. Imported-from: https://svn.code.sf.net/p/dosbox/code-0/dosbox/trunk@4171 --- src/cpu/core_dynrec/risc_x64.h | 132 +++++++++++++++++++++++++-------- 1 file changed, 103 insertions(+), 29 deletions(-) diff --git a/src/cpu/core_dynrec/risc_x64.h b/src/cpu/core_dynrec/risc_x64.h index 5c7e59dc..d6617697 100644 --- a/src/cpu/core_dynrec/risc_x64.h +++ b/src/cpu/core_dynrec/risc_x64.h @@ -56,11 +56,16 @@ typedef Bit8u HostReg; // then define DRC_PROTECT_ADDR_REG above #define FC_ADDR HOST_EBX +#if defined (_WIN64) +#define FC_OP1 HOST_ECX +#define FC_OP2 HOST_EDX +#else // register that holds the first parameter #define FC_OP1 HOST_EDI // register that holds the second parameter #define FC_OP2 HOST_ESI +#endif // special register that holds the third parameter for _R3 calls (byte accessible) #define FC_OP3 HOST_EAX @@ -359,18 +364,31 @@ static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) { // generate a call to a parameterless function static void INLINE gen_call_function_raw(void * func) { - cache_addb(0x48); - cache_addw(0xec83); - cache_addb(0x08); // sub rsp,0x08 (align stack to 16 byte boundary) +// cache_addb(0x48); +// cache_addw(0xec83); +#if defined (_WIN64) +// cache_addb(0x28); // allocate windows shadow space + cache_addd(0x28ec8348); +#else +// cache_addb(0x08); // sub rsp,0x08 (align stack to 16 byte boundary) + cache_addd(0x08ec8348); +#endif - cache_addb(0x48); - cache_addb(0xb8); // mov reg,imm64 +// cache_addb(0x48); +// cache_addb(0xb8); // mov reg,imm64 + cache_addw(0xb848); cache_addq((Bit64u)func); cache_addw(0xd0ff); - cache_addb(0x48); - cache_addw(0xc483); - cache_addb(0x08); // add rsp,0x08 (reset alignment) +// cache_addb(0x48); +// cache_addw(0xc483); +#if defined (_WIN64) +// cache_addb(0x28); // deallocate windows shadow space + cache_addd(0x28c48348); +#else +// cache_addb(0x08); // add rsp,0x08 (reset alignment) + cache_addd(0x08c48348); +#endif } // generate a call to a function with paramcount parameters @@ -381,33 +399,51 @@ static Bit64u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fa cache_addb(0x48); cache_addw(0xc48b); // mov rax,rsp - cache_addb(0x48); - cache_addw(0xec83); // sub rsp,0x08 - cache_addb(0x08); // 0x08==return address pushed onto stack by call +// cache_addb(0x48); +// cache_addw(0xec83); // sub rsp,0x08 +// cache_addb(0x08); // 0x08==return address pushed onto stack by call + cache_addd(0x08ec8348); - cache_addb(0x48); - cache_addw(0xe483); // and esp,0xfffffffffffffff0 - cache_addb(0xf0); +// cache_addb(0x48); +// cache_addw(0xe483); // and esp,0xfffffffffffffff0 +// cache_addb(0xf0); + cache_addd(0xf0e48348); - cache_addb(0x48); - cache_addw(0xc483); // add rsp,0x08 - cache_addb(0x08); +// cache_addb(0x48); +// cache_addw(0xc483); // add rsp,0x08 +// cache_addb(0x08); + cache_addd(0x08c48348); // stack is 16 byte aligned now cache_addb(0x50); // push rax (==old rsp) +#if defined (_WIN64) +// cache_addb(0x48); +// cache_addw(0xec83); // sub rsp,0x20 +// cache_addb(0x20); // allocate windows shadow space + cache_addd(0x20ec8348); +#endif + // returned address relates to where the address is stored in gen_call_function_raw Bit64u proc_addr=(Bit64u)cache.pos-4; // Do the actual call to the procedure - cache_addb(0x48); - cache_addb(0xb8); // mov reg,imm64 +// cache_addb(0x48); +// cache_addb(0xb8); // mov reg,imm64 + cache_addw(0xb848); cache_addq((Bit64u)func); cache_addw(0xd0ff); +#if defined (_WIN64) +// cache_addb(0x48); +// cache_addw(0xc483); // add rsp,0x20 +// cache_addb(0x20); // deallocate windows shadow space + cache_addd(0x20c48348); +#endif + // restore stack cache_addb(0x5c); // pop rsp @@ -425,7 +461,7 @@ static void INLINE gen_load_param_imm(Bitu imm,Bitu param) { case 1: // mov param2,imm32 gen_mov_dword_to_reg_imm(FC_OP2,(Bit32u)imm); break; -#if defined (_MSC_VER) +#if defined (_WIN64) case 2: // mov r8,imm32 cache_addw(0xb849); cache_addq((Bit32u)imm); @@ -458,7 +494,7 @@ static void INLINE gen_load_param_addr(DRC_PTR_SIZE_IM addr,Bitu param) { case 1: // mov param2,addr64 gen_mov_reg_qword(FC_OP2,addr); break; -#if defined (_MSC_VER) +#if defined (_WIN64) case 2: // mov r8,addr64 cache_addw(0xb849); cache_addq(addr); @@ -491,14 +527,14 @@ static void INLINE gen_load_param_reg(Bitu reg,Bitu param) { case 1: // mov param2,reg&7 gen_mov_regs(FC_OP2,reg&7); break; -#if defined (_MSC_VER) +#if defined (_WIN64) case 2: // mov r8,reg&7 - cache_addb(0x49); - gen_mov_regs(0,reg&7); + cache_addw(0x8949); + cache_addb(0xc0 + ((reg & 7) << 3)); break; case 3: // mov r9,reg&7 - cache_addb(0x49); - gen_mov_regs(1,reg&7); + cache_addw(0x8949); + cache_addb(0xc1 + ((reg & 7) << 3)); break; #else case 2: // mov rdx,reg&7 @@ -524,12 +560,12 @@ static void INLINE gen_load_param_mem(Bitu mem,Bitu param) { case 1: // mov param2,[mem] gen_mov_word_to_reg(FC_OP2,(void*)mem,true); break; -#if defined (_MSC_VER) +#if defined (_WIN64) case 2: // mov r8,[mem] - gen_mov_word_to_reg(0,(void*)mem,true,0x49); // 0x49, use x64 rX regs + gen_mov_word_to_reg(0,(void*)mem,true,0x4c); // 0x4c, use x64 rX regs break; case 3: // mov r9,[mem] - gen_mov_word_to_reg(1,(void*)mem,true,0x49); // 0x49, use x64 rX regs + gen_mov_word_to_reg(1,(void*)mem,true,0x4c); // 0x4c, use x64 rX regs break; #else case 2: // mov rdx,[mem] @@ -629,7 +665,13 @@ static void gen_fill_branch_long(Bit64u data) { static void gen_run_code(void) { cache_addb(0x53); // push rbx +#if defined (_WIN64) + cache_addw(0x5657); // push rdi; push rsi +#endif cache_addw(0xd0ff+(FC_OP1<<8)); // call rdi +#if defined (_WIN64) + cache_addw(0x5f5e); // pop rsi; pop rdi +#endif cache_addb(0x5b); // pop rbx } @@ -650,7 +692,11 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_ADDb: case t_ADDw: case t_ADDd: +#if defined (_WIN64) + *(Bit32u*)(pos+0)=0xd001c889; // mov eax, ecx; add eax, edx +#else *(Bit32u*)(pos+0)=0xf001f889; // mov eax,edi; add eax,esi +#endif *(Bit32u*)(pos+4)=0x90900eeb; // skip *(Bit32u*)(pos+8)=0x90909090; *(Bit32u*)(pos+12)=0x90909090; @@ -659,7 +705,11 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_ORb: case t_ORw: case t_ORd: +#if defined (_WIN64) + *(Bit32u*)(pos+0)=0xd009c889; // mov eax, ecx; or eax, edx +#else *(Bit32u*)(pos+0)=0xf009f889; // mov eax,edi; or eax,esi +#endif *(Bit32u*)(pos+4)=0x90900eeb; // skip *(Bit32u*)(pos+8)=0x90909090; *(Bit32u*)(pos+12)=0x90909090; @@ -668,7 +718,11 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_ANDb: case t_ANDw: case t_ANDd: +#if defined (_WIN64) + *(Bit32u*)(pos+0)=0xd021c889; // mov eax, ecx; and eax, edx +#else *(Bit32u*)(pos+0)=0xf021f889; // mov eax,edi; and eax,esi +#endif *(Bit32u*)(pos+4)=0x90900eeb; // skip *(Bit32u*)(pos+8)=0x90909090; *(Bit32u*)(pos+12)=0x90909090; @@ -677,7 +731,11 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_SUBb: case t_SUBw: case t_SUBd: +#if defined (_WIN64) + *(Bit32u*)(pos+0)=0xd029c889; // mov eax, ecx; sub eax, edx +#else *(Bit32u*)(pos+0)=0xf029f889; // mov eax,edi; sub eax,esi +#endif *(Bit32u*)(pos+4)=0x90900eeb; // skip *(Bit32u*)(pos+8)=0x90909090; *(Bit32u*)(pos+12)=0x90909090; @@ -686,7 +744,11 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_XORb: case t_XORw: case t_XORd: +#if defined (_WIN64) + *(Bit32u*)(pos+0)=0xd031c889; // mov eax, ecx; xor eax, edx +#else *(Bit32u*)(pos+0)=0xf031f889; // mov eax,edi; xor eax,esi +#endif *(Bit32u*)(pos+4)=0x90900eeb; // skip *(Bit32u*)(pos+8)=0x90909090; *(Bit32u*)(pos+12)=0x90909090; @@ -707,7 +769,11 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_INCb: case t_INCw: case t_INCd: +#if defined (_WIN64) + *(Bit32u*)(pos+0)=0xc0ffc889; // mov eax, ecx; inc eax +#else *(Bit32u*)(pos+0)=0xc0fff889; // mov eax,edi; inc eax +#endif *(Bit32u*)(pos+4)=0x90900eeb; // skip *(Bit32u*)(pos+8)=0x90909090; *(Bit32u*)(pos+12)=0x90909090; @@ -716,7 +782,11 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_DECb: case t_DECw: case t_DECd: +#if defined (_WIN64) + *(Bit32u*)(pos+0)=0xc8ffc889; // mov eax, ecx; dec eax +#else *(Bit32u*)(pos+0)=0xc8fff889; // mov eax,edi; dec eax +#endif *(Bit32u*)(pos+4)=0x90900eeb; // skip *(Bit32u*)(pos+8)=0x90909090; *(Bit32u*)(pos+12)=0x90909090; @@ -725,7 +795,11 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_NEGb: case t_NEGw: case t_NEGd: +#if defined (_WIN64) + *(Bit32u*)(pos+0)=0xd8f7c889; // mov eax, ecx; neg eax +#else *(Bit32u*)(pos+0)=0xd8f7f889; // mov eax,edi; neg eax +#endif *(Bit32u*)(pos+4)=0x90900eeb; // skip *(Bit32u*)(pos+8)=0x90909090; *(Bit32u*)(pos+12)=0x90909090;