1
0
Fork 0

Improvements for compiling dynrec core in 64 bit mode on windows. Thanks for the help PgrAm and kjliew.

Imported-from: https://svn.code.sf.net/p/dosbox/code-0/dosbox/trunk@4171
This commit is contained in:
Peter Veenstra 2018-11-16 12:40:37 +00:00
parent 80342e104e
commit 1960a815e1

View file

@ -56,11 +56,16 @@ typedef Bit8u HostReg;
// then define DRC_PROTECT_ADDR_REG above
#define FC_ADDR HOST_EBX
#if defined (_WIN64)
#define FC_OP1 HOST_ECX
#define FC_OP2 HOST_EDX
#else
// register that holds the first parameter
#define FC_OP1 HOST_EDI
// register that holds the second parameter
#define FC_OP2 HOST_ESI
#endif
// special register that holds the third parameter for _R3 calls (byte accessible)
#define FC_OP3 HOST_EAX
@ -359,18 +364,31 @@ static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) {
// generate a call to a parameterless function
static void INLINE gen_call_function_raw(void * func) {
cache_addb(0x48);
cache_addw(0xec83);
cache_addb(0x08); // sub rsp,0x08 (align stack to 16 byte boundary)
// cache_addb(0x48);
// cache_addw(0xec83);
#if defined (_WIN64)
// cache_addb(0x28); // allocate windows shadow space
cache_addd(0x28ec8348);
#else
// cache_addb(0x08); // sub rsp,0x08 (align stack to 16 byte boundary)
cache_addd(0x08ec8348);
#endif
cache_addb(0x48);
cache_addb(0xb8); // mov reg,imm64
// cache_addb(0x48);
// cache_addb(0xb8); // mov reg,imm64
cache_addw(0xb848);
cache_addq((Bit64u)func);
cache_addw(0xd0ff);
cache_addb(0x48);
cache_addw(0xc483);
cache_addb(0x08); // add rsp,0x08 (reset alignment)
// cache_addb(0x48);
// cache_addw(0xc483);
#if defined (_WIN64)
// cache_addb(0x28); // deallocate windows shadow space
cache_addd(0x28c48348);
#else
// cache_addb(0x08); // add rsp,0x08 (reset alignment)
cache_addd(0x08c48348);
#endif
}
// generate a call to a function with paramcount parameters
@ -381,33 +399,51 @@ static Bit64u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fa
cache_addb(0x48);
cache_addw(0xc48b); // mov rax,rsp
cache_addb(0x48);
cache_addw(0xec83); // sub rsp,0x08
cache_addb(0x08); // 0x08==return address pushed onto stack by call
// cache_addb(0x48);
// cache_addw(0xec83); // sub rsp,0x08
// cache_addb(0x08); // 0x08==return address pushed onto stack by call
cache_addd(0x08ec8348);
cache_addb(0x48);
cache_addw(0xe483); // and esp,0xfffffffffffffff0
cache_addb(0xf0);
// cache_addb(0x48);
// cache_addw(0xe483); // and esp,0xfffffffffffffff0
// cache_addb(0xf0);
cache_addd(0xf0e48348);
cache_addb(0x48);
cache_addw(0xc483); // add rsp,0x08
cache_addb(0x08);
// cache_addb(0x48);
// cache_addw(0xc483); // add rsp,0x08
// cache_addb(0x08);
cache_addd(0x08c48348);
// stack is 16 byte aligned now
cache_addb(0x50); // push rax (==old rsp)
#if defined (_WIN64)
// cache_addb(0x48);
// cache_addw(0xec83); // sub rsp,0x20
// cache_addb(0x20); // allocate windows shadow space
cache_addd(0x20ec8348);
#endif
// returned address relates to where the address is stored in gen_call_function_raw
Bit64u proc_addr=(Bit64u)cache.pos-4;
// Do the actual call to the procedure
cache_addb(0x48);
cache_addb(0xb8); // mov reg,imm64
// cache_addb(0x48);
// cache_addb(0xb8); // mov reg,imm64
cache_addw(0xb848);
cache_addq((Bit64u)func);
cache_addw(0xd0ff);
#if defined (_WIN64)
// cache_addb(0x48);
// cache_addw(0xc483); // add rsp,0x20
// cache_addb(0x20); // deallocate windows shadow space
cache_addd(0x20c48348);
#endif
// restore stack
cache_addb(0x5c); // pop rsp
@ -425,7 +461,7 @@ static void INLINE gen_load_param_imm(Bitu imm,Bitu param) {
case 1: // mov param2,imm32
gen_mov_dword_to_reg_imm(FC_OP2,(Bit32u)imm);
break;
#if defined (_MSC_VER)
#if defined (_WIN64)
case 2: // mov r8,imm32
cache_addw(0xb849);
cache_addq((Bit32u)imm);
@ -458,7 +494,7 @@ static void INLINE gen_load_param_addr(DRC_PTR_SIZE_IM addr,Bitu param) {
case 1: // mov param2,addr64
gen_mov_reg_qword(FC_OP2,addr);
break;
#if defined (_MSC_VER)
#if defined (_WIN64)
case 2: // mov r8,addr64
cache_addw(0xb849);
cache_addq(addr);
@ -491,14 +527,14 @@ static void INLINE gen_load_param_reg(Bitu reg,Bitu param) {
case 1: // mov param2,reg&7
gen_mov_regs(FC_OP2,reg&7);
break;
#if defined (_MSC_VER)
#if defined (_WIN64)
case 2: // mov r8,reg&7
cache_addb(0x49);
gen_mov_regs(0,reg&7);
cache_addw(0x8949);
cache_addb(0xc0 + ((reg & 7) << 3));
break;
case 3: // mov r9,reg&7
cache_addb(0x49);
gen_mov_regs(1,reg&7);
cache_addw(0x8949);
cache_addb(0xc1 + ((reg & 7) << 3));
break;
#else
case 2: // mov rdx,reg&7
@ -524,12 +560,12 @@ static void INLINE gen_load_param_mem(Bitu mem,Bitu param) {
case 1: // mov param2,[mem]
gen_mov_word_to_reg(FC_OP2,(void*)mem,true);
break;
#if defined (_MSC_VER)
#if defined (_WIN64)
case 2: // mov r8,[mem]
gen_mov_word_to_reg(0,(void*)mem,true,0x49); // 0x49, use x64 rX regs
gen_mov_word_to_reg(0,(void*)mem,true,0x4c); // 0x4c, use x64 rX regs
break;
case 3: // mov r9,[mem]
gen_mov_word_to_reg(1,(void*)mem,true,0x49); // 0x49, use x64 rX regs
gen_mov_word_to_reg(1,(void*)mem,true,0x4c); // 0x4c, use x64 rX regs
break;
#else
case 2: // mov rdx,[mem]
@ -629,7 +665,13 @@ static void gen_fill_branch_long(Bit64u data) {
static void gen_run_code(void) {
cache_addb(0x53); // push rbx
#if defined (_WIN64)
cache_addw(0x5657); // push rdi; push rsi
#endif
cache_addw(0xd0ff+(FC_OP1<<8)); // call rdi
#if defined (_WIN64)
cache_addw(0x5f5e); // pop rsi; pop rdi
#endif
cache_addb(0x5b); // pop rbx
}
@ -650,7 +692,11 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
case t_ADDb:
case t_ADDw:
case t_ADDd:
#if defined (_WIN64)
*(Bit32u*)(pos+0)=0xd001c889; // mov eax, ecx; add eax, edx
#else
*(Bit32u*)(pos+0)=0xf001f889; // mov eax,edi; add eax,esi
#endif
*(Bit32u*)(pos+4)=0x90900eeb; // skip
*(Bit32u*)(pos+8)=0x90909090;
*(Bit32u*)(pos+12)=0x90909090;
@ -659,7 +705,11 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
case t_ORb:
case t_ORw:
case t_ORd:
#if defined (_WIN64)
*(Bit32u*)(pos+0)=0xd009c889; // mov eax, ecx; or eax, edx
#else
*(Bit32u*)(pos+0)=0xf009f889; // mov eax,edi; or eax,esi
#endif
*(Bit32u*)(pos+4)=0x90900eeb; // skip
*(Bit32u*)(pos+8)=0x90909090;
*(Bit32u*)(pos+12)=0x90909090;
@ -668,7 +718,11 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
case t_ANDb:
case t_ANDw:
case t_ANDd:
#if defined (_WIN64)
*(Bit32u*)(pos+0)=0xd021c889; // mov eax, ecx; and eax, edx
#else
*(Bit32u*)(pos+0)=0xf021f889; // mov eax,edi; and eax,esi
#endif
*(Bit32u*)(pos+4)=0x90900eeb; // skip
*(Bit32u*)(pos+8)=0x90909090;
*(Bit32u*)(pos+12)=0x90909090;
@ -677,7 +731,11 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
case t_SUBb:
case t_SUBw:
case t_SUBd:
#if defined (_WIN64)
*(Bit32u*)(pos+0)=0xd029c889; // mov eax, ecx; sub eax, edx
#else
*(Bit32u*)(pos+0)=0xf029f889; // mov eax,edi; sub eax,esi
#endif
*(Bit32u*)(pos+4)=0x90900eeb; // skip
*(Bit32u*)(pos+8)=0x90909090;
*(Bit32u*)(pos+12)=0x90909090;
@ -686,7 +744,11 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
case t_XORb:
case t_XORw:
case t_XORd:
#if defined (_WIN64)
*(Bit32u*)(pos+0)=0xd031c889; // mov eax, ecx; xor eax, edx
#else
*(Bit32u*)(pos+0)=0xf031f889; // mov eax,edi; xor eax,esi
#endif
*(Bit32u*)(pos+4)=0x90900eeb; // skip
*(Bit32u*)(pos+8)=0x90909090;
*(Bit32u*)(pos+12)=0x90909090;
@ -707,7 +769,11 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
case t_INCb:
case t_INCw:
case t_INCd:
#if defined (_WIN64)
*(Bit32u*)(pos+0)=0xc0ffc889; // mov eax, ecx; inc eax
#else
*(Bit32u*)(pos+0)=0xc0fff889; // mov eax,edi; inc eax
#endif
*(Bit32u*)(pos+4)=0x90900eeb; // skip
*(Bit32u*)(pos+8)=0x90909090;
*(Bit32u*)(pos+12)=0x90909090;
@ -716,7 +782,11 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
case t_DECb:
case t_DECw:
case t_DECd:
#if defined (_WIN64)
*(Bit32u*)(pos+0)=0xc8ffc889; // mov eax, ecx; dec eax
#else
*(Bit32u*)(pos+0)=0xc8fff889; // mov eax,edi; dec eax
#endif
*(Bit32u*)(pos+4)=0x90900eeb; // skip
*(Bit32u*)(pos+8)=0x90909090;
*(Bit32u*)(pos+12)=0x90909090;
@ -725,7 +795,11 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
case t_NEGb:
case t_NEGw:
case t_NEGd:
#if defined (_WIN64)
*(Bit32u*)(pos+0)=0xd8f7c889; // mov eax, ecx; neg eax
#else
*(Bit32u*)(pos+0)=0xd8f7f889; // mov eax,edi; neg eax
#endif
*(Bit32u*)(pos+4)=0x90900eeb; // skip
*(Bit32u*)(pos+8)=0x90909090;
*(Bit32u*)(pos+12)=0x90909090;