From fe177b413603793a3342cfe22f1fc9e050cbea76 Mon Sep 17 00:00:00 2001 From: Peter Veenstra Date: Mon, 30 Sep 2019 18:30:11 +0000 Subject: [PATCH] Add 64-bit support to dynamic_x86 core from vogons topic 67673. Thanks jmarsh! Imported-from: https://svn.code.sf.net/p/dosbox/code-0/dosbox/trunk@4260 --- configure.ac | 19 +- src/cpu/core_dyn_x86.cpp | 160 ++-- src/cpu/core_dyn_x86/Makefile.am | 4 +- src/cpu/core_dyn_x86/cache.h | 76 +- src/cpu/core_dyn_x86/decoder.h | 322 +++++++- src/cpu/core_dyn_x86/dyn_fpu.h | 150 ++-- src/cpu/core_dyn_x86/dyn_fpu_dh.h | 187 ++--- src/cpu/core_dyn_x86/helpers.h | 8 +- src/cpu/core_dyn_x86/risc_x64.h | 1272 +++++++++++++++++++++++++++++ src/cpu/core_dyn_x86/risc_x86.h | 26 +- src/cpu/core_dyn_x86/string.h | 8 +- 11 files changed, 1793 insertions(+), 439 deletions(-) create mode 100644 src/cpu/core_dyn_x86/risc_x64.h diff --git a/configure.ac b/configure.ac index a084e926..af394c65 100644 --- a/configure.ac +++ b/configure.ac @@ -340,7 +340,7 @@ AC_MSG_CHECKING(whether x86 dynamic cpu core will be enabled) if test x$enable_dynamic_x86 = xno -o x$enable_dynamic_core = xno; then AC_MSG_RESULT(no) else - if test x$c_targetcpu = xx86 ; then + if test x$c_targetcpu = xx86 -o x$c_targetcpu = xx86_64; then AC_DEFINE(C_DYNAMIC_X86,1) AC_MSG_RESULT(yes) else @@ -355,7 +355,7 @@ if test x$enable_dynrec = xno -o x$enable_dynamic_core = xno; then AC_MSG_RESULT(no) else dnl x86 only enable it if dynamic-x86 is disabled. - if test x$c_targetcpu = xx86 ; then + if test x$c_targetcpu = xx86 -o x$c_targetcpu = xx86_64; then if test x$enable_dynamic_x86 = xno ; then AC_DEFINE(C_DYNREC,1) AC_MSG_RESULT(yes) @@ -363,16 +363,11 @@ dnl x86 only enable it if dynamic-x86 is disabled. 
AC_MSG_RESULT([no, using dynamic-x86]) fi else - if test x$c_targetcpu = xx86_64 ; then - AC_DEFINE(C_DYNREC,1) - AC_MSG_RESULT(yes) - else - if test x$c_targetcpu = xarm ; then - AC_DEFINE(C_DYNREC,1) - AC_MSG_RESULT(yes) - else - AC_MSG_RESULT(no) - fi + if test x$c_targetcpu = xarm ; then + AC_DEFINE(C_DYNREC,1) + AC_MSG_RESULT(yes) + else + AC_MSG_RESULT(no) fi fi fi diff --git a/src/cpu/core_dyn_x86.cpp b/src/cpu/core_dyn_x86.cpp index 17e84221..ec65238b 100644 --- a/src/cpu/core_dyn_x86.cpp +++ b/src/cpu/core_dyn_x86.cpp @@ -65,6 +65,7 @@ #if C_FPU #define CPU_FPU 1 //Enable FPU escape instructions +#define X86_DYNFPU_DH_ENABLED #endif enum { @@ -154,7 +155,7 @@ static DynReg DynRegs[G_MAX]; #define DREG(_WHICH_) &DynRegs[G_ ## _WHICH_ ] static struct { - Bitu ea,tmpb,tmpd,stack,shift,newesp; + Bit32u ea,tmpb,tmpd,stack,shift,newesp; } extra_regs; static void IllegalOption(const char* msg) { @@ -168,17 +169,37 @@ static struct { Bit32u readdata; } core_dyn; +#if defined(X86_DYNFPU_DH_ENABLED) static struct { - Bit32u state[32]; + Bit16u cw,host_cw; + bool state_used; + // some fields expanded here for alignment purposes + struct { + Bit32u cw; + Bit32u sw; + Bit32u tag; + Bit32u ip; + Bit32u cs; + Bit32u ea; + Bit32u ds; + Bit8u st_reg[8][10]; + } state; FPU_P_Reg temp,temp2; Bit32u dh_fpu_enabled; - Bit32u state_used; - Bit32u cw,host_cw; Bit8u temp_state[128]; } dyn_dh_fpu; +#endif +#define X86 0x01 +#define X86_64 0x02 +#if C_TARGETCPU == X86_64 +#include "core_dyn_x86/risc_x64.h" +#elif C_TARGETCPU == X86 #include "core_dyn_x86/risc_x86.h" +#else +#error DYN_X86 core not supported for this CPU target. 
+#endif struct DynState { DynReg regs[G_MAX]; @@ -233,31 +254,19 @@ static void dyn_restoreregister(DynReg * src_reg, DynReg * dst_reg) { #include "core_dyn_x86/decoder.h" -#if defined (_MSC_VER) -#define DH_FPU_SAVE_REINIT \ -{ \ - __asm { \ - __asm fnsave dyn_dh_fpu.state[0] \ - } \ - dyn_dh_fpu.state_used=false; \ - dyn_dh_fpu.state[0]|=0x3f; \ -} -#else -#define DH_FPU_SAVE_REINIT \ -{ \ - __asm__ volatile ( \ - "fnsave %0 \n" \ - : "=m" (dyn_dh_fpu.state[0]) \ - : \ - : "memory" \ - ); \ - dyn_dh_fpu.state_used=false; \ - dyn_dh_fpu.state[0]|=0x3f; \ -} -#endif - - Bits CPU_Core_Dyn_X86_Run(void) { + // helper class to auto-save DH_FPU state on function exit + class auto_dh_fpu { + public: + ~auto_dh_fpu(void) { +#if defined(X86_DYNFPU_DH_ENABLED) + if (dyn_dh_fpu.state_used) + gen_dh_fpu_save(); +#endif + }; + }; + auto_dh_fpu fpu_saver; + /* Determine the linear address of CS:EIP */ restart_core: PhysPt ip_point=SegPhys(cs)+reg_eip; @@ -272,7 +281,6 @@ restart_core: goto restart_core; } if (!chandler) { - if (dyn_dh_fpu.state_used) DH_FPU_SAVE_REINIT return CPU_Core_Normal_Run(); } /* Find correct Dynamic Block to run */ @@ -281,10 +289,11 @@ restart_core: if (!chandler->invalidation_map || (chandler->invalidation_map[ip_point&4095]<4)) { block=CreateCacheBlock(chandler,ip_point,32); } else { - Bitu old_cycles=CPU_Cycles; + Bit32s old_cycles=CPU_Cycles; CPU_Cycles=1; + // manually save + fpu_saver = auto_dh_fpu(); Bits nc_retcode=CPU_Core_Normal_Run(); - if (dyn_dh_fpu.state_used) DH_FPU_SAVE_REINIT if (!nc_retcode) { CPU_Cycles=old_cycles-1; goto restart_core; @@ -304,21 +313,17 @@ run_block: #if C_DEBUG #if C_HEAVY_DEBUG if (DEBUG_HeavyIsBreakpoint()) { - if (dyn_dh_fpu.state_used) DH_FPU_SAVE_REINIT return debugCallback; } #endif #endif if (!GETFLAG(TF)) { if (GETFLAG(IF) && PIC_IRQCheck) { - if (dyn_dh_fpu.state_used) DH_FPU_SAVE_REINIT return CBRET_NONE; } goto restart_core; } cpudecoder=CPU_Core_Dyn_X86_Trap_Run; - if (!dyn_dh_fpu.state_used) return 
CBRET_NONE; - DH_FPU_SAVE_REINIT return CBRET_NONE; case BR_Normal: /* Maybe check if we staying in the same page? */ @@ -334,12 +339,8 @@ run_block: if (DEBUG_HeavyIsBreakpoint()) return debugCallback; #endif #endif - if (!dyn_dh_fpu.state_used) return CBRET_NONE; - DH_FPU_SAVE_REINIT return CBRET_NONE; case BR_CallBack: - if (!dyn_dh_fpu.state_used) return core_dyn.callback; - DH_FPU_SAVE_REINIT return core_dyn.callback; case BR_SMCBlock: // LOG_MSG("selfmodification of running block at %x:%x",SegValue(cs),reg_eip); @@ -348,19 +349,17 @@ run_block: case BR_Opcode: CPU_CycleLeft+=CPU_Cycles; CPU_Cycles=1; - if (dyn_dh_fpu.state_used) DH_FPU_SAVE_REINIT return CPU_Core_Normal_Run(); #if (C_DEBUG) case BR_OpcodeFull: CPU_CycleLeft+=CPU_Cycles; CPU_Cycles=1; - if (dyn_dh_fpu.state_used) DH_FPU_SAVE_REINIT return CPU_Core_Full_Run(); #endif case BR_Link1: case BR_Link2: { - Bitu temp_ip=SegPhys(cs)+reg_eip; + Bit32u temp_ip=SegPhys(cs)+reg_eip; CodePageHandler * temp_handler=(CodePageHandler *)get_tlb_readhandler(temp_ip); if (temp_handler->flags & (cpu.code.big ? 
PFLAG_HASCODE32:PFLAG_HASCODE16)) { block=temp_handler->FindCacheBlock(temp_ip & 4095); @@ -371,12 +370,11 @@ run_block: } goto restart_core; } - if (dyn_dh_fpu.state_used) DH_FPU_SAVE_REINIT return CBRET_NONE; } Bits CPU_Core_Dyn_X86_Trap_Run(void) { - Bits oldCycles = CPU_Cycles; + Bit32s oldCycles = CPU_Cycles; CPU_Cycles = 1; cpu.trap_skip = false; @@ -449,25 +447,15 @@ void CPU_Core_Dyn_X86_Init(void) { /* Init the generator */ gen_init(); +#if defined(X86_DYNFPU_DH_ENABLED) /* Init the fpu state */ dyn_dh_fpu.dh_fpu_enabled=true; dyn_dh_fpu.state_used=false; dyn_dh_fpu.cw=0x37f; -#if defined (_MSC_VER) - __asm { - __asm finit - __asm fsave dyn_dh_fpu.state[0] - __asm fstcw dyn_dh_fpu.host_cw - } -#else - __asm__ volatile ( - "finit \n" - "fsave %0 \n" - "fstcw %1 \n" - : "=m" (dyn_dh_fpu.state[0]), "=m" (dyn_dh_fpu.host_cw) - : - : "memory" - ); + // FINIT + memset(&dyn_dh_fpu.state, 0, sizeof(dyn_dh_fpu.state)); + dyn_dh_fpu.state.cw = 0x37F; + dyn_dh_fpu.state.tag = 0xFFFF; #endif return; @@ -482,62 +470,10 @@ void CPU_Core_Dyn_X86_Cache_Close(void) { cache_close(); } -void CPU_Core_Dyn_X86_Cache_Reset(void) { - cache_reset(); -} - void CPU_Core_Dyn_X86_SetFPUMode(bool dh_fpu) { +#if defined(X86_DYNFPU_DH_ENABLED) dyn_dh_fpu.dh_fpu_enabled=dh_fpu; -} - -Bit32u fpu_state[32]; - -void CPU_Core_Dyn_X86_SaveDHFPUState(void) { - if (dyn_dh_fpu.dh_fpu_enabled) { - if (dyn_dh_fpu.state_used!=0) { -#if defined (_MSC_VER) - __asm { - __asm fsave fpu_state[0] - __asm finit - } -#else - __asm__ volatile ( - "fsave %0 \n" - "finit \n" - : "=m" (fpu_state[0]) - : - : "memory" - ); #endif - } - } -} - -void CPU_Core_Dyn_X86_RestoreDHFPUState(void) { - if (dyn_dh_fpu.dh_fpu_enabled) { - if (dyn_dh_fpu.state_used!=0) { -#if defined (_MSC_VER) - __asm { - __asm frstor fpu_state[0] - } -#else - __asm__ volatile ( - "frstor %0 \n" - : - : "m" (fpu_state[0]) - : - ); -#endif - } - } -} - -#else - -void CPU_Core_Dyn_X86_SaveDHFPUState(void) { -} - -void 
CPU_Core_Dyn_X86_RestoreDHFPUState(void) { } #endif diff --git a/src/cpu/core_dyn_x86/Makefile.am b/src/cpu/core_dyn_x86/Makefile.am index 3d9be090..62cb2bb6 100644 --- a/src/cpu/core_dyn_x86/Makefile.am +++ b/src/cpu/core_dyn_x86/Makefile.am @@ -1,2 +1,2 @@ -noinst_HEADERS = cache.h helpers.h decoder.h risc_x86.h string.h \ - dyn_fpu.h dyn_fpu_dh.h \ No newline at end of file +noinst_HEADERS = cache.h helpers.h decoder.h risc_x86.h risc_x64.h string.h \ + dyn_fpu.h dyn_fpu_dh.h diff --git a/src/cpu/core_dyn_x86/cache.h b/src/cpu/core_dyn_x86/cache.h index 08a3526b..6cb38754 100644 --- a/src/cpu/core_dyn_x86/cache.h +++ b/src/cpu/core_dyn_x86/cache.h @@ -466,6 +466,10 @@ static INLINE void cache_addd(Bit32u val) { cache.pos+=4; } +static INLINE void cache_addq(Bit64u val) { + *(Bit64u*)cache.pos=val; + cache.pos+=8; +} static void gen_return(BlockReturn retcode); @@ -569,75 +573,3 @@ static void cache_close(void) { cache_code_link_blocks = NULL; cache_initialized = false; */ } - -static void cache_reset(void) { - if (cache_initialized) { - for (;;) { - if (cache.used_pages) { - CodePageHandler * cpage=cache.used_pages; - CodePageHandler * npage=cache.used_pages->next; - cpage->ClearRelease(); - delete cpage; - cache.used_pages=npage; - } else break; - } - - if (cache_blocks == NULL) { - cache_blocks=(CacheBlock*)malloc(CACHE_BLOCKS*sizeof(CacheBlock)); - if(!cache_blocks) E_Exit("Allocating cache_blocks has failed"); - } - memset(cache_blocks,0,sizeof(CacheBlock)*CACHE_BLOCKS); - cache.block.free=&cache_blocks[0]; - for (Bits i=0;icache.start=&cache_code[0]; - block->cache.size=CACHE_TOTAL; - block->cache.next=0; //Last block in the list - - /* Setup the default blocks for block linkage returns */ - cache.pos=&cache_code_link_blocks[0]; - link_blocks[0].cache.start=cache.pos; - gen_return(BR_Link1); - cache.pos=&cache_code_link_blocks[32]; - link_blocks[1].cache.start=cache.pos; - gen_return(BR_Link2); - cache.free_pages=0; - cache.last_page=0; - 
cache.used_pages=0; - /* Setup the code pages */ - for (Bitu i=0;inext=cache.free_pages; - cache.free_pages=newpage; - } - } -} diff --git a/src/cpu/core_dyn_x86/decoder.h b/src/cpu/core_dyn_x86/decoder.h index 416f10b2..2de1fd87 100644 --- a/src/cpu/core_dyn_x86/decoder.h +++ b/src/cpu/core_dyn_x86/decoder.h @@ -17,7 +17,6 @@ */ -#define X86_DYNFPU_DH_ENABLED #define X86_INLINED_MEMACCESS @@ -332,7 +331,7 @@ static BlockReturn DynRunException(Bit32u eip_add,Bit32u cycle_sub,Bit32u dflags } static void dyn_check_bool_exception(DynReg * check) { - gen_dop_byte(DOP_OR,check,0,check,0); + gen_dop_byte(DOP_TEST,check,0,check,0); save_info[used_save_info].branch_pos=gen_create_branch_long(BR_NZ); dyn_savestate(&save_info[used_save_info].state); if (!decode.cycles) decode.cycles++; @@ -344,7 +343,7 @@ static void dyn_check_bool_exception(DynReg * check) { } static void dyn_check_bool_exception_al(void) { - cache_addw(0xc00a); // or al, al + cache_addw(0xC084); // test al,al save_info[used_save_info].branch_pos=gen_create_branch_long(BR_NZ); dyn_savestate(&save_info[used_save_info].state); if (!decode.cycles) decode.cycles++; @@ -359,7 +358,7 @@ static void dyn_check_bool_exception_al(void) { static void dyn_check_irqrequest(void) { gen_load_host(&PIC_IRQCheck,DREG(TMPB),4); - gen_dop_word(DOP_OR,true,DREG(TMPB),DREG(TMPB)); + gen_dop_word(DOP_TEST,true,DREG(TMPB),DREG(TMPB)); save_info[used_save_info].branch_pos=gen_create_branch_long(BR_NZ); gen_releasereg(DREG(TMPB)); dyn_savestate(&save_info[used_save_info].state); @@ -403,21 +402,29 @@ static void dyn_fill_blocks(void) { dyn_save_critical_regs(); gen_return(BR_Cycles); break; +#ifdef X86_DYNFPU_DH_ENABLED case fpu_restore: dyn_loadstate(&save_info[sct].state); - gen_load_host(&dyn_dh_fpu.state_used,DREG(TMPB),4); - gen_sop_word(SOP_INC,true,DREG(TMPB)); - GenReg * gr1=FindDynReg(DREG(TMPB)); +#if C_TARGETCPU == X86 + cache_addb(0xd9); // FNSTCW fpu.host_cw + cache_addb(0x3d); + 
cache_addd((Bit32u)(&dyn_dh_fpu.host_cw)); cache_addb(0xdd); // FRSTOR fpu.state (fpu_restore) cache_addb(0x25); - cache_addd((Bit32u)(&(dyn_dh_fpu.state[0]))); - cache_addb(0x89); // mov fpu.state_used,1 - cache_addb(0x05|(gr1->index<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.state_used))); - gen_releasereg(DREG(TMPB)); + cache_addd((Bit32u)(&dyn_dh_fpu.state)); + cache_addb(0xC6); // mov byte [fpu.state_used], 1 + cache_addb(0x05); + cache_addd((Bit32u)(&dyn_dh_fpu.state_used)); + cache_addb(1); +#else // X86_64 + opcode(7).setabsaddr(&dyn_dh_fpu.host_cw).Emit8(0xD9); // FNSTCW [&fpu.host_cw] + opcode(4).setabsaddr(&dyn_dh_fpu.state).Emit8(0xDD); // FRSTOR [&fpu.state] + opcode(0).setimm(1,1).setabsaddr(&dyn_dh_fpu.state_used).Emit8(0xC6); // mov byte[], imm8 +#endif dyn_synchstate(&save_info[sct].state); gen_create_jump(save_info[sct].return_pos); break; +#endif } } used_save_info=0; @@ -427,7 +434,7 @@ static void dyn_fill_blocks(void) { #if !defined(X86_INLINED_MEMACCESS) static void dyn_read_byte(DynReg * addr,DynReg * dst,Bitu high) { gen_protectflags(); - gen_call_function((void *)&mem_readb_checked,"%Dd%Id",addr,&core_dyn.readdata); + gen_call_function((void *)&mem_readb_checked,"%Dd%Ip",addr,&core_dyn.readdata); dyn_check_bool_exception_al(); gen_mov_host(&core_dyn.readdata,dst,1,high); } @@ -439,8 +446,8 @@ static void dyn_write_byte(DynReg * addr,DynReg * val,Bitu high) { } static void dyn_read_word(DynReg * addr,DynReg * dst,bool dword) { gen_protectflags(); - if (dword) gen_call_function((void *)&mem_readd_checked,"%Dd%Id",addr,&core_dyn.readdata); - else gen_call_function((void *)&mem_readw_checked,"%Dd%Id",addr,&core_dyn.readdata); + if (dword) gen_call_function((void *)&mem_readd_checked,"%Dd%Ip",addr,&core_dyn.readdata); + else gen_call_function((void *)&mem_readw_checked,"%Dd%Ip",addr,&core_dyn.readdata); dyn_check_bool_exception_al(); gen_mov_host(&core_dyn.readdata,dst,dword?4:2); } @@ -452,31 +459,32 @@ static void dyn_write_word(DynReg * 
addr,DynReg * val,bool dword) { } static void dyn_read_byte_release(DynReg * addr,DynReg * dst,Bitu high) { gen_protectflags(); - gen_call_function((void *)&mem_readb_checked,"%Ddr%Id",addr,&core_dyn.readdata); + gen_call_function((void *)&mem_readb_checked,"%Drd%Ip",addr,&core_dyn.readdata); dyn_check_bool_exception_al(); gen_mov_host(&core_dyn.readdata,dst,1,high); } static void dyn_write_byte_release(DynReg * addr,DynReg * val,Bitu high) { gen_protectflags(); - if (high) gen_call_function((void *)&mem_writeb_checked,"%Ddr%Dh",addr,val); - else gen_call_function((void *)&mem_writeb_checked,"%Ddr%Dd",addr,val); + if (high) gen_call_function((void *)&mem_writeb_checked,"%Drd%Dh",addr,val); + else gen_call_function((void *)&mem_writeb_checked,"%Drd%Dd",addr,val); dyn_check_bool_exception_al(); } static void dyn_read_word_release(DynReg * addr,DynReg * dst,bool dword) { gen_protectflags(); - if (dword) gen_call_function((void *)&mem_readd_checked,"%Ddr%Id",addr,&core_dyn.readdata); - else gen_call_function((void *)&mem_readw_checked,"%Ddr%Id",addr,&core_dyn.readdata); + if (dword) gen_call_function((void *)&mem_readd_checked,"%Drd%Ip",addr,&core_dyn.readdata); + else gen_call_function((void *)&mem_readw_checked,"%Drd%Ip",addr,&core_dyn.readdata); dyn_check_bool_exception_al(); gen_mov_host(&core_dyn.readdata,dst,dword?4:2); } static void dyn_write_word_release(DynReg * addr,DynReg * val,bool dword) { gen_protectflags(); - if (dword) gen_call_function((void *)&mem_writed_checked,"%Ddr%Dd",addr,val); - else gen_call_function((void *)&mem_writew_checked,"%Ddr%Dd",addr,val); + if (dword) gen_call_function((void *)&mem_writed_checked,"%Drd%Dd",addr,val); + else gen_call_function((void *)&mem_writew_checked,"%Drd%Dd",addr,val); dyn_check_bool_exception_al(); } #else +#if C_TARGETCPU == X86 static void dyn_read_intro(DynReg * addr,bool release_addr=true) { gen_protectflags(); @@ -634,7 +642,7 @@ static void dyn_read_word(DynReg * addr,DynReg * dst,bool dword) { 
gen_fill_jump(jmp_loc); } else { gen_protectflags(); - gen_call_function((void *)&mem_readw_checked,"%Dd%Id",addr,&core_dyn.readdata); + gen_call_function((void *)&mem_readw_checked,"%Dd%Ip",addr,&core_dyn.readdata); dyn_check_bool_exception_al(); gen_mov_host(&core_dyn.readdata,dst,2); } @@ -680,7 +688,7 @@ static void dyn_read_word_release(DynReg * addr,DynReg * dst,bool dword) { gen_fill_jump(jmp_loc); } else { gen_protectflags(); - gen_call_function((void *)&mem_readw_checked,"%Ddr%Id",addr,&core_dyn.readdata); + gen_call_function((void *)&mem_readw_checked,"%Drd%Ip",addr,&core_dyn.readdata); dyn_check_bool_exception_al(); gen_mov_host(&core_dyn.readdata,dst,2); } @@ -888,11 +896,247 @@ static void dyn_write_word_release(DynReg * addr,DynReg * val,bool dword) { gen_fill_jump(jmp_loc); } else { gen_protectflags(); - gen_call_function((void *)&mem_writew_checked,"%Ddr%Dd",addr,val); + gen_call_function((void *)&mem_writew_checked,"%Drd%Dd",addr,val); dyn_check_bool_exception_al(); } } +#else // X86_64 +static bool mem_readd_checked_dcx64(PhysPt address, Bit32u* dst) { + return get_tlb_readhandler(address)->readd_checked(address, dst); +} +static bool mem_readw_checked_dcx64(PhysPt address, Bit16u* dst) { + return get_tlb_readhandler(address)->readw_checked(address, dst); +} +static bool mem_writed_checked_dcx64(PhysPt address, Bitu val) { + return get_tlb_writehandler(address)->writed_checked(address, val); +} +static bool mem_writew_checked_dcx64(PhysPt address, Bitu val) { + return get_tlb_writehandler(address)->writew_checked(address, val); +} +static bool mem_readb_checked_dcx64(PhysPt address, Bit8u* dst) { + return get_tlb_readhandler(address)->readb_checked(address, dst); +} +static bool mem_writeb_checked_dcx64(PhysPt address, Bitu val) { + return get_tlb_writehandler(address)->writeb_checked(address, val); +} +static void dyn_read_word_internal(DynReg * addr,DynReg * dst,bool dword,bool release) { + DynState callstate; + gen_protectflags(); + + 
x64gen.regs[X64_REG_RAX]->Clear(); + x64gen.regs[X64_REG_RAX]->notusable = true; + GenReg *gensrc = FindDynReg(addr); + if (dword && release) gen_releasereg(addr); + GenReg *gendst = FindDynReg(dst,dword); + if (!dword && release) gen_releasereg(addr); + x64gen.regs[X64_REG_RAX]->notusable = false; + dyn_savestate(&callstate); + + Bit8u *page_brk; + opcode(0).set64().setea(gensrc->index,-1,0,dword?3:1).Emit8(0x8D); // lea rax, [dst+(dword?3:1)] + if (dword) { + opcode(0).set64().setimm(~0xFFF,4).Emit8Reg(0x25); // and rax, ~0xFFF + opcode(gensrc->index).set64().setrm(0).Emit8(0x39); // cmp rax,src + page_brk=gen_create_branch(BR_NBE); + } else { + opcode(0,false).setimm(0xFFF,2).Emit8Reg(0xA9); // test ax,0xFFF + page_brk=gen_create_branch(BR_Z); + } + + opcode(5).setrm(0).setimm(12,1).Emit8(0xC1); // shr eax,12 + // mov rax, [8*rax+paging.tlb.read(rbp)] + opcode(0).set64().setea(5,0,3,(Bits)paging.tlb.read-(Bits)&cpu_regs).Emit8(0x8B); + opcode(0).set64().setrm(0).Emit8(0x85); // test rax,rax + Bit8u *nomap=gen_create_branch(BR_Z); + //mov dst, [RAX+src] + opcode(gendst->index,dword).setea(0,gensrc->index).Emit8(0x8B); + Bit8u* jmp_loc = gen_create_short_jump(); + + gen_fill_branch(page_brk); + gen_load_imm(0, (Bitu)(dword?(void*)mem_unalignedreadd_checked:(void*)mem_unalignedreadw_checked)); + Bit8u* page_jmp = gen_create_short_jump(); + gen_fill_branch(nomap); + gen_load_imm(0, (Bitu)(dword?(void*)mem_readd_checked_dcx64:(void*)mem_readw_checked_dcx64)); + gen_fill_short_jump(page_jmp); + + if (gensrc->index != ARG0_REG) { + x64gen.regs[reg_args[0]]->Clear(); + opcode(ARG0_REG).setrm(gensrc->index).Emit8(0x8B); + } + x64gen.regs[reg_args[1]]->Clear(); + gen_load_imm(ARG1_REG, (Bitu)dst->data); + gendst->Clear(); + gen_call_ptr(); + dyn_check_bool_exception_al(); + + dyn_synchstate(&callstate); + dst->flags |= DYNFLG_CHANGED; + gen_fill_short_jump(jmp_loc); +} + +static void dyn_read_word(DynReg * addr,DynReg * dst,bool dword) { + 
dyn_read_word_internal(addr,dst,dword,false); +} +static void dyn_read_word_release(DynReg * addr,DynReg * dst,bool dword) { + dyn_read_word_internal(addr,dst,dword,true); +} +static void dyn_read_byte_internal(DynReg * addr,DynReg * dst,bool high,bool release) { + DynState callstate; + gen_protectflags(); + + x64gen.regs[X64_REG_RAX]->Clear(); + x64gen.regs[X64_REG_RAX]->notusable = true; + GenReg *gensrc = FindDynReg(addr); + GenReg *gendst = FindDynReg(dst); + if (release) gen_releasereg(addr); + x64gen.regs[X64_REG_RAX]->notusable = false; + dyn_savestate(&callstate); + + if (gendst->index>3) IllegalOption("dyn_read_byte"); + + opcode(0).setrm(gensrc->index).Emit8(0x8B); // mov eax, src + opcode(5).setrm(0).setimm(12,1).Emit8(0xC1); // shr eax,12 + // mov rax, [8*rax+paging.tlb.read(rbp)] + opcode(0).set64().setea(5,0,3,(Bits)paging.tlb.read-(Bits)&cpu_regs).Emit8(0x8B); + opcode(0).set64().setrm(0).Emit8(0x85); // test rax,rax + Bit8u *nomap=gen_create_branch(BR_Z); + + int src = gensrc->index; + if (high && src>=8) { // can't use REX prefix with high-byte reg + opcode(0).set64().setrm(src).Emit8(0x03); // add rax, src + src = -1; + } + // mov dst, byte [rax+src] + opcode(gendst->index,true,high?4:0).setea(0,src).Emit8(0x8A); + Bit8u* jmp_loc=gen_create_short_jump(); + + gen_fill_branch(nomap); + if (gensrc->index != ARG0_REG) { + x64gen.regs[reg_args[0]]->Clear(); + opcode(ARG0_REG).setrm(gensrc->index).Emit8(0x8B); // mov ARG0,src + } + x64gen.regs[reg_args[1]]->Clear(); + gen_load_imm(ARG1_REG, (Bitu)(high?((Bit8u*)dst->data)+1:dst->data)); + gendst->Clear(); + gen_call_ptr((void*)mem_readb_checked_dcx64); + dyn_check_bool_exception_al(); + + dyn_synchstate(&callstate); + dst->flags |= DYNFLG_CHANGED; + gen_fill_short_jump(jmp_loc); +} +static void dyn_read_byte(DynReg * addr,DynReg * dst,bool high) { + dyn_read_byte_internal(addr,dst,high,false); +} +static void dyn_read_byte_release(DynReg * addr,DynReg * dst,bool high) { + 
dyn_read_byte_internal(addr,dst,high,true); +} +static void dyn_write_word_internal(DynReg * addr,DynReg * val,bool dword,bool release) { + DynState callstate; + gen_protectflags(); + + x64gen.regs[X64_REG_RAX]->Clear(); + x64gen.regs[X64_REG_RAX]->notusable = true; + GenReg *gendst = FindDynReg(addr); + GenReg *genval = FindDynReg(val); + if (release) gen_releasereg(addr); + x64gen.regs[X64_REG_RAX]->notusable = false; + dyn_savestate(&callstate); + + Bit8u *page_brk; + opcode(0).set64().setea(gendst->index,-1,0,dword?3:1).Emit8(0x8D); // lea rax, [dst+(dword?3:1)] + if (dword) { + opcode(0).set64().setimm(~0xFFF,4).Emit8Reg(0x25); // and rax, ~0xFFF + opcode(gendst->index).set64().setrm(0).Emit8(0x39); // cmp rax,dst + page_brk=gen_create_branch(BR_NBE); + } else { + opcode(0,false).setimm(0xFFF,2).Emit8Reg(0xA9); // test ax,0xFFF + page_brk=gen_create_branch(BR_Z); + } + + opcode(5).setrm(0).setimm(12,1).Emit8(0xC1); // shr eax,12 + // mov rax, [8*rax+paging.tlb.write(rbp)] + opcode(0).set64().setea(5,0,3,(Bits)paging.tlb.write-(Bits)&cpu_regs).Emit8(0x8B); + opcode(0).set64().setrm(0).Emit8(0x85); // test rax,rax + Bit8u *nomap=gen_create_branch(BR_Z); + //mov [RAX+src], dst + opcode(genval->index,dword).setea(0,gendst->index).Emit8(0x89); + Bit8u* jmp_loc = gen_create_short_jump(); + + gen_fill_branch(page_brk); + gen_load_imm(0, (Bitu)(dword?(void*)mem_unalignedwrited_checked:(void*)mem_unalignedwritew_checked)); + Bit8u* page_jmp = gen_create_short_jump(); + gen_fill_branch(nomap); + gen_load_imm(0, (Bitu)(dword?(void*)mem_writed_checked_dcx64:(void*)mem_writew_checked_dcx64)); + gen_fill_short_jump(page_jmp); + + if (gendst->index != ARG0_REG) { + x64gen.regs[reg_args[0]]->Clear(); + opcode(ARG0_REG).setrm(gendst->index).Emit8(0x8B); + } + gen_load_arg_reg(1, val, dword ? 
"d":"w"); + gen_call_ptr(); + dyn_check_bool_exception_al(); + dyn_synchstate(&callstate); + gen_fill_short_jump(jmp_loc); +} +static void dyn_write_word(DynReg * addr,DynReg * val,bool dword) { + dyn_write_word_internal(addr, val, dword, false); +} +static void dyn_write_word_release(DynReg * addr,DynReg * val,bool dword) { + dyn_write_word_internal(addr, val, dword, true); +} +static void dyn_write_byte_internal(DynReg * addr,DynReg * val,bool high,bool release) { + DynState callstate; + gen_protectflags(); + + x64gen.regs[X64_REG_RAX]->Clear(); + x64gen.regs[X64_REG_RAX]->notusable = true; + GenReg *gendst = FindDynReg(addr); + GenReg *genval = FindDynReg(val); + if (release) gen_releasereg(addr); + x64gen.regs[X64_REG_RAX]->notusable = false; + dyn_savestate(&callstate); + + if (genval->index>3) IllegalOption("dyn_write_byte"); + + opcode(0).setrm(gendst->index).Emit8(0x8B); // mov eax, dst + opcode(5).setrm(0).setimm(12,1).Emit8(0xC1); // shr eax,12 + // mov rax, [8*rax+paging.tlb.write(rbp)] + opcode(0).set64().setea(5,0,3,(Bits)paging.tlb.write-(Bits)&cpu_regs).Emit8(0x8B); + opcode(0).set64().setrm(0).Emit8(0x85); // test rax,rax + Bit8u *nomap=gen_create_branch(BR_Z); + + int dst = gendst->index; + if (high && dst>=8) { // can't use REX prefix with high-byte reg + opcode(0).set64().setrm(dst).Emit8(0x03); // add rax, dst + dst = -1; + } + // mov byte [rax+src], val + opcode(genval->index,true,high?4:0).setea(0,dst).Emit8(0x88); + + Bit8u* jmp_loc=gen_create_short_jump(); + gen_fill_branch(nomap); + + if (gendst->index != ARG0_REG) { + x64gen.regs[reg_args[0]]->Clear(); + opcode(ARG0_REG).setrm(gendst->index).Emit8(0x8B); // mov ARG0,dst + } + gen_load_arg_reg(1, val, high ? 
"h":"l"); + gen_call_ptr((void*)mem_writeb_checked_dcx64); + dyn_check_bool_exception_al(); + + dyn_synchstate(&callstate); + gen_fill_short_jump(jmp_loc); +} +static void dyn_write_byte(DynReg * addr,DynReg * src,bool high) { + dyn_write_byte_internal(addr,src,high,false); +} +static void dyn_write_byte_release(DynReg * addr,DynReg * src,bool high) { + dyn_write_byte_internal(addr,src,high,true); +} +#endif #endif @@ -938,9 +1182,9 @@ static void dyn_pop(DynReg * dynreg,bool checked=true) { gen_dop_word(DOP_ADD,true,DREG(STACK),DREG(SS)); if (checked) { if (decode.big_op) { - gen_call_function((void *)&mem_readd_checked,"%Drd%Id",DREG(STACK),&core_dyn.readdata); + gen_call_function((void *)&mem_readd_checked,"%Drd%Ip",DREG(STACK),&core_dyn.readdata); } else { - gen_call_function((void *)&mem_readw_checked,"%Drd%Id",DREG(STACK),&core_dyn.readdata); + gen_call_function((void *)&mem_readw_checked,"%Drd%Ip",DREG(STACK),&core_dyn.readdata); } dyn_check_bool_exception_al(); gen_mov_host(&core_dyn.readdata,dynreg,decode.big_op?4:2); @@ -1055,7 +1299,7 @@ skip_extend_word: segbase=DREG(DS); Bitu val; if (decode_fetchd_imm(val)) { - gen_mov_host((void*)val,DREG(EA),4); + gen_dop_word_imm_mem(DOP_MOV,true,DREG(EA),(void*)val); if (!addseg) { gen_lea(reg_ea,DREG(EA),scaled,scale,0); } else { @@ -1088,7 +1332,7 @@ skip_extend_word: case 2: { Bitu val; if (decode_fetchd_imm(val)) { - gen_mov_host((void*)val,DREG(EA),4); + gen_dop_word_imm_mem(DOP_MOV,true,DREG(EA),(void*)val); if (!addseg) { gen_lea(DREG(EA),DREG(EA),scaled,scale,0); gen_lea(reg_ea,DREG(EA),base,0,0); @@ -1560,7 +1804,7 @@ static void dyn_grp2_ev(grp2_types type) { if (decode_fetchb_imm(val)) { if (decode.modrm.reg < 4) gen_needflags(); else gen_discardflags(); - gen_load_host((void*)val,DREG(TMPB),1); + gen_dop_byte_imm_mem(DOP_MOV,DREG(TMPB),0,(void*)val); gen_shift_word_cl(decode.modrm.reg,decode.big_op,src,DREG(TMPB)); gen_releasereg(DREG(TMPB)); break; @@ -1619,7 +1863,7 @@ static void dyn_grp3_eb(void) { 
gen_dop_byte(DOP_MOV,DREG(TMPB),0,&DynRegs[decode.modrm.rm&3],decode.modrm.rm&4); gen_releasereg(DREG(EAX)); gen_call_function((decode.modrm.reg==6) ? (void *)&dyn_helper_divb : (void *)&dyn_helper_idivb, - "%Rd%Dd",DREG(TMPB),DREG(TMPB)); + "%Rd%Drd",DREG(TMPB),DREG(TMPB)); dyn_check_bool_exception(DREG(TMPB)); goto skipsave; } @@ -1661,7 +1905,7 @@ static void dyn_grp3_ev(void) { void * func=(decode.modrm.reg==6) ? (decode.big_op ? (void *)&dyn_helper_divd : (void *)&dyn_helper_divw) : (decode.big_op ? (void *)&dyn_helper_idivd : (void *)&dyn_helper_idivw); - gen_call_function(func,"%Rd%Dd",DREG(TMPB),DREG(TMPW)); + gen_call_function(func,"%Rd%Drd",DREG(TMPB),DREG(TMPW)); dyn_check_bool_exception(DREG(TMPB)); gen_releasereg(DREG(TMPB)); goto skipsave; @@ -1739,8 +1983,8 @@ static void dyn_pop_ev(void) { if (decode.modrm.mod<3) { dyn_fill_ea(); // dyn_write_word_release(DREG(EA),DREG(TMPW),decode.big_op); - if (decode.big_op) gen_call_function((void *)&mem_writed_inline,"%Ddr%Dd",DREG(EA),DREG(TMPW)); - else gen_call_function((void *)&mem_writew_inline,"%Ddr%Dd",DREG(EA),DREG(TMPW)); + if (decode.big_op) gen_call_function((void *)&mem_writed_inline,"%Drd%Dd",DREG(EA),DREG(TMPW)); + else gen_call_function((void *)&mem_writew_inline,"%Drd%Dd",DREG(EA),DREG(TMPW)); } else { gen_dop_word(DOP_MOV,decode.big_op,&DynRegs[decode.modrm.rm],DREG(TMPW)); } @@ -1861,7 +2105,7 @@ static void dyn_loop(LoopTypes type) { branch2=gen_create_branch(BR_Z); break; case LOOP_JCXZ: - gen_dop_word(DOP_OR,decode.big_addr,DREG(ECX),DREG(ECX)); + gen_dop_word(DOP_TEST,decode.big_addr,DREG(ECX),DREG(ECX)); gen_releasereg(DREG(ECX)); branch2=gen_create_branch(BR_NZ); break; @@ -1982,8 +2226,8 @@ static void dyn_add_iocheck_var(Bit8u accessed_port,Bitu access_size) { #define dh_fpu_startup() { \ fpu_used=true; \ gen_protectflags(); \ - gen_load_host(&dyn_dh_fpu.state_used,DREG(TMPB),4); \ - gen_dop_word_imm(DOP_CMP,true,DREG(TMPB),0); \ + gen_load_host(&dyn_dh_fpu.state_used,DREG(TMPB),1); \ 
+ gen_dop_byte(DOP_TEST,DREG(TMPB),0,DREG(TMPB),0); \ gen_releasereg(DREG(TMPB)); \ save_info[used_save_info].branch_pos=gen_create_branch_long(BR_Z); \ dyn_savestate(&save_info[used_save_info].state); \ @@ -2009,15 +2253,15 @@ static CacheBlock * CreateCacheBlock(CodePageHandler * codepage,PhysPt start,Bit decode.block->page.start=decode.page.index; codepage->AddCacheBlock(decode.block); - gen_save_host_direct(&cache.block.running,(Bit32u)decode.block); for (i=0;i> 3) & 7; switch (group){ case 0x00: /* FADD ST,STi */ - gen_call_function((void*)&FPU_FADD_EA,"%Ddr",DREG(TMPB)); + gen_call_function((void*)&FPU_FADD_EA,"%Drd",DREG(TMPB)); break; case 0x01: /* FMUL ST,STi */ - gen_call_function((void*)&FPU_FMUL_EA,"%Ddr",DREG(TMPB)); + gen_call_function((void*)&FPU_FMUL_EA,"%Drd",DREG(TMPB)); break; case 0x02: /* FCOM STi */ - gen_call_function((void*)&FPU_FCOM_EA,"%Ddr",DREG(TMPB)); + gen_call_function((void*)&FPU_FCOM_EA,"%Drd",DREG(TMPB)); break; case 0x03: /* FCOMP STi */ - gen_call_function((void*)&FPU_FCOM_EA,"%Ddr",DREG(TMPB)); + gen_call_function((void*)&FPU_FCOM_EA,"%Drd",DREG(TMPB)); gen_call_function((void*)&FPU_FPOP,""); break; case 0x04: /* FSUB ST,STi */ - gen_call_function((void*)&FPU_FSUB_EA,"%Ddr",DREG(TMPB)); + gen_call_function((void*)&FPU_FSUB_EA,"%Drd",DREG(TMPB)); break; case 0x05: /* FSUBR ST,STi */ - gen_call_function((void*)&FPU_FSUBR_EA,"%Ddr",DREG(TMPB)); + gen_call_function((void*)&FPU_FSUBR_EA,"%Drd",DREG(TMPB)); break; case 0x06: /* FDIV ST,STi */ - gen_call_function((void*)&FPU_FDIV_EA,"%Ddr",DREG(TMPB)); + gen_call_function((void*)&FPU_FDIV_EA,"%Drd",DREG(TMPB)); break; case 0x07: /* FDIVR ST,STi */ - gen_call_function((void*)&FPU_FDIVR_EA,"%Ddr",DREG(TMPB)); + gen_call_function((void*)&FPU_FDIVR_EA,"%Drd",DREG(TMPB)); break; default: break; @@ -101,36 +101,36 @@ static void dyn_fpu_esc0(){ Bitu sub=(decode.modrm.val & 7); switch (group){ case 0x00: //FADD ST,STi / - gen_call_function((void*)&FPU_FADD,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + 
gen_call_function((void*)&FPU_FADD,"%Drd%Drd",DREG(TMPB),DREG(EA)); break; case 0x01: // FMUL ST,STi / - gen_call_function((void*)&FPU_FMUL,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void*)&FPU_FMUL,"%Drd%Drd",DREG(TMPB),DREG(EA)); break; case 0x02: // FCOM STi / - gen_call_function((void*)&FPU_FCOM,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void*)&FPU_FCOM,"%Drd%Drd",DREG(TMPB),DREG(EA)); break; case 0x03: // FCOMP STi / - gen_call_function((void*)&FPU_FCOM,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void*)&FPU_FCOM,"%Drd%Drd",DREG(TMPB),DREG(EA)); gen_call_function((void*)&FPU_FPOP,""); break; case 0x04: // FSUB ST,STi / - gen_call_function((void*)&FPU_FSUB,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void*)&FPU_FSUB,"%Drd%Drd",DREG(TMPB),DREG(EA)); break; case 0x05: // FSUBR ST,STi / - gen_call_function((void*)&FPU_FSUBR,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void*)&FPU_FSUBR,"%Drd%Drd",DREG(TMPB),DREG(EA)); break; case 0x06: // FDIV ST,STi / - gen_call_function((void*)&FPU_FDIV,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void*)&FPU_FDIV,"%Drd%Drd",DREG(TMPB),DREG(EA)); break; case 0x07: // FDIVR ST,STi / - gen_call_function((void*)&FPU_FDIVR,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void*)&FPU_FDIVR,"%Drd%Drd",DREG(TMPB),DREG(EA)); break; default: break; } } else { dyn_fill_ea(); - gen_call_function((void*)&FPU_FLD_F32_EA,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FLD_F32_EA,"%Drd",DREG(EA)); gen_load_host(&TOP,DREG(TMPB),4); dyn_eatree(); } @@ -149,18 +149,18 @@ static void dyn_fpu_esc1(){ gen_dop_word_imm(DOP_AND,true,DREG(EA),7); gen_call_function((void*)&FPU_PREP_PUSH,""); gen_load_host(&TOP,DREG(TMPB),4); - gen_call_function((void*)&FPU_FST,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + gen_call_function((void*)&FPU_FST,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; case 0x01: /* FXCH STi */ dyn_fpu_top(); - gen_call_function((void*)&FPU_FXCH,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + 
gen_call_function((void*)&FPU_FXCH,"%Drd%Drd",DREG(TMPB),DREG(EA)); break; case 0x02: /* FNOP */ gen_call_function((void*)&FPU_FNOP,""); break; case 0x03: /* FSTP STi */ dyn_fpu_top(); - gen_call_function((void*)&FPU_FST,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void*)&FPU_FST,"%Drd%Drd",DREG(TMPB),DREG(EA)); gen_call_function((void*)&FPU_FPOP,""); break; case 0x04: @@ -290,29 +290,29 @@ static void dyn_fpu_esc1(){ gen_protectflags(); gen_call_function((void*)&FPU_PREP_PUSH,""); gen_load_host(&TOP,DREG(TMPB),4); - gen_call_function((void*)&FPU_FLD_F32,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + gen_call_function((void*)&FPU_FLD_F32,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; case 0x01: /* UNKNOWN */ LOG(LOG_FPU,LOG_WARN)("ESC EA 1:Unhandled group %d subfunction %d",group,sub); break; case 0x02: /* FST float*/ - gen_call_function((void*)&FPU_FST_F32,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FST_F32,"%Drd",DREG(EA)); break; case 0x03: /* FSTP float*/ - gen_call_function((void*)&FPU_FST_F32,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FST_F32,"%Drd",DREG(EA)); gen_call_function((void*)&FPU_FPOP,""); break; case 0x04: /* FLDENV */ - gen_call_function((void*)&FPU_FLDENV,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FLDENV,"%Drd",DREG(EA)); break; case 0x05: /* FLDCW */ - gen_call_function((void *)&FPU_FLDCW,"%Ddr",DREG(EA)); + gen_call_function((void *)&FPU_FLDCW,"%Drd",DREG(EA)); break; case 0x06: /* FSTENV */ - gen_call_function((void *)&FPU_FSTENV,"%Ddr",DREG(EA)); + gen_call_function((void *)&FPU_FSTENV,"%Drd",DREG(EA)); break; case 0x07: /* FNSTCW*/ - gen_call_function((void *)&FPU_FNSTCW,"%Ddr",DREG(EA)); + gen_call_function((void *)&FPU_FNSTCW,"%Drd",DREG(EA)); break; default: LOG(LOG_FPU,LOG_WARN)("ESC EA 1:Unhandled group %d subfunction %d",group,sub); @@ -335,7 +335,7 @@ static void dyn_fpu_esc2(){ gen_dop_word_imm(DOP_ADD,true,DREG(EA),1); gen_dop_word_imm(DOP_AND,true,DREG(EA),7); gen_load_host(&TOP,DREG(TMPB),4); - gen_call_function((void 
*)&FPU_FUCOM,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void *)&FPU_FUCOM,"%Drd%Drd",DREG(TMPB),DREG(EA)); gen_call_function((void *)&FPU_FPOP,""); gen_call_function((void *)&FPU_FPOP,""); break; @@ -350,7 +350,7 @@ static void dyn_fpu_esc2(){ } } else { dyn_fill_ea(); - gen_call_function((void*)&FPU_FLD_I32_EA,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FLD_I32_EA,"%Drd",DREG(EA)); gen_load_host(&TOP,DREG(TMPB),4); dyn_eatree(); } @@ -395,24 +395,24 @@ static void dyn_fpu_esc3(){ gen_call_function((void*)&FPU_PREP_PUSH,""); gen_protectflags(); gen_load_host(&TOP,DREG(TMPB),4); - gen_call_function((void*)&FPU_FLD_I32,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + gen_call_function((void*)&FPU_FLD_I32,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; case 0x01: /* FISTTP */ LOG(LOG_FPU,LOG_WARN)("ESC 3 EA:Unhandled group %d subfunction %d",group,sub); break; case 0x02: /* FIST */ - gen_call_function((void*)&FPU_FST_I32,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FST_I32,"%Drd",DREG(EA)); break; case 0x03: /* FISTP */ - gen_call_function((void*)&FPU_FST_I32,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FST_I32,"%Drd",DREG(EA)); gen_call_function((void*)&FPU_FPOP,""); break; case 0x05: /* FLD 80 Bits Real */ gen_call_function((void*)&FPU_PREP_PUSH,""); - gen_call_function((void*)&FPU_FLD_F80,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FLD_F80,"%Drd",DREG(EA)); break; case 0x07: /* FSTP 80 Bits Real */ - gen_call_function((void*)&FPU_FST_F80,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FST_F80,"%Drd",DREG(EA)); gen_call_function((void*)&FPU_FPOP,""); break; default: @@ -429,36 +429,36 @@ static void dyn_fpu_esc4(){ dyn_fpu_top(); switch(group){ case 0x00: /* FADD STi,ST*/ - gen_call_function((void*)&FPU_FADD,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + gen_call_function((void*)&FPU_FADD,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; case 0x01: /* FMUL STi,ST*/ - gen_call_function((void*)&FPU_FMUL,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + 
gen_call_function((void*)&FPU_FMUL,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; case 0x02: /* FCOM*/ - gen_call_function((void*)&FPU_FCOM,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void*)&FPU_FCOM,"%Drd%Drd",DREG(TMPB),DREG(EA)); break; case 0x03: /* FCOMP*/ - gen_call_function((void*)&FPU_FCOM,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void*)&FPU_FCOM,"%Drd%Drd",DREG(TMPB),DREG(EA)); gen_call_function((void*)&FPU_FPOP,""); break; case 0x04: /* FSUBR STi,ST*/ - gen_call_function((void*)&FPU_FSUBR,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + gen_call_function((void*)&FPU_FSUBR,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; case 0x05: /* FSUB STi,ST*/ - gen_call_function((void*)&FPU_FSUB,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + gen_call_function((void*)&FPU_FSUB,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; case 0x06: /* FDIVR STi,ST*/ - gen_call_function((void*)&FPU_FDIVR,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + gen_call_function((void*)&FPU_FDIVR,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; case 0x07: /* FDIV STi,ST*/ - gen_call_function((void*)&FPU_FDIV,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + gen_call_function((void*)&FPU_FDIV,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; default: break; } } else { dyn_fill_ea(); - gen_call_function((void*)&FPU_FLD_F64_EA,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FLD_F64_EA,"%Drd",DREG(EA)); gen_load_host(&TOP,DREG(TMPB),4); dyn_eatree(); } @@ -472,23 +472,23 @@ static void dyn_fpu_esc5(){ dyn_fpu_top(); switch(group){ case 0x00: /* FFREE STi */ - gen_call_function((void*)&FPU_FFREE,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FFREE,"%Drd",DREG(EA)); break; case 0x01: /* FXCH STi*/ - gen_call_function((void*)&FPU_FXCH,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void*)&FPU_FXCH,"%Drd%Drd",DREG(TMPB),DREG(EA)); break; case 0x02: /* FST STi */ - gen_call_function((void*)&FPU_FST,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void*)&FPU_FST,"%Drd%Drd",DREG(TMPB),DREG(EA)); break; case 0x03: /* FSTP STi*/ - 
gen_call_function((void*)&FPU_FST,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void*)&FPU_FST,"%Drd%Drd",DREG(TMPB),DREG(EA)); gen_call_function((void*)&FPU_FPOP,""); break; case 0x04: /* FUCOM STi */ - gen_call_function((void*)&FPU_FUCOM,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void*)&FPU_FUCOM,"%Drd%Drd",DREG(TMPB),DREG(EA)); break; case 0x05: /*FUCOMP STi */ - gen_call_function((void*)&FPU_FUCOM,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void*)&FPU_FUCOM,"%Drd%Drd",DREG(TMPB),DREG(EA)); gen_call_function((void*)&FPU_FPOP,""); break; default: @@ -504,30 +504,30 @@ static void dyn_fpu_esc5(){ gen_call_function((void*)&FPU_PREP_PUSH,""); gen_protectflags(); gen_load_host(&TOP,DREG(TMPB),4); - gen_call_function((void*)&FPU_FLD_F64,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + gen_call_function((void*)&FPU_FLD_F64,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; case 0x01: /* FISTTP longint*/ LOG(LOG_FPU,LOG_WARN)("ESC 5 EA:Unhandled group %d subfunction %d",group,sub); break; case 0x02: /* FST double real*/ - gen_call_function((void*)&FPU_FST_F64,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FST_F64,"%Drd",DREG(EA)); break; case 0x03: /* FSTP double real*/ - gen_call_function((void*)&FPU_FST_F64,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FST_F64,"%Drd",DREG(EA)); gen_call_function((void*)&FPU_FPOP,""); break; case 0x04: /* FRSTOR */ - gen_call_function((void*)&FPU_FRSTOR,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FRSTOR,"%Drd",DREG(EA)); break; case 0x06: /* FSAVE */ - gen_call_function((void*)&FPU_FSAVE,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FSAVE,"%Drd",DREG(EA)); break; case 0x07: /*FNSTSW */ gen_protectflags(); gen_load_host(&TOP,DREG(TMPB),4); gen_call_function((void*)&FPU_SET_TOP,"%Dd",DREG(TMPB)); gen_load_host(&fpu.sw,DREG(TMPB),4); - gen_call_function((void*)&mem_writew,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + gen_call_function((void*)&mem_writew,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; default: LOG(LOG_FPU,LOG_WARN)("ESC 
5 EA:Unhandled group %d subfunction %d",group,sub); @@ -543,13 +543,13 @@ static void dyn_fpu_esc6(){ dyn_fpu_top(); switch(group){ case 0x00: /*FADDP STi,ST*/ - gen_call_function((void*)&FPU_FADD,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + gen_call_function((void*)&FPU_FADD,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; case 0x01: /* FMULP STi,ST*/ - gen_call_function((void*)&FPU_FMUL,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + gen_call_function((void*)&FPU_FMUL,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; case 0x02: /* FCOMP5*/ - gen_call_function((void*)&FPU_FCOM,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void*)&FPU_FCOM,"%Drd%Drd",DREG(TMPB),DREG(EA)); break; /* TODO IS THIS ALLRIGHT ????????? */ case 0x03: /*FCOMPP*/ if(sub != 1) { @@ -559,20 +559,20 @@ static void dyn_fpu_esc6(){ gen_load_host(&TOP,DREG(EA),4); gen_dop_word_imm(DOP_ADD,true,DREG(EA),1); gen_dop_word_imm(DOP_AND,true,DREG(EA),7); - gen_call_function((void*)&FPU_FCOM,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void*)&FPU_FCOM,"%Drd%Drd",DREG(TMPB),DREG(EA)); gen_call_function((void*)&FPU_FPOP,""); /* extra pop at the bottom*/ break; case 0x04: /* FSUBRP STi,ST*/ - gen_call_function((void*)&FPU_FSUBR,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + gen_call_function((void*)&FPU_FSUBR,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; case 0x05: /* FSUBP STi,ST*/ - gen_call_function((void*)&FPU_FSUB,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + gen_call_function((void*)&FPU_FSUB,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; case 0x06: /* FDIVRP STi,ST*/ - gen_call_function((void*)&FPU_FDIVR,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + gen_call_function((void*)&FPU_FDIVR,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; case 0x07: /* FDIVP STi,ST*/ - gen_call_function((void*)&FPU_FDIV,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + gen_call_function((void*)&FPU_FDIV,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; default: break; @@ -580,7 +580,7 @@ static void dyn_fpu_esc6(){ gen_call_function((void*)&FPU_FPOP,""); } else { dyn_fill_ea(); - 
gen_call_function((void*)&FPU_FLD_I16_EA,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FLD_I16_EA,"%Drd",DREG(EA)); gen_load_host(&TOP,DREG(TMPB),4); dyn_eatree(); } @@ -594,24 +594,24 @@ static void dyn_fpu_esc7(){ switch (group){ case 0x00: /* FFREEP STi*/ dyn_fpu_top(); - gen_call_function((void*)&FPU_FFREE,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FFREE,"%Drd",DREG(EA)); gen_call_function((void*)&FPU_FPOP,""); break; case 0x01: /* FXCH STi*/ dyn_fpu_top(); - gen_call_function((void*)&FPU_FXCH,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void*)&FPU_FXCH,"%Drd%Drd",DREG(TMPB),DREG(EA)); break; case 0x02: /* FSTP STi*/ case 0x03: /* FSTP STi*/ dyn_fpu_top(); - gen_call_function((void*)&FPU_FST,"%Ddr%Ddr",DREG(TMPB),DREG(EA)); + gen_call_function((void*)&FPU_FST,"%Drd%Drd",DREG(TMPB),DREG(EA)); gen_call_function((void*)&FPU_FPOP,""); break; case 0x04: switch(sub){ case 0x00: /* FNSTSW AX*/ gen_load_host(&TOP,DREG(TMPB),4); - gen_call_function((void*)&FPU_SET_TOP,"%Ddr",DREG(TMPB)); + gen_call_function((void*)&FPU_SET_TOP,"%Drd",DREG(TMPB)); gen_mov_host(&fpu.sw,DREG(EAX),2); break; default: @@ -629,34 +629,34 @@ static void dyn_fpu_esc7(){ case 0x00: /* FILD Bit16s */ gen_call_function((void*)&FPU_PREP_PUSH,""); gen_load_host(&TOP,DREG(TMPB),4); - gen_call_function((void*)&FPU_FLD_I16,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + gen_call_function((void*)&FPU_FLD_I16,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; case 0x01: LOG(LOG_FPU,LOG_WARN)("ESC 7 EA:Unhandled group %d subfunction %d",group,sub); break; case 0x02: /* FIST Bit16s */ - gen_call_function((void*)&FPU_FST_I16,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FST_I16,"%Drd",DREG(EA)); break; case 0x03: /* FISTP Bit16s */ - gen_call_function((void*)&FPU_FST_I16,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FST_I16,"%Drd",DREG(EA)); gen_call_function((void*)&FPU_FPOP,""); break; case 0x04: /* FBLD packed BCD */ gen_call_function((void*)&FPU_PREP_PUSH,""); gen_load_host(&TOP,DREG(TMPB),4); - 
gen_call_function((void*)&FPU_FBLD,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + gen_call_function((void*)&FPU_FBLD,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; case 0x05: /* FILD Bit64s */ gen_call_function((void*)&FPU_PREP_PUSH,""); gen_load_host(&TOP,DREG(TMPB),4); - gen_call_function((void*)&FPU_FLD_I64,"%Ddr%Ddr",DREG(EA),DREG(TMPB)); + gen_call_function((void*)&FPU_FLD_I64,"%Drd%Drd",DREG(EA),DREG(TMPB)); break; case 0x06: /* FBSTP packed BCD */ - gen_call_function((void*)&FPU_FBST,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FBST,"%Drd",DREG(EA)); gen_call_function((void*)&FPU_FPOP,""); break; case 0x07: /* FISTP Bit64s */ - gen_call_function((void*)&FPU_FST_I64,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FST_I64,"%Drd",DREG(EA)); gen_call_function((void*)&FPU_FPOP,""); break; default: diff --git a/src/cpu/core_dyn_x86/dyn_fpu_dh.h b/src/cpu/core_dyn_x86/dyn_fpu_dh.h index 03b6bd5d..6899d5d8 100644 --- a/src/cpu/core_dyn_x86/dyn_fpu_dh.h +++ b/src/cpu/core_dyn_x86/dyn_fpu_dh.h @@ -148,6 +148,16 @@ static void FPU_FRSTOR_DH(PhysPt addr){ } } +static void dh_fpu_mem(Bit8u inst, Bitu reg=decode.modrm.reg, void* mem=&dyn_dh_fpu.temp.m1) { +#if C_TARGETCPU == X86 + cache_addb(inst); + cache_addb(0x05|(reg<<3)); + cache_addd((Bit32u)(mem)); +#else // X86_64 + opcode(reg).setabsaddr(mem).Emit8(inst); +#endif +} + static void dh_fpu_esc0(){ dyn_get_modrm(); if (decode.modrm.val >= 0xc0) { @@ -155,10 +165,8 @@ static void dh_fpu_esc0(){ cache_addb(decode.modrm.val); } else { dyn_fill_ea(); - gen_call_function((void*)&FPU_FLD_32,"%Ddr",DREG(EA)); - cache_addb(0xd8); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); + gen_call_function((void*)&FPU_FLD_32,"%Drd",DREG(EA)); + dh_fpu_mem(0xd8); } } @@ -173,46 +181,34 @@ static void dh_fpu_esc1(){ dyn_fill_ea(); switch(group){ case 0x00: /* FLD float*/ - gen_call_function((void*)&FPU_FLD_32,"%Ddr",DREG(EA)); - cache_addb(0xd9); - cache_addb(0x05|(decode.modrm.reg<<3)); - 
cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); + gen_call_function((void*)&FPU_FLD_32,"%Drd",DREG(EA)); + dh_fpu_mem(0xd9); break; case 0x01: /* UNKNOWN */ LOG(LOG_FPU,LOG_WARN)("ESC EA 1:Unhandled group %d subfunction %d",group,sub); break; case 0x02: /* FST float*/ - cache_addb(0xd9); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); - gen_call_function((void*)&FPU_FST_32,"%Ddr",DREG(EA)); + dh_fpu_mem(0xd9); + gen_call_function((void*)&FPU_FST_32,"%Drd",DREG(EA)); break; case 0x03: /* FSTP float*/ - cache_addb(0xd9); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); - gen_call_function((void*)&FPU_FST_32,"%Ddr",DREG(EA)); + dh_fpu_mem(0xd9); + gen_call_function((void*)&FPU_FST_32,"%Drd",DREG(EA)); break; case 0x04: /* FLDENV */ - gen_call_function((void*)&FPU_FLDENV_DH,"%Ddr",DREG(EA)); - cache_addb(0xd9); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); + gen_call_function((void*)&FPU_FLDENV_DH,"%Drd",DREG(EA)); + dh_fpu_mem(0xd9); break; case 0x05: /* FLDCW */ - gen_call_function((void *)&FPU_FLDCW_DH,"%Ddr",DREG(EA)); - cache_addb(0xd9); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); + gen_call_function((void *)&FPU_FLDCW_DH,"%Drd",DREG(EA)); + dh_fpu_mem(0xd9); break; case 0x06: /* FSTENV */ - cache_addb(0xd9); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); - gen_call_function((void*)&FPU_FSTENV_DH,"%Ddr",DREG(EA)); + dh_fpu_mem(0xd9); + gen_call_function((void*)&FPU_FSTENV_DH,"%Drd",DREG(EA)); break; case 0x07: /* FNSTCW*/ - gen_call_function((void*)&FPU_FNSTCW_DH,"%Ddr",DREG(EA)); + gen_call_function((void*)&FPU_FNSTCW_DH,"%Drd",DREG(EA)); break; default: LOG(LOG_FPU,LOG_WARN)("ESC EA 1:Unhandled group %d subfunction %d",group,sub); @@ -228,10 +224,8 @@ static void dh_fpu_esc2(){ cache_addb(decode.modrm.val); } else { dyn_fill_ea(); - 
gen_call_function((void*)&FPU_FLD_32,"%Ddr",DREG(EA)); - cache_addb(0xda); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); + gen_call_function((void*)&FPU_FLD_32,"%Drd",DREG(EA)); + dh_fpu_mem(0xda); } } @@ -274,37 +268,27 @@ static void dh_fpu_esc3(){ dyn_fill_ea(); switch(group){ case 0x00: /* FILD */ - gen_call_function((void*)&FPU_FLD_32,"%Ddr",DREG(EA)); - cache_addb(0xdb); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); + gen_call_function((void*)&FPU_FLD_32,"%Drd",DREG(EA)); + dh_fpu_mem(0xdb); break; case 0x01: /* FISTTP */ LOG(LOG_FPU,LOG_WARN)("ESC 3 EA:Unhandled group %d subfunction %d",group,sub); break; case 0x02: /* FIST */ - cache_addb(0xdb); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); - gen_call_function((void*)&FPU_FST_32,"%Ddr",DREG(EA)); + dh_fpu_mem(0xdb); + gen_call_function((void*)&FPU_FST_32,"%Drd",DREG(EA)); break; case 0x03: /* FISTP */ - cache_addb(0xdb); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); - gen_call_function((void*)&FPU_FST_32,"%Ddr",DREG(EA)); + dh_fpu_mem(0xdb); + gen_call_function((void*)&FPU_FST_32,"%Drd",DREG(EA)); break; case 0x05: /* FLD 80 Bits Real */ - gen_call_function((void*)&FPU_FLD_80,"%Ddr",DREG(EA)); - cache_addb(0xdb); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); + gen_call_function((void*)&FPU_FLD_80,"%Drd",DREG(EA)); + dh_fpu_mem(0xdb); break; case 0x07: /* FSTP 80 Bits Real */ - cache_addb(0xdb); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); - gen_call_function((void*)&FPU_FST_80,"%Ddr",DREG(EA)); + dh_fpu_mem(0xdb); + gen_call_function((void*)&FPU_FST_80,"%Drd",DREG(EA)); break; default: LOG(LOG_FPU,LOG_WARN)("ESC 3 EA:Unhandled group %d subfunction %d",group,sub); @@ -321,10 +305,8 @@ static void dh_fpu_esc4(){ cache_addb(decode.modrm.val); } else { 
dyn_fill_ea(); - gen_call_function((void*)&FPU_FLD_64,"%Ddr",DREG(EA)); - cache_addb(0xdc); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); + gen_call_function((void*)&FPU_FLD_64,"%Drd",DREG(EA)); + dh_fpu_mem(0xdc); } } @@ -339,45 +321,32 @@ static void dh_fpu_esc5(){ Bitu sub=(decode.modrm.val & 7); switch(group){ case 0x00: /* FLD double real*/ - gen_call_function((void*)&FPU_FLD_64,"%Ddr",DREG(EA)); - cache_addb(0xdd); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); + gen_call_function((void*)&FPU_FLD_64,"%Drd",DREG(EA)); + dh_fpu_mem(0xdd); break; case 0x01: /* FISTTP longint*/ LOG(LOG_FPU,LOG_WARN)("ESC 5 EA:Unhandled group %d subfunction %d",group,sub); break; case 0x02: /* FST double real*/ - cache_addb(0xdd); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); - gen_call_function((void*)&FPU_FST_64,"%Ddr",DREG(EA)); + dh_fpu_mem(0xdd); + gen_call_function((void*)&FPU_FST_64,"%Drd",DREG(EA)); break; case 0x03: /* FSTP double real*/ - cache_addb(0xdd); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); - gen_call_function((void*)&FPU_FST_64,"%Ddr",DREG(EA)); + dh_fpu_mem(0xdd); + gen_call_function((void*)&FPU_FST_64,"%Drd",DREG(EA)); break; case 0x04: /* FRSTOR */ - gen_call_function((void*)&FPU_FRSTOR_DH,"%Ddr",DREG(EA)); - cache_addb(0xdd); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp_state[0]))); + gen_call_function((void*)&FPU_FRSTOR_DH,"%Drd",DREG(EA)); + dh_fpu_mem(0xdd, decode.modrm.reg, &(dyn_dh_fpu.temp_state[0])); break; case 0x06: /* FSAVE */ - cache_addb(0xdd); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp_state[0]))); - gen_call_function((void*)&FPU_FSAVE_DH,"%Ddr",DREG(EA)); - cache_addb(0xdb); - cache_addb(0xe3); + dh_fpu_mem(0xdd, decode.modrm.reg, &(dyn_dh_fpu.temp_state[0])); + 
gen_call_function((void*)&FPU_FSAVE_DH,"%Drd",DREG(EA)); + cache_addw(0xE3DB); break; case 0x07: /* FNSTSW */ - cache_addb(0xdd); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); - gen_call_function((void*)&FPU_FST_16,"%Ddr",DREG(EA)); + dh_fpu_mem(0xdd); + gen_call_function((void*)&FPU_FST_16,"%Drd",DREG(EA)); break; default: LOG(LOG_FPU,LOG_WARN)("ESC 5 EA:Unhandled group %d subfunction %d",group,sub); @@ -394,10 +363,8 @@ static void dh_fpu_esc6(){ cache_addb(decode.modrm.val); } else { dyn_fill_ea(); - gen_call_function((void*)&FPU_FLD_16,"%Ddr",DREG(EA)); - cache_addb(0xde); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); + gen_call_function((void*)&FPU_FLD_16,"%Drd",DREG(EA)); + dh_fpu_mem(0xde); } } @@ -423,9 +390,7 @@ static void dh_fpu_esc7(){ case 0x04: switch(sub){ case 0x00: /* FNSTSW AX*/ - cache_addb(0xdd); - cache_addb(0x05|(0x07<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); + dh_fpu_mem(0xdd, 7); gen_load_host(&(dyn_dh_fpu.temp.m1),DREG(TMPB),4); gen_dop_word(DOP_MOV,false,DREG(EAX),DREG(TMPB)); gen_releasereg(DREG(TMPB)); @@ -443,49 +408,35 @@ static void dh_fpu_esc7(){ dyn_fill_ea(); switch(group){ case 0x00: /* FILD Bit16s */ - gen_call_function((void*)&FPU_FLD_16,"%Ddr",DREG(EA)); - cache_addb(0xdf); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); + gen_call_function((void*)&FPU_FLD_16,"%Drd",DREG(EA)); + dh_fpu_mem(0xdf); break; case 0x01: LOG(LOG_FPU,LOG_WARN)("ESC 7 EA:Unhandled group %d subfunction %d",group,sub); break; case 0x02: /* FIST Bit16s */ - cache_addb(0xdf); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); - gen_call_function((void*)&FPU_FST_16,"%Ddr",DREG(EA)); + dh_fpu_mem(0xdf); + gen_call_function((void*)&FPU_FST_16,"%Drd",DREG(EA)); break; case 0x03: /* FISTP Bit16s */ - cache_addb(0xdf); - cache_addb(0x05|(decode.modrm.reg<<3)); - 
cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); - gen_call_function((void*)&FPU_FST_16,"%Ddr",DREG(EA)); + dh_fpu_mem(0xdf); + gen_call_function((void*)&FPU_FST_16,"%Drd",DREG(EA)); break; case 0x04: /* FBLD packed BCD */ - gen_call_function((void*)&FPU_FLD_80,"%Ddr",DREG(EA)); - cache_addb(0xdf); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); + gen_call_function((void*)&FPU_FLD_80,"%Drd",DREG(EA)); + dh_fpu_mem(0xdf); break; case 0x05: /* FILD Bit64s */ - gen_call_function((void*)&FPU_FLD_64,"%Ddr",DREG(EA)); - cache_addb(0xdf); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); + gen_call_function((void*)&FPU_FLD_64,"%Drd",DREG(EA)); + dh_fpu_mem(0xdf); break; case 0x06: /* FBSTP packed BCD */ - cache_addb(0xdf); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); - gen_call_function((void*)&FPU_FST_80,"%Ddr",DREG(EA)); + dh_fpu_mem(0xdf); + gen_call_function((void*)&FPU_FST_80,"%Drd",DREG(EA)); break; case 0x07: /* FISTP Bit64s */ - cache_addb(0xdf); - cache_addb(0x05|(decode.modrm.reg<<3)); - cache_addd((Bit32u)(&(dyn_dh_fpu.temp.m1))); - gen_call_function((void*)&FPU_FST_64,"%Ddr",DREG(EA)); + dh_fpu_mem(0xdf); + gen_call_function((void*)&FPU_FST_64,"%Drd",DREG(EA)); break; default: LOG(LOG_FPU,LOG_WARN)("ESC 7 EA:Unhandled group %d subfunction %d",group,sub); diff --git a/src/cpu/core_dyn_x86/helpers.h b/src/cpu/core_dyn_x86/helpers.h index 0cea15cd..0bba9817 100644 --- a/src/cpu/core_dyn_x86/helpers.h +++ b/src/cpu/core_dyn_x86/helpers.h @@ -40,8 +40,8 @@ static bool dyn_helper_idivb(Bit8s val) { static bool dyn_helper_divw(Bit16u val) { if (!val) return CPU_PrepareException(0,0); - Bitu num=(reg_dx<<16)|reg_ax; - Bitu quo=num/val; + Bit32u num=(((Bit32u)reg_dx)<<16)|reg_ax; + Bit32u quo=num/val; Bit16u rem=(Bit16u)(num % val); Bit16u quo16=(Bit16u)(quo&0xffff); if (quo!=(Bit32u)quo16) return CPU_PrepareException(0,0); @@ -52,8 +52,8 @@ static 
bool dyn_helper_divw(Bit16u val) { static bool dyn_helper_idivw(Bit16s val) { if (!val) return CPU_PrepareException(0,0); - Bits num=(reg_dx<<16)|reg_ax; - Bits quo=num/val; + Bit32s num=(((Bit32u)reg_dx)<<16)|reg_ax; + Bit32s quo=num/val; Bit16s rem=(Bit16s)(num % val); Bit16s quo16s=(Bit16s)quo; if (quo!=(Bit32s)quo16s) return CPU_PrepareException(0,0); diff --git a/src/cpu/core_dyn_x86/risc_x64.h b/src/cpu/core_dyn_x86/risc_x64.h new file mode 100644 index 00000000..a08ab963 --- /dev/null +++ b/src/cpu/core_dyn_x86/risc_x64.h @@ -0,0 +1,1272 @@ +/* + * Copyright (C) 2002-2019 The DOSBox Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#if defined(_WIN64) +enum { + X64_REG_RAX, + X64_REG_RBX, + X64_REG_RCX, + X64_REG_RDX, + // volatiles + X64_REG_R8, + X64_REG_R9, + X64_REG_R10, + X64_REG_R11, + // non-volatiles + X64_REG_R12, + X64_REG_R13, + X64_REG_R14, + X64_REG_R15, + X64_REG_RSI, + X64_REG_RDI, + X64_REGS +}; +static const int reg_args[4] = {X64_REG_RCX, X64_REG_RDX, X64_REG_R8, X64_REG_R9}; +#define ARG0_REG 1 +#define ARG1_REG 2 +#define CALLSTACK 32 +#else +enum { + // (high)byte-accessible + X64_REG_RAX, + X64_REG_RBX, + X64_REG_RCX, + X64_REG_RDX, + // volatiles + X64_REG_RSI, + X64_REG_RDI, + X64_REG_R8, + X64_REG_R9, + X64_REG_R10, + X64_REG_R11, + // non-volatiles + X64_REG_R12, + X64_REG_R13, + X64_REG_R14, + X64_REG_R15, + // delimiter + X64_REGS +}; +static const int reg_args[4] = {X64_REG_RDI, X64_REG_RSI, X64_REG_RDX, X64_REG_RCX}; +#define ARG0_REG 7 +#define ARG1_REG 6 +#define CALLSTACK 0 +#endif + +static struct { + bool flagsactive; + Bitu last_used; + GenReg * regs[X64_REGS]; +} x64gen; + +class opcode { +public: + opcode(void) : is_word(false), imm_size(0), rex(0) {} + opcode(int reg,bool dword=true,Bit8u acc=1) : is_word(!dword), imm_size(0), rex(0) { + setreg(reg, acc); + } + + opcode& setword() {is_word=true; return *this;} + opcode& set64(void) {rex|=0x48;return *this;} + opcode& setimm(Bit64u _imm, int size) {imm=_imm;imm_size=size;return *this;} + + opcode& setreg(int r, Bit8u acc=1); // acc: 0=low byte, 1=word/dword, 4=high byte + opcode& setrm(int r, Bit8u acc=1); + opcode& setabsaddr(void* addr); + opcode& setea(int rbase, int rscale=-1, int scale=0, Bit32s off=0); + + void Emit8Reg(Bit8u op); + void Emit8(Bit8u op); + void Emit16(Bit16u op); + +private: + bool is_word; + int reg; + Bit64u imm; + int imm_size; + + Bit8u rex, modrm, sib; + Bits offset; + + void EmitImm(void) { + switch(imm_size) { + case 1: cache_addb((Bit8u)imm);break; + case 2: cache_addw((Bit16u)imm);break; + case 4: cache_addd((Bit32u)imm);break; + case 8: cache_addq(imm);break; + } + 
} + + void EmitSibOffImm(void) { + if (modrm<0xC0) { + if ((modrm&7)==4) cache_addb(sib); + switch (modrm>>6) { + case 0: + if ((modrm&7)==5) { + // update offset to be RIP relative + Bits diff = offset - (Bits)cache.pos - 4 - imm_size; + if ((Bit32s)diff == diff) offset = diff; + else { // try 32-bit absolute address + if ((Bit32s)offset != offset) IllegalOption("opcode::Emit: bad RIP address"); + // change emitted modrm base from 5 to 4 (use sib) + cache.pos[-1] -= 1; + cache_addb(0x25); // sib: [none+1*none+simm32] + } + } else if ((modrm&7)!=4 || (sib&7)!=5) + break; + case 2: cache_addd((Bit32u)offset); break; + case 1: cache_addb((Bit8u)offset); break; + } + } + EmitImm(); + } +}; + +void opcode::Emit8Reg(Bit8u op) { + if (is_word) cache_addb(0x66); + if (reg>=8) rex |= 0x41; + if (rex) cache_addb(rex); + cache_addb(op|(reg&7)); + EmitImm(); +} + +void opcode::Emit8(Bit8u op) { + if (is_word) cache_addb(0x66); + if (rex) cache_addb(rex); + cache_addw(op+(modrm<<8)); + EmitSibOffImm(); +} + +void opcode::Emit16(Bit16u op) { + if (is_word) cache_addb(0x66); + if (rex) cache_addb(rex); + cache_addw(op); + cache_addb(modrm); + EmitSibOffImm(); +} + +opcode& opcode::setreg(int r, Bit8u acc) { + if (acc==4) { + if (r>3 || rex) IllegalOption("opcode::setreg: cannot encode high byte"); + r += 4; + } + else if (acc==0 && r>3) rex |= 0x40; + reg = r; + return *this; +} + +opcode& opcode::setrm(int r, Bit8u acc) { + if (reg>=8) rex |= 0x44; + if (r>=8) rex |= 0x41; + if (acc==4) { + if (r>3 || rex) IllegalOption("opcode::setrm: cannot encode high byte"); + r += 4; + } + else if (acc==0 && r>3) rex |= 0x40; + modrm = 0xC0+((reg&7)<<3)+(r&7); + return *this; +} + +opcode& opcode::setabsaddr(void* addr) { + /* address must be in one of three ranges (in order of preference: + * &cpu_regs +/- 2GB (RBP relative) enc: modrm+1 or 4 bytes + * cache.pos +/- 2GB (RIP relative) enc: modrm+4 bytes + * < 0x80000000 or >= 0xFFFFFFFF80000000 (signed 32-bit absolute) enc: modrm+sib+4 
bytes + */ + if (reg>=8) rex |= 0x44; + modrm = (reg&7)<<3; + offset = (Bits)addr - (Bits)&cpu_regs; + if ((Bit32s)offset == offset) { // [RBP+(Bit8s/Bit32s)] + if ((Bit8s)offset == offset) modrm += 0x45; + else modrm += 0x85; + } else { + offset = (Bits)addr; + modrm += 5; // [RIP+Bit32s] or [abs Bit32s] + } + + return *this; +} + +opcode& opcode::setea(int rbase, int rscale, int scale, Bit32s off) { + if (reg>=8) rex |= 0x44; + if (rbase>=8) rex |= 0x41, rbase &= 7; + if (rscale>=8) rex |= 0x42, rscale &= 7; + modrm = (reg&7)<<3; + offset = off; + + if (rbase<0 || rscale>=0 || rbase==4) { // sib required + modrm += 4; + if (rscale>=0) sib = (scale<<6)+(rscale<<3); + else sib = 4<<3; + if (rbase>=0) sib += rbase; + else sib += 5; + } else modrm += rbase; + + if (rbase==5 || (off && rbase>=0)) { + if ((Bit8s)off == off) modrm += 0x40; + else modrm += 0x80; + } + + return *this; +} + + +class GenReg { +public: + GenReg(Bit8u _index) : index(_index) { + notusable=false;dynreg=0; + } + DynReg * dynreg; + Bitu last_used; //Keeps track of last assigned regs + const Bit8u index; + bool notusable; + void Load(DynReg * _dynreg,bool stale=false) { + if (!_dynreg) return; + if (GCC_UNLIKELY((Bitu)dynreg)) Clear(); + dynreg=_dynreg; + last_used=x64gen.last_used; + dynreg->flags&=~DYNFLG_CHANGED; + dynreg->genreg=this; + if ((!stale) && (dynreg->flags & (DYNFLG_LOAD|DYNFLG_ACTIVE))) { + opcode(index).setabsaddr(dynreg->data).Emit8(0x8B); // mov r32, [] + } + dynreg->flags|=DYNFLG_ACTIVE; + } + void Save(void) { + if (GCC_UNLIKELY(!((Bitu)dynreg))) IllegalOption("GenReg->Save"); + dynreg->flags&=~DYNFLG_CHANGED; + opcode(index).setabsaddr(dynreg->data).Emit8(0x89); // mov [], r32 + } + void Release(void) { + if (GCC_UNLIKELY(!((Bitu)dynreg))) return; + if (dynreg->flags&DYNFLG_CHANGED && dynreg->flags&DYNFLG_SAVE) { + Save(); + } + dynreg->flags&=~(DYNFLG_CHANGED|DYNFLG_ACTIVE); + dynreg->genreg=0;dynreg=0; + } + void Clear(void) { + if (!dynreg) return; + if 
(dynreg->flags&DYNFLG_CHANGED) { + Save(); + } + dynreg->genreg=0;dynreg=0; + } +}; + +static BlockReturn gen_runcodeInit(Bit8u *code); +static BlockReturn (*gen_runcode)(Bit8u *code) = gen_runcodeInit; + +static BlockReturn gen_runcodeInit(Bit8u *code) { + Bit8u* oldpos = cache.pos; + cache.pos = &cache_code_link_blocks[128]; + gen_runcode = (BlockReturn(*)(Bit8u*))cache.pos; + + opcode(5).Emit8Reg(0x50); // push rbp + opcode(15).Emit8Reg(0x50); // push r15 + opcode(14).Emit8Reg(0x50); // push r14 + // mov rbp, &cpu_regs + if ((Bit32u)(Bitu)&cpu_regs == (Bitu)&cpu_regs) opcode(5).setimm((Bitu)&cpu_regs,4).Emit8Reg(0xB8); + else opcode(5).set64().setimm((Bitu)&cpu_regs,8).Emit8Reg(0xB8); + opcode(13).Emit8Reg(0x50); // push r13 + opcode(12).Emit8Reg(0x50); // push r12 + opcode(3).Emit8Reg(0x50); // push rbx + opcode(0).setea(5,-1,0,offsetof(CPU_Regs,flags)).Emit8(0x8B); // mov eax, [reg_flags(rbp)] +#if defined(_WIN64) + opcode(7).Emit8Reg(0x50); // push rdi + opcode(6).Emit8Reg(0x50); // push rsi +#endif + opcode(15).set64().setrm(4).Emit8(0x8B); // mov r15, rsp + opcode(0).setimm(FMASK_TEST,4).Emit8Reg(0x25); // and eax, FMASK_TEST + cache_addb(0x48);cache_addw(0x158D); // lea rdx, [rip+simm32] + Bit8u *diff = cache.pos; + cache_addd(0); + opcode(4).set64().setrm(4).setimm(~15,1).Emit8(0x83); // and rsp, ~15 + opcode(15).Emit8Reg(0x50); // push r15 + opcode(2).Emit8Reg(0x50); // push rdx + opcode(5).set64().setrm(4).setimm(CALLSTACK*2+16,1).Emit8(0x83); // sub rsp, 16/80 + opcode(0).setea(4,-1,0,CALLSTACK+8).Emit8(0x89); // mov [rsp+8/40], eax + opcode(4).setrm(ARG0_REG).Emit8(0xFF); // jmp ARG0 + + *(Bit32u*)diff = (Bit32u)(cache.pos - diff - 4); + // eax = return value, ecx = flags + opcode(2).setea(5,-1,0,offsetof(CPU_Regs,flags)).Emit8(0x8B); // mov edx, [reg_flags(rbp)] + opcode(4).setrm(1).setimm(FMASK_TEST,4).Emit8(0x81); // and ecx,FMASK_TEST + opcode(4).setrm(2).setimm(~FMASK_TEST,4).Emit8(0x81); // and edx,~FMASK_TEST + opcode(1).setrm(2).Emit8(0x0B); 
// or ecx,edx + opcode(1).setea(5,-1,0,offsetof(CPU_Regs,flags)).Emit8(0x89); // mov [reg_flags(rbp)],ecx + + opcode(4).set64().setea(4,-1,0,CALLSTACK+8).Emit8(0x8B); // mov rsp, [rsp+8/40] +#if defined(_WIN64) + opcode(6).Emit8Reg(0x58); // pop rsi + opcode(7).Emit8Reg(0x58); // pop rdi +#endif + opcode(3).Emit8Reg(0x58); // pop rbx + opcode(12).Emit8Reg(0x58); // pop r12 + opcode(13).Emit8Reg(0x58); // pop r13 + opcode(14).Emit8Reg(0x58); // pop r14 + opcode(15).Emit8Reg(0x58); // pop r15 + opcode(5).Emit8Reg(0x58); // pop rbp + cache_addb(0xc3); // ret + + cache.pos = oldpos; + return gen_runcode(code); +} + +static GenReg * FindDynReg(DynReg * dynreg,bool stale=false) { + x64gen.last_used++; + if (dynreg->genreg) { + dynreg->genreg->last_used=x64gen.last_used; + return dynreg->genreg; + } + /* Find best match for selected global reg */ + Bits i; + Bits first_used,first_index; + first_used=-1; + if (dynreg->flags & DYNFLG_HAS8) { + /* Has to be rax,rbx,rcx,rdx */ + for (i=first_index=0;i<=3;i++) { + GenReg * genreg=x64gen.regs[i]; + if (genreg->notusable) continue; + if (!(genreg->dynreg)) { + genreg->Load(dynreg,stale); + return genreg; + } + if (genreg->last_used<(Bitu)first_used) { + first_used=genreg->last_used; + first_index=i; + } + } + } else { + for (i=first_index=X64_REGS-1;i>=0;i--) { + GenReg * genreg=x64gen.regs[i]; + if (genreg->notusable) continue; + if (!(genreg->dynreg)) { + genreg->Load(dynreg,stale); + return genreg; + } + if (genreg->last_used<(Bitu)first_used) { + first_used=genreg->last_used; + first_index=i; + } + } + } + /* No free register found use earliest assigned one */ + GenReg * newreg=x64gen.regs[first_index]; + newreg->Load(dynreg,stale); + return newreg; +} + +static Bit8u GetNextReg(bool low=false) { + Bitu i; + Bitu first_used,first_index; + first_used=x64gen.last_used+1; + for (i=first_index=0;i < X64_REGS;i++) { + GenReg* genreg=x64gen.regs[i]; + if (genreg->notusable) continue; + if (low && genreg->index>=8) continue; + if (!(genreg->dynreg)) { + first_index=i; + break; + } + if
(genreg->last_used < first_used) { + first_used = genreg->last_used; + first_index = i; + } + } + x64gen.regs[first_index]->Clear(); + return x64gen.regs[first_index]->index; +} + +static void ForceDynReg(GenReg * genreg,DynReg * dynreg) { + genreg->last_used = ++x64gen.last_used; + if (dynreg->genreg) { + if (dynreg->genreg==genreg) return; + if (genreg->dynreg) genreg->Clear(); + // mov dst32, src32 + opcode(genreg->index).setrm(dynreg->genreg->index).Emit8(0x8B); + dynreg->genreg->dynreg=0; + dynreg->genreg=genreg; + genreg->dynreg=dynreg; + } else genreg->Load(dynreg); +} + +static void gen_preloadreg(DynReg * dynreg) { + FindDynReg(dynreg); +} + +static void gen_releasereg(DynReg * dynreg) { + GenReg * genreg=dynreg->genreg; + if (genreg) genreg->Release(); + else dynreg->flags&=~(DYNFLG_ACTIVE|DYNFLG_CHANGED); +} + +static void gen_setupreg(DynReg * dnew,DynReg * dsetup) { + dnew->flags=dsetup->flags; + if (dnew->genreg==dsetup->genreg) return; + /* Not the same genreg must be wrong */ + if (dnew->genreg) { + /* Check if the genreg i'm changing is actually linked to me */ + if (dnew->genreg->dynreg==dnew) dnew->genreg->dynreg=0; + } + dnew->genreg=dsetup->genreg; + if (dnew->genreg) dnew->genreg->dynreg=dnew; +} + +static void gen_synchreg(DynReg * dnew,DynReg * dsynch) { + /* First make sure the registers match */ + if (dnew->genreg!=dsynch->genreg) { + if (dnew->genreg) dnew->genreg->Clear(); + if (dsynch->genreg) { + dsynch->genreg->Load(dnew); + } + } + /* Always use the loadonce flag from either state */ + dnew->flags|=(dsynch->flags & dnew->flags&DYNFLG_ACTIVE); + if ((dnew->flags ^ dsynch->flags) & DYNFLG_CHANGED) { + /* Ensure the changed value gets saved */ + if (dnew->flags & DYNFLG_CHANGED) { + dnew->genreg->Save(); + } else dnew->flags|=DYNFLG_CHANGED; + } +} + +static void gen_needflags(void) { + if (!x64gen.flagsactive) { + x64gen.flagsactive=true; + opcode(0).set64().setrm(4).setimm(CALLSTACK+8,1).Emit8(0x83); // add rsp,8/40 + cache_addb(0x9d); //POPFQ + } +} + +static void
gen_protectflags(void) { + if (x64gen.flagsactive) { + x64gen.flagsactive=false; + cache_addb(0x9c); //PUSHFQ + opcode(4).set64().setea(4,-1,0,-(CALLSTACK+8)).Emit8(0x8D); // lea rsp, [rsp-8/40] + } +} + +static void gen_discardflags(void) { + if (!x64gen.flagsactive) { + x64gen.flagsactive=true; + opcode(0).set64().setrm(4).setimm(CALLSTACK+16,1).Emit8(0x83); // add rsp,16/48 + } +} + +static void gen_needcarry(void) { + gen_needflags(); +} + +static void gen_setzeroflag(void) { + if (x64gen.flagsactive) IllegalOption("gen_setzeroflag"); + opcode(1).setea(4,-1,0,CALLSTACK+8).setimm(0x40,1).Emit8(0x83); // or dword [rsp+8/40],0x40 +} + +static void gen_clearzeroflag(void) { + if (x64gen.flagsactive) IllegalOption("gen_clearzeroflag"); + opcode(4).setea(4,-1,0,CALLSTACK+8).setimm(~0x40,1).Emit8(0x83); // and dword [rsp+8/40],~0x40 +} + +static bool skip_flags=false; + +static void set_skipflags(bool state) { + if (!state) gen_discardflags(); + skip_flags=state; +} + +static void gen_reinit(void) { + x64gen.last_used=0; + x64gen.flagsactive=false; + for (Bitu i=0;i < X64_REGS;i++) { + x64gen.regs[i]->dynreg=0; + } +} + +static void gen_load_host(void * data,DynReg * dr1,Bitu size) { + opcode op = opcode(FindDynReg(dr1,true)->index).setabsaddr(data); + switch (size) { + case 1: // movzx r32, byte[] + op.Emit16(0xB60F); + break; + case 2: // movzx r32, word[] + op.Emit16(0xB70F); + break; + case 4: // mov r32, [] + op.Emit8(0x8B); + break; + default: + IllegalOption("gen_load_host"); + } + dr1->flags|=DYNFLG_CHANGED; +} + +static void gen_mov_host(void * data,DynReg * dr1,Bitu size,Bit8u di1=0) { + int idx = FindDynReg(dr1,size==4)->index; + opcode op; + Bit8u tmp; + switch (size) { + case 1: + op.setreg(idx,di1); + tmp = 0x8A; // mov r8, [] + break; + case 2: op.setword(); // mov r16, [] + case 4: op.setreg(idx); + tmp = 0x8B; // mov r32, [] + break; + default: + IllegalOption("gen_mov_host"); + } + op.setabsaddr(data).Emit8(tmp); + dr1->flags|=DYNFLG_CHANGED; +} + +static void gen_load_arg_reg(int
argno,DynReg *dr,const char *s) { + GenReg *gen = x64gen.regs[reg_args[argno]]; + GenReg *src = dr->genreg; + opcode op(gen->index); + + if (*s=='r') { + s++; + gen_releasereg(dr); + } + + gen->Clear(); + + switch (*s) { + case 'h': + if (src) { + if (src->index>3 || gen->index>3) { + // shld r32,r32,24 + opcode(src->index).setimm(24,1).setrm(gen->index).Emit16(0xA40F); + op.setrm(gen->index,0); + } else op.setrm(src->index,4); + } else op.setabsaddr(((Bit8u*)dr->data)+1); + op.Emit16(0xB60F); // movzx r32, r/m8 + break; + case 'l': + if (src) op.setrm(src->index,0); + else op.setabsaddr(dr->data); + op.Emit16(0xB60F); // movzx r32, r/m8 + break; + case 'w': + if (src) op.setrm(src->index); + else op.setabsaddr(dr->data); + op.Emit16(0xB70F); // movzx r32, r/m16 + break; + case 'd': + if (src) { + if (src != gen) op.setrm(src->index).Emit8(0x8B); + } else op.setabsaddr(dr->data).Emit8(0x8B); + break; + default: + IllegalOption("gen_load_arg_reg param:DREG"); + } +} + +static void gen_load_imm(int index,Bitu imm) { + if (imm==0) + opcode(index).setrm(index).Emit8(0x33); // xor r32,r32 + else if ((Bit32u)imm==imm) + opcode(index).setimm(imm,4).Emit8Reg(0xB8); // MOV r32, imm32 + else if ((Bit32s)imm==imm) + opcode(0).set64().setimm(imm,4).setrm(index).Emit8(0xC7); // mov r64, simm32 + else + opcode(index).set64().setabsaddr((void*)imm).Emit8(0x8D); // lea r64, [imm] +} + +static void gen_dop_byte(DualOps op,DynReg * dr1,Bit8u di1,DynReg * dr2,Bit8u di2) { + Bit8u tmp; + opcode i(FindDynReg(dr1)->index,true,di1); + i.setrm(FindDynReg(dr2)->index,di2); + + switch (op) { + case DOP_ADD: tmp=0x02; break; + case DOP_ADC: tmp=0x12; break; + case DOP_SUB: tmp=0x2a; break; + case DOP_SBB: tmp=0x1a; break; + case DOP_CMP: tmp=0x3a; goto nochange; + case DOP_XOR: tmp=0x32; break; + case DOP_AND: tmp=0x22; if ((dr1==dr2) && (di1==di2)) goto nochange; break; + case DOP_OR: tmp=0x0a; if ((dr1==dr2) && (di1==di2)) goto nochange; break; + case DOP_TEST: tmp=0x84; goto nochange; + 
case DOP_MOV: if ((dr1==dr2) && (di1==di2)) return; tmp=0x8a; break; + case DOP_XCHG: tmp=0x86; dr2->flags|=DYNFLG_CHANGED; break; + default: + IllegalOption("gen_dop_byte"); + } + dr1->flags|=DYNFLG_CHANGED; +nochange: + i.Emit8(tmp); +} + +static void gen_dop_byte_imm(DualOps op,DynReg * dr1,Bit8u di1,Bitu imm) { + Bit8u tmp=0x80; + int dst = FindDynReg(dr1)->index; + opcode i; + i.setimm(imm,1); + imm &= 0xff; + + switch (op) { + case DOP_ADD: i.setreg(0); if (!imm) goto nochange; break; + case DOP_ADC: i.setreg(2); break; + case DOP_SUB: i.setreg(5); if (!imm) goto nochange; break; + case DOP_SBB: i.setreg(3); break; + case DOP_CMP: i.setreg(7); goto nochange; //Doesn't change + case DOP_XOR: i.setreg(6); if (!imm) goto nochange; break; + case DOP_AND: i.setreg(4); if (imm==255) goto nochange; break; + case DOP_OR: i.setreg(1); if (!imm) goto nochange; break; + case DOP_TEST: i.setreg(0);tmp=0xF6;goto nochange; + case DOP_MOV: i.setreg(dst,di1).Emit8Reg(0xB0); + dr1->flags|=DYNFLG_CHANGED; + return; + default: + IllegalOption("gen_dop_byte_imm"); + } + dr1->flags|=DYNFLG_CHANGED; +nochange: + i.setrm(dst,di1).Emit8(tmp); +} + +static void gen_dop_byte_imm_mem(DualOps op,DynReg * dr1,Bit8u di1,void* data) { + opcode i; + Bits addr = (Bits)data; + Bits rbpdiff = addr - (Bits)&cpu_regs; + Bits ripdiff = addr - (Bits)cache.pos; + if (ripdiff<0) ripdiff = ~ripdiff+32; + if ((Bit32s)addr==addr || (Bit32s)rbpdiff==rbpdiff || ripdiff < 0x7FFFFFE0ll) + i = opcode(FindDynReg(dr1)->index,true,di1).setabsaddr(data); + else { + GenReg* dst = FindDynReg(dr1); + dst->notusable=true; + int src = GetNextReg(di1); + dst->notusable=false; + if ((Bit32u)addr == (Bitu)addr) opcode(src).setimm(addr,4).Emit8Reg(0xB8); + else opcode(src).setimm(addr,8).set64().Emit8Reg(0xB8); + i = opcode(dst->index,true,di1).setea(src); + } + + Bit8u tmp; + switch (op) { + case DOP_ADD: tmp=0x02; break; + case DOP_ADC: tmp=0x12; break; + case DOP_SUB: tmp=0x2a; break; + case DOP_SBB: tmp=0x1a; break; 
+ case DOP_CMP: tmp=0x3a; goto nochange; //Doesn't change + case DOP_XOR: tmp=0x32; break; + case DOP_AND: tmp=0x22; break; + case DOP_OR: tmp=0x0a; break; + case DOP_TEST: tmp=0x84; goto nochange; //Doesn't change + case DOP_MOV: tmp=0x8A; break; + default: + IllegalOption("gen_dop_byte_imm_mem"); + } + dr1->flags|=DYNFLG_CHANGED; +nochange: + i.Emit8(tmp); +} + +static void gen_sop_byte(SingleOps op,DynReg * dr1,Bit8u di1) { + Bit8u tmp; + int dst = FindDynReg(dr1)->index; + opcode i; + + switch (op) { + case SOP_INC: i.setreg(0);tmp=0xFE; break; + case SOP_DEC: i.setreg(1);tmp=0xFE; break; + case SOP_NOT: i.setreg(2);tmp=0xF6; break; + case SOP_NEG: i.setreg(3);tmp=0xF6; break; + default: + IllegalOption("gen_sop_byte"); + } + i.setrm(dst,di1).Emit8(tmp); + dr1->flags|=DYNFLG_CHANGED; +} + +static void gen_extend_word(bool sign,DynReg * ddr,DynReg * dsr) { + if (ddr==dsr && dsr->genreg==NULL) + opcode(FindDynReg(ddr,true)->index).setabsaddr(dsr->data).Emit16(sign ? 0xBF0F:0xB70F); + else { + int src = FindDynReg(dsr)->index; + int dst = FindDynReg(ddr,true)->index; + if (sign && (src|dst)==0) cache_addb(0x98); // cwde + else opcode(dst).setrm(src).Emit16(sign ? 0xBF0F:0xB70F); // movsx/movzx dst32, src16 + } + + ddr->flags|=DYNFLG_CHANGED; +} + +static void gen_extend_byte(bool sign,bool dword,DynReg * ddr,DynReg * dsr,Bit8u dsi) { + if (ddr==dsr && dword && dsr->genreg==NULL) { + opcode op = opcode(FindDynReg(ddr,true)->index); + if (dsi) op.setabsaddr((void*)(((Bit8u*)dsr->data)+1)); + else op.setabsaddr(dsr->data); + op.Emit16(sign ? 
0xBE0F:0xB60F); // movsx/movzx r32,m8 + } else { + int src = FindDynReg(dsr)->index; + int dst = FindDynReg(ddr,dword)->index; + if (dsi && (src>3 || dst>=8)) { // high-byte + REX = extra work required + // high-byte + REX prefix = extra work required: + // move source high-byte to dest low-byte then extend dest + gen_protectflags(); // shld changes flags, movzx/movsx does not + + // shld r16, r16, 8 + opcode(src,false).setimm(8,1).setrm(dst).Emit16(0xA40F); + src = dst; + dsi = 0; + } + if (sign && !dword && (src|dst|dsi)==0) cache_addw(0x9866); // cbw + else opcode(dst,dword).setrm(src,dsi).Emit16(sign ? 0xBE0F:0xB60F); + } + ddr->flags|=DYNFLG_CHANGED; +} + +static void gen_lea(DynReg * ddr,DynReg * dsr1,DynReg * dsr2,Bitu scale,Bit32s imm) { + if (ddr==dsr1 && dsr2==NULL && !imm) + return; + if (ddr==dsr2 && dsr1==NULL) { + if (!scale && !imm) + return; + else if (scale<2) { + // change [2*reg] to [reg+reg] + // or [0+1*reg] to [reg+0*reg] + // (index with no base requires 32-bit offset) + dsr1 = dsr2; + if (!scale) dsr2 = NULL; + else scale = 0; + } + } + + GenReg * gdr=FindDynReg(ddr,ddr!=dsr1 && ddr!=dsr2); + + int idx1 = dsr1 ? FindDynReg(dsr1)->index : -1; + int idx2 = dsr2 ? 
FindDynReg(dsr2)->index : -1; + + if (idx1==13 && dsr2 && idx2!=13 && !scale && !imm) { + // use r13 as index instead of base to avoid mandatory offset + int s = idx1; + idx1 = idx2; + idx2 = s; + } + + opcode(gdr->index).setea(idx1, idx2, scale, imm).Emit8(0x8D); + ddr->flags|=DYNFLG_CHANGED; +} + +static void gen_lea_imm_mem(DynReg * ddr,DynReg * dsr,void* data) { + gen_load_host(data, ddr, 4); + gen_lea(ddr, ddr, dsr, 0, 0); +} + +static void gen_dop_word(DualOps op,bool dword,DynReg * dr1,DynReg * dr2) { + Bit8u tmp; + GenReg *gr2 = FindDynReg(dr2); + GenReg *gr1 = FindDynReg(dr1,dword && op==DOP_MOV); + + switch (op) { + case DOP_ADD: tmp=0x03; break; + case DOP_ADC: tmp=0x13; break; + case DOP_SUB: tmp=0x2b; break; + case DOP_SBB: tmp=0x1b; break; + case DOP_CMP: tmp=0x3b; goto nochange; + case DOP_XOR: tmp=0x33; break; + case DOP_AND: tmp=0x23; if (dr1==dr2) goto nochange; break; + case DOP_OR: tmp=0x0b; if (dr1==dr2) goto nochange; break; + case DOP_TEST: tmp=0x85; goto nochange; + case DOP_MOV: if (dr1==dr2) return; tmp=0x8b; break; + case DOP_XCHG: + dr2->flags|=DYNFLG_CHANGED; + if (dword && !((dr1->flags&DYNFLG_HAS8) ^ (dr2->flags&DYNFLG_HAS8))) { + dr1->genreg=gr2;gr2->dynreg=dr1; + dr2->genreg=gr1;gr1->dynreg=dr2; + dr1->flags|=DYNFLG_CHANGED; + return; + } + tmp=0x87; + break; + default: + IllegalOption("gen_dop_word"); + } + dr1->flags|=DYNFLG_CHANGED; +nochange: + opcode(gr1->index,dword).setrm(gr2->index).Emit8(tmp); +} + +static void gen_dop_word_imm(DualOps op,bool dword,DynReg * dr1,Bits imm) { + Bit8u tmp=0x81; + int dst = FindDynReg(dr1,dword && op==DOP_MOV)->index; + opcode i; + if (!dword) i.setword(); + if (op <= DOP_OR && (Bit8s)imm==imm) { + i.setimm(imm, 1); + tmp = 0x83; + } else i.setimm(imm, dword?4:2); + + switch (op) { + case DOP_ADD: i.setreg(0); if (!imm) goto nochange; break; + case DOP_ADC: i.setreg(2); break; + case DOP_SUB: i.setreg(5); if (!imm) goto nochange; break; + case DOP_SBB: i.setreg(3); break; + case DOP_CMP: 
i.setreg(7); goto nochange; //Doesn't change + case DOP_XOR: i.setreg(6); if (!imm) goto nochange; break; + case DOP_AND: i.setreg(4); if (imm==-1) goto nochange; break; + case DOP_OR: i.setreg(1); if (!imm) goto nochange; break; + case DOP_TEST: i.setreg(0);tmp=0xF7; goto nochange; //Doesn't change + case DOP_MOV: i.setreg(dst).Emit8Reg(0xB8); dr1->flags|=DYNFLG_CHANGED; return; + default: + IllegalOption("gen_dop_word_imm"); + } + dr1->flags|=DYNFLG_CHANGED; +nochange: + i.setrm(dst).Emit8(tmp); +} + +static void gen_dop_word(DualOps op,DynReg *dr1,opcode &i) { + Bit8u tmp; + switch (op) { + case DOP_ADD: tmp=0x03; break; + case DOP_ADC: tmp=0x13; break; + case DOP_SUB: tmp=0x2b; break; + case DOP_SBB: tmp=0x1b; break; + case DOP_CMP: tmp=0x3b; goto nochange; //Doesn't change + case DOP_XOR: tmp=0x33; break; + case DOP_AND: tmp=0x23; break; + case DOP_OR: tmp=0x0b; break; + case DOP_TEST: tmp=0x85; goto nochange; //Doesn't change + case DOP_MOV: tmp=0x8b; break; + case DOP_XCHG: tmp=0x87; break; + default: + IllegalOption("gen_dop_word0"); + } + dr1->flags|=DYNFLG_CHANGED; +nochange: + i.Emit8(tmp); +} + +static void gen_dop_word_var(DualOps op,bool dword,DynReg * dr1,void* drd) { + opcode i = opcode(FindDynReg(dr1,dword && op==DOP_MOV)->index,dword).setabsaddr(drd); + gen_dop_word(op,dr1,i); +} + +static void gen_dop_word_imm_mem(DualOps op,bool dword,DynReg * dr1,void* data) { + opcode i; + Bits addr = (Bits)data; + Bits rbpdiff = addr - (Bits)&cpu_regs; + Bits ripdiff = addr - (Bits)cache.pos; + if (ripdiff<0) ripdiff = ~ripdiff+32; + if ((Bit32s)addr==addr || (Bit32s)rbpdiff==rbpdiff || ripdiff < 0x7FFFFFE0ll) + i = opcode(FindDynReg(dr1,dword && op==DOP_MOV)->index,dword).setabsaddr(data); + else if (dword && op==DOP_MOV) { + if (dr1->genreg) dr1->genreg->dynreg=0; + x64gen.regs[X64_REG_RAX]->Load(dr1,true); + if ((Bit32u)addr == (Bitu)addr) { + cache_addb(0x67); + opcode(0).setimm(addr,4).Emit8Reg(0xA1); + } else opcode(0).setimm(addr,8).Emit8Reg(0xA1); + 
dr1->flags|=DYNFLG_CHANGED; + return; + } else { + GenReg* dst = FindDynReg(dr1,false); + dst->notusable=true; + int src = GetNextReg(); + dst->notusable=false; + if ((Bit32u)addr == (Bitu)addr) opcode(src).setimm(addr,4).Emit8Reg(0xB8); + else opcode(src).setimm(addr,8).set64().Emit8Reg(0xB8); + i = opcode(dst->index,dword).setea(src); + } + gen_dop_word(op,dr1,i); +} + +static void gen_imul_word(bool dword,DynReg * dr1,DynReg * dr2) { + // dr1 = dr1*dr2 + opcode(FindDynReg(dr1)->index,dword).setrm(FindDynReg(dr2)->index).Emit16(0xAF0F); + dr1->flags|=DYNFLG_CHANGED; +} + +static void gen_imul_word_imm(bool dword,DynReg * dr1,DynReg * dr2,Bits imm) { + // dr1 = dr2*imm + opcode op; + if (dr1==dr2 && dword && dr1->genreg==NULL) + op = opcode(FindDynReg(dr1,true)->index).setabsaddr(dr2->data); + else + op = opcode(FindDynReg(dr1,dword&&dr1!=dr2)->index,dword).setrm(FindDynReg(dr2)->index); + + if ((Bit8s)imm==imm) op.setimm(imm,1).Emit8(0x6B); + else op.setimm(imm,dword?4:2).Emit8(0x69); + dr1->flags|=DYNFLG_CHANGED; +} + +static void gen_sop_word(SingleOps op,bool dword,DynReg * dr1) { + opcode i; + Bit8u tmp; + if (!dword) i.setword(); + switch (op) { + case SOP_INC: i.setreg(0);tmp=0xFF;break; + case SOP_DEC: i.setreg(1);tmp=0xFF;break; + case SOP_NOT: i.setreg(2);tmp=0xF7;break; + case SOP_NEG: i.setreg(3);tmp=0xF7;break; + default: + IllegalOption("gen_sop_word"); + } + i.setrm(FindDynReg(dr1)->index).Emit8(tmp); + dr1->flags|=DYNFLG_CHANGED; +} + +static void gen_shift_byte_cl(Bitu op,DynReg * dr1,Bit8u di1,DynReg * drecx) { + ForceDynReg(x64gen.regs[X64_REG_RCX],drecx); + opcode(op).setrm(FindDynReg(dr1)->index,di1).Emit8(0xD2); + dr1->flags|=DYNFLG_CHANGED; +} + +static void gen_shift_byte_imm(Bitu op,DynReg * dr1,Bit8u di1,Bit8u imm) { + opcode inst = opcode(op).setrm(FindDynReg(dr1)->index,di1); + if (imm==1) inst.Emit8(0xD0); + else inst.setimm(imm,1).Emit8(0xC0); + dr1->flags|=DYNFLG_CHANGED; +} + +static void gen_shift_word_cl(Bitu op,bool dword,DynReg 
* dr1,DynReg * drecx) { + ForceDynReg(x64gen.regs[X64_REG_RCX],drecx); + opcode(op,dword).setrm(FindDynReg(dr1)->index).Emit8(0xD3); + dr1->flags|=DYNFLG_CHANGED; +} + +static void gen_shift_word_imm(Bitu op,bool dword,DynReg * dr1,Bit8u imm) { + opcode inst = opcode(op,dword).setrm(FindDynReg(dr1)->index); + if (imm==1) inst.Emit8(0xD1); + else inst.setimm(imm,1).Emit8(0xC1); + dr1->flags|=DYNFLG_CHANGED; +} + +static void gen_cbw(bool dword,DynReg * dyn_ax) { + if (dword) gen_extend_word(true,dyn_ax,dyn_ax); + else gen_extend_byte(true,false,dyn_ax,dyn_ax,0); +} + +static void gen_cwd(bool dword,DynReg * dyn_ax,DynReg * dyn_dx) { + if (dyn_dx->genreg != x64gen.regs[X64_REG_RDX]) { + if (dword) { + if (dyn_dx->genreg) dyn_dx->genreg->dynreg = NULL; + x64gen.regs[X64_REG_RDX]->Load(dyn_dx,true); + } else ForceDynReg(x64gen.regs[X64_REG_RDX],dyn_dx); + } + ForceDynReg(x64gen.regs[X64_REG_RAX],dyn_ax); + dyn_dx->flags|=DYNFLG_CHANGED; + if (!dword) cache_addw(0x9966); + else cache_addb(0x99); +} + +static void gen_mul_byte(bool imul,DynReg * dyn_ax,DynReg * dr1,Bit8u di1) { + ForceDynReg(x64gen.regs[X64_REG_RAX],dyn_ax); + opcode(imul?5:4).setrm(FindDynReg(dr1)->index,di1).Emit8(0xF6); + dyn_ax->flags|=DYNFLG_CHANGED; +} + +static void gen_mul_word(bool imul,DynReg * dyn_ax,DynReg * dyn_dx,bool dword,DynReg * dr1) { + ForceDynReg(x64gen.regs[X64_REG_RAX],dyn_ax); + if (dword && dyn_dx!=dr1) { + // release current genreg + if (dyn_dx->genreg) dyn_dx->genreg->dynreg = NULL; + x64gen.regs[X64_REG_RDX]->Load(dyn_dx,true); + } else ForceDynReg(x64gen.regs[X64_REG_RDX],dyn_dx); + opcode(imul?5:4,dword).setrm(FindDynReg(dr1)->index).Emit8(0xF7); + dyn_ax->flags|=DYNFLG_CHANGED; + dyn_dx->flags|=DYNFLG_CHANGED; +} + +static void gen_dshift_imm(bool dword,bool left,DynReg * dr1,DynReg * dr2,Bitu imm) { + // shld/shrd imm + opcode(FindDynReg(dr2)->index,dword).setimm(imm,1).setrm(FindDynReg(dr1)->index).Emit16(left ? 
0xA40F:0xAC0F); + dr1->flags|=DYNFLG_CHANGED; +} + +static void gen_dshift_cl(bool dword,bool left,DynReg * dr1,DynReg * dr2,DynReg * drecx) { + ForceDynReg(x64gen.regs[X64_REG_RCX],drecx); + // shld/shrd cl + opcode(FindDynReg(dr2)->index,dword).setrm(FindDynReg(dr1)->index).Emit16(left ? 0xA50F:0xAD0F); + dr1->flags|=DYNFLG_CHANGED; +} + +static void gen_call_ptr(void *func=NULL) { + x64gen.regs[X64_REG_RAX]->Clear(); + x64gen.regs[X64_REG_RCX]->Clear(); + x64gen.regs[X64_REG_RDX]->Clear(); +#if !defined(_WIN64) + x64gen.regs[X64_REG_RSI]->Clear(); + x64gen.regs[X64_REG_RDI]->Clear(); +#endif + x64gen.regs[X64_REG_R8]->Clear(); + x64gen.regs[X64_REG_R9]->Clear(); + x64gen.regs[X64_REG_R10]->Clear(); + x64gen.regs[X64_REG_R11]->Clear(); + + /* Do the actual call to the procedure */ + if (func==NULL) cache_addw(0xD0FF); // call rax + else { + Bits diff = (Bits)func - (Bits)cache.pos - 5; + if ((Bit32s)diff == diff) opcode(0).setimm(diff,4).Emit8Reg(0xE8); // call rel32 + else { + gen_load_imm(0, (Bitu)func); + cache_addw(0xD0FF); + } + } +} + +static void gen_call_function(void * func,const char* ops,...) 
{ + Bits paramcount=0; + va_list params; + DynReg *dynret=NULL; + char rettype; + + /* Save the flags */ + if (GCC_LIKELY(!skip_flags)) gen_protectflags(); + if (ops==NULL) IllegalOption("gen_call_function NULL format"); + va_start(params,ops); + while (*ops) { + if (*ops++=='%') { + GenReg *gen; + switch (*ops++) { + case 'I': /* immediate value */ + gen = x64gen.regs[reg_args[paramcount++]]; + gen->Clear(); + if (*ops++!='p') gen_load_imm(gen->index,va_arg(params,Bit32u)); + else gen_load_imm(gen->index,va_arg(params,Bitu)); + break; + case 'D': /* Dynamic register */ + gen_load_arg_reg(paramcount++, va_arg(params,DynReg*), ops++); + break; + case 'R': /* Dynamic register for returned value */ + dynret = va_arg(params,DynReg*); + rettype = *ops++; + break; + case 'F': /* arg is flags, release */ + gen = x64gen.regs[reg_args[paramcount++]]; + gen->Clear(); + opcode(gen->index).setea(4,-1,0,CALLSTACK+8).Emit8(0x8B); // mov reg, [rsp+8/40] + opcode(0).set64().setimm(CALLSTACK+16,1).setrm(4).Emit8(0x83); // add rsp,16/48 + break; + default: + IllegalOption("gen_call_function unknown param"); + } + } + } + va_end(params); + + gen_call_ptr(func); + + /* Save the return value in correct register */ + if (dynret) { + GenReg * genret; + if (rettype == 'd') { + genret=x64gen.regs[X64_REG_RAX]; + if (dynret->genreg) dynret->genreg->dynreg=0; + genret->Load(dynret,true); + } else { + opcode op(0); // src=eax/ax/al/ah + x64gen.regs[X64_REG_RAX]->notusable = true; + genret = FindDynReg(dynret); + x64gen.regs[X64_REG_RAX]->notusable = false; + switch (rettype) { + case 'w': + // mov r16, ax + op.setword().setrm(genret->index).Emit8(0x89); + break; + case 'h': + // mov reg8h, al + op.setrm(genret->index,4).Emit8(0x88); + break; + case 'l': + // mov r/m8, al + op.setrm(genret->index,0).Emit8(0x88); + break; + } + } + dynret->flags|=DYNFLG_CHANGED; + } +} + +static void gen_call_write(DynReg * dr,Bit32u val,Bitu write_size) { + void *func; + gen_protectflags(); + 
gen_load_arg_reg(0,dr,"rd"); + + switch (write_size) { + case 1: func = (void*)mem_writeb_checked; break; + case 2: func = (void*)mem_writew_checked; break; + case 4: func = (void*)mem_writed_checked; break; + default: IllegalOption("gen_call_write"); + } + + x64gen.regs[reg_args[1]]->Clear(); + opcode(ARG1_REG).setimm(val,4).Emit8Reg(0xB8); // mov ARG2, imm32 + gen_call_ptr(func); +} + +static Bit8u * gen_create_branch(BranchTypes type) { + /* First free all registers */ + cache_addw(0x70+type); + return (cache.pos-1); +} + +static void gen_fill_branch(Bit8u * data,Bit8u * from=cache.pos) { +#if C_DEBUG + Bits len=from-data-1; + if (len<0) len=~len; + if (len>127) + LOG_MSG("Big jump %d",len); +#endif + *data=(from-data-1); +} + +static Bit8u * gen_create_branch_long(BranchTypes type) { + cache_addw(0x800f+(type<<8)); + cache_addd(0); + return (cache.pos-4); +} + +static void gen_fill_branch_long(Bit8u * data,Bit8u * from=cache.pos) { + *(Bit32u*)data=(from-data-4); +} + +static Bit8u * gen_create_jump(Bit8u * to=0) { + /* First free all registers */ + cache_addb(0xe9); + cache_addd(to-(cache.pos+4)); + return (cache.pos-4); +} + +static void gen_fill_jump(Bit8u * data,Bit8u * to=cache.pos) { + *(Bit32u*)data=(to-data-4); +} + +static Bit8u * gen_create_short_jump(void) { + cache_addw(0x00EB); + return cache.pos-1; +} + +static void gen_fill_short_jump(Bit8u * data, Bit8u * to=cache.pos) { +#if C_DEBUG + Bits len=to-data-1; + if (len<0) len=~len; + if (len>127) + LOG_MSG("Big jump %d",len); +#endif + data[0] = to-data-1; +} + +static void gen_jmp_ptr(void * _ptr,Bit32s imm=0) { + Bitu ptr = (Bitu)_ptr; + if ((Bit32u)ptr == ptr) { + cache_addb(0x67); // 32-bit abs address + opcode(0).set64().setimm(ptr,4).Emit8Reg(0xA1); + } else opcode(0).set64().setimm(ptr,8).Emit8Reg(0xA1); + opcode(4).setea(0,-1,0,imm).Emit8(0xFF); // jmp [rax+imm] +} + +static void gen_save_flags(DynReg * dynreg) { + if (GCC_UNLIKELY(x64gen.flagsactive)) IllegalOption("gen_save_flags"); + 
opcode(FindDynReg(dynreg)->index).setea(4,-1,0,CALLSTACK+8).Emit8(0x8B); // mov reg32, [rsp+8/40] + dynreg->flags|=DYNFLG_CHANGED; +} + +static void gen_load_flags(DynReg * dynreg) { + if (GCC_UNLIKELY(x64gen.flagsactive)) IllegalOption("gen_load_flags"); + opcode(FindDynReg(dynreg)->index).setea(4,-1,0,CALLSTACK+8).Emit8(0x89); // mov [rsp+8/40],reg32 +} + +static void gen_save_host_direct(void *data,Bitu imm) { + if ((Bit32s)imm != imm) { + opcode(0).setimm(imm,4).setabsaddr(data).Emit8(0xC7); // mov dword[], imm32 (low dword) + opcode(0).setimm(imm>>32,4).setabsaddr((Bit8u*)data+4).Emit8(0xC7); // high dword + } else + opcode(0).set64().setimm(imm,4).setabsaddr(data).Emit8(0xC7); // mov qword[], Bit32s +} + +static void gen_return(BlockReturn retcode) { + gen_protectflags(); + opcode(1).setea(4,-1,0,CALLSTACK+8).Emit8(0x8B); // mov ecx, [rsp+8/40] + opcode(0).set64().setrm(4).setimm(CALLSTACK+16,1).Emit8(0x83); // add rsp,16/48 + if (retcode==0) cache_addw(0xc033); //MOV EAX, 0 + else { + cache_addb(0xb8); //MOV EAX, retcode + cache_addd(retcode); + } + opcode(4).setea(4,-1,0,CALLSTACK).Emit8(0xFF); // jmp [rsp+CALLSTACK] +} + +static void gen_return_fast(BlockReturn retcode,bool ret_exception=false) { + if (GCC_UNLIKELY(x64gen.flagsactive)) IllegalOption("gen_return_fast"); + opcode(1).setabsaddr(&reg_flags).Emit8(0x8B); // mov ECX, [cpu_regs.flags] + if (!ret_exception) { + opcode(0).set64().setrm(4).setimm(CALLSTACK+16,1).Emit8(0x83); // add rsp,16/48 + if (retcode==0) cache_addw(0xc033); //MOV EAX, 0 + else { + cache_addb(0xb8); //MOV EAX, retcode + cache_addd(retcode); + } + } + opcode(4).setea(4,-1,0,CALLSTACK).Emit8(0xFF); // jmp [rsp+CALLSTACK] +} + +static void gen_init(void) { + x64gen.regs[X64_REG_RAX]=new GenReg(0); + x64gen.regs[X64_REG_RCX]=new GenReg(1); + x64gen.regs[X64_REG_RDX]=new GenReg(2); + x64gen.regs[X64_REG_RBX]=new GenReg(3); + x64gen.regs[X64_REG_RSI]=new GenReg(6); + x64gen.regs[X64_REG_RDI]=new GenReg(7); + x64gen.regs[X64_REG_R8]=new
GenReg(8); + x64gen.regs[X64_REG_R9]=new GenReg(9); + x64gen.regs[X64_REG_R10]=new GenReg(10); + x64gen.regs[X64_REG_R11]=new GenReg(11); + x64gen.regs[X64_REG_R12]=new GenReg(12); + x64gen.regs[X64_REG_R13]=new GenReg(13); + x64gen.regs[X64_REG_R14]=new GenReg(14); + x64gen.regs[X64_REG_R15]=new GenReg(15); +} + +#if defined(X86_DYNFPU_DH_ENABLED) +static void gen_dh_fpu_saveInit(void); +static void (*gen_dh_fpu_save)(void) = gen_dh_fpu_saveInit; + +// DO NOT USE opcode::setabsaddr IN THIS FUNCTION (RBP unavailable at execution time) +static void gen_dh_fpu_saveInit(void) { + Bit8u* oldpos = cache.pos; + cache.pos = &cache_code_link_blocks[64]; + gen_dh_fpu_save = (void(*)(void))cache.pos; + + Bits addr = (Bits)&dyn_dh_fpu; + // mov RAX, &dyn_dh_fpu + if ((Bit32u)addr == addr) opcode(0).setimm(addr,4).Emit8Reg(0xB8); + else opcode(0).set64().setimm(addr,8).Emit8Reg(0xB8); + + // fnsave [RAX+offs8] + cache_addw(0x70DD);cache_addb((Bits)&dyn_dh_fpu.state-addr); + // fldcw [RAX+offs8] + cache_addw(0x68D9);cache_addb((Bits)&dyn_dh_fpu.host_cw-addr); + // mov byte [RAX+offs8], 0 + cache_addw(0x40C6);cache_addw((Bit8u)((Bits)&dyn_dh_fpu.state_used-addr)); + // or byte [RAX+offs8], 0x3F + cache_addw(0x4880);cache_addb((Bits)&dyn_dh_fpu.state.cw-addr);cache_addb(0x3F); + cache_addb(0xC3); // RET + + cache.pos = oldpos; + gen_dh_fpu_save(); +} +#endif + diff --git a/src/cpu/core_dyn_x86/risc_x86.h b/src/cpu/core_dyn_x86/risc_x86.h index 5bf626ee..989cc4ba 100644 --- a/src/cpu/core_dyn_x86/risc_x86.h +++ b/src/cpu/core_dyn_x86/risc_x86.h @@ -1069,4 +1069,28 @@ static void gen_init(void) { x86gen.regs[X86_REG_EDI]=new GenReg(7); } - +#if defined(X86_DYNFPU_DH_ENABLED) +static void gen_dh_fpu_save(void) +#if defined (_MSC_VER) +{ + __asm { + __asm fnsave dyn_dh_fpu.state + __asm fldcw dyn_dh_fpu.host_cw + } + dyn_dh_fpu.state_used=false; + dyn_dh_fpu.state.cw|=0x3f; +} +#else +{ + __asm__ volatile ( + "fnsave %0 \n" + "fldcw %1 \n" + : "=m" (dyn_dh_fpu.state) + : "m" 
(dyn_dh_fpu.host_cw) + : "memory" + ); + dyn_dh_fpu.state_used=false; + dyn_dh_fpu.state.cw|=0x3f; +} +#endif +#endif diff --git a/src/cpu/core_dyn_x86/string.h b/src/cpu/core_dyn_x86/string.h index 60cfc8e0..8b27672b 100644 --- a/src/cpu/core_dyn_x86/string.h +++ b/src/cpu/core_dyn_x86/string.h @@ -82,7 +82,7 @@ static void dyn_string(STRING_OP op) { Bit8u * rep_ecx_jmp; /* Check if ECX!=zero */ if (decode.rep) { - gen_dop_word(DOP_OR,decode.big_addr,DREG(ECX),DREG(ECX)); + gen_dop_word(DOP_TEST,decode.big_addr,DREG(ECX),DREG(ECX)); rep_ecx_jmp=gen_create_branch_long(BR_Z); } if (usesi) { @@ -99,11 +99,11 @@ static void dyn_string(STRING_OP op) { } switch (op) { case STR_OUTSB: - gen_call_function((void*)&IO_WriteB,"%Id%Dl",DREG(EDX),tmp_reg);break; + gen_call_function((void*)&IO_WriteB,"%Dw%Dl",DREG(EDX),tmp_reg);break; case STR_OUTSW: - gen_call_function((void*)&IO_WriteW,"%Id%Dw",DREG(EDX),tmp_reg);break; + gen_call_function((void*)&IO_WriteW,"%Dw%Dw",DREG(EDX),tmp_reg);break; case STR_OUTSD: - gen_call_function((void*)&IO_WriteD,"%Id%Dd",DREG(EDX),tmp_reg);break; + gen_call_function((void*)&IO_WriteD,"%Dw%Dd",DREG(EDX),tmp_reg);break; } } if (usedi) {