From 06135e8beb5e17565a07699302c71c62171304f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Strohh=C3=A4cker?= Date: Sat, 3 Sep 2005 19:20:08 +0000 Subject: [PATCH] dynamic core optimizations; incorporate patch from esaelon, parts of ih8regs optimizations; thanks to kekko for several ideas Imported-from: https://svn.code.sf.net/p/dosbox/code-0/dosbox/trunk@2300 --- include/paging.h | 40 ++- src/cpu/core_dyn_x86.cpp | 13 +- src/cpu/core_dyn_x86/decoder.h | 428 ++++++++++++++++++++------------ src/cpu/core_dyn_x86/risc_x86.h | 307 +++++++++++++---------- 4 files changed, 502 insertions(+), 286 deletions(-) diff --git a/include/paging.h b/include/paging.h index f7597265..9ab16588 100644 --- a/include/paging.h +++ b/include/paging.h @@ -16,7 +16,7 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* $Id: paging.h,v 1.17 2005-09-03 11:38:18 c2woody Exp $ */ +/* $Id: paging.h,v 1.18 2005-09-03 19:20:08 c2woody Exp $ */ #ifndef DOSBOX_PAGING_H #define DOSBOX_PAGING_H @@ -162,7 +162,6 @@ INLINE Bit16u mem_readw_inline(PhysPt address) { } else return mem_unalignedreadw(address); } - INLINE Bit32u mem_readd_inline(PhysPt address) { if (!(address & 3)) { Bitu index=(address>>12); @@ -197,4 +196,41 @@ INLINE void mem_writed_inline(PhysPt address,Bit32u val) { } else mem_unalignedwrited(address,val); } + +INLINE Bit16u mem_readw_dyncorex86(PhysPt address) { + if ((address & 0xfff)<0xfff) { + Bitu index=(address>>12); + + if (paging.tlb.read[index]) return host_readw(paging.tlb.read[index]+address); + else return paging.tlb.handler[index]->readw(address); + } else return mem_unalignedreadw(address); +} + +INLINE Bit32u mem_readd_dyncorex86(PhysPt address) { + if ((address & 0xfff)<0xffd) { + Bitu index=(address>>12); + + if (paging.tlb.read[index]) return host_readd(paging.tlb.read[index]+address); + else return paging.tlb.handler[index]->readd(address); + } else return mem_unalignedreadd(address); +} + +INLINE void mem_writew_dyncorex86(PhysPt address,Bit16u val) { + if ((address & 0xfff)<0xfff) { + Bitu index=(address>>12); + + if (paging.tlb.write[index]) host_writew(paging.tlb.write[index]+address,val); + else paging.tlb.handler[index]->writew(address,val); + } else mem_unalignedwritew(address,val); +} + +INLINE void mem_writed_dyncorex86(PhysPt address,Bit32u val) { + if ((address & 0xfff)<0xffd) { + Bitu index=(address>>12); + + if (paging.tlb.write[index]) host_writed(paging.tlb.write[index]+address,val); + else paging.tlb.handler[index]->writed(address,val); + } else mem_unalignedwrited(address,val); +} + #endif diff --git a/src/cpu/core_dyn_x86.cpp b/src/cpu/core_dyn_x86.cpp index e66df13f..3ed19175 100644 --- a/src/cpu/core_dyn_x86.cpp +++ b/src/cpu/core_dyn_x86.cpp @@ -194,12 +194,23 @@ static void dyn_loadstate(DynState * state) { } } - static void dyn_synchstate(DynState * state) { for (Bitu i=0;iregs[i]); } } + +static void dyn_saveregister(DynReg * src_reg, DynReg * dst_reg) { + dst_reg->flags=src_reg->flags; + dst_reg->genreg=src_reg->genreg; +} + +static void dyn_restoreregister(DynReg * src_reg, DynReg * dst_reg) { + dst_reg->flags=src_reg->flags; + dst_reg->genreg=src_reg->genreg; + dst_reg->genreg->dynreg=dst_reg; // necessary when register has been released +} + #include "core_dyn_x86/decoder.h" Bits CPU_Core_Dyn_X86_Run(void) { diff --git a/src/cpu/core_dyn_x86/decoder.h b/src/cpu/core_dyn_x86/decoder.h index e1a6b0fd..423d4022 100644 --- a/src/cpu/core_dyn_x86/decoder.h +++ b/src/cpu/core_dyn_x86/decoder.h @@ -106,18 +106,35 @@ static void dyn_read_byte(DynReg * addr,DynReg * dst,Bitu high) { } static void dyn_write_byte(DynReg * addr,DynReg * val,Bitu high) { if (high) gen_call_function((void *)&mem_writeb,"%Dd%Dh",addr,val); - else gen_call_function((void *)&mem_writeb,"%Dd%Dl",addr,val); + else gen_call_function((void *)&mem_writeb,"%Dd%Dd",addr,val); } - static void dyn_read_word(DynReg * addr,DynReg * dst,bool dword) { - if (dword) gen_call_function((void *)&mem_readd,"%Dd%Rd",addr,dst); - else gen_call_function((void *)&mem_readw,"%Dd%Rw",addr,dst); + if (dword) gen_call_function((void *)&mem_readd_dyncorex86,"%Dd%Rd",addr,dst); + else gen_call_function((void *)&mem_readw_dyncorex86,"%Dd%Rw",addr,dst); +} +static void dyn_write_word(DynReg * addr,DynReg * val,bool dword) { + if (dword) gen_call_function((void *)&mem_writed_dyncorex86,"%Dd%Dd",addr,val); + else gen_call_function((void *)&mem_writew_dyncorex86,"%Dd%Dd",addr,val); } -static void dyn_write_word(DynReg * addr,DynReg * val,bool dword) { - if (dword) gen_call_function((void *)&mem_writed,"%Dd%Dd",addr,val); - else gen_call_function((void *)&mem_writew,"%Dd%Dw",addr,val); + +static void dyn_read_byte_release(DynReg * addr,DynReg * dst,Bitu high) { + if (high) gen_call_function((void *)&mem_readb,"%Drd%Rh",addr,dst); + else gen_call_function((void *)&mem_readb,"%Drd%Rl",addr,dst); } +static void dyn_write_byte_release(DynReg * addr,DynReg * val,Bitu high) { + if (high) gen_call_function((void *)&mem_writeb,"%Drd%Dh",addr,val); + else gen_call_function((void *)&mem_writeb,"%Drd%Dd",addr,val); +} +static void dyn_read_word_release(DynReg * addr,DynReg * dst,bool dword) { + if (dword) gen_call_function((void *)&mem_readd_dyncorex86,"%Drd%Rd",addr,dst); + else gen_call_function((void *)&mem_readw_dyncorex86,"%Drd%Rw",addr,dst); +} +static void dyn_write_word_release(DynReg * addr,DynReg * val,bool dword) { + if (dword) gen_call_function((void *)&mem_writed_dyncorex86,"%Drd%Dd",addr,val); + else gen_call_function((void *)&mem_writew_dyncorex86,"%Drd%Dd",addr,val); +} + static void dyn_reduce_cycles(void) { gen_protectflags(); @@ -125,7 +142,7 @@ static void dyn_reduce_cycles(void) { gen_dop_word_imm(DOP_SUB,true,DREG(CYCLES),decode.cycles); } -static void dyn_save_critical_regs(void) { +static void dyn_save_noncritical_regs(void) { gen_releasereg(DREG(EAX)); gen_releasereg(DREG(ECX)); gen_releasereg(DREG(EDX)); @@ -134,6 +151,10 @@ static void dyn_save_critical_regs(void) { gen_releasereg(DREG(EBP)); gen_releasereg(DREG(ESI)); gen_releasereg(DREG(EDI)); +} + +static void dyn_save_critical_regs(void) { + dyn_save_noncritical_regs(); gen_releasereg(DREG(FLAGS)); gen_releasereg(DREG(EIP)); gen_releasereg(DREG(CYCLES)); @@ -171,7 +192,6 @@ static void dyn_push(DynReg * dynreg) { //Can just push the whole 32-bit word as operand gen_call_function((void *)&mem_writew,"%Drd%Dd",DREG(STACK),dynreg); } - gen_releasereg(DREG(STACK)); } static void dyn_pop(DynReg * dynreg) { @@ -191,7 +211,6 @@ static void dyn_pop(DynReg * dynreg) { gen_dop_word_imm(DOP_ADD,true,DREG(ESP),2); } } - gen_releasereg(DREG(STACK)); } static void INLINE dyn_get_modrm(void) { @@ -201,7 +220,7 @@ static void INLINE dyn_get_modrm(void) { decode.modrm.rm=(decode.modrm.val & 7); } -static void dyn_fill_ea(bool addseg=true) { +static void dyn_fill_ea(bool addseg=true, DynReg * reg_ea=DREG(EA)) { DynReg * segbase; if (!decode.big_addr) { Bits imm; @@ -210,47 +229,56 @@ static void dyn_fill_ea(bool addseg=true) { case 1:imm=(Bit8s)decode_fetchb();break; case 2:imm=(Bit16s)decode_fetchw();break; } + DynReg * extend_src=reg_ea; switch (decode.modrm.rm) { case 0:/* BX+SI */ - gen_lea(DREG(EA),DREG(EBX),DREG(ESI),0,imm); + gen_lea(reg_ea,DREG(EBX),DREG(ESI),0,imm); segbase=DREG(DS); break; case 1:/* BX+DI */ - gen_lea(DREG(EA),DREG(EBX),DREG(EDI),0,imm); + gen_lea(reg_ea,DREG(EBX),DREG(EDI),0,imm); segbase=DREG(DS); break; case 2:/* BP+SI */ - gen_lea(DREG(EA),DREG(EBP),DREG(ESI),0,imm); + gen_lea(reg_ea,DREG(EBP),DREG(ESI),0,imm); segbase=DREG(SS); break; case 3:/* BP+DI */ - gen_lea(DREG(EA),DREG(EBP),DREG(EDI),0,imm); + gen_lea(reg_ea,DREG(EBP),DREG(EDI),0,imm); segbase=DREG(SS); break; case 4:/* SI */ - gen_lea(DREG(EA),DREG(ESI),0,0,imm); + if (imm) gen_lea(reg_ea,DREG(ESI),0,0,imm); + else extend_src=DREG(ESI); segbase=DREG(DS); break; case 5:/* DI */ - gen_lea(DREG(EA),DREG(EDI),0,0,imm); + if (imm) gen_lea(reg_ea,DREG(EDI),0,0,imm); + else extend_src=DREG(EDI); segbase=DREG(DS); break; case 6:/* imm/BP */ if (!decode.modrm.mod) { - imm=(Bit16s)decode_fetchw(); - gen_dop_word_imm(DOP_MOV,true,DREG(EA),imm); + imm=decode_fetchw(); + gen_dop_word_imm(DOP_MOV,true,reg_ea,imm); segbase=DREG(DS); + goto skip_extend_word; } else { - gen_lea(DREG(EA),DREG(EBP),0,0,imm); + gen_lea(reg_ea,DREG(EBP),0,0,imm); segbase=DREG(SS); } break; case 7: /* BX */ - gen_lea(DREG(EA),DREG(EBX),0,0,imm); + if (imm) gen_lea(reg_ea,DREG(EBX),0,0,imm); + else extend_src=DREG(EBX); segbase=DREG(DS); break; } - gen_extend_word(false,DREG(EA),DREG(EA)); + gen_extend_word(false,reg_ea,extend_src); +skip_extend_word: + if (addseg) { + gen_lea(reg_ea,reg_ea,decode.segprefix ? decode.segprefix : segbase,0,0); + } } else { Bits imm=0; DynReg * base=0;DynReg * scaled=0;Bitu scale=0; @@ -300,10 +328,17 @@ static void dyn_fill_ea(bool addseg=true) { case 1:imm=(Bit8s)decode_fetchb();break; case 2:imm=(Bit32s)decode_fetchd();break; } - gen_lea(DREG(EA),base,scaled,scale,imm); - } - if (addseg) { - gen_lea(DREG(EA),DREG(EA),decode.segprefix ? decode.segprefix : segbase,0,0); + if (!addseg) { + gen_lea(reg_ea,base,scaled,scale,imm); + } else { + DynReg** seg = decode.segprefix ? &decode.segprefix : &segbase; + if (!base) gen_lea(reg_ea,*seg,scaled,scale,imm); + else if (!scaled) gen_lea(reg_ea,base,*seg,0,imm); + else { + gen_lea(reg_ea,base,scaled,scale,imm); + gen_lea(reg_ea,reg_ea,decode.segprefix ? decode.segprefix : segbase,0,0); + } + } } } @@ -335,13 +370,21 @@ static void dyn_dop_ebgb(DualOps op) { dyn_get_modrm();DynReg * rm_reg=&DynRegs[decode.modrm.reg&3]; if (decode.modrm.mod<3) { dyn_fill_ea(); + if ((op<=DOP_TEST) && (op!=DOP_ADC && op!=DOP_SBB)) set_skipflags(true); dyn_read_byte(DREG(EA),DREG(TMPB),false); - if (op<=DOP_TEST) gen_needflags(); + if (op<=DOP_TEST) { + if (op==DOP_ADC || op==DOP_SBB) gen_needcarry(); + else set_skipflags(false); + } gen_dop_byte(op,DREG(TMPB),0,rm_reg,decode.modrm.reg&4); - dyn_write_byte(DREG(EA),DREG(TMPB),false); - gen_releasereg(DREG(EA));gen_releasereg(DREG(TMPB)); + if (op!=DOP_CMP) dyn_write_byte_release(DREG(EA),DREG(TMPB),false); + else gen_releasereg(DREG(EA)); + gen_releasereg(DREG(TMPB)); } else { - if (op<=DOP_TEST) gen_needflags(); + if (op<=DOP_TEST) { + if (op==DOP_ADC || op==DOP_SBB) gen_needcarry(); + else gen_discardflags(); + } gen_dop_byte(op,&DynRegs[decode.modrm.rm&3],decode.modrm.rm&4,rm_reg,decode.modrm.reg&4); } } @@ -351,12 +394,19 @@ static void dyn_dop_gbeb(DualOps op) { dyn_get_modrm();DynReg * rm_reg=&DynRegs[decode.modrm.reg&3]; if (decode.modrm.mod<3) { dyn_fill_ea(); - dyn_read_byte(DREG(EA),DREG(TMPB),false); - if (op<=DOP_TEST) gen_needflags(); + if ((op<=DOP_TEST) && (op!=DOP_ADC && op!=DOP_SBB)) set_skipflags(true); + dyn_read_byte_release(DREG(EA),DREG(TMPB),false); + if (op<=DOP_TEST) { + if (op==DOP_ADC || op==DOP_SBB) gen_needcarry(); + else set_skipflags(false); + } gen_dop_byte(op,rm_reg,decode.modrm.reg&4,DREG(TMPB),0); - gen_releasereg(DREG(EA));gen_releasereg(DREG(TMPB)); + gen_releasereg(DREG(TMPB)); } else { - if (op<=DOP_TEST) gen_needflags(); + if (op<=DOP_TEST) { + if (op==DOP_ADC || op==DOP_SBB) gen_needcarry(); + else gen_discardflags(); + } gen_dop_byte(op,rm_reg,decode.modrm.reg&4,&DynRegs[decode.modrm.rm&3],decode.modrm.rm&4); } } @@ -364,11 +414,8 @@ static void dyn_dop_gbeb(DualOps op) { static void dyn_mov_ebib(void) { dyn_get_modrm(); if (decode.modrm.mod<3) { - //TODO Maybe not use a temp register here and call mem_writeb directly? dyn_fill_ea(); - gen_dop_byte_imm(DOP_MOV,DREG(TMPB),0,decode_fetchb()); - dyn_write_byte(DREG(EA),DREG(TMPB),false); - gen_releasereg(DREG(EA));gen_releasereg(DREG(TMPB)); + gen_call_write(DREG(EA),decode_fetchb(),1); } else { gen_dop_byte_imm(DOP_MOV,&DynRegs[decode.modrm.rm&3],decode.modrm.rm&4,decode_fetchb()); } @@ -379,8 +426,7 @@ static void dyn_mov_ebgb(void) { DynReg * rm_reg=&DynRegs[decode.modrm.reg&3];Bitu rm_regi=decode.modrm.reg&4; if (decode.modrm.mod<3) { dyn_fill_ea(); - dyn_write_byte(DREG(EA),rm_reg,rm_regi); - gen_releasereg(DREG(EA)); + dyn_write_byte_release(DREG(EA),rm_reg,rm_regi); } else { gen_dop_byte(DOP_MOV,&DynRegs[decode.modrm.rm&3],decode.modrm.rm&4,rm_reg,rm_regi); } @@ -391,8 +437,7 @@ static void dyn_mov_gbeb(void) { DynReg * rm_reg=&DynRegs[decode.modrm.reg&3];Bitu rm_regi=decode.modrm.reg&4; if (decode.modrm.mod<3) { dyn_fill_ea(); - dyn_read_byte(DREG(EA),rm_reg,rm_regi); - gen_releasereg(DREG(EA)); + dyn_read_byte_release(DREG(EA),rm_reg,rm_regi); } else { gen_dop_byte(DOP_MOV,rm_reg,rm_regi,&DynRegs[decode.modrm.rm&3],decode.modrm.rm&4); } @@ -403,13 +448,21 @@ static void dyn_dop_evgv(DualOps op) { DynReg * rm_reg=&DynRegs[decode.modrm.reg]; if (decode.modrm.mod<3) { dyn_fill_ea(); + if ((op<=DOP_TEST) && (op!=DOP_ADC && op!=DOP_SBB)) set_skipflags(true); dyn_read_word(DREG(EA),DREG(TMPW),decode.big_op); - if (op<=DOP_TEST) gen_needflags(); + if (op<=DOP_TEST) { + if (op==DOP_ADC || op==DOP_SBB) gen_needcarry(); + else set_skipflags(false); + } gen_dop_word(op,decode.big_op,DREG(TMPW),rm_reg); - dyn_write_word(DREG(EA),DREG(TMPW),decode.big_op); - gen_releasereg(DREG(EA));gen_releasereg(DREG(TMPW)); + if (op!=DOP_CMP) dyn_write_word_release(DREG(EA),DREG(TMPW),decode.big_op); + else gen_releasereg(DREG(EA)); + gen_releasereg(DREG(TMPW)); } else { - if (op<=DOP_TEST) gen_needflags(); + if (op<=DOP_TEST) { + if (op==DOP_ADC || op==DOP_SBB) gen_needcarry(); + else gen_discardflags(); + } gen_dop_word(op,decode.big_op,&DynRegs[decode.modrm.rm],rm_reg); } } @@ -418,8 +471,8 @@ static void dyn_imul_gvev(Bitu immsize) { dyn_get_modrm();DynReg * src; DynReg * rm_reg=&DynRegs[decode.modrm.reg]; if (decode.modrm.mod<3) { - dyn_fill_ea();dyn_read_word(DREG(EA),DREG(TMPW),decode.big_op); - src=DREG(TMPW);gen_releasereg(DREG(EA)); + dyn_fill_ea();dyn_read_word_release(DREG(EA),DREG(TMPW),decode.big_op); + src=DREG(TMPW); } else { src=&DynRegs[decode.modrm.rm]; } @@ -438,12 +491,19 @@ static void dyn_dop_gvev(DualOps op) { DynReg * rm_reg=&DynRegs[decode.modrm.reg]; if (decode.modrm.mod<3) { dyn_fill_ea(); - dyn_read_word(DREG(EA),DREG(TMPW),decode.big_op); - if (op<=DOP_TEST) gen_needflags(); + if ((op<=DOP_TEST) && (op!=DOP_ADC && op!=DOP_SBB)) set_skipflags(true); + dyn_read_word_release(DREG(EA),DREG(TMPW),decode.big_op); + if (op<=DOP_TEST) { + if (op==DOP_ADC || op==DOP_SBB) gen_needcarry(); + else set_skipflags(false); + } gen_dop_word(op,decode.big_op,rm_reg,DREG(TMPW)); - gen_releasereg(DREG(EA));gen_releasereg(DREG(TMPW)); + gen_releasereg(DREG(TMPW)); } else { - if (op<=DOP_TEST) gen_needflags(); + if (op<=DOP_TEST) { + if (op==DOP_ADC || op==DOP_SBB) gen_needcarry(); + else gen_discardflags(); + } gen_dop_word(op,decode.big_op,rm_reg,&DynRegs[decode.modrm.rm]); } } @@ -453,8 +513,7 @@ static void dyn_mov_evgv(void) { DynReg * rm_reg=&DynRegs[decode.modrm.reg]; if (decode.modrm.mod<3) { dyn_fill_ea(); - dyn_write_word(DREG(EA),rm_reg,decode.big_op); - gen_releasereg(DREG(EA)); + dyn_write_word_release(DREG(EA),rm_reg,decode.big_op); } else { gen_dop_word(DOP_MOV,decode.big_op,&DynRegs[decode.modrm.rm],rm_reg); } @@ -465,8 +524,7 @@ static void dyn_mov_gvev(void) { DynReg * rm_reg=&DynRegs[decode.modrm.reg]; if (decode.modrm.mod<3) { dyn_fill_ea(); - dyn_read_word(DREG(EA),rm_reg,decode.big_op); - gen_releasereg(DREG(EA)); + dyn_read_word_release(DREG(EA),rm_reg,decode.big_op); } else { gen_dop_word(DOP_MOV,decode.big_op,rm_reg,&DynRegs[decode.modrm.rm]); } @@ -475,9 +533,7 @@ static void dyn_mov_eviv(void) { dyn_get_modrm(); if (decode.modrm.mod<3) { dyn_fill_ea(); - gen_dop_word_imm(DOP_MOV,decode.big_op,DREG(TMPW),decode.big_op ? decode_fetchd() : decode_fetchw()); - dyn_write_word(DREG(EA),DREG(TMPW),decode.big_op); - gen_releasereg(DREG(EA));gen_releasereg(DREG(TMPW)); + gen_call_write(DREG(EA),decode.big_op ? decode_fetchd() : decode_fetchw(),decode.big_op?4:2); } else { gen_dop_word_imm(DOP_MOV,decode.big_op,&DynRegs[decode.modrm.rm],decode.big_op ? decode_fetchd() : decode_fetchw()); } @@ -487,8 +543,7 @@ static void dyn_mov_ev_gb(bool sign) { dyn_get_modrm();DynReg * rm_reg=&DynRegs[decode.modrm.reg]; if (decode.modrm.mod<3) { dyn_fill_ea(); - dyn_read_byte(DREG(EA),DREG(TMPB),false); - gen_releasereg(DREG(EA)); + dyn_read_byte_release(DREG(EA),DREG(TMPB),false); gen_extend_byte(sign,decode.big_op,rm_reg,DREG(TMPB),0); gen_releasereg(DREG(TMPB)); } else { @@ -504,9 +559,9 @@ static void dyn_mov_ev_gw(bool sign) { dyn_get_modrm();DynReg * rm_reg=&DynRegs[decode.modrm.reg]; if (decode.modrm.mod<3) { dyn_fill_ea(); - dyn_read_word(DREG(EA),DREG(TMPW),false); - gen_releasereg(DREG(EA)); + dyn_read_word_release(DREG(EA),DREG(TMPW),false); gen_extend_word(sign,rm_reg,DREG(TMPW)); + gen_releasereg(DREG(TMPW)); } else { gen_extend_word(sign,rm_reg,&DynRegs[decode.modrm.rm]); } @@ -524,8 +579,8 @@ static void dyn_dshift_ev_gv(bool left,bool immediate) { if (immediate) gen_dshift_imm(decode.big_op,left,ea_reg,rm_reg,decode_fetchb()); else gen_dshift_cl(decode.big_op,left,ea_reg,rm_reg,DREG(ECX)); if (decode.modrm.mod<3) { - dyn_write_word(DREG(EA),DREG(TMPW),decode.big_op); - gen_releasereg(DREG(EA));gen_releasereg(DREG(TMPW)); + dyn_write_word_release(DREG(EA),DREG(TMPW),decode.big_op); + gen_releasereg(DREG(TMPW)); } } @@ -533,32 +588,50 @@ static void dyn_dshift_ev_gv(bool left,bool immediate) { static DualOps grp1_table[8]={DOP_ADD,DOP_OR,DOP_ADC,DOP_SBB,DOP_AND,DOP_SUB,DOP_XOR,DOP_CMP}; static void dyn_grp1_eb_ib(void) { dyn_get_modrm(); + DualOps op=grp1_table[decode.modrm.reg]; if (decode.modrm.mod<3) { dyn_fill_ea(); + if ((op<=DOP_TEST) && (op!=DOP_ADC && op!=DOP_SBB)) set_skipflags(true); dyn_read_byte(DREG(EA),DREG(TMPB),false); - gen_needflags(); + if (op<=DOP_TEST) { + if (op==DOP_ADC || op==DOP_SBB) gen_needcarry(); + else set_skipflags(false); + } gen_dop_byte_imm(grp1_table[decode.modrm.reg],DREG(TMPB),0,decode_fetchb()); - if (grp1_table[decode.modrm.reg]!=DOP_CMP) dyn_write_byte(DREG(EA),DREG(TMPB),false); - gen_releasereg(DREG(EA));gen_releasereg(DREG(TMPB)); + if (op!=DOP_CMP) dyn_write_byte_release(DREG(EA),DREG(TMPB),false); + else gen_releasereg(DREG(EA)); + gen_releasereg(DREG(TMPB)); } else { - gen_needflags(); + if (op<=DOP_TEST) { + if (op==DOP_ADC || op==DOP_SBB) gen_needcarry(); + else gen_discardflags(); + } gen_dop_byte_imm(grp1_table[decode.modrm.reg],&DynRegs[decode.modrm.rm&3],decode.modrm.rm&4,decode_fetchb()); } } static void dyn_grp1_ev_ivx(bool withbyte) { dyn_get_modrm(); + DualOps op=grp1_table[decode.modrm.reg]; if (decode.modrm.mod<3) { dyn_fill_ea(); + if ((op<=DOP_TEST) && (op!=DOP_ADC && op!=DOP_SBB)) set_skipflags(true); dyn_read_word(DREG(EA),DREG(TMPW),decode.big_op); Bits imm=withbyte ? (Bit8s)decode_fetchb() : (decode.big_op ? decode_fetchd(): decode_fetchw()); - gen_needflags(); + if (op<=DOP_TEST) { + if (op==DOP_ADC || op==DOP_SBB) gen_needcarry(); + else set_skipflags(false); + } gen_dop_word_imm(grp1_table[decode.modrm.reg],decode.big_op,DREG(TMPW),imm); - dyn_write_word(DREG(EA),DREG(TMPW),decode.big_op); - gen_releasereg(DREG(EA));gen_releasereg(DREG(TMPW)); + if (op!=DOP_CMP) dyn_write_word_release(DREG(EA),DREG(TMPW),decode.big_op); + else gen_releasereg(DREG(EA)); + gen_releasereg(DREG(TMPW)); } else { Bits imm=withbyte ? (Bit8s)decode_fetchb() : (decode.big_op ? decode_fetchd(): decode_fetchw()); - gen_needflags(); + if (op<=DOP_TEST) { + if (op==DOP_ADC || op==DOP_SBB) gen_needcarry(); + else gen_discardflags(); + } gen_dop_word_imm(grp1_table[decode.modrm.reg],decode.big_op,&DynRegs[decode.modrm.rm],imm); } } @@ -577,21 +650,31 @@ static void dyn_grp2_eb(grp2_types type) { src=&DynRegs[decode.modrm.rm&3]; src_i=decode.modrm.rm&4; } - gen_needflags(); switch (type) { case grp2_1: + /* rotates (first 4 ops) alter cf/of only; shifts (last 4 ops) alter all flags */ + if (decode.modrm.reg < 4) gen_needflags(); + else gen_discardflags(); gen_shift_byte_imm(decode.modrm.reg,src,src_i,1); break; - case grp2_imm: - gen_shift_byte_imm(decode.modrm.reg,src,src_i,decode_fetchb()); + case grp2_imm: { + Bit8u imm=decode_fetchb(); + if (imm) { + /* rotates (first 4 ops) alter cf/of only; shifts (last 4 ops) alter all flags */ + if (decode.modrm.reg < 4) gen_needflags(); + else gen_discardflags(); + gen_shift_byte_imm(decode.modrm.reg,src,src_i,imm); + } else return; + } break; case grp2_cl: + gen_needflags(); /* flags must not be changed on ecx==0 */ gen_shift_byte_cl (decode.modrm.reg,src,src_i,DREG(ECX)); break; } if (decode.modrm.mod<3) { - dyn_write_byte(DREG(EA),src,false); - gen_releasereg(DREG(EA));gen_releasereg(src); + dyn_write_byte_release(DREG(EA),src,false); + gen_releasereg(src); } } @@ -603,21 +686,31 @@ static void dyn_grp2_ev(grp2_types type) { } else { src=&DynRegs[decode.modrm.rm]; } - gen_needflags(); switch (type) { case grp2_1: + /* rotates (first 4 ops) alter cf/of only; shifts (last 4 ops) alter all flags */ + if (decode.modrm.reg < 4) gen_needflags(); + else gen_discardflags(); gen_shift_word_imm(decode.modrm.reg,decode.big_op,src,1); break; - case grp2_imm: - gen_shift_word_imm(decode.modrm.reg,decode.big_op,src,decode_fetchb()); + case grp2_imm: { + Bit8u imm=decode_fetchb(); + if (imm) { + /* rotates (first 4 ops) alter cf/of only; shifts (last 4 ops) alter all flags */ + if (decode.modrm.reg < 4) gen_needflags(); + else gen_discardflags(); + gen_shift_word_imm(decode.modrm.reg,decode.big_op,src,imm); + } else return; + } break; case grp2_cl: + gen_needflags(); /* flags must not be changed on ecx==0 */ gen_shift_word_cl (decode.modrm.reg,decode.big_op,src,DREG(ECX)); break; } if (decode.modrm.mod<3) { - dyn_write_word(DREG(EA),src,decode.big_op); - gen_releasereg(DREG(EA));gen_releasereg(src); + dyn_write_word_release(DREG(EA),src,decode.big_op); + gen_releasereg(src); } } @@ -625,6 +718,7 @@ static void dyn_grp3_eb(void) { dyn_get_modrm();DynReg * src;Bit8u src_i; if (decode.modrm.mod<3) { dyn_fill_ea(); + if ((decode.modrm.reg==0) || (decode.modrm.reg==3)) set_skipflags(true); dyn_read_byte(DREG(EA),DREG(TMPB),false); src=DREG(TMPB);src_i=0; } else { @@ -633,13 +727,13 @@ static void dyn_grp3_eb(void) { } switch (decode.modrm.reg) { case 0x0: /* test eb,ib */ - gen_needflags();gen_dop_byte_imm(DOP_TEST,src,src_i,decode_fetchb()); + set_skipflags(false);gen_dop_byte_imm(DOP_TEST,src,src_i,decode_fetchb()); goto skipsave; case 0x2: /* NOT Eb */ - gen_needflags();gen_sop_byte(SOP_NOT,src,src_i); + gen_sop_byte(SOP_NOT,src,src_i); break; case 0x3: /* NEG Eb */ - gen_needflags();gen_sop_byte(SOP_NEG,src,src_i); + set_skipflags(false);gen_sop_byte(SOP_NEG,src,src_i); break; case 0x4: /* mul Eb */ gen_needflags();gen_mul_byte(false,DREG(EAX),src,src_i); @@ -651,15 +745,15 @@ static void dyn_grp3_eb(void) { case 0x7: /* idiv Eb */ /* EAX could be used, so precache it */ if (decode.modrm.mod==3) - gen_dop_byte(DOP_MOV,DREG(TMPB),0,&DynRegs[decode.modrm.rm&3],decode.modrm.rm&4); + gen_dop_byte(DOP_MOV,src,0,&DynRegs[decode.modrm.rm&3],decode.modrm.rm&4); gen_releasereg(DREG(EAX)); gen_call_function((decode.modrm.reg==6) ? (void *)&dyn_helper_divb : (void *)&dyn_helper_idivb, - "%Rd%Drl",DREG(TMPB),DREG(TMPB)); + "%Rd%Dd",DREG(TMPB),src); dyn_check_bool_exception(DREG(TMPB)); goto skipsave; } /* Save the result if memory op */ - if (decode.modrm.mod<3) dyn_write_byte(DREG(EA),DREG(TMPB),false); + if (decode.modrm.mod<3) dyn_write_byte_release(DREG(EA),src,false); skipsave: gen_releasereg(DREG(TMPB));gen_releasereg(DREG(EA)); } @@ -668,17 +762,18 @@ static void dyn_grp3_ev(void) { dyn_get_modrm();DynReg * src; if (decode.modrm.mod<3) { dyn_fill_ea();src=DREG(TMPW); + if ((decode.modrm.reg==0) || (decode.modrm.reg==3)) set_skipflags(true); dyn_read_word(DREG(EA),DREG(TMPW),decode.big_op); } else src=&DynRegs[decode.modrm.rm]; switch (decode.modrm.reg) { case 0x0: /* test ev,iv */ - gen_needflags();gen_dop_word_imm(DOP_TEST,decode.big_op,src,decode.big_op ? decode_fetchd() : decode_fetchw()); + set_skipflags(false);gen_dop_word_imm(DOP_TEST,decode.big_op,src,decode.big_op ? decode_fetchd() : decode_fetchw()); goto skipsave; case 0x2: /* NOT Ev */ - gen_needflags();gen_sop_word(SOP_NOT,decode.big_op,src); + gen_sop_word(SOP_NOT,decode.big_op,src); break; case 0x3: /* NEG Eb */ - gen_needflags();gen_sop_word(SOP_NEG,decode.big_op,src); + set_skipflags(false);gen_sop_word(SOP_NEG,decode.big_op,src); break; case 0x4: /* mul Eb */ gen_needflags();gen_mul_word(false,DREG(EAX),DREG(EDX),decode.big_op,src); @@ -690,17 +785,18 @@ static void dyn_grp3_ev(void) { case 0x7: /* idiv Eb */ /* EAX could be used, so precache it */ if (decode.modrm.mod==3) - gen_dop_word(DOP_MOV,decode.big_op,DREG(TMPW),&DynRegs[decode.modrm.rm]); + gen_dop_word(DOP_MOV,decode.big_op,src,&DynRegs[decode.modrm.rm]); gen_releasereg(DREG(EAX));gen_releasereg(DREG(EDX)); void * func=(decode.modrm.reg==6) ? (decode.big_op ? (void *)&dyn_helper_divd : (void *)&dyn_helper_divw) : (decode.big_op ? (void *)&dyn_helper_idivd : (void *)&dyn_helper_idivw); - gen_call_function(func,"%Rd%Drd",DREG(TMPB),DREG(TMPW)); + gen_call_function(func,"%Rd%Dd",DREG(TMPB),src); dyn_check_bool_exception(DREG(TMPB)); + gen_releasereg(DREG(TMPB)); goto skipsave; } /* Save the result if memory op */ - if (decode.modrm.mod<3) dyn_write_word(DREG(EA),DREG(TMPW),decode.big_op); + if (decode.modrm.mod<3) dyn_write_word_release(DREG(EA),src,decode.big_op); skipsave: gen_releasereg(DREG(TMPW));gen_releasereg(DREG(EA)); } @@ -710,8 +806,7 @@ static void dyn_mov_ev_seg(void) { gen_load_host(&Segs.val[decode.modrm.reg],DREG(TMPW),2); if (decode.modrm.mod<3) { dyn_fill_ea(); - dyn_write_word(DREG(EA),DREG(TMPW),false); - gen_releasereg(DREG(EA)); + dyn_write_word_release(DREG(EA),DREG(TMPW),false); } else { gen_dop_word(DOP_MOV,decode.big_op,&DynRegs[decode.modrm.rm],DREG(TMPW)); } @@ -722,6 +817,7 @@ static void dyn_load_seg(SegNames seg,DynReg * src) { if (cpu.pmode) { gen_call_function((void *)&CPU_SetSegGeneral,"%Rd%Id%Drw",DREG(TMPB),seg,src); dyn_check_bool_exception(DREG(TMPB)); + gen_releasereg(DREG(TMPB)); } else gen_call_function((void *)CPU_SetSegGeneral,"%Id%Drw",seg,src); gen_releasereg(&DynRegs[G_ES+seg]); if (seg==ss) gen_releasereg(DREG(SMASK)); @@ -729,13 +825,12 @@ static void dyn_load_seg(SegNames seg,DynReg * src) { static void dyn_load_seg_off_ea(SegNames seg) { dyn_get_modrm(); - if (decode.modrm.mod<3) { + if (GCC_UNLIKELY(decode.modrm.mod<3)) { dyn_fill_ea(); gen_lea(DREG(TMPW),DREG(EA),0,0,decode.big_op ? 4:2); dyn_read_word(DREG(TMPW),DREG(TMPW),false); dyn_load_seg(seg,DREG(TMPW));gen_releasereg(DREG(TMPW)); - dyn_read_word(DREG(EA),&DynRegs[decode.modrm.reg],decode.big_op); - gen_releasereg(DREG(EA)); + dyn_read_word_release(DREG(EA),&DynRegs[decode.modrm.reg],decode.big_op); } else { IllegalOption(); } @@ -744,7 +839,7 @@ static void dyn_load_seg_off_ea(SegNames seg) { static void dyn_mov_seg_ev(void) { dyn_get_modrm(); SegNames seg=(SegNames)decode.modrm.reg; - if (seg==cs) IllegalOption(); + if (GCC_UNLIKELY(seg==cs)) IllegalOption(); if (decode.modrm.mod<3) { dyn_fill_ea(); dyn_read_word(DREG(EA),DREG(EA),false); @@ -770,6 +865,7 @@ static void dyn_pop_seg(SegNames seg) { gen_releasereg(DREG(ESP)); gen_call_function((void *)&CPU_PopSeg,"%Rd%Id%Id",DREG(TMPB),seg,decode.big_op); dyn_check_bool_exception(DREG(TMPB)); + gen_releasereg(DREG(TMPB)); gen_releasereg(&DynRegs[G_ES+seg]); gen_releasereg(DREG(ESP)); if (seg==ss) gen_releasereg(DREG(SMASK)); @@ -781,8 +877,7 @@ static void dyn_pop_ev(void) { dyn_get_modrm(); if (decode.modrm.mod<3) { dyn_fill_ea(); - dyn_write_word(DREG(EA),DREG(TMPW),decode.big_op); - gen_releasereg(DREG(EA)); + dyn_write_word_release(DREG(EA),DREG(TMPW),decode.big_op); } else { gen_dop_word(DOP_MOV,decode.big_op,&DynRegs[decode.modrm.rm],DREG(TMPW)); } @@ -810,7 +905,7 @@ static void dyn_leave(void) { } static void dyn_segprefix(SegNames seg) { - if (decode.segprefix) IllegalOption(); + if (GCC_UNLIKELY(decode.segprefix)) IllegalOption(); decode.segprefix=&DynRegs[G_ES+seg]; } @@ -840,21 +935,36 @@ static void dyn_exit_link(Bits eip_change) { static void dyn_branched_exit(BranchTypes btype,Bit32s eip_add) { Bitu eip_base=decode.code-decode.code_start; - dyn_reduce_cycles(); - dyn_save_critical_regs(); - gen_needflags(); - gen_protectflags(); + gen_needflags(); + gen_protectflags(); + dyn_save_noncritical_regs(); + gen_releasereg(DREG(FLAGS)); + gen_releasereg(DREG(EIP)); + + gen_preloadreg(DREG(CYCLES)); + gen_preloadreg(DREG(EIP)); + DynReg save_cycles,save_eip; + dyn_saveregister(DREG(CYCLES),&save_cycles); + dyn_saveregister(DREG(EIP),&save_eip); Bit8u * data=gen_create_branch(btype); - /* Branch not taken */ - gen_dop_word_imm(DOP_ADD,decode.big_op,DREG(EIP),eip_base); - gen_releasereg(DREG(EIP)); - gen_jmp_ptr(&decode.block->link[0].to,offsetof(CacheBlock,cache.start)); - gen_fill_branch(data); - /* Branch taken */ - gen_dop_word_imm(DOP_ADD,decode.big_op,DREG(EIP),eip_base+eip_add); - gen_releasereg(DREG(EIP)); - gen_jmp_ptr(&decode.block->link[1].to,offsetof(CacheBlock,cache.start)); - dyn_closeblock(); + + /* Branch not taken */ + dyn_reduce_cycles(); + gen_dop_word_imm(DOP_ADD,decode.big_op,DREG(EIP),eip_base); + gen_releasereg(DREG(CYCLES)); + gen_releasereg(DREG(EIP)); + gen_jmp_ptr(&decode.block->link[0].to,offsetof(CacheBlock,cache.start)); + gen_fill_branch(data); + + /* Branch taken */ + dyn_restoreregister(&save_cycles,DREG(CYCLES)); + dyn_restoreregister(&save_eip,DREG(EIP)); + dyn_reduce_cycles(); + gen_dop_word_imm(DOP_ADD,decode.big_op,DREG(EIP),eip_base+eip_add); + gen_releasereg(DREG(CYCLES)); + gen_releasereg(DREG(EIP)); + gen_jmp_ptr(&decode.block->link[1].to,offsetof(CacheBlock,cache.start)); + dyn_closeblock(); } enum LoopTypes { @@ -1040,8 +1150,8 @@ restart_prefix: case 0x01:dyn_dop_evgv(DOP_ADD);break; case 0x02:dyn_dop_gbeb(DOP_ADD);break; case 0x03:dyn_dop_gvev(DOP_ADD);break; - case 0x04:gen_needflags();gen_dop_byte_imm(DOP_ADD,DREG(EAX),0,decode_fetchb());break; - case 0x05:gen_needflags();gen_dop_word_imm(DOP_ADD,decode.big_op,DREG(EAX),decode.big_op ? decode_fetchd() : decode_fetchw());break; + case 0x04:gen_discardflags();gen_dop_byte_imm(DOP_ADD,DREG(EAX),0,decode_fetchb());break; + case 0x05:gen_discardflags();gen_dop_word_imm(DOP_ADD,decode.big_op,DREG(EAX),decode.big_op ? decode_fetchd() : decode_fetchw());break; case 0x06:dyn_push_seg(es);break; case 0x07:dyn_pop_seg(es);break; @@ -1049,8 +1159,8 @@ restart_prefix: case 0x09:dyn_dop_evgv(DOP_OR);break; case 0x0a:dyn_dop_gbeb(DOP_OR);break; case 0x0b:dyn_dop_gvev(DOP_OR);break; - case 0x0c:gen_needflags();gen_dop_byte_imm(DOP_OR,DREG(EAX),0,decode_fetchb());break; - case 0x0d:gen_needflags();gen_dop_word_imm(DOP_OR,decode.big_op,DREG(EAX),decode.big_op ? decode_fetchd() : decode_fetchw());break; + case 0x0c:gen_discardflags();gen_dop_byte_imm(DOP_OR,DREG(EAX),0,decode_fetchb());break; + case 0x0d:gen_discardflags();gen_dop_word_imm(DOP_OR,decode.big_op,DREG(EAX),decode.big_op ? decode_fetchd() : decode_fetchw());break; case 0x0e:dyn_push_seg(cs);break; case 0x0f: { @@ -1096,8 +1206,8 @@ restart_prefix: case 0x11:dyn_dop_evgv(DOP_ADC);break; case 0x12:dyn_dop_gbeb(DOP_ADC);break; case 0x13:dyn_dop_gvev(DOP_ADC);break; - case 0x14:gen_needflags();gen_dop_byte_imm(DOP_ADC,DREG(EAX),0,decode_fetchb());break; - case 0x15:gen_needflags();gen_dop_word_imm(DOP_ADC,decode.big_op,DREG(EAX),decode.big_op ? decode_fetchd() : decode_fetchw());break; + case 0x14:gen_needcarry();gen_dop_byte_imm(DOP_ADC,DREG(EAX),0,decode_fetchb());break; + case 0x15:gen_needcarry();gen_dop_word_imm(DOP_ADC,decode.big_op,DREG(EAX),decode.big_op ? decode_fetchd() : decode_fetchw());break; case 0x16:dyn_push_seg(ss);break; case 0x17:dyn_pop_seg(ss);break; @@ -1105,48 +1215,48 @@ restart_prefix: case 0x19:dyn_dop_evgv(DOP_SBB);break; case 0x1a:dyn_dop_gbeb(DOP_SBB);break; case 0x1b:dyn_dop_gvev(DOP_SBB);break; - case 0x1c:gen_needflags();gen_dop_byte_imm(DOP_SBB,DREG(EAX),0,decode_fetchb());break; - case 0x1d:gen_needflags();gen_dop_word_imm(DOP_SBB,decode.big_op,DREG(EAX),decode.big_op ? decode_fetchd() : decode_fetchw());break; + case 0x1c:gen_needcarry();gen_dop_byte_imm(DOP_SBB,DREG(EAX),0,decode_fetchb());break; + case 0x1d:gen_needcarry();gen_dop_word_imm(DOP_SBB,decode.big_op,DREG(EAX),decode.big_op ? decode_fetchd() : decode_fetchw());break; case 0x1e:dyn_push_seg(ds);break; case 0x1f:dyn_pop_seg(ds);break; case 0x20:dyn_dop_ebgb(DOP_AND);break; case 0x21:dyn_dop_evgv(DOP_AND);break; case 0x22:dyn_dop_gbeb(DOP_AND);break; case 0x23:dyn_dop_gvev(DOP_AND);break; - case 0x24:gen_needflags();gen_dop_byte_imm(DOP_AND,DREG(EAX),0,decode_fetchb());break; - case 0x25:gen_needflags();gen_dop_word_imm(DOP_AND,decode.big_op,DREG(EAX),decode.big_op ? decode_fetchd() : decode_fetchw());break; + case 0x24:gen_discardflags();gen_dop_byte_imm(DOP_AND,DREG(EAX),0,decode_fetchb());break; + case 0x25:gen_discardflags();gen_dop_word_imm(DOP_AND,decode.big_op,DREG(EAX),decode.big_op ? decode_fetchd() : decode_fetchw());break; case 0x26:dyn_segprefix(es);goto restart_prefix; case 0x28:dyn_dop_ebgb(DOP_SUB);break; case 0x29:dyn_dop_evgv(DOP_SUB);break; case 0x2a:dyn_dop_gbeb(DOP_SUB);break; case 0x2b:dyn_dop_gvev(DOP_SUB);break; - case 0x2c:gen_needflags();gen_dop_byte_imm(DOP_SUB,DREG(EAX),0,decode_fetchb());break; - case 0x2d:gen_needflags();gen_dop_word_imm(DOP_SUB,decode.big_op,DREG(EAX),decode.big_op ? decode_fetchd() : decode_fetchw());break; + case 0x2c:gen_discardflags();gen_dop_byte_imm(DOP_SUB,DREG(EAX),0,decode_fetchb());break; + case 0x2d:gen_discardflags();gen_dop_word_imm(DOP_SUB,decode.big_op,DREG(EAX),decode.big_op ? decode_fetchd() : decode_fetchw());break; case 0x2e:dyn_segprefix(cs);goto restart_prefix; case 0x30:dyn_dop_ebgb(DOP_XOR);break; case 0x31:dyn_dop_evgv(DOP_XOR);break; case 0x32:dyn_dop_gbeb(DOP_XOR);break; case 0x33:dyn_dop_gvev(DOP_XOR);break; - case 0x34:gen_needflags();gen_dop_byte_imm(DOP_XOR,DREG(EAX),0,decode_fetchb());break; - case 0x35:gen_needflags();gen_dop_word_imm(DOP_XOR,decode.big_op,DREG(EAX),decode.big_op ? decode_fetchd() : decode_fetchw());break; + case 0x34:gen_discardflags();gen_dop_byte_imm(DOP_XOR,DREG(EAX),0,decode_fetchb());break; + case 0x35:gen_discardflags();gen_dop_word_imm(DOP_XOR,decode.big_op,DREG(EAX),decode.big_op ? decode_fetchd() : decode_fetchw());break; case 0x36:dyn_segprefix(ss);goto restart_prefix; case 0x38:dyn_dop_ebgb(DOP_CMP);break; case 0x39:dyn_dop_evgv(DOP_CMP);break; case 0x3a:dyn_dop_gbeb(DOP_CMP);break; case 0x3b:dyn_dop_gvev(DOP_CMP);break; - case 0x3c:gen_needflags();gen_dop_byte_imm(DOP_CMP,DREG(EAX),0,decode_fetchb());break; - case 0x3d:gen_needflags();gen_dop_word_imm(DOP_CMP,decode.big_op,DREG(EAX),decode.big_op ? decode_fetchd() : decode_fetchw());break; + case 0x3c:gen_discardflags();gen_dop_byte_imm(DOP_CMP,DREG(EAX),0,decode_fetchb());break; + case 0x3d:gen_discardflags();gen_dop_word_imm(DOP_CMP,decode.big_op,DREG(EAX),decode.big_op ? decode_fetchd() : decode_fetchw());break; case 0x3e:dyn_segprefix(ds);goto restart_prefix; /* INC/DEC general register */ case 0x40:case 0x41:case 0x42:case 0x43:case 0x44:case 0x45:case 0x46:case 0x47: - gen_needflags();gen_sop_word(SOP_INC,decode.big_op,&DynRegs[opcode&7]); + gen_needcarry();gen_sop_word(SOP_INC,decode.big_op,&DynRegs[opcode&7]); break; case 0x48:case 0x49:case 0x4a:case 0x4b:case 0x4c:case 0x4d:case 0x4e:case 0x4f: - gen_needflags();gen_sop_word(SOP_DEC,decode.big_op,&DynRegs[opcode&7]); + gen_needcarry();gen_sop_word(SOP_DEC,decode.big_op,&DynRegs[opcode&7]); break; /* PUSH/POP General register */ case 0x50:case 0x51:case 0x52:case 0x53:case 0x55:case 0x56:case 0x57: @@ -1220,9 +1330,14 @@ restart_prefix: case 0x8c:dyn_mov_ev_seg();break; /* LEA Gv */ case 0x8d: - dyn_get_modrm();dyn_fill_ea(false); - gen_dop_word(DOP_MOV,decode.big_op,&DynRegs[decode.modrm.reg],DREG(EA)); - gen_releasereg(DREG(EA)); + dyn_get_modrm(); + if (decode.big_op) { + dyn_fill_ea(false,&DynRegs[decode.modrm.reg]); + } else { + dyn_fill_ea(false); + gen_dop_word(DOP_MOV,decode.big_op,&DynRegs[decode.modrm.reg],DREG(EA)); + gen_releasereg(DREG(EA)); + } break; /* Mov seg,ev */ case 0x8e:dyn_mov_seg_ev();break; @@ -1265,36 +1380,32 @@ restart_prefix: case 0xa0: gen_lea(DREG(EA),decode.segprefix ? decode.segprefix : DREG(DS),0,0, decode.big_addr ? decode_fetchd() : decode_fetchw()); - dyn_read_byte(DREG(EA),DREG(EAX),false); - gen_releasereg(DREG(EA)); + dyn_read_byte_release(DREG(EA),DREG(EAX),false); break; /* MOV AX,direct addresses */ case 0xa1: gen_lea(DREG(EA),decode.segprefix ? decode.segprefix : DREG(DS),0,0, decode.big_addr ? decode_fetchd() : decode_fetchw()); - dyn_read_word(DREG(EA),DREG(EAX),decode.big_op); - gen_releasereg(DREG(EA)); + dyn_read_word_release(DREG(EA),DREG(EAX),decode.big_op); break; /* MOV direct address,AL */ case 0xa2: gen_lea(DREG(EA),decode.segprefix ? decode.segprefix : DREG(DS),0,0, decode.big_addr ? decode_fetchd() : decode_fetchw()); - dyn_write_byte(DREG(EA),DREG(EAX),false); - gen_releasereg(DREG(EA)); + dyn_write_byte_release(DREG(EA),DREG(EAX),false); break; /* MOV direct addresses,AX */ case 0xa3: gen_lea(DREG(EA),decode.segprefix ? decode.segprefix : DREG(DS),0,0, decode.big_addr ? decode_fetchd() : decode_fetchw()); - dyn_write_word(DREG(EA),DREG(EAX),decode.big_op); - gen_releasereg(DREG(EA)); + dyn_write_word_release(DREG(EA),DREG(EAX),decode.big_op); break; /* MOVSB/W/D*/ case 0xa4:dyn_string(STR_MOVSB);break; case 0xa5:dyn_string(decode.big_op ? STR_MOVSD : STR_MOVSW);break; /* TEST AL,AX Imm */ - case 0xa8:gen_needflags();gen_dop_byte_imm(DOP_TEST,DREG(EAX),0,decode_fetchb());break; - case 0xa9:gen_needflags();gen_dop_word_imm(DOP_TEST,decode.big_op,DREG(EAX),decode.big_op ? decode_fetchd() : decode_fetchw());break; + case 0xa8:gen_discardflags();gen_dop_byte_imm(DOP_TEST,DREG(EAX),0,decode_fetchb());break; + case 0xa9:gen_discardflags();gen_dop_word_imm(DOP_TEST,decode.big_op,DREG(EAX),decode.big_op ? decode_fetchd() : decode_fetchw());break; /* STOSB/W/D*/ case 0xaa:dyn_string(STR_STOSB);break; case 0xab:dyn_string(decode.big_op ? STR_STOSD : STR_STOSW);break; @@ -1377,7 +1488,6 @@ restart_prefix: case 0xe2:dyn_loop(LOOP_NONE);goto finish_block; case 0xe3:dyn_loop(LOOP_JCXZ);goto finish_block; //IN AL/AX,imm - case 0xe4:gen_call_function((void*)&IO_ReadB,"%Id%Rl",decode_fetchb(),DREG(EAX));break; case 0xe5: if (decode.big_op) { @@ -1424,7 +1534,6 @@ restart_prefix: gen_call_function((void*)&IO_WriteW,"%Dw%Dw",DREG(EDX),DREG(EAX)); } break; - case 0xf2: //REPNE/NZ decode.rep=REP_NZ; goto restart_prefix; @@ -1444,10 +1553,12 @@ restart_prefix: case 0xfa: //CLI gen_call_function((void *)&CPU_CLI,"%Rd",DREG(TMPB)); if (cpu.pmode) dyn_check_bool_exception(DREG(TMPB)); + gen_releasereg(DREG(TMPB)); break; case 0xfb: //STI gen_call_function((void *)&CPU_STI,"%Rd",DREG(TMPB)); if (cpu.pmode) dyn_check_bool_exception(DREG(TMPB)); + gen_releasereg(DREG(TMPB)); if (max_opcodes<=0) max_opcodes=1; //Allow 1 extra opcode break; case 0xfc: //CLD @@ -1468,12 +1579,12 @@ restart_prefix: case 0x1://DEC Eb if (decode.modrm.mod<3) { dyn_fill_ea();dyn_read_byte(DREG(EA),DREG(TMPB),false); - gen_needflags(); + gen_needcarry(); gen_sop_byte(decode.modrm.reg==0 ? SOP_INC : SOP_DEC,DREG(TMPB),0); - dyn_write_byte(DREG(EA),DREG(TMPB),false); - gen_releasereg(DREG(EA));gen_releasereg(DREG(TMPB)); + dyn_write_byte_release(DREG(EA),DREG(TMPB),false); + gen_releasereg(DREG(TMPB)); } else { - gen_needflags(); + gen_needcarry(); gen_sop_byte(decode.modrm.reg==0 ? SOP_INC : SOP_DEC, &DynRegs[decode.modrm.rm&3],decode.modrm.rm&4); } @@ -1500,11 +1611,11 @@ restart_prefix: switch (decode.modrm.reg) { case 0x0://INC Ev case 0x1://DEC Ev - gen_needflags(); + gen_needcarry(); gen_sop_word(decode.modrm.reg==0 ? SOP_INC : SOP_DEC,decode.big_op,src); if (decode.modrm.mod<3){ - dyn_write_word(DREG(EA),DREG(TMPW),decode.big_op); - gen_releasereg(DREG(EA));gen_releasereg(DREG(TMPW)); + dyn_write_word_release(DREG(EA),DREG(TMPW),decode.big_op); + gen_releasereg(DREG(TMPW)); } break; case 0x2: /* CALL Ev */ @@ -1530,6 +1641,7 @@ restart_prefix: dyn_flags_host_to_gen(); goto core_close_block; case 0x6: /* PUSH Ev */ + gen_releasereg(DREG(EA)); dyn_push(src); break; default: diff --git a/src/cpu/core_dyn_x86/risc_x86.h b/src/cpu/core_dyn_x86/risc_x86.h index 3f50ae83..53e8df24 100644 --- a/src/cpu/core_dyn_x86/risc_x86.h +++ b/src/cpu/core_dyn_x86/risc_x86.h @@ -39,36 +39,35 @@ static struct { class GenReg { public: - GenReg(Bit8u _index,bool _protect) { - index=_index;protect=_protect; + GenReg(Bit8u _index) { + index=_index; notusable=false;dynreg=0; } DynReg * dynreg; Bitu last_used; //Keeps track of last assigned regs Bit8u index; bool notusable; - bool protect; - void Load(DynReg * _dynreg) { + void Load(DynReg * _dynreg,bool stale=false) { if (!_dynreg) return; - if (dynreg) Clear(); + if (GCC_UNLIKELY(dynreg)) Clear(); dynreg=_dynreg; last_used=x86gen.last_used; dynreg->flags&=~DYNFLG_CHANGED; dynreg->genreg=this; - if (dynreg->flags & (DYNFLG_LOAD|DYNFLG_ACTIVE)) { + if ((!stale) && (dynreg->flags & (DYNFLG_LOAD|DYNFLG_ACTIVE))) { cache_addw(0x058b+(index << (8+3))); //Mov reg,[data] cache_addd((Bit32u)dynreg->data); } dynreg->flags|=DYNFLG_ACTIVE; } void Save(void) { - if (!dynreg) IllegalOption(); + if (GCC_UNLIKELY(!dynreg)) IllegalOption(); dynreg->flags&=~DYNFLG_CHANGED; cache_addw(0x0589+(index << (8+3))); //Mov [data],reg cache_addd((Bit32u)dynreg->data); } void Release(void) { - if (!dynreg) return; + if (GCC_UNLIKELY(!dynreg)) return; if (dynreg->flags&DYNFLG_CHANGED && dynreg->flags&DYNFLG_SAVE) { Save(); } @@ -82,8 +81,6 @@ public: } dynreg->genreg=0;dynreg=0; } - - }; static BlockReturn gen_runcode(Bit8u * code) { @@ -131,7 +128,7 @@ return_address: return retval; } -static GenReg * FindDynReg(DynReg * dynreg) { +static GenReg * FindDynReg(DynReg * dynreg,bool stale=false) { x86gen.last_used++; if (dynreg->genreg) { dynreg->genreg->last_used=x86gen.last_used; @@ -143,11 +140,11 @@ static GenReg * FindDynReg(DynReg * dynreg) { first_used=-1; if (dynreg->flags & DYNFLG_HAS8) { /* Has to be eax,ebx,ecx,edx */ - for (i=first_index=0;i<=X86_REG_EDX;i++) { + for (i=first_index=0;i<=X86_REG_EBX;i++) { GenReg * genreg=x86gen.regs[i]; if (genreg->notusable) continue; if (!(genreg->dynreg)) { - genreg->Load(dynreg); + genreg->Load(dynreg,stale); return genreg; } if (genreg->last_usedLoad(dynreg); - return newreg; } else { for (i=first_index=X86_REGS-1;i>=0;i--) { GenReg * genreg=x86gen.regs[i]; if (genreg->notusable) continue; if (!(genreg->dynreg)) { - genreg->Load(dynreg); + genreg->Load(dynreg,stale); return genreg; } if (genreg->last_usedLoad(dynreg); - return newreg; } + /* No free register found use earliest assigned one */ + GenReg * newreg=x86gen.regs[first_index]; + newreg->Load(dynreg,stale); + return newreg; } static GenReg * ForceDynReg(GenReg * genreg,DynReg * dynreg) { @@ -242,6 +235,30 @@ static void gen_protectflags(void) { } } +static void gen_discardflags(void) { + if (!x86gen.flagsactive) { + x86gen.flagsactive=true; + cache_addw(0xc483); //ADD ESP,4 + cache_addb(0x4); + } +} + +static void gen_needcarry(void) { + if (!x86gen.flagsactive) { + x86gen.flagsactive=true; + cache_addw(0x2cd1); //SHR DWORD [ESP],1 + cache_addb(0x24); + cache_addd(0x0424648d); //LEA ESP,[ESP+4] + } +} + +static bool skip_flags=false; + +static void set_skipflags(bool state) { + if (!state) gen_discardflags(); + skip_flags=state; +} + static void gen_reinit(void) { x86gen.last_used=0; x86gen.flagsactive=false; @@ -252,87 +269,72 @@ static void gen_reinit(void) { static void gen_dop_byte(DualOps op,DynReg * dr1,Bit8u di1,DynReg * dr2,Bit8u di2) { GenReg * gr1=FindDynReg(dr1);GenReg * gr2=FindDynReg(dr2); + Bit8u tmp; switch (op) { - case DOP_ADD:cache_addb(0x02);dr1->flags|=DYNFLG_CHANGED;break; - case DOP_OR: cache_addb(0x0a);dr1->flags|=DYNFLG_CHANGED;break; - case DOP_ADC:cache_addb(0x12);dr1->flags|=DYNFLG_CHANGED;break; - case DOP_SBB:cache_addb(0x1a);dr1->flags|=DYNFLG_CHANGED;break; - case DOP_AND:cache_addb(0x22);dr1->flags|=DYNFLG_CHANGED;break; - case DOP_SUB:cache_addb(0x2a);dr1->flags|=DYNFLG_CHANGED;break; - case DOP_XOR:cache_addb(0x32);dr1->flags|=DYNFLG_CHANGED;break; - case DOP_CMP:cache_addb(0x3a);break; - case DOP_MOV:cache_addb(0x8a);dr1->flags|=DYNFLG_CHANGED;break; - case DOP_XCHG:cache_addb(0x86);dr1->flags|=DYNFLG_CHANGED;dr2->flags|=DYNFLG_CHANGED;break; - case DOP_TEST:cache_addb(0x84);break; + case DOP_ADD: tmp=0x02; break; + case DOP_ADC: tmp=0x12; break; + case DOP_SUB: tmp=0x2a; break; + case DOP_SBB: tmp=0x1a; break; + case DOP_CMP: tmp=0x3a; goto nochange; + case DOP_XOR: tmp=0x32; break; + case DOP_AND: tmp=0x22; if ((dr1==dr2) && (di1==di2)) goto nochange; break; + case DOP_OR: tmp=0x0a; if ((dr1==dr2) && (di1==di2)) goto nochange; break; + case DOP_TEST: tmp=0x84; goto nochange; + case DOP_MOV: if ((dr1==dr2) && (di1==di2)) return; tmp=0x8a; break; + case DOP_XCHG: tmp=0x86; dr2->flags|=DYNFLG_CHANGED; break; default: IllegalOption(); } - cache_addb(0xc0+((gr1->index+di1)<<3)+gr2->index+di2); + dr1->flags|=DYNFLG_CHANGED; +nochange: + cache_addw(tmp|(0xc0+((gr1->index+di1)<<3)+gr2->index+di2)<<8); } static void gen_dop_byte_imm(DualOps op,DynReg * dr1,Bit8u di1,Bitu imm) { GenReg * gr1=FindDynReg(dr1); + Bit16u tmp; switch (op) { - case DOP_ADD: - cache_addw(0xc080+((gr1->index+di1)<<8)); - dr1->flags|=DYNFLG_CHANGED; - break; - case DOP_OR: - cache_addw(0xc880+((gr1->index+di1)<<8)); - dr1->flags|=DYNFLG_CHANGED; - break; - case DOP_ADC: - cache_addw(0xd080+((gr1->index+di1)<<8)); - dr1->flags|=DYNFLG_CHANGED; - break; - case DOP_SBB: - cache_addw(0xd880+((gr1->index+di1)<<8)); - dr1->flags|=DYNFLG_CHANGED; - break; - case DOP_AND: - cache_addw(0xe080+((gr1->index+di1)<<8)); - dr1->flags|=DYNFLG_CHANGED; - break; - case DOP_SUB: - cache_addw(0xe880+((gr1->index+di1)<<8)); - dr1->flags|=DYNFLG_CHANGED; - break; - case DOP_XOR: - cache_addw(0xf080+((gr1->index+di1)<<8)); - dr1->flags|=DYNFLG_CHANGED; - break; - case DOP_CMP: - cache_addw(0xf880+((gr1->index+di1)<<8)); - break;//Doesn't change - case DOP_MOV: - cache_addb(0xb0+gr1->index+di1); - dr1->flags|=DYNFLG_CHANGED; - break; - case DOP_TEST: - cache_addw(0xc0f6+((gr1->index+di1)<<8)); - break;//Doesn't change + case DOP_ADD: tmp=0xc080; break; + case DOP_ADC: tmp=0xd080; break; + case DOP_SUB: tmp=0xe880; break; + case DOP_SBB: tmp=0xd880; break; + case DOP_CMP: tmp=0xf880; goto nochange; //Doesn't change + case DOP_XOR: tmp=0xf080; break; + case DOP_AND: tmp=0xe080; break; + case DOP_OR: tmp=0xc880; break; + case DOP_TEST: tmp=0xc0f6; goto nochange; //Doesn't change + case DOP_MOV: cache_addb(0xb0+gr1->index+di1); + dr1->flags|=DYNFLG_CHANGED; + goto finish; default: IllegalOption(); } + dr1->flags|=DYNFLG_CHANGED; +nochange: + cache_addw(tmp+((gr1->index+di1)<<8)); +finish: cache_addb(imm); } static void gen_sop_byte(SingleOps op,DynReg * dr1,Bit8u di1) { GenReg * gr1=FindDynReg(dr1); + Bit16u tmp; switch (op) { - case SOP_INC:cache_addw(0xc0FE + ((gr1->index+di1)<<8));break; - case SOP_DEC:cache_addw(0xc8FE + ((gr1->index+di1)<<8));break; - case SOP_NOT:cache_addw(0xd0f6 + ((gr1->index+di1)<<8));break; - case SOP_NEG:cache_addw(0xd8f6 + ((gr1->index+di1)<<8));break; + case SOP_INC: tmp=0xc0FE; break; + case SOP_DEC: tmp=0xc8FE; break; + case SOP_NOT: tmp=0xd0f6; break; + case SOP_NEG: tmp=0xd8f6; break; default: IllegalOption(); } + cache_addw(tmp + ((gr1->index+di1)<<8)); dr1->flags|=DYNFLG_CHANGED; } static void gen_extend_word(bool sign,DynReg * ddr,DynReg * dsr) { - GenReg * gdr=FindDynReg(ddr);GenReg * gsr=FindDynReg(dsr); + GenReg * gsr=FindDynReg(dsr); + GenReg * gdr=FindDynReg(ddr,true); if (sign) cache_addw(0xbf0f); else cache_addw(0xb70f); cache_addb(0xc0+(gdr->index<<3)+(gsr->index)); @@ -340,7 +342,8 @@ static void gen_extend_word(bool sign,DynReg * ddr,DynReg * dsr) { } static void gen_extend_byte(bool sign,bool dword,DynReg * ddr,DynReg * dsr,Bit8u dsi) { - GenReg * gdr=FindDynReg(ddr);GenReg * gsr=FindDynReg(dsr); + GenReg * gsr=FindDynReg(dsr); + GenReg * gdr=FindDynReg(ddr,dword); if (!dword) cache_addb(0x66); if (sign) cache_addw(0xbe0f); else cache_addw(0xb60f); @@ -368,6 +371,7 @@ static void gen_lea(DynReg * ddr,DynReg * dsr1,DynReg * dsr2,Bitu scale,Bits imm Bit8u sib=(gsr1->index)+(gsr2->index<<3)+(scale<<6); cache_addb(sib); } else { + if ((ddr==dsr1) && !imm_size) return; cache_addb(0x8d); //LEA cache_addb(rm_base+gsr1->index); } @@ -394,59 +398,80 @@ static void gen_lea(DynReg * ddr,DynReg * dsr1,DynReg * dsr2,Bitu scale,Bits imm } static void gen_dop_word(DualOps op,bool dword,DynReg * dr1,DynReg * dr2) { - GenReg * gr1=FindDynReg(dr1);GenReg * gr2=FindDynReg(dr2); - if (!dword) cache_addb(0x66); + GenReg * gr2=FindDynReg(dr2); + GenReg * gr1=FindDynReg(dr1,dword && op==DOP_MOV); + Bit8u tmp; switch (op) { - case DOP_ADD:cache_addb(0x03);dr1->flags|=DYNFLG_CHANGED;break; - case DOP_OR: cache_addb(0x0b);dr1->flags|=DYNFLG_CHANGED;break; - case DOP_ADC:cache_addb(0x13);dr1->flags|=DYNFLG_CHANGED;break; - case DOP_SBB:cache_addb(0x1b);dr1->flags|=DYNFLG_CHANGED;break; - case DOP_AND:cache_addb(0x23);dr1->flags|=DYNFLG_CHANGED;break; - case DOP_SUB:cache_addb(0x2b);dr1->flags|=DYNFLG_CHANGED;break; - case DOP_XOR:cache_addb(0x33);dr1->flags|=DYNFLG_CHANGED;break; - case DOP_CMP:cache_addb(0x3b);break; - case DOP_MOV:cache_addb(0x8b);dr1->flags|=DYNFLG_CHANGED;break; - case DOP_XCHG:cache_addb(0x87);dr1->flags|=DYNFLG_CHANGED;dr2->flags|=DYNFLG_CHANGED;break; - case DOP_TEST:cache_addb(0x85);break; + case DOP_ADD: tmp=0x03; break; + case DOP_ADC: tmp=0x13; break; + case DOP_SUB: tmp=0x2b; break; + case DOP_SBB: tmp=0x1b; break; + case DOP_CMP: tmp=0x3b; goto nochange; + case DOP_XOR: tmp=0x33; break; + case DOP_AND: tmp=0x23; if (dr1==dr2) goto nochange; break; + case DOP_OR: tmp=0x0b; if (dr1==dr2) goto nochange; break; + case DOP_TEST: tmp=0x85; goto nochange; + case DOP_MOV: if (dr1==dr2) return; tmp=0x8b; break; + case DOP_XCHG: + dr2->flags|=DYNFLG_CHANGED; + if (dword && !((dr1->flags&DYNFLG_HAS8) ^ (dr2->flags&DYNFLG_HAS8))) { + dr1->genreg=gr2;dr1->genreg->dynreg=dr1; + dr2->genreg=gr1;dr2->genreg->dynreg=dr2; + dr1->flags|=DYNFLG_CHANGED; + return; + } + tmp=0x87; + break; default: IllegalOption(); } - cache_addb(0xc0+(gr1->index<<3)+gr2->index); + dr1->flags|=DYNFLG_CHANGED; +nochange: + if (!dword) cache_addb(0x66); + cache_addw(tmp|(0xc0+(gr1->index<<3)+gr2->index)<<8); } static void gen_dop_word_imm(DualOps op,bool dword,DynReg * dr1,Bits imm) { - GenReg * gr1=FindDynReg(dr1); + GenReg * gr1=FindDynReg(dr1,dword && op==DOP_MOV); + Bit16u tmp; if (!dword) cache_addb(0x66); switch (op) { - case DOP_ADD:cache_addw(0xc081+(gr1->index<<8));dr1->flags|=DYNFLG_CHANGED;break; - case DOP_OR: cache_addw(0xc881+(gr1->index<<8));dr1->flags|=DYNFLG_CHANGED;break; - case DOP_ADC:cache_addw(0xd081+(gr1->index<<8));dr1->flags|=DYNFLG_CHANGED;break; - case DOP_SBB:cache_addw(0xd881+(gr1->index<<8));dr1->flags|=DYNFLG_CHANGED;break; - case DOP_AND:cache_addw(0xe081+(gr1->index<<8));dr1->flags|=DYNFLG_CHANGED;break; - case DOP_SUB:cache_addw(0xe881+(gr1->index<<8));dr1->flags|=DYNFLG_CHANGED;break; - case DOP_XOR:cache_addw(0xf081+(gr1->index<<8));dr1->flags|=DYNFLG_CHANGED;break; - case DOP_CMP:cache_addw(0xf881+(gr1->index<<8));break;//Doesn't change - case DOP_MOV:cache_addb(0xb8+(gr1->index));dr1->flags|=DYNFLG_CHANGED;break; - case DOP_TEST:cache_addw(0xc0f7+(gr1->index<<8));break;//Doesn't change + case DOP_ADD: tmp=0xc081; break; + case DOP_ADC: tmp=0xd081; break; + case DOP_SUB: tmp=0xe881; break; + case DOP_SBB: tmp=0xd881; break; + case DOP_CMP: tmp=0xf881; goto nochange; //Doesn't change + case DOP_XOR: tmp=0xf081; break; + case DOP_AND: tmp=0xe081; break; + case DOP_OR: tmp=0xc881; break; + case DOP_TEST: tmp=0xc0f7; goto nochange; //Doesn't change + case DOP_MOV: cache_addb(0xb8+(gr1->index)); dr1->flags|=DYNFLG_CHANGED; goto finish; default: IllegalOption(); } + dr1->flags|=DYNFLG_CHANGED; +nochange: + cache_addw(tmp+(gr1->index<<8)); +finish: if (dword) cache_addd(imm); else cache_addw(imm); } static void gen_imul_word(bool dword,DynReg * dr1,DynReg * dr2) { GenReg * gr1=FindDynReg(dr1);GenReg * gr2=FindDynReg(dr2); - if (!dword) cache_addb(0x66); - cache_addw(0xaf0f); - cache_addb(0xc0+(gr1->index<<3)+gr2->index); dr1->flags|=DYNFLG_CHANGED; + if (!dword) { + cache_addd(0xaf0f66|(0xc0+(gr1->index<<3)+gr2->index)<<24); + } else { + cache_addw(0xaf0f); + cache_addb(0xc0+(gr1->index<<3)+gr2->index); + } } static void gen_imul_word_imm(bool dword,DynReg * dr1,DynReg * dr2,Bits imm) { GenReg * gr1=FindDynReg(dr1);GenReg * gr2=FindDynReg(dr2); if (!dword) cache_addb(0x66); - if ((imm>=-128 && imm<=127)) { + if ((imm>=-128 && imm<=127)) { cache_addb(0x6b); cache_addb(0xc0+(gr1->index<<3)+gr2->index); cache_addb(imm); @@ -498,10 +523,13 @@ static void gen_shift_word_cl(Bitu op,bool dword,DynReg * dr1,DynReg * drecx) { static void gen_shift_word_imm(Bitu op,bool dword,DynReg * dr1,Bit8u imm) { GenReg * gr1=FindDynReg(dr1); - if (!dword) cache_addb(0x66); - cache_addw(0xc0c1+(((Bit16u)op) << 11) + ((gr1->index)<<8)); - cache_addb(imm); dr1->flags|=DYNFLG_CHANGED; + if (!dword) { + cache_addd(0x66|((0xc0c1+((Bit16u)op << 11) + (gr1->index<<8))|imm<<16)<<8); + } else { + cache_addw(0xc0c1+((Bit16u)op << 11) + (gr1->index<<8)); + cache_addb(imm); + } } static void gen_cbw(bool dword,DynReg * dyn_ax) { @@ -514,10 +542,10 @@ static void gen_cbw(bool dword,DynReg * dyn_ax) { static void gen_cwd(bool dword,DynReg * dyn_ax,DynReg * dyn_dx) { ForceDynReg(x86gen.regs[X86_REG_EAX],dyn_ax); ForceDynReg(x86gen.regs[X86_REG_EDX],dyn_dx); - if (!dword) cache_addb(0x66); - cache_addb(0x99); dyn_ax->flags|=DYNFLG_CHANGED; dyn_dx->flags|=DYNFLG_CHANGED; + if (!dword) cache_addw(0x9966); + else cache_addb(0x99); } static void gen_mul_byte(bool imul,DynReg * dyn_ax,DynReg * dr1,Bit8u di1) { @@ -570,9 +598,9 @@ static void gen_call_function(void * func,char * ops,...) { ParamInfo * retparam=0; /* Clear the EAX Genreg for usage */ x86gen.regs[X86_REG_EAX]->Clear(); - x86gen.regs[X86_REG_EAX]->notusable=true;; + x86gen.regs[X86_REG_EAX]->notusable=true; /* Save the flags */ - gen_protectflags(); + if (GCC_UNLIKELY(!skip_flags)) gen_protectflags(); /* Scan for the amount of params */ if (ops) { va_list params; @@ -655,6 +683,7 @@ static void gen_call_function(void * func,char * ops,...) { if (retparam) { DynReg * dynreg=(DynReg *)retparam->value; GenReg * genreg=FindDynReg(dynreg); + if (genreg->index) // test for (e)ax/al switch (*retparam->line) { case 'd': cache_addw(0xc08b+(genreg->index <<(8+3))); //mov reg,eax @@ -676,10 +705,38 @@ static void gen_call_function(void * func,char * ops,...) { x86gen.regs[X86_REG_EAX]->notusable=false; } +static void gen_call_write(DynReg * dr,Bit32u val,Bitu write_size) { + /* Clear the EAX Genreg for usage */ + x86gen.regs[X86_REG_EAX]->Clear(); + x86gen.regs[X86_REG_EAX]->notusable=true; + gen_protectflags(); + + cache_addb(0x68); //PUSH val + cache_addd(val); + GenReg * genreg=FindDynReg(dr); + cache_addb(0x50+genreg->index); //PUSH reg + + /* Clear some unprotected registers */ + x86gen.regs[X86_REG_ECX]->Clear(); + x86gen.regs[X86_REG_EDX]->Clear(); + /* Do the actual call to the procedure */ + cache_addb(0xe8); + switch (write_size) { + case 1: cache_addd((Bit32u)mem_writeb - (Bit32u)cache.pos-4); break; + case 2: cache_addd((Bit32u)mem_writew_dyncorex86 - (Bit32u)cache.pos-4); break; + case 4: cache_addd((Bit32u)mem_writed_dyncorex86 - (Bit32u)cache.pos-4); break; + default: IllegalOption(); + } + + cache_addw(0xc483); //ADD ESP,8 + cache_addb(2*4); + x86gen.regs[X86_REG_EAX]->notusable=false; + gen_releasereg(dr); +} + static Bit8u * gen_create_branch(BranchTypes type) { /* First free all registers */ - cache_addb(0x70+type); - cache_addb(0); + cache_addw(0x70+type); return (cache.pos-1); } @@ -720,7 +777,7 @@ static void gen_jmp_ptr(void * ptr,Bits imm=0) { } static void gen_save_flags(DynReg * dynreg) { - if (x86gen.flagsactive) IllegalOption(); + if (GCC_UNLIKELY(x86gen.flagsactive)) IllegalOption(); GenReg * genreg=FindDynReg(dynreg); cache_addb(0x8b); //MOV REG,[esp] cache_addw(0x2404+(genreg->index << 3)); @@ -728,7 +785,7 @@ static void gen_save_flags(DynReg * dynreg) { } static void gen_load_flags(DynReg * dynreg) { - if (x86gen.flagsactive) IllegalOption(); + if (GCC_UNLIKELY(x86gen.flagsactive)) IllegalOption(); cache_addw(0xc483); //ADD ESP,4 cache_addb(0x4); GenReg * genreg=FindDynReg(dynreg); @@ -764,13 +821,13 @@ static void gen_return(BlockReturn retcode) { } static void gen_init(void) { - x86gen.regs[X86_REG_EAX]=new GenReg(0,false); - x86gen.regs[X86_REG_ECX]=new GenReg(1,false); - x86gen.regs[X86_REG_EDX]=new GenReg(2,false); - x86gen.regs[X86_REG_EBX]=new GenReg(3,true); - x86gen.regs[X86_REG_EBP]=new GenReg(5,true); - x86gen.regs[X86_REG_ESI]=new GenReg(6,true); - x86gen.regs[X86_REG_EDI]=new GenReg(7,true); + x86gen.regs[X86_REG_EAX]=new GenReg(0); + x86gen.regs[X86_REG_ECX]=new GenReg(1); + x86gen.regs[X86_REG_EDX]=new GenReg(2); + x86gen.regs[X86_REG_EBX]=new GenReg(3); + x86gen.regs[X86_REG_EBP]=new GenReg(5); + x86gen.regs[X86_REG_ESI]=new GenReg(6); + x86gen.regs[X86_REG_EDI]=new GenReg(7); }