From eb9633a6e443cd766186916b3b64d91e372aa3c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Strohh=C3=A4cker?= Date: Sat, 27 Jun 2009 12:51:10 +0000 Subject: [PATCH] optimizations for the recompiler arm backend (M-HT) Imported-from: https://svn.code.sf.net/p/dosbox/code-0/dosbox/trunk@3435 --- src/cpu/core_dynrec/risc_armv4le-o3.h | 77 ++-- src/cpu/core_dynrec/risc_armv4le-s3.h | 77 ++-- src/cpu/core_dynrec/risc_armv4le-thumb-iw.h | 385 +++++++----------- src/cpu/core_dynrec/risc_armv4le-thumb-niw.h | 399 ++++++++----------- src/cpu/core_dynrec/risc_armv4le-thumb.h | 376 ++++++++--------- 5 files changed, 576 insertions(+), 738 deletions(-) diff --git a/src/cpu/core_dynrec/risc_armv4le-o3.h b/src/cpu/core_dynrec/risc_armv4le-o3.h index 9866a7fe..cbb48232 100644 --- a/src/cpu/core_dynrec/risc_armv4le-o3.h +++ b/src/cpu/core_dynrec/risc_armv4le-o3.h @@ -16,7 +16,7 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* $Id: risc_armv4le-o3.h,v 1.5 2009-06-25 19:31:43 c2woody Exp $ */ +/* $Id: risc_armv4le-o3.h,v 1.6 2009-06-27 12:51:10 c2woody Exp $ */ /* ARMv4 (little endian) backend by M-HT (size-tweaked arm version) */ @@ -24,11 +24,11 @@ // temporary registers #define temp1 HOST_ip -#define temp2 HOST_v5 +#define temp2 HOST_v3 #define temp3 HOST_v4 // register that holds function return values -#define FC_RETOP HOST_v3 +#define FC_RETOP HOST_a1 // register used for address calculations, #define FC_ADDR HOST_v1 // has to be saved across calls, see DRC_PROTECT_ADDR_REG @@ -627,7 +627,6 @@ static void INLINE gen_call_function_raw(void * func) { cache_addd( ADD_IMM(HOST_lr, HOST_pc, 4, 0) ); // add lr, pc, #4 cache_addd( BX(temp1) ); // bx temp1 cache_addd((Bit32u)func); // .int func - cache_addd( MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 0) ); // mov FC_RETOP, a1 } // generate a call to a function with paramcount parameters @@ -786,7 +785,7 @@ static void INLINE gen_fill_branch_long(Bit32u data) { static void gen_run_code(void) { cache_addd(0xe92d4000); // stmfd sp!, {lr} - cache_addd(0xe92d0df0); // stmfd sp!, {v1-v5,v7,v8} + cache_addd(0xe92d0cf0); // stmfd sp!, {v1-v4,v7,v8} // adr: 8 cache_addd( LDR_IMM(FC_SEGS_ADDR, HOST_pc, 64 - (8 + 8)) ); // ldr FC_SEGS_ADDR, [pc, #(&Segs)] @@ -797,7 +796,7 @@ static void gen_run_code(void) { cache_addd(0xe92d4000); // stmfd sp!, {lr} cache_addd( BX(HOST_r0) ); // bx r0 - cache_addd(0xe8bd0df0); // ldmfd sp!, {v1-v5,v7,v8} + cache_addd(0xe8bd0cf0); // ldmfd sp!, {v1-v4,v7,v8} cache_addd(0xe8bd4000); // ldmfd sp!, {lr} cache_addd( BX(HOST_lr) ); // bx lr @@ -818,7 +817,6 @@ static void gen_run_code(void) { // return from a function static void gen_return_function(void) { - cache_addd( MOV_REG_LSL_IMM(HOST_a1, FC_RETOP, 0) ); // mov a1, FC_RETOP cache_addd(0xe8bd4000); // ldmfd sp!, {lr} cache_addd( BX(HOST_lr) ); // bx lr } @@ -835,31 +833,41 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_ADDw: case t_ADDd: *(Bit32u*)pos=ADD_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // add FC_RETOP, a1, a2 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_ORb: case t_ORw: case t_ORd: *(Bit32u*)pos=ORR_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // orr FC_RETOP, a1, a2 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_ANDb: case t_ANDw: case t_ANDd: *(Bit32u*)pos=AND_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // and FC_RETOP, a1, a2 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_SUBb: case t_SUBw: case t_SUBd: *(Bit32u*)pos=SUB_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // sub FC_RETOP, a1, a2 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_XORb: case t_XORw: case t_XORd: *(Bit32u*)pos=EOR_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // eor FC_RETOP, a1, a2 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_CMPb: case t_CMPw: @@ -867,106 +875,105 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_TESTb: case t_TESTw: case t_TESTd: - *(Bit32u*)pos=B_FWD(12); // b (pc+3*4) + *(Bit32u*)pos=B_FWD(8); // b (pc+2*4) break; case t_INCb: case t_INCw: case t_INCd: *(Bit32u*)pos=ADD_IMM(FC_RETOP, HOST_a1, 1, 0); // add FC_RETOP, a1, #1 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_DECb: case t_DECw: case t_DECd: *(Bit32u*)pos=SUB_IMM(FC_RETOP, HOST_a1, 1, 0); // sub FC_RETOP, a1, #1 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_SHLb: case t_SHLw: case t_SHLd: *(Bit32u*)pos=MOV_REG_LSL_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, lsl a2 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_SHRb: *(Bit32u*)pos=AND_IMM(FC_RETOP, HOST_a1, 0xff, 0); // and FC_RETOP, a1, #0xff *(Bit32u*)(pos+4)=MOV_REG_LSR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, lsr a2 *(Bit32u*)(pos+8)=NOP; // nop *(Bit32u*)(pos+12)=NOP; // nop - *(Bit32u*)(pos+16)=NOP; // nop break; case t_SHRw: *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16 *(Bit32u*)(pos+4)=MOV_REG_LSR_IMM(FC_RETOP, FC_RETOP, 16); // mov FC_RETOP, FC_RETOP, lsr #16 *(Bit32u*)(pos+8)=MOV_REG_LSR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, lsr a2 *(Bit32u*)(pos+12)=NOP; // nop - *(Bit32u*)(pos+16)=NOP; // nop break; case t_SHRd: *(Bit32u*)pos=MOV_REG_LSR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, lsr a2 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_SARb: *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24); // mov FC_RETOP, a1, lsl #24 *(Bit32u*)(pos+4)=MOV_REG_ASR_IMM(FC_RETOP, FC_RETOP, 24); // mov FC_RETOP, FC_RETOP, asr #24 *(Bit32u*)(pos+8)=MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, asr a2 *(Bit32u*)(pos+12)=NOP; // nop - *(Bit32u*)(pos+16)=NOP; // nop break; case t_SARw: *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16 *(Bit32u*)(pos+4)=MOV_REG_ASR_IMM(FC_RETOP, FC_RETOP, 16); // mov FC_RETOP, FC_RETOP, asr #16 *(Bit32u*)(pos+8)=MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, asr a2 *(Bit32u*)(pos+12)=NOP; // nop - *(Bit32u*)(pos+16)=NOP; // nop break; case t_SARd: *(Bit32u*)pos=MOV_REG_ASR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, asr a2 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_RORb: *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24); // mov FC_RETOP, a1, lsl #24 *(Bit32u*)(pos+4)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 8); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8 *(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 *(Bit32u*)(pos+12)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2 - *(Bit32u*)(pos+16)=NOP; // nop break; case t_RORw: *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16 *(Bit32u*)(pos+4)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 *(Bit32u*)(pos+8)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2 *(Bit32u*)(pos+12)=NOP; // nop - *(Bit32u*)(pos+16)=NOP; // nop break; case t_RORd: *(Bit32u*)pos=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, ror a2 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) - break; - case t_ROLb: - *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24); // mov FC_RETOP, a1, lsl #24 - *(Bit32u*)(pos+4)=RSB_IMM(HOST_a2, HOST_a2, 32, 0); // rsb a2, a2, #32 - *(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 8); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8 - *(Bit32u*)(pos+12)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 - *(Bit32u*)(pos+16)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2 + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_ROLw: *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16 *(Bit32u*)(pos+4)=RSB_IMM(HOST_a2, HOST_a2, 32, 0); // rsb a2, a2, #32 *(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 *(Bit32u*)(pos+12)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2 - *(Bit32u*)(pos+16)=NOP; // nop break; case t_ROLd: *(Bit32u*)pos=RSB_IMM(HOST_a2, HOST_a2, 32, 0); // rsb a2, a2, #32 *(Bit32u*)(pos+4)=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, ror a2 *(Bit32u*)(pos+8)=NOP; // nop *(Bit32u*)(pos+12)=NOP; // nop - *(Bit32u*)(pos+16)=NOP; // nop break; case t_NEGb: case t_NEGw: case t_NEGd: *(Bit32u*)pos=RSB_IMM(FC_RETOP, HOST_a1, 0, 0); // rsb FC_RETOP, a1, #0 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; default: *(Bit32u*)(pos+12)=(Bit32u)fct_ptr; // simple_func diff --git a/src/cpu/core_dynrec/risc_armv4le-s3.h b/src/cpu/core_dynrec/risc_armv4le-s3.h index d89ee13b..ae804b1f 100644 --- a/src/cpu/core_dynrec/risc_armv4le-s3.h +++ b/src/cpu/core_dynrec/risc_armv4le-s3.h @@ -16,7 +16,7 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* $Id: risc_armv4le-s3.h,v 1.5 2009-06-25 19:31:43 c2woody Exp $ */ +/* $Id: risc_armv4le-s3.h,v 1.6 2009-06-27 12:51:10 c2woody Exp $ */ /* ARMv4 (little endian) backend by M-HT (speed-tweaked arm version) */ @@ -24,11 +24,11 @@ // temporary registers #define temp1 HOST_ip -#define temp2 HOST_v5 +#define temp2 HOST_v3 #define temp3 HOST_v4 // register that holds function return values -#define FC_RETOP HOST_v3 +#define FC_RETOP HOST_a1 // register used for address calculations, #define FC_ADDR HOST_v1 // has to be saved across calls, see DRC_PROTECT_ADDR_REG @@ -475,7 +475,6 @@ static void INLINE gen_call_function_raw(void * func) { cache_addd( ADD_IMM(HOST_lr, HOST_pc, 4, 0) ); // add lr, pc, #4 cache_addd( BX(temp1) ); // bx temp1 cache_addd((Bit32u)func); // .int func - cache_addd( MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 0) ); // mov FC_RETOP, a1 } // generate a call to a function with paramcount parameters @@ -622,7 +621,7 @@ static void INLINE gen_fill_branch_long(Bit32u data) { static void gen_run_code(void) { cache_addd(0xe92d4000); // stmfd sp!, {lr} - cache_addd(0xe92d0df0); // stmfd sp!, {v1-v5,v7,v8} + cache_addd(0xe92d0cf0); // stmfd sp!, {v1-v4,v7,v8} // adr: 8 cache_addd( LDR_IMM(FC_SEGS_ADDR, HOST_pc, 64 - (8 + 8)) ); // ldr FC_SEGS_ADDR, [pc, #(&Segs)] @@ -633,7 +632,7 @@ static void gen_run_code(void) { cache_addd(0xe92d4000); // stmfd sp!, {lr} cache_addd( BX(HOST_r0) ); // bx r0 - cache_addd(0xe8bd0df0); // ldmfd sp!, {v1-v5,v7,v8} + cache_addd(0xe8bd0cf0); // ldmfd sp!, {v1-v4,v7,v8} cache_addd(0xe8bd4000); // ldmfd sp!, {lr} cache_addd( BX(HOST_lr) ); // bx lr @@ -654,7 +653,6 @@ static void gen_run_code(void) { // return from a function static void gen_return_function(void) { - cache_addd( MOV_REG_LSL_IMM(HOST_a1, FC_RETOP, 0) ); // mov a1, FC_RETOP cache_addd(0xe8bd4000); // ldmfd sp!, {lr} cache_addd( BX(HOST_lr) ); // bx lr } @@ -671,31 +669,41 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_ADDw: case t_ADDd: *(Bit32u*)pos=ADD_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // add FC_RETOP, a1, a2 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_ORb: case t_ORw: case t_ORd: *(Bit32u*)pos=ORR_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // orr FC_RETOP, a1, a2 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_ANDb: case t_ANDw: case t_ANDd: *(Bit32u*)pos=AND_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // and FC_RETOP, a1, a2 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_SUBb: case t_SUBw: case t_SUBd: *(Bit32u*)pos=SUB_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // sub FC_RETOP, a1, a2 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_XORb: case t_XORw: case t_XORd: *(Bit32u*)pos=EOR_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // eor FC_RETOP, a1, a2 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_CMPb: case t_CMPw: @@ -703,106 +711,105 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_TESTb: case t_TESTw: case t_TESTd: - *(Bit32u*)pos=B_FWD(12); // b (pc+3*4) + *(Bit32u*)pos=B_FWD(8); // b (pc+2*4) break; case t_INCb: case t_INCw: case t_INCd: *(Bit32u*)pos=ADD_IMM(FC_RETOP, HOST_a1, 1, 0); // add FC_RETOP, a1, #1 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_DECb: case t_DECw: case t_DECd: *(Bit32u*)pos=SUB_IMM(FC_RETOP, HOST_a1, 1, 0); // sub FC_RETOP, a1, #1 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_SHLb: case t_SHLw: case t_SHLd: *(Bit32u*)pos=MOV_REG_LSL_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, lsl a2 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_SHRb: *(Bit32u*)pos=AND_IMM(FC_RETOP, HOST_a1, 0xff, 0); // and FC_RETOP, a1, #0xff *(Bit32u*)(pos+4)=MOV_REG_LSR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, lsr a2 *(Bit32u*)(pos+8)=NOP; // nop *(Bit32u*)(pos+12)=NOP; // nop - *(Bit32u*)(pos+16)=NOP; // nop break; case t_SHRw: *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16 *(Bit32u*)(pos+4)=MOV_REG_LSR_IMM(FC_RETOP, FC_RETOP, 16); // mov FC_RETOP, FC_RETOP, lsr #16 *(Bit32u*)(pos+8)=MOV_REG_LSR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, lsr a2 *(Bit32u*)(pos+12)=NOP; // nop - *(Bit32u*)(pos+16)=NOP; // nop break; case t_SHRd: *(Bit32u*)pos=MOV_REG_LSR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, lsr a2 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_SARb: *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24); // mov FC_RETOP, a1, lsl #24 *(Bit32u*)(pos+4)=MOV_REG_ASR_IMM(FC_RETOP, FC_RETOP, 24); // mov FC_RETOP, FC_RETOP, asr #24 *(Bit32u*)(pos+8)=MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, asr a2 *(Bit32u*)(pos+12)=NOP; // nop - *(Bit32u*)(pos+16)=NOP; // nop break; case t_SARw: *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16 *(Bit32u*)(pos+4)=MOV_REG_ASR_IMM(FC_RETOP, FC_RETOP, 16); // mov FC_RETOP, FC_RETOP, asr #16 *(Bit32u*)(pos+8)=MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, asr a2 *(Bit32u*)(pos+12)=NOP; // nop - *(Bit32u*)(pos+16)=NOP; // nop break; case t_SARd: *(Bit32u*)pos=MOV_REG_ASR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, asr a2 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_RORb: *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24); // mov FC_RETOP, a1, lsl #24 *(Bit32u*)(pos+4)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 8); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8 *(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 *(Bit32u*)(pos+12)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2 - *(Bit32u*)(pos+16)=NOP; // nop break; case t_RORw: *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16 *(Bit32u*)(pos+4)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 *(Bit32u*)(pos+8)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2 *(Bit32u*)(pos+12)=NOP; // nop - *(Bit32u*)(pos+16)=NOP; // nop break; case t_RORd: *(Bit32u*)pos=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, ror a2 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) - break; - case t_ROLb: - *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24); // mov FC_RETOP, a1, lsl #24 - *(Bit32u*)(pos+4)=RSB_IMM(HOST_a2, HOST_a2, 32, 0); // rsb a2, a2, #32 - *(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 8); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8 - *(Bit32u*)(pos+12)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 - *(Bit32u*)(pos+16)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2 + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; case t_ROLw: *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16 *(Bit32u*)(pos+4)=RSB_IMM(HOST_a2, HOST_a2, 32, 0); // rsb a2, a2, #32 *(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 *(Bit32u*)(pos+12)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2 - *(Bit32u*)(pos+16)=NOP; // nop break; case t_ROLd: *(Bit32u*)pos=RSB_IMM(HOST_a2, HOST_a2, 32, 0); // rsb a2, a2, #32 *(Bit32u*)(pos+4)=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, ror a2 *(Bit32u*)(pos+8)=NOP; // nop *(Bit32u*)(pos+12)=NOP; // nop - *(Bit32u*)(pos+16)=NOP; // nop break; case t_NEGb: case t_NEGw: case t_NEGd: *(Bit32u*)pos=RSB_IMM(FC_RETOP, HOST_a1, 0, 0); // rsb FC_RETOP, a1, #0 - *(Bit32u*)(pos+4)=B_FWD(8); // b (pc+2*4) + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+12)=NOP; // nop break; default: *(Bit32u*)(pos+12)=(Bit32u)fct_ptr; // simple_func diff --git a/src/cpu/core_dynrec/risc_armv4le-thumb-iw.h b/src/cpu/core_dynrec/risc_armv4le-thumb-iw.h index 0a2f6631..57828866 100644 --- a/src/cpu/core_dynrec/risc_armv4le-thumb-iw.h +++ b/src/cpu/core_dynrec/risc_armv4le-thumb-iw.h @@ -16,7 +16,7 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* $Id: risc_armv4le-thumb-iw.h,v 1.4 2009-06-25 19:31:43 c2woody Exp $ */ +/* $Id: risc_armv4le-thumb-iw.h,v 1.5 2009-06-27 12:51:10 c2woody Exp $ */ /* ARMv4 (little endian) backend by M-HT (thumb version with data pool, requires -mthumb-interwork switch when compiling dosbox) */ @@ -25,15 +25,10 @@ // temporary "lo" registers #define templo1 HOST_v3 #define templo2 HOST_v4 - -// temporary "lo" register - value must be preserved when using it -#define templosav HOST_a3 - -// temporary "hi" register -#define temphi1 HOST_ip +#define templo3 HOST_v2 // register that holds function return values -#define FC_RETOP HOST_v2 +#define FC_RETOP HOST_a1 // register used for address calculations, #define FC_ADDR HOST_v1 // has to be saved across calls, see DRC_PROTECT_ADDR_REG @@ -86,8 +81,6 @@ #define ADD_IMM8(dst, imm) (0x3000 + ((dst) << 8) + (imm) ) // add dst, src1, src2 #define ADD_REG(dst, src1, src2) (0x1800 + (dst) + ((src1) << 3) + ((src2) << 6) ) -// add dst, src -#define ADD_LO_HI(dst, src) (0x4440 + (dst) + (((src) - HOST_r8) << 3) ) // add dst, pc, #imm @ 0 <= imm < 1024 & imm mod 4 = 0 #define ADD_LO_PC_IMM(dst, imm) (0xa000 + ((dst) << 8) + ((imm) >> 2) ) // sub dst, src1, src2 @@ -502,11 +495,9 @@ static void gen_extend_word(bool sign,HostReg reg) { // add a 32bit value from memory to a full register static void gen_add(HostReg reg,void* op) { + gen_mov_word_to_reg(templo3, op, 1); cache_checkinstr(2); - cache_addw( MOV_HI_LO(temphi1, reg) ); // mov temphi1, reg - gen_mov_word_to_reg(reg, op, 1); - cache_checkinstr(2); - cache_addw( ADD_LO_HI(reg, temphi1) ); // add reg, temphi1 + cache_addw( ADD_REG(reg, reg, templo3) ); // add reg, reg, templo3 } // add a 32bit constant value to a full register @@ -528,12 +519,8 @@ static void gen_and_imm(HostReg reg,Bit32u imm) { // move a 32bit constant value into memory static void gen_mov_direct_dword(void* dest,Bit32u imm) { - cache_checkinstr(2); - cache_addw( MOV_HI_LO(temphi1, templosav) ); // mov temphi1, templosav - gen_mov_dword_to_reg_imm(templosav, imm); - gen_mov_word_from_reg(templosav, dest, 1); - cache_checkinstr(2); - cache_addw( MOV_LO_HI(templosav, temphi1) ); // mov templosav, temphi1 + gen_mov_dword_to_reg_imm(templo3, imm); + gen_mov_word_from_reg(templo3, dest, 1); } // move an address into memory @@ -544,19 +531,15 @@ static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) { // add an 8bit constant value to a dword memory value static void gen_add_direct_byte(void* dest,Bit8s imm) { if(!imm) return; - cache_checkinstr(2); - cache_addw( MOV_HI_LO(temphi1, templosav) ); // mov temphi1, templosav gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templosav, dest, 1, templo2); + gen_mov_word_to_reg_helper(templo3, dest, 1, templo2); cache_checkinstr(2); if (imm >= 0) { - cache_addw( ADD_IMM8(templosav, (Bit32s)imm) ); // add templosav, #(imm) + cache_addw( ADD_IMM8(templo3, (Bit32s)imm) ); // add templo3, #(imm) } else { - cache_addw( SUB_IMM8(templosav, -((Bit32s)imm)) ); // sub templosav, #(-imm) + cache_addw( SUB_IMM8(templo3, -((Bit32s)imm)) ); // sub templo3, #(-imm) } - gen_mov_word_from_reg_helper(templosav, dest, 1, templo2); - cache_checkinstr(2); - cache_addw( MOV_LO_HI(templosav, temphi1) ); // mov templosav, temphi1 + gen_mov_word_from_reg_helper(templo3, dest, 1, templo2); } // add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value @@ -566,38 +549,30 @@ static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) { gen_add_direct_byte(dest,(Bit8s)imm); return; } - cache_checkinstr(2); - cache_addw( MOV_HI_LO(temphi1, templosav) ); // mov temphi1, templosav gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templosav, dest, dword, templo2); + gen_mov_word_to_reg_helper(templo3, dest, dword, templo2); if (dword) { gen_mov_dword_to_reg_imm(templo1, imm); } else { gen_mov_word_to_reg_imm(templo1, (Bit16u)imm); } cache_checkinstr(2); - cache_addw( ADD_REG(templosav, templosav, templo1) ); // add templosav, templosav, templo1 - gen_mov_word_from_reg_helper(templosav, dest, dword, templo2); - cache_checkinstr(2); - cache_addw( MOV_LO_HI(templosav, temphi1) ); // mov templosav, temphi1 + cache_addw( ADD_REG(templo3, templo3, templo1) ); // add templo3, templo3, templo1 + gen_mov_word_from_reg_helper(templo3, dest, dword, templo2); } // subtract an 8bit constant value from a dword memory value static void gen_sub_direct_byte(void* dest,Bit8s imm) { if(!imm) return; - cache_checkinstr(2); - cache_addw( MOV_HI_LO(temphi1, templosav) ); // mov temphi1, templosav gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templosav, dest, 1, templo2); + gen_mov_word_to_reg_helper(templo3, dest, 1, templo2); cache_checkinstr(2); if (imm >= 0) { - cache_addw( SUB_IMM8(templosav, (Bit32s)imm) ); // sub templosav, #(imm) + cache_addw( SUB_IMM8(templo3, (Bit32s)imm) ); // sub templo3, #(imm) } else { - cache_addw( ADD_IMM8(templosav, -((Bit32s)imm)) ); // add templosav, #(-imm) + cache_addw( ADD_IMM8(templo3, -((Bit32s)imm)) ); // add templo3, #(-imm) } - gen_mov_word_from_reg_helper(templosav, dest, 1, templo2); - cache_checkinstr(2); - cache_addw( MOV_LO_HI(templosav, temphi1) ); // mov templosav, temphi1 + gen_mov_word_from_reg_helper(templo3, dest, 1, templo2); } // subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value @@ -607,20 +582,16 @@ static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) { gen_sub_direct_byte(dest,(Bit8s)imm); return; } - cache_checkinstr(2); - cache_addw( MOV_HI_LO(temphi1, templosav) ); // mov temphi1, templosav gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templosav, dest, dword, templo2); + gen_mov_word_to_reg_helper(templo3, dest, dword, templo2); if (dword) { gen_mov_dword_to_reg_imm(templo1, imm); } else { gen_mov_word_to_reg_imm(templo1, (Bit16u)imm); } cache_checkinstr(2); - cache_addw( SUB_REG(templosav, templosav, templo1) ); // sub templosav, templosav, templo1 - gen_mov_word_from_reg_helper(templosav, dest, dword, templo2); - cache_checkinstr(2); - cache_addw( MOV_LO_HI(templosav, temphi1) ); // mov templosav, temphi1 + cache_addw( SUB_REG(templo3, templo3, templo1) ); // sub templo3, templo3, templo1 + gen_mov_word_from_reg_helper(templo3, dest, dword, templo2); } // effective address calculation, destination is dest_reg @@ -673,12 +644,11 @@ static void gen_call_function_helper(void * func) { // after_call: // thumb state from now on - cache_addw( MOV_REG(FC_RETOP, HOST_a1) ); // mov FC_RETOP, a1 } // generate a call to a parameterless function static void INLINE gen_call_function_raw(void * func) { - cache_checkinstr(14); + cache_checkinstr(12); gen_call_function_helper(func); } @@ -686,13 +656,13 @@ static void INLINE gen_call_function_raw(void * func) { // note: the parameters are loaded in the architecture specific way // using the gen_load_param_ functions below static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) { - cache_checkinstr(14); + cache_checkinstr(12); Bit32u proc_addr = (Bit32u)cache.pos; gen_call_function_helper(func); return proc_addr; // if proc_addr is on word boundary ((proc_addr & 0x03) == 0) - // then length of generated code is 14 bytes - // otherwise length of generated code is 12 bytes + // then length of generated code is 12 bytes + // otherwise length of generated code is 10 bytes } #if (1) @@ -723,33 +693,31 @@ static void INLINE gen_load_param_mem(Bitu mem,Bitu param) { // jump to an address pointed at by ptr, offset is in imm static void gen_jmp_ptr(void * ptr,Bits imm=0) { - cache_checkinstr(2); - cache_addw( MOV_HI_LO(temphi1, templosav) ); // mov temphi1, templosav - gen_mov_word_to_reg(templosav, ptr, 1); + gen_mov_word_to_reg(templo3, ptr, 1); if (imm) { gen_mov_dword_to_reg_imm(templo2, imm); cache_checkinstr(2); - cache_addw( ADD_REG(templosav, templosav, templo2) ); // add templosav, templosav, templo2 + cache_addw( ADD_REG(templo3, templo3, templo2) ); // add templo3, templo3, templo2 } #if (1) // (*ptr) should be word aligned if ((imm & 0x03) == 0) { - cache_checkinstr(8); - cache_addw( LDR_IMM(templo2, templosav, 0) ); // ldr templo2, [templosav] + cache_checkinstr(6); + cache_addw( LDR_IMM(templo2, templo3, 0) ); // ldr templo2, [templo3] } else #endif { - cache_checkinstr(26); - cache_addw( LDRB_IMM(templo2, templosav, 0) ); // ldrb templo2, [templosav] - cache_addw( LDRB_IMM(templo1, templosav, 1) ); // ldrb templo1, [templosav, #1] + cache_checkinstr(24); + cache_addw( LDRB_IMM(templo2, templo3, 0) ); // ldrb templo2, [templo3] + cache_addw( LDRB_IMM(templo1, templo3, 1) ); // ldrb templo1, [templo3, #1] cache_addw( LSL_IMM(templo1, templo1, 8) ); // lsl templo1, templo1, #8 cache_addw( ORR(templo2, templo1) ); // orr templo2, templo1 - cache_addw( LDRB_IMM(templo1, templosav, 2) ); // ldrb templo1, [templosav, #2] + cache_addw( LDRB_IMM(templo1, templo3, 2) ); // ldrb templo1, [templo3, #2] cache_addw( LSL_IMM(templo1, templo1, 16) ); // lsl templo1, templo1, #16 cache_addw( ORR(templo2, templo1) ); // orr templo2, templo1 - cache_addw( LDRB_IMM(templo1, templosav, 3) ); // ldrb templo1, [templosav, #3] + cache_addw( LDRB_IMM(templo1, templo3, 3) ); // ldrb templo1, [templo3, #3] cache_addw( LSL_IMM(templo1, templo1, 24) ); // lsl templo1, templo1, #24 cache_addw( ORR(templo2, templo1) ); // orr templo2, templo1 } @@ -757,8 +725,6 @@ static void gen_jmp_ptr(void * ptr,Bits imm=0) { // increase jmp address to keep thumb state cache_addw( ADD_IMM3(templo2, templo2, 1) ); // add templo2, templo2, #1 - cache_addw( MOV_LO_HI(templosav, temphi1) ); // mov templosav, temphi1 - cache_addw( BX(templo2) ); // bx templo2 } @@ -898,8 +864,7 @@ static void gen_run_code(void) { // return from a function static void gen_return_function(void) { - cache_checkinstr(6); - cache_addw( MOV_REG(HOST_a1, FC_RETOP) ); // mov a1, FC_RETOP + cache_checkinstr(4); cache_addw(0xbc08); // pop {r3} cache_addw( BX(HOST_r3) ); // bx r3 } @@ -933,35 +898,32 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_ADDb: case t_ADDw: case t_ADDd: - *(Bit16u*)pos=ADD_REG(FC_RETOP, HOST_a1, HOST_a2); // add FC_RETOP, a1, a2 - *(Bit16u*)(pos+2)=B_FWD(8); // b after_call (pc+8) + *(Bit16u*)pos=ADD_REG(HOST_a1, HOST_a1, HOST_a2); // add a1, a1, a2 + *(Bit16u*)(pos+2)=B_FWD(6); // b after_call (pc+6) break; case t_ORb: case t_ORw: case t_ORd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=ORR(FC_RETOP, HOST_a2); // orr FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(6); // b after_call (pc+6) + *(Bit16u*)pos=ORR(HOST_a1, HOST_a2); // orr a1, a2 + *(Bit16u*)(pos+2)=B_FWD(6); // b after_call (pc+6) break; case t_ANDb: case t_ANDw: case t_ANDd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=AND(FC_RETOP, HOST_a2); // and FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(6); // b after_call (pc+6) + *(Bit16u*)pos=AND(HOST_a1, HOST_a2); // and a1, a2 + *(Bit16u*)(pos+2)=B_FWD(6); // b after_call (pc+6) break; case t_SUBb: case t_SUBw: case t_SUBd: - *(Bit16u*)pos=SUB_REG(FC_RETOP, HOST_a1, HOST_a2); // sub FC_RETOP, a1, a2 - *(Bit16u*)(pos+2)=B_FWD(8); // b after_call (pc+8) + *(Bit16u*)pos=SUB_REG(HOST_a1, HOST_a1, HOST_a2); // sub a1, a1, a2 + *(Bit16u*)(pos+2)=B_FWD(6); // b after_call (pc+6) break; case t_XORb: case t_XORw: case t_XORd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=EOR(FC_RETOP, HOST_a2); // eor FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(6); // b after_call (pc+6) + *(Bit16u*)pos=EOR(HOST_a1, HOST_a2); // eor a1, a2 + *(Bit16u*)(pos+2)=B_FWD(6); // b after_call (pc+6) break; case t_CMPb: case t_CMPw: @@ -969,117 +931,107 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_TESTb: case t_TESTw: case t_TESTd: - *(Bit16u*)pos=B_FWD(10); // b after_call (pc+10) + *(Bit16u*)pos=B_FWD(8); // b after_call (pc+8) break; case t_INCb: case t_INCw: case t_INCd: - *(Bit16u*)pos=ADD_IMM3(FC_RETOP, HOST_a1, 1); // add FC_RETOP, a1, #1 - *(Bit16u*)(pos+2)=B_FWD(8); // b after_call (pc+8) + *(Bit16u*)pos=ADD_IMM3(HOST_a1, HOST_a1, 1); // add a1, a1, #1 + *(Bit16u*)(pos+2)=B_FWD(6); // b after_call (pc+6) break; case t_DECb: case t_DECw: case t_DECd: - *(Bit16u*)pos=SUB_IMM3(FC_RETOP, HOST_a1, 1); // sub FC_RETOP, a1, #1 - *(Bit16u*)(pos+2)=B_FWD(8); // b after_call (pc+8) + *(Bit16u*)pos=SUB_IMM3(HOST_a1, HOST_a1, 1); // sub a1, a1, #1 + *(Bit16u*)(pos+2)=B_FWD(6); // b after_call (pc+6) break; case t_SHLb: case t_SHLw: case t_SHLd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=LSL_REG(FC_RETOP, HOST_a2); // lsl FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(6); // b after_call (pc+6) + *(Bit16u*)pos=LSL_REG(HOST_a1, HOST_a2); // lsl a1, a2 + *(Bit16u*)(pos+2)=B_FWD(6); // b after_call (pc+6) break; case t_SHRb: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 24); // lsl FC_RETOP, a1, #24 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, FC_RETOP, 24); // lsr FC_RETOP, FC_RETOP, #24 - *(Bit16u*)(pos+4)=LSR_REG(FC_RETOP, HOST_a2); // lsr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(4); // b after_call (pc+4) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 + *(Bit16u*)(pos+2)=NOP; // nop + *(Bit16u*)(pos+4)=LSR_IMM(HOST_a1, HOST_a1, 24); // lsr a1, a1, #24 + *(Bit16u*)(pos+6)=NOP; // nop + *(Bit16u*)(pos+8)=LSR_REG(HOST_a1, HOST_a2); // lsr a1, a2 + *(Bit16u*)(pos+10)=NOP; // nop break; case t_SHRw: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 16); // lsl FC_RETOP, a1, #16 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, FC_RETOP, 16); // lsr FC_RETOP, FC_RETOP, #16 - *(Bit16u*)(pos+4)=LSR_REG(FC_RETOP, HOST_a2); // lsr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(4); // b after_call (pc+4) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 + *(Bit16u*)(pos+2)=NOP; // nop + *(Bit16u*)(pos+4)=LSR_IMM(HOST_a1, HOST_a1, 16); // lsr a1, a1, #16 + *(Bit16u*)(pos+6)=NOP; // nop + *(Bit16u*)(pos+8)=LSR_REG(HOST_a1, HOST_a2); // lsr a1, a2 + *(Bit16u*)(pos+10)=NOP; // nop break; case t_SHRd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=LSR_REG(FC_RETOP, HOST_a2); // lsr FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(6); // b after_call (pc+6) + *(Bit16u*)pos=LSR_REG(HOST_a1, HOST_a2); // lsr a1, a2 + *(Bit16u*)(pos+2)=B_FWD(6); // b after_call (pc+6) break; case t_SARb: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 24); // lsl FC_RETOP, a1, #24 - *(Bit16u*)(pos+2)=ASR_IMM(FC_RETOP, FC_RETOP, 24); // asr FC_RETOP, FC_RETOP, #24 - *(Bit16u*)(pos+4)=ASR_REG(FC_RETOP, HOST_a2); // asr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(4); // b after_call (pc+4) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 + *(Bit16u*)(pos+2)=NOP; // nop + *(Bit16u*)(pos+4)=ASR_IMM(HOST_a1, HOST_a1, 24); // asr a1, a1, #24 + *(Bit16u*)(pos+6)=NOP; // nop + *(Bit16u*)(pos+8)=ASR_REG(HOST_a1, HOST_a2); // asr a1, a2 + *(Bit16u*)(pos+10)=NOP; // nop break; case t_SARw: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 16); // lsl FC_RETOP, a1, #16 - *(Bit16u*)(pos+2)=ASR_IMM(FC_RETOP, FC_RETOP, 16); // asr FC_RETOP, FC_RETOP, #16 - *(Bit16u*)(pos+4)=ASR_REG(FC_RETOP, HOST_a2); // asr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(4); // b after_call (pc+4) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 + *(Bit16u*)(pos+2)=NOP; // nop + *(Bit16u*)(pos+4)=ASR_IMM(HOST_a1, HOST_a1, 16); // asr a1, a1, #16 + *(Bit16u*)(pos+6)=NOP; // nop + *(Bit16u*)(pos+8)=ASR_REG(HOST_a1, HOST_a2); // asr a1, a2 + *(Bit16u*)(pos+10)=NOP; // nop break; case t_SARd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=ASR_REG(FC_RETOP, HOST_a2); // asr FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(6); // b after_call (pc+6) + *(Bit16u*)pos=ASR_REG(HOST_a1, HOST_a2); // asr a1, a2 + *(Bit16u*)(pos+2)=B_FWD(6); // b after_call (pc+6) break; case t_RORb: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, HOST_a1, 8); // lsr FC_RETOP, a1, #8 - *(Bit16u*)(pos+4)=ORR(HOST_a1, FC_RETOP); // orr a1, FC_RETOP - *(Bit16u*)(pos+6)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+8)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+10)=NOP; // nop - *(Bit16u*)(pos+12)=ROR_REG(FC_RETOP, HOST_a2); // ror FC_RETOP, a2 + *(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 8); // lsr templo1, a1, #8 + *(Bit16u*)(pos+4)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+6)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 + *(Bit16u*)(pos+8)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+10)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 break; case t_RORw: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 - *(Bit16u*)(pos+2)=NOP; // nop - *(Bit16u*)(pos+4)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+6)=NOP; // nop - *(Bit16u*)(pos+8)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+10)=NOP; // nop - *(Bit16u*)(pos+12)=ROR_REG(FC_RETOP, HOST_a2); // ror FC_RETOP, a2 + *(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 + *(Bit16u*)(pos+4)=NOP; // nop + *(Bit16u*)(pos+6)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+8)=NOP; // nop + *(Bit16u*)(pos+10)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 break; case t_RORd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=ROR_REG(FC_RETOP, HOST_a2); // ror FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(6); // b after_call (pc+6) + *(Bit16u*)pos=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+2)=B_FWD(6); // b after_call (pc+6) break; - /*case t_ROLb: - *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 - *(Bit16u*)(pos+2)=NEG(templo1, HOST_a2); // neg templo1, a2 - *(Bit16u*)(pos+4)=LSR_IMM(FC_RETOP, HOST_a1, 8); // lsr FC_RETOP, a1, #8 - *(Bit16u*)(pos+6)=ADD_IMM8(templo1, 32); // add templo1, #32 - *(Bit16u*)(pos+8)=ORR(HOST_a1, FC_RETOP); // orr a1, FC_RETOP - *(Bit16u*)(pos+10)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+12)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+14)=ROR_REG(FC_RETOP, templo1); // ror FC_RETOP, templo1 - break;*/ case t_ROLw: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 - *(Bit16u*)(pos+2)=NEG(templo1, HOST_a2); // neg templo1, a2 - *(Bit16u*)(pos+4)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+6)=ADD_IMM8(templo1, 32); // add templo1, #32 - *(Bit16u*)(pos+8)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+10)=NOP; // nop - *(Bit16u*)(pos+12)=ROR_REG(FC_RETOP, templo1); // ror FC_RETOP, templo1 + *(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2); // neg a2, a2 + *(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 + *(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32); // add a2, #32 + *(Bit16u*)(pos+8)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+10)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 break; case t_ROLd: - *(Bit16u*)pos=NEG(templo1, HOST_a2); // neg templo1, a2 - *(Bit16u*)(pos+2)=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+4)=NOP; // nop - *(Bit16u*)(pos+6)=ADD_IMM8(templo1, 32); // add templo1, #32 - *(Bit16u*)(pos+8)=NOP; // nop - *(Bit16u*)(pos+10)=ROR_REG(FC_RETOP, templo1); // ror FC_RETOP, templo1 - *(Bit16u*)(pos+12)=NOP; // nop + *(Bit16u*)pos=NEG(HOST_a2, HOST_a2); // neg a2, a2 + *(Bit16u*)(pos+2)=NOP; // nop + *(Bit16u*)(pos+4)=ADD_IMM8(HOST_a2, 32); // add a2, #32 + *(Bit16u*)(pos+6)=NOP; // nop + *(Bit16u*)(pos+8)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+10)=NOP; // nop break; case t_NEGb: case t_NEGw: case t_NEGd: - *(Bit16u*)pos=NEG(FC_RETOP, HOST_a1); // neg FC_RETOP, a1 - *(Bit16u*)(pos+2)=B_FWD(8); // b after_call (pc+8) + *(Bit16u*)pos=NEG(HOST_a1, HOST_a1); // neg a1, a1 + *(Bit16u*)(pos+2)=B_FWD(6); // b after_call (pc+6) break; default: *(Bit32u*)( ( ((Bit32u) (*pos)) << 2 ) + ((Bit32u)pos + 4) ) = (Bit32u)fct_ptr; // simple_func @@ -1093,35 +1045,32 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_ADDb: case t_ADDw: case t_ADDd: - *(Bit16u*)pos=ADD_REG(FC_RETOP, HOST_a1, HOST_a2); // add FC_RETOP, a1, a2 - *(Bit16u*)(pos+2)=B_FWD(6); // b after_call (pc+6) + *(Bit16u*)pos=ADD_REG(HOST_a1, HOST_a1, HOST_a2); // add a1, a1, a2 + *(Bit16u*)(pos+2)=B_FWD(4); // b after_call (pc+4) break; case t_ORb: case t_ORw: case t_ORd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=ORR(FC_RETOP, HOST_a2); // orr FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(4); // b after_call (pc+4) + *(Bit16u*)pos=ORR(HOST_a1, HOST_a2); // orr a1, a2 + *(Bit16u*)(pos+2)=B_FWD(4); // b after_call (pc+4) break; case t_ANDb: case t_ANDw: case t_ANDd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=AND(FC_RETOP, HOST_a2); // and FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(4); // b after_call (pc+4) + *(Bit16u*)pos=AND(HOST_a1, HOST_a2); // and a1, a2 + *(Bit16u*)(pos+2)=B_FWD(4); // b after_call (pc+4) break; case t_SUBb: case t_SUBw: case t_SUBd: - *(Bit16u*)pos=SUB_REG(FC_RETOP, HOST_a1, HOST_a2); // sub FC_RETOP, a1, a2 - *(Bit16u*)(pos+2)=B_FWD(6); // b after_call (pc+6) + *(Bit16u*)pos=SUB_REG(HOST_a1, HOST_a1, HOST_a2); // sub a1, a1, a2 + *(Bit16u*)(pos+2)=B_FWD(4); // b after_call (pc+4) break; case t_XORb: case t_XORw: case t_XORd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=EOR(FC_RETOP, HOST_a2); // eor FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(4); // b after_call (pc+4) + *(Bit16u*)pos=EOR(HOST_a1, HOST_a2); // eor a1, a2 + *(Bit16u*)(pos+2)=B_FWD(4); // b after_call (pc+4) break; case t_CMPb: case t_CMPw: @@ -1129,121 +1078,85 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_TESTb: case t_TESTw: case t_TESTd: - *(Bit16u*)pos=B_FWD(8); // b after_call (pc+8) + *(Bit16u*)pos=B_FWD(6); // b after_call (pc+6) break; case t_INCb: case t_INCw: case t_INCd: - *(Bit16u*)pos=ADD_IMM3(FC_RETOP, HOST_a1, 1); // add FC_RETOP, a1, #1 - *(Bit16u*)(pos+2)=B_FWD(6); // b after_call (pc+6) + *(Bit16u*)pos=ADD_IMM3(HOST_a1, HOST_a1, 1); // add a1, a1, #1 + *(Bit16u*)(pos+2)=B_FWD(4); // b after_call (pc+4) break; case t_DECb: case t_DECw: case t_DECd: - *(Bit16u*)pos=SUB_IMM3(FC_RETOP, HOST_a1, 1); // sub FC_RETOP, a1, #1 - *(Bit16u*)(pos+2)=B_FWD(6); // b after_call (pc+6) + *(Bit16u*)pos=SUB_IMM3(HOST_a1, HOST_a1, 1); // sub a1, a1, #1 + *(Bit16u*)(pos+2)=B_FWD(4); // b after_call (pc+4) break; case t_SHLb: case t_SHLw: case t_SHLd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=LSL_REG(FC_RETOP, HOST_a2); // lsl FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(4); // b after_call (pc+4) + *(Bit16u*)pos=LSL_REG(HOST_a1, HOST_a2); // lsl a1, a2 + *(Bit16u*)(pos+2)=B_FWD(4); // b after_call (pc+4) break; case t_SHRb: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 24); // lsl FC_RETOP, a1, #24 + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 *(Bit16u*)(pos+2)=NOP; // nop - *(Bit16u*)(pos+4)=LSR_IMM(FC_RETOP, FC_RETOP, 24); // lsr FC_RETOP, FC_RETOP, #24 + *(Bit16u*)(pos+4)=LSR_IMM(HOST_a1, HOST_a1, 24); // lsr a1, a1, #24 *(Bit16u*)(pos+6)=NOP; // nop - *(Bit16u*)(pos+8)=LSR_REG(FC_RETOP, HOST_a2); // lsr FC_RETOP, a2 - *(Bit16u*)(pos+10)=NOP; // nop + *(Bit16u*)(pos+8)=LSR_REG(HOST_a1, HOST_a2); // lsr a1, a2 break; case t_SHRw: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 16); // lsl FC_RETOP, a1, #16 + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 *(Bit16u*)(pos+2)=NOP; // nop - *(Bit16u*)(pos+4)=LSR_IMM(FC_RETOP, FC_RETOP, 16); // lsr FC_RETOP, FC_RETOP, #16 + *(Bit16u*)(pos+4)=LSR_IMM(HOST_a1, HOST_a1, 16); // lsr a1, a1, #16 *(Bit16u*)(pos+6)=NOP; // nop - *(Bit16u*)(pos+8)=LSR_REG(FC_RETOP, HOST_a2); // lsr FC_RETOP, a2 - *(Bit16u*)(pos+10)=NOP; // nop + *(Bit16u*)(pos+8)=LSR_REG(HOST_a1, HOST_a2); // lsr a1, a2 break; case t_SHRd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=LSR_REG(FC_RETOP, HOST_a2); // lsr FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(4); // b after_call (pc+4) + *(Bit16u*)pos=LSR_REG(HOST_a1, HOST_a2); // lsr a1, a2 + *(Bit16u*)(pos+2)=B_FWD(4); // b after_call (pc+4) break; case t_SARb: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 24); // lsl FC_RETOP, a1, #24 + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 *(Bit16u*)(pos+2)=NOP; // nop - *(Bit16u*)(pos+4)=ASR_IMM(FC_RETOP, FC_RETOP, 24); // asr FC_RETOP, FC_RETOP, #24 + *(Bit16u*)(pos+4)=ASR_IMM(HOST_a1, HOST_a1, 24); // asr a1, a1, #24 *(Bit16u*)(pos+6)=NOP; // nop - *(Bit16u*)(pos+8)=ASR_REG(FC_RETOP, HOST_a2); // asr FC_RETOP, a2 - *(Bit16u*)(pos+10)=NOP; // nop + *(Bit16u*)(pos+8)=ASR_REG(HOST_a1, HOST_a2); // asr a1, a2 break; case t_SARw: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 16); // lsl FC_RETOP, a1, #16 + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 *(Bit16u*)(pos+2)=NOP; // nop - *(Bit16u*)(pos+4)=ASR_IMM(FC_RETOP, FC_RETOP, 16); // asr FC_RETOP, FC_RETOP, #16 + *(Bit16u*)(pos+4)=ASR_IMM(HOST_a1, HOST_a1, 16); // asr a1, a1, #16 *(Bit16u*)(pos+6)=NOP; // nop - *(Bit16u*)(pos+8)=ASR_REG(FC_RETOP, HOST_a2); // asr FC_RETOP, a2 - *(Bit16u*)(pos+10)=NOP; // nop + *(Bit16u*)(pos+8)=ASR_REG(HOST_a1, HOST_a2); // asr a1, a2 break; case t_SARd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=ASR_REG(FC_RETOP, HOST_a2); // asr FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(4); // b after_call (pc+4) - break; - case t_RORb: - *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, HOST_a1, 8); // lsr FC_RETOP, a1, #8 - *(Bit16u*)(pos+4)=ORR(HOST_a1, FC_RETOP); // orr a1, FC_RETOP - *(Bit16u*)(pos+6)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+8)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+10)=ROR_REG(FC_RETOP, HOST_a2); // ror FC_RETOP, a2 + *(Bit16u*)pos=ASR_REG(HOST_a1, HOST_a2); // asr a1, a2 + *(Bit16u*)(pos+2)=B_FWD(4); // b after_call (pc+4) break; case t_RORw: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 + *(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 *(Bit16u*)(pos+4)=NOP; // nop - *(Bit16u*)(pos+6)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+8)=NOP; // nop - *(Bit16u*)(pos+10)=ROR_REG(FC_RETOP, HOST_a2); // ror FC_RETOP, a2 + *(Bit16u*)(pos+6)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+8)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 break; case t_RORd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=ROR_REG(FC_RETOP, HOST_a2); // ror FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(4); // b after_call (pc+4) - break; - /*case t_ROLb: - *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 - *(Bit16u*)(pos+2)=NEG(templo1, HOST_a2); // neg templo1, a2 - *(Bit16u*)(pos+4)=LSR_IMM(FC_RETOP, HOST_a1, 8); // lsr FC_RETOP, a1, #8 - *(Bit16u*)(pos+6)=ADD_IMM8(templo1, 32); // add templo1, #32 - *(Bit16u*)(pos+8)=ORR(HOST_a1, FC_RETOP); // orr a1, FC_RETOP - *(Bit16u*)(pos+10)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+12)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+14)=ROR_REG(FC_RETOP, templo1); // ror FC_RETOP, templo1 - break;*/ - case t_ROLw: - *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 - *(Bit16u*)(pos+2)=NEG(templo1, HOST_a2); // neg templo1, a2 - *(Bit16u*)(pos+4)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+6)=ADD_IMM8(templo1, 32); // add templo1, #32 - *(Bit16u*)(pos+8)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+10)=ROR_REG(FC_RETOP, templo1); // ror FC_RETOP, templo1 + *(Bit16u*)pos=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+2)=B_FWD(4); // b after_call (pc+4) break; case t_ROLd: - *(Bit16u*)pos=NEG(templo1, HOST_a2); // neg templo1, a2 - *(Bit16u*)(pos+2)=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+4)=ADD_IMM8(templo1, 32); // add templo1, #32 + *(Bit16u*)pos=NEG(HOST_a2, HOST_a2); // neg a2, a2 + *(Bit16u*)(pos+2)=NOP; // nop + *(Bit16u*)(pos+4)=ADD_IMM8(HOST_a2, 32); // add a2, #32 *(Bit16u*)(pos+6)=NOP; // nop - *(Bit16u*)(pos+8)=ROR_REG(FC_RETOP, templo1); // ror FC_RETOP, templo1 - *(Bit16u*)(pos+10)=NOP; // nop + *(Bit16u*)(pos+8)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 break; case t_NEGb: case t_NEGw: case t_NEGd: - *(Bit16u*)pos=NEG(FC_RETOP, HOST_a1); // neg FC_RETOP, a1 - *(Bit16u*)(pos+2)=B_FWD(6); // b after_call (pc+6) + *(Bit16u*)pos=NEG(HOST_a1, HOST_a1); // neg a1, a1 + *(Bit16u*)(pos+2)=B_FWD(4); // b after_call (pc+4) break; default: *(Bit32u*)( ( ((Bit32u) (*pos)) << 2 ) + ((Bit32u)pos + 2) ) = (Bit32u)fct_ptr; // simple_func diff --git a/src/cpu/core_dynrec/risc_armv4le-thumb-niw.h b/src/cpu/core_dynrec/risc_armv4le-thumb-niw.h index 0c99cc3c..25056e4a 100644 --- a/src/cpu/core_dynrec/risc_armv4le-thumb-niw.h +++ b/src/cpu/core_dynrec/risc_armv4le-thumb-niw.h @@ -16,7 +16,7 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* $Id: risc_armv4le-thumb-niw.h,v 1.4 2009-06-25 19:31:43 c2woody Exp $ */ +/* $Id: risc_armv4le-thumb-niw.h,v 1.5 2009-06-27 12:51:10 c2woody Exp $ */ /* ARMv4 (little endian) backend by M-HT (thumb version with data pool) */ @@ -25,15 +25,10 @@ // temporary "lo" registers #define templo1 HOST_v3 #define templo2 HOST_v4 - -// temporary "lo" register - value must be preserved when using it -#define templosav HOST_a3 - -// temporary "hi" register -#define temphi1 HOST_ip +#define templo3 HOST_v2 // register that holds function return values -#define FC_RETOP HOST_v2 +#define FC_RETOP HOST_a1 // register used for address calculations, #define FC_ADDR HOST_v1 // has to be saved across calls, see DRC_PROTECT_ADDR_REG @@ -86,8 +81,6 @@ #define ADD_IMM8(dst, imm) (0x3000 + ((dst) << 8) + (imm) ) // add dst, src1, src2 #define ADD_REG(dst, src1, src2) (0x1800 + (dst) + ((src1) << 3) + ((src2) << 6) ) -// add dst, src -#define ADD_LO_HI(dst, src) (0x4440 + (dst) + (((src) - HOST_r8) << 3) ) // add dst, pc, #imm @ 0 <= imm < 1024 & imm mod 4 = 0 #define ADD_LO_PC_IMM(dst, imm) (0xa000 + ((dst) << 8) + ((imm) >> 2) ) // sub dst, src1, src2 @@ -502,11 +495,9 @@ static void gen_extend_word(bool sign,HostReg reg) { // add a 32bit value from memory to a full register static void gen_add(HostReg reg,void* op) { + gen_mov_word_to_reg(templo3, op, 1); cache_checkinstr(2); - cache_addw( MOV_HI_LO(temphi1, reg) ); // mov temphi1, reg - gen_mov_word_to_reg(reg, op, 1); - cache_checkinstr(2); - cache_addw( ADD_LO_HI(reg, temphi1) ); // add reg, temphi1 + cache_addw( ADD_REG(reg, reg, templo3) ); // add reg, reg, templo3 } // add a 32bit constant value to a full register @@ -528,12 +519,8 @@ static void gen_and_imm(HostReg reg,Bit32u imm) { // move a 32bit constant value into memory static void gen_mov_direct_dword(void* dest,Bit32u imm) { - cache_checkinstr(2); - cache_addw( MOV_HI_LO(temphi1, templosav) ); // mov temphi1, templosav - gen_mov_dword_to_reg_imm(templosav, imm); - gen_mov_word_from_reg(templosav, dest, 1); - cache_checkinstr(2); - cache_addw( MOV_LO_HI(templosav, temphi1) ); // mov templosav, temphi1 + gen_mov_dword_to_reg_imm(templo3, imm); + gen_mov_word_from_reg(templo3, dest, 1); } // move an address into memory @@ -544,19 +531,15 @@ static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) { // add an 8bit constant value to a dword memory value static void gen_add_direct_byte(void* dest,Bit8s imm) { if(!imm) return; - cache_checkinstr(2); - cache_addw( MOV_HI_LO(temphi1, templosav) ); // mov temphi1, templosav gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templosav, dest, 1, templo2); + gen_mov_word_to_reg_helper(templo3, dest, 1, templo2); cache_checkinstr(2); if (imm >= 0) { - cache_addw( ADD_IMM8(templosav, (Bit32s)imm) ); // add templosav, #(imm) + cache_addw( ADD_IMM8(templo3, (Bit32s)imm) ); // add templo3, #(imm) } else { - cache_addw( SUB_IMM8(templosav, -((Bit32s)imm)) ); // sub templosav, #(-imm) + cache_addw( SUB_IMM8(templo3, -((Bit32s)imm)) ); // sub templo3, #(-imm) } - gen_mov_word_from_reg_helper(templosav, dest, 1, templo2); - cache_checkinstr(2); - cache_addw( MOV_LO_HI(templosav, temphi1) ); // mov templosav, temphi1 + gen_mov_word_from_reg_helper(templo3, dest, 1, templo2); } // add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value @@ -566,38 +549,30 @@ static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) { gen_add_direct_byte(dest,(Bit8s)imm); return; } - cache_checkinstr(2); - cache_addw( MOV_HI_LO(temphi1, templosav) ); // mov temphi1, templosav gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templosav, dest, dword, templo2); + gen_mov_word_to_reg_helper(templo3, dest, dword, templo2); if (dword) { gen_mov_dword_to_reg_imm(templo1, imm); } else { gen_mov_word_to_reg_imm(templo1, (Bit16u)imm); } cache_checkinstr(2); - cache_addw( ADD_REG(templosav, templosav, templo1) ); // add templosav, templosav, templo1 - gen_mov_word_from_reg_helper(templosav, dest, dword, templo2); - cache_checkinstr(2); - cache_addw( MOV_LO_HI(templosav, temphi1) ); // mov templosav, temphi1 + cache_addw( ADD_REG(templo3, templo3, templo1) ); // add templo3, templo3, templo1 + gen_mov_word_from_reg_helper(templo3, dest, dword, templo2); } // subtract an 8bit constant value from a dword memory value static void gen_sub_direct_byte(void* dest,Bit8s imm) { if(!imm) return; - cache_checkinstr(2); - cache_addw( MOV_HI_LO(temphi1, templosav) ); // mov temphi1, templosav gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templosav, dest, 1, templo2); + gen_mov_word_to_reg_helper(templo3, dest, 1, templo2); cache_checkinstr(2); if (imm >= 0) { - cache_addw( SUB_IMM8(templosav, (Bit32s)imm) ); // sub templosav, #(imm) + cache_addw( SUB_IMM8(templo3, (Bit32s)imm) ); // sub templo3, #(imm) } else { - cache_addw( ADD_IMM8(templosav, -((Bit32s)imm)) ); // add templosav, #(-imm) + cache_addw( ADD_IMM8(templo3, -((Bit32s)imm)) ); // add templo3, #(-imm) } - gen_mov_word_from_reg_helper(templosav, dest, 1, templo2); - cache_checkinstr(2); - cache_addw( MOV_LO_HI(templosav, temphi1) ); // mov templosav, temphi1 + gen_mov_word_from_reg_helper(templo3, dest, 1, templo2); } // subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value @@ -607,20 +582,16 @@ static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) { gen_sub_direct_byte(dest,(Bit8s)imm); return; } - cache_checkinstr(2); - cache_addw( MOV_HI_LO(temphi1, templosav) ); // mov temphi1, templosav gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templosav, dest, dword, templo2); + gen_mov_word_to_reg_helper(templo3, dest, dword, templo2); if (dword) { gen_mov_dword_to_reg_imm(templo1, imm); } else { gen_mov_word_to_reg_imm(templo1, (Bit16u)imm); } cache_checkinstr(2); - cache_addw( SUB_REG(templosav, templosav, templo1) ); // sub templosav, templosav, templo1 - gen_mov_word_from_reg_helper(templosav, dest, dword, templo2); - cache_checkinstr(2); - cache_addw( MOV_LO_HI(templosav, temphi1) ); // mov templosav, temphi1 + cache_addw( SUB_REG(templo3, templo3, templo1) ); // sub templo3, templo3, templo1 + gen_mov_word_from_reg_helper(templo3, dest, dword, templo2); } // effective address calculation, destination is dest_reg @@ -675,12 +646,11 @@ static void gen_call_function_helper(void * func) { cache_addd(0xe12fff10 + (templo1)); // bx templo1 // thumb state from now on - cache_addw( MOV_REG(FC_RETOP, HOST_a1) ); // mov FC_RETOP, a1 } // generate a call to a parameterless function static void INLINE gen_call_function_raw(void * func) { - cache_checkinstr(20); + cache_checkinstr(18); gen_call_function_helper(func); } @@ -688,13 +658,13 @@ static void INLINE gen_call_function_raw(void * func) { // note: the parameters are loaded in the architecture specific way // using the gen_load_param_ functions below static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) { - cache_checkinstr(20); + cache_checkinstr(18); Bit32u proc_addr = (Bit32u)cache.pos; gen_call_function_helper(func); return proc_addr; // if proc_addr is on word boundary ((proc_addr & 0x03) == 0) - // then length of generated code is 18 bytes - // otherwise length of generated code is 20 bytes + // then length of generated code is 16 bytes + // otherwise length of generated code is 18 bytes } #if (1) @@ -725,33 +695,31 @@ static void INLINE gen_load_param_mem(Bitu mem,Bitu param) { // jump to an address pointed at by ptr, offset is in imm static void gen_jmp_ptr(void * ptr,Bits imm=0) { - cache_checkinstr(2); - cache_addw( MOV_HI_LO(temphi1, templosav) ); // mov temphi1, templosav - gen_mov_word_to_reg(templosav, ptr, 1); + gen_mov_word_to_reg(templo3, ptr, 1); if (imm) { gen_mov_dword_to_reg_imm(templo2, imm); cache_checkinstr(2); - cache_addw( ADD_REG(templosav, templosav, templo2) ); // add templosav, templosav, templo2 + cache_addw( ADD_REG(templo3, templo3, templo2) ); // add templo3, templo3, templo2 } #if (1) // (*ptr) should be word aligned if ((imm & 0x03) == 0) { - cache_checkinstr(8); - cache_addw( LDR_IMM(templo2, templosav, 0) ); // ldr templo2, [templosav] + cache_checkinstr(6); + cache_addw( LDR_IMM(templo2, templo3, 0) ); // ldr templo2, [templo3] } else #endif { - cache_checkinstr(26); - cache_addw( LDRB_IMM(templo2, templosav, 0) ); // ldrb templo2, [templosav] - cache_addw( LDRB_IMM(templo1, templosav, 1) ); // ldrb templo1, [templosav, #1] + cache_checkinstr(24); + cache_addw( LDRB_IMM(templo2, templo3, 0) ); // ldrb templo2, [templo3] + cache_addw( LDRB_IMM(templo1, templo3, 1) ); // ldrb templo1, [templo3, #1] cache_addw( LSL_IMM(templo1, templo1, 8) ); // lsl templo1, templo1, #8 cache_addw( ORR(templo2, templo1) ); // orr templo2, templo1 - cache_addw( LDRB_IMM(templo1, templosav, 2) ); // ldrb templo1, [templosav, #2] + cache_addw( LDRB_IMM(templo1, templo3, 2) ); // ldrb templo1, [templo3, #2] cache_addw( LSL_IMM(templo1, templo1, 16) ); // lsl templo1, templo1, #16 cache_addw( ORR(templo2, templo1) ); // orr templo2, templo1 - cache_addw( LDRB_IMM(templo1, templosav, 3) ); // ldrb templo1, [templosav, #3] + cache_addw( LDRB_IMM(templo1, templo3, 3) ); // ldrb templo1, [templo3, #3] cache_addw( LSL_IMM(templo1, templo1, 24) ); // lsl templo1, templo1, #24 cache_addw( ORR(templo2, templo1) ); // orr templo2, templo1 } @@ -759,8 +727,6 @@ static void gen_jmp_ptr(void * ptr,Bits imm=0) { // increase jmp address to keep thumb state cache_addw( ADD_IMM3(templo2, templo2, 1) ); // add templo2, templo2, #1 - cache_addw( MOV_LO_HI(templosav, temphi1) ); // mov templosav, temphi1 - cache_addw( BX(templo2) ); // bx templo2 } @@ -900,8 +866,7 @@ static void gen_run_code(void) { // return from a function static void gen_return_function(void) { - cache_checkinstr(6); - cache_addw( MOV_REG(HOST_a1, FC_RETOP) ); // mov a1, FC_RETOP + cache_checkinstr(4); cache_addw(0xbc08); // pop {r3} cache_addw( BX(HOST_r3) ); // bx r3 } @@ -935,35 +900,32 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_ADDb: case t_ADDw: case t_ADDd: - *(Bit16u*)pos=ADD_REG(FC_RETOP, HOST_a1, HOST_a2); // add FC_RETOP, a1, a2 - *(Bit16u*)(pos+2)=B_FWD(12); // b after_call (pc+12) + *(Bit16u*)pos=ADD_REG(HOST_a1, HOST_a1, HOST_a2); // add a1, a1, a2 + *(Bit16u*)(pos+2)=B_FWD(10); // b after_call (pc+10) break; case t_ORb: case t_ORw: case t_ORd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=ORR(FC_RETOP, HOST_a2); // orr FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(10); // b after_call (pc+10) + *(Bit16u*)pos=ORR(HOST_a1, HOST_a2); // orr a1, a2 + *(Bit16u*)(pos+2)=B_FWD(10); // b after_call (pc+10) break; case t_ANDb: case t_ANDw: case t_ANDd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=AND(FC_RETOP, HOST_a2); // and FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(10); // b after_call (pc+10) + *(Bit16u*)pos=AND(HOST_a1, HOST_a2); // and a1, a2 + *(Bit16u*)(pos+2)=B_FWD(10); // b after_call (pc+10) break; case t_SUBb: case t_SUBw: case t_SUBd: - *(Bit16u*)pos=SUB_REG(FC_RETOP, HOST_a1, HOST_a2); // sub FC_RETOP, a1, a2 - *(Bit16u*)(pos+2)=B_FWD(12); // b after_call (pc+12) + *(Bit16u*)pos=SUB_REG(HOST_a1, HOST_a1, HOST_a2); // sub a1, a1, a2 + *(Bit16u*)(pos+2)=B_FWD(10); // b after_call (pc+10) break; case t_XORb: case t_XORw: case t_XORd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=EOR(FC_RETOP, HOST_a2); // eor FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(10); // b after_call (pc+10) + *(Bit16u*)pos=EOR(HOST_a1, HOST_a2); // eor a1, a2 + *(Bit16u*)(pos+2)=B_FWD(10); // b after_call (pc+10) break; case t_CMPb: case t_CMPw: @@ -971,118 +933,110 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_TESTb: case t_TESTw: case t_TESTd: - *(Bit16u*)pos=B_FWD(14); // b after_call (pc+14) + *(Bit16u*)pos=B_FWD(12); // b after_call (pc+12) break; case t_INCb: case t_INCw: case t_INCd: - *(Bit16u*)pos=ADD_IMM3(FC_RETOP, HOST_a1, 1); // add FC_RETOP, a1, #1 - *(Bit16u*)(pos+2)=B_FWD(12); // b after_call (pc+12) + *(Bit16u*)pos=ADD_IMM3(HOST_a1, HOST_a1, 1); // add a1, a1, #1 + *(Bit16u*)(pos+2)=B_FWD(10); // b after_call (pc+10) break; case t_DECb: case t_DECw: case t_DECd: - *(Bit16u*)pos=SUB_IMM3(FC_RETOP, HOST_a1, 1); // sub FC_RETOP, a1, #1 - *(Bit16u*)(pos+2)=B_FWD(12); // b after_call (pc+12) + *(Bit16u*)pos=SUB_IMM3(HOST_a1, HOST_a1, 1); // sub a1, a1, #1 + *(Bit16u*)(pos+2)=B_FWD(10); // b after_call (pc+10) break; case t_SHLb: case t_SHLw: case t_SHLd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=LSL_REG(FC_RETOP, HOST_a2); // lsl FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(10); // b after_call (pc+10) + *(Bit16u*)pos=LSL_REG(HOST_a1, HOST_a2); // lsl a1, a2 + *(Bit16u*)(pos+2)=B_FWD(10); // b after_call (pc+10) break; case t_SHRb: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 24); // lsl FC_RETOP, a1, #24 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, FC_RETOP, 24); // lsr FC_RETOP, FC_RETOP, #24 - *(Bit16u*)(pos+4)=LSR_REG(FC_RETOP, HOST_a2); // lsr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(8); // b after_call (pc+8) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 + *(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 24); // lsr a1, a1, #24 + *(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2); // lsr a1, a2 + *(Bit16u*)(pos+6)=B_FWD(6); // b after_call (pc+6) break; case t_SHRw: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 16); // lsl FC_RETOP, a1, #16 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, FC_RETOP, 16); // lsr FC_RETOP, FC_RETOP, #16 - *(Bit16u*)(pos+4)=LSR_REG(FC_RETOP, HOST_a2); // lsr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(8); // b after_call (pc+8) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 + *(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 16); // lsr a1, a1, #16 + *(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2); // lsr a1, a2 + *(Bit16u*)(pos+6)=B_FWD(6); // b after_call (pc+6) break; case t_SHRd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=LSR_REG(FC_RETOP, HOST_a2); // lsr FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(10); // b after_call (pc+10) + *(Bit16u*)pos=LSR_REG(HOST_a1, HOST_a2); // lsr a1, a2 + *(Bit16u*)(pos+2)=B_FWD(10); // b after_call (pc+10) break; case t_SARb: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 24); // lsl FC_RETOP, a1, #24 - *(Bit16u*)(pos+2)=ASR_IMM(FC_RETOP, FC_RETOP, 24); // asr FC_RETOP, FC_RETOP, #24 - *(Bit16u*)(pos+4)=ASR_REG(FC_RETOP, HOST_a2); // asr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(8); // b after_call (pc+8) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 + *(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 24); // asr a1, a1, #24 + *(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2); // asr a1, a2 + *(Bit16u*)(pos+6)=B_FWD(6); // b after_call (pc+6) break; case t_SARw: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 16); // lsl FC_RETOP, a1, #16 - *(Bit16u*)(pos+2)=ASR_IMM(FC_RETOP, FC_RETOP, 16); // asr FC_RETOP, FC_RETOP, #16 - *(Bit16u*)(pos+4)=ASR_REG(FC_RETOP, HOST_a2); // asr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(8); // b after_call (pc+8) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 + *(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 16); // asr a1, a1, #16 + *(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2); // asr a1, a2 + *(Bit16u*)(pos+6)=B_FWD(6); // b after_call (pc+6) break; case t_SARd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=ASR_REG(FC_RETOP, HOST_a2); // asr FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(10); // b after_call (pc+10) + *(Bit16u*)pos=ASR_REG(HOST_a1, HOST_a2); // asr a1, a2 + *(Bit16u*)(pos+2)=B_FWD(10); // b after_call (pc+10) break; case t_RORb: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, HOST_a1, 8); // lsr FC_RETOP, a1, #8 - *(Bit16u*)(pos+4)=ORR(HOST_a1, FC_RETOP); // orr a1, FC_RETOP + *(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 8); // lsr templo1, a1, #8 + *(Bit16u*)(pos+4)=ORR(HOST_a1, templo1); // orr a1, templo1 *(Bit16u*)(pos+6)=NOP; // nop - *(Bit16u*)(pos+8)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 + *(Bit16u*)(pos+8)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 *(Bit16u*)(pos+10)=NOP; // nop - *(Bit16u*)(pos+12)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+14)=NOP; // nop - *(Bit16u*)(pos+16)=ROR_REG(FC_RETOP, HOST_a2); // ror FC_RETOP, a2 + *(Bit16u*)(pos+12)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+14)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 break; case t_RORw: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+4)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+6)=ROR_REG(FC_RETOP, HOST_a2); // ror FC_RETOP, a2 - *(Bit16u*)(pos+8)=B_FWD(6); // b after_call (pc+6) + *(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 + *(Bit16u*)(pos+4)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+6)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+8)=B_FWD(4); // b after_call (pc+4) break; case t_RORd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=ROR_REG(FC_RETOP, HOST_a2); // ror FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(10); // b after_call (pc+10) + *(Bit16u*)pos=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+2)=B_FWD(10); // b after_call (pc+10) break; case t_ROLb: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 - *(Bit16u*)(pos+2)=NEG(templo1, HOST_a2); // neg templo1, a2 - *(Bit16u*)(pos+4)=LSR_IMM(FC_RETOP, HOST_a1, 8); // lsr FC_RETOP, a1, #8 - *(Bit16u*)(pos+6)=ADD_IMM8(templo1, 32); // add templo1, #32 - *(Bit16u*)(pos+8)=ORR(HOST_a1, FC_RETOP); // orr a1, FC_RETOP - *(Bit16u*)(pos+10)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+12)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+14)=NOP; // nop - *(Bit16u*)(pos+16)=ROR_REG(FC_RETOP, templo1); // ror FC_RETOP, templo1 + *(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2); // neg a2, a2 + *(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 8); // lsr templo1, a1, #8 + *(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32); // add a2, #32 + *(Bit16u*)(pos+8)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+10)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 + *(Bit16u*)(pos+12)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+14)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 break; case t_ROLw: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 - *(Bit16u*)(pos+2)=NEG(templo1, HOST_a2); // neg templo1, a2 - *(Bit16u*)(pos+4)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+6)=ADD_IMM8(templo1, 32); // add templo1, #32 - *(Bit16u*)(pos+8)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 + *(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2); // neg a2, a2 + *(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 + *(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32); // add a2, #32 + *(Bit16u*)(pos+8)=ORR(HOST_a1, templo1); // orr a1, templo1 *(Bit16u*)(pos+10)=NOP; // nop - *(Bit16u*)(pos+12)=ROR_REG(FC_RETOP, templo1); // ror FC_RETOP, templo1 + *(Bit16u*)(pos+12)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 *(Bit16u*)(pos+14)=NOP; // nop - *(Bit16u*)(pos+16)=NOP; // nop break; case t_ROLd: - *(Bit16u*)pos=NEG(templo1, HOST_a2); // neg templo1, a2 - *(Bit16u*)(pos+2)=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+4)=ADD_IMM8(templo1, 32); // add templo1, #32 - *(Bit16u*)(pos+6)=ROR_REG(FC_RETOP, templo1); // ror FC_RETOP, templo1 - *(Bit16u*)(pos+8)=B_FWD(6); // b after_call (pc+6) + *(Bit16u*)pos=NEG(HOST_a2, HOST_a2); // neg a2, a2 + *(Bit16u*)(pos+2)=ADD_IMM8(HOST_a2, 32); // add a2, #32 + *(Bit16u*)(pos+4)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+6)=B_FWD(6); // b after_call (pc+6) break; case t_NEGb: case t_NEGw: case t_NEGd: - *(Bit16u*)pos=NEG(FC_RETOP, HOST_a1); // neg FC_RETOP, a1 - *(Bit16u*)(pos+2)=B_FWD(12); // b after_call (pc+12) + *(Bit16u*)pos=NEG(HOST_a1, HOST_a1); // neg a1, a1 + *(Bit16u*)(pos+2)=B_FWD(10); // b after_call (pc+10) break; default: *(Bit32u*)( ( ((Bit32u) (*pos)) << 2 ) + ((Bit32u)pos + 4) ) = (Bit32u)fct_ptr; // simple_func @@ -1096,35 +1050,32 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_ADDb: case t_ADDw: case t_ADDd: - *(Bit16u*)pos=ADD_REG(FC_RETOP, HOST_a1, HOST_a2); // add FC_RETOP, a1, a2 - *(Bit16u*)(pos+2)=B_FWD(14); // b after_call (pc+14) + *(Bit16u*)pos=ADD_REG(HOST_a1, HOST_a1, HOST_a2); // add a1, a1, a2 + *(Bit16u*)(pos+2)=B_FWD(12); // b after_call (pc+12) break; case t_ORb: case t_ORw: case t_ORd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=ORR(FC_RETOP, HOST_a2); // orr FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(12); // b after_call (pc+12) + *(Bit16u*)pos=ORR(HOST_a1, HOST_a2); // orr a1, a2 + *(Bit16u*)(pos+2)=B_FWD(12); // b after_call (pc+12) break; case t_ANDb: case t_ANDw: case t_ANDd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=AND(FC_RETOP, HOST_a2); // and FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(12); // b after_call (pc+12) + *(Bit16u*)pos=AND(HOST_a1, HOST_a2); // and a1, a2 + *(Bit16u*)(pos+2)=B_FWD(12); // b after_call (pc+12) break; case t_SUBb: case t_SUBw: case t_SUBd: - *(Bit16u*)pos=SUB_REG(FC_RETOP, HOST_a1, HOST_a2); // sub FC_RETOP, a1, a2 - *(Bit16u*)(pos+2)=B_FWD(14); // b after_call (pc+14) + *(Bit16u*)pos=SUB_REG(HOST_a1, HOST_a1, HOST_a2); // sub a1, a1, a2 + *(Bit16u*)(pos+2)=B_FWD(12); // b after_call (pc+12) break; case t_XORb: case t_XORw: case t_XORd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=EOR(FC_RETOP, HOST_a2); // eor FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(12); // b after_call (pc+12) + *(Bit16u*)pos=EOR(HOST_a1, HOST_a2); // eor a1, a2 + *(Bit16u*)(pos+2)=B_FWD(12); // b after_call (pc+12) break; case t_CMPb: case t_CMPw: @@ -1132,115 +1083,113 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_TESTb: case t_TESTw: case t_TESTd: - *(Bit16u*)pos=B_FWD(16); // b after_call (pc+16) + *(Bit16u*)pos=B_FWD(14); // b after_call (pc+14) break; case t_INCb: case t_INCw: case t_INCd: - *(Bit16u*)pos=ADD_IMM3(FC_RETOP, HOST_a1, 1); // add FC_RETOP, a1, #1 - *(Bit16u*)(pos+2)=B_FWD(14); // b after_call (pc+14) + *(Bit16u*)pos=ADD_IMM3(HOST_a1, HOST_a1, 1); // add a1, a1, #1 + *(Bit16u*)(pos+2)=B_FWD(12); // b after_call (pc+12) break; case t_DECb: case t_DECw: case t_DECd: - *(Bit16u*)pos=SUB_IMM3(FC_RETOP, HOST_a1, 1); // sub FC_RETOP, a1, #1 - *(Bit16u*)(pos+2)=B_FWD(14); // b after_call (pc+14) + *(Bit16u*)pos=SUB_IMM3(HOST_a1, HOST_a1, 1); // sub a1, a1, #1 + *(Bit16u*)(pos+2)=B_FWD(12); // b after_call (pc+12) break; case t_SHLb: case t_SHLw: case t_SHLd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=LSL_REG(FC_RETOP, HOST_a2); // lsl FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(12); // b after_call (pc+12) + *(Bit16u*)pos=LSL_REG(HOST_a1, HOST_a2); // lsl a1, a2 + *(Bit16u*)(pos+2)=B_FWD(12); // b after_call (pc+12) break; case t_SHRb: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 24); // lsl FC_RETOP, a1, #24 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, FC_RETOP, 24); // lsr FC_RETOP, FC_RETOP, #24 - *(Bit16u*)(pos+4)=LSR_REG(FC_RETOP, HOST_a2); // lsr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(10); // b after_call (pc+10) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 + *(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 24); // lsr a1, a1, #24 + *(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2); // lsr a1, a2 + *(Bit16u*)(pos+6)=B_FWD(8); // b after_call (pc+8) break; case t_SHRw: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 16); // lsl FC_RETOP, a1, #16 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, FC_RETOP, 16); // lsr FC_RETOP, FC_RETOP, #16 - *(Bit16u*)(pos+4)=LSR_REG(FC_RETOP, HOST_a2); // lsr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(10); // b after_call (pc+10) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 + *(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 16); // lsr a1, a1, #16 + *(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2); // lsr a1, a2 + *(Bit16u*)(pos+6)=B_FWD(8); // b after_call (pc+8) break; case t_SHRd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=LSR_REG(FC_RETOP, HOST_a2); // lsr FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(12); // b after_call (pc+12) + *(Bit16u*)pos=LSR_REG(HOST_a1, HOST_a2); // lsr a1, a2 + *(Bit16u*)(pos+2)=B_FWD(12); // b after_call (pc+12) break; case t_SARb: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 24); // lsl FC_RETOP, a1, #24 - *(Bit16u*)(pos+2)=ASR_IMM(FC_RETOP, FC_RETOP, 24); // asr FC_RETOP, FC_RETOP, #24 - *(Bit16u*)(pos+4)=ASR_REG(FC_RETOP, HOST_a2); // asr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(10); // b after_call (pc+10) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 + *(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 24); // asr a1, a1, #24 + *(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2); // asr a1, a2 + *(Bit16u*)(pos+6)=B_FWD(8); // b after_call (pc+8) break; case t_SARw: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 16); // lsl FC_RETOP, a1, #16 - *(Bit16u*)(pos+2)=ASR_IMM(FC_RETOP, FC_RETOP, 16); // asr FC_RETOP, FC_RETOP, #16 - *(Bit16u*)(pos+4)=ASR_REG(FC_RETOP, HOST_a2); // asr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(10); // b after_call (pc+10) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 + *(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 16); // asr a1, a1, #16 + *(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2); // asr a1, a2 + *(Bit16u*)(pos+6)=B_FWD(8); // b after_call (pc+8) break; case t_SARd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=ASR_REG(FC_RETOP, HOST_a2); // asr FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(12); // b after_call (pc+12) + *(Bit16u*)pos=ASR_REG(HOST_a1, HOST_a2); // asr a1, a2 + *(Bit16u*)(pos+2)=B_FWD(12); // b after_call (pc+12) break; case t_RORb: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, HOST_a1, 8); // lsr FC_RETOP, a1, #8 - *(Bit16u*)(pos+4)=ORR(HOST_a1, FC_RETOP); // orr a1, FC_RETOP - *(Bit16u*)(pos+6)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+8)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+10)=ROR_REG(FC_RETOP, HOST_a2); // ror FC_RETOP, a2 - *(Bit16u*)(pos+12)=B_FWD(4); // b after_call (pc+4) + *(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 8); // lsr templo1, a1, #8 + *(Bit16u*)(pos+4)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+6)=NOP; // nop + *(Bit16u*)(pos+8)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 + *(Bit16u*)(pos+10)=NOP; // nop + *(Bit16u*)(pos+12)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+14)=NOP; // nop + *(Bit16u*)(pos+16)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 break; case t_RORw: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+4)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+6)=ROR_REG(FC_RETOP, HOST_a2); // ror FC_RETOP, a2 - *(Bit16u*)(pos+8)=B_FWD(8); // b after_call (pc+8) + *(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 + *(Bit16u*)(pos+4)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+6)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+8)=B_FWD(6); // b after_call (pc+6) break; case t_RORd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=ROR_REG(FC_RETOP, HOST_a2); // ror FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(12); // b after_call (pc+12) + *(Bit16u*)pos=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+2)=B_FWD(12); // b after_call (pc+12) break; case t_ROLb: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 - *(Bit16u*)(pos+2)=NEG(templo1, HOST_a2); // neg templo1, a2 - *(Bit16u*)(pos+4)=LSR_IMM(FC_RETOP, HOST_a1, 8); // lsr FC_RETOP, a1, #8 - *(Bit16u*)(pos+6)=ADD_IMM8(templo1, 32); // add templo1, #32 - *(Bit16u*)(pos+8)=ORR(HOST_a1, FC_RETOP); // orr a1, FC_RETOP - *(Bit16u*)(pos+10)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+12)=NOP; // nop - *(Bit16u*)(pos+14)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+16)=NOP; // nop - *(Bit16u*)(pos+18)=ROR_REG(FC_RETOP, templo1); // ror FC_RETOP, templo1 + *(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2); // neg a2, a2 + *(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 8); // lsr templo1, a1, #8 + *(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32); // add a2, #32 + *(Bit16u*)(pos+8)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+10)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 + *(Bit16u*)(pos+12)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+14)=NOP; // nop + *(Bit16u*)(pos+16)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 break; case t_ROLw: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 - *(Bit16u*)(pos+2)=NEG(templo1, HOST_a2); // neg templo1, a2 - *(Bit16u*)(pos+4)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+6)=ADD_IMM8(templo1, 32); // add templo1, #32 - *(Bit16u*)(pos+8)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+10)=ROR_REG(FC_RETOP, templo1); // ror FC_RETOP, templo1 - *(Bit16u*)(pos+12)=B_FWD(4); // b after_call (pc+4) + *(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2); // neg a2, a2 + *(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 + *(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32); // add a2, #32 + *(Bit16u*)(pos+8)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+10)=NOP; // nop + *(Bit16u*)(pos+12)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+14)=NOP; // nop + *(Bit16u*)(pos+16)=NOP; // nop break; case t_ROLd: - *(Bit16u*)pos=NEG(templo1, HOST_a2); // neg templo1, a2 - *(Bit16u*)(pos+2)=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+4)=ADD_IMM8(templo1, 32); // add templo1, #32 - *(Bit16u*)(pos+6)=ROR_REG(FC_RETOP, templo1); // ror FC_RETOP, templo1 - *(Bit16u*)(pos+8)=B_FWD(8); // b after_call (pc+8) + *(Bit16u*)pos=NEG(HOST_a2, HOST_a2); // neg a2, a2 + *(Bit16u*)(pos+2)=ADD_IMM8(HOST_a2, 32); // add a2, #32 + *(Bit16u*)(pos+4)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+6)=B_FWD(8); // b after_call (pc+8) break; case t_NEGb: case t_NEGw: case t_NEGd: - *(Bit16u*)pos=NEG(FC_RETOP, HOST_a1); // neg FC_RETOP, a1 - *(Bit16u*)(pos+2)=B_FWD(14); // b after_call (pc+14) + *(Bit16u*)pos=NEG(HOST_a1, HOST_a1); // neg a1, a1 + *(Bit16u*)(pos+2)=B_FWD(12); // b after_call (pc+12) break; default: *(Bit32u*)( ( ((Bit32u) (*pos)) << 2 ) + ((Bit32u)pos + 2) ) = (Bit32u)fct_ptr; // simple_func diff --git a/src/cpu/core_dynrec/risc_armv4le-thumb.h b/src/cpu/core_dynrec/risc_armv4le-thumb.h index 37ae9c12..49a6e879 100644 --- a/src/cpu/core_dynrec/risc_armv4le-thumb.h +++ b/src/cpu/core_dynrec/risc_armv4le-thumb.h @@ -16,7 +16,7 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* $Id: risc_armv4le-thumb.h,v 1.5 2009-06-25 19:31:43 c2woody Exp $ */ +/* $Id: risc_armv4le-thumb.h,v 1.6 2009-06-27 12:51:10 c2woody Exp $ */ /* ARMv4 (little endian) backend by M-HT (thumb version) */ @@ -25,15 +25,10 @@ // temporary "lo" registers #define templo1 HOST_v3 #define templo2 HOST_v4 - -// temporary "lo" register - value must be preserved when using it -#define templosav HOST_a3 - -// temporary "hi" register -#define temphi1 HOST_ip +#define templo3 HOST_v2 // register that holds function return values -#define FC_RETOP HOST_v2 +#define FC_RETOP HOST_a1 // register used for address calculations, #define FC_ADDR HOST_v1 // has to be saved across calls, see DRC_PROTECT_ADDR_REG @@ -86,8 +81,6 @@ #define ADD_IMM8(dst, imm) (0x3000 + ((dst) << 8) + (imm) ) // add dst, src1, src2 #define ADD_REG(dst, src1, src2) (0x1800 + (dst) + ((src1) << 3) + ((src2) << 6) ) -// add dst, src -#define ADD_LO_HI(dst, src) (0x4440 + (dst) + (((src) - HOST_r8) << 3) ) // add dst, pc, #imm @ 0 <= imm < 1024 & imm mod 4 = 0 #define ADD_LO_PC_IMM(dst, imm) (0xa000 + ((dst) << 8) + ((imm) >> 2) ) // sub dst, src1, src2 @@ -364,9 +357,8 @@ static void gen_extend_word(bool sign,HostReg reg) { // add a 32bit value from memory to a full register static void gen_add(HostReg reg,void* op) { - cache_addw( MOV_HI_LO(temphi1, reg) ); // mov temphi1, reg - gen_mov_word_to_reg(reg, op, 1); - cache_addw( ADD_LO_HI(reg, temphi1) ); // add reg, temphi1 + gen_mov_word_to_reg(templo3, op, 1); + cache_addw( ADD_REG(reg, reg, templo3) ); // add reg, reg, templo3 } // add a 32bit constant value to a full register @@ -386,10 +378,8 @@ static void gen_and_imm(HostReg reg,Bit32u imm) { // move a 32bit constant value into memory static void gen_mov_direct_dword(void* dest,Bit32u imm) { - cache_addw( MOV_HI_LO(temphi1, templosav) ); // mov temphi1, templosav - gen_mov_dword_to_reg_imm(templosav, imm); - gen_mov_word_from_reg(templosav, dest, 1); - cache_addw( MOV_LO_HI(templosav, temphi1) ); // mov templosav, temphi1 + gen_mov_dword_to_reg_imm(templo3, imm); + gen_mov_word_from_reg(templo3, dest, 1); } // move an address into memory @@ -400,16 +390,14 @@ static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) { // add an 8bit constant value to a dword memory value static void gen_add_direct_byte(void* dest,Bit8s imm) { if(!imm) return; - cache_addw( MOV_HI_LO(temphi1, templosav) ); // mov temphi1, templosav gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templosav, dest, 1, templo2); + gen_mov_word_to_reg_helper(templo3, dest, 1, templo2); if (imm >= 0) { - cache_addw( ADD_IMM8(templosav, (Bit32s)imm) ); // add templosav, #(imm) + cache_addw( ADD_IMM8(templo3, (Bit32s)imm) ); // add templo3, #(imm) } else { - cache_addw( SUB_IMM8(templosav, -((Bit32s)imm)) ); // sub templosav, #(-imm) + cache_addw( SUB_IMM8(templo3, -((Bit32s)imm)) ); // sub templo3, #(-imm) } - gen_mov_word_from_reg_helper(templosav, dest, 1, templo2); - cache_addw( MOV_LO_HI(templosav, temphi1) ); // mov templosav, temphi1 + gen_mov_word_from_reg_helper(templo3, dest, 1, templo2); } // add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value @@ -419,32 +407,28 @@ static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) { gen_add_direct_byte(dest,(Bit8s)imm); return; } - cache_addw( MOV_HI_LO(temphi1, templosav) ); // mov temphi1, templosav gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templosav, dest, dword, templo2); + gen_mov_word_to_reg_helper(templo3, dest, dword, templo2); if (dword) { gen_mov_dword_to_reg_imm(templo1, imm); } else { gen_mov_word_to_reg_imm(templo1, (Bit16u)imm); } - cache_addw( ADD_REG(templosav, templosav, templo1) ); // add templosav, templosav, templo1 - gen_mov_word_from_reg_helper(templosav, dest, dword, templo2); - cache_addw( MOV_LO_HI(templosav, temphi1) ); // mov templosav, temphi1 + cache_addw( ADD_REG(templo3, templo3, templo1) ); // add templo3, templo3, templo1 + gen_mov_word_from_reg_helper(templo3, dest, dword, templo2); } // subtract an 8bit constant value from a dword memory value static void gen_sub_direct_byte(void* dest,Bit8s imm) { if(!imm) return; - cache_addw( MOV_HI_LO(temphi1, templosav) ); // mov temphi1, templosav gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templosav, dest, 1, templo2); + gen_mov_word_to_reg_helper(templo3, dest, 1, templo2); if (imm >= 0) { - cache_addw( SUB_IMM8(templosav, (Bit32s)imm) ); // sub templosav, #(imm) + cache_addw( SUB_IMM8(templo3, (Bit32s)imm) ); // sub templo3, #(imm) } else { - cache_addw( ADD_IMM8(templosav, -((Bit32s)imm)) ); // add templosav, #(-imm) + cache_addw( ADD_IMM8(templo3, -((Bit32s)imm)) ); // add templo3, #(-imm) } - gen_mov_word_from_reg_helper(templosav, dest, 1, templo2); - cache_addw( MOV_LO_HI(templosav, temphi1) ); // mov templosav, temphi1 + gen_mov_word_from_reg_helper(templo3, dest, 1, templo2); } // subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value @@ -454,17 +438,15 @@ static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) { gen_sub_direct_byte(dest,(Bit8s)imm); return; } - cache_addw( MOV_HI_LO(temphi1, templosav) ); // mov temphi1, templosav gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templosav, dest, dword, templo2); + gen_mov_word_to_reg_helper(templo3, dest, dword, templo2); if (dword) { gen_mov_dword_to_reg_imm(templo1, imm); } else { gen_mov_word_to_reg_imm(templo1, (Bit16u)imm); } - cache_addw( SUB_REG(templosav, templosav, templo1) ); // sub templosav, templosav, templo1 - gen_mov_word_from_reg_helper(templosav, dest, dword, templo2); - cache_addw( MOV_LO_HI(templosav, temphi1) ); // mov templosav, temphi1 + cache_addw( SUB_REG(templo3, templo3, templo1) ); // sub templo3, templo3, templo1 + gen_mov_word_from_reg_helper(templo3, dest, dword, templo2); } // effective address calculation, destination is dest_reg @@ -512,7 +494,6 @@ static void INLINE gen_call_function_raw(void * func) { cache_addd(0xe12fff10 + (templo1)); // bx templo1 // thumb state from now on - cache_addw( MOV_REG(FC_RETOP, HOST_a1) ); // mov FC_RETOP, a1 } // generate a call to a function with paramcount parameters @@ -523,8 +504,8 @@ static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fa gen_call_function_raw(func); return proc_addr; // if proc_addr is on word boundary ((proc_addr & 0x03) == 0) - // then length of generated code is 22 bytes - // otherwise length of generated code is 24 bytes + // then length of generated code is 20 bytes + // otherwise length of generated code is 22 bytes } #if (1) @@ -555,29 +536,28 @@ static void INLINE gen_load_param_mem(Bitu mem,Bitu param) { // jump to an address pointed at by ptr, offset is in imm static void gen_jmp_ptr(void * ptr,Bits imm=0) { - cache_addw( MOV_HI_LO(temphi1, templosav) ); // mov temphi1, templosav - gen_mov_word_to_reg(templosav, ptr, 1); + gen_mov_word_to_reg(templo3, ptr, 1); if (imm) { gen_mov_dword_to_reg_imm(templo2, imm); - cache_addw( ADD_REG(templosav, templosav, templo2) ); // add templosav, templosav, templo2 + cache_addw( ADD_REG(templo3, templo3, templo2) ); // add templo3, templo3, templo2 } #if (1) // (*ptr) should be word aligned if ((imm & 0x03) == 0) { - cache_addw( LDR_IMM(templo2, templosav, 0) ); // ldr templo2, [templosav] + cache_addw( LDR_IMM(templo2, templo3, 0) ); // ldr templo2, [templo3] } else #endif { - cache_addw( LDRB_IMM(templo2, templosav, 0) ); // ldrb templo2, [templosav] - cache_addw( LDRB_IMM(templo1, templosav, 1) ); // ldrb templo1, [templosav, #1] + cache_addw( LDRB_IMM(templo2, templo3, 0) ); // ldrb templo2, [templo3] + cache_addw( LDRB_IMM(templo1, templo3, 1) ); // ldrb templo1, [templo3, #1] cache_addw( LSL_IMM(templo1, templo1, 8) ); // lsl templo1, templo1, #8 cache_addw( ORR(templo2, templo1) ); // orr templo2, templo1 - cache_addw( LDRB_IMM(templo1, templosav, 2) ); // ldrb templo1, [templosav, #2] + cache_addw( LDRB_IMM(templo1, templo3, 2) ); // ldrb templo1, [templo3, #2] cache_addw( LSL_IMM(templo1, templo1, 16) ); // lsl templo1, templo1, #16 cache_addw( ORR(templo2, templo1) ); // orr templo2, templo1 - cache_addw( LDRB_IMM(templo1, templosav, 3) ); // ldrb templo1, [templosav, #3] + cache_addw( LDRB_IMM(templo1, templo3, 3) ); // ldrb templo1, [templo3, #3] cache_addw( LSL_IMM(templo1, templo1, 24) ); // lsl templo1, templo1, #24 cache_addw( ORR(templo2, templo1) ); // orr templo2, templo1 } @@ -585,8 +565,6 @@ static void gen_jmp_ptr(void * ptr,Bits imm=0) { // increase jmp address to keep thumb state cache_addw( ADD_IMM3(templo2, templo2, 1) ); // add templo2, templo2, #1 - cache_addw( MOV_LO_HI(templosav, temphi1) ); // mov templosav, temphi1 - cache_addw( BX(templo2) ); // bx templo2 } @@ -721,7 +699,6 @@ static void gen_run_code(void) { // return from a function static void gen_return_function(void) { - cache_addw( MOV_REG(HOST_a1, FC_RETOP) ); // mov a1, FC_RETOP cache_addw(0xbc08); // pop {r3} cache_addw( BX(HOST_r3) ); // bx r3 } @@ -739,35 +716,32 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_ADDb: case t_ADDw: case t_ADDd: - *(Bit16u*)pos=ADD_REG(FC_RETOP, HOST_a1, HOST_a2); // add FC_RETOP, a1, a2 - *(Bit16u*)(pos+2)=B_FWD(16); // b after_call (pc+16) + *(Bit16u*)pos=ADD_REG(HOST_a1, HOST_a1, HOST_a2); // add a1, a1, a2 + *(Bit16u*)(pos+2)=B_FWD(14); // b after_call (pc+14) break; case t_ORb: case t_ORw: case t_ORd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=ORR(FC_RETOP, HOST_a2); // orr FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(14); // b after_call (pc+14) + *(Bit16u*)pos=ORR(HOST_a1, HOST_a2); // orr a1, a2 + *(Bit16u*)(pos+2)=B_FWD(14); // b after_call (pc+14) break; case t_ANDb: case t_ANDw: case t_ANDd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=AND(FC_RETOP, HOST_a2); // and FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(14); // b after_call (pc+14) + *(Bit16u*)pos=AND(HOST_a1, HOST_a2); // and a1, a2 + *(Bit16u*)(pos+2)=B_FWD(14); // b after_call (pc+14) break; case t_SUBb: case t_SUBw: case t_SUBd: - *(Bit16u*)pos=SUB_REG(FC_RETOP, HOST_a1, HOST_a2); // sub FC_RETOP, a1, a2 - *(Bit16u*)(pos+2)=B_FWD(16); // b after_call (pc+16) + *(Bit16u*)pos=SUB_REG(HOST_a1, HOST_a1, HOST_a2); // sub a1, a1, a2 + *(Bit16u*)(pos+2)=B_FWD(14); // b after_call (pc+14) break; case t_XORb: case t_XORw: case t_XORd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=EOR(FC_RETOP, HOST_a2); // eor FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(14); // b after_call (pc+14) + *(Bit16u*)pos=EOR(HOST_a1, HOST_a2); // eor a1, a2 + *(Bit16u*)(pos+2)=B_FWD(14); // b after_call (pc+14) break; case t_CMPb: case t_CMPw: @@ -775,116 +749,110 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_TESTb: case t_TESTw: case t_TESTd: - *(Bit16u*)pos=B_FWD(18); // b after_call (pc+18) + *(Bit16u*)pos=B_FWD(16); // b after_call (pc+16) break; case t_INCb: case t_INCw: case t_INCd: - *(Bit16u*)pos=ADD_IMM3(FC_RETOP, HOST_a1, 1); // add FC_RETOP, a1, #1 - *(Bit16u*)(pos+2)=B_FWD(16); // b after_call (pc+16) + *(Bit16u*)pos=ADD_IMM3(HOST_a1, HOST_a1, 1); // add a1, a1, #1 + *(Bit16u*)(pos+2)=B_FWD(14); // b after_call (pc+14) break; case t_DECb: case t_DECw: case t_DECd: - *(Bit16u*)pos=SUB_IMM3(FC_RETOP, HOST_a1, 1); // sub FC_RETOP, a1, #1 - *(Bit16u*)(pos+2)=B_FWD(16); // b after_call (pc+16) + *(Bit16u*)pos=SUB_IMM3(HOST_a1, HOST_a1, 1); // sub a1, a1, #1 + *(Bit16u*)(pos+2)=B_FWD(14); // b after_call (pc+14) break; case t_SHLb: case t_SHLw: case t_SHLd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=LSL_REG(FC_RETOP, HOST_a2); // lsl FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(14); // b after_call (pc+14) + *(Bit16u*)pos=LSL_REG(HOST_a1, HOST_a2); // lsl a1, a2 + *(Bit16u*)(pos+2)=B_FWD(14); // b after_call (pc+14) break; case t_SHRb: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 24); // lsl FC_RETOP, a1, #24 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, FC_RETOP, 24); // lsr FC_RETOP, FC_RETOP, #24 - *(Bit16u*)(pos+4)=LSR_REG(FC_RETOP, HOST_a2); // lsr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(12); // b after_call (pc+12) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 + *(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 24); // lsr a1, a1, #24 + *(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2); // lsr a1, a2 + *(Bit16u*)(pos+6)=B_FWD(10); // b after_call (pc+10) break; case t_SHRw: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 16); // lsl FC_RETOP, a1, #16 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, FC_RETOP, 16); // lsr FC_RETOP, FC_RETOP, #16 - *(Bit16u*)(pos+4)=LSR_REG(FC_RETOP, HOST_a2); // lsr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(12); // b after_call (pc+12) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 + *(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 16); // lsr a1, a1, #16 + *(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2); // lsr a1, a2 + *(Bit16u*)(pos+6)=B_FWD(10); // b after_call (pc+10) break; case t_SHRd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=LSR_REG(FC_RETOP, HOST_a2); // lsr FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(14); // b after_call (pc+14) + *(Bit16u*)pos=LSR_REG(HOST_a1, HOST_a2); // lsr a1, a2 + *(Bit16u*)(pos+2)=B_FWD(14); // b after_call (pc+14) break; case t_SARb: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 24); // lsl FC_RETOP, a1, #24 - *(Bit16u*)(pos+2)=ASR_IMM(FC_RETOP, FC_RETOP, 24); // asr FC_RETOP, FC_RETOP, #24 - *(Bit16u*)(pos+4)=ASR_REG(FC_RETOP, HOST_a2); // asr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(12); // b after_call (pc+12) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 + *(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 24); // asr a1, a1, #24 + *(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2); // asr a1, a2 + *(Bit16u*)(pos+6)=B_FWD(10); // b after_call (pc+10) break; case t_SARw: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 16); // lsl FC_RETOP, a1, #16 - *(Bit16u*)(pos+2)=ASR_IMM(FC_RETOP, FC_RETOP, 16); // asr FC_RETOP, FC_RETOP, #16 - *(Bit16u*)(pos+4)=ASR_REG(FC_RETOP, HOST_a2); // asr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(12); // b after_call (pc+12) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 + *(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 16); // asr a1, a1, #16 + *(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2); // asr a1, a2 + *(Bit16u*)(pos+6)=B_FWD(10); // b after_call (pc+10) break; case t_SARd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=ASR_REG(FC_RETOP, HOST_a2); // asr FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(14); // b after_call (pc+14) + *(Bit16u*)pos=ASR_REG(HOST_a1, HOST_a2); // asr a1, a2 + *(Bit16u*)(pos+2)=B_FWD(14); // b after_call (pc+14) break; case t_RORb: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, HOST_a1, 8); // lsr FC_RETOP, a1, #8 - *(Bit16u*)(pos+4)=ORR(HOST_a1, FC_RETOP); // orr a1, FC_RETOP - *(Bit16u*)(pos+6)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+8)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+10)=ROR_REG(FC_RETOP, HOST_a2); // ror FC_RETOP, a2 - *(Bit16u*)(pos+12)=B_FWD(6); // b after_call (pc+6) + *(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 8); // lsr templo1, a1, #8 + *(Bit16u*)(pos+4)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+6)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 + *(Bit16u*)(pos+8)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+10)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+12)=B_FWD(4); // b after_call (pc+4) break; case t_RORw: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+4)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+6)=ROR_REG(FC_RETOP, HOST_a2); // ror FC_RETOP, a2 - *(Bit16u*)(pos+8)=B_FWD(10); // b after_call (pc+10) + *(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 + *(Bit16u*)(pos+4)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+6)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+8)=B_FWD(8); // b after_call (pc+8) break; case t_RORd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=ROR_REG(FC_RETOP, HOST_a2); // ror FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(14); // b after_call (pc+14) + *(Bit16u*)pos=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+2)=B_FWD(14); // b after_call (pc+14) break; case t_ROLb: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 - *(Bit16u*)(pos+2)=NEG(templo1, HOST_a2); // neg templo1, a2 - *(Bit16u*)(pos+4)=LSR_IMM(FC_RETOP, HOST_a1, 8); // lsr FC_RETOP, a1, #8 - *(Bit16u*)(pos+6)=ADD_IMM8(templo1, 32); // add templo1, #32 - *(Bit16u*)(pos+8)=ORR(HOST_a1, FC_RETOP); // orr a1, FC_RETOP + *(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2); // neg a2, a2 + *(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 8); // lsr templo1, a1, #8 + *(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32); // add a2, #32 + *(Bit16u*)(pos+8)=ORR(HOST_a1, templo1); // orr a1, templo1 *(Bit16u*)(pos+10)=NOP; // nop - *(Bit16u*)(pos+12)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 + *(Bit16u*)(pos+12)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 *(Bit16u*)(pos+14)=NOP; // nop - *(Bit16u*)(pos+16)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+18)=NOP; // nop - *(Bit16u*)(pos+20)=ROR_REG(FC_RETOP, templo1); // ror FC_RETOP, templo1 + *(Bit16u*)(pos+16)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+18)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 break; case t_ROLw: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 - *(Bit16u*)(pos+2)=NEG(templo1, HOST_a2); // neg templo1, a2 - *(Bit16u*)(pos+4)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+6)=ADD_IMM8(templo1, 32); // add templo1, #32 - *(Bit16u*)(pos+8)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+10)=ROR_REG(FC_RETOP, templo1); // ror FC_RETOP, templo1 - *(Bit16u*)(pos+12)=B_FWD(6); // b after_call (pc+6) + *(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2); // neg a2, a2 + *(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 + *(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32); // add a2, #32 + *(Bit16u*)(pos+8)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+10)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+12)=B_FWD(4); // b after_call (pc+4) break; case t_ROLd: - *(Bit16u*)pos=NEG(templo1, HOST_a2); // neg templo1, a2 - *(Bit16u*)(pos+2)=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+4)=ADD_IMM8(templo1, 32); // add templo1, #32 - *(Bit16u*)(pos+6)=ROR_REG(FC_RETOP, templo1); // ror FC_RETOP, templo1 - *(Bit16u*)(pos+8)=B_FWD(10); // b after_call (pc+10) + *(Bit16u*)pos=NEG(HOST_a2, HOST_a2); // neg a2, a2 + *(Bit16u*)(pos+2)=ADD_IMM8(HOST_a2, 32); // add a2, #32 + *(Bit16u*)(pos+4)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+6)=B_FWD(10); // b after_call (pc+10) break; case t_NEGb: case t_NEGw: case t_NEGd: - *(Bit16u*)pos=NEG(FC_RETOP, HOST_a1); // neg FC_RETOP, a1 - *(Bit16u*)(pos+2)=B_FWD(16); // b after_call (pc+16) + *(Bit16u*)pos=NEG(HOST_a1, HOST_a1); // neg a1, a1 + *(Bit16u*)(pos+2)=B_FWD(14); // b after_call (pc+14) break; default: *(Bit32u*)(pos+8)=(Bit32u)fct_ptr; // simple_func @@ -898,35 +866,32 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_ADDb: case t_ADDw: case t_ADDd: - *(Bit16u*)pos=ADD_REG(FC_RETOP, HOST_a1, HOST_a2); // add FC_RETOP, a1, a2 - *(Bit16u*)(pos+2)=B_FWD(18); // b after_call (pc+18) + *(Bit16u*)pos=ADD_REG(HOST_a1, HOST_a1, HOST_a2); // add a1, a1, a2 + *(Bit16u*)(pos+2)=B_FWD(16); // b after_call (pc+16) break; case t_ORb: case t_ORw: case t_ORd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=ORR(FC_RETOP, HOST_a2); // orr FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(16); // b after_call (pc+16) + *(Bit16u*)pos=ORR(HOST_a1, HOST_a2); // orr a1, a2 + *(Bit16u*)(pos+2)=B_FWD(16); // b after_call (pc+16) break; case t_ANDb: case t_ANDw: case t_ANDd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=AND(FC_RETOP, HOST_a2); // and FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(16); // b after_call (pc+16) + *(Bit16u*)pos=AND(HOST_a1, HOST_a2); // and a1, a2 + *(Bit16u*)(pos+2)=B_FWD(16); // b after_call (pc+16) break; case t_SUBb: case t_SUBw: case t_SUBd: - *(Bit16u*)pos=SUB_REG(FC_RETOP, HOST_a1, HOST_a2); // sub FC_RETOP, a1, a2 - *(Bit16u*)(pos+2)=B_FWD(18); // b after_call (pc+18) + *(Bit16u*)pos=SUB_REG(HOST_a1, HOST_a1, HOST_a2); // sub a1, a1, a2 + *(Bit16u*)(pos+2)=B_FWD(16); // b after_call (pc+16) break; case t_XORb: case t_XORw: case t_XORd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=EOR(FC_RETOP, HOST_a2); // eor FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(16); // b after_call (pc+16) + *(Bit16u*)pos=EOR(HOST_a1, HOST_a2); // eor a1, a2 + *(Bit16u*)(pos+2)=B_FWD(16); // b after_call (pc+16) break; case t_CMPb: case t_CMPw: @@ -934,114 +899,111 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_TESTb: case t_TESTw: case t_TESTd: - *(Bit16u*)pos=B_FWD(20); // b after_call (pc+20) + *(Bit16u*)pos=B_FWD(18); // b after_call (pc+18) break; case t_INCb: case t_INCw: case t_INCd: - *(Bit16u*)pos=ADD_IMM3(FC_RETOP, HOST_a1, 1); // add FC_RETOP, a1, #1 - *(Bit16u*)(pos+2)=B_FWD(18); // b after_call (pc+18) + *(Bit16u*)pos=ADD_IMM3(HOST_a1, HOST_a1, 1); // add a1, a1, #1 + *(Bit16u*)(pos+2)=B_FWD(16); // b after_call (pc+16) break; case t_DECb: case t_DECw: case t_DECd: - *(Bit16u*)pos=SUB_IMM3(FC_RETOP, HOST_a1, 1); // sub FC_RETOP, a1, #1 - *(Bit16u*)(pos+2)=B_FWD(18); // b after_call (pc+18) + *(Bit16u*)pos=SUB_IMM3(HOST_a1, HOST_a1, 1); // sub a1, a1, #1 + *(Bit16u*)(pos+2)=B_FWD(16); // b after_call (pc+16) break; case t_SHLb: case t_SHLw: case t_SHLd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=LSL_REG(FC_RETOP, HOST_a2); // lsl FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(16); // b after_call (pc+16) + *(Bit16u*)pos=LSL_REG(HOST_a1, HOST_a2); // lsl a1, a2 + *(Bit16u*)(pos+2)=B_FWD(16); // b after_call (pc+16) break; case t_SHRb: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 24); // lsl FC_RETOP, a1, #24 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, FC_RETOP, 24); // lsr FC_RETOP, FC_RETOP, #24 - *(Bit16u*)(pos+4)=LSR_REG(FC_RETOP, HOST_a2); // lsr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(14); // b after_call (pc+14) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 + *(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 24); // lsr a1, a1, #24 + *(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2); // lsr a1, a2 + *(Bit16u*)(pos+6)=B_FWD(12); // b after_call (pc+12) break; case t_SHRw: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 16); // lsl FC_RETOP, a1, #16 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, FC_RETOP, 16); // lsr FC_RETOP, FC_RETOP, #16 - *(Bit16u*)(pos+4)=LSR_REG(FC_RETOP, HOST_a2); // lsr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(14); // b after_call (pc+14) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 + *(Bit16u*)(pos+2)=LSR_IMM(HOST_a1, HOST_a1, 16); // lsr a1, a1, #16 + *(Bit16u*)(pos+4)=LSR_REG(HOST_a1, HOST_a2); // lsr a1, a2 + *(Bit16u*)(pos+6)=B_FWD(12); // b after_call (pc+12) break; case t_SHRd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=LSR_REG(FC_RETOP, HOST_a2); // lsr FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(16); // b after_call (pc+16) + *(Bit16u*)pos=LSR_REG(HOST_a1, HOST_a2); // lsr a1, a2 + *(Bit16u*)(pos+2)=B_FWD(16); // b after_call (pc+16) break; case t_SARb: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 24); // lsl FC_RETOP, a1, #24 - *(Bit16u*)(pos+2)=ASR_IMM(FC_RETOP, FC_RETOP, 24); // asr FC_RETOP, FC_RETOP, #24 - *(Bit16u*)(pos+4)=ASR_REG(FC_RETOP, HOST_a2); // asr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(14); // b after_call (pc+14) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 + *(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 24); // asr a1, a1, #24 + *(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2); // asr a1, a2 + *(Bit16u*)(pos+6)=B_FWD(12); // b after_call (pc+12) break; case t_SARw: - *(Bit16u*)pos=LSL_IMM(FC_RETOP, HOST_a1, 16); // lsl FC_RETOP, a1, #16 - *(Bit16u*)(pos+2)=ASR_IMM(FC_RETOP, FC_RETOP, 16); // asr FC_RETOP, FC_RETOP, #16 - *(Bit16u*)(pos+4)=ASR_REG(FC_RETOP, HOST_a2); // asr FC_RETOP, a2 - *(Bit16u*)(pos+6)=B_FWD(14); // b after_call (pc+14) + *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 + *(Bit16u*)(pos+2)=ASR_IMM(HOST_a1, HOST_a1, 16); // asr a1, a1, #16 + *(Bit16u*)(pos+4)=ASR_REG(HOST_a1, HOST_a2); // asr a1, a2 + *(Bit16u*)(pos+6)=B_FWD(12); // b after_call (pc+12) break; case t_SARd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=ASR_REG(FC_RETOP, HOST_a2); // asr FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(16); // b after_call (pc+16) + *(Bit16u*)pos=ASR_REG(HOST_a1, HOST_a2); // asr a1, a2 + *(Bit16u*)(pos+2)=B_FWD(16); // b after_call (pc+16) break; case t_RORb: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, HOST_a1, 8); // lsr FC_RETOP, a1, #8 - *(Bit16u*)(pos+4)=ORR(HOST_a1, FC_RETOP); // orr a1, FC_RETOP - *(Bit16u*)(pos+6)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+8)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+10)=ROR_REG(FC_RETOP, HOST_a2); // ror FC_RETOP, a2 - *(Bit16u*)(pos+12)=B_FWD(8); // b after_call (pc+8) + *(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 8); // lsr templo1, a1, #8 + *(Bit16u*)(pos+4)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+6)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 + *(Bit16u*)(pos+8)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+10)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+12)=B_FWD(6); // b after_call (pc+6) break; case t_RORw: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 - *(Bit16u*)(pos+2)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+4)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+6)=ROR_REG(FC_RETOP, HOST_a2); // ror FC_RETOP, a2 - *(Bit16u*)(pos+8)=B_FWD(12); // b after_call (pc+12) + *(Bit16u*)(pos+2)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 + *(Bit16u*)(pos+4)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+6)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+8)=B_FWD(10); // b after_call (pc+10) break; case t_RORd: - *(Bit16u*)pos=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+2)=ROR_REG(FC_RETOP, HOST_a2); // ror FC_RETOP, a2 - *(Bit16u*)(pos+4)=B_FWD(16); // b after_call (pc+16) + *(Bit16u*)pos=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+2)=B_FWD(16); // b after_call (pc+16) break; case t_ROLb: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 24); // lsl a1, a1, #24 - *(Bit16u*)(pos+2)=NEG(templo1, HOST_a2); // neg templo1, a2 - *(Bit16u*)(pos+4)=LSR_IMM(FC_RETOP, HOST_a1, 8); // lsr FC_RETOP, a1, #8 - *(Bit16u*)(pos+6)=ADD_IMM8(templo1, 32); // add templo1, #32 - *(Bit16u*)(pos+8)=ORR(HOST_a1, FC_RETOP); // orr a1, FC_RETOP - *(Bit16u*)(pos+10)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+12)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+14)=ROR_REG(FC_RETOP, templo1); // ror FC_RETOP, templo1 - *(Bit16u*)(pos+16)=B_FWD(4); // b after_call (pc+4) + *(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2); // neg a2, a2 + *(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 8); // lsr templo1, a1, #8 + *(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32); // add a2, #32 + *(Bit16u*)(pos+8)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+10)=NOP; // nop + *(Bit16u*)(pos+12)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 + *(Bit16u*)(pos+14)=NOP; // nop + *(Bit16u*)(pos+16)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+18)=NOP; // nop + *(Bit16u*)(pos+20)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 break; case t_ROLw: *(Bit16u*)pos=LSL_IMM(HOST_a1, HOST_a1, 16); // lsl a1, a1, #16 - *(Bit16u*)(pos+2)=NEG(templo1, HOST_a2); // neg templo1, a2 - *(Bit16u*)(pos+4)=LSR_IMM(FC_RETOP, HOST_a1, 16); // lsr FC_RETOP, a1, #16 - *(Bit16u*)(pos+6)=ADD_IMM8(templo1, 32); // add templo1, #32 - *(Bit16u*)(pos+8)=ORR(FC_RETOP, HOST_a1); // orr FC_RETOP, a1 - *(Bit16u*)(pos+10)=ROR_REG(FC_RETOP, templo1); // ror FC_RETOP, templo1 - *(Bit16u*)(pos+12)=B_FWD(8); // b after_call (pc+8) + *(Bit16u*)(pos+2)=NEG(HOST_a2, HOST_a2); // neg a2, a2 + *(Bit16u*)(pos+4)=LSR_IMM(templo1, HOST_a1, 16); // lsr templo1, a1, #16 + *(Bit16u*)(pos+6)=ADD_IMM8(HOST_a2, 32); // add a2, #32 + *(Bit16u*)(pos+8)=ORR(HOST_a1, templo1); // orr a1, templo1 + *(Bit16u*)(pos+10)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+12)=B_FWD(6); // b after_call (pc+6) break; case t_ROLd: - *(Bit16u*)pos=NEG(templo1, HOST_a2); // neg templo1, a2 - *(Bit16u*)(pos+2)=MOV_REG(FC_RETOP, HOST_a1); // mov FC_RETOP, a1 - *(Bit16u*)(pos+4)=ADD_IMM8(templo1, 32); // add templo1, #32 - *(Bit16u*)(pos+6)=ROR_REG(FC_RETOP, templo1); // ror FC_RETOP, templo1 - *(Bit16u*)(pos+8)=B_FWD(12); // b after_call (pc+12) + *(Bit16u*)pos=NEG(HOST_a2, HOST_a2); // neg a2, a2 + *(Bit16u*)(pos+2)=ADD_IMM8(HOST_a2, 32); // add a2, #32 + *(Bit16u*)(pos+4)=ROR_REG(HOST_a1, HOST_a2); // ror a1, a2 + *(Bit16u*)(pos+6)=B_FWD(12); // b after_call (pc+12) break; case t_NEGb: case t_NEGw: case t_NEGd: - *(Bit16u*)pos=NEG(FC_RETOP, HOST_a1); // neg FC_RETOP, a1 - *(Bit16u*)(pos+2)=B_FWD(18); // b after_call (pc+18) + *(Bit16u*)pos=NEG(HOST_a1, HOST_a1); // neg a1, a1 + *(Bit16u*)(pos+2)=B_FWD(16); // b after_call (pc+16) break; default: *(Bit32u*)(pos+10)=(Bit32u)fct_ptr; // simple_func