From 80df53942c715ea18cf65a91d6f325d17e0db883 Mon Sep 17 00:00:00 2001 From: Peter Veenstra Date: Mon, 11 Nov 2013 13:25:50 +0000 Subject: [PATCH] Update Arm dynrec core. Most important change: use ARMv7 instructions. Thanks a lot M-HT Imported-from: https://svn.code.sf.net/p/dosbox/code-0/dosbox/trunk@3846 --- src/cpu/core_dynrec.cpp | 9 +- src/cpu/core_dynrec/Makefile.am | 2 +- src/cpu/core_dynrec/risc_armv4le-o3.h | 870 +++++++++++------- src/cpu/core_dynrec/risc_armv4le-s3.h | 918 ------------------- src/cpu/core_dynrec/risc_armv4le-thumb-iw.h | 476 +++++++--- src/cpu/core_dynrec/risc_armv4le-thumb-niw.h | 476 +++++++--- src/cpu/core_dynrec/risc_armv4le-thumb.h | 454 ++++++--- src/cpu/core_dynrec/risc_armv4le.h | 18 +- 8 files changed, 1575 insertions(+), 1648 deletions(-) delete mode 100644 src/cpu/core_dynrec/risc_armv4le-s3.h diff --git a/src/cpu/core_dynrec.cpp b/src/cpu/core_dynrec.cpp index 35151780..16dd73c2 100644 --- a/src/cpu/core_dynrec.cpp +++ b/src/cpu/core_dynrec.cpp @@ -137,6 +137,7 @@ static struct { #define X86_64 0x02 #define MIPSEL 0x03 #define ARMV4LE 0x04 +#define ARMV7LE 0x05 #define POWERPC 0x04 #if C_TARGETCPU == X86_64 @@ -145,7 +146,7 @@ static struct { #include "core_dynrec/risc_x86.h" #elif C_TARGETCPU == MIPSEL #include "core_dynrec/risc_mipsel32.h" -#elif C_TARGETCPU == ARMV4LE +#elif (C_TARGETCPU == ARMV4LE) || (C_TARGETCPU == ARMV7LE) #include "core_dynrec/risc_armv4le.h" #elif C_TARGETCPU == POWERPC #include "core_dynrec/risc_ppc.h" @@ -163,7 +164,7 @@ CacheBlockDynRec * LinkBlocks(BlockReturn ret) { block=temp_handler->FindCacheBlock(temp_ip & 4095); if (!block) return NULL; - // found it, link the current block to + // found it, link the current block to cache.block.running->LinkTo(ret==BR_Link2,block); return block; } @@ -219,7 +220,7 @@ Bits CPU_Core_Dynrec_Run(void) { continue; } CPU_CycleLeft+=old_cycles; - return nc_retcode; + return nc_retcode; } } @@ -255,7 +256,7 @@ run_block: case BR_Cycles: // cycles went negative, return from the core to handle // external events, schedule the pic... 
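For orientation: the headline change in this patch is the new C_TARGETCPU value ARMV7LE, which (per the hunk above) selects the same risc_armv4le.h backend as ARMV4LE. The main difference in the generated code is constant materialization: the ARMv4 paths build 32-bit values out of rotated 8-bit immediates or load them from a literal pool placed next to the code, while the ARMv7 paths can use a movw/movt pair. A rough sketch of that pattern, reusing the MOVW/MOVT macros and the cache_addd emitter defined later in this patch (the helper name emit_const32 is illustrative, not part of the patch):

static void emit_const32(HostReg dest_reg, Bit32u imm) {
	cache_addd( MOVW(dest_reg, imm & 0xffff) );   // movw dest_reg, #(imm & 0xffff) - zero-extends the register
	if (imm >= 0x10000)
		cache_addd( MOVT(dest_reg, imm >> 16) );  // movt dest_reg, #(imm >> 16) - writes the high half, keeps the low half
}

This is the same fallback gen_mov_dword_to_reg_imm uses below when the value is not encodable as an ARM operand2 immediate.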
-#if C_HEAVY_DEBUG +#if C_HEAVY_DEBUG if (DEBUG_HeavyIsBreakpoint()) return debugCallback; #endif return CBRET_NONE; diff --git a/src/cpu/core_dynrec/Makefile.am b/src/cpu/core_dynrec/Makefile.am index 2bc15bbc..288b9154 100644 --- a/src/cpu/core_dynrec/Makefile.am +++ b/src/cpu/core_dynrec/Makefile.am @@ -1,5 +1,5 @@ noinst_HEADERS = cache.h decoder.h decoder_basic.h decoder_opcodes.h \ dyn_fpu.h operators.h risc_x64.h risc_x86.h risc_mipsel32.h \ risc_armv4le.h risc_armv4le-common.h \ - risc_armv4le-s3.h risc_armv4le-o3.h risc_armv4le-thumb.h \ + risc_armv4le-o3.h risc_armv4le-thumb.h \ risc_armv4le-thumb-iw.h risc_armv4le-thumb-niw.h diff --git a/src/cpu/core_dynrec/risc_armv4le-o3.h b/src/cpu/core_dynrec/risc_armv4le-o3.h index f1100bb6..e95edee7 100644 --- a/src/cpu/core_dynrec/risc_armv4le-o3.h +++ b/src/cpu/core_dynrec/risc_armv4le-o3.h @@ -18,7 +18,7 @@ -/* ARMv4 (little endian) backend by M-HT (size-tweaked arm version) */ +/* ARMv4/ARMv7 (little endian) backend by M-HT (arm version) */ // temporary registers @@ -50,15 +50,14 @@ // temporary register for LEA #define TEMP_REG_DRC HOST_v2 -#ifdef DRC_USE_REGS_ADDR // used to hold the address of "cpu_regs" - preferably filled in function gen_run_code #define FC_REGS_ADDR HOST_v7 -#endif -#ifdef DRC_USE_SEGS_ADDR // used to hold the address of "Segs" - preferably filled in function gen_run_code #define FC_SEGS_ADDR HOST_v8 -#endif + +// used to hold the address of "core_dynrec.readdata" - filled in function gen_run_code +#define readdata_addr HOST_v5 // helper macro @@ -88,6 +87,12 @@ #define MOV_REG_ROR_REG(dst, src, rreg) (0xe1a00070 + ((dst) << 12) + (src) + ((rreg) << 8) ) // mvn dst, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0 #define MVN_IMM(dst, imm, rimm) (0xe3e00000 + ((dst) << 12) + (imm) + ((rimm) << 7) ) +#if C_TARGETCPU == ARMV7LE +// movw dst, #imm @ 0 <= imm <= 65535 +#define MOVW(dst, imm) (0xe3000000 + ((dst) << 12) + (((imm) & 0xf000) << 4) + ((imm) & 0x0fff) ) +// movt dst, #imm @ 0 <= imm <= 65535 +#define MOVT(dst, imm) (0xe3400000 + ((dst) << 12) + (((imm) & 0xf000) << 4) + ((imm) & 0x0fff) ) +#endif // arithmetic // add dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0 @@ -103,7 +108,11 @@ // cmp src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0 #define CMP_IMM(src, imm, rimm) (0xe3500000 + ((src) << 16) + (imm) + ((rimm) << 7) ) // nop +#if C_TARGETCPU == ARMV7LE +#define NOP (0xe320f000) +#else #define NOP MOV_REG_LSL_IMM(HOST_r0, HOST_r0, 0) +#endif // logical // tst src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0 @@ -122,34 +131,70 @@ #define EOR_REG_LSL_IMM(dst, src1, src2, imm) (0xe0200000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) ) // bic dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0 #define BIC_IMM(dst, src, imm, rimm) (0xe3c00000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) ) +// bic dst, src1, src2, lsl #imm @ 0 <= imm <= 31 +#define BIC_REG_LSL_IMM(dst, src1, src2, imm) (0xe1c00000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) ) // load // ldr reg, [addr, #imm] @ 0 <= imm < 4096 #define LDR_IMM(reg, addr, imm) (0xe5900000 + ((reg) << 12) + ((addr) << 16) + (imm) ) +// ldr reg, [addr, #-(imm)] @ 0 <= imm < 4096 +#define LDR_IMM_M(reg, addr, imm) (0xe5100000 + ((reg) << 12) + ((addr) << 16) + (imm) ) // ldrh reg, [addr, #imm] @ 0 <= imm < 256 #define LDRH_IMM(reg, addr, imm) (0xe1d000b0 + ((reg) << 12) + ((addr) << 16) + (((imm) & 0xf0) << 4) + ((imm) & 0x0f) ) +// ldrh reg, [addr, #-(imm)] @ 0 <= imm < 256 +#define 
LDRH_IMM_M(reg, addr, imm) (0xe15000b0 + ((reg) << 12) + ((addr) << 16) + (((imm) & 0xf0) << 4) + ((imm) & 0x0f) ) // ldrb reg, [addr, #imm] @ 0 <= imm < 4096 #define LDRB_IMM(reg, addr, imm) (0xe5d00000 + ((reg) << 12) + ((addr) << 16) + (imm) ) +// ldrb reg, [addr, #-(imm)] @ 0 <= imm < 4096 +#define LDRB_IMM_M(reg, addr, imm) (0xe5500000 + ((reg) << 12) + ((addr) << 16) + (imm) ) +// ldr reg, [addr1, addr2, lsl #imm] @ 0 <= imm < 31 +#define LDR_REG_LSL_IMM(reg, addr1, addr2, imm) (0xe7900000 + ((reg) << 12) + ((addr1) << 16) + (addr2) + ((imm) << 7) ) // store // str reg, [addr, #imm] @ 0 <= imm < 4096 #define STR_IMM(reg, addr, imm) (0xe5800000 + ((reg) << 12) + ((addr) << 16) + (imm) ) +// str reg, [addr, #-(imm)] @ 0 <= imm < 4096 +#define STR_IMM_M(reg, addr, imm) (0xe5000000 + ((reg) << 12) + ((addr) << 16) + (imm) ) // strh reg, [addr, #imm] @ 0 <= imm < 256 #define STRH_IMM(reg, addr, imm) (0xe1c000b0 + ((reg) << 12) + ((addr) << 16) + (((imm) & 0xf0) << 4) + ((imm) & 0x0f) ) +// strh reg, [addr, #-(imm)] @ 0 <= imm < 256 +#define STRH_IMM_M(reg, addr, imm) (0xe14000b0 + ((reg) << 12) + ((addr) << 16) + (((imm) & 0xf0) << 4) + ((imm) & 0x0f) ) // strb reg, [addr, #imm] @ 0 <= imm < 4096 #define STRB_IMM(reg, addr, imm) (0xe5c00000 + ((reg) << 12) + ((addr) << 16) + (imm) ) +// strb reg, [addr, #-(imm)] @ 0 <= imm < 4096 +#define STRB_IMM_M(reg, addr, imm) (0xe5400000 + ((reg) << 12) + ((addr) << 16) + (imm) ) // branch // beq pc+imm @ 0 <= imm < 32M & imm mod 4 = 0 #define BEQ_FWD(imm) (0x0a000000 + ((imm) >> 2) ) // bne pc+imm @ 0 <= imm < 32M & imm mod 4 = 0 #define BNE_FWD(imm) (0x1a000000 + ((imm) >> 2) ) -// bgt pc+imm @ 0 <= imm < 32M & imm mod 4 = 0 -#define BGT_FWD(imm) (0xca000000 + ((imm) >> 2) ) +// ble pc+imm @ 0 <= imm < 32M & imm mod 4 = 0 +#define BLE_FWD(imm) (0xda000000 + ((imm) >> 2) ) // b pc+imm @ 0 <= imm < 32M & imm mod 4 = 0 #define B_FWD(imm) (0xea000000 + ((imm) >> 2) ) // bx reg #define BX(reg) (0xe12fff10 + (reg) ) +#if C_TARGETCPU == ARMV7LE +// blx reg +#define BLX_REG(reg) (0xe12fff30 + (reg) ) + +// extend +// sxth dst, src, ror #rimm @ rimm = 0 | 8 | 16 | 24 +#define SXTH(dst, src, rimm) (0xe6bf0070 + ((dst) << 12) + (src) + (((rimm) & 24) << 7) ) +// sxtb dst, src, ror #rimm @ rimm = 0 | 8 | 16 | 24 +#define SXTB(dst, src, rimm) (0xe6af0070 + ((dst) << 12) + (src) + (((rimm) & 24) << 7) ) +// uxth dst, src, ror #rimm @ rimm = 0 | 8 | 16 | 24 +#define UXTH(dst, src, rimm) (0xe6ff0070 + ((dst) << 12) + (src) + (((rimm) & 24) << 7) ) +// uxtb dst, src, ror #rimm @ rimm = 0 | 8 | 16 | 24 +#define UXTB(dst, src, rimm) (0xe6ef0070 + ((dst) << 12) + (src) + (((rimm) & 24) << 7) ) + +// bit field +// bfi dst, src, #lsb, #width @ lsb >= 0, width >= 1, lsb+width <= 32 +#define BFI(dst, src, lsb, width) (0xe7c00010 + ((dst) << 12) + (src) + ((lsb) << 7) + (((lsb) + (width) - 1) << 16) ) +// bfc dst, #lsb, #width @ lsb >= 0, width >= 1, lsb+width <= 32 +#define BFC(dst, lsb, width) (0xe7c0001f + ((dst) << 12) + ((lsb) << 7) + (((lsb) + (width) - 1) << 16) ) +#endif // move a full register from reg_src to reg_dst @@ -158,6 +203,28 @@ static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) { cache_addd( MOV_REG_LSL_IMM(reg_dst, reg_src, 0) ); // mov reg_dst, reg_src } +// helper function +static bool val_is_operand2(Bit32u value, Bit32u *val_shift) { + Bit32u shift; + + if (GCC_UNLIKELY(value == 0)) { + *val_shift = 0; + return true; + } + + shift = 0; + while ((value & 3) == 0) { + value>>=2; + shift+=2; + } + + if ((value >> 8) != 0) return false; + + 
*val_shift = shift; + return true; +} + +#if C_TARGETCPU != ARMV7LE // helper function static Bits get_imm_gen_len(Bit32u imm) { Bits ret; @@ -177,79 +244,44 @@ static Bits get_imm_gen_len(Bit32u imm) { } // helper function -static Bits get_method_imm_gen_len(Bit32u imm, Bits preffer00, Bits *num) { - Bits num00, num15, numadd, numsub, numret, ret; - num00 = get_imm_gen_len(imm); - num15 = get_imm_gen_len(~imm); - numadd = get_imm_gen_len(imm - ((Bit32u)cache.pos+8)); - numsub = get_imm_gen_len(((Bit32u)cache.pos+8) - imm); - if (numsub < numadd && numsub < num00 && numsub < num15) { - ret = 0; - numret = numsub; - } else if (numadd < num00 && numadd < num15) { - ret = 1; - numret = numadd; - } else if (num00 < num15 || (num00 == num15 && preffer00)) { - ret = 2; - numret = num00; - } else { - ret = 3; - numret = num15; - } - if (num != NULL) *num = numret; - return ret; +static Bits get_min_imm_gen_len(Bit32u imm) { + Bits num1, num2; + + num1 = get_imm_gen_len(imm); + num2 = get_imm_gen_len(~imm); + + return (num1 <= num2)?num1:num2; } +#endif // move a 32bit constant value into dest_reg static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) { - Bits first, method, scale; - Bit32u imm2, dist; - if (imm == 0) { - cache_addd( MOV_IMM(dest_reg, 0, 0) ); // mov dest_reg, #0 - } else if (imm == 0xffffffff) { - cache_addd( MVN_IMM(dest_reg, 0, 0) ); // mvn dest_reg, #0 - } else { - method = get_method_imm_gen_len(imm, 1, NULL); +#if C_TARGETCPU == ARMV7LE + Bit32u scale; - scale = 0; - first = 1; - if (method == 0) { - dist = ((Bit32u)cache.pos+8) - imm; - while (dist) { - while ((dist & 3) == 0) { - dist>>=2; - scale+=2; - } - if (first) { - cache_addd( SUB_IMM(dest_reg, HOST_pc, dist & 0xff, ROTATE_SCALE(scale)) ); // sub dest_reg, pc, #((dist & 0xff) << scale) - first = 0; - } else { - cache_addd( SUB_IMM(dest_reg, dest_reg, dist & 0xff, ROTATE_SCALE(scale)) ); // sub dest_reg, dest_reg, #((dist & 0xff) << scale) - } - dist>>=8; - scale+=8; - } - } else if (method == 1) { - dist = imm - ((Bit32u)cache.pos+8); - if (dist == 0) { - cache_addd( MOV_REG_LSL_IMM(dest_reg, HOST_pc, 0) ); // mov dest_reg, pc - } else { - while (dist) { - while ((dist & 3) == 0) { - dist>>=2; - scale+=2; - } - if (first) { - cache_addd( ADD_IMM(dest_reg, HOST_pc, dist & 0xff, ROTATE_SCALE(scale)) ); // add dest_reg, pc, #((dist & 0xff) << scale) - first = 0; - } else { - cache_addd( ADD_IMM(dest_reg, dest_reg, dist & 0xff, ROTATE_SCALE(scale)) ); // add dest_reg, dest_reg, #((dist & 0xff) << scale) - } - dist>>=8; - scale+=8; - } - } - } else if (method == 2) { + if ( val_is_operand2(imm, &scale) ) { + cache_addd( MOV_IMM(dest_reg, imm >> scale, ROTATE_SCALE(scale)) ); // mov dest_reg, #imm + } else if ( val_is_operand2(~imm, &scale) ) { + cache_addd( MVN_IMM(dest_reg, (~imm) >> scale, ROTATE_SCALE(scale)) ); // mvn dest_reg, #~imm + } else { + cache_addd( MOVW(dest_reg, imm & 0xffff) ); // movw dest_reg, #(imm & 0xffff) + + if (imm >= 0x10000) + { + cache_addd( MOVT(dest_reg, imm >> 16) ); // movt dest_reg, #(imm >> 16) + } + } +#else + Bit32u imm2, first, scale; + + scale = 0; + first = 1; + imm2 = ~imm; + + if (get_imm_gen_len(imm) <= get_imm_gen_len(imm2)) { + if (imm == 0) { + cache_addd( MOV_IMM(dest_reg, 0, 0) ); // mov dest_reg, #0 + } else { while (imm) { while ((imm & 3) == 0) { imm>>=2; @@ -264,8 +296,11 @@ static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) { imm>>=8; scale+=8; } + } + } else { + if (imm2 == 0) { + cache_addd( MVN_IMM(dest_reg, 0, 0) ); // mvn dest_reg, #0 } 
else { - imm2 = ~imm; while (imm2) { while ((imm2 & 3) == 0) { imm2>>=2; @@ -282,12 +317,67 @@ static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) { } } } +#endif +} + +// helper function +static bool gen_mov_memval_to_reg_helper(HostReg dest_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) { + switch (size) { + case 4: +#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE)) + if ((data & 3) == 0) +#endif + { + if ((data >= addr_data) && (data < addr_data + 4096)) { + cache_addd( LDR_IMM(dest_reg, addr_reg, data - addr_data) ); // ldr dest_reg, [addr_reg, #(data - addr_data)] + return true; + } else if ((data < addr_data) && (data > addr_data - 4096)) { + cache_addd( LDR_IMM_M(dest_reg, addr_reg, addr_data - data) ); // ldr dest_reg, [addr_reg, #-(addr_data - data)] + return true; + } + } + break; + case 2: +#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE)) + if ((data & 1) == 0) +#endif + { + if ((data >= addr_data) && (data < addr_data + 256)) { + cache_addd( LDRH_IMM(dest_reg, addr_reg, data - addr_data) ); // ldrh dest_reg, [addr_reg, #(data - addr_data)] + return true; + } else if ((data < addr_data) && (data > addr_data - 256)) { + cache_addd( LDRH_IMM_M(dest_reg, addr_reg, addr_data - data) ); // ldrh dest_reg, [addr_reg, #-(addr_data - data)] + return true; + } + } + break; + case 1: + if ((data >= addr_data) && (data < addr_data + 4096)) { + cache_addd( LDRB_IMM(dest_reg, addr_reg, data - addr_data) ); // ldrb dest_reg, [addr_reg, #(data - addr_data)] + return true; + } else if ((data < addr_data) && (data > addr_data - 4096)) { + cache_addd( LDRB_IMM_M(dest_reg, addr_reg, addr_data - data) ); // ldrb dest_reg, [addr_reg, #-(addr_data - data)] + return true; + } + default: + break; + } + return false; +} + +// helper function +static bool gen_mov_memval_to_reg(HostReg dest_reg, void *data, Bitu size) { + if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true; + if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true; + if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true; + return false; } // helper function for gen_mov_word_to_reg static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,HostReg data_reg) { // alignment.... 
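The gen_mov_memval_* helpers introduced above are the other recurring change: FC_REGS_ADDR, FC_SEGS_ADDR and readdata_addr hold the addresses of cpu_regs, Segs and core_dynrec.readdata for the whole run, so any value inside those structures can be accessed with a single ldr/str using an immediate offset instead of first building the full 32-bit address in a temporary register. The reachability test they perform is essentially the following sketch (4096 is the ldr/str/ldrb/strb immediate range, 256 the ldrh/strh range; the function name is illustrative):

static bool in_imm_offset_range(Bit32u data, Bit32u base, Bit32u limit) {
	if ((data >= base) && (data < base + limit)) return true;  // positive offset: ldr reg, [base_reg, #(data - base)]
	if ((data < base) && (data > base - limit)) return true;   // negative offset: ldr reg, [base_reg, #-(base - data)]
	return false;
}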
if (dword) { +#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE)) if ((Bit32u)data & 3) { if ( ((Bit32u)data & 3) == 2 ) { cache_addd( LDRH_IMM(dest_reg, data_reg, 0) ); // ldrh dest_reg, [data_reg] @@ -300,15 +390,20 @@ static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,Ho cache_addd( LDRB_IMM(temp2, data_reg, 3) ); // ldrb temp2, [data_reg, #3] cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 24) ); // orr dest_reg, dest_reg, temp2, lsl #24 } - } else { + } else +#endif + { cache_addd( LDR_IMM(dest_reg, data_reg, 0) ); // ldr dest_reg, [data_reg] } } else { +#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE)) if ((Bit32u)data & 1) { cache_addd( LDRB_IMM(dest_reg, data_reg, 0) ); // ldrb dest_reg, [data_reg] cache_addd( LDRB_IMM(temp2, data_reg, 1) ); // ldrb temp2, [data_reg, #1] cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 8) ); // orr dest_reg, dest_reg, temp2, lsl #8 - } else { + } else +#endif + { cache_addd( LDRH_IMM(dest_reg, data_reg, 0) ); // ldrh dest_reg, [data_reg] } } @@ -317,42 +412,76 @@ static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,Ho // move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg // 16bit moves may destroy the upper 16bit of the destination register static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) { - gen_mov_dword_to_reg_imm(temp1, (Bit32u)data); - gen_mov_word_to_reg_helper(dest_reg, data, dword, temp1); + if (!gen_mov_memval_to_reg(dest_reg, data, (dword)?4:2)) { + gen_mov_dword_to_reg_imm(temp1, (Bit32u)data); + gen_mov_word_to_reg_helper(dest_reg, data, dword, temp1); + } } // move a 16bit constant value into dest_reg // the upper 16bit of the destination register may be destroyed -static void gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) { - Bits first, scale; - Bit32u imm2; - if (imm == 0) { - cache_addd( MOV_IMM(dest_reg, 0, 0) ); // mov dest_reg, #0 - } else { - scale = 0; - first = 1; - imm2 = (Bit32u)imm; - while (imm2) { - while ((imm2 & 3) == 0) { - imm2>>=2; - scale+=2; +static void INLINE gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) { + gen_mov_dword_to_reg_imm(dest_reg, (Bit32u)imm); +} + +// helper function +static bool gen_mov_memval_from_reg_helper(HostReg src_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) { + switch (size) { + case 4: +#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE)) + if ((data & 3) == 0) +#endif + { + if ((data >= addr_data) && (data < addr_data + 4096)) { + cache_addd( STR_IMM(src_reg, addr_reg, data - addr_data) ); // str src_reg, [addr_reg, #(data - addr_data)] + return true; + } else if ((data < addr_data) && (data > addr_data - 4096)) { + cache_addd( STR_IMM_M(src_reg, addr_reg, addr_data - data) ); // str src_reg, [addr_reg, #-(addr_data - data)] + return true; + } } - if (first) { - cache_addd( MOV_IMM(dest_reg, imm2 & 0xff, ROTATE_SCALE(scale)) ); // mov dest_reg, #((imm2 & 0xff) << scale) - first = 0; - } else { - cache_addd( ORR_IMM(dest_reg, dest_reg, imm2 & 0xff, ROTATE_SCALE(scale)) ); // orr dest_reg, dest_reg, #((imm2 & 0xff) << scale) + break; + case 2: +#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE)) + if ((data & 1) == 0) +#endif + { + if ((data >= addr_data) && (data < addr_data + 256)) { + cache_addd( STRH_IMM(src_reg, addr_reg, data - addr_data) ); // strh src_reg, [addr_reg, #(data - addr_data)] + return true; + } else if ((data < addr_data) && (data > addr_data - 256)) { + cache_addd( 
STRH_IMM_M(src_reg, addr_reg, addr_data - data) ); // strh src_reg, [addr_reg, #-(addr_data - data)] + return true; + } } - imm2>>=8; - scale+=8; - } + break; + case 1: + if ((data >= addr_data) && (data < addr_data + 4096)) { + cache_addd( STRB_IMM(src_reg, addr_reg, data - addr_data) ); // strb src_reg, [addr_reg, #(data - addr_data)] + return true; + } else if ((data < addr_data) && (data > addr_data - 4096)) { + cache_addd( STRB_IMM_M(src_reg, addr_reg, addr_data - data) ); // strb src_reg, [addr_reg, #-(addr_data - data)] + return true; + } + default: + break; } + return false; +} + +// helper function +static bool gen_mov_memval_from_reg(HostReg src_reg, void *dest, Bitu size) { + if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true; + if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true; + if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true; + return false; } // helper function for gen_mov_word_from_reg static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, HostReg data_reg) { // alignment.... if (dword) { +#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE)) if ((Bit32u)dest & 3) { if ( ((Bit32u)dest & 3) == 2 ) { cache_addd( STRH_IMM(src_reg, data_reg, 0) ); // strh src_reg, [data_reg] @@ -365,15 +494,20 @@ static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, cache_addd( MOV_REG_LSR_IMM(temp2, temp2, 16) ); // mov temp2, temp2, lsr #16 cache_addd( STRB_IMM(temp2, data_reg, 3) ); // strb temp2, [data_reg, #3] } - } else { + } else +#endif + { cache_addd( STR_IMM(src_reg, data_reg, 0) ); // str src_reg, [data_reg] } } else { +#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE)) if ((Bit32u)dest & 1) { cache_addd( STRB_IMM(src_reg, data_reg, 0) ); // strb src_reg, [data_reg] cache_addd( MOV_REG_LSR_IMM(temp2, src_reg, 8) ); // mov temp2, src_reg, lsr #8 cache_addd( STRB_IMM(temp2, data_reg, 1) ); // strb temp2, [data_reg, #1] - } else { + } else +#endif + { cache_addd( STRH_IMM(src_reg, data_reg, 0) ); // strh src_reg, [data_reg] } } @@ -381,8 +515,10 @@ static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, // move 32bit (dword==true) or 16bit (dword==false) of a register into memory static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) { - gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); - gen_mov_word_from_reg_helper(src_reg, dest, dword, temp1); + if (!gen_mov_memval_from_reg(src_reg, dest, (dword)?4:2)) { + gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); + gen_mov_word_from_reg_helper(src_reg, dest, dword, temp1); + } } // move an 8bit value from memory into dest_reg @@ -390,8 +526,10 @@ static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) { // this function does not use FC_OP1/FC_OP2 as dest_reg as these // registers might not be directly byte-accessible on some architectures static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) { - gen_mov_dword_to_reg_imm(temp1, (Bit32u)data); - cache_addd( LDRB_IMM(dest_reg, temp1, 0) ); // ldrb dest_reg, [temp1] + if (!gen_mov_memval_to_reg(dest_reg, data, 1)) { + gen_mov_dword_to_reg_imm(temp1, (Bit32u)data); + cache_addd( LDRB_IMM(dest_reg, temp1, 0) ); // ldrb dest_reg, [temp1] + } } // move an 8bit value from memory into dest_reg @@ -420,8 +558,10 @@ static void INLINE gen_mov_byte_to_reg_low_imm_canuseword(HostReg 
dest_reg,Bit8u // move the lowest 8bit of a register into memory static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) { - gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); - cache_addd( STRB_IMM(src_reg, temp1, 0) ); // strb src_reg, [temp1] + if (!gen_mov_memval_from_reg(src_reg, dest, 1)) { + gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); + cache_addd( STRB_IMM(src_reg, temp1, 0) ); // strb src_reg, [temp1] + } } @@ -430,10 +570,18 @@ static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) { // the register is zero-extended (sign==false) or sign-extended (sign==true) static void gen_extend_byte(bool sign,HostReg reg) { if (sign) { +#if C_TARGETCPU == ARMV7LE + cache_addd( SXTB(reg, reg, 0) ); // sxtb reg, reg +#else cache_addd( MOV_REG_LSL_IMM(reg, reg, 24) ); // mov reg, reg, lsl #24 cache_addd( MOV_REG_ASR_IMM(reg, reg, 24) ); // mov reg, reg, asr #24 +#endif } else { +#if C_TARGETCPU == ARMV7LE + cache_addd( UXTB(reg, reg, 0) ); // uxtb reg, reg +#else cache_addd( AND_IMM(reg, reg, 0xff, 0) ); // and reg, reg, #0xff +#endif } } @@ -441,11 +589,19 @@ static void gen_extend_byte(bool sign,HostReg reg) { // the register is zero-extended (sign==false) or sign-extended (sign==true) static void gen_extend_word(bool sign,HostReg reg) { if (sign) { +#if C_TARGETCPU == ARMV7LE + cache_addd( SXTH(reg, reg, 0) ); // sxth reg, reg +#else cache_addd( MOV_REG_LSL_IMM(reg, reg, 16) ); // mov reg, reg, lsl #16 cache_addd( MOV_REG_ASR_IMM(reg, reg, 16) ); // mov reg, reg, asr #16 +#endif } else { +#if C_TARGETCPU == ARMV7LE + cache_addd( UXTH(reg, reg, 0) ); // uxth reg, reg +#else cache_addd( MOV_REG_LSL_IMM(reg, reg, 16) ); // mov reg, reg, lsl #16 cache_addd( MOV_REG_LSR_IMM(reg, reg, 16) ); // mov reg, reg, lsr #16 +#endif } } @@ -457,72 +613,57 @@ static void gen_add(HostReg reg,void* op) { // add a 32bit constant value to a full register static void gen_add_imm(HostReg reg,Bit32u imm) { - Bits method1, method2, num1, num2, scale, sub; - if(!imm) return; - if (imm == 1) { - cache_addd( ADD_IMM(reg, reg, 1, 0) ); // add reg, reg, #1 - } else if (imm == 0xffffffff) { - cache_addd( SUB_IMM(reg, reg, 1, 0) ); // sub reg, reg, #1 - } else { - method1 = get_method_imm_gen_len(imm, 1, &num1); - method2 = get_method_imm_gen_len(-((Bit32s)imm), 1, &num2); - if (num2 < num1) { - method1 = method2; - imm = (Bit32u)(-((Bit32s)imm)); - sub = 1; - } else sub = 0; + Bit32u imm2, scale; - if (method1 != 2) { - gen_mov_dword_to_reg_imm(temp3, imm); - if (sub) { - cache_addd( SUB_REG_LSL_IMM(reg, reg, temp3, 0) ); // sub reg, reg, temp3 - } else { - cache_addd( ADD_REG_LSL_IMM(reg, reg, temp3, 0) ); // add reg, reg, temp3 - } + if(!imm) return; + + imm2 = (Bit32u) (-((Bit32s)imm)); + + if ( val_is_operand2(imm, &scale) ) { + cache_addd( ADD_IMM(reg, reg, imm >> scale, ROTATE_SCALE(scale)) ); // add reg, reg, #imm + } else if ( val_is_operand2(imm2, &scale) ) { + cache_addd( SUB_IMM(reg, reg, imm2 >> scale, ROTATE_SCALE(scale)) ); // sub reg, reg, #(-imm) +#if C_TARGETCPU == ARMV7LE + } else if (imm2 < 0x10000) { + cache_addd( MOVW(temp2, imm2) ); // movw temp2, #(-imm) + cache_addd( SUB_REG_LSL_IMM(reg, reg, temp2, 0) ); // sub reg, reg, temp2 +#endif + } else { +#if C_TARGETCPU != ARMV7LE + if (get_min_imm_gen_len(imm) <= get_min_imm_gen_len(imm2)) { +#endif + gen_mov_dword_to_reg_imm(temp2, imm); + cache_addd( ADD_REG_LSL_IMM(reg, reg, temp2, 0) ); // add reg, reg, temp2 +#if C_TARGETCPU != ARMV7LE } else { - scale = 0; - while (imm) { - while ((imm & 3) == 0) { - imm>>=2; - scale+=2; - } - 
if (sub) { - cache_addd( SUB_IMM(reg, reg, imm & 0xff, ROTATE_SCALE(scale)) ); // sub reg, reg, #((imm & 0xff) << scale) - } else { - cache_addd( ADD_IMM(reg, reg, imm & 0xff, ROTATE_SCALE(scale)) ); // add reg, reg, #((imm & 0xff) << scale) - } - imm>>=8; - scale+=8; - } + gen_mov_dword_to_reg_imm(temp2, imm2); + cache_addd( SUB_REG_LSL_IMM(reg, reg, temp2, 0) ); // sub reg, reg, temp2 } +#endif } } // and a 32bit constant value with a full register static void gen_and_imm(HostReg reg,Bit32u imm) { - Bits method, scale; - Bit32u imm2; + Bit32u imm2, scale; + imm2 = ~imm; if(!imm2) return; + if (!imm) { cache_addd( MOV_IMM(reg, 0, 0) ); // mov reg, #0 + } else if ( val_is_operand2(imm, &scale) ) { + cache_addd( AND_IMM(reg, reg, imm >> scale, ROTATE_SCALE(scale)) ); // and reg, reg, #imm + } else if ( val_is_operand2(imm2, &scale) ) { + cache_addd( BIC_IMM(reg, reg, imm2 >> scale, ROTATE_SCALE(scale)) ); // bic reg, reg, #(~imm) +#if C_TARGETCPU == ARMV7LE + } else if (imm2 < 0x10000) { + cache_addd( MOVW(temp2, imm2) ); // movw temp2, #(~imm) + cache_addd( BIC_REG_LSL_IMM(reg, reg, temp2, 0) ); // bic reg, reg, temp2 +#endif } else { - method = get_method_imm_gen_len(imm, 0, NULL); - if (method != 3) { - gen_mov_dword_to_reg_imm(temp3, imm); - cache_addd( AND_REG_LSL_IMM(reg, reg, temp3, 0) ); // and reg, reg, temp3 - } else { - scale = 0; - while (imm2) { - while ((imm2 & 3) == 0) { - imm2>>=2; - scale+=2; - } - cache_addd( BIC_IMM(reg, reg, imm2 & 0xff, ROTATE_SCALE(scale)) ); // bic reg, reg, #((imm2 & 0xff) << scale) - imm2>>=8; - scale+=8; - } - } + gen_mov_dword_to_reg_imm(temp2, imm); + cache_addd( AND_REG_LSL_IMM(reg, reg, temp2, 0) ); // and reg, reg, temp2 } } @@ -538,68 +679,71 @@ static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) { gen_mov_direct_dword(dest,(Bit32u)imm); } -// add an 8bit constant value to a dword memory value -static void gen_add_direct_byte(void* dest,Bit8s imm) { - if(!imm) return; - gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); - gen_mov_word_to_reg_helper(temp3, dest, 1, temp1); - if (imm >= 0) { - cache_addd( ADD_IMM(temp3, temp3, (Bit32s)imm, 0) ); // add temp3, temp3, #(imm) - } else { - cache_addd( SUB_IMM(temp3, temp3, -((Bit32s)imm), 0) ); // sub temp3, temp3, #(-imm) - } - gen_mov_word_from_reg_helper(temp3, dest, 1, temp1); -} - // add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) { + if (!dword) imm &= 0xffff; if(!imm) return; - if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) { - gen_add_direct_byte(dest,(Bit8s)imm); - return; + + if (!gen_mov_memval_to_reg(temp3, dest, (dword)?4:2)) { + gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); + gen_mov_word_to_reg_helper(temp3, dest, dword, temp1); } - gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); - gen_mov_word_to_reg_helper(temp3, dest, dword, temp1); - // maybe use function gen_add_imm - if (dword) { - gen_mov_dword_to_reg_imm(temp2, imm); - } else { - gen_mov_word_to_reg_imm(temp2, (Bit16u)imm); + gen_add_imm(temp3, imm); + if (!gen_mov_memval_from_reg(temp3, dest, (dword)?4:2)) { + gen_mov_word_from_reg_helper(temp3, dest, dword, temp1); } - cache_addd( ADD_REG_LSL_IMM(temp3, temp3, temp2, 0) ); // add temp3, temp3, temp2 - gen_mov_word_from_reg_helper(temp3, dest, dword, temp1); } -// subtract an 8bit constant value from a dword memory value -static void gen_sub_direct_byte(void* dest,Bit8s imm) { - if(!imm) return; - gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); - 
gen_mov_word_to_reg_helper(temp3, dest, 1, temp1); - if (imm >= 0) { - cache_addd( SUB_IMM(temp3, temp3, (Bit32s)imm, 0) ); // sub temp3, temp3, #(imm) - } else { - cache_addd( ADD_IMM(temp3, temp3, -((Bit32s)imm), 0) ); // add temp3, temp3, #(-imm) - } - gen_mov_word_from_reg_helper(temp3, dest, 1, temp1); +// add an 8bit constant value to a dword memory value +static void gen_add_direct_byte(void* dest,Bit8s imm) { + gen_add_direct_word(dest, (Bit32s)imm, 1); } // subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) { + Bit32u imm2, scale; + + if (!dword) imm &= 0xffff; if(!imm) return; - if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) { - gen_sub_direct_byte(dest,(Bit8s)imm); - return; + + if (!gen_mov_memval_to_reg(temp3, dest, (dword)?4:2)) { + gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); + gen_mov_word_to_reg_helper(temp3, dest, dword, temp1); } - gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); - gen_mov_word_to_reg_helper(temp3, dest, dword, temp1); - // maybe use function gen_add_imm/gen_sub_imm - if (dword) { - gen_mov_dword_to_reg_imm(temp2, imm); + + imm2 = (Bit32u) (-((Bit32s)imm)); + + if ( val_is_operand2(imm, &scale) ) { + cache_addd( SUB_IMM(temp3, temp3, imm >> scale, ROTATE_SCALE(scale)) ); // sub temp3, temp3, #imm + } else if ( val_is_operand2(imm2, &scale) ) { + cache_addd( ADD_IMM(temp3, temp3, imm2 >> scale, ROTATE_SCALE(scale)) ); // add temp3, temp3, #(-imm) +#if C_TARGETCPU == ARMV7LE + } else if (imm2 < 0x10000) { + cache_addd( MOVW(temp2, imm2) ); // movw temp2, #(-imm) + cache_addd( ADD_REG_LSL_IMM(temp3, temp3, temp2, 0) ); // add temp3, temp3, temp2 +#endif } else { - gen_mov_word_to_reg_imm(temp2, (Bit16u)imm); +#if C_TARGETCPU != ARMV7LE + if (get_min_imm_gen_len(imm) <= get_min_imm_gen_len(imm2)) { +#endif + gen_mov_dword_to_reg_imm(temp2, imm); + cache_addd( SUB_REG_LSL_IMM(temp3, temp3, temp2, 0) ); // sub temp3, temp3, temp2 +#if C_TARGETCPU != ARMV7LE + } else { + gen_mov_dword_to_reg_imm(temp2, imm2); + cache_addd( ADD_REG_LSL_IMM(temp3, temp3, temp2, 0) ); // add temp3, temp3, temp2 + } +#endif } - cache_addd( SUB_REG_LSL_IMM(temp3, temp3, temp2, 0) ); // sub temp3, temp3, temp2 - gen_mov_word_from_reg_helper(temp3, dest, dword, temp1); + + if (!gen_mov_memval_from_reg(temp3, dest, (dword)?4:2)) { + gen_mov_word_from_reg_helper(temp3, dest, dword, temp1); + } +} + +// subtract an 8bit constant value from a dword memory value +static void gen_sub_direct_byte(void* dest,Bit8s imm) { + gen_sub_direct_word(dest, (Bit32s)imm, 1); } // effective address calculation, destination is dest_reg @@ -622,10 +766,16 @@ static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) { // generate a call to a parameterless function static void INLINE gen_call_function_raw(void * func) { +#if C_TARGETCPU == ARMV7LE + cache_addd( MOVW(temp1, ((Bit32u)func) & 0xffff) ); // movw temp1, #(func & 0xffff) + cache_addd( MOVT(temp1, ((Bit32u)func) >> 16) ); // movt temp1, #(func >> 16) + cache_addd( BLX_REG(temp1) ); // blx temp1 +#else cache_addd( LDR_IMM(temp1, HOST_pc, 4) ); // ldr temp1, [pc, #4] cache_addd( ADD_IMM(HOST_lr, HOST_pc, 4, 0) ); // add lr, pc, #4 cache_addd( BX(temp1) ); // bx temp1 cache_addd((Bit32u)func); // .int func +#endif } // generate a call to a function with paramcount parameters @@ -665,43 +815,24 @@ static void INLINE gen_load_param_mem(Bitu mem,Bitu param) { // jump to an address pointed at by ptr, offset is in imm static void 
gen_jmp_ptr(void * ptr,Bits imm=0) { - Bits num1, num2, scale, sub; - Bitu imm2; + Bit32u scale; + gen_mov_word_to_reg(temp3, ptr, 1); - if (imm) { - num1 = get_imm_gen_len(imm); - num2 = get_imm_gen_len(-imm); - - if (num2 < num1) { - imm = -imm; - sub = 1; - } else sub = 0; - - scale = 0; - imm2 = (Bitu)imm; - while (imm2) { - while ((imm2 & 3) == 0) { - imm2>>=2; - scale+=2; - } - if (sub) { - cache_addd( SUB_IMM(temp3, temp3, imm2 & 0xff, ROTATE_SCALE(scale)) ); // sub temp3, temp3, #((imm2 & 0xff) << scale) - } else { - cache_addd( ADD_IMM(temp3, temp3, imm2 & 0xff, ROTATE_SCALE(scale)) ); // add temp3, temp3, #((imm2 & 0xff) << scale) - } - imm2>>=8; - scale+=8; - } - } - -#if (1) -// (*ptr) should be word aligned +#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE)) +// (*ptr) should be word aligned if ((imm & 0x03) == 0) { - cache_addd( LDR_IMM(temp1, temp3, 0) ); // ldr temp1, [temp3] - } else #endif - { + if ((imm >= 0) && (imm < 4096)) { + cache_addd( LDR_IMM(temp1, temp3, imm) ); // ldr temp1, [temp3, #imm] + } else { + gen_mov_dword_to_reg_imm(temp2, imm); + cache_addd( LDR_REG_LSL_IMM(temp1, temp3, temp2, 0) ); // ldr temp1, [temp3, temp2] + } +#if !(defined(C_UNALIGNED_MEMORY) || (C_TARGETCPU == ARMV7LE)) + } else { + gen_add_imm(temp3, imm); + cache_addd( LDRB_IMM(temp1, temp3, 0) ); // ldrb temp1, [temp3] cache_addd( LDRB_IMM(temp2, temp3, 1) ); // ldrb temp2, [temp3, #1] cache_addd( ORR_REG_LSL_IMM(temp1, temp1, temp2, 8) ); // orr temp1, temp1, temp2, lsl #8 @@ -710,6 +841,7 @@ static void gen_jmp_ptr(void * ptr,Bits imm=0) { cache_addd( LDRB_IMM(temp2, temp3, 3) ); // ldrb temp2, [temp3, #3] cache_addd( ORR_REG_LSL_IMM(temp1, temp1, temp2, 24) ); // orr temp1, temp1, temp2, lsl #24 } +#endif cache_addd( BX(temp1) ); // bx temp1 } @@ -757,67 +889,74 @@ static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool isdword) { } else { cache_addd( TST_IMM(reg, 0xff, 0) ); // tst reg, #0xff } - cache_addd( BEQ_FWD(8) ); // beq nobranch (pc +8) - cache_addd( LDR_IMM(temp1, HOST_pc, 0) ); // ldr temp1, [pc, #0] - cache_addd( BX(temp1) ); // bx temp1 - cache_addd(0); // fill j - // nobranch: + cache_addd( BNE_FWD(0) ); // bne j return ((Bit32u)cache.pos-4); } // compare 32bit-register against zero and jump if value less/equal than zero static Bit32u gen_create_branch_long_leqzero(HostReg reg) { cache_addd( CMP_IMM(reg, 0, 0) ); // cmp reg, #0 - cache_addd( BGT_FWD(8) ); // bgt nobranch (pc+8) - cache_addd( LDR_IMM(temp1, HOST_pc, 0) ); // ldr temp1, [pc, #0] - cache_addd( BX(temp1) ); // bx temp1 - cache_addd(0); // fill j - // nobranch: + cache_addd( BLE_FWD(0) ); // ble j return ((Bit32u)cache.pos-4); } // calculate long relative offset and fill it into the location pointed to by data static void INLINE gen_fill_branch_long(Bit32u data) { - // this is an absolute branch - *(Bit32u*)data=(Bit32u)cache.pos; + *(Bit32u*)data=( (*(Bit32u*)data) & 0xff000000 ) | ( ( ((Bit32u)cache.pos - (data+8)) >> 2 ) & 0x00ffffff ); } static void gen_run_code(void) { - cache_addd(0xe92d4000); // stmfd sp!, {lr} - cache_addd(0xe92d0cf0); // stmfd sp!, {v1-v4,v7,v8} +#if C_TARGETCPU == ARMV7LE + cache_addd(0xe92d4df0); // stmfd sp!, {v1-v5,v7,v8,lr} - // adr: 8 - cache_addd( LDR_IMM(FC_SEGS_ADDR, HOST_pc, 64 - (8 + 8)) ); // ldr FC_SEGS_ADDR, [pc, #(&Segs)] - // adr: 12 - cache_addd( LDR_IMM(FC_REGS_ADDR, HOST_pc, 68 - (12 + 8)) ); // ldr FC_REGS_ADDR, [pc, #(&cpu_regs)] + cache_addd( MOVW(FC_SEGS_ADDR, ((Bit32u)&Segs) & 0xffff) ); // movw FC_SEGS_ADDR, #(&Segs & 0xffff) + cache_addd( 
MOVT(FC_SEGS_ADDR, ((Bit32u)&Segs) >> 16) ); // movt FC_SEGS_ADDR, #(&Segs >> 16) - cache_addd( ADD_IMM(HOST_lr, HOST_pc, 4, 0) ); // add lr, pc, #4 - cache_addd(0xe92d4000); // stmfd sp!, {lr} - cache_addd( BX(HOST_r0) ); // bx r0 + cache_addd( MOVW(FC_REGS_ADDR, ((Bit32u)&cpu_regs) & 0xffff) ); // movw FC_REGS_ADDR, #(&cpu_regs & 0xffff) + cache_addd( MOVT(FC_REGS_ADDR, ((Bit32u)&cpu_regs) >> 16) ); // movt FC_REGS_ADDR, #(&cpu_regs >> 16) - cache_addd(0xe8bd0cf0); // ldmfd sp!, {v1-v4,v7,v8} + cache_addd( MOVW(readdata_addr, ((Bitu)&core_dynrec.readdata) & 0xffff) ); // movw readdata_addr, #(&core_dynrec.readdata & 0xffff) + cache_addd( MOVT(readdata_addr, ((Bitu)&core_dynrec.readdata) >> 16) ); // movt readdata_addr, #(&core_dynrec.readdata >> 16) - cache_addd(0xe8bd4000); // ldmfd sp!, {lr} - cache_addd( BX(HOST_lr) ); // bx lr + cache_addd( BX(HOST_r0) ); // bx r0 +#else + Bit8u *pos1, *pos2, *pos3; - // fill up to 64 bytes - cache_addd( NOP ); // nop - cache_addd( NOP ); // nop - cache_addd( NOP ); // nop - cache_addd( NOP ); // nop - cache_addd( NOP ); // nop - cache_addd( NOP ); // nop + cache_addd(0xe92d4df0); // stmfd sp!, {v1-v5,v7,v8,lr} - // adr: 64 + pos1 = cache.pos; + cache_addd( 0 ); + pos2 = cache.pos; + cache_addd( 0 ); + pos3 = cache.pos; + cache_addd( 0 ); + + cache_addd( BX(HOST_r0) ); // bx r0 + + // align cache.pos to 32 bytes + if ((((Bitu)cache.pos) & 0x1f) != 0) { + cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f)); + } + + *(Bit32u*)pos1 = LDR_IMM(FC_SEGS_ADDR, HOST_pc, cache.pos - (pos1 + 8)); // ldr FC_SEGS_ADDR, [pc, #(&Segs)] cache_addd((Bit32u)&Segs); // address of "Segs" - // adr: 68 + + *(Bit32u*)pos2 = LDR_IMM(FC_REGS_ADDR, HOST_pc, cache.pos - (pos2 + 8)); // ldr FC_REGS_ADDR, [pc, #(&cpu_regs)] cache_addd((Bit32u)&cpu_regs); // address of "cpu_regs" + + *(Bit32u*)pos3 = LDR_IMM(readdata_addr, HOST_pc, cache.pos - (pos3 + 8)); // ldr readdata_addr, [pc, #(&core_dynrec.readdata)] + cache_addd((Bit32u)&core_dynrec.readdata); // address of "core_dynrec.readdata" + + // align cache.pos to 32 bytes + if ((((Bitu)cache.pos) & 0x1f) != 0) { + cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f)); + } +#endif } // return from a function static void gen_return_function(void) { - cache_addd(0xe8bd4000); // ldmfd sp!, {lr} - cache_addd( BX(HOST_lr) ); // bx lr + cache_addd(0xe8bd8df0); // ldmfd sp!, {v1-v5,v7,v8,pc} } #ifdef DRC_FLAGS_INVALIDATION @@ -831,42 +970,52 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_ADDb: case t_ADDw: case t_ADDd: - *(Bit32u*)pos=ADD_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // add FC_RETOP, a1, a2 + *(Bit32u*)pos=NOP; // nop *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+8)=ADD_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // add FC_RETOP, a1, a2 +#if C_TARGETCPU != ARMV7LE *(Bit32u*)(pos+12)=NOP; // nop +#endif break; case t_ORb: case t_ORw: case t_ORd: - *(Bit32u*)pos=ORR_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // orr FC_RETOP, a1, a2 + *(Bit32u*)pos=NOP; // nop *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+8)=ORR_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // orr FC_RETOP, a1, a2 +#if C_TARGETCPU != ARMV7LE *(Bit32u*)(pos+12)=NOP; // nop +#endif break; case t_ANDb: case t_ANDw: case t_ANDd: - *(Bit32u*)pos=AND_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // and FC_RETOP, a1, a2 + *(Bit32u*)pos=NOP; // nop *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop + 
*(Bit32u*)(pos+8)=AND_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // and FC_RETOP, a1, a2 +#if C_TARGETCPU != ARMV7LE *(Bit32u*)(pos+12)=NOP; // nop +#endif break; case t_SUBb: case t_SUBw: case t_SUBd: - *(Bit32u*)pos=SUB_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // sub FC_RETOP, a1, a2 + *(Bit32u*)pos=NOP; // nop *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+8)=SUB_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // sub FC_RETOP, a1, a2 +#if C_TARGETCPU != ARMV7LE *(Bit32u*)(pos+12)=NOP; // nop +#endif break; case t_XORb: case t_XORw: case t_XORd: - *(Bit32u*)pos=EOR_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // eor FC_RETOP, a1, a2 + *(Bit32u*)pos=NOP; // nop *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+8)=EOR_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // eor FC_RETOP, a1, a2 +#if C_TARGETCPU != ARMV7LE *(Bit32u*)(pos+12)=NOP; // nop +#endif break; case t_CMPb: case t_CMPw: @@ -874,114 +1023,185 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { case t_TESTb: case t_TESTw: case t_TESTd: - *(Bit32u*)pos=B_FWD(8); // b (pc+2*4) + *(Bit32u*)pos=NOP; // nop + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=NOP; // nop +#if C_TARGETCPU != ARMV7LE + *(Bit32u*)(pos+12)=NOP; // nop +#endif break; case t_INCb: case t_INCw: case t_INCd: - *(Bit32u*)pos=ADD_IMM(FC_RETOP, HOST_a1, 1, 0); // add FC_RETOP, a1, #1 + *(Bit32u*)pos=NOP; // nop *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+8)=ADD_IMM(FC_RETOP, HOST_a1, 1, 0); // add FC_RETOP, a1, #1 +#if C_TARGETCPU != ARMV7LE *(Bit32u*)(pos+12)=NOP; // nop +#endif break; case t_DECb: case t_DECw: case t_DECd: - *(Bit32u*)pos=SUB_IMM(FC_RETOP, HOST_a1, 1, 0); // sub FC_RETOP, a1, #1 + *(Bit32u*)pos=NOP; // nop *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+8)=SUB_IMM(FC_RETOP, HOST_a1, 1, 0); // sub FC_RETOP, a1, #1 +#if C_TARGETCPU != ARMV7LE *(Bit32u*)(pos+12)=NOP; // nop +#endif break; case t_SHLb: case t_SHLw: case t_SHLd: - *(Bit32u*)pos=MOV_REG_LSL_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, lsl a2 + *(Bit32u*)pos=NOP; // nop *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+8)=MOV_REG_LSL_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, lsl a2 +#if C_TARGETCPU != ARMV7LE *(Bit32u*)(pos+12)=NOP; // nop +#endif break; case t_SHRb: - *(Bit32u*)pos=AND_IMM(FC_RETOP, HOST_a1, 0xff, 0); // and FC_RETOP, a1, #0xff - *(Bit32u*)(pos+4)=MOV_REG_LSR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, lsr a2 - *(Bit32u*)(pos+8)=NOP; // nop - *(Bit32u*)(pos+12)=NOP; // nop + *(Bit32u*)pos=NOP; // nop +#if C_TARGETCPU == ARMV7LE + *(Bit32u*)(pos+4)=BFC(HOST_a1, 8, 24); // bfc a1, 8, 24 + *(Bit32u*)(pos+8)=MOV_REG_LSR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, lsr a2 +#else + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=AND_IMM(FC_RETOP, HOST_a1, 0xff, 0); // and FC_RETOP, a1, #0xff + *(Bit32u*)(pos+12)=MOV_REG_LSR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, lsr a2 +#endif break; case t_SHRw: - *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16 - *(Bit32u*)(pos+4)=MOV_REG_LSR_IMM(FC_RETOP, FC_RETOP, 16); // mov FC_RETOP, FC_RETOP, lsr #16 - *(Bit32u*)(pos+8)=MOV_REG_LSR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, lsr a2 - *(Bit32u*)(pos+12)=NOP; // nop + *(Bit32u*)pos=NOP; // nop +#if C_TARGETCPU == ARMV7LE + *(Bit32u*)(pos+4)=BFC(HOST_a1, 16, 16); // bfc a1, 16, 16 + 
*(Bit32u*)(pos+8)=MOV_REG_LSR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, lsr a2 +#else + *(Bit32u*)(pos+4)=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16 + *(Bit32u*)(pos+8)=MOV_REG_LSR_IMM(FC_RETOP, FC_RETOP, 16); // mov FC_RETOP, FC_RETOP, lsr #16 + *(Bit32u*)(pos+12)=MOV_REG_LSR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, lsr a2 +#endif break; case t_SHRd: - *(Bit32u*)pos=MOV_REG_LSR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, lsr a2 + *(Bit32u*)pos=NOP; // nop *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+8)=MOV_REG_LSR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, lsr a2 +#if C_TARGETCPU != ARMV7LE *(Bit32u*)(pos+12)=NOP; // nop +#endif break; case t_SARb: - *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24); // mov FC_RETOP, a1, lsl #24 - *(Bit32u*)(pos+4)=MOV_REG_ASR_IMM(FC_RETOP, FC_RETOP, 24); // mov FC_RETOP, FC_RETOP, asr #24 + *(Bit32u*)pos=NOP; // nop +#if C_TARGETCPU == ARMV7LE + *(Bit32u*)(pos+4)=SXTB(FC_RETOP, HOST_a1, 0); // sxtb FC_RETOP, a1 *(Bit32u*)(pos+8)=MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, asr a2 - *(Bit32u*)(pos+12)=NOP; // nop +#else + *(Bit32u*)(pos+4)=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24); // mov FC_RETOP, a1, lsl #24 + *(Bit32u*)(pos+8)=MOV_REG_ASR_IMM(FC_RETOP, FC_RETOP, 24); // mov FC_RETOP, FC_RETOP, asr #24 + *(Bit32u*)(pos+12)=MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, asr a2 +#endif break; case t_SARw: - *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16 - *(Bit32u*)(pos+4)=MOV_REG_ASR_IMM(FC_RETOP, FC_RETOP, 16); // mov FC_RETOP, FC_RETOP, asr #16 + *(Bit32u*)pos=NOP; // nop +#if C_TARGETCPU == ARMV7LE + *(Bit32u*)(pos+4)=SXTH(FC_RETOP, HOST_a1, 0); // sxth FC_RETOP, a1 *(Bit32u*)(pos+8)=MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, asr a2 - *(Bit32u*)(pos+12)=NOP; // nop +#else + *(Bit32u*)(pos+4)=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16 + *(Bit32u*)(pos+8)=MOV_REG_ASR_IMM(FC_RETOP, FC_RETOP, 16); // mov FC_RETOP, FC_RETOP, asr #16 + *(Bit32u*)(pos+12)=MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, asr a2 +#endif break; case t_SARd: - *(Bit32u*)pos=MOV_REG_ASR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, asr a2 + *(Bit32u*)pos=NOP; // nop *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+8)=MOV_REG_ASR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, asr a2 +#if C_TARGETCPU != ARMV7LE *(Bit32u*)(pos+12)=NOP; // nop +#endif break; case t_RORb: +#if C_TARGETCPU == ARMV7LE + *(Bit32u*)pos=BFI(HOST_a1, HOST_a1, 8, 8); // bfi a1, a1, 8, 8 + *(Bit32u*)(pos+4)=BFI(HOST_a1, HOST_a1, 16, 16); // bfi a1, a1, 16, 16 + *(Bit32u*)(pos+8)=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, ror a2 +#else *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24); // mov FC_RETOP, a1, lsl #24 *(Bit32u*)(pos+4)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 8); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8 *(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 *(Bit32u*)(pos+12)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2 +#endif break; case t_RORw: - *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16 - *(Bit32u*)(pos+4)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 - 
*(Bit32u*)(pos+8)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2 - *(Bit32u*)(pos+12)=NOP; // nop + *(Bit32u*)pos=NOP; // nop +#if C_TARGETCPU == ARMV7LE + *(Bit32u*)(pos+4)=BFI(HOST_a1, HOST_a1, 16, 16); // bfi a1, a1, 16, 16 + *(Bit32u*)(pos+8)=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, ror a2 +#else + *(Bit32u*)(pos+4)=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16 + *(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 + *(Bit32u*)(pos+12)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2 +#endif break; case t_RORd: - *(Bit32u*)pos=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, ror a2 + *(Bit32u*)pos=NOP; // nop *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+8)=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, ror a2 +#if C_TARGETCPU != ARMV7LE *(Bit32u*)(pos+12)=NOP; // nop +#endif break; case t_ROLw: +#if C_TARGETCPU == ARMV7LE + *(Bit32u*)pos=BFI(HOST_a1, HOST_a1, 16, 16); // bfi a1, a1, 16, 16 + *(Bit32u*)(pos+4)=RSB_IMM(HOST_a2, HOST_a2, 32, 0); // rsb a2, a2, #32 + *(Bit32u*)(pos+8)=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, ror a2 +#else *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16 *(Bit32u*)(pos+4)=RSB_IMM(HOST_a2, HOST_a2, 32, 0); // rsb a2, a2, #32 *(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 *(Bit32u*)(pos+12)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2 +#endif break; case t_ROLd: - *(Bit32u*)pos=RSB_IMM(HOST_a2, HOST_a2, 32, 0); // rsb a2, a2, #32 - *(Bit32u*)(pos+4)=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, ror a2 - *(Bit32u*)(pos+8)=NOP; // nop - *(Bit32u*)(pos+12)=NOP; // nop + *(Bit32u*)pos=NOP; // nop +#if C_TARGETCPU == ARMV7LE + *(Bit32u*)(pos+4)=RSB_IMM(HOST_a2, HOST_a2, 32, 0); // rsb a2, a2, #32 + *(Bit32u*)(pos+8)=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, ror a2 +#else + *(Bit32u*)(pos+4)=NOP; // nop + *(Bit32u*)(pos+8)=RSB_IMM(HOST_a2, HOST_a2, 32, 0); // rsb a2, a2, #32 + *(Bit32u*)(pos+12)=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, ror a2 +#endif break; case t_NEGb: case t_NEGw: case t_NEGd: - *(Bit32u*)pos=RSB_IMM(FC_RETOP, HOST_a1, 0, 0); // rsb FC_RETOP, a1, #0 + *(Bit32u*)pos=NOP; // nop *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop + *(Bit32u*)(pos+8)=RSB_IMM(FC_RETOP, HOST_a1, 0, 0); // rsb FC_RETOP, a1, #0 +#if C_TARGETCPU != ARMV7LE *(Bit32u*)(pos+12)=NOP; // nop +#endif break; default: +#if C_TARGETCPU == ARMV7LE + *(Bit32u*)pos=MOVW(temp1, ((Bit32u)fct_ptr) & 0xffff); // movw temp1, #(fct_ptr & 0xffff) + *(Bit32u*)(pos+4)=MOVT(temp1, ((Bit32u)fct_ptr) >> 16); // movt temp1, #(fct_ptr >> 16) +#else *(Bit32u*)(pos+12)=(Bit32u)fct_ptr; // simple_func +#endif break; } +#else +#if C_TARGETCPU == ARMV7LE + *(Bit32u*)pos=MOVW(temp1, ((Bit32u)fct_ptr) & 0xffff); // movw temp1, #(fct_ptr & 0xffff) + *(Bit32u*)(pos+4)=MOVT(temp1, ((Bit32u)fct_ptr) >> 16); // movt temp1, #(fct_ptr >> 16) #else *(Bit32u*)(pos+12)=(Bit32u)fct_ptr; // simple_func #endif +#endif } #endif @@ -1043,7 +1263,7 @@ static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index) { // the upper 24bit of the destination register can be destroyed // this function can use FC_OP1/FC_OP2 as dest_reg which are // 
not directly byte-accessible on some architectures -static void INLINE gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index) { +static void gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index) { cache_addd( LDRB_IMM(dest_reg, FC_REGS_ADDR, index) ); // ldrb dest_reg, [FC_REGS_ADDR, #index] } diff --git a/src/cpu/core_dynrec/risc_armv4le-s3.h b/src/cpu/core_dynrec/risc_armv4le-s3.h deleted file mode 100644 index 358c04d3..00000000 --- a/src/cpu/core_dynrec/risc_armv4le-s3.h +++ /dev/null @@ -1,918 +0,0 @@ -/* - * Copyright (C) 2002-2013 The DOSBox Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ - - - -/* ARMv4 (little endian) backend by M-HT (speed-tweaked arm version) */ - - -// temporary registers -#define temp1 HOST_ip -#define temp2 HOST_v3 -#define temp3 HOST_v4 - -// register that holds function return values -#define FC_RETOP HOST_a1 - -// register used for address calculations, -#define FC_ADDR HOST_v1 // has to be saved across calls, see DRC_PROTECT_ADDR_REG - -// register that holds the first parameter -#define FC_OP1 HOST_a1 - -// register that holds the second parameter -#define FC_OP2 HOST_a2 - -// special register that holds the third parameter for _R3 calls (byte accessible) -#define FC_OP3 HOST_v2 - -// register that holds byte-accessible temporary values -#define FC_TMP_BA1 HOST_a1 - -// register that holds byte-accessible temporary values -#define FC_TMP_BA2 HOST_a2 - -// temporary register for LEA -#define TEMP_REG_DRC HOST_v2 - -#ifdef DRC_USE_REGS_ADDR -// used to hold the address of "cpu_regs" - preferably filled in function gen_run_code -#define FC_REGS_ADDR HOST_v7 -#endif - -#ifdef DRC_USE_SEGS_ADDR -// used to hold the address of "Segs" - preferably filled in function gen_run_code -#define FC_SEGS_ADDR HOST_v8 -#endif - - -// helper macro -#define ROTATE_SCALE(x) ( (x)?(32 - x):(0) ) - - -// instruction encodings - -// move -// mov dst, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0 -#define MOV_IMM(dst, imm, rimm) (0xe3a00000 + ((dst) << 12) + (imm) + ((rimm) << 7) ) -// mov dst, src, lsl #imm -#define MOV_REG_LSL_IMM(dst, src, imm) (0xe1a00000 + ((dst) << 12) + (src) + ((imm) << 7) ) -// movs dst, src, lsl #imm -#define MOVS_REG_LSL_IMM(dst, src, imm) (0xe1b00000 + ((dst) << 12) + (src) + ((imm) << 7) ) -// mov dst, src, lsr #imm -#define MOV_REG_LSR_IMM(dst, src, imm) (0xe1a00020 + ((dst) << 12) + (src) + ((imm) << 7) ) -// mov dst, src, asr #imm -#define MOV_REG_ASR_IMM(dst, src, imm) (0xe1a00040 + ((dst) << 12) + (src) + ((imm) << 7) ) -// mov dst, src, lsl rreg -#define MOV_REG_LSL_REG(dst, src, rreg) (0xe1a00010 + ((dst) << 12) + (src) + ((rreg) << 8) ) -// mov dst, src, lsr rreg -#define MOV_REG_LSR_REG(dst, src, rreg) (0xe1a00030 + ((dst) << 12) + (src) + ((rreg) << 8) ) -// mov dst, src, asr rreg -#define MOV_REG_ASR_REG(dst, src, rreg) 
(0xe1a00050 + ((dst) << 12) + (src) + ((rreg) << 8) ) -// mov dst, src, ror rreg -#define MOV_REG_ROR_REG(dst, src, rreg) (0xe1a00070 + ((dst) << 12) + (src) + ((rreg) << 8) ) -// mvn dst, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0 -#define MVN_IMM(dst, imm, rimm) (0xe3e00000 + ((dst) << 12) + (imm) + ((rimm) << 7) ) - -// arithmetic -// add dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0 -#define ADD_IMM(dst, src, imm, rimm) (0xe2800000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) ) -// add dst, src1, src2, lsl #imm -#define ADD_REG_LSL_IMM(dst, src1, src2, imm) (0xe0800000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) ) -// sub dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0 -#define SUB_IMM(dst, src, imm, rimm) (0xe2400000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) ) -// sub dst, src1, src2, lsl #imm -#define SUB_REG_LSL_IMM(dst, src1, src2, imm) (0xe0400000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) ) -// rsb dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0 -#define RSB_IMM(dst, src, imm, rimm) (0xe2600000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) ) -// cmp src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0 -#define CMP_IMM(src, imm, rimm) (0xe3500000 + ((src) << 16) + (imm) + ((rimm) << 7) ) -// nop -#define NOP MOV_REG_LSL_IMM(HOST_r0, HOST_r0, 0) - -// logical -// tst src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0 -#define TST_IMM(src, imm, rimm) (0xe3100000 + ((src) << 16) + (imm) + ((rimm) << 7) ) -// and dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0 -#define AND_IMM(dst, src, imm, rimm) (0xe2000000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) ) -// and dst, src1, src2, lsl #imm -#define AND_REG_LSL_IMM(dst, src1, src2, imm) (0xe0000000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) ) -// orr dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0 -#define ORR_IMM(dst, src, imm, rimm) (0xe3800000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) ) -// orr dst, src1, src2, lsl #imm -#define ORR_REG_LSL_IMM(dst, src1, src2, imm) (0xe1800000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) ) -// orr dst, src1, src2, lsr #imm -#define ORR_REG_LSR_IMM(dst, src1, src2, imm) (0xe1800020 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) ) -// eor dst, src1, src2, lsl #imm -#define EOR_REG_LSL_IMM(dst, src1, src2, imm) (0xe0200000 + ((dst) << 12) + ((src1) << 16) + (src2) + ((imm) << 7) ) -// bic dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0 -#define BIC_IMM(dst, src, imm, rimm) (0xe3c00000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) ) - -// load -// ldr reg, [addr, #imm] @ 0 <= imm < 4096 -#define LDR_IMM(reg, addr, imm) (0xe5900000 + ((reg) << 12) + ((addr) << 16) + (imm) ) -// ldrh reg, [addr, #imm] @ 0 <= imm < 256 -#define LDRH_IMM(reg, addr, imm) (0xe1d000b0 + ((reg) << 12) + ((addr) << 16) + (((imm) & 0xf0) << 4) + ((imm) & 0x0f) ) -// ldrb reg, [addr, #imm] @ 0 <= imm < 4096 -#define LDRB_IMM(reg, addr, imm) (0xe5d00000 + ((reg) << 12) + ((addr) << 16) + (imm) ) - -// store -// str reg, [addr, #imm] @ 0 <= imm < 4096 -#define STR_IMM(reg, addr, imm) (0xe5800000 + ((reg) << 12) + ((addr) << 16) + (imm) ) -// strh reg, [addr, #imm] @ 0 <= imm < 256 -#define STRH_IMM(reg, addr, imm) (0xe1c000b0 + ((reg) << 12) + ((addr) << 16) + (((imm) & 0xf0) << 4) + ((imm) & 0x0f) ) -// strb reg, [addr, #imm] @ 0 <= imm < 4096 -#define STRB_IMM(reg, addr, imm) (0xe5c00000 + ((reg) << 12) + 
((addr) << 16) + (imm) ) - -// branch -// beq pc+imm @ 0 <= imm < 32M & imm mod 4 = 0 -#define BEQ_FWD(imm) (0x0a000000 + ((imm) >> 2) ) -// bne pc+imm @ 0 <= imm < 32M & imm mod 4 = 0 -#define BNE_FWD(imm) (0x1a000000 + ((imm) >> 2) ) -// bgt pc+imm @ 0 <= imm < 32M & imm mod 4 = 0 -#define BGT_FWD(imm) (0xca000000 + ((imm) >> 2) ) -// b pc+imm @ 0 <= imm < 32M & imm mod 4 = 0 -#define B_FWD(imm) (0xea000000 + ((imm) >> 2) ) -// bx reg -#define BX(reg) (0xe12fff10 + (reg) ) - - -// move a full register from reg_src to reg_dst -static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) { - if(reg_src == reg_dst) return; - cache_addd( MOV_REG_LSL_IMM(reg_dst, reg_src, 0) ); // mov reg_dst, reg_src -} - -// move a 32bit constant value into dest_reg -static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) { - Bits first, scale; - if (imm == 0) { - cache_addd( MOV_IMM(dest_reg, 0, 0) ); // mov dest_reg, #0 - } else { - scale = 0; - first = 1; - while (imm) { - while ((imm & 3) == 0) { - imm>>=2; - scale+=2; - } - if (first) { - cache_addd( MOV_IMM(dest_reg, imm & 0xff, ROTATE_SCALE(scale)) ); // mov dest_reg, #((imm & 0xff) << scale) - first = 0; - } else { - cache_addd( ORR_IMM(dest_reg, dest_reg, imm & 0xff, ROTATE_SCALE(scale)) ); // orr dest_reg, dest_reg, #((imm & 0xff) << scale) - } - imm>>=8; - scale+=8; - } - } -} - -// helper function for gen_mov_word_to_reg -static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,HostReg data_reg) { - // alignment.... - if (dword) { - if ((Bit32u)data & 3) { - if ( ((Bit32u)data & 3) == 2 ) { - cache_addd( LDRH_IMM(dest_reg, data_reg, 0) ); // ldrh dest_reg, [data_reg] - cache_addd( LDRH_IMM(temp2, data_reg, 2) ); // ldrh temp2, [data_reg, #2] - cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 16) ); // orr dest_reg, dest_reg, temp2, lsl #16 - } else { - cache_addd( LDRB_IMM(dest_reg, data_reg, 0) ); // ldrb dest_reg, [data_reg] - cache_addd( LDRH_IMM(temp2, data_reg, 1) ); // ldrh temp2, [data_reg, #1] - cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 8) ); // orr dest_reg, dest_reg, temp2, lsl #8 - cache_addd( LDRB_IMM(temp2, data_reg, 3) ); // ldrb temp2, [data_reg, #3] - cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 24) ); // orr dest_reg, dest_reg, temp2, lsl #24 - } - } else { - cache_addd( LDR_IMM(dest_reg, data_reg, 0) ); // ldr dest_reg, [data_reg] - } - } else { - if ((Bit32u)data & 1) { - cache_addd( LDRB_IMM(dest_reg, data_reg, 0) ); // ldrb dest_reg, [data_reg] - cache_addd( LDRB_IMM(temp2, data_reg, 1) ); // ldrb temp2, [data_reg, #1] - cache_addd( ORR_REG_LSL_IMM(dest_reg, dest_reg, temp2, 8) ); // orr dest_reg, dest_reg, temp2, lsl #8 - } else { - cache_addd( LDRH_IMM(dest_reg, data_reg, 0) ); // ldrh dest_reg, [data_reg] - } - } -} - -// move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg -// 16bit moves may destroy the upper 16bit of the destination register -static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) { - gen_mov_dword_to_reg_imm(temp1, (Bit32u)data); - gen_mov_word_to_reg_helper(dest_reg, data, dword, temp1); -} - -// move a 16bit constant value into dest_reg -// the upper 16bit of the destination register may be destroyed -static void INLINE gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) { - gen_mov_dword_to_reg_imm(dest_reg, (Bit32u)imm); -} - -// helper function for gen_mov_word_from_reg -static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, HostReg data_reg) { - // alignment.... 
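Note: a standalone host-side sketch (not part of the patch) of the decomposition that gen_mov_dword_to_reg_imm above performs when it builds a 32-bit constant from mov/orr steps: the constant is consumed in 8-bit chunks that start at even bit offsets. The rotate value printed follows the usual ARM data-processing immediate convention (rotate right by 32 minus the left shift); it is assumed here, not quoted from this file, that ROTATE_SCALE maps shifts the same way.

	#include <stdio.h>
	#include <stdint.h>

	/* Print the (imm8, rotate) pairs a mov/orr chain would use. */
	static void split_arm_immediate(uint32_t imm) {
		unsigned shift = 0;
		int first = 1;
		if (imm == 0) {
			printf("mov rd, #0\n");
			return;
		}
		while (imm != 0) {
			while ((imm & 3) == 0) {	/* chunks start at even bit offsets */
				imm >>= 2;
				shift += 2;
			}
			printf("%s rd, imm8=0x%02x rotate=%u\n",
				first ? "mov" : "orr",
				(unsigned)(imm & 0xff), (32 - shift) & 31);
			first = 0;
			imm >>= 8;
			shift += 8;
		}
	}

	int main(void) {
		split_arm_immediate(0x12345678);	/* needs four mov/orr steps */
		return 0;
	}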
- if (dword) { - if ((Bit32u)dest & 3) { - if ( ((Bit32u)dest & 3) == 2 ) { - cache_addd( STRH_IMM(src_reg, data_reg, 0) ); // strh src_reg, [data_reg] - cache_addd( MOV_REG_LSR_IMM(temp2, src_reg, 16) ); // mov temp2, src_reg, lsr #16 - cache_addd( STRH_IMM(temp2, data_reg, 2) ); // strh temp2, [data_reg, #2] - } else { - cache_addd( STRB_IMM(src_reg, data_reg, 0) ); // strb src_reg, [data_reg] - cache_addd( MOV_REG_LSR_IMM(temp2, src_reg, 8) ); // mov temp2, src_reg, lsr #8 - cache_addd( STRH_IMM(temp2, data_reg, 1) ); // strh temp2, [data_reg, #1] - cache_addd( MOV_REG_LSR_IMM(temp2, temp2, 16) ); // mov temp2, temp2, lsr #16 - cache_addd( STRB_IMM(temp2, data_reg, 3) ); // strb temp2, [data_reg, #3] - } - } else { - cache_addd( STR_IMM(src_reg, data_reg, 0) ); // str src_reg, [data_reg] - } - } else { - if ((Bit32u)dest & 1) { - cache_addd( STRB_IMM(src_reg, data_reg, 0) ); // strb src_reg, [data_reg] - cache_addd( MOV_REG_LSR_IMM(temp2, src_reg, 8) ); // mov temp2, src_reg, lsr #8 - cache_addd( STRB_IMM(temp2, data_reg, 1) ); // strb temp2, [data_reg, #1] - } else { - cache_addd( STRH_IMM(src_reg, data_reg, 0) ); // strh src_reg, [data_reg] - } - } -} - -// move 32bit (dword==true) or 16bit (dword==false) of a register into memory -static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) { - gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); - gen_mov_word_from_reg_helper(src_reg, dest, dword, temp1); -} - -// move an 8bit value from memory into dest_reg -// the upper 24bit of the destination register can be destroyed -// this function does not use FC_OP1/FC_OP2 as dest_reg as these -// registers might not be directly byte-accessible on some architectures -static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) { - gen_mov_dword_to_reg_imm(temp1, (Bit32u)data); - cache_addd( LDRB_IMM(dest_reg, temp1, 0) ); // ldrb dest_reg, [temp1] -} - -// move an 8bit value from memory into dest_reg -// the upper 24bit of the destination register can be destroyed -// this function can use FC_OP1/FC_OP2 as dest_reg which are -// not directly byte-accessible on some architectures -static void INLINE gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) { - gen_mov_byte_to_reg_low(dest_reg, data); -} - -// move an 8bit constant value into dest_reg -// the upper 24bit of the destination register can be destroyed -// this function does not use FC_OP1/FC_OP2 as dest_reg as these -// registers might not be directly byte-accessible on some architectures -static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) { - cache_addd( MOV_IMM(dest_reg, imm, 0) ); // mov dest_reg, #(imm) -} - -// move an 8bit constant value into dest_reg -// the upper 24bit of the destination register can be destroyed -// this function can use FC_OP1/FC_OP2 as dest_reg which are -// not directly byte-accessible on some architectures -static void INLINE gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) { - gen_mov_byte_to_reg_low_imm(dest_reg, imm); -} - -// move the lowest 8bit of a register into memory -static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) { - gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); - cache_addd( STRB_IMM(src_reg, temp1, 0) ); // strb src_reg, [temp1] -} - - - -// convert an 8bit word to a 32bit dword -// the register is zero-extended (sign==false) or sign-extended (sign==true) -static void gen_extend_byte(bool sign,HostReg reg) { - if (sign) { - cache_addd( MOV_REG_LSL_IMM(reg, reg, 24) ); // mov reg, reg, lsl #24 - cache_addd( 
MOV_REG_ASR_IMM(reg, reg, 24) ); // mov reg, reg, asr #24 - } else { - cache_addd( AND_IMM(reg, reg, 0xff, 0) ); // and reg, reg, #0xff - } -} - -// convert a 16bit word to a 32bit dword -// the register is zero-extended (sign==false) or sign-extended (sign==true) -static void gen_extend_word(bool sign,HostReg reg) { - if (sign) { - cache_addd( MOV_REG_LSL_IMM(reg, reg, 16) ); // mov reg, reg, lsl #16 - cache_addd( MOV_REG_ASR_IMM(reg, reg, 16) ); // mov reg, reg, asr #16 - } else { - cache_addd( MOV_REG_LSL_IMM(reg, reg, 16) ); // mov reg, reg, lsl #16 - cache_addd( MOV_REG_LSR_IMM(reg, reg, 16) ); // mov reg, reg, lsr #16 - } -} - -// add a 32bit value from memory to a full register -static void gen_add(HostReg reg,void* op) { - gen_mov_word_to_reg(temp3, op, 1); - cache_addd( ADD_REG_LSL_IMM(reg, reg, temp3, 0) ); // add reg, reg, temp3 -} - -// add a 32bit constant value to a full register -static void gen_add_imm(HostReg reg,Bit32u imm) { - Bits scale; - if(!imm) return; - if (imm == 0xffffffff) { - cache_addd( SUB_IMM(reg, reg, 1, 0) ); // sub reg, reg, #1 - } else { - scale = 0; - while (imm) { - while ((imm & 3) == 0) { - imm>>=2; - scale+=2; - } - cache_addd( ADD_IMM(reg, reg, imm & 0xff, ROTATE_SCALE(scale)) ); // add reg, reg, #((imm & 0xff) << scale) - imm>>=8; - scale+=8; - } - } -} - -// and a 32bit constant value with a full register -static void gen_and_imm(HostReg reg,Bit32u imm) { - Bits scale; - Bit32u imm2; - imm2 = ~imm; - if(!imm2) return; - if (!imm) { - cache_addd( MOV_IMM(reg, 0, 0) ); // mov reg, #0 - } else { - scale = 0; - while (imm2) { - while ((imm2 & 3) == 0) { - imm2>>=2; - scale+=2; - } - cache_addd( BIC_IMM(reg, reg, imm2 & 0xff, ROTATE_SCALE(scale)) ); // bic reg, reg, #((imm2 & 0xff) << scale) - imm2>>=8; - scale+=8; - } - } -} - - -// move a 32bit constant value into memory -static void gen_mov_direct_dword(void* dest,Bit32u imm) { - gen_mov_dword_to_reg_imm(temp3, imm); - gen_mov_word_from_reg(temp3, dest, 1); -} - -// move an address into memory -static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) { - gen_mov_direct_dword(dest,(Bit32u)imm); -} - -// add an 8bit constant value to a dword memory value -static void gen_add_direct_byte(void* dest,Bit8s imm) { - if(!imm) return; - gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); - gen_mov_word_to_reg_helper(temp3, dest, 1, temp1); - if (imm >= 0) { - cache_addd( ADD_IMM(temp3, temp3, (Bit32s)imm, 0) ); // add temp3, temp3, #(imm) - } else { - cache_addd( SUB_IMM(temp3, temp3, -((Bit32s)imm), 0) ); // sub temp3, temp3, #(-imm) - } - gen_mov_word_from_reg_helper(temp3, dest, 1, temp1); -} - -// add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value -static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) { - if(!imm) return; - if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) { - gen_add_direct_byte(dest,(Bit8s)imm); - return; - } - gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); - gen_mov_word_to_reg_helper(temp3, dest, dword, temp1); - // maybe use function gen_add_imm - if (dword) { - gen_mov_dword_to_reg_imm(temp2, imm); - } else { - gen_mov_word_to_reg_imm(temp2, (Bit16u)imm); - } - cache_addd( ADD_REG_LSL_IMM(temp3, temp3, temp2, 0) ); // add temp3, temp3, temp2 - gen_mov_word_from_reg_helper(temp3, dest, dword, temp1); -} - -// subtract an 8bit constant value from a dword memory value -static void gen_sub_direct_byte(void* dest,Bit8s imm) { - if(!imm) return; - gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); - gen_mov_word_to_reg_helper(temp3, dest, 
1, temp1); - if (imm >= 0) { - cache_addd( SUB_IMM(temp3, temp3, (Bit32s)imm, 0) ); // sub temp3, temp3, #(imm) - } else { - cache_addd( ADD_IMM(temp3, temp3, -((Bit32s)imm), 0) ); // add temp3, temp3, #(-imm) - } - gen_mov_word_from_reg_helper(temp3, dest, 1, temp1); -} - -// subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value -static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) { - if(!imm) return; - if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) { - gen_sub_direct_byte(dest,(Bit8s)imm); - return; - } - gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); - gen_mov_word_to_reg_helper(temp3, dest, dword, temp1); - // maybe use function gen_add_imm/gen_sub_imm - if (dword) { - gen_mov_dword_to_reg_imm(temp2, imm); - } else { - gen_mov_word_to_reg_imm(temp2, (Bit16u)imm); - } - cache_addd( SUB_REG_LSL_IMM(temp3, temp3, temp2, 0) ); // sub temp3, temp3, temp2 - gen_mov_word_from_reg_helper(temp3, dest, dword, temp1); -} - -// effective address calculation, destination is dest_reg -// scale_reg is scaled by scale (scale_reg*(2^scale)) and -// added to dest_reg, then the immediate value is added -static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) { - cache_addd( ADD_REG_LSL_IMM(dest_reg, dest_reg, scale_reg, scale) ); // add dest_reg, dest_reg, scale_reg, lsl #(scale) - gen_add_imm(dest_reg, imm); -} - -// effective address calculation, destination is dest_reg -// dest_reg is scaled by scale (dest_reg*(2^scale)), -// then the immediate value is added -static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) { - if (scale) { - cache_addd( MOV_REG_LSL_IMM(dest_reg, dest_reg, scale) ); // mov dest_reg, dest_reg, lsl #(scale) - } - gen_add_imm(dest_reg, imm); -} - -// generate a call to a parameterless function -static void INLINE gen_call_function_raw(void * func) { - cache_addd( LDR_IMM(temp1, HOST_pc, 4) ); // ldr temp1, [pc, #4] - cache_addd( ADD_IMM(HOST_lr, HOST_pc, 4, 0) ); // add lr, pc, #4 - cache_addd( BX(temp1) ); // bx temp1 - cache_addd((Bit32u)func); // .int func -} - -// generate a call to a function with paramcount parameters -// note: the parameters are loaded in the architecture specific way -// using the gen_load_param_ functions below -static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) { - Bit32u proc_addr = (Bit32u)cache.pos; - gen_call_function_raw(func); - return proc_addr; -} - -#if (1) -// max of 4 parameters in a1-a4 - -// load an immediate value as param'th function parameter -static void INLINE gen_load_param_imm(Bitu imm,Bitu param) { - gen_mov_dword_to_reg_imm(param, imm); -} - -// load an address as param'th function parameter -static void INLINE gen_load_param_addr(Bitu addr,Bitu param) { - gen_mov_dword_to_reg_imm(param, addr); -} - -// load a host-register as param'th function parameter -static void INLINE gen_load_param_reg(Bitu reg,Bitu param) { - gen_mov_regs(param, reg); -} - -// load a value from memory as param'th function parameter -static void INLINE gen_load_param_mem(Bitu mem,Bitu param) { - gen_mov_word_to_reg(param, (void *)mem, 1); -} -#else - other arm abis -#endif - -// jump to an address pointed at by ptr, offset is in imm -static void gen_jmp_ptr(void * ptr,Bits imm=0) { - Bits scale; - Bitu imm2; - gen_mov_word_to_reg(temp3, ptr, 1); - - if (imm) { - scale = 0; - imm2 = (Bitu)imm; - while (imm2) { - while ((imm2 & 3) == 0) { - imm2>>=2; - scale+=2; - } - cache_addd( ADD_IMM(temp3, temp3, imm2 & 0xff, 
ROTATE_SCALE(scale)) ); // add temp3, temp3, #((imm2 & 0xff) << scale) - imm2>>=8; - scale+=8; - } - } - -#if (1) -// (*ptr) should be word aligned - if ((imm & 0x03) == 0) { - cache_addd( LDR_IMM(temp1, temp3, 0) ); // ldr temp1, [temp3] - } else -#endif - { - cache_addd( LDRB_IMM(temp1, temp3, 0) ); // ldrb temp1, [temp3] - cache_addd( LDRB_IMM(temp2, temp3, 1) ); // ldrb temp2, [temp3, #1] - cache_addd( ORR_REG_LSL_IMM(temp1, temp1, temp2, 8) ); // orr temp1, temp1, temp2, lsl #8 - cache_addd( LDRB_IMM(temp2, temp3, 2) ); // ldrb temp2, [temp3, #2] - cache_addd( ORR_REG_LSL_IMM(temp1, temp1, temp2, 16) ); // orr temp1, temp1, temp2, lsl #16 - cache_addd( LDRB_IMM(temp2, temp3, 3) ); // ldrb temp2, [temp3, #3] - cache_addd( ORR_REG_LSL_IMM(temp1, temp1, temp2, 24) ); // orr temp1, temp1, temp2, lsl #24 - } - - cache_addd( BX(temp1) ); // bx temp1 -} - -// short conditional jump (+-127 bytes) if register is zero -// the destination is set by gen_fill_branch() later -static Bit32u gen_create_branch_on_zero(HostReg reg,bool dword) { - if (dword) { - cache_addd( CMP_IMM(reg, 0, 0) ); // cmp reg, #0 - } else { - cache_addd( MOVS_REG_LSL_IMM(temp1, reg, 16) ); // movs temp1, reg, lsl #16 - } - cache_addd( BEQ_FWD(0) ); // beq j - return ((Bit32u)cache.pos-4); -} - -// short conditional jump (+-127 bytes) if register is nonzero -// the destination is set by gen_fill_branch() later -static Bit32u gen_create_branch_on_nonzero(HostReg reg,bool dword) { - if (dword) { - cache_addd( CMP_IMM(reg, 0, 0) ); // cmp reg, #0 - } else { - cache_addd( MOVS_REG_LSL_IMM(temp1, reg, 16) ); // movs temp1, reg, lsl #16 - } - cache_addd( BNE_FWD(0) ); // bne j - return ((Bit32u)cache.pos-4); -} - -// calculate relative offset and fill it into the location pointed to by data -static void INLINE gen_fill_branch(DRC_PTR_SIZE_IM data) { -#if C_DEBUG - Bits len=(Bit32u)cache.pos-(data+8); - if (len<0) len=-len; - if (len>0x02000000) LOG_MSG("Big jump %d",len); -#endif - *(Bit32u*)data=( (*(Bit32u*)data) & 0xff000000 ) | ( ( ((Bit32u)cache.pos - (data+8)) >> 2 ) & 0x00ffffff ); -} - -// conditional jump if register is nonzero -// for isdword==true the 32bit of the register are tested -// for isdword==false the lowest 8bit of the register are tested -static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool isdword) { - if (isdword) { - cache_addd( CMP_IMM(reg, 0, 0) ); // cmp reg, #0 - } else { - cache_addd( TST_IMM(reg, 0xff, 0) ); // tst reg, #0xff - } - cache_addd( BEQ_FWD(8) ); // beq nobranch (pc +8) - cache_addd( LDR_IMM(temp1, HOST_pc, 0) ); // ldr temp1, [pc, #0] - cache_addd( BX(temp1) ); // bx temp1 - cache_addd(0); // fill j - // nobranch: - return ((Bit32u)cache.pos-4); -} - -// compare 32bit-register against zero and jump if value less/equal than zero -static Bit32u gen_create_branch_long_leqzero(HostReg reg) { - cache_addd( CMP_IMM(reg, 0, 0) ); // cmp reg, #0 - cache_addd( BGT_FWD(8) ); // bgt nobranch (pc+8) - cache_addd( LDR_IMM(temp1, HOST_pc, 0) ); // ldr temp1, [pc, #0] - cache_addd( BX(temp1) ); // bx temp1 - cache_addd(0); // fill j - // nobranch: - return ((Bit32u)cache.pos-4); -} - -// calculate long relative offset and fill it into the location pointed to by data -static void INLINE gen_fill_branch_long(Bit32u data) { - // this is an absolute branch - *(Bit32u*)data=(Bit32u)cache.pos; -} - -static void gen_run_code(void) { - cache_addd(0xe92d4000); // stmfd sp!, {lr} - cache_addd(0xe92d0cf0); // stmfd sp!, {v1-v4,v7,v8} - - // adr: 8 - cache_addd( LDR_IMM(FC_SEGS_ADDR, HOST_pc, 64 - (8 + 
8)) ); // ldr FC_SEGS_ADDR, [pc, #(&Segs)] - // adr: 12 - cache_addd( LDR_IMM(FC_REGS_ADDR, HOST_pc, 68 - (12 + 8)) ); // ldr FC_REGS_ADDR, [pc, #(&cpu_regs)] - - cache_addd( ADD_IMM(HOST_lr, HOST_pc, 4, 0) ); // add lr, pc, #4 - cache_addd(0xe92d4000); // stmfd sp!, {lr} - cache_addd( BX(HOST_r0) ); // bx r0 - - cache_addd(0xe8bd0cf0); // ldmfd sp!, {v1-v4,v7,v8} - - cache_addd(0xe8bd4000); // ldmfd sp!, {lr} - cache_addd( BX(HOST_lr) ); // bx lr - - // fill up to 64 bytes - cache_addd( NOP ); // nop - cache_addd( NOP ); // nop - cache_addd( NOP ); // nop - cache_addd( NOP ); // nop - cache_addd( NOP ); // nop - cache_addd( NOP ); // nop - - // adr: 64 - cache_addd((Bit32u)&Segs); // address of "Segs" - // adr: 68 - cache_addd((Bit32u)&cpu_regs); // address of "cpu_regs" -} - -// return from a function -static void gen_return_function(void) { - cache_addd(0xe8bd4000); // ldmfd sp!, {lr} - cache_addd( BX(HOST_lr) ); // bx lr -} - -#ifdef DRC_FLAGS_INVALIDATION - -// called when a call to a function can be replaced by a -// call to a simpler function -static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { -#ifdef DRC_FLAGS_INVALIDATION_DCODE - // try to avoid function calls but rather directly fill in code - switch (flags_type) { - case t_ADDb: - case t_ADDw: - case t_ADDd: - *(Bit32u*)pos=ADD_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // add FC_RETOP, a1, a2 - *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop - *(Bit32u*)(pos+12)=NOP; // nop - break; - case t_ORb: - case t_ORw: - case t_ORd: - *(Bit32u*)pos=ORR_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // orr FC_RETOP, a1, a2 - *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop - *(Bit32u*)(pos+12)=NOP; // nop - break; - case t_ANDb: - case t_ANDw: - case t_ANDd: - *(Bit32u*)pos=AND_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // and FC_RETOP, a1, a2 - *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop - *(Bit32u*)(pos+12)=NOP; // nop - break; - case t_SUBb: - case t_SUBw: - case t_SUBd: - *(Bit32u*)pos=SUB_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // sub FC_RETOP, a1, a2 - *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop - *(Bit32u*)(pos+12)=NOP; // nop - break; - case t_XORb: - case t_XORw: - case t_XORd: - *(Bit32u*)pos=EOR_REG_LSL_IMM(FC_RETOP, HOST_a1, HOST_a2, 0); // eor FC_RETOP, a1, a2 - *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop - *(Bit32u*)(pos+12)=NOP; // nop - break; - case t_CMPb: - case t_CMPw: - case t_CMPd: - case t_TESTb: - case t_TESTw: - case t_TESTd: - *(Bit32u*)pos=B_FWD(8); // b (pc+2*4) - break; - case t_INCb: - case t_INCw: - case t_INCd: - *(Bit32u*)pos=ADD_IMM(FC_RETOP, HOST_a1, 1, 0); // add FC_RETOP, a1, #1 - *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop - *(Bit32u*)(pos+12)=NOP; // nop - break; - case t_DECb: - case t_DECw: - case t_DECd: - *(Bit32u*)pos=SUB_IMM(FC_RETOP, HOST_a1, 1, 0); // sub FC_RETOP, a1, #1 - *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop - *(Bit32u*)(pos+12)=NOP; // nop - break; - case t_SHLb: - case t_SHLw: - case t_SHLd: - *(Bit32u*)pos=MOV_REG_LSL_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, lsl a2 - *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop - *(Bit32u*)(pos+12)=NOP; // nop - break; - case t_SHRb: - *(Bit32u*)pos=AND_IMM(FC_RETOP, HOST_a1, 0xff, 0); // and FC_RETOP, a1, #0xff - *(Bit32u*)(pos+4)=MOV_REG_LSR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, lsr a2 - *(Bit32u*)(pos+8)=NOP; // nop - *(Bit32u*)(pos+12)=NOP; // 
nop - break; - case t_SHRw: - *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16 - *(Bit32u*)(pos+4)=MOV_REG_LSR_IMM(FC_RETOP, FC_RETOP, 16); // mov FC_RETOP, FC_RETOP, lsr #16 - *(Bit32u*)(pos+8)=MOV_REG_LSR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, lsr a2 - *(Bit32u*)(pos+12)=NOP; // nop - break; - case t_SHRd: - *(Bit32u*)pos=MOV_REG_LSR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, lsr a2 - *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop - *(Bit32u*)(pos+12)=NOP; // nop - break; - case t_SARb: - *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24); // mov FC_RETOP, a1, lsl #24 - *(Bit32u*)(pos+4)=MOV_REG_ASR_IMM(FC_RETOP, FC_RETOP, 24); // mov FC_RETOP, FC_RETOP, asr #24 - *(Bit32u*)(pos+8)=MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, asr a2 - *(Bit32u*)(pos+12)=NOP; // nop - break; - case t_SARw: - *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16 - *(Bit32u*)(pos+4)=MOV_REG_ASR_IMM(FC_RETOP, FC_RETOP, 16); // mov FC_RETOP, FC_RETOP, asr #16 - *(Bit32u*)(pos+8)=MOV_REG_ASR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, asr a2 - *(Bit32u*)(pos+12)=NOP; // nop - break; - case t_SARd: - *(Bit32u*)pos=MOV_REG_ASR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, asr a2 - *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop - *(Bit32u*)(pos+12)=NOP; // nop - break; - case t_RORb: - *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 24); // mov FC_RETOP, a1, lsl #24 - *(Bit32u*)(pos+4)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 8); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8 - *(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 - *(Bit32u*)(pos+12)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2 - break; - case t_RORw: - *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16 - *(Bit32u*)(pos+4)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 - *(Bit32u*)(pos+8)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2 - *(Bit32u*)(pos+12)=NOP; // nop - break; - case t_RORd: - *(Bit32u*)pos=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, ror a2 - *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop - *(Bit32u*)(pos+12)=NOP; // nop - break; - case t_ROLw: - *(Bit32u*)pos=MOV_REG_LSL_IMM(FC_RETOP, HOST_a1, 16); // mov FC_RETOP, a1, lsl #16 - *(Bit32u*)(pos+4)=RSB_IMM(HOST_a2, HOST_a2, 32, 0); // rsb a2, a2, #32 - *(Bit32u*)(pos+8)=ORR_REG_LSR_IMM(FC_RETOP, FC_RETOP, FC_RETOP, 16); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 - *(Bit32u*)(pos+12)=MOV_REG_ROR_REG(FC_RETOP, FC_RETOP, HOST_a2); // mov FC_RETOP, FC_RETOP, ror a2 - break; - case t_ROLd: - *(Bit32u*)pos=RSB_IMM(HOST_a2, HOST_a2, 32, 0); // rsb a2, a2, #32 - *(Bit32u*)(pos+4)=MOV_REG_ROR_REG(FC_RETOP, HOST_a1, HOST_a2); // mov FC_RETOP, a1, ror a2 - *(Bit32u*)(pos+8)=NOP; // nop - *(Bit32u*)(pos+12)=NOP; // nop - break; - case t_NEGb: - case t_NEGw: - case t_NEGd: - *(Bit32u*)pos=RSB_IMM(FC_RETOP, HOST_a1, 0, 0); // rsb FC_RETOP, a1, #0 - *(Bit32u*)(pos+4)=NOP; // nop - *(Bit32u*)(pos+8)=NOP; // nop - *(Bit32u*)(pos+12)=NOP; // nop - break; - default: - *(Bit32u*)(pos+12)=(Bit32u)fct_ptr; // simple_func - break; - - } -#else - *(Bit32u*)(pos+12)=(Bit32u)fct_ptr; // simple_func -#endif -} -#endif - -static void cache_block_before_close(void) { } - -#ifdef 
DRC_USE_SEGS_ADDR - -// mov 16bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 2 must be zero) -// 16bit moves may destroy the upper 16bit of the destination register -static void gen_mov_seg16_to_reg(HostReg dest_reg,Bitu index) { - cache_addd( LDRH_IMM(dest_reg, FC_SEGS_ADDR, index) ); // ldrh dest_reg, [FC_SEGS_ADDR, #index] -} - -// mov 32bit value from Segs[index] into dest_reg using FC_SEGS_ADDR (index modulo 4 must be zero) -static void gen_mov_seg32_to_reg(HostReg dest_reg,Bitu index) { - cache_addd( LDR_IMM(dest_reg, FC_SEGS_ADDR, index) ); // ldr dest_reg, [FC_SEGS_ADDR, #index] -} - -// add a 32bit value from Segs[index] to a full register using FC_SEGS_ADDR (index modulo 4 must be zero) -static void gen_add_seg32_to_reg(HostReg reg,Bitu index) { - cache_addd( LDR_IMM(temp1, FC_SEGS_ADDR, index) ); // ldr temp1, [FC_SEGS_ADDR, #index] - cache_addd( ADD_REG_LSL_IMM(reg, reg, temp1, 0) ); // add reg, reg, temp1 -} - -#endif - -#ifdef DRC_USE_REGS_ADDR - -// mov 16bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 2 must be zero) -// 16bit moves may destroy the upper 16bit of the destination register -static void gen_mov_regval16_to_reg(HostReg dest_reg,Bitu index) { - cache_addd( LDRH_IMM(dest_reg, FC_REGS_ADDR, index) ); // ldrh dest_reg, [FC_REGS_ADDR, #index] -} - -// mov 32bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (index modulo 4 must be zero) -static void gen_mov_regval32_to_reg(HostReg dest_reg,Bitu index) { - cache_addd( LDR_IMM(dest_reg, FC_REGS_ADDR, index) ); // ldr dest_reg, [FC_REGS_ADDR, #index] -} - -// move a 32bit (dword==true) or 16bit (dword==false) value from cpu_regs[index] into dest_reg using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero) -// 16bit moves may destroy the upper 16bit of the destination register -static void gen_mov_regword_to_reg(HostReg dest_reg,Bitu index,bool dword) { - if (dword) { - cache_addd( LDR_IMM(dest_reg, FC_REGS_ADDR, index) ); // ldr dest_reg, [FC_REGS_ADDR, #index] - } else { - cache_addd( LDRH_IMM(dest_reg, FC_REGS_ADDR, index) ); // ldrh dest_reg, [FC_REGS_ADDR, #index] - } -} - -// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR -// the upper 24bit of the destination register can be destroyed -// this function does not use FC_OP1/FC_OP2 as dest_reg as these -// registers might not be directly byte-accessible on some architectures -static void gen_mov_regbyte_to_reg_low(HostReg dest_reg,Bitu index) { - cache_addd( LDRB_IMM(dest_reg, FC_REGS_ADDR, index) ); // ldrb dest_reg, [FC_REGS_ADDR, #index] -} - -// move an 8bit value from cpu_regs[index] into dest_reg using FC_REGS_ADDR -// the upper 24bit of the destination register can be destroyed -// this function can use FC_OP1/FC_OP2 as dest_reg which are -// not directly byte-accessible on some architectures -static void INLINE gen_mov_regbyte_to_reg_low_canuseword(HostReg dest_reg,Bitu index) { - cache_addd( LDRB_IMM(dest_reg, FC_REGS_ADDR, index) ); // ldrb dest_reg, [FC_REGS_ADDR, #index] -} - - -// add a 32bit value from cpu_regs[index] to a full register using FC_REGS_ADDR (index modulo 4 must be zero) -static void gen_add_regval32_to_reg(HostReg reg,Bitu index) { - cache_addd( LDR_IMM(temp2, FC_REGS_ADDR, index) ); // ldr temp2, [FC_REGS_ADDR, #index] - cache_addd( ADD_REG_LSL_IMM(reg, reg, temp2, 0) ); // add reg, reg, temp2 -} - - -// move 16bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 2 must be 
zero) -static void gen_mov_regval16_from_reg(HostReg src_reg,Bitu index) { - cache_addd( STRH_IMM(src_reg, FC_REGS_ADDR, index) ); // strh src_reg, [FC_REGS_ADDR, #index] -} - -// move 32bit of register into cpu_regs[index] using FC_REGS_ADDR (index modulo 4 must be zero) -static void gen_mov_regval32_from_reg(HostReg src_reg,Bitu index) { - cache_addd( STR_IMM(src_reg, FC_REGS_ADDR, index) ); // str src_reg, [FC_REGS_ADDR, #index] -} - -// move 32bit (dword==true) or 16bit (dword==false) of a register into cpu_regs[index] using FC_REGS_ADDR (if dword==true index modulo 4 must be zero) (if dword==false index modulo 2 must be zero) -static void gen_mov_regword_from_reg(HostReg src_reg,Bitu index,bool dword) { - if (dword) { - cache_addd( STR_IMM(src_reg, FC_REGS_ADDR, index) ); // str src_reg, [FC_REGS_ADDR, #index] - } else { - cache_addd( STRH_IMM(src_reg, FC_REGS_ADDR, index) ); // strh src_reg, [FC_REGS_ADDR, #index] - } -} - -// move the lowest 8bit of a register into cpu_regs[index] using FC_REGS_ADDR -static void gen_mov_regbyte_from_reg_low(HostReg src_reg,Bitu index) { - cache_addd( STRB_IMM(src_reg, FC_REGS_ADDR, index) ); // strb src_reg, [FC_REGS_ADDR, #index] -} - -#endif diff --git a/src/cpu/core_dynrec/risc_armv4le-thumb-iw.h b/src/cpu/core_dynrec/risc_armv4le-thumb-iw.h index 6ead4b79..f962c5bf 100644 --- a/src/cpu/core_dynrec/risc_armv4le-thumb-iw.h +++ b/src/cpu/core_dynrec/risc_armv4le-thumb-iw.h @@ -50,15 +50,14 @@ // temporary register for LEA #define TEMP_REG_DRC HOST_a4 -#ifdef DRC_USE_REGS_ADDR // used to hold the address of "cpu_regs" - preferably filled in function gen_run_code #define FC_REGS_ADDR HOST_v7 -#endif -#ifdef DRC_USE_SEGS_ADDR // used to hold the address of "Segs" - preferably filled in function gen_run_code #define FC_SEGS_ADDR HOST_v8 -#endif + +// used to hold the address of "core_dynrec.readdata" - filled in function gen_run_code +#define readdata_addr HOST_v5 // instruction encodings @@ -98,10 +97,14 @@ // logical // and dst, src #define AND(dst, src) (0x4000 + (dst) + ((src) << 3) ) +// bic dst, src +#define BIC(dst, src) (0x4380 + (dst) + ((src) << 3) ) // eor dst, src #define EOR(dst, src) (0x4040 + (dst) + ((src) << 3) ) // orr dst, src #define ORR(dst, src) (0x4300 + (dst) + ((src) << 3) ) +// mvn dst, src +#define MVN(dst, src) (0x43c0 + (dst) + ((src) << 3) ) // shift/rotate // lsl dst, src, #imm @@ -128,6 +131,8 @@ #define LDRB_IMM(reg, addr, imm) (0x7800 + (reg) + ((addr) << 3) + ((imm) << 6) ) // ldr reg, [pc, #imm] @ 0 <= imm < 1024 & imm mod 4 = 0 #define LDR_PC_IMM(reg, imm) (0x4800 + ((reg) << 8) + ((imm) >> 2) ) +// ldr reg, [addr1, addr2] +#define LDR_REG(reg, addr1, addr2) (0x5800 + (reg) + ((addr1) << 3) + ((addr2) << 6) ) // store // str reg, [addr, #imm] @ 0 <= imm < 128 & imm mod 4 = 0 @@ -150,6 +155,25 @@ #define BX(reg) (0x4700 + ((reg) << 3) ) +// arm instructions + +// arithmetic +// add dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0 +#define ARM_ADD_IMM(dst, src, imm, rimm) (0xe2800000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) ) + +// load +// ldr reg, [addr, #imm] @ 0 <= imm < 4096 +#define ARM_LDR_IMM(reg, addr, imm) (0xe5900000 + ((reg) << 12) + ((addr) << 16) + (imm) ) + +// store +// str reg, [addr, #-(imm)]! 
@ 0 <= imm < 4096 +#define ARM_STR_IMM_M_W(reg, addr, imm) (0xe5200000 + ((reg) << 12) + ((addr) << 16) + (imm) ) + +// branch +// bx reg +#define ARM_BX(reg) (0xe12fff10 + (reg) ) + + // data pool defines #define CACHE_DATA_JUMP (2) #define CACHE_DATA_ALIGN (32) @@ -193,7 +217,7 @@ static void cache_checkinstr(Bit32u size) { cache_datapos = (Bit8u *) (((Bitu)cache.block.active->cache.start + cache.block.active->cache.size - CACHE_DATA_ALIGN) & ~(CACHE_DATA_ALIGN - 1)); } else { register Bit32u cachemodsize; - + cachemodsize = (cache.pos - cache.block.active->cache.start) & (CACHE_MAXSIZE - 1); if (cachemodsize + CACHE_DATA_MAX + CACHE_DATA_ALIGN <= CACHE_MAXSIZE || @@ -275,30 +299,49 @@ static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) { cache_addw( MOV_REG(reg_dst, reg_src) ); // mov reg_dst, reg_src } +// helper function +static bool val_single_shift(Bit32u value, Bit32u *val_shift) { + Bit32u shift; + + if (GCC_UNLIKELY(value == 0)) { + *val_shift = 0; + return true; + } + + shift = 0; + while ((value & 1) == 0) { + value>>=1; + shift+=1; + } + + if ((value >> 8) != 0) return false; + + *val_shift = shift; + return true; +} + // move a 32bit constant value into dest_reg static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) { - if ((imm & 0xffffff00) == 0) { + Bit32u scale; + + if (imm < 256) { cache_checkinstr(2); cache_addw( MOV_IMM(dest_reg, imm) ); // mov dest_reg, #(imm) - } else if ((imm & 0xffff00ff) == 0) { + } else if ((~imm) < 256) { cache_checkinstr(4); - cache_addw( MOV_IMM(dest_reg, imm >> 8) ); // mov dest_reg, #(imm >> 8) - cache_addw( LSL_IMM(dest_reg, dest_reg, 8) ); // lsl dest_reg, dest_reg, #8 - } else if ((imm & 0xff00ffff) == 0) { + cache_addw( MOV_IMM(dest_reg, ~imm) ); // mov dest_reg, #(~imm) + cache_addw( MVN(dest_reg, dest_reg) ); // mvn dest_reg, dest_reg + } else if (val_single_shift(imm, &scale)) { cache_checkinstr(4); - cache_addw( MOV_IMM(dest_reg, imm >> 16) ); // mov dest_reg, #(imm >> 16) - cache_addw( LSL_IMM(dest_reg, dest_reg, 16) ); // lsl dest_reg, dest_reg, #16 - } else if ((imm & 0x00ffffff) == 0) { - cache_checkinstr(4); - cache_addw( MOV_IMM(dest_reg, imm >> 24) ); // mov dest_reg, #(imm >> 24) - cache_addw( LSL_IMM(dest_reg, dest_reg, 24) ); // lsl dest_reg, dest_reg, #24 + cache_addw( MOV_IMM(dest_reg, imm >> scale) ); // mov dest_reg, #(imm >> scale) + cache_addw( LSL_IMM(dest_reg, dest_reg, scale) ); // lsl dest_reg, dest_reg, #scale } else { Bit32u diff; cache_checkinstr(4); diff = imm - ((Bit32u)cache.pos+4); - + if ((diff < 1024) && ((imm & 0x03) == 0)) { if (((Bit32u)cache.pos & 0x03) == 0) { cache_addw( ADD_LO_PC_IMM(dest_reg, diff >> 2) ); // add dest_reg, pc, #(diff >> 2) @@ -321,10 +364,61 @@ static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) { } } +// helper function +static bool gen_mov_memval_to_reg_helper(HostReg dest_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) { + switch (size) { + case 4: +#if !defined(C_UNALIGNED_MEMORY) + if ((data & 3) == 0) +#endif + { + if ((data >= addr_data) && (data < addr_data + 128) && (((data - addr_data) & 3) == 0)) { + cache_checkinstr(4); + cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg + cache_addw( LDR_IMM(dest_reg, templo2, data - addr_data) ); // ldr dest_reg, [templo2, #(data - addr_data)] + return true; + } + } + break; + case 2: +#if !defined(C_UNALIGNED_MEMORY) + if ((data & 1) == 0) +#endif + { + if ((data >= addr_data) && (data < addr_data + 64) && (((data - addr_data) & 1) == 0)) { + cache_checkinstr(4); + 
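Note: the check introduced by val_single_shift() above restated as a standalone sketch. A constant can be materialised with the Thumb pair "mov #imm8 ; lsl #shift" exactly when stripping its trailing zero bits leaves a value below 256.

	#include <stdint.h>
	#include <stdbool.h>

	static bool fits_mov_lsl(uint32_t value, uint32_t *shift_out) {
		uint32_t shift = 0;
		if (value == 0) {		/* a bare "mov #0" is enough */
			*shift_out = 0;
			return true;
		}
		while ((value & 1) == 0) {	/* strip trailing zero bits */
			value >>= 1;
			shift++;
		}
		*shift_out = shift;
		return (value >> 8) == 0;	/* remainder must fit in 8 bits */
	}

For example, 0x00030000 qualifies (mov #3 then lsl #16), while 0x00030001 does not and falls back to the generic constant loader.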
cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg + cache_addw( LDRH_IMM(dest_reg, templo2, data - addr_data) ); // ldrh dest_reg, [templo2, #(data - addr_data)] + return true; + } + } + break; + case 1: + if ((data >= addr_data) && (data < addr_data + 32)) { + cache_checkinstr(4); + cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg + cache_addw( LDRB_IMM(dest_reg, templo2, data - addr_data) ); // ldrb dest_reg, [templo2, #(data - addr_data)] + return true; + } + default: + break; + } + return false; +} + +// helper function +static bool gen_mov_memval_to_reg(HostReg dest_reg, void *data, Bitu size) { + if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true; + if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true; + if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true; + return false; +} + // helper function for gen_mov_word_to_reg static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,HostReg data_reg) { // alignment.... if (dword) { +#if !defined(C_UNALIGNED_MEMORY) if ((Bit32u)data & 3) { if ( ((Bit32u)data & 3) == 2 ) { cache_checkinstr(8); @@ -343,18 +437,23 @@ static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,Ho cache_addw( LSL_IMM(templo1, templo1, 24) ); // lsl templo1, templo1, #24 cache_addw( ORR(dest_reg, templo1) ); // orr dest_reg, templo1 } - } else { + } else +#endif + { cache_checkinstr(2); cache_addw( LDR_IMM(dest_reg, data_reg, 0) ); // ldr dest_reg, [data_reg] } } else { +#if !defined(C_UNALIGNED_MEMORY) if ((Bit32u)data & 1) { cache_checkinstr(8); cache_addw( LDRB_IMM(dest_reg, data_reg, 0) ); // ldrb dest_reg, [data_reg] cache_addw( LDRB_IMM(templo1, data_reg, 1) ); // ldrb templo1, [data_reg, #1] cache_addw( LSL_IMM(templo1, templo1, 8) ); // lsl templo1, templo1, #8 cache_addw( ORR(dest_reg, templo1) ); // orr dest_reg, templo1 - } else { + } else +#endif + { cache_checkinstr(2); cache_addw( LDRH_IMM(dest_reg, data_reg, 0) ); // ldrh dest_reg, [data_reg] } @@ -364,8 +463,10 @@ static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,Ho // move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg // 16bit moves may destroy the upper 16bit of the destination register static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) { - gen_mov_dword_to_reg_imm(templo2, (Bit32u)data); - gen_mov_word_to_reg_helper(dest_reg, data, dword, templo2); + if (!gen_mov_memval_to_reg(dest_reg, data, (dword)?4:2)) { + gen_mov_dword_to_reg_imm(templo2, (Bit32u)data); + gen_mov_word_to_reg_helper(dest_reg, data, dword, templo2); + } } // move a 16bit constant value into dest_reg @@ -374,10 +475,61 @@ static void INLINE gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) { gen_mov_dword_to_reg_imm(dest_reg, (Bit32u)imm); } +// helper function +static bool gen_mov_memval_from_reg_helper(HostReg src_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) { + switch (size) { + case 4: +#if !defined(C_UNALIGNED_MEMORY) + if ((data & 3) == 0) +#endif + { + if ((data >= addr_data) && (data < addr_data + 128) && (((data - addr_data) & 3) == 0)) { + cache_checkinstr(4); + cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg + cache_addw( STR_IMM(src_reg, templo2, data - addr_data) ); // str src_reg, [templo2, #(data - addr_data)] + return true; + } + } + 
break; + case 2: +#if !defined(C_UNALIGNED_MEMORY) + if ((data & 1) == 0) +#endif + { + if ((data >= addr_data) && (data < addr_data + 64) && (((data - addr_data) & 1) == 0)) { + cache_checkinstr(4); + cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg + cache_addw( STRH_IMM(src_reg, templo2, data - addr_data) ); // strh src_reg, [templo2, #(data - addr_data)] + return true; + } + } + break; + case 1: + if ((data >= addr_data) && (data < addr_data + 32)) { + cache_checkinstr(4); + cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg + cache_addw( STRB_IMM(src_reg, templo2, data - addr_data) ); // strb src_reg, [templo2, #(data - addr_data)] + return true; + } + default: + break; + } + return false; +} + +// helper function +static bool gen_mov_memval_from_reg(HostReg src_reg, void *dest, Bitu size) { + if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true; + if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true; + if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true; + return false; +} + // helper function for gen_mov_word_from_reg static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, HostReg data_reg) { // alignment.... if (dword) { +#if !defined(C_UNALIGNED_MEMORY) if ((Bit32u)dest & 3) { if ( ((Bit32u)dest & 3) == 2 ) { cache_checkinstr(8); @@ -398,18 +550,23 @@ static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, cache_addw( LSR_IMM(templo1, templo1, 24) ); // lsr templo1, templo1, #24 cache_addw( STRB_IMM(templo1, data_reg, 3) ); // strb templo1, [data_reg, #3] } - } else { + } else +#endif + { cache_checkinstr(2); cache_addw( STR_IMM(src_reg, data_reg, 0) ); // str src_reg, [data_reg] } } else { +#if !defined(C_UNALIGNED_MEMORY) if ((Bit32u)dest & 1) { cache_checkinstr(8); cache_addw( STRB_IMM(src_reg, data_reg, 0) ); // strb src_reg, [data_reg] cache_addw( MOV_REG(templo1, src_reg) ); // mov templo1, src_reg cache_addw( LSR_IMM(templo1, templo1, 8) ); // lsr templo1, templo1, #8 cache_addw( STRB_IMM(templo1, data_reg, 1) ); // strb templo1, [data_reg, #1] - } else { + } else +#endif + { cache_checkinstr(2); cache_addw( STRH_IMM(src_reg, data_reg, 0) ); // strh src_reg, [data_reg] } @@ -418,8 +575,10 @@ static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, // move 32bit (dword==true) or 16bit (dword==false) of a register into memory static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) { - gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_from_reg_helper(src_reg, dest, dword, templo2); + if (!gen_mov_memval_from_reg(src_reg, dest, (dword)?4:2)) { + gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); + gen_mov_word_from_reg_helper(src_reg, dest, dword, templo2); + } } // move an 8bit value from memory into dest_reg @@ -427,9 +586,11 @@ static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) { // this function does not use FC_OP1/FC_OP2 as dest_reg as these // registers might not be directly byte-accessible on some architectures static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) { - gen_mov_dword_to_reg_imm(templo1, (Bit32u)data); - cache_checkinstr(2); - cache_addw( LDRB_IMM(dest_reg, templo1, 0) ); // ldrb dest_reg, [templo1] + if (!gen_mov_memval_to_reg(dest_reg, data, 1)) { + gen_mov_dword_to_reg_imm(templo1, (Bit32u)data); + 
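Note: the gen_mov_memval_to_reg/gen_mov_memval_from_reg helpers above reduce to a reachability test: the global must lie in a small, suitably aligned window just above one of the pinned base registers (cpu_regs, Segs, core_dynrec.readdata), so a single Thumb immediate-offset ldr/str reaches it. A sketch of that test with a hypothetical name, using the same window sizes as the emitted encodings:

	#include <stdint.h>
	#include <stdbool.h>

	static bool reachable_from_base(uint32_t addr, uint32_t base, unsigned size) {
		uint32_t off;
		if (addr < base) return false;
		off = addr - base;
		switch (size) {
		case 4: return (off < 128) && ((off & 3) == 0);	/* ldr  rd, [rb, #off] */
		case 2: return (off < 64)  && ((off & 1) == 0);	/* ldrh rd, [rb, #off] */
		case 1: return (off < 32);			/* ldrb rd, [rb, #off] */
		default: return false;
		}
	}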
cache_checkinstr(2); + cache_addw( LDRB_IMM(dest_reg, templo1, 0) ); // ldrb dest_reg, [templo1] + } } // move an 8bit value from memory into dest_reg @@ -459,9 +620,11 @@ static void INLINE gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u // move the lowest 8bit of a register into memory static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) { - gen_mov_dword_to_reg_imm(templo1, (Bit32u)dest); - cache_checkinstr(2); - cache_addw( STRB_IMM(src_reg, templo1, 0) ); // strb src_reg, [templo1] + if (!gen_mov_memval_from_reg(src_reg, dest, 1)) { + gen_mov_dword_to_reg_imm(templo1, (Bit32u)dest); + cache_checkinstr(2); + cache_addw( STRB_IMM(src_reg, templo1, 0) ); // strb src_reg, [templo1] + } } @@ -501,18 +664,58 @@ static void gen_add(HostReg reg,void* op) { // add a 32bit constant value to a full register static void gen_add_imm(HostReg reg,Bit32u imm) { + Bit32u imm2, scale; + if(!imm) return; - gen_mov_dword_to_reg_imm(templo1, imm); - cache_checkinstr(2); - cache_addw( ADD_REG(reg, reg, templo1) ); // add reg, reg, templo1 + + imm2 = (Bit32u) (-((Bit32s)imm)); + + if (imm <= 255) { + cache_checkinstr(2); + cache_addw( ADD_IMM8(reg, imm) ); // add reg, #imm + } else if (imm2 <= 255) { + cache_checkinstr(2); + cache_addw( SUB_IMM8(reg, imm2) ); // sub reg, #(-imm) + } else { + if (val_single_shift(imm2, &scale)) { + cache_checkinstr((scale)?6:4); + cache_addw( MOV_IMM(templo1, imm2 >> scale) ); // mov templo1, #(~imm >> scale) + if (scale) { + cache_addw( LSL_IMM(templo1, templo1, scale) ); // lsl templo1, templo1, #scale + } + cache_addw( SUB_REG(reg, reg, templo1) ); // sub reg, reg, templo1 + } else { + gen_mov_dword_to_reg_imm(templo1, imm); + cache_checkinstr(2); + cache_addw( ADD_REG(reg, reg, templo1) ); // add reg, reg, templo1 + } + } } // and a 32bit constant value with a full register static void gen_and_imm(HostReg reg,Bit32u imm) { - if(imm == 0xffffffff) return; - gen_mov_dword_to_reg_imm(templo1, imm); - cache_checkinstr(2); - cache_addw( AND(reg, templo1) ); // and reg, templo1 + Bit32u imm2, scale; + + imm2 = ~imm; + if(!imm2) return; + + if (!imm) { + cache_checkinstr(2); + cache_addw( MOV_IMM(reg, 0) ); // mov reg, #0 + } else { + if (val_single_shift(imm2, &scale)) { + cache_checkinstr((scale)?6:4); + cache_addw( MOV_IMM(templo1, imm2 >> scale) ); // mov templo1, #(~imm >> scale) + if (scale) { + cache_addw( LSL_IMM(templo1, templo1, scale) ); // lsl templo1, templo1, #scale + } + cache_addw( BIC(reg, templo1) ); // bic reg, templo1 + } else { + gen_mov_dword_to_reg_imm(templo1, imm); + cache_checkinstr(2); + cache_addw( AND(reg, templo1) ); // and reg, templo1 + } + } } @@ -527,70 +730,69 @@ static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) { gen_mov_direct_dword(dest,(Bit32u)imm); } -// add an 8bit constant value to a dword memory value -static void gen_add_direct_byte(void* dest,Bit8s imm) { - if(!imm) return; - gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templo3, dest, 1, templo2); - cache_checkinstr(2); - if (imm >= 0) { - cache_addw( ADD_IMM8(templo3, (Bit32s)imm) ); // add templo3, #(imm) - } else { - cache_addw( SUB_IMM8(templo3, -((Bit32s)imm)) ); // sub templo3, #(-imm) - } - gen_mov_word_from_reg_helper(templo3, dest, 1, templo2); -} - // add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) { + if (!dword) imm &= 0xffff; if(!imm) return; - if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) 
{ - gen_add_direct_byte(dest,(Bit8s)imm); - return; + + if (!gen_mov_memval_to_reg(templo3, dest, (dword)?4:2)) { + gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); + gen_mov_word_to_reg_helper(templo3, dest, dword, templo2); } - gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templo3, dest, dword, templo2); - if (dword) { - gen_mov_dword_to_reg_imm(templo1, imm); - } else { - gen_mov_word_to_reg_imm(templo1, (Bit16u)imm); + gen_add_imm(templo3, imm); + if (!gen_mov_memval_from_reg(templo3, dest, (dword)?4:2)) { + gen_mov_word_from_reg_helper(templo3, dest, dword, templo2); } - cache_checkinstr(2); - cache_addw( ADD_REG(templo3, templo3, templo1) ); // add templo3, templo3, templo1 - gen_mov_word_from_reg_helper(templo3, dest, dword, templo2); } -// subtract an 8bit constant value from a dword memory value -static void gen_sub_direct_byte(void* dest,Bit8s imm) { - if(!imm) return; - gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templo3, dest, 1, templo2); - cache_checkinstr(2); - if (imm >= 0) { - cache_addw( SUB_IMM8(templo3, (Bit32s)imm) ); // sub templo3, #(imm) - } else { - cache_addw( ADD_IMM8(templo3, -((Bit32s)imm)) ); // add templo3, #(-imm) - } - gen_mov_word_from_reg_helper(templo3, dest, 1, templo2); +// add an 8bit constant value to a dword memory value +static void gen_add_direct_byte(void* dest,Bit8s imm) { + gen_add_direct_word(dest, (Bit32s)imm, 1); } // subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) { + Bit32u imm2, scale; + + if (!dword) imm &= 0xffff; if(!imm) return; - if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) { - gen_sub_direct_byte(dest,(Bit8s)imm); - return; + + if (!gen_mov_memval_to_reg(templo3, dest, (dword)?4:2)) { + gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); + gen_mov_word_to_reg_helper(templo3, dest, dword, templo2); } - gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templo3, dest, dword, templo2); - if (dword) { - gen_mov_dword_to_reg_imm(templo1, imm); + + imm2 = (Bit32u) (-((Bit32s)imm)); + + if (imm <= 255) { + cache_checkinstr(2); + cache_addw( SUB_IMM8(templo3, imm) ); // sub templo3, #imm + } else if (imm2 <= 255) { + cache_checkinstr(2); + cache_addw( ADD_IMM8(templo3, imm2) ); // add templo3, #(-imm) } else { - gen_mov_word_to_reg_imm(templo1, (Bit16u)imm); + if (val_single_shift(imm2, &scale)) { + cache_checkinstr((scale)?6:4); + cache_addw( MOV_IMM(templo1, imm2 >> scale) ); // mov templo1, #(~imm >> scale) + if (scale) { + cache_addw( LSL_IMM(templo1, templo1, scale) ); // lsl templo1, templo1, #scale + } + cache_addw( ADD_REG(templo3, templo3, templo1) ); // add templo3, templo3, templo1 + } else { + gen_mov_dword_to_reg_imm(templo1, imm); + cache_checkinstr(2); + cache_addw( SUB_REG(templo3, templo3, templo1) ); // sub templo3, templo3, templo1 + } } - cache_checkinstr(2); - cache_addw( SUB_REG(templo3, templo3, templo1) ); // sub templo3, templo3, templo1 - gen_mov_word_from_reg_helper(templo3, dest, dword, templo2); + + if (!gen_mov_memval_from_reg(templo3, dest, (dword)?4:2)) { + gen_mov_word_from_reg_helper(templo3, dest, dword, templo2); + } +} + +// subtract an 8bit constant value from a dword memory value +static void gen_sub_direct_byte(void* dest,Bit8s imm) { + gen_sub_direct_word(dest, (Bit32s)imm, 1); } // effective address calculation, destination is dest_reg @@ -694,20 +896,22 @@ static void INLINE gen_load_param_mem(Bitu 
mem,Bitu param) { static void gen_jmp_ptr(void * ptr,Bits imm=0) { gen_mov_word_to_reg(templo3, ptr, 1); - if (imm) { - gen_mov_dword_to_reg_imm(templo2, imm); - cache_checkinstr(2); - cache_addw( ADD_REG(templo3, templo3, templo2) ); // add templo3, templo3, templo2 - } - -#if (1) -// (*ptr) should be word aligned +#if !defined(C_UNALIGNED_MEMORY) +// (*ptr) should be word aligned if ((imm & 0x03) == 0) { - cache_checkinstr(6); - cache_addw( LDR_IMM(templo2, templo3, 0) ); // ldr templo2, [templo3] - } else #endif - { + if ((imm >= 0) && (imm < 128) && ((imm & 3) == 0)) { + cache_checkinstr(6); + cache_addw( LDR_IMM(templo2, templo3, imm) ); // ldr templo2, [templo3, #imm] + } else { + gen_mov_dword_to_reg_imm(templo2, imm); + cache_checkinstr(6); + cache_addw( LDR_REG(templo2, templo3, templo2) ); // ldr templo2, [templo3, templo2] + } +#if !defined(C_UNALIGNED_MEMORY) + } else { + gen_add_imm(templo3, imm); + cache_checkinstr(24); cache_addw( LDRB_IMM(templo2, templo3, 0) ); // ldrb templo2, [templo3] cache_addw( LDRB_IMM(templo1, templo3, 1) ); // ldrb templo1, [templo3, #1] @@ -720,6 +924,7 @@ static void gen_jmp_ptr(void * ptr,Bits imm=0) { cache_addw( LSL_IMM(templo1, templo1, 24) ); // lsl templo1, templo1, #24 cache_addw( ORR(templo2, templo1) ); // orr templo2, templo1 } +#endif // increase jmp address to keep thumb state cache_addw( ADD_IMM3(templo2, templo2, 1) ); // add templo2, templo2, #1 @@ -815,50 +1020,53 @@ static void INLINE gen_fill_branch_long(Bit32u data) { } static void gen_run_code(void) { - // switch from arm to thumb state - cache_addd(0xe2800000 + (HOST_r3 << 12) + (HOST_pc << 16) + (1)); // add r3, pc, #1 - cache_addd(0xe12fff10 + (HOST_r3)); // bx r3 + Bit8u *pos1, *pos2, *pos3; - // thumb state from now on - cache_addw(0xb500); // push {lr} - cache_addw( MOV_LO_HI(HOST_r3, FC_SEGS_ADDR) ); // mov r3, FC_SEGS_ADDR - cache_addw( MOV_LO_HI(HOST_r2, FC_REGS_ADDR) ); // mov r2, FC_REGS_ADDR - cache_addw(0xb4fc); // push {r2,r3,v1-v4} +#if (__ARM_EABI__) + // 8-byte stack alignment + cache_addd(0xe92d4ff0); // stmfd sp!, {v1-v8,lr} +#else + cache_addd(0xe92d4df0); // stmfd sp!, {v1-v5,v7,v8,lr} +#endif - // adr: 16 - cache_addw( LDR_PC_IMM(HOST_r3, 64 - (16 + 4)) ); // ldr r3, [pc, #(&Segs)] - // adr: 18 - cache_addw( LDR_PC_IMM(HOST_r2, 68 - (18 + 2)) ); // ldr r2, [pc, #(&cpu_regs)] - cache_addw( MOV_HI_LO(FC_SEGS_ADDR, HOST_r3) ); // mov FC_SEGS_ADDR, r3 - cache_addw( MOV_HI_LO(FC_REGS_ADDR, HOST_r2) ); // mov FC_REGS_ADDR, r2 + cache_addd( ARM_ADD_IMM(HOST_r0, HOST_r0, 1, 0) ); // add r0, r0, #1 - // align 4 - cache_addw( ADD_LO_PC_IMM(HOST_r3, 8) ); // add r3, pc, #8 - cache_addw( ADD_IMM8(HOST_r0, 1) ); // add r0, #1 - cache_addw( ADD_IMM8(HOST_r3, 1) ); // add r3, #1 - cache_addw(0xb408); // push {r3} - cache_addw( BX(HOST_r0) ); // bx r0 - cache_addw( NOP ); // nop + pos1 = cache.pos; + cache_addd( 0 ); + pos2 = cache.pos; + cache_addd( 0 ); + pos3 = cache.pos; + cache_addd( 0 ); - // align 4 - cache_addw(0xbcfc); // pop {r2,r3,v1-v4} - cache_addw( MOV_HI_LO(FC_SEGS_ADDR, HOST_r3) ); // mov FC_SEGS_ADDR, r3 - cache_addw( MOV_HI_LO(FC_REGS_ADDR, HOST_r2) ); // mov FC_REGS_ADDR, r2 + cache_addd( ARM_ADD_IMM(HOST_lr, HOST_pc, 4, 0) ); // add lr, pc, #4 + cache_addd( ARM_STR_IMM_M_W(HOST_lr, HOST_sp, 4) ); // str lr, [sp, #-4]! 
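Note: the three zero words emitted above (pos1..pos3) are rewritten further down into pc-relative loads. A sketch of the patching math, assuming ARM state (the PC reads as the current instruction address plus 8) and an offset that is non-negative and below 4096, which placing the literals right behind the code guarantees:

	#include <stdint.h>

	/* Encode "ldr reg, [pc, #offset]" for a literal placed after the code.
	 * Bit layout mirrors the ARM_LDR_IMM macro above, base register = 15 (pc). */
	static uint32_t encode_ldr_pc_literal(uint32_t reg, uint32_t instr_addr,
	                                      uint32_t literal_addr) {
		uint32_t offset = literal_addr - (instr_addr + 8);	/* pc reads as instr+8 */
		return 0xe5900000u + (reg << 12) + (15u << 16) + offset;
	}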
+ cache_addd( ARM_BX(HOST_r0) ); // bx r0 - cache_addw(0xbc08); // pop {r3} - cache_addw( BX(HOST_r3) ); // bx r3 +#if (__ARM_EABI__) + cache_addd(0xe8bd4ff0); // ldmfd sp!, {v1-v8,lr} +#else + cache_addd(0xe8bd4df0); // ldmfd sp!, {v1-v5,v7,v8,lr} +#endif + cache_addd( ARM_BX(HOST_lr) ); // bx lr - // fill up to 64 bytes - cache_addw( NOP ); // nop - cache_addd( NOP | (NOP << 16) ); // nop, nop - cache_addd( NOP | (NOP << 16) ); // nop, nop - cache_addd( NOP | (NOP << 16) ); // nop, nop - cache_addd( NOP | (NOP << 16) ); // nop, nop + // align cache.pos to 32 bytes + if ((((Bitu)cache.pos) & 0x1f) != 0) { + cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f)); + } - // adr: 64 + *(Bit32u*)pos1 = ARM_LDR_IMM(FC_SEGS_ADDR, HOST_pc, cache.pos - (pos1 + 8)); // ldr FC_SEGS_ADDR, [pc, #(&Segs)] cache_addd((Bit32u)&Segs); // address of "Segs" - // adr: 68 + + *(Bit32u*)pos2 = ARM_LDR_IMM(FC_REGS_ADDR, HOST_pc, cache.pos - (pos2 + 8)); // ldr FC_REGS_ADDR, [pc, #(&cpu_regs)] cache_addd((Bit32u)&cpu_regs); // address of "cpu_regs" + + *(Bit32u*)pos3 = ARM_LDR_IMM(readdata_addr, HOST_pc, cache.pos - (pos3 + 8)); // ldr readdata_addr, [pc, #(&core_dynrec.readdata)] + cache_addd((Bit32u)&core_dynrec.readdata); // address of "core_dynrec.readdata" + + // align cache.pos to 32 bytes + if ((((Bitu)cache.pos) & 0x1f) != 0) { + cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f)); + } } // return from a function diff --git a/src/cpu/core_dynrec/risc_armv4le-thumb-niw.h b/src/cpu/core_dynrec/risc_armv4le-thumb-niw.h index 2f49865c..2a6b24be 100644 --- a/src/cpu/core_dynrec/risc_armv4le-thumb-niw.h +++ b/src/cpu/core_dynrec/risc_armv4le-thumb-niw.h @@ -50,15 +50,14 @@ // temporary register for LEA #define TEMP_REG_DRC HOST_a4 -#ifdef DRC_USE_REGS_ADDR // used to hold the address of "cpu_regs" - preferably filled in function gen_run_code #define FC_REGS_ADDR HOST_v7 -#endif -#ifdef DRC_USE_SEGS_ADDR // used to hold the address of "Segs" - preferably filled in function gen_run_code #define FC_SEGS_ADDR HOST_v8 -#endif + +// used to hold the address of "core_dynrec.readdata" - filled in function gen_run_code +#define readdata_addr HOST_v5 // instruction encodings @@ -98,10 +97,14 @@ // logical // and dst, src #define AND(dst, src) (0x4000 + (dst) + ((src) << 3) ) +// bic dst, src +#define BIC(dst, src) (0x4380 + (dst) + ((src) << 3) ) // eor dst, src #define EOR(dst, src) (0x4040 + (dst) + ((src) << 3) ) // orr dst, src #define ORR(dst, src) (0x4300 + (dst) + ((src) << 3) ) +// mvn dst, src +#define MVN(dst, src) (0x43c0 + (dst) + ((src) << 3) ) // shift/rotate // lsl dst, src, #imm @@ -128,6 +131,8 @@ #define LDRB_IMM(reg, addr, imm) (0x7800 + (reg) + ((addr) << 3) + ((imm) << 6) ) // ldr reg, [pc, #imm] @ 0 <= imm < 1024 & imm mod 4 = 0 #define LDR_PC_IMM(reg, imm) (0x4800 + ((reg) << 8) + ((imm) >> 2) ) +// ldr reg, [addr1, addr2] +#define LDR_REG(reg, addr1, addr2) (0x5800 + (reg) + ((addr1) << 3) + ((addr2) << 6) ) // store // str reg, [addr, #imm] @ 0 <= imm < 128 & imm mod 4 = 0 @@ -150,6 +155,25 @@ #define BX(reg) (0x4700 + ((reg) << 3) ) +// arm instructions + +// arithmetic +// add dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0 +#define ARM_ADD_IMM(dst, src, imm, rimm) (0xe2800000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) ) + +// load +// ldr reg, [addr, #imm] @ 0 <= imm < 4096 +#define ARM_LDR_IMM(reg, addr, imm) (0xe5900000 + ((reg) << 12) + ((addr) << 16) + (imm) ) + +// store +// str reg, [addr, #-(imm)]! 
@ 0 <= imm < 4096 +#define ARM_STR_IMM_M_W(reg, addr, imm) (0xe5200000 + ((reg) << 12) + ((addr) << 16) + (imm) ) + +// branch +// bx reg +#define ARM_BX(reg) (0xe12fff10 + (reg) ) + + // data pool defines #define CACHE_DATA_JUMP (2) #define CACHE_DATA_ALIGN (32) @@ -193,7 +217,7 @@ static void cache_checkinstr(Bit32u size) { cache_datapos = (Bit8u *) (((Bitu)cache.block.active->cache.start + cache.block.active->cache.size - CACHE_DATA_ALIGN) & ~(CACHE_DATA_ALIGN - 1)); } else { register Bit32u cachemodsize; - + cachemodsize = (cache.pos - cache.block.active->cache.start) & (CACHE_MAXSIZE - 1); if (cachemodsize + CACHE_DATA_MAX + CACHE_DATA_ALIGN <= CACHE_MAXSIZE || @@ -275,30 +299,49 @@ static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) { cache_addw( MOV_REG(reg_dst, reg_src) ); // mov reg_dst, reg_src } +// helper function +static bool val_single_shift(Bit32u value, Bit32u *val_shift) { + Bit32u shift; + + if (GCC_UNLIKELY(value == 0)) { + *val_shift = 0; + return true; + } + + shift = 0; + while ((value & 1) == 0) { + value>>=1; + shift+=1; + } + + if ((value >> 8) != 0) return false; + + *val_shift = shift; + return true; +} + // move a 32bit constant value into dest_reg static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) { - if ((imm & 0xffffff00) == 0) { + Bit32u scale; + + if (imm < 256) { cache_checkinstr(2); cache_addw( MOV_IMM(dest_reg, imm) ); // mov dest_reg, #(imm) - } else if ((imm & 0xffff00ff) == 0) { + } else if ((~imm) < 256) { cache_checkinstr(4); - cache_addw( MOV_IMM(dest_reg, imm >> 8) ); // mov dest_reg, #(imm >> 8) - cache_addw( LSL_IMM(dest_reg, dest_reg, 8) ); // lsl dest_reg, dest_reg, #8 - } else if ((imm & 0xff00ffff) == 0) { + cache_addw( MOV_IMM(dest_reg, ~imm) ); // mov dest_reg, #(~imm) + cache_addw( MVN(dest_reg, dest_reg) ); // mvn dest_reg, dest_reg + } else if (val_single_shift(imm, &scale)) { cache_checkinstr(4); - cache_addw( MOV_IMM(dest_reg, imm >> 16) ); // mov dest_reg, #(imm >> 16) - cache_addw( LSL_IMM(dest_reg, dest_reg, 16) ); // lsl dest_reg, dest_reg, #16 - } else if ((imm & 0x00ffffff) == 0) { - cache_checkinstr(4); - cache_addw( MOV_IMM(dest_reg, imm >> 24) ); // mov dest_reg, #(imm >> 24) - cache_addw( LSL_IMM(dest_reg, dest_reg, 24) ); // lsl dest_reg, dest_reg, #24 + cache_addw( MOV_IMM(dest_reg, imm >> scale) ); // mov dest_reg, #(imm >> scale) + cache_addw( LSL_IMM(dest_reg, dest_reg, scale) ); // lsl dest_reg, dest_reg, #scale } else { Bit32u diff; cache_checkinstr(4); diff = imm - ((Bit32u)cache.pos+4); - + if ((diff < 1024) && ((imm & 0x03) == 0)) { if (((Bit32u)cache.pos & 0x03) == 0) { cache_addw( ADD_LO_PC_IMM(dest_reg, diff >> 2) ); // add dest_reg, pc, #(diff >> 2) @@ -321,10 +364,61 @@ static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) { } } +// helper function +static bool gen_mov_memval_to_reg_helper(HostReg dest_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) { + switch (size) { + case 4: +#if !defined(C_UNALIGNED_MEMORY) + if ((data & 3) == 0) +#endif + { + if ((data >= addr_data) && (data < addr_data + 128) && (((data - addr_data) & 3) == 0)) { + cache_checkinstr(4); + cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg + cache_addw( LDR_IMM(dest_reg, templo2, data - addr_data) ); // ldr dest_reg, [templo2, #(data - addr_data)] + return true; + } + } + break; + case 2: +#if !defined(C_UNALIGNED_MEMORY) + if ((data & 1) == 0) +#endif + { + if ((data >= addr_data) && (data < addr_data + 64) && (((data - addr_data) & 1) == 0)) { + cache_checkinstr(4); + 
cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg + cache_addw( LDRH_IMM(dest_reg, templo2, data - addr_data) ); // ldrh dest_reg, [templo2, #(data - addr_data)] + return true; + } + } + break; + case 1: + if ((data >= addr_data) && (data < addr_data + 32)) { + cache_checkinstr(4); + cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg + cache_addw( LDRB_IMM(dest_reg, templo2, data - addr_data) ); // ldrb dest_reg, [templo2, #(data - addr_data)] + return true; + } + default: + break; + } + return false; +} + +// helper function +static bool gen_mov_memval_to_reg(HostReg dest_reg, void *data, Bitu size) { + if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true; + if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true; + if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true; + return false; +} + // helper function for gen_mov_word_to_reg static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,HostReg data_reg) { // alignment.... if (dword) { +#if !defined(C_UNALIGNED_MEMORY) if ((Bit32u)data & 3) { if ( ((Bit32u)data & 3) == 2 ) { cache_checkinstr(8); @@ -343,18 +437,23 @@ static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,Ho cache_addw( LSL_IMM(templo1, templo1, 24) ); // lsl templo1, templo1, #24 cache_addw( ORR(dest_reg, templo1) ); // orr dest_reg, templo1 } - } else { + } else +#endif + { cache_checkinstr(2); cache_addw( LDR_IMM(dest_reg, data_reg, 0) ); // ldr dest_reg, [data_reg] } } else { +#if !defined(C_UNALIGNED_MEMORY) if ((Bit32u)data & 1) { cache_checkinstr(8); cache_addw( LDRB_IMM(dest_reg, data_reg, 0) ); // ldrb dest_reg, [data_reg] cache_addw( LDRB_IMM(templo1, data_reg, 1) ); // ldrb templo1, [data_reg, #1] cache_addw( LSL_IMM(templo1, templo1, 8) ); // lsl templo1, templo1, #8 cache_addw( ORR(dest_reg, templo1) ); // orr dest_reg, templo1 - } else { + } else +#endif + { cache_checkinstr(2); cache_addw( LDRH_IMM(dest_reg, data_reg, 0) ); // ldrh dest_reg, [data_reg] } @@ -364,8 +463,10 @@ static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,Ho // move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg // 16bit moves may destroy the upper 16bit of the destination register static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) { - gen_mov_dword_to_reg_imm(templo2, (Bit32u)data); - gen_mov_word_to_reg_helper(dest_reg, data, dword, templo2); + if (!gen_mov_memval_to_reg(dest_reg, data, (dword)?4:2)) { + gen_mov_dword_to_reg_imm(templo2, (Bit32u)data); + gen_mov_word_to_reg_helper(dest_reg, data, dword, templo2); + } } // move a 16bit constant value into dest_reg @@ -374,10 +475,61 @@ static void INLINE gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) { gen_mov_dword_to_reg_imm(dest_reg, (Bit32u)imm); } +// helper function +static bool gen_mov_memval_from_reg_helper(HostReg src_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) { + switch (size) { + case 4: +#if !defined(C_UNALIGNED_MEMORY) + if ((data & 3) == 0) +#endif + { + if ((data >= addr_data) && (data < addr_data + 128) && (((data - addr_data) & 3) == 0)) { + cache_checkinstr(4); + cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg + cache_addw( STR_IMM(src_reg, templo2, data - addr_data) ); // str src_reg, [templo2, #(data - addr_data)] + return true; + } + } + 
break; + case 2: +#if !defined(C_UNALIGNED_MEMORY) + if ((data & 1) == 0) +#endif + { + if ((data >= addr_data) && (data < addr_data + 64) && (((data - addr_data) & 1) == 0)) { + cache_checkinstr(4); + cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg + cache_addw( STRH_IMM(src_reg, templo2, data - addr_data) ); // strh src_reg, [templo2, #(data - addr_data)] + return true; + } + } + break; + case 1: + if ((data >= addr_data) && (data < addr_data + 32)) { + cache_checkinstr(4); + cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg + cache_addw( STRB_IMM(src_reg, templo2, data - addr_data) ); // strb src_reg, [templo2, #(data - addr_data)] + return true; + } + default: + break; + } + return false; +} + +// helper function +static bool gen_mov_memval_from_reg(HostReg src_reg, void *dest, Bitu size) { + if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true; + if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true; + if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true; + return false; +} + // helper function for gen_mov_word_from_reg static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, HostReg data_reg) { // alignment.... if (dword) { +#if !defined(C_UNALIGNED_MEMORY) if ((Bit32u)dest & 3) { if ( ((Bit32u)dest & 3) == 2 ) { cache_checkinstr(8); @@ -398,18 +550,23 @@ static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, cache_addw( LSR_IMM(templo1, templo1, 24) ); // lsr templo1, templo1, #24 cache_addw( STRB_IMM(templo1, data_reg, 3) ); // strb templo1, [data_reg, #3] } - } else { + } else +#endif + { cache_checkinstr(2); cache_addw( STR_IMM(src_reg, data_reg, 0) ); // str src_reg, [data_reg] } } else { +#if !defined(C_UNALIGNED_MEMORY) if ((Bit32u)dest & 1) { cache_checkinstr(8); cache_addw( STRB_IMM(src_reg, data_reg, 0) ); // strb src_reg, [data_reg] cache_addw( MOV_REG(templo1, src_reg) ); // mov templo1, src_reg cache_addw( LSR_IMM(templo1, templo1, 8) ); // lsr templo1, templo1, #8 cache_addw( STRB_IMM(templo1, data_reg, 1) ); // strb templo1, [data_reg, #1] - } else { + } else +#endif + { cache_checkinstr(2); cache_addw( STRH_IMM(src_reg, data_reg, 0) ); // strh src_reg, [data_reg] } @@ -418,8 +575,10 @@ static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, // move 32bit (dword==true) or 16bit (dword==false) of a register into memory static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) { - gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_from_reg_helper(src_reg, dest, dword, templo2); + if (!gen_mov_memval_from_reg(src_reg, dest, (dword)?4:2)) { + gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); + gen_mov_word_from_reg_helper(src_reg, dest, dword, templo2); + } } // move an 8bit value from memory into dest_reg @@ -427,9 +586,11 @@ static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) { // this function does not use FC_OP1/FC_OP2 as dest_reg as these // registers might not be directly byte-accessible on some architectures static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) { - gen_mov_dword_to_reg_imm(templo1, (Bit32u)data); - cache_checkinstr(2); - cache_addw( LDRB_IMM(dest_reg, templo1, 0) ); // ldrb dest_reg, [templo1] + if (!gen_mov_memval_to_reg(dest_reg, data, 1)) { + gen_mov_dword_to_reg_imm(templo1, (Bit32u)data); + 
cache_checkinstr(2); + cache_addw( LDRB_IMM(dest_reg, templo1, 0) ); // ldrb dest_reg, [templo1] + } } // move an 8bit value from memory into dest_reg @@ -459,9 +620,11 @@ static void INLINE gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u // move the lowest 8bit of a register into memory static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) { - gen_mov_dword_to_reg_imm(templo1, (Bit32u)dest); - cache_checkinstr(2); - cache_addw( STRB_IMM(src_reg, templo1, 0) ); // strb src_reg, [templo1] + if (!gen_mov_memval_from_reg(src_reg, dest, 1)) { + gen_mov_dword_to_reg_imm(templo1, (Bit32u)dest); + cache_checkinstr(2); + cache_addw( STRB_IMM(src_reg, templo1, 0) ); // strb src_reg, [templo1] + } } @@ -501,18 +664,58 @@ static void gen_add(HostReg reg,void* op) { // add a 32bit constant value to a full register static void gen_add_imm(HostReg reg,Bit32u imm) { + Bit32u imm2, scale; + if(!imm) return; - gen_mov_dword_to_reg_imm(templo1, imm); - cache_checkinstr(2); - cache_addw( ADD_REG(reg, reg, templo1) ); // add reg, reg, templo1 + + imm2 = (Bit32u) (-((Bit32s)imm)); + + if (imm <= 255) { + cache_checkinstr(2); + cache_addw( ADD_IMM8(reg, imm) ); // add reg, #imm + } else if (imm2 <= 255) { + cache_checkinstr(2); + cache_addw( SUB_IMM8(reg, imm2) ); // sub reg, #(-imm) + } else { + if (val_single_shift(imm2, &scale)) { + cache_checkinstr((scale)?6:4); + cache_addw( MOV_IMM(templo1, imm2 >> scale) ); // mov templo1, #(~imm >> scale) + if (scale) { + cache_addw( LSL_IMM(templo1, templo1, scale) ); // lsl templo1, templo1, #scale + } + cache_addw( SUB_REG(reg, reg, templo1) ); // sub reg, reg, templo1 + } else { + gen_mov_dword_to_reg_imm(templo1, imm); + cache_checkinstr(2); + cache_addw( ADD_REG(reg, reg, templo1) ); // add reg, reg, templo1 + } + } } // and a 32bit constant value with a full register static void gen_and_imm(HostReg reg,Bit32u imm) { - if(imm == 0xffffffff) return; - gen_mov_dword_to_reg_imm(templo1, imm); - cache_checkinstr(2); - cache_addw( AND(reg, templo1) ); // and reg, templo1 + Bit32u imm2, scale; + + imm2 = ~imm; + if(!imm2) return; + + if (!imm) { + cache_checkinstr(2); + cache_addw( MOV_IMM(reg, 0) ); // mov reg, #0 + } else { + if (val_single_shift(imm2, &scale)) { + cache_checkinstr((scale)?6:4); + cache_addw( MOV_IMM(templo1, imm2 >> scale) ); // mov templo1, #(~imm >> scale) + if (scale) { + cache_addw( LSL_IMM(templo1, templo1, scale) ); // lsl templo1, templo1, #scale + } + cache_addw( BIC(reg, templo1) ); // bic reg, templo1 + } else { + gen_mov_dword_to_reg_imm(templo1, imm); + cache_checkinstr(2); + cache_addw( AND(reg, templo1) ); // and reg, templo1 + } + } } @@ -527,70 +730,69 @@ static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) { gen_mov_direct_dword(dest,(Bit32u)imm); } -// add an 8bit constant value to a dword memory value -static void gen_add_direct_byte(void* dest,Bit8s imm) { - if(!imm) return; - gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templo3, dest, 1, templo2); - cache_checkinstr(2); - if (imm >= 0) { - cache_addw( ADD_IMM8(templo3, (Bit32s)imm) ); // add templo3, #(imm) - } else { - cache_addw( SUB_IMM8(templo3, -((Bit32s)imm)) ); // sub templo3, #(-imm) - } - gen_mov_word_from_reg_helper(templo3, dest, 1, templo2); -} - // add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) { + if (!dword) imm &= 0xffff; if(!imm) return; - if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) 
{ - gen_add_direct_byte(dest,(Bit8s)imm); - return; + + if (!gen_mov_memval_to_reg(templo3, dest, (dword)?4:2)) { + gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); + gen_mov_word_to_reg_helper(templo3, dest, dword, templo2); } - gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templo3, dest, dword, templo2); - if (dword) { - gen_mov_dword_to_reg_imm(templo1, imm); - } else { - gen_mov_word_to_reg_imm(templo1, (Bit16u)imm); + gen_add_imm(templo3, imm); + if (!gen_mov_memval_from_reg(templo3, dest, (dword)?4:2)) { + gen_mov_word_from_reg_helper(templo3, dest, dword, templo2); } - cache_checkinstr(2); - cache_addw( ADD_REG(templo3, templo3, templo1) ); // add templo3, templo3, templo1 - gen_mov_word_from_reg_helper(templo3, dest, dword, templo2); } -// subtract an 8bit constant value from a dword memory value -static void gen_sub_direct_byte(void* dest,Bit8s imm) { - if(!imm) return; - gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templo3, dest, 1, templo2); - cache_checkinstr(2); - if (imm >= 0) { - cache_addw( SUB_IMM8(templo3, (Bit32s)imm) ); // sub templo3, #(imm) - } else { - cache_addw( ADD_IMM8(templo3, -((Bit32s)imm)) ); // add templo3, #(-imm) - } - gen_mov_word_from_reg_helper(templo3, dest, 1, templo2); +// add an 8bit constant value to a dword memory value +static void gen_add_direct_byte(void* dest,Bit8s imm) { + gen_add_direct_word(dest, (Bit32s)imm, 1); } // subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) { + Bit32u imm2, scale; + + if (!dword) imm &= 0xffff; if(!imm) return; - if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) { - gen_sub_direct_byte(dest,(Bit8s)imm); - return; + + if (!gen_mov_memval_to_reg(templo3, dest, (dword)?4:2)) { + gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); + gen_mov_word_to_reg_helper(templo3, dest, dword, templo2); } - gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templo3, dest, dword, templo2); - if (dword) { - gen_mov_dword_to_reg_imm(templo1, imm); + + imm2 = (Bit32u) (-((Bit32s)imm)); + + if (imm <= 255) { + cache_checkinstr(2); + cache_addw( SUB_IMM8(templo3, imm) ); // sub templo3, #imm + } else if (imm2 <= 255) { + cache_checkinstr(2); + cache_addw( ADD_IMM8(templo3, imm2) ); // add templo3, #(-imm) } else { - gen_mov_word_to_reg_imm(templo1, (Bit16u)imm); + if (val_single_shift(imm2, &scale)) { + cache_checkinstr((scale)?6:4); + cache_addw( MOV_IMM(templo1, imm2 >> scale) ); // mov templo1, #(~imm >> scale) + if (scale) { + cache_addw( LSL_IMM(templo1, templo1, scale) ); // lsl templo1, templo1, #scale + } + cache_addw( ADD_REG(templo3, templo3, templo1) ); // add templo3, templo3, templo1 + } else { + gen_mov_dword_to_reg_imm(templo1, imm); + cache_checkinstr(2); + cache_addw( SUB_REG(templo3, templo3, templo1) ); // sub templo3, templo3, templo1 + } } - cache_checkinstr(2); - cache_addw( SUB_REG(templo3, templo3, templo1) ); // sub templo3, templo3, templo1 - gen_mov_word_from_reg_helper(templo3, dest, dword, templo2); + + if (!gen_mov_memval_from_reg(templo3, dest, (dword)?4:2)) { + gen_mov_word_from_reg_helper(templo3, dest, dword, templo2); + } +} + +// subtract an 8bit constant value from a dword memory value +static void gen_sub_direct_byte(void* dest,Bit8s imm) { + gen_sub_direct_word(dest, (Bit32s)imm, 1); } // effective address calculation, destination is dest_reg @@ -696,20 +898,22 @@ static void INLINE gen_load_param_mem(Bitu 
mem,Bitu param) { static void gen_jmp_ptr(void * ptr,Bits imm=0) { gen_mov_word_to_reg(templo3, ptr, 1); - if (imm) { - gen_mov_dword_to_reg_imm(templo2, imm); - cache_checkinstr(2); - cache_addw( ADD_REG(templo3, templo3, templo2) ); // add templo3, templo3, templo2 - } - -#if (1) -// (*ptr) should be word aligned +#if !defined(C_UNALIGNED_MEMORY) +// (*ptr) should be word aligned if ((imm & 0x03) == 0) { - cache_checkinstr(6); - cache_addw( LDR_IMM(templo2, templo3, 0) ); // ldr templo2, [templo3] - } else #endif - { + if ((imm >= 0) && (imm < 128) && ((imm & 3) == 0)) { + cache_checkinstr(6); + cache_addw( LDR_IMM(templo2, templo3, imm) ); // ldr templo2, [templo3, #imm] + } else { + gen_mov_dword_to_reg_imm(templo2, imm); + cache_checkinstr(6); + cache_addw( LDR_REG(templo2, templo3, templo2) ); // ldr templo2, [templo3, templo2] + } +#if !defined(C_UNALIGNED_MEMORY) + } else { + gen_add_imm(templo3, imm); + cache_checkinstr(24); cache_addw( LDRB_IMM(templo2, templo3, 0) ); // ldrb templo2, [templo3] cache_addw( LDRB_IMM(templo1, templo3, 1) ); // ldrb templo1, [templo3, #1] @@ -722,6 +926,7 @@ static void gen_jmp_ptr(void * ptr,Bits imm=0) { cache_addw( LSL_IMM(templo1, templo1, 24) ); // lsl templo1, templo1, #24 cache_addw( ORR(templo2, templo1) ); // orr templo2, templo1 } +#endif // increase jmp address to keep thumb state cache_addw( ADD_IMM3(templo2, templo2, 1) ); // add templo2, templo2, #1 @@ -817,50 +1022,53 @@ static void INLINE gen_fill_branch_long(Bit32u data) { } static void gen_run_code(void) { - // switch from arm to thumb state - cache_addd(0xe2800000 + (HOST_r3 << 12) + (HOST_pc << 16) + (1)); // add r3, pc, #1 - cache_addd(0xe12fff10 + (HOST_r3)); // bx r3 + Bit8u *pos1, *pos2, *pos3; - // thumb state from now on - cache_addw(0xb500); // push {lr} - cache_addw( MOV_LO_HI(HOST_r3, FC_SEGS_ADDR) ); // mov r3, FC_SEGS_ADDR - cache_addw( MOV_LO_HI(HOST_r2, FC_REGS_ADDR) ); // mov r2, FC_REGS_ADDR - cache_addw(0xb4fc); // push {r2,r3,v1-v4} +#if (__ARM_EABI__) + // 8-byte stack alignment + cache_addd(0xe92d4ff0); // stmfd sp!, {v1-v8,lr} +#else + cache_addd(0xe92d4df0); // stmfd sp!, {v1-v5,v7,v8,lr} +#endif - // adr: 16 - cache_addw( LDR_PC_IMM(HOST_r3, 64 - (16 + 4)) ); // ldr r3, [pc, #(&Segs)] - // adr: 18 - cache_addw( LDR_PC_IMM(HOST_r2, 68 - (18 + 2)) ); // ldr r2, [pc, #(&cpu_regs)] - cache_addw( MOV_HI_LO(FC_SEGS_ADDR, HOST_r3) ); // mov FC_SEGS_ADDR, r3 - cache_addw( MOV_HI_LO(FC_REGS_ADDR, HOST_r2) ); // mov FC_REGS_ADDR, r2 + cache_addd( ARM_ADD_IMM(HOST_r0, HOST_r0, 1, 0) ); // add r0, r0, #1 - // align 4 - cache_addw( ADD_LO_PC_IMM(HOST_r3, 8) ); // add r3, pc, #8 - cache_addw( ADD_IMM8(HOST_r0, 1) ); // add r0, #1 - cache_addw( ADD_IMM8(HOST_r3, 1) ); // add r3, #1 - cache_addw(0xb408); // push {r3} - cache_addw( BX(HOST_r0) ); // bx r0 - cache_addw( NOP ); // nop + pos1 = cache.pos; + cache_addd( 0 ); + pos2 = cache.pos; + cache_addd( 0 ); + pos3 = cache.pos; + cache_addd( 0 ); - // align 4 - cache_addw(0xbcfc); // pop {r2,r3,v1-v4} - cache_addw( MOV_HI_LO(FC_SEGS_ADDR, HOST_r3) ); // mov FC_SEGS_ADDR, r3 - cache_addw( MOV_HI_LO(FC_REGS_ADDR, HOST_r2) ); // mov FC_REGS_ADDR, r2 + cache_addd( ARM_ADD_IMM(HOST_lr, HOST_pc, 4, 0) ); // add lr, pc, #4 + cache_addd( ARM_STR_IMM_M_W(HOST_lr, HOST_sp, 4) ); // str lr, [sp, #-4]! 
+ cache_addd( ARM_BX(HOST_r0) ); // bx r0 - cache_addw(0xbc08); // pop {r3} - cache_addw( BX(HOST_r3) ); // bx r3 +#if (__ARM_EABI__) + cache_addd(0xe8bd4ff0); // ldmfd sp!, {v1-v8,lr} +#else + cache_addd(0xe8bd4df0); // ldmfd sp!, {v1-v5,v7,v8,lr} +#endif + cache_addd( ARM_BX(HOST_lr) ); // bx lr - // fill up to 64 bytes - cache_addw( NOP ); // nop - cache_addd( NOP | (NOP << 16) ); // nop, nop - cache_addd( NOP | (NOP << 16) ); // nop, nop - cache_addd( NOP | (NOP << 16) ); // nop, nop - cache_addd( NOP | (NOP << 16) ); // nop, nop + // align cache.pos to 32 bytes + if ((((Bitu)cache.pos) & 0x1f) != 0) { + cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f)); + } - // adr: 64 + *(Bit32u*)pos1 = ARM_LDR_IMM(FC_SEGS_ADDR, HOST_pc, cache.pos - (pos1 + 8)); // ldr FC_SEGS_ADDR, [pc, #(&Segs)] cache_addd((Bit32u)&Segs); // address of "Segs" - // adr: 68 + + *(Bit32u*)pos2 = ARM_LDR_IMM(FC_REGS_ADDR, HOST_pc, cache.pos - (pos2 + 8)); // ldr FC_REGS_ADDR, [pc, #(&cpu_regs)] cache_addd((Bit32u)&cpu_regs); // address of "cpu_regs" + + *(Bit32u*)pos3 = ARM_LDR_IMM(readdata_addr, HOST_pc, cache.pos - (pos3 + 8)); // ldr readdata_addr, [pc, #(&core_dynrec.readdata)] + cache_addd((Bit32u)&core_dynrec.readdata); // address of "core_dynrec.readdata" + + // align cache.pos to 32 bytes + if ((((Bitu)cache.pos) & 0x1f) != 0) { + cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f)); + } } // return from a function diff --git a/src/cpu/core_dynrec/risc_armv4le-thumb.h b/src/cpu/core_dynrec/risc_armv4le-thumb.h index 33409edd..829cb554 100644 --- a/src/cpu/core_dynrec/risc_armv4le-thumb.h +++ b/src/cpu/core_dynrec/risc_armv4le-thumb.h @@ -50,15 +50,14 @@ // temporary register for LEA #define TEMP_REG_DRC HOST_a4 -#ifdef DRC_USE_REGS_ADDR // used to hold the address of "cpu_regs" - preferably filled in function gen_run_code #define FC_REGS_ADDR HOST_v7 -#endif -#ifdef DRC_USE_SEGS_ADDR // used to hold the address of "Segs" - preferably filled in function gen_run_code #define FC_SEGS_ADDR HOST_v8 -#endif + +// used to hold the address of "core_dynrec.readdata" - filled in function gen_run_code +#define readdata_addr HOST_v5 // instruction encodings @@ -98,10 +97,14 @@ // logical // and dst, src #define AND(dst, src) (0x4000 + (dst) + ((src) << 3) ) +// bic dst, src +#define BIC(dst, src) (0x4380 + (dst) + ((src) << 3) ) // eor dst, src #define EOR(dst, src) (0x4040 + (dst) + ((src) << 3) ) // orr dst, src #define ORR(dst, src) (0x4300 + (dst) + ((src) << 3) ) +// mvn dst, src +#define MVN(dst, src) (0x43c0 + (dst) + ((src) << 3) ) // shift/rotate // lsl dst, src, #imm @@ -128,6 +131,8 @@ #define LDRB_IMM(reg, addr, imm) (0x7800 + (reg) + ((addr) << 3) + ((imm) << 6) ) // ldr reg, [pc, #imm] @ 0 <= imm < 1024 & imm mod 4 = 0 #define LDR_PC_IMM(reg, imm) (0x4800 + ((reg) << 8) + ((imm) >> 2) ) +// ldr reg, [addr1, addr2] +#define LDR_REG(reg, addr1, addr2) (0x5800 + (reg) + ((addr1) << 3) + ((addr2) << 6) ) // store // str reg, [addr, #imm] @ 0 <= imm < 128 & imm mod 4 = 0 @@ -150,30 +155,69 @@ #define BX(reg) (0x4700 + ((reg) << 3) ) +// arm instructions + +// arithmetic +// add dst, src, #(imm ror rimm) @ 0 <= imm <= 255 & rimm mod 2 = 0 +#define ARM_ADD_IMM(dst, src, imm, rimm) (0xe2800000 + ((dst) << 12) + ((src) << 16) + (imm) + ((rimm) << 7) ) + +// load +// ldr reg, [addr, #imm] @ 0 <= imm < 4096 +#define ARM_LDR_IMM(reg, addr, imm) (0xe5900000 + ((reg) << 12) + ((addr) << 16) + (imm) ) + +// store +// str reg, [addr, #-(imm)]! 
@ 0 <= imm < 4096 +#define ARM_STR_IMM_M_W(reg, addr, imm) (0xe5200000 + ((reg) << 12) + ((addr) << 16) + (imm) ) + +// branch +// bx reg +#define ARM_BX(reg) (0xe12fff10 + (reg) ) + + // move a full register from reg_src to reg_dst static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) { if(reg_src == reg_dst) return; cache_addw( MOV_REG(reg_dst, reg_src) ); // mov reg_dst, reg_src } +// helper function +static bool val_single_shift(Bit32u value, Bit32u *val_shift) { + Bit32u shift; + + if (GCC_UNLIKELY(value == 0)) { + *val_shift = 0; + return true; + } + + shift = 0; + while ((value & 1) == 0) { + value>>=1; + shift+=1; + } + + if ((value >> 8) != 0) return false; + + *val_shift = shift; + return true; +} + // move a 32bit constant value into dest_reg static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) { - if ((imm & 0xffffff00) == 0) { - cache_addw( MOV_IMM(dest_reg, imm) ); // mov dest_reg, #(imm) - } else if ((imm & 0xffff00ff) == 0) { - cache_addw( MOV_IMM(dest_reg, imm >> 8) ); // mov dest_reg, #(imm >> 8) - cache_addw( LSL_IMM(dest_reg, dest_reg, 8) ); // lsl dest_reg, dest_reg, #8 - } else if ((imm & 0xff00ffff) == 0) { - cache_addw( MOV_IMM(dest_reg, imm >> 16) ); // mov dest_reg, #(imm >> 16) - cache_addw( LSL_IMM(dest_reg, dest_reg, 16) ); // lsl dest_reg, dest_reg, #16 - } else if ((imm & 0x00ffffff) == 0) { - cache_addw( MOV_IMM(dest_reg, imm >> 24) ); // mov dest_reg, #(imm >> 24) - cache_addw( LSL_IMM(dest_reg, dest_reg, 24) ); // lsl dest_reg, dest_reg, #24 + Bit32u scale; + + if (imm < 256) { + cache_addw( MOV_IMM(dest_reg, imm) ); // mov dest_reg, #imm + } else if ((~imm) < 256) { + cache_addw( MOV_IMM(dest_reg, ~imm) ); // mov dest_reg, #(~imm) + cache_addw( MVN(dest_reg, dest_reg) ); // mvn dest_reg, dest_reg + } else if (val_single_shift(imm, &scale)) { + cache_addw( MOV_IMM(dest_reg, imm >> scale) ); // mov dest_reg, #(imm >> scale) + cache_addw( LSL_IMM(dest_reg, dest_reg, scale) ); // lsl dest_reg, dest_reg, #scale } else { Bit32u diff; - + diff = imm - ((Bit32u)cache.pos+4); - + if ((diff < 1024) && ((imm & 0x03) == 0)) { if (((Bit32u)cache.pos & 0x03) == 0) { cache_addw( ADD_LO_PC_IMM(dest_reg, diff) ); // add dest_reg, pc, #(diff >> 2) @@ -198,10 +242,58 @@ static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) { } } +// helper function +static bool gen_mov_memval_to_reg_helper(HostReg dest_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) { + switch (size) { + case 4: +#if !defined(C_UNALIGNED_MEMORY) + if ((data & 3) == 0) +#endif + { + if ((data >= addr_data) && (data < addr_data + 128) && (((data - addr_data) & 3) == 0)) { + cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg + cache_addw( LDR_IMM(dest_reg, templo2, data - addr_data) ); // ldr dest_reg, [templo2, #(data - addr_data)] + return true; + } + } + break; + case 2: +#if !defined(C_UNALIGNED_MEMORY) + if ((data & 1) == 0) +#endif + { + if ((data >= addr_data) && (data < addr_data + 64) && (((data - addr_data) & 1) == 0)) { + cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg + cache_addw( LDRH_IMM(dest_reg, templo2, data - addr_data) ); // ldrh dest_reg, [templo2, #(data - addr_data)] + return true; + } + } + break; + case 1: + if ((data >= addr_data) && (data < addr_data + 32)) { + cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg + cache_addw( LDRB_IMM(dest_reg, templo2, data - addr_data) ); // ldrb dest_reg, [templo2, #(data - addr_data)] + return true; + } + default: + break; + } + return false; +} 
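Note on the immediate handling introduced above: instead of checking a few fixed byte positions, the Thumb backends now test whether a constant is an 8-bit value shifted left by some amount, in which case it can be materialised with a mov/lsl pair rather than a PC-relative literal load (the patch's own helper for this is val_single_shift). Below is a minimal standalone sketch of the same check, assuming plain C99; the function and driver names are illustrative and not part of the patch.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* Returns true if 'value' is an 8-bit constant shifted left by some amount,
   i.e. value == (value >> shift) << shift with (value >> shift) <= 255.
   Such constants fit the Thumb "mov reg, #imm8" plus "lsl reg, reg, #shift" pair. */
static bool fits_imm8_shifted(uint32_t value, uint32_t *shift_out) {
    uint32_t shift = 0;
    if (value == 0) { *shift_out = 0; return true; }
    while ((value & 1) == 0) { value >>= 1; shift += 1; }   /* strip trailing zeros */
    if ((value >> 8) != 0) return false;                    /* more than 8 significant bits */
    *shift_out = shift;
    return true;
}

int main(void) {
    uint32_t shift;
    /* 0x3FC00 is 0xFF << 10: encodable as mov r, #0xFF ; lsl r, r, #10 */
    printf("0x3FC00 -> %d (shift %u)\n", fits_imm8_shifted(0x3FC00u, &shift), shift);
    /* 0x12345 keeps more than 8 significant bits, so it needs the literal-pool path */
    printf("0x12345 -> %d\n", fits_imm8_shifted(0x12345u, &shift));
    return 0;
}

Constants that fail this test either go through the separate mvn path (when the bitwise complement fits in 8 bits) or fall back to loading the value from the data pool, as in the gen_mov_dword_to_reg_imm rewrites above.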
+ +// helper function +static bool gen_mov_memval_to_reg(HostReg dest_reg, void *data, Bitu size) { + if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true; + if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true; + if (gen_mov_memval_to_reg_helper(dest_reg, (Bit32u)data, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true; + return false; +} + // helper function for gen_mov_word_to_reg static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,HostReg data_reg) { // alignment.... if (dword) { +#if !defined(C_UNALIGNED_MEMORY) if ((Bit32u)data & 3) { if ( ((Bit32u)data & 3) == 2 ) { cache_addw( LDRH_IMM(dest_reg, data_reg, 0) ); // ldrh dest_reg, [data_reg] @@ -218,16 +310,21 @@ static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,Ho cache_addw( LSL_IMM(templo1, templo1, 24) ); // lsl templo1, templo1, #24 cache_addw( ORR(dest_reg, templo1) ); // orr dest_reg, templo1 } - } else { + } else +#endif + { cache_addw( LDR_IMM(dest_reg, data_reg, 0) ); // ldr dest_reg, [data_reg] } } else { +#if !defined(C_UNALIGNED_MEMORY) if ((Bit32u)data & 1) { cache_addw( LDRB_IMM(dest_reg, data_reg, 0) ); // ldrb dest_reg, [data_reg] cache_addw( LDRB_IMM(templo1, data_reg, 1) ); // ldrb templo1, [data_reg, #1] cache_addw( LSL_IMM(templo1, templo1, 8) ); // lsl templo1, templo1, #8 cache_addw( ORR(dest_reg, templo1) ); // orr dest_reg, templo1 - } else { + } else +#endif + { cache_addw( LDRH_IMM(dest_reg, data_reg, 0) ); // ldrh dest_reg, [data_reg] } } @@ -236,8 +333,10 @@ static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,Ho // move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg // 16bit moves may destroy the upper 16bit of the destination register static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) { - gen_mov_dword_to_reg_imm(templo2, (Bit32u)data); - gen_mov_word_to_reg_helper(dest_reg, data, dword, templo2); + if (!gen_mov_memval_to_reg(dest_reg, data, (dword)?4:2)) { + gen_mov_dword_to_reg_imm(templo2, (Bit32u)data); + gen_mov_word_to_reg_helper(dest_reg, data, dword, templo2); + } } // move a 16bit constant value into dest_reg @@ -246,10 +345,58 @@ static void INLINE gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) { gen_mov_dword_to_reg_imm(dest_reg, (Bit32u)imm); } +// helper function +static bool gen_mov_memval_from_reg_helper(HostReg src_reg, Bit32u data, Bitu size, HostReg addr_reg, Bit32u addr_data) { + switch (size) { + case 4: +#if !defined(C_UNALIGNED_MEMORY) + if ((data & 3) == 0) +#endif + { + if ((data >= addr_data) && (data < addr_data + 128) && (((data - addr_data) & 3) == 0)) { + cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg + cache_addw( STR_IMM(src_reg, templo2, data - addr_data) ); // str src_reg, [templo2, #(data - addr_data)] + return true; + } + } + break; + case 2: +#if !defined(C_UNALIGNED_MEMORY) + if ((data & 1) == 0) +#endif + { + if ((data >= addr_data) && (data < addr_data + 64) && (((data - addr_data) & 1) == 0)) { + cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg + cache_addw( STRH_IMM(src_reg, templo2, data - addr_data) ); // strh src_reg, [templo2, #(data - addr_data)] + return true; + } + } + break; + case 1: + if ((data >= addr_data) && (data < addr_data + 32)) { + cache_addw( MOV_LO_HI(templo2, addr_reg) ); // mov templo2, addr_reg + cache_addw( STRB_IMM(src_reg, templo2, data - 
addr_data) ); // strb src_reg, [templo2, #(data - addr_data)] + return true; + } + default: + break; + } + return false; +} + +// helper function +static bool gen_mov_memval_from_reg(HostReg src_reg, void *dest, Bitu size) { + if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_REGS_ADDR, (Bit32u)&cpu_regs)) return true; + if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, readdata_addr, (Bit32u)&core_dynrec.readdata)) return true; + if (gen_mov_memval_from_reg_helper(src_reg, (Bit32u)dest, size, FC_SEGS_ADDR, (Bit32u)&Segs)) return true; + return false; +} + // helper function for gen_mov_word_from_reg static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, HostReg data_reg) { // alignment.... if (dword) { +#if !defined(C_UNALIGNED_MEMORY) if ((Bit32u)dest & 3) { if ( ((Bit32u)dest & 3) == 2 ) { cache_addw( STRH_IMM(src_reg, data_reg, 0) ); // strh src_reg, [data_reg] @@ -268,16 +415,21 @@ static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, cache_addw( LSR_IMM(templo1, templo1, 24) ); // lsr templo1, templo1, #24 cache_addw( STRB_IMM(templo1, data_reg, 3) ); // strb templo1, [data_reg, #3] } - } else { + } else +#endif + { cache_addw( STR_IMM(src_reg, data_reg, 0) ); // str src_reg, [data_reg] } } else { +#if !defined(C_UNALIGNED_MEMORY) if ((Bit32u)dest & 1) { cache_addw( STRB_IMM(src_reg, data_reg, 0) ); // strb src_reg, [data_reg] cache_addw( MOV_REG(templo1, src_reg) ); // mov templo1, src_reg cache_addw( LSR_IMM(templo1, templo1, 8) ); // lsr templo1, templo1, #8 cache_addw( STRB_IMM(templo1, data_reg, 1) ); // strb templo1, [data_reg, #1] - } else { + } else +#endif + { cache_addw( STRH_IMM(src_reg, data_reg, 0) ); // strh src_reg, [data_reg] } } @@ -285,8 +437,10 @@ static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, // move 32bit (dword==true) or 16bit (dword==false) of a register into memory static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) { - gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_from_reg_helper(src_reg, dest, dword, templo2); + if (!gen_mov_memval_from_reg(src_reg, dest, (dword)?4:2)) { + gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); + gen_mov_word_from_reg_helper(src_reg, dest, dword, templo2); + } } // move an 8bit value from memory into dest_reg @@ -294,8 +448,10 @@ static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) { // this function does not use FC_OP1/FC_OP2 as dest_reg as these // registers might not be directly byte-accessible on some architectures static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) { - gen_mov_dword_to_reg_imm(templo1, (Bit32u)data); - cache_addw( LDRB_IMM(dest_reg, templo1, 0) ); // ldrb dest_reg, [templo1] + if (!gen_mov_memval_to_reg(dest_reg, data, 1)) { + gen_mov_dword_to_reg_imm(templo1, (Bit32u)data); + cache_addw( LDRB_IMM(dest_reg, templo1, 0) ); // ldrb dest_reg, [templo1] + } } // move an 8bit value from memory into dest_reg @@ -324,8 +480,10 @@ static void INLINE gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u // move the lowest 8bit of a register into memory static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) { - gen_mov_dword_to_reg_imm(templo1, (Bit32u)dest); - cache_addw( STRB_IMM(src_reg, templo1, 0) ); // strb src_reg, [templo1] + if (!gen_mov_memval_from_reg(src_reg, dest, 1)) { + gen_mov_dword_to_reg_imm(templo1, (Bit32u)dest); + cache_addw( STRB_IMM(src_reg, templo1, 0) ); // strb src_reg, [templo1] + } } @@ 
-362,16 +520,51 @@ static void gen_add(HostReg reg,void* op) { // add a 32bit constant value to a full register static void gen_add_imm(HostReg reg,Bit32u imm) { + Bit32u imm2, scale; + if(!imm) return; - gen_mov_dword_to_reg_imm(templo1, imm); - cache_addw( ADD_REG(reg, reg, templo1) ); // add reg, reg, templo1 + + imm2 = (Bit32u) (-((Bit32s)imm)); + + if (imm <= 255) { + cache_addw( ADD_IMM8(reg, imm) ); // add reg, #imm + } else if (imm2 <= 255) { + cache_addw( SUB_IMM8(reg, imm2) ); // sub reg, #(-imm) + } else { + if (val_single_shift(imm2, &scale)) { + cache_addw( MOV_IMM(templo1, imm2 >> scale) ); // mov templo1, #(~imm >> scale) + if (scale) { + cache_addw( LSL_IMM(templo1, templo1, scale) ); // lsl templo1, templo1, #scale + } + cache_addw( SUB_REG(reg, reg, templo1) ); // sub reg, reg, templo1 + } else { + gen_mov_dword_to_reg_imm(templo1, imm); + cache_addw( ADD_REG(reg, reg, templo1) ); // add reg, reg, templo1 + } + } } // and a 32bit constant value with a full register static void gen_and_imm(HostReg reg,Bit32u imm) { - if(imm == 0xffffffff) return; - gen_mov_dword_to_reg_imm(templo1, imm); - cache_addw( AND(reg, templo1) ); // and reg, templo1 + Bit32u imm2, scale; + + imm2 = ~imm; + if(!imm2) return; + + if (!imm) { + cache_addw( MOV_IMM(reg, 0) ); // mov reg, #0 + } else { + if (val_single_shift(imm2, &scale)) { + cache_addw( MOV_IMM(templo1, imm2 >> scale) ); // mov templo1, #(~imm >> scale) + if (scale) { + cache_addw( LSL_IMM(templo1, templo1, scale) ); // lsl templo1, templo1, #scale + } + cache_addw( BIC(reg, templo1) ); // bic reg, templo1 + } else { + gen_mov_dword_to_reg_imm(templo1, imm); + cache_addw( AND(reg, templo1) ); // and reg, templo1 + } + } } @@ -386,66 +579,65 @@ static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) { gen_mov_direct_dword(dest,(Bit32u)imm); } -// add an 8bit constant value to a dword memory value -static void gen_add_direct_byte(void* dest,Bit8s imm) { - if(!imm) return; - gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templo3, dest, 1, templo2); - if (imm >= 0) { - cache_addw( ADD_IMM8(templo3, (Bit32s)imm) ); // add templo3, #(imm) - } else { - cache_addw( SUB_IMM8(templo3, -((Bit32s)imm)) ); // sub templo3, #(-imm) - } - gen_mov_word_from_reg_helper(templo3, dest, 1, templo2); -} - // add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) { + if (!dword) imm &= 0xffff; if(!imm) return; - if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) { - gen_add_direct_byte(dest,(Bit8s)imm); - return; + + if (!gen_mov_memval_to_reg(templo3, dest, (dword)?4:2)) { + gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); + gen_mov_word_to_reg_helper(templo3, dest, dword, templo2); } - gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templo3, dest, dword, templo2); - if (dword) { - gen_mov_dword_to_reg_imm(templo1, imm); - } else { - gen_mov_word_to_reg_imm(templo1, (Bit16u)imm); + gen_add_imm(templo3, imm); + if (!gen_mov_memval_from_reg(templo3, dest, (dword)?4:2)) { + gen_mov_word_from_reg_helper(templo3, dest, dword, templo2); } - cache_addw( ADD_REG(templo3, templo3, templo1) ); // add templo3, templo3, templo1 - gen_mov_word_from_reg_helper(templo3, dest, dword, templo2); } -// subtract an 8bit constant value from a dword memory value -static void gen_sub_direct_byte(void* dest,Bit8s imm) { - if(!imm) return; - gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - 
gen_mov_word_to_reg_helper(templo3, dest, 1, templo2); - if (imm >= 0) { - cache_addw( SUB_IMM8(templo3, (Bit32s)imm) ); // sub templo3, #(imm) - } else { - cache_addw( ADD_IMM8(templo3, -((Bit32s)imm)) ); // add templo3, #(-imm) - } - gen_mov_word_from_reg_helper(templo3, dest, 1, templo2); +// add an 8bit constant value to a dword memory value +static void gen_add_direct_byte(void* dest,Bit8s imm) { + gen_add_direct_word(dest, (Bit32s)imm, 1); } // subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) { + Bit32u imm2, scale; + + if (!dword) imm &= 0xffff; if(!imm) return; - if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) { - gen_sub_direct_byte(dest,(Bit8s)imm); - return; + + if (!gen_mov_memval_to_reg(templo3, dest, (dword)?4:2)) { + gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); + gen_mov_word_to_reg_helper(templo3, dest, dword, templo2); } - gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); - gen_mov_word_to_reg_helper(templo3, dest, dword, templo2); - if (dword) { - gen_mov_dword_to_reg_imm(templo1, imm); + + imm2 = (Bit32u) (-((Bit32s)imm)); + + if (imm <= 255) { + cache_addw( SUB_IMM8(templo3, imm) ); // sub templo3, #imm + } else if (imm2 <= 255) { + cache_addw( ADD_IMM8(templo3, imm2) ); // add templo3, #(-imm) } else { - gen_mov_word_to_reg_imm(templo1, (Bit16u)imm); + if (val_single_shift(imm2, &scale)) { + cache_addw( MOV_IMM(templo1, imm2 >> scale) ); // mov templo1, #(~imm >> scale) + if (scale) { + cache_addw( LSL_IMM(templo1, templo1, scale) ); // lsl templo1, templo1, #scale + } + cache_addw( ADD_REG(templo3, templo3, templo1) ); // add templo3, templo3, templo1 + } else { + gen_mov_dword_to_reg_imm(templo1, imm); + cache_addw( SUB_REG(templo3, templo3, templo1) ); // sub templo3, templo3, templo1 + } } - cache_addw( SUB_REG(templo3, templo3, templo1) ); // sub templo3, templo3, templo1 - gen_mov_word_from_reg_helper(templo3, dest, dword, templo2); + + if (!gen_mov_memval_from_reg(templo3, dest, (dword)?4:2)) { + gen_mov_word_from_reg_helper(templo3, dest, dword, templo2); + } +} + +// subtract an 8bit constant value from a dword memory value +static void gen_sub_direct_byte(void* dest,Bit8s imm) { + gen_sub_direct_word(dest, (Bit32s)imm, 1); } // effective address calculation, destination is dest_reg @@ -491,7 +683,7 @@ static void INLINE gen_call_function_raw(void * func) { // switch from arm to thumb state cache_addd(0xe2800000 + (templo1 << 12) + (HOST_pc << 16) + (1)); // add templo1, pc, #1 cache_addd(0xe12fff10 + (templo1)); // bx templo1 - + // thumb state from now on } @@ -537,18 +729,20 @@ static void INLINE gen_load_param_mem(Bitu mem,Bitu param) { static void gen_jmp_ptr(void * ptr,Bits imm=0) { gen_mov_word_to_reg(templo3, ptr, 1); - if (imm) { - gen_mov_dword_to_reg_imm(templo2, imm); - cache_addw( ADD_REG(templo3, templo3, templo2) ); // add templo3, templo3, templo2 - } - -#if (1) -// (*ptr) should be word aligned +#if !defined(C_UNALIGNED_MEMORY) +// (*ptr) should be word aligned if ((imm & 0x03) == 0) { - cache_addw( LDR_IMM(templo2, templo3, 0) ); // ldr templo2, [templo3] - } else #endif - { + if ((imm >= 0) && (imm < 128) && ((imm & 3) == 0)) { + cache_addw( LDR_IMM(templo2, templo3, imm) ); // ldr templo2, [templo3, #imm] + } else { + gen_mov_dword_to_reg_imm(templo2, imm); + cache_addw( LDR_REG(templo2, templo3, templo2) ); // ldr templo2, [templo3, templo2] + } +#if !defined(C_UNALIGNED_MEMORY) + } else { + gen_add_imm(templo3, imm); + 
cache_addw( LDRB_IMM(templo2, templo3, 0) ); // ldrb templo2, [templo3] cache_addw( LDRB_IMM(templo1, templo3, 1) ); // ldrb templo1, [templo3, #1] cache_addw( LSL_IMM(templo1, templo1, 8) ); // lsl templo1, templo1, #8 @@ -560,6 +754,7 @@ static void gen_jmp_ptr(void * ptr,Bits imm=0) { cache_addw( LSL_IMM(templo1, templo1, 24) ); // lsl templo1, templo1, #24 cache_addw( ORR(templo2, templo1) ); // orr templo2, templo1 } +#endif // increase jmp address to keep thumb state cache_addw( ADD_IMM3(templo2, templo2, 1) ); // add templo2, templo2, #1 @@ -650,50 +845,53 @@ static void INLINE gen_fill_branch_long(Bit32u data) { } static void gen_run_code(void) { - // switch from arm to thumb state - cache_addd(0xe2800000 + (HOST_r3 << 12) + (HOST_pc << 16) + (1)); // add r3, pc, #1 - cache_addd(0xe12fff10 + (HOST_r3)); // bx r3 + Bit8u *pos1, *pos2, *pos3; - // thumb state from now on - cache_addw(0xb500); // push {lr} - cache_addw( MOV_LO_HI(HOST_r3, FC_SEGS_ADDR) ); // mov r3, FC_SEGS_ADDR - cache_addw( MOV_LO_HI(HOST_r2, FC_REGS_ADDR) ); // mov r2, FC_REGS_ADDR - cache_addw(0xb4fc); // push {r2,r3,v1-v4} +#if (__ARM_EABI__) + // 8-byte stack alignment + cache_addd(0xe92d4ff0); // stmfd sp!, {v1-v8,lr} +#else + cache_addd(0xe92d4df0); // stmfd sp!, {v1-v5,v7,v8,lr} +#endif - // adr: 16 - cache_addw( LDR_PC_IMM(HOST_r3, 64 - (16 + 4)) ); // ldr r3, [pc, #(&Segs)] - // adr: 18 - cache_addw( LDR_PC_IMM(HOST_r2, 68 - (18 + 2)) ); // ldr r2, [pc, #(&cpu_regs)] - cache_addw( MOV_HI_LO(FC_SEGS_ADDR, HOST_r3) ); // mov FC_SEGS_ADDR, r3 - cache_addw( MOV_HI_LO(FC_REGS_ADDR, HOST_r2) ); // mov FC_REGS_ADDR, r2 + cache_addd( ARM_ADD_IMM(HOST_r0, HOST_r0, 1, 0) ); // add r0, r0, #1 - // align 4 - cache_addw( ADD_LO_PC_IMM(HOST_r3, 8) ); // add r3, pc, #8 - cache_addw( ADD_IMM8(HOST_r0, 1) ); // add r0, #1 - cache_addw( ADD_IMM8(HOST_r3, 1) ); // add r3, #1 - cache_addw(0xb408); // push {r3} - cache_addw( BX(HOST_r0) ); // bx r0 - cache_addw( NOP ); // nop + pos1 = cache.pos; + cache_addd( 0 ); + pos2 = cache.pos; + cache_addd( 0 ); + pos3 = cache.pos; + cache_addd( 0 ); - // align 4 - cache_addw(0xbcfc); // pop {r2,r3,v1-v4} - cache_addw( MOV_HI_LO(FC_SEGS_ADDR, HOST_r3) ); // mov FC_SEGS_ADDR, r3 - cache_addw( MOV_HI_LO(FC_REGS_ADDR, HOST_r2) ); // mov FC_REGS_ADDR, r2 + cache_addd( ARM_ADD_IMM(HOST_lr, HOST_pc, 4, 0) ); // add lr, pc, #4 + cache_addd( ARM_STR_IMM_M_W(HOST_lr, HOST_sp, 4) ); // str lr, [sp, #-4]! 
+ cache_addd( ARM_BX(HOST_r0) ); // bx r0 - cache_addw(0xbc08); // pop {r3} - cache_addw( BX(HOST_r3) ); // bx r3 +#if (__ARM_EABI__) + cache_addd(0xe8bd4ff0); // ldmfd sp!, {v1-v8,lr} +#else + cache_addd(0xe8bd4df0); // ldmfd sp!, {v1-v5,v7,v8,lr} +#endif + cache_addd( ARM_BX(HOST_lr) ); // bx lr - // fill up to 64 bytes - cache_addw( NOP ); // nop - cache_addd( NOP | (NOP << 16) ); // nop, nop - cache_addd( NOP | (NOP << 16) ); // nop, nop - cache_addd( NOP | (NOP << 16) ); // nop, nop - cache_addd( NOP | (NOP << 16) ); // nop, nop + // align cache.pos to 32 bytes + if ((((Bitu)cache.pos) & 0x1f) != 0) { + cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f)); + } - // adr: 64 + *(Bit32u*)pos1 = ARM_LDR_IMM(FC_SEGS_ADDR, HOST_pc, cache.pos - (pos1 + 8)); // ldr FC_SEGS_ADDR, [pc, #(&Segs)] cache_addd((Bit32u)&Segs); // address of "Segs" - // adr: 68 + + *(Bit32u*)pos2 = ARM_LDR_IMM(FC_REGS_ADDR, HOST_pc, cache.pos - (pos2 + 8)); // ldr FC_REGS_ADDR, [pc, #(&cpu_regs)] cache_addd((Bit32u)&cpu_regs); // address of "cpu_regs" + + *(Bit32u*)pos3 = ARM_LDR_IMM(readdata_addr, HOST_pc, cache.pos - (pos3 + 8)); // ldr readdata_addr, [pc, #(&core_dynrec.readdata)] + cache_addd((Bit32u)&core_dynrec.readdata); // address of "core_dynrec.readdata" + + // align cache.pos to 32 bytes + if ((((Bitu)cache.pos) & 0x1f) != 0) { + cache.pos = cache.pos + (32 - (((Bitu)cache.pos) & 0x1f)); + } } // return from a function @@ -1023,7 +1221,11 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { } #endif -static void cache_block_before_close(void) { } +static void cache_block_before_close(void) { + if ((((Bit32u)cache.pos) & 3) != 0) { + cache_addw( NOP ); // nop + } +} #ifdef DRC_USE_SEGS_ADDR diff --git a/src/cpu/core_dynrec/risc_armv4le.h b/src/cpu/core_dynrec/risc_armv4le.h index a4d74d42..7a38d8f2 100644 --- a/src/cpu/core_dynrec/risc_armv4le.h +++ b/src/cpu/core_dynrec/risc_armv4le.h @@ -18,13 +18,19 @@ -/* ARMv4 (little endian) backend (switcher) by M-HT */ +/* ARMv4/ARMv7 (little endian) backend (switcher) by M-HT */ #include "risc_armv4le-common.h" // choose your destiny: -#include "risc_armv4le-thumb-niw.h" -//#include "risc_armv4le-thumb-iw.h" -//#include "risc_armv4le-thumb.h" -//#include "risc_armv4le-s3.h" -//#include "risc_armv4le-o3.h" +#if C_TARGETCPU == ARMV7LE + #include "risc_armv4le-o3.h" +#else + #if defined(__THUMB_INTERWORK__) + #include "risc_armv4le-thumb-iw.h" + #else + #include "risc_armv4le-o3.h" +// #include "risc_armv4le-thumb-niw.h" +// #include "risc_armv4le-thumb.h" + #endif +#endif
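For reference, the reworked gen_run_code variants above all share one pattern for picking up the addresses of Segs, cpu_regs and core_dynrec.readdata: a placeholder word is emitted where the ldr belongs and its position recorded, the three literals are appended after the epilogue once cache.pos has been aligned to 32 bytes, and each placeholder is then back-patched with a PC-relative ldr whose offset is literal_address - (instruction_address + 8), the +8 accounting for how ARM reads pc. Below is a simplified sketch of that back-patch step only, with a hypothetical name (backpatch_literal_load) and a fake byte buffer standing in for the code cache; it is not the patch's implementation, which writes *(Bit32u*)posN = ARM_LDR_IMM(reg, HOST_pc, cache.pos - (posN + 8)) directly.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* ldr reg, [pc, #offset]; the encoding the patch's ARM_LDR_IMM macro produces
   when the base register is HOST_pc (r15) */
#define ARM_LDR_PC_IMM(reg, imm) (0xe59f0000u + ((uint32_t)(reg) << 12) + (uint32_t)(imm))

/* Overwrite the placeholder word at 'pos' with a pc-relative load of the literal
   stored at 'lit_pos'; both must point into the same generated-code buffer. */
static void backpatch_literal_load(uint8_t *pos, const uint8_t *lit_pos, unsigned reg) {
    /* ARM reads pc as the current instruction address plus 8 */
    uint32_t offset = (uint32_t)(lit_pos - (pos + 8));
    uint32_t insn = ARM_LDR_PC_IMM(reg, offset);
    memcpy(pos, &insn, sizeof insn);
}

int main(void) {
    uint8_t buf[64] = {0};
    /* pretend the placeholder sits at offset 8 and the literal word at offset 40 */
    backpatch_literal_load(buf + 8, buf + 40, 7 /* destination register r7, illustrative */);
    uint32_t insn;
    memcpy(&insn, buf + 8, sizeof insn);
    printf("patched insn: 0x%08x\n", insn);   /* ldr r7, [pc, #24] */
    return 0;
}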