From 021c76f6d9417f9e20a2699db1f8210e6ca53e65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Strohh=C3=A4cker?= Date: Wed, 20 Aug 2008 14:13:21 +0000 Subject: [PATCH] add arm4 dynamic recompiler backend (M-HT) Imported-from: https://svn.code.sf.net/p/dosbox/code-0/dosbox/trunk@3206 --- src/cpu/core_dynrec.cpp | 7 +- src/cpu/core_dynrec/Makefile.am | 4 +- src/cpu/core_dynrec/decoder_basic.h | 5 +- src/cpu/core_dynrec/risc_armv4le-common.h | 97 +++ src/cpu/core_dynrec/risc_armv4le-o3.h | 858 ++++++++++++++++++++ src/cpu/core_dynrec/risc_armv4le-s3.h | 694 ++++++++++++++++ src/cpu/core_dynrec/risc_armv4le-thumb.h | 934 ++++++++++++++++++++++ src/cpu/core_dynrec/risc_armv4le.h | 29 + src/cpu/core_dynrec/risc_mipsel32.h | 16 +- src/cpu/core_dynrec/risc_x64.h | 6 +- src/cpu/core_dynrec/risc_x86.h | 6 +- 11 files changed, 2649 insertions(+), 7 deletions(-) create mode 100644 src/cpu/core_dynrec/risc_armv4le-common.h create mode 100644 src/cpu/core_dynrec/risc_armv4le-o3.h create mode 100644 src/cpu/core_dynrec/risc_armv4le-s3.h create mode 100644 src/cpu/core_dynrec/risc_armv4le-thumb.h create mode 100644 src/cpu/core_dynrec/risc_armv4le.h diff --git a/src/cpu/core_dynrec.cpp b/src/cpu/core_dynrec.cpp index cca2c66e..787bc908 100644 --- a/src/cpu/core_dynrec.cpp +++ b/src/cpu/core_dynrec.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2002-2007 The DOSBox Team + * Copyright (C) 2002-2008 The DOSBox Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,7 +16,7 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* $Id: core_dynrec.cpp,v 1.9 2007-11-24 17:26:48 c2woody Exp $ */ +/* $Id: core_dynrec.cpp,v 1.10 2008-08-20 14:13:21 c2woody Exp $ */ #include "dosbox.h" @@ -138,6 +138,7 @@ static struct { #define X86 0x01 #define X86_64 0x02 #define MIPSEL 0x03 +#define ARMV4LE 0x04 #if C_TARGETCPU == X86_64 #include "core_dynrec/risc_x64.h" @@ -145,6 +146,8 @@ static struct { #include "core_dynrec/risc_x86.h" #elif C_TARGETCPU == MIPSEL #include "core_dynrec/risc_mipsel32.h" +#elif C_TARGETCPU == ARMV4LE +#include "core_dynrec/risc_armv4le.h" #endif #include "core_dynrec/decoder.h" diff --git a/src/cpu/core_dynrec/Makefile.am b/src/cpu/core_dynrec/Makefile.am index 50c57a7a..ca58ca90 100644 --- a/src/cpu/core_dynrec/Makefile.am +++ b/src/cpu/core_dynrec/Makefile.am @@ -1,2 +1,4 @@ noinst_HEADERS = cache.h decoder.h decoder_basic.h decoder_opcodes.h \ - dyn_fpu.h operators.h risc_x64.h risc_x86.h risc_mipsel32.h \ No newline at end of file + dyn_fpu.h operators.h risc_x64.h risc_x86.h risc_mipsel32.h \ + risc_armv4le.h risc_armv4le-common.h \ + risc_armv4le-s3.h risc_armv4le-o3.h risc_armv4le-thumb.h diff --git a/src/cpu/core_dynrec/decoder_basic.h b/src/cpu/core_dynrec/decoder_basic.h index c3d95773..4b4f7321 100644 --- a/src/cpu/core_dynrec/decoder_basic.h +++ b/src/cpu/core_dynrec/decoder_basic.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2002-2007 The DOSBox Team + * Copyright (C) 2002-2008 The DOSBox Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,6 +16,8 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
 */
+/* $Id: decoder_basic.h,v 1.11 2008-08-20 14:13:21 c2woody Exp $ */
+
 
 /*
 	This file provides some definitions and basic level functions
@@ -554,6 +556,7 @@ static void dyn_closeblock(void) {
 	//Shouldn't create empty block normally but let's do it like this
 	dyn_fill_blocks();
 	cache_closeblock();
+	cache_block_closing(decode.block->cache.start,decode.block->cache.size);
 }
 
 
diff --git a/src/cpu/core_dynrec/risc_armv4le-common.h b/src/cpu/core_dynrec/risc_armv4le-common.h
new file mode 100644
index 00000000..599f65ac
--- /dev/null
+++ b/src/cpu/core_dynrec/risc_armv4le-common.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2002-2008 The DOSBox Team
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+/* $Id: risc_armv4le-common.h,v 1.1 2008-08-20 14:13:21 c2woody Exp $ */
+
+
+/* ARMv4 (little endian) backend by M-HT (common data/functions) */
+
+
+// some configuring defines that specify the capabilities of this architecture
+// or aspects of the recompiling
+
+// protect FC_ADDR over function calls if necessary
+// #define DRC_PROTECT_ADDR_REG
+
+// try to use non-flags generating functions if possible
+#define DRC_FLAGS_INVALIDATION
+// try to replace _simple functions by code
+#define DRC_FLAGS_INVALIDATION_DCODE
+
+// type with the same size as a pointer
+#define DRC_PTR_SIZE_IM Bit32u
+
+// calling convention modifier
+#define DRC_CALL_CONV	/* nothing */
+#define DRC_FC	/* nothing */
+
+// register mapping
+typedef Bit8u HostReg;
+
+// "lo" registers
+#define HOST_r0 0
+#define HOST_r1 1
+#define HOST_r2 2
+#define HOST_r3 3
+#define HOST_r4 4
+#define HOST_r5 5
+#define HOST_r6 6
+#define HOST_r7 7
+// "hi" registers
+#define HOST_r8 8
+#define HOST_r9 9
+#define HOST_r10 10
+#define HOST_r11 11
+#define HOST_r12 12
+#define HOST_r13 13
+#define HOST_r14 14
+#define HOST_r15 15
+
+// register aliases
+// "lo" registers
+#define HOST_a1 HOST_r0
+#define HOST_a2 HOST_r1
+#define HOST_a3 HOST_r2
+#define HOST_a4 HOST_r3
+#define HOST_v1 HOST_r4
+#define HOST_v2 HOST_r5
+#define HOST_v3 HOST_r6
+#define HOST_v4 HOST_r7
+// "hi" registers
+#define HOST_v5 HOST_r8
+#define HOST_v6 HOST_r9
+#define HOST_v7 HOST_r10
+#define HOST_v8 HOST_r11
+#define HOST_ip HOST_r12
+#define HOST_sp HOST_r13
+#define HOST_lr HOST_r14
+#define HOST_pc HOST_r15
+
+
+static void cache_block_closing(Bit8u* block_start,Bitu block_size) {
+// GP2X BEGIN
+	//flush cache
+	register unsigned long _beg __asm ("a1") = (unsigned long)(block_start);	// block start
+	register unsigned long _end __asm ("a2") = (unsigned long)(block_start+block_size);	// block end
+	register unsigned long _flg __asm ("a3") = 0;
+	__asm __volatile ("swi 0x9f0002		@ sys_cacheflush"
+		: // no outputs
+		: "r" (_beg), "r" (_end), "r" (_flg)	// declare the register arguments as inputs so they stay live
+		: "memory");
+// GP2X END
+}
diff --git a/src/cpu/core_dynrec/risc_armv4le-o3.h b/src/cpu/core_dynrec/risc_armv4le-o3.h
new file mode 100644
index 00000000..d639d297
--- /dev/null +++ b/src/cpu/core_dynrec/risc_armv4le-o3.h @@ -0,0 +1,858 @@ +/* + * Copyright (C) 2002-2008 The DOSBox Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +/* $Id: risc_armv4le-o3.h,v 1.1 2008-08-20 14:13:21 c2woody Exp $ */ + + +/* ARMv4 (little endian) backend by M-HT (size-tweaked arm version) */ + + +// temporary registers +#define temp1 HOST_ip +#define temp2 HOST_v5 +#define temp3 HOST_v4 + +// register that holds function return values +#define FC_RETOP HOST_v3 + +// register used for address calculations, +#define FC_ADDR HOST_v1 // has to be saved across calls, see DRC_PROTECT_ADDR_REG + +// register that holds the first parameter +#define FC_OP1 HOST_a1 + +// register that holds the second parameter +#define FC_OP2 HOST_a2 + +// register that holds byte-accessible temporary values +#define FC_TMP_BA1 HOST_a1 + +// register that holds byte-accessible temporary values +#define FC_TMP_BA2 HOST_a2 + +// temporary register for LEA +#define TEMP_REG_DRC HOST_v2 + +// helper macro +#define ROTATE_SCALE(x) ( (x)?(32 - x):(0) ) + +// move a full register from reg_src to reg_dst +static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) { + if(reg_src == reg_dst) return; + cache_addd(0xe1a00000 + (reg_dst << 12) + reg_src); // mov reg_dst, reg_src +} + +// helper function +static Bits get_imm_gen_len(Bit32u imm) { + Bits ret; + if (imm == 0) { + return 1; + } else { + ret = 0; + while (imm) { + while ((imm & 3) == 0) { + imm>>=2; + } + ret++; + imm>>=8; + } + return ret; + } +} + +// helper function +static Bits get_method_imm_gen_len(Bit32u imm, Bits preffer00, Bits *num) { + Bits num00, num15, numadd, numsub, numret, ret; + num00 = get_imm_gen_len(imm); + num15 = get_imm_gen_len(~imm); + numadd = get_imm_gen_len(imm - ((Bit32u)cache.pos+8)); + numsub = get_imm_gen_len(((Bit32u)cache.pos+8) - imm); + if (numsub < numadd && numsub < num00 && numsub < num15) { + ret = 0; + numret = numsub; + } else if (numadd < num00 && numadd < num15) { + ret = 1; + numret = numadd; + } else if (num00 < num15 || (num00 == num15 && preffer00)) { + ret = 2; + numret = num00; + } else { + ret = 3; + numret = num15; + } + if (num != NULL) *num = numret; + return ret; +} + +// move a 32bit constant value into dest_reg +static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) { + Bits first, method, scale; + Bit32u imm2, dist; + if (imm == 0) { + cache_addd(0xe3a00000 + (dest_reg << 12)); // mov dest_reg, #0 + } else if (imm == 0xffffffff) { + cache_addd(0xe3e00000 + (dest_reg << 12)); // mvn dest_reg, #0 + } else { + method = get_method_imm_gen_len(imm, 1, NULL); + + scale = 0; + first = 1; + if (method == 0) { + dist = ((Bit32u)cache.pos+8) - imm; + while (dist) { + while ((dist & 3) == 0) { + dist>>=2; + scale+=2; + } + if (first) { + cache_addd(0xe2400000 + (dest_reg << 12) + (HOST_pc << 16) + 
(ROTATE_SCALE(scale) << 7) + (dist & 0xff)); // sub dest_reg, pc, #((dist & 0xff) << scale) + first = 0; + } else { + cache_addd(0xe2400000 + (dest_reg << 12) + (dest_reg << 16) + (ROTATE_SCALE(scale) << 7) + (dist & 0xff)); // sub dest_reg, dest_reg, #((dist & 0xff) << scale) + } + dist>>=8; + scale+=8; + } + } else if (method == 1) { + dist = imm - ((Bit32u)cache.pos+8); + if (dist == 0) { + cache_addd(0xe1a00000 + (dest_reg << 12) + HOST_pc); // mov dest_reg, pc + } else { + while (dist) { + while ((dist & 3) == 0) { + dist>>=2; + scale+=2; + } + if (first) { + cache_addd(0xe2800000 + (dest_reg << 12) + (HOST_pc << 16) + (ROTATE_SCALE(scale) << 7) + (dist & 0xff)); // add dest_reg, pc, #((dist & 0xff) << scale) + first = 0; + } else { + cache_addd(0xe2800000 + (dest_reg << 12) + (dest_reg << 16) + (ROTATE_SCALE(scale) << 7) + (dist & 0xff)); // add dest_reg, dest_reg, #((dist & 0xff) << scale) + } + dist>>=8; + scale+=8; + } + } + } else if (method == 2) { + while (imm) { + while ((imm & 3) == 0) { + imm>>=2; + scale+=2; + } + if (first) { + cache_addd(0xe3a00000 + (dest_reg << 12) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // mov dest_reg, #((imm & 0xff) << scale) + first = 0; + } else { + cache_addd(0xe3800000 + (dest_reg << 12) + (dest_reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // orr dest_reg, dest_reg, #((imm & 0xff) << scale) + } + imm>>=8; + scale+=8; + } + } else { + imm2 = ~imm; + while (imm2) { + while ((imm2 & 3) == 0) { + imm2>>=2; + scale+=2; + } + if (first) { + cache_addd(0xe3e00000 + (dest_reg << 12) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // mvn dest_reg, #((imm2 & 0xff) << scale) + first = 0; + } else { + cache_addd(0xe3c00000 + (dest_reg << 12) + (dest_reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // bic dest_reg, dest_reg, #((imm2 & 0xff) << scale) + } + imm2>>=8; + scale+=8; + } + } + } +} + +// helper function for gen_mov_word_to_reg +static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,HostReg data_reg) { + // alignment.... 
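+	// note: ARMv4 has no unaligned access support (a misaligned ldr rotates
+	// the loaded word, a misaligned ldrh is unpredictable), so a misaligned
+	// value is assembled from smaller naturally-aligned loads. For example,
+	// for (data & 3) == 2 the sequence emitted below is:
+	//   ldrh dest_reg, [data_reg]        @ bits 0..15
+	//   ldrh temp2, [data_reg, #2]       @ bits 16..31
+	//   orr  dest_reg, dest_reg, temp2, lsl #16
+	// temp2 is clobbered; callers must not pass it as dest_reg or data_reg.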
+ if (dword) { + if ((Bit32u)data & 3) { + if ( ((Bit32u)data & 3) == 2 ) { + cache_addd(0xe1d000b0 + (dest_reg << 12) + (data_reg << 16)); // ldrh dest_reg, [data_reg] + cache_addd(0xe1d000b2 + (temp2 << 12) + (data_reg << 16)); // ldrh temp2, [data_reg, #2] + cache_addd(0xe1800800 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #16 + } else { + cache_addd(0xe5d00000 + (dest_reg << 12) + (data_reg << 16)); // ldrb dest_reg, [data_reg] + cache_addd(0xe1d000b1 + (temp2 << 12) + (data_reg << 16)); // ldrh temp2, [data_reg, #1] + cache_addd(0xe1800400 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #8 + cache_addd(0xe5d00003 + (temp2 << 12) + (data_reg << 16)); // ldrb temp2, [data_reg, #3] + cache_addd(0xe1800c00 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #24 + } + } else { + cache_addd(0xe5900000 + (dest_reg << 12) + (data_reg << 16)); // ldr dest_reg, [data_reg] + } + } else { + if ((Bit32u)data & 1) { + cache_addd(0xe5d00000 + (dest_reg << 12) + (data_reg << 16)); // ldrb dest_reg, [data_reg] + cache_addd(0xe5d00001 + (temp2 << 12) + (data_reg << 16)); // ldrb temp2, [data_reg, #1] + cache_addd(0xe1800400 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #8 + } else { + cache_addd(0xe1d000b0 + (dest_reg << 12) + (data_reg << 16)); // ldrh dest_reg, [data_reg] + } + } +} + +// move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg +// 16bit moves may destroy the upper 16bit of the destination register +static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) { + gen_mov_dword_to_reg_imm(temp1, (Bit32u)data); + gen_mov_word_to_reg_helper(dest_reg, data, dword, temp1); +} + +// move a 16bit constant value into dest_reg +// the upper 16bit of the destination register may be destroyed +static void gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) { + Bits first, scale; + Bit32u imm2; + if (imm == 0) { + cache_addd(0xe3a00000 + (dest_reg << 12)); // mov dest_reg, #0 + } else { + scale = 0; + first = 1; + imm2 = (Bit32u)imm; + while (imm2) { + while ((imm2 & 3) == 0) { + imm2>>=2; + scale+=2; + } + if (first) { + cache_addd(0xe3a00000 + (dest_reg << 12) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // mov dest_reg, #((imm2 & 0xff) << scale) + first = 0; + } else { + cache_addd(0xe3800000 + (dest_reg << 12) + (dest_reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // orr dest_reg, dest_reg, #((imm2 & 0xff) << scale) + } + imm2>>=8; + scale+=8; + } + } +} + +// helper function for gen_mov_word_from_reg +static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, HostReg data_reg) { + // alignment.... 
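+	// note: store counterpart of the load helper above; the value is split
+	// with "mov temp2, src_reg, lsr #n" and written back as naturally-aligned
+	// halfwords/bytes, leaving src_reg itself unmodified (only temp2 is
+	// clobbered). For an odd dest a dword store costs 5 instructions:
+	// strb, lsr #8, strh, lsr #16, strb.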
+ if (dword) { + if ((Bit32u)dest & 3) { + if ( ((Bit32u)dest & 3) == 2 ) { + cache_addd(0xe1c000b0 + (src_reg << 12) + (data_reg << 16)); // strh src_reg, [data_reg] + cache_addd(0xe1a00820 + (temp2 << 12) + (src_reg)); // mov temp2, src_reg, lsr #16 + cache_addd(0xe1c000b2 + (temp2 << 12) + (data_reg << 16)); // strh temp2, [data_reg, #2] + } else { + cache_addd(0xe5c00000 + (src_reg << 12) + (data_reg << 16)); // strb src_reg, [data_reg] + cache_addd(0xe1a00420 + (temp2 << 12) + (src_reg)); // mov temp2, src_reg, lsr #8 + cache_addd(0xe1c000b1 + (temp2 << 12) + (data_reg << 16)); // strh temp2, [data_reg, #1] + cache_addd(0xe1a00820 + (temp2 << 12) + (temp2)); // mov temp2, temp2, lsr #16 + cache_addd(0xe5c00003 + (temp2 << 12) + (data_reg << 16)); // strb temp2, [data_reg, #3] + } + } else { + cache_addd(0xe5800000 + (src_reg << 12) + (data_reg << 16)); // str src_reg, [data_reg] + } + } else { + if ((Bit32u)dest & 1) { + cache_addd(0xe5c00000 + (src_reg << 12) + (data_reg << 16)); // strb src_reg, [data_reg] + cache_addd(0xe1a00420 + (temp2 << 12) + (src_reg)); // mov temp2, src_reg, lsr #8 + cache_addd(0xe5c00001 + (temp2 << 12) + (data_reg << 16)); // strb temp2, [data_reg, #1] + } else { + cache_addd(0xe1c000b0 + (src_reg << 12) + (data_reg << 16)); // strh src_reg, [data_reg] + } + } +} + +// move 32bit (dword==true) or 16bit (dword==false) of a register into memory +static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) { + gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); + gen_mov_word_from_reg_helper(src_reg, dest, dword, temp1); +} + +// move an 8bit value from memory into dest_reg +// the upper 24bit of the destination register can be destroyed +// this function does not use FC_OP1/FC_OP2 as dest_reg as these +// registers might not be directly byte-accessible on some architectures +static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) { + gen_mov_dword_to_reg_imm(temp1, (Bit32u)data); + cache_addd(0xe5d00000 + (dest_reg << 12) + (temp1 << 16)); // ldrb dest_reg, [temp1] +} + +// move an 8bit value from memory into dest_reg +// the upper 24bit of the destination register can be destroyed +// this function can use FC_OP1/FC_OP2 as dest_reg which are +// not directly byte-accessible on some architectures +static void INLINE gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) { + gen_mov_byte_to_reg_low(dest_reg, data); +} + +// move an 8bit constant value into dest_reg +// the upper 24bit of the destination register can be destroyed +// this function does not use FC_OP1/FC_OP2 as dest_reg as these +// registers might not be directly byte-accessible on some architectures +static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) { + cache_addd(0xe3a00000 + (dest_reg << 12) + (imm)); // mov dest_reg, #(imm) +} + +// move an 8bit constant value into dest_reg +// the upper 24bit of the destination register can be destroyed +// this function can use FC_OP1/FC_OP2 as dest_reg which are +// not directly byte-accessible on some architectures +static void INLINE gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) { + gen_mov_byte_to_reg_low_imm(dest_reg, imm); +} + +// move the lowest 8bit of a register into memory +static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) { + gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); + cache_addd(0xe5c00000 + (src_reg << 12) + (temp1 << 16)); // strb src_reg, [temp1] +} + + + +// convert an 8bit word to a 32bit dword +// the register is zero-extended (sign==false) or 
sign-extended (sign==true) +static void gen_extend_byte(bool sign,HostReg reg) { + if (sign) { + cache_addd(0xe1a00c00 + (reg << 12) + (reg)); // mov reg, reg, lsl #24 + cache_addd(0xe1a00c40 + (reg << 12) + (reg)); // mov reg, reg, asr #24 + } else { + cache_addd(0xe20000ff + (reg << 12) + (reg << 16)); // and reg, reg, #0xff + } +} + +// convert a 16bit word to a 32bit dword +// the register is zero-extended (sign==false) or sign-extended (sign==true) +static void gen_extend_word(bool sign,HostReg reg) { + if (sign) { + cache_addd(0xe1a00800 + (reg << 12) + (reg)); // mov reg, reg, lsl #16 + cache_addd(0xe1a00840 + (reg << 12) + (reg)); // mov reg, reg, asr #16 + } else { + cache_addd(0xe1a00800 + (reg << 12) + (reg)); // mov reg, reg, lsl #16 + cache_addd(0xe1a00820 + (reg << 12) + (reg)); // mov reg, reg, lsr #16 + } +} + +// add a 32bit value from memory to a full register +static void gen_add(HostReg reg,void* op) { + gen_mov_word_to_reg(temp3, op, 1); + cache_addd(0xe0800000 + (reg << 12) + (reg << 16) + (temp3)); // add reg, reg, temp3 +} + +// add a 32bit constant value to a full register +static void gen_add_imm(HostReg reg,Bit32u imm) { + Bits method1, method2, num1, num2, scale, sub; + if(!imm) return; + if (imm == 1) { + cache_addd(0xe2800001 + (reg << 12) + (reg << 16)); // add reg, reg, #1 + } else if (imm == 0xffffffff) { + cache_addd(0xe2400001 + (reg << 12) + (reg << 16)); // sub reg, reg, #1 + } else { + method1 = get_method_imm_gen_len(imm, 1, &num1); + method2 = get_method_imm_gen_len(-((Bit32s)imm), 1, &num2); + if (num2 < num1) { + method1 = method2; + imm = (Bit32u)(-((Bit32s)imm)); + sub = 1; + } else sub = 0; + + if (method1 != 2) { + gen_mov_dword_to_reg_imm(temp3, imm); + if (sub) { + cache_addd(0xe0400000 + (reg << 12) + (reg << 16) + (temp3)); // sub reg, reg, temp3 + } else { + cache_addd(0xe0800000 + (reg << 12) + (reg << 16) + (temp3)); // add reg, reg, temp3 + } + } else { + scale = 0; + while (imm) { + while ((imm & 3) == 0) { + imm>>=2; + scale+=2; + } + if (sub) { + cache_addd(0xe2400000 + (reg << 12) + (reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // sub reg, reg, #((imm & 0xff) << scale) + } else { + cache_addd(0xe2800000 + (reg << 12) + (reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // add reg, reg, #((imm & 0xff) << scale) + } + imm>>=8; + scale+=8; + } + } + } +} + +// and a 32bit constant value with a full register +static void gen_and_imm(HostReg reg,Bit32u imm) { + Bits method, scale; + Bit32u imm2; + imm2 = ~imm; + if(!imm2) return; + if (!imm) { + cache_addd(0xe3a00000 + (reg << 12)); // mov reg, #0 + } else { + method = get_method_imm_gen_len(imm, 0, NULL); + if (method != 3) { + gen_mov_dword_to_reg_imm(temp3, imm); + cache_addd(0xe0000000 + (reg << 12) + (reg << 16) + (temp3)); // and reg, reg, temp3 + } else { + scale = 0; + while (imm2) { + while ((imm2 & 3) == 0) { + imm2>>=2; + scale+=2; + } + cache_addd(0xe3c00000 + (reg << 12) + (reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // bic reg, reg, #((imm2 & 0xff) << scale) + imm2>>=8; + scale+=8; + } + } + } +} + + +// move a 32bit constant value into memory +static void gen_mov_direct_dword(void* dest,Bit32u imm) { + gen_mov_dword_to_reg_imm(temp3, imm); + gen_mov_word_from_reg(temp3, dest, 1); +} + +// move an address into memory +static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) { + gen_mov_direct_dword(dest,(Bit32u)imm); +} + +// add an 8bit constant value to a dword memory value +static void gen_add_direct_byte(void* dest,Bit8s 
imm) { + if(!imm) return; + gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); + gen_mov_word_to_reg_helper(temp3, dest, 1, temp1); + if (imm >= 0) { + cache_addd(0xe2800000 + (temp3 << 12) + (temp3 << 16) + ((Bit32s)imm)); // add temp3, temp3, #(imm) + } else { + cache_addd(0xe2400000 + (temp3 << 12) + (temp3 << 16) + (-((Bit32s)imm))); // sub temp3, temp3, #(-imm) + } + gen_mov_word_from_reg_helper(temp3, dest, 1, temp1); +} + +// add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value +static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) { + if(!imm) return; + if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) { + gen_add_direct_byte(dest,(Bit8s)imm); + return; + } + gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); + gen_mov_word_to_reg_helper(temp3, dest, dword, temp1); + // maybe use function gen_add_imm + if (dword) { + gen_mov_dword_to_reg_imm(temp2, imm); + } else { + gen_mov_word_to_reg_imm(temp2, (Bit16u)imm); + } + cache_addd(0xe0800000 + (temp3 << 12) + (temp3 << 16) + (temp2)); // add temp3, temp3, temp2 + gen_mov_word_from_reg_helper(temp3, dest, dword, temp1); +} + +// subtract an 8bit constant value from a dword memory value +static void gen_sub_direct_byte(void* dest,Bit8s imm) { + if(!imm) return; + gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); + gen_mov_word_to_reg_helper(temp3, dest, 1, temp1); + if (imm >= 0) { + cache_addd(0xe2400000 + (temp3 << 12) + (temp3 << 16) + ((Bit32s)imm)); // sub temp3, temp3, #(imm) + } else { + cache_addd(0xe2800000 + (temp3 << 12) + (temp3 << 16) + (-((Bit32s)imm))); // add temp3, temp3, #(-imm) + } + gen_mov_word_from_reg_helper(temp3, dest, 1, temp1); +} + +// subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value +static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) { + if(!imm) return; + if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) { + gen_sub_direct_byte(dest,(Bit8s)imm); + return; + } + gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); + gen_mov_word_to_reg_helper(temp3, dest, dword, temp1); + // maybe use function gen_add_imm/gen_sub_imm + if (dword) { + gen_mov_dword_to_reg_imm(temp2, imm); + } else { + gen_mov_word_to_reg_imm(temp2, (Bit16u)imm); + } + cache_addd(0xe0400000 + (temp3 << 12) + (temp3 << 16) + (temp2)); // sub temp3, temp3, temp2 + gen_mov_word_from_reg_helper(temp3, dest, dword, temp1); +} + +// effective address calculation, destination is dest_reg +// scale_reg is scaled by scale (scale_reg*(2^scale)) and +// added to dest_reg, then the immediate value is added +static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) { + cache_addd(0xe0800000 + (dest_reg << 12) + (dest_reg << 16) + (scale_reg) + (scale << 7)); // add dest_reg, dest_reg, scale_reg, lsl #(scale) + gen_add_imm(dest_reg, imm); +} + +// effective address calculation, destination is dest_reg +// dest_reg is scaled by scale (dest_reg*(2^scale)), +// then the immediate value is added +static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) { + if (scale) { + cache_addd(0xe1a00000 + (dest_reg << 12) + (dest_reg) + (scale << 7)); // mov dest_reg, dest_reg, lsl #(scale) + } + gen_add_imm(dest_reg, imm); +} + +// generate a call to a parameterless function +static void INLINE gen_call_function_raw(void * func) { + cache_addd(0xe5900004 + (temp1 << 12) + (HOST_pc << 16)); // ldr temp1, [pc, #4] + cache_addd(0xe2800004 + (HOST_lr << 12) + (HOST_pc << 16)); // add lr, pc, #4 + cache_addd(0xe12fff10 + (temp1)); // bx temp1 + 
cache_addd((Bit32u)func); // .int func + cache_addd(0xe1a00000 + (FC_RETOP << 12) + HOST_a1); // mov FC_RETOP, a1 +} + +// generate a call to a function with paramcount parameters +// note: the parameters are loaded in the architecture specific way +// using the gen_load_param_ functions below +static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) { + Bit32u proc_addr = (Bit32u)cache.pos; + gen_call_function_raw(func); + return proc_addr; +} + +#if (1) +// max of 4 parameters in a1-a4 + +// load an immediate value as param'th function parameter +static void INLINE gen_load_param_imm(Bitu imm,Bitu param) { + gen_mov_dword_to_reg_imm(param, imm); +} + +// load an address as param'th function parameter +static void INLINE gen_load_param_addr(Bitu addr,Bitu param) { + gen_mov_dword_to_reg_imm(param, addr); +} + +// load a host-register as param'th function parameter +static void INLINE gen_load_param_reg(Bitu reg,Bitu param) { + gen_mov_regs(param, reg); +} + +// load a value from memory as param'th function parameter +static void INLINE gen_load_param_mem(Bitu mem,Bitu param) { + gen_mov_word_to_reg(param, (void *)mem, 1); +} +#else + other arm abis +#endif + +// jump to an address pointed at by ptr, offset is in imm +static void gen_jmp_ptr(void * ptr,Bits imm=0) { + Bits num1, num2, scale, sub; + Bitu imm2; + gen_mov_word_to_reg(temp3, ptr, 1); + + if (imm) { + num1 = get_imm_gen_len(imm); + num2 = get_imm_gen_len(-imm); + + if (num2 < num1) { + imm = -imm; + sub = 1; + } else sub = 0; + + scale = 0; + imm2 = (Bitu)imm; + while (imm2) { + while ((imm2 & 3) == 0) { + imm2>>=2; + scale+=2; + } + if (sub) { + cache_addd(0xe2400000 + (temp3 << 12) + (temp3 << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // sub temp3, temp3, #((imm2 & 0xff) << scale) + } else { + cache_addd(0xe2800000 + (temp3 << 12) + (temp3 << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // add temp3, temp3, #((imm2 & 0xff) << scale) + } + imm2>>=8; + scale+=8; + } + } + +#if (1) +// (*ptr) should be word aligned + if ((imm & 0x03) == 0) { + cache_addd(0xe5900000 + (temp1 << 12) + (temp3 << 16)); // ldr temp1, [temp3] + } else +#endif + { + cache_addd(0xe5d00000 + (temp1 << 12) + (temp3 << 16)); // ldrb temp1, [temp3] + cache_addd(0xe5d00001 + (temp2 << 12) + (temp3 << 16)); // ldrb temp2, [temp3, #1] + cache_addd(0xe1800400 + (temp1 << 12) + (temp1 << 16) + (temp2)); // orr temp1, temp1, temp2, lsl #8 + cache_addd(0xe5d00002 + (temp2 << 12) + (temp3 << 16)); // ldrb temp2, [temp3, #2] + cache_addd(0xe1800800 + (temp1 << 12) + (temp1 << 16) + (temp2)); // orr temp1, temp1, temp2, lsl #16 + cache_addd(0xe5d00003 + (temp2 << 12) + (temp3 << 16)); // ldrb temp2, [temp3, #3] + cache_addd(0xe1800c00 + (temp1 << 12) + (temp1 << 16) + (temp2)); // orr temp1, temp1, temp2, lsl #24 + } + + cache_addd(0xe12fff10 + (temp1)); // bx temp1 +} + +// short conditional jump (+-127 bytes) if register is zero +// the destination is set by gen_fill_branch() later +static Bit32u INLINE gen_create_branch_on_zero(HostReg reg,bool dword) { + if (dword) { + cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0 + } else { + cache_addd(0xe1b00800 + (temp1 << 12) + (reg)); // movs temp1, reg, lsl #16 + } + cache_addd(0x0a000000); // beq j + return ((Bit32u)cache.pos-4); +} + +// short conditional jump (+-127 bytes) if register is nonzero +// the destination is set by gen_fill_branch() later +static Bit32u INLINE gen_create_branch_on_nonzero(HostReg reg,bool dword) { + if (dword) { + cache_addd(0xe3500000 
+ (reg << 16)); // cmp reg, #0 + } else { + cache_addd(0xe1b00800 + (temp1 << 12) + (reg)); // movs temp1, reg, lsl #16 + } + cache_addd(0x1a000000); // bne j + return ((Bit32u)cache.pos-4); +} + +// calculate relative offset and fill it into the location pointed to by data +static void INLINE gen_fill_branch(DRC_PTR_SIZE_IM data) { +#if C_DEBUG + Bits len=(Bit32u)cache.pos-(data+8); + if (len<0) len=-len; + if (len>0x02000000) LOG_MSG("Big jump %d",len); +#endif + *(Bit32u*)data=( (*(Bit32u*)data) & 0xff000000 ) | ( ( ((Bit32u)cache.pos - (data+8)) >> 2 ) & 0x00ffffff ); +} + +// conditional jump if register is nonzero +// for isdword==true the 32bit of the register are tested +// for isdword==false the lowest 8bit of the register are tested +static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool isdword) { + if (isdword) { + cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0 + } else { + cache_addd(0xe31000ff + (reg << 16)); // tst reg, #0xff + } + cache_addd(0x0a000002); // beq nobranch + cache_addd(0xe5900000 + (temp1 << 12) + (HOST_pc << 16)); // ldr temp1, [pc, #0] + cache_addd(0xe12fff10 + (temp1)); // bx temp1 + cache_addd(0); // fill j + // nobranch: + return ((Bit32u)cache.pos-4); +} + +// compare 32bit-register against zero and jump if value less/equal than zero +static Bit32u INLINE gen_create_branch_long_leqzero(HostReg reg) { + cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0 + cache_addd(0xca000002); // bgt nobranch + cache_addd(0xe5900000 + (temp1 << 12) + (HOST_pc << 16)); // ldr temp1, [pc, #0] + cache_addd(0xe12fff10 + (temp1)); // bx temp1 + cache_addd(0); // fill j + // nobranch: + return ((Bit32u)cache.pos-4); +} + +// calculate long relative offset and fill it into the location pointed to by data +static void INLINE gen_fill_branch_long(Bit32u data) { + // this is an absolute branch + *(Bit32u*)data=(Bit32u)cache.pos; +} + +static void gen_run_code(void) { + cache_addd(0xe92d4000); // stmfd sp!, {lr} + cache_addd(0xe92d01f0); // stmfd sp!, {v1-v5} + cache_addd(0xe28fe004); // add lr, pc, #4 + cache_addd(0xe92d4000); // stmfd sp!, {lr} + cache_addd(0xe12fff10); // bx r0 + cache_addd(0xe8bd01f0); // ldmfd sp!, {v1-v5} + + cache_addd(0xe8bd4000); // ldmfd sp!, {lr} + cache_addd(0xe12fff1e); // bx lr +} + +// return from a function +static void gen_return_function(void) { + cache_addd(0xe1a00000 + (HOST_a1 << 12) + FC_RETOP); // mov a1, FC_RETOP + cache_addd(0xe8bd4000); // ldmfd sp!, {lr} + cache_addd(0xe12fff1e); // bx lr +} + +#ifdef DRC_FLAGS_INVALIDATION + +// called when a call to a function can be replaced by a +// call to a simpler function +static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { +#ifdef DRC_FLAGS_INVALIDATION_DCODE + // try to avoid function calls but rather directly fill in code + switch (flags_type) { + case t_ADDb: + case t_ADDw: + case t_ADDd: + *(Bit32u*)pos=0xe0800000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // add FC_RETOP, a1, a2 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_ORb: + case t_ORw: + case t_ORd: + *(Bit32u*)pos=0xe1800000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // orr FC_RETOP, a1, a2 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_ANDb: + case t_ANDw: + case t_ANDd: + *(Bit32u*)pos=0xe0000000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // and FC_RETOP, a1, a2 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_SUBb: + case t_SUBw: + case t_SUBd: + *(Bit32u*)pos=0xe0400000 + (FC_RETOP << 12) + (HOST_a1 
<< 16) + (HOST_a2); // sub FC_RETOP, a1, a2 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_XORb: + case t_XORw: + case t_XORd: + *(Bit32u*)pos=0xe0200000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // eor FC_RETOP, a1, a2 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_CMPb: + case t_CMPw: + case t_CMPd: + case t_TESTb: + case t_TESTw: + case t_TESTd: + *(Bit32u*)pos=0xea000000 + (3); // b (pc+3*4) + break; + case t_INCb: + case t_INCw: + case t_INCd: + *(Bit32u*)pos=0xe2800000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (1); // add FC_RETOP, a1, #1 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_DECb: + case t_DECw: + case t_DECd: + *(Bit32u*)pos=0xe2400000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (1); // sub FC_RETOP, a1, #1 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_SHLb: + case t_SHLw: + case t_SHLd: + *(Bit32u*)pos=0xe1a00010 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, lsl a2 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_SHRb: + *(Bit32u*)pos=0xe2000000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (0xff); // and FC_RETOP, a1, #0xff + *(Bit32u*)(pos+4)=0xe1a00030 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, lsr a2 + *(Bit32u*)(pos+8)=0xe1a00000; // nop + *(Bit32u*)(pos+12)=0xe1a00000; // nop + *(Bit32u*)(pos+16)=0xe1a00000; // nop + break; + case t_SHRw: + *(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16 + *(Bit32u*)(pos+4)=0xe1a00020 + (FC_RETOP << 12) + (FC_RETOP) + (16 << 7); // mov FC_RETOP, FC_RETOP, lsr #16 + *(Bit32u*)(pos+8)=0xe1a00030 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, lsr a2 + *(Bit32u*)(pos+12)=0xe1a00000; // nop + *(Bit32u*)(pos+16)=0xe1a00000; // nop + break; + case t_SHRd: + *(Bit32u*)pos=0xe1a00030 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, lsr a2 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_SARb: + *(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (24 << 7); // mov FC_RETOP, a1, lsl #24 + *(Bit32u*)(pos+4)=0xe1a00040 + (FC_RETOP << 12) + (FC_RETOP) + (24 << 7); // mov FC_RETOP, FC_RETOP, asr #24 + *(Bit32u*)(pos+8)=0xe1a00050 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, asr a2 + *(Bit32u*)(pos+12)=0xe1a00000; // nop + *(Bit32u*)(pos+16)=0xe1a00000; // nop + break; + case t_SARw: + *(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16 + *(Bit32u*)(pos+4)=0xe1a00040 + (FC_RETOP << 12) + (FC_RETOP) + (16 << 7); // mov FC_RETOP, FC_RETOP, asr #16 + *(Bit32u*)(pos+8)=0xe1a00050 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, asr a2 + *(Bit32u*)(pos+12)=0xe1a00000; // nop + *(Bit32u*)(pos+16)=0xe1a00000; // nop + break; + case t_SARd: + *(Bit32u*)pos=0xe1a00050 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, asr a2 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_RORb: + *(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (24 << 7); // mov FC_RETOP, a1, lsl #24 + *(Bit32u*)(pos+4)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (8 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8 + *(Bit32u*)(pos+8)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 + *(Bit32u*)(pos+12)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 
8); // mov FC_RETOP, FC_RETOP, ror a2 + *(Bit32u*)(pos+16)=0xe1a00000; // nop + break; + case t_RORw: + *(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16 + *(Bit32u*)(pos+4)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 + *(Bit32u*)(pos+8)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2 + *(Bit32u*)(pos+12)=0xe1a00000; // nop + *(Bit32u*)(pos+16)=0xe1a00000; // nop + break; + case t_RORd: + *(Bit32u*)pos=0xe1a00070 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, ror a2 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_ROLb: + *(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (24 << 7); // mov FC_RETOP, a1, lsl #24 + *(Bit32u*)(pos+4)=0xe2600000 + (HOST_a2 << 12) + (HOST_a2 << 16) + (32); // rsb a2, a2, #32 + *(Bit32u*)(pos+8)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (8 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8 + *(Bit32u*)(pos+12)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 + *(Bit32u*)(pos+16)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2 + break; + case t_ROLw: + *(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16 + *(Bit32u*)(pos+4)=0xe2600000 + (HOST_a2 << 12) + (HOST_a2 << 16) + (32); // rsb a2, a2, #32 + *(Bit32u*)(pos+8)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 + *(Bit32u*)(pos+12)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2 + *(Bit32u*)(pos+16)=0xe1a00000; // nop + break; + case t_ROLd: + *(Bit32u*)pos=0xe2600000 + (HOST_a2 << 12) + (HOST_a2 << 16) + (32); // rsb a2, a2, #32 + *(Bit32u*)(pos+4)=0xe1a00070 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, ror a2 + *(Bit32u*)(pos+8)=0xe1a00000; // nop + *(Bit32u*)(pos+12)=0xe1a00000; // nop + *(Bit32u*)(pos+16)=0xe1a00000; // nop + break; + case t_NEGb: + case t_NEGw: + case t_NEGd: + *(Bit32u*)pos=0xe2600000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (0); // rsb FC_RETOP, a1, #0 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + default: + *(Bit32u*)(pos+12)=(Bit32u)fct_ptr; // simple_func + break; + + } +#else + *(Bit32u*)(pos+12)=(Bit32u)fct_ptr; // simple_func +#endif +} +#endif diff --git a/src/cpu/core_dynrec/risc_armv4le-s3.h b/src/cpu/core_dynrec/risc_armv4le-s3.h new file mode 100644 index 00000000..6388538d --- /dev/null +++ b/src/cpu/core_dynrec/risc_armv4le-s3.h @@ -0,0 +1,694 @@ +/* + * Copyright (C) 2002-2008 The DOSBox Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +/* $Id: risc_armv4le-s3.h,v 1.1 2008-08-20 14:13:21 c2woody Exp $ */ + + +/* ARMv4 (little endian) backend by M-HT (speed-tweaked arm version) */ + + +// temporary registers +#define temp1 HOST_ip +#define temp2 HOST_v5 +#define temp3 HOST_v4 + +// register that holds function return values +#define FC_RETOP HOST_v3 + +// register used for address calculations, +#define FC_ADDR HOST_v1 // has to be saved across calls, see DRC_PROTECT_ADDR_REG + +// register that holds the first parameter +#define FC_OP1 HOST_a1 + +// register that holds the second parameter +#define FC_OP2 HOST_a2 + +// register that holds byte-accessible temporary values +#define FC_TMP_BA1 HOST_a1 + +// register that holds byte-accessible temporary values +#define FC_TMP_BA2 HOST_a2 + +// temporary register for LEA +#define TEMP_REG_DRC HOST_v2 + +// helper macro +#define ROTATE_SCALE(x) ( (x)?(32 - x):(0) ) + +// move a full register from reg_src to reg_dst +static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) { + if(reg_src == reg_dst) return; + cache_addd(0xe1a00000 + (reg_dst << 12) + reg_src); // mov reg_dst, reg_src +} + +// move a 32bit constant value into dest_reg +static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) { + Bits first, scale; + if (imm == 0) { + cache_addd(0xe3a00000 + (dest_reg << 12)); // mov dest_reg, #0 + } else { + scale = 0; + first = 1; + while (imm) { + while ((imm & 3) == 0) { + imm>>=2; + scale+=2; + } + if (first) { + cache_addd(0xe3a00000 + (dest_reg << 12) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // mov dest_reg, #((imm & 0xff) << scale) + first = 0; + } else { + cache_addd(0xe3800000 + (dest_reg << 12) + (dest_reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // orr dest_reg, dest_reg, #((imm & 0xff) << scale) + } + imm>>=8; + scale+=8; + } + } +} + +// helper function for gen_mov_word_to_reg +static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,HostReg data_reg) { + // alignment.... 
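+	// note: same unaligned-safe load strategy as in risc_armv4le-o3.h; each
+	// cache_addd() appends one pre-encoded ARM instruction word, with Rd at
+	// bits 12-15 and Rn at bits 16-19. For instance, with dest_reg == HOST_a1
+	// (r0) and data_reg == HOST_ip (r12) the first ldrh below encodes as
+	// 0xe1d000b0 + (0 << 12) + (12 << 16) == 0xe1dc00b0, i.e. "ldrh r0, [ip]".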
+ if (dword) { + if ((Bit32u)data & 3) { + if ( ((Bit32u)data & 3) == 2 ) { + cache_addd(0xe1d000b0 + (dest_reg << 12) + (data_reg << 16)); // ldrh dest_reg, [data_reg] + cache_addd(0xe1d000b2 + (temp2 << 12) + (data_reg << 16)); // ldrh temp2, [data_reg, #2] + cache_addd(0xe1800800 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #16 + } else { + cache_addd(0xe5d00000 + (dest_reg << 12) + (data_reg << 16)); // ldrb dest_reg, [data_reg] + cache_addd(0xe1d000b1 + (temp2 << 12) + (data_reg << 16)); // ldrh temp2, [data_reg, #1] + cache_addd(0xe1800400 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #8 + cache_addd(0xe5d00003 + (temp2 << 12) + (data_reg << 16)); // ldrb temp2, [data_reg, #3] + cache_addd(0xe1800c00 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #24 + } + } else { + cache_addd(0xe5900000 + (dest_reg << 12) + (data_reg << 16)); // ldr dest_reg, [data_reg] + } + } else { + if ((Bit32u)data & 1) { + cache_addd(0xe5d00000 + (dest_reg << 12) + (data_reg << 16)); // ldrb dest_reg, [data_reg] + cache_addd(0xe5d00001 + (temp2 << 12) + (data_reg << 16)); // ldrb temp2, [data_reg, #1] + cache_addd(0xe1800400 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #8 + } else { + cache_addd(0xe1d000b0 + (dest_reg << 12) + (data_reg << 16)); // ldrh dest_reg, [data_reg] + } + } +} + +// move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg +// 16bit moves may destroy the upper 16bit of the destination register +static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) { + gen_mov_dword_to_reg_imm(temp1, (Bit32u)data); + gen_mov_word_to_reg_helper(dest_reg, data, dword, temp1); +} + +// move a 16bit constant value into dest_reg +// the upper 16bit of the destination register may be destroyed +static void INLINE gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) { + gen_mov_dword_to_reg_imm(dest_reg, (Bit32u)imm); +} + +// helper function for gen_mov_word_from_reg +static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, HostReg data_reg) { + // alignment.... 
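+	// note: as in the o3 backend, misaligned stores are emitted as strb/strh
+	// sequences; the word-aligned fast path is a single "str src_reg, [data_reg]".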
+ if (dword) { + if ((Bit32u)dest & 3) { + if ( ((Bit32u)dest & 3) == 2 ) { + cache_addd(0xe1c000b0 + (src_reg << 12) + (data_reg << 16)); // strh src_reg, [data_reg] + cache_addd(0xe1a00820 + (temp2 << 12) + (src_reg)); // mov temp2, src_reg, lsr #16 + cache_addd(0xe1c000b2 + (temp2 << 12) + (data_reg << 16)); // strh temp2, [data_reg, #2] + } else { + cache_addd(0xe5c00000 + (src_reg << 12) + (data_reg << 16)); // strb src_reg, [data_reg] + cache_addd(0xe1a00420 + (temp2 << 12) + (src_reg)); // mov temp2, src_reg, lsr #8 + cache_addd(0xe1c000b1 + (temp2 << 12) + (data_reg << 16)); // strh temp2, [data_reg, #1] + cache_addd(0xe1a00820 + (temp2 << 12) + (temp2)); // mov temp2, temp2, lsr #16 + cache_addd(0xe5c00003 + (temp2 << 12) + (data_reg << 16)); // strb temp2, [data_reg, #3] + } + } else { + cache_addd(0xe5800000 + (src_reg << 12) + (data_reg << 16)); // str src_reg, [data_reg] + } + } else { + if ((Bit32u)dest & 1) { + cache_addd(0xe5c00000 + (src_reg << 12) + (data_reg << 16)); // strb src_reg, [data_reg] + cache_addd(0xe1a00420 + (temp2 << 12) + (src_reg)); // mov temp2, src_reg, lsr #8 + cache_addd(0xe5c00001 + (temp2 << 12) + (data_reg << 16)); // strb temp2, [data_reg, #1] + } else { + cache_addd(0xe1c000b0 + (src_reg << 12) + (data_reg << 16)); // strh src_reg, [data_reg] + } + } +} + +// move 32bit (dword==true) or 16bit (dword==false) of a register into memory +static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) { + gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); + gen_mov_word_from_reg_helper(src_reg, dest, dword, temp1); +} + +// move an 8bit value from memory into dest_reg +// the upper 24bit of the destination register can be destroyed +// this function does not use FC_OP1/FC_OP2 as dest_reg as these +// registers might not be directly byte-accessible on some architectures +static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) { + gen_mov_dword_to_reg_imm(temp1, (Bit32u)data); + cache_addd(0xe5d00000 + (dest_reg << 12) + (temp1 << 16)); // ldrb dest_reg, [temp1] +} + +// move an 8bit value from memory into dest_reg +// the upper 24bit of the destination register can be destroyed +// this function can use FC_OP1/FC_OP2 as dest_reg which are +// not directly byte-accessible on some architectures +static void INLINE gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) { + gen_mov_byte_to_reg_low(dest_reg, data); +} + +// move an 8bit constant value into dest_reg +// the upper 24bit of the destination register can be destroyed +// this function does not use FC_OP1/FC_OP2 as dest_reg as these +// registers might not be directly byte-accessible on some architectures +static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) { + cache_addd(0xe3a00000 + (dest_reg << 12) + (imm)); // mov dest_reg, #(imm) +} + +// move an 8bit constant value into dest_reg +// the upper 24bit of the destination register can be destroyed +// this function can use FC_OP1/FC_OP2 as dest_reg which are +// not directly byte-accessible on some architectures +static void INLINE gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) { + gen_mov_byte_to_reg_low_imm(dest_reg, imm); +} + +// move the lowest 8bit of a register into memory +static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) { + gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); + cache_addd(0xe5c00000 + (src_reg << 12) + (temp1 << 16)); // strb src_reg, [temp1] +} + + + +// convert an 8bit word to a 32bit dword +// the register is zero-extended (sign==false) or 
sign-extended (sign==true) +static void gen_extend_byte(bool sign,HostReg reg) { + if (sign) { + cache_addd(0xe1a00c00 + (reg << 12) + (reg)); // mov reg, reg, lsl #24 + cache_addd(0xe1a00c40 + (reg << 12) + (reg)); // mov reg, reg, asr #24 + } else { + cache_addd(0xe20000ff + (reg << 12) + (reg << 16)); // and reg, reg, #0xff + } +} + +// convert a 16bit word to a 32bit dword +// the register is zero-extended (sign==false) or sign-extended (sign==true) +static void gen_extend_word(bool sign,HostReg reg) { + if (sign) { + cache_addd(0xe1a00800 + (reg << 12) + (reg)); // mov reg, reg, lsl #16 + cache_addd(0xe1a00840 + (reg << 12) + (reg)); // mov reg, reg, asr #16 + } else { + cache_addd(0xe1a00800 + (reg << 12) + (reg)); // mov reg, reg, lsl #16 + cache_addd(0xe1a00820 + (reg << 12) + (reg)); // mov reg, reg, lsr #16 + } +} + +// add a 32bit value from memory to a full register +static void gen_add(HostReg reg,void* op) { + gen_mov_word_to_reg(temp3, op, 1); + cache_addd(0xe0800000 + (reg << 12) + (reg << 16) + (temp3)); // add reg, reg, temp3 +} + +// add a 32bit constant value to a full register +static void gen_add_imm(HostReg reg,Bit32u imm) { + Bits scale; + if(!imm) return; + if (imm == 0xffffffff) { + cache_addd(0xe2400001 + (reg << 12) + (reg << 16)); // sub reg, reg, #1 + } else { + scale = 0; + while (imm) { + while ((imm & 3) == 0) { + imm>>=2; + scale+=2; + } + cache_addd(0xe2800000 + (reg << 12) + (reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // add reg, reg, #((imm & 0xff) << scale) + imm>>=8; + scale+=8; + } + } +} + +// and a 32bit constant value with a full register +static void gen_and_imm(HostReg reg,Bit32u imm) { + Bits scale; + Bit32u imm2; + imm2 = ~imm; + if(!imm2) return; + if (!imm) { + cache_addd(0xe3a00000 + (reg << 12)); // mov reg, #0 + } else { + scale = 0; + while (imm2) { + while ((imm2 & 3) == 0) { + imm2>>=2; + scale+=2; + } + cache_addd(0xe3c00000 + (reg << 12) + (reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // bic reg, reg, #((imm2 & 0xff) << scale) + imm2>>=8; + scale+=8; + } + } +} + + +// move a 32bit constant value into memory +static void gen_mov_direct_dword(void* dest,Bit32u imm) { + gen_mov_dword_to_reg_imm(temp3, imm); + gen_mov_word_from_reg(temp3, dest, 1); +} + +// move an address into memory +static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) { + gen_mov_direct_dword(dest,(Bit32u)imm); +} + +// add an 8bit constant value to a dword memory value +static void gen_add_direct_byte(void* dest,Bit8s imm) { + if(!imm) return; + gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); + gen_mov_word_to_reg_helper(temp3, dest, 1, temp1); + if (imm >= 0) { + cache_addd(0xe2800000 + (temp3 << 12) + (temp3 << 16) + ((Bit32s)imm)); // add temp3, temp3, #(imm) + } else { + cache_addd(0xe2400000 + (temp3 << 12) + (temp3 << 16) + (-((Bit32s)imm))); // sub temp3, temp3, #(-imm) + } + gen_mov_word_from_reg_helper(temp3, dest, 1, temp1); +} + +// add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value +static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) { + if(!imm) return; + if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) { + gen_add_direct_byte(dest,(Bit8s)imm); + return; + } + gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); + gen_mov_word_to_reg_helper(temp3, dest, dword, temp1); + // maybe use function gen_add_imm + if (dword) { + gen_mov_dword_to_reg_imm(temp2, imm); + } else { + gen_mov_word_to_reg_imm(temp2, (Bit16u)imm); + } + cache_addd(0xe0800000 + (temp3 << 12) + (temp3 << 
16) + (temp2)); // add temp3, temp3, temp2 + gen_mov_word_from_reg_helper(temp3, dest, dword, temp1); +} + +// subtract an 8bit constant value from a dword memory value +static void gen_sub_direct_byte(void* dest,Bit8s imm) { + if(!imm) return; + gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); + gen_mov_word_to_reg_helper(temp3, dest, 1, temp1); + if (imm >= 0) { + cache_addd(0xe2400000 + (temp3 << 12) + (temp3 << 16) + ((Bit32s)imm)); // sub temp3, temp3, #(imm) + } else { + cache_addd(0xe2800000 + (temp3 << 12) + (temp3 << 16) + (-((Bit32s)imm))); // add temp3, temp3, #(-imm) + } + gen_mov_word_from_reg_helper(temp3, dest, 1, temp1); +} + +// subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value +static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) { + if(!imm) return; + if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) { + gen_sub_direct_byte(dest,(Bit8s)imm); + return; + } + gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest); + gen_mov_word_to_reg_helper(temp3, dest, dword, temp1); + // maybe use function gen_add_imm/gen_sub_imm + if (dword) { + gen_mov_dword_to_reg_imm(temp2, imm); + } else { + gen_mov_word_to_reg_imm(temp2, (Bit16u)imm); + } + cache_addd(0xe0400000 + (temp3 << 12) + (temp3 << 16) + (temp2)); // sub temp3, temp3, temp2 + gen_mov_word_from_reg_helper(temp3, dest, dword, temp1); +} + +// effective address calculation, destination is dest_reg +// scale_reg is scaled by scale (scale_reg*(2^scale)) and +// added to dest_reg, then the immediate value is added +static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) { + cache_addd(0xe0800000 + (dest_reg << 12) + (dest_reg << 16) + (scale_reg) + (scale << 7)); // add dest_reg, dest_reg, scale_reg, lsl #(scale) + gen_add_imm(dest_reg, imm); +} + +// effective address calculation, destination is dest_reg +// dest_reg is scaled by scale (dest_reg*(2^scale)), +// then the immediate value is added +static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) { + if (scale) { + cache_addd(0xe1a00000 + (dest_reg << 12) + (dest_reg) + (scale << 7)); // mov dest_reg, dest_reg, lsl #(scale) + } + gen_add_imm(dest_reg, imm); +} + +// generate a call to a parameterless function +static void INLINE gen_call_function_raw(void * func) { + cache_addd(0xe5900004 + (temp1 << 12) + (HOST_pc << 16)); // ldr temp1, [pc, #4] + cache_addd(0xe2800004 + (HOST_lr << 12) + (HOST_pc << 16)); // add lr, pc, #4 + cache_addd(0xe12fff10 + (temp1)); // bx temp1 + cache_addd((Bit32u)func); // .int func + cache_addd(0xe1a00000 + (FC_RETOP << 12) + HOST_a1); // mov FC_RETOP, a1 +} + +// generate a call to a function with paramcount parameters +// note: the parameters are loaded in the architecture specific way +// using the gen_load_param_ functions below +static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) { + Bit32u proc_addr = (Bit32u)cache.pos; + gen_call_function_raw(func); + return proc_addr; +} + +#if (1) +// max of 4 parameters in a1-a4 + +// load an immediate value as param'th function parameter +static void INLINE gen_load_param_imm(Bitu imm,Bitu param) { + gen_mov_dword_to_reg_imm(param, imm); +} + +// load an address as param'th function parameter +static void INLINE gen_load_param_addr(Bitu addr,Bitu param) { + gen_mov_dword_to_reg_imm(param, addr); +} + +// load a host-register as param'th function parameter +static void INLINE gen_load_param_reg(Bitu reg,Bitu param) { + gen_mov_regs(param, reg); +} + +// load a value from memory as 
param'th function parameter +static void INLINE gen_load_param_mem(Bitu mem,Bitu param) { + gen_mov_word_to_reg(param, (void *)mem, 1); +} +#else + other arm abis +#endif + +// jump to an address pointed at by ptr, offset is in imm +static void gen_jmp_ptr(void * ptr,Bits imm=0) { + Bits scale; + Bitu imm2; + gen_mov_word_to_reg(temp3, ptr, 1); + + if (imm) { + scale = 0; + imm2 = (Bitu)imm; + while (imm2) { + while ((imm2 & 3) == 0) { + imm2>>=2; + scale+=2; + } + cache_addd(0xe2800000 + (temp3 << 12) + (temp3 << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // add temp3, temp3, #((imm2 & 0xff) << scale) + imm2>>=8; + scale+=8; + } + } + +#if (1) +// (*ptr) should be word aligned + if ((imm & 0x03) == 0) { + cache_addd(0xe5900000 + (temp1 << 12) + (temp3 << 16)); // ldr temp1, [temp3] + } else +#endif + { + cache_addd(0xe5d00000 + (temp1 << 12) + (temp3 << 16)); // ldrb temp1, [temp3] + cache_addd(0xe5d00001 + (temp2 << 12) + (temp3 << 16)); // ldrb temp2, [temp3, #1] + cache_addd(0xe1800400 + (temp1 << 12) + (temp1 << 16) + (temp2)); // orr temp1, temp1, temp2, lsl #8 + cache_addd(0xe5d00002 + (temp2 << 12) + (temp3 << 16)); // ldrb temp2, [temp3, #2] + cache_addd(0xe1800800 + (temp1 << 12) + (temp1 << 16) + (temp2)); // orr temp1, temp1, temp2, lsl #16 + cache_addd(0xe5d00003 + (temp2 << 12) + (temp3 << 16)); // ldrb temp2, [temp3, #3] + cache_addd(0xe1800c00 + (temp1 << 12) + (temp1 << 16) + (temp2)); // orr temp1, temp1, temp2, lsl #24 + } + + cache_addd(0xe12fff10 + (temp1)); // bx temp1 +} + +// short conditional jump (+-127 bytes) if register is zero +// the destination is set by gen_fill_branch() later +static Bit32u INLINE gen_create_branch_on_zero(HostReg reg,bool dword) { + if (dword) { + cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0 + } else { + cache_addd(0xe1b00800 + (temp1 << 12) + (reg)); // movs temp1, reg, lsl #16 + } + cache_addd(0x0a000000); // beq j + return ((Bit32u)cache.pos-4); +} + +// short conditional jump (+-127 bytes) if register is nonzero +// the destination is set by gen_fill_branch() later +static Bit32u INLINE gen_create_branch_on_nonzero(HostReg reg,bool dword) { + if (dword) { + cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0 + } else { + cache_addd(0xe1b00800 + (temp1 << 12) + (reg)); // movs temp1, reg, lsl #16 + } + cache_addd(0x1a000000); // bne j + return ((Bit32u)cache.pos-4); +} + +// calculate relative offset and fill it into the location pointed to by data +static void INLINE gen_fill_branch(DRC_PTR_SIZE_IM data) { +#if C_DEBUG + Bits len=(Bit32u)cache.pos-(data+8); + if (len<0) len=-len; + if (len>0x02000000) LOG_MSG("Big jump %d",len); +#endif + *(Bit32u*)data=( (*(Bit32u*)data) & 0xff000000 ) | ( ( ((Bit32u)cache.pos - (data+8)) >> 2 ) & 0x00ffffff ); +} + +// conditional jump if register is nonzero +// for isdword==true the 32bit of the register are tested +// for isdword==false the lowest 8bit of the register are tested +static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool isdword) { + if (isdword) { + cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0 + } else { + cache_addd(0xe31000ff + (reg << 16)); // tst reg, #0xff + } + cache_addd(0x0a000002); // beq nobranch + cache_addd(0xe5900000 + (temp1 << 12) + (HOST_pc << 16)); // ldr temp1, [pc, #0] + cache_addd(0xe12fff10 + (temp1)); // bx temp1 + cache_addd(0); // fill j + // nobranch: + return ((Bit32u)cache.pos-4); +} + +// compare 32bit-register against zero and jump if value less/equal than zero +static Bit32u INLINE gen_create_branch_long_leqzero(HostReg 
reg) { + cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0 + cache_addd(0xca000002); // bgt nobranch + cache_addd(0xe5900000 + (temp1 << 12) + (HOST_pc << 16)); // ldr temp1, [pc, #0] + cache_addd(0xe12fff10 + (temp1)); // bx temp1 + cache_addd(0); // fill j + // nobranch: + return ((Bit32u)cache.pos-4); +} + +// calculate long relative offset and fill it into the location pointed to by data +static void INLINE gen_fill_branch_long(Bit32u data) { + // this is an absolute branch + *(Bit32u*)data=(Bit32u)cache.pos; +} + +static void gen_run_code(void) { + cache_addd(0xe92d4000); // stmfd sp!, {lr} + cache_addd(0xe92d01f0); // stmfd sp!, {v1-v5} + cache_addd(0xe28fe004); // add lr, pc, #4 + cache_addd(0xe92d4000); // stmfd sp!, {lr} + cache_addd(0xe12fff10); // bx r0 + cache_addd(0xe8bd01f0); // ldmfd sp!, {v1-v5} + + cache_addd(0xe8bd4000); // ldmfd sp!, {lr} + cache_addd(0xe12fff1e); // bx lr +} + +// return from a function +static void gen_return_function(void) { + cache_addd(0xe1a00000 + (HOST_a1 << 12) + FC_RETOP); // mov a1, FC_RETOP + cache_addd(0xe8bd4000); // ldmfd sp!, {lr} + cache_addd(0xe12fff1e); // bx lr +} + +#ifdef DRC_FLAGS_INVALIDATION + +// called when a call to a function can be replaced by a +// call to a simpler function +static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { +#ifdef DRC_FLAGS_INVALIDATION_DCODE + // try to avoid function calls but rather directly fill in code + switch (flags_type) { + case t_ADDb: + case t_ADDw: + case t_ADDd: + *(Bit32u*)pos=0xe0800000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // add FC_RETOP, a1, a2 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_ORb: + case t_ORw: + case t_ORd: + *(Bit32u*)pos=0xe1800000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // orr FC_RETOP, a1, a2 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_ANDb: + case t_ANDw: + case t_ANDd: + *(Bit32u*)pos=0xe0000000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // and FC_RETOP, a1, a2 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_SUBb: + case t_SUBw: + case t_SUBd: + *(Bit32u*)pos=0xe0400000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // sub FC_RETOP, a1, a2 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_XORb: + case t_XORw: + case t_XORd: + *(Bit32u*)pos=0xe0200000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // eor FC_RETOP, a1, a2 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_CMPb: + case t_CMPw: + case t_CMPd: + case t_TESTb: + case t_TESTw: + case t_TESTd: + *(Bit32u*)pos=0xea000000 + (3); // b (pc+3*4) + break; + case t_INCb: + case t_INCw: + case t_INCd: + *(Bit32u*)pos=0xe2800000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (1); // add FC_RETOP, a1, #1 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_DECb: + case t_DECw: + case t_DECd: + *(Bit32u*)pos=0xe2400000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (1); // sub FC_RETOP, a1, #1 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_SHLb: + case t_SHLw: + case t_SHLd: + *(Bit32u*)pos=0xe1a00010 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, lsl a2 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_SHRb: + *(Bit32u*)pos=0xe2000000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (0xff); // and FC_RETOP, a1, #0xff + *(Bit32u*)(pos+4)=0xe1a00030 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, lsr a2 + *(Bit32u*)(pos+8)=0xe1a00000; // nop + 
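+		// pad with nops: the inline replacement has to cover the whole 20-byte sequence emitted by gen_call_function_raw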
*(Bit32u*)(pos+12)=0xe1a00000; // nop + *(Bit32u*)(pos+16)=0xe1a00000; // nop + break; + case t_SHRw: + *(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16 + *(Bit32u*)(pos+4)=0xe1a00020 + (FC_RETOP << 12) + (FC_RETOP) + (16 << 7); // mov FC_RETOP, FC_RETOP, lsr #16 + *(Bit32u*)(pos+8)=0xe1a00030 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, lsr a2 + *(Bit32u*)(pos+12)=0xe1a00000; // nop + *(Bit32u*)(pos+16)=0xe1a00000; // nop + break; + case t_SHRd: + *(Bit32u*)pos=0xe1a00030 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, lsr a2 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_SARb: + *(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (24 << 7); // mov FC_RETOP, a1, lsl #24 + *(Bit32u*)(pos+4)=0xe1a00040 + (FC_RETOP << 12) + (FC_RETOP) + (24 << 7); // mov FC_RETOP, FC_RETOP, asr #24 + *(Bit32u*)(pos+8)=0xe1a00050 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, asr a2 + *(Bit32u*)(pos+12)=0xe1a00000; // nop + *(Bit32u*)(pos+16)=0xe1a00000; // nop + break; + case t_SARw: + *(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16 + *(Bit32u*)(pos+4)=0xe1a00040 + (FC_RETOP << 12) + (FC_RETOP) + (16 << 7); // mov FC_RETOP, FC_RETOP, asr #16 + *(Bit32u*)(pos+8)=0xe1a00050 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, asr a2 + *(Bit32u*)(pos+12)=0xe1a00000; // nop + *(Bit32u*)(pos+16)=0xe1a00000; // nop + break; + case t_SARd: + *(Bit32u*)pos=0xe1a00050 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, asr a2 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_RORb: + *(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (24 << 7); // mov FC_RETOP, a1, lsl #24 + *(Bit32u*)(pos+4)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (8 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8 + *(Bit32u*)(pos+8)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 + *(Bit32u*)(pos+12)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2 + *(Bit32u*)(pos+16)=0xe1a00000; // nop + break; + case t_RORw: + *(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16 + *(Bit32u*)(pos+4)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 + *(Bit32u*)(pos+8)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2 + *(Bit32u*)(pos+12)=0xe1a00000; // nop + *(Bit32u*)(pos+16)=0xe1a00000; // nop + break; + case t_RORd: + *(Bit32u*)pos=0xe1a00070 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, ror a2 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + case t_ROLb: + *(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (24 << 7); // mov FC_RETOP, a1, lsl #24 + *(Bit32u*)(pos+4)=0xe2600000 + (HOST_a2 << 12) + (HOST_a2 << 16) + (32); // rsb a2, a2, #32 + *(Bit32u*)(pos+8)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (8 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8 + *(Bit32u*)(pos+12)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 + *(Bit32u*)(pos+16)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2 + break; + case 
t_ROLw: + *(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16 + *(Bit32u*)(pos+4)=0xe2600000 + (HOST_a2 << 12) + (HOST_a2 << 16) + (32); // rsb a2, a2, #32 + *(Bit32u*)(pos+8)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16 + *(Bit32u*)(pos+12)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2 + *(Bit32u*)(pos+16)=0xe1a00000; // nop + break; + case t_ROLd: + *(Bit32u*)pos=0xe2600000 + (HOST_a2 << 12) + (HOST_a2 << 16) + (32); // rsb a2, a2, #32 + *(Bit32u*)(pos+4)=0xe1a00070 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, ror a2 + *(Bit32u*)(pos+8)=0xe1a00000; // nop + *(Bit32u*)(pos+12)=0xe1a00000; // nop + *(Bit32u*)(pos+16)=0xe1a00000; // nop + break; + case t_NEGb: + case t_NEGw: + case t_NEGd: + *(Bit32u*)pos=0xe2600000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (0); // rsb FC_RETOP, a1, #0 + *(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4) + break; + default: + *(Bit32u*)(pos+12)=(Bit32u)fct_ptr; // simple_func + break; + + } +#else + *(Bit32u*)(pos+12)=(Bit32u)fct_ptr; // simple_func +#endif +} +#endif diff --git a/src/cpu/core_dynrec/risc_armv4le-thumb.h b/src/cpu/core_dynrec/risc_armv4le-thumb.h new file mode 100644 index 00000000..1820fa39 --- /dev/null +++ b/src/cpu/core_dynrec/risc_armv4le-thumb.h @@ -0,0 +1,934 @@ +/* + * Copyright (C) 2002-2008 The DOSBox Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */
+
+/* $Id: risc_armv4le-thumb.h,v 1.1 2008-08-20 14:13:21 c2woody Exp $ */
+
+
+/* ARMv4 (little endian) backend by M-HT */
+
+
+// temporary "lo" registers
+#define templo1 HOST_v3
+#define templo2 HOST_v4
+
+// temporary "lo" register - value must be preserved when using it
+#define templosav HOST_a3
+
+// temporary "hi" register
+#define temphi1 HOST_ip
+
+// register that holds function return values
+#define FC_RETOP HOST_v2
+
+// register used for address calculations,
+#define FC_ADDR HOST_v1			// has to be saved across calls, see DRC_PROTECT_ADDR_REG
+
+// register that holds the first parameter
+#define FC_OP1 HOST_a1
+
+// register that holds the second parameter
+#define FC_OP2 HOST_a2
+
+// register that holds byte-accessible temporary values
+#define FC_TMP_BA1 HOST_a1
+
+// register that holds byte-accessible temporary values
+#define FC_TMP_BA2 HOST_a2
+
+// temporary register for LEA
+#define TEMP_REG_DRC HOST_a4
+
+// move a full register from reg_src to reg_dst
+static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) {
+	if(reg_src == reg_dst) return;
+	cache_addw(0x1c00 + reg_dst + (reg_src << 3));	// mov reg_dst, reg_src
+}
+
+// move a 32bit constant value into dest_reg
+static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
+	if ((imm & 0xffffff00) == 0) {
+		cache_addw(0x2000 + (dest_reg << 8) + (Bit8u)(imm & 0xff));	// mov dest_reg, #(imm)
+	} else if ((imm & 0xffff00ff) == 0) {
+		cache_addw(0x2000 + (dest_reg << 8) + (Bit8u)(imm >> 8));	// mov dest_reg, #(imm >> 8)
+		cache_addw(0x0000 + dest_reg + (dest_reg << 3) + (8 << 6));	// lsl dest_reg, dest_reg, #8
+	} else if ((imm & 0xff00ffff) == 0) {
+		cache_addw(0x2000 + (dest_reg << 8) + (imm >> 16));	// mov dest_reg, #(imm >> 16)
+		cache_addw(0x0000 + dest_reg + (dest_reg << 3) + (16 << 6));	// lsl dest_reg, dest_reg, #16
+	} else if ((imm & 0x00ffffff) == 0) {
+		cache_addw(0x2000 + (dest_reg << 8) + (imm >> 24));	// mov dest_reg, #(imm >> 24)
+		cache_addw(0x0000 + dest_reg + (dest_reg << 3) + (24 << 6));	// lsl dest_reg, dest_reg, #24
+	} else {
+		Bit32u diff;
+
+		diff = imm - ((Bit32u)cache.pos+4);
+
+		if ((diff < 1024) && ((imm & 0x03) == 0)) {
+			if (((Bit32u)cache.pos & 0x03) == 0) {
+				cache_addw(0xa000 + (dest_reg << 8) + (Bit8u)(diff >> 2));	// add dest_reg, pc, #(diff >> 2)
+			} else {
+				cache_addw(0x46c0);	// nop
+				// is the result of (diff-2)>>2 correct for diff<2 ???
+				cache_addw(0xa000 + (dest_reg << 8) + (Bit8u)((diff - 2) >> 2));	// add dest_reg, pc, #((diff - 2) >> 2)
+			}
+		} else {
+			if (((Bit32u)cache.pos & 0x03) == 0) {
+				cache_addw(0x4800 + (dest_reg << 8));	// ldr dest_reg, [pc, #0]
+				cache_addw(0xe000 + (2 >> 1));	// b next_code (pc+2)
+				cache_addd(imm);	// .int imm
+				// next_code:
+			} else {
+				cache_addw(0x4800 + (dest_reg << 8) + (4 >> 2));	// ldr dest_reg, [pc, #4]
+				cache_addw(0xe000 + (4 >> 1));	// b next_code (pc+4)
+				cache_addw(0x46c0);	// nop
+				cache_addd(imm);	// .int imm
+				// next_code:
+			}
+		}
+	}
+}
+
+// helper function for gen_mov_word_to_reg
+static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,HostReg data_reg) {
+	// alignment....
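+	// ARMv4 cannot load from unaligned addresses, so the word is assembled from halfword/byte loads chosen by the low bits of the address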
+ if (dword) { + if ((Bit32u)data & 3) { + if ( ((Bit32u)data & 3) == 2 ) { + cache_addw(0x8800 + dest_reg + (data_reg << 3)); // ldrh dest_reg, [data_reg] + cache_addw(0x8800 + templo1 + (data_reg << 3) + (2 << 5)); // ldrh templo1, [data_reg, #2] + cache_addw(0x0000 + templo1 + (templo1 << 3) + (16 << 6)); // lsl templo1, templo1, #16 + cache_addw(0x4300 + dest_reg + (templo1 << 3)); // orr dest_reg, templo1 + } else { + cache_addw(0x7800 + dest_reg + (data_reg << 3)); // ldrb dest_reg, [data_reg] + cache_addw(0x1c00 + templo1 + (data_reg << 3) + (1 << 6)); // add templo1, data_reg, #1 + cache_addw(0x8800 + templo1 + (templo1 << 3)); // ldrh templo1, [templo1] + cache_addw(0x0000 + templo1 + (templo1 << 3) + (8 << 6)); // lsl templo1, templo1, #8 + cache_addw(0x4300 + dest_reg + (templo1 << 3)); // orr dest_reg, templo1 + cache_addw(0x7800 + templo1 + (data_reg << 3) + (3 << 6)); // ldrb templo1, [data_reg, #3] + cache_addw(0x0000 + templo1 + (templo1 << 3) + (24 << 6)); // lsl templo1, templo1, #24 + cache_addw(0x4300 + dest_reg + (templo1 << 3)); // orr dest_reg, templo1 + } + } else { + cache_addw(0x6800 + dest_reg + (data_reg << 3)); // ldr dest_reg, [data_reg] + } + } else { + if ((Bit32u)data & 1) { + cache_addw(0x7800 + dest_reg + (data_reg << 3)); // ldrb dest_reg, [data_reg] + cache_addw(0x7800 + templo1 + (data_reg << 3) + (1 << 6)); // ldrb templo1, [data_reg, #1] + cache_addw(0x0000 + templo1 + (templo1 << 3) + (8 << 6)); // lsl templo1, templo1, #8 + cache_addw(0x4300 + dest_reg + (templo1 << 3)); // orr dest_reg, templo1 + } else { + cache_addw(0x8800 + dest_reg + (data_reg << 3)); // ldrh dest_reg, [data_reg] + } + } +} + +// move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg +// 16bit moves may destroy the upper 16bit of the destination register +static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) { + gen_mov_dword_to_reg_imm(templo2, (Bit32u)data); + gen_mov_word_to_reg_helper(dest_reg, data, dword, templo2); +} + +// move a 16bit constant value into dest_reg +// the upper 16bit of the destination register may be destroyed +static void INLINE gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) { + gen_mov_dword_to_reg_imm(dest_reg, (Bit32u)imm); +} + +// helper function for gen_mov_word_from_reg +static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, HostReg data_reg) { + // alignment.... 
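+	// likewise for stores: unaligned words are written out as halfword/byte pieces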
+ if (dword) { + if ((Bit32u)dest & 3) { + if ( ((Bit32u)dest & 3) == 2 ) { + cache_addw(0x8000 + src_reg + (data_reg << 3)); // strh src_reg, [data_reg] + cache_addw(0x1c00 + templo1 + (src_reg << 3)); // mov templo1, src_reg + cache_addw(0x0800 + templo1 + (templo1 << 3) + (16 << 6)); // lsr templo1, templo1, #16 + cache_addw(0x8000 + templo1 + (data_reg << 3) + (2 << 5)); // strh templo1, [data_reg, #2] + } else { + cache_addw(0x7000 + src_reg + (data_reg << 3)); // strb src_reg, [data_reg] + cache_addw(0x1c00 + templo1 + (src_reg << 3)); // mov templo1, src_reg + cache_addw(0x0800 + templo1 + (templo1 << 3) + (8 << 6)); // lsr templo1, templo1, #8 + cache_addw(0x7000 + templo1 + (data_reg << 3) + (1 << 6)); // strb templo1, [data_reg, #1] + cache_addw(0x1c00 + templo1 + (src_reg << 3)); // mov templo1, src_reg + cache_addw(0x0800 + templo1 + (templo1 << 3) + (16 << 6)); // lsr templo1, templo1, #16 + cache_addw(0x7000 + templo1 + (data_reg << 3) + (2 << 6)); // strb templo1, [data_reg, #2] + cache_addw(0x1c00 + templo1 + (src_reg << 3)); // mov templo1, src_reg + cache_addw(0x0800 + templo1 + (templo1 << 3) + (24 << 6)); // lsr templo1, templo1, #24 + cache_addw(0x7000 + templo1 + (data_reg << 3) + (3 << 6)); // strb templo1, [data_reg, #3] + } + } else { + cache_addw(0x6000 + src_reg + (data_reg << 3)); // str src_reg, [data_reg] + } + } else { + if ((Bit32u)dest & 1) { + cache_addw(0x7000 + src_reg + (data_reg << 3)); // strb src_reg, [data_reg] + cache_addw(0x1c00 + templo1 + (src_reg << 3)); // mov templo1, src_reg + cache_addw(0x0800 + templo1 + (templo1 << 3) + (8 << 6)); // lsr templo1, templo1, #8 + cache_addw(0x7000 + templo1 + (data_reg << 3) + (1 << 6)); // strb templo1, [data_reg, #1] + } else { + cache_addw(0x8000 + src_reg + (data_reg << 3)); // strh src_reg, [data_reg] + } + } +} + +// move 32bit (dword==true) or 16bit (dword==false) of a register into memory +static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) { + gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); + gen_mov_word_from_reg_helper(src_reg, dest, dword, templo2); +} + +// move an 8bit value from memory into dest_reg +// the upper 24bit of the destination register can be destroyed +// this function does not use FC_OP1/FC_OP2 as dest_reg as these +// registers might not be directly byte-accessible on some architectures +static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) { + gen_mov_dword_to_reg_imm(templo1, (Bit32u)data); + cache_addw(0x7800 + dest_reg + (templo1 << 3)); // ldrb dest_reg, [templo1] +} + +// move an 8bit value from memory into dest_reg +// the upper 24bit of the destination register can be destroyed +// this function can use FC_OP1/FC_OP2 as dest_reg which are +// not directly byte-accessible on some architectures +static void INLINE gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) { + gen_mov_byte_to_reg_low(dest_reg, data); +} + +// move an 8bit constant value into dest_reg +// the upper 24bit of the destination register can be destroyed +// this function does not use FC_OP1/FC_OP2 as dest_reg as these +// registers might not be directly byte-accessible on some architectures +static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) { + cache_addw(0x2000 + (dest_reg << 8) + imm); // mov dest_reg, #(imm) +} + +// move an 8bit constant value into dest_reg +// the upper 24bit of the destination register can be destroyed +// this function can use FC_OP1/FC_OP2 as dest_reg which are +// not directly byte-accessible on some architectures 
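+// (on Thumb all "lo" registers are byte-accessible, so this simply forwards to the generic version)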
+static void INLINE gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) { + gen_mov_byte_to_reg_low_imm(dest_reg, imm); +} + +// move the lowest 8bit of a register into memory +static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) { + gen_mov_dword_to_reg_imm(templo1, (Bit32u)dest); + cache_addw(0x7000 + src_reg + (templo1 << 3)); // strb src_reg, [templo1] +} + + + +// convert an 8bit word to a 32bit dword +// the register is zero-extended (sign==false) or sign-extended (sign==true) +static void gen_extend_byte(bool sign,HostReg reg) { + cache_addw(0x0000 + reg + (reg << 3) + (24 << 6)); // lsl reg, reg, #24 + + if (sign) { + cache_addw(0x1000 + reg + (reg << 3) + (24 << 6)); // asr reg, reg, #24 + } else { + cache_addw(0x0800 + reg + (reg << 3) + (24 << 6)); // lsr reg, reg, #24 + } +} + +// convert a 16bit word to a 32bit dword +// the register is zero-extended (sign==false) or sign-extended (sign==true) +static void gen_extend_word(bool sign,HostReg reg) { + cache_addw(0x0000 + reg + (reg << 3) + (16 << 6)); // lsl reg, reg, #16 + + if (sign) { + cache_addw(0x1000 + reg + (reg << 3) + (16 << 6)); // asr reg, reg, #16 + } else { + cache_addw(0x0800 + reg + (reg << 3) + (16 << 6)); // lsr reg, reg, #16 + } +} + +// add a 32bit value from memory to a full register +static void gen_add(HostReg reg,void* op) { + cache_addw(0x4680 + (temphi1 - HOST_r8) + (reg << 3)); // mov temphi1, reg + gen_mov_word_to_reg(reg, op, 1); + cache_addw(0x4440 + (reg) + ((temphi1 - HOST_r8) << 3)); // add reg, temphi1 +} + +// add a 32bit constant value to a full register +static void gen_add_imm(HostReg reg,Bit32u imm) { + if(!imm) return; + gen_mov_dword_to_reg_imm(templo1, imm); + cache_addw(0x1800 + reg + (reg << 3) + (templo1 << 6)); // add reg, reg, templo1 +} + +// and a 32bit constant value with a full register +static void gen_and_imm(HostReg reg,Bit32u imm) { + if(imm == 0xffffffff) return; + gen_mov_dword_to_reg_imm(templo1, imm); + cache_addw(0x4000 + reg + (templo1<< 3)); // and reg, templo1 +} + + +// move a 32bit constant value into memory +static void gen_mov_direct_dword(void* dest,Bit32u imm) { + cache_addw(0x4680 + (temphi1 - HOST_r8) + (templosav << 3)); // mov temphi1, templosav + gen_mov_dword_to_reg_imm(templosav, imm); + gen_mov_word_from_reg(templosav, dest, 1); + cache_addw(0x4640 + templosav + ((temphi1 - HOST_r8) << 3)); // mov templosav, temphi1 +} + +// move an address into memory +static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) { + gen_mov_direct_dword(dest,(Bit32u)imm); +} + +// add an 8bit constant value to a dword memory value +static void gen_add_direct_byte(void* dest,Bit8s imm) { + if(!imm) return; + cache_addw(0x4680 + (temphi1 - HOST_r8) + (templosav << 3)); // mov temphi1, templosav + gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); + gen_mov_word_to_reg_helper(templosav, dest, 1, templo2); + if (imm >= 0) { + cache_addw(0x3000 + (templosav << 8) + ((Bit32s)imm)); // add templosav, #(imm) + } else { + cache_addw(0x3800 + (templosav << 8) + (-((Bit32s)imm))); // sub templosav, #(-imm) + } + gen_mov_word_from_reg_helper(templosav, dest, 1, templo2); + cache_addw(0x4640 + templosav + ((temphi1 - HOST_r8) << 3)); // mov templosav, temphi1 +} + +// add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value +static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) { + if(!imm) return; + if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) { + gen_add_direct_byte(dest,(Bit8s)imm); + return; + } + 
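+	// park templosav in a "hi" register while it serves as scratch below; it is restored at the end of the function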
cache_addw(0x4680 + (temphi1 - HOST_r8) + (templosav << 3)); // mov temphi1, templosav + gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); + gen_mov_word_to_reg_helper(templosav, dest, dword, templo2); + if (dword) { + gen_mov_dword_to_reg_imm(templo1, imm); + } else { + gen_mov_word_to_reg_imm(templo1, (Bit16u)imm); + } + cache_addw(0x1800 + templosav + (templosav << 3) + (templo1 << 6)); // add templosav, templosav, templo1 + gen_mov_word_from_reg_helper(templosav, dest, dword, templo2); + cache_addw(0x4640 + templosav + ((temphi1 - HOST_r8) << 3)); // mov templosav, temphi1 +} + +// subtract an 8bit constant value from a dword memory value +static void gen_sub_direct_byte(void* dest,Bit8s imm) { + if(!imm) return; + cache_addw(0x4680 + (temphi1 - HOST_r8) + (templosav << 3)); // mov temphi1, templosav + gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); + gen_mov_word_to_reg_helper(templosav, dest, 1, templo2); + if (imm >= 0) { + cache_addw(0x3800 + (templosav << 8) + ((Bit32s)imm)); // sub templosav, #(imm) + } else { + cache_addw(0x3000 + (templosav << 8) + (-((Bit32s)imm))); // add templosav, #(-imm) + } + gen_mov_word_from_reg_helper(templosav, dest, 1, templo2); + cache_addw(0x4640 + templosav + ((temphi1 - HOST_r8) << 3)); // mov templosav, temphi1 +} + +// subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value +static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) { + if(!imm) return; + if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) { + gen_sub_direct_byte(dest,(Bit8s)imm); + return; + } + cache_addw(0x4680 + (temphi1 - HOST_r8) + (templosav << 3)); // mov temphi1, templosav + gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest); + gen_mov_word_to_reg_helper(templosav, dest, dword, templo2); + if (dword) { + gen_mov_dword_to_reg_imm(templo1, imm); + } else { + gen_mov_word_to_reg_imm(templo1, (Bit16u)imm); + } + cache_addw(0x1a00 + templosav + (templosav << 3) + (templo1 << 6)); // sub templosav, templosav, templo1 + gen_mov_word_from_reg_helper(templosav, dest, dword, templo2); + cache_addw(0x4640 + templosav + ((temphi1 - HOST_r8) << 3)); // mov templosav, temphi1 +} + +// effective address calculation, destination is dest_reg +// scale_reg is scaled by scale (scale_reg*(2^scale)) and +// added to dest_reg, then the immediate value is added +static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) { + if (scale) { + cache_addw(0x0000 + templo1 + (scale_reg << 3) + (scale << 6)); // lsl templo1, scale_reg, #(scale) + cache_addw(0x1800 + dest_reg + (dest_reg << 3) + (templo1 << 6)); // add dest_reg, dest_reg, templo1 + } else { + cache_addw(0x1800 + dest_reg + (dest_reg << 3) + (scale_reg << 6)); // add dest_reg, dest_reg, scale_reg + } + gen_add_imm(dest_reg, imm); +} + +// effective address calculation, destination is dest_reg +// dest_reg is scaled by scale (dest_reg*(2^scale)), +// then the immediate value is added +static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) { + if (scale) { + cache_addw(0x0000 + dest_reg + (dest_reg << 3) + (scale << 6)); // lsl dest_reg, dest_reg, #(scale) + } + gen_add_imm(dest_reg, imm); +} + +// generate a call to a parameterless function +static void INLINE gen_call_function_raw(void * func) { + if (((Bit32u)cache.pos & 0x03) == 0) { + cache_addw(0x4800 + (templo1 << 8) + (4 >> 2)); // ldr templo1, [pc, #4] + cache_addw(0xa000 + (templo2 << 8) + (8 >> 2)); // adr templo2, after_call (add templo2, pc, #8) + cache_addw(0x4680 + (HOST_lr - HOST_r8) + (templo2 << 3)); 
// mov lr, templo2 + cache_addw(0x4700 + (templo1 << 3)); // bx templo1 --- switch to arm state + } else { + cache_addw(0x4800 + (templo1 << 8) + (8 >> 2)); // ldr templo1, [pc, #8] + cache_addw(0xa000 + (templo2 << 8) + (8 >> 2)); // adr templo2, after_call (add templo2, pc, #8) + cache_addw(0x4680 + (HOST_lr - HOST_r8) + (templo2 << 3)); // mov lr, templo2 + cache_addw(0x4700 + (templo1 << 3)); // bx templo1 --- switch to arm state + cache_addw(0x46c0); // nop + } + cache_addd((Bit32u)func); // .int func + // after_call: + + // switch from arm to thumb state + cache_addd(0xe2800000 + (templo1 << 12) + (HOST_pc << 16) + (1)); // add templo1, pc, #1 + cache_addd(0xe12fff10 + (templo1)); // bx templo1 + + // thumb state from now on + cache_addw(0x1c00 + FC_RETOP + (HOST_a1 << 3)); // mov FC_RETOP, a1 +} + +// generate a call to a function with paramcount parameters +// note: the parameters are loaded in the architecture specific way +// using the gen_load_param_ functions below +static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) { + Bit32u proc_addr = (Bit32u)cache.pos; + gen_call_function_raw(func); + return proc_addr; + // if proc_addr is on word boundary ((proc_addr & 0x03) == 0) + // then length of generated code is 22 bytes + // otherwise length of generated code is 24 bytes +} + +#if (1) +// max of 4 parameters in a1-a4 + +// load an immediate value as param'th function parameter +static void INLINE gen_load_param_imm(Bitu imm,Bitu param) { + gen_mov_dword_to_reg_imm(param, imm); +} + +// load an address as param'th function parameter +static void INLINE gen_load_param_addr(Bitu addr,Bitu param) { + gen_mov_dword_to_reg_imm(param, addr); +} + +// load a host-register as param'th function parameter +static void INLINE gen_load_param_reg(Bitu reg,Bitu param) { + gen_mov_regs(param, reg); +} + +// load a value from memory as param'th function parameter +static void INLINE gen_load_param_mem(Bitu mem,Bitu param) { + gen_mov_word_to_reg(param, (void *)mem, 1); +} +#else + other arm abis +#endif + +// jump to an address pointed at by ptr, offset is in imm +static void gen_jmp_ptr(void * ptr,Bits imm=0) { + cache_addw(0x4680 + (temphi1 - HOST_r8) + (templosav << 3)); // mov temphi1, templosav + gen_mov_word_to_reg(templosav, ptr, 1); + + if (imm) { + gen_mov_dword_to_reg_imm(templo2, imm); + cache_addw(0x1800 + templosav + (templosav << 3) + (templo2 << 6)); // add templosav, templosav, templo2 + } + +#if (1) +// (*ptr) should be word aligned + if ((imm & 0x03) == 0) { + cache_addw(0x6800 + templo2 + (templosav << 3)); // ldr templo2, [templosav] + } else +#endif + { + cache_addw(0x7800 + templo2 + (templosav << 3)); // ldrb templo2, [templosav] + cache_addw(0x7800 + templo1 + (templosav << 3) + (1 << 6)); // ldrb templo1, [templosav, #1] + cache_addw(0x0000 + templo1 + (templo1 << 3) + (8 << 6)); // lsl templo1, templo1, #8 + cache_addw(0x4300 + templo2 + (templo1 << 3)); // orr templo2, templo1 + cache_addw(0x7800 + templo1 + (templosav << 3) + (2 << 6)); // ldrb templo1, [templosav, #2] + cache_addw(0x0000 + templo1 + (templo1 << 3) + (16 << 6)); // lsl templo1, templo1, #16 + cache_addw(0x4300 + templo2 + (templo1 << 3)); // orr templo2, templo1 + cache_addw(0x7800 + templo1 + (templosav << 3) + (3 << 6)); // ldrb templo1, [templosav, #3] + cache_addw(0x0000 + templo1 + (templo1 << 3) + (24 << 6)); // lsl templo1, templo1, #24 + cache_addw(0x4300 + templo2 + (templo1 << 3)); // orr templo2, templo1 + } + + // increase jmp address to keep thumb 
state + cache_addw(0x1c00 + templo2 + (templo2 << 3) + (1 << 6)); // add templo2, templo2, #1 + + cache_addw(0x4640 + templosav + ((temphi1 - HOST_r8) << 3)); // mov templosav, temphi1 + + cache_addw(0x4700 + (templo2 << 3)); // bx templo2 +} + +// short conditional jump (+-127 bytes) if register is zero +// the destination is set by gen_fill_branch() later +static Bit32u INLINE gen_create_branch_on_zero(HostReg reg,bool dword) { + if (dword) { + cache_addw(0x2800 + (reg << 8)); // cmp reg, #0 + } else { + cache_addw(0x0000 + templo1 + (reg << 3) + (16 << 6)); // lsl templo1, reg, #16 + } + cache_addw(0xd000); // beq j + return ((Bit32u)cache.pos-2); +} + +// short conditional jump (+-127 bytes) if register is nonzero +// the destination is set by gen_fill_branch() later +static Bit32u INLINE gen_create_branch_on_nonzero(HostReg reg,bool dword) { + if (dword) { + cache_addw(0x2800 + (reg << 8)); // cmp reg, #0 + } else { + cache_addw(0x0000 + templo1 + (reg << 3) + (16 << 6)); // lsl templo1, reg, #16 + } + cache_addw(0xd100); // bne j + return ((Bit32u)cache.pos-2); +} + +// calculate relative offset and fill it into the location pointed to by data +static void INLINE gen_fill_branch(DRC_PTR_SIZE_IM data) { +#if C_DEBUG + Bits len=(Bit32u)cache.pos-(data+4); + if (len<0) len=-len; + if (len>252) LOG_MSG("Big jump %d",len); +#endif + *(Bit8u*)data=(Bit8u)( ((Bit32u)cache.pos-(data+4)) >> 1 ); +} + +// conditional jump if register is nonzero +// for isdword==true the 32bit of the register are tested +// for isdword==false the lowest 8bit of the register are tested +static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool isdword) { + if (isdword) { + cache_addw(0x2800 + (reg << 8)); // cmp reg, #0 + } else { + cache_addw(0x0000 + templo2 + (reg << 3) + (24 << 6)); // lsl templo2, reg, #24 + } + if (((Bit32u)cache.pos & 0x03) == 0) { + cache_addw(0xd000 + (8 >> 1)); // beq nobranch (pc+8) + cache_addw(0x4800 + (templo1 << 8) + (4 >> 2)); // ldr templo1, [pc, #4] + cache_addw(0x4700 + (templo1 << 3)); // bx templo1 + cache_addw(0x46c0); // nop + } else { + cache_addw(0xd000 + (6 >> 1)); // beq nobranch (pc+6) + cache_addw(0x4800 + (templo1 << 8)); // ldr templo1, [pc, #0] + cache_addw(0x4700 + (templo1 << 3)); // bx templo1 + } + cache_addd(0); // fill j + // nobranch: + return ((Bit32u)cache.pos-4); +} + +// compare 32bit-register against zero and jump if value less/equal than zero +static Bit32u INLINE gen_create_branch_long_leqzero(HostReg reg) { + cache_addw(0x2800 + (reg << 8)); // cmp reg, #0 + if (((Bit32u)cache.pos & 0x03) == 0) { + cache_addw(0xdc00 + (8 >> 1)); // bgt nobranch (pc+8) + cache_addw(0x4800 + (templo1 << 8) + (4 >> 2)); // ldr templo1, [pc, #4] + cache_addw(0x4700 + (templo1 << 3)); // bx templo1 + cache_addw(0x46c0); // nop + } else { + cache_addw(0xdc00 + (6 >> 1)); // bgt nobranch (pc+6) + cache_addw(0x4800 + (templo1 << 8)); // ldr templo1, [pc, #0] + cache_addw(0x4700 + (templo1 << 3)); // bx templo1 + } + cache_addd(0); // fill j + // nobranch: + return ((Bit32u)cache.pos-4); +} + +// calculate long relative offset and fill it into the location pointed to by data +static void INLINE gen_fill_branch_long(Bit32u data) { + // this is an absolute branch + *(Bit32u*)data=((Bit32u)cache.pos) + 1; // add 1 to keep processor in thumb state +} + +static void gen_run_code(void) { + // switch from arm to thumb state + cache_addd(0xe2800000 + (HOST_r3 << 12) + (HOST_pc << 16) + (1)); // add r3, pc, #1 + cache_addd(0xe12fff10 + (HOST_r3)); // bx r3 + + // thumb state from 
now on + cache_addw(0xb500); // push {lr} + cache_addw(0xb4f0); // push {v1-v4} + + cache_addw(0xa302); // add r3, pc, #8 + cache_addw(0x3001); // add r0, #1 + cache_addw(0x3301); // add r3, #1 + cache_addw(0xb408); // push {r3} + cache_addw(0x4700); // bx r0 + cache_addw(0x46c0); // nop + + cache_addw(0xbcf0); // pop {v1-v4} + + cache_addw(0xbc08); // pop {r3} + cache_addw(0x4718); // bx r3 +} + +// return from a function +static void gen_return_function(void) { + cache_addw(0x1c00 + HOST_a1 + (FC_RETOP << 3)); // mov a1, FC_RETOP + cache_addw(0xbc08); // pop {r3} + cache_addw(0x4718); // bx r3 +} + +#ifdef DRC_FLAGS_INVALIDATION + +// called when a call to a function can be replaced by a +// call to a simpler function +static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { +#ifdef DRC_FLAGS_INVALIDATION_DCODE + if (((Bit32u)pos & 0x03) == 0) + { + // try to avoid function calls but rather directly fill in code + switch (flags_type) { + case t_ADDb: + case t_ADDw: + case t_ADDd: + *(Bit16u*)pos=0x1800 + FC_RETOP + (HOST_a1 << 3) + (HOST_a2 << 6); // add FC_RETOP, a1, a2 + *(Bit16u*)(pos+2)=0xe000 + (16 >> 1); // b after_call (pc+16) + break; + case t_ORb: + case t_ORw: + case t_ORd: + *(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1 + *(Bit16u*)(pos+2)=0x4300 + FC_RETOP + (HOST_a2 << 3); // orr FC_RETOP, a2 + *(Bit16u*)(pos+4)=0xe000 + (14 >> 1); // b after_call (pc+14) + break; + case t_ANDb: + case t_ANDw: + case t_ANDd: + *(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1 + *(Bit16u*)(pos+2)=0x4000 + FC_RETOP + (HOST_a2 << 3); // and FC_RETOP, a2 + *(Bit16u*)(pos+4)=0xe000 + (14 >> 1); // b after_call (pc+14) + break; + case t_SUBb: + case t_SUBw: + case t_SUBd: + *(Bit16u*)pos=0x1a00 + FC_RETOP + (HOST_a1 << 3) + (HOST_a2 << 6); // sub FC_RETOP, a1, a2 + *(Bit16u*)(pos+2)=0xe000 + (16 >> 1); // b after_call (pc+16) + break; + case t_XORb: + case t_XORw: + case t_XORd: + *(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1 + *(Bit16u*)(pos+2)=0x4040 + FC_RETOP + (HOST_a2 << 3); // eor FC_RETOP, a2 + *(Bit16u*)(pos+4)=0xe000 + (14 >> 1); // b after_call (pc+14) + break; + case t_CMPb: + case t_CMPw: + case t_CMPd: + case t_TESTb: + case t_TESTw: + case t_TESTd: + *(Bit16u*)pos=0xe000 + (18 >> 1); // b after_call (pc+18) + break; + case t_INCb: + case t_INCw: + case t_INCd: + *(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3) + (1 << 6); // add FC_RETOP, a1, #1 + *(Bit16u*)(pos+2)=0xe000 + (16 >> 1); // b after_call (pc+16) + break; + case t_DECb: + case t_DECw: + case t_DECd: + *(Bit16u*)pos=0x1e00 + FC_RETOP + (HOST_a1 << 3) + (1 << 6); // sub FC_RETOP, a1, #1 + *(Bit16u*)(pos+2)=0xe000 + (16 >> 1); // b after_call (pc+16) + break; + case t_SHLb: + case t_SHLw: + case t_SHLd: + *(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1 + *(Bit16u*)(pos+2)=0x4080 + FC_RETOP + (HOST_a2 << 3); // lsl FC_RETOP, a2 + *(Bit16u*)(pos+4)=0xe000 + (14 >> 1); // b after_call (pc+14) + break; + case t_SHRb: + *(Bit16u*)pos=0x0000 + FC_RETOP + (HOST_a1 << 3) + (24 << 6); // lsl FC_RETOP, a1, #24 + *(Bit16u*)(pos+2)=0x0800 + FC_RETOP + (FC_RETOP << 3) + (24 << 6); // lsr FC_RETOP, FC_RETOP, #24 + *(Bit16u*)(pos+4)=0x40c0 + FC_RETOP + (HOST_a2 << 3); // lsr FC_RETOP, a2 + *(Bit16u*)(pos+6)=0xe000 + (12 >> 1); // b after_call (pc+12) + break; + case t_SHRw: + *(Bit16u*)pos=0x0000 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsl FC_RETOP, a1, #16 + *(Bit16u*)(pos+2)=0x0800 + FC_RETOP + (FC_RETOP << 3) + (16 << 6); 
// lsr FC_RETOP, FC_RETOP, #16 + *(Bit16u*)(pos+4)=0x40c0 + FC_RETOP + (HOST_a2 << 3); // lsr FC_RETOP, a2 + *(Bit16u*)(pos+6)=0xe000 + (12 >> 1); // b after_call (pc+12) + break; + case t_SHRd: + *(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1 + *(Bit16u*)(pos+2)=0x40c0 + FC_RETOP + (HOST_a2 << 3); // lsr FC_RETOP, a2 + *(Bit16u*)(pos+4)=0xe000 + (14 >> 1); // b after_call (pc+14) + break; + case t_SARb: + *(Bit16u*)pos=0x0000 + FC_RETOP + (HOST_a1 << 3) + (24 << 6); // lsl FC_RETOP, a1, #24 + *(Bit16u*)(pos+2)=0x1000 + FC_RETOP + (FC_RETOP << 3) + (24 << 6); // asr FC_RETOP, FC_RETOP, #24 + *(Bit16u*)(pos+4)=0x4100 + FC_RETOP + (HOST_a2 << 3); // asr FC_RETOP, a2 + *(Bit16u*)(pos+6)=0xe000 + (12 >> 1); // b after_call (pc+12) + break; + case t_SARw: + *(Bit16u*)pos=0x0000 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsl FC_RETOP, a1, #16 + *(Bit16u*)(pos+2)=0x1000 + FC_RETOP + (FC_RETOP << 3) + (16 << 6); // asr FC_RETOP, FC_RETOP, #16 + *(Bit16u*)(pos+4)=0x4100 + FC_RETOP + (HOST_a2 << 3); // asr FC_RETOP, a2 + *(Bit16u*)(pos+6)=0xe000 + (12 >> 1); // b after_call (pc+12) + break; + case t_SARd: + *(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1 + *(Bit16u*)(pos+2)=0x4100 + FC_RETOP + (HOST_a2 << 3); // asr FC_RETOP, a2 + *(Bit16u*)(pos+4)=0xe000 + (14 >> 1); // b after_call (pc+14) + break; + case t_RORb: + *(Bit16u*)pos=0x0000 + HOST_a1 + (HOST_a1 << 3) + (24 << 6); // lsl a1, a1, #24 + *(Bit16u*)(pos+2)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (8 << 6); // lsr FC_RETOP, a1, #8 + *(Bit16u*)(pos+4)=0x4300 + HOST_a1 + (FC_RETOP << 3); // orr a1, FC_RETOP + *(Bit16u*)(pos+6)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsr FC_RETOP, a1, #16 + *(Bit16u*)(pos+8)=0x4300 + FC_RETOP + (HOST_a1 << 3); // orr FC_RETOP, a1 + *(Bit16u*)(pos+10)=0x41c0 + FC_RETOP + (HOST_a2 << 3); // ror FC_RETOP, a2 + *(Bit16u*)(pos+12)=0xe000 + (6 >> 1); // b after_call (pc+6) + break; + case t_RORw: + *(Bit16u*)pos=0x0000 + HOST_a1 + (HOST_a1 << 3) + (16 << 6); // lsl a1, a1, #16 + *(Bit16u*)(pos+2)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsr FC_RETOP, a1, #16 + *(Bit16u*)(pos+4)=0x4300 + FC_RETOP + (HOST_a1 << 3); // orr FC_RETOP, a1 + *(Bit16u*)(pos+6)=0x41c0 + FC_RETOP + (HOST_a2 << 3); // ror FC_RETOP, a2 + *(Bit16u*)(pos+8)=0xe000 + (10 >> 1); // b after_call (pc+10) + break; + case t_RORd: + *(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1 + *(Bit16u*)(pos+2)=0x41c0 + FC_RETOP + (HOST_a2 << 3); // ror FC_RETOP, a2 + *(Bit16u*)(pos+4)=0xe000 + (14 >> 1); // b after_call (pc+14) + break; + case t_ROLb: + *(Bit16u*)pos=0x0000 + HOST_a1 + (HOST_a1 << 3) + (24 << 6); // lsl a1, a1, #24 + *(Bit16u*)(pos+2)=0x4240 + templo1 + (HOST_a2 << 3); // neg templo1, a2 + *(Bit16u*)(pos+4)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (8 << 6); // lsr FC_RETOP, a1, #8 + *(Bit16u*)(pos+6)=0x3000 + (templo1 << 8) + (32); // add templo1, #32 + *(Bit16u*)(pos+8)=0x4300 + HOST_a1 + (FC_RETOP << 3); // orr a1, FC_RETOP + *(Bit16u*)(pos+10)=0x46c0; // nop + *(Bit16u*)(pos+12)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsr FC_RETOP, a1, #16 + *(Bit16u*)(pos+14)=0x46c0; // nop + *(Bit16u*)(pos+16)=0x4300 + FC_RETOP + (HOST_a1 << 3); // orr FC_RETOP, a1 + *(Bit16u*)(pos+18)=0x46c0; // nop + *(Bit16u*)(pos+20)=0x41c0 + FC_RETOP + (templo1 << 3); // ror FC_RETOP, templo1 + break; + case t_ROLw: + *(Bit16u*)pos=0x0000 + HOST_a1 + (HOST_a1 << 3) + (16 << 6); // lsl a1, a1, #16 + *(Bit16u*)(pos+2)=0x4240 + templo1 + (HOST_a2 << 3); // neg templo1, a2 + 
*(Bit16u*)(pos+4)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsr FC_RETOP, a1, #16 + *(Bit16u*)(pos+6)=0x3000 + (templo1 << 8) + (32); // add templo1, #32 + *(Bit16u*)(pos+8)=0x4300 + FC_RETOP + (HOST_a1 << 3); // orr FC_RETOP, a1 + *(Bit16u*)(pos+10)=0x41c0 + FC_RETOP + (templo1 << 3); // ror FC_RETOP, templo1 + *(Bit16u*)(pos+12)=0xe000 + (6 >> 1); // b after_call (pc+6) + break; + case t_ROLd: + *(Bit16u*)pos=0x4240 + templo1 + (HOST_a2 << 3); // neg templo1, a2 + *(Bit16u*)(pos+2)=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1 + *(Bit16u*)(pos+4)=0x3000 + (templo1 << 8) + (32); // add templo1, #32 + *(Bit16u*)(pos+6)=0x41c0 + FC_RETOP + (templo1 << 3); // ror FC_RETOP, templo1 + *(Bit16u*)(pos+8)=0xe000 + (10 >> 1); // b after_call (pc+10) + break; + case t_NEGb: + case t_NEGw: + case t_NEGd: + *(Bit16u*)pos=0x4240 + FC_RETOP + (HOST_a1 << 3); // neg FC_RETOP, a1 + *(Bit16u*)(pos+2)=0xe000 + (16 >> 1); // b after_call (pc+16) + break; + default: + *(Bit32u*)(pos+8)=(Bit32u)fct_ptr; // simple_func + break; + } + } + else + { + // try to avoid function calls but rather directly fill in code + switch (flags_type) { + case t_ADDb: + case t_ADDw: + case t_ADDd: + *(Bit16u*)pos=0x1800 + FC_RETOP + (HOST_a1 << 3) + (HOST_a2 << 6); // add FC_RETOP, a1, a2 + *(Bit16u*)(pos+2)=0xe000 + (18 >> 1); // b after_call (pc+18) + break; + case t_ORb: + case t_ORw: + case t_ORd: + *(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1 + *(Bit16u*)(pos+2)=0x4300 + FC_RETOP + (HOST_a2 << 3); // orr FC_RETOP, a2 + *(Bit16u*)(pos+4)=0xe000 + (16 >> 1); // b after_call (pc+16) + break; + case t_ANDb: + case t_ANDw: + case t_ANDd: + *(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1 + *(Bit16u*)(pos+2)=0x4000 + FC_RETOP + (HOST_a2 << 3); // and FC_RETOP, a2 + *(Bit16u*)(pos+4)=0xe000 + (16 >> 1); // b after_call (pc+16) + break; + case t_SUBb: + case t_SUBw: + case t_SUBd: + *(Bit16u*)pos=0x1a00 + FC_RETOP + (HOST_a1 << 3) + (HOST_a2 << 6); // sub FC_RETOP, a1, a2 + *(Bit16u*)(pos+2)=0xe000 + (18 >> 1); // b after_call (pc+18) + break; + case t_XORb: + case t_XORw: + case t_XORd: + *(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1 + *(Bit16u*)(pos+2)=0x4040 + FC_RETOP + (HOST_a2 << 3); // eor FC_RETOP, a2 + *(Bit16u*)(pos+4)=0xe000 + (16 >> 1); // b after_call (pc+16) + break; + case t_CMPb: + case t_CMPw: + case t_CMPd: + case t_TESTb: + case t_TESTw: + case t_TESTd: + *(Bit16u*)pos=0xe000 + (20 >> 1); // b after_call (pc+20) + break; + case t_INCb: + case t_INCw: + case t_INCd: + *(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3) + (1 << 6); // add FC_RETOP, a1, #1 + *(Bit16u*)(pos+2)=0xe000 + (18 >> 1); // b after_call (pc+18) + break; + case t_DECb: + case t_DECw: + case t_DECd: + *(Bit16u*)pos=0x1e00 + FC_RETOP + (HOST_a1 << 3) + (1 << 6); // sub FC_RETOP, a1, #1 + *(Bit16u*)(pos+2)=0xe000 + (18 >> 1); // b after_call (pc+18) + break; + case t_SHLb: + case t_SHLw: + case t_SHLd: + *(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1 + *(Bit16u*)(pos+2)=0x4080 + FC_RETOP + (HOST_a2 << 3); // lsl FC_RETOP, a2 + *(Bit16u*)(pos+4)=0xe000 + (16 >> 1); // b after_call (pc+16) + break; + case t_SHRb: + *(Bit16u*)pos=0x0000 + FC_RETOP + (HOST_a1 << 3) + (24 << 6); // lsl FC_RETOP, a1, #24 + *(Bit16u*)(pos+2)=0x0800 + FC_RETOP + (FC_RETOP << 3) + (24 << 6); // lsr FC_RETOP, FC_RETOP, #24 + *(Bit16u*)(pos+4)=0x40c0 + FC_RETOP + (HOST_a2 << 3); // lsr FC_RETOP, a2 + *(Bit16u*)(pos+6)=0xe000 + (14 >> 1); // b after_call (pc+14) + 
break; + case t_SHRw: + *(Bit16u*)pos=0x0000 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsl FC_RETOP, a1, #16 + *(Bit16u*)(pos+2)=0x0800 + FC_RETOP + (FC_RETOP << 3) + (16 << 6); // lsr FC_RETOP, FC_RETOP, #16 + *(Bit16u*)(pos+4)=0x40c0 + FC_RETOP + (HOST_a2 << 3); // lsr FC_RETOP, a2 + *(Bit16u*)(pos+6)=0xe000 + (14 >> 1); // b after_call (pc+14) + break; + case t_SHRd: + *(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1 + *(Bit16u*)(pos+2)=0x40c0 + FC_RETOP + (HOST_a2 << 3); // lsr FC_RETOP, a2 + *(Bit16u*)(pos+4)=0xe000 + (16 >> 1); // b after_call (pc+16) + break; + case t_SARb: + *(Bit16u*)pos=0x0000 + FC_RETOP + (HOST_a1 << 3) + (24 << 6); // lsl FC_RETOP, a1, #24 + *(Bit16u*)(pos+2)=0x1000 + FC_RETOP + (FC_RETOP << 3) + (24 << 6); // asr FC_RETOP, FC_RETOP, #24 + *(Bit16u*)(pos+4)=0x4100 + FC_RETOP + (HOST_a2 << 3); // asr FC_RETOP, a2 + *(Bit16u*)(pos+6)=0xe000 + (14 >> 1); // b after_call (pc+14) + break; + case t_SARw: + *(Bit16u*)pos=0x0000 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsl FC_RETOP, a1, #16 + *(Bit16u*)(pos+2)=0x1000 + FC_RETOP + (FC_RETOP << 3) + (16 << 6); // asr FC_RETOP, FC_RETOP, #16 + *(Bit16u*)(pos+4)=0x4100 + FC_RETOP + (HOST_a2 << 3); // asr FC_RETOP, a2 + *(Bit16u*)(pos+6)=0xe000 + (14 >> 1); // b after_call (pc+14) + break; + case t_SARd: + *(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1 + *(Bit16u*)(pos+2)=0x4100 + FC_RETOP + (HOST_a2 << 3); // asr FC_RETOP, a2 + *(Bit16u*)(pos+4)=0xe000 + (16 >> 1); // b after_call (pc+16) + break; + case t_RORb: + *(Bit16u*)pos=0x0000 + HOST_a1 + (HOST_a1 << 3) + (24 << 6); // lsl a1, a1, #24 + *(Bit16u*)(pos+2)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (8 << 6); // lsr FC_RETOP, a1, #8 + *(Bit16u*)(pos+4)=0x4300 + HOST_a1 + (FC_RETOP << 3); // orr a1, FC_RETOP + *(Bit16u*)(pos+6)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsr FC_RETOP, a1, #16 + *(Bit16u*)(pos+8)=0x4300 + FC_RETOP + (HOST_a1 << 3); // orr FC_RETOP, a1 + *(Bit16u*)(pos+10)=0x41c0 + FC_RETOP + (HOST_a2 << 3); // ror FC_RETOP, a2 + *(Bit16u*)(pos+12)=0xe000 + (8 >> 1); // b after_call (pc+8) + break; + case t_RORw: + *(Bit16u*)pos=0x0000 + HOST_a1 + (HOST_a1 << 3) + (16 << 6); // lsl a1, a1, #16 + *(Bit16u*)(pos+2)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsr FC_RETOP, a1, #16 + *(Bit16u*)(pos+4)=0x4300 + FC_RETOP + (HOST_a1 << 3); // orr FC_RETOP, a1 + *(Bit16u*)(pos+6)=0x41c0 + FC_RETOP + (HOST_a2 << 3); // ror FC_RETOP, a2 + *(Bit16u*)(pos+8)=0xe000 + (12 >> 1); // b after_call (pc+12) + break; + case t_RORd: + *(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1 + *(Bit16u*)(pos+2)=0x41c0 + FC_RETOP + (HOST_a2 << 3); // ror FC_RETOP, a2 + *(Bit16u*)(pos+4)=0xe000 + (16 >> 1); // b after_call (pc+16) + break; + case t_ROLb: + *(Bit16u*)pos=0x0000 + HOST_a1 + (HOST_a1 << 3) + (24 << 6); // lsl a1, a1, #24 + *(Bit16u*)(pos+2)=0x4240 + templo1 + (HOST_a2 << 3); // neg templo1, a2 + *(Bit16u*)(pos+4)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (8 << 6); // lsr FC_RETOP, a1, #8 + *(Bit16u*)(pos+6)=0x3000 + (templo1 << 8) + (32); // add templo1, #32 + *(Bit16u*)(pos+8)=0x4300 + HOST_a1 + (FC_RETOP << 3); // orr a1, FC_RETOP + *(Bit16u*)(pos+10)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsr FC_RETOP, a1, #16 + *(Bit16u*)(pos+12)=0x4300 + FC_RETOP + (HOST_a1 << 3); // orr FC_RETOP, a1 + *(Bit16u*)(pos+14)=0x41c0 + FC_RETOP + (templo1 << 3); // ror FC_RETOP, templo1 + *(Bit16u*)(pos+16)=0xe000 + (4 >> 1); // b after_call (pc+4) + break; + case t_ROLw: + *(Bit16u*)pos=0x0000 + HOST_a1 
+ (HOST_a1 << 3) + (16 << 6); // lsl a1, a1, #16 + *(Bit16u*)(pos+2)=0x4240 + templo1 + (HOST_a2 << 3); // neg templo1, a2 + *(Bit16u*)(pos+4)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsr FC_RETOP, a1, #16 + *(Bit16u*)(pos+6)=0x3000 + (templo1 << 8) + (32); // add templo1, #32 + *(Bit16u*)(pos+8)=0x4300 + FC_RETOP + (HOST_a1 << 3); // orr FC_RETOP, a1 + *(Bit16u*)(pos+10)=0x41c0 + FC_RETOP + (templo1 << 3); // ror FC_RETOP, templo1 + *(Bit16u*)(pos+12)=0xe000 + (8 >> 1); // b after_call (pc+8) + break; + case t_ROLd: + *(Bit16u*)pos=0x4240 + templo1 + (HOST_a2 << 3); // neg templo1, a2 + *(Bit16u*)(pos+2)=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1 + *(Bit16u*)(pos+4)=0x3000 + (templo1 << 8) + (32); // add templo1, #32 + *(Bit16u*)(pos+6)=0x41c0 + FC_RETOP + (templo1 << 3); // ror FC_RETOP, templo1 + *(Bit16u*)(pos+8)=0xe000 + (12 >> 1); // b after_call (pc+12) + break; + case t_NEGb: + case t_NEGw: + case t_NEGd: + *(Bit16u*)pos=0x4240 + FC_RETOP + (HOST_a1 << 3); // neg FC_RETOP, a1 + *(Bit16u*)(pos+2)=0xe000 + (18 >> 1); // b after_call (pc+18) + break; + default: + *(Bit32u*)(pos+10)=(Bit32u)fct_ptr; // simple_func + break; + } + + } +#else + if (((Bit32u)pos & 0x03) == 0) + { + *(Bit32u*)(pos+8)=(Bit32u)fct_ptr; // simple_func + } + else + { + *(Bit32u*)(pos+10)=(Bit32u)fct_ptr; // simple_func + } +#endif +} +#endif diff --git a/src/cpu/core_dynrec/risc_armv4le.h b/src/cpu/core_dynrec/risc_armv4le.h new file mode 100644 index 00000000..d7e6f6a0 --- /dev/null +++ b/src/cpu/core_dynrec/risc_armv4le.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2002-2008 The DOSBox Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +/* $Id: risc_armv4le.h,v 1.1 2008-08-20 14:13:21 c2woody Exp $ */ + + +/* ARMv4 (little endian) backend (switcher) by M-HT */ + +#include "risc_armv4le-common.h" + +// choose your destiny: +#include "risc_armv4le-s3.h" +//#include "risc_armv4le-o3.h" +//#include "risc_armv4le-thumb.h" diff --git a/src/cpu/core_dynrec/risc_mipsel32.h b/src/cpu/core_dynrec/risc_mipsel32.h index a6718feb..451081b6 100644 --- a/src/cpu/core_dynrec/risc_mipsel32.h +++ b/src/cpu/core_dynrec/risc_mipsel32.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2002-2007 The DOSBox Team + * Copyright (C) 2002-2008 The DOSBox Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,6 +16,8 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ +/* $Id: risc_mipsel32.h,v 1.2 2008-08-20 14:13:21 c2woody Exp $ */ + /* MIPS32 (little endian) backend by crazyc */ @@ -630,3 +632,15 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { #endif } #endif + +static void cache_block_closing(Bit8u* block_start,Bitu block_size) { +#ifdef PSP +// writeback dcache and invalidate icache + Bit32u inval_start = ((Bit32u)block_start) & ~63; + Bit32u inval_end = (((Bit32u)block_start) + block_size + 64) & ~63; + for (;inval_start < inval_end; inval_start+=64) { + __builtin_allegrex_cache(0x1a, inval_start); + __builtin_allegrex_cache(0x08, inval_start); + } +#endif +} diff --git a/src/cpu/core_dynrec/risc_x64.h b/src/cpu/core_dynrec/risc_x64.h index 1464ed61..59ef05cc 100644 --- a/src/cpu/core_dynrec/risc_x64.h +++ b/src/cpu/core_dynrec/risc_x64.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2002-2007 The DOSBox Team + * Copyright (C) 2002-2008 The DOSBox Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,6 +16,8 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +/* $Id: risc_x64.h,v 1.10 2008-08-20 14:13:21 c2woody Exp $ */ + // some configuring defines that specify the capabilities of this architecture // or aspects of the recompiling @@ -671,3 +673,5 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { #endif } #endif + +static void cache_block_closing(Bit8u* block_start,Bitu block_size) { } diff --git a/src/cpu/core_dynrec/risc_x86.h b/src/cpu/core_dynrec/risc_x86.h index 88fe4361..5e7992a4 100644 --- a/src/cpu/core_dynrec/risc_x86.h +++ b/src/cpu/core_dynrec/risc_x86.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2002-2007 The DOSBox Team + * Copyright (C) 2002-2008 The DOSBox Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,6 +16,8 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +/* $Id: risc_x86.h,v 1.7 2008-08-20 14:13:21 c2woody Exp $ */ + // some configuring defines that specify the capabilities of this architecture // or aspects of the recompiling @@ -505,3 +507,5 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) { #endif } #endif + +static void cache_block_closing(Bit8u* block_start,Bitu block_size) { }
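For comparison, a minimal sketch (not part of this patch) of the same cache_block_closing hook for a hosted ARM/Linux build: it assumes a GCC toolchain, where the __builtin___clear_cache builtin performs the required data-cache writeback and instruction-cache invalidation in place of a platform-specific syscall:

static void cache_block_closing(Bit8u* block_start,Bitu block_size) {
	// writeback the generated code from the data cache and invalidate
	// the instruction cache for that range before it is executed
	__builtin___clear_cache((char*)block_start,(char*)(block_start+block_size));
}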