1
0
Fork 0

add arm4 dynamic recompiler backend (M-HT)

Imported-from: https://svn.code.sf.net/p/dosbox/code-0/dosbox/trunk@3206
This commit is contained in:
Sebastian Strohhäcker 2008-08-20 14:13:21 +00:00
parent 6640a3e6f0
commit 021c76f6d9
11 changed files with 2649 additions and 7 deletions

View file

@ -1,5 +1,5 @@
/*
* Copyright (C) 2002-2007 The DOSBox Team
* Copyright (C) 2002-2008 The DOSBox Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -16,7 +16,7 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/* $Id: core_dynrec.cpp,v 1.9 2007-11-24 17:26:48 c2woody Exp $ */
/* $Id: core_dynrec.cpp,v 1.10 2008-08-20 14:13:21 c2woody Exp $ */
#include "dosbox.h"
@ -138,6 +138,7 @@ static struct {
#define X86 0x01
#define X86_64 0x02
#define MIPSEL 0x03
#define ARMV4LE 0x04
#if C_TARGETCPU == X86_64
#include "core_dynrec/risc_x64.h"
@ -145,6 +146,8 @@ static struct {
#include "core_dynrec/risc_x86.h"
#elif C_TARGETCPU == MIPSEL
#include "core_dynrec/risc_mipsel32.h"
#elif C_TARGETCPU == ARMV4LE
#include "core_dynrec/risc_armv4le.h"
#endif
#include "core_dynrec/decoder.h"

View file

@ -1,2 +1,4 @@
noinst_HEADERS = cache.h decoder.h decoder_basic.h decoder_opcodes.h \
dyn_fpu.h operators.h risc_x64.h risc_x86.h risc_mipsel32.h
dyn_fpu.h operators.h risc_x64.h risc_x86.h risc_mipsel32.h \
risc_armv4le.h risc_armv4le-common.h \
risc_armv4le-s3.h risc_armv4le-o3.h risc_armv4le-thumb.h

View file

@ -1,5 +1,5 @@
/*
* Copyright (C) 2002-2007 The DOSBox Team
* Copyright (C) 2002-2008 The DOSBox Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -16,6 +16,8 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/* $Id: decoder_basic.h,v 1.11 2008-08-20 14:13:21 c2woody Exp $ */
/*
This file provides some definitions and basic level functions
@ -554,6 +556,7 @@ static void dyn_closeblock(void) {
//Shouldn't create empty block normally but let's do it like this
dyn_fill_blocks();
cache_closeblock();
cache_block_closing(decode.block->cache.start,decode.block->cache.size);
}

View file

@ -0,0 +1,97 @@
/*
* Copyright (C) 2002-2008 The DOSBox Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/* $Id: risc_armv4le-common.h,v 1.1 2008-08-20 14:13:21 c2woody Exp $ */
/* ARMv4 (little endian) backend by M-HT (common data/functions) */
// some configuring defines that specify the capabilities of this architecture
// or aspects of the recompiling
// protect FC_ADDR over function calls if necessaray
// #define DRC_PROTECT_ADDR_REG
// try to use non-flags generating functions if possible
#define DRC_FLAGS_INVALIDATION
// try to replace _simple functions by code
#define DRC_FLAGS_INVALIDATION_DCODE
// type with the same size as a pointer
#define DRC_PTR_SIZE_IM Bit32u
// calling convention modifier
#define DRC_CALL_CONV /* nothing */
#define DRC_FC /* nothing */
// register mapping
typedef Bit8u HostReg;
// "lo" registers
#define HOST_r0 0
#define HOST_r1 1
#define HOST_r2 2
#define HOST_r3 3
#define HOST_r4 4
#define HOST_r5 5
#define HOST_r6 6
#define HOST_r7 7
// "hi" registers
#define HOST_r8 8
#define HOST_r9 9
#define HOST_r10 10
#define HOST_r11 11
#define HOST_r12 12
#define HOST_r13 13
#define HOST_r14 14
#define HOST_r15 15
// register aliases
// "lo" registers
#define HOST_a1 HOST_r0
#define HOST_a2 HOST_r1
#define HOST_a3 HOST_r2
#define HOST_a4 HOST_r3
#define HOST_v1 HOST_r4
#define HOST_v2 HOST_r5
#define HOST_v3 HOST_r6
#define HOST_v4 HOST_r7
// "hi" registers
#define HOST_v5 HOST_r8
#define HOST_v6 HOST_r9
#define HOST_v7 HOST_r10
#define HOST_v8 HOST_r11
#define HOST_ip HOST_r12
#define HOST_sp HOST_r13
#define HOST_lr HOST_r14
#define HOST_pc HOST_r15
static void cache_block_closing(Bit8u* block_start,Bitu block_size) {
// GP2X BEGIN
//flush cache
register unsigned long _beg __asm ("a1") = (unsigned long)(block_start); // block start
register unsigned long _end __asm ("a2") = (unsigned long)(block_start+block_size); // block end
register unsigned long _flg __asm ("a3") = 0;
__asm __volatile ("swi 0x9f0002 @ sys_cacheflush"
: // no outputs
: // no inputs
: "a1");
// GP2X END
}

View file

@ -0,0 +1,858 @@
/*
* Copyright (C) 2002-2008 The DOSBox Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/* $Id: risc_armv4le-o3.h,v 1.1 2008-08-20 14:13:21 c2woody Exp $ */
/* ARMv4 (little endian) backend by M-HT (size-tweaked arm version) */
// temporary registers
#define temp1 HOST_ip
#define temp2 HOST_v5
#define temp3 HOST_v4
// register that holds function return values
#define FC_RETOP HOST_v3
// register used for address calculations,
#define FC_ADDR HOST_v1 // has to be saved across calls, see DRC_PROTECT_ADDR_REG
// register that holds the first parameter
#define FC_OP1 HOST_a1
// register that holds the second parameter
#define FC_OP2 HOST_a2
// register that holds byte-accessible temporary values
#define FC_TMP_BA1 HOST_a1
// register that holds byte-accessible temporary values
#define FC_TMP_BA2 HOST_a2
// temporary register for LEA
#define TEMP_REG_DRC HOST_v2
// helper macro
#define ROTATE_SCALE(x) ( (x)?(32 - x):(0) )
// move a full register from reg_src to reg_dst
static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) {
if(reg_src == reg_dst) return;
cache_addd(0xe1a00000 + (reg_dst << 12) + reg_src); // mov reg_dst, reg_src
}
// helper function
static Bits get_imm_gen_len(Bit32u imm) {
Bits ret;
if (imm == 0) {
return 1;
} else {
ret = 0;
while (imm) {
while ((imm & 3) == 0) {
imm>>=2;
}
ret++;
imm>>=8;
}
return ret;
}
}
// helper function
static Bits get_method_imm_gen_len(Bit32u imm, Bits preffer00, Bits *num) {
Bits num00, num15, numadd, numsub, numret, ret;
num00 = get_imm_gen_len(imm);
num15 = get_imm_gen_len(~imm);
numadd = get_imm_gen_len(imm - ((Bit32u)cache.pos+8));
numsub = get_imm_gen_len(((Bit32u)cache.pos+8) - imm);
if (numsub < numadd && numsub < num00 && numsub < num15) {
ret = 0;
numret = numsub;
} else if (numadd < num00 && numadd < num15) {
ret = 1;
numret = numadd;
} else if (num00 < num15 || (num00 == num15 && preffer00)) {
ret = 2;
numret = num00;
} else {
ret = 3;
numret = num15;
}
if (num != NULL) *num = numret;
return ret;
}
// move a 32bit constant value into dest_reg
static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
Bits first, method, scale;
Bit32u imm2, dist;
if (imm == 0) {
cache_addd(0xe3a00000 + (dest_reg << 12)); // mov dest_reg, #0
} else if (imm == 0xffffffff) {
cache_addd(0xe3e00000 + (dest_reg << 12)); // mvn dest_reg, #0
} else {
method = get_method_imm_gen_len(imm, 1, NULL);
scale = 0;
first = 1;
if (method == 0) {
dist = ((Bit32u)cache.pos+8) - imm;
while (dist) {
while ((dist & 3) == 0) {
dist>>=2;
scale+=2;
}
if (first) {
cache_addd(0xe2400000 + (dest_reg << 12) + (HOST_pc << 16) + (ROTATE_SCALE(scale) << 7) + (dist & 0xff)); // sub dest_reg, pc, #((dist & 0xff) << scale)
first = 0;
} else {
cache_addd(0xe2400000 + (dest_reg << 12) + (dest_reg << 16) + (ROTATE_SCALE(scale) << 7) + (dist & 0xff)); // sub dest_reg, dest_reg, #((dist & 0xff) << scale)
}
dist>>=8;
scale+=8;
}
} else if (method == 1) {
dist = imm - ((Bit32u)cache.pos+8);
if (dist == 0) {
cache_addd(0xe1a00000 + (dest_reg << 12) + HOST_pc); // mov dest_reg, pc
} else {
while (dist) {
while ((dist & 3) == 0) {
dist>>=2;
scale+=2;
}
if (first) {
cache_addd(0xe2800000 + (dest_reg << 12) + (HOST_pc << 16) + (ROTATE_SCALE(scale) << 7) + (dist & 0xff)); // add dest_reg, pc, #((dist & 0xff) << scale)
first = 0;
} else {
cache_addd(0xe2800000 + (dest_reg << 12) + (dest_reg << 16) + (ROTATE_SCALE(scale) << 7) + (dist & 0xff)); // add dest_reg, dest_reg, #((dist & 0xff) << scale)
}
dist>>=8;
scale+=8;
}
}
} else if (method == 2) {
while (imm) {
while ((imm & 3) == 0) {
imm>>=2;
scale+=2;
}
if (first) {
cache_addd(0xe3a00000 + (dest_reg << 12) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // mov dest_reg, #((imm & 0xff) << scale)
first = 0;
} else {
cache_addd(0xe3800000 + (dest_reg << 12) + (dest_reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // orr dest_reg, dest_reg, #((imm & 0xff) << scale)
}
imm>>=8;
scale+=8;
}
} else {
imm2 = ~imm;
while (imm2) {
while ((imm2 & 3) == 0) {
imm2>>=2;
scale+=2;
}
if (first) {
cache_addd(0xe3e00000 + (dest_reg << 12) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // mvn dest_reg, #((imm2 & 0xff) << scale)
first = 0;
} else {
cache_addd(0xe3c00000 + (dest_reg << 12) + (dest_reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // bic dest_reg, dest_reg, #((imm2 & 0xff) << scale)
}
imm2>>=8;
scale+=8;
}
}
}
}
// helper function for gen_mov_word_to_reg
static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,HostReg data_reg) {
// alignment....
if (dword) {
if ((Bit32u)data & 3) {
if ( ((Bit32u)data & 3) == 2 ) {
cache_addd(0xe1d000b0 + (dest_reg << 12) + (data_reg << 16)); // ldrh dest_reg, [data_reg]
cache_addd(0xe1d000b2 + (temp2 << 12) + (data_reg << 16)); // ldrh temp2, [data_reg, #2]
cache_addd(0xe1800800 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #16
} else {
cache_addd(0xe5d00000 + (dest_reg << 12) + (data_reg << 16)); // ldrb dest_reg, [data_reg]
cache_addd(0xe1d000b1 + (temp2 << 12) + (data_reg << 16)); // ldrh temp2, [data_reg, #1]
cache_addd(0xe1800400 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #8
cache_addd(0xe5d00003 + (temp2 << 12) + (data_reg << 16)); // ldrb temp2, [data_reg, #3]
cache_addd(0xe1800c00 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #24
}
} else {
cache_addd(0xe5900000 + (dest_reg << 12) + (data_reg << 16)); // ldr dest_reg, [data_reg]
}
} else {
if ((Bit32u)data & 1) {
cache_addd(0xe5d00000 + (dest_reg << 12) + (data_reg << 16)); // ldrb dest_reg, [data_reg]
cache_addd(0xe5d00001 + (temp2 << 12) + (data_reg << 16)); // ldrb temp2, [data_reg, #1]
cache_addd(0xe1800400 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #8
} else {
cache_addd(0xe1d000b0 + (dest_reg << 12) + (data_reg << 16)); // ldrh dest_reg, [data_reg]
}
}
}
// move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) {
gen_mov_dword_to_reg_imm(temp1, (Bit32u)data);
gen_mov_word_to_reg_helper(dest_reg, data, dword, temp1);
}
// move a 16bit constant value into dest_reg
// the upper 16bit of the destination register may be destroyed
static void gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) {
Bits first, scale;
Bit32u imm2;
if (imm == 0) {
cache_addd(0xe3a00000 + (dest_reg << 12)); // mov dest_reg, #0
} else {
scale = 0;
first = 1;
imm2 = (Bit32u)imm;
while (imm2) {
while ((imm2 & 3) == 0) {
imm2>>=2;
scale+=2;
}
if (first) {
cache_addd(0xe3a00000 + (dest_reg << 12) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // mov dest_reg, #((imm2 & 0xff) << scale)
first = 0;
} else {
cache_addd(0xe3800000 + (dest_reg << 12) + (dest_reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // orr dest_reg, dest_reg, #((imm2 & 0xff) << scale)
}
imm2>>=8;
scale+=8;
}
}
}
// helper function for gen_mov_word_from_reg
static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, HostReg data_reg) {
// alignment....
if (dword) {
if ((Bit32u)dest & 3) {
if ( ((Bit32u)dest & 3) == 2 ) {
cache_addd(0xe1c000b0 + (src_reg << 12) + (data_reg << 16)); // strh src_reg, [data_reg]
cache_addd(0xe1a00820 + (temp2 << 12) + (src_reg)); // mov temp2, src_reg, lsr #16
cache_addd(0xe1c000b2 + (temp2 << 12) + (data_reg << 16)); // strh temp2, [data_reg, #2]
} else {
cache_addd(0xe5c00000 + (src_reg << 12) + (data_reg << 16)); // strb src_reg, [data_reg]
cache_addd(0xe1a00420 + (temp2 << 12) + (src_reg)); // mov temp2, src_reg, lsr #8
cache_addd(0xe1c000b1 + (temp2 << 12) + (data_reg << 16)); // strh temp2, [data_reg, #1]
cache_addd(0xe1a00820 + (temp2 << 12) + (temp2)); // mov temp2, temp2, lsr #16
cache_addd(0xe5c00003 + (temp2 << 12) + (data_reg << 16)); // strb temp2, [data_reg, #3]
}
} else {
cache_addd(0xe5800000 + (src_reg << 12) + (data_reg << 16)); // str src_reg, [data_reg]
}
} else {
if ((Bit32u)dest & 1) {
cache_addd(0xe5c00000 + (src_reg << 12) + (data_reg << 16)); // strb src_reg, [data_reg]
cache_addd(0xe1a00420 + (temp2 << 12) + (src_reg)); // mov temp2, src_reg, lsr #8
cache_addd(0xe5c00001 + (temp2 << 12) + (data_reg << 16)); // strb temp2, [data_reg, #1]
} else {
cache_addd(0xe1c000b0 + (src_reg << 12) + (data_reg << 16)); // strh src_reg, [data_reg]
}
}
}
// move 32bit (dword==true) or 16bit (dword==false) of a register into memory
static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) {
gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
gen_mov_word_from_reg_helper(src_reg, dest, dword, temp1);
}
// move an 8bit value from memory into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) {
gen_mov_dword_to_reg_imm(temp1, (Bit32u)data);
cache_addd(0xe5d00000 + (dest_reg << 12) + (temp1 << 16)); // ldrb dest_reg, [temp1]
}
// move an 8bit value from memory into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void INLINE gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
gen_mov_byte_to_reg_low(dest_reg, data);
}
// move an 8bit constant value into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
cache_addd(0xe3a00000 + (dest_reg << 12) + (imm)); // mov dest_reg, #(imm)
}
// move an 8bit constant value into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void INLINE gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) {
gen_mov_byte_to_reg_low_imm(dest_reg, imm);
}
// move the lowest 8bit of a register into memory
static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) {
gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
cache_addd(0xe5c00000 + (src_reg << 12) + (temp1 << 16)); // strb src_reg, [temp1]
}
// convert an 8bit word to a 32bit dword
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_byte(bool sign,HostReg reg) {
if (sign) {
cache_addd(0xe1a00c00 + (reg << 12) + (reg)); // mov reg, reg, lsl #24
cache_addd(0xe1a00c40 + (reg << 12) + (reg)); // mov reg, reg, asr #24
} else {
cache_addd(0xe20000ff + (reg << 12) + (reg << 16)); // and reg, reg, #0xff
}
}
// convert a 16bit word to a 32bit dword
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_word(bool sign,HostReg reg) {
if (sign) {
cache_addd(0xe1a00800 + (reg << 12) + (reg)); // mov reg, reg, lsl #16
cache_addd(0xe1a00840 + (reg << 12) + (reg)); // mov reg, reg, asr #16
} else {
cache_addd(0xe1a00800 + (reg << 12) + (reg)); // mov reg, reg, lsl #16
cache_addd(0xe1a00820 + (reg << 12) + (reg)); // mov reg, reg, lsr #16
}
}
// add a 32bit value from memory to a full register
static void gen_add(HostReg reg,void* op) {
gen_mov_word_to_reg(temp3, op, 1);
cache_addd(0xe0800000 + (reg << 12) + (reg << 16) + (temp3)); // add reg, reg, temp3
}
// add a 32bit constant value to a full register
static void gen_add_imm(HostReg reg,Bit32u imm) {
Bits method1, method2, num1, num2, scale, sub;
if(!imm) return;
if (imm == 1) {
cache_addd(0xe2800001 + (reg << 12) + (reg << 16)); // add reg, reg, #1
} else if (imm == 0xffffffff) {
cache_addd(0xe2400001 + (reg << 12) + (reg << 16)); // sub reg, reg, #1
} else {
method1 = get_method_imm_gen_len(imm, 1, &num1);
method2 = get_method_imm_gen_len(-((Bit32s)imm), 1, &num2);
if (num2 < num1) {
method1 = method2;
imm = (Bit32u)(-((Bit32s)imm));
sub = 1;
} else sub = 0;
if (method1 != 2) {
gen_mov_dword_to_reg_imm(temp3, imm);
if (sub) {
cache_addd(0xe0400000 + (reg << 12) + (reg << 16) + (temp3)); // sub reg, reg, temp3
} else {
cache_addd(0xe0800000 + (reg << 12) + (reg << 16) + (temp3)); // add reg, reg, temp3
}
} else {
scale = 0;
while (imm) {
while ((imm & 3) == 0) {
imm>>=2;
scale+=2;
}
if (sub) {
cache_addd(0xe2400000 + (reg << 12) + (reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // sub reg, reg, #((imm & 0xff) << scale)
} else {
cache_addd(0xe2800000 + (reg << 12) + (reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // add reg, reg, #((imm & 0xff) << scale)
}
imm>>=8;
scale+=8;
}
}
}
}
// and a 32bit constant value with a full register
static void gen_and_imm(HostReg reg,Bit32u imm) {
Bits method, scale;
Bit32u imm2;
imm2 = ~imm;
if(!imm2) return;
if (!imm) {
cache_addd(0xe3a00000 + (reg << 12)); // mov reg, #0
} else {
method = get_method_imm_gen_len(imm, 0, NULL);
if (method != 3) {
gen_mov_dword_to_reg_imm(temp3, imm);
cache_addd(0xe0000000 + (reg << 12) + (reg << 16) + (temp3)); // and reg, reg, temp3
} else {
scale = 0;
while (imm2) {
while ((imm2 & 3) == 0) {
imm2>>=2;
scale+=2;
}
cache_addd(0xe3c00000 + (reg << 12) + (reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // bic reg, reg, #((imm2 & 0xff) << scale)
imm2>>=8;
scale+=8;
}
}
}
}
// move a 32bit constant value into memory
static void gen_mov_direct_dword(void* dest,Bit32u imm) {
gen_mov_dword_to_reg_imm(temp3, imm);
gen_mov_word_from_reg(temp3, dest, 1);
}
// move an address into memory
static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) {
gen_mov_direct_dword(dest,(Bit32u)imm);
}
// add an 8bit constant value to a dword memory value
static void gen_add_direct_byte(void* dest,Bit8s imm) {
if(!imm) return;
gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
gen_mov_word_to_reg_helper(temp3, dest, 1, temp1);
if (imm >= 0) {
cache_addd(0xe2800000 + (temp3 << 12) + (temp3 << 16) + ((Bit32s)imm)); // add temp3, temp3, #(imm)
} else {
cache_addd(0xe2400000 + (temp3 << 12) + (temp3 << 16) + (-((Bit32s)imm))); // sub temp3, temp3, #(-imm)
}
gen_mov_word_from_reg_helper(temp3, dest, 1, temp1);
}
// add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value
static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) {
if(!imm) return;
if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) {
gen_add_direct_byte(dest,(Bit8s)imm);
return;
}
gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
gen_mov_word_to_reg_helper(temp3, dest, dword, temp1);
// maybe use function gen_add_imm
if (dword) {
gen_mov_dword_to_reg_imm(temp2, imm);
} else {
gen_mov_word_to_reg_imm(temp2, (Bit16u)imm);
}
cache_addd(0xe0800000 + (temp3 << 12) + (temp3 << 16) + (temp2)); // add temp3, temp3, temp2
gen_mov_word_from_reg_helper(temp3, dest, dword, temp1);
}
// subtract an 8bit constant value from a dword memory value
static void gen_sub_direct_byte(void* dest,Bit8s imm) {
if(!imm) return;
gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
gen_mov_word_to_reg_helper(temp3, dest, 1, temp1);
if (imm >= 0) {
cache_addd(0xe2400000 + (temp3 << 12) + (temp3 << 16) + ((Bit32s)imm)); // sub temp3, temp3, #(imm)
} else {
cache_addd(0xe2800000 + (temp3 << 12) + (temp3 << 16) + (-((Bit32s)imm))); // add temp3, temp3, #(-imm)
}
gen_mov_word_from_reg_helper(temp3, dest, 1, temp1);
}
// subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value
static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
if(!imm) return;
if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) {
gen_sub_direct_byte(dest,(Bit8s)imm);
return;
}
gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
gen_mov_word_to_reg_helper(temp3, dest, dword, temp1);
// maybe use function gen_add_imm/gen_sub_imm
if (dword) {
gen_mov_dword_to_reg_imm(temp2, imm);
} else {
gen_mov_word_to_reg_imm(temp2, (Bit16u)imm);
}
cache_addd(0xe0400000 + (temp3 << 12) + (temp3 << 16) + (temp2)); // sub temp3, temp3, temp2
gen_mov_word_from_reg_helper(temp3, dest, dword, temp1);
}
// effective address calculation, destination is dest_reg
// scale_reg is scaled by scale (scale_reg*(2^scale)) and
// added to dest_reg, then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) {
cache_addd(0xe0800000 + (dest_reg << 12) + (dest_reg << 16) + (scale_reg) + (scale << 7)); // add dest_reg, dest_reg, scale_reg, lsl #(scale)
gen_add_imm(dest_reg, imm);
}
// effective address calculation, destination is dest_reg
// dest_reg is scaled by scale (dest_reg*(2^scale)),
// then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) {
if (scale) {
cache_addd(0xe1a00000 + (dest_reg << 12) + (dest_reg) + (scale << 7)); // mov dest_reg, dest_reg, lsl #(scale)
}
gen_add_imm(dest_reg, imm);
}
// generate a call to a parameterless function
static void INLINE gen_call_function_raw(void * func) {
cache_addd(0xe5900004 + (temp1 << 12) + (HOST_pc << 16)); // ldr temp1, [pc, #4]
cache_addd(0xe2800004 + (HOST_lr << 12) + (HOST_pc << 16)); // add lr, pc, #4
cache_addd(0xe12fff10 + (temp1)); // bx temp1
cache_addd((Bit32u)func); // .int func
cache_addd(0xe1a00000 + (FC_RETOP << 12) + HOST_a1); // mov FC_RETOP, a1
}
// generate a call to a function with paramcount parameters
// note: the parameters are loaded in the architecture specific way
// using the gen_load_param_ functions below
static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) {
Bit32u proc_addr = (Bit32u)cache.pos;
gen_call_function_raw(func);
return proc_addr;
}
#if (1)
// max of 4 parameters in a1-a4
// load an immediate value as param'th function parameter
static void INLINE gen_load_param_imm(Bitu imm,Bitu param) {
gen_mov_dword_to_reg_imm(param, imm);
}
// load an address as param'th function parameter
static void INLINE gen_load_param_addr(Bitu addr,Bitu param) {
gen_mov_dword_to_reg_imm(param, addr);
}
// load a host-register as param'th function parameter
static void INLINE gen_load_param_reg(Bitu reg,Bitu param) {
gen_mov_regs(param, reg);
}
// load a value from memory as param'th function parameter
static void INLINE gen_load_param_mem(Bitu mem,Bitu param) {
gen_mov_word_to_reg(param, (void *)mem, 1);
}
#else
other arm abis
#endif
// jump to an address pointed at by ptr, offset is in imm
static void gen_jmp_ptr(void * ptr,Bits imm=0) {
Bits num1, num2, scale, sub;
Bitu imm2;
gen_mov_word_to_reg(temp3, ptr, 1);
if (imm) {
num1 = get_imm_gen_len(imm);
num2 = get_imm_gen_len(-imm);
if (num2 < num1) {
imm = -imm;
sub = 1;
} else sub = 0;
scale = 0;
imm2 = (Bitu)imm;
while (imm2) {
while ((imm2 & 3) == 0) {
imm2>>=2;
scale+=2;
}
if (sub) {
cache_addd(0xe2400000 + (temp3 << 12) + (temp3 << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // sub temp3, temp3, #((imm2 & 0xff) << scale)
} else {
cache_addd(0xe2800000 + (temp3 << 12) + (temp3 << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // add temp3, temp3, #((imm2 & 0xff) << scale)
}
imm2>>=8;
scale+=8;
}
}
#if (1)
// (*ptr) should be word aligned
if ((imm & 0x03) == 0) {
cache_addd(0xe5900000 + (temp1 << 12) + (temp3 << 16)); // ldr temp1, [temp3]
} else
#endif
{
cache_addd(0xe5d00000 + (temp1 << 12) + (temp3 << 16)); // ldrb temp1, [temp3]
cache_addd(0xe5d00001 + (temp2 << 12) + (temp3 << 16)); // ldrb temp2, [temp3, #1]
cache_addd(0xe1800400 + (temp1 << 12) + (temp1 << 16) + (temp2)); // orr temp1, temp1, temp2, lsl #8
cache_addd(0xe5d00002 + (temp2 << 12) + (temp3 << 16)); // ldrb temp2, [temp3, #2]
cache_addd(0xe1800800 + (temp1 << 12) + (temp1 << 16) + (temp2)); // orr temp1, temp1, temp2, lsl #16
cache_addd(0xe5d00003 + (temp2 << 12) + (temp3 << 16)); // ldrb temp2, [temp3, #3]
cache_addd(0xe1800c00 + (temp1 << 12) + (temp1 << 16) + (temp2)); // orr temp1, temp1, temp2, lsl #24
}
cache_addd(0xe12fff10 + (temp1)); // bx temp1
}
// short conditional jump (+-127 bytes) if register is zero
// the destination is set by gen_fill_branch() later
static Bit32u INLINE gen_create_branch_on_zero(HostReg reg,bool dword) {
if (dword) {
cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0
} else {
cache_addd(0xe1b00800 + (temp1 << 12) + (reg)); // movs temp1, reg, lsl #16
}
cache_addd(0x0a000000); // beq j
return ((Bit32u)cache.pos-4);
}
// short conditional jump (+-127 bytes) if register is nonzero
// the destination is set by gen_fill_branch() later
static Bit32u INLINE gen_create_branch_on_nonzero(HostReg reg,bool dword) {
if (dword) {
cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0
} else {
cache_addd(0xe1b00800 + (temp1 << 12) + (reg)); // movs temp1, reg, lsl #16
}
cache_addd(0x1a000000); // bne j
return ((Bit32u)cache.pos-4);
}
// calculate relative offset and fill it into the location pointed to by data
static void INLINE gen_fill_branch(DRC_PTR_SIZE_IM data) {
#if C_DEBUG
Bits len=(Bit32u)cache.pos-(data+8);
if (len<0) len=-len;
if (len>0x02000000) LOG_MSG("Big jump %d",len);
#endif
*(Bit32u*)data=( (*(Bit32u*)data) & 0xff000000 ) | ( ( ((Bit32u)cache.pos - (data+8)) >> 2 ) & 0x00ffffff );
}
// conditional jump if register is nonzero
// for isdword==true the 32bit of the register are tested
// for isdword==false the lowest 8bit of the register are tested
static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool isdword) {
if (isdword) {
cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0
} else {
cache_addd(0xe31000ff + (reg << 16)); // tst reg, #0xff
}
cache_addd(0x0a000002); // beq nobranch
cache_addd(0xe5900000 + (temp1 << 12) + (HOST_pc << 16)); // ldr temp1, [pc, #0]
cache_addd(0xe12fff10 + (temp1)); // bx temp1
cache_addd(0); // fill j
// nobranch:
return ((Bit32u)cache.pos-4);
}
// compare 32bit-register against zero and jump if value less/equal than zero
static Bit32u INLINE gen_create_branch_long_leqzero(HostReg reg) {
cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0
cache_addd(0xca000002); // bgt nobranch
cache_addd(0xe5900000 + (temp1 << 12) + (HOST_pc << 16)); // ldr temp1, [pc, #0]
cache_addd(0xe12fff10 + (temp1)); // bx temp1
cache_addd(0); // fill j
// nobranch:
return ((Bit32u)cache.pos-4);
}
// calculate long relative offset and fill it into the location pointed to by data
static void INLINE gen_fill_branch_long(Bit32u data) {
// this is an absolute branch
*(Bit32u*)data=(Bit32u)cache.pos;
}
static void gen_run_code(void) {
cache_addd(0xe92d4000); // stmfd sp!, {lr}
cache_addd(0xe92d01f0); // stmfd sp!, {v1-v5}
cache_addd(0xe28fe004); // add lr, pc, #4
cache_addd(0xe92d4000); // stmfd sp!, {lr}
cache_addd(0xe12fff10); // bx r0
cache_addd(0xe8bd01f0); // ldmfd sp!, {v1-v5}
cache_addd(0xe8bd4000); // ldmfd sp!, {lr}
cache_addd(0xe12fff1e); // bx lr
}
// return from a function
static void gen_return_function(void) {
cache_addd(0xe1a00000 + (HOST_a1 << 12) + FC_RETOP); // mov a1, FC_RETOP
cache_addd(0xe8bd4000); // ldmfd sp!, {lr}
cache_addd(0xe12fff1e); // bx lr
}
#ifdef DRC_FLAGS_INVALIDATION
// called when a call to a function can be replaced by a
// call to a simpler function
static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
#ifdef DRC_FLAGS_INVALIDATION_DCODE
// try to avoid function calls but rather directly fill in code
switch (flags_type) {
case t_ADDb:
case t_ADDw:
case t_ADDd:
*(Bit32u*)pos=0xe0800000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // add FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_ORb:
case t_ORw:
case t_ORd:
*(Bit32u*)pos=0xe1800000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // orr FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_ANDb:
case t_ANDw:
case t_ANDd:
*(Bit32u*)pos=0xe0000000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // and FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_SUBb:
case t_SUBw:
case t_SUBd:
*(Bit32u*)pos=0xe0400000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // sub FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_XORb:
case t_XORw:
case t_XORd:
*(Bit32u*)pos=0xe0200000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // eor FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_CMPb:
case t_CMPw:
case t_CMPd:
case t_TESTb:
case t_TESTw:
case t_TESTd:
*(Bit32u*)pos=0xea000000 + (3); // b (pc+3*4)
break;
case t_INCb:
case t_INCw:
case t_INCd:
*(Bit32u*)pos=0xe2800000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (1); // add FC_RETOP, a1, #1
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_DECb:
case t_DECw:
case t_DECd:
*(Bit32u*)pos=0xe2400000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (1); // sub FC_RETOP, a1, #1
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_SHLb:
case t_SHLw:
case t_SHLd:
*(Bit32u*)pos=0xe1a00010 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, lsl a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_SHRb:
*(Bit32u*)pos=0xe2000000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (0xff); // and FC_RETOP, a1, #0xff
*(Bit32u*)(pos+4)=0xe1a00030 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, lsr a2
*(Bit32u*)(pos+8)=0xe1a00000; // nop
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
break;
case t_SHRw:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=0xe1a00020 + (FC_RETOP << 12) + (FC_RETOP) + (16 << 7); // mov FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+8)=0xe1a00030 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, lsr a2
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
break;
case t_SHRd:
*(Bit32u*)pos=0xe1a00030 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, lsr a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_SARb:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (24 << 7); // mov FC_RETOP, a1, lsl #24
*(Bit32u*)(pos+4)=0xe1a00040 + (FC_RETOP << 12) + (FC_RETOP) + (24 << 7); // mov FC_RETOP, FC_RETOP, asr #24
*(Bit32u*)(pos+8)=0xe1a00050 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, asr a2
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
break;
case t_SARw:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=0xe1a00040 + (FC_RETOP << 12) + (FC_RETOP) + (16 << 7); // mov FC_RETOP, FC_RETOP, asr #16
*(Bit32u*)(pos+8)=0xe1a00050 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, asr a2
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
break;
case t_SARd:
*(Bit32u*)pos=0xe1a00050 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, asr a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_RORb:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (24 << 7); // mov FC_RETOP, a1, lsl #24
*(Bit32u*)(pos+4)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (8 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8
*(Bit32u*)(pos+8)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+12)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)(pos+16)=0xe1a00000; // nop
break;
case t_RORw:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+8)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
break;
case t_RORd:
*(Bit32u*)pos=0xe1a00070 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, ror a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_ROLb:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (24 << 7); // mov FC_RETOP, a1, lsl #24
*(Bit32u*)(pos+4)=0xe2600000 + (HOST_a2 << 12) + (HOST_a2 << 16) + (32); // rsb a2, a2, #32
*(Bit32u*)(pos+8)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (8 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8
*(Bit32u*)(pos+12)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+16)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2
break;
case t_ROLw:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=0xe2600000 + (HOST_a2 << 12) + (HOST_a2 << 16) + (32); // rsb a2, a2, #32
*(Bit32u*)(pos+8)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+12)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)(pos+16)=0xe1a00000; // nop
break;
case t_ROLd:
*(Bit32u*)pos=0xe2600000 + (HOST_a2 << 12) + (HOST_a2 << 16) + (32); // rsb a2, a2, #32
*(Bit32u*)(pos+4)=0xe1a00070 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, ror a2
*(Bit32u*)(pos+8)=0xe1a00000; // nop
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
break;
case t_NEGb:
case t_NEGw:
case t_NEGd:
*(Bit32u*)pos=0xe2600000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (0); // rsb FC_RETOP, a1, #0
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
default:
*(Bit32u*)(pos+12)=(Bit32u)fct_ptr; // simple_func
break;
}
#else
*(Bit32u*)(pos+12)=(Bit32u)fct_ptr; // simple_func
#endif
}
#endif

View file

@ -0,0 +1,694 @@
/*
* Copyright (C) 2002-2008 The DOSBox Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/* $Id: risc_armv4le-s3.h,v 1.1 2008-08-20 14:13:21 c2woody Exp $ */
/* ARMv4 (little endian) backend by M-HT (speed-tweaked arm version) */
// temporary registers
#define temp1 HOST_ip
#define temp2 HOST_v5
#define temp3 HOST_v4
// register that holds function return values
#define FC_RETOP HOST_v3
// register used for address calculations,
#define FC_ADDR HOST_v1 // has to be saved across calls, see DRC_PROTECT_ADDR_REG
// register that holds the first parameter
#define FC_OP1 HOST_a1
// register that holds the second parameter
#define FC_OP2 HOST_a2
// register that holds byte-accessible temporary values
#define FC_TMP_BA1 HOST_a1
// register that holds byte-accessible temporary values
#define FC_TMP_BA2 HOST_a2
// temporary register for LEA
#define TEMP_REG_DRC HOST_v2
// helper macro
#define ROTATE_SCALE(x) ( (x)?(32 - x):(0) )
// move a full register from reg_src to reg_dst
static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) {
if(reg_src == reg_dst) return;
cache_addd(0xe1a00000 + (reg_dst << 12) + reg_src); // mov reg_dst, reg_src
}
// move a 32bit constant value into dest_reg
static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
Bits first, scale;
if (imm == 0) {
cache_addd(0xe3a00000 + (dest_reg << 12)); // mov dest_reg, #0
} else {
scale = 0;
first = 1;
while (imm) {
while ((imm & 3) == 0) {
imm>>=2;
scale+=2;
}
if (first) {
cache_addd(0xe3a00000 + (dest_reg << 12) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // mov dest_reg, #((imm & 0xff) << scale)
first = 0;
} else {
cache_addd(0xe3800000 + (dest_reg << 12) + (dest_reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // orr dest_reg, dest_reg, #((imm & 0xff) << scale)
}
imm>>=8;
scale+=8;
}
}
}
// helper function for gen_mov_word_to_reg
static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,HostReg data_reg) {
// alignment....
if (dword) {
if ((Bit32u)data & 3) {
if ( ((Bit32u)data & 3) == 2 ) {
cache_addd(0xe1d000b0 + (dest_reg << 12) + (data_reg << 16)); // ldrh dest_reg, [data_reg]
cache_addd(0xe1d000b2 + (temp2 << 12) + (data_reg << 16)); // ldrh temp2, [data_reg, #2]
cache_addd(0xe1800800 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #16
} else {
cache_addd(0xe5d00000 + (dest_reg << 12) + (data_reg << 16)); // ldrb dest_reg, [data_reg]
cache_addd(0xe1d000b1 + (temp2 << 12) + (data_reg << 16)); // ldrh temp2, [data_reg, #1]
cache_addd(0xe1800400 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #8
cache_addd(0xe5d00003 + (temp2 << 12) + (data_reg << 16)); // ldrb temp2, [data_reg, #3]
cache_addd(0xe1800c00 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #24
}
} else {
cache_addd(0xe5900000 + (dest_reg << 12) + (data_reg << 16)); // ldr dest_reg, [data_reg]
}
} else {
if ((Bit32u)data & 1) {
cache_addd(0xe5d00000 + (dest_reg << 12) + (data_reg << 16)); // ldrb dest_reg, [data_reg]
cache_addd(0xe5d00001 + (temp2 << 12) + (data_reg << 16)); // ldrb temp2, [data_reg, #1]
cache_addd(0xe1800400 + (dest_reg << 12) + (dest_reg << 16) + (temp2)); // orr dest_reg, dest_reg, temp2, lsl #8
} else {
cache_addd(0xe1d000b0 + (dest_reg << 12) + (data_reg << 16)); // ldrh dest_reg, [data_reg]
}
}
}
// move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) {
gen_mov_dword_to_reg_imm(temp1, (Bit32u)data);
gen_mov_word_to_reg_helper(dest_reg, data, dword, temp1);
}
// move a 16bit constant value into dest_reg
// the upper 16bit of the destination register may be destroyed
static void INLINE gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) {
gen_mov_dword_to_reg_imm(dest_reg, (Bit32u)imm);
}
// helper function for gen_mov_word_from_reg
static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, HostReg data_reg) {
// alignment....
if (dword) {
if ((Bit32u)dest & 3) {
if ( ((Bit32u)dest & 3) == 2 ) {
cache_addd(0xe1c000b0 + (src_reg << 12) + (data_reg << 16)); // strh src_reg, [data_reg]
cache_addd(0xe1a00820 + (temp2 << 12) + (src_reg)); // mov temp2, src_reg, lsr #16
cache_addd(0xe1c000b2 + (temp2 << 12) + (data_reg << 16)); // strh temp2, [data_reg, #2]
} else {
cache_addd(0xe5c00000 + (src_reg << 12) + (data_reg << 16)); // strb src_reg, [data_reg]
cache_addd(0xe1a00420 + (temp2 << 12) + (src_reg)); // mov temp2, src_reg, lsr #8
cache_addd(0xe1c000b1 + (temp2 << 12) + (data_reg << 16)); // strh temp2, [data_reg, #1]
cache_addd(0xe1a00820 + (temp2 << 12) + (temp2)); // mov temp2, temp2, lsr #16
cache_addd(0xe5c00003 + (temp2 << 12) + (data_reg << 16)); // strb temp2, [data_reg, #3]
}
} else {
cache_addd(0xe5800000 + (src_reg << 12) + (data_reg << 16)); // str src_reg, [data_reg]
}
} else {
if ((Bit32u)dest & 1) {
cache_addd(0xe5c00000 + (src_reg << 12) + (data_reg << 16)); // strb src_reg, [data_reg]
cache_addd(0xe1a00420 + (temp2 << 12) + (src_reg)); // mov temp2, src_reg, lsr #8
cache_addd(0xe5c00001 + (temp2 << 12) + (data_reg << 16)); // strb temp2, [data_reg, #1]
} else {
cache_addd(0xe1c000b0 + (src_reg << 12) + (data_reg << 16)); // strh src_reg, [data_reg]
}
}
}
// move 32bit (dword==true) or 16bit (dword==false) of a register into memory
static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) {
gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
gen_mov_word_from_reg_helper(src_reg, dest, dword, temp1);
}
// move an 8bit value from memory into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) {
gen_mov_dword_to_reg_imm(temp1, (Bit32u)data);
cache_addd(0xe5d00000 + (dest_reg << 12) + (temp1 << 16)); // ldrb dest_reg, [temp1]
}
// move an 8bit value from memory into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void INLINE gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
gen_mov_byte_to_reg_low(dest_reg, data);
}
// move an 8bit constant value into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
cache_addd(0xe3a00000 + (dest_reg << 12) + (imm)); // mov dest_reg, #(imm)
}
// move an 8bit constant value into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void INLINE gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) {
gen_mov_byte_to_reg_low_imm(dest_reg, imm);
}
// move the lowest 8bit of a register into memory
static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) {
gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
cache_addd(0xe5c00000 + (src_reg << 12) + (temp1 << 16)); // strb src_reg, [temp1]
}
// convert an 8bit word to a 32bit dword
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_byte(bool sign,HostReg reg) {
if (sign) {
cache_addd(0xe1a00c00 + (reg << 12) + (reg)); // mov reg, reg, lsl #24
cache_addd(0xe1a00c40 + (reg << 12) + (reg)); // mov reg, reg, asr #24
} else {
cache_addd(0xe20000ff + (reg << 12) + (reg << 16)); // and reg, reg, #0xff
}
}
// convert a 16bit word to a 32bit dword
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_word(bool sign,HostReg reg) {
if (sign) {
cache_addd(0xe1a00800 + (reg << 12) + (reg)); // mov reg, reg, lsl #16
cache_addd(0xe1a00840 + (reg << 12) + (reg)); // mov reg, reg, asr #16
} else {
cache_addd(0xe1a00800 + (reg << 12) + (reg)); // mov reg, reg, lsl #16
cache_addd(0xe1a00820 + (reg << 12) + (reg)); // mov reg, reg, lsr #16
}
}
// add a 32bit value from memory to a full register
static void gen_add(HostReg reg,void* op) {
gen_mov_word_to_reg(temp3, op, 1);
cache_addd(0xe0800000 + (reg << 12) + (reg << 16) + (temp3)); // add reg, reg, temp3
}
// add a 32bit constant value to a full register
static void gen_add_imm(HostReg reg,Bit32u imm) {
Bits scale;
if(!imm) return;
if (imm == 0xffffffff) {
cache_addd(0xe2400001 + (reg << 12) + (reg << 16)); // sub reg, reg, #1
} else {
scale = 0;
while (imm) {
while ((imm & 3) == 0) {
imm>>=2;
scale+=2;
}
cache_addd(0xe2800000 + (reg << 12) + (reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm & 0xff)); // add reg, reg, #((imm & 0xff) << scale)
imm>>=8;
scale+=8;
}
}
}
// and a 32bit constant value with a full register
static void gen_and_imm(HostReg reg,Bit32u imm) {
Bits scale;
Bit32u imm2;
imm2 = ~imm;
if(!imm2) return;
if (!imm) {
cache_addd(0xe3a00000 + (reg << 12)); // mov reg, #0
} else {
scale = 0;
while (imm2) {
while ((imm2 & 3) == 0) {
imm2>>=2;
scale+=2;
}
cache_addd(0xe3c00000 + (reg << 12) + (reg << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // bic reg, reg, #((imm2 & 0xff) << scale)
imm2>>=8;
scale+=8;
}
}
}
// move a 32bit constant value into memory
static void gen_mov_direct_dword(void* dest,Bit32u imm) {
gen_mov_dword_to_reg_imm(temp3, imm);
gen_mov_word_from_reg(temp3, dest, 1);
}
// move an address into memory
static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) {
gen_mov_direct_dword(dest,(Bit32u)imm);
}
// add an 8bit constant value to a dword memory value
static void gen_add_direct_byte(void* dest,Bit8s imm) {
if(!imm) return;
gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
gen_mov_word_to_reg_helper(temp3, dest, 1, temp1);
if (imm >= 0) {
cache_addd(0xe2800000 + (temp3 << 12) + (temp3 << 16) + ((Bit32s)imm)); // add temp3, temp3, #(imm)
} else {
cache_addd(0xe2400000 + (temp3 << 12) + (temp3 << 16) + (-((Bit32s)imm))); // sub temp3, temp3, #(-imm)
}
gen_mov_word_from_reg_helper(temp3, dest, 1, temp1);
}
// add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value
static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) {
if(!imm) return;
if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) {
gen_add_direct_byte(dest,(Bit8s)imm);
return;
}
gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
gen_mov_word_to_reg_helper(temp3, dest, dword, temp1);
// maybe use function gen_add_imm
if (dword) {
gen_mov_dword_to_reg_imm(temp2, imm);
} else {
gen_mov_word_to_reg_imm(temp2, (Bit16u)imm);
}
cache_addd(0xe0800000 + (temp3 << 12) + (temp3 << 16) + (temp2)); // add temp3, temp3, temp2
gen_mov_word_from_reg_helper(temp3, dest, dword, temp1);
}
// subtract an 8bit constant value from a dword memory value
static void gen_sub_direct_byte(void* dest,Bit8s imm) {
if(!imm) return;
gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
gen_mov_word_to_reg_helper(temp3, dest, 1, temp1);
if (imm >= 0) {
cache_addd(0xe2400000 + (temp3 << 12) + (temp3 << 16) + ((Bit32s)imm)); // sub temp3, temp3, #(imm)
} else {
cache_addd(0xe2800000 + (temp3 << 12) + (temp3 << 16) + (-((Bit32s)imm))); // add temp3, temp3, #(-imm)
}
gen_mov_word_from_reg_helper(temp3, dest, 1, temp1);
}
// subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value
static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
if(!imm) return;
if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) {
gen_sub_direct_byte(dest,(Bit8s)imm);
return;
}
gen_mov_dword_to_reg_imm(temp1, (Bit32u)dest);
gen_mov_word_to_reg_helper(temp3, dest, dword, temp1);
// maybe use function gen_add_imm/gen_sub_imm
if (dword) {
gen_mov_dword_to_reg_imm(temp2, imm);
} else {
gen_mov_word_to_reg_imm(temp2, (Bit16u)imm);
}
cache_addd(0xe0400000 + (temp3 << 12) + (temp3 << 16) + (temp2)); // sub temp3, temp3, temp2
gen_mov_word_from_reg_helper(temp3, dest, dword, temp1);
}
// effective address calculation, destination is dest_reg
// scale_reg is scaled by scale (scale_reg*(2^scale)) and
// added to dest_reg, then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) {
cache_addd(0xe0800000 + (dest_reg << 12) + (dest_reg << 16) + (scale_reg) + (scale << 7)); // add dest_reg, dest_reg, scale_reg, lsl #(scale)
gen_add_imm(dest_reg, imm);
}
// effective address calculation, destination is dest_reg
// dest_reg is scaled by scale (dest_reg*(2^scale)),
// then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) {
if (scale) {
cache_addd(0xe1a00000 + (dest_reg << 12) + (dest_reg) + (scale << 7)); // mov dest_reg, dest_reg, lsl #(scale)
}
gen_add_imm(dest_reg, imm);
}
// generate a call to a parameterless function
static void INLINE gen_call_function_raw(void * func) {
cache_addd(0xe5900004 + (temp1 << 12) + (HOST_pc << 16)); // ldr temp1, [pc, #4]
cache_addd(0xe2800004 + (HOST_lr << 12) + (HOST_pc << 16)); // add lr, pc, #4
cache_addd(0xe12fff10 + (temp1)); // bx temp1
cache_addd((Bit32u)func); // .int func
cache_addd(0xe1a00000 + (FC_RETOP << 12) + HOST_a1); // mov FC_RETOP, a1
}
// generate a call to a function with paramcount parameters
// note: the parameters are loaded in the architecture specific way
// using the gen_load_param_ functions below
static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) {
Bit32u proc_addr = (Bit32u)cache.pos;
gen_call_function_raw(func);
return proc_addr;
}
#if (1)
// max of 4 parameters in a1-a4
// load an immediate value as param'th function parameter
static void INLINE gen_load_param_imm(Bitu imm,Bitu param) {
gen_mov_dword_to_reg_imm(param, imm);
}
// load an address as param'th function parameter
static void INLINE gen_load_param_addr(Bitu addr,Bitu param) {
gen_mov_dword_to_reg_imm(param, addr);
}
// load a host-register as param'th function parameter
static void INLINE gen_load_param_reg(Bitu reg,Bitu param) {
gen_mov_regs(param, reg);
}
// load a value from memory as param'th function parameter
static void INLINE gen_load_param_mem(Bitu mem,Bitu param) {
gen_mov_word_to_reg(param, (void *)mem, 1);
}
#else
other arm abis
#endif
// jump to an address pointed at by ptr, offset is in imm
static void gen_jmp_ptr(void * ptr,Bits imm=0) {
Bits scale;
Bitu imm2;
gen_mov_word_to_reg(temp3, ptr, 1);
if (imm) {
scale = 0;
imm2 = (Bitu)imm;
while (imm2) {
while ((imm2 & 3) == 0) {
imm2>>=2;
scale+=2;
}
cache_addd(0xe2800000 + (temp3 << 12) + (temp3 << 16) + (ROTATE_SCALE(scale) << 7) + (imm2 & 0xff)); // add temp3, temp3, #((imm2 & 0xff) << scale)
imm2>>=8;
scale+=8;
}
}
#if (1)
// (*ptr) should be word aligned
if ((imm & 0x03) == 0) {
cache_addd(0xe5900000 + (temp1 << 12) + (temp3 << 16)); // ldr temp1, [temp3]
} else
#endif
{
cache_addd(0xe5d00000 + (temp1 << 12) + (temp3 << 16)); // ldrb temp1, [temp3]
cache_addd(0xe5d00001 + (temp2 << 12) + (temp3 << 16)); // ldrb temp2, [temp3, #1]
cache_addd(0xe1800400 + (temp1 << 12) + (temp1 << 16) + (temp2)); // orr temp1, temp1, temp2, lsl #8
cache_addd(0xe5d00002 + (temp2 << 12) + (temp3 << 16)); // ldrb temp2, [temp3, #2]
cache_addd(0xe1800800 + (temp1 << 12) + (temp1 << 16) + (temp2)); // orr temp1, temp1, temp2, lsl #16
cache_addd(0xe5d00003 + (temp2 << 12) + (temp3 << 16)); // ldrb temp2, [temp3, #3]
cache_addd(0xe1800c00 + (temp1 << 12) + (temp1 << 16) + (temp2)); // orr temp1, temp1, temp2, lsl #24
}
cache_addd(0xe12fff10 + (temp1)); // bx temp1
}
// short conditional jump (+-127 bytes) if register is zero
// the destination is set by gen_fill_branch() later
static Bit32u INLINE gen_create_branch_on_zero(HostReg reg,bool dword) {
if (dword) {
cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0
} else {
cache_addd(0xe1b00800 + (temp1 << 12) + (reg)); // movs temp1, reg, lsl #16
}
cache_addd(0x0a000000); // beq j
return ((Bit32u)cache.pos-4);
}
// short conditional jump (+-127 bytes) if register is nonzero
// the destination is set by gen_fill_branch() later
static Bit32u INLINE gen_create_branch_on_nonzero(HostReg reg,bool dword) {
if (dword) {
cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0
} else {
cache_addd(0xe1b00800 + (temp1 << 12) + (reg)); // movs temp1, reg, lsl #16
}
cache_addd(0x1a000000); // bne j
return ((Bit32u)cache.pos-4);
}
// calculate relative offset and fill it into the location pointed to by data
static void INLINE gen_fill_branch(DRC_PTR_SIZE_IM data) {
#if C_DEBUG
Bits len=(Bit32u)cache.pos-(data+8);
if (len<0) len=-len;
if (len>0x02000000) LOG_MSG("Big jump %d",len);
#endif
*(Bit32u*)data=( (*(Bit32u*)data) & 0xff000000 ) | ( ( ((Bit32u)cache.pos - (data+8)) >> 2 ) & 0x00ffffff );
}
// conditional jump if register is nonzero
// for isdword==true the 32bit of the register are tested
// for isdword==false the lowest 8bit of the register are tested
static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool isdword) {
if (isdword) {
cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0
} else {
cache_addd(0xe31000ff + (reg << 16)); // tst reg, #0xff
}
cache_addd(0x0a000002); // beq nobranch
cache_addd(0xe5900000 + (temp1 << 12) + (HOST_pc << 16)); // ldr temp1, [pc, #0]
cache_addd(0xe12fff10 + (temp1)); // bx temp1
cache_addd(0); // fill j
// nobranch:
return ((Bit32u)cache.pos-4);
}
// compare 32bit-register against zero and jump if value less/equal than zero
static Bit32u INLINE gen_create_branch_long_leqzero(HostReg reg) {
cache_addd(0xe3500000 + (reg << 16)); // cmp reg, #0
cache_addd(0xca000002); // bgt nobranch
cache_addd(0xe5900000 + (temp1 << 12) + (HOST_pc << 16)); // ldr temp1, [pc, #0]
cache_addd(0xe12fff10 + (temp1)); // bx temp1
cache_addd(0); // fill j
// nobranch:
return ((Bit32u)cache.pos-4);
}
// calculate long relative offset and fill it into the location pointed to by data
static void INLINE gen_fill_branch_long(Bit32u data) {
// this is an absolute branch
*(Bit32u*)data=(Bit32u)cache.pos;
}
static void gen_run_code(void) {
cache_addd(0xe92d4000); // stmfd sp!, {lr}
cache_addd(0xe92d01f0); // stmfd sp!, {v1-v5}
cache_addd(0xe28fe004); // add lr, pc, #4
cache_addd(0xe92d4000); // stmfd sp!, {lr}
cache_addd(0xe12fff10); // bx r0
cache_addd(0xe8bd01f0); // ldmfd sp!, {v1-v5}
cache_addd(0xe8bd4000); // ldmfd sp!, {lr}
cache_addd(0xe12fff1e); // bx lr
}
// return from a function
static void gen_return_function(void) {
cache_addd(0xe1a00000 + (HOST_a1 << 12) + FC_RETOP); // mov a1, FC_RETOP
cache_addd(0xe8bd4000); // ldmfd sp!, {lr}
cache_addd(0xe12fff1e); // bx lr
}
#ifdef DRC_FLAGS_INVALIDATION
// called when a call to a function can be replaced by a
// call to a simpler function
static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
#ifdef DRC_FLAGS_INVALIDATION_DCODE
// try to avoid function calls but rather directly fill in code
switch (flags_type) {
case t_ADDb:
case t_ADDw:
case t_ADDd:
*(Bit32u*)pos=0xe0800000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // add FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_ORb:
case t_ORw:
case t_ORd:
*(Bit32u*)pos=0xe1800000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // orr FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_ANDb:
case t_ANDw:
case t_ANDd:
*(Bit32u*)pos=0xe0000000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // and FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_SUBb:
case t_SUBw:
case t_SUBd:
*(Bit32u*)pos=0xe0400000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // sub FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_XORb:
case t_XORw:
case t_XORd:
*(Bit32u*)pos=0xe0200000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (HOST_a2); // eor FC_RETOP, a1, a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_CMPb:
case t_CMPw:
case t_CMPd:
case t_TESTb:
case t_TESTw:
case t_TESTd:
*(Bit32u*)pos=0xea000000 + (3); // b (pc+3*4)
break;
case t_INCb:
case t_INCw:
case t_INCd:
*(Bit32u*)pos=0xe2800000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (1); // add FC_RETOP, a1, #1
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_DECb:
case t_DECw:
case t_DECd:
*(Bit32u*)pos=0xe2400000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (1); // sub FC_RETOP, a1, #1
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_SHLb:
case t_SHLw:
case t_SHLd:
*(Bit32u*)pos=0xe1a00010 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, lsl a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_SHRb:
*(Bit32u*)pos=0xe2000000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (0xff); // and FC_RETOP, a1, #0xff
*(Bit32u*)(pos+4)=0xe1a00030 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, lsr a2
*(Bit32u*)(pos+8)=0xe1a00000; // nop
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
break;
case t_SHRw:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=0xe1a00020 + (FC_RETOP << 12) + (FC_RETOP) + (16 << 7); // mov FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+8)=0xe1a00030 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, lsr a2
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
break;
case t_SHRd:
*(Bit32u*)pos=0xe1a00030 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, lsr a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_SARb:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (24 << 7); // mov FC_RETOP, a1, lsl #24
*(Bit32u*)(pos+4)=0xe1a00040 + (FC_RETOP << 12) + (FC_RETOP) + (24 << 7); // mov FC_RETOP, FC_RETOP, asr #24
*(Bit32u*)(pos+8)=0xe1a00050 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, asr a2
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
break;
case t_SARw:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=0xe1a00040 + (FC_RETOP << 12) + (FC_RETOP) + (16 << 7); // mov FC_RETOP, FC_RETOP, asr #16
*(Bit32u*)(pos+8)=0xe1a00050 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, asr a2
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
break;
case t_SARd:
*(Bit32u*)pos=0xe1a00050 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, asr a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_RORb:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (24 << 7); // mov FC_RETOP, a1, lsl #24
*(Bit32u*)(pos+4)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (8 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8
*(Bit32u*)(pos+8)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+12)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)(pos+16)=0xe1a00000; // nop
break;
case t_RORw:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+8)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
break;
case t_RORd:
*(Bit32u*)pos=0xe1a00070 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, ror a2
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
case t_ROLb:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (24 << 7); // mov FC_RETOP, a1, lsl #24
*(Bit32u*)(pos+4)=0xe2600000 + (HOST_a2 << 12) + (HOST_a2 << 16) + (32); // rsb a2, a2, #32
*(Bit32u*)(pos+8)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (8 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #8
*(Bit32u*)(pos+12)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+16)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2
break;
case t_ROLw:
*(Bit32u*)pos=0xe1a00000 + (FC_RETOP << 12) + (HOST_a1) + (16 << 7); // mov FC_RETOP, a1, lsl #16
*(Bit32u*)(pos+4)=0xe2600000 + (HOST_a2 << 12) + (HOST_a2 << 16) + (32); // rsb a2, a2, #32
*(Bit32u*)(pos+8)=0xe1800020 + (FC_RETOP << 12) + (FC_RETOP << 16) + (FC_RETOP) + (16 << 7); // orr FC_RETOP, FC_RETOP, FC_RETOP, lsr #16
*(Bit32u*)(pos+12)=0xe1a00070 + (FC_RETOP << 12) + (FC_RETOP) + (HOST_a2 << 8); // mov FC_RETOP, FC_RETOP, ror a2
*(Bit32u*)(pos+16)=0xe1a00000; // nop
break;
case t_ROLd:
*(Bit32u*)pos=0xe2600000 + (HOST_a2 << 12) + (HOST_a2 << 16) + (32); // rsb a2, a2, #32
*(Bit32u*)(pos+4)=0xe1a00070 + (FC_RETOP << 12) + (HOST_a1) + (HOST_a2 << 8); // mov FC_RETOP, a1, ror a2
*(Bit32u*)(pos+8)=0xe1a00000; // nop
*(Bit32u*)(pos+12)=0xe1a00000; // nop
*(Bit32u*)(pos+16)=0xe1a00000; // nop
break;
case t_NEGb:
case t_NEGw:
case t_NEGd:
*(Bit32u*)pos=0xe2600000 + (FC_RETOP << 12) + (HOST_a1 << 16) + (0); // rsb FC_RETOP, a1, #0
*(Bit32u*)(pos+4)=0xea000000 + (2); // b (pc+2*4)
break;
default:
*(Bit32u*)(pos+12)=(Bit32u)fct_ptr; // simple_func
break;
}
#else
*(Bit32u*)(pos+12)=(Bit32u)fct_ptr; // simple_func
#endif
}
#endif

View file

@ -0,0 +1,934 @@
/*
* Copyright (C) 2002-2008 The DOSBox Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/* $Id: risc_armv4le-thumb.h,v 1.1 2008-08-20 14:13:21 c2woody Exp $ */
/* ARMv4 (little endian) backend by M-HT */
// temporary "lo" registers
#define templo1 HOST_v3
#define templo2 HOST_v4
// temporary "lo" register - value must be preserved when using it
#define templosav HOST_a3
// temporary "hi" register
#define temphi1 HOST_ip
// register that holds function return values
#define FC_RETOP HOST_v2
// register used for address calculations,
#define FC_ADDR HOST_v1 // has to be saved across calls, see DRC_PROTECT_ADDR_REG
// register that holds the first parameter
#define FC_OP1 HOST_a1
// register that holds the second parameter
#define FC_OP2 HOST_a2
// register that holds byte-accessible temporary values
#define FC_TMP_BA1 HOST_a1
// register that holds byte-accessible temporary values
#define FC_TMP_BA2 HOST_a2
// temporary register for LEA
#define TEMP_REG_DRC HOST_a4
// move a full register from reg_src to reg_dst
static void gen_mov_regs(HostReg reg_dst,HostReg reg_src) {
if(reg_src == reg_dst) return;
cache_addw(0x1c00 + reg_dst + (reg_src << 3)); // mov reg_dst, reg_src
}
// move a 32bit constant value into dest_reg
static void gen_mov_dword_to_reg_imm(HostReg dest_reg,Bit32u imm) {
if ((imm & 0xffffff00) == 0) {
cache_addw(0x2000 + (dest_reg << 8) + (Bit8u)(imm & 0xff)); // mov dest_reg, #(imm)
} else if ((imm & 0xffff00ff) == 0) {
cache_addw(0x2000 + (dest_reg << 8) + (Bit8u)(imm >> 8)); // mov dest_reg, #(imm >> 8)
cache_addw(0x0000 + dest_reg + (dest_reg << 3) + (8 << 6)); // lsl dest_reg, dest_reg, #8
} else if ((imm & 0xff00ffff) == 0) {
cache_addw(0x2000 + (dest_reg << 8) + (imm >> 16)); // mov dest_reg, #(imm >> 16)
cache_addw(0x0000 + dest_reg + (dest_reg << 3) + (16 << 6)); // lsl dest_reg, dest_reg, #16
} else if ((imm & 0x00ffffff) == 0) {
cache_addw(0x2000 + (dest_reg << 8) + (imm >> 24)); // mov dest_reg, #(imm >> 24)
cache_addw(0x0000 + dest_reg + (dest_reg << 3) + (24 << 6)); // lsl dest_reg, dest_reg, #24
} else {
Bit32u diff;
diff = imm - ((Bit32u)cache.pos+4);
if ((diff < 1024) && ((imm & 0x03) == 0)) {
if (((Bit32u)cache.pos & 0x03) == 0) {
cache_addw(0xa000 + (dest_reg << 8) + (Bit8u)(diff >> 2)); // add dest_reg, pc, #(dist >> 2)
} else {
cache_addw(0x46c0); // nop
// is the result of (diff-2)>>2 correct for diff<2 ???
cache_addw(0xa000 + (dest_reg << 8) + (Bit8u)((diff - 2) >> 2)); // add dest_reg, pc, #((dist - 2) >> 2)
}
} else {
if (((Bit32u)cache.pos & 0x03) == 0) {
cache_addw(0x4800 + (dest_reg << 8)); // ldr dest_reg, [pc, #0]
cache_addw(0xe000 + (2 >> 1)); // b next_code (pc+2)
cache_addd(imm); // .int imm
// next_code:
} else {
cache_addw(0x4800 + (dest_reg << 8) + (4 >> 2)); // ldr dest_reg, [pc, #4]
cache_addw(0xe000 + (4 >> 1)); // b next_code (pc+4)
cache_addw(0x46c0); // nop
cache_addd(imm); // .int imm
// next_code:
}
}
}
}
// helper function for gen_mov_word_to_reg
static void gen_mov_word_to_reg_helper(HostReg dest_reg,void* data,bool dword,HostReg data_reg) {
// alignment....
if (dword) {
if ((Bit32u)data & 3) {
if ( ((Bit32u)data & 3) == 2 ) {
cache_addw(0x8800 + dest_reg + (data_reg << 3)); // ldrh dest_reg, [data_reg]
cache_addw(0x8800 + templo1 + (data_reg << 3) + (2 << 5)); // ldrh templo1, [data_reg, #2]
cache_addw(0x0000 + templo1 + (templo1 << 3) + (16 << 6)); // lsl templo1, templo1, #16
cache_addw(0x4300 + dest_reg + (templo1 << 3)); // orr dest_reg, templo1
} else {
cache_addw(0x7800 + dest_reg + (data_reg << 3)); // ldrb dest_reg, [data_reg]
cache_addw(0x1c00 + templo1 + (data_reg << 3) + (1 << 6)); // add templo1, data_reg, #1
cache_addw(0x8800 + templo1 + (templo1 << 3)); // ldrh templo1, [templo1]
cache_addw(0x0000 + templo1 + (templo1 << 3) + (8 << 6)); // lsl templo1, templo1, #8
cache_addw(0x4300 + dest_reg + (templo1 << 3)); // orr dest_reg, templo1
cache_addw(0x7800 + templo1 + (data_reg << 3) + (3 << 6)); // ldrb templo1, [data_reg, #3]
cache_addw(0x0000 + templo1 + (templo1 << 3) + (24 << 6)); // lsl templo1, templo1, #24
cache_addw(0x4300 + dest_reg + (templo1 << 3)); // orr dest_reg, templo1
}
} else {
cache_addw(0x6800 + dest_reg + (data_reg << 3)); // ldr dest_reg, [data_reg]
}
} else {
if ((Bit32u)data & 1) {
cache_addw(0x7800 + dest_reg + (data_reg << 3)); // ldrb dest_reg, [data_reg]
cache_addw(0x7800 + templo1 + (data_reg << 3) + (1 << 6)); // ldrb templo1, [data_reg, #1]
cache_addw(0x0000 + templo1 + (templo1 << 3) + (8 << 6)); // lsl templo1, templo1, #8
cache_addw(0x4300 + dest_reg + (templo1 << 3)); // orr dest_reg, templo1
} else {
cache_addw(0x8800 + dest_reg + (data_reg << 3)); // ldrh dest_reg, [data_reg]
}
}
}
// move a 32bit (dword==true) or 16bit (dword==false) value from memory into dest_reg
// 16bit moves may destroy the upper 16bit of the destination register
static void gen_mov_word_to_reg(HostReg dest_reg,void* data,bool dword) {
gen_mov_dword_to_reg_imm(templo2, (Bit32u)data);
gen_mov_word_to_reg_helper(dest_reg, data, dword, templo2);
}
// move a 16bit constant value into dest_reg
// the upper 16bit of the destination register may be destroyed
static void INLINE gen_mov_word_to_reg_imm(HostReg dest_reg,Bit16u imm) {
gen_mov_dword_to_reg_imm(dest_reg, (Bit32u)imm);
}
// helper function for gen_mov_word_from_reg
static void gen_mov_word_from_reg_helper(HostReg src_reg,void* dest,bool dword, HostReg data_reg) {
// alignment....
if (dword) {
if ((Bit32u)dest & 3) {
if ( ((Bit32u)dest & 3) == 2 ) {
cache_addw(0x8000 + src_reg + (data_reg << 3)); // strh src_reg, [data_reg]
cache_addw(0x1c00 + templo1 + (src_reg << 3)); // mov templo1, src_reg
cache_addw(0x0800 + templo1 + (templo1 << 3) + (16 << 6)); // lsr templo1, templo1, #16
cache_addw(0x8000 + templo1 + (data_reg << 3) + (2 << 5)); // strh templo1, [data_reg, #2]
} else {
cache_addw(0x7000 + src_reg + (data_reg << 3)); // strb src_reg, [data_reg]
cache_addw(0x1c00 + templo1 + (src_reg << 3)); // mov templo1, src_reg
cache_addw(0x0800 + templo1 + (templo1 << 3) + (8 << 6)); // lsr templo1, templo1, #8
cache_addw(0x7000 + templo1 + (data_reg << 3) + (1 << 6)); // strb templo1, [data_reg, #1]
cache_addw(0x1c00 + templo1 + (src_reg << 3)); // mov templo1, src_reg
cache_addw(0x0800 + templo1 + (templo1 << 3) + (16 << 6)); // lsr templo1, templo1, #16
cache_addw(0x7000 + templo1 + (data_reg << 3) + (2 << 6)); // strb templo1, [data_reg, #2]
cache_addw(0x1c00 + templo1 + (src_reg << 3)); // mov templo1, src_reg
cache_addw(0x0800 + templo1 + (templo1 << 3) + (24 << 6)); // lsr templo1, templo1, #24
cache_addw(0x7000 + templo1 + (data_reg << 3) + (3 << 6)); // strb templo1, [data_reg, #3]
}
} else {
cache_addw(0x6000 + src_reg + (data_reg << 3)); // str src_reg, [data_reg]
}
} else {
if ((Bit32u)dest & 1) {
cache_addw(0x7000 + src_reg + (data_reg << 3)); // strb src_reg, [data_reg]
cache_addw(0x1c00 + templo1 + (src_reg << 3)); // mov templo1, src_reg
cache_addw(0x0800 + templo1 + (templo1 << 3) + (8 << 6)); // lsr templo1, templo1, #8
cache_addw(0x7000 + templo1 + (data_reg << 3) + (1 << 6)); // strb templo1, [data_reg, #1]
} else {
cache_addw(0x8000 + src_reg + (data_reg << 3)); // strh src_reg, [data_reg]
}
}
}
// move 32bit (dword==true) or 16bit (dword==false) of a register into memory
static void gen_mov_word_from_reg(HostReg src_reg,void* dest,bool dword) {
gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
gen_mov_word_from_reg_helper(src_reg, dest, dword, templo2);
}
// move an 8bit value from memory into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low(HostReg dest_reg,void* data) {
gen_mov_dword_to_reg_imm(templo1, (Bit32u)data);
cache_addw(0x7800 + dest_reg + (templo1 << 3)); // ldrb dest_reg, [templo1]
}
// move an 8bit value from memory into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void INLINE gen_mov_byte_to_reg_low_canuseword(HostReg dest_reg,void* data) {
gen_mov_byte_to_reg_low(dest_reg, data);
}
// move an 8bit constant value into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function does not use FC_OP1/FC_OP2 as dest_reg as these
// registers might not be directly byte-accessible on some architectures
static void gen_mov_byte_to_reg_low_imm(HostReg dest_reg,Bit8u imm) {
cache_addw(0x2000 + (dest_reg << 8) + imm); // mov dest_reg, #(imm)
}
// move an 8bit constant value into dest_reg
// the upper 24bit of the destination register can be destroyed
// this function can use FC_OP1/FC_OP2 as dest_reg which are
// not directly byte-accessible on some architectures
static void INLINE gen_mov_byte_to_reg_low_imm_canuseword(HostReg dest_reg,Bit8u imm) {
gen_mov_byte_to_reg_low_imm(dest_reg, imm);
}
// move the lowest 8bit of a register into memory
static void gen_mov_byte_from_reg_low(HostReg src_reg,void* dest) {
gen_mov_dword_to_reg_imm(templo1, (Bit32u)dest);
cache_addw(0x7000 + src_reg + (templo1 << 3)); // strb src_reg, [templo1]
}
// convert an 8bit word to a 32bit dword
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_byte(bool sign,HostReg reg) {
cache_addw(0x0000 + reg + (reg << 3) + (24 << 6)); // lsl reg, reg, #24
if (sign) {
cache_addw(0x1000 + reg + (reg << 3) + (24 << 6)); // asr reg, reg, #24
} else {
cache_addw(0x0800 + reg + (reg << 3) + (24 << 6)); // lsr reg, reg, #24
}
}
// convert a 16bit word to a 32bit dword
// the register is zero-extended (sign==false) or sign-extended (sign==true)
static void gen_extend_word(bool sign,HostReg reg) {
cache_addw(0x0000 + reg + (reg << 3) + (16 << 6)); // lsl reg, reg, #16
if (sign) {
cache_addw(0x1000 + reg + (reg << 3) + (16 << 6)); // asr reg, reg, #16
} else {
cache_addw(0x0800 + reg + (reg << 3) + (16 << 6)); // lsr reg, reg, #16
}
}
// add a 32bit value from memory to a full register
static void gen_add(HostReg reg,void* op) {
cache_addw(0x4680 + (temphi1 - HOST_r8) + (reg << 3)); // mov temphi1, reg
gen_mov_word_to_reg(reg, op, 1);
cache_addw(0x4440 + (reg) + ((temphi1 - HOST_r8) << 3)); // add reg, temphi1
}
// add a 32bit constant value to a full register
static void gen_add_imm(HostReg reg,Bit32u imm) {
if(!imm) return;
gen_mov_dword_to_reg_imm(templo1, imm);
cache_addw(0x1800 + reg + (reg << 3) + (templo1 << 6)); // add reg, reg, templo1
}
// and a 32bit constant value with a full register
static void gen_and_imm(HostReg reg,Bit32u imm) {
if(imm == 0xffffffff) return;
gen_mov_dword_to_reg_imm(templo1, imm);
cache_addw(0x4000 + reg + (templo1<< 3)); // and reg, templo1
}
// move a 32bit constant value into memory
static void gen_mov_direct_dword(void* dest,Bit32u imm) {
cache_addw(0x4680 + (temphi1 - HOST_r8) + (templosav << 3)); // mov temphi1, templosav
gen_mov_dword_to_reg_imm(templosav, imm);
gen_mov_word_from_reg(templosav, dest, 1);
cache_addw(0x4640 + templosav + ((temphi1 - HOST_r8) << 3)); // mov templosav, temphi1
}
// move an address into memory
static void INLINE gen_mov_direct_ptr(void* dest,DRC_PTR_SIZE_IM imm) {
gen_mov_direct_dword(dest,(Bit32u)imm);
}
// add an 8bit constant value to a dword memory value
static void gen_add_direct_byte(void* dest,Bit8s imm) {
if(!imm) return;
cache_addw(0x4680 + (temphi1 - HOST_r8) + (templosav << 3)); // mov temphi1, templosav
gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
gen_mov_word_to_reg_helper(templosav, dest, 1, templo2);
if (imm >= 0) {
cache_addw(0x3000 + (templosav << 8) + ((Bit32s)imm)); // add templosav, #(imm)
} else {
cache_addw(0x3800 + (templosav << 8) + (-((Bit32s)imm))); // sub templosav, #(-imm)
}
gen_mov_word_from_reg_helper(templosav, dest, 1, templo2);
cache_addw(0x4640 + templosav + ((temphi1 - HOST_r8) << 3)); // mov templosav, temphi1
}
// add a 32bit (dword==true) or 16bit (dword==false) constant value to a memory value
static void gen_add_direct_word(void* dest,Bit32u imm,bool dword) {
if(!imm) return;
if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) {
gen_add_direct_byte(dest,(Bit8s)imm);
return;
}
cache_addw(0x4680 + (temphi1 - HOST_r8) + (templosav << 3)); // mov temphi1, templosav
gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
gen_mov_word_to_reg_helper(templosav, dest, dword, templo2);
if (dword) {
gen_mov_dword_to_reg_imm(templo1, imm);
} else {
gen_mov_word_to_reg_imm(templo1, (Bit16u)imm);
}
cache_addw(0x1800 + templosav + (templosav << 3) + (templo1 << 6)); // add templosav, templosav, templo1
gen_mov_word_from_reg_helper(templosav, dest, dword, templo2);
cache_addw(0x4640 + templosav + ((temphi1 - HOST_r8) << 3)); // mov templosav, temphi1
}
// subtract an 8bit constant value from a dword memory value
static void gen_sub_direct_byte(void* dest,Bit8s imm) {
if(!imm) return;
cache_addw(0x4680 + (temphi1 - HOST_r8) + (templosav << 3)); // mov temphi1, templosav
gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
gen_mov_word_to_reg_helper(templosav, dest, 1, templo2);
if (imm >= 0) {
cache_addw(0x3800 + (templosav << 8) + ((Bit32s)imm)); // sub templosav, #(imm)
} else {
cache_addw(0x3000 + (templosav << 8) + (-((Bit32s)imm))); // add templosav, #(-imm)
}
gen_mov_word_from_reg_helper(templosav, dest, 1, templo2);
cache_addw(0x4640 + templosav + ((temphi1 - HOST_r8) << 3)); // mov templosav, temphi1
}
// subtract a 32bit (dword==true) or 16bit (dword==false) constant value from a memory value
static void gen_sub_direct_word(void* dest,Bit32u imm,bool dword) {
if(!imm) return;
if (dword && ( (imm<128) || (imm>=0xffffff80) ) ) {
gen_sub_direct_byte(dest,(Bit8s)imm);
return;
}
cache_addw(0x4680 + (temphi1 - HOST_r8) + (templosav << 3)); // mov temphi1, templosav
gen_mov_dword_to_reg_imm(templo2, (Bit32u)dest);
gen_mov_word_to_reg_helper(templosav, dest, dword, templo2);
if (dword) {
gen_mov_dword_to_reg_imm(templo1, imm);
} else {
gen_mov_word_to_reg_imm(templo1, (Bit16u)imm);
}
cache_addw(0x1a00 + templosav + (templosav << 3) + (templo1 << 6)); // sub templosav, templosav, templo1
gen_mov_word_from_reg_helper(templosav, dest, dword, templo2);
cache_addw(0x4640 + templosav + ((temphi1 - HOST_r8) << 3)); // mov templosav, temphi1
}
// effective address calculation, destination is dest_reg
// scale_reg is scaled by scale (scale_reg*(2^scale)) and
// added to dest_reg, then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,HostReg scale_reg,Bitu scale,Bits imm) {
if (scale) {
cache_addw(0x0000 + templo1 + (scale_reg << 3) + (scale << 6)); // lsl templo1, scale_reg, #(scale)
cache_addw(0x1800 + dest_reg + (dest_reg << 3) + (templo1 << 6)); // add dest_reg, dest_reg, templo1
} else {
cache_addw(0x1800 + dest_reg + (dest_reg << 3) + (scale_reg << 6)); // add dest_reg, dest_reg, scale_reg
}
gen_add_imm(dest_reg, imm);
}
// effective address calculation, destination is dest_reg
// dest_reg is scaled by scale (dest_reg*(2^scale)),
// then the immediate value is added
static INLINE void gen_lea(HostReg dest_reg,Bitu scale,Bits imm) {
if (scale) {
cache_addw(0x0000 + dest_reg + (dest_reg << 3) + (scale << 6)); // lsl dest_reg, dest_reg, #(scale)
}
gen_add_imm(dest_reg, imm);
}
// generate a call to a parameterless function
static void INLINE gen_call_function_raw(void * func) {
if (((Bit32u)cache.pos & 0x03) == 0) {
cache_addw(0x4800 + (templo1 << 8) + (4 >> 2)); // ldr templo1, [pc, #4]
cache_addw(0xa000 + (templo2 << 8) + (8 >> 2)); // adr templo2, after_call (add templo2, pc, #8)
cache_addw(0x4680 + (HOST_lr - HOST_r8) + (templo2 << 3)); // mov lr, templo2
cache_addw(0x4700 + (templo1 << 3)); // bx templo1 --- switch to arm state
} else {
cache_addw(0x4800 + (templo1 << 8) + (8 >> 2)); // ldr templo1, [pc, #8]
cache_addw(0xa000 + (templo2 << 8) + (8 >> 2)); // adr templo2, after_call (add templo2, pc, #8)
cache_addw(0x4680 + (HOST_lr - HOST_r8) + (templo2 << 3)); // mov lr, templo2
cache_addw(0x4700 + (templo1 << 3)); // bx templo1 --- switch to arm state
cache_addw(0x46c0); // nop
}
cache_addd((Bit32u)func); // .int func
// after_call:
// switch from arm to thumb state
cache_addd(0xe2800000 + (templo1 << 12) + (HOST_pc << 16) + (1)); // add templo1, pc, #1
cache_addd(0xe12fff10 + (templo1)); // bx templo1
// thumb state from now on
cache_addw(0x1c00 + FC_RETOP + (HOST_a1 << 3)); // mov FC_RETOP, a1
}
// generate a call to a function with paramcount parameters
// note: the parameters are loaded in the architecture specific way
// using the gen_load_param_ functions below
static Bit32u INLINE gen_call_function_setup(void * func,Bitu paramcount,bool fastcall=false) {
Bit32u proc_addr = (Bit32u)cache.pos;
gen_call_function_raw(func);
return proc_addr;
// if proc_addr is on word boundary ((proc_addr & 0x03) == 0)
// then length of generated code is 22 bytes
// otherwise length of generated code is 24 bytes
}
#if (1)
// max of 4 parameters in a1-a4
// load an immediate value as param'th function parameter
static void INLINE gen_load_param_imm(Bitu imm,Bitu param) {
gen_mov_dword_to_reg_imm(param, imm);
}
// load an address as param'th function parameter
static void INLINE gen_load_param_addr(Bitu addr,Bitu param) {
gen_mov_dword_to_reg_imm(param, addr);
}
// load a host-register as param'th function parameter
static void INLINE gen_load_param_reg(Bitu reg,Bitu param) {
gen_mov_regs(param, reg);
}
// load a value from memory as param'th function parameter
static void INLINE gen_load_param_mem(Bitu mem,Bitu param) {
gen_mov_word_to_reg(param, (void *)mem, 1);
}
#else
other arm abis
#endif
// jump to an address pointed at by ptr, offset is in imm
static void gen_jmp_ptr(void * ptr,Bits imm=0) {
cache_addw(0x4680 + (temphi1 - HOST_r8) + (templosav << 3)); // mov temphi1, templosav
gen_mov_word_to_reg(templosav, ptr, 1);
if (imm) {
gen_mov_dword_to_reg_imm(templo2, imm);
cache_addw(0x1800 + templosav + (templosav << 3) + (templo2 << 6)); // add templosav, templosav, templo2
}
#if (1)
// (*ptr) should be word aligned
if ((imm & 0x03) == 0) {
cache_addw(0x6800 + templo2 + (templosav << 3)); // ldr templo2, [templosav]
} else
#endif
{
cache_addw(0x7800 + templo2 + (templosav << 3)); // ldrb templo2, [templosav]
cache_addw(0x7800 + templo1 + (templosav << 3) + (1 << 6)); // ldrb templo1, [templosav, #1]
cache_addw(0x0000 + templo1 + (templo1 << 3) + (8 << 6)); // lsl templo1, templo1, #8
cache_addw(0x4300 + templo2 + (templo1 << 3)); // orr templo2, templo1
cache_addw(0x7800 + templo1 + (templosav << 3) + (2 << 6)); // ldrb templo1, [templosav, #2]
cache_addw(0x0000 + templo1 + (templo1 << 3) + (16 << 6)); // lsl templo1, templo1, #16
cache_addw(0x4300 + templo2 + (templo1 << 3)); // orr templo2, templo1
cache_addw(0x7800 + templo1 + (templosav << 3) + (3 << 6)); // ldrb templo1, [templosav, #3]
cache_addw(0x0000 + templo1 + (templo1 << 3) + (24 << 6)); // lsl templo1, templo1, #24
cache_addw(0x4300 + templo2 + (templo1 << 3)); // orr templo2, templo1
}
// increase jmp address to keep thumb state
cache_addw(0x1c00 + templo2 + (templo2 << 3) + (1 << 6)); // add templo2, templo2, #1
cache_addw(0x4640 + templosav + ((temphi1 - HOST_r8) << 3)); // mov templosav, temphi1
cache_addw(0x4700 + (templo2 << 3)); // bx templo2
}
// short conditional jump (+-127 bytes) if register is zero
// the destination is set by gen_fill_branch() later
static Bit32u INLINE gen_create_branch_on_zero(HostReg reg,bool dword) {
if (dword) {
cache_addw(0x2800 + (reg << 8)); // cmp reg, #0
} else {
cache_addw(0x0000 + templo1 + (reg << 3) + (16 << 6)); // lsl templo1, reg, #16
}
cache_addw(0xd000); // beq j
return ((Bit32u)cache.pos-2);
}
// short conditional jump (+-127 bytes) if register is nonzero
// the destination is set by gen_fill_branch() later
static Bit32u INLINE gen_create_branch_on_nonzero(HostReg reg,bool dword) {
if (dword) {
cache_addw(0x2800 + (reg << 8)); // cmp reg, #0
} else {
cache_addw(0x0000 + templo1 + (reg << 3) + (16 << 6)); // lsl templo1, reg, #16
}
cache_addw(0xd100); // bne j
return ((Bit32u)cache.pos-2);
}
// calculate relative offset and fill it into the location pointed to by data
static void INLINE gen_fill_branch(DRC_PTR_SIZE_IM data) {
#if C_DEBUG
Bits len=(Bit32u)cache.pos-(data+4);
if (len<0) len=-len;
if (len>252) LOG_MSG("Big jump %d",len);
#endif
*(Bit8u*)data=(Bit8u)( ((Bit32u)cache.pos-(data+4)) >> 1 );
}
// conditional jump if register is nonzero
// for isdword==true the 32bit of the register are tested
// for isdword==false the lowest 8bit of the register are tested
static Bit32u gen_create_branch_long_nonzero(HostReg reg,bool isdword) {
if (isdword) {
cache_addw(0x2800 + (reg << 8)); // cmp reg, #0
} else {
cache_addw(0x0000 + templo2 + (reg << 3) + (24 << 6)); // lsl templo2, reg, #24
}
if (((Bit32u)cache.pos & 0x03) == 0) {
cache_addw(0xd000 + (8 >> 1)); // beq nobranch (pc+8)
cache_addw(0x4800 + (templo1 << 8) + (4 >> 2)); // ldr templo1, [pc, #4]
cache_addw(0x4700 + (templo1 << 3)); // bx templo1
cache_addw(0x46c0); // nop
} else {
cache_addw(0xd000 + (6 >> 1)); // beq nobranch (pc+6)
cache_addw(0x4800 + (templo1 << 8)); // ldr templo1, [pc, #0]
cache_addw(0x4700 + (templo1 << 3)); // bx templo1
}
cache_addd(0); // fill j
// nobranch:
return ((Bit32u)cache.pos-4);
}
// compare 32bit-register against zero and jump if value less/equal than zero
static Bit32u INLINE gen_create_branch_long_leqzero(HostReg reg) {
cache_addw(0x2800 + (reg << 8)); // cmp reg, #0
if (((Bit32u)cache.pos & 0x03) == 0) {
cache_addw(0xdc00 + (8 >> 1)); // bgt nobranch (pc+8)
cache_addw(0x4800 + (templo1 << 8) + (4 >> 2)); // ldr templo1, [pc, #4]
cache_addw(0x4700 + (templo1 << 3)); // bx templo1
cache_addw(0x46c0); // nop
} else {
cache_addw(0xdc00 + (6 >> 1)); // bgt nobranch (pc+6)
cache_addw(0x4800 + (templo1 << 8)); // ldr templo1, [pc, #0]
cache_addw(0x4700 + (templo1 << 3)); // bx templo1
}
cache_addd(0); // fill j
// nobranch:
return ((Bit32u)cache.pos-4);
}
// calculate long relative offset and fill it into the location pointed to by data
static void INLINE gen_fill_branch_long(Bit32u data) {
// this is an absolute branch
*(Bit32u*)data=((Bit32u)cache.pos) + 1; // add 1 to keep processor in thumb state
}
static void gen_run_code(void) {
// switch from arm to thumb state
cache_addd(0xe2800000 + (HOST_r3 << 12) + (HOST_pc << 16) + (1)); // add r3, pc, #1
cache_addd(0xe12fff10 + (HOST_r3)); // bx r3
// thumb state from now on
cache_addw(0xb500); // push {lr}
cache_addw(0xb4f0); // push {v1-v4}
cache_addw(0xa302); // add r3, pc, #8
cache_addw(0x3001); // add r0, #1
cache_addw(0x3301); // add r3, #1
cache_addw(0xb408); // push {r3}
cache_addw(0x4700); // bx r0
cache_addw(0x46c0); // nop
cache_addw(0xbcf0); // pop {v1-v4}
cache_addw(0xbc08); // pop {r3}
cache_addw(0x4718); // bx r3
}
// return from a function
static void gen_return_function(void) {
cache_addw(0x1c00 + HOST_a1 + (FC_RETOP << 3)); // mov a1, FC_RETOP
cache_addw(0xbc08); // pop {r3}
cache_addw(0x4718); // bx r3
}
#ifdef DRC_FLAGS_INVALIDATION
// called when a call to a function can be replaced by a
// call to a simpler function
static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
#ifdef DRC_FLAGS_INVALIDATION_DCODE
if (((Bit32u)pos & 0x03) == 0)
{
// try to avoid function calls but rather directly fill in code
switch (flags_type) {
case t_ADDb:
case t_ADDw:
case t_ADDd:
*(Bit16u*)pos=0x1800 + FC_RETOP + (HOST_a1 << 3) + (HOST_a2 << 6); // add FC_RETOP, a1, a2
*(Bit16u*)(pos+2)=0xe000 + (16 >> 1); // b after_call (pc+16)
break;
case t_ORb:
case t_ORw:
case t_ORd:
*(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1
*(Bit16u*)(pos+2)=0x4300 + FC_RETOP + (HOST_a2 << 3); // orr FC_RETOP, a2
*(Bit16u*)(pos+4)=0xe000 + (14 >> 1); // b after_call (pc+14)
break;
case t_ANDb:
case t_ANDw:
case t_ANDd:
*(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1
*(Bit16u*)(pos+2)=0x4000 + FC_RETOP + (HOST_a2 << 3); // and FC_RETOP, a2
*(Bit16u*)(pos+4)=0xe000 + (14 >> 1); // b after_call (pc+14)
break;
case t_SUBb:
case t_SUBw:
case t_SUBd:
*(Bit16u*)pos=0x1a00 + FC_RETOP + (HOST_a1 << 3) + (HOST_a2 << 6); // sub FC_RETOP, a1, a2
*(Bit16u*)(pos+2)=0xe000 + (16 >> 1); // b after_call (pc+16)
break;
case t_XORb:
case t_XORw:
case t_XORd:
*(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1
*(Bit16u*)(pos+2)=0x4040 + FC_RETOP + (HOST_a2 << 3); // eor FC_RETOP, a2
*(Bit16u*)(pos+4)=0xe000 + (14 >> 1); // b after_call (pc+14)
break;
case t_CMPb:
case t_CMPw:
case t_CMPd:
case t_TESTb:
case t_TESTw:
case t_TESTd:
*(Bit16u*)pos=0xe000 + (18 >> 1); // b after_call (pc+18)
break;
case t_INCb:
case t_INCw:
case t_INCd:
*(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3) + (1 << 6); // add FC_RETOP, a1, #1
*(Bit16u*)(pos+2)=0xe000 + (16 >> 1); // b after_call (pc+16)
break;
case t_DECb:
case t_DECw:
case t_DECd:
*(Bit16u*)pos=0x1e00 + FC_RETOP + (HOST_a1 << 3) + (1 << 6); // sub FC_RETOP, a1, #1
*(Bit16u*)(pos+2)=0xe000 + (16 >> 1); // b after_call (pc+16)
break;
case t_SHLb:
case t_SHLw:
case t_SHLd:
*(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1
*(Bit16u*)(pos+2)=0x4080 + FC_RETOP + (HOST_a2 << 3); // lsl FC_RETOP, a2
*(Bit16u*)(pos+4)=0xe000 + (14 >> 1); // b after_call (pc+14)
break;
case t_SHRb:
*(Bit16u*)pos=0x0000 + FC_RETOP + (HOST_a1 << 3) + (24 << 6); // lsl FC_RETOP, a1, #24
*(Bit16u*)(pos+2)=0x0800 + FC_RETOP + (FC_RETOP << 3) + (24 << 6); // lsr FC_RETOP, FC_RETOP, #24
*(Bit16u*)(pos+4)=0x40c0 + FC_RETOP + (HOST_a2 << 3); // lsr FC_RETOP, a2
*(Bit16u*)(pos+6)=0xe000 + (12 >> 1); // b after_call (pc+12)
break;
case t_SHRw:
*(Bit16u*)pos=0x0000 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsl FC_RETOP, a1, #16
*(Bit16u*)(pos+2)=0x0800 + FC_RETOP + (FC_RETOP << 3) + (16 << 6); // lsr FC_RETOP, FC_RETOP, #16
*(Bit16u*)(pos+4)=0x40c0 + FC_RETOP + (HOST_a2 << 3); // lsr FC_RETOP, a2
*(Bit16u*)(pos+6)=0xe000 + (12 >> 1); // b after_call (pc+12)
break;
case t_SHRd:
*(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1
*(Bit16u*)(pos+2)=0x40c0 + FC_RETOP + (HOST_a2 << 3); // lsr FC_RETOP, a2
*(Bit16u*)(pos+4)=0xe000 + (14 >> 1); // b after_call (pc+14)
break;
case t_SARb:
*(Bit16u*)pos=0x0000 + FC_RETOP + (HOST_a1 << 3) + (24 << 6); // lsl FC_RETOP, a1, #24
*(Bit16u*)(pos+2)=0x1000 + FC_RETOP + (FC_RETOP << 3) + (24 << 6); // asr FC_RETOP, FC_RETOP, #24
*(Bit16u*)(pos+4)=0x4100 + FC_RETOP + (HOST_a2 << 3); // asr FC_RETOP, a2
*(Bit16u*)(pos+6)=0xe000 + (12 >> 1); // b after_call (pc+12)
break;
case t_SARw:
*(Bit16u*)pos=0x0000 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsl FC_RETOP, a1, #16
*(Bit16u*)(pos+2)=0x1000 + FC_RETOP + (FC_RETOP << 3) + (16 << 6); // asr FC_RETOP, FC_RETOP, #16
*(Bit16u*)(pos+4)=0x4100 + FC_RETOP + (HOST_a2 << 3); // asr FC_RETOP, a2
*(Bit16u*)(pos+6)=0xe000 + (12 >> 1); // b after_call (pc+12)
break;
case t_SARd:
*(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1
*(Bit16u*)(pos+2)=0x4100 + FC_RETOP + (HOST_a2 << 3); // asr FC_RETOP, a2
*(Bit16u*)(pos+4)=0xe000 + (14 >> 1); // b after_call (pc+14)
break;
case t_RORb:
*(Bit16u*)pos=0x0000 + HOST_a1 + (HOST_a1 << 3) + (24 << 6); // lsl a1, a1, #24
*(Bit16u*)(pos+2)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (8 << 6); // lsr FC_RETOP, a1, #8
*(Bit16u*)(pos+4)=0x4300 + HOST_a1 + (FC_RETOP << 3); // orr a1, FC_RETOP
*(Bit16u*)(pos+6)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsr FC_RETOP, a1, #16
*(Bit16u*)(pos+8)=0x4300 + FC_RETOP + (HOST_a1 << 3); // orr FC_RETOP, a1
*(Bit16u*)(pos+10)=0x41c0 + FC_RETOP + (HOST_a2 << 3); // ror FC_RETOP, a2
*(Bit16u*)(pos+12)=0xe000 + (6 >> 1); // b after_call (pc+6)
break;
case t_RORw:
*(Bit16u*)pos=0x0000 + HOST_a1 + (HOST_a1 << 3) + (16 << 6); // lsl a1, a1, #16
*(Bit16u*)(pos+2)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsr FC_RETOP, a1, #16
*(Bit16u*)(pos+4)=0x4300 + FC_RETOP + (HOST_a1 << 3); // orr FC_RETOP, a1
*(Bit16u*)(pos+6)=0x41c0 + FC_RETOP + (HOST_a2 << 3); // ror FC_RETOP, a2
*(Bit16u*)(pos+8)=0xe000 + (10 >> 1); // b after_call (pc+10)
break;
case t_RORd:
*(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1
*(Bit16u*)(pos+2)=0x41c0 + FC_RETOP + (HOST_a2 << 3); // ror FC_RETOP, a2
*(Bit16u*)(pos+4)=0xe000 + (14 >> 1); // b after_call (pc+14)
break;
case t_ROLb:
*(Bit16u*)pos=0x0000 + HOST_a1 + (HOST_a1 << 3) + (24 << 6); // lsl a1, a1, #24
*(Bit16u*)(pos+2)=0x4240 + templo1 + (HOST_a2 << 3); // neg templo1, a2
*(Bit16u*)(pos+4)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (8 << 6); // lsr FC_RETOP, a1, #8
*(Bit16u*)(pos+6)=0x3000 + (templo1 << 8) + (32); // add templo1, #32
*(Bit16u*)(pos+8)=0x4300 + HOST_a1 + (FC_RETOP << 3); // orr a1, FC_RETOP
*(Bit16u*)(pos+10)=0x46c0; // nop
*(Bit16u*)(pos+12)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsr FC_RETOP, a1, #16
*(Bit16u*)(pos+14)=0x46c0; // nop
*(Bit16u*)(pos+16)=0x4300 + FC_RETOP + (HOST_a1 << 3); // orr FC_RETOP, a1
*(Bit16u*)(pos+18)=0x46c0; // nop
*(Bit16u*)(pos+20)=0x41c0 + FC_RETOP + (templo1 << 3); // ror FC_RETOP, templo1
break;
case t_ROLw:
*(Bit16u*)pos=0x0000 + HOST_a1 + (HOST_a1 << 3) + (16 << 6); // lsl a1, a1, #16
*(Bit16u*)(pos+2)=0x4240 + templo1 + (HOST_a2 << 3); // neg templo1, a2
*(Bit16u*)(pos+4)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsr FC_RETOP, a1, #16
*(Bit16u*)(pos+6)=0x3000 + (templo1 << 8) + (32); // add templo1, #32
*(Bit16u*)(pos+8)=0x4300 + FC_RETOP + (HOST_a1 << 3); // orr FC_RETOP, a1
*(Bit16u*)(pos+10)=0x41c0 + FC_RETOP + (templo1 << 3); // ror FC_RETOP, templo1
*(Bit16u*)(pos+12)=0xe000 + (6 >> 1); // b after_call (pc+6)
break;
case t_ROLd:
*(Bit16u*)pos=0x4240 + templo1 + (HOST_a2 << 3); // neg templo1, a2
*(Bit16u*)(pos+2)=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1
*(Bit16u*)(pos+4)=0x3000 + (templo1 << 8) + (32); // add templo1, #32
*(Bit16u*)(pos+6)=0x41c0 + FC_RETOP + (templo1 << 3); // ror FC_RETOP, templo1
*(Bit16u*)(pos+8)=0xe000 + (10 >> 1); // b after_call (pc+10)
break;
case t_NEGb:
case t_NEGw:
case t_NEGd:
*(Bit16u*)pos=0x4240 + FC_RETOP + (HOST_a1 << 3); // neg FC_RETOP, a1
*(Bit16u*)(pos+2)=0xe000 + (16 >> 1); // b after_call (pc+16)
break;
default:
*(Bit32u*)(pos+8)=(Bit32u)fct_ptr; // simple_func
break;
}
}
else
{
// try to avoid function calls but rather directly fill in code
switch (flags_type) {
case t_ADDb:
case t_ADDw:
case t_ADDd:
*(Bit16u*)pos=0x1800 + FC_RETOP + (HOST_a1 << 3) + (HOST_a2 << 6); // add FC_RETOP, a1, a2
*(Bit16u*)(pos+2)=0xe000 + (18 >> 1); // b after_call (pc+18)
break;
case t_ORb:
case t_ORw:
case t_ORd:
*(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1
*(Bit16u*)(pos+2)=0x4300 + FC_RETOP + (HOST_a2 << 3); // orr FC_RETOP, a2
*(Bit16u*)(pos+4)=0xe000 + (16 >> 1); // b after_call (pc+16)
break;
case t_ANDb:
case t_ANDw:
case t_ANDd:
*(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1
*(Bit16u*)(pos+2)=0x4000 + FC_RETOP + (HOST_a2 << 3); // and FC_RETOP, a2
*(Bit16u*)(pos+4)=0xe000 + (16 >> 1); // b after_call (pc+16)
break;
case t_SUBb:
case t_SUBw:
case t_SUBd:
*(Bit16u*)pos=0x1a00 + FC_RETOP + (HOST_a1 << 3) + (HOST_a2 << 6); // sub FC_RETOP, a1, a2
*(Bit16u*)(pos+2)=0xe000 + (18 >> 1); // b after_call (pc+18)
break;
case t_XORb:
case t_XORw:
case t_XORd:
*(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1
*(Bit16u*)(pos+2)=0x4040 + FC_RETOP + (HOST_a2 << 3); // eor FC_RETOP, a2
*(Bit16u*)(pos+4)=0xe000 + (16 >> 1); // b after_call (pc+16)
break;
case t_CMPb:
case t_CMPw:
case t_CMPd:
case t_TESTb:
case t_TESTw:
case t_TESTd:
*(Bit16u*)pos=0xe000 + (20 >> 1); // b after_call (pc+20)
break;
case t_INCb:
case t_INCw:
case t_INCd:
*(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3) + (1 << 6); // add FC_RETOP, a1, #1
*(Bit16u*)(pos+2)=0xe000 + (18 >> 1); // b after_call (pc+18)
break;
case t_DECb:
case t_DECw:
case t_DECd:
*(Bit16u*)pos=0x1e00 + FC_RETOP + (HOST_a1 << 3) + (1 << 6); // sub FC_RETOP, a1, #1
*(Bit16u*)(pos+2)=0xe000 + (18 >> 1); // b after_call (pc+18)
break;
case t_SHLb:
case t_SHLw:
case t_SHLd:
*(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1
*(Bit16u*)(pos+2)=0x4080 + FC_RETOP + (HOST_a2 << 3); // lsl FC_RETOP, a2
*(Bit16u*)(pos+4)=0xe000 + (16 >> 1); // b after_call (pc+16)
break;
case t_SHRb:
*(Bit16u*)pos=0x0000 + FC_RETOP + (HOST_a1 << 3) + (24 << 6); // lsl FC_RETOP, a1, #24
*(Bit16u*)(pos+2)=0x0800 + FC_RETOP + (FC_RETOP << 3) + (24 << 6); // lsr FC_RETOP, FC_RETOP, #24
*(Bit16u*)(pos+4)=0x40c0 + FC_RETOP + (HOST_a2 << 3); // lsr FC_RETOP, a2
*(Bit16u*)(pos+6)=0xe000 + (14 >> 1); // b after_call (pc+14)
break;
case t_SHRw:
*(Bit16u*)pos=0x0000 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsl FC_RETOP, a1, #16
*(Bit16u*)(pos+2)=0x0800 + FC_RETOP + (FC_RETOP << 3) + (16 << 6); // lsr FC_RETOP, FC_RETOP, #16
*(Bit16u*)(pos+4)=0x40c0 + FC_RETOP + (HOST_a2 << 3); // lsr FC_RETOP, a2
*(Bit16u*)(pos+6)=0xe000 + (14 >> 1); // b after_call (pc+14)
break;
case t_SHRd:
*(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1
*(Bit16u*)(pos+2)=0x40c0 + FC_RETOP + (HOST_a2 << 3); // lsr FC_RETOP, a2
*(Bit16u*)(pos+4)=0xe000 + (16 >> 1); // b after_call (pc+16)
break;
case t_SARb:
*(Bit16u*)pos=0x0000 + FC_RETOP + (HOST_a1 << 3) + (24 << 6); // lsl FC_RETOP, a1, #24
*(Bit16u*)(pos+2)=0x1000 + FC_RETOP + (FC_RETOP << 3) + (24 << 6); // asr FC_RETOP, FC_RETOP, #24
*(Bit16u*)(pos+4)=0x4100 + FC_RETOP + (HOST_a2 << 3); // asr FC_RETOP, a2
*(Bit16u*)(pos+6)=0xe000 + (14 >> 1); // b after_call (pc+14)
break;
case t_SARw:
*(Bit16u*)pos=0x0000 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsl FC_RETOP, a1, #16
*(Bit16u*)(pos+2)=0x1000 + FC_RETOP + (FC_RETOP << 3) + (16 << 6); // asr FC_RETOP, FC_RETOP, #16
*(Bit16u*)(pos+4)=0x4100 + FC_RETOP + (HOST_a2 << 3); // asr FC_RETOP, a2
*(Bit16u*)(pos+6)=0xe000 + (14 >> 1); // b after_call (pc+14)
break;
case t_SARd:
*(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1
*(Bit16u*)(pos+2)=0x4100 + FC_RETOP + (HOST_a2 << 3); // asr FC_RETOP, a2
*(Bit16u*)(pos+4)=0xe000 + (16 >> 1); // b after_call (pc+16)
break;
case t_RORb:
*(Bit16u*)pos=0x0000 + HOST_a1 + (HOST_a1 << 3) + (24 << 6); // lsl a1, a1, #24
*(Bit16u*)(pos+2)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (8 << 6); // lsr FC_RETOP, a1, #8
*(Bit16u*)(pos+4)=0x4300 + HOST_a1 + (FC_RETOP << 3); // orr a1, FC_RETOP
*(Bit16u*)(pos+6)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsr FC_RETOP, a1, #16
*(Bit16u*)(pos+8)=0x4300 + FC_RETOP + (HOST_a1 << 3); // orr FC_RETOP, a1
*(Bit16u*)(pos+10)=0x41c0 + FC_RETOP + (HOST_a2 << 3); // ror FC_RETOP, a2
*(Bit16u*)(pos+12)=0xe000 + (8 >> 1); // b after_call (pc+8)
break;
case t_RORw:
*(Bit16u*)pos=0x0000 + HOST_a1 + (HOST_a1 << 3) + (16 << 6); // lsl a1, a1, #16
*(Bit16u*)(pos+2)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsr FC_RETOP, a1, #16
*(Bit16u*)(pos+4)=0x4300 + FC_RETOP + (HOST_a1 << 3); // orr FC_RETOP, a1
*(Bit16u*)(pos+6)=0x41c0 + FC_RETOP + (HOST_a2 << 3); // ror FC_RETOP, a2
*(Bit16u*)(pos+8)=0xe000 + (12 >> 1); // b after_call (pc+12)
break;
case t_RORd:
*(Bit16u*)pos=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1
*(Bit16u*)(pos+2)=0x41c0 + FC_RETOP + (HOST_a2 << 3); // ror FC_RETOP, a2
*(Bit16u*)(pos+4)=0xe000 + (16 >> 1); // b after_call (pc+16)
break;
case t_ROLb:
*(Bit16u*)pos=0x0000 + HOST_a1 + (HOST_a1 << 3) + (24 << 6); // lsl a1, a1, #24
*(Bit16u*)(pos+2)=0x4240 + templo1 + (HOST_a2 << 3); // neg templo1, a2
*(Bit16u*)(pos+4)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (8 << 6); // lsr FC_RETOP, a1, #8
*(Bit16u*)(pos+6)=0x3000 + (templo1 << 8) + (32); // add templo1, #32
*(Bit16u*)(pos+8)=0x4300 + HOST_a1 + (FC_RETOP << 3); // orr a1, FC_RETOP
*(Bit16u*)(pos+10)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsr FC_RETOP, a1, #16
*(Bit16u*)(pos+12)=0x4300 + FC_RETOP + (HOST_a1 << 3); // orr FC_RETOP, a1
*(Bit16u*)(pos+14)=0x41c0 + FC_RETOP + (templo1 << 3); // ror FC_RETOP, templo1
*(Bit16u*)(pos+16)=0xe000 + (4 >> 1); // b after_call (pc+4)
break;
case t_ROLw:
*(Bit16u*)pos=0x0000 + HOST_a1 + (HOST_a1 << 3) + (16 << 6); // lsl a1, a1, #16
*(Bit16u*)(pos+2)=0x4240 + templo1 + (HOST_a2 << 3); // neg templo1, a2
*(Bit16u*)(pos+4)=0x0800 + FC_RETOP + (HOST_a1 << 3) + (16 << 6); // lsr FC_RETOP, a1, #16
*(Bit16u*)(pos+6)=0x3000 + (templo1 << 8) + (32); // add templo1, #32
*(Bit16u*)(pos+8)=0x4300 + FC_RETOP + (HOST_a1 << 3); // orr FC_RETOP, a1
*(Bit16u*)(pos+10)=0x41c0 + FC_RETOP + (templo1 << 3); // ror FC_RETOP, templo1
*(Bit16u*)(pos+12)=0xe000 + (8 >> 1); // b after_call (pc+8)
break;
case t_ROLd:
*(Bit16u*)pos=0x4240 + templo1 + (HOST_a2 << 3); // neg templo1, a2
*(Bit16u*)(pos+2)=0x1c00 + FC_RETOP + (HOST_a1 << 3); // mov FC_RETOP, a1
*(Bit16u*)(pos+4)=0x3000 + (templo1 << 8) + (32); // add templo1, #32
*(Bit16u*)(pos+6)=0x41c0 + FC_RETOP + (templo1 << 3); // ror FC_RETOP, templo1
*(Bit16u*)(pos+8)=0xe000 + (12 >> 1); // b after_call (pc+12)
break;
case t_NEGb:
case t_NEGw:
case t_NEGd:
*(Bit16u*)pos=0x4240 + FC_RETOP + (HOST_a1 << 3); // neg FC_RETOP, a1
*(Bit16u*)(pos+2)=0xe000 + (18 >> 1); // b after_call (pc+18)
break;
default:
*(Bit32u*)(pos+10)=(Bit32u)fct_ptr; // simple_func
break;
}
}
#else
if (((Bit32u)pos & 0x03) == 0)
{
*(Bit32u*)(pos+8)=(Bit32u)fct_ptr; // simple_func
}
else
{
*(Bit32u*)(pos+10)=(Bit32u)fct_ptr; // simple_func
}
#endif
}
#endif

View file

@ -0,0 +1,29 @@
/*
* Copyright (C) 2002-2008 The DOSBox Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/* $Id: risc_armv4le.h,v 1.1 2008-08-20 14:13:21 c2woody Exp $ */
/* ARMv4 (little endian) backend (switcher) by M-HT */
#include "risc_armv4le-common.h"
// choose your destiny:
#include "risc_armv4le-s3.h"
//#include "risc_armv4le-o3.h"
//#include "risc_armv4le-thumb.h"

View file

@ -1,5 +1,5 @@
/*
* Copyright (C) 2002-2007 The DOSBox Team
* Copyright (C) 2002-2008 The DOSBox Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -16,6 +16,8 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/* $Id: risc_mipsel32.h,v 1.2 2008-08-20 14:13:21 c2woody Exp $ */
/* MIPS32 (little endian) backend by crazyc */
@ -630,3 +632,15 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
#endif
}
#endif
static void cache_block_closing(Bit8u* block_start,Bitu block_size) {
#ifdef PSP
// writeback dcache and invalidate icache
Bit32u inval_start = ((Bit32u)block_start) & ~63;
Bit32u inval_end = (((Bit32u)block_start) + block_size + 64) & ~63;
for (;inval_start < inval_end; inval_start+=64) {
__builtin_allegrex_cache(0x1a, inval_start);
__builtin_allegrex_cache(0x08, inval_start);
}
#endif
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (C) 2002-2007 The DOSBox Team
* Copyright (C) 2002-2008 The DOSBox Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -16,6 +16,8 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/* $Id: risc_x64.h,v 1.10 2008-08-20 14:13:21 c2woody Exp $ */
// some configuring defines that specify the capabilities of this architecture
// or aspects of the recompiling
@ -671,3 +673,5 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
#endif
}
#endif
static void cache_block_closing(Bit8u* block_start,Bitu block_size) { }

View file

@ -1,5 +1,5 @@
/*
* Copyright (C) 2002-2007 The DOSBox Team
* Copyright (C) 2002-2008 The DOSBox Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -16,6 +16,8 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/* $Id: risc_x86.h,v 1.7 2008-08-20 14:13:21 c2woody Exp $ */
// some configuring defines that specify the capabilities of this architecture
// or aspects of the recompiling
@ -505,3 +507,5 @@ static void gen_fill_function_ptr(Bit8u * pos,void* fct_ptr,Bitu flags_type) {
#endif
}
#endif
static void cache_block_closing(Bit8u* block_start,Bitu block_size) { }