From fd28399da212cf570b736dc4b5b4d13eff40b5bc Mon Sep 17 00:00:00 2001 From: Peter Veenstra Date: Tue, 22 Feb 2005 13:06:07 +0000 Subject: [PATCH] New assembly x86 fpu core + fixing some bugs in the old one (Thanks wd) Imported-from: https://svn.code.sf.net/p/dosbox/code-0/dosbox/trunk@2113 --- INSTALL | 8 + configure.in | 18 + src/fpu/Makefile.am | 3 +- src/fpu/fpu.cpp | 379 ++++++--------- src/fpu/fpu_instructions.h | 474 +++++++++++------- src/fpu/fpu_instructions_x86.h | 864 +++++++++++++++++++++++++++++++++ src/fpu/fpu_types.h | 12 +- src/platform/visualc/config.h | 3 + visualc/dosbox.dsp | 4 + visualc_net/dosbox.vcproj | 3 + 10 files changed, 1377 insertions(+), 391 deletions(-) create mode 100644 src/fpu/fpu_instructions_x86.h diff --git a/INSTALL b/INSTALL index 936cde7d..70289563 100644 --- a/INSTALL +++ b/INSTALL @@ -50,6 +50,14 @@ In step 1 you could add the following switches: enables some memory increasing inlines. This greatly increases compiletime for maybe a increase in speed. +--disable-dynamic-x86 + disables the dynamic cpu core. Although it's unstable it can greatly + improve the speed of dosbox on x86 hosts. + +--disable-fpu-x86 + disables the assembly fpu core. Although relatively new the x86 fpu + core has more accuracy than the regular fpu core. + Check the src subdir for the binary. 
diff --git a/configure.in b/configure.in index 53436924..ae00c5ac 100644 --- a/configure.in +++ b/configure.in @@ -137,6 +137,24 @@ else AC_MSG_RESULT(no) fi +AH_TEMPLATE(C_FPU_X86,[Define to 1 to use a x86 assembly fpu core]) +AC_ARG_ENABLE(fpu-x86,AC_HELP_STRING([--disable-fpu-x86],[Disable x86 assembly fpu core]),,enable_fpu_x86=yes) +AC_MSG_CHECKING(whether x86 assembly fpu core will be enabled) +if test x$enable_fpu_x86 = xno ; then + AC_MSG_RESULT(no) +else + if test x$enable_fpu = xyes; then + if test x$c_hostcpu = xx86 ; then + AC_DEFINE(C_FPU_X86,1) + AC_MSG_RESULT(yes) + else + AC_MSG_RESULT(no) + fi + else + AC_MSG_RESULT(no) + fi +fi + AH_TEMPLATE(C_SSHOT,[Define to 1 to enable screenshots, requires libpng]) AC_CHECK_HEADER(png.h,have_png_h=yes,) AC_CHECK_LIB(png, png_check_sig, have_png_lib=yes, ,-lz) diff --git a/src/fpu/Makefile.am b/src/fpu/Makefile.am index 14c2d3d4..d081a682 100644 --- a/src/fpu/Makefile.am +++ b/src/fpu/Makefile.am @@ -1,4 +1,5 @@ AM_CPPFLAGS = -I$(top_srcdir)/include noinst_LIBRARIES = libfpu.a -libfpu_a_SOURCES = fpu.cpp fpu_types.h fpu_instructions.h \ No newline at end of file +libfpu_a_SOURCES = fpu.cpp fpu_types.h fpu_instructions.h \ + fpu_instructions_x86.h diff --git a/src/fpu/fpu.cpp b/src/fpu/fpu.cpp index 05c5ae88..65428abf 100644 --- a/src/fpu/fpu.cpp +++ b/src/fpu/fpu.cpp @@ -16,7 +16,7 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ -/* $Id: fpu.cpp,v 1.24 2005-02-10 10:20:52 qbix79 Exp $ */ +/* $Id: fpu.cpp,v 1.25 2005-02-22 13:06:05 qbix79 Exp $ */ #include "dosbox.h" #if C_FPU @@ -31,7 +31,7 @@ typedef PhysPt EAPoint; #define TOP fpu.top -#define ST(i) ( (fpu.top+ (i) ) & 7 ) +#define STV(i) ( (fpu.top+ (i) ) & 7 ) #define LoadMb(off) mem_readb(off) #define LoadMw(off) mem_readw(off) @@ -44,21 +44,19 @@ typedef PhysPt EAPoint; #include "fpu_types.h" struct { - FPU_Reg regs[9]; - FPU_Tag tags[9]; - Bitu cw; - FPU_Round round; - Bitu ex_mask; - Bitu sw; - Bitu top; - + FPU_Reg regs[9]; + FPU_P_Reg p_regs[9]; + FPU_Tag tags[9]; + Bit16u cw,cw_mask_all; + Bit16u sw; + Bitu top; + FPU_Round round; } fpu; -INLINE void FPU_SetCW(Bitu word) { +INLINE void FPU_SetCW(Bitu word){ fpu.cw = word; + fpu.cw_mask_all = word | 0x3f; fpu.round = (FPU_Round)((word >> 10) & 3); - // word >>8 &3 is precission - fpu.ex_mask = word & 0x3f; } static Bit16u FPU_GetTag(void){ @@ -68,22 +66,17 @@ static Bit16u FPU_GetTag(void){ return tag; } -static void FPU_SetTag(Bit16u tag) -{ +static void FPU_SetTag(Bit16u tag){ for(Bitu i=0;i<8;i++) fpu.tags[i]= static_cast((tag >>(2*i))&3); } - - - INLINE Bitu FPU_GET_TOP(void){ return (fpu.sw & 0x3800)>>11; } INLINE void FPU_SET_TOP(Bitu val){ fpu.sw &= ~0x3800; fpu.sw |= (val&7)<<11; - return; } INLINE void FPU_SET_C0(Bitu C){ @@ -103,71 +96,54 @@ INLINE void FPU_SET_C3(Bitu C){ if(C) fpu.sw |= 0x4000; } -INLINE Bitu FPU_GET_C0(void){ - return (fpu.sw & 0x0100)>>8; -} -INLINE Bitu FPU_GET_C1(void){ - return (fpu.sw & 0x0200)>>9; -} -INLINE Bitu FPU_GET_C2(void){ - return (fpu.sw & 0x0400)>>10; -} -INLINE Bitu FPU_GET_C3(void){ - return (fpu.sw & 0x4000)>>14; -} - +#if C_FPU_X86 +#include "fpu_instructions_x86.h" +#else #include "fpu_instructions.h" - -/* TODO : ESC6normal => esc4normal+pop or a define as well -*/ +#endif /* WATCHIT : ALWAYS UPDATE REGISTERS BEFORE AND AFTER USING THEM STATUS WORD => FPU_SET_TOP(TOP) BEFORE a read TOP=FPU_GET_TOP() after a write; */ + 
static void EATREE(Bitu _rm){ Bitu group=(_rm >> 3) & 7; /* data will allready be put in register 8 by caller */ switch(group){ - case 0x00: /* FIADD */ + case 0x00: /* FADD */ FPU_FADD(TOP, 8); break; - case 0x01: /* FIMUL */ + case 0x01: /* FMUL */ FPU_FMUL(TOP, 8); break; - case 0x02: /* FICOM */ + case 0x02: /* FCOM */ FPU_FCOM(TOP,8); break; - case 0x03: /* FICOMP */ + case 0x03: /* FCOMP */ FPU_FCOM(TOP,8); FPU_FPOP(); break; - case 0x04: /* FISUB */ + case 0x04: /* FSUB */ FPU_FSUB(TOP,8); break; - case 0x05: /* FISUBR */ + case 0x05: /* FSUBR */ FPU_FSUBR(TOP,8); break; - case 0x06: /* FIDIV */ + case 0x06: /* FDIV */ FPU_FDIV(TOP, 8); break; - case 0x07: /* FIDIVR */ + case 0x07: /* FDIVR */ FPU_FDIVR(TOP,8); break; default: break; } - } void FPU_ESC0_EA(Bitu rm,PhysPt addr) { - /* REGULAR TREE WITH 32 BITS REALS -> float */ - union { - float f; - Bit32u l; - } blah; - blah.l = mem_readd(addr); - fpu.regs[8].d = static_cast(blah.f); + /* REGULAR TREE WITH 32 BITS REALS */ + FPU_FLD_F32(addr,8); EATREE(rm); } @@ -176,33 +152,32 @@ void FPU_ESC0_Normal(Bitu rm) { Bitu sub=(rm & 7); switch (group){ case 0x00: /* FADD ST,STi */ - FPU_FADD(TOP,ST(sub)); + FPU_FADD(TOP,STV(sub)); break; case 0x01: /* FMUL ST,STi */ - FPU_FMUL(TOP,ST(sub)); + FPU_FMUL(TOP,STV(sub)); break; case 0x02: /* FCOM STi */ - FPU_FCOM(TOP,ST(sub)); + FPU_FCOM(TOP,STV(sub)); break; case 0x03: /* FCOMP STi */ - FPU_FCOM(TOP,ST(sub)); + FPU_FCOM(TOP,STV(sub)); FPU_FPOP(); break; case 0x04: /* FSUB ST,STi */ - FPU_FSUB(TOP,ST(sub)); + FPU_FSUB(TOP,STV(sub)); break; case 0x05: /* FSUBR ST,STi */ - FPU_FSUBR(TOP,ST(sub)); + FPU_FSUBR(TOP,STV(sub)); break; case 0x06: /* FDIV ST,STi */ - FPU_FDIV(TOP,ST(sub)); + FPU_FDIV(TOP,STV(sub)); break; case 0x07: /* FDIVR ST,STi */ - FPU_FDIVR(TOP,ST(sub)); + FPU_FDIVR(TOP,STV(sub)); break; default: break; - } } @@ -212,40 +187,17 @@ void FPU_ESC1_EA(Bitu rm,PhysPt addr) { Bitu sub=(rm & 7); switch(group){ case 0x00: /* FLD float*/ - { - union { - float f; 
- Bit32u l; - } blah; - blah.l = mem_readd(addr); - FPU_PUSH(static_cast(blah.f)); - } + FPU_PREP_PUSH(); + FPU_FLD_F32(addr,TOP); break; - case 0x01: /* UNKNOWN */ LOG(LOG_FPU,LOG_WARN)("ESC EA 1:Unhandled group %d subfunction %d",group,sub); break; case 0x02: /* FST float*/ - { - union { - float f; - Bit32u l; - } blah; - //should depend on rounding method - blah.f = static_cast(fpu.regs[TOP].d); - mem_writed(addr,blah.l); - } + FPU_FST_F32(addr); break; - case 0x03: /* FSTP float*/ - { - union { - float f; - Bit32u l; - } blah; - blah.f = static_cast(fpu.regs[TOP].d); - mem_writed(addr,blah.l); - } + FPU_FST_F32(addr); FPU_FPOP(); break; case 0x04: /* FLDENV */ @@ -253,7 +205,7 @@ void FPU_ESC1_EA(Bitu rm,PhysPt addr) { break; case 0x05: /* FLDCW */ { - Bit16u temp =mem_readw(addr); + Bit16u temp = mem_readw(addr); FPU_SetCW(temp); } break; @@ -267,7 +219,6 @@ void FPU_ESC1_EA(Bitu rm,PhysPt addr) { LOG(LOG_FPU,LOG_WARN)("ESC EA 1:Unhandled group %d subfunction %d",group,sub); break; } - } void FPU_ESC1_Normal(Bitu rm) { @@ -275,33 +226,36 @@ void FPU_ESC1_Normal(Bitu rm) { Bitu sub=(rm & 7); switch (group){ case 0x00: /* FLD STi */ - FPU_PUSH(fpu.regs[ST(sub)].d); - break; + { + Bitu reg_from=STV(sub); + FPU_PREP_PUSH(); + FPU_FST(reg_from, TOP); + break; + } case 0x01: /* FXCH STi */ - FPU_FXCH(TOP,ST(sub)); + FPU_FXCH(TOP,STV(sub)); break; case 0x02: /* FNOP */ - FPU_FNOP(); + FPU_FNOP(); break; case 0x03: /* FSTP STi */ - FPU_FST(TOP,ST(sub)); - FPU_FPOP(); + FPU_FST(TOP,STV(sub)); + FPU_FPOP(); break; case 0x04: switch(sub){ case 0x00: /* FCHS */ - fpu.regs[TOP].d = -1.0*(fpu.regs[TOP].d); + FPU_FCHS(); break; case 0x01: /* FABS */ - fpu.regs[TOP].d = fabs(fpu.regs[TOP].d); + FPU_FABS(); break; case 0x02: /* UNKNOWN */ case 0x03: /* ILLEGAL */ LOG(LOG_FPU,LOG_WARN)("ESC 1:Unhandled group %X subfunction %X",group,sub); break; case 0x04: /* FTST */ - fpu.regs[8].d=0.0; - FPU_FCOM(TOP,8); + FPU_FTST(); break; case 0x05: /* FXAM */ FPU_FXAM(); @@ -315,25 
+269,25 @@ void FPU_ESC1_Normal(Bitu rm) { case 0x05: switch(sub){ case 0x00: /* FLD1 */ - FPU_PUSH(1.0); + FPU_FLD1(); break; case 0x01: /* FLDL2T */ - FPU_PUSH(L2T); + FPU_FLDL2T(); break; case 0x02: /* FLDL2E */ - FPU_PUSH(L2E); + FPU_FLDL2E(); break; case 0x03: /* FLDPI */ - FPU_PUSH(PI); + FPU_FLDPI(); break; case 0x04: /* FLDLG2 */ - FPU_PUSH(LG2); + FPU_FLDLG2(); break; case 0x05: /* FLDLN2 */ - FPU_PUSH(LN2); + FPU_FLDLN2(); break; case 0x06: /* FLDZ*/ - FPU_PUSH_ZERO(); + FPU_FLDZ(); break; case 0x07: /* ILLEGAL */ LOG(LOG_FPU,LOG_WARN)("ESC 1:Unhandled group %X subfunction %X",group,sub); @@ -342,24 +296,33 @@ void FPU_ESC1_Normal(Bitu rm) { break; case 0x06: switch(sub){ - case 0x00: /* F2XM1 */ + case 0x00: /* F2XM1 */ FPU_F2XM1(); break; - case 0x01: /* FYL2X */ + case 0x01: /* FYL2X */ FPU_FYL2X(); break; - case 0x02: /* FPTAN */ + case 0x02: /* FPTAN */ FPU_FPTAN(); break; - case 0x03: /* FPATAN */ + case 0x03: /* FPATAN */ FPU_FPATAN(); break; - case 0x04: /* FXTRACT */ + case 0x04: /* FXTRACT */ FPU_FXTRACT(); break; + case 0x05: /* FPREM1 */ + FPU_FPREM1(); + break; + case 0x06: /* FDECSTP */ + TOP = (TOP - 1) & 7; + break; + case 0x07: /* FINCSTP */ + TOP = (TOP + 1) & 7; + break; default: - LOG(LOG_FPU,LOG_WARN)("ESC 1:Unhandled group %X subfunction %X",group,sub); - break; + LOG(LOG_FPU,LOG_WARN)("ESC 1:Unhandled group %X subfunction %X",group,sub); + break; } break; case 0x07: @@ -367,6 +330,9 @@ void FPU_ESC1_Normal(Bitu rm) { case 0x00: /* FPREM */ FPU_FPREM(); break; + case 0x01: /* FYL2XP1 */ + FPU_FYL2XP1(); + break; case 0x02: /* FSQRT */ FPU_FSQRT(); break; @@ -374,12 +340,7 @@ void FPU_ESC1_Normal(Bitu rm) { FPU_FSINCOS(); break; case 0x04: /* FRNDINT */ - { -//TODO - Bit64s temp= static_cast(FROUND(fpu.regs[TOP].d)); - fpu.regs[TOP].d=static_cast(temp); - } - //TODO + FPU_FRNDINT(); break; case 0x05: /* FSCALE */ FPU_FSCALE(); @@ -390,7 +351,6 @@ void FPU_ESC1_Normal(Bitu rm) { case 0x07: /* FCOS */ FPU_FCOS(); break; - case 0x01: /* 
FYL2XP1 */ default: LOG(LOG_FPU,LOG_WARN)("ESC 1:Unhandled group %X subfunction %X",group,sub); break; @@ -399,15 +359,12 @@ void FPU_ESC1_Normal(Bitu rm) { default: LOG(LOG_FPU,LOG_WARN)("ESC 1:Unhandled group %X subfunction %X",group,sub); } - -// LOG(LOG_FPU,LOG_WARN)("ESC 1:Unhandled group %X subfunction %X",group,sub); } void FPU_ESC2_EA(Bitu rm,PhysPt addr) { /* 32 bits integer operants */ - Bit32s blah = mem_readd(addr); - fpu.regs[8].d = static_cast(blah); + FPU_FLD_I32(addr,8); EATREE(rm); } @@ -417,8 +374,8 @@ void FPU_ESC2_Normal(Bitu rm) { switch(group){ case 0x05: switch(sub){ - case 0x01: /* FUCOMPP Almost the same as FCOMPP */ - FPU_FCOM(TOP,ST(1)); + case 0x01: /* FUCOMPP */ + FPU_FUCOM(TOP,STV(1)); FPU_FPOP(); FPU_FPOP(); break; @@ -438,31 +395,26 @@ void FPU_ESC3_EA(Bitu rm,PhysPt addr) { Bitu group=(rm >> 3) & 7; Bitu sub=(rm & 7); switch(group){ - case 0x00: /* FLD */ - { - Bit32s blah = mem_readd(addr); - FPU_PUSH( static_cast(blah)); - } + case 0x00: /* FILD */ + FPU_PREP_PUSH(); + FPU_FLD_I32(addr,TOP); break; - case 0x01: /* FISTTP */ + case 0x01: /* FISTTP */ LOG(LOG_FPU,LOG_WARN)("ESC 3 EA:Unhandled group %d subfunction %d",group,sub); break; - - case 0x02: /* FIST */ - mem_writed(addr,static_cast(FROUND(fpu.regs[TOP].d))); + case 0x02: /* FIST */ + FPU_FST_I32(addr); break; - case 0x03: /*FISTP */ - mem_writed(addr,static_cast(FROUND(fpu.regs[TOP].d))); + case 0x03: /* FISTP */ + FPU_FST_I32(addr); FPU_FPOP(); break; case 0x05: /* FLD 80 Bits Real */ - { - Real64 val = FPU_FLD80(addr); - FPU_PUSH(val); - } + FPU_PREP_PUSH(); + FPU_FLD_F80(addr); break; case 0x07: /* FSTP 80 Bits Real */ - FPU_ST80(addr,TOP); + FPU_FST_F80(addr); FPU_FPOP(); break; default: @@ -504,41 +456,40 @@ void FPU_ESC3_Normal(Bitu rm) { void FPU_ESC4_EA(Bitu rm,PhysPt addr) { - /* REGULAR TREE WITH 64 BITS REALS: double */ - fpu.regs[8].l.lower=mem_readd(addr); - fpu.regs[8].l.upper=mem_readd(addr+4); + /* REGULAR TREE WITH 64 BITS REALS */ + FPU_FLD_F64(addr,8); 
EATREE(rm); } void FPU_ESC4_Normal(Bitu rm) { - //LOOKS LIKE number 6 without popping*/ + /* LOOKS LIKE number 6 without popping */ Bitu group=(rm >> 3) & 7; Bitu sub=(rm & 7); switch(group){ - case 0x00: /*FADDP STi,ST*/ - FPU_FADD(ST(sub),TOP); + case 0x00: /* FADD STi,ST*/ + FPU_FADD(STV(sub),TOP); break; - case 0x01: /* FMULP STi,ST*/ - FPU_FMUL(ST(sub),TOP); + case 0x01: /* FMUL STi,ST*/ + FPU_FMUL(STV(sub),TOP); break; case 0x02: /* FCOM*/ - FPU_FCOM(TOP,ST(sub)); - break; /* TODO IS THIS ALLRIGHT ????????? (maybe reverse operators) */ + FPU_FCOM(TOP,STV(sub)); + break; case 0x03: /* FCOMP*/ - FPU_FCOM(TOP,ST(sub)); + FPU_FCOM(TOP,STV(sub)); FPU_FPOP(); break; case 0x04: /* FSUBR STi,ST*/ - FPU_FSUBR(ST(sub),TOP); + FPU_FSUBR(STV(sub),TOP); break; case 0x05: /* FSUB STi,ST*/ - FPU_FSUB(ST(sub),TOP); + FPU_FSUB(STV(sub),TOP); break; case 0x06: /* FDIVR STi,ST*/ - FPU_FDIVR(ST(sub),TOP); + FPU_FDIVR(STV(sub),TOP); break; case 0x07: /* FDIV STi,ST*/ - FPU_FDIV(ST(sub),TOP); + FPU_FDIV(STV(sub),TOP); break; default: break; @@ -550,28 +501,21 @@ void FPU_ESC5_EA(Bitu rm,PhysPt addr) { Bitu sub=(rm & 7); switch(group){ case 0x00: /* FLD double real*/ - { - FPU_Reg blah; - blah.l.lower=mem_readd(addr); - blah.l.upper=mem_readd(addr+4); - FPU_PUSH(blah.d); - } + FPU_PREP_PUSH(); + FPU_FLD_F64(addr,TOP); break; case 0x01: /* FISTTP longint*/ LOG(LOG_FPU,LOG_WARN)("ESC 5 EA:Unhandled group %d subfunction %d",group,sub); break; - - case 0x02: /* FIST double real*/ - mem_writed(addr,fpu.regs[TOP].l.lower); - mem_writed(addr+4,fpu.regs[TOP].l.upper); + case 0x02: /* FST double real*/ + FPU_FST_F64(addr); break; - case 0x03: /*FISTP double real*/ - mem_writed(addr,fpu.regs[TOP].l.lower); - mem_writed(addr+4,fpu.regs[TOP].l.upper); + case 0x03: /* FSTP double real*/ + FPU_FST_F64(addr); FPU_FPOP(); break; - case 0x04: /* FSTOR */ - FPU_FSTOR(addr); + case 0x04: /* FRSTOR */ + FPU_FRSTOR(addr); break; case 0x06: /* FSAVE */ FPU_FSAVE(addr); @@ -591,23 +535,23 @@ void 
FPU_ESC5_Normal(Bitu rm) { Bitu sub=(rm & 7); switch(group){ case 0x00: /* FFREE STi */ - fpu.tags[ST(sub)]=TAG_Empty; + fpu.tags[STV(sub)]=TAG_Empty; break; case 0x01: /* FXCH STi*/ - FPU_FXCH(TOP,ST(sub)); + FPU_FXCH(TOP,STV(sub)); break; case 0x02: /* FST STi */ - FPU_FST(TOP,ST(sub)); + FPU_FST(TOP,STV(sub)); break; case 0x03: /* FSTP STi*/ - FPU_FST(TOP,ST(sub)); + FPU_FST(TOP,STV(sub)); FPU_FPOP(); break; case 0x04: /* FUCOM STi */ - FPU_FUCOM(TOP,ST(sub)); + FPU_FUCOM(TOP,STV(sub)); break; case 0x05: /*FUCOMP STi */ - FPU_FUCOM(TOP,ST(sub)); + FPU_FUCOM(TOP,STV(sub)); FPU_FPOP(); break; default: @@ -619,8 +563,7 @@ void FPU_ESC5_Normal(Bitu rm) { void FPU_ESC6_EA(Bitu rm,PhysPt addr) { /* 16 bit (word integer) operants */ - Bit16s blah = mem_readw(addr); - fpu.regs[8].d = static_cast(blah); + FPU_FLD_I16(addr,8); EATREE(rm); } @@ -631,34 +574,33 @@ void FPU_ESC6_Normal(Bitu rm) { Bitu sub=(rm & 7); switch(group){ case 0x00: /*FADDP STi,ST*/ - FPU_FADD(ST(sub),TOP); + FPU_FADD(STV(sub),TOP); break; case 0x01: /* FMULP STi,ST*/ - FPU_FMUL(ST(sub),TOP); + FPU_FMUL(STV(sub),TOP); break; case 0x02: /* FCOMP5*/ - FPU_FCOM(TOP,ST(sub)); - break; /* TODO IS THIS ALLRIGHT ????????? */ - case 0x03: /* weird*/ /*FCOMPP*/ - if(sub != 1){ - LOG(LOG_FPU,LOG_WARN)("ESC 6:Unhandled group %d subfunction %d",group,sub); - ; - break; + FPU_FCOM(TOP,STV(sub)); + break; /* TODO IS THIS ALLRIGHT ????????? 
*/ + case 0x03: /*FCOMPP*/ + if(sub != 1) { + LOG(LOG_FPU,LOG_WARN)("ESC 6:Unhandled group %d subfunction %d",group,sub); + return; } - FPU_FCOM(TOP,ST(1)); + FPU_FCOM(TOP,STV(1)); FPU_FPOP(); /* extra pop at the bottom*/ break; case 0x04: /* FSUBRP STi,ST*/ - FPU_FSUBR(ST(sub),TOP); + FPU_FSUBR(STV(sub),TOP); break; case 0x05: /* FSUBP STi,ST*/ - FPU_FSUB(ST(sub),TOP); + FPU_FSUB(STV(sub),TOP); break; case 0x06: /* FDIVRP STi,ST*/ - FPU_FDIVR(ST(sub),TOP); + FPU_FDIVR(STV(sub),TOP); break; case 0x07: /* FDIVP STi,ST*/ - FPU_FDIV(ST(sub),TOP); + FPU_FDIV(STV(sub),TOP); break; default: break; @@ -668,55 +610,39 @@ void FPU_ESC6_Normal(Bitu rm) { void FPU_ESC7_EA(Bitu rm,PhysPt addr) { - /* ROUNDING*/ - Bitu group=(rm >> 3) & 7; Bitu sub=(rm & 7); switch(group){ case 0x00: /* FILD Bit16s */ - { - Bit16s blah = mem_readw(addr); - FPU_PUSH( static_cast(blah)); - } + FPU_PREP_PUSH(); + FPU_FLD_I16(addr,TOP); break; - case 0x01: /* FISTTP Bit16s */ + case 0x01: LOG(LOG_FPU,LOG_WARN)("ESC 7 EA:Unhandled group %d subfunction %d",group,sub); break; - case 0x02: /* FIST Bit16s */ - mem_writew(addr,static_cast(FROUND(fpu.regs[TOP].d))); + FPU_FST_I16(addr); break; case 0x03: /* FISTP Bit16s */ - mem_writew(addr,static_cast(FROUND(fpu.regs[TOP].d))); + FPU_FST_I16(addr); FPU_FPOP(); break; + case 0x04: /* FBLD packed BCD */ + FPU_PREP_PUSH(); + FPU_FBLD(addr,TOP); + break; case 0x05: /* FILD Bit64s */ - { - FPU_Reg blah; - blah.l.lower = mem_readd(addr); - blah.l.upper = mem_readd(addr+4); - FPU_PUSH(static_cast(blah.ll)); - } + FPU_PREP_PUSH(); + FPU_FLD_I64(addr,TOP); break; case 0x06: /* FBSTP packed BCD */ FPU_FBST(addr); FPU_FPOP(); break; case 0x07: /* FISTP Bit64s */ - { - FPU_Reg blah; - blah.ll = static_cast(FROUND(fpu.regs[TOP].d)); - mem_writed(addr,blah.l.lower); - mem_writed(addr+4,blah.l.upper); - } + FPU_FST_I64(addr); FPU_FPOP(); break; - case 0x04: /* FBLD packed BCD */ - { - Real64 in = FPU_FBLD(addr); - FPU_PUSH(in); - } - break; default: 
LOG(LOG_FPU,LOG_WARN)("ESC 7 EA:Unhandled group %d subfunction %d",group,sub); break; @@ -728,12 +654,12 @@ void FPU_ESC7_Normal(Bitu rm) { Bitu sub=(rm & 7); switch (group){ case 0x01: /* FXCH STi*/ - FPU_FXCH(TOP,ST(sub)); + FPU_FXCH(TOP,STV(sub)); break; case 0x02: /* FSTP STi*/ case 0x03: /* FSTP STi*/ - FPU_FST(TOP,ST(sub)); - FPU_FPOP(); + FPU_FST(TOP,STV(sub)); + FPU_FPOP(); break; case 0x04: switch(sub){ @@ -742,7 +668,7 @@ void FPU_ESC7_Normal(Bitu rm) { reg_ax = fpu.sw; break; default: - LOG(LOG_FPU,LOG_WARN)("ESC 7:Unhandled group %d subfunction %d",group,sub); + LOG(LOG_FPU,LOG_WARN)("ESC 7:Unhandled group %d subfunction %d",group,sub); break; } break; @@ -750,7 +676,6 @@ void FPU_ESC7_Normal(Bitu rm) { LOG(LOG_FPU,LOG_WARN)("ESC 7:Unhandled group %d subfunction %d",group,sub); break; } - } diff --git a/src/fpu/fpu_instructions.h b/src/fpu/fpu_instructions.h index 0c094a41..f7dd9fb0 100644 --- a/src/fpu/fpu_instructions.h +++ b/src/fpu/fpu_instructions.h @@ -16,26 +16,27 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ -/* $Id: fpu_instructions.h,v 1.26 2005-02-10 10:20:52 qbix79 Exp $ */ +/* $Id: fpu_instructions.h,v 1.27 2005-02-22 13:06:06 qbix79 Exp $ */ static void FPU_FINIT(void) { FPU_SetCW(0x37F); - fpu.sw=0; + fpu.sw = 0; TOP=FPU_GET_TOP(); - fpu.tags[0]=TAG_Empty; - fpu.tags[1]=TAG_Empty; - fpu.tags[2]=TAG_Empty; - fpu.tags[3]=TAG_Empty; - fpu.tags[4]=TAG_Empty; - fpu.tags[5]=TAG_Empty; - fpu.tags[6]=TAG_Empty; - fpu.tags[7]=TAG_Empty; - fpu.tags[8]=TAG_Valid; // is only used by us + fpu.tags[0] = TAG_Empty; + fpu.tags[1] = TAG_Empty; + fpu.tags[2] = TAG_Empty; + fpu.tags[3] = TAG_Empty; + fpu.tags[4] = TAG_Empty; + fpu.tags[5] = TAG_Empty; + fpu.tags[6] = TAG_Empty; + fpu.tags[7] = TAG_Empty; + fpu.tags[8] = TAG_Valid; // is only used by us } + static void FPU_FCLEX(void){ - fpu.sw&=0x7f00; //should clear exceptions -}; + fpu.sw &= 0x7f00; //should clear exceptions +} static void FPU_FNOP(void){ return; @@ -44,15 +45,17 @@ static void FPU_FNOP(void){ static void FPU_PUSH(double in){ TOP = (TOP - 1) &7; //actually check if empty - fpu.tags[TOP]=TAG_Valid; - fpu.regs[TOP].d=in; + fpu.tags[TOP] = TAG_Valid; + fpu.regs[TOP].d = in; // LOG(LOG_FPU,LOG_ERROR)("Pushed at %d %g to the stack",newtop,in); return; } -static void FPU_PUSH_ZERO(void){ - FPU_PUSH(0.0); - return; //maybe oneday needed + +static void FPU_PREP_PUSH(void){ + TOP = (TOP - 1) &7; + fpu.tags[TOP] = TAG_Valid; } + static void FPU_FPOP(void){ fpu.tags[TOP]=TAG_Empty; //maybe set zero in it as well @@ -61,6 +64,192 @@ static void FPU_FPOP(void){ return; } +static double FROUND(double in){ + switch(fpu.round){ + case ROUND_Nearest: + if (in-floor(in)>0.5) return (floor(in)+1); + else if (in-floor(in)<0.5) return (floor(in)); + else return (((static_cast(floor(in)))&1)!=0)?(floor(in)+1):(floor(in)); + break; + case ROUND_Down: + return (floor(in)); + break; + case ROUND_Up: + return (ceil(in)); + break; + case ROUND_Chop: + return in; //the cast afterwards will do it right maybe cast here + break; + default: 
+ return in; + break; + } +} + +#define BIAS80 16383 +#define BIAS64 1023 + +static Real64 FPU_FLD80(PhysPt addr) { + struct { + Bit16s begin; + FPU_Reg eind; + } test; + test.eind.l.lower = mem_readd(addr); + test.eind.l.upper = mem_readd(addr+4); + test.begin = mem_readw(addr+8); + + Bit64s exp64 = (((test.begin&0x7fff) - BIAS80)); + Bit64s blah = ((exp64 >0)?exp64:-exp64)&0x3ff; + Bit64s exp64final = ((exp64 >0)?blah:-blah) +BIAS64; + + Bit64s mant64 = (test.eind.ll >> 11) & LONGTYPE(0xfffffffffffff); + Bit64s sign = (test.begin&0x8000)?1:0; + FPU_Reg result; + result.ll = (sign <<63)|(exp64final << 52)| mant64; + return result.d; + + //mant64= test.mant80/2***64 * 2 **53 +} + +static void FPU_ST80(PhysPt addr,Bitu reg) { + struct { + Bit16s begin; + FPU_Reg eind; + } test; + Bit64s sign80 = (fpu.regs[reg].ll&LONGTYPE(0x8000000000000000))?1:0; + Bit64s exp80 = fpu.regs[reg].ll&LONGTYPE(0x7ff0000000000000); + Bit64s exp80final = (exp80>>52) - BIAS64 + BIAS80; + Bit64s mant80 = fpu.regs[reg].ll&LONGTYPE(0x000fffffffffffff); + Bit64s mant80final = (mant80 << 11); + // Elvira wants the 8 and tcalc doesn't + if(fpu.regs[reg].d != 0) mant80final |= LONGTYPE(0x8000000000000000); + test.begin= (static_cast(sign80)<<15)| static_cast(exp80final); + test.eind.ll = mant80final; + mem_writed(addr,test.eind.l.lower); + mem_writed(addr+4,test.eind.l.upper); + mem_writew(addr+8,test.begin); +} + + +static void FPU_FLD_F32(PhysPt addr,Bitu store_to) { + union { + float f; + Bit32u l; + } blah; + blah.l = mem_readd(addr); + fpu.regs[store_to].d = static_cast(blah.f); +} + +static void FPU_FLD_F64(PhysPt addr,Bitu store_to) { + fpu.regs[store_to].l.lower = mem_readd(addr); + fpu.regs[store_to].l.upper = mem_readd(addr+4); +} + +static void FPU_FLD_F80(PhysPt addr) { + fpu.regs[TOP].d = FPU_FLD80(addr); +} + +static void FPU_FLD_I16(PhysPt addr,Bitu store_to) { + Bit16s blah = mem_readw(addr); + fpu.regs[store_to].d = static_cast(blah); +} + +static void FPU_FLD_I32(PhysPt 
addr,Bitu store_to) { + Bit32s blah = mem_readd(addr); + fpu.regs[store_to].d = static_cast(blah); +} + +static void FPU_FLD_I64(PhysPt addr,Bitu store_to) { + FPU_Reg blah; + blah.l.lower = mem_readd(addr); + blah.l.upper = mem_readd(addr+4); + fpu.regs[store_to].d = static_cast(blah.ll); +} + +static void FPU_FBLD(PhysPt addr,Bitu store_to) { + Bit64u val = 0; + Bitu in = 0; + Bit64u base = 1; + for(Bitu i = 0;i < 9;i++){ + in = mem_readb(addr + i); + val += ( (in&0xf) * base); //in&0xf shouldn't be higher then 9 + base *= 10; + val += ((( in>>4)&0xf) * base); + base *= 10; + } + + //last number, only now convert to float in order to get + //the best signification + Real64 temp = static_cast(val); + in = mem_readb(addr + 9); + temp += ( (in&0xf) * base ); + if(in&0x80) temp *= -1.0; + fpu.regs[store_to].d = temp; +} + +static void FPU_FST_F32(PhysPt addr) { + union { + float f; + Bit32u l; + } blah; + //should depend on rounding method + blah.f = static_cast(fpu.regs[TOP].d); + mem_writed(addr,blah.l); +} + +static void FPU_FST_F64(PhysPt addr) { + mem_writed(addr,fpu.regs[TOP].l.lower); + mem_writed(addr+4,fpu.regs[TOP].l.upper); +} + +static void FPU_FST_F80(PhysPt addr) { + FPU_ST80(addr,TOP); +} + +static void FPU_FST_I16(PhysPt addr) { + mem_writew(addr,static_cast(FROUND(fpu.regs[TOP].d))); +} + +static void FPU_FST_I32(PhysPt addr) { + mem_writed(addr,static_cast(FROUND(fpu.regs[TOP].d))); +} + +static void FPU_FST_I64(PhysPt addr) { + FPU_Reg blah; + blah.ll = static_cast(FROUND(fpu.regs[TOP].d)); + mem_writed(addr,blah.l.lower); + mem_writed(addr+4,blah.l.upper); +} + +static void FPU_FBST(PhysPt addr) { + FPU_Reg val = fpu.regs[TOP]; + bool sign = false; + if(val.d<0.0){ //sign + sign=true; + val.d=-val.d; + } + //numbers from back to front + Real64 temp=val.d; + Bitu p; + for(Bitu i=0;i<9;i++){ + val.d=temp; + temp = static_cast(static_cast(floor(val.d/10.0))); + p = static_cast(val.d - 10.0*temp); + val.d=temp; + temp = 
static_cast(static_cast(floor(val.d/10.0))); + p |= (static_cast(val.d - 10.0*temp)<<4); + + mem_writeb(addr+i,p); + } + val.d=temp; + temp = static_cast(static_cast(floor(val.d/10.0))); + p = static_cast(val.d - 10.0*temp); + if(sign) + p|=0x80; + mem_writeb(addr+9,p); +} + + static void FPU_FADD(Bitu op1, Bitu op2){ fpu.regs[op1].d+=fpu.regs[op2].d; //flags and such :) @@ -96,9 +285,8 @@ static void FPU_FSQRT(void){ return; } static void FPU_FPATAN(void){ - fpu.regs[ST(1)].d = atan2(fpu.regs[ST(1)].d,fpu.regs[TOP].d); + fpu.regs[STV(1)].d = atan2(fpu.regs[STV(1)].d,fpu.regs[TOP].d); FPU_FPOP(); - FPU_SET_C2(0); //flags and such :) return; } @@ -154,9 +342,9 @@ static void FPU_FST(Bitu st, Bitu other){ } - static void FPU_FCOM(Bitu st, Bitu other){ - if((fpu.tags[st] != TAG_Valid) || (fpu.tags[other] != TAG_Valid)){ + if(((fpu.tags[st] != TAG_Valid) && (fpu.tags[st] != TAG_Zero)) || + ((fpu.tags[other] != TAG_Valid) && (fpu.tags[other] != TAG_Zero))){ FPU_SET_C3(1);FPU_SET_C2(1);FPU_SET_C0(1);return; } if(fpu.regs[st].d == fpu.regs[other].d){ @@ -174,31 +362,14 @@ static void FPU_FUCOM(Bitu st, Bitu other){ FPU_FCOM(st,other); } -static double FROUND(double in){ - switch(fpu.round){ - case ROUND_Nearest: - if (in-floor(in)>0.5) return (floor(in)+1); - else if (in-floor(in)<0.5) return (floor(in)); - else return (((static_cast(floor(in)))&1)!=0)?(floor(in)+1):(floor(in)); - break; - case ROUND_Down: - return (floor(in)); - break; - case ROUND_Up: - return (ceil(in)); - break; - case ROUND_Chop: - return in; //the cast afterwards will do it right maybe cast here - break; - default: - return in; - break; - } +static void FPU_FRNDINT(void){ + Bit64s temp= static_cast(FROUND(fpu.regs[TOP].d)); + fpu.regs[TOP].d=static_cast(temp); } static void FPU_FPREM(void){ Real64 valtop = fpu.regs[TOP].d; - Real64 valdiv = fpu.regs[ST(1)].d; + Real64 valdiv = fpu.regs[STV(1)].d; Bit64s ressaved = static_cast( (valtop/valdiv) ); // Some backups // Real64 res=valtop - 
ressaved*valdiv; @@ -210,143 +381,71 @@ static void FPU_FPREM(void){ FPU_SET_C2(0); } +static void FPU_FPREM1(void){ + Real64 valtop = fpu.regs[TOP].d; + Real64 valdiv = fpu.regs[STV(1)].d; + double quot = valtop/valdiv; + double quotf = floor(quot); + Bit64s ressaved; + if (quot-quotf>0.5) ressaved = static_cast(quotf+1); + else if (quot-quotf<0.5) ressaved = static_cast(quotf); + else ressaved = static_cast((((static_cast(quotf))&1)!=0)?(quotf+1):(quotf)); + fpu.regs[TOP].d = valtop - ressaved*valdiv; + FPU_SET_C0(static_cast(ressaved&4)); + FPU_SET_C3(static_cast(ressaved&2)); + FPU_SET_C1(static_cast(ressaved&1)); + FPU_SET_C2(0); +} + static void FPU_FXAM(void){ - if(fpu.tags[TOP] == TAG_Empty) - { - FPU_SET_C3(1);FPU_SET_C0(1); - return; - } if(fpu.regs[TOP].ll & LONGTYPE(0x8000000000000000)) //sign { FPU_SET_C1(1); - } - else + } + else { FPU_SET_C1(0); } + if(fpu.tags[TOP] == TAG_Empty) + { + FPU_SET_C3(1);FPU_SET_C2(0);FPU_SET_C0(1); + return; + } if(fpu.regs[TOP].d == 0.0) //zero or normalized number. 
{ FPU_SET_C3(1);FPU_SET_C2(0);FPU_SET_C0(0); } - else{ + else + { FPU_SET_C3(0);FPU_SET_C2(1);FPU_SET_C0(0); } } -static void FPU_FBST(PhysPt addr) -{ - FPU_Reg val = fpu.regs[TOP]; - bool sign = false; - if(val.d<0.0){ //sign - sign=true; - val.d=-val.d; - } - //numbers from back to front - Real64 temp=val.d; - Bitu p; - for(Bitu i=0;i<9;i++){ - val.d=temp; - temp = static_cast(static_cast(floor(val.d/10.0))); - p = static_cast(val.d - 10.0*temp); - val.d=temp; - temp = static_cast(static_cast(floor(val.d/10.0))); - p |= (static_cast(val.d - 10.0*temp)<<4); - - mem_writeb(addr+i,p); - } - val.d=temp; - temp = static_cast(static_cast(floor(val.d/10.0))); - p = static_cast(val.d - 10.0*temp); - if(sign) - p|=0x80; - mem_writeb(addr+9,p); -} - -static Real64 FPU_FBLD(PhysPt addr) -{ - Bit64u val = 0; - Bitu in = 0; - Bit64u base = 1; - for(Bitu i = 0;i < 9;i++){ - in = mem_readb(addr + i); - val += ( (in&0xf) * base); //in&0xf shouldn't be higher then 9 - base *= 10; - val += ((( in>>4)&0xf) * base); - base *= 10; - } - - //last number, only now convert to float in order to get - //the best signification - Real64 temp = static_cast(val); - in = mem_readb(addr + 9); - temp += ( (in&0xf) * base ); - if(in&0x80) temp *= -1.0; - return temp; -} - - -#define BIAS80 16383 -#define BIAS64 1023 - -static Real64 FPU_FLD80(PhysPt addr) -{ - struct{ - Bit16s begin; - FPU_Reg eind; - } test; - test.eind.l.lower=mem_readd(addr); - test.eind.l.upper =mem_readd(addr+4); - test.begin=mem_readw(addr+8); - - Bit64s exp64= (((test.begin & 0x7fff) - BIAS80)); - Bit64s blah= ((exp64 >0)?exp64:-exp64)&0x3ff; - Bit64s exp64final= ((exp64 >0)?blah:-blah) +BIAS64; - - Bit64s mant64= (test.eind.ll >> 11) & LONGTYPE(0xfffffffffffff); - Bit64s sign = (test.begin &0x8000)?1:0; - FPU_Reg result; - result.ll= (sign <<63)|(exp64final << 52)| mant64; - return result.d; - - //mant64= test.mant80/2***64 * 2 **53 -} - -static void FPU_ST80(PhysPt addr,Bitu reg) -{ - struct{ - Bit16s begin; - FPU_Reg 
eind; - } test; - Bit64s sign80= (fpu.regs[reg].ll&LONGTYPE(0x8000000000000000))?1:0; - Bit64s exp80 = fpu.regs[reg].ll&LONGTYPE(0x7ff0000000000000); - Bit64s exp80final= (exp80>>52) - BIAS64 + BIAS80; - Bit64s mant80 = fpu.regs[reg].ll&LONGTYPE(0x000fffffffffffff); - Bit64s mant80final= (mant80 << 11); - // Elvira wants the 8 and tcalc doesn't - if(fpu.regs[reg].d != 0) mant80final |= LONGTYPE(0x8000000000000000); - test.begin= (static_cast(sign80)<<15)| static_cast(exp80final); - test.eind.ll=mant80final; - mem_writed(addr,test.eind.l.lower); - mem_writed(addr+4,test.eind.l.upper); - mem_writew(addr+8,test.begin); -} static void FPU_F2XM1(void){ - fpu.regs[TOP].d=pow(2.0,fpu.regs[TOP].d) -1; + fpu.regs[TOP].d = pow(2.0,fpu.regs[TOP].d) - 1; return; } static void FPU_FYL2X(void){ - fpu.regs[ST(1)].d*=log(fpu.regs[TOP].d)/log(static_cast(2.0)); + fpu.regs[STV(1)].d*=log(fpu.regs[TOP].d)/log(static_cast(2.0)); FPU_FPOP(); return; } + +static void FPU_FYL2XP1(void){ + fpu.regs[STV(1)].d*=log(fpu.regs[TOP].d+1.0)/log(static_cast(2.0)); + FPU_FPOP(); + return; +} + static void FPU_FSCALE(void){ - fpu.regs[TOP].d *= pow(2.0,static_cast(static_cast(fpu.regs[ST(1)].d))); + fpu.regs[TOP].d *= pow(2.0,static_cast(static_cast(fpu.regs[STV(1)].d))); return; //2^x where x is chopped. 
} static void FPU_FSTENV(PhysPt addr){ + FPU_SET_TOP(TOP); if(!cpu.code.big) { mem_writew(addr+0,static_cast(fpu.cw)); mem_writew(addr+2,static_cast(fpu.sw)); @@ -368,29 +467,31 @@ static void FPU_FLDENV(PhysPt addr){ tag = mem_readw(addr+4); } else { cw = mem_readd(addr+0); - fpu.sw = mem_readd(addr+4); + fpu.sw = (Bit16u)mem_readd(addr+4); tagbig = mem_readd(addr+8); tag = static_cast(tagbig); } FPU_SetTag(tag); FPU_SetCW(cw); + TOP = FPU_GET_TOP(); } static void FPU_FSAVE(PhysPt addr){ FPU_FSTENV(addr); - Bitu start=(cpu.code.big?28:14); - for(Bitu i=0;i<8;i++){ - FPU_ST80(addr+start,i); - start+=10; + Bitu start = (cpu.code.big?28:14); + for(Bitu i = 0;i < 8;i++){ + FPU_ST80(addr+start,STV(i)); + start += 10; } + FPU_FINIT(); } -static void FPU_FSTOR(PhysPt addr){ +static void FPU_FRSTOR(PhysPt addr){ FPU_FLDENV(addr); - Bitu start=(cpu.code.big?28:14); - for(Bitu i=0;i<8;i++){ - fpu.regs[i].d=FPU_FLD80(addr+start); - start+=10; + Bitu start = (cpu.code.big?28:14); + for(Bitu i = 0;i < 8;i++){ + fpu.regs[STV(i)].d = FPU_FLD80(addr+start); + start += 10; } } @@ -403,6 +504,55 @@ static void FPU_FXTRACT(void) { Bit64s exp80 = test.ll&LONGTYPE(0x7ff0000000000000); Bit64s exp80final = (exp80>>52) - BIAS64; Real64 mant = test.d / (pow(2.0,static_cast(exp80final))); - fpu.regs[TOP].d=exp80final; + fpu.regs[TOP].d = static_cast(exp80final); FPU_PUSH(mant); } + +static void FPU_FCHS(void){ + fpu.regs[TOP].d = -1.0*(fpu.regs[TOP].d); +} + +static void FPU_FABS(void){ + fpu.regs[TOP].d = fabs(fpu.regs[TOP].d); +} + +static void FPU_FTST(void){ + fpu.regs[8].d = 0.0; + FPU_FCOM(TOP,8); +} + +static void FPU_FLD1(void){ + FPU_PREP_PUSH(); + fpu.regs[TOP].d = 1.0; +} + +static void FPU_FLDL2T(void){ + FPU_PREP_PUSH(); + fpu.regs[TOP].d = L2T; +} + +static void FPU_FLDL2E(void){ + FPU_PREP_PUSH(); + fpu.regs[TOP].d = L2E; +} + +static void FPU_FLDPI(void){ + FPU_PREP_PUSH(); + fpu.regs[TOP].d = PI; +} + +static void FPU_FLDLG2(void){ + FPU_PREP_PUSH(); + fpu.regs[TOP].d = 
LG2; +} + +static void FPU_FLDLN2(void){ + FPU_PREP_PUSH(); + fpu.regs[TOP].d = LN2; +} + +static void FPU_FLDZ(void){ + FPU_PREP_PUSH(); + fpu.regs[TOP].d = 0.0; + fpu.tags[TOP] = TAG_Zero; +} diff --git a/src/fpu/fpu_instructions_x86.h b/src/fpu/fpu_instructions_x86.h new file mode 100644 index 00000000..a10cd162 --- /dev/null +++ b/src/fpu/fpu_instructions_x86.h @@ -0,0 +1,864 @@ +/* + * Copyright (C) 2002-2005 The DOSBox Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +/* $Id: fpu_instructions_x86.h,v 1.1 2005-02-22 13:06:06 qbix79 Exp $ */ + + +#if defined (_MSC_VER) + +#define FPUD_LOAD(op,szI,szA) \ + Bit16u new_sw; \ + __asm { \ + __asm mov eax, 8 \ + __asm shl eax, 4 \ + __asm mov ebx, store_to \ + __asm shl ebx, 4 \ + __asm fclex \ + __asm op szI PTR fpu.p_regs[eax].m1 \ + __asm fnstsw new_sw \ + __asm fstp TBYTE PTR fpu.p_regs[ebx].m1 \ + } \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); + +#define FPUD_STORE(op,szI,szA) \ + Bit16u new_sw,save_cw; \ + __asm { \ + __asm fnstcw save_cw \ + __asm fldcw fpu.cw_mask_all \ + __asm mov eax, TOP \ + __asm shl eax, 4 \ + __asm mov ebx, 8 \ + __asm shl ebx, 4 \ + __asm fld TBYTE PTR fpu.p_regs[eax].m1 \ + __asm fclex \ + __asm op szI PTR fpu.p_regs[ebx].m1 \ + __asm fnstsw new_sw \ + __asm fldcw save_cw \ + } \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); + +// handles fsin,fcos,f2xm1,fchs,fabs +#define FPUD_TRIG(op) \ + Bit16u new_sw; \ + __asm { \ + __asm mov eax, TOP \ + __asm shl eax, 4 \ + __asm fld TBYTE PTR fpu.p_regs[eax].m1 \ + __asm fclex \ + __asm op \ + __asm fnstsw new_sw \ + __asm fstp TBYTE PTR fpu.p_regs[eax].m1 \ + } \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); + +// handles fsincos +#define FPUD_SINCOS \ + Bit16u new_sw; \ + __asm { \ + __asm mov eax, TOP \ + __asm mov ebx, eax \ + __asm dec ebx \ + __asm and ebx, 7 \ + __asm shl eax, 4 \ + __asm shl ebx, 4 \ + __asm fld TBYTE PTR fpu.p_regs[eax].m1 \ + __asm fclex \ + __asm fsincos \ + __asm fnstsw new_sw \ + __asm mov cx, new_sw \ + __asm and ch, 0x04 \ + __asm jnz argument_too_large1 \ + __asm fstp TBYTE PTR fpu.p_regs[ebx].m1 \ + __asm fstp TBYTE PTR fpu.p_regs[eax].m1 \ + __asm jmp end_sincos \ + __asm argument_too_large1: \ + __asm fstp st(0) \ + __asm end_sincos: \ + } \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); \ + if ((new_sw&0x0400)==0) FPU_PREP_PUSH(); + +// handles fptan +#define FPUD_PTAN \ + Bit16u new_sw; \ + __asm { \ + __asm mov eax, TOP \ + __asm mov ebx, eax \ + __asm dec ebx \ + __asm and ebx, 7 
\ + __asm shl eax, 4 \ + __asm shl ebx, 4 \ + __asm fld TBYTE PTR fpu.p_regs[eax].m1 \ + __asm fclex \ + __asm fptan \ + __asm fnstsw new_sw \ + __asm mov cx, new_sw \ + __asm and ch, 0x04 \ + __asm jnz argument_too_large2 \ + __asm fstp TBYTE PTR fpu.p_regs[ebx].m1 \ + __asm fstp TBYTE PTR fpu.p_regs[eax].m1 \ + __asm jmp end_ptan \ + __asm argument_too_large2: \ + __asm fstp st(0) \ + __asm end_ptan: \ + } \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); \ + if ((new_sw&0x0400)==0) FPU_PREP_PUSH(); + +// handles fxtract +#define FPUD_XTRACT \ + Bit16u new_sw; \ + __asm { \ + __asm mov eax, TOP \ + __asm mov ebx, eax \ + __asm dec ebx \ + __asm and ebx, 7 \ + __asm shl eax, 4 \ + __asm shl ebx, 4 \ + __asm fld TBYTE PTR fpu.p_regs[eax].m1 \ + __asm fclex \ + __asm fxtract \ + __asm fnstsw new_sw \ + __asm fstp TBYTE PTR fpu.p_regs[ebx].m1 \ + __asm fstp TBYTE PTR fpu.p_regs[eax].m1 \ + } \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); \ + FPU_PREP_PUSH(); + +// handles fadd,fmul,fsub,fsubr,fdiv,fdivr +#define FPUD_ARITH1(op) \ + Bit16u new_sw,save_cw; \ + __asm { \ + __asm fnstcw save_cw \ + __asm fldcw fpu.cw_mask_all \ + __asm mov eax, op1 \ + __asm shl eax, 4 \ + __asm mov ebx, op2 \ + __asm shl ebx, 4 \ + __asm fld TBYTE PTR fpu.p_regs[eax].m1 \ + __asm fld TBYTE PTR fpu.p_regs[ebx].m1 \ + __asm fclex \ + __asm op st(1), st(0) \ + __asm fnstsw new_sw \ + __asm fstp TBYTE PTR fpu.p_regs[eax].m1 \ + __asm fldcw save_cw \ + } \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); + +// handles fsqrt,frndint +#define FPUD_ARITH2(op) \ + Bit16u new_sw,save_cw; \ + __asm { \ + __asm fnstcw save_cw \ + __asm fldcw fpu.cw_mask_all \ + __asm mov eax, TOP \ + __asm shl eax, 4 \ + __asm fld TBYTE PTR fpu.p_regs[eax].m1 \ + __asm fclex \ + __asm op \ + __asm fnstsw new_sw \ + __asm fstp TBYTE PTR fpu.p_regs[eax].m1 \ + __asm fldcw save_cw \ + } \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); + +// handles fprem,fprem1,fscale +#define FPUD_REMINDER(op) \ + Bit16u new_sw; \ + __asm { \ + __asm mov 
eax, TOP \ + __asm mov ebx, eax \ + __asm inc ebx \ + __asm and ebx, 7 \ + __asm shl ebx, 4 \ + __asm shl eax, 4 \ + __asm fld TBYTE PTR fpu.p_regs[ebx].m1 \ + __asm fld TBYTE PTR fpu.p_regs[eax].m1 \ + __asm fclex \ + __asm op \ + __asm fnstsw new_sw \ + __asm fstp TBYTE PTR fpu.p_regs[eax].m1 \ + __asm fstp st(0) \ + } \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); + +// handles fcom,fucom +#define FPUD_COMPARE(op) \ + Bit16u new_sw; \ + __asm { \ + __asm mov ebx, op2 \ + __asm shl ebx, 4 \ + __asm mov eax, op1 \ + __asm shl eax, 4 \ + __asm fld TBYTE PTR fpu.p_regs[ebx].m1 \ + __asm fld TBYTE PTR fpu.p_regs[eax].m1 \ + __asm fclex \ + __asm op \ + __asm fnstsw new_sw \ + } \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); + +// handles fxam,ftst +#define FPUD_EXAMINE(op) \ + Bit16u new_sw; \ + __asm { \ + __asm mov eax, TOP \ + __asm shl eax, 4 \ + __asm fld TBYTE PTR fpu.p_regs[eax].m1 \ + __asm fclex \ + __asm op \ + __asm fnstsw new_sw \ + __asm fstp st(0) \ + } \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); + +// handles fpatan,fyl2x,fyl2xp1 +#define FPUD_WITH_POP(op) \ + Bit16u new_sw; \ + __asm { \ + __asm mov eax, TOP \ + __asm mov ebx, eax \ + __asm inc ebx \ + __asm and ebx, 7 \ + __asm shl ebx, 4 \ + __asm shl eax, 4 \ + __asm fld TBYTE PTR fpu.p_regs[ebx].m1 \ + __asm fld TBYTE PTR fpu.p_regs[eax].m1 \ + __asm fclex \ + __asm op \ + __asm fnstsw new_sw \ + __asm fstp TBYTE PTR fpu.p_regs[ebx].m1 \ + } \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); \ + FPU_FPOP(); + +// load math constants +#define FPUD_LOAD_CONST(op) \ + Bit16u new_sw; \ + FPU_PREP_PUSH(); \ + __asm { \ + __asm mov eax, TOP \ + __asm shl eax, 4 \ + __asm fclex \ + __asm op \ + __asm fnstsw new_sw \ + __asm fstp TBYTE PTR fpu.p_regs[eax].m1 \ + } \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); + +#else + +#define FPUD_LOAD(op,szI,szA) \ + Bit16u new_sw; \ + __asm__ volatile ( \ + "movl $8, %%eax \n" \ + "shl $4, %%eax \n" \ + "shl $4, %1 \n" \ + "fclex \n" \ + #op #szA " (%2, %%eax) \n" \ + "fnstsw %0 
\n" \ + "fstpt (%2, %1) " \ + : "=m" (new_sw) \ + : "r" (store_to), "r" (fpu.p_regs) \ + : "eax", "memory" \ + ); \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); + +#define FPUD_STORE(op,szI,szA) \ + Bit16u new_sw,save_cw; \ + __asm__ volatile ( \ + "fnstcw %1 \n" \ + "fldcw %4 \n" \ + "shll $4, %2 \n" \ + "movl $8, %%eax \n" \ + "shl $4, %%eax \n" \ + "fldt (%3, %2) \n" \ + "fclex \n" \ + #op #szA " (%3, %%eax) \n" \ + "fnstsw %0 \n" \ + "fldcw %1 " \ + : "=m" (new_sw), "=m" (save_cw) \ + : "r" (TOP), "r" (fpu.p_regs), "m" (fpu.cw_mask_all) \ + : "eax", "memory" \ + ); \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); + +// handles fsin,fcos,f2xm1,fchs,fabs +#define FPUD_TRIG(op) \ + Bit16u new_sw; \ + __asm__ volatile ( \ + "shll $4, %1 \n" \ + "fldt (%2, %1) \n" \ + "fclex \n" \ + #op" \n" \ + "fnstsw %0 \n" \ + "fstpt (%2, %1) " \ + : "=m" (new_sw) \ + : "r" (TOP), "r" (fpu.p_regs) \ + : "memory" \ + ); \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); + +// handles fsincos +#define FPUD_SINCOS \ + Bit16u new_sw; \ + __asm__ volatile ( \ + "movl %1, %%eax \n" \ + "shll $4, %1 \n" \ + "decl %%eax \n" \ + "andl $7, %%eax \n" \ + "shll $4, %%eax \n" \ + "fldt (%2, %1) \n" \ + "fclex \n" \ + "fsincos \n" \ + "fnstsw %0 \n" \ + "fstpt (%2, %%eax) \n" \ + "movw %0, %%ax \n" \ + "sahf \n" \ + "jp argument_too_large1 \n" \ + "fstpt (%2, %1) \n" \ + "argument_too_large1: " \ + : "=m" (new_sw) \ + : "r" (TOP), "r" (fpu.p_regs) \ + : "eax", "cc", "memory" \ + ); \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); \ + if ((new_sw&0x0400)==0) FPU_PREP_PUSH(); + +// handles fptan +#define FPUD_PTAN \ + Bit16u new_sw; \ + __asm__ volatile ( \ + "movl %1, %%eax \n" \ + "shll $4, %1 \n" \ + "decl %%eax \n" \ + "andl $7, %%eax \n" \ + "shll $4, %%eax \n" \ + "fldt (%2, %1) \n" \ + "fclex \n" \ + "fptan \n" \ + "fnstsw %0 \n" \ + "fstpt (%2, %%eax) \n" \ + "movw %0, %%ax \n" \ + "sahf \n" \ + "jp argument_too_large2 \n" \ + "fstpt (%2, %1) \n" \ + "argument_too_large2: " \ + : "=m" (new_sw) \ + : "r" 
(TOP), "r" (fpu.p_regs) \ + : "eax", "cc", "memory" \ + ); \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); \ + if ((new_sw&0x0400)==0) FPU_PREP_PUSH(); + +// handles fxtract +#define FPUD_XTRACT \ + Bit16u new_sw; \ + __asm__ volatile ( \ + "movl %1, %%eax \n" \ + "shll $4, %1 \n" \ + "decl %%eax \n" \ + "andl $7, %%eax \n" \ + "shll $4, %%eax \n" \ + "fldt (%2, %1) \n" \ + "fclex \n" \ + "fxtract \n" \ + "fnstsw %0 \n" \ + "fstpt (%2, %%eax) \n" \ + "fstpt (%2, %1) " \ + : "=m" (new_sw) \ + : "r" (TOP), "r" (fpu.p_regs) \ + : "eax", "memory" \ + ); \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); \ + FPU_PREP_PUSH(); + +// handles fadd,fmul,fsub,fsubr,fdiv,fdivr +#define FPUD_ARITH1(op) \ + Bit16u new_sw,save_cw; \ + __asm__ volatile ( \ + "fnstcw %1 \n" \ + "fldcw %5 \n" \ + "shll $4, %3 \n" \ + "shll $4, %2 \n" \ + "fldt (%4, %3) \n" \ + "fldt (%4, %2) \n" \ + "fclex \n" \ + #op" \n" \ + "fnstsw %0 \n" \ + "fstpt (%4, %2) \n" \ + "fldcw %1 " \ + : "=m" (new_sw), "=m" (save_cw) \ + : "r" (op1), "r" (op2), "r" (fpu.p_regs), "m" (fpu.cw_mask_all) \ + : "memory" \ + ); \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); + +// handles fsqrt,frndint +#define FPUD_ARITH2(op) \ + Bit16u new_sw,save_cw; \ + __asm__ volatile ( \ + "fnstcw %1 \n" \ + "fldcw %4 \n" \ + "shll $4, %2 \n" \ + "fldt (%3, %2) \n" \ + "fclex \n" \ + #op" \n" \ + "fnstsw %0 \n" \ + "fstpt (%3, %2) \n" \ + "fldcw %1 " \ + : "=m" (new_sw), "=m" (save_cw) \ + : "r" (TOP), "r" (fpu.p_regs), "m" (fpu.cw_mask_all) \ + : "memory" \ + ); \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); + +// handles fprem,fprem1,fscale +#define FPUD_REMINDER(op) \ + Bit16u new_sw; \ + __asm__ volatile ( \ + "movl %1, %%eax \n" \ + "incl %%eax \n" \ + "andl $7, %%eax \n" \ + "shll $4, %%eax \n" \ + "shll $4, %1 \n" \ + "fldt (%2, %%eax) \n" \ + "fldt (%2, %1) \n" \ + "fclex \n" \ + #op" \n" \ + "fnstsw %0 \n" \ + "fstpt (%2, %1) \n" \ + "fstp %%st(0) " \ + : "=m" (new_sw) \ + : "r" (TOP), "r" (fpu.p_regs) \ + : "eax", "memory" \ + ); \ + 
fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); + +// handles fcom,fucom +#define FPUD_COMPARE(op) \ + Bit16u new_sw; \ + __asm__ volatile ( \ + "shll $4, %2 \n" \ + "shll $4, %1 \n" \ + "fldt (%3, %2) \n" \ + "fldt (%3, %1) \n" \ + "fclex \n" \ + #op" \n" \ + "fnstsw %0 " \ + : "=m" (new_sw) \ + : "r" (op1), "r" (op2), "r" (fpu.p_regs) \ + : "memory" \ + ); \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); + +// handles fxam,ftst +#define FPUD_EXAMINE(op) \ + Bit16u new_sw; \ + __asm__ volatile ( \ + "shll $4, %1 \n" \ + "fldt (%2, %1) \n" \ + "fclex \n" \ + #op" \n" \ + "fnstsw %0 \n" \ + "fstp %%st(0) " \ + : "=m" (new_sw) \ + : "r" (TOP), "r" (fpu.p_regs) \ + : "memory" \ + ); \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); + +// handles fpatan,fyl2x,fyl2xp1 +#define FPUD_WITH_POP(op) \ + Bit16u new_sw; \ + __asm__ volatile ( \ + "movl %1, %%eax \n" \ + "incl %%eax \n" \ + "andl $7, %%eax \n" \ + "shll $4, %%eax \n" \ + "shll $4, %1 \n" \ + "fldt (%2, %%eax) \n" \ + "fldt (%2, %1) \n" \ + "fclex \n" \ + #op" \n" \ + "fnstsw %0 \n" \ + "fstpt (%2, %%eax) \n" \ + : "=m" (new_sw) \ + : "r" (TOP), "r" (fpu.p_regs) \ + : "eax", "memory" \ + ); \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); \ + FPU_FPOP(); + +// load math constants +#define FPUD_LOAD_CONST(op) \ + Bit16u new_sw; \ + FPU_PREP_PUSH(); \ + __asm__ volatile ( \ + "shll $4, %1 \n" \ + "fclex \n" \ + #op" \n" \ + "fnstsw %0 \n" \ + "fstpt (%2, %1) \n" \ + : "=m" (new_sw) \ + : "r" (TOP), "r" (fpu.p_regs) \ + : "memory" \ + ); \ + fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff); \ + +#endif + +static void FPU_FINIT(void) { + FPU_SetCW(0x37F); + fpu.sw=0; + TOP=FPU_GET_TOP(); + fpu.tags[0]=TAG_Empty; + fpu.tags[1]=TAG_Empty; + fpu.tags[2]=TAG_Empty; + fpu.tags[3]=TAG_Empty; + fpu.tags[4]=TAG_Empty; + fpu.tags[5]=TAG_Empty; + fpu.tags[6]=TAG_Empty; + fpu.tags[7]=TAG_Empty; + fpu.tags[8]=TAG_Valid; // is only used by us +} + +static void FPU_FCLEX(void){ + fpu.sw&=0x7f00; //should clear exceptions +} + +static void FPU_FNOP(void){ +} + 
+static void FPU_PREP_PUSH(void){ + TOP = (TOP - 1) &7; + fpu.tags[TOP]=TAG_Valid; +} + +static void FPU_FPOP(void){ + fpu.tags[TOP]=TAG_Empty; + TOP = ((TOP+1)&7); +} + +static void FPU_FLD_F32(PhysPt addr,Bitu store_to) { + fpu.p_regs[8].m1 = mem_readd(addr); + FPUD_LOAD(fld,DWORD,s) +} + +static void FPU_FLD_F64(PhysPt addr,Bitu store_to) { + fpu.p_regs[8].m1 = mem_readd(addr); + fpu.p_regs[8].m2 = mem_readd(addr+4); + FPUD_LOAD(fld,QWORD,l) +} + +static void FPU_FLD_F80(PhysPt addr) { + fpu.p_regs[TOP].m1 = mem_readd(addr); + fpu.p_regs[TOP].m2 = mem_readd(addr+4); + fpu.p_regs[TOP].m3 = mem_readw(addr+8); + FPU_SET_C1(0); +} + +static void FPU_FLD_I16(PhysPt addr,Bitu store_to) { + fpu.p_regs[8].m1 = (Bit32u)mem_readw(addr); + FPUD_LOAD(fild,WORD,) +} + +static void FPU_FLD_I32(PhysPt addr,Bitu store_to) { + fpu.p_regs[8].m1 = mem_readd(addr); + FPUD_LOAD(fild,DWORD,l) +} + +static void FPU_FLD_I64(PhysPt addr,Bitu store_to) { + fpu.p_regs[8].m1 = mem_readd(addr); + fpu.p_regs[8].m2 = mem_readd(addr+4); + FPUD_LOAD(fild,QWORD,q) +} + +static void FPU_FBLD(PhysPt addr,Bitu store_to) { + fpu.p_regs[8].m1 = mem_readd(addr); + fpu.p_regs[8].m2 = mem_readd(addr+4); + fpu.p_regs[8].m3 = mem_readw(addr+8); + FPUD_LOAD(fbld,TBYTE,) +} + +static void FPU_FST_F32(PhysPt addr) { + FPUD_STORE(fstp,DWORD,s) + mem_writed(addr,fpu.p_regs[8].m1); +} + +static void FPU_FST_F64(PhysPt addr) { + FPUD_STORE(fstp,QWORD,l) + mem_writed(addr,fpu.p_regs[8].m1); + mem_writed(addr+4,fpu.p_regs[8].m2); +} + +static void FPU_FST_F80(PhysPt addr) { + mem_writed(addr,fpu.p_regs[TOP].m1); + mem_writed(addr+4,fpu.p_regs[TOP].m2); + mem_writew(addr+8,fpu.p_regs[TOP].m3); + FPU_SET_C1(0); +} + +static void FPU_FST_I16(PhysPt addr) { + FPUD_STORE(fistp,WORD,) + mem_writew(addr,(Bit16u)fpu.p_regs[8].m1); +} + +static void FPU_FST_I32(PhysPt addr) { + FPUD_STORE(fistp,DWORD,l) + mem_writed(addr,fpu.p_regs[8].m1); +} + +static void FPU_FST_I64(PhysPt addr) { + FPUD_STORE(fistp,QWORD,q) + 
mem_writed(addr,fpu.p_regs[8].m1); + mem_writed(addr+4,fpu.p_regs[8].m2); +} + +static void FPU_FBST(PhysPt addr) { + FPUD_STORE(fbstp,TBYTE,) + mem_writed(addr,fpu.p_regs[8].m1); + mem_writed(addr+4,fpu.p_regs[8].m2); + mem_writew(addr+8,fpu.p_regs[8].m3); +} + + +static void FPU_FSIN(void){ + FPUD_TRIG(fsin) +} + +static void FPU_FSINCOS(void){ + FPUD_SINCOS +} + +static void FPU_FCOS(void){ + FPUD_TRIG(fcos) +} + +static void FPU_FSQRT(void){ + FPUD_ARITH2(fsqrt) +} + +static void FPU_FPATAN(void){ + FPUD_WITH_POP(fpatan) +} + +static void FPU_FPTAN(void){ + FPUD_PTAN +} + + +static void FPU_FADD(Bitu op1, Bitu op2){ + FPUD_ARITH1(faddp) +} + +static void FPU_FDIV(Bitu op1, Bitu op2){ + FPUD_ARITH1(fdivp) +} + +static void FPU_FDIVR(Bitu op1, Bitu op2){ + FPUD_ARITH1(fdivrp) +} + +static void FPU_FMUL(Bitu op1, Bitu op2){ + FPUD_ARITH1(fmulp) +} + +static void FPU_FSUB(Bitu op1, Bitu op2){ + FPUD_ARITH1(fsubp) +} + +static void FPU_FSUBR(Bitu op1, Bitu op2){ + FPUD_ARITH1(fsubrp) +} + +static void FPU_FXCH(Bitu stv, Bitu other){ + FPU_Tag tag = fpu.tags[other]; + fpu.tags[other] = fpu.tags[stv]; + fpu.tags[stv] = tag; + + Bit32u m1s = fpu.p_regs[other].m1; + Bit32u m2s = fpu.p_regs[other].m2; + Bit16u m3s = fpu.p_regs[other].m3; + fpu.p_regs[other].m1 = fpu.p_regs[stv].m1; + fpu.p_regs[other].m2 = fpu.p_regs[stv].m2; + fpu.p_regs[other].m3 = fpu.p_regs[stv].m3; + fpu.p_regs[stv].m1 = m1s; + fpu.p_regs[stv].m2 = m2s; + fpu.p_regs[stv].m3 = m3s; + + FPU_SET_C1(0); +} + +static void FPU_FST(Bitu stv, Bitu other){ + fpu.tags[other] = fpu.tags[stv]; + + fpu.p_regs[other].m1 = fpu.p_regs[stv].m1; + fpu.p_regs[other].m2 = fpu.p_regs[stv].m2; + fpu.p_regs[other].m3 = fpu.p_regs[stv].m3; + + FPU_SET_C1(0); +} + + +static void FPU_FCOM(Bitu op1, Bitu op2){ + FPUD_COMPARE(fcompp) +} + +static void FPU_FUCOM(Bitu op1, Bitu op2){ + FPUD_COMPARE(fucompp) +} + +static void FPU_FRNDINT(void){ + FPUD_ARITH2(frndint) +} + +static void FPU_FPREM(void){ + FPUD_REMINDER(fprem) +} + 
+static void FPU_FPREM1(void){ + FPUD_REMINDER(fprem1) +} + +static void FPU_FXAM(void){ + FPUD_EXAMINE(fxam) + // handle empty registers (C1 set to sign in any way!) + if(fpu.tags[TOP] == TAG_Empty) { + FPU_SET_C3(1);FPU_SET_C2(0);FPU_SET_C0(1); + return; + } +} + +static void FPU_F2XM1(void){ + FPUD_TRIG(f2xm1) +} + +static void FPU_FYL2X(void){ + FPUD_WITH_POP(fyl2x) +} + +static void FPU_FYL2XP1(void){ + FPUD_WITH_POP(fyl2xp1) +} + +static void FPU_FSCALE(void){ + FPUD_REMINDER(fscale) +} + + +static void FPU_FSTENV(PhysPt addr){ + FPU_SET_TOP(TOP); + if(!cpu.code.big) { + mem_writew(addr+0,static_cast<Bit16u>(fpu.cw)); + mem_writew(addr+2,static_cast<Bit16u>(fpu.sw)); + mem_writew(addr+4,static_cast<Bit16u>(FPU_GetTag())); + } else { + mem_writed(addr+0,static_cast<Bit32u>(fpu.cw)); + mem_writed(addr+4,static_cast<Bit32u>(fpu.sw)); + mem_writed(addr+8,static_cast<Bit32u>(FPU_GetTag())); + } +} + +static void FPU_FLDENV(PhysPt addr){ + Bit16u tag; + Bit32u tagbig; + Bitu cw; + if(!cpu.code.big) { + cw = mem_readw(addr+0); + fpu.sw = mem_readw(addr+2); + tag = mem_readw(addr+4); + } else { + cw = mem_readd(addr+0); + fpu.sw = (Bit16u)mem_readd(addr+4); + tagbig = mem_readd(addr+8); + tag = static_cast<Bit16u>(tagbig); + } + FPU_SetTag(tag); + FPU_SetCW(cw); + TOP=FPU_GET_TOP(); +} + +static void FPU_FSAVE(PhysPt addr){ + FPU_FSTENV(addr); + Bitu start=(cpu.code.big?28:14); + for(Bitu i=0;i<8;i++){ + mem_writed(addr+start,fpu.p_regs[STV(i)].m1); + mem_writed(addr+start+4,fpu.p_regs[STV(i)].m2); + mem_writew(addr+start+8,fpu.p_regs[STV(i)].m3); + start+=10; + } + FPU_FINIT(); +} + +static void FPU_FRSTOR(PhysPt addr){ + FPU_FLDENV(addr); + Bitu start=(cpu.code.big?28:14); + for(Bitu i=0;i<8;i++){ + fpu.p_regs[STV(i)].m1 = mem_readd(addr+start); + fpu.p_regs[STV(i)].m2 = mem_readd(addr+start+4); + fpu.p_regs[STV(i)].m3 = mem_readw(addr+start+8); + start+=10; + } +} + + +static void FPU_FXTRACT(void) { + FPUD_XTRACT +} + +static void FPU_FCHS(void){ + FPUD_TRIG(fchs) +} + +static void FPU_FABS(void){ + FPUD_TRIG(fabs) +}
+ +static void FPU_FTST(void){ + FPUD_EXAMINE(ftst) +} + +static void FPU_FLD1(void){ + FPUD_LOAD_CONST(fld1) +} + +static void FPU_FLDL2T(void){ + FPUD_LOAD_CONST(fldl2t) +} + +static void FPU_FLDL2E(void){ + FPUD_LOAD_CONST(fldl2e) +} + +static void FPU_FLDPI(void){ + FPUD_LOAD_CONST(fldpi) +} + +static void FPU_FLDLG2(void){ + FPUD_LOAD_CONST(fldlg2) +} + +static void FPU_FLDLN2(void){ + FPUD_LOAD_CONST(fldln2) +} + +static void FPU_FLDZ(void){ + FPUD_LOAD_CONST(fldz) + fpu.tags[TOP]=TAG_Zero; +} diff --git a/src/fpu/fpu_types.h b/src/fpu/fpu_types.h index eeb0ddf9..3a4ac2fe 100644 --- a/src/fpu/fpu_types.h +++ b/src/fpu/fpu_types.h @@ -16,6 +16,7 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ +/* $Id: fpu_types.h,v 1.12 2005-02-22 13:06:06 qbix79 Exp $ */ typedef union { double d; #ifndef WORDS_BIGENDIAN @@ -32,6 +33,15 @@ typedef union { Bit64s ll; } FPU_Reg; +typedef struct { + Bit32u m1; + Bit32u m2; + Bit16u m3; + + Bit16u d1; + Bit32u d2; +} FPU_P_Reg; + enum FPU_Tag { TAG_Valid = 0, TAG_Zero = 1, @@ -39,13 +49,13 @@ enum FPU_Tag { TAG_Empty = 3 }; - enum FPU_Round { ROUND_Nearest = 0, ROUND_Down = 1, ROUND_Up = 2, ROUND_Chop = 3 }; + //get pi from a real library #define PI 3.14159265358979323846 #define L2E 1.4426950408889634 diff --git a/src/platform/visualc/config.h b/src/platform/visualc/config.h index d31afd04..d9e66c87 100644 --- a/src/platform/visualc/config.h +++ b/src/platform/visualc/config.h @@ -33,6 +33,9 @@ /* Enable the FPU module, still only for beta testing */ #define C_FPU 1 +/* Define to 1 to use a x86 assembly fpu core */ +#define C_FPU_X86 1 + /* environ is defined */ #define ENVIRON_INCLUDED 1 diff --git a/visualc/dosbox.dsp b/visualc/dosbox.dsp index a01dc422..306f9f76 100644 --- a/visualc/dosbox.dsp +++ b/visualc/dosbox.dsp @@ -639,6 +639,10 @@ SOURCE=..\src\fpu\fpu_instructions.h # End Source File # Begin Source File +SOURCE=..\src\fpu\fpu_instructions_x86.h +# End Source File +# Begin Source File 
+ SOURCE=..\src\fpu\fpu_types.h # End Source File # End Group diff --git a/visualc_net/dosbox.vcproj b/visualc_net/dosbox.vcproj index f397a0f1..cf873eaf 100644 --- a/visualc_net/dosbox.vcproj +++ b/visualc_net/dosbox.vcproj @@ -657,6 +657,9 @@ RelativePath="..\src\fpu\fpu_instructions.h"> + RelativePath="..\src\fpu\fpu_types.h">