From fd28399da212cf570b736dc4b5b4d13eff40b5bc Mon Sep 17 00:00:00 2001
From: Peter Veenstra <qbix79@users.sourceforge.net>
Date: Tue, 22 Feb 2005 13:06:07 +0000
Subject: [PATCH] New assembly x86 fpu core + fixing some bugs in the old one
 (Thanks wd)

Imported-from: https://svn.code.sf.net/p/dosbox/code-0/dosbox/trunk@2113
---
 INSTALL                        |   8 +
 configure.in                   |  18 +
 src/fpu/Makefile.am            |   3 +-
 src/fpu/fpu.cpp                | 379 ++++++---------
 src/fpu/fpu_instructions.h     | 474 +++++++++++-------
 src/fpu/fpu_instructions_x86.h | 864 +++++++++++++++++++++++++++++++++
 src/fpu/fpu_types.h            |  12 +-
 src/platform/visualc/config.h  |   3 +
 visualc/dosbox.dsp             |   4 +
 visualc_net/dosbox.vcproj      |   3 +
 10 files changed, 1377 insertions(+), 391 deletions(-)
 create mode 100644 src/fpu/fpu_instructions_x86.h

diff --git a/INSTALL b/INSTALL
index 936cde7d..70289563 100644
--- a/INSTALL
+++ b/INSTALL
@@ -50,6 +50,14 @@ In step 1 you could add the following switches:
         enables some memory increasing inlines. This greatly increases 
         compiletime for maybe a increase in speed.
 
+--disable-dynamic-x86
+        disables the dynamic cpu core. Although it's unstable, it can greatly
+        improve the speed of dosbox on x86 hosts.
+
+--disable-fpu-x86
+        disables the assembly fpu core. Although relatively new, the x86 fpu
+        core is more accurate than the regular fpu core.
+
 Check the src subdir for the binary.
 
 
diff --git a/configure.in b/configure.in
index 53436924..ae00c5ac 100644
--- a/configure.in
+++ b/configure.in
@@ -137,6 +137,24 @@ else
   AC_MSG_RESULT(no)
 fi 
 
+AH_TEMPLATE(C_FPU_X86,[Define to 1 to use a x86 assembly fpu core])
+AC_ARG_ENABLE(fpu-x86,AC_HELP_STRING([--disable-fpu-x86],[Disable x86 assembly fpu core]),,enable_fpu_x86=yes)
+AC_MSG_CHECKING(whether x86 assembly fpu core will be enabled) 
+if test x$enable_fpu_x86 = xno ; then 
+   AC_MSG_RESULT(no)
+else
+  if test x$enable_fpu = xyes; then
+    if test x$c_hostcpu = xx86 ; then
+        AC_DEFINE(C_FPU_X86,1)
+        AC_MSG_RESULT(yes)
+    else
+        AC_MSG_RESULT(no)
+    fi
+  else
+      AC_MSG_RESULT(no)
+  fi
+fi
+
 AH_TEMPLATE(C_SSHOT,[Define to 1 to enable screenshots, requires libpng])
 AC_CHECK_HEADER(png.h,have_png_h=yes,)
 AC_CHECK_LIB(png, png_check_sig, have_png_lib=yes, ,-lz)
diff --git a/src/fpu/Makefile.am b/src/fpu/Makefile.am
index 14c2d3d4..d081a682 100644
--- a/src/fpu/Makefile.am
+++ b/src/fpu/Makefile.am
@@ -1,4 +1,5 @@
 AM_CPPFLAGS = -I$(top_srcdir)/include
 
 noinst_LIBRARIES = libfpu.a
-libfpu_a_SOURCES = fpu.cpp fpu_types.h fpu_instructions.h
\ No newline at end of file
+libfpu_a_SOURCES = fpu.cpp fpu_types.h fpu_instructions.h \
+                   fpu_instructions_x86.h
diff --git a/src/fpu/fpu.cpp b/src/fpu/fpu.cpp
index 05c5ae88..65428abf 100644
--- a/src/fpu/fpu.cpp
+++ b/src/fpu/fpu.cpp
@@ -16,7 +16,7 @@
  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
 
-/* $Id: fpu.cpp,v 1.24 2005-02-10 10:20:52 qbix79 Exp $ */
+/* $Id: fpu.cpp,v 1.25 2005-02-22 13:06:05 qbix79 Exp $ */
 
 #include "dosbox.h"
 #if C_FPU
@@ -31,7 +31,7 @@
 typedef PhysPt EAPoint;
 
 #define TOP fpu.top
-#define ST(i)  ( (fpu.top+ (i) ) & 7 )
+#define STV(i)  ( (fpu.top+ (i) ) & 7 )
 
 #define LoadMb(off) mem_readb(off)
 #define LoadMw(off) mem_readw(off)
@@ -44,21 +44,19 @@ typedef PhysPt EAPoint;
 #include "fpu_types.h"
 
 struct {
-	FPU_Reg    regs[9];
-	FPU_Tag    tags[9];
-	Bitu       cw;
-	FPU_Round  round;
-	Bitu       ex_mask;
-	Bitu       sw;
-	Bitu       top;
-
+	FPU_Reg		regs[9];
+	FPU_P_Reg	p_regs[9];
+	FPU_Tag		tags[9];
+	Bit16u		cw,cw_mask_all;
+	Bit16u		sw;
+	Bitu		top;
+	FPU_Round	round;
 } fpu;
 
-INLINE void FPU_SetCW(Bitu word) {
+INLINE void FPU_SetCW(Bitu word){
 	fpu.cw = word;
+	fpu.cw_mask_all = word | 0x3f;
 	fpu.round = (FPU_Round)((word >> 10) & 3);
-	// word >>8 &3 is precission
-	fpu.ex_mask = word & 0x3f;
 }
 	
 static Bit16u FPU_GetTag(void){
@@ -68,22 +66,17 @@ static Bit16u FPU_GetTag(void){
 	return tag;
 }
 
-static void FPU_SetTag(Bit16u tag)
-{
+static void FPU_SetTag(Bit16u tag){
 	for(Bitu i=0;i<8;i++)
 		fpu.tags[i]= static_cast<FPU_Tag>((tag >>(2*i))&3);
 }
 
-
-
-
 INLINE Bitu FPU_GET_TOP(void){
 	return (fpu.sw & 0x3800)>>11;
 }
 INLINE void FPU_SET_TOP(Bitu val){
 	fpu.sw &= ~0x3800;
 	fpu.sw |= (val&7)<<11;
-	return;
 }
 
 INLINE void FPU_SET_C0(Bitu C){
@@ -103,71 +96,54 @@ INLINE void FPU_SET_C3(Bitu C){
 	if(C) fpu.sw |= 0x4000;
 }
 
-INLINE Bitu FPU_GET_C0(void){
-	return (fpu.sw & 0x0100)>>8;
-}
-INLINE Bitu FPU_GET_C1(void){
-	return (fpu.sw & 0x0200)>>9;
-}
-INLINE Bitu FPU_GET_C2(void){
-	return (fpu.sw & 0x0400)>>10;
-}
-INLINE Bitu FPU_GET_C3(void){
-	return (fpu.sw & 0x4000)>>14;
-}
-
+#if C_FPU_X86
+#include "fpu_instructions_x86.h"
+#else
 #include "fpu_instructions.h"
-
-/* TODO   : ESC6normal => esc4normal+pop  or a define as well 
-*/
+#endif
 
 /* WATCHIT : ALWAYS UPDATE REGISTERS BEFORE AND AFTER USING THEM 
 			STATUS WORD =>	FPU_SET_TOP(TOP) BEFORE a read
 			TOP=FPU_GET_TOP() after a write;
 			*/
+
 static void EATREE(Bitu _rm){
 	Bitu group=(_rm >> 3) & 7;
 	/* data will allready be put in register 8 by caller */
 	switch(group){
-		case 0x00:	/* FIADD */
+		case 0x00:	/* FADD */
 			FPU_FADD(TOP, 8);
 			break;
-		case 0x01:	/* FIMUL  */
+		case 0x01:	/* FMUL  */
 			FPU_FMUL(TOP, 8);
 			break;
-		case 0x02:	/* FICOM */
+		case 0x02:	/* FCOM */
 			FPU_FCOM(TOP,8);
 			break;
-		case 0x03:  /* FICOMP */
+		case 0x03:	/* FCOMP */
 			FPU_FCOM(TOP,8);
 			FPU_FPOP();
 			break;
-		case 0x04:	/* FISUB */
+		case 0x04:	/* FSUB */
 			FPU_FSUB(TOP,8);
 			break;
-		case 0x05:  /* FISUBR */
+		case 0x05:	/* FSUBR */
 			FPU_FSUBR(TOP,8);
 			break;
-		case 0x06: /* FIDIV */
+		case 0x06:	/* FDIV */
 			FPU_FDIV(TOP, 8);
 			break;
-		case 0x07:  /* FIDIVR */
+		case 0x07:	/* FDIVR */
 			FPU_FDIVR(TOP,8);
 			break;
 		default:
 			break;
 	}
-
 }
 
 void FPU_ESC0_EA(Bitu rm,PhysPt addr) {
-	/* REGULAR TREE WITH 32 BITS REALS -> float */
-	union {
-		float f;
-		Bit32u l;
-	}	blah;
-	blah.l = mem_readd(addr);
-	fpu.regs[8].d = static_cast<double>(blah.f);
+	/* REGULAR TREE WITH 32 BITS REALS */
+	FPU_FLD_F32(addr,8);
 	EATREE(rm);
 }
 
@@ -176,33 +152,32 @@ void FPU_ESC0_Normal(Bitu rm) {
 	Bitu sub=(rm & 7);
 	switch (group){
 	case 0x00:		/* FADD ST,STi */
-		FPU_FADD(TOP,ST(sub));
+		FPU_FADD(TOP,STV(sub));
 		break;
 	case 0x01:		/* FMUL  ST,STi */
-		FPU_FMUL(TOP,ST(sub));
+		FPU_FMUL(TOP,STV(sub));
 		break;
 	case 0x02:		/* FCOM  STi */
-		FPU_FCOM(TOP,ST(sub));
+		FPU_FCOM(TOP,STV(sub));
 		break;
 	case 0x03:		/* FCOMP STi */
-		FPU_FCOM(TOP,ST(sub));
+		FPU_FCOM(TOP,STV(sub));
 		FPU_FPOP();
 		break;
 	case 0x04:		/* FSUB  ST,STi */
-		FPU_FSUB(TOP,ST(sub));
+		FPU_FSUB(TOP,STV(sub));
 		break;	
 	case 0x05:		/* FSUBR ST,STi */
-		FPU_FSUBR(TOP,ST(sub));
+		FPU_FSUBR(TOP,STV(sub));
 		break;
 	case 0x06:		/* FDIV  ST,STi */
-		FPU_FDIV(TOP,ST(sub));
+		FPU_FDIV(TOP,STV(sub));
 		break;
 	case 0x07:		/* FDIVR ST,STi */
-		FPU_FDIVR(TOP,ST(sub));
+		FPU_FDIVR(TOP,STV(sub));
 		break;
 	default:
 		break;
-
 	}
 }
 
@@ -212,40 +187,17 @@ void FPU_ESC1_EA(Bitu rm,PhysPt addr) {
 	Bitu sub=(rm & 7);
 	switch(group){
 	case 0x00: /* FLD float*/
-		{
-		union {
-			float f;
-			Bit32u l;
-		}	blah;
-		blah.l = mem_readd(addr);
-		FPU_PUSH(static_cast<double>(blah.f));
-		}
+		FPU_PREP_PUSH();
+		FPU_FLD_F32(addr,TOP);
 		break;
-
 	case 0x01: /* UNKNOWN */
 		LOG(LOG_FPU,LOG_WARN)("ESC EA 1:Unhandled group %d subfunction %d",group,sub);
 		break;
 	case 0x02: /* FST float*/
-		{
-		union {
-			float f;
-			Bit32u l;
-		}	blah;
-		//should depend on rounding method
-		blah.f = static_cast<float>(fpu.regs[TOP].d);
-		mem_writed(addr,blah.l);
-		}
+		FPU_FST_F32(addr);
 		break;
-
 	case 0x03: /* FSTP float*/
-		{
-		union {
-			float f;
-			Bit32u l;
-		}	blah;
-		blah.f = static_cast<float>(fpu.regs[TOP].d);
-		mem_writed(addr,blah.l);
-		}
+		FPU_FST_F32(addr);
 		FPU_FPOP();
 		break;
 	case 0x04: /* FLDENV */
@@ -253,7 +205,7 @@ void FPU_ESC1_EA(Bitu rm,PhysPt addr) {
 		break;
 	case 0x05: /* FLDCW */
 		{
-			Bit16u temp =mem_readw(addr);
+			Bit16u temp = mem_readw(addr);
 			FPU_SetCW(temp);
 		}
 		break;
@@ -267,7 +219,6 @@ void FPU_ESC1_EA(Bitu rm,PhysPt addr) {
 		LOG(LOG_FPU,LOG_WARN)("ESC EA 1:Unhandled group %d subfunction %d",group,sub);
 		break;
 	}
-
 }
 
 void FPU_ESC1_Normal(Bitu rm) {
@@ -275,33 +226,36 @@ void FPU_ESC1_Normal(Bitu rm) {
 	Bitu sub=(rm & 7);
 	switch (group){
 	case 0x00: /* FLD STi */
-			FPU_PUSH(fpu.regs[ST(sub)].d);
-		break;
+		{
+			Bitu reg_from=STV(sub);
+			FPU_PREP_PUSH();
+			FPU_FST(reg_from, TOP);
+			break;
+		}
 	case 0x01: /* FXCH STi */
-			FPU_FXCH(TOP,ST(sub));
+		FPU_FXCH(TOP,STV(sub));
 		break;
 	case 0x02: /* FNOP */
-			FPU_FNOP();
+		FPU_FNOP();
 		break;
 	case 0x03: /* FSTP STi */
-			FPU_FST(TOP,ST(sub));
-			FPU_FPOP();
+		FPU_FST(TOP,STV(sub));
+		FPU_FPOP();
 		break;   
 	case 0x04:
 		switch(sub){
 		case 0x00:       /* FCHS */
-			fpu.regs[TOP].d = -1.0*(fpu.regs[TOP].d);
+			FPU_FCHS();
 			break;
 		case 0x01:       /* FABS */
-			fpu.regs[TOP].d = fabs(fpu.regs[TOP].d);
+			FPU_FABS();
 			break;
 		case 0x02:       /* UNKNOWN */
 		case 0x03:       /* ILLEGAL */
 			LOG(LOG_FPU,LOG_WARN)("ESC 1:Unhandled group %X subfunction %X",group,sub);
 			break;
 		case 0x04:       /* FTST */
-			fpu.regs[8].d=0.0;
-			FPU_FCOM(TOP,8);
+			FPU_FTST();
 			break;
 		case 0x05:       /* FXAM */
 			FPU_FXAM();
@@ -315,25 +269,25 @@ void FPU_ESC1_Normal(Bitu rm) {
 	case 0x05:
 		switch(sub){	
 		case 0x00:       /* FLD1 */
-			FPU_PUSH(1.0);
+			FPU_FLD1();
 			break;
 		case 0x01:       /* FLDL2T */
-			FPU_PUSH(L2T);
+			FPU_FLDL2T();
 			break;
 		case 0x02:       /* FLDL2E */
-			FPU_PUSH(L2E);
+			FPU_FLDL2E();
 			break;
 		case 0x03:       /* FLDPI */
-			FPU_PUSH(PI);
+			FPU_FLDPI();
 			break;
 		case 0x04:       /* FLDLG2 */
-			FPU_PUSH(LG2);
+			FPU_FLDLG2();
 			break;
 		case 0x05:       /* FLDLN2 */
-			FPU_PUSH(LN2);
+			FPU_FLDLN2();
 			break;
 		case 0x06:       /* FLDZ*/
-			FPU_PUSH_ZERO();
+			FPU_FLDZ();
 			break;
 		case 0x07:       /* ILLEGAL */
 			LOG(LOG_FPU,LOG_WARN)("ESC 1:Unhandled group %X subfunction %X",group,sub);
@@ -342,24 +296,33 @@ void FPU_ESC1_Normal(Bitu rm) {
 		break;
 	case 0x06:
 		switch(sub){
-		case 0x00:   /* F2XM1 */
+		case 0x00:	/* F2XM1 */
 			FPU_F2XM1();
 			break;
-		case 0x01:	  /* FYL2X */
+		case 0x01:	/* FYL2X */
 			FPU_FYL2X();
 			break;
-		case 0x02:	 /* FPTAN  */
+		case 0x02:	/* FPTAN  */
 			FPU_FPTAN();
 			break;
-		case 0x03:   /* FPATAN */
+		case 0x03:	/* FPATAN */
 			FPU_FPATAN();
 			break;
-		case 0x04:  /* FXTRACT */
+		case 0x04:	/* FXTRACT */
 			FPU_FXTRACT();
 			break;
+		case 0x05:	/* FPREM1 */
+			FPU_FPREM1();
+			break;
+		case 0x06:	/* FDECSTP */
+			TOP = (TOP - 1) & 7;
+			break;
+		case 0x07:	/* FINCSTP */
+			TOP = (TOP + 1) & 7;
+			break;
 		default:
-		LOG(LOG_FPU,LOG_WARN)("ESC 1:Unhandled group %X subfunction %X",group,sub);
-		break;
+			LOG(LOG_FPU,LOG_WARN)("ESC 1:Unhandled group %X subfunction %X",group,sub);
+			break;
 		}
 		break;
 	case 0x07:
@@ -367,6 +330,9 @@ void FPU_ESC1_Normal(Bitu rm) {
 		case 0x00:		/* FPREM */
 			FPU_FPREM();
 			break;
+		case 0x01:		/* FYL2XP1 */
+			FPU_FYL2XP1();
+			break;
 		case 0x02:		/* FSQRT */
 			FPU_FSQRT();
 			break;
@@ -374,12 +340,7 @@ void FPU_ESC1_Normal(Bitu rm) {
 			FPU_FSINCOS();
 			break;
 		case 0x04:		/* FRNDINT */
-			{
-//TODO
-				Bit64s temp= static_cast<Bit64s>(FROUND(fpu.regs[TOP].d));
-				fpu.regs[TOP].d=static_cast<double>(temp);
-			}			
-			//TODO
+			FPU_FRNDINT();
 			break;
 		case 0x05:		/* FSCALE */
 			FPU_FSCALE();
@@ -390,7 +351,6 @@ void FPU_ESC1_Normal(Bitu rm) {
 		case 0x07:		/* FCOS */
 			FPU_FCOS();
 			break;
-		case 0x01:      /* FYL2XP1 */
 		default:
 			LOG(LOG_FPU,LOG_WARN)("ESC 1:Unhandled group %X subfunction %X",group,sub);
 			break;
@@ -399,15 +359,12 @@ void FPU_ESC1_Normal(Bitu rm) {
 		default:
 			LOG(LOG_FPU,LOG_WARN)("ESC 1:Unhandled group %X subfunction %X",group,sub);
 	}
-
-//	LOG(LOG_FPU,LOG_WARN)("ESC 1:Unhandled group %X subfunction %X",group,sub);
 }
 
 
 void FPU_ESC2_EA(Bitu rm,PhysPt addr) {
 	/* 32 bits integer operants */
-	Bit32s blah = mem_readd(addr);
-	fpu.regs[8].d = static_cast<Real64>(blah);
+	FPU_FLD_I32(addr,8);
 	EATREE(rm);
 }
 
@@ -417,8 +374,8 @@ void FPU_ESC2_Normal(Bitu rm) {
 	switch(group){
 	case 0x05:
 		switch(sub){
-		case 0x01:		/* FUCOMPP Almost the same as FCOMPP */
-			FPU_FCOM(TOP,ST(1));
+		case 0x01:		/* FUCOMPP */
+			FPU_FUCOM(TOP,STV(1));
 			FPU_FPOP();
 			FPU_FPOP();
 			break;
@@ -438,31 +395,26 @@ void FPU_ESC3_EA(Bitu rm,PhysPt addr) {
 	Bitu group=(rm >> 3) & 7;
 	Bitu sub=(rm & 7);
 	switch(group){
-	case 0x00:  /* FLD */
-		{
-				Bit32s blah = mem_readd(addr);
-				FPU_PUSH( static_cast<double>(blah));
-		}
+	case 0x00:	/* FILD */
+		FPU_PREP_PUSH();
+		FPU_FLD_I32(addr,TOP);
 		break;
-	case 0x01:  /* FISTTP */
+	case 0x01:	/* FISTTP */
 		LOG(LOG_FPU,LOG_WARN)("ESC 3 EA:Unhandled group %d subfunction %d",group,sub);
 		break;
-
-	case 0x02:   /* FIST */
-		mem_writed(addr,static_cast<Bit32s>(FROUND(fpu.regs[TOP].d)));
+	case 0x02:	/* FIST */
+		FPU_FST_I32(addr);
 		break;
-	case 0x03:	/*FISTP */
-		mem_writed(addr,static_cast<Bit32s>(FROUND(fpu.regs[TOP].d)));	
+	case 0x03:	/* FISTP */
+		FPU_FST_I32(addr);
 		FPU_FPOP();
 		break;
 	case 0x05:	/* FLD 80 Bits Real */
-		{
-			Real64 val = FPU_FLD80(addr);
-			FPU_PUSH(val);
-		}
+		FPU_PREP_PUSH();
+		FPU_FLD_F80(addr);
 		break;
 	case 0x07:	/* FSTP 80 Bits Real */
-		FPU_ST80(addr,TOP);
+		FPU_FST_F80(addr);
 		FPU_FPOP();
 		break;
 	default:
@@ -504,41 +456,40 @@ void FPU_ESC3_Normal(Bitu rm) {
 
 
 void FPU_ESC4_EA(Bitu rm,PhysPt addr) {
-	/* REGULAR TREE WITH 64 BITS REALS: double  */
-	fpu.regs[8].l.lower=mem_readd(addr);
-	fpu.regs[8].l.upper=mem_readd(addr+4);
+	/* REGULAR TREE WITH 64 BITS REALS */
+	FPU_FLD_F64(addr,8);
 	EATREE(rm);
 }
 
 void FPU_ESC4_Normal(Bitu rm) {
-	//LOOKS LIKE number 6 without popping*/
+	/* LOOKS LIKE number 6 without popping */
 	Bitu group=(rm >> 3) & 7;
 	Bitu sub=(rm & 7);
 	switch(group){
-	case 0x00:	/*FADDP STi,ST*/
-		FPU_FADD(ST(sub),TOP);
+	case 0x00:	/* FADD STi,ST*/
+		FPU_FADD(STV(sub),TOP);
 		break;
-	case 0x01:	/* FMULP STi,ST*/
-		FPU_FMUL(ST(sub),TOP);
+	case 0x01:	/* FMUL STi,ST*/
+		FPU_FMUL(STV(sub),TOP);
 		break;
 	case 0x02:  /* FCOM*/
-		FPU_FCOM(TOP,ST(sub));
-		break;     /* TODO IS THIS ALLRIGHT ????????? (maybe reverse operators) */
+		FPU_FCOM(TOP,STV(sub));
+		break;
 	case 0x03:  /* FCOMP*/
-		FPU_FCOM(TOP,ST(sub));
+		FPU_FCOM(TOP,STV(sub));
 		FPU_FPOP();
 		break;
 	case 0x04:  /* FSUBR STi,ST*/
-		FPU_FSUBR(ST(sub),TOP);
+		FPU_FSUBR(STV(sub),TOP);
 		break;
 	case 0x05:  /* FSUB  STi,ST*/
-		FPU_FSUB(ST(sub),TOP);
+		FPU_FSUB(STV(sub),TOP);
 		break;
 	case 0x06:  /* FDIVR STi,ST*/
-		FPU_FDIVR(ST(sub),TOP);
+		FPU_FDIVR(STV(sub),TOP);
 		break;
 	case 0x07:  /* FDIV STi,ST*/
-		FPU_FDIV(ST(sub),TOP);
+		FPU_FDIV(STV(sub),TOP);
 		break;
 	default:
 		break;
@@ -550,28 +501,21 @@ void FPU_ESC5_EA(Bitu rm,PhysPt addr) {
 	Bitu sub=(rm & 7);
 	switch(group){
 	case 0x00:  /* FLD double real*/
-		{
-				FPU_Reg blah;
-				blah.l.lower=mem_readd(addr);
-				blah.l.upper=mem_readd(addr+4);
-				FPU_PUSH(blah.d);
-		}
+		FPU_PREP_PUSH();
+		FPU_FLD_F64(addr,TOP);
 		break;
 	case 0x01:  /* FISTTP longint*/
 		LOG(LOG_FPU,LOG_WARN)("ESC 5 EA:Unhandled group %d subfunction %d",group,sub);
 		break;
-
-	case 0x02:   /* FIST double real*/
-		mem_writed(addr,fpu.regs[TOP].l.lower);
-		mem_writed(addr+4,fpu.regs[TOP].l.upper);
+	case 0x02:   /* FST double real*/
+		FPU_FST_F64(addr);
 		break;
-	case 0x03:	/*FISTP double real*/
-		mem_writed(addr,fpu.regs[TOP].l.lower);
-		mem_writed(addr+4,fpu.regs[TOP].l.upper);
+	case 0x03:	/* FSTP double real*/
+		FPU_FST_F64(addr);
 		FPU_FPOP();
 		break;
-	case 0x04:	/* FSTOR */
-		FPU_FSTOR(addr);
+	case 0x04:	/* FRSTOR */
+		FPU_FRSTOR(addr);
 		break;
 	case 0x06:	/* FSAVE */
 		FPU_FSAVE(addr);
@@ -591,23 +535,23 @@ void FPU_ESC5_Normal(Bitu rm) {
 	Bitu sub=(rm & 7);
 	switch(group){
 	case 0x00: /* FFREE STi */
-		fpu.tags[ST(sub)]=TAG_Empty;
+		fpu.tags[STV(sub)]=TAG_Empty;
 		break;
 	case 0x01: /* FXCH STi*/
-		FPU_FXCH(TOP,ST(sub));
+		FPU_FXCH(TOP,STV(sub));
 		break;
 	case 0x02: /* FST STi */
-		FPU_FST(TOP,ST(sub));
+		FPU_FST(TOP,STV(sub));
 		break;
 	case 0x03:  /* FSTP STi*/
-		FPU_FST(TOP,ST(sub));
+		FPU_FST(TOP,STV(sub));
 		FPU_FPOP();
 		break;
 	case 0x04:	/* FUCOM STi */
-		FPU_FUCOM(TOP,ST(sub));
+		FPU_FUCOM(TOP,STV(sub));
 		break;
 	case 0x05:	/*FUCOMP STi */
-		FPU_FUCOM(TOP,ST(sub));
+		FPU_FUCOM(TOP,STV(sub));
 		FPU_FPOP();
 		break;
 	default:
@@ -619,8 +563,7 @@ void FPU_ESC5_Normal(Bitu rm) {
 
 void FPU_ESC6_EA(Bitu rm,PhysPt addr) {
 	/* 16 bit (word integer) operants */
-	Bit16s blah = mem_readw(addr);
-	fpu.regs[8].d = static_cast<Real64>(blah);
+	FPU_FLD_I16(addr,8);
 	EATREE(rm);
 }
 
@@ -631,34 +574,33 @@ void FPU_ESC6_Normal(Bitu rm) {
 	Bitu sub=(rm & 7);
 	switch(group){
 	case 0x00:	/*FADDP STi,ST*/
-		FPU_FADD(ST(sub),TOP);
+		FPU_FADD(STV(sub),TOP);
 		break;
 	case 0x01:	/* FMULP STi,ST*/
-		FPU_FMUL(ST(sub),TOP);
+		FPU_FMUL(STV(sub),TOP);
 		break;
 	case 0x02:  /* FCOMP5*/
-		FPU_FCOM(TOP,ST(sub));
-		break;     /* TODO IS THIS ALLRIGHT ????????? */
-	case 0x03:  /* weird*/ /*FCOMPP*/
-		if(sub != 1){
-		LOG(LOG_FPU,LOG_WARN)("ESC 6:Unhandled group %d subfunction %d",group,sub);
-		;
-		break;
+		FPU_FCOM(TOP,STV(sub));
+		break;	/* TODO IS THIS ALLRIGHT ????????? */
+	case 0x03:  /*FCOMPP*/
+		if(sub != 1) {
+			LOG(LOG_FPU,LOG_WARN)("ESC 6:Unhandled group %d subfunction %d",group,sub);
+			return;
 		}
-		FPU_FCOM(TOP,ST(1));
+		FPU_FCOM(TOP,STV(1));
 		FPU_FPOP(); /* extra pop at the bottom*/
 		break;
 	case 0x04:  /* FSUBRP STi,ST*/
-		FPU_FSUBR(ST(sub),TOP);
+		FPU_FSUBR(STV(sub),TOP);
 		break;
 	case 0x05:  /* FSUBP  STi,ST*/
-		FPU_FSUB(ST(sub),TOP);
+		FPU_FSUB(STV(sub),TOP);
 		break;
 	case 0x06:	/* FDIVRP STi,ST*/
-		FPU_FDIVR(ST(sub),TOP);
+		FPU_FDIVR(STV(sub),TOP);
 		break;
 	case 0x07:  /* FDIVP STi,ST*/
-		FPU_FDIV(ST(sub),TOP);
+		FPU_FDIV(STV(sub),TOP);
 		break;
 	default:
 		break;
@@ -668,55 +610,39 @@ void FPU_ESC6_Normal(Bitu rm) {
 
 
 void FPU_ESC7_EA(Bitu rm,PhysPt addr) {
-	/* ROUNDING*/
-	
 	Bitu group=(rm >> 3) & 7;
 	Bitu sub=(rm & 7);
 	switch(group){
 	case 0x00:  /* FILD Bit16s */
-		{
-				Bit16s blah = mem_readw(addr);
-				FPU_PUSH( static_cast<Real64>(blah));
-		}
+		FPU_PREP_PUSH();
+		FPU_FLD_I16(addr,TOP);
 		break;
-	case 0x01:  /* FISTTP Bit16s */
+	case 0x01:
 		LOG(LOG_FPU,LOG_WARN)("ESC 7 EA:Unhandled group %d subfunction %d",group,sub);
 		break;
-
 	case 0x02:   /* FIST Bit16s */
-		mem_writew(addr,static_cast<Bit16s>(FROUND(fpu.regs[TOP].d)));
+		FPU_FST_I16(addr);
 		break;
 	case 0x03:	/* FISTP Bit16s */
-		mem_writew(addr,static_cast<Bit16s>(FROUND(fpu.regs[TOP].d)));	
+		FPU_FST_I16(addr);
 		FPU_FPOP();
 		break;
+	case 0x04:   /* FBLD packed BCD */
+		FPU_PREP_PUSH();
+		FPU_FBLD(addr,TOP);
+		break;
 	case 0x05:  /* FILD Bit64s */
-		{
-			FPU_Reg blah;
-			blah.l.lower = mem_readd(addr);
-			blah.l.upper = mem_readd(addr+4);
-			FPU_PUSH(static_cast<Real64>(blah.ll));
-		}
+		FPU_PREP_PUSH();
+		FPU_FLD_I64(addr,TOP);
 		break;
 	case 0x06:	/* FBSTP packed BCD */
 		FPU_FBST(addr);
 		FPU_FPOP();
 		break;
 	case 0x07:  /* FISTP Bit64s */
-		{
-			FPU_Reg blah;
-			blah.ll = static_cast<Bit64s>(FROUND(fpu.regs[TOP].d));
-			mem_writed(addr,blah.l.lower);
-			mem_writed(addr+4,blah.l.upper);
-		}
+		FPU_FST_I64(addr);
 		FPU_FPOP();
 		break;
-	case 0x04:   /* FBLD packed BCD */
-		{
-			Real64 in = FPU_FBLD(addr);
-			FPU_PUSH(in);
-		}
-		break;
 	default:
 		LOG(LOG_FPU,LOG_WARN)("ESC 7 EA:Unhandled group %d subfunction %d",group,sub);
 		break;
@@ -728,12 +654,12 @@ void FPU_ESC7_Normal(Bitu rm) {
 	Bitu sub=(rm & 7);
 	switch (group){
 	case 0x01: /* FXCH STi*/
-			FPU_FXCH(TOP,ST(sub));
+		FPU_FXCH(TOP,STV(sub));
 		break;
 	case 0x02:  /* FSTP STi*/
 	case 0x03:  /* FSTP STi*/
-			FPU_FST(TOP,ST(sub));
-			FPU_FPOP();
+		FPU_FST(TOP,STV(sub));
+		FPU_FPOP();
 		break;
 	case 0x04:
 		switch(sub){
@@ -742,7 +668,7 @@ void FPU_ESC7_Normal(Bitu rm) {
 				reg_ax = fpu.sw;
 				break;
 			default:
-			LOG(LOG_FPU,LOG_WARN)("ESC 7:Unhandled group %d subfunction %d",group,sub);
+				LOG(LOG_FPU,LOG_WARN)("ESC 7:Unhandled group %d subfunction %d",group,sub);
 				break;
 		}
 		break;
@@ -750,7 +676,6 @@ void FPU_ESC7_Normal(Bitu rm) {
 		LOG(LOG_FPU,LOG_WARN)("ESC 7:Unhandled group %d subfunction %d",group,sub);
 		break;
 	}
-	
 }
 
 
diff --git a/src/fpu/fpu_instructions.h b/src/fpu/fpu_instructions.h
index 0c094a41..f7dd9fb0 100644
--- a/src/fpu/fpu_instructions.h
+++ b/src/fpu/fpu_instructions.h
@@ -16,26 +16,27 @@
  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
 
-/* $Id: fpu_instructions.h,v 1.26 2005-02-10 10:20:52 qbix79 Exp $ */
+/* $Id: fpu_instructions.h,v 1.27 2005-02-22 13:06:06 qbix79 Exp $ */
 
 
 static void FPU_FINIT(void) {
 	FPU_SetCW(0x37F);
-	fpu.sw=0;
+	fpu.sw = 0;
 	TOP=FPU_GET_TOP();
-	fpu.tags[0]=TAG_Empty;
-	fpu.tags[1]=TAG_Empty;
-	fpu.tags[2]=TAG_Empty;
-	fpu.tags[3]=TAG_Empty;
-	fpu.tags[4]=TAG_Empty;
-	fpu.tags[5]=TAG_Empty;
-	fpu.tags[6]=TAG_Empty;
-	fpu.tags[7]=TAG_Empty;
-	fpu.tags[8]=TAG_Valid; // is only used by us
+	fpu.tags[0] = TAG_Empty;
+	fpu.tags[1] = TAG_Empty;
+	fpu.tags[2] = TAG_Empty;
+	fpu.tags[3] = TAG_Empty;
+	fpu.tags[4] = TAG_Empty;
+	fpu.tags[5] = TAG_Empty;
+	fpu.tags[6] = TAG_Empty;
+	fpu.tags[7] = TAG_Empty;
+	fpu.tags[8] = TAG_Valid; // is only used by us
 }
+
 static void FPU_FCLEX(void){
-	fpu.sw&=0x7f00;				//should clear exceptions
-};
+	fpu.sw &= 0x7f00;			//should clear exceptions
+}
 
 static void FPU_FNOP(void){
 	return;
@@ -44,15 +45,17 @@ static void FPU_FNOP(void){
 static void FPU_PUSH(double in){
 	TOP = (TOP - 1) &7;
 	//actually check if empty
-	fpu.tags[TOP]=TAG_Valid;
-	fpu.regs[TOP].d=in;
+	fpu.tags[TOP] = TAG_Valid;
+	fpu.regs[TOP].d = in;
 //	LOG(LOG_FPU,LOG_ERROR)("Pushed at %d  %g to the stack",newtop,in);
 	return;
 }
-static void FPU_PUSH_ZERO(void){
-	FPU_PUSH(0.0);
-	return; //maybe oneday needed
+
+static void FPU_PREP_PUSH(void){
+	TOP = (TOP - 1) &7;
+	fpu.tags[TOP] = TAG_Valid;
 }
+
 static void FPU_FPOP(void){
 	fpu.tags[TOP]=TAG_Empty;
 	//maybe set zero in it as well
@@ -61,6 +64,192 @@ static void FPU_FPOP(void){
 	return;
 }
 
+static double FROUND(double in){
+	switch(fpu.round){
+	case ROUND_Nearest:	
+		if (in-floor(in)>0.5) return (floor(in)+1);
+		else if (in-floor(in)<0.5) return (floor(in));
+		else return (((static_cast<Bit64s>(floor(in)))&1)!=0)?(floor(in)+1):(floor(in));
+		break;
+	case ROUND_Down:
+		return (floor(in));
+		break;
+	case ROUND_Up:
+		return (ceil(in));
+		break;
+	case ROUND_Chop:
+		return in; //the cast afterwards will do it right maybe cast here
+		break;
+	default:
+		return in;
+		break;
+	}
+}
+
+#define BIAS80 16383
+#define BIAS64 1023
+
+static Real64 FPU_FLD80(PhysPt addr) {
+	struct {
+		Bit16s begin;
+		FPU_Reg eind;
+	} test;
+	test.eind.l.lower = mem_readd(addr);
+	test.eind.l.upper = mem_readd(addr+4);
+	test.begin = mem_readw(addr+8);
+
+	Bit64s exp64 = (((test.begin&0x7fff) - BIAS80));
+	Bit64s blah = ((exp64 >0)?exp64:-exp64)&0x3ff;
+	Bit64s exp64final = ((exp64 >0)?blah:-blah) +BIAS64;
+
+	Bit64s mant64 = (test.eind.ll >> 11) & LONGTYPE(0xfffffffffffff);
+	Bit64s sign = (test.begin&0x8000)?1:0;
+	FPU_Reg result;
+	result.ll = (sign <<63)|(exp64final << 52)| mant64;
+	return result.d;   
+
+	//mant64= test.mant80/2***64    * 2 **53 
+}
+
+static void FPU_ST80(PhysPt addr,Bitu reg) {
+	struct {
+		Bit16s begin;
+		FPU_Reg eind;
+	} test;
+	Bit64s sign80 = (fpu.regs[reg].ll&LONGTYPE(0x8000000000000000))?1:0;
+	Bit64s exp80 =  fpu.regs[reg].ll&LONGTYPE(0x7ff0000000000000);
+	Bit64s exp80final = (exp80>>52) - BIAS64 + BIAS80;
+	Bit64s mant80 = fpu.regs[reg].ll&LONGTYPE(0x000fffffffffffff);
+	Bit64s mant80final = (mant80 << 11);
+	// Elvira wants the 8 and tcalc doesn't 
+	if(fpu.regs[reg].d != 0) mant80final |= LONGTYPE(0x8000000000000000);
+	test.begin= (static_cast<Bit16s>(sign80)<<15)| static_cast<Bit16s>(exp80final);
+	test.eind.ll = mant80final;
+	mem_writed(addr,test.eind.l.lower);
+	mem_writed(addr+4,test.eind.l.upper);
+	mem_writew(addr+8,test.begin);
+}
+
+
+static void FPU_FLD_F32(PhysPt addr,Bitu store_to) {
+	union {
+		float f;
+		Bit32u l;
+	}	blah;
+	blah.l = mem_readd(addr);
+	fpu.regs[store_to].d = static_cast<Real64>(blah.f);
+}
+
+static void FPU_FLD_F64(PhysPt addr,Bitu store_to) {
+	fpu.regs[store_to].l.lower = mem_readd(addr);
+	fpu.regs[store_to].l.upper = mem_readd(addr+4);
+}
+
+static void FPU_FLD_F80(PhysPt addr) {
+	fpu.regs[TOP].d = FPU_FLD80(addr);
+}
+
+static void FPU_FLD_I16(PhysPt addr,Bitu store_to) {
+	Bit16s blah = mem_readw(addr);
+	fpu.regs[store_to].d = static_cast<Real64>(blah);
+}
+
+static void FPU_FLD_I32(PhysPt addr,Bitu store_to) {
+	Bit32s blah = mem_readd(addr);
+	fpu.regs[store_to].d = static_cast<Real64>(blah);
+}
+
+static void FPU_FLD_I64(PhysPt addr,Bitu store_to) {
+	FPU_Reg blah;
+	blah.l.lower = mem_readd(addr);
+	blah.l.upper = mem_readd(addr+4);
+	fpu.regs[store_to].d = static_cast<Real64>(blah.ll);
+}
+
+static void FPU_FBLD(PhysPt addr,Bitu store_to) {
+	Bit64u val = 0;
+	Bitu in = 0;
+	Bit64u base = 1;
+	for(Bitu i = 0;i < 9;i++){
+		in = mem_readb(addr + i);
+		val += ( (in&0xf) * base); //in&0xf shouldn't be higher then 9
+		base *= 10;
+		val += ((( in>>4)&0xf) * base);
+		base *= 10;
+	}
+
+	//last digit: only now convert to floating point, so that the
+	//integer accumulation above keeps the best possible precision
+	Real64 temp = static_cast<Real64>(val);
+	in = mem_readb(addr + 9);
+	temp += ( (in&0xf) * base );
+	if(in&0x80) temp *= -1.0;
+	fpu.regs[store_to].d = temp;
+}
+
+static void FPU_FST_F32(PhysPt addr) {
+	union {
+		float f;
+		Bit32u l;
+	}	blah;
+	//should depend on rounding method
+	blah.f = static_cast<float>(fpu.regs[TOP].d);
+	mem_writed(addr,blah.l);
+}
+
+static void FPU_FST_F64(PhysPt addr) {
+	mem_writed(addr,fpu.regs[TOP].l.lower);
+	mem_writed(addr+4,fpu.regs[TOP].l.upper);
+}
+
+static void FPU_FST_F80(PhysPt addr) {
+	FPU_ST80(addr,TOP);
+}
+
+static void FPU_FST_I16(PhysPt addr) {
+	mem_writew(addr,static_cast<Bit16s>(FROUND(fpu.regs[TOP].d)));
+}
+
+static void FPU_FST_I32(PhysPt addr) {
+	mem_writed(addr,static_cast<Bit32s>(FROUND(fpu.regs[TOP].d)));
+}
+
+static void FPU_FST_I64(PhysPt addr) {
+	FPU_Reg blah;
+	blah.ll = static_cast<Bit64s>(FROUND(fpu.regs[TOP].d));
+	mem_writed(addr,blah.l.lower);
+	mem_writed(addr+4,blah.l.upper);
+}
+
+static void FPU_FBST(PhysPt addr) {
+	FPU_Reg val = fpu.regs[TOP];
+	bool sign = false;
+	if(val.d<0.0){ //sign
+		sign=true;
+		val.d=-val.d;
+	}
+	//numbers from back to front
+	Real64 temp=val.d;
+	Bitu p;
+	for(Bitu i=0;i<9;i++){
+		val.d=temp;
+		temp = static_cast<Real64>(static_cast<Bit64s>(floor(val.d/10.0)));
+		p = static_cast<Bitu>(val.d - 10.0*temp);  
+		val.d=temp;
+		temp = static_cast<Real64>(static_cast<Bit64s>(floor(val.d/10.0)));
+		p |= (static_cast<Bitu>(val.d - 10.0*temp)<<4);
+
+		mem_writeb(addr+i,p);
+	}
+	val.d=temp;
+	temp = static_cast<Real64>(static_cast<Bit64s>(floor(val.d/10.0)));
+	p = static_cast<Bitu>(val.d - 10.0*temp);
+	if(sign)
+		p|=0x80;
+	mem_writeb(addr+9,p);
+}
+
+
 static void FPU_FADD(Bitu op1, Bitu op2){
 	fpu.regs[op1].d+=fpu.regs[op2].d;
 	//flags and such :)
@@ -96,9 +285,8 @@ static void FPU_FSQRT(void){
 	return;
 }
 static void FPU_FPATAN(void){
-	fpu.regs[ST(1)].d = atan2(fpu.regs[ST(1)].d,fpu.regs[TOP].d);
+	fpu.regs[STV(1)].d = atan2(fpu.regs[STV(1)].d,fpu.regs[TOP].d);
 	FPU_FPOP();
-	FPU_SET_C2(0);
 	//flags and such :)
 	return;
 }
@@ -154,9 +342,9 @@ static void FPU_FST(Bitu st, Bitu other){
 }
 
 
-
 static void FPU_FCOM(Bitu st, Bitu other){
-	if((fpu.tags[st] != TAG_Valid) || (fpu.tags[other] != TAG_Valid)){
+	if(((fpu.tags[st] != TAG_Valid) && (fpu.tags[st] != TAG_Zero)) || 
+		((fpu.tags[other] != TAG_Valid) && (fpu.tags[other] != TAG_Zero))){
 		FPU_SET_C3(1);FPU_SET_C2(1);FPU_SET_C0(1);return;
 	}
 	if(fpu.regs[st].d == fpu.regs[other].d){
@@ -174,31 +362,14 @@ static void FPU_FUCOM(Bitu st, Bitu other){
 	FPU_FCOM(st,other);
 }
 
-static double FROUND(double in){
-	switch(fpu.round){
-	case ROUND_Nearest:	
-		if (in-floor(in)>0.5) return (floor(in)+1);
-		else if (in-floor(in)<0.5) return (floor(in));
-		else return (((static_cast<Bit64s>(floor(in)))&1)!=0)?(floor(in)+1):(floor(in));
-		break;
-	case ROUND_Down:
-		return (floor(in));
-		break;
-	case ROUND_Up:
-		return (ceil(in));
-		break;
-	case ROUND_Chop:
-		return in; //the cast afterwards will do it right maybe cast here
-		break;
-	default:
-		return in;
-		break;
-	}
+static void FPU_FRNDINT(void){
+	Bit64s temp= static_cast<Bit64s>(FROUND(fpu.regs[TOP].d));
+	fpu.regs[TOP].d=static_cast<double>(temp);
 }
 
 static void FPU_FPREM(void){
 	Real64 valtop = fpu.regs[TOP].d;
-	Real64 valdiv = fpu.regs[ST(1)].d;
+	Real64 valdiv = fpu.regs[STV(1)].d;
 	Bit64s ressaved = static_cast<Bit64s>( (valtop/valdiv) );
 // Some backups
 //	Real64 res=valtop - ressaved*valdiv; 
@@ -210,143 +381,71 @@ static void FPU_FPREM(void){
 	FPU_SET_C2(0);
 }
 
+static void FPU_FPREM1(void){
+	Real64 valtop = fpu.regs[TOP].d;
+	Real64 valdiv = fpu.regs[STV(1)].d;
+	double quot = valtop/valdiv;
+	double quotf = floor(quot);
+	Bit64s ressaved;
+	if (quot-quotf>0.5) ressaved = static_cast<Bit64s>(quotf+1);
+	else if (quot-quotf<0.5) ressaved = static_cast<Bit64s>(quotf);
+	else ressaved = static_cast<Bit64s>((((static_cast<Bit64s>(quotf))&1)!=0)?(quotf+1):(quotf));
+	fpu.regs[TOP].d = valtop - ressaved*valdiv;
+	FPU_SET_C0(static_cast<Bitu>(ressaved&4));
+	FPU_SET_C3(static_cast<Bitu>(ressaved&2));
+	FPU_SET_C1(static_cast<Bitu>(ressaved&1));
+	FPU_SET_C2(0);
+}
+
 static void FPU_FXAM(void){
-	if(fpu.tags[TOP] == TAG_Empty)
-	{
-		FPU_SET_C3(1);FPU_SET_C0(1);
-		return;
-	}
 	if(fpu.regs[TOP].ll & LONGTYPE(0x8000000000000000))	//sign
 	{ 
 		FPU_SET_C1(1);
-	}
-	else
+	} 
+	else 
 	{
 		FPU_SET_C1(0);
 	}
+	if(fpu.tags[TOP] == TAG_Empty)
+	{
+		FPU_SET_C3(1);FPU_SET_C2(0);FPU_SET_C0(1);
+		return;
+	}
 	if(fpu.regs[TOP].d == 0.0)		//zero or normalized number.
 	{ 
 		FPU_SET_C3(1);FPU_SET_C2(0);FPU_SET_C0(0);
 	}
-	else{
+	else
+	{
 		FPU_SET_C3(0);FPU_SET_C2(1);FPU_SET_C0(0);
 	}
 }
 
-static void FPU_FBST(PhysPt addr)
-{
-	FPU_Reg val = fpu.regs[TOP];
-	bool sign = false;
-	if(val.d<0.0){ //sign
-		sign=true;
-		val.d=-val.d;
-	}
-	//numbers from back to front
-	Real64 temp=val.d;
-	Bitu p;
-	for(Bitu i=0;i<9;i++){
-		val.d=temp;
-		temp = static_cast<Real64>(static_cast<Bit64s>(floor(val.d/10.0)));
-		p = static_cast<Bitu>(val.d - 10.0*temp);  
-		val.d=temp;
-		temp = static_cast<Real64>(static_cast<Bit64s>(floor(val.d/10.0)));
-		p |= (static_cast<Bitu>(val.d - 10.0*temp)<<4);
-
-		mem_writeb(addr+i,p);
-	}
-	val.d=temp;
-	temp = static_cast<Real64>(static_cast<Bit64s>(floor(val.d/10.0)));
-	p = static_cast<Bitu>(val.d - 10.0*temp);
-	if(sign)
-		p|=0x80;
-	mem_writeb(addr+9,p);
-}
-
-static Real64 FPU_FBLD(PhysPt addr)
-{
-	Bit64u val = 0;
-	Bitu in = 0;
-	Bit64u base = 1;
-	for(Bitu i = 0;i < 9;i++){
-		in = mem_readb(addr + i);
-		val += ( (in&0xf) * base); //in&0xf shouldn't be higher then 9
-		base *= 10;
-		val += ((( in>>4)&0xf) * base);
-		base *= 10;
-	}
-
-	//last number, only now convert to float in order to get
-	//the best signification
-	Real64 temp = static_cast<Real64>(val);
-	in = mem_readb(addr + 9);
-	temp += ( (in&0xf) * base );
-	if(in&0x80) temp *= -1.0;
-	return temp;
-}
-
-
-#define BIAS80 16383
-#define BIAS64 1023
-
-static Real64 FPU_FLD80(PhysPt addr)
-{
-	struct{
-		Bit16s begin;
-		FPU_Reg eind;
-	} test;
-	test.eind.l.lower=mem_readd(addr);
-	test.eind.l.upper =mem_readd(addr+4);
-	test.begin=mem_readw(addr+8);
-
-	Bit64s exp64= (((test.begin & 0x7fff) - BIAS80));
-	Bit64s blah= ((exp64 >0)?exp64:-exp64)&0x3ff;
-	Bit64s exp64final= ((exp64 >0)?blah:-blah) +BIAS64;
-
-	Bit64s mant64= (test.eind.ll >> 11) & LONGTYPE(0xfffffffffffff);
-	Bit64s sign = (test.begin &0x8000)?1:0;
-	FPU_Reg result;
-	result.ll= (sign <<63)|(exp64final << 52)| mant64;
-	return result.d;   
-
-	//mant64= test.mant80/2***64    * 2 **53 
-}
-
-static void FPU_ST80(PhysPt addr,Bitu reg)
-{
-	struct{
-		Bit16s begin;
-		FPU_Reg eind;
-	} test;
-	Bit64s sign80= (fpu.regs[reg].ll&LONGTYPE(0x8000000000000000))?1:0;
-	Bit64s exp80 =  fpu.regs[reg].ll&LONGTYPE(0x7ff0000000000000);
-	Bit64s exp80final= (exp80>>52) - BIAS64 + BIAS80;
-	Bit64s mant80 = fpu.regs[reg].ll&LONGTYPE(0x000fffffffffffff);
-	Bit64s mant80final= (mant80 << 11);
-	// Elvira wants the 8 and tcalc doesn't 
-	if(fpu.regs[reg].d != 0) mant80final |= LONGTYPE(0x8000000000000000);
-	test.begin= (static_cast<Bit16s>(sign80)<<15)| static_cast<Bit16s>(exp80final);
-	test.eind.ll=mant80final;
-	mem_writed(addr,test.eind.l.lower);
-	mem_writed(addr+4,test.eind.l.upper);
-	mem_writew(addr+8,test.begin);
-}
 
 static void FPU_F2XM1(void){
-	fpu.regs[TOP].d=pow(2.0,fpu.regs[TOP].d) -1;
+	fpu.regs[TOP].d = pow(2.0,fpu.regs[TOP].d) - 1;
 	return;
 }
 
 static void FPU_FYL2X(void){
-	fpu.regs[ST(1)].d*=log(fpu.regs[TOP].d)/log(static_cast<Real64>(2.0));
+	fpu.regs[STV(1)].d*=log(fpu.regs[TOP].d)/log(static_cast<Real64>(2.0));
 	FPU_FPOP();
 	return;
 }
+
+static void FPU_FYL2XP1(void){
+	fpu.regs[STV(1)].d*=log(fpu.regs[TOP].d+1.0)/log(static_cast<Real64>(2.0));
+	FPU_FPOP();
+	return;
+}
+
 static void FPU_FSCALE(void){
-	fpu.regs[TOP].d *= pow(2.0,static_cast<Real64>(static_cast<Bit64s>(fpu.regs[ST(1)].d)));
+	fpu.regs[TOP].d *= pow(2.0,static_cast<Real64>(static_cast<Bit64s>(fpu.regs[STV(1)].d)));
 	return; //2^x where x is chopped.
 }
 
 static void FPU_FSTENV(PhysPt addr){
+	FPU_SET_TOP(TOP);
 	if(!cpu.code.big) {
 		mem_writew(addr+0,static_cast<Bit16u>(fpu.cw));
 		mem_writew(addr+2,static_cast<Bit16u>(fpu.sw));
@@ -368,29 +467,31 @@ static void FPU_FLDENV(PhysPt addr){
 		tag    = mem_readw(addr+4);
 	} else { 
 		cw     = mem_readd(addr+0);
-		fpu.sw = mem_readd(addr+4);
+		fpu.sw = (Bit16u)mem_readd(addr+4);
 		tagbig = mem_readd(addr+8);
 		tag    = static_cast<Bit16u>(tagbig);
 	}
 	FPU_SetTag(tag);
 	FPU_SetCW(cw);
+	TOP = FPU_GET_TOP();
 }
 
 static void FPU_FSAVE(PhysPt addr){
 	FPU_FSTENV(addr);
-	Bitu start=(cpu.code.big?28:14);
-	for(Bitu i=0;i<8;i++){
-		FPU_ST80(addr+start,i);
-		start+=10;
+	Bitu start = (cpu.code.big?28:14);
+	for(Bitu i = 0;i < 8;i++){
+		FPU_ST80(addr+start,STV(i));
+		start += 10;
 	}
+	FPU_FINIT();
 }
 
-static void FPU_FSTOR(PhysPt addr){
+static void FPU_FRSTOR(PhysPt addr){
 	FPU_FLDENV(addr);
-	Bitu start=(cpu.code.big?28:14);
-	for(Bitu i=0;i<8;i++){
-		fpu.regs[i].d=FPU_FLD80(addr+start);
-		start+=10;
+	Bitu start = (cpu.code.big?28:14);
+	for(Bitu i = 0;i < 8;i++){
+		fpu.regs[STV(i)].d = FPU_FLD80(addr+start);
+		start += 10;
 	}
 }
 
@@ -403,6 +504,55 @@ static void FPU_FXTRACT(void) {
 	Bit64s exp80 =  test.ll&LONGTYPE(0x7ff0000000000000);
 	Bit64s exp80final = (exp80>>52) - BIAS64;
 	Real64 mant = test.d / (pow(2.0,static_cast<Real64>(exp80final)));
-	fpu.regs[TOP].d=exp80final;
+	fpu.regs[TOP].d = static_cast<Real64>(exp80final);
 	FPU_PUSH(mant); 
 }
+
+static void FPU_FCHS(void){
+	fpu.regs[TOP].d = -1.0*(fpu.regs[TOP].d);
+}
+
+static void FPU_FABS(void){
+	fpu.regs[TOP].d = fabs(fpu.regs[TOP].d);
+}
+
+static void FPU_FTST(void){
+	fpu.regs[8].d = 0.0;
+	FPU_FCOM(TOP,8);
+}
+
+static void FPU_FLD1(void){
+	FPU_PREP_PUSH();
+	fpu.regs[TOP].d = 1.0;
+}
+
+static void FPU_FLDL2T(void){
+	FPU_PREP_PUSH();
+	fpu.regs[TOP].d = L2T;
+}
+
+static void FPU_FLDL2E(void){
+	FPU_PREP_PUSH();
+	fpu.regs[TOP].d = L2E;
+}
+
+static void FPU_FLDPI(void){
+	FPU_PREP_PUSH();
+	fpu.regs[TOP].d = PI;
+}
+
+static void FPU_FLDLG2(void){
+	FPU_PREP_PUSH();
+	fpu.regs[TOP].d = LG2;
+}
+
+static void FPU_FLDLN2(void){
+	FPU_PREP_PUSH();
+	fpu.regs[TOP].d = LN2;
+}
+
+static void FPU_FLDZ(void){
+	FPU_PREP_PUSH();
+	fpu.regs[TOP].d = 0.0;
+	fpu.tags[TOP] = TAG_Zero;
+}
diff --git a/src/fpu/fpu_instructions_x86.h b/src/fpu/fpu_instructions_x86.h
new file mode 100644
index 00000000..a10cd162
--- /dev/null
+++ b/src/fpu/fpu_instructions_x86.h
@@ -0,0 +1,864 @@
+/*
+ *  Copyright (C) 2002-2005  The DOSBox Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+/* $Id: fpu_instructions_x86.h,v 1.1 2005-02-22 13:06:06 qbix79 Exp $ */
+
+
+#if defined (_MSC_VER)
+
+#define FPUD_LOAD(op,szI,szA)			\
+		Bit16u new_sw;					\
+		__asm {							\
+		__asm	mov		eax, 8			\
+		__asm	shl		eax, 4			\
+		__asm	mov		ebx, store_to	\
+		__asm	shl		ebx, 4			\
+		__asm	fclex					\
+		__asm	op		szI PTR fpu.p_regs[eax].m1		\
+		__asm	fnstsw	new_sw			\
+		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	\
+		}								\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+
+#define FPUD_STORE(op,szI,szA)				\
+		Bit16u new_sw,save_cw;				\
+		__asm {								\
+		__asm	fnstcw	save_cw				\
+		__asm	fldcw	fpu.cw_mask_all		\
+		__asm	mov		eax, TOP			\
+		__asm	shl		eax, 4				\
+		__asm	mov		ebx, 8				\
+		__asm	shl		ebx, 4				\
+		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
+		__asm	fclex						\
+		__asm	op		szI PTR fpu.p_regs[ebx].m1		\
+		__asm	fnstsw	new_sw				\
+		__asm	fldcw	save_cw				\
+		}									\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+
+// handles fsin,fcos,f2xm1,fchs,fabs
+#define FPUD_TRIG(op)				\
+		Bit16u new_sw;				\
+		__asm {						\
+		__asm	mov		eax, TOP	\
+		__asm	shl		eax, 4		\
+		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
+		__asm	fclex				\
+		__asm	op					\
+		__asm	fnstsw	new_sw		\
+		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	\
+		}							\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+
+// handles fsincos
+#define FPUD_SINCOS						\
+		Bit16u new_sw;					\
+		__asm {							\
+		__asm	mov		eax, TOP		\
+		__asm	mov		ebx, eax		\
+		__asm	dec     ebx				\
+		__asm	and     ebx, 7			\
+		__asm	shl		eax, 4			\
+		__asm	shl		ebx, 4			\
+		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
+		__asm	fclex					\
+		__asm	fsincos					\
+		__asm	fnstsw	new_sw			\
+		__asm	mov		cx, new_sw		\
+		__asm	and		ch, 0x04 		\
+		__asm	jnz		argument_too_large1				\
+		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	\
+		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	\
+		__asm	jmp		end_sincos		\
+		__asm	argument_too_large1:	\
+		__asm	fstp	st(0)			\
+		__asm	end_sincos:				\
+		}												\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);			\
+		if ((new_sw&0x0400)==0) FPU_PREP_PUSH();
+
+// handles fptan
+#define FPUD_PTAN						\
+		Bit16u new_sw;					\
+		__asm {							\
+		__asm	mov		eax, TOP		\
+		__asm	mov		ebx, eax		\
+		__asm	dec     ebx				\
+		__asm	and     ebx, 7			\
+		__asm	shl		eax, 4			\
+		__asm	shl		ebx, 4			\
+		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
+		__asm	fclex					\
+		__asm	fptan					\
+		__asm	fnstsw	new_sw			\
+		__asm	mov		cx, new_sw		\
+		__asm	and		ch, 0x04 		\
+		__asm	jnz		argument_too_large2				\
+		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	\
+		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	\
+		__asm	jmp		end_ptan		\
+		__asm	argument_too_large2:	\
+		__asm	fstp	st(0)			\
+		__asm	end_ptan:				\
+		}												\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);			\
+		if ((new_sw&0x0400)==0) FPU_PREP_PUSH();
+
+// handles fxtract
+#define FPUD_XTRACT						\
+		Bit16u new_sw;					\
+		__asm {							\
+		__asm	mov		eax, TOP		\
+		__asm	mov		ebx, eax		\
+		__asm	dec     ebx				\
+		__asm	and     ebx, 7			\
+		__asm	shl		eax, 4			\
+		__asm	shl		ebx, 4			\
+		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
+		__asm	fclex					\
+		__asm	fxtract					\
+		__asm	fnstsw	new_sw			\
+		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	\
+		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	\
+		}												\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);			\
+		FPU_PREP_PUSH();
+
+// handles fadd,fmul,fsub,fsubr,fdiv,fdivr
+#define FPUD_ARITH1(op)						\
+		Bit16u new_sw,save_cw;				\
+		__asm {								\
+		__asm	fnstcw	save_cw				\
+		__asm	fldcw	fpu.cw_mask_all		\
+		__asm	mov		eax, op1			\
+		__asm	shl		eax, 4				\
+		__asm	mov		ebx, op2			\
+		__asm	shl		ebx, 4				\
+		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
+		__asm	fld		TBYTE PTR fpu.p_regs[ebx].m1	\
+		__asm	fclex						\
+		__asm	op		st(1), st(0)		\
+		__asm	fnstsw	new_sw				\
+		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	 \
+		__asm	fldcw	save_cw				\
+		}									\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+
+// handles fsqrt,frndint
+#define FPUD_ARITH2(op)						\
+		Bit16u new_sw,save_cw;				\
+		__asm {								\
+		__asm	fnstcw	save_cw				\
+		__asm	fldcw	fpu.cw_mask_all		\
+		__asm	mov		eax, TOP			\
+		__asm	shl		eax, 4				\
+		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
+		__asm	fclex						\
+		__asm	op							\
+		__asm	fnstsw	new_sw				\
+		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	 \
+		__asm	fldcw	save_cw				\
+		}									\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+
+// handles fprem,fprem1,fscale
+#define FPUD_REMINDER(op)			\
+		Bit16u new_sw;				\
+		__asm {						\
+		__asm	mov		eax, TOP	\
+		__asm	mov		ebx, eax	\
+		__asm	inc     ebx			\
+		__asm	and     ebx, 7		\
+		__asm	shl		ebx, 4		\
+		__asm	shl		eax, 4		\
+		__asm	fld		TBYTE PTR fpu.p_regs[ebx].m1	\
+		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
+		__asm	fclex				\
+		__asm	op					\
+		__asm	fnstsw	new_sw		\
+		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	\
+		__asm	fstp	st(0)		\
+		}							\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+
+// handles fcom,fucom
+#define FPUD_COMPARE(op)			\
+		Bit16u new_sw;				\
+		__asm {						\
+		__asm	mov		ebx, op2	\
+		__asm	shl		ebx, 4		\
+		__asm	mov		eax, op1	\
+		__asm	shl		eax, 4		\
+		__asm	fld		TBYTE PTR fpu.p_regs[ebx].m1	\
+		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
+		__asm	fclex				\
+		__asm	op					\
+		__asm	fnstsw	new_sw		\
+		}							\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+
+// handles fxam,ftst
+#define FPUD_EXAMINE(op)			\
+		Bit16u new_sw;				\
+		__asm {						\
+		__asm	mov		eax, TOP	\
+		__asm	shl		eax, 4		\
+		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
+		__asm	fclex				\
+		__asm	op					\
+		__asm	fnstsw	new_sw		\
+		__asm	fstp	st(0)		\
+		}							\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+
+// handles fpatan,fyl2x,fyl2xp1
+#define FPUD_WITH_POP(op)			\
+		Bit16u new_sw;				\
+		__asm {						\
+		__asm	mov		eax, TOP	\
+		__asm	mov		ebx, eax	\
+		__asm	inc     ebx			\
+		__asm	and     ebx, 7		\
+		__asm	shl		ebx, 4		\
+		__asm	shl		eax, 4		\
+		__asm	fld		TBYTE PTR fpu.p_regs[ebx].m1	\
+		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
+		__asm	fclex				\
+		__asm	op					\
+		__asm	fnstsw	new_sw		\
+		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	\
+		}								\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);	\
+		FPU_FPOP();
+
+// load math constants
+#define FPUD_LOAD_CONST(op)			\
+		Bit16u new_sw;				\
+		FPU_PREP_PUSH();			\
+		__asm {						\
+		__asm	mov		eax, TOP	\
+		__asm	shl		eax, 4		\
+		__asm	fclex				\
+		__asm	op					\
+		__asm	fnstsw	new_sw		\
+		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	\
+		}							\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+
+#else
+
+#define FPUD_LOAD(op,szI,szA)	/* runs <op><szA> on the value staged in p_regs[8], stores the result in p_regs[store_to] */	\
+		Bit16u new_sw;						\
+		__asm__ volatile (					\
+			"movl		$8, %%eax		\n"	\
+			"shl		$4, %%eax		\n"	\
+			"shl		$4, %1			\n"	\
+			"fclex						\n"	\
+			#op #szA "	(%2, %%eax)		\n"	\
+			"fnstsw		%0				\n"	\
+			"fstpt		(%2, %1)		"	\
+			:	"=m" (new_sw), "+r" (store_to)	/* %1 is shifted in place, so it must be read-write */	\
+			:	"r" (fpu.p_regs)			\
+			:	"eax", "memory"						\
+		);									\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+
+#define FPUD_STORE(op,szI,szA)	/* loads p_regs[TOP], runs <op><szA> into scratch p_regs[8] with fpu.cw_mask_all active */	\
+		Bit16u new_sw,save_cw; Bitu top_ofs=TOP;	/* the asm shifts the copy, never TOP itself */	\
+		__asm__ volatile (					\
+			"fnstcw		%1				\n"	\
+			"fldcw		%4				\n"	\
+			"shll		$4, %2			\n"	\
+			"movl		$8, %%eax		\n"	\
+			"shl		$4, %%eax		\n"	\
+			"fldt		(%3, %2)		\n"	\
+			"fclex						\n"	\
+			#op #szA "	(%3, %%eax)		\n"	\
+			"fnstsw		%0				\n"	\
+			"fldcw		%1				"	\
+			:	"=m" (new_sw), "=m" (save_cw), "+r" (top_ofs)	\
+			:	"r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
+			:	"eax", "memory"						\
+		);										\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+
+// handles fsin,fcos,f2xm1,fchs,fabs: applies a one-operand host op to p_regs[TOP] in place
+#define FPUD_TRIG(op)						\
+		Bit16u new_sw; Bitu top_ofs=TOP;	/* the asm shifts the copy, never TOP itself */	\
+		__asm__ volatile (					\
+			"shll		$4, %1			\n"	\
+			"fldt		(%2, %1)		\n"	\
+			"fclex						\n"	\
+			#op" 						\n"	\
+			"fnstsw		%0				\n"	\
+			"fstpt		(%2, %1)		"	\
+			:	"=m" (new_sw), "+r" (top_ofs)	\
+			:	"r" (fpu.p_regs)			\
+			:	"memory"				\
+		);									\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+
+// handles fsincos: first popped result goes to slot (TOP-1)&7, the second overwrites p_regs[TOP]
+#define FPUD_SINCOS							\
+		Bit16u new_sw; Bitu top_ofs=TOP;	/* the asm shifts the copy, never TOP itself */	\
+		__asm__ volatile (					\
+			"movl		%1, %%eax		\n"	\
+			"shll		$4, %1			\n"	\
+			"decl		%%eax			\n"	\
+			"andl		$7, %%eax		\n"	\
+			"shll		$4, %%eax		\n"	\
+			"fldt		(%2, %1)		\n"	\
+			"fclex						\n"	\
+			"fsincos					\n"	\
+			"fnstsw		%0				\n"	\
+			"fstpt		(%2, %%eax)		\n"	\
+			"movw		%0, %%ax		\n"	\
+			"sahf						\n"	\
+			"jp			1f				\n"	/* status bit 0x0400 lands in PF: skip the second store */	\
+			"fstpt		(%2, %1)		\n"	\
+			"1:							"	/* numeric local label: no clash if the asm is emitted twice */	\
+			:	"=m" (new_sw), "+r" (top_ofs)	\
+			:	"r" (fpu.p_regs)			\
+			:	"eax", "cc", "memory"		\
+		);									\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);		\
+		if ((new_sw&0x0400)==0) FPU_PREP_PUSH();
+
+// handles fptan: first popped result goes to slot (TOP-1)&7, the second overwrites p_regs[TOP]
+#define FPUD_PTAN							\
+		Bit16u new_sw; Bitu top_ofs=TOP;	/* the asm shifts the copy, never TOP itself */	\
+		__asm__ volatile (					\
+			"movl		%1, %%eax		\n"	\
+			"shll		$4, %1			\n"	\
+			"decl		%%eax			\n"	\
+			"andl		$7, %%eax		\n"	\
+			"shll		$4, %%eax		\n"	\
+			"fldt		(%2, %1)		\n"	\
+			"fclex						\n"	\
+			"fptan 						\n"	\
+			"fnstsw		%0				\n"	\
+			"fstpt		(%2, %%eax)		\n"	\
+			"movw		%0, %%ax		\n"	\
+			"sahf						\n"	\
+			"jp			1f				\n"	/* status bit 0x0400 lands in PF: skip the second store */	\
+			"fstpt		(%2, %1)		\n"	\
+			"1:							"	/* numeric local label: no clash if the asm is emitted twice */	\
+			:	"=m" (new_sw), "+r" (top_ofs)	\
+			:	"r" (fpu.p_regs)			\
+			:	"eax", "cc", "memory"		\
+		);									\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);		\
+		if ((new_sw&0x0400)==0) FPU_PREP_PUSH();
+
+// handles fxtract: first popped result goes to slot (TOP-1)&7, the second overwrites p_regs[TOP], then the push is recorded
+#define FPUD_XTRACT						\
+		Bit16u new_sw; Bitu top_ofs=TOP;	/* the asm shifts the copy, never TOP itself */	\
+		__asm__ volatile (					\
+			"movl		%1, %%eax		\n"	\
+			"shll		$4, %1			\n"	\
+			"decl		%%eax			\n"	\
+			"andl		$7, %%eax		\n"	\
+			"shll		$4, %%eax		\n"	\
+			"fldt		(%2, %1)		\n"	\
+			"fclex						\n"	\
+			"fxtract					\n"	\
+			"fnstsw		%0				\n"	\
+			"fstpt		(%2, %%eax)		\n"	\
+			"fstpt		(%2, %1)		"	\
+			:	"=m" (new_sw), "+r" (top_ofs)	\
+			:	"r" (fpu.p_regs)			\
+			:	"eax", "memory"						\
+		);									\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);		\
+		FPU_PREP_PUSH();
+
+// handles fadd,fmul,fsub,fsubr,fdiv,fdivr: combines p_regs[op1] and p_regs[op2], result is written to p_regs[op1]
+#define FPUD_ARITH1(op)						\
+		Bit16u new_sw,save_cw;	/* op1/op2 are shifted in place by the asm, hence "+r" below */	\
+		__asm__ volatile (					\
+			"fnstcw		%1				\n"	\
+			"fldcw		%5				\n"	\
+			"shll		$4, %3			\n"	\
+			"shll		$4, %2			\n"	\
+			"fldt		(%4, %3)		\n"	\
+			"fldt		(%4, %2)		\n"	\
+			"fclex						\n"	\
+			#op"						\n"	\
+			"fnstsw		%0				\n"	\
+			"fstpt		(%4, %2)		\n"	\
+			"fldcw		%1				"	\
+			:	"=m" (new_sw), "=m" (save_cw), "+r" (op1), "+r" (op2)	\
+			:	"r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
+			:	"memory"				\
+		);									\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+
+// handles fsqrt,frndint: applies a one-operand host op to p_regs[TOP] in place with fpu.cw_mask_all active
+#define FPUD_ARITH2(op)						\
+		Bit16u new_sw,save_cw; Bitu top_ofs=TOP;	/* the asm shifts the copy, never TOP itself */	\
+		__asm__ volatile (					\
+			"fnstcw		%1				\n"	\
+			"fldcw		%4				\n"	\
+			"shll		$4, %2			\n"	\
+			"fldt		(%3, %2)		\n"	\
+			"fclex						\n"	\
+			#op" 						\n"	\
+			"fnstsw		%0				\n"	\
+			"fstpt		(%3, %2)		\n"	\
+			"fldcw		%1				"	\
+			:	"=m" (new_sw), "=m" (save_cw), "+r" (top_ofs)	\
+			:	"r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
+			:	"memory"				\
+		);										\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+
+// handles fprem,fprem1,fscale: loads slot (TOP+1)&7 then p_regs[TOP]; the result is written back to p_regs[TOP]
+#define FPUD_REMINDER(op)					\
+		Bit16u new_sw; Bitu top_ofs=TOP;	/* the asm shifts the copy, never TOP itself */	\
+		__asm__ volatile (					\
+			"movl		%1, %%eax		\n"	\
+			"incl		%%eax			\n"	\
+			"andl		$7, %%eax		\n"	\
+			"shll		$4, %%eax		\n"	\
+			"shll		$4, %1			\n"	\
+			"fldt		(%2, %%eax)		\n"	\
+			"fldt		(%2, %1)		\n"	\
+			"fclex						\n"	\
+			#op" 						\n"	\
+			"fnstsw		%0				\n"	\
+			"fstpt		(%2, %1)		\n"	\
+			"fstp		%%st(0)			"	\
+			:	"=m" (new_sw), "+r" (top_ofs)	\
+			:	"r" (fpu.p_regs)			\
+			:	"eax", "memory"						\
+		);									\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+
+// handles fcom,fucom: loads p_regs[op2] then p_regs[op1]; nothing is popped here, so op must pop both (callers pass fcompp/fucompp)
+#define FPUD_COMPARE(op)					\
+		Bit16u new_sw;	/* op1/op2 are shifted in place by the asm, hence "+r" below */	\
+		__asm__ volatile (					\
+			"shll		$4, %2			\n"	\
+			"shll		$4, %1			\n"	\
+			"fldt		(%3, %2)		\n"	\
+			"fldt		(%3, %1)		\n"	\
+			"fclex						\n"	\
+			#op" 						\n"	\
+			"fnstsw		%0				"	\
+			:	"=m" (new_sw), "+r" (op1), "+r" (op2)	\
+			:	"r" (fpu.p_regs) 		\
+			:	"memory"				\
+		);									\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+
+// handles fxam,ftst: loads p_regs[TOP], runs op, captures the status word and discards the loaded value
+#define FPUD_EXAMINE(op)					\
+		Bit16u new_sw; Bitu top_ofs=TOP;	/* the asm shifts the copy, never TOP itself */	\
+		__asm__ volatile (					\
+			"shll		$4, %1			\n"	\
+			"fldt		(%2, %1)		\n"	\
+			"fclex						\n"	\
+			#op" 						\n"	\
+			"fnstsw		%0				\n"	\
+			"fstp		%%st(0)			"	\
+			:	"=m" (new_sw), "+r" (top_ofs)	\
+			:	"r" (fpu.p_regs)			\
+			:	"memory"				\
+		);									\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+
+// handles fpatan,fyl2x,fyl2xp1: loads slot (TOP+1)&7 then p_regs[TOP], stores the result in slot (TOP+1)&7 and pops the emulated stack
+#define FPUD_WITH_POP(op)					\
+		Bit16u new_sw; Bitu top_ofs=TOP;	/* the asm shifts the copy, never TOP itself */	\
+		__asm__ volatile (					\
+			"movl		%1, %%eax		\n"	\
+			"incl		%%eax			\n"	\
+			"andl		$7, %%eax		\n"	\
+			"shll		$4, %%eax		\n"	\
+			"shll		$4, %1			\n"	\
+			"fldt		(%2, %%eax)		\n"	\
+			"fldt		(%2, %1)		\n"	\
+			"fclex						\n"	\
+			#op" 						\n"	\
+			"fnstsw		%0				\n"	\
+			"fstpt		(%2, %%eax)		\n"	\
+			:	"=m" (new_sw), "+r" (top_ofs)	\
+			:	"r" (fpu.p_regs)			\
+			:	"eax", "memory"						\
+		);									\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);		\
+		FPU_FPOP();
+
+// load math constants: records the push first, then stores the host-generated constant in the new p_regs[TOP]
+#define FPUD_LOAD_CONST(op)					\
+		Bit16u new_sw;						\
+		FPU_PREP_PUSH(); Bitu top_ofs=TOP;	/* copy TOP only after the push; the asm shifts the copy */	\
+		__asm__ volatile (					\
+			"shll		$4, %1			\n"	\
+			"fclex						\n"	\
+			#op" 						\n"	\
+			"fnstsw		%0				\n"	\
+			"fstpt		(%2, %1)		\n"	\
+			:	"=m" (new_sw), "+r" (top_ofs)	\
+			:	"r" (fpu.p_regs)			\
+			:	"memory"				\
+		);									\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+
+#endif
+
+static void FPU_FINIT(void) {
+	// Load control word 0x37F and wipe the status word,
+	// then re-read TOP through FPU_GET_TOP().
+	FPU_SetCW(0x37F);
+	fpu.sw=0;
+	TOP=FPU_GET_TOP();
+	// Tag every one of the eight stack registers as empty.
+	for(Bitu slot=0;slot<8;slot++){
+		fpu.tags[slot]=TAG_Empty;
+	}
+	// Index 8 is not one of the eight stack registers; it is only used
+	// internally by the emulator, so it stays tagged valid.
+	fpu.tags[8]=TAG_Valid;
+}
+
+static void FPU_FCLEX(void){
+	fpu.sw&=0x7f00;				//should clear exceptions
+}
+
+static void FPU_FNOP(void){
+}
+
+static void FPU_PREP_PUSH(void){
+	TOP = (TOP - 1) &7;
+	fpu.tags[TOP]=TAG_Valid;
+}
+
+static void FPU_FPOP(void){
+	fpu.tags[TOP]=TAG_Empty;
+	TOP = ((TOP+1)&7);
+}
+
+static void FPU_FLD_F32(PhysPt addr,Bitu store_to) {
+	fpu.p_regs[8].m1 = mem_readd(addr);
+	FPUD_LOAD(fld,DWORD,s)
+}
+
+static void FPU_FLD_F64(PhysPt addr,Bitu store_to) {
+	fpu.p_regs[8].m1 = mem_readd(addr);
+	fpu.p_regs[8].m2 = mem_readd(addr+4);
+	FPUD_LOAD(fld,QWORD,l)
+}
+
+static void FPU_FLD_F80(PhysPt addr) {
+	fpu.p_regs[TOP].m1 = mem_readd(addr);
+	fpu.p_regs[TOP].m2 = mem_readd(addr+4);
+	fpu.p_regs[TOP].m3 = mem_readw(addr+8);
+	FPU_SET_C1(0);
+}
+
+static void FPU_FLD_I16(PhysPt addr,Bitu store_to) {
+	fpu.p_regs[8].m1 = (Bit32u)mem_readw(addr);
+	FPUD_LOAD(fild,WORD,)
+}
+
+static void FPU_FLD_I32(PhysPt addr,Bitu store_to) {
+	fpu.p_regs[8].m1 = mem_readd(addr);
+	FPUD_LOAD(fild,DWORD,l)
+}
+
+static void FPU_FLD_I64(PhysPt addr,Bitu store_to) {
+	fpu.p_regs[8].m1 = mem_readd(addr);
+	fpu.p_regs[8].m2 = mem_readd(addr+4);
+	FPUD_LOAD(fild,QWORD,q)
+}
+
+static void FPU_FBLD(PhysPt addr,Bitu store_to) {
+	fpu.p_regs[8].m1 = mem_readd(addr);
+	fpu.p_regs[8].m2 = mem_readd(addr+4);
+	fpu.p_regs[8].m3 = mem_readw(addr+8);
+	FPUD_LOAD(fbld,TBYTE,)
+}
+
+static void FPU_FST_F32(PhysPt addr) {
+	FPUD_STORE(fstp,DWORD,s)
+	mem_writed(addr,fpu.p_regs[8].m1);
+}
+
+static void FPU_FST_F64(PhysPt addr) {
+	FPUD_STORE(fstp,QWORD,l)
+	mem_writed(addr,fpu.p_regs[8].m1);
+	mem_writed(addr+4,fpu.p_regs[8].m2);
+}
+
+static void FPU_FST_F80(PhysPt addr) {
+	mem_writed(addr,fpu.p_regs[TOP].m1);
+	mem_writed(addr+4,fpu.p_regs[TOP].m2);
+	mem_writew(addr+8,fpu.p_regs[TOP].m3);
+	FPU_SET_C1(0);
+}
+
+static void FPU_FST_I16(PhysPt addr) {
+	FPUD_STORE(fistp,WORD,)
+	mem_writew(addr,(Bit16u)fpu.p_regs[8].m1);
+}
+
+static void FPU_FST_I32(PhysPt addr) {
+	FPUD_STORE(fistp,DWORD,l)
+	mem_writed(addr,fpu.p_regs[8].m1);
+}
+
+static void FPU_FST_I64(PhysPt addr) {
+	FPUD_STORE(fistp,QWORD,q)
+	mem_writed(addr,fpu.p_regs[8].m1);
+	mem_writed(addr+4,fpu.p_regs[8].m2);
+}
+
+static void FPU_FBST(PhysPt addr) {
+	FPUD_STORE(fbstp,TBYTE,)
+	mem_writed(addr,fpu.p_regs[8].m1);
+	mem_writed(addr+4,fpu.p_regs[8].m2);
+	mem_writew(addr+8,fpu.p_regs[8].m3);
+}
+
+
+static void FPU_FSIN(void){
+	FPUD_TRIG(fsin)
+}
+
+static void FPU_FSINCOS(void){
+	FPUD_SINCOS
+}
+
+static void FPU_FCOS(void){
+	FPUD_TRIG(fcos)
+}
+
+static void FPU_FSQRT(void){
+	FPUD_ARITH2(fsqrt)
+}
+
+static void FPU_FPATAN(void){
+	FPUD_WITH_POP(fpatan)
+}
+
+static void FPU_FPTAN(void){
+	FPUD_PTAN
+}
+
+
+static void FPU_FADD(Bitu op1, Bitu op2){
+	FPUD_ARITH1(faddp)
+}
+
+static void FPU_FDIV(Bitu op1, Bitu op2){
+	FPUD_ARITH1(fdivp)
+}
+
+static void FPU_FDIVR(Bitu op1, Bitu op2){
+	FPUD_ARITH1(fdivrp)
+}
+
+static void FPU_FMUL(Bitu op1, Bitu op2){
+	FPUD_ARITH1(fmulp)
+}
+
+static void FPU_FSUB(Bitu op1, Bitu op2){
+	FPUD_ARITH1(fsubp)
+}
+
+static void FPU_FSUBR(Bitu op1, Bitu op2){
+	FPUD_ARITH1(fsubrp)
+}
+
+static void FPU_FXCH(Bitu stv, Bitu other){
+	FPU_Tag tag = fpu.tags[other];
+	fpu.tags[other] = fpu.tags[stv];
+	fpu.tags[stv] = tag;
+
+	Bit32u m1s = fpu.p_regs[other].m1;
+	Bit32u m2s = fpu.p_regs[other].m2;
+	Bit16u m3s = fpu.p_regs[other].m3;
+	fpu.p_regs[other].m1 = fpu.p_regs[stv].m1;
+	fpu.p_regs[other].m2 = fpu.p_regs[stv].m2;
+	fpu.p_regs[other].m3 = fpu.p_regs[stv].m3;
+	fpu.p_regs[stv].m1 = m1s;
+	fpu.p_regs[stv].m2 = m2s;
+	fpu.p_regs[stv].m3 = m3s;
+
+	FPU_SET_C1(0);
+}
+
+static void FPU_FST(Bitu stv, Bitu other){
+	fpu.tags[other] = fpu.tags[stv];
+
+	fpu.p_regs[other].m1 = fpu.p_regs[stv].m1;
+	fpu.p_regs[other].m2 = fpu.p_regs[stv].m2;
+	fpu.p_regs[other].m3 = fpu.p_regs[stv].m3;
+
+	FPU_SET_C1(0);
+}
+
+
+static void FPU_FCOM(Bitu op1, Bitu op2){
+	FPUD_COMPARE(fcompp)
+}
+
+static void FPU_FUCOM(Bitu op1, Bitu op2){
+	FPUD_COMPARE(fucompp)
+}
+
+static void FPU_FRNDINT(void){
+	FPUD_ARITH2(frndint)
+}
+
+static void FPU_FPREM(void){
+	FPUD_REMINDER(fprem)
+}
+
+static void FPU_FPREM1(void){
+	FPUD_REMINDER(fprem1)
+}
+
+static void FPU_FXAM(void){
+	FPUD_EXAMINE(fxam)
+	// Only an empty TOP slot needs fixing up: force C3=1, C2=0, C0=1 and leave C1 as the host fxam set it.
+	if(fpu.tags[TOP] != TAG_Empty) return;
+	FPU_SET_C3(1);
+	FPU_SET_C2(0);
+	FPU_SET_C0(1);
+}
+
+static void FPU_F2XM1(void){
+	FPUD_TRIG(f2xm1)
+}
+
+static void FPU_FYL2X(void){
+	FPUD_WITH_POP(fyl2x)
+}
+
+static void FPU_FYL2XP1(void){
+	FPUD_WITH_POP(fyl2xp1)
+}
+
+static void FPU_FSCALE(void){
+	FPUD_REMINDER(fscale)
+}
+
+
+static void FPU_FSTENV(PhysPt addr){
+	FPU_SET_TOP(TOP);
+	if(!cpu.code.big) {
+		mem_writew(addr+0,static_cast<Bit16u>(fpu.cw));
+		mem_writew(addr+2,static_cast<Bit16u>(fpu.sw));
+		mem_writew(addr+4,static_cast<Bit16u>(FPU_GetTag()));
+	} else { 
+		mem_writed(addr+0,static_cast<Bit32u>(fpu.cw));
+		mem_writed(addr+4,static_cast<Bit32u>(fpu.sw));
+		mem_writed(addr+8,static_cast<Bit32u>(FPU_GetTag()));
+	}
+}
+
+static void FPU_FLDENV(PhysPt addr){
+	Bit16u tag;
+	Bit32u tagbig;
+	Bitu cw;
+	if(!cpu.code.big) {
+		cw     = mem_readw(addr+0);
+		fpu.sw = mem_readw(addr+2);
+		tag    = mem_readw(addr+4);
+	} else { 
+		cw     = mem_readd(addr+0);
+		fpu.sw = (Bit16u)mem_readd(addr+4);
+		tagbig = mem_readd(addr+8);
+		tag    = static_cast<Bit16u>(tagbig);
+	}
+	FPU_SetTag(tag);
+	FPU_SetCW(cw);
+	TOP=FPU_GET_TOP();
+}
+
+static void FPU_FSAVE(PhysPt addr){
+	FPU_FSTENV(addr);
+	Bitu start=(cpu.code.big?28:14);
+	for(Bitu i=0;i<8;i++){
+		mem_writed(addr+start,fpu.p_regs[STV(i)].m1);
+		mem_writed(addr+start+4,fpu.p_regs[STV(i)].m2);
+		mem_writew(addr+start+8,fpu.p_regs[STV(i)].m3);
+		start+=10;
+	}
+	FPU_FINIT();
+}
+
+static void FPU_FRSTOR(PhysPt addr){
+	FPU_FLDENV(addr);
+	Bitu start=(cpu.code.big?28:14);
+	for(Bitu i=0;i<8;i++){
+		fpu.p_regs[STV(i)].m1 = mem_readd(addr+start);
+		fpu.p_regs[STV(i)].m2 = mem_readd(addr+start+4);
+		fpu.p_regs[STV(i)].m3 = mem_readw(addr+start+8);
+		start+=10;
+	}
+}
+
+
+static void FPU_FXTRACT(void) {
+	FPUD_XTRACT
+}
+
+static void FPU_FCHS(void){
+	FPUD_TRIG(fchs)
+}
+
+static void FPU_FABS(void){
+	FPUD_TRIG(fabs)
+}
+
+static void FPU_FTST(void){
+	FPUD_EXAMINE(ftst)
+}
+
+static void FPU_FLD1(void){
+	FPUD_LOAD_CONST(fld1)
+}
+
+static void FPU_FLDL2T(void){
+	FPUD_LOAD_CONST(fldl2t)
+}
+
+static void FPU_FLDL2E(void){
+	FPUD_LOAD_CONST(fldl2e)
+}
+
+static void FPU_FLDPI(void){
+	FPUD_LOAD_CONST(fldpi)
+}
+
+static void FPU_FLDLG2(void){
+	FPUD_LOAD_CONST(fldlg2)
+}
+
+static void FPU_FLDLN2(void){
+	FPUD_LOAD_CONST(fldln2)
+}
+
+static void FPU_FLDZ(void){
+	FPUD_LOAD_CONST(fldz)
+	fpu.tags[TOP]=TAG_Zero;
+}
diff --git a/src/fpu/fpu_types.h b/src/fpu/fpu_types.h
index eeb0ddf9..3a4ac2fe 100644
--- a/src/fpu/fpu_types.h
+++ b/src/fpu/fpu_types.h
@@ -16,6 +16,7 @@
  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
 
+/* $Id: fpu_types.h,v 1.12 2005-02-22 13:06:06 qbix79 Exp $ */
 typedef union {
     double d;
 #ifndef WORDS_BIGENDIAN
@@ -32,6 +33,15 @@ typedef union {
     Bit64s ll;
 } FPU_Reg;
 
+typedef struct {
+    Bit32u m1;
+    Bit32u m2;
+    Bit16u m3;
+
+    Bit16u d1;
+    Bit32u d2;
+} FPU_P_Reg;
+
 enum FPU_Tag {
 	TAG_Valid = 0,
 	TAG_Zero  = 1,
@@ -39,13 +49,13 @@ enum FPU_Tag {
 	TAG_Empty = 3
 };
 
-
 enum FPU_Round {
 	ROUND_Nearest = 0,		
 	ROUND_Down    = 1,
 	ROUND_Up      = 2,	
 	ROUND_Chop    = 3
 };
+
 //get pi from a real library
 #define PI		3.14159265358979323846
 #define L2E		1.4426950408889634
diff --git a/src/platform/visualc/config.h b/src/platform/visualc/config.h
index d31afd04..d9e66c87 100644
--- a/src/platform/visualc/config.h
+++ b/src/platform/visualc/config.h
@@ -33,6 +33,9 @@
 /* Enable the FPU module, still only for beta testing */
 #define C_FPU 1
 
+/* Define to 1 to use a x86 assembly fpu core */
+#define C_FPU_X86 1
+
 /* environ is defined */
 #define ENVIRON_INCLUDED 1
 
diff --git a/visualc/dosbox.dsp b/visualc/dosbox.dsp
index a01dc422..306f9f76 100644
--- a/visualc/dosbox.dsp
+++ b/visualc/dosbox.dsp
@@ -639,6 +639,10 @@ SOURCE=..\src\fpu\fpu_instructions.h
 # End Source File
 # Begin Source File
 
+SOURCE=..\src\fpu\fpu_instructions_x86.h
+# End Source File
+# Begin Source File
+
 SOURCE=..\src\fpu\fpu_types.h
 # End Source File
 # End Group
diff --git a/visualc_net/dosbox.vcproj b/visualc_net/dosbox.vcproj
index f397a0f1..cf873eaf 100644
--- a/visualc_net/dosbox.vcproj
+++ b/visualc_net/dosbox.vcproj
@@ -657,6 +657,9 @@
 					RelativePath="..\src\fpu\fpu_instructions.h">
 				</File>
 				<File
+					RelativePath="..\src\fpu\fpu_instructions_x86.h">
+				</File>
+				<File
 					RelativePath="..\src\fpu\fpu_types.h">
 				</File>
 			</Filter>