From 8faeb470b4d49359ccef63db1643b4e55d04c76c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Strohh=C3=A4cker?=
 <c2woody@users.sourceforge.net>
Date: Sun, 8 Jan 2006 18:05:06 +0000
Subject: [PATCH] ignore unneeded fpu-exception flags

Imported-from: https://svn.code.sf.net/p/dosbox/code-0/dosbox/trunk@2425
---
 src/fpu/fpu_instructions_x86.h | 366 +++++++++++++++++++++++++--------
 1 file changed, 281 insertions(+), 85 deletions(-)

diff --git a/src/fpu/fpu_instructions_x86.h b/src/fpu/fpu_instructions_x86.h
index a10cd162..45e3c6e5 100644
--- a/src/fpu/fpu_instructions_x86.h
+++ b/src/fpu/fpu_instructions_x86.h
@@ -16,11 +16,31 @@
  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
 
-/* $Id: fpu_instructions_x86.h,v 1.1 2005-02-22 13:06:06 qbix79 Exp $ */
+/* $Id: fpu_instructions_x86.h,v 1.2 2006-01-08 18:05:06 c2woody Exp $ */
+
+
+#define WEAK_EXCEPTIONS	/* build switch: when defined, clx expands to nothing, several macros below skip fnstsw, and exc_mask is 0x7f00 */
 
 
 #if defined (_MSC_VER)
 
+#ifdef WEAK_EXCEPTIONS
+#define clx	/* empty: every "__asm clx" line in the macros below then carries no instruction */
+#else
+#define clx fclex	/* x87 FCLEX: clear the host exception flags right before the emulated op */
+#endif
+
+#ifdef WEAK_EXCEPTIONS
+#define FPUD_LOAD(op,szI,szA)			/* WEAK_EXCEPTIONS variant: no fclex, no fnstsw, fpu.sw is not written */ \
+		__asm {							\
+		__asm	mov		eax, 8			/* eax = 8 ... */ \
+		__asm	shl		eax, 4			/* ... shifted left by 4 = byte offset 128 into fpu.p_regs */ \
+		__asm	mov		ebx, store_to	/* ebx = caller-supplied store_to */ \
+		__asm	shl		ebx, 4			/* store_to * 16 = byte offset (presumably 16-byte entries - TODO confirm) */ \
+		__asm	op		szI PTR fpu.p_regs[eax].m1		/* op reads a szI-sized operand at offset 128 */ \
+		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	/* pop st(0) as a 10-byte value into the store_to slot */ \
+		}
+#else
 #define FPUD_LOAD(op,szI,szA)			\
 		Bit16u new_sw;					\
 		__asm {							\
@@ -34,6 +54,7 @@
 		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	\
 		}								\
 		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+#endif
 
 #define FPUD_STORE(op,szI,szA)				\
 		Bit16u new_sw,save_cw;				\
@@ -45,12 +66,12 @@
 		__asm	mov		ebx, 8				\
 		__asm	shl		ebx, 4				\
 		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
-		__asm	fclex						\
+		__asm	clx							\
 		__asm	op		szI PTR fpu.p_regs[ebx].m1		\
 		__asm	fnstsw	new_sw				\
 		__asm	fldcw	save_cw				\
 		}									\
-		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);
 
 // handles fsin,fcos,f2xm1,fchs,fabs
 #define FPUD_TRIG(op)				\
@@ -59,15 +80,15 @@
 		__asm	mov		eax, TOP	\
 		__asm	shl		eax, 4		\
 		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
-		__asm	fclex				\
+		__asm	clx					\
 		__asm	op					\
 		__asm	fnstsw	new_sw		\
 		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	\
 		}							\
-		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);
 
 // handles fsincos
-#define FPUD_SINCOS						\
+#define FPUD_SINCOS()				\
 		Bit16u new_sw;					\
 		__asm {							\
 		__asm	mov		eax, TOP		\
@@ -77,7 +98,7 @@
 		__asm	shl		eax, 4			\
 		__asm	shl		ebx, 4			\
 		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
-		__asm	fclex					\
+		__asm	clx						\
 		__asm	fsincos					\
 		__asm	fnstsw	new_sw			\
 		__asm	mov		cx, new_sw		\
@@ -90,11 +111,11 @@
 		__asm	fstp	st(0)			\
 		__asm	end_sincos:				\
 		}												\
-		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);			\
+		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);		\
 		if ((new_sw&0x0400)==0) FPU_PREP_PUSH();
 
 // handles fptan
-#define FPUD_PTAN						\
+#define FPUD_PTAN()					\
 		Bit16u new_sw;					\
 		__asm {							\
 		__asm	mov		eax, TOP		\
@@ -104,7 +125,7 @@
 		__asm	shl		eax, 4			\
 		__asm	shl		ebx, 4			\
 		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
-		__asm	fclex					\
+		__asm	clx					\
 		__asm	fptan					\
 		__asm	fnstsw	new_sw			\
 		__asm	mov		cx, new_sw		\
@@ -117,10 +138,26 @@
 		__asm	fstp	st(0)			\
 		__asm	end_ptan:				\
 		}												\
-		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);			\
+		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);		\
 		if ((new_sw&0x0400)==0) FPU_PREP_PUSH();
 
 // handles fxtract
+#ifdef WEAK_EXCEPTIONS
+#define FPUD_XTRACT						/* WEAK_EXCEPTIONS variant: no fclex, no fnstsw, fpu.sw is not written */ \
+		__asm {							\
+		__asm	mov		eax, TOP		/* eax = TOP */ \
+		__asm	mov		ebx, eax		\
+		__asm	dec     ebx				\
+		__asm	and     ebx, 7			/* ebx = (TOP-1) & 7 */ \
+		__asm	shl		eax, 4			/* both indices * 16 = byte offsets into fpu.p_regs */ \
+		__asm	shl		ebx, 4			\
+		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	/* push the value stored at slot TOP */ \
+		__asm	fxtract					/* x87 FXTRACT: leaves significand in st(0), exponent in st(1) */ \
+		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	/* pop st(0) into slot (TOP-1)&7 */ \
+		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	/* pop the remaining value into slot TOP */ \
+		}												\
+		FPU_PREP_PUSH();	/* defined elsewhere; presumably moves TOP to the slot just written - TODO confirm */
+#else
 #define FPUD_XTRACT						\
 		Bit16u new_sw;					\
 		__asm {							\
@@ -139,9 +176,47 @@
 		}												\
 		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);			\
 		FPU_PREP_PUSH();
+#endif
 
-// handles fadd,fmul,fsub,fsubr,fdiv,fdivr
+// handles fadd,fmul,fsub,fsubr: runs op on the op1/op2 slots with fpu.cw_mask_all loaded, result goes to the op1 slot
 #define FPUD_ARITH1(op)						\
+		Bit16u new_sw,save_cw;				\
+		__asm {								\
+		__asm	fnstcw	save_cw				/* save the host control word */ \
+		__asm	fldcw	fpu.cw_mask_all		/* run the op under fpu.cw_mask_all instead */ \
+		__asm	mov		eax, op1			\
+		__asm	shl		eax, 4				/* op1 * 16 = byte offset into fpu.p_regs */ \
+		__asm	mov		ebx, op2			\
+		__asm	shl		ebx, 4				/* op2 * 16 = byte offset into fpu.p_regs */ \
+		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	/* push op1 value (becomes st(1)) */ \
+		__asm	fld		TBYTE PTR fpu.p_regs[ebx].m1	/* push op2 value (st(0)) */ \
+		__asm	clx							/* fclex unless WEAK_EXCEPTIONS is defined */ \
+		__asm	op		st(1), st(0)		\
+		__asm	fnstsw	new_sw				/* capture the host status word after the op */ \
+		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	 /* pop the top of the host stack into the op1 slot */ \
+		__asm	fldcw	save_cw				/* restore the host control word */ \
+		}									\
+		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);	/* merge: exc_mask bits of new_sw OR-ed with bits 0..7 and 15 of the old fpu.sw */
+
+// handles fsqrt,frndint: runs op in place on the slot at TOP with fpu.cw_mask_all loaded
+#define FPUD_ARITH2(op)						\
+		Bit16u new_sw,save_cw;				\
+		__asm {								\
+		__asm	fnstcw	save_cw				/* save the host control word */ \
+		__asm	fldcw	fpu.cw_mask_all		/* run the op under fpu.cw_mask_all instead */ \
+		__asm	mov		eax, TOP			\
+		__asm	shl		eax, 4				/* TOP * 16 = byte offset into fpu.p_regs */ \
+		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	/* push the value stored at slot TOP */ \
+		__asm	clx							/* fclex unless WEAK_EXCEPTIONS is defined */ \
+		__asm	op							\
+		__asm	fnstsw	new_sw				/* capture the host status word after the op */ \
+		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	 /* pop the result back into slot TOP */ \
+		__asm	fldcw	save_cw				/* restore the host control word */ \
+		}									\
+		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);	/* merge: exc_mask bits of new_sw OR-ed with bits 0..7 and 15 of the old fpu.sw */
+
+// handles fdiv,fdivr (unlike FPUD_ARITH1, the status word is merged with the literal 0xffbf mask, not exc_mask)
+#define FPUD_ARITH3(op)						\
 		Bit16u new_sw,save_cw;				\
 		__asm {								\
 		__asm	fnstcw	save_cw				\
@@ -160,23 +235,6 @@
 		}									\
 		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
 
-// handles fsqrt,frndint
-#define FPUD_ARITH2(op)						\
-		Bit16u new_sw,save_cw;				\
-		__asm {								\
-		__asm	fnstcw	save_cw				\
-		__asm	fldcw	fpu.cw_mask_all		\
-		__asm	mov		eax, TOP			\
-		__asm	shl		eax, 4				\
-		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
-		__asm	fclex						\
-		__asm	op							\
-		__asm	fnstsw	new_sw				\
-		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	 \
-		__asm	fldcw	save_cw				\
-		}									\
-		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
-
 // handles fprem,fprem1,fscale
 #define FPUD_REMINDER(op)			\
 		Bit16u new_sw;				\
@@ -207,11 +265,11 @@
 		__asm	shl		eax, 4		\
 		__asm	fld		TBYTE PTR fpu.p_regs[ebx].m1	\
 		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
-		__asm	fclex				\
+		__asm	clx					\
 		__asm	op					\
 		__asm	fnstsw	new_sw		\
 		}							\
-		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);
 
 // handles fxam,ftst
 #define FPUD_EXAMINE(op)			\
@@ -220,15 +278,52 @@
 		__asm	mov		eax, TOP	\
 		__asm	shl		eax, 4		\
 		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	\
-		__asm	fclex				\
+		__asm	clx					\
 		__asm	op					\
 		__asm	fnstsw	new_sw		\
 		__asm	fstp	st(0)		\
 		}							\
-		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);
 
-// handles fpatan,fyl2x,fyl2xp1
+// handles fpatan,fyl2xp1: op on slots (TOP+1)&7 and TOP, result stored in slot (TOP+1)&7, then FPU_FPOP()
+#ifdef WEAK_EXCEPTIONS
 #define FPUD_WITH_POP(op)			\
+		__asm {						\
+		__asm	mov		eax, TOP	\
+		__asm	mov		ebx, eax	\
+		__asm	inc     ebx			\
+		__asm	and     ebx, 7		/* ebx = (TOP+1) & 7 */ \
+		__asm	shl		ebx, 4		/* both indices * 16 = byte offsets into fpu.p_regs */ \
+		__asm	shl		eax, 4		\
+		__asm	fld		TBYTE PTR fpu.p_regs[ebx].m1	/* push slot (TOP+1)&7 (becomes st(1)) */ \
+		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	/* push slot TOP (st(0)) */ \
+		__asm	op					/* weak variant: no fclex before, no fnstsw after */ \
+		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	/* pop the result into slot (TOP+1)&7 */ \
+		}							\
+		FPU_FPOP();	/* defined elsewhere; fpu.sw is not written in this variant */
+#else
+#define FPUD_WITH_POP(op)			\
+		Bit16u new_sw;				\
+		__asm {						\
+		__asm	mov		eax, TOP	\
+		__asm	mov		ebx, eax	\
+		__asm	inc     ebx			\
+		__asm	and     ebx, 7		/* ebx = (TOP+1) & 7 */ \
+		__asm	shl		ebx, 4		/* both indices * 16 = byte offsets into fpu.p_regs */ \
+		__asm	shl		eax, 4		\
+		__asm	fld		TBYTE PTR fpu.p_regs[ebx].m1	/* push slot (TOP+1)&7 (becomes st(1)) */ \
+		__asm	fld		TBYTE PTR fpu.p_regs[eax].m1	/* push slot TOP (st(0)) */ \
+		__asm	fclex				/* clear host exception flags so new_sw reflects only this op */ \
+		__asm	op					\
+		__asm	fnstsw	new_sw		/* capture the host status word after the op */ \
+		__asm	fstp	TBYTE PTR fpu.p_regs[ebx].m1	/* pop the result into slot (TOP+1)&7 */ \
+		}								\
+		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);	/* merge: all of new_sw except bit 6, OR-ed with bits 0..7 and 15 of the old fpu.sw */ \
+		FPU_FPOP();
+#endif
+
+// handles fyl2x
+#define FPUD_FYL2X(op)				\
 		Bit16u new_sw;				\
 		__asm {						\
 		__asm	mov		eax, TOP	\
@@ -248,21 +343,37 @@
 		FPU_FPOP();
 
 // load math constants
-#define FPUD_LOAD_CONST(op)			\
-		Bit16u new_sw;				\
+#define FPUD_LOAD_CONST(op)		\
 		FPU_PREP_PUSH();			\
 		__asm {						\
 		__asm	mov		eax, TOP	\
 		__asm	shl		eax, 4		\
-		__asm	fclex				\
+		__asm	clx					\
 		__asm	op					\
-		__asm	fnstsw	new_sw		\
 		__asm	fstp	TBYTE PTR fpu.p_regs[eax].m1	\
 		}							\
-		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
 
 #else
 
+#ifdef WEAK_EXCEPTIONS
+#define clx	/* empty: only the adjacent " \n" literal remains in the asm template */
+#else
+#define clx "fclex"	/* string literal; concatenates with the " \n" literal that follows each use */
+#endif
+
+#ifdef WEAK_EXCEPTIONS
+#define FPUD_LOAD(op,szI,szA)				/* WEAK_EXCEPTIONS variant: no fclex, no fnstsw, fpu.sw is not written */ \
+		__asm__ volatile (					\
+			"movl		$8, %%eax		\n"	/* eax = 8 ... */ \
+			"shl		$4, %%eax		\n"	/* ... shifted left by 4 = byte offset 128 */ \
+			"shl		$4, %0			\n"	/* store_to * 16; NOTE(review): %0 is declared input-only yet is modified here */ \
+			#op #szA "	(%1, %%eax)		\n"	/* op with size suffix szA reads from fpu.p_regs + 128 */ \
+			"fstpt		(%1, %0)		"	/* pop st(0) as a 10-byte value into the store_to slot */ \
+			:								\
+			:	"r" (store_to), "r" (fpu.p_regs)	\
+			:	"eax", "memory"						\
+		);
+#else
 #define FPUD_LOAD(op,szI,szA)				\
 		Bit16u new_sw;						\
 		__asm__ volatile (					\
@@ -278,6 +389,7 @@
 			:	"eax", "memory"						\
 		);									\
 		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+#endif
 
 #define FPUD_STORE(op,szI,szA)				\
 		Bit16u new_sw,save_cw;				\
@@ -288,7 +400,7 @@
 			"movl		$8, %%eax		\n"	\
 			"shl		$4, %%eax		\n"	\
 			"fldt		(%3, %2)		\n"	\
-			"fclex						\n"	\
+			clx" 						\n"	\
 			#op #szA "	(%3, %%eax)		\n"	\
 			"fnstsw		%0				\n"	\
 			"fldcw		%1				"	\
@@ -296,7 +408,7 @@
 			:	"r" (TOP), "r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
 			:	"eax", "memory"						\
 		);										\
-		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);
 
 // handles fsin,fcos,f2xm1,fchs,fabs
 #define FPUD_TRIG(op)						\
@@ -304,7 +416,7 @@
 		__asm__ volatile (					\
 			"shll		$4, %1			\n"	\
 			"fldt		(%2, %1)		\n"	\
-			"fclex						\n"	\
+			clx" 						\n"	\
 			#op" 						\n"	\
 			"fnstsw		%0				\n"	\
 			"fstpt		(%2, %1)		"	\
@@ -312,10 +424,10 @@
 			:	"r" (TOP), "r" (fpu.p_regs)	\
 			:	"memory"				\
 		);									\
-		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);
 
 // handles fsincos
-#define FPUD_SINCOS							\
+#define FPUD_SINCOS()					\
 		Bit16u new_sw;						\
 		__asm__ volatile (					\
 			"movl		%1, %%eax		\n"	\
@@ -324,7 +436,7 @@
 			"andl		$7, %%eax		\n"	\
 			"shll		$4, %%eax		\n"	\
 			"fldt		(%2, %1)		\n"	\
-			"fclex						\n"	\
+			clx" 						\n"	\
 			"fsincos					\n"	\
 			"fnstsw		%0				\n"	\
 			"fstpt		(%2, %%eax)		\n"	\
@@ -337,11 +449,11 @@
 			:	"r" (TOP), "r" (fpu.p_regs)	\
 			:	"eax", "cc", "memory"		\
 		);									\
-		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);		\
+		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);		\
 		if ((new_sw&0x0400)==0) FPU_PREP_PUSH();
 
 // handles fptan
-#define FPUD_PTAN							\
+#define FPUD_PTAN()						\
 		Bit16u new_sw;						\
 		__asm__ volatile (					\
 			"movl		%1, %%eax		\n"	\
@@ -350,7 +462,7 @@
 			"andl		$7, %%eax		\n"	\
 			"shll		$4, %%eax		\n"	\
 			"fldt		(%2, %1)		\n"	\
-			"fclex						\n"	\
+			clx" 						\n"	\
 			"fptan 						\n"	\
 			"fnstsw		%0				\n"	\
 			"fstpt		(%2, %%eax)		\n"	\
@@ -363,10 +475,28 @@
 			:	"r" (TOP), "r" (fpu.p_regs)	\
 			:	"eax", "cc", "memory"		\
 		);									\
-		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);		\
+		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);		\
 		if ((new_sw&0x0400)==0) FPU_PREP_PUSH();
 
 // handles fxtract
+#ifdef WEAK_EXCEPTIONS
+#define FPUD_XTRACT						/* WEAK_EXCEPTIONS variant: no fclex, no fnstsw, fpu.sw is not written */ \
+		__asm__ volatile (					\
+			"movl		%0, %%eax		\n"	/* eax = TOP */ \
+			"shll		$4, %0			\n"	/* TOP * 16; NOTE(review): %0 is declared input-only yet is modified here */ \
+			"decl		%%eax			\n"	\
+			"andl		$7, %%eax		\n"	/* eax = (TOP-1) & 7 */ \
+			"shll		$4, %%eax		\n"	/* ... * 16 = byte offset into fpu.p_regs */ \
+			"fldt		(%1, %0)		\n"	/* push the value stored at slot TOP */ \
+			"fxtract					\n"	/* x87 FXTRACT: leaves significand in st(0), exponent in st(1) */ \
+			"fstpt		(%1, %%eax)		\n"	/* pop st(0) into slot (TOP-1)&7 */ \
+			"fstpt		(%1, %0)		"	/* pop the remaining value into slot TOP */ \
+			:								\
+			:	"r" (TOP), "r" (fpu.p_regs)	\
+			:	"eax", "memory"				\
+		);									\
+		FPU_PREP_PUSH();	/* defined elsewhere; presumably moves TOP to the slot just written - TODO confirm */
+#else
 #define FPUD_XTRACT						\
 		Bit16u new_sw;						\
 		__asm__ volatile (					\
@@ -387,9 +517,50 @@
 		);									\
 		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);		\
 		FPU_PREP_PUSH();
+#endif
 
-// handles fadd,fmul,fsub,fsubr,fdiv,fdivr
+// handles fadd,fmul,fsub,fsubr: runs op on the op1/op2 slots with fpu.cw_mask_all loaded, result goes to the op1 slot
 #define FPUD_ARITH1(op)						\
+		Bit16u new_sw,save_cw;				\
+		__asm__ volatile (					\
+			"fnstcw		%1				\n"	/* save the host control word in save_cw */ \
+			"fldcw		%5				\n"	/* run the op under fpu.cw_mask_all instead */ \
+			"shll		$4, %3			\n"	/* op2 * 16; NOTE(review): %2 and %3 are declared input-only yet are modified */ \
+			"shll		$4, %2			\n"	/* op1 * 16 */ \
+			"fldt		(%4, %3)		\n"	/* push op2 value (becomes st(1)) */ \
+			"fldt		(%4, %2)		\n"	/* push op1 value (st(0)) */ \
+			clx" 						\n"	/* fclex unless WEAK_EXCEPTIONS is defined */ \
+			#op"						\n"	\
+			"fnstsw		%0				\n"	/* capture the host status word after the op */ \
+			"fstpt		(%4, %2)		\n"	/* pop the top of the host stack into the op1 slot */ \
+			"fldcw		%1				"	/* restore the host control word */ \
+			:	"=m" (new_sw), "=m" (save_cw)		\
+			:	"r" (op1), "r" (op2), "r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
+			:	"memory"				\
+		);									\
+		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);	/* merge: exc_mask bits of new_sw OR-ed with bits 0..7 and 15 of the old fpu.sw */
+
+// handles fsqrt,frndint: runs op in place on the slot at TOP with fpu.cw_mask_all loaded
+#define FPUD_ARITH2(op)						\
+		Bit16u new_sw,save_cw;				\
+		__asm__ volatile (					\
+			"fnstcw		%1				\n"	/* save the host control word in save_cw */ \
+			"fldcw		%4				\n"	/* run the op under fpu.cw_mask_all instead */ \
+			"shll		$4, %2			\n"	/* TOP * 16; NOTE(review): %2 is declared input-only yet is modified here */ \
+			"fldt		(%3, %2)		\n"	/* push the value stored at slot TOP */ \
+			clx" 						\n"	/* fclex unless WEAK_EXCEPTIONS is defined */ \
+			#op" 						\n"	\
+			"fnstsw		%0				\n"	/* capture the host status word after the op */ \
+			"fstpt		(%3, %2)		\n"	/* pop the result back into slot TOP */ \
+			"fldcw		%1				"	/* restore the host control word */ \
+			:	"=m" (new_sw), "=m" (save_cw)	\
+			:	"r" (TOP), "r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
+			:	"memory"				\
+		);										\
+		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);	/* merge: exc_mask bits of new_sw OR-ed with bits 0..7 and 15 of the old fpu.sw */
+
+// handles fdiv,fdivr (unlike FPUD_ARITH1, the status word is merged with the literal 0xffbf mask, not exc_mask)
+#define FPUD_ARITH3(op)						\
 		Bit16u new_sw,save_cw;				\
 		__asm__ volatile (					\
 			"fnstcw		%1				\n"	\
@@ -405,29 +576,10 @@
 			"fldcw		%1				"	\
 			:	"=m" (new_sw), "=m" (save_cw)		\
 			:	"r" (op1), "r" (op2), "r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
-			:	"memory"				\
+			:	"memory"					\
 		);									\
 		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
 
-// handles fsqrt,frndint
-#define FPUD_ARITH2(op)						\
-		Bit16u new_sw,save_cw;				\
-		__asm__ volatile (					\
-			"fnstcw		%1				\n"	\
-			"fldcw		%4				\n"	\
-			"shll		$4, %2			\n"	\
-			"fldt		(%3, %2)		\n"	\
-			"fclex						\n"	\
-			#op" 						\n"	\
-			"fnstsw		%0				\n"	\
-			"fstpt		(%3, %2)		\n"	\
-			"fldcw		%1				"	\
-			:	"=m" (new_sw), "=m" (save_cw)	\
-			:	"r" (TOP), "r" (fpu.p_regs), "m" (fpu.cw_mask_all)		\
-			:	"memory"				\
-		);										\
-		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
-
 // handles fprem,fprem1,fscale
 #define FPUD_REMINDER(op)					\
 		Bit16u new_sw;						\
@@ -458,14 +610,14 @@
 			"shll		$4, %1			\n"	\
 			"fldt		(%3, %2)		\n"	\
 			"fldt		(%3, %1)		\n"	\
-			"fclex						\n"	\
+			clx" 						\n"	\
 			#op" 						\n"	\
 			"fnstsw		%0				"	\
 			:	"=m" (new_sw)				\
 			:	"r" (op1), "r" (op2), "r" (fpu.p_regs) 		\
 			:	"memory"				\
 		);									\
-		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);
 
 // handles fxam,ftst
 #define FPUD_EXAMINE(op)					\
@@ -473,7 +625,7 @@
 		__asm__ volatile (					\
 			"shll		$4, %1			\n"	\
 			"fldt		(%2, %1)		\n"	\
-			"fclex						\n"	\
+			clx" 						\n"	\
 			#op" 						\n"	\
 			"fnstsw		%0				\n"	\
 			"fstp		%%st(0)			"	\
@@ -481,9 +633,27 @@
 			:	"r" (TOP), "r" (fpu.p_regs)	\
 			:	"memory"				\
 		);									\
-		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);
+		fpu.sw=(new_sw&exc_mask)|(fpu.sw&0x80ff);
 
-// handles fpatan,fyl2x,fyl2xp1
+// handles fpatan,fyl2xp1: op on slots (TOP+1)&7 and TOP, result stored in slot (TOP+1)&7, then FPU_FPOP()
+#ifdef WEAK_EXCEPTIONS
+#define FPUD_WITH_POP(op)					/* weak variant: no fclex, no fnstsw, fpu.sw is not written */ \
+		__asm__ volatile (					\
+			"movl		%0, %%eax		\n"	/* eax = TOP */ \
+			"incl		%%eax			\n"	\
+			"andl		$7, %%eax		\n"	/* eax = (TOP+1) & 7 */ \
+			"shll		$4, %%eax		\n"	/* ... * 16 = byte offset into fpu.p_regs */ \
+			"shll		$4, %0			\n"	/* TOP * 16; NOTE(review): %0 is declared input-only yet is modified here */ \
+			"fldt		(%1, %%eax)		\n"	/* push slot (TOP+1)&7 (becomes st(1)) */ \
+			"fldt		(%1, %0)		\n"	/* push slot TOP (st(0)) */ \
+			#op" 						\n"	\
+			"fstpt		(%1, %%eax)		\n"	/* pop the result into slot (TOP+1)&7 */ \
+			:								\
+			:	"r" (TOP), "r" (fpu.p_regs)	\
+			:	"eax", "memory"				\
+		);									\
+		FPU_FPOP();
+#else
 #define FPUD_WITH_POP(op)					\
 		Bit16u new_sw;						\
 		__asm__ volatile (					\
@@ -504,23 +674,49 @@
 		);									\
 		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);		\
 		FPU_FPOP();
+#endif
 
-// load math constants
-#define FPUD_LOAD_CONST(op)					\
+// handles fyl2x (always uses fclex and the literal 0xffbf mask, even when WEAK_EXCEPTIONS is defined)
+#define FPUD_FYL2X(op)						\
 		Bit16u new_sw;						\
-		FPU_PREP_PUSH();					\
 		__asm__ volatile (					\
+			"movl		%1, %%eax		\n"	\
+			"incl		%%eax			\n"	\
+			"andl		$7, %%eax		\n"	\
+			"shll		$4, %%eax		\n"	\
 			"shll		$4, %1			\n"	\
+			"fldt		(%2, %%eax)		\n"	\
+			"fldt		(%2, %1)		\n"	\
 			"fclex						\n"	\
 			#op" 						\n"	\
 			"fnstsw		%0				\n"	\
-			"fstpt		(%2, %1)		\n"	\
+			"fstpt		(%2, %%eax)		\n"	\
 			:	"=m" (new_sw)				\
 			:	"r" (TOP), "r" (fpu.p_regs)	\
-			:	"memory"				\
+			:	"eax", "memory"				\
 		);									\
 		fpu.sw=(new_sw&0xffbf)|(fpu.sw&0x80ff);		\
+		FPU_FPOP();
 
+// load math constants: FPU_PREP_PUSH() first, then op's result is stored into the slot at TOP; fpu.sw is not written
+#define FPUD_LOAD_CONST(op)				\
+		FPU_PREP_PUSH();					\
+		__asm__ volatile (					\
+			"shll		$4, %0			\n"	/* TOP * 16; NOTE(review): %0 is declared input-only yet is modified here */ \
+			clx" 						\n"	/* fclex unless WEAK_EXCEPTIONS is defined */ \
+			#op" 						\n"	/* op pushes the constant onto the host x87 stack */ \
+			"fstpt		(%1, %0)		\n"	/* pop it as a 10-byte value into slot TOP */ \
+			:								\
+			:	"r" (TOP), "r" (fpu.p_regs)	\
+			:	"memory"					\
+		);
+
+#endif
+
+#ifdef WEAK_EXCEPTIONS
+const Bit16u exc_mask=0x7f00;	// take only bits 8..14 from the host status word; bits 0..7 (x87 exception/summary flags) then come solely from the old fpu.sw
+#else
+const Bit16u exc_mask=0xffbf;	// take every host status-word bit except bit 6, so host exception flags are OR-ed into fpu.sw
 #endif
 
 static void FPU_FINIT(void) {
@@ -643,7 +839,7 @@ static void FPU_FSIN(void){
 }
 
 static void FPU_FSINCOS(void){
-	FPUD_SINCOS
+	FPUD_SINCOS()
 }
 
 static void FPU_FCOS(void){
@@ -659,7 +855,7 @@ static void FPU_FPATAN(void){
 }
 
 static void FPU_FPTAN(void){
-	FPUD_PTAN
+	FPUD_PTAN()
 }
 
 
@@ -668,11 +864,11 @@ static void FPU_FADD(Bitu op1, Bitu op2){
 }
 
 static void FPU_FDIV(Bitu op1, Bitu op2){
-	FPUD_ARITH1(fdivp)
+	FPUD_ARITH3(fdivp)
 }
 
 static void FPU_FDIVR(Bitu op1, Bitu op2){
-	FPUD_ARITH1(fdivrp)
+	FPUD_ARITH3(fdivrp)
 }
 
 static void FPU_FMUL(Bitu op1, Bitu op2){
@@ -750,7 +946,7 @@ static void FPU_F2XM1(void){
 }
 
 static void FPU_FYL2X(void){
-	FPUD_WITH_POP(fyl2x)
+	FPUD_FYL2X(fyl2x)
 }
 
 static void FPU_FYL2XP1(void){