diff --git a/src/gui/render.cpp b/src/gui/render.cpp index 6341bf0a..094078bb 100644 --- a/src/gui/render.cpp +++ b/src/gui/render.cpp @@ -16,7 +16,6 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include #include #include "dosbox.h" @@ -26,11 +25,8 @@ #include "keyboard.h" #include "cross.h" - #define MAX_RES 2048 - - struct PalData { struct { Bit8u red; @@ -55,18 +51,16 @@ static struct { Bitu pitch; Bitu flags; float ratio; - RENDER_Part_Handler part_handler; RENDER_Draw_Handler draw_handler; } src; struct { Bitu width; Bitu height; Bitu pitch; - Bitu next_line; - Bitu next_pixel; Bitu bpp; /* The type of BPP the operation requires for input */ - RENDER_Operation want_type; RENDER_Operation type; + RENDER_Operation want_type; + RENDER_Part_Handler part_handler; void * dest; void * buffer; void * pixels; @@ -77,6 +71,13 @@ static struct { } frameskip; Bitu flags; PalData pal; +#if (C_SSHOT) + struct { + RENDER_Operation type; + Bitu pitch; + const char * dir; + } shot; +#endif bool keep_small; bool screenshot; bool active; @@ -86,9 +87,13 @@ static struct { static void RENDER_ResetPal(void); /* Include the different rendering routines */ -#include "render_support.h" +#include "render_normal.h" +#include "render_scale2x.h" + + +#if (C_SSHOT) +#include -static const char * snapshots_dir; /* Take a screenshot of the data that should be rendered */ static void TakeScreenShot(Bit8u * bitmap) { @@ -109,9 +114,9 @@ static void TakeScreenShot(Bit8u * bitmap) { return; } /* Find a filename to open */ - dir=opendir(snapshots_dir); + dir=opendir(render.shot.dir); if (!dir) { - LOG_MSG("Can't open snapshot dir %s",snapshots_dir); + LOG_MSG("Can't open snapshot dir %s",render.shot.dir); return; } while (dir_ent=readdir(dir)) { @@ -126,7 +131,7 @@ static void TakeScreenShot(Bit8u * bitmap) { if (num>=last) last=num+1; } closedir(dir); - sprintf(file_name,"%s%csnap%05d.png",snapshots_dir,CROSS_FILESPLIT,last); + 
sprintf(file_name,"%s%csnap%05d.png",render.shot.dir,CROSS_FILESPLIT,last); /* Open the actual file */ FILE * fp=fopen(file_name,"wb"); if (!fp) { @@ -162,7 +167,7 @@ static void TakeScreenShot(Bit8u * bitmap) { /*Allocate an array of scanline pointers*/ row_pointers=(png_bytep*)malloc(render.src.height*sizeof(png_bytep)); for (i=0;i(sec); - snapshots_dir=section->Get_string("snapshots"); render.pal.first=256; render.pal.last=0; render.keep_small=section->Get_bool("keepsmall"); render.frameskip.max=section->Get_int("frameskip"); render.frameskip.count=0; +#if (C_SSHOT) + render.shot.dir=section->Get_string("snapshots"); KEYBOARD_AddEvent(KBD_f5,KBD_MOD_CTRL,EnableScreenShot); +#endif + const char * scaler=section->Get_string("scaler"); + if (!stricmp(scaler,"none")) render.op.want_type=OP_None; + else if (!stricmp(scaler,"scale2x")) render.op.want_type=OP_Scale2x; + else { + render.op.want_type=OP_None; + LOG_MSG("Illegal scaler type %s,falling back to none.",scaler); + } KEYBOARD_AddEvent(KBD_f7,KBD_MOD_CTRL,DecreaseFrameSkip); KEYBOARD_AddEvent(KBD_f8,KBD_MOD_CTRL,IncreaseFrameSkip); } diff --git a/src/gui/render_normal.h b/src/gui/render_normal.h new file mode 100644 index 00000000..5d4beeee --- /dev/null +++ b/src/gui/render_normal.h @@ -0,0 +1,182 @@ +/* + * Copyright (C) 2002-2003 The DOSBox Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Library General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Part renderers without a scaling filter.
+ *
+ * Every renderer has the RENDER_Part_Handler shape
+ *     (Bit8u * src, Bitu x, Bitu y, Bitu dx, Bitu dy)
+ * and copies a dx-by-dy rectangle of 8-bit source pixels to render.op.pixels.
+ * Four variants exist per destination depth; they differ in how many
+ * destination pixels/lines are written per source pixel:
+ *     DN  1 pixel, 1 line      DW  2 pixels, 1 line
+ *     DH  1 pixel, 2 lines     DB  2 pixels, 2 lines
+ * For 16 and 32 bit destinations the source byte is translated through
+ * render.pal.lookup.bpp16 / bpp32; for 8 bit it is copied unchanged.
+ */
+
+/* Number of source pixels handled per iteration of the unrolled inner loop */
+#define LOOPSIZE 4
+
+/* Size in bytes of one destination pixel for each destination depth */
+#define SIZE_8 1
+#define SIZE_16 2
+#define SIZE_32 4
+
+/* MAKE_x: read one source byte at FROM into a local 'val' of the destination type */
+#define MAKE_8(FROM) Bit8u val=*(Bit8u *)(FROM);
+#define MAKE_16(FROM) Bit16u val=render.pal.lookup.bpp16[*(Bit8u *)(FROM)];
+#define MAKE_32(FROM) Bit32u val=render.pal.lookup.bpp32[*(Bit8u *)(FROM)];
+
+/* SAVE_x: store 'val' at byte address WHERE */
+#define SAVE_8(WHERE) *(Bit8u *)(WHERE)=val;
+#define SAVE_16(WHERE) *(Bit16u *)(WHERE)=val;
+#define SAVE_32(WHERE) *(Bit32u *)(WHERE)=val;
+
+/* Destination lines written per source line */
+#define LINES_DN 1
+#define LINES_DW 1
+#define LINES_DH 2
+#define LINES_DB 2
+
+/* Destination pixels written per source pixel */
+#define PIXELS_DN 1
+#define PIXELS_DW 2
+#define PIXELS_DH 1
+#define PIXELS_DB 2
+
+/*
+ * NORMAL_xx: emit one source pixel. FROM and DEST are byte pointers.
+ * Each expansion declares 'val', so it must be used inside its own { } scope.
+ */
+#define NORMAL_DN(BPP,FROM,DEST) \
+	MAKE_ ## BPP(FROM); \
+	SAVE_ ## BPP(DEST);
+
+#define NORMAL_DW(BPP,FROM,DEST) \
+	MAKE_ ## BPP (FROM); \
+	SAVE_ ## BPP (DEST); \
+	SAVE_ ## BPP ((DEST) + SIZE_ ## BPP);
+
+#define NORMAL_DH(BPP,FROM,DEST) \
+	MAKE_ ## BPP (FROM); \
+	SAVE_ ## BPP (DEST); \
+	SAVE_ ## BPP ((DEST) + render.op.pitch);
+
+
+#define NORMAL_DB(BPP,FROM,DEST) \
+	MAKE_ ## BPP (FROM); \
+	SAVE_ ## BPP (DEST); \
+	SAVE_ ## BPP ((DEST)+SIZE_ ## BPP ); \
+	SAVE_ ## BPP ((DEST)+render.op.pitch ); \
+	SAVE_ ## BPP ((DEST)+render.op.pitch+SIZE_ ## BPP );
+
+
+/*
+ * Emit COUNT (0..8) consecutive source pixels starting at the local
+ * pointers 'src' and 'dest' of the enclosing function. COUNT is a
+ * compile-time constant, so the untaken branches are dead code.
+ */
+#define NORMAL_LOOP(COUNT,FUNC,BPP) \
+	if (COUNT>0) {NORMAL_ ## FUNC (BPP,(src+0),(dest+0 * PIXELS_ ## FUNC * SIZE_ ## BPP )) }\
+	if (COUNT>1) {NORMAL_ ## FUNC (BPP,(src+1),(dest+1 * PIXELS_ ## FUNC * SIZE_ ## BPP )) }\
+	if (COUNT>2) {NORMAL_ ## FUNC (BPP,(src+2),(dest+2 * PIXELS_ ## FUNC * SIZE_ ## BPP )) }\
+	if (COUNT>3) {NORMAL_ ## FUNC (BPP,(src+3),(dest+3 * PIXELS_ ## FUNC * SIZE_ ## BPP )) }\
+	if (COUNT>4) {NORMAL_ ## FUNC (BPP,(src+4),(dest+4 * PIXELS_ ## FUNC * SIZE_ ## BPP )) }\
+	if (COUNT>5) {NORMAL_ ## FUNC (BPP,(src+5),(dest+5 * PIXELS_ ## FUNC * SIZE_ ## BPP )) }\
+	if (COUNT>6) {NORMAL_ ## FUNC (BPP,(src+6),(dest+6 * PIXELS_ ## FUNC * SIZE_ ## BPP )) }\
+	if (COUNT>7) {NORMAL_ ## FUNC (BPP,(src+7),(dest+7 * PIXELS_ ## FUNC * SIZE_ ## BPP )) }
+
+/*
+ * Define Normal_<FUNC>_<BPP>(src,x,y,dx,dy).
+ *
+ * All dy rows are rendered. Each row is processed LOOPSIZE pixels at a time,
+ * followed by a single-pixel loop for the dx % LOOPSIZE pixels that do not
+ * fill a whole group, so that src/dest have advanced by exactly dx pixels
+ * when next_src/next_dest (which are computed from the full dx) are added.
+ */
+#define MAKENORMAL(FUNC,BPP) \
+static void Normal_ ## FUNC ## _ ##BPP(Bit8u * src,Bitu x,Bitu y,Bitu dx,Bitu dy) { \
+	Bit8u * dest=(Bit8u *)render.op.pixels+y*LINES_ ## FUNC*render.op.pitch+x*PIXELS_ ## FUNC * SIZE_ ## BPP; \
+	Bitu next_src=render.src.pitch-dx; \
+	Bitu next_dest=(LINES_ ## FUNC*render.op.pitch) - (dx*PIXELS_ ## FUNC * SIZE_ ## BPP); \
+	Bitu rem=dx%LOOPSIZE; \
+	dx/=LOOPSIZE; \
+	for (;dy>0;dy--) { \
+		Bitu tempx; \
+		for (tempx=dx;tempx>0;tempx--) { \
+			NORMAL_LOOP(LOOPSIZE,FUNC,BPP); \
+			src+=LOOPSIZE;dest+=LOOPSIZE*PIXELS_ ## FUNC * SIZE_ ## BPP; \
+		} \
+		for (tempx=rem;tempx>0;tempx--) { \
+			NORMAL_LOOP(1,FUNC,BPP); \
+			src++;dest+=PIXELS_ ## FUNC * SIZE_ ## BPP; \
+		} \
+		src+=next_src;dest+=next_dest; \
+	} \
+}
+
+MAKENORMAL(DW,8);
+MAKENORMAL(DB,8);
+
+MAKENORMAL(DN,16);
+MAKENORMAL(DW,16);
+MAKENORMAL(DH,16);
+MAKENORMAL(DB,16);
+
+MAKENORMAL(DN,32);
+MAKENORMAL(DW,32);
+MAKENORMAL(DH,32);
+MAKENORMAL(DB,32);
+
+/* Special versions for the 8-bit ones that can do direct line copying */
+
+/*
+ * 8-bit, no doubling: copy dx bytes per row for dy rows.
+ * Bytes are moved four at a time through Bit32u accesses, then the
+ * remaining dx&3 bytes one by one.
+ * NOTE(review): the Bit32u accesses assume src/dest may be read and written
+ * as 32-bit words at these addresses -- confirm for the supported targets.
+ */
+static void Normal_DN_8(Bit8u * src,Bitu x,Bitu y,Bitu dx,Bitu dy) {
+	Bit8u * dest=(Bit8u *)render.op.pixels+y*render.op.pitch+x;
+	Bitu next_src=render.src.pitch-dx;
+	Bitu next_dest=render.op.pitch-dx;
+	Bitu rem=dx&3;dx>>=2;
+	for (;dy>0;dy--) {
+		Bitu tempx;
+		for (tempx=dx;tempx>0;tempx--) {
+			Bit32u temp=*(Bit32u *)src;src+=4;
+			*(Bit32u *)dest=temp;
+			dest+=4;
+		}
+		for (tempx=rem;tempx>0;tempx--) {
+			*dest++=*src++;
+		}
+		src+=next_src;dest+=next_dest;
+	}
+}
+
+/*
+ * 8-bit, double height: every source row is written to two consecutive
+ * destination rows (dest and dest+render.op.pitch).
+ */
+static void Normal_DH_8(Bit8u * src,Bitu x,Bitu y,Bitu dx,Bitu dy) {
+	Bit8u * dest=(Bit8u *)render.op.pixels+2*y*render.op.pitch+x;
+	Bitu next_src=render.src.pitch-dx;
+	Bitu next_dest=(2*render.op.pitch)-dx;
+	Bitu rem=dx&3;dx>>=2;
+	for (;dy>0;dy--) {
+		Bitu tempx;
+		for (tempx=dx;tempx>0;tempx--) {
+			Bit32u temp=*(Bit32u *)src;src+=4;
+			*(Bit32u *)dest=temp;
+			*(Bit32u *)(dest+render.op.pitch)=temp;
+			dest+=4;
+		}
+		for (tempx=rem;tempx>0;tempx--) {
+			*dest=*src;
+			*(dest+render.op.pitch)=*src;
+			/* src must advance together with dest, otherwise the tail pixels
+			 * repeat one byte and next_src lands on the wrong offset */
+			src++;
+			dest++;
+		}
+		src+=next_src;dest+=next_dest;
+	}
+}
+
+/* Handler tables, indexed by render.src.flags in the order DN, DW, DH, DB */
+static RENDER_Part_Handler Render_Normal_8_Table[4]= {
+	Normal_DN_8,Normal_DW_8,Normal_DH_8,Normal_DB_8,
+};
+
+static RENDER_Part_Handler Render_Normal_16_Table[4]= {
+	Normal_DN_16,Normal_DW_16,Normal_DH_16,Normal_DB_16,
+};
+
+static RENDER_Part_Handler Render_Normal_32_Table[4]= {
+	Normal_DN_32,Normal_DW_32,Normal_DH_32,Normal_DB_32,
+};
+
+/*
+ * Mode callback for unscaled output.
+ * Returns immediately unless MODE_SET is present in flags. Otherwise it
+ * records the output geometry in render.op, marks the operation as OP_None,
+ * picks the part handler for the destination depth from the tables above
+ * (E_Exit for any depth other than 8/16/32) and calls RENDER_ResetPal().
+ * NOTE(review): render.src.flags is used as a table index without a range
+ * check -- presumably always 0..3; verify against the code that sets it.
+ */
+static void Render_Normal_CallBack(Bitu width,Bitu height,Bitu bpp,Bitu pitch,Bitu flags) {
+	if (!(flags & MODE_SET)) return;
+	render.op.width=width;
+	render.op.height=height;
+	render.op.bpp=bpp;
+	render.op.pitch=pitch;
+	render.op.type=OP_None;
+	switch (bpp) {
+	case 8:
+		render.op.part_handler=Render_Normal_8_Table[render.src.flags];
+		break;
+	case 16:
+		render.op.part_handler=Render_Normal_16_Table[render.src.flags];
+		break;
+	case 32:
+		render.op.part_handler=Render_Normal_32_Table[render.src.flags];
+		break;
+	default:
+		E_Exit("RENDER:Unsupported display depth of %d",bpp);
+		break;
+	}
+	RENDER_ResetPal();
+}
diff --git a/src/gui/render_scale2x.h b/src/gui/render_scale2x.h
new file mode 100644
index 00000000..e439bd11
--- /dev/null
+++ b/src/gui/render_scale2x.h
@@ -0,0 +1,545 @@
+/*
+ * This file is part of the Scale2x project.
+ *
+ * Copyright (C) 2001-2002 Andrea Mazzoleni
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * In addition, as a special exception, Andrea Mazzoleni + * gives permission to link the code of this program with + * the MAME library (or with modified versions of MAME that use the + * same license as MAME), and distribute linked combinations including + * the two. You must obey the GNU General Public License in all + * respects for all of the code used other than MAME. If you modify + * this file, you may extend this exception to your version of the + * file, but you are not obligated to do so. If you do not wish to + * do so, delete this exception statement from your version. + */ + +/* + * This file contains a C and MMX implementation of the Scale2x effect. + * + * You can find a high-level description of the effect at: + * + * http://scale2x.sourceforge.net/scale2x.html + * + * Alternatively at the previous license terms, you are allowed to use this + * code in your program with these conditions: + * - the program is not used in commercial activities. + * - the whole source code of the program is released with the binary. + * - derivative works of the program are allowed. + */ + +/* + * Made some changes to only support the 8-bit version. + * Also added multiple destination bpp targets. 
+ */
+
+#ifndef __SCALE2X_H
+#define __SCALE2X_H
+
+#include <assert.h>
+
+/***************************************************************************/
+/* basic types */
+
+typedef Bit8u scale2x_uint8;
+typedef Bit16u scale2x_uint16;
+typedef Bit32u scale2x_uint32;
+
+/*
+ * The MMX path below is GCC inline assembly for i386, so it may only be
+ * selected when BOTH __GNUC__ and __i386__ are defined; every other
+ * compiler/target combination uses the portable C implementation.
+ */
+#if !defined(__GNUC__) || !defined(__i386__)
+
+#define SCALE2X_NORMAL 1
+
+/*
+ * Scale one row of 8-bit pixels to two rows of twice the width.
+ *   dst0, dst1  the two output rows, 2*count pixels each
+ *   src0        source row above the current one
+ *   src1        current source row
+ *   src2        source row below the current one
+ *   count       number of source pixels, at least 2 (asserted)
+ * Each output pixel is either the current pixel or, when the neighbour
+ * comparisons below match, the pixel from the row above/below. The first
+ * and last pixel are handled separately because they lack a left/right
+ * neighbour.
+ */
+static void scale2x_line_8(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count)
+{
+	assert(count >= 2);
+
+	/* first pixel */
+	dst0[0] = src1[0];
+	dst1[0] = src1[0];
+	if (src1[1] == src0[0] && src2[0] != src0[0])
+		dst0[1] = src0[0];
+	else
+		dst0[1] = src1[0];
+	if (src1[1] == src2[0] && src0[0] != src2[0])
+		dst1[1] = src2[0];
+	else
+		dst1[1] = src1[0];
+	++src0;
+	++src1;
+	++src2;
+	dst0 += 2;
+	dst1 += 2;
+
+	/* central pixels */
+	count -= 2;
+	while (count) {
+		if (src1[-1] == src0[0] && src2[0] != src0[0] && src1[1] != src0[0])
+			dst0[0] = src0[0];
+		else
+			dst0[0] = src1[0];
+		if (src1[1] == src0[0] && src2[0] != src0[0] && src1[-1] != src0[0])
+			dst0[1] = src0[0];
+		else
+			dst0[1] = src1[0];
+
+		if (src1[-1] == src2[0] && src0[0] != src2[0] && src1[1] != src2[0])
+			dst1[0] = src2[0];
+		else
+			dst1[0] = src1[0];
+		if (src1[1] == src2[0] && src0[0] != src2[0] && src1[-1] != src2[0])
+			dst1[1] = src2[0];
+		else
+			dst1[1] = src1[0];
+
+		++src0;
+		++src1;
+		++src2;
+		dst0 += 2;
+		dst1 += 2;
+		--count;
+	}
+
+	/* last pixel */
+	if (src1[-1] == src0[0] && src2[0] != src0[0])
+		dst0[0] = src0[0];
+	else
+		dst0[0] = src1[0];
+	if (src1[-1] == src2[0] && src0[0] != src2[0])
+		dst1[0] = src2[0];
+	else
+		dst1[0] = src1[0];
+	dst0[1] = src1[0];
+	dst1[1] = src1[0];
+}
+
+/*
+ * Same as scale2x_line_8, but the output rows hold 16-bit pixels: the
+ * comparisons are done on the 8-bit source values and the chosen value is
+ * translated through render.pal.lookup.bpp16 when it is stored.
+ */
+static void scale2x_line_16(scale2x_uint16* dst0, scale2x_uint16* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count)
+{
+	assert(count >= 2);
+
+	/* first pixel */
+	dst0[0] = render.pal.lookup.bpp16[src1[0]];
+	dst1[0] = render.pal.lookup.bpp16[src1[0]];
+	if (src1[1] == src0[0] && src2[0] != src0[0])
+		dst0[1] = render.pal.lookup.bpp16[src0[0]];
+	else
+		dst0[1] = render.pal.lookup.bpp16[src1[0]];
+	if (src1[1] == src2[0] && src0[0] != src2[0])
+		dst1[1] = render.pal.lookup.bpp16[src2[0]];
+	else
+		dst1[1] = render.pal.lookup.bpp16[src1[0]];
+	++src0;
+	++src1;
+	++src2;
+	dst0 += 2;
+	dst1 += 2;
+
+	/* central pixels */
+	count -= 2;
+	while (count) {
+		if (src1[-1] == src0[0] && src2[0] != src0[0] && src1[1] != src0[0])
+			dst0[0] = render.pal.lookup.bpp16[src0[0]];
+		else
+			dst0[0] = render.pal.lookup.bpp16[src1[0]];
+		if (src1[1] == src0[0] && src2[0] != src0[0] && src1[-1] != src0[0])
+			dst0[1] = render.pal.lookup.bpp16[src0[0]];
+		else
+			dst0[1] = render.pal.lookup.bpp16[src1[0]];
+
+		if (src1[-1] == src2[0] && src0[0] != src2[0] && src1[1] != src2[0])
+			dst1[0] = render.pal.lookup.bpp16[src2[0]];
+		else
+			dst1[0] = render.pal.lookup.bpp16[src1[0]];
+		if (src1[1] == src2[0] && src0[0] != src2[0] && src1[-1] != src2[0])
+			dst1[1] = render.pal.lookup.bpp16[src2[0]];
+		else
+			dst1[1] = render.pal.lookup.bpp16[src1[0]];
+
+		++src0;
+		++src1;
+		++src2;
+		dst0 += 2;
+		dst1 += 2;
+		--count;
+	}
+
+	/* last pixel */
+	if (src1[-1] == src0[0] && src2[0] != src0[0])
+		dst0[0] = render.pal.lookup.bpp16[src0[0]];
+	else
+		dst0[0] = render.pal.lookup.bpp16[src1[0]];
+	if (src1[-1] == src2[0] && src0[0] != src2[0])
+		dst1[0] = render.pal.lookup.bpp16[src2[0]];
+	else
+		dst1[0] = render.pal.lookup.bpp16[src1[0]];
+	dst0[1] = render.pal.lookup.bpp16[src1[0]];
+	dst1[1] = render.pal.lookup.bpp16[src1[0]];
+}
+
+/*
+ * Same as scale2x_line_8, but the output rows hold 32-bit pixels taken
+ * from render.pal.lookup.bpp32.
+ */
+static void scale2x_line_32(scale2x_uint32* dst0, scale2x_uint32* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count)
+{
+	assert(count >= 2);
+
+	/* first pixel */
+	dst0[0] = render.pal.lookup.bpp32[src1[0]];
+	dst1[0] = render.pal.lookup.bpp32[src1[0]];
+	if (src1[1] == src0[0] && src2[0] != src0[0])
+		dst0[1] = render.pal.lookup.bpp32[src0[0]];
+	else
+		dst0[1] = render.pal.lookup.bpp32[src1[0]];
+	if (src1[1] == src2[0] && src0[0] != src2[0])
+		dst1[1] = render.pal.lookup.bpp32[src2[0]];
+	else
+		dst1[1] = render.pal.lookup.bpp32[src1[0]];
+	++src0;
+	++src1;
+	++src2;
+	dst0 += 2;
+	dst1 += 2;
+
+	/* central pixels */
+	count -= 2;
+	while (count) {
+		if (src1[-1] == src0[0] && src2[0] != src0[0] && src1[1] != src0[0])
+			dst0[0] = render.pal.lookup.bpp32[src0[0]];
+		else
+			dst0[0] = render.pal.lookup.bpp32[src1[0]];
+		if (src1[1] == src0[0] && src2[0] != src0[0] && src1[-1] != src0[0])
+			dst0[1] = render.pal.lookup.bpp32[src0[0]];
+		else
+			dst0[1] = render.pal.lookup.bpp32[src1[0]];
+
+		if (src1[-1] == src2[0] && src0[0] != src2[0] && src1[1] != src2[0])
+			dst1[0] = render.pal.lookup.bpp32[src2[0]];
+		else
+			dst1[0] = render.pal.lookup.bpp32[src1[0]];
+		if (src1[1] == src2[0] && src0[0] != src2[0] && src1[-1] != src2[0])
+			dst1[1] = render.pal.lookup.bpp32[src2[0]];
+		else
+			dst1[1] = render.pal.lookup.bpp32[src1[0]];
+
+		++src0;
+		++src1;
+		++src2;
+		dst0 += 2;
+		dst1 += 2;
+		--count;
+	}
+
+	/* last pixel */
+	if (src1[-1] == src0[0] && src2[0] != src0[0])
+		dst0[0] = render.pal.lookup.bpp32[src0[0]];
+	else
+		dst0[0] = render.pal.lookup.bpp32[src1[0]];
+	if (src1[-1] == src2[0] && src0[0] != src2[0])
+		dst1[0] = render.pal.lookup.bpp32[src2[0]];
+	else
+		dst1[0] = render.pal.lookup.bpp32[src1[0]];
+	dst0[1] = render.pal.lookup.bpp32[src1[0]];
+	dst1[1] = render.pal.lookup.bpp32[src1[0]];
+}
+
+/*
+ * Part handler: Scale2x a dy-row part into an 8-bit destination.
+ * src points at the first source row of the part; output starts at
+ * destination row 2*y. Parts with fewer than 3 rows are not drawn.
+ * For every row the line scaler receives (row above, row, row below);
+ * the first row stands in for its missing upper neighbour and the last
+ * row for its missing lower neighbour, so no row outside the part is read.
+ * NOTE(review): x is not used, output always starts at the left edge of
+ * the destination row -- presumably parts always span full rows; confirm.
+ */
+static void Scale2x_8(Bit8u * src,Bitu x,Bitu y,Bitu dx,Bitu dy) {
+	if (dy<3) return;
+	Bit8u * dest=(Bit8u *)render.op.pixels+2*y*render.op.pitch;
+	/* First line */
+	scale2x_line_8(dest,dest+render.op.pitch,src,src,src+render.src.pitch,dx);
+	dest+=render.op.pitch*2;
+	src+=render.src.pitch;
+	dy-=2;
+	/* Middle part, src is the current row from here on */
+	for (;dy>0;dy--) {
+		scale2x_line_8(dest,dest+render.op.pitch,src-render.src.pitch,src,src+render.src.pitch,dx);
+		dest+=render.op.pitch*2;
+		src+=render.src.pitch;
+	}
+	/* Last Line */
+	scale2x_line_8(dest,dest+render.op.pitch,src-render.src.pitch,src,src,dx);
+}
+
+/* Part handler: as Scale2x_8, for a 16-bit destination (see scale2x_line_16) */
+static void Scale2x_16(Bit8u * src,Bitu x,Bitu y,Bitu dx,Bitu dy) {
+	if (dy<3) return;
+	Bit8u * dest=(Bit8u *)render.op.pixels+2*y*render.op.pitch;
+	/* First line */
+	scale2x_line_16((Bit16u *)dest,(Bit16u *)(dest+render.op.pitch),src,src,src+render.src.pitch,dx);
+	dest+=render.op.pitch*2;
+	src+=render.src.pitch;
+	dy-=2;
+	/* Middle part, src is the current row from here on */
+	for (;dy>0;dy--) {
+		scale2x_line_16((Bit16u *)dest,(Bit16u *)(dest+render.op.pitch),src-render.src.pitch,src,src+render.src.pitch,dx);
+		dest+=render.op.pitch*2;
+		src+=render.src.pitch;
+	}
+	/* Last Line */
+	scale2x_line_16((Bit16u *)dest,(Bit16u *)(dest+render.op.pitch),src-render.src.pitch,src,src,dx);
+}
+
+/* Part handler: as Scale2x_8, for a 32-bit destination (see scale2x_line_32) */
+static void Scale2x_32(Bit8u * src,Bitu x,Bitu y,Bitu dx,Bitu dy) {
+	if (dy<3) return;
+	Bit8u * dest=(Bit8u *)render.op.pixels+2*y*render.op.pitch;
+	/* First line */
+	scale2x_line_32((Bit32u *)dest,(Bit32u *)(dest+render.op.pitch),src,src,src+render.src.pitch,dx);
+	dest+=render.op.pitch*2;
+	src+=render.src.pitch;
+	dy-=2;
+	/* Middle part, src is the current row from here on */
+	for (;dy>0;dy--) {
+		scale2x_line_32((Bit32u *)dest,(Bit32u *)(dest+render.op.pitch),src-render.src.pitch,src,src+render.src.pitch,dx);
+		dest+=render.op.pitch*2;
+		src+=render.src.pitch;
+	}
+	/* Last Line */
+	scale2x_line_32((Bit32u *)dest,(Bit32u *)(dest+render.op.pitch),src-render.src.pitch,src,src,dx);
+}
+
+#else
+
+#define SCALE2X_MMX 1
+
+static __inline__ void scale2x_8_mmx_single(scale2x_uint8* dst, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned count)
+{
+	assert(count >= 16);
+	assert(count % 8 == 0);
+
+	/* always do the first and last run */
+	count -= 2*8;
+
+	__asm__ __volatile__(
+/* first run */
+		/* set the current, current_pre, current_next registers */
+		"movq 0(%1),%%mm0\n"
+		"movq 0(%1),%%mm7\n"
+		"movq 8(%1),%%mm1\n"
+
"psllq $56,%%mm0\n" + "psllq $56,%%mm1\n" + "psrlq $56,%%mm0\n" + "movq %%mm7,%%mm2\n" + "movq %%mm7,%%mm3\n" + "psllq $8,%%mm2\n" + "psrlq $8,%%mm3\n" + "por %%mm2,%%mm0\n" + "por %%mm3,%%mm1\n" + + /* current_upper */ + "movq (%0),%%mm6\n" + + /* compute the upper-left pixel for dst on %%mm2 */ + /* compute the upper-right pixel for dst on %%mm4 */ + "movq %%mm0,%%mm2\n" + "movq %%mm1,%%mm4\n" + "movq %%mm0,%%mm3\n" + "movq %%mm1,%%mm5\n" + "pcmpeqb %%mm6,%%mm2\n" + "pcmpeqb %%mm6,%%mm4\n" + "pcmpeqb (%2),%%mm3\n" + "pcmpeqb (%2),%%mm5\n" + "pandn %%mm2,%%mm3\n" + "pandn %%mm4,%%mm5\n" + "movq %%mm0,%%mm2\n" + "movq %%mm1,%%mm4\n" + "pcmpeqb %%mm1,%%mm2\n" + "pcmpeqb %%mm0,%%mm4\n" + "pandn %%mm3,%%mm2\n" + "pandn %%mm5,%%mm4\n" + "movq %%mm2,%%mm3\n" + "movq %%mm4,%%mm5\n" + "pand %%mm6,%%mm2\n" + "pand %%mm6,%%mm4\n" + "pandn %%mm7,%%mm3\n" + "pandn %%mm7,%%mm5\n" + "por %%mm3,%%mm2\n" + "por %%mm5,%%mm4\n" + + /* set *dst */ + "movq %%mm2,%%mm3\n" + "punpcklbw %%mm4,%%mm2\n" + "punpckhbw %%mm4,%%mm3\n" + "movq %%mm2,(%3)\n" + "movq %%mm3,8(%3)\n" + + /* next */ + "addl $8,%0\n" + "addl $8,%1\n" + "addl $8,%2\n" + "addl $16,%3\n" + +/* central runs */ + "shrl $3,%4\n" + "jz 1f\n" + + "0:\n" + + /* set the current, current_pre, current_next registers */ + "movq -8(%1),%%mm0\n" + "movq (%1),%%mm7\n" + "movq 8(%1),%%mm1\n" + "psrlq $56,%%mm0\n" + "psllq $56,%%mm1\n" + "movq %%mm7,%%mm2\n" + "movq %%mm7,%%mm3\n" + "psllq $8,%%mm2\n" + "psrlq $8,%%mm3\n" + "por %%mm2,%%mm0\n" + "por %%mm3,%%mm1\n" + + /* current_upper */ + "movq (%0),%%mm6\n" + + /* compute the upper-left pixel for dst on %%mm2 */ + /* compute the upper-right pixel for dst on %%mm4 */ + "movq %%mm0,%%mm2\n" + "movq %%mm1,%%mm4\n" + "movq %%mm0,%%mm3\n" + "movq %%mm1,%%mm5\n" + "pcmpeqb %%mm6,%%mm2\n" + "pcmpeqb %%mm6,%%mm4\n" + "pcmpeqb (%2),%%mm3\n" + "pcmpeqb (%2),%%mm5\n" + "pandn %%mm2,%%mm3\n" + "pandn %%mm4,%%mm5\n" + "movq %%mm0,%%mm2\n" + "movq %%mm1,%%mm4\n" + "pcmpeqb %%mm1,%%mm2\n" + 
"pcmpeqb %%mm0,%%mm4\n" + "pandn %%mm3,%%mm2\n" + "pandn %%mm5,%%mm4\n" + "movq %%mm2,%%mm3\n" + "movq %%mm4,%%mm5\n" + "pand %%mm6,%%mm2\n" + "pand %%mm6,%%mm4\n" + "pandn %%mm7,%%mm3\n" + "pandn %%mm7,%%mm5\n" + "por %%mm3,%%mm2\n" + "por %%mm5,%%mm4\n" + + /* set *dst */ + "movq %%mm2,%%mm3\n" + "punpcklbw %%mm4,%%mm2\n" + "punpckhbw %%mm4,%%mm3\n" + "movq %%mm2,(%3)\n" + "movq %%mm3,8(%3)\n" + + /* next */ + "addl $8,%0\n" + "addl $8,%1\n" + "addl $8,%2\n" + "addl $16,%3\n" + + "decl %4\n" + "jnz 0b\n" + "1:\n" + +/* final run */ + /* set the current, current_pre, current_next registers */ + "movq (%1),%%mm1\n" + "movq (%1),%%mm7\n" + "movq -8(%1),%%mm0\n" + "psrlq $56,%%mm1\n" + "psrlq $56,%%mm0\n" + "psllq $56,%%mm1\n" + "movq %%mm7,%%mm2\n" + "movq %%mm7,%%mm3\n" + "psllq $8,%%mm2\n" + "psrlq $8,%%mm3\n" + "por %%mm2,%%mm0\n" + "por %%mm3,%%mm1\n" + + /* current_upper */ + "movq (%0),%%mm6\n" + + /* compute the upper-left pixel for dst on %%mm2 */ + /* compute the upper-right pixel for dst on %%mm4 */ + "movq %%mm0,%%mm2\n" + "movq %%mm1,%%mm4\n" + "movq %%mm0,%%mm3\n" + "movq %%mm1,%%mm5\n" + "pcmpeqb %%mm6,%%mm2\n" + "pcmpeqb %%mm6,%%mm4\n" + "pcmpeqb (%2),%%mm3\n" + "pcmpeqb (%2),%%mm5\n" + "pandn %%mm2,%%mm3\n" + "pandn %%mm4,%%mm5\n" + "movq %%mm0,%%mm2\n" + "movq %%mm1,%%mm4\n" + "pcmpeqb %%mm1,%%mm2\n" + "pcmpeqb %%mm0,%%mm4\n" + "pandn %%mm3,%%mm2\n" + "pandn %%mm5,%%mm4\n" + "movq %%mm2,%%mm3\n" + "movq %%mm4,%%mm5\n" + "pand %%mm6,%%mm2\n" + "pand %%mm6,%%mm4\n" + "pandn %%mm7,%%mm3\n" + "pandn %%mm7,%%mm5\n" + "por %%mm3,%%mm2\n" + "por %%mm5,%%mm4\n" + + /* set *dst */ + "movq %%mm2,%%mm3\n" + "punpcklbw %%mm4,%%mm2\n" + "punpckhbw %%mm4,%%mm3\n" + "movq %%mm2,(%3)\n" + "movq %%mm3,8(%3)\n" + + : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (dst), "+r" (count) + : + : "cc" + ); +} + +static void scale2x_line_8_mmx(scale2x_uint8* dst0, scale2x_uint8* dst1, const scale2x_uint8* src0, const scale2x_uint8* src1, const scale2x_uint8* src2, unsigned 
count)
+{
+	assert(count >= 16);
+	assert(count % 8 == 0);
+
+	scale2x_8_mmx_single(dst0, src0, src1, src2, count);
+	scale2x_8_mmx_single(dst1, src2, src1, src0, count);
+}
+
+#endif
+
+/*
+ * Mode callback for Scale2x output.
+ * Returns immediately unless MODE_SET is present in flags. Otherwise it
+ * records the output geometry in render.op, marks the operation as
+ * OP_Scale2x, selects the part handler and calls RENDER_ResetPal().
+ *
+ * With SCALE2X_NORMAL the handler is chosen by destination depth
+ * (E_Exit for anything other than 8/16/32). Scale2x_8/16/32 only exist in
+ * that configuration, so they are only referenced inside that branch.
+ * NOTE(review): the SCALE2X_MMX branch refers to Scale2x_8_mmx, which is
+ * not defined in the code visible here -- confirm it exists before
+ * enabling the MMX build.
+ */
+static void Render_Scale2x_CallBack(Bitu width,Bitu height,Bitu bpp,Bitu pitch,Bitu flags) {
+	if (!(flags & MODE_SET)) return;
+	render.op.width=width;
+	render.op.height=height;
+	render.op.bpp=bpp;
+	render.op.pitch=pitch;
+	render.op.type=OP_Scale2x;
+#if defined(SCALE2X_NORMAL)
+	/* Default handler, replaced below for the supported depths */
+	render.op.part_handler=Scale2x_8;
+	switch (bpp) {
+	case 8:
+		render.op.part_handler=Scale2x_8;
+		break;
+	case 16:
+		render.op.part_handler=Scale2x_16;
+		break;
+	case 32:
+		render.op.part_handler=Scale2x_32;
+		break;
+	default:
+		E_Exit("RENDER:Unsupported display depth of %d",bpp);
+		break;
+	}
+#elif defined(SCALE2X_MMX)
+	assert (bpp==8);
+	render.op.part_handler=Scale2x_8_mmx;
+#endif
+	RENDER_ResetPal();
+}
+
+#endif
diff --git a/src/gui/render_support.h b/src/gui/render_support.h
deleted file mode 100644
index 3d91cb0f..00000000
--- a/src/gui/render_support.h
+++ /dev/null
@@ -1,228 +0,0 @@
-static void Render_Normal_8_None(Bit8u * src,Bitu x,Bitu y,Bitu dx,Bitu dy) {
-	Bit8u * dest=(Bit8u *)render.op.pixels+y*render.op.pitch+x;
-	Bitu next_src=render.src.pitch-dx;
-	Bitu next_dest=render.op.pitch-dx;
-	Bitu rem=dx&3;dx>>=2;
-	for (;dy>0;dy--) {
-		Bitu tempx;
-		for (tempx=dx;tempx>0;tempx--) {
-			Bit32u temp=*(Bit32u *)src;src+=4;
-			*(Bit32u *)dest=temp;
-			dest+=4;
-		}
-		for (tempx=rem;tempx>0;tempx--) {
-			*dest++=*src++;
-		}
-		src+=next_src;dest+=next_dest;
-	}
-}
-
-static void Render_Normal_8_DoubleWidth(Bit8u * src,Bitu x,Bitu y,Bitu dx,Bitu dy) {
-	Bit8u * dest=(Bit8u *)render.op.pixels+y*render.op.pitch+x*2;
-	Bitu next_src=render.src.pitch-dx;
-	Bitu next_dest=render.op.pitch-dx*2;
-	for (;dy>0;dy--) {
-		for (Bitu tempx=dx;tempx>0;tempx--) {
-			*dest=*src;*(dest+1)=*src;
-			src++;dest+=2;
-		}
-		src+=next_src;dest+=next_dest;
-	}
-}
-
-static void Render_Normal_8_DoubleHeight(Bit8u * 
src,Bitu x,Bitu y,Bitu dx,Bitu dy) { - Bit8u * dest=(Bit8u *)render.op.pixels+2*y*render.op.pitch+x; - Bitu next_src=render.src.pitch-dx; - Bitu next_dest=(2*render.op.pitch)-dx; - Bitu rem=dx&3;dx>>=2; - for (;dy>0;dy--) { - Bitu tempx; - for (tempx=dx;tempx>0;tempx--) { - Bit32u temp=*(Bit32u *)src;src+=4; - *(Bit32u *)dest=temp; - *(Bit32u *)(dest+render.op.pitch)=temp; - dest+=4; - } - for (tempx=rem;tempx>0;tempx--) { - *dest=*src; - *(dest+render.op.pitch)=*src; - dest++; - } - src+=next_src;dest+=next_dest; - } -} - -static void Render_Normal_8_DoubleBoth(Bit8u * src,Bitu x,Bitu y,Bitu dx,Bitu dy) { - Bit8u * dest=(Bit8u *)render.op.pixels+y*render.op.pitch+x; - Bitu next_src=render.src.pitch-dx; - Bitu next_dest=(2*render.op.pitch)-dx*2; - for (;dy>0;dy--) { - for (Bitu tempx=dx;tempx>0;tempx--) { - Bit8u val=src[0];src++; - dest[0]=val;dest[1]=val; - dest[render.op.pitch]=val;dest[render.op.pitch+1]=val; - dest+=2; - } - src+=next_src;dest+=next_dest; - } -} - -static void Render_Normal_16_None(Bit8u * src,Bitu x,Bitu y,Bitu dx,Bitu dy) { - Bit8u * dest=(Bit8u *)render.op.pixels+y*render.op.pitch+x; - Bitu next_src=render.src.pitch-dx; - Bitu next_dest=render.op.pitch-dx*2; - for (;dy>0;dy--) { - for (Bitu tempx=dx;tempx>0;tempx--) { - Bit16u val=render.pal.lookup.bpp16[src[0]];src++; - *(Bit16u *)dest=val; - dest+=2; - } - src+=next_src;dest+=next_dest; - } -} - -static void Render_Normal_16_DoubleWidth(Bit8u * src,Bitu x,Bitu y,Bitu dx,Bitu dy) { - Bit8u * dest=(Bit8u *)render.op.pixels+y*render.op.pitch+x*4; - Bitu next_src=render.src.pitch-dx; - Bitu next_dest=render.op.pitch-dx*4; - for (;dy>0;dy--) { - for (Bitu tempx=dx;tempx>0;tempx--) { - Bit16u val=render.pal.lookup.bpp16[src[0]];src++; - *(Bit16u *)dest=val; - *(Bit16u *)(dest+2)=val; - dest+=4; - } - src+=next_src;dest+=next_dest; - } -} - -static void Render_Normal_16_DoubleHeight(Bit8u * src,Bitu x,Bitu y,Bitu dx,Bitu dy) { - Bit8u * dest=(Bit8u *)render.op.pixels+2*y*render.op.pitch+x*2; - 
Bitu next_src=render.src.pitch-dx; - Bitu next_dest=(2*render.op.pitch)-dx*2; - for (;dy>0;dy--) { - for (Bitu tempx=dx;tempx>0;tempx--) { - Bit16u val=render.pal.lookup.bpp16[src[0]];src++; - *(Bit16u *)dest=val; - *(Bit16u *)(dest+render.op.pitch)=val; - dest+=2; - } - src+=next_src;dest+=next_dest; - } -} - -static void Render_Normal_16_DoubleBoth(Bit8u * src,Bitu x,Bitu y,Bitu dx,Bitu dy) { - Bit8u * dest=(Bit8u *)render.op.pixels+2*y*render.op.pitch+x*4; - Bitu next_src=render.src.pitch-dx; - Bitu next_dest=(2*render.op.pitch)-dx*4; - for (;dy>0;dy--) { - for (Bitu tempx=dx;tempx>0;tempx--) { - Bit16u val=render.pal.lookup.bpp16[src[0]];src++; - *(Bit16u *)(dest+0)=val; - *(Bit16u *)(dest+2)=val; - *(Bit16u *)(dest+render.op.pitch)=val; - *(Bit16u *)(dest+render.op.pitch+2)=val; - dest+=4; - } - src+=next_src;dest+=next_dest; - } -} - - -static void Render_Normal_32_None(Bit8u * src,Bitu x,Bitu y,Bitu dx,Bitu dy) { - Bit8u * dest=(Bit8u *)render.op.pixels+y*render.op.pitch+x*4; - Bitu next_src=render.src.pitch-dx; - Bitu next_dest=render.op.pitch-dx*4; - for (;dy>0;dy--) { - for (Bitu tempx=dx;tempx>0;tempx--) { - Bit32u val=render.pal.lookup.bpp32[src[0]];src++; - *(Bit32u *)dest=val; - dest+=4; - } - src+=next_src;dest+=next_dest; - } -} - -static void Render_Normal_32_DoubleWidth(Bit8u * src,Bitu x,Bitu y,Bitu dx,Bitu dy) { - Bit8u * dest=(Bit8u *)render.op.pixels+y*render.op.pitch+x*8; - Bitu next_src=render.src.pitch-dx; - Bitu next_dest=render.op.pitch-dx*8; - for (;dy>0;dy--) { - for (Bitu tempx=dx;tempx>0;tempx--) { - Bit32u val=render.pal.lookup.bpp32[src[0]];src++; - *(Bit32u *)dest=val; - *(Bit32u *)(dest+4)=val; - dest+=8; - } - src+=next_src;dest+=next_dest; - } -} - -static void Render_Normal_32_DoubleHeight(Bit8u * src,Bitu x,Bitu y,Bitu dx,Bitu dy) { - Bit8u * dest=(Bit8u *)render.op.pixels+2*y*render.op.pitch+x*4; - Bitu next_src=render.src.pitch-dx; - Bitu next_dest=(2*render.op.pitch)-dx*4; - for (;dy>0;dy--) { - for (Bitu 
tempx=dx;tempx>0;tempx--) { - Bit32u val=render.pal.lookup.bpp32[src[0]];src++; - *(Bit32u *)dest=val; - *(Bit32u *)(dest+render.op.pitch)=val; - dest+=4; - } - src+=next_src;dest+=next_dest; - } -} - -static void Render_Normal_32_DoubleBoth(Bit8u * src,Bitu x,Bitu y,Bitu dx,Bitu dy) { - Bit8u * dest=(Bit8u *)render.op.pixels+2*y*render.op.pitch+x*8; - Bitu next_src=render.src.pitch-dx; - Bitu next_dest=(2*render.op.pitch)-dx*8; - for (;dy>0;dy--) { - for (Bitu tempx=dx;tempx>0;tempx--) { - Bit32u val=render.pal.lookup.bpp32[src[0]];src++; - *(Bit32u *)(dest+0)=val; - *(Bit32u *)(dest+4)=val; - *(Bit32u *)(dest+render.op.pitch)=val; - *(Bit32u *)(dest+render.op.pitch+4)=val; - dest+=8; - } - src+=next_src;dest+=next_dest; - } -} - - -static RENDER_Part_Handler Render_Normal_8_Table[4]= { - Render_Normal_8_None,Render_Normal_8_DoubleWidth,Render_Normal_8_DoubleHeight,Render_Normal_8_DoubleBoth -}; - -static RENDER_Part_Handler Render_Normal_16_Table[4]= { - Render_Normal_16_None,Render_Normal_16_DoubleWidth,Render_Normal_16_DoubleHeight,Render_Normal_16_DoubleBoth -}; - -static RENDER_Part_Handler Render_Normal_32_Table[4]= { - Render_Normal_32_None,Render_Normal_32_DoubleWidth,Render_Normal_32_DoubleHeight,Render_Normal_32_DoubleBoth -}; - - -static void Render_Normal_CallBack(Bitu width,Bitu height,Bitu bpp,Bitu pitch,Bitu flags) { - if (!(flags & MODE_SET)) return; - render.op.width=width; - render.op.height=height; - render.op.bpp=bpp; - render.op.pitch=pitch; - switch (bpp) { - case 8: - render.src.part_handler=Render_Normal_8_Table[render.src.flags]; - break; - case 16: - render.src.part_handler=Render_Normal_16_Table[render.src.flags]; - break; - case 32: - render.src.part_handler=Render_Normal_32_Table[render.src.flags]; - break; - default: - E_Exit("RENDER:Unsupported display depth of %d",bpp); - break; - } - RENDER_ResetPal(); -}