Date: Thu, 21 May 1998 21:46:04 +0200 (MET DST)
From: Geert Uytterhoeven <geert@thomas.kotnet.org>
To: Linux/m68k <linux-m68k@lists.linux-m68k.org>
cc: Bernd Harries <bharries@vossnet.de>
Subject: L68K: Acceleration for ATI boards
Sender: owner-linux-m68k@phil.uni-sb.de


Finally I got acceleration working on my PPC. Let's hope it also works on m68k
(Bernd?).

Moreover, this is the first frame buffer device that supports bitblt
acceleration not only in 8 bpp, but also in 16 and 32 bpp! Especially on
machines with slow CPUs (m68k) and slow buses (ISA), this should make a big
difference.

Some benchmark data (PCI ATI RAGE II+DVD, 4 MB SGRAM, 1024x768, 75 Hz, 8x16
font) for scrolling:

  o 8 bpp:	memcpy: 20.34s		bitblt:  1.66s
  o 16 bpp:	memcpy: 41.52s		bitblt:  3.75s
  o 32 bpp:	memcpy: 90.67s		bitblt: 10.99s

The `bad' results for 32 bpp are mainly caused by the fact that SGRAM is not
dual ported (unlike VRAM).

However, this is not the maximum you can achieve. By combining bitblt with
panning on a large virtual screen (1024x4000), I got a speedup of more than a
factor of 100 compared to the original CPU-does-memcpy scheme (20.34s -> 0.19s):

  o 8bpp pan:	memcpy: 0.41s		bitblt:  0.19s

--- m68k-2.1.101/drivers/video/aty.h.orig	Thu May 21 21:28:45 1998
+++ m68k-2.1.101/drivers/video/aty.h	Thu May 21 21:28:21 1998
@@ -223,8 +223,8 @@
 #define DP_BKGD_CLR		0x02C0	/* Dword offset 0_B0 */
 #define DP_FOG_CLR		0x02C4	/* Dword offset 0_B1 */
 #define DP_FRGD_CLR		0x02C4	/* Dword offset 0_B1 */
-#define DP_WRITE_MSK		0x02C8	/* Dword offset 0_B2 */
-#define DP_CHAIN_MSK		0x02CC	/* Dword offset 0_B3 */
+#define DP_WRITE_MASK		0x02C8	/* Dword offset 0_B2 */
+#define DP_CHAIN_MASK		0x02CC	/* Dword offset 0_B3 */
 #define DP_PIX_WIDTH		0x02D0	/* Dword offset 0_B4 */
 #define DP_MIX			0x02D4	/* Dword offset 0_B5 */
 #define DP_SRC			0x02D8	/* Dword offset 0_B6 */
@@ -238,7 +238,7 @@
 #define DP_SET_GUI_ENGINE	0x02FC	/* Dword offset 0_BF */
 
 #define CLR_CMP_CLR		0x0300	/* Dword offset 0_C0 */
-#define CLR_CMP_MSK		0x0304	/* Dword offset 0_C1 */
+#define CLR_CMP_MASK		0x0304	/* Dword offset 0_C1 */
 #define CLR_CMP_CNTL		0x0308	/* Dword offset 0_C2 */
 
 #define FIFO_STAT		0x0310	/* Dword offset 0_C4 */
@@ -936,30 +936,4 @@
 #define MACH64_NUM_CLOCKS	16
 #define MACH64_NUM_FREQS	50
 
-/* Wait until "v" queue entries are free */
-#define aty_WaitQueue(v, info) \
-{ \
-    while ((aty_ld_le32(FIFO_STAT, info) & 0xffff) > ((u16)(0x8000 >> (v)))); \
-}
-
-/* Wait until GP is idle and queue is empty */
-#define aty_WaitIdleEmpty(info) \
-{ \
-    aty_WaitQueue(16, info); \
-    while ((aty_ld_le32(GUI_STAT, info) & 1) != 0); \
-}
-
-#define SKIP_2(_v) ((((_v)<<1)&0xfff8)|((_v)&0x3)|(((_v)&0x80)>>5))
-
-#define MACH64_BIT_BLT(_srcx, _srcy, _dstx, _dsty, _w, _h, _dir, info) \
-{ \
-    aty_WaitQueue(5, info); \
-    aty_st_le32(SRC_Y_X, (((_srcx) << 16) | ((_srcy) & 0x0000ffff)), info); \
-    aty_st_le32(SRC_WIDTH1, (_w), info); \
-    aty_st_le32(DST_CNTL, (_dir), info); \
-    aty_st_le32(DST_Y_X, (((_dstx) << 16) | ((_dsty) & 0x0000ffff)), info); \
-    aty_st_le32(DST_HEIGHT_WIDTH, (((_w) << 16) | ((_h) & 0x0000ffff)), \
-		info); \
-}
 #endif /* REGMACH64_H */
-
--- m68k-2.1.101/drivers/video/atyfb.c.orig	Fri May 15 00:58:03 1998
+++ m68k-2.1.101/drivers/video/atyfb.c	Thu May 21 21:31:39 1998
@@ -21,15 +21,12 @@
  *  more details.
  */
 
-
 /******************************************************************************
 
   TODO:
 
     - support arbitrary video modes
 
-    - fix acceleration on PPC
-
 ******************************************************************************/
 
 #include <linux/config.h>
@@ -49,7 +46,7 @@
 #include <linux/pci.h>
 #include <linux/nvram.h>
 #ifdef CONFIG_FB_COMPAT_XPMAC
-#include <linux/vc_ioctl.h>
+#include <asm/vc_ioctl.h>
 #endif
 #include <asm/io.h>
 #if defined(CONFIG_PMAC) || defined(CONFIG_CHRP)
@@ -130,10 +127,6 @@
 static int default_vmode = VMODE_NVRAM;
 static int default_cmode = CMODE_NVRAM;
 
-static struct atyfb_par default_par;
-static struct atyfb_par current_par;
-
-
 #if defined(CONFIG_PMAC) || defined(CONFIG_CHRP)
 /*
  * Addresses in NVRAM where video mode and pixel size are stored.
@@ -254,6 +247,8 @@
     u32 total_vram;
     struct aty_cmap_regs *aty_cmap_regs;
     struct { u8 red, green, blue, pad; } palette[256];
+    struct atyfb_par default_par;
+    struct atyfb_par current_par;
 };
 
 #ifdef CONFIG_ATARI
@@ -366,6 +361,12 @@
 #ifdef CONFIG_FBCON_CFB8
 static struct display_switch fbcon_aty8;
 #endif
+#ifdef CONFIG_FBCON_CFB16
+static struct display_switch fbcon_aty16;
+#endif
+#ifdef CONFIG_FBCON_CFB32
+static struct display_switch fbcon_aty32;
+#endif
 
 
 #ifdef CONFIG_FB_COMPAT_XPMAC
@@ -417,9 +418,7 @@
     asm("lwbrx %0,%1,%2": "=r"(val):"r"(regindex), "r"(temp));
 #else
     temp = info->ati_regbase+regindex;
-    val = *((volatile u32 *)(temp));
-    val = (val>>16) | (val<<16);
-    val = ((val>>8) & 0x00ff00ff) | ((val<<8) & 0xff00ff00);
+    val = le32_to_cpu(*((volatile u32 *)(temp)));
 #endif
     return val;
 }
@@ -434,9 +433,7 @@
     asm("stwbrx %0,%1,%2": : "r"(val), "r"(regindex), "r"(temp):"memory");
 #else
     temp = info->ati_regbase+regindex;
-    val = (val>>16) | (val<<16);
-    val = ((val>>8) & 0x00ff00ff) | ((val<<8) & 0xff00ff00);
-    *((volatile u32 *)(temp)) = val;
+    *((volatile u32 *)(temp)) = cpu_to_le32(val);
 #endif
 }
 
@@ -452,9 +449,178 @@
     *(volatile u8 *)(info->ati_regbase+regindex) = val;
 }
 
+    /*
+     *  All writes to draw engine registers are automatically routed through a
+     *  32-bit-wide, 16-entry-deep command FIFO ...
+     *  Register writes to registers with DWORD offsets less than 40h are not
+     *  FIFOed.
+     *  (from Chapter 5 of the Mach64 Programmer's Guide)
+     */
+
+static inline void wait_for_fifo(u16 entries, struct fb_info_aty *info)
+{
+    while ((aty_ld_le32(FIFO_STAT, info) & 0xffff) >
+	   ((u32)(0x8000 >> entries)));
+}
+
+static inline void wait_for_idle(struct fb_info_aty *info)
+{
+    wait_for_fifo(16, info);
+    while ((aty_ld_le32(GUI_STAT, info) & 1)!= 0);
+}
+
+static void reset_engine(struct fb_info_aty *info)
+{
+    /* reset engine */
+    aty_st_le32(GEN_TEST_CNTL,
+		aty_ld_le32(GEN_TEST_CNTL, info) & ~GUI_ENGINE_ENABLE, info);
+    /* enable engine */
+    aty_st_le32(GEN_TEST_CNTL,
+		aty_ld_le32(GEN_TEST_CNTL, info) | GUI_ENGINE_ENABLE, info);
+    /* ensure engine is not locked up by clearing any FIFO or */
+    /* HOST errors */
+    aty_st_le32(BUS_CNTL, aty_ld_le32(BUS_CNTL, info) | BUS_HOST_ERR_ACK |
+			  BUS_FIFO_ERR_ACK, info);
+}
+
+static void init_engine(const struct atyfb_par *par, struct fb_info_aty *info)
+{
+    u32 pitch_value;
+
+    /* determine modal information from global mode structure */
+    pitch_value = par->vxres;
+
+#if 0
+    if (par->hw.gx.cmode == CMODE_24) {
+	/* In 24 bpp, the engine is in 8 bpp - this requires that all */
+	/* horizontal coordinates and widths must be adjusted */
+	pitch_value = pitch_value * 3;
+    }
+#endif
+
+    /* Reset engine, enable, and clear any engine errors */
+    reset_engine(info);
+    /* Ensure that vga page pointers are set to zero - the upper */
+    /* page pointers are set to 1 to handle overflows in the */
+    /* lower page */
+    aty_st_le32(MEM_VGA_WP_SEL, 0x00010000, info);
+    aty_st_le32(MEM_VGA_RP_SEL, 0x00010000, info);
+
+    /* ---- Setup standard engine context ---- */
+
+    /* All GUI registers here are FIFOed - therefore, wait for */
+    /* the appropriate number of empty FIFO entries */
+    wait_for_fifo(14, info);
+
+    /* enable all registers to be loaded for context loads */
+    aty_st_le32(CONTEXT_MASK, 0xFFFFFFFF, info);
+
+    /* set destination pitch to modal pitch, set offset to zero */
+    aty_st_le32(DST_OFF_PITCH, (pitch_value / 8) << 22, info);
+
+    /* zero these registers (set them to a known state) */
+    aty_st_le32(DST_Y_X, 0, info);
+    aty_st_le32(DST_HEIGHT, 0, info);
+    aty_st_le32(DST_BRES_ERR, 0, info);
+    aty_st_le32(DST_BRES_INC, 0, info);
+    aty_st_le32(DST_BRES_DEC, 0, info);
+
+    /* set destination drawing attributes */
+    aty_st_le32(DST_CNTL, DST_LAST_PEL | DST_Y_TOP_TO_BOTTOM |
+			  DST_X_LEFT_TO_RIGHT, info);
+
+    /* set source pitch to modal pitch, set offset to zero */
+    aty_st_le32(SRC_OFF_PITCH, (pitch_value / 8) << 22, info);
+
+    /* set these registers to a known state */
+    aty_st_le32(SRC_Y_X, 0, info);
+    aty_st_le32(SRC_HEIGHT1_WIDTH1, 1, info);
+    aty_st_le32(SRC_Y_X_START, 0, info);
+    aty_st_le32(SRC_HEIGHT2_WIDTH2, 1, info);
+
+    /* set source pixel retrieving attributes */
+    aty_st_le32(SRC_CNTL, SRC_LINE_X_LEFT_TO_RIGHT, info);
+
+    /* set host attributes */
+    wait_for_fifo(13, info);
+    aty_st_le32(HOST_CNTL, 0, info);
+
+    /* set pattern attributes */
+    aty_st_le32(PAT_REG0, 0, info);
+    aty_st_le32(PAT_REG1, 0, info);
+    aty_st_le32(PAT_CNTL, 0, info);
+
+    /* set scissors to modal size */
+    aty_st_le32(SC_LEFT, 0, info);
+    aty_st_le32(SC_TOP, 0, info);
+    aty_st_le32(SC_BOTTOM, par->vyres-1, info);
+    aty_st_le32(SC_RIGHT, pitch_value-1, info);
+
+    /* set background color to minimum value (usually BLACK) */
+    aty_st_le32(DP_BKGD_CLR, 0, info);
+
+    /* set foreground color to maximum value (usually WHITE) */
+    aty_st_le32(DP_FRGD_CLR, 0xFFFFFFFF, info);
+
+    /* set write mask to affect all pixel bits */
+    aty_st_le32(DP_WRITE_MASK, 0xFFFFFFFF, info);
+
+    /* set foreground mix to overpaint and background mix to */
+    /* no-effect */
+    aty_st_le32(DP_MIX, FRGD_MIX_S | BKGD_MIX_D, info);
+
+    /* set primary source pixel channel to foreground color */
+    /* register */
+    aty_st_le32(DP_SRC, FRGD_SRC_FRGD_CLR, info);
+
+    /* set compare functionality to false (no-effect on */
+    /* destination) */
+    wait_for_fifo(3, info);
+    aty_st_le32(CLR_CMP_CLR, 0, info);
+    aty_st_le32(CLR_CMP_MASK, 0xFFFFFFFF, info);
+    aty_st_le32(CLR_CMP_CNTL, 0, info);
+
+    /* set pixel depth */
+    wait_for_fifo(2, info);
+    switch(par->hw.gx.cmode) {
+#ifdef CONFIG_FBCON_CFB8
+	case CMODE_8:
+	    aty_st_le32(DP_PIX_WIDTH, HOST_8BPP | SRC_8BPP | DST_8BPP |
+				      BYTE_ORDER_LSB_TO_MSB,
+	    info);
+	    aty_st_le32(DP_CHAIN_MASK, 0x8080, info);
+	    break;
+#endif
+#ifdef CONFIG_FBCON_CFB16
+	case CMODE_16:
+	    aty_st_le32(DP_PIX_WIDTH, HOST_15BPP | SRC_15BPP | DST_15BPP |
+				      BYTE_ORDER_LSB_TO_MSB,
+	    info);
+	    aty_st_le32(DP_CHAIN_MASK, 0x4210, info);
+	    break;
+#endif
+#if 0
+	case CMODE_24:
+	    aty_st_le32(DP_PIX_WIDTH, HOST_8BPP | SRC_8BPP | DST_8BPP |
+				      BYTE_ORDER_LSB_TO_MSB,
+	    info);
+	    aty_st_le32(DP_CHAIN_MASK, 0x8080, info);
+	    break;
+#endif
+#ifdef CONFIG_FBCON_CFB32
+	case CMODE_32:
+	    aty_st_le32(DP_PIX_WIDTH, HOST_32BPP | SRC_32BPP | DST_32BPP |
+				      BYTE_ORDER_LSB_TO_MSB, info);
+	    aty_st_le32(DP_CHAIN_MASK, 0x8080, info);
+	    break;
+#endif
+    }
+    /* ensure engine is idle before leaving */
+    wait_for_idle(info);
+}
+
 static void aty_st_514(int offset, u8 val, struct fb_info_aty *info)
 {
-    aty_WaitQueue(5, info);
     aty_st_8(DAC_CNTL, 1, info);
     /* right addr byte */
     aty_st_8(DAC_W_INDEX, offset & 0xff, info);	
@@ -468,7 +634,6 @@
 
 static void aty_st_pll(int offset, u8 val, struct fb_info_aty *info)
 {
-    aty_WaitQueue(3, info);
     /* write addr byte */
     aty_st_8(CLOCK_CNTL + 1, (offset << 2) | PLL_WR_EN, info);
     eieio();
@@ -540,9 +705,9 @@
     } RGB514_DAC_Table;
 
     static RGB514_DAC_Table RGB514DAC_Tab[8] = {
-	{0, 0x41, 0x03, 0x71, 0x45},	// 8bpp
-	{0, 0x45, 0x04, 0x0c, 0x01},	// 555
-	{0, 0x45, 0x06, 0x0e, 0x00},	// XRGB
+	{0, 0x41, 0x03, 0x71, 0x45},	/* 8bpp */
+	{0, 0x45, 0x04, 0x0c, 0x01},	/* 555 */
+	{0, 0x45, 0x06, 0x0e, 0x00},	/* XRGB */
     };
     RGB514_DAC_Table *pDacProgTab;
 
@@ -568,32 +733,6 @@
     pitch = par->vxres>>3;
     offset = ((par->yoffset*par->vxres+par->xoffset)>>3)<<par->hw.gx.cmode;
     aty_st_le32(CRTC_OFF_PITCH, pitch<<22 | offset, info);
-    if (1 /* info->chip_type == MACH64_GT_ID */) {
-	/* Is this OK for other chips? */
-	aty_WaitQueue(13, info);
-	aty_st_le32(DST_OFF_PITCH, pitch<<22 | offset, info);
-	aty_st_le32(SRC_OFF_PITCH, pitch<<22 | offset, info);
-	aty_st_le32(SC_LEFT_RIGHT, (par->vxres -1) << 16, info);
-	aty_st_le32(SC_TOP_BOTTOM, (par->vyres -1) << 16, info);
-
-	/* Caution: This is valid for 8 bpp only!!!! */
-
-	aty_st_le32(DP_BKGD_CLR, 0x00000000, info);
-	aty_st_le32(DP_FRGD_CLR, 0x00000001, info);
-
-	aty_st_le32(DP_WRITE_MSK, 0x000000ff /* pGC->planemask */, info);
-	aty_st_le32(DP_PIX_WIDTH, BYTE_ORDER_MSB_TO_LSB | HOST_8BPP |
-				  SRC_8BPP | DST_8BPP,
-		    info);
-	aty_st_le32(DP_MIX, FRGD_MIX_S | BKGD_MIX_D, info);
-	aty_st_le32(DP_SRC, FRGD_SRC_BLIT | BKGD_SRC_BKGD_CLR, info);
-
-	aty_st_le32(CLR_CMP_CNTL, 0x00000000, info);
-	aty_st_le32(CLR_CMP_CLR, 0x00000000, info);
-
-	aty_st_le32(GUI_TRAJ_CNTL, DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM,
-		    info);
-    }
 }
 
 static void atyfb_set_par(struct atyfb_par *par, struct fb_info_aty *info)
@@ -605,21 +744,9 @@
     if (init == 0)	/* paranoia, shouldn't get here */
 	panic("aty: display mode %d not supported", par->hw.gx.vmode);
 
-    current_par = *par;
+    info->current_par = *par;
     hres = vmode_attrs[par->hw.gx.vmode-1].hres;
 
-    /* clear FIFO errors */
-    aty_st_le32(BUS_CNTL, aty_ld_le32(BUS_CNTL, info) | BUS_HOST_ERR_ACK
-			  | BUS_FIFO_ERR_ACK, info);
-
-    /* Reset engine */
-    i = aty_ld_le32(GEN_TEST_CNTL, info);
-    aty_st_le32(GEN_TEST_CNTL, i & ~GUI_ENGINE_ENABLE, info);
-    eieio();
-    aty_WaitIdleEmpty(info);
-    aty_st_le32(GEN_TEST_CNTL, i | GUI_ENGINE_ENABLE, info);
-    aty_WaitIdleEmpty(info);
-
     if (info->chip_type != MACH64_GT_ID) {
 	i = aty_ld_le32(CRTC_GEN_CNTL, info);
 	aty_st_le32(CRTC_GEN_CNTL, i | CRTC_EXT_DISP_EN, info);
@@ -685,7 +812,7 @@
 	    break;
 	default:
 	    RGB514_Program(par->hw.gx.cmode, info);
-	    aty_WaitIdleEmpty(info);
+	    wait_for_idle(info);
 	    aty_st_514(0x06, 0x02, info);
 	    aty_st_514(0x10, 0x01, info);
 	    aty_st_514(0x70, 0x01, info);
@@ -698,7 +825,7 @@
     }
 
     aty_ld_8(DAC_REGS, info);	/* clear counter */
-    aty_WaitIdleEmpty(info);
+    wait_for_idle(info);
 
     aty_st_le32(CRTC_H_TOTAL_DISP, init->crtc_h_tot_disp, info);
     aty_st_le32(CRTC_H_SYNC_STRT_WID,
@@ -728,7 +855,7 @@
 	    aty_st_le32(DSP_ON_OFF,
 			sgram_dsp[par->hw.gx.vmode-1][par->hw.gx.cmode], info);
 
-	//aty_st_le32(CLOCK_CNTL,8192, info);
+	    /* aty_st_le32(CLOCK_CNTL, 8192, info); */
 	} else {
 	    i |= ((1 * par->hw.gx.cmode) << 26) | 0x300090;
 	    aty_st_le32(DSP_ON_OFF, init->mem_cntl[par->hw.gx.cmode], info);
@@ -819,6 +946,10 @@
     /* Gentlemen, start your crtc engine */
     aty_st_le32(CRTC_GEN_CNTL, CRTC_EXT_DISP_EN | CRTC_ENABLE | i, info);
 
+    /* Initialize the graphics engine */
+    if (par->accel & FB_ACCELF_TEXT)
+	init_engine(par, info);
+
 #ifdef CONFIG_FB_COMPAT_XPMAC
     if (console_fb_info == &info->fb_info) {
 	display_info.height = vmode_attrs[par->hw.gx.vmode-1].vres;
@@ -957,21 +1088,20 @@
     if (par->xoffset+xres > par->vxres || par->yoffset+yres > par->vyres)
 	return -EINVAL;
 
-    if (bpp <= 8) {
+    if (bpp <= 8)
 	par->hw.gx.cmode = CMODE_8;
-	if (var->accel_flags & FB_ACCELF_TEXT)
-	    par->accel = FB_ACCELF_TEXT;
-	else
-	    par->accel = 0;
-    } else if (bpp <= 16) {
+    else if (bpp <= 16)
 	par->hw.gx.cmode = CMODE_16;
-	par->accel = 0;
-    } else if (bpp <= 32) {
+    else if (bpp <= 32)
 	par->hw.gx.cmode = CMODE_32;
-	par->accel = 0;
-    } else
+    else
 	return -EINVAL;
 
+    if (var->accel_flags & FB_ACCELF_TEXT)
+	par->accel = FB_ACCELF_TEXT;
+    else
+	par->accel = 0;
+
     if (aty_vram_reqd(par) > info->total_vram)
 	return -EINVAL;
 
@@ -1164,12 +1294,7 @@
     par->vyres = vmode_attrs[vmode-1].vres;
     par->xoffset = 0;
     par->yoffset = 0;
-#ifdef CONFIG_ATARI
     par->accel = FB_ACCELF_TEXT;
-#else
-    /* FIXME: accel doesn't work yet on PPC */
-    par->accel = 0;
-#endif
 }
 
 
@@ -1184,7 +1309,7 @@
     struct atyfb_par par;
 
     if (con == -1)
-	par = default_par;
+	par = info2->default_par;
     else
 	decode_var(&fb_display[con].var, &par, info2);
     encode_fix(fix, &par, info2);
@@ -1199,8 +1324,10 @@
 static int atyfb_get_var(struct fb_var_screeninfo *var, int con,
 			 struct fb_info *info)
 {
+    struct fb_info_aty *info2 = (struct fb_info_aty *)info;
+
     if (con == -1)
-	encode_var(var, &default_par, (struct fb_info_aty *)info);
+	encode_var(var, &info2->default_par, (struct fb_info_aty *)info);
     else
 	*var = fb_display[con].var;
     return 0;
@@ -1217,8 +1344,7 @@
     struct fb_info_aty *info2 = (struct fb_info_aty *)info;
     struct atyfb_par par;
     struct display *display;
-    int oldxres, oldyres, oldvxres, oldvyres, oldbpp, oldaccel;
-    int err;
+    int oldxres, oldyres, oldvxres, oldvyres, oldbpp, oldaccel, accel, err;
     int activate = var->activate;
 
     if (con >= 0)
@@ -1254,24 +1380,21 @@
 	    display->line_length = fix.line_length;
 	    display->can_soft_blank = 1;
 	    display->inverse = 0;
+	    accel = var->accel_flags & FB_ACCELF_TEXT;
 	    switch (par.hw.gx.cmode) {
 #ifdef CONFIG_FBCON_CFB8
 		case CMODE_8:
-		    if (var->accel_flags & FB_ACCELF_TEXT) {
-			display->dispsw = &fbcon_aty8;	/* doesn't work yet */
-#warning FIXME: We should reinit the graphics engine here
-		    } else
-			display->dispsw = &fbcon_cfb8;
+		    display->dispsw = accel ? &fbcon_aty8 : &fbcon_cfb8;
 		    break;
 #endif
 #ifdef CONFIG_FBCON_CFB16
 		case CMODE_16:
-		    display->dispsw = &fbcon_cfb16;
+		    display->dispsw = accel ? &fbcon_aty16 : &fbcon_cfb16;
 		    break;
 #endif
 #ifdef CONFIG_FBCON_CFB32
 		case CMODE_32:
-		    display->dispsw = &fbcon_cfb32;
+		    display->dispsw = accel ? &fbcon_aty32 : &fbcon_cfb32;
 		    break;
 #endif
 		default:
@@ -1305,7 +1428,7 @@
 {
     struct fb_info_aty *info2 = (struct fb_info_aty *)info;
     u32 xres, yres, xoffset, yoffset;
-    struct atyfb_par *par = &current_par;
+    struct atyfb_par *par = &info2->current_par;
 
     xres = vmode_attrs[par->hw.gx.vmode-1].hres;
     yres = vmode_attrs[par->hw.gx.vmode-1].vres;
@@ -1532,11 +1655,14 @@
 	    default:
 		info->total_vram = 0x80000;
 	}
-#ifdef CONFIG_ATARI /* this is definately the wrong way to set this */
-    info->total_vram -= 0x00080000;
+#ifdef CONFIG_ATARI	/* this is definately not the wrong way to set this */
+    if ((info->total_vram == 0x400000) || (info->total_vram == 0x800000)) {
+	/* protect GUI-regs if complete Aperture is VRAM */
+	info->total_vram -= 0x00001000;
+    }
 #endif
 
-#if 1
+#if 0
     printk("aty_init: regbase = %lx, frame_buffer = %lx, total_vram = %x\n",
 	   info->ati_regbase, info->frame_buffer, info->total_vram);
 #endif
@@ -1573,20 +1699,20 @@
     if (default_cmode < CMODE_8 || default_cmode > CMODE_32)
 	default_cmode = CMODE_8;
 
-    init_par(&default_par, default_vmode, default_cmode);
-    while (aty_vram_reqd(&default_par) > info->total_vram) {
+    init_par(&info->default_par, default_vmode, default_cmode);
+    while (aty_vram_reqd(&info->default_par) > info->total_vram) {
 	while (default_cmode > CMODE_8 &&
-	       aty_vram_reqd(&default_par) > info->total_vram) {
+	       aty_vram_reqd(&info->default_par) > info->total_vram) {
 	    --default_cmode;
-	    init_par(&default_par, default_vmode, default_cmode);
+	    init_par(&info->default_par, default_vmode, default_cmode);
 	}
 	/*
 	 * Adjust the video mode smaller if there still is not enough VRAM
 	 */
-	if (aty_vram_reqd(&default_par) > info->total_vram)
+	if (aty_vram_reqd(&info->default_par) > info->total_vram)
 	    do {
 		default_vmode--;
-		init_par(&default_par, default_vmode, default_cmode);
+		init_par(&info->default_par, default_vmode, default_cmode);
 		init = get_aty_struct(default_vmode, info);
 	    } while ((init == 0) &&
 		     (default_vmode > VMODE_640_480_60));
@@ -1597,7 +1723,7 @@
 	&& init->crtc_gen_cntl[1] == 0) {
 	    default_vmode = VMODE_640_480_67;
 	    default_cmode = CMODE_8;
-	    init_par(&default_par, default_vmode, default_cmode);
+	    init_par(&info->default_par, default_vmode, default_cmode);
     }
 
     switch (info->chip_type) {
@@ -1633,13 +1759,12 @@
     err = register_framebuffer(&info->fb_info);
     if (err < 0)
 	return 0;
-    atyfb_set_par(&default_par, info);
-    encode_var(&var, &default_par, info);
+    atyfb_set_par(&info->default_par, info);
+    encode_var(&var, &info->default_par, info);
     atyfb_set_var(&var, -1, &info->fb_info);
 
     printk("fb%d: %s frame buffer device on %s\n",
 	   GET_FB_IDX(info->fb_info.node), atyfb_name, name);
-
     return 1;
 }
 
@@ -1863,8 +1988,8 @@
 {
     struct fb_info_aty *info2 = (struct fb_info_aty *)info;
 
-    current_par.yoffset = fb_display[con].var.yoffset;
-    set_off_pitch(&current_par, info2);
+    info2->current_par.yoffset = fb_display[con].var.yoffset;
+    set_off_pitch(&info2->current_par, info2);
     return 0;
 }
 
@@ -1926,7 +2051,6 @@
     info2->palette[regno].red = red;
     info2->palette[regno].green = green;
     info2->palette[regno].blue = blue;
-    aty_WaitQueue(2, info2);
     i = aty_ld_8(DAC_CNTL, info2) & 0xfc;
     if (info2->chip_type == MACH64_GT_ID)
 	i |= 0x2;	/*DAC_CNTL|0x2 turns off the extra brightness for gt*/
@@ -1934,8 +2058,7 @@
     aty_st_8(DAC_REGS + DAC_MASK, 0xff, info2);
     eieio();
     scale = ((info2->chip_type != MACH64_GX_ID) &&
-	     (current_par.hw.gx.cmode == CMODE_16)) ? 3 : 0;
-    aty_WaitQueue(4, info2);
+	     (info2->current_par.hw.gx.cmode == CMODE_16)) ? 3 : 0;
     info2->aty_cmap_regs->windex = regno << scale;
     eieio();
     info2->aty_cmap_regs->lut = red << scale;
@@ -1975,15 +2098,33 @@
      *  Accelerated functions
      */
 
-static void aty_waitblit(struct fb_info_aty *info)
+static inline void draw_rect(s16 x, s16 y, u16 width, u16 height,
+			     struct fb_info_aty *info)
 {
-    aty_WaitIdleEmpty(info);	/* Make sure that all commands have finished */
+    /* perform rectangle fill */
+    wait_for_fifo(2, info);
+    aty_st_le32(DST_Y_X, (x << 16) | y, info);
+    aty_st_le32(DST_HEIGHT_WIDTH, (width << 16) | height, info);
 }
 
-static void aty_rectcopy(int srcx, int srcy, int dstx, int dsty, u_int width,
-			 u_int height, struct fb_info_aty *info)
+static inline void aty_rectcopy(int srcx, int srcy, int dstx, int dsty,
+				u_int width, u_int height,
+				struct fb_info_aty *info)
 {
-    u_int direction = 0;
+    u32 direction = DST_LAST_PEL;
+    u32 pitch_value;
+
+    if (!width || !height)
+	return;
+
+    pitch_value = info->current_par.vxres;
+#if 0
+    if (par->hw.gx.cmode == CMODE_24) {
+	/* In 24 bpp, the engine is in 8 bpp - this requires that all */
+	/* horizontal coordinates and widths must be adjusted */
+	pitch_value = pitch_value * 3;
+    }
+#endif
 
     if (srcy < dsty) {
 	dsty += height - 1;
@@ -1997,48 +2138,33 @@
     } else
 	direction |= DST_X_LEFT_TO_RIGHT;
 
-    aty_WaitQueue(4, info);
-    aty_st_le32(DP_WRITE_MSK, 0x000000FF /* pGC->planemask */ , info);
-    aty_st_le32(DP_MIX, (MIX_SRC << 16) |  MIX_DST, info);
+    wait_for_fifo(5, info);
     aty_st_le32(DP_SRC, FRGD_SRC_BLIT, info);
-
-    aty_WaitQueue(5, info);
-    aty_st_le32(SRC_Y_X, (srcx << 16) | (srcy & 0x0000ffff), info);
-    aty_st_le32(SRC_WIDTH1, width, info);
-    aty_st_le32(DST_CNTL, direction, info);
-    aty_st_le32(DST_Y_X, (dstx << 16) | (dsty & 0x0000ffff), info);
-    aty_st_le32(DST_HEIGHT_WIDTH, (width << 16) | (height & 0x0000ffff), info);
-
-    aty_WaitIdleEmpty(info);	/* Make sure that all commands have finished */
-
     /*
-     * Make sure that the destination trajectory is correctly set
-     * for subsequent calls.  MACH64_BIT_BLT is the only function that
-     * currently changes the destination trajectory from L->R and T->B.
-     */
-    aty_st_le32(DST_CNTL, (DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM), info);
+     * ++Geert:
+     * Warning: SRC_OFF_PITCH may be trashed by writing to other registers
+     * (e.g. CRTC_H_TOTAL_DISP, DP_SRC, DP_FRGD_CLR)
+     */
+    aty_st_le32(SRC_OFF_PITCH, (pitch_value / 8) << 22, info);
+    aty_st_le32(SRC_Y_X, (srcx << 16) | srcy, info);
+    aty_st_le32(SRC_HEIGHT1_WIDTH1, (width << 16) | height, info);
+    aty_st_le32(DST_CNTL, direction, info);
+    draw_rect(dstx, dsty, width, height, info);
 }
 
-static void aty_rectfill(int dstx, int dsty, u_int width, u_int height,
-			 u_int color, struct fb_info_aty *info)
+static inline void aty_rectfill(int dstx, int dsty, u_int width, u_int height,
+				u_int color, struct fb_info_aty *info)
 {
     if (!width || !height)
 	return;
 
-    aty_WaitQueue(5, info);
-    aty_st_le32(DP_FRGD_CLR, color /* pGC->fgPixel */ , info);
-    aty_st_le32(DP_WRITE_MSK, 0x000000FF /* pGC->planemask */ , info);
-    aty_st_le32(DP_MIX, (MIX_SRC << 16) | MIX_DST, info);
-    aty_st_le32(DP_SRC, FRGD_SRC_FRGD_CLR, info);
-
-    aty_st_le32(DST_CNTL, DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM, info);
-
-    aty_WaitQueue(2, info);
-    aty_st_le32(DST_Y_X, (((u_int)dstx << 16) | ((u_int)dsty & 0x0000ffff)),
+    wait_for_fifo(3, info);
+    aty_st_le32(DP_FRGD_CLR, color, info);
+    aty_st_le32(DP_SRC, BKGD_SRC_BKGD_CLR | FRGD_SRC_FRGD_CLR | MONO_SRC_ONE,
 		info);
-    aty_st_le32(DST_HEIGHT_WIDTH, (((u_int)width << 16) | height), info);
-
-    aty_WaitIdleEmpty(info);	/* Make sure that all commands have finished */
+    aty_st_le32(DST_CNTL, DST_LAST_PEL | DST_Y_TOP_TO_BOTTOM |
+			  DST_X_LEFT_TO_RIGHT, info);
+    draw_rect(dstx, dsty, width, height, info);
 }
 
 
@@ -2046,8 +2172,8 @@
      *  Text console acceleration
      */
 
-static void fbcon_aty8_bmove(struct display *p, int sy, int sx, int dy, int dx,
-			     int height, int width)
+static void fbcon_aty_bmove(struct display *p, int sy, int sx, int dy, int dx,
+			    int height, int width)
 {
     sx *= p->fontwidth;
     sy *= p->fontheight;
@@ -2060,8 +2186,8 @@
 		 (struct fb_info_aty *)p->fb_info);
 }
 
-static void fbcon_aty8_clear(struct vc_data *conp, struct display *p, int sy,
-			     int sx, int height, int width)
+static void fbcon_aty_clear(struct vc_data *conp, struct display *p, int sy,
+			    int sx, int height, int width)
 {
     u32 bgx = attr_bgcol_ec(p, conp);
     bgx |= (bgx << 8);
@@ -2079,20 +2205,58 @@
 static void fbcon_aty8_putc(struct vc_data *conp, struct display *p, int c,
 			    int yy, int xx)
 {
-    aty_waitblit((struct fb_info_aty *)p->fb_info);
+    wait_for_idle((struct fb_info_aty *)p->fb_info);
     fbcon_cfb8_putc(conp, p, c, yy, xx);
 }
 
 static void fbcon_aty8_putcs(struct vc_data *conp, struct display *p,
 			     const char *s, int count, int yy, int xx)
 {
-    aty_waitblit((struct fb_info_aty *)p->fb_info);
+    wait_for_idle((struct fb_info_aty *)p->fb_info);
     fbcon_cfb8_putcs(conp, p, s, count, yy, xx);
 }
 
+static void fbcon_aty16_putc(struct vc_data *conp, struct display *p, int c,
+			     int yy, int xx)
+{
+    wait_for_idle((struct fb_info_aty *)p->fb_info);
+    fbcon_cfb16_putc(conp, p, c, yy, xx);
+}
+
+static void fbcon_aty16_putcs(struct vc_data *conp, struct display *p,
+			      const char *s, int count, int yy, int xx)
+{
+    wait_for_idle((struct fb_info_aty *)p->fb_info);
+    fbcon_cfb16_putcs(conp, p, s, count, yy, xx);
+}
+
+static void fbcon_aty32_putc(struct vc_data *conp, struct display *p, int c,
+			     int yy, int xx)
+{
+    wait_for_idle((struct fb_info_aty *)p->fb_info);
+    fbcon_cfb32_putc(conp, p, c, yy, xx);
+}
+
+static void fbcon_aty32_putcs(struct vc_data *conp, struct display *p,
+			      const char *s, int count, int yy, int xx)
+{
+    wait_for_idle((struct fb_info_aty *)p->fb_info);
+    fbcon_cfb32_putcs(conp, p, s, count, yy, xx);
+}
+
 static struct display_switch fbcon_aty8 = {
-    fbcon_cfb8_setup, fbcon_aty8_bmove, fbcon_aty8_clear, fbcon_aty8_putc,
+    fbcon_cfb8_setup, fbcon_aty_bmove, fbcon_aty_clear, fbcon_aty8_putc,
     fbcon_aty8_putcs, fbcon_cfb8_revc, NULL
+};
+
+static struct display_switch fbcon_aty16 = {
+    fbcon_cfb16_setup, fbcon_aty_bmove, fbcon_aty_clear, fbcon_aty16_putc,
+    fbcon_aty16_putcs, fbcon_cfb16_revc, NULL
+};
+
+static struct display_switch fbcon_aty32 = {
+    fbcon_cfb32_setup, fbcon_aty_bmove, fbcon_aty_clear, fbcon_aty32_putc,
+    fbcon_aty32_putcs, fbcon_cfb32_revc, NULL
 };
 
 

Greetings,

						Geert

--
Geert Uytterhoeven                     Geert.Uytterhoeven@thomas.kotnet.org
Linux/{m68k~Amiga,ppc~CHRP}, Wavelets  http://www.cs.kuleuven.ac.be/~geert/
KotNET@Thomas Network Administration --- Make your bed part of Cyberspace!!

