瀏覽代碼

m68k: Atari fb revival

Update the atari fb to 2.6 by Michael Schmitz,
Reformatting and rewrite of bit plane functions by Roman Zippel,
A few more fixes by Geert Uytterhoeven.

Signed-off-by: Michael Schmitz <schmitz@debian.org>
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Michael Schmitz 18 年之前
父節點
當前提交
a100501212

+ 4 - 1
drivers/video/Kconfig

@@ -389,7 +389,10 @@ config FB_ARC
 
 
 config FB_ATARI
 config FB_ATARI
 	bool "Atari native chipset support"
 	bool "Atari native chipset support"
-	depends on (FB = y) && ATARI && BROKEN
+	depends on (FB = y) && ATARI
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
 	help
 	help
 	  This is the frame buffer device driver for the builtin graphics
 	  This is the frame buffer device driver for the builtin graphics
 	  chipset found in Ataris.
 	  chipset found in Ataris.

+ 2 - 1
drivers/video/Makefile

@@ -63,7 +63,8 @@ obj-$(CONFIG_FB_TCX)              += tcx.o sbuslib.o
 obj-$(CONFIG_FB_LEO)              += leo.o sbuslib.o
 obj-$(CONFIG_FB_LEO)              += leo.o sbuslib.o
 obj-$(CONFIG_FB_SGIVW)            += sgivwfb.o
 obj-$(CONFIG_FB_SGIVW)            += sgivwfb.o
 obj-$(CONFIG_FB_ACORN)            += acornfb.o
 obj-$(CONFIG_FB_ACORN)            += acornfb.o
-obj-$(CONFIG_FB_ATARI)            += atafb.o
+obj-$(CONFIG_FB_ATARI)            += atafb.o c2p.o atafb_mfb.o \
+                                     atafb_iplan2p2.o atafb_iplan2p4.o atafb_iplan2p8.o
 obj-$(CONFIG_FB_MAC)              += macfb.o
 obj-$(CONFIG_FB_MAC)              += macfb.o
 obj-$(CONFIG_FB_HGA)              += hgafb.o
 obj-$(CONFIG_FB_HGA)              += hgafb.o
 obj-$(CONFIG_FB_IGA)              += igafb.o
 obj-$(CONFIG_FB_IGA)              += igafb.o

文件差異過大導致無法顯示
+ 440 - 325
drivers/video/atafb.c


+ 36 - 0
drivers/video/atafb.h

@@ -0,0 +1,36 @@
+#ifndef _VIDEO_ATAFB_H
+#define _VIDEO_ATAFB_H
+
+void atafb_mfb_copyarea(struct fb_info *info, u_long next_line, int sy, int sx, int dy,
+			int dx, int height, int width);
+void atafb_mfb_fillrect(struct fb_info *info, u_long next_line, u32 color,
+			int sy, int sx, int height, int width);
+void atafb_mfb_linefill(struct fb_info *info, u_long next_line,
+			int dy, int dx, u32 width,
+			const u8 *data, u32 bgcolor, u32 fgcolor);
+
+void atafb_iplan2p2_copyarea(struct fb_info *info, u_long next_line, int sy, int sx, int dy,
+			     int dx, int height, int width);
+void atafb_iplan2p2_fillrect(struct fb_info *info, u_long next_line, u32 color,
+			     int sy, int sx, int height, int width);
+void atafb_iplan2p2_linefill(struct fb_info *info, u_long next_line,
+			     int dy, int dx, u32 width,
+			     const u8 *data, u32 bgcolor, u32 fgcolor);
+
+void atafb_iplan2p4_copyarea(struct fb_info *info, u_long next_line, int sy, int sx, int dy,
+			     int dx, int height, int width);
+void atafb_iplan2p4_fillrect(struct fb_info *info, u_long next_line, u32 color,
+			     int sy, int sx, int height, int width);
+void atafb_iplan2p4_linefill(struct fb_info *info, u_long next_line,
+			     int dy, int dx, u32 width,
+			     const u8 *data, u32 bgcolor, u32 fgcolor);
+
+void atafb_iplan2p8_copyarea(struct fb_info *info, u_long next_line, int sy, int sx, int dy,
+			     int dx, int height, int width);
+void atafb_iplan2p8_fillrect(struct fb_info *info, u_long next_line, u32 color,
+			     int sy, int sx, int height, int width);
+void atafb_iplan2p8_linefill(struct fb_info *info, u_long next_line,
+			     int dy, int dx, u32 width,
+			     const u8 *data, u32 bgcolor, u32 fgcolor);
+
+#endif /* _VIDEO_ATAFB_H */

+ 293 - 0
drivers/video/atafb_iplan2p2.c

@@ -0,0 +1,293 @@
+/*
+ *  linux/drivers/video/iplan2p2.c -- Low level frame buffer operations for
+ *				      interleaved bitplanes à la Atari (2
+ *				      planes, 2 bytes interleave)
+ *
+ *	Created 5 Apr 1997 by Geert Uytterhoeven
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of this archive for
+ *  more details.
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/fb.h>
+
+#include <asm/setup.h>
+
+#include "atafb.h"
+
+#define BPL	2
+#include "atafb_utils.h"
+
+void atafb_iplan2p2_copyarea(struct fb_info *info, u_long next_line,
+			     int sy, int sx, int dy, int dx,
+			     int height, int width)
+{
+	/*  bmove() has to distinguish two major cases: If both, source and
+	 *  destination, start at even addresses or both are at odd
+	 *  addresses, just the first odd and last even column (if present)
+	 *  require special treatment (memmove_col()). The rest between
+	 *  then can be copied by normal operations, because all adjacent
+	 *  bytes are affected and are to be stored in the same order.
+	 *    The pathological case is when the move should go from an odd
+	 *  address to an even or vice versa. Since the bytes in the plane
+	 *  words must be assembled in new order, it seems wisest to make
+	 *  all movements by memmove_col().
+	 */
+
+	u8 *src, *dst;
+	u32 *s, *d;
+	int w, l , i, j;
+	u_int colsize;
+	u_int upwards = (dy < sy) || (dy == sy && dx < sx);
+
+	colsize = height;
+	if (!((sx ^ dx) & 15)) {
+		/* odd->odd or even->even */
+
+		if (upwards) {
+			src = (u8 *)info->screen_base + sy * next_line + (sx & ~15) / (8 / BPL);
+			dst = (u8 *)info->screen_base + dy * next_line + (dx & ~15) / (8 / BPL);
+			if (sx & 15) {
+				memmove32_col(dst, src, 0xff00ff, height, next_line - BPL * 2);
+				src += BPL * 2;
+				dst += BPL * 2;
+				width -= 8;
+			}
+			w = width >> 4;
+			if (w) {
+				s = (u32 *)src;
+				d = (u32 *)dst;
+				w *= BPL / 2;
+				l = next_line - w * 4;
+				for (j = height; j > 0; j--) {
+					for (i = w; i > 0; i--)
+						*d++ = *s++;
+					s = (u32 *)((u8 *)s + l);
+					d = (u32 *)((u8 *)d + l);
+				}
+			}
+			if (width & 15)
+				memmove32_col(dst + width / (8 / BPL), src + width / (8 / BPL),
+					      0xff00ff00, height, next_line - BPL * 2);
+		} else {
+			src = (u8 *)info->screen_base + (sy - 1) * next_line + ((sx + width + 8) & ~15) / (8 / BPL);
+			dst = (u8 *)info->screen_base + (dy - 1) * next_line + ((dx + width + 8) & ~15) / (8 / BPL);
+
+			if ((sx + width) & 15) {
+				src -= BPL * 2;
+				dst -= BPL * 2;
+				memmove32_col(dst, src, 0xff00ff00, colsize, -next_line - BPL * 2);
+				width -= 8;
+			}
+			w = width >> 4;
+			if (w) {
+				s = (u32 *)src;
+				d = (u32 *)dst;
+				w *= BPL / 2;
+				l = next_line - w * 4;
+				for (j = height; j > 0; j--) {
+					for (i = w; i > 0; i--)
+						*--d = *--s;
+					s = (u32 *)((u8 *)s - l);
+					d = (u32 *)((u8 *)d - l);
+				}
+			}
+			if (sx & 15)
+				memmove32_col(dst - (width - 16) / (8 / BPL),
+					      src - (width - 16) / (8 / BPL),
+					      0xff00ff, colsize, -next_line - BPL * 2);
+		}
+	} else {
+		/* odd->even or even->odd */
+		if (upwards) {
+			u32 *src32, *dst32;
+			u32 pval[4], v, v1, mask;
+			int i, j, w, f;
+
+			src = (u8 *)info->screen_base + sy * next_line + (sx & ~15) / (8 / BPL);
+			dst = (u8 *)info->screen_base + dy * next_line + (dx & ~15) / (8 / BPL);
+
+			mask = 0xff00ff00;
+			f = 0;
+			w = width;
+			if (sx & 15) {
+				f = 1;
+				w += 8;
+			}
+			if ((sx + width) & 15)
+				f |= 2;
+			w >>= 4;
+			for (i = height; i; i--) {
+				src32 = (u32 *)src;
+				dst32 = (u32 *)dst;
+
+				if (f & 1) {
+					pval[0] = (*src32++ << 8) & mask;
+				} else {
+					pval[0] = dst32[0] & mask;
+				}
+
+				for (j = w; j > 0; j--) {
+					v = *src32++;
+					v1 = v & mask;
+					*dst32++ = pval[0] | (v1 >> 8);
+					pval[0] = (v ^ v1) << 8;
+				}
+
+				if (f & 2) {
+					dst32[0] = (dst32[0] & mask) | pval[0];
+				}
+
+				src += next_line;
+				dst += next_line;
+			}
+		} else {
+			u32 *src32, *dst32;
+			u32 pval[4], v, v1, mask;
+			int i, j, w, f;
+
+			src = (u8 *)info->screen_base + (sy - 1) * next_line + ((sx + width + 8) & ~15) / (8 / BPL);
+			dst = (u8 *)info->screen_base + (dy - 1) * next_line + ((dx + width + 8) & ~15) / (8 / BPL);
+
+			mask = 0xff00ff;
+			f = 0;
+			w = width;
+			if ((dx + width) & 15)
+				f = 1;
+			if (sx & 15) {
+				f |= 2;
+				w += 8;
+			}
+			w >>= 4;
+			for (i = height; i; i--) {
+				src32 = (u32 *)src;
+				dst32 = (u32 *)dst;
+
+				if (f & 1) {
+					pval[0] = dst32[-1] & mask;
+				} else {
+					pval[0] = (*--src32 >> 8) & mask;
+				}
+
+				for (j = w; j > 0; j--) {
+					v = *--src32;
+					v1 = v & mask;
+					*--dst32 = pval[0] | (v1 << 8);
+					pval[0] = (v ^ v1) >> 8;
+				}
+
+				if (!(f & 2)) {
+					dst32[-1] = (dst32[-1] & mask) | pval[0];
+				}
+
+				src -= next_line;
+				dst -= next_line;
+			}
+		}
+	}
+}
+
+void atafb_iplan2p2_fillrect(struct fb_info *info, u_long next_line, u32 color,
+                             int sy, int sx, int height, int width)
+{
+	u32 *dest;
+	int rows, i;
+	u32 cval[4];
+
+	dest = (u32 *)(info->screen_base + sy * next_line + (sx & ~15) / (8 / BPL));
+	if (sx & 15) {
+		u8 *dest8 = (u8 *)dest + 1;
+
+		expand8_col2mask(color, cval);
+
+		for (i = height; i; i--) {
+			fill8_col(dest8, cval);
+			dest8 += next_line;
+		}
+		dest += BPL / 2;
+		width -= 8;
+	}
+
+	expand16_col2mask(color, cval);
+	rows = width >> 4;
+	if (rows) {
+		u32 *d = dest;
+		u32 off = next_line - rows * BPL * 2;
+		for (i = height; i; i--) {
+			d = fill16_col(d, rows, cval);
+			d = (u32 *)((long)d + off);
+		}
+		dest += rows * BPL / 2;
+		width &= 15;
+	}
+
+	if (width) {
+		u8 *dest8 = (u8 *)dest;
+
+		expand8_col2mask(color, cval);
+
+		for (i = height; i; i--) {
+			fill8_col(dest8, cval);
+			dest8 += next_line;
+		}
+	}
+}
+
+void atafb_iplan2p2_linefill(struct fb_info *info, u_long next_line,
+                             int dy, int dx, u32 width,
+                             const u8 *data, u32 bgcolor, u32 fgcolor)
+{
+	u32 *dest;
+	const u16 *data16;
+	int rows;
+	u32 fgm[4], bgm[4], m;
+
+	dest = (u32 *)(info->screen_base + dy * next_line + (dx & ~15) / (8 / BPL));
+	if (dx & 15) {
+		fill8_2col((u8 *)dest + 1, fgcolor, bgcolor, *data++);
+		dest += BPL / 2;
+		width -= 8;
+	}
+
+	if (width >= 16) {
+		data16 = (const u16 *)data;
+		expand16_2col2mask(fgcolor, bgcolor, fgm, bgm);
+
+		for (rows = width / 16; rows; rows--) {
+			u16 d = *data16++;
+			m = d | ((u32)d << 16);
+			*dest++ = (m & fgm[0]) ^ bgm[0];
+		}
+
+		data = (const u8 *)data16;
+		width &= 15;
+	}
+
+	if (width)
+		fill8_2col((u8 *)dest, fgcolor, bgcolor, *data);
+}
+
+#ifdef MODULE
+MODULE_LICENSE("GPL");
+
+int init_module(void)
+{
+	return 0;
+}
+
+void cleanup_module(void)
+{
+}
+#endif /* MODULE */
+
+
+    /*
+     *  Visible symbols for modules
+     */
+
+EXPORT_SYMBOL(atafb_iplan2p2_copyarea);
+EXPORT_SYMBOL(atafb_iplan2p2_fillrect);
+EXPORT_SYMBOL(atafb_iplan2p2_linefill);

+ 308 - 0
drivers/video/atafb_iplan2p4.c

@@ -0,0 +1,308 @@
+/*
+ *  linux/drivers/video/iplan2p4.c -- Low level frame buffer operations for
+ *				      interleaved bitplanes à la Atari (4
+ *				      planes, 2 bytes interleave)
+ *
+ *	Created 5 Apr 1997 by Geert Uytterhoeven
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of this archive for
+ *  more details.
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/fb.h>
+
+#include <asm/setup.h>
+
+#include "atafb.h"
+
+#define BPL	4
+#include "atafb_utils.h"
+
+void atafb_iplan2p4_copyarea(struct fb_info *info, u_long next_line,
+			     int sy, int sx, int dy, int dx,
+			     int height, int width)
+{
+	/*  bmove() has to distinguish two major cases: If both, source and
+	 *  destination, start at even addresses or both are at odd
+	 *  addresses, just the first odd and last even column (if present)
+	 *  require special treatment (memmove_col()). The rest between
+	 *  then can be copied by normal operations, because all adjacent
+	 *  bytes are affected and are to be stored in the same order.
+	 *    The pathological case is when the move should go from an odd
+	 *  address to an even or vice versa. Since the bytes in the plane
+	 *  words must be assembled in new order, it seems wisest to make
+	 *  all movements by memmove_col().
+	 */
+
+	u8 *src, *dst;
+	u32 *s, *d;
+	int w, l , i, j;
+	u_int colsize;
+	u_int upwards = (dy < sy) || (dy == sy && dx < sx);
+
+	colsize = height;
+	if (!((sx ^ dx) & 15)) {
+		/* odd->odd or even->even */
+
+		if (upwards) {
+			src = (u8 *)info->screen_base + sy * next_line + (sx & ~15) / (8 / BPL);
+			dst = (u8 *)info->screen_base + dy * next_line + (dx & ~15) / (8 / BPL);
+			if (sx & 15) {
+				memmove32_col(dst, src, 0xff00ff, height, next_line - BPL * 2);
+				src += BPL * 2;
+				dst += BPL * 2;
+				width -= 8;
+			}
+			w = width >> 4;
+			if (w) {
+				s = (u32 *)src;
+				d = (u32 *)dst;
+				w *= BPL / 2;
+				l = next_line - w * 4;
+				for (j = height; j > 0; j--) {
+					for (i = w; i > 0; i--)
+						*d++ = *s++;
+					s = (u32 *)((u8 *)s + l);
+					d = (u32 *)((u8 *)d + l);
+				}
+			}
+			if (width & 15)
+				memmove32_col(dst + width / (8 / BPL), src + width / (8 / BPL),
+					      0xff00ff00, height, next_line - BPL * 2);
+		} else {
+			src = (u8 *)info->screen_base + (sy - 1) * next_line + ((sx + width + 8) & ~15) / (8 / BPL);
+			dst = (u8 *)info->screen_base + (dy - 1) * next_line + ((dx + width + 8) & ~15) / (8 / BPL);
+
+			if ((sx + width) & 15) {
+				src -= BPL * 2;
+				dst -= BPL * 2;
+				memmove32_col(dst, src, 0xff00ff00, colsize, -next_line - BPL * 2);
+				width -= 8;
+			}
+			w = width >> 4;
+			if (w) {
+				s = (u32 *)src;
+				d = (u32 *)dst;
+				w *= BPL / 2;
+				l = next_line - w * 4;
+				for (j = height; j > 0; j--) {
+					for (i = w; i > 0; i--)
+						*--d = *--s;
+					s = (u32 *)((u8 *)s - l);
+					d = (u32 *)((u8 *)d - l);
+				}
+			}
+			if (sx & 15)
+				memmove32_col(dst - (width - 16) / (8 / BPL),
+					      src - (width - 16) / (8 / BPL),
+					      0xff00ff, colsize, -next_line - BPL * 2);
+		}
+	} else {
+		/* odd->even or even->odd */
+		if (upwards) {
+			u32 *src32, *dst32;
+			u32 pval[4], v, v1, mask;
+			int i, j, w, f;
+
+			src = (u8 *)info->screen_base + sy * next_line + (sx & ~15) / (8 / BPL);
+			dst = (u8 *)info->screen_base + dy * next_line + (dx & ~15) / (8 / BPL);
+
+			mask = 0xff00ff00;
+			f = 0;
+			w = width;
+			if (sx & 15) {
+				f = 1;
+				w += 8;
+			}
+			if ((sx + width) & 15)
+				f |= 2;
+			w >>= 4;
+			for (i = height; i; i--) {
+				src32 = (u32 *)src;
+				dst32 = (u32 *)dst;
+
+				if (f & 1) {
+					pval[0] = (*src32++ << 8) & mask;
+					pval[1] = (*src32++ << 8) & mask;
+				} else {
+					pval[0] = dst32[0] & mask;
+					pval[1] = dst32[1] & mask;
+				}
+
+				for (j = w; j > 0; j--) {
+					v = *src32++;
+					v1 = v & mask;
+					*dst32++ = pval[0] | (v1 >> 8);
+					pval[0] = (v ^ v1) << 8;
+					v = *src32++;
+					v1 = v & mask;
+					*dst32++ = pval[1] | (v1 >> 8);
+					pval[1] = (v ^ v1) << 8;
+				}
+
+				if (f & 2) {
+					dst32[0] = (dst32[0] & mask) | pval[0];
+					dst32[1] = (dst32[1] & mask) | pval[1];
+				}
+
+				src += next_line;
+				dst += next_line;
+			}
+		} else {
+			u32 *src32, *dst32;
+			u32 pval[4], v, v1, mask;
+			int i, j, w, f;
+
+			src = (u8 *)info->screen_base + (sy - 1) * next_line + ((sx + width + 8) & ~15) / (8 / BPL);
+			dst = (u8 *)info->screen_base + (dy - 1) * next_line + ((dx + width + 8) & ~15) / (8 / BPL);
+
+			mask = 0xff00ff;
+			f = 0;
+			w = width;
+			if ((dx + width) & 15)
+				f = 1;
+			if (sx & 15) {
+				f |= 2;
+				w += 8;
+			}
+			w >>= 4;
+			for (i = height; i; i--) {
+				src32 = (u32 *)src;
+				dst32 = (u32 *)dst;
+
+				if (f & 1) {
+					pval[0] = dst32[-1] & mask;
+					pval[1] = dst32[-2] & mask;
+				} else {
+					pval[0] = (*--src32 >> 8) & mask;
+					pval[1] = (*--src32 >> 8) & mask;
+				}
+
+				for (j = w; j > 0; j--) {
+					v = *--src32;
+					v1 = v & mask;
+					*--dst32 = pval[0] | (v1 << 8);
+					pval[0] = (v ^ v1) >> 8;
+					v = *--src32;
+					v1 = v & mask;
+					*--dst32 = pval[1] | (v1 << 8);
+					pval[1] = (v ^ v1) >> 8;
+				}
+
+				if (!(f & 2)) {
+					dst32[-1] = (dst32[-1] & mask) | pval[0];
+					dst32[-2] = (dst32[-2] & mask) | pval[1];
+				}
+
+				src -= next_line;
+				dst -= next_line;
+			}
+		}
+	}
+}
+
+void atafb_iplan2p4_fillrect(struct fb_info *info, u_long next_line, u32 color,
+                             int sy, int sx, int height, int width)
+{
+	u32 *dest;
+	int rows, i;
+	u32 cval[4];
+
+	dest = (u32 *)(info->screen_base + sy * next_line + (sx & ~15) / (8 / BPL));
+	if (sx & 15) {
+		u8 *dest8 = (u8 *)dest + 1;
+
+		expand8_col2mask(color, cval);
+
+		for (i = height; i; i--) {
+			fill8_col(dest8, cval);
+			dest8 += next_line;
+		}
+		dest += BPL / 2;
+		width -= 8;
+	}
+
+	expand16_col2mask(color, cval);
+	rows = width >> 4;
+	if (rows) {
+		u32 *d = dest;
+		u32 off = next_line - rows * BPL * 2;
+		for (i = height; i; i--) {
+			d = fill16_col(d, rows, cval);
+			d = (u32 *)((long)d + off);
+		}
+		dest += rows * BPL / 2;
+		width &= 15;
+	}
+
+	if (width) {
+		u8 *dest8 = (u8 *)dest;
+
+		expand8_col2mask(color, cval);
+
+		for (i = height; i; i--) {
+			fill8_col(dest8, cval);
+			dest8 += next_line;
+		}
+	}
+}
+
+void atafb_iplan2p4_linefill(struct fb_info *info, u_long next_line,
+                             int dy, int dx, u32 width,
+                             const u8 *data, u32 bgcolor, u32 fgcolor)
+{
+	u32 *dest;
+	const u16 *data16;
+	int rows;
+	u32 fgm[4], bgm[4], m;
+
+	dest = (u32 *)(info->screen_base + dy * next_line + (dx & ~15) / (8 / BPL));
+	if (dx & 15) {
+		fill8_2col((u8 *)dest + 1, fgcolor, bgcolor, *data++);
+		dest += BPL / 2;
+		width -= 8;
+	}
+
+	if (width >= 16) {
+		data16 = (const u16 *)data;
+		expand16_2col2mask(fgcolor, bgcolor, fgm, bgm);
+
+		for (rows = width / 16; rows; rows--) {
+			u16 d = *data16++;
+			m = d | ((u32)d << 16);
+			*dest++ = (m & fgm[0]) ^ bgm[0];
+			*dest++ = (m & fgm[1]) ^ bgm[1];
+		}
+
+		data = (const u8 *)data16;
+		width &= 15;
+	}
+
+	if (width)
+		fill8_2col((u8 *)dest, fgcolor, bgcolor, *data);
+}
+
+#ifdef MODULE
+MODULE_LICENSE("GPL");
+
+int init_module(void)
+{
+	return 0;
+}
+
+void cleanup_module(void)
+{
+}
+#endif /* MODULE */
+
+
+    /*
+     *  Visible symbols for modules
+     */
+
+EXPORT_SYMBOL(atafb_iplan2p4_copyarea);
+EXPORT_SYMBOL(atafb_iplan2p4_fillrect);
+EXPORT_SYMBOL(atafb_iplan2p4_linefill);

+ 345 - 0
drivers/video/atafb_iplan2p8.c

@@ -0,0 +1,345 @@
+/*
+ *  linux/drivers/video/iplan2p8.c -- Low level frame buffer operations for
+ *				      interleaved bitplanes à la Atari (8
+ *				      planes, 2 bytes interleave)
+ *
+ *	Created 5 Apr 1997 by Geert Uytterhoeven
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of this archive for
+ *  more details.
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/fb.h>
+
+#include <asm/setup.h>
+
+#include "atafb.h"
+
+#define BPL	8
+#include "atafb_utils.h"
+
+
+/* Copies a 8 plane column from 's', height 'h', to 'd'. */
+
+/* This expands a 8 bit color into two longs for two movepl (8 plane)
+ * operations.
+ */
+
+void atafb_iplan2p8_copyarea(struct fb_info *info, u_long next_line,
+			     int sy, int sx, int dy, int dx,
+			     int height, int width)
+{
+	/*  bmove() has to distinguish two major cases: If both, source and
+	 *  destination, start at even addresses or both are at odd
+	 *  addresses, just the first odd and last even column (if present)
+	 *  require special treatment (memmove_col()). The rest between
+	 *  then can be copied by normal operations, because all adjacent
+	 *  bytes are affected and are to be stored in the same order.
+	 *    The pathological case is when the move should go from an odd
+	 *  address to an even or vice versa. Since the bytes in the plane
+	 *  words must be assembled in new order, it seems wisest to make
+	 *  all movements by memmove_col().
+	 */
+
+	u8 *src, *dst;
+	u32 *s, *d;
+	int w, l , i, j;
+	u_int colsize;
+	u_int upwards = (dy < sy) || (dy == sy && dx < sx);
+
+	colsize = height;
+	if (!((sx ^ dx) & 15)) {
+		/* odd->odd or even->even */
+
+		if (upwards) {
+			src = (u8 *)info->screen_base + sy * next_line + (sx & ~15) / (8 / BPL);
+			dst = (u8 *)info->screen_base + dy * next_line + (dx & ~15) / (8 / BPL);
+			if (sx & 15) {
+				memmove32_col(dst, src, 0xff00ff, height, next_line - BPL * 2);
+				src += BPL * 2;
+				dst += BPL * 2;
+				width -= 8;
+			}
+			w = width >> 4;
+			if (w) {
+				s = (u32 *)src;
+				d = (u32 *)dst;
+				w *= BPL / 2;
+				l = next_line - w * 4;
+				for (j = height; j > 0; j--) {
+					for (i = w; i > 0; i--)
+						*d++ = *s++;
+					s = (u32 *)((u8 *)s + l);
+					d = (u32 *)((u8 *)d + l);
+				}
+			}
+			if (width & 15)
+				memmove32_col(dst + width / (8 / BPL), src + width / (8 / BPL),
+					      0xff00ff00, height, next_line - BPL * 2);
+		} else {
+			src = (u8 *)info->screen_base + (sy - 1) * next_line + ((sx + width + 8) & ~15) / (8 / BPL);
+			dst = (u8 *)info->screen_base + (dy - 1) * next_line + ((dx + width + 8) & ~15) / (8 / BPL);
+
+			if ((sx + width) & 15) {
+				src -= BPL * 2;
+				dst -= BPL * 2;
+				memmove32_col(dst, src, 0xff00ff00, colsize, -next_line - BPL * 2);
+				width -= 8;
+			}
+			w = width >> 4;
+			if (w) {
+				s = (u32 *)src;
+				d = (u32 *)dst;
+				w *= BPL / 2;
+				l = next_line - w * 4;
+				for (j = height; j > 0; j--) {
+					for (i = w; i > 0; i--)
+						*--d = *--s;
+					s = (u32 *)((u8 *)s - l);
+					d = (u32 *)((u8 *)d - l);
+				}
+			}
+			if (sx & 15)
+				memmove32_col(dst - (width - 16) / (8 / BPL),
+					      src - (width - 16) / (8 / BPL),
+					      0xff00ff, colsize, -next_line - BPL * 2);
+		}
+	} else {
+		/* odd->even or even->odd */
+		if (upwards) {
+			u32 *src32, *dst32;
+			u32 pval[4], v, v1, mask;
+			int i, j, w, f;
+
+			src = (u8 *)info->screen_base + sy * next_line + (sx & ~15) / (8 / BPL);
+			dst = (u8 *)info->screen_base + dy * next_line + (dx & ~15) / (8 / BPL);
+
+			mask = 0xff00ff00;
+			f = 0;
+			w = width;
+			if (sx & 15) {
+				f = 1;
+				w += 8;
+			}
+			if ((sx + width) & 15)
+				f |= 2;
+			w >>= 4;
+			for (i = height; i; i--) {
+				src32 = (u32 *)src;
+				dst32 = (u32 *)dst;
+
+				if (f & 1) {
+					pval[0] = (*src32++ << 8) & mask;
+					pval[1] = (*src32++ << 8) & mask;
+					pval[2] = (*src32++ << 8) & mask;
+					pval[3] = (*src32++ << 8) & mask;
+				} else {
+					pval[0] = dst32[0] & mask;
+					pval[1] = dst32[1] & mask;
+					pval[2] = dst32[2] & mask;
+					pval[3] = dst32[3] & mask;
+				}
+
+				for (j = w; j > 0; j--) {
+					v = *src32++;
+					v1 = v & mask;
+					*dst32++ = pval[0] | (v1 >> 8);
+					pval[0] = (v ^ v1) << 8;
+					v = *src32++;
+					v1 = v & mask;
+					*dst32++ = pval[1] | (v1 >> 8);
+					pval[1] = (v ^ v1) << 8;
+					v = *src32++;
+					v1 = v & mask;
+					*dst32++ = pval[2] | (v1 >> 8);
+					pval[2] = (v ^ v1) << 8;
+					v = *src32++;
+					v1 = v & mask;
+					*dst32++ = pval[3] | (v1 >> 8);
+					pval[3] = (v ^ v1) << 8;
+				}
+
+				if (f & 2) {
+					dst32[0] = (dst32[0] & mask) | pval[0];
+					dst32[1] = (dst32[1] & mask) | pval[1];
+					dst32[2] = (dst32[2] & mask) | pval[2];
+					dst32[3] = (dst32[3] & mask) | pval[3];
+				}
+
+				src += next_line;
+				dst += next_line;
+			}
+		} else {
+			u32 *src32, *dst32;
+			u32 pval[4], v, v1, mask;
+			int i, j, w, f;
+
+			src = (u8 *)info->screen_base + (sy - 1) * next_line + ((sx + width + 8) & ~15) / (8 / BPL);
+			dst = (u8 *)info->screen_base + (dy - 1) * next_line + ((dx + width + 8) & ~15) / (8 / BPL);
+
+			mask = 0xff00ff;
+			f = 0;
+			w = width;
+			if ((dx + width) & 15)
+				f = 1;
+			if (sx & 15) {
+				f |= 2;
+				w += 8;
+			}
+			w >>= 4;
+			for (i = height; i; i--) {
+				src32 = (u32 *)src;
+				dst32 = (u32 *)dst;
+
+				if (f & 1) {
+					pval[0] = dst32[-1] & mask;
+					pval[1] = dst32[-2] & mask;
+					pval[2] = dst32[-3] & mask;
+					pval[3] = dst32[-4] & mask;
+				} else {
+					pval[0] = (*--src32 >> 8) & mask;
+					pval[1] = (*--src32 >> 8) & mask;
+					pval[2] = (*--src32 >> 8) & mask;
+					pval[3] = (*--src32 >> 8) & mask;
+				}
+
+				for (j = w; j > 0; j--) {
+					v = *--src32;
+					v1 = v & mask;
+					*--dst32 = pval[0] | (v1 << 8);
+					pval[0] = (v ^ v1) >> 8;
+					v = *--src32;
+					v1 = v & mask;
+					*--dst32 = pval[1] | (v1 << 8);
+					pval[1] = (v ^ v1) >> 8;
+					v = *--src32;
+					v1 = v & mask;
+					*--dst32 = pval[2] | (v1 << 8);
+					pval[2] = (v ^ v1) >> 8;
+					v = *--src32;
+					v1 = v & mask;
+					*--dst32 = pval[3] | (v1 << 8);
+					pval[3] = (v ^ v1) >> 8;
+				}
+
+				if (!(f & 2)) {
+					dst32[-1] = (dst32[-1] & mask) | pval[0];
+					dst32[-2] = (dst32[-2] & mask) | pval[1];
+					dst32[-3] = (dst32[-3] & mask) | pval[2];
+					dst32[-4] = (dst32[-4] & mask) | pval[3];
+				}
+
+				src -= next_line;
+				dst -= next_line;
+			}
+		}
+	}
+}
+
+void atafb_iplan2p8_fillrect(struct fb_info *info, u_long next_line, u32 color,
+                             int sy, int sx, int height, int width)
+{
+	u32 *dest;
+	int rows, i;
+	u32 cval[4];
+
+	dest = (u32 *)(info->screen_base + sy * next_line + (sx & ~15) / (8 / BPL));
+	if (sx & 15) {
+		u8 *dest8 = (u8 *)dest + 1;
+
+		expand8_col2mask(color, cval);
+
+		for (i = height; i; i--) {
+			fill8_col(dest8, cval);
+			dest8 += next_line;
+		}
+		dest += BPL / 2;
+		width -= 8;
+	}
+
+	expand16_col2mask(color, cval);
+	rows = width >> 4;
+	if (rows) {
+		u32 *d = dest;
+		u32 off = next_line - rows * BPL * 2;
+		for (i = height; i; i--) {
+			d = fill16_col(d, rows, cval);
+			d = (u32 *)((long)d + off);
+		}
+		dest += rows * BPL / 2;
+		width &= 15;
+	}
+
+	if (width) {
+		u8 *dest8 = (u8 *)dest;
+
+		expand8_col2mask(color, cval);
+
+		for (i = height; i; i--) {
+			fill8_col(dest8, cval);
+			dest8 += next_line;
+		}
+	}
+}
+
+void atafb_iplan2p8_linefill(struct fb_info *info, u_long next_line,
+			     int dy, int dx, u32 width,
+			     const u8 *data, u32 bgcolor, u32 fgcolor)
+{
+	u32 *dest;
+	const u16 *data16;
+	int rows;
+	u32 fgm[4], bgm[4], m;
+
+	dest = (u32 *)(info->screen_base + dy * next_line + (dx & ~15) / (8 / BPL));
+	if (dx & 15) {
+		fill8_2col((u8 *)dest + 1, fgcolor, bgcolor, *data++);
+		dest += BPL / 2;
+		width -= 8;
+	}
+
+	if (width >= 16) {
+		data16 = (const u16 *)data;
+		expand16_2col2mask(fgcolor, bgcolor, fgm, bgm);
+
+		for (rows = width / 16; rows; rows--) {
+			u16 d = *data16++;
+			m = d | ((u32)d << 16);
+			*dest++ = (m & fgm[0]) ^ bgm[0];
+			*dest++ = (m & fgm[1]) ^ bgm[1];
+			*dest++ = (m & fgm[2]) ^ bgm[2];
+			*dest++ = (m & fgm[3]) ^ bgm[3];
+		}
+
+		data = (const u8 *)data16;
+		width &= 15;
+	}
+
+	if (width)
+		fill8_2col((u8 *)dest, fgcolor, bgcolor, *data);
+}
+
+#ifdef MODULE
+MODULE_LICENSE("GPL");
+
+int init_module(void)
+{
+	return 0;
+}
+
+void cleanup_module(void)
+{
+}
+#endif /* MODULE */
+
+
+    /*
+     *  Visible symbols for modules
+     */
+
+EXPORT_SYMBOL(atafb_iplan2p8_copyarea);
+EXPORT_SYMBOL(atafb_iplan2p8_fillrect);
+EXPORT_SYMBOL(atafb_iplan2p8_linefill);

+ 112 - 0
drivers/video/atafb_mfb.c

@@ -0,0 +1,112 @@
+/*
+ *  linux/drivers/video/mfb.c -- Low level frame buffer operations for
+ *				 monochrome
+ *
+ *	Created 5 Apr 1997 by Geert Uytterhoeven
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of this archive for
+ *  more details.
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/fb.h>
+
+#include "atafb.h"
+#include "atafb_utils.h"
+
+
+    /*
+     *  Monochrome
+     */
+
+void atafb_mfb_copyarea(struct fb_info *info, u_long next_line,
+			int sy, int sx, int dy, int dx,
+			int height, int width)
+{
+	u8 *src, *dest;
+	u_int rows;
+
+	if (sx == 0 && dx == 0 && width == next_line) {
+		src = (u8 *)info->screen_base + sy * (width >> 3);
+		dest = (u8 *)info->screen_base + dy * (width >> 3);
+		fb_memmove(dest, src, height * (width >> 3));
+	} else if (dy <= sy) {
+		src = (u8 *)info->screen_base + sy * next_line + (sx >> 3);
+		dest = (u8 *)info->screen_base + dy * next_line + (dx >> 3);
+		for (rows = height; rows--;) {
+			fb_memmove(dest, src, width >> 3);
+			src += next_line;
+			dest += next_line;
+		}
+	} else {
+		src = (u8 *)info->screen_base + (sy + height - 1) * next_line + (sx >> 3);
+		dest = (u8 *)info->screen_base + (dy + height - 1) * next_line + (dx >> 3);
+		for (rows = height; rows--;) {
+			fb_memmove(dest, src, width >> 3);
+			src -= next_line;
+			dest -= next_line;
+		}
+	}
+}
+
+void atafb_mfb_fillrect(struct fb_info *info, u_long next_line, u32 color,
+			int sy, int sx, int height, int width)
+{
+	u8 *dest;
+	u_int rows;
+
+	dest = (u8 *)info->screen_base + sy * next_line + (sx >> 3);
+
+	if (sx == 0 && width == next_line) {
+		if (color)
+			fb_memset255(dest, height * (width >> 3));
+		else
+			fb_memclear(dest, height * (width >> 3));
+	} else {
+		for (rows = height; rows--; dest += next_line) {
+			if (color)
+				fb_memset255(dest, width >> 3);
+			else
+				fb_memclear_small(dest, width >> 3);
+		}
+	}
+}
+
+void atafb_mfb_linefill(struct fb_info *info, u_long next_line,
+			int dy, int dx, u32 width,
+			const u8 *data, u32 bgcolor, u32 fgcolor)
+{
+	u8 *dest;
+	u_int rows;
+
+	dest = (u8 *)info->screen_base + dy * next_line + (dx >> 3);
+
+	for (rows = width / 8; rows--; /* check margins */ ) {
+		// use fast_memmove or fb_memmove
+		*dest++ = *data++;
+	}
+}
+
+#ifdef MODULE
+MODULE_LICENSE("GPL");
+
+int init_module(void)
+{
+	return 0;
+}
+
+void cleanup_module(void)
+{
+}
+#endif /* MODULE */
+
+
+    /*
+     *  Visible symbols for modules
+     */
+
+EXPORT_SYMBOL(atafb_mfb_copyarea);
+EXPORT_SYMBOL(atafb_mfb_fillrect);
+EXPORT_SYMBOL(atafb_mfb_linefill);

+ 400 - 0
drivers/video/atafb_utils.h

@@ -0,0 +1,400 @@
+#ifndef _VIDEO_ATAFB_UTILS_H
+#define _VIDEO_ATAFB_UTILS_H
+
+/* ================================================================= */
+/*                      Utility Assembler Functions                  */
+/* ================================================================= */
+
+/* ====================================================================== */
+
+/* Those of a delicate disposition might like to skip the next couple of
+ * pages.
+ *
+ * These functions are drop in replacements for memmove and
+ * memset(_, 0, _). However their five instances add at least a kilobyte
+ * to the object file. You have been warned.
+ *
+ * Not a great fan of assembler for the sake of it, but I think
+ * that these routines are at least 10 times faster than their C
+ * equivalents for large blits, and that's important to the lowest level of
+ * a graphics driver. Question is whether some scheme with the blitter
+ * would be faster. I suspect not for simple text system - not much
+ * asynchrony.
+ *
+ * Code is very simple, just gruesome expansion. Basic strategy is to
+ * increase data moved/cleared at each step to 16 bytes to reduce
+ * instruction per data move overhead. movem might be faster still
+ * For more than 15 bytes, we try to align the write direction on a
+ * longword boundary to get maximum speed. This is even more gruesome.
+ * Unaligned read/write used requires 68020+ - think this is a problem?
+ *
+ * Sorry!
+ */
+
+
+/* ++roman: I've optimized Robert's original versions in some minor
+ * aspects, e.g. moveq instead of movel, let gcc choose the registers,
+ * use movem in some places...
+ * For other modes than 1 plane, lots of more such assembler functions
+ * were needed (e.g. the ones using movep or expanding color values).
+ */
+
+/* ++andreas: more optimizations:
+   subl #65536,d0 replaced by clrw d0; subql #1,d0 for dbcc
+   addal is faster than addaw
+   movep is rather expensive compared to ordinary move's
+   some functions rewritten in C for clarity, no speed loss */
+
+static inline void *fb_memclear_small(void *s, size_t count)
+{
+	if (!count)
+		return 0;
+
+	asm volatile ("\n"
+		"	lsr.l	#1,%1 ; jcc 1f ; move.b %2,-(%0)\n"
+		"1:	lsr.l	#1,%1 ; jcc 1f ; move.w %2,-(%0)\n"
+		"1:	lsr.l	#1,%1 ; jcc 1f ; move.l %2,-(%0)\n"
+		"1:	lsr.l	#1,%1 ; jcc 1f ; move.l %2,-(%0) ; move.l %2,-(%0)\n"
+		"1:"
+		: "=a" (s), "=d" (count)
+		: "d" (0), "0" ((char *)s + count), "1" (count));
+	asm volatile ("\n"
+		"	subq.l  #1,%1\n"
+		"	jcs	3f\n"
+		"	move.l	%2,%%d4; move.l %2,%%d5; move.l %2,%%d6\n"
+		"2:	movem.l	%2/%%d4/%%d5/%%d6,-(%0)\n"
+		"	dbra	%1,2b\n"
+		"3:"
+		: "=a" (s), "=d" (count)
+		: "d" (0), "0" (s), "1" (count)
+		: "d4", "d5", "d6"
+		);
+
+	return 0;
+}
+
+
+static inline void *fb_memclear(void *s, size_t count)
+{
+	if (!count)
+		return 0;
+
+	if (count < 16) {
+		asm volatile ("\n"
+			"	lsr.l	#1,%1 ; jcc 1f ; clr.b (%0)+\n"
+			"1:	lsr.l	#1,%1 ; jcc 1f ; clr.w (%0)+\n"
+			"1:	lsr.l	#1,%1 ; jcc 1f ; clr.l (%0)+\n"
+			"1:	lsr.l	#1,%1 ; jcc 1f ; clr.l (%0)+ ; clr.l (%0)+\n"
+			"1:"
+			: "=a" (s), "=d" (count)
+			: "0" (s), "1" (count));
+	} else {
+		long tmp;
+		asm volatile ("\n"
+			"	move.l	%1,%2\n"
+			"	lsr.l	#1,%2 ; jcc 1f ; clr.b (%0)+ ; subq.w #1,%1\n"
+			"	lsr.l	#1,%2 ; jcs 2f\n"  /* %0 increased=>bit 2 switched*/
+			"	clr.w	(%0)+  ; subq.w  #2,%1 ; jra 2f\n"
+			"1:	lsr.l	#1,%2 ; jcc 2f\n"
+			"	clr.w	(%0)+  ; subq.w  #2,%1\n"
+			"2:	move.w	%1,%2; lsr.l #2,%1 ; jeq 6f\n"
+			"	lsr.l	#1,%1 ; jcc 3f ; clr.l (%0)+\n"
+			"3:	lsr.l	#1,%1 ; jcc 4f ; clr.l (%0)+ ; clr.l (%0)+\n"
+			"4:	subq.l	#1,%1 ; jcs 6f\n"
+			"5:	clr.l	(%0)+; clr.l (%0)+ ; clr.l (%0)+ ; clr.l (%0)+\n"
+			"	dbra	%1,5b ; clr.w %1; subq.l #1,%1; jcc 5b\n"
+			"6:	move.w	%2,%1; btst #1,%1 ; jeq 7f ; clr.w (%0)+\n"
+			"7:	btst	#0,%1 ; jeq 8f ; clr.b (%0)+\n"
+			"8:"
+			: "=a" (s), "=d" (count), "=d" (tmp)
+			: "0" (s), "1" (count));
+	}
+
+	return 0;
+}
+
+
+static inline void *fb_memset255(void *s, size_t count)
+{
+	if (!count)
+		return 0;
+
+	asm volatile ("\n"
+		"	lsr.l	#1,%1 ; jcc 1f ; move.b %2,-(%0)\n"
+		"1:	lsr.l	#1,%1 ; jcc 1f ; move.w %2,-(%0)\n"
+		"1:	lsr.l	#1,%1 ; jcc 1f ; move.l %2,-(%0)\n"
+		"1:	lsr.l	#1,%1 ; jcc 1f ; move.l %2,-(%0) ; move.l %2,-(%0)\n"
+		"1:"
+		: "=a" (s), "=d" (count)
+		: "d" (-1), "0" ((char *)s+count), "1" (count));
+	asm volatile ("\n"
+		"	subq.l	#1,%1 ; jcs 3f\n"
+		"	move.l	%2,%%d4; move.l %2,%%d5; move.l %2,%%d6\n"
+		"2:	movem.l	%2/%%d4/%%d5/%%d6,-(%0)\n"
+		"	dbra	%1,2b\n"
+		"3:"
+		: "=a" (s), "=d" (count)
+		: "d" (-1), "0" (s), "1" (count)
+		: "d4", "d5", "d6");
+
+	return 0;
+}
+
+
+static inline void *fb_memmove(void *d, const void *s, size_t count)
+{
+	if (d < s) {
+		if (count < 16) {
+			asm volatile ("\n"
+				"	lsr.l	#1,%2 ; jcc 1f ; move.b (%1)+,(%0)+\n"
+				"1:	lsr.l	#1,%2 ; jcc 1f ; move.w (%1)+,(%0)+\n"
+				"1:	lsr.l	#1,%2 ; jcc 1f ; move.l (%1)+,(%0)+\n"
+				"1:	lsr.l	#1,%2 ; jcc 1f ; move.l (%1)+,(%0)+ ; move.l (%1)+,(%0)+\n"
+				"1:"
+				: "=a" (d), "=a" (s), "=d" (count)
+				: "0" (d), "1" (s), "2" (count));
+		} else {
+			long tmp;
+			asm volatile ("\n"
+				"	move.l	%0,%3\n"
+				"	lsr.l	#1,%3 ; jcc 1f ; move.b (%1)+,(%0)+ ; subqw #1,%2\n"
+				"	lsr.l	#1,%3 ; jcs 2f\n"  /* %0 increased=>bit 2 switched*/
+				"	move.w	(%1)+,(%0)+  ; subqw  #2,%2 ; jra 2f\n"
+				"1:	lsr.l   #1,%3 ; jcc 2f\n"
+				"	move.w	(%1)+,(%0)+  ; subqw  #2,%2\n"
+				"2:	move.w	%2,%-; lsr.l #2,%2 ; jeq 6f\n"
+				"	lsr.l	#1,%2 ; jcc 3f ; move.l (%1)+,(%0)+\n"
+				"3:	lsr.l	#1,%2 ; jcc 4f ; move.l (%1)+,(%0)+ ; move.l (%1)+,(%0)+\n"
+				"4:	subq.l	#1,%2 ; jcs 6f\n"
+				"5:	move.l	(%1)+,(%0)+; move.l (%1)+,(%0)+\n"
+				"	move.l	(%1)+,(%0)+; move.l (%1)+,(%0)+\n"
+				"	dbra	%2,5b ; clr.w %2; subq.l #1,%2; jcc 5b\n"
+				"6:	move.w	%+,%2; btst #1,%2 ; jeq 7f ; move.w (%1)+,(%0)+\n"
+				"7:	btst	#0,%2 ; jeq 8f ; move.b (%1)+,(%0)+\n"
+				"8:"
+				: "=a" (d), "=a" (s), "=d" (count), "=d" (tmp)
+				: "0" (d), "1" (s), "2" (count));
+		}
+	} else {
+		if (count < 16) {
+			asm volatile ("\n"
+				"	lsr.l	#1,%2 ; jcc 1f ; move.b -(%1),-(%0)\n"
+				"1:	lsr.l	#1,%2 ; jcc 1f ; move.w -(%1),-(%0)\n"
+				"1:	lsr.l	#1,%2 ; jcc 1f ; move.l -(%1),-(%0)\n"
+				"1:	lsr.l	#1,%2 ; jcc 1f ; move.l -(%1),-(%0) ; move.l -(%1),-(%0)\n"
+				"1:"
+				: "=a" (d), "=a" (s), "=d" (count)
+				: "0" ((char *) d + count), "1" ((char *) s + count), "2" (count));
+		} else {
+			long tmp;
+
+			asm volatile ("\n"
+				"	move.l	%0,%3\n"
+				"	lsr.l	#1,%3 ; jcc 1f ; move.b -(%1),-(%0) ; subqw #1,%2\n"
+				"	lsr.l	#1,%3 ; jcs 2f\n"  /* %0 increased=>bit 2 switched*/
+				"	move.w	-(%1),-(%0) ; subqw  #2,%2 ; jra 2f\n"
+				"1:	lsr.l	#1,%3 ; jcc 2f\n"
+				"	move.w	-(%1),-(%0) ; subqw  #2,%2\n"
+				"2:	move.w	%2,%-; lsr.l #2,%2 ; jeq 6f\n"
+				"	lsr.l	#1,%2 ; jcc 3f ; move.l -(%1),-(%0)\n"
+				"3:	lsr.l	#1,%2 ; jcc 4f ; move.l -(%1),-(%0) ; move.l -(%1),-(%0)\n"
+				"4:	subq.l	#1,%2 ; jcs 6f\n"
+				"5:	move.l	-(%1),-(%0); move.l -(%1),-(%0)\n"
+				"	move.l	-(%1),-(%0); move.l -(%1),-(%0)\n"
+				"	dbra	%2,5b ; clr.w %2; subq.l #1,%2; jcc 5b\n"
+				"6:	move.w	%+,%2; btst #1,%2 ; jeq 7f ; move.w -(%1),-(%0)\n"
+				"7:	btst	#0,%2 ; jeq 8f ; move.b -(%1),-(%0)\n"
+				"8:"
+				: "=a" (d), "=a" (s), "=d" (count), "=d" (tmp)
+				: "0" ((char *) d + count), "1" ((char *) s + count), "2" (count));
+		}
+	}
+
+	return 0;
+}
+
+
+/* ++andreas: Simple and fast version of memmove, assumes size is
+   divisible by 16, suitable for moving the whole screen bitplane */
+static inline void fast_memmove(char *dst, const char *src, size_t size)
+{
+	if (!size)
+		return;
+	if (dst < src)
+		asm volatile ("\n"
+			"1:	movem.l	(%0)+,%%d0/%%d1/%%a0/%%a1\n"
+			"	movem.l	%%d0/%%d1/%%a0/%%a1,%1@\n"
+			"	addq.l	#8,%1; addq.l #8,%1\n"
+			"	dbra	%2,1b\n"
+			"	clr.w	%2; subq.l #1,%2\n"
+			"	jcc	1b"
+			: "=a" (src), "=a" (dst), "=d" (size)
+			: "0" (src), "1" (dst), "2" (size / 16 - 1)
+			: "d0", "d1", "a0", "a1", "memory");
+	else
+		asm volatile ("\n"
+			"1:	subq.l	#8,%0; subq.l #8,%0\n"
+			"	movem.l	%0@,%%d0/%%d1/%%a0/%%a1\n"
+			"	movem.l	%%d0/%%d1/%%a0/%%a1,-(%1)\n"
+			"	dbra	%2,1b\n"
+			"	clr.w	%2; subq.l #1,%2\n"
+			"	jcc 1b"
+			: "=a" (src), "=a" (dst), "=d" (size)
+			: "0" (src + size), "1" (dst + size), "2" (size / 16 - 1)
+			: "d0", "d1", "a0", "a1", "memory");
+}
+
+#ifdef BPL
+
+/*
+ * This expands a up to 8 bit color into two longs
+ * for movel operations.
+ */
+static const u32 four2long[] = {
+	0x00000000, 0x000000ff, 0x0000ff00, 0x0000ffff,
+	0x00ff0000, 0x00ff00ff, 0x00ffff00, 0x00ffffff,
+	0xff000000, 0xff0000ff, 0xff00ff00, 0xff00ffff,
+	0xffff0000, 0xffff00ff, 0xffffff00, 0xffffffff,
+};
+
+static inline void expand8_col2mask(u8 c, u32 m[])
+{
+	m[0] = four2long[c & 15];
+#if BPL > 4
+	m[1] = four2long[c >> 4];
+#endif
+}
+
+static inline void expand8_2col2mask(u8 fg, u8 bg, u32 fgm[], u32 bgm[])
+{
+	fgm[0] = four2long[fg & 15] ^ (bgm[0] = four2long[bg & 15]);
+#if BPL > 4
+	fgm[1] = four2long[fg >> 4] ^ (bgm[1] = four2long[bg >> 4]);
+#endif
+}
+
+/*
+ * set an 8bit value to a color
+ */
+static inline void fill8_col(u8 *dst, u32 m[])
+{
+	u32 tmp = m[0];
+	dst[0] = tmp;
+	dst[2] = (tmp >>= 8);
+#if BPL > 2
+	dst[4] = (tmp >>= 8);
+	dst[6] = tmp >> 8;
+#endif
+#if BPL > 4
+	tmp = m[1];
+	dst[8] = tmp;
+	dst[10] = (tmp >>= 8);
+	dst[12] = (tmp >>= 8);
+	dst[14] = tmp >> 8;
+#endif
+}
+
+/*
+ * set an 8bit value according to foreground/background color
+ */
+static inline void fill8_2col(u8 *dst, u8 fg, u8 bg, u32 mask)
+{
+	u32 fgm[2], bgm[2], tmp;
+
+	expand8_2col2mask(fg, bg, fgm, bgm);
+
+	mask |= mask << 8;
+#if BPL > 2
+	mask |= mask << 16;
+#endif
+	tmp = (mask & fgm[0]) ^ bgm[0];
+	dst[0] = tmp;
+	dst[2] = (tmp >>= 8);
+#if BPL > 2
+	dst[4] = (tmp >>= 8);
+	dst[6] = tmp >> 8;
+#endif
+#if BPL > 4
+	tmp = (mask & fgm[1]) ^ bgm[1];
+	dst[8] = tmp;
+	dst[10] = (tmp >>= 8);
+	dst[12] = (tmp >>= 8);
+	dst[14] = tmp >> 8;
+#endif
+}
+
+static const u32 two2word[] = {
+	0x00000000, 0xffff0000, 0x0000ffff, 0xffffffff
+};
+
+static inline void expand16_col2mask(u8 c, u32 m[])
+{
+	m[0] = two2word[c & 3];
+#if BPL > 2
+	m[1] = two2word[(c >> 2) & 3];
+#endif
+#if BPL > 4
+	m[2] = two2word[(c >> 4) & 3];
+	m[3] = two2word[c >> 6];
+#endif
+}
+
+static inline void expand16_2col2mask(u8 fg, u8 bg, u32 fgm[], u32 bgm[])
+{
+	bgm[0] = two2word[bg & 3];
+	fgm[0] = two2word[fg & 3] ^ bgm[0];
+#if BPL > 2
+	bgm[1] = two2word[(bg >> 2) & 3];
+	fgm[1] = two2word[(fg >> 2) & 3] ^ bgm[1];
+#endif
+#if BPL > 4
+	bgm[2] = two2word[(bg >> 4) & 3];
+	fgm[2] = two2word[(fg >> 4) & 3] ^ bgm[2];
+	bgm[3] = two2word[bg >> 6];
+	fgm[3] = two2word[fg >> 6] ^ bgm[3];
+#endif
+}
+
+static inline u32 *fill16_col(u32 *dst, int rows, u32 m[])
+{
+	while (rows) {
+		*dst++ = m[0];
+#if BPL > 2
+		*dst++ = m[1];
+#endif
+#if BPL > 4
+		*dst++ = m[2];
+		*dst++ = m[3];
+#endif
+		rows--;
+	}
+	return dst;
+}
+
+static inline void memmove32_col(void *dst, void *src, u32 mask, u32 h, u32 bytes)
+{
+	u32 *s, *d, v;
+
+        s = src;
+        d = dst;
+        do {
+                v = (*s++ & mask) | (*d  & ~mask);
+                *d++ = v;
+#if BPL > 2
+                v = (*s++ & mask) | (*d  & ~mask);
+                *d++ = v;
+#endif
+#if BPL > 4
+                v = (*s++ & mask) | (*d  & ~mask);
+                *d++ = v;
+                v = (*s++ & mask) | (*d  & ~mask);
+                *d++ = v;
+#endif
+                d = (u32 *)((u8 *)d + bytes);
+                s = (u32 *)((u8 *)s + bytes);
+        } while (--h);
+}
+
+#endif
+
+#endif /* _VIDEO_ATAFB_UTILS_H */

部分文件因文件數量過多而無法顯示