diff options
author | Mandar padmawar <mpadmawar@nvidia.com> | 2014-06-10 07:13:21 -0700 |
---|---|---|
committer | Mandar padmawar <mpadmawar@nvidia.com> | 2014-06-10 07:15:56 -0700 |
commit | da616b70c1939e02f9dc962b3d9196fe27cfee39 (patch) | |
tree | e3dc844dd92f1ea51fe5819d5919dec1f59e5760 /drivers/video | |
parent | bbf3566b5c9c0106300933665f42e94c18415702 (diff) | |
parent | dce68ca8e1015cf5bcd33284c0c1c9191e79885d (diff) |
Merge commit 'refs/changes/82/419382/1' of ssh://git-master:12001/linux-3.10 into promotion_build
Change-Id: I9418a05ad5c56b2e902249218bac2fa594d99f56
Diffstat (limited to 'drivers/video')
-rw-r--r-- | drivers/video/aty/mach64_accel.c | 3 | ||||
-rw-r--r-- | drivers/video/aty/mach64_cursor.c | 22 | ||||
-rw-r--r-- | drivers/video/cfbcopyarea.c | 153 | ||||
-rw-r--r-- | drivers/video/matrox/matroxfb_accel.c | 38 | ||||
-rw-r--r-- | drivers/video/matrox/matroxfb_base.h | 2 | ||||
-rw-r--r-- | drivers/video/tgafb.c | 264 |
6 files changed, 177 insertions, 305 deletions
diff --git a/drivers/video/aty/mach64_accel.c b/drivers/video/aty/mach64_accel.c index e45833ce975b..182bd680141f 100644 --- a/drivers/video/aty/mach64_accel.c +++ b/drivers/video/aty/mach64_accel.c @@ -4,6 +4,7 @@ */ #include <linux/delay.h> +#include <asm/unaligned.h> #include <linux/fb.h> #include <video/mach64.h> #include "atyfb.h" @@ -419,7 +420,7 @@ void atyfb_imageblit(struct fb_info *info, const struct fb_image *image) u32 *pbitmap, dwords = (src_bytes + 3) / 4; for (pbitmap = (u32*)(image->data); dwords; dwords--, pbitmap++) { wait_for_fifo(1, par); - aty_st_le32(HOST_DATA0, le32_to_cpup(pbitmap), par); + aty_st_le32(HOST_DATA0, get_unaligned_le32(pbitmap), par); } } diff --git a/drivers/video/aty/mach64_cursor.c b/drivers/video/aty/mach64_cursor.c index 95ec042ddbf8..0fe02e22d9a4 100644 --- a/drivers/video/aty/mach64_cursor.c +++ b/drivers/video/aty/mach64_cursor.c @@ -5,6 +5,7 @@ #include <linux/fb.h> #include <linux/init.h> #include <linux/string.h> +#include "../fb_draw.h" #include <asm/io.h> @@ -157,24 +158,33 @@ static int atyfb_cursor(struct fb_info *info, struct fb_cursor *cursor) for (i = 0; i < height; i++) { for (j = 0; j < width; j++) { + u16 l = 0xaaaa; b = *src++; m = *msk++; switch (cursor->rop) { case ROP_XOR: // Upper 4 bits of mask data - fb_writeb(cursor_bits_lookup[(b ^ m) >> 4], dst++); + l = cursor_bits_lookup[(b ^ m) >> 4] | // Lower 4 bits of mask - fb_writeb(cursor_bits_lookup[(b ^ m) & 0x0f], - dst++); + (cursor_bits_lookup[(b ^ m) & 0x0f] << 8); break; case ROP_COPY: // Upper 4 bits of mask data - fb_writeb(cursor_bits_lookup[(b & m) >> 4], dst++); + l = cursor_bits_lookup[(b & m) >> 4] | // Lower 4 bits of mask - fb_writeb(cursor_bits_lookup[(b & m) & 0x0f], - dst++); + (cursor_bits_lookup[(b & m) & 0x0f] << 8); break; } + /* + * If cursor size is not a multiple of 8 characters + * we must pad it with transparent pattern (0xaaaa). + */ + if ((j + 1) * 8 > cursor->image.width) { + l = comp(l, 0xaaaa, + (1 << ((cursor->image.width & 7) * 2)) - 1); + } + fb_writeb(l & 0xff, dst++); + fb_writeb(l >> 8, dst++); } dst += offset; } diff --git a/drivers/video/cfbcopyarea.c b/drivers/video/cfbcopyarea.c index bb5a96b1645d..bcb57235fcc7 100644 --- a/drivers/video/cfbcopyarea.c +++ b/drivers/video/cfbcopyarea.c @@ -43,13 +43,22 @@ */ static void -bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, - const unsigned long __iomem *src, int src_idx, int bits, +bitcpy(struct fb_info *p, unsigned long __iomem *dst, unsigned dst_idx, + const unsigned long __iomem *src, unsigned src_idx, int bits, unsigned n, u32 bswapmask) { unsigned long first, last; int const shift = dst_idx-src_idx; - int left, right; + +#if 0 + /* + * If you suspect bug in this function, compare it with this simple + * memmove implementation. + */ + fb_memmove((char *)dst + ((dst_idx & (bits - 1))) / 8, + (char *)src + ((src_idx & (bits - 1))) / 8, n / 8); + return; +#endif first = fb_shifted_pixels_mask_long(p, dst_idx, bswapmask); last = ~fb_shifted_pixels_mask_long(p, (dst_idx+n) % bits, bswapmask); @@ -98,9 +107,8 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, unsigned long d0, d1; int m; - right = shift & (bits - 1); - left = -shift & (bits - 1); - bswapmask &= shift; + int const left = shift & (bits - 1); + int const right = -shift & (bits - 1); if (dst_idx+n <= bits) { // Single destination word @@ -110,15 +118,15 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, d0 = fb_rev_pixels_in_long(d0, bswapmask); if (shift > 0) { // Single source word - d0 >>= right; + d0 <<= left; } else if (src_idx+n <= bits) { // Single source word - d0 <<= left; + d0 >>= right; } else { // 2 source words d1 = FB_READL(src + 1); d1 = fb_rev_pixels_in_long(d1, bswapmask); - d0 = d0<<left | d1>>right; + d0 = d0 >> right | d1 << left; } d0 = fb_rev_pixels_in_long(d0, bswapmask); FB_WRITEL(comp(d0, FB_READL(dst), first), dst); @@ -135,60 +143,59 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, if (shift > 0) { // Single source word d1 = d0; - d0 >>= right; - dst++; + d0 <<= left; n -= bits - dst_idx; } else { // 2 source words d1 = FB_READL(src++); d1 = fb_rev_pixels_in_long(d1, bswapmask); - d0 = d0<<left | d1>>right; - dst++; + d0 = d0 >> right | d1 << left; n -= bits - dst_idx; } d0 = fb_rev_pixels_in_long(d0, bswapmask); FB_WRITEL(comp(d0, FB_READL(dst), first), dst); d0 = d1; + dst++; // Main chunk m = n % bits; n /= bits; while ((n >= 4) && !bswapmask) { d1 = FB_READL(src++); - FB_WRITEL(d0 << left | d1 >> right, dst++); + FB_WRITEL(d0 >> right | d1 << left, dst++); d0 = d1; d1 = FB_READL(src++); - FB_WRITEL(d0 << left | d1 >> right, dst++); + FB_WRITEL(d0 >> right | d1 << left, dst++); d0 = d1; d1 = FB_READL(src++); - FB_WRITEL(d0 << left | d1 >> right, dst++); + FB_WRITEL(d0 >> right | d1 << left, dst++); d0 = d1; d1 = FB_READL(src++); - FB_WRITEL(d0 << left | d1 >> right, dst++); + FB_WRITEL(d0 >> right | d1 << left, dst++); d0 = d1; n -= 4; } while (n--) { d1 = FB_READL(src++); d1 = fb_rev_pixels_in_long(d1, bswapmask); - d0 = d0 << left | d1 >> right; + d0 = d0 >> right | d1 << left; d0 = fb_rev_pixels_in_long(d0, bswapmask); FB_WRITEL(d0, dst++); d0 = d1; } // Trailing bits - if (last) { - if (m <= right) { + if (m) { + if (m <= bits - right) { // Single source word - d0 <<= left; + d0 >>= right; } else { // 2 source words d1 = FB_READL(src); d1 = fb_rev_pixels_in_long(d1, bswapmask); - d0 = d0<<left | d1>>right; + d0 = d0 >> right | d1 << left; } d0 = fb_rev_pixels_in_long(d0, bswapmask); FB_WRITEL(comp(d0, FB_READL(dst), last), dst); @@ -202,43 +209,46 @@ bitcpy(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, */ static void -bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, - const unsigned long __iomem *src, int src_idx, int bits, +bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, unsigned dst_idx, + const unsigned long __iomem *src, unsigned src_idx, int bits, unsigned n, u32 bswapmask) { unsigned long first, last; int shift; - dst += (n-1)/bits; - src += (n-1)/bits; - if ((n-1) % bits) { - dst_idx += (n-1) % bits; - dst += dst_idx >> (ffs(bits) - 1); - dst_idx &= bits - 1; - src_idx += (n-1) % bits; - src += src_idx >> (ffs(bits) - 1); - src_idx &= bits - 1; - } +#if 0 + /* + * If you suspect bug in this function, compare it with this simple + * memmove implementation. + */ + fb_memmove((char *)dst + ((dst_idx & (bits - 1))) / 8, + (char *)src + ((src_idx & (bits - 1))) / 8, n / 8); + return; +#endif + + dst += (dst_idx + n - 1) / bits; + src += (src_idx + n - 1) / bits; + dst_idx = (dst_idx + n - 1) % bits; + src_idx = (src_idx + n - 1) % bits; shift = dst_idx-src_idx; - first = fb_shifted_pixels_mask_long(p, bits - 1 - dst_idx, bswapmask); - last = ~fb_shifted_pixels_mask_long(p, bits - 1 - ((dst_idx-n) % bits), - bswapmask); + first = ~fb_shifted_pixels_mask_long(p, (dst_idx + 1) % bits, bswapmask); + last = fb_shifted_pixels_mask_long(p, (bits + dst_idx + 1 - n) % bits, bswapmask); if (!shift) { // Same alignment for source and dest if ((unsigned long)dst_idx+1 >= n) { // Single word - if (last) - first &= last; - FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst); + if (first) + last &= first; + FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst); } else { // Multiple destination words // Leading bits - if (first != ~0UL) { + if (first) { FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst); dst--; src--; @@ -262,7 +272,7 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, FB_WRITEL(FB_READL(src--), dst--); // Trailing bits - if (last) + if (last != -1UL) FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst); } } else { @@ -270,29 +280,28 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, unsigned long d0, d1; int m; - int const left = -shift & (bits-1); - int const right = shift & (bits-1); - bswapmask &= shift; + int const left = shift & (bits-1); + int const right = -shift & (bits-1); if ((unsigned long)dst_idx+1 >= n) { // Single destination word - if (last) - first &= last; + if (first) + last &= first; d0 = FB_READL(src); if (shift < 0) { // Single source word - d0 <<= left; + d0 >>= right; } else if (1+(unsigned long)src_idx >= n) { // Single source word - d0 >>= right; + d0 <<= left; } else { // 2 source words d1 = FB_READL(src - 1); d1 = fb_rev_pixels_in_long(d1, bswapmask); - d0 = d0>>right | d1<<left; + d0 = d0 << left | d1 >> right; } d0 = fb_rev_pixels_in_long(d0, bswapmask); - FB_WRITEL(comp(d0, FB_READL(dst), first), dst); + FB_WRITEL(comp(d0, FB_READL(dst), last), dst); } else { // Multiple destination words /** We must always remember the last value read, because in case @@ -307,12 +316,12 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, if (shift < 0) { // Single source word d1 = d0; - d0 <<= left; + d0 >>= right; } else { // 2 source words d1 = FB_READL(src--); d1 = fb_rev_pixels_in_long(d1, bswapmask); - d0 = d0>>right | d1<<left; + d0 = d0 << left | d1 >> right; } d0 = fb_rev_pixels_in_long(d0, bswapmask); FB_WRITEL(comp(d0, FB_READL(dst), first), dst); @@ -325,39 +334,39 @@ bitcpy_rev(struct fb_info *p, unsigned long __iomem *dst, int dst_idx, n /= bits; while ((n >= 4) && !bswapmask) { d1 = FB_READL(src--); - FB_WRITEL(d0 >> right | d1 << left, dst--); + FB_WRITEL(d0 << left | d1 >> right, dst--); d0 = d1; d1 = FB_READL(src--); - FB_WRITEL(d0 >> right | d1 << left, dst--); + FB_WRITEL(d0 << left | d1 >> right, dst--); d0 = d1; d1 = FB_READL(src--); - FB_WRITEL(d0 >> right | d1 << left, dst--); + FB_WRITEL(d0 << left | d1 >> right, dst--); d0 = d1; d1 = FB_READL(src--); - FB_WRITEL(d0 >> right | d1 << left, dst--); + FB_WRITEL(d0 << left | d1 >> right, dst--); d0 = d1; n -= 4; } while (n--) { d1 = FB_READL(src--); d1 = fb_rev_pixels_in_long(d1, bswapmask); - d0 = d0 >> right | d1 << left; + d0 = d0 << left | d1 >> right; d0 = fb_rev_pixels_in_long(d0, bswapmask); FB_WRITEL(d0, dst--); d0 = d1; } // Trailing bits - if (last) { - if (m <= left) { + if (m) { + if (m <= bits - left) { // Single source word - d0 >>= right; + d0 <<= left; } else { // 2 source words d1 = FB_READL(src); d1 = fb_rev_pixels_in_long(d1, bswapmask); - d0 = d0>>right | d1<<left; + d0 = d0 << left | d1 >> right; } d0 = fb_rev_pixels_in_long(d0, bswapmask); FB_WRITEL(comp(d0, FB_READL(dst), last), dst); @@ -371,9 +380,9 @@ void cfb_copyarea(struct fb_info *p, const struct fb_copyarea *area) u32 dx = area->dx, dy = area->dy, sx = area->sx, sy = area->sy; u32 height = area->height, width = area->width; unsigned long const bits_per_line = p->fix.line_length*8u; - unsigned long __iomem *dst = NULL, *src = NULL; + unsigned long __iomem *base = NULL; int bits = BITS_PER_LONG, bytes = bits >> 3; - int dst_idx = 0, src_idx = 0, rev_copy = 0; + unsigned dst_idx = 0, src_idx = 0, rev_copy = 0; u32 bswapmask = fb_compute_bswapmask(p); if (p->state != FBINFO_STATE_RUNNING) @@ -389,7 +398,7 @@ void cfb_copyarea(struct fb_info *p, const struct fb_copyarea *area) // split the base of the framebuffer into a long-aligned address and the // index of the first bit - dst = src = (unsigned long __iomem *)((unsigned long)p->screen_base & ~(bytes-1)); + base = (unsigned long __iomem *)((unsigned long)p->screen_base & ~(bytes-1)); dst_idx = src_idx = 8*((unsigned long)p->screen_base & (bytes-1)); // add offset of source and target area dst_idx += dy*bits_per_line + dx*p->var.bits_per_pixel; @@ -402,20 +411,14 @@ void cfb_copyarea(struct fb_info *p, const struct fb_copyarea *area) while (height--) { dst_idx -= bits_per_line; src_idx -= bits_per_line; - dst += dst_idx >> (ffs(bits) - 1); - dst_idx &= (bytes - 1); - src += src_idx >> (ffs(bits) - 1); - src_idx &= (bytes - 1); - bitcpy_rev(p, dst, dst_idx, src, src_idx, bits, + bitcpy_rev(p, base + (dst_idx / bits), dst_idx % bits, + base + (src_idx / bits), src_idx % bits, bits, width*p->var.bits_per_pixel, bswapmask); } } else { while (height--) { - dst += dst_idx >> (ffs(bits) - 1); - dst_idx &= (bytes - 1); - src += src_idx >> (ffs(bits) - 1); - src_idx &= (bytes - 1); - bitcpy(p, dst, dst_idx, src, src_idx, bits, + bitcpy(p, base + (dst_idx / bits), dst_idx % bits, + base + (src_idx / bits), src_idx % bits, bits, width*p->var.bits_per_pixel, bswapmask); dst_idx += bits_per_line; src_idx += bits_per_line; diff --git a/drivers/video/matrox/matroxfb_accel.c b/drivers/video/matrox/matroxfb_accel.c index 8335a6fe303e..0d5cb85d071a 100644 --- a/drivers/video/matrox/matroxfb_accel.c +++ b/drivers/video/matrox/matroxfb_accel.c @@ -192,10 +192,18 @@ void matrox_cfbX_init(struct matrox_fb_info *minfo) minfo->accel.m_dwg_rect = M_DWG_TRAP | M_DWG_SOLID | M_DWG_ARZERO | M_DWG_SGNZERO | M_DWG_SHIFTZERO; if (isMilleniumII(minfo)) minfo->accel.m_dwg_rect |= M_DWG_TRANSC; minfo->accel.m_opmode = mopmode; + minfo->accel.m_access = maccess; + minfo->accel.m_pitch = mpitch; } EXPORT_SYMBOL(matrox_cfbX_init); +static void matrox_accel_restore_maccess(struct matrox_fb_info *minfo) +{ + mga_outl(M_MACCESS, minfo->accel.m_access); + mga_outl(M_PITCH, minfo->accel.m_pitch); +} + static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy, int sx, int dy, int dx, int height, int width) { @@ -207,7 +215,8 @@ static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy, CRITBEGIN if ((dy < sy) || ((dy == sy) && (dx <= sx))) { - mga_fifo(2); + mga_fifo(4); + matrox_accel_restore_maccess(minfo); mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_SGNZERO | M_DWG_BFCOL | M_DWG_REPLACE); mga_outl(M_AR5, vxres); @@ -215,7 +224,8 @@ static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy, start = sy*vxres+sx+curr_ydstorg(minfo); end = start+width; } else { - mga_fifo(3); + mga_fifo(5); + matrox_accel_restore_maccess(minfo); mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_BFCOL | M_DWG_REPLACE); mga_outl(M_SGN, 5); mga_outl(M_AR5, -vxres); @@ -224,7 +234,8 @@ static void matrox_accel_bmove(struct matrox_fb_info *minfo, int vxres, int sy, start = end+width; dy += height-1; } - mga_fifo(4); + mga_fifo(6); + matrox_accel_restore_maccess(minfo); mga_outl(M_AR0, end); mga_outl(M_AR3, start); mga_outl(M_FXBNDRY, ((dx+width)<<16) | dx); @@ -246,7 +257,8 @@ static void matrox_accel_bmove_lin(struct matrox_fb_info *minfo, int vxres, CRITBEGIN if ((dy < sy) || ((dy == sy) && (dx <= sx))) { - mga_fifo(2); + mga_fifo(4); + matrox_accel_restore_maccess(minfo); mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_SGNZERO | M_DWG_BFCOL | M_DWG_REPLACE); mga_outl(M_AR5, vxres); @@ -254,7 +266,8 @@ static void matrox_accel_bmove_lin(struct matrox_fb_info *minfo, int vxres, start = sy*vxres+sx+curr_ydstorg(minfo); end = start+width; } else { - mga_fifo(3); + mga_fifo(5); + matrox_accel_restore_maccess(minfo); mga_outl(M_DWGCTL, M_DWG_BITBLT | M_DWG_SHIFTZERO | M_DWG_BFCOL | M_DWG_REPLACE); mga_outl(M_SGN, 5); mga_outl(M_AR5, -vxres); @@ -263,7 +276,8 @@ static void matrox_accel_bmove_lin(struct matrox_fb_info *minfo, int vxres, start = end+width; dy += height-1; } - mga_fifo(5); + mga_fifo(7); + matrox_accel_restore_maccess(minfo); mga_outl(M_AR0, end); mga_outl(M_AR3, start); mga_outl(M_FXBNDRY, ((dx+width)<<16) | dx); @@ -298,7 +312,8 @@ static void matroxfb_accel_clear(struct matrox_fb_info *minfo, u_int32_t color, CRITBEGIN - mga_fifo(5); + mga_fifo(7); + matrox_accel_restore_maccess(minfo); mga_outl(M_DWGCTL, minfo->accel.m_dwg_rect | M_DWG_REPLACE); mga_outl(M_FCOL, color); mga_outl(M_FXBNDRY, ((sx + width) << 16) | sx); @@ -341,7 +356,8 @@ static void matroxfb_cfb4_clear(struct matrox_fb_info *minfo, u_int32_t bgx, width >>= 1; sx >>= 1; if (width) { - mga_fifo(5); + mga_fifo(7); + matrox_accel_restore_maccess(minfo); mga_outl(M_DWGCTL, minfo->accel.m_dwg_rect | M_DWG_REPLACE2); mga_outl(M_FCOL, bgx); mga_outl(M_FXBNDRY, ((sx + width) << 16) | sx); @@ -415,7 +431,8 @@ static void matroxfb_1bpp_imageblit(struct matrox_fb_info *minfo, u_int32_t fgx, CRITBEGIN - mga_fifo(3); + mga_fifo(5); + matrox_accel_restore_maccess(minfo); if (easy) mga_outl(M_DWGCTL, M_DWG_ILOAD | M_DWG_SGNZERO | M_DWG_SHIFTZERO | M_DWG_BMONOWF | M_DWG_LINEAR | M_DWG_REPLACE); else @@ -425,7 +442,8 @@ static void matroxfb_1bpp_imageblit(struct matrox_fb_info *minfo, u_int32_t fgx, fxbndry = ((xx + width - 1) << 16) | xx; mmio = minfo->mmio.vbase; - mga_fifo(6); + mga_fifo(8); + matrox_accel_restore_maccess(minfo); mga_writel(mmio, M_FXBNDRY, fxbndry); mga_writel(mmio, M_AR0, ar0); mga_writel(mmio, M_AR3, 0); diff --git a/drivers/video/matrox/matroxfb_base.h b/drivers/video/matrox/matroxfb_base.h index 11ed57bb704e..556d96ce40bf 100644 --- a/drivers/video/matrox/matroxfb_base.h +++ b/drivers/video/matrox/matroxfb_base.h @@ -307,6 +307,8 @@ struct matrox_accel_data { #endif u_int32_t m_dwg_rect; u_int32_t m_opmode; + u_int32_t m_access; + u_int32_t m_pitch; }; struct v4l2_queryctrl; diff --git a/drivers/video/tgafb.c b/drivers/video/tgafb.c index c9c8e5a1fdee..ba77f753649c 100644 --- a/drivers/video/tgafb.c +++ b/drivers/video/tgafb.c @@ -1142,222 +1142,57 @@ copyarea_line_32bpp(struct fb_info *info, u32 dy, u32 sy, __raw_writel(TGA_MODE_SBM_24BPP|TGA_MODE_SIMPLE, tga_regs+TGA_MODE_REG); } -/* The general case of forward copy in 8bpp mode. */ +/* The (almost) general case of backward copy in 8bpp mode. */ static inline void -copyarea_foreward_8bpp(struct fb_info *info, u32 dx, u32 dy, u32 sx, u32 sy, - u32 height, u32 width, u32 line_length) +copyarea_8bpp(struct fb_info *info, u32 dx, u32 dy, u32 sx, u32 sy, + u32 height, u32 width, u32 line_length, + const struct fb_copyarea *area) { struct tga_par *par = (struct tga_par *) info->par; - unsigned long i, copied, left; - unsigned long dpos, spos, dalign, salign, yincr; - u32 smask_first, dmask_first, dmask_last; - int pixel_shift, need_prime, need_second; - unsigned long n64, n32, xincr_first; + unsigned i, yincr; + int depos, sepos, backward, last_step, step; + u32 mask_last; + unsigned n32; void __iomem *tga_regs; void __iomem *tga_fb; - yincr = line_length; - if (dy > sy) { - dy += height - 1; - sy += height - 1; - yincr = -yincr; - } - - /* Compute the offsets and alignments in the frame buffer. - More than anything else, these control how we do copies. */ - dpos = dy * line_length + dx; - spos = sy * line_length + sx; - dalign = dpos & 7; - salign = spos & 7; - dpos &= -8; - spos &= -8; - - /* Compute the value for the PIXELSHIFT register. This controls - both non-co-aligned source and destination and copy direction. */ - if (dalign >= salign) - pixel_shift = dalign - salign; - else - pixel_shift = 8 - (salign - dalign); - - /* Figure out if we need an additional priming step for the - residue register. */ - need_prime = (salign > dalign); - if (need_prime) - dpos -= 8; - - /* Begin by copying the leading unaligned destination. Copy enough - to make the next destination address 32-byte aligned. */ - copied = 32 - (dalign + (dpos & 31)); - if (copied == 32) - copied = 0; - xincr_first = (copied + 7) & -8; - smask_first = dmask_first = (1ul << copied) - 1; - smask_first <<= salign; - dmask_first <<= dalign + need_prime*8; - if (need_prime && copied > 24) - copied -= 8; - left = width - copied; - - /* Care for small copies. */ - if (copied > width) { - u32 t; - t = (1ul << width) - 1; - t <<= dalign + need_prime*8; - dmask_first &= t; - left = 0; - } - - /* Attempt to use 64-byte copies. This is only possible if the - source and destination are co-aligned at 64 bytes. */ - n64 = need_second = 0; - if ((dpos & 63) == (spos & 63) - && (height == 1 || line_length % 64 == 0)) { - /* We may need a 32-byte copy to ensure 64 byte alignment. */ - need_second = (dpos + xincr_first) & 63; - if ((need_second & 32) != need_second) - printk(KERN_ERR "tgafb: need_second wrong\n"); - if (left >= need_second + 64) { - left -= need_second; - n64 = left / 64; - left %= 64; - } else - need_second = 0; - } - - /* Copy trailing full 32-byte sections. This will be the main - loop if the 64 byte loop can't be used. */ - n32 = left / 32; - left %= 32; - - /* Copy the trailing unaligned destination. */ - dmask_last = (1ul << left) - 1; - - tga_regs = par->tga_regs_base; - tga_fb = par->tga_fb_base; - - /* Set up the MODE and PIXELSHIFT registers. */ - __raw_writel(TGA_MODE_SBM_8BPP|TGA_MODE_COPY, tga_regs+TGA_MODE_REG); - __raw_writel(pixel_shift, tga_regs+TGA_PIXELSHIFT_REG); - wmb(); - - for (i = 0; i < height; ++i) { - unsigned long j; - void __iomem *sfb; - void __iomem *dfb; - - sfb = tga_fb + spos; - dfb = tga_fb + dpos; - if (dmask_first) { - __raw_writel(smask_first, sfb); - wmb(); - __raw_writel(dmask_first, dfb); - wmb(); - sfb += xincr_first; - dfb += xincr_first; - } - - if (need_second) { - __raw_writel(0xffffffff, sfb); - wmb(); - __raw_writel(0xffffffff, dfb); - wmb(); - sfb += 32; - dfb += 32; - } - - if (n64 && (((unsigned long)sfb | (unsigned long)dfb) & 63)) - printk(KERN_ERR - "tgafb: misaligned copy64 (s:%p, d:%p)\n", - sfb, dfb); - - for (j = 0; j < n64; ++j) { - __raw_writel(sfb - tga_fb, tga_regs+TGA_COPY64_SRC); - wmb(); - __raw_writel(dfb - tga_fb, tga_regs+TGA_COPY64_DST); - wmb(); - sfb += 64; - dfb += 64; - } - - for (j = 0; j < n32; ++j) { - __raw_writel(0xffffffff, sfb); - wmb(); - __raw_writel(0xffffffff, dfb); - wmb(); - sfb += 32; - dfb += 32; - } - - if (dmask_last) { - __raw_writel(0xffffffff, sfb); - wmb(); - __raw_writel(dmask_last, dfb); - wmb(); - } - - spos += yincr; - dpos += yincr; + /* Do acceleration only if we are aligned on 8 pixels */ + if ((dx | sx | width) & 7) { + cfb_copyarea(info, area); + return; } - /* Reset the MODE register to normal. */ - __raw_writel(TGA_MODE_SBM_8BPP|TGA_MODE_SIMPLE, tga_regs+TGA_MODE_REG); -} - -/* The (almost) general case of backward copy in 8bpp mode. */ -static inline void -copyarea_backward_8bpp(struct fb_info *info, u32 dx, u32 dy, u32 sx, u32 sy, - u32 height, u32 width, u32 line_length, - const struct fb_copyarea *area) -{ - struct tga_par *par = (struct tga_par *) info->par; - unsigned long i, left, yincr; - unsigned long depos, sepos, dealign, sealign; - u32 mask_first, mask_last; - unsigned long n32; - void __iomem *tga_regs; - void __iomem *tga_fb; - yincr = line_length; if (dy > sy) { dy += height - 1; sy += height - 1; yincr = -yincr; } + backward = dy == sy && dx > sx && dx < sx + width; /* Compute the offsets and alignments in the frame buffer. More than anything else, these control how we do copies. */ - depos = dy * line_length + dx + width; - sepos = sy * line_length + sx + width; - dealign = depos & 7; - sealign = sepos & 7; - - /* ??? The documentation appears to be incorrect (or very - misleading) wrt how pixel shifting works in backward copy - mode, i.e. when PIXELSHIFT is negative. I give up for now. - Do handle the common case of co-aligned backward copies, - but frob everything else back on generic code. */ - if (dealign != sealign) { - cfb_copyarea(info, area); - return; - } - - /* We begin the copy with the trailing pixels of the - unaligned destination. */ - mask_first = (1ul << dealign) - 1; - left = width - dealign; - - /* Care for small copies. */ - if (dealign > width) { - mask_first ^= (1ul << (dealign - width)) - 1; - left = 0; - } + depos = dy * line_length + dx; + sepos = sy * line_length + sx; + if (backward) + depos += width, sepos += width; /* Next copy full words at a time. */ - n32 = left / 32; - left %= 32; + n32 = width / 32; + last_step = width % 32; /* Finally copy the unaligned head of the span. */ - mask_last = -1 << (32 - left); + mask_last = (1ul << last_step) - 1; + + if (!backward) { + step = 32; + last_step = 32; + } else { + step = -32; + last_step = -last_step; + sepos -= 32; + depos -= 32; + } tga_regs = par->tga_regs_base; tga_fb = par->tga_fb_base; @@ -1374,25 +1209,33 @@ copyarea_backward_8bpp(struct fb_info *info, u32 dx, u32 dy, u32 sx, u32 sy, sfb = tga_fb + sepos; dfb = tga_fb + depos; - if (mask_first) { - __raw_writel(mask_first, sfb); - wmb(); - __raw_writel(mask_first, dfb); - wmb(); - } - for (j = 0; j < n32; ++j) { - sfb -= 32; - dfb -= 32; + for (j = 0; j < n32; j++) { + if (j < 2 && j + 1 < n32 && !backward && + !(((unsigned long)sfb | (unsigned long)dfb) & 63)) { + do { + __raw_writel(sfb - tga_fb, tga_regs+TGA_COPY64_SRC); + wmb(); + __raw_writel(dfb - tga_fb, tga_regs+TGA_COPY64_DST); + wmb(); + sfb += 64; + dfb += 64; + j += 2; + } while (j + 1 < n32); + j--; + continue; + } __raw_writel(0xffffffff, sfb); wmb(); __raw_writel(0xffffffff, dfb); wmb(); + sfb += step; + dfb += step; } if (mask_last) { - sfb -= 32; - dfb -= 32; + sfb += last_step - step; + dfb += last_step - step; __raw_writel(mask_last, sfb); wmb(); __raw_writel(mask_last, dfb); @@ -1453,14 +1296,9 @@ tgafb_copyarea(struct fb_info *info, const struct fb_copyarea *area) else if (bpp == 32) cfb_copyarea(info, area); - /* Detect overlapping source and destination that requires - a backward copy. */ - else if (dy == sy && dx > sx && dx < sx + width) - copyarea_backward_8bpp(info, dx, dy, sx, sy, height, - width, line_length, area); else - copyarea_foreward_8bpp(info, dx, dy, sx, sy, height, - width, line_length); + copyarea_8bpp(info, dx, dy, sx, sy, height, + width, line_length, area); } |