diff options
Diffstat (limited to 'drivers/media/video/mxc/opl/rotate270_u16.c')
-rw-r--r-- | drivers/media/video/mxc/opl/rotate270_u16.c | 285 |
1 files changed, 285 insertions, 0 deletions
diff --git a/drivers/media/video/mxc/opl/rotate270_u16.c b/drivers/media/video/mxc/opl/rotate270_u16.c new file mode 100644 index 000000000000..8b995212aeff --- /dev/null +++ b/drivers/media/video/mxc/opl/rotate270_u16.c @@ -0,0 +1,285 @@ +/* + * Copyright 2004-2010 Freescale Semiconductor, Inc. All Rights Reserved. + */ + +/* + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#include <linux/module.h> +#include "opl.h" + +static int opl_rotate270_u16_by16(const u8 *src, int src_line_stride, + int width, int height, u8 *dst, + int dst_line_stride, int vmirror); +static int opl_rotate270_u16_by4(const u8 *src, int src_line_stride, int width, + int height, u8 *dst, int dst_line_stride, + int vmirror); +static int opl_rotate270_vmirror_u16_both(const u8 *src, int src_line_stride, + int width, int height, u8 *dst, + int dst_line_stride, int vmirror); +int opl_rotate270_u16_qcif(const u8 *src, u8 *dst); + +int opl_rotate270_u16(const u8 *src, int src_line_stride, int width, + int height, u8 *dst, int dst_line_stride) +{ + return opl_rotate270_vmirror_u16_both(src, src_line_stride, width, + height, dst, dst_line_stride, 0); +} + +int opl_rotate270_vmirror_u16(const u8 *src, int src_line_stride, int width, + int height, u8 *dst, int dst_line_stride) +{ + return opl_rotate270_vmirror_u16_both(src, src_line_stride, width, + height, dst, dst_line_stride, 1); +} + +static int opl_rotate270_vmirror_u16_both(const u8 *src, int src_line_stride, + int width, int height, u8 *dst, + int dst_line_stride, int vmirror) +{ + const int BLOCK_SIZE_PIXELS = CACHE_LINE_WORDS * BYTES_PER_WORD + / BYTES_PER_PIXEL; + const int BLOCK_SIZE_PIXELS_BY4 = CACHE_LINE_WORDS * BYTES_PER_WORD + / BYTES_PER_PIXEL / 4; + + if (!src || !dst) + return OPLERR_NULL_PTR; + + if (width == 0 || height == 0 || src_line_stride == 0 + || dst_line_stride == 0) + return OPLERR_BAD_ARG; + + /* The QCIF algorithm doesn't support vertical mirroring */ + if (vmirror == 0 && width == QCIF_Y_WIDTH && height == QCIF_Y_HEIGHT + && src_line_stride == QCIF_Y_WIDTH * 2 + && src_line_stride == QCIF_Y_HEIGHT * 2) + return opl_rotate270_u16_qcif(src, dst); + else if (width % BLOCK_SIZE_PIXELS == 0 + && height % BLOCK_SIZE_PIXELS == 0) + return opl_rotate270_u16_by16(src, src_line_stride, width, + height, dst, dst_line_stride, + vmirror); + else if (width % BLOCK_SIZE_PIXELS_BY4 == 0 + && height % BLOCK_SIZE_PIXELS_BY4 == 0) + return opl_rotate270_u16_by4(src, src_line_stride, width, + height, dst, dst_line_stride, + vmirror); + else + return OPLERR_BAD_ARG; +} + +/* + * Rotate Counter Clockwise, divide RGB component into 16 row strips, read + * non sequentially and write sequentially. This is done in 16 line strips + * so that the cache is used better. Cachelines are 8 words = 32 bytes. Pixels + * are 2 bytes. The 16 reads will be cache misses, but the next 240 should + * be from cache. The writes to the output buffer will be sequential for 16 + * writes. + * + * Example: + * Input data matrix: output matrix + * + * 0 | 1 | 2 | 3 | 4 | 4 | 0 | 0 | 3 | + * 4 | 3 | 2 | 1 | 0 | 3 | 1 | 9 | 6 | + * 6 | 7 | 8 | 9 | 0 | 2 | 2 | 8 | 2 | + * 5 | 3 | 2 | 6 | 3 | 1 | 3 | 7 | 3 | + * ^ 0 | 4 | 6 | 5 | < Write the input data sequentially + * Read first column + * Start at the bottom + * Move to next column and repeat + * + * Loop over k decreasing (blocks) + * in_block_ptr = src + (((RGB_HEIGHT_PIXELS / BLOCK_SIZE_PIXELS) - k) + * * BLOCK_SIZE_PIXELS) * (RGB_WIDTH_BYTES) + * out_block_ptr = dst + (((RGB_HEIGHT_PIXELS / BLOCK_SIZE_PIXELS) - k) + * * BLOCK_SIZE_BYTES) + (RGB_WIDTH_PIXELS - 1) + * * RGB_HEIGHT_PIXELS * BYTES_PER_PIXEL + * + * Loop over i decreasing (width) + * Each pix: + * in_block_ptr += RGB_WIDTH_BYTES + * out_block_ptr += 4 + * + * Each row of block: + * in_block_ptr -= RGB_WIDTH_BYTES * BLOCK_SIZE_PIXELS - 2 + * out_block_ptr -= RGB_HEIGHT_PIXELS * BYTES_PER_PIXEL + 2 * BLOCK_SIZE_PIXELS; + * + * It may perform vertical mirroring too depending on the vmirror flag. + */ +static int opl_rotate270_u16_by16(const u8 *src, int src_line_stride, + int width, int height, u8 *dst, + int dst_line_stride, int vmirror) +{ + const int BLOCK_SIZE_PIXELS = CACHE_LINE_WORDS * BYTES_PER_WORD + / BYTES_PER_PIXEL; + const int IN_INDEX = src_line_stride * BLOCK_SIZE_PIXELS + - BYTES_PER_PIXEL; + const int OUT_INDEX = vmirror ? + -dst_line_stride + BYTES_PER_PIXEL * BLOCK_SIZE_PIXELS + : dst_line_stride + BYTES_PER_PIXEL * BLOCK_SIZE_PIXELS; + const u8 *in_block_ptr; + u8 *out_block_ptr; + int i, k; + + for (k = height / BLOCK_SIZE_PIXELS; k > 0; k--) { + in_block_ptr = src + (((height / BLOCK_SIZE_PIXELS) - k) + * BLOCK_SIZE_PIXELS) * src_line_stride; + out_block_ptr = dst + (((height / BLOCK_SIZE_PIXELS) - k) + * BLOCK_SIZE_PIXELS * BYTES_PER_PIXEL) + + (width - 1) * dst_line_stride; + + /* + * For vertical mirroring the writing starts from the + * first line + */ + if (vmirror) + out_block_ptr -= dst_line_stride * (width - 1); + + for (i = width; i > 0; i--) { + __asm__ volatile ( + "ldrh r2, [%0], %4\n\t" + "ldrh r3, [%0], %4\n\t" + "ldrh r4, [%0], %4\n\t" + "ldrh r5, [%0], %4\n\t" + "orr r2, r2, r3, lsl #16\n\t" + "orr r4, r4, r5, lsl #16\n\t" + "str r2, [%1], #4\n\t" + "str r4, [%1], #4\n\t" + + "ldrh r2, [%0], %4\n\t" + "ldrh r3, [%0], %4\n\t" + "ldrh r4, [%0], %4\n\t" + "ldrh r5, [%0], %4\n\t" + "orr r2, r2, r3, lsl #16\n\t" + "orr r4, r4, r5, lsl #16\n\t" + "str r2, [%1], #4\n\t" + "str r4, [%1], #4\n\t" + + "ldrh r2, [%0], %4\n\t" + "ldrh r3, [%0], %4\n\t" + "ldrh r4, [%0], %4\n\t" + "ldrh r5, [%0], %4\n\t" + "orr r2, r2, r3, lsl #16\n\t" + "orr r4, r4, r5, lsl #16\n\t" + "str r2, [%1], #4\n\t" + "str r4, [%1], #4\n\t" + + "ldrh r2, [%0], %4\n\t" + "ldrh r3, [%0], %4\n\t" + "ldrh r4, [%0], %4\n\t" + "ldrh r5, [%0], %4\n\t" + "orr r2, r2, r3, lsl #16\n\t" + "orr r4, r4, r5, lsl #16\n\t" + "str r2, [%1], #4\n\t" + "str r4, [%1], #4\n\t" + + : "+r" (in_block_ptr), "+r"(out_block_ptr) /* output */ + : "0"(in_block_ptr), "1"(out_block_ptr), "r"(src_line_stride) /* input */ + : "r2", "r3", "r4", "r5", "memory" /* modify */ + ); + in_block_ptr -= IN_INDEX; + out_block_ptr -= OUT_INDEX; + } + } + + return OPLERR_SUCCESS; +} + +/* + * Rotate Counter Clockwise, divide RGB component into 4 row strips, read + * non sequentially and write sequentially. This is done in 4 line strips + * so that the cache is used better. Cachelines are 8 words = 32 bytes. Pixels + * are 2 bytes. The 4 reads will be cache misses, but the next 60 should + * be from cache. The writes to the output buffer will be sequential for 4 + * writes. + * + * Example: + * Input data matrix: output matrix + * + * 0 | 1 | 2 | 3 | 4 | 4 | 0 | 0 | 3 | + * 4 | 3 | 2 | 1 | 0 | 3 | 1 | 9 | 6 | + * 6 | 7 | 8 | 9 | 0 | 2 | 2 | 8 | 2 | + * 5 | 3 | 2 | 6 | 3 | 1 | 3 | 7 | 3 | + * ^ 0 | 4 | 6 | 5 | < Write the input data sequentially + * Read first column + * Start at the bottom + * Move to next column and repeat + * + * Loop over k decreasing (blocks) + * in_block_ptr = src + (((RGB_HEIGHT_PIXELS / BLOCK_SIZE_PIXELS) - k) + * * BLOCK_SIZE_PIXELS) * (RGB_WIDTH_BYTES) + * out_block_ptr = dst + (((RGB_HEIGHT_PIXELS / BLOCK_SIZE_PIXELS) - k) + * * BLOCK_SIZE_BYTES) + (RGB_WIDTH_PIXELS - 1) + * * RGB_HEIGHT_PIXELS * BYTES_PER_PIXEL + * + * Loop over i decreasing (width) + * Each pix: + * in_block_ptr += RGB_WIDTH_BYTES + * out_block_ptr += 4 + * + * Each row of block: + * in_block_ptr -= RGB_WIDTH_BYTES * BLOCK_SIZE_PIXELS - 2 + * out_block_ptr -= RGB_HEIGHT_PIXELS * BYTES_PER_PIXEL + 2 * BLOCK_SIZE_PIXELS; + * + * It may perform vertical mirroring too depending on the vmirror flag. + */ +static int opl_rotate270_u16_by4(const u8 *src, int src_line_stride, int width, + int height, u8 *dst, int dst_line_stride, + int vmirror) +{ + const int BLOCK_SIZE_PIXELS = CACHE_LINE_WORDS * BYTES_PER_WORD + / BYTES_PER_PIXEL / 4; + const int IN_INDEX = src_line_stride * BLOCK_SIZE_PIXELS + - BYTES_PER_PIXEL; + const int OUT_INDEX = vmirror ? + -dst_line_stride + BYTES_PER_PIXEL * BLOCK_SIZE_PIXELS + : dst_line_stride + BYTES_PER_PIXEL * BLOCK_SIZE_PIXELS; + const u8 *in_block_ptr; + u8 *out_block_ptr; + int i, k; + + for (k = height / BLOCK_SIZE_PIXELS; k > 0; k--) { + in_block_ptr = src + (((height / BLOCK_SIZE_PIXELS) - k) + * BLOCK_SIZE_PIXELS) * src_line_stride; + out_block_ptr = dst + (((height / BLOCK_SIZE_PIXELS) - k) + * BLOCK_SIZE_PIXELS * BYTES_PER_PIXEL) + + (width - 1) * dst_line_stride; + + /* + * For vertical mirroring the writing starts from the + * first line + */ + if (vmirror) + out_block_ptr -= dst_line_stride * (width - 1); + + for (i = width; i > 0; i--) { + __asm__ volatile ( + "ldrh r2, [%0], %4\n\t" + "ldrh r3, [%0], %4\n\t" + "ldrh r4, [%0], %4\n\t" + "ldrh r5, [%0], %4\n\t" + "orr r2, r2, r3, lsl #16\n\t" + "orr r4, r4, r5, lsl #16\n\t" + "str r2, [%1], #4\n\t" + "str r4, [%1], #4\n\t" + + : "+r" (in_block_ptr), "+r"(out_block_ptr) /* output */ + : "0"(in_block_ptr), "1"(out_block_ptr), "r"(src_line_stride) /* input */ + : "r2", "r3", "r4", "r5", "memory" /* modify */ + ); + in_block_ptr -= IN_INDEX; + out_block_ptr -= OUT_INDEX; + } + } + + return OPLERR_SUCCESS; +} + +EXPORT_SYMBOL(opl_rotate270_u16); +EXPORT_SYMBOL(opl_rotate270_vmirror_u16); |