From 9c2019421511a1bc646981d55528334ae46464c0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Oct 2007 11:16:21 +0200 Subject: i386: move crypto Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/crypto/Makefile | 5 + arch/x86/crypto/Makefile_32 | 12 + arch/x86/crypto/aes-i586-asm_32.S | 373 ++++++++++++++++++++++++ arch/x86/crypto/aes_32.c | 515 ++++++++++++++++++++++++++++++++++ arch/x86/crypto/twofish-i586-asm_32.S | 335 ++++++++++++++++++++++ arch/x86/crypto/twofish_32.c | 97 +++++++ 6 files changed, 1337 insertions(+) create mode 100644 arch/x86/crypto/Makefile create mode 100644 arch/x86/crypto/Makefile_32 create mode 100644 arch/x86/crypto/aes-i586-asm_32.S create mode 100644 arch/x86/crypto/aes_32.c create mode 100644 arch/x86/crypto/twofish-i586-asm_32.S create mode 100644 arch/x86/crypto/twofish_32.c (limited to 'arch/x86') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile new file mode 100644 index 000000000000..b1bcf7c63028 --- /dev/null +++ b/arch/x86/crypto/Makefile @@ -0,0 +1,5 @@ +ifeq ($(CONFIG_X86_32),y) +include ${srctree}/arch/x86/crypto/Makefile_32 +else +include ${srctree}/arch/x86_64/crypto/Makefile_64 +endif diff --git a/arch/x86/crypto/Makefile_32 b/arch/x86/crypto/Makefile_32 new file mode 100644 index 000000000000..2d873a2388ed --- /dev/null +++ b/arch/x86/crypto/Makefile_32 @@ -0,0 +1,12 @@ +# +# x86/crypto/Makefile +# +# Arch-specific CryptoAPI modules. +# + +obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o +obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o + +aes-i586-y := aes-i586-asm_32.o aes_32.o +twofish-i586-y := twofish-i586-asm_32.o twofish_32.o + diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S new file mode 100644 index 000000000000..f942f0c8f630 --- /dev/null +++ b/arch/x86/crypto/aes-i586-asm_32.S @@ -0,0 +1,373 @@ +// ------------------------------------------------------------------------- +// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK. +// All rights reserved. +// +// LICENSE TERMS +// +// The free distribution and use of this software in both source and binary +// form is allowed (with or without changes) provided that: +// +// 1. distributions of this source code include the above copyright +// notice, this list of conditions and the following disclaimer// +// +// 2. distributions in binary form include the above copyright +// notice, this list of conditions and the following disclaimer +// in the documentation and/or other associated materials// +// +// 3. the copyright holder's name is not used to endorse products +// built using this software without specific written permission. +// +// +// ALTERNATIVELY, provided that this notice is retained in full, this product +// may be distributed under the terms of the GNU General Public License (GPL), +// in which case the provisions of the GPL apply INSTEAD OF those given above. +// +// Copyright (c) 2004 Linus Torvalds +// Copyright (c) 2004 Red Hat, Inc., James Morris + +// DISCLAIMER +// +// This software is provided 'as is' with no explicit or implied warranties +// in respect of its properties including, but not limited to, correctness +// and fitness for purpose. +// ------------------------------------------------------------------------- +// Issue Date: 29/07/2002 + +.file "aes-i586-asm.S" +.text + +#include + +#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) + +/* offsets to parameters with one register pushed onto stack */ +#define tfm 8 +#define out_blk 12 +#define in_blk 16 + +/* offsets in crypto_tfm structure */ +#define ekey (crypto_tfm_ctx_offset + 0) +#define nrnd (crypto_tfm_ctx_offset + 256) +#define dkey (crypto_tfm_ctx_offset + 260) + +// register mapping for encrypt and decrypt subroutines + +#define r0 eax +#define r1 ebx +#define r2 ecx +#define r3 edx +#define r4 esi +#define r5 edi + +#define eaxl al +#define eaxh ah +#define ebxl bl +#define ebxh bh +#define ecxl cl +#define ecxh ch +#define edxl dl +#define edxh dh + +#define _h(reg) reg##h +#define h(reg) _h(reg) + +#define _l(reg) reg##l +#define l(reg) _l(reg) + +// This macro takes a 32-bit word representing a column and uses +// each of its four bytes to index into four tables of 256 32-bit +// words to obtain values that are then xored into the appropriate +// output registers r0, r1, r4 or r5. + +// Parameters: +// table table base address +// %1 out_state[0] +// %2 out_state[1] +// %3 out_state[2] +// %4 out_state[3] +// idx input register for the round (destroyed) +// tmp scratch register for the round +// sched key schedule + +#define do_col(table, a1,a2,a3,a4, idx, tmp) \ + movzx %l(idx),%tmp; \ + xor table(,%tmp,4),%a1; \ + movzx %h(idx),%tmp; \ + shr $16,%idx; \ + xor table+tlen(,%tmp,4),%a2; \ + movzx %l(idx),%tmp; \ + movzx %h(idx),%idx; \ + xor table+2*tlen(,%tmp,4),%a3; \ + xor table+3*tlen(,%idx,4),%a4; + +// initialise output registers from the key schedule +// NB1: original value of a3 is in idx on exit +// NB2: original values of a1,a2,a4 aren't used +#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \ + mov 0 sched,%a1; \ + movzx %l(idx),%tmp; \ + mov 12 sched,%a2; \ + xor table(,%tmp,4),%a1; \ + mov 4 sched,%a4; \ + movzx %h(idx),%tmp; \ + shr $16,%idx; \ + xor table+tlen(,%tmp,4),%a2; \ + movzx %l(idx),%tmp; \ + movzx %h(idx),%idx; \ + xor table+3*tlen(,%idx,4),%a4; \ + mov %a3,%idx; \ + mov 8 sched,%a3; \ + xor table+2*tlen(,%tmp,4),%a3; + +// initialise output registers from the key schedule +// NB1: original value of a3 is in idx on exit +// NB2: original values of a1,a2,a4 aren't used +#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \ + mov 0 sched,%a1; \ + movzx %l(idx),%tmp; \ + mov 4 sched,%a2; \ + xor table(,%tmp,4),%a1; \ + mov 12 sched,%a4; \ + movzx %h(idx),%tmp; \ + shr $16,%idx; \ + xor table+tlen(,%tmp,4),%a2; \ + movzx %l(idx),%tmp; \ + movzx %h(idx),%idx; \ + xor table+3*tlen(,%idx,4),%a4; \ + mov %a3,%idx; \ + mov 8 sched,%a3; \ + xor table+2*tlen(,%tmp,4),%a3; + + +// original Gladman had conditional saves to MMX regs. +#define save(a1, a2) \ + mov %a2,4*a1(%esp) + +#define restore(a1, a2) \ + mov 4*a2(%esp),%a1 + +// These macros perform a forward encryption cycle. They are entered with +// the first previous round column values in r0,r1,r4,r5 and +// exit with the final values in the same registers, using stack +// for temporary storage. + +// round column values +// on entry: r0,r1,r4,r5 +// on exit: r2,r1,r4,r5 +#define fwd_rnd1(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \ + do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \ + restore(r0,0); \ + do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \ + restore(r0,1); \ + do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */ + +// round column values +// on entry: r2,r1,r4,r5 +// on exit: r0,r1,r4,r5 +#define fwd_rnd2(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \ + do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \ + restore(r2,0); \ + do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \ + restore(r2,1); \ + do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */ + +// These macros performs an inverse encryption cycle. They are entered with +// the first previous round column values in r0,r1,r4,r5 and +// exit with the final values in the same registers, using stack +// for temporary storage + +// round column values +// on entry: r0,r1,r4,r5 +// on exit: r2,r1,r4,r5 +#define inv_rnd1(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \ + do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \ + restore(r0,0); \ + do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \ + restore(r0,1); \ + do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */ + +// round column values +// on entry: r2,r1,r4,r5 +// on exit: r0,r1,r4,r5 +#define inv_rnd2(arg, table) \ + save (0,r1); \ + save (1,r5); \ + \ + /* compute new column values */ \ + do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \ + do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \ + restore(r2,0); \ + do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \ + restore(r2,1); \ + do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */ + +// AES (Rijndael) Encryption Subroutine +/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ + +.global aes_enc_blk + +.extern ft_tab +.extern fl_tab + +.align 4 + +aes_enc_blk: + push %ebp + mov tfm(%esp),%ebp + +// CAUTION: the order and the values used in these assigns +// rely on the register mappings + +1: push %ebx + mov in_blk+4(%esp),%r2 + push %esi + mov nrnd(%ebp),%r3 // number of rounds + push %edi +#if ekey != 0 + lea ekey(%ebp),%ebp // key pointer +#endif + +// input four columns and xor in first round key + + mov (%r2),%r0 + mov 4(%r2),%r1 + mov 8(%r2),%r4 + mov 12(%r2),%r5 + xor (%ebp),%r0 + xor 4(%ebp),%r1 + xor 8(%ebp),%r4 + xor 12(%ebp),%r5 + + sub $8,%esp // space for register saves on stack + add $16,%ebp // increment to next round key + cmp $12,%r3 + jb 4f // 10 rounds for 128-bit key + lea 32(%ebp),%ebp + je 3f // 12 rounds for 192-bit key + lea 32(%ebp),%ebp + +2: fwd_rnd1( -64(%ebp) ,ft_tab) // 14 rounds for 256-bit key + fwd_rnd2( -48(%ebp) ,ft_tab) +3: fwd_rnd1( -32(%ebp) ,ft_tab) // 12 rounds for 192-bit key + fwd_rnd2( -16(%ebp) ,ft_tab) +4: fwd_rnd1( (%ebp) ,ft_tab) // 10 rounds for 128-bit key + fwd_rnd2( +16(%ebp) ,ft_tab) + fwd_rnd1( +32(%ebp) ,ft_tab) + fwd_rnd2( +48(%ebp) ,ft_tab) + fwd_rnd1( +64(%ebp) ,ft_tab) + fwd_rnd2( +80(%ebp) ,ft_tab) + fwd_rnd1( +96(%ebp) ,ft_tab) + fwd_rnd2(+112(%ebp) ,ft_tab) + fwd_rnd1(+128(%ebp) ,ft_tab) + fwd_rnd2(+144(%ebp) ,fl_tab) // last round uses a different table + +// move final values to the output array. CAUTION: the +// order of these assigns rely on the register mappings + + add $8,%esp + mov out_blk+12(%esp),%ebp + mov %r5,12(%ebp) + pop %edi + mov %r4,8(%ebp) + pop %esi + mov %r1,4(%ebp) + pop %ebx + mov %r0,(%ebp) + pop %ebp + mov $1,%eax + ret + +// AES (Rijndael) Decryption Subroutine +/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ + +.global aes_dec_blk + +.extern it_tab +.extern il_tab + +.align 4 + +aes_dec_blk: + push %ebp + mov tfm(%esp),%ebp + +// CAUTION: the order and the values used in these assigns +// rely on the register mappings + +1: push %ebx + mov in_blk+4(%esp),%r2 + push %esi + mov nrnd(%ebp),%r3 // number of rounds + push %edi +#if dkey != 0 + lea dkey(%ebp),%ebp // key pointer +#endif + mov %r3,%r0 + shl $4,%r0 + add %r0,%ebp + +// input four columns and xor in first round key + + mov (%r2),%r0 + mov 4(%r2),%r1 + mov 8(%r2),%r4 + mov 12(%r2),%r5 + xor (%ebp),%r0 + xor 4(%ebp),%r1 + xor 8(%ebp),%r4 + xor 12(%ebp),%r5 + + sub $8,%esp // space for register saves on stack + sub $16,%ebp // increment to next round key + cmp $12,%r3 + jb 4f // 10 rounds for 128-bit key + lea -32(%ebp),%ebp + je 3f // 12 rounds for 192-bit key + lea -32(%ebp),%ebp + +2: inv_rnd1( +64(%ebp), it_tab) // 14 rounds for 256-bit key + inv_rnd2( +48(%ebp), it_tab) +3: inv_rnd1( +32(%ebp), it_tab) // 12 rounds for 192-bit key + inv_rnd2( +16(%ebp), it_tab) +4: inv_rnd1( (%ebp), it_tab) // 10 rounds for 128-bit key + inv_rnd2( -16(%ebp), it_tab) + inv_rnd1( -32(%ebp), it_tab) + inv_rnd2( -48(%ebp), it_tab) + inv_rnd1( -64(%ebp), it_tab) + inv_rnd2( -80(%ebp), it_tab) + inv_rnd1( -96(%ebp), it_tab) + inv_rnd2(-112(%ebp), it_tab) + inv_rnd1(-128(%ebp), it_tab) + inv_rnd2(-144(%ebp), il_tab) // last round uses a different table + +// move final values to the output array. CAUTION: the +// order of these assigns rely on the register mappings + + add $8,%esp + mov out_blk+12(%esp),%ebp + mov %r5,12(%ebp) + pop %edi + mov %r4,8(%ebp) + pop %esi + mov %r1,4(%ebp) + pop %ebx + mov %r0,(%ebp) + pop %ebp + mov $1,%eax + ret + diff --git a/arch/x86/crypto/aes_32.c b/arch/x86/crypto/aes_32.c new file mode 100644 index 000000000000..49aad9397f10 --- /dev/null +++ b/arch/x86/crypto/aes_32.c @@ -0,0 +1,515 @@ +/* + * + * Glue Code for optimized 586 assembler version of AES + * + * Copyright (c) 2002, Dr Brian Gladman <>, Worcester, UK. + * All rights reserved. + * + * LICENSE TERMS + * + * The free distribution and use of this software in both source and binary + * form is allowed (with or without changes) provided that: + * + * 1. distributions of this source code include the above copyright + * notice, this list of conditions and the following disclaimer; + * + * 2. distributions in binary form include the above copyright + * notice, this list of conditions and the following disclaimer + * in the documentation and/or other associated materials; + * + * 3. the copyright holder's name is not used to endorse products + * built using this software without specific written permission. + * + * ALTERNATIVELY, provided that this notice is retained in full, this product + * may be distributed under the terms of the GNU General Public License (GPL), + * in which case the provisions of the GPL apply INSTEAD OF those given above. + * + * DISCLAIMER + * + * This software is provided 'as is' with no explicit or implied warranties + * in respect of its properties, including, but not limited to, correctness + * and/or fitness for purpose. + * + * Copyright (c) 2003, Adam J. Richter (conversion to + * 2.5 API). + * Copyright (c) 2003, 2004 Fruhwirth Clemens + * Copyright (c) 2004 Red Hat, Inc., James Morris + * + */ + +#include +#include +#include +#include +#include +#include +#include + +asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); +asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + +#define AES_MIN_KEY_SIZE 16 +#define AES_MAX_KEY_SIZE 32 +#define AES_BLOCK_SIZE 16 +#define AES_KS_LENGTH 4 * AES_BLOCK_SIZE +#define RC_LENGTH 29 + +struct aes_ctx { + u32 ekey[AES_KS_LENGTH]; + u32 rounds; + u32 dkey[AES_KS_LENGTH]; +}; + +#define WPOLY 0x011b +#define bytes2word(b0, b1, b2, b3) \ + (((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0)) + +/* define the finite field multiplies required for Rijndael */ +#define f2(x) ((x) ? pow[log[x] + 0x19] : 0) +#define f3(x) ((x) ? pow[log[x] + 0x01] : 0) +#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0) +#define fb(x) ((x) ? pow[log[x] + 0x68] : 0) +#define fd(x) ((x) ? pow[log[x] + 0xee] : 0) +#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0) +#define fi(x) ((x) ? pow[255 - log[x]]: 0) + +static inline u32 upr(u32 x, int n) +{ + return (x << 8 * n) | (x >> (32 - 8 * n)); +} + +static inline u8 bval(u32 x, int n) +{ + return x >> 8 * n; +} + +/* The forward and inverse affine transformations used in the S-box */ +#define fwd_affine(x) \ + (w = (u32)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(u8)(w^(w>>8))) + +#define inv_affine(x) \ + (w = (u32)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(u8)(w^(w>>8))) + +static u32 rcon_tab[RC_LENGTH]; + +u32 ft_tab[4][256]; +u32 fl_tab[4][256]; +static u32 im_tab[4][256]; +u32 il_tab[4][256]; +u32 it_tab[4][256]; + +static void gen_tabs(void) +{ + u32 i, w; + u8 pow[512], log[256]; + + /* + * log and power tables for GF(2^8) finite field with + * WPOLY as modular polynomial - the simplest primitive + * root is 0x03, used here to generate the tables. + */ + i = 0; w = 1; + + do { + pow[i] = (u8)w; + pow[i + 255] = (u8)w; + log[w] = (u8)i++; + w ^= (w << 1) ^ (w & 0x80 ? WPOLY : 0); + } while (w != 1); + + for(i = 0, w = 1; i < RC_LENGTH; ++i) { + rcon_tab[i] = bytes2word(w, 0, 0, 0); + w = f2(w); + } + + for(i = 0; i < 256; ++i) { + u8 b; + + b = fwd_affine(fi((u8)i)); + w = bytes2word(f2(b), b, b, f3(b)); + + /* tables for a normal encryption round */ + ft_tab[0][i] = w; + ft_tab[1][i] = upr(w, 1); + ft_tab[2][i] = upr(w, 2); + ft_tab[3][i] = upr(w, 3); + w = bytes2word(b, 0, 0, 0); + + /* + * tables for last encryption round + * (may also be used in the key schedule) + */ + fl_tab[0][i] = w; + fl_tab[1][i] = upr(w, 1); + fl_tab[2][i] = upr(w, 2); + fl_tab[3][i] = upr(w, 3); + + b = fi(inv_affine((u8)i)); + w = bytes2word(fe(b), f9(b), fd(b), fb(b)); + + /* tables for the inverse mix column operation */ + im_tab[0][b] = w; + im_tab[1][b] = upr(w, 1); + im_tab[2][b] = upr(w, 2); + im_tab[3][b] = upr(w, 3); + + /* tables for a normal decryption round */ + it_tab[0][i] = w; + it_tab[1][i] = upr(w,1); + it_tab[2][i] = upr(w,2); + it_tab[3][i] = upr(w,3); + + w = bytes2word(b, 0, 0, 0); + + /* tables for last decryption round */ + il_tab[0][i] = w; + il_tab[1][i] = upr(w,1); + il_tab[2][i] = upr(w,2); + il_tab[3][i] = upr(w,3); + } +} + +#define four_tables(x,tab,vf,rf,c) \ +( tab[0][bval(vf(x,0,c),rf(0,c))] ^ \ + tab[1][bval(vf(x,1,c),rf(1,c))] ^ \ + tab[2][bval(vf(x,2,c),rf(2,c))] ^ \ + tab[3][bval(vf(x,3,c),rf(3,c))] \ +) + +#define vf1(x,r,c) (x) +#define rf1(r,c) (r) +#define rf2(r,c) ((r-c)&3) + +#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0) +#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c) + +#define ff(x) inv_mcol(x) + +#define ke4(k,i) \ +{ \ + k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \ + k[4*(i)+5] = ss[1] ^= ss[0]; \ + k[4*(i)+6] = ss[2] ^= ss[1]; \ + k[4*(i)+7] = ss[3] ^= ss[2]; \ +} + +#define kel4(k,i) \ +{ \ + k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \ + k[4*(i)+5] = ss[1] ^= ss[0]; \ + k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \ +} + +#define ke6(k,i) \ +{ \ + k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ + k[6*(i)+ 7] = ss[1] ^= ss[0]; \ + k[6*(i)+ 8] = ss[2] ^= ss[1]; \ + k[6*(i)+ 9] = ss[3] ^= ss[2]; \ + k[6*(i)+10] = ss[4] ^= ss[3]; \ + k[6*(i)+11] = ss[5] ^= ss[4]; \ +} + +#define kel6(k,i) \ +{ \ + k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ + k[6*(i)+ 7] = ss[1] ^= ss[0]; \ + k[6*(i)+ 8] = ss[2] ^= ss[1]; \ + k[6*(i)+ 9] = ss[3] ^= ss[2]; \ +} + +#define ke8(k,i) \ +{ \ + k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ + k[8*(i)+ 9] = ss[1] ^= ss[0]; \ + k[8*(i)+10] = ss[2] ^= ss[1]; \ + k[8*(i)+11] = ss[3] ^= ss[2]; \ + k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \ + k[8*(i)+13] = ss[5] ^= ss[4]; \ + k[8*(i)+14] = ss[6] ^= ss[5]; \ + k[8*(i)+15] = ss[7] ^= ss[6]; \ +} + +#define kel8(k,i) \ +{ \ + k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ + k[8*(i)+ 9] = ss[1] ^= ss[0]; \ + k[8*(i)+10] = ss[2] ^= ss[1]; \ + k[8*(i)+11] = ss[3] ^= ss[2]; \ +} + +#define kdf4(k,i) \ +{ \ + ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \ + ss[1] = ss[1] ^ ss[3]; \ + ss[2] = ss[2] ^ ss[3]; \ + ss[3] = ss[3]; \ + ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ + ss[i % 4] ^= ss[4]; \ + ss[4] ^= k[4*(i)]; \ + k[4*(i)+4] = ff(ss[4]); \ + ss[4] ^= k[4*(i)+1]; \ + k[4*(i)+5] = ff(ss[4]); \ + ss[4] ^= k[4*(i)+2]; \ + k[4*(i)+6] = ff(ss[4]); \ + ss[4] ^= k[4*(i)+3]; \ + k[4*(i)+7] = ff(ss[4]); \ +} + +#define kd4(k,i) \ +{ \ + ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ + ss[i % 4] ^= ss[4]; \ + ss[4] = ff(ss[4]); \ + k[4*(i)+4] = ss[4] ^= k[4*(i)]; \ + k[4*(i)+5] = ss[4] ^= k[4*(i)+1]; \ + k[4*(i)+6] = ss[4] ^= k[4*(i)+2]; \ + k[4*(i)+7] = ss[4] ^= k[4*(i)+3]; \ +} + +#define kdl4(k,i) \ +{ \ + ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ + ss[i % 4] ^= ss[4]; \ + k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \ + k[4*(i)+5] = ss[1] ^ ss[3]; \ + k[4*(i)+6] = ss[0]; \ + k[4*(i)+7] = ss[1]; \ +} + +#define kdf6(k,i) \ +{ \ + ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ + k[6*(i)+ 6] = ff(ss[0]); \ + ss[1] ^= ss[0]; \ + k[6*(i)+ 7] = ff(ss[1]); \ + ss[2] ^= ss[1]; \ + k[6*(i)+ 8] = ff(ss[2]); \ + ss[3] ^= ss[2]; \ + k[6*(i)+ 9] = ff(ss[3]); \ + ss[4] ^= ss[3]; \ + k[6*(i)+10] = ff(ss[4]); \ + ss[5] ^= ss[4]; \ + k[6*(i)+11] = ff(ss[5]); \ +} + +#define kd6(k,i) \ +{ \ + ss[6] = ls_box(ss[5],3) ^ rcon_tab[i]; \ + ss[0] ^= ss[6]; ss[6] = ff(ss[6]); \ + k[6*(i)+ 6] = ss[6] ^= k[6*(i)]; \ + ss[1] ^= ss[0]; \ + k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1]; \ + ss[2] ^= ss[1]; \ + k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2]; \ + ss[3] ^= ss[2]; \ + k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3]; \ + ss[4] ^= ss[3]; \ + k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4]; \ + ss[5] ^= ss[4]; \ + k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5]; \ +} + +#define kdl6(k,i) \ +{ \ + ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ + k[6*(i)+ 6] = ss[0]; \ + ss[1] ^= ss[0]; \ + k[6*(i)+ 7] = ss[1]; \ + ss[2] ^= ss[1]; \ + k[6*(i)+ 8] = ss[2]; \ + ss[3] ^= ss[2]; \ + k[6*(i)+ 9] = ss[3]; \ +} + +#define kdf8(k,i) \ +{ \ + ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ + k[8*(i)+ 8] = ff(ss[0]); \ + ss[1] ^= ss[0]; \ + k[8*(i)+ 9] = ff(ss[1]); \ + ss[2] ^= ss[1]; \ + k[8*(i)+10] = ff(ss[2]); \ + ss[3] ^= ss[2]; \ + k[8*(i)+11] = ff(ss[3]); \ + ss[4] ^= ls_box(ss[3],0); \ + k[8*(i)+12] = ff(ss[4]); \ + ss[5] ^= ss[4]; \ + k[8*(i)+13] = ff(ss[5]); \ + ss[6] ^= ss[5]; \ + k[8*(i)+14] = ff(ss[6]); \ + ss[7] ^= ss[6]; \ + k[8*(i)+15] = ff(ss[7]); \ +} + +#define kd8(k,i) \ +{ \ + u32 __g = ls_box(ss[7],3) ^ rcon_tab[i]; \ + ss[0] ^= __g; \ + __g = ff(__g); \ + k[8*(i)+ 8] = __g ^= k[8*(i)]; \ + ss[1] ^= ss[0]; \ + k[8*(i)+ 9] = __g ^= k[8*(i)+ 1]; \ + ss[2] ^= ss[1]; \ + k[8*(i)+10] = __g ^= k[8*(i)+ 2]; \ + ss[3] ^= ss[2]; \ + k[8*(i)+11] = __g ^= k[8*(i)+ 3]; \ + __g = ls_box(ss[3],0); \ + ss[4] ^= __g; \ + __g = ff(__g); \ + k[8*(i)+12] = __g ^= k[8*(i)+ 4]; \ + ss[5] ^= ss[4]; \ + k[8*(i)+13] = __g ^= k[8*(i)+ 5]; \ + ss[6] ^= ss[5]; \ + k[8*(i)+14] = __g ^= k[8*(i)+ 6]; \ + ss[7] ^= ss[6]; \ + k[8*(i)+15] = __g ^= k[8*(i)+ 7]; \ +} + +#define kdl8(k,i) \ +{ \ + ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ + k[8*(i)+ 8] = ss[0]; \ + ss[1] ^= ss[0]; \ + k[8*(i)+ 9] = ss[1]; \ + ss[2] ^= ss[1]; \ + k[8*(i)+10] = ss[2]; \ + ss[3] ^= ss[2]; \ + k[8*(i)+11] = ss[3]; \ +} + +static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + int i; + u32 ss[8]; + struct aes_ctx *ctx = crypto_tfm_ctx(tfm); + const __le32 *key = (const __le32 *)in_key; + u32 *flags = &tfm->crt_flags; + + /* encryption schedule */ + + ctx->ekey[0] = ss[0] = le32_to_cpu(key[0]); + ctx->ekey[1] = ss[1] = le32_to_cpu(key[1]); + ctx->ekey[2] = ss[2] = le32_to_cpu(key[2]); + ctx->ekey[3] = ss[3] = le32_to_cpu(key[3]); + + switch(key_len) { + case 16: + for (i = 0; i < 9; i++) + ke4(ctx->ekey, i); + kel4(ctx->ekey, 9); + ctx->rounds = 10; + break; + + case 24: + ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]); + ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]); + for (i = 0; i < 7; i++) + ke6(ctx->ekey, i); + kel6(ctx->ekey, 7); + ctx->rounds = 12; + break; + + case 32: + ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]); + ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]); + ctx->ekey[6] = ss[6] = le32_to_cpu(key[6]); + ctx->ekey[7] = ss[7] = le32_to_cpu(key[7]); + for (i = 0; i < 6; i++) + ke8(ctx->ekey, i); + kel8(ctx->ekey, 6); + ctx->rounds = 14; + break; + + default: + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + /* decryption schedule */ + + ctx->dkey[0] = ss[0] = le32_to_cpu(key[0]); + ctx->dkey[1] = ss[1] = le32_to_cpu(key[1]); + ctx->dkey[2] = ss[2] = le32_to_cpu(key[2]); + ctx->dkey[3] = ss[3] = le32_to_cpu(key[3]); + + switch (key_len) { + case 16: + kdf4(ctx->dkey, 0); + for (i = 1; i < 9; i++) + kd4(ctx->dkey, i); + kdl4(ctx->dkey, 9); + break; + + case 24: + ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4])); + ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5])); + kdf6(ctx->dkey, 0); + for (i = 1; i < 7; i++) + kd6(ctx->dkey, i); + kdl6(ctx->dkey, 7); + break; + + case 32: + ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4])); + ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5])); + ctx->dkey[6] = ff(ss[6] = le32_to_cpu(key[6])); + ctx->dkey[7] = ff(ss[7] = le32_to_cpu(key[7])); + kdf8(ctx->dkey, 0); + for (i = 1; i < 6; i++) + kd8(ctx->dkey, i); + kdl8(ctx->dkey, 6); + break; + } + return 0; +} + +static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + aes_enc_blk(tfm, dst, src); +} + +static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + aes_dec_blk(tfm, dst, src); +} + +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-i586", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct aes_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = aes_set_key, + .cia_encrypt = aes_encrypt, + .cia_decrypt = aes_decrypt + } + } +}; + +static int __init aes_init(void) +{ + gen_tabs(); + return crypto_register_alg(&aes_alg); +} + +static void __exit aes_fini(void) +{ + crypto_unregister_alg(&aes_alg); +} + +module_init(aes_init); +module_exit(aes_fini); + +MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, i586 asm optimized"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Fruhwirth Clemens, James Morris, Brian Gladman, Adam Richter"); +MODULE_ALIAS("aes"); diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S new file mode 100644 index 000000000000..39b98ed2c1b9 --- /dev/null +++ b/arch/x86/crypto/twofish-i586-asm_32.S @@ -0,0 +1,335 @@ +/*************************************************************************** +* Copyright (C) 2006 by Joachim Fritschi, * +* * +* This program is free software; you can redistribute it and/or modify * +* it under the terms of the GNU General Public License as published by * +* the Free Software Foundation; either version 2 of the License, or * +* (at your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, * +* but WITHOUT ANY WARRANTY; without even the implied warranty of * +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * +* GNU General Public License for more details. * +* * +* You should have received a copy of the GNU General Public License * +* along with this program; if not, write to the * +* Free Software Foundation, Inc., * +* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * +***************************************************************************/ + +.file "twofish-i586-asm.S" +.text + +#include + +/* return adress at 0 */ + +#define in_blk 12 /* input byte array address parameter*/ +#define out_blk 8 /* output byte array address parameter*/ +#define tfm 4 /* Twofish context structure */ + +#define a_offset 0 +#define b_offset 4 +#define c_offset 8 +#define d_offset 12 + +/* Structure of the crypto context struct*/ + +#define s0 0 /* S0 Array 256 Words each */ +#define s1 1024 /* S1 Array */ +#define s2 2048 /* S2 Array */ +#define s3 3072 /* S3 Array */ +#define w 4096 /* 8 whitening keys (word) */ +#define k 4128 /* key 1-32 ( word ) */ + +/* define a few register aliases to allow macro substitution */ + +#define R0D %eax +#define R0B %al +#define R0H %ah + +#define R1D %ebx +#define R1B %bl +#define R1H %bh + +#define R2D %ecx +#define R2B %cl +#define R2H %ch + +#define R3D %edx +#define R3B %dl +#define R3H %dh + + +/* performs input whitening */ +#define input_whitening(src,context,offset)\ + xor w+offset(context), src; + +/* performs input whitening */ +#define output_whitening(src,context,offset)\ + xor w+16+offset(context), src; + +/* + * a input register containing a (rotated 16) + * b input register containing b + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + */ +#define encrypt_round(a,b,c,d,round)\ + push d ## D;\ + movzx b ## B, %edi;\ + mov s1(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + mov s2(%ebp,%edi,4),%esi;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor s2(%ebp,%edi,4),d ## D;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%ebp,%edi,4),%esi;\ + movzx b ## B, %edi;\ + xor s3(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + xor (%ebp,%edi,4), %esi;\ + movzx b ## H, %edi;\ + ror $15, b ## D;\ + xor (%ebp,%edi,4), d ## D;\ + movzx a ## H, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + pop %edi;\ + add d ## D, %esi;\ + add %esi, d ## D;\ + add k+round(%ebp), %esi;\ + xor %esi, c ## D;\ + rol $15, c ## D;\ + add k+4+round(%ebp),d ## D;\ + xor %edi, d ## D; + +/* + * a input register containing a (rotated 16) + * b input register containing b + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + * last round has different rotations for the output preparation + */ +#define encrypt_last_round(a,b,c,d,round)\ + push d ## D;\ + movzx b ## B, %edi;\ + mov s1(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + mov s2(%ebp,%edi,4),%esi;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor s2(%ebp,%edi,4),d ## D;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%ebp,%edi,4),%esi;\ + movzx b ## B, %edi;\ + xor s3(%ebp,%edi,4),d ## D;\ + movzx a ## B, %edi;\ + xor (%ebp,%edi,4), %esi;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%ebp,%edi,4), d ## D;\ + movzx a ## H, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + pop %edi;\ + add d ## D, %esi;\ + add %esi, d ## D;\ + add k+round(%ebp), %esi;\ + xor %esi, c ## D;\ + ror $1, c ## D;\ + add k+4+round(%ebp),d ## D;\ + xor %edi, d ## D; + +/* + * a input register containing a + * b input register containing b (rotated 16) + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + */ +#define decrypt_round(a,b,c,d,round)\ + push c ## D;\ + movzx a ## B, %edi;\ + mov (%ebp,%edi,4), c ## D;\ + movzx b ## B, %edi;\ + mov s3(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s1(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%ebp,%edi,4), %esi;\ + movzx a ## B, %edi;\ + xor s2(%ebp,%edi,4),c ## D;\ + movzx b ## B, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $15, a ## D;\ + xor s3(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + xor s2(%ebp,%edi,4),%esi;\ + pop %edi;\ + add %esi, c ## D;\ + add c ## D, %esi;\ + add k+round(%ebp), c ## D;\ + xor %edi, c ## D;\ + add k+4+round(%ebp),%esi;\ + xor %esi, d ## D;\ + rol $15, d ## D; + +/* + * a input register containing a + * b input register containing b (rotated 16) + * c input register containing c + * d input register containing d (already rol $1) + * operations on a and b are interleaved to increase performance + * last round has different rotations for the output preparation + */ +#define decrypt_last_round(a,b,c,d,round)\ + push c ## D;\ + movzx a ## B, %edi;\ + mov (%ebp,%edi,4), c ## D;\ + movzx b ## B, %edi;\ + mov s3(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s1(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + ror $16, b ## D;\ + xor (%ebp,%edi,4), %esi;\ + movzx a ## B, %edi;\ + xor s2(%ebp,%edi,4),c ## D;\ + movzx b ## B, %edi;\ + xor s1(%ebp,%edi,4),%esi;\ + movzx a ## H, %edi;\ + ror $16, a ## D;\ + xor s3(%ebp,%edi,4),c ## D;\ + movzx b ## H, %edi;\ + xor s2(%ebp,%edi,4),%esi;\ + pop %edi;\ + add %esi, c ## D;\ + add c ## D, %esi;\ + add k+round(%ebp), c ## D;\ + xor %edi, c ## D;\ + add k+4+round(%ebp),%esi;\ + xor %esi, d ## D;\ + ror $1, d ## D; + +.align 4 +.global twofish_enc_blk +.global twofish_dec_blk + +twofish_enc_blk: + push %ebp /* save registers according to calling convention*/ + push %ebx + push %esi + push %edi + + mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */ + add $crypto_tfm_ctx_offset, %ebp /* ctx adress */ + mov in_blk+16(%esp),%edi /* input adress in edi */ + + mov (%edi), %eax + mov b_offset(%edi), %ebx + mov c_offset(%edi), %ecx + mov d_offset(%edi), %edx + input_whitening(%eax,%ebp,a_offset) + ror $16, %eax + input_whitening(%ebx,%ebp,b_offset) + input_whitening(%ecx,%ebp,c_offset) + input_whitening(%edx,%ebp,d_offset) + rol $1, %edx + + encrypt_round(R0,R1,R2,R3,0); + encrypt_round(R2,R3,R0,R1,8); + encrypt_round(R0,R1,R2,R3,2*8); + encrypt_round(R2,R3,R0,R1,3*8); + encrypt_round(R0,R1,R2,R3,4*8); + encrypt_round(R2,R3,R0,R1,5*8); + encrypt_round(R0,R1,R2,R3,6*8); + encrypt_round(R2,R3,R0,R1,7*8); + encrypt_round(R0,R1,R2,R3,8*8); + encrypt_round(R2,R3,R0,R1,9*8); + encrypt_round(R0,R1,R2,R3,10*8); + encrypt_round(R2,R3,R0,R1,11*8); + encrypt_round(R0,R1,R2,R3,12*8); + encrypt_round(R2,R3,R0,R1,13*8); + encrypt_round(R0,R1,R2,R3,14*8); + encrypt_last_round(R2,R3,R0,R1,15*8); + + output_whitening(%eax,%ebp,c_offset) + output_whitening(%ebx,%ebp,d_offset) + output_whitening(%ecx,%ebp,a_offset) + output_whitening(%edx,%ebp,b_offset) + mov out_blk+16(%esp),%edi; + mov %eax, c_offset(%edi) + mov %ebx, d_offset(%edi) + mov %ecx, (%edi) + mov %edx, b_offset(%edi) + + pop %edi + pop %esi + pop %ebx + pop %ebp + mov $1, %eax + ret + +twofish_dec_blk: + push %ebp /* save registers according to calling convention*/ + push %ebx + push %esi + push %edi + + + mov tfm + 16(%esp), %ebp /* abuse the base pointer: set new base bointer to the crypto tfm */ + add $crypto_tfm_ctx_offset, %ebp /* ctx adress */ + mov in_blk+16(%esp),%edi /* input adress in edi */ + + mov (%edi), %eax + mov b_offset(%edi), %ebx + mov c_offset(%edi), %ecx + mov d_offset(%edi), %edx + output_whitening(%eax,%ebp,a_offset) + output_whitening(%ebx,%ebp,b_offset) + ror $16, %ebx + output_whitening(%ecx,%ebp,c_offset) + output_whitening(%edx,%ebp,d_offset) + rol $1, %ecx + + decrypt_round(R0,R1,R2,R3,15*8); + decrypt_round(R2,R3,R0,R1,14*8); + decrypt_round(R0,R1,R2,R3,13*8); + decrypt_round(R2,R3,R0,R1,12*8); + decrypt_round(R0,R1,R2,R3,11*8); + decrypt_round(R2,R3,R0,R1,10*8); + decrypt_round(R0,R1,R2,R3,9*8); + decrypt_round(R2,R3,R0,R1,8*8); + decrypt_round(R0,R1,R2,R3,7*8); + decrypt_round(R2,R3,R0,R1,6*8); + decrypt_round(R0,R1,R2,R3,5*8); + decrypt_round(R2,R3,R0,R1,4*8); + decrypt_round(R0,R1,R2,R3,3*8); + decrypt_round(R2,R3,R0,R1,2*8); + decrypt_round(R0,R1,R2,R3,1*8); + decrypt_last_round(R2,R3,R0,R1,0); + + input_whitening(%eax,%ebp,c_offset) + input_whitening(%ebx,%ebp,d_offset) + input_whitening(%ecx,%ebp,a_offset) + input_whitening(%edx,%ebp,b_offset) + mov out_blk+16(%esp),%edi; + mov %eax, c_offset(%edi) + mov %ebx, d_offset(%edi) + mov %ecx, (%edi) + mov %edx, b_offset(%edi) + + pop %edi + pop %esi + pop %ebx + pop %ebp + mov $1, %eax + ret diff --git a/arch/x86/crypto/twofish_32.c b/arch/x86/crypto/twofish_32.c new file mode 100644 index 000000000000..e3004dfe9c7a --- /dev/null +++ b/arch/x86/crypto/twofish_32.c @@ -0,0 +1,97 @@ +/* + * Glue Code for optimized 586 assembler version of TWOFISH + * + * Originally Twofish for GPG + * By Matthew Skala , July 26, 1998 + * 256-bit key length added March 20, 1999 + * Some modifications to reduce the text size by Werner Koch, April, 1998 + * Ported to the kerneli patch by Marc Mutz + * Ported to CryptoAPI by Colin Slater + * + * The original author has disclaimed all copyright interest in this + * code and thus put it in the public domain. The subsequent authors + * have put this under the GNU General Public License. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + * This code is a "clean room" implementation, written from the paper + * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, + * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available + * through http://www.counterpane.com/twofish.html + * + * For background information on multiplication in finite fields, used for + * the matrix operations in the key schedule, see the book _Contemporary + * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the + * Third Edition. + */ + +#include +#include +#include +#include +#include + + +asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); +asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + +static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_enc_blk(tfm, dst, src); +} + +static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_dec_blk(tfm, dst, src); +} + +static struct crypto_alg alg = { + .cra_name = "twofish", + .cra_driver_name = "twofish-i586", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = TF_MIN_KEY_SIZE, + .cia_max_keysize = TF_MAX_KEY_SIZE, + .cia_setkey = twofish_setkey, + .cia_encrypt = twofish_encrypt, + .cia_decrypt = twofish_decrypt + } + } +}; + +static int __init init(void) +{ + return crypto_register_alg(&alg); +} + +static void __exit fini(void) +{ + crypto_unregister_alg(&alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized"); +MODULE_ALIAS("twofish"); -- cgit v1.2.3