From 254eff771441f4ee7aa9cf770a6e4820492c9dab Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 19 Feb 2009 14:42:19 +0800 Subject: crypto: cryptd - Per-CPU thread implementation based on kcrypto_wq Original cryptd thread implementation has scalability issue, this patch solve the issue with a per-CPU thread implementation. struct cryptd_queue is defined to be a per-CPU queue, which holds one struct cryptd_cpu_queue for each CPU. In struct cryptd_cpu_queue, a struct crypto_queue holds all requests for the CPU, a struct work_struct is used to run all requests for the CPU. Testing based on dm-crypt on an Intel Core 2 E6400 (two cores) machine shows 19.2% performance gain. The testing script is as follow: -------------------- script begin --------------------------- #!/bin/sh dmc_create() { # Create a crypt device using dmsetup dmsetup create $2 --table "0 `blockdev --getsize $1` crypt cbc(aes-asm)?cryptd?plain:plain babebabebabebabebabebabebabebabe 0 $1 0" } dmsetup remove crypt0 dmsetup remove crypt1 dd if=/dev/zero of=/dev/ram0 bs=1M count=4 >& /dev/null dd if=/dev/zero of=/dev/ram1 bs=1M count=4 >& /dev/null dmc_create /dev/ram0 crypt0 dmc_create /dev/ram1 crypt1 cat >tr.sh <& /dev/null & dd if=/dev/dm-1 of=/dev/null >& /dev/null & done wait EOF for n in $(seq 10); do /usr/bin/time sh tr.sh done rm tr.sh -------------------- script end --------------------------- The separator of dm-crypt parameter is changed from "-" to "?", because "-" is used in some cipher driver name too, and cryptds need to specify cipher driver name instead of cipher name. The test result on an Intel Core2 E6400 (two cores) is as follow: without patch: -----------------wo begin -------------------------- 0.04user 0.38system 0:00.39elapsed 107%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6566minor)pagefaults 0swaps 0.07user 0.35system 0:00.35elapsed 121%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6567minor)pagefaults 0swaps 0.06user 0.34system 0:00.30elapsed 135%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6562minor)pagefaults 0swaps 0.05user 0.37system 0:00.36elapsed 119%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6607minor)pagefaults 0swaps 0.06user 0.36system 0:00.35elapsed 120%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6562minor)pagefaults 0swaps 0.05user 0.37system 0:00.31elapsed 136%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6594minor)pagefaults 0swaps 0.04user 0.34system 0:00.30elapsed 126%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6597minor)pagefaults 0swaps 0.06user 0.32system 0:00.31elapsed 125%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6571minor)pagefaults 0swaps 0.06user 0.34system 0:00.31elapsed 134%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6581minor)pagefaults 0swaps 0.05user 0.38system 0:00.31elapsed 138%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6600minor)pagefaults 0swaps -----------------wo end -------------------------- with patch: ------------------w begin -------------------------- 0.02user 0.31system 0:00.24elapsed 141%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6554minor)pagefaults 0swaps 0.05user 0.34system 0:00.31elapsed 127%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6606minor)pagefaults 0swaps 0.07user 0.33system 0:00.26elapsed 155%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6559minor)pagefaults 0swaps 0.07user 0.32system 0:00.26elapsed 151%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6562minor)pagefaults 0swaps 0.05user 0.34system 0:00.26elapsed 150%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6603minor)pagefaults 0swaps 0.03user 0.36system 0:00.31elapsed 124%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6562minor)pagefaults 0swaps 0.04user 0.35system 0:00.26elapsed 147%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6586minor)pagefaults 0swaps 0.03user 0.37system 0:00.27elapsed 146%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6562minor)pagefaults 0swaps 0.04user 0.36system 0:00.26elapsed 154%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6594minor)pagefaults 0swaps 0.04user 0.35system 0:00.26elapsed 154%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+6557minor)pagefaults 0swaps ------------------w end -------------------------- The middle value of elapsed time is: wo cryptwq: 0.31 w cryptwq: 0.26 The performance gain is about (0.31-0.26)/0.26 = 0.192. Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- crypto/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'crypto/Kconfig') diff --git a/crypto/Kconfig b/crypto/Kconfig index 420b630a17cf..24c31efde882 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -114,6 +114,7 @@ config CRYPTO_CRYPTD select CRYPTO_BLKCIPHER select CRYPTO_HASH select CRYPTO_MANAGER + select CRYPTO_WORKQUEUE help This is a generic software asynchronous crypto daemon that converts an arbitrary synchronous software crypto algorithm -- cgit v1.2.3