From d2c758a5fa26777d955fc2bade9c338d1aed5117 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 20 Jan 2008 22:12:16 +0200 Subject: [IPV4] ROUTE: ip_rt_dump() is unecessary slow [ Upstream commit: d8c9283089287341c85a0a69de32c2287a990e71 ] I noticed "ip route list cache x.y.z.t" can be *very* slow. While strace-ing -T it I also noticed that first part of route cache is fetched quite fast : recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, +msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202 GXm\0\0\2 \0\376\0\0\2\0\2\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = +3772 <0.000047> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, +msg_iov(1)=[{"\234\0\0\0\30\0\2\0\254i\ 202GXm\0\0\2 \0\376\0\0\1\0\2"..., 16384}], msg_controllen=0, msg_flags=0}, 0) += 3736 <0.000042> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, +msg_iov(1)=[{"\204\0\0\0\30\0\2\0\254i\ 202GXm\0\0\2 \0\376\0\0\1\0\2"..., 16384}], msg_controllen=0, msg_flags=0}, 0) += 3740 <0.000055> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, +msg_iov(1)=[{"\234\0\0\0\30\0\2\0\254i\ 202GXm\0\0\2 \0\376\0\0\1\0\2"..., 16384}], msg_controllen=0, msg_flags=0}, 0) += 3712 <0.000043> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, +msg_iov(1)=[{"\204\0\0\0\30\0\2\0\254i\ 202GXm\0\0\2 \0\376\0\0\1\0\2"..., 16384}], msg_controllen=0, msg_flags=0}, 0) += 3732 <0.000053> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, +msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202 GXm\0\0\2 \0\376\0\0\2\0\2\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = +3708 <0.000052> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, +msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202 GXm\0\0\2 \0\376\0\0\2\0\2\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = +3680 <0.000041> while the part at the end of the table is more expensive: recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, +msg_iov(1)=[{"\204\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\1\0\2"..., +16384}], msg_controllen=0, msg_flags=0}, 0) = 3656 <0.003857> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, +msg_iov(1)=[{"\204\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\1\0\2"..., +16384}], msg_controllen=0, msg_flags=0}, 0) = 3772 <0.003891> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, +msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\2\0\2\0"..., +16384}], msg_controllen=0, msg_flags=0}, 0) = 3712 <0.003765> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, +msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\2\0\2\0"..., +16384}], msg_controllen=0, msg_flags=0}, 0) = 3700 <0.003879> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, +msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\2\0\2\0"..., +16384}], msg_controllen=0, msg_flags=0}, 0) = 3676 <0.003797> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, +msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\2\0\2\0"..., +16384}], msg_controllen=0, msg_flags=0}, 0) = 3724 <0.003856> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, +msg_iov(1)=[{"\234\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\1\0\2"..., +16384}], msg_controllen=0, msg_flags=0}, 0) = 3736 <0.003848> The following patch corrects this performance/latency problem, removing quadratic behavior. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Adrian Bunk --- net/ipv4/route.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c83066cc342a..ba7e8ebd7453 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2821,11 +2821,10 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) int idx, s_idx; s_h = cb->args[0]; + if (s_h < 0) + s_h = 0; s_idx = idx = cb->args[1]; - for (h = 0; h <= rt_hash_mask; h++) { - if (h < s_h) continue; - if (h > s_h) - s_idx = 0; + for (h = s_h; h <= rt_hash_mask; h++) { rcu_read_lock_bh(); for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; rt = rcu_dereference(rt->u.rt_next), idx++) { @@ -2842,6 +2841,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) dst_release(xchg(&skb->dst, NULL)); } rcu_read_unlock_bh(); + s_idx = 0; } done: -- cgit v1.2.3