summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorPaolo Abeni <pabeni@redhat.com>2018-07-23 16:50:48 +0200
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2018-07-28 07:49:11 +0200
commit93d94fec94c7df51c7f36f45fdc18994073f47ee (patch)
treee691eefa93c640f058cfc1132d0bf7b84dd94cb9 /net
parent650321fe96150898c69c58c6684b33edbe689de1 (diff)
ip: hash fragments consistently
[ Upstream commit 3dd1c9a1270736029ffca670e9bd0265f4120600 ] The skb hash for locally generated ip[v6] fragments belonging to the same datagram can vary in several circumstances: * for connected UDP[v6] sockets, the first fragment get its hash via set_owner_w()/skb_set_hash_from_sk() * for unconnected IPv6 UDPv6 sockets, the first fragment can get its hash via ip6_make_flowlabel()/skb_get_hash_flowi6(), if auto_flowlabel is enabled For the following frags the hash is usually computed via skb_get_hash(). The above can cause OoO for unconnected IPv6 UDPv6 socket: in that scenario the egress tx queue can be selected on a per packet basis via the skb hash. It may also fool flow-oriented schedulers to place fragments belonging to the same datagram in different flows. Fix the issue by copying the skb hash from the head frag into the others at fragmentation time. Before this commit: perf probe -a "dev_queue_xmit skb skb->hash skb->l4_hash:b1@0/8 skb->sw_hash:b1@1/8" netperf -H $IPV4 -t UDP_STREAM -l 5 -- -m 2000 -n & perf record -e probe:dev_queue_xmit -e probe:skb_set_owner_w -a sleep 0.1 perf script probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=3713014309 l4_hash=1 sw_hash=0 probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=0 l4_hash=0 sw_hash=0 After this commit: probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=2171763177 l4_hash=1 sw_hash=0 probe:dev_queue_xmit: (ffffffff8c6b1b20) hash=2171763177 l4_hash=1 sw_hash=0 Fixes: b73c3d0e4f0e ("net: Save TX flow hash in sock and set in skbuf on xmit") Fixes: 67800f9b1f4e ("ipv6: Call skb_get_hash_flowi6 to get skb->hash in ip6_make_flowlabel") Signed-off-by: Paolo Abeni <pabeni@redhat.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv6/ip6_output.c2
2 files changed, 4 insertions, 0 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3b1f3bc8becb..100c86f1f547 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -497,6 +497,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->dev = from->dev;
to->mark = from->mark;
+ skb_copy_hash(to, from);
+
/* Copy the flags to each fragment. */
IPCB(to)->flags = IPCB(from)->flags;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index eb9046eae581..ea14466cdca8 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -576,6 +576,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->dev = from->dev;
to->mark = from->mark;
+ skb_copy_hash(to, from);
+
#ifdef CONFIG_NET_SCHED
to->tc_index = from->tc_index;
#endif