summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorNeal Cardwell <ncardwell@google.com>2015-05-29 13:47:07 -0400
committerSasha Levin <sasha.levin@oracle.com>2015-06-15 14:26:53 -0400
commit189debb56afd572f29cbd35abea68b4391185627 (patch)
treebdc1f9f2e7caa826f89d366f3994852be1b64e9c /net
parentee4ab7d8328b0a505d376b6c08d569778c8689af (diff)
tcp: fix child sockets to use system default congestion control if not set
[ Upstream commit 9f950415e4e28e7cfae2e416b43e862e8101d996 ] Linux 3.17 and earlier are explicitly engineered so that if the app doesn't specifically request a CC module on a listener before the SYN arrives, then the child gets the system default CC when the connection is established. See tcp_init_congestion_control() in 3.17 or earlier, which says "if no choice made yet assign the current value set as default". The change ("net: tcp: assign tcp cong_ops when tcp sk is created") altered these semantics, so that children got their parent listener's congestion control even if the system default had changed after the listener was created. This commit returns to those original semantics from 3.17 and earlier, since they are the original semantics from 2007 in 4d4d3d1e8 ("[TCP]: Congestion control initialization."), and some Linux congestion control workflows depend on that. In summary, if a listener socket specifically sets TCP_CONGESTION to "x", or the route locks the CC module to "x", then the child gets "x". Otherwise the child gets current system default from net.ipv4.tcp_congestion_control. That's the behavior in 3.17 and earlier, and this commit restores that. Fixes: 55d8694fa82c ("net: tcp: assign tcp cong_ops when tcp sk is created") Cc: Florian Westphal <fw@strlen.de> Cc: Daniel Borkmann <dborkman@redhat.com> Cc: Glenn Judd <glenn.judd@morganstanley.com> Cc: Stephen Hemminger <stephen@networkplumber.org> Signed-off-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Yuchung Cheng <ycheng@google.com> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/tcp_cong.c6
-rw-r--r--net/ipv4/tcp_minisocks.c3
2 files changed, 6 insertions, 3 deletions
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index b1c5970d47a1..b3316c8279a0 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -248,9 +248,10 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
ca = tcp_ca_find(name);
/* no change asking for existing value */
- if (ca == icsk->icsk_ca_ops)
+ if (ca == icsk->icsk_ca_ops) {
+ icsk->icsk_ca_setsockopt = 1;
goto out;
-
+ }
#ifdef CONFIG_MODULES
/* not found attempt to autoload module */
if (!ca && capable(CAP_NET_ADMIN)) {
@@ -273,6 +274,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
else {
tcp_cleanup_congestion_control(sk);
icsk->icsk_ca_ops = ca;
+ icsk->icsk_ca_setsockopt = 1;
if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init)
icsk->icsk_ca_ops->init(sk);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 71d001dcf2f8..2f6667116e85 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -451,7 +451,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->snd_cwnd = TCP_INIT_CWND;
newtp->snd_cwnd_cnt = 0;
- if (!try_module_get(newicsk->icsk_ca_ops->owner))
+ if (!newicsk->icsk_ca_setsockopt ||
+ !try_module_get(newicsk->icsk_ca_ops->owner))
tcp_assign_congestion_control(newsk);
tcp_set_ca_state(newsk, TCP_CA_Open);