summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c15
-rw-r--r--net/ipv4/arp.c9
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/esp4.c7
-rw-r--r--net/ipv4/fib_frontend.c7
-rw-r--r--net/ipv4/fib_rules.c6
-rw-r--r--net/ipv4/fib_semantics.c96
-rw-r--r--net/ipv4/igmp.c9
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/inet_diag.c19
-rw-r--r--net/ipv4/inet_fragment.c8
-rw-r--r--net/ipv4/inet_hashtables.c68
-rw-r--r--net/ipv4/ip_fragment.c3
-rw-r--r--net/ipv4/ip_gre.c13
-rw-r--r--net/ipv4/ip_output.c11
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c5
-rw-r--r--net/ipv4/nexthop.c35
-rw-r--r--net/ipv4/ping.c11
-rw-r--r--net/ipv4/raw.c5
-rw-r--r--net/ipv4/syncookies.c1
-rw-r--r--net/ipv4/tcp.c10
-rw-r--r--net/ipv4/tcp_bpf.c14
-rw-r--r--net/ipv4/tcp_cubic.c5
-rw-r--r--net/ipv4/tcp_input.c15
-rw-r--r--net/ipv4/tcp_ipv4.c15
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv4/tcp_rate.c11
-rw-r--r--net/ipv4/udp.c10
30 files changed, 309 insertions, 113 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 70f92aaca411..a7a6b1adb698 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1344,8 +1344,11 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
}
ops = rcu_dereference(inet_offloads[proto]);
- if (likely(ops && ops->callbacks.gso_segment))
+ if (likely(ops && ops->callbacks.gso_segment)) {
segs = ops->callbacks.gso_segment(skb, features);
+ if (!segs)
+ skb->network_header = skb_mac_header(skb) + nhoff - skb->head;
+ }
if (IS_ERR_OR_NULL(segs))
goto out;
@@ -1974,6 +1977,10 @@ static int __init inet_init(void)
ip_init();
+ /* Initialise per-cpu ipv4 mibs */
+ if (init_ipv4_mibs())
+ panic("%s: Cannot init ipv4 mibs\n", __func__);
+
/* Setup TCP slab cache for open requests. */
tcp_init();
@@ -2004,12 +2011,6 @@ static int __init inet_init(void)
if (init_inet_pernet_ops())
pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);
- /*
- * Initialise per-cpu ipv4 mibs
- */
-
- if (init_ipv4_mibs())
- pr_crit("%s: Cannot init ipv4 mibs\n", __func__);
ipv4_proc_init();
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7b951992c372..b8fe943ae89d 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1116,13 +1116,18 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
return err;
}
-static int arp_invalidate(struct net_device *dev, __be32 ip)
+int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
{
struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev);
int err = -ENXIO;
struct neigh_table *tbl = &arp_tbl;
if (neigh) {
+ if ((neigh->nud_state & NUD_VALID) && !force) {
+ neigh_release(neigh);
+ return 0;
+ }
+
if (neigh->nud_state & ~NUD_NOARP)
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE|
@@ -1169,7 +1174,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
if (!dev)
return -EINVAL;
}
- return arp_invalidate(dev, ip);
+ return arp_invalidate(dev, ip, true);
}
/*
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 603a3495afa6..4a8ad46397c0 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2585,7 +2585,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
free:
kfree(t);
out:
- return -ENOBUFS;
+ return -ENOMEM;
}
static void __devinet_sysctl_unregister(struct net *net,
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 86c836fa2145..ef20f550d2f8 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -277,6 +277,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
struct page *page;
struct sk_buff *trailer;
int tailen = esp->tailen;
+ unsigned int allocsz;
/* this is non-NULL only with UDP Encapsulation */
if (x->encap) {
@@ -286,6 +287,10 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
return err;
}
+ allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES);
+ if (allocsz > ESP_SKB_FRAG_MAXSIZE)
+ goto cow;
+
if (!skb_cloned(skb)) {
if (tailen <= skb_tailroom(skb)) {
nfrags = 1;
@@ -499,7 +504,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb);
u32 padto;
- padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached));
+ padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached));
if (skb->len < padto)
esp.tfclen = padto - skb->len;
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index b875b98820ed..ef3e7a3e3a29 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1122,9 +1122,11 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
return;
/* Add broadcast address, if it is explicitly assigned. */
- if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
+ if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) {
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
prim, 0);
+ arp_invalidate(dev, ifa->ifa_broadcast, false);
+ }
if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
(prefix != addr || ifa->ifa_prefixlen < 32)) {
@@ -1140,6 +1142,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
prim, 0);
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
32, prim, 0);
+ arp_invalidate(dev, prefix | ~mask, false);
}
}
}
@@ -1588,7 +1591,7 @@ static int __net_init fib_net_init(struct net *net)
int error;
#ifdef CONFIG_IP_ROUTE_CLASSID
- net->ipv4.fib_num_tclassid_users = 0;
+ atomic_set(&net->ipv4.fib_num_tclassid_users, 0);
#endif
error = ip_fib_net_init(net);
if (error < 0)
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index b43a7ba5c6a4..e9a3cc9e98df 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -137,7 +137,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
return err;
}
-static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)
+static bool fib4_rule_suppress(struct fib_rule *rule, int flags, struct fib_lookup_arg *arg)
{
struct fib_result *result = (struct fib_result *) arg->result;
struct net_device *dev = NULL;
@@ -258,7 +258,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
if (tb[FRA_FLOW]) {
rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
if (rule4->tclassid)
- net->ipv4.fib_num_tclassid_users++;
+ atomic_inc(&net->ipv4.fib_num_tclassid_users);
}
#endif
@@ -290,7 +290,7 @@ static int fib4_rule_delete(struct fib_rule *rule)
#ifdef CONFIG_IP_ROUTE_CLASSID
if (((struct fib4_rule *)rule)->tclassid)
- net->ipv4.fib_num_tclassid_users--;
+ atomic_dec(&net->ipv4.fib_num_tclassid_users);
#endif
net->ipv4.fib_has_custom_rules = true;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index dce85a9c20c6..f99ad4a98907 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -29,6 +29,7 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/netlink.h>
+#include <linux/hash.h>
#include <net/arp.h>
#include <net/ip.h>
@@ -222,7 +223,7 @@ void fib_nh_release(struct net *net, struct fib_nh *fib_nh)
{
#ifdef CONFIG_IP_ROUTE_CLASSID
if (fib_nh->nh_tclassid)
- net->ipv4.fib_num_tclassid_users--;
+ atomic_dec(&net->ipv4.fib_num_tclassid_users);
#endif
fib_nh_common_release(&fib_nh->nh_common);
}
@@ -318,11 +319,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
static inline unsigned int fib_devindex_hashfn(unsigned int val)
{
- unsigned int mask = DEVINDEX_HASHSIZE - 1;
+ return hash_32(val, DEVINDEX_HASHBITS);
+}
+
+static struct hlist_head *
+fib_info_devhash_bucket(const struct net_device *dev)
+{
+ u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex;
- return (val ^
- (val >> DEVINDEX_HASHBITS) ^
- (val >> (DEVINDEX_HASHBITS * 2))) & mask;
+ return &fib_info_devhash[fib_devindex_hashfn(val)];
}
static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
@@ -432,12 +437,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
struct hlist_head *head;
struct fib_nh *nh;
- unsigned int hash;
spin_lock(&fib_info_lock);
- hash = fib_devindex_hashfn(dev->ifindex);
- head = &fib_info_devhash[hash];
+ head = fib_info_devhash_bucket(dev);
+
hlist_for_each_entry(nh, head, nh_hash) {
if (nh->fib_nh_dev == dev &&
nh->fib_nh_gw4 == gw &&
@@ -624,7 +628,7 @@ int fib_nh_init(struct net *net, struct fib_nh *nh,
#ifdef CONFIG_IP_ROUTE_CLASSID
nh->nh_tclassid = cfg->fc_flow;
if (nh->nh_tclassid)
- net->ipv4.fib_num_tclassid_users++;
+ atomic_inc(&net->ipv4.fib_num_tclassid_users);
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
nh->fib_nh_weight = nh_weight;
@@ -654,6 +658,19 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
return nhs;
}
+static int fib_gw_from_attr(__be32 *gw, struct nlattr *nla,
+ struct netlink_ext_ack *extack)
+{
+ if (nla_len(nla) < sizeof(*gw)) {
+ NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_GATEWAY");
+ return -EINVAL;
+ }
+
+ *gw = nla_get_in_addr(nla);
+
+ return 0;
+}
+
/* only called when fib_nh is integrated into fib_info */
static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
int remaining, struct fib_config *cfg,
@@ -696,7 +713,11 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
return -EINVAL;
}
if (nla) {
- fib_cfg.fc_gw4 = nla_get_in_addr(nla);
+ ret = fib_gw_from_attr(&fib_cfg.fc_gw4, nla,
+ extack);
+ if (ret)
+ goto errout;
+
if (fib_cfg.fc_gw4)
fib_cfg.fc_gw_family = AF_INET;
} else if (nlav) {
@@ -706,10 +727,18 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
}
nla = nla_find(attrs, attrlen, RTA_FLOW);
- if (nla)
+ if (nla) {
+ if (nla_len(nla) < sizeof(u32)) {
+ NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW");
+ return -EINVAL;
+ }
fib_cfg.fc_flow = nla_get_u32(nla);
+ }
fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
+ /* RTA_ENCAP_TYPE length checked in
+ * lwtunnel_valid_encap_type_attr
+ */
nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
if (nla)
fib_cfg.fc_encap_type = nla_get_u16(nla);
@@ -847,8 +876,13 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
}
if (cfg->fc_oif || cfg->fc_gw_family) {
- struct fib_nh *nh = fib_info_nh(fi, 0);
+ struct fib_nh *nh;
+ /* cannot match on nexthop object attributes */
+ if (fi->nh)
+ return 1;
+
+ nh = fib_info_nh(fi, 0);
if (cfg->fc_encap) {
if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
nh, cfg, extack))
@@ -894,6 +928,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
attrlen = rtnh_attrlen(rtnh);
if (attrlen > 0) {
struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);
+ int err;
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
nlav = nla_find(attrs, attrlen, RTA_VIA);
@@ -904,12 +939,17 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
}
if (nla) {
+ __be32 gw;
+
+ err = fib_gw_from_attr(&gw, nla, extack);
+ if (err)
+ return err;
+
if (nh->fib_nh_gw_family != AF_INET ||
- nla_get_in_addr(nla) != nh->fib_nh_gw4)
+ gw != nh->fib_nh_gw4)
return 1;
} else if (nlav) {
struct fib_config cfg2;
- int err;
err = fib_gw_from_via(&cfg2, nlav, extack);
if (err)
@@ -932,8 +972,14 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
#ifdef CONFIG_IP_ROUTE_CLASSID
nla = nla_find(attrs, attrlen, RTA_FLOW);
- if (nla && nla_get_u32(nla) != nh->nh_tclassid)
- return 1;
+ if (nla) {
+ if (nla_len(nla) < sizeof(u32)) {
+ NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW");
+ return -EINVAL;
+ }
+ if (nla_get_u32(nla) != nh->nh_tclassid)
+ return 1;
+ }
#endif
}
@@ -1557,12 +1603,10 @@ link_it:
} else {
change_nexthops(fi) {
struct hlist_head *head;
- unsigned int hash;
if (!nexthop_nh->fib_nh_dev)
continue;
- hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex);
- head = &fib_info_devhash[hash];
+ head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev);
hlist_add_head(&nexthop_nh->nh_hash, head);
} endfor_nexthops(fi)
}
@@ -1903,8 +1947,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
{
- unsigned int hash = fib_devindex_hashfn(dev->ifindex);
- struct hlist_head *head = &fib_info_devhash[hash];
+ struct hlist_head *head = fib_info_devhash_bucket(dev);
struct fib_nh *nh;
hlist_for_each_entry(nh, head, nh_hash) {
@@ -1923,12 +1966,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
*/
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
- int ret = 0;
- int scope = RT_SCOPE_NOWHERE;
+ struct hlist_head *head = fib_info_devhash_bucket(dev);
struct fib_info *prev_fi = NULL;
- unsigned int hash = fib_devindex_hashfn(dev->ifindex);
- struct hlist_head *head = &fib_info_devhash[hash];
+ int scope = RT_SCOPE_NOWHERE;
struct fib_nh *nh;
+ int ret = 0;
if (force)
scope = -1;
@@ -2073,7 +2115,6 @@ out:
int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
{
struct fib_info *prev_fi;
- unsigned int hash;
struct hlist_head *head;
struct fib_nh *nh;
int ret;
@@ -2089,8 +2130,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
}
prev_fi = NULL;
- hash = fib_devindex_hashfn(dev->ifindex);
- head = &fib_info_devhash[hash];
+ head = fib_info_devhash_bucket(dev);
ret = 0;
hlist_for_each_entry(nh, head, nh_hash) {
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index b1ecc9195517..cac2fdd08df0 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2403,9 +2403,10 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
newpsl->sl_addr[i] = psl->sl_addr[i];
/* decrease mem now to avoid the memleak warning */
atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
- kfree_rcu(psl, rcu);
}
rcu_assign_pointer(pmc->sflist, newpsl);
+ if (psl)
+ kfree_rcu(psl, rcu);
psl = newpsl;
}
rv = 1; /* > 0 for insert logic below if sl_count is 0 */
@@ -2503,11 +2504,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
psl->sl_count, psl->sl_addr, 0);
/* decrease mem now to avoid the memleak warning */
atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
- kfree_rcu(psl, rcu);
- } else
+ } else {
(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
0, NULL, 0);
+ }
rcu_assign_pointer(pmc->sflist, newpsl);
+ if (psl)
+ kfree_rcu(psl, rcu);
pmc->sfmode = msf->imsf_fmode;
err = 0;
done:
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 85a88425edc4..6cbf0db57ad0 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -791,7 +791,7 @@ static void reqsk_queue_hash_req(struct request_sock *req,
timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
mod_timer(&req->rsk_timer, jiffies + timeout);
- inet_ehash_insert(req_to_sk(req), NULL);
+ inet_ehash_insert(req_to_sk(req), NULL, NULL);
/* before letting lookups find us, make sure all req fields
* are committed to memory and refcnt initialized.
*/
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 4f71aca15666..f8f79672cc5f 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -200,6 +200,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
r->idiag_state = sk->sk_state;
r->idiag_timer = 0;
r->idiag_retrans = 0;
+ r->idiag_expires = 0;
if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
goto errout;
@@ -240,20 +241,17 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
r->idiag_timer = 1;
r->idiag_retrans = icsk->icsk_retransmits;
r->idiag_expires =
- jiffies_to_msecs(icsk->icsk_timeout - jiffies);
+ jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies);
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
r->idiag_timer = 4;
r->idiag_retrans = icsk->icsk_probes_out;
r->idiag_expires =
- jiffies_to_msecs(icsk->icsk_timeout - jiffies);
+ jiffies_delta_to_msecs(icsk->icsk_timeout - jiffies);
} else if (timer_pending(&sk->sk_timer)) {
r->idiag_timer = 2;
r->idiag_retrans = icsk->icsk_probes_out;
r->idiag_expires =
- jiffies_to_msecs(sk->sk_timer.expires - jiffies);
- } else {
- r->idiag_timer = 0;
- r->idiag_expires = 0;
+ jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies);
}
if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
@@ -338,16 +336,13 @@ static int inet_twsk_diag_fill(struct sock *sk,
r = nlmsg_data(nlh);
BUG_ON(tw->tw_state != TCP_TIME_WAIT);
- tmo = tw->tw_timer.expires - jiffies;
- if (tmo < 0)
- tmo = 0;
-
inet_diag_msg_common_fill(r, sk);
r->idiag_retrans = 0;
r->idiag_state = tw->tw_substate;
r->idiag_timer = 3;
- r->idiag_expires = jiffies_to_msecs(tmo);
+ tmo = tw->tw_timer.expires - jiffies;
+ r->idiag_expires = jiffies_delta_to_msecs(tmo);
r->idiag_rqueue = 0;
r->idiag_wqueue = 0;
r->idiag_uid = 0;
@@ -381,7 +376,7 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
offsetof(struct sock, sk_cookie));
tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies;
- r->idiag_expires = (tmo >= 0) ? jiffies_to_msecs(tmo) : 0;
+ r->idiag_expires = jiffies_delta_to_msecs(tmo);
r->idiag_rqueue = 0;
r->idiag_wqueue = 0;
r->idiag_uid = 0;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 10d31733297d..e0e8a65d561e 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -204,9 +204,9 @@ void inet_frag_kill(struct inet_frag_queue *fq)
/* The RCU read lock provides a memory barrier
* guaranteeing that if fqdir->dead is false then
* the hash table destruction will not start until
- * after we unlock. Paired with inet_frags_exit_net().
+ * after we unlock. Paired with fqdir_pre_exit().
*/
- if (!fqdir->dead) {
+ if (!READ_ONCE(fqdir->dead)) {
rhashtable_remove_fast(&fqdir->rhashtable, &fq->node,
fqdir->f->rhash_params);
refcount_dec(&fq->refcnt);
@@ -321,9 +321,11 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
{
+ /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */
+ long high_thresh = READ_ONCE(fqdir->high_thresh);
struct inet_frag_queue *fq = NULL, *prev;
- if (!fqdir->high_thresh || frag_mem_limit(fqdir) > fqdir->high_thresh)
+ if (!high_thresh || frag_mem_limit(fqdir) > high_thresh)
return NULL;
rcu_read_lock();
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 72fdf1fcbcaa..cbbeb0eea0c3 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -20,6 +20,9 @@
#include <net/addrconf.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/inet6_hashtables.h>
+#endif
#include <net/secure_seq.h>
#include <net/ip.h>
#include <net/tcp.h>
@@ -470,10 +473,52 @@ static u32 inet_sk_port_offset(const struct sock *sk)
inet->inet_dport);
}
-/* insert a socket into ehash, and eventually remove another one
- * (The another one can be a SYN_RECV or TIMEWAIT
+/* Searches for an exsiting socket in the ehash bucket list.
+ * Returns true if found, false otherwise.
*/
-bool inet_ehash_insert(struct sock *sk, struct sock *osk)
+static bool inet_ehash_lookup_by_sk(struct sock *sk,
+ struct hlist_nulls_head *list)
+{
+ const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num);
+ const int sdif = sk->sk_bound_dev_if;
+ const int dif = sk->sk_bound_dev_if;
+ const struct hlist_nulls_node *node;
+ struct net *net = sock_net(sk);
+ struct sock *esk;
+
+ INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr);
+
+ sk_nulls_for_each_rcu(esk, node, list) {
+ if (esk->sk_hash != sk->sk_hash)
+ continue;
+ if (sk->sk_family == AF_INET) {
+ if (unlikely(INET_MATCH(esk, net, acookie,
+ sk->sk_daddr,
+ sk->sk_rcv_saddr,
+ ports, dif, sdif))) {
+ return true;
+ }
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (sk->sk_family == AF_INET6) {
+ if (unlikely(INET6_MATCH(esk, net,
+ &sk->sk_v6_daddr,
+ &sk->sk_v6_rcv_saddr,
+ ports, dif, sdif))) {
+ return true;
+ }
+ }
+#endif
+ }
+ return false;
+}
+
+/* Insert a socket into ehash, and eventually remove another one
+ * (The another one can be a SYN_RECV or TIMEWAIT)
+ * If an existing socket already exists, socket sk is not inserted,
+ * and sets found_dup_sk parameter to true.
+ */
+bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct hlist_nulls_head *list;
@@ -492,16 +537,23 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk)
if (osk) {
WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
ret = sk_nulls_del_node_init_rcu(osk);
+ } else if (found_dup_sk) {
+ *found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
+ if (*found_dup_sk)
+ ret = false;
}
+
if (ret)
__sk_nulls_add_node_rcu(sk, list);
+
spin_unlock(lock);
+
return ret;
}
-bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
+bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk)
{
- bool ok = inet_ehash_insert(sk, osk);
+ bool ok = inet_ehash_insert(sk, osk, found_dup_sk);
if (ok) {
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -545,7 +597,7 @@ int __inet_hash(struct sock *sk, struct sock *osk)
int err = 0;
if (sk->sk_state != TCP_LISTEN) {
- inet_ehash_nolisten(sk, osk);
+ inet_ehash_nolisten(sk, osk, NULL);
return 0;
}
WARN_ON(!sk_unhashed(sk));
@@ -641,7 +693,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
tb = inet_csk(sk)->icsk_bind_hash;
spin_lock_bh(&head->lock);
if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
- inet_ehash_nolisten(sk, NULL);
+ inet_ehash_nolisten(sk, NULL, NULL);
spin_unlock_bh(&head->lock);
return 0;
}
@@ -720,7 +772,7 @@ ok:
inet_bind_hash(sk, tb, port);
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
- inet_ehash_nolisten(sk, (struct sock *)tw);
+ inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
}
if (tw)
inet_twsk_bind_unhash(tw, hinfo);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cfeb8890f94e..fad803d2d711 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -144,7 +144,8 @@ static void ip_expire(struct timer_list *t)
rcu_read_lock();
- if (qp->q.fqdir->dead)
+ /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */
+ if (READ_ONCE(qp->q.fqdir->dead))
goto out_rcu_unlock;
spin_lock(&qp->q.lock);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index c4989e5903e4..5b38d03f6d79 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -432,14 +432,12 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
-
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
+ __be16 flags = tunnel->parms.o_flags;
/* Push GRE header. */
gre_build_header(skb, tunnel->tun_hlen,
- tunnel->parms.o_flags, proto, tunnel->parms.o_key,
- htonl(tunnel->o_seqno));
+ flags, proto, tunnel->parms.o_key,
+ (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
@@ -577,8 +575,9 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
key = &info->key;
ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
- tunnel_id_to_key32(key->tun_id), key->tos, 0,
- skb->mark, skb_get_hash(skb));
+ tunnel_id_to_key32(key->tun_id),
+ key->tos & ~INET_ECN_MASK, 0, skb->mark,
+ skb_get_hash(skb));
rt = ip_route_output_key(dev_net(dev), &fl4);
if (IS_ERR(rt))
return PTR_ERR(rt);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 0ec529d77a56..418e93987800 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -161,12 +161,19 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
iph->saddr = saddr;
iph->protocol = sk->sk_protocol;
- if (ip_dont_fragment(sk, &rt->dst)) {
+ /* Do not bother generating IPID for small packets (eg SYNACK) */
+ if (skb->len <= IPV4_MIN_MTU || ip_dont_fragment(sk, &rt->dst)) {
iph->frag_off = htons(IP_DF);
iph->id = 0;
} else {
iph->frag_off = 0;
- __ip_select_ident(net, iph, 1);
+ /* TCP packets here are SYNACK with fat IPv4/TCP options.
+ * Avoid using the hashed IP ident generator.
+ */
+ if (sk->sk_protocol == IPPROTO_TCP)
+ iph->id = (__force __be16)prandom_u32();
+ else
+ __ip_select_ident(net, iph, 1);
}
if (opt && opt->opt.optlen) {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index d71935618871..2da689608036 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -259,7 +259,9 @@ static int __net_init ipmr_rules_init(struct net *net)
return 0;
err2:
+ rtnl_lock();
ipmr_free_table(mrt);
+ rtnl_unlock();
err1:
fib_rules_unregister(ops);
return err;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 6bdb1ab8af61..63ebb87d8533 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -505,8 +505,11 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
if (IS_ERR(config))
return PTR_ERR(config);
}
- } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN))
+ } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) {
+ clusterip_config_entry_put(config);
+ clusterip_config_put(config);
return -EINVAL;
+ }
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0) {
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 858bb10d8341..4d69b3de980a 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -839,15 +839,36 @@ static void remove_nexthop(struct net *net, struct nexthop *nh,
/* if any FIB entries reference this nexthop, any dst entries
* need to be regenerated
*/
-static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
+static void nh_rt_cache_flush(struct net *net, struct nexthop *nh,
+ struct nexthop *replaced_nh)
{
struct fib6_info *f6i;
+ struct nh_group *nhg;
+ int i;
if (!list_empty(&nh->fi_list))
rt_cache_flush(net);
list_for_each_entry(f6i, &nh->f6i_list, nh_list)
ipv6_stub->fib6_update_sernum(net, f6i);
+
+ /* if an IPv6 group was replaced, we have to release all old
+ * dsts to make sure all refcounts are released
+ */
+ if (!replaced_nh->is_group)
+ return;
+
+ /* new dsts must use only the new nexthop group */
+ synchronize_net();
+
+ nhg = rtnl_dereference(replaced_nh->nh_grp);
+ for (i = 0; i < nhg->num_nh; i++) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+ struct nh_info *nhi = rtnl_dereference(nhge->nh->nh_info);
+
+ if (nhi->family == AF_INET6)
+ ipv6_stub->fib6_nh_release_dsts(&nhi->fib6_nh);
+ }
}
static int replace_nexthop_grp(struct net *net, struct nexthop *old,
@@ -994,7 +1015,7 @@ static int replace_nexthop(struct net *net, struct nexthop *old,
err = replace_nexthop_single(net, old, new, extack);
if (!err) {
- nh_rt_cache_flush(net, old);
+ nh_rt_cache_flush(net, old, new);
__remove_nexthop(net, new, NULL);
nexthop_put(new);
@@ -1231,11 +1252,15 @@ static int nh_create_ipv6(struct net *net, struct nexthop *nh,
/* sets nh_dev if successful */
err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
extack);
- if (err)
+ if (err) {
+ /* IPv6 is not enabled, don't call fib6_nh_release */
+ if (err == -EAFNOSUPPORT)
+ goto out;
ipv6_stub->fib6_nh_release(fib6_nh);
- else
+ } else {
nh->nh_flags = fib6_nh->fib_nh_flags;
-
+ }
+out:
return err;
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 1c3d5d3702a1..33e6392e8b82 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -172,16 +172,22 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
struct sock *sk = NULL;
struct inet_sock *isk;
struct hlist_nulls_node *hnode;
- int dif = skb->dev->ifindex;
+ int dif, sdif;
if (skb->protocol == htons(ETH_P_IP)) {
+ dif = inet_iif(skb);
+ sdif = inet_sdif(skb);
pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n",
(int)ident, &ip_hdr(skb)->daddr, dif);
#if IS_ENABLED(CONFIG_IPV6)
} else if (skb->protocol == htons(ETH_P_IPV6)) {
+ dif = inet6_iif(skb);
+ sdif = inet6_sdif(skb);
pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n",
(int)ident, &ipv6_hdr(skb)->daddr, dif);
#endif
+ } else {
+ return NULL;
}
read_lock_bh(&ping_table.lock);
@@ -220,7 +226,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
continue;
}
- if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
+ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
+ sk->sk_bound_dev_if != sdif)
continue;
sock_hold(sk);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 3183413ebc6c..ddc24e57dc55 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -720,6 +720,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
int ret = -EINVAL;
int chk_addr_ret;
+ lock_sock(sk);
if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
goto out;
@@ -739,7 +740,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_saddr = 0; /* Use device */
sk_dst_reset(sk);
ret = 0;
-out: return ret;
+out:
+ release_sock(sk);
+ return ret;
}
/*
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 2b45d1455592..6811174ad518 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -332,6 +332,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq = inet_rsk(req);
treq = tcp_rsk(req);
+ treq->af_specific = &tcp_request_sock_ipv4_ops;
treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
treq->ts_off = 0;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9f53d25e047e..4815cf72569e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1652,11 +1652,13 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
if (!copied)
copied = used;
break;
- } else if (used <= len) {
- seq += used;
- copied += used;
- offset += used;
}
+ if (WARN_ON_ONCE(used > len))
+ used = len;
+ seq += used;
+ copied += used;
+ offset += used;
+
/* If recv_actor drops the lock (e.g. TCP splice
* receive) the skb pointer might be invalid when
* getting here: tcp_collapse might have deleted it
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 7df7ec74807a..bcc13368c836 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -296,10 +296,9 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
struct sk_psock *psock = sk_psock_get(sk);
int ret;
- if (unlikely(!psock)) {
- sk_msg_free(sk, msg);
- return 0;
- }
+ if (unlikely(!psock))
+ return -EPIPE;
+
ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes, flags) :
tcp_bpf_push_locked(sk, msg, bytes, flags, false);
sk_psock_put(sk, psock);
@@ -367,7 +366,7 @@ more_data:
cork = true;
psock->cork = NULL;
}
- sk_msg_return(sk, msg, tosend);
+ sk_msg_return(sk, msg, msg->sg.size);
release_sock(sk);
ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
@@ -407,8 +406,11 @@ more_data:
}
if (msg &&
msg->sg.data[msg->sg.start].page_link &&
- msg->sg.data[msg->sg.start].length)
+ msg->sg.data[msg->sg.start].length) {
+ if (eval == __SK_REDIRECT)
+ sk_mem_charge(sk, msg->sg.size);
goto more_data;
+ }
}
return ret;
}
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index ee6c38a73325..44be7a5a1391 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -341,8 +341,6 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
return;
if (tcp_in_slow_start(tp)) {
- if (hystart && after(ack, ca->end_seq))
- bictcp_hystart_reset(sk);
acked = tcp_slow_start(tp, acked);
if (!acked)
return;
@@ -384,6 +382,9 @@ static void hystart_update(struct sock *sk, u32 delay)
if (ca->found & hystart_detect)
return;
+ if (after(tp->snd_una, ca->end_seq))
+ bictcp_hystart_reset(sk);
+
if (hystart_detect & HYSTART_ACK_TRAIN) {
u32 now = bictcp_clock();
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c0fcfa296468..b0e6fc2c5e10 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3717,7 +3717,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_process_tlp_ack(sk, ack, flag);
if (tcp_ack_is_dubious(sk, flag)) {
- if (!(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP))) {
+ if (!(flag & (FLAG_SND_UNA_ADVANCED |
+ FLAG_NOT_DUP | FLAG_DSACKING_ACK))) {
num_dupack = 1;
/* Consider if pure acks were aggregated in tcp_add_backlog() */
if (!(flag & FLAG_DATA))
@@ -5230,7 +5231,17 @@ static void tcp_new_space(struct sock *sk)
sk->sk_write_space(sk);
}
-static void tcp_check_space(struct sock *sk)
+/* Caller made space either from:
+ * 1) Freeing skbs in rtx queues (after tp->snd_una has advanced)
+ * 2) Sent skbs from output queue (and thus advancing tp->snd_nxt)
+ *
+ * We might be able to generate EPOLLOUT to the application if:
+ * 1) Space consumed in output/rtx queues is below sk->sk_sndbuf/2
+ * 2) notsent amount (tp->write_seq - tp->snd_nxt) became
+ * small enough that tcp_stream_memory_free() decides it
+ * is time to generate EPOLLOUT.
+ */
+void tcp_check_space(struct sock *sk)
{
if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2ce85e52aea7..72fe93ace7d7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1383,7 +1383,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = {
.syn_ack_timeout = tcp_syn_ack_timeout,
};
-static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
+const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
.mss_clamp = TCP_MSS_DEFAULT,
#ifdef CONFIG_TCP_MD5SIG
.req_md5_lookup = tcp_v4_md5_lookup,
@@ -1426,6 +1426,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
bool *own_req)
{
struct inet_request_sock *ireq;
+ bool found_dup_sk = false;
struct inet_sock *newinet;
struct tcp_sock *newtp;
struct sock *newsk;
@@ -1496,12 +1497,22 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
- *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
+ &found_dup_sk);
if (likely(*own_req)) {
tcp_move_syn(newtp, req);
ireq->ireq_opt = NULL;
} else {
newinet->inet_opt = NULL;
+
+ if (!req_unhash && found_dup_sk) {
+ /* This code path should only be executed in the
+ * syncookie case only
+ */
+ bh_unlock_sock(newsk);
+ sock_put(newsk);
+ newsk = NULL;
+ }
}
return newsk;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 194743bd3fc1..9b038cb0a43d 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -538,7 +538,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->tsoffset = treq->ts_off;
#ifdef CONFIG_TCP_MD5SIG
newtp->md5sig_info = NULL; /*XXX*/
- if (newtp->af_specific->md5_lookup(sk, newsk))
+ if (treq->af_specific->req_md5_lookup(sk, req_to_sk(req)))
newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 638d7b49ad71..67493ec6318a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -81,6 +81,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
tcp_skb_pcount(skb));
+ tcp_check_space(sk);
}
/* SND.NXT, if window was not shrunk or the amount of shrunk was less than one
@@ -3492,6 +3493,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
*/
static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
{
+ struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_request *fo = tp->fastopen_req;
int space, err = 0;
@@ -3506,8 +3508,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
* private TCP options. The cost is reduced data space in SYN :(
*/
tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp);
+ /* Sync mss_cache after updating the mss_clamp */
+ tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
- space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
+ space = __tcp_mtu_to_mss(sk, icsk->icsk_pmtu_cookie) -
MAX_TCP_OPTION_SPACE;
space = min_t(size_t, space, fo->size);
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index 0de693565963..6ab197928abb 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -73,26 +73,31 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
*
* If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is
* called multiple times. We favor the information from the most recently
- * sent skb, i.e., the skb with the highest prior_delivered count.
+ * sent skb, i.e., the skb with the most recently sent time and the highest
+ * sequence.
*/
void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+ u64 tx_tstamp;
if (!scb->tx.delivered_mstamp)
return;
+ tx_tstamp = tcp_skb_timestamp_us(skb);
if (!rs->prior_delivered ||
- after(scb->tx.delivered, rs->prior_delivered)) {
+ tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp,
+ scb->end_seq, rs->last_end_seq)) {
rs->prior_delivered = scb->tx.delivered;
rs->prior_mstamp = scb->tx.delivered_mstamp;
rs->is_app_limited = scb->tx.is_app_limited;
rs->is_retrans = scb->sacked & TCPCB_RETRANS;
+ rs->last_end_seq = scb->end_seq;
/* Record send time of most recently ACKed packet: */
- tp->first_tx_mstamp = tcp_skb_timestamp_us(skb);
+ tp->first_tx_mstamp = tx_tstamp;
/* Find the duration of the "send phase" of this window: */
rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp,
scb->tx.first_tx_mstamp);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fdbd56ee1300..83fd4fa40d5e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -544,6 +544,12 @@ void udp_encap_enable(void)
}
EXPORT_SYMBOL(udp_encap_enable);
+void udp_encap_disable(void)
+{
+ static_branch_dec(&udp_encap_needed_key);
+}
+EXPORT_SYMBOL(udp_encap_disable);
+
/* Handler for tunnels with arbitrary destination ports: no socket lookup, go
* through error handlers in encapsulations looking for a match.
*/
@@ -845,7 +851,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
kfree_skb(skb);
return -EINVAL;
}
- if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
+ if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
kfree_skb(skb);
return -EINVAL;
}
@@ -2943,7 +2949,7 @@ int udp4_seq_show(struct seq_file *seq, void *v)
{
seq_setwidth(seq, 127);
if (v == SEQ_START_TOKEN)
- seq_puts(seq, " sl local_address rem_address st tx_queue "
+ seq_puts(seq, " sl local_address rem_address st tx_queue "
"rx_queue tr tm->when retrnsmt uid timeout "
"inode ref pointer drops");
else {