summary | refs | log | tree | commit | diff
path: root/net
diff options
context:
space:
mode:
author Marcel Ziswiler <marcel.ziswiler@toradex.com> 2018-12-12 14:08:45 +0100
committer Marcel Ziswiler <marcel.ziswiler@toradex.com> 2018-12-12 14:08:45 +0100
commit cfbbc7703fff59c67761c93a8b1de29a79f9841c (patch)
tree 58b4b37bed385b27fc5956435b2451c760f26f5f /net
parent 5f3fecbc0715a70437501e1d85e74726c4f561be (diff)
parent 1aa861ff238ecd17a3095b0dbd2d20bdf7bfaf14 (diff)
Merge tag 'v4.9.144' into 4.9-2.3.x-imx
This is the 4.9.144 stable release
Diffstat (limited to 'net')
-rw-r--r-- net/6lowpan/iphc.c 1
-rw-r--r-- net/9p/protocol.c 5
-rw-r--r-- net/batman-adv/bat_v_elp.c 8
-rw-r--r-- net/batman-adv/bridge_loop_avoidance.c 10
-rw-r--r-- net/batman-adv/network-coding.c 27
-rw-r--r-- net/batman-adv/soft-interface.c 25
-rw-r--r-- net/batman-adv/sysfs.c 30
-rw-r--r-- net/batman-adv/translation-table.c 6
-rw-r--r-- net/batman-adv/tvlv.c 8
-rw-r--r-- net/bluetooth/mgmt.c 7
-rw-r--r-- net/bluetooth/smp.c 29
-rw-r--r-- net/bluetooth/smp.h 3
-rw-r--r-- net/bridge/br_multicast.c 9
-rw-r--r-- net/bridge/netfilter/ebt_arpreply.c 3
-rw-r--r-- net/ceph/auth.c 20
-rw-r--r-- net/ceph/auth_x.c 225
-rw-r--r-- net/ceph/auth_x_protocol.h 7
-rw-r--r-- net/ceph/messenger.c 113
-rw-r--r-- net/ceph/osd_client.c 15
-rw-r--r-- net/core/datagram.c 5
-rw-r--r-- net/core/dev.c 32
-rw-r--r-- net/core/ethtool.c 8
-rw-r--r-- net/core/flow_dissector.c 4
-rw-r--r-- net/core/netclassid_cgroup.c 1
-rw-r--r-- net/core/rtnetlink.c 16
-rw-r--r-- net/core/skbuff.c 36
-rw-r--r-- net/dccp/input.c 4
-rw-r--r-- net/dccp/ipv4.c 4
-rw-r--r-- net/ieee802154/6lowpan/6lowpan_i.h 26
-rw-r--r-- net/ieee802154/6lowpan/reassembly.c 148
-rw-r--r-- net/ipv4/cipso_ipv4.c 11
-rw-r--r-- net/ipv4/fib_frontend.c 12
-rw-r--r-- net/ipv4/fib_semantics.c 50
-rw-r--r-- net/ipv4/inet_connection_sock.c 5
-rw-r--r-- net/ipv4/inet_fragment.c 389
-rw-r--r-- net/ipv4/ip_fragment.c 585
-rw-r--r-- net/ipv4/ip_sockglue.c 3
-rw-r--r-- net/ipv4/ip_tunnel.c 13
-rw-r--r-- net/ipv4/ip_tunnel_core.c 2
-rw-r--r-- net/ipv4/proc.c 7
-rw-r--r-- net/ipv4/tcp_input.c 37
-rw-r--r-- net/ipv4/tcp_ipv4.c 4
-rw-r--r-- net/ipv4/udp.c 20
-rw-r--r-- net/ipv6/addrconf.c 10
-rw-r--r-- net/ipv6/af_inet6.c 6
-rw-r--r-- net/ipv6/ip6_checksum.c 20
-rw-r--r-- net/ipv6/ip6_tunnel.c 27
-rw-r--r-- net/ipv6/mcast.c 16
-rw-r--r-- net/ipv6/ndisc.c 3
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_reasm.c 102
-rw-r--r-- net/ipv6/proc.c 5
-rw-r--r-- net/ipv6/raw.c 29
-rw-r--r-- net/ipv6/reassembly.c 212
-rw-r--r-- net/ipv6/route.c 7
-rw-r--r-- net/ipv6/xfrm6_output.c 2
-rw-r--r-- net/l2tp/l2tp_core.c 14
-rw-r--r-- net/l2tp/l2tp_core.h 3
-rw-r--r-- net/l2tp/l2tp_debugfs.c 4
-rw-r--r-- net/l2tp/l2tp_netlink.c 3
-rw-r--r-- net/llc/af_llc.c 11
-rw-r--r-- net/llc/llc_conn.c 1
-rw-r--r-- net/mac80211/agg-tx.c 128
-rw-r--r-- net/mac80211/cfg.c 2
-rw-r--r-- net/mac80211/ht.c 16
-rw-r--r-- net/mac80211/ibss.c 22
-rw-r--r-- net/mac80211/ieee80211_i.h 14
-rw-r--r-- net/mac80211/iface.c 11
-rw-r--r-- net/mac80211/main.c 28
-rw-r--r-- net/mac80211/mesh_hwmp.c 4
-rw-r--r-- net/mac80211/mlme.c 70
-rw-r--r-- net/mac80211/sta_info.h 2
-rw-r--r-- net/mac80211/status.c 7
-rw-r--r-- net/mac80211/tdls.c 8
-rw-r--r-- net/mac80211/tx.c 2
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netportnet.c 8
-rw-r--r-- net/netfilter/nf_conntrack_core.c 13
-rw-r--r-- net/netfilter/nf_nat_core.c 2
-rw-r--r-- net/netfilter/xt_IDLETIMER.c 20
-rw-r--r-- net/netlabel/netlabel_unlabeled.c 3
-rw-r--r-- net/rds/ib_cm.c 3
-rw-r--r-- net/rxrpc/input.c 54
-rw-r--r-- net/sched/sch_gred.c 2
-rw-r--r-- net/sctp/associola.c 10
-rw-r--r-- net/sctp/socket.c 37
-rw-r--r-- net/socket.c 11
-rw-r--r-- net/sunrpc/auth_generic.c 8
-rw-r--r-- net/sunrpc/svc_xprt.c 2
-rw-r--r-- net/sunrpc/xdr.c 7
-rw-r--r-- net/tipc/socket.c 6
-rw-r--r-- net/tipc/subscr.c 4
-rw-r--r-- net/wireless/nl80211.c 2
-rw-r--r-- net/wireless/reg.c 1
-rw-r--r-- net/wireless/scan.c 58
-rw-r--r-- net/wireless/util.c 2
-rw-r--r-- net/xfrm/xfrm_policy.c 8
-rw-r--r-- net/xfrm/xfrm_user.c 15
96 files changed, 1721 insertions, 1317 deletions
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c
index 79f1fa22509a..23654f1902f3 100644
--- a/net/6lowpan/iphc.c
+++ b/net/6lowpan/iphc.c
@@ -745,6 +745,7 @@ int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev,
hdr.hop_limit, &hdr.daddr);
skb_push(skb, sizeof(hdr));
+ skb_reset_mac_header(skb);
skb_reset_network_header(skb);
skb_copy_to_linear_data(skb, &hdr, sizeof(hdr));
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 16d287565987..145f80518064 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -46,10 +46,15 @@ p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
void p9stat_free(struct p9_wstat *stbuf)
{
kfree(stbuf->name);
+ stbuf->name = NULL;
kfree(stbuf->uid);
+ stbuf->uid = NULL;
kfree(stbuf->gid);
+ stbuf->gid = NULL;
kfree(stbuf->muid);
+ stbuf->muid = NULL;
kfree(stbuf->extension);
+ stbuf->extension = NULL;
}
EXPORT_SYMBOL(p9stat_free);
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index ee08540ce503..5d79004de25c 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -243,6 +243,7 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
struct batadv_priv *bat_priv;
struct sk_buff *skb;
u32 elp_interval;
+ bool ret;
bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work);
hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v);
@@ -304,8 +305,11 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
* may sleep and that is not allowed in an rcu protected
* context. Therefore schedule a task for that.
*/
- queue_work(batadv_event_workqueue,
- &hardif_neigh->bat_v.metric_work);
+ ret = queue_work(batadv_event_workqueue,
+ &hardif_neigh->bat_v.metric_work);
+
+ if (!ret)
+ batadv_hardif_neigh_put(hardif_neigh);
}
rcu_read_unlock();
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 582e27698bf0..8b6f654bc85d 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1767,6 +1767,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb,
{
struct batadv_bla_backbone_gw *backbone_gw;
struct ethhdr *ethhdr;
+ bool ret;
ethhdr = eth_hdr(skb);
@@ -1790,8 +1791,13 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb,
if (unlikely(!backbone_gw))
return true;
- queue_work(batadv_event_workqueue, &backbone_gw->report_work);
- /* backbone_gw is unreferenced in the report work function function */
+ ret = queue_work(batadv_event_workqueue, &backbone_gw->report_work);
+
+ /* backbone_gw is unreferenced in the report work function function
+ * if queue_work() call was successful
+ */
+ if (!ret)
+ batadv_backbone_gw_put(backbone_gw);
return true;
}
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index e3baf697a35c..a7b5cf08d363 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -845,16 +845,27 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv,
spinlock_t *lock; /* Used to lock list selected by "int in_coding" */
struct list_head *list;
+ /* Select ingoing or outgoing coding node */
+ if (in_coding) {
+ lock = &orig_neigh_node->in_coding_list_lock;
+ list = &orig_neigh_node->in_coding_list;
+ } else {
+ lock = &orig_neigh_node->out_coding_list_lock;
+ list = &orig_neigh_node->out_coding_list;
+ }
+
+ spin_lock_bh(lock);
+
/* Check if nc_node is already added */
nc_node = batadv_nc_find_nc_node(orig_node, orig_neigh_node, in_coding);
/* Node found */
if (nc_node)
- return nc_node;
+ goto unlock;
nc_node = kzalloc(sizeof(*nc_node), GFP_ATOMIC);
if (!nc_node)
- return NULL;
+ goto unlock;
/* Initialize nc_node */
INIT_LIST_HEAD(&nc_node->list);
@@ -863,22 +874,14 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv,
kref_get(&orig_neigh_node->refcount);
nc_node->orig_node = orig_neigh_node;
- /* Select ingoing or outgoing coding node */
- if (in_coding) {
- lock = &orig_neigh_node->in_coding_list_lock;
- list = &orig_neigh_node->in_coding_list;
- } else {
- lock = &orig_neigh_node->out_coding_list_lock;
- list = &orig_neigh_node->out_coding_list;
- }
-
batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_node %pM -> %pM\n",
nc_node->addr, nc_node->orig_node->orig);
/* Add nc_node to orig_node */
- spin_lock_bh(lock);
kref_get(&nc_node->refcount);
list_add_tail_rcu(&nc_node->list, list);
+
+unlock:
spin_unlock_bh(lock);
return nc_node;
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 84c1b388d9ed..05bc176decf0 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -565,15 +565,20 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
struct batadv_softif_vlan *vlan;
int err;
+ spin_lock_bh(&bat_priv->softif_vlan_list_lock);
+
vlan = batadv_softif_vlan_get(bat_priv, vid);
if (vlan) {
batadv_softif_vlan_put(vlan);
+ spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
return -EEXIST;
}
vlan = kzalloc(sizeof(*vlan), GFP_ATOMIC);
- if (!vlan)
+ if (!vlan) {
+ spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
return -ENOMEM;
+ }
vlan->bat_priv = bat_priv;
vlan->vid = vid;
@@ -581,17 +586,23 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid)
atomic_set(&vlan->ap_isolation, 0);
+ kref_get(&vlan->refcount);
+ hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list);
+ spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
+
+ /* batadv_sysfs_add_vlan cannot be in the spinlock section due to the
+ * sleeping behavior of the sysfs functions and the fs_reclaim lock
+ */
err = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan);
if (err) {
- kfree(vlan);
+ /* ref for the function */
+ batadv_softif_vlan_put(vlan);
+
+ /* ref for the list */
+ batadv_softif_vlan_put(vlan);
return err;
}
- spin_lock_bh(&bat_priv->softif_vlan_list_lock);
- kref_get(&vlan->refcount);
- hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list);
- spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
-
/* add a new TT local entry. This one will be marked with the NOPURGE
* flag
*/
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 02d96f224c60..31d7e239a1fd 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -187,7 +187,8 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \
\
return __batadv_store_uint_attr(buff, count, _min, _max, \
_post_func, attr, \
- &bat_priv->_var, net_dev); \
+ &bat_priv->_var, net_dev, \
+ NULL); \
}
#define BATADV_ATTR_SIF_SHOW_UINT(_name, _var) \
@@ -261,7 +262,9 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \
\
length = __batadv_store_uint_attr(buff, count, _min, _max, \
_post_func, attr, \
- &hard_iface->_var, net_dev); \
+ &hard_iface->_var, \
+ hard_iface->soft_iface, \
+ net_dev); \
\
batadv_hardif_put(hard_iface); \
return length; \
@@ -355,10 +358,12 @@ __batadv_store_bool_attr(char *buff, size_t count,
static int batadv_store_uint_attr(const char *buff, size_t count,
struct net_device *net_dev,
+ struct net_device *slave_dev,
const char *attr_name,
unsigned int min, unsigned int max,
atomic_t *attr)
{
+ char ifname[IFNAMSIZ + 3] = "";
unsigned long uint_val;
int ret;
@@ -384,8 +389,11 @@ static int batadv_store_uint_attr(const char *buff, size_t count,
if (atomic_read(attr) == uint_val)
return count;
- batadv_info(net_dev, "%s: Changing from: %i to: %lu\n",
- attr_name, atomic_read(attr), uint_val);
+ if (slave_dev)
+ snprintf(ifname, sizeof(ifname), "%s: ", slave_dev->name);
+
+ batadv_info(net_dev, "%s: %sChanging from: %i to: %lu\n",
+ attr_name, ifname, atomic_read(attr), uint_val);
atomic_set(attr, uint_val);
return count;
@@ -396,12 +404,13 @@ static ssize_t __batadv_store_uint_attr(const char *buff, size_t count,
void (*post_func)(struct net_device *),
const struct attribute *attr,
atomic_t *attr_store,
- struct net_device *net_dev)
+ struct net_device *net_dev,
+ struct net_device *slave_dev)
{
int ret;
- ret = batadv_store_uint_attr(buff, count, net_dev, attr->name, min, max,
- attr_store);
+ ret = batadv_store_uint_attr(buff, count, net_dev, slave_dev,
+ attr->name, min, max, attr_store);
if (post_func && ret)
post_func(net_dev);
@@ -570,7 +579,7 @@ static ssize_t batadv_store_gw_sel_class(struct kobject *kobj,
return __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE,
batadv_post_gw_reselect, attr,
&bat_priv->gw.sel_class,
- bat_priv->soft_iface);
+ bat_priv->soft_iface, NULL);
}
static ssize_t batadv_show_gw_bwidth(struct kobject *kobj,
@@ -1084,8 +1093,9 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj,
if (old_tp_override == tp_override)
goto out;
- batadv_info(net_dev, "%s: Changing from: %u.%u MBit to: %u.%u MBit\n",
- "throughput_override",
+ batadv_info(hard_iface->soft_iface,
+ "%s: %s: Changing from: %u.%u MBit to: %u.%u MBit\n",
+ "throughput_override", net_dev->name,
old_tp_override / 10, old_tp_override % 10,
tp_override / 10, tp_override % 10);
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 0dc85eb1cb7a..b9f9a310eb78 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1550,6 +1550,8 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
{
struct batadv_tt_orig_list_entry *orig_entry;
+ spin_lock_bh(&tt_global->list_lock);
+
orig_entry = batadv_tt_global_orig_entry_find(tt_global, orig_node);
if (orig_entry) {
/* refresh the ttvn: the current value could be a bogus one that
@@ -1570,16 +1572,16 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
orig_entry->ttvn = ttvn;
kref_init(&orig_entry->refcount);
- spin_lock_bh(&tt_global->list_lock);
kref_get(&orig_entry->refcount);
hlist_add_head_rcu(&orig_entry->list,
&tt_global->orig_list);
- spin_unlock_bh(&tt_global->list_lock);
atomic_inc(&tt_global->orig_list_count);
out:
if (orig_entry)
batadv_tt_orig_list_entry_put(orig_entry);
+
+ spin_unlock_bh(&tt_global->list_lock);
}
/**
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 77654f055f24..8e91a26e9b00 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -528,15 +528,20 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
{
struct batadv_tvlv_handler *tvlv_handler;
+ spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
+
tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version);
if (tvlv_handler) {
+ spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
batadv_tvlv_handler_put(tvlv_handler);
return;
}
tvlv_handler = kzalloc(sizeof(*tvlv_handler), GFP_ATOMIC);
- if (!tvlv_handler)
+ if (!tvlv_handler) {
+ spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
return;
+ }
tvlv_handler->ogm_handler = optr;
tvlv_handler->unicast_handler = uptr;
@@ -546,7 +551,6 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
kref_init(&tvlv_handler->refcount);
INIT_HLIST_NODE(&tvlv_handler->list);
- spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
kref_get(&tvlv_handler->refcount);
hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list);
spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 1fba2a03f8ae..ba24f613c0fc 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -2298,9 +2298,8 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data,
/* LE address type */
addr_type = le_addr_type(cp->addr.type);
- hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type);
-
- err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type);
+ /* Abort any ongoing SMP pairing. Removes ltk and irk if they exist. */
+ err = smp_cancel_and_remove_pairing(hdev, &cp->addr.bdaddr, addr_type);
if (err < 0) {
err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE,
MGMT_STATUS_NOT_PAIRED, &rp,
@@ -2314,8 +2313,6 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data,
goto done;
}
- /* Abort any ongoing SMP pairing */
- smp_cancel_pairing(conn);
/* Defer clearing up the connection parameters until closing to
* give a chance of keeping them if a repairing happens.
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index ead4d1baeaa6..1abfbcd8090a 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -2353,30 +2353,51 @@ unlock:
return ret;
}
-void smp_cancel_pairing(struct hci_conn *hcon)
+int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr,
+ u8 addr_type)
{
- struct l2cap_conn *conn = hcon->l2cap_data;
+ struct hci_conn *hcon;
+ struct l2cap_conn *conn;
struct l2cap_chan *chan;
struct smp_chan *smp;
+ int err;
+
+ err = hci_remove_ltk(hdev, bdaddr, addr_type);
+ hci_remove_irk(hdev, bdaddr, addr_type);
+
+ hcon = hci_conn_hash_lookup_le(hdev, bdaddr, addr_type);
+ if (!hcon)
+ goto done;
+ conn = hcon->l2cap_data;
if (!conn)
- return;
+ goto done;
chan = conn->smp;
if (!chan)
- return;
+ goto done;
l2cap_chan_lock(chan);
smp = chan->data;
if (smp) {
+ /* Set keys to NULL to make sure smp_failure() does not try to
+ * remove and free already invalidated rcu list entries. */
+ smp->ltk = NULL;
+ smp->slave_ltk = NULL;
+ smp->remote_irk = NULL;
+
if (test_bit(SMP_FLAG_COMPLETE, &smp->flags))
smp_failure(conn, 0);
else
smp_failure(conn, SMP_UNSPECIFIED);
+ err = 0;
}
l2cap_chan_unlock(chan);
+
+done:
+ return err;
}
static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb)
diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h
index ffcc70b6b199..993cbd7bcfe7 100644
--- a/net/bluetooth/smp.h
+++ b/net/bluetooth/smp.h
@@ -180,7 +180,8 @@ enum smp_key_pref {
};
/* SMP Commands */
-void smp_cancel_pairing(struct hci_conn *hcon);
+int smp_cancel_and_remove_pairing(struct hci_dev *hdev, bdaddr_t *bdaddr,
+ u8 addr_type);
bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level,
enum smp_key_pref key_pref);
int smp_conn_security(struct hci_conn *hcon, __u8 sec_level);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 2136e45f5277..4bd57507b9a4 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1287,7 +1287,14 @@ static void br_multicast_query_received(struct net_bridge *br,
return;
br_multicast_update_query_timer(br, query, max_delay);
- br_multicast_mark_router(br, port);
+
+ /* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules,
+ * the arrival port for IGMP Queries where the source address
+ * is 0.0.0.0 should not be added to router port list.
+ */
+ if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) ||
+ saddr->proto == htons(ETH_P_IPV6))
+ br_multicast_mark_router(br, port);
}
static int br_ip4_multicast_query(struct net_bridge *br,
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index 070cf134a22f..f2660c1b29e4 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -67,6 +67,9 @@ static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par)
if (e->ethproto != htons(ETH_P_ARP) ||
e->invflags & EBT_IPROTO)
return -EINVAL;
+ if (ebt_invalid_target(info->target))
+ return -EINVAL;
+
return 0;
}
diff --git a/net/ceph/auth.c b/net/ceph/auth.c
index c822b3ae1bd3..8e79dca81748 100644
--- a/net/ceph/auth.c
+++ b/net/ceph/auth.c
@@ -314,14 +314,30 @@ int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
}
EXPORT_SYMBOL(ceph_auth_update_authorizer);
+int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a,
+ void *challenge_buf,
+ int challenge_buf_len)
+{
+ int ret = 0;
+
+ mutex_lock(&ac->mutex);
+ if (ac->ops && ac->ops->add_authorizer_challenge)
+ ret = ac->ops->add_authorizer_challenge(ac, a, challenge_buf,
+ challenge_buf_len);
+ mutex_unlock(&ac->mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ceph_auth_add_authorizer_challenge);
+
int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
- struct ceph_authorizer *a, size_t len)
+ struct ceph_authorizer *a)
{
int ret = 0;
mutex_lock(&ac->mutex);
if (ac->ops && ac->ops->verify_authorizer_reply)
- ret = ac->ops->verify_authorizer_reply(ac, a, len);
+ ret = ac->ops->verify_authorizer_reply(ac, a);
mutex_unlock(&ac->mutex);
return ret;
}
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index b216131915e7..29e23b5cb2ed 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -8,6 +8,7 @@
#include <linux/ceph/decode.h>
#include <linux/ceph/auth.h>
+#include <linux/ceph/ceph_features.h>
#include <linux/ceph/libceph.h>
#include <linux/ceph/messenger.h>
@@ -69,25 +70,40 @@ static int ceph_x_encrypt(struct ceph_crypto_key *secret, void *buf,
return sizeof(u32) + ciphertext_len;
}
+static int __ceph_x_decrypt(struct ceph_crypto_key *secret, void *p,
+ int ciphertext_len)
+{
+ struct ceph_x_encrypt_header *hdr = p;
+ int plaintext_len;
+ int ret;
+
+ ret = ceph_crypt(secret, false, p, ciphertext_len, ciphertext_len,
+ &plaintext_len);
+ if (ret)
+ return ret;
+
+ if (le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC) {
+ pr_err("%s bad magic\n", __func__);
+ return -EINVAL;
+ }
+
+ return plaintext_len - sizeof(*hdr);
+}
+
static int ceph_x_decrypt(struct ceph_crypto_key *secret, void **p, void *end)
{
- struct ceph_x_encrypt_header *hdr = *p + sizeof(u32);
- int ciphertext_len, plaintext_len;
+ int ciphertext_len;
int ret;
ceph_decode_32_safe(p, end, ciphertext_len, e_inval);
ceph_decode_need(p, end, ciphertext_len, e_inval);
- ret = ceph_crypt(secret, false, *p, end - *p, ciphertext_len,
- &plaintext_len);
- if (ret)
+ ret = __ceph_x_decrypt(secret, *p, ciphertext_len);
+ if (ret < 0)
return ret;
- if (hdr->struct_v != 1 || le64_to_cpu(hdr->magic) != CEPHX_ENC_MAGIC)
- return -EPERM;
-
*p += ciphertext_len;
- return plaintext_len - sizeof(struct ceph_x_encrypt_header);
+ return ret;
e_inval:
return -EINVAL;
@@ -271,6 +287,51 @@ bad:
return -EINVAL;
}
+/*
+ * Encode and encrypt the second part (ceph_x_authorize_b) of the
+ * authorizer. The first part (ceph_x_authorize_a) should already be
+ * encoded.
+ */
+static int encrypt_authorizer(struct ceph_x_authorizer *au,
+ u64 *server_challenge)
+{
+ struct ceph_x_authorize_a *msg_a;
+ struct ceph_x_authorize_b *msg_b;
+ void *p, *end;
+ int ret;
+
+ msg_a = au->buf->vec.iov_base;
+ WARN_ON(msg_a->ticket_blob.secret_id != cpu_to_le64(au->secret_id));
+ p = (void *)(msg_a + 1) + le32_to_cpu(msg_a->ticket_blob.blob_len);
+ end = au->buf->vec.iov_base + au->buf->vec.iov_len;
+
+ msg_b = p + ceph_x_encrypt_offset();
+ msg_b->struct_v = 2;
+ msg_b->nonce = cpu_to_le64(au->nonce);
+ if (server_challenge) {
+ msg_b->have_challenge = 1;
+ msg_b->server_challenge_plus_one =
+ cpu_to_le64(*server_challenge + 1);
+ } else {
+ msg_b->have_challenge = 0;
+ msg_b->server_challenge_plus_one = 0;
+ }
+
+ ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b));
+ if (ret < 0)
+ return ret;
+
+ p += ret;
+ if (server_challenge) {
+ WARN_ON(p != end);
+ } else {
+ WARN_ON(p > end);
+ au->buf->vec.iov_len = p - au->buf->vec.iov_base;
+ }
+
+ return 0;
+}
+
static void ceph_x_authorizer_cleanup(struct ceph_x_authorizer *au)
{
ceph_crypto_key_destroy(&au->session_key);
@@ -287,7 +348,6 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
int maxlen;
struct ceph_x_authorize_a *msg_a;
struct ceph_x_authorize_b *msg_b;
- void *p, *end;
int ret;
int ticket_blob_len =
(th->ticket_blob ? th->ticket_blob->vec.iov_len : 0);
@@ -331,21 +391,13 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
dout(" th %p secret_id %lld %lld\n", th, th->secret_id,
le64_to_cpu(msg_a->ticket_blob.secret_id));
- p = msg_a + 1;
- p += ticket_blob_len;
- end = au->buf->vec.iov_base + au->buf->vec.iov_len;
-
- msg_b = p + ceph_x_encrypt_offset();
- msg_b->struct_v = 1;
get_random_bytes(&au->nonce, sizeof(au->nonce));
- msg_b->nonce = cpu_to_le64(au->nonce);
- ret = ceph_x_encrypt(&au->session_key, p, end - p, sizeof(*msg_b));
- if (ret < 0)
+ ret = encrypt_authorizer(au, NULL);
+ if (ret) {
+ pr_err("failed to encrypt authorizer: %d", ret);
goto out_au;
+ }
- p += ret;
- WARN_ON(p > end);
- au->buf->vec.iov_len = p - au->buf->vec.iov_base;
dout(" built authorizer nonce %llx len %d\n", au->nonce,
(int)au->buf->vec.iov_len);
return 0;
@@ -622,8 +674,56 @@ static int ceph_x_update_authorizer(
return 0;
}
+static int decrypt_authorize_challenge(struct ceph_x_authorizer *au,
+ void *challenge_buf,
+ int challenge_buf_len,
+ u64 *server_challenge)
+{
+ struct ceph_x_authorize_challenge *ch =
+ challenge_buf + sizeof(struct ceph_x_encrypt_header);
+ int ret;
+
+ /* no leading len */
+ ret = __ceph_x_decrypt(&au->session_key, challenge_buf,
+ challenge_buf_len);
+ if (ret < 0)
+ return ret;
+ if (ret < sizeof(*ch)) {
+ pr_err("bad size %d for ceph_x_authorize_challenge\n", ret);
+ return -EINVAL;
+ }
+
+ *server_challenge = le64_to_cpu(ch->server_challenge);
+ return 0;
+}
+
+static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac,
+ struct ceph_authorizer *a,
+ void *challenge_buf,
+ int challenge_buf_len)
+{
+ struct ceph_x_authorizer *au = (void *)a;
+ u64 server_challenge;
+ int ret;
+
+ ret = decrypt_authorize_challenge(au, challenge_buf, challenge_buf_len,
+ &server_challenge);
+ if (ret) {
+ pr_err("failed to decrypt authorize challenge: %d", ret);
+ return ret;
+ }
+
+ ret = encrypt_authorizer(au, &server_challenge);
+ if (ret) {
+ pr_err("failed to encrypt authorizer w/ challenge: %d", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
- struct ceph_authorizer *a, size_t len)
+ struct ceph_authorizer *a)
{
struct ceph_x_authorizer *au = (void *)a;
void *p = au->enc_buf;
@@ -633,8 +733,10 @@ static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN);
if (ret < 0)
return ret;
- if (ret != sizeof(*reply))
- return -EPERM;
+ if (ret < sizeof(*reply)) {
+ pr_err("bad size %d for ceph_x_authorize_reply\n", ret);
+ return -EINVAL;
+ }
if (au->nonce + 1 != le64_to_cpu(reply->nonce_plus_one))
ret = -EPERM;
@@ -700,26 +802,64 @@ static int calc_signature(struct ceph_x_authorizer *au, struct ceph_msg *msg,
__le64 *psig)
{
void *enc_buf = au->enc_buf;
- struct {
- __le32 len;
- __le32 header_crc;
- __le32 front_crc;
- __le32 middle_crc;
- __le32 data_crc;
- } __packed *sigblock = enc_buf + ceph_x_encrypt_offset();
int ret;
- sigblock->len = cpu_to_le32(4*sizeof(u32));
- sigblock->header_crc = msg->hdr.crc;
- sigblock->front_crc = msg->footer.front_crc;
- sigblock->middle_crc = msg->footer.middle_crc;
- sigblock->data_crc = msg->footer.data_crc;
- ret = ceph_x_encrypt(&au->session_key, enc_buf, CEPHX_AU_ENC_BUF_LEN,
- sizeof(*sigblock));
- if (ret < 0)
- return ret;
+ if (msg->con->peer_features & CEPH_FEATURE_CEPHX_V2) {
+ struct {
+ __le32 len;
+ __le32 header_crc;
+ __le32 front_crc;
+ __le32 middle_crc;
+ __le32 data_crc;
+ } __packed *sigblock = enc_buf + ceph_x_encrypt_offset();
+
+ sigblock->len = cpu_to_le32(4*sizeof(u32));
+ sigblock->header_crc = msg->hdr.crc;
+ sigblock->front_crc = msg->footer.front_crc;
+ sigblock->middle_crc = msg->footer.middle_crc;
+ sigblock->data_crc = msg->footer.data_crc;
+
+ ret = ceph_x_encrypt(&au->session_key, enc_buf,
+ CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock));
+ if (ret < 0)
+ return ret;
+
+ *psig = *(__le64 *)(enc_buf + sizeof(u32));
+ } else {
+ struct {
+ __le32 header_crc;
+ __le32 front_crc;
+ __le32 front_len;
+ __le32 middle_crc;
+ __le32 middle_len;
+ __le32 data_crc;
+ __le32 data_len;
+ __le32 seq_lower_word;
+ } __packed *sigblock = enc_buf;
+ struct {
+ __le64 a, b, c, d;
+ } __packed *penc = enc_buf;
+ int ciphertext_len;
+
+ sigblock->header_crc = msg->hdr.crc;
+ sigblock->front_crc = msg->footer.front_crc;
+ sigblock->front_len = msg->hdr.front_len;
+ sigblock->middle_crc = msg->footer.middle_crc;
+ sigblock->middle_len = msg->hdr.middle_len;
+ sigblock->data_crc = msg->footer.data_crc;
+ sigblock->data_len = msg->hdr.data_len;
+ sigblock->seq_lower_word = *(__le32 *)&msg->hdr.seq;
+
+ /* no leading len, no ceph_x_encrypt_header */
+ ret = ceph_crypt(&au->session_key, true, enc_buf,
+ CEPHX_AU_ENC_BUF_LEN, sizeof(*sigblock),
+ &ciphertext_len);
+ if (ret)
+ return ret;
+
+ *psig = penc->a ^ penc->b ^ penc->c ^ penc->d;
+ }
- *psig = *(__le64 *)(enc_buf + sizeof(u32));
return 0;
}
@@ -774,6 +914,7 @@ static const struct ceph_auth_client_ops ceph_x_ops = {
.handle_reply = ceph_x_handle_reply,
.create_authorizer = ceph_x_create_authorizer,
.update_authorizer = ceph_x_update_authorizer,
+ .add_authorizer_challenge = ceph_x_add_authorizer_challenge,
.verify_authorizer_reply = ceph_x_verify_authorizer_reply,
.invalidate_authorizer = ceph_x_invalidate_authorizer,
.reset = ceph_x_reset,
diff --git a/net/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h
index 671d30576c4f..a7cd203aacc2 100644
--- a/net/ceph/auth_x_protocol.h
+++ b/net/ceph/auth_x_protocol.h
@@ -69,6 +69,13 @@ struct ceph_x_authorize_a {
struct ceph_x_authorize_b {
__u8 struct_v;
__le64 nonce;
+ __u8 have_challenge;
+ __le64 server_challenge_plus_one;
+} __attribute__ ((packed));
+
+struct ceph_x_authorize_challenge {
+ __u8 struct_v;
+ __le64 server_challenge;
} __attribute__ ((packed));
struct ceph_x_authorize_reply {
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 98ea28dc03f9..5a8075d9f2e7 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -588,9 +588,15 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
int ret;
struct kvec iov;
- /* sendpage cannot properly handle pages with page_count == 0,
- * we need to fallback to sendmsg if that's the case */
- if (page_count(page) >= 1)
+ /*
+ * sendpage cannot properly handle pages with page_count == 0,
+ * we need to fall back to sendmsg if that's the case.
+ *
+ * Same goes for slab pages: skb_can_coalesce() allows
+ * coalescing neighboring slab objects into a single frag which
+ * triggers one of hardened usercopy checks.
+ */
+ if (page_count(page) >= 1 && !PageSlab(page))
return __ceph_tcp_sendpage(sock, page, offset, size, more);
iov.iov_base = kmap(page) + offset;
@@ -1388,30 +1394,26 @@ static void prepare_write_keepalive(struct ceph_connection *con)
* Connection negotiation.
*/
-static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con,
- int *auth_proto)
+static int get_connect_authorizer(struct ceph_connection *con)
{
struct ceph_auth_handshake *auth;
+ int auth_proto;
if (!con->ops->get_authorizer) {
+ con->auth = NULL;
con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
con->out_connect.authorizer_len = 0;
- return NULL;
+ return 0;
}
- /* Can't hold the mutex while getting authorizer */
- mutex_unlock(&con->mutex);
- auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
- mutex_lock(&con->mutex);
-
+ auth = con->ops->get_authorizer(con, &auth_proto, con->auth_retry);
if (IS_ERR(auth))
- return auth;
- if (con->state != CON_STATE_NEGOTIATING)
- return ERR_PTR(-EAGAIN);
+ return PTR_ERR(auth);
- con->auth_reply_buf = auth->authorizer_reply_buf;
- con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
- return auth;
+ con->auth = auth;
+ con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
+ con->out_connect.authorizer_len = cpu_to_le32(auth->authorizer_buf_len);
+ return 0;
}
/*
@@ -1427,12 +1429,22 @@ static void prepare_write_banner(struct ceph_connection *con)
con_flag_set(con, CON_FLAG_WRITE_PENDING);
}
+static void __prepare_write_connect(struct ceph_connection *con)
+{
+ con_out_kvec_add(con, sizeof(con->out_connect), &con->out_connect);
+ if (con->auth)
+ con_out_kvec_add(con, con->auth->authorizer_buf_len,
+ con->auth->authorizer_buf);
+
+ con->out_more = 0;
+ con_flag_set(con, CON_FLAG_WRITE_PENDING);
+}
+
static int prepare_write_connect(struct ceph_connection *con)
{
unsigned int global_seq = get_global_seq(con->msgr, 0);
int proto;
- int auth_proto;
- struct ceph_auth_handshake *auth;
+ int ret;
switch (con->peer_name.type) {
case CEPH_ENTITY_TYPE_MON:
@@ -1459,24 +1471,11 @@ static int prepare_write_connect(struct ceph_connection *con)
con->out_connect.protocol_version = cpu_to_le32(proto);
con->out_connect.flags = 0;
- auth_proto = CEPH_AUTH_UNKNOWN;
- auth = get_connect_authorizer(con, &auth_proto);
- if (IS_ERR(auth))
- return PTR_ERR(auth);
-
- con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
- con->out_connect.authorizer_len = auth ?
- cpu_to_le32(auth->authorizer_buf_len) : 0;
-
- con_out_kvec_add(con, sizeof (con->out_connect),
- &con->out_connect);
- if (auth && auth->authorizer_buf_len)
- con_out_kvec_add(con, auth->authorizer_buf_len,
- auth->authorizer_buf);
-
- con->out_more = 0;
- con_flag_set(con, CON_FLAG_WRITE_PENDING);
+ ret = get_connect_authorizer(con);
+ if (ret)
+ return ret;
+ __prepare_write_connect(con);
return 0;
}
@@ -1737,11 +1736,21 @@ static int read_partial_connect(struct ceph_connection *con)
if (ret <= 0)
goto out;
- size = le32_to_cpu(con->in_reply.authorizer_len);
- end += size;
- ret = read_partial(con, end, size, con->auth_reply_buf);
- if (ret <= 0)
- goto out;
+ if (con->auth) {
+ size = le32_to_cpu(con->in_reply.authorizer_len);
+ if (size > con->auth->authorizer_reply_buf_len) {
+ pr_err("authorizer reply too big: %d > %zu\n", size,
+ con->auth->authorizer_reply_buf_len);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ end += size;
+ ret = read_partial(con, end, size,
+ con->auth->authorizer_reply_buf);
+ if (ret <= 0)
+ goto out;
+ }
dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n",
con, (int)con->in_reply.tag,
@@ -1749,7 +1758,6 @@ static int read_partial_connect(struct ceph_connection *con)
le32_to_cpu(con->in_reply.global_seq));
out:
return ret;
-
}
/*
@@ -2033,13 +2041,28 @@ static int process_connect(struct ceph_connection *con)
dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
- if (con->auth_reply_buf) {
+ if (con->auth) {
/*
* Any connection that defines ->get_authorizer()
- * should also define ->verify_authorizer_reply().
+ * should also define ->add_authorizer_challenge() and
+ * ->verify_authorizer_reply().
+ *
* See get_connect_authorizer().
*/
- ret = con->ops->verify_authorizer_reply(con, 0);
+ if (con->in_reply.tag == CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) {
+ ret = con->ops->add_authorizer_challenge(
+ con, con->auth->authorizer_reply_buf,
+ le32_to_cpu(con->in_reply.authorizer_len));
+ if (ret < 0)
+ return ret;
+
+ con_out_kvec_reset(con);
+ __prepare_write_connect(con);
+ prepare_read_connect(con);
+ return 0;
+ }
+
+ ret = con->ops->verify_authorizer_reply(con);
if (ret < 0) {
con->error_msg = "bad authorize reply";
return ret;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 0ffeb60cfe67..70ccb0716fc5 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -4478,14 +4478,24 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
return auth;
}
+static int add_authorizer_challenge(struct ceph_connection *con,
+ void *challenge_buf, int challenge_buf_len)
+{
+ struct ceph_osd *o = con->private;
+ struct ceph_osd_client *osdc = o->o_osdc;
+ struct ceph_auth_client *ac = osdc->client->monc.auth;
+
+ return ceph_auth_add_authorizer_challenge(ac, o->o_auth.authorizer,
+ challenge_buf, challenge_buf_len);
+}
-static int verify_authorizer_reply(struct ceph_connection *con, int len)
+static int verify_authorizer_reply(struct ceph_connection *con)
{
struct ceph_osd *o = con->private;
struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth;
- return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer, len);
+ return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer);
}
static int invalidate_authorizer(struct ceph_connection *con)
@@ -4519,6 +4529,7 @@ static const struct ceph_connection_operations osd_con_ops = {
.put = put_osd_con,
.dispatch = dispatch,
.get_authorizer = get_authorizer,
+ .add_authorizer_challenge = add_authorizer_challenge,
.verify_authorizer_reply = verify_authorizer_reply,
.invalidate_authorizer = invalidate_authorizer,
.alloc_msg = alloc_msg,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 4fa4011feec1..146502f310ce 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -754,8 +754,9 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
return -EINVAL;
}
- if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
- netdev_rx_csum_fault(skb->dev);
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+ !skb->csum_complete_sw)
+ netdev_rx_csum_fault(NULL);
}
return 0;
fault:
diff --git a/net/core/dev.c b/net/core/dev.c
index b85e789044d5..071c589f7994 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1664,6 +1664,28 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
}
EXPORT_SYMBOL(call_netdevice_notifiers);
+/**
+ * call_netdevice_notifiers_mtu - call all network notifier blocks
+ * @val: value passed unmodified to notifier function
+ * @dev: net_device pointer passed unmodified to notifier function
+ * @arg: additional u32 argument passed to the notifier function
+ *
+ * Call all network notifier blocks. Parameters and return value
+ * are as for raw_notifier_call_chain().
+ */
+static int call_netdevice_notifiers_mtu(unsigned long val,
+ struct net_device *dev, u32 arg)
+{
+ struct netdev_notifier_info_ext info = {
+ .info.dev = dev,
+ .ext.mtu = arg,
+ };
+
+ BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0);
+
+ return call_netdevice_notifiers_info(val, dev, &info.info);
+}
+
#ifdef CONFIG_NET_INGRESS
static struct static_key ingress_needed __read_mostly;
@@ -4734,6 +4756,10 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->vlan_tci = 0;
skb->dev = napi->dev;
skb->skb_iif = 0;
+
+ /* eth_type_trans() assumes pkt_type is PACKET_HOST */
+ skb->pkt_type = PACKET_HOST;
+
skb->encapsulation = 0;
skb_shinfo(skb)->gso_type = 0;
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
@@ -6589,14 +6615,16 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
err = __dev_set_mtu(dev, new_mtu);
if (!err) {
- err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+ err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
+ orig_mtu);
err = notifier_to_errno(err);
if (err) {
/* setting mtu back and notifying everyone again,
* so that they have a chance to revert changes.
*/
__dev_set_mtu(dev, orig_mtu);
- call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+ call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
+ new_mtu);
}
}
return err;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 7913771ec474..a8a9938aeceb 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -2397,13 +2397,17 @@ roll_back:
return ret;
}
-static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr)
+static int ethtool_set_per_queue(struct net_device *dev,
+ void __user *useraddr, u32 sub_cmd)
{
struct ethtool_per_queue_op per_queue_opt;
if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt)))
return -EFAULT;
+ if (per_queue_opt.sub_command != sub_cmd)
+ return -EINVAL;
+
switch (per_queue_opt.sub_command) {
case ETHTOOL_GCOALESCE:
return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt);
@@ -2669,7 +2673,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
rc = ethtool_get_phy_stats(dev, useraddr);
break;
case ETHTOOL_PERQUEUE:
- rc = ethtool_set_per_queue(dev, useraddr);
+ rc = ethtool_set_per_queue(dev, useraddr, sub_cmd);
break;
case ETHTOOL_GLINKSETTINGS:
rc = ethtool_get_link_ksettings(dev, useraddr);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 862d63ec56e4..ab7c50026cae 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -538,8 +538,8 @@ ip_proto_again:
break;
}
- if (dissector_uses_key(flow_dissector,
- FLOW_DISSECTOR_KEY_PORTS)) {
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS) &&
+ !(key_control->flags & FLOW_DIS_IS_FRAGMENT)) {
key_ports = skb_flow_dissector_target(flow_dissector,
FLOW_DISSECTOR_KEY_PORTS,
target_container);
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 46e8830c1979..2e4eef71471d 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -104,6 +104,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
iterate_fd(p->files, 0, update_classid_sock,
(void *)(unsigned long)cs->classid);
task_unlock(p);
+ cond_resched();
}
css_task_iter_end(&it);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 194e844e1021..928a0b84469d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2368,6 +2368,12 @@ struct net_device *rtnl_create_link(struct net *net,
else if (ops->get_num_rx_queues)
num_rx_queues = ops->get_num_rx_queues();
+ if (num_tx_queues < 1 || num_tx_queues > 4096)
+ return ERR_PTR(-EINVAL);
+
+ if (num_rx_queues < 1 || num_rx_queues > 4096)
+ return ERR_PTR(-EINVAL);
+
err = -ENOMEM;
dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type,
ops->setup, num_tx_queues, num_rx_queues);
@@ -2981,6 +2987,11 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EINVAL;
}
+ if (dev->type != ARPHRD_ETHER) {
+ pr_info("PF_BRIDGE: FDB add only supported for Ethernet devices");
+ return -EINVAL;
+ }
+
addr = nla_data(tb[NDA_LLADDR]);
err = fdb_vid_parse(tb[NDA_VLAN], &vid);
@@ -3084,6 +3095,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EINVAL;
}
+ if (dev->type != ARPHRD_ETHER) {
+ pr_info("PF_BRIDGE: FDB delete only supported for Ethernet devices");
+ return -EINVAL;
+ }
+
addr = nla_data(tb[NDA_LLADDR]);
err = fdb_vid_parse(tb[NDA_VLAN], &vid);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 84c731aef0d8..dca1fed0d7da 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1578,6 +1578,21 @@ done:
}
EXPORT_SYMBOL(___pskb_trim);
+/* Note : use pskb_trim_rcsum() instead of calling this directly
+ */
+int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len)
+{
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ int delta = skb->len - len;
+
+ skb->csum = csum_block_sub(skb->csum,
+ skb_checksum(skb, len, delta, 0),
+ len);
+ }
+ return __pskb_trim(skb, len);
+}
+EXPORT_SYMBOL(pskb_trim_rcsum_slow);
+
/**
* __pskb_pull_tail - advance tail of skb header
* @skb: buffer to reallocate
@@ -2425,20 +2440,27 @@ EXPORT_SYMBOL(skb_queue_purge);
/**
* skb_rbtree_purge - empty a skb rbtree
* @root: root of the rbtree to empty
+ * Return value: the sum of truesizes of all purged skbs.
*
* Delete all buffers on an &sk_buff rbtree. Each buffer is removed from
* the list and one reference dropped. This function does not take
* any lock. Synchronization should be handled by the caller (e.g., TCP
* out-of-order queue is protected by the socket lock).
*/
-void skb_rbtree_purge(struct rb_root *root)
+unsigned int skb_rbtree_purge(struct rb_root *root)
{
- struct sk_buff *skb, *next;
+ struct rb_node *p = rb_first(root);
+ unsigned int sum = 0;
- rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode)
- kfree_skb(skb);
+ while (p) {
+ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
- *root = RB_ROOT;
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, root);
+ sum += skb->truesize;
+ kfree_skb(skb);
+ }
+ return sum;
}
/**
@@ -4399,6 +4421,10 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
nf_reset(skb);
nf_reset_trace(skb);
+#ifdef CONFIG_NET_SWITCHDEV
+ skb->offload_fwd_mark = 0;
+#endif
+
if (!xnet)
return;
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 4a05d7876850..84ff43acd427 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -605,11 +605,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (sk->sk_state == DCCP_LISTEN) {
if (dh->dccph_type == DCCP_PKT_REQUEST) {
/* It is possible that we process SYN packets from backlog,
- * so we need to make sure to disable BH right there.
+ * so we need to make sure to disable BH and RCU right there.
*/
+ rcu_read_lock();
local_bh_disable();
acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0;
local_bh_enable();
+ rcu_read_unlock();
if (!acceptable)
return 1;
consume_skb(skb);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 6697b180e122..28ad6f187e19 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -493,9 +493,11 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req
dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr,
ireq->ir_rmt_addr);
+ rcu_read_lock();
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- ireq_opt_deref(ireq));
+ rcu_dereference(ireq->ireq_opt));
+ rcu_read_unlock();
err = net_xmit_eval(err);
}
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index 5ac778962e4e..78916c510d9a 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -16,37 +16,19 @@ typedef unsigned __bitwise__ lowpan_rx_result;
#define LOWPAN_DISPATCH_FRAG1 0xc0
#define LOWPAN_DISPATCH_FRAGN 0xe0
-struct lowpan_create_arg {
+struct frag_lowpan_compare_key {
u16 tag;
u16 d_size;
- const struct ieee802154_addr *src;
- const struct ieee802154_addr *dst;
+ struct ieee802154_addr src;
+ struct ieee802154_addr dst;
};
-/* Equivalent of ipv4 struct ip
+/* Equivalent of ipv4 struct ipq
*/
struct lowpan_frag_queue {
struct inet_frag_queue q;
-
- u16 tag;
- u16 d_size;
- struct ieee802154_addr saddr;
- struct ieee802154_addr daddr;
};
-static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a)
-{
- switch (a->mode) {
- case IEEE802154_ADDR_LONG:
- return (((__force u64)a->extended_addr) >> 32) ^
- (((__force u64)a->extended_addr) & 0xffffffff);
- case IEEE802154_ADDR_SHORT:
- return (__force u32)(a->short_addr + (a->pan_id << 16));
- default:
- return 0;
- }
-}
-
int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type);
void lowpan_net_frag_exit(void);
int lowpan_net_frag_init(void);
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index f85b08baff16..aab1e2dfdfca 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -37,47 +37,15 @@ static struct inet_frags lowpan_frags;
static int lowpan_frag_reasm(struct lowpan_frag_queue *fq,
struct sk_buff *prev, struct net_device *ldev);
-static unsigned int lowpan_hash_frag(u16 tag, u16 d_size,
- const struct ieee802154_addr *saddr,
- const struct ieee802154_addr *daddr)
-{
- net_get_random_once(&lowpan_frags.rnd, sizeof(lowpan_frags.rnd));
- return jhash_3words(ieee802154_addr_hash(saddr),
- ieee802154_addr_hash(daddr),
- (__force u32)(tag + (d_size << 16)),
- lowpan_frags.rnd);
-}
-
-static unsigned int lowpan_hashfn(const struct inet_frag_queue *q)
-{
- const struct lowpan_frag_queue *fq;
-
- fq = container_of(q, struct lowpan_frag_queue, q);
- return lowpan_hash_frag(fq->tag, fq->d_size, &fq->saddr, &fq->daddr);
-}
-
-static bool lowpan_frag_match(const struct inet_frag_queue *q, const void *a)
-{
- const struct lowpan_frag_queue *fq;
- const struct lowpan_create_arg *arg = a;
-
- fq = container_of(q, struct lowpan_frag_queue, q);
- return fq->tag == arg->tag && fq->d_size == arg->d_size &&
- ieee802154_addr_equal(&fq->saddr, arg->src) &&
- ieee802154_addr_equal(&fq->daddr, arg->dst);
-}
-
static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
{
- const struct lowpan_create_arg *arg = a;
+ const struct frag_lowpan_compare_key *key = a;
struct lowpan_frag_queue *fq;
fq = container_of(q, struct lowpan_frag_queue, q);
- fq->tag = arg->tag;
- fq->d_size = arg->d_size;
- fq->saddr = *arg->src;
- fq->daddr = *arg->dst;
+ BUILD_BUG_ON(sizeof(*key) > sizeof(q->key));
+ memcpy(&q->key, key, sizeof(*key));
}
static void lowpan_frag_expire(unsigned long data)
@@ -93,10 +61,10 @@ static void lowpan_frag_expire(unsigned long data)
if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;
- inet_frag_kill(&fq->q, &lowpan_frags);
+ inet_frag_kill(&fq->q);
out:
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, &lowpan_frags);
+ inet_frag_put(&fq->q);
}
static inline struct lowpan_frag_queue *
@@ -104,25 +72,20 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
const struct ieee802154_addr *src,
const struct ieee802154_addr *dst)
{
- struct inet_frag_queue *q;
- struct lowpan_create_arg arg;
- unsigned int hash;
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
+ struct frag_lowpan_compare_key key = {};
+ struct inet_frag_queue *q;
- arg.tag = cb->d_tag;
- arg.d_size = cb->d_size;
- arg.src = src;
- arg.dst = dst;
-
- hash = lowpan_hash_frag(cb->d_tag, cb->d_size, src, dst);
+ key.tag = cb->d_tag;
+ key.d_size = cb->d_size;
+ key.src = *src;
+ key.dst = *dst;
- q = inet_frag_find(&ieee802154_lowpan->frags,
- &lowpan_frags, &arg, hash);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+ q = inet_frag_find(&ieee802154_lowpan->frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct lowpan_frag_queue, q);
}
@@ -229,7 +192,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
struct sk_buff *fp, *head = fq->q.fragments;
int sum_truesize;
- inet_frag_kill(&fq->q, &lowpan_frags);
+ inet_frag_kill(&fq->q);
/* Make the one we just received the head. */
if (prev) {
@@ -408,7 +371,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
struct lowpan_frag_queue *fq;
struct net *net = dev_net(skb->dev);
struct lowpan_802154_cb *cb = lowpan_802154_cb(skb);
- struct ieee802154_hdr hdr;
+ struct ieee802154_hdr hdr = {};
int err;
if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0)
@@ -437,7 +400,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
ret = lowpan_frag_queue(fq, skb, frag_type);
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, &lowpan_frags);
+ inet_frag_put(&fq->q);
return ret;
}
@@ -447,24 +410,22 @@ err:
}
#ifdef CONFIG_SYSCTL
-static int zero;
static struct ctl_table lowpan_frags_ns_ctl_table[] = {
{
.procname = "6lowpanfrag_high_thresh",
.data = &init_net.ieee802154_lowpan.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ieee802154_lowpan.frags.low_thresh
},
{
.procname = "6lowpanfrag_low_thresh",
.data = &init_net.ieee802154_lowpan.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .proc_handler = proc_doulongvec_minmax,
.extra2 = &init_net.ieee802154_lowpan.frags.high_thresh
},
{
@@ -580,14 +541,20 @@ static int __net_init lowpan_frags_init_net(struct net *net)
{
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
+ int res;
ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
+ ieee802154_lowpan->frags.f = &lowpan_frags;
- inet_frags_init_net(&ieee802154_lowpan->frags);
-
- return lowpan_frags_ns_sysctl_register(net);
+ res = inet_frags_init_net(&ieee802154_lowpan->frags);
+ if (res < 0)
+ return res;
+ res = lowpan_frags_ns_sysctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&ieee802154_lowpan->frags);
+ return res;
}
static void __net_exit lowpan_frags_exit_net(struct net *net)
@@ -596,7 +563,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
net_ieee802154_lowpan(net);
lowpan_frags_ns_sysctl_unregister(net);
- inet_frags_exit_net(&ieee802154_lowpan->frags, &lowpan_frags);
+ inet_frags_exit_net(&ieee802154_lowpan->frags);
}
static struct pernet_operations lowpan_frags_ops = {
@@ -604,32 +571,63 @@ static struct pernet_operations lowpan_frags_ops = {
.exit = lowpan_frags_exit_net,
};
-int __init lowpan_net_frag_init(void)
+static u32 lowpan_key_hashfn(const void *data, u32 len, u32 seed)
{
- int ret;
+ return jhash2(data,
+ sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
+}
- ret = lowpan_frags_sysctl_register();
- if (ret)
- return ret;
+static u32 lowpan_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct inet_frag_queue *fq = data;
- ret = register_pernet_subsys(&lowpan_frags_ops);
- if (ret)
- goto err_pernet;
+ return jhash2((const u32 *)&fq->key,
+ sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed);
+}
+
+static int lowpan_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+ const struct frag_lowpan_compare_key *key = arg->key;
+ const struct inet_frag_queue *fq = ptr;
+
+ return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+static const struct rhashtable_params lowpan_rhash_params = {
+ .head_offset = offsetof(struct inet_frag_queue, node),
+ .hashfn = lowpan_key_hashfn,
+ .obj_hashfn = lowpan_obj_hashfn,
+ .obj_cmpfn = lowpan_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+
+int __init lowpan_net_frag_init(void)
+{
+ int ret;
- lowpan_frags.hashfn = lowpan_hashfn;
lowpan_frags.constructor = lowpan_frag_init;
lowpan_frags.destructor = NULL;
lowpan_frags.qsize = sizeof(struct frag_queue);
- lowpan_frags.match = lowpan_frag_match;
lowpan_frags.frag_expire = lowpan_frag_expire;
lowpan_frags.frags_cache_name = lowpan_frags_cache_name;
+ lowpan_frags.rhash_params = lowpan_rhash_params;
ret = inet_frags_init(&lowpan_frags);
if (ret)
- goto err_pernet;
+ goto out;
+ ret = lowpan_frags_sysctl_register();
+ if (ret)
+ goto err_sysctl;
+
+ ret = register_pernet_subsys(&lowpan_frags_ops);
+ if (ret)
+ goto err_pernet;
+out:
return ret;
err_pernet:
lowpan_frags_sysctl_unregister();
+err_sysctl:
+ inet_frags_fini(&lowpan_frags);
return ret;
}
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 65a15889d432..571d079e262f 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1512,7 +1512,7 @@ static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def,
*
* Description:
* Parse the packet's IP header looking for a CIPSO option. Returns a pointer
- * to the start of the CIPSO option on success, NULL if one if not found.
+ * to the start of the CIPSO option on success, NULL if one is not found.
*
*/
unsigned char *cipso_v4_optptr(const struct sk_buff *skb)
@@ -1522,10 +1522,8 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb)
int optlen;
int taglen;
- for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 0; ) {
+ for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 1; ) {
switch (optptr[0]) {
- case IPOPT_CIPSO:
- return optptr;
case IPOPT_END:
return NULL;
case IPOPT_NOOP:
@@ -1534,6 +1532,11 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb)
default:
taglen = optptr[1];
}
+ if (!taglen || taglen > optlen)
+ return NULL;
+ if (optptr[0] == IPOPT_CIPSO)
+ return optptr;
+
optlen -= taglen;
optptr += taglen;
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 6a2ef162088d..9364c39d0555 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1171,7 +1171,8 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct netdev_notifier_changeupper_info *info;
+ struct netdev_notifier_changeupper_info *upper_info = ptr;
+ struct netdev_notifier_info_ext *info_ext = ptr;
struct in_device *in_dev;
struct net *net = dev_net(dev);
unsigned int flags;
@@ -1206,16 +1207,19 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
fib_sync_up(dev, RTNH_F_LINKDOWN);
else
fib_sync_down_dev(dev, event, false);
- /* fall through */
+ rt_cache_flush(net);
+ break;
case NETDEV_CHANGEMTU:
+ fib_sync_mtu(dev, info_ext->ext.mtu);
rt_cache_flush(net);
break;
case NETDEV_CHANGEUPPER:
- info = ptr;
+ upper_info = ptr;
/* flush all routes if dev is linked to or unlinked from
* an L3 master device (e.g., VRF)
*/
- if (info->upper_dev && netif_is_l3_master(info->upper_dev))
+ if (upper_info->upper_dev &&
+ netif_is_l3_master(upper_info->upper_dev))
fib_disable_ip(dev, NETDEV_DOWN, true);
break;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index a88dab33cdf6..90c654012510 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1377,6 +1377,56 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local)
return ret;
}
+/* Update the PMTU of exceptions when:
+ * - the new MTU of the first hop becomes smaller than the PMTU
+ * - the old MTU was the same as the PMTU, and it limited discovery of
+ * larger MTUs on the path. With that limit raised, we can now
+ * discover larger MTUs
+ * A special case is locked exceptions, for which the PMTU is smaller
+ * than the minimal accepted PMTU:
+ * - if the new MTU is greater than the PMTU, don't make any change
+ * - otherwise, unlock and set PMTU
+ */
+static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig)
+{
+ struct fnhe_hash_bucket *bucket;
+ int i;
+
+ bucket = rcu_dereference_protected(nh->nh_exceptions, 1);
+ if (!bucket)
+ return;
+
+ for (i = 0; i < FNHE_HASH_SIZE; i++) {
+ struct fib_nh_exception *fnhe;
+
+ for (fnhe = rcu_dereference_protected(bucket[i].chain, 1);
+ fnhe;
+ fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) {
+ if (fnhe->fnhe_mtu_locked) {
+ if (new <= fnhe->fnhe_pmtu) {
+ fnhe->fnhe_pmtu = new;
+ fnhe->fnhe_mtu_locked = false;
+ }
+ } else if (new < fnhe->fnhe_pmtu ||
+ orig == fnhe->fnhe_pmtu) {
+ fnhe->fnhe_pmtu = new;
+ }
+ }
+ }
+}
+
+void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
+{
+ unsigned int hash = fib_devindex_hashfn(dev->ifindex);
+ struct hlist_head *head = &fib_info_devhash[hash];
+ struct fib_nh *nh;
+
+ hlist_for_each_entry(nh, head, nh_hash) {
+ if (nh->nh_dev == dev)
+ nh_update_mtu(nh, dev->mtu, orig_mtu);
+ }
+}
+
/* Event force Flags Description
* NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host
* NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d1cab49393e2..528a6777cda0 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -410,7 +410,8 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
struct ip_options_rcu *opt;
struct rtable *rt;
- opt = ireq_opt_deref(ireq);
+ rcu_read_lock();
+ opt = rcu_dereference(ireq->ireq_opt);
flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
@@ -424,11 +425,13 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
goto no_route;
if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
goto route_err;
+ rcu_read_unlock();
return &rt->dst;
route_err:
ip_rt_put(rt);
no_route:
+ rcu_read_unlock();
__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index f8b41aaac76f..5a8c26c9872d 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -25,12 +25,6 @@
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
-#define INETFRAGS_EVICT_BUCKETS 128
-#define INETFRAGS_EVICT_MAX 512
-
-/* don't rebuild inetfrag table with new secret more often than this */
-#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ)
-
/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
* Value : 0xff if frame should be dropped.
* 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
@@ -52,157 +46,8 @@ const u8 ip_frag_ecn_table[16] = {
};
EXPORT_SYMBOL(ip_frag_ecn_table);
-static unsigned int
-inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q)
-{
- return f->hashfn(q) & (INETFRAGS_HASHSZ - 1);
-}
-
-static bool inet_frag_may_rebuild(struct inet_frags *f)
-{
- return time_after(jiffies,
- f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL);
-}
-
-static void inet_frag_secret_rebuild(struct inet_frags *f)
-{
- int i;
-
- write_seqlock_bh(&f->rnd_seqlock);
-
- if (!inet_frag_may_rebuild(f))
- goto out;
-
- get_random_bytes(&f->rnd, sizeof(u32));
-
- for (i = 0; i < INETFRAGS_HASHSZ; i++) {
- struct inet_frag_bucket *hb;
- struct inet_frag_queue *q;
- struct hlist_node *n;
-
- hb = &f->hash[i];
- spin_lock(&hb->chain_lock);
-
- hlist_for_each_entry_safe(q, n, &hb->chain, list) {
- unsigned int hval = inet_frag_hashfn(f, q);
-
- if (hval != i) {
- struct inet_frag_bucket *hb_dest;
-
- hlist_del(&q->list);
-
- /* Relink to new hash chain. */
- hb_dest = &f->hash[hval];
-
- /* This is the only place where we take
- * another chain_lock while already holding
- * one. As this will not run concurrently,
- * we cannot deadlock on hb_dest lock below, if its
- * already locked it will be released soon since
- * other caller cannot be waiting for hb lock
- * that we've taken above.
- */
- spin_lock_nested(&hb_dest->chain_lock,
- SINGLE_DEPTH_NESTING);
- hlist_add_head(&q->list, &hb_dest->chain);
- spin_unlock(&hb_dest->chain_lock);
- }
- }
- spin_unlock(&hb->chain_lock);
- }
-
- f->rebuild = false;
- f->last_rebuild_jiffies = jiffies;
-out:
- write_sequnlock_bh(&f->rnd_seqlock);
-}
-
-static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
-{
- if (!hlist_unhashed(&q->list_evictor))
- return false;
-
- return q->net->low_thresh == 0 ||
- frag_mem_limit(q->net) >= q->net->low_thresh;
-}
-
-static unsigned int
-inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
-{
- struct inet_frag_queue *fq;
- struct hlist_node *n;
- unsigned int evicted = 0;
- HLIST_HEAD(expired);
-
- spin_lock(&hb->chain_lock);
-
- hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
- if (!inet_fragq_should_evict(fq))
- continue;
-
- if (!del_timer(&fq->timer))
- continue;
-
- hlist_add_head(&fq->list_evictor, &expired);
- ++evicted;
- }
-
- spin_unlock(&hb->chain_lock);
-
- hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
- f->frag_expire((unsigned long) fq);
-
- return evicted;
-}
-
-static void inet_frag_worker(struct work_struct *work)
-{
- unsigned int budget = INETFRAGS_EVICT_BUCKETS;
- unsigned int i, evicted = 0;
- struct inet_frags *f;
-
- f = container_of(work, struct inet_frags, frags_work);
-
- BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
-
- local_bh_disable();
-
- for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
- evicted += inet_evict_bucket(f, &f->hash[i]);
- i = (i + 1) & (INETFRAGS_HASHSZ - 1);
- if (evicted > INETFRAGS_EVICT_MAX)
- break;
- }
-
- f->next_bucket = i;
-
- local_bh_enable();
-
- if (f->rebuild && inet_frag_may_rebuild(f))
- inet_frag_secret_rebuild(f);
-}
-
-static void inet_frag_schedule_worker(struct inet_frags *f)
-{
- if (unlikely(!work_pending(&f->frags_work)))
- schedule_work(&f->frags_work);
-}
-
int inet_frags_init(struct inet_frags *f)
{
- int i;
-
- INIT_WORK(&f->frags_work, inet_frag_worker);
-
- for (i = 0; i < INETFRAGS_HASHSZ; i++) {
- struct inet_frag_bucket *hb = &f->hash[i];
-
- spin_lock_init(&hb->chain_lock);
- INIT_HLIST_HEAD(&hb->chain);
- }
-
- seqlock_init(&f->rnd_seqlock);
- f->last_rebuild_jiffies = 0;
f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
NULL);
if (!f->frags_cachep)
@@ -214,83 +59,75 @@ EXPORT_SYMBOL(inet_frags_init);
void inet_frags_fini(struct inet_frags *f)
{
- cancel_work_sync(&f->frags_work);
+ /* We must wait that all inet_frag_destroy_rcu() have completed. */
+ rcu_barrier();
+
kmem_cache_destroy(f->frags_cachep);
+ f->frags_cachep = NULL;
}
EXPORT_SYMBOL(inet_frags_fini);
-void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
+static void inet_frags_free_cb(void *ptr, void *arg)
{
- unsigned int seq;
- int i;
+ struct inet_frag_queue *fq = ptr;
- nf->low_thresh = 0;
-
-evict_again:
- local_bh_disable();
- seq = read_seqbegin(&f->rnd_seqlock);
-
- for (i = 0; i < INETFRAGS_HASHSZ ; i++)
- inet_evict_bucket(f, &f->hash[i]);
-
- local_bh_enable();
- cond_resched();
-
- if (read_seqretry(&f->rnd_seqlock, seq) ||
- sum_frag_mem_limit(nf))
- goto evict_again;
-}
-EXPORT_SYMBOL(inet_frags_exit_net);
-
-static struct inet_frag_bucket *
-get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
-__acquires(hb->chain_lock)
-{
- struct inet_frag_bucket *hb;
- unsigned int seq, hash;
-
- restart:
- seq = read_seqbegin(&f->rnd_seqlock);
-
- hash = inet_frag_hashfn(f, fq);
- hb = &f->hash[hash];
+ /* If we can not cancel the timer, it means this frag_queue
+ * is already disappearing, we have nothing to do.
+ * Otherwise, we own a refcount until the end of this function.
+ */
+ if (!del_timer(&fq->timer))
+ return;
- spin_lock(&hb->chain_lock);
- if (read_seqretry(&f->rnd_seqlock, seq)) {
- spin_unlock(&hb->chain_lock);
- goto restart;
+ spin_lock_bh(&fq->lock);
+ if (!(fq->flags & INET_FRAG_COMPLETE)) {
+ fq->flags |= INET_FRAG_COMPLETE;
+ atomic_dec(&fq->refcnt);
}
+ spin_unlock_bh(&fq->lock);
- return hb;
+ inet_frag_put(fq);
}
-static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+void inet_frags_exit_net(struct netns_frags *nf)
{
- struct inet_frag_bucket *hb;
+ nf->low_thresh = 0; /* prevent creation of new frags */
- hb = get_frag_bucket_locked(fq, f);
- hlist_del(&fq->list);
- fq->flags |= INET_FRAG_COMPLETE;
- spin_unlock(&hb->chain_lock);
+ rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
}
+EXPORT_SYMBOL(inet_frags_exit_net);
-void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
+void inet_frag_kill(struct inet_frag_queue *fq)
{
if (del_timer(&fq->timer))
atomic_dec(&fq->refcnt);
if (!(fq->flags & INET_FRAG_COMPLETE)) {
- fq_unlink(fq, f);
+ struct netns_frags *nf = fq->net;
+
+ fq->flags |= INET_FRAG_COMPLETE;
+ rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params);
atomic_dec(&fq->refcnt);
}
}
EXPORT_SYMBOL(inet_frag_kill);
-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
+static void inet_frag_destroy_rcu(struct rcu_head *head)
+{
+ struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
+ rcu);
+ struct inet_frags *f = q->net->f;
+
+ if (f->destructor)
+ f->destructor(q);
+ kmem_cache_free(f->frags_cachep, q);
+}
+
+void inet_frag_destroy(struct inet_frag_queue *q)
{
struct sk_buff *fp;
struct netns_frags *nf;
unsigned int sum, sum_truesize = 0;
+ struct inet_frags *f;
WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
WARN_ON(del_timer(&q->timer) != 0);
@@ -298,64 +135,35 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
/* Release all fragment data. */
fp = q->fragments;
nf = q->net;
- while (fp) {
- struct sk_buff *xp = fp->next;
-
- sum_truesize += fp->truesize;
- kfree_skb(fp);
- fp = xp;
+ f = nf->f;
+ if (fp) {
+ do {
+ struct sk_buff *xp = fp->next;
+
+ sum_truesize += fp->truesize;
+ kfree_skb(fp);
+ fp = xp;
+ } while (fp);
+ } else {
+ sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
}
sum = sum_truesize + f->qsize;
- if (f->destructor)
- f->destructor(q);
- kmem_cache_free(f->frags_cachep, q);
+ call_rcu(&q->rcu, inet_frag_destroy_rcu);
sub_frag_mem_limit(nf, sum);
}
EXPORT_SYMBOL(inet_frag_destroy);
-static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
- struct inet_frag_queue *qp_in,
- struct inet_frags *f,
- void *arg)
-{
- struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
- struct inet_frag_queue *qp;
-
-#ifdef CONFIG_SMP
- /* With SMP race we have to recheck hash table, because
- * such entry could have been created on other cpu before
- * we acquired hash bucket lock.
- */
- hlist_for_each_entry(qp, &hb->chain, list) {
- if (qp->net == nf && f->match(qp, arg)) {
- atomic_inc(&qp->refcnt);
- spin_unlock(&hb->chain_lock);
- qp_in->flags |= INET_FRAG_COMPLETE;
- inet_frag_put(qp_in, f);
- return qp;
- }
- }
-#endif
- qp = qp_in;
- if (!mod_timer(&qp->timer, jiffies + nf->timeout))
- atomic_inc(&qp->refcnt);
-
- atomic_inc(&qp->refcnt);
- hlist_add_head(&qp->list, &hb->chain);
-
- spin_unlock(&hb->chain_lock);
-
- return qp;
-}
-
static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
struct inet_frags *f,
void *arg)
{
struct inet_frag_queue *q;
+ if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
+ return NULL;
+
q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
if (!q)
return NULL;
@@ -366,75 +174,52 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
spin_lock_init(&q->lock);
- atomic_set(&q->refcnt, 1);
+ atomic_set(&q->refcnt, 3);
return q;
}
static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
- struct inet_frags *f,
- void *arg)
+ void *arg,
+ struct inet_frag_queue **prev)
{
+ struct inet_frags *f = nf->f;
struct inet_frag_queue *q;
q = inet_frag_alloc(nf, f, arg);
- if (!q)
- return NULL;
-
- return inet_frag_intern(nf, q, f, arg);
-}
-
-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
- struct inet_frags *f, void *key,
- unsigned int hash)
-{
- struct inet_frag_bucket *hb;
- struct inet_frag_queue *q;
- int depth = 0;
-
- if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) {
- inet_frag_schedule_worker(f);
+ if (!q) {
+ *prev = ERR_PTR(-ENOMEM);
return NULL;
}
-
- if (frag_mem_limit(nf) > nf->low_thresh)
- inet_frag_schedule_worker(f);
-
- hash &= (INETFRAGS_HASHSZ - 1);
- hb = &f->hash[hash];
-
- spin_lock(&hb->chain_lock);
- hlist_for_each_entry(q, &hb->chain, list) {
- if (q->net == nf && f->match(q, key)) {
- atomic_inc(&q->refcnt);
- spin_unlock(&hb->chain_lock);
- return q;
- }
- depth++;
- }
- spin_unlock(&hb->chain_lock);
-
- if (depth <= INETFRAGS_MAXDEPTH)
- return inet_frag_create(nf, f, key);
-
- if (inet_frag_may_rebuild(f)) {
- if (!f->rebuild)
- f->rebuild = true;
- inet_frag_schedule_worker(f);
+ mod_timer(&q->timer, jiffies + nf->timeout);
+
+ *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key,
+ &q->node, f->rhash_params);
+ if (*prev) {
+ q->flags |= INET_FRAG_COMPLETE;
+ inet_frag_kill(q);
+ inet_frag_destroy(q);
+ return NULL;
}
-
- return ERR_PTR(-ENOBUFS);
+ return q;
}
-EXPORT_SYMBOL(inet_frag_find);
+EXPORT_SYMBOL(inet_frag_create);
-void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
- const char *prefix)
+/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
+struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
{
- static const char msg[] = "inet_frag_find: Fragment hash bucket"
- " list length grew over limit " __stringify(INETFRAGS_MAXDEPTH)
- ". Dropping fragment.\n";
+ struct inet_frag_queue *fq = NULL, *prev;
- if (PTR_ERR(q) == -ENOBUFS)
- net_dbg_ratelimited("%s%s", prefix, msg);
+ rcu_read_lock();
+ prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
+ if (!prev)
+ fq = inet_frag_create(nf, key, &prev);
+ if (prev && !IS_ERR(prev)) {
+ fq = prev;
+ if (!atomic_inc_not_zero(&fq->refcnt))
+ fq = NULL;
+ }
+ rcu_read_unlock();
+ return fq;
}
-EXPORT_SYMBOL(inet_frag_maybe_warn_overflow);
+EXPORT_SYMBOL(inet_frag_find);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 752711cd4834..80e48f40c3a8 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -56,27 +56,64 @@
*/
static const char ip_frag_cache_name[] = "ip4-frags";
-struct ipfrag_skb_cb
-{
+/* Use skb->cb to track consecutive/adjacent fragments coming at
+ * the end of the queue. Nodes in the rb-tree queue will
+ * contain "runs" of one or more adjacent fragments.
+ *
+ * Invariants:
+ * - next_frag is NULL at the tail of a "run";
+ * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
+ */
+struct ipfrag_skb_cb {
struct inet_skb_parm h;
- int offset;
+ struct sk_buff *next_frag;
+ int frag_run_len;
};
-#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
+#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
+
+static void ip4_frag_init_run(struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
+
+ FRAG_CB(skb)->next_frag = NULL;
+ FRAG_CB(skb)->frag_run_len = skb->len;
+}
+
+/* Append skb to the last "run". */
+static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
+ struct sk_buff *skb)
+{
+ RB_CLEAR_NODE(&skb->rbnode);
+ FRAG_CB(skb)->next_frag = NULL;
+
+ FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
+ FRAG_CB(q->fragments_tail)->next_frag = skb;
+ q->fragments_tail = skb;
+}
+
+/* Create a new "run" with the skb. */
+static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
+{
+ if (q->last_run_head)
+ rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
+ &q->last_run_head->rbnode.rb_right);
+ else
+ rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
+ rb_insert_color(&skb->rbnode, &q->rb_fragments);
+
+ ip4_frag_init_run(skb);
+ q->fragments_tail = skb;
+ q->last_run_head = skb;
+}
/* Describe an entry in the "incomplete datagrams" queue. */
struct ipq {
struct inet_frag_queue q;
- u32 user;
- __be32 saddr;
- __be32 daddr;
- __be16 id;
- u8 protocol;
u8 ecn; /* RFC3168 support */
u16 max_df_size; /* largest frag with DF set seen */
int iif;
- int vif; /* L3 master device index */
unsigned int rid;
struct inet_peer *peer;
};
@@ -88,49 +125,9 @@ static u8 ip4_frag_ecn(u8 tos)
static struct inet_frags ip4_frags;
-int ip_frag_mem(struct net *net)
-{
- return sum_frag_mem_limit(&net->ipv4.frags);
-}
+static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev);
-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
- struct net_device *dev);
-
-struct ip4_create_arg {
- struct iphdr *iph;
- u32 user;
- int vif;
-};
-
-static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
-{
- net_get_random_once(&ip4_frags.rnd, sizeof(ip4_frags.rnd));
- return jhash_3words((__force u32)id << 16 | prot,
- (__force u32)saddr, (__force u32)daddr,
- ip4_frags.rnd);
-}
-
-static unsigned int ip4_hashfn(const struct inet_frag_queue *q)
-{
- const struct ipq *ipq;
-
- ipq = container_of(q, struct ipq, q);
- return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
-}
-
-static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a)
-{
- const struct ipq *qp;
- const struct ip4_create_arg *arg = a;
-
- qp = container_of(q, struct ipq, q);
- return qp->id == arg->iph->id &&
- qp->saddr == arg->iph->saddr &&
- qp->daddr == arg->iph->daddr &&
- qp->protocol == arg->iph->protocol &&
- qp->user == arg->user &&
- qp->vif == arg->vif;
-}
static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
{
@@ -139,17 +136,12 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
frags);
struct net *net = container_of(ipv4, struct net, ipv4);
- const struct ip4_create_arg *arg = a;
+ const struct frag_v4_compare_key *key = a;
- qp->protocol = arg->iph->protocol;
- qp->id = arg->iph->id;
- qp->ecn = ip4_frag_ecn(arg->iph->tos);
- qp->saddr = arg->iph->saddr;
- qp->daddr = arg->iph->daddr;
- qp->vif = arg->vif;
- qp->user = arg->user;
+ q->key.v4 = *key;
+ qp->ecn = 0;
qp->peer = q->net->max_dist ?
- inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) :
+ inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
NULL;
}
@@ -167,7 +159,7 @@ static void ip4_frag_free(struct inet_frag_queue *q)
static void ipq_put(struct ipq *ipq)
{
- inet_frag_put(&ipq->q, &ip4_frags);
+ inet_frag_put(&ipq->q);
}
/* Kill ipq entry. It is not destroyed immediately,
@@ -175,7 +167,7 @@ static void ipq_put(struct ipq *ipq)
*/
static void ipq_kill(struct ipq *ipq)
{
- inet_frag_kill(&ipq->q, &ip4_frags);
+ inet_frag_kill(&ipq->q);
}
static bool frag_expire_skip_icmp(u32 user)
@@ -192,8 +184,11 @@ static bool frag_expire_skip_icmp(u32 user)
*/
static void ip_expire(unsigned long arg)
{
- struct ipq *qp;
+ const struct iphdr *iph;
+ struct sk_buff *head = NULL;
struct net *net;
+ struct ipq *qp;
+ int err;
qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
net = container_of(qp->q.net, struct net, ipv4.frags);
@@ -206,51 +201,65 @@ static void ip_expire(unsigned long arg)
ipq_kill(qp);
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
+ __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
- if (!inet_frag_evicting(&qp->q)) {
- struct sk_buff *clone, *head = qp->q.fragments;
- const struct iphdr *iph;
- int err;
-
- __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT);
+ if (!(qp->q.flags & INET_FRAG_FIRST_IN))
+ goto out;
- if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
+ /* sk_buff::dev and sk_buff::rbnode are unionized. So we
+ * pull the head out of the tree in order to be able to
+ * deal with head->dev.
+ */
+ if (qp->q.fragments) {
+ head = qp->q.fragments;
+ qp->q.fragments = head->next;
+ } else {
+ head = skb_rb_first(&qp->q.rb_fragments);
+ if (!head)
goto out;
+ if (FRAG_CB(head)->next_frag)
+ rb_replace_node(&head->rbnode,
+ &FRAG_CB(head)->next_frag->rbnode,
+ &qp->q.rb_fragments);
+ else
+ rb_erase(&head->rbnode, &qp->q.rb_fragments);
+ memset(&head->rbnode, 0, sizeof(head->rbnode));
+ barrier();
+ }
+ if (head == qp->q.fragments_tail)
+ qp->q.fragments_tail = NULL;
- head->dev = dev_get_by_index_rcu(net, qp->iif);
- if (!head->dev)
- goto out;
+ sub_frag_mem_limit(qp->q.net, head->truesize);
+
+ head->dev = dev_get_by_index_rcu(net, qp->iif);
+ if (!head->dev)
+ goto out;
- /* skb has no dst, perform route lookup again */
- iph = ip_hdr(head);
- err = ip_route_input_noref(head, iph->daddr, iph->saddr,
+ /* skb has no dst, perform route lookup again */
+ iph = ip_hdr(head);
+ err = ip_route_input_noref(head, iph->daddr, iph->saddr,
iph->tos, head->dev);
- if (err)
- goto out;
+ if (err)
+ goto out;
- /* Only an end host needs to send an ICMP
- * "Fragment Reassembly Timeout" message, per RFC792.
- */
- if (frag_expire_skip_icmp(qp->user) &&
- (skb_rtable(head)->rt_type != RTN_LOCAL))
- goto out;
+ /* Only an end host needs to send an ICMP
+ * "Fragment Reassembly Timeout" message, per RFC792.
+ */
+ if (frag_expire_skip_icmp(qp->q.key.v4.user) &&
+ (skb_rtable(head)->rt_type != RTN_LOCAL))
+ goto out;
- clone = skb_clone(head, GFP_ATOMIC);
+ spin_unlock(&qp->q.lock);
+ icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
+ goto out_rcu_unlock;
- /* Send an ICMP "Fragment Reassembly Timeout" message. */
- if (clone) {
- spin_unlock(&qp->q.lock);
- icmp_send(clone, ICMP_TIME_EXCEEDED,
- ICMP_EXC_FRAGTIME, 0);
- consume_skb(clone);
- goto out_rcu_unlock;
- }
- }
out:
spin_unlock(&qp->q.lock);
out_rcu_unlock:
rcu_read_unlock();
+ if (head)
+ kfree_skb(head);
ipq_put(qp);
}
@@ -260,21 +269,20 @@ out_rcu_unlock:
static struct ipq *ip_find(struct net *net, struct iphdr *iph,
u32 user, int vif)
{
+ struct frag_v4_compare_key key = {
+ .saddr = iph->saddr,
+ .daddr = iph->daddr,
+ .user = user,
+ .vif = vif,
+ .id = iph->id,
+ .protocol = iph->protocol,
+ };
struct inet_frag_queue *q;
- struct ip4_create_arg arg;
- unsigned int hash;
- arg.iph = iph;
- arg.user = user;
- arg.vif = vif;
-
- hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
-
- q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+ q = inet_frag_find(&net->ipv4.frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct ipq, q);
}
@@ -294,7 +302,7 @@ static int ip_frag_too_far(struct ipq *qp)
end = atomic_inc_return(&peer->rid);
qp->rid = end;
- rc = qp->q.fragments && (end - start) > max;
+ rc = qp->q.fragments_tail && (end - start) > max;
if (rc) {
struct net *net;
@@ -308,7 +316,6 @@ static int ip_frag_too_far(struct ipq *qp)
static int ip_frag_reinit(struct ipq *qp)
{
- struct sk_buff *fp;
unsigned int sum_truesize = 0;
if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
@@ -316,21 +323,16 @@ static int ip_frag_reinit(struct ipq *qp)
return -ETIMEDOUT;
}
- fp = qp->q.fragments;
- do {
- struct sk_buff *xp = fp->next;
-
- sum_truesize += fp->truesize;
- kfree_skb(fp);
- fp = xp;
- } while (fp);
+ sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments);
sub_frag_mem_limit(qp->q.net, sum_truesize);
qp->q.flags = 0;
qp->q.len = 0;
qp->q.meat = 0;
qp->q.fragments = NULL;
+ qp->q.rb_fragments = RB_ROOT;
qp->q.fragments_tail = NULL;
+ qp->q.last_run_head = NULL;
qp->iif = 0;
qp->ecn = 0;
@@ -340,7 +342,9 @@ static int ip_frag_reinit(struct ipq *qp)
/* Add new segment to existing queue. */
static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
- struct sk_buff *prev, *next;
+ struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+ struct rb_node **rbn, *parent;
+ struct sk_buff *skb1, *prev_tail;
struct net_device *dev;
unsigned int fragsize;
int flags, offset;
@@ -403,99 +407,61 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
if (err)
goto err;
- /* Find out which fragments are in front and at the back of us
- * in the chain of fragments so far. We must know where to put
- * this fragment, right?
- */
- prev = qp->q.fragments_tail;
- if (!prev || FRAG_CB(prev)->offset < offset) {
- next = NULL;
- goto found;
- }
- prev = NULL;
- for (next = qp->q.fragments; next != NULL; next = next->next) {
- if (FRAG_CB(next)->offset >= offset)
- break; /* bingo! */
- prev = next;
- }
-
-found:
- /* We found where to put this one. Check for overlap with
- * preceding fragment, and, if needed, align things so that
- * any overlaps are eliminated.
+ /* Note : skb->rbnode and skb->dev share the same location. */
+ dev = skb->dev;
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
+
+ /* RFC5722, Section 4, amended by Errata ID : 3089
+ * When reassembling an IPv6 datagram, if
+ * one or more its constituent fragments is determined to be an
+ * overlapping fragment, the entire datagram (and any constituent
+ * fragments) MUST be silently discarded.
+ *
+ * We do the same here for IPv4 (and increment an snmp counter).
*/
- if (prev) {
- int i = (FRAG_CB(prev)->offset + prev->len) - offset;
- if (i > 0) {
- offset += i;
- err = -EINVAL;
- if (end <= offset)
- goto err;
- err = -ENOMEM;
- if (!pskb_pull(skb, i))
- goto err;
- if (skb->ip_summed != CHECKSUM_UNNECESSARY)
- skb->ip_summed = CHECKSUM_NONE;
- }
- }
-
- err = -ENOMEM;
-
- while (next && FRAG_CB(next)->offset < end) {
- int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
-
- if (i < next->len) {
- int delta = -next->truesize;
-
- /* Eat head of the next overlapped fragment
- * and leave the loop. The next ones cannot overlap.
- */
- if (!pskb_pull(next, i))
- goto err;
- delta += next->truesize;
- if (delta)
- add_frag_mem_limit(qp->q.net, delta);
- FRAG_CB(next)->offset += i;
- qp->q.meat -= i;
- if (next->ip_summed != CHECKSUM_UNNECESSARY)
- next->ip_summed = CHECKSUM_NONE;
- break;
- } else {
- struct sk_buff *free_it = next;
-
- /* Old fragment is completely overridden with
- * new one drop it.
- */
- next = next->next;
-
- if (prev)
- prev->next = next;
- else
- qp->q.fragments = next;
-
- qp->q.meat -= free_it->len;
- sub_frag_mem_limit(qp->q.net, free_it->truesize);
- kfree_skb(free_it);
- }
+ /* Find out where to put this fragment. */
+ prev_tail = qp->q.fragments_tail;
+ if (!prev_tail)
+ ip4_frag_create_run(&qp->q, skb); /* First fragment. */
+ else if (prev_tail->ip_defrag_offset + prev_tail->len < end) {
+ /* This is the common case: skb goes to the end. */
+ /* Detect and discard overlaps. */
+ if (offset < prev_tail->ip_defrag_offset + prev_tail->len)
+ goto discard_qp;
+ if (offset == prev_tail->ip_defrag_offset + prev_tail->len)
+ ip4_frag_append_to_last_run(&qp->q, skb);
+ else
+ ip4_frag_create_run(&qp->q, skb);
+ } else {
+ /* Binary search. Note that skb can become the first fragment,
+ * but not the last (covered above).
+ */
+ rbn = &qp->q.rb_fragments.rb_node;
+ do {
+ parent = *rbn;
+ skb1 = rb_to_skb(parent);
+ if (end <= skb1->ip_defrag_offset)
+ rbn = &parent->rb_left;
+ else if (offset >= skb1->ip_defrag_offset +
+ FRAG_CB(skb1)->frag_run_len)
+ rbn = &parent->rb_right;
+ else /* Found an overlap with skb1. */
+ goto discard_qp;
+ } while (*rbn);
+ /* Here we have parent properly set, and rbn pointing to
+ * one of its NULL left/right children. Insert skb.
+ */
+ ip4_frag_init_run(skb);
+ rb_link_node(&skb->rbnode, parent, rbn);
+ rb_insert_color(&skb->rbnode, &qp->q.rb_fragments);
}
- FRAG_CB(skb)->offset = offset;
-
- /* Insert this fragment in the chain of fragments. */
- skb->next = next;
- if (!next)
- qp->q.fragments_tail = skb;
- if (prev)
- prev->next = skb;
- else
- qp->q.fragments = skb;
-
- dev = skb->dev;
- if (dev) {
+ if (dev)
qp->iif = dev->ifindex;
- skb->dev = NULL;
- }
+ skb->ip_defrag_offset = offset;
+
qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len;
qp->ecn |= ecn;
@@ -517,7 +483,7 @@ found:
unsigned long orefdst = skb->_skb_refdst;
skb->_skb_refdst = 0UL;
- err = ip_frag_reasm(qp, prev, dev);
+ err = ip_frag_reasm(qp, skb, prev_tail, dev);
skb->_skb_refdst = orefdst;
return err;
}
@@ -525,20 +491,24 @@ found:
skb_dst_drop(skb);
return -EINPROGRESS;
+discard_qp:
+ inet_frag_kill(&qp->q);
+ err = -EINVAL;
+ __IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
err:
kfree_skb(skb);
return err;
}
-
/* Build a new IP datagram from all its fragments. */
-
-static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
- struct net_device *dev)
+static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev)
{
struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
struct iphdr *iph;
- struct sk_buff *fp, *head = qp->q.fragments;
+ struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments);
+ struct sk_buff **nextp; /* To build frag_list. */
+ struct rb_node *rbn;
int len;
int ihlen;
int err;
@@ -552,26 +522,27 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
goto out_fail;
}
/* Make the one we just received the head. */
- if (prev) {
- head = prev->next;
- fp = skb_clone(head, GFP_ATOMIC);
+ if (head != skb) {
+ fp = skb_clone(skb, GFP_ATOMIC);
if (!fp)
goto out_nomem;
-
- fp->next = head->next;
- if (!fp->next)
+ FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
+ if (RB_EMPTY_NODE(&skb->rbnode))
+ FRAG_CB(prev_tail)->next_frag = fp;
+ else
+ rb_replace_node(&skb->rbnode, &fp->rbnode,
+ &qp->q.rb_fragments);
+ if (qp->q.fragments_tail == skb)
qp->q.fragments_tail = fp;
- prev->next = fp;
-
- skb_morph(head, qp->q.fragments);
- head->next = qp->q.fragments->next;
-
- consume_skb(qp->q.fragments);
- qp->q.fragments = head;
+ skb_morph(skb, head);
+ FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
+ rb_replace_node(&head->rbnode, &skb->rbnode,
+ &qp->q.rb_fragments);
+ consume_skb(head);
+ head = skb;
}
- WARN_ON(!head);
- WARN_ON(FRAG_CB(head)->offset != 0);
+ WARN_ON(head->ip_defrag_offset != 0);
/* Allocate a new buffer for the datagram. */
ihlen = ip_hdrlen(head);
@@ -595,35 +566,61 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
clone = alloc_skb(0, GFP_ATOMIC);
if (!clone)
goto out_nomem;
- clone->next = head->next;
- head->next = clone;
skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
skb_frag_list_init(head);
for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
clone->len = clone->data_len = head->data_len - plen;
- head->data_len -= clone->len;
- head->len -= clone->len;
+ head->truesize += clone->truesize;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
add_frag_mem_limit(qp->q.net, clone->truesize);
+ skb_shinfo(head)->frag_list = clone;
+ nextp = &clone->next;
+ } else {
+ nextp = &skb_shinfo(head)->frag_list;
}
- skb_shinfo(head)->frag_list = head->next;
skb_push(head, head->data - skb_network_header(head));
- for (fp=head->next; fp; fp = fp->next) {
- head->data_len += fp->len;
- head->len += fp->len;
- if (head->ip_summed != fp->ip_summed)
- head->ip_summed = CHECKSUM_NONE;
- else if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_add(head->csum, fp->csum);
- head->truesize += fp->truesize;
+ /* Traverse the tree in order, to build frag_list. */
+ fp = FRAG_CB(head)->next_frag;
+ rbn = rb_next(&head->rbnode);
+ rb_erase(&head->rbnode, &qp->q.rb_fragments);
+ while (rbn || fp) {
+ /* fp points to the next sk_buff in the current run;
+ * rbn points to the next run.
+ */
+ /* Go through the current run. */
+ while (fp) {
+ *nextp = fp;
+ nextp = &fp->next;
+ fp->prev = NULL;
+ memset(&fp->rbnode, 0, sizeof(fp->rbnode));
+ fp->sk = NULL;
+ head->data_len += fp->len;
+ head->len += fp->len;
+ if (head->ip_summed != fp->ip_summed)
+ head->ip_summed = CHECKSUM_NONE;
+ else if (head->ip_summed == CHECKSUM_COMPLETE)
+ head->csum = csum_add(head->csum, fp->csum);
+ head->truesize += fp->truesize;
+ fp = FRAG_CB(fp)->next_frag;
+ }
+ /* Move to the next run. */
+ if (rbn) {
+ struct rb_node *rbnext = rb_next(rbn);
+
+ fp = rb_to_skb(rbn);
+ rb_erase(rbn, &qp->q.rb_fragments);
+ rbn = rbnext;
+ }
}
sub_frag_mem_limit(qp->q.net, head->truesize);
+ *nextp = NULL;
head->next = NULL;
+ head->prev = NULL;
head->dev = dev;
head->tstamp = qp->q.stamp;
IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
@@ -651,7 +648,9 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
__IP_INC_STATS(net, IPSTATS_MIB_REASMOKS);
qp->q.fragments = NULL;
+ qp->q.rb_fragments = RB_ROOT;
qp->q.fragments_tail = NULL;
+ qp->q.last_run_head = NULL;
return 0;
out_nomem:
@@ -659,7 +658,7 @@ out_nomem:
err = -ENOMEM;
goto out_fail;
out_oversize:
- net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr);
+ net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->q.key.v4.saddr);
out_fail:
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
return err;
@@ -719,10 +718,14 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
if (ip_is_fragment(&iph)) {
skb = skb_share_check(skb, GFP_ATOMIC);
if (skb) {
- if (!pskb_may_pull(skb, netoff + iph.ihl * 4))
- return skb;
- if (pskb_trim_rcsum(skb, netoff + len))
- return skb;
+ if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) {
+ kfree_skb(skb);
+ return NULL;
+ }
+ if (pskb_trim_rcsum(skb, netoff + len)) {
+ kfree_skb(skb);
+ return NULL;
+ }
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
if (ip_defrag(net, skb, user))
return NULL;
@@ -733,25 +736,46 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
}
EXPORT_SYMBOL(ip_check_defrag);
+unsigned int inet_frag_rbtree_purge(struct rb_root *root)
+{
+ struct rb_node *p = rb_first(root);
+ unsigned int sum = 0;
+
+ while (p) {
+ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
+
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, root);
+ while (skb) {
+ struct sk_buff *next = FRAG_CB(skb)->next_frag;
+
+ sum += skb->truesize;
+ kfree_skb(skb);
+ skb = next;
+ }
+ }
+ return sum;
+}
+EXPORT_SYMBOL(inet_frag_rbtree_purge);
+
#ifdef CONFIG_SYSCTL
-static int zero;
+static int dist_min;
static struct ctl_table ip4_frags_ns_ctl_table[] = {
{
.procname = "ipfrag_high_thresh",
.data = &init_net.ipv4.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ipv4.frags.low_thresh
},
{
.procname = "ipfrag_low_thresh",
.data = &init_net.ipv4.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .proc_handler = proc_doulongvec_minmax,
.extra2 = &init_net.ipv4.frags.high_thresh
},
{
@@ -767,7 +791,7 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero
+ .extra1 = &dist_min,
},
{ }
};
@@ -849,6 +873,8 @@ static void __init ip4_frags_ctl_register(void)
static int __net_init ipv4_frags_init_net(struct net *net)
{
+ int res;
+
/* Fragment cache limits.
*
* The fragment memory accounting code, (tries to) account for
@@ -873,16 +899,21 @@ static int __net_init ipv4_frags_init_net(struct net *net)
net->ipv4.frags.timeout = IP_FRAG_TIME;
net->ipv4.frags.max_dist = 64;
-
- inet_frags_init_net(&net->ipv4.frags);
-
- return ip4_frags_ns_ctl_register(net);
+ net->ipv4.frags.f = &ip4_frags;
+
+ res = inet_frags_init_net(&net->ipv4.frags);
+ if (res < 0)
+ return res;
+ res = ip4_frags_ns_ctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&net->ipv4.frags);
+ return res;
}
static void __net_exit ipv4_frags_exit_net(struct net *net)
{
ip4_frags_ns_ctl_unregister(net);
- inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
+ inet_frags_exit_net(&net->ipv4.frags);
}
static struct pernet_operations ip4_frags_ops = {
@@ -890,17 +921,49 @@ static struct pernet_operations ip4_frags_ops = {
.exit = ipv4_frags_exit_net,
};
+
+static u32 ip4_key_hashfn(const void *data, u32 len, u32 seed)
+{
+ return jhash2(data,
+ sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
+}
+
+static u32 ip4_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct inet_frag_queue *fq = data;
+
+ return jhash2((const u32 *)&fq->key.v4,
+ sizeof(struct frag_v4_compare_key) / sizeof(u32), seed);
+}
+
+static int ip4_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+ const struct frag_v4_compare_key *key = arg->key;
+ const struct inet_frag_queue *fq = ptr;
+
+ return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+static const struct rhashtable_params ip4_rhash_params = {
+ .head_offset = offsetof(struct inet_frag_queue, node),
+ .key_offset = offsetof(struct inet_frag_queue, key),
+ .key_len = sizeof(struct frag_v4_compare_key),
+ .hashfn = ip4_key_hashfn,
+ .obj_hashfn = ip4_obj_hashfn,
+ .obj_cmpfn = ip4_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+
void __init ipfrag_init(void)
{
- ip4_frags_ctl_register();
- register_pernet_subsys(&ip4_frags_ops);
- ip4_frags.hashfn = ip4_hashfn;
ip4_frags.constructor = ip4_frag_init;
ip4_frags.destructor = ip4_frag_free;
ip4_frags.qsize = sizeof(struct ipq);
- ip4_frags.match = ip4_frag_match;
ip4_frags.frag_expire = ip_expire;
ip4_frags.frags_cache_name = ip_frag_cache_name;
+ ip4_frags.rhash_params = ip4_rhash_params;
if (inet_frags_init(&ip4_frags))
panic("IP: failed to allocate ip4_frags cache\n");
+ ip4_frags_ctl_register();
+ register_pernet_subsys(&ip4_frags_ops);
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b21e435f428c..a5851c0bc278 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -134,7 +134,6 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
{
struct sockaddr_in sin;
- const struct iphdr *iph = ip_hdr(skb);
__be16 *ports;
int end;
@@ -149,7 +148,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
ports = (__be16 *)skb_transport_header(skb);
sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = iph->daddr;
+ sin.sin_addr.s_addr = ip_hdr(skb)->daddr;
sin.sin_port = ports[1];
memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index e1271e75e107..e6ee6acac80c 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -261,8 +261,8 @@ static struct net_device *__ip_tunnel_create(struct net *net,
} else {
if (strlen(ops->kind) > (IFNAMSIZ - 3))
goto failed;
- strlcpy(name, ops->kind, IFNAMSIZ);
- strncat(name, "%d", 2);
+ strcpy(name, ops->kind);
+ strcat(name, "%d");
}
ASSERT_RTNL();
@@ -627,6 +627,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, u8 protocol)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
+ unsigned int inner_nhdr_len = 0;
const struct iphdr *inner_iph;
struct flowi4 fl4;
u8 tos, ttl;
@@ -636,6 +637,14 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
__be32 dst;
bool connected;
+ /* ensure we can access the inner net header, for several users below */
+ if (skb->protocol == htons(ETH_P_IP))
+ inner_nhdr_len = sizeof(struct iphdr);
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ inner_nhdr_len = sizeof(struct ipv6hdr);
+ if (unlikely(!pskb_may_pull(skb, inner_nhdr_len)))
+ goto tx_error;
+
inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
connected = (tunnel->parms.iph.daddr != 0);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 0fd1976ab63b..2220a1b396af 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -80,7 +80,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
iph->version = 4;
iph->ihl = sizeof(struct iphdr) >> 2;
- iph->frag_off = df;
+ iph->frag_off = ip_mtu_locked(&rt->dst) ? 0 : df;
iph->protocol = proto;
iph->tos = tos;
iph->daddr = dst;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 7143ca1a6af9..ec48d8eafc7e 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -54,7 +54,6 @@
static int sockstat_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
- unsigned int frag_mem;
int orphans, sockets;
local_bh_disable();
@@ -74,8 +73,9 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplite_prot));
seq_printf(seq, "RAW: inuse %d\n",
sock_prot_inuse_get(net, &raw_prot));
- frag_mem = ip_frag_mem(net);
- seq_printf(seq, "FRAG: inuse %u memory %u\n", !!frag_mem, frag_mem);
+ seq_printf(seq, "FRAG: inuse %u memory %lu\n",
+ atomic_read(&net->ipv4.frags.rhashtable.nelems),
+ frag_mem_limit(&net->ipv4.frags));
return 0;
}
@@ -134,6 +134,7 @@ static const struct snmp_mib snmp4_ipextstats_list[] = {
SNMP_MIB_ITEM("InECT1Pkts", IPSTATS_MIB_ECT1PKTS),
SNMP_MIB_ITEM("InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
SNMP_MIB_ITEM("InCEPkts", IPSTATS_MIB_CEPKTS),
+ SNMP_MIB_ITEM("ReasmOverlaps", IPSTATS_MIB_REASM_OVERLAPS),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9d0b73aa649f..dbb153c6b21a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4406,7 +4406,7 @@ static void tcp_ofo_queue(struct sock *sk)
p = rb_first(&tp->out_of_order_queue);
while (p) {
- skb = rb_entry(p, struct sk_buff, rbnode);
+ skb = rb_to_skb(p);
if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
break;
@@ -4470,7 +4470,7 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct rb_node **p, *q, *parent;
+ struct rb_node **p, *parent;
struct sk_buff *skb1;
u32 seq, end_seq;
bool fragstolen;
@@ -4529,7 +4529,7 @@ coalesce_done:
parent = NULL;
while (*p) {
parent = *p;
- skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ skb1 = rb_to_skb(parent);
if (before(seq, TCP_SKB_CB(skb1)->seq)) {
p = &parent->rb_left;
continue;
@@ -4574,9 +4574,7 @@ insert:
merge_right:
/* Remove other segments covered by skb. */
- while ((q = rb_next(&skb->rbnode)) != NULL) {
- skb1 = rb_entry(q, struct sk_buff, rbnode);
-
+ while ((skb1 = skb_rb_next(skb)) != NULL) {
if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
break;
if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
@@ -4591,7 +4589,7 @@ merge_right:
tcp_drop(sk, skb1);
}
/* If there is no skb after us, we are the last_skb ! */
- if (!q)
+ if (!skb1)
tp->ooo_last_skb = skb;
add_sack:
@@ -4792,7 +4790,7 @@ static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *li
if (list)
return !skb_queue_is_last(list, skb) ? skb->next : NULL;
- return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
+ return skb_rb_next(skb);
}
static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
@@ -4821,7 +4819,7 @@ static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
while (*p) {
parent = *p;
- skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ skb1 = rb_to_skb(parent);
if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
p = &parent->rb_left;
else
@@ -4941,19 +4939,12 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
u32 range_truesize, sum_tiny = 0;
struct sk_buff *skb, *head;
- struct rb_node *p;
u32 start, end;
- p = rb_first(&tp->out_of_order_queue);
- skb = rb_entry_safe(p, struct sk_buff, rbnode);
+ skb = skb_rb_first(&tp->out_of_order_queue);
new_range:
if (!skb) {
- p = rb_last(&tp->out_of_order_queue);
- /* Note: This is possible p is NULL here. We do not
- * use rb_entry_safe(), as ooo_last_skb is valid only
- * if rbtree is not empty.
- */
- tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
+ tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
return;
}
start = TCP_SKB_CB(skb)->seq;
@@ -4961,7 +4952,7 @@ new_range:
range_truesize = skb->truesize;
for (head = skb;;) {
- skb = tcp_skb_next(skb, NULL);
+ skb = skb_rb_next(skb);
/* Range is terminated when we see a gap or when
* we are at the queue end.
@@ -5017,7 +5008,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
prev = rb_prev(node);
rb_erase(node, &tp->out_of_order_queue);
goal -= rb_to_skb(node)->truesize;
- tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
+ tcp_drop(sk, rb_to_skb(node));
if (!prev || goal <= 0) {
sk_mem_reclaim(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
@@ -5027,7 +5018,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
}
node = prev;
} while (node);
- tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
+ tp->ooo_last_skb = rb_to_skb(prev);
/* Reset SACK state. A conforming SACK implementation will
* do the same at a timeout based retransmit. When a connection
@@ -5978,11 +5969,13 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (th->fin)
goto discard;
/* It is possible that we process SYN packets from backlog,
- * so we need to make sure to disable BH right there.
+ * so we need to make sure to disable BH and RCU right there.
*/
+ rcu_read_lock();
local_bh_disable();
acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
local_bh_enable();
+ rcu_read_unlock();
if (!acceptable)
return 1;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 16dea67792e0..1ea0c91ba994 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -859,9 +859,11 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
if (skb) {
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
+ rcu_read_lock();
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- ireq_opt_deref(ireq));
+ rcu_dereference(ireq->ireq_opt));
+ rcu_read_unlock();
err = net_xmit_eval(err);
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b9b2a9828d98..5d4b5e0f6b5e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1726,8 +1726,24 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
/* Note, we are only interested in != 0 or == 0, thus the
* force to int.
*/
- return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
- inet_compute_pseudo);
+ err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
+ inet_compute_pseudo);
+ if (err)
+ return err;
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) {
+ /* If SW calculated the value, we know it's bad */
+ if (skb->csum_complete_sw)
+ return 1;
+
+ /* HW says the value is bad. Let's validate that.
+ * skb->csum is no longer the full packet checksum,
+ * so don't treat it as such.
+ */
+ skb_checksum_complete_unset(skb);
+ }
+
+ return 0;
}
/* wrapper for udp_queue_rcv_skb tacking care of csum conversion and
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3a27cf762da1..8f79f0414bc3 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4068,7 +4068,6 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
p++;
continue;
}
- state->offset++;
return ifa;
}
@@ -4092,13 +4091,12 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
return ifa;
}
+ state->offset = 0;
while (++state->bucket < IN6_ADDR_HSIZE) {
- state->offset = 0;
hlist_for_each_entry_rcu_bh(ifa,
&inet6_addr_lst[state->bucket], addr_lst) {
if (!net_eq(dev_net(ifa->idev->dev), net))
continue;
- state->offset++;
return ifa;
}
}
@@ -4723,8 +4721,8 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
/* unicast address incl. temp addr */
list_for_each_entry(ifa, &idev->addr_list, if_list) {
- if (++ip_idx < s_ip_idx)
- continue;
+ if (ip_idx < s_ip_idx)
+ goto next;
err = inet6_fill_ifaddr(skb, ifa,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
@@ -4733,6 +4731,8 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
if (err < 0)
break;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+next:
+ ip_idx++;
}
break;
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 421379014995..f7b425615c12 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1045,11 +1045,11 @@ netfilter_fail:
igmp_fail:
ndisc_cleanup();
ndisc_fail:
- ip6_mr_cleanup();
+ icmpv6_cleanup();
icmp_fail:
- unregister_pernet_subsys(&inet6_net_ops);
+ ip6_mr_cleanup();
ipmr_fail:
- icmpv6_cleanup();
+ unregister_pernet_subsys(&inet6_net_ops);
register_pernet_fail:
sock_unregister(PF_INET6);
rtnl_unregister_all(PF_INET6);
diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c
index 1dc023ca98fd..9d9a16e219d6 100644
--- a/net/ipv6/ip6_checksum.c
+++ b/net/ipv6/ip6_checksum.c
@@ -87,8 +87,24 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)
* Note, we are only interested in != 0 or == 0, thus the
* force to int.
*/
- return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
- ip6_compute_pseudo);
+ err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
+ ip6_compute_pseudo);
+ if (err)
+ return err;
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) {
+ /* If SW calculated the value, we know it's bad */
+ if (skb->csum_complete_sw)
+ return 1;
+
+ /* HW says the value is bad. Let's validate that.
+ * skb->csum is no longer the full packet checksum,
+ * so don't treat it as such.
+ */
+ skb_checksum_complete_unset(skb);
+ }
+
+ return 0;
}
EXPORT_SYMBOL(udp6_csum_init);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index cda63426eefb..9c5afa5153ce 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1185,11 +1185,6 @@ route_lookup:
}
skb_dst_set(skb, dst);
- if (encap_limit >= 0) {
- init_tel_txopt(&opt, encap_limit);
- ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
- }
-
/* Calculate max headroom for all the headers and adjust
* needed_headroom if necessary.
*/
@@ -1202,6 +1197,11 @@ route_lookup:
if (err)
return err;
+ if (encap_limit >= 0) {
+ init_tel_txopt(&opt, encap_limit);
+ ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
+ }
+
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb);
@@ -1226,7 +1226,7 @@ static inline int
ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- const struct iphdr *iph = ip_hdr(skb);
+ const struct iphdr *iph;
int encap_limit = -1;
struct flowi6 fl6;
__u8 dsfield;
@@ -1234,6 +1234,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
u8 tproto;
int err;
+ /* ensure we can access the full inner ip header */
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ return -1;
+
+ iph = ip_hdr(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
tproto = ACCESS_ONCE(t->parms.proto);
@@ -1253,7 +1258,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
fl6.flowi6_proto = IPPROTO_IPIP;
fl6.daddr = key->u.ipv6.dst;
fl6.flowlabel = key->label;
- dsfield = ip6_tclass(key->label);
+ dsfield = key->tos;
} else {
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
encap_limit = t->parms.encap_limit;
@@ -1293,7 +1298,7 @@ static inline int
ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct ipv6hdr *ipv6h;
int encap_limit = -1;
__u16 offset;
struct flowi6 fl6;
@@ -1302,6 +1307,10 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
u8 tproto;
int err;
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
+ return -1;
+
+ ipv6h = ipv6_hdr(skb);
tproto = ACCESS_ONCE(t->parms.proto);
if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
ip6_tnl_addr_conflict(t, ipv6h))
@@ -1320,7 +1329,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
fl6.flowi6_proto = IPPROTO_IPV6;
fl6.daddr = key->u.ipv6.dst;
fl6.flowlabel = key->label;
- dsfield = ip6_tclass(key->label);
+ dsfield = key->tos;
} else {
offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
/* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 6c54c76847bf..40262abb15db 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2413,17 +2413,17 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
{
int err;
- /* callers have the socket lock and rtnl lock
- * so no other readers or writers of iml or its sflist
- */
+ write_lock_bh(&iml->sflock);
if (!iml->sflist) {
/* any-source empty exclude case */
- return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
+ err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
+ } else {
+ err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
+ iml->sflist->sl_count, iml->sflist->sl_addr, 0);
+ sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max));
+ iml->sflist = NULL;
}
- err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
- iml->sflist->sl_count, iml->sflist->sl_addr, 0);
- sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max));
- iml->sflist = NULL;
+ write_unlock_bh(&iml->sflock);
return err;
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 21f3bf2125f4..505d048ffff5 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1692,10 +1692,9 @@ int ndisc_rcv(struct sk_buff *skb)
return 0;
}
- memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
-
switch (msg->icmph.icmp6_type) {
case NDISC_NEIGHBOUR_SOLICITATION:
+ memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
ndisc_recv_ns(skb);
break;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index ee33a6743f3b..e46185377981 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -63,7 +63,6 @@ struct nf_ct_frag6_skb_cb
static struct inet_frags nf_frags;
#ifdef CONFIG_SYSCTL
-static int zero;
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
@@ -76,18 +75,17 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_low_thresh",
.data = &init_net.nf_frag.frags.low_thresh,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .proc_handler = proc_doulongvec_minmax,
.extra2 = &init_net.nf_frag.frags.high_thresh
},
{
.procname = "nf_conntrack_frag6_high_thresh",
.data = &init_net.nf_frag.frags.high_thresh,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.nf_frag.frags.low_thresh
},
{ }
@@ -152,23 +150,6 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
}
-static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
- const struct in6_addr *daddr)
-{
- net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, nf_frags.rnd);
-}
-
-
-static unsigned int nf_hashfn(const struct inet_frag_queue *q)
-{
- const struct frag_queue *nq;
-
- nq = container_of(q, struct frag_queue, q);
- return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
-}
-
static void nf_ct_frag6_expire(unsigned long data)
{
struct frag_queue *fq;
@@ -177,34 +158,26 @@ static void nf_ct_frag6_expire(unsigned long data)
fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
net = container_of(fq->q.net, struct net, nf_frag.frags);
- ip6_expire_frag_queue(net, fq, &nf_frags);
+ ip6_expire_frag_queue(net, fq);
}
/* Creation primitives. */
-static inline struct frag_queue *fq_find(struct net *net, __be32 id,
- u32 user, struct in6_addr *src,
- struct in6_addr *dst, int iif, u8 ecn)
+static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
+ const struct ipv6hdr *hdr, int iif)
{
+ struct frag_v6_compare_key key = {
+ .id = id,
+ .saddr = hdr->saddr,
+ .daddr = hdr->daddr,
+ .user = user,
+ .iif = iif,
+ };
struct inet_frag_queue *q;
- struct ip6_create_arg arg;
- unsigned int hash;
-
- arg.id = id;
- arg.user = user;
- arg.src = src;
- arg.dst = dst;
- arg.iif = iif;
- arg.ecn = ecn;
-
- local_bh_disable();
- hash = nf_hash_frag(id, src, dst);
-
- q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
- local_bh_enable();
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+
+ q = inet_frag_find(&net->nf_frag.frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct frag_queue, q);
}
@@ -263,7 +236,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* this case. -DaveM
*/
pr_debug("end of fragment not rounded to 8 bytes.\n");
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
return -EPROTO;
}
if (end > fq->q.len) {
@@ -356,7 +329,7 @@ found:
return 0;
discard_fq:
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
err:
return -EINVAL;
}
@@ -378,7 +351,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic
int payload_len;
u8 ecn;
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
WARN_ON(head == NULL);
WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
@@ -479,6 +452,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic
else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
+ fp->sk = NULL;
}
sub_frag_mem_limit(fq->q.net, head->truesize);
@@ -497,6 +471,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic
head->csum);
fq->q.fragments = NULL;
+ fq->q.rb_fragments = RB_ROOT;
fq->q.fragments_tail = NULL;
return true;
@@ -591,9 +566,13 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb);
+ if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
+ fhdr->frag_off & htons(IP6_MF))
+ return -EINVAL;
+
skb_orphan(skb);
- fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
- skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
+ fq = fq_find(net, fhdr->identification, user, hdr,
+ skb->dev ? skb->dev->ifindex : 0);
if (fq == NULL) {
pr_debug("Can't find and can't create new queue\n");
return -ENOMEM;
@@ -618,30 +597,36 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
fq->q.meat == fq->q.len &&
nf_ct_frag6_reasm(fq, skb, dev))
ret = 0;
- else
- skb_dst_drop(skb);
out_unlock:
spin_unlock_bh(&fq->q.lock);
- inet_frag_put(&fq->q, &nf_frags);
+ inet_frag_put(&fq->q);
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
static int nf_ct_net_init(struct net *net)
{
+ int res;
+
net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
- inet_frags_init_net(&net->nf_frag.frags);
-
- return nf_ct_frag6_sysctl_register(net);
+ net->nf_frag.frags.f = &nf_frags;
+
+ res = inet_frags_init_net(&net->nf_frag.frags);
+ if (res < 0)
+ return res;
+ res = nf_ct_frag6_sysctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&net->nf_frag.frags);
+ return res;
}
static void nf_ct_net_exit(struct net *net)
{
nf_ct_frags6_sysctl_unregister(net);
- inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
+ inet_frags_exit_net(&net->nf_frag.frags);
}
static struct pernet_operations nf_ct_net_ops = {
@@ -653,13 +638,12 @@ int nf_ct_frag6_init(void)
{
int ret = 0;
- nf_frags.hashfn = nf_hashfn;
nf_frags.constructor = ip6_frag_init;
nf_frags.destructor = NULL;
nf_frags.qsize = sizeof(struct frag_queue);
- nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire;
nf_frags.frags_cache_name = nf_frags_cache_name;
+ nf_frags.rhash_params = ip6_rhash_params;
ret = inet_frags_init(&nf_frags);
if (ret)
goto out;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index e88bcb8ff0fd..dc04c024986c 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -38,7 +38,6 @@
static int sockstat6_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
- unsigned int frag_mem = ip6_frag_mem(net);
seq_printf(seq, "TCP6: inuse %d\n",
sock_prot_inuse_get(net, &tcpv6_prot));
@@ -48,7 +47,9 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplitev6_prot));
seq_printf(seq, "RAW6: inuse %d\n",
sock_prot_inuse_get(net, &rawv6_prot));
- seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem);
+ seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
+ atomic_read(&net->ipv6.frags.rhashtable.nelems),
+ frag_mem_limit(&net->ipv6.frags));
return 0;
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 71ffa526cb23..a4f979ff31b9 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -645,8 +645,6 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb->protocol = htons(ETH_P_IPV6);
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- skb_dst_set(skb, &rt->dst);
- *dstp = NULL;
skb_put(skb, length);
skb_reset_network_header(skb);
@@ -656,8 +654,14 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb->transport_header = skb->network_header;
err = memcpy_from_msg(iph, msg, length);
- if (err)
- goto error_fault;
+ if (err) {
+ err = -EFAULT;
+ kfree_skb(skb);
+ goto error;
+ }
+
+ skb_dst_set(skb, &rt->dst);
+ *dstp = NULL;
/* if egress device is enslaved to an L3 master device pass the
* skb to its handler for processing
@@ -666,21 +670,28 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
if (unlikely(!skb))
return 0;
+ /* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev
+ * in the error path. Since skb has been freed, the dst could
+ * have been queued for deletion.
+ */
+ rcu_read_lock();
IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
NULL, rt->dst.dev, dst_output);
if (err > 0)
err = net_xmit_errno(err);
- if (err)
- goto error;
+ if (err) {
+ IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+ rcu_read_unlock();
+ goto error_check;
+ }
+ rcu_read_unlock();
out:
return 0;
-error_fault:
- err = -EFAULT;
- kfree_skb(skb);
error:
IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+error_check:
if (err == -ENOBUFS && !np->recverr)
err = 0;
return err;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index e585c0a2591c..74ffbcb306a6 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -79,94 +79,58 @@ static struct inet_frags ip6_frags;
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
struct net_device *dev);
-/*
- * callers should be careful not to use the hash value outside the ipfrag_lock
- * as doing so could race with ipfrag_hash_rnd being recalculated.
- */
-static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
- const struct in6_addr *daddr)
-{
- net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, ip6_frags.rnd);
-}
-
-static unsigned int ip6_hashfn(const struct inet_frag_queue *q)
-{
- const struct frag_queue *fq;
-
- fq = container_of(q, struct frag_queue, q);
- return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr);
-}
-
-bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
-{
- const struct frag_queue *fq;
- const struct ip6_create_arg *arg = a;
-
- fq = container_of(q, struct frag_queue, q);
- return fq->id == arg->id &&
- fq->user == arg->user &&
- ipv6_addr_equal(&fq->saddr, arg->src) &&
- ipv6_addr_equal(&fq->daddr, arg->dst) &&
- (arg->iif == fq->iif ||
- !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST |
- IPV6_ADDR_LINKLOCAL)));
-}
-EXPORT_SYMBOL(ip6_frag_match);
-
void ip6_frag_init(struct inet_frag_queue *q, const void *a)
{
struct frag_queue *fq = container_of(q, struct frag_queue, q);
- const struct ip6_create_arg *arg = a;
+ const struct frag_v6_compare_key *key = a;
- fq->id = arg->id;
- fq->user = arg->user;
- fq->saddr = *arg->src;
- fq->daddr = *arg->dst;
- fq->ecn = arg->ecn;
+ q->key.v6 = *key;
+ fq->ecn = 0;
}
EXPORT_SYMBOL(ip6_frag_init);
-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
- struct inet_frags *frags)
+void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
{
struct net_device *dev = NULL;
+ struct sk_buff *head;
+ rcu_read_lock();
spin_lock(&fq->q.lock);
if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;
- inet_frag_kill(&fq->q, frags);
+ inet_frag_kill(&fq->q);
- rcu_read_lock();
dev = dev_get_by_index_rcu(net, fq->iif);
if (!dev)
- goto out_rcu_unlock;
+ goto out;
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
-
- if (inet_frag_evicting(&fq->q))
- goto out_rcu_unlock;
-
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
/* Don't send error if the first segment did not arrive. */
- if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments)
- goto out_rcu_unlock;
+ head = fq->q.fragments;
+ if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
+ goto out;
/* But use as source device on which LAST ARRIVED
* segment was received. And do not use fq->dev
* pointer directly, device might already disappeared.
*/
- fq->q.fragments->dev = dev;
- icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
-out_rcu_unlock:
- rcu_read_unlock();
+ head->dev = dev;
+ skb_get(head);
+ spin_unlock(&fq->q.lock);
+
+ icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
+ kfree_skb(head);
+ goto out_rcu_unlock;
+
out:
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, frags);
+out_rcu_unlock:
+ rcu_read_unlock();
+ inet_frag_put(&fq->q);
}
EXPORT_SYMBOL(ip6_expire_frag_queue);
@@ -178,31 +142,29 @@ static void ip6_frag_expire(unsigned long data)
fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
net = container_of(fq->q.net, struct net, ipv6.frags);
- ip6_expire_frag_queue(net, fq, &ip6_frags);
+ ip6_expire_frag_queue(net, fq);
}
static struct frag_queue *
-fq_find(struct net *net, __be32 id, const struct in6_addr *src,
- const struct in6_addr *dst, int iif, u8 ecn)
+fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
{
+ struct frag_v6_compare_key key = {
+ .id = id,
+ .saddr = hdr->saddr,
+ .daddr = hdr->daddr,
+ .user = IP6_DEFRAG_LOCAL_DELIVER,
+ .iif = iif,
+ };
struct inet_frag_queue *q;
- struct ip6_create_arg arg;
- unsigned int hash;
- arg.id = id;
- arg.user = IP6_DEFRAG_LOCAL_DELIVER;
- arg.src = src;
- arg.dst = dst;
- arg.iif = iif;
- arg.ecn = ecn;
+ if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
+ IPV6_ADDR_LINKLOCAL)))
+ key.iif = 0;
- hash = inet6_hash_frag(id, src, dst);
-
- q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+ q = inet_frag_find(&net->ipv6.frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct frag_queue, q);
}
@@ -359,7 +321,7 @@ found:
return -1;
discard_fq:
- inet_frag_kill(&fq->q, &ip6_frags);
+ inet_frag_kill(&fq->q);
err:
__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_REASMFAILS);
@@ -386,7 +348,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
int sum_truesize;
u8 ecn;
- inet_frag_kill(&fq->q, &ip6_frags);
+ inet_frag_kill(&fq->q);
ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
@@ -504,6 +466,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
rcu_read_unlock();
fq->q.fragments = NULL;
+ fq->q.rb_fragments = RB_ROOT;
fq->q.fragments_tail = NULL;
return 1;
@@ -525,6 +488,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
struct frag_queue *fq;
const struct ipv6hdr *hdr = ipv6_hdr(skb);
struct net *net = dev_net(skb_dst(skb)->dev);
+ int iif;
if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
goto fail_hdr;
@@ -553,17 +517,22 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return 1;
}
- fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
- skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
+ if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
+ fhdr->frag_off & htons(IP6_MF))
+ goto fail_hdr;
+
+ iif = skb->dev ? skb->dev->ifindex : 0;
+ fq = fq_find(net, fhdr->identification, hdr, iif);
if (fq) {
int ret;
spin_lock(&fq->q.lock);
+ fq->iif = iif;
ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, &ip6_frags);
+ inet_frag_put(&fq->q);
return ret;
}
@@ -584,24 +553,22 @@ static const struct inet6_protocol frag_protocol = {
};
#ifdef CONFIG_SYSCTL
-static int zero;
static struct ctl_table ip6_frags_ns_ctl_table[] = {
{
.procname = "ip6frag_high_thresh",
.data = &init_net.ipv6.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ipv6.frags.low_thresh
},
{
.procname = "ip6frag_low_thresh",
.data = &init_net.ipv6.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .proc_handler = proc_doulongvec_minmax,
.extra2 = &init_net.ipv6.frags.high_thresh
},
{
@@ -644,10 +611,6 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
table[1].data = &net->ipv6.frags.low_thresh;
table[1].extra2 = &net->ipv6.frags.high_thresh;
table[2].data = &net->ipv6.frags.timeout;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
- table[0].procname = NULL;
}
hdr = register_net_sysctl(net, "net/ipv6", table);
@@ -709,19 +672,27 @@ static void ip6_frags_sysctl_unregister(void)
static int __net_init ipv6_frags_init_net(struct net *net)
{
+ int res;
+
net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
+ net->ipv6.frags.f = &ip6_frags;
- inet_frags_init_net(&net->ipv6.frags);
+ res = inet_frags_init_net(&net->ipv6.frags);
+ if (res < 0)
+ return res;
- return ip6_frags_ns_sysctl_register(net);
+ res = ip6_frags_ns_sysctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&net->ipv6.frags);
+ return res;
}
static void __net_exit ipv6_frags_exit_net(struct net *net)
{
ip6_frags_ns_sysctl_unregister(net);
- inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
+ inet_frags_exit_net(&net->ipv6.frags);
}
static struct pernet_operations ip6_frags_ops = {
@@ -729,14 +700,55 @@ static struct pernet_operations ip6_frags_ops = {
.exit = ipv6_frags_exit_net,
};
+static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed)
+{
+ return jhash2(data,
+ sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct inet_frag_queue *fq = data;
+
+ return jhash2((const u32 *)&fq->key.v6,
+ sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+ const struct frag_v6_compare_key *key = arg->key;
+ const struct inet_frag_queue *fq = ptr;
+
+ return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+const struct rhashtable_params ip6_rhash_params = {
+ .head_offset = offsetof(struct inet_frag_queue, node),
+ .hashfn = ip6_key_hashfn,
+ .obj_hashfn = ip6_obj_hashfn,
+ .obj_cmpfn = ip6_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+EXPORT_SYMBOL(ip6_rhash_params);
+
int __init ipv6_frag_init(void)
{
int ret;
- ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+ ip6_frags.constructor = ip6_frag_init;
+ ip6_frags.destructor = NULL;
+ ip6_frags.qsize = sizeof(struct frag_queue);
+ ip6_frags.frag_expire = ip6_frag_expire;
+ ip6_frags.frags_cache_name = ip6_frag_cache_name;
+ ip6_frags.rhash_params = ip6_rhash_params;
+ ret = inet_frags_init(&ip6_frags);
if (ret)
goto out;
+ ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+ if (ret)
+ goto err_protocol;
+
ret = ip6_frags_sysctl_register();
if (ret)
goto err_sysctl;
@@ -745,16 +757,6 @@ int __init ipv6_frag_init(void)
if (ret)
goto err_pernet;
- ip6_frags.hashfn = ip6_hashfn;
- ip6_frags.constructor = ip6_frag_init;
- ip6_frags.destructor = NULL;
- ip6_frags.qsize = sizeof(struct frag_queue);
- ip6_frags.match = ip6_frag_match;
- ip6_frags.frag_expire = ip6_frag_expire;
- ip6_frags.frags_cache_name = ip6_frag_cache_name;
- ret = inet_frags_init(&ip6_frags);
- if (ret)
- goto err_pernet;
out:
return ret;
@@ -762,6 +764,8 @@ err_pernet:
ip6_frags_sysctl_unregister();
err_sysctl:
inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+err_protocol:
+ inet_frags_fini(&ip6_frags);
goto out;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 70fa31e37360..b0a72677b7e5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1439,10 +1439,13 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu);
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
+ int oif = sk->sk_bound_dev_if;
struct dst_entry *dst;
- ip6_update_pmtu(skb, sock_net(sk), mtu,
- sk->sk_bound_dev_if, sk->sk_mark);
+ if (!oif && skb->dev)
+ oif = l3mdev_master_ifindex(skb->dev);
+
+ ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark);
dst = __sk_dst_get(sk);
if (!dst || !dst->obsolete ||
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 4d09ce6fa90e..64862c5084ee 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -165,9 +165,11 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (toobig && xfrm6_local_dontfrag(skb)) {
xfrm6_local_rxpmtu(skb, mtu);
+ kfree_skb(skb);
return -EMSGSIZE;
} else if (!skb->ignore_df && toobig && skb->sk) {
xfrm_local_error(skb, mtu);
+ kfree_skb(skb);
return -EMSGSIZE;
}
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index a5333f6cb65a..b96dbe38ecad 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -845,10 +845,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
}
}
- /* Session data offset is handled differently for L2TPv2 and
- * L2TPv3. For L2TPv2, there is an optional 16-bit value in
- * the header. For L2TPv3, the offset is negotiated using AVPs
- * in the session setup control protocol.
+ /* Session data offset is defined only for L2TPv2 and is
+ * indicated by an optional 16-bit value in the header.
*/
if (tunnel->version == L2TP_HDR_VER_2) {
/* If offset bit set, skip it. */
@@ -856,8 +854,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
offset = ntohs(*(__be16 *)ptr);
ptr += 2 + offset;
}
- } else
- ptr += session->offset;
+ }
offset = ptr - optr;
if (!pskb_may_pull(skb, offset))
@@ -1141,8 +1138,6 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
}
bufp += session->l2specific_len;
}
- if (session->offset)
- bufp += session->offset;
return bufp - optr;
}
@@ -1827,7 +1822,7 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version)
if (session->send_seq)
session->hdr_len += 4;
} else {
- session->hdr_len = 4 + session->cookie_len + session->l2specific_len + session->offset;
+ session->hdr_len = 4 + session->cookie_len + session->l2specific_len;
if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP)
session->hdr_len += 4;
}
@@ -1878,7 +1873,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
session->recv_seq = cfg->recv_seq;
session->lns_mode = cfg->lns_mode;
session->reorder_timeout = cfg->reorder_timeout;
- session->offset = cfg->offset;
session->l2specific_type = cfg->l2specific_type;
session->l2specific_len = cfg->l2specific_len;
session->cookie_len = cfg->cookie_len;
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 42419f1c24cf..86356a23a0a7 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -68,7 +68,6 @@ struct l2tp_session_cfg {
int debug; /* bitmask of debug message
* categories */
u16 vlan_id; /* VLAN pseudowire only */
- u16 offset; /* offset to payload */
u16 l2specific_len; /* Layer 2 specific length */
u16 l2specific_type; /* Layer 2 specific type */
u8 cookie[8]; /* optional cookie */
@@ -94,8 +93,6 @@ struct l2tp_session {
int cookie_len;
u8 peer_cookie[8];
int peer_cookie_len;
- u16 offset; /* offset from end of L2TP header
- to beginning of data */
u16 l2specific_len;
u16 l2specific_type;
u16 hdr_len;
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index d100aed3d06f..2d2a73280ec2 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -181,8 +181,8 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
session->lns_mode ? "LNS" : "LAC",
session->debug,
jiffies_to_msecs(session->reorder_timeout));
- seq_printf(m, " offset %hu l2specific %hu/%hu\n",
- session->offset, session->l2specific_type, session->l2specific_len);
+ seq_printf(m, " offset 0 l2specific %hu/%hu\n",
+ session->l2specific_type, session->l2specific_len);
if (session->cookie_len) {
seq_printf(m, " cookie %02x%02x%02x%02x",
session->cookie[0], session->cookie[1],
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index ee03bc866d1b..d6fccfdca201 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -536,9 +536,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
}
if (tunnel->version > 2) {
- if (info->attrs[L2TP_ATTR_OFFSET])
- cfg.offset = nla_get_u16(info->attrs[L2TP_ATTR_OFFSET]);
-
if (info->attrs[L2TP_ATTR_DATA_SEQ])
cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 85aae8c84aeb..789e66b0187a 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -726,7 +726,6 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
struct sk_buff *skb = NULL;
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
- unsigned long cpu_flags;
size_t copied = 0;
u32 peek_seq = 0;
u32 *seq, skb_len;
@@ -851,9 +850,8 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
goto copy_uaddr;
if (!(flags & MSG_PEEK)) {
- spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);
- sk_eat_skb(sk, skb);
- spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);
+ skb_unlink(skb, &sk->sk_receive_queue);
+ kfree_skb(skb);
*seq = 0;
}
@@ -874,9 +872,8 @@ copy_uaddr:
llc_cmsg_rcv(msg, skb);
if (!(flags & MSG_PEEK)) {
- spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);
- sk_eat_skb(sk, skb);
- spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);
+ skb_unlink(skb, &sk->sk_receive_queue);
+ kfree_skb(skb);
*seq = 0;
}
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 79c346fd859b..b9290a183a2f 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -734,6 +734,7 @@ void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk)
llc_sk(sk)->sap = sap;
spin_lock_bh(&sap->sk_lock);
+ sock_set_flag(sk, SOCK_RCU_FREE);
sap->sk_count++;
sk_nulls_add_node_rcu(sk, laddr_hb);
hlist_add_head(&llc->dev_hash_node, dev_hb);
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 45319cc01121..80c45567ee3a 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -7,7 +7,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
- * Copyright(c) 2015 Intel Deutschland GmbH
+ * Copyright(c) 2015-2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -741,46 +741,43 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
ieee80211_agg_start_txq(sta, tid, true);
}
-void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid)
+void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid,
+ struct tid_ampdu_tx *tid_tx)
{
- struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local;
- struct sta_info *sta;
- struct tid_ampdu_tx *tid_tx;
- trace_api_start_tx_ba_cb(sdata, ra, tid);
+ if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)))
+ return;
+
+ if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state))
+ ieee80211_agg_tx_operational(local, sta, tid);
+}
+
+static struct tid_ampdu_tx *
+ieee80211_lookup_tid_tx(struct ieee80211_sub_if_data *sdata,
+ const u8 *ra, u16 tid, struct sta_info **sta)
+{
+ struct tid_ampdu_tx *tid_tx;
if (tid >= IEEE80211_NUM_TIDS) {
ht_dbg(sdata, "Bad TID value: tid = %d (>= %d)\n",
tid, IEEE80211_NUM_TIDS);
- return;
+ return NULL;
}
- mutex_lock(&local->sta_mtx);
- sta = sta_info_get_bss(sdata, ra);
- if (!sta) {
- mutex_unlock(&local->sta_mtx);
+ *sta = sta_info_get_bss(sdata, ra);
+ if (!*sta) {
ht_dbg(sdata, "Could not find station: %pM\n", ra);
- return;
+ return NULL;
}
- mutex_lock(&sta->ampdu_mlme.mtx);
- tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
+ tid_tx = rcu_dereference((*sta)->ampdu_mlme.tid_tx[tid]);
- if (WARN_ON(!tid_tx)) {
+ if (WARN_ON(!tid_tx))
ht_dbg(sdata, "addBA was not requested!\n");
- goto unlock;
- }
- if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)))
- goto unlock;
-
- if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state))
- ieee80211_agg_tx_operational(local, sta, tid);
-
- unlock:
- mutex_unlock(&sta->ampdu_mlme.mtx);
- mutex_unlock(&local->sta_mtx);
+ return tid_tx;
}
void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
@@ -788,19 +785,20 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct ieee80211_local *local = sdata->local;
- struct ieee80211_ra_tid *ra_tid;
- struct sk_buff *skb = dev_alloc_skb(0);
+ struct sta_info *sta;
+ struct tid_ampdu_tx *tid_tx;
- if (unlikely(!skb))
- return;
+ trace_api_start_tx_ba_cb(sdata, ra, tid);
- ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
- memcpy(&ra_tid->ra, ra, ETH_ALEN);
- ra_tid->tid = tid;
+ rcu_read_lock();
+ tid_tx = ieee80211_lookup_tid_tx(sdata, ra, tid, &sta);
+ if (!tid_tx)
+ goto out;
- skb->pkt_type = IEEE80211_SDATA_QUEUE_AGG_START;
- skb_queue_tail(&sdata->skb_queue, skb);
- ieee80211_queue_work(&local->hw, &sdata->work);
+ set_bit(HT_AGG_STATE_START_CB, &tid_tx->state);
+ ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
+ out:
+ rcu_read_unlock();
}
EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe);
@@ -860,37 +858,18 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
}
EXPORT_SYMBOL(ieee80211_stop_tx_ba_session);
-void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid)
+void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid,
+ struct tid_ampdu_tx *tid_tx)
{
- struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
- struct ieee80211_local *local = sdata->local;
- struct sta_info *sta;
- struct tid_ampdu_tx *tid_tx;
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
bool send_delba = false;
- trace_api_stop_tx_ba_cb(sdata, ra, tid);
-
- if (tid >= IEEE80211_NUM_TIDS) {
- ht_dbg(sdata, "Bad TID value: tid = %d (>= %d)\n",
- tid, IEEE80211_NUM_TIDS);
- return;
- }
-
- ht_dbg(sdata, "Stopping Tx BA session for %pM tid %d\n", ra, tid);
-
- mutex_lock(&local->sta_mtx);
-
- sta = sta_info_get_bss(sdata, ra);
- if (!sta) {
- ht_dbg(sdata, "Could not find station: %pM\n", ra);
- goto unlock;
- }
+ ht_dbg(sdata, "Stopping Tx BA session for %pM tid %d\n",
+ sta->sta.addr, tid);
- mutex_lock(&sta->ampdu_mlme.mtx);
spin_lock_bh(&sta->lock);
- tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
- if (!tid_tx || !test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
+ if (!test_bit(HT_AGG_STATE_STOPPING, &tid_tx->state)) {
ht_dbg(sdata,
"unexpected callback to A-MPDU stop for %pM tid %d\n",
sta->sta.addr, tid);
@@ -906,12 +885,8 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid)
spin_unlock_bh(&sta->lock);
if (send_delba)
- ieee80211_send_delba(sdata, ra, tid,
+ ieee80211_send_delba(sdata, sta->sta.addr, tid,
WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE);
-
- mutex_unlock(&sta->ampdu_mlme.mtx);
- unlock:
- mutex_unlock(&local->sta_mtx);
}
void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
@@ -919,19 +894,20 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct ieee80211_local *local = sdata->local;
- struct ieee80211_ra_tid *ra_tid;
- struct sk_buff *skb = dev_alloc_skb(0);
+ struct sta_info *sta;
+ struct tid_ampdu_tx *tid_tx;
- if (unlikely(!skb))
- return;
+ trace_api_stop_tx_ba_cb(sdata, ra, tid);
- ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
- memcpy(&ra_tid->ra, ra, ETH_ALEN);
- ra_tid->tid = tid;
+ rcu_read_lock();
+ tid_tx = ieee80211_lookup_tid_tx(sdata, ra, tid, &sta);
+ if (!tid_tx)
+ goto out;
- skb->pkt_type = IEEE80211_SDATA_QUEUE_AGG_STOP;
- skb_queue_tail(&sdata->skb_queue, skb);
- ieee80211_queue_work(&local->hw, &sdata->work);
+ set_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state);
+ ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
+ out:
+ rcu_read_unlock();
}
EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index e63fd12f923a..6ef9d32c34f1 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -386,7 +386,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_AP_VLAN:
/* Keys without a station are used for TX only */
- if (key->sta && test_sta_flag(key->sta, WLAN_STA_MFP))
+ if (sta && test_sta_flag(sta, WLAN_STA_MFP))
key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT;
break;
case NL80211_IFTYPE_ADHOC:
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index f4a528773563..6ca5442b1e03 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -7,6 +7,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
+ * Copyright 2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -289,8 +290,6 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
{
int i;
- cancel_work_sync(&sta->ampdu_mlme.work);
-
for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
__ieee80211_stop_tx_ba_session(sta, i, reason);
__ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
@@ -298,6 +297,9 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
reason != AGG_STOP_DESTROY_STA &&
reason != AGG_STOP_PEER_REQUEST);
}
+
+ /* stopping might queue the work again - so cancel only afterwards */
+ cancel_work_sync(&sta->ampdu_mlme.work);
}
void ieee80211_ba_session_work(struct work_struct *work)
@@ -352,10 +354,16 @@ void ieee80211_ba_session_work(struct work_struct *work)
spin_unlock_bh(&sta->lock);
tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
- if (tid_tx && test_and_clear_bit(HT_AGG_STATE_WANT_STOP,
- &tid_tx->state))
+ if (!tid_tx)
+ continue;
+
+ if (test_and_clear_bit(HT_AGG_STATE_START_CB, &tid_tx->state))
+ ieee80211_start_tx_ba_cb(sta, tid, tid_tx);
+ if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP, &tid_tx->state))
___ieee80211_stop_tx_ba_session(sta, tid,
AGG_STOP_LOCAL_REQUEST);
+ if (test_and_clear_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state))
+ ieee80211_stop_tx_ba_cb(sta, tid, tid_tx);
}
mutex_unlock(&sta->ampdu_mlme.mtx);
}
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index a5acaf1efaab..0c0695eb2609 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -948,8 +948,8 @@ static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata,
if (len < IEEE80211_DEAUTH_FRAME_LEN)
return;
- ibss_dbg(sdata, "RX DeAuth SA=%pM DA=%pM BSSID=%pM (reason: %d)\n",
- mgmt->sa, mgmt->da, mgmt->bssid, reason);
+ ibss_dbg(sdata, "RX DeAuth SA=%pM DA=%pM\n", mgmt->sa, mgmt->da);
+ ibss_dbg(sdata, "\tBSSID=%pM (reason: %d)\n", mgmt->bssid, reason);
sta_info_destroy_addr(sdata, mgmt->sa);
}
@@ -967,9 +967,9 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata,
auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg);
auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction);
- ibss_dbg(sdata,
- "RX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=%d)\n",
- mgmt->sa, mgmt->da, mgmt->bssid, auth_transaction);
+ ibss_dbg(sdata, "RX Auth SA=%pM DA=%pM\n", mgmt->sa, mgmt->da);
+ ibss_dbg(sdata, "\tBSSID=%pM (auth_transaction=%d)\n",
+ mgmt->bssid, auth_transaction);
if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1)
return;
@@ -1176,10 +1176,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
rx_timestamp = drv_get_tsf(local, sdata);
}
- ibss_dbg(sdata,
- "RX beacon SA=%pM BSSID=%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n",
+ ibss_dbg(sdata, "RX beacon SA=%pM BSSID=%pM TSF=0x%llx\n",
mgmt->sa, mgmt->bssid,
- (unsigned long long)rx_timestamp,
+ (unsigned long long)rx_timestamp);
+ ibss_dbg(sdata, "\tBCN=0x%llx diff=%lld @%lu\n",
(unsigned long long)beacon_timestamp,
(unsigned long long)(rx_timestamp - beacon_timestamp),
jiffies);
@@ -1538,9 +1538,9 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
tx_last_beacon = drv_tx_last_beacon(local);
- ibss_dbg(sdata,
- "RX ProbeReq SA=%pM DA=%pM BSSID=%pM (tx_last_beacon=%d)\n",
- mgmt->sa, mgmt->da, mgmt->bssid, tx_last_beacon);
+ ibss_dbg(sdata, "RX ProbeReq SA=%pM DA=%pM\n", mgmt->sa, mgmt->da);
+ ibss_dbg(sdata, "\tBSSID=%pM (tx_last_beacon=%d)\n",
+ mgmt->bssid, tx_last_beacon);
if (!tx_last_beacon && is_multicast_ether_addr(mgmt->da))
return;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 7fd544d970d9..8a690ebd7374 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1026,8 +1026,6 @@ struct ieee80211_rx_agg {
enum sdata_queue_type {
IEEE80211_SDATA_QUEUE_TYPE_FRAME = 0,
- IEEE80211_SDATA_QUEUE_AGG_START = 1,
- IEEE80211_SDATA_QUEUE_AGG_STOP = 2,
IEEE80211_SDATA_QUEUE_RX_AGG_START = 3,
IEEE80211_SDATA_QUEUE_RX_AGG_STOP = 4,
};
@@ -1416,12 +1414,6 @@ ieee80211_get_sband(struct ieee80211_sub_if_data *sdata)
return local->hw.wiphy->bands[band];
}
-/* this struct represents 802.11n's RA/TID combination */
-struct ieee80211_ra_tid {
- u8 ra[ETH_ALEN];
- u16 tid;
-};
-
/* this struct holds the value parsing from channel switch IE */
struct ieee80211_csa_ie {
struct cfg80211_chan_def chandef;
@@ -1765,8 +1757,10 @@ int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
enum ieee80211_agg_stop_reason reason);
int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
enum ieee80211_agg_stop_reason reason);
-void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid);
-void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid);
+void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid,
+ struct tid_ampdu_tx *tid_tx);
+void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid,
+ struct tid_ampdu_tx *tid_tx);
void ieee80211_ba_session_work(struct work_struct *work);
void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid);
void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index fa7d757fef95..760ba8ec2944 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1248,7 +1248,6 @@ static void ieee80211_iface_work(struct work_struct *work)
struct ieee80211_local *local = sdata->local;
struct sk_buff *skb;
struct sta_info *sta;
- struct ieee80211_ra_tid *ra_tid;
struct ieee80211_rx_agg *rx_agg;
if (!ieee80211_sdata_running(sdata))
@@ -1264,15 +1263,7 @@ static void ieee80211_iface_work(struct work_struct *work)
while ((skb = skb_dequeue(&sdata->skb_queue))) {
struct ieee80211_mgmt *mgmt = (void *)skb->data;
- if (skb->pkt_type == IEEE80211_SDATA_QUEUE_AGG_START) {
- ra_tid = (void *)&skb->cb;
- ieee80211_start_tx_ba_cb(&sdata->vif, ra_tid->ra,
- ra_tid->tid);
- } else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_AGG_STOP) {
- ra_tid = (void *)&skb->cb;
- ieee80211_stop_tx_ba_cb(&sdata->vif, ra_tid->ra,
- ra_tid->tid);
- } else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_RX_AGG_START) {
+ if (skb->pkt_type == IEEE80211_SDATA_QUEUE_RX_AGG_START) {
rx_agg = (void *)&skb->cb;
mutex_lock(&local->sta_mtx);
sta = sta_info_get_bss(sdata, rx_agg->addr);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 2bb6899854d4..e3bbfb20ae82 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -254,8 +254,27 @@ static void ieee80211_restart_work(struct work_struct *work)
"%s called with hardware scan in progress\n", __func__);
rtnl_lock();
- list_for_each_entry(sdata, &local->interfaces, list)
+ list_for_each_entry(sdata, &local->interfaces, list) {
+ /*
+ * XXX: there may be more work for other vif types and even
+ * for station mode: a good thing would be to run most of
+ * the iface type's dependent _stop (ieee80211_mg_stop,
+ * ieee80211_ibss_stop) etc...
+ * For now, fix only the specific bug that was seen: race
+ * between csa_connection_drop_work and us.
+ */
+ if (sdata->vif.type == NL80211_IFTYPE_STATION) {
+ /*
+ * This worker is scheduled from the iface worker that
+ * runs on mac80211's workqueue, so we can't be
+ * scheduling this worker after the cancel right here.
+ * The exception is ieee80211_chswitch_done.
+ * Then we can have a race...
+ */
+ cancel_work_sync(&sdata->u.mgd.csa_connection_drop_work);
+ }
flush_delayed_work(&sdata->dec_tailroom_needed_wk);
+ }
ieee80211_scan_cancel(local);
/* make sure any new ROC will consider local->in_reconfig */
@@ -466,10 +485,7 @@ static const struct ieee80211_vht_cap mac80211_vht_capa_mod_mask = {
cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC |
IEEE80211_VHT_CAP_SHORT_GI_80 |
IEEE80211_VHT_CAP_SHORT_GI_160 |
- IEEE80211_VHT_CAP_RXSTBC_1 |
- IEEE80211_VHT_CAP_RXSTBC_2 |
- IEEE80211_VHT_CAP_RXSTBC_3 |
- IEEE80211_VHT_CAP_RXSTBC_4 |
+ IEEE80211_VHT_CAP_RXSTBC_MASK |
IEEE80211_VHT_CAP_TXSTBC |
IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE |
IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE |
@@ -1164,6 +1180,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
#if IS_ENABLED(CONFIG_IPV6)
unregister_inet6addr_notifier(&local->ifa6_notifier);
#endif
+ ieee80211_txq_teardown_flows(local);
rtnl_lock();
@@ -1191,7 +1208,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
skb_queue_purge(&local->skb_queue);
skb_queue_purge(&local->skb_queue_unreliable);
skb_queue_purge(&local->skb_queue_tdls_chsw);
- ieee80211_txq_teardown_flows(local);
destroy_workqueue(local->workqueue);
wiphy_unregister(local->hw.wiphy);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index fed598a202c8..b0acb2961e80 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -563,6 +563,10 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
forward = false;
reply = true;
target_metric = 0;
+
+ if (SN_GT(target_sn, ifmsh->sn))
+ ifmsh->sn = target_sn;
+
if (time_after(jiffies, ifmsh->last_sn_update +
net_traversal_jiffies(sdata)) ||
time_before(jiffies, ifmsh->last_sn_update)) {
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index e6f42d12222e..39451c84c785 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -989,6 +989,10 @@ static void ieee80211_chswitch_work(struct work_struct *work)
*/
if (sdata->reserved_chanctx) {
+ struct ieee80211_supported_band *sband = NULL;
+ struct sta_info *mgd_sta = NULL;
+ enum ieee80211_sta_rx_bandwidth bw = IEEE80211_STA_RX_BW_20;
+
/*
* with multi-vif csa driver may call ieee80211_csa_finish()
* many times while waiting for other interfaces to use their
@@ -997,6 +1001,48 @@ static void ieee80211_chswitch_work(struct work_struct *work)
if (sdata->reserved_ready)
goto out;
+ if (sdata->vif.bss_conf.chandef.width !=
+ sdata->csa_chandef.width) {
+ /*
+ * For managed interface, we need to also update the AP
+ * station bandwidth and align the rate scale algorithm
+ * on the bandwidth change. Here we only consider the
+ * bandwidth of the new channel definition (as channel
+ * switch flow does not have the full HT/VHT/HE
+ * information), assuming that if additional changes are
+ * required they would be done as part of the processing
+ * of the next beacon from the AP.
+ */
+ switch (sdata->csa_chandef.width) {
+ case NL80211_CHAN_WIDTH_20_NOHT:
+ case NL80211_CHAN_WIDTH_20:
+ default:
+ bw = IEEE80211_STA_RX_BW_20;
+ break;
+ case NL80211_CHAN_WIDTH_40:
+ bw = IEEE80211_STA_RX_BW_40;
+ break;
+ case NL80211_CHAN_WIDTH_80:
+ bw = IEEE80211_STA_RX_BW_80;
+ break;
+ case NL80211_CHAN_WIDTH_80P80:
+ case NL80211_CHAN_WIDTH_160:
+ bw = IEEE80211_STA_RX_BW_160;
+ break;
+ }
+
+ mgd_sta = sta_info_get(sdata, ifmgd->bssid);
+ sband =
+ local->hw.wiphy->bands[sdata->csa_chandef.chan->band];
+ }
+
+ if (sdata->vif.bss_conf.chandef.width >
+ sdata->csa_chandef.width) {
+ mgd_sta->sta.bandwidth = bw;
+ rate_control_rate_update(local, sband, mgd_sta,
+ IEEE80211_RC_BW_CHANGED);
+ }
+
ret = ieee80211_vif_use_reserved_context(sdata);
if (ret) {
sdata_info(sdata,
@@ -1007,6 +1053,13 @@ static void ieee80211_chswitch_work(struct work_struct *work)
goto out;
}
+ if (sdata->vif.bss_conf.chandef.width <
+ sdata->csa_chandef.width) {
+ mgd_sta->sta.bandwidth = bw;
+ rate_control_rate_update(local, sband, mgd_sta,
+ IEEE80211_RC_BW_CHANGED);
+ }
+
goto out;
}
@@ -1229,6 +1282,16 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
cbss->beacon_interval));
return;
drop_connection:
+ /*
+ * This is just so that the disconnect flow will know that
+ * we were trying to switch channel and failed. In case the
+ * mode is 1 (we are not allowed to Tx), we will know not to
+ * send a deauthentication frame. Those two fields will be
+ * reset when the disconnection worker runs.
+ */
+ sdata->vif.csa_active = true;
+ sdata->csa_block_tx = csa_ie.mode;
+
ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work);
mutex_unlock(&local->chanctx_mtx);
mutex_unlock(&local->mtx);
@@ -2401,6 +2464,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
+ bool tx;
sdata_lock(sdata);
if (!ifmgd->associated) {
@@ -2408,6 +2472,8 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
return;
}
+ tx = !sdata->csa_block_tx;
+
/* AP is probably out of range (or not reachable for another reason) so
* remove the bss struct for that AP.
*/
@@ -2415,7 +2481,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
- true, frame_buf);
+ tx, frame_buf);
mutex_lock(&local->mtx);
sdata->vif.csa_active = false;
ifmgd->csa_waiting_bcn = false;
@@ -2426,7 +2492,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
}
mutex_unlock(&local->mtx);
- ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true,
+ ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), tx,
WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY);
sdata_unlock(sdata);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 15599c70a38f..cc808ac783e5 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -115,6 +115,8 @@ enum ieee80211_sta_info_flags {
#define HT_AGG_STATE_STOPPING 3
#define HT_AGG_STATE_WANT_START 4
#define HT_AGG_STATE_WANT_STOP 5
+#define HT_AGG_STATE_START_CB 6
+#define HT_AGG_STATE_STOP_CB 7
enum ieee80211_agg_stop_reason {
AGG_STOP_DECLINED,
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 72fe9bc7a1f9..7892bac21eac 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -472,11 +472,6 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
if (!skb)
return;
- if (dropped) {
- dev_kfree_skb_any(skb);
- return;
- }
-
if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) {
u64 cookie = IEEE80211_SKB_CB(skb)->ack.cookie;
struct ieee80211_sub_if_data *sdata;
@@ -498,6 +493,8 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
rcu_read_unlock();
dev_kfree_skb_any(skb);
+ } else if (dropped) {
+ dev_kfree_skb_any(skb);
} else {
/* consumes skb */
skb_complete_wifi_ack(skb, acked);
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index f20dcf1b1830..c64ae68ae4f8 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -16,6 +16,7 @@
#include "ieee80211_i.h"
#include "driver-ops.h"
#include "rate.h"
+#include "wme.h"
/* give usermode some time for retries in setting up the TDLS session */
#define TDLS_PEER_SETUP_TIMEOUT (15 * HZ)
@@ -1019,14 +1020,13 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev,
switch (action_code) {
case WLAN_TDLS_SETUP_REQUEST:
case WLAN_TDLS_SETUP_RESPONSE:
- skb_set_queue_mapping(skb, IEEE80211_AC_BK);
- skb->priority = 2;
+ skb->priority = 256 + 2;
break;
default:
- skb_set_queue_mapping(skb, IEEE80211_AC_VI);
- skb->priority = 5;
+ skb->priority = 256 + 5;
break;
}
+ skb_set_queue_mapping(skb, ieee80211_select_queue(sdata, skb));
/*
* Set the WLAN_TDLS_TEARDOWN flag to indicate a teardown in progress.
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 84582998f65f..58fba4e569e6 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1833,7 +1833,7 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
if (invoke_tx_handlers_early(&tx))
- return false;
+ return true;
if (ieee80211_queue_skb(local, sdata, tx.sta, tx.skb))
return true;
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 9a14c237830f..b259a5814965 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -213,13 +213,13 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_CIDR]) {
e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
+ if (e.cidr[0] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
}
if (tb[IPSET_ATTR_CIDR2]) {
e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
- if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
+ if (e.cidr[1] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
}
@@ -492,13 +492,13 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
if (tb[IPSET_ATTR_CIDR]) {
e.cidr[0] = nla_get_u8(tb[IPSET_ATTR_CIDR]);
- if (!e.cidr[0] || e.cidr[0] > HOST_MASK)
+ if (e.cidr[0] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
}
if (tb[IPSET_ATTR_CIDR2]) {
e.cidr[1] = nla_get_u8(tb[IPSET_ATTR_CIDR2]);
- if (!e.cidr[1] || e.cidr[1] > HOST_MASK)
+ if (e.cidr[1] > HOST_MASK)
return -IPSET_ERR_INVALID_CIDR;
}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index db3586ba1211..19b3f4fbea52 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -918,19 +918,22 @@ static unsigned int early_drop_list(struct net *net,
return drops;
}
-static noinline int early_drop(struct net *net, unsigned int _hash)
+static noinline int early_drop(struct net *net, unsigned int hash)
{
- unsigned int i;
+ unsigned int i, bucket;
for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
struct hlist_nulls_head *ct_hash;
- unsigned int hash, hsize, drops;
+ unsigned int hsize, drops;
rcu_read_lock();
nf_conntrack_get_ht(&ct_hash, &hsize);
- hash = reciprocal_scale(_hash++, hsize);
+ if (!i)
+ bucket = reciprocal_scale(hash, hsize);
+ else
+ bucket = (bucket + 1) % hsize;
- drops = early_drop_list(net, &ct_hash[hash]);
+ drops = early_drop_list(net, &ct_hash[bucket]);
rcu_read_unlock();
if (drops) {
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 624d6e4dcd5c..51b0d832bd07 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -421,7 +421,7 @@ nf_nat_setup_info(struct nf_conn *ct,
else
ct->status |= IPS_DST_NAT;
- if (nfct_help(ct))
+ if (nfct_help(ct) && !nfct_seqadj(ct))
if (!nfct_seqadj_ext_add(ct))
return NF_DROP;
}
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index bb5d6a058fb7..921c9bd7e1e7 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -116,6 +116,22 @@ static void idletimer_tg_expired(unsigned long data)
schedule_work(&timer->work);
}
+static int idletimer_check_sysfs_name(const char *name, unsigned int size)
+{
+ int ret;
+
+ ret = xt_check_proc_name(name, size);
+ if (ret < 0)
+ return ret;
+
+ if (!strcmp(name, "power") ||
+ !strcmp(name, "subsystem") ||
+ !strcmp(name, "uevent"))
+ return -EINVAL;
+
+ return 0;
+}
+
static int idletimer_tg_create(struct idletimer_tg_info *info)
{
int ret;
@@ -126,6 +142,10 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
goto out;
}
+ ret = idletimer_check_sysfs_name(info->label, sizeof(info->label));
+ if (ret < 0)
+ goto out_free_timer;
+
sysfs_attr_init(&info->timer->attr.attr);
info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
if (!info->timer->attr.attr.name) {
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index a123d0dc1ef9..053ba8646155 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -787,7 +787,8 @@ static int netlbl_unlabel_addrinfo_get(struct genl_info *info,
{
u32 addr_len;
- if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR]) {
+ if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR] &&
+ info->attrs[NLBL_UNLABEL_A_IPV4MASK]) {
addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]);
if (addr_len != sizeof(struct in_addr) &&
addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV4MASK]))
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 169156cfd4c8..96e61eab19bc 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -505,7 +505,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd,
ic->i_send_cq, ic->i_recv_cq);
- return ret;
+ goto out;
sends_out:
vfree(ic->i_sends);
@@ -530,6 +530,7 @@ send_cq_out:
ic->i_send_cq = NULL;
rds_ibdev_out:
rds_ib_remove_conn(rds_ibdev, conn);
+out:
rds_ib_dev_put(rds_ibdev);
return ret;
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index f3ac85a285a2..a4380e182e6c 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -216,10 +216,11 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb,
/*
* Apply a hard ACK by advancing the Tx window.
*/
-static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
+static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
struct rxrpc_ack_summary *summary)
{
struct sk_buff *skb, *list = NULL;
+ bool rot_last = false;
int ix;
u8 annotation;
@@ -243,15 +244,17 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
skb->next = list;
list = skb;
- if (annotation & RXRPC_TX_ANNO_LAST)
+ if (annotation & RXRPC_TX_ANNO_LAST) {
set_bit(RXRPC_CALL_TX_LAST, &call->flags);
+ rot_last = true;
+ }
if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK)
summary->nr_rot_new_acks++;
}
spin_unlock(&call->lock);
- trace_rxrpc_transmit(call, (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ?
+ trace_rxrpc_transmit(call, (rot_last ?
rxrpc_transmit_rotate_last :
rxrpc_transmit_rotate));
wake_up(&call->waitq);
@@ -262,6 +265,8 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
skb->next = NULL;
rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
}
+
+ return rot_last;
}
/*
@@ -332,11 +337,11 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call)
ktime_get_real());
}
- if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags))
- rxrpc_rotate_tx_window(call, top, &summary);
if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
- rxrpc_proto_abort("TXL", call, top);
- return false;
+ if (!rxrpc_rotate_tx_window(call, top, &summary)) {
+ rxrpc_proto_abort("TXL", call, top);
+ return false;
+ }
}
if (!rxrpc_end_tx_phase(call, true, "ETD"))
return false;
@@ -803,6 +808,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
rxrpc_propose_ack_respond_to_ack);
}
+ /* Discard any out-of-order or duplicate ACKs. */
+ if (before_eq(sp->hdr.serial, call->acks_latest)) {
+ _debug("discard ACK %d <= %d",
+ sp->hdr.serial, call->acks_latest);
+ return;
+ }
+ call->acks_latest_ts = skb->tstamp;
+ call->acks_latest = sp->hdr.serial;
+
+ /* Parse rwind and mtu sizes if provided. */
ioffset = offset + nr_acks + 3;
if (skb->len >= ioffset + sizeof(buf.info)) {
if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0)
@@ -824,23 +839,18 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
return;
}
- /* Discard any out-of-order or duplicate ACKs. */
- if (before_eq(sp->hdr.serial, call->acks_latest)) {
- _debug("discard ACK %d <= %d",
- sp->hdr.serial, call->acks_latest);
- return;
- }
- call->acks_latest_ts = skb->tstamp;
- call->acks_latest = sp->hdr.serial;
-
if (before(hard_ack, call->tx_hard_ack) ||
after(hard_ack, call->tx_top))
return rxrpc_proto_abort("AKW", call, 0);
if (nr_acks > call->tx_top - hard_ack)
return rxrpc_proto_abort("AKN", call, 0);
- if (after(hard_ack, call->tx_hard_ack))
- rxrpc_rotate_tx_window(call, hard_ack, &summary);
+ if (after(hard_ack, call->tx_hard_ack)) {
+ if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) {
+ rxrpc_end_tx_phase(call, false, "ETA");
+ return;
+ }
+ }
if (nr_acks > 0) {
if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0)
@@ -849,11 +859,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
&summary);
}
- if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
- rxrpc_end_tx_phase(call, false, "ETA");
- return;
- }
-
if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] &
RXRPC_TX_ANNO_LAST &&
summary.nr_acks == call->tx_top - hard_ack &&
@@ -875,8 +880,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
_proto("Rx ACKALL %%%u", sp->hdr.serial);
- rxrpc_rotate_tx_window(call, call->tx_top, &summary);
- if (test_bit(RXRPC_CALL_TX_LAST, &call->flags))
+ if (rxrpc_rotate_tx_window(call, call->tx_top, &summary))
rxrpc_end_tx_phase(call, false, "ETL");
}
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 44941e25f3ad..729c0e4eca21 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -411,7 +411,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) {
if (tb[TCA_GRED_LIMIT] != NULL)
sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
- return gred_change_table_def(sch, opt);
+ return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
}
if (tb[TCA_GRED_PARMS] == NULL ||
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 738c55e994c4..7e127cde1ccc 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -488,8 +488,9 @@ void sctp_assoc_set_primary(struct sctp_association *asoc,
void sctp_assoc_rm_peer(struct sctp_association *asoc,
struct sctp_transport *peer)
{
- struct list_head *pos;
- struct sctp_transport *transport;
+ struct sctp_transport *transport;
+ struct list_head *pos;
+ struct sctp_chunk *ch;
pr_debug("%s: association:%p addr:%pISpc\n",
__func__, asoc, &peer->ipaddr.sa);
@@ -547,7 +548,6 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
*/
if (!list_empty(&peer->transmitted)) {
struct sctp_transport *active = asoc->peer.active_path;
- struct sctp_chunk *ch;
/* Reset the transport of each chunk on this list */
list_for_each_entry(ch, &peer->transmitted,
@@ -569,6 +569,10 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
sctp_transport_hold(active);
}
+ list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list)
+ if (ch->transport == peer)
+ ch->transport = NULL;
+
asoc->peer.transport_count--;
sctp_transport_free(peer);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 64d2d9ea2f8c..93e60068800b 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -185,13 +185,13 @@ static void sctp_for_each_tx_datachunk(struct sctp_association *asoc,
list_for_each_entry(chunk, &t->transmitted, transmitted_list)
cb(chunk);
- list_for_each_entry(chunk, &q->retransmit, list)
+ list_for_each_entry(chunk, &q->retransmit, transmitted_list)
cb(chunk);
- list_for_each_entry(chunk, &q->sacked, list)
+ list_for_each_entry(chunk, &q->sacked, transmitted_list)
cb(chunk);
- list_for_each_entry(chunk, &q->abandoned, list)
+ list_for_each_entry(chunk, &q->abandoned, transmitted_list)
cb(chunk);
list_for_each_entry(chunk, &q->out_chunk_list, list)
@@ -248,11 +248,10 @@ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id)
spin_lock_bh(&sctp_assocs_id_lock);
asoc = (struct sctp_association *)idr_find(&sctp_assocs_id, (int)id);
+ if (asoc && (asoc->base.sk != sk || asoc->base.dead))
+ asoc = NULL;
spin_unlock_bh(&sctp_assocs_id_lock);
- if (!asoc || (asoc->base.sk != sk) || asoc->base.dead)
- return NULL;
-
return asoc;
}
@@ -3733,32 +3732,16 @@ static int sctp_setsockopt_pr_supported(struct sock *sk,
unsigned int optlen)
{
struct sctp_assoc_value params;
- struct sctp_association *asoc;
- int retval = -EINVAL;
if (optlen != sizeof(params))
- goto out;
-
- if (copy_from_user(&params, optval, optlen)) {
- retval = -EFAULT;
- goto out;
- }
-
- asoc = sctp_id2assoc(sk, params.assoc_id);
- if (asoc) {
- asoc->prsctp_enable = !!params.assoc_value;
- } else if (!params.assoc_id) {
- struct sctp_sock *sp = sctp_sk(sk);
+ return -EINVAL;
- sp->ep->prsctp_enable = !!params.assoc_value;
- } else {
- goto out;
- }
+ if (copy_from_user(&params, optval, optlen))
+ return -EFAULT;
- retval = 0;
+ sctp_sk(sk)->ep->prsctp_enable = !!params.assoc_value;
-out:
- return retval;
+ return 0;
}
static int sctp_setsockopt_default_prinfo(struct sock *sk,
diff --git a/net/socket.c b/net/socket.c
index 35fa349ba274..d9e2989c10c4 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2774,9 +2774,14 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
copy_in_user(&rxnfc->fs.ring_cookie,
&compat_rxnfc->fs.ring_cookie,
(void __user *)(&rxnfc->fs.location + 1) -
- (void __user *)&rxnfc->fs.ring_cookie) ||
- copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt,
- sizeof(rxnfc->rule_cnt)))
+ (void __user *)&rxnfc->fs.ring_cookie))
+ return -EFAULT;
+ if (ethcmd == ETHTOOL_GRXCLSRLALL) {
+ if (put_user(rule_cnt, &rxnfc->rule_cnt))
+ return -EFAULT;
+ } else if (copy_in_user(&rxnfc->rule_cnt,
+ &compat_rxnfc->rule_cnt,
+ sizeof(rxnfc->rule_cnt)))
return -EFAULT;
}
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index f1df9837f1ac..1ac08dcbf85d 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -281,13 +281,7 @@ static bool generic_key_to_expire(struct rpc_cred *cred)
{
struct auth_cred *acred = &container_of(cred, struct generic_cred,
gc_base)->acred;
- bool ret;
-
- get_rpccred(cred);
- ret = test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
- put_rpccred(cred);
-
- return ret;
+ return test_bit(RPC_CRED_KEY_EXPIRE_SOON, &acred->ac_flags);
}
static const struct rpc_credops generic_credops = {
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 9c9db55a0c1e..064f20bb845a 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -1038,7 +1038,7 @@ static void call_xpt_users(struct svc_xprt *xprt)
spin_lock(&xprt->xpt_lock);
while (!list_empty(&xprt->xpt_users)) {
u = list_first_entry(&xprt->xpt_users, struct svc_xpt_user, list);
- list_del(&u->list);
+ list_del_init(&u->list);
u->callback(u);
}
spin_unlock(&xprt->xpt_lock);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 7f1071e103ca..69846c6574ef 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -512,7 +512,7 @@ EXPORT_SYMBOL_GPL(xdr_commit_encode);
static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
size_t nbytes)
{
- static __be32 *p;
+ __be32 *p;
int space_left;
int frag1bytes, frag2bytes;
@@ -639,11 +639,10 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
WARN_ON_ONCE(xdr->iov);
return;
}
- if (fraglen) {
+ if (fraglen)
xdr->end = head->iov_base + head->iov_len;
- xdr->page_ptr--;
- }
/* (otherwise assume xdr->end is already set) */
+ xdr->page_ptr--;
head->iov_len = len;
buf->len = len;
xdr->p = head->iov_base + head->iov_len;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 25bc5c30d7fb..9d3f047305ce 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2277,8 +2277,8 @@ void tipc_sk_reinit(struct net *net)
do {
tsk = ERR_PTR(rhashtable_walk_start(&iter));
- if (tsk)
- continue;
+ if (IS_ERR(tsk))
+ goto walk_stop;
while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
spin_lock_bh(&tsk->sk.sk_lock.slock);
@@ -2287,7 +2287,7 @@ void tipc_sk_reinit(struct net *net)
msg_set_orignode(msg, tn->own_addr);
spin_unlock_bh(&tsk->sk.sk_lock.slock);
}
-
+walk_stop:
rhashtable_walk_stop(&iter);
} while (tsk == ERR_PTR(-EAGAIN));
}
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 271cd66e4b3b..d62affeb2a38 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -256,7 +256,9 @@ static void tipc_subscrp_delete(struct tipc_subscription *sub)
static void tipc_subscrp_cancel(struct tipc_subscr *s,
struct tipc_subscriber *subscriber)
{
+ tipc_subscrb_get(subscriber);
tipc_subscrb_subscrp_delete(subscriber, s);
+ tipc_subscrb_put(subscriber);
}
static struct tipc_subscription *tipc_subscrp_create(struct net *net,
@@ -387,7 +389,7 @@ int tipc_topsrv_start(struct net *net)
topsrv->tipc_conn_new = tipc_subscrb_connect_cb;
topsrv->tipc_conn_release = tipc_subscrb_release_cb;
- strncpy(topsrv->name, name, strlen(name) + 1);
+ strscpy(topsrv->name, name, sizeof(topsrv->name));
tn->topsrv = topsrv;
atomic_set(&tn->subscription_count, 0);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 6afac189d20f..549d0a4083b3 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3422,6 +3422,7 @@ static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband,
return false;
/* check availability */
+ ridx = array_index_nospec(ridx, IEEE80211_HT_MCS_MASK_LEN);
if (sband->ht_cap.mcs.rx_mask[ridx] & rbit)
mcs[ridx] |= rbit;
else
@@ -11148,6 +11149,7 @@ static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info)
return -EOPNOTSUPP;
if (!info->attrs[NL80211_ATTR_MDID] ||
+ !info->attrs[NL80211_ATTR_IE] ||
!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
return -EINVAL;
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 11235e2e4d5d..608f6eed9f04 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2298,6 +2298,7 @@ static int regulatory_hint_core(const char *alpha2)
request->alpha2[0] = alpha2[0];
request->alpha2[1] = alpha2[1];
request->initiator = NL80211_REGDOM_SET_BY_CORE;
+ request->wiphy_idx = WIPHY_IDX_INVALID;
queue_regulatory_request(request);
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 35ad69fd0838..435f904c1be5 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -978,13 +978,23 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
return NULL;
}
+/*
+ * Update RX channel information based on the available frame payload
+ * information. This is mainly for the 2.4 GHz band where frames can be received
+ * from neighboring channels and the Beacon frames use the DSSS Parameter Set
+ * element to indicate the current (transmitting) channel, but this might also
+ * be needed on other bands if RX frequency does not match with the actual
+ * operating channel of a BSS.
+ */
static struct ieee80211_channel *
cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen,
- struct ieee80211_channel *channel)
+ struct ieee80211_channel *channel,
+ enum nl80211_bss_scan_width scan_width)
{
const u8 *tmp;
u32 freq;
int channel_number = -1;
+ struct ieee80211_channel *alt_channel;
tmp = cfg80211_find_ie(WLAN_EID_DS_PARAMS, ie, ielen);
if (tmp && tmp[1] == 1) {
@@ -998,16 +1008,45 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen,
}
}
- if (channel_number < 0)
+ if (channel_number < 0) {
+ /* No channel information in frame payload */
return channel;
+ }
freq = ieee80211_channel_to_frequency(channel_number, channel->band);
- channel = ieee80211_get_channel(wiphy, freq);
- if (!channel)
- return NULL;
- if (channel->flags & IEEE80211_CHAN_DISABLED)
+ alt_channel = ieee80211_get_channel(wiphy, freq);
+ if (!alt_channel) {
+ if (channel->band == NL80211_BAND_2GHZ) {
+ /*
+ * Better not allow unexpected channels when that could
+ * be going beyond the 1-11 range (e.g., discovering
+ * BSS on channel 12 when radio is configured for
+ * channel 11).
+ */
+ return NULL;
+ }
+
+ /* No match for the payload channel number - ignore it */
+ return channel;
+ }
+
+ if (scan_width == NL80211_BSS_CHAN_WIDTH_10 ||
+ scan_width == NL80211_BSS_CHAN_WIDTH_5) {
+ /*
+ * Ignore channel number in 5 and 10 MHz channels where there
+ * may not be an n:1 or 1:n mapping between frequencies and
+ * channel numbers.
+ */
+ return channel;
+ }
+
+ /*
+ * Use the channel determined through the payload channel number
+ * instead of the RX channel reported by the driver.
+ */
+ if (alt_channel->flags & IEEE80211_CHAN_DISABLED)
return NULL;
- return channel;
+ return alt_channel;
}
/* Returned bss is reference counted and must be cleaned up appropriately. */
@@ -1032,7 +1071,8 @@ cfg80211_inform_bss_data(struct wiphy *wiphy,
(data->signal < 0 || data->signal > 100)))
return NULL;
- channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan);
+ channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan,
+ data->scan_width);
if (!channel)
return NULL;
@@ -1130,7 +1170,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
return NULL;
channel = cfg80211_get_bss_channel(wiphy, mgmt->u.beacon.variable,
- ielen, data->chan);
+ ielen, data->chan, data->scan_width);
if (!channel)
return NULL;
diff --git a/net/wireless/util.c b/net/wireless/util.c
index e04919d67429..42fde4a72516 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1435,7 +1435,7 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef,
u8 *op_class)
{
u8 vht_opclass;
- u16 freq = chandef->center_freq1;
+ u32 freq = chandef->center_freq1;
if (freq >= 2412 && freq <= 2472) {
if (chandef->width > NL80211_CHAN_WIDTH_40)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d0dcfc68c043..155b1591b17a 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -659,9 +659,9 @@ static void xfrm_hash_rebuild(struct work_struct *work)
break;
}
if (newpos)
- hlist_add_behind(&policy->bydst, newpos);
+ hlist_add_behind_rcu(&policy->bydst, newpos);
else
- hlist_add_head(&policy->bydst, chain);
+ hlist_add_head_rcu(&policy->bydst, chain);
}
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
@@ -800,9 +800,9 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
break;
}
if (newpos)
- hlist_add_behind(&policy->bydst, newpos);
+ hlist_add_behind_rcu(&policy->bydst, newpos);
else
- hlist_add_head(&policy->bydst, chain);
+ hlist_add_head_rcu(&policy->bydst, chain);
__xfrm_policy_link(policy, dir);
atomic_inc(&net->xfrm.flow_cache_genid);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 6e768093d7c8..026770884d46 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -151,10 +151,16 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
err = -EINVAL;
switch (p->family) {
case AF_INET:
+ if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32)
+ goto out;
+
break;
case AF_INET6:
#if IS_ENABLED(CONFIG_IPV6)
+ if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128)
+ goto out;
+
break;
#else
err = -EAFNOSUPPORT;
@@ -1316,10 +1322,16 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
switch (p->sel.family) {
case AF_INET:
+ if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32)
+ return -EINVAL;
+
break;
case AF_INET6:
#if IS_ENABLED(CONFIG_IPV6)
+ if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128)
+ return -EINVAL;
+
break;
#else
return -EAFNOSUPPORT;
@@ -1400,6 +1412,9 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
(ut[i].family != prev_family))
return -EINVAL;
+ if (ut[i].mode >= XFRM_MODE_MAX)
+ return -EINVAL;
+
prev_family = ut[i].family;
switch (ut[i].family) {