summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c9
-rw-r--r--net/9p/client.c7
-rw-r--r--net/9p/trans_virtio.c13
-rw-r--r--net/bluetooth/bnep/core.c11
-rw-r--r--net/bluetooth/cmtp/core.c17
-rw-r--r--net/bluetooth/hci_sock.c3
-rw-r--r--net/bluetooth/hidp/core.c33
-rw-r--r--net/bluetooth/l2cap_core.c80
-rw-r--r--net/bluetooth/smp.c35
-rw-r--r--net/bridge/br_device.c3
-rw-r--r--net/bridge/br_forward.c3
-rw-r--r--net/bridge/br_input.c1
-rw-r--r--net/bridge/br_mdb.c3
-rw-r--r--net/bridge/br_netfilter_hooks.c21
-rw-r--r--net/bridge/br_netlink.c14
-rw-r--r--net/bridge/br_stp_if.c2
-rw-r--r--net/bridge/br_stp_timer.c2
-rw-r--r--net/caif/cfpkt_skbuff.c6
-rw-r--r--net/ceph/crypto.c4
-rw-r--r--net/ceph/messenger.c6
-rw-r--r--net/ceph/osd_client.c3
-rw-r--r--net/ceph/osdmap.c1
-rw-r--r--net/compat.c17
-rw-r--r--net/core/datagram.c36
-rw-r--r--net/core/dev.c99
-rw-r--r--net/core/dev_ioctl.c1
-rw-r--r--net/core/dst.c37
-rw-r--r--net/core/ethtool.c11
-rw-r--r--net/core/filter.c1
-rw-r--r--net/core/neighbour.c6
-rw-r--r--net/core/netclassid_cgroup.c32
-rw-r--r--net/core/netpoll.c10
-rw-r--r--net/core/rtnetlink.c49
-rw-r--r--net/core/skbuff.c50
-rw-r--r--net/core/sock.c44
-rw-r--r--net/core/sock_reuseport.c12
-rw-r--r--net/dccp/ccids/ccid2.c1
-rw-r--r--net/dccp/feat.c7
-rw-r--r--net/dccp/input.c13
-rw-r--r--net/dccp/ipv4.c17
-rw-r--r--net/dccp/ipv6.c15
-rw-r--r--net/dccp/minisocks.c25
-rw-r--r--net/dccp/proto.c19
-rw-r--r--net/decnet/dn_route.c14
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c4
-rw-r--r--net/dns_resolver/dns_key.c2
-rw-r--r--net/dsa/Kconfig5
-rw-r--r--net/dsa/slave.c34
-rw-r--r--net/ieee802154/6lowpan/reassembly.c11
-rw-r--r--net/ipv4/af_inet.c13
-rw-r--r--net/ipv4/ah4.c3
-rw-r--r--net/ipv4/arp.c12
-rw-r--r--net/ipv4/cipso_ipv4.c24
-rw-r--r--net/ipv4/fib_frontend.c27
-rw-r--r--net/ipv4/fib_semantics.c29
-rw-r--r--net/ipv4/fib_trie.c26
-rw-r--r--net/ipv4/gre_offload.c2
-rw-r--r--net/ipv4/igmp.c28
-rw-r--r--net/ipv4/inet_connection_sock.c11
-rw-r--r--net/ipv4/inet_fragment.c4
-rw-r--r--net/ipv4/inet_hashtables.c5
-rw-r--r--net/ipv4/ip_fragment.c12
-rw-r--r--net/ipv4/ip_output.c7
-rw-r--r--net/ipv4/ip_sockglue.c24
-rw-r--r--net/ipv4/ip_tunnel.c4
-rw-r--r--net/ipv4/ip_vti.c3
-rw-r--r--net/ipv4/ipip.c59
-rw-r--r--net/ipv4/netfilter.c7
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c1
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c2
-rw-r--r--net/ipv4/ping.c5
-rw-r--r--net/ipv4/raw.c3
-rw-r--r--net/ipv4/route.c15
-rw-r--r--net/ipv4/syncookies.c3
-rw-r--r--net/ipv4/sysctl_net_ipv4.c2
-rw-r--r--net/ipv4/tcp.c32
-rw-r--r--net/ipv4/tcp_bbr.c49
-rw-r--r--net/ipv4/tcp_cong.c12
-rw-r--r--net/ipv4/tcp_input.c32
-rw-r--r--net/ipv4/tcp_ipv4.c31
-rw-r--r--net/ipv4/tcp_lp.c6
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_nv.c2
-rw-r--r--net/ipv4/tcp_output.c67
-rw-r--r--net/ipv4/tcp_probe.c4
-rw-r--r--net/ipv4/tcp_timer.c9
-rw-r--r--net/ipv4/udp.c7
-rw-r--r--net/ipv4/udp_offload.c7
-rw-r--r--net/ipv6/addrconf.c81
-rw-r--r--net/ipv6/calipso.c6
-rw-r--r--net/ipv6/datagram.c24
-rw-r--r--net/ipv6/fib6_rules.c22
-rw-r--r--net/ipv6/ila/ila_xlat.c1
-rw-r--r--net/ipv6/ip6_fib.c92
-rw-r--r--net/ipv6/ip6_flowlabel.c1
-rw-r--r--net/ipv6/ip6_gre.c62
-rw-r--r--net/ipv6/ip6_offload.c15
-rw-r--r--net/ipv6/ip6_output.c47
-rw-r--r--net/ipv6/ip6_tunnel.c65
-rw-r--r--net/ipv6/ip6_vti.c7
-rw-r--r--net/ipv6/ip6mr.c13
-rw-r--r--net/ipv6/ipv6_sockglue.c16
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c13
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c3
-rw-r--r--net/ipv6/output_core.c18
-rw-r--r--net/ipv6/ping.c2
-rw-r--r--net/ipv6/proc.c2
-rw-r--r--net/ipv6/raw.c7
-rw-r--r--net/ipv6/reassembly.c12
-rw-r--r--net/ipv6/route.c74
-rw-r--r--net/ipv6/syncookies.c1
-rw-r--r--net/ipv6/tcp_ipv6.c21
-rw-r--r--net/ipv6/udp.c7
-rw-r--r--net/ipv6/udp_offload.c8
-rw-r--r--net/ipv6/xfrm6_mode_ro.c2
-rw-r--r--net/ipv6/xfrm6_mode_transport.c2
-rw-r--r--net/ipx/af_ipx.c5
-rw-r--r--net/irda/af_irda.c2
-rw-r--r--net/irda/irqueue.c34
-rw-r--r--net/kcm/kcmsock.c56
-rw-r--r--net/key/af_key.c158
-rw-r--r--net/l2tp/l2tp_core.c174
-rw-r--r--net/l2tp/l2tp_core.h14
-rw-r--r--net/l2tp/l2tp_debugfs.c10
-rw-r--r--net/l2tp/l2tp_eth.c10
-rw-r--r--net/l2tp/l2tp_ip.c24
-rw-r--r--net/l2tp/l2tp_ip6.c27
-rw-r--r--net/l2tp/l2tp_netlink.c52
-rw-r--r--net/l2tp/l2tp_ppp.c97
-rw-r--r--net/llc/llc_conn.c3
-rw-r--r--net/llc/llc_sap.c3
-rw-r--r--net/mac80211/agg-rx.c1
-rw-r--r--net/mac80211/cfg.c2
-rw-r--r--net/mac80211/ibss.c6
-rw-r--r--net/mac80211/ieee80211_i.h1
-rw-r--r--net/mac80211/iface.c41
-rw-r--r--net/mac80211/key.c54
-rw-r--r--net/mac80211/main.c13
-rw-r--r--net/mac80211/mesh.c3
-rw-r--r--net/mac80211/mesh_plink.c14
-rw-r--r--net/mac80211/mesh_sync.c11
-rw-r--r--net/mac80211/offchannel.c2
-rw-r--r--net/mac80211/pm.c1
-rw-r--r--net/mac80211/rx.c135
-rw-r--r--net/mac80211/sta_info.c4
-rw-r--r--net/mac80211/sta_info.h6
-rw-r--r--net/mac80211/status.c3
-rw-r--r--net/mac80211/tx.c36
-rw-r--r--net/mac80211/vht.c4
-rw-r--r--net/mac80211/wpa.c9
-rw-r--r--net/mpls/af_mpls.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c19
-rw-r--r--net/netfilter/nf_conntrack_core.c98
-rw-r--r--net/netfilter/nf_conntrack_ecache.c2
-rw-r--r--net/netfilter/nf_conntrack_expect.c4
-rw-r--r--net/netfilter/nf_conntrack_extend.c13
-rw-r--r--net/netfilter/nf_conntrack_helper.c39
-rw-r--r--net/netfilter/nf_conntrack_netlink.c5
-rw-r--r--net/netfilter/nf_conntrack_sip.c2
-rw-r--r--net/netfilter/nf_log.c1
-rw-r--r--net/netfilter/nf_nat_core.c151
-rw-r--r--net/netfilter/nf_tables_api.c18
-rw-r--r--net/netfilter/nfnetlink_cthelper.c20
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c2
-rw-r--r--net/netfilter/nft_log.c3
-rw-r--r--net/netfilter/nft_meta.c28
-rw-r--r--net/netfilter/nft_queue.c2
-rw-r--r--net/netfilter/nft_set_rbtree.c22
-rw-r--r--net/netfilter/xt_TCPMSS.c6
-rw-r--r--net/netlink/af_netlink.c27
-rw-r--r--net/netlink/af_netlink.h1
-rw-r--r--net/nfc/core.c31
-rw-r--r--net/nfc/llcp_sock.c9
-rw-r--r--net/nfc/nci/core.c3
-rw-r--r--net/nfc/netlink.c4
-rw-r--r--net/openvswitch/actions.c1
-rw-r--r--net/openvswitch/conntrack.c8
-rw-r--r--net/openvswitch/datapath.c7
-rw-r--r--net/openvswitch/datapath.h2
-rw-r--r--net/openvswitch/flow_netlink.c4
-rw-r--r--net/packet/af_packet.c173
-rw-r--r--net/rds/ib_cm.c47
-rw-r--r--net/rds/ib_frmr.c5
-rw-r--r--net/rds/ib_send.c25
-rw-r--r--net/rds/rdma.c19
-rw-r--r--net/rds/rds.h4
-rw-r--r--net/rds/send.c46
-rw-r--r--net/rds/tcp_listen.c2
-rw-r--r--net/rxrpc/key.c64
-rw-r--r--net/sched/act_api.c11
-rw-r--r--net/sched/act_connmark.c3
-rw-r--r--net/sched/act_ipt.c4
-rw-r--r--net/sched/act_skbmod.c1
-rw-r--r--net/sched/cls_matchall.c1
-rw-r--r--net/sched/sch_api.c8
-rw-r--r--net/sched/sch_generic.c1
-rw-r--r--net/sched/sch_hhf.c8
-rw-r--r--net/sched/sch_mq.c10
-rw-r--r--net/sched/sch_mqprio.c19
-rw-r--r--net/sched/sch_sfq.c8
-rw-r--r--net/sctp/input.c18
-rw-r--r--net/sctp/ipv6.c62
-rw-r--r--net/sctp/offload.c2
-rw-r--r--net/sctp/sctp_diag.c7
-rw-r--r--net/sctp/socket.c65
-rw-r--r--net/sctp/ulpqueue.c3
-rw-r--r--net/socket.c4
-rw-r--r--net/strparser/strparser.c1
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c3
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c2
-rw-r--r--net/sunrpc/svc.c148
-rw-r--r--net/sunrpc/svcsock.c22
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c62
-rw-r--r--net/sunrpc/xprtrdma/transport.c2
-rw-r--r--net/sunrpc/xprtrdma/verbs.c16
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h3
-rw-r--r--net/tipc/discover.c4
-rw-r--r--net/tipc/link.c2
-rw-r--r--net/tipc/msg.c20
-rw-r--r--net/tipc/msg.h2
-rw-r--r--net/tipc/name_distr.c2
-rw-r--r--net/tipc/net.c4
-rw-r--r--net/tipc/netlink_compat.c6
-rw-r--r--net/tipc/node.c9
-rw-r--r--net/tipc/server.c32
-rw-r--r--net/tipc/socket.c30
-rw-r--r--net/tipc/subscr.c124
-rw-r--r--net/tipc/subscr.h1
-rw-r--r--net/unix/af_unix.c7
-rw-r--r--net/unix/diag.c2
-rw-r--r--net/unix/garbage.c17
-rw-r--r--net/vmw_vsock/af_vsock.c21
-rw-r--r--net/wireless/nl80211.c158
-rw-r--r--net/wireless/sme.c50
-rw-r--r--net/wireless/sysfs.c10
-rw-r--r--net/wireless/util.c10
-rw-r--r--net/xfrm/xfrm_policy.c72
-rw-r--r--net/xfrm/xfrm_user.c34
238 files changed, 3215 insertions, 1896 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index f2531ad66b68..4a47074d1d7f 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -277,7 +277,8 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
return 0;
out_free_newdev:
- free_netdev(new_dev);
+ if (new_dev->reg_state == NETREG_UNINITIALIZED)
+ free_netdev(new_dev);
return err;
}
@@ -375,6 +376,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
dev->name);
vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
}
+ if (event == NETDEV_DOWN &&
+ (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+ vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
vlan_info = rtnl_dereference(dev->vlan_info);
if (!vlan_info)
@@ -422,9 +426,6 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
struct net_device *tmp;
LIST_HEAD(close_list);
- if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
- vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
-
/* Put all VLANs for this dev in the down state too. */
vlan_group_for_each_dev(grp, i, vlandev) {
flgs = vlandev->flags;
diff --git a/net/9p/client.c b/net/9p/client.c
index 3fc94a49ccd5..1fd60190177e 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -749,8 +749,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
}
again:
/* Wait for the response */
- err = wait_event_interruptible(*req->wq,
- req->status >= REQ_STATUS_RCVD);
+ err = wait_event_killable(*req->wq, req->status >= REQ_STATUS_RCVD);
/*
* Make sure our req is coherent with regard to updates in other
@@ -2101,6 +2100,10 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
trace_9p_protocol_dump(clnt, req->rc);
goto free_and_error;
}
+ if (rsize < count) {
+ pr_err("bogus RREADDIR count (%d > %d)\n", count, rsize);
+ count = rsize;
+ }
p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index f24b25c25106..f3a4efcf1456 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -286,8 +286,8 @@ req_retry:
if (err == -ENOSPC) {
chan->ring_bufs_avail = 0;
spin_unlock_irqrestore(&chan->lock, flags);
- err = wait_event_interruptible(*chan->vc_wq,
- chan->ring_bufs_avail);
+ err = wait_event_killable(*chan->vc_wq,
+ chan->ring_bufs_avail);
if (err == -ERESTARTSYS)
return err;
@@ -327,7 +327,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
* Other zc request to finish here
*/
if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
- err = wait_event_interruptible(vp_wq,
+ err = wait_event_killable(vp_wq,
(atomic_read(&vp_pinned) < chan->p9_max_pages));
if (err == -ERESTARTSYS)
return err;
@@ -471,8 +471,8 @@ req_retry_pinned:
if (err == -ENOSPC) {
chan->ring_bufs_avail = 0;
spin_unlock_irqrestore(&chan->lock, flags);
- err = wait_event_interruptible(*chan->vc_wq,
- chan->ring_bufs_avail);
+ err = wait_event_killable(*chan->vc_wq,
+ chan->ring_bufs_avail);
if (err == -ERESTARTSYS)
goto err_out;
@@ -489,8 +489,7 @@ req_retry_pinned:
virtqueue_kick(chan->vq);
spin_unlock_irqrestore(&chan->lock, flags);
p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");
- err = wait_event_interruptible(*req->wq,
- req->status >= REQ_STATUS_RCVD);
+ err = wait_event_killable(*req->wq, req->status >= REQ_STATUS_RCVD);
/*
* Non kernel buffers are pinned, unpin them
*/
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index fbf251fef70f..4d6b94d7ce5f 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -484,16 +484,16 @@ static int bnep_session(void *arg)
struct net_device *dev = s->dev;
struct sock *sk = s->sock->sk;
struct sk_buff *skb;
- wait_queue_t wait;
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
BT_DBG("");
set_user_nice(current, -15);
- init_waitqueue_entry(&wait, current);
add_wait_queue(sk_sleep(sk), &wait);
while (1) {
- set_current_state(TASK_INTERRUPTIBLE);
+ /* Ensure session->terminate is updated */
+ smp_mb__before_atomic();
if (atomic_read(&s->terminate))
break;
@@ -515,9 +515,8 @@ static int bnep_session(void *arg)
break;
netif_wake_queue(dev);
- schedule();
+ wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
- __set_current_state(TASK_RUNNING);
remove_wait_queue(sk_sleep(sk), &wait);
/* Cleanup session */
@@ -666,7 +665,7 @@ int bnep_del_connection(struct bnep_conndel_req *req)
s = __bnep_get_session(req->dst);
if (s) {
atomic_inc(&s->terminate);
- wake_up_process(s->task);
+ wake_up_interruptible(sk_sleep(s->sock->sk));
} else
err = -ENOENT;
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 9e59b6654126..1152ce34dad4 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -280,16 +280,16 @@ static int cmtp_session(void *arg)
struct cmtp_session *session = arg;
struct sock *sk = session->sock->sk;
struct sk_buff *skb;
- wait_queue_t wait;
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
BT_DBG("session %p", session);
set_user_nice(current, -15);
- init_waitqueue_entry(&wait, current);
add_wait_queue(sk_sleep(sk), &wait);
while (1) {
- set_current_state(TASK_INTERRUPTIBLE);
+ /* Ensure session->terminate is updated */
+ smp_mb__before_atomic();
if (atomic_read(&session->terminate))
break;
@@ -306,9 +306,8 @@ static int cmtp_session(void *arg)
cmtp_process_transmit(session);
- schedule();
+ wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
- __set_current_state(TASK_RUNNING);
remove_wait_queue(sk_sleep(sk), &wait);
down_write(&cmtp_session_sem);
@@ -393,7 +392,7 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
err = cmtp_attach_device(session);
if (err < 0) {
atomic_inc(&session->terminate);
- wake_up_process(session->task);
+ wake_up_interruptible(sk_sleep(session->sock->sk));
up_write(&cmtp_session_sem);
return err;
}
@@ -431,7 +430,11 @@ int cmtp_del_connection(struct cmtp_conndel_req *req)
/* Stop session thread */
atomic_inc(&session->terminate);
- wake_up_process(session->task);
+
+ /* Ensure session->terminate is updated */
+ smp_mb__after_atomic();
+
+ wake_up_interruptible(sk_sleep(session->sock->sk));
} else
err = -ENOENT;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 48f9471e7c85..c88a6007e643 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1680,7 +1680,8 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
if (msg->msg_flags & MSG_OOB)
return -EOPNOTSUPP;
- if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_NOSIGNAL|MSG_ERRQUEUE))
+ if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_NOSIGNAL|MSG_ERRQUEUE|
+ MSG_CMSG_COMPAT))
return -EINVAL;
if (len < 4 || len > HCI_MAX_FRAME_SIZE)
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 0bec4588c3c8..1fc076420d1e 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -36,6 +36,7 @@
#define VERSION "1.2"
static DECLARE_RWSEM(hidp_session_sem);
+static DECLARE_WAIT_QUEUE_HEAD(hidp_session_wq);
static LIST_HEAD(hidp_session_list);
static unsigned char hidp_keycode[256] = {
@@ -1068,12 +1069,12 @@ static int hidp_session_start_sync(struct hidp_session *session)
* Wake up session thread and notify it to stop. This is asynchronous and
* returns immediately. Call this whenever a runtime error occurs and you want
* the session to stop.
- * Note: wake_up_process() performs any necessary memory-barriers for us.
+ * Note: wake_up_interruptible() performs any necessary memory-barriers for us.
*/
static void hidp_session_terminate(struct hidp_session *session)
{
atomic_inc(&session->terminate);
- wake_up_process(session->task);
+ wake_up_interruptible(&hidp_session_wq);
}
/*
@@ -1180,7 +1181,9 @@ static void hidp_session_run(struct hidp_session *session)
struct sock *ctrl_sk = session->ctrl_sock->sk;
struct sock *intr_sk = session->intr_sock->sk;
struct sk_buff *skb;
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ add_wait_queue(&hidp_session_wq, &wait);
for (;;) {
/*
* This thread can be woken up two ways:
@@ -1188,12 +1191,10 @@ static void hidp_session_run(struct hidp_session *session)
* session->terminate flag and wakes this thread up.
* - Via modifying the socket state of ctrl/intr_sock. This
* thread is woken up by ->sk_state_changed().
- *
- * Note: set_current_state() performs any necessary
- * memory-barriers for us.
*/
- set_current_state(TASK_INTERRUPTIBLE);
+ /* Ensure session->terminate is updated */
+ smp_mb__before_atomic();
if (atomic_read(&session->terminate))
break;
@@ -1227,11 +1228,22 @@ static void hidp_session_run(struct hidp_session *session)
hidp_process_transmit(session, &session->ctrl_transmit,
session->ctrl_sock);
- schedule();
+ wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
}
+ remove_wait_queue(&hidp_session_wq, &wait);
atomic_inc(&session->terminate);
- set_current_state(TASK_RUNNING);
+
+ /* Ensure session->terminate is updated */
+ smp_mb__after_atomic();
+}
+
+static int hidp_session_wake_function(wait_queue_t *wait,
+ unsigned int mode,
+ int sync, void *key)
+{
+ wake_up_interruptible(&hidp_session_wq);
+ return false;
}
/*
@@ -1244,7 +1256,8 @@ static void hidp_session_run(struct hidp_session *session)
static int hidp_session_thread(void *arg)
{
struct hidp_session *session = arg;
- wait_queue_t ctrl_wait, intr_wait;
+ DEFINE_WAIT_FUNC(ctrl_wait, hidp_session_wake_function);
+ DEFINE_WAIT_FUNC(intr_wait, hidp_session_wake_function);
BT_DBG("session %p", session);
@@ -1254,8 +1267,6 @@ static int hidp_session_thread(void *arg)
set_user_nice(current, -15);
hidp_set_timer(session);
- init_waitqueue_entry(&ctrl_wait, current);
- init_waitqueue_entry(&intr_wait, current);
add_wait_queue(sk_sleep(session->ctrl_sock->sk), &ctrl_wait);
add_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait);
/* This memory barrier is paired with wq_has_sleeper(). See
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 577f1c01454a..ffd09c1675d4 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -58,7 +58,7 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
u8 code, u8 ident, u16 dlen, void *data);
static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len,
void *data);
-static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data);
+static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data, size_t data_size);
static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err);
static void l2cap_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
@@ -1473,7 +1473,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
set_bit(CONF_REQ_SENT, &chan->conf_state);
l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
- l2cap_build_conf_req(chan, buf), buf);
+ l2cap_build_conf_req(chan, buf, sizeof(buf)), buf);
chan->num_conf_req++;
}
@@ -2977,12 +2977,15 @@ static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen,
return len;
}
-static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
+static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val, size_t size)
{
struct l2cap_conf_opt *opt = *ptr;
BT_DBG("type 0x%2.2x len %u val 0x%lx", type, len, val);
+ if (size < L2CAP_CONF_OPT_SIZE + len)
+ return;
+
opt->type = type;
opt->len = len;
@@ -3007,7 +3010,7 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
*ptr += L2CAP_CONF_OPT_SIZE + len;
}
-static void l2cap_add_opt_efs(void **ptr, struct l2cap_chan *chan)
+static void l2cap_add_opt_efs(void **ptr, struct l2cap_chan *chan, size_t size)
{
struct l2cap_conf_efs efs;
@@ -3035,7 +3038,7 @@ static void l2cap_add_opt_efs(void **ptr, struct l2cap_chan *chan)
}
l2cap_add_conf_opt(ptr, L2CAP_CONF_EFS, sizeof(efs),
- (unsigned long) &efs);
+ (unsigned long) &efs, size);
}
static void l2cap_ack_timeout(struct work_struct *work)
@@ -3181,11 +3184,12 @@ static inline void l2cap_txwin_setup(struct l2cap_chan *chan)
chan->ack_win = chan->tx_win;
}
-static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data)
+static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data, size_t data_size)
{
struct l2cap_conf_req *req = data;
struct l2cap_conf_rfc rfc = { .mode = chan->mode };
void *ptr = req->data;
+ void *endptr = data + data_size;
u16 size;
BT_DBG("chan %p", chan);
@@ -3210,7 +3214,7 @@ static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data)
done:
if (chan->imtu != L2CAP_DEFAULT_MTU)
- l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu);
+ l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr);
switch (chan->mode) {
case L2CAP_MODE_BASIC:
@@ -3229,7 +3233,7 @@ done:
rfc.max_pdu_size = 0;
l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
- (unsigned long) &rfc);
+ (unsigned long) &rfc, endptr - ptr);
break;
case L2CAP_MODE_ERTM:
@@ -3249,21 +3253,21 @@ done:
L2CAP_DEFAULT_TX_WINDOW);
l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
- (unsigned long) &rfc);
+ (unsigned long) &rfc, endptr - ptr);
if (test_bit(FLAG_EFS_ENABLE, &chan->flags))
- l2cap_add_opt_efs(&ptr, chan);
+ l2cap_add_opt_efs(&ptr, chan, endptr - ptr);
if (test_bit(FLAG_EXT_CTRL, &chan->flags))
l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2,
- chan->tx_win);
+ chan->tx_win, endptr - ptr);
if (chan->conn->feat_mask & L2CAP_FEAT_FCS)
if (chan->fcs == L2CAP_FCS_NONE ||
test_bit(CONF_RECV_NO_FCS, &chan->conf_state)) {
chan->fcs = L2CAP_FCS_NONE;
l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1,
- chan->fcs);
+ chan->fcs, endptr - ptr);
}
break;
@@ -3281,17 +3285,17 @@ done:
rfc.max_pdu_size = cpu_to_le16(size);
l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
- (unsigned long) &rfc);
+ (unsigned long) &rfc, endptr - ptr);
if (test_bit(FLAG_EFS_ENABLE, &chan->flags))
- l2cap_add_opt_efs(&ptr, chan);
+ l2cap_add_opt_efs(&ptr, chan, endptr - ptr);
if (chan->conn->feat_mask & L2CAP_FEAT_FCS)
if (chan->fcs == L2CAP_FCS_NONE ||
test_bit(CONF_RECV_NO_FCS, &chan->conf_state)) {
chan->fcs = L2CAP_FCS_NONE;
l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1,
- chan->fcs);
+ chan->fcs, endptr - ptr);
}
break;
}
@@ -3302,10 +3306,11 @@ done:
return ptr - data;
}
-static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data)
+static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data_size)
{
struct l2cap_conf_rsp *rsp = data;
void *ptr = rsp->data;
+ void *endptr = data + data_size;
void *req = chan->conf_req;
int len = chan->conf_len;
int type, hint, olen;
@@ -3407,7 +3412,7 @@ done:
return -ECONNREFUSED;
l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
- (unsigned long) &rfc);
+ (unsigned long) &rfc, endptr - ptr);
}
if (result == L2CAP_CONF_SUCCESS) {
@@ -3420,7 +3425,7 @@ done:
chan->omtu = mtu;
set_bit(CONF_MTU_DONE, &chan->conf_state);
}
- l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->omtu);
+ l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->omtu, endptr - ptr);
if (remote_efs) {
if (chan->local_stype != L2CAP_SERV_NOTRAFIC &&
@@ -3434,7 +3439,7 @@ done:
l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS,
sizeof(efs),
- (unsigned long) &efs);
+ (unsigned long) &efs, endptr - ptr);
} else {
/* Send PENDING Conf Rsp */
result = L2CAP_CONF_PENDING;
@@ -3467,7 +3472,7 @@ done:
set_bit(CONF_MODE_DONE, &chan->conf_state);
l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
- sizeof(rfc), (unsigned long) &rfc);
+ sizeof(rfc), (unsigned long) &rfc, endptr - ptr);
if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) {
chan->remote_id = efs.id;
@@ -3481,7 +3486,7 @@ done:
le32_to_cpu(efs.sdu_itime);
l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS,
sizeof(efs),
- (unsigned long) &efs);
+ (unsigned long) &efs, endptr - ptr);
}
break;
@@ -3495,7 +3500,7 @@ done:
set_bit(CONF_MODE_DONE, &chan->conf_state);
l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
- (unsigned long) &rfc);
+ (unsigned long) &rfc, endptr - ptr);
break;
@@ -3517,10 +3522,11 @@ done:
}
static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
- void *data, u16 *result)
+ void *data, size_t size, u16 *result)
{
struct l2cap_conf_req *req = data;
void *ptr = req->data;
+ void *endptr = data + size;
int type, olen;
unsigned long val;
struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC };
@@ -3538,13 +3544,13 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
chan->imtu = L2CAP_DEFAULT_MIN_MTU;
} else
chan->imtu = val;
- l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu);
+ l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr);
break;
case L2CAP_CONF_FLUSH_TO:
chan->flush_to = val;
l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO,
- 2, chan->flush_to);
+ 2, chan->flush_to, endptr - ptr);
break;
case L2CAP_CONF_RFC:
@@ -3558,13 +3564,13 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
chan->fcs = 0;
l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
- sizeof(rfc), (unsigned long) &rfc);
+ sizeof(rfc), (unsigned long) &rfc, endptr - ptr);
break;
case L2CAP_CONF_EWS:
chan->ack_win = min_t(u16, val, chan->ack_win);
l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2,
- chan->tx_win);
+ chan->tx_win, endptr - ptr);
break;
case L2CAP_CONF_EFS:
@@ -3577,7 +3583,7 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len,
return -ECONNREFUSED;
l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs),
- (unsigned long) &efs);
+ (unsigned long) &efs, endptr - ptr);
break;
case L2CAP_CONF_FCS:
@@ -3682,7 +3688,7 @@ void __l2cap_connect_rsp_defer(struct l2cap_chan *chan)
return;
l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
- l2cap_build_conf_req(chan, buf), buf);
+ l2cap_build_conf_req(chan, buf, sizeof(buf)), buf);
chan->num_conf_req++;
}
@@ -3890,7 +3896,7 @@ sendresp:
u8 buf[128];
set_bit(CONF_REQ_SENT, &chan->conf_state);
l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
- l2cap_build_conf_req(chan, buf), buf);
+ l2cap_build_conf_req(chan, buf, sizeof(buf)), buf);
chan->num_conf_req++;
}
@@ -3968,7 +3974,7 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
break;
l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
- l2cap_build_conf_req(chan, req), req);
+ l2cap_build_conf_req(chan, req, sizeof(req)), req);
chan->num_conf_req++;
break;
@@ -4080,7 +4086,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
}
/* Complete config. */
- len = l2cap_parse_conf_req(chan, rsp);
+ len = l2cap_parse_conf_req(chan, rsp, sizeof(rsp));
if (len < 0) {
l2cap_send_disconn_req(chan, ECONNRESET);
goto unlock;
@@ -4114,7 +4120,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
if (!test_and_set_bit(CONF_REQ_SENT, &chan->conf_state)) {
u8 buf[64];
l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
- l2cap_build_conf_req(chan, buf), buf);
+ l2cap_build_conf_req(chan, buf, sizeof(buf)), buf);
chan->num_conf_req++;
}
@@ -4174,7 +4180,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn,
char buf[64];
len = l2cap_parse_conf_rsp(chan, rsp->data, len,
- buf, &result);
+ buf, sizeof(buf), &result);
if (len < 0) {
l2cap_send_disconn_req(chan, ECONNRESET);
goto done;
@@ -4204,7 +4210,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn,
/* throw out any old stored conf requests */
result = L2CAP_CONF_SUCCESS;
len = l2cap_parse_conf_rsp(chan, rsp->data, len,
- req, &result);
+ req, sizeof(req), &result);
if (len < 0) {
l2cap_send_disconn_req(chan, ECONNRESET);
goto done;
@@ -4781,7 +4787,7 @@ static void l2cap_do_create(struct l2cap_chan *chan, int result,
set_bit(CONF_REQ_SENT, &chan->conf_state);
l2cap_send_cmd(chan->conn, l2cap_get_ident(chan->conn),
L2CAP_CONF_REQ,
- l2cap_build_conf_req(chan, buf), buf);
+ l2cap_build_conf_req(chan, buf, sizeof(buf)), buf);
chan->num_conf_req++;
}
}
@@ -7457,7 +7463,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
set_bit(CONF_REQ_SENT, &chan->conf_state);
l2cap_send_cmd(conn, l2cap_get_ident(conn),
L2CAP_CONF_REQ,
- l2cap_build_conf_req(chan, buf),
+ l2cap_build_conf_req(chan, buf, sizeof(buf)),
buf);
chan->num_conf_req++;
}
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 43faf2aea2ab..658c900752c6 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -23,6 +23,7 @@
#include <linux/debugfs.h>
#include <linux/scatterlist.h>
#include <linux/crypto.h>
+#include <crypto/algapi.h>
#include <crypto/b128ops.h>
#include <crypto/hash.h>
@@ -506,7 +507,7 @@ bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16],
if (err)
return false;
- return !memcmp(bdaddr->b, hash, 3);
+ return !crypto_memneq(bdaddr->b, hash, 3);
}
int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa)
@@ -559,7 +560,7 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16])
/* This is unlikely, but we need to check that
* we didn't accidentially generate a debug key.
*/
- if (memcmp(smp->local_sk, debug_sk, 32))
+ if (crypto_memneq(smp->local_sk, debug_sk, 32))
break;
}
smp->debug_key = false;
@@ -973,7 +974,7 @@ static u8 smp_random(struct smp_chan *smp)
if (ret)
return SMP_UNSPECIFIED;
- if (memcmp(smp->pcnf, confirm, sizeof(smp->pcnf)) != 0) {
+ if (crypto_memneq(smp->pcnf, confirm, sizeof(smp->pcnf))) {
BT_ERR("Pairing failed (confirmation values mismatch)");
return SMP_CONFIRM_FAILED;
}
@@ -1473,7 +1474,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op)
smp->rrnd, r, cfm))
return SMP_UNSPECIFIED;
- if (memcmp(smp->pcnf, cfm, 16))
+ if (crypto_memneq(smp->pcnf, cfm, 16))
return SMP_CONFIRM_FAILED;
smp->passkey_round++;
@@ -1857,7 +1858,7 @@ static u8 sc_send_public_key(struct smp_chan *smp)
/* This is unlikely, but we need to check that
* we didn't accidentially generate a debug key.
*/
- if (memcmp(smp->local_sk, debug_sk, 32))
+ if (crypto_memneq(smp->local_sk, debug_sk, 32))
break;
}
}
@@ -2122,7 +2123,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb)
if (err)
return SMP_UNSPECIFIED;
- if (memcmp(smp->pcnf, cfm, 16))
+ if (crypto_memneq(smp->pcnf, cfm, 16))
return SMP_CONFIRM_FAILED;
} else {
smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd),
@@ -2603,7 +2604,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
if (err)
return SMP_UNSPECIFIED;
- if (memcmp(cfm.confirm_val, smp->pcnf, 16))
+ if (crypto_memneq(cfm.confirm_val, smp->pcnf, 16))
return SMP_CONFIRM_FAILED;
}
@@ -2636,7 +2637,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
else
hcon->pending_sec_level = BT_SECURITY_FIPS;
- if (!memcmp(debug_pk, smp->remote_pk, 64))
+ if (!crypto_memneq(debug_pk, smp->remote_pk, 64))
set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags);
if (smp->method == DSP_PASSKEY) {
@@ -2735,7 +2736,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb)
if (err)
return SMP_UNSPECIFIED;
- if (memcmp(check->e, e, 16))
+ if (crypto_memneq(check->e, e, 16))
return SMP_DHKEY_CHECK_FAILED;
if (!hcon->out) {
@@ -3446,7 +3447,7 @@ static int __init test_ah(struct crypto_cipher *tfm_aes)
if (err)
return err;
- if (memcmp(res, exp, 3))
+ if (crypto_memneq(res, exp, 3))
return -EINVAL;
return 0;
@@ -3476,7 +3477,7 @@ static int __init test_c1(struct crypto_cipher *tfm_aes)
if (err)
return err;
- if (memcmp(res, exp, 16))
+ if (crypto_memneq(res, exp, 16))
return -EINVAL;
return 0;
@@ -3501,7 +3502,7 @@ static int __init test_s1(struct crypto_cipher *tfm_aes)
if (err)
return err;
- if (memcmp(res, exp, 16))
+ if (crypto_memneq(res, exp, 16))
return -EINVAL;
return 0;
@@ -3533,7 +3534,7 @@ static int __init test_f4(struct crypto_shash *tfm_cmac)
if (err)
return err;
- if (memcmp(res, exp, 16))
+ if (crypto_memneq(res, exp, 16))
return -EINVAL;
return 0;
@@ -3567,10 +3568,10 @@ static int __init test_f5(struct crypto_shash *tfm_cmac)
if (err)
return err;
- if (memcmp(mackey, exp_mackey, 16))
+ if (crypto_memneq(mackey, exp_mackey, 16))
return -EINVAL;
- if (memcmp(ltk, exp_ltk, 16))
+ if (crypto_memneq(ltk, exp_ltk, 16))
return -EINVAL;
return 0;
@@ -3603,7 +3604,7 @@ static int __init test_f6(struct crypto_shash *tfm_cmac)
if (err)
return err;
- if (memcmp(res, exp, 16))
+ if (crypto_memneq(res, exp, 16))
return -EINVAL;
return 0;
@@ -3657,7 +3658,7 @@ static int __init test_h6(struct crypto_shash *tfm_cmac)
if (err)
return err;
- if (memcmp(res, exp, 16))
+ if (crypto_memneq(res, exp, 16))
return -EINVAL;
return 0;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 89a687f3c0a3..5f5e28f210e0 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -53,6 +53,9 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
brstats->tx_bytes += skb->len;
u64_stats_update_end(&brstats->syncp);
+#ifdef CONFIG_NET_SWITCHDEV
+ skb->offload_fwd_mark = 0;
+#endif
BR_INPUT_SKB_CB(skb)->brdev = dev;
skb_reset_mac_header(skb);
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 7cb41aee4c82..8498e3503605 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -186,8 +186,9 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
/* Do not flood unicast traffic to ports that turn it off */
if (pkt_type == BR_PKT_UNICAST && !(p->flags & BR_FLOOD))
continue;
+ /* Do not flood if mc off, except for traffic we originate */
if (pkt_type == BR_PKT_MULTICAST &&
- !(p->flags & BR_MCAST_FLOOD))
+ !(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev)
continue;
/* Do not flood to ports that enable proxy ARP */
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 855b72fbe1da..267b46af407f 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -29,6 +29,7 @@ EXPORT_SYMBOL(br_should_route_hook);
static int
br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ br_drop_fake_rtable(skb);
return netif_receive_skb(skb);
}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 7dbc80d01eb0..6406010e155b 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -323,7 +323,8 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p,
__mdb_entry_to_br_ip(entry, &complete_info->ip);
mdb.obj.complete_priv = complete_info;
mdb.obj.complete = br_mdb_complete;
- switchdev_port_obj_add(port_dev, &mdb.obj);
+ if (switchdev_port_obj_add(port_dev, &mdb.obj))
+ kfree(complete_info);
}
} else if (port_dev && type == RTM_DELMDB) {
switchdev_port_obj_del(port_dev, &mdb.obj);
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 7fbdbae58e65..aa1df1a10dd7 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -521,21 +521,6 @@ static unsigned int br_nf_pre_routing(void *priv,
}
-/* PF_BRIDGE/LOCAL_IN ************************************************/
-/* The packet is locally destined, which requires a real
- * dst_entry, so detach the fake one. On the way up, the
- * packet would pass through PRE_ROUTING again (which already
- * took place when the packet entered the bridge), but we
- * register an IPv4 PRE_ROUTING 'sabotage' hook that will
- * prevent this from happening. */
-static unsigned int br_nf_local_in(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- br_drop_fake_rtable(skb);
- return NF_ACCEPT;
-}
-
/* PF_BRIDGE/FORWARD *************************************************/
static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
@@ -906,12 +891,6 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = {
.priority = NF_BR_PRI_BRNF,
},
{
- .hook = br_nf_local_in,
- .pf = NFPROTO_BRIDGE,
- .hooknum = NF_BR_LOCAL_IN,
- .priority = NF_BR_PRI_BRNF,
- },
- {
.hook = br_nf_forward_ip,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_FORWARD,
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 04741064a173..5d4006e589cb 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -776,6 +776,13 @@ static int br_validate(struct nlattr *tb[], struct nlattr *data[])
return -EPROTONOSUPPORT;
}
}
+
+ if (data[IFLA_BR_VLAN_DEFAULT_PVID]) {
+ __u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]);
+
+ if (defpvid >= VLAN_VID_MASK)
+ return -EINVAL;
+ }
#endif
return 0;
@@ -1091,11 +1098,14 @@ static int br_dev_newlink(struct net *src_net, struct net_device *dev,
spin_unlock_bh(&br->lock);
}
- err = br_changelink(dev, tb, data);
+ err = register_netdevice(dev);
if (err)
return err;
- return register_netdevice(dev);
+ err = br_changelink(dev, tb, data);
+ if (err)
+ unregister_netdevice(dev);
+ return err;
}
static size_t br_get_size(const struct net_device *brdev)
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index d8ad73b38de2..16b5aa9a91f1 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -185,6 +185,8 @@ static void br_stp_start(struct net_bridge *br)
br_debug(br, "using kernel STP\n");
/* To start timers on any ports left in blocking */
+ if (br->dev->flags & IFF_UP)
+ mod_timer(&br->hello_timer, jiffies + br->hello_time);
br_port_state_selection(br);
}
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index da058b85aa22..15826fd52af5 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -40,7 +40,7 @@ static void br_hello_timer_expired(unsigned long arg)
if (br->dev->flags & IFF_UP) {
br_config_bpdu_generation(br);
- if (br->stp_enabled != BR_USER_STP)
+ if (br->stp_enabled == BR_KERNEL_STP)
mod_timer(&br->hello_timer,
round_jiffies(jiffies + br->hello_time));
}
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 59ce1fcc220c..71b6ab240dea 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -81,11 +81,7 @@ static struct cfpkt *cfpkt_create_pfx(u16 len, u16 pfx)
{
struct sk_buff *skb;
- if (likely(in_interrupt()))
- skb = alloc_skb(len + pfx, GFP_ATOMIC);
- else
- skb = alloc_skb(len + pfx, GFP_KERNEL);
-
+ skb = alloc_skb(len + pfx, GFP_ATOMIC);
if (unlikely(skb == NULL))
return NULL;
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 292e33bd916e..5f3a627afcc6 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -34,7 +34,9 @@ static int set_secret(struct ceph_crypto_key *key, void *buf)
return -ENOTSUPP;
}
- WARN_ON(!key->len);
+ if (!key->len)
+ return -EINVAL;
+
key->key = kmemdup(buf, key->len, GFP_NOIO);
if (!key->key) {
ret = -ENOMEM;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 2efb335deada..25a30be862e9 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -7,6 +7,7 @@
#include <linux/kthread.h>
#include <linux/net.h>
#include <linux/nsproxy.h>
+#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/string.h>
@@ -469,11 +470,16 @@ static int ceph_tcp_connect(struct ceph_connection *con)
{
struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
struct socket *sock;
+ unsigned int noio_flag;
int ret;
BUG_ON(con->sock);
+
+ /* sock_create_kern() allocates with GFP_KERNEL */
+ noio_flag = memalloc_noio_save();
ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family,
SOCK_STREAM, IPPROTO_TCP, &sock);
+ memalloc_noio_restore(noio_flag);
if (ret)
return ret;
sock->sk->sk_allocation = GFP_NOFS;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index e6ae15bc41b7..0ffeb60cfe67 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -672,7 +672,8 @@ void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
BUG_ON(length > previous);
op->extent.length = length;
- op->indata_len -= previous - length;
+ if (op->op == CEPH_OSD_OP_WRITE || op->op == CEPH_OSD_OP_WRITEFULL)
+ op->indata_len -= previous - length;
}
EXPORT_SYMBOL(osd_req_op_extent_update);
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index d2436880b305..d3f6c26425b3 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1334,7 +1334,6 @@ static int decode_new_up_state_weight(void **p, void *end,
if ((map->osd_state[osd] & CEPH_OSD_EXISTS) &&
(xorstate & CEPH_OSD_EXISTS)) {
pr_info("osd%d does not exist\n", osd);
- map->osd_weight[osd] = CEPH_OSD_IN;
ret = set_primary_affinity(map, osd,
CEPH_OSD_DEFAULT_PRIMARY_AFFINITY);
if (ret)
diff --git a/net/compat.c b/net/compat.c
index 1cd2ec046164..a96fd2f3507b 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -22,6 +22,7 @@
#include <linux/filter.h>
#include <linux/compat.h>
#include <linux/security.h>
+#include <linux/audit.h>
#include <linux/export.h>
#include <net/scm.h>
@@ -781,14 +782,24 @@ COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg,
COMPAT_SYSCALL_DEFINE2(socketcall, int, call, u32 __user *, args)
{
- int ret;
- u32 a[6];
+ u32 a[AUDITSC_ARGS];
+ unsigned int len;
u32 a0, a1;
+ int ret;
if (call < SYS_SOCKET || call > SYS_SENDMMSG)
return -EINVAL;
- if (copy_from_user(a, args, nas[call]))
+ len = nas[call];
+ if (len > sizeof(a))
+ return -EINVAL;
+
+ if (copy_from_user(a, args, len))
return -EFAULT;
+
+ ret = audit_socketcall_compat(len / sizeof(a[0]), a);
+ if (ret)
+ return ret;
+
a0 = a[0];
a1 = a[1];
diff --git a/net/core/datagram.c b/net/core/datagram.c
index b7de71f8d5d3..4fa4011feec1 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -351,7 +351,7 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
if (flags & MSG_PEEK) {
err = -ENOENT;
spin_lock_bh(&sk->sk_receive_queue.lock);
- if (skb == skb_peek(&sk->sk_receive_queue)) {
+ if (skb->next) {
__skb_unlink(skb, &sk->sk_receive_queue);
atomic_dec(&skb->users);
err = 0;
@@ -378,7 +378,7 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
struct iov_iter *to, int len)
{
int start = skb_headlen(skb);
- int i, copy = start - offset;
+ int i, copy = start - offset, start_off = offset, n;
struct sk_buff *frag_iter;
trace_skb_copy_datagram_iovec(skb, len);
@@ -387,11 +387,12 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
if (copy > 0) {
if (copy > len)
copy = len;
- if (copy_to_iter(skb->data + offset, copy, to) != copy)
+ n = copy_to_iter(skb->data + offset, copy, to);
+ offset += n;
+ if (n != copy)
goto short_copy;
if ((len -= copy) == 0)
return 0;
- offset += copy;
}
/* Copy paged appendix. Hmm... why does this look so complicated? */
@@ -405,13 +406,14 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
if ((copy = end - offset) > 0) {
if (copy > len)
copy = len;
- if (copy_page_to_iter(skb_frag_page(frag),
+ n = copy_page_to_iter(skb_frag_page(frag),
frag->page_offset + offset -
- start, copy, to) != copy)
+ start, copy, to);
+ offset += n;
+ if (n != copy)
goto short_copy;
if (!(len -= copy))
return 0;
- offset += copy;
}
start = end;
}
@@ -443,6 +445,7 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
*/
fault:
+ iov_iter_revert(to, offset - start_off);
return -EFAULT;
short_copy:
@@ -593,7 +596,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
__wsum *csump)
{
int start = skb_headlen(skb);
- int i, copy = start - offset;
+ int i, copy = start - offset, start_off = offset;
struct sk_buff *frag_iter;
int pos = 0;
int n;
@@ -603,11 +606,11 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
if (copy > len)
copy = len;
n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to);
+ offset += n;
if (n != copy)
goto fault;
if ((len -= copy) == 0)
return 0;
- offset += copy;
pos = copy;
}
@@ -629,12 +632,12 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
offset - start, copy,
&csum2, to);
kunmap(page);
+ offset += n;
if (n != copy)
goto fault;
*csump = csum_block_add(*csump, csum2, pos);
if (!(len -= copy))
return 0;
- offset += copy;
pos += copy;
}
start = end;
@@ -667,6 +670,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
return 0;
fault:
+ iov_iter_revert(to, offset - start_off);
return -EFAULT;
}
@@ -736,7 +740,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
if (msg_data_left(msg) < chunk) {
if (__skb_checksum_complete(skb))
- goto csum_error;
+ return -EINVAL;
if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
goto fault;
} else {
@@ -744,14 +748,16 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
chunk, &csum))
goto fault;
- if (csum_fold(csum))
- goto csum_error;
+
+ if (csum_fold(csum)) {
+ iov_iter_revert(&msg->msg_iter, chunk);
+ return -EINVAL;
+ }
+
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
netdev_rx_csum_fault(skb->dev);
}
return 0;
-csum_error:
- return -EINVAL;
fault:
return -EFAULT;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 60b0a6049e72..c37891828e4e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1115,9 +1115,8 @@ static int dev_alloc_name_ns(struct net *net,
return ret;
}
-static int dev_get_valid_name(struct net *net,
- struct net_device *dev,
- const char *name)
+int dev_get_valid_name(struct net *net, struct net_device *dev,
+ const char *name)
{
BUG_ON(!net);
@@ -1133,6 +1132,7 @@ static int dev_get_valid_name(struct net *net,
return 0;
}
+EXPORT_SYMBOL(dev_get_valid_name);
/**
* dev_change_name - change name of a device
@@ -1250,8 +1250,9 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
if (!new_ifalias)
return -ENOMEM;
dev->ifalias = new_ifalias;
+ memcpy(dev->ifalias, alias, len);
+ dev->ifalias[len] = 0;
- strlcpy(dev->ifalias, alias, len+1);
return len;
}
@@ -1697,27 +1698,54 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue);
static struct static_key netstamp_needed __read_mostly;
#ifdef HAVE_JUMP_LABEL
static atomic_t netstamp_needed_deferred;
+static atomic_t netstamp_wanted;
static void netstamp_clear(struct work_struct *work)
{
int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
+ int wanted;
- while (deferred--)
- static_key_slow_dec(&netstamp_needed);
+ wanted = atomic_add_return(deferred, &netstamp_wanted);
+ if (wanted > 0)
+ static_key_enable(&netstamp_needed);
+ else
+ static_key_disable(&netstamp_needed);
}
static DECLARE_WORK(netstamp_work, netstamp_clear);
#endif
void net_enable_timestamp(void)
{
+#ifdef HAVE_JUMP_LABEL
+ int wanted;
+
+ while (1) {
+ wanted = atomic_read(&netstamp_wanted);
+ if (wanted <= 0)
+ break;
+ if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted)
+ return;
+ }
+ atomic_inc(&netstamp_needed_deferred);
+ schedule_work(&netstamp_work);
+#else
static_key_slow_inc(&netstamp_needed);
+#endif
}
EXPORT_SYMBOL(net_enable_timestamp);
void net_disable_timestamp(void)
{
#ifdef HAVE_JUMP_LABEL
- /* net_disable_timestamp() can be called from non process context */
- atomic_inc(&netstamp_needed_deferred);
+ int wanted;
+
+ while (1) {
+ wanted = atomic_read(&netstamp_wanted);
+ if (wanted <= 1)
+ break;
+ if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted)
+ return;
+ }
+ atomic_dec(&netstamp_needed_deferred);
schedule_work(&netstamp_work);
#else
static_key_slow_dec(&netstamp_needed);
@@ -2327,6 +2355,9 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
{
unsigned long flags;
+ if (unlikely(!skb))
+ return;
+
if (likely(atomic_read(&skb->users) == 1)) {
smp_rmb();
atomic_set(&skb->users, 0);
@@ -2674,9 +2705,10 @@ EXPORT_SYMBOL(skb_mac_gso_segment);
static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
{
if (tx_path)
- return skb->ip_summed != CHECKSUM_PARTIAL;
- else
- return skb->ip_summed == CHECKSUM_NONE;
+ return skb->ip_summed != CHECKSUM_PARTIAL &&
+ skb->ip_summed != CHECKSUM_UNNECESSARY;
+
+ return skb->ip_summed == CHECKSUM_NONE;
}
/**
@@ -2695,11 +2727,12 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
netdev_features_t features, bool tx_path)
{
+ struct sk_buff *segs;
+
if (unlikely(skb_needs_check(skb, tx_path))) {
int err;
- skb_warn_bad_offload(skb);
-
+ /* We're going to init ->check field in TCP or UDP header */
err = skb_cow_head(skb, 0);
if (err < 0)
return ERR_PTR(err);
@@ -2727,7 +2760,12 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
skb_reset_mac_header(skb);
skb_reset_mac_len(skb);
- return skb_mac_gso_segment(skb, features);
+ segs = skb_mac_gso_segment(skb, features);
+
+ if (unlikely(skb_needs_check(skb, tx_path)))
+ skb_warn_bad_offload(skb);
+
+ return segs;
}
EXPORT_SYMBOL(__skb_gso_segment);
@@ -4613,6 +4651,12 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
}
EXPORT_SYMBOL(gro_find_complete_by_type);
+static void napi_skb_free_stolen_head(struct sk_buff *skb)
+{
+ skb_dst_drop(skb);
+ kmem_cache_free(skbuff_head_cache, skb);
+}
+
static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
{
switch (ret) {
@@ -4626,12 +4670,10 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
break;
case GRO_MERGED_FREE:
- if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
- skb_dst_drop(skb);
- kmem_cache_free(skbuff_head_cache, skb);
- } else {
+ if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+ napi_skb_free_stolen_head(skb);
+ else
__kfree_skb(skb);
- }
break;
case GRO_HELD:
@@ -4701,10 +4743,16 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
break;
case GRO_DROP:
- case GRO_MERGED_FREE:
napi_reuse_skb(napi, skb);
break;
+ case GRO_MERGED_FREE:
+ if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+ napi_skb_free_stolen_head(skb);
+ else
+ napi_reuse_skb(napi, skb);
+ break;
+
case GRO_MERGED:
break;
}
@@ -5292,12 +5340,13 @@ EXPORT_SYMBOL(netdev_has_upper_dev);
* Find out if a device is linked to an upper device and return true in case
* it is. The caller must hold the RTNL lock.
*/
-static bool netdev_has_any_upper_dev(struct net_device *dev)
+bool netdev_has_any_upper_dev(struct net_device *dev)
{
ASSERT_RTNL();
return !list_empty(&dev->all_adj_list.upper);
}
+EXPORT_SYMBOL(netdev_has_any_upper_dev);
/**
* netdev_master_upper_dev_get - Get master upper device
@@ -7493,7 +7542,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
{
#if BITS_PER_LONG == 64
BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
- memcpy(stats64, netdev_stats, sizeof(*stats64));
+ memcpy(stats64, netdev_stats, sizeof(*netdev_stats));
/* zero out counters that only exist in rtnl_link_stats64 */
memset((char *)stats64 + sizeof(*netdev_stats), 0,
sizeof(*stats64) - sizeof(*netdev_stats));
@@ -7535,9 +7584,9 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
} else {
netdev_stats_to_stats64(storage, &dev->stats);
}
- storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
- storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
- storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler);
+ storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped);
+ storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped);
+ storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler);
return storage;
}
EXPORT_SYMBOL(dev_get_stats);
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index b94b1d293506..151e047ce072 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -28,6 +28,7 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
+ ifr.ifr_name[IFNAMSIZ-1] = 0;
error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex);
if (error)
diff --git a/net/core/dst.c b/net/core/dst.c
index b5cbbe07f786..39cc11968cf9 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -151,13 +151,13 @@ int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(dst_discard_out);
-const u32 dst_default_metrics[RTAX_MAX + 1] = {
+const struct dst_metrics dst_default_metrics = {
/* This initializer is needed to force linker to place this variable
* into const section. Otherwise it might end into bss section.
* We really want to avoid false sharing on this variable, and catch
* any writes on it.
*/
- [RTAX_MAX] = 0xdeadbeef,
+ .refcnt = ATOMIC_INIT(1),
};
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
@@ -169,7 +169,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
if (dev)
dev_hold(dev);
dst->ops = ops;
- dst_init_metrics(dst, dst_default_metrics, true);
+ dst_init_metrics(dst, dst_default_metrics.metrics, true);
dst->expires = 0UL;
dst->path = dst;
dst->from = NULL;
@@ -315,25 +315,30 @@ EXPORT_SYMBOL(dst_release);
u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
{
- u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
+ struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC);
if (p) {
- u32 *old_p = __DST_METRICS_PTR(old);
+ struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old);
unsigned long prev, new;
- memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+ atomic_set(&p->refcnt, 1);
+ memcpy(p->metrics, old_p->metrics, sizeof(p->metrics));
new = (unsigned long) p;
prev = cmpxchg(&dst->_metrics, old, new);
if (prev != old) {
kfree(p);
- p = __DST_METRICS_PTR(prev);
+ p = (struct dst_metrics *)__DST_METRICS_PTR(prev);
if (prev & DST_METRICS_READ_ONLY)
p = NULL;
+ } else if (prev & DST_METRICS_REFCOUNTED) {
+ if (atomic_dec_and_test(&old_p->refcnt))
+ kfree(old_p);
}
}
- return p;
+ BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0);
+ return (u32 *)p;
}
EXPORT_SYMBOL(dst_cow_metrics_generic);
@@ -342,7 +347,7 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
{
unsigned long prev, new;
- new = ((unsigned long) dst_default_metrics) | DST_METRICS_READ_ONLY;
+ new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY;
prev = cmpxchg(&dst->_metrics, old, new);
if (prev == old)
kfree(__DST_METRICS_PTR(old));
@@ -465,6 +470,20 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
spin_lock_bh(&dst_garbage.lock);
dst = dst_garbage.list;
dst_garbage.list = NULL;
+ /* The code in dst_ifdown places a hold on the loopback device.
+ * If the gc entry processing is set to expire after a lengthy
+ * interval, this hold can cause netdev_wait_allrefs() to hang
+ * out and wait for a long time -- until the the loopback
+ * interface is released. If we're really unlucky, it'll emit
+ * pr_emerg messages to console too. Reset the interval here,
+ * so dst cleanups occur in a more timely fashion.
+ */
+ if (dst_garbage.timer_inc > DST_GC_INC) {
+ dst_garbage.timer_inc = DST_GC_INC;
+ dst_garbage.timer_expires = DST_GC_MIN;
+ mod_delayed_work(system_wq, &dst_gc_work,
+ dst_garbage.timer_expires);
+ }
spin_unlock_bh(&dst_garbage.lock);
if (last)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 047a1752ece1..e9989b835a66 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1394,9 +1394,12 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
if (regs.len > reglen)
regs.len = reglen;
- regbuf = vzalloc(reglen);
- if (reglen && !regbuf)
- return -ENOMEM;
+ regbuf = NULL;
+ if (reglen) {
+ regbuf = vzalloc(reglen);
+ if (!regbuf)
+ return -ENOMEM;
+ }
ops->get_regs(dev, &regs, regbuf);
@@ -1701,7 +1704,7 @@ static noinline_for_stack int ethtool_get_channels(struct net_device *dev,
static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
void __user *useraddr)
{
- struct ethtool_channels channels, max;
+ struct ethtool_channels channels, max = { .cmd = ETHTOOL_GCHANNELS };
u32 max_rx_in_use = 0;
if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels)
diff --git a/net/core/filter.c b/net/core/filter.c
index b391209838ef..4eb4ce0aeef4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2198,6 +2198,7 @@ bool bpf_helper_changes_skb_data(void *func)
func == bpf_skb_change_proto ||
func == bpf_skb_change_tail ||
func == bpf_skb_pull_data ||
+ func == bpf_clone_redirect ||
func == bpf_l3_csum_replace ||
func == bpf_l4_csum_replace)
return true;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 2ae929f9bd06..f45f6198851f 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -859,7 +859,8 @@ static void neigh_probe(struct neighbour *neigh)
if (skb)
skb = skb_clone(skb, GFP_ATOMIC);
write_unlock(&neigh->lock);
- neigh->ops->solicit(neigh, skb);
+ if (neigh->ops->solicit)
+ neigh->ops->solicit(neigh, skb);
atomic_inc(&neigh->probes);
kfree_skb(skb);
}
@@ -2927,7 +2928,8 @@ static void neigh_proc_update(struct ctl_table *ctl, int write)
return;
set_bit(index, p->data_state);
- call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
+ if (index == NEIGH_VAR_DELAY_PROBE_TIME)
+ call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
if (!dev) /* NULL dev means this is default value */
neigh_copy_dflt_parms(net, p, index);
}
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 11fce17274f6..46e8830c1979 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -69,27 +69,17 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n)
return 0;
}
-static void update_classid(struct cgroup_subsys_state *css, void *v)
+static void cgrp_attach(struct cgroup_taskset *tset)
{
- struct css_task_iter it;
+ struct cgroup_subsys_state *css;
struct task_struct *p;
- css_task_iter_start(css, &it);
- while ((p = css_task_iter_next(&it))) {
+ cgroup_taskset_for_each(p, css, tset) {
task_lock(p);
- iterate_fd(p->files, 0, update_classid_sock, v);
+ iterate_fd(p->files, 0, update_classid_sock,
+ (void *)(unsigned long)css_cls_state(css)->classid);
task_unlock(p);
}
- css_task_iter_end(&it);
-}
-
-static void cgrp_attach(struct cgroup_taskset *tset)
-{
- struct cgroup_subsys_state *css;
-
- cgroup_taskset_first(tset, &css);
- update_classid(css,
- (void *)(unsigned long)css_cls_state(css)->classid);
}
static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
@@ -101,12 +91,22 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
u64 value)
{
struct cgroup_cls_state *cs = css_cls_state(css);
+ struct css_task_iter it;
+ struct task_struct *p;
cgroup_sk_alloc_disable();
cs->classid = (u32)value;
- update_classid(css, (void *)(unsigned long)cs->classid);
+ css_task_iter_start(css, &it);
+ while ((p = css_task_iter_next(&it))) {
+ task_lock(p);
+ iterate_fd(p->files, 0, update_classid_sock,
+ (void *)(unsigned long)cs->classid);
+ task_unlock(p);
+ }
+ css_task_iter_end(&it);
+
return 0;
}
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 53599bd0c82d..457f882b0f7b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -105,15 +105,21 @@ static void queue_process(struct work_struct *work)
while ((skb = skb_dequeue(&npinfo->txq))) {
struct net_device *dev = skb->dev;
struct netdev_queue *txq;
+ unsigned int q_index;
if (!netif_device_present(dev) || !netif_running(dev)) {
kfree_skb(skb);
continue;
}
- txq = skb_get_tx_queue(dev, skb);
-
local_irq_save(flags);
+ /* check if skb->queue_mapping is still valid */
+ q_index = skb_get_queue_mapping(skb);
+ if (unlikely(q_index >= dev->real_num_tx_queues)) {
+ q_index = q_index % dev->real_num_tx_queues;
+ skb_set_queue_mapping(skb, q_index);
+ }
+ txq = netdev_get_tx_queue(dev, q_index);
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (netif_xmit_frozen_or_stopped(txq) ||
netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index b7f9ae7b1c5f..c2339b865164 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -937,6 +937,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(1) /* IFLA_LINKMODE */
+ nla_total_size(4) /* IFLA_CARRIER_CHANGES */
+ nla_total_size(4) /* IFLA_LINK_NETNSID */
+ + nla_total_size(4) /* IFLA_GROUP */
+ nla_total_size(ext_filter_mask
& RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
+ rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
@@ -1059,7 +1060,7 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev)
return err;
}
- if (nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name))
+ if (nla_put_string(skb, IFLA_PHYS_PORT_NAME, name))
return -EMSGSIZE;
return 0;
@@ -1130,6 +1131,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
struct ifla_vf_mac vf_mac;
struct ifla_vf_info ivi;
+ memset(&ivi, 0, sizeof(ivi));
+
/* Not all SR-IOV capable drivers support the
* spoofcheck and "RSS query enable" query. Preset to
* -1 so the user space tool can detect that the driver
@@ -1138,7 +1141,6 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
ivi.spoofchk = -1;
ivi.rss_query_en = -1;
ivi.trusted = -1;
- memset(ivi.mac, 0, sizeof(ivi.mac));
/* The default value for VF link state is "auto"
* IFLA_VF_LINK_STATE_AUTO which equals zero
*/
@@ -1464,6 +1466,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_LINK_NETNSID] = { .type = NLA_S32 },
[IFLA_PROTO_DOWN] = { .type = NLA_U8 },
[IFLA_XDP] = { .type = NLA_NESTED },
+ [IFLA_GROUP] = { .type = NLA_U32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1617,13 +1620,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
cb->nlh->nlmsg_seq, 0,
flags,
ext_filter_mask);
- /* If we ran out of room on the first message,
- * we're in trouble
- */
- WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
- if (err < 0)
- goto out;
+ if (err < 0) {
+ if (likely(skb->len))
+ goto out;
+
+ goto out_err;
+ }
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
@@ -1631,10 +1634,12 @@ cont:
}
}
out:
+ err = skb->len;
+out_err:
cb->args[1] = idx;
cb->args[0] = h;
- return skb->len;
+ return err;
}
int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len)
@@ -1960,7 +1965,8 @@ static int do_setlink(const struct sk_buff *skb,
struct sockaddr *sa;
int len;
- len = sizeof(sa_family_t) + dev->addr_len;
+ len = sizeof(sa_family_t) + max_t(size_t, dev->addr_len,
+ sizeof(*sa));
sa = kmalloc(len, GFP_KERNEL);
if (!sa) {
err = -ENOMEM;
@@ -3413,8 +3419,12 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
err = br_dev->netdev_ops->ndo_bridge_getlink(
skb, portid, seq, dev,
filter_mask, NLM_F_MULTI);
- if (err < 0 && err != -EOPNOTSUPP)
- break;
+ if (err < 0 && err != -EOPNOTSUPP) {
+ if (likely(skb->len))
+ break;
+
+ goto out_err;
+ }
}
idx++;
}
@@ -3425,16 +3435,22 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
seq, dev,
filter_mask,
NLM_F_MULTI);
- if (err < 0 && err != -EOPNOTSUPP)
- break;
+ if (err < 0 && err != -EOPNOTSUPP) {
+ if (likely(skb->len))
+ break;
+
+ goto out_err;
+ }
}
idx++;
}
}
+ err = skb->len;
+out_err:
rcu_read_unlock();
cb->args[0] = idx;
- return skb->len;
+ return err;
}
static inline size_t bridge_nlmsg_size(void)
@@ -3742,6 +3758,9 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
return -EMSGSIZE;
ifsm = nlmsg_data(nlh);
+ ifsm->family = PF_UNSPEC;
+ ifsm->pad1 = 0;
+ ifsm->pad2 = 0;
ifsm->ifindex = dev->ifindex;
ifsm->filter_mask = filter_mask;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1e3e0087245b..aec5605944d3 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3076,22 +3076,32 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
if (sg && csum && (mss != GSO_BY_FRAGS)) {
if (!(features & NETIF_F_GSO_PARTIAL)) {
struct sk_buff *iter;
+ unsigned int frag_len;
if (!list_skb ||
!net_gso_ok(features, skb_shinfo(head_skb)->gso_type))
goto normal;
- /* Split the buffer at the frag_list pointer.
- * This is based on the assumption that all
- * buffers in the chain excluding the last
- * containing the same amount of data.
+ /* If we get here then all the required
+ * GSO features except frag_list are supported.
+ * Try to split the SKB to multiple GSO SKBs
+ * with no frag_list.
+ * Currently we can do that only when the buffers don't
+ * have a linear part and all the buffers except
+ * the last are of the same length.
*/
+ frag_len = list_skb->len;
skb_walk_frags(head_skb, iter) {
+ if (frag_len != iter->len && iter->next)
+ goto normal;
if (skb_headlen(iter))
goto normal;
len -= iter->len;
}
+
+ if (len != frag_len)
+ goto normal;
}
/* GSO partial only requires that we trim off any excess that
@@ -3779,6 +3789,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
serr->ee.ee_info = tstype;
+ serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
if (sk->sk_protocol == IPPROTO_TCP &&
@@ -3814,13 +3825,14 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
if (!skb_may_tx_timestamp(sk, false))
return;
- /* take a reference to prevent skb_orphan() from freeing the socket */
- sock_hold(sk);
-
- *skb_hwtstamps(skb) = *hwtstamps;
- __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
-
- sock_put(sk);
+ /* Take a reference to prevent skb_orphan() from freeing the socket,
+ * but only if the socket refcount is not zero.
+ */
+ if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+ *skb_hwtstamps(skb) = *hwtstamps;
+ __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
+ sock_put(sk);
+ }
}
EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
@@ -3871,7 +3883,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
{
struct sock *sk = skb->sk;
struct sock_exterr_skb *serr;
- int err;
+ int err = 1;
skb->wifi_acked_valid = 1;
skb->wifi_acked = acked;
@@ -3881,14 +3893,15 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
- /* take a reference to prevent skb_orphan() from freeing the socket */
- sock_hold(sk);
-
- err = sock_queue_err_skb(sk, skb);
+ /* Take a reference to prevent skb_orphan() from freeing the socket,
+ * but only if the socket refcount is not zero.
+ */
+ if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
+ err = sock_queue_err_skb(sk, skb);
+ sock_put(sk);
+ }
if (err)
kfree_skb(skb);
-
- sock_put(sk);
}
EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
@@ -4362,6 +4375,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
if (!xnet)
return;
+ ipvs_reset(skb);
skb_orphan(skb);
skb->mark = 0;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index bc6543f7de36..e3b60460dc9c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -138,10 +138,7 @@
#include <trace/events/sock.h>
-#ifdef CONFIG_INET
#include <net/tcp.h>
-#endif
-
#include <net/busy_poll.h>
static DEFINE_MUTEX(proto_list_mutex);
@@ -1437,6 +1434,11 @@ static void __sk_destruct(struct rcu_head *head)
pr_debug("%s: optmem leakage (%d bytes) detected\n",
__func__, atomic_read(&sk->sk_omem_alloc));
+ if (sk->sk_frag.page) {
+ put_page(sk->sk_frag.page);
+ sk->sk_frag.page = NULL;
+ }
+
if (sk->sk_peer_cred)
put_cred(sk->sk_peer_cred);
put_pid(sk->sk_peer_pid);
@@ -1491,6 +1493,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
sock_copy(newsk, sk);
+ newsk->sk_prot_creator = sk->sk_prot;
+
/* SANITY */
if (likely(newsk->sk_net_refcnt))
get_net(sock_net(newsk));
@@ -1522,6 +1526,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
sock_reset_flag(newsk, SOCK_DONE);
+ cgroup_sk_alloc(&newsk->sk_cgrp_data);
skb_queue_head_init(&newsk->sk_error_queue);
filter = rcu_dereference_protected(newsk->sk_filter, 1);
@@ -1533,6 +1538,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
is_charged = sk_filter_charge(newsk, filter);
if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
+ /* We need to make sure that we don't uncharge the new
+ * socket if we couldn't charge it in the first place
+ * as otherwise we uncharge the parent's filter.
+ */
+ if (!is_charged)
+ RCU_INIT_POINTER(newsk->sk_filter, NULL);
/* It is still raw copy of parent, so invalidate
* destructor and make plain sk_free() */
newsk->sk_destruct = NULL;
@@ -1550,8 +1561,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
atomic64_set(&newsk->sk_cookie, 0);
mem_cgroup_sk_alloc(newsk);
- cgroup_sk_alloc(&newsk->sk_cgrp_data);
-
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
@@ -1676,28 +1685,24 @@ EXPORT_SYMBOL(skb_set_owner_w);
* delay queue. We want to allow the owner socket to send more
* packets, as if they were already TX completed by a typical driver.
* But we also want to keep skb->sk set because some packet schedulers
- * rely on it (sch_fq for example). So we set skb->truesize to a small
- * amount (1) and decrease sk_wmem_alloc accordingly.
+ * rely on it (sch_fq for example).
*/
void skb_orphan_partial(struct sk_buff *skb)
{
- /* If this skb is a TCP pure ACK or already went here,
- * we have nothing to do. 2 is already a very small truesize.
- */
- if (skb->truesize <= 2)
+ if (skb_is_tcp_pure_ack(skb))
return;
- /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
- * so we do not completely orphan skb, but transfert all
- * accounted bytes but one, to avoid unexpected reorders.
- */
if (skb->destructor == sock_wfree
#ifdef CONFIG_INET
|| skb->destructor == tcp_wfree
#endif
) {
- atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
- skb->truesize = 1;
+ struct sock *sk = skb->sk;
+
+ if (atomic_inc_not_zero(&sk->sk_refcnt)) {
+ atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+ skb->destructor = sock_efree;
+ }
} else {
skb_orphan(skb);
}
@@ -2738,11 +2743,6 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
- if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
- sk->sk_frag.page = NULL;
- }
-
sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 9a1a352fd1eb..77f396b679ce 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -36,9 +36,14 @@ int reuseport_alloc(struct sock *sk)
* soft irq of receive path or setsockopt from process context
*/
spin_lock_bh(&reuseport_lock);
- WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
- lockdep_is_held(&reuseport_lock)),
- "multiple allocations for the same socket");
+
+ /* Allocation attempts can occur concurrently via the setsockopt path
+ * and the bind/hash path. Nothing to do when we lose the race.
+ */
+ if (rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock)))
+ goto out;
+
reuse = __reuseport_alloc(INIT_SOCKS);
if (!reuse) {
spin_unlock_bh(&reuseport_lock);
@@ -49,6 +54,7 @@ int reuseport_alloc(struct sock *sk)
reuse->num_socks = 1;
rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
+out:
spin_unlock_bh(&reuseport_lock);
return 0;
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index f053198e730c..5e3a7302f774 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -749,6 +749,7 @@ static void ccid2_hc_tx_exit(struct sock *sk)
for (i = 0; i < hc->tx_seqbufc; i++)
kfree(hc->tx_seqbuf[i]);
hc->tx_seqbufc = 0;
+ dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
}
static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 1704948e6a12..f227f002c73d 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -1471,9 +1471,12 @@ int dccp_feat_init(struct sock *sk)
* singleton values (which always leads to failure).
* These settings can still (later) be overridden via sockopts.
*/
- if (ccid_get_builtin_ccids(&tx.val, &tx.len) ||
- ccid_get_builtin_ccids(&rx.val, &rx.len))
+ if (ccid_get_builtin_ccids(&tx.val, &tx.len))
return -ENOBUFS;
+ if (ccid_get_builtin_ccids(&rx.val, &rx.len)) {
+ kfree(tx.val);
+ return -ENOBUFS;
+ }
if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) ||
!dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len))
diff --git a/net/dccp/input.c b/net/dccp/input.c
index ba347184bda9..4a05d7876850 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -577,6 +577,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
const int old_state = sk->sk_state;
+ bool acceptable;
int queued = 0;
/*
@@ -603,10 +604,16 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
*/
if (sk->sk_state == DCCP_LISTEN) {
if (dh->dccph_type == DCCP_PKT_REQUEST) {
- if (inet_csk(sk)->icsk_af_ops->conn_request(sk,
- skb) < 0)
+ /* It is possible that we process SYN packets from backlog,
+ * so we need to make sure to disable BH right there.
+ */
+ local_bh_disable();
+ acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0;
+ local_bh_enable();
+ if (!acceptable)
return 1;
- goto discard;
+ consume_skb(skb);
+ return 0;
}
if (dh->dccph_type == DCCP_PKT_RESET)
goto discard;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index edbe59d203ef..8c7799cdd3cf 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -289,7 +289,8 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
switch (type) {
case ICMP_REDIRECT:
- dccp_do_redirect(skb, sk);
+ if (!sock_owned_by_user(sk))
+ dccp_do_redirect(skb, sk);
goto out;
case ICMP_SOURCE_QUENCH:
/* Just silently ignore these. */
@@ -413,8 +414,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
sk_daddr_set(newsk, ireq->ir_rmt_addr);
sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
newinet->inet_saddr = ireq->ir_loc_addr;
- newinet->inet_opt = ireq->opt;
- ireq->opt = NULL;
+ RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->inet_id = jiffies;
@@ -429,7 +429,10 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
-
+ if (*own_req)
+ ireq->ireq_opt = NULL;
+ else
+ newinet->inet_opt = NULL;
return newsk;
exit_overflow:
@@ -440,6 +443,7 @@ exit:
__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
return NULL;
put_and_exit:
+ newinet->inet_opt = NULL;
inet_csk_prepare_forced_close(newsk);
dccp_done(newsk);
goto exit;
@@ -491,7 +495,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req
ireq->ir_rmt_addr);
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- ireq->opt);
+ ireq_opt_deref(ireq));
err = net_xmit_eval(err);
}
@@ -547,7 +551,7 @@ out:
static void dccp_v4_reqsk_destructor(struct request_sock *req)
{
dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
- kfree(inet_rsk(req)->opt);
+ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
void dccp_syn_ack_timeout(const struct request_sock *req)
@@ -636,6 +640,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
goto drop_and_free;
inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
+ reqsk_put(req);
return 0;
drop_and_free:
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 7506c03a7db9..28e8252cc5ea 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -122,10 +122,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
np = inet6_sk(sk);
if (type == NDISC_REDIRECT) {
- struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
+ if (!sock_owned_by_user(sk)) {
+ struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
- if (dst)
- dst->ops->redirect(dst, sk, skb);
+ if (dst)
+ dst->ops->redirect(dst, sk, skb);
+ }
goto out;
}
@@ -378,6 +380,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
goto drop_and_free;
inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
+ reqsk_put(req);
return 0;
drop_and_free:
@@ -424,6 +427,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
newsk->sk_backlog_rcv = dccp_v4_do_rcv;
newnp->pktoptions = NULL;
newnp->opt = NULL;
+ newnp->ipv6_mc_list = NULL;
+ newnp->ipv6_ac_list = NULL;
+ newnp->ipv6_fl_list = NULL;
newnp->mcast_oif = inet6_iif(skb);
newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
@@ -488,6 +494,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
/* Clone RX bits */
newnp->rxopt.all = np->rxopt.all;
+ newnp->ipv6_mc_list = NULL;
+ newnp->ipv6_ac_list = NULL;
+ newnp->ipv6_fl_list = NULL;
newnp->pktoptions = NULL;
newnp->opt = NULL;
newnp->mcast_oif = inet6_iif(skb);
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 53eddf99e4f6..39e7e2bca8db 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -122,6 +122,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk,
/* It is still raw copy of parent, so invalidate
* destructor and make plain sk_free() */
newsk->sk_destruct = NULL;
+ bh_unlock_sock(newsk);
sk_free(newsk);
return NULL;
}
@@ -145,6 +146,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
struct dccp_request_sock *dreq = dccp_rsk(req);
bool own_req;
+ /* TCP/DCCP listeners became lockless.
+ * DCCP stores complex state in its request_sock, so we need
+ * a protection for them, now this code runs without being protected
+ * by the parent (listener) lock.
+ */
+ spin_lock_bh(&dreq->dreq_lock);
+
/* Check for retransmitted REQUEST */
if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
@@ -159,7 +167,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
inet_rtx_syn_ack(sk, req);
}
/* Network Duplicate, discard packet */
- return NULL;
+ goto out;
}
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
@@ -185,20 +193,20 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
req, &own_req);
- if (!child)
- goto listen_overflow;
-
- return inet_csk_complete_hashdance(sk, child, req, own_req);
+ if (child) {
+ child = inet_csk_complete_hashdance(sk, child, req, own_req);
+ goto out;
+ }
-listen_overflow:
- dccp_pr_debug("listen_overflow!\n");
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
drop:
if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
req->rsk_ops->send_reset(sk, skb);
inet_csk_reqsk_queue_drop(sk, req);
- return NULL;
+out:
+ spin_unlock_bh(&dreq->dreq_lock);
+ return child;
}
EXPORT_SYMBOL_GPL(dccp_check_req);
@@ -249,6 +257,7 @@ int dccp_reqsk_init(struct request_sock *req,
{
struct dccp_request_sock *dreq = dccp_rsk(req);
+ spin_lock_init(&dreq->dreq_lock);
inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport;
inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport);
inet_rsk(req)->acked = 0;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 9fe25bf63296..b68168fcc06a 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -24,6 +24,7 @@
#include <net/checksum.h>
#include <net/inet_sock.h>
+#include <net/inet_common.h>
#include <net/sock.h>
#include <net/xfrm.h>
@@ -170,6 +171,15 @@ const char *dccp_packet_name(const int type)
EXPORT_SYMBOL_GPL(dccp_packet_name);
+static void dccp_sk_destruct(struct sock *sk)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+
+ ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+ dp->dccps_hc_tx_ccid = NULL;
+ inet_sock_destruct(sk);
+}
+
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
struct dccp_sock *dp = dccp_sk(sk);
@@ -179,6 +189,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
icsk->icsk_syn_retries = sysctl_dccp_request_retries;
sk->sk_state = DCCP_CLOSED;
sk->sk_write_space = dccp_write_space;
+ sk->sk_destruct = dccp_sk_destruct;
icsk->icsk_sync_mss = dccp_sync_mss;
dp->dccps_mss_cache = 536;
dp->dccps_rate_last = jiffies;
@@ -201,10 +212,7 @@ void dccp_destroy_sock(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
- /*
- * DCCP doesn't use sk_write_queue, just sk_send_head
- * for retransmissions
- */
+ __skb_queue_purge(&sk->sk_write_queue);
if (sk->sk_send_head != NULL) {
kfree_skb(sk->sk_send_head);
sk->sk_send_head = NULL;
@@ -222,8 +230,7 @@ void dccp_destroy_sock(struct sock *sk)
dp->dccps_hc_rx_ackvec = NULL;
}
ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
- ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
- dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
+ dp->dccps_hc_rx_ccid = NULL;
/* clean up feature negotiation state */
dccp_feat_list_purge(&dp->dccps_featneg);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index b1dc096d22f8..403593bd2b83 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -188,12 +188,6 @@ static inline void dnrt_free(struct dn_route *rt)
call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
}
-static inline void dnrt_drop(struct dn_route *rt)
-{
- dst_release(&rt->dst);
- call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
-}
-
static void dn_dst_check_expire(unsigned long dummy)
{
int i;
@@ -248,7 +242,7 @@ static int dn_dst_gc(struct dst_ops *ops)
}
*rtp = rt->dst.dn_next;
rt->dst.dn_next = NULL;
- dnrt_drop(rt);
+ dnrt_free(rt);
break;
}
spin_unlock_bh(&dn_rt_hash_table[i].lock);
@@ -350,7 +344,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
dst_use(&rth->dst, now);
spin_unlock_bh(&dn_rt_hash_table[hash].lock);
- dnrt_drop(rt);
+ dst_free(&rt->dst);
*rp = rth;
return 0;
}
@@ -380,7 +374,7 @@ static void dn_run_flush(unsigned long dummy)
for(; rt; rt = next) {
next = rcu_dereference_raw(rt->dst.dn_next);
RCU_INIT_POINTER(rt->dst.dn_next, NULL);
- dst_free((struct dst_entry *)rt);
+ dnrt_free(rt);
}
nothing_to_declare:
@@ -1187,7 +1181,7 @@ make_route:
if (dev_out->flags & IFF_LOOPBACK)
flags |= RTCF_LOCAL;
- rt = dst_alloc(&dn_dst_ops, dev_out, 1, DST_OBSOLETE_NONE, DST_HOST);
+ rt = dst_alloc(&dn_dst_ops, dev_out, 0, DST_OBSOLETE_NONE, DST_HOST);
if (rt == NULL)
goto e_nobufs;
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 85f2fdc360c2..29246bc9a7b4 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -102,7 +102,9 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
{
struct nlmsghdr *nlh = nlmsg_hdr(skb);
- if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
+ if (skb->len < sizeof(*nlh) ||
+ nlh->nlmsg_len < sizeof(*nlh) ||
+ skb->len < nlh->nlmsg_len)
return;
if (!netlink_capable(skb, CAP_NET_ADMIN))
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 8737412c7b27..e1d4d898a007 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -224,7 +224,7 @@ static int dns_resolver_match_preparse(struct key_match_data *match_data)
static void dns_resolver_describe(const struct key *key, struct seq_file *m)
{
seq_puts(m, key->description);
- if (key_is_instantiated(key)) {
+ if (key_is_positive(key)) {
int err = PTR_ERR(key->payload.data[dns_key_error]);
if (err)
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 96e47c539bee..39bb5b3a82f2 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -1,12 +1,13 @@
config HAVE_NET_DSA
def_bool y
- depends on NETDEVICES && !S390
+ depends on INET && NETDEVICES && !S390
# Drivers must select NET_DSA and the appropriate tagging format
config NET_DSA
tristate "Distributed Switch Architecture"
- depends on HAVE_NET_DSA && NET_SWITCHDEV
+ depends on HAVE_NET_DSA
+ select NET_SWITCHDEV
select PHYLIB
---help---
Say Y if you want to enable support for the hardware switches supported
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 3ff9d97cf56b..5000e6f20f4a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1103,10 +1103,8 @@ static int dsa_slave_phy_connect(struct dsa_slave_priv *p,
/* Use already configured phy mode */
if (p->phy_interface == PHY_INTERFACE_MODE_NA)
p->phy_interface = p->phy->interface;
- phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link,
- p->phy_interface);
-
- return 0;
+ return phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link,
+ p->phy_interface);
}
static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
@@ -1271,26 +1269,32 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
p->old_duplex = -1;
ds->ports[port].netdev = slave_dev;
- ret = register_netdev(slave_dev);
- if (ret) {
- netdev_err(master, "error %d registering interface %s\n",
- ret, slave_dev->name);
- ds->ports[port].netdev = NULL;
- free_netdev(slave_dev);
- return ret;
- }
netif_carrier_off(slave_dev);
ret = dsa_slave_phy_setup(p, slave_dev);
if (ret) {
netdev_err(master, "error %d setting up slave phy\n", ret);
- unregister_netdev(slave_dev);
- free_netdev(slave_dev);
- return ret;
+ goto out_free;
+ }
+
+ ret = register_netdev(slave_dev);
+ if (ret) {
+ netdev_err(master, "error %d registering interface %s\n",
+ ret, slave_dev->name);
+ goto out_phy;
}
return 0;
+
+out_phy:
+ phy_disconnect(p->phy);
+ if (of_phy_is_fixed_link(ds->ports[port].dn))
+ of_phy_deregister_fixed_link(ds->ports[port].dn);
+out_free:
+ free_netdev(slave_dev);
+ ds->ports[port].netdev = NULL;
+ return ret;
}
void dsa_slave_destroy(struct net_device *slave_dev)
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 30d875dff6b5..f85b08baff16 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -580,19 +580,14 @@ static int __net_init lowpan_frags_init_net(struct net *net)
{
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
- int res;
ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
- res = inet_frags_init_net(&ieee802154_lowpan->frags);
- if (res)
- return res;
- res = lowpan_frags_ns_sysctl_register(net);
- if (res)
- inet_frags_uninit_net(&ieee802154_lowpan->frags);
- return res;
+ inet_frags_init_net(&ieee802154_lowpan->frags);
+
+ return lowpan_frags_ns_sysctl_register(net);
}
static void __net_exit lowpan_frags_exit_net(struct net *net)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 215143246e4b..b5116ec31757 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1015,7 +1015,7 @@ static struct inet_protosw inetsw_array[] =
.type = SOCK_DGRAM,
.protocol = IPPROTO_ICMP,
.prot = &ping_prot,
- .ops = &inet_dgram_ops,
+ .ops = &inet_sockraw_ops,
.flags = INET_PROTOSW_REUSE,
},
@@ -1460,8 +1460,10 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
int proto = iph->protocol;
int err = -ENOSYS;
- if (skb->encapsulation)
+ if (skb->encapsulation) {
+ skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IP));
skb_set_inner_network_header(skb, nhoff);
+ }
csum_replace2(&iph->check, iph->tot_len, newlen);
iph->tot_len = newlen;
@@ -1691,6 +1693,13 @@ static __net_init int inet_init_net(struct net *net)
net->ipv4.sysctl_ip_dynaddr = 0;
net->ipv4.sysctl_ip_early_demux = 1;
+ /* Some igmp sysctl, whose values are always used */
+ net->ipv4.sysctl_igmp_max_memberships = 20;
+ net->ipv4.sysctl_igmp_max_msf = 10;
+ /* IGMP reports for link-local multicast groups are enabled by default */
+ net->ipv4.sysctl_igmp_llm_reports = 1;
+ net->ipv4.sysctl_igmp_qrv = 2;
+
return 0;
}
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index f2a71025a770..22377c8ff14b 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -270,6 +270,9 @@ static void ah_input_done(struct crypto_async_request *base, int err)
int ihl = ip_hdrlen(skb);
int ah_hlen = (ah->hdrlen + 2) << 2;
+ if (err)
+ goto out;
+
work_iph = AH_SKB_CB(skb)->tmp;
auth_data = ah_tmp_auth(work_iph, ihl);
icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 89a8cac4726a..51b27ae09fbd 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1263,7 +1263,7 @@ void __init arp_init(void)
/*
* ax25 -> ASCII conversion
*/
-static char *ax2asc2(ax25_address *a, char *buf)
+static void ax2asc2(ax25_address *a, char *buf)
{
char c, *s;
int n;
@@ -1285,10 +1285,10 @@ static char *ax2asc2(ax25_address *a, char *buf)
*s++ = n + '0';
*s++ = '\0';
- if (*buf == '\0' || *buf == '-')
- return "*";
-
- return buf;
+ if (*buf == '\0' || *buf == '-') {
+ buf[0] = '*';
+ buf[1] = '\0';
+ }
}
#endif /* CONFIG_AX25 */
@@ -1322,7 +1322,7 @@ static void arp_format_neigh_entry(struct seq_file *seq,
}
#endif
sprintf(tbuf, "%pI4", n->primary_key);
- seq_printf(seq, "%-16s 0x%-10x0x%-10x%s * %s\n",
+ seq_printf(seq, "%-16s 0x%-10x0x%-10x%-17s * %s\n",
tbuf, hatype, arp_state_to_flags(n), hbuffer, dev->name);
read_unlock(&n->lock);
}
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index ae206163c273..972353cd1778 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1943,7 +1943,7 @@ int cipso_v4_req_setattr(struct request_sock *req,
buf = NULL;
req_inet = inet_rsk(req);
- opt = xchg(&req_inet->opt, opt);
+ opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt);
if (opt)
kfree_rcu(opt, rcu);
@@ -1965,11 +1965,13 @@ req_setattr_failure:
* values on failure.
*
*/
-static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
+static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr)
{
+ struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1);
int hdr_delta = 0;
- struct ip_options_rcu *opt = *opt_ptr;
+ if (!opt || opt->opt.cipso == 0)
+ return 0;
if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
u8 cipso_len;
u8 cipso_off;
@@ -2031,14 +2033,10 @@ static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
*/
void cipso_v4_sock_delattr(struct sock *sk)
{
- int hdr_delta;
- struct ip_options_rcu *opt;
struct inet_sock *sk_inet;
+ int hdr_delta;
sk_inet = inet_sk(sk);
- opt = rcu_dereference_protected(sk_inet->inet_opt, 1);
- if (!opt || opt->opt.cipso == 0)
- return;
hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
if (sk_inet->is_icsk && hdr_delta > 0) {
@@ -2058,15 +2056,7 @@ void cipso_v4_sock_delattr(struct sock *sk)
*/
void cipso_v4_req_delattr(struct request_sock *req)
{
- struct ip_options_rcu *opt;
- struct inet_request_sock *req_inet;
-
- req_inet = inet_rsk(req);
- opt = req_inet->opt;
- if (!opt || opt->opt.cipso == 0)
- return;
-
- cipso_v4_delopt(&req_inet->opt);
+ cipso_v4_delopt(&inet_rsk(req)->ireq_opt);
}
/**
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 5b03d7f3b255..968d8e165e3d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -758,7 +758,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
unsigned int e = 0, s_e;
struct fib_table *tb;
struct hlist_head *head;
- int dumped = 0;
+ int dumped = 0, err;
if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED)
@@ -778,20 +778,27 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
if (dumped)
memset(&cb->args[2], 0, sizeof(cb->args) -
2 * sizeof(cb->args[0]));
- if (fib_table_dump(tb, skb, cb) < 0)
- goto out;
+ err = fib_table_dump(tb, skb, cb);
+ if (err < 0) {
+ if (likely(skb->len))
+ goto out;
+
+ goto out_err;
+ }
dumped = 1;
next:
e++;
}
}
out:
+ err = skb->len;
+out_err:
rcu_read_unlock();
cb->args[1] = e;
cb->args[0] = h;
- return skb->len;
+ return err;
}
/* Prepare and feed intra-kernel routing request.
@@ -1081,7 +1088,8 @@ static void nl_fib_input(struct sk_buff *skb)
net = sock_net(skb->sk);
nlh = nlmsg_hdr(skb);
- if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len ||
+ if (skb->len < nlmsg_total_size(sizeof(*frn)) ||
+ skb->len < nlh->nlmsg_len ||
nlmsg_len(nlh) < sizeof(*frn))
return;
@@ -1311,13 +1319,14 @@ static struct pernet_operations fib_net_ops = {
void __init ip_fib_init(void)
{
- rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
- rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
- rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
+ fib_trie_init();
register_pernet_subsys(&fib_net_ops);
+
register_netdevice_notifier(&fib_netdev_notifier);
register_inetaddr_notifier(&fib_inetaddr_notifier);
- fib_trie_init();
+ rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
+ rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
+ rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 6a4068031aaa..38c1c979ecb1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -204,6 +204,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
static void free_fib_info_rcu(struct rcu_head *head)
{
struct fib_info *fi = container_of(head, struct fib_info, rcu);
+ struct dst_metrics *m;
change_nexthops(fi) {
if (nexthop_nh->nh_dev)
@@ -214,8 +215,9 @@ static void free_fib_info_rcu(struct rcu_head *head)
rt_fibinfo_free(&nexthop_nh->nh_rth_input);
} endfor_nexthops(fi);
- if (fi->fib_metrics != (u32 *) dst_default_metrics)
- kfree(fi->fib_metrics);
+ m = fi->fib_metrics;
+ if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt))
+ kfree(m);
kfree(fi);
}
@@ -982,11 +984,11 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg)
val = 255;
if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
return -EINVAL;
- fi->fib_metrics[type - 1] = val;
+ fi->fib_metrics->metrics[type - 1] = val;
}
if (ecn_ca)
- fi->fib_metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
+ fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
return 0;
}
@@ -1042,14 +1044,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
if (!fi)
goto failure;
- fib_info_cnt++;
if (cfg->fc_mx) {
- fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
- if (!fi->fib_metrics)
- goto failure;
- } else
- fi->fib_metrics = (u32 *) dst_default_metrics;
-
+ fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL);
+ if (unlikely(!fi->fib_metrics)) {
+ kfree(fi);
+ return ERR_PTR(err);
+ }
+ atomic_set(&fi->fib_metrics->refcnt, 1);
+ } else {
+ fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
+ }
+ fib_info_cnt++;
fi->fib_net = net;
fi->fib_protocol = cfg->fc_protocol;
fi->fib_scope = cfg->fc_scope;
@@ -1252,7 +1257,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
if (fi->fib_priority &&
nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
goto nla_put_failure;
- if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
+ if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0)
goto nla_put_failure;
if (fi->fib_prefsrc &&
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index e3665bf7a7f3..ef40bb659a7a 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1932,6 +1932,8 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
/* rcu_read_lock is hold by caller */
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
+ int err;
+
if (i < s_i) {
i++;
continue;
@@ -1942,17 +1944,14 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
continue;
}
- if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWROUTE,
- tb->tb_id,
- fa->fa_type,
- xkey,
- KEYLENGTH - fa->fa_slen,
- fa->fa_tos,
- fa->fa_info, NLM_F_MULTI) < 0) {
+ err = fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, RTM_NEWROUTE,
+ tb->tb_id, fa->fa_type,
+ xkey, KEYLENGTH - fa->fa_slen,
+ fa->fa_tos, fa->fa_info, NLM_F_MULTI);
+ if (err < 0) {
cb->args[4] = i;
- return -1;
+ return err;
}
i++;
}
@@ -1974,10 +1973,13 @@ int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
t_key key = cb->args[3];
while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
- if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
+ int err;
+
+ err = fn_trie_dump_leaf(l, tb, skb, cb);
+ if (err < 0) {
cb->args[3] = key;
cb->args[2] = count;
- return -1;
+ return err;
}
++count;
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index d5cac99170b1..8c72034df28e 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -98,7 +98,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
greh = (struct gre_base_hdr *)skb_transport_header(skb);
pcsum = (__sum16 *)(greh + 1);
- if (gso_partial) {
+ if (gso_partial && skb_is_gso(skb)) {
unsigned int partial_adj;
/* Adjust checksum to account for the fact that
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 1bc623d7f754..08575e3bd135 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1112,6 +1112,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
pmc = kzalloc(sizeof(*pmc), GFP_KERNEL);
if (!pmc)
return;
+ spin_lock_init(&pmc->lock);
spin_lock_bh(&im->lock);
pmc->interface = im->interface;
in_dev_hold(in_dev);
@@ -2071,21 +2072,26 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
static void ip_mc_clear_src(struct ip_mc_list *pmc)
{
- struct ip_sf_list *psf, *nextpsf;
+ struct ip_sf_list *psf, *nextpsf, *tomb, *sources;
- for (psf = pmc->tomb; psf; psf = nextpsf) {
+ spin_lock_bh(&pmc->lock);
+ tomb = pmc->tomb;
+ pmc->tomb = NULL;
+ sources = pmc->sources;
+ pmc->sources = NULL;
+ pmc->sfmode = MCAST_EXCLUDE;
+ pmc->sfcount[MCAST_INCLUDE] = 0;
+ pmc->sfcount[MCAST_EXCLUDE] = 1;
+ spin_unlock_bh(&pmc->lock);
+
+ for (psf = tomb; psf; psf = nextpsf) {
nextpsf = psf->sf_next;
kfree(psf);
}
- pmc->tomb = NULL;
- for (psf = pmc->sources; psf; psf = nextpsf) {
+ for (psf = sources; psf; psf = nextpsf) {
nextpsf = psf->sf_next;
kfree(psf);
}
- pmc->sources = NULL;
- pmc->sfmode = MCAST_EXCLUDE;
- pmc->sfcount[MCAST_INCLUDE] = 0;
- pmc->sfcount[MCAST_EXCLUDE] = 1;
}
/* Join a multicast group
@@ -2968,12 +2974,6 @@ static int __net_init igmp_net_init(struct net *net)
goto out_sock;
}
- /* Sysctl initialization */
- net->ipv4.sysctl_igmp_max_memberships = 20;
- net->ipv4.sysctl_igmp_max_msf = 10;
- /* IGMP reports for link-local multicast groups are enabled by default */
- net->ipv4.sysctl_igmp_llm_reports = 1;
- net->ipv4.sysctl_igmp_qrv = 2;
return 0;
out_sock:
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 61a9deec2993..d1cab49393e2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -407,9 +407,11 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct net *net = read_pnet(&ireq->ireq_net);
- struct ip_options_rcu *opt = ireq->opt;
+ struct ip_options_rcu *opt;
struct rtable *rt;
+ opt = ireq_opt_deref(ireq);
+
flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -443,10 +445,9 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
struct flowi4 *fl4;
struct rtable *rt;
+ opt = rcu_dereference(ireq->ireq_opt);
fl4 = &newinet->cork.fl.u.ip4;
- rcu_read_lock();
- opt = rcu_dereference(newinet->inet_opt);
flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -459,13 +460,11 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
goto no_route;
if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
goto route_err;
- rcu_read_unlock();
return &rt->dst;
route_err:
ip_rt_put(rt);
no_route:
- rcu_read_unlock();
__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
@@ -665,6 +664,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
/* listeners have SOCK_RCU_FREE, not the children */
sock_reset_flag(newsk, SOCK_RCU_FREE);
+ inet_sk(newsk)->mc_list = NULL;
+
newsk->sk_mark = inet_rsk(req)->ir_mark;
atomic64_set(&newsk->sk_cookie,
atomic64_read(&inet_rsk(req)->ir_cookie));
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index b5e9317eaf9e..631c0d0d7cf8 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -234,10 +234,8 @@ evict_again:
cond_resched();
if (read_seqretry(&f->rnd_seqlock, seq) ||
- percpu_counter_sum(&nf->mem))
+ sum_frag_mem_limit(nf))
goto evict_again;
-
- percpu_counter_destroy(&nf->mem);
}
EXPORT_SYMBOL(inet_frags_exit_net);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index ca97835bfec4..b9bcf3db3af9 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -455,10 +455,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
return reuseport_add_sock(sk, sk2);
}
- /* Initial allocation may have already happened via setsockopt */
- if (!rcu_access_pointer(sk->sk_reuseport_cb))
- return reuseport_alloc(sk);
- return 0;
+ return reuseport_alloc(sk);
}
int __inet_hash(struct sock *sk, struct sock *osk,
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index bbe7f72db9c1..453db950dc9f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -835,8 +835,6 @@ static void __init ip4_frags_ctl_register(void)
static int __net_init ipv4_frags_init_net(struct net *net)
{
- int res;
-
/* Fragment cache limits.
*
* The fragment memory accounting code, (tries to) account for
@@ -862,13 +860,9 @@ static int __net_init ipv4_frags_init_net(struct net *net)
net->ipv4.frags.max_dist = 64;
- res = inet_frags_init_net(&net->ipv4.frags);
- if (res)
- return res;
- res = ip4_frags_ns_ctl_register(net);
- if (res)
- inet_frags_uninit_net(&net->ipv4.frags);
- return res;
+ inet_frags_init_net(&net->ipv4.frags);
+
+ return ip4_frags_ns_ctl_register(net);
}
static void __net_exit ipv4_frags_exit_net(struct net *net)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e5c1dbef3626..2c3c1a223df4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -936,10 +936,12 @@ static int __ip_append_data(struct sock *sk,
csummode = CHECKSUM_PARTIAL;
cork->length += length;
- if (((length > mtu) || (skb && skb_is_gso(skb))) &&
+ if ((skb && skb_is_gso(skb)) ||
+ ((length > mtu) &&
+ (skb_queue_len(queue) <= 1) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
- (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
+ (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) {
err = ip_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, transhdrlen,
maxfraglen, flags);
@@ -1255,6 +1257,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
return -EINVAL;
if ((size + skb->len > mtu) &&
+ (skb_queue_len(&sk->sk_write_queue) == 1) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO)) {
if (skb->ip_summed != CHECKSUM_PARTIAL)
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 65336f38a5d8..551dd393ceec 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -105,10 +105,10 @@ static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
if (skb->ip_summed != CHECKSUM_COMPLETE)
return;
- if (offset != 0)
- csum = csum_sub(csum,
- csum_partial(skb_transport_header(skb) + tlen,
- offset, 0));
+ if (offset != 0) {
+ int tend_off = skb_transport_offset(skb) + tlen;
+ csum = csum_sub(csum, skb_checksum(skb, tend_off, offset, 0));
+ }
put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum);
}
@@ -474,16 +474,15 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk,
return false;
/* Support IP_PKTINFO on tstamp packets if requested, to correlate
- * timestamp with egress dev. Not possible for packets without dev
+ * timestamp with egress dev. Not possible for packets without iif
* or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
*/
- if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
- (!skb->dev))
+ info = PKTINFO_SKB_CB(skb);
+ if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
+ !info->ipi_ifindex)
return false;
- info = PKTINFO_SKB_CB(skb);
info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
- info->ipi_ifindex = skb->dev->ifindex;
return true;
}
@@ -820,6 +819,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
{
struct ip_mreqn mreq;
struct net_device *dev = NULL;
+ int midx;
if (sk->sk_type == SOCK_STREAM)
goto e_inval;
@@ -864,11 +864,15 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = -EADDRNOTAVAIL;
if (!dev)
break;
+
+ midx = l3mdev_master_ifindex(dev);
+
dev_put(dev);
err = -EINVAL;
if (sk->sk_bound_dev_if &&
- mreq.imr_ifindex != sk->sk_bound_dev_if)
+ mreq.imr_ifindex != sk->sk_bound_dev_if &&
+ (!midx || midx != sk->sk_bound_dev_if))
break;
inet->mc_index = mreq.imr_ifindex;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 5719d6ba0824..bd7f1836bb70 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -609,8 +609,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
ip_rt_put(rt);
goto tx_dropped;
}
- iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, key->tos,
- key->ttl, df, !net_eq(tunnel->net, dev_net(dev)));
+ iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
+ df, !net_eq(tunnel->net, dev_net(dev)));
return;
tx_error:
dev->stats.tx_errors++;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 5d7944f394d9..b120b9b11402 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -168,6 +168,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
struct ip_tunnel_parm *parms = &tunnel->parms;
struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev; /* Device to other host */
+ int pkt_len = skb->len;
int err;
int mtu;
@@ -229,7 +230,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
err = dst_output(tunnel->net, skb->sk, skb);
if (net_xmit_eval(err) == 0)
- err = skb->len;
+ err = pkt_len;
iptunnel_xmit_stats(dev, err);
return NETDEV_TX_OK;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c9392589c415..56d71a004dce 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -128,43 +128,68 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly;
static int ipip_err(struct sk_buff *skb, u32 info)
{
-
-/* All the routers (except for Linux) return only
- 8 bytes of packet payload. It means, that precise relaying of
- ICMP in the real Internet is absolutely infeasible.
- */
+ /* All the routers (except for Linux) return only
+ * 8 bytes of packet payload. It means, that precise relaying of
+ * ICMP in the real Internet is absolutely infeasible.
+ */
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
const struct iphdr *iph = (const struct iphdr *)skb->data;
- struct ip_tunnel *t;
- int err;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
+ struct ip_tunnel *t;
+ int err = 0;
+
+ switch (type) {
+ case ICMP_DEST_UNREACH:
+ switch (code) {
+ case ICMP_SR_FAILED:
+ /* Impossible event. */
+ goto out;
+ default:
+ /* All others are translated to HOST_UNREACH.
+ * rfc2003 contains "deep thoughts" about NET_UNREACH,
+ * I believe they are just ether pollution. --ANK
+ */
+ break;
+ }
+ break;
+
+ case ICMP_TIME_EXCEEDED:
+ if (code != ICMP_EXC_TTL)
+ goto out;
+ break;
+
+ case ICMP_REDIRECT:
+ break;
+
+ default:
+ goto out;
+ }
- err = -ENOENT;
t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
iph->daddr, iph->saddr, 0);
- if (!t)
+ if (!t) {
+ err = -ENOENT;
goto out;
+ }
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->parms.link, 0, iph->protocol, 0);
- err = 0;
+ ipv4_update_pmtu(skb, net, info, t->parms.link, 0,
+ iph->protocol, 0);
goto out;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
- iph->protocol, 0);
- err = 0;
+ ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0);
goto out;
}
- if (t->parms.iph.daddr == 0)
+ if (t->parms.iph.daddr == 0) {
+ err = -ENOENT;
goto out;
+ }
- err = 0;
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
goto out;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index b3cc1335adbc..c0cc6aa8cfaa 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -23,7 +23,8 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t
struct rtable *rt;
struct flowi4 fl4 = {};
__be32 saddr = iph->saddr;
- __u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
+ const struct sock *sk = skb_to_full_sk(skb);
+ __u8 flags = sk ? inet_sk_flowi_flags(sk) : 0;
struct net_device *dev = skb_dst(skb)->dev;
unsigned int hh_len;
@@ -40,7 +41,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t
fl4.daddr = iph->daddr;
fl4.saddr = saddr;
fl4.flowi4_tos = RT_TOS(iph->tos);
- fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
+ fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0;
if (!fl4.flowi4_oif)
fl4.flowi4_oif = l3mdev_master_ifindex(dev);
fl4.flowi4_mark = skb->mark;
@@ -61,7 +62,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t
xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
struct dst_entry *dst = skb_dst(skb);
skb_dst_set(skb, NULL);
- dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0);
+ dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0);
if (IS_ERR(dst))
return PTR_ERR(dst);
skb_dst_set(skb, dst);
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index c9b52c361da2..5a8f7c360887 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1304,6 +1304,7 @@ static int __init nf_nat_snmp_basic_init(void)
static void __exit nf_nat_snmp_basic_fini(void)
{
RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
+ synchronize_rcu();
nf_conntrack_helper_unregister(&snmp_trap_helper);
}
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index fd8220213afc..146d86105183 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -126,6 +126,8 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
/* ip_route_me_harder expects skb->dst to be set */
skb_dst_set_noref(nskb, skb_dst(oldskb));
+ nskb->mark = IP4_REPLY_MARK(net, oldskb->mark);
+
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
ip4_dst_hoplimit(skb_dst(nskb)));
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 105c0748c52f..e612991c9185 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -156,17 +156,18 @@ int ping_hash(struct sock *sk)
void ping_unhash(struct sock *sk)
{
struct inet_sock *isk = inet_sk(sk);
+
pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
+ write_lock_bh(&ping_table.lock);
if (sk_hashed(sk)) {
- write_lock_bh(&ping_table.lock);
hlist_nulls_del(&sk->sk_nulls_node);
sk_nulls_node_init(&sk->sk_nulls_node);
sock_put(sk);
isk->inet_num = 0;
isk->inet_sport = 0;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
- write_unlock_bh(&ping_table.lock);
}
+ write_unlock_bh(&ping_table.lock);
}
EXPORT_SYMBOL_GPL(ping_unhash);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index ecbe5a7c2d6d..9879b73d5565 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -356,6 +356,9 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
rt->dst.dev->mtu);
return -EMSGSIZE;
}
+ if (length < sizeof(struct iphdr))
+ return -EINVAL;
+
if (flags&MSG_PROBE)
goto out;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d851cae27dac..6a5b7783932e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1247,7 +1247,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
if (mtu)
return mtu;
- mtu = dst->dev->mtu;
+ mtu = READ_ONCE(dst->dev->mtu);
if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
if (rt->rt_uses_gateway && mtu > 576)
@@ -1364,8 +1364,12 @@ static void rt_add_uncached_list(struct rtable *rt)
static void ipv4_dst_destroy(struct dst_entry *dst)
{
+ struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst);
struct rtable *rt = (struct rtable *) dst;
+ if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt))
+ kfree(p);
+
if (!list_empty(&rt->rt_uncached)) {
struct uncached_list *ul = rt->rt_uncached_list;
@@ -1417,7 +1421,11 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
rt->rt_gateway = nh->nh_gw;
rt->rt_uses_gateway = 1;
}
- dst_init_metrics(&rt->dst, fi->fib_metrics, true);
+ dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true);
+ if (fi->fib_metrics != &dst_default_metrics) {
+ rt->dst._metrics |= DST_METRICS_REFCOUNTED;
+ atomic_inc(&fi->fib_metrics->refcnt);
+ }
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
@@ -1968,6 +1976,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
{
int res;
+ tos &= IPTOS_RT_MASK;
rcu_read_lock();
/* Multicast recognition logic is moved from route cache to here.
@@ -2568,7 +2577,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
skb_reset_network_header(skb);
/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
- ip_hdr(skb)->protocol = IPPROTO_ICMP;
+ ip_hdr(skb)->protocol = IPPROTO_UDP;
skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index e3c4043c27de..0597ad73a1fa 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -334,6 +334,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
treq = tcp_rsk(req);
treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
+ treq->txhash = net_tx_rndhash();
req->mss = mss;
ireq->ir_num = ntohs(th->dest);
ireq->ir_rmt_port = th->source;
@@ -353,7 +354,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)
*/
- ireq->opt = tcp_v4_save_options(skb);
+ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb));
if (security_inet_conn_request(sk, skb, req)) {
reqsk_free(req);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 80bc36b25de2..566cfc50f7cf 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -958,7 +958,7 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_tcp_notsent_lowat,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_douintvec,
},
#ifdef CONFIG_IP_ROUTE_MULTIPATH
{
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6a90a0e130dc..dd33c785ce16 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -431,7 +431,7 @@ EXPORT_SYMBOL(tcp_init_sock);
static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb)
{
- if (tsflags) {
+ if (tsflags && skb) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -966,10 +966,8 @@ new_segment:
copied += copy;
offset += copy;
size -= copy;
- if (!size) {
- tcp_tx_timestamp(sk, sk->sk_tsflags, skb);
+ if (!size)
goto out;
- }
if (skb->len < size_goal || (flags & MSG_OOB))
continue;
@@ -995,8 +993,11 @@ wait_for_memory:
}
out:
- if (copied && !(flags & MSG_SENDPAGE_NOTLAST))
- tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
+ if (copied) {
+ tcp_tx_timestamp(sk, sk->sk_tsflags, tcp_write_queue_tail(sk));
+ if (!(flags & MSG_SENDPAGE_NOTLAST))
+ tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
+ }
return copied;
do_error:
@@ -1078,9 +1079,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
int *copied, size_t size)
{
struct tcp_sock *tp = tcp_sk(sk);
+ struct sockaddr *uaddr = msg->msg_name;
int err, flags;
- if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE))
+ if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+ (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
+ uaddr->sa_family == AF_UNSPEC))
return -EOPNOTSUPP;
if (tp->fastopen_req)
return -EALREADY; /* Another Fast Open is in progress */
@@ -1093,7 +1097,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
tp->fastopen_req->size = size;
flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
- err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
+ err = __inet_stream_connect(sk->sk_socket, uaddr,
msg->msg_namelen, flags);
*copied = tp->fastopen_req->copied;
tcp_free_fastopen_req(tp);
@@ -1286,7 +1290,6 @@ new_segment:
copied += copy;
if (!msg_data_left(msg)) {
- tcp_tx_timestamp(sk, sockc.tsflags, skb);
if (unlikely(flags & MSG_EOR))
TCP_SKB_CB(skb)->eor = 1;
goto out;
@@ -1317,8 +1320,10 @@ wait_for_memory:
}
out:
- if (copied)
+ if (copied) {
+ tcp_tx_timestamp(sk, sockc.tsflags, tcp_write_queue_tail(sk));
tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
+ }
out_nopush:
release_sock(sk);
return copied + copied_syn;
@@ -2294,9 +2299,16 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_set_ca_state(sk, TCP_CA_Open);
tcp_clear_retrans(tp);
inet_csk_delack_init(sk);
+ /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
+ * issue in __tcp_select_window()
+ */
+ icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
tcp_init_send_head(sk);
memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
__sk_dst_reset(sk);
+ dst_release(sk->sk_rx_dst);
+ sk->sk_rx_dst = NULL;
+ tcp_saved_syn_free(tp);
WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 0ea66c2c9344..cb8db347c680 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -83,7 +83,8 @@ struct bbr {
cwnd_gain:10, /* current gain for setting cwnd */
full_bw_cnt:3, /* number of rounds without large bw gains */
cycle_idx:3, /* current index in pacing_gain cycle array */
- unused_b:6;
+ has_seen_rtt:1, /* have we seen an RTT sample yet? */
+ unused_b:5;
u32 prior_cwnd; /* prior cwnd upon entering loss recovery */
u32 full_bw; /* recent bw, to estimate if pipe is full */
};
@@ -182,6 +183,35 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
return rate >> BW_SCALE;
}
+/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */
+static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
+{
+ u64 rate = bw;
+
+ rate = bbr_rate_bytes_per_sec(sk, rate, gain);
+ rate = min_t(u64, rate, sk->sk_max_pacing_rate);
+ return rate;
+}
+
+/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
+static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ u64 bw;
+ u32 rtt_us;
+
+ if (tp->srtt_us) { /* any RTT sample yet? */
+ rtt_us = max(tp->srtt_us >> 3, 1U);
+ bbr->has_seen_rtt = 1;
+ } else { /* no RTT sample yet */
+ rtt_us = USEC_PER_MSEC; /* use nominal default RTT */
+ }
+ bw = (u64)tp->snd_cwnd * BW_UNIT;
+ do_div(bw, rtt_us);
+ sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
+}
+
/* Pace using current bw estimate and a gain factor. In order to help drive the
* network toward lower queues while maintaining high utilization and low
* latency, the average pacing rate aims to be slightly (~1%) lower than the
@@ -191,12 +221,13 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
*/
static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
{
+ struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- u64 rate = bw;
+ u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
- rate = bbr_rate_bytes_per_sec(sk, rate, gain);
- rate = min_t(u64, rate, sk->sk_max_pacing_rate);
- if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate)
+ if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
+ bbr_init_pacing_rate_from_rtt(sk);
+ if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
sk->sk_pacing_rate = rate;
}
@@ -769,7 +800,6 @@ static void bbr_init(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- u64 bw;
bbr->prior_cwnd = 0;
bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */
@@ -785,11 +815,8 @@ static void bbr_init(struct sock *sk)
minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */
- /* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
- bw = (u64)tp->snd_cwnd * BW_UNIT;
- do_div(bw, (tp->srtt_us >> 3) ? : USEC_PER_MSEC);
- sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */
- bbr_set_pacing_rate(sk, bw, bbr_high_gain);
+ bbr->has_seen_rtt = 0;
+ bbr_init_pacing_rate_from_rtt(sk);
bbr->restore_cwnd = 0;
bbr->round_start = 0;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index f9038d6b109e..0cdbea9b9288 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -167,12 +167,8 @@ void tcp_assign_congestion_control(struct sock *sk)
}
out:
rcu_read_unlock();
+ memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
- /* Clear out private data before diag gets it and
- * the ca has not been initialized.
- */
- if (ca->get_info)
- memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
if (ca->flags & TCP_CONG_NEEDS_ECN)
INET_ECN_xmit(sk);
else
@@ -183,6 +179,7 @@ void tcp_init_congestion_control(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
+ tcp_sk(sk)->prior_ssthresh = 0;
if (icsk->icsk_ca_ops->init)
icsk->icsk_ca_ops->init(sk);
if (tcp_ca_needs_ecn(sk))
@@ -199,11 +196,10 @@ static void tcp_reinit_congestion_control(struct sock *sk,
tcp_cleanup_congestion_control(sk);
icsk->icsk_ca_ops = ca;
icsk->icsk_ca_setsockopt = 1;
+ memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
- if (sk->sk_state != TCP_CLOSE) {
- memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
+ if (sk->sk_state != TCP_CLOSE)
tcp_init_congestion_control(sk);
- }
}
/* Manage refcounts on socket close. */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c71d49ce0c93..8fcd0c642742 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1177,13 +1177,14 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
*/
if (pkt_len > mss) {
unsigned int new_len = (pkt_len / mss) * mss;
- if (!in_sack && new_len < pkt_len) {
+ if (!in_sack && new_len < pkt_len)
new_len += mss;
- if (new_len >= skb->len)
- return 0;
- }
pkt_len = new_len;
}
+
+ if (pkt_len >= skb->len && !in_sack)
+ return 0;
+
err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
if (err < 0)
return err;
@@ -2559,8 +2560,8 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
return;
/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
- if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
- (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
+ if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
+ (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
tp->snd_cwnd = tp->snd_ssthresh;
tp->snd_cwnd_stamp = tcp_time_stamp;
}
@@ -3035,8 +3036,7 @@ void tcp_rearm_rto(struct sock *sk)
/* delta may not be positive if the socket is locked
* when the retrans timer fires and is rescheduled.
*/
- if (delta > 0)
- rto = delta;
+ rto = max(delta, 1);
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
TCP_RTO_MAX);
@@ -3232,7 +3232,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
int delta;
/* Non-retransmitted hole got filled? That's reordering */
- if (reord < prior_fackets)
+ if (reord < prior_fackets && reord <= tp->fackets_out)
tcp_update_reordering(sk, tp->fackets_out - reord, 0);
delta = tcp_is_fack(tp) ? pkts_acked :
@@ -5571,6 +5571,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
struct inet_connection_sock *icsk = inet_csk(sk);
tcp_set_state(sk, TCP_ESTABLISHED);
+ icsk->icsk_ack.lrcvtime = tcp_time_stamp;
if (skb) {
icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
@@ -5789,7 +5790,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* to stand against the temptation 8) --ANK
*/
inet_csk_schedule_ack(sk);
- icsk->icsk_ack.lrcvtime = tcp_time_stamp;
tcp_enter_quickack_mode(sk);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
TCP_DELACK_MAX, TCP_RTO_MAX);
@@ -5916,9 +5916,15 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (th->syn) {
if (th->fin)
goto discard;
- if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
- return 1;
+ /* It is possible that we process SYN packets from backlog,
+ * so we need to make sure to disable BH right there.
+ */
+ local_bh_disable();
+ acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
+ local_bh_enable();
+ if (!acceptable)
+ return 1;
consume_skb(skb);
return 0;
}
@@ -6231,7 +6237,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
struct inet_request_sock *ireq = inet_rsk(req);
kmemcheck_annotate_bitfield(ireq, flags);
- ireq->opt = NULL;
+ ireq->ireq_opt = NULL;
#if IS_ENABLED(CONFIG_IPV6)
ireq->pktopts = NULL;
#endif
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2259114c7242..d577ec07a0d8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -269,10 +269,13 @@ EXPORT_SYMBOL(tcp_v4_connect);
*/
void tcp_v4_mtu_reduced(struct sock *sk)
{
- struct dst_entry *dst;
struct inet_sock *inet = inet_sk(sk);
- u32 mtu = tcp_sk(sk)->mtu_info;
+ struct dst_entry *dst;
+ u32 mtu;
+ if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
+ return;
+ mtu = tcp_sk(sk)->mtu_info;
dst = inet_csk_update_pmtu(sk, mtu);
if (!dst)
return;
@@ -418,7 +421,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
switch (type) {
case ICMP_REDIRECT:
- do_redirect(icmp_skb, sk);
+ if (!sock_owned_by_user(sk))
+ do_redirect(icmp_skb, sk);
goto out;
case ICMP_SOURCE_QUENCH:
/* Just silently ignore these. */
@@ -857,7 +861,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- ireq->opt);
+ ireq_opt_deref(ireq));
err = net_xmit_eval(err);
}
@@ -869,7 +873,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
*/
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
- kfree(inet_rsk(req)->opt);
+ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
#ifdef CONFIG_TCP_MD5SIG
@@ -1195,7 +1199,7 @@ static void tcp_v4_init_req(struct request_sock *req,
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
- ireq->opt = tcp_v4_save_options(skb);
+ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb));
}
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
@@ -1291,10 +1295,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
sk_daddr_set(newsk, ireq->ir_rmt_addr);
sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
newsk->sk_bound_dev_if = ireq->ir_iif;
- newinet->inet_saddr = ireq->ir_loc_addr;
- inet_opt = ireq->opt;
- rcu_assign_pointer(newinet->inet_opt, inet_opt);
- ireq->opt = NULL;
+ newinet->inet_saddr = ireq->ir_loc_addr;
+ inet_opt = rcu_dereference(ireq->ireq_opt);
+ RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->rcv_tos = ip_hdr(skb)->tos;
@@ -1342,9 +1345,12 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
- if (*own_req)
+ if (likely(*own_req)) {
tcp_move_syn(newtp, req);
-
+ ireq->ireq_opt = NULL;
+ } else {
+ newinet->inet_opt = NULL;
+ }
return newsk;
exit_overflow:
@@ -1355,6 +1361,7 @@ exit:
tcp_listendrop(sk);
return NULL;
put_and_exit:
+ newinet->inet_opt = NULL;
inet_csk_prepare_forced_close(newsk);
tcp_done(newsk);
goto exit;
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index c67ece1390c2..7d86fc505397 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -264,13 +264,15 @@ static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample)
{
struct tcp_sock *tp = tcp_sk(sk);
struct lp *lp = inet_csk_ca(sk);
+ u32 delta;
if (sample->rtt_us > 0)
tcp_lp_rtt_sample(sk, sample->rtt_us);
/* calc inference */
- if (tcp_time_stamp > tp->rx_opt.rcv_tsecr)
- lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr);
+ delta = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+ if ((s32)delta > 0)
+ lp->inference = 3 * delta;
/* test if within inference */
if (lp->last_drop && (tcp_time_stamp - lp->last_drop < lp->inference))
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 6234ebaa7db1..64e1ba49c3e2 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -466,6 +466,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U);
newicsk->icsk_rto = TCP_TIMEOUT_INIT;
+ newicsk->icsk_ack.lrcvtime = tcp_time_stamp;
newtp->packets_out = 0;
newtp->retrans_out = 0;
@@ -542,6 +543,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
newtp->rx_opt.mss_clamp = req->mss;
tcp_ecn_openreq_child(newtp, req);
+ newtp->fastopen_req = NULL;
newtp->fastopen_rsk = NULL;
newtp->syn_data_acked = 0;
newtp->rack.mstamp.v64 = 0;
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 5de82a8d4d87..e45e2c41c7bd 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -263,7 +263,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
/* rate in 100's bits per second */
rate64 = ((u64)sample->in_flight) * 8000000;
- rate = (u32)div64_u64(rate64, (u64)(avg_rtt * 100));
+ rate = (u32)div64_u64(rate64, (u64)(avg_rtt ?: 1) * 100);
/* Remember the maximum rate seen during this RTT
* Note: It may be more than one RTT. This function should be
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 65d6189140bc..3d7b59ecc76c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -914,6 +914,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
struct tcp_skb_cb *tcb;
struct tcp_out_options opts;
unsigned int tcp_options_size, tcp_header_size;
+ struct sk_buff *oskb = NULL;
struct tcp_md5sig_key *md5;
struct tcphdr *th;
int err;
@@ -922,11 +923,9 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
tp = tcp_sk(sk);
if (clone_it) {
- skb_mstamp_get(&skb->skb_mstamp);
TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- tp->snd_una;
- tcp_rate_skb_sent(sk, skb);
-
+ oskb = skb;
if (unlikely(skb_cloned(skb)))
skb = pskb_copy(skb, gfp_mask);
else
@@ -934,6 +933,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
if (unlikely(!skb))
return -ENOBUFS;
}
+ skb_mstamp_get(&skb->skb_mstamp);
inet = inet_sk(sk);
tcb = TCP_SKB_CB(skb);
@@ -1035,12 +1035,15 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
- if (likely(err <= 0))
- return err;
-
- tcp_enter_cwr(sk);
-
- return net_xmit_eval(err);
+ if (unlikely(err > 0)) {
+ tcp_enter_cwr(sk);
+ err = net_xmit_eval(err);
+ }
+ if (!err && oskb) {
+ skb_mstamp_get(&oskb->skb_mstamp);
+ tcp_rate_skb_sent(sk, oskb);
+ }
+ return err;
}
/* This routine just queues the buffer for sending.
@@ -1246,7 +1249,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
* eventually). The difference is that pulled data not copied, but
* immediately discarded.
*/
-static void __pskb_trim_head(struct sk_buff *skb, int len)
+static int __pskb_trim_head(struct sk_buff *skb, int len)
{
struct skb_shared_info *shinfo;
int i, k, eat;
@@ -1256,7 +1259,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
__skb_pull(skb, eat);
len -= eat;
if (!len)
- return;
+ return 0;
}
eat = len;
k = 0;
@@ -1282,23 +1285,28 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
skb_reset_tail_pointer(skb);
skb->data_len -= len;
skb->len = skb->data_len;
+ return len;
}
/* Remove acked data from a packet in the transmit queue. */
int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
{
+ u32 delta_truesize;
+
if (skb_unclone(skb, GFP_ATOMIC))
return -ENOMEM;
- __pskb_trim_head(skb, len);
+ delta_truesize = __pskb_trim_head(skb, len);
TCP_SKB_CB(skb)->seq += len;
skb->ip_summed = CHECKSUM_PARTIAL;
- skb->truesize -= len;
- sk->sk_wmem_queued -= len;
- sk_mem_uncharge(sk, len);
- sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
+ if (delta_truesize) {
+ skb->truesize -= delta_truesize;
+ sk->sk_wmem_queued -= delta_truesize;
+ sk_mem_uncharge(sk, delta_truesize);
+ sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
+ }
/* Any change of skb->len requires recalculation of tso factor. */
if (tcp_skb_pcount(skb) > 1)
@@ -1988,6 +1996,7 @@ static int tcp_mtu_probe(struct sock *sk)
nskb->ip_summed = skb->ip_summed;
tcp_insert_write_queue_before(nskb, skb, sk);
+ tcp_highest_sack_replace(sk, skb, nskb);
len = 0;
tcp_for_write_queue_from_safe(skb, next, sk) {
@@ -2527,7 +2536,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
- tcp_highest_sack_combine(sk, next_skb, skb);
+ tcp_highest_sack_replace(sk, next_skb, skb);
tcp_unlink_write_queue(next_skb, sk);
@@ -2704,10 +2713,11 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
skb_headroom(skb) >= 0xFFFF)) {
struct sk_buff *nskb;
- skb_mstamp_get(&skb->skb_mstamp);
nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
-ENOBUFS;
+ if (!err)
+ skb_mstamp_get(&skb->skb_mstamp);
} else {
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
@@ -3100,13 +3110,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
tcp_ecn_make_synack(req, th);
th->source = htons(ireq->ir_num);
th->dest = ireq->ir_rmt_port;
- /* Setting of flags are superfluous here for callers (and ECE is
- * not even correctly set)
- */
- tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
- TCPHDR_SYN | TCPHDR_ACK);
-
- th->seq = htonl(TCP_SKB_CB(skb)->seq);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ th->seq = htonl(tcp_rsk(req)->snt_isn);
/* XXX data is queued and acked as is. No buffer/window check */
th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
@@ -3320,6 +3325,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
goto done;
}
+ /* data was not sent, this is our new send_head */
+ sk->sk_send_head = syn_data;
+ tp->packets_out -= tcp_skb_pcount(syn_data);
+
fallback:
/* Send a regular SYN with Fast Open cookie request option */
if (fo->cookie.len > 0)
@@ -3339,6 +3348,9 @@ int tcp_connect(struct sock *sk)
struct sk_buff *buff;
int err;
+ if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
+ return -EHOSTUNREACH; /* Routing failure or similar. */
+
tcp_connect_init(sk);
if (unlikely(tp->repair)) {
@@ -3366,6 +3378,11 @@ int tcp_connect(struct sock *sk)
*/
tp->snd_nxt = tp->write_seq;
tp->pushed_seq = tp->write_seq;
+ buff = tcp_send_head(sk);
+ if (unlikely(buff)) {
+ tp->snd_nxt = TCP_SKB_CB(buff)->seq;
+ tp->pushed_seq = TCP_SKB_CB(buff)->seq;
+ }
TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
/* Timer for repeating the SYN until an answer. */
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index f6c50af24a64..3d063eb37848 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -117,7 +117,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
(fwmark > 0 && skb->mark == fwmark)) &&
(full || tp->snd_cwnd != tcp_probe.lastcwnd)) {
- spin_lock(&tcp_probe.lock);
+ spin_lock_bh(&tcp_probe.lock);
/* If log fills, just silently drop */
if (tcp_probe_avail() > 1) {
struct tcp_log *p = tcp_probe.log + tcp_probe.head;
@@ -157,7 +157,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1);
}
tcp_probe.lastcwnd = tp->snd_cwnd;
- spin_unlock(&tcp_probe.lock);
+ spin_unlock_bh(&tcp_probe.lock);
wake_up(&tcp_probe.wait);
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 3ea1cf804748..74db43b47917 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -249,7 +249,8 @@ void tcp_delack_timer_handler(struct sock *sk)
sk_mem_reclaim_partial(sk);
- if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
+ if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
+ !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
goto out;
if (time_after(icsk->icsk_ack.timeout, jiffies)) {
@@ -552,7 +553,8 @@ void tcp_write_timer_handler(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
int event;
- if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending)
+ if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) ||
+ !icsk->icsk_pending)
goto out;
if (time_after(icsk->icsk_timeout, jiffies)) {
@@ -652,7 +654,8 @@ static void tcp_keepalive_timer (unsigned long data)
goto death;
}
- if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
+ if (!sock_flag(sk, SOCK_KEEPOPEN) ||
+ ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
goto out;
elapsed = keepalive_time_when(tp);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5bab6c3f7a2f..bef4a94ce1a0 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -222,10 +222,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
}
}
- /* Initial allocation may have already happened via setsockopt */
- if (!rcu_access_pointer(sk->sk_reuseport_cb))
- return reuseport_alloc(sk);
- return 0;
+ return reuseport_alloc(sk);
}
/**
@@ -813,7 +810,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
if (is_udplite) /* UDP-Lite */
csum = udplite_csum(skb);
- else if (sk->sk_no_check_tx) { /* UDP csum disabled */
+ else if (sk->sk_no_check_tx && !skb_is_gso(skb)) { /* UDP csum off */
skb->ip_summed = CHECKSUM_NONE;
goto send;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index b2be1d9757ef..6401574cd638 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -29,6 +29,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
u16 mac_len = skb->mac_len;
int udp_offset, outer_hlen;
__wsum partial;
+ bool need_ipsec;
if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
goto out;
@@ -62,8 +63,10 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP);
+ need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb));
/* Try to offload checksum if possible */
offload_csum = !!(need_csum &&
+ !need_ipsec &&
(skb->dev->features &
(is_ipv6 ? (NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) :
(NETIF_F_HW_CSUM | NETIF_F_IP_CSUM))));
@@ -119,7 +122,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
* will be using a length value equal to only one MSS sized
* segment instead of the entire frame.
*/
- if (gso_partial) {
+ if (gso_partial && skb_is_gso(skb)) {
uh->len = htons(skb_shinfo(skb)->gso_size +
SKB_GSO_CB(skb)->data_offset +
skb->head - (unsigned char *)uh);
@@ -232,7 +235,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
- skb->ip_summed = CHECKSUM_NONE;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
/* If there is no outer header we can fake a checksum offload
* due to the fact that we have already done the checksum in
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 95dfcba38ff6..a4fb90c4819f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -315,9 +315,9 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev,
static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
unsigned long delay)
{
- if (!delayed_work_pending(&ifp->dad_work))
- in6_ifa_hold(ifp);
- mod_delayed_work(addrconf_wq, &ifp->dad_work, delay);
+ in6_ifa_hold(ifp);
+ if (mod_delayed_work(addrconf_wq, &ifp->dad_work, delay))
+ in6_ifa_put(ifp);
}
static int snmp6_alloc_dev(struct inet6_dev *idev)
@@ -1875,15 +1875,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
if (dad_failed)
ifp->flags |= IFA_F_DADFAILED;
- if (ifp->flags&IFA_F_PERMANENT) {
- spin_lock_bh(&ifp->lock);
- addrconf_del_dad_work(ifp);
- ifp->flags |= IFA_F_TENTATIVE;
- spin_unlock_bh(&ifp->lock);
- if (dad_failed)
- ipv6_ifa_notify(0, ifp);
- in6_ifa_put(ifp);
- } else if (ifp->flags&IFA_F_TEMPORARY) {
+ if (ifp->flags&IFA_F_TEMPORARY) {
struct inet6_ifaddr *ifpub;
spin_lock_bh(&ifp->lock);
ifpub = ifp->ifpub;
@@ -1896,6 +1888,14 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
spin_unlock_bh(&ifp->lock);
}
ipv6_del_addr(ifp);
+ } else if (ifp->flags&IFA_F_PERMANENT || !dad_failed) {
+ spin_lock_bh(&ifp->lock);
+ addrconf_del_dad_work(ifp);
+ ifp->flags |= IFA_F_TENTATIVE;
+ spin_unlock_bh(&ifp->lock);
+ if (dad_failed)
+ ipv6_ifa_notify(0, ifp);
+ in6_ifa_put(ifp);
} else {
ipv6_del_addr(ifp);
}
@@ -3253,14 +3253,24 @@ static void addrconf_gre_config(struct net_device *dev)
static int fixup_permanent_addr(struct inet6_dev *idev,
struct inet6_ifaddr *ifp)
{
- if (!ifp->rt) {
- struct rt6_info *rt;
+ /* rt6i_ref == 0 means the host route was removed from the
+ * FIB, for example, if 'lo' device is taken down. In that
+ * case regenerate the host route.
+ */
+ if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) {
+ struct rt6_info *rt, *prev;
rt = addrconf_dst_alloc(idev, &ifp->addr, false);
if (unlikely(IS_ERR(rt)))
return PTR_ERR(rt);
+ /* ifp->rt can be accessed outside of rtnl */
+ spin_lock(&ifp->lock);
+ prev = ifp->rt;
ifp->rt = rt;
+ spin_unlock(&ifp->lock);
+
+ ip6_rt_put(prev);
}
if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
@@ -3268,7 +3278,8 @@ static int fixup_permanent_addr(struct inet6_dev *idev,
idev->dev, 0, 0);
}
- addrconf_dad_start(ifp);
+ if (ifp->state == INET6_IFADDR_STATE_PREDAD)
+ addrconf_dad_start(ifp);
return 0;
}
@@ -3288,6 +3299,7 @@ static void addrconf_permanent_addr(struct net_device *dev)
if ((ifp->flags & IFA_F_PERMANENT) &&
fixup_permanent_addr(idev, ifp) < 0) {
write_unlock_bh(&idev->lock);
+ in6_ifa_hold(ifp);
ipv6_del_addr(ifp);
write_lock_bh(&idev->lock);
@@ -3305,6 +3317,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct netdev_notifier_changeupper_info *info;
struct inet6_dev *idev = __in6_dev_get(dev);
+ struct net *net = dev_net(dev);
int run_pending = 0;
int err;
@@ -3320,7 +3333,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
case NETDEV_CHANGEMTU:
/* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */
if (dev->mtu < IPV6_MIN_MTU) {
- addrconf_ifdown(dev, 1);
+ addrconf_ifdown(dev, dev != net->loopback_dev);
break;
}
@@ -3376,9 +3389,15 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
}
if (idev) {
- if (idev->if_flags & IF_READY)
- /* device is already configured. */
+ if (idev->if_flags & IF_READY) {
+ /* device is already configured -
+ * but resend MLD reports, we might
+ * have roamed and need to update
+ * multicast snooping switches
+ */
+ ipv6_mc_up(idev);
break;
+ }
idev->if_flags |= IF_READY;
}
@@ -3430,7 +3449,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
* IPV6_MIN_MTU stop IPv6 on this interface.
*/
if (dev->mtu < IPV6_MIN_MTU)
- addrconf_ifdown(dev, 1);
+ addrconf_ifdown(dev, dev != net->loopback_dev);
}
break;
@@ -3481,6 +3500,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
*/
static struct notifier_block ipv6_dev_notf = {
.notifier_call = addrconf_notify,
+ .priority = ADDRCONF_NOTIFY_PRIORITY,
};
static void addrconf_type_change(struct net_device *dev, unsigned long event)
@@ -3602,17 +3622,22 @@ restart:
INIT_LIST_HEAD(&del_list);
list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
struct rt6_info *rt = NULL;
+ bool keep;
addrconf_del_dad_work(ifa);
+ keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
+ !addr_is_local(&ifa->addr);
+ if (!keep)
+ list_move(&ifa->if_list, &del_list);
+
write_unlock_bh(&idev->lock);
spin_lock_bh(&ifa->lock);
- if (keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
- !addr_is_local(&ifa->addr)) {
+ if (keep) {
/* set state to skip the notifier below */
state = INET6_IFADDR_STATE_DEAD;
- ifa->state = 0;
+ ifa->state = INET6_IFADDR_STATE_PREDAD;
if (!(ifa->flags & IFA_F_NODAD))
ifa->flags |= IFA_F_TENTATIVE;
@@ -3621,8 +3646,6 @@ restart:
} else {
state = ifa->state;
ifa->state = INET6_IFADDR_STATE_DEAD;
-
- list_move(&ifa->if_list, &del_list);
}
spin_unlock_bh(&ifa->lock);
@@ -3989,6 +4012,12 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id)
if (bump_id)
rt_genid_bump_ipv6(dev_net(dev));
+
+ /* Make sure that a new temporary address will be created
+ * before this temporary address becomes deprecated.
+ */
+ if (ifp->flags & IFA_F_TEMPORARY)
+ addrconf_verify_rtnl();
}
static void addrconf_dad_run(struct inet6_dev *idev)
@@ -5415,7 +5444,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
* our DAD process, so we don't need
* to do it again
*/
- if (!(ifp->rt->rt6i_node))
+ if (!rcu_access_pointer(ifp->rt->rt6i_node))
ip6_ins_rt(ifp->rt);
if (ifp->idev->cnf.forwarding)
addrconf_join_anycast(ifp);
@@ -6250,6 +6279,8 @@ int __init addrconf_init(void)
goto errlo;
}
+ ip6_route_init_special_entries();
+
for (i = 0; i < IN6_ADDR_HSIZE; i++)
INIT_HLIST_HEAD(&inet6_addr_lst[i]);
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 37ac9de713c6..8d772fea1dde 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -1319,7 +1319,7 @@ static int calipso_skbuff_setattr(struct sk_buff *skb,
struct ipv6hdr *ip6_hdr;
struct ipv6_opt_hdr *hop;
unsigned char buf[CALIPSO_MAX_BUFFER];
- int len_delta, new_end, pad;
+ int len_delta, new_end, pad, payload;
unsigned int start, end;
ip6_hdr = ipv6_hdr(skb);
@@ -1346,6 +1346,8 @@ static int calipso_skbuff_setattr(struct sk_buff *skb,
if (ret_val < 0)
return ret_val;
+ ip6_hdr = ipv6_hdr(skb); /* Reset as skb_cow() may have moved it */
+
if (len_delta) {
if (len_delta > 0)
skb_push(skb, len_delta);
@@ -1355,6 +1357,8 @@ static int calipso_skbuff_setattr(struct sk_buff *skb,
sizeof(*ip6_hdr) + start);
skb_reset_network_header(skb);
ip6_hdr = ipv6_hdr(skb);
+ payload = ntohs(ip6_hdr->payload_len);
+ ip6_hdr->payload_len = htons(payload + len_delta);
}
hop = (struct ipv6_opt_hdr *)(ip6_hdr + 1);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 8616d17cf08f..38062f403ceb 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -166,18 +166,22 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
if (np->sndflow)
fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
- addr_type = ipv6_addr_type(&usin->sin6_addr);
-
- if (addr_type == IPV6_ADDR_ANY) {
+ if (ipv6_addr_any(&usin->sin6_addr)) {
/*
* connect to self
*/
- usin->sin6_addr.s6_addr[15] = 0x01;
+ if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
+ ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
+ &usin->sin6_addr);
+ else
+ usin->sin6_addr = in6addr_loopback;
}
+ addr_type = ipv6_addr_type(&usin->sin6_addr);
+
daddr = &usin->sin6_addr;
- if (addr_type == IPV6_ADDR_MAPPED) {
+ if (addr_type & IPV6_ADDR_MAPPED) {
struct sockaddr_in sin;
if (__ipv6_only_sock(sk)) {
@@ -400,9 +404,6 @@ static inline bool ipv6_datagram_support_addr(struct sock_exterr_skb *serr)
* At one point, excluding local errors was a quick test to identify icmp/icmp6
* errors. This is no longer true, but the test remained, so the v6 stack,
* unlike v4, also honors cmsg requests on all wifi and timestamp errors.
- *
- * Timestamp code paths do not initialize the fields expected by cmsg:
- * the PKTINFO fields in skb->cb[]. Fill those in here.
*/
static bool ip6_datagram_support_cmsg(struct sk_buff *skb,
struct sock_exterr_skb *serr)
@@ -414,14 +415,9 @@ static bool ip6_datagram_support_cmsg(struct sk_buff *skb,
if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL)
return false;
- if (!skb->dev)
+ if (!IP6CB(skb)->iif)
return false;
- if (skb->protocol == htons(ETH_P_IPV6))
- IP6CB(skb)->iif = skb->dev->ifindex;
- else
- PKTINFO_SKB_CB(skb)->ipi_ifindex = skb->dev->ifindex;
-
return true;
}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index eea23b57c6a5..ec849d88a662 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -32,7 +32,6 @@ struct fib6_rule {
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
int flags, pol_lookup_t lookup)
{
- struct rt6_info *rt;
struct fib_lookup_arg arg = {
.lookup_ptr = lookup,
.flags = FIB_LOOKUP_NOREF,
@@ -44,21 +43,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
fib_rules_lookup(net->ipv6.fib6_rules_ops,
flowi6_to_flowi(fl6), flags, &arg);
- rt = arg.result;
+ if (arg.result)
+ return arg.result;
- if (!rt) {
- dst_hold(&net->ipv6.ip6_null_entry->dst);
- return &net->ipv6.ip6_null_entry->dst;
- }
-
- if (rt->rt6i_flags & RTF_REJECT &&
- rt->dst.error == -EAGAIN) {
- ip6_rt_put(rt);
- rt = net->ipv6.ip6_null_entry;
- dst_hold(&rt->dst);
- }
-
- return &rt->dst;
+ dst_hold(&net->ipv6.ip6_null_entry->dst);
+ return &net->ipv6.ip6_null_entry->dst;
}
static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
@@ -121,7 +110,8 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
flp6->saddr = saddr;
}
err = rt->dst.error;
- goto out;
+ if (err != -EAGAIN)
+ goto out;
}
again:
ip6_rt_put(rt);
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index e604013dd814..7a5b9812af10 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -68,6 +68,7 @@ static inline u32 ila_locator_hash(struct ila_locator loc)
{
u32 *v = (u32 *)loc.v32;
+ __ila_hash_secret_init();
return jhash_2words(v[0], v[1], hashrnd);
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index ef5485204522..5da864997495 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -148,11 +148,23 @@ static struct fib6_node *node_alloc(void)
return fn;
}
-static void node_free(struct fib6_node *fn)
+static void node_free_immediate(struct fib6_node *fn)
+{
+ kmem_cache_free(fib6_node_kmem, fn);
+}
+
+static void node_free_rcu(struct rcu_head *head)
{
+ struct fib6_node *fn = container_of(head, struct fib6_node, rcu);
+
kmem_cache_free(fib6_node_kmem, fn);
}
+static void node_free(struct fib6_node *fn)
+{
+ call_rcu(&fn->rcu, node_free_rcu);
+}
+
static void rt6_rcu_free(struct rt6_info *rt)
{
call_rcu(&rt->dst.rcu_head, dst_rcu_free);
@@ -189,6 +201,12 @@ static void rt6_release(struct rt6_info *rt)
}
}
+static void fib6_free_table(struct fib6_table *table)
+{
+ inetpeer_invalidate_tree(&table->tb6_peers);
+ kfree(table);
+}
+
static void fib6_link_table(struct net *net, struct fib6_table *tb)
{
unsigned int h;
@@ -289,8 +307,7 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
struct rt6_info *rt;
rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
- if (rt->rt6i_flags & RTF_REJECT &&
- rt->dst.error == -EAGAIN) {
+ if (rt->dst.error == -EAGAIN) {
ip6_rt_put(rt);
rt = net->ipv6.ip6_null_entry;
dst_hold(&rt->dst);
@@ -590,9 +607,9 @@ insert_above:
if (!in || !ln) {
if (in)
- node_free(in);
+ node_free_immediate(in);
if (ln)
- node_free(ln);
+ node_free_immediate(ln);
return ERR_PTR(-ENOMEM);
}
@@ -772,10 +789,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
goto next_iter;
}
- if (iter->dst.dev == rt->dst.dev &&
- iter->rt6i_idev == rt->rt6i_idev &&
- ipv6_addr_equal(&iter->rt6i_gateway,
- &rt->rt6i_gateway)) {
+ if (rt6_duplicate_nexthop(iter, rt)) {
if (rt->rt6i_nsiblings)
rt->rt6i_nsiblings = 0;
if (!(iter->rt6i_flags & RTF_EXPIRES))
@@ -866,7 +880,7 @@ add:
rt->dst.rt6_next = iter;
*ins = rt;
- rt->rt6i_node = fn;
+ rcu_assign_pointer(rt->rt6i_node, fn);
atomic_inc(&rt->rt6i_ref);
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -891,7 +905,7 @@ add:
return err;
*ins = rt;
- rt->rt6i_node = fn;
+ rcu_assign_pointer(rt->rt6i_node, fn);
rt->dst.rt6_next = iter->dst.rt6_next;
atomic_inc(&rt->rt6i_ref);
inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
@@ -901,6 +915,8 @@ add:
}
nsiblings = iter->rt6i_nsiblings;
fib6_purge_rt(iter, fn, info->nl_net);
+ if (fn->rr_ptr == iter)
+ fn->rr_ptr = NULL;
rt6_release(iter);
if (nsiblings) {
@@ -908,9 +924,13 @@ add:
ins = &rt->dst.rt6_next;
iter = *ins;
while (iter) {
+ if (iter->rt6i_metric > rt->rt6i_metric)
+ break;
if (rt6_qualify_for_ecmp(iter)) {
*ins = iter->dst.rt6_next;
fib6_purge_rt(iter, fn, info->nl_net);
+ if (fn->rr_ptr == iter)
+ fn->rr_ptr = NULL;
rt6_release(iter);
nsiblings--;
} else {
@@ -999,7 +1019,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
/* Create subtree root node */
sfn = node_alloc();
if (!sfn)
- goto st_failure;
+ goto failure;
sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
@@ -1015,12 +1035,12 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (IS_ERR(sn)) {
/* If it is failed, discard just allocated
- root, and then (in st_failure) stale node
+ root, and then (in failure) stale node
in main tree.
*/
- node_free(sfn);
+ node_free_immediate(sfn);
err = PTR_ERR(sn);
- goto st_failure;
+ goto failure;
}
/* Now link new subtree to main tree */
@@ -1034,7 +1054,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (IS_ERR(sn)) {
err = PTR_ERR(sn);
- goto st_failure;
+ goto failure;
}
}
@@ -1076,22 +1096,22 @@ out:
atomic_inc(&pn->leaf->rt6i_ref);
}
#endif
- if (!(rt->dst.flags & DST_NOCACHE))
- dst_free(&rt->dst);
+ goto failure;
}
return err;
-#ifdef CONFIG_IPV6_SUBTREES
- /* Subtree creation failed, probably main tree node
- is orphan. If it is, shoot it.
+failure:
+ /* fn->leaf could be NULL if fn is an intermediate node and we
+ * failed to add the new route to it in both subtree creation
+ * failure and fib6_add_rt2node() failure case.
+ * In both cases, fib6_repair_tree() should be called to fix
+ * fn->leaf.
*/
-st_failure:
if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
fib6_repair_tree(info->nl_net, fn);
if (!(rt->dst.flags & DST_NOCACHE))
dst_free(&rt->dst);
return err;
-#endif
}
/*
@@ -1445,8 +1465,9 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
int fib6_del(struct rt6_info *rt, struct nl_info *info)
{
+ struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
struct net *net = info->nl_net;
- struct fib6_node *fn = rt->rt6i_node;
struct rt6_info **rtp;
#if RT6_DEBUG >= 2
@@ -1635,7 +1656,9 @@ static int fib6_clean_node(struct fib6_walker *w)
if (res) {
#if RT6_DEBUG >= 2
pr_debug("%s: del failed: rt=%p@%p err=%d\n",
- __func__, rt, rt->rt6i_node, res);
+ __func__, rt,
+ rcu_access_pointer(rt->rt6i_node),
+ res);
#endif
continue;
}
@@ -1876,15 +1899,22 @@ out_timer:
static void fib6_net_exit(struct net *net)
{
+ unsigned int i;
+
rt6_ifdown(net, NULL);
del_timer_sync(&net->ipv6.ip6_fib_timer);
-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
- inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers);
- kfree(net->ipv6.fib6_local_tbl);
-#endif
- inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers);
- kfree(net->ipv6.fib6_main_tbl);
+ for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
+ struct hlist_head *head = &net->ipv6.fib_table_hash[i];
+ struct hlist_node *tmp;
+ struct fib6_table *tb;
+
+ hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) {
+ hlist_del(&tb->tb6_hlist);
+ fib6_free_table(tb);
+ }
+ }
+
kfree(net->ipv6.fib_table_hash);
kfree(net->ipv6.rt6_stats);
}
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index b912f0dbaf72..b82e439804d1 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -315,6 +315,7 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
}
opt_space->dst1opt = fopt->dst1opt;
opt_space->opt_flen = fopt->opt_flen;
+ opt_space->tot_len = fopt->tot_len;
return opt_space;
}
EXPORT_SYMBOL_GPL(fl6_merge_options);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 0a5922055da2..e9b14e3493f2 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -408,13 +408,16 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
- break;
+ if (code != ICMPV6_PORT_UNREACH)
+ break;
+ return;
case ICMPV6_TIME_EXCEED:
if (code == ICMPV6_EXC_HOPLIMIT) {
net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
t->parms.name);
+ break;
}
- break;
+ return;
case ICMPV6_PARAMPROB:
teli = 0;
if (code == ICMPV6_HDR_FIELD)
@@ -430,13 +433,15 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
t->parms.name);
}
- break;
+ return;
case ICMPV6_PKT_TOOBIG:
- mtu = be32_to_cpu(info) - offset;
+ mtu = be32_to_cpu(info) - offset - t->tun_hlen;
+ if (t->dev->type == ARPHRD_ETHER)
+ mtu -= ETH_HLEN;
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
t->dev->mtu = mtu;
- break;
+ return;
}
if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
@@ -503,8 +508,8 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
__u32 *pmtu, __be16 proto)
{
struct ip6_tnl *tunnel = netdev_priv(dev);
- __be16 protocol = (dev->type == ARPHRD_ETHER) ?
- htons(ETH_P_TEB) : proto;
+ struct dst_entry *dst = skb_dst(skb);
+ __be16 protocol;
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -518,9 +523,14 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
tunnel->o_seqno++;
/* Push GRE header. */
+ protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
+ /* TooBig packet may have updated dst->dev's mtu */
+ if (dst && dst_mtu(dst) > dst->dev->mtu)
+ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
+
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
NEXTHDR_GRE);
}
@@ -542,11 +552,10 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- dsfield = ipv4_get_dsfield(iph);
-
if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
- & IPV6_TCLASS_MASK;
+ dsfield = ipv4_get_dsfield(iph);
+ else
+ dsfield = ip6_tclass(t->parms.flowinfo);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
@@ -582,6 +591,9 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
return -1;
offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+ /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
+ ipv6h = ipv6_hdr(skb);
+
if (offset > 0) {
struct ipv6_tlv_tnl_enc_lim *tel;
tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
@@ -596,9 +608,11 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- dsfield = ipv6_get_dsfield(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
+ dsfield = ipv6_get_dsfield(ipv6h);
+ else
+ dsfield = ip6_tclass(t->parms.flowinfo);
+
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
fl6.flowlabel |= ip6_flowlabel(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
@@ -930,24 +944,25 @@ done:
}
static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
- unsigned short type,
- const void *daddr, const void *saddr, unsigned int len)
+ unsigned short type, const void *daddr,
+ const void *saddr, unsigned int len)
{
struct ip6_tnl *t = netdev_priv(dev);
- struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
- __be16 *p = (__be16 *)(ipv6h+1);
+ struct ipv6hdr *ipv6h;
+ __be16 *p;
- ip6_flow_hdr(ipv6h, 0,
- ip6_make_flowlabel(dev_net(dev), skb,
- t->fl.u.ip6.flowlabel, true,
- &t->fl.u.ip6));
+ ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen + sizeof(*ipv6h));
+ ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb,
+ t->fl.u.ip6.flowlabel,
+ true, &t->fl.u.ip6));
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = NEXTHDR_GRE;
ipv6h->saddr = t->parms.laddr;
ipv6h->daddr = t->parms.raddr;
- p[0] = t->parms.o_flags;
- p[1] = htons(type);
+ p = (__be16 *)(ipv6h + 1);
+ p[0] = t->parms.o_flags;
+ p[1] = htons(type);
/*
* Set the source hardware address.
@@ -1291,6 +1306,7 @@ static void ip6gre_tap_setup(struct net_device *dev)
dev->features |= NETIF_F_NETNS_LOCAL;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ netif_keep_dst(dev);
}
static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index fc7b4017ba24..649f4d87b318 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -63,7 +63,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
const struct net_offload *ops;
int proto;
struct frag_hdr *fptr;
- unsigned int unfrag_ip6hlen;
unsigned int payload_len;
u8 *prevhdr;
int offset = 0;
@@ -106,7 +105,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
for (skb = segs; skb; skb = skb->next) {
ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
- if (gso_partial)
+ if (gso_partial && skb_is_gso(skb))
payload_len = skb_shinfo(skb)->gso_size +
SKB_GSO_CB(skb)->data_offset +
skb->head - (unsigned char *)(ipv6h + 1);
@@ -116,8 +115,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
skb->network_header = (u8 *)ipv6h - skb->head;
if (udpfrag) {
- unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
- fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen);
+ int err = ip6_find_1stfragopt(skb, &prevhdr);
+ if (err < 0) {
+ kfree_skb_list(segs);
+ return ERR_PTR(err);
+ }
+ fptr = (struct frag_hdr *)((u8 *)ipv6h + err);
fptr->frag_off = htons(offset);
if (skb->next)
fptr->frag_off |= htons(IP6_MF);
@@ -294,8 +297,10 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)
struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);
int err = -ENOSYS;
- if (skb->encapsulation)
+ if (skb->encapsulation) {
+ skb_set_inner_protocol(skb, cpu_to_be16(ETH_P_IPV6));
skb_set_inner_network_header(skb, nhoff);
+ }
iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 9a87bfb2ec16..6e01c9a8dfd3 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -586,7 +586,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int ptr, offset = 0, err = 0;
u8 *prevhdr, nexthdr = 0;
- hlen = ip6_find_1stfragopt(skb, &prevhdr);
+ err = ip6_find_1stfragopt(skb, &prevhdr);
+ if (err < 0)
+ goto fail;
+ hlen = err;
nexthdr = *prevhdr;
mtu = ip6_skb_dst_mtu(skb);
@@ -659,8 +662,6 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
*prevhdr = NEXTHDR_FRAGMENT;
tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
if (!tmp_hdr) {
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_FRAGFAILS);
err = -ENOMEM;
goto fail;
}
@@ -757,13 +758,14 @@ slow_path:
* Fragment the datagram.
*/
- *prevhdr = NEXTHDR_FRAGMENT;
troom = rt->dst.dev->needed_tailroom;
/*
* Keep copying data until we run out.
*/
while (left > 0) {
+ u8 *fragnexthdr_offset;
+
len = left;
/* IF: it doesn't fit, use 'mtu' - the data space left */
if (len > mtu)
@@ -778,8 +780,6 @@ slow_path:
frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
hroom + troom, GFP_ATOMIC);
if (!frag) {
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_FRAGFAILS);
err = -ENOMEM;
goto fail;
}
@@ -808,6 +808,10 @@ slow_path:
*/
skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
+ fragnexthdr_offset = skb_network_header(frag);
+ fragnexthdr_offset += prevhdr - skb_network_header(skb);
+ *fragnexthdr_offset = NEXTHDR_FRAGMENT;
+
/*
* Build fragment header.
*/
@@ -1011,6 +1015,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
}
}
#endif
+ if (ipv6_addr_v4mapped(&fl6->saddr) &&
+ !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
+ err = -EAFNOSUPPORT;
+ goto out_err_release;
+ }
return 0;
@@ -1206,11 +1215,11 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
if (WARN_ON(v6_cork->opt))
return -EINVAL;
- v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
+ v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
if (unlikely(!v6_cork->opt))
return -ENOBUFS;
- v6_cork->opt->tot_len = opt->tot_len;
+ v6_cork->opt->tot_len = sizeof(*opt);
v6_cork->opt->opt_flen = opt->opt_flen;
v6_cork->opt->opt_nflen = opt->opt_nflen;
@@ -1363,11 +1372,12 @@ emsgsize:
*/
cork->length += length;
- if (((length > mtu) ||
- (skb && skb_is_gso(skb))) &&
+ if ((skb && skb_is_gso(skb)) ||
+ (((length + fragheaderlen) > mtu) &&
+ (skb_queue_len(queue) <= 1) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
- (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
+ (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk))) {
err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, exthdrlen,
transhdrlen, mtu, flags, fl6);
@@ -1439,6 +1449,11 @@ alloc_new_skb:
*/
alloclen += sizeof(struct frag_hdr);
+ copy = datalen - transhdrlen - fraggap;
+ if (copy < 0) {
+ err = -EINVAL;
+ goto error;
+ }
if (transhdrlen) {
skb = sock_alloc_send_skb(sk,
alloclen + hh_len,
@@ -1488,13 +1503,9 @@ alloc_new_skb:
data += fraggap;
pskb_trim_unique(skb_prev, maxfraglen);
}
- copy = datalen - transhdrlen - fraggap;
-
- if (copy < 0) {
- err = -EINVAL;
- kfree_skb(skb);
- goto error;
- } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
+ if (copy > 0 &&
+ getfrag(from, data + transhdrlen, offset,
+ copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
goto error;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index f6ba45242851..12b2fd512f32 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1037,11 +1037,12 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
struct ip6_tnl *t = netdev_priv(dev);
struct net *net = t->net;
struct net_device_stats *stats = &t->dev->stats;
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct ipv6hdr *ipv6h;
struct ipv6_tel_txoption opt;
struct dst_entry *dst = NULL, *ndst = NULL;
struct net_device *tdev;
int mtu;
+ unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? ETH_HLEN : 0;
unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
unsigned int max_headroom = psh_hlen;
bool use_cache = false;
@@ -1057,26 +1058,28 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
/* NBMA tunnel */
if (ipv6_addr_any(&t->parms.raddr)) {
- struct in6_addr *addr6;
- struct neighbour *neigh;
- int addr_type;
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ struct in6_addr *addr6;
+ struct neighbour *neigh;
+ int addr_type;
- if (!skb_dst(skb))
- goto tx_err_link_failure;
+ if (!skb_dst(skb))
+ goto tx_err_link_failure;
- neigh = dst_neigh_lookup(skb_dst(skb),
- &ipv6_hdr(skb)->daddr);
- if (!neigh)
- goto tx_err_link_failure;
+ neigh = dst_neigh_lookup(skb_dst(skb),
+ &ipv6_hdr(skb)->daddr);
+ if (!neigh)
+ goto tx_err_link_failure;
- addr6 = (struct in6_addr *)&neigh->primary_key;
- addr_type = ipv6_addr_type(addr6);
+ addr6 = (struct in6_addr *)&neigh->primary_key;
+ addr_type = ipv6_addr_type(addr6);
- if (addr_type == IPV6_ADDR_ANY)
- addr6 = &ipv6_hdr(skb)->daddr;
+ if (addr_type == IPV6_ADDR_ANY)
+ addr6 = &ipv6_hdr(skb)->daddr;
- memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
- neigh_release(neigh);
+ memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
+ neigh_release(neigh);
+ }
} else if (!(t->parms.flags &
(IP6_TNL_F_USE_ORIG_TCLASS | IP6_TNL_F_USE_ORIG_FWMARK))) {
/* enable the cache only only if the routing decision does
@@ -1118,7 +1121,7 @@ route_lookup:
t->parms.name);
goto tx_err_dst_release;
}
- mtu = dst_mtu(dst) - psh_hlen - t->tun_hlen;
+ mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen;
if (encap_limit >= 0) {
max_headroom += 8;
mtu -= 8;
@@ -1127,7 +1130,7 @@ route_lookup:
mtu = IPV6_MIN_MTU;
if (skb_dst(skb) && !t->parms.collect_md)
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
- if (skb->len - t->tun_hlen > mtu && !skb_is_gso(skb)) {
+ if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
*pmtu = mtu;
err = -EMSGSIZE;
goto tx_err_dst_release;
@@ -1194,7 +1197,7 @@ route_lookup:
skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb);
- ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
+ ip6_flow_hdr(ipv6h, dsfield,
ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
ipv6h->hop_limit = hop_limit;
ipv6h->nexthdr = proto;
@@ -1229,8 +1232,6 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
if (tproto != IPPROTO_IPIP && tproto != 0)
return -1;
- dsfield = ipv4_get_dsfield(iph);
-
if (t->parms.collect_md) {
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
@@ -1244,6 +1245,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
fl6.flowi6_proto = IPPROTO_IPIP;
fl6.daddr = key->u.ipv6.dst;
fl6.flowlabel = key->label;
+ dsfield = ip6_tclass(key->label);
} else {
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
encap_limit = t->parms.encap_limit;
@@ -1252,8 +1254,9 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
fl6.flowi6_proto = IPPROTO_IPIP;
if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
- & IPV6_TCLASS_MASK;
+ dsfield = ipv4_get_dsfield(iph);
+ else
+ dsfield = ip6_tclass(t->parms.flowinfo);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
fl6.flowi6_mark = skb->mark;
}
@@ -1261,6 +1264,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
return -1;
+ dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));
+
skb_set_inner_ipproto(skb, IPPROTO_IPIP);
err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
@@ -1294,8 +1299,6 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
ip6_tnl_addr_conflict(t, ipv6h))
return -1;
- dsfield = ipv6_get_dsfield(ipv6h);
-
if (t->parms.collect_md) {
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
@@ -1309,8 +1312,11 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
fl6.flowi6_proto = IPPROTO_IPV6;
fl6.daddr = key->u.ipv6.dst;
fl6.flowlabel = key->label;
+ dsfield = ip6_tclass(key->label);
} else {
offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+ /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
+ ipv6h = ipv6_hdr(skb);
if (offset > 0) {
struct ipv6_tlv_tnl_enc_lim *tel;
@@ -1329,7 +1335,9 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
fl6.flowi6_proto = IPPROTO_IPV6;
if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- fl6.flowlabel |= (*(__be32 *)ipv6h & IPV6_TCLASS_MASK);
+ dsfield = ipv6_get_dsfield(ipv6h);
+ else
+ dsfield = ip6_tclass(t->parms.flowinfo);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
fl6.flowlabel |= ip6_flowlabel(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
@@ -1339,6 +1347,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
return -1;
+ dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
+
skb_set_inner_ipproto(skb, IPPROTO_IPV6);
err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
@@ -2222,6 +2232,9 @@ static int __init ip6_tunnel_init(void)
{
int err;
+ if (!ipv6_mod_enabled())
+ return -EOPNOTSUPP;
+
err = register_pernet_device(&ip6_tnl_net_ops);
if (err < 0)
goto out_pernet;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index c299c1e2bbf0..816f79d1a8a3 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -445,6 +445,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev;
struct xfrm_state *x;
+ int pkt_len = skb->len;
int err = -1;
int mtu;
@@ -498,7 +499,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
u64_stats_update_begin(&tstats->syncp);
- tstats->tx_bytes += skb->len;
+ tstats->tx_bytes += pkt_len;
tstats->tx_packets++;
u64_stats_update_end(&tstats->syncp);
} else {
@@ -691,6 +692,10 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p)
u->link = p->link;
u->i_key = p->i_key;
u->o_key = p->o_key;
+ if (u->i_key)
+ u->i_flags |= GRE_KEY;
+ if (u->o_key)
+ u->o_flags |= GRE_KEY;
u->proto = p->proto;
memcpy(u->name, p->name, sizeof(u->name));
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7f4265b1649b..117405dd07a3 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -774,7 +774,8 @@ failure:
* Delete a VIF entry
*/
-static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
+static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
+ struct list_head *head)
{
struct mif_device *v;
struct net_device *dev;
@@ -820,7 +821,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
dev->ifindex, &in6_dev->cnf);
}
- if (v->flags & MIFF_REGISTER)
+ if ((v->flags & MIFF_REGISTER) && !notify)
unregister_netdevice_queue(dev, head);
dev_put(dev);
@@ -1331,7 +1332,6 @@ static int ip6mr_device_event(struct notifier_block *this,
struct mr6_table *mrt;
struct mif_device *v;
int ct;
- LIST_HEAD(list);
if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
@@ -1340,10 +1340,9 @@ static int ip6mr_device_event(struct notifier_block *this,
v = &mrt->vif6_table[0];
for (ct = 0; ct < mrt->maxvif; ct++, v++) {
if (v->dev == dev)
- mif6_delete(mrt, ct, &list);
+ mif6_delete(mrt, ct, 1, NULL);
}
}
- unregister_netdevice_many(&list);
return NOTIFY_DONE;
}
@@ -1552,7 +1551,7 @@ static void mroute_clean_tables(struct mr6_table *mrt, bool all)
for (i = 0; i < mrt->maxvif; i++) {
if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
continue;
- mif6_delete(mrt, i, &list);
+ mif6_delete(mrt, i, 0, &list);
}
unregister_netdevice_many(&list);
@@ -1706,7 +1705,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
return -EFAULT;
rtnl_lock();
- ret = mif6_delete(mrt, mifi, NULL);
+ ret = mif6_delete(mrt, mifi, 0, NULL);
rtnl_unlock();
return ret;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 636ec56f5f50..38bee173dc2b 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -585,16 +585,24 @@ done:
if (val) {
struct net_device *dev;
+ int midx;
- if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val)
- goto e_inval;
+ rcu_read_lock();
- dev = dev_get_by_index(net, val);
+ dev = dev_get_by_index_rcu(net, val);
if (!dev) {
+ rcu_read_unlock();
retv = -ENODEV;
break;
}
- dev_put(dev);
+ midx = l3mdev_master_ifindex_rcu(dev);
+
+ rcu_read_unlock();
+
+ if (sk->sk_bound_dev_if &&
+ sk->sk_bound_dev_if != val &&
+ (!midx || midx != sk->sk_bound_dev_if))
+ goto e_inval;
}
np->mcast_oif = val;
retv = 0;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 9948b5ce52da..b263bf3a19f7 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -589,6 +589,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb);
+ skb_orphan(skb);
fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
if (fq == NULL) {
@@ -621,18 +622,12 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
static int nf_ct_net_init(struct net *net)
{
- int res;
-
net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
- res = inet_frags_init_net(&net->nf_frag.frags);
- if (res)
- return res;
- res = nf_ct_frag6_sysctl_register(net);
- if (res)
- inet_frags_uninit_net(&net->nf_frag.frags);
- return res;
+ inet_frags_init_net(&net->nf_frag.frags);
+
+ return nf_ct_frag6_sysctl_register(net);
}
static void nf_ct_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 10090400c72f..eedee5d108d9 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -157,6 +157,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
fl6.fl6_sport = otcph->dest;
fl6.fl6_dport = otcph->source;
fl6.flowi6_oif = l3mdev_master_ifindex(skb_dst(oldskb)->dev);
+ fl6.flowi6_mark = IP6_REPLY_MARK(net, oldskb->mark);
security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
@@ -180,6 +181,8 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
skb_dst_set(nskb, dst);
+ nskb->mark = fl6.flowi6_mark;
+
skb_reserve(nskb, hh_len + dst->header_len);
ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP,
ip6_dst_hoplimit(dst));
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index cd4252346a32..a338bbc33cf3 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -78,15 +78,14 @@ EXPORT_SYMBOL(ipv6_select_ident);
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
- u16 offset = sizeof(struct ipv6hdr);
- struct ipv6_opt_hdr *exthdr =
- (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
+ unsigned int offset = sizeof(struct ipv6hdr);
unsigned int packet_len = skb_tail_pointer(skb) -
skb_network_header(skb);
int found_rhdr = 0;
*nexthdr = &ipv6_hdr(skb)->nexthdr;
- while (offset + 1 <= packet_len) {
+ while (offset <= packet_len) {
+ struct ipv6_opt_hdr *exthdr;
switch (**nexthdr) {
@@ -107,13 +106,18 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
return offset;
}
- offset += ipv6_optlen(exthdr);
- *nexthdr = &exthdr->nexthdr;
+ if (offset + sizeof(struct ipv6_opt_hdr) > packet_len)
+ return -EINVAL;
+
exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
offset);
+ offset += ipv6_optlen(exthdr);
+ if (offset > IPV6_MAXPLEN)
+ return -EINVAL;
+ *nexthdr = &exthdr->nexthdr;
}
- return offset;
+ return -EINVAL;
}
EXPORT_SYMBOL(ip6_find_1stfragopt);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 66e2d9dfc43a..982868193dbb 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -198,7 +198,7 @@ static struct inet_protosw pingv6_protosw = {
.type = SOCK_DGRAM,
.protocol = IPPROTO_ICMPV6,
.prot = &pingv6_prot,
- .ops = &inet6_dgram_ops,
+ .ops = &inet6_sockraw_ops,
.flags = INET_PROTOSW_REUSE,
};
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index cc8e3ae9ca73..e88bcb8ff0fd 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -219,7 +219,7 @@ static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,
u64 buff64[SNMP_MIB_MAX];
int i;
- memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX);
+ memset(buff64, 0, sizeof(u64) * SNMP_MIB_MAX);
snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff);
for (i = 0; itemlist[i].name; i++)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 869ffc76befa..71ffa526cb23 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -630,6 +630,8 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu);
return -EMSGSIZE;
}
+ if (length < sizeof(struct ipv6hdr))
+ return -EINVAL;
if (flags&MSG_PROBE)
goto out;
@@ -1171,8 +1173,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
spin_lock_bh(&sk->sk_receive_queue.lock);
skb = skb_peek(&sk->sk_receive_queue);
if (skb)
- amount = skb_tail_pointer(skb) -
- skb_transport_header(skb);
+ amount = skb->len;
spin_unlock_bh(&sk->sk_receive_queue.lock);
return put_user(amount, (int __user *)arg);
}
@@ -1329,7 +1330,7 @@ void raw6_proc_exit(void)
#endif /* CONFIG_PROC_FS */
/* Same as inet6_dgram_ops, sans udp_poll. */
-static const struct proto_ops inet6_sockraw_ops = {
+const struct proto_ops inet6_sockraw_ops = {
.family = PF_INET6,
.owner = THIS_MODULE,
.release = inet6_release,
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 3815e8505ed2..e585c0a2591c 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -709,19 +709,13 @@ static void ip6_frags_sysctl_unregister(void)
static int __net_init ipv6_frags_init_net(struct net *net)
{
- int res;
-
net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
- res = inet_frags_init_net(&net->ipv6.frags);
- if (res)
- return res;
- res = ip6_frags_ns_sysctl_register(net);
- if (res)
- inet_frags_uninit_net(&net->ipv6.frags);
- return res;
+ inet_frags_init_net(&net->ipv6.frags);
+
+ return ip6_frags_ns_sysctl_register(net);
}
static void __net_exit ipv6_frags_exit_net(struct net *net)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index bff4460f17be..6e8bacb0b458 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1267,7 +1267,9 @@ static void rt6_dst_from_metrics_check(struct rt6_info *rt)
static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
{
- if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
+ u32 rt_cookie = 0;
+
+ if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
return NULL;
if (rt6_check_expired(rt))
@@ -1335,8 +1337,14 @@ static void ip6_link_failure(struct sk_buff *skb)
if (rt->rt6i_flags & RTF_CACHE) {
dst_hold(&rt->dst);
ip6_del_rt(rt);
- } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
- rt->rt6i_node->fn_sernum = -1;
+ } else {
+ struct fib6_node *fn;
+
+ rcu_read_lock();
+ fn = rcu_dereference(rt->rt6i_node);
+ if (fn && (rt->rt6i_flags & RTF_DEFAULT))
+ fn->fn_sernum = -1;
+ rcu_read_unlock();
}
}
}
@@ -1353,7 +1361,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
{
return !(rt->rt6i_flags & RTF_CACHE) &&
- (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
+ (rt->rt6i_flags & RTF_PCPU ||
+ rcu_access_pointer(rt->rt6i_node));
}
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
@@ -1826,6 +1835,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
int addr_type;
int err = -EINVAL;
+ /* RTF_PCPU is an internal flag; can not be set by userspace */
+ if (cfg->fc_flags & RTF_PCPU)
+ goto out;
+
if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
goto out;
#ifndef CONFIG_IPV6_SUBTREES
@@ -2166,6 +2179,8 @@ static int ip6_route_del(struct fib6_config *cfg)
continue;
if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
continue;
+ if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
+ continue;
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
@@ -2947,17 +2962,11 @@ static int ip6_route_info_append(struct list_head *rt6_nh_list,
struct rt6_info *rt, struct fib6_config *r_cfg)
{
struct rt6_nh *nh;
- struct rt6_info *rtnh;
int err = -EEXIST;
list_for_each_entry(nh, rt6_nh_list, next) {
/* check if rt6_info already exists */
- rtnh = nh->rt6_info;
-
- if (rtnh->dst.dev == rt->dst.dev &&
- rtnh->rt6i_idev == rt->rt6i_idev &&
- ipv6_addr_equal(&rtnh->rt6i_gateway,
- &rt->rt6i_gateway))
+ if (rt6_duplicate_nexthop(nh->rt6_info, rt))
return err;
}
@@ -3474,7 +3483,10 @@ static int ip6_route_dev_notify(struct notifier_block *this,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
- if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
+ if (!(dev->flags & IFF_LOOPBACK))
+ return NOTIFY_OK;
+
+ if (event == NETDEV_REGISTER) {
net->ipv6.ip6_null_entry->dst.dev = dev;
net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
@@ -3483,6 +3495,16 @@ static int ip6_route_dev_notify(struct notifier_block *this,
net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
+ } else if (event == NETDEV_UNREGISTER &&
+ dev->reg_state != NETREG_UNREGISTERED) {
+ /* NETDEV_UNREGISTER could be fired for multiple times by
+ * netdev_wait_allrefs(). Make sure we only call this once.
+ */
+ in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
+ in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev);
+#endif
}
return NOTIFY_OK;
@@ -3789,9 +3811,24 @@ static struct pernet_operations ip6_route_net_late_ops = {
static struct notifier_block ip6_route_dev_notifier = {
.notifier_call = ip6_route_dev_notify,
- .priority = 0,
+ .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};
+void __init ip6_route_init_special_entries(void)
+{
+ /* Registering of the loopback is done before this portion of code,
+ * the loopback reference in rt6_info will not be taken, do it
+ * manually for init_net */
+ init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
+ init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+ #ifdef CONFIG_IPV6_MULTIPLE_TABLES
+ init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
+ init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+ init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
+ init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
+ #endif
+}
+
int __init ip6_route_init(void)
{
int ret;
@@ -3818,17 +3855,6 @@ int __init ip6_route_init(void)
ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
- /* Registering of the loopback is done before this portion of code,
- * the loopback reference in rt6_info will not be taken, do it
- * manually for init_net */
- init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
- init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
- #ifdef CONFIG_IPV6_MULTIPLE_TABLES
- init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
- init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
- init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
- init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
- #endif
ret = fib6_init();
if (ret)
goto out_register_subsys;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 59c483937aec..7a86433d8896 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -209,6 +209,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
treq->snt_synack.v64 = 0;
treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
+ treq->txhash = net_tx_rndhash();
/*
* We need to lookup the dst_entry to get the correct window size.
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 667396536feb..7ac2365aa6fb 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -148,8 +148,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
* connect() to INADDR_ANY means loopback (BSD'ism).
*/
- if (ipv6_addr_any(&usin->sin6_addr))
- usin->sin6_addr.s6_addr[15] = 0x1;
+ if (ipv6_addr_any(&usin->sin6_addr)) {
+ if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
+ ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
+ &usin->sin6_addr);
+ else
+ usin->sin6_addr = in6addr_loopback;
+ }
addr_type = ipv6_addr_type(&usin->sin6_addr);
@@ -188,7 +193,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
* TCP over IPv4
*/
- if (addr_type == IPV6_ADDR_MAPPED) {
+ if (addr_type & IPV6_ADDR_MAPPED) {
u32 exthdrlen = icsk->icsk_ext_hdr_len;
struct sockaddr_in sin;
@@ -375,10 +380,12 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
np = inet6_sk(sk);
if (type == NDISC_REDIRECT) {
- struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
+ if (!sock_owned_by_user(sk)) {
+ struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
- if (dst)
- dst->ops->redirect(dst, sk, skb);
+ if (dst)
+ dst->ops->redirect(dst, sk, skb);
+ }
goto out;
}
@@ -1044,6 +1051,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif
+ newnp->ipv6_mc_list = NULL;
newnp->ipv6_ac_list = NULL;
newnp->ipv6_fl_list = NULL;
newnp->pktoptions = NULL;
@@ -1113,6 +1121,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
First: no IPv4 options.
*/
newinet->inet_opt = NULL;
+ newnp->ipv6_mc_list = NULL;
newnp->ipv6_ac_list = NULL;
newnp->ipv6_fl_list = NULL;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e4a8000d59ad..4db5f541bca6 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -915,6 +915,7 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
*/
offset = skb_transport_offset(skb);
skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
+ csum = skb->csum;
skb->ip_summed = CHECKSUM_NONE;
@@ -1037,6 +1038,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc6.hlimit = -1;
ipc6.tclass = -1;
ipc6.dontfrag = -1;
+ sockc.tsflags = sk->sk_tsflags;
/* destination address check */
if (sin6) {
@@ -1048,6 +1050,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
daddr = &sin6->sin6_addr;
+ if (ipv6_addr_any(daddr) &&
+ ipv6_addr_v4mapped(&np->saddr))
+ ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
+ daddr);
break;
case AF_INET:
goto do_udp_sendmsg;
@@ -1156,7 +1162,6 @@ do_udp_sendmsg:
fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
fl6.flowi6_mark = sk->sk_mark;
- sockc.tsflags = sk->sk_tsflags;
if (msg->msg_controllen) {
opt = &opt_space;
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index ac858c480f2f..e7d378c032cb 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -29,6 +29,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
u8 frag_hdr_sz = sizeof(struct frag_hdr);
__wsum csum;
int tnl_hlen;
+ int err;
mss = skb_shinfo(skb)->gso_size;
if (unlikely(skb->len <= mss))
@@ -71,7 +72,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
- skb->ip_summed = CHECKSUM_NONE;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
/* If there is no outer header we can fake a checksum offload
* due to the fact that we have already done the checksum in
@@ -90,7 +91,10 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
/* Find the unfragmentable header and shift it left by frag_hdr_sz
* bytes to insert fragment header.
*/
- unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
+ err = ip6_find_1stfragopt(skb, &prevhdr);
+ if (err < 0)
+ return ERR_PTR(err);
+ unfrag_ip6hlen = err;
nexthdr = *prevhdr;
*prevhdr = NEXTHDR_FRAGMENT;
unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) +
diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c
index 0e015906f9ca..07d36573f50b 100644
--- a/net/ipv6/xfrm6_mode_ro.c
+++ b/net/ipv6/xfrm6_mode_ro.c
@@ -47,6 +47,8 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb)
iph = ipv6_hdr(skb);
hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+ if (hdr_len < 0)
+ return hdr_len;
skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
skb_set_network_header(skb, -x->props.header_len);
skb->transport_header = skb->network_header + hdr_len;
diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c
index 4e344105b3fd..1d3bbe6e1183 100644
--- a/net/ipv6/xfrm6_mode_transport.c
+++ b/net/ipv6/xfrm6_mode_transport.c
@@ -28,6 +28,8 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb)
iph = ipv6_hdr(skb);
hdr_len = x->type->hdr_offset(x, skb, &prevhdr);
+ if (hdr_len < 0)
+ return hdr_len;
skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data);
skb_set_network_header(skb, -x->props.header_len);
skb->transport_header = skb->network_header + hdr_len;
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 48d0dc89b58d..e735f781e4f3 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1168,11 +1168,10 @@ static int ipxitf_ioctl(unsigned int cmd, void __user *arg)
sipx->sipx_network = ipxif->if_netnum;
memcpy(sipx->sipx_node, ipxif->if_node,
sizeof(sipx->sipx_node));
- rc = -EFAULT;
+ rc = 0;
if (copy_to_user(arg, &ifr, sizeof(ifr)))
- break;
+ rc = -EFAULT;
ipxitf_put(ipxif);
- rc = 0;
break;
}
case SIOCAIPXITFCRT:
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 391c3cbd2eed..101ed6c42808 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -2223,7 +2223,7 @@ static int irda_getsockopt(struct socket *sock, int level, int optname,
{
struct sock *sk = sock->sk;
struct irda_sock *self = irda_sk(sk);
- struct irda_device_list list;
+ struct irda_device_list list = { 0 };
struct irda_device_info *discoveries;
struct irda_ias_set * ias_opt; /* IAS get/query params */
struct ias_object * ias_obj; /* Object in IAS */
diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c
index acbe61c7e683..160dc89335e2 100644
--- a/net/irda/irqueue.c
+++ b/net/irda/irqueue.c
@@ -383,9 +383,6 @@ EXPORT_SYMBOL(hashbin_new);
* for deallocating this structure if it's complex. If not the user can
* just supply kfree, which should take care of the job.
*/
-#ifdef CONFIG_LOCKDEP
-static int hashbin_lock_depth = 0;
-#endif
int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
{
irda_queue_t* queue;
@@ -396,22 +393,27 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
IRDA_ASSERT(hashbin->magic == HB_MAGIC, return -1;);
/* Synchronize */
- if ( hashbin->hb_type & HB_LOCK ) {
- spin_lock_irqsave_nested(&hashbin->hb_spinlock, flags,
- hashbin_lock_depth++);
- }
+ if (hashbin->hb_type & HB_LOCK)
+ spin_lock_irqsave(&hashbin->hb_spinlock, flags);
/*
* Free the entries in the hashbin, TODO: use hashbin_clear when
* it has been shown to work
*/
for (i = 0; i < HASHBIN_SIZE; i ++ ) {
- queue = dequeue_first((irda_queue_t**) &hashbin->hb_queue[i]);
- while (queue ) {
- if (free_func)
- (*free_func)(queue);
- queue = dequeue_first(
- (irda_queue_t**) &hashbin->hb_queue[i]);
+ while (1) {
+ queue = dequeue_first((irda_queue_t**) &hashbin->hb_queue[i]);
+
+ if (!queue)
+ break;
+
+ if (free_func) {
+ if (hashbin->hb_type & HB_LOCK)
+ spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
+ free_func(queue);
+ if (hashbin->hb_type & HB_LOCK)
+ spin_lock_irqsave(&hashbin->hb_spinlock, flags);
+ }
}
}
@@ -420,12 +422,8 @@ int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func)
hashbin->magic = ~HB_MAGIC;
/* Release lock */
- if ( hashbin->hb_type & HB_LOCK) {
+ if (hashbin->hb_type & HB_LOCK)
spin_unlock_irqrestore(&hashbin->hb_spinlock, flags);
-#ifdef CONFIG_LOCKDEP
- hashbin_lock_depth--;
-#endif
- }
/*
* Free the hashbin structure
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 7e08a4d3d77d..7eb0e8fe3ca8 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -929,23 +929,25 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
goto out_error;
}
- /* New message, alloc head skb */
- head = alloc_skb(0, sk->sk_allocation);
- while (!head) {
- kcm_push(kcm);
- err = sk_stream_wait_memory(sk, &timeo);
- if (err)
- goto out_error;
-
+ if (msg_data_left(msg)) {
+ /* New message, alloc head skb */
head = alloc_skb(0, sk->sk_allocation);
- }
+ while (!head) {
+ kcm_push(kcm);
+ err = sk_stream_wait_memory(sk, &timeo);
+ if (err)
+ goto out_error;
- skb = head;
+ head = alloc_skb(0, sk->sk_allocation);
+ }
- /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling
- * csum_and_copy_from_iter from skb_do_copy_data_nocache.
- */
- skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb = head;
+
+ /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling
+ * csum_and_copy_from_iter from skb_do_copy_data_nocache.
+ */
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
start:
while (msg_data_left(msg)) {
@@ -1018,10 +1020,12 @@ wait_for_memory:
if (eor) {
bool not_busy = skb_queue_empty(&sk->sk_write_queue);
- /* Message complete, queue it on send buffer */
- __skb_queue_tail(&sk->sk_write_queue, head);
- kcm->seq_skb = NULL;
- KCM_STATS_INCR(kcm->stats.tx_msgs);
+ if (head) {
+ /* Message complete, queue it on send buffer */
+ __skb_queue_tail(&sk->sk_write_queue, head);
+ kcm->seq_skb = NULL;
+ KCM_STATS_INCR(kcm->stats.tx_msgs);
+ }
if (msg->msg_flags & MSG_BATCH) {
kcm->tx_wait_more = true;
@@ -1040,8 +1044,10 @@ wait_for_memory:
} else {
/* Message not complete, save state */
partial_message:
- kcm->seq_skb = head;
- kcm_tx_msg(head)->last_skb = skb;
+ if (head) {
+ kcm->seq_skb = head;
+ kcm_tx_msg(head)->last_skb = skb;
+ }
}
KCM_STATS_ADD(kcm->stats.tx_bytes, copied);
@@ -1375,6 +1381,10 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
if (!csk)
return -EINVAL;
+ /* We must prevent loops or risk deadlock ! */
+ if (csk->sk_family == PF_KCM)
+ return -EOPNOTSUPP;
+
psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
if (!psock)
return -ENOMEM;
@@ -1679,7 +1689,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct kcm_attach info;
if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
- err = -EFAULT;
+ return -EFAULT;
err = kcm_attach_ioctl(sock, &info);
@@ -1689,7 +1699,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct kcm_unattach info;
if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
- err = -EFAULT;
+ return -EFAULT;
err = kcm_unattach_ioctl(sock, &info);
@@ -1700,7 +1710,7 @@ static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct socket *newsock = NULL;
if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
- err = -EFAULT;
+ return -EFAULT;
err = kcm_clone(sock, &info, &newsock);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index f9c9ecb0cdd3..94bf810ad242 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -63,8 +63,13 @@ struct pfkey_sock {
} u;
struct sk_buff *skb;
} dump;
+ struct mutex dump_lock;
};
+static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len,
+ xfrm_address_t *saddr, xfrm_address_t *daddr,
+ u16 *family);
+
static inline struct pfkey_sock *pfkey_sk(struct sock *sk)
{
return (struct pfkey_sock *)sk;
@@ -139,6 +144,7 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol,
{
struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
struct sock *sk;
+ struct pfkey_sock *pfk;
int err;
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
@@ -153,6 +159,9 @@ static int pfkey_create(struct net *net, struct socket *sock, int protocol,
if (sk == NULL)
goto out;
+ pfk = pfkey_sk(sk);
+ mutex_init(&pfk->dump_lock);
+
sock->ops = &pfkey_ops;
sock_init_data(sock, sk);
@@ -219,7 +228,7 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
#define BROADCAST_ONE 1
#define BROADCAST_REGISTERED 2
#define BROADCAST_PROMISC_ONLY 4
-static int pfkey_broadcast(struct sk_buff *skb,
+static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
int broadcast_flags, struct sock *one_sk,
struct net *net)
{
@@ -269,7 +278,7 @@ static int pfkey_broadcast(struct sk_buff *skb,
rcu_read_unlock();
if (one_sk != NULL)
- err = pfkey_broadcast_one(skb, &skb2, GFP_KERNEL, one_sk);
+ err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk);
kfree_skb(skb2);
kfree_skb(skb);
@@ -281,23 +290,36 @@ static int pfkey_do_dump(struct pfkey_sock *pfk)
struct sadb_msg *hdr;
int rc;
+ mutex_lock(&pfk->dump_lock);
+ if (!pfk->dump.dump) {
+ rc = 0;
+ goto out;
+ }
+
rc = pfk->dump.dump(pfk);
- if (rc == -ENOBUFS)
- return 0;
+ if (rc == -ENOBUFS) {
+ rc = 0;
+ goto out;
+ }
if (pfk->dump.skb) {
- if (!pfkey_can_dump(&pfk->sk))
- return 0;
+ if (!pfkey_can_dump(&pfk->sk)) {
+ rc = 0;
+ goto out;
+ }
hdr = (struct sadb_msg *) pfk->dump.skb->data;
hdr->sadb_msg_seq = 0;
hdr->sadb_msg_errno = rc;
- pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE,
+ pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
&pfk->sk, sock_net(&pfk->sk));
pfk->dump.skb = NULL;
}
pfkey_terminate_dump(pfk);
+
+out:
+ mutex_unlock(&pfk->dump_lock);
return rc;
}
@@ -333,7 +355,7 @@ static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk)
hdr->sadb_msg_len = (sizeof(struct sadb_msg) /
sizeof(uint64_t));
- pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk));
+ pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk, sock_net(sk));
return 0;
}
@@ -1135,6 +1157,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
goto out;
}
+ err = -ENOBUFS;
key = ext_hdrs[SADB_EXT_KEY_AUTH - 1];
if (sa->sadb_sa_auth) {
int keysize = 0;
@@ -1146,8 +1169,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
if (key)
keysize = (key->sadb_key_bits + 7) / 8;
x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL);
- if (!x->aalg)
+ if (!x->aalg) {
+ err = -ENOMEM;
goto out;
+ }
strcpy(x->aalg->alg_name, a->name);
x->aalg->alg_key_len = 0;
if (key) {
@@ -1166,8 +1191,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
goto out;
}
x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL);
- if (!x->calg)
+ if (!x->calg) {
+ err = -ENOMEM;
goto out;
+ }
strcpy(x->calg->alg_name, a->name);
x->props.calgo = sa->sadb_sa_encrypt;
} else {
@@ -1181,8 +1208,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
if (key)
keysize = (key->sadb_key_bits + 7) / 8;
x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL);
- if (!x->ealg)
+ if (!x->ealg) {
+ err = -ENOMEM;
goto out;
+ }
strcpy(x->ealg->alg_name, a->name);
x->ealg->alg_key_len = 0;
if (key) {
@@ -1227,8 +1256,10 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
struct xfrm_encap_tmpl *natt;
x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL);
- if (!x->encap)
+ if (!x->encap) {
+ err = -ENOMEM;
goto out;
+ }
natt = x->encap;
n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1];
@@ -1365,7 +1396,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
xfrm_state_put(x);
- pfkey_broadcast(resp_skb, BROADCAST_ONE, sk, net);
+ pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk, net);
return 0;
}
@@ -1452,7 +1483,7 @@ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c)
hdr->sadb_msg_seq = c->seq;
hdr->sadb_msg_pid = c->portid;
- pfkey_broadcast(skb, BROADCAST_ALL, NULL, xs_net(x));
+ pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x));
return 0;
}
@@ -1565,7 +1596,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, const struct sadb_msg
out_hdr->sadb_msg_reserved = 0;
out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
- pfkey_broadcast(out_skb, BROADCAST_ONE, sk, sock_net(sk));
+ pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk));
return 0;
}
@@ -1670,8 +1701,8 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad
return -ENOBUFS;
}
- pfkey_broadcast(supp_skb, BROADCAST_REGISTERED, sk, sock_net(sk));
-
+ pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk,
+ sock_net(sk));
return 0;
}
@@ -1689,7 +1720,8 @@ static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr)
hdr->sadb_msg_errno = (uint8_t) 0;
hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
- return pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk));
+ return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk,
+ sock_net(sk));
}
static int key_notify_sa_flush(const struct km_event *c)
@@ -1710,7 +1742,7 @@ static int key_notify_sa_flush(const struct km_event *c)
hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
hdr->sadb_msg_reserved = 0;
- pfkey_broadcast(skb, BROADCAST_ALL, NULL, c->net);
+ pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net);
return 0;
}
@@ -1767,7 +1799,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr)
out_hdr->sadb_msg_pid = pfk->dump.msg_portid;
if (pfk->dump.skb)
- pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE,
+ pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
&pfk->sk, sock_net(&pfk->sk));
pfk->dump.skb = out_skb;
@@ -1793,19 +1825,26 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
struct xfrm_address_filter *filter = NULL;
struct pfkey_sock *pfk = pfkey_sk(sk);
- if (pfk->dump.dump != NULL)
+ mutex_lock(&pfk->dump_lock);
+ if (pfk->dump.dump != NULL) {
+ mutex_unlock(&pfk->dump_lock);
return -EBUSY;
+ }
proto = pfkey_satype2proto(hdr->sadb_msg_satype);
- if (proto == 0)
+ if (proto == 0) {
+ mutex_unlock(&pfk->dump_lock);
return -EINVAL;
+ }
if (ext_hdrs[SADB_X_EXT_FILTER - 1]) {
struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1];
filter = kmalloc(sizeof(*filter), GFP_KERNEL);
- if (filter == NULL)
+ if (filter == NULL) {
+ mutex_unlock(&pfk->dump_lock);
return -ENOMEM;
+ }
memcpy(&filter->saddr, &xfilter->sadb_x_filter_saddr,
sizeof(xfrm_address_t));
@@ -1821,6 +1860,7 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
pfk->dump.dump = pfkey_dump_sa;
pfk->dump.done = pfkey_dump_sa_done;
xfrm_state_walk_init(&pfk->dump.u.state, proto, filter);
+ mutex_unlock(&pfk->dump_lock);
return pfkey_do_dump(pfk);
}
@@ -1847,7 +1887,7 @@ static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, const struct sadb
new_hdr->sadb_msg_errno = 0;
}
- pfkey_broadcast(skb, BROADCAST_ALL, NULL, sock_net(sk));
+ pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk));
return 0;
}
@@ -1913,19 +1953,14 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
/* addresses present only in tunnel mode */
if (t->mode == XFRM_MODE_TUNNEL) {
- u8 *sa = (u8 *) (rq + 1);
- int family, socklen;
-
- family = pfkey_sockaddr_extract((struct sockaddr *)sa,
- &t->saddr);
- if (!family)
- return -EINVAL;
+ int err;
- socklen = pfkey_sockaddr_len(family);
- if (pfkey_sockaddr_extract((struct sockaddr *)(sa + socklen),
- &t->id.daddr) != family)
- return -EINVAL;
- t->encap_family = family;
+ err = parse_sockaddr_pair(
+ (struct sockaddr *)(rq + 1),
+ rq->sadb_x_ipsecrequest_len - sizeof(*rq),
+ &t->saddr, &t->id.daddr, &t->encap_family);
+ if (err)
+ return err;
} else
t->encap_family = xp->family;
@@ -1945,7 +1980,11 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol)
if (pol->sadb_x_policy_len * 8 < sizeof(struct sadb_x_policy))
return -EINVAL;
- while (len >= sizeof(struct sadb_x_ipsecrequest)) {
+ while (len >= sizeof(*rq)) {
+ if (len < rq->sadb_x_ipsecrequest_len ||
+ rq->sadb_x_ipsecrequest_len < sizeof(*rq))
+ return -EINVAL;
+
if ((err = parse_ipsecrequest(xp, rq)) < 0)
return err;
len -= rq->sadb_x_ipsecrequest_len;
@@ -2181,7 +2220,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev
out_hdr->sadb_msg_errno = 0;
out_hdr->sadb_msg_seq = c->seq;
out_hdr->sadb_msg_pid = c->portid;
- pfkey_broadcast(out_skb, BROADCAST_ALL, NULL, xp_net(xp));
+ pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp));
return 0;
}
@@ -2401,14 +2440,13 @@ static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struc
out_hdr->sadb_msg_errno = 0;
out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
- pfkey_broadcast(out_skb, BROADCAST_ONE, sk, xp_net(xp));
+ pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, xp_net(xp));
err = 0;
out:
return err;
}
-#ifdef CONFIG_NET_KEY_MIGRATE
static int pfkey_sockaddr_pair_size(sa_family_t family)
{
return PFKEY_ALIGN8(pfkey_sockaddr_len(family) * 2);
@@ -2420,7 +2458,7 @@ static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len,
{
int af, socklen;
- if (ext_len < pfkey_sockaddr_pair_size(sa->sa_family))
+ if (ext_len < 2 || ext_len < pfkey_sockaddr_pair_size(sa->sa_family))
return -EINVAL;
af = pfkey_sockaddr_extract(sa, saddr);
@@ -2436,6 +2474,7 @@ static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len,
return 0;
}
+#ifdef CONFIG_NET_KEY_MIGRATE
static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
struct xfrm_migrate *m)
{
@@ -2443,13 +2482,14 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
struct sadb_x_ipsecrequest *rq2;
int mode;
- if (len <= sizeof(struct sadb_x_ipsecrequest) ||
- len < rq1->sadb_x_ipsecrequest_len)
+ if (len < sizeof(*rq1) ||
+ len < rq1->sadb_x_ipsecrequest_len ||
+ rq1->sadb_x_ipsecrequest_len < sizeof(*rq1))
return -EINVAL;
/* old endoints */
err = parse_sockaddr_pair((struct sockaddr *)(rq1 + 1),
- rq1->sadb_x_ipsecrequest_len,
+ rq1->sadb_x_ipsecrequest_len - sizeof(*rq1),
&m->old_saddr, &m->old_daddr,
&m->old_family);
if (err)
@@ -2458,13 +2498,14 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len,
rq2 = (struct sadb_x_ipsecrequest *)((u8 *)rq1 + rq1->sadb_x_ipsecrequest_len);
len -= rq1->sadb_x_ipsecrequest_len;
- if (len <= sizeof(struct sadb_x_ipsecrequest) ||
- len < rq2->sadb_x_ipsecrequest_len)
+ if (len <= sizeof(*rq2) ||
+ len < rq2->sadb_x_ipsecrequest_len ||
+ rq2->sadb_x_ipsecrequest_len < sizeof(*rq2))
return -EINVAL;
/* new endpoints */
err = parse_sockaddr_pair((struct sockaddr *)(rq2 + 1),
- rq2->sadb_x_ipsecrequest_len,
+ rq2->sadb_x_ipsecrequest_len - sizeof(*rq2),
&m->new_saddr, &m->new_daddr,
&m->new_family);
if (err)
@@ -2655,7 +2696,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr)
out_hdr->sadb_msg_pid = pfk->dump.msg_portid;
if (pfk->dump.skb)
- pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE,
+ pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
&pfk->sk, sock_net(&pfk->sk));
pfk->dump.skb = out_skb;
@@ -2679,14 +2720,18 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb
{
struct pfkey_sock *pfk = pfkey_sk(sk);
- if (pfk->dump.dump != NULL)
+ mutex_lock(&pfk->dump_lock);
+ if (pfk->dump.dump != NULL) {
+ mutex_unlock(&pfk->dump_lock);
return -EBUSY;
+ }
pfk->dump.msg_version = hdr->sadb_msg_version;
pfk->dump.msg_portid = hdr->sadb_msg_pid;
pfk->dump.dump = pfkey_dump_sp;
pfk->dump.done = pfkey_dump_sp_done;
xfrm_policy_walk_init(&pfk->dump.u.policy, XFRM_POLICY_TYPE_MAIN);
+ mutex_unlock(&pfk->dump_lock);
return pfkey_do_dump(pfk);
}
@@ -2708,7 +2753,7 @@ static int key_notify_policy_flush(const struct km_event *c)
hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC;
hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
hdr->sadb_msg_reserved = 0;
- pfkey_broadcast(skb_out, BROADCAST_ALL, NULL, c->net);
+ pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net);
return 0;
}
@@ -2770,7 +2815,7 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb
void *ext_hdrs[SADB_EXT_MAX];
int err;
- pfkey_broadcast(skb_clone(skb, GFP_KERNEL),
+ pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
memset(ext_hdrs, 0, sizeof(ext_hdrs));
@@ -2992,7 +3037,8 @@ static int key_notify_sa_expire(struct xfrm_state *x, const struct km_event *c)
out_hdr->sadb_msg_seq = 0;
out_hdr->sadb_msg_pid = 0;
- pfkey_broadcast(out_skb, BROADCAST_REGISTERED, NULL, xs_net(x));
+ pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL,
+ xs_net(x));
return 0;
}
@@ -3182,7 +3228,8 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
xfrm_ctx->ctx_len);
}
- return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x));
+ return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL,
+ xs_net(x));
}
static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
@@ -3380,7 +3427,8 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
n_port->sadb_x_nat_t_port_port = sport;
n_port->sadb_x_nat_t_port_reserved = 0;
- return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x));
+ return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL,
+ xs_net(x));
}
#ifdef CONFIG_NET_KEY_MIGRATE
@@ -3572,7 +3620,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
}
/* broadcast migrate message to sockets */
- pfkey_broadcast(skb, BROADCAST_ALL, NULL, &init_net);
+ pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net);
return 0;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index a2ed3bda4ddc..b06acd0f400d 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -278,7 +278,57 @@ struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunn
}
EXPORT_SYMBOL_GPL(l2tp_session_find);
-struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
+/* Like l2tp_session_find() but takes a reference on the returned session.
+ * Optionally calls session->ref() too if do_ref is true.
+ */
+struct l2tp_session *l2tp_session_get(struct net *net,
+ struct l2tp_tunnel *tunnel,
+ u32 session_id, bool do_ref)
+{
+ struct hlist_head *session_list;
+ struct l2tp_session *session;
+
+ if (!tunnel) {
+ struct l2tp_net *pn = l2tp_pernet(net);
+
+ session_list = l2tp_session_id_hash_2(pn, session_id);
+
+ rcu_read_lock_bh();
+ hlist_for_each_entry_rcu(session, session_list, global_hlist) {
+ if (session->session_id == session_id) {
+ l2tp_session_inc_refcount(session);
+ if (do_ref && session->ref)
+ session->ref(session);
+ rcu_read_unlock_bh();
+
+ return session;
+ }
+ }
+ rcu_read_unlock_bh();
+
+ return NULL;
+ }
+
+ session_list = l2tp_session_id_hash(tunnel, session_id);
+ read_lock_bh(&tunnel->hlist_lock);
+ hlist_for_each_entry(session, session_list, hlist) {
+ if (session->session_id == session_id) {
+ l2tp_session_inc_refcount(session);
+ if (do_ref && session->ref)
+ session->ref(session);
+ read_unlock_bh(&tunnel->hlist_lock);
+
+ return session;
+ }
+ }
+ read_unlock_bh(&tunnel->hlist_lock);
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_get);
+
+struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
+ bool do_ref)
{
int hash;
struct l2tp_session *session;
@@ -288,6 +338,9 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) {
if (++count > nth) {
+ l2tp_session_inc_refcount(session);
+ if (do_ref && session->ref)
+ session->ref(session);
read_unlock_bh(&tunnel->hlist_lock);
return session;
}
@@ -298,12 +351,13 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
return NULL;
}
-EXPORT_SYMBOL_GPL(l2tp_session_find_nth);
+EXPORT_SYMBOL_GPL(l2tp_session_get_nth);
/* Lookup a session by interface name.
* This is very inefficient but is only used by management interfaces.
*/
-struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
+struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname,
+ bool do_ref)
{
struct l2tp_net *pn = l2tp_pernet(net);
int hash;
@@ -313,7 +367,11 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) {
hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) {
if (!strcmp(session->ifname, ifname)) {
+ l2tp_session_inc_refcount(session);
+ if (do_ref && session->ref)
+ session->ref(session);
rcu_read_unlock_bh();
+
return session;
}
}
@@ -323,7 +381,49 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
return NULL;
}
-EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname);
+EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname);
+
+static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel,
+ struct l2tp_session *session)
+{
+ struct l2tp_session *session_walk;
+ struct hlist_head *g_head;
+ struct hlist_head *head;
+ struct l2tp_net *pn;
+
+ head = l2tp_session_id_hash(tunnel, session->session_id);
+
+ write_lock_bh(&tunnel->hlist_lock);
+ hlist_for_each_entry(session_walk, head, hlist)
+ if (session_walk->session_id == session->session_id)
+ goto exist;
+
+ if (tunnel->version == L2TP_HDR_VER_3) {
+ pn = l2tp_pernet(tunnel->l2tp_net);
+ g_head = l2tp_session_id_hash_2(l2tp_pernet(tunnel->l2tp_net),
+ session->session_id);
+
+ spin_lock_bh(&pn->l2tp_session_hlist_lock);
+ hlist_for_each_entry(session_walk, g_head, global_hlist)
+ if (session_walk->session_id == session->session_id)
+ goto exist_glob;
+
+ hlist_add_head_rcu(&session->global_hlist, g_head);
+ spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+ }
+
+ hlist_add_head(&session->hlist, head);
+ write_unlock_bh(&tunnel->hlist_lock);
+
+ return 0;
+
+exist_glob:
+ spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+exist:
+ write_unlock_bh(&tunnel->hlist_lock);
+
+ return -EEXIST;
+}
/* Lookup a tunnel by id
*/
@@ -633,6 +733,9 @@ discard:
* a data (not control) frame before coming here. Fields up to the
* session-id have already been parsed and ptr points to the data
* after the session-id.
+ *
+ * session->ref() must have been called prior to l2tp_recv_common().
+ * session->deref() will be called automatically after skb is processed.
*/
void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
unsigned char *ptr, unsigned char *optr, u16 hdrflags,
@@ -642,14 +745,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
int offset;
u32 ns, nr;
- /* The ref count is increased since we now hold a pointer to
- * the session. Take care to decrement the refcnt when exiting
- * this function from now on...
- */
- l2tp_session_inc_refcount(session);
- if (session->ref)
- (*session->ref)(session);
-
/* Parse and check optional cookie */
if (session->peer_cookie_len > 0) {
if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) {
@@ -802,8 +897,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
/* Try to dequeue as many skbs from reorder_q as we can. */
l2tp_recv_dequeue(session);
- l2tp_session_dec_refcount(session);
-
return;
discard:
@@ -812,8 +905,6 @@ discard:
if (session->deref)
(*session->deref)(session);
-
- l2tp_session_dec_refcount(session);
}
EXPORT_SYMBOL(l2tp_recv_common);
@@ -920,8 +1011,14 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
}
/* Find the session context */
- session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id);
+ session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id, true);
if (!session || !session->recv_skb) {
+ if (session) {
+ if (session->deref)
+ session->deref(session);
+ l2tp_session_dec_refcount(session);
+ }
+
/* Not found? Pass to userspace to deal with */
l2tp_info(tunnel, L2TP_MSG_DATA,
"%s: no session found (%u/%u). Passing up.\n",
@@ -930,6 +1027,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
}
l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook);
+ l2tp_session_dec_refcount(session);
return 0;
@@ -1317,6 +1415,9 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
struct sock *sk = NULL;
tunnel = container_of(work, struct l2tp_tunnel, del_work);
+
+ l2tp_tunnel_closeall(tunnel);
+
sk = l2tp_tunnel_sock_lookup(tunnel);
if (!sk)
goto out;
@@ -1636,15 +1737,12 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
/* This function is used by the netlink TUNNEL_DELETE command.
*/
-int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
+void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
{
- l2tp_tunnel_inc_refcount(tunnel);
- l2tp_tunnel_closeall(tunnel);
- if (false == queue_work(l2tp_wq, &tunnel->del_work)) {
- l2tp_tunnel_dec_refcount(tunnel);
- return 1;
+ if (!test_and_set_bit(0, &tunnel->dead)) {
+ l2tp_tunnel_inc_refcount(tunnel);
+ queue_work(l2tp_wq, &tunnel->del_work);
}
- return 0;
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
@@ -1736,6 +1834,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
{
struct l2tp_session *session;
+ int err;
session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL);
if (session != NULL) {
@@ -1791,6 +1890,13 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
l2tp_session_set_header_len(session, tunnel->version);
+ err = l2tp_session_add_to_tunnel(tunnel, session);
+ if (err) {
+ kfree(session);
+
+ return ERR_PTR(err);
+ }
+
/* Bump the reference count. The session context is deleted
* only when this drops to zero.
*/
@@ -1800,28 +1906,14 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
/* Ensure tunnel socket isn't deleted */
sock_hold(tunnel->sock);
- /* Add session to the tunnel's hash list */
- write_lock_bh(&tunnel->hlist_lock);
- hlist_add_head(&session->hlist,
- l2tp_session_id_hash(tunnel, session_id));
- write_unlock_bh(&tunnel->hlist_lock);
-
- /* And to the global session list if L2TPv3 */
- if (tunnel->version != L2TP_HDR_VER_2) {
- struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
-
- spin_lock_bh(&pn->l2tp_session_hlist_lock);
- hlist_add_head_rcu(&session->global_hlist,
- l2tp_session_id_hash_2(pn, session_id));
- spin_unlock_bh(&pn->l2tp_session_hlist_lock);
- }
-
/* Ignore management session in session count value */
if (session->session_id != 0)
atomic_inc(&l2tp_session_count);
+
+ return session;
}
- return session;
+ return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL_GPL(l2tp_session_create);
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 181e755c2fc4..42419f1c24cf 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -169,6 +169,9 @@ struct l2tp_tunnel_cfg {
struct l2tp_tunnel {
int magic; /* Should be L2TP_TUNNEL_MAGIC */
+
+ unsigned long dead;
+
struct rcu_head rcu;
rwlock_t hlist_lock; /* protect session_hlist */
struct hlist_head session_hlist[L2TP_HASH_SIZE];
@@ -240,11 +243,16 @@ out:
return tunnel;
}
+struct l2tp_session *l2tp_session_get(struct net *net,
+ struct l2tp_tunnel *tunnel,
+ u32 session_id, bool do_ref);
struct l2tp_session *l2tp_session_find(struct net *net,
struct l2tp_tunnel *tunnel,
u32 session_id);
-struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth);
-struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname);
+struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth,
+ bool do_ref);
+struct l2tp_session *l2tp_session_get_by_ifname(struct net *net, char *ifname,
+ bool do_ref);
struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
@@ -252,7 +260,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
struct l2tp_tunnel **tunnelp);
void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
-int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
+void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
struct l2tp_session *l2tp_session_create(int priv_size,
struct l2tp_tunnel *tunnel,
u32 session_id, u32 peer_session_id,
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 2d6760a2ae34..d100aed3d06f 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd)
static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
{
- pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx);
+ pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true);
pd->session_idx++;
if (pd->session == NULL) {
@@ -238,10 +238,14 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v)
}
/* Show the tunnel or session context */
- if (pd->session == NULL)
+ if (!pd->session) {
l2tp_dfs_seq_tunnel_show(m, pd->tunnel);
- else
+ } else {
l2tp_dfs_seq_session_show(m, pd->session);
+ if (pd->session->deref)
+ pd->session->deref(pd->session);
+ l2tp_session_dec_refcount(pd->session);
+ }
out:
return 0;
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 965f7e344cef..eecc64e138de 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -223,12 +223,6 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
goto out;
}
- session = l2tp_session_find(net, tunnel, session_id);
- if (session) {
- rc = -EEXIST;
- goto out;
- }
-
if (cfg->ifname) {
dev = dev_get_by_name(net, cfg->ifname);
if (dev) {
@@ -242,8 +236,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
session = l2tp_session_create(sizeof(*spriv), tunnel, session_id,
peer_session_id, cfg);
- if (!session) {
- rc = -ENOMEM;
+ if (IS_ERR(session)) {
+ rc = PTR_ERR(session);
goto out;
}
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index c0f0750639bd..3468d5635d0a 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -143,19 +143,19 @@ static int l2tp_ip_recv(struct sk_buff *skb)
}
/* Ok, this is a data packet. Lookup the session. */
- session = l2tp_session_find(net, NULL, session_id);
- if (session == NULL)
+ session = l2tp_session_get(net, NULL, session_id, true);
+ if (!session)
goto discard;
tunnel = session->tunnel;
- if (tunnel == NULL)
- goto discard;
+ if (!tunnel)
+ goto discard_sess;
/* Trace packet contents, if enabled */
if (tunnel->debug & L2TP_MSG_DATA) {
length = min(32u, skb->len);
if (!pskb_may_pull(skb, length))
- goto discard;
+ goto discard_sess;
/* Point to L2TP header */
optr = ptr = skb->data;
@@ -165,6 +165,7 @@ static int l2tp_ip_recv(struct sk_buff *skb)
}
l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook);
+ l2tp_session_dec_refcount(session);
return 0;
@@ -178,9 +179,10 @@ pass_up:
tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel != NULL)
+ if (tunnel) {
sk = tunnel->sock;
- else {
+ sock_hold(sk);
+ } else {
struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
read_lock_bh(&l2tp_ip_lock);
@@ -202,6 +204,12 @@ pass_up:
return sk_receive_skb(sk, skb, 1);
+discard_sess:
+ if (session->deref)
+ session->deref(session);
+ l2tp_session_dec_refcount(session);
+ goto discard;
+
discard_put:
sock_put(sk);
@@ -388,7 +396,7 @@ static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb)
drop:
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS);
kfree_skb(skb);
- return -1;
+ return 0;
}
/* Userspace will call sendmsg() on the tunnel socket to send L2TP
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 1a65c9a517b6..1d522ce833e6 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -64,7 +64,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net,
struct sock *sk;
sk_for_each_bound(sk, &l2tp_ip6_bind_table) {
- const struct in6_addr *addr = inet6_rcv_saddr(sk);
+ const struct in6_addr *sk_laddr = inet6_rcv_saddr(sk);
struct l2tp_ip6_sock *l2tp = l2tp_ip6_sk(sk);
if (l2tp == NULL)
@@ -72,7 +72,7 @@ static struct sock *__l2tp_ip6_bind_lookup(struct net *net,
if ((l2tp->conn_id == tunnel_id) &&
net_eq(sock_net(sk), net) &&
- (!addr || ipv6_addr_equal(addr, laddr)) &&
+ (!sk_laddr || ipv6_addr_any(sk_laddr) || ipv6_addr_equal(sk_laddr, laddr)) &&
(!sk->sk_bound_dev_if || !dif ||
sk->sk_bound_dev_if == dif))
goto found;
@@ -156,19 +156,19 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
}
/* Ok, this is a data packet. Lookup the session. */
- session = l2tp_session_find(net, NULL, session_id);
- if (session == NULL)
+ session = l2tp_session_get(net, NULL, session_id, true);
+ if (!session)
goto discard;
tunnel = session->tunnel;
- if (tunnel == NULL)
- goto discard;
+ if (!tunnel)
+ goto discard_sess;
/* Trace packet contents, if enabled */
if (tunnel->debug & L2TP_MSG_DATA) {
length = min(32u, skb->len);
if (!pskb_may_pull(skb, length))
- goto discard;
+ goto discard_sess;
/* Point to L2TP header */
optr = ptr = skb->data;
@@ -179,6 +179,8 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
l2tp_recv_common(session, skb, ptr, optr, 0, skb->len,
tunnel->recv_payload_hook);
+ l2tp_session_dec_refcount(session);
+
return 0;
pass_up:
@@ -191,9 +193,10 @@ pass_up:
tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel != NULL)
+ if (tunnel) {
sk = tunnel->sock;
- else {
+ sock_hold(sk);
+ } else {
struct ipv6hdr *iph = ipv6_hdr(skb);
read_lock_bh(&l2tp_ip6_lock);
@@ -215,6 +218,12 @@ pass_up:
return sk_receive_skb(sk, skb, 1);
+discard_sess:
+ if (session->deref)
+ session->deref(session);
+ l2tp_session_dec_refcount(session);
+ goto discard;
+
discard_put:
sock_put(sk);
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index bf3117771822..1ccd310d01a5 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -55,7 +55,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq,
/* Accessed under genl lock */
static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX];
-static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info)
+static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info,
+ bool do_ref)
{
u32 tunnel_id;
u32 session_id;
@@ -66,14 +67,15 @@ static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info)
if (info->attrs[L2TP_ATTR_IFNAME]) {
ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
- session = l2tp_session_find_by_ifname(net, ifname);
+ session = l2tp_session_get_by_ifname(net, ifname, do_ref);
} else if ((info->attrs[L2TP_ATTR_SESSION_ID]) &&
(info->attrs[L2TP_ATTR_CONN_ID])) {
tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
tunnel = l2tp_tunnel_find(net, tunnel_id);
if (tunnel)
- session = l2tp_session_find(net, tunnel, session_id);
+ session = l2tp_session_get(net, tunnel, session_id,
+ do_ref);
}
return session;
@@ -634,10 +636,12 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
session_id, peer_session_id, &cfg);
if (ret >= 0) {
- session = l2tp_session_find(net, tunnel, session_id);
- if (session)
+ session = l2tp_session_get(net, tunnel, session_id, false);
+ if (session) {
ret = l2tp_session_notify(&l2tp_nl_family, info, session,
L2TP_CMD_SESSION_CREATE);
+ l2tp_session_dec_refcount(session);
+ }
}
out:
@@ -650,7 +654,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
struct l2tp_session *session;
u16 pw_type;
- session = l2tp_nl_session_find(info);
+ session = l2tp_nl_session_get(info, true);
if (session == NULL) {
ret = -ENODEV;
goto out;
@@ -664,6 +668,10 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf
if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session);
+ if (session->deref)
+ session->deref(session);
+ l2tp_session_dec_refcount(session);
+
out:
return ret;
}
@@ -673,7 +681,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
int ret = 0;
struct l2tp_session *session;
- session = l2tp_nl_session_find(info);
+ session = l2tp_nl_session_get(info, false);
if (session == NULL) {
ret = -ENODEV;
goto out;
@@ -708,6 +716,8 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
ret = l2tp_session_notify(&l2tp_nl_family, info,
session, L2TP_CMD_SESSION_MODIFY);
+ l2tp_session_dec_refcount(session);
+
out:
return ret;
}
@@ -803,29 +813,34 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *msg;
int ret;
- session = l2tp_nl_session_find(info);
+ session = l2tp_nl_session_get(info, false);
if (session == NULL) {
ret = -ENODEV;
- goto out;
+ goto err;
}
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
- goto out;
+ goto err_ref;
}
ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq,
0, session, L2TP_CMD_SESSION_GET);
if (ret < 0)
- goto err_out;
+ goto err_ref_msg;
- return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid);
+ ret = genlmsg_unicast(genl_info_net(info), msg, info->snd_portid);
-err_out:
- nlmsg_free(msg);
+ l2tp_session_dec_refcount(session);
-out:
+ return ret;
+
+err_ref_msg:
+ nlmsg_free(msg);
+err_ref:
+ l2tp_session_dec_refcount(session);
+err:
return ret;
}
@@ -844,7 +859,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
goto out;
}
- session = l2tp_session_find_nth(tunnel, si);
+ session = l2tp_session_get_nth(tunnel, si, false);
if (session == NULL) {
ti++;
tunnel = NULL;
@@ -854,8 +869,11 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- session, L2TP_CMD_SESSION_GET) < 0)
+ session, L2TP_CMD_SESSION_GET) < 0) {
+ l2tp_session_dec_refcount(session);
break;
+ }
+ l2tp_session_dec_refcount(session);
si++;
}
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 41d47bfda15c..163f1fa53917 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -450,6 +450,10 @@ static void pppol2tp_session_close(struct l2tp_session *session)
static void pppol2tp_session_destruct(struct sock *sk)
{
struct l2tp_session *session = sk->sk_user_data;
+
+ skb_queue_purge(&sk->sk_receive_queue);
+ skb_queue_purge(&sk->sk_write_queue);
+
if (session) {
sk->sk_user_data = NULL;
BUG_ON(session->magic != L2TP_SESSION_MAGIC);
@@ -488,9 +492,6 @@ static int pppol2tp_release(struct socket *sock)
l2tp_session_queue_purge(session);
sock_put(sk);
}
- skb_queue_purge(&sk->sk_receive_queue);
- skb_queue_purge(&sk->sk_write_queue);
-
release_sock(sk);
/* This will delete the session context via
@@ -582,6 +583,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
int error = 0;
u32 tunnel_id, peer_tunnel_id;
u32 session_id, peer_session_id;
+ bool drop_refcnt = false;
int ver = 2;
int fd;
@@ -683,36 +685,36 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
if (tunnel->peer_tunnel_id == 0)
tunnel->peer_tunnel_id = peer_tunnel_id;
- /* Create session if it doesn't already exist. We handle the
- * case where a session was previously created by the netlink
- * interface by checking that the session doesn't already have
- * a socket and its tunnel socket are what we expect. If any
- * of those checks fail, return EEXIST to the caller.
- */
- session = l2tp_session_find(sock_net(sk), tunnel, session_id);
- if (session == NULL) {
- /* Default MTU must allow space for UDP/L2TP/PPP
- * headers.
+ session = l2tp_session_get(sock_net(sk), tunnel, session_id, false);
+ if (session) {
+ drop_refcnt = true;
+ ps = l2tp_session_priv(session);
+
+ /* Using a pre-existing session is fine as long as it hasn't
+ * been connected yet.
*/
- cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+ if (ps->sock) {
+ error = -EEXIST;
+ goto end;
+ }
- /* Allocate and initialize a new session context. */
- session = l2tp_session_create(sizeof(struct pppol2tp_session),
- tunnel, session_id,
- peer_session_id, &cfg);
- if (session == NULL) {
- error = -ENOMEM;
+ /* consistency checks */
+ if (ps->tunnel_sock != tunnel->sock) {
+ error = -EEXIST;
goto end;
}
} else {
- ps = l2tp_session_priv(session);
- error = -EEXIST;
- if (ps->sock != NULL)
- goto end;
+ /* Default MTU must allow space for UDP/L2TP/PPP headers */
+ cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+ cfg.mru = cfg.mtu;
- /* consistency checks */
- if (ps->tunnel_sock != tunnel->sock)
+ session = l2tp_session_create(sizeof(struct pppol2tp_session),
+ tunnel, session_id,
+ peer_session_id, &cfg);
+ if (IS_ERR(session)) {
+ error = PTR_ERR(session);
goto end;
+ }
}
/* Associate session with its PPPoL2TP socket */
@@ -777,6 +779,8 @@ out_no_ppp:
session->name);
end:
+ if (drop_refcnt)
+ l2tp_session_dec_refcount(session);
release_sock(sk);
return error;
@@ -804,12 +808,6 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i
if (tunnel->sock == NULL)
goto out;
- /* Check that this session doesn't already exist */
- error = -EEXIST;
- session = l2tp_session_find(net, tunnel, session_id);
- if (session != NULL)
- goto out;
-
/* Default MTU values. */
if (cfg->mtu == 0)
cfg->mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
@@ -817,12 +815,13 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i
cfg->mru = cfg->mtu;
/* Allocate and initialize a new session context. */
- error = -ENOMEM;
session = l2tp_session_create(sizeof(struct pppol2tp_session),
tunnel, session_id,
peer_session_id, cfg);
- if (session == NULL)
+ if (IS_ERR(session)) {
+ error = PTR_ERR(session);
goto out;
+ }
ps = l2tp_session_priv(session);
ps->tunnel_sock = tunnel->sock;
@@ -994,6 +993,9 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
session->name, cmd, arg);
sk = ps->sock;
+ if (!sk)
+ return -EBADR;
+
sock_hold(sk);
switch (cmd) {
@@ -1140,11 +1142,18 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
if (stats.session_id != 0) {
/* resend to session ioctl handler */
struct l2tp_session *session =
- l2tp_session_find(sock_net(sk), tunnel, stats.session_id);
- if (session != NULL)
- err = pppol2tp_session_ioctl(session, cmd, arg);
- else
+ l2tp_session_get(sock_net(sk), tunnel,
+ stats.session_id, true);
+
+ if (session) {
+ err = pppol2tp_session_ioctl(session, cmd,
+ arg);
+ if (session->deref)
+ session->deref(session);
+ l2tp_session_dec_refcount(session);
+ } else {
err = -EBADR;
+ }
break;
}
#ifdef CONFIG_XFRM
@@ -1554,7 +1563,7 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
{
- pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx);
+ pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true);
pd->session_idx++;
if (pd->session == NULL) {
@@ -1681,10 +1690,14 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v)
/* Show the tunnel or session context.
*/
- if (pd->session == NULL)
+ if (!pd->session) {
pppol2tp_seq_tunnel_show(m, pd->tunnel);
- else
+ } else {
pppol2tp_seq_session_show(m, pd->session);
+ if (pd->session->deref)
+ pd->session->deref(pd->session);
+ l2tp_session_dec_refcount(pd->session);
+ }
out:
return 0;
@@ -1843,4 +1856,4 @@ MODULE_DESCRIPTION("PPP over L2TP over UDP");
MODULE_LICENSE("GPL");
MODULE_VERSION(PPPOL2TP_DRV_VERSION);
MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OL2TP);
-MODULE_ALIAS_L2TP_PWTYPE(11);
+MODULE_ALIAS_L2TP_PWTYPE(7);
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 3e821daf9dd4..8bc5a1bd2d45 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -821,7 +821,10 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb)
* another trick required to cope with how the PROCOM state
* machine works. -acme
*/
+ skb_orphan(skb);
+ sock_hold(sk);
skb->sk = sk;
+ skb->destructor = sock_efree;
}
if (!sock_owned_by_user(sk))
llc_conn_rcv(sk, skb);
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index d0e1e804ebd7..5404d0d195cc 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -290,7 +290,10 @@ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb,
ev->type = LLC_SAP_EV_TYPE_PDU;
ev->reason = 0;
+ skb_orphan(skb);
+ sock_hold(sk);
skb->sk = sk;
+ skb->destructor = sock_efree;
llc_sap_state_process(sap, skb);
}
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 3b5fd4188f2a..58ad23a44109 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -398,6 +398,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
tid_agg_rx->timeout = timeout;
tid_agg_rx->stored_mpdu_num = 0;
tid_agg_rx->auto_seq = auto_seq;
+ tid_agg_rx->started = false;
tid_agg_rx->reorder_buf_filtered = 0;
status = WLAN_STATUS_SUCCESS;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index fd6541f3ade3..07001b6d36cc 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -865,6 +865,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
default:
return -EINVAL;
}
+ sdata->u.ap.req_smps = sdata->smps_mode;
+
sdata->needed_rx_chains = sdata->local->rx_chains;
mutex_lock(&local->mtx);
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index a31d30713d08..62d13eabe17f 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -66,6 +66,8 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata,
2 + (IEEE80211_MAX_SUPP_RATES - 8) +
2 + sizeof(struct ieee80211_ht_cap) +
2 + sizeof(struct ieee80211_ht_operation) +
+ 2 + sizeof(struct ieee80211_vht_cap) +
+ 2 + sizeof(struct ieee80211_vht_operation) +
ifibss->ie_len;
presp = kzalloc(sizeof(*presp) + frame_len, GFP_KERNEL);
if (!presp)
@@ -487,14 +489,14 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
struct beacon_data *presp, *old_presp;
struct cfg80211_bss *cbss;
const struct cfg80211_bss_ies *ies;
- u16 capability = 0;
+ u16 capability = WLAN_CAPABILITY_IBSS;
u64 tsf;
int ret = 0;
sdata_assert_lock(sdata);
if (ifibss->privacy)
- capability = WLAN_CAPABILITY_PRIVACY;
+ capability |= WLAN_CAPABILITY_PRIVACY;
cbss = cfg80211_get_bss(sdata->local->hw.wiphy, ifibss->chandef.chan,
ifibss->bssid, ifibss->ssid,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 34c2add2c455..03dbc6bd8598 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -681,7 +681,6 @@ struct ieee80211_if_mesh {
const struct ieee80211_mesh_sync_ops *sync_ops;
s64 sync_offset_clockdrift_max;
spinlock_t sync_offset_lock;
- bool adjusting_tbtt;
/* mesh power save */
enum nl80211_mesh_power_mode nonpeer_pm;
int ps_peers_light_sleep;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 638ec0759078..a7aa54f45e19 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -6,6 +6,7 @@
* Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
* Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright (c) 2016 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -726,7 +727,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
ieee80211_recalc_ps(local);
if (sdata->vif.type == NL80211_IFTYPE_MONITOR ||
- sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
+ sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
+ local->ops->wake_tx_queue) {
/* XXX: for AP_VLAN, actually track AP queues */
netif_tx_start_all_queues(dev);
} else if (dev) {
@@ -789,6 +791,7 @@ static int ieee80211_open(struct net_device *dev)
static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
bool going_down)
{
+ struct ieee80211_sub_if_data *txq_sdata = sdata;
struct ieee80211_local *local = sdata->local;
struct fq *fq = &local->fq;
unsigned long flags;
@@ -929,6 +932,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP_VLAN:
+ txq_sdata = container_of(sdata->bss,
+ struct ieee80211_sub_if_data, u.ap);
+
mutex_lock(&local->mtx);
list_del(&sdata->u.vlan.list);
mutex_unlock(&local->mtx);
@@ -999,8 +1005,17 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
}
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
- if (sdata->vif.txq) {
- struct txq_info *txqi = to_txq_info(sdata->vif.txq);
+ if (txq_sdata->vif.txq) {
+ struct txq_info *txqi = to_txq_info(txq_sdata->vif.txq);
+
+ /*
+ * FIXME FIXME
+ *
+ * We really shouldn't purge the *entire* txqi since that
+ * contains frames for the other AP_VLANs (and possibly
+ * the AP itself) as well, but there's no API in FQ now
+ * to be able to filter.
+ */
spin_lock_bh(&fq->lock);
ieee80211_txq_purge(local, txqi);
@@ -1306,6 +1321,26 @@ static void ieee80211_iface_work(struct work_struct *work)
} else if (ieee80211_is_action(mgmt->frame_control) &&
mgmt->u.action.category == WLAN_CATEGORY_VHT) {
switch (mgmt->u.action.u.vht_group_notif.action_code) {
+ case WLAN_VHT_ACTION_OPMODE_NOTIF: {
+ struct ieee80211_rx_status *status;
+ enum nl80211_band band;
+ u8 opmode;
+
+ status = IEEE80211_SKB_RXCB(skb);
+ band = status->band;
+ opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode;
+
+ mutex_lock(&local->sta_mtx);
+ sta = sta_info_get_bss(sdata, mgmt->sa);
+
+ if (sta)
+ ieee80211_vht_handle_opmode(sdata, sta,
+ opmode,
+ band);
+
+ mutex_unlock(&local->sta_mtx);
+ break;
+ }
case WLAN_VHT_ACTION_GROUPID_MGMT:
ieee80211_process_mu_groups(sdata, mgmt);
break;
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index edd6f2945f69..4c625a325ce2 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -4,7 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright 2015 Intel Deutschland GmbH
+ * Copyright 2015-2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -19,6 +19,7 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <net/mac80211.h>
+#include <crypto/algapi.h>
#include <asm/unaligned.h>
#include "ieee80211_i.h"
#include "driver-ops.h"
@@ -608,6 +609,39 @@ void ieee80211_key_free_unused(struct ieee80211_key *key)
ieee80211_key_free_common(key);
}
+static bool ieee80211_key_identical(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_key *old,
+ struct ieee80211_key *new)
+{
+ u8 tkip_old[WLAN_KEY_LEN_TKIP], tkip_new[WLAN_KEY_LEN_TKIP];
+ u8 *tk_old, *tk_new;
+
+ if (!old || new->conf.keylen != old->conf.keylen)
+ return false;
+
+ tk_old = old->conf.key;
+ tk_new = new->conf.key;
+
+ /*
+ * In station mode, don't compare the TX MIC key, as it's never used
+ * and offloaded rekeying may not care to send it to the host. This
+ * is the case in iwlwifi, for example.
+ */
+ if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+ new->conf.cipher == WLAN_CIPHER_SUITE_TKIP &&
+ new->conf.keylen == WLAN_KEY_LEN_TKIP &&
+ !(new->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) {
+ memcpy(tkip_old, tk_old, WLAN_KEY_LEN_TKIP);
+ memcpy(tkip_new, tk_new, WLAN_KEY_LEN_TKIP);
+ memset(tkip_old + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8);
+ memset(tkip_new + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8);
+ tk_old = tkip_old;
+ tk_new = tkip_new;
+ }
+
+ return !crypto_memneq(tk_old, tk_new, new->conf.keylen);
+}
+
int ieee80211_key_link(struct ieee80211_key *key,
struct ieee80211_sub_if_data *sdata,
struct sta_info *sta)
@@ -619,9 +653,6 @@ int ieee80211_key_link(struct ieee80211_key *key,
pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
idx = key->conf.keyidx;
- key->local = sdata->local;
- key->sdata = sdata;
- key->sta = sta;
mutex_lock(&sdata->local->key_mtx);
@@ -632,6 +663,20 @@ int ieee80211_key_link(struct ieee80211_key *key,
else
old_key = key_mtx_dereference(sdata->local, sdata->keys[idx]);
+ /*
+ * Silently accept key re-installation without really installing the
+ * new version of the key to avoid nonce reuse or replay issues.
+ */
+ if (ieee80211_key_identical(sdata, old_key, key)) {
+ ieee80211_key_free_unused(key);
+ ret = 0;
+ goto out;
+ }
+
+ key->local = sdata->local;
+ key->sdata = sdata;
+ key->sta = sta;
+
increment_tailroom_need_count(sdata);
ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
@@ -647,6 +692,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
ret = 0;
}
+ out:
mutex_unlock(&sdata->local->key_mtx);
return ret;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 1075ac24c8c5..2bb6899854d4 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -908,12 +908,17 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
supp_ht = supp_ht || sband->ht_cap.ht_supported;
supp_vht = supp_vht || sband->vht_cap.vht_supported;
- if (sband->ht_cap.ht_supported)
- local->rx_chains =
- max(ieee80211_mcs_to_chains(&sband->ht_cap.mcs),
- local->rx_chains);
+ if (!sband->ht_cap.ht_supported)
+ continue;
/* TODO: consider VHT for RX chains, hopefully it's the same */
+ local->rx_chains =
+ max(ieee80211_mcs_to_chains(&sband->ht_cap.mcs),
+ local->rx_chains);
+
+ /* no need to mask, SM_PS_DISABLED has all bits set */
+ sband->ht_cap.cap |= WLAN_HT_CAP_SM_PS_DISABLED <<
+ IEEE80211_HT_CAP_SM_PS_SHIFT;
}
/* if low-level driver supports AP, we also support VLAN */
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 50e1b7f78bd4..5c67a696e046 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -279,8 +279,6 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata,
/* Mesh PS mode. See IEEE802.11-2012 8.4.2.100.8 */
*pos |= ifmsh->ps_peers_deep_sleep ?
IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL : 0x00;
- *pos++ |= ifmsh->adjusting_tbtt ?
- IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING : 0x00;
*pos++ = 0x00;
return 0;
@@ -850,7 +848,6 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
ifmsh->mesh_cc_id = 0; /* Disabled */
/* register sync ops from extensible synchronization framework */
ifmsh->sync_ops = ieee80211_mesh_sync_ops_get(ifmsh->mesh_sp_id);
- ifmsh->adjusting_tbtt = false;
ifmsh->sync_offset_clockdrift_max = 0;
set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags);
ieee80211_mesh_root_setup(ifmsh);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 7fcdcf622655..fcba70e57073 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -505,12 +505,14 @@ mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr,
/* Userspace handles station allocation */
if (sdata->u.mesh.user_mpm ||
- sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED)
- cfg80211_notify_new_peer_candidate(sdata->dev, addr,
- elems->ie_start,
- elems->total_len,
- GFP_KERNEL);
- else
+ sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) {
+ if (mesh_peer_accepts_plinks(elems) &&
+ mesh_plink_availables(sdata))
+ cfg80211_notify_new_peer_candidate(sdata->dev, addr,
+ elems->ie_start,
+ elems->total_len,
+ GFP_KERNEL);
+ } else
sta = __mesh_sta_info_alloc(sdata, addr);
return sta;
diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c
index faca22cd02b5..75608c07dc7b 100644
--- a/net/mac80211/mesh_sync.c
+++ b/net/mac80211/mesh_sync.c
@@ -123,7 +123,6 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
*/
if (elems->mesh_config && mesh_peer_tbtt_adjusting(elems)) {
- clear_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN);
msync_dbg(sdata, "STA %pM : is adjusting TBTT\n",
sta->sta.addr);
goto no_sync;
@@ -172,11 +171,9 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata,
struct beacon_data *beacon)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- u8 cap;
WARN_ON(ifmsh->mesh_sp_id != IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET);
WARN_ON(!rcu_read_lock_held());
- cap = beacon->meshconf->meshconf_cap;
spin_lock_bh(&ifmsh->sync_offset_lock);
@@ -190,21 +187,13 @@ static void mesh_sync_offset_adjust_tbtt(struct ieee80211_sub_if_data *sdata,
"TBTT : kicking off TBTT adjustment with clockdrift_max=%lld\n",
ifmsh->sync_offset_clockdrift_max);
set_bit(MESH_WORK_DRIFT_ADJUST, &ifmsh->wrkq_flags);
-
- ifmsh->adjusting_tbtt = true;
} else {
msync_dbg(sdata,
"TBTT : max clockdrift=%lld; too small to adjust\n",
(long long)ifmsh->sync_offset_clockdrift_max);
ifmsh->sync_offset_clockdrift_max = 0;
-
- ifmsh->adjusting_tbtt = false;
}
spin_unlock_bh(&ifmsh->sync_offset_lock);
-
- beacon->meshconf->meshconf_cap = ifmsh->adjusting_tbtt ?
- IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING | cap :
- ~IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING & cap;
}
static const struct sync_method sync_methods[] = {
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index eede5c6db8d5..30bba53c2992 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -707,6 +707,8 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local,
if (!cookie)
return -ENOENT;
+ flush_work(&local->hw_roc_start);
+
mutex_lock(&local->mtx);
list_for_each_entry_safe(roc, tmp, &local->roc_list, list) {
if (!mgmt_tx && roc->cookie != cookie)
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index 28a3a0957c9e..76a8bcd8ef11 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -168,6 +168,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
break;
}
+ flush_delayed_work(&sdata->dec_tailroom_needed_wk);
drv_remove_interface(local, sdata);
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 2384b4aae064..439e597fd374 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4,7 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
+ * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -208,6 +208,51 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local,
return len;
}
+static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb,
+ int rtap_vendor_space)
+{
+ struct {
+ struct ieee80211_hdr_3addr hdr;
+ u8 category;
+ u8 action_code;
+ } __packed action;
+
+ if (!sdata)
+ return;
+
+ BUILD_BUG_ON(sizeof(action) != IEEE80211_MIN_ACTION_SIZE + 1);
+
+ if (skb->len < rtap_vendor_space + sizeof(action) +
+ VHT_MUMIMO_GROUPS_DATA_LEN)
+ return;
+
+ if (!is_valid_ether_addr(sdata->u.mntr.mu_follow_addr))
+ return;
+
+ skb_copy_bits(skb, rtap_vendor_space, &action, sizeof(action));
+
+ if (!ieee80211_is_action(action.hdr.frame_control))
+ return;
+
+ if (action.category != WLAN_CATEGORY_VHT)
+ return;
+
+ if (action.action_code != WLAN_VHT_ACTION_GROUPID_MGMT)
+ return;
+
+ if (!ether_addr_equal(action.hdr.addr1, sdata->u.mntr.mu_follow_addr))
+ return;
+
+ skb = skb_copy(skb, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
+ skb_queue_tail(&sdata->skb_queue, skb);
+ ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+}
+
/*
* ieee80211_add_rx_radiotap_header - add radiotap header
*
@@ -515,7 +560,6 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
struct net_device *prev_dev = NULL;
int present_fcs_len = 0;
unsigned int rtap_vendor_space = 0;
- struct ieee80211_mgmt *mgmt;
struct ieee80211_sub_if_data *monitor_sdata =
rcu_dereference(local->monitor_sdata);
@@ -553,6 +597,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
return remove_monitor_info(local, origskb, rtap_vendor_space);
}
+ ieee80211_handle_mu_mimo_mon(monitor_sdata, origskb, rtap_vendor_space);
+
/* room for the radiotap header based on driver features */
rt_hdrlen = ieee80211_rx_radiotap_hdrlen(local, status, origskb);
needed_headroom = rt_hdrlen - rtap_vendor_space;
@@ -618,23 +664,6 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
ieee80211_rx_stats(sdata->dev, skb->len);
}
- mgmt = (void *)skb->data;
- if (monitor_sdata &&
- skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 + VHT_MUMIMO_GROUPS_DATA_LEN &&
- ieee80211_is_action(mgmt->frame_control) &&
- mgmt->u.action.category == WLAN_CATEGORY_VHT &&
- mgmt->u.action.u.vht_group_notif.action_code == WLAN_VHT_ACTION_GROUPID_MGMT &&
- is_valid_ether_addr(monitor_sdata->u.mntr.mu_follow_addr) &&
- ether_addr_equal(mgmt->da, monitor_sdata->u.mntr.mu_follow_addr)) {
- struct sk_buff *mu_skb = skb_copy(skb, GFP_ATOMIC);
-
- if (mu_skb) {
- mu_skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
- skb_queue_tail(&monitor_sdata->skb_queue, mu_skb);
- ieee80211_queue_work(&local->hw, &monitor_sdata->work);
- }
- }
-
if (prev_dev) {
skb->dev = prev_dev;
netif_receive_skb(skb);
@@ -1034,6 +1063,18 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
buf_size = tid_agg_rx->buf_size;
head_seq_num = tid_agg_rx->head_seq_num;
+ /*
+ * If the current MPDU's SN is smaller than the SSN, it shouldn't
+ * be reordered.
+ */
+ if (unlikely(!tid_agg_rx->started)) {
+ if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) {
+ ret = false;
+ goto out;
+ }
+ tid_agg_rx->started = true;
+ }
+
/* frame with out of date sequence number */
if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) {
dev_kfree_skb(skb);
@@ -1544,12 +1585,16 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
*/
if (!ieee80211_hw_check(&sta->local->hw, AP_LINK_PS) &&
!ieee80211_has_morefrags(hdr->frame_control) &&
+ !ieee80211_is_back_req(hdr->frame_control) &&
!(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) &&
(rx->sdata->vif.type == NL80211_IFTYPE_AP ||
rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
- /* PM bit is only checked in frames where it isn't reserved,
+ /*
+ * PM bit is only checked in frames where it isn't reserved,
* in AP mode it's reserved in non-bufferable management frames
* (cf. IEEE 802.11-2012 8.2.4.1.7 Power Management field)
+ * BAR frames should be ignored as specified in
+ * IEEE 802.11-2012 10.2.1.2.
*/
(!ieee80211_is_mgmt(hdr->frame_control) ||
ieee80211_is_bufferable_mmpdu(hdr->frame_control))) {
@@ -2426,7 +2471,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
if (is_multicast_ether_addr(hdr->addr1)) {
mpp_addr = hdr->addr3;
proxied_addr = mesh_hdr->eaddr1;
- } else if (mesh_hdr->flags & MESH_FLAGS_AE_A5_A6) {
+ } else if ((mesh_hdr->flags & MESH_FLAGS_AE) ==
+ MESH_FLAGS_AE_A5_A6) {
/* has_a4 already checked in ieee80211_rx_mesh_check */
mpp_addr = hdr->addr4;
proxied_addr = mesh_hdr->eaddr2;
@@ -2877,17 +2923,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
switch (mgmt->u.action.u.vht_opmode_notif.action_code) {
case WLAN_VHT_ACTION_OPMODE_NOTIF: {
- u8 opmode;
-
/* verify opmode is present */
if (len < IEEE80211_MIN_ACTION_SIZE + 2)
goto invalid;
-
- opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode;
-
- ieee80211_vht_handle_opmode(rx->sdata, rx->sta,
- opmode, status->band);
- goto handled;
+ goto queue;
}
case WLAN_VHT_ACTION_GROUPID_MGMT: {
if (len < IEEE80211_MIN_ACTION_SIZE + 25)
@@ -3605,6 +3644,27 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
!ether_addr_equal(bssid, hdr->addr1))
return false;
}
+
+ /*
+ * 802.11-2016 Table 9-26 says that for data frames, A1 must be
+ * the BSSID - we've checked that already but may have accepted
+ * the wildcard (ff:ff:ff:ff:ff:ff).
+ *
+ * It also says:
+ * The BSSID of the Data frame is determined as follows:
+ * a) If the STA is contained within an AP or is associated
+ * with an AP, the BSSID is the address currently in use
+ * by the STA contained in the AP.
+ *
+ * So we should not accept data frames with an address that's
+ * multicast.
+ *
+ * Accepting it also opens a security problem because stations
+ * could encrypt it with the GTK and inject traffic that way.
+ */
+ if (ieee80211_is_data(hdr->frame_control) && multicast)
+ return false;
+
return true;
case NL80211_IFTYPE_WDS:
if (bssid || !ieee80211_is_data(hdr->frame_control))
@@ -3887,6 +3947,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
stats->last_rate = sta_stats_encode_rate(status);
stats->fragments++;
+ stats->packets++;
if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
stats->last_signal = status->signal;
@@ -4080,15 +4141,17 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
ieee80211_is_beacon(hdr->frame_control)))
ieee80211_scan_rx(local, skb);
- if (pubsta) {
- rx.sta = container_of(pubsta, struct sta_info, sta);
- rx.sdata = rx.sta->sdata;
- if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
- return;
- goto out;
- } else if (ieee80211_is_data(fc)) {
+ if (ieee80211_is_data(fc)) {
struct sta_info *sta, *prev_sta;
+ if (pubsta) {
+ rx.sta = container_of(pubsta, struct sta_info, sta);
+ rx.sdata = rx.sta->sdata;
+ if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
+ return;
+ goto out;
+ }
+
prev_sta = NULL;
for_each_sta_info(local, hdr->addr2, sta, tmp) {
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 8e05032689f0..348700b424ea 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -688,7 +688,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending)
}
/* No need to do anything if the driver does all */
- if (ieee80211_hw_check(&local->hw, AP_LINK_PS))
+ if (ieee80211_hw_check(&local->hw, AP_LINK_PS) && !local->ops->set_tim)
return;
if (sta->dead)
@@ -2148,7 +2148,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
struct ieee80211_sta_rx_stats *cpurxs;
cpurxs = per_cpu_ptr(sta->pcpu_rx_stats, cpu);
- sinfo->rx_packets += cpurxs->dropped;
+ sinfo->rx_dropped_misc += cpurxs->dropped;
}
}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index dd06ef0b8861..15599c70a38f 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -189,6 +189,7 @@ struct tid_ampdu_tx {
* @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and
* and ssn.
* @removed: this session is removed (but might have been found due to RCU)
+ * @started: this session has started (head ssn or higher was received)
*
* This structure's lifetime is managed by RCU, assignments to
* the array holding it must hold the aggregation mutex.
@@ -212,8 +213,9 @@ struct tid_ampdu_rx {
u16 ssn;
u16 buf_size;
u16 timeout;
- bool auto_seq;
- bool removed;
+ u8 auto_seq:1,
+ removed:1,
+ started:1;
};
/**
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index ddf71c648cab..ad37b4e58c2f 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -51,7 +51,8 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
struct ieee80211_hdr *hdr = (void *)skb->data;
int ac;
- if (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER) {
+ if (info->flags & (IEEE80211_TX_CTL_NO_PS_BUFFER |
+ IEEE80211_TX_CTL_AMPDU)) {
ieee80211_free_txskb(&local->hw, skb);
return;
}
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index dd190ff3daea..274c564bd9af 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1277,11 +1277,6 @@ static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb)
IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time();
}
-static void ieee80211_set_skb_vif(struct sk_buff *skb, struct txq_info *txqi)
-{
- IEEE80211_SKB_CB(skb)->control.vif = txqi->txq.vif;
-}
-
static u32 codel_skb_len_func(const struct sk_buff *skb)
{
return skb->len;
@@ -3388,6 +3383,7 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
struct ieee80211_tx_info *info;
struct ieee80211_tx_data tx;
ieee80211_tx_result r;
+ struct ieee80211_vif *vif;
spin_lock_bh(&fq->lock);
@@ -3404,8 +3400,6 @@ begin:
if (!skb)
goto out;
- ieee80211_set_skb_vif(skb, txqi);
-
hdr = (struct ieee80211_hdr *)skb->data;
info = IEEE80211_SKB_CB(skb);
@@ -3462,6 +3456,34 @@ begin:
}
}
+ switch (tx.sdata->vif.type) {
+ case NL80211_IFTYPE_MONITOR:
+ if (tx.sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) {
+ vif = &tx.sdata->vif;
+ break;
+ }
+ tx.sdata = rcu_dereference(local->monitor_sdata);
+ if (tx.sdata) {
+ vif = &tx.sdata->vif;
+ info->hw_queue =
+ vif->hw_queue[skb_get_queue_mapping(skb)];
+ } else if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) {
+ ieee80211_free_txskb(&local->hw, skb);
+ goto begin;
+ } else {
+ vif = NULL;
+ }
+ break;
+ case NL80211_IFTYPE_AP_VLAN:
+ tx.sdata = container_of(tx.sdata->bss,
+ struct ieee80211_sub_if_data, u.ap);
+ /* fall through */
+ default:
+ vif = &tx.sdata->vif;
+ break;
+ }
+
+ IEEE80211_SKB_CB(skb)->control.vif = vif;
out:
spin_unlock_bh(&fq->lock);
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 6832bf6ab69f..43e45bb660bc 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -527,8 +527,10 @@ void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
u32 changed = __ieee80211_vht_handle_opmode(sdata, sta, opmode, band);
- if (changed > 0)
+ if (changed > 0) {
+ ieee80211_recalc_min_chandef(sdata);
rate_control_rate_update(local, sband, sta, changed);
+ }
}
void ieee80211_get_vht_mask_from_cap(__le16 vht_cap,
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 42ce9bd4426f..5c71d60f3a64 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -17,6 +17,7 @@
#include <asm/unaligned.h>
#include <net/mac80211.h>
#include <crypto/aes.h>
+#include <crypto/algapi.h>
#include "ieee80211_i.h"
#include "michael.h"
@@ -153,7 +154,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
data_len = skb->len - hdrlen - MICHAEL_MIC_LEN;
key = &rx->key->conf.key[NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY];
michael_mic(key, hdr, data, data_len, mic);
- if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0)
+ if (crypto_memneq(mic, data + data_len, MICHAEL_MIC_LEN))
goto mic_fail;
/* remove Michael MIC from payload */
@@ -1047,7 +1048,7 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx)
bip_aad(skb, aad);
ieee80211_aes_cmac(key->u.aes_cmac.tfm, aad,
skb->data + 24, skb->len - 24, mic);
- if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) {
+ if (crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) {
key->u.aes_cmac.icverrors++;
return RX_DROP_UNUSABLE;
}
@@ -1097,7 +1098,7 @@ ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx)
bip_aad(skb, aad);
ieee80211_aes_cmac_256(key->u.aes_cmac.tfm, aad,
skb->data + 24, skb->len - 24, mic);
- if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) {
+ if (crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) {
key->u.aes_cmac.icverrors++;
return RX_DROP_UNUSABLE;
}
@@ -1201,7 +1202,7 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx)
if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce,
skb->data + 24, skb->len - 24,
mic) < 0 ||
- memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) {
+ crypto_memneq(mic, mmie->mic, sizeof(mmie->mic))) {
key->u.aes_gmac.icverrors++;
return RX_DROP_UNUSABLE;
}
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 5b77377e5a15..1309e2c34764 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -956,7 +956,8 @@ static void mpls_ifdown(struct net_device *dev, int event)
/* fall through */
case NETDEV_CHANGE:
nh->nh_flags |= RTNH_F_LINKDOWN;
- ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1;
+ if (event != NETDEV_UNREGISTER)
+ ACCESS_ONCE(rt->rt_nhn_alive) = rt->rt_nhn_alive - 1;
break;
}
if (event == NETDEV_UNREGISTER)
@@ -1696,6 +1697,7 @@ static void mpls_net_exit(struct net *net)
for (index = 0; index < platform_labels; index++) {
struct mpls_route *rt = rtnl_dereference(platform_label[index]);
RCU_INIT_POINTER(platform_label[index], NULL);
+ mpls_notify_route(net, index, rt, NULL, NULL);
mpls_rt_free(rt);
}
rtnl_unlock();
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 2c1b498a7a27..e34d3f60fccd 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -849,10 +849,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
{
unsigned int verdict = NF_DROP;
- if (IP_VS_FWD_METHOD(cp) != 0) {
- pr_err("shouldn't reach here, because the box is on the "
- "half connection in the tun/dr module.\n");
- }
+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+ goto ignore_cp;
/* Ensure the checksum is correct */
if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
@@ -886,6 +884,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
ip_vs_notrack(skb);
else
ip_vs_update_conntrack(skb, cp, 0);
+
+ignore_cp:
verdict = NF_ACCEPT;
out:
@@ -1385,8 +1385,11 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
*/
cp = pp->conn_out_get(ipvs, af, skb, &iph);
- if (likely(cp))
+ if (likely(cp)) {
+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
+ goto ignore_cp;
return handle_response(af, skb, pd, cp, &iph, hooknum);
+ }
/* Check for real-server-started requests */
if (atomic_read(&ipvs->conn_out_counter)) {
@@ -1444,9 +1447,15 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
}
}
}
+
+out:
IP_VS_DBG_PKT(12, af, pp, skb, iph.off,
"ip_vs_out: packet continues traversal as normal");
return NF_ACCEPT;
+
+ignore_cp:
+ __ip_vs_conn_put(cp);
+ goto out;
}
/*
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0f87e5d21be7..750b8bf13e60 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -85,29 +85,36 @@ static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
-#define GC_MAX_BUCKETS_DIV 64u
-/* upper bound of scan intervals */
-#define GC_INTERVAL_MAX (2 * HZ)
-/* maximum conntracks to evict per gc run */
-#define GC_MAX_EVICTS 256u
+#define GC_MAX_BUCKETS_DIV 128u
+/* upper bound of full table scan */
+#define GC_MAX_SCAN_JIFFIES (16u * HZ)
+/* desired ratio of entries found to be expired */
+#define GC_EVICT_RATIO 50u
static struct conntrack_gc_work conntrack_gc_work;
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
{
+ /* 1) Acquire the lock */
spin_lock(lock);
- while (unlikely(nf_conntrack_locks_all)) {
- spin_unlock(lock);
- /*
- * Order the 'nf_conntrack_locks_all' load vs. the
- * spin_unlock_wait() loads below, to ensure
- * that 'nf_conntrack_locks_all_lock' is indeed held:
- */
- smp_rmb(); /* spin_lock(&nf_conntrack_locks_all_lock) */
- spin_unlock_wait(&nf_conntrack_locks_all_lock);
- spin_lock(lock);
- }
+ /* 2) read nf_conntrack_locks_all, with ACQUIRE semantics
+ * It pairs with the smp_store_release() in nf_conntrack_all_unlock()
+ */
+ if (likely(smp_load_acquire(&nf_conntrack_locks_all) == false))
+ return;
+
+ /* fast path failed, unlock */
+ spin_unlock(lock);
+
+ /* Slow path 1) get global lock */
+ spin_lock(&nf_conntrack_locks_all_lock);
+
+ /* Slow path 2) get the lock we want */
+ spin_lock(lock);
+
+ /* Slow path 3) release the global lock */
+ spin_unlock(&nf_conntrack_locks_all_lock);
}
EXPORT_SYMBOL_GPL(nf_conntrack_lock);
@@ -148,28 +155,27 @@ static void nf_conntrack_all_lock(void)
int i;
spin_lock(&nf_conntrack_locks_all_lock);
- nf_conntrack_locks_all = true;
- /*
- * Order the above store of 'nf_conntrack_locks_all' against
- * the spin_unlock_wait() loads below, such that if
- * nf_conntrack_lock() observes 'nf_conntrack_locks_all'
- * we must observe nf_conntrack_locks[] held:
- */
- smp_mb(); /* spin_lock(&nf_conntrack_locks_all_lock) */
+ nf_conntrack_locks_all = true;
for (i = 0; i < CONNTRACK_LOCKS; i++) {
- spin_unlock_wait(&nf_conntrack_locks[i]);
+ spin_lock(&nf_conntrack_locks[i]);
+
+ /* This spin_unlock provides the "release" to ensure that
+ * nf_conntrack_locks_all==true is visible to everyone that
+ * acquired spin_lock(&nf_conntrack_locks[]).
+ */
+ spin_unlock(&nf_conntrack_locks[i]);
}
}
static void nf_conntrack_all_unlock(void)
{
- /*
- * All prior stores must be complete before we clear
+ /* All prior stores must be complete before we clear
* 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock()
* might observe the false value but not the entire
- * critical section:
+ * critical section.
+ * It pairs with the smp_load_acquire() in nf_conntrack_lock()
*/
smp_store_release(&nf_conntrack_locks_all, false);
spin_unlock(&nf_conntrack_locks_all_lock);
@@ -683,7 +689,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->allow_clash &&
- !nfct_nat(ct) &&
+ ((ct->status & IPS_NAT_DONE_MASK) == 0) &&
!nf_ct_is_dying(ct) &&
atomic_inc_not_zero(&ct->ct_general.use)) {
nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct);
@@ -938,6 +944,7 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
static void gc_worker(struct work_struct *work)
{
+ unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
unsigned int i, goal, buckets = 0, expired_count = 0;
struct conntrack_gc_work *gc_work;
unsigned int ratio, scanned = 0;
@@ -979,8 +986,7 @@ static void gc_worker(struct work_struct *work)
*/
rcu_read_unlock();
cond_resched_rcu_qs();
- } while (++buckets < goal &&
- expired_count < GC_MAX_EVICTS);
+ } while (++buckets < goal);
if (gc_work->exiting)
return;
@@ -997,27 +1003,25 @@ static void gc_worker(struct work_struct *work)
* 1. Minimize time until we notice a stale entry
* 2. Maximize scan intervals to not waste cycles
*
- * Normally, expired_count will be 0, this increases the next_run time
- * to priorize 2) above.
+ * Normally, expire ratio will be close to 0.
*
- * As soon as a timed-out entry is found, move towards 1) and increase
- * the scan frequency.
- * In case we have lots of evictions next scan is done immediately.
+ * As soon as a sizeable fraction of the entries have expired
+ * increase scan frequency.
*/
ratio = scanned ? expired_count * 100 / scanned : 0;
- if (ratio >= 90 || expired_count == GC_MAX_EVICTS) {
- gc_work->next_gc_run = 0;
- next_run = 0;
- } else if (expired_count) {
- gc_work->next_gc_run /= 2U;
- next_run = msecs_to_jiffies(1);
+ if (ratio > GC_EVICT_RATIO) {
+ gc_work->next_gc_run = min_interval;
} else {
- if (gc_work->next_gc_run < GC_INTERVAL_MAX)
- gc_work->next_gc_run += msecs_to_jiffies(1);
+ unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV;
+
+ BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0);
- next_run = gc_work->next_gc_run;
+ gc_work->next_gc_run += min_interval;
+ if (gc_work->next_gc_run > max)
+ gc_work->next_gc_run = max;
}
+ next_run = gc_work->next_gc_run;
gc_work->last_bucket = i;
queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
}
@@ -1025,7 +1029,7 @@ static void gc_worker(struct work_struct *work)
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
- gc_work->next_gc_run = GC_INTERVAL_MAX;
+ gc_work->next_gc_run = HZ;
gc_work->exiting = false;
}
@@ -1918,7 +1922,7 @@ int nf_conntrack_init_start(void)
nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
conntrack_gc_work_init(&conntrack_gc_work);
- queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX);
+ queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ);
return 0;
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index da9df2d56e66..22fc32143e9c 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -290,6 +290,7 @@ void nf_conntrack_unregister_notifier(struct net *net,
BUG_ON(notify != new);
RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
mutex_unlock(&nf_ct_ecache_mutex);
+ /* synchronize_rcu() is called from ctnetlink_exit. */
}
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
@@ -326,6 +327,7 @@ void nf_ct_expect_unregister_notifier(struct net *net,
BUG_ON(notify != new);
RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL);
mutex_unlock(&nf_ct_ecache_mutex);
+ /* synchronize_rcu() is called from ctnetlink_exit. */
}
EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index f8dbacf66795..0d6c72d6b9ba 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -411,7 +411,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
struct net *net = nf_ct_exp_net(expect);
struct hlist_node *next;
unsigned int h;
- int ret = 1;
+ int ret = 0;
if (!master_help) {
ret = -ESHUTDOWN;
@@ -461,7 +461,7 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
spin_lock_bh(&nf_conntrack_expect_lock);
ret = __nf_ct_expect_check(expect);
- if (ret <= 0)
+ if (ret < 0)
goto out;
ret = nf_ct_expect_insert(expect);
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 02bcf00c2492..008299b7f78f 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -53,7 +53,11 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id,
rcu_read_lock();
t = rcu_dereference(nf_ct_ext_types[id]);
- BUG_ON(t == NULL);
+ if (!t) {
+ rcu_read_unlock();
+ return NULL;
+ }
+
off = ALIGN(sizeof(struct nf_ct_ext), t->align);
len = off + t->len + var_alloc_len;
alloc_size = t->alloc_size + var_alloc_len;
@@ -88,7 +92,10 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id,
rcu_read_lock();
t = rcu_dereference(nf_ct_ext_types[id]);
- BUG_ON(t == NULL);
+ if (!t) {
+ rcu_read_unlock();
+ return NULL;
+ }
newoff = ALIGN(old->len, t->align);
newlen = newoff + t->len + var_alloc_len;
@@ -175,6 +182,6 @@ void nf_ct_extend_unregister(struct nf_ct_ext_type *type)
RCU_INIT_POINTER(nf_ct_ext_types[type->id], NULL);
update_alloc_size(type);
mutex_unlock(&nf_ct_ext_type_mutex);
- rcu_barrier(); /* Wait for completion of call_rcu()'s */
+ synchronize_rcu();
}
EXPORT_SYMBOL_GPL(nf_ct_extend_unregister);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 7341adf7059d..6dc44d9b4190 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -188,6 +188,26 @@ nf_ct_helper_ext_add(struct nf_conn *ct,
}
EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add);
+static struct nf_conntrack_helper *
+nf_ct_lookup_helper(struct nf_conn *ct, struct net *net)
+{
+ if (!net->ct.sysctl_auto_assign_helper) {
+ if (net->ct.auto_assign_helper_warned)
+ return NULL;
+ if (!__nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple))
+ return NULL;
+ pr_info("nf_conntrack: default automatic helper assignment "
+ "has been turned off for security reasons and CT-based "
+ " firewall rule not found. Use the iptables CT target "
+ "to attach helpers instead.\n");
+ net->ct.auto_assign_helper_warned = 1;
+ return NULL;
+ }
+
+ return __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+}
+
+
int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
gfp_t flags)
{
@@ -213,21 +233,14 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
}
help = nfct_help(ct);
- if (net->ct.sysctl_auto_assign_helper && helper == NULL) {
- helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- if (unlikely(!net->ct.auto_assign_helper_warned && helper)) {
- pr_info("nf_conntrack: automatic helper "
- "assignment is deprecated and it will "
- "be removed soon. Use the iptables CT target "
- "to attach helpers instead.\n");
- net->ct.auto_assign_helper_warned = true;
- }
- }
if (helper == NULL) {
- if (help)
- RCU_INIT_POINTER(help->helper, NULL);
- return 0;
+ helper = nf_ct_lookup_helper(ct, net);
+ if (helper == NULL) {
+ if (help)
+ RCU_INIT_POINTER(help->helper, NULL);
+ return 0;
+ }
}
if (help == NULL) {
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 27540455dc62..d5caed5bcfb1 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -45,6 +45,8 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_timestamp.h>
#include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
#ifdef CONFIG_NF_NAT_NEEDED
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_l4proto.h>
@@ -1800,6 +1802,8 @@ ctnetlink_create_conntrack(struct net *net,
nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC);
nf_ct_labels_ext_add(ct);
+ nfct_seqadj_ext_add(ct);
+ nfct_synproxy_ext_add(ct);
/* we must add conntrack extensions before confirmation. */
ct->status |= IPS_CONFIRMED;
@@ -3409,6 +3413,7 @@ static void __exit ctnetlink_exit(void)
#ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT
RCU_INIT_POINTER(nfnl_ct_hook, NULL);
#endif
+ synchronize_rcu();
}
module_init(ctnetlink_init);
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index c3fc14e021ec..3a8dc39a9116 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1630,8 +1630,6 @@ static int __init nf_conntrack_sip_init(void)
ports[ports_c++] = SIP_PORT;
for (i = 0; i < ports_c; i++) {
- memset(&sip[i], 0, sizeof(sip[i]));
-
nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP, "sip",
SIP_PORT, ports[i], i, sip_exp_policy,
SIP_EXPECT_MAX,
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 3dca90dc24ad..ffb9e8ada899 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -13,7 +13,6 @@
/* Internal logging interface, which relies on the real
LOG target modules */
-#define NF_LOG_PREFIXLEN 128
#define NFLOGGER_NAME_LEN 64
static struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 5b9c884a452e..624d6e4dcd5c 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -30,19 +30,17 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_nat.h>
+static DEFINE_SPINLOCK(nf_nat_lock);
+
static DEFINE_MUTEX(nf_nat_proto_mutex);
static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
__read_mostly;
static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
__read_mostly;
-struct nf_nat_conn_key {
- const struct net *net;
- const struct nf_conntrack_tuple *tuple;
- const struct nf_conntrack_zone *zone;
-};
-
-static struct rhltable nf_nat_bysource_table;
+static struct hlist_head *nf_nat_bysource __read_mostly;
+static unsigned int nf_nat_htable_size __read_mostly;
+static unsigned int nf_nat_hash_rnd __read_mostly;
inline const struct nf_nat_l3proto *
__nf_nat_l3proto_find(u8 family)
@@ -121,17 +119,19 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
EXPORT_SYMBOL(nf_xfrm_me_harder);
#endif /* CONFIG_XFRM */
-static u32 nf_nat_bysource_hash(const void *data, u32 len, u32 seed)
+/* We keep an extra hash for each conntrack, for fast searching. */
+static inline unsigned int
+hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
{
- const struct nf_conntrack_tuple *t;
- const struct nf_conn *ct = data;
+ unsigned int hash;
+
+ get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
- t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
/* Original src, to ensure we map it consistently if poss. */
+ hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
+ tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
- seed ^= net_hash_mix(nf_ct_net(ct));
- return jhash2((const u32 *)&t->src, sizeof(t->src) / sizeof(u32),
- t->dst.protonum ^ seed);
+ return reciprocal_scale(hash, nf_nat_htable_size);
}
/* Is this tuple already taken? (not by us) */
@@ -187,28 +187,6 @@ same_src(const struct nf_conn *ct,
t->src.u.all == tuple->src.u.all);
}
-static int nf_nat_bysource_cmp(struct rhashtable_compare_arg *arg,
- const void *obj)
-{
- const struct nf_nat_conn_key *key = arg->key;
- const struct nf_conn *ct = obj;
-
- if (!same_src(ct, key->tuple) ||
- !net_eq(nf_ct_net(ct), key->net) ||
- !nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL))
- return 1;
-
- return 0;
-}
-
-static struct rhashtable_params nf_nat_bysource_params = {
- .head_offset = offsetof(struct nf_conn, nat_bysource),
- .obj_hashfn = nf_nat_bysource_hash,
- .obj_cmpfn = nf_nat_bysource_cmp,
- .nelem_hint = 256,
- .min_size = 1024,
-};
-
/* Only called for SRC manip */
static int
find_appropriate_src(struct net *net,
@@ -219,26 +197,23 @@ find_appropriate_src(struct net *net,
struct nf_conntrack_tuple *result,
const struct nf_nat_range *range)
{
+ unsigned int h = hash_by_src(net, tuple);
const struct nf_conn *ct;
- struct nf_nat_conn_key key = {
- .net = net,
- .tuple = tuple,
- .zone = zone
- };
- struct rhlist_head *hl;
-
- hl = rhltable_lookup(&nf_nat_bysource_table, &key,
- nf_nat_bysource_params);
- if (!hl)
- return 0;
- ct = container_of(hl, typeof(*ct), nat_bysource);
-
- nf_ct_invert_tuplepr(result,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- result->dst = tuple->dst;
-
- return in_range(l3proto, l4proto, result, range);
+ hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) {
+ if (same_src(ct, tuple) &&
+ net_eq(net, nf_ct_net(ct)) &&
+ nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
+ /* Copy source part from reply tuple. */
+ nf_ct_invert_tuplepr(result,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ result->dst = tuple->dst;
+
+ if (in_range(l3proto, l4proto, result, range))
+ return 1;
+ }
+ }
+ return 0;
}
/* For [FUTURE] fragmentation handling, we want the least-used
@@ -410,6 +385,7 @@ nf_nat_setup_info(struct nf_conn *ct,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype)
{
+ struct net *net = nf_ct_net(ct);
struct nf_conntrack_tuple curr_tuple, new_tuple;
struct nf_conn_nat *nat;
@@ -451,19 +427,16 @@ nf_nat_setup_info(struct nf_conn *ct,
}
if (maniptype == NF_NAT_MANIP_SRC) {
- struct nf_nat_conn_key key = {
- .net = nf_ct_net(ct),
- .tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- .zone = nf_ct_zone(ct),
- };
- int err;
-
- err = rhltable_insert_key(&nf_nat_bysource_table,
- &key,
- &ct->nat_bysource,
- nf_nat_bysource_params);
- if (err)
- return NF_DROP;
+ unsigned int srchash;
+
+ srchash = hash_by_src(net,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ spin_lock_bh(&nf_nat_lock);
+ /* nf_conntrack_alter_reply might re-allocate extension aera */
+ nat = nfct_nat(ct);
+ hlist_add_head_rcu(&ct->nat_bysource,
+ &nf_nat_bysource[srchash]);
+ spin_unlock_bh(&nf_nat_lock);
}
/* It's done. */
@@ -549,10 +522,6 @@ struct nf_nat_proto_clean {
static int nf_nat_proto_remove(struct nf_conn *i, void *data)
{
const struct nf_nat_proto_clean *clean = data;
- struct nf_conn_nat *nat = nfct_nat(i);
-
- if (!nat)
- return 0;
if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) ||
(clean->l4proto && nf_ct_protonum(i) != clean->l4proto))
@@ -563,12 +532,10 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data)
static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
{
- struct nf_conn_nat *nat = nfct_nat(ct);
-
if (nf_nat_proto_remove(ct, data))
return 1;
- if (!nat)
+ if ((ct->status & IPS_SRC_NAT_DONE) == 0)
return 0;
/* This netns is being destroyed, and conntrack has nat null binding.
@@ -577,9 +544,10 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
* Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack()
* will delete entry from already-freed table.
*/
+ spin_lock_bh(&nf_nat_lock);
+ hlist_del_rcu(&ct->nat_bysource);
ct->status &= ~IPS_NAT_DONE_MASK;
- rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
- nf_nat_bysource_params);
+ spin_unlock_bh(&nf_nat_lock);
/* don't delete conntrack. Although that would make things a lot
* simpler, we'd end up flushing all conntracks on nat rmmod.
@@ -704,13 +672,11 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
/* No one using conntrack by the time this called. */
static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
- struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
-
- if (!nat)
- return;
-
- rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
- nf_nat_bysource_params);
+ if (ct->status & IPS_SRC_NAT_DONE) {
+ spin_lock_bh(&nf_nat_lock);
+ hlist_del_rcu(&ct->nat_bysource);
+ spin_unlock_bh(&nf_nat_lock);
+ }
}
static struct nf_ct_ext_type nat_extend __read_mostly = {
@@ -845,13 +811,16 @@ static int __init nf_nat_init(void)
{
int ret;
- ret = rhltable_init(&nf_nat_bysource_table, &nf_nat_bysource_params);
- if (ret)
- return ret;
+ /* Leave them the same for the moment. */
+ nf_nat_htable_size = nf_conntrack_htable_size;
+
+ nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
+ if (!nf_nat_bysource)
+ return -ENOMEM;
ret = nf_ct_extend_register(&nat_extend);
if (ret < 0) {
- rhltable_destroy(&nf_nat_bysource_table);
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
return ret;
}
@@ -875,7 +844,7 @@ static int __init nf_nat_init(void)
return 0;
cleanup_extend:
- rhltable_destroy(&nf_nat_bysource_table);
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
nf_ct_extend_unregister(&nat_extend);
return ret;
}
@@ -891,10 +860,12 @@ static void __exit nf_nat_cleanup(void)
#ifdef CONFIG_XFRM
RCU_INIT_POINTER(nf_nat_decode_session_hook, NULL);
#endif
+ synchronize_rcu();
+
for (i = 0; i < NFPROTO_NUMPROTO; i++)
kfree(nf_nat_l4protos[i]);
-
- rhltable_destroy(&nf_nat_bysource_table);
+ synchronize_net();
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
}
MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index e5194f6f906c..fa3ef25441e5 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2068,7 +2068,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
* is called on error from nf_tables_newrule().
*/
expr = nft_expr_first(rule);
- while (expr->ops && expr != nft_expr_last(rule)) {
+ while (expr != nft_expr_last(rule) && expr->ops) {
nf_tables_expr_destroy(ctx, expr);
expr = nft_expr_next(expr);
}
@@ -3637,10 +3637,18 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err5;
}
+ if (set->size &&
+ !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) {
+ err = -ENFILE;
+ goto err6;
+ }
+
nft_trans_elem(trans) = elem;
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
+err6:
+ set->ops->remove(set, &elem);
err5:
kfree(trans);
err4:
@@ -3687,15 +3695,9 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
return -EBUSY;
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
- if (set->size &&
- !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact))
- return -ENFILE;
-
err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags);
- if (err < 0) {
- atomic_dec(&set->nelems);
+ if (err < 0)
break;
- }
}
return err;
}
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 3b79f34b5095..b1fcfa08f0b4 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -161,6 +161,7 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
int i, ret;
struct nf_conntrack_expect_policy *expect_policy;
struct nlattr *tb[NFCTH_POLICY_SET_MAX+1];
+ unsigned int class_max;
ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr,
nfnl_cthelper_expect_policy_set);
@@ -170,19 +171,18 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
if (!tb[NFCTH_POLICY_SET_NUM])
return -EINVAL;
- helper->expect_class_max =
- ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM]));
-
- if (helper->expect_class_max != 0 &&
- helper->expect_class_max > NF_CT_MAX_EXPECT_CLASSES)
+ class_max = ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM]));
+ if (class_max == 0)
+ return -EINVAL;
+ if (class_max > NF_CT_MAX_EXPECT_CLASSES)
return -EOVERFLOW;
expect_policy = kzalloc(sizeof(struct nf_conntrack_expect_policy) *
- helper->expect_class_max, GFP_KERNEL);
+ class_max, GFP_KERNEL);
if (expect_policy == NULL)
return -ENOMEM;
- for (i=0; i<helper->expect_class_max; i++) {
+ for (i = 0; i < class_max; i++) {
if (!tb[NFCTH_POLICY_SET+i])
goto err;
@@ -191,6 +191,8 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper,
if (ret < 0)
goto err;
}
+
+ helper->expect_class_max = class_max - 1;
helper->expect_policy = expect_policy;
return 0;
err:
@@ -377,10 +379,10 @@ nfnl_cthelper_dump_policy(struct sk_buff *skb,
goto nla_put_failure;
if (nla_put_be32(skb, NFCTH_POLICY_SET_NUM,
- htonl(helper->expect_class_max)))
+ htonl(helper->expect_class_max + 1)))
goto nla_put_failure;
- for (i=0; i<helper->expect_class_max; i++) {
+ for (i = 0; i < helper->expect_class_max + 1; i++) {
nest_parms2 = nla_nest_start(skb,
(NFCTH_POLICY_SET+i) | NLA_F_NESTED);
if (nest_parms2 == NULL)
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 139e0867e56e..47d6656c9119 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -646,8 +646,8 @@ static void __exit cttimeout_exit(void)
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
+ synchronize_rcu();
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
- rcu_barrier();
}
module_init(cttimeout_init);
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 1b01404bb33f..c7704e9123ef 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -38,7 +38,8 @@ static void nft_log_eval(const struct nft_expr *expr,
static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = {
[NFTA_LOG_GROUP] = { .type = NLA_U16 },
- [NFTA_LOG_PREFIX] = { .type = NLA_STRING },
+ [NFTA_LOG_PREFIX] = { .type = NLA_STRING,
+ .len = NF_LOG_PREFIXLEN - 1 },
[NFTA_LOG_SNAPLEN] = { .type = NLA_U32 },
[NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 },
[NFTA_LOG_LEVEL] = { .type = NLA_U32 },
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 6c1e0246706e..7c3395513ff0 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -159,8 +159,34 @@ void nft_meta_get_eval(const struct nft_expr *expr,
else
*dest = PACKET_BROADCAST;
break;
+ case NFPROTO_NETDEV:
+ switch (skb->protocol) {
+ case htons(ETH_P_IP): {
+ int noff = skb_network_offset(skb);
+ struct iphdr *iph, _iph;
+
+ iph = skb_header_pointer(skb, noff,
+ sizeof(_iph), &_iph);
+ if (!iph)
+ goto err;
+
+ if (ipv4_is_multicast(iph->daddr))
+ *dest = PACKET_MULTICAST;
+ else
+ *dest = PACKET_BROADCAST;
+
+ break;
+ }
+ case htons(ETH_P_IPV6):
+ *dest = PACKET_MULTICAST;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ goto err;
+ }
+ break;
default:
- WARN_ON(1);
+ WARN_ON_ONCE(1);
goto err;
}
break;
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index 393d359a1889..ef4768a451f4 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -38,7 +38,7 @@ static void nft_queue_eval(const struct nft_expr *expr,
if (priv->queues_total > 1) {
if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT) {
- int cpu = smp_processor_id();
+ int cpu = raw_smp_processor_id();
queue = priv->queuenum + cpu % priv->queues_total;
} else {
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 36493a7cae88..93820e0d8814 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -118,17 +118,17 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
else if (d > 0)
p = &parent->rb_right;
else {
- if (nft_set_elem_active(&rbe->ext, genmask)) {
- if (nft_rbtree_interval_end(rbe) &&
- !nft_rbtree_interval_end(new))
- p = &parent->rb_left;
- else if (!nft_rbtree_interval_end(rbe) &&
- nft_rbtree_interval_end(new))
- p = &parent->rb_right;
- else {
- *ext = &rbe->ext;
- return -EEXIST;
- }
+ if (nft_rbtree_interval_end(rbe) &&
+ !nft_rbtree_interval_end(new)) {
+ p = &parent->rb_left;
+ } else if (!nft_rbtree_interval_end(rbe) &&
+ nft_rbtree_interval_end(new)) {
+ p = &parent->rb_right;
+ } else if (nft_set_elem_active(&rbe->ext, genmask)) {
+ *ext = &rbe->ext;
+ return -EEXIST;
+ } else {
+ p = &parent->rb_left;
}
}
}
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 872db2d0e2a9..119e51fdcebc 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -104,7 +104,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
tcp_hdrlen = tcph->doff * 4;
- if (len < tcp_hdrlen)
+ if (len < tcp_hdrlen || tcp_hdrlen < sizeof(struct tcphdr))
return -1;
if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
@@ -152,6 +152,10 @@ tcpmss_mangle_packet(struct sk_buff *skb,
if (len > tcp_hdrlen)
return 0;
+ /* tcph->doff has 4 bits, do not wrap it to 0 */
+ if (tcp_hdrlen >= 15 * 4)
+ return 0;
+
/*
* MSS Option not found ?! add it..
*/
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 246f29d365c0..c9fac08a53b1 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2077,7 +2077,7 @@ static int netlink_dump(struct sock *sk)
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
struct module *module;
- int len, err = -ENOBUFS;
+ int err = -ENOBUFS;
int alloc_min_size;
int alloc_size;
@@ -2124,9 +2124,11 @@ static int netlink_dump(struct sock *sk)
skb_reserve(skb, skb_tailroom(skb) - alloc_size);
netlink_skb_set_owner_r(skb, sk);
- len = cb->dump(skb, cb);
+ if (nlk->dump_done_errno > 0)
+ nlk->dump_done_errno = cb->dump(skb, cb);
- if (len > 0) {
+ if (nlk->dump_done_errno > 0 ||
+ skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
mutex_unlock(nlk->cb_mutex);
if (sk_filter(sk, skb))
@@ -2136,13 +2138,15 @@ static int netlink_dump(struct sock *sk)
return 0;
}
- nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
- if (!nlh)
+ nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE,
+ sizeof(nlk->dump_done_errno), NLM_F_MULTI);
+ if (WARN_ON(!nlh))
goto errout_skb;
nl_dump_check_consistent(cb, nlh);
- memcpy(nlmsg_data(nlh), &len, sizeof(len));
+ memcpy(nlmsg_data(nlh), &nlk->dump_done_errno,
+ sizeof(nlk->dump_done_errno));
if (sk_filter(sk, skb))
kfree_skb(skb);
@@ -2207,14 +2211,19 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
cb->min_dump_alloc = control->min_dump_alloc;
cb->skb = skb;
+ if (cb->start) {
+ ret = cb->start(cb);
+ if (ret)
+ goto error_unlock;
+ }
+
nlk->cb_running = true;
+ nlk->dump_done_errno = INT_MAX;
mutex_unlock(nlk->cb_mutex);
- if (cb->start)
- cb->start(cb);
-
ret = netlink_dump(sk);
+
sock_put(sk);
if (ret)
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 4fdb38318977..bae961cfa3ad 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -24,6 +24,7 @@ struct netlink_sock {
wait_queue_head_t wait;
bool bound;
bool cb_running;
+ int dump_done_errno;
struct netlink_callback cb;
struct mutex *cb_mutex;
struct mutex cb_def_mutex;
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 122bb81da918..c699d64a0753 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -982,6 +982,8 @@ static void nfc_release(struct device *d)
kfree(se);
}
+ ida_simple_remove(&nfc_index_ida, dev->idx);
+
kfree(dev);
}
@@ -1056,6 +1058,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
int tx_headroom, int tx_tailroom)
{
struct nfc_dev *dev;
+ int rc;
if (!ops->start_poll || !ops->stop_poll || !ops->activate_target ||
!ops->deactivate_target || !ops->im_transceive)
@@ -1068,6 +1071,15 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
if (!dev)
return NULL;
+ rc = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL);
+ if (rc < 0)
+ goto err_free_dev;
+ dev->idx = rc;
+
+ dev->dev.class = &nfc_class;
+ dev_set_name(&dev->dev, "nfc%d", dev->idx);
+ device_initialize(&dev->dev);
+
dev->ops = ops;
dev->supported_protocols = supported_protocols;
dev->tx_headroom = tx_headroom;
@@ -1090,6 +1102,11 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
}
return dev;
+
+err_free_dev:
+ kfree(dev);
+
+ return NULL;
}
EXPORT_SYMBOL(nfc_allocate_device);
@@ -1104,14 +1121,6 @@ int nfc_register_device(struct nfc_dev *dev)
pr_debug("dev_name=%s\n", dev_name(&dev->dev));
- dev->idx = ida_simple_get(&nfc_index_ida, 0, 0, GFP_KERNEL);
- if (dev->idx < 0)
- return dev->idx;
-
- dev->dev.class = &nfc_class;
- dev_set_name(&dev->dev, "nfc%d", dev->idx);
- device_initialize(&dev->dev);
-
mutex_lock(&nfc_devlist_mutex);
nfc_devlist_generation++;
rc = device_add(&dev->dev);
@@ -1149,12 +1158,10 @@ EXPORT_SYMBOL(nfc_register_device);
*/
void nfc_unregister_device(struct nfc_dev *dev)
{
- int rc, id;
+ int rc;
pr_debug("dev_name=%s\n", dev_name(&dev->dev));
- id = dev->idx;
-
if (dev->rfkill) {
rfkill_unregister(dev->rfkill);
rfkill_destroy(dev->rfkill);
@@ -1179,8 +1186,6 @@ void nfc_unregister_device(struct nfc_dev *dev)
nfc_devlist_generation++;
device_del(&dev->dev);
mutex_unlock(&nfc_devlist_mutex);
-
- ida_simple_remove(&nfc_index_ida, id);
}
EXPORT_SYMBOL(nfc_unregister_device);
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index b9edf5fae6ae..e31dea17473d 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -76,7 +76,8 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
struct sockaddr_nfc_llcp llcp_addr;
int len, ret = 0;
- if (!addr || addr->sa_family != AF_NFC)
+ if (!addr || alen < offsetofend(struct sockaddr, sa_family) ||
+ addr->sa_family != AF_NFC)
return -EINVAL;
pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family);
@@ -150,7 +151,8 @@ static int llcp_raw_sock_bind(struct socket *sock, struct sockaddr *addr,
struct sockaddr_nfc_llcp llcp_addr;
int len, ret = 0;
- if (!addr || addr->sa_family != AF_NFC)
+ if (!addr || alen < offsetofend(struct sockaddr, sa_family) ||
+ addr->sa_family != AF_NFC)
return -EINVAL;
pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family);
@@ -661,8 +663,7 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
pr_debug("sock %p sk %p flags 0x%x\n", sock, sk, flags);
- if (!addr || len < sizeof(struct sockaddr_nfc) ||
- addr->sa_family != AF_NFC)
+ if (!addr || len < sizeof(*addr) || addr->sa_family != AF_NFC)
return -EINVAL;
if (addr->service_name_len == 0 && addr->dsap == 0)
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 61fff422424f..85a3d9ed4c29 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1173,8 +1173,7 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops,
return ndev;
free_nfc:
- kfree(ndev->nfc_dev);
-
+ nfc_free_device(ndev->nfc_dev);
free_nci:
kfree(ndev);
return NULL;
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index ea023b35f1c2..102c681c48b5 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -910,7 +910,9 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info)
u32 device_idx, target_idx, protocol;
int rc;
- if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+ if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+ !info->attrs[NFC_ATTR_TARGET_INDEX] ||
+ !info->attrs[NFC_ATTR_PROTOCOLS])
return -EINVAL;
device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 4e03f64709bc..05d9f42fc309 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1240,6 +1240,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
goto out;
}
+ OVS_CB(skb)->acts_origlen = acts->orig_len;
err = do_execute_actions(dp, skb, key,
acts->actions, acts->actions_len);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index eab210bb1ef0..b28e45b691de 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -367,7 +367,6 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,
} else if (key->eth.type == htons(ETH_P_IPV6)) {
enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
- skb_orphan(skb);
memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
err = nf_ct_frag6_gather(net, skb, user);
if (err) {
@@ -1089,8 +1088,8 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
nla_for_each_nested(a, attr, rem) {
int type = nla_type(a);
- int maxlen = ovs_ct_attr_lens[type].maxlen;
- int minlen = ovs_ct_attr_lens[type].minlen;
+ int maxlen;
+ int minlen;
if (type > OVS_CT_ATTR_MAX) {
OVS_NLERR(log,
@@ -1098,6 +1097,9 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
type, OVS_CT_ATTR_MAX);
return -EINVAL;
}
+
+ maxlen = ovs_ct_attr_lens[type].maxlen;
+ minlen = ovs_ct_attr_lens[type].minlen;
if (nla_len(a) < minlen || nla_len(a) > maxlen) {
OVS_NLERR(log,
"Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 4d67ea856067..453f806afe6e 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -383,7 +383,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
}
static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
- unsigned int hdrlen)
+ unsigned int hdrlen, int actions_attrlen)
{
size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
@@ -400,7 +400,7 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
/* OVS_PACKET_ATTR_ACTIONS */
if (upcall_info->actions_len)
- size += nla_total_size(upcall_info->actions_len);
+ size += nla_total_size(actions_attrlen);
/* OVS_PACKET_ATTR_MRU */
if (upcall_info->mru)
@@ -467,7 +467,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
else
hlen = skb->len;
- len = upcall_msg_size(upcall_info, hlen - cutlen);
+ len = upcall_msg_size(upcall_info, hlen - cutlen,
+ OVS_CB(skb)->acts_origlen);
user_skb = genlmsg_new(len, GFP_ATOMIC);
if (!user_skb) {
err = -ENOMEM;
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index ab85c1cae255..e19ace428e38 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -100,12 +100,14 @@ struct datapath {
* @input_vport: The original vport packet came in on. This value is cached
* when a packet is received by OVS.
* @mru: The maximum received fragement size; 0 if the packet is not
+ * @acts_origlen: The netlink size of the flow actions applied to this skb.
* @cutlen: The number of bytes from the packet end to be removed.
* fragmented.
*/
struct ovs_skb_cb {
struct vport *input_vport;
u16 mru;
+ u16 acts_origlen;
u32 cutlen;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index ae25ded82b3b..07925418c2a5 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -588,7 +588,7 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
ipv4 = true;
break;
case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
- SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
+ SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
nla_get_in6_addr(a), is_mask);
ipv6 = true;
break;
@@ -649,6 +649,8 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
tun_flags |= TUNNEL_VXLAN_OPT;
opts_type = type;
break;
+ case OVS_TUNNEL_KEY_ATTR_PAD:
+ break;
default:
OVS_NLERR(log, "Unknown IP tunnel attribute %d",
type);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 458722b938c7..e7f6657269e0 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1497,6 +1497,8 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po)
f->arr[f->num_members] = sk;
smp_wmb();
f->num_members++;
+ if (f->num_members == 1)
+ dev_add_pack(&f->prot_hook);
spin_unlock(&f->lock);
}
@@ -1513,6 +1515,8 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
BUG_ON(i >= f->num_members);
f->arr[i] = f->arr[f->num_members - 1];
f->num_members--;
+ if (f->num_members == 0)
+ __dev_remove_pack(&f->prot_hook);
spin_unlock(&f->lock);
}
@@ -1619,6 +1623,7 @@ static void fanout_release_data(struct packet_fanout *f)
static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
{
+ struct packet_rollover *rollover = NULL;
struct packet_sock *po = pkt_sk(sk);
struct packet_fanout *f, *match;
u8 type = type_flags & 0xff;
@@ -1641,23 +1646,24 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
return -EINVAL;
}
- if (!po->running)
- return -EINVAL;
+ mutex_lock(&fanout_mutex);
+ err = -EALREADY;
if (po->fanout)
- return -EALREADY;
+ goto out;
if (type == PACKET_FANOUT_ROLLOVER ||
(type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) {
- po->rollover = kzalloc(sizeof(*po->rollover), GFP_KERNEL);
- if (!po->rollover)
- return -ENOMEM;
- atomic_long_set(&po->rollover->num, 0);
- atomic_long_set(&po->rollover->num_huge, 0);
- atomic_long_set(&po->rollover->num_failed, 0);
+ err = -ENOMEM;
+ rollover = kzalloc(sizeof(*rollover), GFP_KERNEL);
+ if (!rollover)
+ goto out;
+ atomic_long_set(&rollover->num, 0);
+ atomic_long_set(&rollover->num_huge, 0);
+ atomic_long_set(&rollover->num_failed, 0);
+ po->rollover = rollover;
}
- mutex_lock(&fanout_mutex);
match = NULL;
list_for_each_entry(f, &fanout_list, list) {
if (f->id == id &&
@@ -1687,11 +1693,13 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
match->prot_hook.func = packet_rcv_fanout;
match->prot_hook.af_packet_priv = match;
match->prot_hook.id_match = match_fanout_group;
- dev_add_pack(&match->prot_hook);
list_add(&match->list, &fanout_list);
}
err = -EINVAL;
- if (match->type == type &&
+
+ spin_lock(&po->bind_lock);
+ if (po->running &&
+ match->type == type &&
match->prot_hook.type == po->prot_hook.type &&
match->prot_hook.dev == po->prot_hook.dev) {
err = -ENOSPC;
@@ -1703,37 +1711,50 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
err = 0;
}
}
+ spin_unlock(&po->bind_lock);
+
+ if (err && !atomic_read(&match->sk_ref)) {
+ list_del(&match->list);
+ kfree(match);
+ }
+
out:
- mutex_unlock(&fanout_mutex);
- if (err) {
- kfree(po->rollover);
+ if (err && rollover) {
+ kfree_rcu(rollover, rcu);
po->rollover = NULL;
}
+ mutex_unlock(&fanout_mutex);
return err;
}
-static void fanout_release(struct sock *sk)
+/* If pkt_sk(sk)->fanout->sk_ref is zero, this function removes
+ * pkt_sk(sk)->fanout from fanout_list and returns pkt_sk(sk)->fanout.
+ * It is the responsibility of the caller to call fanout_release_data() and
+ * free the returned packet_fanout (after synchronize_net())
+ */
+static struct packet_fanout *fanout_release(struct sock *sk)
{
struct packet_sock *po = pkt_sk(sk);
struct packet_fanout *f;
+ mutex_lock(&fanout_mutex);
f = po->fanout;
- if (!f)
- return;
+ if (f) {
+ po->fanout = NULL;
- mutex_lock(&fanout_mutex);
- po->fanout = NULL;
+ if (atomic_dec_and_test(&f->sk_ref))
+ list_del(&f->list);
+ else
+ f = NULL;
- if (atomic_dec_and_test(&f->sk_ref)) {
- list_del(&f->list);
- dev_remove_pack(&f->prot_hook);
- fanout_release_data(f);
- kfree(f);
+ if (po->rollover) {
+ kfree_rcu(po->rollover, rcu);
+ po->rollover = NULL;
+ }
}
mutex_unlock(&fanout_mutex);
- if (po->rollover)
- kfree_rcu(po->rollover, rcu);
+ return f;
}
static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
@@ -2138,6 +2159,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
struct timespec ts;
__u32 ts_status;
bool is_drop_n_account = false;
+ bool do_vnet = false;
/* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
* We may add members to them until current aligned size without forcing
@@ -2188,8 +2210,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
netoff = TPACKET_ALIGN(po->tp_hdrlen +
(maclen < 16 ? 16 : maclen)) +
po->tp_reserve;
- if (po->has_vnet_hdr)
+ if (po->has_vnet_hdr) {
netoff += sizeof(struct virtio_net_hdr);
+ do_vnet = true;
+ }
macoff = netoff - maclen;
}
if (po->tp_version <= TPACKET_V2) {
@@ -2206,8 +2230,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
skb_set_owner_r(copy_skb, sk);
}
snaplen = po->rx_ring.frame_size - macoff;
- if ((int)snaplen < 0)
+ if ((int)snaplen < 0) {
snaplen = 0;
+ do_vnet = false;
+ }
}
} else if (unlikely(macoff + snaplen >
GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
@@ -2220,6 +2246,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
if (unlikely((int)snaplen < 0)) {
snaplen = 0;
macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
+ do_vnet = false;
}
}
spin_lock(&sk->sk_receive_queue.lock);
@@ -2245,7 +2272,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
}
spin_unlock(&sk->sk_receive_queue.lock);
- if (po->has_vnet_hdr) {
+ if (do_vnet) {
if (__packet_rcv_vnet(skb, h.raw + macoff -
sizeof(struct virtio_net_hdr))) {
spin_lock(&sk->sk_receive_queue.lock);
@@ -2639,13 +2666,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
}
- sockc.tsflags = po->sk.sk_tsflags;
- if (msg->msg_controllen) {
- err = sock_cmsg_send(&po->sk, msg, &sockc);
- if (unlikely(err))
- goto out;
- }
-
err = -ENXIO;
if (unlikely(dev == NULL))
goto out;
@@ -2653,6 +2673,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
if (unlikely(!(dev->flags & IFF_UP)))
goto out_put;
+ sockc.tsflags = po->sk.sk_tsflags;
+ if (msg->msg_controllen) {
+ err = sock_cmsg_send(&po->sk, msg, &sockc);
+ if (unlikely(err))
+ goto out_put;
+ }
+
if (po->sk.sk_socket->type == SOCK_RAW)
reserve = dev->hard_header_len;
size_max = po->tx_ring.frame_size
@@ -2813,6 +2840,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
struct virtio_net_hdr vnet_hdr = { 0 };
int offset = 0;
struct packet_sock *po = pkt_sk(sk);
+ bool has_vnet_hdr = false;
int hlen, tlen, linear;
int extra_len = 0;
@@ -2856,6 +2884,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
if (err)
goto out_unlock;
+ has_vnet_hdr = true;
}
if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
@@ -2916,7 +2945,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
packet_pick_tx_queue(dev, skb);
- if (po->has_vnet_hdr) {
+ if (has_vnet_hdr) {
err = packet_snd_vnet_gso(skb, &vnet_hdr);
if (err)
goto out_free;
@@ -2965,6 +2994,7 @@ static int packet_release(struct socket *sock)
{
struct sock *sk = sock->sk;
struct packet_sock *po;
+ struct packet_fanout *f;
struct net *net;
union tpacket_req_u req_u;
@@ -3004,9 +3034,14 @@ static int packet_release(struct socket *sock)
packet_set_ring(sk, &req_u, 1, 1);
}
- fanout_release(sk);
+ f = fanout_release(sk);
synchronize_net();
+
+ if (f) {
+ fanout_release_data(f);
+ kfree(f);
+ }
/*
* Now the socket is dead. No more input will appear.
*/
@@ -3038,13 +3073,15 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
int ret = 0;
bool unlisted = false;
- if (po->fanout)
- return -EINVAL;
-
lock_sock(sk);
spin_lock(&po->bind_lock);
rcu_read_lock();
+ if (po->fanout) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
if (name) {
dev = dev_get_by_name_rcu(sock_net(sk), name);
if (!dev) {
@@ -3121,7 +3158,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
int addr_len)
{
struct sock *sk = sock->sk;
- char name[15];
+ char name[sizeof(uaddr->sa_data) + 1];
/*
* Check legality
@@ -3129,7 +3166,11 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
if (addr_len != sizeof(struct sockaddr))
return -EINVAL;
- strlcpy(name, uaddr->sa_data, sizeof(name));
+ /* uaddr->sa_data comes from the userspace, it's not guaranteed to be
+ * zero-terminated.
+ */
+ memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data));
+ name[sizeof(uaddr->sa_data)] = 0;
return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
}
@@ -3675,12 +3716,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
- return -EBUSY;
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
- po->tp_reserve = val;
- return 0;
+ if (val > INT_MAX)
+ return -EINVAL;
+ lock_sock(sk);
+ if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+ ret = -EBUSY;
+ } else {
+ po->tp_reserve = val;
+ ret = 0;
+ }
+ release_sock(sk);
+ return ret;
}
case PACKET_LOSS:
{
@@ -3805,6 +3853,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
void *data = &val;
union tpacket_stats_u st;
struct tpacket_rollover_stats rstats;
+ struct packet_rollover *rollover;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
@@ -3848,6 +3897,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
case PACKET_HDRLEN:
if (len > sizeof(int))
len = sizeof(int);
+ if (len < sizeof(int))
+ return -EINVAL;
if (copy_from_user(&val, optval, len))
return -EFAULT;
switch (val) {
@@ -3881,13 +3932,18 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
0);
break;
case PACKET_ROLLOVER_STATS:
- if (!po->rollover)
+ rcu_read_lock();
+ rollover = rcu_dereference(po->rollover);
+ if (rollover) {
+ rstats.tp_all = atomic_long_read(&rollover->num);
+ rstats.tp_huge = atomic_long_read(&rollover->num_huge);
+ rstats.tp_failed = atomic_long_read(&rollover->num_failed);
+ data = &rstats;
+ lv = sizeof(rstats);
+ }
+ rcu_read_unlock();
+ if (!rollover)
return -EINVAL;
- rstats.tp_all = atomic_long_read(&po->rollover->num);
- rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
- rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
- data = &rstats;
- lv = sizeof(rstats);
break;
case PACKET_TX_HAS_OFF:
val = po->tp_tx_has_off;
@@ -3958,7 +4014,6 @@ static int packet_notifier(struct notifier_block *this,
}
if (msg == NETDEV_UNREGISTER) {
packet_cached_dev_reset(po);
- fanout_release(sk);
po->ifindex = -1;
if (po->prot_hook.dev)
dev_put(po->prot_hook.dev);
@@ -4213,8 +4268,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
goto out;
if (po->tp_version >= TPACKET_V3 &&
- (int)(req->tp_block_size -
- BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0)
+ req->tp_block_size <=
+ BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv))
goto out;
if (unlikely(req->tp_frame_size < po->tp_hdrlen +
po->tp_reserve))
@@ -4225,6 +4280,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
if (unlikely(rb->frames_per_block == 0))
goto out;
+ if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr))
+ goto out;
if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
req->tp_frame_nr))
goto out;
@@ -4296,7 +4353,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
register_prot_hook(sk);
}
spin_unlock(&po->bind_lock);
- if (closing && (po->tp_version > TPACKET_V2)) {
+ if (pg_vec && (po->tp_version > TPACKET_V2)) {
/* Because we don't support block-based V3 on tx-ring */
if (!tx_ring)
prb_shutdown_retire_blk_timer(po, rb_queue);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 5b2ab95afa07..169156cfd4c8 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -405,7 +405,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
ret = PTR_ERR(ic->i_send_cq);
ic->i_send_cq = NULL;
rdsdebug("ib_create_cq send failed: %d\n", ret);
- goto out;
+ goto rds_ibdev_out;
}
cq_attr.cqe = ic->i_recv_ring.w_nr;
@@ -416,19 +416,19 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
ret = PTR_ERR(ic->i_recv_cq);
ic->i_recv_cq = NULL;
rdsdebug("ib_create_cq recv failed: %d\n", ret);
- goto out;
+ goto send_cq_out;
}
ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
if (ret) {
rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
- goto out;
+ goto recv_cq_out;
}
ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
if (ret) {
rdsdebug("ib_req_notify_cq recv failed: %d\n", ret);
- goto out;
+ goto recv_cq_out;
}
/* XXX negotiate max send/recv with remote? */
@@ -453,7 +453,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
if (ret) {
rdsdebug("rdma_create_qp failed: %d\n", ret);
- goto out;
+ goto recv_cq_out;
}
ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
@@ -463,7 +463,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
if (!ic->i_send_hdrs) {
ret = -ENOMEM;
rdsdebug("ib_dma_alloc_coherent send failed\n");
- goto out;
+ goto qp_out;
}
ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
@@ -473,7 +473,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
if (!ic->i_recv_hdrs) {
ret = -ENOMEM;
rdsdebug("ib_dma_alloc_coherent recv failed\n");
- goto out;
+ goto send_hdrs_dma_out;
}
ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
@@ -481,7 +481,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
if (!ic->i_ack) {
ret = -ENOMEM;
rdsdebug("ib_dma_alloc_coherent ack failed\n");
- goto out;
+ goto recv_hdrs_dma_out;
}
ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work),
@@ -489,7 +489,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
if (!ic->i_sends) {
ret = -ENOMEM;
rdsdebug("send allocation failed\n");
- goto out;
+ goto ack_dma_out;
}
ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work),
@@ -497,7 +497,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
if (!ic->i_recvs) {
ret = -ENOMEM;
rdsdebug("recv allocation failed\n");
- goto out;
+ goto sends_out;
}
rds_ib_recv_init_ack(ic);
@@ -505,8 +505,33 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd,
ic->i_send_cq, ic->i_recv_cq);
-out:
+ return ret;
+
+sends_out:
+ vfree(ic->i_sends);
+ack_dma_out:
+ ib_dma_free_coherent(dev, sizeof(struct rds_header),
+ ic->i_ack, ic->i_ack_dma);
+recv_hdrs_dma_out:
+ ib_dma_free_coherent(dev, ic->i_recv_ring.w_nr *
+ sizeof(struct rds_header),
+ ic->i_recv_hdrs, ic->i_recv_hdrs_dma);
+send_hdrs_dma_out:
+ ib_dma_free_coherent(dev, ic->i_send_ring.w_nr *
+ sizeof(struct rds_header),
+ ic->i_send_hdrs, ic->i_send_hdrs_dma);
+qp_out:
+ rdma_destroy_qp(ic->i_cm_id);
+recv_cq_out:
+ if (!ib_destroy_cq(ic->i_recv_cq))
+ ic->i_recv_cq = NULL;
+send_cq_out:
+ if (!ib_destroy_cq(ic->i_send_cq))
+ ic->i_send_cq = NULL;
+rds_ibdev_out:
+ rds_ib_remove_conn(rds_ibdev, conn);
rds_ib_dev_put(rds_ibdev);
+
return ret;
}
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index d921adc62765..66b3d6228a15 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -104,14 +104,15 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
struct rds_ib_frmr *frmr = &ibmr->u.frmr;
struct ib_send_wr *failed_wr;
struct ib_reg_wr reg_wr;
- int ret;
+ int ret, off = 0;
while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
atomic_inc(&ibmr->ic->i_fastreg_wrs);
cpu_relax();
}
- ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, 0, PAGE_SIZE);
+ ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len,
+ &off, PAGE_SIZE);
if (unlikely(ret != ibmr->sg_len))
return ret < 0 ? ret : -EINVAL;
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 84d90c97332f..191098173018 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -69,16 +69,6 @@ static void rds_ib_send_complete(struct rds_message *rm,
complete(rm, notify_status);
}
-static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
- struct rm_data_op *op,
- int wc_status)
-{
- if (op->op_nents)
- ib_dma_unmap_sg(ic->i_cm_id->device,
- op->op_sg, op->op_nents,
- DMA_TO_DEVICE);
-}
-
static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
struct rm_rdma_op *op,
int wc_status)
@@ -139,6 +129,21 @@ static void rds_ib_send_unmap_atomic(struct rds_ib_connection *ic,
rds_ib_stats_inc(s_ib_atomic_fadd);
}
+static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
+ struct rm_data_op *op,
+ int wc_status)
+{
+ struct rds_message *rm = container_of(op, struct rds_message, data);
+
+ if (op->op_nents)
+ ib_dma_unmap_sg(ic->i_cm_id->device,
+ op->op_sg, op->op_nents,
+ DMA_TO_DEVICE);
+
+ if (rm->rdma.op_active && rm->data.op_notify)
+ rds_ib_send_unmap_rdma(ic, &rm->rdma, wc_status);
+}
+
/*
* Unmap the resources associated with a struct send_work.
*
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 4c93badeabf2..60e90f761838 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -40,7 +40,6 @@
/*
* XXX
* - build with sparse
- * - should we limit the size of a mr region? let transport return failure?
* - should we detect duplicate keys on a socket? hmm.
* - an rdma is an mlock, apply rlimit?
*/
@@ -200,6 +199,14 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
goto out;
}
+ /* Restrict the size of mr irrespective of underlying transport
+ * To account for unaligned mr regions, subtract one from nr_pages
+ */
+ if ((nr_pages - 1) > (RDS_MAX_MSG_SIZE >> PAGE_SHIFT)) {
+ ret = -EMSGSIZE;
+ goto out;
+ }
+
rdsdebug("RDS: get_mr addr %llx len %llu nr_pages %u\n",
args->vec.addr, args->vec.bytes, nr_pages);
@@ -626,6 +633,16 @@ int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
}
op->op_notifier->n_user_token = args->user_token;
op->op_notifier->n_status = RDS_RDMA_SUCCESS;
+
+ /* Enable rmda notification on data operation for composite
+ * rds messages and make sure notification is enabled only
+ * for the data operation which follows it so that application
+ * gets notified only after full message gets delivered.
+ */
+ if (rm->data.op_sg) {
+ rm->rdma.op_notify = 0;
+ rm->data.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
+ }
}
/* The cookie contains the R_Key of the remote memory region, and
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 67ba67c058b1..30a51fec0f63 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -50,6 +50,9 @@ void rdsdebug(char *fmt, ...)
#define RDS_FRAG_SHIFT 12
#define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT))
+/* Used to limit both RDMA and non-RDMA RDS message to 1MB */
+#define RDS_MAX_MSG_SIZE ((unsigned int)(1 << 20))
+
#define RDS_CONG_MAP_BYTES (65536 / 8)
#define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE)
#define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8)
@@ -414,6 +417,7 @@ struct rds_message {
} rdma;
struct rm_data_op {
unsigned int op_active:1;
+ unsigned int op_notify:1;
unsigned int op_nents;
unsigned int op_count;
unsigned int op_dmasg;
diff --git a/net/rds/send.c b/net/rds/send.c
index 896626b9a0ef..ad247dc71ebb 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -475,12 +475,14 @@ void rds_rdma_send_complete(struct rds_message *rm, int status)
struct rm_rdma_op *ro;
struct rds_notifier *notifier;
unsigned long flags;
+ unsigned int notify = 0;
spin_lock_irqsave(&rm->m_rs_lock, flags);
+ notify = rm->rdma.op_notify | rm->data.op_notify;
ro = &rm->rdma;
if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) &&
- ro->op_active && ro->op_notify && ro->op_notifier) {
+ ro->op_active && notify && ro->op_notifier) {
notifier = ro->op_notifier;
rs = rm->m_rs;
sock_hold(rds_rs_to_sk(rs));
@@ -944,6 +946,11 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
ret = rds_cmsg_rdma_map(rs, rm, cmsg);
if (!ret)
*allocated_mr = 1;
+ else if (ret == -ENODEV)
+ /* Accommodate the get_mr() case which can fail
+ * if connection isn't established yet.
+ */
+ ret = -EAGAIN;
break;
case RDS_CMSG_ATOMIC_CSWP:
case RDS_CMSG_ATOMIC_FADD:
@@ -986,6 +993,26 @@ static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn)
return hash;
}
+static int rds_rdma_bytes(struct msghdr *msg, size_t *rdma_bytes)
+{
+ struct rds_rdma_args *args;
+ struct cmsghdr *cmsg;
+
+ for_each_cmsghdr(cmsg, msg) {
+ if (!CMSG_OK(msg, cmsg))
+ return -EINVAL;
+
+ if (cmsg->cmsg_level != SOL_RDS)
+ continue;
+
+ if (cmsg->cmsg_type == RDS_CMSG_RDMA_ARGS) {
+ args = CMSG_DATA(cmsg);
+ *rdma_bytes += args->remote_vec.bytes;
+ }
+ }
+ return 0;
+}
+
int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
{
struct sock *sk = sock->sk;
@@ -1000,6 +1027,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
int nonblock = msg->msg_flags & MSG_DONTWAIT;
long timeo = sock_sndtimeo(sk, nonblock);
struct rds_conn_path *cpath;
+ size_t total_payload_len = payload_len, rdma_payload_len = 0;
/* Mirror Linux UDP mirror of BSD error message compatibility */
/* XXX: Perhaps MSG_MORE someday */
@@ -1032,6 +1060,16 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
}
release_sock(sk);
+ ret = rds_rdma_bytes(msg, &rdma_payload_len);
+ if (ret)
+ goto out;
+
+ total_payload_len += rdma_payload_len;
+ if (max_t(size_t, payload_len, rdma_payload_len) > RDS_MAX_MSG_SIZE) {
+ ret = -EMSGSIZE;
+ goto out;
+ }
+
if (payload_len > rds_sk_sndbuf(rs)) {
ret = -EMSGSIZE;
goto out;
@@ -1081,8 +1119,12 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
/* Parse any control messages the user may have included. */
ret = rds_cmsg_send(rs, rm, msg, &allocated_mr);
- if (ret)
+ if (ret) {
+ /* Trigger connection so that its ready for the next retry */
+ if (ret == -EAGAIN)
+ rds_conn_connect_if_down(conn);
goto out;
+ }
if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) {
printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index e0b23fb5b8d5..525b624fec8b 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -129,7 +129,7 @@ int rds_tcp_accept_one(struct socket *sock)
if (!sock) /* module unload or netns delete in progress */
return -ENETUNREACH;
- ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family,
+ ret = sock_create_lite(sock->sk->sk_family,
sock->sk->sk_type, sock->sk->sk_protocol,
&new_sock);
if (ret)
diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index 18c737a61d80..7fc340726d03 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -217,7 +217,7 @@ static int rxrpc_krb5_decode_principal(struct krb5_principal *princ,
unsigned int *_toklen)
{
const __be32 *xdr = *_xdr;
- unsigned int toklen = *_toklen, n_parts, loop, tmp;
+ unsigned int toklen = *_toklen, n_parts, loop, tmp, paddedlen;
/* there must be at least one name, and at least #names+1 length
* words */
@@ -247,16 +247,16 @@ static int rxrpc_krb5_decode_principal(struct krb5_principal *princ,
toklen -= 4;
if (tmp <= 0 || tmp > AFSTOKEN_STRING_MAX)
return -EINVAL;
- if (tmp > toklen)
+ paddedlen = (tmp + 3) & ~3;
+ if (paddedlen > toklen)
return -EINVAL;
princ->name_parts[loop] = kmalloc(tmp + 1, GFP_KERNEL);
if (!princ->name_parts[loop])
return -ENOMEM;
memcpy(princ->name_parts[loop], xdr, tmp);
princ->name_parts[loop][tmp] = 0;
- tmp = (tmp + 3) & ~3;
- toklen -= tmp;
- xdr += tmp >> 2;
+ toklen -= paddedlen;
+ xdr += paddedlen >> 2;
}
if (toklen < 4)
@@ -265,16 +265,16 @@ static int rxrpc_krb5_decode_principal(struct krb5_principal *princ,
toklen -= 4;
if (tmp <= 0 || tmp > AFSTOKEN_K5_REALM_MAX)
return -EINVAL;
- if (tmp > toklen)
+ paddedlen = (tmp + 3) & ~3;
+ if (paddedlen > toklen)
return -EINVAL;
princ->realm = kmalloc(tmp + 1, GFP_KERNEL);
if (!princ->realm)
return -ENOMEM;
memcpy(princ->realm, xdr, tmp);
princ->realm[tmp] = 0;
- tmp = (tmp + 3) & ~3;
- toklen -= tmp;
- xdr += tmp >> 2;
+ toklen -= paddedlen;
+ xdr += paddedlen >> 2;
_debug("%s/...@%s", princ->name_parts[0], princ->realm);
@@ -293,7 +293,7 @@ static int rxrpc_krb5_decode_tagged_data(struct krb5_tagged_data *td,
unsigned int *_toklen)
{
const __be32 *xdr = *_xdr;
- unsigned int toklen = *_toklen, len;
+ unsigned int toklen = *_toklen, len, paddedlen;
/* there must be at least one tag and one length word */
if (toklen <= 8)
@@ -307,15 +307,17 @@ static int rxrpc_krb5_decode_tagged_data(struct krb5_tagged_data *td,
toklen -= 8;
if (len > max_data_size)
return -EINVAL;
+ paddedlen = (len + 3) & ~3;
+ if (paddedlen > toklen)
+ return -EINVAL;
td->data_len = len;
if (len > 0) {
td->data = kmemdup(xdr, len, GFP_KERNEL);
if (!td->data)
return -ENOMEM;
- len = (len + 3) & ~3;
- toklen -= len;
- xdr += len >> 2;
+ toklen -= paddedlen;
+ xdr += paddedlen >> 2;
}
_debug("tag %x len %x", td->tag, td->data_len);
@@ -387,7 +389,7 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen,
const __be32 **_xdr, unsigned int *_toklen)
{
const __be32 *xdr = *_xdr;
- unsigned int toklen = *_toklen, len;
+ unsigned int toklen = *_toklen, len, paddedlen;
/* there must be at least one length word */
if (toklen <= 4)
@@ -399,6 +401,9 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen,
toklen -= 4;
if (len > AFSTOKEN_K5_TIX_MAX)
return -EINVAL;
+ paddedlen = (len + 3) & ~3;
+ if (paddedlen > toklen)
+ return -EINVAL;
*_tktlen = len;
_debug("ticket len %u", len);
@@ -407,9 +412,8 @@ static int rxrpc_krb5_decode_ticket(u8 **_ticket, u16 *_tktlen,
*_ticket = kmemdup(xdr, len, GFP_KERNEL);
if (!*_ticket)
return -ENOMEM;
- len = (len + 3) & ~3;
- toklen -= len;
- xdr += len >> 2;
+ toklen -= paddedlen;
+ xdr += paddedlen >> 2;
}
*_xdr = xdr;
@@ -552,7 +556,7 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep)
{
const __be32 *xdr = prep->data, *token;
const char *cp;
- unsigned int len, tmp, loop, ntoken, toklen, sec_ix;
+ unsigned int len, paddedlen, loop, ntoken, toklen, sec_ix;
size_t datalen = prep->datalen;
int ret;
@@ -578,22 +582,21 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep)
if (len < 1 || len > AFSTOKEN_CELL_MAX)
goto not_xdr;
datalen -= 4;
- tmp = (len + 3) & ~3;
- if (tmp > datalen)
+ paddedlen = (len + 3) & ~3;
+ if (paddedlen > datalen)
goto not_xdr;
cp = (const char *) xdr;
for (loop = 0; loop < len; loop++)
if (!isprint(cp[loop]))
goto not_xdr;
- if (len < tmp)
- for (; loop < tmp; loop++)
- if (cp[loop])
- goto not_xdr;
+ for (; loop < paddedlen; loop++)
+ if (cp[loop])
+ goto not_xdr;
_debug("cellname: [%u/%u] '%*.*s'",
- len, tmp, len, len, (const char *) xdr);
- datalen -= tmp;
- xdr += tmp >> 2;
+ len, paddedlen, len, len, (const char *) xdr);
+ datalen -= paddedlen;
+ xdr += paddedlen >> 2;
/* get the token count */
if (datalen < 12)
@@ -614,10 +617,11 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep)
sec_ix = ntohl(*xdr);
datalen -= 4;
_debug("token: [%x/%zx] %x", toklen, datalen, sec_ix);
- if (toklen < 20 || toklen > datalen)
+ paddedlen = (toklen + 3) & ~3;
+ if (toklen < 20 || toklen > datalen || paddedlen > datalen)
goto not_xdr;
- datalen -= (toklen + 3) & ~3;
- xdr += (toklen + 3) >> 2;
+ datalen -= paddedlen;
+ xdr += paddedlen >> 2;
} while (--loop > 0);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index c6c2a93cc2a2..f3117324146a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -141,7 +141,7 @@ static int tcf_del_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
hlist_for_each_entry_safe(p, n, head, tcfa_head) {
ret = __tcf_hash_release(p, false, true);
if (ret == ACT_P_DELETED) {
- module_put(p->ops->owner);
+ module_put(ops->owner);
n_i++;
} else if (ret < 0)
goto nla_put_failure;
@@ -450,13 +450,15 @@ EXPORT_SYMBOL(tcf_action_exec);
int tcf_action_destroy(struct list_head *actions, int bind)
{
+ const struct tc_action_ops *ops;
struct tc_action *a, *tmp;
int ret = 0;
list_for_each_entry_safe(a, tmp, actions, list) {
+ ops = a->ops;
ret = __tcf_hash_release(a, bind, true);
if (ret == ACT_P_DELETED)
- module_put(a->ops->owner);
+ module_put(ops->owner);
else if (ret < 0)
return ret;
}
@@ -820,10 +822,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
goto out_module_put;
err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops);
- if (err < 0)
+ if (err <= 0)
goto out_module_put;
- if (err == 0)
- goto noflush_out;
nla_nest_end(skb, nest);
@@ -840,7 +840,6 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
out_module_put:
module_put(ops->owner);
err_out:
-noflush_out:
kfree_skb(skb);
return err;
}
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index eae07a2e774d..1191179c0341 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -113,6 +113,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
if (ret < 0)
return ret;
+ if (!tb[TCA_CONNMARK_PARMS])
+ return -EINVAL;
+
parm = nla_data(tb[TCA_CONNMARK_PARMS]);
if (!tcf_hash_check(tn, parm->index, a, bind)) {
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 378c1c976058..50030519a89b 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -41,6 +41,7 @@ static int ipt_init_target(struct xt_entry_target *t, char *table,
{
struct xt_tgchk_param par;
struct xt_target *target;
+ struct ipt_entry e = {};
int ret = 0;
target = xt_request_find_target(AF_INET, t->u.user.name,
@@ -49,8 +50,9 @@ static int ipt_init_target(struct xt_entry_target *t, char *table,
return PTR_ERR(target);
t->u.kernel.target = target;
+ memset(&par, 0, sizeof(par));
par.table = table;
- par.entryinfo = NULL;
+ par.entryinfo = &e;
par.target = target;
par.targinfo = t->data;
par.hook_mask = hook;
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index e7d96381c908..f85313d60a4d 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -228,7 +228,6 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
return skb->len;
nla_put_failure:
- rcu_read_unlock();
nlmsg_trim(skb, b);
return -1;
}
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index b12bc2abea93..e75fb65037d7 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -32,6 +32,7 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
if (tc_skip_sw(head->flags))
return -1;
+ *res = head->res;
return tcf_exts_exec(skb, &head->exts, res);
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 206dc24add3a..ea13df1be067 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -277,9 +277,6 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
void qdisc_hash_add(struct Qdisc *q)
{
if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
- struct Qdisc *root = qdisc_dev(q)->qdisc;
-
- WARN_ON_ONCE(root == &noop_qdisc);
ASSERT_RTNL();
hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
}
@@ -299,6 +296,8 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
struct Qdisc *q;
+ if (!handle)
+ return NULL;
q = qdisc_match_from_root(dev->qdisc, handle);
if (q)
goto out;
@@ -1008,6 +1007,9 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
return sch;
}
+ /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
+ if (ops->destroy)
+ ops->destroy(sch);
err_out3:
dev_put(dev);
kfree((char *) sch - sch->padded);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6cfb6e9038c2..9016c8baf2aa 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -681,6 +681,7 @@ void qdisc_reset(struct Qdisc *qdisc)
qdisc->gso_skb = NULL;
}
qdisc->q.qlen = 0;
+ qdisc->qstats.backlog = 0;
}
EXPORT_SYMBOL(qdisc_reset);
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index e3d0458af17b..2fae8b5f1b80 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -627,7 +627,9 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
q->hhf_arrays[i] = hhf_zalloc(HHF_ARRAYS_LEN *
sizeof(u32));
if (!q->hhf_arrays[i]) {
- hhf_destroy(sch);
+ /* Note: hhf_destroy() will be called
+ * by our caller.
+ */
return -ENOMEM;
}
}
@@ -638,7 +640,9 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt)
q->hhf_valid_bits[i] = hhf_zalloc(HHF_ARRAYS_LEN /
BITS_PER_BYTE);
if (!q->hhf_valid_bits[i]) {
- hhf_destroy(sch);
+ /* Note: hhf_destroy() will be called
+ * by our caller.
+ */
return -ENOMEM;
}
}
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 2bc8d7f8df16..20b7f1646f69 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -52,7 +52,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
/* pre-allocate qdiscs, attachment can't fail */
priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
GFP_KERNEL);
- if (priv->qdiscs == NULL)
+ if (!priv->qdiscs)
return -ENOMEM;
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
@@ -60,18 +60,14 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
qdisc = qdisc_create_dflt(dev_queue, get_default_qdisc_ops(dev, ntx),
TC_H_MAKE(TC_H_MAJ(sch->handle),
TC_H_MIN(ntx + 1)));
- if (qdisc == NULL)
- goto err;
+ if (!qdisc)
+ return -ENOMEM;
priv->qdiscs[ntx] = qdisc;
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
sch->flags |= TCQ_F_MQROOT;
return 0;
-
-err:
- mq_destroy(sch);
- return -ENOMEM;
}
static void mq_attach(struct Qdisc *sch)
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index b5c502c78143..922683418e53 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -118,10 +118,8 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
/* pre-allocate qdisc, attachment can't fail */
priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
GFP_KERNEL);
- if (priv->qdiscs == NULL) {
- err = -ENOMEM;
- goto err;
- }
+ if (!priv->qdiscs)
+ return -ENOMEM;
for (i = 0; i < dev->num_tx_queues; i++) {
dev_queue = netdev_get_tx_queue(dev, i);
@@ -129,10 +127,9 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
get_default_qdisc_ops(dev, i),
TC_H_MAKE(TC_H_MAJ(sch->handle),
TC_H_MIN(i + 1)));
- if (qdisc == NULL) {
- err = -ENOMEM;
- goto err;
- }
+ if (!qdisc)
+ return -ENOMEM;
+
priv->qdiscs[i] = qdisc;
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
}
@@ -148,7 +145,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
priv->hw_owned = 1;
err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, &tc);
if (err)
- goto err;
+ return err;
} else {
netdev_set_num_tc(dev, qopt->num_tc);
for (i = 0; i < qopt->num_tc; i++)
@@ -162,10 +159,6 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
sch->flags |= TCQ_F_MQROOT;
return 0;
-
-err:
- mqprio_destroy(sch);
- return err;
}
static void mqprio_attach(struct Qdisc *sch)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 7f195ed4d568..ea8a56f76b32 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -434,6 +434,7 @@ congestion_drop:
qdisc_drop(head, sch, to_free);
slot_queue_add(slot, skb);
+ qdisc_tree_reduce_backlog(sch, 0, delta);
return NET_XMIT_CN;
}
@@ -465,8 +466,10 @@ enqueue:
/* Return Congestion Notification only if we dropped a packet
* from this flow.
*/
- if (qlen != slot->qlen)
+ if (qlen != slot->qlen) {
+ qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb));
return NET_XMIT_CN;
+ }
/* As we dropped a packet, better let upper stack know this */
qdisc_tree_reduce_backlog(sch, 1, dropped);
@@ -742,9 +745,10 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
if (!q->ht || !q->slots) {
- sfq_destroy(sch);
+ /* Note: sfq_destroy() will be called by our caller */
return -ENOMEM;
}
+
for (i = 0; i < q->divisor; i++)
q->ht[i] = SFQ_EMPTY_SLOT;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index a01a56ec8b8c..68b84d3a7cac 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -421,7 +421,7 @@ void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t,
{
struct dst_entry *dst;
- if (!t)
+ if (sock_owned_by_user(sk) || !t)
return;
dst = sctp_transport_dst_check(t);
if (dst)
@@ -473,15 +473,14 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
struct sctp_association **app,
struct sctp_transport **tpp)
{
+ struct sctp_init_chunk *chunkhdr, _chunkhdr;
union sctp_addr saddr;
union sctp_addr daddr;
struct sctp_af *af;
struct sock *sk = NULL;
struct sctp_association *asoc;
struct sctp_transport *transport = NULL;
- struct sctp_init_chunk *chunkhdr;
__u32 vtag = ntohl(sctphdr->vtag);
- int len = skb->len - ((void *)sctphdr - (void *)skb->data);
*app = NULL; *tpp = NULL;
@@ -516,13 +515,16 @@ struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *skb,
* discard the packet.
*/
if (vtag == 0) {
- chunkhdr = (void *)sctphdr + sizeof(struct sctphdr);
- if (len < sizeof(struct sctphdr) + sizeof(sctp_chunkhdr_t)
- + sizeof(__be32) ||
+ /* chunk header + first 4 octects of init header */
+ chunkhdr = skb_header_pointer(skb, skb_transport_offset(skb) +
+ sizeof(struct sctphdr),
+ sizeof(struct sctp_chunkhdr) +
+ sizeof(__be32), &_chunkhdr);
+ if (!chunkhdr ||
chunkhdr->chunk_hdr.type != SCTP_CID_INIT ||
- ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag) {
+ ntohl(chunkhdr->init_hdr.init_tag) != asoc->c.my_vtag)
goto out;
- }
+
} else if (vtag != asoc->c.peer_vtag) {
goto out;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 6a2532dd31c4..5d015270e454 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -240,12 +240,10 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
struct sctp_bind_addr *bp;
struct ipv6_pinfo *np = inet6_sk(sk);
struct sctp_sockaddr_entry *laddr;
- union sctp_addr *baddr = NULL;
union sctp_addr *daddr = &t->ipaddr;
union sctp_addr dst_saddr;
struct in6_addr *final_p, final;
__u8 matchlen = 0;
- __u8 bmatchlen;
sctp_scope_t scope;
memset(fl6, 0, sizeof(struct flowi6));
@@ -312,23 +310,37 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
*/
rcu_read_lock();
list_for_each_entry_rcu(laddr, &bp->address_list, list) {
- if (!laddr->valid)
+ struct dst_entry *bdst;
+ __u8 bmatchlen;
+
+ if (!laddr->valid ||
+ laddr->state != SCTP_ADDR_SRC ||
+ laddr->a.sa.sa_family != AF_INET6 ||
+ scope > sctp_scope(&laddr->a))
continue;
- if ((laddr->state == SCTP_ADDR_SRC) &&
- (laddr->a.sa.sa_family == AF_INET6) &&
- (scope <= sctp_scope(&laddr->a))) {
- bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
- if (!baddr || (matchlen < bmatchlen)) {
- baddr = &laddr->a;
- matchlen = bmatchlen;
- }
- }
- }
- if (baddr) {
- fl6->saddr = baddr->v6.sin6_addr;
- fl6->fl6_sport = baddr->v6.sin6_port;
+
+ fl6->saddr = laddr->a.v6.sin6_addr;
+ fl6->fl6_sport = laddr->a.v6.sin6_port;
final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
- dst = ip6_dst_lookup_flow(sk, fl6, final_p);
+ bdst = ip6_dst_lookup_flow(sk, fl6, final_p);
+
+ if (!IS_ERR(bdst) &&
+ ipv6_chk_addr(dev_net(bdst->dev),
+ &laddr->a.v6.sin6_addr, bdst->dev, 1)) {
+ if (!IS_ERR_OR_NULL(dst))
+ dst_release(dst);
+ dst = bdst;
+ break;
+ }
+
+ bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a);
+ if (matchlen > bmatchlen)
+ continue;
+
+ if (!IS_ERR_OR_NULL(dst))
+ dst_release(dst);
+ dst = bdst;
+ matchlen = bmatchlen;
}
rcu_read_unlock();
@@ -500,7 +512,9 @@ static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr,
{
addr->sa.sa_family = AF_INET6;
addr->v6.sin6_port = port;
+ addr->v6.sin6_flowinfo = 0;
addr->v6.sin6_addr = *saddr;
+ addr->v6.sin6_scope_id = 0;
}
/* Compare addresses exactly.
@@ -666,6 +680,9 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
newnp = inet6_sk(newsk);
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+ newnp->ipv6_mc_list = NULL;
+ newnp->ipv6_ac_list = NULL;
+ newnp->ipv6_fl_list = NULL;
rcu_read_lock();
opt = rcu_dereference(np->opt);
@@ -789,9 +806,10 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
addr->v6.sin6_flowinfo = 0;
addr->v6.sin6_port = sh->source;
addr->v6.sin6_addr = ipv6_hdr(skb)->saddr;
- if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) {
+ if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
addr->v6.sin6_scope_id = sctp_v6_skb_iif(skb);
- }
+ else
+ addr->v6.sin6_scope_id = 0;
}
*addr_len = sctp_v6_addr_to_user(sctp_sk(skb->sk), addr);
@@ -864,8 +882,10 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
net = sock_net(&opt->inet.sk);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id);
- if (!dev ||
- !ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0)) {
+ if (!dev || !(opt->inet.freebind ||
+ net->ipv6.sysctl.ip_nonlocal_bind ||
+ ipv6_chk_addr(net, &addr->v6.sin6_addr,
+ dev, 0))) {
rcu_read_unlock();
return 0;
}
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 7e869d0cca69..4f5a2b580aa5 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -68,7 +68,7 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
goto out;
}
- segs = skb_segment(skb, features | NETIF_F_HW_CSUM);
+ segs = skb_segment(skb, features | NETIF_F_HW_CSUM | NETIF_F_SG);
if (IS_ERR(segs))
goto out;
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index 048954eee984..e8f56b7c5afb 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -70,7 +70,8 @@ static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb,
info = nla_data(attr);
list_for_each_entry_rcu(laddr, address_list, list) {
- memcpy(info, &laddr->a, addrlen);
+ memcpy(info, &laddr->a, sizeof(laddr->a));
+ memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a));
info += addrlen;
}
@@ -93,7 +94,9 @@ static int inet_diag_msg_sctpaddrs_fill(struct sk_buff *skb,
info = nla_data(attr);
list_for_each_entry(from, &asoc->peer.transport_addr_list,
transports) {
- memcpy(info, &from->ipaddr, addrlen);
+ memcpy(info, &from->ipaddr, sizeof(from->ipaddr));
+ memset(info + sizeof(from->ipaddr), 0,
+ addrlen - sizeof(from->ipaddr));
info += addrlen;
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6cbe5bdf2b15..c062ceae19e6 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -168,6 +168,36 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
sk_mem_charge(sk, chunk->skb->truesize);
}
+static void sctp_clear_owner_w(struct sctp_chunk *chunk)
+{
+ skb_orphan(chunk->skb);
+}
+
+static void sctp_for_each_tx_datachunk(struct sctp_association *asoc,
+ void (*cb)(struct sctp_chunk *))
+
+{
+ struct sctp_outq *q = &asoc->outqueue;
+ struct sctp_transport *t;
+ struct sctp_chunk *chunk;
+
+ list_for_each_entry(t, &asoc->peer.transport_addr_list, transports)
+ list_for_each_entry(chunk, &t->transmitted, transmitted_list)
+ cb(chunk);
+
+ list_for_each_entry(chunk, &q->retransmit, list)
+ cb(chunk);
+
+ list_for_each_entry(chunk, &q->sacked, list)
+ cb(chunk);
+
+ list_for_each_entry(chunk, &q->abandoned, list)
+ cb(chunk);
+
+ list_for_each_entry(chunk, &q->out_chunk_list, list)
+ cb(chunk);
+}
+
/* Verify that this is a valid address. */
static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr,
int len)
@@ -235,8 +265,12 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk,
sctp_assoc_t id)
{
struct sctp_association *addr_asoc = NULL, *id_asoc = NULL;
- struct sctp_transport *transport;
+ struct sctp_af *af = sctp_get_af_specific(addr->ss_family);
union sctp_addr *laddr = (union sctp_addr *)addr;
+ struct sctp_transport *transport;
+
+ if (!af || sctp_verify_addr(sk, laddr, af->sockaddr_len))
+ return NULL;
addr_asoc = sctp_endpoint_lookup_assoc(sctp_sk(sk)->ep,
laddr,
@@ -4369,8 +4403,7 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
info->sctpi_ictrlchunks = asoc->stats.ictrlchunks;
prim = asoc->peer.primary_path;
- memcpy(&info->sctpi_p_address, &prim->ipaddr,
- sizeof(struct sockaddr_storage));
+ memcpy(&info->sctpi_p_address, &prim->ipaddr, sizeof(prim->ipaddr));
info->sctpi_p_state = prim->state;
info->sctpi_p_cwnd = prim->cwnd;
info->sctpi_p_srtt = prim->srtt;
@@ -4456,13 +4489,13 @@ int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *),
for (head = sctp_ep_hashtable; hash < sctp_ep_hashsize;
hash++, head++) {
- read_lock(&head->lock);
+ read_lock_bh(&head->lock);
sctp_for_each_hentry(epb, &head->chain) {
err = cb(sctp_ep(epb), p);
if (err)
break;
}
- read_unlock(&head->lock);
+ read_unlock_bh(&head->lock);
}
return err;
@@ -4502,9 +4535,8 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
if (err)
return err;
- sctp_transport_get_idx(net, &hti, pos);
- obj = sctp_transport_get_next(net, &hti);
- for (; obj && !IS_ERR(obj); obj = sctp_transport_get_next(net, &hti)) {
+ obj = sctp_transport_get_idx(net, &hti, pos + 1);
+ for (; !IS_ERR_OR_NULL(obj); obj = sctp_transport_get_next(net, &hti)) {
struct sctp_transport *transport = obj;
if (!sctp_transport_hold(transport))
@@ -4732,9 +4764,19 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
struct socket *sock;
int err = 0;
+ /* Do not peel off from one netns to another one. */
+ if (!net_eq(current->nsproxy->net_ns, sock_net(sk)))
+ return -EINVAL;
+
if (!asoc)
return -EINVAL;
+ /* If there is a thread waiting on more sndbuf space for
+ * sending on this asoc, it cannot be peeled.
+ */
+ if (waitqueue_active(&asoc->wait))
+ return -EBUSY;
+
/* An association cannot be branched off from an already peeled-off
* socket, nor is this supported for tcp style sockets.
*/
@@ -6855,6 +6897,9 @@ int sctp_inet_listen(struct socket *sock, int backlog)
if (sock->state != SS_UNCONNECTED)
goto out;
+ if (!sctp_sstate(sk, LISTENING) && !sctp_sstate(sk, CLOSED))
+ goto out;
+
/* If backlog is zero, disable listening. */
if (!backlog) {
if (sctp_sstate(sk, CLOSED))
@@ -7427,8 +7472,6 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
*/
release_sock(sk);
current_timeo = schedule_timeout(current_timeo);
- if (sk != asoc->base.sk)
- goto do_error;
lock_sock(sk);
*timeo_p = current_timeo;
@@ -7817,7 +7860,9 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
* paths won't try to lock it and then oldsk.
*/
lock_sock_nested(newsk, SINGLE_DEPTH_NESTING);
+ sctp_for_each_tx_datachunk(assoc, sctp_clear_owner_w);
sctp_assoc_migrate(assoc, newsk);
+ sctp_for_each_tx_datachunk(assoc, sctp_set_owner_w);
/* If the association on the newsk is already closed before accept()
* is called, set RCV_SHUTDOWN flag.
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 84d0fdaf7de9..d3cfbf2f407d 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -265,7 +265,8 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event)
sctp_ulpq_clear_pd(ulpq);
if (queue == &sk->sk_receive_queue && !sp->data_ready_signalled) {
- sp->data_ready_signalled = 1;
+ if (!sock_owned_by_user(sk))
+ sp->data_ready_signalled = 1;
sk->sk_data_ready(sk);
}
return 1;
diff --git a/net/socket.c b/net/socket.c
index 73dc69f9681e..6bbccf05854f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2197,8 +2197,10 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
return err;
err = sock_error(sock->sk);
- if (err)
+ if (err) {
+ datagrams = err;
goto out_put;
+ }
entry = mmsg;
compat_entry = (struct compat_mmsghdr __user *)mmsg;
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 41adf362936d..b5c279b22680 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -504,6 +504,7 @@ static int __init strp_mod_init(void)
static void __exit strp_mod_exit(void)
{
+ destroy_workqueue(strp_wq);
}
module_init(strp_mod_init);
module_exit(strp_mod_exit);
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 90115ceefd49..79aec90259cd 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -34,6 +34,7 @@
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
+#include <crypto/algapi.h>
#include <crypto/hash.h>
#include <crypto/skcipher.h>
#include <linux/err.h>
@@ -927,7 +928,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
if (ret)
goto out_err;
- if (memcmp(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
+ if (crypto_memneq(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
ret = GSS_S_BAD_SIG;
goto out_err;
}
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 6fdffde28733..153082598522 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1548,7 +1548,7 @@ complete:
ret = SVC_COMPLETE;
goto out;
drop:
- ret = SVC_DROP;
+ ret = SVC_CLOSE;
out:
if (rsci)
cache_put(&rsci->h, sn->rsc_cache);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 7c8070ec93c8..272c34551979 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -702,59 +702,32 @@ found_pool:
return task;
}
-/*
- * Create or destroy enough new threads to make the number
- * of threads the given number. If `pool' is non-NULL, applies
- * only to threads in that pool, otherwise round-robins between
- * all pools. Caller must ensure that mutual exclusion between this and
- * server startup or shutdown.
- *
- * Destroying threads relies on the service threads filling in
- * rqstp->rq_task, which only the nfs ones do. Assumes the serv
- * has been created using svc_create_pooled().
- *
- * Based on code that used to be in nfsd_svc() but tweaked
- * to be pool-aware.
- */
-int
-svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+/* create new threads */
+static int
+svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
struct svc_rqst *rqstp;
struct task_struct *task;
struct svc_pool *chosen_pool;
- int error = 0;
unsigned int state = serv->sv_nrthreads-1;
int node;
- if (pool == NULL) {
- /* The -1 assumes caller has done a svc_get() */
- nrservs -= (serv->sv_nrthreads-1);
- } else {
- spin_lock_bh(&pool->sp_lock);
- nrservs -= pool->sp_nrthreads;
- spin_unlock_bh(&pool->sp_lock);
- }
-
- /* create new threads */
- while (nrservs > 0) {
+ do {
nrservs--;
chosen_pool = choose_pool(serv, pool, &state);
node = svc_pool_map_get_node(chosen_pool->sp_id);
rqstp = svc_prepare_thread(serv, chosen_pool, node);
- if (IS_ERR(rqstp)) {
- error = PTR_ERR(rqstp);
- break;
- }
+ if (IS_ERR(rqstp))
+ return PTR_ERR(rqstp);
__module_get(serv->sv_ops->svo_module);
task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp,
node, "%s", serv->sv_name);
if (IS_ERR(task)) {
- error = PTR_ERR(task);
module_put(serv->sv_ops->svo_module);
svc_exit_thread(rqstp);
- break;
+ return PTR_ERR(task);
}
rqstp->rq_task = task;
@@ -763,18 +736,103 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
svc_sock_update_bufs(serv);
wake_up_process(task);
- }
+ } while (nrservs > 0);
+
+ return 0;
+}
+
+
+/* destroy old threads */
+static int
+svc_signal_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+ struct task_struct *task;
+ unsigned int state = serv->sv_nrthreads-1;
+
/* destroy old threads */
- while (nrservs < 0 &&
- (task = choose_victim(serv, pool, &state)) != NULL) {
+ do {
+ task = choose_victim(serv, pool, &state);
+ if (task == NULL)
+ break;
send_sig(SIGINT, task, 1);
nrservs++;
+ } while (nrservs < 0);
+
+ return 0;
+}
+
+/*
+ * Create or destroy enough new threads to make the number
+ * of threads the given number. If `pool' is non-NULL, applies
+ * only to threads in that pool, otherwise round-robins between
+ * all pools. Caller must ensure that mutual exclusion between this and
+ * server startup or shutdown.
+ *
+ * Destroying threads relies on the service threads filling in
+ * rqstp->rq_task, which only the nfs ones do. Assumes the serv
+ * has been created using svc_create_pooled().
+ *
+ * Based on code that used to be in nfsd_svc() but tweaked
+ * to be pool-aware.
+ */
+int
+svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+ if (pool == NULL) {
+ /* The -1 assumes caller has done a svc_get() */
+ nrservs -= (serv->sv_nrthreads-1);
+ } else {
+ spin_lock_bh(&pool->sp_lock);
+ nrservs -= pool->sp_nrthreads;
+ spin_unlock_bh(&pool->sp_lock);
}
- return error;
+ if (nrservs > 0)
+ return svc_start_kthreads(serv, pool, nrservs);
+ if (nrservs < 0)
+ return svc_signal_kthreads(serv, pool, nrservs);
+ return 0;
}
EXPORT_SYMBOL_GPL(svc_set_num_threads);
+/* destroy old threads */
+static int
+svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+ struct task_struct *task;
+ unsigned int state = serv->sv_nrthreads-1;
+
+ /* destroy old threads */
+ do {
+ task = choose_victim(serv, pool, &state);
+ if (task == NULL)
+ break;
+ kthread_stop(task);
+ nrservs++;
+ } while (nrservs < 0);
+ return 0;
+}
+
+int
+svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+ if (pool == NULL) {
+ /* The -1 assumes caller has done a svc_get() */
+ nrservs -= (serv->sv_nrthreads-1);
+ } else {
+ spin_lock_bh(&pool->sp_lock);
+ nrservs -= pool->sp_nrthreads;
+ spin_unlock_bh(&pool->sp_lock);
+ }
+
+ if (nrservs > 0)
+ return svc_start_kthreads(serv, pool, nrservs);
+ if (nrservs < 0)
+ return svc_stop_kthreads(serv, pool, nrservs);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(svc_set_num_threads_sync);
+
/*
* Called from a server thread as it's exiting. Caller must hold the "service
* mutex" for the service.
@@ -1155,8 +1213,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
case SVC_DENIED:
goto err_bad_auth;
case SVC_CLOSE:
- if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
- svc_close_xprt(rqstp->rq_xprt);
+ goto close;
case SVC_DROP:
goto dropit;
case SVC_COMPLETE:
@@ -1246,7 +1303,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
sendit:
if (svc_authorise(rqstp))
- goto dropit;
+ goto close;
return 1; /* Caller can now send it */
dropit:
@@ -1254,11 +1311,16 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
dprintk("svc: svc_process dropit\n");
return 0;
+ close:
+ if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
+ svc_close_xprt(rqstp->rq_xprt);
+ dprintk("svc: svc_process close\n");
+ return 0;
+
err_short_len:
svc_printk(rqstp, "short len %Zd, dropping request\n",
argv->iov_len);
-
- goto dropit; /* drop request */
+ goto close;
err_bad_rpc:
serv->sv_stats->rpcbadfmt++;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index a4bc98265d88..266a30c8b88b 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -408,6 +408,9 @@ static void svc_data_ready(struct sock *sk)
dprintk("svc: socket %p(inet %p), busy=%d\n",
svsk, sk,
test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
+
+ /* Refer to svc_setup_socket() for details. */
+ rmb();
svsk->sk_odata(sk);
if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
svc_xprt_enqueue(&svsk->sk_xprt);
@@ -424,6 +427,9 @@ static void svc_write_space(struct sock *sk)
if (svsk) {
dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
+
+ /* Refer to svc_setup_socket() for details. */
+ rmb();
svsk->sk_owspace(sk);
svc_xprt_enqueue(&svsk->sk_xprt);
}
@@ -748,8 +754,12 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
dprintk("svc: socket %p TCP (listen) state change %d\n",
sk, sk->sk_state);
- if (svsk)
+ if (svsk) {
+ /* Refer to svc_setup_socket() for details. */
+ rmb();
svsk->sk_odata(sk);
+ }
+
/*
* This callback may called twice when a new connection
* is established as a child socket inherits everything
@@ -782,6 +792,8 @@ static void svc_tcp_state_change(struct sock *sk)
if (!svsk)
printk("svc: socket %p: no user data\n", sk);
else {
+ /* Refer to svc_setup_socket() for details. */
+ rmb();
svsk->sk_ostate(sk);
if (sk->sk_state != TCP_ESTABLISHED) {
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
@@ -1368,12 +1380,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
return ERR_PTR(err);
}
- inet->sk_user_data = svsk;
svsk->sk_sock = sock;
svsk->sk_sk = inet;
svsk->sk_ostate = inet->sk_state_change;
svsk->sk_odata = inet->sk_data_ready;
svsk->sk_owspace = inet->sk_write_space;
+ /*
+ * This barrier is necessary in order to prevent race condition
+ * with svc_data_ready(), svc_listen_data_ready() and others
+ * when calling callbacks above.
+ */
+ wmb();
+ inet->sk_user_data = svsk;
/* Initialize the socket */
if (sock->type == SOCK_DGRAM)
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index d987c2d3dd6e..f57c9f0ab8f9 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -125,14 +125,34 @@ void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
/* The client can send a request inline as long as the RPCRDMA header
* plus the RPC call fit under the transport's inline limit. If the
* combined call message size exceeds that limit, the client must use
- * the read chunk list for this operation.
+ * a Read chunk for this operation.
+ *
+ * A Read chunk is also required if sending the RPC call inline would
+ * exceed this device's max_sge limit.
*/
static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst *rqst)
{
- struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ struct xdr_buf *xdr = &rqst->rq_snd_buf;
+ unsigned int count, remaining, offset;
+
+ if (xdr->len > r_xprt->rx_ia.ri_max_inline_write)
+ return false;
+
+ if (xdr->page_len) {
+ remaining = xdr->page_len;
+ offset = xdr->page_base & ~PAGE_MASK;
+ count = 0;
+ while (remaining) {
+ remaining -= min_t(unsigned int,
+ PAGE_SIZE - offset, remaining);
+ offset = 0;
+ if (++count > r_xprt->rx_ia.ri_max_send_sges)
+ return false;
+ }
+ }
- return rqst->rq_snd_buf.len <= ia->ri_max_inline_write;
+ return true;
}
/* The client can't know how large the actual reply will be. Thus it
@@ -186,9 +206,9 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n)
*/
static int
-rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
- enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg,
- bool reminv_expected)
+rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
+ unsigned int pos, enum rpcrdma_chunktype type,
+ struct rpcrdma_mr_seg *seg)
{
int len, n, p, page_base;
struct page **ppages;
@@ -226,22 +246,21 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
if (len && n == RPCRDMA_MAX_SEGS)
goto out_overflow;
- /* When encoding the read list, the tail is always sent inline */
- if (type == rpcrdma_readch)
+ /* When encoding a Read chunk, the tail iovec contains an
+ * XDR pad and may be omitted.
+ */
+ if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
return n;
- /* When encoding the Write list, some servers need to see an extra
- * segment for odd-length Write chunks. The upper layer provides
- * space in the tail iovec for this purpose.
+ /* When encoding a Write chunk, some servers need to see an
+ * extra segment for non-XDR-aligned Write chunks. The upper
+ * layer provides space in the tail iovec that may be used
+ * for this purpose.
*/
- if (type == rpcrdma_writech && reminv_expected)
+ if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
return n;
if (xdrbuf->tail[0].iov_len) {
- /* the rpcrdma protocol allows us to omit any trailing
- * xdr pad bytes, saving the server an RDMA operation. */
- if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
- return n;
n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n);
if (n == RPCRDMA_MAX_SEGS)
goto out_overflow;
@@ -293,7 +312,8 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
if (rtype == rpcrdma_areadch)
pos = 0;
seg = req->rl_segments;
- nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false);
+ nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
+ rtype, seg);
if (nsegs < 0)
return ERR_PTR(nsegs);
@@ -355,10 +375,9 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
}
seg = req->rl_segments;
- nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf,
+ nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
rqst->rq_rcv_buf.head[0].iov_len,
- wtype, seg,
- r_xprt->rx_ia.ri_reminv_expected);
+ wtype, seg);
if (nsegs < 0)
return ERR_PTR(nsegs);
@@ -423,8 +442,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
}
seg = req->rl_segments;
- nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg,
- r_xprt->rx_ia.ri_reminv_expected);
+ nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
if (nsegs < 0)
return ERR_PTR(nsegs);
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index ed5e285fd2ea..fa324fe73946 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -67,7 +67,7 @@ unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_inline_write_padding;
static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
- int xprt_rdma_pad_optimize = 1;
+ int xprt_rdma_pad_optimize = 0;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 8da7f6a4dfc3..69502fa68a3c 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -208,6 +208,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
/* Default settings for RPC-over-RDMA Version One */
r_xprt->rx_ia.ri_reminv_expected = false;
+ r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
@@ -215,6 +216,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
pmsg->cp_magic == rpcrdma_cmp_magic &&
pmsg->cp_version == RPCRDMA_CMP_VERSION) {
r_xprt->rx_ia.ri_reminv_expected = true;
+ r_xprt->rx_ia.ri_implicit_roundup = true;
rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
}
@@ -477,18 +479,20 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
*/
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
- struct rpcrdma_create_data_internal *cdata)
+ struct rpcrdma_create_data_internal *cdata)
{
struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
+ unsigned int max_qp_wr, max_sge;
struct ib_cq *sendcq, *recvcq;
- unsigned int max_qp_wr;
int rc;
- if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) {
- dprintk("RPC: %s: insufficient sge's available\n",
- __func__);
+ max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge,
+ RPCRDMA_MAX_SEND_SGES);
+ if (max_sge < RPCRDMA_MIN_SEND_SGES) {
+ pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
return -ENOMEM;
}
+ ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES;
if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
dprintk("RPC: %s: insufficient wqe's available\n",
@@ -513,7 +517,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */
- ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES;
+ ep->rep_attr.cap.max_send_sge = max_sge;
ep->rep_attr.cap.max_recv_sge = 1;
ep->rep_attr.cap.max_inline_data = 0;
ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index f6ae1b22da47..48989d5b2883 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -74,7 +74,9 @@ struct rpcrdma_ia {
unsigned int ri_max_frmr_depth;
unsigned int ri_max_inline_write;
unsigned int ri_max_inline_read;
+ unsigned int ri_max_send_sges;
bool ri_reminv_expected;
+ bool ri_implicit_roundup;
struct ib_qp_attr ri_qp_attr;
struct ib_qp_init_attr ri_qp_init_attr;
};
@@ -309,6 +311,7 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
* - xdr_buf tail iovec
*/
enum {
+ RPCRDMA_MIN_SEND_SGES = 3,
RPCRDMA_MAX_SEND_PAGES = PAGE_SIZE + RPCRDMA_MAX_INLINE - 1,
RPCRDMA_MAX_PAGE_SGES = (RPCRDMA_MAX_SEND_PAGES >> PAGE_SHIFT) + 1,
RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1,
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 6b109a808d4c..02462d67d191 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -169,7 +169,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
/* Send response, if necessary */
if (respond && (mtyp == DSC_REQ_MSG)) {
- rskb = tipc_buf_acquire(MAX_H_SIZE);
+ rskb = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC);
if (!rskb)
return;
tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer);
@@ -278,7 +278,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b,
req = kmalloc(sizeof(*req), GFP_ATOMIC);
if (!req)
return -ENOMEM;
- req->buf = tipc_buf_acquire(MAX_H_SIZE);
+ req->buf = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC);
if (!req->buf) {
kfree(req);
return -ENOMEM;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index bda89bf9f4ff..4e8647aef01c 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1395,7 +1395,7 @@ tnl:
msg_set_seqno(hdr, seqno++);
pktlen = msg_size(hdr);
msg_set_size(&tnlhdr, pktlen + INT_H_SIZE);
- tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE);
+ tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC);
if (!tnlskb) {
pr_warn("%sunable to send packet\n", link_co_err);
return;
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 17201aa8423d..912f1fb97c06 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -58,12 +58,12 @@ static unsigned int align(unsigned int i)
* NOTE: Headroom is reserved to allow prepending of a data link header.
* There may also be unrequested tailroom present at the buffer's end.
*/
-struct sk_buff *tipc_buf_acquire(u32 size)
+struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp)
{
struct sk_buff *skb;
unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;
- skb = alloc_skb_fclone(buf_size, GFP_ATOMIC);
+ skb = alloc_skb_fclone(buf_size, gfp);
if (skb) {
skb_reserve(skb, BUF_HEADROOM);
skb_put(skb, size);
@@ -95,7 +95,7 @@ struct sk_buff *tipc_msg_create(uint user, uint type,
struct tipc_msg *msg;
struct sk_buff *buf;
- buf = tipc_buf_acquire(hdr_sz + data_sz);
+ buf = tipc_buf_acquire(hdr_sz + data_sz, GFP_ATOMIC);
if (unlikely(!buf))
return NULL;
@@ -261,7 +261,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
/* No fragmentation needed? */
if (likely(msz <= pktmax)) {
- skb = tipc_buf_acquire(msz);
+ skb = tipc_buf_acquire(msz, GFP_KERNEL);
if (unlikely(!skb))
return -ENOMEM;
skb_orphan(skb);
@@ -282,7 +282,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
msg_set_importance(&pkthdr, msg_importance(mhdr));
/* Prepare first fragment */
- skb = tipc_buf_acquire(pktmax);
+ skb = tipc_buf_acquire(pktmax, GFP_KERNEL);
if (!skb)
return -ENOMEM;
skb_orphan(skb);
@@ -313,7 +313,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
pktsz = drem + INT_H_SIZE;
else
pktsz = pktmax;
- skb = tipc_buf_acquire(pktsz);
+ skb = tipc_buf_acquire(pktsz, GFP_KERNEL);
if (!skb) {
rc = -ENOMEM;
goto error;
@@ -448,7 +448,7 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
if (msz > (max / 2))
return false;
- _skb = tipc_buf_acquire(max);
+ _skb = tipc_buf_acquire(max, GFP_ATOMIC);
if (!_skb)
return false;
@@ -496,7 +496,7 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err)
/* Never return SHORT header; expand by replacing buffer if necessary */
if (msg_short(hdr)) {
- *skb = tipc_buf_acquire(BASIC_H_SIZE + dlen);
+ *skb = tipc_buf_acquire(BASIC_H_SIZE + dlen, GFP_ATOMIC);
if (!*skb)
goto exit;
memcpy((*skb)->data + BASIC_H_SIZE, msg_data(hdr), dlen);
@@ -508,7 +508,7 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err)
}
if (skb_cloned(_skb) &&
- pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_KERNEL))
+ pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC))
goto exit;
/* Now reverse the concerned fields */
@@ -547,7 +547,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
return false;
if (msg_errcode(msg))
return false;
- *err = -TIPC_ERR_NO_NAME;
+ *err = TIPC_ERR_NO_NAME;
if (skb_linearize(skb))
return false;
msg = buf_msg(skb);
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 50a739860d37..6c0455caf302 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -820,7 +820,7 @@ static inline bool msg_is_reset(struct tipc_msg *hdr)
return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG);
}
-struct sk_buff *tipc_buf_acquire(u32 size);
+struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp);
bool tipc_msg_validate(struct sk_buff *skb);
bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err);
void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type,
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index c1cfd92de17a..23f8899e0f8c 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -69,7 +69,7 @@ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
u32 dest)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size);
+ struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE + size, GFP_ATOMIC);
struct tipc_msg *msg;
if (buf != NULL) {
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 28bf4feeb81c..ab8a2d5d1e32 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -110,6 +110,10 @@ int tipc_net_start(struct net *net, u32 addr)
char addr_string[16];
tn->own_addr = addr;
+
+ /* Ensure that the new address is visible before we reinit. */
+ smp_mb();
+
tipc_named_reinit(net);
tipc_sk_reinit(net);
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 1fd464764765..aedc476fac02 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -258,13 +258,15 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
arg = nlmsg_new(0, GFP_KERNEL);
if (!arg) {
kfree_skb(msg->rep);
+ msg->rep = NULL;
return -ENOMEM;
}
err = __tipc_nl_compat_dumpit(cmd, msg, arg);
- if (err)
+ if (err) {
kfree_skb(msg->rep);
-
+ msg->rep = NULL;
+ }
kfree_skb(arg);
return err;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 9d2f4c2b08ab..27753325e06e 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -263,6 +263,11 @@ static void tipc_node_write_lock(struct tipc_node *n)
write_lock_bh(&n->lock);
}
+static void tipc_node_write_unlock_fast(struct tipc_node *n)
+{
+ write_unlock_bh(&n->lock);
+}
+
static void tipc_node_write_unlock(struct tipc_node *n)
{
struct net *net = n->net;
@@ -417,7 +422,7 @@ void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr)
}
tipc_node_write_lock(n);
list_add_tail(subscr, &n->publ_list);
- tipc_node_write_unlock(n);
+ tipc_node_write_unlock_fast(n);
tipc_node_put(n);
}
@@ -435,7 +440,7 @@ void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr)
}
tipc_node_write_lock(n);
list_del_init(subscr);
- tipc_node_write_unlock(n);
+ tipc_node_write_unlock_fast(n);
tipc_node_put(n);
}
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 215849ce453d..f89c0c2e8c16 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -91,7 +91,8 @@ static void tipc_sock_release(struct tipc_conn *con);
static void tipc_conn_kref_release(struct kref *kref)
{
struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
- struct sockaddr_tipc *saddr = con->server->saddr;
+ struct tipc_server *s = con->server;
+ struct sockaddr_tipc *saddr = s->saddr;
struct socket *sock = con->sock;
struct sock *sk;
@@ -106,6 +107,11 @@ static void tipc_conn_kref_release(struct kref *kref)
tipc_sock_release(con);
sock_release(sock);
con->sock = NULL;
+
+ spin_lock_bh(&s->idr_lock);
+ idr_remove(&s->conn_idr, con->conid);
+ s->idr_in_use--;
+ spin_unlock_bh(&s->idr_lock);
}
tipc_clean_outqueues(con);
@@ -128,8 +134,10 @@ static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid)
spin_lock_bh(&s->idr_lock);
con = idr_find(&s->conn_idr, conid);
- if (con)
+ if (con && test_bit(CF_CONNECTED, &con->flags))
conn_get(con);
+ else
+ con = NULL;
spin_unlock_bh(&s->idr_lock);
return con;
}
@@ -198,15 +206,8 @@ static void tipc_sock_release(struct tipc_conn *con)
static void tipc_close_conn(struct tipc_conn *con)
{
- struct tipc_server *s = con->server;
-
if (test_and_clear_bit(CF_CONNECTED, &con->flags)) {
- spin_lock_bh(&s->idr_lock);
- idr_remove(&s->conn_idr, con->conid);
- s->idr_in_use--;
- spin_unlock_bh(&s->idr_lock);
-
/* We shouldn't flush pending works as we may be in the
* thread. In fact the races with pending rx/tx work structs
* are harmless for us here as we have already deleted this
@@ -458,6 +459,11 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid,
if (!con)
return -EINVAL;
+ if (!test_bit(CF_CONNECTED, &con->flags)) {
+ conn_put(con);
+ return 0;
+ }
+
e = tipc_alloc_entry(data, len);
if (!e) {
conn_put(con);
@@ -471,12 +477,8 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid,
list_add_tail(&e->list, &con->outqueue);
spin_unlock_bh(&con->outqueue_lock);
- if (test_bit(CF_CONNECTED, &con->flags)) {
- if (!queue_work(s->send_wq, &con->swork))
- conn_put(con);
- } else {
+ if (!queue_work(s->send_wq, &con->swork))
conn_put(con);
- }
return 0;
}
@@ -500,7 +502,7 @@ static void tipc_send_to_sock(struct tipc_conn *con)
int ret;
spin_lock_bh(&con->outqueue_lock);
- while (1) {
+ while (test_bit(CF_CONNECTED, &con->flags)) {
e = list_entry(con->outqueue.next, struct outqueue_entry,
list);
if ((struct list_head *) e == &con->outqueue)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 41f013888f07..25bc5c30d7fb 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -335,8 +335,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
INIT_LIST_HEAD(&tsk->publications);
msg = &tsk->phdr;
tn = net_generic(sock_net(sk), tipc_net_id);
- tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
- NAMED_H_SIZE, 0);
/* Finish initializing socket data structures */
sock->ops = ops;
@@ -346,6 +344,13 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
pr_warn("Socket create failed; port number exhausted\n");
return -EINVAL;
}
+
+ /* Ensure tsk is visible before we read own_addr. */
+ smp_mb();
+
+ tipc_msg_init(tn->own_addr, msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
+ NAMED_H_SIZE, 0);
+
msg_set_origport(msg, tsk->portid);
setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk);
sk->sk_backlog_rcv = tipc_backlog_rcv;
@@ -2264,24 +2269,27 @@ static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
void tipc_sk_reinit(struct net *net)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
- const struct bucket_table *tbl;
- struct rhash_head *pos;
+ struct rhashtable_iter iter;
struct tipc_sock *tsk;
struct tipc_msg *msg;
- int i;
- rcu_read_lock();
- tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
- for (i = 0; i < tbl->size; i++) {
- rht_for_each_entry_rcu(tsk, pos, tbl, i, node) {
+ rhashtable_walk_enter(&tn->sk_rht, &iter);
+
+ do {
+ tsk = ERR_PTR(rhashtable_walk_start(&iter));
+ if (tsk)
+ continue;
+
+ while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
spin_lock_bh(&tsk->sk.sk_lock.slock);
msg = &tsk->phdr;
msg_set_prevnode(msg, tn->own_addr);
msg_set_orignode(msg, tn->own_addr);
spin_unlock_bh(&tsk->sk.sk_lock.slock);
}
- }
- rcu_read_unlock();
+
+ rhashtable_walk_stop(&iter);
+ } while (tsk == ERR_PTR(-EAGAIN));
}
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid)
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 0dd02244e21d..9d94e65d0894 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -54,6 +54,8 @@ struct tipc_subscriber {
static void tipc_subscrp_delete(struct tipc_subscription *sub);
static void tipc_subscrb_put(struct tipc_subscriber *subscriber);
+static void tipc_subscrp_put(struct tipc_subscription *subscription);
+static void tipc_subscrp_get(struct tipc_subscription *subscription);
/**
* htohl - convert value to endianness used by destination
@@ -123,6 +125,7 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
{
struct tipc_name_seq seq;
+ tipc_subscrp_get(sub);
tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq);
if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper))
return;
@@ -132,30 +135,23 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower,
tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref,
node);
+ tipc_subscrp_put(sub);
}
static void tipc_subscrp_timeout(unsigned long data)
{
struct tipc_subscription *sub = (struct tipc_subscription *)data;
- struct tipc_subscriber *subscriber = sub->subscriber;
/* Notify subscriber of timeout */
tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper,
TIPC_SUBSCR_TIMEOUT, 0, 0);
- spin_lock_bh(&subscriber->lock);
- tipc_subscrp_delete(sub);
- spin_unlock_bh(&subscriber->lock);
-
- tipc_subscrb_put(subscriber);
+ tipc_subscrp_put(sub);
}
static void tipc_subscrb_kref_release(struct kref *kref)
{
- struct tipc_subscriber *subcriber = container_of(kref,
- struct tipc_subscriber, kref);
-
- kfree(subcriber);
+ kfree(container_of(kref,struct tipc_subscriber, kref));
}
static void tipc_subscrb_put(struct tipc_subscriber *subscriber)
@@ -168,6 +164,59 @@ static void tipc_subscrb_get(struct tipc_subscriber *subscriber)
kref_get(&subscriber->kref);
}
+static void tipc_subscrp_kref_release(struct kref *kref)
+{
+ struct tipc_subscription *sub = container_of(kref,
+ struct tipc_subscription,
+ kref);
+ struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
+ struct tipc_subscriber *subscriber = sub->subscriber;
+
+ spin_lock_bh(&subscriber->lock);
+ tipc_nametbl_unsubscribe(sub);
+ list_del(&sub->subscrp_list);
+ atomic_dec(&tn->subscription_count);
+ spin_unlock_bh(&subscriber->lock);
+ kfree(sub);
+ tipc_subscrb_put(subscriber);
+}
+
+static void tipc_subscrp_put(struct tipc_subscription *subscription)
+{
+ kref_put(&subscription->kref, tipc_subscrp_kref_release);
+}
+
+static void tipc_subscrp_get(struct tipc_subscription *subscription)
+{
+ kref_get(&subscription->kref);
+}
+
+/* tipc_subscrb_subscrp_delete - delete a specific subscription or all
+ * subscriptions for a given subscriber.
+ */
+static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
+ struct tipc_subscr *s)
+{
+ struct list_head *subscription_list = &subscriber->subscrp_list;
+ struct tipc_subscription *sub, *temp;
+
+ spin_lock_bh(&subscriber->lock);
+ list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) {
+ if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr)))
+ continue;
+
+ tipc_subscrp_get(sub);
+ spin_unlock_bh(&subscriber->lock);
+ tipc_subscrp_delete(sub);
+ tipc_subscrp_put(sub);
+ spin_lock_bh(&subscriber->lock);
+
+ if (s)
+ break;
+ }
+ spin_unlock_bh(&subscriber->lock);
+}
+
static struct tipc_subscriber *tipc_subscrb_create(int conid)
{
struct tipc_subscriber *subscriber;
@@ -177,8 +226,8 @@ static struct tipc_subscriber *tipc_subscrb_create(int conid)
pr_warn("Subscriber rejected, no memory\n");
return NULL;
}
- kref_init(&subscriber->kref);
INIT_LIST_HEAD(&subscriber->subscrp_list);
+ kref_init(&subscriber->kref);
subscriber->conid = conid;
spin_lock_init(&subscriber->lock);
@@ -187,55 +236,22 @@ static struct tipc_subscriber *tipc_subscrb_create(int conid)
static void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
{
- struct tipc_subscription *sub, *temp;
- u32 timeout;
-
- spin_lock_bh(&subscriber->lock);
- /* Destroy any existing subscriptions for subscriber */
- list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list,
- subscrp_list) {
- timeout = htohl(sub->evt.s.timeout, sub->swap);
- if ((timeout == TIPC_WAIT_FOREVER) || del_timer(&sub->timer)) {
- tipc_subscrp_delete(sub);
- tipc_subscrb_put(subscriber);
- }
- }
- spin_unlock_bh(&subscriber->lock);
-
+ tipc_subscrb_subscrp_delete(subscriber, NULL);
tipc_subscrb_put(subscriber);
}
static void tipc_subscrp_delete(struct tipc_subscription *sub)
{
- struct tipc_net *tn = net_generic(sub->net, tipc_net_id);
+ u32 timeout = htohl(sub->evt.s.timeout, sub->swap);
- tipc_nametbl_unsubscribe(sub);
- list_del(&sub->subscrp_list);
- kfree(sub);
- atomic_dec(&tn->subscription_count);
+ if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer))
+ tipc_subscrp_put(sub);
}
static void tipc_subscrp_cancel(struct tipc_subscr *s,
struct tipc_subscriber *subscriber)
{
- struct tipc_subscription *sub, *temp;
- u32 timeout;
-
- spin_lock_bh(&subscriber->lock);
- /* Find first matching subscription, exit if not found */
- list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list,
- subscrp_list) {
- if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) {
- timeout = htohl(sub->evt.s.timeout, sub->swap);
- if ((timeout == TIPC_WAIT_FOREVER) ||
- del_timer(&sub->timer)) {
- tipc_subscrp_delete(sub);
- tipc_subscrb_put(subscriber);
- }
- break;
- }
- }
- spin_unlock_bh(&subscriber->lock);
+ tipc_subscrb_subscrp_delete(subscriber, s);
}
static struct tipc_subscription *tipc_subscrp_create(struct net *net,
@@ -272,6 +288,7 @@ static struct tipc_subscription *tipc_subscrp_create(struct net *net,
sub->swap = swap;
memcpy(&sub->evt.s, s, sizeof(*s));
atomic_inc(&tn->subscription_count);
+ kref_init(&sub->kref);
return sub;
}
@@ -288,17 +305,16 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s,
spin_lock_bh(&subscriber->lock);
list_add(&sub->subscrp_list, &subscriber->subscrp_list);
- tipc_subscrb_get(subscriber);
sub->subscriber = subscriber;
tipc_nametbl_subscribe(sub);
+ tipc_subscrb_get(subscriber);
spin_unlock_bh(&subscriber->lock);
+ setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub);
timeout = htohl(sub->evt.s.timeout, swap);
- if (timeout == TIPC_WAIT_FOREVER)
- return;
- setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub);
- mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
+ if (timeout != TIPC_WAIT_FOREVER)
+ mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout));
}
/* Handle one termination request for the subscriber */
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index be60103082c9..ffdc214c117a 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -57,6 +57,7 @@ struct tipc_subscriber;
* @evt: template for events generated by subscription
*/
struct tipc_subscription {
+ struct kref kref;
struct tipc_subscriber *subscriber;
struct net *net;
struct timer_list timer;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2d03d5bcb5b9..915abe98174e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -998,7 +998,8 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct path path = { NULL, NULL };
err = -EINVAL;
- if (sunaddr->sun_family != AF_UNIX)
+ if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
+ sunaddr->sun_family != AF_UNIX)
goto out;
if (addr_len == sizeof(short)) {
@@ -1109,6 +1110,10 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
unsigned int hash;
int err;
+ err = -EINVAL;
+ if (alen < offsetofend(struct sockaddr, sa_family))
+ goto out;
+
if (addr->sa_family != AF_UNSPEC) {
err = unix_mkname(sunaddr, alen, &hash);
if (err < 0)
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 4d9679701a6d..384c84e83462 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -257,6 +257,8 @@ static int unix_diag_get_exact(struct sk_buff *in_skb,
err = -ENOENT;
if (sk == NULL)
goto out_nosk;
+ if (!net_eq(sock_net(sk), net))
+ goto out;
err = sock_diag_check_cookie(sk, req->udiag_cookie);
if (err)
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 6a0d48525fcf..c36757e72844 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -146,6 +146,7 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
if (s) {
struct unix_sock *u = unix_sk(s);
+ BUG_ON(!atomic_long_read(&u->inflight));
BUG_ON(list_empty(&u->link));
if (atomic_long_dec_and_test(&u->inflight))
@@ -341,6 +342,14 @@ void unix_gc(void)
}
list_del(&cursor);
+ /* Now gc_candidates contains only garbage. Restore original
+ * inflight counters for these as well, and remove the skbuffs
+ * which are creating the cycle(s).
+ */
+ skb_queue_head_init(&hitlist);
+ list_for_each_entry(u, &gc_candidates, link)
+ scan_children(&u->sk, inc_inflight, &hitlist);
+
/* not_cycle_list contains those sockets which do not make up a
* cycle. Restore these to the inflight list.
*/
@@ -350,14 +359,6 @@ void unix_gc(void)
list_move_tail(&u->link, &gc_inflight_list);
}
- /* Now gc_candidates contains only garbage. Restore original
- * inflight counters for these as well, and remove the skbuffs
- * which are creating the cycle(s).
- */
- skb_queue_head_init(&hitlist);
- list_for_each_entry(u, &gc_candidates, link)
- scan_children(&u->sk, inc_inflight, &hitlist);
-
spin_unlock(&unix_gc_lock);
/* Here we are. Hitlist is filled. Die. */
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 8a398b3fb532..2f633eec6b7a 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1524,8 +1524,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
long timeout;
int err;
struct vsock_transport_send_notify_data send_data;
-
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
sk = sock->sk;
vsk = vsock_sk(sk);
@@ -1568,11 +1567,10 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
if (err < 0)
goto out;
-
while (total_written < len) {
ssize_t written;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ add_wait_queue(sk_sleep(sk), &wait);
while (vsock_stream_has_space(vsk) == 0 &&
sk->sk_err == 0 &&
!(sk->sk_shutdown & SEND_SHUTDOWN) &&
@@ -1581,33 +1579,30 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
/* Don't wait for non-blocking sockets. */
if (timeout == 0) {
err = -EAGAIN;
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
goto out_err;
}
err = transport->notify_send_pre_block(vsk, &send_data);
if (err < 0) {
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
goto out_err;
}
release_sock(sk);
- timeout = schedule_timeout(timeout);
+ timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
lock_sock(sk);
if (signal_pending(current)) {
err = sock_intr_errno(timeout);
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
goto out_err;
} else if (timeout == 0) {
err = -EAGAIN;
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
goto out_err;
}
-
- prepare_to_wait(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
}
- finish_wait(sk_sleep(sk), &wait);
+ remove_wait_queue(sk_sleep(sk), &wait);
/* These checks occur both as part of and after the loop
* conditional since we need to check before and after
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index cd7a419faa21..c626f679e1c8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -305,8 +305,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 },
[NL80211_ATTR_PID] = { .type = NLA_U32 },
[NL80211_ATTR_4ADDR] = { .type = NLA_U8 },
- [NL80211_ATTR_PMKID] = { .type = NLA_BINARY,
- .len = WLAN_PMKID_LEN },
+ [NL80211_ATTR_PMKID] = { .len = WLAN_PMKID_LEN },
[NL80211_ATTR_DURATION] = { .type = NLA_U32 },
[NL80211_ATTR_COOKIE] = { .type = NLA_U64 },
[NL80211_ATTR_TX_RATES] = { .type = NLA_NESTED },
@@ -362,6 +361,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 },
[NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 },
[NL80211_ATTR_P2P_OPPPS] = { .type = NLA_U8 },
+ [NL80211_ATTR_LOCAL_MESH_POWER_MODE] = {. type = NLA_U32 },
[NL80211_ATTR_ACL_POLICY] = {. type = NLA_U32 },
[NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED },
[NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 },
@@ -512,7 +512,7 @@ nl80211_bss_select_policy[NL80211_BSS_SELECT_ATTR_MAX + 1] = {
static const struct nla_policy
nl80211_nan_func_policy[NL80211_NAN_FUNC_ATTR_MAX + 1] = {
[NL80211_NAN_FUNC_TYPE] = { .type = NLA_U8 },
- [NL80211_NAN_FUNC_SERVICE_ID] = { .type = NLA_BINARY,
+ [NL80211_NAN_FUNC_SERVICE_ID] = {
.len = NL80211_NAN_FUNC_SERVICE_ID_LEN },
[NL80211_NAN_FUNC_PUBLISH_TYPE] = { .type = NLA_U8 },
[NL80211_NAN_FUNC_PUBLISH_BCAST] = { .type = NLA_FLAG },
@@ -541,6 +541,14 @@ nl80211_nan_srf_policy[NL80211_NAN_SRF_ATTR_MAX + 1] = {
[NL80211_NAN_SRF_MAC_ADDRS] = { .type = NLA_NESTED },
};
+/* policy for packet pattern attributes */
+static const struct nla_policy
+nl80211_packet_pattern_policy[MAX_NL80211_PKTPAT + 1] = {
+ [NL80211_PKTPAT_MASK] = { .type = NLA_BINARY, },
+ [NL80211_PKTPAT_PATTERN] = { .type = NLA_BINARY, },
+ [NL80211_PKTPAT_OFFSET] = { .type = NLA_U32 },
+};
+
static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
struct netlink_callback *cb,
struct cfg80211_registered_device **rdev,
@@ -548,21 +556,17 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
{
int err;
- rtnl_lock();
-
if (!cb->args[0]) {
err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
nl80211_fam.attrbuf, nl80211_fam.maxattr,
nl80211_policy);
if (err)
- goto out_unlock;
+ return err;
*wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk),
nl80211_fam.attrbuf);
- if (IS_ERR(*wdev)) {
- err = PTR_ERR(*wdev);
- goto out_unlock;
- }
+ if (IS_ERR(*wdev))
+ return PTR_ERR(*wdev);
*rdev = wiphy_to_rdev((*wdev)->wiphy);
/* 0 is the first index - add 1 to parse only once */
cb->args[0] = (*rdev)->wiphy_idx + 1;
@@ -572,10 +576,8 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1);
struct wireless_dev *tmp;
- if (!wiphy) {
- err = -ENODEV;
- goto out_unlock;
- }
+ if (!wiphy)
+ return -ENODEV;
*rdev = wiphy_to_rdev(wiphy);
*wdev = NULL;
@@ -586,21 +588,11 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
}
}
- if (!*wdev) {
- err = -ENODEV;
- goto out_unlock;
- }
+ if (!*wdev)
+ return -ENODEV;
}
return 0;
- out_unlock:
- rtnl_unlock();
- return err;
-}
-
-static void nl80211_finish_wdev_dump(struct cfg80211_registered_device *rdev)
-{
- rtnl_unlock();
}
/* IE validation */
@@ -2584,17 +2576,17 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
int filter_wiphy = -1;
struct cfg80211_registered_device *rdev;
struct wireless_dev *wdev;
+ int ret;
rtnl_lock();
if (!cb->args[2]) {
struct nl80211_dump_wiphy_state state = {
.filter_wiphy = -1,
};
- int ret;
ret = nl80211_dump_wiphy_parse(skb, cb, &state);
if (ret)
- return ret;
+ goto out_unlock;
filter_wiphy = state.filter_wiphy;
@@ -2639,12 +2631,14 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
wp_idx++;
}
out:
- rtnl_unlock();
-
cb->args[0] = wp_idx;
cb->args[1] = if_idx;
- return skb->len;
+ ret = skb->len;
+ out_unlock:
+ rtnl_unlock();
+
+ return ret;
}
static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info)
@@ -4371,9 +4365,10 @@ static int nl80211_dump_station(struct sk_buff *skb,
int sta_idx = cb->args[2];
int err;
+ rtnl_lock();
err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
if (err)
- return err;
+ goto out_err;
if (!wdev->netdev) {
err = -EINVAL;
@@ -4408,7 +4403,7 @@ static int nl80211_dump_station(struct sk_buff *skb,
cb->args[2] = sta_idx;
err = skb->len;
out_err:
- nl80211_finish_wdev_dump(rdev);
+ rtnl_unlock();
return err;
}
@@ -5179,9 +5174,10 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
int path_idx = cb->args[2];
int err;
+ rtnl_lock();
err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
if (err)
- return err;
+ goto out_err;
if (!rdev->ops->dump_mpath) {
err = -EOPNOTSUPP;
@@ -5214,7 +5210,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
cb->args[2] = path_idx;
err = skb->len;
out_err:
- nl80211_finish_wdev_dump(rdev);
+ rtnl_unlock();
return err;
}
@@ -5374,9 +5370,10 @@ static int nl80211_dump_mpp(struct sk_buff *skb,
int path_idx = cb->args[2];
int err;
+ rtnl_lock();
err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
if (err)
- return err;
+ goto out_err;
if (!rdev->ops->dump_mpp) {
err = -EOPNOTSUPP;
@@ -5409,7 +5406,7 @@ static int nl80211_dump_mpp(struct sk_buff *skb,
cb->args[2] = path_idx;
err = skb->len;
out_err:
- nl80211_finish_wdev_dump(rdev);
+ rtnl_unlock();
return err;
}
@@ -6337,6 +6334,10 @@ static int validate_scan_freqs(struct nlattr *freqs)
struct nlattr *attr1, *attr2;
int n_channels = 0, tmp1, tmp2;
+ nla_for_each_nested(attr1, freqs, tmp1)
+ if (nla_len(attr1) != sizeof(u32))
+ return 0;
+
nla_for_each_nested(attr1, freqs, tmp1) {
n_channels++;
/*
@@ -7556,9 +7557,12 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
int start = cb->args[2], idx = 0;
int err;
+ rtnl_lock();
err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
- if (err)
+ if (err) {
+ rtnl_unlock();
return err;
+ }
wdev_lock(wdev);
spin_lock_bh(&rdev->bss_lock);
@@ -7581,7 +7585,7 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
wdev_unlock(wdev);
cb->args[2] = idx;
- nl80211_finish_wdev_dump(rdev);
+ rtnl_unlock();
return skb->len;
}
@@ -7665,9 +7669,10 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
int res;
bool radio_stats;
+ rtnl_lock();
res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);
if (res)
- return res;
+ goto out_err;
/* prepare_wdev_dump parsed the attributes */
radio_stats = nl80211_fam.attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS];
@@ -7708,7 +7713,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[2] = survey_idx;
res = skb->len;
out_err:
- nl80211_finish_wdev_dump(rdev);
+ rtnl_unlock();
return res;
}
@@ -10012,7 +10017,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
u8 *mask_pat;
nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
- nla_len(pat), NULL);
+ nla_len(pat), nl80211_packet_pattern_policy);
err = -EINVAL;
if (!pat_tb[NL80211_PKTPAT_MASK] ||
!pat_tb[NL80211_PKTPAT_PATTERN])
@@ -10262,7 +10267,7 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
u8 *mask_pat;
nla_parse(pat_tb, MAX_NL80211_PKTPAT, nla_data(pat),
- nla_len(pat), NULL);
+ nla_len(pat), nl80211_packet_pattern_policy);
if (!pat_tb[NL80211_PKTPAT_MASK] ||
!pat_tb[NL80211_PKTPAT_PATTERN])
return -EINVAL;
@@ -10388,6 +10393,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
+ if (!tb[NL80211_REKEY_DATA_REPLAY_CTR] || !tb[NL80211_REKEY_DATA_KEK] ||
+ !tb[NL80211_REKEY_DATA_KCK])
+ return -EINVAL;
if (nla_len(tb[NL80211_REKEY_DATA_REPLAY_CTR]) != NL80211_REPLAY_CTR_LEN)
return -ERANGE;
if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN)
@@ -11299,17 +11307,13 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
void *data = NULL;
unsigned int data_len = 0;
- rtnl_lock();
-
if (cb->args[0]) {
/* subtract the 1 again here */
struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1);
struct wireless_dev *tmp;
- if (!wiphy) {
- err = -ENODEV;
- goto out_unlock;
- }
+ if (!wiphy)
+ return -ENODEV;
*rdev = wiphy_to_rdev(wiphy);
*wdev = NULL;
@@ -11330,13 +11334,11 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
nl80211_fam.attrbuf, nl80211_fam.maxattr,
nl80211_policy);
if (err)
- goto out_unlock;
+ return err;
if (!nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID] ||
- !nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) {
- err = -EINVAL;
- goto out_unlock;
- }
+ !nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD])
+ return -EINVAL;
*wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk),
nl80211_fam.attrbuf);
@@ -11345,10 +11347,8 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
*rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk),
nl80211_fam.attrbuf);
- if (IS_ERR(*rdev)) {
- err = PTR_ERR(*rdev);
- goto out_unlock;
- }
+ if (IS_ERR(*rdev))
+ return PTR_ERR(*rdev);
vid = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID]);
subcmd = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]);
@@ -11361,19 +11361,15 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd)
continue;
- if (!vcmd->dumpit) {
- err = -EOPNOTSUPP;
- goto out_unlock;
- }
+ if (!vcmd->dumpit)
+ return -EOPNOTSUPP;
vcmd_idx = i;
break;
}
- if (vcmd_idx < 0) {
- err = -EOPNOTSUPP;
- goto out_unlock;
- }
+ if (vcmd_idx < 0)
+ return -EOPNOTSUPP;
if (nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]) {
data = nla_data(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]);
@@ -11390,9 +11386,6 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
/* keep rtnl locked in successful case */
return 0;
- out_unlock:
- rtnl_unlock();
- return err;
}
static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
@@ -11407,9 +11400,10 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
int err;
struct nlattr *vendor_data;
+ rtnl_lock();
err = nl80211_prepare_vendor_dump(skb, cb, &rdev, &wdev);
if (err)
- return err;
+ goto out;
vcmd_idx = cb->args[2];
data = (void *)cb->args[3];
@@ -11418,18 +11412,26 @@ static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV |
WIPHY_VENDOR_CMD_NEED_NETDEV)) {
- if (!wdev)
- return -EINVAL;
+ if (!wdev) {
+ err = -EINVAL;
+ goto out;
+ }
if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV &&
- !wdev->netdev)
- return -EINVAL;
+ !wdev->netdev) {
+ err = -EINVAL;
+ goto out;
+ }
if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) {
if (wdev->netdev &&
- !netif_running(wdev->netdev))
- return -ENETDOWN;
- if (!wdev->netdev && !wdev->p2p_started)
- return -ENETDOWN;
+ !netif_running(wdev->netdev)) {
+ err = -ENETDOWN;
+ goto out;
+ }
+ if (!wdev->netdev && !wdev->p2p_started) {
+ err = -ENETDOWN;
+ goto out;
+ }
}
}
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 35cc1de85dcc..6fd24f6435c3 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -505,11 +505,6 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
return -EOPNOTSUPP;
if (wdev->current_bss) {
- if (!prev_bssid)
- return -EALREADY;
- if (prev_bssid &&
- !ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid))
- return -ENOTCONN;
cfg80211_unhold_bss(wdev->current_bss);
cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub);
wdev->current_bss = NULL;
@@ -1025,11 +1020,35 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
ASSERT_WDEV_LOCK(wdev);
- if (WARN_ON(wdev->connect_keys)) {
- kzfree(wdev->connect_keys);
- wdev->connect_keys = NULL;
+ /*
+ * If we have an ssid_len, we're trying to connect or are
+ * already connected, so reject a new SSID unless it's the
+ * same (which is the case for re-association.)
+ */
+ if (wdev->ssid_len &&
+ (wdev->ssid_len != connect->ssid_len ||
+ memcmp(wdev->ssid, connect->ssid, wdev->ssid_len)))
+ return -EALREADY;
+
+ /*
+ * If connected, reject (re-)association unless prev_bssid
+ * matches the current BSSID.
+ */
+ if (wdev->current_bss) {
+ if (!prev_bssid)
+ return -EALREADY;
+ if (!ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid))
+ return -ENOTCONN;
}
+ /*
+ * Reject if we're in the process of connecting with WEP,
+ * this case isn't very interesting and trying to handle
+ * it would make the code much more complex.
+ */
+ if (wdev->connect_keys)
+ return -EINPROGRESS;
+
cfg80211_oper_and_ht_capa(&connect->ht_capa_mask,
rdev->wiphy.ht_capa_mod_mask);
@@ -1080,7 +1099,12 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
if (err) {
wdev->connect_keys = NULL;
- wdev->ssid_len = 0;
+ /*
+ * This could be reassoc getting refused, don't clear
+ * ssid_len in that case.
+ */
+ if (!wdev->current_bss)
+ wdev->ssid_len = 0;
return err;
}
@@ -1105,5 +1129,13 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
else if (wdev->current_bss)
err = rdev_disconnect(rdev, dev, reason);
+ /*
+ * Clear ssid_len unless we actually were fully connected,
+ * in which case cfg80211_disconnected() will take care of
+ * this later.
+ */
+ if (!wdev->current_bss)
+ wdev->ssid_len = 0;
+
return err;
}
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 14b3f007826d..2927d06faa6e 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -130,12 +130,10 @@ static int wiphy_resume(struct device *dev)
/* Age scan results with time spent in suspend */
cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at);
- if (rdev->ops->resume) {
- rtnl_lock();
- if (rdev->wiphy.registered)
- ret = rdev_resume(rdev);
- rtnl_unlock();
- }
+ rtnl_lock();
+ if (rdev->wiphy.registered && rdev->ops->resume)
+ ret = rdev_resume(rdev);
+ rtnl_unlock();
return ret;
}
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 659b507b347d..c921c2eed15d 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -454,6 +454,8 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
if (iftype == NL80211_IFTYPE_MESH_POINT)
skb_copy_bits(skb, hdrlen, &mesh_flags, 1);
+ mesh_flags &= MESH_FLAGS_AE;
+
switch (hdr->frame_control &
cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
case cpu_to_le16(IEEE80211_FCTL_TODS):
@@ -469,9 +471,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
iftype != NL80211_IFTYPE_STATION))
return -1;
if (iftype == NL80211_IFTYPE_MESH_POINT) {
- if (mesh_flags & MESH_FLAGS_AE_A4)
+ if (mesh_flags == MESH_FLAGS_AE_A4)
return -1;
- if (mesh_flags & MESH_FLAGS_AE_A5_A6) {
+ if (mesh_flags == MESH_FLAGS_AE_A5_A6) {
skb_copy_bits(skb, hdrlen +
offsetof(struct ieee80211s_hdr, eaddr1),
tmp.h_dest, 2 * ETH_ALEN);
@@ -487,9 +489,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
ether_addr_equal(tmp.h_source, addr)))
return -1;
if (iftype == NL80211_IFTYPE_MESH_POINT) {
- if (mesh_flags & MESH_FLAGS_AE_A5_A6)
+ if (mesh_flags == MESH_FLAGS_AE_A5_A6)
return -1;
- if (mesh_flags & MESH_FLAGS_AE_A4)
+ if (mesh_flags == MESH_FLAGS_AE_A4)
skb_copy_bits(skb, hdrlen +
offsetof(struct ieee80211s_hdr, eaddr1),
tmp.h_source, ETH_ALEN);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 5bf7e1bfeac7..8ce5711ea21b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1248,7 +1248,7 @@ static inline int policy_to_flow_dir(int dir)
}
static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
- const struct flowi *fl)
+ const struct flowi *fl, u16 family)
{
struct xfrm_policy *pol;
@@ -1256,8 +1256,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
again:
pol = rcu_dereference(sk->sk_policy[dir]);
if (pol != NULL) {
- bool match = xfrm_selector_match(&pol->selector, fl,
- sk->sk_family);
+ bool match = xfrm_selector_match(&pol->selector, fl, family);
int err = 0;
if (match) {
@@ -1808,43 +1807,6 @@ free_dst:
goto out;
}
-#ifdef CONFIG_XFRM_SUB_POLICY
-static int xfrm_dst_alloc_copy(void **target, const void *src, int size)
-{
- if (!*target) {
- *target = kmalloc(size, GFP_ATOMIC);
- if (!*target)
- return -ENOMEM;
- }
-
- memcpy(*target, src, size);
- return 0;
-}
-#endif
-
-static int xfrm_dst_update_parent(struct dst_entry *dst,
- const struct xfrm_selector *sel)
-{
-#ifdef CONFIG_XFRM_SUB_POLICY
- struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
- return xfrm_dst_alloc_copy((void **)&(xdst->partner),
- sel, sizeof(*sel));
-#else
- return 0;
-#endif
-}
-
-static int xfrm_dst_update_origin(struct dst_entry *dst,
- const struct flowi *fl)
-{
-#ifdef CONFIG_XFRM_SUB_POLICY
- struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
- return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
-#else
- return 0;
-#endif
-}
-
static int xfrm_expand_policies(const struct flowi *fl, u16 family,
struct xfrm_policy **pols,
int *num_pols, int *num_xfrms)
@@ -1916,16 +1878,6 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
xdst = (struct xfrm_dst *)dst;
xdst->num_xfrms = err;
- if (num_pols > 1)
- err = xfrm_dst_update_parent(dst, &pols[1]->selector);
- else
- err = xfrm_dst_update_origin(dst, fl);
- if (unlikely(err)) {
- dst_free(dst);
- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
- return ERR_PTR(err);
- }
-
xdst->num_pols = num_pols;
memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
xdst->policy_genid = atomic_read(&pols[0]->genid);
@@ -2253,7 +2205,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
sk = sk_const_to_full_sk(sk);
if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
num_pols = 1;
- pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+ pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family);
err = xfrm_expand_policies(fl, family, pols,
&num_pols, &num_xfrms);
if (err < 0)
@@ -2532,7 +2484,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
pol = NULL;
sk = sk_to_full_sk(sk);
if (sk && sk->sk_policy[dir]) {
- pol = xfrm_sk_policy_lookup(sk, dir, &fl);
+ pol = xfrm_sk_policy_lookup(sk, dir, &fl, family);
if (IS_ERR(pol)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
return 0;
@@ -3062,6 +3014,11 @@ static int __net_init xfrm_net_init(struct net *net)
{
int rv;
+ /* Initialize the per-net locks here */
+ spin_lock_init(&net->xfrm.xfrm_state_lock);
+ spin_lock_init(&net->xfrm.xfrm_policy_lock);
+ mutex_init(&net->xfrm.xfrm_cfg_mutex);
+
rv = xfrm_statistics_init(net);
if (rv < 0)
goto out_statistics;
@@ -3078,11 +3035,6 @@ static int __net_init xfrm_net_init(struct net *net)
if (rv < 0)
goto out;
- /* Initialize the per-net locks here */
- spin_lock_init(&net->xfrm.xfrm_state_lock);
- spin_lock_init(&net->xfrm.xfrm_policy_lock);
- mutex_init(&net->xfrm.xfrm_cfg_mutex);
-
return 0;
out:
@@ -3356,9 +3308,15 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
struct xfrm_state *x_new[XFRM_MAX_DEPTH];
struct xfrm_migrate *mp;
+ /* Stage 0 - sanity checks */
if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
goto out;
+ if (dir >= XFRM_POLICY_MAX) {
+ err = -EINVAL;
+ goto out;
+ }
+
/* Stage 1 - find policy */
if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) {
err = -ENOENT;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 671a1d0333f0..22934885bd3f 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -412,7 +412,14 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es
up = nla_data(rp);
ulen = xfrm_replay_state_esn_len(up);
- if (nla_len(rp) < ulen || xfrm_replay_state_esn_len(replay_esn) != ulen)
+ /* Check the overall length and the internal bitmap length to avoid
+ * potential overflow. */
+ if (nla_len(rp) < ulen ||
+ xfrm_replay_state_esn_len(replay_esn) != ulen ||
+ replay_esn->bmp_len != up->bmp_len)
+ return -EINVAL;
+
+ if (up->replay_window > up->bmp_len * sizeof(__u32) * 8)
return -EINVAL;
return 0;
@@ -1649,32 +1656,34 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
static int xfrm_dump_policy_done(struct netlink_callback *cb)
{
- struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1];
+ struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
struct net *net = sock_net(cb->skb->sk);
xfrm_policy_walk_done(walk, net);
return 0;
}
+static int xfrm_dump_policy_start(struct netlink_callback *cb)
+{
+ struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
+
+ BUILD_BUG_ON(sizeof(*walk) > sizeof(cb->args));
+
+ xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY);
+ return 0;
+}
+
static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
- struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1];
+ struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args;
struct xfrm_dump_info info;
- BUILD_BUG_ON(sizeof(struct xfrm_policy_walk) >
- sizeof(cb->args) - sizeof(cb->args[0]));
-
info.in_skb = cb->skb;
info.out_skb = skb;
info.nlmsg_seq = cb->nlh->nlmsg_seq;
info.nlmsg_flags = NLM_F_MULTI;
- if (!cb->args[0]) {
- cb->args[0] = 1;
- xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY);
- }
-
(void) xfrm_policy_walk(net, walk, dump_one_policy, &info);
return skb->len;
@@ -2408,6 +2417,7 @@ static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = {
static const struct xfrm_link {
int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);
+ int (*start)(struct netlink_callback *);
int (*dump)(struct sk_buff *, struct netlink_callback *);
int (*done)(struct netlink_callback *);
const struct nla_policy *nla_pol;
@@ -2421,6 +2431,7 @@ static const struct xfrm_link {
[XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_add_policy },
[XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy },
[XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy,
+ .start = xfrm_dump_policy_start,
.dump = xfrm_dump_policy,
.done = xfrm_dump_policy_done },
[XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = { .doit = xfrm_alloc_userspi },
@@ -2472,6 +2483,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct netlink_dump_control c = {
+ .start = link->start,
.dump = link->dump,
.done = link->done,
};