summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/core/dev.c64
-rw-r--r--net/core/sock.c3
-rw-r--r--net/ipv4/icmp.c30
-rw-r--r--net/ipv4/route.c2
-rw-r--r--net/ipv4/sysctl_net_ipv4.c7
-rw-r--r--net/mac80211/rx.c2
6 files changed, 94 insertions, 14 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 5a13edfc9f73..c74f35799519 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -222,14 +222,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
static inline void rps_lock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
- spin_lock(&sd->input_pkt_queue.lock);
+ raw_spin_lock(&sd->input_pkt_queue.raw_lock);
#endif
}
static inline void rps_unlock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
- spin_unlock(&sd->input_pkt_queue.lock);
+ raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
#endif
}
@@ -3034,16 +3034,46 @@ int netif_rx_ni(struct sk_buff *skb)
{
int err;
- preempt_disable();
+ migrate_disable();
err = netif_rx(skb);
if (local_softirq_pending())
- do_softirq();
- preempt_enable();
+ thread_do_softirq();
+ migrate_enable();
return err;
}
EXPORT_SYMBOL(netif_rx_ni);
+#ifdef CONFIG_PREEMPT_RT_FULL
+/*
+ * RT runs ksoftirqd as a real time thread and the root_lock is a
+ * "sleeping spinlock". If the trylock fails then we can go into an
+ * infinite loop when ksoftirqd preempted the task which actually
+ * holds the lock, because we requeue q and raise NET_TX softirq
+ * causing ksoftirqd to loop forever.
+ *
+ * It's safe to use spin_lock on RT here as softirqs run in thread
+ * context and cannot deadlock against the thread which is holding
+ * root_lock.
+ *
+ * On !RT the trylock might fail, but there we bail out from the
+ * softirq loop after 10 attempts which we can't do on RT. And the
+ * task holding root_lock cannot be preempted, so the only downside of
+ * that trylock is that we need 10 loops to decide that we should have
+ * given up in the first one :)
+ */
+static inline int take_root_lock(spinlock_t *lock)
+{
+ spin_lock(lock);
+ return 1;
+}
+#else
+static inline int take_root_lock(spinlock_t *lock)
+{
+ return spin_trylock(lock);
+}
+#endif
+
static void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = &__get_cpu_var(softnet_data);
@@ -3082,7 +3112,7 @@ static void net_tx_action(struct softirq_action *h)
head = head->next_sched;
root_lock = qdisc_lock(q);
- if (spin_trylock(root_lock)) {
+ if (take_root_lock(root_lock)) {
smp_mb__before_clear_bit();
clear_bit(__QDISC_STATE_SCHED,
&q->state);
@@ -3407,7 +3437,7 @@ static void flush_backlog(void *arg)
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
if (skb->dev == dev) {
__skb_unlink(skb, &sd->input_pkt_queue);
- kfree_skb(skb);
+ __skb_queue_tail(&sd->tofree_queue, skb);
input_queue_head_incr(sd);
}
}
@@ -3416,10 +3446,13 @@ static void flush_backlog(void *arg)
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
if (skb->dev == dev) {
__skb_unlink(skb, &sd->process_queue);
- kfree_skb(skb);
+ __skb_queue_tail(&sd->tofree_queue, skb);
input_queue_head_incr(sd);
}
}
+
+ if (!skb_queue_empty(&sd->tofree_queue))
+ raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
static int napi_gro_complete(struct sk_buff *skb)
@@ -3896,10 +3929,17 @@ static void net_rx_action(struct softirq_action *h)
struct softnet_data *sd = &__get_cpu_var(softnet_data);
unsigned long time_limit = jiffies + 2;
int budget = netdev_budget;
+ struct sk_buff *skb;
void *have;
local_irq_disable();
+ while ((skb = __skb_dequeue(&sd->tofree_queue))) {
+ local_irq_enable();
+ kfree_skb(skb);
+ local_irq_disable();
+ }
+
while (!list_empty(&sd->poll_list)) {
struct napi_struct *n;
int work, weight;
@@ -6365,6 +6405,9 @@ static int dev_cpu_callback(struct notifier_block *nfb,
netif_rx(skb);
input_queue_head_incr(oldsd);
}
+ while ((skb = __skb_dequeue(&oldsd->tofree_queue))) {
+ kfree_skb(skb);
+ }
return NOTIFY_OK;
}
@@ -6631,8 +6674,9 @@ static int __init net_dev_init(void)
struct softnet_data *sd = &per_cpu(softnet_data, i);
memset(sd, 0, sizeof(*sd));
- skb_queue_head_init(&sd->input_pkt_queue);
- skb_queue_head_init(&sd->process_queue);
+ skb_queue_head_init_raw(&sd->input_pkt_queue);
+ skb_queue_head_init_raw(&sd->process_queue);
+ skb_queue_head_init_raw(&sd->tofree_queue);
sd->completion_queue = NULL;
INIT_LIST_HEAD(&sd->poll_list);
sd->output_queue = NULL;
diff --git a/net/core/sock.c b/net/core/sock.c
index b23f174ab84c..a87eb16faabf 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2031,12 +2031,11 @@ void lock_sock_nested(struct sock *sk, int subclass)
if (sk->sk_lock.owned)
__lock_sock(sk);
sk->sk_lock.owned = 1;
- spin_unlock(&sk->sk_lock.slock);
+ spin_unlock_bh(&sk->sk_lock.slock);
/*
* The sk_lock has mutex_lock() semantics here:
*/
mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
- local_bh_enable();
}
EXPORT_SYMBOL(lock_sock_nested);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index ab188ae12fd9..028eb47226e8 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -67,6 +67,7 @@
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
+#include <linux/sysrq.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
@@ -801,6 +802,30 @@ out_err:
}
/*
+ * 32bit and 64bit have different timestamp length, so we check for
+ * the cookie at offset 20 and verify it is repeated at offset 50
+ */
+#define CO_POS0 20
+#define CO_POS1 50
+#define CO_SIZE sizeof(int)
+#define ICMP_SYSRQ_SIZE 57
+
+/*
+ * We got a ICMP_SYSRQ_SIZE sized ping request. Check for the cookie
+ * pattern and if it matches send the next byte as a trigger to sysrq.
+ */
+static void icmp_check_sysrq(struct net *net, struct sk_buff *skb)
+{
+ int cookie = htonl(net->ipv4.sysctl_icmp_echo_sysrq);
+ char *p = skb->data;
+
+ if (!memcmp(&cookie, p + CO_POS0, CO_SIZE) &&
+ !memcmp(&cookie, p + CO_POS1, CO_SIZE) &&
+ p[CO_POS0 + CO_SIZE] == p[CO_POS1 + CO_SIZE])
+ handle_sysrq(p[CO_POS0 + CO_SIZE]);
+}
+
+/*
* Handle ICMP_ECHO ("ping") requests.
*
* RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo
@@ -827,6 +852,11 @@ static void icmp_echo(struct sk_buff *skb)
icmp_param.data_len = skb->len;
icmp_param.head_len = sizeof(struct icmphdr);
icmp_reply(&icmp_param, skb);
+
+ if (skb->len == ICMP_SYSRQ_SIZE &&
+ net->ipv4.sysctl_icmp_echo_sysrq) {
+ icmp_check_sysrq(net, skb);
+ }
}
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 94cdbc55ca7e..5cb9301bb28c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -251,7 +251,7 @@ struct rt_hash_bucket {
};
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
- defined(CONFIG_PROVE_LOCKING)
+ defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_PREEMPT_RT_FULL)
/*
* Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
* The size of this table is a power of two and depends on the number of CPUS.
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 69fd7201129a..0ecdb72e032a 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -680,6 +680,13 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "icmp_echo_sysrq",
+ .data = &init_net.ipv4.sysctl_icmp_echo_sysrq,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "icmp_ignore_bogus_error_responses",
.data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
.maxlen = sizeof(int),
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index fb123e2e081a..be01bee78a2c 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2952,7 +2952,7 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
struct ieee80211_supported_band *sband;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
- WARN_ON_ONCE(softirq_count() == 0);
+ WARN_ON_ONCE_NONRT(softirq_count() == 0);
if (WARN_ON(status->band < 0 ||
status->band >= IEEE80211_NUM_BANDS))