From c9fdec9f3970eeaa1b176422f46167f5f5158804 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 20 Jul 2015 12:14:39 +0300 Subject: iwlwifi: pcie: fix prepare card flow When the card is not owned by the PCIe bus, we need to acquire ownership first. This flow is implemented in iwl_pcie_prepare_card_hw. Because of a hardware bug, we need to disable link power management before we can request ownership otherwise the other user of the device won't get notified that we are requesting the device which will prevent us from acquire ownership. Same holds for the down flow where we need to make sure that any other potential user is notified that the driver is going down. CC: [4.1] Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/pcie/trans.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 6203c4ad9bba..9e144e71da0b 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -478,10 +478,16 @@ static void iwl_pcie_apm_stop(struct iwl_trans *trans, bool op_mode_leave) if (trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) iwl_set_bits_prph(trans, APMG_PCIDEV_STT_REG, APMG_PCIDEV_STT_VAL_WAKE_ME); - else if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) + else if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) { + iwl_set_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG, + CSR_RESET_LINK_PWR_MGMT_DISABLED); iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, CSR_HW_IF_CONFIG_REG_PREPARE | CSR_HW_IF_CONFIG_REG_ENABLE_PME); + mdelay(1); + iwl_clear_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG, + CSR_RESET_LINK_PWR_MGMT_DISABLED); + } mdelay(5); } @@ -575,6 +581,10 @@ static int iwl_pcie_prepare_card_hw(struct iwl_trans *trans) if (ret >= 0) return 0; + iwl_set_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG, + CSR_RESET_LINK_PWR_MGMT_DISABLED); + msleep(1); + for (iter = 0; iter < 10; iter++) { /* If HW is not ready, prepare the conditions to check again */ iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, @@ -582,8 +592,10 @@ static int iwl_pcie_prepare_card_hw(struct iwl_trans *trans) do { ret = iwl_pcie_set_hw_ready(trans); - if (ret >= 0) - return 0; + if (ret >= 0) { + ret = 0; + goto out; + } usleep_range(200, 1000); t += 200; @@ -593,6 +605,10 @@ static int iwl_pcie_prepare_card_hw(struct iwl_trans *trans) IWL_ERR(trans, "Couldn't prepare the card\n"); +out: + iwl_clear_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG, + CSR_RESET_LINK_PWR_MGMT_DISABLED); + return ret; } -- cgit v1.2.3 From dc9f69b907f3853952d3ffb7918d05a662146712 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Tue, 7 Jul 2015 16:53:42 +0300 Subject: iwlwifi: mvm: Fix regular scan priority The code checks the total number of iterations to differentiate between regular scan and scheduled scan. However, regular scan has a total of one iteration, not zero. As a result, regular scan will have lower priority than it should have, and in case scheduled scan is already running when regular scan is requested, regular scan will be delayed until scheduled scan is aborted. Fix that by checking for total iterations number of one as an identifier for regular scan. Fixes: 133c8259f885 ("iwlwifi: mvm: rename generic_scan_cmd functions to dwell") Signed-off-by: Avraham Stern Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/mvm/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/mvm/scan.c b/drivers/net/wireless/iwlwifi/mvm/scan.c index 5000bfcded61..5514ad6d4e54 100644 --- a/drivers/net/wireless/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/iwlwifi/mvm/scan.c @@ -1023,7 +1023,7 @@ static void iwl_mvm_scan_umac_dwell(struct iwl_mvm *mvm, cmd->scan_priority = iwl_mvm_scan_priority(mvm, IWL_SCAN_PRIORITY_EXT_6); - if (iwl_mvm_scan_total_iterations(params) == 0) + if (iwl_mvm_scan_total_iterations(params) == 1) cmd->ooc_priority = iwl_mvm_scan_priority(mvm, IWL_SCAN_PRIORITY_EXT_6); else -- cgit v1.2.3 From f0ad462189cc898aa0ef8ced849533ee03392bcc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 23 Jul 2015 13:06:10 +0200 Subject: netfilter: nf_conntrack: silence warning on falling back to vmalloc() Since 88eab472ec21 ("netfilter: conntrack: adjust nf_conntrack_buckets default value"), the hashtable can easily hit this warning. We got reports from users that are getting this message in a quite spamming fashion, so better silence this. Signed-off-by: Pablo Neira Ayuso Acked-by: Florian Westphal --- net/netfilter/nf_conntrack_core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 651039ad1681..f1680995fc49 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1544,10 +1544,8 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) sz = nr_slots * sizeof(struct hlist_nulls_head); hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, get_order(sz)); - if (!hash) { - printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); + if (!hash) hash = vzalloc(sz); - } if (hash && nulls) for (i = 0; i < nr_slots; i++) -- cgit v1.2.3 From 1a727c63612fc582370cf3dc01239d3d239743b5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 28 Jul 2015 01:42:28 +0300 Subject: netfilter: nf_conntrack: checking for IS_ERR() instead of NULL We recently changed this from nf_conntrack_alloc() to nf_ct_tmpl_alloc() so the error handling needs to changed to check for NULL instead of IS_ERR(). Fixes: 0838aa7fcfcd ('netfilter: fix netns dependencies with conntrack templates') Signed-off-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_synproxy_core.c | 4 +--- net/netfilter/xt_CT.c | 5 +++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 71f1e9fdfa18..d7f168527903 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -353,10 +353,8 @@ static int __net_init synproxy_net_init(struct net *net) int err = -ENOMEM; ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL); - if (IS_ERR(ct)) { - err = PTR_ERR(ct); + if (!ct) goto err1; - } if (!nfct_seqadj_ext_add(ct)) goto err2; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index c6630030c912..43ddeee404e9 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -202,9 +202,10 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, goto err1; ct = nf_ct_tmpl_alloc(par->net, info->zone, GFP_KERNEL); - ret = PTR_ERR(ct); - if (IS_ERR(ct)) + if (!ct) { + ret = -ENOMEM; goto err2; + } ret = 0; if ((info->ct_events || info->exp_events) && -- cgit v1.2.3 From aecdc63d87891c75e60906973c7b7c9cd58403d6 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 29 Jul 2015 23:06:41 +0300 Subject: iwlwifi: pcie: fix stuck queue detection for sleeping clients The stuck queue detection mechanism allows to detect queues that are stuck. For sleeping clients, a queue may rightfully be stuck: if a poor client implementation stays asleep for more than 10s, then we don't want to trigger recovery flows because of that client. In order to cope with this, I added a mechanism that monitors the state of the client: when a client goes to sleep, the timer of his queues is frozen. When he wakes up, the timer is reset to the right value so that if a client was awake for more than 10s and the queues are stuck, only then, the recovery flow will kick in. This is valid only on non-shared queues: A-MPDU queues. There was a bug in case we Tx to a sleeping client that has an empty A-MPDU queue: the timer was armed to now + 10s. This is bad, but pretty harmless. The problem is that when the client wakes up, the timer is modified to be now + remainder. But remainder is 0 since the queue was empty when that client went to sleep... Fix this by checking the state of the client before playing with the timer when we add a packet to an empty queue. Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/iwlwifi/pcie/tx.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/tx.c b/drivers/net/wireless/iwlwifi/pcie/tx.c index 2b86c2135de3..607acb53c847 100644 --- a/drivers/net/wireless/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/iwlwifi/pcie/tx.c @@ -1875,8 +1875,19 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, /* start timer if queue currently empty */ if (q->read_ptr == q->write_ptr) { - if (txq->wd_timeout) - mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout); + if (txq->wd_timeout) { + /* + * If the TXQ is active, then set the timer, if not, + * set the timer in remainder so that the timer will + * be armed with the right value when the station will + * wake up. + */ + if (!txq->frozen) + mod_timer(&txq->stuck_timer, + jiffies + txq->wd_timeout); + else + txq->frozen_expiry_remainder = txq->wd_timeout; + } IWL_DEBUG_RPM(trans, "Q: %d first tx - take ref\n", q->id); iwl_trans_pcie_ref(trans); } -- cgit v1.2.3 From 5d5cd85ff441534a52f23f821d0a7c644d3b6cce Mon Sep 17 00:00:00 2001 From: Mike Looijmans Date: Tue, 28 Jul 2015 07:51:01 +0200 Subject: rsi: Fix failure to load firmware after memory leak fix and fix the leak Fixes commit eae79b4f3e82 ("rsi: fix memory leak in rsi_load_ta_instructions()") which stopped the driver from functioning. Firmware data has been allocated using vmalloc(), resulting in memory that cannot be used for DMA. Hence the firmware was first copied to a buffer allocated with kmalloc() in the original code. This patch reverts the commit and only calls "kfree()" to release the buffer after sending the data. This fixes the memory leak without breaking the driver. Add a comment to the kmemdup() calls to explain why this is done, and abort if memory allocation fails. Tested on a Topic Miami-Florida board which contains the rsi SDIO chip. Also added the same kfree() call to the USB glue driver. This was not tested on actual hardware though, as I only have the SDIO version. Fixes: eae79b4f3e82 ("rsi: fix memory leak in rsi_load_ta_instructions()") Signed-off-by: Mike Looijmans Cc: stable@vger.kernel.org Signed-off-by: Kalle Valo --- drivers/net/wireless/rsi/rsi_91x_sdio_ops.c | 8 +++++++- drivers/net/wireless/rsi/rsi_91x_usb_ops.c | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c index b6cc9ff47fc2..1c6788aecc62 100644 --- a/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio_ops.c @@ -172,6 +172,7 @@ static int rsi_load_ta_instructions(struct rsi_common *common) (struct rsi_91x_sdiodev *)adapter->rsi_dev; u32 len; u32 num_blocks; + const u8 *fw; const struct firmware *fw_entry = NULL; u32 block_size = dev->tx_blk_size; int status = 0; @@ -200,6 +201,10 @@ static int rsi_load_ta_instructions(struct rsi_common *common) return status; } + /* Copy firmware into DMA-accessible memory */ + fw = kmemdup(fw_entry->data, fw_entry->size, GFP_KERNEL); + if (!fw) + return -ENOMEM; len = fw_entry->size; if (len % 4) @@ -210,7 +215,8 @@ static int rsi_load_ta_instructions(struct rsi_common *common) rsi_dbg(INIT_ZONE, "%s: Instruction size:%d\n", __func__, len); rsi_dbg(INIT_ZONE, "%s: num blocks: %d\n", __func__, num_blocks); - status = rsi_copy_to_card(common, fw_entry->data, len, num_blocks); + status = rsi_copy_to_card(common, fw, len, num_blocks); + kfree(fw); release_firmware(fw_entry); return status; } diff --git a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c index 1106ce76707e..30c2cf7fa93b 100644 --- a/drivers/net/wireless/rsi/rsi_91x_usb_ops.c +++ b/drivers/net/wireless/rsi/rsi_91x_usb_ops.c @@ -146,7 +146,10 @@ static int rsi_load_ta_instructions(struct rsi_common *common) return status; } + /* Copy firmware into DMA-accessible memory */ fw = kmemdup(fw_entry->data, fw_entry->size, GFP_KERNEL); + if (!fw) + return -ENOMEM; len = fw_entry->size; if (len % 4) @@ -158,6 +161,7 @@ static int rsi_load_ta_instructions(struct rsi_common *common) rsi_dbg(INIT_ZONE, "%s: num blocks: %d\n", __func__, num_blocks); status = rsi_copy_to_card(common, fw, len, num_blocks); + kfree(fw); release_firmware(fw_entry); return status; } -- cgit v1.2.3 From 098697dbad9070249eb07a0241c4001aa367bb89 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Wed, 29 Jul 2015 23:36:30 +0200 Subject: b43: fix extpa_gain check for 2GHz On the 2GHz and and on the 5GHZ band only the extpa_gain setting from the 5GHz band was checked. this patch makes it check the property from the correct band. Signed-off-by: Hauke Mehrtens Signed-off-by: Kalle Valo --- drivers/net/wireless/b43/tables_nphy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/b43/tables_nphy.c b/drivers/net/wireless/b43/tables_nphy.c index 25d1cbd34306..b2f0d245bcf3 100644 --- a/drivers/net/wireless/b43/tables_nphy.c +++ b/drivers/net/wireless/b43/tables_nphy.c @@ -3728,7 +3728,7 @@ const u32 *b43_nphy_get_tx_gain_table(struct b43_wldev *dev) switch (phy->rev) { case 6: case 5: - if (sprom->fem.ghz5.extpa_gain == 3) + if (sprom->fem.ghz2.extpa_gain == 3) return b43_ntab_tx_gain_epa_rev3_hi_pwr_2g; /* fall through */ case 4: -- cgit v1.2.3 From 7c62940165e9ae4004ce4e6b5117330bab94df68 Mon Sep 17 00:00:00 2001 From: Luis Felipe Dominguez Vega Date: Wed, 29 Jul 2015 21:11:20 -0500 Subject: rtlwifi: Fix NULL dereference when PCI driver used as an AP In commit 33511b157bbcebaef853cc1811992b664a2e5862 ("rtlwifi: add support to send beacon frame"), the mechanism for sending beacons was established. That patch works correctly for rtl8192cu, but there is a possibility of getting the following warnings in the PCI drivers: WARNING: CPU: 1 PID: 2439 at net/mac80211/driver-ops.h:12 ieee80211_bss_info_change_notify+0x179/0x1d0 [mac80211]() wlp5s0: Failed check-sdata-in-driver check, flags: 0x0 The warning is followed by a NULL pointer dereference as follows: BUG: unable to handle kernel NULL pointer dereference at 0000000000000006 IP: [] rtl_get_tcb_desc+0x5e/0x760 [rtlwifi] This problem was reported at http://thread.gmane.org/gmane.linux.kernel.wireless.general/138645, but no solution was found at that time. The problem was also reported at https://bugzilla.kernel.org/show_bug.cgi?id=9744 and this solution was developed and tested there. The USB driver works with a NULL final argument in the adapter_tx() callback; however, the PCI drivers need a struct rtl_tcb_desc in that position. Fixes: 33511b157bbc ("rtlwifi: add support to send beacon frame.") Signed-off-by: Luis Felipe Dominguez Vega Signed-off-by: Larry Finger Cc: Stable [3.19+] Signed-off-by: Kalle Valo --- drivers/net/wireless/rtlwifi/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/rtlwifi/core.c b/drivers/net/wireless/rtlwifi/core.c index 3b3a88b53b11..585d0883c7e5 100644 --- a/drivers/net/wireless/rtlwifi/core.c +++ b/drivers/net/wireless/rtlwifi/core.c @@ -1015,9 +1015,12 @@ static void send_beacon_frame(struct ieee80211_hw *hw, { struct rtl_priv *rtlpriv = rtl_priv(hw); struct sk_buff *skb = ieee80211_beacon_get(hw, vif); + struct rtl_tcb_desc tcb_desc; - if (skb) - rtlpriv->intf_ops->adapter_tx(hw, NULL, skb, NULL); + if (skb) { + memset(&tcb_desc, 0, sizeof(struct rtl_tcb_desc)); + rtlpriv->intf_ops->adapter_tx(hw, NULL, skb, &tcb_desc); + } } static void rtl_op_bss_info_changed(struct ieee80211_hw *hw, -- cgit v1.2.3 From 1ebd47efa4e17391dfac8caa349c6a8d35f996d1 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 2 Aug 2015 19:29:16 +0200 Subject: rocker: free netdevice during netdevice removal When removing a port's netdevice in 'rocker_remove_ports', we should also free the allocated 'net_device' structure. Do that by calling 'free_netdev' after unregistering it. Signed-off-by: Ido Schimmel Signed-off-by: Jiri Pirko Fixes: 4b8ac9660af ("rocker: introduce rocker switch driver") Acked-by: Scott Feldman Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 2d8578cade03..2e7f9a2834be 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -4821,6 +4821,7 @@ static void rocker_remove_ports(const struct rocker *rocker) rocker_port_ig_tbl(rocker_port, SWITCHDEV_TRANS_NONE, ROCKER_OP_FLAG_REMOVE); unregister_netdev(rocker_port->dev); + free_netdev(rocker_port->dev); } kfree(rocker->ports); } -- cgit v1.2.3 From 3d0e0af40672a0bf16ca0f0591165535138c1f30 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 31 Jul 2015 17:53:39 -0700 Subject: fq_codel: explicitly reset flows in ->reset() Alex reported the following crash when using fq_codel with htb: crash> bt PID: 630839 TASK: ffff8823c990d280 CPU: 14 COMMAND: "tc" [... snip ...] #8 [ffff8820ceec17a0] page_fault at ffffffff8160a8c2 [exception RIP: htb_qlen_notify+24] RIP: ffffffffa0841718 RSP: ffff8820ceec1858 RFLAGS: 00010282 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff88241747b400 RDX: ffff88241747b408 RSI: 0000000000000000 RDI: ffff8811fb27d000 RBP: ffff8820ceec1868 R8: ffff88120cdeff24 R9: ffff88120cdeff30 R10: 0000000000000bd4 R11: ffffffffa0840919 R12: ffffffffa0843340 R13: 0000000000000000 R14: 0000000000000001 R15: ffff8808dae5c2e8 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #9 [...] qdisc_tree_decrease_qlen at ffffffff81565375 #10 [...] fq_codel_dequeue at ffffffffa084e0a0 [sch_fq_codel] #11 [...] fq_codel_reset at ffffffffa084e2f8 [sch_fq_codel] #12 [...] qdisc_destroy at ffffffff81560d2d #13 [...] htb_destroy_class at ffffffffa08408f8 [sch_htb] #14 [...] htb_put at ffffffffa084095c [sch_htb] #15 [...] tc_ctl_tclass at ffffffff815645a3 #16 [...] rtnetlink_rcv_msg at ffffffff81552cb0 [... snip ...] As Jamal pointed out, there is actually no need to call dequeue to purge the queued skb's in reset, data structures can be just reset explicitly. Therefore, we reset everything except config's and stats, so that we would have a fresh start after device flipping. Fixes: 4b549a2ef4be ("fq_codel: Fair Queue Codel AQM") Reported-by: Alex Gartrell Cc: Alex Gartrell Cc: Jamal Hadi Salim Signed-off-by: Eric Dumazet [xiyou.wangcong@gmail.com: added codel_vars_init() and qdisc_qstats_backlog_dec()] Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_fq_codel.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 21ca33c9f036..a9ba030435a2 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -288,10 +288,26 @@ begin: static void fq_codel_reset(struct Qdisc *sch) { - struct sk_buff *skb; + struct fq_codel_sched_data *q = qdisc_priv(sch); + int i; - while ((skb = fq_codel_dequeue(sch)) != NULL) - kfree_skb(skb); + INIT_LIST_HEAD(&q->new_flows); + INIT_LIST_HEAD(&q->old_flows); + for (i = 0; i < q->flows_cnt; i++) { + struct fq_codel_flow *flow = q->flows + i; + + while (flow->head) { + struct sk_buff *skb = dequeue_head(flow); + + qdisc_qstats_backlog_dec(sch, skb); + kfree_skb(skb); + } + + INIT_LIST_HEAD(&flow->flowchain); + codel_vars_init(&flow->cvars); + } + memset(q->backlogs, 0, q->flows_cnt * sizeof(u32)); + sch->q.qlen = 0; } static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = { -- cgit v1.2.3 From 741e3b9902d11585e18bfc7f8d47e913616bb070 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sun, 2 Aug 2015 13:24:13 -0500 Subject: rtlwifi: rtl8723be: Add module parameter for MSI interrupts The driver code allows for the disabling of MSI interrupts; however the module_parm line was missed and the option fails to show with modinfo. Signed-off-by: Larry Finger Cc: Stable [3.15+] Signed-off-by: Kalle Valo --- drivers/net/wireless/rtlwifi/rtl8723be/sw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/rtlwifi/rtl8723be/sw.c b/drivers/net/wireless/rtlwifi/rtl8723be/sw.c index 1017f02d7bf7..7bf88d9dcdc3 100644 --- a/drivers/net/wireless/rtlwifi/rtl8723be/sw.c +++ b/drivers/net/wireless/rtlwifi/rtl8723be/sw.c @@ -385,6 +385,7 @@ module_param_named(debug, rtl8723be_mod_params.debug, int, 0444); module_param_named(ips, rtl8723be_mod_params.inactiveps, bool, 0444); module_param_named(swlps, rtl8723be_mod_params.swctrl_lps, bool, 0444); module_param_named(fwlps, rtl8723be_mod_params.fwctrl_lps, bool, 0444); +module_param_named(msi, rtl8723be_mod_params.msi_support, bool, 0444); module_param_named(disable_watchdog, rtl8723be_mod_params.disable_watchdog, bool, 0444); MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n"); -- cgit v1.2.3 From 3576fd794b38306e196498ac54bb3b21c32e1ae4 Mon Sep 17 00:00:00 2001 From: Glenn Griffin Date: Mon, 3 Aug 2015 09:56:54 -0700 Subject: openvswitch: Fix L4 checksum handling when dealing with IP fragments openvswitch modifies the L4 checksum of a packet when modifying the ip address. When an IP packet is fragmented only the first fragment contains an L4 header and checksum. Prior to this change openvswitch would modify all fragments, modifying application data in non-first fragments, causing checksum failures in the reassembled packet. Signed-off-by: Glenn Griffin Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 8a8c0b8b4f63..ee34f474ad14 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -273,28 +273,36 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key, return 0; } -static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh, - __be32 *addr, __be32 new_addr) +static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh, + __be32 addr, __be32 new_addr) { int transport_len = skb->len - skb_transport_offset(skb); + if (nh->frag_off & htons(IP_OFFSET)) + return; + if (nh->protocol == IPPROTO_TCP) { if (likely(transport_len >= sizeof(struct tcphdr))) inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb, - *addr, new_addr, 1); + addr, new_addr, 1); } else if (nh->protocol == IPPROTO_UDP) { if (likely(transport_len >= sizeof(struct udphdr))) { struct udphdr *uh = udp_hdr(skb); if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace4(&uh->check, skb, - *addr, new_addr, 1); + addr, new_addr, 1); if (!uh->check) uh->check = CSUM_MANGLED_0; } } } +} +static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh, + __be32 *addr, __be32 new_addr) +{ + update_ip_l4_checksum(skb, nh, *addr, new_addr); csum_replace4(&nh->check, *addr, new_addr); skb_clear_hash(skb); *addr = new_addr; -- cgit v1.2.3 From 636dba8e12d797357b2063981476390f11262c08 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 30 Jul 2015 17:12:20 -0700 Subject: act_mirred: avoid calling tcf_hash_release() when binding When we share an action within a filter, the bind refcnt should increase, therefore we should not call tcf_hash_release(). Cc: Jamal Hadi Salim Cc: Daniel Borkmann Signed-off-by: Cong Wang Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index a42a3b257226..268545050ddb 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -98,6 +98,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, return ret; ret = ACT_P_CREATED; } else { + if (bind) + return 0; if (!ovr) { tcf_hash_release(a, bind); return -EEXIST; -- cgit v1.2.3 From 468b732b6f76b138c0926eadf38ac88467dcd271 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 1 Aug 2015 15:33:26 +0300 Subject: rds: fix an integer overflow test in rds_info_getsockopt() "len" is a signed integer. We check that len is not negative, so it goes from zero to INT_MAX. PAGE_SIZE is unsigned long so the comparison is type promoted to unsigned long. ULONG_MAX - 4095 is a higher than INT_MAX so the condition can never be true. I don't know if this is harmful but it seems safe to limit "len" to INT_MAX - 4095. Fixes: a8c879a7ee98 ('RDS: Info and stats') Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/rds/info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/info.c b/net/rds/info.c index 9a6b4f66187c..140a44a5f7b7 100644 --- a/net/rds/info.c +++ b/net/rds/info.c @@ -176,7 +176,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, /* check for all kinds of wrapping and the like */ start = (unsigned long)optval; - if (len < 0 || len + PAGE_SIZE - 1 < len || start + len < start) { + if (len < 0 || len > INT_MAX - PAGE_SIZE + 1 || start + len < start) { ret = -EINVAL; goto out; } -- cgit v1.2.3 From 2fc09962e24ace45154d0c16024f1eb15700f3e8 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Mon, 3 Aug 2015 11:18:12 +0800 Subject: 3c59x: Fix resource leaks in vortex_open When vortex_up is failed, the skb buffers allocated by __netdev_alloc_skb in vortex_open are not released, which may cause resource leaks. This bug has been submitted before. This patch modifies the error handling code to fix it. Signed-off-by: Jia-Ju Bai Signed-off-by: David S. Miller --- drivers/net/ethernet/3com/3c59x.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 2d1ce3c5d0dd..753887d02b46 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -1763,16 +1763,9 @@ vortex_open(struct net_device *dev) vp->rx_ring[i].addr = cpu_to_le32(pci_map_single(VORTEX_PCI(vp), skb->data, PKT_BUF_SZ, PCI_DMA_FROMDEVICE)); } if (i != RX_RING_SIZE) { - int j; pr_emerg("%s: no memory for rx ring\n", dev->name); - for (j = 0; j < i; j++) { - if (vp->rx_skbuff[j]) { - dev_kfree_skb(vp->rx_skbuff[j]); - vp->rx_skbuff[j] = NULL; - } - } retval = -ENOMEM; - goto err_free_irq; + goto err_free_skb; } /* Wrap the ring. */ vp->rx_ring[i-1].next = cpu_to_le32(vp->rx_ring_dma); @@ -1782,7 +1775,13 @@ vortex_open(struct net_device *dev) if (!retval) goto out; -err_free_irq: +err_free_skb: + for (i = 0; i < RX_RING_SIZE; i++) { + if (vp->rx_skbuff[i]) { + dev_kfree_skb(vp->rx_skbuff[i]); + vp->rx_skbuff[i] = NULL; + } + } free_irq(dev->irq, dev); err: if (vortex_debug > 1) -- cgit v1.2.3 From 10e2eb878f3ca07ac2f05fa5ca5e6c4c9174a27a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 1 Aug 2015 12:14:33 +0200 Subject: udp: fix dst races with multicast early demux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multicast dst are not cached. They carry DST_NOCACHE. As mentioned in commit f8864972126899 ("ipv4: fix dst race in sk_dst_get()"), these dst need special care before caching them into a socket. Caching them is allowed only if their refcnt was not 0, ie we must use atomic_inc_not_zero() Also, we must use READ_ONCE() to fetch sk->sk_rx_dst, as mentioned in commit d0c294c53a771 ("tcp: prevent fetching dst twice in early demux code") Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Tested-by: Gregory Hoggarth Signed-off-by: Eric Dumazet Reported-by: Gregory Hoggarth Reported-by: Alex Gartrell Cc: Michal Kubeček Signed-off-by: David S. Miller --- net/ipv4/udp.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 83aa604f9273..1b8c5ba7d5f7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1995,12 +1995,19 @@ void udp_v4_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_efree; - dst = sk->sk_rx_dst; + dst = READ_ONCE(sk->sk_rx_dst); if (dst) dst = dst_check(dst, 0); - if (dst) - skb_dst_set_noref(skb, dst); + if (dst) { + /* DST_NOCACHE can not be used without taking a reference */ + if (dst->flags & DST_NOCACHE) { + if (likely(atomic_inc_not_zero(&dst->__refcnt))) + skb_dst_set(skb, dst); + } else { + skb_dst_set_noref(skb, dst); + } + } } int udp_rcv(struct sk_buff *skb) -- cgit v1.2.3 From 2475b22526d70234ecfe4a1ff88aed69badefba9 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Mon, 3 Aug 2015 15:38:03 +0100 Subject: xen-netback: Allocate fraglist early to avoid complex rollback Determine if a fraglist is needed in the tx path, and allocate it if necessary before setting up the copy and map operations. Otherwise, undoing the copy and map operations is tricky. This fixes a use-after-free: if allocating the fraglist failed, the copy and map operations that had been set up were still executed, writing over the data area of a freed skb. Signed-off-by: Ross Lagerwall Signed-off-by: David S. Miller --- drivers/net/xen-netback/netback.c | 61 +++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 7d50711476fe..1b406e706a01 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -810,23 +810,17 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size) static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue, struct sk_buff *skb, struct xen_netif_tx_request *txp, - struct gnttab_map_grant_ref *gop) + struct gnttab_map_grant_ref *gop, + unsigned int frag_overflow, + struct sk_buff *nskb) { struct skb_shared_info *shinfo = skb_shinfo(skb); skb_frag_t *frags = shinfo->frags; u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; int start; pending_ring_idx_t index; - unsigned int nr_slots, frag_overflow = 0; + unsigned int nr_slots; - /* At this point shinfo->nr_frags is in fact the number of - * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. - */ - if (shinfo->nr_frags > MAX_SKB_FRAGS) { - frag_overflow = shinfo->nr_frags - MAX_SKB_FRAGS; - BUG_ON(frag_overflow > MAX_SKB_FRAGS); - shinfo->nr_frags = MAX_SKB_FRAGS; - } nr_slots = shinfo->nr_frags; /* Skip first skb fragment if it is on same page as header fragment. */ @@ -841,13 +835,6 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *que } if (frag_overflow) { - struct sk_buff *nskb = xenvif_alloc_skb(0); - if (unlikely(nskb == NULL)) { - if (net_ratelimit()) - netdev_err(queue->vif->dev, - "Can't allocate the frag_list skb.\n"); - return NULL; - } shinfo = skb_shinfo(nskb); frags = shinfo->frags; @@ -1175,9 +1162,10 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, unsigned *copy_ops, unsigned *map_ops) { - struct gnttab_map_grant_ref *gop = queue->tx_map_ops, *request_gop; - struct sk_buff *skb; + struct gnttab_map_grant_ref *gop = queue->tx_map_ops; + struct sk_buff *skb, *nskb; int ret; + unsigned int frag_overflow; while (skb_queue_len(&queue->tx_queue) < budget) { struct xen_netif_tx_request txreq; @@ -1265,6 +1253,29 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, break; } + skb_shinfo(skb)->nr_frags = ret; + if (data_len < txreq.size) + skb_shinfo(skb)->nr_frags++; + /* At this point shinfo->nr_frags is in fact the number of + * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. + */ + frag_overflow = 0; + nskb = NULL; + if (skb_shinfo(skb)->nr_frags > MAX_SKB_FRAGS) { + frag_overflow = skb_shinfo(skb)->nr_frags - MAX_SKB_FRAGS; + BUG_ON(frag_overflow > MAX_SKB_FRAGS); + skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS; + nskb = xenvif_alloc_skb(0); + if (unlikely(nskb == NULL)) { + kfree_skb(skb); + xenvif_tx_err(queue, &txreq, idx); + if (net_ratelimit()) + netdev_err(queue->vif->dev, + "Can't allocate the frag_list skb.\n"); + break; + } + } + if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) { struct xen_netif_extra_info *gso; gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; @@ -1272,6 +1283,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, if (xenvif_set_skb_gso(queue->vif, skb, gso)) { /* Failure in xenvif_set_skb_gso is fatal. */ kfree_skb(skb); + kfree_skb(nskb); break; } } @@ -1294,9 +1306,7 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, (*copy_ops)++; - skb_shinfo(skb)->nr_frags = ret; if (data_len < txreq.size) { - skb_shinfo(skb)->nr_frags++; frag_set_pending_idx(&skb_shinfo(skb)->frags[0], pending_idx); xenvif_tx_create_map_op(queue, pending_idx, &txreq, gop); @@ -1310,13 +1320,8 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, queue->pending_cons++; - request_gop = xenvif_get_requests(queue, skb, txfrags, gop); - if (request_gop == NULL) { - kfree_skb(skb); - xenvif_tx_err(queue, &txreq, idx); - break; - } - gop = request_gop; + gop = xenvif_get_requests(queue, skb, txfrags, gop, + frag_overflow, nskb); __skb_queue_tail(&queue->tx_queue, skb); -- cgit v1.2.3 From f202a666e933f3c7557126d63833a6a3b577ac15 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 16 Jun 2015 21:06:24 +0200 Subject: batman-adv: avoid DAT to mess up LAN state When a node running DAT receives an ARP request from the LAN for the first time, it is likely that this node will request the ARP entry through the distributed ARP table (DAT) in the mesh. Once a DAT reply is received the asking node must check if the MAC address for which the IP address has been asked is local. If it is, the node must drop the ARP reply bceause the client should have replied on its own locally. Forwarding this reply means fooling any L2 bridge (e.g. Ethernet switches) lying between the batman-adv node and the LAN. This happens because the L2 bridge will think that the client sending the ARP reply lies somewhere in the mesh, while this node is sitting in the same LAN. Reported-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/distributed-arp-table.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index fb54e6aed096..6d0b471eede8 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -1138,6 +1138,9 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @skb: packet to check * @hdr_size: size of the encapsulation header + * + * Returns true if the packet was snooped and consumed by DAT. False if the + * packet has to be delivered to the interface */ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) @@ -1145,7 +1148,7 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, uint16_t type; __be32 ip_src, ip_dst; uint8_t *hw_src, *hw_dst; - bool ret = false; + bool dropped = false; unsigned short vid; if (!atomic_read(&bat_priv->distributed_arp_table)) @@ -1174,12 +1177,17 @@ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, /* if this REPLY is directed to a client of mine, let's deliver the * packet to the interface */ - ret = !batadv_is_my_client(bat_priv, hw_dst, vid); + dropped = !batadv_is_my_client(bat_priv, hw_dst, vid); + + /* if this REPLY is sent on behalf of a client of mine, let's drop the + * packet because the client will reply by itself + */ + dropped |= batadv_is_my_client(bat_priv, hw_src, vid); out: - if (ret) + if (dropped) kfree_skb(skb); - /* if ret == false -> packet has to be delivered to the interface */ - return ret; + /* if dropped == false -> deliver to the interface */ + return dropped; } /** -- cgit v1.2.3 From 354136bcc3c4f40a2813bba8f57ca5267d812d15 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Tue, 9 Jun 2015 21:24:36 +0800 Subject: batman-adv: fix kernel crash due to missing NULL checks batadv_softif_vlan_get() may return NULL which has to be verified by the caller. Fixes: 35df3b298fc8 ("batman-adv: fix TT VLAN inconsistency on VLAN re-add") Reported-by: Ryan Thompson Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/soft-interface.c | 3 +++ net/batman-adv/translation-table.c | 18 ++++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index c002961da75d..a2fc843c2243 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -479,6 +479,9 @@ out: */ void batadv_softif_vlan_free_ref(struct batadv_softif_vlan *vlan) { + if (!vlan) + return; + if (atomic_dec_and_test(&vlan->refcount)) { spin_lock_bh(&vlan->bat_priv->softif_vlan_list_lock); hlist_del_rcu(&vlan->list); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index b4824951010b..38b83c50f936 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -594,6 +594,9 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, /* increase the refcounter of the related vlan */ vlan = batadv_softif_vlan_get(bat_priv, vid); + if (WARN(!vlan, "adding TT local entry %pM to non-existent VLAN %d", + addr, BATADV_PRINT_VID(vid))) + goto out; batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n", @@ -1066,6 +1069,9 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, /* decrease the reference held for this vlan */ vlan = batadv_softif_vlan_get(bat_priv, vid); + if (!vlan) + goto out; + batadv_softif_vlan_free_ref(vlan); batadv_softif_vlan_free_ref(vlan); @@ -1166,8 +1172,10 @@ static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) /* decrease the reference held for this vlan */ vlan = batadv_softif_vlan_get(bat_priv, tt_common_entry->vid); - batadv_softif_vlan_free_ref(vlan); - batadv_softif_vlan_free_ref(vlan); + if (vlan) { + batadv_softif_vlan_free_ref(vlan); + batadv_softif_vlan_free_ref(vlan); + } batadv_tt_local_entry_free_ref(tt_local); } @@ -3207,8 +3215,10 @@ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) /* decrease the reference held for this vlan */ vlan = batadv_softif_vlan_get(bat_priv, tt_common->vid); - batadv_softif_vlan_free_ref(vlan); - batadv_softif_vlan_free_ref(vlan); + if (vlan) { + batadv_softif_vlan_free_ref(vlan); + batadv_softif_vlan_free_ref(vlan); + } batadv_tt_local_entry_free_ref(tt_local); } -- cgit v1.2.3 From ef72706a0543d0c3a5ab29bd6378fdfb368118d9 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Wed, 17 Jun 2015 20:01:36 +0800 Subject: batman-adv: protect tt_local_entry from concurrent delete events The tt_local_entry deletion performed in batadv_tt_local_remove() was neither protecting against simultaneous deletes nor checking whether the element was still part of the list before calling hlist_del_rcu(). Replacing the hlist_del_rcu() call with batadv_hash_remove() provides adequate protection via hash spinlocks as well as an is-element-still-in-hash check to avoid 'blind' hash removal. Fixes: 068ee6e204e1 ("batman-adv: roaming handling mechanism redesign") Reported-by: alfonsname@web.de Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/translation-table.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 38b83c50f936..5e953297d3b2 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1037,6 +1037,7 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, struct batadv_tt_local_entry *tt_local_entry; uint16_t flags, curr_flags = BATADV_NO_FLAGS; struct batadv_softif_vlan *vlan; + void *tt_entry_exists; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!tt_local_entry) @@ -1064,7 +1065,15 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, * immediately purge it */ batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL); - hlist_del_rcu(&tt_local_entry->common.hash_entry); + + tt_entry_exists = batadv_hash_remove(bat_priv->tt.local_hash, + batadv_compare_tt, + batadv_choose_tt, + &tt_local_entry->common); + if (!tt_entry_exists) + goto out; + + /* extra call to free the local tt entry */ batadv_tt_local_entry_free_ref(tt_local_entry); /* decrease the reference held for this vlan */ -- cgit v1.2.3 From 27a4d5efd417b6ef3190e9af357715532d4617a3 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 24 Jun 2015 14:50:19 +0200 Subject: batman-adv: initialize up/down values when adding a gateway Without this initialization, gateways which actually announce up/down bandwidth of 0/0 could be added. If these nodes get purged via _batadv_purge_orig() later, the gw_node structure does not get removed since batadv_gw_node_delete() updates the gw_node with up/down bandwidth of 0/0, and the updating function then discards the change and does not free gw_node. This results in leaking the gw_node structures, which references other structures: gw_node -> orig_node -> orig_node_ifinfo -> hardif. When removing the interface later, the open reference on the hardif may cause hangs with the infamous "unregister_netdevice: waiting for mesh1 to become free. Usage count = 1" message. Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/gateway_client.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index bb0158620628..cffa92dd9877 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -439,6 +439,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, INIT_HLIST_NODE(&gw_node->list); gw_node->orig_node = orig_node; + gw_node->bandwidth_down = ntohl(gateway->bandwidth_down); + gw_node->bandwidth_up = ntohl(gateway->bandwidth_up); atomic_set(&gw_node->refcount, 1); spin_lock_bh(&bat_priv->gw.list_lock); -- cgit v1.2.3 From f58e5aa7b873b8a4376b816993d4b0e903befcba Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Tue, 4 Aug 2015 18:34:00 -0700 Subject: netfilter: conntrack: Use flags in nf_ct_tmpl_alloc() The flags were ignored for this function when it was introduced. Also fix the style problem in kzalloc. Fixes: 0838aa7fc (netfilter: fix netns dependencies with conntrack templates) Signed-off-by: Joe Stringer Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f1680995fc49..3c20d02aee73 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -292,7 +292,7 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags) { struct nf_conn *tmpl; - tmpl = kzalloc(sizeof(struct nf_conn), GFP_KERNEL); + tmpl = kzalloc(sizeof(*tmpl), flags); if (tmpl == NULL) return NULL; @@ -303,7 +303,7 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags) if (zone) { struct nf_conntrack_zone *nf_ct_zone; - nf_ct_zone = nf_ct_ext_add(tmpl, NF_CT_EXT_ZONE, GFP_ATOMIC); + nf_ct_zone = nf_ct_ext_add(tmpl, NF_CT_EXT_ZONE, flags); if (!nf_ct_zone) goto out_free; nf_ct_zone->id = zone; -- cgit v1.2.3 From cb92205bad2e4dd630b884142dd707b72504c200 Mon Sep 17 00:00:00 2001 From: Jakub Pawlowski Date: Wed, 5 Aug 2015 23:16:29 +0200 Subject: Bluetooth: fix MGMT_EV_NEW_LONG_TERM_KEY event This patch fixes how MGMT_EV_NEW_LONG_TERM_KEY event is build. Right now val vield is filled with only 1 byte, instead of whole value. This bug was introduced in commit 1fc62c526a57 ("Bluetooth: Fix exposing full value of shortened LTKs") Before that patch, if you paired with device using bluetoothd using simple pairing, and then restarted bluetoothd, you would be able to re-connect, but device would fail to establish encryption and would terminate connection. After this patch connecting after bluetoothd restart works fine. Signed-off-by: Jakub Pawlowski Signed-off-by: Marcel Holtmann --- net/bluetooth/mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7998fb279165..92720f3fe573 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7820,7 +7820,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent) /* Make sure we copy only the significant bytes based on the * encryption key size, and set the rest of the value to zeroes. */ - memcpy(ev.key.val, key->val, sizeof(key->enc_size)); + memcpy(ev.key.val, key->val, key->enc_size); memset(ev.key.val + key->enc_size, 0, sizeof(ev.key.val) - key->enc_size); -- cgit v1.2.3 From 14d2b7c1a96ef37eb571599c73d4a1a606b964d6 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 3 Aug 2015 17:50:11 +0200 Subject: net: fec: fix initial runtime PM refcount MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The clocks are initially active and thus the device is marked active. This still keeps the PM refcount at 0, the pm_runtime_put_autosuspend() call at the end of probe then leaves us with an invalid refcount of -1, which in turn leads to the device staying in suspended state even though netdev open had been called. Fix this by initializing the refcount to be coherent with the initial device status. Fixes: 8fff755e9f8 (net: fec: Ensure clocks are enabled while using mdio bus) Signed-off-by: Lucas Stach Tested-by: Uwe Kleine-König Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 32e3807c650e..271bb5862346 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3433,6 +3433,7 @@ fec_probe(struct platform_device *pdev) pm_runtime_set_autosuspend_delay(&pdev->dev, FEC_MDIO_PM_TIMEOUT); pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_get_noresume(&pdev->dev); pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); -- cgit v1.2.3 From a0a2a6602496a45ae838a96db8b8173794b5d398 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 4 Aug 2015 15:42:47 +0800 Subject: net: Fix skb_set_peeked use-after-free bug The commit 738ac1ebb96d02e0d23bc320302a6ea94c612dec ("net: Clone skb before setting peeked flag") introduced a use-after-free bug in skb_recv_datagram. This is because skb_set_peeked may create a new skb and free the existing one. As it stands the caller will continue to use the old freed skb. This patch fixes it by making skb_set_peeked return the new skb (or the old one if unchanged). Fixes: 738ac1ebb96d ("net: Clone skb before setting peeked flag") Reported-by: Brenden Blanco Signed-off-by: Herbert Xu Tested-by: Brenden Blanco Reviewed-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- net/core/datagram.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index 4967262b2707..617088aee21d 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -131,12 +131,12 @@ out_noerr: goto out; } -static int skb_set_peeked(struct sk_buff *skb) +static struct sk_buff *skb_set_peeked(struct sk_buff *skb) { struct sk_buff *nskb; if (skb->peeked) - return 0; + return skb; /* We have to unshare an skb before modifying it. */ if (!skb_shared(skb)) @@ -144,7 +144,7 @@ static int skb_set_peeked(struct sk_buff *skb) nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) - return -ENOMEM; + return ERR_PTR(-ENOMEM); skb->prev->next = nskb; skb->next->prev = nskb; @@ -157,7 +157,7 @@ static int skb_set_peeked(struct sk_buff *skb) done: skb->peeked = 1; - return 0; + return skb; } /** @@ -229,8 +229,9 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, continue; } - error = skb_set_peeked(skb); - if (error) + skb = skb_set_peeked(skb); + error = PTR_ERR(skb); + if (IS_ERR(skb)) goto unlock_err; atomic_inc(&skb->users); -- cgit v1.2.3 From 57b229063ae6dc65036209018dc7f4290cc026bb Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Tue, 4 Aug 2015 15:40:59 +0100 Subject: xen/netback: Wake dealloc thread after completing zerocopy work Waking the dealloc thread before decrementing inflight_packets is racy because it means the thread may go to sleep before inflight_packets is decremented. If kthread_stop() has already been called, the dealloc thread may wait forever with nothing to wake it. Instead, wake the thread only after decrementing inflight_packets. Signed-off-by: Ross Lagerwall Signed-off-by: David S. Miller --- drivers/net/xen-netback/interface.c | 6 ++++++ drivers/net/xen-netback/netback.c | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 1a83e190fc15..28577a31549d 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -61,6 +61,12 @@ void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue, void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue) { atomic_dec(&queue->inflight_packets); + + /* Wake the dealloc thread _after_ decrementing inflight_packets so + * that if kthread_stop() has already been called, the dealloc thread + * does not wait forever with nothing to wake it. + */ + wake_up(&queue->dealloc_wq); } int xenvif_schedulable(struct xenvif *vif) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 1b406e706a01..3f44b522b831 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1541,7 +1541,6 @@ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success) smp_wmb(); queue->dealloc_prod++; } while (ubuf); - wake_up(&queue->dealloc_wq); spin_unlock_irqrestore(&queue->callback_lock, flags); if (likely(zerocopy_success)) -- cgit v1.2.3 From 7ba8bd75ddc6b041b5716dbb29e49df3e9cc2928 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 4 Aug 2015 18:33:34 +0200 Subject: net: pktgen: don't abuse current->state in pktgen_thread_worker() Commit 1fbe4b46caca "net: pktgen: kill the Wait for kthread_stop code in pktgen_thread_worker()" removed (in particular) the final __set_current_state(TASK_RUNNING) and I didn't notice the previous set_current_state(TASK_INTERRUPTIBLE). This triggers the warning in __might_sleep() after return. Afaics, we can simply remove both set_current_state()'s, and we could do this a long ago right after ef87979c273a2 "pktgen: better scheduler friendliness" which changed pktgen_thread_worker() to use wait_event_interruptible_timeout(). Reported-by: Huang Ying Signed-off-by: Oleg Nesterov Signed-off-by: David S. Miller --- net/core/pktgen.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 1ebdf1c0d118..1cbd209192ea 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3514,8 +3514,6 @@ static int pktgen_thread_worker(void *arg) set_freezable(); - __set_current_state(TASK_RUNNING); - while (!kthread_should_stop()) { pkt_dev = next_to_run(t); @@ -3560,7 +3558,6 @@ static int pktgen_thread_worker(void *arg) try_to_freeze(); } - set_current_state(TASK_INTERRUPTIBLE); pr_debug("%s stopping all device\n", t->tsk->comm); pktgen_stop(t); -- cgit v1.2.3 From 355b9f9df1f0311f20087350aee8ad96eedca8a9 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 4 Aug 2015 19:06:32 +0200 Subject: bridge: netlink: account for the IFLA_BRPORT_PROXYARP attribute size and policy The attribute size wasn't accounted for in the get_slave_size() callback (br_port_get_slave_size) when it was introduced, so fix it now. Also add a policy entry for it in br_port_policy. Signed-off-by: Nikolay Aleksandrov Fixes: 958501163ddd ("bridge: Add support for IEEE 802.11 Proxy ARP") Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 3da5525eb8a2..5390536d500c 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -112,6 +112,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + nla_total_size(1) /* IFLA_BRPORT_LEARNING */ + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */ + + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */ + 0; } @@ -506,6 +507,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_FAST_LEAVE]= { .type = NLA_U8 }, [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, + [IFLA_BRPORT_PROXYARP] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ -- cgit v1.2.3 From 786c2077ec8e9eab37a88fc14aac4309a8061e18 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 4 Aug 2015 19:06:33 +0200 Subject: bridge: netlink: account for the IFLA_BRPORT_PROXYARP_WIFI attribute size and policy The attribute size wasn't accounted for in the get_slave_size() callback (br_port_get_slave_size) when it was introduced, so fix it now. Also add a policy entry for it in br_port_policy. Signed-off-by: Nikolay Aleksandrov Fixes: 842a9ae08a25 ("bridge: Extend Proxy ARP design to allow optional rules for Wi-Fi") Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 5390536d500c..4d74a0639c4c 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -113,6 +113,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_LEARNING */ + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */ + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */ + + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */ + 0; } @@ -508,6 +509,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, [IFLA_BRPORT_PROXYARP] = { .type = NLA_U8 }, + [IFLA_BRPORT_PROXYARP_WIFI] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ -- cgit v1.2.3 From 7ebc482202fd54e7cf718619e869f4320b8e3bb6 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 4 Aug 2015 22:11:43 +0200 Subject: r8169: enforce RX_MULTI_EN on rtl8168ep/8111ep chips Enforcing this flag in RxConfig for the mentioned chips fixes netdev watchdog issues prepended with AMD IOMMU message(s) like: AMD-Vi: Event logged [IO_PAGE_FAULT device=01:00.0 domain=0x001d address=0x0000000000003000 flags=0x0050] Note that this flag is also set in Realtek's own driver for these chips. Signed-off-by: Ivan Vecera Tested-by: Alexander Lindqvist Acked-by: Francois Romieu Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 3df51faf18ae..f790f61ea78a 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -4875,10 +4875,12 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_46: case RTL_GIGA_MAC_VER_47: case RTL_GIGA_MAC_VER_48: + RTL_W32(RxConfig, RX128_INT_EN | RX_DMA_BURST | RX_EARLY_OFF); + break; case RTL_GIGA_MAC_VER_49: case RTL_GIGA_MAC_VER_50: case RTL_GIGA_MAC_VER_51: - RTL_W32(RxConfig, RX128_INT_EN | RX_DMA_BURST | RX_EARLY_OFF); + RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF); break; default: RTL_W32(RxConfig, RX128_INT_EN | RX_DMA_BURST); -- cgit v1.2.3 From 22f54bf932a0eed73134b696b238db4bdcff5cd4 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 4 Aug 2015 20:25:55 +0100 Subject: net: thunderx: remove effective "default y" from Kconfig if ARCH_THUNDER=y As well as for kernels built only for ThunderX ARCH_THUNDERX is also enabled for kernels which support multiple platforms (such as distro kernels). Thus "default ARCH_THUNDER" is inappropriate. I believe default m is equally frowned upon, so remove the line completely rather than "default m if ARCH_THUNDER". Signed-off-by: Ian Campbell Cc: Sunil Goutham Cc: Robert Richter Cc: Derek Chickles Cc: Satanand Burla Cc: Felix Manlunas Cc: Raghu Vatsavayi Cc: Arnd Bergmann Cc: linux-arm-kernel@lists.infradead.org Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/Kconfig | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig index c4d6bbe9458d..02e23e6f1424 100644 --- a/drivers/net/ethernet/cavium/Kconfig +++ b/drivers/net/ethernet/cavium/Kconfig @@ -16,7 +16,6 @@ if NET_VENDOR_CAVIUM config THUNDER_NIC_PF tristate "Thunder Physical function driver" depends on 64BIT - default ARCH_THUNDER select THUNDER_NIC_BGX ---help--- This driver supports Thunder's NIC physical function. @@ -29,14 +28,12 @@ config THUNDER_NIC_PF config THUNDER_NIC_VF tristate "Thunder Virtual function driver" depends on 64BIT - default ARCH_THUNDER ---help--- This driver supports Thunder's NIC virtual function config THUNDER_NIC_BGX tristate "Thunder MAC interface driver (BGX)" depends on 64BIT - default ARCH_THUNDER ---help--- This driver supports programming and controlling of MAC interface from NIC physical function driver. -- cgit v1.2.3 From 866b8b18e380f810ba96e21d25843b841271bb07 Mon Sep 17 00:00:00 2001 From: WingMan Kwok Date: Tue, 4 Aug 2015 16:56:53 -0400 Subject: net: netcp: fix unused interface rx buffer size configuration Prior to this patch, rx buffer size for each rx queue of an interface is configurable through dts bindings. But for an interface, the first rx queue's rx buffer size is always the usual MTU size (plus usual overhead) and page size for the remaining rx queues (if they are enabled by specifying a non-zero rx queue depth dts binding of the corresponding interface). This patch removes the rx buffer size configuration capability. Signed-off-by: WingMan Kwok Acked-by: Murali Karicheri Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/netcp.h | 1 - drivers/net/ethernet/ti/netcp_core.c | 35 +++++++++++++---------------------- 2 files changed, 13 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp.h b/drivers/net/ethernet/ti/netcp.h index a8a730641bbb..bb1bb72121c0 100644 --- a/drivers/net/ethernet/ti/netcp.h +++ b/drivers/net/ethernet/ti/netcp.h @@ -85,7 +85,6 @@ struct netcp_intf { struct list_head rxhook_list_head; unsigned int rx_queue_id; void *rx_fdq[KNAV_DMA_FDQ_PER_CHAN]; - u32 rx_buffer_sizes[KNAV_DMA_FDQ_PER_CHAN]; struct napi_struct rx_napi; struct napi_struct tx_napi; diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 9749dfd78c43..4755838c6137 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -34,6 +34,7 @@ #define NETCP_SOP_OFFSET (NET_IP_ALIGN + NET_SKB_PAD) #define NETCP_NAPI_WEIGHT 64 #define NETCP_TX_TIMEOUT (5 * HZ) +#define NETCP_PACKET_SIZE (ETH_FRAME_LEN + ETH_FCS_LEN) #define NETCP_MIN_PACKET_SIZE ETH_ZLEN #define NETCP_MAX_MCAST_ADDR 16 @@ -804,30 +805,28 @@ static void netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq) if (likely(fdq == 0)) { unsigned int primary_buf_len; /* Allocate a primary receive queue entry */ - buf_len = netcp->rx_buffer_sizes[0] + NETCP_SOP_OFFSET; + buf_len = NETCP_PACKET_SIZE + NETCP_SOP_OFFSET; primary_buf_len = SKB_DATA_ALIGN(buf_len) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - if (primary_buf_len <= PAGE_SIZE) { - bufptr = netdev_alloc_frag(primary_buf_len); - pad[1] = primary_buf_len; - } else { - bufptr = kmalloc(primary_buf_len, GFP_ATOMIC | - GFP_DMA32 | __GFP_COLD); - pad[1] = 0; - } + bufptr = netdev_alloc_frag(primary_buf_len); + pad[1] = primary_buf_len; if (unlikely(!bufptr)) { - dev_warn_ratelimited(netcp->ndev_dev, "Primary RX buffer alloc failed\n"); + dev_warn_ratelimited(netcp->ndev_dev, + "Primary RX buffer alloc failed\n"); goto fail; } dma = dma_map_single(netcp->dev, bufptr, buf_len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(netcp->dev, dma))) + goto fail; + pad[0] = (u32)bufptr; } else { /* Allocate a secondary receive queue entry */ - page = alloc_page(GFP_ATOMIC | GFP_DMA32 | __GFP_COLD); + page = alloc_page(GFP_ATOMIC | GFP_DMA | __GFP_COLD); if (unlikely(!page)) { dev_warn_ratelimited(netcp->ndev_dev, "Secondary page alloc failed\n"); goto fail; @@ -1010,7 +1009,7 @@ netcp_tx_map_skb(struct sk_buff *skb, struct netcp_intf *netcp) /* Map the linear buffer */ dma_addr = dma_map_single(dev, skb->data, pkt_len, DMA_TO_DEVICE); - if (unlikely(!dma_addr)) { + if (unlikely(dma_mapping_error(dev, dma_addr))) { dev_err(netcp->ndev_dev, "Failed to map skb buffer\n"); return NULL; } @@ -1546,8 +1545,8 @@ static int netcp_setup_navigator_resources(struct net_device *ndev) knav_queue_disable_notify(netcp->rx_queue); /* open Rx FDQs */ - for (i = 0; i < KNAV_DMA_FDQ_PER_CHAN && - netcp->rx_queue_depths[i] && netcp->rx_buffer_sizes[i]; ++i) { + for (i = 0; i < KNAV_DMA_FDQ_PER_CHAN && netcp->rx_queue_depths[i]; + ++i) { snprintf(name, sizeof(name), "rx-fdq-%s-%d", ndev->name, i); netcp->rx_fdq[i] = knav_queue_open(name, KNAV_QUEUE_GP, 0); if (IS_ERR_OR_NULL(netcp->rx_fdq[i])) { @@ -1941,14 +1940,6 @@ static int netcp_create_interface(struct netcp_device *netcp_device, netcp->rx_queue_depths[0] = 128; } - ret = of_property_read_u32_array(node_interface, "rx-buffer-size", - netcp->rx_buffer_sizes, - KNAV_DMA_FDQ_PER_CHAN); - if (ret) { - dev_err(dev, "missing \"rx-buffer-size\" parameter\n"); - netcp->rx_buffer_sizes[0] = 1536; - } - ret = of_property_read_u32_array(node_interface, "rx-pool", temp, 2); if (ret < 0) { dev_err(dev, "missing \"rx-pool\" parameter\n"); -- cgit v1.2.3 From 4f7eb70f7be1099236670f36b2f1f90a63e29699 Mon Sep 17 00:00:00 2001 From: Mathieu Olivari Date: Tue, 4 Aug 2015 17:25:02 -0700 Subject: stmmac: dwmac-ipq806x: fix static checker warning The patch b1c17215d718: "stmmac: add ipq806x glue layer", leads to the following static checker warning: .../stmmac/dwmac-ipq806x.c:314 ipq806x_gmac_probe() warn: double left shift '1 << (1 << gmac->id)' The NSS_COMMON_CLK_SRC_CTRL_OFFSET macro is used once as an offset, and once as a mask, which is a bug indeed. We'll fix it by defining the offset as the real offset value and computing the mask from it when required. Tested on IPQ806x ref designs AP148 & DB149. Reported-by: Dan Carpenter Signed-off-by: Mathieu Olivari Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 7e3129e7f143..f0e4bb4e3ec5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -42,7 +42,7 @@ #define NSS_COMMON_CLK_DIV_MASK 0x7f #define NSS_COMMON_CLK_SRC_CTRL 0x14 -#define NSS_COMMON_CLK_SRC_CTRL_OFFSET(x) (1 << x) +#define NSS_COMMON_CLK_SRC_CTRL_OFFSET(x) (x) /* Mode is coded on 1 bit but is different depending on the MAC ID: * MAC0: QSGMII=0 RGMII=1 * MAC1: QSGMII=0 SGMII=0 RGMII=1 @@ -291,7 +291,7 @@ static void *ipq806x_gmac_setup(struct platform_device *pdev) /* Configure the clock src according to the mode */ regmap_read(gmac->nss_common, NSS_COMMON_CLK_SRC_CTRL, &val); - val &= ~NSS_COMMON_CLK_SRC_CTRL_OFFSET(gmac->id); + val &= ~(1 << NSS_COMMON_CLK_SRC_CTRL_OFFSET(gmac->id)); switch (gmac->phy_mode) { case PHY_INTERFACE_MODE_RGMII: val |= NSS_COMMON_CLK_SRC_CTRL_RGMII(gmac->id) << -- cgit v1.2.3 From 48900cb6af4282fa0fb6ff4d72a81aa3dadb5c39 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 5 Aug 2015 10:34:04 +0800 Subject: virtio-net: drop NETIF_F_FRAGLIST virtio declares support for NETIF_F_FRAGLIST, but assumes that there are at most MAX_SKB_FRAGS + 2 fragments which isn't always true with a fraglist. A longer fraglist in the skb will make the call to skb_to_sgvec overflow the sg array, leading to memory corruption. Drop NETIF_F_FRAGLIST so we only get what we can handle. Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7fbca37a1adf..237f8e5e493d 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1756,9 +1756,9 @@ static int virtnet_probe(struct virtio_device *vdev) /* Do we support "hardware" checksums? */ if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { /* This opens up the world of extra features. */ - dev->hw_features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST; + dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG; if (csum) - dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST; + dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { dev->hw_features |= NETIF_F_TSO | NETIF_F_UFO -- cgit v1.2.3 From bcc84140a62c04f522eacceb793e6eef92965c84 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Wed, 5 Aug 2015 03:27:48 -0400 Subject: be2net: enable IFACE filters only after creating RXQs HW issues were observed on Lancer adapters if IFACE filters (flags, mac addrs etc) are enabled *before* creating RXQs. This patch changes the driver design by enabling filters in be_open() -- instead of be_setup() -- after RXQs are created and buffers posted. Two new wrapper functions, be_enable_if_filters() and be_disable_if_filters() are introduced to enable/disable IFACE filters in be_open()/be_close() respectively. In be_setup() the IFACE is now created only with the RSS flag. Signed-off-by: Kalesh AP Signed-off-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_cmds.h | 5 ++ drivers/net/ethernet/emulex/benet/be_main.c | 127 ++++++++++++++++++---------- 2 files changed, 85 insertions(+), 47 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index 2716e6f30d9a..00e3a6b6b822 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -620,6 +620,11 @@ enum be_if_flags { BE_IF_FLAGS_VLAN_PROMISCUOUS |\ BE_IF_FLAGS_MCAST_PROMISCUOUS) +#define BE_IF_EN_FLAGS (BE_IF_FLAGS_BROADCAST | BE_IF_FLAGS_PASS_L3L4_ERRORS |\ + BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_UNTAGGED) + +#define BE_IF_ALL_FILT_FLAGS (BE_IF_EN_FLAGS | BE_IF_FLAGS_ALL_PROMISCUOUS) + /* An RX interface is an object with one or more MAC addresses and * filtering capabilities. */ struct be_cmd_req_if_create { diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 6f642426308c..7730f21b6071 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -273,6 +273,10 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) if (ether_addr_equal(addr->sa_data, netdev->dev_addr)) return 0; + /* if device is not running, copy MAC to netdev->dev_addr */ + if (!netif_running(netdev)) + goto done; + /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT * privilege or if PF did not provision the new MAC address. * On BE3, this cmd will always fail if the VF doesn't have the @@ -307,9 +311,9 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) status = -EPERM; goto err; } - - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); - dev_info(dev, "MAC address changed to %pM\n", mac); +done: + ether_addr_copy(netdev->dev_addr, addr->sa_data); + dev_info(dev, "MAC address changed to %pM\n", addr->sa_data); return 0; err: dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data); @@ -3361,6 +3365,33 @@ static void be_rx_qs_destroy(struct be_adapter *adapter) } } +static void be_disable_if_filters(struct be_adapter *adapter) +{ + be_cmd_pmac_del(adapter, adapter->if_handle, + adapter->pmac_id[0], 0); + + be_clear_uc_list(adapter); + + /* The IFACE flags are enabled in the open path and cleared + * in the close path. When a VF gets detached from the host and + * assigned to a VM the following happens: + * - VF's IFACE flags get cleared in the detach path + * - IFACE create is issued by the VF in the attach path + * Due to a bug in the BE3/Skyhawk-R FW + * (Lancer FW doesn't have the bug), the IFACE capability flags + * specified along with the IFACE create cmd issued by a VF are not + * honoured by FW. As a consequence, if a *new* driver + * (that enables/disables IFACE flags in open/close) + * is loaded in the host and an *old* driver is * used by a VM/VF, + * the IFACE gets created *without* the needed flags. + * To avoid this, disable RX-filter flags only for Lancer. + */ + if (lancer_chip(adapter)) { + be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF); + adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS; + } +} + static int be_close(struct net_device *netdev) { struct be_adapter *adapter = netdev_priv(netdev); @@ -3373,6 +3404,8 @@ static int be_close(struct net_device *netdev) if (!(adapter->flags & BE_FLAGS_SETUP_DONE)) return 0; + be_disable_if_filters(adapter); + be_roce_dev_close(adapter); if (adapter->flags & BE_FLAGS_NAPI_ENABLED) { @@ -3392,7 +3425,6 @@ static int be_close(struct net_device *netdev) be_tx_compl_clean(adapter); be_rx_qs_destroy(adapter); - be_clear_uc_list(adapter); for_all_evt_queues(adapter, eqo, i) { if (msix_enabled(adapter)) @@ -3477,6 +3509,31 @@ static int be_rx_qs_create(struct be_adapter *adapter) return 0; } +static int be_enable_if_filters(struct be_adapter *adapter) +{ + int status; + + status = be_cmd_rx_filter(adapter, BE_IF_EN_FLAGS, ON); + if (status) + return status; + + /* For BE3 VFs, the PF programs the initial MAC address */ + if (!(BEx_chip(adapter) && be_virtfn(adapter))) { + status = be_cmd_pmac_add(adapter, adapter->netdev->dev_addr, + adapter->if_handle, + &adapter->pmac_id[0], 0); + if (status) + return status; + } + + if (adapter->vlans_added) + be_vid_config(adapter); + + be_set_rx_mode(adapter->netdev); + + return 0; +} + static int be_open(struct net_device *netdev) { struct be_adapter *adapter = netdev_priv(netdev); @@ -3490,6 +3547,10 @@ static int be_open(struct net_device *netdev) if (status) goto err; + status = be_enable_if_filters(adapter); + if (status) + goto err; + status = be_irq_register(adapter); if (status) goto err; @@ -3686,16 +3747,6 @@ static void be_cancel_err_detection(struct be_adapter *adapter) } } -static void be_mac_clear(struct be_adapter *adapter) -{ - if (adapter->pmac_id) { - be_cmd_pmac_del(adapter, adapter->if_handle, - adapter->pmac_id[0], 0); - kfree(adapter->pmac_id); - adapter->pmac_id = NULL; - } -} - #ifdef CONFIG_BE2NET_VXLAN static void be_disable_vxlan_offloads(struct be_adapter *adapter) { @@ -3770,8 +3821,8 @@ static int be_clear(struct be_adapter *adapter) #ifdef CONFIG_BE2NET_VXLAN be_disable_vxlan_offloads(adapter); #endif - /* delete the primary mac along with the uc-mac list */ - be_mac_clear(adapter); + kfree(adapter->pmac_id); + adapter->pmac_id = NULL; be_cmd_if_destroy(adapter, adapter->if_handle, 0); @@ -3782,25 +3833,11 @@ static int be_clear(struct be_adapter *adapter) return 0; } -static int be_if_create(struct be_adapter *adapter, u32 *if_handle, - u32 cap_flags, u32 vf) -{ - u32 en_flags; - - en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST | - BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS | - BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS; - - en_flags &= cap_flags; - - return be_cmd_if_create(adapter, cap_flags, en_flags, if_handle, vf); -} - static int be_vfs_if_create(struct be_adapter *adapter) { struct be_resources res = {0}; + u32 cap_flags, en_flags, vf; struct be_vf_cfg *vf_cfg; - u32 cap_flags, vf; int status; /* If a FW profile exists, then cap_flags are updated */ @@ -3821,8 +3858,12 @@ static int be_vfs_if_create(struct be_adapter *adapter) } } - status = be_if_create(adapter, &vf_cfg->if_handle, - cap_flags, vf + 1); + en_flags = cap_flags & (BE_IF_FLAGS_UNTAGGED | + BE_IF_FLAGS_BROADCAST | + BE_IF_FLAGS_MULTICAST | + BE_IF_FLAGS_PASS_L3L4_ERRORS); + status = be_cmd_if_create(adapter, cap_flags, en_flags, + &vf_cfg->if_handle, vf + 1); if (status) return status; } @@ -4194,15 +4235,8 @@ static int be_mac_setup(struct be_adapter *adapter) memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN); memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN); - } else { - /* Maybe the HW was reset; dev_addr must be re-programmed */ - memcpy(mac, adapter->netdev->dev_addr, ETH_ALEN); } - /* For BE3-R VFs, the PF programs the initial MAC address */ - if (!(BEx_chip(adapter) && be_virtfn(adapter))) - be_cmd_pmac_add(adapter, mac, adapter->if_handle, - &adapter->pmac_id[0], 0); return 0; } @@ -4342,6 +4376,7 @@ static int be_func_init(struct be_adapter *adapter) static int be_setup(struct be_adapter *adapter) { struct device *dev = &adapter->pdev->dev; + u32 en_flags; int status; status = be_func_init(adapter); @@ -4364,8 +4399,11 @@ static int be_setup(struct be_adapter *adapter) if (status) goto err; - status = be_if_create(adapter, &adapter->if_handle, - be_if_cap_flags(adapter), 0); + /* will enable all the needed filter flags in be_open() */ + en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS; + en_flags = en_flags & be_if_cap_flags(adapter); + status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags, + &adapter->if_handle, 0); if (status) goto err; @@ -4391,11 +4429,6 @@ static int be_setup(struct be_adapter *adapter) dev_err(dev, "Please upgrade firmware to version >= 4.0\n"); } - if (adapter->vlans_added) - be_vid_config(adapter); - - be_set_rx_mode(adapter->netdev); - status = be_cmd_set_flow_control(adapter, adapter->tx_fc, adapter->rx_fc); if (status) -- cgit v1.2.3 From 99b44304f205a826501721d41928e87b0b9cf3b3 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Wed, 5 Aug 2015 03:27:49 -0400 Subject: be2net: post buffers before destroying RXQs in Lancer An RX stall issue was seen on Lancer adapters, when RXQs are destroyed while they are in an "out of buffer" state. This patch fixes this issue by posting 64 buffers to each RXQ before destroying them in the close path. This is done after ensuring that no more new packets are selected for transfer to the RXQs by disabling interface filters. Signed-off-by: Kalesh AP Signed-off-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 42 ++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 7730f21b6071..14ae67a8949e 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2451,10 +2451,24 @@ static void be_eq_clean(struct be_eq_obj *eqo) be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0); } -static void be_rx_cq_clean(struct be_rx_obj *rxo) +/* Free posted rx buffers that were not used */ +static void be_rxq_clean(struct be_rx_obj *rxo) { - struct be_rx_page_info *page_info; struct be_queue_info *rxq = &rxo->q; + struct be_rx_page_info *page_info; + + while (atomic_read(&rxq->used) > 0) { + page_info = get_rx_page_info(rxo); + put_page(page_info->page); + memset(page_info, 0, sizeof(*page_info)); + } + BUG_ON(atomic_read(&rxq->used)); + rxq->tail = 0; + rxq->head = 0; +} + +static void be_rx_cq_clean(struct be_rx_obj *rxo) +{ struct be_queue_info *rx_cq = &rxo->cq; struct be_rx_compl_info *rxcp; struct be_adapter *adapter = rxo->adapter; @@ -2491,16 +2505,6 @@ static void be_rx_cq_clean(struct be_rx_obj *rxo) /* After cleanup, leave the CQ in unarmed state */ be_cq_notify(adapter, rx_cq->id, false, 0); - - /* Then free posted rx buffers that were not used */ - while (atomic_read(&rxq->used) > 0) { - page_info = get_rx_page_info(rxo); - put_page(page_info->page); - memset(page_info, 0, sizeof(*page_info)); - } - BUG_ON(atomic_read(&rxq->used)); - rxq->tail = 0; - rxq->head = 0; } static void be_tx_compl_clean(struct be_adapter *adapter) @@ -3358,8 +3362,22 @@ static void be_rx_qs_destroy(struct be_adapter *adapter) for_all_rx_queues(adapter, rxo, i) { q = &rxo->q; if (q->created) { + /* If RXQs are destroyed while in an "out of buffer" + * state, there is a possibility of an HW stall on + * Lancer. So, post 64 buffers to each queue to relieve + * the "out of buffer" condition. + * Make sure there's space in the RXQ before posting. + */ + if (lancer_chip(adapter)) { + be_rx_cq_clean(rxo); + if (atomic_read(&q->used) == 0) + be_post_rx_frags(rxo, GFP_KERNEL, + MAX_RX_POST); + } + be_cmd_rxq_destroy(adapter, q); be_rx_cq_clean(rxo); + be_rxq_clean(rxo); } be_queue_free(adapter, q); } -- cgit v1.2.3 From 649886a36b5f023811321819eceaa8ba66444e3b Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Wed, 5 Aug 2015 03:27:50 -0400 Subject: be2net: protect eqo->affinity_mask from getting freed twice There are paths in the driver such as an unrecoverable error (UE) detection followed by a driver unload wherein be_clear() is invoked twice. Individual data structures are reset so that they are not cleaned/freed twice. This patch does the same for eqo->affinity_mask. It is freed only if EQs haven't yet been destroyed. This fixes a possible crash when affinity_mask is freed twice. Signed-off-by: Kalesh AP Signed-off-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 14ae67a8949e..c28e3bfdccd7 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2584,8 +2584,8 @@ static void be_evt_queues_destroy(struct be_adapter *adapter) be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ); napi_hash_del(&eqo->napi); netif_napi_del(&eqo->napi); + free_cpumask_var(eqo->affinity_mask); } - free_cpumask_var(eqo->affinity_mask); be_queue_free(adapter, &eqo->q); } } @@ -2602,13 +2602,7 @@ static int be_evt_queues_create(struct be_adapter *adapter) for_all_evt_queues(adapter, eqo, i) { int numa_node = dev_to_node(&adapter->pdev->dev); - if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL)) - return -ENOMEM; - cpumask_set_cpu(cpumask_local_spread(i, numa_node), - eqo->affinity_mask); - netif_napi_add(adapter->netdev, &eqo->napi, be_poll, - BE_NAPI_WEIGHT); - napi_hash_add(&eqo->napi); + aic = &adapter->aic_obj[i]; eqo->adapter = adapter; eqo->idx = i; @@ -2624,6 +2618,14 @@ static int be_evt_queues_create(struct be_adapter *adapter) rc = be_cmd_eq_create(adapter, eqo); if (rc) return rc; + + if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL)) + return -ENOMEM; + cpumask_set_cpu(cpumask_local_spread(i, numa_node), + eqo->affinity_mask); + netif_napi_add(adapter->netdev, &eqo->napi, be_poll, + BE_NAPI_WEIGHT); + napi_hash_add(&eqo->napi); } return 0; } -- cgit v1.2.3 From fe1e1876d8f6d8d4b45e3940e6dd43cd3b18d958 Mon Sep 17 00:00:00 2001 From: Carol L Soto Date: Wed, 5 Aug 2015 11:05:32 -0500 Subject: net/mlx5_core: Set log_uar_page_sz for non 4K page size architecture failed to configure the page size for architectures with page size different than 4K. Fixes: 938fe83 ("net/mlx5_core: New device capabilities handling") Signed-off-by: Carol L Soto Acked-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index afad529838de..06e3e1e54c35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -391,6 +391,8 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) /* disable cmdif checksum */ MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0); + MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12); + err = set_caps(dev, set_ctx, set_sz); query_ex: -- cgit v1.2.3 From 96fffb4f23f124f297d51dedc9cf51d19eb88ee1 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Sun, 9 Aug 2015 13:14:15 +0200 Subject: netfilter: ip6t_SYNPROXY: fix NULL pointer dereference This happens when networking namespaces are enabled. Suggested-by: Patrick McHardy Signed-off-by: Phil Sutter Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6t_SYNPROXY.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 6edb7b106de7..bcebc24c6f0b 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -37,12 +37,13 @@ synproxy_build_ip(struct sk_buff *skb, const struct in6_addr *saddr, } static void -synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb, +synproxy_send_tcp(const struct synproxy_net *snet, + const struct sk_buff *skb, struct sk_buff *nskb, struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo, struct ipv6hdr *niph, struct tcphdr *nth, unsigned int tcp_hdr_size) { - struct net *net = nf_ct_net((struct nf_conn *)nfct); + struct net *net = nf_ct_net(snet->tmpl); struct dst_entry *dst; struct flowi6 fl6; @@ -83,7 +84,8 @@ free_nskb: } static void -synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th, +synproxy_send_client_synack(const struct synproxy_net *snet, + const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { struct sk_buff *nskb; @@ -119,7 +121,7 @@ synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th, synproxy_build_options(nth, opts); - synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } @@ -163,7 +165,7 @@ synproxy_send_server_syn(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, + synproxy_send_tcp(snet, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, niph, nth, tcp_hdr_size); } @@ -203,7 +205,7 @@ synproxy_send_server_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); + synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); } static void @@ -241,7 +243,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); + synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); } static bool @@ -301,7 +303,7 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) XT_SYNPROXY_OPT_SACK_PERM | XT_SYNPROXY_OPT_ECN); - synproxy_send_client_synack(skb, th, &opts); + synproxy_send_client_synack(snet, skb, th, &opts); return NF_DROP; } else if (th->ack && !(th->fin || th->rst || th->syn)) { -- cgit v1.2.3 From 3c16241c445303a90529565e7437e1f240acfef2 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Tue, 28 Jul 2015 00:53:26 +0200 Subject: netfilter: SYNPROXY: fix sending window update to client Upon receipt of SYNACK from the server, ipt_SYNPROXY first sends back an ACK to finish the server handshake, then calls nf_ct_seqadj_init() to initiate sequence number adjustment of forwarded packets to the client and finally sends a window update to the client to unblock it's TX queue. Since synproxy_send_client_ack() does not set synproxy_send_tcp()'s nfct parameter, no sequence number adjustment happens and the client receives the window update with incorrect sequence number. Depending on client TCP implementation, this leads to a significant delay (until a window probe is being sent). Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_SYNPROXY.c | 3 ++- net/ipv6/netfilter/ip6t_SYNPROXY.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index fe8cc183411e..95ea633e8356 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -226,7 +226,8 @@ synproxy_send_client_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); + synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + niph, nth, tcp_hdr_size); } static bool diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index bcebc24c6f0b..ebbb754c2111 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -243,7 +243,8 @@ synproxy_send_client_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); + synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + niph, nth, tcp_hdr_size); } static bool -- cgit v1.2.3 From d53793c5d6eb0cfe4175d9c315a30d65adf3478b Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Thu, 6 Aug 2015 19:00:28 +0200 Subject: net: mvpp2: remove excessive spinlocks from driver initialization Using spinlocks protection during one-time driver initialization is not necessary. Moreover it resulted in invalid GFP_KERNEL allocation under the lock. This commit removes redundant spinlocks from buffer manager part of mvpp2 initialization. Signed-off-by: Marcin Wojtas Reported-by: Alexandre Fournier Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 3e8b1bfb1f2e..f94bd122f0bd 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -913,8 +913,6 @@ struct mvpp2_bm_pool { /* Occupied buffers indicator */ atomic_t in_use; int in_use_thresh; - - spinlock_t lock; }; struct mvpp2_buff_hdr { @@ -3376,7 +3374,6 @@ static int mvpp2_bm_pool_create(struct platform_device *pdev, bm_pool->pkt_size = 0; bm_pool->buf_num = 0; atomic_set(&bm_pool->in_use, 0); - spin_lock_init(&bm_pool->lock); return 0; } @@ -3647,7 +3644,6 @@ static struct mvpp2_bm_pool * mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type, int pkt_size) { - unsigned long flags = 0; struct mvpp2_bm_pool *new_pool = &port->priv->bm_pools[pool]; int num; @@ -3656,8 +3652,6 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type, return NULL; } - spin_lock_irqsave(&new_pool->lock, flags); - if (new_pool->type == MVPP2_BM_FREE) new_pool->type = type; @@ -3686,8 +3680,6 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type, if (num != pkts_num) { WARN(1, "pool %d: %d of %d allocated\n", new_pool->id, num, pkts_num); - /* We need to undo the bufs_add() allocations */ - spin_unlock_irqrestore(&new_pool->lock, flags); return NULL; } } @@ -3695,15 +3687,12 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, int pool, enum mvpp2_bm_type type, mvpp2_bm_pool_bufsize_set(port->priv, new_pool, MVPP2_RX_BUF_SIZE(new_pool->pkt_size)); - spin_unlock_irqrestore(&new_pool->lock, flags); - return new_pool; } /* Initialize pools for swf */ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port) { - unsigned long flags = 0; int rxq; if (!port->pool_long) { @@ -3714,9 +3703,7 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port) if (!port->pool_long) return -ENOMEM; - spin_lock_irqsave(&port->pool_long->lock, flags); port->pool_long->port_map |= (1 << port->id); - spin_unlock_irqrestore(&port->pool_long->lock, flags); for (rxq = 0; rxq < rxq_number; rxq++) mvpp2_rxq_long_pool_set(port, rxq, port->pool_long->id); @@ -3730,9 +3717,7 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port) if (!port->pool_short) return -ENOMEM; - spin_lock_irqsave(&port->pool_short->lock, flags); port->pool_short->port_map |= (1 << port->id); - spin_unlock_irqrestore(&port->pool_short->lock, flags); for (rxq = 0; rxq < rxq_number; rxq++) mvpp2_rxq_short_pool_set(port, rxq, -- cgit v1.2.3 From 71ce391dfb7843f4d31abcd7287967a00cb1b8a1 Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Thu, 6 Aug 2015 19:00:29 +0200 Subject: net: mvpp2: enable proper per-CPU TX buffers unmapping mvpp2 driver allows usage of per-CPU TX processing. Once the packets are prepared independetly on each CPU, the hardware enqueues the descriptors in common TX queue. After they are sent, the buffers and associated sk_buffs should be released on the corresponding CPU. This is why a special index is maintained in order to point to the right data to be released after transmission takes place. Each per-CPU TX queue comprise an array of sent sk_buffs, freed in mvpp2_txq_bufs_free function. However, the index was used there also for obtaining a descriptor (and therefore a buffer to be DMA-unmapped) from common TX queue, which was wrong, because it was not referring to the current CPU. This commit enables proper unmapping of sent data buffers by indexing them in per-CPU queues using a dedicated array for keeping their physical addresses. Signed-off-by: Marcin Wojtas Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 52 +++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index f94bd122f0bd..3e25d31414bc 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -776,6 +776,9 @@ struct mvpp2_txq_pcpu { /* Array of transmitted skb */ struct sk_buff **tx_skb; + /* Array of transmitted buffers' physical addresses */ + dma_addr_t *tx_buffs; + /* Index of last TX DMA descriptor that was inserted */ int txq_put_index; @@ -961,9 +964,13 @@ static void mvpp2_txq_inc_get(struct mvpp2_txq_pcpu *txq_pcpu) } static void mvpp2_txq_inc_put(struct mvpp2_txq_pcpu *txq_pcpu, - struct sk_buff *skb) + struct sk_buff *skb, + struct mvpp2_tx_desc *tx_desc) { txq_pcpu->tx_skb[txq_pcpu->txq_put_index] = skb; + if (skb) + txq_pcpu->tx_buffs[txq_pcpu->txq_put_index] = + tx_desc->buf_phys_addr; txq_pcpu->txq_put_index++; if (txq_pcpu->txq_put_index == txq_pcpu->size) txq_pcpu->txq_put_index = 0; @@ -4392,8 +4399,8 @@ static void mvpp2_txq_bufs_free(struct mvpp2_port *port, int i; for (i = 0; i < num; i++) { - struct mvpp2_tx_desc *tx_desc = txq->descs + - txq_pcpu->txq_get_index; + dma_addr_t buf_phys_addr = + txq_pcpu->tx_buffs[txq_pcpu->txq_get_index]; struct sk_buff *skb = txq_pcpu->tx_skb[txq_pcpu->txq_get_index]; mvpp2_txq_inc_get(txq_pcpu); @@ -4401,8 +4408,8 @@ static void mvpp2_txq_bufs_free(struct mvpp2_port *port, if (!skb) continue; - dma_unmap_single(port->dev->dev.parent, tx_desc->buf_phys_addr, - tx_desc->data_size, DMA_TO_DEVICE); + dma_unmap_single(port->dev->dev.parent, buf_phys_addr, + skb_headlen(skb), DMA_TO_DEVICE); dev_kfree_skb_any(skb); } } @@ -4634,12 +4641,13 @@ static int mvpp2_txq_init(struct mvpp2_port *port, txq_pcpu->tx_skb = kmalloc(txq_pcpu->size * sizeof(*txq_pcpu->tx_skb), GFP_KERNEL); - if (!txq_pcpu->tx_skb) { - dma_free_coherent(port->dev->dev.parent, - txq->size * MVPP2_DESC_ALIGNED_SIZE, - txq->descs, txq->descs_phys); - return -ENOMEM; - } + if (!txq_pcpu->tx_skb) + goto error; + + txq_pcpu->tx_buffs = kmalloc(txq_pcpu->size * + sizeof(dma_addr_t), GFP_KERNEL); + if (!txq_pcpu->tx_buffs) + goto error; txq_pcpu->count = 0; txq_pcpu->reserved_num = 0; @@ -4648,6 +4656,19 @@ static int mvpp2_txq_init(struct mvpp2_port *port, } return 0; + +error: + for_each_present_cpu(cpu) { + txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); + kfree(txq_pcpu->tx_skb); + kfree(txq_pcpu->tx_buffs); + } + + dma_free_coherent(port->dev->dev.parent, + txq->size * MVPP2_DESC_ALIGNED_SIZE, + txq->descs, txq->descs_phys); + + return -ENOMEM; } /* Free allocated TXQ resources */ @@ -4660,6 +4681,7 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port, for_each_present_cpu(cpu) { txq_pcpu = per_cpu_ptr(txq->pcpu, cpu); kfree(txq_pcpu->tx_skb); + kfree(txq_pcpu->tx_buffs); } if (txq->descs) @@ -5129,11 +5151,11 @@ static int mvpp2_tx_frag_process(struct mvpp2_port *port, struct sk_buff *skb, if (i == (skb_shinfo(skb)->nr_frags - 1)) { /* Last descriptor */ tx_desc->command = MVPP2_TXD_L_DESC; - mvpp2_txq_inc_put(txq_pcpu, skb); + mvpp2_txq_inc_put(txq_pcpu, skb, tx_desc); } else { /* Descriptor in the middle: Not First, Not Last */ tx_desc->command = 0; - mvpp2_txq_inc_put(txq_pcpu, NULL); + mvpp2_txq_inc_put(txq_pcpu, NULL, tx_desc); } } @@ -5199,12 +5221,12 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev) /* First and Last descriptor */ tx_cmd |= MVPP2_TXD_F_DESC | MVPP2_TXD_L_DESC; tx_desc->command = tx_cmd; - mvpp2_txq_inc_put(txq_pcpu, skb); + mvpp2_txq_inc_put(txq_pcpu, skb, tx_desc); } else { /* First but not Last */ tx_cmd |= MVPP2_TXD_F_DESC | MVPP2_TXD_PADDING_DISABLE; tx_desc->command = tx_cmd; - mvpp2_txq_inc_put(txq_pcpu, NULL); + mvpp2_txq_inc_put(txq_pcpu, NULL, tx_desc); /* Continue with other skb fragments */ if (mvpp2_tx_frag_process(port, skb, aggr_txq, txq)) { -- cgit v1.2.3 From edc660fa09e2295b6ee2d2bf742c2a72dfeb18d2 Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Thu, 6 Aug 2015 19:00:30 +0200 Subject: net: mvpp2: replace TX coalescing interrupts with hrtimer The PP2 controller is capable of per-CPU TX processing, which means there are per-CPU banked register sets and queues. Current version of the driver supports TX packet coalescing - once on given CPU sent packets amount reaches a threshold value, an IRQ occurs. However, there is a single interrupt line responsible for CPU0/1 TX and RX events (the latter is not per-CPU, the hardware does not support RSS). When the top-half executes the interrupt cause is not known. This is why in NAPI poll function, along with RX processing, IRQ cause register on both CPU's is accessed in order to determine on which of them the TX coalescing threshold might have been reached. Thus the egress processing and releasing the buffers is able to take place on the corresponding CPU. Hitherto approach lead to an illegal usage of on_each_cpu function in softirq context. The problem is solved by resigning from TX coalescing interrupts and separating egress finalization from NAPI processing. For that purpose a method of using hrtimer is introduced. In main transmit function (mvpp2_tx) buffers are released once a software coalescing threshold is reached. In case not all the data is processed a timer is set on this CPU - in its interrupt context a tasklet is scheduled in which all queues are processed. At once only one timer per-CPU can be running, which is controlled by a dedicated flag. This commit removes TX processing from NAPI polling function, disables hardware coalescing and enables hrtimer with tasklet, using new per-CPU port structure (mvpp2_port_pcpu). Signed-off-by: Marcin Wojtas Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 177 +++++++++++++++++++++++++---------- 1 file changed, 130 insertions(+), 47 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 3e25d31414bc..d9884fd15b45 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include #include @@ -299,6 +301,7 @@ /* Coalescing */ #define MVPP2_TXDONE_COAL_PKTS_THRESH 15 +#define MVPP2_TXDONE_HRTIMER_PERIOD_NS 1000000UL #define MVPP2_RX_COAL_PKTS 32 #define MVPP2_RX_COAL_USEC 100 @@ -660,6 +663,14 @@ struct mvpp2_pcpu_stats { u64 tx_bytes; }; +/* Per-CPU port control */ +struct mvpp2_port_pcpu { + struct hrtimer tx_done_timer; + bool timer_scheduled; + /* Tasklet for egress finalization */ + struct tasklet_struct tx_done_tasklet; +}; + struct mvpp2_port { u8 id; @@ -679,6 +690,9 @@ struct mvpp2_port { u32 pending_cause_rx; struct napi_struct napi; + /* Per-CPU port control */ + struct mvpp2_port_pcpu __percpu *pcpu; + /* Flags */ unsigned long flags; @@ -3798,7 +3812,6 @@ static void mvpp2_interrupts_unmask(void *arg) mvpp2_write(port->priv, MVPP2_ISR_RX_TX_MASK_REG(port->id), (MVPP2_CAUSE_MISC_SUM_MASK | - MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK | MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK)); } @@ -4374,23 +4387,6 @@ static void mvpp2_rx_time_coal_set(struct mvpp2_port *port, rxq->time_coal = usec; } -/* Set threshold for TX_DONE pkts coalescing */ -static void mvpp2_tx_done_pkts_coal_set(void *arg) -{ - struct mvpp2_port *port = arg; - int queue; - u32 val; - - for (queue = 0; queue < txq_number; queue++) { - struct mvpp2_tx_queue *txq = port->txqs[queue]; - - val = (txq->done_pkts_coal << MVPP2_TRANSMITTED_THRESH_OFFSET) & - MVPP2_TRANSMITTED_THRESH_MASK; - mvpp2_write(port->priv, MVPP2_TXQ_NUM_REG, txq->id); - mvpp2_write(port->priv, MVPP2_TXQ_THRESH_REG, val); - } -} - /* Free Tx queue skbuffs */ static void mvpp2_txq_bufs_free(struct mvpp2_port *port, struct mvpp2_tx_queue *txq, @@ -4425,7 +4421,7 @@ static inline struct mvpp2_rx_queue *mvpp2_get_rx_queue(struct mvpp2_port *port, static inline struct mvpp2_tx_queue *mvpp2_get_tx_queue(struct mvpp2_port *port, u32 cause) { - int queue = fls(cause >> 16) - 1; + int queue = fls(cause) - 1; return port->txqs[queue]; } @@ -4452,6 +4448,29 @@ static void mvpp2_txq_done(struct mvpp2_port *port, struct mvpp2_tx_queue *txq, netif_tx_wake_queue(nq); } +static unsigned int mvpp2_tx_done(struct mvpp2_port *port, u32 cause) +{ + struct mvpp2_tx_queue *txq; + struct mvpp2_txq_pcpu *txq_pcpu; + unsigned int tx_todo = 0; + + while (cause) { + txq = mvpp2_get_tx_queue(port, cause); + if (!txq) + break; + + txq_pcpu = this_cpu_ptr(txq->pcpu); + + if (txq_pcpu->count) { + mvpp2_txq_done(port, txq, txq_pcpu); + tx_todo += txq_pcpu->count; + } + + cause &= ~(1 << txq->log_id); + } + return tx_todo; +} + /* Rx/Tx queue initialization/cleanup methods */ /* Allocate and initialize descriptors for aggr TXQ */ @@ -4812,7 +4831,6 @@ static int mvpp2_setup_txqs(struct mvpp2_port *port) goto err_cleanup; } - on_each_cpu(mvpp2_tx_done_pkts_coal_set, port, 1); on_each_cpu(mvpp2_txq_sent_counter_clear, port, 1); return 0; @@ -4894,6 +4912,49 @@ static void mvpp2_link_event(struct net_device *dev) } } +static void mvpp2_timer_set(struct mvpp2_port_pcpu *port_pcpu) +{ + ktime_t interval; + + if (!port_pcpu->timer_scheduled) { + port_pcpu->timer_scheduled = true; + interval = ktime_set(0, MVPP2_TXDONE_HRTIMER_PERIOD_NS); + hrtimer_start(&port_pcpu->tx_done_timer, interval, + HRTIMER_MODE_REL_PINNED); + } +} + +static void mvpp2_tx_proc_cb(unsigned long data) +{ + struct net_device *dev = (struct net_device *)data; + struct mvpp2_port *port = netdev_priv(dev); + struct mvpp2_port_pcpu *port_pcpu = this_cpu_ptr(port->pcpu); + unsigned int tx_todo, cause; + + if (!netif_running(dev)) + return; + port_pcpu->timer_scheduled = false; + + /* Process all the Tx queues */ + cause = (1 << txq_number) - 1; + tx_todo = mvpp2_tx_done(port, cause); + + /* Set the timer in case not all the packets were processed */ + if (tx_todo) + mvpp2_timer_set(port_pcpu); +} + +static enum hrtimer_restart mvpp2_hr_timer_cb(struct hrtimer *timer) +{ + struct mvpp2_port_pcpu *port_pcpu = container_of(timer, + struct mvpp2_port_pcpu, + tx_done_timer); + + tasklet_schedule(&port_pcpu->tx_done_tasklet); + + return HRTIMER_NORESTART; +} + /* Main RX/TX processing routines */ /* Display more error info */ @@ -5262,6 +5323,17 @@ out: dev_kfree_skb_any(skb); } + /* Finalize TX processing */ + if (txq_pcpu->count >= txq->done_pkts_coal) + mvpp2_txq_done(port, txq, txq_pcpu); + + /* Set the timer in case not all frags were processed */ + if (txq_pcpu->count <= frags && txq_pcpu->count > 0) { + struct mvpp2_port_pcpu *port_pcpu = this_cpu_ptr(port->pcpu); + + mvpp2_timer_set(port_pcpu); + } + return NETDEV_TX_OK; } @@ -5275,10 +5347,11 @@ static inline void mvpp2_cause_error(struct net_device *dev, int cause) netdev_err(dev, "tx fifo underrun error\n"); } -static void mvpp2_txq_done_percpu(void *arg) +static int mvpp2_poll(struct napi_struct *napi, int budget) { - struct mvpp2_port *port = arg; - u32 cause_rx_tx, cause_tx, cause_misc; + u32 cause_rx_tx, cause_rx, cause_misc; + int rx_done = 0; + struct mvpp2_port *port = netdev_priv(napi->dev); /* Rx/Tx cause register * @@ -5292,7 +5365,7 @@ static void mvpp2_txq_done_percpu(void *arg) */ cause_rx_tx = mvpp2_read(port->priv, MVPP2_ISR_RX_TX_CAUSE_REG(port->id)); - cause_tx = cause_rx_tx & MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK; + cause_rx_tx &= ~MVPP2_CAUSE_TXQ_OCCUP_DESC_ALL_MASK; cause_misc = cause_rx_tx & MVPP2_CAUSE_MISC_SUM_MASK; if (cause_misc) { @@ -5304,26 +5377,6 @@ static void mvpp2_txq_done_percpu(void *arg) cause_rx_tx & ~MVPP2_CAUSE_MISC_SUM_MASK); } - /* Release TX descriptors */ - if (cause_tx) { - struct mvpp2_tx_queue *txq = mvpp2_get_tx_queue(port, cause_tx); - struct mvpp2_txq_pcpu *txq_pcpu = this_cpu_ptr(txq->pcpu); - - if (txq_pcpu->count) - mvpp2_txq_done(port, txq, txq_pcpu); - } -} - -static int mvpp2_poll(struct napi_struct *napi, int budget) -{ - u32 cause_rx_tx, cause_rx; - int rx_done = 0; - struct mvpp2_port *port = netdev_priv(napi->dev); - - on_each_cpu(mvpp2_txq_done_percpu, port, 1); - - cause_rx_tx = mvpp2_read(port->priv, - MVPP2_ISR_RX_TX_CAUSE_REG(port->id)); cause_rx = cause_rx_tx & MVPP2_CAUSE_RXQ_OCCUP_DESC_ALL_MASK; /* Process RX packets */ @@ -5568,6 +5621,8 @@ err_cleanup_rxqs: static int mvpp2_stop(struct net_device *dev) { struct mvpp2_port *port = netdev_priv(dev); + struct mvpp2_port_pcpu *port_pcpu; + int cpu; mvpp2_stop_dev(port); mvpp2_phy_disconnect(port); @@ -5576,6 +5631,13 @@ static int mvpp2_stop(struct net_device *dev) on_each_cpu(mvpp2_interrupts_mask, port, 1); free_irq(port->irq, port); + for_each_present_cpu(cpu) { + port_pcpu = per_cpu_ptr(port->pcpu, cpu); + + hrtimer_cancel(&port_pcpu->tx_done_timer); + port_pcpu->timer_scheduled = false; + tasklet_kill(&port_pcpu->tx_done_tasklet); + } mvpp2_cleanup_rxqs(port); mvpp2_cleanup_txqs(port); @@ -5791,7 +5853,6 @@ static int mvpp2_ethtool_set_coalesce(struct net_device *dev, txq->done_pkts_coal = c->tx_max_coalesced_frames; } - on_each_cpu(mvpp2_tx_done_pkts_coal_set, port, 1); return 0; } @@ -6042,6 +6103,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, { struct device_node *phy_node; struct mvpp2_port *port; + struct mvpp2_port_pcpu *port_pcpu; struct net_device *dev; struct resource *res; const char *dt_mac_addr; @@ -6051,7 +6113,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, int features; int phy_mode; int priv_common_regs_num = 2; - int err, i; + int err, i, cpu; dev = alloc_etherdev_mqs(sizeof(struct mvpp2_port), txq_number, rxq_number); @@ -6142,6 +6204,24 @@ static int mvpp2_port_probe(struct platform_device *pdev, } mvpp2_port_power_up(port); + port->pcpu = alloc_percpu(struct mvpp2_port_pcpu); + if (!port->pcpu) { + err = -ENOMEM; + goto err_free_txq_pcpu; + } + + for_each_present_cpu(cpu) { + port_pcpu = per_cpu_ptr(port->pcpu, cpu); + + hrtimer_init(&port_pcpu->tx_done_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_PINNED); + port_pcpu->tx_done_timer.function = mvpp2_hr_timer_cb; + port_pcpu->timer_scheduled = false; + + tasklet_init(&port_pcpu->tx_done_tasklet, mvpp2_tx_proc_cb, + (unsigned long)dev); + } + netif_napi_add(dev, &port->napi, mvpp2_poll, NAPI_POLL_WEIGHT); features = NETIF_F_SG | NETIF_F_IP_CSUM; dev->features = features | NETIF_F_RXCSUM; @@ -6151,7 +6231,7 @@ static int mvpp2_port_probe(struct platform_device *pdev, err = register_netdev(dev); if (err < 0) { dev_err(&pdev->dev, "failed to register netdev\n"); - goto err_free_txq_pcpu; + goto err_free_port_pcpu; } netdev_info(dev, "Using %s mac address %pM\n", mac_from, dev->dev_addr); @@ -6160,6 +6240,8 @@ static int mvpp2_port_probe(struct platform_device *pdev, priv->port_list[id] = port; return 0; +err_free_port_pcpu: + free_percpu(port->pcpu); err_free_txq_pcpu: for (i = 0; i < txq_number; i++) free_percpu(port->txqs[i]->pcpu); @@ -6178,6 +6260,7 @@ static void mvpp2_port_remove(struct mvpp2_port *port) int i; unregister_netdev(port->dev); + free_percpu(port->pcpu); free_percpu(port->stats); for (i = 0; i < txq_number; i++) free_percpu(port->txqs[i]->pcpu); -- cgit v1.2.3 From ade4dc3e616e33c80d7e62855fe1b6f9895bc7c3 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 6 Aug 2015 22:48:23 +0200 Subject: bna: fix interrupts storm caused by erroneous packets The commit "e29aa33 bna: Enable Multi Buffer RX" moved packets counter increment from the beginning of the NAPI processing loop after the check for erroneous packets so they are never accounted. This counter is used to inform firmware about number of processed completions (packets). As these packets are never acked the firmware fires IRQs for them again and again. Fixes: e29aa33 ("bna: Enable Multi Buffer RX") Signed-off-by: Ivan Vecera Acked-by: Rasesh Mody Signed-off-by: David S. Miller --- drivers/net/ethernet/brocade/bna/bnad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 0612b19f6313..506047c38607 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -676,6 +676,7 @@ bnad_cq_process(struct bnad *bnad, struct bna_ccb *ccb, int budget) if (!next_cmpl->valid) break; } + packets++; /* TODO: BNA_CQ_EF_LOCAL ? */ if (unlikely(flags & (BNA_CQ_EF_MAC_ERROR | @@ -692,7 +693,6 @@ bnad_cq_process(struct bnad *bnad, struct bna_ccb *ccb, int budget) else bnad_cq_setup_skb_frags(rcb, skb, sop_ci, nvecs, len); - packets++; rcb->rxq->rx_packets++; rcb->rxq->rx_bytes += totlen; ccb->bytes_per_intr += totlen; -- cgit v1.2.3 From 4e7c1330689e27556de407d3fdadc65ffff5eb12 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 7 Aug 2015 00:26:41 +0200 Subject: netlink: make sure -EBUSY won't escape from netlink_insert Linus reports the following deadlock on rtnl_mutex; triggered only once so far (extract): [12236.694209] NetworkManager D 0000000000013b80 0 1047 1 0x00000000 [12236.694218] ffff88003f902640 0000000000000000 ffffffff815d15a9 0000000000000018 [12236.694224] ffff880119538000 ffff88003f902640 ffffffff81a8ff84 00000000ffffffff [12236.694230] ffffffff81a8ff88 ffff880119c47f00 ffffffff815d133a ffffffff81a8ff80 [12236.694235] Call Trace: [12236.694250] [] ? schedule_preempt_disabled+0x9/0x10 [12236.694257] [] ? schedule+0x2a/0x70 [12236.694263] [] ? schedule_preempt_disabled+0x9/0x10 [12236.694271] [] ? __mutex_lock_slowpath+0x7f/0xf0 [12236.694280] [] ? mutex_lock+0x16/0x30 [12236.694291] [] ? rtnetlink_rcv+0x10/0x30 [12236.694299] [] ? netlink_unicast+0xfb/0x180 [12236.694309] [] ? rtnl_getlink+0x113/0x190 [12236.694319] [] ? rtnetlink_rcv_msg+0x7a/0x210 [12236.694331] [] ? sock_has_perm+0x5c/0x70 [12236.694339] [] ? rtnetlink_rcv+0x30/0x30 [12236.694346] [] ? netlink_rcv_skb+0x9c/0xc0 [12236.694354] [] ? rtnetlink_rcv+0x1f/0x30 [12236.694360] [] ? netlink_unicast+0xfb/0x180 [12236.694367] [] ? netlink_sendmsg+0x484/0x5d0 [12236.694376] [] ? __wake_up+0x2f/0x50 [12236.694387] [] ? sock_sendmsg+0x33/0x40 [12236.694396] [] ? ___sys_sendmsg+0x22e/0x240 [12236.694405] [] ? ___sys_recvmsg+0x135/0x1a0 [12236.694415] [] ? eventfd_write+0x82/0x210 [12236.694423] [] ? fsnotify+0x32e/0x4c0 [12236.694429] [] ? wake_up_q+0x60/0x60 [12236.694434] [] ? __sys_sendmsg+0x39/0x70 [12236.694440] [] ? entry_SYSCALL_64_fastpath+0x12/0x6a It seems so far plausible that the recursive call into rtnetlink_rcv() looks suspicious. One way, where this could trigger is that the senders NETLINK_CB(skb).portid was wrongly 0 (which is rtnetlink socket), so the rtnl_getlink() request's answer would be sent to the kernel instead to the actual user process, thus grabbing rtnl_mutex() twice. One theory would be that netlink_autobind() triggered via netlink_sendmsg() internally overwrites the -EBUSY error to 0, but where it is wrongly originating from __netlink_insert() instead. That would reset the socket's portid to 0, which is then filled into NETLINK_CB(skb).portid later on. As commit d470e3b483dc ("[NETLINK]: Fix two socket hashing bugs.") also puts it, -EBUSY should not be propagated from netlink_insert(). It looks like it's very unlikely to reproduce. We need to trigger the rhashtable_insert_rehash() handler under a situation where rehashing currently occurs (one /rare/ way would be to hit ht->elasticity limits while not filled enough to expand the hashtable, but that would rather require a specifically crafted bind() sequence with knowledge about destination slots, seems unlikely). It probably makes sense to guard __netlink_insert() in any case and remap that error. It was suggested that EOVERFLOW might be better than an already overloaded ENOMEM. Reference: http://thread.gmane.org/gmane.linux.network/372676 Reported-by: Linus Torvalds Signed-off-by: Daniel Borkmann Acked-by: Herbert Xu Acked-by: Thomas Graf Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index d8e2e3918ce2..67d210477863 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1096,6 +1096,11 @@ static int netlink_insert(struct sock *sk, u32 portid) err = __netlink_insert(table, sk); if (err) { + /* In case the hashtable backend returns with -EBUSY + * from here, it must not escape to the caller. + */ + if (unlikely(err == -EBUSY)) + err = -EOVERFLOW; if (err == -EEXIST) err = -EADDRINUSE; nlk_sk(sk)->portid = 0; -- cgit v1.2.3 From 330567b71d8716704b189454553c2696e1eceb6c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 7 Aug 2015 10:54:28 +0200 Subject: ipv6: don't reject link-local nexthop on other interface 48ed7b26faa7 ("ipv6: reject locally assigned nexthop addresses") is too strict; it rejects following corner-case: ip -6 route add default via fe80::1:2:3 dev eth1 [ where fe80::1:2:3 is assigned to a local interface, but not eth1 ] Fix this by restricting search to given device if nh is linklocal. Joint work with Hannes Frederic Sowa. Fixes: 48ed7b26faa7 ("ipv6: reject locally assigned nexthop addresses") Signed-off-by: Hannes Frederic Sowa Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6090969937f8..9de4d2bcd916 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1831,6 +1831,7 @@ int ip6_route_add(struct fib6_config *cfg) int gwa_type; gw_addr = &cfg->fc_gateway; + gwa_type = ipv6_addr_type(gw_addr); /* if gw_addr is local we will fail to detect this in case * address is still TENTATIVE (DAD in progress). rt6_lookup() @@ -1838,11 +1839,12 @@ int ip6_route_add(struct fib6_config *cfg) * prefix route was assigned to, which might be non-loopback. */ err = -EINVAL; - if (ipv6_chk_addr_and_flags(net, gw_addr, NULL, 0, 0)) + if (ipv6_chk_addr_and_flags(net, gw_addr, + gwa_type & IPV6_ADDR_LINKLOCAL ? + dev : NULL, 0, 0)) goto out; rt->rt6i_gateway = *gw_addr; - gwa_type = ipv6_addr_type(gw_addr); if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { struct rt6_info *grt; -- cgit v1.2.3 From 7a76a021cd5a292be875fbc616daf03eab1e6996 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Fri, 7 Aug 2015 09:32:21 -0700 Subject: net-timestamp: Update skb_complete_tx_timestamp comment After "62bccb8 net-timestamp: Make the clone operation stand-alone from phy timestamping" the hwtstamps parameter of skb_complete_tx_timestamp() may no longer be NULL. Signed-off-by: Benjamin Poirier Cc: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d6cdd6e87d53..22b6d9ca1654 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2884,11 +2884,11 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb) * * PHY drivers may accept clones of transmitted packets for * timestamping via their phy_driver.txtstamp method. These drivers - * must call this function to return the skb back to the stack, with - * or without a timestamp. + * must call this function to return the skb back to the stack with a + * timestamp. * * @skb: clone of the the original outgoing packet - * @hwtstamps: hardware time stamps, may be NULL if not available + * @hwtstamps: hardware time stamps * */ void skb_complete_tx_timestamp(struct sk_buff *skb, -- cgit v1.2.3 From 21a447637d28eb824a1163c1fc5f41ffa4b28e33 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 8 Aug 2015 22:15:25 +0300 Subject: cxgb4: missing curly braces in t4_setup_debugfs() There were missing curly braces so it means we call add_debugfs_mem() unintentionally. Fixes: 3ccc6cf74d8c ('cxgb4: Adds support for T6 adapter') Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index a11485fbb33f..c3c7db41819d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2332,10 +2332,11 @@ int t4_setup_debugfs(struct adapter *adap) EXT_MEM1_SIZE_G(size)); } } else { - if (i & EXT_MEM_ENABLE_F) + if (i & EXT_MEM_ENABLE_F) { size = t4_read_reg(adap, MA_EXT_MEMORY_BAR_A); add_debugfs_mem(adap, "mc", MEM_MC, EXT_MEM_SIZE_G(size)); + } } de = debugfs_create_file_size("flash", S_IRUSR, adap->debugfs_root, adap, -- cgit v1.2.3 From e1615903eb6b5e599396d4b3d8e3e96f6d432a6e Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 10 Aug 2015 12:49:35 +0300 Subject: bnx2x: Prevent null pointer dereference on SKB release On error flows its possible to free an SKB even if it was not allocated. Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index a90d7364334f..f7fbdc9d1325 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -262,9 +262,9 @@ static u16 bnx2x_free_tx_pkt(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata, if (likely(skb)) { (*pkts_compl)++; (*bytes_compl) += skb->len; + dev_kfree_skb_any(skb); } - dev_kfree_skb_any(skb); tx_buf->first_bd = 0; tx_buf->skb = NULL; -- cgit v1.2.3 From 0ea853dfa93371e651d8b7b27fd2344e973a86ed Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 10 Aug 2015 12:49:36 +0300 Subject: bnx2x: Free NVRAM lock at end of each page Writing each 4Kb page into flash might take up-to ~100 miliseconds, during which time management firmware cannot acces the nvram for its own uses. Firmware upgrade utility use the ethtool API to burn new flash images for the device via the ethtool API, doing so by writing several page-worth of data on each command. Such action might create problems for the management firmware, as the nvram might not be accessible for a long time. This patch changes the write implementation, releasing the nvram lock on the completion of each page, allowing the management firmware time to claim it and perform its own required actions. Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 76b9052a961c..5907c821d131 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -1718,6 +1718,22 @@ static int bnx2x_nvram_write(struct bnx2x *bp, u32 offset, u8 *data_buf, offset += sizeof(u32); data_buf += sizeof(u32); written_so_far += sizeof(u32); + + /* At end of each 4Kb page, release nvram lock to allow MFW + * chance to take it for its own use. + */ + if ((cmd_flags & MCPR_NVM_COMMAND_LAST) && + (written_so_far < buf_size)) { + DP(BNX2X_MSG_ETHTOOL | BNX2X_MSG_NVM, + "Releasing NVM lock after offset 0x%x\n", + (u32)(offset - sizeof(u32))); + bnx2x_release_nvram_lock(bp); + usleep_range(1000, 2000); + rc = bnx2x_acquire_nvram_lock(bp); + if (rc) + return rc; + } + cmd_flags = 0; } -- cgit v1.2.3 From 9d332d92a95c9c67abe08b5f7cba64d8fc1e3c76 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 10 Aug 2015 14:22:43 -0300 Subject: mkiss: Fix error handling in mkiss_open() If register_netdev() fails we are not propagating the error and we return success because ax_open() succeeded previously. Fix this by checking the return value of ax_open() and register_netdev() and propagate the error in case of failure. Reported-by: RUC_Soft_Sec Signed-off-by: Fabio Estevam Signed-off-by: David S. Miller --- drivers/net/hamradio/mkiss.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 2ffbf13471d0..216bfd350169 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -728,11 +728,12 @@ static int mkiss_open(struct tty_struct *tty) dev->type = ARPHRD_AX25; /* Perform the low-level AX25 initialization. */ - if ((err = ax_open(ax->dev))) { + err = ax_open(ax->dev); + if (err) goto out_free_netdev; - } - if (register_netdev(dev)) + err = register_netdev(dev); + if (err) goto out_free_buffers; /* after register_netdev() - because else printk smashes the kernel */ -- cgit v1.2.3 From 2235f2ac75fd2501c251b0b699a9632e80239a6d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Aug 2015 09:09:13 -0700 Subject: inet: fix races with reqsk timers reqsk_queue_destroy() and reqsk_queue_unlink() should use del_timer_sync() instead of del_timer() before calling reqsk_put(), otherwise we could free a req still used by another cpu. But before doing so, reqsk_queue_destroy() must release syn_wait_lock spinlock or risk a dead lock, as reqsk_timer_handler() might need to take this same spinlock from reqsk_queue_unlink() (called from inet_csk_reqsk_queue_drop()) Fixes: fa76ce7328b2 ("inet: get rid of central tcp/dccp listener timer") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/request_sock.c | 8 +++++++- net/ipv4/inet_connection_sock.c | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 87b22c0bc08c..b42f0e26f89e 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -103,10 +103,16 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) spin_lock_bh(&queue->syn_wait_lock); while ((req = lopt->syn_table[i]) != NULL) { lopt->syn_table[i] = req->dl_next; + /* Because of following del_timer_sync(), + * we must release the spinlock here + * or risk a dead lock. + */ + spin_unlock_bh(&queue->syn_wait_lock); atomic_inc(&lopt->qlen_dec); - if (del_timer(&req->rsk_timer)) + if (del_timer_sync(&req->rsk_timer)) reqsk_put(req); reqsk_put(req); + spin_lock_bh(&queue->syn_wait_lock); } spin_unlock_bh(&queue->syn_wait_lock); } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 60021d0d9326..05e3145f7dc3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -593,7 +593,7 @@ static bool reqsk_queue_unlink(struct request_sock_queue *queue, } spin_unlock(&queue->syn_wait_lock); - if (del_timer(&req->rsk_timer)) + if (del_timer_sync(&req->rsk_timer)) reqsk_put(req); return found; } -- cgit v1.2.3 From 3257d8b12f954c462d29de6201664a846328a522 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 10 Aug 2015 15:07:34 -0700 Subject: inet: fix possible request socket leak In commit b357a364c57c9 ("inet: fix possible panic in reqsk_queue_unlink()"), I missed fact that tcp_check_req() can return the listener socket in one case, and that we must release the request socket refcount or we leak it. Tested: Following packetdrill test template shows the issue 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 +0 < S 0:0(0) win 2920 +0 > S. 0:0(0) ack 1 +.002 < . 1:1(0) ack 21 win 2920 +0 > R 21:21(0) Fixes: b357a364c57c9 ("inet: fix possible panic in reqsk_queue_unlink()") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d7d4c2b79cf2..0ea2e1c5d395 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1348,7 +1348,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); if (req) { nsk = tcp_check_req(sk, skb, req, false); - if (!nsk) + if (!nsk || nsk == sk) reqsk_put(req); return nsk; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6748c4277aff..7a6cea5e4274 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -943,7 +943,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); if (req) { nsk = tcp_check_req(sk, skb, req, false); - if (!nsk) + if (!nsk || nsk == sk) reqsk_put(req); return nsk; } -- cgit v1.2.3 From 8961822c46cc363c239503f998a6d24bbeb346d5 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Tue, 11 Aug 2015 12:11:00 +0200 Subject: net: fs_enet: explicitly remove I flag on TX partial frames We are not interested in interrupts for partially transmitted frames, we have to clear BD_ENET_TX_INTR explicitly otherwise it may remain from a previously used descriptor. Signed-off-by: Christophe Leroy Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 56316db6c5a6..cf8e54652df9 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -586,7 +586,8 @@ static int fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) frag = skb_shinfo(skb)->frags; while (nr_frags) { CBDC_SC(bdp, - BD_ENET_TX_STATS | BD_ENET_TX_LAST | BD_ENET_TX_TC); + BD_ENET_TX_STATS | BD_ENET_TX_INTR | BD_ENET_TX_LAST | + BD_ENET_TX_TC); CBDS_SC(bdp, BD_ENET_TX_READY); if ((CBDR_SC(bdp) & BD_ENET_TX_WRAP) == 0) -- cgit v1.2.3 From c68875fa82a8ab2f45a32aa8adab059f3cb1ed01 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Tue, 11 Aug 2015 12:11:03 +0200 Subject: net: fs_enet: mask interrupts for TX partial frames. We are not interested in interrupts for partially transmitted frames. Unlike SCC and FCC, the FEC doesn't handle the I bit in buffer descriptors, instead it defines two interrupt bits, TXB and TXF. We have to mask TXB in order to only get interrupts once the frame is fully transmitted. Signed-off-by: Christophe Leroy Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fs_enet/mac-fec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c index b34214e2df5f..016743e355de 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c @@ -110,7 +110,7 @@ static int do_pd_setup(struct fs_enet_private *fep) } #define FEC_NAPI_RX_EVENT_MSK (FEC_ENET_RXF | FEC_ENET_RXB) -#define FEC_NAPI_TX_EVENT_MSK (FEC_ENET_TXF | FEC_ENET_TXB) +#define FEC_NAPI_TX_EVENT_MSK (FEC_ENET_TXF) #define FEC_RX_EVENT (FEC_ENET_RXF) #define FEC_TX_EVENT (FEC_ENET_TXF) #define FEC_ERR_EVENT_MSK (FEC_ENET_HBERR | FEC_ENET_BABR | \ -- cgit v1.2.3 From 211c504a444710b1d8ce3431ac19f2578602ca27 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sat, 8 Aug 2015 12:58:57 -0700 Subject: net: dsa: Do not override PHY interface if already configured In case we need to divert reads/writes using the slave MII bus, we may have already fetched a valid PHY interface property from Device Tree, and that mode is used by the PHY driver to make configuration decisions. If we could not fetch the "phy-mode" property, we will assign p->phy_interface to PHY_INTERFACE_MODE_NA, such that we can actually check for that condition as to whether or not we should override the interface value. Fixes: 19334920eaf7 ("net: dsa: Set valid phy interface type") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/slave.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 0917123790ea..35c47ddd04f0 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -756,7 +756,8 @@ static int dsa_slave_phy_connect(struct dsa_slave_priv *p, return -ENODEV; /* Use already configured phy mode */ - p->phy_interface = p->phy->interface; + if (p->phy_interface == PHY_INTERFACE_MODE_NA) + p->phy_interface = p->phy->interface; phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, p->phy_interface); -- cgit v1.2.3 From b02e3e948de6c11fded1821d89012e24d953da12 Mon Sep 17 00:00:00 2001 From: Venkat Venkatsubra Date: Tue, 11 Aug 2015 07:57:23 -0700 Subject: bonding: Gratuitous ARP gets dropped when first slave added When the first slave is added (such as during bootup) the first gratuitous ARP gets dropped. We don't see this drop during a failover. The packet gets dropped in qdisc (noop_enqueue). The fix is to delay the sending of gratuitous ARPs till the bond dev's carrier is present. It can also be worked around by setting num_grat_arp to more than 1. Signed-off-by: Venkat Venkatsubra Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e1ccefce9a9d..a98dd4f1b0e3 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -786,6 +786,7 @@ static bool bond_should_notify_peers(struct bonding *bond) slave ? slave->dev->name : "NULL"); if (!slave || !bond->send_peer_notif || + !netif_carrier_ok(bond->dev) || test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state)) return false; -- cgit v1.2.3 From a898fe040f62a32b90e26dc1f1b5973608054b29 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 12 Aug 2015 02:41:55 +0200 Subject: gianfar: correct filer table writing MAX_FILER_IDX is the last usable index. Using less-than will already guarantee that one entry for catch-all rule will be left, no need to subtract 1 here. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar_ethtool.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index 3c0a8f825b63..4a710f3eb5eb 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -1583,11 +1583,10 @@ static int gfar_write_filer_table(struct gfar_private *priv, return -EBUSY; /* Fill regular entries */ - for (; i < MAX_FILER_IDX - 1 && (tab->fe[i].ctrl | tab->fe[i].prop); - i++) + for (; i < MAX_FILER_IDX && (tab->fe[i].ctrl | tab->fe[i].prop); i++) gfar_write_filer(priv, i, tab->fe[i].ctrl, tab->fe[i].prop); /* Fill the rest with fall-troughs */ - for (; i < MAX_FILER_IDX - 1; i++) + for (; i < MAX_FILER_IDX; i++) gfar_write_filer(priv, i, 0x60, 0xFFFFFFFF); /* Last entry must be default accept * because that's what people expect -- cgit v1.2.3 From b5c8c8906e425f71efb83291c3837e4b78b769ea Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 12 Aug 2015 02:41:56 +0200 Subject: gianfar: correct list membership accounting At a cost of one line let's make sure .count is correct when calling gfar_process_filer_changes(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar_ethtool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index 4a710f3eb5eb..f477b67730bb 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -1721,13 +1721,14 @@ static int gfar_add_cls(struct gfar_private *priv, } process: + priv->rx_list.count++; ret = gfar_process_filer_changes(priv); if (ret) goto clean_list; - priv->rx_list.count++; return ret; clean_list: + priv->rx_list.count--; list_del(&temp->list); clean_mem: kfree(temp); -- cgit v1.2.3 From 1f2b72933422dfdaa80b59dc3a4c37eef25c4297 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 12 Aug 2015 02:41:57 +0200 Subject: gianfar: remove faulty filer optimizer Current filer rule optimization is broken in several ways: (1) Can perform reads/writes beyond end of allocated tables. (gianfar_ethtool.c:1326). (2) It breaks badly for rules with more than 2 specifiers (e.g. matching ip, port, tos). Example: # ethtool -N eth2 flow-type udp4 dst-ip 10.0.0.1 dst-port 1 tos 1 action 1 Added rule with ID 254 # ethtool -N eth2 flow-type udp4 dst-ip 10.0.0.2 dst-port 2 tos 2 action 9 Added rule with ID 253 # ethtool -N eth2 flow-type udp4 dst-ip 10.0.0.3 dst-port 3 tos 3 action 17 Added rule with ID 252 # ./filer_decode /sys/kernel/debug/gfar1/filer_raw 00: MASK == 00000210 AND Q:00 ctrl:00000080 prop:00000210 01: FPR == 00000210 AND CLE Q:00 ctrl:00000281 prop:00000210 02: MASK == ffffffff AND Q:00 ctrl:00000080 prop:ffffffff 03: DPT == 00000003 AND Q:00 ctrl:0000008e prop:00000003 04: TOS == 00000003 AND Q:00 ctrl:0000008a prop:00000003 05: DIA == 0a000003 AND Q:11 ctrl:0000448c prop:0a000003 06: DPT == 00000002 AND Q:00 ctrl:0000008e prop:00000002 07: TOS == 00000002 AND Q:00 ctrl:0000008a prop:00000002 08: DIA == 0a000002 AND Q:09 ctrl:0000248c prop:0a000002 09: DIA == 0a000001 AND Q:00 ctrl:0000008c prop:0a000001 0a: DPT == 00000001 AND Q:00 ctrl:0000008e prop:00000001 0b: TOS == 00000001 CLE Q:01 ctrl:0000060a prop:00000001 ff: MASK >= 00000000 Q:00 ctrl:00000020 prop:00000000 (Entire cluster gets AND-ed together). (3) We observed that the masking rules it generates do not play well with clustering on P2020. Only first rule of the cluster would ever fire. Given that optimizer relies heavily on masking this is very hard to fix. Example: # ethtool -N eth2 flow-type udp4 dst-ip 10.0.0.1 dst-port 1 action 1 Added rule with ID 254 # ethtool -N eth2 flow-type udp4 dst-ip 10.0.0.2 dst-port 2 action 9 Added rule with ID 253 # ethtool -N eth2 flow-type udp4 dst-ip 10.0.0.3 dst-port 3 action 17 Added rule with ID 252 # ./filer_decode /sys/kernel/debug/gfar1/filer_raw 00: MASK == 00000210 AND Q:00 ctrl:00000080 prop:00000210 01: FPR == 00000210 AND CLE Q:00 ctrl:00000281 prop:00000210 02: MASK == ffffffff AND Q:00 ctrl:00000080 prop:ffffffff 03: DPT == 00000003 AND Q:00 ctrl:0000008e prop:00000003 04: DIA == 0a000003 Q:11 ctrl:0000440c prop:0a000003 05: DPT == 00000002 AND Q:00 ctrl:0000008e prop:00000002 06: DIA == 0a000002 Q:09 ctrl:0000240c prop:0a000002 07: DIA == 0a000001 AND Q:00 ctrl:0000008c prop:0a000001 08: DPT == 00000001 CLE Q:01 ctrl:0000060e prop:00000001 ff: MASK >= 00000000 Q:00 ctrl:00000020 prop:00000000 Which looks correct according to the spec but only the first (eth id 252)/last added rule for 10.0.0.3 will ever trigger. As if filer did not treat the AND CLE as cluster start but also kept AND-ing the rules. We found no errata covering this. The fact that nobody noticed (2) or (3) makes me think that this feature is not very widely used and we should just remove it. Reported-by: Aleksander Dutkowski Signed-off-by: Jakub Kicinski Acked-by: Claudiu Manoil Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar_ethtool.c | 337 ----------------------- 1 file changed, 337 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index f477b67730bb..5b90fcf96265 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -900,27 +900,6 @@ static int gfar_check_filer_hardware(struct gfar_private *priv) return 0; } -static int gfar_comp_asc(const void *a, const void *b) -{ - return memcmp(a, b, 4); -} - -static int gfar_comp_desc(const void *a, const void *b) -{ - return -memcmp(a, b, 4); -} - -static void gfar_swap(void *a, void *b, int size) -{ - u32 *_a = a; - u32 *_b = b; - - swap(_a[0], _b[0]); - swap(_a[1], _b[1]); - swap(_a[2], _b[2]); - swap(_a[3], _b[3]); -} - /* Write a mask to filer cache */ static void gfar_set_mask(u32 mask, struct filer_table *tab) { @@ -1270,310 +1249,6 @@ static int gfar_convert_to_filer(struct ethtool_rx_flow_spec *rule, return 0; } -/* Copy size filer entries */ -static void gfar_copy_filer_entries(struct gfar_filer_entry dst[0], - struct gfar_filer_entry src[0], s32 size) -{ - while (size > 0) { - size--; - dst[size].ctrl = src[size].ctrl; - dst[size].prop = src[size].prop; - } -} - -/* Delete the contents of the filer-table between start and end - * and collapse them - */ -static int gfar_trim_filer_entries(u32 begin, u32 end, struct filer_table *tab) -{ - int length; - - if (end > MAX_FILER_CACHE_IDX || end < begin) - return -EINVAL; - - end++; - length = end - begin; - - /* Copy */ - while (end < tab->index) { - tab->fe[begin].ctrl = tab->fe[end].ctrl; - tab->fe[begin++].prop = tab->fe[end++].prop; - - } - /* Fill up with don't cares */ - while (begin < tab->index) { - tab->fe[begin].ctrl = 0x60; - tab->fe[begin].prop = 0xFFFFFFFF; - begin++; - } - - tab->index -= length; - return 0; -} - -/* Make space on the wanted location */ -static int gfar_expand_filer_entries(u32 begin, u32 length, - struct filer_table *tab) -{ - if (length == 0 || length + tab->index > MAX_FILER_CACHE_IDX || - begin > MAX_FILER_CACHE_IDX) - return -EINVAL; - - gfar_copy_filer_entries(&(tab->fe[begin + length]), &(tab->fe[begin]), - tab->index - length + 1); - - tab->index += length; - return 0; -} - -static int gfar_get_next_cluster_start(int start, struct filer_table *tab) -{ - for (; (start < tab->index) && (start < MAX_FILER_CACHE_IDX - 1); - start++) { - if ((tab->fe[start].ctrl & (RQFCR_AND | RQFCR_CLE)) == - (RQFCR_AND | RQFCR_CLE)) - return start; - } - return -1; -} - -static int gfar_get_next_cluster_end(int start, struct filer_table *tab) -{ - for (; (start < tab->index) && (start < MAX_FILER_CACHE_IDX - 1); - start++) { - if ((tab->fe[start].ctrl & (RQFCR_AND | RQFCR_CLE)) == - (RQFCR_CLE)) - return start; - } - return -1; -} - -/* Uses hardwares clustering option to reduce - * the number of filer table entries - */ -static void gfar_cluster_filer(struct filer_table *tab) -{ - s32 i = -1, j, iend, jend; - - while ((i = gfar_get_next_cluster_start(++i, tab)) != -1) { - j = i; - while ((j = gfar_get_next_cluster_start(++j, tab)) != -1) { - /* The cluster entries self and the previous one - * (a mask) must be identical! - */ - if (tab->fe[i].ctrl != tab->fe[j].ctrl) - break; - if (tab->fe[i].prop != tab->fe[j].prop) - break; - if (tab->fe[i - 1].ctrl != tab->fe[j - 1].ctrl) - break; - if (tab->fe[i - 1].prop != tab->fe[j - 1].prop) - break; - iend = gfar_get_next_cluster_end(i, tab); - jend = gfar_get_next_cluster_end(j, tab); - if (jend == -1 || iend == -1) - break; - - /* First we make some free space, where our cluster - * element should be. Then we copy it there and finally - * delete in from its old location. - */ - if (gfar_expand_filer_entries(iend, (jend - j), tab) == - -EINVAL) - break; - - gfar_copy_filer_entries(&(tab->fe[iend + 1]), - &(tab->fe[jend + 1]), jend - j); - - if (gfar_trim_filer_entries(jend - 1, - jend + (jend - j), - tab) == -EINVAL) - return; - - /* Mask out cluster bit */ - tab->fe[iend].ctrl &= ~(RQFCR_CLE); - } - } -} - -/* Swaps the masked bits of a1<>a2 and b1<>b2 */ -static void gfar_swap_bits(struct gfar_filer_entry *a1, - struct gfar_filer_entry *a2, - struct gfar_filer_entry *b1, - struct gfar_filer_entry *b2, u32 mask) -{ - u32 temp[4]; - temp[0] = a1->ctrl & mask; - temp[1] = a2->ctrl & mask; - temp[2] = b1->ctrl & mask; - temp[3] = b2->ctrl & mask; - - a1->ctrl &= ~mask; - a2->ctrl &= ~mask; - b1->ctrl &= ~mask; - b2->ctrl &= ~mask; - - a1->ctrl |= temp[1]; - a2->ctrl |= temp[0]; - b1->ctrl |= temp[3]; - b2->ctrl |= temp[2]; -} - -/* Generate a list consisting of masks values with their start and - * end of validity and block as indicator for parts belonging - * together (glued by ANDs) in mask_table - */ -static u32 gfar_generate_mask_table(struct gfar_mask_entry *mask_table, - struct filer_table *tab) -{ - u32 i, and_index = 0, block_index = 1; - - for (i = 0; i < tab->index; i++) { - - /* LSByte of control = 0 sets a mask */ - if (!(tab->fe[i].ctrl & 0xF)) { - mask_table[and_index].mask = tab->fe[i].prop; - mask_table[and_index].start = i; - mask_table[and_index].block = block_index; - if (and_index >= 1) - mask_table[and_index - 1].end = i - 1; - and_index++; - } - /* cluster starts and ends will be separated because they should - * hold their position - */ - if (tab->fe[i].ctrl & RQFCR_CLE) - block_index++; - /* A not set AND indicates the end of a depended block */ - if (!(tab->fe[i].ctrl & RQFCR_AND)) - block_index++; - } - - mask_table[and_index - 1].end = i - 1; - - return and_index; -} - -/* Sorts the entries of mask_table by the values of the masks. - * Important: The 0xFF80 flags of the first and last entry of a - * block must hold their position (which queue, CLusterEnable, ReJEct, - * AND) - */ -static void gfar_sort_mask_table(struct gfar_mask_entry *mask_table, - struct filer_table *temp_table, u32 and_index) -{ - /* Pointer to compare function (_asc or _desc) */ - int (*gfar_comp)(const void *, const void *); - - u32 i, size = 0, start = 0, prev = 1; - u32 old_first, old_last, new_first, new_last; - - gfar_comp = &gfar_comp_desc; - - for (i = 0; i < and_index; i++) { - if (prev != mask_table[i].block) { - old_first = mask_table[start].start + 1; - old_last = mask_table[i - 1].end; - sort(mask_table + start, size, - sizeof(struct gfar_mask_entry), - gfar_comp, &gfar_swap); - - /* Toggle order for every block. This makes the - * thing more efficient! - */ - if (gfar_comp == gfar_comp_desc) - gfar_comp = &gfar_comp_asc; - else - gfar_comp = &gfar_comp_desc; - - new_first = mask_table[start].start + 1; - new_last = mask_table[i - 1].end; - - gfar_swap_bits(&temp_table->fe[new_first], - &temp_table->fe[old_first], - &temp_table->fe[new_last], - &temp_table->fe[old_last], - RQFCR_QUEUE | RQFCR_CLE | - RQFCR_RJE | RQFCR_AND); - - start = i; - size = 0; - } - size++; - prev = mask_table[i].block; - } -} - -/* Reduces the number of masks needed in the filer table to save entries - * This is done by sorting the masks of a depended block. A depended block is - * identified by gluing ANDs or CLE. The sorting order toggles after every - * block. Of course entries in scope of a mask must change their location with - * it. - */ -static int gfar_optimize_filer_masks(struct filer_table *tab) -{ - struct filer_table *temp_table; - struct gfar_mask_entry *mask_table; - - u32 and_index = 0, previous_mask = 0, i = 0, j = 0, size = 0; - s32 ret = 0; - - /* We need a copy of the filer table because - * we want to change its order - */ - temp_table = kmemdup(tab, sizeof(*temp_table), GFP_KERNEL); - if (temp_table == NULL) - return -ENOMEM; - - mask_table = kcalloc(MAX_FILER_CACHE_IDX / 2 + 1, - sizeof(struct gfar_mask_entry), GFP_KERNEL); - - if (mask_table == NULL) { - ret = -ENOMEM; - goto end; - } - - and_index = gfar_generate_mask_table(mask_table, tab); - - gfar_sort_mask_table(mask_table, temp_table, and_index); - - /* Now we can copy the data from our duplicated filer table to - * the real one in the order the mask table says - */ - for (i = 0; i < and_index; i++) { - size = mask_table[i].end - mask_table[i].start + 1; - gfar_copy_filer_entries(&(tab->fe[j]), - &(temp_table->fe[mask_table[i].start]), size); - j += size; - } - - /* And finally we just have to check for duplicated masks and drop the - * second ones - */ - for (i = 0; i < tab->index && i < MAX_FILER_CACHE_IDX; i++) { - if (tab->fe[i].ctrl == 0x80) { - previous_mask = i++; - break; - } - } - for (; i < tab->index && i < MAX_FILER_CACHE_IDX; i++) { - if (tab->fe[i].ctrl == 0x80) { - if (tab->fe[i].prop == tab->fe[previous_mask].prop) { - /* Two identical ones found! - * So drop the second one! - */ - gfar_trim_filer_entries(i, i, tab); - } else - /* Not identical! */ - previous_mask = i; - } - } - - kfree(mask_table); -end: kfree(temp_table); - return ret; -} - /* Write the bit-pattern from software's buffer to hardware registers */ static int gfar_write_filer_table(struct gfar_private *priv, struct filer_table *tab) @@ -1620,7 +1295,6 @@ static int gfar_process_filer_changes(struct gfar_private *priv) { struct ethtool_flow_spec_container *j; struct filer_table *tab; - s32 i = 0; s32 ret = 0; /* So index is set to zero, too! */ @@ -1645,17 +1319,6 @@ static int gfar_process_filer_changes(struct gfar_private *priv) } } - i = tab->index; - - /* Optimizations to save entries */ - gfar_cluster_filer(tab); - gfar_optimize_filer_masks(tab); - - pr_debug("\tSummary:\n" - "\tData on hardware: %d\n" - "\tCompression rate: %d%%\n", - tab->index, 100 - (100 * tab->index) / i); - /* Write everything to hardware */ ret = gfar_write_filer_table(priv, tab); if (ret == -EBUSY) { -- cgit v1.2.3 From e6d006938c9bda7ffd22af9d3e1257fd75941fb7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Aug 2015 00:08:01 +0300 Subject: cosa: missing error code on failure in probe() If register_hdlc_device() fails, the current code returns 0 but we should return an error code instead. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/wan/cosa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 7193b7304fdd..848ea6a399f2 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -589,7 +589,8 @@ static int cosa_probe(int base, int irq, int dma) chan->netdev->base_addr = chan->cosa->datareg; chan->netdev->irq = chan->cosa->irq; chan->netdev->dma = chan->cosa->dma; - if (register_hdlc_device(chan->netdev)) { + err = register_hdlc_device(chan->netdev); + if (err) { netdev_warn(chan->netdev, "register_hdlc_device() failed\n"); free_netdev(chan->netdev); -- cgit v1.2.3