From dbd79aed1aea2bece0bf43cc2ff3b2f9baf48a08 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Tue, 27 May 2008 19:03:16 +0900 Subject: pciehp: fix NULL dereference in interrupt handler Fix the following NULL dereference problem reported from Pierre Ossman and Ingo Molnar. pciehp: HPC vendor_id 8086 device_id 27d0 ss_vid 0 ss_did 0 pciehp: pciehp_find_slot: slot (device=0x0) not found BUG: unable to handle kernel NULL pointer dereference at 0000000000000070 IP: [] pciehp_handle_presence_change+0x7e/0x113 PGD 0 Oops: 0000 [1] CPU 0 Modules linked in: Pid: 1, comm: swapper Tainted: G W 2.6.26-rc3-sched-devel.git-00001-g2b99b26-dirty #170 RIP: 0010:[] [] pciehp_handle_presence_change+0x7e/0x113 RSP: 0000:ffff81003f83fbb0 EFLAGS: 00010046 RAX: 0000000000000039 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000046 RBP: ffff81003f83fbd0 R08: 0000000000000001 R09: ffffffff80245103 R10: 0000000000000020 R11: 0000000000000000 R12: ffff81003ea53a30 R13: 0000000000000000 R14: 0000000000000011 R15: ffffffff80495926 FS: 0000000000000000(0000) GS:ffffffff80be7400(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 0000000000000070 CR3: 0000000000201000 CR4: 00000000000006a0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process swapper (pid: 1, threadinfo ffff81003f83e000, task ffff81003f840000) Stack: 0000000000000008 ffff81003f83fbf6 ffff81003ea53a30 0000000000000008 ffff81003f83fc10 ffffffff80495ab4 0000000000000011 0000000000000002 0000000000000202 0000000000000202 00000000fffffff4 ffff81003ea53a30 Call Trace: [] pcie_isr+0x18e/0x1bc [] request_irq+0x106/0x12f [] pcie_init+0x15e/0x6cc [] pciehp_probe+0x64/0x541 [] pcie_port_probe_service+0x4c/0x76 [] driver_probe_device+0xd4/0x1f0 [] __driver_attach+0x7c/0x7e [] ? __driver_attach+0x0/0x7e [] bus_for_each_dev+0x53/0x7d [] driver_attach+0x1c/0x1e [] bus_add_driver+0xdd/0x25b [] ? pcied_init+0x0/0x8b [] driver_register+0x5f/0x13e [] ? pcied_init+0x0/0x8b [] pcie_port_service_register+0x47/0x49 [] pcied_init+0x15/0x8b [] kernel_init+0x75/0x243 [] ? _spin_unlock_irq+0x2b/0x3a [] ? finish_task_switch+0x57/0x9a [] child_rip+0xa/0x12 [] ? restore_args+0x0/0x30 [] ? kernel_init+0x0/0x243 [] ? child_rip+0x0/0x12 Code: 83 80 00 00 00 48 39 f0 75 e1 0f b6 c9 48 c7 c2 00 0e 8d 80 48 c7 c6 8a 60 a6 80 48 c7 c7 10 db a8 80 31 c0 e8 3f 8d d9 ff 31 db <48> 8b 43 70 48 8d 75 ef 48 89 df ff 50 30 80 7d ef 00 74 37 48 RIP [] pciehp_handle_presence_change+0x7e/0x113 RSP CR2: 0000000000000070 Kernel panic - not syncing: Fatal exception The situation under which it occurs is hw and timing related: it appears to happen on a system that has PCI hotplug hardware but with no active hotplug cards, and another interrupt in the same (shared) IRQ line arrives too early, before the hotplug-slot entry has been set up - as triggered by CONFIG_DEBUG_SHIRQ=y: This patch contains the following two fixes. (1) Clear all events bits in Slot Status register to prevent the pciehp driver from detecting the spurious events that would have been occur before pciehp loading. (2) Add check whether slot initialization had been already done. This is short term fix. We need more structural fixes to install interrupt handler after slot initialization is done. Signed-off-by: Ingo Molnar Signed-off-by: Kenji Kaneshige Signed-off-by: Kristen Carlson Accardi Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp_hpc.c | 42 ++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) (limited to 'drivers/pci/hotplug/pciehp_hpc.c') diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 891f81a0400c..425a0f609977 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -722,6 +722,7 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) { struct controller *ctrl = (struct controller *)dev_id; u16 detected, intr_loc; + struct slot *p_slot; /* * In order to guarantee that all interrupt events are @@ -756,21 +757,38 @@ static irqreturn_t pcie_isr(int irq, void *dev_id) wake_up_interruptible(&ctrl->queue); } + if (!(intr_loc & ~CMD_COMPLETED)) + return IRQ_HANDLED; + + /* + * Return without handling events if this handler routine is + * called before controller initialization is done. This may + * happen if hotplug event or another interrupt that shares + * the IRQ with pciehp arrives before slot initialization is + * done after interrupt handler is registered. + * + * FIXME - Need more structural fixes. We need to be ready to + * handle the event before installing interrupt handler. + */ + p_slot = pciehp_find_slot(ctrl, ctrl->slot_device_offset); + if (!p_slot || !p_slot->hpc_ops) + return IRQ_HANDLED; + /* Check MRL Sensor Changed */ if (intr_loc & MRL_SENS_CHANGED) - pciehp_handle_switch_change(0, ctrl); + pciehp_handle_switch_change(p_slot); /* Check Attention Button Pressed */ if (intr_loc & ATTN_BUTTN_PRESSED) - pciehp_handle_attention_button(0, ctrl); + pciehp_handle_attention_button(p_slot); /* Check Presence Detect Changed */ if (intr_loc & PRSN_DETECT_CHANGED) - pciehp_handle_presence_change(0, ctrl); + pciehp_handle_presence_change(p_slot); /* Check Power Fault Detected */ if (intr_loc & PWR_FAULT_DETECTED) - pciehp_handle_power_fault(0, ctrl); + pciehp_handle_power_fault(p_slot); return IRQ_HANDLED; } @@ -1028,6 +1046,12 @@ static int pciehp_acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev) static int pcie_init_hardware_part1(struct controller *ctrl, struct pcie_device *dev) { + /* Clear all remaining event bits in Slot Status register */ + if (pciehp_writew(ctrl, SLOTSTATUS, 0x1f)) { + err("%s: Cannot write to SLOTSTATUS register\n", __func__); + return -1; + } + /* Mask Hot-plug Interrupt Enable */ if (pcie_write_cmd(ctrl, 0, HP_INTR_ENABLE | CMD_CMPL_INTR_ENABLE)) { err("%s: Cannot mask hotplug interrupt enable\n", __func__); @@ -1040,16 +1064,6 @@ int pcie_init_hardware_part2(struct controller *ctrl, struct pcie_device *dev) { u16 cmd, mask; - /* - * We need to clear all events before enabling hotplug interrupt - * notification mechanism in order for hotplug controler to - * generate interrupts. - */ - if (pciehp_writew(ctrl, SLOTSTATUS, 0x1f)) { - err("%s: Cannot write to SLOTSTATUS register\n", __FUNCTION__); - return -1; - } - cmd = PRSN_DETECT_ENABLE; if (ATTN_BUTTN(ctrl)) cmd |= ATTN_BUTTN_ENABLE; -- cgit v1.2.3 From 5808639bfa98d69f77a481d759570d85f164fea0 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Tue, 27 May 2008 19:04:30 +0900 Subject: pciehp: fix slow probing Fix the "pciehp probing slow" problem reported from Jan C. Nordholz in http://bugzilla.kernel.org/show_bug.cgi?id=10751. The command completed bit in Slot Status register applies only to commands issued to control the attention indicator, power indicator, power controller, or electromechanical interlock. However, writes to other parts of the Slot Control register would end up writing to the control fields. Hence, any write to Slot Control register is considered as a command. However, if the controller doesn't support any of attention indicator, power indicator, power controller and electromechanical interlock, command completed bit would not set in writing to Slot Control register. In this case, we should not wait for command completed bit set, otherwise all commands would be considered not completed in timeout seconds (1 sec.). The cause of the problem is pciehp driver didn't take this situation into account. This patch changes pciehp to take it into account. This patch also add the check for "No Command Completed Support" bit in Slot Capability register. If it is set, we should not wait for command completed bit set as well. This problem seems to be revealed by the commit c27fb883dffe11aa4cb35ecea1fa1832ba45d4da that fixed the bug that pciehp did not wait for command completed properly (pciehp just ignored the command completion event). Signed-off-by: Kenji Kaneshige Signed-off-by: Kristen Carlson Accardi Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp_hpc.c | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) (limited to 'drivers/pci/hotplug/pciehp_hpc.c') diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 425a0f609977..70940fb3fffa 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -286,12 +286,28 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) goto out; } - if ((slot_status & CMD_COMPLETED) == CMD_COMPLETED ) { - /* After 1 sec and CMD_COMPLETED still not set, just - proceed forward to issue the next command according - to spec. Just print out the error message */ - dbg("%s: CMD_COMPLETED not clear after 1 sec.\n", - __func__); + if (slot_status & CMD_COMPLETED) { + if (!ctrl->no_cmd_complete) { + /* + * After 1 sec and CMD_COMPLETED still not set, just + * proceed forward to issue the next command according + * to spec. Just print out the error message. + */ + dbg("%s: CMD_COMPLETED not clear after 1 sec.\n", + __func__); + } else if (!NO_CMD_CMPL(ctrl)) { + /* + * This controller semms to notify of command completed + * event even though it supports none of power + * controller, attention led, power led and EMI. + */ + dbg("%s: Unexpected CMD_COMPLETED. Need to wait for " + "command completed event.\n", __func__); + ctrl->no_cmd_complete = 0; + } else { + dbg("%s: Unexpected CMD_COMPLETED. Maybe the " + "controller is broken.\n", __func__); + } } retval = pciehp_readw(ctrl, SLOTCTRL, &slot_ctrl); @@ -315,7 +331,7 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) /* * Wait for command completion. */ - if (!retval) + if (!retval && !ctrl->no_cmd_complete) retval = pcie_wait_cmd(ctrl); out: mutex_unlock(&ctrl->ctrl_lock); @@ -1130,6 +1146,7 @@ static inline void dbg_ctrl(struct controller *ctrl) dbg(" Power Indicator : %3s\n", PWR_LED(ctrl) ? "yes" : "no"); dbg(" Hot-Plug Surprise : %3s\n", HP_SUPR_RM(ctrl) ? "yes" : "no"); dbg(" EMI Present : %3s\n", EMI(ctrl) ? "yes" : "no"); + dbg(" Comamnd Completed : %3s\n", NO_CMD_CMPL(ctrl)? "no" : "yes"); pciehp_readw(ctrl, SLOTSTATUS, ®16); dbg("Slot Status : 0x%04x\n", reg16); pciehp_readw(ctrl, SLOTSTATUS, ®16); @@ -1161,6 +1178,15 @@ int pcie_init(struct controller *ctrl, struct pcie_device *dev) mutex_init(&ctrl->ctrl_lock); init_waitqueue_head(&ctrl->queue); dbg_ctrl(ctrl); + /* + * Controller doesn't notify of command completion if the "No + * Command Completed Support" bit is set in Slot Capability + * register or the controller supports none of power + * controller, attention led, power led and EMI. + */ + if (NO_CMD_CMPL(ctrl) || + !(POWER_CTRL(ctrl) | ATTN_LED(ctrl) | PWR_LED(ctrl) | EMI(ctrl))) + ctrl->no_cmd_complete = 1; info("HPC vendor_id %x device_id %x ss_vid %x ss_did %x\n", pdev->vendor, pdev->device, -- cgit v1.2.3 From 6592e02ae4bd7b277230aa0c5821588a13b9d8e3 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Tue, 27 May 2008 19:05:26 +0900 Subject: pciehp: poll cmd completion if hotplug interrupt is disabled Fix improper long wait for command completion in pciehp probing. As described in PCI Express specification, software notification is not generated if the command that occurs as a result of a write to the Slot Control register that disables software notification of command completed events. Since pciehp driver doesn't take it into account, such command is issued in pciehp probing, and it causes improper long wait for command completion. This patch changes the pciehp driver to take such command into account. Signed-off-by: Kenji Kaneshige Signed-off-by: Kristen Carlson Accardi Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp_hpc.c | 42 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) (limited to 'drivers/pci/hotplug/pciehp_hpc.c') diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 70940fb3fffa..eb631af94738 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -247,14 +247,38 @@ static inline void pciehp_free_irq(struct controller *ctrl) free_irq(ctrl->pci_dev->irq, ctrl); } -static inline int pcie_wait_cmd(struct controller *ctrl) +static inline int pcie_poll_cmd(struct controller *ctrl) +{ + u16 slot_status; + int timeout = 1000; + + if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status)) + if (slot_status & CMD_COMPLETED) + goto completed; + for (timeout = 1000; timeout > 0; timeout -= 100) { + msleep(100); + if (!pciehp_readw(ctrl, SLOTSTATUS, &slot_status)) + if (slot_status & CMD_COMPLETED) + goto completed; + } + return 0; /* timeout */ + +completed: + pciehp_writew(ctrl, SLOTSTATUS, CMD_COMPLETED); + return timeout; +} + +static inline int pcie_wait_cmd(struct controller *ctrl, int poll) { int retval = 0; unsigned int msecs = pciehp_poll_mode ? 2500 : 1000; unsigned long timeout = msecs_to_jiffies(msecs); int rc; - rc = wait_event_interruptible_timeout(ctrl->queue, + if (poll) + rc = pcie_poll_cmd(ctrl); + else + rc = wait_event_interruptible_timeout(ctrl->queue, !ctrl->cmd_busy, timeout); if (!rc) dbg("Command not completed in 1000 msec\n"); @@ -331,8 +355,18 @@ static int pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask) /* * Wait for command completion. */ - if (!retval && !ctrl->no_cmd_complete) - retval = pcie_wait_cmd(ctrl); + if (!retval && !ctrl->no_cmd_complete) { + int poll = 0; + /* + * if hotplug interrupt is not enabled or command + * completed interrupt is not enabled, we need to poll + * command completed event. + */ + if (!(slot_ctrl & HP_INTR_ENABLE) || + !(slot_ctrl & CMD_CMPL_INTR_ENABLE)) + poll = 1; + retval = pcie_wait_cmd(ctrl, poll); + } out: mutex_unlock(&ctrl->ctrl_lock); return retval; -- cgit v1.2.3 From 0711c70ec0e9d2c002b1e9b5fb9f21e49d77f4fd Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Tue, 27 May 2008 19:06:22 +0900 Subject: pciehp: move msleep after power off According to the PCI Express specification, we must wait for at least 1 second after turning power off before taking any action that relies on power having been removed from the slot/adapter. For this, current pciehp wait for 1 second after issuing the power off command in hpc_power_off_slot() function. But waiting for 1 second in hpc_power_off_slot() can make pciehp probing slow-down because pciehp probe code calls hpc_power_off_slot() if the slot is not occupied just in case. We don't need to wait for 1 second at the pciehp probe time because there is no action on that empty slot. So move 1 second wait from hpc_power_off_slot() to the caller of hpc_power_off_slot(). Signed-off-by: Kenji Kaneshige Signed-off-by: Kristen Carlson Accardi Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp_hpc.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers/pci/hotplug/pciehp_hpc.c') diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index eb631af94738..79f104963166 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -754,13 +754,6 @@ static int hpc_power_off_slot(struct slot * slot) } dbg("%s: SLOTCTRL %x write cmd %x\n", __func__, ctrl->cap_base + SLOTCTRL, slot_cmd); - - /* - * After turning power off, we must wait for at least 1 second - * before taking any action that relies on power having been - * removed from the slot/adapter. - */ - msleep(1000); out: if (changed) pcie_unmask_bad_dllp(ctrl); -- cgit v1.2.3