From 0a67119fce8e0246ce7c448e3db12afd57857ac0 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 27 Jun 2013 16:39:48 -0600 Subject: PCI: Check all ACS features for multifunction downstream ports The multifunction ACS rules do not apply to downstream ports. Those should be tested regardless of whether they are single function or multifunction. The PCIe spec also fully specifies which PCIe types are subject to the multifunction rules and excludes event collectors and PCIe-to-PCI bridges entirely. Document each rule to the section of the PCIe spec and provide overall documentation of the function. Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas Acked-by: Donald Dutile --- drivers/pci/pci.c | 84 +++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 70 insertions(+), 14 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e37fea6e178d..3c1ff63aa56d 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2359,6 +2359,19 @@ void pci_enable_acs(struct pci_dev *dev) pci_write_config_word(dev, pos + PCI_ACS_CTRL, ctrl); } +static bool pci_acs_flags_enabled(struct pci_dev *pdev, u16 acs_flags) +{ + int pos; + u16 ctrl; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ACS); + if (!pos) + return false; + + pci_read_config_word(pdev, pos + PCI_ACS_CTRL, &ctrl); + return (ctrl & acs_flags) == acs_flags; +} + /** * pci_acs_enabled - test ACS against required flags for a given device * @pdev: device to test @@ -2366,36 +2379,79 @@ void pci_enable_acs(struct pci_dev *dev) * * Return true if the device supports the provided flags. Automatically * filters out flags that are not implemented on multifunction devices. + * + * Note that this interface checks the effective ACS capabilities of the + * device rather than the actual capabilities. For instance, most single + * function endpoints are not required to support ACS because they have no + * opportunity for peer-to-peer access. We therefore return 'true' + * regardless of whether the device exposes an ACS capability. This makes + * it much easier for callers of this function to ignore the actual type + * or topology of the device when testing ACS support. */ bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags) { - int pos, ret; - u16 ctrl; + int ret; ret = pci_dev_specific_acs_enabled(pdev, acs_flags); if (ret >= 0) return ret > 0; + /* + * Conventional PCI and PCI-X devices never support ACS, either + * effectively or actually. The shared bus topology implies that + * any device on the bus can receive or snoop DMA. + */ if (!pci_is_pcie(pdev)) return false; - /* Filter out flags not applicable to multifunction */ - if (pdev->multifunction) + switch (pci_pcie_type(pdev)) { + /* + * PCI/X-to-PCIe bridges are not specifically mentioned by the spec, + * but since their primary inteface is PCI/X, we conservatively + * handle them as we would a non-PCIe device. + */ + case PCI_EXP_TYPE_PCIE_BRIDGE: + /* + * PCIe 3.0, 6.12.1 excludes ACS on these devices. "ACS is never + * applicable... must never implement an ACS Extended Capability...". + * This seems arbitrary, but we take a conservative interpretation + * of this statement. + */ + case PCI_EXP_TYPE_PCI_BRIDGE: + case PCI_EXP_TYPE_RC_EC: + return false; + /* + * PCIe 3.0, 6.12.1.1 specifies that downstream and root ports should + * implement ACS in order to indicate their peer-to-peer capabilities, + * regardless of whether they are single- or multi-function devices. + */ + case PCI_EXP_TYPE_DOWNSTREAM: + case PCI_EXP_TYPE_ROOT_PORT: + return pci_acs_flags_enabled(pdev, acs_flags); + /* + * PCIe 3.0, 6.12.1.2 specifies ACS capabilities that should be + * implemented by the remaining PCIe types to indicate peer-to-peer + * capabilities, but only when they are part of a multifunciton + * device. The footnote for section 6.12 indicates the specific + * PCIe types included here. + */ + case PCI_EXP_TYPE_ENDPOINT: + case PCI_EXP_TYPE_UPSTREAM: + case PCI_EXP_TYPE_LEG_END: + case PCI_EXP_TYPE_RC_END: + if (!pdev->multifunction) + break; + acs_flags &= (PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC | PCI_ACS_DT); - if (pci_pcie_type(pdev) == PCI_EXP_TYPE_DOWNSTREAM || - pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT || - pdev->multifunction) { - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ACS); - if (!pos) - return false; - - pci_read_config_word(pdev, pos + PCI_ACS_CTRL, &ctrl); - if ((ctrl & acs_flags) != acs_flags) - return false; + return pci_acs_flags_enabled(pdev, acs_flags); } + /* + * PCIe 3.0, 6.12.1.3 specifies no ACS capabilties are applicable + * to single function devices with the exception of downstream ports. + */ return true; } -- cgit v1.2.3 From 83db7e0bdb70a9bb93cd000fefc3fbac3394f516 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 27 Jun 2013 16:39:54 -0600 Subject: PCI: Differentiate ACS controllable from enabled We currently misinterpret that in order for an ACS feature to be enabled it must be set in the control field. In reality, this means that the feature is not only enabled, but controllable. Many of the ACS capability bits are not required if the device behaves by default in the way specified when both the capability and control bit are set and does not support or allow the alternate mode. We therefore need to check the capabilities and mask out flags that are enabled but not controllable. Egress control seems to be the only flag which is purely optional. Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas Acked-by: Donald Dutile --- drivers/pci/pci.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 3c1ff63aa56d..a599a6bbdf37 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2362,12 +2362,20 @@ void pci_enable_acs(struct pci_dev *dev) static bool pci_acs_flags_enabled(struct pci_dev *pdev, u16 acs_flags) { int pos; - u16 ctrl; + u16 cap, ctrl; pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ACS); if (!pos) return false; + /* + * Except for egress control, capabilities are either required + * or only required if controllable. Features missing from the + * capability field can therefore be assumed as hard-wired enabled. + */ + pci_read_config_word(pdev, pos + PCI_ACS_CAP, &cap); + acs_flags &= (cap | PCI_ACS_EC); + pci_read_config_word(pdev, pos + PCI_ACS_CTRL, &ctrl); return (ctrl & acs_flags) == acs_flags; } @@ -2442,9 +2450,6 @@ bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags) if (!pdev->multifunction) break; - acs_flags &= (PCI_ACS_RR | PCI_ACS_CR | - PCI_ACS_EC | PCI_ACS_DT); - return pci_acs_flags_enabled(pdev, acs_flags); } -- cgit v1.2.3 From 928bea964827d7824b548c1f8e06eccbbc4d0d7d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 22 Jul 2013 14:37:17 -0700 Subject: PCI: Delay enabling bridges until they're needed We currently enable PCI bridges after scanning a bus and assigning resources. This is often done in arch code. This patch changes this so we don't enable a bridge until necessary, i.e., until we enable a PCI device behind the bridge. We do this in the generic pci_enable_device() path, so this also removes the arch-specific code to enable bridges. [bhelgaas: changelog] Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e37fea6e178d..44a1a8a0ad7b 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1145,6 +1145,24 @@ int pci_reenable_device(struct pci_dev *dev) return 0; } +static void pci_enable_bridge(struct pci_dev *dev) +{ + int retval; + + if (!dev) + return; + + pci_enable_bridge(dev->bus->self); + + if (pci_is_enabled(dev)) + return; + retval = pci_enable_device(dev); + if (retval) + dev_err(&dev->dev, "Error enabling bridge (%d), continuing\n", + retval); + pci_set_master(dev); +} + static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags) { int err; @@ -1165,6 +1183,8 @@ static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags) if (atomic_inc_return(&dev->enable_cnt) > 1) return 0; /* already enabled */ + pci_enable_bridge(dev->bus->self); + /* only skip sriov related */ for (i = 0; i <= PCI_ROM_RESOURCE; i++) if (dev->resource[i].flags & flags) -- cgit v1.2.3 From 81377c8d3563e7aec5c8baaaacacb48034f430a0 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 31 Jul 2013 06:53:26 +0000 Subject: PCI: Add function to obtain minimum link width and speed A PCI Express device can potentially report a link width and speed which it will not properly fulfill due to being plugged into a slower link higher in the chain. This function walks up the PCI bus chain and calculates the minimum link width and speed of this entire chain. This can be useful to enable a device to determine if it has enough bandwidth for optimum functionality. Signed-off-by: Jacob Keller Tested-by: Phil Schmitt Acked-by: Bjorn Helgaas Signed-off-by: Jeff Kirsher --- drivers/pci/pci.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e37fea6e178d..c71e78c46705 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3578,6 +3578,49 @@ int pcie_set_mps(struct pci_dev *dev, int mps) PCI_EXP_DEVCTL_PAYLOAD, v); } +/** + * pcie_get_minimum_link - determine minimum link settings of a PCI device + * @dev: PCI device to query + * @speed: storage for minimum speed + * @width: storage for minimum width + * + * This function will walk up the PCI device chain and determine the minimum + * link width and speed of the device. + */ +int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed, + enum pcie_link_width *width) +{ + int ret; + + *speed = PCI_SPEED_UNKNOWN; + *width = PCIE_LNK_WIDTH_UNKNOWN; + + while (dev) { + u16 lnksta; + enum pci_bus_speed next_speed; + enum pcie_link_width next_width; + + ret = pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &lnksta); + if (ret) + return ret; + + next_speed = pcie_link_speed[lnksta & PCI_EXP_LNKSTA_CLS]; + next_width = (lnksta & PCI_EXP_LNKSTA_NLW) >> + PCI_EXP_LNKSTA_NLW_SHIFT; + + if (next_speed < *speed) + *speed = next_speed; + + if (next_width < *width) + *width = next_width; + + dev = dev->bus->self; + } + + return 0; +} +EXPORT_SYMBOL(pcie_get_minimum_link); + /** * pci_select_bars - Make BAR mask from the type of resource * @dev: the PCI device for which BAR mask is made -- cgit v1.2.3 From ce1be10bf6dc8406ae773f0ac6265585a4154d37 Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Thu, 1 Aug 2013 21:05:27 +0800 Subject: PCI: Fix comment typo for pci_add_cap_save_buffer() Fix trivial comment typo for pci_add_cap_save_buffer(). Signed-off-by: Yijing Wang Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e37fea6e178d..69dcd32e0fe8 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1992,7 +1992,7 @@ static void pci_add_saved_cap(struct pci_dev *pci_dev, } /** - * pci_add_save_buffer - allocate buffer for saving given capability registers + * pci_add_cap_save_buffer - allocate buffer for saving given capability registers * @dev: the PCI device * @cap: the capability to allocate the buffer for * @size: requested size of the buffer -- cgit v1.2.3 From 64e8674fbe6bc848333a9b7e19f8cc019dde9eab Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 8 Aug 2013 14:09:24 -0600 Subject: PCI: Add pci_reset_bridge_secondary_bus() Move the secondary bus reset code from pci_parent_bus_reset() into its own function. Export it as we'll later be calling it from hotplug controllers and elsewhere. Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e37fea6e178d..d46860898e1f 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3215,9 +3215,30 @@ static int pci_pm_reset(struct pci_dev *dev, int probe) return 0; } -static int pci_parent_bus_reset(struct pci_dev *dev, int probe) +/** + * pci_reset_bridge_secondary_bus - Reset the secondary bus on a PCI bridge. + * @dev: Bridge device + * + * Use the bridge control register to assert reset on the secondary bus. + * Devices on the secondary bus are left in power-on state. + */ +void pci_reset_bridge_secondary_bus(struct pci_dev *dev) { u16 ctrl; + + pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); + ctrl |= PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); + msleep(100); + + ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); + msleep(100); +} +EXPORT_SYMBOL_GPL(pci_reset_bridge_secondary_bus); + +static int pci_parent_bus_reset(struct pci_dev *dev, int probe) +{ struct pci_dev *pdev; if (pci_is_root_bus(dev->bus) || dev->subordinate || !dev->bus->self) @@ -3230,14 +3251,7 @@ static int pci_parent_bus_reset(struct pci_dev *dev, int probe) if (probe) return 0; - pci_read_config_word(dev->bus->self, PCI_BRIDGE_CONTROL, &ctrl); - ctrl |= PCI_BRIDGE_CTL_BUS_RESET; - pci_write_config_word(dev->bus->self, PCI_BRIDGE_CONTROL, ctrl); - msleep(100); - - ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; - pci_write_config_word(dev->bus->self, PCI_BRIDGE_CONTROL, ctrl); - msleep(100); + pci_reset_bridge_secondary_bus(dev->bus->self); return 0; } -- cgit v1.2.3 From 3775a209d38aa3a0c7ed89a7d0f529e0230f280e Mon Sep 17 00:00:00 2001 From: Casey Leedom Date: Tue, 6 Aug 2013 15:48:36 +0530 Subject: PCI: Add pci_wait_for_pending_transaction() New routine to avoid duplication of code to wait for pending PCI transactions to complete. Signed-off-by: Casey Leedom Signed-off-by: Vipul Pandya Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e37fea6e178d..10ab64e8878e 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3098,19 +3098,17 @@ int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask) } EXPORT_SYMBOL(pci_set_dma_seg_boundary); -static int pcie_flr(struct pci_dev *dev, int probe) +/** + * pci_wait_for_pending_transaction - waits for pending transaction + * @dev: the PCI device to operate on + * + * Return 0 if transaction is pending 1 otherwise. + */ +int pci_wait_for_pending_transaction(struct pci_dev *dev) { int i; - u32 cap; u16 status; - pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap); - if (!(cap & PCI_EXP_DEVCAP_FLR)) - return -ENOTTY; - - if (probe) - return 0; - /* Wait for Transaction Pending bit clean */ for (i = 0; i < 4; i++) { if (i) @@ -3118,13 +3116,27 @@ static int pcie_flr(struct pci_dev *dev, int probe) pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &status); if (!(status & PCI_EXP_DEVSTA_TRPND)) - goto clear; + return 1; } - dev_err(&dev->dev, "transaction is not cleared; " - "proceeding with reset anyway\n"); + return 0; +} +EXPORT_SYMBOL(pci_wait_for_pending_transaction); + +static int pcie_flr(struct pci_dev *dev, int probe) +{ + u32 cap; + + pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap); + if (!(cap & PCI_EXP_DEVCAP_FLR)) + return -ENOTTY; + + if (probe) + return 0; + + if (!pci_wait_for_pending_transaction(dev)) + dev_err(&dev->dev, "transaction is not cleared; proceeding with reset anyway\n"); -clear: pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR); msleep(100); -- cgit v1.2.3 From 608c388122c72e1bf11ba8113434eb3d0c40c32d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 8 Aug 2013 14:09:43 -0600 Subject: PCI: Add slot reset option to pci_dev_reset() If the hotplug controller provides a way to reset a slot, use that before a direct parent bus reset. Like the bus reset option, this is only available when a single pci_dev occupies the slot. Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d46860898e1f..9407aabc77a3 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include "pci.h" @@ -3256,6 +3257,35 @@ static int pci_parent_bus_reset(struct pci_dev *dev, int probe) return 0; } +static int pci_reset_hotplug_slot(struct hotplug_slot *hotplug, int probe) +{ + int rc = -ENOTTY; + + if (!hotplug || !try_module_get(hotplug->ops->owner)) + return rc; + + if (hotplug->ops->reset_slot) + rc = hotplug->ops->reset_slot(hotplug, probe); + + module_put(hotplug->ops->owner); + + return rc; +} + +static int pci_dev_reset_slot_function(struct pci_dev *dev, int probe) +{ + struct pci_dev *pdev; + + if (dev->subordinate || !dev->slot) + return -ENOTTY; + + list_for_each_entry(pdev, &dev->bus->devices, bus_list) + if (pdev != dev && pdev->slot == dev->slot) + return -ENOTTY; + + return pci_reset_hotplug_slot(dev->slot->hotplug, probe); +} + static int __pci_dev_reset(struct pci_dev *dev, int probe) { int rc; @@ -3278,6 +3308,10 @@ static int __pci_dev_reset(struct pci_dev *dev, int probe) if (rc != -ENOTTY) goto done; + rc = pci_dev_reset_slot_function(dev, probe); + if (rc != -ENOTTY) + goto done; + rc = pci_parent_bus_reset(dev, probe); done: return rc; -- cgit v1.2.3 From 77cb985ad4acbe66a92ead1bb826deffa47dd33f Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 8 Aug 2013 14:09:49 -0600 Subject: PCI: Split out pci_dev lock/unlock and save/restore Only cosmetic code changes to existing paths. Expand the comment in the new pci_dev_save_and_disable() function since there's a lot hidden in that Command register write. Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 55 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 17 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 9407aabc77a3..d48d9febc241 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3317,22 +3317,49 @@ done: return rc; } +static void pci_dev_lock(struct pci_dev *dev) +{ + pci_cfg_access_lock(dev); + /* block PM suspend, driver probe, etc. */ + device_lock(&dev->dev); +} + +static void pci_dev_unlock(struct pci_dev *dev) +{ + device_unlock(&dev->dev); + pci_cfg_access_unlock(dev); +} + +static void pci_dev_save_and_disable(struct pci_dev *dev) +{ + pci_save_state(dev); + /* + * Disable the device by clearing the Command register, except for + * INTx-disable which is set. This not only disables MMIO and I/O port + * BARs, but also prevents the device from being Bus Master, preventing + * DMA from the device including MSI/MSI-X interrupts. For PCI 2.3 + * compliant devices, INTx-disable prevents legacy interrupts. + */ + pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); +} + +static void pci_dev_restore(struct pci_dev *dev) +{ + pci_restore_state(dev); +} + static int pci_dev_reset(struct pci_dev *dev, int probe) { int rc; - if (!probe) { - pci_cfg_access_lock(dev); - /* block PM suspend, driver probe, etc. */ - device_lock(&dev->dev); - } + if (!probe) + pci_dev_lock(dev); rc = __pci_dev_reset(dev, probe); - if (!probe) { - device_unlock(&dev->dev); - pci_cfg_access_unlock(dev); - } + if (!probe) + pci_dev_unlock(dev); + return rc; } /** @@ -3423,17 +3450,11 @@ int pci_reset_function(struct pci_dev *dev) if (rc) return rc; - pci_save_state(dev); - - /* - * both INTx and MSI are disabled after the Interrupt Disable bit - * is set and the Bus Master bit is cleared. - */ - pci_write_config_word(dev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); + pci_dev_save_and_disable(dev); rc = pci_dev_reset(dev, 0); - pci_restore_state(dev); + pci_dev_restore(dev); return rc; } -- cgit v1.2.3 From 090a3c5322e900f468b3205b76d0837003ad57b2 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 8 Aug 2013 14:09:55 -0600 Subject: PCI: Add pci_reset_slot() and pci_reset_bus() Sometimes pci_reset_function() is not sufficient. We have cases where devices do not support any kind of reset, but there might be multiple functions on the bus preventing pci_reset_function() from doing a secondary bus reset. We also have cases where a device will advertise that it supports a PM reset, but really does nothing on D3hot->D0 (graphics cards are notorious for this). These devices often also have more than one function, so even blacklisting PM reset for them wouldn't allow a secondary bus reset through pci_reset_function(). If a driver supports multiple devices it should have the ability to induce a bus reset when it needs to. This patch provides that ability through pci_reset_slot() and pci_reset_bus(). It's the caller's responsibility when using these interfaces to understand that all of the devices in or below the slot (or on or below the bus) will be reset and therefore should be under control of the caller. PCI state of all the affected devices is saved and restored around these resets, but internal state of all of the affected devices is reset (which should be the intention). Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 209 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 209 insertions(+) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d48d9febc241..12fccc24925c 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3460,6 +3460,215 @@ int pci_reset_function(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_reset_function); +/* Lock devices from the top of the tree down */ +static void pci_bus_lock(struct pci_bus *bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + pci_dev_lock(dev); + if (dev->subordinate) + pci_bus_lock(dev->subordinate); + } +} + +/* Unlock devices from the bottom of the tree up */ +static void pci_bus_unlock(struct pci_bus *bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + if (dev->subordinate) + pci_bus_unlock(dev->subordinate); + pci_dev_unlock(dev); + } +} + +/* Lock devices from the top of the tree down */ +static void pci_slot_lock(struct pci_slot *slot) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &slot->bus->devices, bus_list) { + if (!dev->slot || dev->slot != slot) + continue; + pci_dev_lock(dev); + if (dev->subordinate) + pci_bus_lock(dev->subordinate); + } +} + +/* Unlock devices from the bottom of the tree up */ +static void pci_slot_unlock(struct pci_slot *slot) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &slot->bus->devices, bus_list) { + if (!dev->slot || dev->slot != slot) + continue; + if (dev->subordinate) + pci_bus_unlock(dev->subordinate); + pci_dev_unlock(dev); + } +} + +/* Save and disable devices from the top of the tree down */ +static void pci_bus_save_and_disable(struct pci_bus *bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + pci_dev_save_and_disable(dev); + if (dev->subordinate) + pci_bus_save_and_disable(dev->subordinate); + } +} + +/* + * Restore devices from top of the tree down - parent bridges need to be + * restored before we can get to subordinate devices. + */ +static void pci_bus_restore(struct pci_bus *bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + pci_dev_restore(dev); + if (dev->subordinate) + pci_bus_restore(dev->subordinate); + } +} + +/* Save and disable devices from the top of the tree down */ +static void pci_slot_save_and_disable(struct pci_slot *slot) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &slot->bus->devices, bus_list) { + if (!dev->slot || dev->slot != slot) + continue; + pci_dev_save_and_disable(dev); + if (dev->subordinate) + pci_bus_save_and_disable(dev->subordinate); + } +} + +/* + * Restore devices from top of the tree down - parent bridges need to be + * restored before we can get to subordinate devices. + */ +static void pci_slot_restore(struct pci_slot *slot) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &slot->bus->devices, bus_list) { + if (!dev->slot || dev->slot != slot) + continue; + pci_dev_restore(dev); + if (dev->subordinate) + pci_bus_restore(dev->subordinate); + } +} + +static int pci_slot_reset(struct pci_slot *slot, int probe) +{ + int rc; + + if (!slot) + return -ENOTTY; + + if (!probe) + pci_slot_lock(slot); + + might_sleep(); + + rc = pci_reset_hotplug_slot(slot->hotplug, probe); + + if (!probe) + pci_slot_unlock(slot); + + return rc; +} + +/** + * pci_reset_slot - reset a PCI slot + * @slot: PCI slot to reset + * + * A PCI bus may host multiple slots, each slot may support a reset mechanism + * independent of other slots. For instance, some slots may support slot power + * control. In the case of a 1:1 bus to slot architecture, this function may + * wrap the bus reset to avoid spurious slot related events such as hotplug. + * Generally a slot reset should be attempted before a bus reset. All of the + * function of the slot and any subordinate buses behind the slot are reset + * through this function. PCI config space of all devices in the slot and + * behind the slot is saved before and restored after reset. + * + * Return 0 on success, non-zero on error. + */ +int pci_reset_slot(struct pci_slot *slot) +{ + int rc; + + rc = pci_slot_reset(slot, 1); + if (rc) + return rc; + + pci_slot_save_and_disable(slot); + + rc = pci_slot_reset(slot, 0); + + pci_slot_restore(slot); + + return rc; +} +EXPORT_SYMBOL_GPL(pci_reset_slot); + +static int pci_bus_reset(struct pci_bus *bus, int probe) +{ + if (!bus->self) + return -ENOTTY; + + if (probe) + return 0; + + pci_bus_lock(bus); + + might_sleep(); + + pci_reset_bridge_secondary_bus(bus->self); + + pci_bus_unlock(bus); + + return 0; +} + +/** + * pci_reset_bus - reset a PCI bus + * @bus: top level PCI bus to reset + * + * Do a bus reset on the given bus and any subordinate buses, saving + * and restoring state of all devices. + * + * Return 0 on success, non-zero on error. + */ +int pci_reset_bus(struct pci_bus *bus) +{ + int rc; + + rc = pci_bus_reset(bus, 1); + if (rc) + return rc; + + pci_bus_save_and_disable(bus); + + rc = pci_bus_reset(bus, 0); + + pci_bus_restore(bus); + + return rc; +} +EXPORT_SYMBOL_GPL(pci_reset_bus); + /** * pcix_get_max_mmrbc - get PCI-X maximum designed memory read byte count * @dev: PCI device to query -- cgit v1.2.3 From a6cbaadea0af9b4aa6eee2882f2aa761ab91a4f8 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 8 Aug 2013 14:10:02 -0600 Subject: PCI: Wake-up devices before saving config space for reset Devices come out of reset in D0. Restoring a device to a different post-reset state takes more smarts than our simple config space restore, which can leave devices in an inconsistent state. For example, if a device is reset in D3, but the restore doesn't successfully return the device to D3, then the actual state of the device and dev->current_state are contradictory. Put everything in D0 going into the reset, then we don't need to do anything special on the way out. Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 12fccc24925c..deb7fa9cc638 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3332,6 +3332,13 @@ static void pci_dev_unlock(struct pci_dev *dev) static void pci_dev_save_and_disable(struct pci_dev *dev) { + /* + * Wake-up device prior to save. PM registers default to D0 after + * reset and a simple register restore doesn't reliably return + * to a non-D0 state anyway. + */ + pci_set_power_state(dev, PCI_D0); + pci_save_state(dev); /* * Disable the device by clearing the Command register, except for -- cgit v1.2.3 From de0c548c33429cc78fd47a3c190c6d00b0e4e441 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 8 Aug 2013 14:10:13 -0600 Subject: PCI: Tune secondary bus reset timing The PCI spec indicates that with stable power, reset needs to be asserted for a minimum of 1ms (Trst). We should be able to assume stable power for a Hot Reset, but we add another millisecond as a fudge factor to make sure the reset is seen on the bus for at least a full 1ms. After reset is de-asserted we must wait for devices to complete initialization. The specs refer to this as "recovery time" (Trhfa). For PCI this is 2^25 clock cycles or 2^26 for PCI-X. For minimum bus speeds, both of those come to 1s. PCIe "softens" this requirement with the Configuration Request Retry Status (CRS) completion status. Theoretically we could use CRS to shorten the wait time. We don't make use of that here, using a fixed 1s delay to allow devices to re-initialize. Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index deb7fa9cc638..ea5e70486174 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3230,11 +3230,23 @@ void pci_reset_bridge_secondary_bus(struct pci_dev *dev) pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl); ctrl |= PCI_BRIDGE_CTL_BUS_RESET; pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); - msleep(100); + /* + * PCI spec v3.0 7.6.4.2 requires minimum Trst of 1ms. Double + * this to 2ms to ensure that we meet the minium requirement. + */ + msleep(2); ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl); - msleep(100); + + /* + * Trhfa for conventional PCI is 2^25 clock cycles. + * Assuming a minimum 33MHz clock this results in a 1s + * delay before we can consider subordinate devices to + * be re-initialized. PCIe has some ways to shorten this, + * but we don't make use of them yet. + */ + ssleep(1); } EXPORT_SYMBOL_GPL(pci_reset_bridge_secondary_bus); -- cgit v1.2.3 From 9a3d2b9beefd5b07c1d8f70ded01b88f203ee304 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 14 Aug 2013 14:06:05 -0600 Subject: PCI: Add pci_probe_reset_slot() and pci_probe_reset_bus() Users of pci_reset_bus() and pci_reset_slot() need a way to probe whether the bus or slot supports reset. Add trivial helper functions and export them as vfio-pci will make use of these. Signed-off-by: Alex Williamson Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index ea5e70486174..7f89372483dd 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3609,6 +3609,18 @@ static int pci_slot_reset(struct pci_slot *slot, int probe) return rc; } +/** + * pci_probe_reset_slot - probe whether a PCI slot can be reset + * @slot: PCI slot to probe + * + * Return 0 if slot can be reset, negative if a slot reset is not supported. + */ +int pci_probe_reset_slot(struct pci_slot *slot) +{ + return pci_slot_reset(slot, 1); +} +EXPORT_SYMBOL_GPL(pci_probe_reset_slot); + /** * pci_reset_slot - reset a PCI slot * @slot: PCI slot to reset @@ -3661,6 +3673,18 @@ static int pci_bus_reset(struct pci_bus *bus, int probe) return 0; } +/** + * pci_probe_reset_bus - probe whether a PCI bus can be reset + * @bus: PCI bus to probe + * + * Return 0 if bus can be reset, negative if a bus reset is not supported. + */ +int pci_probe_reset_bus(struct pci_bus *bus) +{ + return pci_bus_reset(bus, 1); +} +EXPORT_SYMBOL_GPL(pci_probe_reset_bus); + /** * pci_reset_bus - reset a PCI bus * @bus: top level PCI bus to reset -- cgit v1.2.3 From f67577118d1154154cf3c1d96f870c4da13846b4 Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Thu, 22 Aug 2013 11:24:45 +0800 Subject: PCI: Remove unnecessary check for pcie_get_mps() failure After 59875ae489 ("PCI/core: Use PCI Express Capability accessors"), pcie_get_mps() never returns an error, so don't bother to check for it. No functional change. [bhelgaas: changelog, fix pcie_get_mps() doc] Signed-off-by: Yijing Wang Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e37fea6e178d..5bb97ee2307e 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3525,8 +3525,6 @@ int pcie_set_readrq(struct pci_dev *dev, int rq) if (pcie_bus_config == PCIE_BUS_PERFORMANCE) { int mps = pcie_get_mps(dev); - if (mps < 0) - return mps; if (mps < rq) rq = mps; } @@ -3543,7 +3541,6 @@ EXPORT_SYMBOL(pcie_set_readrq); * @dev: PCI device to query * * Returns maximum payload size in bytes - * or appropriate error value. */ int pcie_get_mps(struct pci_dev *dev) { -- cgit v1.2.3 From da27f4b3ec77a04672345381cbfeeb841d427327 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 22 Aug 2013 14:45:21 -0600 Subject: PCI: Add comment about needing pci_msi_off() even when CONFIG_PCI_MSI=n Per f5f2b13129 ("msi: sanely support hardware level msi disabling"), we want pci_msi_off() to work even if MSI support is not compiled into the kernel, and there are existing callers that use it when CONFIG_PCI_MSI=n. This adds a comment to that effect. No functional change. Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 69dcd32e0fe8..42e5f86e2387 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3059,18 +3059,23 @@ bool pci_check_and_unmask_intx(struct pci_dev *dev) EXPORT_SYMBOL_GPL(pci_check_and_unmask_intx); /** - * pci_msi_off - disables any msi or msix capabilities + * pci_msi_off - disables any MSI or MSI-X capabilities * @dev: the PCI device to operate on * - * If you want to use msi see pci_enable_msi and friends. - * This is a lower level primitive that allows us to disable - * msi operation at the device level. + * If you want to use MSI, see pci_enable_msi() and friends. + * This is a lower-level primitive that allows us to disable + * MSI operation at the device level. */ void pci_msi_off(struct pci_dev *dev) { int pos; u16 control; + /* + * This looks like it could go in msi.c, but we need it even when + * CONFIG_PCI_MSI=n. For the same reason, we can't use + * dev->msi_cap or dev->msix_cap here. + */ pos = pci_find_capability(dev, PCI_CAP_ID_MSI); if (pos) { pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); -- cgit v1.2.3 From d2ab1fa68c61f01b28ab0859a972c892d81f5d32 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 27 Aug 2013 11:11:10 -0600 Subject: PCI: Rename PCIe capability definitions to follow convention All other PCIe capability register fields include "PCI_EXP" + + . This renames PCI_EXP_OBFF_MASK, PCI_EXP_IDO_REQ_EN, PCI_EXP_LTR_EN, and related fields using the same convention. No functional change. Signed-off-by: Bjorn Helgaas Acked-by: Samuel Ortiz # for MFD driver --- drivers/pci/pci.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 42e5f86e2387..3d5d45cdc4dd 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2095,9 +2095,9 @@ void pci_enable_ido(struct pci_dev *dev, unsigned long type) u16 ctrl = 0; if (type & PCI_EXP_IDO_REQUEST) - ctrl |= PCI_EXP_IDO_REQ_EN; + ctrl |= PCI_EXP_DEVCTL2_IDO_REQ_EN; if (type & PCI_EXP_IDO_COMPLETION) - ctrl |= PCI_EXP_IDO_CMP_EN; + ctrl |= PCI_EXP_DEVCTL2_IDO_CMP_EN; if (ctrl) pcie_capability_set_word(dev, PCI_EXP_DEVCTL2, ctrl); } @@ -2113,9 +2113,9 @@ void pci_disable_ido(struct pci_dev *dev, unsigned long type) u16 ctrl = 0; if (type & PCI_EXP_IDO_REQUEST) - ctrl |= PCI_EXP_IDO_REQ_EN; + ctrl |= PCI_EXP_DEVCTL2_IDO_REQ_EN; if (type & PCI_EXP_IDO_COMPLETION) - ctrl |= PCI_EXP_IDO_CMP_EN; + ctrl |= PCI_EXP_DEVCTL2_IDO_CMP_EN; if (ctrl) pcie_capability_clear_word(dev, PCI_EXP_DEVCTL2, ctrl); } @@ -2147,7 +2147,7 @@ int pci_enable_obff(struct pci_dev *dev, enum pci_obff_signal_type type) int ret; pcie_capability_read_dword(dev, PCI_EXP_DEVCAP2, &cap); - if (!(cap & PCI_EXP_OBFF_MASK)) + if (!(cap & PCI_EXP_DEVCAP2_OBFF_MASK)) return -ENOTSUPP; /* no OBFF support at all */ /* Make sure the topology supports OBFF as well */ @@ -2158,17 +2158,17 @@ int pci_enable_obff(struct pci_dev *dev, enum pci_obff_signal_type type) } pcie_capability_read_word(dev, PCI_EXP_DEVCTL2, &ctrl); - if (cap & PCI_EXP_OBFF_WAKE) - ctrl |= PCI_EXP_OBFF_WAKE_EN; + if (cap & PCI_EXP_DEVCAP2_OBFF_WAKE) + ctrl |= PCI_EXP_DEVCTL2_OBFF_WAKE_EN; else { switch (type) { case PCI_EXP_OBFF_SIGNAL_L0: - if (!(ctrl & PCI_EXP_OBFF_WAKE_EN)) - ctrl |= PCI_EXP_OBFF_MSGA_EN; + if (!(ctrl & PCI_EXP_DEVCTL2_OBFF_WAKE_EN)) + ctrl |= PCI_EXP_DEVCTL2_OBFF_MSGA_EN; break; case PCI_EXP_OBFF_SIGNAL_ALWAYS: - ctrl &= ~PCI_EXP_OBFF_WAKE_EN; - ctrl |= PCI_EXP_OBFF_MSGB_EN; + ctrl &= ~PCI_EXP_DEVCTL2_OBFF_WAKE_EN; + ctrl |= PCI_EXP_DEVCTL2_OBFF_MSGB_EN; break; default: WARN(1, "bad OBFF signal type\n"); @@ -2189,7 +2189,8 @@ EXPORT_SYMBOL(pci_enable_obff); */ void pci_disable_obff(struct pci_dev *dev) { - pcie_capability_clear_word(dev, PCI_EXP_DEVCTL2, PCI_EXP_OBFF_WAKE_EN); + pcie_capability_clear_word(dev, PCI_EXP_DEVCTL2, + PCI_EXP_DEVCTL2_OBFF_WAKE_EN); } EXPORT_SYMBOL(pci_disable_obff); @@ -2237,7 +2238,8 @@ int pci_enable_ltr(struct pci_dev *dev) return ret; } - return pcie_capability_set_word(dev, PCI_EXP_DEVCTL2, PCI_EXP_LTR_EN); + return pcie_capability_set_word(dev, PCI_EXP_DEVCTL2, + PCI_EXP_DEVCTL2_LTR_EN); } EXPORT_SYMBOL(pci_enable_ltr); @@ -2254,7 +2256,8 @@ void pci_disable_ltr(struct pci_dev *dev) if (!pci_ltr_supported(dev)) return; - pcie_capability_clear_word(dev, PCI_EXP_DEVCTL2, PCI_EXP_LTR_EN); + pcie_capability_clear_word(dev, PCI_EXP_DEVCTL2, + PCI_EXP_DEVCTL2_LTR_EN); } EXPORT_SYMBOL(pci_disable_ltr); -- cgit v1.2.3 From f41f064cf4352e6a7fd982f1de8a690897702513 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 28 Sep 2013 13:13:07 -0700 Subject: PCI: Workaround missing pci_set_master in pci drivers Ben Herrenschmidt found that commit 928bea964827 ("PCI: Delay enabling bridges until they're needed") breaks PCI in some powerpc environments. The reason is that the PCIe port driver will call pci_enable_device() on the bridge, so the device is enabled, but skips pci_set_master because pcie_port_auto and no acpi on powerpc. Because of that, pci_enable_bridge() later on (called as a result of the child device driver doing pci_enable_device) will see the bridge as already enabled and will not call pci_set_master() on it. Fixed by add checking in pci_enable_bridge, and call pci_set_master if driver skip that. That will make the code more robot and wade off problem for missing pci_set_master in drivers. Reported-by: Benjamin Herrenschmidt Signed-off-by: Yinghai Lu Signed-off-by: Linus Torvalds --- drivers/pci/pci.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/pci/pci.c') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e8ccf6c0f08a..bdd64b1b4817 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1155,8 +1155,14 @@ static void pci_enable_bridge(struct pci_dev *dev) pci_enable_bridge(dev->bus->self); - if (pci_is_enabled(dev)) + if (pci_is_enabled(dev)) { + if (!dev->is_busmaster) { + dev_warn(&dev->dev, "driver skip pci_set_master, fix it!\n"); + pci_set_master(dev); + } return; + } + retval = pci_enable_device(dev); if (retval) dev_err(&dev->dev, "Error enabling bridge (%d), continuing\n", -- cgit v1.2.3