// SPDX-License-Identifier: GPL-2.0+ /* * PCIe bandwidth controller * * Author: Alexandru Gagniuc * * Copyright (C) 2019 Dell Inc * Copyright (C) 2023-2024 Intel Corporation * * The PCIe bandwidth controller provides a way to alter PCIe Link Speeds * and notify the operating system when the Link Width or Speed changes. The * notification capability is required for all Root Ports and Downstream * Ports supporting Link Width wider than x1 and/or multiple Link Speeds. * * This service port driver hooks into the Bandwidth Notification interrupt * watching for changes or links becoming degraded in operation. It updates * the cached Current Link Speed that is exposed to user space through sysfs. */ #define dev_fmt(fmt) "bwctrl: " fmt #include #include #include #include #include #include #include #include #include #include #include #include #include "../pci.h" #include "portdrv.h" /** * struct pcie_bwctrl_data - PCIe bandwidth controller * @set_speed_mutex: Serializes link speed changes * @lbms_count: Count for LBMS (since last reset) * @cdev: Thermal cooling device associated with the port */ struct pcie_bwctrl_data { struct mutex set_speed_mutex; atomic_t lbms_count; struct thermal_cooling_device *cdev; }; /* * Prevent port removal during LBMS count accessors and Link Speed changes. * * These have to be differentiated because pcie_bwctrl_change_speed() calls * pcie_retrain_link() which uses LBMS count reset accessor on success * (using just one rwsem triggers "possible recursive locking detected" * warning). */ static DECLARE_RWSEM(pcie_bwctrl_lbms_rwsem); static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem); static bool pcie_valid_speed(enum pci_bus_speed speed) { return (speed >= PCIE_SPEED_2_5GT) && (speed <= PCIE_SPEED_64_0GT); } static u16 pci_bus_speed2lnkctl2(enum pci_bus_speed speed) { static const u8 speed_conv[] = { [PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT, [PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT, [PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT, [PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT, [PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT, [PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT, }; if (WARN_ON_ONCE(!pcie_valid_speed(speed))) return 0; return speed_conv[speed]; } static inline u16 pcie_supported_speeds2target_speed(u8 supported_speeds) { return __fls(supported_speeds); } /** * pcie_bwctrl_select_speed - Select Target Link Speed * @port: PCIe Port * @speed_req: Requested PCIe Link Speed * * Select Target Link Speed by take into account Supported Link Speeds of * both the Root Port and the Endpoint. * * Return: Target Link Speed (1=2.5GT/s, 2=5GT/s, 3=8GT/s, etc.) */ static u16 pcie_bwctrl_select_speed(struct pci_dev *port, enum pci_bus_speed speed_req) { struct pci_bus *bus = port->subordinate; u8 desired_speeds, supported_speeds; struct pci_dev *dev; desired_speeds = GENMASK(pci_bus_speed2lnkctl2(speed_req), __fls(PCI_EXP_LNKCAP2_SLS_2_5GB)); supported_speeds = port->supported_speeds; if (bus) { down_read(&pci_bus_sem); dev = list_first_entry_or_null(&bus->devices, struct pci_dev, bus_list); if (dev) supported_speeds &= dev->supported_speeds; up_read(&pci_bus_sem); } if (!supported_speeds) return PCI_EXP_LNKCAP2_SLS_2_5GB; return pcie_supported_speeds2target_speed(supported_speeds & desired_speeds); } static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool use_lt) { int ret; ret = pcie_capability_clear_and_set_word(port, PCI_EXP_LNKCTL2, PCI_EXP_LNKCTL2_TLS, target_speed); if (ret != PCIBIOS_SUCCESSFUL) return pcibios_err_to_errno(ret); ret = pcie_retrain_link(port, use_lt); if (ret < 0) return ret; /* * Ensure link speed updates also with platforms that have problems * with notifications. */ if (port->subordinate) pcie_update_link_speed(port->subordinate); return 0; } /** * pcie_set_target_speed - Set downstream Link Speed for PCIe Port * @port: PCIe Port * @speed_req: Requested PCIe Link Speed * @use_lt: Wait for the LT or DLLLA bit to detect the end of link training * * Attempt to set PCIe Port Link Speed to @speed_req. @speed_req may be * adjusted downwards to the best speed supported by both the Port and PCIe * Device underneath it. * * Return: * * 0 - on success * * -EINVAL - @speed_req is not a PCIe Link Speed * * -ENODEV - @port is not controllable * * -ETIMEDOUT - changing Link Speed took too long * * -EAGAIN - Link Speed was changed but @speed_req was not achieved */ int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req, bool use_lt) { struct pci_bus *bus = port->subordinate; u16 target_speed; int ret; if (WARN_ON_ONCE(!pcie_valid_speed(speed_req))) return -EINVAL; if (bus && bus->cur_bus_speed == speed_req) return 0; target_speed = pcie_bwctrl_select_speed(port, speed_req); scoped_guard(rwsem_read, &pcie_bwctrl_setspeed_rwsem) { struct pcie_bwctrl_data *data = port->link_bwctrl; /* * port->link_bwctrl is NULL during initial scan when called * e.g. from the Target Speed quirk. */ if (data) mutex_lock(&data->set_speed_mutex); ret = pcie_bwctrl_change_speed(port, target_speed, use_lt); if (data) mutex_unlock(&data->set_speed_mutex); } /* * Despite setting higher speed into the Target Link Speed, empty * bus won't train to 5GT+ speeds. */ if (!ret && bus && bus->cur_bus_speed != speed_req && !list_empty(&bus->devices)) ret = -EAGAIN; return ret; } static void pcie_bwnotif_enable(struct pcie_device *srv) { struct pcie_bwctrl_data *data = srv->port->link_bwctrl; struct pci_dev *port = srv->port; u16 link_status; int ret; /* Count LBMS seen so far as one */ ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS) atomic_inc(&data->lbms_count); pcie_capability_set_word(port, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); pcie_capability_write_word(port, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS); /* * Update after enabling notifications & clearing status bits ensures * link speed is up to date. */ pcie_update_link_speed(port->subordinate); } static void pcie_bwnotif_disable(struct pci_dev *port) { pcie_capability_clear_word(port, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); } static irqreturn_t pcie_bwnotif_irq(int irq, void *context) { struct pcie_device *srv = context; struct pcie_bwctrl_data *data = srv->port->link_bwctrl; struct pci_dev *port = srv->port; u16 link_status, events; int ret; ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); if (ret != PCIBIOS_SUCCESSFUL) return IRQ_NONE; events = link_status & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS); if (!events) return IRQ_NONE; if (events & PCI_EXP_LNKSTA_LBMS) atomic_inc(&data->lbms_count); pcie_capability_write_word(port, PCI_EXP_LNKSTA, events); /* * Interrupts will not be triggered from any further Link Speed * change until LBMS is cleared by the write. Therefore, re-read the * speed (inside pcie_update_link_speed()) after LBMS has been * cleared to avoid missing link speed changes. */ pcie_update_link_speed(port->subordinate); return IRQ_HANDLED; } void pcie_reset_lbms_count(struct pci_dev *port) { struct pcie_bwctrl_data *data; guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem); data = port->link_bwctrl; if (data) atomic_set(&data->lbms_count, 0); else pcie_capability_write_word(port, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS); } int pcie_lbms_count(struct pci_dev *port, unsigned long *val) { struct pcie_bwctrl_data *data; guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem); data = port->link_bwctrl; if (!data) return -ENOTTY; *val = atomic_read(&data->lbms_count); return 0; } static int pcie_bwnotif_probe(struct pcie_device *srv) { struct pci_dev *port = srv->port; int ret; struct pcie_bwctrl_data *data = devm_kzalloc(&srv->device, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; ret = devm_mutex_init(&srv->device, &data->set_speed_mutex); if (ret) return ret; scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) { scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) { port->link_bwctrl = data; ret = request_irq(srv->irq, pcie_bwnotif_irq, IRQF_SHARED, "PCIe bwctrl", srv); if (ret) { port->link_bwctrl = NULL; return ret; } pcie_bwnotif_enable(srv); } } pci_dbg(port, "enabled with IRQ %d\n", srv->irq); /* Don't fail on errors. Don't leave IS_ERR() "pointer" into ->cdev */ port->link_bwctrl->cdev = pcie_cooling_device_register(port); if (IS_ERR(port->link_bwctrl->cdev)) port->link_bwctrl->cdev = NULL; return 0; } static void pcie_bwnotif_remove(struct pcie_device *srv) { struct pcie_bwctrl_data *data = srv->port->link_bwctrl; pcie_cooling_device_unregister(data->cdev); scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) { scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) { pcie_bwnotif_disable(srv->port); free_irq(srv->irq, srv); srv->port->link_bwctrl = NULL; } } } static int pcie_bwnotif_suspend(struct pcie_device *srv) { pcie_bwnotif_disable(srv->port); return 0; } static int pcie_bwnotif_resume(struct pcie_device *srv) { pcie_bwnotif_enable(srv); return 0; } static struct pcie_port_service_driver pcie_bwctrl_driver = { .name = "pcie_bwctrl", .port_type = PCIE_ANY_PORT, .service = PCIE_PORT_SERVICE_BWCTRL, .probe = pcie_bwnotif_probe, .suspend = pcie_bwnotif_suspend, .resume = pcie_bwnotif_resume, .remove = pcie_bwnotif_remove, }; int __init pcie_bwctrl_init(void) { return pcie_port_service_register(&pcie_bwctrl_driver); }