Add MSI doorbell support to reduce latency between PCI host and EP. Before this change: ping 169.254.172.137 64 bytes from 169.254.172.137: icmp_seq=1 ttl=64 time=0.575 ms 64 bytes from 169.254.172.137: icmp_seq=2 ttl=64 time=1.80 ms 64 bytes from 169.254.172.137: icmp_seq=3 ttl=64 time=8.19 ms 64 bytes from 169.254.172.137: icmp_seq=4 ttl=64 time=2.00 ms After this change: ping 169.254.144.71 64 bytes from 169.254.144.71: icmp_seq=1 ttl=64 time=0.215 ms 64 bytes from 169.254.144.71: icmp_seq=2 ttl=64 time=0.456 ms 64 bytes from 169.254.144.71: icmp_seq=3 ttl=64 time=0.448 ms Change u64 db to atomic64_t because different doorbells may be raised at the same time. Signed-off-by: Frank Li <Frank.Li@xxxxxxx> --- drivers/pci/endpoint/functions/pci-epf-vntb.c | 153 +++++++++++++++++++++++--- 1 file changed, 136 insertions(+), 17 deletions(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c index 83e9ab10f9c4fc2b485d5463faa2172500f12999..1c586205835fe9c7c5352e74819bccb4ece84438 100644 --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c @@ -36,11 +36,13 @@ * PCIe Root Port PCI EP */ +#include <linux/atomic.h> #include <linux/delay.h> #include <linux/io.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/pci-ep-msi.h> #include <linux/pci-epc.h> #include <linux/pci-epf.h> #include <linux/ntb.h> @@ -126,12 +128,13 @@ struct epf_ntb { u32 db_count; u32 spad_count; u64 mws_size[MAX_MW]; - u64 db; + atomic64_t db; u32 vbus_number; u16 vntb_pid; u16 vntb_vid; bool linkup; + bool msi_doorbell; u32 spad_size; enum pci_barno epf_ntb_bar[VNTB_BAR_NUM]; @@ -258,9 +261,9 @@ static void epf_ntb_cmd_handler(struct work_struct *work) ntb = container_of(work, struct epf_ntb, cmd_handler.work); - for (i = 1; i < ntb->db_count; i++) { + for (i = 1; i < ntb->db_count && !ntb->msi_doorbell; i++) { if (ntb->epf_db[i]) { - ntb->db |= 1 << (i - 1); + atomic64_or(1 << (i - 
1), &ntb->db); ntb_db_event(&ntb->ntb, i); ntb->epf_db[i] = 0; } @@ -319,7 +322,24 @@ static void epf_ntb_cmd_handler(struct work_struct *work) reset_handler: queue_delayed_work(kpcintb_workqueue, &ntb->cmd_handler, - msecs_to_jiffies(5)); + ntb->msi_doorbell ? msecs_to_jiffies(500) : msecs_to_jiffies(5)); +} + +static irqreturn_t epf_ntb_doorbell_handler(int irq, void *data) +{ + struct epf_ntb *ntb = data; + int i = 0; + + for (i = 1; i < ntb->db_count; i++) + if (irq == ntb->epf->db_msg[i].virq) { + atomic64_or(1 << (i - 1), &ntb->db); + ntb_db_event(&ntb->ntb, i); + } + + if (irq == ntb->epf->db_msg[0].virq) + queue_delayed_work(kpcintb_workqueue, &ntb->cmd_handler, 0); + + return IRQ_HANDLED; } /** @@ -500,6 +520,90 @@ static int epf_ntb_configure_interrupt(struct epf_ntb *ntb) return 0; } +static int epf_ntb_db_bar_init_msi_doorbell(struct epf_ntb *ntb, + struct pci_epf_bar *db_bar, + const struct pci_epc_features *epc_features, + enum pci_barno barno) +{ + struct pci_epf *epf = ntb->epf; + dma_addr_t low, high; + struct msi_msg *msg; + size_t sz; + int ret; + int i; + + ret = pci_epf_alloc_doorbell(epf, ntb->db_count); + if (ret) + return ret; + + for (i = 0; i < ntb->db_count; i++) { + ret = request_irq(epf->db_msg[i].virq, epf_ntb_doorbell_handler, + 0, "vntb_db", ntb); + + if (ret) { + dev_err(&epf->dev, + "Failed to request doorbell IRQ: %d\n", + epf->db_msg[i].virq); + goto err_request_irq; + } + } + + msg = &epf->db_msg[0].msg; + + high = 0; + low = (u64)msg->address_hi << 32 | msg->address_lo; + + for (i = 0; i < ntb->db_count; i++) { + struct msi_msg *msg = &epf->db_msg[i].msg; + dma_addr_t addr = (u64)msg->address_hi << 32 | msg->address_lo; + + low = min(low, addr); + high = max(high, addr); + } + + sz = high - low + sizeof(u32); + + ret = pci_epf_set_inbound_space(epf, sz, barno, + epc_features, 0, false, low); + + ret = pci_epc_set_bar(ntb->epf->epc, ntb->epf->func_no, ntb->epf->vfunc_no, db_bar); + if (ret) { + dev_err(&epf->dev, "Doorbell BAR 
set failed\n"); + goto err_request_irq; + } + + for (i = 0; i < ntb->db_count; i++) { + struct msi_msg *msg = &epf->db_msg[i].msg; + dma_addr_t addr; + size_t offset; + + ret = pci_epf_align_inbound_addr(epf, db_bar->barno, + ((u64)msg->address_hi << 32) | msg->address_lo, + &addr, &offset); + + if (ret) { + ntb->msi_doorbell = false; + goto err_request_irq; + } + + ntb->reg->db_data[i] = msg->data; + ntb->reg->db_offset[i] = offset; + } + + ntb->reg->db_entry_size = 0; + + ntb->msi_doorbell = true; + + return 0; + +err_request_irq: + for (i--; i >= 0; i--) + free_irq(epf->db_msg[i].virq, ntb); + + pci_epf_free_doorbell(ntb->epf); + return ret; +} + /** * epf_ntb_db_bar_init() - Configure Doorbell window BARs * @ntb: NTB device that facilitates communication between HOST and VHOST @@ -520,22 +624,27 @@ static int epf_ntb_db_bar_init(struct epf_ntb *ntb) ntb->epf->func_no, ntb->epf->vfunc_no); barno = ntb->epf_ntb_bar[BAR_DB]; - - mw_addr = pci_epf_alloc_space(ntb->epf, size, barno, epc_features, 0); - if (!mw_addr) { - dev_err(dev, "Failed to allocate OB address\n"); - return -ENOMEM; - } - - ntb->epf_db = mw_addr; - epf_bar = &ntb->epf->bar[barno]; - ret = pci_epc_set_bar(ntb->epf->epc, ntb->epf->func_no, ntb->epf->vfunc_no, epf_bar); + ret = epf_ntb_db_bar_init_msi_doorbell(ntb, epf_bar, epc_features, barno); if (ret) { - dev_err(dev, "Doorbell BAR set failed\n"); + /* fall back to polling mode */ + mw_addr = pci_epf_alloc_space(ntb->epf, size, barno, epc_features, 0); + if (!mw_addr) { + dev_err(dev, "Failed to allocate OB address\n"); + return -ENOMEM; + } + + ntb->epf_db = mw_addr; + + ret = pci_epc_set_bar(ntb->epf->epc, ntb->epf->func_no, + ntb->epf->vfunc_no, epf_bar); + if (ret) { + dev_err(dev, "Doorbell BAR set failed\n"); goto err_alloc_peer_mem; + } } + return ret; err_alloc_peer_mem: @@ -554,6 +663,16 @@ static void epf_ntb_db_bar_clear(struct epf_ntb *ntb) { enum pci_barno barno; + if (ntb->msi_doorbell) { + int i; + + for (i = 0; i < ntb->db_count; 
i++) + free_irq(ntb->epf->db_msg[i].virq, ntb); + } + + if (ntb->epf->db_msg) + pci_epf_free_doorbell(ntb->epf); + barno = ntb->epf_ntb_bar[BAR_DB]; pci_epf_free_space(ntb->epf, ntb->epf_db, barno, 0); pci_epc_clear_bar(ntb->epf->epc, @@ -1268,7 +1387,7 @@ static u64 vntb_epf_db_read(struct ntb_dev *ndev) { struct epf_ntb *ntb = ntb_ndev(ndev); - return ntb->db; + return atomic64_read(&ntb->db); } static int vntb_epf_mw_get_align(struct ntb_dev *ndev, int pidx, int idx, @@ -1308,7 +1427,7 @@ static int vntb_epf_db_clear(struct ntb_dev *ndev, u64 db_bits) { struct epf_ntb *ntb = ntb_ndev(ndev); - ntb->db &= ~db_bits; + atomic64_and(~db_bits, &ntb->db); return 0; } -- 2.34.1