Diffstat (limited to 'drivers/infiniband/hw')
25 files changed, 2921 insertions, 1427 deletions
diff --git a/drivers/infiniband/hw/mthca/Makefile b/drivers/infiniband/hw/mthca/Makefile index 5dcbd43073e2..c44f7bae5424 100644 --- a/drivers/infiniband/hw/mthca/Makefile +++ b/drivers/infiniband/hw/mthca/Makefile @@ -1,5 +1,3 @@ -EXTRA_CFLAGS += -Idrivers/infiniband/include - ifdef CONFIG_INFINIBAND_MTHCA_DEBUG EXTRA_CFLAGS += -DDEBUG endif @@ -9,4 +7,4 @@ obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \ mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \ mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \ - mthca_provider.o mthca_memfree.o mthca_uar.o + mthca_provider.o mthca_memfree.o mthca_uar.o mthca_srq.o diff --git a/drivers/infiniband/hw/mthca/mthca_allocator.c b/drivers/infiniband/hw/mthca/mthca_allocator.c index b1db48dd91d6..9ba3211cef7c 100644 --- a/drivers/infiniband/hw/mthca/mthca_allocator.c +++ b/drivers/infiniband/hw/mthca/mthca_allocator.c @@ -177,3 +177,119 @@ void mthca_array_cleanup(struct mthca_array *array, int nent) kfree(array->page_list); } + +/* + * Handling for queue buffers -- we allocate a bunch of memory and + * register it in a memory region at HCA virtual address 0. If the + * requested size is > max_direct, we split the allocation into + * multiple pages, so we don't require too much contiguous memory. + */ + +int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, + union mthca_buf *buf, int *is_direct, struct mthca_pd *pd, + int hca_write, struct mthca_mr *mr) +{ + int err = -ENOMEM; + int npages, shift; + u64 *dma_list = NULL; + dma_addr_t t; + int i; + + if (size <= max_direct) { + *is_direct = 1; + npages = 1; + shift = get_order(size) + PAGE_SHIFT; + + buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev, + size, &t, GFP_KERNEL); + if (!buf->direct.buf) + return -ENOMEM; + + pci_unmap_addr_set(&buf->direct, mapping, t); + + memset(buf->direct.buf, 0, size); + + while (t & ((1 << shift) - 1)) { + --shift; + npages *= 2; + } + + dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + if (!dma_list) + goto err_free; + + for (i = 0; i < npages; ++i) + dma_list[i] = t + i * (1 << shift); + } else { + *is_direct = 0; + npages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + shift = PAGE_SHIFT; + + dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + if (!dma_list) + return -ENOMEM; + + buf->page_list = kmalloc(npages * sizeof *buf->page_list, + GFP_KERNEL); + if (!buf->page_list) + goto err_out; + + for (i = 0; i < npages; ++i) + buf->page_list[i].buf = NULL; + + for (i = 0; i < npages; ++i) { + buf->page_list[i].buf = + dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, + &t, GFP_KERNEL); + if (!buf->page_list[i].buf) + goto err_free; + + dma_list[i] = t; + pci_unmap_addr_set(&buf->page_list[i], mapping, t); + + memset(buf->page_list[i].buf, 0, PAGE_SIZE); + } + } + + err = mthca_mr_alloc_phys(dev, pd->pd_num, + dma_list, shift, npages, + 0, size, + MTHCA_MPT_FLAG_LOCAL_READ | + (hca_write ? 
MTHCA_MPT_FLAG_LOCAL_WRITE : 0), + mr); + if (err) + goto err_free; + + kfree(dma_list); + + return 0; + +err_free: + mthca_buf_free(dev, size, buf, *is_direct, NULL); + +err_out: + kfree(dma_list); + + return err; +} + +void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf, + int is_direct, struct mthca_mr *mr) +{ + int i; + + if (mr) + mthca_free_mr(dev, mr); + + if (is_direct) + dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf, + pci_unmap_addr(&buf->direct, mapping)); + else { + for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i) + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + buf->page_list[i].buf, + pci_unmap_addr(&buf->page_list[i], + mapping)); + kfree(buf->page_list); + } +} diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index 085baf393ca4..889e85096736 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -34,22 +35,22 @@ #include <linux/init.h> -#include <ib_verbs.h> -#include <ib_cache.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_cache.h> #include "mthca_dev.h" struct mthca_av { - u32 port_pd; - u8 reserved1; - u8 g_slid; - u16 dlid; - u8 reserved2; - u8 gid_index; - u8 msg_sr; - u8 hop_limit; - u32 sl_tclass_flowlabel; - u32 dgid[4]; + __be32 port_pd; + u8 reserved1; + u8 g_slid; + __be16 dlid; + u8 reserved2; + u8 gid_index; + u8 msg_sr; + u8 hop_limit; + __be32 sl_tclass_flowlabel; + __be32 dgid[4]; }; int mthca_create_ah(struct mthca_dev *dev, @@ -127,7 +128,7 @@ on_hca_fail: av, (unsigned long) ah->avdma); for (j = 0; j < 8; ++j) printk(KERN_DEBUG " [%2x] %08x\n", - j * 4, be32_to_cpu(((u32 *) av)[j])); + j * 4, be32_to_cpu(((__be32 *) av)[j])); } if (ah->type == MTHCA_AH_ON_HCA) { @@ -168,7 +169,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah, header->lrh.service_level = be32_to_cpu(ah->av->sl_tclass_flowlabel) >> 28; header->lrh.destination_lid = ah->av->dlid; - header->lrh.source_lid = ah->av->g_slid & 0x7f; + header->lrh.source_lid = cpu_to_be16(ah->av->g_slid & 0x7f); if (ah->av->g_slid & 0x80) { header->grh_present = 1; header->grh.traffic_class = diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index cd9ed958d92f..cc758a2d2bc6 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -36,7 +37,7 @@ #include <linux/pci.h> #include <linux/errno.h> #include <asm/io.h> -#include <ib_mad.h> +#include <rdma/ib_mad.h> #include "mthca_dev.h" #include "mthca_config_reg.h" @@ -108,6 +109,7 @@ enum { CMD_SW2HW_SRQ = 0x35, CMD_HW2SW_SRQ = 0x36, CMD_QUERY_SRQ = 0x37, + CMD_ARM_SRQ = 0x40, /* QP/EE commands */ CMD_RST2INIT_QPEE = 0x19, @@ -219,20 +221,20 @@ static int mthca_cmd_post(struct mthca_dev *dev, * (and some architectures such as ia64 implement memcpy_toio * in terms of writeb). 
*/ - __raw_writel(cpu_to_be32(in_param >> 32), dev->hcr + 0 * 4); - __raw_writel(cpu_to_be32(in_param & 0xfffffffful), dev->hcr + 1 * 4); - __raw_writel(cpu_to_be32(in_modifier), dev->hcr + 2 * 4); - __raw_writel(cpu_to_be32(out_param >> 32), dev->hcr + 3 * 4); - __raw_writel(cpu_to_be32(out_param & 0xfffffffful), dev->hcr + 4 * 4); - __raw_writel(cpu_to_be32(token << 16), dev->hcr + 5 * 4); + __raw_writel((__force u32) cpu_to_be32(in_param >> 32), dev->hcr + 0 * 4); + __raw_writel((__force u32) cpu_to_be32(in_param & 0xfffffffful), dev->hcr + 1 * 4); + __raw_writel((__force u32) cpu_to_be32(in_modifier), dev->hcr + 2 * 4); + __raw_writel((__force u32) cpu_to_be32(out_param >> 32), dev->hcr + 3 * 4); + __raw_writel((__force u32) cpu_to_be32(out_param & 0xfffffffful), dev->hcr + 4 * 4); + __raw_writel((__force u32) cpu_to_be32(token << 16), dev->hcr + 5 * 4); /* __raw_writel may not order writes. */ wmb(); - __raw_writel(cpu_to_be32((1 << HCR_GO_BIT) | - (event ? (1 << HCA_E_BIT) : 0) | - (op_modifier << HCR_OPMOD_SHIFT) | - op), dev->hcr + 6 * 4); + __raw_writel((__force u32) cpu_to_be32((1 << HCR_GO_BIT) | + (event ? (1 << HCA_E_BIT) : 0) | + (op_modifier << HCR_OPMOD_SHIFT) | + op), dev->hcr + 6 * 4); out: up(&dev->cmd.hcr_sem); @@ -273,12 +275,14 @@ static int mthca_cmd_poll(struct mthca_dev *dev, goto out; } - if (out_is_imm) { - memcpy_fromio(out_param, dev->hcr + HCR_OUT_PARAM_OFFSET, sizeof (u64)); - be64_to_cpus(out_param); - } + if (out_is_imm) + *out_param = + (u64) be32_to_cpu((__force __be32) + __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET)) << 32 | + (u64) be32_to_cpu((__force __be32) + __raw_readl(dev->hcr + HCR_OUT_PARAM_OFFSET + 4)); - *status = be32_to_cpu(__raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24; + *status = be32_to_cpu((__force __be32) __raw_readl(dev->hcr + HCR_STATUS_OFFSET)) >> 24; out: up(&dev->cmd.poll_sem); @@ -431,6 +435,36 @@ static int mthca_cmd_imm(struct mthca_dev *dev, timeout, status); } +int mthca_cmd_init(struct mthca_dev *dev) +{ + sema_init(&dev->cmd.hcr_sem, 1); + sema_init(&dev->cmd.poll_sem, 1); + dev->cmd.use_events = 0; + + dev->hcr = ioremap(pci_resource_start(dev->pdev, 0) + MTHCA_HCR_BASE, + MTHCA_HCR_SIZE); + if (!dev->hcr) { + mthca_err(dev, "Couldn't map command register."); + return -ENOMEM; + } + + dev->cmd.pool = pci_pool_create("mthca_cmd", dev->pdev, + MTHCA_MAILBOX_SIZE, + MTHCA_MAILBOX_SIZE, 0); + if (!dev->cmd.pool) { + iounmap(dev->hcr); + return -ENOMEM; + } + + return 0; +} + +void mthca_cmd_cleanup(struct mthca_dev *dev) +{ + pci_pool_destroy(dev->cmd.pool); + iounmap(dev->hcr); +} + /* * Switch to using events to issue FW commands (should be called after * event queue to command events has been initialized). 
@@ -489,6 +523,33 @@ void mthca_cmd_use_polling(struct mthca_dev *dev) up(&dev->cmd.poll_sem); } +struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev, + unsigned int gfp_mask) +{ + struct mthca_mailbox *mailbox; + + mailbox = kmalloc(sizeof *mailbox, gfp_mask); + if (!mailbox) + return ERR_PTR(-ENOMEM); + + mailbox->buf = pci_pool_alloc(dev->cmd.pool, gfp_mask, &mailbox->dma); + if (!mailbox->buf) { + kfree(mailbox); + return ERR_PTR(-ENOMEM); + } + + return mailbox; +} + +void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox) +{ + if (!mailbox) + return; + + pci_pool_free(dev->cmd.pool, mailbox->buf, mailbox->dma); + kfree(mailbox); +} + int mthca_SYS_EN(struct mthca_dev *dev, u8 *status) { u64 out; @@ -513,20 +574,20 @@ int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status) static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, u64 virt, u8 *status) { - u32 *inbox; - dma_addr_t indma; + struct mthca_mailbox *mailbox; struct mthca_icm_iter iter; + __be64 *pages; int lg; int nent = 0; int i; int err = 0; int ts = 0, tc = 0; - inbox = pci_alloc_consistent(dev->pdev, PAGE_SIZE, &indma); - if (!inbox) - return -ENOMEM; - - memset(inbox, 0, PAGE_SIZE); + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + memset(mailbox->buf, 0, MTHCA_MAILBOX_SIZE); + pages = mailbox->buf; for (mthca_icm_first(icm, &iter); !mthca_icm_last(&iter); @@ -546,19 +607,17 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, } for (i = 0; i < mthca_icm_size(&iter) / (1 << lg); ++i, ++nent) { if (virt != -1) { - *((__be64 *) (inbox + nent * 4)) = - cpu_to_be64(virt); + pages[nent * 2] = cpu_to_be64(virt); virt += 1 << lg; } - *((__be64 *) (inbox + nent * 4 + 2)) = - cpu_to_be64((mthca_icm_addr(&iter) + - (i << lg)) | (lg - 12)); + pages[nent * 2 + 1] = cpu_to_be64((mthca_icm_addr(&iter) + + (i << lg)) | (lg - 12)); ts += 1 << (lg - 10); ++tc; - if (nent == PAGE_SIZE / 16) { - err = mthca_cmd(dev, indma, nent, 0, op, + if (nent == MTHCA_MAILBOX_SIZE / 16) { + err = mthca_cmd(dev, mailbox->dma, nent, 0, op, CMD_TIME_CLASS_B, status); if (err || *status) goto out; @@ -568,7 +627,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, } if (nent) - err = mthca_cmd(dev, indma, nent, 0, op, + err = mthca_cmd(dev, mailbox->dma, nent, 0, op, CMD_TIME_CLASS_B, status); switch (op) { @@ -585,7 +644,7 @@ static int mthca_map_cmd(struct mthca_dev *dev, u16 op, struct mthca_icm *icm, } out: - pci_free_consistent(dev->pdev, PAGE_SIZE, inbox, indma); + mthca_free_mailbox(dev, mailbox); return err; } @@ -606,8 +665,8 @@ int mthca_RUN_FW(struct mthca_dev *dev, u8 *status) int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) { + struct mthca_mailbox *mailbox; u32 *outbox; - dma_addr_t outdma; int err = 0; u8 lg; @@ -625,12 +684,12 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) #define QUERY_FW_EQ_ARM_BASE_OFFSET 0x40 #define QUERY_FW_EQ_SET_CI_BASE_OFFSET 0x48 - outbox = pci_alloc_consistent(dev->pdev, QUERY_FW_OUT_SIZE, &outdma); - if (!outbox) { - return -ENOMEM; - } + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; - err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_FW, + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_FW, CMD_TIME_CLASS_A, status); if (err) @@ -681,15 +740,15 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) } out: - pci_free_consistent(dev->pdev, 
QUERY_FW_OUT_SIZE, outbox, outdma); + mthca_free_mailbox(dev, mailbox); return err; } int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status) { + struct mthca_mailbox *mailbox; u8 info; u32 *outbox; - dma_addr_t outdma; int err = 0; #define ENABLE_LAM_OUT_SIZE 0x100 @@ -700,11 +759,12 @@ int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status) #define ENABLE_LAM_INFO_HIDDEN_FLAG (1 << 4) #define ENABLE_LAM_INFO_ECC_MASK 0x3 - outbox = pci_alloc_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, &outdma); - if (!outbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; - err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_ENABLE_LAM, + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_ENABLE_LAM, CMD_TIME_CLASS_C, status); if (err) @@ -733,7 +793,7 @@ int mthca_ENABLE_LAM(struct mthca_dev *dev, u8 *status) (unsigned long long) dev->ddr_end); out: - pci_free_consistent(dev->pdev, ENABLE_LAM_OUT_SIZE, outbox, outdma); + mthca_free_mailbox(dev, mailbox); return err; } @@ -744,9 +804,9 @@ int mthca_DISABLE_LAM(struct mthca_dev *dev, u8 *status) int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status) { + struct mthca_mailbox *mailbox; u8 info; u32 *outbox; - dma_addr_t outdma; int err = 0; #define QUERY_DDR_OUT_SIZE 0x100 @@ -757,11 +817,12 @@ int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status) #define QUERY_DDR_INFO_HIDDEN_FLAG (1 << 4) #define QUERY_DDR_INFO_ECC_MASK 0x3 - outbox = pci_alloc_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, &outdma); - if (!outbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; - err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DDR, + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DDR, CMD_TIME_CLASS_A, status); if (err) @@ -787,15 +848,15 @@ int mthca_QUERY_DDR(struct mthca_dev *dev, u8 *status) (unsigned long long) dev->ddr_end); out: - pci_free_consistent(dev->pdev, QUERY_DDR_OUT_SIZE, outbox, outdma); + mthca_free_mailbox(dev, mailbox); return err; } int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, struct mthca_dev_lim *dev_lim, u8 *status) { + struct mthca_mailbox *mailbox; u32 *outbox; - dma_addr_t outdma; u8 field; u16 size; int err; @@ -860,11 +921,12 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, #define QUERY_DEV_LIM_LAMR_OFFSET 0x9f #define QUERY_DEV_LIM_MAX_ICM_SZ_OFFSET 0xa0 - outbox = pci_alloc_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, &outdma); - if (!outbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; - err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_DEV_LIM, + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_DEV_LIM, CMD_TIME_CLASS_A, status); if (err) @@ -971,6 +1033,8 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, mthca_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n", dev_lim->max_qps, dev_lim->reserved_qps, dev_lim->qpc_entry_sz); + mthca_dbg(dev, "Max SRQs: %d, reserved SRQs: %d, entry size: %d\n", + dev_lim->max_srqs, dev_lim->reserved_srqs, dev_lim->srq_entry_sz); mthca_dbg(dev, "Max CQs: %d, reserved CQs: %d, entry size: %d\n", dev_lim->max_cqs, dev_lim->reserved_cqs, dev_lim->cqc_entry_sz); mthca_dbg(dev, "Max EQs: %d, reserved EQs: %d, entry size: %d\n", @@ -1020,15 +1084,43 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev, } out: - pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma); + 
mthca_free_mailbox(dev, mailbox); return err; } +static void get_board_id(void *vsd, char *board_id) +{ + int i; + +#define VSD_OFFSET_SIG1 0x00 +#define VSD_OFFSET_SIG2 0xde +#define VSD_OFFSET_MLX_BOARD_ID 0xd0 +#define VSD_OFFSET_TS_BOARD_ID 0x20 + +#define VSD_SIGNATURE_TOPSPIN 0x5ad + + memset(board_id, 0, MTHCA_BOARD_ID_LEN); + + if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN && + be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) { + strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MTHCA_BOARD_ID_LEN); + } else { + /* + * The board ID is a string but the firmware byte + * swaps each 4-byte word before passing it back to + * us. Therefore we need to swab it before printing. + */ + for (i = 0; i < 4; ++i) + ((u32 *) board_id)[i] = + swab32(*(u32 *) (vsd + VSD_OFFSET_MLX_BOARD_ID + i * 4)); + } +} + int mthca_QUERY_ADAPTER(struct mthca_dev *dev, struct mthca_adapter *adapter, u8 *status) { + struct mthca_mailbox *mailbox; u32 *outbox; - dma_addr_t outdma; int err; #define QUERY_ADAPTER_OUT_SIZE 0x100 @@ -1036,24 +1128,29 @@ int mthca_QUERY_ADAPTER(struct mthca_dev *dev, #define QUERY_ADAPTER_DEVICE_ID_OFFSET 0x04 #define QUERY_ADAPTER_REVISION_ID_OFFSET 0x08 #define QUERY_ADAPTER_INTA_PIN_OFFSET 0x10 +#define QUERY_ADAPTER_VSD_OFFSET 0x20 - outbox = pci_alloc_consistent(dev->pdev, QUERY_ADAPTER_OUT_SIZE, &outdma); - if (!outbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + outbox = mailbox->buf; - err = mthca_cmd_box(dev, 0, outdma, 0, 0, CMD_QUERY_ADAPTER, + err = mthca_cmd_box(dev, 0, mailbox->dma, 0, 0, CMD_QUERY_ADAPTER, CMD_TIME_CLASS_A, status); if (err) goto out; - MTHCA_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET); - MTHCA_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET); + MTHCA_GET(adapter->vendor_id, outbox, QUERY_ADAPTER_VENDOR_ID_OFFSET); + MTHCA_GET(adapter->device_id, outbox, QUERY_ADAPTER_DEVICE_ID_OFFSET); MTHCA_GET(adapter->revision_id, outbox, QUERY_ADAPTER_REVISION_ID_OFFSET); - MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET); + MTHCA_GET(adapter->inta_pin, outbox, QUERY_ADAPTER_INTA_PIN_OFFSET); + + get_board_id(outbox + QUERY_ADAPTER_VSD_OFFSET / 4, + adapter->board_id); out: - pci_free_consistent(dev->pdev, QUERY_DEV_LIM_OUT_SIZE, outbox, outdma); + mthca_free_mailbox(dev, mailbox); return err; } @@ -1061,8 +1158,8 @@ int mthca_INIT_HCA(struct mthca_dev *dev, struct mthca_init_hca_param *param, u8 *status) { - u32 *inbox; - dma_addr_t indma; + struct mthca_mailbox *mailbox; + __be32 *inbox; int err; #define INIT_HCA_IN_SIZE 0x200 @@ -1102,9 +1199,10 @@ int mthca_INIT_HCA(struct mthca_dev *dev, #define INIT_HCA_UAR_SCATCH_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x10) #define INIT_HCA_UAR_CTX_BASE_OFFSET (INIT_HCA_UAR_OFFSET + 0x18) - inbox = pci_alloc_consistent(dev->pdev, INIT_HCA_IN_SIZE, &indma); - if (!inbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; memset(inbox, 0, INIT_HCA_IN_SIZE); @@ -1167,10 +1265,9 @@ int mthca_INIT_HCA(struct mthca_dev *dev, MTHCA_PUT(inbox, param->uarc_base, INIT_HCA_UAR_CTX_BASE_OFFSET); } - err = mthca_cmd(dev, indma, 0, 0, CMD_INIT_HCA, - HZ, status); + err = mthca_cmd(dev, mailbox->dma, 0, 0, CMD_INIT_HCA, HZ, status); - pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma); + mthca_free_mailbox(dev, mailbox); return err; } @@ -1178,8 +1275,8 @@ int mthca_INIT_IB(struct 
mthca_dev *dev, struct mthca_init_ib_param *param, int port, u8 *status) { + struct mthca_mailbox *mailbox; u32 *inbox; - dma_addr_t indma; int err; u32 flags; @@ -1188,10 +1285,8 @@ int mthca_INIT_IB(struct mthca_dev *dev, #define INIT_IB_FLAG_SIG (1 << 18) #define INIT_IB_FLAG_NG (1 << 17) #define INIT_IB_FLAG_G0 (1 << 16) -#define INIT_IB_FLAG_1X (1 << 8) -#define INIT_IB_FLAG_4X (1 << 9) -#define INIT_IB_FLAG_12X (1 << 11) #define INIT_IB_VL_SHIFT 4 +#define INIT_IB_PORT_WIDTH_SHIFT 8 #define INIT_IB_MTU_SHIFT 12 #define INIT_IB_MAX_GID_OFFSET 0x06 #define INIT_IB_MAX_PKEY_OFFSET 0x0a @@ -1199,19 +1294,19 @@ int mthca_INIT_IB(struct mthca_dev *dev, #define INIT_IB_NODE_GUID_OFFSET 0x18 #define INIT_IB_SI_GUID_OFFSET 0x20 - inbox = pci_alloc_consistent(dev->pdev, INIT_IB_IN_SIZE, &indma); - if (!inbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; memset(inbox, 0, INIT_IB_IN_SIZE); flags = 0; - flags |= param->enable_1x ? INIT_IB_FLAG_1X : 0; - flags |= param->enable_4x ? INIT_IB_FLAG_4X : 0; flags |= param->set_guid0 ? INIT_IB_FLAG_G0 : 0; flags |= param->set_node_guid ? INIT_IB_FLAG_NG : 0; flags |= param->set_si_guid ? INIT_IB_FLAG_SIG : 0; flags |= param->vl_cap << INIT_IB_VL_SHIFT; + flags |= param->port_width << INIT_IB_PORT_WIDTH_SHIFT; flags |= param->mtu_cap << INIT_IB_MTU_SHIFT; MTHCA_PUT(inbox, flags, INIT_IB_FLAGS_OFFSET); @@ -1221,10 +1316,10 @@ int mthca_INIT_IB(struct mthca_dev *dev, MTHCA_PUT(inbox, param->node_guid, INIT_IB_NODE_GUID_OFFSET); MTHCA_PUT(inbox, param->si_guid, INIT_IB_SI_GUID_OFFSET); - err = mthca_cmd(dev, indma, port, 0, CMD_INIT_IB, + err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_INIT_IB, CMD_TIME_CLASS_A, status); - pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma); + mthca_free_mailbox(dev, mailbox); return err; } @@ -1241,8 +1336,8 @@ int mthca_CLOSE_HCA(struct mthca_dev *dev, int panic, u8 *status) int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param, int port, u8 *status) { + struct mthca_mailbox *mailbox; u32 *inbox; - dma_addr_t indma; int err; u32 flags = 0; @@ -1253,9 +1348,10 @@ int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param, #define SET_IB_CAP_MASK_OFFSET 0x04 #define SET_IB_SI_GUID_OFFSET 0x08 - inbox = pci_alloc_consistent(dev->pdev, SET_IB_IN_SIZE, &indma); - if (!inbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; memset(inbox, 0, SET_IB_IN_SIZE); @@ -1266,10 +1362,10 @@ int mthca_SET_IB(struct mthca_dev *dev, struct mthca_set_ib_param *param, MTHCA_PUT(inbox, param->cap_mask, SET_IB_CAP_MASK_OFFSET); MTHCA_PUT(inbox, param->si_guid, SET_IB_SI_GUID_OFFSET); - err = mthca_cmd(dev, indma, port, 0, CMD_SET_IB, + err = mthca_cmd(dev, mailbox->dma, port, 0, CMD_SET_IB, CMD_TIME_CLASS_B, status); - pci_free_consistent(dev->pdev, INIT_HCA_IN_SIZE, inbox, indma); + mthca_free_mailbox(dev, mailbox); return err; } @@ -1280,20 +1376,22 @@ int mthca_MAP_ICM(struct mthca_dev *dev, struct mthca_icm *icm, u64 virt, u8 *st int mthca_MAP_ICM_page(struct mthca_dev *dev, u64 dma_addr, u64 virt, u8 *status) { - u64 *inbox; - dma_addr_t indma; + struct mthca_mailbox *mailbox; + __be64 *inbox; int err; - inbox = pci_alloc_consistent(dev->pdev, 16, &indma); - if (!inbox) - return -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + inbox = mailbox->buf; 
inbox[0] = cpu_to_be64(virt); inbox[1] = cpu_to_be64(dma_addr); - err = mthca_cmd(dev, indma, 1, 0, CMD_MAP_ICM, CMD_TIME_CLASS_B, status); + err = mthca_cmd(dev, mailbox->dma, 1, 0, CMD_MAP_ICM, + CMD_TIME_CLASS_B, status); - pci_free_consistent(dev->pdev, 16, inbox, indma); + mthca_free_mailbox(dev, mailbox); if (!err) mthca_dbg(dev, "Mapped page at %llx to %llx for ICM.\n", @@ -1338,69 +1436,26 @@ int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages, return 0; } -int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry, +int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int mpt_index, u8 *status) { - dma_addr_t indma; - int err; - - indma = pci_map_single(dev->pdev, mpt_entry, - MTHCA_MPT_ENTRY_SIZE, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - - err = mthca_cmd(dev, indma, mpt_index, 0, CMD_SW2HW_MPT, - CMD_TIME_CLASS_B, status); - - pci_unmap_single(dev->pdev, indma, - MTHCA_MPT_ENTRY_SIZE, PCI_DMA_TODEVICE); - return err; + return mthca_cmd(dev, mailbox->dma, mpt_index, 0, CMD_SW2HW_MPT, + CMD_TIME_CLASS_B, status); } -int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry, +int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int mpt_index, u8 *status) { - dma_addr_t outdma = 0; - int err; - - if (mpt_entry) { - outdma = pci_map_single(dev->pdev, mpt_entry, - MTHCA_MPT_ENTRY_SIZE, - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(outdma)) - return -ENOMEM; - } - - err = mthca_cmd_box(dev, 0, outdma, mpt_index, !mpt_entry, - CMD_HW2SW_MPT, - CMD_TIME_CLASS_B, status); - - if (mpt_entry) - pci_unmap_single(dev->pdev, outdma, - MTHCA_MPT_ENTRY_SIZE, - PCI_DMA_FROMDEVICE); - return err; + return mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index, + !mailbox, CMD_HW2SW_MPT, + CMD_TIME_CLASS_B, status); } -int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry, +int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int num_mtt, u8 *status) { - dma_addr_t indma; - int err; - - indma = pci_map_single(dev->pdev, mtt_entry, - (num_mtt + 2) * 8, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - - err = mthca_cmd(dev, indma, num_mtt, 0, CMD_WRITE_MTT, - CMD_TIME_CLASS_B, status); - - pci_unmap_single(dev->pdev, indma, - (num_mtt + 2) * 8, PCI_DMA_TODEVICE); - return err; + return mthca_cmd(dev, mailbox->dma, num_mtt, 0, CMD_WRITE_MTT, + CMD_TIME_CLASS_B, status); } int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status) @@ -1418,92 +1473,59 @@ int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap, 0, CMD_MAP_EQ, CMD_TIME_CLASS_B, status); } -int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context, +int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int eq_num, u8 *status) { - dma_addr_t indma; - int err; - - indma = pci_map_single(dev->pdev, eq_context, - MTHCA_EQ_CONTEXT_SIZE, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - - err = mthca_cmd(dev, indma, eq_num, 0, CMD_SW2HW_EQ, - CMD_TIME_CLASS_A, status); - - pci_unmap_single(dev->pdev, indma, - MTHCA_EQ_CONTEXT_SIZE, PCI_DMA_TODEVICE); - return err; + return mthca_cmd(dev, mailbox->dma, eq_num, 0, CMD_SW2HW_EQ, + CMD_TIME_CLASS_A, status); } -int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context, +int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int eq_num, u8 *status) { - dma_addr_t outdma = 0; - int err; - - outdma = pci_map_single(dev->pdev, eq_context, - MTHCA_EQ_CONTEXT_SIZE, - 
PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(outdma)) - return -ENOMEM; - - err = mthca_cmd_box(dev, 0, outdma, eq_num, 0, - CMD_HW2SW_EQ, - CMD_TIME_CLASS_A, status); - - pci_unmap_single(dev->pdev, outdma, - MTHCA_EQ_CONTEXT_SIZE, - PCI_DMA_FROMDEVICE); - return err; + return mthca_cmd_box(dev, 0, mailbox->dma, eq_num, 0, + CMD_HW2SW_EQ, + CMD_TIME_CLASS_A, status); } -int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context, +int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int cq_num, u8 *status) { - dma_addr_t indma; - int err; - - indma = pci_map_single(dev->pdev, cq_context, - MTHCA_CQ_CONTEXT_SIZE, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - - err = mthca_cmd(dev, indma, cq_num, 0, CMD_SW2HW_CQ, + return mthca_cmd(dev, mailbox->dma, cq_num, 0, CMD_SW2HW_CQ, CMD_TIME_CLASS_A, status); - - pci_unmap_single(dev->pdev, indma, - MTHCA_CQ_CONTEXT_SIZE, PCI_DMA_TODEVICE); - return err; } -int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context, +int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int cq_num, u8 *status) { - dma_addr_t outdma = 0; - int err; + return mthca_cmd_box(dev, 0, mailbox->dma, cq_num, 0, + CMD_HW2SW_CQ, + CMD_TIME_CLASS_A, status); +} - outdma = pci_map_single(dev->pdev, cq_context, - MTHCA_CQ_CONTEXT_SIZE, - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(outdma)) - return -ENOMEM; +int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num, u8 *status) +{ + return mthca_cmd(dev, mailbox->dma, srq_num, 0, CMD_SW2HW_SRQ, + CMD_TIME_CLASS_A, status); +} - err = mthca_cmd_box(dev, 0, outdma, cq_num, 0, - CMD_HW2SW_CQ, - CMD_TIME_CLASS_A, status); +int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num, u8 *status) +{ + return mthca_cmd_box(dev, 0, mailbox->dma, srq_num, 0, + CMD_HW2SW_SRQ, + CMD_TIME_CLASS_A, status); +} - pci_unmap_single(dev->pdev, outdma, - MTHCA_CQ_CONTEXT_SIZE, - PCI_DMA_FROMDEVICE); - return err; +int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 *status) +{ + return mthca_cmd(dev, limit, srq_num, 0, CMD_ARM_SRQ, + CMD_TIME_CLASS_B, status); } int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, - int is_ee, void *qp_context, u32 optmask, + int is_ee, struct mthca_mailbox *mailbox, u32 optmask, u8 *status) { static const u16 op[] = { @@ -1520,36 +1542,34 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, [MTHCA_TRANS_ANY2RST] = CMD_ERR2RST_QPEE }; u8 op_mod = 0; - - dma_addr_t indma; + int my_mailbox = 0; int err; if (trans < 0 || trans >= ARRAY_SIZE(op)) return -EINVAL; if (trans == MTHCA_TRANS_ANY2RST) { - indma = 0; op_mod = 3; /* don't write outbox, any->reset */ /* For debugging */ - qp_context = pci_alloc_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE, - &indma); - op_mod = 2; /* write outbox, any->reset */ + if (!mailbox) { + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (!IS_ERR(mailbox)) { + my_mailbox = 1; + op_mod = 2; /* write outbox, any->reset */ + } else + mailbox = NULL; + } } else { - indma = pci_map_single(dev->pdev, qp_context, - MTHCA_QP_CONTEXT_SIZE, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - if (0) { int i; mthca_dbg(dev, "Dumping QP context:\n"); - printk(" opt param mask: %08x\n", be32_to_cpup(qp_context)); + printk(" opt param mask: %08x\n", be32_to_cpup(mailbox->buf)); for (i = 0; i < 0x100 / 4; ++i) { if (i % 8 == 0) printk(" [%02x] ", i * 4); - printk(" %08x", be32_to_cpu(((u32 *) 
qp_context)[i + 2])); + printk(" %08x", + be32_to_cpu(((__be32 *) mailbox->buf)[i + 2])); if ((i + 1) % 8 == 0) printk("\n"); } @@ -1557,55 +1577,39 @@ int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, } if (trans == MTHCA_TRANS_ANY2RST) { - err = mthca_cmd_box(dev, 0, indma, (!!is_ee << 24) | num, - op_mod, op[trans], CMD_TIME_CLASS_C, status); + err = mthca_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, + (!!is_ee << 24) | num, op_mod, + op[trans], CMD_TIME_CLASS_C, status); - if (0) { + if (0 && mailbox) { int i; mthca_dbg(dev, "Dumping QP context:\n"); - printk(" %08x\n", be32_to_cpup(qp_context)); + printk(" %08x\n", be32_to_cpup(mailbox->buf)); for (i = 0; i < 0x100 / 4; ++i) { if (i % 8 == 0) printk("[%02x] ", i * 4); - printk(" %08x", be32_to_cpu(((u32 *) qp_context)[i + 2])); + printk(" %08x", + be32_to_cpu(((__be32 *) mailbox->buf)[i + 2])); if ((i + 1) % 8 == 0) printk("\n"); } } } else - err = mthca_cmd(dev, indma, (!!is_ee << 24) | num, + err = mthca_cmd(dev, mailbox->dma, (!!is_ee << 24) | num, op_mod, op[trans], CMD_TIME_CLASS_C, status); - if (trans != MTHCA_TRANS_ANY2RST) - pci_unmap_single(dev->pdev, indma, - MTHCA_QP_CONTEXT_SIZE, PCI_DMA_TODEVICE); - else - pci_free_consistent(dev->pdev, MTHCA_QP_CONTEXT_SIZE, - qp_context, indma); + if (my_mailbox) + mthca_free_mailbox(dev, mailbox); + return err; } int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, - void *qp_context, u8 *status) + struct mthca_mailbox *mailbox, u8 *status) { - dma_addr_t outdma = 0; - int err; - - outdma = pci_map_single(dev->pdev, qp_context, - MTHCA_QP_CONTEXT_SIZE, - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(outdma)) - return -ENOMEM; - - err = mthca_cmd_box(dev, 0, outdma, (!!is_ee << 24) | num, 0, - CMD_QUERY_QPEE, - CMD_TIME_CLASS_A, status); - - pci_unmap_single(dev->pdev, outdma, - MTHCA_QP_CONTEXT_SIZE, - PCI_DMA_FROMDEVICE); - return err; + return mthca_cmd_box(dev, 0, mailbox->dma, (!!is_ee << 24) | num, 0, + CMD_QUERY_QPEE, CMD_TIME_CLASS_A, status); } int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, @@ -1635,11 +1639,11 @@ int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, } int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, - int port, struct ib_wc* in_wc, struct ib_grh* in_grh, + int port, struct ib_wc *in_wc, struct ib_grh *in_grh, void *in_mad, void *response_mad, u8 *status) { - void *box; - dma_addr_t dma; + struct mthca_mailbox *inmailbox, *outmailbox; + void *inbox; int err; u32 in_modifier = port; u8 op_modifier = 0; @@ -1653,11 +1657,18 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, #define MAD_IFC_PKEY_OFFSET 0x10e #define MAD_IFC_GRH_OFFSET 0x140 - box = pci_alloc_consistent(dev->pdev, MAD_IFC_BOX_SIZE, &dma); - if (!box) - return -ENOMEM; + inmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(inmailbox)) + return PTR_ERR(inmailbox); + inbox = inmailbox->buf; - memcpy(box, in_mad, 256); + outmailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(outmailbox)) { + mthca_free_mailbox(dev, inmailbox); + return PTR_ERR(outmailbox); + } + + memcpy(inbox, in_mad, 256); /* * Key check traps can't be generated unless we have in_wc to @@ -1671,97 +1682,65 @@ int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, if (in_wc) { u8 val; - memset(box + 256, 0, 256); + memset(inbox + 256, 0, 256); - MTHCA_PUT(box, in_wc->qp_num, MAD_IFC_MY_QPN_OFFSET); - MTHCA_PUT(box, in_wc->src_qp, MAD_IFC_RQPN_OFFSET); + MTHCA_PUT(inbox, in_wc->qp_num, 
MAD_IFC_MY_QPN_OFFSET); + MTHCA_PUT(inbox, in_wc->src_qp, MAD_IFC_RQPN_OFFSET); val = in_wc->sl << 4; - MTHCA_PUT(box, val, MAD_IFC_SL_OFFSET); + MTHCA_PUT(inbox, val, MAD_IFC_SL_OFFSET); val = in_wc->dlid_path_bits | (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0); - MTHCA_PUT(box, val, MAD_IFC_GRH_OFFSET); + MTHCA_PUT(inbox, val, MAD_IFC_GRH_OFFSET); - MTHCA_PUT(box, in_wc->slid, MAD_IFC_RLID_OFFSET); - MTHCA_PUT(box, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET); + MTHCA_PUT(inbox, in_wc->slid, MAD_IFC_RLID_OFFSET); + MTHCA_PUT(inbox, in_wc->pkey_index, MAD_IFC_PKEY_OFFSET); if (in_grh) - memcpy((u8 *) box + MAD_IFC_GRH_OFFSET, in_grh, 40); + memcpy(inbox + MAD_IFC_GRH_OFFSET, in_grh, 40); op_modifier |= 0x10; in_modifier |= in_wc->slid << 16; } - err = mthca_cmd_box(dev, dma, dma + 512, in_modifier, op_modifier, + err = mthca_cmd_box(dev, inmailbox->dma, outmailbox->dma, + in_modifier, op_modifier, CMD_MAD_IFC, CMD_TIME_CLASS_C, status); if (!err && !*status) - memcpy(response_mad, box + 512, 256); + memcpy(response_mad, outmailbox->buf, 256); - pci_free_consistent(dev->pdev, MAD_IFC_BOX_SIZE, box, dma); + mthca_free_mailbox(dev, inmailbox); + mthca_free_mailbox(dev, outmailbox); return err; } -int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm, - u8 *status) +int mthca_READ_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox, u8 *status) { - dma_addr_t outdma = 0; - int err; - - outdma = pci_map_single(dev->pdev, mgm, - MTHCA_MGM_ENTRY_SIZE, - PCI_DMA_FROMDEVICE); - if (pci_dma_mapping_error(outdma)) - return -ENOMEM; - - err = mthca_cmd_box(dev, 0, outdma, index, 0, - CMD_READ_MGM, - CMD_TIME_CLASS_A, status); - - pci_unmap_single(dev->pdev, outdma, - MTHCA_MGM_ENTRY_SIZE, - PCI_DMA_FROMDEVICE); - return err; + return mthca_cmd_box(dev, 0, mailbox->dma, index, 0, + CMD_READ_MGM, CMD_TIME_CLASS_A, status); } -int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm, - u8 *status) +int mthca_WRITE_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox, u8 *status) { - dma_addr_t indma; - int err; - - indma = pci_map_single(dev->pdev, mgm, - MTHCA_MGM_ENTRY_SIZE, - PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - - err = mthca_cmd(dev, indma, index, 0, CMD_WRITE_MGM, - CMD_TIME_CLASS_A, status); - - pci_unmap_single(dev->pdev, indma, - MTHCA_MGM_ENTRY_SIZE, PCI_DMA_TODEVICE); - return err; + return mthca_cmd(dev, mailbox->dma, index, 0, CMD_WRITE_MGM, + CMD_TIME_CLASS_A, status); } -int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash, - u8 *status) +int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + u16 *hash, u8 *status) { - dma_addr_t indma; u64 imm; int err; - indma = pci_map_single(dev->pdev, gid, 16, PCI_DMA_TODEVICE); - if (pci_dma_mapping_error(indma)) - return -ENOMEM; - - err = mthca_cmd_imm(dev, indma, &imm, 0, 0, CMD_MGID_HASH, + err = mthca_cmd_imm(dev, mailbox->dma, &imm, 0, 0, CMD_MGID_HASH, CMD_TIME_CLASS_A, status); - *hash = imm; - pci_unmap_single(dev->pdev, indma, 16, PCI_DMA_TODEVICE); + *hash = imm; return err; } diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.h b/drivers/infiniband/hw/mthca/mthca_cmd.h index adf039b3c540..65f976a13e02 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.h +++ b/drivers/infiniband/hw/mthca/mthca_cmd.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 
* * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -35,10 +36,9 @@ #ifndef MTHCA_CMD_H #define MTHCA_CMD_H -#include <ib_verbs.h> +#include <rdma/ib_verbs.h> -#define MTHCA_CMD_MAILBOX_ALIGN 16UL -#define MTHCA_CMD_MAILBOX_EXTRA (MTHCA_CMD_MAILBOX_ALIGN - 1) +#define MTHCA_MAILBOX_SIZE 4096 enum { /* command completed successfully: */ @@ -112,6 +112,11 @@ enum { DEV_LIM_FLAG_UD_MULTI = 1 << 21, }; +struct mthca_mailbox { + dma_addr_t dma; + void *buf; +}; + struct mthca_dev_lim { int max_srq_sz; int max_qp_sz; @@ -179,10 +184,11 @@ struct mthca_dev_lim { }; struct mthca_adapter { - u32 vendor_id; - u32 device_id; - u32 revision_id; - u8 inta_pin; + u32 vendor_id; + u32 device_id; + u32 revision_id; + char board_id[MTHCA_BOARD_ID_LEN]; + u8 inta_pin; }; struct mthca_init_hca_param { @@ -214,8 +220,7 @@ struct mthca_init_hca_param { }; struct mthca_init_ib_param { - int enable_1x; - int enable_4x; + int port_width; int vl_cap; int mtu_cap; u16 gid_cap; @@ -235,11 +240,17 @@ struct mthca_set_ib_param { u32 cap_mask; }; +int mthca_cmd_init(struct mthca_dev *dev); +void mthca_cmd_cleanup(struct mthca_dev *dev); int mthca_cmd_use_events(struct mthca_dev *dev); void mthca_cmd_use_polling(struct mthca_dev *dev); void mthca_cmd_event(struct mthca_dev *dev, u16 token, u8 status, u64 out_param); +struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev, + unsigned int gfp_mask); +void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox); + int mthca_SYS_EN(struct mthca_dev *dev, u8 *status); int mthca_SYS_DIS(struct mthca_dev *dev, u8 *status); int mthca_MAP_FA(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status); @@ -270,41 +281,44 @@ int mthca_MAP_ICM_AUX(struct mthca_dev *dev, struct mthca_icm *icm, u8 *status); int mthca_UNMAP_ICM_AUX(struct mthca_dev *dev, u8 *status); int mthca_SET_ICM_SIZE(struct mthca_dev *dev, u64 icm_size, u64 *aux_pages, u8 *status); -int mthca_SW2HW_MPT(struct mthca_dev *dev, void *mpt_entry, +int mthca_SW2HW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int mpt_index, u8 *status); -int mthca_HW2SW_MPT(struct mthca_dev *dev, void *mpt_entry, +int mthca_HW2SW_MPT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int mpt_index, u8 *status); -int mthca_WRITE_MTT(struct mthca_dev *dev, u64 *mtt_entry, +int mthca_WRITE_MTT(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int num_mtt, u8 *status); int mthca_SYNC_TPT(struct mthca_dev *dev, u8 *status); int mthca_MAP_EQ(struct mthca_dev *dev, u64 event_mask, int unmap, int eq_num, u8 *status); -int mthca_SW2HW_EQ(struct mthca_dev *dev, void *eq_context, +int mthca_SW2HW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int eq_num, u8 *status); -int mthca_HW2SW_EQ(struct mthca_dev *dev, void *eq_context, +int mthca_HW2SW_EQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int eq_num, u8 *status); -int mthca_SW2HW_CQ(struct mthca_dev *dev, void *cq_context, +int mthca_SW2HW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int cq_num, u8 *status); -int mthca_HW2SW_CQ(struct mthca_dev *dev, void *cq_context, +int mthca_HW2SW_CQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, int cq_num, u8 *status); +int mthca_SW2HW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num, u8 *status); +int mthca_HW2SW_SRQ(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + int srq_num, u8 *status); +int mthca_ARM_SRQ(struct mthca_dev *dev, int srq_num, int limit, u8 
*status); int mthca_MODIFY_QP(struct mthca_dev *dev, int trans, u32 num, - int is_ee, void *qp_context, u32 optmask, + int is_ee, struct mthca_mailbox *mailbox, u32 optmask, u8 *status); int mthca_QUERY_QP(struct mthca_dev *dev, u32 num, int is_ee, - void *qp_context, u8 *status); + struct mthca_mailbox *mailbox, u8 *status); int mthca_CONF_SPECIAL_QP(struct mthca_dev *dev, int type, u32 qpn, u8 *status); int mthca_MAD_IFC(struct mthca_dev *dev, int ignore_mkey, int ignore_bkey, - int port, struct ib_wc* in_wc, struct ib_grh* in_grh, + int port, struct ib_wc *in_wc, struct ib_grh *in_grh, void *in_mad, void *response_mad, u8 *status); -int mthca_READ_MGM(struct mthca_dev *dev, int index, void *mgm, - u8 *status); -int mthca_WRITE_MGM(struct mthca_dev *dev, int index, void *mgm, - u8 *status); -int mthca_MGID_HASH(struct mthca_dev *dev, void *gid, u16 *hash, - u8 *status); +int mthca_READ_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox, u8 *status); +int mthca_WRITE_MGM(struct mthca_dev *dev, int index, + struct mthca_mailbox *mailbox, u8 *status); +int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox, + u16 *hash, u8 *status); int mthca_NOP(struct mthca_dev *dev, u8 *status); -#define MAILBOX_ALIGN(x) ((void *) ALIGN((unsigned long) (x), MTHCA_CMD_MAILBOX_ALIGN)) - #endif /* MTHCA_CMD_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_config_reg.h b/drivers/infiniband/hw/mthca/mthca_config_reg.h index b4bfbbfe2c3d..afa56bfaab2e 100644 --- a/drivers/infiniband/hw/mthca/mthca_config_reg.h +++ b/drivers/infiniband/hw/mthca/mthca_config_reg.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 2bf347b84c31..8600b6c3e0c2 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -1,5 +1,9 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -35,7 +39,7 @@ #include <linux/init.h> #include <linux/hardirq.h> -#include <ib_pack.h> +#include <rdma/ib_pack.h> #include "mthca_dev.h" #include "mthca_cmd.h" @@ -53,21 +57,21 @@ enum { * Must be packed because start is 64 bits but only aligned to 32 bits. 
*/ struct mthca_cq_context { - u32 flags; - u64 start; - u32 logsize_usrpage; - u32 error_eqn; /* Tavor only */ - u32 comp_eqn; - u32 pd; - u32 lkey; - u32 last_notified_index; - u32 solicit_producer_index; - u32 consumer_index; - u32 producer_index; - u32 cqn; - u32 ci_db; /* Arbel only */ - u32 state_db; /* Arbel only */ - u32 reserved; + __be32 flags; + __be64 start; + __be32 logsize_usrpage; + __be32 error_eqn; /* Tavor only */ + __be32 comp_eqn; + __be32 pd; + __be32 lkey; + __be32 last_notified_index; + __be32 solicit_producer_index; + __be32 consumer_index; + __be32 producer_index; + __be32 cqn; + __be32 ci_db; /* Arbel only */ + __be32 state_db; /* Arbel only */ + u32 reserved; } __attribute__((packed)); #define MTHCA_CQ_STATUS_OK ( 0 << 28) @@ -106,31 +110,31 @@ enum { }; struct mthca_cqe { - u32 my_qpn; - u32 my_ee; - u32 rqpn; - u16 sl_g_mlpath; - u16 rlid; - u32 imm_etype_pkey_eec; - u32 byte_cnt; - u32 wqe; - u8 opcode; - u8 is_send; - u8 reserved; - u8 owner; + __be32 my_qpn; + __be32 my_ee; + __be32 rqpn; + __be16 sl_g_mlpath; + __be16 rlid; + __be32 imm_etype_pkey_eec; + __be32 byte_cnt; + __be32 wqe; + u8 opcode; + u8 is_send; + u8 reserved; + u8 owner; }; struct mthca_err_cqe { - u32 my_qpn; - u32 reserved1[3]; - u8 syndrome; - u8 reserved2; - u16 db_cnt; - u32 reserved3; - u32 wqe; - u8 opcode; - u8 reserved4[2]; - u8 owner; + __be32 my_qpn; + u32 reserved1[3]; + u8 syndrome; + u8 reserved2; + __be16 db_cnt; + u32 reserved3; + __be32 wqe; + u8 opcode; + u8 reserved4[2]; + u8 owner; }; #define MTHCA_CQ_ENTRY_OWNER_SW (0 << 7) @@ -171,6 +175,17 @@ static inline void set_cqe_hw(struct mthca_cqe *cqe) cqe->owner = MTHCA_CQ_ENTRY_OWNER_HW; } +static void dump_cqe(struct mthca_dev *dev, void *cqe_ptr) +{ + __be32 *cqe = cqe_ptr; + + (void) cqe; /* avoid warning if mthca_dbg compiled away... */ + mthca_dbg(dev, "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n", + be32_to_cpu(cqe[0]), be32_to_cpu(cqe[1]), be32_to_cpu(cqe[2]), + be32_to_cpu(cqe[3]), be32_to_cpu(cqe[4]), be32_to_cpu(cqe[5]), + be32_to_cpu(cqe[6]), be32_to_cpu(cqe[7])); +} + /* * incr is ignored in native Arbel (mem-free) mode, so cq->cons_index * should be correct before calling update_cons_index(). 
@@ -178,7 +193,7 @@ static inline void set_cqe_hw(struct mthca_cqe *cqe) static inline void update_cons_index(struct mthca_dev *dev, struct mthca_cq *cq, int incr) { - u32 doorbell[2]; + __be32 doorbell[2]; if (mthca_is_memfree(dev)) { *cq->set_ci_db = cpu_to_be32(cq->cons_index); @@ -209,7 +224,8 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } -void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn) +void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, + struct mthca_srq *srq) { struct mthca_cq *cq; struct mthca_cqe *cqe; @@ -250,8 +266,11 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn) */ while (prod_index > cq->cons_index) { cqe = get_cqe(cq, (prod_index - 1) & cq->ibcq.cqe); - if (cqe->my_qpn == cpu_to_be32(qpn)) + if (cqe->my_qpn == cpu_to_be32(qpn)) { + if (srq) + mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe)); ++nfreed; + } else if (nfreed) memcpy(get_cqe(cq, (prod_index - 1 + nfreed) & cq->ibcq.cqe), @@ -278,18 +297,14 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, { int err; int dbd; - u32 new_wqe; - - if (1 && cqe->syndrome != SYNDROME_WR_FLUSH_ERR) { - int j; - - mthca_dbg(dev, "%x/%d: error CQE -> QPN %06x, WQE @ %08x\n", - cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn), - be32_to_cpu(cqe->wqe)); - - for (j = 0; j < 8; ++j) - printk(KERN_DEBUG " [%2x] %08x\n", - j * 4, be32_to_cpu(((u32 *) cqe)[j])); + __be32 new_wqe; + + if (cqe->syndrome == SYNDROME_LOCAL_QP_OP_ERR) { + mthca_dbg(dev, "local QP operation err " + "(QPN %06x, WQE @ %08x, CQN %06x, index %d)\n", + be32_to_cpu(cqe->my_qpn), be32_to_cpu(cqe->wqe), + cq->cqn, cq->cons_index); + dump_cqe(dev, cqe); } /* @@ -356,6 +371,13 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, break; } + /* + * Mem-free HCAs always generate one CQE per WQE, even in the + * error case, so we don't have to check the doorbell count, etc. 
+ */ + if (mthca_is_memfree(dev)) + return 0; + err = mthca_free_err_wqe(dev, qp, is_send, wqe_index, &dbd, &new_wqe); if (err) return err; @@ -377,15 +399,6 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq, return 0; } -static void dump_cqe(struct mthca_cqe *cqe) -{ - int j; - - for (j = 0; j < 8; ++j) - printk(KERN_DEBUG " [%2x] %08x\n", - j * 4, be32_to_cpu(((u32 *) cqe)[j])); -} - static inline int mthca_poll_one(struct mthca_dev *dev, struct mthca_cq *cq, struct mthca_qp **cur_qp, @@ -414,8 +427,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev, mthca_dbg(dev, "%x/%d: CQE -> QPN %06x, WQE @ %08x\n", cq->cqn, cq->cons_index, be32_to_cpu(cqe->my_qpn), be32_to_cpu(cqe->wqe)); - - dump_cqe(cqe); + dump_cqe(dev, cqe); } is_error = (cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) == @@ -447,23 +459,27 @@ static inline int mthca_poll_one(struct mthca_dev *dev, >> wq->wqe_shift); entry->wr_id = (*cur_qp)->wrid[wqe_index + (*cur_qp)->rq.max]; + } else if ((*cur_qp)->ibqp.srq) { + struct mthca_srq *srq = to_msrq((*cur_qp)->ibqp.srq); + u32 wqe = be32_to_cpu(cqe->wqe); + wq = NULL; + wqe_index = wqe >> srq->wqe_shift; + entry->wr_id = srq->wrid[wqe_index]; + mthca_free_srq_wqe(srq, wqe); } else { wq = &(*cur_qp)->rq; wqe_index = be32_to_cpu(cqe->wqe) >> wq->wqe_shift; entry->wr_id = (*cur_qp)->wrid[wqe_index]; } - if (wq->last_comp < wqe_index) - wq->tail += wqe_index - wq->last_comp; - else - wq->tail += wqe_index + wq->max - wq->last_comp; - - wq->last_comp = wqe_index; + if (wq) { + if (wq->last_comp < wqe_index) + wq->tail += wqe_index - wq->last_comp; + else + wq->tail += wqe_index + wq->max - wq->last_comp; - if (0) - mthca_dbg(dev, "%s completion for QP %06x, index %d (nr %d)\n", - is_send ? "Send" : "Receive", - (*cur_qp)->qpn, wqe_index, wq->max); + wq->last_comp = wqe_index; + } if (is_error) { err = handle_error_cqe(dev, cq, *cur_qp, wqe_index, is_send, @@ -581,13 +597,13 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32((notify == IB_CQ_SOLICITED ? 
MTHCA_TAVOR_CQ_DB_REQ_NOT_SOL : MTHCA_TAVOR_CQ_DB_REQ_NOT) | to_mcq(cq)->cqn); - doorbell[1] = 0xffffffff; + doorbell[1] = (__force __be32) 0xffffffff; mthca_write64(doorbell, to_mdev(cq->device)->kar + MTHCA_CQ_DOORBELL, @@ -599,9 +615,9 @@ int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify) int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) { struct mthca_cq *cq = to_mcq(ibcq); - u32 doorbell[2]; + __be32 doorbell[2]; u32 sn; - u32 ci; + __be32 ci; sn = cq->arm_sn & 3; ci = cpu_to_be32(cq->cons_index); @@ -634,119 +650,16 @@ int mthca_arbel_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify notify) static void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq *cq) { - int i; - int size; - - if (cq->is_direct) - pci_free_consistent(dev->pdev, - (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE, - cq->queue.direct.buf, - pci_unmap_addr(&cq->queue.direct, - mapping)); - else { - size = (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE; - for (i = 0; i < (size + PAGE_SIZE - 1) / PAGE_SIZE; ++i) - if (cq->queue.page_list[i].buf) - pci_free_consistent(dev->pdev, PAGE_SIZE, - cq->queue.page_list[i].buf, - pci_unmap_addr(&cq->queue.page_list[i], - mapping)); - - kfree(cq->queue.page_list); - } -} - -static int mthca_alloc_cq_buf(struct mthca_dev *dev, int size, - struct mthca_cq *cq) -{ - int err = -ENOMEM; - int npages, shift; - u64 *dma_list = NULL; - dma_addr_t t; - int i; - - if (size <= MTHCA_MAX_DIRECT_CQ_SIZE) { - cq->is_direct = 1; - npages = 1; - shift = get_order(size) + PAGE_SHIFT; - - cq->queue.direct.buf = pci_alloc_consistent(dev->pdev, - size, &t); - if (!cq->queue.direct.buf) - return -ENOMEM; - - pci_unmap_addr_set(&cq->queue.direct, mapping, t); - - memset(cq->queue.direct.buf, 0, size); - - while (t & ((1 << shift) - 1)) { - --shift; - npages *= 2; - } - - dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); - if (!dma_list) - goto err_free; - - for (i = 0; i < npages; ++i) - dma_list[i] = t + i * (1 << shift); - } else { - cq->is_direct = 0; - npages = (size + PAGE_SIZE - 1) / PAGE_SIZE; - shift = PAGE_SHIFT; - - dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); - if (!dma_list) - return -ENOMEM; - - cq->queue.page_list = kmalloc(npages * sizeof *cq->queue.page_list, - GFP_KERNEL); - if (!cq->queue.page_list) - goto err_out; - - for (i = 0; i < npages; ++i) - cq->queue.page_list[i].buf = NULL; - - for (i = 0; i < npages; ++i) { - cq->queue.page_list[i].buf = - pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t); - if (!cq->queue.page_list[i].buf) - goto err_free; - - dma_list[i] = t; - pci_unmap_addr_set(&cq->queue.page_list[i], mapping, t); - - memset(cq->queue.page_list[i].buf, 0, PAGE_SIZE); - } - } - - err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num, - dma_list, shift, npages, - 0, size, - MTHCA_MPT_FLAG_LOCAL_WRITE | - MTHCA_MPT_FLAG_LOCAL_READ, - &cq->mr); - if (err) - goto err_free; - - kfree(dma_list); - - return 0; - -err_free: - mthca_free_cq_buf(dev, cq); - -err_out: - kfree(dma_list); - - return err; + mthca_buf_free(dev, (cq->ibcq.cqe + 1) * MTHCA_CQ_ENTRY_SIZE, + &cq->queue, cq->is_direct, &cq->mr); } int mthca_init_cq(struct mthca_dev *dev, int nent, + struct mthca_ucontext *ctx, u32 pdn, struct mthca_cq *cq) { int size = nent * MTHCA_CQ_ENTRY_SIZE; - void *mailbox = NULL; + struct mthca_mailbox *mailbox; struct mthca_cq_context *cq_context; int err = -ENOMEM; u8 status; @@ -754,45 +667,51 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, might_sleep(); - cq->ibcq.cqe = nent - 1; + cq->ibcq.cqe = nent - 1; + cq->is_kernel = 
!ctx; cq->cqn = mthca_alloc(&dev->cq_table.alloc); if (cq->cqn == -1) return -ENOMEM; if (mthca_is_memfree(dev)) { - cq->arm_sn = 1; - err = mthca_table_get(dev, dev->cq_table.table, cq->cqn); if (err) goto err_out; - err = -ENOMEM; + if (cq->is_kernel) { + cq->arm_sn = 1; - cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, - cq->cqn, &cq->set_ci_db); - if (cq->set_ci_db_index < 0) - goto err_out_icm; + err = -ENOMEM; - cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM, - cq->cqn, &cq->arm_db); - if (cq->arm_db_index < 0) - goto err_out_ci; + cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, + cq->cqn, &cq->set_ci_db); + if (cq->set_ci_db_index < 0) + goto err_out_icm; + + cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM, + cq->cqn, &cq->arm_db); + if (cq->arm_db_index < 0) + goto err_out_ci; + } } - mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) - goto err_out_mailbox; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + goto err_out_arm; - cq_context = MAILBOX_ALIGN(mailbox); + cq_context = mailbox->buf; - err = mthca_alloc_cq_buf(dev, size, cq); - if (err) - goto err_out_mailbox; + if (cq->is_kernel) { + err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_CQ_SIZE, + &cq->queue, &cq->is_direct, + &dev->driver_pd, 1, &cq->mr); + if (err) + goto err_out_mailbox; - for (i = 0; i < nent; ++i) - set_cqe_hw(get_cqe(cq, i)); + for (i = 0; i < nent; ++i) + set_cqe_hw(get_cqe(cq, i)); + } spin_lock_init(&cq->lock); atomic_set(&cq->refcount, 1); @@ -802,12 +721,14 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, cq_context->flags = cpu_to_be32(MTHCA_CQ_STATUS_OK | MTHCA_CQ_STATE_DISARMED | MTHCA_CQ_FLAG_TR); - cq_context->start = cpu_to_be64(0); - cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 | - dev->driver_uar.index); + cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24); + if (ctx) + cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index); + else + cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index); cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn); cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn); - cq_context->pd = cpu_to_be32(dev->driver_pd.pd_num); + cq_context->pd = cpu_to_be32(pdn); cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey); cq_context->cqn = cpu_to_be32(cq->cqn); @@ -816,7 +737,7 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, cq_context->state_db = cpu_to_be32(cq->arm_db_index); } - err = mthca_SW2HW_CQ(dev, cq_context, cq->cqn, &status); + err = mthca_SW2HW_CQ(dev, mailbox, cq->cqn, &status); if (err) { mthca_warn(dev, "SW2HW_CQ failed (%d)\n", err); goto err_out_free_mr; @@ -840,22 +761,23 @@ int mthca_init_cq(struct mthca_dev *dev, int nent, cq->cons_index = 0; - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); return 0; err_out_free_mr: - mthca_free_mr(dev, &cq->mr); - mthca_free_cq_buf(dev, cq); + if (cq->is_kernel) + mthca_free_cq_buf(dev, cq); err_out_mailbox: - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); - if (mthca_is_memfree(dev)) +err_out_arm: + if (cq->is_kernel && mthca_is_memfree(dev)) mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); err_out_ci: - if (mthca_is_memfree(dev)) + if (cq->is_kernel && mthca_is_memfree(dev)) mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); err_out_icm: @@ -870,32 +792,31 @@ err_out: void mthca_free_cq(struct mthca_dev *dev, struct mthca_cq *cq) { - void 
*mailbox; + struct mthca_mailbox *mailbox; int err; u8 status; might_sleep(); - mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) { + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { mthca_warn(dev, "No memory for mailbox to free CQ.\n"); return; } - err = mthca_HW2SW_CQ(dev, MAILBOX_ALIGN(mailbox), cq->cqn, &status); + err = mthca_HW2SW_CQ(dev, mailbox, cq->cqn, &status); if (err) mthca_warn(dev, "HW2SW_CQ failed (%d)\n", err); else if (status) - mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", - status); + mthca_warn(dev, "HW2SW_CQ returned status 0x%02x\n", status); if (0) { - u32 *ctx = MAILBOX_ALIGN(mailbox); + __be32 *ctx = mailbox->buf; int j; printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n", - cq->cqn, cq->cons_index, !!next_cqe_sw(cq)); + cq->cqn, cq->cons_index, + cq->is_kernel ? !!next_cqe_sw(cq) : 0); for (j = 0; j < 16; ++j) printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j])); } @@ -913,17 +834,17 @@ void mthca_free_cq(struct mthca_dev *dev, atomic_dec(&cq->refcount); wait_event(cq->wait, !atomic_read(&cq->refcount)); - mthca_free_mr(dev, &cq->mr); - mthca_free_cq_buf(dev, cq); - - if (mthca_is_memfree(dev)) { - mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); - mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); - mthca_table_put(dev, dev->cq_table.table, cq->cqn); + if (cq->is_kernel) { + mthca_free_cq_buf(dev, cq); + if (mthca_is_memfree(dev)) { + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); + } } + mthca_table_put(dev, dev->cq_table.table, cq->cqn); mthca_free(&dev->cq_table.alloc, cq->cqn); - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); } int __devinit mthca_init_cq_table(struct mthca_dev *dev) diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index e3d79e267dc9..7bff5a8425f4 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -1,5 +1,9 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -46,8 +50,8 @@ #define DRV_NAME "ib_mthca" #define PFX DRV_NAME ": " -#define DRV_VERSION "0.06-pre" -#define DRV_RELDATE "November 8, 2004" +#define DRV_VERSION "0.06" +#define DRV_RELDATE "June 23, 2005" enum { MTHCA_FLAG_DDR_HIDDEN = 1 << 1, @@ -65,6 +69,10 @@ enum { }; enum { + MTHCA_BOARD_ID_LEN = 64 +}; + +enum { MTHCA_EQ_CONTEXT_SIZE = 0x40, MTHCA_CQ_CONTEXT_SIZE = 0x40, MTHCA_QP_CONTEXT_SIZE = 0x200, @@ -98,6 +106,7 @@ enum { }; struct mthca_cmd { + struct pci_pool *pool; int use_events; struct semaphore hcr_sem; struct semaphore poll_sem; @@ -139,6 +148,7 @@ struct mthca_limits { int reserved_mcgs; int num_pds; int reserved_pds; + u8 port_width_cap; }; struct mthca_alloc { @@ -208,6 +218,13 @@ struct mthca_cq_table { struct mthca_icm_table *table; }; +struct mthca_srq_table { + struct mthca_alloc alloc; + spinlock_t lock; + struct mthca_array srq; + struct mthca_icm_table *table; +}; + struct mthca_qp_table { struct mthca_alloc alloc; u32 rdb_base; @@ -243,6 +260,7 @@ struct mthca_dev { unsigned long device_cap_flags; u32 rev_id; + char board_id[MTHCA_BOARD_ID_LEN]; /* firmware info */ u64 fw_ver; @@ -288,6 +306,7 @@ struct mthca_dev { struct mthca_mr_table mr_table; struct mthca_eq_table eq_table; struct mthca_cq_table cq_table; + struct mthca_srq_table srq_table; struct mthca_qp_table qp_table; struct mthca_av_table av_table; struct mthca_mcg_table mcg_table; @@ -328,14 +347,13 @@ extern void __buggy_use_of_MTHCA_PUT(void); #define MTHCA_PUT(dest, source, offset) \ do { \ - __typeof__(source) *__p = \ - (__typeof__(source) *) ((char *) (dest) + (offset)); \ + void *__d = ((char *) (dest) + (offset)); \ switch (sizeof(source)) { \ - case 1: *__p = (source); break; \ - case 2: *__p = cpu_to_be16(source); break; \ - case 4: *__p = cpu_to_be32(source); break; \ - case 8: *__p = cpu_to_be64(source); break; \ - default: __buggy_use_of_MTHCA_PUT(); \ + case 1: *(u8 *) __d = (source); break; \ + case 2: *(__be16 *) __d = cpu_to_be16(source); break; \ + case 4: *(__be32 *) __d = cpu_to_be32(source); break; \ + case 8: *(__be64 *) __d = cpu_to_be64(source); break; \ + default: __buggy_use_of_MTHCA_PUT(); \ } \ } while (0) @@ -351,12 +369,18 @@ int mthca_array_set(struct mthca_array *array, int index, void *value); void mthca_array_clear(struct mthca_array *array, int index); int mthca_array_init(struct mthca_array *array, int nent); void mthca_array_cleanup(struct mthca_array *array, int nent); +int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct, + union mthca_buf *buf, int *is_direct, struct mthca_pd *pd, + int hca_write, struct mthca_mr *mr); +void mthca_buf_free(struct mthca_dev *dev, int size, union mthca_buf *buf, + int is_direct, struct mthca_mr *mr); int mthca_init_uar_table(struct mthca_dev *dev); int mthca_init_pd_table(struct mthca_dev *dev); int mthca_init_mr_table(struct mthca_dev *dev); int mthca_init_eq_table(struct mthca_dev *dev); int mthca_init_cq_table(struct mthca_dev *dev); +int mthca_init_srq_table(struct mthca_dev *dev); int mthca_init_qp_table(struct mthca_dev *dev); int mthca_init_av_table(struct mthca_dev *dev); int mthca_init_mcg_table(struct mthca_dev *dev); @@ -366,6 +390,7 @@ void mthca_cleanup_pd_table(struct mthca_dev *dev); void mthca_cleanup_mr_table(struct mthca_dev *dev); void mthca_cleanup_eq_table(struct mthca_dev *dev); void mthca_cleanup_cq_table(struct mthca_dev *dev); +void mthca_cleanup_srq_table(struct mthca_dev *dev); void mthca_cleanup_qp_table(struct mthca_dev *dev); 
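The mthca_buf_alloc()/mthca_buf_free() pair declared just above centralizes the direct-vs-paged queue allocation that mthca_cq.c previously open-coded. A minimal sketch of a caller, using only the signatures in this header (the function name, the 4096-byte direct threshold, and the local variable names are illustrative, not part of the patch):

	static int example_alloc_queue(struct mthca_dev *dev, int size)
	{
		union mthca_buf queue;
		struct mthca_mr mr;
		int is_direct;
		int err;

		/* One contiguous coherent chunk if size <= max_direct
		 * (here 4096), otherwise one coherent page per PAGE_SIZE
		 * of the queue; either way the memory is registered in
		 * mr, and hca_write = 1 adds HCA local write access. */
		err = mthca_buf_alloc(dev, size, 4096, &queue, &is_direct,
				      &dev->driver_pd, 1, &mr);
		if (err)
			return err;

		/* Use queue.direct.buf or queue.page_list[i].buf,
		 * depending on is_direct. */

		/* Frees the MR and the underlying coherent memory. */
		mthca_buf_free(dev, size, &queue, is_direct, &mr);
		return 0;
	}

This is the same call shape mthca_init_cq() now uses for the CQE ring, with MTHCA_MAX_DIRECT_CQ_SIZE as the threshold.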
void mthca_cleanup_av_table(struct mthca_dev *dev); void mthca_cleanup_mcg_table(struct mthca_dev *dev); @@ -376,9 +401,15 @@ void mthca_unregister_device(struct mthca_dev *dev); int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar); void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar); -int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd); +int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd); void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd); +struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size); +void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt); +int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, + int start_index, u64 *buffer_list, int list_len); +int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, + u64 iova, u64 total_size, u32 access, struct mthca_mr *mr); int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, u32 access, struct mthca_mr *mr); int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, @@ -405,11 +436,24 @@ int mthca_poll_cq(struct ib_cq *ibcq, int num_entries, int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); int mthca_init_cq(struct mthca_dev *dev, int nent, + struct mthca_ucontext *ctx, u32 pdn, struct mthca_cq *cq); void mthca_free_cq(struct mthca_dev *dev, struct mthca_cq *cq); void mthca_cq_event(struct mthca_dev *dev, u32 cqn); -void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn); +void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn, + struct mthca_srq *srq); + +int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, + struct ib_srq_attr *attr, struct mthca_srq *srq); +void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); +void mthca_srq_event(struct mthca_dev *dev, u32 srqn, + enum ib_event_type event_type); +void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr); +int mthca_tavor_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); void mthca_qp_event(struct mthca_dev *dev, u32 qpn, enum ib_event_type event_type); @@ -423,19 +467,21 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr); int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, - int index, int *dbd, u32 *new_wqe); + int index, int *dbd, __be32 *new_wqe); int mthca_alloc_qp(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_cq *send_cq, struct mthca_cq *recv_cq, enum ib_qp_type type, enum ib_sig_type send_policy, + struct ib_qp_cap *cap, struct mthca_qp *qp); int mthca_alloc_sqp(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_cq *send_cq, struct mthca_cq *recv_cq, enum ib_sig_type send_policy, + struct ib_qp_cap *cap, int qpn, int port, struct mthca_sqp *sqp); diff --git a/drivers/infiniband/hw/mthca/mthca_doorbell.h b/drivers/infiniband/hw/mthca/mthca_doorbell.h index 821039a49049..dd9a44d170c9 100644 --- a/drivers/infiniband/hw/mthca/mthca_doorbell.h +++ b/drivers/infiniband/hw/mthca/mthca_doorbell.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 
* * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -56,13 +58,13 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest) __raw_writeq((__force u64) val, dest); } -static inline void mthca_write64(u32 val[2], void __iomem *dest, +static inline void mthca_write64(__be32 val[2], void __iomem *dest, spinlock_t *doorbell_lock) { __raw_writeq(*(u64 *) val, dest); } -static inline void mthca_write_db_rec(u32 val[2], u32 *db) +static inline void mthca_write_db_rec(__be32 val[2], __be32 *db) { *(u64 *) db = *(u64 *) val; } @@ -85,18 +87,18 @@ static inline void mthca_write64_raw(__be64 val, void __iomem *dest) __raw_writel(((__force u32 *) &val)[1], dest + 4); } -static inline void mthca_write64(u32 val[2], void __iomem *dest, +static inline void mthca_write64(__be32 val[2], void __iomem *dest, spinlock_t *doorbell_lock) { unsigned long flags; spin_lock_irqsave(doorbell_lock, flags); - __raw_writel(val[0], dest); - __raw_writel(val[1], dest + 4); + __raw_writel((__force u32) val[0], dest); + __raw_writel((__force u32) val[1], dest + 4); spin_unlock_irqrestore(doorbell_lock, flags); } -static inline void mthca_write_db_rec(u32 val[2], u32 *db) +static inline void mthca_write_db_rec(__be32 val[2], __be32 *db) { db[0] = val[0]; wmb(); diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index f46d615d396f..18f0981eb0c1 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -51,18 +52,18 @@ enum { * Must be packed because start is 64 bits but only aligned to 32 bits. 
*/ struct mthca_eq_context { - u32 flags; - u64 start; - u32 logsize_usrpage; - u32 tavor_pd; /* reserved for Arbel */ - u8 reserved1[3]; - u8 intr; - u32 arbel_pd; /* lost_count for Tavor */ - u32 lkey; - u32 reserved2[2]; - u32 consumer_index; - u32 producer_index; - u32 reserved3[4]; + __be32 flags; + __be64 start; + __be32 logsize_usrpage; + __be32 tavor_pd; /* reserved for Arbel */ + u8 reserved1[3]; + u8 intr; + __be32 arbel_pd; /* lost_count for Tavor */ + __be32 lkey; + u32 reserved2[2]; + __be32 consumer_index; + __be32 producer_index; + u32 reserved3[4]; } __attribute__((packed)); #define MTHCA_EQ_STATUS_OK ( 0 << 28) @@ -127,28 +128,28 @@ struct mthca_eqe { union { u32 raw[6]; struct { - u32 cqn; + __be32 cqn; } __attribute__((packed)) comp; struct { - u16 reserved1; - u16 token; - u32 reserved2; - u8 reserved3[3]; - u8 status; - u64 out_param; + u16 reserved1; + __be16 token; + u32 reserved2; + u8 reserved3[3]; + u8 status; + __be64 out_param; } __attribute__((packed)) cmd; struct { - u32 qpn; + __be32 qpn; } __attribute__((packed)) qp; struct { - u32 cqn; - u32 reserved1; - u8 reserved2[3]; - u8 syndrome; + __be32 cqn; + u32 reserved1; + u8 reserved2[3]; + u8 syndrome; } __attribute__((packed)) cq_err; struct { - u32 reserved1[2]; - u32 port; + u32 reserved1[2]; + __be32 port; } __attribute__((packed)) port_change; } event; u8 reserved3[3]; @@ -167,7 +168,7 @@ static inline u64 async_mask(struct mthca_dev *dev) static inline void tavor_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_SET_CI | eq->eqn); doorbell[1] = cpu_to_be32(ci & (eq->nent - 1)); @@ -190,8 +191,8 @@ static inline void arbel_set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u { /* See comment in tavor_set_eq_ci() above. 
*/ wmb(); - __raw_writel(cpu_to_be32(ci), dev->eq_regs.arbel.eq_set_ci_base + - eq->eqn * 8); + __raw_writel((__force u32) cpu_to_be32(ci), + dev->eq_regs.arbel.eq_set_ci_base + eq->eqn * 8); /* We still want ordering, just not swabbing, so add a barrier */ mb(); } @@ -206,7 +207,7 @@ static inline void set_eq_ci(struct mthca_dev *dev, struct mthca_eq *eq, u32 ci) static inline void tavor_eq_req_not(struct mthca_dev *dev, int eqn) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_REQ_NOT | eqn); doorbell[1] = 0; @@ -224,7 +225,7 @@ static inline void arbel_eq_req_not(struct mthca_dev *dev, u32 eqn_mask) static inline void disarm_cq(struct mthca_dev *dev, int eqn, int cqn) { if (!mthca_is_memfree(dev)) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32(MTHCA_EQ_DB_DISARM_CQ | eqn); doorbell[1] = cpu_to_be32(cqn); @@ -469,7 +470,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, PAGE_SIZE; u64 *dma_list = NULL; dma_addr_t t; - void *mailbox = NULL; + struct mthca_mailbox *mailbox; struct mthca_eq_context *eq_context; int err = -ENOMEM; int i; @@ -494,17 +495,16 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, if (!dma_list) goto err_out_free; - mailbox = kmalloc(sizeof *eq_context + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) goto err_out_free; - eq_context = MAILBOX_ALIGN(mailbox); + eq_context = mailbox->buf; for (i = 0; i < npages; ++i) { - eq->page_list[i].buf = pci_alloc_consistent(dev->pdev, - PAGE_SIZE, &t); + eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev, + PAGE_SIZE, &t, GFP_KERNEL); if (!eq->page_list[i].buf) - goto err_out_free; + goto err_out_free_pages; dma_list[i] = t; pci_unmap_addr_set(&eq->page_list[i], mapping, t); @@ -517,7 +517,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, eq->eqn = mthca_alloc(&dev->eq_table.alloc); if (eq->eqn == -1) - goto err_out_free; + goto err_out_free_pages; err = mthca_mr_alloc_phys(dev, dev->driver_pd.pd_num, dma_list, PAGE_SHIFT, npages, @@ -548,7 +548,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, eq_context->intr = intr; eq_context->lkey = cpu_to_be32(eq->mr.ibmr.lkey); - err = mthca_SW2HW_EQ(dev, eq_context, eq->eqn, &status); + err = mthca_SW2HW_EQ(dev, mailbox, eq->eqn, &status); if (err) { mthca_warn(dev, "SW2HW_EQ failed (%d)\n", err); goto err_out_free_mr; @@ -561,7 +561,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, } kfree(dma_list); - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); eq->eqn_mask = swab32(1 << eq->eqn); eq->cons_index = 0; @@ -579,17 +579,19 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, err_out_free_eq: mthca_free(&dev->eq_table.alloc, eq->eqn); - err_out_free: + err_out_free_pages: for (i = 0; i < npages; ++i) if (eq->page_list[i].buf) - pci_free_consistent(dev->pdev, PAGE_SIZE, - eq->page_list[i].buf, - pci_unmap_addr(&eq->page_list[i], - mapping)); + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + eq->page_list[i].buf, + pci_unmap_addr(&eq->page_list[i], + mapping)); + + mthca_free_mailbox(dev, mailbox); + err_out_free: kfree(eq->page_list); kfree(dma_list); - kfree(mailbox); err_out: return err; @@ -598,25 +600,22 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, static void mthca_free_eq(struct mthca_dev *dev, struct mthca_eq *eq) { - void *mailbox = NULL; + struct mthca_mailbox *mailbox; int err; u8 status; int npages = (eq->nent * MTHCA_EQ_ENTRY_SIZE + 
PAGE_SIZE - 1) / PAGE_SIZE; int i; - mailbox = kmalloc(sizeof (struct mthca_eq_context) + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) return; - err = mthca_HW2SW_EQ(dev, MAILBOX_ALIGN(mailbox), - eq->eqn, &status); + err = mthca_HW2SW_EQ(dev, mailbox, eq->eqn, &status); if (err) mthca_warn(dev, "HW2SW_EQ failed (%d)\n", err); if (status) - mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", - status); + mthca_warn(dev, "HW2SW_EQ returned status 0x%02x\n", status); dev->eq_table.arm_mask &= ~eq->eqn_mask; @@ -625,7 +624,7 @@ static void mthca_free_eq(struct mthca_dev *dev, for (i = 0; i < sizeof (struct mthca_eq_context) / 4; ++i) { if (i % 4 == 0) printk("[%02x] ", i * 4); - printk(" %08x", be32_to_cpup(MAILBOX_ALIGN(mailbox) + i * 4)); + printk(" %08x", be32_to_cpup(mailbox->buf + i * 4)); if ((i + 1) % 4 == 0) printk("\n"); } @@ -638,7 +637,7 @@ static void mthca_free_eq(struct mthca_dev *dev, pci_unmap_addr(&eq->page_list[i], mapping)); kfree(eq->page_list); - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); } static void mthca_free_irqs(struct mthca_dev *dev) @@ -709,8 +708,7 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev) if (mthca_map_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) & dev->fw.arbel.eq_arm_base) + 4, 4, &dev->eq_regs.arbel.eq_arm)) { - mthca_err(dev, "Couldn't map interrupt clear register, " - "aborting.\n"); + mthca_err(dev, "Couldn't map EQ arm register, aborting.\n"); mthca_unmap_reg(dev, (pci_resource_len(dev->pdev, 0) - 1) & dev->fw.arbel.clr_int_base, MTHCA_CLR_INT_SIZE, dev->clr_base); @@ -721,8 +719,7 @@ static int __devinit mthca_map_eq_regs(struct mthca_dev *dev) dev->fw.arbel.eq_set_ci_base, MTHCA_EQ_SET_CI_SIZE, &dev->eq_regs.arbel.eq_set_ci_base)) { - mthca_err(dev, "Couldn't map interrupt clear register, " - "aborting.\n"); + mthca_err(dev, "Couldn't map EQ CI register, aborting.\n"); mthca_unmap_reg(dev, ((pci_resource_len(dev->pdev, 0) - 1) & dev->fw.arbel.eq_arm_base) + 4, 4, dev->eq_regs.arbel.eq_arm); diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 7df223642015..9804174f7f3c 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -32,9 +34,9 @@ * $Id: mthca_mad.c 1349 2004-12-16 21:09:43Z roland $ */ -#include <ib_verbs.h> -#include <ib_mad.h> -#include <ib_smi.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_mad.h> +#include <rdma/ib_smi.h> #include "mthca_dev.h" #include "mthca_cmd.h" @@ -192,7 +194,7 @@ int mthca_process_mad(struct ib_device *ibdev, { int err; u8 status; - u16 slid = in_wc ? in_wc->slid : IB_LID_PERMISSIVE; + u16 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); /* Forward locally generated traps to the SM */ if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index d40590356df8..3241d6c9dc11 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. 
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -33,7 +35,6 @@ */ #include <linux/config.h> -#include <linux/version.h> #include <linux/module.h> #include <linux/init.h> #include <linux/errno.h> @@ -69,7 +70,7 @@ MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero"); #endif /* CONFIG_PCI_MSI */ static const char mthca_version[] __devinitdata = - "ib_mthca: Mellanox InfiniBand HCA driver v" + DRV_NAME ": Mellanox InfiniBand HCA driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; static struct mthca_profile default_profile = { @@ -170,6 +171,7 @@ static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim mdev->limits.reserved_mrws = dev_lim->reserved_mrws; mdev->limits.reserved_uars = dev_lim->reserved_uars; mdev->limits.reserved_pds = dev_lim->reserved_pds; + mdev->limits.port_width_cap = dev_lim->max_port_width; /* IB_DEVICE_RESIZE_MAX_WR not supported by driver. May be doable since hardware supports it for SRQ. @@ -211,7 +213,6 @@ static int __devinit mthca_init_tavor(struct mthca_dev *mdev) struct mthca_dev_lim dev_lim; struct mthca_profile profile; struct mthca_init_hca_param init_hca; - struct mthca_adapter adapter; err = mthca_SYS_EN(mdev, &status); if (err) { @@ -252,6 +253,8 @@ static int __devinit mthca_init_tavor(struct mthca_dev *mdev) profile = default_profile; profile.num_uar = dev_lim.uar_size / PAGE_SIZE; profile.uarc_size = 0; + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + profile.num_srq = dev_lim.max_srqs; err = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca); if (err < 0) @@ -269,26 +272,8 @@ static int __devinit mthca_init_tavor(struct mthca_dev *mdev) goto err_disable; } - err = mthca_QUERY_ADAPTER(mdev, &adapter, &status); - if (err) { - mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n"); - goto err_close; - } - if (status) { - mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, " - "aborting.\n", status); - err = -EINVAL; - goto err_close; - } - - mdev->eq_table.inta_pin = adapter.inta_pin; - mdev->rev_id = adapter.revision_id; - return 0; -err_close: - mthca_CLOSE_HCA(mdev, 0, &status); - err_disable: mthca_SYS_DIS(mdev, &status); @@ -441,15 +426,29 @@ static int __devinit mthca_init_icm(struct mthca_dev *mdev, } mdev->cq_table.table = mthca_alloc_icm_table(mdev, init_hca->cqc_base, - dev_lim->cqc_entry_sz, - mdev->limits.num_cqs, - mdev->limits.reserved_cqs, 0); + dev_lim->cqc_entry_sz, + mdev->limits.num_cqs, + mdev->limits.reserved_cqs, 0); if (!mdev->cq_table.table) { mthca_err(mdev, "Failed to map CQ context memory, aborting.\n"); err = -ENOMEM; goto err_unmap_rdb; } + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) { + mdev->srq_table.table = + mthca_alloc_icm_table(mdev, init_hca->srqc_base, + dev_lim->srq_entry_sz, + mdev->limits.num_srqs, + mdev->limits.reserved_srqs, 0); + if (!mdev->srq_table.table) { + mthca_err(mdev, "Failed to map SRQ context memory, " + "aborting.\n"); + err = -ENOMEM; + goto err_unmap_cq; + } + } + /* * It's not strictly required, but for simplicity just map the * whole multicast group table now. 
The table isn't very big @@ -465,11 +464,15 @@ static int __devinit mthca_init_icm(struct mthca_dev *mdev, if (!mdev->mcg_table.table) { mthca_err(mdev, "Failed to map MCG context memory, aborting.\n"); err = -ENOMEM; - goto err_unmap_cq; + goto err_unmap_srq; } return 0; +err_unmap_srq: + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + mthca_free_icm_table(mdev, mdev->srq_table.table); + err_unmap_cq: mthca_free_icm_table(mdev, mdev->cq_table.table); @@ -505,7 +508,6 @@ static int __devinit mthca_init_arbel(struct mthca_dev *mdev) struct mthca_dev_lim dev_lim; struct mthca_profile profile; struct mthca_init_hca_param init_hca; - struct mthca_adapter adapter; u64 icm_size; u8 status; int err; @@ -550,6 +552,8 @@ static int __devinit mthca_init_arbel(struct mthca_dev *mdev) profile = default_profile; profile.num_uar = dev_lim.uar_size / PAGE_SIZE; profile.num_udav = 0; + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + profile.num_srq = dev_lim.max_srqs; icm_size = mthca_make_profile(mdev, &profile, &dev_lim, &init_hca); if ((int) icm_size < 0) { @@ -573,24 +577,11 @@ static int __devinit mthca_init_arbel(struct mthca_dev *mdev) goto err_free_icm; } - err = mthca_QUERY_ADAPTER(mdev, &adapter, &status); - if (err) { - mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n"); - goto err_free_icm; - } - if (status) { - mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, " - "aborting.\n", status); - err = -EINVAL; - goto err_free_icm; - } - - mdev->eq_table.inta_pin = adapter.inta_pin; - mdev->rev_id = adapter.revision_id; - return 0; err_free_icm: + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + mthca_free_icm_table(mdev, mdev->srq_table.table); mthca_free_icm_table(mdev, mdev->cq_table.table); mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); @@ -613,12 +604,70 @@ err_disable: return err; } +static void mthca_close_hca(struct mthca_dev *mdev) +{ + u8 status; + + mthca_CLOSE_HCA(mdev, 0, &status); + + if (mthca_is_memfree(mdev)) { + if (mdev->mthca_flags & MTHCA_FLAG_SRQ) + mthca_free_icm_table(mdev, mdev->srq_table.table); + mthca_free_icm_table(mdev, mdev->cq_table.table); + mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); + mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); + mthca_free_icm_table(mdev, mdev->qp_table.qp_table); + mthca_free_icm_table(mdev, mdev->mr_table.mpt_table); + mthca_free_icm_table(mdev, mdev->mr_table.mtt_table); + mthca_unmap_eq_icm(mdev); + + mthca_UNMAP_ICM_AUX(mdev, &status); + mthca_free_icm(mdev, mdev->fw.arbel.aux_icm); + + mthca_UNMAP_FA(mdev, &status); + mthca_free_icm(mdev, mdev->fw.arbel.fw_icm); + + if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) + mthca_DISABLE_LAM(mdev, &status); + } else + mthca_SYS_DIS(mdev, &status); +} + static int __devinit mthca_init_hca(struct mthca_dev *mdev) { + u8 status; + int err; + struct mthca_adapter adapter; + if (mthca_is_memfree(mdev)) - return mthca_init_arbel(mdev); + err = mthca_init_arbel(mdev); else - return mthca_init_tavor(mdev); + err = mthca_init_tavor(mdev); + + if (err) + return err; + + err = mthca_QUERY_ADAPTER(mdev, &adapter, &status); + if (err) { + mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n"); + goto err_close; + } + if (status) { + mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, " + "aborting.\n", status); + err = -EINVAL; + goto err_close; + } + + mdev->eq_table.inta_pin = adapter.inta_pin; + mdev->rev_id = adapter.revision_id; + memcpy(mdev->board_id, adapter.board_id, sizeof mdev->board_id); + + return 0; + +err_close: + 
mthca_close_hca(mdev); + return err; } static int __devinit mthca_setup_hca(struct mthca_dev *dev) @@ -664,7 +713,7 @@ static int __devinit mthca_setup_hca(struct mthca_dev *dev) goto err_pd_table_free; } - err = mthca_pd_alloc(dev, &dev->driver_pd); + err = mthca_pd_alloc(dev, 1, &dev->driver_pd); if (err) { mthca_err(dev, "Failed to create driver PD, " "aborting.\n"); @@ -708,11 +757,18 @@ static int __devinit mthca_setup_hca(struct mthca_dev *dev) goto err_cmd_poll; } + err = mthca_init_srq_table(dev); + if (err) { + mthca_err(dev, "Failed to initialize " + "shared receive queue table, aborting.\n"); + goto err_cq_table_free; + } + err = mthca_init_qp_table(dev); if (err) { mthca_err(dev, "Failed to initialize " "queue pair table, aborting.\n"); - goto err_cq_table_free; + goto err_srq_table_free; } err = mthca_init_av_table(dev); @@ -737,6 +793,9 @@ err_av_table_free: err_qp_table_free: mthca_cleanup_qp_table(dev); +err_srq_table_free: + mthca_cleanup_srq_table(dev); + err_cq_table_free: mthca_cleanup_cq_table(dev); @@ -843,33 +902,6 @@ static int __devinit mthca_enable_msi_x(struct mthca_dev *mdev) return 0; } -static void mthca_close_hca(struct mthca_dev *mdev) -{ - u8 status; - - mthca_CLOSE_HCA(mdev, 0, &status); - - if (mthca_is_memfree(mdev)) { - mthca_free_icm_table(mdev, mdev->cq_table.table); - mthca_free_icm_table(mdev, mdev->qp_table.rdb_table); - mthca_free_icm_table(mdev, mdev->qp_table.eqp_table); - mthca_free_icm_table(mdev, mdev->qp_table.qp_table); - mthca_free_icm_table(mdev, mdev->mr_table.mpt_table); - mthca_free_icm_table(mdev, mdev->mr_table.mtt_table); - mthca_unmap_eq_icm(mdev); - - mthca_UNMAP_ICM_AUX(mdev, &status); - mthca_free_icm(mdev, mdev->fw.arbel.aux_icm); - - mthca_UNMAP_FA(mdev, &status); - mthca_free_icm(mdev, mdev->fw.arbel.fw_icm); - - if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM)) - mthca_DISABLE_LAM(mdev, &status); - } else - mthca_SYS_DIS(mdev, &status); -} - /* Types of supported HCA */ enum { TAVOR, /* MT23108 */ @@ -886,9 +918,9 @@ static struct { int is_memfree; int is_pcie; } mthca_hca_table[] = { - [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 3, 2), .is_memfree = 0, .is_pcie = 0 }, - [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 6, 2), .is_memfree = 0, .is_pcie = 1 }, - [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 0, 1), .is_memfree = 1, .is_pcie = 1 }, + [TAVOR] = { .latest_fw = MTHCA_FW_VER(3, 3, 3), .is_memfree = 0, .is_pcie = 0 }, + [ARBEL_COMPAT] = { .latest_fw = MTHCA_FW_VER(4, 7, 0), .is_memfree = 0, .is_pcie = 1 }, + [ARBEL_NATIVE] = { .latest_fw = MTHCA_FW_VER(5, 1, 0), .is_memfree = 1, .is_pcie = 1 }, [SINAI] = { .latest_fw = MTHCA_FW_VER(1, 0, 1), .is_memfree = 1, .is_pcie = 1 } }; @@ -927,13 +959,13 @@ static int __devinit mthca_init_one(struct pci_dev *pdev, */ if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) || pci_resource_len(pdev, 0) != 1 << 20) { - dev_err(&pdev->dev, "Missing DCS, aborting."); + dev_err(&pdev->dev, "Missing DCS, aborting.\n"); err = -ENODEV; goto err_disable_pdev; } if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM) || pci_resource_len(pdev, 2) != 1 << 23) { - dev_err(&pdev->dev, "Missing UAR, aborting."); + dev_err(&pdev->dev, "Missing UAR, aborting.\n"); err = -ENODEV; goto err_disable_pdev; } @@ -1004,25 +1036,18 @@ static int __devinit mthca_init_one(struct pci_dev *pdev, !pci_enable_msi(pdev)) mdev->mthca_flags |= MTHCA_FLAG_MSI; - sema_init(&mdev->cmd.hcr_sem, 1); - sema_init(&mdev->cmd.poll_sem, 1); - mdev->cmd.use_events = 0; - - mdev->hcr = ioremap(pci_resource_start(pdev, 0) + 
MTHCA_HCR_BASE, MTHCA_HCR_SIZE); - if (!mdev->hcr) { - mthca_err(mdev, "Couldn't map command register, " - "aborting.\n"); - err = -ENOMEM; + if (mthca_cmd_init(mdev)) { + mthca_err(mdev, "Failed to init command interface, aborting.\n"); goto err_free_dev; } err = mthca_tune_pci(mdev); if (err) - goto err_iounmap; + goto err_cmd; err = mthca_init_hca(mdev); if (err) - goto err_iounmap; + goto err_cmd; if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) { mthca_warn(mdev, "HCA FW version %x.%x.%x is old (%x.%x.%x is current).\n", @@ -1057,6 +1082,7 @@ err_cleanup: mthca_cleanup_mcg_table(mdev); mthca_cleanup_av_table(mdev); mthca_cleanup_qp_table(mdev); + mthca_cleanup_srq_table(mdev); mthca_cleanup_cq_table(mdev); mthca_cmd_use_polling(mdev); mthca_cleanup_eq_table(mdev); @@ -1070,8 +1096,8 @@ err_cleanup: err_close: mthca_close_hca(mdev); -err_iounmap: - iounmap(mdev->hcr); +err_cmd: + mthca_cmd_cleanup(mdev); err_free_dev: if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) @@ -1106,6 +1132,7 @@ static void __devexit mthca_remove_one(struct pci_dev *pdev) mthca_cleanup_mcg_table(mdev); mthca_cleanup_av_table(mdev); mthca_cleanup_qp_table(mdev); + mthca_cleanup_srq_table(mdev); mthca_cleanup_cq_table(mdev); mthca_cmd_use_polling(mdev); mthca_cleanup_eq_table(mdev); @@ -1118,10 +1145,8 @@ static void __devexit mthca_remove_one(struct pci_dev *pdev) iounmap(mdev->kar); mthca_uar_free(mdev, &mdev->driver_uar); mthca_cleanup_uar_table(mdev); - mthca_close_hca(mdev); - - iounmap(mdev->hcr); + mthca_cmd_cleanup(mdev); if (mdev->mthca_flags & MTHCA_FLAG_MSI_X) pci_disable_msix(pdev); @@ -1163,7 +1188,7 @@ static struct pci_device_id mthca_pci_table[] = { MODULE_DEVICE_TABLE(pci, mthca_pci_table); static struct pci_driver mthca_driver = { - .name = "ib_mthca", + .name = DRV_NAME, .id_table = mthca_pci_table, .probe = mthca_init_one, .remove = __devexit_p(mthca_remove_one) diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c index 70a6553a588e..a2707605f4c8 100644 --- a/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -42,10 +42,10 @@ enum { }; struct mthca_mgm { - u32 next_gid_index; - u32 reserved[3]; - u8 gid[16]; - u32 qp[MTHCA_QP_PER_MGM]; + __be32 next_gid_index; + u32 reserved[3]; + u8 gid[16]; + __be32 qp[MTHCA_QP_PER_MGM]; }; static const u8 zero_gid[16]; /* automatically initialized to 0 */ @@ -66,22 +66,23 @@ static const u8 zero_gid[16]; /* automatically initialized to 0 */ * entry in hash chain and *mgm holds end of hash chain. 
*/ static int find_mgm(struct mthca_dev *dev, - u8 *gid, struct mthca_mgm *mgm, + u8 *gid, struct mthca_mailbox *mgm_mailbox, u16 *hash, int *prev, int *index) { - void *mailbox; + struct mthca_mailbox *mailbox; + struct mthca_mgm *mgm = mgm_mailbox->buf; u8 *mgid; int err; u8 status; - mailbox = kmalloc(16 + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL); - if (!mailbox) + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) return -ENOMEM; - mgid = MAILBOX_ALIGN(mailbox); + mgid = mailbox->buf; memcpy(mgid, gid, 16); - err = mthca_MGID_HASH(dev, mgid, hash, &status); + err = mthca_MGID_HASH(dev, mailbox, hash, &status); if (err) goto out; if (status) { @@ -93,17 +94,21 @@ static int find_mgm(struct mthca_dev *dev, if (0) mthca_dbg(dev, "Hash for %04x:%04x:%04x:%04x:" "%04x:%04x:%04x:%04x is %04x\n", - be16_to_cpu(((u16 *) gid)[0]), be16_to_cpu(((u16 *) gid)[1]), - be16_to_cpu(((u16 *) gid)[2]), be16_to_cpu(((u16 *) gid)[3]), - be16_to_cpu(((u16 *) gid)[4]), be16_to_cpu(((u16 *) gid)[5]), - be16_to_cpu(((u16 *) gid)[6]), be16_to_cpu(((u16 *) gid)[7]), + be16_to_cpu(((__be16 *) gid)[0]), + be16_to_cpu(((__be16 *) gid)[1]), + be16_to_cpu(((__be16 *) gid)[2]), + be16_to_cpu(((__be16 *) gid)[3]), + be16_to_cpu(((__be16 *) gid)[4]), + be16_to_cpu(((__be16 *) gid)[5]), + be16_to_cpu(((__be16 *) gid)[6]), + be16_to_cpu(((__be16 *) gid)[7]), *hash); *index = *hash; *prev = -1; do { - err = mthca_READ_MGM(dev, *index, mgm, &status); + err = mthca_READ_MGM(dev, *index, mgm_mailbox, &status); if (err) goto out; if (status) { @@ -129,14 +134,14 @@ static int find_mgm(struct mthca_dev *dev, *index = -1; out: - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); return err; } int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mthca_dev *dev = to_mdev(ibqp->device); - void *mailbox; + struct mthca_mailbox *mailbox; struct mthca_mgm *mgm; u16 hash; int index, prev; @@ -145,15 +150,15 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) int err; u8 status; - mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL); - if (!mailbox) - return -ENOMEM; - mgm = MAILBOX_ALIGN(mailbox); + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + mgm = mailbox->buf; if (down_interruptible(&dev->mcg_table.sem)) return -EINTR; - err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index); + err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index); if (err) goto out; @@ -170,7 +175,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) goto out; } - err = mthca_READ_MGM(dev, index, mgm, &status); + err = mthca_READ_MGM(dev, index, mailbox, &status); if (err) goto out; if (status) { @@ -195,7 +200,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) goto out; } - err = mthca_WRITE_MGM(dev, index, mgm, &status); + err = mthca_WRITE_MGM(dev, index, mailbox, &status); if (err) goto out; if (status) { @@ -206,7 +211,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (!link) goto out; - err = mthca_READ_MGM(dev, prev, mgm, &status); + err = mthca_READ_MGM(dev, prev, mailbox, &status); if (err) goto out; if (status) { @@ -217,7 +222,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) mgm->next_gid_index = cpu_to_be32(index << 5); - err = mthca_WRITE_MGM(dev, prev, mgm, &status); + err = mthca_WRITE_MGM(dev, prev, mailbox, &status); if (err) goto out; if (status) { @@ -227,14 +232,14 @@ int 
mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) out: up(&dev->mcg_table.sem); - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); return err; } int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct mthca_dev *dev = to_mdev(ibqp->device); - void *mailbox; + struct mthca_mailbox *mailbox; struct mthca_mgm *mgm; u16 hash; int prev, index; @@ -242,29 +247,29 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) int err; u8 status; - mailbox = kmalloc(sizeof *mgm + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL); - if (!mailbox) - return -ENOMEM; - mgm = MAILBOX_ALIGN(mailbox); + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + mgm = mailbox->buf; if (down_interruptible(&dev->mcg_table.sem)) return -EINTR; - err = find_mgm(dev, gid->raw, mgm, &hash, &prev, &index); + err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index); if (err) goto out; if (index == -1) { mthca_err(dev, "MGID %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x " "not found\n", - be16_to_cpu(((u16 *) gid->raw)[0]), - be16_to_cpu(((u16 *) gid->raw)[1]), - be16_to_cpu(((u16 *) gid->raw)[2]), - be16_to_cpu(((u16 *) gid->raw)[3]), - be16_to_cpu(((u16 *) gid->raw)[4]), - be16_to_cpu(((u16 *) gid->raw)[5]), - be16_to_cpu(((u16 *) gid->raw)[6]), - be16_to_cpu(((u16 *) gid->raw)[7])); + be16_to_cpu(((__be16 *) gid->raw)[0]), + be16_to_cpu(((__be16 *) gid->raw)[1]), + be16_to_cpu(((__be16 *) gid->raw)[2]), + be16_to_cpu(((__be16 *) gid->raw)[3]), + be16_to_cpu(((__be16 *) gid->raw)[4]), + be16_to_cpu(((__be16 *) gid->raw)[5]), + be16_to_cpu(((__be16 *) gid->raw)[6]), + be16_to_cpu(((__be16 *) gid->raw)[7])); err = -EINVAL; goto out; } @@ -285,7 +290,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) mgm->qp[loc] = mgm->qp[i - 1]; mgm->qp[i - 1] = 0; - err = mthca_WRITE_MGM(dev, index, mgm, &status); + err = mthca_WRITE_MGM(dev, index, mailbox, &status); if (err) goto out; if (status) { @@ -304,7 +309,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (be32_to_cpu(mgm->next_gid_index) >> 5) { err = mthca_READ_MGM(dev, be32_to_cpu(mgm->next_gid_index) >> 5, - mgm, &status); + mailbox, &status); if (err) goto out; if (status) { @@ -316,7 +321,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } else memset(mgm->gid, 0, 16); - err = mthca_WRITE_MGM(dev, index, mgm, &status); + err = mthca_WRITE_MGM(dev, index, mailbox, &status); if (err) goto out; if (status) { @@ -327,7 +332,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) } else { /* Remove entry from AMGM */ index = be32_to_cpu(mgm->next_gid_index) >> 5; - err = mthca_READ_MGM(dev, prev, mgm, &status); + err = mthca_READ_MGM(dev, prev, mailbox, &status); if (err) goto out; if (status) { @@ -338,7 +343,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) mgm->next_gid_index = cpu_to_be32(index << 5); - err = mthca_WRITE_MGM(dev, prev, mgm, &status); + err = mthca_WRITE_MGM(dev, prev, mailbox, &status); if (err) goto out; if (status) { @@ -350,7 +355,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) out: up(&dev->mcg_table.sem); - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); return err; } diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 637b30e35592..1827400f189b 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ 
b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -47,6 +49,15 @@ enum { MTHCA_TABLE_CHUNK_SIZE = 1 << 18 }; +struct mthca_user_db_table { + struct semaphore mutex; + struct { + u64 uvirt; + struct scatterlist mem; + int refcount; + } page[0]; +}; + void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm) { struct mthca_icm_chunk *chunk, *tmp; @@ -179,9 +190,14 @@ out: void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj) { - int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE; + int i; u8 status; + if (!mthca_is_memfree(dev)) + return; + + i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE; + down(&table->mutex); if (--table->icm[i]->refcount == 0) { @@ -256,6 +272,9 @@ void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table, { int i; + if (!mthca_is_memfree(dev)) + return; + for (i = start; i <= end; i += MTHCA_TABLE_CHUNK_SIZE / table->obj_size) mthca_table_put(dev, table, i); } @@ -267,6 +286,7 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, { struct mthca_icm_table *table; int num_icm; + unsigned chunk_size; int i; u8 status; @@ -287,7 +307,11 @@ struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev, table->icm[i] = NULL; for (i = 0; i * MTHCA_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) { - table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT, + chunk_size = MTHCA_TABLE_CHUNK_SIZE; + if ((i + 1) * MTHCA_TABLE_CHUNK_SIZE > nobj * obj_size) + chunk_size = nobj * obj_size - i * MTHCA_TABLE_CHUNK_SIZE; + + table->icm[i] = mthca_alloc_icm(dev, chunk_size >> PAGE_SHIFT, (use_lowmem ? 
GFP_KERNEL : GFP_HIGHUSER) | __GFP_NOWARN); if (!table->icm[i]) @@ -336,14 +360,134 @@ void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table) kfree(table); } -static u64 mthca_uarc_virt(struct mthca_dev *dev, int page) +static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int page) { return dev->uar_table.uarc_base + - dev->driver_uar.index * dev->uar_table.uarc_size + + uar->index * dev->uar_table.uarc_size + page * 4096; } -int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db) +int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index, u64 uaddr) +{ + int ret = 0; + u8 status; + int i; + + if (!mthca_is_memfree(dev)) + return 0; + + if (index < 0 || index > dev->uar_table.uarc_size / 8) + return -EINVAL; + + down(&db_tab->mutex); + + i = index / MTHCA_DB_REC_PER_PAGE; + + if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE) || + (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) || + (uaddr & 4095)) { + ret = -EINVAL; + goto out; + } + + if (db_tab->page[i].refcount) { + ++db_tab->page[i].refcount; + goto out; + } + + ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0, + &db_tab->page[i].mem.page, NULL); + if (ret < 0) + goto out; + + db_tab->page[i].mem.length = 4096; + db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK; + + ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + if (ret < 0) { + put_page(db_tab->page[i].mem.page); + goto out; + } + + ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem), + mthca_uarc_virt(dev, uar, i), &status); + if (!ret && status) + ret = -EINVAL; + if (ret) { + pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + put_page(db_tab->page[i].mem.page); + goto out; + } + + db_tab->page[i].uvirt = uaddr; + db_tab->page[i].refcount = 1; + +out: + up(&db_tab->mutex); + return ret; +} + +void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index) +{ + if (!mthca_is_memfree(dev)) + return; + + /* + * To make our bookkeeping simpler, we don't unmap DB + * pages until we clean up the whole db table. 
+ */ + + down(&db_tab->mutex); + + --db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount; + + up(&db_tab->mutex); +} + +struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev) +{ + struct mthca_user_db_table *db_tab; + int npages; + int i; + + if (!mthca_is_memfree(dev)) + return NULL; + + npages = dev->uar_table.uarc_size / 4096; + db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL); + if (!db_tab) + return ERR_PTR(-ENOMEM); + + init_MUTEX(&db_tab->mutex); + for (i = 0; i < npages; ++i) { + db_tab->page[i].refcount = 0; + db_tab->page[i].uvirt = 0; + } + + return db_tab; +} + +void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab) +{ + int i; + u8 status; + + if (!mthca_is_memfree(dev)) + return; + + for (i = 0; i < dev->uar_table.uarc_size / 4096; ++i) { + if (db_tab->page[i].uvirt) { + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status); + pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + put_page(db_tab->page[i].mem.page); + } + } +} + +int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db) { int group; int start, end, dir; @@ -399,7 +543,8 @@ int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db) } memset(page->db_rec, 0, 4096); - ret = mthca_MAP_ICM_page(dev, page->mapping, mthca_uarc_virt(dev, i), &status); + ret = mthca_MAP_ICM_page(dev, page->mapping, + mthca_uarc_virt(dev, &dev->driver_uar, i), &status); if (!ret && status) ret = -EINVAL; if (ret) { @@ -425,7 +570,7 @@ found: page->db_rec[j] = cpu_to_be64((qn << 8) | (type << 5)); - *db = (u32 *) &page->db_rec[j]; + *db = (__be32 *) &page->db_rec[j]; out: up(&dev->db_tab->mutex); @@ -453,7 +598,7 @@ void mthca_free_db(struct mthca_dev *dev, int type, int db_index) if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) && i >= dev->db_tab->max_group1 - 1) { - mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status); + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); dma_free_coherent(&dev->pdev->dev, 4096, page->db_rec, page->mapping); @@ -522,7 +667,7 @@ void mthca_cleanup_db_tab(struct mthca_dev *dev) if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE)) mthca_warn(dev, "Kernel UARC page %d not empty\n", i); - mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status); + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); dma_free_coherent(&dev->pdev->dev, 4096, dev->db_tab->page[i].db_rec, diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h index fe7be2a6bc4a..bafa51544aa3 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.h +++ b/drivers/infiniband/hw/mthca/mthca_memfree.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -136,7 +138,7 @@ enum { struct mthca_db_page { DECLARE_BITMAP(used, MTHCA_DB_REC_PER_PAGE); - u64 *db_rec; + __be64 *db_rec; dma_addr_t mapping; }; @@ -148,7 +150,7 @@ struct mthca_db_table { struct semaphore mutex; }; -enum { +enum mthca_db_type { MTHCA_DB_TYPE_INVALID = 0x0, MTHCA_DB_TYPE_CQ_SET_CI = 0x1, MTHCA_DB_TYPE_CQ_ARM = 0x2, @@ -158,9 +160,20 @@ enum { MTHCA_DB_TYPE_GROUP_SEP = 0x7 }; +struct mthca_user_db_table; +struct mthca_uar; + +int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index, u64 uaddr); +void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index); +struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev); +void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab); + int mthca_init_db_tab(struct mthca_dev *dev); void mthca_cleanup_db_tab(struct mthca_dev *dev); -int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db); +int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, __be32 **db); void mthca_free_db(struct mthca_dev *dev, int type, int db_index); #endif /* MTHCA_MEMFREE_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index 8960fc2306be..1f97a44477f5 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -40,22 +41,28 @@ #include "mthca_cmd.h" #include "mthca_memfree.h" +struct mthca_mtt { + struct mthca_buddy *buddy; + int order; + u32 first_seg; +}; + /* * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits. 
*/ struct mthca_mpt_entry { - u32 flags; - u32 page_size; - u32 key; - u32 pd; - u64 start; - u64 length; - u32 lkey; - u32 window_count; - u32 window_count_limit; - u64 mtt_seg; - u32 mtt_sz; /* Arbel only */ - u32 reserved[2]; + __be32 flags; + __be32 page_size; + __be32 key; + __be32 pd; + __be64 start; + __be64 length; + __be32 lkey; + __be32 window_count; + __be32 window_count_limit; + __be64 mtt_seg; + __be32 mtt_sz; /* Arbel only */ + u32 reserved[2]; } __attribute__((packed)); #define MTHCA_MPT_FLAG_SW_OWNS (0xfUL << 28) @@ -173,8 +180,8 @@ static void __devexit mthca_buddy_cleanup(struct mthca_buddy *buddy) kfree(buddy->bits); } -static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order, - struct mthca_buddy *buddy) +static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order, + struct mthca_buddy *buddy) { u32 seg = mthca_buddy_alloc(buddy, order); @@ -191,14 +198,102 @@ static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order, return seg; } -static void mthca_free_mtt(struct mthca_dev *dev, u32 seg, int order, - struct mthca_buddy* buddy) +static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size, + struct mthca_buddy *buddy) { - mthca_buddy_free(buddy, seg, order); + struct mthca_mtt *mtt; + int i; - if (mthca_is_memfree(dev)) - mthca_table_put_range(dev, dev->mr_table.mtt_table, seg, - seg + (1 << order) - 1); + if (size <= 0) + return ERR_PTR(-EINVAL); + + mtt = kmalloc(sizeof *mtt, GFP_KERNEL); + if (!mtt) + return ERR_PTR(-ENOMEM); + + mtt->buddy = buddy; + mtt->order = 0; + for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1) + ++mtt->order; + + mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy); + if (mtt->first_seg == -1) { + kfree(mtt); + return ERR_PTR(-ENOMEM); + } + + return mtt; +} + +struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size) +{ + return __mthca_alloc_mtt(dev, size, &dev->mr_table.mtt_buddy); +} + +void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt) +{ + if (!mtt) + return; + + mthca_buddy_free(mtt->buddy, mtt->first_seg, mtt->order); + + mthca_table_put_range(dev, dev->mr_table.mtt_table, + mtt->first_seg, + mtt->first_seg + (1 << mtt->order) - 1); + + kfree(mtt); +} + +int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt, + int start_index, u64 *buffer_list, int list_len) +{ + struct mthca_mailbox *mailbox; + __be64 *mtt_entry; + int err = 0; + u8 status; + int i; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + mtt_entry = mailbox->buf; + + while (list_len > 0) { + mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base + + mtt->first_seg * MTHCA_MTT_SEG_SIZE + + start_index * 8); + mtt_entry[1] = 0; + for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i) + mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] | + MTHCA_MTT_FLAG_PRESENT); + + /* + * If we have an odd number of entries to write, add + * one more dummy entry for firmware efficiency. 
+ */ + if (i & 1) + mtt_entry[i + 2] = 0; + + err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1, &status); + if (err) { + mthca_warn(dev, "WRITE_MTT failed (%d)\n", err); + goto out; + } + if (status) { + mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n", + status); + err = -EINVAL; + goto out; + } + + list_len -= i; + start_index += i; + buffer_list += i; + } + +out: + mthca_free_mailbox(dev, mailbox); + return err; } static inline u32 tavor_hw_index_to_key(u32 ind) @@ -237,18 +332,20 @@ static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key) return tavor_key_to_hw_index(key); } -int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, - u32 access, struct mthca_mr *mr) +int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift, + u64 iova, u64 total_size, u32 access, struct mthca_mr *mr) { - void *mailbox = NULL; + struct mthca_mailbox *mailbox; struct mthca_mpt_entry *mpt_entry; u32 key; + int i; int err; u8 status; might_sleep(); - mr->order = -1; + WARN_ON(buffer_size_shift >= 32); + key = mthca_alloc(&dev->mr_table.mpt_alloc); if (key == -1) return -ENOMEM; @@ -260,201 +357,109 @@ int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, goto err_out_mpt_free; } - mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) { - err = -ENOMEM; + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); goto err_out_table; } - mpt_entry = MAILBOX_ALIGN(mailbox); + mpt_entry = mailbox->buf; mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | MTHCA_MPT_FLAG_MIO | - MTHCA_MPT_FLAG_PHYSICAL | MTHCA_MPT_FLAG_REGION | access); - mpt_entry->page_size = 0; + if (!mr->mtt) + mpt_entry->flags |= cpu_to_be32(MTHCA_MPT_FLAG_PHYSICAL); + + mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12); mpt_entry->key = cpu_to_be32(key); mpt_entry->pd = cpu_to_be32(pd); - mpt_entry->start = 0; - mpt_entry->length = ~0ULL; + mpt_entry->start = cpu_to_be64(iova); + mpt_entry->length = cpu_to_be64(total_size); memset(&mpt_entry->lkey, 0, sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey)); - err = mthca_SW2HW_MPT(dev, mpt_entry, + if (mr->mtt) + mpt_entry->mtt_seg = + cpu_to_be64(dev->mr_table.mtt_base + + mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE); + + if (0) { + mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey); + for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) { + if (i % 4 == 0) + printk("[%02x] ", i * 4); + printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i])); + if ((i + 1) % 4 == 0) + printk("\n"); + } + } + + err = mthca_SW2HW_MPT(dev, mailbox, key & (dev->limits.num_mpts - 1), &status); if (err) { mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err); - goto err_out_table; + goto err_out_mailbox; } else if (status) { mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n", status); err = -EINVAL; - goto err_out_table; + goto err_out_mailbox; } - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); return err; +err_out_mailbox: + mthca_free_mailbox(dev, mailbox); + err_out_table: - if (mthca_is_memfree(dev)) - mthca_table_put(dev, dev->mr_table.mpt_table, key); + mthca_table_put(dev, dev->mr_table.mpt_table, key); err_out_mpt_free: mthca_free(&dev->mr_table.mpt_alloc, key); - kfree(mailbox); return err; } +int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd, + u32 access, struct mthca_mr *mr) +{ + mr->mtt = NULL; + return mthca_mr_alloc(dev, pd, 12, 0, ~0ULL, access, mr); +} + int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd, u64 *buffer_list, int buffer_size_shift, 
int list_len, u64 iova, u64 total_size, u32 access, struct mthca_mr *mr) { - void *mailbox; - u64 *mtt_entry; - struct mthca_mpt_entry *mpt_entry; - u32 key; - int err = -ENOMEM; - u8 status; - int i; - - might_sleep(); - WARN_ON(buffer_size_shift >= 32); - - key = mthca_alloc(&dev->mr_table.mpt_alloc); - if (key == -1) - return -ENOMEM; - mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key); - - if (mthca_is_memfree(dev)) { - err = mthca_table_get(dev, dev->mr_table.mpt_table, key); - if (err) - goto err_out_mpt_free; - } - - for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0; - i < list_len; - i <<= 1, ++mr->order) - ; /* nothing */ - - mr->first_seg = mthca_alloc_mtt(dev, mr->order, - &dev->mr_table.mtt_buddy); - if (mr->first_seg == -1) - goto err_out_table; - - /* - * If list_len is odd, we add one more dummy entry for - * firmware efficiency. - */ - mailbox = kmalloc(max(sizeof *mpt_entry, - (size_t) 8 * (list_len + (list_len & 1) + 2)) + - MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) - goto err_out_free_mtt; + int err; - mtt_entry = MAILBOX_ALIGN(mailbox); + mr->mtt = mthca_alloc_mtt(dev, list_len); + if (IS_ERR(mr->mtt)) + return PTR_ERR(mr->mtt); - mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base + - mr->first_seg * MTHCA_MTT_SEG_SIZE); - mtt_entry[1] = 0; - for (i = 0; i < list_len; ++i) - mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] | - MTHCA_MTT_FLAG_PRESENT); - if (list_len & 1) { - mtt_entry[i + 2] = 0; - ++list_len; - } - - if (0) { - mthca_dbg(dev, "Dumping MPT entry\n"); - for (i = 0; i < list_len + 2; ++i) - printk(KERN_ERR "[%2d] %016llx\n", - i, (unsigned long long) be64_to_cpu(mtt_entry[i])); - } - - err = mthca_WRITE_MTT(dev, mtt_entry, list_len, &status); + err = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, list_len); if (err) { - mthca_warn(dev, "WRITE_MTT failed (%d)\n", err); - goto err_out_mailbox_free; - } - if (status) { - mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n", - status); - err = -EINVAL; - goto err_out_mailbox_free; - } - - mpt_entry = MAILBOX_ALIGN(mailbox); - - mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | - MTHCA_MPT_FLAG_MIO | - MTHCA_MPT_FLAG_REGION | - access); - - mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12); - mpt_entry->key = cpu_to_be32(key); - mpt_entry->pd = cpu_to_be32(pd); - mpt_entry->start = cpu_to_be64(iova); - mpt_entry->length = cpu_to_be64(total_size); - memset(&mpt_entry->lkey, 0, - sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey)); - mpt_entry->mtt_seg = cpu_to_be64(dev->mr_table.mtt_base + - mr->first_seg * MTHCA_MTT_SEG_SIZE); - - if (0) { - mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey); - for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) { - if (i % 4 == 0) - printk("[%02x] ", i * 4); - printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i])); - if ((i + 1) % 4 == 0) - printk("\n"); - } + mthca_free_mtt(dev, mr->mtt); + return err; } - err = mthca_SW2HW_MPT(dev, mpt_entry, - key & (dev->limits.num_mpts - 1), - &status); + err = mthca_mr_alloc(dev, pd, buffer_size_shift, iova, + total_size, access, mr); if (err) - mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err); - else if (status) { - mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n", - status); - err = -EINVAL; - } - - kfree(mailbox); - return err; - -err_out_mailbox_free: - kfree(mailbox); - -err_out_free_mtt: - mthca_free_mtt(dev, mr->first_seg, mr->order, &dev->mr_table.mtt_buddy); - -err_out_table: - if (mthca_is_memfree(dev)) - mthca_table_put(dev, dev->mr_table.mpt_table, key); + 
mthca_free_mtt(dev, mr->mtt); -err_out_mpt_free: - mthca_free(&dev->mr_table.mpt_alloc, key); return err; } /* Free mr or fmr */ -static void mthca_free_region(struct mthca_dev *dev, u32 lkey, int order, - u32 first_seg, struct mthca_buddy *buddy) +static void mthca_free_region(struct mthca_dev *dev, u32 lkey) { - if (order >= 0) - mthca_free_mtt(dev, first_seg, order, buddy); - - if (mthca_is_memfree(dev)) - mthca_table_put(dev, dev->mr_table.mpt_table, - arbel_key_to_hw_index(lkey)); + mthca_table_put(dev, dev->mr_table.mpt_table, + key_to_hw_index(dev, lkey)); mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey)); } @@ -476,15 +481,15 @@ void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr) mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n", status); - mthca_free_region(dev, mr->ibmr.lkey, mr->order, mr->first_seg, - &dev->mr_table.mtt_buddy); + mthca_free_region(dev, mr->ibmr.lkey); + mthca_free_mtt(dev, mr->mtt); } int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, u32 access, struct mthca_fmr *mr) { struct mthca_mpt_entry *mpt_entry; - void *mailbox; + struct mthca_mailbox *mailbox; u64 mtt_seg; u32 key, idx; u8 status; @@ -522,31 +527,24 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base + sizeof *(mr->mem.tavor.mpt) * idx; - for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0; - i < list_len; - i <<= 1, ++mr->order) - ; /* nothing */ - - mr->first_seg = mthca_alloc_mtt(dev, mr->order, - dev->mr_table.fmr_mtt_buddy); - if (mr->first_seg == -1) + mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy); + if (IS_ERR(mr->mtt)) goto err_out_table; - mtt_seg = mr->first_seg * MTHCA_MTT_SEG_SIZE; + mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE; if (mthca_is_memfree(dev)) { mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table, - mr->first_seg); + mr->mtt->first_seg); BUG_ON(!mr->mem.arbel.mtts); } else mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg; - mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA, - GFP_KERNEL); - if (!mailbox) + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) goto err_out_free_mtt; - mpt_entry = MAILBOX_ALIGN(mailbox); + mpt_entry = mailbox->buf; mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS | MTHCA_MPT_FLAG_MIO | @@ -565,13 +563,13 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) { if (i % 4 == 0) printk("[%02x] ", i * 4); - printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i])); + printk(" %08x", be32_to_cpu(((__be32 *) mpt_entry)[i])); if ((i + 1) % 4 == 0) printk("\n"); } } - err = mthca_SW2HW_MPT(dev, mpt_entry, + err = mthca_SW2HW_MPT(dev, mailbox, key & (dev->limits.num_mpts - 1), &status); if (err) { @@ -585,19 +583,17 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd, goto err_out_mailbox_free; } - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); return 0; err_out_mailbox_free: - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); err_out_free_mtt: - mthca_free_mtt(dev, mr->first_seg, mr->order, - dev->mr_table.fmr_mtt_buddy); + mthca_free_mtt(dev, mr->mtt); err_out_table: - if (mthca_is_memfree(dev)) - mthca_table_put(dev, dev->mr_table.mpt_table, key); + mthca_table_put(dev, dev->mr_table.mpt_table, key); err_out_mpt_free: mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey); @@ -609,8 +605,9 @@ int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr) if (fmr->maps) return -EBUSY; - mthca_free_region(dev, fmr->ibmr.lkey, 
fmr->order, fmr->first_seg, - dev->mr_table.fmr_mtt_buddy); + mthca_free_region(dev, fmr->ibmr.lkey); + mthca_free_mtt(dev, fmr->mtt); + return 0; } @@ -673,7 +670,7 @@ int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size)); mpt_entry.start = cpu_to_be64(iova); - writel(mpt_entry.lkey, &fmr->mem.tavor.mpt->key); + __raw_writel((__force u32) mpt_entry.lkey, &fmr->mem.tavor.mpt->key); memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start, offsetof(struct mthca_mpt_entry, window_count) - offsetof(struct mthca_mpt_entry, start)); @@ -826,7 +823,8 @@ int __devinit mthca_init_mr_table(struct mthca_dev *dev) if (dev->limits.reserved_mtts) { i = fls(dev->limits.reserved_mtts - 1); - if (mthca_alloc_mtt(dev, i, dev->mr_table.fmr_mtt_buddy) == -1) { + if (mthca_alloc_mtt_range(dev, i, + dev->mr_table.fmr_mtt_buddy) == -1) { mthca_warn(dev, "MTT table of order %d is too small.\n", dev->mr_table.fmr_mtt_buddy->max_order); err = -ENOMEM; diff --git a/drivers/infiniband/hw/mthca/mthca_pd.c b/drivers/infiniband/hw/mthca/mthca_pd.c index ea66847e4ea3..3dbf06a6e6f4 100644 --- a/drivers/infiniband/hw/mthca/mthca_pd.c +++ b/drivers/infiniband/hw/mthca/mthca_pd.c @@ -1,5 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -37,23 +39,27 @@ #include "mthca_dev.h" -int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd) +int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd) { - int err; + int err = 0; might_sleep(); + pd->privileged = privileged; + atomic_set(&pd->sqp_count, 0); pd->pd_num = mthca_alloc(&dev->pd_table.alloc); if (pd->pd_num == -1) return -ENOMEM; - err = mthca_mr_alloc_notrans(dev, pd->pd_num, - MTHCA_MPT_FLAG_LOCAL_READ | - MTHCA_MPT_FLAG_LOCAL_WRITE, - &pd->ntmr); - if (err) - mthca_free(&dev->pd_table.alloc, pd->pd_num); + if (privileged) { + err = mthca_mr_alloc_notrans(dev, pd->pd_num, + MTHCA_MPT_FLAG_LOCAL_READ | + MTHCA_MPT_FLAG_LOCAL_WRITE, + &pd->ntmr); + if (err) + mthca_free(&dev->pd_table.alloc, pd->pd_num); + } return err; } @@ -61,7 +67,8 @@ int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd) void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd) { might_sleep(); - mthca_free_mr(dev, &pd->ntmr); + if (pd->privileged) + mthca_free_mr(dev, &pd->ntmr); mthca_free(&dev->pd_table.alloc, pd->pd_num); } diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c index 4fedc32d5871..0576056b34f4 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.c +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -101,6 +102,7 @@ u64 mthca_make_profile(struct mthca_dev *dev, profile[MTHCA_RES_UARC].size = request->uarc_size; profile[MTHCA_RES_QP].num = request->num_qp; + profile[MTHCA_RES_SRQ].num = request->num_srq; profile[MTHCA_RES_EQP].num = request->num_qp; profile[MTHCA_RES_RDB].num = request->num_qp * request->rdb_per_qp; profile[MTHCA_RES_CQ].num = request->num_cq; diff --git a/drivers/infiniband/hw/mthca/mthca_profile.h b/drivers/infiniband/hw/mthca/mthca_profile.h index 17aef3357661..94641808f97f 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.h +++ b/drivers/infiniband/hw/mthca/mthca_profile.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -41,6 +42,7 @@ struct mthca_profile { int num_qp; int rdb_per_qp; + int num_srq; int num_cq; int num_mcg; int num_mpt; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 159f4e6c312d..1c1c2e230871 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -1,5 +1,9 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -32,10 +36,13 @@ * $Id: mthca_provider.c 1397 2004-12-28 05:09:00Z roland $ */ -#include <ib_smi.h> +#include <rdma/ib_smi.h> +#include <linux/mm.h> #include "mthca_dev.h" #include "mthca_cmd.h" +#include "mthca_user.h" +#include "mthca_memfree.h" static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) @@ -52,7 +59,7 @@ static int mthca_query_device(struct ib_device *ibdev, if (!in_mad || !out_mad) goto out; - memset(props, 0, sizeof props); + memset(props, 0, sizeof *props); props->fw_ver = mdev->fw_ver; @@ -74,10 +81,10 @@ static int mthca_query_device(struct ib_device *ibdev, } props->device_cap_flags = mdev->device_cap_flags; - props->vendor_id = be32_to_cpup((u32 *) (out_mad->data + 36)) & + props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; - props->vendor_part_id = be16_to_cpup((u16 *) (out_mad->data + 30)); - props->hw_ver = be16_to_cpup((u16 *) (out_mad->data + 32)); + props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30)); + props->hw_ver = be16_to_cpup((__be16 *) (out_mad->data + 32)); memcpy(&props->sys_image_guid, out_mad->data + 4, 8); memcpy(&props->node_guid, out_mad->data + 12, 8); @@ -113,6 +120,8 @@ static int mthca_query_port(struct ib_device *ibdev, if (!in_mad || !out_mad) goto out; + memset(props, 0, sizeof *props); + memset(in_mad, 0, sizeof *in_mad); in_mad->base_version = 1; in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; @@ -131,16 +140,17 @@ static int mthca_query_port(struct ib_device *ibdev, goto out; } - props->lid = be16_to_cpup((u16 *) (out_mad->data + 16)); + props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); props->lmc = out_mad->data[34] & 0x7; - props->sm_lid = be16_to_cpup((u16 *) 
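/*
 * The casts in these hunks go from (u16 *)/(u32 *) to (__be16 *)/(__be32 *)
 * so that sparse can type-check endianness. A minimal illustration (names
 * made up for this sketch, not from the patch), checked with
 * make C=2 CF=-D__CHECK_ENDIAN__:
 *
 *	__be16 raw = *(__be16 *) (out_mad->data + 16);
 *	u16 lid = be16_to_cpu(raw);	// explicit conversion: clean
 *	u16 bad = raw;			// sparse warns: incompatible base types
 */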
(out_mad->data + 18)); + props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); props->sm_sl = out_mad->data[36] & 0xf; props->state = out_mad->data[32] & 0xf; props->phys_state = out_mad->data[33] >> 4; - props->port_cap_flags = be32_to_cpup((u32 *) (out_mad->data + 20)); + props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); props->gid_tbl_len = to_mdev(ibdev)->limits.gid_table_len; + props->max_msg_sz = 0x80000000; props->pkey_tbl_len = to_mdev(ibdev)->limits.pkey_table_len; - props->qkey_viol_cntr = be16_to_cpup((u16 *) (out_mad->data + 48)); + props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; props->active_speed = out_mad->data[35] >> 4; @@ -216,7 +226,7 @@ static int mthca_query_pkey(struct ib_device *ibdev, goto out; } - *pkey = be16_to_cpu(((u16 *) out_mad->data)[index % 32]); + *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]); out: kfree(in_mad); @@ -283,7 +293,78 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port, return err; } -static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev) +static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct mthca_alloc_ucontext_resp uresp; + struct mthca_ucontext *context; + int err; + + memset(&uresp, 0, sizeof uresp); + + uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps; + if (mthca_is_memfree(to_mdev(ibdev))) + uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size; + else + uresp.uarc_size = 0; + + context = kmalloc(sizeof *context, GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + + err = mthca_uar_alloc(to_mdev(ibdev), &context->uar); + if (err) { + kfree(context); + return ERR_PTR(err); + } + + context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev)); + if (IS_ERR(context->db_tab)) { + err = PTR_ERR(context->db_tab); + mthca_uar_free(to_mdev(ibdev), &context->uar); + kfree(context); + return ERR_PTR(err); + } + + if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) { + mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab); + mthca_uar_free(to_mdev(ibdev), &context->uar); + kfree(context); + return ERR_PTR(-EFAULT); + } + + return &context->ibucontext; +} + +static int mthca_dealloc_ucontext(struct ib_ucontext *context) +{ + mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab); + mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar); + kfree(to_mucontext(context)); + + return 0; +} + +static int mthca_mmap_uar(struct ib_ucontext *context, + struct vm_area_struct *vma) +{ + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (io_remap_pfn_range(vma, vma->vm_start, + to_mucontext(context)->uar.pfn, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) { struct mthca_pd *pd; int err; @@ -292,12 +373,20 @@ static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev) if (!pd) return ERR_PTR(-ENOMEM); - err = mthca_pd_alloc(to_mdev(ibdev), pd); + err = mthca_pd_alloc(to_mdev(ibdev), !context, pd); if (err) { kfree(pd); return ERR_PTR(err); } + if (context) { + if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) { + mthca_pd_free(to_mdev(ibdev), pd); + kfree(pd); + return ERR_PTR(-EFAULT); + } + } + return &pd->ibpd; } @@ -336,9 +425,82 @@ static int 
mthca_ah_destroy(struct ib_ah *ah) return 0; } +static struct ib_srq *mthca_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mthca_create_srq ucmd; + struct mthca_ucontext *context = NULL; + struct mthca_srq *srq; + int err; + + srq = kmalloc(sizeof *srq, GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + if (pd->uobject) { + context = to_mucontext(pd->uobject->context); + + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { + err = -EFAULT; + goto err_free; + } + + err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, ucmd.db_index, + ucmd.db_page); + + if (err) + goto err_free; + + srq->mr.ibmr.lkey = ucmd.lkey; + srq->db_index = ucmd.db_index; + } + + err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd), + &init_attr->attr, srq); + + if (err && pd->uobject) + mthca_unmap_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, ucmd.db_index); + + if (err) + goto err_free; + + if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof (__u32))) { + mthca_free_srq(to_mdev(pd->device), srq); + err = -EFAULT; + goto err_free; + } + + return &srq->ibsrq; + +err_free: + kfree(srq); + + return ERR_PTR(err); +} + +static int mthca_destroy_srq(struct ib_srq *srq) +{ + struct mthca_ucontext *context; + + if (srq->uobject) { + context = to_mucontext(srq->uobject->context); + + mthca_unmap_user_db(to_mdev(srq->device), &context->uar, + context->db_tab, to_msrq(srq)->db_index); + } + + mthca_free_srq(to_mdev(srq->device), to_msrq(srq)); + kfree(srq); + + return 0; +} + static struct ib_qp *mthca_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr) + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) { + struct mthca_create_qp ucmd; struct mthca_qp *qp; int err; @@ -347,41 +509,82 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, case IB_QPT_UC: case IB_QPT_UD: { + struct mthca_ucontext *context; + qp = kmalloc(sizeof *qp, GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); - qp->sq.max = init_attr->cap.max_send_wr; - qp->rq.max = init_attr->cap.max_recv_wr; - qp->sq.max_gs = init_attr->cap.max_send_sge; - qp->rq.max_gs = init_attr->cap.max_recv_sge; + if (pd->uobject) { + context = to_mucontext(pd->uobject->context); + + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { + kfree(qp); + return ERR_PTR(-EFAULT); + } + + err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, + ucmd.sq_db_index, ucmd.sq_db_page); + if (err) { + kfree(qp); + return ERR_PTR(err); + } + + err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, + ucmd.rq_db_index, ucmd.rq_db_page); + if (err) { + mthca_unmap_user_db(to_mdev(pd->device), + &context->uar, + context->db_tab, + ucmd.sq_db_index); + kfree(qp); + return ERR_PTR(err); + } + + qp->mr.ibmr.lkey = ucmd.lkey; + qp->sq.db_index = ucmd.sq_db_index; + qp->rq.db_index = ucmd.rq_db_index; + } err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd), to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq), init_attr->qp_type, init_attr->sq_sig_type, - qp); + &init_attr->cap, qp); + + if (err && pd->uobject) { + context = to_mucontext(pd->uobject->context); + + mthca_unmap_user_db(to_mdev(pd->device), + &context->uar, + context->db_tab, + ucmd.sq_db_index); + mthca_unmap_user_db(to_mdev(pd->device), + &context->uar, + context->db_tab, + ucmd.rq_db_index); + } + qp->ibqp.qp_num = qp->qpn; break; } case IB_QPT_SMI: case IB_QPT_GSI: { + /* Don't allow userspace to create special QPs */ + if (pd->uobject) + return ERR_PTR(-EINVAL); + + qp = 
kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); - qp->sq.max = init_attr->cap.max_send_wr; - qp->rq.max = init_attr->cap.max_recv_wr; - qp->sq.max_gs = init_attr->cap.max_send_sge; - qp->rq.max_gs = init_attr->cap.max_recv_sge; - qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd), to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq), - init_attr->sq_sig_type, + init_attr->sq_sig_type, &init_attr->cap, qp->ibqp.qp_num, init_attr->port_num, to_msqp(qp)); break; @@ -396,42 +599,115 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, return ERR_PTR(err); } - init_attr->cap.max_inline_data = 0; + init_attr->cap.max_inline_data = 0; + init_attr->cap.max_send_wr = qp->sq.max; + init_attr->cap.max_recv_wr = qp->rq.max; + init_attr->cap.max_send_sge = qp->sq.max_gs; + init_attr->cap.max_recv_sge = qp->rq.max_gs; return &qp->ibqp; } static int mthca_destroy_qp(struct ib_qp *qp) { + if (qp->uobject) { + mthca_unmap_user_db(to_mdev(qp->device), + &to_mucontext(qp->uobject->context)->uar, + to_mucontext(qp->uobject->context)->db_tab, + to_mqp(qp)->sq.db_index); + mthca_unmap_user_db(to_mdev(qp->device), + &to_mucontext(qp->uobject->context)->uar, + to_mucontext(qp->uobject->context)->db_tab, + to_mqp(qp)->rq.db_index); + } mthca_free_qp(to_mdev(qp->device), to_mqp(qp)); kfree(qp); return 0; } -static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries) +static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, + struct ib_ucontext *context, + struct ib_udata *udata) { + struct mthca_create_cq ucmd; struct mthca_cq *cq; int nent; int err; + if (context) { + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) + return ERR_PTR(-EFAULT); + + err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, + ucmd.set_db_index, ucmd.set_db_page); + if (err) + return ERR_PTR(err); + + err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, + ucmd.arm_db_index, ucmd.arm_db_page); + if (err) + goto err_unmap_set; + } + cq = kmalloc(sizeof *cq, GFP_KERNEL); - if (!cq) - return ERR_PTR(-ENOMEM); + if (!cq) { + err = -ENOMEM; + goto err_unmap_arm; + } + + if (context) { + cq->mr.ibmr.lkey = ucmd.lkey; + cq->set_ci_db_index = ucmd.set_db_index; + cq->arm_db_index = ucmd.arm_db_index; + } for (nent = 1; nent <= entries; nent <<= 1) ; /* nothing */ - err = mthca_init_cq(to_mdev(ibdev), nent, cq); - if (err) { - kfree(cq); - cq = ERR_PTR(err); + err = mthca_init_cq(to_mdev(ibdev), nent, + context ? to_mucontext(context) : NULL, + context ? 
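/* a userspace CQ is backed by the PD the process passed down in ucmd.pdn; kernel CQs fall back to the driver's internal privileged PD */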
ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num, + cq); + if (err) + goto err_free; + + if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) { + mthca_free_cq(to_mdev(ibdev), cq); + err = -EFAULT; + goto err_free; + } return &cq->ibcq; + +err_free: + kfree(cq); + +err_unmap_arm: + if (context) + mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, ucmd.arm_db_index); + +err_unmap_set: + if (context) + mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, ucmd.set_db_index); + + return ERR_PTR(err); } static int mthca_destroy_cq(struct ib_cq *cq) { + if (cq->uobject) { + mthca_unmap_user_db(to_mdev(cq->device), + &to_mucontext(cq->uobject->context)->uar, + to_mucontext(cq->uobject->context)->db_tab, + to_mcq(cq)->arm_db_index); + mthca_unmap_user_db(to_mdev(cq->device), + &to_mucontext(cq->uobject->context)->uar, + to_mucontext(cq->uobject->context)->db_tab, + to_mcq(cq)->set_ci_db_index); + } mthca_free_cq(to_mdev(cq->device), to_mcq(cq)); kfree(cq); @@ -558,6 +834,7 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, convert_access(acc), mr); if (err) { + kfree(page_list); kfree(mr); return ERR_PTR(err); } @@ -566,6 +843,87 @@ static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd, return &mr->ibmr; } +static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, + int acc, struct ib_udata *udata) +{ + struct mthca_dev *dev = to_mdev(pd->device); + struct ib_umem_chunk *chunk; + struct mthca_mr *mr; + u64 *pages; + int shift, n, len; + int i, j, k; + int err = 0; + + shift = ffs(region->page_size) - 1; + + mr = kmalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + n = 0; + list_for_each_entry(chunk, &region->chunk_list, list) + n += chunk->nents; + + mr->mtt = mthca_alloc_mtt(dev, n); + if (IS_ERR(mr->mtt)) { + err = PTR_ERR(mr->mtt); + goto err; + } + + pages = (u64 *) __get_free_page(GFP_KERNEL); + if (!pages) { + err = -ENOMEM; + goto err_mtt; + } + + i = n = 0; + + list_for_each_entry(chunk, &region->chunk_list, list) + for (j = 0; j < chunk->nmap; ++j) { + len = sg_dma_len(&chunk->page_list[j]) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = sg_dma_address(&chunk->page_list[j]) + + region->page_size * k; + /* + * Be friendly to WRITE_MTT command + * and leave two empty slots for the + * index and reserved fields of the + * mailbox. 
+ */ + if (i == PAGE_SIZE / sizeof (u64) - 2) { + err = mthca_write_mtt(dev, mr->mtt, + n, pages, i); + if (err) + goto mtt_done; + n += i; + i = 0; + } + } + } + + if (i) + err = mthca_write_mtt(dev, mr->mtt, n, pages, i); +mtt_done: + free_page((unsigned long) pages); + if (err) + goto err_mtt; + + err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base, + region->length, convert_access(acc), mr); + + if (err) + goto err_mtt; + + return &mr->ibmr; + +err_mtt: + mthca_free_mtt(dev, mr->mtt); + +err: + kfree(mr); + return ERR_PTR(err); +} + static int mthca_dereg_mr(struct ib_mr *mr) { struct mthca_mr *mmr = to_mmr(mr); @@ -674,14 +1032,22 @@ static ssize_t show_hca(struct class_device *cdev, char *buf) } } +static ssize_t show_board(struct class_device *cdev, char *buf) +{ + struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev); + return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id); +} + static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); static struct class_device_attribute *mthca_class_attributes[] = { &class_device_attr_hw_rev, &class_device_attr_fw_ver, - &class_device_attr_hca_type + &class_device_attr_hca_type, + &class_device_attr_board_id }; int mthca_register_device(struct mthca_dev *dev) @@ -690,6 +1056,8 @@ int mthca_register_device(struct mthca_dev *dev) int i; strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX); + dev->ib_dev.owner = THIS_MODULE; + dev->ib_dev.node_type = IB_NODE_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.dma_device = &dev->pdev->dev; @@ -699,10 +1067,24 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.modify_port = mthca_modify_port; dev->ib_dev.query_pkey = mthca_query_pkey; dev->ib_dev.query_gid = mthca_query_gid; + dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext; + dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext; + dev->ib_dev.mmap = mthca_mmap_uar; dev->ib_dev.alloc_pd = mthca_alloc_pd; dev->ib_dev.dealloc_pd = mthca_dealloc_pd; dev->ib_dev.create_ah = mthca_ah_create; dev->ib_dev.destroy_ah = mthca_ah_destroy; + + if (dev->mthca_flags & MTHCA_FLAG_SRQ) { + dev->ib_dev.create_srq = mthca_create_srq; + dev->ib_dev.destroy_srq = mthca_destroy_srq; + + if (mthca_is_memfree(dev)) + dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv; + else + dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv; + } + dev->ib_dev.create_qp = mthca_create_qp; dev->ib_dev.modify_qp = mthca_modify_qp; dev->ib_dev.destroy_qp = mthca_destroy_qp; @@ -711,6 +1093,7 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.poll_cq = mthca_poll_cq; dev->ib_dev.get_dma_mr = mthca_get_dma_mr; dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr; + dev->ib_dev.reg_user_mr = mthca_reg_user_mr; dev->ib_dev.dereg_mr = mthca_dereg_mr; if (dev->mthca_flags & MTHCA_FLAG_FMR) { diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 619710f95a87..bcd4b01a339c 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -1,5 +1,7 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * Copyright (c) 2005 Mellanox Technologies. All rights reserved. 
* * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -35,8 +37,8 @@ #ifndef MTHCA_PROVIDER_H #define MTHCA_PROVIDER_H -#include <ib_verbs.h> -#include <ib_pack.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_pack.h> #define MTHCA_MPT_FLAG_ATOMIC (1 << 14) #define MTHCA_MPT_FLAG_REMOTE_WRITE (1 << 13) @@ -49,23 +51,36 @@ struct mthca_buf_list { DECLARE_PCI_UNMAP_ADDR(mapping) }; +union mthca_buf { + struct mthca_buf_list direct; + struct mthca_buf_list *page_list; +}; + struct mthca_uar { unsigned long pfn; int index; }; +struct mthca_user_db_table; + +struct mthca_ucontext { + struct ib_ucontext ibucontext; + struct mthca_uar uar; + struct mthca_user_db_table *db_tab; +}; + +struct mthca_mtt; + struct mthca_mr { - struct ib_mr ibmr; - int order; - u32 first_seg; + struct ib_mr ibmr; + struct mthca_mtt *mtt; }; struct mthca_fmr { - struct ib_fmr ibmr; + struct ib_fmr ibmr; struct ib_fmr_attr attr; - int order; - u32 first_seg; - int maps; + struct mthca_mtt *mtt; + int maps; union { struct { struct mthca_mpt_entry __iomem *mpt; @@ -83,6 +98,7 @@ struct mthca_pd { u32 pd_num; atomic_t sqp_count; struct mthca_mr ntmr; + int privileged; }; struct mthca_eq { @@ -167,22 +183,43 @@ struct mthca_cq { int cqn; u32 cons_index; int is_direct; + int is_kernel; /* Next fields are Arbel only */ int set_ci_db_index; - u32 *set_ci_db; + __be32 *set_ci_db; int arm_db_index; - u32 *arm_db; + __be32 *arm_db; int arm_sn; - union { - struct mthca_buf_list direct; - struct mthca_buf_list *page_list; - } queue; + union mthca_buf queue; struct mthca_mr mr; wait_queue_head_t wait; }; +struct mthca_srq { + struct ib_srq ibsrq; + spinlock_t lock; + atomic_t refcount; + int srqn; + int max; + int max_gs; + int wqe_shift; + int first_free; + int last_free; + u16 counter; /* Arbel only */ + int db_index; /* Arbel only */ + __be32 *db; /* Arbel only */ + void *last; + + int is_direct; + u64 *wrid; + union mthca_buf queue; + struct mthca_mr mr; + + wait_queue_head_t wait; +}; + struct mthca_wq { spinlock_t lock; int max; @@ -195,7 +232,7 @@ struct mthca_wq { int wqe_shift; int db_index; /* Arbel only */ - u32 *db; + __be32 *db; }; struct mthca_qp { @@ -216,10 +253,7 @@ struct mthca_qp { int send_wqe_offset; u64 *wrid; - union { - struct mthca_buf_list direct; - struct mthca_buf_list *page_list; - } queue; + union mthca_buf queue; wait_queue_head_t wait; }; @@ -236,6 +270,11 @@ struct mthca_sqp { dma_addr_t header_dma; }; +static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct mthca_ucontext, ibucontext); +} + static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr) { return container_of(ibmr, struct mthca_fmr, ibmr); @@ -261,6 +300,11 @@ static inline struct mthca_cq *to_mcq(struct ib_cq *ibcq) return container_of(ibcq, struct mthca_cq, ibcq); } +static inline struct mthca_srq *to_msrq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct mthca_srq, ibsrq); +} + static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp) { return container_of(ibqp, struct mthca_qp, ibqp); diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index ca73bab11a02..0164b84d4ec6 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -1,5 +1,8 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. 
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved. + * Copyright (c) 2004 Voltaire, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -34,19 +37,22 @@ #include <linux/init.h> -#include <ib_verbs.h> -#include <ib_cache.h> -#include <ib_pack.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_cache.h> +#include <rdma/ib_pack.h> #include "mthca_dev.h" #include "mthca_cmd.h" #include "mthca_memfree.h" +#include "mthca_wqe.h" enum { MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE, MTHCA_ACK_REQ_FREQ = 10, MTHCA_FLIGHT_LIMIT = 9, - MTHCA_UD_HEADER_SIZE = 72 /* largest UD header possible */ + MTHCA_UD_HEADER_SIZE = 72, /* largest UD header possible */ + MTHCA_INLINE_HEADER_SIZE = 4, /* data segment overhead for inline */ + MTHCA_INLINE_CHUNK_SIZE = 16 /* inline data segment chunk */ }; enum { @@ -92,62 +98,62 @@ enum { }; struct mthca_qp_path { - u32 port_pkey; - u8 rnr_retry; - u8 g_mylmc; - u16 rlid; - u8 ackto; - u8 mgid_index; - u8 static_rate; - u8 hop_limit; - u32 sl_tclass_flowlabel; - u8 rgid[16]; + __be32 port_pkey; + u8 rnr_retry; + u8 g_mylmc; + __be16 rlid; + u8 ackto; + u8 mgid_index; + u8 static_rate; + u8 hop_limit; + __be32 sl_tclass_flowlabel; + u8 rgid[16]; } __attribute__((packed)); struct mthca_qp_context { - u32 flags; - u32 tavor_sched_queue; /* Reserved on Arbel */ - u8 mtu_msgmax; - u8 rq_size_stride; /* Reserved on Tavor */ - u8 sq_size_stride; /* Reserved on Tavor */ - u8 rlkey_arbel_sched_queue; /* Reserved on Tavor */ - u32 usr_page; - u32 local_qpn; - u32 remote_qpn; - u32 reserved1[2]; + __be32 flags; + __be32 tavor_sched_queue; /* Reserved on Arbel */ + u8 mtu_msgmax; + u8 rq_size_stride; /* Reserved on Tavor */ + u8 sq_size_stride; /* Reserved on Tavor */ + u8 rlkey_arbel_sched_queue; /* Reserved on Tavor */ + __be32 usr_page; + __be32 local_qpn; + __be32 remote_qpn; + u32 reserved1[2]; struct mthca_qp_path pri_path; struct mthca_qp_path alt_path; - u32 rdd; - u32 pd; - u32 wqe_base; - u32 wqe_lkey; - u32 params1; - u32 reserved2; - u32 next_send_psn; - u32 cqn_snd; - u32 snd_wqe_base_l; /* Next send WQE on Tavor */ - u32 snd_db_index; /* (debugging only entries) */ - u32 last_acked_psn; - u32 ssn; - u32 params2; - u32 rnr_nextrecvpsn; - u32 ra_buff_indx; - u32 cqn_rcv; - u32 rcv_wqe_base_l; /* Next recv WQE on Tavor */ - u32 rcv_db_index; /* (debugging only entries) */ - u32 qkey; - u32 srqn; - u32 rmsn; - u16 rq_wqe_counter; /* reserved on Tavor */ - u16 sq_wqe_counter; /* reserved on Tavor */ - u32 reserved3[18]; + __be32 rdd; + __be32 pd; + __be32 wqe_base; + __be32 wqe_lkey; + __be32 params1; + __be32 reserved2; + __be32 next_send_psn; + __be32 cqn_snd; + __be32 snd_wqe_base_l; /* Next send WQE on Tavor */ + __be32 snd_db_index; /* (debugging only entries) */ + __be32 last_acked_psn; + __be32 ssn; + __be32 params2; + __be32 rnr_nextrecvpsn; + __be32 ra_buff_indx; + __be32 cqn_rcv; + __be32 rcv_wqe_base_l; /* Next recv WQE on Tavor */ + __be32 rcv_db_index; /* (debugging only entries) */ + __be32 qkey; + __be32 srqn; + __be32 rmsn; + __be16 rq_wqe_counter; /* reserved on Tavor */ + __be16 sq_wqe_counter; /* reserved on Tavor */ + u32 reserved3[18]; } __attribute__((packed)); struct mthca_qp_param { - u32 opt_param_mask; - u32 reserved1; + __be32 opt_param_mask; + u32 reserved1; struct mthca_qp_context context; - u32 reserved2[62]; + u32 reserved2[62]; } __attribute__((packed)); enum { @@ -170,80 +176,6 @@ enum { MTHCA_QP_OPTPAR_SCHED_QUEUE = 
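/* The WQE segment layouts and doorbell flags deleted just below are not lost: they move into the new shared header mthca_wqe.h (included above), so the SRQ code added by this patch can build its receive WQEs from the same definitions. */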
1 << 16 }; -enum { - MTHCA_NEXT_DBD = 1 << 7, - MTHCA_NEXT_FENCE = 1 << 6, - MTHCA_NEXT_CQ_UPDATE = 1 << 3, - MTHCA_NEXT_EVENT_GEN = 1 << 2, - MTHCA_NEXT_SOLICIT = 1 << 1, - - MTHCA_MLX_VL15 = 1 << 17, - MTHCA_MLX_SLR = 1 << 16 -}; - -enum { - MTHCA_INVAL_LKEY = 0x100 -}; - -struct mthca_next_seg { - u32 nda_op; /* [31:6] next WQE [4:0] next opcode */ - u32 ee_nds; /* [31:8] next EE [7] DBD [6] F [5:0] next WQE size */ - u32 flags; /* [3] CQ [2] Event [1] Solicit */ - u32 imm; /* immediate data */ -}; - -struct mthca_tavor_ud_seg { - u32 reserved1; - u32 lkey; - u64 av_addr; - u32 reserved2[4]; - u32 dqpn; - u32 qkey; - u32 reserved3[2]; -}; - -struct mthca_arbel_ud_seg { - u32 av[8]; - u32 dqpn; - u32 qkey; - u32 reserved[2]; -}; - -struct mthca_bind_seg { - u32 flags; /* [31] Atomic [30] rem write [29] rem read */ - u32 reserved; - u32 new_rkey; - u32 lkey; - u64 addr; - u64 length; -}; - -struct mthca_raddr_seg { - u64 raddr; - u32 rkey; - u32 reserved; -}; - -struct mthca_atomic_seg { - u64 swap_add; - u64 compare; -}; - -struct mthca_data_seg { - u32 byte_count; - u32 lkey; - u64 addr; -}; - -struct mthca_mlx_seg { - u32 nda_op; - u32 nds; - u32 flags; /* [17] VL15 [16] SLR [14:12] static rate - [11:8] SL [3] C [2] E */ - u16 rlid; - u16 vcrc; -}; - static const u8 mthca_opcode[] = { [IB_WR_SEND] = MTHCA_OPCODE_SEND, [IB_WR_SEND_WITH_IMM] = MTHCA_OPCODE_SEND_IMM, @@ -357,6 +289,9 @@ static const struct { [UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY), + [UC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), [RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), @@ -378,6 +313,9 @@ static const struct { [UD] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY), + [UC] = (IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_ACCESS_FLAGS), [RC] = (IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS), @@ -388,6 +326,11 @@ static const struct { [IB_QPS_RTR] = { .trans = MTHCA_TRANS_INIT2RTR, .req_param = { + [UC] = (IB_QP_AV | + IB_QP_PATH_MTU | + IB_QP_DEST_QPN | + IB_QP_RQ_PSN | + IB_QP_MAX_DEST_RD_ATOMIC), [RC] = (IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | @@ -398,6 +341,9 @@ static const struct { .opt_param = { [UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), + [UC] = (IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX), [RC] = (IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX), @@ -413,6 +359,8 @@ static const struct { .trans = MTHCA_TRANS_RTR2RTS, .req_param = { [UD] = IB_QP_SQ_PSN, + [UC] = (IB_QP_SQ_PSN | + IB_QP_MAX_QP_RD_ATOMIC), [RC] = (IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | @@ -423,6 +371,11 @@ static const struct { .opt_param = { [UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), + [UC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_PATH_MIG_STATE), [RC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | @@ -442,6 +395,9 @@ static const struct { .opt_param = { [UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), + [UC] = (IB_QP_ACCESS_FLAGS | + IB_QP_ALT_PATH | + IB_QP_PATH_MIG_STATE), [RC] = (IB_QP_ACCESS_FLAGS | IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE | @@ -462,6 +418,10 @@ static const struct { .opt_param = { [UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), + [UC] = (IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + IB_QP_ACCESS_FLAGS | + IB_QP_PATH_MIG_STATE), [RC] = (IB_QP_CUR_STATE | IB_QP_ALT_PATH | IB_QP_ACCESS_FLAGS | @@ -476,6 +436,14 @@ static const struct { .opt_param = { [UD] = (IB_QP_PKEY_INDEX | IB_QP_QKEY), + [UC] = (IB_QP_AV | + IB_QP_MAX_QP_RD_ATOMIC | + IB_QP_MAX_DEST_RD_ATOMIC | + IB_QP_CUR_STATE | + IB_QP_ALT_PATH | + 
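/*
 * The [UC] entries being added wire the unreliable-connected transport
 * into this state table. For illustration, the check the table feeds in
 * mthca_modify_qp() (paraphrasing that function, not new API):
 *
 *	req_param = state_table[cur_state][new_state].req_param[UC];
 *	opt_param = state_table[cur_state][new_state].opt_param[UC];
 *	if ((req_param & attr_mask) != req_param)
 *		return -EINVAL;		// a required attribute is missing
 *	if (attr_mask & ~(req_param | opt_param | IB_QP_STATE))
 *		return -EINVAL;		// attribute not valid for transition
 */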
IB_QP_ACCESS_FLAGS | + IB_QP_PKEY_INDEX | + IB_QP_PATH_MIG_STATE), [RC] = (IB_QP_AV | IB_QP_TIMEOUT | IB_QP_RETRY_CNT | @@ -501,6 +469,7 @@ static const struct { .opt_param = { [UD] = (IB_QP_CUR_STATE | IB_QP_QKEY), + [UC] = (IB_QP_CUR_STATE), [RC] = (IB_QP_CUR_STATE | IB_QP_MIN_RNR_TIMER), [MLX] = (IB_QP_CUR_STATE | @@ -533,12 +502,11 @@ static void init_port(struct mthca_dev *dev, int port) memset(&param, 0, sizeof param); - param.enable_1x = 1; - param.enable_4x = 1; - param.vl_cap = dev->limits.vl_cap; - param.mtu_cap = dev->limits.mtu_cap; - param.gid_cap = dev->limits.gid_table_len; - param.pkey_cap = dev->limits.pkey_table_len; + param.port_width = dev->limits.port_width_cap; + param.vl_cap = dev->limits.vl_cap; + param.mtu_cap = dev->limits.mtu_cap; + param.gid_cap = dev->limits.gid_table_len; + param.pkey_cap = dev->limits.pkey_table_len; err = mthca_INIT_IB(dev, &param, port, &status); if (err) @@ -552,7 +520,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) struct mthca_dev *dev = to_mdev(ibqp->device); struct mthca_qp *qp = to_mqp(ibqp); enum ib_qp_state cur_state, new_state; - void *mailbox = NULL; + struct mthca_mailbox *mailbox; struct mthca_qp_param *qp_param; struct mthca_qp_context *qp_context; u32 req_param, opt_param; @@ -609,10 +577,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) return -EINVAL; } - mailbox = kmalloc(sizeof (*qp_param) + MTHCA_CMD_MAILBOX_EXTRA, GFP_KERNEL); - if (!mailbox) - return -ENOMEM; - qp_param = MAILBOX_ALIGN(mailbox); + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + qp_param = mailbox->buf; qp_context = &qp_param->context; memset(qp_param, 0, sizeof *qp_param); @@ -644,15 +612,22 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_context->mtu_msgmax = (attr->path_mtu << 5) | 31; if (mthca_is_memfree(dev)) { - qp_context->rq_size_stride = - ((ffs(qp->rq.max) - 1) << 3) | (qp->rq.wqe_shift - 4); - qp_context->sq_size_stride = - ((ffs(qp->sq.max) - 1) << 3) | (qp->sq.wqe_shift - 4); + if (qp->rq.max) + qp_context->rq_size_stride = long_log2(qp->rq.max) << 3; + qp_context->rq_size_stride |= qp->rq.wqe_shift - 4; + + if (qp->sq.max) + qp_context->sq_size_stride = long_log2(qp->sq.max) << 3; + qp_context->sq_size_stride |= qp->sq.wqe_shift - 4; } /* leave arbel_sched_queue as 0 */ - qp_context->usr_page = cpu_to_be32(dev->driver_uar.index); + if (qp->ibqp.uobject) + qp_context->usr_page = + cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index); + else + qp_context->usr_page = cpu_to_be32(dev->driver_uar.index); qp_context->local_qpn = cpu_to_be32(qp->qpn); if (attr_mask & IB_QP_DEST_QPN) { qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num); @@ -683,7 +658,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) if (attr_mask & IB_QP_AV) { qp_context->pri_path.g_mylmc = attr->ah_attr.src_path_bits & 0x7f; qp_context->pri_path.rlid = cpu_to_be16(attr->ah_attr.dlid); - qp_context->pri_path.static_rate = (!!attr->ah_attr.static_rate) << 3; + qp_context->pri_path.static_rate = !!attr->ah_attr.static_rate; if (attr->ah_attr.ah_flags & IB_AH_GRH) { qp_context->pri_path.g_mylmc |= 1 << 7; qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index; @@ -724,9 +699,9 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RETRY_COUNT); } - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) 
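/*
 * Note the deliberate swap in this hunk and the next: initiator depth
 * (params1 / SRA_MAX) is now taken from max_rd_atomic under
 * IB_QP_MAX_QP_RD_ATOMIC, while responder resources (params2 / RRA_MAX)
 * come from max_dest_rd_atomic under IB_QP_MAX_DEST_RD_ATOMIC; the old
 * code had the two attributes crossed.
 */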
{ - qp_context->params1 |= cpu_to_be32(min(attr->max_dest_rd_atomic ? - ffs(attr->max_dest_rd_atomic) - 1 : 0, + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + qp_context->params1 |= cpu_to_be32(min(attr->max_rd_atomic ? + ffs(attr->max_rd_atomic) - 1 : 0, 7) << 21); qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX); } @@ -764,10 +739,10 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp->atomic_rd_en = attr->qp_access_flags; } - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { u8 rra_max; - if (qp->resp_depth && !attr->max_rd_atomic) { + if (qp->resp_depth && !attr->max_dest_rd_atomic) { /* * Lowering our responder resources to zero. * Turn off RDMA/atomics as responder. @@ -778,7 +753,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) MTHCA_QP_OPTPAR_RAE); } - if (!qp->resp_depth && attr->max_rd_atomic) { + if (!qp->resp_depth && attr->max_dest_rd_atomic) { /* * Increasing our responder resources from * zero. Turn on RDMA/atomics as appropriate. @@ -799,7 +774,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) } for (rra_max = 0; - 1 << rra_max < attr->max_rd_atomic && + 1 << rra_max < attr->max_dest_rd_atomic && rra_max < dev->qp_table.rdb_shift; ++rra_max) ; /* nothing */ @@ -807,11 +782,14 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_context->params2 |= cpu_to_be32(rra_max << 21); qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX); - qp->resp_depth = attr->max_rd_atomic; + qp->resp_depth = attr->max_dest_rd_atomic; } qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC); + if (ibqp->srq) + qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RIC); + if (attr_mask & IB_QP_MIN_RNR_TIMER) { qp_context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24); qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_TIMEOUT); @@ -834,8 +812,12 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_Q_KEY); } + if (ibqp->srq) + qp_context->srqn = cpu_to_be32(1 << 24 | + to_msrq(ibqp->srq)->srqn); + err = mthca_MODIFY_QP(dev, state_table[cur_state][new_state].trans, - qp->qpn, 0, qp_param, 0, &status); + qp->qpn, 0, mailbox, 0, &status); if (status) { mthca_warn(dev, "modify QP %d returned status %02x.\n", state_table[cur_state][new_state].trans, status); @@ -845,7 +827,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) if (!err) qp->state = new_state; - kfree(mailbox); + mthca_free_mailbox(dev, mailbox); if (is_sqp(dev, qp)) store_attrs(to_msqp(qp), attr, attr_mask); @@ -881,10 +863,6 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, struct mthca_qp *qp) { int size; - int i; - int npages, shift; - dma_addr_t t; - u64 *dma_list = NULL; int err = -ENOMEM; size = sizeof (struct mthca_next_seg) + @@ -917,6 +895,15 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift, 1 << qp->sq.wqe_shift); + + /* + * If this is a userspace QP, we don't actually have to + * allocate anything. All we need is to calculate the WQE + * sizes and the send_wqe_offset, so we're done now. 
+ */ + if (pd->ibpd.uobject) + return 0; + size = PAGE_ALIGN(qp->send_wqe_offset + (qp->sq.max << qp->sq.wqe_shift)); @@ -925,100 +912,31 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, if (!qp->wrid) goto err_out; - if (size <= MTHCA_MAX_DIRECT_QP_SIZE) { - qp->is_direct = 1; - npages = 1; - shift = get_order(size) + PAGE_SHIFT; - - if (0) - mthca_dbg(dev, "Creating direct QP of size %d (shift %d)\n", - size, shift); - - qp->queue.direct.buf = pci_alloc_consistent(dev->pdev, size, &t); - if (!qp->queue.direct.buf) - goto err_out; - - pci_unmap_addr_set(&qp->queue.direct, mapping, t); - - memset(qp->queue.direct.buf, 0, size); - - while (t & ((1 << shift) - 1)) { - --shift; - npages *= 2; - } - - dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); - if (!dma_list) - goto err_out_free; - - for (i = 0; i < npages; ++i) - dma_list[i] = t + i * (1 << shift); - } else { - qp->is_direct = 0; - npages = size / PAGE_SIZE; - shift = PAGE_SHIFT; - - if (0) - mthca_dbg(dev, "Creating indirect QP with %d pages\n", npages); - - dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); - if (!dma_list) - goto err_out; - - qp->queue.page_list = kmalloc(npages * - sizeof *qp->queue.page_list, - GFP_KERNEL); - if (!qp->queue.page_list) - goto err_out; - - for (i = 0; i < npages; ++i) { - qp->queue.page_list[i].buf = - pci_alloc_consistent(dev->pdev, PAGE_SIZE, &t); - if (!qp->queue.page_list[i].buf) - goto err_out_free; - - memset(qp->queue.page_list[i].buf, 0, PAGE_SIZE); - - pci_unmap_addr_set(&qp->queue.page_list[i], mapping, t); - dma_list[i] = t; - } - } - - err = mthca_mr_alloc_phys(dev, pd->pd_num, dma_list, shift, - npages, 0, size, - MTHCA_MPT_FLAG_LOCAL_READ, - &qp->mr); + err = mthca_buf_alloc(dev, size, MTHCA_MAX_DIRECT_QP_SIZE, + &qp->queue, &qp->is_direct, pd, 0, &qp->mr); if (err) - goto err_out_free; + goto err_out; - kfree(dma_list); return 0; - err_out_free: - if (qp->is_direct) { - pci_free_consistent(dev->pdev, size, - qp->queue.direct.buf, - pci_unmap_addr(&qp->queue.direct, mapping)); - } else - for (i = 0; i < npages; ++i) { - if (qp->queue.page_list[i].buf) - pci_free_consistent(dev->pdev, PAGE_SIZE, - qp->queue.page_list[i].buf, - pci_unmap_addr(&qp->queue.page_list[i], - mapping)); - - } - - err_out: +err_out: kfree(qp->wrid); - kfree(dma_list); return err; } -static int mthca_alloc_memfree(struct mthca_dev *dev, +static void mthca_free_wqe_buf(struct mthca_dev *dev, struct mthca_qp *qp) { - int ret = 0; + mthca_buf_free(dev, PAGE_ALIGN(qp->send_wqe_offset + + (qp->sq.max << qp->sq.wqe_shift)), + &qp->queue, qp->is_direct, &qp->mr); + kfree(qp->wrid); +} + +static int mthca_map_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + int ret; if (mthca_is_memfree(dev)) { ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn); @@ -1029,35 +947,15 @@ static int mthca_alloc_memfree(struct mthca_dev *dev, if (ret) goto err_qpc; - ret = mthca_table_get(dev, dev->qp_table.rdb_table, - qp->qpn << dev->qp_table.rdb_shift); - if (ret) - goto err_eqpc; + ret = mthca_table_get(dev, dev->qp_table.rdb_table, + qp->qpn << dev->qp_table.rdb_shift); + if (ret) + goto err_eqpc; - qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ, - qp->qpn, &qp->rq.db); - if (qp->rq.db_index < 0) { - ret = -ENOMEM; - goto err_rdb; - } - - qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ, - qp->qpn, &qp->sq.db); - if (qp->sq.db_index < 0) { - ret = -ENOMEM; - goto err_rq_db; - } } return 0; -err_rq_db: - mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); - -err_rdb: - 
mthca_table_put(dev, dev->qp_table.rdb_table, - qp->qpn << dev->qp_table.rdb_shift); - err_eqpc: mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn); @@ -1067,16 +965,41 @@ err_qpc: mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn); return ret; } +static void mthca_unmap_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + mthca_table_put(dev, dev->qp_table.rdb_table, + qp->qpn << dev->qp_table.rdb_shift); + mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn); + mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn); +} + +static int mthca_alloc_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + int ret = 0; + + if (mthca_is_memfree(dev)) { + qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ, + qp->qpn, &qp->rq.db); + if (qp->rq.db_index < 0) + return -ENOMEM; + + qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ, + qp->qpn, &qp->sq.db); + if (qp->sq.db_index < 0) { + mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); + return -ENOMEM; + } + } + + return ret; +} + static void mthca_free_memfree(struct mthca_dev *dev, struct mthca_qp *qp) { if (mthca_is_memfree(dev)) { mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index); mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); - mthca_table_put(dev, dev->qp_table.rdb_table, - qp->qpn << dev->qp_table.rdb_shift); - mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn); - mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn); } } @@ -1108,13 +1031,28 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, mthca_wq_init(&qp->sq); mthca_wq_init(&qp->rq); - ret = mthca_alloc_memfree(dev, qp); + ret = mthca_map_memfree(dev, qp); if (ret) return ret; ret = mthca_alloc_wqe_buf(dev, pd, qp); if (ret) { - mthca_free_memfree(dev, qp); + mthca_unmap_memfree(dev, qp); return ret; } + + /* + * If this is a userspace QP, we're done now. The doorbells + * will be allocated and buffers will be initialized in + * userspace. + */ + if (pd->ibpd.uobject) + return 0; + + ret = mthca_alloc_memfree(dev, qp); + if (ret) { + mthca_free_wqe_buf(dev, qp); + mthca_unmap_memfree(dev, qp); + return ret; + } + @@ -1147,22 +1085,39 @@ return 0; } -static void mthca_align_qp_size(struct mthca_dev *dev, struct mthca_qp *qp) +static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, + struct mthca_qp *qp) { - int i; - - if (!mthca_is_memfree(dev)) - return; + /* Sanity check QP size before proceeding */ + if (cap->max_send_wr > 65536 || cap->max_recv_wr > 65536 || + cap->max_send_sge > 64 || cap->max_recv_sge > 64) + return -EINVAL; - for (i = 0; 1 << i < qp->rq.max; ++i) - ; /* nothing */ + if (mthca_is_memfree(dev)) { + qp->rq.max = cap->max_recv_wr ? + roundup_pow_of_two(cap->max_recv_wr) : 0; + qp->sq.max = cap->max_send_wr ? 
+ roundup_pow_of_two(cap->max_send_wr) : 0; + } else { + qp->rq.max = cap->max_recv_wr; + qp->sq.max = cap->max_send_wr; + } - qp->rq.max = 1 << i; + qp->rq.max_gs = cap->max_recv_sge; + qp->sq.max_gs = max_t(int, cap->max_send_sge, + ALIGN(cap->max_inline_data + MTHCA_INLINE_HEADER_SIZE, + MTHCA_INLINE_CHUNK_SIZE) / + sizeof (struct mthca_data_seg)); - for (i = 0; 1 << i < qp->sq.max; ++i) - ; /* nothing */ + /* + * For MLX transport we need 2 extra S/G entries: + * one for the header and one for the checksum at the end + */ + if ((qp->transport == MLX && qp->sq.max_gs + 2 > dev->limits.max_sg) || + qp->sq.max_gs > dev->limits.max_sg || qp->rq.max_gs > dev->limits.max_sg) + return -EINVAL; - qp->sq.max = 1 << i; + return 0; } int mthca_alloc_qp(struct mthca_dev *dev, @@ -1171,11 +1126,14 @@ int mthca_alloc_qp(struct mthca_dev *dev, struct mthca_cq *recv_cq, enum ib_qp_type type, enum ib_sig_type send_policy, + struct ib_qp_cap *cap, struct mthca_qp *qp) { int err; - mthca_align_qp_size(dev, qp); + err = mthca_set_qp_size(dev, cap, qp); + if (err) + return err; switch (type) { case IB_QPT_RC: qp->transport = RC; break; @@ -1208,14 +1166,17 @@ int mthca_alloc_sqp(struct mthca_dev *dev, struct mthca_cq *send_cq, struct mthca_cq *recv_cq, enum ib_sig_type send_policy, + struct ib_qp_cap *cap, int qpn, int port, struct mthca_sqp *sqp) { - int err = 0; u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; + int err; - mthca_align_qp_size(dev, &sqp->qp); + err = mthca_set_qp_size(dev, cap, &sqp->qp); + if (err) + return err; sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE; sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size, @@ -1274,8 +1235,6 @@ void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp) { u8 status; - int size; - int i; struct mthca_cq *send_cq; struct mthca_cq *recv_cq; @@ -1305,31 +1264,23 @@ void mthca_free_qp(struct mthca_dev *dev, if (qp->state != IB_QPS_RESET) mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status); - mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn); - if (qp->ibqp.send_cq != qp->ibqp.recv_cq) - mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn); - - mthca_free_mr(dev, &qp->mr); - - size = PAGE_ALIGN(qp->send_wqe_offset + - (qp->sq.max << qp->sq.wqe_shift)); + /* + * If this is a userspace QP, the buffers, MR, CQs and so on + * will be cleaned up in userspace, so all we have to do is + * unref the mem-free tables and free the QPN in our table. + */ + if (!qp->ibqp.uobject) { + mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn, + qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL); + if (qp->ibqp.send_cq != qp->ibqp.recv_cq) + mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn, + qp->ibqp.srq ? 
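/* mthca_cq_clean() is now told which SRQ, if any, the QP fed from: receive WQEs completed from a shared RQ belong to the SRQ's free list and must be recycled there rather than treated as QP-owned entries */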
to_msrq(qp->ibqp.srq) : NULL); - if (qp->is_direct) { - pci_free_consistent(dev->pdev, size, - qp->queue.direct.buf, - pci_unmap_addr(&qp->queue.direct, mapping)); - } else { - for (i = 0; i < size / PAGE_SIZE; ++i) { - pci_free_consistent(dev->pdev, PAGE_SIZE, - qp->queue.page_list[i].buf, - pci_unmap_addr(&qp->queue.page_list[i], - mapping)); - } + mthca_free_memfree(dev, qp); + mthca_free_wqe_buf(dev, qp); } - kfree(qp->wrid); - - mthca_free_memfree(dev, qp); + mthca_unmap_memfree(dev, qp); if (is_sqp(dev, qp)) { atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count)); @@ -1349,6 +1300,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, { int header_size; int err; + u16 pkey; ib_ud_header_init(256, /* assume a MAD */ sqp->ud_header.grh_present, @@ -1359,8 +1311,8 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, return err; mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1); mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) | - (sqp->ud_header.lrh.destination_lid == 0xffff ? - MTHCA_MLX_SLR : 0) | + (sqp->ud_header.lrh.destination_lid == + IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) | (sqp->ud_header.lrh.service_level << 8)); mlx->rlid = sqp->ud_header.lrh.destination_lid; mlx->vcrc = 0; @@ -1380,18 +1332,16 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, } sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; - if (sqp->ud_header.lrh.destination_lid == 0xffff) - sqp->ud_header.lrh.source_lid = 0xffff; + if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) + sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); if (!sqp->qp.ibqp.qp_num) ib_get_cached_pkey(&dev->ib_dev, sqp->port, - sqp->pkey_index, - &sqp->ud_header.bth.pkey); + sqp->pkey_index, &pkey); else ib_get_cached_pkey(&dev->ib_dev, sqp->port, - wr->wr.ud.pkey_index, - &sqp->ud_header.bth.pkey); - cpu_to_be16s(&sqp->ud_header.bth.pkey); + wr->wr.ud.pkey_index, &pkey); + sqp->ud_header.bth.pkey = cpu_to_be16(pkey); sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ? 
sqp->qkey : wr->wr.ud.remote_qkey);
@@ -1529,6 +1479,26 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, break; + case UC: + switch (wr->opcode) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + ((struct mthca_raddr_seg *) wqe)->raddr = + cpu_to_be64(wr->wr.rdma.remote_addr); + ((struct mthca_raddr_seg *) wqe)->rkey = + cpu_to_be32(wr->wr.rdma.rkey); + ((struct mthca_raddr_seg *) wqe)->reserved = 0; + wqe += sizeof (struct mthca_raddr_seg); + size += sizeof (struct mthca_raddr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + + break; + case UD: ((struct mthca_tavor_ud_seg *) wqe)->lkey = cpu_to_be32(to_mah(wr->wr.ud.ah)->key); @@ -1614,7 +1584,7 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, out: if (likely(nreq)) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) + qp->send_wqe_offset) | f0 | op0); @@ -1715,7 +1685,7 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, out: if (likely(nreq)) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0); doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq); @@ -1814,9 +1784,29 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, sizeof (struct mthca_atomic_seg); break; + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + ((struct mthca_raddr_seg *) wqe)->raddr = + cpu_to_be64(wr->wr.rdma.remote_addr); + ((struct mthca_raddr_seg *) wqe)->rkey = + cpu_to_be32(wr->wr.rdma.rkey); + ((struct mthca_raddr_seg *) wqe)->reserved = 0; + wqe += sizeof (struct mthca_raddr_seg); + size += sizeof (struct mthca_raddr_seg) / 16; + break; + + default: + /* No extra segments required for sends */ + break; + } + + break; + + case UC: + switch (wr->opcode) { case IB_WR_RDMA_WRITE: case IB_WR_RDMA_WRITE_WITH_IMM: - case IB_WR_RDMA_READ: ((struct mthca_raddr_seg *) wqe)->raddr = cpu_to_be64(wr->wr.rdma.remote_addr); ((struct mthca_raddr_seg *) wqe)->rkey = @@ -1916,7 +1906,7 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, out: if (likely(nreq)) { - u32 doorbell[2]; + __be32 doorbell[2]; doorbell[0] = cpu_to_be32((nreq << 24) | ((qp->sq.head & 0xffff) << 8) | @@ -2026,19 +2016,25 @@ out: } int mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send, - int index, int *dbd, u32 *new_wqe) + int index, int *dbd, __be32 *new_wqe) { struct mthca_next_seg *next; + /* + * For SRQs, all WQEs generate a CQE, so we're always at the + * end of the doorbell chain. + */ + if (qp->ibqp.srq) { + *new_wqe = 0; + return 0; + } + if (is_send) next = get_send_wqe(qp, index); else next = get_recv_wqe(qp, index); - if (mthca_is_memfree(dev)) - *dbd = 1; - else - *dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD)); + *dbd = !!(next->ee_nds & cpu_to_be32(MTHCA_NEXT_DBD)); if (next->ee_nds & cpu_to_be32(0x3f)) *new_wqe = (next->nda_op & cpu_to_be32(~0x3f)) | (next->ee_nds & cpu_to_be32(0x3f)); diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c new file mode 100644 index 000000000000..75cd2d84ef12 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -0,0 +1,591 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: mthca_srq.c 3047 2005-08-10 03:59:35Z roland $ + */ + +#include "mthca_dev.h" +#include "mthca_cmd.h" +#include "mthca_memfree.h" +#include "mthca_wqe.h" + +enum { + MTHCA_MAX_DIRECT_SRQ_SIZE = 4 * PAGE_SIZE +}; + +struct mthca_tavor_srq_context { + __be64 wqe_base_ds; /* low 6 bits is descriptor size */ + __be32 state_pd; + __be32 lkey; + __be32 uar; + __be32 wqe_cnt; + u32 reserved[2]; +}; + +struct mthca_arbel_srq_context { + __be32 state_logsize_srqn; + __be32 lkey; + __be32 db_index; + __be32 logstride_usrpage; + __be64 wqe_base; + __be32 eq_pd; + __be16 limit_watermark; + __be16 wqe_cnt; + u16 reserved1; + __be16 wqe_counter; + u32 reserved2[3]; +}; + +static void *get_wqe(struct mthca_srq *srq, int n) +{ + if (srq->is_direct) + return srq->queue.direct.buf + (n << srq->wqe_shift); + else + return srq->queue.page_list[(n << srq->wqe_shift) >> PAGE_SHIFT].buf + + ((n << srq->wqe_shift) & (PAGE_SIZE - 1)); +} + +/* + * Return a pointer to the location within a WQE that we're using as a + * link when the WQE is in the free list. We use an offset of 4 + * because in the Tavor case, posting a WQE may overwrite the first + * four bytes of the previous WQE. The offset avoids corrupting our + * free list if the WQE has already completed and been put on the free + * list when we post the next WQE. 
+ */ +static inline int *wqe_to_link(void *wqe) +{ + return (int *) (wqe + 4); +} + +static void mthca_tavor_init_srq_context(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_srq *srq, + struct mthca_tavor_srq_context *context) +{ + memset(context, 0, sizeof *context); + + context->wqe_base_ds = cpu_to_be64(1 << (srq->wqe_shift - 4)); + context->state_pd = cpu_to_be32(pd->pd_num); + context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); + + if (pd->ibpd.uobject) + context->uar = + cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); + else + context->uar = cpu_to_be32(dev->driver_uar.index); +} + +static void mthca_arbel_init_srq_context(struct mthca_dev *dev, + struct mthca_pd *pd, + struct mthca_srq *srq, + struct mthca_arbel_srq_context *context) +{ + int logsize; + + memset(context, 0, sizeof *context); + + logsize = long_log2(srq->max) + srq->wqe_shift; + context->state_logsize_srqn = cpu_to_be32(logsize << 24 | srq->srqn); + context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); + context->db_index = cpu_to_be32(srq->db_index); + context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29); + if (pd->ibpd.uobject) + context->logstride_usrpage |= + cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); + else + context->logstride_usrpage |= cpu_to_be32(dev->driver_uar.index); + context->eq_pd = cpu_to_be32(MTHCA_EQ_ASYNC << 24 | pd->pd_num); +} + +static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq) +{ + mthca_buf_free(dev, srq->max << srq->wqe_shift, &srq->queue, + srq->is_direct, &srq->mr); + kfree(srq->wrid); +} + +static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, + struct mthca_srq *srq) +{ + struct mthca_data_seg *scatter; + void *wqe; + int err; + int i; + + if (pd->ibpd.uobject) + return 0; + + srq->wrid = kmalloc(srq->max * sizeof (u64), GFP_KERNEL); + if (!srq->wrid) + return -ENOMEM; + + err = mthca_buf_alloc(dev, srq->max << srq->wqe_shift, + MTHCA_MAX_DIRECT_SRQ_SIZE, + &srq->queue, &srq->is_direct, pd, 1, &srq->mr); + if (err) { + kfree(srq->wrid); + return err; + } + + /* + * Now initialize the SRQ buffer so that all of the WQEs are + * linked into the list of free WQEs. In addition, set the + * scatter list L_Keys to the sentry value of 0x100. + */ + for (i = 0; i < srq->max; ++i) { + wqe = get_wqe(srq, i); + + *wqe_to_link(wqe) = i < srq->max - 1 ? 
i + 1 : -1; + + for (scatter = wqe + sizeof (struct mthca_next_seg); + (void *) scatter < wqe + (1 << srq->wqe_shift); + ++scatter) + scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); + } + + return 0; +} + +int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, + struct ib_srq_attr *attr, struct mthca_srq *srq) +{ + struct mthca_mailbox *mailbox; + u8 status; + int ds; + int err; + + /* Sanity check SRQ size before proceeding */ + if (attr->max_wr > 16 << 20 || attr->max_sge > 64) + return -EINVAL; + + srq->max = attr->max_wr; + srq->max_gs = attr->max_sge; + srq->last = NULL; + srq->counter = 0; + + if (mthca_is_memfree(dev)) + srq->max = roundup_pow_of_two(srq->max + 1); + + ds = min(64UL, + roundup_pow_of_two(sizeof (struct mthca_next_seg) + + srq->max_gs * sizeof (struct mthca_data_seg))); + srq->wqe_shift = long_log2(ds); + + srq->srqn = mthca_alloc(&dev->srq_table.alloc); + if (srq->srqn == -1) + return -ENOMEM; + + if (mthca_is_memfree(dev)) { + err = mthca_table_get(dev, dev->srq_table.table, srq->srqn); + if (err) + goto err_out; + + if (!pd->ibpd.uobject) { + srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ, + srq->srqn, &srq->db); + if (srq->db_index < 0) { + err = -ENOMEM; + goto err_out_icm; + } + } + } + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto err_out_db; + } + + err = mthca_alloc_srq_buf(dev, pd, srq); + if (err) + goto err_out_mailbox; + + spin_lock_init(&srq->lock); + atomic_set(&srq->refcount, 1); + init_waitqueue_head(&srq->wait); + + if (mthca_is_memfree(dev)) + mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf); + else + mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf); + + err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn, &status); + + if (err) { + mthca_warn(dev, "SW2HW_SRQ failed (%d)\n", err); + goto err_out_free_buf; + } + if (status) { + mthca_warn(dev, "SW2HW_SRQ returned status 0x%02x\n", + status); + err = -EINVAL; + goto err_out_free_buf; + } + + spin_lock_irq(&dev->srq_table.lock); + if (mthca_array_set(&dev->srq_table.srq, + srq->srqn & (dev->limits.num_srqs - 1), + srq)) { + spin_unlock_irq(&dev->srq_table.lock); + goto err_out_free_srq; + } + spin_unlock_irq(&dev->srq_table.lock); + + mthca_free_mailbox(dev, mailbox); + + srq->first_free = 0; + srq->last_free = srq->max - 1; + + return 0; + +err_out_free_srq: + err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status); + if (err) + mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); + else if (status) + mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status); + +err_out_free_buf: + if (!pd->ibpd.uobject) + mthca_free_srq_buf(dev, srq); + +err_out_mailbox: + mthca_free_mailbox(dev, mailbox); + +err_out_db: + if (!pd->ibpd.uobject && mthca_is_memfree(dev)) + mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index); + +err_out_icm: + mthca_table_put(dev, dev->srq_table.table, srq->srqn); + +err_out: + mthca_free(&dev->srq_table.alloc, srq->srqn); + + return err; +} + +void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq) +{ + struct mthca_mailbox *mailbox; + int err; + u8 status; + + mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); + if (IS_ERR(mailbox)) { + mthca_warn(dev, "No memory for mailbox to free SRQ.\n"); + return; + } + + err = mthca_HW2SW_SRQ(dev, mailbox, srq->srqn, &status); + if (err) + mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); + else if (status) + mthca_warn(dev, "HW2SW_SRQ returned status 0x%02x\n", status); + + spin_lock_irq(&dev->srq_table.lock); + mthca_array_clear(&dev->srq_table.srq, 
+			  srq->srqn & (dev->limits.num_srqs - 1));
+	spin_unlock_irq(&dev->srq_table.lock);
+
+	atomic_dec(&srq->refcount);
+	wait_event(srq->wait, !atomic_read(&srq->refcount));
+
+	if (!srq->ibsrq.uobject) {
+		mthca_free_srq_buf(dev, srq);
+		if (mthca_is_memfree(dev))
+			mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index);
+	}
+
+	mthca_table_put(dev, dev->srq_table.table, srq->srqn);
+	mthca_free(&dev->srq_table.alloc, srq->srqn);
+	mthca_free_mailbox(dev, mailbox);
+}
+
+void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
+		     enum ib_event_type event_type)
+{
+	struct mthca_srq *srq;
+	struct ib_event event;
+
+	spin_lock(&dev->srq_table.lock);
+	srq = mthca_array_get(&dev->srq_table.srq, srqn & (dev->limits.num_srqs - 1));
+	if (srq)
+		atomic_inc(&srq->refcount);
+	spin_unlock(&dev->srq_table.lock);
+
+	if (!srq) {
+		mthca_warn(dev, "Async event for bogus SRQ %08x\n", srqn);
+		return;
+	}
+
+	if (!srq->ibsrq.event_handler)
+		goto out;
+
+	event.device      = &dev->ib_dev;
+	event.event       = event_type;
+	event.element.srq = &srq->ibsrq;
+	srq->ibsrq.event_handler(&event, srq->ibsrq.srq_context);
+
+out:
+	if (atomic_dec_and_test(&srq->refcount))
+		wake_up(&srq->wait);
+}
+
+/*
+ * This function must be called with IRQs disabled.
+ */
+void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr)
+{
+	int ind;
+
+	ind = wqe_addr >> srq->wqe_shift;
+
+	spin_lock(&srq->lock);
+
+	if (likely(srq->first_free >= 0))
+		*wqe_to_link(get_wqe(srq, srq->last_free)) = ind;
+	else
+		srq->first_free = ind;
+
+	*wqe_to_link(get_wqe(srq, ind)) = -1;
+	srq->last_free = ind;
+
+	spin_unlock(&srq->lock);
+}
+
+int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+			      struct ib_recv_wr **bad_wr)
+{
+	struct mthca_dev *dev = to_mdev(ibsrq->device);
+	struct mthca_srq *srq = to_msrq(ibsrq);
+	unsigned long flags;
+	int err = 0;
+	int first_ind;
+	int ind;
+	int next_ind;
+	int nreq;
+	int i;
+	void *wqe;
+	void *prev_wqe;
+
+	spin_lock_irqsave(&srq->lock, flags);
+
+	first_ind = srq->first_free;
+
+	for (nreq = 0; wr; ++nreq, wr = wr->next) {
+		ind = srq->first_free;
+
+		if (ind < 0) {
+			mthca_err(dev, "SRQ %06x full\n", srq->srqn);
+			err = -ENOMEM;
+			*bad_wr = wr;
+			break;
+		}
+
+		wqe       = get_wqe(srq, ind);
+		next_ind  = *wqe_to_link(wqe);
+		prev_wqe  = srq->last;
+		srq->last = wqe;
+
+		((struct mthca_next_seg *) wqe)->nda_op = 0;
+		((struct mthca_next_seg *) wqe)->ee_nds = 0;
+		/* flags field will always remain 0 */
+
+		wqe += sizeof (struct mthca_next_seg);
+
+		if (unlikely(wr->num_sge > srq->max_gs)) {
+			err = -EINVAL;
+			*bad_wr = wr;
+			srq->last = prev_wqe;
+			break;
+		}
+
+		for (i = 0; i < wr->num_sge; ++i) {
+			((struct mthca_data_seg *) wqe)->byte_count =
+				cpu_to_be32(wr->sg_list[i].length);
+			((struct mthca_data_seg *) wqe)->lkey =
+				cpu_to_be32(wr->sg_list[i].lkey);
+			((struct mthca_data_seg *) wqe)->addr =
+				cpu_to_be64(wr->sg_list[i].addr);
+			wqe += sizeof (struct mthca_data_seg);
+		}
+
+		if (i < srq->max_gs) {
+			((struct mthca_data_seg *) wqe)->byte_count = 0;
+			((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
+			((struct mthca_data_seg *) wqe)->addr = 0;
+		}
+
+		if (likely(prev_wqe)) {
+			((struct mthca_next_seg *) prev_wqe)->nda_op =
+				cpu_to_be32((ind << srq->wqe_shift) | 1);
+			wmb();
+			((struct mthca_next_seg *) prev_wqe)->ee_nds =
+				cpu_to_be32(MTHCA_NEXT_DBD);
+		}
+
+		srq->wrid[ind]  = wr->wr_id;
+		srq->first_free = next_ind;
+	}
+
+	if (likely(nreq)) {
+		__be32 doorbell[2];
+
+		doorbell[0] = cpu_to_be32(first_ind << srq->wqe_shift);
+		doorbell[1] = cpu_to_be32((srq->srqn << 8) | nreq);
+
+		/*
+		 * Make sure that descriptors are written before
+		 * doorbell is rung.
+		 */
+		wmb();
+
+		mthca_write64(doorbell,
+			      dev->kar + MTHCA_RECEIVE_DOORBELL,
+			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
+	}
+
+	spin_unlock_irqrestore(&srq->lock, flags);
+	return err;
+}
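The post and free paths above manage receive WQEs through a singly linked free list threaded through the WQE buffer itself. A minimal, self-contained userspace sketch of that scheme (illustrative only; the sizes and function names are local to the sketch):

#include <stdio.h>

#define WQE_SHIFT 6		/* 64-byte WQEs */
#define NUM_WQES  8

static char queue[NUM_WQES << WQE_SHIFT] __attribute__((aligned(8)));
static int first_free, last_free;

static void *get_wqe(int n)
{
	return queue + ((long) n << WQE_SHIFT);
}

static int *wqe_to_link(void *wqe)
{
	/* Offset 4: posting may overwrite the first 4 bytes of the
	 * previous WQE, so the link must not live there. */
	return (int *) ((char *) wqe + 4);
}

static void init_free_list(void)
{
	int i;

	for (i = 0; i < NUM_WQES; ++i)
		*wqe_to_link(get_wqe(i)) = i < NUM_WQES - 1 ? i + 1 : -1;
	first_free = 0;
	last_free  = NUM_WQES - 1;
}

static int take_wqe(void)		/* post path: pop the head */
{
	int ind = first_free;

	if (ind >= 0)
		first_free = *wqe_to_link(get_wqe(ind));
	return ind;			/* -1 means "SRQ full" */
}

static void put_wqe(int ind)		/* completion path: append at the tail */
{
	if (first_free >= 0)
		*wqe_to_link(get_wqe(last_free)) = ind;
	else
		first_free = ind;
	*wqe_to_link(get_wqe(ind)) = -1;
	last_free = ind;
}

int main(void)
{
	init_free_list();
	printf("posted %d\n", take_wqe());	/* 0 */
	printf("posted %d\n", take_wqe());	/* 1 */
	put_wqe(0);				/* completion recycles WQE 0 */
	printf("posted %d\n", take_wqe());	/* 2 */
	return 0;
}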
+
+int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+			      struct ib_recv_wr **bad_wr)
+{
+	struct mthca_dev *dev = to_mdev(ibsrq->device);
+	struct mthca_srq *srq = to_msrq(ibsrq);
+	unsigned long flags;
+	int err = 0;
+	int ind;
+	int next_ind;
+	int nreq;
+	int i;
+	void *wqe;
+
+	spin_lock_irqsave(&srq->lock, flags);
+
+	for (nreq = 0; wr; ++nreq, wr = wr->next) {
+		ind = srq->first_free;
+
+		if (ind < 0) {
+			mthca_err(dev, "SRQ %06x full\n", srq->srqn);
+			err = -ENOMEM;
+			*bad_wr = wr;
+			break;
+		}
+
+		wqe      = get_wqe(srq, ind);
+		next_ind = *wqe_to_link(wqe);
+
+		((struct mthca_next_seg *) wqe)->nda_op =
+			cpu_to_be32((next_ind << srq->wqe_shift) | 1);
+		((struct mthca_next_seg *) wqe)->ee_nds = 0;
+		/* flags field will always remain 0 */
+
+		wqe += sizeof (struct mthca_next_seg);
+
+		if (unlikely(wr->num_sge > srq->max_gs)) {
+			err = -EINVAL;
+			*bad_wr = wr;
+			break;
+		}
+
+		for (i = 0; i < wr->num_sge; ++i) {
+			((struct mthca_data_seg *) wqe)->byte_count =
+				cpu_to_be32(wr->sg_list[i].length);
+			((struct mthca_data_seg *) wqe)->lkey =
+				cpu_to_be32(wr->sg_list[i].lkey);
+			((struct mthca_data_seg *) wqe)->addr =
+				cpu_to_be64(wr->sg_list[i].addr);
+			wqe += sizeof (struct mthca_data_seg);
+		}
+
+		if (i < srq->max_gs) {
+			((struct mthca_data_seg *) wqe)->byte_count = 0;
+			((struct mthca_data_seg *) wqe)->lkey = cpu_to_be32(MTHCA_INVAL_LKEY);
+			((struct mthca_data_seg *) wqe)->addr = 0;
+		}
+
+		srq->wrid[ind]  = wr->wr_id;
+		srq->first_free = next_ind;
+	}
+
+	if (likely(nreq)) {
+		srq->counter += nreq;
+
+		/*
+		 * Make sure that descriptors are written before
+		 * we write doorbell record.
+		 */
+		wmb();
+		*srq->db = cpu_to_be32(srq->counter);
+	}
+
+	spin_unlock_irqrestore(&srq->lock, flags);
+	return err;
+}
+
+int __devinit mthca_init_srq_table(struct mthca_dev *dev)
+{
+	int err;
+
+	if (!(dev->mthca_flags & MTHCA_FLAG_SRQ))
+		return 0;
+
+	spin_lock_init(&dev->srq_table.lock);
+
+	err = mthca_alloc_init(&dev->srq_table.alloc,
+			       dev->limits.num_srqs,
+			       dev->limits.num_srqs - 1,
+			       dev->limits.reserved_srqs);
+	if (err)
+		return err;
+
+	err = mthca_array_init(&dev->srq_table.srq,
+			       dev->limits.num_srqs);
+	if (err)
+		mthca_alloc_cleanup(&dev->srq_table.alloc);
+
+	return err;
+}
+
+void __devexit mthca_cleanup_srq_table(struct mthca_dev *dev)
+{
+	if (!(dev->mthca_flags & MTHCA_FLAG_SRQ))
+		return;
+
+	mthca_array_cleanup(&dev->srq_table.srq, dev->limits.num_srqs);
+	mthca_alloc_cleanup(&dev->srq_table.alloc);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_user.h b/drivers/infiniband/hw/mthca/mthca_user.h
new file mode 100644
index 000000000000..41613ec8a04e
--- /dev/null
+++ b/drivers/infiniband/hw/mthca/mthca_user.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Cisco Systems.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef MTHCA_USER_H +#define MTHCA_USER_H + +#include <linux/types.h> + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ + +struct mthca_alloc_ucontext_resp { + __u32 qp_tab_size; + __u32 uarc_size; +}; + +struct mthca_alloc_pd_resp { + __u32 pdn; + __u32 reserved; +}; + +struct mthca_create_cq { + __u32 lkey; + __u32 pdn; + __u64 arm_db_page; + __u64 set_db_page; + __u32 arm_db_index; + __u32 set_db_index; +}; + +struct mthca_create_cq_resp { + __u32 cqn; + __u32 reserved; +}; + +struct mthca_create_srq { + __u32 lkey; + __u32 db_index; + __u64 db_page; +}; + +struct mthca_create_srq_resp { + __u32 srqn; + __u32 reserved; +}; + +struct mthca_create_qp { + __u32 lkey; + __u32 reserved; + __u64 sq_db_page; + __u64 rq_db_page; + __u32 sq_db_index; + __u32 rq_db_index; +}; + +#endif /* MTHCA_USER_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_wqe.h b/drivers/infiniband/hw/mthca/mthca_wqe.h new file mode 100644 index 000000000000..1f4c0ff28f79 --- /dev/null +++ b/drivers/infiniband/hw/mthca/mthca_wqe.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id: mthca_wqe.h 3047 2005-08-10 03:59:35Z roland $
+ */
+
+#ifndef MTHCA_WQE_H
+#define MTHCA_WQE_H
+
+#include <linux/types.h>
+
+enum {
+	MTHCA_NEXT_DBD       = 1 << 7,
+	MTHCA_NEXT_FENCE     = 1 << 6,
+	MTHCA_NEXT_CQ_UPDATE = 1 << 3,
+	MTHCA_NEXT_EVENT_GEN = 1 << 2,
+	MTHCA_NEXT_SOLICIT   = 1 << 1,
+
+	MTHCA_MLX_VL15       = 1 << 17,
+	MTHCA_MLX_SLR        = 1 << 16
+};
+
+enum {
+	MTHCA_INVAL_LKEY = 0x100
+};
+
+struct mthca_next_seg {
+	__be32 nda_op;	/* [31:6] next WQE [4:0] next opcode */
+	__be32 ee_nds;	/* [31:8] next EE [7] DBD [6] F [5:0] next WQE size */
+	__be32 flags;	/* [3] CQ [2] Event [1] Solicit */
+	__be32 imm;	/* immediate data */
+};
+
+struct mthca_tavor_ud_seg {
+	u32    reserved1;
+	__be32 lkey;
+	__be64 av_addr;
+	u32    reserved2[4];
+	__be32 dqpn;
+	__be32 qkey;
+	u32    reserved3[2];
+};
+
+struct mthca_arbel_ud_seg {
+	__be32 av[8];
+	__be32 dqpn;
+	__be32 qkey;
+	u32    reserved[2];
+};
+
+struct mthca_bind_seg {
+	__be32 flags;	/* [31] Atomic [30] rem write [29] rem read */
+	u32    reserved;
+	__be32 new_rkey;
+	__be32 lkey;
+	__be64 addr;
+	__be64 length;
+};
+
+struct mthca_raddr_seg {
+	__be64 raddr;
+	__be32 rkey;
+	u32    reserved;
+};
+
+struct mthca_atomic_seg {
+	__be64 swap_add;
+	__be64 compare;
+};
+
+struct mthca_data_seg {
+	__be32 byte_count;
+	__be32 lkey;
+	__be64 addr;
+};
+
+struct mthca_mlx_seg {
+	__be32 nda_op;
+	__be32 nds;
+	__be32 flags;	/* [17] VL15 [16] SLR [14:12] static rate
+			   [11:8] SL [3] C [2] E */
+	__be16 rlid;
+	__be16 vcrc;
+};
+
+#endif /* MTHCA_WQE_H */
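For context, a hedged sketch of how a kernel consumer might exercise this SRQ support through the generic verbs layer. It assumes the rdma/ib_verbs.h SRQ entry points (ib_create_srq(), ib_post_srq_recv(), ib_destroy_srq()) added alongside this driver code; the function name is hypothetical, and the protection domain, DMA address, and lkey are presumed to exist already:

#include <linux/err.h>
#include <rdma/ib_verbs.h>

static struct ib_srq *demo_srq_create_and_post(struct ib_pd *pd,
					       u64 dma_addr, u32 lkey)
{
	struct ib_srq_init_attr init_attr = {
		.attr = {
			.max_wr	 = 128,	/* well under the 16 << 20 cap above */
			.max_sge = 1,	/* well under the 64 cap above */
		},
	};
	struct ib_recv_wr wr, *bad_wr;
	struct ib_srq *srq;
	struct ib_sge sge;
	int ret;

	srq = ib_create_srq(pd, &init_attr);
	if (IS_ERR(srq))
		return srq;

	sge.addr   = dma_addr;
	sge.length = 256;
	sge.lkey   = lkey;

	wr.next	   = NULL;
	wr.wr_id   = 1;
	wr.sg_list = &sge;
	wr.num_sge = 1;

	/* Dispatches to mthca_tavor_post_srq_recv() or
	 * mthca_arbel_post_srq_recv() depending on the HCA. */
	ret = ib_post_srq_recv(srq, &wr, &bad_wr);
	if (ret) {
		ib_destroy_srq(srq);
		return ERR_PTR(ret);
	}

	return srq;
}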