[PATCH][RFC][3/4] IB: userspace verbs mthca changes

From: Roland Dreier
Date: Mon Apr 04 2005 - 17:52:21 EST


Add Mellanox HCA-specific userspace verbs support to mthca.

Signed-off-by: Roland Dreier <roland@xxxxxxxxxxx>

--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_cq.c 2005-04-04 14:57:12.228756073 -0700
+++ linux-export/drivers/infiniband/hw/mthca/mthca_cq.c 2005-04-04 14:58:12.364679525 -0700
@@ -743,6 +743,7 @@
}

int mthca_init_cq(struct mthca_dev *dev, int nent,
+ struct mthca_ucontext *ctx, u32 pdn,
struct mthca_cq *cq)
{
int size = nent * MTHCA_CQ_ENTRY_SIZE;
@@ -754,30 +755,33 @@

might_sleep();

- cq->ibcq.cqe = nent - 1;
+ cq->ibcq.cqe = nent - 1;
+ cq->is_kernel = !ctx;

cq->cqn = mthca_alloc(&dev->cq_table.alloc);
if (cq->cqn == -1)
return -ENOMEM;

if (mthca_is_memfree(dev)) {
- cq->arm_sn = 1;
-
err = mthca_table_get(dev, dev->cq_table.table, cq->cqn);
if (err)
goto err_out;

- err = -ENOMEM;
+ if (cq->is_kernel) {
+ cq->arm_sn = 1;
+
+ err = -ENOMEM;

- cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI,
- cq->cqn, &cq->set_ci_db);
- if (cq->set_ci_db_index < 0)
- goto err_out_icm;
-
- cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM,
- cq->cqn, &cq->arm_db);
- if (cq->arm_db_index < 0)
- goto err_out_ci;
+ cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI,
+ cq->cqn, &cq->set_ci_db);
+ if (cq->set_ci_db_index < 0)
+ goto err_out_icm;
+
+ cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM,
+ cq->cqn, &cq->arm_db);
+ if (cq->arm_db_index < 0)
+ goto err_out_ci;
+ }
}

mailbox = kmalloc(sizeof (struct mthca_cq_context) + MTHCA_CMD_MAILBOX_EXTRA,
@@ -787,12 +791,14 @@

cq_context = MAILBOX_ALIGN(mailbox);

- err = mthca_alloc_cq_buf(dev, size, cq);
- if (err)
- goto err_out_mailbox;
+ if (cq->is_kernel) {
+ err = mthca_alloc_cq_buf(dev, size, cq);
+ if (err)
+ goto err_out_mailbox;

- for (i = 0; i < nent; ++i)
- set_cqe_hw(get_cqe(cq, i));
+ for (i = 0; i < nent; ++i)
+ set_cqe_hw(get_cqe(cq, i));
+ }

spin_lock_init(&cq->lock);
atomic_set(&cq->refcount, 1);
@@ -803,11 +809,14 @@
MTHCA_CQ_STATE_DISARMED |
MTHCA_CQ_FLAG_TR);
cq_context->start = cpu_to_be64(0);
- cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 |
- dev->driver_uar.index);
+ cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24);
+ if (ctx)
+ cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index);
+ else
+ cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index);
cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn);
cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn);
- cq_context->pd = cpu_to_be32(dev->driver_pd.pd_num);
+ cq_context->pd = cpu_to_be32(pdn);
cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey);
cq_context->cqn = cpu_to_be32(cq->cqn);

@@ -845,17 +854,19 @@
return 0;

err_out_free_mr:
- mthca_free_mr(dev, &cq->mr);
- mthca_free_cq_buf(dev, cq);
+ if (cq->is_kernel) {
+ mthca_free_mr(dev, &cq->mr);
+ mthca_free_cq_buf(dev, cq);
+ }

err_out_mailbox:
kfree(mailbox);

- if (mthca_is_memfree(dev))
+ if (cq->is_kernel && mthca_is_memfree(dev))
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);

err_out_ci:
- if (mthca_is_memfree(dev))
+ if (cq->is_kernel)
mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);

err_out_icm:
@@ -895,7 +906,8 @@
int j;

printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n",
- cq->cqn, cq->cons_index, !!next_cqe_sw(cq));
+ cq->cqn, cq->cons_index,
+ cq->is_kernel ? !!next_cqe_sw(cq) : 0);
for (j = 0; j < 16; ++j)
printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j]));
}
@@ -913,15 +925,17 @@
atomic_dec(&cq->refcount);
wait_event(cq->wait, !atomic_read(&cq->refcount));

- mthca_free_mr(dev, &cq->mr);
- mthca_free_cq_buf(dev, cq);
-
- if (mthca_is_memfree(dev)) {
- mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
- mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
- mthca_table_put(dev, dev->cq_table.table, cq->cqn);
+ if (cq->is_kernel) {
+ mthca_free_mr(dev, &cq->mr);
+ mthca_free_cq_buf(dev, cq);
+ if (mthca_is_memfree(dev)) {
+ mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index);
+ mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index);
+ }
}

+ if (mthca_is_memfree(dev))
+ mthca_table_put(dev, dev->cq_table.table, cq->cqn);
mthca_free(&dev->cq_table.alloc, cq->cqn);
kfree(mailbox);
}
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_dev.h 2005-04-04 14:57:12.254750421 -0700
+++ linux-export/drivers/infiniband/hw/mthca/mthca_dev.h 2005-04-04 14:58:12.411669307 -0700
@@ -49,14 +49,6 @@
#define DRV_VERSION "0.06-pre"
#define DRV_RELDATE "November 8, 2004"

-/* XXX remove once SINAI defines make it into kernel.org */
-#ifndef PCI_DEVICE_ID_MELLANOX_SINAI_OLD
-#define PCI_DEVICE_ID_MELLANOX_SINAI_OLD 0x5e8c
-#endif
-#ifndef PCI_DEVICE_ID_MELLANOX_SINAI
-#define PCI_DEVICE_ID_MELLANOX_SINAI 0x6274
-#endif
-
enum {
MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
MTHCA_FLAG_SRQ = 1 << 2,
@@ -413,6 +405,7 @@
int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify);
int mthca_init_cq(struct mthca_dev *dev, int nent,
+ struct mthca_ucontext *ctx, u32 pdn,
struct mthca_cq *cq);
void mthca_free_cq(struct mthca_dev *dev,
struct mthca_cq *cq);
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_memfree.c 2005-04-04 14:57:12.256749986 -0700
+++ linux-export/drivers/infiniband/hw/mthca/mthca_memfree.c 2005-04-04 14:58:12.412669090 -0700
@@ -45,6 +45,15 @@
MTHCA_TABLE_CHUNK_SIZE = 1 << 18
};

+struct mthca_user_db_table {
+ struct semaphore mutex;
+ struct {
+ u64 uvirt;
+ struct scatterlist mem;
+ int refcount;
+ } page[0];
+};
+
void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
{
struct mthca_icm_chunk *chunk, *tmp;
@@ -334,13 +343,132 @@
kfree(table);
}

-static u64 mthca_uarc_virt(struct mthca_dev *dev, int page)
+static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int page)
{
return dev->uar_table.uarc_base +
- dev->driver_uar.index * dev->uar_table.uarc_size +
+ uar->index * dev->uar_table.uarc_size +
page * 4096;
}

+int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index, u64 uaddr)
+{
+ int ret = 0;
+ u8 status;
+ int i;
+
+ if (!mthca_is_memfree(dev))
+ return 0;
+
+ if (index < 0 || index > dev->uar_table.uarc_size / 8)
+ return -EINVAL;
+
+ down(&db_tab->mutex);
+
+ i = index / MTHCA_DB_REC_PER_PAGE;
+
+ if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE) ||
+ (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) ||
+ (uaddr & 4095)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (db_tab->page[i].refcount) {
+ ++db_tab->page[i].refcount;
+ goto out;
+ }
+
+ ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0,
+ &db_tab->page[i].mem.page, NULL);
+ if (ret < 0)
+ goto out;
+
+ db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK;
+
+ ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+ if (ret < 0) {
+ put_page(db_tab->page[i].mem.page);
+ goto out;
+ }
+
+ ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem),
+ mthca_uarc_virt(dev, uar, i), &status);
+ if (!ret && status)
+ ret = -EINVAL;
+ if (ret) {
+ pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+ put_page(db_tab->page[i].mem.page);
+ goto out;
+ }
+
+ db_tab->page[i].uvirt = uaddr;
+ db_tab->page[i].refcount = 1;
+
+out:
+ up(&db_tab->mutex);
+ return ret;
+}
+
+void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index)
+{
+ if (!mthca_is_memfree(dev))
+ return;
+
+ /*
+ * To make our bookkeeping simpler, we don't unmap DB
+ * pages until we clean up the whole db table.
+ */
+
+ down(&db_tab->mutex);
+
+ --db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount;
+
+ up(&db_tab->mutex);
+}
+
+struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev)
+{
+ struct mthca_user_db_table *db_tab;
+ int npages;
+ int i;
+
+ if (!mthca_is_memfree(dev))
+ return NULL;
+
+ npages = dev->uar_table.uarc_size / 4096;
+ db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL);
+ if (!db_tab)
+ return ERR_PTR(-ENOMEM);
+
+ init_MUTEX(&db_tab->mutex);
+ for (i = 0; i < npages; ++i) {
+ db_tab->page[i].refcount = 0;
+ db_tab->page[i].uvirt = 0;
+ }
+
+ return db_tab;
+}
+
+void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab)
+{
+ int i;
+ u8 status;
+
+ if (!mthca_is_memfree(dev))
+ return;
+
+ for (i = 0; i < dev->uar_table.uarc_size / 4096; ++i) {
+ if (db_tab->page[i].uvirt) {
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status);
+ pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
+ put_page(db_tab->page[i].mem.page);
+ }
+ }
+}
+
int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db)
{
int group;
@@ -397,7 +525,8 @@
}
memset(page->db_rec, 0, 4096);

- ret = mthca_MAP_ICM_page(dev, page->mapping, mthca_uarc_virt(dev, i), &status);
+ ret = mthca_MAP_ICM_page(dev, page->mapping,
+ mthca_uarc_virt(dev, &dev->driver_uar, i), &status);
if (!ret && status)
ret = -EINVAL;
if (ret) {
@@ -451,7 +580,7 @@

if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) &&
i >= dev->db_tab->max_group1 - 1) {
- mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status);
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);

dma_free_coherent(&dev->pdev->dev, 4096,
page->db_rec, page->mapping);
@@ -520,7 +649,7 @@
if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE))
mthca_warn(dev, "Kernel UARC page %d not empty\n", i);

- mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status);
+ mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);

dma_free_coherent(&dev->pdev->dev, 4096,
dev->db_tab->page[i].db_rec,
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_memfree.h 2005-04-04 14:57:12.256749986 -0700
+++ linux-export/drivers/infiniband/hw/mthca/mthca_memfree.h 2005-04-04 14:58:12.413668872 -0700
@@ -148,7 +148,7 @@
struct semaphore mutex;
};

-enum {
+enum mthca_db_type {
MTHCA_DB_TYPE_INVALID = 0x0,
MTHCA_DB_TYPE_CQ_SET_CI = 0x1,
MTHCA_DB_TYPE_CQ_ARM = 0x2,
@@ -158,6 +158,17 @@
MTHCA_DB_TYPE_GROUP_SEP = 0x7
};

+struct mthca_user_db_table;
+struct mthca_uar;
+
+int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index, u64 uaddr);
+void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab, int index);
+struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev);
+void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
+ struct mthca_user_db_table *db_tab);
+
int mthca_init_db_tab(struct mthca_dev *dev);
void mthca_cleanup_db_tab(struct mthca_dev *dev);
int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db);
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-04-04 14:57:12.286743464 -0700
+++ linux-export/drivers/infiniband/hw/mthca/mthca_provider.c 2005-04-04 14:58:12.444662133 -0700
@@ -29,13 +29,17 @@
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
- * $Id: mthca_provider.c 2100 2005-03-31 20:43:01Z roland $
+ * $Id: mthca_provider.c 2109 2005-04-04 21:10:34Z roland $
*/

+#include <asm/uaccess.h>
+
#include <ib_smi.h>

#include "mthca_dev.h"
#include "mthca_cmd.h"
+#include "mthca_user.h"
+#include "mthca_memfree.h"

static int mthca_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
@@ -283,11 +287,78 @@
return err;
}

-static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev)
+static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
+ const void __user *udata, int udatalen)
+{
+ struct mthca_alloc_ucontext ucmd;
+ struct mthca_alloc_ucontext_resp uresp;
+ struct mthca_ucontext *context;
+ int err;
+
+ if (copy_from_user(&ucmd, udata, sizeof ucmd))
+ return ERR_PTR(-EFAULT);
+
+ uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
+ if (mthca_is_memfree(to_mdev(ibdev)))
+ uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size;
+ else
+ uresp.uarc_size = 0;
+
+ context = kmalloc(sizeof *context, GFP_KERNEL);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+
+ err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
+ if (err) {
+ kfree(context);
+ return ERR_PTR(err);
+ }
+
+ context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
+ if (IS_ERR(context->db_tab)) {
+ err = PTR_ERR(context->db_tab);
+ mthca_uar_free(to_mdev(ibdev), &context->uar);
+ kfree(context);
+ return ERR_PTR(err);
+ }
+
+ if (copy_to_user((void __user *) (unsigned long) ucmd.respbuf,
+ &uresp, sizeof uresp)) {
+ mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
+ mthca_uar_free(to_mdev(ibdev), &context->uar);
+ kfree(context);
+ return ERR_PTR(-EFAULT);
+ }
+
+ return &context->ibucontext;
+}
+
+static int mthca_dealloc_ucontext(struct ib_ucontext *context)
{
+ mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab);
+ mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
+ kfree(to_mucontext(context));
+
+ return 0;
+}
+
+static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ const void __user *udata, int udatalen)
+{
+ struct mthca_alloc_pd ucmd;
struct mthca_pd *pd;
int err;

+ if (context) {
+ if (udatalen != sizeof ucmd)
+ return ERR_PTR(-EINVAL);
+
+ if (copy_from_user(&ucmd, udata, sizeof ucmd))
+ return ERR_PTR(-EFAULT);
+ }
+
pd = kmalloc(sizeof *pd, GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
@@ -298,6 +369,14 @@
return ERR_PTR(err);
}

+ if (context) {
+ if (put_user(pd->pd_num, (u32 __user *) (unsigned long) ucmd.pdnbuf)) {
+ mthca_pd_free(to_mdev(ibdev), pd);
+ kfree(pd);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+
return &pd->ibpd;
}

@@ -337,8 +416,10 @@
}

static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr)
+ struct ib_qp_init_attr *init_attr,
+ const void __user *udata, int udatalen)
{
+ struct mthca_create_qp ucmd;
struct mthca_qp *qp;
int err;

@@ -347,10 +428,48 @@
case IB_QPT_UC:
case IB_QPT_UD:
{
+ struct mthca_ucontext *context;
+
qp = kmalloc(sizeof *qp, GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);

+ if (pd->uobject) {
+ context = to_mucontext(pd->uobject->context);
+
+ if (udatalen != sizeof ucmd)
+ return ERR_PTR(-EINVAL);
+
+ if (copy_from_user(&ucmd, udata, sizeof ucmd))
+ return ERR_PTR(-EFAULT);
+
+ err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
+ context->db_tab,
+ ucmd.sq_db_index, ucmd.sq_db_page);
+ if (err) {
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
+ context->db_tab,
+ ucmd.rq_db_index, ucmd.rq_db_page);
+ if (err) {
+ mthca_unmap_user_db(to_mdev(pd->device),
+ &context->uar,
+ context->db_tab,
+ ucmd.sq_db_index);
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+ }
+
+ if (pd->uobject) {
+ qp->mr.ibmr.lkey = ucmd.lkey;
+ qp->sq.db_index = ucmd.sq_db_index;
+ qp->rq.db_index = ucmd.rq_db_index;
+ }
+
qp->sq.max = init_attr->cap.max_send_wr;
qp->rq.max = init_attr->cap.max_recv_wr;
qp->sq.max_gs = init_attr->cap.max_send_sge;
@@ -361,12 +480,30 @@
to_mcq(init_attr->recv_cq),
init_attr->qp_type, init_attr->sq_sig_type,
qp);
+
+ if (err && pd->uobject) {
+ context = to_mucontext(pd->uobject->context);
+
+ mthca_unmap_user_db(to_mdev(pd->device),
+ &context->uar,
+ context->db_tab,
+ ucmd.sq_db_index);
+ mthca_unmap_user_db(to_mdev(pd->device),
+ &context->uar,
+ context->db_tab,
+ ucmd.rq_db_index);
+ }
+
qp->ibqp.qp_num = qp->qpn;
break;
}
case IB_QPT_SMI:
case IB_QPT_GSI:
{
+ /* Don't allow userspace to create special QPs */
+ if (pd->uobject)
+ return ERR_PTR(-EINVAL);
+
qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
@@ -396,42 +533,116 @@
return ERR_PTR(err);
}

- init_attr->cap.max_inline_data = 0;
+ init_attr->cap.max_inline_data = 0;
+ init_attr->cap.max_send_wr = qp->sq.max;
+ init_attr->cap.max_recv_wr = qp->rq.max;

return &qp->ibqp;
}

static int mthca_destroy_qp(struct ib_qp *qp)
{
+ if (qp->uobject) {
+ mthca_unmap_user_db(to_mdev(qp->device),
+ &to_mucontext(qp->uobject->context)->uar,
+ to_mucontext(qp->uobject->context)->db_tab,
+ to_mqp(qp)->sq.db_index);
+ mthca_unmap_user_db(to_mdev(qp->device),
+ &to_mucontext(qp->uobject->context)->uar,
+ to_mucontext(qp->uobject->context)->db_tab,
+ to_mqp(qp)->rq.db_index);
+ }
mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
kfree(qp);
return 0;
}

-static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries)
+static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
+ struct ib_ucontext *context,
+ const void __user *udata, int udatalen)
{
+ struct mthca_create_cq ucmd;
struct mthca_cq *cq;
int nent;
int err;

+ if (context) {
+ if (udatalen != sizeof ucmd)
+ return ERR_PTR(-EINVAL);
+
+ if (copy_from_user(&ucmd, udata, sizeof ucmd))
+ return ERR_PTR(-EFAULT);
+
+ err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab,
+ ucmd.set_db_index, ucmd.set_db_page);
+ if (err)
+ return ERR_PTR(err);
+
+ err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab,
+ ucmd.arm_db_index, ucmd.arm_db_page);
+ if (err)
+ goto err_unmap_set;
+ }
+
cq = kmalloc(sizeof *cq, GFP_KERNEL);
- if (!cq)
- return ERR_PTR(-ENOMEM);
+ if (!cq) {
+ err = -ENOMEM;
+ goto err_unmap_arm;
+ }
+
+ if (context) {
+ cq->mr.ibmr.lkey = ucmd.lkey;
+ cq->set_ci_db_index = ucmd.set_db_index;
+ cq->arm_db_index = ucmd.arm_db_index;
+ }

for (nent = 1; nent <= entries; nent <<= 1)
; /* nothing */

- err = mthca_init_cq(to_mdev(ibdev), nent, cq);
- if (err) {
- kfree(cq);
- cq = ERR_PTR(err);
+ err = mthca_init_cq(to_mdev(ibdev), nent,
+ context ? to_mucontext(context) : NULL,
+ context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
+ cq);
+ if (err)
+ goto err_free;
+
+ if (context && put_user(cq->cqn, (u32 __user *) (unsigned long) ucmd.cqnbuf)) {
+ mthca_free_cq(to_mdev(ibdev), cq);
+ goto err_free;
}

return &cq->ibcq;
+
+err_free:
+ kfree(cq);
+
+err_unmap_arm:
+ if (context)
+ mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab, ucmd.arm_db_index);
+
+err_unmap_set:
+ if (context)
+ mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
+ to_mucontext(context)->db_tab, ucmd.set_db_index);
+
+ return ERR_PTR(err);
}

static int mthca_destroy_cq(struct ib_cq *cq)
{
+ if (cq->uobject) {
+ mthca_unmap_user_db(to_mdev(cq->device),
+ &to_mucontext(cq->uobject->context)->uar,
+ to_mucontext(cq->uobject->context)->db_tab,
+ to_mcq(cq)->arm_db_index);
+ mthca_unmap_user_db(to_mdev(cq->device),
+ &to_mucontext(cq->uobject->context)->uar,
+ to_mucontext(cq->uobject->context)->db_tab,
+ to_mcq(cq)->set_ci_db_index);
+ }
mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
kfree(cq);

@@ -558,6 +769,57 @@
convert_access(acc), mr);

if (err) {
+ kfree(page_list);
+ kfree(mr);
+ return ERR_PTR(err);
+ }
+
+ kfree(page_list);
+ return &mr->ibmr;
+}
+
+static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
+ int acc, const void __user *udata, int udatalen)
+{
+ struct ib_umem_chunk *chunk;
+ int npages = 0;
+ u64 *page_list;
+ struct mthca_mr *mr;
+ int shift;
+ int i, j, k;
+ int err;
+
+ shift = ffs(region->page_size) - 1;
+
+ mr = kmalloc(sizeof *mr, GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ list_for_each_entry(chunk, &region->chunk_list, list)
+ npages += chunk->nents;
+
+ page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
+ if (!page_list) {
+ kfree(mr);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ i = 0;
+
+ list_for_each_entry(chunk, &region->chunk_list, list)
+ for (j = 0; j < chunk->nmap; ++j)
+ for (k = 0; k < sg_dma_len(&chunk->page_list[j]) >> shift; ++k)
+ page_list[i++] = sg_dma_address(&chunk->page_list[j]) +
+ region->page_size * k;
+
+ err = mthca_mr_alloc_phys(to_mdev(pd->device),
+ to_mpd(pd)->pd_num,
+ page_list, shift, npages,
+ region->virt_base, region->length,
+ convert_access(acc), mr);
+
+ if (err) {
+ kfree(page_list);
kfree(mr);
return ERR_PTR(err);
}
@@ -574,6 +836,22 @@
return 0;
}

+static int mthca_mmap_uar(struct ib_ucontext *context,
+ struct vm_area_struct *vma)
+{
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ if (remap_pfn_range(vma, vma->vm_start,
+ to_mucontext(context)->uar.pfn,
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
+
+ return 0;
+}
+
static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
struct ib_fmr_attr *fmr_attr)
{
@@ -690,6 +968,8 @@
int i;

strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
+ dev->ib_dev.owner = THIS_MODULE;
+
dev->ib_dev.node_type = IB_NODE_CA;
dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
dev->ib_dev.dma_device = &dev->pdev->dev;
@@ -699,6 +979,8 @@
dev->ib_dev.modify_port = mthca_modify_port;
dev->ib_dev.query_pkey = mthca_query_pkey;
dev->ib_dev.query_gid = mthca_query_gid;
+ dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext;
+ dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext;
dev->ib_dev.alloc_pd = mthca_alloc_pd;
dev->ib_dev.dealloc_pd = mthca_dealloc_pd;
dev->ib_dev.create_ah = mthca_ah_create;
@@ -711,6 +993,7 @@
dev->ib_dev.poll_cq = mthca_poll_cq;
dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
+ dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
dev->ib_dev.dereg_mr = mthca_dereg_mr;

if (dev->mthca_flags & MTHCA_FLAG_FMR) {
@@ -726,6 +1009,7 @@
dev->ib_dev.attach_mcast = mthca_multicast_attach;
dev->ib_dev.detach_mcast = mthca_multicast_detach;
dev->ib_dev.process_mad = mthca_process_mad;
+ dev->ib_dev.mmap = mthca_mmap_uar;

if (mthca_is_memfree(dev)) {
dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq;
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_provider.h 2005-04-04 14:57:12.287743246 -0700
+++ linux-export/drivers/infiniband/hw/mthca/mthca_provider.h 2005-04-04 14:58:12.445661916 -0700
@@ -54,6 +54,14 @@
int index;
};

+struct mthca_user_db_table;
+
+struct mthca_ucontext {
+ struct ib_ucontext ibucontext;
+ struct mthca_uar uar;
+ struct mthca_user_db_table *db_tab;
+};
+
struct mthca_mr {
struct ib_mr ibmr;
int order;
@@ -167,6 +175,7 @@
int cqn;
u32 cons_index;
int is_direct;
+ int is_kernel;

/* Next fields are Arbel only */
int set_ci_db_index;
@@ -236,6 +245,11 @@
dma_addr_t header_dma;
};

+static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct mthca_ucontext, ibucontext);
+}
+
static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr)
{
return container_of(ibmr, struct mthca_fmr, ibmr);
--- linux-export.orig/drivers/infiniband/hw/mthca/mthca_qp.c 2005-04-04 14:57:12.320736072 -0700
+++ linux-export/drivers/infiniband/hw/mthca/mthca_qp.c 2005-04-04 14:58:12.491651915 -0700
@@ -652,7 +652,11 @@

/* leave arbel_sched_queue as 0 */

- qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
+ if (qp->ibqp.uobject)
+ qp_context->usr_page =
+ cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index);
+ else
+ qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
qp_context->local_qpn = cpu_to_be32(qp->qpn);
if (attr_mask & IB_QP_DEST_QPN) {
qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num);
@@ -917,6 +921,15 @@

qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift,
1 << qp->sq.wqe_shift);
+
+ /*
+ * If this is a userspace QP, we don't actually have to
+ * allocate anything. All we need is to calculate the WQE
+ * sizes and the send_wqe_offset, so we're done now.
+ */
+ if (pd->ibpd.uobject)
+ return 0;
+
size = PAGE_ALIGN(qp->send_wqe_offset +
(qp->sq.max << qp->sq.wqe_shift));

@@ -1015,10 +1028,33 @@
return err;
}

-static int mthca_alloc_memfree(struct mthca_dev *dev,
+static void mthca_free_wqe_buf(struct mthca_dev *dev,
struct mthca_qp *qp)
{
- int ret = 0;
+ int i;
+ int size = PAGE_ALIGN(qp->send_wqe_offset +
+ (qp->sq.max << qp->sq.wqe_shift));
+
+ if (qp->is_direct) {
+ pci_free_consistent(dev->pdev, size,
+ qp->queue.direct.buf,
+ pci_unmap_addr(&qp->queue.direct, mapping));
+ } else {
+ for (i = 0; i < size / PAGE_SIZE; ++i) {
+ pci_free_consistent(dev->pdev, PAGE_SIZE,
+ qp->queue.page_list[i].buf,
+ pci_unmap_addr(&qp->queue.page_list[i],
+ mapping));
+ }
+ }
+
+ kfree(qp->wrid);
+}
+
+static int mthca_map_memfree(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ int ret;

if (mthca_is_memfree(dev)) {
ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn);
@@ -1029,35 +1065,15 @@
if (ret)
goto err_qpc;

- ret = mthca_table_get(dev, dev->qp_table.rdb_table,
- qp->qpn << dev->qp_table.rdb_shift);
- if (ret)
- goto err_eqpc;
-
- qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
- qp->qpn, &qp->rq.db);
- if (qp->rq.db_index < 0) {
- ret = -ENOMEM;
- goto err_rdb;
- }
+ ret = mthca_table_get(dev, dev->qp_table.rdb_table,
+ qp->qpn << dev->qp_table.rdb_shift);
+ if (ret)
+ goto err_eqpc;

- qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
- qp->qpn, &qp->sq.db);
- if (qp->sq.db_index < 0) {
- ret = -ENOMEM;
- goto err_rq_db;
- }
}

return 0;

-err_rq_db:
- mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
-
-err_rdb:
- mthca_table_put(dev, dev->qp_table.rdb_table,
- qp->qpn << dev->qp_table.rdb_shift);
-
err_eqpc:
mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);

@@ -1067,16 +1083,43 @@
return ret;
}

+static void mthca_unmap_memfree(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ if (mthca_is_memfree(dev)) {
+ mthca_table_put(dev, dev->qp_table.rdb_table,
+ qp->qpn << dev->qp_table.rdb_shift);
+ mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
+ mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
+ }
+}
+
+static int mthca_alloc_memfree(struct mthca_dev *dev,
+ struct mthca_qp *qp)
+{
+ int ret = 0;
+
+ if (mthca_is_memfree(dev)) {
+ qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ,
+ qp->qpn, &qp->rq.db);
+ if (qp->rq.db_index < 0)
+ return ret;
+
+ qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ,
+ qp->qpn, &qp->sq.db);
+ if (qp->sq.db_index < 0)
+ mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
+ }
+
+ return ret;
+}
+
static void mthca_free_memfree(struct mthca_dev *dev,
struct mthca_qp *qp)
{
if (mthca_is_memfree(dev)) {
mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index);
mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index);
- mthca_table_put(dev, dev->qp_table.rdb_table,
- qp->qpn << dev->qp_table.rdb_shift);
- mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn);
- mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn);
}
}

@@ -1108,13 +1151,28 @@
mthca_wq_init(&qp->sq);
mthca_wq_init(&qp->rq);

- ret = mthca_alloc_memfree(dev, qp);
+ ret = mthca_map_memfree(dev, qp);
if (ret)
return ret;

ret = mthca_alloc_wqe_buf(dev, pd, qp);
if (ret) {
- mthca_free_memfree(dev, qp);
+ mthca_unmap_memfree(dev, qp);
+ return ret;
+ }
+
+ /*
+ * If this is a userspace QP, we're done now. The doorbells
+ * will be allocated and buffers will be initialized in
+ * userspace.
+ */
+ if (pd->ibpd.uobject)
+ return 0;
+
+ ret = mthca_alloc_memfree(dev, qp);
+ if (ret) {
+ mthca_free_wqe_buf(dev, qp);
+ mthca_unmap_memfree(dev, qp);
return ret;
}

@@ -1274,8 +1332,6 @@
struct mthca_qp *qp)
{
u8 status;
- int size;
- int i;
struct mthca_cq *send_cq;
struct mthca_cq *recv_cq;

@@ -1305,31 +1361,22 @@
if (qp->state != IB_QPS_RESET)
mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status);

- mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn);
- if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
- mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn);
-
- mthca_free_mr(dev, &qp->mr);
-
- size = PAGE_ALIGN(qp->send_wqe_offset +
- (qp->sq.max << qp->sq.wqe_shift));
+ /*
+ * If this is a userspace QP, the buffers, MR, CQs and so on
+ * will be cleaned up in userspace, so all we have to do is
+ * unref the mem-free tables and free the QPN in our table.
+ */
+ if (!qp->ibqp.uobject) {
+ mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn);
+ if (qp->ibqp.send_cq != qp->ibqp.recv_cq)
+ mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn);

- if (qp->is_direct) {
- pci_free_consistent(dev->pdev, size,
- qp->queue.direct.buf,
- pci_unmap_addr(&qp->queue.direct, mapping));
- } else {
- for (i = 0; i < size / PAGE_SIZE; ++i) {
- pci_free_consistent(dev->pdev, PAGE_SIZE,
- qp->queue.page_list[i].buf,
- pci_unmap_addr(&qp->queue.page_list[i],
- mapping));
- }
+ mthca_free_mr(dev, &qp->mr);
+ mthca_free_memfree(dev, qp);
+ mthca_free_wqe_buf(dev, qp);
}

- kfree(qp->wrid);
-
- mthca_free_memfree(dev, qp);
+ mthca_unmap_memfree(dev, qp);

if (is_sqp(dev, qp)) {
atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count));
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-export/drivers/infiniband/hw/mthca/mthca_user.h 2005-04-04 14:58:12.491651915 -0700
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2005 Topspin Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef MTHCA_USER_H
+#define MTHCA_USER_H
+
+#include <linux/types.h>
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+
+struct mthca_alloc_ucontext {
+ __u64 respbuf;
+};
+
+struct mthca_alloc_ucontext_resp {
+ __u32 qp_tab_size;
+ __u32 uarc_size;
+};
+
+struct mthca_alloc_pd {
+ __u64 pdnbuf;
+};
+
+struct mthca_alloc_pd_resp {
+ __u32 pdn;
+ __u32 reserved;
+};
+
+struct mthca_create_cq {
+ __u64 cqnbuf;
+ __u32 lkey;
+ __u32 pdn;
+ __u64 arm_db_page;
+ __u64 set_db_page;
+ __u32 arm_db_index;
+ __u32 set_db_index;
+};
+
+struct mthca_create_cq_resp {
+ __u32 cqn;
+ __u32 reserved;
+};
+
+struct mthca_create_qp {
+ __u32 lkey;
+ __u32 reserved;
+ __u64 sq_db_page;
+ __u64 rq_db_page;
+ __u32 sq_db_index;
+ __u32 rq_db_index;
+};
+
+#endif /* MTHCA_USER_H */

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/