From 1fe0cb848890a3f4dd965fdce8b4108feb03971f Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Wed, 12 Jun 2013 15:20:36 +0200 Subject: IB/srp: Fix remove_one crash due to resource exhaustion If the add_one callback fails during driver load no resources are allocated so there isn't a need to release any resources. Trying to clean the resource may lead to the following kernel panic: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] srp_remove_one+0x31/0x240 [ib_srp] RIP: 0010:[] [] srp_remove_one+0x31/0x240 [ib_srp] Process rmmod (pid: 4562, threadinfo ffff8800dd738000, task ffff8801167e60c0) Call Trace: [] ib_unregister_client+0x4e/0x120 [ib_core] [] srp_cleanup_module+0x15/0x71 [ib_srp] [] sys_delete_module+0x194/0x260 [] system_call_fastpath+0x16/0x1b Signed-off-by: Dotan Barak Reviewed-by: Eli Cohen Signed-off-by: Bart Van Assche Acked-by: Sebastian Riemer Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband/ulp/srp/ib_srp.c') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 7ccf3284dda3..368d1606e16f 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2507,6 +2507,8 @@ static void srp_remove_one(struct ib_device *device) struct srp_target_port *target; srp_dev = ib_get_client_data(device, &srp_client); + if (!srp_dev) + return; list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) { device_unregister(&host->dev); -- cgit v1.2.3 From 086f44f58855ae18bab19fb794cce6c6d2c6143b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 12 Jun 2013 15:23:04 +0200 Subject: IB/srp: Avoid skipping srp_reset_host() after a transport error The SCSI error handler assumes that the transport layer is operational if an eh_abort_handler() returns SUCCESS. Hence srp_abort() only should return SUCCESS if sending the ABORT TASK task management function succeeded. This patch avoids the SCSI error handler skipping the srp_reset_host() call after a transport layer error. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/ulp/srp/ib_srp.c') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 368d1606e16f..759c55b9685f 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1744,18 +1744,23 @@ static int srp_abort(struct scsi_cmnd *scmnd) { struct srp_target_port *target = host_to_target(scmnd->device->host); struct srp_request *req = (struct srp_request *) scmnd->host_scribble; + int ret; shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); if (!req || !srp_claim_req(target, req, scmnd)) return FAILED; - srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, - SRP_TSK_ABORT_TASK); + if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, + SRP_TSK_ABORT_TASK) == 0 || + target->transport_offline) + ret = SUCCESS; + else + ret = FAILED; srp_free_req(target, req, scmnd, 0); scmnd->result = DID_ABORT << 16; scmnd->scsi_done(scmnd); - return SUCCESS; + return ret; } static int srp_reset_device(struct scsi_cmnd *scmnd) -- cgit v1.2.3 From 2742c1dadde602baea6f0547c028154aebd6f1ca Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 12 Jun 2013 15:24:25 +0200 Subject: IB/srp: Skip host settle delay The SRP initiator implements host reset by reconnecting to the SRP target. That means that communication with the target is possible as soon as host reset finished. Hence skip the host settle delay. Signed-off-by: Bart Van Assche Reviewed-by: Sebastian Riemer Reviewed-by: Christoph Hellwig Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/ulp/srp/ib_srp.c') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 759c55b9685f..bc13c8db7fc2 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1951,6 +1951,7 @@ static struct scsi_host_template srp_template = { .eh_abort_handler = srp_abort, .eh_device_reset_handler = srp_reset_device, .eh_host_reset_handler = srp_reset_host, + .skip_settle_delay = true, .sg_tablesize = SRP_DEF_SG_TABLESIZE, .can_queue = SRP_CMD_SQ_SIZE, .this_id = -1, -- cgit v1.2.3 From 99e1c1398f44a056b16e78122133988c82b66d97 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 28 Jun 2013 14:49:58 +0200 Subject: IB/srp: Fail I/O fast if target offline If reconnecting failed we know that no command completion will be received anymore. Hence let the SCSI error handler fail such commands immediately. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband/ulp/srp/ib_srp.c') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index bc13c8db7fc2..1f331011653e 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1754,6 +1754,8 @@ static int srp_abort(struct scsi_cmnd *scmnd) SRP_TSK_ABORT_TASK) == 0 || target->transport_offline) ret = SUCCESS; + else if (target->transport_offline) + ret = FAST_IO_FAIL; else ret = FAILED; srp_free_req(target, req, scmnd, 0); -- cgit v1.2.3 From 96fc248a4c6fe1e5ffac4903b39d2dd5cbe2dc9a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 28 Jun 2013 14:51:26 +0200 Subject: IB/srp: Maintain a single connection per I_T nexus An SRP target is required to maintain a single connection between initiator and target. This means that if the 'add_target' attribute is used to create a second connection to a target, the first connection will be logged out and that the SCSI error handler will kick in. The SCSI error handler will cause the SRP initiator to reconnect, which will cause I/O over the second connection to fail. Avoid such ping-pong behavior by disabling relogins. If reconnecting manually is necessary, that is possible by deleting and recreating an rport via sysfs. Signed-off-by: Bart Van Assche Signed-off-by: Sebastian Riemer Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 44 +++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/ulp/srp/ib_srp.c') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 1f331011653e..830ef0037087 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -542,11 +542,11 @@ static void srp_remove_work(struct work_struct *work) WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); + srp_remove_target(target); + spin_lock(&target->srp_host->target_lock); list_del(&target->list); spin_unlock(&target->srp_host->target_lock); - - srp_remove_target(target); } static void srp_rport_delete(struct srp_rport *rport) @@ -2009,6 +2009,36 @@ static struct class srp_class = { .dev_release = srp_release_dev }; +/** + * srp_conn_unique() - check whether the connection to a target is unique + */ +static bool srp_conn_unique(struct srp_host *host, + struct srp_target_port *target) +{ + struct srp_target_port *t; + bool ret = false; + + if (target->state == SRP_TARGET_REMOVED) + goto out; + + ret = true; + + spin_lock(&host->target_lock); + list_for_each_entry(t, &host->target_list, list) { + if (t != target && + target->id_ext == t->id_ext && + target->ioc_guid == t->ioc_guid && + target->initiator_ext == t->initiator_ext) { + ret = false; + break; + } + } + spin_unlock(&host->target_lock); + +out: + return ret; +} + /* * Target ports are added by writing * @@ -2265,6 +2295,16 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto err; + if (!srp_conn_unique(target->srp_host, target)) { + shost_printk(KERN_INFO, target->scsi_host, + PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n", + be64_to_cpu(target->id_ext), + be64_to_cpu(target->ioc_guid), + be64_to_cpu(target->initiator_ext)); + ret = -EEXIST; + goto err; + } + if (!host->srp_dev->fmr_pool && !target->allow_ext_sg && target->cmd_sg_cnt < target->sg_tablesize) { pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); -- cgit v1.2.3 From 4b5e5f41c8e114bb856347134657eb9e1cccc822 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 28 Jun 2013 14:57:42 +0200 Subject: IB/srp: Make HCA completion vector configurable Several InfiniBand HCAs allow configuring the completion vector per CQ. This allows spreading the workload created by IB completion interrupts over multiple MSI-X vectors and hence over multiple CPU cores. In other words, configuring the completion vector properly not only allows reducing latency on an initiator connected to multiple SRP targets but also allows improving throughput. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/ulp/srp/ib_srp.c') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 830ef0037087..9b30366cb017 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -231,14 +231,16 @@ static int srp_create_target_ib(struct srp_target_port *target) return -ENOMEM; recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, - srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0); + srp_recv_completion, NULL, target, SRP_RQ_SIZE, + target->comp_vector); if (IS_ERR(recv_cq)) { ret = PTR_ERR(recv_cq); goto err; } send_cq = ib_create_cq(target->srp_host->srp_dev->dev, - srp_send_completion, NULL, target, SRP_SQ_SIZE, 0); + srp_send_completion, NULL, target, SRP_SQ_SIZE, + target->comp_vector); if (IS_ERR(send_cq)) { ret = PTR_ERR(send_cq); goto err_recv_cq; @@ -1898,6 +1900,14 @@ static ssize_t show_local_ib_device(struct device *dev, return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name); } +static ssize_t show_comp_vector(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct srp_target_port *target = host_to_target(class_to_shost(dev)); + + return sprintf(buf, "%d\n", target->comp_vector); +} + static ssize_t show_cmd_sg_entries(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1924,6 +1934,7 @@ static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL); static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL); static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL); static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL); +static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL); static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL); static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL); @@ -1938,6 +1949,7 @@ static struct device_attribute *srp_host_attrs[] = { &dev_attr_zero_req_lim, &dev_attr_local_ib_port, &dev_attr_local_ib_device, + &dev_attr_comp_vector, &dev_attr_cmd_sg_entries, &dev_attr_allow_ext_sg, NULL @@ -2061,6 +2073,7 @@ enum { SRP_OPT_CMD_SG_ENTRIES = 1 << 9, SRP_OPT_ALLOW_EXT_SG = 1 << 10, SRP_OPT_SG_TABLESIZE = 1 << 11, + SRP_OPT_COMP_VECTOR = 1 << 12, SRP_OPT_ALL = (SRP_OPT_ID_EXT | SRP_OPT_IOC_GUID | SRP_OPT_DGID | @@ -2081,6 +2094,7 @@ static const match_table_t srp_opt_tokens = { { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" }, { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" }, { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" }, + { SRP_OPT_COMP_VECTOR, "comp_vector=%u" }, { SRP_OPT_ERR, NULL } }; @@ -2236,6 +2250,14 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) target->sg_tablesize = token; break; + case SRP_OPT_COMP_VECTOR: + if (match_int(args, &token) || token < 0) { + pr_warn("bad comp_vector parameter '%s'\n", p); + goto out; + } + target->comp_vector = token; + break; + default: pr_warn("unknown parameter or missing value '%s' in target creation request\n", p); -- cgit v1.2.3 From e8ca413558de8ea235a1223074c4ed09616b9034 Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Fri, 28 Jun 2013 14:59:08 +0200 Subject: IB/srp: Bump driver version and release date Signed-off-by: Vu Pham Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/ulp/srp/ib_srp.c') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 9b30366cb017..9d8b46eafdb2 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -53,8 +53,8 @@ #define DRV_NAME "ib_srp" #define PFX DRV_NAME ": " -#define DRV_VERSION "0.2" -#define DRV_RELDATE "November 1, 2005" +#define DRV_VERSION "1.0" +#define DRV_RELDATE "July 1, 2013" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator " -- cgit v1.2.3 From 80d5e8a235624cd3c0e24be7c070fd6f445e590d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 10 Jul 2013 17:36:35 +0200 Subject: IB/srp: Let srp_abort() return FAST_IO_FAIL if TL offline If the transport layer is offline it is more appropriate to let srp_abort() return FAST_IO_FAIL instead of SUCCESS. Reported-by: Sebastian Riemer Acked-by: David Dillow Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband/ulp/srp/ib_srp.c') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 9d8b46eafdb2..f93baf8254c4 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1753,8 +1753,7 @@ static int srp_abort(struct scsi_cmnd *scmnd) if (!req || !srp_claim_req(target, req, scmnd)) return FAILED; if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, - SRP_TSK_ABORT_TASK) == 0 || - target->transport_offline) + SRP_TSK_ABORT_TASK) == 0) ret = SUCCESS; else if (target->transport_offline) ret = FAST_IO_FAIL; -- cgit v1.2.3