summaryrefslogtreecommitdiff
path: root/net/sunrpc
diff options
context:
space:
mode:
Diffstat (limited to 'net/sunrpc')
-rw-r--r--net/sunrpc/auth.c38
-rw-r--r--net/sunrpc/auth_generic.c20
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c295
-rw-r--r--net/sunrpc/auth_gss/gss_generic_token.c6
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c18
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c28
-rw-r--r--net/sunrpc/clnt.c28
-rw-r--r--net/sunrpc/rpc_pipe.c42
-rw-r--r--net/sunrpc/rpcb_clnt.c57
-rw-r--r--net/sunrpc/svcauth_unix.c24
-rw-r--r--net/sunrpc/svcsock.c15
-rw-r--r--net/sunrpc/xdr.c50
-rw-r--r--net/sunrpc/xprt.c12
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c29
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c4
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c2
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c16
-rw-r--r--net/sunrpc/xprtrdma/transport.c55
-rw-r--r--net/sunrpc/xprtrdma/verbs.c747
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h17
-rw-r--r--net/sunrpc/xprtsock.c99
21 files changed, 1118 insertions, 484 deletions
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 6bfea9ed6869..0c431c277af5 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -83,10 +83,8 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
if (flavor >= RPC_AUTH_MAXFLAVOR)
goto out;
-#ifdef CONFIG_KMOD
if ((ops = auth_flavors[flavor]) == NULL)
request_module("rpc-auth-%u", flavor);
-#endif
spin_lock(&rpc_authflavor_lock);
ops = auth_flavors[flavor];
if (ops == NULL || !try_module_get(ops->owner)) {
@@ -230,19 +228,21 @@ static int
rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
{
spinlock_t *cache_lock;
- struct rpc_cred *cred;
+ struct rpc_cred *cred, *next;
unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM;
- while (!list_empty(&cred_unused)) {
- cred = list_entry(cred_unused.next, struct rpc_cred, cr_lru);
+ list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) {
+
+ /* Enforce a 60 second garbage collection moratorium */
+ if (time_in_range_open(cred->cr_expire, expired, jiffies) &&
+ test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
+ continue;
+
list_del_init(&cred->cr_lru);
number_cred_unused--;
if (atomic_read(&cred->cr_count) != 0)
continue;
- /* Enforce a 5 second garbage collection moratorium */
- if (time_in_range(cred->cr_expire, expired, jiffies) &&
- test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0)
- continue;
+
cache_lock = &cred->cr_auth->au_credcache->lock;
spin_lock(cache_lock);
if (atomic_read(&cred->cr_count) == 0) {
@@ -350,16 +350,18 @@ EXPORT_SYMBOL_GPL(rpcauth_lookup_credcache);
struct rpc_cred *
rpcauth_lookupcred(struct rpc_auth *auth, int flags)
{
- struct auth_cred acred = {
- .uid = current->fsuid,
- .gid = current->fsgid,
- .group_info = current->group_info,
- };
+ struct auth_cred acred;
struct rpc_cred *ret;
+ const struct cred *cred = current_cred();
dprintk("RPC: looking up %s cred\n",
auth->au_ops->au_name);
- get_group_info(acred.group_info);
+
+ memset(&acred, 0, sizeof(acred));
+ acred.uid = cred->fsuid;
+ acred.gid = cred->fsgid;
+ acred.group_info = get_group_info(((struct cred *)cred)->group_info);
+
ret = auth->au_ops->lookup_cred(auth, &acred, flags);
put_group_info(acred.group_info);
return ret;
@@ -455,7 +457,7 @@ need_lock:
}
if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
rpcauth_unhash_cred(cred);
- else if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
+ if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
cred->cr_expire = jiffies;
list_add_tail(&cred->cr_lru, &cred_unused);
number_cred_unused++;
@@ -513,7 +515,7 @@ rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp,
if (cred->cr_ops->crwrap_req)
return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
/* By default, we encode the arguments normally. */
- return rpc_call_xdrproc(encode, rqstp, data, obj);
+ return encode(rqstp, data, obj);
}
int
@@ -528,7 +530,7 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
data, obj);
/* By default, we decode the arguments normally. */
- return rpc_call_xdrproc(decode, rqstp, data, obj);
+ return decode(rqstp, data, obj);
}
int
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index 744b79fdcb19..4028502f0528 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -133,13 +133,29 @@ static int
generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
{
struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base);
+ int i;
if (gcred->acred.uid != acred->uid ||
gcred->acred.gid != acred->gid ||
- gcred->acred.group_info != acred->group_info ||
gcred->acred.machine_cred != acred->machine_cred)
- return 0;
+ goto out_nomatch;
+
+ /* Optimisation in the case where pointers are identical... */
+ if (gcred->acred.group_info == acred->group_info)
+ goto out_match;
+
+ /* Slow path... */
+ if (gcred->acred.group_info->ngroups != acred->group_info->ngroups)
+ goto out_nomatch;
+ for (i = 0; i < gcred->acred.group_info->ngroups; i++) {
+ if (GROUP_AT(gcred->acred.group_info, i) !=
+ GROUP_AT(acred->group_info, i))
+ goto out_nomatch;
+ }
+out_match:
return 1;
+out_nomatch:
+ return 0;
}
void __init rpc_init_generic_auth(void)
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 853a4142cea1..e630b38a6047 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -72,11 +72,25 @@ struct gss_auth {
struct gss_api_mech *mech;
enum rpc_gss_svc service;
struct rpc_clnt *client;
- struct dentry *dentry;
+ /*
+ * There are two upcall pipes; dentry[1], named "gssd", is used
+ * for the new text-based upcall; dentry[0] is named after the
+ * mechanism (for example, "krb5") and exists for
+ * backwards-compatibility with older gssd's.
+ */
+ struct dentry *dentry[2];
};
+/* pipe_version >= 0 if and only if someone has a pipe open. */
+static int pipe_version = -1;
+static atomic_t pipe_users = ATOMIC_INIT(0);
+static DEFINE_SPINLOCK(pipe_version_lock);
+static struct rpc_wait_queue pipe_version_rpc_waitqueue;
+static DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue);
+
static void gss_free_ctx(struct gss_cl_ctx *);
-static struct rpc_pipe_ops gss_upcall_ops;
+static struct rpc_pipe_ops gss_upcall_ops_v0;
+static struct rpc_pipe_ops gss_upcall_ops_v1;
static inline struct gss_cl_ctx *
gss_get_ctx(struct gss_cl_ctx *ctx)
@@ -220,6 +234,7 @@ err:
return p;
}
+#define UPCALL_BUF_LEN 128
struct gss_upcall_msg {
atomic_t count;
@@ -227,16 +242,41 @@ struct gss_upcall_msg {
struct rpc_pipe_msg msg;
struct list_head list;
struct gss_auth *auth;
+ struct rpc_inode *inode;
struct rpc_wait_queue rpc_waitqueue;
wait_queue_head_t waitqueue;
struct gss_cl_ctx *ctx;
+ char databuf[UPCALL_BUF_LEN];
};
+static int get_pipe_version(void)
+{
+ int ret;
+
+ spin_lock(&pipe_version_lock);
+ if (pipe_version >= 0) {
+ atomic_inc(&pipe_users);
+ ret = pipe_version;
+ } else
+ ret = -EAGAIN;
+ spin_unlock(&pipe_version_lock);
+ return ret;
+}
+
+static void put_pipe_version(void)
+{
+ if (atomic_dec_and_lock(&pipe_users, &pipe_version_lock)) {
+ pipe_version = -1;
+ spin_unlock(&pipe_version_lock);
+ }
+}
+
static void
gss_release_msg(struct gss_upcall_msg *gss_msg)
{
if (!atomic_dec_and_test(&gss_msg->count))
return;
+ put_pipe_version();
BUG_ON(!list_empty(&gss_msg->list));
if (gss_msg->ctx != NULL)
gss_put_ctx(gss_msg->ctx);
@@ -266,8 +306,8 @@ __gss_find_upcall(struct rpc_inode *rpci, uid_t uid)
static inline struct gss_upcall_msg *
gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg)
{
- struct inode *inode = gss_auth->dentry->d_inode;
- struct rpc_inode *rpci = RPC_I(inode);
+ struct rpc_inode *rpci = gss_msg->inode;
+ struct inode *inode = &rpci->vfs_inode;
struct gss_upcall_msg *old;
spin_lock(&inode->i_lock);
@@ -293,8 +333,7 @@ __gss_unhash_msg(struct gss_upcall_msg *gss_msg)
static void
gss_unhash_msg(struct gss_upcall_msg *gss_msg)
{
- struct gss_auth *gss_auth = gss_msg->auth;
- struct inode *inode = gss_auth->dentry->d_inode;
+ struct inode *inode = &gss_msg->inode->vfs_inode;
if (list_empty(&gss_msg->list))
return;
@@ -310,7 +349,7 @@ gss_upcall_callback(struct rpc_task *task)
struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred,
struct gss_cred, gc_base);
struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall;
- struct inode *inode = gss_msg->auth->dentry->d_inode;
+ struct inode *inode = &gss_msg->inode->vfs_inode;
spin_lock(&inode->i_lock);
if (gss_msg->ctx)
@@ -323,22 +362,75 @@ gss_upcall_callback(struct rpc_task *task)
gss_release_msg(gss_msg);
}
+static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
+{
+ gss_msg->msg.data = &gss_msg->uid;
+ gss_msg->msg.len = sizeof(gss_msg->uid);
+}
+
+static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
+ struct rpc_clnt *clnt, int machine_cred)
+{
+ char *p = gss_msg->databuf;
+ int len = 0;
+
+ gss_msg->msg.len = sprintf(gss_msg->databuf, "mech=%s uid=%d ",
+ gss_msg->auth->mech->gm_name,
+ gss_msg->uid);
+ p += gss_msg->msg.len;
+ if (clnt->cl_principal) {
+ len = sprintf(p, "target=%s ", clnt->cl_principal);
+ p += len;
+ gss_msg->msg.len += len;
+ }
+ if (machine_cred) {
+ len = sprintf(p, "service=* ");
+ p += len;
+ gss_msg->msg.len += len;
+ } else if (!strcmp(clnt->cl_program->name, "nfs4_cb")) {
+ len = sprintf(p, "service=nfs ");
+ p += len;
+ gss_msg->msg.len += len;
+ }
+ len = sprintf(p, "\n");
+ gss_msg->msg.len += len;
+
+ gss_msg->msg.data = gss_msg->databuf;
+ BUG_ON(gss_msg->msg.len > UPCALL_BUF_LEN);
+}
+
+static void gss_encode_msg(struct gss_upcall_msg *gss_msg,
+ struct rpc_clnt *clnt, int machine_cred)
+{
+ if (pipe_version == 0)
+ gss_encode_v0_msg(gss_msg);
+ else /* pipe_version == 1 */
+ gss_encode_v1_msg(gss_msg, clnt, machine_cred);
+}
+
static inline struct gss_upcall_msg *
-gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid)
+gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid, struct rpc_clnt *clnt,
+ int machine_cred)
{
struct gss_upcall_msg *gss_msg;
+ int vers;
gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS);
- if (gss_msg != NULL) {
- INIT_LIST_HEAD(&gss_msg->list);
- rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq");
- init_waitqueue_head(&gss_msg->waitqueue);
- atomic_set(&gss_msg->count, 1);
- gss_msg->msg.data = &gss_msg->uid;
- gss_msg->msg.len = sizeof(gss_msg->uid);
- gss_msg->uid = uid;
- gss_msg->auth = gss_auth;
+ if (gss_msg == NULL)
+ return ERR_PTR(-ENOMEM);
+ vers = get_pipe_version();
+ if (vers < 0) {
+ kfree(gss_msg);
+ return ERR_PTR(vers);
}
+ gss_msg->inode = RPC_I(gss_auth->dentry[vers]->d_inode);
+ INIT_LIST_HEAD(&gss_msg->list);
+ rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq");
+ init_waitqueue_head(&gss_msg->waitqueue);
+ atomic_set(&gss_msg->count, 1);
+ gss_msg->uid = uid;
+ gss_msg->auth = gss_auth;
+ gss_encode_msg(gss_msg, clnt, machine_cred);
return gss_msg;
}
@@ -350,16 +442,13 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr
struct gss_upcall_msg *gss_new, *gss_msg;
uid_t uid = cred->cr_uid;
- /* Special case: rpc.gssd assumes that uid == 0 implies machine creds */
- if (gss_cred->gc_machine_cred != 0)
- uid = 0;
-
- gss_new = gss_alloc_msg(gss_auth, uid);
- if (gss_new == NULL)
- return ERR_PTR(-ENOMEM);
+ gss_new = gss_alloc_msg(gss_auth, uid, clnt, gss_cred->gc_machine_cred);
+ if (IS_ERR(gss_new))
+ return gss_new;
gss_msg = gss_add_msg(gss_auth, gss_new);
if (gss_msg == gss_new) {
- int res = rpc_queue_upcall(gss_auth->dentry->d_inode, &gss_new->msg);
+ struct inode *inode = &gss_new->inode->vfs_inode;
+ int res = rpc_queue_upcall(inode, &gss_new->msg);
if (res) {
gss_unhash_msg(gss_new);
gss_msg = ERR_PTR(res);
@@ -369,6 +458,18 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr
return gss_msg;
}
+static void warn_gssd(void)
+{
+ static unsigned long ratelimit;
+ unsigned long now = jiffies;
+
+ if (time_after(now, ratelimit)) {
+ printk(KERN_WARNING "RPC: AUTH_GSS upcall timed out.\n"
+ "Please check user daemon is running.\n");
+ ratelimit = now + 15*HZ;
+ }
+}
+
static inline int
gss_refresh_upcall(struct rpc_task *task)
{
@@ -378,16 +479,25 @@ gss_refresh_upcall(struct rpc_task *task)
struct gss_cred *gss_cred = container_of(cred,
struct gss_cred, gc_base);
struct gss_upcall_msg *gss_msg;
- struct inode *inode = gss_auth->dentry->d_inode;
+ struct inode *inode;
int err = 0;
dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid,
cred->cr_uid);
gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred);
+ if (IS_ERR(gss_msg) == -EAGAIN) {
+ /* XXX: warning on the first, under the assumption we
+ * shouldn't normally hit this case on a refresh. */
+ warn_gssd();
+ task->tk_timeout = 15*HZ;
+ rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL);
+ return 0;
+ }
if (IS_ERR(gss_msg)) {
err = PTR_ERR(gss_msg);
goto out;
}
+ inode = &gss_msg->inode->vfs_inode;
spin_lock(&inode->i_lock);
if (gss_cred->gc_upcall != NULL)
rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL);
@@ -414,18 +524,29 @@ out:
static inline int
gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
{
- struct inode *inode = gss_auth->dentry->d_inode;
+ struct inode *inode;
struct rpc_cred *cred = &gss_cred->gc_base;
struct gss_upcall_msg *gss_msg;
DEFINE_WAIT(wait);
int err = 0;
dprintk("RPC: gss_upcall for uid %u\n", cred->cr_uid);
+retry:
gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred);
+ if (PTR_ERR(gss_msg) == -EAGAIN) {
+ err = wait_event_interruptible_timeout(pipe_version_waitqueue,
+ pipe_version >= 0, 15*HZ);
+ if (err)
+ goto out;
+ if (pipe_version < 0)
+ warn_gssd();
+ goto retry;
+ }
if (IS_ERR(gss_msg)) {
err = PTR_ERR(gss_msg);
goto out;
}
+ inode = &gss_msg->inode->vfs_inode;
for (;;) {
prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE);
spin_lock(&inode->i_lock);
@@ -543,6 +664,38 @@ out:
return err;
}
+static int gss_pipe_open(struct inode *inode, int new_version)
+{
+ int ret = 0;
+
+ spin_lock(&pipe_version_lock);
+ if (pipe_version < 0) {
+ /* First open of any gss pipe determines the version: */
+ pipe_version = new_version;
+ rpc_wake_up(&pipe_version_rpc_waitqueue);
+ wake_up(&pipe_version_waitqueue);
+ } else if (pipe_version != new_version) {
+ /* Trying to open a pipe of a different version */
+ ret = -EBUSY;
+ goto out;
+ }
+ atomic_inc(&pipe_users);
+out:
+ spin_unlock(&pipe_version_lock);
+ return ret;
+
+}
+
+static int gss_pipe_open_v0(struct inode *inode)
+{
+ return gss_pipe_open(inode, 0);
+}
+
+static int gss_pipe_open_v1(struct inode *inode)
+{
+ return gss_pipe_open(inode, 1);
+}
+
static void
gss_pipe_release(struct inode *inode)
{
@@ -562,27 +715,22 @@ gss_pipe_release(struct inode *inode)
spin_lock(&inode->i_lock);
}
spin_unlock(&inode->i_lock);
+
+ put_pipe_version();
}
static void
gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
{
struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg);
- static unsigned long ratelimit;
if (msg->errno < 0) {
dprintk("RPC: gss_pipe_destroy_msg releasing msg %p\n",
gss_msg);
atomic_inc(&gss_msg->count);
gss_unhash_msg(gss_msg);
- if (msg->errno == -ETIMEDOUT) {
- unsigned long now = jiffies;
- if (time_after(now, ratelimit)) {
- printk(KERN_WARNING "RPC: AUTH_GSS upcall timed out.\n"
- "Please check user daemon is running!\n");
- ratelimit = now + 15*HZ;
- }
- }
+ if (msg->errno == -ETIMEDOUT)
+ warn_gssd();
gss_release_msg(gss_msg);
}
}
@@ -623,20 +771,38 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
atomic_set(&auth->au_count, 1);
kref_init(&gss_auth->kref);
- gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name,
- clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
- if (IS_ERR(gss_auth->dentry)) {
- err = PTR_ERR(gss_auth->dentry);
+ /*
+ * Note: if we created the old pipe first, then someone who
+ * examined the directory at the right moment might conclude
+ * that we supported only the old pipe. So we instead create
+ * the new pipe first.
+ */
+ gss_auth->dentry[1] = rpc_mkpipe(clnt->cl_dentry,
+ "gssd",
+ clnt, &gss_upcall_ops_v1,
+ RPC_PIPE_WAIT_FOR_OPEN);
+ if (IS_ERR(gss_auth->dentry[1])) {
+ err = PTR_ERR(gss_auth->dentry[1]);
goto err_put_mech;
}
+ gss_auth->dentry[0] = rpc_mkpipe(clnt->cl_dentry,
+ gss_auth->mech->gm_name,
+ clnt, &gss_upcall_ops_v0,
+ RPC_PIPE_WAIT_FOR_OPEN);
+ if (IS_ERR(gss_auth->dentry[0])) {
+ err = PTR_ERR(gss_auth->dentry[0]);
+ goto err_unlink_pipe_1;
+ }
err = rpcauth_init_credcache(auth);
if (err)
- goto err_unlink_pipe;
+ goto err_unlink_pipe_0;
return auth;
-err_unlink_pipe:
- rpc_unlink(gss_auth->dentry);
+err_unlink_pipe_0:
+ rpc_unlink(gss_auth->dentry[0]);
+err_unlink_pipe_1:
+ rpc_unlink(gss_auth->dentry[1]);
err_put_mech:
gss_mech_put(gss_auth->mech);
err_free:
@@ -649,8 +815,8 @@ out_dec:
static void
gss_free(struct gss_auth *gss_auth)
{
- rpc_unlink(gss_auth->dentry);
- gss_auth->dentry = NULL;
+ rpc_unlink(gss_auth->dentry[1]);
+ rpc_unlink(gss_auth->dentry[0]);
gss_mech_put(gss_auth->mech);
kfree(gss_auth);
@@ -693,7 +859,7 @@ gss_destroying_context(struct rpc_cred *cred)
struct rpc_task *task;
if (gss_cred->gc_ctx == NULL ||
- test_and_clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
+ test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
return 0;
gss_cred->gc_ctx->gc_proc = RPC_GSS_PROC_DESTROY;
@@ -757,14 +923,12 @@ gss_free_cred_callback(struct rcu_head *head)
}
static void
-gss_destroy_cred(struct rpc_cred *cred)
+gss_destroy_nullcred(struct rpc_cred *cred)
{
struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
struct gss_cl_ctx *ctx = gss_cred->gc_ctx;
- if (gss_destroying_context(cred))
- return;
rcu_assign_pointer(gss_cred->gc_ctx, NULL);
call_rcu(&cred->cr_rcu, gss_free_cred_callback);
if (ctx)
@@ -772,6 +936,15 @@ gss_destroy_cred(struct rpc_cred *cred)
kref_put(&gss_auth->kref, gss_free_callback);
}
+static void
+gss_destroy_cred(struct rpc_cred *cred)
+{
+
+ if (gss_destroying_context(cred))
+ return;
+ gss_destroy_nullcred(cred);
+}
+
/*
* Lookup RPCSEC_GSS cred for the current process
*/
@@ -1017,7 +1190,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
*p++ = htonl(rqstp->rq_seqno);
- status = rpc_call_xdrproc(encode, rqstp, p, obj);
+ status = encode(rqstp, p, obj);
if (status)
return status;
@@ -1111,7 +1284,7 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
*p++ = htonl(rqstp->rq_seqno);
- status = rpc_call_xdrproc(encode, rqstp, p, obj);
+ status = encode(rqstp, p, obj);
if (status)
return status;
@@ -1170,12 +1343,12 @@ gss_wrap_req(struct rpc_task *task,
/* The spec seems a little ambiguous here, but I think that not
* wrapping context destruction requests makes the most sense.
*/
- status = rpc_call_xdrproc(encode, rqstp, p, obj);
+ status = encode(rqstp, p, obj);
goto out;
}
switch (gss_cred->gc_service) {
case RPC_GSS_SVC_NONE:
- status = rpc_call_xdrproc(encode, rqstp, p, obj);
+ status = encode(rqstp, p, obj);
break;
case RPC_GSS_SVC_INTEGRITY:
status = gss_wrap_req_integ(cred, ctx, encode,
@@ -1291,7 +1464,7 @@ gss_unwrap_resp(struct rpc_task *task,
cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp)
+ (savedlen - head->iov_len);
out_decode:
- status = rpc_call_xdrproc(decode, rqstp, p, obj);
+ status = decode(rqstp, p, obj);
out:
gss_put_ctx(ctx);
dprintk("RPC: %5u gss_unwrap_resp returning %d\n", task->tk_pid,
@@ -1324,7 +1497,7 @@ static const struct rpc_credops gss_credops = {
static const struct rpc_credops gss_nullops = {
.cr_name = "AUTH_GSS",
- .crdestroy = gss_destroy_cred,
+ .crdestroy = gss_destroy_nullcred,
.crbind = rpcauth_generic_bind_cred,
.crmatch = gss_match,
.crmarshal = gss_marshal,
@@ -1334,10 +1507,19 @@ static const struct rpc_credops gss_nullops = {
.crunwrap_resp = gss_unwrap_resp,
};
-static struct rpc_pipe_ops gss_upcall_ops = {
+static struct rpc_pipe_ops gss_upcall_ops_v0 = {
+ .upcall = gss_pipe_upcall,
+ .downcall = gss_pipe_downcall,
+ .destroy_msg = gss_pipe_destroy_msg,
+ .open_pipe = gss_pipe_open_v0,
+ .release_pipe = gss_pipe_release,
+};
+
+static struct rpc_pipe_ops gss_upcall_ops_v1 = {
.upcall = gss_pipe_upcall,
.downcall = gss_pipe_downcall,
.destroy_msg = gss_pipe_destroy_msg,
+ .open_pipe = gss_pipe_open_v1,
.release_pipe = gss_pipe_release,
};
@@ -1354,6 +1536,7 @@ static int __init init_rpcsec_gss(void)
err = gss_svc_init();
if (err)
goto out_unregister;
+ rpc_init_wait_queue(&pipe_version_rpc_waitqueue, "gss pipe version");
return 0;
out_unregister:
rpcauth_unregister(&authgss_ops);
diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c
index d83b881685fe..c0ba39c4f5f2 100644
--- a/net/sunrpc/auth_gss/gss_generic_token.c
+++ b/net/sunrpc/auth_gss/gss_generic_token.c
@@ -152,7 +152,7 @@ g_token_size(struct xdr_netobj *mech, unsigned int body_size)
return(1 + der_length_size(body_size) + body_size);
}
-EXPORT_SYMBOL(g_token_size);
+EXPORT_SYMBOL_GPL(g_token_size);
/* fills in a buffer with the token header. The buffer is assumed to
be the right size. buf is advanced past the token header */
@@ -167,7 +167,7 @@ g_make_token_header(struct xdr_netobj *mech, int body_size, unsigned char **buf)
TWRITE_STR(*buf, mech->data, ((int) mech->len));
}
-EXPORT_SYMBOL(g_make_token_header);
+EXPORT_SYMBOL_GPL(g_make_token_header);
/*
* Given a buffer containing a token, reads and verifies the token,
@@ -231,5 +231,5 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size,
return(ret);
}
-EXPORT_SYMBOL(g_verify_token_header);
+EXPORT_SYMBOL_GPL(g_verify_token_header);
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index bce9d527af08..6efbb0cd3c7c 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -117,7 +117,7 @@ gss_mech_register(struct gss_api_mech *gm)
return 0;
}
-EXPORT_SYMBOL(gss_mech_register);
+EXPORT_SYMBOL_GPL(gss_mech_register);
void
gss_mech_unregister(struct gss_api_mech *gm)
@@ -129,7 +129,7 @@ gss_mech_unregister(struct gss_api_mech *gm)
gss_mech_free(gm);
}
-EXPORT_SYMBOL(gss_mech_unregister);
+EXPORT_SYMBOL_GPL(gss_mech_unregister);
struct gss_api_mech *
gss_mech_get(struct gss_api_mech *gm)
@@ -138,7 +138,7 @@ gss_mech_get(struct gss_api_mech *gm)
return gm;
}
-EXPORT_SYMBOL(gss_mech_get);
+EXPORT_SYMBOL_GPL(gss_mech_get);
struct gss_api_mech *
gss_mech_get_by_name(const char *name)
@@ -158,7 +158,7 @@ gss_mech_get_by_name(const char *name)
}
-EXPORT_SYMBOL(gss_mech_get_by_name);
+EXPORT_SYMBOL_GPL(gss_mech_get_by_name);
static inline int
mech_supports_pseudoflavor(struct gss_api_mech *gm, u32 pseudoflavor)
@@ -191,7 +191,7 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
return gm;
}
-EXPORT_SYMBOL(gss_mech_get_by_pseudoflavor);
+EXPORT_SYMBOL_GPL(gss_mech_get_by_pseudoflavor);
u32
gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service)
@@ -205,7 +205,7 @@ gss_svc_to_pseudoflavor(struct gss_api_mech *gm, u32 service)
}
return RPC_AUTH_MAXFLAVOR; /* illegal value */
}
-EXPORT_SYMBOL(gss_svc_to_pseudoflavor);
+EXPORT_SYMBOL_GPL(gss_svc_to_pseudoflavor);
u32
gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor)
@@ -219,7 +219,7 @@ gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor)
return 0;
}
-EXPORT_SYMBOL(gss_pseudoflavor_to_service);
+EXPORT_SYMBOL_GPL(gss_pseudoflavor_to_service);
char *
gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service)
@@ -233,7 +233,7 @@ gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service)
return NULL;
}
-EXPORT_SYMBOL(gss_service_to_auth_domain_name);
+EXPORT_SYMBOL_GPL(gss_service_to_auth_domain_name);
void
gss_mech_put(struct gss_api_mech * gm)
@@ -242,7 +242,7 @@ gss_mech_put(struct gss_api_mech * gm)
module_put(gm->gm_owner);
}
-EXPORT_SYMBOL(gss_mech_put);
+EXPORT_SYMBOL_GPL(gss_mech_put);
/* The mech could probably be determined from the token instead, but it's just
* as easy for now to pass it in. */
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 81ae3d62a0cc..2278a50c6444 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -332,6 +332,7 @@ struct rsc {
struct svc_cred cred;
struct gss_svc_seq_data seqdata;
struct gss_ctx *mechctx;
+ char *client_name;
};
static struct cache_head *rsc_table[RSC_HASHMAX];
@@ -346,6 +347,7 @@ static void rsc_free(struct rsc *rsci)
gss_delete_sec_context(&rsci->mechctx);
if (rsci->cred.cr_group_info)
put_group_info(rsci->cred.cr_group_info);
+ kfree(rsci->client_name);
}
static void rsc_put(struct kref *ref)
@@ -383,6 +385,7 @@ rsc_init(struct cache_head *cnew, struct cache_head *ctmp)
tmp->handle.data = NULL;
new->mechctx = NULL;
new->cred.cr_group_info = NULL;
+ new->client_name = NULL;
}
static void
@@ -397,6 +400,8 @@ update_rsc(struct cache_head *cnew, struct cache_head *ctmp)
spin_lock_init(&new->seqdata.sd_lock);
new->cred = tmp->cred;
tmp->cred.cr_group_info = NULL;
+ new->client_name = tmp->client_name;
+ tmp->client_name = NULL;
}
static struct cache_head *
@@ -486,6 +491,15 @@ static int rsc_parse(struct cache_detail *cd,
status = gss_import_sec_context(buf, len, gm, &rsci.mechctx);
if (status)
goto out;
+
+ /* get client name */
+ len = qword_get(&mesg, buf, mlen);
+ if (len > 0) {
+ rsci.client_name = kstrdup(buf, GFP_KERNEL);
+ if (!rsci.client_name)
+ goto out;
+ }
+
}
rsci.h.expiry_time = expiry;
rscp = rsc_update(&rsci, rscp);
@@ -746,7 +760,7 @@ u32 svcauth_gss_flavor(struct auth_domain *dom)
return gd->pseudoflavor;
}
-EXPORT_SYMBOL(svcauth_gss_flavor);
+EXPORT_SYMBOL_GPL(svcauth_gss_flavor);
int
svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name)
@@ -780,7 +794,7 @@ out:
return stat;
}
-EXPORT_SYMBOL(svcauth_gss_register_pseudoflavor);
+EXPORT_SYMBOL_GPL(svcauth_gss_register_pseudoflavor);
static inline int
read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj)
@@ -913,6 +927,16 @@ struct gss_svc_data {
struct rsc *rsci;
};
+char *svc_gss_principal(struct svc_rqst *rqstp)
+{
+ struct gss_svc_data *gd = (struct gss_svc_data *)rqstp->rq_auth_data;
+
+ if (gd && gd->rsci)
+ return gd->rsci->client_name;
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(svc_gss_principal);
+
static int
svcauth_gss_set_client(struct svc_rqst *rqstp)
{
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index da0789fa1b88..836f15c0c4a3 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -197,6 +197,12 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
clnt->cl_rtt = &clnt->cl_rtt_default;
rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
+ clnt->cl_principal = NULL;
+ if (args->client_name) {
+ clnt->cl_principal = kstrdup(args->client_name, GFP_KERNEL);
+ if (!clnt->cl_principal)
+ goto out_no_principal;
+ }
kref_init(&clnt->cl_kref);
@@ -213,10 +219,10 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
}
/* save the nodename */
- clnt->cl_nodelen = strlen(utsname()->nodename);
+ clnt->cl_nodelen = strlen(init_utsname()->nodename);
if (clnt->cl_nodelen > UNX_MAXNODENAME)
clnt->cl_nodelen = UNX_MAXNODENAME;
- memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen);
+ memcpy(clnt->cl_nodename, init_utsname()->nodename, clnt->cl_nodelen);
rpc_register_client(clnt);
return clnt;
@@ -226,6 +232,8 @@ out_no_auth:
rpc_put_mount();
}
out_no_path:
+ kfree(clnt->cl_principal);
+out_no_principal:
rpc_free_iostats(clnt->cl_metrics);
out_no_stats:
if (clnt->cl_server != clnt->cl_inline_name)
@@ -271,15 +279,15 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
case AF_INET: {
struct sockaddr_in *sin =
(struct sockaddr_in *)args->address;
- snprintf(servername, sizeof(servername), NIPQUAD_FMT,
- NIPQUAD(sin->sin_addr.s_addr));
+ snprintf(servername, sizeof(servername), "%pI4",
+ &sin->sin_addr.s_addr);
break;
}
case AF_INET6: {
struct sockaddr_in6 *sin =
(struct sockaddr_in6 *)args->address;
- snprintf(servername, sizeof(servername), NIP6_FMT,
- NIP6(sin->sin6_addr));
+ snprintf(servername, sizeof(servername), "%pI6",
+ &sin->sin6_addr);
break;
}
default:
@@ -354,6 +362,11 @@ rpc_clone_client(struct rpc_clnt *clnt)
new->cl_metrics = rpc_alloc_iostats(clnt);
if (new->cl_metrics == NULL)
goto out_no_stats;
+ if (clnt->cl_principal) {
+ new->cl_principal = kstrdup(clnt->cl_principal, GFP_KERNEL);
+ if (new->cl_principal == NULL)
+ goto out_no_principal;
+ }
kref_init(&new->cl_kref);
err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name);
if (err != 0)
@@ -366,6 +379,8 @@ rpc_clone_client(struct rpc_clnt *clnt)
rpciod_up();
return new;
out_no_path:
+ kfree(new->cl_principal);
+out_no_principal:
rpc_free_iostats(new->cl_metrics);
out_no_stats:
kfree(new);
@@ -417,6 +432,7 @@ rpc_free_client(struct kref *kref)
out_free:
rpc_unregister_client(clnt);
rpc_free_iostats(clnt->cl_metrics);
+ kfree(clnt->cl_principal);
clnt->cl_metrics = NULL;
xprt_put(clnt->cl_xprt);
rpciod_down();
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 23a2b8f6dc49..192453248870 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -113,7 +113,7 @@ out:
wake_up(&rpci->waitq);
return res;
}
-EXPORT_SYMBOL(rpc_queue_upcall);
+EXPORT_SYMBOL_GPL(rpc_queue_upcall);
static inline void
rpc_inode_setowner(struct inode *inode, void *private)
@@ -126,13 +126,14 @@ rpc_close_pipes(struct inode *inode)
{
struct rpc_inode *rpci = RPC_I(inode);
struct rpc_pipe_ops *ops;
+ int need_release;
mutex_lock(&inode->i_mutex);
ops = rpci->ops;
if (ops != NULL) {
LIST_HEAD(free_list);
-
spin_lock(&inode->i_lock);
+ need_release = rpci->nreaders != 0 || rpci->nwriters != 0;
rpci->nreaders = 0;
list_splice_init(&rpci->in_upcall, &free_list);
list_splice_init(&rpci->pipe, &free_list);
@@ -141,7 +142,7 @@ rpc_close_pipes(struct inode *inode)
spin_unlock(&inode->i_lock);
rpc_purge_list(rpci, &free_list, ops->destroy_msg, -EPIPE);
rpci->nwriters = 0;
- if (ops->release_pipe)
+ if (need_release && ops->release_pipe)
ops->release_pipe(inode);
cancel_delayed_work_sync(&rpci->queue_timeout);
}
@@ -169,16 +170,24 @@ static int
rpc_pipe_open(struct inode *inode, struct file *filp)
{
struct rpc_inode *rpci = RPC_I(inode);
+ int first_open;
int res = -ENXIO;
mutex_lock(&inode->i_mutex);
- if (rpci->ops != NULL) {
- if (filp->f_mode & FMODE_READ)
- rpci->nreaders ++;
- if (filp->f_mode & FMODE_WRITE)
- rpci->nwriters ++;
- res = 0;
+ if (rpci->ops == NULL)
+ goto out;
+ first_open = rpci->nreaders == 0 && rpci->nwriters == 0;
+ if (first_open && rpci->ops->open_pipe) {
+ res = rpci->ops->open_pipe(inode);
+ if (res)
+ goto out;
}
+ if (filp->f_mode & FMODE_READ)
+ rpci->nreaders++;
+ if (filp->f_mode & FMODE_WRITE)
+ rpci->nwriters++;
+ res = 0;
+out:
mutex_unlock(&inode->i_mutex);
return res;
}
@@ -188,6 +197,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
{
struct rpc_inode *rpci = RPC_I(inode);
struct rpc_pipe_msg *msg;
+ int last_close;
mutex_lock(&inode->i_mutex);
if (rpci->ops == NULL)
@@ -214,7 +224,8 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
rpci->ops->destroy_msg, -EAGAIN);
}
}
- if (rpci->ops->release_pipe)
+ last_close = rpci->nwriters == 0 && rpci->nreaders == 0;
+ if (last_close && rpci->ops->release_pipe)
rpci->ops->release_pipe(inode);
out:
mutex_unlock(&inode->i_mutex);
@@ -396,6 +407,7 @@ enum {
RPCAUTH_nfs,
RPCAUTH_portmap,
RPCAUTH_statd,
+ RPCAUTH_nfsd4_cb,
RPCAUTH_RootEOF
};
@@ -429,6 +441,10 @@ static struct rpc_filelist files[] = {
.name = "statd",
.mode = S_IFDIR | S_IRUGO | S_IXUGO,
},
+ [RPCAUTH_nfsd4_cb] = {
+ .name = "nfsd4_cb",
+ .mode = S_IFDIR | S_IRUGO | S_IXUGO,
+ },
};
enum {
@@ -748,7 +764,7 @@ rpc_rmdir(struct dentry *dentry)
* @name: name of pipe
* @private: private data to associate with the pipe, for the caller's use
* @ops: operations defining the behavior of the pipe: upcall, downcall,
- * release_pipe, and destroy_msg.
+ * release_pipe, open_pipe, and destroy_msg.
* @flags: rpc_inode flags
*
* Data is made available for userspace to read by calls to
@@ -808,7 +824,7 @@ err_dput:
-ENOMEM);
goto out;
}
-EXPORT_SYMBOL(rpc_mkpipe);
+EXPORT_SYMBOL_GPL(rpc_mkpipe);
/**
* rpc_unlink - remove a pipe
@@ -839,7 +855,7 @@ rpc_unlink(struct dentry *dentry)
dput(parent);
return error;
}
-EXPORT_SYMBOL(rpc_unlink);
+EXPORT_SYMBOL_GPL(rpc_unlink);
/*
* populate the filesystem
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 34abc91058d8..03ae007641e4 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -270,10 +270,9 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
char buf[32];
/* Construct AF_INET universal address */
- snprintf(buf, sizeof(buf),
- NIPQUAD_FMT".%u.%u",
- NIPQUAD(address_to_register->sin_addr.s_addr),
- port >> 8, port & 0xff);
+ snprintf(buf, sizeof(buf), "%pI4.%u.%u",
+ &address_to_register->sin_addr.s_addr,
+ port >> 8, port & 0xff);
map->r_addr = buf;
dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
@@ -305,9 +304,9 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
snprintf(buf, sizeof(buf), "::.%u.%u",
port >> 8, port & 0xff);
else
- snprintf(buf, sizeof(buf), NIP6_FMT".%u.%u",
- NIP6(address_to_register->sin6_addr),
- port >> 8, port & 0xff);
+ snprintf(buf, sizeof(buf), "%pI6.%u.%u",
+ &address_to_register->sin6_addr,
+ port >> 8, port & 0xff);
map->r_addr = buf;
dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
@@ -422,8 +421,8 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
struct rpc_clnt *rpcb_clnt;
int status;
- dprintk("RPC: %s(" NIPQUAD_FMT ", %u, %u, %d)\n",
- __func__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
+ dprintk("RPC: %s(%pI4, %u, %u, %d)\n",
+ __func__, &sin->sin_addr.s_addr, prog, vers, prot);
rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin,
sizeof(*sin), prot, RPCBVERS_2);
@@ -460,6 +459,28 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
return rpc_run_task(&task_setup_data);
}
+/*
+ * In the case where rpc clients have been cloned, we want to make
+ * sure that we use the program number/version etc of the actual
+ * owner of the xprt. To do so, we walk back up the tree of parents
+ * to find whoever created the transport and/or whoever has the
+ * autobind flag set.
+ */
+static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt)
+{
+ struct rpc_clnt *parent = clnt->cl_parent;
+
+ while (parent != clnt) {
+ if (parent->cl_xprt != clnt->cl_xprt)
+ break;
+ if (clnt->cl_autobind)
+ break;
+ clnt = parent;
+ parent = parent->cl_parent;
+ }
+ return clnt;
+}
+
/**
* rpcb_getport_async - obtain the port for a given RPC service on a given host
* @task: task that is waiting for portmapper request
@@ -469,10 +490,10 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
*/
void rpcb_getport_async(struct rpc_task *task)
{
- struct rpc_clnt *clnt = task->tk_client;
+ struct rpc_clnt *clnt;
struct rpc_procinfo *proc;
u32 bind_version;
- struct rpc_xprt *xprt = task->tk_xprt;
+ struct rpc_xprt *xprt;
struct rpc_clnt *rpcb_clnt;
static struct rpcbind_args *map;
struct rpc_task *child;
@@ -481,13 +502,13 @@ void rpcb_getport_async(struct rpc_task *task)
size_t salen;
int status;
+ clnt = rpcb_find_transport_owner(task->tk_client);
+ xprt = clnt->cl_xprt;
+
dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
task->tk_pid, __func__,
clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot);
- /* Autobind on cloned rpc clients is discouraged */
- BUG_ON(clnt->cl_parent != clnt);
-
/* Put self on the wait queue to ensure we get notified if
* some other task is already attempting to bind the port */
rpc_sleep_on(&xprt->binding, task, NULL);
@@ -549,7 +570,7 @@ void rpcb_getport_async(struct rpc_task *task)
status = -ENOMEM;
dprintk("RPC: %5u %s: no memory available\n",
task->tk_pid, __func__);
- goto bailout_nofree;
+ goto bailout_release_client;
}
map->r_prog = clnt->cl_prog;
map->r_vers = clnt->cl_vers;
@@ -569,11 +590,13 @@ void rpcb_getport_async(struct rpc_task *task)
task->tk_pid, __func__);
return;
}
- rpc_put_task(child);
- task->tk_xprt->stat.bind_count++;
+ xprt->stat.bind_count++;
+ rpc_put_task(child);
return;
+bailout_release_client:
+ rpc_release_client(rpcb_clnt);
bailout_nofree:
rpcb_wake_rpcbind_waiters(xprt, status);
task->tk_status = status;
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index f24800f2c098..82240e6127b2 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -162,13 +162,9 @@ static void ip_map_request(struct cache_detail *cd,
struct ip_map *im = container_of(h, struct ip_map, h);
if (ipv6_addr_v4mapped(&(im->m_addr))) {
- snprintf(text_addr, 20, NIPQUAD_FMT,
- ntohl(im->m_addr.s6_addr32[3]) >> 24 & 0xff,
- ntohl(im->m_addr.s6_addr32[3]) >> 16 & 0xff,
- ntohl(im->m_addr.s6_addr32[3]) >> 8 & 0xff,
- ntohl(im->m_addr.s6_addr32[3]) >> 0 & 0xff);
+ snprintf(text_addr, 20, "%pI4", &im->m_addr.s6_addr32[3]);
} else {
- snprintf(text_addr, 40, NIP6_FMT, NIP6(im->m_addr));
+ snprintf(text_addr, 40, "%pI6", &im->m_addr);
}
qword_add(bpp, blen, im->m_class);
qword_add(bpp, blen, text_addr);
@@ -208,13 +204,13 @@ static int ip_map_parse(struct cache_detail *cd,
len = qword_get(&mesg, buf, mlen);
if (len <= 0) return -EINVAL;
- if (sscanf(buf, NIPQUAD_FMT "%c", &b1, &b2, &b3, &b4, &c) == 4) {
+ if (sscanf(buf, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) == 4) {
addr.s6_addr32[0] = 0;
addr.s6_addr32[1] = 0;
addr.s6_addr32[2] = htonl(0xffff);
addr.s6_addr32[3] =
htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4);
- } else if (sscanf(buf, NIP6_FMT "%c",
+ } else if (sscanf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x%c",
&b1, &b2, &b3, &b4, &b5, &b6, &b7, &b8, &c) == 8) {
addr.s6_addr16[0] = htons(b1);
addr.s6_addr16[1] = htons(b2);
@@ -278,16 +274,10 @@ static int ip_map_show(struct seq_file *m,
dom = im->m_client->h.name;
if (ipv6_addr_v4mapped(&addr)) {
- seq_printf(m, "%s " NIPQUAD_FMT " %s\n",
- im->m_class,
- ntohl(addr.s6_addr32[3]) >> 24 & 0xff,
- ntohl(addr.s6_addr32[3]) >> 16 & 0xff,
- ntohl(addr.s6_addr32[3]) >> 8 & 0xff,
- ntohl(addr.s6_addr32[3]) >> 0 & 0xff,
- dom);
+ seq_printf(m, "%s %pI4 %s\n",
+ im->m_class, &addr.s6_addr32[3], dom);
} else {
- seq_printf(m, "%s " NIP6_FMT " %s\n",
- im->m_class, NIP6(addr), dom);
+ seq_printf(m, "%s %pI6 %s\n", im->m_class, &addr, dom);
}
return 0;
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 95293f549e9c..ef3238d665ee 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -250,10 +250,10 @@ static int one_sock_name(char *buf, struct svc_sock *svsk)
switch(svsk->sk_sk->sk_family) {
case AF_INET:
- len = sprintf(buf, "ipv4 %s %u.%u.%u.%u %d\n",
- svsk->sk_sk->sk_protocol==IPPROTO_UDP?
+ len = sprintf(buf, "ipv4 %s %pI4 %d\n",
+ svsk->sk_sk->sk_protocol == IPPROTO_UDP ?
"udp" : "tcp",
- NIPQUAD(inet_sk(svsk->sk_sk)->rcv_saddr),
+ &inet_sk(svsk->sk_sk)->rcv_saddr,
inet_sk(svsk->sk_sk)->num);
break;
default:
@@ -1183,7 +1183,11 @@ int svc_addsock(struct svc_serv *serv,
else if (so->state > SS_UNCONNECTED)
err = -EISCONN;
else {
- svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS);
+ if (!try_module_get(THIS_MODULE))
+ err = -ENOENT;
+ else
+ svsk = svc_setup_socket(serv, so, &err,
+ SVC_SOCK_DEFAULTS);
if (svsk) {
struct sockaddr_storage addr;
struct sockaddr *sin = (struct sockaddr *)&addr;
@@ -1196,7 +1200,8 @@ int svc_addsock(struct svc_serv *serv,
spin_unlock_bh(&serv->sv_lock);
svc_xprt_received(&svsk->sk_xprt);
err = 0;
- }
+ } else
+ module_put(THIS_MODULE);
}
if (err) {
sockfd_put(so);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 79a55d56cc98..406e26de584e 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -28,7 +28,7 @@ xdr_encode_netobj(__be32 *p, const struct xdr_netobj *obj)
memcpy(p, obj->data, obj->len);
return p + XDR_QUADLEN(obj->len);
}
-EXPORT_SYMBOL(xdr_encode_netobj);
+EXPORT_SYMBOL_GPL(xdr_encode_netobj);
__be32 *
xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj)
@@ -41,7 +41,7 @@ xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj)
obj->data = (u8 *) p;
return p + XDR_QUADLEN(len);
}
-EXPORT_SYMBOL(xdr_decode_netobj);
+EXPORT_SYMBOL_GPL(xdr_decode_netobj);
/**
* xdr_encode_opaque_fixed - Encode fixed length opaque data
@@ -71,7 +71,7 @@ __be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int nbytes)
}
return p;
}
-EXPORT_SYMBOL(xdr_encode_opaque_fixed);
+EXPORT_SYMBOL_GPL(xdr_encode_opaque_fixed);
/**
* xdr_encode_opaque - Encode variable length opaque data
@@ -86,14 +86,14 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
*p++ = htonl(nbytes);
return xdr_encode_opaque_fixed(p, ptr, nbytes);
}
-EXPORT_SYMBOL(xdr_encode_opaque);
+EXPORT_SYMBOL_GPL(xdr_encode_opaque);
__be32 *
xdr_encode_string(__be32 *p, const char *string)
{
return xdr_encode_array(p, string, strlen(string));
}
-EXPORT_SYMBOL(xdr_encode_string);
+EXPORT_SYMBOL_GPL(xdr_encode_string);
__be32 *
xdr_decode_string_inplace(__be32 *p, char **sp,
@@ -108,7 +108,7 @@ xdr_decode_string_inplace(__be32 *p, char **sp,
*sp = (char *) p;
return p + XDR_QUADLEN(len);
}
-EXPORT_SYMBOL(xdr_decode_string_inplace);
+EXPORT_SYMBOL_GPL(xdr_decode_string_inplace);
void
xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
@@ -136,7 +136,7 @@ xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
xdr->buflen += len;
xdr->len += len;
}
-EXPORT_SYMBOL(xdr_encode_pages);
+EXPORT_SYMBOL_GPL(xdr_encode_pages);
void
xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
@@ -158,7 +158,7 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
xdr->buflen += len;
}
-EXPORT_SYMBOL(xdr_inline_pages);
+EXPORT_SYMBOL_GPL(xdr_inline_pages);
/*
* Helper routines for doing 'memmove' like operations on a struct xdr_buf
@@ -428,7 +428,7 @@ xdr_shift_buf(struct xdr_buf *buf, size_t len)
{
xdr_shrink_bufhead(buf, len);
}
-EXPORT_SYMBOL(xdr_shift_buf);
+EXPORT_SYMBOL_GPL(xdr_shift_buf);
/**
* xdr_init_encode - Initialize a struct xdr_stream for sending data.
@@ -465,7 +465,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
iov->iov_len += len;
}
}
-EXPORT_SYMBOL(xdr_init_encode);
+EXPORT_SYMBOL_GPL(xdr_init_encode);
/**
* xdr_reserve_space - Reserve buffer space for sending
@@ -492,7 +492,7 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
xdr->buf->len += nbytes;
return p;
}
-EXPORT_SYMBOL(xdr_reserve_space);
+EXPORT_SYMBOL_GPL(xdr_reserve_space);
/**
* xdr_write_pages - Insert a list of pages into an XDR buffer for sending
@@ -527,7 +527,7 @@ void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int b
buf->buflen += len;
buf->len += len;
}
-EXPORT_SYMBOL(xdr_write_pages);
+EXPORT_SYMBOL_GPL(xdr_write_pages);
/**
* xdr_init_decode - Initialize an xdr_stream for decoding data.
@@ -547,7 +547,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
xdr->p = p;
xdr->end = (__be32 *)((char *)iov->iov_base + len);
}
-EXPORT_SYMBOL(xdr_init_decode);
+EXPORT_SYMBOL_GPL(xdr_init_decode);
/**
* xdr_inline_decode - Retrieve non-page XDR data to decode
@@ -569,7 +569,7 @@ __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
xdr->p = q;
return p;
}
-EXPORT_SYMBOL(xdr_inline_decode);
+EXPORT_SYMBOL_GPL(xdr_inline_decode);
/**
* xdr_read_pages - Ensure page-based XDR data to decode is aligned at current pointer position
@@ -613,7 +613,7 @@ void xdr_read_pages(struct xdr_stream *xdr, unsigned int len)
xdr->p = (__be32 *)((char *)iov->iov_base + padding);
xdr->end = (__be32 *)((char *)iov->iov_base + end);
}
-EXPORT_SYMBOL(xdr_read_pages);
+EXPORT_SYMBOL_GPL(xdr_read_pages);
/**
* xdr_enter_page - decode data from the XDR page
@@ -638,7 +638,7 @@ void xdr_enter_page(struct xdr_stream *xdr, unsigned int len)
xdr->p = (__be32 *)(kaddr + xdr->buf->page_base);
xdr->end = (__be32 *)((char *)xdr->p + len);
}
-EXPORT_SYMBOL(xdr_enter_page);
+EXPORT_SYMBOL_GPL(xdr_enter_page);
static struct kvec empty_iov = {.iov_base = NULL, .iov_len = 0};
@@ -650,7 +650,7 @@ xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf)
buf->page_len = 0;
buf->buflen = buf->len = iov->iov_len;
}
-EXPORT_SYMBOL(xdr_buf_from_iov);
+EXPORT_SYMBOL_GPL(xdr_buf_from_iov);
/* Sets subbuf to the portion of buf of length len beginning base bytes
* from the start of buf. Returns -1 if base of length are out of bounds. */
@@ -699,7 +699,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
return -1;
return 0;
}
-EXPORT_SYMBOL(xdr_buf_subsegment);
+EXPORT_SYMBOL_GPL(xdr_buf_subsegment);
static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
{
@@ -730,7 +730,7 @@ int read_bytes_from_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, u
__read_bytes_from_xdr_buf(&subbuf, obj, len);
return 0;
}
-EXPORT_SYMBOL(read_bytes_from_xdr_buf);
+EXPORT_SYMBOL_GPL(read_bytes_from_xdr_buf);
static void __write_bytes_to_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
{
@@ -774,7 +774,7 @@ xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj)
*obj = ntohl(raw);
return 0;
}
-EXPORT_SYMBOL(xdr_decode_word);
+EXPORT_SYMBOL_GPL(xdr_decode_word);
int
xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj)
@@ -783,7 +783,7 @@ xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj)
return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj));
}
-EXPORT_SYMBOL(xdr_encode_word);
+EXPORT_SYMBOL_GPL(xdr_encode_word);
/* If the netobj starting offset bytes from the start of xdr_buf is contained
* entirely in the head or the tail, set object to point to it; otherwise
@@ -821,7 +821,7 @@ int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, unsigned in
__read_bytes_from_xdr_buf(&subbuf, obj->data, obj->len);
return 0;
}
-EXPORT_SYMBOL(xdr_buf_read_netobj);
+EXPORT_SYMBOL_GPL(xdr_buf_read_netobj);
/* Returns 0 on success, or else a negative error code. */
static int
@@ -1027,7 +1027,7 @@ xdr_decode_array2(struct xdr_buf *buf, unsigned int base,
return xdr_xcode_array2(buf, base, desc, 0);
}
-EXPORT_SYMBOL(xdr_decode_array2);
+EXPORT_SYMBOL_GPL(xdr_decode_array2);
int
xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
@@ -1039,7 +1039,7 @@ xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
return xdr_xcode_array2(buf, base, desc, 1);
}
-EXPORT_SYMBOL(xdr_encode_array2);
+EXPORT_SYMBOL_GPL(xdr_encode_array2);
int
xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len,
@@ -1106,5 +1106,5 @@ xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len,
out:
return ret;
}
-EXPORT_SYMBOL(xdr_process_buf);
+EXPORT_SYMBOL_GPL(xdr_process_buf);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 99a52aabe332..29e401bb612e 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -108,13 +108,10 @@ int xprt_register_transport(struct xprt_class *transport)
goto out;
}
- result = -EINVAL;
- if (try_module_get(THIS_MODULE)) {
- list_add_tail(&transport->list, &xprt_list);
- printk(KERN_INFO "RPC: Registered %s transport module.\n",
- transport->name);
- result = 0;
- }
+ list_add_tail(&transport->list, &xprt_list);
+ printk(KERN_INFO "RPC: Registered %s transport module.\n",
+ transport->name);
+ result = 0;
out:
spin_unlock(&xprt_list_lock);
@@ -143,7 +140,6 @@ int xprt_unregister_transport(struct xprt_class *transport)
"RPC: Unregistered %s transport module.\n",
transport->name);
list_del_init(&transport->list);
- module_put(THIS_MODULE);
goto out;
}
}
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 5c1954d28d09..14106d26bb95 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -118,6 +118,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
}
if (xdrbuf->tail[0].iov_len) {
+ /* the rpcrdma protocol allows us to omit any trailing
+ * xdr pad bytes, saving the server an RDMA operation. */
+ if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
+ return n;
if (n == nsegs)
return 0;
seg[n].mr_page = NULL;
@@ -508,8 +512,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
if (hdrlen == 0)
return -1;
- dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd\n"
- " headerp 0x%p base 0x%p lkey 0x%x\n",
+ dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd"
+ " headerp 0x%p base 0x%p lkey 0x%x\n",
__func__, transfertypes[wtype], hdrlen, rpclen, padlen,
headerp, base, req->rl_iov.lkey);
@@ -594,7 +598,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b
* Scatter inline received data back into provided iov's.
*/
static void
-rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
+rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
{
int i, npages, curlen, olen;
char *destp;
@@ -660,6 +664,13 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
} else
rqst->rq_rcv_buf.tail[0].iov_len = 0;
+ if (pad) {
+ /* implicit padding on terminal chunk */
+ unsigned char *p = rqst->rq_rcv_buf.tail[0].iov_base;
+ while (pad--)
+ p[rqst->rq_rcv_buf.tail[0].iov_len++] = 0;
+ }
+
if (copy_len)
dprintk("RPC: %s: %d bytes in"
" %d extra segments (%d lost)\n",
@@ -681,12 +692,14 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep)
struct rpc_xprt *xprt = ep->rep_xprt;
spin_lock_bh(&xprt->transport_lock);
+ if (++xprt->connect_cookie == 0) /* maintain a reserved value */
+ ++xprt->connect_cookie;
if (ep->rep_connected > 0) {
if (!xprt_test_and_set_connected(xprt))
xprt_wake_pending_tasks(xprt, 0);
} else {
if (xprt_test_and_clear_connected(xprt))
- xprt_wake_pending_tasks(xprt, ep->rep_connected);
+ xprt_wake_pending_tasks(xprt, -ENOTCONN);
}
spin_unlock_bh(&xprt->transport_lock);
}
@@ -792,14 +805,20 @@ repost:
((unsigned char *)iptr - (unsigned char *)headerp);
status = rep->rr_len + rdmalen;
r_xprt->rx_stats.total_rdma_reply += rdmalen;
+ /* special case - last chunk may omit padding */
+ if (rdmalen &= 3) {
+ rdmalen = 4 - rdmalen;
+ status += rdmalen;
+ }
} else {
/* else ordinary inline */
+ rdmalen = 0;
iptr = (__be32 *)((unsigned char *)headerp + 28);
rep->rr_len -= 28; /*sizeof *headerp;*/
status = rep->rr_len;
}
/* Fix up the rpc results for upper layer */
- rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len);
+ rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen);
break;
case htonl(RDMA_NOMSG):
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index a4756576d687..629a28764da9 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -646,8 +646,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt);
if (ret > 0) {
/* read-list posted, defer until data received from client. */
- svc_xprt_received(xprt);
- return 0;
+ goto defer;
}
if (ret < 0) {
/* Post of read-list failed, free context. */
@@ -679,6 +678,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
* close bit and call svc_xprt_delete
*/
set_bit(XPT_CLOSE, &xprt->xpt_flags);
+defer:
svc_xprt_received(xprt);
return 0;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 9a7a8e7ae038..a3334e3b73cc 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -69,7 +69,7 @@
* array is only concerned with the reply we are assured that we have
* on extra page for the RPCRMDA header.
*/
-int fast_reg_xdr(struct svcxprt_rdma *xprt,
+static int fast_reg_xdr(struct svcxprt_rdma *xprt,
struct xdr_buf *xdr,
struct svc_rdma_req_map *vec)
{
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 6fb493cbd29f..3d810e7df3fb 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -61,7 +61,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt);
static void rq_cq_reap(struct svcxprt_rdma *xprt);
static void sq_cq_reap(struct svcxprt_rdma *xprt);
-DECLARE_TASKLET(dto_tasklet, dto_tasklet_func, 0UL);
+static DECLARE_TASKLET(dto_tasklet, dto_tasklet_func, 0UL);
static DEFINE_SPINLOCK(dto_lock);
static LIST_HEAD(dto_xprt_q);
@@ -827,7 +827,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct rdma_conn_param conn_param;
struct ib_qp_init_attr qp_attr;
struct ib_device_attr devattr;
- int dma_mr_acc;
+ int uninitialized_var(dma_mr_acc);
int need_dma_mr;
int ret;
int i;
@@ -1048,21 +1048,21 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk("svcrdma: new connection %p accepted with the following "
"attributes:\n"
- " local_ip : %d.%d.%d.%d\n"
+ " local_ip : %pI4\n"
" local_port : %d\n"
- " remote_ip : %d.%d.%d.%d\n"
+ " remote_ip : %pI4\n"
" remote_port : %d\n"
" max_sge : %d\n"
" sq_depth : %d\n"
" max_requests : %d\n"
" ord : %d\n",
newxprt,
- NIPQUAD(((struct sockaddr_in *)&newxprt->sc_cm_id->
- route.addr.src_addr)->sin_addr.s_addr),
+ &((struct sockaddr_in *)&newxprt->sc_cm_id->
+ route.addr.src_addr)->sin_addr.s_addr,
ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id->
route.addr.src_addr)->sin_port),
- NIPQUAD(((struct sockaddr_in *)&newxprt->sc_cm_id->
- route.addr.dst_addr)->sin_addr.s_addr),
+ &((struct sockaddr_in *)&newxprt->sc_cm_id->
+ route.addr.dst_addr)->sin_addr.s_addr,
ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id->
route.addr.dst_addr)->sin_port),
newxprt->sc_max_sge,
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index a564c1a39ec5..1dd6123070e9 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -70,11 +70,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_inline_write_padding;
-#if !RPCRDMA_PERSISTENT_REGISTRATION
-static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */
-#else
-static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL;
-#endif
+static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
+ int xprt_rdma_pad_optimize = 0;
#ifdef RPC_DEBUG
@@ -140,6 +137,14 @@ static ctl_table xr_tunables_table[] = {
.extra2 = &max_memreg,
},
{
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "rdma_pad_optimize",
+ .data = &xprt_rdma_pad_optimize,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
.ctl_name = 0,
},
};
@@ -169,7 +174,7 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt)
buf = kzalloc(20, GFP_KERNEL);
if (buf)
- snprintf(buf, 20, NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr));
+ snprintf(buf, 20, "%pI4", &addr->sin_addr.s_addr);
xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
buf = kzalloc(8, GFP_KERNEL);
@@ -181,8 +186,8 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt)
buf = kzalloc(48, GFP_KERNEL);
if (buf)
- snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s",
- NIPQUAD(addr->sin_addr.s_addr),
+ snprintf(buf, 48, "addr=%pI4 port=%u proto=%s",
+ &addr->sin_addr.s_addr,
ntohs(addr->sin_port), "rdma");
xprt->address_strings[RPC_DISPLAY_ALL] = buf;
@@ -199,8 +204,8 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt)
buf = kzalloc(30, GFP_KERNEL);
if (buf)
- snprintf(buf, 30, NIPQUAD_FMT".%u.%u",
- NIPQUAD(addr->sin_addr.s_addr),
+ snprintf(buf, 30, "%pI4.%u.%u",
+ &addr->sin_addr.s_addr,
ntohs(addr->sin_port) >> 8,
ntohs(addr->sin_port) & 0xff);
xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
@@ -364,8 +369,8 @@ xprt_setup_rdma(struct xprt_create *args)
if (ntohs(sin->sin_port) != 0)
xprt_set_bound(xprt);
- dprintk("RPC: %s: %u.%u.%u.%u:%u\n", __func__,
- NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port));
+ dprintk("RPC: %s: %pI4:%u\n",
+ __func__, &sin->sin_addr.s_addr, ntohs(sin->sin_port));
/* Set max requests */
cdata.max_requests = xprt->max_reqs;
@@ -458,6 +463,8 @@ xprt_rdma_close(struct rpc_xprt *xprt)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
dprintk("RPC: %s: closing\n", __func__);
+ if (r_xprt->rx_ep.rep_connected > 0)
+ xprt->reestablish_timeout = 0;
xprt_disconnect_done(xprt);
(void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
}
@@ -485,6 +492,11 @@ xprt_rdma_connect(struct rpc_task *task)
/* Reconnect */
schedule_delayed_work(&r_xprt->rdma_connect,
xprt->reestablish_timeout);
+ xprt->reestablish_timeout <<= 1;
+ if (xprt->reestablish_timeout > (30 * HZ))
+ xprt->reestablish_timeout = (30 * HZ);
+ else if (xprt->reestablish_timeout < (5 * HZ))
+ xprt->reestablish_timeout = (5 * HZ);
} else {
schedule_delayed_work(&r_xprt->rdma_connect, 0);
if (!RPC_IS_ASYNC(task))
@@ -591,6 +603,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
}
dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
out:
+ req->rl_connect_cookie = 0; /* our reserved value */
return req->rl_xdr_buf;
outfail:
@@ -694,13 +707,21 @@ xprt_rdma_send_request(struct rpc_task *task)
req->rl_reply->rr_xprt = xprt;
}
- if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) {
- xprt_disconnect_done(xprt);
- return -ENOTCONN; /* implies disconnect */
- }
+ /* Must suppress retransmit to maintain credits */
+ if (req->rl_connect_cookie == xprt->connect_cookie)
+ goto drop_connection;
+ req->rl_connect_cookie = xprt->connect_cookie;
+
+ if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
+ goto drop_connection;
+ task->tk_bytes_sent += rqst->rq_snd_buf.len;
rqst->rq_bytes_sent = 0;
return 0;
+
+drop_connection:
+ xprt_disconnect_done(xprt);
+ return -ENOTCONN; /* implies disconnect */
}
static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
@@ -770,7 +791,7 @@ static void __exit xprt_rdma_cleanup(void)
{
int rc;
- dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n");
+ dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n");
#ifdef RPC_DEBUG
if (sunrpc_table_header) {
unregister_sysctl_table(sunrpc_table_header);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 8ea283ecc522..3b21e0cc5e69 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -276,7 +276,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
struct rpcrdma_xprt *xprt = id->context;
struct rpcrdma_ia *ia = &xprt->rx_ia;
struct rpcrdma_ep *ep = &xprt->rx_ep;
+#ifdef RPC_DEBUG
struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
+#endif
struct ib_qp_attr attr;
struct ib_qp_init_attr iattr;
int connstate = 0;
@@ -284,6 +286,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
case RDMA_CM_EVENT_ROUTE_RESOLVED:
+ ia->ri_async_rc = 0;
complete(&ia->ri_done);
break;
case RDMA_CM_EVENT_ADDR_ERROR:
@@ -322,12 +325,11 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_DEVICE_REMOVAL:
connstate = -ENODEV;
connected:
- dprintk("RPC: %s: %s: %u.%u.%u.%u:%u"
- " (ep 0x%p event 0x%x)\n",
+ dprintk("RPC: %s: %s: %pI4:%u (ep 0x%p event 0x%x)\n",
__func__,
(event->event <= 11) ? conn[event->event] :
"unknown connection error",
- NIPQUAD(addr->sin_addr.s_addr),
+ &addr->sin_addr.s_addr,
ntohs(addr->sin_port),
ep, event->event);
atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1);
@@ -338,13 +340,31 @@ connected:
wake_up_all(&ep->rep_connect_wait);
break;
default:
- ia->ri_async_rc = -EINVAL;
- dprintk("RPC: %s: unexpected CM event %X\n",
+ dprintk("RPC: %s: unexpected CM event %d\n",
__func__, event->event);
- complete(&ia->ri_done);
break;
}
+#ifdef RPC_DEBUG
+ if (connstate == 1) {
+ int ird = attr.max_dest_rd_atomic;
+ int tird = ep->rep_remote_cma.responder_resources;
+ printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
+ "on %s, memreg %d slots %d ird %d%s\n",
+ &addr->sin_addr.s_addr,
+ ntohs(addr->sin_port),
+ ia->ri_id->device->name,
+ ia->ri_memreg_strategy,
+ xprt->rx_buf.rb_max_requests,
+ ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
+ } else if (connstate < 0) {
+ printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
+ &addr->sin_addr.s_addr,
+ ntohs(addr->sin_port),
+ connstate);
+ }
+#endif
+
return 0;
}
@@ -355,6 +375,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
struct rdma_cm_id *id;
int rc;
+ init_completion(&ia->ri_done);
+
id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP);
if (IS_ERR(id)) {
rc = PTR_ERR(id);
@@ -363,26 +385,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
return id;
}
- ia->ri_async_rc = 0;
+ ia->ri_async_rc = -ETIMEDOUT;
rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
if (rc) {
dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
__func__, rc);
goto out;
}
- wait_for_completion(&ia->ri_done);
+ wait_for_completion_interruptible_timeout(&ia->ri_done,
+ msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
rc = ia->ri_async_rc;
if (rc)
goto out;
- ia->ri_async_rc = 0;
+ ia->ri_async_rc = -ETIMEDOUT;
rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
if (rc) {
dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
__func__, rc);
goto out;
}
- wait_for_completion(&ia->ri_done);
+ wait_for_completion_interruptible_timeout(&ia->ri_done,
+ msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
rc = ia->ri_async_rc;
if (rc)
goto out;
@@ -423,11 +447,10 @@ rpcrdma_clean_cq(struct ib_cq *cq)
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
- int rc;
+ int rc, mem_priv;
+ struct ib_device_attr devattr;
struct rpcrdma_ia *ia = &xprt->rx_ia;
- init_completion(&ia->ri_done);
-
ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
if (IS_ERR(ia->ri_id)) {
rc = PTR_ERR(ia->ri_id);
@@ -443,6 +466,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
}
/*
+ * Query the device to determine if the requested memory
+ * registration strategy is supported. If it isn't, set the
+ * strategy to a globally supported model.
+ */
+ rc = ib_query_device(ia->ri_id->device, &devattr);
+ if (rc) {
+ dprintk("RPC: %s: ib_query_device failed %d\n",
+ __func__, rc);
+ goto out2;
+ }
+
+ if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
+ ia->ri_have_dma_lkey = 1;
+ ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
+ }
+
+ switch (memreg) {
+ case RPCRDMA_MEMWINDOWS:
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) {
+ dprintk("RPC: %s: MEMWINDOWS registration "
+ "specified but not supported by adapter, "
+ "using slower RPCRDMA_REGISTER\n",
+ __func__);
+ memreg = RPCRDMA_REGISTER;
+ }
+ break;
+ case RPCRDMA_MTHCAFMR:
+ if (!ia->ri_id->device->alloc_fmr) {
+#if RPCRDMA_PERSISTENT_REGISTRATION
+ dprintk("RPC: %s: MTHCAFMR registration "
+ "specified but not supported by adapter, "
+ "using riskier RPCRDMA_ALLPHYSICAL\n",
+ __func__);
+ memreg = RPCRDMA_ALLPHYSICAL;
+#else
+ dprintk("RPC: %s: MTHCAFMR registration "
+ "specified but not supported by adapter, "
+ "using slower RPCRDMA_REGISTER\n",
+ __func__);
+ memreg = RPCRDMA_REGISTER;
+#endif
+ }
+ break;
+ case RPCRDMA_FRMR:
+ /* Requires both frmr reg and local dma lkey */
+ if ((devattr.device_cap_flags &
+ (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
+ (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
+#if RPCRDMA_PERSISTENT_REGISTRATION
+ dprintk("RPC: %s: FRMR registration "
+ "specified but not supported by adapter, "
+ "using riskier RPCRDMA_ALLPHYSICAL\n",
+ __func__);
+ memreg = RPCRDMA_ALLPHYSICAL;
+#else
+ dprintk("RPC: %s: FRMR registration "
+ "specified but not supported by adapter, "
+ "using slower RPCRDMA_REGISTER\n",
+ __func__);
+ memreg = RPCRDMA_REGISTER;
+#endif
+ }
+ break;
+ }
+
+ /*
* Optionally obtain an underlying physical identity mapping in
* order to do a memory window-based bind. This base registration
* is protected from remote access - that is enabled only by binding
@@ -450,22 +540,28 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
* revoked after the corresponding completion similar to a storage
* adapter.
*/
- if (memreg > RPCRDMA_REGISTER) {
- int mem_priv = IB_ACCESS_LOCAL_WRITE;
- switch (memreg) {
+ switch (memreg) {
+ case RPCRDMA_BOUNCEBUFFERS:
+ case RPCRDMA_REGISTER:
+ case RPCRDMA_FRMR:
+ break;
#if RPCRDMA_PERSISTENT_REGISTRATION
- case RPCRDMA_ALLPHYSICAL:
- mem_priv |= IB_ACCESS_REMOTE_WRITE;
- mem_priv |= IB_ACCESS_REMOTE_READ;
- break;
+ case RPCRDMA_ALLPHYSICAL:
+ mem_priv = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ;
+ goto register_setup;
#endif
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- mem_priv |= IB_ACCESS_MW_BIND;
- break;
- default:
+ case RPCRDMA_MEMWINDOWS_ASYNC:
+ case RPCRDMA_MEMWINDOWS:
+ mem_priv = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_MW_BIND;
+ goto register_setup;
+ case RPCRDMA_MTHCAFMR:
+ if (ia->ri_have_dma_lkey)
break;
- }
+ mem_priv = IB_ACCESS_LOCAL_WRITE;
+ register_setup:
ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
if (IS_ERR(ia->ri_bind_mem)) {
printk(KERN_ALERT "%s: ib_get_dma_mr for "
@@ -475,7 +571,15 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
memreg = RPCRDMA_REGISTER;
ia->ri_bind_mem = NULL;
}
+ break;
+ default:
+ printk(KERN_ERR "%s: invalid memory registration mode %d\n",
+ __func__, memreg);
+ rc = -EINVAL;
+ goto out2;
}
+ dprintk("RPC: %s: memory registration strategy is %d\n",
+ __func__, memreg);
/* Else will do memory reg/dereg for each chunk */
ia->ri_memreg_strategy = memreg;
@@ -483,6 +587,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
return 0;
out2:
rdma_destroy_id(ia->ri_id);
+ ia->ri_id = NULL;
out1:
return rc;
}
@@ -503,15 +608,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
dprintk("RPC: %s: ib_dereg_mr returned %i\n",
__func__, rc);
}
- if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp)
- rdma_destroy_qp(ia->ri_id);
+ if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
+ if (ia->ri_id->qp)
+ rdma_destroy_qp(ia->ri_id);
+ rdma_destroy_id(ia->ri_id);
+ ia->ri_id = NULL;
+ }
if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
rc = ib_dealloc_pd(ia->ri_pd);
dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
__func__, rc);
}
- if (ia->ri_id != NULL && !IS_ERR(ia->ri_id))
- rdma_destroy_id(ia->ri_id);
}
/*
@@ -541,6 +648,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.srq = NULL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests;
switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FRMR:
+ /* Add room for frmr register and invalidate WRs */
+ ep->rep_attr.cap.max_send_wr *= 3;
+ if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
+ return -EINVAL;
+ break;
case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS:
/* Add room for mw_binds+unbinds - overkill! */
@@ -617,29 +730,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_remote_cma.private_data_len = 0;
/* Client offers RDMA Read but does not initiate */
- switch (ia->ri_memreg_strategy) {
- case RPCRDMA_BOUNCEBUFFERS:
+ ep->rep_remote_cma.initiator_depth = 0;
+ if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS)
ep->rep_remote_cma.responder_resources = 0;
- break;
- case RPCRDMA_MTHCAFMR:
- case RPCRDMA_REGISTER:
- ep->rep_remote_cma.responder_resources = cdata->max_requests *
- (RPCRDMA_MAX_DATA_SEGS / 8);
- break;
- case RPCRDMA_MEMWINDOWS:
- case RPCRDMA_MEMWINDOWS_ASYNC:
-#if RPCRDMA_PERSISTENT_REGISTRATION
- case RPCRDMA_ALLPHYSICAL:
-#endif
- ep->rep_remote_cma.responder_resources = cdata->max_requests *
- (RPCRDMA_MAX_DATA_SEGS / 2);
- break;
- default:
- break;
- }
- if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom)
+ else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
+ ep->rep_remote_cma.responder_resources = 32;
+ else
ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;
- ep->rep_remote_cma.initiator_depth = 0;
ep->rep_remote_cma.retry_count = 7;
ep->rep_remote_cma.flow_control = 0;
@@ -679,21 +776,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
if (rc)
dprintk("RPC: %s: rpcrdma_ep_disconnect"
" returned %i\n", __func__, rc);
+ rdma_destroy_qp(ia->ri_id);
+ ia->ri_id->qp = NULL;
}
- ep->rep_func = NULL;
-
/* padding - could be done in rpcrdma_buffer_destroy... */
if (ep->rep_pad_mr) {
rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
ep->rep_pad_mr = NULL;
}
- if (ia->ri_id->qp) {
- rdma_destroy_qp(ia->ri_id);
- ia->ri_id->qp = NULL;
- }
-
rpcrdma_clean_cq(ep->rep_cq);
rc = ib_destroy_cq(ep->rep_cq);
if (rc)
@@ -712,9 +804,8 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
struct rdma_cm_id *id;
int rc = 0;
int retry_count = 0;
- int reconnect = (ep->rep_connected != 0);
- if (reconnect) {
+ if (ep->rep_connected != 0) {
struct rpcrdma_xprt *xprt;
retry:
rc = rpcrdma_ep_disconnect(ep, ia);
@@ -745,6 +836,7 @@ retry:
goto out;
}
/* END TEMP */
+ rdma_destroy_qp(ia->ri_id);
rdma_destroy_id(ia->ri_id);
ia->ri_id = id;
}
@@ -769,14 +861,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
}
}
- /* Theoretically a client initiator_depth > 0 is not needed,
- * but many peers fail to complete the connection unless they
- * == responder_resources! */
- if (ep->rep_remote_cma.initiator_depth !=
- ep->rep_remote_cma.responder_resources)
- ep->rep_remote_cma.initiator_depth =
- ep->rep_remote_cma.responder_resources;
-
ep->rep_connected = 0;
rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
@@ -786,9 +870,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
goto out;
}
- if (reconnect)
- return 0;
-
wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
/*
@@ -805,14 +886,16 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
if (ep->rep_connected <= 0) {
/* Sometimes, the only way to reliably connect to remote
* CMs is to use same nonzero values for ORD and IRD. */
- ep->rep_remote_cma.initiator_depth =
- ep->rep_remote_cma.responder_resources;
- if (ep->rep_remote_cma.initiator_depth == 0)
- ++ep->rep_remote_cma.initiator_depth;
- if (ep->rep_remote_cma.responder_resources == 0)
- ++ep->rep_remote_cma.responder_resources;
- if (retry_count++ == 0)
+ if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
+ (ep->rep_remote_cma.responder_resources == 0 ||
+ ep->rep_remote_cma.initiator_depth !=
+ ep->rep_remote_cma.responder_resources)) {
+ if (ep->rep_remote_cma.responder_resources == 0)
+ ep->rep_remote_cma.responder_resources = 1;
+ ep->rep_remote_cma.initiator_depth =
+ ep->rep_remote_cma.responder_resources;
goto retry;
+ }
rc = ep->rep_connected;
} else {
dprintk("RPC: %s: connected\n", __func__);
@@ -863,6 +946,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
char *p;
size_t len;
int i, rc;
+ struct rpcrdma_mw *r;
buf->rb_max_requests = cdata->max_requests;
spin_lock_init(&buf->rb_lock);
@@ -873,7 +957,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
* 2. arrays of struct rpcrdma_req to fill in pointers
* 3. array of struct rpcrdma_rep for replies
* 4. padding, if any
- * 5. mw's, if any
+ * 5. mw's, fmr's or frmr's, if any
* Send/recv buffers in req/rep need to be registered
*/
@@ -881,6 +965,10 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
len += cdata->padding;
switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FRMR:
+ len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
+ sizeof(struct rpcrdma_mw);
+ break;
case RPCRDMA_MTHCAFMR:
/* TBD we are perhaps overallocating here */
len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
@@ -927,15 +1015,37 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
* and also reduce unbind-to-bind collision.
*/
INIT_LIST_HEAD(&buf->rb_mws);
+ r = (struct rpcrdma_mw *)p;
switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FRMR:
+ for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
+ r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
+ RPCRDMA_MAX_SEGS);
+ if (IS_ERR(r->r.frmr.fr_mr)) {
+ rc = PTR_ERR(r->r.frmr.fr_mr);
+ dprintk("RPC: %s: ib_alloc_fast_reg_mr"
+ " failed %i\n", __func__, rc);
+ goto out;
+ }
+ r->r.frmr.fr_pgl =
+ ib_alloc_fast_reg_page_list(ia->ri_id->device,
+ RPCRDMA_MAX_SEGS);
+ if (IS_ERR(r->r.frmr.fr_pgl)) {
+ rc = PTR_ERR(r->r.frmr.fr_pgl);
+ dprintk("RPC: %s: "
+ "ib_alloc_fast_reg_page_list "
+ "failed %i\n", __func__, rc);
+ goto out;
+ }
+ list_add(&r->mw_list, &buf->rb_mws);
+ ++r;
+ }
+ break;
case RPCRDMA_MTHCAFMR:
- {
- struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
- struct ib_fmr_attr fa = {
- RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT
- };
/* TBD we are perhaps overallocating here */
for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
+ static struct ib_fmr_attr fa =
+ { RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
r->r.fmr = ib_alloc_fmr(ia->ri_pd,
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
&fa);
@@ -948,12 +1058,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
list_add(&r->mw_list, &buf->rb_mws);
++r;
}
- }
break;
case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS:
- {
- struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
/* Allocate one extra request's worth, for full cycling */
for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
r->r.mw = ib_alloc_mw(ia->ri_pd);
@@ -966,7 +1073,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
list_add(&r->mw_list, &buf->rb_mws);
++r;
}
- }
break;
default:
break;
@@ -1046,6 +1152,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
int rc, i;
struct rpcrdma_ia *ia = rdmab_to_ia(buf);
+ struct rpcrdma_mw *r;
/* clean up in reverse order from create
* 1. recv mr memory (mr free, then kfree)
@@ -1065,11 +1172,19 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
}
if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
while (!list_empty(&buf->rb_mws)) {
- struct rpcrdma_mw *r;
r = list_entry(buf->rb_mws.next,
struct rpcrdma_mw, mw_list);
list_del(&r->mw_list);
switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FRMR:
+ rc = ib_dereg_mr(r->r.frmr.fr_mr);
+ if (rc)
+ dprintk("RPC: %s:"
+ " ib_dereg_mr"
+ " failed %i\n",
+ __func__, rc);
+ ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+ break;
case RPCRDMA_MTHCAFMR:
rc = ib_dealloc_fmr(r->r.fmr);
if (rc)
@@ -1115,6 +1230,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
struct rpcrdma_req *req;
unsigned long flags;
+ int i;
+ struct rpcrdma_mw *r;
spin_lock_irqsave(&buffers->rb_lock, flags);
if (buffers->rb_send_index == buffers->rb_max_requests) {
@@ -1135,9 +1252,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
}
buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
if (!list_empty(&buffers->rb_mws)) {
- int i = RPCRDMA_MAX_SEGS - 1;
+ i = RPCRDMA_MAX_SEGS - 1;
do {
- struct rpcrdma_mw *r;
r = list_entry(buffers->rb_mws.next,
struct rpcrdma_mw, mw_list);
list_del(&r->mw_list);
@@ -1171,6 +1287,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
req->rl_reply = NULL;
}
switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FRMR:
case RPCRDMA_MTHCAFMR:
case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS:
@@ -1252,7 +1369,11 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
va, len, DMA_BIDIRECTIONAL);
iov->length = len;
- if (ia->ri_bind_mem != NULL) {
+ if (ia->ri_have_dma_lkey) {
+ *mrp = NULL;
+ iov->lkey = ia->ri_dma_lkey;
+ return 0;
+ } else if (ia->ri_bind_mem != NULL) {
*mrp = NULL;
iov->lkey = ia->ri_bind_mem->lkey;
return 0;
@@ -1329,15 +1450,292 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
}
+static int
+rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
+ int *nsegs, int writing, struct rpcrdma_ia *ia,
+ struct rpcrdma_xprt *r_xprt)
+{
+ struct rpcrdma_mr_seg *seg1 = seg;
+ struct ib_send_wr frmr_wr, *bad_wr;
+ u8 key;
+ int len, pageoff;
+ int i, rc;
+
+ pageoff = offset_in_page(seg1->mr_offset);
+ seg1->mr_offset -= pageoff; /* start of page */
+ seg1->mr_len += pageoff;
+ len = -pageoff;
+ if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
+ *nsegs = RPCRDMA_MAX_DATA_SEGS;
+ for (i = 0; i < *nsegs;) {
+ rpcrdma_map_one(ia, seg, writing);
+ seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma;
+ len += seg->mr_len;
+ ++seg;
+ ++i;
+ /* Check for holes */
+ if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
+ offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
+ break;
+ }
+ dprintk("RPC: %s: Using frmr %p to map %d segments\n",
+ __func__, seg1->mr_chunk.rl_mw, i);
+
+ /* Bump the key */
+ key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
+
+ /* Prepare FRMR WR */
+ memset(&frmr_wr, 0, sizeof frmr_wr);
+ frmr_wr.opcode = IB_WR_FAST_REG_MR;
+ frmr_wr.send_flags = 0; /* unsignaled */
+ frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma;
+ frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
+ frmr_wr.wr.fast_reg.page_list_len = i;
+ frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+ frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT;
+ frmr_wr.wr.fast_reg.access_flags = (writing ?
+ IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ);
+ frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
+ DECR_CQCOUNT(&r_xprt->rx_ep);
+
+ rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr);
+
+ if (rc) {
+ dprintk("RPC: %s: failed ib_post_send for register,"
+ " status %i\n", __func__, rc);
+ while (i--)
+ rpcrdma_unmap_one(ia, --seg);
+ } else {
+ seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
+ seg1->mr_base = seg1->mr_dma + pageoff;
+ seg1->mr_nsegs = i;
+ seg1->mr_len = len;
+ }
+ *nsegs = i;
+ return rc;
+}
+
+static int
+rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
+ struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
+{
+ struct rpcrdma_mr_seg *seg1 = seg;
+ struct ib_send_wr invalidate_wr, *bad_wr;
+ int rc;
+
+ while (seg1->mr_nsegs--)
+ rpcrdma_unmap_one(ia, seg++);
+
+ memset(&invalidate_wr, 0, sizeof invalidate_wr);
+ invalidate_wr.opcode = IB_WR_LOCAL_INV;
+ invalidate_wr.send_flags = 0; /* unsignaled */
+ invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
+ DECR_CQCOUNT(&r_xprt->rx_ep);
+
+ rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
+ if (rc)
+ dprintk("RPC: %s: failed ib_post_send for invalidate,"
+ " status %i\n", __func__, rc);
+ return rc;
+}
+
+static int
+rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
+ int *nsegs, int writing, struct rpcrdma_ia *ia)
+{
+ struct rpcrdma_mr_seg *seg1 = seg;
+ u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
+ int len, pageoff, i, rc;
+
+ pageoff = offset_in_page(seg1->mr_offset);
+ seg1->mr_offset -= pageoff; /* start of page */
+ seg1->mr_len += pageoff;
+ len = -pageoff;
+ if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
+ *nsegs = RPCRDMA_MAX_DATA_SEGS;
+ for (i = 0; i < *nsegs;) {
+ rpcrdma_map_one(ia, seg, writing);
+ physaddrs[i] = seg->mr_dma;
+ len += seg->mr_len;
+ ++seg;
+ ++i;
+ /* Check for holes */
+ if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
+ offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
+ break;
+ }
+ rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
+ physaddrs, i, seg1->mr_dma);
+ if (rc) {
+ dprintk("RPC: %s: failed ib_map_phys_fmr "
+ "%u@0x%llx+%i (%d)... status %i\n", __func__,
+ len, (unsigned long long)seg1->mr_dma,
+ pageoff, i, rc);
+ while (i--)
+ rpcrdma_unmap_one(ia, --seg);
+ } else {
+ seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
+ seg1->mr_base = seg1->mr_dma + pageoff;
+ seg1->mr_nsegs = i;
+ seg1->mr_len = len;
+ }
+ *nsegs = i;
+ return rc;
+}
+
+static int
+rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
+ struct rpcrdma_ia *ia)
+{
+ struct rpcrdma_mr_seg *seg1 = seg;
+ LIST_HEAD(l);
+ int rc;
+
+ list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
+ rc = ib_unmap_fmr(&l);
+ while (seg1->mr_nsegs--)
+ rpcrdma_unmap_one(ia, seg++);
+ if (rc)
+ dprintk("RPC: %s: failed ib_unmap_fmr,"
+ " status %i\n", __func__, rc);
+ return rc;
+}
+
+static int
+rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg,
+ int *nsegs, int writing, struct rpcrdma_ia *ia,
+ struct rpcrdma_xprt *r_xprt)
+{
+ int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
+ IB_ACCESS_REMOTE_READ);
+ struct ib_mw_bind param;
+ int rc;
+
+ *nsegs = 1;
+ rpcrdma_map_one(ia, seg, writing);
+ param.mr = ia->ri_bind_mem;
+ param.wr_id = 0ULL; /* no send cookie */
+ param.addr = seg->mr_dma;
+ param.length = seg->mr_len;
+ param.send_flags = 0;
+ param.mw_access_flags = mem_priv;
+
+ DECR_CQCOUNT(&r_xprt->rx_ep);
+ rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
+ if (rc) {
+ dprintk("RPC: %s: failed ib_bind_mw "
+ "%u@0x%llx status %i\n",
+ __func__, seg->mr_len,
+ (unsigned long long)seg->mr_dma, rc);
+ rpcrdma_unmap_one(ia, seg);
+ } else {
+ seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
+ seg->mr_base = param.addr;
+ seg->mr_nsegs = 1;
+ }
+ return rc;
+}
+
+static int
+rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg,
+ struct rpcrdma_ia *ia,
+ struct rpcrdma_xprt *r_xprt, void **r)
+{
+ struct ib_mw_bind param;
+ LIST_HEAD(l);
+ int rc;
+
+ BUG_ON(seg->mr_nsegs != 1);
+ param.mr = ia->ri_bind_mem;
+ param.addr = 0ULL; /* unbind */
+ param.length = 0;
+ param.mw_access_flags = 0;
+ if (*r) {
+ param.wr_id = (u64) (unsigned long) *r;
+ param.send_flags = IB_SEND_SIGNALED;
+ INIT_CQCOUNT(&r_xprt->rx_ep);
+ } else {
+ param.wr_id = 0ULL;
+ param.send_flags = 0;
+ DECR_CQCOUNT(&r_xprt->rx_ep);
+ }
+ rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
+ rpcrdma_unmap_one(ia, seg);
+ if (rc)
+ dprintk("RPC: %s: failed ib_(un)bind_mw,"
+ " status %i\n", __func__, rc);
+ else
+ *r = NULL; /* will upcall on completion */
+ return rc;
+}
+
+static int
+rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg,
+ int *nsegs, int writing, struct rpcrdma_ia *ia)
+{
+ int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
+ IB_ACCESS_REMOTE_READ);
+ struct rpcrdma_mr_seg *seg1 = seg;
+ struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
+ int len, i, rc = 0;
+
+ if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
+ *nsegs = RPCRDMA_MAX_DATA_SEGS;
+ for (len = 0, i = 0; i < *nsegs;) {
+ rpcrdma_map_one(ia, seg, writing);
+ ipb[i].addr = seg->mr_dma;
+ ipb[i].size = seg->mr_len;
+ len += seg->mr_len;
+ ++seg;
+ ++i;
+ /* Check for holes */
+ if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
+ offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
+ break;
+ }
+ seg1->mr_base = seg1->mr_dma;
+ seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
+ ipb, i, mem_priv, &seg1->mr_base);
+ if (IS_ERR(seg1->mr_chunk.rl_mr)) {
+ rc = PTR_ERR(seg1->mr_chunk.rl_mr);
+ dprintk("RPC: %s: failed ib_reg_phys_mr "
+ "%u@0x%llx (%d)... status %i\n",
+ __func__, len,
+ (unsigned long long)seg1->mr_dma, i, rc);
+ while (i--)
+ rpcrdma_unmap_one(ia, --seg);
+ } else {
+ seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
+ seg1->mr_nsegs = i;
+ seg1->mr_len = len;
+ }
+ *nsegs = i;
+ return rc;
+}
+
+static int
+rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg,
+ struct rpcrdma_ia *ia)
+{
+ struct rpcrdma_mr_seg *seg1 = seg;
+ int rc;
+
+ rc = ib_dereg_mr(seg1->mr_chunk.rl_mr);
+ seg1->mr_chunk.rl_mr = NULL;
+ while (seg1->mr_nsegs--)
+ rpcrdma_unmap_one(ia, seg++);
+ if (rc)
+ dprintk("RPC: %s: failed ib_dereg_mr,"
+ " status %i\n", __func__, rc);
+ return rc;
+}
+
int
rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
- IB_ACCESS_REMOTE_READ);
- struct rpcrdma_mr_seg *seg1 = seg;
- int i;
int rc = 0;
switch (ia->ri_memreg_strategy) {
@@ -1352,114 +1750,25 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
break;
#endif
- /* Registration using fast memory registration */
+ /* Registration using frmr registration */
+ case RPCRDMA_FRMR:
+ rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
+ break;
+
+ /* Registration using fmr memory registration */
case RPCRDMA_MTHCAFMR:
- {
- u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
- int len, pageoff = offset_in_page(seg->mr_offset);
- seg1->mr_offset -= pageoff; /* start of page */
- seg1->mr_len += pageoff;
- len = -pageoff;
- if (nsegs > RPCRDMA_MAX_DATA_SEGS)
- nsegs = RPCRDMA_MAX_DATA_SEGS;
- for (i = 0; i < nsegs;) {
- rpcrdma_map_one(ia, seg, writing);
- physaddrs[i] = seg->mr_dma;
- len += seg->mr_len;
- ++seg;
- ++i;
- /* Check for holes */
- if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
- offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
- break;
- }
- nsegs = i;
- rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
- physaddrs, nsegs, seg1->mr_dma);
- if (rc) {
- dprintk("RPC: %s: failed ib_map_phys_fmr "
- "%u@0x%llx+%i (%d)... status %i\n", __func__,
- len, (unsigned long long)seg1->mr_dma,
- pageoff, nsegs, rc);
- while (nsegs--)
- rpcrdma_unmap_one(ia, --seg);
- } else {
- seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
- seg1->mr_base = seg1->mr_dma + pageoff;
- seg1->mr_nsegs = nsegs;
- seg1->mr_len = len;
- }
- }
+ rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
break;
/* Registration using memory windows */
case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS:
- {
- struct ib_mw_bind param;
- rpcrdma_map_one(ia, seg, writing);
- param.mr = ia->ri_bind_mem;
- param.wr_id = 0ULL; /* no send cookie */
- param.addr = seg->mr_dma;
- param.length = seg->mr_len;
- param.send_flags = 0;
- param.mw_access_flags = mem_priv;
-
- DECR_CQCOUNT(&r_xprt->rx_ep);
- rc = ib_bind_mw(ia->ri_id->qp,
- seg->mr_chunk.rl_mw->r.mw, &param);
- if (rc) {
- dprintk("RPC: %s: failed ib_bind_mw "
- "%u@0x%llx status %i\n",
- __func__, seg->mr_len,
- (unsigned long long)seg->mr_dma, rc);
- rpcrdma_unmap_one(ia, seg);
- } else {
- seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
- seg->mr_base = param.addr;
- seg->mr_nsegs = 1;
- nsegs = 1;
- }
- }
+ rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt);
break;
/* Default registration each time */
default:
- {
- struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
- int len = 0;
- if (nsegs > RPCRDMA_MAX_DATA_SEGS)
- nsegs = RPCRDMA_MAX_DATA_SEGS;
- for (i = 0; i < nsegs;) {
- rpcrdma_map_one(ia, seg, writing);
- ipb[i].addr = seg->mr_dma;
- ipb[i].size = seg->mr_len;
- len += seg->mr_len;
- ++seg;
- ++i;
- /* Check for holes */
- if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
- offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
- break;
- }
- nsegs = i;
- seg1->mr_base = seg1->mr_dma;
- seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
- ipb, nsegs, mem_priv, &seg1->mr_base);
- if (IS_ERR(seg1->mr_chunk.rl_mr)) {
- rc = PTR_ERR(seg1->mr_chunk.rl_mr);
- dprintk("RPC: %s: failed ib_reg_phys_mr "
- "%u@0x%llx (%d)... status %i\n",
- __func__, len,
- (unsigned long long)seg1->mr_dma, nsegs, rc);
- while (nsegs--)
- rpcrdma_unmap_one(ia, --seg);
- } else {
- seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
- seg1->mr_nsegs = nsegs;
- seg1->mr_len = len;
- }
- }
+ rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia);
break;
}
if (rc)
@@ -1473,7 +1782,6 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
struct rpcrdma_xprt *r_xprt, void *r)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- struct rpcrdma_mr_seg *seg1 = seg;
int nsegs = seg->mr_nsegs, rc;
switch (ia->ri_memreg_strategy) {
@@ -1486,56 +1794,21 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
break;
#endif
+ case RPCRDMA_FRMR:
+ rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
+ break;
+
case RPCRDMA_MTHCAFMR:
- {
- LIST_HEAD(l);
- list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l);
- rc = ib_unmap_fmr(&l);
- while (seg1->mr_nsegs--)
- rpcrdma_unmap_one(ia, seg++);
- }
- if (rc)
- dprintk("RPC: %s: failed ib_unmap_fmr,"
- " status %i\n", __func__, rc);
+ rc = rpcrdma_deregister_fmr_external(seg, ia);
break;
case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS:
- {
- struct ib_mw_bind param;
- BUG_ON(nsegs != 1);
- param.mr = ia->ri_bind_mem;
- param.addr = 0ULL; /* unbind */
- param.length = 0;
- param.mw_access_flags = 0;
- if (r) {
- param.wr_id = (u64) (unsigned long) r;
- param.send_flags = IB_SEND_SIGNALED;
- INIT_CQCOUNT(&r_xprt->rx_ep);
- } else {
- param.wr_id = 0ULL;
- param.send_flags = 0;
- DECR_CQCOUNT(&r_xprt->rx_ep);
- }
- rc = ib_bind_mw(ia->ri_id->qp,
- seg->mr_chunk.rl_mw->r.mw, &param);
- rpcrdma_unmap_one(ia, seg);
- }
- if (rc)
- dprintk("RPC: %s: failed ib_(un)bind_mw,"
- " status %i\n", __func__, rc);
- else
- r = NULL; /* will upcall on completion */
+ rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r);
break;
default:
- rc = ib_dereg_mr(seg1->mr_chunk.rl_mr);
- seg1->mr_chunk.rl_mr = NULL;
- while (seg1->mr_nsegs--)
- rpcrdma_unmap_one(ia, seg++);
- if (rc)
- dprintk("RPC: %s: failed ib_dereg_mr,"
- " status %i\n", __func__, rc);
+ rc = rpcrdma_deregister_default_external(seg, ia);
break;
}
if (r) {
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 2427822f8bd4..c7a7eba991bc 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -51,6 +51,9 @@
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
+#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
+#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
+
/*
* Interface Adapter -- one per transport instance
*/
@@ -58,6 +61,8 @@ struct rpcrdma_ia {
struct rdma_cm_id *ri_id;
struct ib_pd *ri_pd;
struct ib_mr *ri_bind_mem;
+ u32 ri_dma_lkey;
+ int ri_have_dma_lkey;
struct completion ri_done;
int ri_async_rc;
enum rpcrdma_memreg ri_memreg_strategy;
@@ -156,6 +161,10 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
union {
struct ib_mw *mw;
struct ib_fmr *fmr;
+ struct {
+ struct ib_fast_reg_page_list *fr_pgl;
+ struct ib_mr *fr_mr;
+ } frmr;
} r;
struct list_head mw_list;
} *rl_mw;
@@ -175,6 +184,7 @@ struct rpcrdma_req {
size_t rl_size; /* actual length of buffer */
unsigned int rl_niovs; /* 0, 2 or 4 */
unsigned int rl_nchunks; /* non-zero if chunks */
+ unsigned int rl_connect_cookie; /* retry detection */
struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */
@@ -198,7 +208,7 @@ struct rpcrdma_buffer {
atomic_t rb_credits; /* most recent server credits */
unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */
int rb_max_requests;/* client max requests */
- struct list_head rb_mws; /* optional memory windows/fmrs */
+ struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */
int rb_send_index;
struct rpcrdma_req **rb_send_bufs;
int rb_recv_index;
@@ -273,6 +283,11 @@ struct rpcrdma_xprt {
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
+/* Setting this to 0 ensures interoperability with early servers.
+ * Setting this to 1 enhances certain unaligned read/write performance.
+ * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
+extern int xprt_rdma_pad_optimize;
+
/*
* Interface Adapter calls - xprtrdma/verbs.c
*/
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9a288d5eea64..5cbb404c4cdf 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -249,6 +249,7 @@ struct sock_xprt {
void (*old_data_ready)(struct sock *, int);
void (*old_state_change)(struct sock *);
void (*old_write_space)(struct sock *);
+ void (*old_error_report)(struct sock *);
};
/*
@@ -283,8 +284,7 @@ static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt,
buf = kzalloc(20, GFP_KERNEL);
if (buf) {
- snprintf(buf, 20, NIPQUAD_FMT,
- NIPQUAD(addr->sin_addr.s_addr));
+ snprintf(buf, 20, "%pI4", &addr->sin_addr.s_addr);
}
xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
@@ -299,8 +299,8 @@ static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt,
buf = kzalloc(48, GFP_KERNEL);
if (buf) {
- snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s",
- NIPQUAD(addr->sin_addr.s_addr),
+ snprintf(buf, 48, "addr=%pI4 port=%u proto=%s",
+ &addr->sin_addr.s_addr,
ntohs(addr->sin_port),
protocol);
}
@@ -322,8 +322,8 @@ static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt,
buf = kzalloc(30, GFP_KERNEL);
if (buf) {
- snprintf(buf, 30, NIPQUAD_FMT".%u.%u",
- NIPQUAD(addr->sin_addr.s_addr),
+ snprintf(buf, 30, "%pI4.%u.%u",
+ &addr->sin_addr.s_addr,
ntohs(addr->sin_port) >> 8,
ntohs(addr->sin_port) & 0xff);
}
@@ -341,8 +341,7 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt,
buf = kzalloc(40, GFP_KERNEL);
if (buf) {
- snprintf(buf, 40, NIP6_FMT,
- NIP6(addr->sin6_addr));
+ snprintf(buf, 40, "%pI6",&addr->sin6_addr);
}
xprt->address_strings[RPC_DISPLAY_ADDR] = buf;
@@ -357,18 +356,17 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt,
buf = kzalloc(64, GFP_KERNEL);
if (buf) {
- snprintf(buf, 64, "addr="NIP6_FMT" port=%u proto=%s",
- NIP6(addr->sin6_addr),
+ snprintf(buf, 64, "addr=%pI6 port=%u proto=%s",
+ &addr->sin6_addr,
ntohs(addr->sin6_port),
protocol);
}
xprt->address_strings[RPC_DISPLAY_ALL] = buf;
buf = kzalloc(36, GFP_KERNEL);
- if (buf) {
- snprintf(buf, 36, NIP6_SEQFMT,
- NIP6(addr->sin6_addr));
- }
+ if (buf)
+ snprintf(buf, 36, "%pi6", &addr->sin6_addr);
+
xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf;
buf = kzalloc(8, GFP_KERNEL);
@@ -380,10 +378,10 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt,
buf = kzalloc(50, GFP_KERNEL);
if (buf) {
- snprintf(buf, 50, NIP6_FMT".%u.%u",
- NIP6(addr->sin6_addr),
- ntohs(addr->sin6_port) >> 8,
- ntohs(addr->sin6_port) & 0xff);
+ snprintf(buf, 50, "%pI6.%u.%u",
+ &addr->sin6_addr,
+ ntohs(addr->sin6_port) >> 8,
+ ntohs(addr->sin6_port) & 0xff);
}
xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
@@ -698,8 +696,9 @@ static int xs_tcp_send_request(struct rpc_task *task)
case -EAGAIN:
xs_nospace(task);
break;
- case -ECONNREFUSED:
case -ECONNRESET:
+ xs_tcp_shutdown(xprt);
+ case -ECONNREFUSED:
case -ENOTCONN:
case -EPIPE:
status = -ENOTCONN;
@@ -742,6 +741,22 @@ out_release:
xprt_release_xprt(xprt, task);
}
+static void xs_save_old_callbacks(struct sock_xprt *transport, struct sock *sk)
+{
+ transport->old_data_ready = sk->sk_data_ready;
+ transport->old_state_change = sk->sk_state_change;
+ transport->old_write_space = sk->sk_write_space;
+ transport->old_error_report = sk->sk_error_report;
+}
+
+static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *sk)
+{
+ sk->sk_data_ready = transport->old_data_ready;
+ sk->sk_state_change = transport->old_state_change;
+ sk->sk_write_space = transport->old_write_space;
+ sk->sk_error_report = transport->old_error_report;
+}
+
/**
* xs_close - close a socket
* @xprt: transport
@@ -765,9 +780,8 @@ static void xs_close(struct rpc_xprt *xprt)
transport->sock = NULL;
sk->sk_user_data = NULL;
- sk->sk_data_ready = transport->old_data_ready;
- sk->sk_state_change = transport->old_state_change;
- sk->sk_write_space = transport->old_write_space;
+
+ xs_restore_old_callbacks(transport, sk);
write_unlock_bh(&sk->sk_callback_lock);
sk->sk_no_check = 0;
@@ -1180,6 +1194,28 @@ static void xs_tcp_state_change(struct sock *sk)
}
/**
+ * xs_tcp_error_report - callback mainly for catching RST events
+ * @sk: socket
+ */
+static void xs_tcp_error_report(struct sock *sk)
+{
+ struct rpc_xprt *xprt;
+
+ read_lock(&sk->sk_callback_lock);
+ if (sk->sk_err != ECONNRESET || sk->sk_state != TCP_ESTABLISHED)
+ goto out;
+ if (!(xprt = xprt_from_sock(sk)))
+ goto out;
+ dprintk("RPC: %s client %p...\n"
+ "RPC: error %d\n",
+ __func__, xprt, sk->sk_err);
+
+ xprt_force_disconnect(xprt);
+out:
+ read_unlock(&sk->sk_callback_lock);
+}
+
+/**
* xs_udp_write_space - callback invoked when socket buffer space
* becomes available
* @sk: socket whose state has changed
@@ -1376,8 +1412,8 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
if (port > last)
nloop++;
} while (err == -EADDRINUSE && nloop != 2);
- dprintk("RPC: %s "NIPQUAD_FMT":%u: %s (%d)\n",
- __func__, NIPQUAD(myaddr.sin_addr),
+ dprintk("RPC: %s %pI4:%u: %s (%d)\n",
+ __func__, &myaddr.sin_addr,
port, err ? "failed" : "ok", err);
return err;
}
@@ -1409,8 +1445,8 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock)
if (port > last)
nloop++;
} while (err == -EADDRINUSE && nloop != 2);
- dprintk("RPC: xs_bind6 "NIP6_FMT":%u: %s (%d)\n",
- NIP6(myaddr.sin6_addr), port, err ? "failed" : "ok", err);
+ dprintk("RPC: xs_bind6 %pI6:%u: %s (%d)\n",
+ &myaddr.sin6_addr, port, err ? "failed" : "ok", err);
return err;
}
@@ -1454,10 +1490,9 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
write_lock_bh(&sk->sk_callback_lock);
+ xs_save_old_callbacks(transport, sk);
+
sk->sk_user_data = xprt;
- transport->old_data_ready = sk->sk_data_ready;
- transport->old_state_change = sk->sk_state_change;
- transport->old_write_space = sk->sk_write_space;
sk->sk_data_ready = xs_udp_data_ready;
sk->sk_write_space = xs_udp_write_space;
sk->sk_no_check = UDP_CSUM_NORCV;
@@ -1589,13 +1624,13 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
write_lock_bh(&sk->sk_callback_lock);
+ xs_save_old_callbacks(transport, sk);
+
sk->sk_user_data = xprt;
- transport->old_data_ready = sk->sk_data_ready;
- transport->old_state_change = sk->sk_state_change;
- transport->old_write_space = sk->sk_write_space;
sk->sk_data_ready = xs_tcp_data_ready;
sk->sk_state_change = xs_tcp_state_change;
sk->sk_write_space = xs_tcp_write_space;
+ sk->sk_error_report = xs_tcp_error_report;
sk->sk_allocation = GFP_ATOMIC;
/* socket options */