summaryrefslogtreecommitdiff
path: root/fs/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/dir.c31
-rw-r--r--fs/ceph/file.c39
-rw-r--r--fs/ceph/inode.c38
-rw-r--r--fs/ceph/mds_client.c2
-rw-r--r--fs/ceph/super.h2
5 files changed, 71 insertions, 41 deletions
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 158c700fdca5..fa7ca04ee816 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -40,12 +40,13 @@ int ceph_init_dentry(struct dentry *dentry)
if (dentry->d_fsdata)
return 0;
- if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
- dentry->d_op = &ceph_dentry_ops;
+ if (dentry->d_parent == NULL || /* nfs fh_to_dentry */
+ ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
+ d_set_d_op(dentry, &ceph_dentry_ops);
else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
- dentry->d_op = &ceph_snapdir_dentry_ops;
+ d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
else
- dentry->d_op = &ceph_snap_dentry_ops;
+ d_set_d_op(dentry, &ceph_snap_dentry_ops);
di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
if (!di)
@@ -111,7 +112,7 @@ static int __dcache_readdir(struct file *filp,
dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos,
last);
- spin_lock(&dcache_lock);
+ spin_lock(&parent->d_lock);
/* start at beginning? */
if (filp->f_pos == 2 || last == NULL ||
@@ -135,6 +136,7 @@ more:
fi->at_end = 1;
goto out_unlock;
}
+ spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
if (!d_unhashed(dentry) && dentry->d_inode &&
ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
@@ -144,13 +146,15 @@ more:
dentry->d_name.len, dentry->d_name.name, di->offset,
filp->f_pos, d_unhashed(dentry) ? " unhashed" : "",
!dentry->d_inode ? " null" : "");
+ spin_unlock(&dentry->d_lock);
p = p->prev;
dentry = list_entry(p, struct dentry, d_u.d_child);
di = ceph_dentry(dentry);
}
- atomic_inc(&dentry->d_count);
- spin_unlock(&dcache_lock);
+ dget_dlock(dentry);
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&parent->d_lock);
dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos,
dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
@@ -176,19 +180,19 @@ more:
filp->f_pos++;
- /* make sure a dentry wasn't dropped while we didn't have dcache_lock */
+ /* make sure a dentry wasn't dropped while we didn't have parent lock */
if (!ceph_i_test(dir, CEPH_I_COMPLETE)) {
dout(" lost I_COMPLETE on %p; falling back to mds\n", dir);
err = -EAGAIN;
goto out;
}
- spin_lock(&dcache_lock);
+ spin_lock(&parent->d_lock);
p = p->prev; /* advance to next dentry */
goto more;
out_unlock:
- spin_unlock(&dcache_lock);
+ spin_unlock(&parent->d_lock);
out:
if (last)
dput(last);
@@ -986,7 +990,12 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
*/
static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- struct inode *dir = dentry->d_parent->d_inode;
+ struct inode *dir;
+
+ if (nd->flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ dir = dentry->d_parent->d_inode;
dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 8d79b8912e31..7d0e4a82d898 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -282,7 +282,8 @@ int ceph_release(struct inode *inode, struct file *file)
static int striped_read(struct inode *inode,
u64 off, u64 len,
struct page **pages, int num_pages,
- int *checkeof, bool align_to_pages)
+ int *checkeof, bool align_to_pages,
+ unsigned long buf_align)
{
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
@@ -307,7 +308,7 @@ static int striped_read(struct inode *inode,
more:
if (align_to_pages)
- page_align = (pos - io_align) & ~PAGE_MASK;
+ page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
else
page_align = pos & ~PAGE_MASK;
this_len = left;
@@ -376,16 +377,18 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
struct inode *inode = file->f_dentry->d_inode;
struct page **pages;
u64 off = *poff;
- int num_pages = calc_pages_for(off, len);
- int ret;
+ int num_pages, ret;
dout("sync_read on file %p %llu~%u %s\n", file, off, len,
(file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
- if (file->f_flags & O_DIRECT)
- pages = ceph_get_direct_page_vector(data, num_pages);
- else
+ if (file->f_flags & O_DIRECT) {
+ num_pages = calc_pages_for((unsigned long)data, len);
+ pages = ceph_get_direct_page_vector(data, num_pages, true);
+ } else {
+ num_pages = calc_pages_for(off, len);
pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
+ }
if (IS_ERR(pages))
return PTR_ERR(pages);
@@ -400,7 +403,8 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
goto done;
ret = striped_read(inode, off, len, pages, num_pages, checkeof,
- file->f_flags & O_DIRECT);
+ file->f_flags & O_DIRECT,
+ (unsigned long)data & ~PAGE_MASK);
if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
@@ -409,7 +413,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
done:
if (file->f_flags & O_DIRECT)
- ceph_put_page_vector(pages, num_pages);
+ ceph_put_page_vector(pages, num_pages, true);
else
ceph_release_page_vector(pages, num_pages);
dout("sync_read result %d\n", ret);
@@ -456,6 +460,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
int do_sync = 0;
int check_caps = 0;
int page_align, io_align;
+ unsigned long buf_align;
int ret;
struct timespec mtime = CURRENT_TIME;
@@ -471,6 +476,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
pos = *offset;
io_align = pos & ~PAGE_MASK;
+ buf_align = (unsigned long)data & ~PAGE_MASK;
ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
if (ret < 0)
@@ -496,12 +502,15 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
*/
more:
len = left;
- if (file->f_flags & O_DIRECT)
+ if (file->f_flags & O_DIRECT) {
/* write from beginning of first page, regardless of
io alignment */
- page_align = (pos - io_align) & ~PAGE_MASK;
- else
+ page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
+ num_pages = calc_pages_for((unsigned long)data, len);
+ } else {
page_align = pos & ~PAGE_MASK;
+ num_pages = calc_pages_for(pos, len);
+ }
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
ceph_vino(inode), pos, &len,
CEPH_OSD_OP_WRITE, flags,
@@ -512,10 +521,8 @@ more:
if (!req)
return -ENOMEM;
- num_pages = calc_pages_for(pos, len);
-
if (file->f_flags & O_DIRECT) {
- pages = ceph_get_direct_page_vector(data, num_pages);
+ pages = ceph_get_direct_page_vector(data, num_pages, false);
if (IS_ERR(pages)) {
ret = PTR_ERR(pages);
goto out;
@@ -565,7 +572,7 @@ more:
}
if (file->f_flags & O_DIRECT)
- ceph_put_page_vector(pages, num_pages);
+ ceph_put_page_vector(pages, num_pages, false);
else if (file->f_flags & O_SYNC)
ceph_release_page_vector(pages, num_pages);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index bf1286588f26..e61de4f7b99d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -368,6 +368,15 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
return &ci->vfs_inode;
}
+static void ceph_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ceph_inode_cachep, ci);
+}
+
void ceph_destroy_inode(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
@@ -407,7 +416,7 @@ void ceph_destroy_inode(struct inode *inode)
if (ci->i_xattrs.prealloc_blob)
ceph_buffer_put(ci->i_xattrs.prealloc_blob);
- kmem_cache_free(ceph_inode_cachep, ci);
+ call_rcu(&inode->i_rcu, ceph_i_callback);
}
@@ -841,13 +850,13 @@ static void ceph_set_dentry_offset(struct dentry *dn)
di->offset = ceph_inode(inode)->i_max_offset++;
spin_unlock(&inode->i_lock);
- spin_lock(&dcache_lock);
- spin_lock(&dn->d_lock);
+ spin_lock(&dir->d_lock);
+ spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
list_move(&dn->d_u.d_child, &dir->d_subdirs);
dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
dn->d_u.d_child.prev, dn->d_u.d_child.next);
spin_unlock(&dn->d_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&dir->d_lock);
}
/*
@@ -879,8 +888,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
} else if (realdn) {
dout("dn %p (%d) spliced with %p (%d) "
"inode %p ino %llx.%llx\n",
- dn, atomic_read(&dn->d_count),
- realdn, atomic_read(&realdn->d_count),
+ dn, dn->d_count,
+ realdn, realdn->d_count,
realdn->d_inode, ceph_vinop(realdn->d_inode));
dput(dn);
dn = realdn;
@@ -1231,11 +1240,11 @@ retry_lookup:
goto retry_lookup;
} else {
/* reorder parent's d_subdirs */
- spin_lock(&dcache_lock);
- spin_lock(&dn->d_lock);
+ spin_lock(&parent->d_lock);
+ spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
list_move(&dn->d_u.d_child, &parent->d_subdirs);
spin_unlock(&dn->d_lock);
- spin_unlock(&dcache_lock);
+ spin_unlock(&parent->d_lock);
}
di = dn->d_fsdata;
@@ -1772,12 +1781,17 @@ int ceph_do_getattr(struct inode *inode, int mask)
* Check inode permissions. We verify we have a valid value for
* the AUTH cap, then call the generic handler.
*/
-int ceph_permission(struct inode *inode, int mask)
+int ceph_permission(struct inode *inode, int mask, unsigned int flags)
{
- int err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);
+ int err;
+
+ if (flags & IPERM_FLAG_RCU)
+ return -ECHILD;
+
+ err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);
if (!err)
- err = generic_permission(inode, mask, NULL);
+ err = generic_permission(inode, mask, flags, NULL);
return err;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 38800eaa81d0..a50fca1e03be 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1486,7 +1486,7 @@ retry:
*base = ceph_ino(temp->d_inode);
*plen = len;
dout("build_path on %p %d built %llx '%.*s'\n",
- dentry, atomic_read(&dentry->d_count), *base, len, path);
+ dentry, dentry->d_count, *base, len, path);
return path;
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 7f01728a4657..4553d8829edb 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -665,7 +665,7 @@ extern void ceph_queue_invalidate(struct inode *inode);
extern void ceph_queue_writeback(struct inode *inode);
extern int ceph_do_getattr(struct inode *inode, int mask);
-extern int ceph_permission(struct inode *inode, int mask);
+extern int ceph_permission(struct inode *inode, int mask, unsigned int flags);
extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);