From 65bac575e35915801ea518b9d8d8824367d125c8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 2 Jun 2009 14:24:01 +0200 Subject: ocfs2: Fix possible deadlock with quotas in ocfs2_setattr() We called vfs_dq_transfer() with global quota file lock held. This can lead to deadlocks as if vfs_dq_transfer() has to allocate new quota structure, it calls ocfs2_dquot_acquire() which tries to get quota file lock again and this can block if another node requested the lock in the mean time. Since we have to call vfs_dq_transfer() with transaction already started and quota file lock ranks above the transaction start, we cannot just rely on ocfs2_dquot_acquire() or ocfs2_dquot_release() on getting the lock if they need it. We fix the problem by acquiring pointers to all quota structures needed by vfs_dq_transfer() already before calling the function. By this we are sure that all quota structures are properly allocated and they can be freed only after we drop references to them. Thus we don't need quota file lock anywhere inside vfs_dq_transfer(). Signed-off-by: Jan Kara Signed-off-by: Joel Becker --- fs/ocfs2/file.c | 53 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 23 deletions(-) (limited to 'fs/ocfs2/file.c') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index c2a87c885b73..1a96cac31791 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -894,9 +894,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) struct ocfs2_super *osb = OCFS2_SB(sb); struct buffer_head *bh = NULL; handle_t *handle = NULL; - int locked[MAXQUOTAS] = {0, 0}; - int credits, qtype; - struct ocfs2_mem_dqinfo *oinfo; + int qtype; + struct dquot *transfer_from[MAXQUOTAS] = { }; + struct dquot *transfer_to[MAXQUOTAS] = { }; mlog_entry("(0x%p, '%.*s')\n", dentry, dentry->d_name.len, dentry->d_name.name); @@ -969,30 +969,37 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { - credits = OCFS2_INODE_UPDATE_CREDITS; + /* + * Gather pointers to quota structures so that allocation / + * freeing of quota structures happens here and not inside + * vfs_dq_transfer() where we have problems with lock ordering + */ if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid && OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { - oinfo = sb_dqinfo(sb, USRQUOTA)->dqi_priv; - status = ocfs2_lock_global_qf(oinfo, 1); - if (status < 0) + transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid, + USRQUOTA); + transfer_from[USRQUOTA] = dqget(sb, inode->i_uid, + USRQUOTA); + if (!transfer_to[USRQUOTA] || !transfer_from[USRQUOTA]) { + status = -ESRCH; goto bail_unlock; - credits += ocfs2_calc_qinit_credits(sb, USRQUOTA) + - ocfs2_calc_qdel_credits(sb, USRQUOTA); - locked[USRQUOTA] = 1; + } } if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid && OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { - oinfo = sb_dqinfo(sb, GRPQUOTA)->dqi_priv; - status = ocfs2_lock_global_qf(oinfo, 1); - if (status < 0) + transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid, + GRPQUOTA); + transfer_from[GRPQUOTA] = dqget(sb, inode->i_gid, + GRPQUOTA); + if (!transfer_to[GRPQUOTA] || !transfer_from[GRPQUOTA]) { + status = -ESRCH; goto bail_unlock; - credits += ocfs2_calc_qinit_credits(sb, GRPQUOTA) + - ocfs2_calc_qdel_credits(sb, GRPQUOTA); - locked[GRPQUOTA] = 1; + } } - handle = ocfs2_start_trans(osb, credits); + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS + + 2 * ocfs2_quota_trans_credits(sb)); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); @@ -1030,12 +1037,6 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) bail_commit: ocfs2_commit_trans(osb, handle); bail_unlock: - for (qtype = 0; qtype < MAXQUOTAS; qtype++) { - if (!locked[qtype]) - continue; - oinfo = sb_dqinfo(sb, qtype)->dqi_priv; - ocfs2_unlock_global_qf(oinfo, 1); - } ocfs2_inode_unlock(inode, 1); bail_unlock_rw: if (size_change) @@ -1043,6 +1044,12 @@ bail_unlock_rw: bail: brelse(bh); + /* Release quota pointers in case we acquired them */ + for (qtype = 0; qtype < MAXQUOTAS; qtype++) { + dqput(transfer_to[qtype]); + dqput(transfer_from[qtype]); + } + if (!status && attr->ia_valid & ATTR_MODE) { status = ocfs2_acl_chmod(inode); if (status < 0) -- cgit v1.2.3 From e04cc15f52eb072937cec2bbd8499f37afe5e1ef Mon Sep 17 00:00:00 2001 From: Hisashi Hifumi Date: Tue, 9 Jun 2009 16:47:45 +0900 Subject: ocfs2: fdatasync should skip unimportant metadata writeout In ocfs2, fdatasync and fsync are identical. I think fdatasync should skip committing transaction when inode->i_state is set just I_DIRTY_SYNC and this indicates only atime or/and mtime updates. Following patch improves fdatasync throughput. #sysbench --num-threads=16 --max-requests=300000 --test=fileio --file-block-size=4K --file-total-size=16G --file-test-mode=rndwr --file-fsync-mode=fdatasync run Results: -2.6.30-rc8 Test execution summary: total time: 107.1445s total number of events: 119559 total time taken by event execution: 116.1050 per-request statistics: min: 0.0000s avg: 0.0010s max: 0.1220s approx. 95 percentile: 0.0016s Threads fairness: events (avg/stddev): 7472.4375/303.60 execution time (avg/stddev): 7.2566/0.64 -2.6.30-rc8-patched Test execution summary: total time: 86.8529s total number of events: 300016 total time taken by event execution: 24.3077 per-request statistics: min: 0.0000s avg: 0.0001s max: 0.0336s approx. 95 percentile: 0.0001s Threads fairness: events (avg/stddev): 18751.0000/718.75 execution time (avg/stddev): 1.5192/0.05 Signed-off-by: Hisashi Hifumi Acked-by: Mark Fasheh Signed-off-by: Joel Becker --- fs/ocfs2/file.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/ocfs2/file.c') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 1a96cac31791..07267e0da909 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -187,6 +187,9 @@ static int ocfs2_sync_file(struct file *file, if (err) goto bail; + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + goto bail; + journal = osb->journal->j_journal; err = jbd2_journal_force_commit(journal); -- cgit v1.2.3