From 86d80f973434de24d8a807a92cd59d5ced7bd519 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 31 Jul 2015 16:24:30 -0400 Subject: NFSv4.1/pnfs: Fix atomicity of commit list updates pnfs_layout_mark_request_commit() needs to ensure that it adds the request to the commit list atomically with all the other updates in order to prevent corruption to buckets[ds_commit_idx].wlseg due to races with pnfs_generic_clear_request_commit(). Fixes: 338d00cfef07d ("pnfs: Refactor the *_layout_mark_request_commit...") Cc: stable@vger.kernel.org # v4.0+ Signed-off-by: Trond Myklebust --- fs/nfs/pnfs_nfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/nfs/pnfs_nfs.c') diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index f37e25b6311c..7a282876662f 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -863,9 +863,10 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, } set_bit(PG_COMMIT_TO_DS, &req->wb_flags); cinfo->ds->nwritten++; - spin_unlock(cinfo->lock); - nfs_request_add_commit_list(req, list, cinfo); + nfs_request_add_commit_list_locked(req, list, cinfo); + spin_unlock(cinfo->lock); + nfs_mark_page_unstable(req->wb_page, cinfo); } EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit); -- cgit v1.2.3 From 27571297a7e9a2a845c232813a7ba7e1227f5ec6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Aug 2015 17:38:33 -0400 Subject: pNFS: Tighten up locking around DS commit buckets I'm not aware of any bugreports around this issue, but the locking around the pnfs_commit_bucket is inconsistent at best. This patch tightens it up by ensuring that the 'bucket->committing' list is always changed atomically w.r.t. the 'bucket->clseg' layout segment tracking. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs_nfs.c | 50 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 18 deletions(-) (limited to 'fs/nfs/pnfs_nfs.c') diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 7a282876662f..cd3c0403101b 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -124,11 +124,12 @@ pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, if (ret) { cinfo->ds->nwritten -= ret; cinfo->ds->ncommitting += ret; - bucket->clseg = bucket->wlseg; - if (list_empty(src)) + if (bucket->clseg == NULL) + bucket->clseg = pnfs_get_lseg(bucket->wlseg); + if (list_empty(src)) { + pnfs_put_lseg_locked(bucket->wlseg); bucket->wlseg = NULL; - else - pnfs_get_lseg(bucket->clseg); + } } return ret; } @@ -182,19 +183,23 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; struct pnfs_commit_bucket *bucket; struct pnfs_layout_segment *freeme; + LIST_HEAD(pages); int i; + spin_lock(cinfo->lock); for (i = idx; i < fl_cinfo->nbuckets; i++) { bucket = &fl_cinfo->buckets[i]; if (list_empty(&bucket->committing)) continue; - nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo, i); - spin_lock(cinfo->lock); freeme = bucket->clseg; bucket->clseg = NULL; + list_splice_init(&bucket->committing, &pages); spin_unlock(cinfo->lock); + nfs_retry_commit(&pages, freeme, cinfo, i); pnfs_put_lseg(freeme); + spin_lock(cinfo->lock); } + spin_unlock(cinfo->lock); } static unsigned int @@ -216,10 +221,6 @@ pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo, if (!data) break; data->ds_commit_index = i; - spin_lock(cinfo->lock); - data->lseg = bucket->clseg; - bucket->clseg = NULL; - spin_unlock(cinfo->lock); list_add(&data->pages, list); nreq++; } @@ -229,6 +230,22 @@ pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo, return nreq; } +static inline +void pnfs_fetch_commit_bucket_list(struct list_head *pages, + struct nfs_commit_data *data, + struct nfs_commit_info *cinfo) +{ + struct pnfs_commit_bucket *bucket; + + bucket = &cinfo->ds->buckets[data->ds_commit_index]; + spin_lock(cinfo->lock); + list_splice_init(pages, &bucket->committing); + data->lseg = bucket->clseg; + bucket->clseg = NULL; + spin_unlock(cinfo->lock); + +} + /* This follows nfs_commit_list pretty closely */ int pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, @@ -243,7 +260,7 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, if (!list_empty(mds_pages)) { data = nfs_commitdata_alloc(); if (data != NULL) { - data->lseg = NULL; + data->ds_commit_index = -1; list_add(&data->pages, &list); nreq++; } else { @@ -265,19 +282,16 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, list_for_each_entry_safe(data, tmp, &list, pages) { list_del_init(&data->pages); - if (!data->lseg) { + if (data->ds_commit_index < 0) { nfs_init_commit(data, mds_pages, NULL, cinfo); nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(data->inode), data->mds_ops, how, 0); } else { - struct pnfs_commit_bucket *buckets; + LIST_HEAD(pages); - buckets = cinfo->ds->buckets; - nfs_init_commit(data, - &buckets[data->ds_commit_index].committing, - data->lseg, - cinfo); + pnfs_fetch_commit_bucket_list(&pages, data, cinfo); + nfs_init_commit(data, &pages, data->lseg, cinfo); initiate_commit(data, how); } } -- cgit v1.2.3 From 6f536936b79bd4b5cea8fb0e5b8b0bce8cd1ea4a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 13 Aug 2015 10:59:07 -0400 Subject: NFSv4.1/pNFS: Fix borken function _same_data_server_addrs_locked() - Switch back to using list_for_each_entry(). Fixes an incorrect test for list NULL termination. - Do not assume that lists are sorted. - Finally, consider an existing entry to match if it consists of a subset of the addresses in the new entry. Cc: stable@vger.kernel.org # 4.0+ Signed-off-by: Trond Myklebust --- fs/nfs/pnfs_nfs.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) (limited to 'fs/nfs/pnfs_nfs.c') diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 7a282876662f..e5c679f04099 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -359,26 +359,31 @@ same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) return false; } +/* + * Checks if 'dsaddrs1' contains a subset of 'dsaddrs2'. If it does, + * declare a match. + */ static bool _same_data_server_addrs_locked(const struct list_head *dsaddrs1, const struct list_head *dsaddrs2) { struct nfs4_pnfs_ds_addr *da1, *da2; - - /* step through both lists, comparing as we go */ - for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), - da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); - da1 != NULL && da2 != NULL; - da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), - da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { - if (!same_sockaddr((struct sockaddr *)&da1->da_addr, - (struct sockaddr *)&da2->da_addr)) - return false; + struct sockaddr *sa1, *sa2; + bool match = false; + + list_for_each_entry(da1, dsaddrs1, da_node) { + sa1 = (struct sockaddr *)&da1->da_addr; + match = false; + list_for_each_entry(da2, dsaddrs2, da_node) { + sa2 = (struct sockaddr *)&da2->da_addr; + match = same_sockaddr(sa1, sa2); + if (match) + break; + } + if (!match) + break; } - if (da1 == NULL && da2 == NULL) - return true; - - return false; + return match; } /* -- cgit v1.2.3 From 046be74da8f257c4f1925ed4b5d4ee4c822ef9c6 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 21 Aug 2015 10:32:50 +0800 Subject: NFS41: fix list splice type We want to move commiting pages to pages list instead. Otherwise it causes pnfs small writes crash like: [34560.037692] BUG: unable to handle kernel NULL pointer dereference at 0000000000000068 [34560.038557] IP: [] nfs_init_commit+0x26/0x130 [nfs] [34560.039400] PGD 69f5a067 PUD 69f59067 PMD 0 [34560.040207] Oops: 0000 [#1] SMP [34560.041014] Modules linked in: nfsv3(OE) nfs_layout_flexfiles(OE) nfsv4(OE) nfs(OE) fscache(E) rpcsec_gss_krb5(E) xt_addrtype(E) xt_conntrack(E) ipt_MASQUERADE(E) nf_nat_masquerade_ipv4(E) iptable_nat(E) nf_conntrack_ipv4(E) nf_defrag_ipv4(E) nf_nat_ipv4(E) iptable_filter(E) ip_tables(E) x_tables(E) nf_nat(E) nf_conntrack(E) bridge(E) stp(E) llc(E) dm_thin_pool(E) dm_persistent_data(E) dm_bio_prison(E) dm_bufio(E) ppdev(E) vmw_balloon(E) coretemp(E) crc32_pclmul(E) ghash_clmulni_intel(E) aesni_intel(E) aes_x86_64(E) glue_helper(E) lrw(E) gf128mul(E) ablk_helper(E) cryptd(E) psmouse(E) serio_raw(E) vmw_vmci(E) i2c_piix4(E) shpchp(E) parport_pc(E) parport(E) mac_hid(E) nfsd(E) auth_rpcgss(E) nfs_acl(E) lockd(E) grace(E) sunrpc(E) xfs(E) libcrc32c(E) hid_generic(E) usbhid(E) hid(E) e1000(E) mptspi(E) [34560.045106] mptscsih(E) mptbase(E) vmwgfx(E) drm_kms_helper(E) ttm(E) drm(E) autofs4(E) [last unloaded: fscache] [34560.045897] CPU: 0 PID: 130543 Comm: bash Tainted: G OE 4.2.0-rc5-dp-00057-gf993a93 #11 [34560.046699] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 05/20/2014 [34560.047525] task: ffff880031b0a980 ti: ffff880045fec000 task.ti: ffff880045fec000 [34560.048264] RIP: 0010:[] [] nfs_init_commit+0x26/0x130 [nfs] [34560.049000] RSP: 0018:ffff880045fefc18 EFLAGS: 00010246 [34560.049717] RAX: 0000000000000000 RBX: ffff8800208fbc80 RCX: ffff880045fefd50 [34560.050396] RDX: ffff880031c19ec0 RSI: ffff880045fefc88 RDI: ffff8800208fbc80 [34560.051041] RBP: ffff880045fefc28 R08: ffff8800208fbe68 R09: ffff880045fefc88 [34560.051666] R10: 0000000000000000 R11: 0000000000000000 R12: ffff880045fefc78 [34560.052247] R13: ffff880045fefc88 R14: ffff880045fefa90 R15: ffff880045fefd50 [34560.052825] FS: 00007fa02d58c740(0000) GS:ffff88006d600000(0000) knlGS:0000000000000000 [34560.053410] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [34560.053992] CR2: 0000000000000068 CR3: 000000003b37a000 CR4: 00000000001406f0 [34560.054615] Stack: [34560.055200] ffff8800208fbc80 ffff8800208fbc80 ffff880045fefcc8 ffffffffa05c1a5b [34560.055800] ffff880045fefcc8 ffff880045fefd50 0000000045fefcb8 ffff880045fefd40 [34560.056418] ffff8800420608e0 ffffffffa04f3910 0000000100000001 ffff880045fefd50 [34560.057013] Call Trace: [34560.057672] [] pnfs_generic_commit_pagelist+0x1cb/0x300 [nfsv4] [34560.058277] [] ? ff_layout_commit_pagelist+0x20/0x20 [nfs_layout_flexfiles] [34560.058907] [] ff_layout_commit_pagelist+0x15/0x20 [nfs_layout_flexfiles] [34560.059557] [] nfs_generic_commit_list+0xb1/0xf0 [nfs] [34560.060214] [] ? nfs_scan_commit+0x37/0xa0 [nfs] [34560.060825] [] nfs_commit_inode+0x81/0x150 [nfs] [34560.061432] [] nfs_wb_all+0x1ae/0x400 [nfs] [34560.062035] [] nfs_getattr+0x33d/0x510 [nfs] [34560.062630] [] vfs_getattr_nosec+0x2c/0x40 [34560.063223] [] vfs_getattr+0x26/0x30 [34560.063818] [] vfs_fstatat+0x65/0xa0 [34560.064413] [] SYSC_newstat+0x1f/0x40 [34560.065016] [] ? do_audit_syscall_entry+0x66/0x70 [34560.065626] [] ? syscall_trace_enter_phase1+0x113/0x170 [34560.066245] [] ? trace_hardirqs_on_thunk+0x17/0x19 [34560.066868] [] SyS_newstat+0xe/0x10 [34560.067533] [] entry_SYSCALL_64_fastpath+0x16/0x7a [34560.068173] Code: 0f 1f 44 00 00 0f 1f 44 00 00 55 4c 8d 87 e8 01 00 00 48 89 e5 53 48 89 fb 48 83 ec 08 4c 8b 0e 49 8b 41 18 4c 39 ce 48 8b 40 40 <4c> 8b 50 68 74 24 48 8b 87 e8 01 00 00 48 8b 7e 08 4d 89 41 08 [34560.069609] RIP [] nfs_init_commit+0x26/0x130 [nfs] [34560.070295] RSP [34560.071008] CR2: 0000000000000068 [34560.073207] ---[ end trace f85f873260977406 ]--- [fixes 27571297a7e(pNFS: Tighten up locking around DS commit buckets)] Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/pnfs_nfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/pnfs_nfs.c') diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index bbd407b6bc1f..24655b807d44 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -239,7 +239,7 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages, bucket = &cinfo->ds->buckets[data->ds_commit_index]; spin_lock(cinfo->lock); - list_splice_init(pages, &bucket->committing); + list_splice_init(&bucket->committing, pages); data->lseg = bucket->clseg; bucket->clseg = NULL; spin_unlock(cinfo->lock); -- cgit v1.2.3