From 4246a0b63bd8f56a1469b12eafeb875b1041a451 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Jul 2015 15:29:37 +0200 Subject: block: add a bi_error field to struct bio Currently we have two different ways to signal an I/O error on a BIO: (1) by clearing the BIO_UPTODATE flag (2) by returning a Linux errno value to the bi_end_io callback The first one has the drawback of only communicating a single possible error (-EIO), and the second one has the drawback of not being persistent when bios are queued up, and of not being passed along from child to parent bio in the ever more popular chaining scenario. Having both mechanisms available has the additional drawback of utterly confusing driver authors and introducing bugs where various I/O submitters only deal with one of them, and the others have to add boilerplate code to deal with both kinds of error returns. So add a new bi_error field to store an errno value directly in struct bio and remove the existing mechanisms to clean all this up. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: NeilBrown Signed-off-by: Jens Axboe --- fs/direct-io.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'fs/direct-io.c') diff --git a/fs/direct-io.c b/fs/direct-io.c index 745d2342651a..e1639c8c14d5 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -285,7 +285,7 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio); /* * Asynchronous IO callback. */ -static void dio_bio_end_aio(struct bio *bio, int error) +static void dio_bio_end_aio(struct bio *bio) { struct dio *dio = bio->bi_private; unsigned long remaining; @@ -318,7 +318,7 @@ static void dio_bio_end_aio(struct bio *bio, int error) * During I/O bi_private points at the dio. After I/O, bi_private is used to * implement a singly-linked list of completed BIOs, at dio->bio_list. 
*/ -static void dio_bio_end_io(struct bio *bio, int error) +static void dio_bio_end_io(struct bio *bio) { struct dio *dio = bio->bi_private; unsigned long flags; @@ -345,9 +345,9 @@ void dio_end_io(struct bio *bio, int error) struct dio *dio = bio->bi_private; if (dio->is_async) - dio_bio_end_aio(bio, error); + dio_bio_end_aio(bio); else - dio_bio_end_io(bio, error); + dio_bio_end_io(bio); } EXPORT_SYMBOL_GPL(dio_end_io); @@ -457,11 +457,10 @@ static struct bio *dio_await_one(struct dio *dio) */ static int dio_bio_complete(struct dio *dio, struct bio *bio) { - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec; unsigned i; - if (!uptodate) + if (bio->bi_error) dio->io_error = -EIO; if (dio->is_async && dio->rw == READ) { @@ -476,7 +475,7 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) } bio_put(bio); } - return uptodate ? 0 : -EIO; + return bio->bi_error; } /* -- cgit v1.2.3 From 9b81c842355ac96097ba32ad5632e9ef0ff59f92 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 10 Aug 2015 19:05:18 -0400 Subject: block: don't access bio->bi_error after bio_put() Commit 4246a0b6 ("block: add a bi_error field to struct bio") has added a few dereferences of 'bio' after a call to bio_put(). 
This causes use-after-frees such as: [521120.719695] BUG: KASan: use after free in dio_bio_complete+0x2b3/0x320 at addr ffff880f36b38714 [521120.720638] Read of size 4 by task mount.ocfs2/9644 [521120.721212] ============================================================================= [521120.722056] BUG kmalloc-256 (Not tainted): kasan: bad access detected [521120.722968] ----------------------------------------------------------------------------- [521120.722968] [521120.723915] Disabling lock debugging due to kernel taint [521120.724539] INFO: Slab 0xffffea003cdace00 objects=32 used=25 fp=0xffff880f36b38600 flags=0x46fffff80004080 [521120.726037] INFO: Object 0xffff880f36b38700 @offset=1792 fp=0xffff880f36b38800 [521120.726037] [521120.726974] Bytes b4 ffff880f36b386f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [521120.727898] Object ffff880f36b38700: 00 88 b3 36 0f 88 ff ff 00 00 d8 de 0b 88 ff ff ...6............ [521120.728822] Object ffff880f36b38710: 02 00 00 f0 00 00 00 00 00 00 00 00 00 00 00 00 ................ [521120.729705] Object ffff880f36b38720: 01 00 00 00 00 00 00 00 00 00 00 00 01 00 00 00 ................ [521120.730623] Object ffff880f36b38730: 00 00 00 00 00 00 00 00 01 00 00 00 00 02 00 00 ................ [521120.731621] Object ffff880f36b38740: 00 02 00 00 01 00 00 00 d0 f7 87 ad ff ff ff ff ................ [521120.732776] Object ffff880f36b38750: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [521120.733640] Object ffff880f36b38760: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [521120.734508] Object ffff880f36b38770: 01 00 03 00 01 00 00 00 88 87 b3 36 0f 88 ff ff ...........6.... [521120.735385] Object ffff880f36b38780: 00 73 22 ad 02 88 ff ff 40 13 e0 3c 00 ea ff ff .s".....@..<.... [521120.736667] Object ffff880f36b38790: 00 02 00 00 00 04 00 00 00 00 00 00 00 00 00 00 ................ 
[521120.737596] Object ffff880f36b387a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [521120.738524] Object ffff880f36b387b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [521120.739388] Object ffff880f36b387c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [521120.740277] Object ffff880f36b387d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [521120.741187] Object ffff880f36b387e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [521120.742233] Object ffff880f36b387f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ [521120.743229] CPU: 41 PID: 9644 Comm: mount.ocfs2 Tainted: G B 4.2.0-rc6-next-20150810-sasha-00039-gf909086 #2420 [521120.744274] ffff880f36b38000 ffff880d89c8f638 ffffffffb6e9ba8a ffff880101c0e5c0 [521120.745025] ffff880d89c8f668 ffffffffad76a313 ffff880101c0e5c0 ffffea003cdace00 [521120.745908] ffff880f36b38700 ffff880f36b38798 ffff880d89c8f690 ffffffffad772854 [521120.747063] Call Trace: [521120.747520] dump_stack (lib/dump_stack.c:52) [521120.748053] print_trailer (mm/slub.c:653) [521120.748582] object_err (mm/slub.c:660) [521120.749079] kasan_report_error (include/linux/kasan.h:20 mm/kasan/report.c:152 mm/kasan/report.c:194) [521120.750834] __asan_report_load4_noabort (mm/kasan/report.c:250) [521120.753580] dio_bio_complete (fs/direct-io.c:478) [521120.755752] do_blockdev_direct_IO (fs/direct-io.c:494 fs/direct-io.c:1291) [521120.759765] __blockdev_direct_IO (fs/direct-io.c:1322) [521120.761658] blkdev_direct_IO (fs/block_dev.c:162) [521120.762993] generic_file_read_iter (mm/filemap.c:1738) [521120.767405] blkdev_read_iter (fs/block_dev.c:1649) [521120.768556] __vfs_read (fs/read_write.c:423 fs/read_write.c:434) [521120.772126] vfs_read (fs/read_write.c:454) [521120.773118] SyS_pread64 (fs/read_write.c:607 fs/read_write.c:594) [521120.776062] entry_SYSCALL_64_fastpath (arch/x86/entry/entry_64.S:186) [521120.777375] Memory state around 
the buggy address: [521120.778118] ffff880f36b38600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [521120.779211] ffff880f36b38680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [521120.780315] >ffff880f36b38700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [521120.781465] ^ [521120.782083] ffff880f36b38780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [521120.783717] ffff880f36b38800: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [521120.784818] ================================================================== This patch fixes a few of those places that I caught while auditing the patch, but the original patch should be audited further for more occurrences of this issue since I'm not too familiar with the code. Signed-off-by: Sasha Levin Signed-off-by: Jens Axboe --- fs/direct-io.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/direct-io.c') diff --git a/fs/direct-io.c b/fs/direct-io.c index e1639c8c14d5..818c647f36d3 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -459,12 +459,14 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) { struct bio_vec *bvec; unsigned i; + int err; if (bio->bi_error) dio->io_error = -EIO; if (dio->is_async && dio->rw == READ) { bio_check_pages_dirty(bio); /* transfers ownership */ + err = bio->bi_error; } else { bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; @@ -473,9 +475,10 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) set_page_dirty_lock(page); page_cache_release(page); } + err = bio->bi_error; bio_put(bio); } - return bio->bi_error; + return err; } /* -- cgit v1.2.3 From b54ffb73cadcdcff9cc1ae0e11f502407e3e2e4c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 19 May 2015 14:31:01 +0200 Subject: block: remove bio_get_nr_vecs() We can always fill up the bio now, no need to estimate the possible size based on queue parameters. 
Acked-by: Steven Whitehouse Signed-off-by: Kent Overstreet [hch: rebased and wrote a changelog] Signed-off-by: Christoph Hellwig Signed-off-by: Ming Lin Signed-off-by: Jens Axboe --- fs/direct-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/direct-io.c') diff --git a/fs/direct-io.c b/fs/direct-io.c index 818c647f36d3..11256291642e 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -655,7 +655,7 @@ static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio, if (ret) goto out; sector = start_sector << (sdio->blkbits - 9); - nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(map_bh->b_bdev)); + nr_pages = min(sdio->pages_in_io, BIO_MAX_PAGES); BUG_ON(nr_pages <= 0); dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages); sdio->boundary = 0; -- cgit v1.2.3