From eb60ceac0ebd11e9896ed1ca84fcad71a4b23601 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 Feb 2007 09:18:22 -0500 Subject: Btrfs: Add backing store, memory management Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 fs/btrfs/disk-io.c (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c new file mode 100644 index 000000000000..8d51a07051d9 --- /dev/null +++ b/fs/btrfs/disk-io.c @@ -0,0 +1,174 @@ +#define _XOPEN_SOURCE 500 +#include +#include +#include +#include +#include +#include +#include "kerncompat.h" +#include "radix-tree.h" +#include "ctree.h" +#include "disk-io.h" + +static int allocated_blocks = 0; + +struct ctree_header { + u64 root_block; +} __attribute__ ((__packed__)); + +static int get_free_block(struct ctree_root *root, u64 *block) +{ + struct stat st; + int ret; + + st.st_size = 0; + ret = fstat(root->fp, &st); + if (st.st_size > sizeof(struct ctree_header)) { + *block = (st.st_size - + sizeof(struct ctree_header)) / CTREE_BLOCKSIZE; + } else { + *block = 0; + } + ret = ftruncate(root->fp, sizeof(struct ctree_header) + (*block + 1) * + CTREE_BLOCKSIZE); + return ret; +} + +struct tree_buffer *alloc_tree_block(struct ctree_root *root, u64 blocknr) +{ + struct tree_buffer *buf; + int ret; + buf = malloc(sizeof(struct tree_buffer)); + if (!buf) + return buf; + allocated_blocks++; + buf->blocknr = blocknr; + buf->count = 1; + radix_tree_preload(GFP_KERNEL); + ret = radix_tree_insert(&root->cache_radix, blocknr, buf); + radix_tree_preload_end(); + if (ret) { + free(buf); + return NULL; + } + return buf; +} + +struct tree_buffer *alloc_free_block(struct ctree_root *root) +{ + u64 free_block; + int ret; + struct tree_buffer * buf; + ret = get_free_block(root, &free_block); + if (ret) { + BUG(); + return NULL; + } + buf = alloc_tree_block(root, free_block); + if (!buf) + BUG(); + return buf; +} + +struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) +{ + loff_t offset = blocknr * CTREE_BLOCKSIZE + sizeof(struct ctree_header); + struct tree_buffer *buf; + int ret; + + buf = radix_tree_lookup(&root->cache_radix, blocknr); + if (buf) { + buf->count++; + if (buf->blocknr != blocknr) + BUG(); + if (buf->blocknr != buf->node.header.blocknr) + BUG(); + return buf; + } + buf = alloc_tree_block(root, blocknr); + if (!buf) + return NULL; + ret = pread(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); + if (ret != CTREE_BLOCKSIZE) { + free(buf); + return NULL; + } + if (buf->blocknr != buf->node.header.blocknr) + BUG(); + return buf; +} + +int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) +{ + u64 blocknr = buf->blocknr; + loff_t offset = blocknr * CTREE_BLOCKSIZE + sizeof(struct ctree_header); + int ret; + + if (buf->blocknr != buf->node.header.blocknr) + BUG(); + ret = pwrite(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); + if (ret != CTREE_BLOCKSIZE) + return ret; + if (buf == root->node) + return update_root_block(root); + return 0; +} + +struct ctree_root *open_ctree(char *filename) +{ + struct ctree_root *root = malloc(sizeof(struct ctree_root)); + int fp; + u64 root_block; + int ret; + + fp = open(filename, O_CREAT | O_RDWR); + if (fp < 0) { + free(root); + return NULL; + } + root->fp = fp; + INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); + ret = pread(fp, &root_block, sizeof(u64), 0); + if (ret == sizeof(u64)) { + printf("reading root node at block %lu\n", root_block); + root->node = read_tree_block(root, root_block); + } else + root->node = NULL; + return root; +} + +int close_ctree(struct ctree_root *root) +{ + close(root->fp); + if (root->node) + tree_block_release(root, root->node); + free(root); + printf("on close %d blocks are allocated\n", allocated_blocks); + return 0; +} + +int update_root_block(struct ctree_root *root) +{ + int ret; + u64 root_block = root->node->blocknr; + + ret = pwrite(root->fp, &root_block, sizeof(u64), 0); + if (ret != sizeof(u64)) + return ret; + return 0; +} + +void tree_block_release(struct ctree_root *root, struct tree_buffer *buf) +{ + buf->count--; + if (buf->count == 0) { + if (!radix_tree_lookup(&root->cache_radix, buf->blocknr)) + BUG(); + radix_tree_delete(&root->cache_radix, buf->blocknr); + memset(buf, 0, sizeof(*buf)); + free(buf); + BUG_ON(allocated_blocks == 0); + allocated_blocks--; + } +} + -- cgit v1.2.3 From d97e63b69ef21c02b67e20e41d9968b0e503572e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 20 Feb 2007 16:40:44 -0500 Subject: Btrfs: early extent mapping support Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 76 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 23 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8d51a07051d9..653f18aab330 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -12,25 +12,27 @@ static int allocated_blocks = 0; -struct ctree_header { - u64 root_block; -} __attribute__ ((__packed__)); - static int get_free_block(struct ctree_root *root, u64 *block) { struct stat st; int ret; + if (root->alloc_extent->num_used >= root->alloc_extent->num_blocks) + return -1; + + *block = root->alloc_extent->blocknr + root->alloc_extent->num_used; + root->alloc_extent->num_used += 1; + if (root->alloc_extent->num_used >= root->alloc_extent->num_blocks) { + struct alloc_extent *ae = root->alloc_extent; + root->alloc_extent = root->reserve_extent; + root->reserve_extent = ae; + ae->num_blocks = 0; + } st.st_size = 0; ret = fstat(root->fp, &st); - if (st.st_size > sizeof(struct ctree_header)) { - *block = (st.st_size - - sizeof(struct ctree_header)) / CTREE_BLOCKSIZE; - } else { - *block = 0; - } - ret = ftruncate(root->fp, sizeof(struct ctree_header) + (*block + 1) * - CTREE_BLOCKSIZE); + if (st.st_size < (*block + 1) * CTREE_BLOCKSIZE) + ret = ftruncate(root->fp, + (*block + 1) * CTREE_BLOCKSIZE); return ret; } @@ -72,7 +74,7 @@ struct tree_buffer *alloc_free_block(struct ctree_root *root) struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) { - loff_t offset = blocknr * CTREE_BLOCKSIZE + sizeof(struct ctree_header); + loff_t offset = blocknr * CTREE_BLOCKSIZE; struct tree_buffer *buf; int ret; @@ -101,7 +103,7 @@ struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) { u64 blocknr = buf->blocknr; - loff_t offset = blocknr * CTREE_BLOCKSIZE + sizeof(struct ctree_header); + loff_t offset = blocknr * CTREE_BLOCKSIZE; int ret; if (buf->blocknr != buf->node.header.blocknr) @@ -114,11 +116,32 @@ int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) return 0; } +struct ctree_super_block { + struct ctree_root_info root_info; + struct ctree_root_info extent_info; +} __attribute__ ((__packed__)); + +static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, + struct ctree_root_info *info, int fp) +{ + root->fp = fp; + root->node = read_tree_block(root, info->tree_root); + root->extent_root = extent_root; + memcpy(&root->ai1, &info->alloc_extent, sizeof(info->alloc_extent)); + memcpy(&root->ai2, &info->reserve_extent, sizeof(info->reserve_extent)); + root->alloc_extent = &root->ai1; + root->reserve_extent = &root->ai2; + INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); + printf("setup done reading root %p, used %lu\n", root, root->alloc_extent->num_used); + return 0; +} + struct ctree_root *open_ctree(char *filename) { struct ctree_root *root = malloc(sizeof(struct ctree_root)); + struct ctree_root *extent_root = malloc(sizeof(struct ctree_root)); + struct ctree_super_block super; int fp; - u64 root_block; int ret; fp = open(filename, O_CREAT | O_RDWR); @@ -126,14 +149,20 @@ struct ctree_root *open_ctree(char *filename) free(root); return NULL; } - root->fp = fp; - INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); - ret = pread(fp, &root_block, sizeof(u64), 0); - if (ret == sizeof(u64)) { - printf("reading root node at block %lu\n", root_block); - root->node = read_tree_block(root, root_block); - } else - root->node = NULL; + ret = pread(fp, &super, sizeof(struct ctree_super_block), + CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); + if (ret == 0) { + ret = mkfs(fp); + if (ret) + return NULL; + ret = pread(fp, &super, sizeof(struct ctree_super_block), + CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); + if (ret != sizeof(struct ctree_super_block)) + return NULL; + } + BUG_ON(ret < 0); + __setup_root(root, extent_root, &super.root_info, fp); + __setup_root(extent_root, extent_root, &super.extent_info, fp); return root; } @@ -160,6 +189,7 @@ int update_root_block(struct ctree_root *root) void tree_block_release(struct ctree_root *root, struct tree_buffer *buf) { + return; buf->count--; if (buf->count == 0) { if (!radix_tree_lookup(&root->cache_radix, buf->blocknr)) -- cgit v1.2.3 From cfaa72952fa7b44aa5d967cbc266110900552aef Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 21 Feb 2007 17:04:57 -0500 Subject: Btrfs: extent fixes Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 79 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 34 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 653f18aab330..2f71ccf1c3cb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -15,7 +15,7 @@ static int allocated_blocks = 0; static int get_free_block(struct ctree_root *root, u64 *block) { struct stat st; - int ret; + int ret = 0; if (root->alloc_extent->num_used >= root->alloc_extent->num_blocks) return -1; @@ -30,9 +30,14 @@ static int get_free_block(struct ctree_root *root, u64 *block) } st.st_size = 0; ret = fstat(root->fp, &st); - if (st.st_size < (*block + 1) * CTREE_BLOCKSIZE) + if (st.st_size < (*block + 1) * CTREE_BLOCKSIZE) { ret = ftruncate(root->fp, (*block + 1) * CTREE_BLOCKSIZE); + if (ret) { + perror("ftruncate"); + exit(1); + } + } return ret; } @@ -81,11 +86,7 @@ struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) buf = radix_tree_lookup(&root->cache_radix, blocknr); if (buf) { buf->count++; - if (buf->blocknr != blocknr) - BUG(); - if (buf->blocknr != buf->node.header.blocknr) - BUG(); - return buf; + goto test; } buf = alloc_tree_block(root, blocknr); if (!buf) @@ -95,8 +96,11 @@ struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) free(buf); return NULL; } +test: if (buf->blocknr != buf->node.header.blocknr) BUG(); + if (root->node && buf->node.header.parentid != root->node->node.header.parentid) + BUG(); return buf; } @@ -111,36 +115,30 @@ int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) ret = pwrite(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); if (ret != CTREE_BLOCKSIZE) return ret; - if (buf == root->node) - return update_root_block(root); return 0; } -struct ctree_super_block { - struct ctree_root_info root_info; - struct ctree_root_info extent_info; -} __attribute__ ((__packed__)); - static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, struct ctree_root_info *info, int fp) { + INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); root->fp = fp; + root->node = NULL; root->node = read_tree_block(root, info->tree_root); root->extent_root = extent_root; memcpy(&root->ai1, &info->alloc_extent, sizeof(info->alloc_extent)); memcpy(&root->ai2, &info->reserve_extent, sizeof(info->reserve_extent)); root->alloc_extent = &root->ai1; root->reserve_extent = &root->ai2; - INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); - printf("setup done reading root %p, used %lu\n", root, root->alloc_extent->num_used); + printf("setup done reading root %p, used %lu available %lu\n", root, root->alloc_extent->num_used, root->alloc_extent->num_blocks); + printf("setup done reading root %p, reserve used %lu available %lu\n", root, root->reserve_extent->num_used, root->reserve_extent->num_blocks); return 0; } -struct ctree_root *open_ctree(char *filename) +struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) { struct ctree_root *root = malloc(sizeof(struct ctree_root)); struct ctree_root *extent_root = malloc(sizeof(struct ctree_root)); - struct ctree_super_block super; int fp; int ret; @@ -149,48 +147,61 @@ struct ctree_root *open_ctree(char *filename) free(root); return NULL; } - ret = pread(fp, &super, sizeof(struct ctree_super_block), + ret = pread(fp, super, sizeof(struct ctree_super_block), CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); if (ret == 0) { ret = mkfs(fp); if (ret) return NULL; - ret = pread(fp, &super, sizeof(struct ctree_super_block), + ret = pread(fp, super, sizeof(struct ctree_super_block), CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); if (ret != sizeof(struct ctree_super_block)) return NULL; } BUG_ON(ret < 0); - __setup_root(root, extent_root, &super.root_info, fp); - __setup_root(extent_root, extent_root, &super.extent_info, fp); + __setup_root(root, extent_root, &super->root_info, fp); + __setup_root(extent_root, extent_root, &super->extent_info, fp); return root; } -int close_ctree(struct ctree_root *root) +static int __update_root(struct ctree_root *root, struct ctree_root_info *info) { - close(root->fp); - if (root->node) - tree_block_release(root, root->node); - free(root); - printf("on close %d blocks are allocated\n", allocated_blocks); + info->tree_root = root->node->blocknr; + memcpy(&info->alloc_extent, root->alloc_extent, sizeof(struct alloc_extent)); + memcpy(&info->reserve_extent, root->reserve_extent, sizeof(struct alloc_extent)); return 0; } -int update_root_block(struct ctree_root *root) +int write_ctree_super(struct ctree_root *root, struct ctree_super_block *s) { int ret; - u64 root_block = root->node->blocknr; - - ret = pwrite(root->fp, &root_block, sizeof(u64), 0); - if (ret != sizeof(u64)) + __update_root(root, &s->root_info); + __update_root(root->extent_root, &s->extent_info); + ret = pwrite(root->fp, s, sizeof(*s), CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); + if (ret != sizeof(*s)) { + fprintf(stderr, "failed to write new super block err %d\n", ret); return ret; + } + return 0; +} + +int close_ctree(struct ctree_root *root) +{ + close(root->fp); + if (root->node) + tree_block_release(root, root->node); + if (root->extent_root->node) + tree_block_release(root->extent_root, root->extent_root->node); + free(root); + printf("on close %d blocks are allocated\n", allocated_blocks); return 0; } void tree_block_release(struct ctree_root *root, struct tree_buffer *buf) { - return; buf->count--; + if (buf->count < 0) + BUG(); if (buf->count == 0) { if (!radix_tree_lookup(&root->cache_radix, buf->blocknr)) BUG(); -- cgit v1.2.3 From 5c680ed620c2b69cf751aecf1a5e03ce2c89c7f3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 22 Feb 2007 11:39:13 -0500 Subject: Btrfs: switch to early splits Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2f71ccf1c3cb..a696a4278ac5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -149,7 +149,8 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) } ret = pread(fp, super, sizeof(struct ctree_super_block), CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); - if (ret == 0) { + if (ret == 0 || super->root_info.tree_root == 0) { + printf("making new FS!\n"); ret = mkfs(fp); if (ret) return NULL; -- cgit v1.2.3 From 9a8dd1502de6aa683ae46cf0397e9b6e636416fb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 23 Feb 2007 08:38:36 -0500 Subject: Btrfs: Block sized tree extents and extent deletion Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 90 +++++++++++++++++++----------------------------------- 1 file changed, 31 insertions(+), 59 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a696a4278ac5..14955e440773 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -12,33 +12,13 @@ static int allocated_blocks = 0; -static int get_free_block(struct ctree_root *root, u64 *block) +static int check_tree_block(struct ctree_root *root, struct tree_buffer *buf) { - struct stat st; - int ret = 0; - - if (root->alloc_extent->num_used >= root->alloc_extent->num_blocks) - return -1; - - *block = root->alloc_extent->blocknr + root->alloc_extent->num_used; - root->alloc_extent->num_used += 1; - if (root->alloc_extent->num_used >= root->alloc_extent->num_blocks) { - struct alloc_extent *ae = root->alloc_extent; - root->alloc_extent = root->reserve_extent; - root->reserve_extent = ae; - ae->num_blocks = 0; - } - st.st_size = 0; - ret = fstat(root->fp, &st); - if (st.st_size < (*block + 1) * CTREE_BLOCKSIZE) { - ret = ftruncate(root->fp, - (*block + 1) * CTREE_BLOCKSIZE); - if (ret) { - perror("ftruncate"); - exit(1); - } - } - return ret; + if (buf->blocknr != buf->node.header.blocknr) + BUG(); + if (root->node && buf->node.header.parentid != root->node->node.header.parentid) + BUG(); + return 0; } struct tree_buffer *alloc_tree_block(struct ctree_root *root, u64 blocknr) @@ -61,22 +41,23 @@ struct tree_buffer *alloc_tree_block(struct ctree_root *root, u64 blocknr) return buf; } -struct tree_buffer *alloc_free_block(struct ctree_root *root) +struct tree_buffer *find_tree_block(struct ctree_root *root, u64 blocknr) { - u64 free_block; - int ret; - struct tree_buffer * buf; - ret = get_free_block(root, &free_block); - if (ret) { - BUG(); - return NULL; + struct tree_buffer *buf; + buf = radix_tree_lookup(&root->cache_radix, blocknr); + if (buf) { + buf->count++; + } else { + buf = alloc_tree_block(root, blocknr); + if (!buf) { + BUG(); + return NULL; + } } - buf = alloc_tree_block(root, free_block); - if (!buf) - BUG(); return buf; } + struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) { loff_t offset = blocknr * CTREE_BLOCKSIZE; @@ -86,20 +67,17 @@ struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) buf = radix_tree_lookup(&root->cache_radix, blocknr); if (buf) { buf->count++; - goto test; - } - buf = alloc_tree_block(root, blocknr); - if (!buf) - return NULL; - ret = pread(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); - if (ret != CTREE_BLOCKSIZE) { - free(buf); - return NULL; + } else { + buf = alloc_tree_block(root, blocknr); + if (!buf) + return NULL; + ret = pread(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); + if (ret != CTREE_BLOCKSIZE) { + free(buf); + return NULL; + } } -test: - if (buf->blocknr != buf->node.header.blocknr) - BUG(); - if (root->node && buf->node.header.parentid != root->node->node.header.parentid) + if (check_tree_block(root, buf)) BUG(); return buf; } @@ -121,17 +99,10 @@ int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, struct ctree_root_info *info, int fp) { - INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); root->fp = fp; root->node = NULL; root->node = read_tree_block(root, info->tree_root); root->extent_root = extent_root; - memcpy(&root->ai1, &info->alloc_extent, sizeof(info->alloc_extent)); - memcpy(&root->ai2, &info->reserve_extent, sizeof(info->reserve_extent)); - root->alloc_extent = &root->ai1; - root->reserve_extent = &root->ai2; - printf("setup done reading root %p, used %lu available %lu\n", root, root->alloc_extent->num_used, root->alloc_extent->num_blocks); - printf("setup done reading root %p, reserve used %lu available %lu\n", root, root->reserve_extent->num_used, root->reserve_extent->num_blocks); return 0; } @@ -147,6 +118,8 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) free(root); return NULL; } + INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); + INIT_RADIX_TREE(&extent_root->cache_radix, GFP_KERNEL); ret = pread(fp, super, sizeof(struct ctree_super_block), CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); if (ret == 0 || super->root_info.tree_root == 0) { @@ -168,8 +141,6 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) static int __update_root(struct ctree_root *root, struct ctree_root_info *info) { info->tree_root = root->node->blocknr; - memcpy(&info->alloc_extent, root->alloc_extent, sizeof(struct alloc_extent)); - memcpy(&info->reserve_extent, root->reserve_extent, sizeof(struct alloc_extent)); return 0; } @@ -201,6 +172,7 @@ int close_ctree(struct ctree_root *root) void tree_block_release(struct ctree_root *root, struct tree_buffer *buf) { buf->count--; + write_tree_block(root, buf); if (buf->count < 0) BUG(); if (buf->count == 0) { -- cgit v1.2.3 From 5de08d7d50ba535b968d97101ed2ab07c2b8eb7d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 24 Feb 2007 06:24:44 -0500 Subject: Btrfs: Break up ctree.c a little Extent fixes Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 14955e440773..f4c6ff202ba9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -172,7 +172,6 @@ int close_ctree(struct ctree_root *root) void tree_block_release(struct ctree_root *root, struct tree_buffer *buf) { buf->count--; - write_tree_block(root, buf); if (buf->count < 0) BUG(); if (buf->count == 0) { -- cgit v1.2.3 From c673024aba596e57c07196cb3400cdcc9d28f3aa Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Feb 2007 10:46:55 -0500 Subject: Btrfs: fixup dbfile perms Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f4c6ff202ba9..c42dc72706bf 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -113,7 +113,7 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) int fp; int ret; - fp = open(filename, O_CREAT | O_RDWR); + fp = open(filename, O_CREAT | O_RDWR, 0600); if (fp < 0) { free(root); return NULL; -- cgit v1.2.3 From ed2ff2cba766dfe7976a0113f667c9a0a50dff02 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 1 Mar 2007 18:59:40 -0500 Subject: Btrfs: pretend page cache & commit code Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 89 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c42dc72706bf..656ace6147a8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -11,6 +11,8 @@ #include "disk-io.h" static int allocated_blocks = 0; +int cache_size = 0; +int cache_max = 10000; static int check_tree_block(struct ctree_root *root, struct tree_buffer *buf) { @@ -21,6 +23,25 @@ static int check_tree_block(struct ctree_root *root, struct tree_buffer *buf) return 0; } +static int free_some_buffers(struct ctree_root *root) +{ + struct list_head *node, *next; + struct tree_buffer *b; + if (root->cache_size < cache_max) + return 0; + list_for_each_safe(node, next, &root->cache) { + b = list_entry(node, struct tree_buffer, cache); + if (b->count == 1) { + BUG_ON(!list_empty(&b->dirty)); + list_del_init(&b->cache); + tree_block_release(root, b); + if (root->cache_size < cache_max) + return 0; + } + } + return 0; +} + struct tree_buffer *alloc_tree_block(struct ctree_root *root, u64 blocknr) { struct tree_buffer *buf; @@ -30,10 +51,14 @@ struct tree_buffer *alloc_tree_block(struct ctree_root *root, u64 blocknr) return buf; allocated_blocks++; buf->blocknr = blocknr; - buf->count = 1; + buf->count = 2; + INIT_LIST_HEAD(&buf->dirty); + free_some_buffers(root); radix_tree_preload(GFP_KERNEL); ret = radix_tree_insert(&root->cache_radix, blocknr, buf); radix_tree_preload_end(); + list_add_tail(&buf->cache, &root->cache); + root->cache_size++; if (ret) { free(buf); return NULL; @@ -57,7 +82,6 @@ struct tree_buffer *find_tree_block(struct ctree_root *root, u64 blocknr) return buf; } - struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) { loff_t offset = blocknr * CTREE_BLOCKSIZE; @@ -82,6 +106,24 @@ struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) return buf; } +int dirty_tree_block(struct ctree_root *root, struct tree_buffer *buf) +{ + if (!list_empty(&buf->dirty)) + return 0; + list_add_tail(&buf->dirty, &root->trans); + buf->count++; + return 0; +} + +int clean_tree_block(struct ctree_root *root, struct tree_buffer *buf) +{ + if (!list_empty(&buf->dirty)) { + list_del_init(&buf->dirty); + tree_block_release(root, buf); + } + return 0; +} + int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) { u64 blocknr = buf->blocknr; @@ -96,9 +138,37 @@ int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) return 0; } +static int __commit_transaction(struct ctree_root *root) +{ + struct tree_buffer *b; + int ret = 0; + int wret; + while(!list_empty(&root->trans)) { + b = list_entry(root->trans.next, struct tree_buffer, dirty); + list_del_init(&b->dirty); + wret = write_tree_block(root, b); + if (wret) + ret = wret; + tree_block_release(root, b); + } + return ret; +} + +int commit_transaction(struct ctree_root *root) +{ + int ret; + ret = __commit_transaction(root); + if (!ret && root != root->extent_root) + ret = __commit_transaction(root->extent_root); + BUG_ON(ret); + return ret; +} + static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, struct ctree_root_info *info, int fp) { + INIT_LIST_HEAD(&root->trans); + INIT_LIST_HEAD(&root->cache); root->fp = fp; root->node = NULL; root->node = read_tree_block(root, info->tree_root); @@ -157,8 +227,23 @@ int write_ctree_super(struct ctree_root *root, struct ctree_super_block *s) return 0; } +static int drop_cache(struct ctree_root *root) +{ + while(!list_empty(&root->cache)) { + struct tree_buffer *b = list_entry(root->cache.next, + struct tree_buffer, cache); + list_del_init(&b->cache); + tree_block_release(root, b); + } + return 0; +} int close_ctree(struct ctree_root *root) { + drop_cache(root->extent_root); + drop_cache(root); + BUG_ON(!list_empty(&root->trans)); + BUG_ON(!list_empty(&root->extent_root->trans)); + close(root->fp); if (root->node) tree_block_release(root, root->node); @@ -182,6 +267,8 @@ void tree_block_release(struct ctree_root *root, struct tree_buffer *buf) free(buf); BUG_ON(allocated_blocks == 0); allocated_blocks--; + BUG_ON(root->cache_size == 0); + root->cache_size--; } } -- cgit v1.2.3 From f0930a37f1c096c3a8f6a17b1e251c7fdf4d4457 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 Mar 2007 09:47:58 -0500 Subject: Btrfs: Fix extent code to use merge during delete Remove implicit commit in del_item and insert_item Add implicit commit to close() Add commit op in random-test Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 656ace6147a8..f7ca5362291e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -239,6 +239,7 @@ static int drop_cache(struct ctree_root *root) } int close_ctree(struct ctree_root *root) { + commit_transaction(root); drop_cache(root->extent_root); drop_cache(root); BUG_ON(!list_empty(&root->trans)); -- cgit v1.2.3 From 77ce6846c40e83193df01295e5af437f8b6c7a2d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 Mar 2007 10:06:43 -0500 Subject: Btrfs: period commit during initial fill in the random tester Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f7ca5362291e..b1a8149bbc84 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -11,7 +11,6 @@ #include "disk-io.h" static int allocated_blocks = 0; -int cache_size = 0; int cache_max = 10000; static int check_tree_block(struct ctree_root *root, struct tree_buffer *buf) @@ -36,7 +35,7 @@ static int free_some_buffers(struct ctree_root *root) list_del_init(&b->cache); tree_block_release(root, b); if (root->cache_size < cache_max) - return 0; + break; } } return 0; -- cgit v1.2.3 From 02217ed299c6340a35696e0610047eb96826de2d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 Mar 2007 16:08:05 -0500 Subject: Btrfs: early reference counting Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b1a8149bbc84..0e1c31e682fb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -260,6 +260,8 @@ void tree_block_release(struct ctree_root *root, struct tree_buffer *buf) if (buf->count < 0) BUG(); if (buf->count == 0) { + BUG_ON(!list_empty(&buf->cache)); + BUG_ON(!list_empty(&buf->dirty)); if (!radix_tree_lookup(&root->cache_radix, buf->blocknr)) BUG(); radix_tree_delete(&root->cache_radix, buf->blocknr); -- cgit v1.2.3 From a28ec19775d62d673b034082128aca95780d3737 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 6 Mar 2007 20:08:01 -0500 Subject: Btrfs: Fixup reference counting on cows Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0e1c31e682fb..2fe31c3508c1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -153,13 +153,24 @@ static int __commit_transaction(struct ctree_root *root) return ret; } -int commit_transaction(struct ctree_root *root) +int commit_transaction(struct ctree_root *root, struct ctree_super_block *s) { - int ret; + int ret = 0; + ret = __commit_transaction(root); if (!ret && root != root->extent_root) ret = __commit_transaction(root->extent_root); BUG_ON(ret); + if (root->commit_root != root->node) { + struct tree_buffer *snap = root->commit_root; + root->commit_root = root->node; + root->node->count++; + ret = btrfs_drop_snapshot(root, snap); + BUG_ON(ret); + tree_block_release(root, snap); + } + write_ctree_super(root, s); + btrfs_finish_extent_commit(root); return ret; } @@ -168,10 +179,13 @@ static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, { INIT_LIST_HEAD(&root->trans); INIT_LIST_HEAD(&root->cache); + root->cache_size = 0; root->fp = fp; root->node = NULL; - root->node = read_tree_block(root, info->tree_root); root->extent_root = extent_root; + root->commit_root = NULL; + root->node = read_tree_block(root, info->tree_root); + memset(&root->current_insert, 0, sizeof(root->current_insert)); return 0; } @@ -188,6 +202,8 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) return NULL; } INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); + INIT_RADIX_TREE(&root->pinned_radix, GFP_KERNEL); + INIT_RADIX_TREE(&extent_root->pinned_radix, GFP_KERNEL); INIT_RADIX_TREE(&extent_root->cache_radix, GFP_KERNEL); ret = pread(fp, super, sizeof(struct ctree_super_block), CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); @@ -204,6 +220,8 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) BUG_ON(ret < 0); __setup_root(root, extent_root, &super->root_info, fp); __setup_root(extent_root, extent_root, &super->extent_info, fp); + root->commit_root = root->node; + root->node->count++; return root; } @@ -236,9 +254,11 @@ static int drop_cache(struct ctree_root *root) } return 0; } -int close_ctree(struct ctree_root *root) +int close_ctree(struct ctree_root *root, struct ctree_super_block *s) { - commit_transaction(root); + commit_transaction(root, s); + __commit_transaction(root->extent_root); + write_ctree_super(root, s); drop_cache(root->extent_root); drop_cache(root); BUG_ON(!list_empty(&root->trans)); @@ -249,6 +269,7 @@ int close_ctree(struct ctree_root *root) tree_block_release(root, root->node); if (root->extent_root->node) tree_block_release(root->extent_root, root->extent_root->node); + tree_block_release(root, root->commit_root); free(root); printf("on close %d blocks are allocated\n", allocated_blocks); return 0; -- cgit v1.2.3 From 0579da4280812f34f382fb0f8004d7b0219e7a33 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 7 Mar 2007 16:15:30 -0500 Subject: Btrfs: Fixup last found extent caching Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2fe31c3508c1..997cc578a185 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -186,6 +186,7 @@ static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, root->commit_root = NULL; root->node = read_tree_block(root, info->tree_root); memset(&root->current_insert, 0, sizeof(root->current_insert)); + memset(&root->last_insert, 0, sizeof(root->last_insert)); return 0; } -- cgit v1.2.3 From 83e15a28e046dbb4534dd263d1d3dc867a8994a2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 12 Mar 2007 09:03:27 -0400 Subject: fix leak in btrfs_drop_snapshot Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 997cc578a185..c9201ec81fef 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -167,7 +167,7 @@ int commit_transaction(struct ctree_root *root, struct ctree_super_block *s) root->node->count++; ret = btrfs_drop_snapshot(root, snap); BUG_ON(ret); - tree_block_release(root, snap); + // tree_block_release(root, snap); } write_ctree_super(root, s); btrfs_finish_extent_commit(root); -- cgit v1.2.3 From 7518a238ea0152dc849d1ed76d3cae8b44e12f46 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 12 Mar 2007 12:01:18 -0400 Subject: Btrfs: get/set for struct header fields Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c9201ec81fef..065e888d2c0b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -15,9 +15,10 @@ int cache_max = 10000; static int check_tree_block(struct ctree_root *root, struct tree_buffer *buf) { - if (buf->blocknr != buf->node.header.blocknr) + if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) BUG(); - if (root->node && buf->node.header.parentid != root->node->node.header.parentid) + if (root->node && btrfs_header_parentid(&buf->node.header) != + btrfs_header_parentid(&root->node->node.header)) BUG(); return 0; } @@ -129,7 +130,7 @@ int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) loff_t offset = blocknr * CTREE_BLOCKSIZE; int ret; - if (buf->blocknr != buf->node.header.blocknr) + if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) BUG(); ret = pwrite(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); if (ret != CTREE_BLOCKSIZE) -- cgit v1.2.3 From 234b63a091e1df6bd4261dd7b3a7490074830628 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 13 Mar 2007 10:46:10 -0400 Subject: rename funcs and structs to btrfs Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 106 +++++++++++++++++++++++++++-------------------------- 1 file changed, 54 insertions(+), 52 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 065e888d2c0b..c34c0c60935f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -13,7 +13,7 @@ static int allocated_blocks = 0; int cache_max = 10000; -static int check_tree_block(struct ctree_root *root, struct tree_buffer *buf) +static int check_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) { if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) BUG(); @@ -23,18 +23,18 @@ static int check_tree_block(struct ctree_root *root, struct tree_buffer *buf) return 0; } -static int free_some_buffers(struct ctree_root *root) +static int free_some_buffers(struct btrfs_root *root) { struct list_head *node, *next; - struct tree_buffer *b; + struct btrfs_buffer *b; if (root->cache_size < cache_max) return 0; list_for_each_safe(node, next, &root->cache) { - b = list_entry(node, struct tree_buffer, cache); + b = list_entry(node, struct btrfs_buffer, cache); if (b->count == 1) { BUG_ON(!list_empty(&b->dirty)); list_del_init(&b->cache); - tree_block_release(root, b); + btrfs_block_release(root, b); if (root->cache_size < cache_max) break; } @@ -42,11 +42,11 @@ static int free_some_buffers(struct ctree_root *root) return 0; } -struct tree_buffer *alloc_tree_block(struct ctree_root *root, u64 blocknr) +struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr) { - struct tree_buffer *buf; + struct btrfs_buffer *buf; int ret; - buf = malloc(sizeof(struct tree_buffer)); + buf = malloc(sizeof(struct btrfs_buffer)); if (!buf) return buf; allocated_blocks++; @@ -66,9 +66,9 @@ struct tree_buffer *alloc_tree_block(struct ctree_root *root, u64 blocknr) return buf; } -struct tree_buffer *find_tree_block(struct ctree_root *root, u64 blocknr) +struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr) { - struct tree_buffer *buf; + struct btrfs_buffer *buf; buf = radix_tree_lookup(&root->cache_radix, blocknr); if (buf) { buf->count++; @@ -82,10 +82,10 @@ struct tree_buffer *find_tree_block(struct ctree_root *root, u64 blocknr) return buf; } -struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) +struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) { - loff_t offset = blocknr * CTREE_BLOCKSIZE; - struct tree_buffer *buf; + loff_t offset = blocknr * BTRFS_BLOCKSIZE; + struct btrfs_buffer *buf; int ret; buf = radix_tree_lookup(&root->cache_radix, blocknr); @@ -95,8 +95,8 @@ struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) buf = alloc_tree_block(root, blocknr); if (!buf) return NULL; - ret = pread(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); - if (ret != CTREE_BLOCKSIZE) { + ret = pread(root->fp, &buf->node, BTRFS_BLOCKSIZE, offset); + if (ret != BTRFS_BLOCKSIZE) { free(buf); return NULL; } @@ -106,7 +106,7 @@ struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr) return buf; } -int dirty_tree_block(struct ctree_root *root, struct tree_buffer *buf) +int dirty_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) { if (!list_empty(&buf->dirty)) return 0; @@ -115,46 +115,47 @@ int dirty_tree_block(struct ctree_root *root, struct tree_buffer *buf) return 0; } -int clean_tree_block(struct ctree_root *root, struct tree_buffer *buf) +int clean_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) { if (!list_empty(&buf->dirty)) { list_del_init(&buf->dirty); - tree_block_release(root, buf); + btrfs_block_release(root, buf); } return 0; } -int write_tree_block(struct ctree_root *root, struct tree_buffer *buf) +int write_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) { u64 blocknr = buf->blocknr; - loff_t offset = blocknr * CTREE_BLOCKSIZE; + loff_t offset = blocknr * BTRFS_BLOCKSIZE; int ret; if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) BUG(); - ret = pwrite(root->fp, &buf->node, CTREE_BLOCKSIZE, offset); - if (ret != CTREE_BLOCKSIZE) + ret = pwrite(root->fp, &buf->node, BTRFS_BLOCKSIZE, offset); + if (ret != BTRFS_BLOCKSIZE) return ret; return 0; } -static int __commit_transaction(struct ctree_root *root) +static int __commit_transaction(struct btrfs_root *root) { - struct tree_buffer *b; + struct btrfs_buffer *b; int ret = 0; int wret; while(!list_empty(&root->trans)) { - b = list_entry(root->trans.next, struct tree_buffer, dirty); + b = list_entry(root->trans.next, struct btrfs_buffer, dirty); list_del_init(&b->dirty); wret = write_tree_block(root, b); if (wret) ret = wret; - tree_block_release(root, b); + btrfs_block_release(root, b); } return ret; } -int commit_transaction(struct ctree_root *root, struct ctree_super_block *s) +int btrfs_commit_transaction(struct btrfs_root *root, + struct btrfs_super_block *s) { int ret = 0; @@ -163,20 +164,20 @@ int commit_transaction(struct ctree_root *root, struct ctree_super_block *s) ret = __commit_transaction(root->extent_root); BUG_ON(ret); if (root->commit_root != root->node) { - struct tree_buffer *snap = root->commit_root; + struct btrfs_buffer *snap = root->commit_root; root->commit_root = root->node; root->node->count++; ret = btrfs_drop_snapshot(root, snap); BUG_ON(ret); - // tree_block_release(root, snap); + // btrfs_block_release(root, snap); } write_ctree_super(root, s); btrfs_finish_extent_commit(root); return ret; } -static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, - struct ctree_root_info *info, int fp) +static int __setup_root(struct btrfs_root *root, struct btrfs_root *extent_root, + struct btrfs_root_info *info, int fp) { INIT_LIST_HEAD(&root->trans); INIT_LIST_HEAD(&root->cache); @@ -191,10 +192,10 @@ static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root, return 0; } -struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) +struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) { - struct ctree_root *root = malloc(sizeof(struct ctree_root)); - struct ctree_root *extent_root = malloc(sizeof(struct ctree_root)); + struct btrfs_root *root = malloc(sizeof(struct btrfs_root)); + struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root)); int fp; int ret; @@ -207,16 +208,16 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) INIT_RADIX_TREE(&root->pinned_radix, GFP_KERNEL); INIT_RADIX_TREE(&extent_root->pinned_radix, GFP_KERNEL); INIT_RADIX_TREE(&extent_root->cache_radix, GFP_KERNEL); - ret = pread(fp, super, sizeof(struct ctree_super_block), - CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); + ret = pread(fp, super, sizeof(struct btrfs_super_block), + BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); if (ret == 0 || super->root_info.tree_root == 0) { printf("making new FS!\n"); ret = mkfs(fp); if (ret) return NULL; - ret = pread(fp, super, sizeof(struct ctree_super_block), - CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); - if (ret != sizeof(struct ctree_super_block)) + ret = pread(fp, super, sizeof(struct btrfs_super_block), + BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); + if (ret != sizeof(struct btrfs_super_block)) return NULL; } BUG_ON(ret < 0); @@ -227,18 +228,19 @@ struct ctree_root *open_ctree(char *filename, struct ctree_super_block *super) return root; } -static int __update_root(struct ctree_root *root, struct ctree_root_info *info) +static int __update_root(struct btrfs_root *root, struct btrfs_root_info *info) { info->tree_root = root->node->blocknr; return 0; } -int write_ctree_super(struct ctree_root *root, struct ctree_super_block *s) +int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s) { int ret; __update_root(root, &s->root_info); __update_root(root->extent_root, &s->extent_info); - ret = pwrite(root->fp, s, sizeof(*s), CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE)); + ret = pwrite(root->fp, s, sizeof(*s), + BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); if (ret != sizeof(*s)) { fprintf(stderr, "failed to write new super block err %d\n", ret); return ret; @@ -246,19 +248,19 @@ int write_ctree_super(struct ctree_root *root, struct ctree_super_block *s) return 0; } -static int drop_cache(struct ctree_root *root) +static int drop_cache(struct btrfs_root *root) { while(!list_empty(&root->cache)) { - struct tree_buffer *b = list_entry(root->cache.next, - struct tree_buffer, cache); + struct btrfs_buffer *b = list_entry(root->cache.next, + struct btrfs_buffer, cache); list_del_init(&b->cache); - tree_block_release(root, b); + btrfs_block_release(root, b); } return 0; } -int close_ctree(struct ctree_root *root, struct ctree_super_block *s) +int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) { - commit_transaction(root, s); + btrfs_commit_transaction(root, s); __commit_transaction(root->extent_root); write_ctree_super(root, s); drop_cache(root->extent_root); @@ -268,16 +270,16 @@ int close_ctree(struct ctree_root *root, struct ctree_super_block *s) close(root->fp); if (root->node) - tree_block_release(root, root->node); + btrfs_block_release(root, root->node); if (root->extent_root->node) - tree_block_release(root->extent_root, root->extent_root->node); - tree_block_release(root, root->commit_root); + btrfs_block_release(root->extent_root, root->extent_root->node); + btrfs_block_release(root, root->commit_root); free(root); printf("on close %d blocks are allocated\n", allocated_blocks); return 0; } -void tree_block_release(struct ctree_root *root, struct tree_buffer *buf) +void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf) { buf->count--; if (buf->count < 0) -- cgit v1.2.3 From 3768f3689fc76ecea17414936dff7a02746a4355 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 13 Mar 2007 16:47:54 -0400 Subject: Btrfs: Change the super to point to a tree of trees to enable persistent snapshots Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 135 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 107 insertions(+), 28 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c34c0c60935f..3d4bf6833f2a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -154,41 +154,96 @@ static int __commit_transaction(struct btrfs_root *root) return ret; } +static int commit_extent_and_tree_roots(struct btrfs_root *tree_root, + struct btrfs_root *extent_root) +{ + int ret; + u64 old_extent_block; + + while(1) { + old_extent_block = btrfs_root_blocknr(&extent_root->root_item); + if (old_extent_block == extent_root->node->blocknr) + break; + btrfs_set_root_blocknr(&extent_root->root_item, + extent_root->node->blocknr); + ret = btrfs_update_root(tree_root, + &extent_root->root_key, + &extent_root->root_item); + BUG_ON(ret); + } + __commit_transaction(extent_root); + __commit_transaction(tree_root); + return 0; +} + int btrfs_commit_transaction(struct btrfs_root *root, struct btrfs_super_block *s) { int ret = 0; + struct btrfs_buffer *snap = root->commit_root; + struct btrfs_key snap_key; ret = __commit_transaction(root); - if (!ret && root != root->extent_root) - ret = __commit_transaction(root->extent_root); BUG_ON(ret); - if (root->commit_root != root->node) { - struct btrfs_buffer *snap = root->commit_root; - root->commit_root = root->node; - root->node->count++; - ret = btrfs_drop_snapshot(root, snap); - BUG_ON(ret); - // btrfs_block_release(root, snap); - } + + if (root->commit_root == root->node) + return 0; + + memcpy(&snap_key, &root->root_key, sizeof(snap_key)); + root->root_key.offset++; + + btrfs_set_root_blocknr(&root->root_item, root->node->blocknr); + ret = btrfs_insert_root(root->tree_root, &root->root_key, + &root->root_item); + BUG_ON(ret); + + ret = commit_extent_and_tree_roots(root->tree_root, root->extent_root); + BUG_ON(ret); + write_ctree_super(root, s); - btrfs_finish_extent_commit(root); + btrfs_finish_extent_commit(root->extent_root); + btrfs_finish_extent_commit(root->tree_root); + + root->commit_root = root->node; + root->node->count++; + ret = btrfs_drop_snapshot(root, snap); + BUG_ON(ret); + + ret = btrfs_del_root(root->tree_root, &snap_key); + BUG_ON(ret); + return ret; } -static int __setup_root(struct btrfs_root *root, struct btrfs_root *extent_root, - struct btrfs_root_info *info, int fp) +static int __setup_root(struct btrfs_root *root, u64 objectid, int fp) { INIT_LIST_HEAD(&root->trans); INIT_LIST_HEAD(&root->cache); root->cache_size = 0; root->fp = fp; root->node = NULL; - root->extent_root = extent_root; root->commit_root = NULL; - root->node = read_tree_block(root, info->tree_root); memset(&root->current_insert, 0, sizeof(root->current_insert)); memset(&root->last_insert, 0, sizeof(root->last_insert)); + memset(&root->root_key, 0, sizeof(root->root_key)); + memset(&root->root_item, 0, sizeof(root->root_item)); + return 0; +} + +static int find_and_setup_root(struct btrfs_root *tree_root, u64 objectid, + struct btrfs_root *root, int fp) +{ + int ret; + + __setup_root(root, objectid, fp); + ret = btrfs_find_last_root(tree_root, objectid, + &root->root_item, &root->root_key); + BUG_ON(ret); + + root->node = read_tree_block(root, + btrfs_root_blocknr(&root->root_item)); + root->ref_cows = 0; + BUG_ON(!root->node); return 0; } @@ -196,9 +251,19 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) { struct btrfs_root *root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root)); + struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root)); int fp; int ret; + root->extent_root = extent_root; + root->tree_root = tree_root; + + extent_root->extent_root = extent_root; + extent_root->tree_root = tree_root; + + tree_root->extent_root = extent_root; + tree_root->tree_root = tree_root; + fp = open(filename, O_CREAT | O_RDWR, 0600); if (fp < 0) { free(root); @@ -208,11 +273,14 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) INIT_RADIX_TREE(&root->pinned_radix, GFP_KERNEL); INIT_RADIX_TREE(&extent_root->pinned_radix, GFP_KERNEL); INIT_RADIX_TREE(&extent_root->cache_radix, GFP_KERNEL); + INIT_RADIX_TREE(&tree_root->pinned_radix, GFP_KERNEL); + INIT_RADIX_TREE(&tree_root->cache_radix, GFP_KERNEL); + ret = pread(fp, super, sizeof(struct btrfs_super_block), BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); - if (ret == 0 || super->root_info.tree_root == 0) { + if (ret == 0 || btrfs_super_root(super) == 0) { printf("making new FS!\n"); - ret = mkfs(fp); + ret = mkfs(fp, 0, BTRFS_BLOCKSIZE); if (ret) return NULL; ret = pread(fp, super, sizeof(struct btrfs_super_block), @@ -221,24 +289,29 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) return NULL; } BUG_ON(ret < 0); - __setup_root(root, extent_root, &super->root_info, fp); - __setup_root(extent_root, extent_root, &super->extent_info, fp); + + __setup_root(tree_root, BTRFS_ROOT_TREE_OBJECTID, fp); + tree_root->node = read_tree_block(tree_root, btrfs_super_root(super)); + BUG_ON(!tree_root->node); + + ret = find_and_setup_root(tree_root, BTRFS_EXTENT_TREE_OBJECTID, + extent_root, fp); + BUG_ON(ret); + + ret = find_and_setup_root(tree_root, BTRFS_FS_TREE_OBJECTID, + root, fp); + BUG_ON(ret); + root->commit_root = root->node; root->node->count++; + root->ref_cows = 1; return root; } -static int __update_root(struct btrfs_root *root, struct btrfs_root_info *info) -{ - info->tree_root = root->node->blocknr; - return 0; -} - int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s) { int ret; - __update_root(root, &s->root_info); - __update_root(root->extent_root, &s->extent_info); + btrfs_set_super_root(s, root->tree_root->node->blocknr); ret = pwrite(root->fp, s, sizeof(*s), BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); if (ret != sizeof(*s)) { @@ -260,19 +333,25 @@ static int drop_cache(struct btrfs_root *root) } int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) { + int ret; btrfs_commit_transaction(root, s); - __commit_transaction(root->extent_root); + ret = commit_extent_and_tree_roots(root->tree_root, root->extent_root); + BUG_ON(ret); write_ctree_super(root, s); drop_cache(root->extent_root); + drop_cache(root->tree_root); drop_cache(root); BUG_ON(!list_empty(&root->trans)); BUG_ON(!list_empty(&root->extent_root->trans)); + BUG_ON(!list_empty(&root->tree_root->trans)); close(root->fp); if (root->node) btrfs_block_release(root, root->node); if (root->extent_root->node) btrfs_block_release(root->extent_root, root->extent_root->node); + if (root->tree_root->node) + btrfs_block_release(root->tree_root, root->tree_root->node); btrfs_block_release(root, root->commit_root); free(root); printf("on close %d blocks are allocated\n", allocated_blocks); -- cgit v1.2.3 From 123abc88c9087b9c5605566ee3491aaef17fd837 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 14 Mar 2007 14:14:43 -0400 Subject: Btrfs: variable block size support Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3d4bf6833f2a..8d9457b5aef5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -46,7 +46,8 @@ struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr) { struct btrfs_buffer *buf; int ret; - buf = malloc(sizeof(struct btrfs_buffer)); + + buf = malloc(sizeof(struct btrfs_buffer) + root->blocksize); if (!buf) return buf; allocated_blocks++; @@ -84,7 +85,7 @@ struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr) struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) { - loff_t offset = blocknr * BTRFS_BLOCKSIZE; + loff_t offset = blocknr * root->blocksize; struct btrfs_buffer *buf; int ret; @@ -95,8 +96,8 @@ struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) buf = alloc_tree_block(root, blocknr); if (!buf) return NULL; - ret = pread(root->fp, &buf->node, BTRFS_BLOCKSIZE, offset); - if (ret != BTRFS_BLOCKSIZE) { + ret = pread(root->fp, &buf->node, root->blocksize, offset); + if (ret != root->blocksize) { free(buf); return NULL; } @@ -127,13 +128,13 @@ int clean_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) int write_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) { u64 blocknr = buf->blocknr; - loff_t offset = blocknr * BTRFS_BLOCKSIZE; + loff_t offset = blocknr * root->blocksize; int ret; if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) BUG(); - ret = pwrite(root->fp, &buf->node, BTRFS_BLOCKSIZE, offset); - if (ret != BTRFS_BLOCKSIZE) + ret = pwrite(root->fp, &buf->node, root->blocksize, offset); + if (ret != root->blocksize) return ret; return 0; } @@ -215,7 +216,8 @@ int btrfs_commit_transaction(struct btrfs_root *root, return ret; } -static int __setup_root(struct btrfs_root *root, u64 objectid, int fp) +static int __setup_root(struct btrfs_super_block *super, + struct btrfs_root *root, u64 objectid, int fp) { INIT_LIST_HEAD(&root->trans); INIT_LIST_HEAD(&root->cache); @@ -223,6 +225,8 @@ static int __setup_root(struct btrfs_root *root, u64 objectid, int fp) root->fp = fp; root->node = NULL; root->commit_root = NULL; + root->blocksize = btrfs_super_blocksize(super); + root->ref_cows = 0; memset(&root->current_insert, 0, sizeof(root->current_insert)); memset(&root->last_insert, 0, sizeof(root->last_insert)); memset(&root->root_key, 0, sizeof(root->root_key)); @@ -230,19 +234,19 @@ static int __setup_root(struct btrfs_root *root, u64 objectid, int fp) return 0; } -static int find_and_setup_root(struct btrfs_root *tree_root, u64 objectid, - struct btrfs_root *root, int fp) +static int find_and_setup_root(struct btrfs_super_block *super, + struct btrfs_root *tree_root, u64 objectid, + struct btrfs_root *root, int fp) { int ret; - __setup_root(root, objectid, fp); + __setup_root(super, root, objectid, fp); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); root->node = read_tree_block(root, btrfs_root_blocknr(&root->root_item)); - root->ref_cows = 0; BUG_ON(!root->node); return 0; } @@ -277,28 +281,28 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) INIT_RADIX_TREE(&tree_root->cache_radix, GFP_KERNEL); ret = pread(fp, super, sizeof(struct btrfs_super_block), - BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); + BTRFS_SUPER_INFO_OFFSET); if (ret == 0 || btrfs_super_root(super) == 0) { printf("making new FS!\n"); - ret = mkfs(fp, 0, BTRFS_BLOCKSIZE); + ret = mkfs(fp, 0, 1024); if (ret) return NULL; ret = pread(fp, super, sizeof(struct btrfs_super_block), - BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); + BTRFS_SUPER_INFO_OFFSET); if (ret != sizeof(struct btrfs_super_block)) return NULL; } BUG_ON(ret < 0); - __setup_root(tree_root, BTRFS_ROOT_TREE_OBJECTID, fp); + __setup_root(super, tree_root, BTRFS_ROOT_TREE_OBJECTID, fp); tree_root->node = read_tree_block(tree_root, btrfs_super_root(super)); BUG_ON(!tree_root->node); - ret = find_and_setup_root(tree_root, BTRFS_EXTENT_TREE_OBJECTID, + ret = find_and_setup_root(super, tree_root, BTRFS_EXTENT_TREE_OBJECTID, extent_root, fp); BUG_ON(ret); - ret = find_and_setup_root(tree_root, BTRFS_FS_TREE_OBJECTID, + ret = find_and_setup_root(super, tree_root, BTRFS_FS_TREE_OBJECTID, root, fp); BUG_ON(ret); @@ -313,7 +317,7 @@ int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s) int ret; btrfs_set_super_root(s, root->tree_root->node->blocknr); ret = pwrite(root->fp, s, sizeof(*s), - BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE)); + BTRFS_SUPER_INFO_OFFSET); if (ret != sizeof(*s)) { fprintf(stderr, "failed to write new super block err %d\n", ret); return ret; -- cgit v1.2.3 From e089f05c18ab36ed5fa7e2319052e03ab800d518 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 16 Mar 2007 16:20:31 -0400 Subject: Btrfs: transaction handles everywhere Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 60 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 24 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8d9457b5aef5..25ce07908ee3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -9,6 +9,7 @@ #include "radix-tree.h" #include "ctree.h" #include "disk-io.h" +#include "transaction.h" static int allocated_blocks = 0; int cache_max = 10000; @@ -107,7 +108,8 @@ struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) return buf; } -int dirty_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) +int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_buffer *buf) { if (!list_empty(&buf->dirty)) return 0; @@ -116,7 +118,8 @@ int dirty_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) return 0; } -int clean_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) +int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_buffer *buf) { if (!list_empty(&buf->dirty)) { list_del_init(&buf->dirty); @@ -125,7 +128,8 @@ int clean_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) return 0; } -int write_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) +int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct btrfs_buffer *buf) { u64 blocknr = buf->blocknr; loff_t offset = blocknr * root->blocksize; @@ -139,7 +143,8 @@ int write_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) return 0; } -static int __commit_transaction(struct btrfs_root *root) +static int __commit_transaction(struct btrfs_trans_handle *trans, struct + btrfs_root *root) { struct btrfs_buffer *b; int ret = 0; @@ -147,7 +152,7 @@ static int __commit_transaction(struct btrfs_root *root) while(!list_empty(&root->trans)) { b = list_entry(root->trans.next, struct btrfs_buffer, dirty); list_del_init(&b->dirty); - wret = write_tree_block(root, b); + wret = write_tree_block(trans, root, b); if (wret) ret = wret; btrfs_block_release(root, b); @@ -155,8 +160,9 @@ static int __commit_transaction(struct btrfs_root *root) return ret; } -static int commit_extent_and_tree_roots(struct btrfs_root *tree_root, - struct btrfs_root *extent_root) +static int commit_extent_and_tree_roots(struct btrfs_trans_handle *trans, + struct btrfs_root *tree_root, struct + btrfs_root *extent_root) { int ret; u64 old_extent_block; @@ -167,24 +173,24 @@ static int commit_extent_and_tree_roots(struct btrfs_root *tree_root, break; btrfs_set_root_blocknr(&extent_root->root_item, extent_root->node->blocknr); - ret = btrfs_update_root(tree_root, + ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); BUG_ON(ret); } - __commit_transaction(extent_root); - __commit_transaction(tree_root); + __commit_transaction(trans, extent_root); + __commit_transaction(trans, tree_root); return 0; } -int btrfs_commit_transaction(struct btrfs_root *root, - struct btrfs_super_block *s) +int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct + btrfs_root *root, struct btrfs_super_block *s) { int ret = 0; struct btrfs_buffer *snap = root->commit_root; struct btrfs_key snap_key; - ret = __commit_transaction(root); + ret = __commit_transaction(trans, root); BUG_ON(ret); if (root->commit_root == root->node) @@ -194,23 +200,24 @@ int btrfs_commit_transaction(struct btrfs_root *root, root->root_key.offset++; btrfs_set_root_blocknr(&root->root_item, root->node->blocknr); - ret = btrfs_insert_root(root->tree_root, &root->root_key, + ret = btrfs_insert_root(trans, root->tree_root, &root->root_key, &root->root_item); BUG_ON(ret); - ret = commit_extent_and_tree_roots(root->tree_root, root->extent_root); + ret = commit_extent_and_tree_roots(trans, root->tree_root, + root->extent_root); BUG_ON(ret); - write_ctree_super(root, s); - btrfs_finish_extent_commit(root->extent_root); - btrfs_finish_extent_commit(root->tree_root); + write_ctree_super(trans, root, s); + btrfs_finish_extent_commit(trans, root->extent_root); + btrfs_finish_extent_commit(trans, root->tree_root); root->commit_root = root->node; root->node->count++; - ret = btrfs_drop_snapshot(root, snap); + ret = btrfs_drop_snapshot(trans, root, snap); BUG_ON(ret); - ret = btrfs_del_root(root->tree_root, &snap_key); + ret = btrfs_del_root(trans, root->tree_root, &snap_key); BUG_ON(ret); return ret; @@ -312,7 +319,8 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) return root; } -int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s) +int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root + *root, struct btrfs_super_block *s) { int ret; btrfs_set_super_root(s, root->tree_root->node->blocknr); @@ -338,10 +346,14 @@ static int drop_cache(struct btrfs_root *root) int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) { int ret; - btrfs_commit_transaction(root, s); - ret = commit_extent_and_tree_roots(root->tree_root, root->extent_root); + struct btrfs_trans_handle *trans; + + trans = root->running_transaction; + btrfs_commit_transaction(trans, root, s); + ret = commit_extent_and_tree_roots(trans, root->tree_root, + root->extent_root); BUG_ON(ret); - write_ctree_super(root, s); + write_ctree_super(trans, root, s); drop_cache(root->extent_root); drop_cache(root->tree_root); drop_cache(root); -- cgit v1.2.3 From 9f5fae2fe6dc35b46bf56183f11398451851cb3f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 20 Mar 2007 14:38:32 -0400 Subject: Btrfs: Add inode map, and the start of file extent items Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 176 +++++++++++++++++++++++++++++------------------------ 1 file changed, 96 insertions(+), 80 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 25ce07908ee3..1849a99690c8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -28,15 +28,15 @@ static int free_some_buffers(struct btrfs_root *root) { struct list_head *node, *next; struct btrfs_buffer *b; - if (root->cache_size < cache_max) + if (root->fs_info->cache_size < cache_max) return 0; - list_for_each_safe(node, next, &root->cache) { + list_for_each_safe(node, next, &root->fs_info->cache) { b = list_entry(node, struct btrfs_buffer, cache); if (b->count == 1) { BUG_ON(!list_empty(&b->dirty)); list_del_init(&b->cache); btrfs_block_release(root, b); - if (root->cache_size < cache_max) + if (root->fs_info->cache_size < cache_max) break; } } @@ -57,10 +57,10 @@ struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr) INIT_LIST_HEAD(&buf->dirty); free_some_buffers(root); radix_tree_preload(GFP_KERNEL); - ret = radix_tree_insert(&root->cache_radix, blocknr, buf); + ret = radix_tree_insert(&root->fs_info->cache_radix, blocknr, buf); radix_tree_preload_end(); - list_add_tail(&buf->cache, &root->cache); - root->cache_size++; + list_add_tail(&buf->cache, &root->fs_info->cache); + root->fs_info->cache_size++; if (ret) { free(buf); return NULL; @@ -71,7 +71,7 @@ struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr) struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr) { struct btrfs_buffer *buf; - buf = radix_tree_lookup(&root->cache_radix, blocknr); + buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr); if (buf) { buf->count++; } else { @@ -90,14 +90,15 @@ struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) struct btrfs_buffer *buf; int ret; - buf = radix_tree_lookup(&root->cache_radix, blocknr); + buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr); if (buf) { buf->count++; } else { buf = alloc_tree_block(root, blocknr); if (!buf) return NULL; - ret = pread(root->fp, &buf->node, root->blocksize, offset); + ret = pread(root->fs_info->fp, &buf->node, root->blocksize, + offset); if (ret != root->blocksize) { free(buf); return NULL; @@ -113,7 +114,7 @@ int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, { if (!list_empty(&buf->dirty)) return 0; - list_add_tail(&buf->dirty, &root->trans); + list_add_tail(&buf->dirty, &root->fs_info->trans); buf->count++; return 0; } @@ -137,7 +138,7 @@ int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) BUG(); - ret = pwrite(root->fp, &buf->node, root->blocksize, offset); + ret = pwrite(root->fs_info->fp, &buf->node, root->blocksize, offset); if (ret != root->blocksize) return ret; return 0; @@ -149,8 +150,9 @@ static int __commit_transaction(struct btrfs_trans_handle *trans, struct struct btrfs_buffer *b; int ret = 0; int wret; - while(!list_empty(&root->trans)) { - b = list_entry(root->trans.next, struct btrfs_buffer, dirty); + while(!list_empty(&root->fs_info->trans)) { + b = list_entry(root->fs_info->trans.next, struct btrfs_buffer, + dirty); list_del_init(&b->dirty); wret = write_tree_block(trans, root, b); if (wret) @@ -160,13 +162,21 @@ static int __commit_transaction(struct btrfs_trans_handle *trans, struct return ret; } -static int commit_extent_and_tree_roots(struct btrfs_trans_handle *trans, - struct btrfs_root *tree_root, struct - btrfs_root *extent_root) +static int commit_tree_roots(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) { int ret; u64 old_extent_block; - + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_root *inode_root = fs_info->inode_root; + + btrfs_set_root_blocknr(&inode_root->root_item, + inode_root->node->blocknr); + ret = btrfs_update_root(trans, tree_root, + &inode_root->root_key, + &inode_root->root_item); + BUG_ON(ret); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); if (old_extent_block == extent_root->node->blocknr) @@ -178,8 +188,6 @@ static int commit_extent_and_tree_roots(struct btrfs_trans_handle *trans, &extent_root->root_item); BUG_ON(ret); } - __commit_transaction(trans, extent_root); - __commit_transaction(trans, tree_root); return 0; } @@ -190,9 +198,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct struct btrfs_buffer *snap = root->commit_root; struct btrfs_key snap_key; - ret = __commit_transaction(trans, root); - BUG_ON(ret); - if (root->commit_root == root->node) return 0; @@ -200,54 +205,55 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct root->root_key.offset++; btrfs_set_root_blocknr(&root->root_item, root->node->blocknr); - ret = btrfs_insert_root(trans, root->tree_root, &root->root_key, - &root->root_item); + ret = btrfs_insert_root(trans, root->fs_info->tree_root, + &root->root_key, &root->root_item); + BUG_ON(ret); + + ret = commit_tree_roots(trans, root->fs_info); BUG_ON(ret); - ret = commit_extent_and_tree_roots(trans, root->tree_root, - root->extent_root); + ret = __commit_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root, s); - btrfs_finish_extent_commit(trans, root->extent_root); - btrfs_finish_extent_commit(trans, root->tree_root); + btrfs_finish_extent_commit(trans, root->fs_info->extent_root); + btrfs_finish_extent_commit(trans, root->fs_info->tree_root); root->commit_root = root->node; root->node->count++; ret = btrfs_drop_snapshot(trans, root, snap); BUG_ON(ret); - ret = btrfs_del_root(trans, root->tree_root, &snap_key); + ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key); BUG_ON(ret); return ret; } static int __setup_root(struct btrfs_super_block *super, - struct btrfs_root *root, u64 objectid, int fp) + struct btrfs_root *root, + struct btrfs_fs_info *fs_info, + u64 objectid, int fp) { - INIT_LIST_HEAD(&root->trans); - INIT_LIST_HEAD(&root->cache); - root->cache_size = 0; - root->fp = fp; root->node = NULL; root->commit_root = NULL; root->blocksize = btrfs_super_blocksize(super); root->ref_cows = 0; - memset(&root->current_insert, 0, sizeof(root->current_insert)); - memset(&root->last_insert, 0, sizeof(root->last_insert)); + root->fs_info = fs_info; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); return 0; } static int find_and_setup_root(struct btrfs_super_block *super, - struct btrfs_root *tree_root, u64 objectid, + struct btrfs_root *tree_root, + struct btrfs_fs_info *fs_info, + u64 objectid, struct btrfs_root *root, int fp) { int ret; - __setup_root(super, root, objectid, fp); + __setup_root(super, root, fs_info, objectid, fp); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); @@ -263,29 +269,31 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) struct btrfs_root *root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root)); + struct btrfs_root *inode_root = malloc(sizeof(struct btrfs_root)); + struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info)); int fp; int ret; - root->extent_root = extent_root; - root->tree_root = tree_root; - - extent_root->extent_root = extent_root; - extent_root->tree_root = tree_root; - - tree_root->extent_root = extent_root; - tree_root->tree_root = tree_root; - fp = open(filename, O_CREAT | O_RDWR, 0600); if (fp < 0) { free(root); return NULL; } - INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL); - INIT_RADIX_TREE(&root->pinned_radix, GFP_KERNEL); - INIT_RADIX_TREE(&extent_root->pinned_radix, GFP_KERNEL); - INIT_RADIX_TREE(&extent_root->cache_radix, GFP_KERNEL); - INIT_RADIX_TREE(&tree_root->pinned_radix, GFP_KERNEL); - INIT_RADIX_TREE(&tree_root->cache_radix, GFP_KERNEL); + INIT_RADIX_TREE(&fs_info->cache_radix, GFP_KERNEL); + INIT_RADIX_TREE(&fs_info->pinned_radix, GFP_KERNEL); + INIT_LIST_HEAD(&fs_info->trans); + INIT_LIST_HEAD(&fs_info->cache); + fs_info->cache_size = 0; + fs_info->fp = fp; + fs_info->running_transaction = NULL; + fs_info->fs_root = root; + fs_info->tree_root = tree_root; + fs_info->extent_root = extent_root; + fs_info->inode_root = inode_root; + fs_info->last_inode_alloc = 0; + fs_info->last_inode_alloc_dirid = 0; + memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); + memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); ret = pread(fp, super, sizeof(struct btrfs_super_block), BTRFS_SUPER_INFO_OFFSET); @@ -301,16 +309,20 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) } BUG_ON(ret < 0); - __setup_root(super, tree_root, BTRFS_ROOT_TREE_OBJECTID, fp); + __setup_root(super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID, fp); tree_root->node = read_tree_block(tree_root, btrfs_super_root(super)); BUG_ON(!tree_root->node); - ret = find_and_setup_root(super, tree_root, BTRFS_EXTENT_TREE_OBJECTID, - extent_root, fp); + ret = find_and_setup_root(super, tree_root, fs_info, + BTRFS_EXTENT_TREE_OBJECTID, extent_root, fp); BUG_ON(ret); - ret = find_and_setup_root(super, tree_root, BTRFS_FS_TREE_OBJECTID, - root, fp); + ret = find_and_setup_root(super, tree_root, fs_info, + BTRFS_INODE_MAP_OBJECTID, inode_root, fp); + BUG_ON(ret); + + ret = find_and_setup_root(super, tree_root, fs_info, + BTRFS_FS_TREE_OBJECTID, root, fp); BUG_ON(ret); root->commit_root = root->node; @@ -323,8 +335,8 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s) { int ret; - btrfs_set_super_root(s, root->tree_root->node->blocknr); - ret = pwrite(root->fp, s, sizeof(*s), + btrfs_set_super_root(s, root->fs_info->tree_root->node->blocknr); + ret = pwrite(root->fs_info->fp, s, sizeof(*s), BTRFS_SUPER_INFO_OFFSET); if (ret != sizeof(*s)) { fprintf(stderr, "failed to write new super block err %d\n", ret); @@ -335,9 +347,10 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root static int drop_cache(struct btrfs_root *root) { - while(!list_empty(&root->cache)) { - struct btrfs_buffer *b = list_entry(root->cache.next, - struct btrfs_buffer, cache); + while(!list_empty(&root->fs_info->cache)) { + struct btrfs_buffer *b = list_entry(root->fs_info->cache.next, + struct btrfs_buffer, + cache); list_del_init(&b->cache); btrfs_block_release(root, b); } @@ -348,26 +361,28 @@ int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) int ret; struct btrfs_trans_handle *trans; - trans = root->running_transaction; + trans = root->fs_info->running_transaction; btrfs_commit_transaction(trans, root, s); - ret = commit_extent_and_tree_roots(trans, root->tree_root, - root->extent_root); + ret = commit_tree_roots(trans, root->fs_info); + BUG_ON(ret); + ret = __commit_transaction(trans, root); BUG_ON(ret); write_ctree_super(trans, root, s); - drop_cache(root->extent_root); - drop_cache(root->tree_root); drop_cache(root); - BUG_ON(!list_empty(&root->trans)); - BUG_ON(!list_empty(&root->extent_root->trans)); - BUG_ON(!list_empty(&root->tree_root->trans)); + BUG_ON(!list_empty(&root->fs_info->trans)); - close(root->fp); + close(root->fs_info->fp); if (root->node) btrfs_block_release(root, root->node); - if (root->extent_root->node) - btrfs_block_release(root->extent_root, root->extent_root->node); - if (root->tree_root->node) - btrfs_block_release(root->tree_root, root->tree_root->node); + if (root->fs_info->extent_root->node) + btrfs_block_release(root->fs_info->extent_root, + root->fs_info->extent_root->node); + if (root->fs_info->inode_root->node) + btrfs_block_release(root->fs_info->inode_root, + root->fs_info->inode_root->node); + if (root->fs_info->tree_root->node) + btrfs_block_release(root->fs_info->tree_root, + root->fs_info->tree_root->node); btrfs_block_release(root, root->commit_root); free(root); printf("on close %d blocks are allocated\n", allocated_blocks); @@ -382,15 +397,16 @@ void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf) if (buf->count == 0) { BUG_ON(!list_empty(&buf->cache)); BUG_ON(!list_empty(&buf->dirty)); - if (!radix_tree_lookup(&root->cache_radix, buf->blocknr)) + if (!radix_tree_lookup(&root->fs_info->cache_radix, + buf->blocknr)) BUG(); - radix_tree_delete(&root->cache_radix, buf->blocknr); + radix_tree_delete(&root->fs_info->cache_radix, buf->blocknr); memset(buf, 0, sizeof(*buf)); free(buf); BUG_ON(allocated_blocks == 0); allocated_blocks--; - BUG_ON(root->cache_size == 0); - root->cache_size--; + BUG_ON(root->fs_info->cache_size == 0); + root->fs_info->cache_size--; } } -- cgit v1.2.3 From 293ffd5fd340428276fbbd24ce7b98bf6728466b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 20 Mar 2007 15:57:25 -0400 Subject: Btrfs: change dir-test to insert inode_items Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1849a99690c8..bacaa38ea82e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -226,6 +226,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key); BUG_ON(ret); + root->fs_info->generation = root->root_key.offset + 1; return ret; } @@ -328,6 +329,7 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) root->commit_root = root->node; root->node->count++; root->ref_cows = 1; + root->fs_info->generation = root->root_key.offset + 1; return root; } -- cgit v1.2.3 From 1261ec42b3d3a3ad878bd172144940e3ac710749 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 20 Mar 2007 20:35:03 -0400 Subject: Btrfs: Better block record keeping, real mkfs Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bacaa38ea82e..0322c55162cb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -293,20 +293,15 @@ struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) fs_info->inode_root = inode_root; fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; + fs_info->disk_super = super; memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); ret = pread(fp, super, sizeof(struct btrfs_super_block), BTRFS_SUPER_INFO_OFFSET); if (ret == 0 || btrfs_super_root(super) == 0) { - printf("making new FS!\n"); - ret = mkfs(fp, 0, 1024); - if (ret) - return NULL; - ret = pread(fp, super, sizeof(struct btrfs_super_block), - BTRFS_SUPER_INFO_OFFSET); - if (ret != sizeof(struct btrfs_super_block)) - return NULL; + BUG(); + return NULL; } BUG_ON(ret < 0); -- cgit v1.2.3 From 2e635a278354a1a7951e16cfea4c247d6d0e7c99 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 21 Mar 2007 11:12:56 -0400 Subject: Btrfs: initial move to kernel module land Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0322c55162cb..05637f9fd7c7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -266,20 +266,25 @@ static int find_and_setup_root(struct btrfs_super_block *super, } struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) +{ + int fp; + + fp = open(filename, O_CREAT | O_RDWR, 0600); + if (fp < 0) { + return NULL; + } + return open_ctree_fd(fp, super); +} + +struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) { struct btrfs_root *root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root)); struct btrfs_root *inode_root = malloc(sizeof(struct btrfs_root)); struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info)); - int fp; int ret; - fp = open(filename, O_CREAT | O_RDWR, 0600); - if (fp < 0) { - free(root); - return NULL; - } INIT_RADIX_TREE(&fs_info->cache_radix, GFP_KERNEL); INIT_RADIX_TREE(&fs_info->pinned_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans); -- cgit v1.2.3 From e20d96d64f9cf9288ffecc9ad4714e91c3b97ca8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 22 Mar 2007 12:13:20 -0400 Subject: Mountable btrfs, with readdir Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 281 ++++++++++++++++------------------------------------- 1 file changed, 83 insertions(+), 198 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 05637f9fd7c7..df2061a735cd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1,165 +1,67 @@ -#define _XOPEN_SOURCE 500 -#include -#include -#include -#include -#include -#include -#include "kerncompat.h" -#include "radix-tree.h" +#include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" -static int allocated_blocks = 0; -int cache_max = 10000; - -static int check_tree_block(struct btrfs_root *root, struct btrfs_buffer *buf) +static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { - if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) + struct btrfs_node *node = btrfs_buffer_node(buf); + if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) BUG(); - if (root->node && btrfs_header_parentid(&buf->node.header) != - btrfs_header_parentid(&root->node->node.header)) + if (root->node && btrfs_header_parentid(&node->header) != + btrfs_header_parentid(btrfs_buffer_header(root->node))) BUG(); return 0; } -static int free_some_buffers(struct btrfs_root *root) +struct buffer_head *alloc_tree_block(struct btrfs_root *root, u64 blocknr) { - struct list_head *node, *next; - struct btrfs_buffer *b; - if (root->fs_info->cache_size < cache_max) - return 0; - list_for_each_safe(node, next, &root->fs_info->cache) { - b = list_entry(node, struct btrfs_buffer, cache); - if (b->count == 1) { - BUG_ON(!list_empty(&b->dirty)); - list_del_init(&b->cache); - btrfs_block_release(root, b); - if (root->fs_info->cache_size < cache_max) - break; - } - } - return 0; + return sb_getblk(root->fs_info->sb, blocknr); } -struct btrfs_buffer *alloc_tree_block(struct btrfs_root *root, u64 blocknr) +struct buffer_head *find_tree_block(struct btrfs_root *root, u64 blocknr) { - struct btrfs_buffer *buf; - int ret; - - buf = malloc(sizeof(struct btrfs_buffer) + root->blocksize); - if (!buf) - return buf; - allocated_blocks++; - buf->blocknr = blocknr; - buf->count = 2; - INIT_LIST_HEAD(&buf->dirty); - free_some_buffers(root); - radix_tree_preload(GFP_KERNEL); - ret = radix_tree_insert(&root->fs_info->cache_radix, blocknr, buf); - radix_tree_preload_end(); - list_add_tail(&buf->cache, &root->fs_info->cache); - root->fs_info->cache_size++; - if (ret) { - free(buf); - return NULL; - } - return buf; + return sb_getblk(root->fs_info->sb, blocknr); } -struct btrfs_buffer *find_tree_block(struct btrfs_root *root, u64 blocknr) +struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) { - struct btrfs_buffer *buf; - buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr); - if (buf) { - buf->count++; - } else { - buf = alloc_tree_block(root, blocknr); - if (!buf) { - BUG(); - return NULL; - } - } - return buf; -} - -struct btrfs_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) -{ - loff_t offset = blocknr * root->blocksize; - struct btrfs_buffer *buf; - int ret; + struct buffer_head *buf = sb_bread(root->fs_info->sb, blocknr); - buf = radix_tree_lookup(&root->fs_info->cache_radix, blocknr); - if (buf) { - buf->count++; - } else { - buf = alloc_tree_block(root, blocknr); - if (!buf) - return NULL; - ret = pread(root->fs_info->fp, &buf->node, root->blocksize, - offset); - if (ret != root->blocksize) { - free(buf); - return NULL; - } - } + if (!buf) + return buf; if (check_tree_block(root, buf)) BUG(); return buf; } int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf) + struct buffer_head *buf) { - if (!list_empty(&buf->dirty)) - return 0; - list_add_tail(&buf->dirty, &root->fs_info->trans); - buf->count++; + mark_buffer_dirty(buf); return 0; } int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf) + struct buffer_head *buf) { - if (!list_empty(&buf->dirty)) { - list_del_init(&buf->dirty); - btrfs_block_release(root, buf); - } + clear_buffer_dirty(buf); return 0; } int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_buffer *buf) + struct buffer_head *buf) { - u64 blocknr = buf->blocknr; - loff_t offset = blocknr * root->blocksize; - int ret; - - if (buf->blocknr != btrfs_header_blocknr(&buf->node.header)) - BUG(); - ret = pwrite(root->fs_info->fp, &buf->node, root->blocksize, offset); - if (ret != root->blocksize) - return ret; + mark_buffer_dirty(buf); return 0; } static int __commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - struct btrfs_buffer *b; - int ret = 0; - int wret; - while(!list_empty(&root->fs_info->trans)) { - b = list_entry(root->fs_info->trans.next, struct btrfs_buffer, - dirty); - list_del_init(&b->dirty); - wret = write_tree_block(trans, root, b); - if (wret) - ret = wret; - btrfs_block_release(root, b); - } - return ret; + filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); + return 0; } static int commit_tree_roots(struct btrfs_trans_handle *trans, @@ -172,17 +74,17 @@ static int commit_tree_roots(struct btrfs_trans_handle *trans, struct btrfs_root *inode_root = fs_info->inode_root; btrfs_set_root_blocknr(&inode_root->root_item, - inode_root->node->blocknr); + inode_root->node->b_blocknr); ret = btrfs_update_root(trans, tree_root, &inode_root->root_key, &inode_root->root_item); BUG_ON(ret); while(1) { old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == extent_root->node->blocknr) + if (old_extent_block == extent_root->node->b_blocknr) break; btrfs_set_root_blocknr(&extent_root->root_item, - extent_root->node->blocknr); + extent_root->node->b_blocknr); ret = btrfs_update_root(trans, tree_root, &extent_root->root_key, &extent_root->root_item); @@ -195,7 +97,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s) { int ret = 0; - struct btrfs_buffer *snap = root->commit_root; + struct buffer_head *snap = root->commit_root; struct btrfs_key snap_key; if (root->commit_root == root->node) @@ -204,7 +106,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct memcpy(&snap_key, &root->root_key, sizeof(snap_key)); root->root_key.offset++; - btrfs_set_root_blocknr(&root->root_item, root->node->blocknr); + btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); ret = btrfs_insert_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); BUG_ON(ret); @@ -220,7 +122,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct btrfs_finish_extent_commit(trans, root->fs_info->tree_root); root->commit_root = root->node; - root->node->count++; + get_bh(root->node); ret = btrfs_drop_snapshot(trans, root, snap); BUG_ON(ret); @@ -234,7 +136,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct static int __setup_root(struct btrfs_super_block *super, struct btrfs_root *root, struct btrfs_fs_info *fs_info, - u64 objectid, int fp) + u64 objectid) { root->node = NULL; root->commit_root = NULL; @@ -250,11 +152,11 @@ static int find_and_setup_root(struct btrfs_super_block *super, struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info, u64 objectid, - struct btrfs_root *root, int fp) + struct btrfs_root *root) { int ret; - __setup_root(super, root, fs_info, objectid, fp); + __setup_root(super, root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); @@ -265,32 +167,26 @@ static int find_and_setup_root(struct btrfs_super_block *super, return 0; } -struct btrfs_root *open_ctree(char *filename, struct btrfs_super_block *super) +struct btrfs_root *open_ctree(struct super_block *sb, + struct buffer_head *sb_buffer, + struct btrfs_super_block *disk_super) { - int fp; - - fp = open(filename, O_CREAT | O_RDWR, 0600); - if (fp < 0) { - return NULL; - } - return open_ctree_fd(fp, super); -} - -struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) -{ - struct btrfs_root *root = malloc(sizeof(struct btrfs_root)); - struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_root *inode_root = malloc(sizeof(struct btrfs_root)); - struct btrfs_fs_info *fs_info = malloc(sizeof(*fs_info)); + struct btrfs_root *root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_root *inode_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), + GFP_NOFS); int ret; - INIT_RADIX_TREE(&fs_info->cache_radix, GFP_KERNEL); + /* FIXME: don't be stupid */ + if (!btrfs_super_root(disk_super)) + return NULL; INIT_RADIX_TREE(&fs_info->pinned_radix, GFP_KERNEL); - INIT_LIST_HEAD(&fs_info->trans); - INIT_LIST_HEAD(&fs_info->cache); - fs_info->cache_size = 0; - fs_info->fp = fp; fs_info->running_transaction = NULL; fs_info->fs_root = root; fs_info->tree_root = tree_root; @@ -298,36 +194,31 @@ struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) fs_info->inode_root = inode_root; fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; - fs_info->disk_super = super; + fs_info->disk_super = disk_super; + fs_info->sb_buffer = sb_buffer; + fs_info->sb = sb; memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); - ret = pread(fp, super, sizeof(struct btrfs_super_block), - BTRFS_SUPER_INFO_OFFSET); - if (ret == 0 || btrfs_super_root(super) == 0) { - BUG(); - return NULL; - } - BUG_ON(ret < 0); - - __setup_root(super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID, fp); - tree_root->node = read_tree_block(tree_root, btrfs_super_root(super)); + __setup_root(disk_super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); + tree_root->node = read_tree_block(tree_root, + btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); - ret = find_and_setup_root(super, tree_root, fs_info, - BTRFS_EXTENT_TREE_OBJECTID, extent_root, fp); + ret = find_and_setup_root(disk_super, tree_root, fs_info, + BTRFS_EXTENT_TREE_OBJECTID, extent_root); BUG_ON(ret); - ret = find_and_setup_root(super, tree_root, fs_info, - BTRFS_INODE_MAP_OBJECTID, inode_root, fp); + ret = find_and_setup_root(disk_super, tree_root, fs_info, + BTRFS_INODE_MAP_OBJECTID, inode_root); BUG_ON(ret); - ret = find_and_setup_root(super, tree_root, fs_info, - BTRFS_FS_TREE_OBJECTID, root, fp); + ret = find_and_setup_root(disk_super, tree_root, fs_info, + BTRFS_FS_TREE_OBJECTID, root); BUG_ON(ret); root->commit_root = root->node; - root->node->count++; + get_bh(root->node); root->ref_cows = 1; root->fs_info->generation = root->root_key.offset + 1; return root; @@ -336,8 +227,11 @@ struct btrfs_root *open_ctree_fd(int fp, struct btrfs_super_block *super) int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_super_block *s) { + return 0; +#if 0 int ret; - btrfs_set_super_root(s, root->fs_info->tree_root->node->blocknr); + btrfs_set_super_root(s, root->fs_info->tree_root->node->b_blocknr); + ret = pwrite(root->fs_info->fp, s, sizeof(*s), BTRFS_SUPER_INFO_OFFSET); if (ret != sizeof(*s)) { @@ -345,35 +239,38 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root return ret; } return 0; +#endif } static int drop_cache(struct btrfs_root *root) { + return 0; +#if 0 while(!list_empty(&root->fs_info->cache)) { - struct btrfs_buffer *b = list_entry(root->fs_info->cache.next, - struct btrfs_buffer, + struct buffer_head *b = list_entry(root->fs_info->cache.next, + struct buffer_head, cache); list_del_init(&b->cache); btrfs_block_release(root, b); } return 0; +#endif } -int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) + +int close_ctree(struct btrfs_root *root) { int ret; struct btrfs_trans_handle *trans; trans = root->fs_info->running_transaction; - btrfs_commit_transaction(trans, root, s); + btrfs_commit_transaction(trans, root, root->fs_info->disk_super); ret = commit_tree_roots(trans, root->fs_info); BUG_ON(ret); ret = __commit_transaction(trans, root); BUG_ON(ret); - write_ctree_super(trans, root, s); + write_ctree_super(trans, root, root->fs_info->disk_super); drop_cache(root); - BUG_ON(!list_empty(&root->fs_info->trans)); - close(root->fs_info->fp); if (root->node) btrfs_block_release(root, root->node); if (root->fs_info->extent_root->node) @@ -386,29 +283,17 @@ int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s) btrfs_block_release(root->fs_info->tree_root, root->fs_info->tree_root->node); btrfs_block_release(root, root->commit_root); - free(root); - printf("on close %d blocks are allocated\n", allocated_blocks); + btrfs_block_release(root, root->fs_info->sb_buffer); + kfree(root->fs_info->extent_root); + kfree(root->fs_info->inode_root); + kfree(root->fs_info->tree_root); + kfree(root->fs_info); + kfree(root); return 0; } -void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf) +void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { - buf->count--; - if (buf->count < 0) - BUG(); - if (buf->count == 0) { - BUG_ON(!list_empty(&buf->cache)); - BUG_ON(!list_empty(&buf->dirty)); - if (!radix_tree_lookup(&root->fs_info->cache_radix, - buf->blocknr)) - BUG(); - radix_tree_delete(&root->fs_info->cache_radix, buf->blocknr); - memset(buf, 0, sizeof(*buf)); - free(buf); - BUG_ON(allocated_blocks == 0); - allocated_blocks--; - BUG_ON(root->fs_info->cache_size == 0); - root->fs_info->cache_size--; - } + brelse(buf); } -- cgit v1.2.3 From 79154b1b5bcf87903db7ff16a30b360b78d6fe4f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 22 Mar 2007 15:59:16 -0400 Subject: Btrfs: transaction rework Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 115 +++++------------------------------------------------ 1 file changed, 9 insertions(+), 106 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index df2061a735cd..9cacca0c525c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -50,89 +50,6 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, return 0; } -int write_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf) -{ - mark_buffer_dirty(buf); - return 0; -} - -static int __commit_transaction(struct btrfs_trans_handle *trans, struct - btrfs_root *root) -{ - filemap_write_and_wait(root->fs_info->sb->s_bdev->bd_inode->i_mapping); - return 0; -} - -static int commit_tree_roots(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) -{ - int ret; - u64 old_extent_block; - struct btrfs_root *tree_root = fs_info->tree_root; - struct btrfs_root *extent_root = fs_info->extent_root; - struct btrfs_root *inode_root = fs_info->inode_root; - - btrfs_set_root_blocknr(&inode_root->root_item, - inode_root->node->b_blocknr); - ret = btrfs_update_root(trans, tree_root, - &inode_root->root_key, - &inode_root->root_item); - BUG_ON(ret); - while(1) { - old_extent_block = btrfs_root_blocknr(&extent_root->root_item); - if (old_extent_block == extent_root->node->b_blocknr) - break; - btrfs_set_root_blocknr(&extent_root->root_item, - extent_root->node->b_blocknr); - ret = btrfs_update_root(trans, tree_root, - &extent_root->root_key, - &extent_root->root_item); - BUG_ON(ret); - } - return 0; -} - -int btrfs_commit_transaction(struct btrfs_trans_handle *trans, struct - btrfs_root *root, struct btrfs_super_block *s) -{ - int ret = 0; - struct buffer_head *snap = root->commit_root; - struct btrfs_key snap_key; - - if (root->commit_root == root->node) - return 0; - - memcpy(&snap_key, &root->root_key, sizeof(snap_key)); - root->root_key.offset++; - - btrfs_set_root_blocknr(&root->root_item, root->node->b_blocknr); - ret = btrfs_insert_root(trans, root->fs_info->tree_root, - &root->root_key, &root->root_item); - BUG_ON(ret); - - ret = commit_tree_roots(trans, root->fs_info); - BUG_ON(ret); - - ret = __commit_transaction(trans, root); - BUG_ON(ret); - - write_ctree_super(trans, root, s); - btrfs_finish_extent_commit(trans, root->fs_info->extent_root); - btrfs_finish_extent_commit(trans, root->fs_info->tree_root); - - root->commit_root = root->node; - get_bh(root->node); - ret = btrfs_drop_snapshot(trans, root, snap); - BUG_ON(ret); - - ret = btrfs_del_root(trans, root->fs_info->tree_root, &snap_key); - BUG_ON(ret); - root->fs_info->generation = root->root_key.offset + 1; - - return ret; -} - static int __setup_root(struct btrfs_super_block *super, struct btrfs_root *root, struct btrfs_fs_info *fs_info, @@ -197,6 +114,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->disk_super = disk_super; fs_info->sb_buffer = sb_buffer; fs_info->sb = sb; + mutex_init(&fs_info->trans_mutex); memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); @@ -225,7 +143,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, } int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root - *root, struct btrfs_super_block *s) + *root) { return 0; #if 0 @@ -242,34 +160,19 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root #endif } -static int drop_cache(struct btrfs_root *root) -{ - return 0; -#if 0 - while(!list_empty(&root->fs_info->cache)) { - struct buffer_head *b = list_entry(root->fs_info->cache.next, - struct buffer_head, - cache); - list_del_init(&b->cache); - btrfs_block_release(root, b); - } - return 0; -#endif -} - int close_ctree(struct btrfs_root *root) { int ret; struct btrfs_trans_handle *trans; - trans = root->fs_info->running_transaction; - btrfs_commit_transaction(trans, root, root->fs_info->disk_super); - ret = commit_tree_roots(trans, root->fs_info); - BUG_ON(ret); - ret = __commit_transaction(trans, root); + trans = btrfs_start_transaction(root, 1); + btrfs_commit_transaction(trans, root); + /* run commit again to drop the original snapshot */ + trans = btrfs_start_transaction(root, 1); + btrfs_commit_transaction(trans, root); + ret = btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); - write_ctree_super(trans, root, root->fs_info->disk_super); - drop_cache(root); + write_ctree_super(NULL, root); if (root->node) btrfs_block_release(root, root->node); -- cgit v1.2.3 From d571976292839cec05a2820b08f7629b145ed157 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 23 Mar 2007 10:01:08 -0400 Subject: btrfs_create, btrfs_write_super, btrfs_sync_fs Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9cacca0c525c..8e1dcda0839c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -145,19 +145,20 @@ struct btrfs_root *open_ctree(struct super_block *sb, int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root) { - return 0; -#if 0 - int ret; - btrfs_set_super_root(s, root->fs_info->tree_root->node->b_blocknr); - - ret = pwrite(root->fs_info->fp, s, sizeof(*s), - BTRFS_SUPER_INFO_OFFSET); - if (ret != sizeof(*s)) { - fprintf(stderr, "failed to write new super block err %d\n", ret); - return ret; + struct buffer_head *bh = root->fs_info->sb_buffer; + btrfs_set_super_root(root->fs_info->disk_super, + root->fs_info->tree_root->node->b_blocknr); + lock_buffer(bh); + clear_buffer_dirty(bh); + bh->b_end_io = end_buffer_write_sync; + get_bh(bh); + submit_bh(WRITE, bh); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) { + WARN_ON(1); + return -EIO; } return 0; -#endif } int close_ctree(struct btrfs_root *root) -- cgit v1.2.3 From df2ce34c8801af8e00c7f82435fcb1e4b45a2759 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 23 Mar 2007 11:00:45 -0400 Subject: Btrfs: properly set new buffers for new blocks up to date Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8e1dcda0839c..2f1eb4ae4e1d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -10,8 +10,13 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) BUG(); if (root->node && btrfs_header_parentid(&node->header) != - btrfs_header_parentid(btrfs_buffer_header(root->node))) - BUG(); + btrfs_header_parentid(btrfs_buffer_header(root->node))) { + printk("block %Lu parentids don't match buf %Lu, root %Lu\n", + buf->b_blocknr, + btrfs_header_parentid(&node->header), + btrfs_header_parentid(btrfs_buffer_header(root->node))); + WARN_ON(1); + } return 0; } -- cgit v1.2.3 From 7f5c15160e1436a53d01f9190db11c2a3a4d788a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 23 Mar 2007 15:56:19 -0400 Subject: Add generation number to btrfs_header, readdir fixes, hash collision fixes Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2f1eb4ae4e1d..3dea757d4cbb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -11,11 +11,7 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) BUG(); if (root->node && btrfs_header_parentid(&node->header) != btrfs_header_parentid(btrfs_buffer_header(root->node))) { - printk("block %Lu parentids don't match buf %Lu, root %Lu\n", - buf->b_blocknr, - btrfs_header_parentid(&node->header), - btrfs_header_parentid(btrfs_buffer_header(root->node))); - WARN_ON(1); + BUG(); } return 0; } -- cgit v1.2.3 From d561c025ee3a0aee53a225bedce0ee35fe12f335 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 23 Mar 2007 19:47:49 -0400 Subject: Btrfs: very minimal locking Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3dea757d4cbb..e32ddff55b0e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -116,6 +116,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->sb_buffer = sb_buffer; fs_info->sb = sb; mutex_init(&fs_info->trans_mutex); + mutex_init(&fs_info->fs_mutex); memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); -- cgit v1.2.3 From 8ef97622caa2d5f78d1dc58ab918e2fbfa9b357a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Mar 2007 10:15:30 -0400 Subject: Btrfs: add a radix back bit tree Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e32ddff55b0e..758a62aba063 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -104,7 +104,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, /* FIXME: don't be stupid */ if (!btrfs_super_root(disk_super)) return NULL; - INIT_RADIX_TREE(&fs_info->pinned_radix, GFP_KERNEL); + init_bit_radix(&fs_info->pinned_radix); + init_bit_radix(&fs_info->pending_del_radix); fs_info->running_transaction = NULL; fs_info->fs_root = root; fs_info->tree_root = tree_root; -- cgit v1.2.3 From d98237b3ede7ab98892f7fa62201a13694c526e2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 28 Mar 2007 13:57:48 -0400 Subject: Btrfs: use a btree inode instead of sb_getblk Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 174 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 161 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 758a62aba063..970103f2cacd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1,14 +1,17 @@ #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" + static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); - if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) + if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) { BUG(); + } if (root->node && btrfs_header_parentid(&node->header) != btrfs_header_parentid(btrfs_buffer_header(root->node))) { BUG(); @@ -16,25 +19,154 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) return 0; } -struct buffer_head *alloc_tree_block(struct btrfs_root *root, u64 blocknr) +struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) +{ + struct address_space *mapping = root->fs_info->btree_inode->i_mapping; + int blockbits = root->fs_info->sb->s_blocksize_bits; + unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); + struct page *page; + struct buffer_head *bh; + struct buffer_head *head; + struct buffer_head *ret = NULL; + + page = find_lock_page(mapping, index); + if (!page) + return NULL; + + if (!page_has_buffers(page)) + goto out_unlock; + + head = page_buffers(page); + bh = head; + do { + if (buffer_mapped(bh) && bh->b_blocknr == blocknr) { + ret = bh; + get_bh(bh); + goto out_unlock; + } + bh = bh->b_this_page; + } while (bh != head); +out_unlock: + unlock_page(page); + page_cache_release(page); + return ret; +} + +struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 blocknr) +{ + struct address_space *mapping = root->fs_info->btree_inode->i_mapping; + int blockbits = root->fs_info->sb->s_blocksize_bits; + unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); + struct page *page; + struct buffer_head *bh; + struct buffer_head *head; + struct buffer_head *ret = NULL; + u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits); + page = grab_cache_page(mapping, index); + if (!page) + return NULL; + + wait_on_page_writeback(page); + if (!page_has_buffers(page)) + create_empty_buffers(page, root->fs_info->sb->s_blocksize, 0); + head = page_buffers(page); + bh = head; + do { + if (!buffer_mapped(bh)) { + bh->b_bdev = root->fs_info->sb->s_bdev; + bh->b_blocknr = first_block; + set_buffer_mapped(bh); + } + if (bh->b_blocknr == blocknr) { + ret = bh; + get_bh(bh); + goto out_unlock; + } + bh = bh->b_this_page; + first_block++; + } while (bh != head); +out_unlock: + unlock_page(page); + page_cache_release(page); + return ret; +} + +static sector_t max_block(struct block_device *bdev) +{ + sector_t retval = ~((sector_t)0); + loff_t sz = i_size_read(bdev->bd_inode); + + if (sz) { + unsigned int size = block_size(bdev); + unsigned int sizebits = blksize_bits(size); + retval = (sz >> sizebits); + } + return retval; +} + +static int btree_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh, int create) +{ + if (iblock >= max_block(inode->i_sb->s_bdev)) { + if (create) + return -EIO; + + /* + * for reads, we're just trying to fill a partial page. + * return a hole, they will have to call get_block again + * before they can fill it, and they will get -EIO at that + * time + */ + return 0; + } + bh->b_bdev = inode->i_sb->s_bdev; + bh->b_blocknr = iblock; + set_buffer_mapped(bh); + return 0; +} + +static int btree_writepage(struct page *page, struct writeback_control *wbc) { - return sb_getblk(root->fs_info->sb, blocknr); + return block_write_full_page(page, btree_get_block, wbc); } -struct buffer_head *find_tree_block(struct btrfs_root *root, u64 blocknr) +static int btree_readpage(struct file * file, struct page * page) { - return sb_getblk(root->fs_info->sb, blocknr); + return block_read_full_page(page, btree_get_block); } +static struct address_space_operations btree_aops = { + .readpage = btree_readpage, + .writepage = btree_writepage, + .sync_page = block_sync_page, +}; + struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) { - struct buffer_head *buf = sb_bread(root->fs_info->sb, blocknr); + struct buffer_head *bh = NULL; - if (!buf) - return buf; - if (check_tree_block(root, buf)) + bh = btrfs_find_create_tree_block(root, blocknr); + if (!bh) + return bh; + lock_buffer(bh); + if (!buffer_uptodate(bh)) { + get_bh(bh); + bh->b_end_io = end_buffer_read_sync; + submit_bh(READ, bh); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) + goto fail; + } else { + unlock_buffer(bh); + } + if (check_tree_block(root, bh)) BUG(); - return buf; + return bh; +fail: + brelse(bh); + return NULL; + } int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -101,11 +233,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, GFP_NOFS); int ret; - /* FIXME: don't be stupid */ if (!btrfs_super_root(disk_super)) return NULL; init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); + sb_set_blocksize(sb, sb_buffer->b_size); fs_info->running_transaction = NULL; fs_info->fs_root = root; fs_info->tree_root = tree_root; @@ -114,14 +246,30 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; fs_info->disk_super = disk_super; - fs_info->sb_buffer = sb_buffer; fs_info->sb = sb; + fs_info->btree_inode = new_inode(sb); + fs_info->btree_inode->i_ino = 1; + fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; + fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); + mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); __setup_root(disk_super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); + + fs_info->sb_buffer = read_tree_block(tree_root, sb_buffer->b_blocknr); + + if (!fs_info->sb_buffer) + return NULL; + + brelse(sb_buffer); + sb_buffer = NULL; + disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; + fs_info->disk_super = disk_super; + tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); @@ -137,7 +285,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = find_and_setup_root(disk_super, tree_root, fs_info, BTRFS_FS_TREE_OBJECTID, root); BUG_ON(ret); - root->commit_root = root->node; get_bh(root->node); root->ref_cows = 1; @@ -191,6 +338,7 @@ int close_ctree(struct btrfs_root *root) root->fs_info->tree_root->node); btrfs_block_release(root, root->commit_root); btrfs_block_release(root, root->fs_info->sb_buffer); + iput(root->fs_info->btree_inode); kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); kfree(root->fs_info->tree_root); -- cgit v1.2.3 From 87cbda5c1f54aba67b1e318a4a4824c9e9f16324 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 28 Mar 2007 19:44:27 -0400 Subject: Btrfs: sha256 csums on metadata Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 970103f2cacd..2afb7922b062 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1,6 +1,8 @@ #include #include #include +#include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -126,8 +128,51 @@ static int btree_get_block(struct inode *inode, sector_t iblock, return 0; } +static int csum_tree_block(struct btrfs_root * root, struct buffer_head *bh, + int verify) +{ + struct btrfs_node *node = btrfs_buffer_node(bh); + struct scatterlist sg; + struct crypto_hash *tfm = root->fs_info->hash_tfm; + struct hash_desc desc; + int ret; + char result[32]; + + desc.tfm = tfm; + desc.flags = 0; + sg_init_one(&sg, bh->b_data + 32, bh->b_size - 32); + spin_lock(&root->fs_info->hash_lock); + ret = crypto_hash_digest(&desc, &sg, bh->b_size - 32, result); + spin_unlock(&root->fs_info->hash_lock); + if (ret) { + printk("sha256 digest failed\n"); + } + if (verify) { + if (memcmp(node->header.csum, result, sizeof(result))) + printk("csum verify failed on %Lu\n", bh->b_blocknr); + return -EINVAL; + } else + memcpy(node->header.csum, result, sizeof(node->header.csum)); + return 0; +} + static int btree_writepage(struct page *page, struct writeback_control *wbc) { + struct buffer_head *bh; + struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb); + struct buffer_head *head; + + if (!page_has_buffers(page)) { + create_empty_buffers(page, root->fs_info->sb->s_blocksize, + (1 << BH_Dirty)|(1 << BH_Uptodate)); + } + head = page_buffers(page); + bh = head; + do { + if (buffer_dirty(bh)) + csum_tree_block(root, bh, 0); + bh = bh->b_this_page; + } while (bh != head); return block_write_full_page(page, btree_get_block, wbc); } @@ -157,6 +202,7 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) wait_on_buffer(bh); if (!buffer_uptodate(bh)) goto fail; + csum_tree_block(root, bh, 1); } else { unlock_buffer(bh); } @@ -233,8 +279,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, GFP_NOFS); int ret; - if (!btrfs_super_root(disk_super)) + if (!btrfs_super_root(disk_super)) { return NULL; + } init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); sb_set_blocksize(sb, sb_buffer->b_size); @@ -252,6 +299,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); + fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); + if (!fs_info->hash_tfm) { + printk("failed to allocate sha256 hash\n"); + return NULL; + } + spin_lock_init(&fs_info->hash_lock); mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); @@ -262,9 +315,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->sb_buffer = read_tree_block(tree_root, sb_buffer->b_blocknr); - if (!fs_info->sb_buffer) + if (!fs_info->sb_buffer) { +printk("failed2\n"); return NULL; - + } brelse(sb_buffer); sb_buffer = NULL; disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; @@ -300,6 +354,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root root->fs_info->tree_root->node->b_blocknr); lock_buffer(bh); clear_buffer_dirty(bh); + csum_tree_block(root, bh, 0); bh->b_end_io = end_buffer_write_sync; get_bh(bh); submit_bh(WRITE, bh); @@ -338,6 +393,7 @@ int close_ctree(struct btrfs_root *root) root->fs_info->tree_root->node); btrfs_block_release(root, root->commit_root); btrfs_block_release(root, root->fs_info->sb_buffer); + crypto_free_hash(root->fs_info->hash_tfm); iput(root->fs_info->btree_inode); kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); -- cgit v1.2.3 From 30ae8467483d7ab023b5e728bf7d74a575c78023 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 29 Mar 2007 09:59:15 -0400 Subject: Btrfs: btree address space fixes Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2afb7922b062..f2c1dd40c087 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -69,7 +69,6 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, if (!page) return NULL; - wait_on_page_writeback(page); if (!page_has_buffers(page)) create_empty_buffers(page, root->fs_info->sb->s_blocksize, 0); head = page_buffers(page); @@ -300,11 +299,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping->a_ops = &btree_aops; mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); - if (!fs_info->hash_tfm) { + spin_lock_init(&fs_info->hash_lock); + + if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) { printk("failed to allocate sha256 hash\n"); return NULL; } - spin_lock_init(&fs_info->hash_lock); mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); @@ -394,6 +394,7 @@ int close_ctree(struct btrfs_root *root) btrfs_block_release(root, root->commit_root); btrfs_block_release(root, root->fs_info->sb_buffer); crypto_free_hash(root->fs_info->hash_tfm); + truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); iput(root->fs_info->btree_inode); kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); -- cgit v1.2.3 From f254e52c1ce550fdaa0d31f5e068f0d67c2485d4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 29 Mar 2007 15:15:27 -0400 Subject: Btrfs: verify csums on read Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f2c1dd40c087..f5db2b936502 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -127,31 +127,46 @@ static int btree_get_block(struct inode *inode, sector_t iblock, return 0; } -static int csum_tree_block(struct btrfs_root * root, struct buffer_head *bh, - int verify) +int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, + char *result) { - struct btrfs_node *node = btrfs_buffer_node(bh); struct scatterlist sg; struct crypto_hash *tfm = root->fs_info->hash_tfm; struct hash_desc desc; int ret; - char result[32]; desc.tfm = tfm; desc.flags = 0; - sg_init_one(&sg, bh->b_data + 32, bh->b_size - 32); + sg_init_one(&sg, data, len); spin_lock(&root->fs_info->hash_lock); - ret = crypto_hash_digest(&desc, &sg, bh->b_size - 32, result); + ret = crypto_hash_digest(&desc, &sg, len, result); spin_unlock(&root->fs_info->hash_lock); if (ret) { printk("sha256 digest failed\n"); } + return ret; +} +static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, + int verify) +{ + char result[BTRFS_CSUM_SIZE]; + int ret; + struct btrfs_node *node; + + ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE, + bh->b_size - BTRFS_CSUM_SIZE, result); + if (ret) + return ret; if (verify) { - if (memcmp(node->header.csum, result, sizeof(result))) - printk("csum verify failed on %Lu\n", bh->b_blocknr); - return -EINVAL; - } else - memcpy(node->header.csum, result, sizeof(node->header.csum)); + if (memcmp(bh->b_data, result, BTRFS_CSUM_SIZE)) { + printk("checksum verify failed on %lu\n", + bh->b_blocknr); + return 1; + } + } else { + node = btrfs_buffer_node(bh); + memcpy(&node->header.csum, result, BTRFS_CSUM_SIZE); + } return 0; } -- cgit v1.2.3 From 22b0ebda6c63a1ad66b6a9e806bd226a4a03a049 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 30 Mar 2007 08:47:31 -0400 Subject: Btrfs: hunting slab corruption Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f5db2b936502..6c010463b9db 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -50,6 +51,8 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) } while (bh != head); out_unlock: unlock_page(page); + if (ret) + touch_buffer(ret); page_cache_release(page); return ret; } @@ -65,6 +68,7 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, struct buffer_head *head; struct buffer_head *ret = NULL; u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits); + page = grab_cache_page(mapping, index); if (!page) return NULL; @@ -89,6 +93,8 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, } while (bh != head); out_unlock: unlock_page(page); + if (ret) + touch_buffer(ret); page_cache_release(page); return ret; } @@ -139,7 +145,7 @@ int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, desc.flags = 0; sg_init_one(&sg, data, len); spin_lock(&root->fs_info->hash_lock); - ret = crypto_hash_digest(&desc, &sg, len, result); + ret = crypto_hash_digest(&desc, &sg, 1, result); spin_unlock(&root->fs_info->hash_lock); if (ret) { printk("sha256 digest failed\n"); @@ -153,6 +159,7 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, int ret; struct btrfs_node *node; + return 0; ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE, bh->b_size - BTRFS_CSUM_SIZE, result); if (ret) @@ -165,17 +172,17 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, } } else { node = btrfs_buffer_node(bh); - memcpy(&node->header.csum, result, BTRFS_CSUM_SIZE); + memcpy(node->header.csum, result, BTRFS_CSUM_SIZE); } return 0; } static int btree_writepage(struct page *page, struct writeback_control *wbc) { +#if 0 struct buffer_head *bh; struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb); struct buffer_head *head; - if (!page_has_buffers(page)) { create_empty_buffers(page, root->fs_info->sb->s_blocksize, (1 << BH_Dirty)|(1 << BH_Uptodate)); @@ -187,6 +194,7 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) csum_tree_block(root, bh, 0); bh = bh->b_this_page; } while (bh != head); +#endif return block_write_full_page(page, btree_get_block, wbc); } @@ -312,6 +320,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + insert_inode_hash(fs_info->btree_inode); + mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); spin_lock_init(&fs_info->hash_lock); -- cgit v1.2.3 From d6025579531b7ea170ba283b171ff7a6bf7d0e12 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 30 Mar 2007 14:27:56 -0400 Subject: Btrfs: corruption hunt continues Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6c010463b9db..bb133104e2e9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -8,6 +8,17 @@ #include "disk-io.h" #include "transaction.h" +#define PATTERN 0xDEADBEEFUL +static inline void check_pattern(struct buffer_head *buf) +{ + if (buf->b_private != (void *)PATTERN) + WARN_ON(1); +} + +static inline void set_pattern(struct buffer_head *buf) +{ + buf->b_private = (void *)PATTERN; +} static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { @@ -51,8 +62,10 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) } while (bh != head); out_unlock: unlock_page(page); - if (ret) + if (ret) { touch_buffer(ret); + check_pattern(ret); + } page_cache_release(page); return ret; } @@ -82,6 +95,7 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, bh->b_bdev = root->fs_info->sb->s_bdev; bh->b_blocknr = first_block; set_buffer_mapped(bh); + set_pattern(bh); } if (bh->b_blocknr == blocknr) { ret = bh; @@ -225,6 +239,7 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) if (!buffer_uptodate(bh)) goto fail; csum_tree_block(root, bh, 1); + set_pattern(bh); } else { unlock_buffer(bh); } @@ -240,6 +255,7 @@ fail: int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { + WARN_ON(atomic_read(&buf->b_count) == 0); mark_buffer_dirty(buf); return 0; } @@ -247,6 +263,7 @@ int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { + WARN_ON(atomic_read(&buf->b_count) == 0); clear_buffer_dirty(buf); return 0; } @@ -431,6 +448,7 @@ int close_ctree(struct btrfs_root *root) void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { + check_pattern(buf); brelse(buf); } -- cgit v1.2.3 From 2c90e5d658424bc71b111eb5a972240d5d06fe86 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 2 Apr 2007 10:50:19 -0400 Subject: Btrfs: still corruption hunting Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 84 ++++++++++++++++++++++++++---------------------------- 1 file changed, 41 insertions(+), 43 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bb133104e2e9..2dbd55084a4e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -8,18 +8,6 @@ #include "disk-io.h" #include "transaction.h" -#define PATTERN 0xDEADBEEFUL -static inline void check_pattern(struct buffer_head *buf) -{ - if (buf->b_private != (void *)PATTERN) - WARN_ON(1); -} - -static inline void set_pattern(struct buffer_head *buf) -{ - buf->b_private = (void *)PATTERN; -} - static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); @@ -35,6 +23,8 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) { + return sb_find_get_block(root->fs_info->sb, blocknr); +#if 0 struct address_space *mapping = root->fs_info->btree_inode->i_mapping; int blockbits = root->fs_info->sb->s_blocksize_bits; unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); @@ -43,6 +33,7 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) struct buffer_head *head; struct buffer_head *ret = NULL; + page = find_lock_page(mapping, index); if (!page) return NULL; @@ -64,15 +55,17 @@ out_unlock: unlock_page(page); if (ret) { touch_buffer(ret); - check_pattern(ret); } page_cache_release(page); return ret; +#endif } struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr) { + return sb_getblk(root->fs_info->sb, blocknr); +#if 0 struct address_space *mapping = root->fs_info->btree_inode->i_mapping; int blockbits = root->fs_info->sb->s_blocksize_bits; unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); @@ -95,7 +88,6 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, bh->b_bdev = root->fs_info->sb->s_bdev; bh->b_blocknr = first_block; set_buffer_mapped(bh); - set_pattern(bh); } if (bh->b_blocknr == blocknr) { ret = bh; @@ -111,6 +103,7 @@ out_unlock: touch_buffer(ret); page_cache_release(page); return ret; +#endif } static sector_t max_block(struct block_device *bdev) @@ -225,6 +218,8 @@ static struct address_space_operations btree_aops = { struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) { + return sb_bread(root->fs_info->sb, blocknr); +#if 0 struct buffer_head *bh = NULL; bh = btrfs_find_create_tree_block(root, blocknr); @@ -239,7 +234,6 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) if (!buffer_uptodate(bh)) goto fail; csum_tree_block(root, bh, 1); - set_pattern(bh); } else { unlock_buffer(bh); } @@ -250,6 +244,7 @@ fail: brelse(bh); return NULL; +#endif } int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -268,14 +263,14 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, return 0; } -static int __setup_root(struct btrfs_super_block *super, +static int __setup_root(int blocksize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) { root->node = NULL; root->commit_root = NULL; - root->blocksize = btrfs_super_blocksize(super); + root->blocksize = blocksize; root->ref_cows = 0; root->fs_info = fs_info; memset(&root->root_key, 0, sizeof(root->root_key)); @@ -283,7 +278,7 @@ static int __setup_root(struct btrfs_super_block *super, return 0; } -static int find_and_setup_root(struct btrfs_super_block *super, +static int find_and_setup_root(int blocksize, struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info, u64 objectid, @@ -291,7 +286,7 @@ static int find_and_setup_root(struct btrfs_super_block *super, { int ret; - __setup_root(super, root, fs_info, objectid); + __setup_root(blocksize, root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); @@ -302,9 +297,7 @@ static int find_and_setup_root(struct btrfs_super_block *super, return 0; } -struct btrfs_root *open_ctree(struct super_block *sb, - struct buffer_head *sb_buffer, - struct btrfs_super_block *disk_super) +struct btrfs_root *open_ctree(struct super_block *sb) { struct btrfs_root *root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); @@ -317,13 +310,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), GFP_NOFS); int ret; + struct btrfs_super_block *disk_super; - if (!btrfs_super_root(disk_super)) { - return NULL; - } init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); - sb_set_blocksize(sb, sb_buffer->b_size); + sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->fs_root = root; fs_info->tree_root = tree_root; @@ -331,55 +322,59 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->inode_root = inode_root; fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; - fs_info->disk_super = disk_super; fs_info->sb = sb; + fs_info->btree_inode = NULL; +#if 0 fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; + fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; insert_inode_hash(fs_info->btree_inode); - mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); +#endif fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); spin_lock_init(&fs_info->hash_lock); - if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) { printk("failed to allocate sha256 hash\n"); return NULL; } - mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); - __setup_root(disk_super, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); - - fs_info->sb_buffer = read_tree_block(tree_root, sb_buffer->b_blocknr); + __setup_root(sb->s_blocksize, tree_root, + fs_info, BTRFS_ROOT_TREE_OBJECTID); + fs_info->sb_buffer = read_tree_block(tree_root, + BTRFS_SUPER_INFO_OFFSET / + sb->s_blocksize); if (!fs_info->sb_buffer) { printk("failed2\n"); return NULL; } - brelse(sb_buffer); - sb_buffer = NULL; disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; + if (!btrfs_super_root(disk_super)) { + return NULL; + } fs_info->disk_super = disk_super; - tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); - ret = find_and_setup_root(disk_super, tree_root, fs_info, + mutex_lock(&fs_info->fs_mutex); + ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); BUG_ON(ret); - ret = find_and_setup_root(disk_super, tree_root, fs_info, + ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_INODE_MAP_OBJECTID, inode_root); BUG_ON(ret); - ret = find_and_setup_root(disk_super, tree_root, fs_info, + ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_FS_TREE_OBJECTID, root); + mutex_unlock(&fs_info->fs_mutex); BUG_ON(ret); root->commit_root = root->node; get_bh(root->node); @@ -392,9 +387,11 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct buffer_head *bh = root->fs_info->sb_buffer; + btrfs_set_super_root(root->fs_info->disk_super, root->fs_info->tree_root->node->b_blocknr); lock_buffer(bh); + WARN_ON(atomic_read(&bh->b_count) < 1); clear_buffer_dirty(bh); csum_tree_block(root, bh, 0); bh->b_end_io = end_buffer_write_sync; @@ -413,6 +410,7 @@ int close_ctree(struct btrfs_root *root) int ret; struct btrfs_trans_handle *trans; + mutex_lock(&root->fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ @@ -421,6 +419,7 @@ int close_ctree(struct btrfs_root *root) ret = btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); write_ctree_super(NULL, root); + mutex_unlock(&root->fs_info->fs_mutex); if (root->node) btrfs_block_release(root, root->node); @@ -436,8 +435,8 @@ int close_ctree(struct btrfs_root *root) btrfs_block_release(root, root->commit_root); btrfs_block_release(root, root->fs_info->sb_buffer); crypto_free_hash(root->fs_info->hash_tfm); - truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); - iput(root->fs_info->btree_inode); + // truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); + // iput(root->fs_info->btree_inode); kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); kfree(root->fs_info->tree_root); @@ -448,7 +447,6 @@ int close_ctree(struct btrfs_root *root) void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { - check_pattern(buf); - brelse(buf); + // brelse(buf); } -- cgit v1.2.3 From 7cfcc17e7f2334a0503331264c1340de4e564c85 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 2 Apr 2007 14:53:59 -0400 Subject: Btrfs: corruptions fixed Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2dbd55084a4e..a227e94b9e88 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -23,8 +23,6 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) { - return sb_find_get_block(root->fs_info->sb, blocknr); -#if 0 struct address_space *mapping = root->fs_info->btree_inode->i_mapping; int blockbits = root->fs_info->sb->s_blocksize_bits; unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); @@ -58,14 +56,11 @@ out_unlock: } page_cache_release(page); return ret; -#endif } struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr) { - return sb_getblk(root->fs_info->sb, blocknr); -#if 0 struct address_space *mapping = root->fs_info->btree_inode->i_mapping; int blockbits = root->fs_info->sb->s_blocksize_bits; unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); @@ -103,7 +98,6 @@ out_unlock: touch_buffer(ret); page_cache_release(page); return ret; -#endif } static sector_t max_block(struct block_device *bdev) @@ -186,7 +180,6 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, static int btree_writepage(struct page *page, struct writeback_control *wbc) { -#if 0 struct buffer_head *bh; struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb); struct buffer_head *head; @@ -201,7 +194,6 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) csum_tree_block(root, bh, 0); bh = bh->b_this_page; } while (bh != head); -#endif return block_write_full_page(page, btree_get_block, wbc); } @@ -218,8 +210,6 @@ static struct address_space_operations btree_aops = { struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) { - return sb_bread(root->fs_info->sb, blocknr); -#if 0 struct buffer_head *bh = NULL; bh = btrfs_find_create_tree_block(root, blocknr); @@ -243,8 +233,6 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) fail: brelse(bh); return NULL; - -#endif } int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -323,8 +311,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->last_inode_alloc = 0; fs_info->last_inode_alloc_dirid = 0; fs_info->sb = sb; - fs_info->btree_inode = NULL; -#if 0 fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; @@ -332,7 +318,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_mapping->a_ops = &btree_aops; insert_inode_hash(fs_info->btree_inode); mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); -#endif fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); spin_lock_init(&fs_info->hash_lock); if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) { @@ -435,8 +420,8 @@ int close_ctree(struct btrfs_root *root) btrfs_block_release(root, root->commit_root); btrfs_block_release(root, root->fs_info->sb_buffer); crypto_free_hash(root->fs_info->hash_tfm); - // truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); - // iput(root->fs_info->btree_inode); + truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); + iput(root->fs_info->btree_inode); kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); kfree(root->fs_info->tree_root); @@ -447,6 +432,6 @@ int close_ctree(struct btrfs_root *root) void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { - // brelse(buf); + brelse(buf); } -- cgit v1.2.3 From 2da566edd8ce32ae2952b863ee634bcc2e7d63c1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 2 Apr 2007 15:43:21 -0400 Subject: Btrfs: csum_verify_file_block locking fix Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a227e94b9e88..39b117a57a54 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -160,7 +160,6 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, int ret; struct btrfs_node *node; - return 0; ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE, bh->b_size - BTRFS_CSUM_SIZE, result); if (ret) -- cgit v1.2.3 From 9d64272cd3be2aae71f89f7ababbee90a7587b33 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 3 Apr 2007 11:43:19 -0400 Subject: Btrfs: don't wait on uptodate buffers on read Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 39b117a57a54..055cd45e802e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -214,6 +214,8 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) bh = btrfs_find_create_tree_block(root, blocknr); if (!bh) return bh; + if (buffer_uptodate(bh)) + goto uptodate; lock_buffer(bh); if (!buffer_uptodate(bh)) { get_bh(bh); @@ -226,6 +228,7 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) } else { unlock_buffer(bh); } +uptodate: if (check_tree_block(root, bh)) BUG(); return bh; -- cgit v1.2.3 From b1a4d96509a78ad234d94e0b914b289c60d2969d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 4 Apr 2007 15:27:52 -0400 Subject: Btrfs: tweak the inode-map and free extent search starts on cold mount Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 055cd45e802e..de9ee3aa0aad 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -311,7 +311,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->extent_root = extent_root; fs_info->inode_root = inode_root; fs_info->last_inode_alloc = 0; - fs_info->last_inode_alloc_dirid = 0; fs_info->sb = sb; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; -- cgit v1.2.3 From 5be6f7f174146d91039a27ebb2f1b4ac599172b3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 5 Apr 2007 13:35:25 -0400 Subject: Btrfs: dirindex optimizations Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index de9ee3aa0aad..5230554380d1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -311,6 +311,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->extent_root = extent_root; fs_info->inode_root = inode_root; fs_info->last_inode_alloc = 0; + fs_info->highest_inode = 0; fs_info->sb = sb; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; @@ -360,12 +361,15 @@ printk("failed2\n"); ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_FS_TREE_OBJECTID, root); - mutex_unlock(&fs_info->fs_mutex); BUG_ON(ret); root->commit_root = root->node; get_bh(root->node); root->ref_cows = 1; root->fs_info->generation = root->root_key.offset + 1; + ret = btrfs_find_highest_inode(root, &root->fs_info->last_inode_alloc); + if (ret == 0) + fs_info->highest_inode = fs_info->last_inode_alloc; + mutex_unlock(&fs_info->fs_mutex); return root; } -- cgit v1.2.3 From d6e4a428eb8f92bbb3537ccabadfb1195efb432b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 6 Apr 2007 15:37:36 -0400 Subject: Btrfs: start of support for many FS volumes Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5230554380d1..b9301a5e4608 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -338,7 +338,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) sb->s_blocksize); if (!fs_info->sb_buffer) { -printk("failed2\n"); return NULL; } disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; @@ -369,6 +368,10 @@ printk("failed2\n"); ret = btrfs_find_highest_inode(root, &root->fs_info->last_inode_alloc); if (ret == 0) fs_info->highest_inode = fs_info->last_inode_alloc; + memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); + kobj_set_kset_s(fs_info, btrfs_subsys); + kobject_set_name(&fs_info->kobj, "%s", sb->s_id); + kobject_register(&fs_info->kobj); mutex_unlock(&fs_info->fs_mutex); return root; } @@ -430,7 +433,7 @@ int close_ctree(struct btrfs_root *root) kfree(root->fs_info->extent_root); kfree(root->fs_info->inode_root); kfree(root->fs_info->tree_root); - kfree(root->fs_info); + kobject_unregister(&root->fs_info->kobj); kfree(root); return 0; } -- cgit v1.2.3 From 0f7d52f4431c530b4f39c524448c688bb7754de5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 9 Apr 2007 10:42:37 -0400 Subject: Btrfs: groundwork for subvolume and snapshot roots Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 178 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 137 insertions(+), 41 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b9301a5e4608..b557bdd1e26a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4,9 +4,11 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "btrfs_inode.h" static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { @@ -180,7 +182,7 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct buffer_head *bh; - struct btrfs_root *root = btrfs_sb(page->mapping->host->i_sb); + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; struct buffer_head *head; if (!page_has_buffers(page)) { create_empty_buffers(page, root->fs_info->sb->s_blocksize, @@ -259,10 +261,13 @@ static int __setup_root(int blocksize, u64 objectid) { root->node = NULL; + root->inode = NULL; root->commit_root = NULL; root->blocksize = blocksize; root->ref_cows = 0; root->fs_info = fs_info; + root->objectid = objectid; + root->last_trans = 0; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); return 0; @@ -287,10 +292,78 @@ static int find_and_setup_root(int blocksize, return 0; } +struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location) +{ + struct btrfs_root *root; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_path *path; + struct btrfs_leaf *l; + int ret = 0; + +printk("read_fs_root looking for %Lu %Lu %u\n", location->objectid, location->offset, location->flags); + root = kmalloc(sizeof(*root), GFP_NOFS); + if (!root) { +printk("failed1\n"); + return ERR_PTR(-ENOMEM); + } + if (location->offset == (u64)-1) { + ret = find_and_setup_root(fs_info->sb->s_blocksize, + fs_info->tree_root, fs_info, + location->objectid, root); + if (ret) { +printk("failed2\n"); + kfree(root); + return ERR_PTR(ret); + } + goto insert; + } + + __setup_root(fs_info->sb->s_blocksize, root, fs_info, + location->objectid); + + path = btrfs_alloc_path(); + BUG_ON(!path); + ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); + if (ret != 0) { +printk("internal search_slot gives us %d\n", ret); + if (ret > 0) + ret = -ENOENT; + goto out; + } + l = btrfs_buffer_leaf(path->nodes[0]); + memcpy(&root->root_item, + btrfs_item_ptr(l, path->slots[0], struct btrfs_root_item), + sizeof(root->root_item)); + memcpy(&root->root_key, location, sizeof(*location)); + ret = 0; +out: + btrfs_release_path(root, path); + btrfs_free_path(path); + if (ret) { + kfree(root); + return ERR_PTR(ret); + } + root->node = read_tree_block(root, + btrfs_root_blocknr(&root->root_item)); + BUG_ON(!root->node); +insert: +printk("inserting %p\n", root); + root->ref_cows = 1; + ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root, + root); + if (ret) { +printk("radix_tree_insert gives us %d\n", ret); + brelse(root->node); + kfree(root); + return ERR_PTR(ret); + } +printk("all worked\n"); + return root; +} + struct btrfs_root *open_ctree(struct super_block *sb) { - struct btrfs_root *root = kmalloc(sizeof(struct btrfs_root), - GFP_NOFS); struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), @@ -304,9 +377,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); + INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; - fs_info->fs_root = root; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->inode_root = inode_root; @@ -318,6 +391,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + BTRFS_I(fs_info->btree_inode)->root = tree_root; + memset(&BTRFS_I(fs_info->btree_inode)->location, 0, + sizeof(struct btrfs_key)); insert_inode_hash(fs_info->btree_inode); mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); @@ -337,13 +413,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize); - if (!fs_info->sb_buffer) { + if (!fs_info->sb_buffer) return NULL; - } disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; - if (!btrfs_super_root(disk_super)) { + if (!btrfs_super_root(disk_super)) return NULL; - } + fs_info->disk_super = disk_super; tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); @@ -358,14 +433,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) BTRFS_INODE_MAP_OBJECTID, inode_root); BUG_ON(ret); - ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, - BTRFS_FS_TREE_OBJECTID, root); - BUG_ON(ret); - root->commit_root = root->node; - get_bh(root->node); - root->ref_cows = 1; - root->fs_info->generation = root->root_key.offset + 1; - ret = btrfs_find_highest_inode(root, &root->fs_info->last_inode_alloc); + fs_info->generation = btrfs_super_generation(disk_super) + 1; + ret = btrfs_find_highest_inode(tree_root, &fs_info->last_inode_alloc); if (ret == 0) fs_info->highest_inode = fs_info->last_inode_alloc; memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); @@ -373,7 +442,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) kobject_set_name(&fs_info->kobj, "%s", sb->s_id); kobject_register(&fs_info->kobj); mutex_unlock(&fs_info->fs_mutex); - return root; + return tree_root; } int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root @@ -398,12 +467,42 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } +int del_fs_roots(struct btrfs_fs_info *fs_info) +{ + int ret; + struct btrfs_root *gang[8]; + int i; + + while(1) { + ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, + (void **)gang, 0, + ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + radix_tree_delete(&fs_info->fs_roots_radix, + (unsigned long)gang[i]); + if (gang[i]->inode) + iput(gang[i]->inode); + else + printk("no inode for root %p\n", gang[i]); + if (gang[i]->node) + brelse(gang[i]->node); + if (gang[i]->commit_root) + brelse(gang[i]->commit_root); + kfree(gang[i]); + } + } + return 0; +} + int close_ctree(struct btrfs_root *root) { int ret; struct btrfs_trans_handle *trans; + struct btrfs_fs_info *fs_info = root->fs_info; - mutex_lock(&root->fs_info->fs_mutex); + mutex_lock(&fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ @@ -412,29 +511,26 @@ int close_ctree(struct btrfs_root *root) ret = btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); write_ctree_super(NULL, root); - mutex_unlock(&root->fs_info->fs_mutex); - - if (root->node) - btrfs_block_release(root, root->node); - if (root->fs_info->extent_root->node) - btrfs_block_release(root->fs_info->extent_root, - root->fs_info->extent_root->node); - if (root->fs_info->inode_root->node) - btrfs_block_release(root->fs_info->inode_root, - root->fs_info->inode_root->node); - if (root->fs_info->tree_root->node) - btrfs_block_release(root->fs_info->tree_root, - root->fs_info->tree_root->node); - btrfs_block_release(root, root->commit_root); - btrfs_block_release(root, root->fs_info->sb_buffer); - crypto_free_hash(root->fs_info->hash_tfm); - truncate_inode_pages(root->fs_info->btree_inode->i_mapping, 0); - iput(root->fs_info->btree_inode); - kfree(root->fs_info->extent_root); - kfree(root->fs_info->inode_root); - kfree(root->fs_info->tree_root); - kobject_unregister(&root->fs_info->kobj); - kfree(root); + mutex_unlock(&fs_info->fs_mutex); + + if (fs_info->extent_root->node) + btrfs_block_release(fs_info->extent_root, + fs_info->extent_root->node); + if (fs_info->inode_root->node) + btrfs_block_release(fs_info->inode_root, + fs_info->inode_root->node); + if (fs_info->tree_root->node) + btrfs_block_release(fs_info->tree_root, + fs_info->tree_root->node); + btrfs_block_release(root, fs_info->sb_buffer); + crypto_free_hash(fs_info->hash_tfm); + truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + iput(fs_info->btree_inode); + del_fs_roots(fs_info); + kfree(fs_info->extent_root); + kfree(fs_info->inode_root); + kfree(fs_info->tree_root); + kobject_unregister(&fs_info->kobj); return 0; } -- cgit v1.2.3 From c5739bba5260a59cebd20a51a55080592c8d3b07 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 09:27:04 -0400 Subject: Btrfs: snapshot progress Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b557bdd1e26a..6b097ede80b1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -16,10 +16,6 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) { BUG(); } - if (root->node && btrfs_header_parentid(&node->header) != - btrfs_header_parentid(btrfs_buffer_header(root->node))) { - BUG(); - } return 0; } -- cgit v1.2.3 From 1b05da2ee6217e7d55460d04335813fec25be4ca Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 12:13:09 -0400 Subject: Btrfs: drop the inode map tree Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6b097ede80b1..760fdc9a7664 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -264,6 +264,8 @@ static int __setup_root(int blocksize, root->fs_info = fs_info; root->objectid = objectid; root->last_trans = 0; + root->highest_inode = 0; + root->last_inode_alloc = 0; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); return 0; @@ -295,6 +297,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_path *path; struct btrfs_leaf *l; + u64 highest_inode; int ret = 0; printk("read_fs_root looking for %Lu %Lu %u\n", location->objectid, location->offset, location->flags); @@ -354,6 +357,12 @@ printk("radix_tree_insert gives us %d\n", ret); kfree(root); return ERR_PTR(ret); } + ret = btrfs_find_highest_inode(root, &highest_inode); + if (ret == 0) { + root->highest_inode = highest_inode; + root->last_inode_alloc = highest_inode; +printk("highest inode is %Lu\n", highest_inode); + } printk("all worked\n"); return root; } @@ -364,8 +373,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); - struct btrfs_root *inode_root = kmalloc(sizeof(struct btrfs_root), - GFP_NOFS); struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), GFP_NOFS); int ret; @@ -378,9 +385,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->running_transaction = NULL; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; - fs_info->inode_root = inode_root; - fs_info->last_inode_alloc = 0; - fs_info->highest_inode = 0; fs_info->sb = sb; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; @@ -425,14 +429,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) BTRFS_EXTENT_TREE_OBJECTID, extent_root); BUG_ON(ret); - ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, - BTRFS_INODE_MAP_OBJECTID, inode_root); - BUG_ON(ret); - fs_info->generation = btrfs_super_generation(disk_super) + 1; - ret = btrfs_find_highest_inode(tree_root, &fs_info->last_inode_alloc); - if (ret == 0) - fs_info->highest_inode = fs_info->last_inode_alloc; memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); kobj_set_kset_s(fs_info, btrfs_subsys); kobject_set_name(&fs_info->kobj, "%s", sb->s_id); @@ -512,9 +509,6 @@ int close_ctree(struct btrfs_root *root) if (fs_info->extent_root->node) btrfs_block_release(fs_info->extent_root, fs_info->extent_root->node); - if (fs_info->inode_root->node) - btrfs_block_release(fs_info->inode_root, - fs_info->inode_root->node); if (fs_info->tree_root->node) btrfs_block_release(fs_info->tree_root, fs_info->tree_root->node); @@ -524,7 +518,6 @@ int close_ctree(struct btrfs_root *root) iput(fs_info->btree_inode); del_fs_roots(fs_info); kfree(fs_info->extent_root); - kfree(fs_info->inode_root); kfree(fs_info->tree_root); kobject_unregister(&fs_info->kobj); return 0; -- cgit v1.2.3 From 2619ba1f0ff9540a9d84683310a1e350b5efde3d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 10 Apr 2007 16:58:11 -0400 Subject: Btrfs: subvolumes Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 760fdc9a7664..ff69162829b3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -301,6 +301,12 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, int ret = 0; printk("read_fs_root looking for %Lu %Lu %u\n", location->objectid, location->offset, location->flags); + root = radix_tree_lookup(&fs_info->fs_roots_radix, + (unsigned long)location->objectid); + if (root) { +printk("found %p in cache\n", root); + return root; + } root = kmalloc(sizeof(*root), GFP_NOFS); if (!root) { printk("failed1\n"); @@ -349,7 +355,8 @@ out: insert: printk("inserting %p\n", root); root->ref_cows = 1; - ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root, + ret = radix_tree_insert(&fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, root); if (ret) { printk("radix_tree_insert gives us %d\n", ret); @@ -460,6 +467,20 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } +static int free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) +{ + radix_tree_delete(&fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid); + if (root->inode) + iput(root->inode); + if (root->node) + brelse(root->node); + if (root->commit_root) + brelse(root->commit_root); + kfree(root); + return 0; +} + int del_fs_roots(struct btrfs_fs_info *fs_info) { int ret; @@ -472,19 +493,8 @@ int del_fs_roots(struct btrfs_fs_info *fs_info) ARRAY_SIZE(gang)); if (!ret) break; - for (i = 0; i < ret; i++) { - radix_tree_delete(&fs_info->fs_roots_radix, - (unsigned long)gang[i]); - if (gang[i]->inode) - iput(gang[i]->inode); - else - printk("no inode for root %p\n", gang[i]); - if (gang[i]->node) - brelse(gang[i]->node); - if (gang[i]->commit_root) - brelse(gang[i]->commit_root); - kfree(gang[i]); - } + for (i = 0; i < ret; i++) + free_fs_root(fs_info, gang[i]); } return 0; } -- cgit v1.2.3 From 0bd93ba022f5c1ddb4e5b722b02f3c4a4a8865ad Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 11 Apr 2007 13:57:44 -0400 Subject: Btrfs: early support for multiple devices Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ff69162829b3..e09233262af6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -378,6 +378,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) { struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); + struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), @@ -392,6 +394,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->running_transaction = NULL; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; + fs_info->dev_root = dev_root; fs_info->sb = sb; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; @@ -414,6 +417,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); + __setup_root(sb->s_blocksize, dev_root, + fs_info, BTRFS_DEV_TREE_OBJECTID); + __setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); fs_info->sb_buffer = read_tree_block(tree_root, @@ -427,6 +433,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) return NULL; fs_info->disk_super = disk_super; + dev_root->node = read_tree_block(tree_root, + btrfs_super_device_root(disk_super)); tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); @@ -519,6 +527,9 @@ int close_ctree(struct btrfs_root *root) if (fs_info->extent_root->node) btrfs_block_release(fs_info->extent_root, fs_info->extent_root->node); + if (fs_info->dev_root->node) + btrfs_block_release(fs_info->dev_root, + fs_info->dev_root->node); if (fs_info->tree_root->node) btrfs_block_release(fs_info->tree_root, fs_info->tree_root->node); -- cgit v1.2.3 From 7eccb903a817e890c947ba4bc90c6a9af9b4219a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 11 Apr 2007 15:53:25 -0400 Subject: Btrfs: create a logical->phsyical block number mapping scheme Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 140 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 102 insertions(+), 38 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e09233262af6..c872a7e67abd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -10,10 +10,30 @@ #include "transaction.h" #include "btrfs_inode.h" +struct dev_lookup { + u64 block_start; + u64 num_blocks; + struct block_device *bdev; +}; + +u64 bh_blocknr(struct buffer_head *bh) +{ + int blkbits = bh->b_page->mapping->host->i_blkbits; + u64 blocknr = bh->b_page->index << (PAGE_CACHE_SHIFT - blkbits); + unsigned long offset; + + if (PageHighMem(bh->b_page)) + offset = (unsigned long)bh->b_data; + else + offset = bh->b_data - (char *)page_address(bh->b_page); + blocknr += offset >> (PAGE_CACHE_SHIFT - blkbits); + return blocknr; +} + static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); - if (buf->b_blocknr != btrfs_header_blocknr(&node->header)) { + if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { BUG(); } return 0; @@ -40,7 +60,7 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) head = page_buffers(page); bh = head; do { - if (buffer_mapped(bh) && bh->b_blocknr == blocknr) { + if (buffer_mapped(bh) && bh_blocknr(bh) == blocknr) { ret = bh; get_bh(bh); goto out_unlock; @@ -56,6 +76,33 @@ out_unlock: return ret; } +static int map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, + u64 logical) +{ + struct dev_lookup *lookup[2]; + char b[BDEVNAME_SIZE]; + + int ret; + + root = root->fs_info->dev_root; + ret = radix_tree_gang_lookup(&root->fs_info->dev_radix, + (void **)lookup, + (unsigned long)logical, + ARRAY_SIZE(lookup)); + if (ret == 0 || lookup[0]->block_start > logical || + lookup[0]->block_start + lookup[0]->num_blocks <= logical) { + ret = -ENOENT; + goto out; + } + bh->b_bdev = lookup[0]->bdev; + bh->b_blocknr = logical - lookup[0]->block_start; +printk("logical mapping %Lu to %lu bdev %s\n", logical, bh->b_blocknr, bdevname(bh->b_bdev, b)); + set_buffer_mapped(bh); + ret = 0; +out: + return ret; +} + struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr) { @@ -66,6 +113,7 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, struct buffer_head *bh; struct buffer_head *head; struct buffer_head *ret = NULL; + int err; u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits); page = grab_cache_page(mapping, index); @@ -78,11 +126,10 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, bh = head; do { if (!buffer_mapped(bh)) { - bh->b_bdev = root->fs_info->sb->s_bdev; - bh->b_blocknr = first_block; - set_buffer_mapped(bh); + err = map_bh_to_logical(root, bh, first_block); + BUG_ON(err); } - if (bh->b_blocknr == blocknr) { + if (bh_blocknr(bh) == blocknr) { ret = bh; get_bh(bh); goto out_unlock; @@ -98,38 +145,13 @@ out_unlock: return ret; } -static sector_t max_block(struct block_device *bdev) -{ - sector_t retval = ~((sector_t)0); - loff_t sz = i_size_read(bdev->bd_inode); - - if (sz) { - unsigned int size = block_size(bdev); - unsigned int sizebits = blksize_bits(size); - retval = (sz >> sizebits); - } - return retval; -} - static int btree_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create) { - if (iblock >= max_block(inode->i_sb->s_bdev)) { - if (create) - return -EIO; - - /* - * for reads, we're just trying to fill a partial page. - * return a hole, they will have to call get_block again - * before they can fill it, and they will get -EIO at that - * time - */ - return 0; - } - bh->b_bdev = inode->i_sb->s_bdev; - bh->b_blocknr = iblock; - set_buffer_mapped(bh); - return 0; + int err; + struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; + err = map_bh_to_logical(root, bh, iblock); + return err; } int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, @@ -164,8 +186,8 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, return ret; if (verify) { if (memcmp(bh->b_data, result, BTRFS_CSUM_SIZE)) { - printk("checksum verify failed on %lu\n", - bh->b_blocknr); + printk("checksum verify failed on %Lu\n", + bh_blocknr(bh)); return 1; } } else { @@ -386,10 +408,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) GFP_NOFS); int ret; struct btrfs_super_block *disk_super; + struct dev_lookup *dev_lookup; init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); + INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->tree_root = tree_root; @@ -422,6 +446,13 @@ struct btrfs_root *open_ctree(struct super_block *sb) __setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); + + dev_lookup = kmalloc(sizeof(*dev_lookup), GFP_NOFS); + dev_lookup->block_start = 0; + dev_lookup->num_blocks = (u32)-2; + dev_lookup->bdev = sb->s_bdev; + ret = radix_tree_insert(&fs_info->dev_radix, (u32)-2, dev_lookup); + BUG_ON(ret); fs_info->sb_buffer = read_tree_block(tree_root, BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize); @@ -432,6 +463,14 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!btrfs_super_root(disk_super)) return NULL; + radix_tree_delete(&fs_info->dev_radix, (u32)-2); + dev_lookup->block_start = btrfs_super_device_block_start(disk_super); + dev_lookup->num_blocks = btrfs_super_device_num_blocks(disk_super); + ret = radix_tree_insert(&fs_info->dev_radix, + dev_lookup->block_start + + dev_lookup->num_blocks, dev_lookup); + BUG_ON(ret); + fs_info->disk_super = disk_super; dev_root->node = read_tree_block(tree_root, btrfs_super_device_root(disk_super)); @@ -459,7 +498,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root struct buffer_head *bh = root->fs_info->sb_buffer; btrfs_set_super_root(root->fs_info->disk_super, - root->fs_info->tree_root->node->b_blocknr); + bh_blocknr(root->fs_info->tree_root->node)); lock_buffer(bh); WARN_ON(atomic_read(&bh->b_count) < 1); clear_buffer_dirty(bh); @@ -506,6 +545,29 @@ int del_fs_roots(struct btrfs_fs_info *fs_info) } return 0; } +static int free_dev_radix(struct btrfs_fs_info *fs_info) +{ + struct dev_lookup *lookup[8]; + struct block_device *super_bdev = fs_info->sb->s_bdev; + int ret; + int i; + while(1) { + ret = radix_tree_gang_lookup(&fs_info->dev_radix, + (void **)lookup, 0, + ARRAY_SIZE(lookup)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + if (lookup[i]->bdev != super_bdev) + close_bdev_excl(lookup[i]->bdev); + radix_tree_delete(&fs_info->dev_radix, + lookup[i]->block_start + + lookup[i]->num_blocks); + kfree(lookup[i]); + } + } + return 0; +} int close_ctree(struct btrfs_root *root) { @@ -537,6 +599,8 @@ int close_ctree(struct btrfs_root *root) crypto_free_hash(fs_info->hash_tfm); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); + + free_dev_radix(fs_info); del_fs_roots(fs_info); kfree(fs_info->extent_root); kfree(fs_info->tree_root); -- cgit v1.2.3 From 8352d8a473ac84bf7a1c69690b626946d744ca58 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 12 Apr 2007 10:43:05 -0400 Subject: Btrfs: add disk ioctl, mostly working Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 121 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c872a7e67abd..3ba4df20c913 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -16,6 +16,28 @@ struct dev_lookup { struct block_device *bdev; }; +int btrfs_insert_dev_radix(struct btrfs_root *root, + struct block_device *bdev, + u64 block_start, + u64 num_blocks) +{ + struct dev_lookup *lookup; + char b[BDEVNAME_SIZE]; + int ret; + + lookup = kmalloc(sizeof(*lookup), GFP_NOFS); + if (!lookup) + return -ENOMEM; + lookup->block_start = block_start; + lookup->num_blocks = num_blocks; + lookup->bdev = bdev; +printk("inserting %s into dev radix %Lu %Lu\n", bdevname(bdev, b), block_start, num_blocks); + + ret = radix_tree_insert(&root->fs_info->dev_radix, block_start + + num_blocks - 1, lookup); + return ret; +} + u64 bh_blocknr(struct buffer_head *bh) { int blkbits = bh->b_page->mapping->host->i_blkbits; @@ -34,6 +56,8 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { + printk(KERN_CRIT "bh_blocknr(buf) is %Lu, header is %Lu\n", + bh_blocknr(buf), btrfs_header_blocknr(&node->header)); BUG(); } return 0; @@ -76,11 +100,10 @@ out_unlock: return ret; } -static int map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, +int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, u64 logical) { struct dev_lookup *lookup[2]; - char b[BDEVNAME_SIZE]; int ret; @@ -96,7 +119,6 @@ static int map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, } bh->b_bdev = lookup[0]->bdev; bh->b_blocknr = logical - lookup[0]->block_start; -printk("logical mapping %Lu to %lu bdev %s\n", logical, bh->b_blocknr, bdevname(bh->b_bdev, b)); set_buffer_mapped(bh); ret = 0; out: @@ -126,7 +148,7 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, bh = head; do { if (!buffer_mapped(bh)) { - err = map_bh_to_logical(root, bh, first_block); + err = btrfs_map_bh_to_logical(root, bh, first_block); BUG_ON(err); } if (bh_blocknr(bh) == blocknr) { @@ -150,7 +172,7 @@ static int btree_get_block(struct inode *inode, sector_t iblock, { int err; struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; - err = map_bh_to_logical(root, bh, iblock); + err = btrfs_map_bh_to_logical(root, bh, iblock); return err; } @@ -396,6 +418,89 @@ printk("all worked\n"); return root; } +int btrfs_open_disk(struct btrfs_root *root, u64 block_start, u64 num_blocks, + char *filename, int name_len) +{ + char *null_filename; + struct block_device *bdev; + int ret; + + if (block_start == 0) { +printk("skipping disk with block_start == 0\n"); +return 0; + } + null_filename = kmalloc(name_len + 1, GFP_NOFS); + if (!null_filename) + return -ENOMEM; + memcpy(null_filename, filename, name_len); + null_filename[name_len] = '\0'; + + bdev = open_bdev_excl(null_filename, O_RDWR, root->fs_info->sb); + if (IS_ERR(bdev)) { + ret = PTR_ERR(bdev); + goto out; + } + set_blocksize(bdev, root->fs_info->sb->s_blocksize); + ret = btrfs_insert_dev_radix(root, bdev, block_start, num_blocks); + BUG_ON(ret); + ret = 0; +out: + kfree(null_filename); + return ret; +} + +static int read_device_info(struct btrfs_root *root) +{ + struct btrfs_path *path; + int ret; + struct btrfs_key key; + struct btrfs_leaf *leaf; + struct btrfs_device_item *dev_item; + int nritems; + int slot; + + root = root->fs_info->dev_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + key.objectid = 0; + key.offset = 0; + key.flags = 0; + btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY); + + mutex_lock(&root->fs_info->fs_mutex); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + leaf = btrfs_buffer_leaf(path->nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + while(1) { + slot = path->slots[0]; + if (slot >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret) + break; + leaf = btrfs_buffer_leaf(path->nodes[0]); + nritems = btrfs_header_nritems(&leaf->header); + slot = path->slots[0]; + } + btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); + if (btrfs_key_type(&key) != BTRFS_DEV_ITEM_KEY) { + path->slots[0]++; + continue; + } + dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_device_item); +printk("found key %Lu %Lu\n", key.objectid, key.offset); + ret = btrfs_open_disk(root, key.objectid, key.offset, + (char *)(dev_item + 1), + btrfs_device_pathlen(dev_item)); + BUG_ON(ret); + path->slots[0]++; + } + btrfs_free_path(path); + mutex_unlock(&root->fs_info->fs_mutex); + return 0; +} + struct btrfs_root *open_ctree(struct super_block *sb) { struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), @@ -463,17 +568,26 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!btrfs_super_root(disk_super)) return NULL; + i_size_write(fs_info->btree_inode, + btrfs_super_total_blocks(disk_super) << + fs_info->btree_inode->i_blkbits); + radix_tree_delete(&fs_info->dev_radix, (u32)-2); dev_lookup->block_start = btrfs_super_device_block_start(disk_super); dev_lookup->num_blocks = btrfs_super_device_num_blocks(disk_super); ret = radix_tree_insert(&fs_info->dev_radix, dev_lookup->block_start + - dev_lookup->num_blocks, dev_lookup); + dev_lookup->num_blocks - 1, dev_lookup); BUG_ON(ret); fs_info->disk_super = disk_super; + dev_root->node = read_tree_block(tree_root, btrfs_super_device_root(disk_super)); + + ret = read_device_info(dev_root); + BUG_ON(ret); + tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); @@ -562,7 +676,7 @@ static int free_dev_radix(struct btrfs_fs_info *fs_info) close_bdev_excl(lookup[i]->bdev); radix_tree_delete(&fs_info->dev_radix, lookup[i]->block_start + - lookup[i]->num_blocks); + lookup[i]->num_blocks - 1); kfree(lookup[i]); } } -- cgit v1.2.3 From b4100d64241fed93a3f821ddf59d11ab4443a3ba Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 12 Apr 2007 12:14:00 -0400 Subject: Btrfs: add a device id to device items Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3ba4df20c913..06b969c14625 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -13,11 +13,13 @@ struct dev_lookup { u64 block_start; u64 num_blocks; + u64 device_id; struct block_device *bdev; }; int btrfs_insert_dev_radix(struct btrfs_root *root, struct block_device *bdev, + u64 device_id, u64 block_start, u64 num_blocks) { @@ -31,6 +33,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, lookup->block_start = block_start; lookup->num_blocks = num_blocks; lookup->bdev = bdev; + lookup->device_id = device_id; printk("inserting %s into dev radix %Lu %Lu\n", bdevname(bdev, b), block_start, num_blocks); ret = radix_tree_insert(&root->fs_info->dev_radix, block_start + @@ -418,17 +421,14 @@ printk("all worked\n"); return root; } -int btrfs_open_disk(struct btrfs_root *root, u64 block_start, u64 num_blocks, - char *filename, int name_len) +static int btrfs_open_disk(struct btrfs_root *root, u64 device_id, + u64 block_start, u64 num_blocks, + char *filename, int name_len) { char *null_filename; struct block_device *bdev; int ret; - if (block_start == 0) { -printk("skipping disk with block_start == 0\n"); -return 0; - } null_filename = kmalloc(name_len + 1, GFP_NOFS); if (!null_filename) return -ENOMEM; @@ -441,7 +441,8 @@ return 0; goto out; } set_blocksize(bdev, root->fs_info->sb->s_blocksize); - ret = btrfs_insert_dev_radix(root, bdev, block_start, num_blocks); + ret = btrfs_insert_dev_radix(root, bdev, device_id, + block_start, num_blocks); BUG_ON(ret); ret = 0; out: @@ -490,10 +491,14 @@ static int read_device_info(struct btrfs_root *root) } dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_device_item); printk("found key %Lu %Lu\n", key.objectid, key.offset); - ret = btrfs_open_disk(root, key.objectid, key.offset, - (char *)(dev_item + 1), - btrfs_device_pathlen(dev_item)); - BUG_ON(ret); + if (btrfs_device_id(dev_item) != + btrfs_super_device_id(root->fs_info->disk_super)) { + ret = btrfs_open_disk(root, btrfs_device_id(dev_item), + key.objectid, key.offset, + (char *)(dev_item + 1), + btrfs_device_pathlen(dev_item)); + BUG_ON(ret); + } path->slots[0]++; } btrfs_free_path(path); @@ -556,6 +561,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) dev_lookup->block_start = 0; dev_lookup->num_blocks = (u32)-2; dev_lookup->bdev = sb->s_bdev; + dev_lookup->device_id = 0; ret = radix_tree_insert(&fs_info->dev_radix, (u32)-2, dev_lookup); BUG_ON(ret); fs_info->sb_buffer = read_tree_block(tree_root, @@ -575,6 +581,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) radix_tree_delete(&fs_info->dev_radix, (u32)-2); dev_lookup->block_start = btrfs_super_device_block_start(disk_super); dev_lookup->num_blocks = btrfs_super_device_num_blocks(disk_super); + dev_lookup->device_id = btrfs_super_device_id(disk_super); + ret = radix_tree_insert(&fs_info->dev_radix, dev_lookup->block_start + dev_lookup->num_blocks - 1, dev_lookup); @@ -659,6 +667,7 @@ int del_fs_roots(struct btrfs_fs_info *fs_info) } return 0; } + static int free_dev_radix(struct btrfs_fs_info *fs_info) { struct dev_lookup *lookup[8]; -- cgit v1.2.3 From 70b2befd0c8a4064715d8b340270650cc9d15af8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 17 Apr 2007 15:39:32 -0400 Subject: Btrfs: rework csums and extent item ordering Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 06b969c14625..a2a3f529cada 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -24,7 +24,6 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, u64 num_blocks) { struct dev_lookup *lookup; - char b[BDEVNAME_SIZE]; int ret; lookup = kmalloc(sizeof(*lookup), GFP_NOFS); @@ -34,7 +33,6 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, lookup->num_blocks = num_blocks; lookup->bdev = bdev; lookup->device_id = device_id; -printk("inserting %s into dev radix %Lu %Lu\n", bdevname(bdev, b), block_start, num_blocks); ret = radix_tree_insert(&root->fs_info->dev_radix, block_start + num_blocks - 1, lookup); -- cgit v1.2.3 From 236454dfffb64a95ee01c50a215153f5de61c475 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 19 Apr 2007 13:37:44 -0400 Subject: Btrfs: many file_write fixes, inline data Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a2a3f529cada..11e17a2f736b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -108,6 +108,12 @@ int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, int ret; + if (logical == 0) { + bh->b_bdev = NULL; + bh->b_blocknr = 0; + set_buffer_mapped(bh); + return 0; + } root = root->fs_info->dev_root; ret = radix_tree_gang_lookup(&root->fs_info->dev_radix, (void **)lookup, -- cgit v1.2.3 From 8fd17795b2261ecb1bad2a6df09ef14c4957a3fb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 19 Apr 2007 21:01:03 -0400 Subject: Btrfs: early fsync support Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 11e17a2f736b..b666c3571aee 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -528,6 +528,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pending_del_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); + INIT_LIST_HEAD(&fs_info->trans_list); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->tree_root = tree_root; -- cgit v1.2.3 From e66f709b157ee8557166c14b67c01bae978ac32e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 20 Apr 2007 13:16:02 -0400 Subject: Btrfs: write barriers on commit, balance level before split Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b666c3571aee..13046295bf7a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -540,6 +540,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + fs_info->do_barriers = 1; BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); @@ -622,6 +623,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root) { + int ret; struct buffer_head *bh = root->fs_info->sb_buffer; btrfs_set_super_root(root->fs_info->disk_super, @@ -632,7 +634,15 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root csum_tree_block(root, bh, 0); bh->b_end_io = end_buffer_write_sync; get_bh(bh); - submit_bh(WRITE, bh); + if (root->fs_info->do_barriers) + ret = submit_bh(WRITE_BARRIER, bh); + else + ret = submit_bh(WRITE, bh); + if (ret == -EOPNOTSUPP) { + set_buffer_uptodate(bh); + root->fs_info->do_barriers = 0; + ret = submit_bh(WRITE, bh); + } wait_on_buffer(bh); if (!buffer_uptodate(bh)) { WARN_ON(1); -- cgit v1.2.3 From 4d775673091d43b39fa9d086071009f98dec289e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 20 Apr 2007 20:23:12 -0400 Subject: Btrfs: add owner and type fields to the extents aand block headers Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 13046295bf7a..7aff6bb55d91 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -319,6 +319,7 @@ static int __setup_root(int blocksize, root->last_inode_alloc = 0; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); + root->root_key.objectid = objectid; return 0; } -- cgit v1.2.3 From f2458e1d8c90958ed3631654cb7fd5ab01478505 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Apr 2007 15:52:25 -0400 Subject: Btrfs: change around extent-tree prealloc Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7aff6bb55d91..956727f015a5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -542,6 +542,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; fs_info->do_barriers = 1; + fs_info->extent_tree_insert_nr = 0; + fs_info->extent_tree_prealloc_nr = 0; BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); @@ -555,7 +557,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) } mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); - memset(&fs_info->current_insert, 0, sizeof(fs_info->current_insert)); memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); __setup_root(sb->s_blocksize, dev_root, -- cgit v1.2.3 From 9078a3e1e4e489dddc636c7bb8780349d4298743 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 26 Apr 2007 16:46:15 -0400 Subject: Btrfs: start of block group code Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 956727f015a5..1c27eb645510 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -529,6 +529,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pending_del_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); + INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; @@ -613,6 +614,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) BTRFS_EXTENT_TREE_OBJECTID, extent_root); BUG_ON(ret); + btrfs_read_block_groups(extent_root); + fs_info->generation = btrfs_super_generation(disk_super) + 1; memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); kobj_set_kset_s(fs_info, btrfs_subsys); @@ -741,6 +744,7 @@ int close_ctree(struct btrfs_root *root) iput(fs_info->btree_inode); free_dev_radix(fs_info); + btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); kfree(fs_info->extent_root); kfree(fs_info->tree_root); -- cgit v1.2.3 From cd1bc4653dc37f6390f4d6df4f987044c64f700b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 27 Apr 2007 10:08:34 -0400 Subject: Btrfs: more block allocator work Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1c27eb645510..2489ffa5fb38 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -558,7 +558,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) } mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); - memset(&fs_info->last_insert, 0, sizeof(fs_info->last_insert)); + fs_info->block_group_cache = NULL; __setup_root(sb->s_blocksize, dev_root, fs_info, BTRFS_DEV_TREE_OBJECTID); -- cgit v1.2.3 From 28b8bb9e001aa1c4abdd10a8e36972658237fe43 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 27 Apr 2007 11:42:05 -0400 Subject: Btrfs: allocator tweaks Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2489ffa5fb38..e1b6e13a5ae8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -94,9 +94,6 @@ struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) } while (bh != head); out_unlock: unlock_page(page); - if (ret) { - touch_buffer(ret); - } page_cache_release(page); return ret; } -- cgit v1.2.3 From 090d18753c7fb73f7d846c3a89a50cd35136d144 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 1 May 2007 08:53:32 -0400 Subject: Btrfs: directory readahead Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e1b6e13a5ae8..354524adf984 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -253,6 +254,29 @@ static struct address_space_operations btree_aops = { .sync_page = block_sync_page, }; +int readahead_tree_block(struct btrfs_root *root, u64 blocknr) +{ + struct buffer_head *bh = NULL; + + bh = btrfs_find_create_tree_block(root, blocknr); + if (!bh) + return 0; + if (buffer_uptodate(bh)) + goto done; + if (test_set_buffer_locked(bh)) + goto done; + if (!buffer_uptodate(bh)) { + get_bh(bh); + bh->b_end_io = end_buffer_read_sync; + submit_bh(READ, bh); + } else { + unlock_buffer(bh); + } +done: + brelse(bh); + return 0; +} + struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) { struct buffer_head *bh = NULL; @@ -270,11 +294,14 @@ struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) wait_on_buffer(bh); if (!buffer_uptodate(bh)) goto fail; - csum_tree_block(root, bh, 1); } else { unlock_buffer(bh); } uptodate: + if (!buffer_checked(bh)) { + csum_tree_block(root, bh, 1); + set_buffer_checked(bh); + } if (check_tree_block(root, bh)) BUG(); return bh; -- cgit v1.2.3 From 35b7e476107e3d54f03384e0f2fa3dfd68933353 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 2 May 2007 15:53:43 -0400 Subject: Btrfs: fix page cache memory leak Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 354524adf984..5828a104dfef 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -694,7 +694,7 @@ static int free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) return 0; } -int del_fs_roots(struct btrfs_fs_info *fs_info) +static int del_fs_roots(struct btrfs_fs_info *fs_info) { int ret; struct btrfs_root *gang[8]; @@ -781,3 +781,7 @@ void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) brelse(buf); } +void btrfs_btree_balance_dirty(struct btrfs_root *root) +{ + balance_dirty_pages_ratelimited(root->fs_info->btree_inode->i_mapping); +} -- cgit v1.2.3 From be74417553f4b2ee46be2088007a674ef2f02330 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 6 May 2007 10:15:01 -0400 Subject: Btrfs: more allocator enhancements Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5828a104dfef..7930458c227e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -554,6 +554,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); + INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; @@ -582,7 +583,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) } mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); - fs_info->block_group_cache = NULL; __setup_root(sb->s_blocksize, dev_root, fs_info, BTRFS_DEV_TREE_OBJECTID); -- cgit v1.2.3 From e37c9e6921207cf503634b06bee37ecb7904408d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 May 2007 20:13:14 -0400 Subject: Btrfs: many allocator fixes, pretty solid Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7930458c227e..2dbf422a2b9a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -551,6 +551,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); + init_bit_radix(&fs_info->extent_map_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); -- cgit v1.2.3 From 509659cde578d891445afd67d87121dd13e71596 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 10 May 2007 12:36:17 -0400 Subject: Btrfs: switch to crc32c instead of sha256 Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2dbf422a2b9a..678456d1e2f9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -196,14 +196,14 @@ int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, ret = crypto_hash_digest(&desc, &sg, 1, result); spin_unlock(&root->fs_info->hash_lock); if (ret) { - printk("sha256 digest failed\n"); + printk("digest failed\n"); } return ret; } static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, int verify) { - char result[BTRFS_CSUM_SIZE]; + char result[BTRFS_CRC32_SIZE]; int ret; struct btrfs_node *node; @@ -212,14 +212,14 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, if (ret) return ret; if (verify) { - if (memcmp(bh->b_data, result, BTRFS_CSUM_SIZE)) { + if (memcmp(bh->b_data, result, BTRFS_CRC32_SIZE)) { printk("checksum verify failed on %Lu\n", bh_blocknr(bh)); return 1; } } else { node = btrfs_buffer_node(bh); - memcpy(node->header.csum, result, BTRFS_CSUM_SIZE); + memcpy(node->header.csum, result, BTRFS_CRC32_SIZE); } return 0; } @@ -576,10 +576,10 @@ struct btrfs_root *open_ctree(struct super_block *sb) sizeof(struct btrfs_key)); insert_inode_hash(fs_info->btree_inode); mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); - fs_info->hash_tfm = crypto_alloc_hash("sha256", 0, CRYPTO_ALG_ASYNC); + fs_info->hash_tfm = crypto_alloc_hash("crc32c", 0, CRYPTO_ALG_ASYNC); spin_lock_init(&fs_info->hash_lock); if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) { - printk("failed to allocate sha256 hash\n"); + printk("failed to allocate digest hash\n"); return NULL; } mutex_init(&fs_info->trans_mutex); -- cgit v1.2.3 From de428b63b16f9c74a24228a517713e8c65f79475 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 May 2007 13:28:27 -0400 Subject: Btrfs: allocator optimizations, truncate readahead Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 678456d1e2f9..4c0262b57664 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -257,24 +257,30 @@ static struct address_space_operations btree_aops = { int readahead_tree_block(struct btrfs_root *root, u64 blocknr) { struct buffer_head *bh = NULL; + int ret = 0; bh = btrfs_find_create_tree_block(root, blocknr); if (!bh) return 0; - if (buffer_uptodate(bh)) + if (buffer_uptodate(bh)) { + ret = 1; goto done; - if (test_set_buffer_locked(bh)) + } + if (test_set_buffer_locked(bh)) { + ret = 1; goto done; + } if (!buffer_uptodate(bh)) { get_bh(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(READ, bh); } else { unlock_buffer(bh); + ret = 1; } done: brelse(bh); - return 0; + return ret; } struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) -- cgit v1.2.3 From f9f3c6b666f717510b67036c314ec915b9059eaa Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 21 May 2007 14:05:12 -0400 Subject: Btrfs: 2.6.21-git fixes Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4c0262b57664..3da057f78bc8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -649,9 +649,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->generation = btrfs_super_generation(disk_super) + 1; memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); - kobj_set_kset_s(fs_info, btrfs_subsys); - kobject_set_name(&fs_info->kobj, "%s", sb->s_id); - kobject_register(&fs_info->kobj); mutex_unlock(&fs_info->fs_mutex); return tree_root; } -- cgit v1.2.3 From 08607c1b182b3b8b54d7141a0c012cda17d201e6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 8 Jun 2007 15:33:54 -0400 Subject: Btrfs: add compat ioctl Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3da057f78bc8..048282f06afb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -577,6 +577,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->do_barriers = 1; fs_info->extent_tree_insert_nr = 0; fs_info->extent_tree_prealloc_nr = 0; + INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); @@ -648,7 +649,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) btrfs_read_block_groups(extent_root); fs_info->generation = btrfs_super_generation(disk_super) + 1; - memset(&fs_info->kobj, 0, sizeof(fs_info->kobj)); mutex_unlock(&fs_info->fs_mutex); return tree_root; } @@ -746,6 +746,7 @@ int close_ctree(struct btrfs_root *root) struct btrfs_trans_handle *trans; struct btrfs_fs_info *fs_info = root->fs_info; + btrfs_transaction_flush_work(root); mutex_lock(&fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); btrfs_commit_transaction(trans, root); @@ -776,7 +777,6 @@ int close_ctree(struct btrfs_root *root) del_fs_roots(fs_info); kfree(fs_info->extent_root); kfree(fs_info->tree_root); - kobject_unregister(&fs_info->kobj); return 0; } -- cgit v1.2.3 From facda1e787d43191a3368c322f682054991c41b8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 8 Jun 2007 18:11:48 -0400 Subject: Btrfs: get forced transaction commits via workqueue Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 048282f06afb..751069c0e9f5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -563,6 +563,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); + INIT_LIST_HEAD(&fs_info->dead_roots); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->tree_root = tree_root; @@ -577,6 +578,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->do_barriers = 1; fs_info->extent_tree_insert_nr = 0; fs_info->extent_tree_prealloc_nr = 0; + fs_info->closing = 0; + INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, @@ -746,6 +749,7 @@ int close_ctree(struct btrfs_root *root) struct btrfs_trans_handle *trans; struct btrfs_fs_info *fs_info = root->fs_info; + fs_info->closing = 1; btrfs_transaction_flush_work(root); mutex_lock(&fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); -- cgit v1.2.3 From 0cf6c620176a294bdf1bedddc492f6ae857e0019 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sat, 9 Jun 2007 09:22:25 -0400 Subject: Btrfs: remove device tree Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 224 ++--------------------------------------------------- 1 file changed, 6 insertions(+), 218 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 751069c0e9f5..8a88404525e8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -11,47 +11,9 @@ #include "transaction.h" #include "btrfs_inode.h" -struct dev_lookup { - u64 block_start; - u64 num_blocks; - u64 device_id; - struct block_device *bdev; -}; - -int btrfs_insert_dev_radix(struct btrfs_root *root, - struct block_device *bdev, - u64 device_id, - u64 block_start, - u64 num_blocks) -{ - struct dev_lookup *lookup; - int ret; - - lookup = kmalloc(sizeof(*lookup), GFP_NOFS); - if (!lookup) - return -ENOMEM; - lookup->block_start = block_start; - lookup->num_blocks = num_blocks; - lookup->bdev = bdev; - lookup->device_id = device_id; - - ret = radix_tree_insert(&root->fs_info->dev_radix, block_start + - num_blocks - 1, lookup); - return ret; -} - u64 bh_blocknr(struct buffer_head *bh) { - int blkbits = bh->b_page->mapping->host->i_blkbits; - u64 blocknr = bh->b_page->index << (PAGE_CACHE_SHIFT - blkbits); - unsigned long offset; - - if (PageHighMem(bh->b_page)) - offset = (unsigned long)bh->b_data; - else - offset = bh->b_data - (char *)page_address(bh->b_page); - blocknr += offset >> (PAGE_CACHE_SHIFT - blkbits); - return blocknr; + return bh->b_blocknr; } static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) @@ -102,32 +64,14 @@ out_unlock: int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, u64 logical) { - struct dev_lookup *lookup[2]; - - int ret; - if (logical == 0) { bh->b_bdev = NULL; bh->b_blocknr = 0; set_buffer_mapped(bh); - return 0; - } - root = root->fs_info->dev_root; - ret = radix_tree_gang_lookup(&root->fs_info->dev_radix, - (void **)lookup, - (unsigned long)logical, - ARRAY_SIZE(lookup)); - if (ret == 0 || lookup[0]->block_start > logical || - lookup[0]->block_start + lookup[0]->num_blocks <= logical) { - ret = -ENOENT; - goto out; + } else { + map_bh(bh, root->fs_info->sb, logical); } - bh->b_bdev = lookup[0]->bdev; - bh->b_blocknr = logical - lookup[0]->block_start; - set_buffer_mapped(bh); - ret = 0; -out: - return ret; + return 0; } struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, @@ -382,24 +326,18 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, u64 highest_inode; int ret = 0; -printk("read_fs_root looking for %Lu %Lu %u\n", location->objectid, location->offset, location->flags); root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)location->objectid); - if (root) { -printk("found %p in cache\n", root); + if (root) return root; - } root = kmalloc(sizeof(*root), GFP_NOFS); - if (!root) { -printk("failed1\n"); + if (!root) return ERR_PTR(-ENOMEM); - } if (location->offset == (u64)-1) { ret = find_and_setup_root(fs_info->sb->s_blocksize, fs_info->tree_root, fs_info, location->objectid, root); if (ret) { -printk("failed2\n"); kfree(root); return ERR_PTR(ret); } @@ -413,7 +351,6 @@ printk("failed2\n"); BUG_ON(!path); ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); if (ret != 0) { -printk("internal search_slot gives us %d\n", ret); if (ret > 0) ret = -ENOENT; goto out; @@ -435,13 +372,11 @@ out: btrfs_root_blocknr(&root->root_item)); BUG_ON(!root->node); insert: -printk("inserting %p\n", root); root->ref_cows = 1; ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, root); if (ret) { -printk("radix_tree_insert gives us %d\n", ret); brelse(root->node); kfree(root); return ERR_PTR(ret); @@ -450,116 +385,25 @@ printk("radix_tree_insert gives us %d\n", ret); if (ret == 0) { root->highest_inode = highest_inode; root->last_inode_alloc = highest_inode; -printk("highest inode is %Lu\n", highest_inode); } -printk("all worked\n"); return root; } -static int btrfs_open_disk(struct btrfs_root *root, u64 device_id, - u64 block_start, u64 num_blocks, - char *filename, int name_len) -{ - char *null_filename; - struct block_device *bdev; - int ret; - - null_filename = kmalloc(name_len + 1, GFP_NOFS); - if (!null_filename) - return -ENOMEM; - memcpy(null_filename, filename, name_len); - null_filename[name_len] = '\0'; - - bdev = open_bdev_excl(null_filename, O_RDWR, root->fs_info->sb); - if (IS_ERR(bdev)) { - ret = PTR_ERR(bdev); - goto out; - } - set_blocksize(bdev, root->fs_info->sb->s_blocksize); - ret = btrfs_insert_dev_radix(root, bdev, device_id, - block_start, num_blocks); - BUG_ON(ret); - ret = 0; -out: - kfree(null_filename); - return ret; -} - -static int read_device_info(struct btrfs_root *root) -{ - struct btrfs_path *path; - int ret; - struct btrfs_key key; - struct btrfs_leaf *leaf; - struct btrfs_device_item *dev_item; - int nritems; - int slot; - - root = root->fs_info->dev_root; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - key.objectid = 0; - key.offset = 0; - key.flags = 0; - btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY); - - mutex_lock(&root->fs_info->fs_mutex); - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); - while(1) { - slot = path->slots[0]; - if (slot >= nritems) { - ret = btrfs_next_leaf(root, path); - if (ret) - break; - leaf = btrfs_buffer_leaf(path->nodes[0]); - nritems = btrfs_header_nritems(&leaf->header); - slot = path->slots[0]; - } - btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key); - if (btrfs_key_type(&key) != BTRFS_DEV_ITEM_KEY) { - path->slots[0]++; - continue; - } - dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_device_item); -printk("found key %Lu %Lu\n", key.objectid, key.offset); - if (btrfs_device_id(dev_item) != - btrfs_super_device_id(root->fs_info->disk_super)) { - ret = btrfs_open_disk(root, btrfs_device_id(dev_item), - key.objectid, key.offset, - (char *)(dev_item + 1), - btrfs_device_pathlen(dev_item)); - BUG_ON(ret); - } - path->slots[0]++; - } - btrfs_free_path(path); - mutex_unlock(&root->fs_info->fs_mutex); - return 0; -} - struct btrfs_root *open_ctree(struct super_block *sb) { struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); - struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root), - GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), GFP_NOFS); int ret; struct btrfs_super_block *disk_super; - struct dev_lookup *dev_lookup; init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); init_bit_radix(&fs_info->extent_map_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); - INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); @@ -568,7 +412,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->running_transaction = NULL; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; - fs_info->dev_root = dev_root; fs_info->sb = sb; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; @@ -595,19 +438,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); - __setup_root(sb->s_blocksize, dev_root, - fs_info, BTRFS_DEV_TREE_OBJECTID); - __setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); - dev_lookup = kmalloc(sizeof(*dev_lookup), GFP_NOFS); - dev_lookup->block_start = 0; - dev_lookup->num_blocks = (u32)-2; - dev_lookup->bdev = sb->s_bdev; - dev_lookup->device_id = 0; - ret = radix_tree_insert(&fs_info->dev_radix, (u32)-2, dev_lookup); - BUG_ON(ret); fs_info->sb_buffer = read_tree_block(tree_root, BTRFS_SUPER_INFO_OFFSET / sb->s_blocksize); @@ -622,24 +455,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) btrfs_super_total_blocks(disk_super) << fs_info->btree_inode->i_blkbits); - radix_tree_delete(&fs_info->dev_radix, (u32)-2); - dev_lookup->block_start = btrfs_super_device_block_start(disk_super); - dev_lookup->num_blocks = btrfs_super_device_num_blocks(disk_super); - dev_lookup->device_id = btrfs_super_device_id(disk_super); - - ret = radix_tree_insert(&fs_info->dev_radix, - dev_lookup->block_start + - dev_lookup->num_blocks - 1, dev_lookup); - BUG_ON(ret); - fs_info->disk_super = disk_super; - - dev_root->node = read_tree_block(tree_root, - btrfs_super_device_root(disk_super)); - - ret = read_device_info(dev_root); - BUG_ON(ret); - tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); BUG_ON(!tree_root->node); @@ -719,30 +535,6 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info) return 0; } -static int free_dev_radix(struct btrfs_fs_info *fs_info) -{ - struct dev_lookup *lookup[8]; - struct block_device *super_bdev = fs_info->sb->s_bdev; - int ret; - int i; - while(1) { - ret = radix_tree_gang_lookup(&fs_info->dev_radix, - (void **)lookup, 0, - ARRAY_SIZE(lookup)); - if (!ret) - break; - for (i = 0; i < ret; i++) { - if (lookup[i]->bdev != super_bdev) - close_bdev_excl(lookup[i]->bdev); - radix_tree_delete(&fs_info->dev_radix, - lookup[i]->block_start + - lookup[i]->num_blocks - 1); - kfree(lookup[i]); - } - } - return 0; -} - int close_ctree(struct btrfs_root *root) { int ret; @@ -765,9 +557,6 @@ int close_ctree(struct btrfs_root *root) if (fs_info->extent_root->node) btrfs_block_release(fs_info->extent_root, fs_info->extent_root->node); - if (fs_info->dev_root->node) - btrfs_block_release(fs_info->dev_root, - fs_info->dev_root->node); if (fs_info->tree_root->node) btrfs_block_release(fs_info->tree_root, fs_info->tree_root->node); @@ -776,7 +565,6 @@ int close_ctree(struct btrfs_root *root) truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); - free_dev_radix(fs_info); btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); kfree(fs_info->extent_root); -- cgit v1.2.3 From 39279cc3d2704cfbf9c35dcb5bdd392159ae4625 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 06:35:45 -0400 Subject: Btrfs: split up super.c Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 46 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8a88404525e8..96bf3ef3a798 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -22,7 +22,7 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { printk(KERN_CRIT "bh_blocknr(buf) is %Lu, header is %Lu\n", bh_blocknr(buf), btrfs_header_blocknr(&node->header)); - BUG(); + return 1; } return 0; } @@ -253,7 +253,7 @@ uptodate: set_buffer_checked(bh); } if (check_tree_block(root, bh)) - BUG(); + goto fail; return bh; fail: brelse(bh); @@ -398,8 +398,13 @@ struct btrfs_root *open_ctree(struct super_block *sb) struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), GFP_NOFS); int ret; + int err = -EIO; struct btrfs_super_block *disk_super; + if (!extent_root || !tree_root || !fs_info) { + err = -ENOMEM; + goto fail; + } init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); init_bit_radix(&fs_info->extent_map_radix); @@ -431,9 +436,11 @@ struct btrfs_root *open_ctree(struct super_block *sb) mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->hash_tfm = crypto_alloc_hash("crc32c", 0, CRYPTO_ALG_ASYNC); spin_lock_init(&fs_info->hash_lock); + if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) { - printk("failed to allocate digest hash\n"); - return NULL; + printk("btrfs: failed hash setup, modprobe cryptomgr?\n"); + err = -ENOMEM; + goto fail_iput; } mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); @@ -446,30 +453,53 @@ struct btrfs_root *open_ctree(struct super_block *sb) sb->s_blocksize); if (!fs_info->sb_buffer) - return NULL; + goto fail_iput; disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; + if (!btrfs_super_root(disk_super)) - return NULL; + goto fail_sb_buffer; i_size_write(fs_info->btree_inode, btrfs_super_total_blocks(disk_super) << fs_info->btree_inode->i_blkbits); fs_info->disk_super = disk_super; + + if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, + sizeof(disk_super->magic))) { + printk("btrfs: valid FS not found on %s\n", sb->s_id); + goto fail_sb_buffer; + } tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super)); - BUG_ON(!tree_root->node); + if (!tree_root->node) + goto fail_sb_buffer; mutex_lock(&fs_info->fs_mutex); ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); - BUG_ON(ret); + if (ret) { + mutex_unlock(&fs_info->fs_mutex); + goto fail_tree_root; + } btrfs_read_block_groups(extent_root); fs_info->generation = btrfs_super_generation(disk_super) + 1; mutex_unlock(&fs_info->fs_mutex); return tree_root; + +fail_tree_root: + btrfs_block_release(tree_root, tree_root->node); +fail_sb_buffer: + btrfs_block_release(tree_root, fs_info->sb_buffer); +fail_iput: + iput(fs_info->btree_inode); +fail: + kfree(extent_root); + kfree(tree_root); + kfree(fs_info); + return ERR_PTR(err); } int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root -- cgit v1.2.3 From 5af3981c1878b0657b9babd2ef7ec98c2008cf2c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 07:50:13 -0400 Subject: Btrfs: printk fixes Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 96bf3ef3a798..878101172546 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -20,8 +20,9 @@ static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) { struct btrfs_node *node = btrfs_buffer_node(buf); if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { - printk(KERN_CRIT "bh_blocknr(buf) is %Lu, header is %Lu\n", - bh_blocknr(buf), btrfs_header_blocknr(&node->header)); + printk(KERN_CRIT "bh_blocknr(buf) is %llu, header is %llu\n", + (unsigned long long)bh_blocknr(buf), + (unsigned long long)btrfs_header_blocknr(&node->header)); return 1; } return 0; @@ -157,8 +158,9 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, return ret; if (verify) { if (memcmp(bh->b_data, result, BTRFS_CRC32_SIZE)) { - printk("checksum verify failed on %Lu\n", - bh_blocknr(bh)); + printk("btrfs: %s checksum verify failed on %llu\n", + root->fs_info->sb->s_id, + (unsigned long long)bh_blocknr(bh)); return 1; } } else { -- cgit v1.2.3 From 6cbd55707802b98843f953d1ae6d8f5bcd9a76c0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 09:07:21 -0400 Subject: Btrfs: add GPLv2 Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 878101172546..8c68a64c0ceb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1,3 +1,21 @@ +/* + * Copyright (C) 2007 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + #include #include #include -- cgit v1.2.3 From 340887809d92a4dd6c71caa97d658cef32c098c3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 12 Jun 2007 11:36:58 -0400 Subject: Btrfs: i386 fixes from axboe Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8c68a64c0ceb..b0ebba6f2e46 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -106,7 +106,7 @@ struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, int err; u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits); - page = grab_cache_page(mapping, index); + page = find_or_create_page(mapping, index, GFP_NOFS); if (!page) return NULL; -- cgit v1.2.3 From 9ebefb180bad4914a31c4e1748ba187a30e1e990 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Jun 2007 13:50:00 -0400 Subject: Btrfs: patch queue: page_mkwrite Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b0ebba6f2e46..602b63dc76c3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -155,9 +155,9 @@ int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, desc.tfm = tfm; desc.flags = 0; sg_init_one(&sg, data, len); - spin_lock(&root->fs_info->hash_lock); + spin_lock_irq(&root->fs_info->hash_lock); ret = crypto_hash_digest(&desc, &sg, 1, result); - spin_unlock(&root->fs_info->hash_lock); + spin_unlock_irq(&root->fs_info->hash_lock); if (ret) { printk("digest failed\n"); } -- cgit v1.2.3 From 8c2383c3dd2cb5bb39598ce4fa97154bc591020a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 18 Jun 2007 09:57:58 -0400 Subject: Subject: Rework btrfs_file_write to only allocate while page locks are held Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 602b63dc76c3..15cc9ec92308 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -541,6 +541,8 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root else ret = submit_bh(WRITE, bh); if (ret == -EOPNOTSUPP) { + get_bh(bh); + lock_buffer(bh); set_buffer_uptodate(bh); root->fs_info->do_barriers = 0; ret = submit_bh(WRITE, bh); -- cgit v1.2.3 From 11bd143fc8243cf48c934dc1c4479a5aacf58ce3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 22 Jun 2007 14:16:24 -0400 Subject: Btrfs: Switch to libcrc32c to avoid problems with cryptomgr on highmem machines Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 15cc9ec92308..3712e946f9c8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include @@ -147,22 +147,12 @@ static int btree_get_block(struct inode *inode, sector_t iblock, int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, char *result) { - struct scatterlist sg; - struct crypto_hash *tfm = root->fs_info->hash_tfm; - struct hash_desc desc; - int ret; - - desc.tfm = tfm; - desc.flags = 0; - sg_init_one(&sg, data, len); - spin_lock_irq(&root->fs_info->hash_lock); - ret = crypto_hash_digest(&desc, &sg, 1, result); - spin_unlock_irq(&root->fs_info->hash_lock); - if (ret) { - printk("digest failed\n"); - } - return ret; + u32 crc; + crc = crc32c(0, data, len); + memcpy(result, &crc, BTRFS_CRC32_SIZE); + return 0; } + static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, int verify) { @@ -454,14 +444,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) sizeof(struct btrfs_key)); insert_inode_hash(fs_info->btree_inode); mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); - fs_info->hash_tfm = crypto_alloc_hash("crc32c", 0, CRYPTO_ALG_ASYNC); - spin_lock_init(&fs_info->hash_lock); - if (!fs_info->hash_tfm || IS_ERR(fs_info->hash_tfm)) { - printk("btrfs: failed hash setup, modprobe cryptomgr?\n"); - err = -ENOMEM; - goto fail_iput; - } mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); @@ -613,7 +596,6 @@ int close_ctree(struct btrfs_root *root) btrfs_block_release(fs_info->tree_root, fs_info->tree_root->node); btrfs_block_release(root, fs_info->sb_buffer); - crypto_free_hash(fs_info->hash_tfm); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); -- cgit v1.2.3 From 54aa1f4dfdacd60a19c4471220b24e581be6f774 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 22 Jun 2007 14:16:25 -0400 Subject: Btrfs: Audit callers and return codes to make sure -ENOSPC gets up the stack Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3712e946f9c8..865a284aa06c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -580,7 +580,7 @@ int close_ctree(struct btrfs_root *root) btrfs_transaction_flush_work(root); mutex_lock(&fs_info->fs_mutex); trans = btrfs_start_transaction(root, 1); - btrfs_commit_transaction(trans, root); + ret = btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ trans = btrfs_start_transaction(root, 1); btrfs_commit_transaction(trans, root); -- cgit v1.2.3 From 5eda7b5e9b0bed864dd18284c7df9b3c8207dad7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 22 Jun 2007 14:16:25 -0400 Subject: Btrfs: Add the ability to find and remove dead roots after a crash. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 44 ++++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 865a284aa06c..d7615e1578cc 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -326,8 +326,8 @@ static int find_and_setup_root(int blocksize, return 0; } -struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, - struct btrfs_key *location) +struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, + struct btrfs_key *location) { struct btrfs_root *root; struct btrfs_root *tree_root = fs_info->tree_root; @@ -336,11 +336,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, u64 highest_inode; int ret = 0; - root = radix_tree_lookup(&fs_info->fs_roots_radix, - (unsigned long)location->objectid); - if (root) - return root; - root = kmalloc(sizeof(*root), GFP_NOFS); + root = kzalloc(sizeof(*root), GFP_NOFS); if (!root) return ERR_PTR(-ENOMEM); if (location->offset == (u64)-1) { @@ -383,6 +379,28 @@ out: BUG_ON(!root->node); insert: root->ref_cows = 1; + ret = btrfs_find_highest_inode(root, &highest_inode); + if (ret == 0) { + root->highest_inode = highest_inode; + root->last_inode_alloc = highest_inode; + } + return root; +} + +struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location) +{ + struct btrfs_root *root; + int ret; + + root = radix_tree_lookup(&fs_info->fs_roots_radix, + (unsigned long)location->objectid); + if (root) + return root; + + root = btrfs_read_fs_root_no_radix(fs_info, location); + if (IS_ERR(root)) + return root; ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, root); @@ -391,11 +409,6 @@ insert: kfree(root); return ERR_PTR(ret); } - ret = btrfs_find_highest_inode(root, &highest_inode); - if (ret == 0) { - root->highest_inode = highest_inode; - root->last_inode_alloc = highest_inode; - } return root; } @@ -489,6 +502,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) btrfs_read_block_groups(extent_root); fs_info->generation = btrfs_super_generation(disk_super) + 1; + ret = btrfs_find_dead_roots(tree_root); + if (ret) + goto fail_tree_root; mutex_unlock(&fs_info->fs_mutex); return tree_root; @@ -538,7 +554,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root return 0; } -static int free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) +int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { radix_tree_delete(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid); @@ -565,7 +581,7 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info) if (!ret) break; for (i = 0; i < ret; i++) - free_fs_root(fs_info, gang[i]); + btrfs_free_fs_root(fs_info, gang[i]); } return 0; } -- cgit v1.2.3 From 4b52dff6d371b9b93bc99f64c32831ea9a8ec3ac Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 26 Jun 2007 10:06:50 -0400 Subject: Btrfs: Fix super block updates during transaction commit The super block written during commit was not consistent with the state of the trees. This change adds an in-memory copy of the super so that we can make sure to write out consistent data during a commit. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d7615e1578cc..7081729d5b16 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -471,6 +471,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!fs_info->sb_buffer) goto fail_iput; disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; + fs_info->disk_super = disk_super; + memcpy(&fs_info->super_copy, disk_super, sizeof(fs_info->super_copy)); if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; @@ -479,7 +481,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) btrfs_super_total_blocks(disk_super) << fs_info->btree_inode->i_blkbits); - fs_info->disk_super = disk_super; if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { @@ -527,8 +528,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root int ret; struct buffer_head *bh = root->fs_info->sb_buffer; - btrfs_set_super_root(root->fs_info->disk_super, - bh_blocknr(root->fs_info->tree_root->node)); lock_buffer(bh); WARN_ON(atomic_read(&bh->b_count) < 1); clear_buffer_dirty(bh); -- cgit v1.2.3 From ccd467d60e81b48cdbecae93532b66bcdedca91d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 28 Jun 2007 15:57:36 -0400 Subject: Btrfs: crash recovery fixes Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7081729d5b16..d1bf5bc1bc14 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -270,14 +270,6 @@ fail: return NULL; } -int dirty_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf) -{ - WARN_ON(atomic_read(&buf->b_count) == 0); - mark_buffer_dirty(buf); - return 0; -} - int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { @@ -621,6 +613,20 @@ int close_ctree(struct btrfs_root *root) return 0; } +void btrfs_mark_buffer_dirty(struct buffer_head *bh) +{ + struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; + u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); + WARN_ON(!atomic_read(&bh->b_count)); + if (transid != root->fs_info->generation) { + printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", + (unsigned long long)bh->b_blocknr, + transid, root->fs_info->generation); + WARN_ON(1); + } + mark_buffer_dirty(bh); +} + void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) { brelse(buf); -- cgit v1.2.3 From ec6b910fb330f29e169c9f502c3ac209515af8d1 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Wed, 11 Jul 2007 10:00:37 -0400 Subject: Btrfs: trivial include fixups Almost none of the files including module.h need to do so, remove them. Include sched.h in extent-tree.c to silence a warning about cond_resched() being undeclared. Signed-off-by: Zach Brown Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d1bf5bc1bc14..60db85bc4ba8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include #include #include #include -- cgit v1.2.3 From 6702ed490ca0bb44e17131818a5a18b773957c5a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 7 Aug 2007 16:15:09 -0400 Subject: Btrfs: Add run time btree defrag, and an ioctl to force btree defrag This adds two types of btree defrag, a run time form that tries to defrag recently allocated blocks in the btree when they are still in ram, and an ioctl that forces defrag of all btree blocks. File data blocks are not defragged yet, but this can make a huge difference in sequential btree reads. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 60db85bc4ba8..c948416fea3a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -273,7 +273,9 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct buffer_head *buf) { WARN_ON(atomic_read(&buf->b_count) == 0); + lock_buffer(buf); clear_buffer_dirty(buf); + unlock_buffer(buf); return 0; } @@ -294,6 +296,9 @@ static int __setup_root(int blocksize, root->last_inode_alloc = 0; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); + memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); + root->defrag_running = 0; + root->defrag_level = 0; root->root_key.objectid = objectid; return 0; } @@ -585,6 +590,7 @@ int close_ctree(struct btrfs_root *root) fs_info->closing = 1; btrfs_transaction_flush_work(root); mutex_lock(&fs_info->fs_mutex); + btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ @@ -616,7 +622,9 @@ void btrfs_mark_buffer_dirty(struct buffer_head *bh) { struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); + WARN_ON(!atomic_read(&bh->b_count)); + if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", (unsigned long long)bh->b_blocknr, -- cgit v1.2.3 From 26b8003f10569a9155b7539ef5a7379ee0c6b050 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 8 Aug 2007 20:17:12 -0400 Subject: Btrfs: Replace extent tree preallocation code with some bit radix magic. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c948416fea3a..192211274ce5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -427,6 +427,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); init_bit_radix(&fs_info->extent_map_radix); + init_bit_radix(&fs_info->extent_ins_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); @@ -443,8 +444,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; fs_info->do_barriers = 1; - fs_info->extent_tree_insert_nr = 0; - fs_info->extent_tree_prealloc_nr = 0; fs_info->closing = 0; INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); -- cgit v1.2.3 From 15ee9bc7ed171248d1405df5854da5fa91bfdc39 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 10 Aug 2007 16:22:09 -0400 Subject: Btrfs: delay commits during fsync to allow more writers Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 192211274ce5..b2f79878d51a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -435,6 +435,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_LIST_HEAD(&fs_info->dead_roots); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; + fs_info->last_trans_committed = 0; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->sb = sb; -- cgit v1.2.3 From 58176a9604c5db1784d2c979aea472b3be40b6f0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 29 Aug 2007 15:47:34 -0400 Subject: Btrfs: Add per-root block accounting and sysfs entries Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b2f79878d51a..c25ef0a68f18 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -294,9 +294,12 @@ static int __setup_root(int blocksize, root->last_trans = 0; root->highest_inode = 0; root->last_inode_alloc = 0; + root->name = NULL; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); + memset(&root->root_kobj, 0, sizeof(root->root_kobj)); + init_completion(&root->kobj_unregister); root->defrag_running = 0; root->defrag_level = 0; root->root_key.objectid = objectid; @@ -384,7 +387,8 @@ insert: } struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, - struct btrfs_key *location) + struct btrfs_key *location, + const char *name, int namelen) { struct btrfs_root *root; int ret; @@ -405,6 +409,22 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } + + ret = btrfs_set_root_name(root, name, namelen); + if (ret) { + brelse(root->node); + kfree(root); + return ERR_PTR(ret); + } + + ret = btrfs_sysfs_add_root(root); + if (ret) { + brelse(root->node); + kfree(root->name); + kfree(root); + return ERR_PTR(ret); + } + return root; } @@ -433,6 +453,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); + memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); + init_completion(&fs_info->kobj_unregister); sb_set_blocksize(sb, 4096); fs_info->running_transaction = NULL; fs_info->last_trans_committed = 0; @@ -500,8 +522,10 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->generation = btrfs_super_generation(disk_super) + 1; ret = btrfs_find_dead_roots(tree_root); - if (ret) + if (ret) { + mutex_unlock(&fs_info->fs_mutex); goto fail_tree_root; + } mutex_unlock(&fs_info->fs_mutex); return tree_root; @@ -553,12 +577,15 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { radix_tree_delete(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid); + btrfs_sysfs_del_root(root); if (root->inode) iput(root->inode); if (root->node) brelse(root->node); if (root->commit_root) brelse(root->commit_root); + if (root->name) + kfree(root->name); kfree(root); return 0; } -- cgit v1.2.3 From 011410bd859a481a335d7db1fb559542c5663fd5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 10 Sep 2007 19:58:36 -0400 Subject: Btrfs: Add more synchronization before creating a snapshot File data checksums are only done during writepage, so we have to make sure all pages are written when the snapshot is taken. This also adds some locking so that new writes don't race in and add new dirty pages. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c25ef0a68f18..6c953a0e0aa3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -300,6 +300,7 @@ static int __setup_root(int blocksize, memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); memset(&root->root_kobj, 0, sizeof(root->root_kobj)); init_completion(&root->kobj_unregister); + init_rwsem(&root->snap_sem); root->defrag_running = 0; root->defrag_level = 0; root->root_key.objectid = objectid; -- cgit v1.2.3 From 5ce14bbcdd1b5d9233d26a1e89faf3a26c820c58 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 11 Sep 2007 11:15:39 -0400 Subject: Btrfs: Find and remove dead roots the first time a root is loaded. Dead roots are trees left over after a crash, and they were either in the process of being removed or were waiting to be removed when the box crashed. Before, a search of the entire tree of root pointers was done on mount looking for dead roots. Now, the search is done the first time we load a root. This makes mount faster when there are a large number of snapshots, and it enables the block accounting code to properly update the block counts on the latest root as old versions of the root are reaped after a crash. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6c953a0e0aa3..4296839eea1a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -426,6 +426,10 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, return ERR_PTR(ret); } + ret = btrfs_find_dead_roots(fs_info->tree_root, + root->root_key.objectid, root); + BUG_ON(ret); + return root; } @@ -522,11 +526,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) btrfs_read_block_groups(extent_root); fs_info->generation = btrfs_super_generation(disk_super) + 1; - ret = btrfs_find_dead_roots(tree_root); - if (ret) { - mutex_unlock(&fs_info->fs_mutex); - goto fail_tree_root; - } mutex_unlock(&fs_info->fs_mutex); return tree_root; -- cgit v1.2.3 From d3c2fdcf7b79079f60ac64e61d886964d4647910 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Sep 2007 10:58:06 -0400 Subject: Btrfs: Use balance_dirty_pages_nr on btree blocks btrfs_btree_balance_dirty is changed to pass the number of pages dirtied for more accurate dirty throttling. This lets the VM make better decisions about when to force some writeback. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4296839eea1a..ef6d76f2ec20 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -666,7 +666,8 @@ void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) brelse(buf); } -void btrfs_btree_balance_dirty(struct btrfs_root *root) +void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) { - balance_dirty_pages_ratelimited(root->fs_info->btree_inode->i_mapping); + balance_dirty_pages_ratelimited_nr( + root->fs_info->btree_inode->i_mapping, nr); } -- cgit v1.2.3 From 5f39d397dfbe140a14edecd4e73c34ce23c4f9ee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:14:19 -0400 Subject: Btrfs: Create extent_buffer interface for large blocksizes Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 391 ++++++++++++++++++++++++----------------------------- 1 file changed, 180 insertions(+), 211 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ef6d76f2ec20..0c1f90cbedb0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -23,138 +23,132 @@ #include #include #include +#include // for block_sync_page #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -u64 bh_blocknr(struct buffer_head *bh) +#if 0 +static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) { - return bh->b_blocknr; -} - -static int check_tree_block(struct btrfs_root *root, struct buffer_head *buf) -{ - struct btrfs_node *node = btrfs_buffer_node(buf); - if (bh_blocknr(buf) != btrfs_header_blocknr(&node->header)) { - printk(KERN_CRIT "bh_blocknr(buf) is %llu, header is %llu\n", - (unsigned long long)bh_blocknr(buf), - (unsigned long long)btrfs_header_blocknr(&node->header)); + if (extent_buffer_blocknr(buf) != btrfs_header_blocknr(buf)) { + printk(KERN_CRIT "buf blocknr(buf) is %llu, header is %llu\n", + (unsigned long long)extent_buffer_blocknr(buf), + (unsigned long long)btrfs_header_blocknr(buf)); return 1; } return 0; } +#endif -struct buffer_head *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) +struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, + u64 blocknr) { - struct address_space *mapping = root->fs_info->btree_inode->i_mapping; - int blockbits = root->fs_info->sb->s_blocksize_bits; - unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); - struct page *page; - struct buffer_head *bh; - struct buffer_head *head; - struct buffer_head *ret = NULL; - - - page = find_lock_page(mapping, index); - if (!page) - return NULL; - - if (!page_has_buffers(page)) - goto out_unlock; + struct inode *btree_inode = root->fs_info->btree_inode; + return find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + blocknr * root->sectorsize, + root->sectorsize, GFP_NOFS); +} - head = page_buffers(page); - bh = head; - do { - if (buffer_mapped(bh) && bh_blocknr(bh) == blocknr) { - ret = bh; - get_bh(bh); - goto out_unlock; - } - bh = bh->b_this_page; - } while (bh != head); -out_unlock: - unlock_page(page); - page_cache_release(page); - return ret; +struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 blocknr) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + return alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + blocknr * root->sectorsize, + root->sectorsize, GFP_NOFS); } -int btrfs_map_bh_to_logical(struct btrfs_root *root, struct buffer_head *bh, - u64 logical) +struct extent_map *btree_get_extent(struct inode *inode, struct page *page, + size_t page_offset, u64 start, u64 end, + int create) { - if (logical == 0) { - bh->b_bdev = NULL; - bh->b_blocknr = 0; - set_buffer_mapped(bh); - } else { - map_bh(bh, root->fs_info->sb, logical); + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + struct extent_map *em; + int ret; + +again: + em = lookup_extent_mapping(em_tree, start, end); + if (em) { + goto out; } - return 0; + em = alloc_extent_map(GFP_NOFS); + if (!em) { + em = ERR_PTR(-ENOMEM); + goto out; + } + em->start = 0; + em->end = (i_size_read(inode) & ~((u64)PAGE_CACHE_SIZE -1)) - 1; + em->block_start = 0; + em->block_end = em->end; + em->bdev = inode->i_sb->s_bdev; + ret = add_extent_mapping(em_tree, em); + if (ret == -EEXIST) { + free_extent_map(em); + em = NULL; + goto again; + } else if (ret) { + em = ERR_PTR(ret); + } +out: + return em; } -struct buffer_head *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 blocknr) +static int btree_writepage(struct page *page, struct writeback_control *wbc) { - struct address_space *mapping = root->fs_info->btree_inode->i_mapping; - int blockbits = root->fs_info->sb->s_blocksize_bits; - unsigned long index = blocknr >> (PAGE_CACHE_SHIFT - blockbits); - struct page *page; - struct buffer_head *bh; - struct buffer_head *head; - struct buffer_head *ret = NULL; - int err; - u64 first_block = index << (PAGE_CACHE_SHIFT - blockbits); + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + return extent_write_full_page(tree, page, btree_get_extent, wbc); +} +int btree_readpage(struct file *file, struct page *page) +{ + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + return extent_read_full_page(tree, page, btree_get_extent); +} - page = find_or_create_page(mapping, index, GFP_NOFS); - if (!page) - return NULL; +static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) +{ + struct extent_map_tree *tree; + int ret; - if (!page_has_buffers(page)) - create_empty_buffers(page, root->fs_info->sb->s_blocksize, 0); - head = page_buffers(page); - bh = head; - do { - if (!buffer_mapped(bh)) { - err = btrfs_map_bh_to_logical(root, bh, first_block); - BUG_ON(err); - } - if (bh_blocknr(bh) == blocknr) { - ret = bh; - get_bh(bh); - goto out_unlock; - } - bh = bh->b_this_page; - first_block++; - } while (bh != head); -out_unlock: - unlock_page(page); - if (ret) - touch_buffer(ret); - page_cache_release(page); + BUG_ON(page->private != 1); + tree = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_mapping(tree, page); + if (ret == 1) { + ClearPagePrivate(page); + set_page_private(page, 0); + page_cache_release(page); + } return ret; } -static int btree_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh, int create) +static void btree_invalidatepage(struct page *page, unsigned long offset) { - int err; - struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; - err = btrfs_map_bh_to_logical(root, bh, iblock); - return err; + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + extent_invalidatepage(tree, page, offset); + btree_releasepage(page, GFP_NOFS); } int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, char *result) { + return 0; +#if 0 u32 crc; crc = crc32c(0, data, len); memcpy(result, &crc, BTRFS_CRC32_SIZE); return 0; +#endif } -static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, +#if 0 +static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int verify) { + return 0; char result[BTRFS_CRC32_SIZE]; int ret; struct btrfs_node *node; @@ -176,7 +170,9 @@ static int csum_tree_block(struct btrfs_root *root, struct buffer_head *bh, } return 0; } +#endif +#if 0 static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct buffer_head *bh; @@ -195,87 +191,65 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) } while (bh != head); return block_write_full_page(page, btree_get_block, wbc); } - -static int btree_readpage(struct file * file, struct page * page) -{ - return block_read_full_page(page, btree_get_block); -} +#endif static struct address_space_operations btree_aops = { .readpage = btree_readpage, .writepage = btree_writepage, + .releasepage = btree_releasepage, + .invalidatepage = btree_invalidatepage, .sync_page = block_sync_page, }; int readahead_tree_block(struct btrfs_root *root, u64 blocknr) { - struct buffer_head *bh = NULL; + struct extent_buffer *buf = NULL; + struct inode *btree_inode = root->fs_info->btree_inode; int ret = 0; - bh = btrfs_find_create_tree_block(root, blocknr); - if (!bh) + buf = btrfs_find_create_tree_block(root, blocknr); + if (!buf) return 0; - if (buffer_uptodate(bh)) { - ret = 1; - goto done; - } - if (test_set_buffer_locked(bh)) { - ret = 1; - goto done; - } - if (!buffer_uptodate(bh)) { - get_bh(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh); - } else { - unlock_buffer(bh); - ret = 1; - } -done: - brelse(bh); + read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + buf, 0); + free_extent_buffer(buf); return ret; } -struct buffer_head *read_tree_block(struct btrfs_root *root, u64 blocknr) +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) { - struct buffer_head *bh = NULL; - - bh = btrfs_find_create_tree_block(root, blocknr); - if (!bh) - return bh; - if (buffer_uptodate(bh)) - goto uptodate; - lock_buffer(bh); - if (!buffer_uptodate(bh)) { - get_bh(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(READ, bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - goto fail; - } else { - unlock_buffer(bh); - } -uptodate: - if (!buffer_checked(bh)) { - csum_tree_block(root, bh, 1); - set_buffer_checked(bh); - } - if (check_tree_block(root, bh)) - goto fail; - return bh; -fail: - brelse(bh); - return NULL; + struct extent_buffer *buf = NULL; + struct inode *btree_inode = root->fs_info->btree_inode; + + buf = btrfs_find_create_tree_block(root, blocknr); + if (!buf) + return NULL; + read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + buf, 1); + return buf; } int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct buffer_head *buf) + struct extent_buffer *buf) { - WARN_ON(atomic_read(&buf->b_count) == 0); - lock_buffer(buf); - clear_buffer_dirty(buf); - unlock_buffer(buf); + struct inode *btree_inode = root->fs_info->btree_inode; + clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); + return 0; +} + +int wait_on_tree_block_writeback(struct btrfs_root *root, + struct extent_buffer *buf) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->extent_tree, + buf); + return 0; +} + +int set_tree_block_dirty(struct btrfs_root *root, struct extent_buffer *buf) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); return 0; } @@ -287,7 +261,9 @@ static int __setup_root(int blocksize, root->node = NULL; root->inode = NULL; root->commit_root = NULL; - root->blocksize = blocksize; + root->sectorsize = blocksize; + root->nodesize = blocksize; + root->leafsize = blocksize; root->ref_cows = 0; root->fs_info = fs_info; root->objectid = objectid; @@ -332,7 +308,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_root *root; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_path *path; - struct btrfs_leaf *l; + struct extent_buffer *l; u64 highest_inode; int ret = 0; @@ -361,11 +337,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, ret = -ENOENT; goto out; } - l = btrfs_buffer_leaf(path->nodes[0]); - memcpy(&root->root_item, - btrfs_item_ptr(l, path->slots[0], struct btrfs_root_item), + l = path->nodes[0]; + read_extent_buffer(l, &root->root_item, + btrfs_item_ptr_offset(l, path->slots[0]), sizeof(root->root_item)); - memcpy(&root->root_key, location, sizeof(*location)); ret = 0; out: btrfs_release_path(root, path); @@ -406,21 +381,21 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, (unsigned long)root->root_key.objectid, root); if (ret) { - brelse(root->node); + free_extent_buffer(root->node); kfree(root); return ERR_PTR(ret); } ret = btrfs_set_root_name(root, name, namelen); if (ret) { - brelse(root->node); + free_extent_buffer(root->node); kfree(root); return ERR_PTR(ret); } ret = btrfs_sysfs_add_root(root); if (ret) { - brelse(root->node); + free_extent_buffer(root->node); kfree(root->name); kfree(root); return ERR_PTR(ret); @@ -471,6 +446,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, + fs_info->btree_inode->i_mapping, + GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; @@ -493,10 +471,14 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!fs_info->sb_buffer) goto fail_iput; - disk_super = (struct btrfs_super_block *)fs_info->sb_buffer->b_data; - fs_info->disk_super = disk_super; - memcpy(&fs_info->super_copy, disk_super, sizeof(fs_info->super_copy)); + read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0, + sizeof(fs_info->super_copy)); + + read_extent_buffer(fs_info->sb_buffer, fs_info->fsid, + (unsigned long)btrfs_super_fsid(fs_info->sb_buffer), + BTRFS_FSID_SIZE); + disk_super = &fs_info->super_copy; if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; @@ -530,9 +512,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) return tree_root; fail_tree_root: - btrfs_block_release(tree_root, tree_root->node); + free_extent_buffer(tree_root->node); fail_sb_buffer: - btrfs_block_release(tree_root, fs_info->sb_buffer); + free_extent_buffer(fs_info->sb_buffer); fail_iput: iput(fs_info->btree_inode); fail: @@ -546,31 +528,13 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret; - struct buffer_head *bh = root->fs_info->sb_buffer; - - lock_buffer(bh); - WARN_ON(atomic_read(&bh->b_count) < 1); - clear_buffer_dirty(bh); - csum_tree_block(root, bh, 0); - bh->b_end_io = end_buffer_write_sync; - get_bh(bh); - if (root->fs_info->do_barriers) - ret = submit_bh(WRITE_BARRIER, bh); - else - ret = submit_bh(WRITE, bh); - if (ret == -EOPNOTSUPP) { - get_bh(bh); - lock_buffer(bh); - set_buffer_uptodate(bh); - root->fs_info->do_barriers = 0; - ret = submit_bh(WRITE, bh); - } - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - WARN_ON(1); - return -EIO; - } - return 0; + struct extent_buffer *super = root->fs_info->sb_buffer; + struct inode *btree_inode = root->fs_info->btree_inode; + + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super); + ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, + super->start, super->len); + return ret; } int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) @@ -581,9 +545,9 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) if (root->inode) iput(root->inode); if (root->node) - brelse(root->node); + free_extent_buffer(root->node); if (root->commit_root) - brelse(root->commit_root); + free_extent_buffer(root->commit_root); if (root->name) kfree(root->name); kfree(root); @@ -629,12 +593,10 @@ int close_ctree(struct btrfs_root *root) mutex_unlock(&fs_info->fs_mutex); if (fs_info->extent_root->node) - btrfs_block_release(fs_info->extent_root, - fs_info->extent_root->node); + free_extent_buffer(fs_info->extent_root->node); if (fs_info->tree_root->node) - btrfs_block_release(fs_info->tree_root, - fs_info->tree_root->node); - btrfs_block_release(root, fs_info->sb_buffer); + free_extent_buffer(fs_info->tree_root->node); + free_extent_buffer(fs_info->sb_buffer); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); @@ -645,25 +607,32 @@ int close_ctree(struct btrfs_root *root) return 0; } -void btrfs_mark_buffer_dirty(struct buffer_head *bh) +int btrfs_buffer_uptodate(struct extent_buffer *buf) +{ + struct inode *btree_inode = buf->pages[0]->mapping->host; + return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); +} + +int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; - u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); + struct inode *btree_inode = buf->pages[0]->mapping->host; + return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, + buf); +} - WARN_ON(!atomic_read(&bh->b_count)); +void btrfs_mark_buffer_dirty(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + u64 transid = btrfs_header_generation(buf); + struct inode *btree_inode = root->fs_info->btree_inode; if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", - (unsigned long long)bh->b_blocknr, + (unsigned long long)extent_buffer_blocknr(buf), transid, root->fs_info->generation); WARN_ON(1); } - mark_buffer_dirty(bh); -} - -void btrfs_block_release(struct btrfs_root *root, struct buffer_head *buf) -{ - brelse(buf); + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); } void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) -- cgit v1.2.3 From 6d36dcd48f1e4e7446d603a3df9638bd314a182d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:14:37 -0400 Subject: Btrfs: Avoid memcpy where possible in extent_buffers Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0c1f90cbedb0..8242933a1d8b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -609,20 +609,20 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->pages[0]->mapping->host; + struct inode *btree_inode = buf->first_page->mapping->host; return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->pages[0]->mapping->host; + struct inode *btree_inode = buf->first_page->mapping->host; return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; -- cgit v1.2.3 From f510cfecfc98759d75283823cfccf0cc0d59a4c6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:14:48 -0400 Subject: Btrfs: Fix extent_buffer and extent_state leaks Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8242933a1d8b..09f4e694624d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -46,18 +46,25 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 blocknr) { struct inode *btree_inode = root->fs_info->btree_inode; - return find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + struct extent_buffer *eb; + eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, blocknr * root->sectorsize, root->sectorsize, GFP_NOFS); + if (eb) + eb->alloc_addr = (unsigned long)__builtin_return_address(0); + return eb; } struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 blocknr) { struct inode *btree_inode = root->fs_info->btree_inode; - return alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + struct extent_buffer *eb; + eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, blocknr * root->sectorsize, root->sectorsize, GFP_NOFS); + eb->alloc_addr = (unsigned long)__builtin_return_address(0); + return eb; } struct extent_map *btree_get_extent(struct inode *inode, struct page *page, @@ -226,6 +233,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) return NULL; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, buf, 1); + buf->alloc_addr = (unsigned long)__builtin_return_address(0); return buf; } @@ -426,7 +434,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) } init_bit_radix(&fs_info->pinned_radix); init_bit_radix(&fs_info->pending_del_radix); - init_bit_radix(&fs_info->extent_map_radix); init_bit_radix(&fs_info->extent_ins_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); @@ -449,6 +456,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->free_space_cache, + fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; @@ -594,8 +603,10 @@ int close_ctree(struct btrfs_root *root) if (fs_info->extent_root->node) free_extent_buffer(fs_info->extent_root->node); + if (fs_info->tree_root->node) free_extent_buffer(fs_info->tree_root->node); + free_extent_buffer(fs_info->sb_buffer); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); -- cgit v1.2.3 From 96b5179d0d9b6368c203856f2ad6e8e12a8b2a2c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:15:19 -0400 Subject: Btrfs: Stop using radix trees for the block group cache Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 09f4e694624d..aac7c82b0dce 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -436,8 +436,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) init_bit_radix(&fs_info->pending_del_radix); init_bit_radix(&fs_info->extent_ins_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); - INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL); - INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); @@ -458,6 +456,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) GFP_NOFS); extent_map_tree_init(&fs_info->free_space_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->block_group_cache, + fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; -- cgit v1.2.3 From 1a5bc167f6707542b79a55452075525620ed43f5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:15:26 -0400 Subject: Btrfs: Change the remaining radix trees used by extent-tree.c to extent_map trees Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index aac7c82b0dce..2b86a1d779b7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -432,9 +432,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) err = -ENOMEM; goto fail; } - init_bit_radix(&fs_info->pinned_radix); - init_bit_radix(&fs_info->pending_del_radix); - init_bit_radix(&fs_info->extent_ins_radix); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); @@ -458,6 +455,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_mapping, GFP_NOFS); extent_map_tree_init(&fs_info->block_group_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->pinned_extents, + fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->pending_del, + fs_info->btree_inode->i_mapping, GFP_NOFS); + extent_map_tree_init(&fs_info->extent_ins, + fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; -- cgit v1.2.3 From db94535db75e67fab12ccbb7f5ee548e33fed891 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:15:53 -0400 Subject: Btrfs: Allow tree blocks larger than the page size Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 92 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 58 insertions(+), 34 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2b86a1d779b7..fad9298c6962 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -28,6 +28,7 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" +#include "print-tree.h" #if 0 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) @@ -43,26 +44,25 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) #endif struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, - u64 blocknr) + u64 bytenr, u32 blocksize) { struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, - blocknr * root->sectorsize, - root->sectorsize, GFP_NOFS); + bytenr, blocksize, GFP_NOFS); if (eb) eb->alloc_addr = (unsigned long)__builtin_return_address(0); return eb; } struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 blocknr) + u64 bytenr, u32 blocksize) { struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; + eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, - blocknr * root->sectorsize, - root->sectorsize, GFP_NOFS); + bytenr, blocksize, GFP_NOFS); eb->alloc_addr = (unsigned long)__builtin_return_address(0); return eb; } @@ -208,13 +208,13 @@ static struct address_space_operations btree_aops = { .sync_page = block_sync_page, }; -int readahead_tree_block(struct btrfs_root *root, u64 blocknr) +int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; int ret = 0; - buf = btrfs_find_create_tree_block(root, blocknr); + buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return 0; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, @@ -223,12 +223,13 @@ int readahead_tree_block(struct btrfs_root *root, u64 blocknr) return ret; } -struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 blocknr) +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, + u32 blocksize) { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; - buf = btrfs_find_create_tree_block(root, blocknr); + buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, @@ -261,7 +262,7 @@ int set_tree_block_dirty(struct btrfs_root *root, struct extent_buffer *buf) return 0; } -static int __setup_root(int blocksize, +static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) @@ -269,9 +270,9 @@ static int __setup_root(int blocksize, root->node = NULL; root->inode = NULL; root->commit_root = NULL; - root->sectorsize = blocksize; - root->nodesize = blocksize; - root->leafsize = blocksize; + root->sectorsize = sectorsize; + root->nodesize = nodesize; + root->leafsize = leafsize; root->ref_cows = 0; root->fs_info = fs_info; root->objectid = objectid; @@ -291,21 +292,23 @@ static int __setup_root(int blocksize, return 0; } -static int find_and_setup_root(int blocksize, - struct btrfs_root *tree_root, +static int find_and_setup_root(struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info, u64 objectid, struct btrfs_root *root) { int ret; + u32 blocksize; - __setup_root(blocksize, root, fs_info, objectid); + __setup_root(tree_root->nodesize, tree_root->leafsize, + tree_root->sectorsize, root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); - root->node = read_tree_block(root, - btrfs_root_blocknr(&root->root_item)); + blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); + root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), + blocksize); BUG_ON(!root->node); return 0; } @@ -318,14 +321,14 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, struct btrfs_path *path; struct extent_buffer *l; u64 highest_inode; + u32 blocksize; int ret = 0; root = kzalloc(sizeof(*root), GFP_NOFS); if (!root) return ERR_PTR(-ENOMEM); if (location->offset == (u64)-1) { - ret = find_and_setup_root(fs_info->sb->s_blocksize, - fs_info->tree_root, fs_info, + ret = find_and_setup_root(tree_root, fs_info, location->objectid, root); if (ret) { kfree(root); @@ -334,7 +337,8 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, goto insert; } - __setup_root(fs_info->sb->s_blocksize, root, fs_info, + __setup_root(tree_root->nodesize, tree_root->leafsize, + tree_root->sectorsize, root, fs_info, location->objectid); path = btrfs_alloc_path(); @@ -357,8 +361,9 @@ out: kfree(root); return ERR_PTR(ret); } - root->node = read_tree_block(root, - btrfs_root_blocknr(&root->root_item)); + blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); + root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), + blocksize); BUG_ON(!root->node); insert: root->ref_cows = 1; @@ -418,6 +423,10 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *open_ctree(struct super_block *sb) { + u32 sectorsize; + u32 nodesize; + u32 leafsize; + u32 blocksize; struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), @@ -474,12 +483,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); - __setup_root(sb->s_blocksize, tree_root, + __setup_root(512, 512, 512, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); fs_info->sb_buffer = read_tree_block(tree_root, - BTRFS_SUPER_INFO_OFFSET / - sb->s_blocksize); + BTRFS_SUPER_INFO_OFFSET, + 512); if (!fs_info->sb_buffer) goto fail_iput; @@ -494,9 +503,15 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; + nodesize = btrfs_super_nodesize(disk_super); + leafsize = btrfs_super_leafsize(disk_super); + sectorsize = btrfs_super_sectorsize(disk_super); + tree_root->nodesize = nodesize; + tree_root->leafsize = leafsize; + tree_root->sectorsize = sectorsize; + i_size_write(fs_info->btree_inode, - btrfs_super_total_blocks(disk_super) << - fs_info->btree_inode->i_blkbits); + btrfs_super_total_bytes(disk_super)); if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, @@ -504,13 +519,22 @@ struct btrfs_root *open_ctree(struct super_block *sb) printk("btrfs: valid FS not found on %s\n", sb->s_id); goto fail_sb_buffer; } + blocksize = btrfs_level_size(tree_root, + btrfs_super_root_level(disk_super)); tree_root->node = read_tree_block(tree_root, - btrfs_super_root(disk_super)); + btrfs_super_root(disk_super), + blocksize); if (!tree_root->node) goto fail_sb_buffer; +#if 0 + btrfs_print_leaf(tree_root, tree_root->node); + err = -EIO; + goto fail_tree_root; +#endif mutex_lock(&fs_info->fs_mutex); - ret = find_and_setup_root(sb->s_blocksize, tree_root, fs_info, + + ret = find_and_setup_root(tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); if (ret) { mutex_unlock(&fs_info->fs_mutex); @@ -611,11 +635,11 @@ int close_ctree(struct btrfs_root *root) free_extent_buffer(fs_info->tree_root->node); free_extent_buffer(fs_info->sb_buffer); - truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); - iput(fs_info->btree_inode); btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); + truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + iput(fs_info->btree_inode); kfree(fs_info->extent_root); kfree(fs_info->tree_root); return 0; @@ -642,7 +666,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", - (unsigned long long)extent_buffer_blocknr(buf), + (unsigned long long)buf->start, transid, root->fs_info->generation); WARN_ON(1); } -- cgit v1.2.3 From 09e71a326341f40111400c88aaf0498ef622824b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:17:04 -0400 Subject: Btrfs: Use an array of pages in the extent buffers to reduce the cost of find_get_page Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fad9298c6962..0636f79672e9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -647,20 +647,20 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->first_page->mapping->host; + struct inode *btree_inode = buf->pages[0]->mapping->host; return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->first_page->mapping->host; + struct inode *btree_inode = buf->pages[0]->mapping->host; return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; -- cgit v1.2.3 From 6b80053d02be41886344b5007d04e345311ec0b5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:17:34 -0400 Subject: Btrfs: Add back the online defragging code Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0636f79672e9..8e606e6658aa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -678,3 +678,63 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) balance_dirty_pages_ratelimited_nr( root->fs_info->btree_inode->i_mapping, nr); } + +void btrfs_set_buffer_defrag(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, + buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); +} + +void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, + buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, + GFP_NOFS); +} + +int btrfs_buffer_defrag(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, + buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); +} + +int btrfs_buffer_defrag_done(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, + buf->start, buf->start + buf->len - 1, + EXTENT_DEFRAG_DONE, 0); +} + +int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, + buf->start, buf->start + buf->len - 1, + EXTENT_DEFRAG_DONE, GFP_NOFS); +} + +int btrfs_clear_buffer_defrag(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, + buf->start, buf->start + buf->len - 1, + EXTENT_DEFRAG, GFP_NOFS); +} + +int btrfs_read_buffer(struct extent_buffer *buf) +{ + struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct inode *btree_inode = root->fs_info->btree_inode; + return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + buf, 1); +} -- cgit v1.2.3 From 4dc119046d0d8501afa4346472917fb05586ad9c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:18:14 -0400 Subject: Btrfs: Add an extent buffer LRU to reduce radix tree hits Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8e606e6658aa..fd7e6c182b9d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -50,8 +50,6 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, struct extent_buffer *eb; eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, bytenr, blocksize, GFP_NOFS); - if (eb) - eb->alloc_addr = (unsigned long)__builtin_return_address(0); return eb; } @@ -63,7 +61,6 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, bytenr, blocksize, GFP_NOFS); - eb->alloc_addr = (unsigned long)__builtin_return_address(0); return eb; } @@ -234,7 +231,6 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, return NULL; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, buf, 1); - buf->alloc_addr = (unsigned long)__builtin_return_address(0); return buf; } @@ -638,6 +634,7 @@ int close_ctree(struct btrfs_root *root) btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); + extent_map_tree_cleanup(&BTRFS_I(fs_info->btree_inode)->extent_tree); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); kfree(fs_info->extent_root); @@ -647,20 +644,20 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->pages[0]->mapping->host; + struct inode *btree_inode = buf->last_page->mapping->host; return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->pages[0]->mapping->host; + struct inode *btree_inode = buf->last_page->mapping->host; return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; @@ -681,7 +678,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) void btrfs_set_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); @@ -689,7 +686,7 @@ void btrfs_set_buffer_defrag(struct extent_buffer *buf) void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, @@ -698,7 +695,7 @@ void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) int btrfs_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); @@ -706,7 +703,7 @@ int btrfs_buffer_defrag(struct extent_buffer *buf) int btrfs_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -715,7 +712,7 @@ int btrfs_buffer_defrag_done(struct extent_buffer *buf) int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -724,7 +721,7 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) int btrfs_clear_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -733,7 +730,7 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf) int btrfs_read_buffer(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, buf, 1); -- cgit v1.2.3 From 810191ff3087e8143b41a944fcf4fd8c693f00e3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:18:55 -0400 Subject: Btrfs: extent_map optimizations to cut down on CPU usage Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fd7e6c182b9d..16f0260fca66 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -644,20 +644,20 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->last_page->mapping->host; + struct inode *btree_inode = buf->first_page->mapping->host; return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { - struct inode *btree_inode = buf->last_page->mapping->host; + struct inode *btree_inode = buf->first_page->mapping->host; return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); } void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; @@ -678,7 +678,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) void btrfs_set_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); @@ -686,7 +686,7 @@ void btrfs_set_buffer_defrag(struct extent_buffer *buf) void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, @@ -695,7 +695,7 @@ void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) int btrfs_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); @@ -703,7 +703,7 @@ int btrfs_buffer_defrag(struct extent_buffer *buf) int btrfs_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -712,7 +712,7 @@ int btrfs_buffer_defrag_done(struct extent_buffer *buf) int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -721,7 +721,7 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) int btrfs_clear_buffer_defrag(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, buf->start + buf->len - 1, @@ -730,7 +730,7 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf) int btrfs_read_buffer(struct extent_buffer *buf) { - struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root; + struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, buf, 1); -- cgit v1.2.3 From 19c00ddcc31ad4bdfb86b57085e06d6135b9b1d7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:19:22 -0400 Subject: Btrfs: Add back metadata checksumming Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 215 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 155 insertions(+), 60 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 16f0260fca66..1176e5420c56 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -60,7 +60,7 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, struct extent_buffer *eb; eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, - bytenr, blocksize, GFP_NOFS); + bytenr, blocksize, NULL, GFP_NOFS); return eb; } @@ -99,10 +99,102 @@ out: return em; } +u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) +{ + return crc32c(seed, data, len); +} + +void btrfs_csum_final(u32 crc, char *result) +{ + *(__le32 *)result = ~cpu_to_le32(crc); +} + +static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, + int verify) +{ + char result[BTRFS_CRC32_SIZE]; + unsigned long len; + unsigned long cur_len; + unsigned long offset = BTRFS_CSUM_SIZE; + char *map_token = NULL; + char *kaddr; + unsigned long map_start; + unsigned long map_len; + int err; + u32 crc = ~(u32)0; + + len = buf->len - offset; + while(len > 0) { + err = map_private_extent_buffer(buf, offset, 32, + &map_token, &kaddr, + &map_start, &map_len, KM_USER0); + if (err) { + printk("failed to map extent buffer! %lu\n", + offset); + return 1; + } + cur_len = min(len, map_len - (offset - map_start)); + crc = btrfs_csum_data(root, kaddr + offset - map_start, + crc, cur_len); + len -= cur_len; + offset += cur_len; + unmap_extent_buffer(buf, map_token, KM_USER0); + } + btrfs_csum_final(crc, result); + + if (verify) { + if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) { + printk("btrfs: %s checksum verify failed on %llu\n", + root->fs_info->sb->s_id, + buf->start); + return 1; + } + } else { + write_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE); + } + return 0; +} + + +int csum_dirty_buffer(struct btrfs_root *root, struct page *page) +{ + struct extent_map_tree *tree; + u64 start = page->index << PAGE_CACHE_SHIFT; + u64 found_start; + int found_level; + unsigned long len; + struct extent_buffer *eb; + tree = &BTRFS_I(page->mapping->host)->extent_tree; + + if (page->private == EXTENT_PAGE_PRIVATE) + goto out; + if (!page->private) + goto out; + len = page->private >> 2; + if (len == 0) { + WARN_ON(1); + } + eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); + read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1); + found_start = btrfs_header_bytenr(eb); + if (found_start != start) { + printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", + start, found_start, len); + } + found_level = btrfs_header_level(eb); + csum_tree_block(root, eb, 0); + free_extent_buffer(eb); +out: + return 0; +} + static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct extent_map_tree *tree; + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; tree = &BTRFS_I(page->mapping->host)->extent_tree; + + csum_dirty_buffer(root, page); return extent_write_full_page(tree, page, btree_get_extent, wbc); } int btree_readpage(struct file *file, struct page *page) @@ -117,7 +209,6 @@ static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) struct extent_map_tree *tree; int ret; - BUG_ON(page->private != 1); tree = &BTRFS_I(page->mapping->host)->extent_tree; ret = try_release_extent_mapping(tree, page); if (ret == 1) { @@ -136,46 +227,6 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) btree_releasepage(page, GFP_NOFS); } -int btrfs_csum_data(struct btrfs_root * root, char *data, size_t len, - char *result) -{ - return 0; -#if 0 - u32 crc; - crc = crc32c(0, data, len); - memcpy(result, &crc, BTRFS_CRC32_SIZE); - return 0; -#endif -} - -#if 0 -static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, - int verify) -{ - return 0; - char result[BTRFS_CRC32_SIZE]; - int ret; - struct btrfs_node *node; - - ret = btrfs_csum_data(root, bh->b_data + BTRFS_CSUM_SIZE, - bh->b_size - BTRFS_CSUM_SIZE, result); - if (ret) - return ret; - if (verify) { - if (memcmp(bh->b_data, result, BTRFS_CRC32_SIZE)) { - printk("btrfs: %s checksum verify failed on %llu\n", - root->fs_info->sb->s_id, - (unsigned long long)bh_blocknr(bh)); - return 1; - } - } else { - node = btrfs_buffer_node(bh); - memcpy(node->header.csum, result, BTRFS_CRC32_SIZE); - } - return 0; -} -#endif - #if 0 static int btree_writepage(struct page *page, struct writeback_control *wbc) { @@ -215,7 +266,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) if (!buf) return 0; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, - buf, 0); + buf, 0, 0); free_extent_buffer(buf); return ret; } @@ -225,12 +276,29 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; + struct extent_map_tree *extent_tree; + int ret; + + extent_tree = &BTRFS_I(btree_inode)->extent_tree; buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, - buf, 1); + buf, 0, 1); + if (buf->flags & EXTENT_CSUM) { + return buf; + } + if (test_range_bit(extent_tree, buf->start, buf->start + buf->len - 1, + EXTENT_CSUM, 1)) { + buf->flags |= EXTENT_CSUM; + return buf; + } + ret = csum_tree_block(root, buf, 1); + set_extent_bits(extent_tree, buf->start, + buf->start + buf->len - 1, + EXTENT_CSUM, GFP_NOFS); + buf->flags |= EXTENT_CSUM; return buf; } @@ -251,13 +319,6 @@ int wait_on_tree_block_writeback(struct btrfs_root *root, return 0; } -int set_tree_block_dirty(struct btrfs_root *root, struct extent_buffer *buf) -{ - struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); - return 0; -} - static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, @@ -416,7 +477,24 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, return root; } - +#if 0 +static int add_hasher(struct btrfs_fs_info *info, char *type) { + struct btrfs_hasher *hasher; + + hasher = kmalloc(sizeof(*hasher), GFP_NOFS); + if (!hasher) + return -ENOMEM; + hasher->hash_tfm = crypto_alloc_hash(type, 0, CRYPTO_ALG_ASYNC); + if (!hasher->hash_tfm) { + kfree(hasher); + return -EINVAL; + } + spin_lock(&info->hash_lock); + list_add(&hasher->list, &info->hashers); + spin_unlock(&info->hash_lock); + return 0; +} +#endif struct btrfs_root *open_ctree(struct super_block *sb) { u32 sectorsize; @@ -440,6 +518,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); + INIT_LIST_HEAD(&fs_info->hashers); + spin_lock_init(&fs_info->hash_lock); + memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); init_completion(&fs_info->kobj_unregister); sb_set_blocksize(sb, 4096); @@ -479,6 +560,14 @@ struct btrfs_root *open_ctree(struct super_block *sb) mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); +#if 0 + ret = add_hasher(fs_info, "crc32c"); + if (ret) { + printk("btrfs: failed hash setup, modprobe cryptomgr?\n"); + err = -ENOMEM; + goto fail_iput; + } +#endif __setup_root(512, 512, 512, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); @@ -509,25 +598,21 @@ struct btrfs_root *open_ctree(struct super_block *sb) i_size_write(fs_info->btree_inode, btrfs_super_total_bytes(disk_super)); - if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { printk("btrfs: valid FS not found on %s\n", sb->s_id); goto fail_sb_buffer; } + blocksize = btrfs_level_size(tree_root, btrfs_super_root_level(disk_super)); + tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super), blocksize); if (!tree_root->node) goto fail_sb_buffer; -#if 0 - btrfs_print_leaf(tree_root, tree_root->node); - err = -EIO; - goto fail_tree_root; -#endif mutex_lock(&fs_info->fs_mutex); ret = find_and_setup_root(tree_root, fs_info, @@ -634,9 +719,19 @@ int close_ctree(struct btrfs_root *root) btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); - extent_map_tree_cleanup(&BTRFS_I(fs_info->btree_inode)->extent_tree); + extent_map_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->extent_tree); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); iput(fs_info->btree_inode); +#if 0 + while(!list_empty(&fs_info->hashers)) { + struct btrfs_hasher *hasher; + hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher, + hashers); + list_del(&hasher->hashers); + crypto_free_hash(&fs_info->hash_tfm); + kfree(hasher); + } +#endif kfree(fs_info->extent_root); kfree(fs_info->tree_root); return 0; @@ -733,5 +828,5 @@ int btrfs_read_buffer(struct extent_buffer *buf) struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, - buf, 1); + buf, 0, 1); } -- cgit v1.2.3 From 304fced668079ce222200d5742657d34cc44af09 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:21:17 -0400 Subject: Btrfs: balance_dirty_pages_ratelimited is causing problems, use nr == 1 always Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1176e5420c56..3878772fcffd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -768,7 +768,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) { balance_dirty_pages_ratelimited_nr( - root->fs_info->btree_inode->i_mapping, nr); + root->fs_info->btree_inode->i_mapping, 1); } void btrfs_set_buffer_defrag(struct extent_buffer *buf) -- cgit v1.2.3 From ff79f8190b6e955ff7a71faf804a3017d526e657 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 15 Oct 2007 16:22:25 -0400 Subject: Btrfs: Add back file data checksumming Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3878772fcffd..5accced2858a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -594,6 +594,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) tree_root->nodesize = nodesize; tree_root->leafsize = leafsize; tree_root->sectorsize = sectorsize; + sb_set_blocksize(sb, sectorsize); i_size_write(fs_info->btree_inode, btrfs_super_total_bytes(disk_super)); -- cgit v1.2.3 From 44b36eb20bb60d7da1375c1fb45784740a4dc358 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Fri, 19 Oct 2007 09:22:56 -0400 Subject: Btrfs: Properly setup root key while reading the root Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5accced2858a..5a7e60d7f443 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -410,6 +410,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, read_extent_buffer(l, &root->root_item, btrfs_item_ptr_offset(l, path->slots[0]), sizeof(root->root_item)); + memcpy(&root->root_key, location, sizeof(*location)); ret = 0; out: btrfs_release_path(root, path); -- cgit v1.2.3 From 35ebb934bd7fcc7ca991b155b7980c3c4ff9f1a5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 30 Oct 2007 16:56:53 -0400 Subject: Btrfs: Fix PAGE_CACHE_SHIFT shifts on 32 bit machines Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5a7e60d7f443..8935eec31ee2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -159,7 +159,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { struct extent_map_tree *tree; - u64 start = page->index << PAGE_CACHE_SHIFT; + u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 found_start; int found_level; unsigned long len; -- cgit v1.2.3 From 0da5468f4724a59e745d938395beffe830c424ee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 7 Nov 2007 21:08:01 -0500 Subject: Btrfs: Use writepages for the metadata too Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8935eec31ee2..6c8533fba7c9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -43,6 +43,8 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) } #endif +static struct extent_map_ops btree_extent_map_ops; + struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { @@ -188,15 +190,29 @@ out: return 0; } -static int btree_writepage(struct page *page, struct writeback_control *wbc) +static int btree_writepage_io_hook(struct page *page, u64 start, u64 end) { - struct extent_map_tree *tree; struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; - tree = &BTRFS_I(page->mapping->host)->extent_tree; csum_dirty_buffer(root, page); + return 0; +} + +static int btree_writepage(struct page *page, struct writeback_control *wbc) +{ + struct extent_map_tree *tree; + tree = &BTRFS_I(page->mapping->host)->extent_tree; return extent_write_full_page(tree, page, btree_get_extent, wbc); } + +static int btree_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct extent_map_tree *tree; + tree = &BTRFS_I(mapping->host)->extent_tree; + return extent_writepages(tree, mapping, btree_get_extent, wbc); +} + int btree_readpage(struct file *file, struct page *page) { struct extent_map_tree *tree; @@ -251,6 +267,7 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) static struct address_space_operations btree_aops = { .readpage = btree_readpage, .writepage = btree_writepage, + .writepages = btree_writepages, .releasepage = btree_releasepage, .invalidatepage = btree_invalidatepage, .sync_page = block_sync_page, @@ -538,6 +555,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, fs_info->btree_inode->i_mapping, GFP_NOFS); + BTRFS_I(fs_info->btree_inode)->extent_tree.ops = &btree_extent_map_ops; + extent_map_tree_init(&fs_info->free_space_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); extent_map_tree_init(&fs_info->block_group_cache, @@ -832,3 +851,7 @@ int btrfs_read_buffer(struct extent_buffer *buf) return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, buf, 0, 1); } + +static struct extent_map_ops btree_extent_map_ops = { + .writepage_io_hook = btree_writepage_io_hook, +}; -- cgit v1.2.3 From 324ae4df00fdc1a6a179bf584d8addf027bb75fb Mon Sep 17 00:00:00 2001 From: Yan Date: Fri, 16 Nov 2007 14:57:08 -0500 Subject: Btrfs: Add block group pinned accounting back This patch adds a helper function 'update_pinned_extents' to extent-tree.c. The usage of the helper function is similar to 'update_block_group', the last parameter of the function indicates pin vs unpin. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6c8533fba7c9..3e16cca72b49 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -569,6 +569,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; + fs_info->total_pinned = 0; INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); BTRFS_I(fs_info->btree_inode)->root = tree_root; -- cgit v1.2.3 From 793955bca66c99defdffc857ae6eb7e8431d6bbe Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Nov 2007 16:34:41 -0800 Subject: Btrfs: Limit btree writeback to prevent seeks Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3e16cca72b49..28e9ef8a3253 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -210,6 +210,15 @@ static int btree_writepages(struct address_space *mapping, { struct extent_map_tree *tree; tree = &BTRFS_I(mapping->host)->extent_tree; + if (wbc->sync_mode == WB_SYNC_NONE) { + u64 num_dirty; + u64 start = 0; + unsigned long thresh = 96 * 1024 * 1024; + num_dirty = count_range_bits(tree, &start, thresh, EXTENT_DIRTY); + if (num_dirty < thresh) { + return 0; + } + } return extent_writepages(tree, mapping, btree_get_extent, wbc); } -- cgit v1.2.3 From 8790bad7fb5ac4929dcd52c5dd8d6b81f8c05be8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 26 Nov 2007 19:47:05 -0500 Subject: Btrfs: Only limit btree writeback for pdflush Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 28e9ef8a3253..8a74f9027d65 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -210,7 +210,7 @@ static int btree_writepages(struct address_space *mapping, { struct extent_map_tree *tree; tree = &BTRFS_I(mapping->host)->extent_tree; - if (wbc->sync_mode == WB_SYNC_NONE) { + if (wbc->sync_mode == WB_SYNC_NONE && current_is_pdflush()) { u64 num_dirty; u64 start = 0; unsigned long thresh = 96 * 1024 * 1024; -- cgit v1.2.3 From 448d640b668dae3928591e83f2bf4ca9f4c06e52 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 Nov 2007 07:52:01 -0800 Subject: Btrfs: Fine tune the btree writeback exclusion some more Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8a74f9027d65..de0552532d23 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -214,6 +214,10 @@ static int btree_writepages(struct address_space *mapping, u64 num_dirty; u64 start = 0; unsigned long thresh = 96 * 1024 * 1024; + + if (wbc->for_kupdate) + return 0; + num_dirty = count_range_bits(tree, &start, thresh, EXTENT_DIRTY); if (num_dirty < thresh) { return 0; -- cgit v1.2.3 From ca6646264b7dab662d84435441164bb2a8e8885a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 Nov 2007 11:16:35 -0500 Subject: Btrfs: Add efficient dirty accounting to the extent_map tree Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index de0552532d23..eef4ab56b9ca 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -210,7 +210,7 @@ static int btree_writepages(struct address_space *mapping, { struct extent_map_tree *tree; tree = &BTRFS_I(mapping->host)->extent_tree; - if (wbc->sync_mode == WB_SYNC_NONE && current_is_pdflush()) { + if (wbc->sync_mode == WB_SYNC_NONE) { u64 num_dirty; u64 start = 0; unsigned long thresh = 96 * 1024 * 1024; @@ -218,6 +218,11 @@ static int btree_writepages(struct address_space *mapping, if (wbc->for_kupdate) return 0; + if (current_is_pdflush()) { + thresh = 96 * 1024 * 1024; + } else { + thresh = 8 * 1024 * 1024; + } num_dirty = count_range_bits(tree, &start, thresh, EXTENT_DIRTY); if (num_dirty < thresh) { return 0; -- cgit v1.2.3 From 87ee04eb0f2f0c63314cef4a76bd1adac748425e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 30 Nov 2007 11:30:34 -0500 Subject: Btrfs: Add simple stripe size parameter Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eef4ab56b9ca..60a30da6af00 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -355,7 +355,7 @@ int wait_on_tree_block_writeback(struct btrfs_root *root, } static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, - struct btrfs_root *root, + u32 stripesize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) { @@ -365,6 +365,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->sectorsize = sectorsize; root->nodesize = nodesize; root->leafsize = leafsize; + root->stripesize = stripesize; root->ref_cows = 0; root->fs_info = fs_info; root->objectid = objectid; @@ -393,7 +394,8 @@ static int find_and_setup_root(struct btrfs_root *tree_root, u32 blocksize; __setup_root(tree_root->nodesize, tree_root->leafsize, - tree_root->sectorsize, root, fs_info, objectid); + tree_root->sectorsize, tree_root->stripesize, + root, fs_info, objectid); ret = btrfs_find_last_root(tree_root, objectid, &root->root_item, &root->root_key); BUG_ON(ret); @@ -430,8 +432,8 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, } __setup_root(tree_root->nodesize, tree_root->leafsize, - tree_root->sectorsize, root, fs_info, - location->objectid); + tree_root->sectorsize, tree_root->stripesize, + root, fs_info, location->objectid); path = btrfs_alloc_path(); BUG_ON(!path); @@ -537,6 +539,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) u32 nodesize; u32 leafsize; u32 blocksize; + u32 stripesize; struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), @@ -607,7 +610,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) goto fail_iput; } #endif - __setup_root(512, 512, 512, tree_root, + __setup_root(512, 512, 512, 512, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); fs_info->sb_buffer = read_tree_block(tree_root, @@ -630,9 +633,11 @@ struct btrfs_root *open_ctree(struct super_block *sb) nodesize = btrfs_super_nodesize(disk_super); leafsize = btrfs_super_leafsize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); + stripesize = btrfs_super_stripesize(disk_super); tree_root->nodesize = nodesize; tree_root->leafsize = leafsize; tree_root->sectorsize = sectorsize; + tree_root->stripesize = stripesize; sb_set_blocksize(sb, sectorsize); i_size_write(fs_info->btree_inode, -- cgit v1.2.3 From 7bb86316c3961d1bc401ef184fd996f999556c7f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 11 Dec 2007 09:25:06 -0500 Subject: Btrfs: Add back pointers from extents to the btree or file referencing them Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 60a30da6af00..0ac21e3aac87 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -210,7 +210,7 @@ static int btree_writepages(struct address_space *mapping, { struct extent_map_tree *tree; tree = &BTRFS_I(mapping->host)->extent_tree; - if (wbc->sync_mode == WB_SYNC_NONE) { + if (0 && wbc->sync_mode == WB_SYNC_NONE) { u64 num_dirty; u64 start = 0; unsigned long thresh = 96 * 1024 * 1024; -- cgit v1.2.3 From d8d5f3e16d1ae4fe9b93312e083f2c04a95520f0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 11 Dec 2007 12:42:00 -0500 Subject: Btrfs: Add lowest key information to back refs for extent tree blocks as well. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0ac21e3aac87..60a30da6af00 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -210,7 +210,7 @@ static int btree_writepages(struct address_space *mapping, { struct extent_map_tree *tree; tree = &BTRFS_I(mapping->host)->extent_tree; - if (0 && wbc->sync_mode == WB_SYNC_NONE) { + if (wbc->sync_mode == WB_SYNC_NONE) { u64 num_dirty; u64 start = 0; unsigned long thresh = 96 * 1024 * 1024; -- cgit v1.2.3 From b6cda9bcb4df7544c67fc3548a53bc1607d59f46 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 14 Dec 2007 15:30:32 -0500 Subject: Btrfs: Add mount -o nodatasum to turn of file data checksumming Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 60a30da6af00..4338b00c2a25 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -568,6 +568,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->sb = sb; + fs_info->mount_opt = 0; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; -- cgit v1.2.3 From d10c5f31c79c6a703974ae29d0a05781b828fe30 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Dec 2007 20:14:04 -0500 Subject: Btrfs: Fix extent_map and extent_state leaks by flushing lrus on FS unmount Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4338b00c2a25..278564178993 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -765,8 +765,18 @@ int close_ctree(struct btrfs_root *root) btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); + + filemap_write_and_wait(fs_info->btree_inode->i_mapping); + + extent_map_tree_empty_lru(&fs_info->free_space_cache); + extent_map_tree_empty_lru(&fs_info->block_group_cache); + extent_map_tree_empty_lru(&fs_info->pinned_extents); + extent_map_tree_empty_lru(&fs_info->pending_del); + extent_map_tree_empty_lru(&fs_info->extent_ins); extent_map_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->extent_tree); + truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + iput(fs_info->btree_inode); #if 0 while(!list_empty(&fs_info->hashers)) { -- cgit v1.2.3 From c59f8951d48c5eb000926935f3ab063d8181d1ee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Dec 2007 20:14:04 -0500 Subject: Btrfs: Add mount option to enforce a max extent size Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 278564178993..7213012c27d5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -569,6 +569,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->extent_root = extent_root; fs_info->sb = sb; fs_info->mount_opt = 0; + fs_info->max_extent = (u64)-1; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; -- cgit v1.2.3 From 6da6abae027e2dbc59bca5f4168b0760f25068c7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 18 Dec 2007 16:15:09 -0500 Subject: Btrfs: Back port to 2.6.18-el kernels Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7213012c27d5..ebb2db624fdd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -593,8 +593,11 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->do_barriers = 1; fs_info->closing = 0; fs_info->total_pinned = 0; - +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); +#else INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); +#endif BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); -- cgit v1.2.3 From 1832a6d5ee3b1af61001cadba9e10da9e91af4a4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 21 Dec 2007 16:27:21 -0500 Subject: Btrfs: Implement basic support for -ENOSPC This is intended to prevent accidentally filling the drive. A determined user can still make things oops. It includes some accounting of the current bytes under delayed allocation, but this will change as things get optimized Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ebb2db624fdd..eebb4fb65c61 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -223,7 +223,8 @@ static int btree_writepages(struct address_space *mapping, } else { thresh = 8 * 1024 * 1024; } - num_dirty = count_range_bits(tree, &start, thresh, EXTENT_DIRTY); + num_dirty = count_range_bits(tree, &start, (u64)-1, + thresh, EXTENT_DIRTY); if (num_dirty < thresh) { return 0; } @@ -559,6 +560,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); spin_lock_init(&fs_info->hash_lock); + spin_lock_init(&fs_info->delalloc_lock); memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); init_completion(&fs_info->kobj_unregister); @@ -570,6 +572,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->sb = sb; fs_info->mount_opt = 0; fs_info->max_extent = (u64)-1; + fs_info->delalloc_bytes = 0; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; -- cgit v1.2.3 From edbd8d4efe4ddaf29a175ae504e2c9a05a96ebee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 21 Dec 2007 16:27:24 -0500 Subject: Btrfs: Support for online FS resize (grow and shrink) Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eebb4fb65c61..de026d9d9b2b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -471,13 +471,17 @@ insert: return root; } -struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, - struct btrfs_key *location, - const char *name, int namelen) +struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, + struct btrfs_key *location) { struct btrfs_root *root; int ret; + if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) + return fs_info->tree_root; + if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID) + return fs_info->extent_root; + root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)location->objectid); if (root) @@ -494,6 +498,23 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } + ret = btrfs_find_dead_roots(fs_info->tree_root, + root->root_key.objectid, root); + BUG_ON(ret); + + return root; +} + +struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_key *location, + const char *name, int namelen) +{ + struct btrfs_root *root; + int ret; + + root = btrfs_read_fs_root_no_name(fs_info, location); + if (!root) + return NULL; ret = btrfs_set_root_name(root, name, namelen); if (ret) { @@ -509,11 +530,6 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } - - ret = btrfs_find_dead_roots(fs_info->tree_root, - root->root_key.objectid, root); - BUG_ON(ret); - return root; } #if 0 -- cgit v1.2.3 From 4313b3994d719fcdeb7e661473019ca3d62e829b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Jan 2008 09:08:48 -0500 Subject: Btrfs: Reduce stack usage in the resizer, fix 32 bit compiles Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index de026d9d9b2b..67d9fd728868 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -374,6 +374,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->highest_inode = 0; root->last_inode_alloc = 0; root->name = NULL; + root->in_sysfs = 0; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); @@ -516,6 +517,9 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, if (!root) return NULL; + if (root->in_sysfs) + return root; + ret = btrfs_set_root_name(root, name, namelen); if (ret) { free_extent_buffer(root->node); @@ -530,6 +534,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } + root->in_sysfs = 1; return root; } #if 0 -- cgit v1.2.3 From e4204dedbbaa3a614605cb83cc0ac5161af6b4e6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jan 2008 15:46:27 -0500 Subject: Btrfs: Change tree block csum tagging to avoid false error messages Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 67d9fd728868..a6170ff19e7f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -145,10 +145,25 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, btrfs_csum_final(crc, result); if (verify) { - if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) { - printk("btrfs: %s checksum verify failed on %llu\n", + int from_this_trans = 0; + + if (root->fs_info->running_transaction && + btrfs_header_generation(buf) == + root->fs_info->running_transaction->transid) + from_this_trans = 1; + + /* FIXME, this is not good */ + if (from_this_trans == 0 && + memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) { + u32 val; + u32 found = 0; + memcpy(&found, result, BTRFS_CRC32_SIZE); + + read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE); + printk("btrfs: %s checksum verify failed on %llu " + "wanted %X found %X from_this_trans %d\n", root->fs_info->sb->s_id, - buf->start); + buf->start, val, found, from_this_trans); return 1; } } else { @@ -313,6 +328,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; struct extent_map_tree *extent_tree; + u64 end; int ret; extent_tree = &BTRFS_I(btree_inode)->extent_tree; @@ -322,19 +338,29 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, return NULL; read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, buf, 0, 1); - if (buf->flags & EXTENT_CSUM) { + + if (buf->flags & EXTENT_CSUM) return buf; - } - if (test_range_bit(extent_tree, buf->start, buf->start + buf->len - 1, - EXTENT_CSUM, 1)) { + + end = buf->start + PAGE_CACHE_SIZE - 1; + if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) { buf->flags |= EXTENT_CSUM; return buf; } + + lock_extent(extent_tree, buf->start, end, GFP_NOFS); + + if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) { + buf->flags |= EXTENT_CSUM; + goto out_unlock; + } + ret = csum_tree_block(root, buf, 1); - set_extent_bits(extent_tree, buf->start, - buf->start + buf->len - 1, - EXTENT_CSUM, GFP_NOFS); + set_extent_bits(extent_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; + +out_unlock: + unlock_extent(extent_tree, buf->start, end, GFP_NOFS); return buf; } -- cgit v1.2.3 From dc17ff8f11d129db9e83ab7244769e4eae05e14d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jan 2008 15:46:30 -0500 Subject: Btrfs: Add data=ordered support This forces file data extents down the disk along with the metadata that references them. The current implementation is fairly simple, and just writes out all of the dirty pages in an inode before the commit. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a6170ff19e7f..34cf1f1f47be 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -406,7 +406,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); memset(&root->root_kobj, 0, sizeof(root->root_kobj)); init_completion(&root->kobj_unregister); - init_rwsem(&root->snap_sem); root->defrag_running = 0; root->defrag_level = 0; root->root_key.objectid = objectid; @@ -498,6 +497,21 @@ insert: return root; } +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_objectid) +{ + struct btrfs_root *root; + + if (root_objectid == BTRFS_ROOT_TREE_OBJECTID) + return fs_info->tree_root; + if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID) + return fs_info->extent_root; + + root = radix_tree_lookup(&fs_info->fs_roots_radix, + (unsigned long)root_objectid); + return root; +} + struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, struct btrfs_key *location) { -- cgit v1.2.3 From e2008b61401ecb467a8ce1788fcd2116ae1cfbc1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jan 2008 15:46:30 -0500 Subject: Btrfs: Add some simple throttling to wait for data=ordered and snapshot deletion Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 34cf1f1f47be..e0940a39ff07 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -631,6 +631,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->sb = sb; + fs_info->throttles = 0; fs_info->mount_opt = 0; fs_info->max_extent = (u64)-1; fs_info->delalloc_bytes = 0; @@ -889,6 +890,12 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); } +void btrfs_throttle(struct btrfs_root *root) +{ + if (root->fs_info->throttles) + congestion_wait(WRITE, HZ/10); +} + void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) { balance_dirty_pages_ratelimited_nr( -- cgit v1.2.3 From 21ad10cf3e9c1ef42e725e5c3a593c49f779a16b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Jan 2008 09:23:21 -0500 Subject: Btrfs: Add flush barriers on commit Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e0940a39ff07..a481b970608c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -762,10 +762,15 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root int ret; struct extent_buffer *super = root->fs_info->sb_buffer; struct inode *btree_inode = root->fs_info->btree_inode; + struct super_block *sb = root->fs_info->sb; + if (!btrfs_test_opt(root, NOBARRIER)) + blkdev_issue_flush(sb->s_bdev, NULL); set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super); ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, super->start, super->len); + if (!btrfs_test_opt(root, NOBARRIER)) + blkdev_issue_flush(sb->s_bdev, NULL); return ret; } -- cgit v1.2.3 From 55c69072d6bd5be170a85467f64a20963cddf490 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Jan 2008 15:55:33 -0500 Subject: Btrfs: Fix extent_buffer usage when nodesize != leafsize Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a481b970608c..0338f8fd382d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -197,9 +197,23 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) if (found_start != start) { printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", start, found_start, len); + WARN_ON(1); + goto err; + } + if (eb->first_page != page) { + printk("bad first page %lu %lu\n", eb->first_page->index, + page->index); + WARN_ON(1); + goto err; + } + if (!PageUptodate(page)) { + printk("csum not up to date page %lu\n", page->index); + WARN_ON(1); + goto err; } found_level = btrfs_header_level(eb); csum_tree_block(root, eb, 0); +err: free_extent_buffer(eb); out: return 0; @@ -368,7 +382,10 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *buf) { struct inode *btree_inode = root->fs_info->btree_inode; - clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); + if (btrfs_header_generation(buf) == + root->fs_info->running_transaction->transid) + clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, + buf); return 0; } @@ -897,8 +914,11 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) void btrfs_throttle(struct btrfs_root *root) { - if (root->fs_info->throttles) - congestion_wait(WRITE, HZ/10); + struct backing_dev_info *bdi; + + bdi = root->fs_info->sb->s_bdev->bd_inode->i_mapping->backing_dev_info; + if (root->fs_info->throttles && bdi_write_congested(bdi)) + congestion_wait(WRITE, HZ/20); } void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) -- cgit v1.2.3 From b99aa6cb548ae9461c5dc0e38f88ebc618ea68d7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 14 Jan 2008 14:41:16 -0500 Subject: Btrfs: Only delete roots from sysfs when they were added to sysfs Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0338f8fd382d..87daed632f37 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -795,7 +795,8 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { radix_tree_delete(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid); - btrfs_sysfs_del_root(root); + if (root->in_sysfs) + btrfs_sysfs_del_root(root); if (root->inode) iput(root->inode); if (root->node) -- cgit v1.2.3 From cee36a03e8f7c6e14aefd497d3acf01bcd3ef153 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 15 Jan 2008 08:40:48 -0500 Subject: Rework btrfs_drop_inode to avoid scheduling Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 87daed632f37..cd29922d4073 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -639,6 +639,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) INIT_LIST_HEAD(&fs_info->hashers); spin_lock_init(&fs_info->hash_lock); spin_lock_init(&fs_info->delalloc_lock); + spin_lock_init(&fs_info->new_trans_lock); memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); init_completion(&fs_info->kobj_unregister); -- cgit v1.2.3 From 04005cc7a1feef94237204566ccac38571af7991 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jan 2008 12:01:41 -0500 Subject: Btrfs: Use blk_congestion_wait on older kernels Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index cd29922d4073..731a534f81f5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -919,8 +919,13 @@ void btrfs_throttle(struct btrfs_root *root) struct backing_dev_info *bdi; bdi = root->fs_info->sb->s_bdev->bd_inode->i_mapping->backing_dev_info; - if (root->fs_info->throttles && bdi_write_congested(bdi)) + if (root->fs_info->throttles && bdi_write_congested(bdi)) { +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) congestion_wait(WRITE, HZ/20); +#else + blk_congestion_wait(WRITE, HZ/20); +#endif + } } void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) -- cgit v1.2.3 From e18e4809b10e6c9efb5fe10c1ddcb4ebb690d517 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Jan 2008 10:54:22 -0500 Subject: Btrfs: Add mount -o ssd, which includes optimizations for seek free storage Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 731a534f81f5..5d1f9bca2712 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -193,6 +193,7 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) } eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1); + btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", @@ -676,6 +677,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->do_barriers = 1; fs_info->closing = 0; fs_info->total_pinned = 0; + fs_info->last_alloc = 0; + #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); #else -- cgit v1.2.3 From d1310b2e0cd98eb1348553e69b73827b436dca7b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Jan 2008 16:13:08 -0500 Subject: Btrfs: Split the extent_map code into two parts There is now extent_map for mapping offsets in the file to disk and extent_io for state tracking, IO submission and extent_bufers. The new extent_map code shifts from [start,end] pairs to [start,len], and pushes the locking out into the caller. This allows a few performance optimizations and is easier to use. A number of extent_map usage bugs were fixed, mostly with failing to remove extent_map entries when changing the file. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 123 ++++++++++++++++++++++++++++------------------------- 1 file changed, 66 insertions(+), 57 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5d1f9bca2712..4c4ebea0b2a9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -43,14 +43,14 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) } #endif -static struct extent_map_ops btree_extent_map_ops; +static struct extent_io_ops btree_extent_io_ops; struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; - eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr, blocksize, GFP_NOFS); return eb; } @@ -61,13 +61,13 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, struct inode *btree_inode = root->fs_info->btree_inode; struct extent_buffer *eb; - eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree, + eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr, blocksize, NULL, GFP_NOFS); return eb; } struct extent_map *btree_get_extent(struct inode *inode, struct page *page, - size_t page_offset, u64 start, u64 end, + size_t page_offset, u64 start, u64 len, int create) { struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; @@ -75,7 +75,9 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, int ret; again: - em = lookup_extent_mapping(em_tree, start, end); + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, start, len); + spin_unlock(&em_tree->lock); if (em) { goto out; } @@ -85,11 +87,14 @@ again: goto out; } em->start = 0; - em->end = (i_size_read(inode) & ~((u64)PAGE_CACHE_SIZE -1)) - 1; + em->len = i_size_read(inode); em->block_start = 0; - em->block_end = em->end; em->bdev = inode->i_sb->s_bdev; + + spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + if (ret == -EEXIST) { free_extent_map(em); em = NULL; @@ -175,13 +180,13 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 found_start; int found_level; unsigned long len; struct extent_buffer *eb; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; if (page->private == EXTENT_PAGE_PRIVATE) goto out; @@ -230,16 +235,16 @@ static int btree_writepage_io_hook(struct page *page, u64 start, u64 end) static int btree_writepage(struct page *page, struct writeback_control *wbc) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_write_full_page(tree, page, btree_get_extent, wbc); } static int btree_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct extent_map_tree *tree; - tree = &BTRFS_I(mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(mapping->host)->io_tree; if (wbc->sync_mode == WB_SYNC_NONE) { u64 num_dirty; u64 start = 0; @@ -264,18 +269,20 @@ static int btree_writepages(struct address_space *mapping, int btree_readpage(struct file *file, struct page *page) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; return extent_read_full_page(tree, page, btree_get_extent); } static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) { - struct extent_map_tree *tree; + struct extent_io_tree *tree; + struct extent_map_tree *map; int ret; - tree = &BTRFS_I(page->mapping->host)->extent_tree; - ret = try_release_extent_mapping(tree, page); + tree = &BTRFS_I(page->mapping->host)->io_tree; + map = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_mapping(map, tree, page); if (ret == 1) { ClearPagePrivate(page); set_page_private(page, 0); @@ -286,8 +293,8 @@ static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) static void btree_invalidatepage(struct page *page, unsigned long offset) { - struct extent_map_tree *tree; - tree = &BTRFS_I(page->mapping->host)->extent_tree; + struct extent_io_tree *tree; + tree = &BTRFS_I(page->mapping->host)->io_tree; extent_invalidatepage(tree, page, offset); btree_releasepage(page, GFP_NOFS); } @@ -331,7 +338,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return 0; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 0); free_extent_buffer(buf); return ret; @@ -342,40 +349,39 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; - struct extent_map_tree *extent_tree; + struct extent_io_tree *io_tree; u64 end; int ret; - extent_tree = &BTRFS_I(btree_inode)->extent_tree; + io_tree = &BTRFS_I(btree_inode)->io_tree; buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, - buf, 0, 1); + read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1); if (buf->flags & EXTENT_CSUM) return buf; end = buf->start + PAGE_CACHE_SIZE - 1; - if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) { + if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { buf->flags |= EXTENT_CSUM; return buf; } - lock_extent(extent_tree, buf->start, end, GFP_NOFS); + lock_extent(io_tree, buf->start, end, GFP_NOFS); - if (test_range_bit(extent_tree, buf->start, end, EXTENT_CSUM, 1)) { + if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { buf->flags |= EXTENT_CSUM; goto out_unlock; } ret = csum_tree_block(root, buf, 1); - set_extent_bits(extent_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); + set_extent_bits(io_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); buf->flags |= EXTENT_CSUM; out_unlock: - unlock_extent(extent_tree, buf->start, end, GFP_NOFS); + unlock_extent(io_tree, buf->start, end, GFP_NOFS); return buf; } @@ -385,7 +391,7 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *btree_inode = root->fs_info->btree_inode; if (btrfs_header_generation(buf) == root->fs_info->running_transaction->transid) - clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, + clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); return 0; } @@ -394,7 +400,7 @@ int wait_on_tree_block_writeback(struct btrfs_root *root, struct extent_buffer *buf) { struct inode *btree_inode = root->fs_info->btree_inode; - wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->extent_tree, + wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->io_tree, buf); return 0; } @@ -659,20 +665,23 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; - extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, + extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, fs_info->btree_inode->i_mapping, GFP_NOFS); - BTRFS_I(fs_info->btree_inode)->extent_tree.ops = &btree_extent_map_ops; + extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree, + GFP_NOFS); + + BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; - extent_map_tree_init(&fs_info->free_space_cache, + extent_io_tree_init(&fs_info->free_space_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->block_group_cache, + extent_io_tree_init(&fs_info->block_group_cache, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->pinned_extents, + extent_io_tree_init(&fs_info->pinned_extents, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->pending_del, + extent_io_tree_init(&fs_info->pending_del, fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_map_tree_init(&fs_info->extent_ins, + extent_io_tree_init(&fs_info->extent_ins, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; fs_info->closing = 0; @@ -787,7 +796,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root if (!btrfs_test_opt(root, NOBARRIER)) blkdev_issue_flush(sb->s_bdev, NULL); - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, super); + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, super); ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, super->start, super->len); if (!btrfs_test_opt(root, NOBARRIER)) @@ -864,12 +873,12 @@ int close_ctree(struct btrfs_root *root) filemap_write_and_wait(fs_info->btree_inode->i_mapping); - extent_map_tree_empty_lru(&fs_info->free_space_cache); - extent_map_tree_empty_lru(&fs_info->block_group_cache); - extent_map_tree_empty_lru(&fs_info->pinned_extents); - extent_map_tree_empty_lru(&fs_info->pending_del); - extent_map_tree_empty_lru(&fs_info->extent_ins); - extent_map_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->extent_tree); + extent_io_tree_empty_lru(&fs_info->free_space_cache); + extent_io_tree_empty_lru(&fs_info->block_group_cache); + extent_io_tree_empty_lru(&fs_info->pinned_extents); + extent_io_tree_empty_lru(&fs_info->pending_del); + extent_io_tree_empty_lru(&fs_info->extent_ins); + extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); @@ -892,13 +901,13 @@ int close_ctree(struct btrfs_root *root) int btrfs_buffer_uptodate(struct extent_buffer *buf) { struct inode *btree_inode = buf->first_page->mapping->host; - return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf); + return extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) { struct inode *btree_inode = buf->first_page->mapping->host; - return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, + return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); } @@ -914,7 +923,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) transid, root->fs_info->generation); WARN_ON(1); } - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->extent_tree, buf); + set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); } void btrfs_throttle(struct btrfs_root *root) @@ -941,7 +950,7 @@ void btrfs_set_buffer_defrag(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, + set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); } @@ -949,7 +958,7 @@ void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start, + set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, GFP_NOFS); } @@ -958,7 +967,7 @@ int btrfs_buffer_defrag(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, + return test_range_bit(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); } @@ -966,7 +975,7 @@ int btrfs_buffer_defrag_done(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->extent_tree, + return test_range_bit(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, 0); } @@ -975,7 +984,7 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, + return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, GFP_NOFS); } @@ -984,7 +993,7 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree, + return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); } @@ -993,10 +1002,10 @@ int btrfs_read_buffer(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree, + return read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1); } -static struct extent_map_ops btree_extent_map_ops = { +static struct extent_io_ops btree_extent_io_ops = { .writepage_io_hook = btree_writepage_io_hook, }; -- cgit v1.2.3 From 70dec8079d78691e476cc6c7cede40656078ad30 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jan 2008 09:59:12 -0500 Subject: Btrfs: extent_io and extent_state optimizations The end_bio routines are changed to take a pointer to the extent state struct, and the state tree is walked in order to set/clear appropriate bits as IO completes. This greatly reduces the number of rbtree searches done by the end_bio handlers, and reduces lock contention. The extent_io releasepage function is changed to avoid expensive searches for locked state. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4c4ebea0b2a9..e29a0716f6a9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -274,7 +274,7 @@ int btree_readpage(struct file *file, struct page *page) return extent_read_full_page(tree, page, btree_get_extent); } -static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) +static int btree_releasepage(struct page *page, gfp_t gfp_flags) { struct extent_io_tree *tree; struct extent_map_tree *map; @@ -282,7 +282,7 @@ static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) tree = &BTRFS_I(page->mapping->host)->io_tree; map = &BTRFS_I(page->mapping->host)->extent_tree; - ret = try_release_extent_mapping(map, tree, page); + ret = try_release_extent_mapping(map, tree, page, gfp_flags); if (ret == 1) { ClearPagePrivate(page); set_page_private(page, 0); -- cgit v1.2.3 From 6f568d35a045dbb8a13fe71bfc32e85e39a986cb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jan 2008 16:03:38 -0500 Subject: Btrfs: mount -o max_inline=size to control the maximum inline extent size Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e29a0716f6a9..6647b90ef745 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -659,6 +659,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->throttles = 0; fs_info->mount_opt = 0; fs_info->max_extent = (u64)-1; + fs_info->max_inline = 8192 * 1024; fs_info->delalloc_bytes = 0; fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; -- cgit v1.2.3 From b0c68f8bed058d9f2023b067b16ed06a8c439544 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 31 Jan 2008 11:05:37 -0500 Subject: Btrfs: Enable delalloc accounting Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6647b90ef745..ad72e2192015 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -861,6 +861,10 @@ int close_ctree(struct btrfs_root *root) write_ctree_super(NULL, root); mutex_unlock(&fs_info->fs_mutex); + if (fs_info->delalloc_bytes) { + printk("btrfs: at unmount delalloc count %Lu\n", + fs_info->delalloc_bytes); + } if (fs_info->extent_root->node) free_extent_buffer(fs_info->extent_root->node); -- cgit v1.2.3 From 4529ba495c6fd0d79247784d0df55ae6512fa883 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 31 Jan 2008 16:45:07 -0500 Subject: Btrfs: Add data block hints to SSD mode too Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ad72e2192015..dda8ad6c0077 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -688,6 +688,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->closing = 0; fs_info->total_pinned = 0; fs_info->last_alloc = 0; + fs_info->last_data_alloc = 0; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); -- cgit v1.2.3 From a86c12c73d982c545a37a8ecdd66528ab260b770 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 7 Feb 2008 10:50:54 -0500 Subject: Btrfs: Create larger bios for btree blocks Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dda8ad6c0077..6f0825b617d1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -197,7 +197,8 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) WARN_ON(1); } eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); - read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1); + read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1, + btree_get_extent); btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { @@ -339,7 +340,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) if (!buf) return 0; read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, - buf, 0, 0); + buf, 0, 0, btree_get_extent); free_extent_buffer(buf); return ret; } @@ -358,7 +359,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1); + read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1, + btree_get_extent); if (buf->flags & EXTENT_CSUM) return buf; @@ -1009,7 +1011,7 @@ int btrfs_read_buffer(struct extent_buffer *buf) struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; return read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, - buf, 0, 1); + buf, 0, 1, btree_get_extent); } static struct extent_io_ops btree_extent_io_ops = { -- cgit v1.2.3 From d7fc640e6fed46932f7c74e14f9b58b8637c66cf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 18 Feb 2008 12:12:38 -0500 Subject: Btrfs: Allocator improvements Reduce CPU time searching for free blocks by optimizing find_first_extent_bit Fix find_free_extent to make better use of the last_alloc hint. Before it was often finding blocks just before the hint. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6f0825b617d1..88e21bdbc478 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -951,7 +951,7 @@ void btrfs_throttle(struct btrfs_root *root) void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) { balance_dirty_pages_ratelimited_nr( - root->fs_info->btree_inode->i_mapping, 1); + root->fs_info->btree_inode->i_mapping, 1); } void btrfs_set_buffer_defrag(struct extent_buffer *buf) -- cgit v1.2.3 From 0b86a832a1f38abec695864ec2eaedc9d2383f1b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:01:56 -0400 Subject: Btrfs: Add support for multiple devices per filesystem Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 91 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 88e21bdbc478..8e37fa120cc8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -28,6 +28,7 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" +#include "volumes.h" #include "print-tree.h" #if 0 @@ -234,6 +235,19 @@ static int btree_writepage_io_hook(struct page *page, u64 start, u64 end) return 0; } +static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 offset; + offset = bio->bi_sector << 9; + if (offset == BTRFS_SUPER_INFO_OFFSET) { + bio->bi_bdev = root->fs_info->sb->s_bdev; + submit_bio(rw, bio); + return 0; + } + return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio); +} + static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct extent_io_tree *tree; @@ -345,6 +359,23 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) return ret; } +static int close_all_devices(struct btrfs_fs_info *fs_info) +{ + struct list_head *list; + struct list_head *next; + struct btrfs_device *device; + + list = &fs_info->devices; + while(!list_empty(list)) { + next = list->next; + list_del(next); + device = list_entry(next, struct btrfs_device, dev_list); + kfree(device->name); + kfree(device); + } + return 0; +} + struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { @@ -420,6 +451,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->leafsize = leafsize; root->stripesize = stripesize; root->ref_cows = 0; + root->track_dirty = 0; + root->fs_info = fs_info; root->objectid = objectid; root->last_trans = 0; @@ -427,6 +460,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->last_inode_alloc = 0; root->name = NULL; root->in_sysfs = 0; + + INIT_LIST_HEAD(&root->dirty_list); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); @@ -634,6 +669,10 @@ struct btrfs_root *open_ctree(struct super_block *sb) GFP_NOFS); struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), GFP_NOFS); + struct btrfs_root *chunk_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); + struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root), + GFP_NOFS); int ret; int err = -EIO; struct btrfs_super_block *disk_super; @@ -657,6 +696,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->last_trans_committed = 0; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; + fs_info->chunk_root = chunk_root; + fs_info->dev_root = dev_root; + INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); + INIT_LIST_HEAD(&fs_info->devices); + btrfs_mapping_init(&fs_info->mapping_tree); + fs_info->last_device = &fs_info->devices; fs_info->sb = sb; fs_info->throttles = 0; fs_info->mount_opt = 0; @@ -714,12 +759,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) goto fail_iput; } #endif - __setup_root(512, 512, 512, 512, tree_root, + __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); fs_info->sb_buffer = read_tree_block(tree_root, BTRFS_SUPER_INFO_OFFSET, - 512); + 4096); if (!fs_info->sb_buffer) goto fail_iput; @@ -730,6 +775,7 @@ struct btrfs_root *open_ctree(struct super_block *sb) read_extent_buffer(fs_info->sb_buffer, fs_info->fsid, (unsigned long)btrfs_super_fsid(fs_info->sb_buffer), BTRFS_FSID_SIZE); + disk_super = &fs_info->super_copy; if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; @@ -753,23 +799,47 @@ struct btrfs_root *open_ctree(struct super_block *sb) goto fail_sb_buffer; } + mutex_lock(&fs_info->fs_mutex); + ret = btrfs_read_sys_array(tree_root); + BUG_ON(ret); + + blocksize = btrfs_level_size(tree_root, + btrfs_super_chunk_root_level(disk_super)); + + __setup_root(nodesize, leafsize, sectorsize, stripesize, + chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); + + chunk_root->node = read_tree_block(chunk_root, + btrfs_super_chunk_root(disk_super), + blocksize); + BUG_ON(!chunk_root->node); + + ret = btrfs_read_chunk_tree(chunk_root); + BUG_ON(ret); + blocksize = btrfs_level_size(tree_root, btrfs_super_root_level(disk_super)); + tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super), blocksize); if (!tree_root->node) goto fail_sb_buffer; - mutex_lock(&fs_info->fs_mutex); ret = find_and_setup_root(tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); - if (ret) { - mutex_unlock(&fs_info->fs_mutex); + if (ret) goto fail_tree_root; - } + extent_root->track_dirty = 1; + + ret = find_and_setup_root(tree_root, fs_info, + BTRFS_DEV_TREE_OBJECTID, dev_root); + dev_root->track_dirty = 1; + + if (ret) + goto fail_extent_root; btrfs_read_block_groups(extent_root); @@ -777,7 +847,10 @@ struct btrfs_root *open_ctree(struct super_block *sb) mutex_unlock(&fs_info->fs_mutex); return tree_root; +fail_extent_root: + free_extent_buffer(extent_root->node); fail_tree_root: + mutex_unlock(&fs_info->fs_mutex); free_extent_buffer(tree_root->node); fail_sb_buffer: free_extent_buffer(fs_info->sb_buffer); @@ -874,6 +947,12 @@ int close_ctree(struct btrfs_root *root) if (fs_info->tree_root->node) free_extent_buffer(fs_info->tree_root->node); + if (root->fs_info->chunk_root->node); + free_extent_buffer(root->fs_info->chunk_root->node); + + if (root->fs_info->dev_root->node); + free_extent_buffer(root->fs_info->dev_root->node); + free_extent_buffer(fs_info->sb_buffer); btrfs_free_block_groups(root->fs_info); @@ -901,8 +980,13 @@ int close_ctree(struct btrfs_root *root) kfree(hasher); } #endif + close_all_devices(fs_info); + btrfs_mapping_tree_free(&fs_info->mapping_tree); + kfree(fs_info->extent_root); kfree(fs_info->tree_root); + kfree(fs_info->chunk_root); + kfree(fs_info->dev_root); return 0; } @@ -1016,4 +1100,5 @@ int btrfs_read_buffer(struct extent_buffer *buf) static struct extent_io_ops btree_extent_io_ops = { .writepage_io_hook = btree_writepage_io_hook, + .submit_bio_hook = btree_submit_bio_hook, }; -- cgit v1.2.3 From 6324fbf334f4586325057197da7752f4ffa409d3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:01:59 -0400 Subject: Btrfs: Dynamic chunk and block group allocation Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8e37fa120cc8..2a239ae49f78 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -700,8 +700,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->dev_root = dev_root; INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->devices); + INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); - fs_info->last_device = &fs_info->devices; fs_info->sb = sb; fs_info->throttles = 0; fs_info->mount_opt = 0; -- cgit v1.2.3 From 0d81ba5dbedef0c3970d6aa318aa84920943e6e3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:02:07 -0400 Subject: Btrfs: Move device information into the super block so it can be scanned Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2a239ae49f78..26185d46712c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -370,7 +370,6 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) next = list->next; list_del(next); device = list_entry(next, struct btrfs_device, dev_list); - kfree(device->name); kfree(device); } return 0; @@ -800,6 +799,9 @@ struct btrfs_root *open_ctree(struct super_block *sb) } mutex_lock(&fs_info->fs_mutex); + ret = btrfs_read_super_device(tree_root, fs_info->sb_buffer); + BUG_ON(ret); + ret = btrfs_read_sys_array(tree_root); BUG_ON(ret); -- cgit v1.2.3 From 239b14b32dc39232ebf9cce29ff77c4c564355fd Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:02:07 -0400 Subject: Btrfs: Bring back mount -o ssd optimizations Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 26185d46712c..4890151cd68d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1103,4 +1103,6 @@ int btrfs_read_buffer(struct extent_buffer *buf) static struct extent_io_ops btree_extent_io_ops = { .writepage_io_hook = btree_writepage_io_hook, .submit_bio_hook = btree_submit_bio_hook, + /* note we're sharing with inode.c for the merge bio hook */ + .merge_bio_hook = btrfs_merge_bio_hook, }; -- cgit v1.2.3 From 8a4b83cc8bd75fca29ac68615896d9e92820e7c2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 24 Mar 2008 15:02:07 -0400 Subject: Btrfs: Add support for device scanning and detection ioctls Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4890151cd68d..f971a29e4f20 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -365,12 +365,12 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) struct list_head *next; struct btrfs_device *device; - list = &fs_info->devices; - while(!list_empty(list)) { - next = list->next; - list_del(next); + list = &fs_info->fs_devices->devices; + list_for_each(next, list) { device = list_entry(next, struct btrfs_device, dev_list); - kfree(device); + if (device->bdev && device->bdev != fs_info->sb->s_bdev) + close_bdev_excl(device->bdev); + device->bdev = NULL; } return 0; } @@ -655,7 +655,8 @@ static int add_hasher(struct btrfs_fs_info *info, char *type) { return 0; } #endif -struct btrfs_root *open_ctree(struct super_block *sb) +struct btrfs_root *open_ctree(struct super_block *sb, + struct btrfs_fs_devices *fs_devices) { u32 sectorsize; u32 nodesize; @@ -697,8 +698,8 @@ struct btrfs_root *open_ctree(struct super_block *sb) fs_info->extent_root = extent_root; fs_info->chunk_root = chunk_root; fs_info->dev_root = dev_root; + fs_info->fs_devices = fs_devices; INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); - INIT_LIST_HEAD(&fs_info->devices); INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); fs_info->sb = sb; @@ -779,6 +780,12 @@ struct btrfs_root *open_ctree(struct super_block *sb) if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; + if (btrfs_super_num_devices(disk_super) != fs_devices->num_devices) { + printk("Btrfs: wanted %llu devices, but found %llu\n", + (unsigned long long)btrfs_super_num_devices(disk_super), + (unsigned long long)fs_devices->num_devices); + goto fail_sb_buffer; + } nodesize = btrfs_super_nodesize(disk_super); leafsize = btrfs_super_leafsize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); @@ -799,8 +806,6 @@ struct btrfs_root *open_ctree(struct super_block *sb) } mutex_lock(&fs_info->fs_mutex); - ret = btrfs_read_super_device(tree_root, fs_info->sb_buffer); - BUG_ON(ret); ret = btrfs_read_sys_array(tree_root); BUG_ON(ret); @@ -859,6 +864,7 @@ fail_sb_buffer: fail_iput: iput(fs_info->btree_inode); fail: + close_all_devices(fs_info); kfree(extent_root); kfree(tree_root); kfree(fs_info); -- cgit v1.2.3 From 593060d756e0c2382d59cf00d5f3b9e3a336c408 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 25 Mar 2008 16:50:33 -0400 Subject: Btrfs: Implement raid0 when multiple devices are present Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f971a29e4f20..d8474796dea2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -736,6 +736,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->total_pinned = 0; fs_info->last_alloc = 0; fs_info->last_data_alloc = 0; + fs_info->extra_alloc_bits = 0; + fs_info->extra_data_alloc_bits = 0; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); -- cgit v1.2.3 From 04160088141a6be06d5cb9e53e34076b51803ffc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 26 Mar 2008 10:28:07 -0400 Subject: Create a btrfs backing dev info This allows intelligent versions of unplug and congestion functions Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d8474796dea2..6a70e4916ab4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -655,6 +655,55 @@ static int add_hasher(struct btrfs_fs_info *info, char *type) { return 0; } #endif + +static int btrfs_congested_fn(void *congested_data, int bdi_bits) +{ + struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; + int ret = 0; + struct list_head *cur; + struct btrfs_device *device; + struct backing_dev_info *bdi; + + list_for_each(cur, &info->fs_devices->devices) { + device = list_entry(cur, struct btrfs_device, dev_list); + bdi = blk_get_backing_dev_info(device->bdev); + if (bdi && bdi_congested(bdi, bdi_bits)) { + ret = 1; + break; + } + } + return ret; +} + +void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) +{ + struct list_head *cur; + struct btrfs_device *device; + struct btrfs_fs_info *info; + + info = (struct btrfs_fs_info *)bdi->unplug_io_data; + list_for_each(cur, &info->fs_devices->devices) { + device = list_entry(cur, struct btrfs_device, dev_list); + bdi = blk_get_backing_dev_info(device->bdev); + if (bdi->unplug_io_fn) { + bdi->unplug_io_fn(bdi, page); + } + } +} + +static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) +{ + bdi_init(bdi); + bdi->ra_pages = default_backing_dev_info.ra_pages; + bdi->state = 0; + bdi->capabilities = default_backing_dev_info.capabilities; + bdi->unplug_io_fn = btrfs_unplug_io_fn; + bdi->unplug_io_data = info; + bdi->congested_fn = btrfs_congested_fn; + bdi->congested_data = info; + return 0; +} + struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices) { @@ -708,11 +757,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; fs_info->delalloc_bytes = 0; + setup_bdi(fs_info, &fs_info->bdi); fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; + fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi; + extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, fs_info->btree_inode->i_mapping, GFP_NOFS); @@ -992,6 +1044,7 @@ int close_ctree(struct btrfs_root *root) #endif close_all_devices(fs_info); btrfs_mapping_tree_free(&fs_info->mapping_tree); + bdi_destroy(&fs_info->bdi); kfree(fs_info->extent_root); kfree(fs_info->tree_root); -- cgit v1.2.3 From 83041add611056e830e29fda913029e37e857239 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 26 Mar 2008 12:02:55 -0400 Subject: Btrfs: Use a higher default ra pages Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6a70e4916ab4..134b722587ec 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -694,7 +694,7 @@ void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) { bdi_init(bdi); - bdi->ra_pages = default_backing_dev_info.ra_pages; + bdi->ra_pages = default_backing_dev_info.ra_pages * 4; bdi->state = 0; bdi->capabilities = default_backing_dev_info.capabilities; bdi->unplug_io_fn = btrfs_unplug_io_fn; -- cgit v1.2.3 From 2d2ae547979854d10b75d557b3abdb3eb7511bbc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 26 Mar 2008 16:24:23 -0400 Subject: Btrfs: Add leak debugging for extent_buffer and extent_state This also fixes one leak around the super block when failing to mount the FS. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 134b722587ec..1c5e097a4df2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -915,12 +915,14 @@ fail_tree_root: free_extent_buffer(tree_root->node); fail_sb_buffer: free_extent_buffer(fs_info->sb_buffer); + extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); fail_iput: iput(fs_info->btree_inode); fail: close_all_devices(fs_info); kfree(extent_root); kfree(tree_root); + bdi_destroy(&fs_info->bdi); kfree(fs_info); return ERR_PTR(err); } -- cgit v1.2.3 From 63b10fc4874a014e22bc4c64e3d92b71180661fe Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 1 Apr 2008 11:21:32 -0400 Subject: Reorder the flags field in struct btrfs_header and record a flag on writeout This allows detection of blocks that have already been written in the running transaction so they can be recowed instead of modified again. It is step one in trusting the transid field of the block pointers. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1c5e097a4df2..709e33d08bb5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -159,17 +159,19 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, from_this_trans = 1; /* FIXME, this is not good */ - if (from_this_trans == 0 && - memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) { + if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) { u32 val; u32 found = 0; memcpy(&found, result, BTRFS_CRC32_SIZE); read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE); + WARN_ON(1); printk("btrfs: %s checksum verify failed on %llu " - "wanted %X found %X from_this_trans %d\n", + "wanted %X found %X from_this_trans %d " + "level %d\n", root->fs_info->sb->s_id, - buf->start, val, found, from_this_trans); + buf->start, val, found, from_this_trans, + btrfs_header_level(buf)); return 1; } } else { @@ -220,6 +222,9 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) goto err; } found_level = btrfs_header_level(eb); + spin_lock(&root->fs_info->hash_lock); + btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); + spin_unlock(&root->fs_info->hash_lock); csum_tree_block(root, eb, 0); err: free_extent_buffer(eb); -- cgit v1.2.3 From e58ca0203d32869a01540a293df40ddc480dc378 Mon Sep 17 00:00:00 2001 From: Yan Date: Tue, 1 Apr 2008 11:21:34 -0400 Subject: Fix btrfs_fill_super to return -EINVAL when no FS found Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 709e33d08bb5..5547607681f4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -728,9 +728,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); int ret; - int err = -EIO; + int err = -EINVAL; struct btrfs_super_block *disk_super; - if (!extent_root || !tree_root || !fs_info) { err = -ENOMEM; goto fail; -- cgit v1.2.3 From 0999df54f850fe1aba29b10d5c869493af107478 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 1 Apr 2008 13:48:14 -0400 Subject: Btrfs: Verify checksums on tree blocks found without read_tree_block Checksums were only verified by btrfs_read_tree_block, which meant the functions to probe the page cache for blocks were not validating checksums. Normally this is fine because the buffers will only be in cache if they have already been validated. But, there is a window while the buffer is being read from disk where it could be up to date in the cache but not yet verified. This patch makes sure all buffers go through checksum verification before they are used. This is safer, and it prevents modification of buffers before they go through the csum code. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 86 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 50 insertions(+), 36 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5547607681f4..e40fb318ad99 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -46,27 +46,6 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) static struct extent_io_ops btree_extent_io_ops; -struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, - u64 bytenr, u32 blocksize) -{ - struct inode *btree_inode = root->fs_info->btree_inode; - struct extent_buffer *eb; - eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, - bytenr, blocksize, GFP_NOFS); - return eb; -} - -struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, - u64 bytenr, u32 blocksize) -{ - struct inode *btree_inode = root->fs_info->btree_inode; - struct extent_buffer *eb; - - eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, - bytenr, blocksize, NULL, GFP_NOFS); - return eb; -} - struct extent_map *btree_get_extent(struct inode *inode, struct page *page, size_t page_offset, u64 start, u64 len, int create) @@ -380,36 +359,29 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) return 0; } -struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, - u32 blocksize) +int btrfs_verify_block_csum(struct btrfs_root *root, + struct extent_buffer *buf) { - struct extent_buffer *buf = NULL; - struct inode *btree_inode = root->fs_info->btree_inode; struct extent_io_tree *io_tree; u64 end; int ret; - io_tree = &BTRFS_I(btree_inode)->io_tree; - - buf = btrfs_find_create_tree_block(root, bytenr, blocksize); - if (!buf) - return NULL; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1, - btree_get_extent); - + io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; if (buf->flags & EXTENT_CSUM) - return buf; + return 0; - end = buf->start + PAGE_CACHE_SIZE - 1; + end = min_t(u64, buf->len, PAGE_CACHE_SIZE); + end = buf->start + end - 1; if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { buf->flags |= EXTENT_CSUM; - return buf; + return 0; } lock_extent(io_tree, buf->start, end, GFP_NOFS); if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { buf->flags |= EXTENT_CSUM; + ret = 0; goto out_unlock; } @@ -419,6 +391,48 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, out_unlock: unlock_extent(io_tree, buf->start, end, GFP_NOFS); + return ret; +} + +struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, + u64 bytenr, u32 blocksize) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + struct extent_buffer *eb; + eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, + bytenr, blocksize, GFP_NOFS); + return eb; +} + +struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, + u64 bytenr, u32 blocksize) +{ + struct inode *btree_inode = root->fs_info->btree_inode; + struct extent_buffer *eb; + + eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, + bytenr, blocksize, NULL, GFP_NOFS); + return eb; +} + + +struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, + u32 blocksize) +{ + struct extent_buffer *buf = NULL; + struct inode *btree_inode = root->fs_info->btree_inode; + struct extent_io_tree *io_tree; + int ret; + + io_tree = &BTRFS_I(btree_inode)->io_tree; + + buf = btrfs_find_create_tree_block(root, bytenr, blocksize); + if (!buf) + return NULL; + read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1, + btree_get_extent); + + ret = btrfs_verify_block_csum(root, buf); return buf; } -- cgit v1.2.3 From 8790d502e4401a4a3a4175b83a3a47e8d595c771 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Apr 2008 16:29:03 -0400 Subject: Btrfs: Add support for mirroring across drives Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e40fb318ad99..ff75ad586767 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -735,7 +735,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); - struct btrfs_fs_info *fs_info = kmalloc(sizeof(*fs_info), + struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), GFP_NOFS); struct btrfs_root *chunk_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); @@ -744,6 +744,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, int ret; int err = -EINVAL; struct btrfs_super_block *disk_super; + if (!extent_root || !tree_root || !fs_info) { err = -ENOMEM; goto fail; @@ -756,11 +757,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); - memset(&fs_info->super_kobj, 0, sizeof(fs_info->super_kobj)); init_completion(&fs_info->kobj_unregister); sb_set_blocksize(sb, 4096); - fs_info->running_transaction = NULL; - fs_info->last_trans_committed = 0; fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->chunk_root = chunk_root; @@ -770,11 +768,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); fs_info->sb = sb; - fs_info->throttles = 0; - fs_info->mount_opt = 0; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; - fs_info->delalloc_bytes = 0; setup_bdi(fs_info, &fs_info->bdi); fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; @@ -802,12 +797,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, extent_io_tree_init(&fs_info->extent_ins, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; - fs_info->closing = 0; - fs_info->total_pinned = 0; - fs_info->last_alloc = 0; - fs_info->last_data_alloc = 0; - fs_info->extra_alloc_bits = 0; - fs_info->extra_data_alloc_bits = 0; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); @@ -923,6 +912,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_read_block_groups(extent_root); fs_info->generation = btrfs_super_generation(disk_super) + 1; + if (btrfs_super_num_devices(disk_super) > 0) { + fs_info->data_alloc_profile = BTRFS_BLOCK_GROUP_RAID0; + fs_info->metadata_alloc_profile = BTRFS_BLOCK_GROUP_RAID1; + fs_info->system_alloc_profile = BTRFS_BLOCK_GROUP_RAID0; + } mutex_unlock(&fs_info->fs_mutex); return tree_root; -- cgit v1.2.3 From 611f0e00a27fe0e5a571194a12443ecdc99a43ef Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 3 Apr 2008 16:29:03 -0400 Subject: Btrfs: Add support for duplicate blocks on a single spindle Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ff75ad586767..42522232fde4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -913,9 +913,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->generation = btrfs_super_generation(disk_super) + 1; if (btrfs_super_num_devices(disk_super) > 0) { - fs_info->data_alloc_profile = BTRFS_BLOCK_GROUP_RAID0; - fs_info->metadata_alloc_profile = BTRFS_BLOCK_GROUP_RAID1; - fs_info->system_alloc_profile = BTRFS_BLOCK_GROUP_RAID0; + fs_info->data_alloc_profile = BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID1; + fs_info->metadata_alloc_profile = BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_DUP; + fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; } mutex_unlock(&fs_info->fs_mutex); return tree_root; -- cgit v1.2.3 From d18a2c447524751137a12cc8ccaf9d1e0b7fa1b3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 4 Apr 2008 15:40:00 -0400 Subject: Btrfs: Fix allocation profile init Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 42522232fde4..bc5bd5abb28a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -912,13 +912,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_read_block_groups(extent_root); fs_info->generation = btrfs_super_generation(disk_super) + 1; - if (btrfs_super_num_devices(disk_super) > 0) { - fs_info->data_alloc_profile = BTRFS_BLOCK_GROUP_RAID0 | - BTRFS_BLOCK_GROUP_RAID1; - fs_info->metadata_alloc_profile = BTRFS_BLOCK_GROUP_RAID1 | - BTRFS_BLOCK_GROUP_DUP; - fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; - } + fs_info->data_alloc_profile = (u64)-1; + fs_info->metadata_alloc_profile = (u64)-1; + fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; + mutex_unlock(&fs_info->fs_mutex); return tree_root; -- cgit v1.2.3 From 728131d8e40c2a47c59ca91a806299c4708029f9 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Apr 2008 16:28:12 -0400 Subject: Btrfs: Add additional debugging for metadata checksum failures Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bc5bd5abb28a..e444b99e02da 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -283,6 +283,7 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) map = &BTRFS_I(page->mapping->host)->extent_tree; ret = try_release_extent_mapping(map, tree, page, gfp_flags); if (ret == 1) { + invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); @@ -376,7 +377,6 @@ int btrfs_verify_block_csum(struct btrfs_root *root, buf->flags |= EXTENT_CSUM; return 0; } - lock_extent(io_tree, buf->start, end, GFP_NOFS); if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { @@ -384,6 +384,7 @@ int btrfs_verify_block_csum(struct btrfs_root *root, ret = 0; goto out_unlock; } +WARN_ON(buf->flags & EXTENT_CSUM); ret = csum_tree_block(root, buf, 1); set_extent_bits(io_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); -- cgit v1.2.3 From ce9adaa5a792c2099a83246265eb4055bc38b6b8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Apr 2008 16:28:12 -0400 Subject: Btrfs: Do metadata checksums for reads via a workqueue Before, metadata checksumming was done by the callers of read_tree_block, which would set EXTENT_CSUM bits in the extent tree to show that a given range of pages was already checksummed and didn't need to be verified again. But, those bits could go away via try_to_releasepage, and the end result was bogus checksum failures on pages that never left the cache. The new code validates checksums when the page is read. It is a little tricky because metadata blocks can span pages and a single read may end up going via multiple bios. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 258 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 224 insertions(+), 34 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e444b99e02da..82109204788c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -24,6 +24,7 @@ #include #include #include // for block_sync_page +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -45,6 +46,16 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) #endif static struct extent_io_ops btree_extent_io_ops; +static struct workqueue_struct *end_io_workqueue; + +struct end_io_wq { + struct bio *bio; + bio_end_io_t *end_io; + void *private; + struct btrfs_fs_info *info; + int error; + struct list_head list; +}; struct extent_map *btree_get_extent(struct inode *inode, struct page *page, size_t page_offset, u64 start, u64 len, @@ -219,11 +230,108 @@ static int btree_writepage_io_hook(struct page *page, u64 start, u64 end) return 0; } +int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, + struct extent_state *state) +{ + struct extent_io_tree *tree; + u64 found_start; + int found_level; + unsigned long len; + struct extent_buffer *eb; + struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; + int ret; + + tree = &BTRFS_I(page->mapping->host)->io_tree; + if (page->private == EXTENT_PAGE_PRIVATE) + goto out; + if (!page->private) + goto out; + len = page->private >> 2; + if (len == 0) { + WARN_ON(1); + } + eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); + read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1, + btree_get_extent); + btrfs_clear_buffer_defrag(eb); + found_start = btrfs_header_bytenr(eb); + if (found_start != start) { + printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", + start, found_start, len); + WARN_ON(1); + goto err; + } + if (eb->first_page != page) { + printk("bad first page %lu %lu\n", eb->first_page->index, + page->index); + WARN_ON(1); + goto err; + } + found_level = btrfs_header_level(eb); + + ret = csum_tree_block(root, eb, 1); + + end = min_t(u64, eb->len, PAGE_CACHE_SIZE); + end = eb->start + end - 1; + release_extent_buffer_tail_pages(eb); +err: + free_extent_buffer(eb); +out: + return 0; +} + +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) +static void end_workqueue_bio(struct bio *bio, int err) +#else +static int end_workqueue_bio(struct bio *bio, + unsigned int bytes_done, int err) +#endif +{ + struct end_io_wq *end_io_wq = bio->bi_private; + struct btrfs_fs_info *fs_info; + unsigned long flags; + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + if (bio->bi_size) + return 1; +#endif + + fs_info = end_io_wq->info; + spin_lock_irqsave(&fs_info->end_io_work_lock, flags); + end_io_wq->error = err; + list_add_tail(&end_io_wq->list, &fs_info->end_io_work_list); + spin_unlock_irqrestore(&fs_info->end_io_work_lock, flags); + queue_work(end_io_workqueue, &fs_info->end_io_work); + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + return 0; +#endif +} + static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) { struct btrfs_root *root = BTRFS_I(inode)->root; + struct end_io_wq *end_io_wq; u64 offset; offset = bio->bi_sector << 9; + + if (rw & (1 << BIO_RW)) { + return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio); + } + + end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS); + if (!end_io_wq) + return -ENOMEM; + + end_io_wq->private = bio->bi_private; + end_io_wq->end_io = bio->bi_end_io; + end_io_wq->info = root->fs_info; + end_io_wq->error = 0; + end_io_wq->bio = bio; + + bio->bi_private = end_io_wq; + bio->bi_end_io = end_workqueue_bio; + if (offset == BTRFS_SUPER_INFO_OFFSET) { bio->bi_bdev = root->fs_info->sb->s_bdev; submit_bio(rw, bio); @@ -363,36 +471,7 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) int btrfs_verify_block_csum(struct btrfs_root *root, struct extent_buffer *buf) { - struct extent_io_tree *io_tree; - u64 end; - int ret; - - io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; - if (buf->flags & EXTENT_CSUM) - return 0; - - end = min_t(u64, buf->len, PAGE_CACHE_SIZE); - end = buf->start + end - 1; - if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { - buf->flags |= EXTENT_CSUM; - return 0; - } - lock_extent(io_tree, buf->start, end, GFP_NOFS); - - if (test_range_bit(io_tree, buf->start, end, EXTENT_CSUM, 1)) { - buf->flags |= EXTENT_CSUM; - ret = 0; - goto out_unlock; - } -WARN_ON(buf->flags & EXTENT_CSUM); - - ret = csum_tree_block(root, buf, 1); - set_extent_bits(io_tree, buf->start, end, EXTENT_CSUM, GFP_NOFS); - buf->flags |= EXTENT_CSUM; - -out_unlock: - unlock_extent(io_tree, buf->start, end, GFP_NOFS); - return ret; + return btrfs_buffer_uptodate(buf); } struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, @@ -430,11 +509,15 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) return NULL; - read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1, - btree_get_extent); - ret = btrfs_verify_block_csum(root, buf); + ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, + 1, btree_get_extent); + + if (ret == 0) { + buf->flags |= EXTENT_UPTODATE; + } return buf; + } int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -724,6 +807,99 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) return 0; } +static int bio_ready_for_csum(struct bio *bio) +{ + u64 length = 0; + u64 buf_len = 0; + u64 start = 0; + struct page *page; + struct extent_io_tree *io_tree = NULL; + struct btrfs_fs_info *info = NULL; + struct bio_vec *bvec; + int i; + int ret; + + bio_for_each_segment(bvec, bio, i) { + page = bvec->bv_page; + if (page->private == EXTENT_PAGE_PRIVATE) { + length += bvec->bv_len; + continue; + } + if (!page->private) { + length += bvec->bv_len; + continue; + } + length = bvec->bv_len; + buf_len = page->private >> 2; + start = page_offset(page) + bvec->bv_offset; + io_tree = &BTRFS_I(page->mapping->host)->io_tree; + info = BTRFS_I(page->mapping->host)->root->fs_info; + } + /* are we fully contained in this bio? */ + if (buf_len <= length) + return 1; + + ret = extent_range_uptodate(io_tree, start + length, + start + buf_len - 1); + if (ret == 1) + return ret; + return ret; +} + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) +void btrfs_end_io_csum(void *p) +#else +void btrfs_end_io_csum(struct work_struct *work) +#endif +{ +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + struct btrfs_fs_info *fs_info = p; +#else + struct btrfs_fs_info *fs_info = container_of(work, + struct btrfs_fs_info, + end_io_work); +#endif + unsigned long flags; + struct end_io_wq *end_io_wq; + struct bio *bio; + struct list_head *next; + int error; + int was_empty; + + while(1) { + spin_lock_irqsave(&fs_info->end_io_work_lock, flags); + if (list_empty(&fs_info->end_io_work_list)) { + spin_unlock_irqrestore(&fs_info->end_io_work_lock, + flags); + return; + } + next = fs_info->end_io_work_list.next; + list_del(next); + spin_unlock_irqrestore(&fs_info->end_io_work_lock, flags); + + end_io_wq = list_entry(next, struct end_io_wq, list); + + bio = end_io_wq->bio; + if (!bio_ready_for_csum(bio)) { + spin_lock_irqsave(&fs_info->end_io_work_lock, flags); + was_empty = list_empty(&fs_info->end_io_work_list); + list_add_tail(&end_io_wq->list, + &fs_info->end_io_work_list); + spin_unlock_irqrestore(&fs_info->end_io_work_lock, + flags); + if (was_empty) + return; + continue; + } + error = end_io_wq->error; + bio->bi_private = end_io_wq->private; + bio->bi_end_io = end_io_wq->end_io; + kfree(end_io_wq); + bio_endio(bio, error); + } +} + + struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices) { @@ -750,11 +926,16 @@ struct btrfs_root *open_ctree(struct super_block *sb, err = -ENOMEM; goto fail; } + end_io_workqueue = create_workqueue("btrfs-end-io"); + BUG_ON(!end_io_workqueue); + INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); + INIT_LIST_HEAD(&fs_info->end_io_work_list); spin_lock_init(&fs_info->hash_lock); + spin_lock_init(&fs_info->end_io_work_lock); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); @@ -799,6 +980,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; + INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); #else @@ -1044,6 +1226,8 @@ int close_ctree(struct btrfs_root *root) extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + flush_workqueue(end_io_workqueue); + destroy_workqueue(end_io_workqueue); iput(fs_info->btree_inode); #if 0 @@ -1171,12 +1355,18 @@ int btrfs_read_buffer(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; struct inode *btree_inode = root->fs_info->btree_inode; - return read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, + int ret; + ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, 1, btree_get_extent); + if (ret == 0) { + buf->flags |= EXTENT_UPTODATE; + } + return ret; } static struct extent_io_ops btree_extent_io_ops = { .writepage_io_hook = btree_writepage_io_hook, + .readpage_end_io_hook = btree_readpage_end_io_hook, .submit_bio_hook = btree_submit_bio_hook, /* note we're sharing with inode.c for the merge bio hook */ .merge_bio_hook = btrfs_merge_bio_hook, -- cgit v1.2.3 From 22c599485b1fdd95e4476a4752596a6cf6c6629a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Apr 2008 16:28:12 -0400 Subject: Btrfs: Handle data block end_io through the async work queue Before it was done by the bio end_io routine, the work queue code is able to scale much better with faster IO subsystems. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 82109204788c..94ff87d0eae4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -54,6 +54,7 @@ struct end_io_wq { void *private; struct btrfs_fs_info *info; int error; + int metadata; struct list_head list; }; @@ -308,29 +309,40 @@ static int end_workqueue_bio(struct bio *bio, #endif } -static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) +int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, + int metadata) { - struct btrfs_root *root = BTRFS_I(inode)->root; struct end_io_wq *end_io_wq; - u64 offset; - offset = bio->bi_sector << 9; - - if (rw & (1 << BIO_RW)) { - return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio); - } - end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS); if (!end_io_wq) return -ENOMEM; end_io_wq->private = bio->bi_private; end_io_wq->end_io = bio->bi_end_io; - end_io_wq->info = root->fs_info; + end_io_wq->info = info; end_io_wq->error = 0; end_io_wq->bio = bio; + end_io_wq->metadata = metadata; bio->bi_private = end_io_wq; bio->bi_end_io = end_workqueue_bio; + return 0; +} + +static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + u64 offset; + int ret; + + offset = bio->bi_sector << 9; + + if (rw & (1 << BIO_RW)) { + return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio); + } + + ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1); + BUG_ON(ret); if (offset == BTRFS_SUPER_INFO_OFFSET) { bio->bi_bdev = root->fs_info->sb->s_bdev; @@ -880,7 +892,7 @@ void btrfs_end_io_csum(struct work_struct *work) end_io_wq = list_entry(next, struct end_io_wq, list); bio = end_io_wq->bio; - if (!bio_ready_for_csum(bio)) { + if (end_io_wq->metadata && !bio_ready_for_csum(bio)) { spin_lock_irqsave(&fs_info->end_io_work_lock, flags); was_empty = list_empty(&fs_info->end_io_work_list); list_add_tail(&end_io_wq->list, -- cgit v1.2.3 From f188591e987e21b6f7f8864c66a02858b95b530e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 9 Apr 2008 16:28:12 -0400 Subject: Btrfs: Retry metadata reads in the face of checksum failures Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 72 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 94ff87d0eae4..59bdf0474be3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -156,7 +156,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, memcpy(&found, result, BTRFS_CRC32_SIZE); read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE); - WARN_ON(1); printk("btrfs: %s checksum verify failed on %llu " "wanted %X found %X from_this_trans %d " "level %d\n", @@ -171,6 +170,40 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, return 0; } +static int btree_read_extent_buffer_pages(struct btrfs_root *root, + struct extent_buffer *eb, + u64 start) +{ + struct extent_io_tree *io_tree; + int ret; + int num_copies = 0; + int mirror_num = 0; + + io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; + while (1) { + ret = read_extent_buffer_pages(io_tree, eb, start, 1, + btree_get_extent, mirror_num); + if (!ret) { + if (mirror_num) +printk("good read %Lu mirror %d total %d\n", eb->start, mirror_num, num_copies); + return ret; + } + num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, + eb->start, eb->len); +printk("failed to read %Lu mirror %d total %d\n", eb->start, mirror_num, num_copies); + if (num_copies == 1) { +printk("reading %Lu failed only one copy\n", eb->start); + return ret; + } + mirror_num++; + if (mirror_num > num_copies) { +printk("bailing at mirror %d of %d\n", mirror_num, num_copies); + return ret; + } + } +printk("read extent buffer page last\n"); + return -EIO; +} int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { @@ -180,6 +213,8 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) int found_level; unsigned long len; struct extent_buffer *eb; + int ret; + tree = &BTRFS_I(page->mapping->host)->io_tree; if (page->private == EXTENT_PAGE_PRIVATE) @@ -191,8 +226,8 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) WARN_ON(1); } eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); - read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1, - btree_get_extent); + ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE); + BUG_ON(ret); btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { @@ -240,7 +275,7 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, unsigned long len; struct extent_buffer *eb; struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; - int ret; + int ret = 0; tree = &BTRFS_I(page->mapping->host)->io_tree; if (page->private == EXTENT_PAGE_PRIVATE) @@ -252,25 +287,26 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, WARN_ON(1); } eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); - read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1, - btree_get_extent); + btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { - printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", - start, found_start, len); - WARN_ON(1); +printk("bad start on %Lu found %Lu\n", eb->start, found_start); + ret = -EIO; goto err; } if (eb->first_page != page) { printk("bad first page %lu %lu\n", eb->first_page->index, page->index); WARN_ON(1); + ret = -EIO; goto err; } found_level = btrfs_header_level(eb); ret = csum_tree_block(root, eb, 1); + if (ret) + ret = -EIO; end = min_t(u64, eb->len, PAGE_CACHE_SIZE); end = eb->start + end - 1; @@ -278,7 +314,7 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, err: free_extent_buffer(eb); out: - return 0; + return ret; } #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) @@ -329,7 +365,8 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, return 0; } -static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) +static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, + int mirror_num) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 offset; @@ -338,7 +375,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) offset = bio->bi_sector << 9; if (rw & (1 << BIO_RW)) { - return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio); + return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num); } ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1); @@ -349,7 +386,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio) submit_bio(rw, bio); return 0; } - return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio); + return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num); } static int btree_writepage(struct page *page, struct writeback_control *wbc) @@ -459,7 +496,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) if (!buf) return 0; read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, - buf, 0, 0, btree_get_extent); + buf, 0, 0, btree_get_extent, 0); free_extent_buffer(buf); return ret; } @@ -522,8 +559,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, if (!buf) return NULL; - ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, - 1, btree_get_extent); + ret = btree_read_extent_buffer_pages(root, buf, 0); if (ret == 0) { buf->flags |= EXTENT_UPTODATE; @@ -1366,10 +1402,8 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf) int btrfs_read_buffer(struct extent_buffer *buf) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; int ret; - ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, - buf, 0, 1, btree_get_extent); + ret = btree_read_extent_buffer_pages(root, buf, 0); if (ret == 0) { buf->flags |= EXTENT_UPTODATE; } -- cgit v1.2.3 From f29844623de29a12358d7fba35d0959465b64adf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 10 Apr 2008 16:19:33 -0400 Subject: Btrfs: Write out all super blocks on commit, and bring back proper barrier support Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 113 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 59bdf0474be3..cf1de75f088a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -382,7 +382,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, BUG_ON(ret); if (offset == BTRFS_SUPER_INFO_OFFSET) { - bio->bi_bdev = root->fs_info->sb->s_bdev; + bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; submit_bio(rw, bio); return 0; } @@ -988,7 +988,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, spin_lock_init(&fs_info->new_trans_lock); init_completion(&fs_info->kobj_unregister); - sb_set_blocksize(sb, 4096); + sb_set_blocksize(sb, BTRFS_SUPER_INFO_SIZE); fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->chunk_root = chunk_root; @@ -1169,14 +1169,121 @@ fail: return ERR_PTR(err); } +static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) +{ + char b[BDEVNAME_SIZE]; + + if (uptodate) { + set_buffer_uptodate(bh); + } else { + if (!buffer_eopnotsupp(bh) && printk_ratelimit()) { + printk(KERN_WARNING "lost page write due to " + "I/O error on %s\n", + bdevname(bh->b_bdev, b)); + } + set_buffer_write_io_error(bh); + clear_buffer_uptodate(bh); + } + unlock_buffer(bh); + put_bh(bh); +} + +int write_all_supers(struct btrfs_root *root) +{ + struct list_head *cur; + struct list_head *head = &root->fs_info->fs_devices->devices; + struct btrfs_device *dev; + struct extent_buffer *sb; + struct btrfs_dev_item *dev_item; + struct buffer_head *bh; + int ret; + int do_barriers; + + do_barriers = !btrfs_test_opt(root, NOBARRIER); + + sb = root->fs_info->sb_buffer; + dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block, + dev_item); + list_for_each(cur, head) { + dev = list_entry(cur, struct btrfs_device, dev_list); + btrfs_set_device_type(sb, dev_item, dev->type); + btrfs_set_device_id(sb, dev_item, dev->devid); + btrfs_set_device_total_bytes(sb, dev_item, dev->total_bytes); + btrfs_set_device_bytes_used(sb, dev_item, dev->bytes_used); + btrfs_set_device_io_align(sb, dev_item, dev->io_align); + btrfs_set_device_io_width(sb, dev_item, dev->io_width); + btrfs_set_device_sector_size(sb, dev_item, dev->sector_size); + write_extent_buffer(sb, dev->uuid, + (unsigned long)btrfs_device_uuid(dev_item), + BTRFS_DEV_UUID_SIZE); + + btrfs_set_header_flag(sb, BTRFS_HEADER_FLAG_WRITTEN); + csum_tree_block(root, sb, 0); + + bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / + root->fs_info->sb->s_blocksize, + BTRFS_SUPER_INFO_SIZE); + + read_extent_buffer(sb, bh->b_data, 0, BTRFS_SUPER_INFO_SIZE); + dev->pending_io = bh; + + get_bh(bh); + set_buffer_uptodate(bh); + lock_buffer(bh); + bh->b_end_io = btrfs_end_buffer_write_sync; + + if (do_barriers && dev->barriers) { + ret = submit_bh(WRITE_BARRIER, bh); + if (ret == -EOPNOTSUPP) { + printk("btrfs: disabling barriers on dev %s\n", + dev->name); + set_buffer_uptodate(bh); + dev->barriers = 0; + get_bh(bh); + lock_buffer(bh); + ret = submit_bh(WRITE, bh); + } + } else { + ret = submit_bh(WRITE, bh); + } + BUG_ON(ret); + } + + list_for_each(cur, head) { + dev = list_entry(cur, struct btrfs_device, dev_list); + BUG_ON(!dev->pending_io); + bh = dev->pending_io; + wait_on_buffer(bh); + if (!buffer_uptodate(dev->pending_io)) { + if (do_barriers && dev->barriers) { + printk("btrfs: disabling barriers on dev %s\n", + dev->name); + set_buffer_uptodate(bh); + get_bh(bh); + lock_buffer(bh); + dev->barriers = 0; + ret = submit_bh(WRITE, bh); + BUG_ON(ret); + wait_on_buffer(bh); + BUG_ON(!buffer_uptodate(bh)); + } else { + BUG(); + } + + } + dev->pending_io = NULL; + brelse(bh); + } + return 0; +} + int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret; - struct extent_buffer *super = root->fs_info->sb_buffer; - struct inode *btree_inode = root->fs_info->btree_inode; - struct super_block *sb = root->fs_info->sb; + ret = write_all_supers(root); +#if 0 if (!btrfs_test_opt(root, NOBARRIER)) blkdev_issue_flush(sb->s_bdev, NULL); set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, super); @@ -1184,6 +1291,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root super->start, super->len); if (!btrfs_test_opt(root, NOBARRIER)) blkdev_issue_flush(sb->s_bdev, NULL); +#endif return ret; } -- cgit v1.2.3 From 3dd39914bc0737d055c780d29c20a1d117819cdb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 11 Apr 2008 10:51:07 -0400 Subject: Btrfs: Add extra checks to avoid removing extent_state from pages we can't free Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index cf1de75f088a..e22960e23501 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -436,6 +436,12 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) struct extent_map_tree *map; int ret; + if (page_count(page) > 3) { + /* once for page->private, once for the caller, once + * once for the page cache + */ + return 0; + } tree = &BTRFS_I(page->mapping->host)->io_tree; map = &BTRFS_I(page->mapping->host)->extent_tree; ret = try_release_extent_mapping(map, tree, page, gfp_flags); -- cgit v1.2.3 From a5eb62e345fc1818d0d8b6181463200a9e8dfe39 Mon Sep 17 00:00:00 2001 From: Miguel Date: Fri, 11 Apr 2008 15:45:51 -0400 Subject: Btrfs: Endianess bug fix for v0.13 with kernels Fix for a endianess BUG when using btrfs v0.13 with kernels older than 2.6.23 Problem: Has of v0.13, btrfs-progs is using crc32c.c equivalent to the one found on linux-2.6.23/lib/libcrc32c.c Since crc32c_le() changed in linux-2.6.23, when running btrfs v0.13 with older kernels we have a missmatch between the versions of crc32c_le() from btrfs-progs and libcrc32c in the kernel. This missmatch causes a bug when using btrfs on big endian machines. Solution: btrfs_crc32c() macro that when compiling for kernels older than 2.6.23, does endianess conversion to parameters and return value of crc32c(). This endianess conversion nullifies the differences in implementation of crc32c_le(). If kernel 2.6.23 or better, it calls crc32c(). Signed-off-by: Miguel Sousa Filipe --- Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e22960e23501..0bfcc31d94d7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -18,13 +18,13 @@ #include #include -#include #include #include #include #include #include // for block_sync_page #include +#include "crc32c.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -100,7 +100,7 @@ out: u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len) { - return crc32c(seed, data, len); + return btrfs_crc32c(seed, data, len); } void btrfs_csum_final(u32 crc, char *result) -- cgit v1.2.3 From 73f61b2a6459df982cb4faf0e5cf5ac8b153aaff Mon Sep 17 00:00:00 2001 From: Miguel Date: Fri, 11 Apr 2008 15:50:59 -0400 Subject: Btrfs: bio_endio support for linux 2.6.23 and older. bio_endio() changed prototype on linux 2.6.24, support older kernels using the older prototype. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0bfcc31d94d7..aebe8c21ec80 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -949,11 +949,14 @@ void btrfs_end_io_csum(struct work_struct *work) bio->bi_private = end_io_wq->private; bio->bi_end_io = end_io_wq->end_io; kfree(end_io_wq); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + bio_endio(bio, bio->bi_size, err); +#else bio_endio(bio, error); +#endif } } - struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices) { -- cgit v1.2.3 From b248a4152956cbae1b23f4c70ef5b51d6ea2ecfb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 14 Apr 2008 09:48:18 -0400 Subject: Btrfs: A few updates for 2.6.18 and versions older than 2.6.25 This includes fixing a missing spinlock init call that caused oops on mount for most kernels other than 2.6.25. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index aebe8c21ec80..9c94dddde704 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -850,7 +850,9 @@ void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) { +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) bdi_init(bdi); +#endif bdi->ra_pages = default_backing_dev_info.ra_pages * 4; bdi->state = 0; bdi->capabilities = default_backing_dev_info.capabilities; @@ -950,7 +952,7 @@ void btrfs_end_io_csum(struct work_struct *work) bio->bi_end_io = end_io_wq->end_io; kfree(end_io_wq); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - bio_endio(bio, bio->bi_size, err); + bio_endio(bio, bio->bi_size, error); #else bio_endio(bio, error); #endif @@ -1037,10 +1039,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; - INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum, fs_info); INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); #else + INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum); INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); #endif BTRFS_I(fs_info->btree_inode)->root = tree_root; @@ -1173,7 +1176,9 @@ fail: close_all_devices(fs_info); kfree(extent_root); kfree(tree_root); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) bdi_destroy(&fs_info->bdi); +#endif kfree(fs_info); return ERR_PTR(err); } @@ -1407,7 +1412,10 @@ int close_ctree(struct btrfs_root *root) #endif close_all_devices(fs_info); btrfs_mapping_tree_free(&fs_info->mapping_tree); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) bdi_destroy(&fs_info->bdi); +#endif kfree(fs_info->extent_root); kfree(fs_info->tree_root); -- cgit v1.2.3 From e17cade25ff8074101d653557a78df09c16ca276 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 15 Apr 2008 15:41:47 -0400 Subject: Btrfs: Add chunk uuids and update multi-device back references Block headers now store the chunk tree uuid Chunk items records the device uuid for each stripes Device extent items record better back refs to the chunk tree Block groups record better back refs to the chunk tree The chunk tree format has also changed. The objectid of BTRFS_CHUNK_ITEM_KEY used to be the logical offset of the chunk. Now it is a chunk tree id, with the logical offset being stored in the offset field of the key. This allows a single chunk tree to record multiple logical address spaces, upping the number of bytes indexed by a chunk tree from 2^64 to 2^128. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9c94dddde704..79c284c87286 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1125,6 +1125,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, blocksize); BUG_ON(!chunk_root->node); + read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, + (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), + BTRFS_UUID_SIZE); + ret = btrfs_read_chunk_tree(chunk_root); BUG_ON(ret); @@ -1229,7 +1233,7 @@ int write_all_supers(struct btrfs_root *root) btrfs_set_device_sector_size(sb, dev_item, dev->sector_size); write_extent_buffer(sb, dev->uuid, (unsigned long)btrfs_device_uuid(dev_item), - BTRFS_DEV_UUID_SIZE); + BTRFS_UUID_SIZE); btrfs_set_header_flag(sb, BTRFS_HEADER_FLAG_WRITTEN); csum_tree_block(root, sb, 0); -- cgit v1.2.3 From 44b8bd7edda4f63de180d0f7325c9fb704b3806b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 16 Apr 2008 11:14:51 -0400 Subject: Btrfs: Create a work queue for bio writes This allows checksumming to happen in parallel among many cpus, and keeps us from bogging down pdflush with the checksumming code. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 90 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 79c284c87286..9e41ea93ebce 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -47,6 +47,7 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) static struct extent_io_ops btree_extent_io_ops; static struct workqueue_struct *end_io_workqueue; +static struct workqueue_struct *async_submit_workqueue; struct end_io_wq { struct bio *bio; @@ -58,6 +59,15 @@ struct end_io_wq { struct list_head list; }; +struct async_submit_bio { + struct inode *inode; + struct bio *bio; + struct list_head list; + extent_submit_bio_hook_t *submit_bio_hook; + int rw; + int mirror_num; +}; + struct extent_map *btree_get_extent(struct inode *inode, struct page *page, size_t page_offset, u64 start, u64 len, int create) @@ -365,7 +375,31 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, return 0; } -static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, +int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, + int rw, struct bio *bio, int mirror_num, + extent_submit_bio_hook_t *submit_bio_hook) +{ + struct async_submit_bio *async; + + async = kmalloc(sizeof(*async), GFP_NOFS); + if (!async) + return -ENOMEM; + + async->inode = inode; + async->rw = rw; + async->bio = bio; + async->mirror_num = mirror_num; + async->submit_bio_hook = submit_bio_hook; + + spin_lock(&fs_info->async_submit_work_lock); + list_add_tail(&async->list, &fs_info->async_submit_work_list); + spin_unlock(&fs_info->async_submit_work_lock); + + queue_work(async_submit_workqueue, &fs_info->async_submit_work); + return 0; +} + +static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num) { struct btrfs_root *root = BTRFS_I(inode)->root; @@ -389,6 +423,17 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num); } +static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, + int mirror_num) +{ + if (!(rw & (1 << BIO_RW))) { + return __btree_submit_bio_hook(inode, rw, bio, mirror_num); + } + return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, + inode, rw, bio, mirror_num, + __btree_submit_bio_hook); +} + static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct extent_io_tree *tree; @@ -903,9 +948,9 @@ static int bio_ready_for_csum(struct bio *bio) } #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) -void btrfs_end_io_csum(void *p) +static void btrfs_end_io_csum(void *p) #else -void btrfs_end_io_csum(struct work_struct *work) +static void btrfs_end_io_csum(struct work_struct *work) #endif { #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) @@ -959,6 +1004,39 @@ void btrfs_end_io_csum(struct work_struct *work) } } +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) +static void btrfs_async_submit_work(void *p) +#else +static void btrfs_async_submit_work(struct work_struct *work) +#endif +{ +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) + struct btrfs_fs_info *fs_info = p; +#else + struct btrfs_fs_info *fs_info = container_of(work, + struct btrfs_fs_info, + async_submit_work); +#endif + struct async_submit_bio *async; + struct list_head *next; + + while(1) { + spin_lock(&fs_info->async_submit_work_lock); + if (list_empty(&fs_info->async_submit_work_list)) { + spin_unlock(&fs_info->async_submit_work_lock); + return; + } + next = fs_info->async_submit_work_list.next; + list_del(next); + spin_unlock(&fs_info->async_submit_work_lock); + + async = list_entry(next, struct async_submit_bio, list); + async->submit_bio_hook(async->inode, async->rw, async->bio, + async->mirror_num); + kfree(async); + } +} + struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices) { @@ -987,14 +1065,17 @@ struct btrfs_root *open_ctree(struct super_block *sb, } end_io_workqueue = create_workqueue("btrfs-end-io"); BUG_ON(!end_io_workqueue); + async_submit_workqueue = create_workqueue("btrfs-async-submit"); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); INIT_LIST_HEAD(&fs_info->end_io_work_list); + INIT_LIST_HEAD(&fs_info->async_submit_work_list); spin_lock_init(&fs_info->hash_lock); spin_lock_init(&fs_info->end_io_work_lock); + spin_lock_init(&fs_info->async_submit_work_lock); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); @@ -1041,9 +1122,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum, fs_info); + INIT_WORK(&fs_info->async_submit_work, btrfs_async_submit_work, + fs_info); INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); #else INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum); + INIT_WORK(&fs_info->async_submit_work, btrfs_async_submit_work); INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); #endif BTRFS_I(fs_info->btree_inode)->root = tree_root; @@ -1403,6 +1487,9 @@ int close_ctree(struct btrfs_root *root) flush_workqueue(end_io_workqueue); destroy_workqueue(end_io_workqueue); + flush_workqueue(async_submit_workqueue); + destroy_workqueue(async_submit_workqueue); + iput(fs_info->btree_inode); #if 0 while(!list_empty(&fs_info->hashers)) { -- cgit v1.2.3 From 7b859fe7cd383fa5e7e379884a748680d0af7bcc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 16 Apr 2008 13:06:16 -0400 Subject: Btrfs: Only do async bio submission for pdflush Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9e41ea93ebce..76ee7a4ae00c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -381,6 +381,13 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, { struct async_submit_bio *async; + /* + * inline writerback should stay inline, only hop to the async + * queue if we're pdflush + */ + if (!current_is_pdflush()) + return submit_bio_hook(inode, rw, bio, mirror_num); + async = kmalloc(sizeof(*async), GFP_NOFS); if (!async) return -ENOMEM; -- cgit v1.2.3 From 7b13b7b119c932a5eca486db4113f4c1fe3b97a8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Apr 2008 10:29:50 -0400 Subject: Btrfs: Don't drop extent_map cache during releasepage on the btree inode The btree inode should only have a single extent_map in the cache, it doesn't make sense to ever drop it. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 76ee7a4ae00c..2de2b00afebc 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -76,13 +76,12 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, struct extent_map *em; int ret; -again: spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); spin_unlock(&em_tree->lock); - if (em) { + if (em) goto out; - } + em = alloc_extent_map(GFP_NOFS); if (!em) { em = ERR_PTR(-ENOMEM); @@ -95,15 +94,21 @@ again: spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); - spin_unlock(&em_tree->lock); - if (ret == -EEXIST) { free_extent_map(em); - em = NULL; - goto again; + em = lookup_extent_mapping(em_tree, start, len); + if (em) + ret = 0; + else + ret = -EIO; } else if (ret) { - em = ERR_PTR(ret); + free_extent_map(em); + em = NULL; } + spin_unlock(&em_tree->lock); + + if (ret) + em = ERR_PTR(ret); out: return em; } @@ -496,7 +501,7 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) } tree = &BTRFS_I(page->mapping->host)->io_tree; map = &BTRFS_I(page->mapping->host)->extent_tree; - ret = try_release_extent_mapping(map, tree, page, gfp_flags); + ret = try_release_extent_state(map, tree, page, gfp_flags); if (ret == 1) { invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); ClearPagePrivate(page); -- cgit v1.2.3 From 0afbaf8c8216a0de64b55c2d455de2b6c9dde5eb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Apr 2008 14:17:20 -0400 Subject: Btrfs: Set the btree inode i_size to OFFSET_MAX Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2de2b00afebc..9d648f2f41a5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -88,19 +88,35 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, goto out; } em->start = 0; - em->len = i_size_read(inode); + em->len = (u64)-1; em->block_start = 0; em->bdev = inode->i_sb->s_bdev; spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); if (ret == -EEXIST) { + u64 failed_start = em->start; + u64 failed_len = em->len; + + printk("failed to insert %Lu %Lu -> %Lu into tree\n", + em->start, em->len, em->block_start); free_extent_map(em); em = lookup_extent_mapping(em_tree, start, len); - if (em) + if (em) { + printk("after failing, found %Lu %Lu %Lu\n", + em->start, em->len, em->block_start); ret = 0; - else + } else { + em = lookup_extent_mapping(em_tree, failed_start, + failed_len); + if (em) { + printk("double failure lookup gives us " + "%Lu %Lu -> %Lu\n", em->start, + em->len, em->block_start); + free_extent_map(em); + } ret = -EIO; + } } else if (ret) { free_extent_map(em); em = NULL; @@ -1108,7 +1124,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; - fs_info->btree_inode->i_size = sb->s_bdev->bd_inode->i_size; + + /* + * we set the i_size on the btree inode to the max possible int. + * the real end of the address space is determined by all of + * the devices in the system + */ + fs_info->btree_inode->i_size = OFFSET_MAX; fs_info->btree_inode->i_mapping->a_ops = &btree_aops; fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi; @@ -1196,9 +1218,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, tree_root->stripesize = stripesize; sb_set_blocksize(sb, sectorsize); - i_size_write(fs_info->btree_inode, - btrfs_super_total_bytes(disk_super)); - if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { printk("btrfs: valid FS not found on %s\n", sb->s_id); -- cgit v1.2.3 From 9ad6b7bc2e00ba02f915cffd5b6bcd6564bb2c75 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Apr 2008 16:11:30 -0400 Subject: Force page->private removal in btrfs_invalidatepage btrfs_invalidatepage is not allowed to leave pages around on the lru. Any such pages will trigger an oops later on because the VM will see page->private and assume it is a buffer head. This also forces extra flushes of the async work queues before dropping all the pages on the btree inode during unmount. Left over items on the work queues are one possible cause of busy state ranges during truncate_inode_pages. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9d648f2f41a5..b479cdfe3ee8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -533,6 +533,12 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) tree = &BTRFS_I(page->mapping->host)->io_tree; extent_invalidatepage(tree, page, offset); btree_releasepage(page, GFP_NOFS); + if (PagePrivate(page)) { + printk("2invalidate page cleaning up after releasepage\n"); + ClearPagePrivate(page); + set_page_private(page, 0); + page_cache_release(page); + } } #if 0 @@ -1484,6 +1490,8 @@ int close_ctree(struct btrfs_root *root) write_ctree_super(NULL, root); mutex_unlock(&fs_info->fs_mutex); + btrfs_transaction_flush_work(root); + if (fs_info->delalloc_bytes) { printk("btrfs: at unmount delalloc count %Lu\n", fs_info->delalloc_bytes); @@ -1514,7 +1522,11 @@ int close_ctree(struct btrfs_root *root) extent_io_tree_empty_lru(&fs_info->extent_ins); extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); + flush_workqueue(end_io_workqueue); + flush_workqueue(async_submit_workqueue); + truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + flush_workqueue(end_io_workqueue); destroy_workqueue(end_io_workqueue); -- cgit v1.2.3 From 4575c9cceeca2f51c50536850e15e1dc5187f3d9 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 18 Apr 2008 16:13:31 -0400 Subject: Btrfs: Scale the bdi ra_pages by the number of devices in the FS Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b479cdfe3ee8..46188ee16623 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -932,7 +932,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) bdi_init(bdi); #endif - bdi->ra_pages = default_backing_dev_info.ra_pages * 4; + bdi->ra_pages = default_backing_dev_info.ra_pages; bdi->state = 0; bdi->capabilities = default_backing_dev_info.capabilities; bdi->unplug_io_fn = btrfs_unplug_io_fn; @@ -1214,6 +1214,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, (unsigned long long)fs_devices->num_devices); goto fail_sb_buffer; } + fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); + nodesize = btrfs_super_nodesize(disk_super); leafsize = btrfs_super_leafsize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); -- cgit v1.2.3 From 4ef64eae288a3644d1f8b748eb075426701e96d1 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 21 Apr 2008 08:52:50 -0400 Subject: Btrfs: Remove debugging statements from the invalidatepage calls Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 46188ee16623..c829612c797e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -534,7 +534,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) extent_invalidatepage(tree, page, offset); btree_releasepage(page, GFP_NOFS); if (PagePrivate(page)) { - printk("2invalidate page cleaning up after releasepage\n"); + invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); -- cgit v1.2.3 From f2d8d74d7874f8f81222363cd6459a365796e35a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 21 Apr 2008 10:03:05 -0400 Subject: Btrfs: Make an unplug function that doesn't unplug every spindle Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c829612c797e..7f5aca35494d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -913,18 +913,22 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) { - struct list_head *cur; - struct btrfs_device *device; - struct btrfs_fs_info *info; + struct inode *inode = page->mapping->host; + struct extent_map_tree *em_tree; + struct extent_map *em; + u64 offset = page_offset(page); - info = (struct btrfs_fs_info *)bdi->unplug_io_data; - list_for_each(cur, &info->fs_devices->devices) { - device = list_entry(cur, struct btrfs_device, dev_list); - bdi = blk_get_backing_dev_info(device->bdev); - if (bdi->unplug_io_fn) { - bdi->unplug_io_fn(bdi, page); - } - } + em_tree = &BTRFS_I(inode)->extent_tree; + spin_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); + spin_unlock(&em_tree->lock); + if (!em) + return; + + offset = offset - em->start; + btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree, + em->block_start + offset, page); + free_extent_map(em); } static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) -- cgit v1.2.3 From 38b669880df44cba24ab1b01715b87318d469217 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Apr 2008 09:22:11 -0400 Subject: Deal with page == NULL in the btrfs_unplug_io_fn Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7f5aca35494d..19c258d2c7e2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -911,12 +911,40 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) return ret; } +/* + * this unplugs every device on the box, and it is only used when page + * is null + */ +static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) +{ + struct list_head *cur; + struct btrfs_device *device; + struct btrfs_fs_info *info; + + info = (struct btrfs_fs_info *)bdi->unplug_io_data; + list_for_each(cur, &info->fs_devices->devices) { + device = list_entry(cur, struct btrfs_device, dev_list); + bdi = blk_get_backing_dev_info(device->bdev); + if (bdi->unplug_io_fn) { + bdi->unplug_io_fn(bdi, page); + } + } +} + void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) { - struct inode *inode = page->mapping->host; + struct inode *inode; struct extent_map_tree *em_tree; struct extent_map *em; - u64 offset = page_offset(page); + u64 offset; + + if (!page) { + __unplug_io_fn(bdi, page); + return; + } + + inode = page->mapping->host; + offset = page_offset(page); em_tree = &BTRFS_I(inode)->extent_tree; spin_lock(&em_tree->lock); -- cgit v1.2.3 From bcbfce8abd5f8d3f84eab60a9df1ec147f81c34f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Apr 2008 13:26:47 -0400 Subject: Btrfs: Fix the unplug_io_fn to grab a consistent copy of page->mapping Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 19c258d2c7e2..1281c393c7e6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -936,14 +936,25 @@ void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) struct inode *inode; struct extent_map_tree *em_tree; struct extent_map *em; + struct address_space *mapping; u64 offset; + /* the generic O_DIRECT read code does this */ if (!page) { __unplug_io_fn(bdi, page); return; } - inode = page->mapping->host; + /* + * page->mapping may change at any time. Get a consistent copy + * and use that for everything below + */ + smp_mb(); + mapping = page->mapping; + if (!mapping) + return; + + inode = mapping->host; offset = page_offset(page); em_tree = &BTRFS_I(inode)->extent_tree; -- cgit v1.2.3 From 84eed90fac1b927a2657ff3bb7a0f18b9cb688f7 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 25 Apr 2008 09:04:37 -0400 Subject: Btrfs: Add failure handling for read_sys_array Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1281c393c7e6..71838264ca6b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1278,7 +1278,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_lock(&fs_info->fs_mutex); ret = btrfs_read_sys_array(tree_root); - BUG_ON(ret); + if (ret) { + printk("btrfs: failed to read the system array on %s\n", + sb->s_id); + goto fail_sys_array; + } blocksize = btrfs_level_size(tree_root, btrfs_super_chunk_root_level(disk_super)); @@ -1335,8 +1339,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, fail_extent_root: free_extent_buffer(extent_root->node); fail_tree_root: - mutex_unlock(&fs_info->fs_mutex); free_extent_buffer(tree_root->node); +fail_sys_array: + mutex_unlock(&fs_info->fs_mutex); fail_sb_buffer: free_extent_buffer(fs_info->sb_buffer); extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); @@ -1344,6 +1349,8 @@ fail_iput: iput(fs_info->btree_inode); fail: close_all_devices(fs_info); + btrfs_mapping_tree_free(&fs_info->mapping_tree); + kfree(extent_root); kfree(tree_root); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) -- cgit v1.2.3 From 8f18cf13396caae5a3d7ae91201cfb15181a9642 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 25 Apr 2008 16:53:30 -0400 Subject: Btrfs: Make the resizer work based on shrinking and growing devices Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 71838264ca6b..a9ce491d279b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -818,6 +818,10 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, return fs_info->tree_root; if (location->objectid == BTRFS_EXTENT_TREE_OBJECTID) return fs_info->extent_root; + if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID) + return fs_info->chunk_root; + if (location->objectid == BTRFS_DEV_TREE_OBJECTID) + return fs_info->dev_root; root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)location->objectid); -- cgit v1.2.3 From 4235298e4fc3c1a09f659cfe2fd285024eeb2241 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 28 Apr 2008 16:40:52 -0400 Subject: Btrfs: Drop some verbose printks Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a9ce491d279b..1c7d84aff864 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -214,25 +214,18 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, while (1) { ret = read_extent_buffer_pages(io_tree, eb, start, 1, btree_get_extent, mirror_num); - if (!ret) { - if (mirror_num) -printk("good read %Lu mirror %d total %d\n", eb->start, mirror_num, num_copies); + if (!ret) return ret; - } + num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, eb->start, eb->len); -printk("failed to read %Lu mirror %d total %d\n", eb->start, mirror_num, num_copies); - if (num_copies == 1) { -printk("reading %Lu failed only one copy\n", eb->start); + if (num_copies == 1) return ret; - } + mirror_num++; - if (mirror_num > num_copies) { -printk("bailing at mirror %d of %d\n", mirror_num, num_copies); + if (mirror_num > num_copies) return ret; - } } -printk("read extent buffer page last\n"); return -EIO; } @@ -322,7 +315,6 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { -printk("bad start on %Lu found %Lu\n", eb->start, found_start); ret = -EIO; goto err; } -- cgit v1.2.3 From a236aed14ccb0661611d4416f6b573d892bdc60a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Apr 2008 09:38:00 -0400 Subject: Btrfs: Deal with failed writes in mirrored configurations Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1c7d84aff864..e35e70165b53 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1385,7 +1385,10 @@ int write_all_supers(struct btrfs_root *root) struct buffer_head *bh; int ret; int do_barriers; + int max_errors; + int total_errors = 0; + max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; do_barriers = !btrfs_test_opt(root, NOBARRIER); sb = root->fs_info->sb_buffer; @@ -1433,8 +1436,14 @@ int write_all_supers(struct btrfs_root *root) } else { ret = submit_bh(WRITE, bh); } - BUG_ON(ret); + if (ret) + total_errors++; } + if (total_errors > max_errors) { + printk("btrfs: %d errors while writing supers\n", total_errors); + BUG(); + } + total_errors = 0; list_for_each(cur, head) { dev = list_entry(cur, struct btrfs_device, dev_list); @@ -1454,13 +1463,17 @@ int write_all_supers(struct btrfs_root *root) wait_on_buffer(bh); BUG_ON(!buffer_uptodate(bh)); } else { - BUG(); + total_errors++; } } dev->pending_io = NULL; brelse(bh); } + if (total_errors > max_errors) { + printk("btrfs: %d errors while writing supers\n", total_errors); + BUG(); + } return 0; } -- cgit v1.2.3 From d6bfde8765668c8687de72f7a40f52acdf4f2f19 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 30 Apr 2008 13:59:35 -0400 Subject: Btrfs: Fixes for 2.6.18 enterprise kernels 2.6.18 seems to get caught in an infinite loop when cancel_rearming_delayed_workqueue is called more than once, so this switches to cancel_delayed_work, which is arguably more correct. Also, balance_dirty_pages can run into problems with 2.6.18 based kernels because it doesn't have the per-bdi dirty limits. This avoids calling balance_dirty_pages on the btree inode unless there is actually something to balance, which is a good optimization in general. Finally there's a compile fix for ordered-data.h Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e35e70165b53..fabc31b334b6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1548,6 +1548,7 @@ int close_ctree(struct btrfs_root *root) btrfs_commit_transaction(trans, root); ret = btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); + write_ctree_super(NULL, root); mutex_unlock(&fs_info->fs_mutex); @@ -1583,17 +1584,17 @@ int close_ctree(struct btrfs_root *root) extent_io_tree_empty_lru(&fs_info->extent_ins); extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); - flush_workqueue(end_io_workqueue); flush_workqueue(async_submit_workqueue); + flush_workqueue(end_io_workqueue); truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); - flush_workqueue(end_io_workqueue); - destroy_workqueue(end_io_workqueue); - flush_workqueue(async_submit_workqueue); destroy_workqueue(async_submit_workqueue); + flush_workqueue(end_io_workqueue); + destroy_workqueue(end_io_workqueue); + iput(fs_info->btree_inode); #if 0 while(!list_empty(&fs_info->hashers)) { @@ -1663,8 +1664,21 @@ void btrfs_throttle(struct btrfs_root *root) void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) { - balance_dirty_pages_ratelimited_nr( + struct extent_io_tree *tree; + u64 num_dirty; + u64 start = 0; + unsigned long thresh = 16 * 1024 * 1024; + tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; + + if (current_is_pdflush()) + return; + + num_dirty = count_range_bits(tree, &start, (u64)-1, + thresh, EXTENT_DIRTY); + if (num_dirty > thresh) { + balance_dirty_pages_ratelimited_nr( root->fs_info->btree_inode->i_mapping, 1); + } } void btrfs_set_buffer_defrag(struct extent_buffer *buf) -- cgit v1.2.3 From a061fc8da7b990faa41ca503e66faef3ecdeead0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 7 May 2008 11:43:44 -0400 Subject: Btrfs: Add support for online device removal This required a few structural changes to the code that manages bdev pointers: The VFS super block now gets an anon-bdev instead of a pointer to the lowest bdev. This allows us to avoid swapping the super block bdev pointer around at run time. The code to read in the super block no longer goes through the extent buffer interface. Things got ugly keeping the mapping constant. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 101 ++++++++++++++++++++++++----------------------------- 1 file changed, 46 insertions(+), 55 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fabc31b334b6..9d5424ad01a3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -78,9 +78,13 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, start, len); - spin_unlock(&em_tree->lock); - if (em) + if (em) { + em->bdev = + BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; + spin_unlock(&em_tree->lock); goto out; + } + spin_unlock(&em_tree->lock); em = alloc_extent_map(GFP_NOFS); if (!em) { @@ -90,7 +94,7 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, em->start = 0; em->len = (u64)-1; em->block_start = 0; - em->bdev = inode->i_sb->s_bdev; + em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; spin_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); @@ -435,11 +439,6 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1); BUG_ON(ret); - if (offset == BTRFS_SUPER_INFO_OFFSET) { - bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; - submit_bio(rw, bio); - return 0; - } return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num); } @@ -587,8 +586,7 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) list = &fs_info->fs_devices->devices; list_for_each(next, list) { device = list_entry(next, struct btrfs_device, dev_list); - if (device->bdev && device->bdev != fs_info->sb->s_bdev) - close_bdev_excl(device->bdev); + close_bdev_excl(device->bdev); device->bdev = NULL; } return 0; @@ -1118,6 +1116,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, u32 leafsize; u32 blocksize; u32 stripesize; + struct buffer_head *bh; struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), @@ -1153,7 +1152,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, spin_lock_init(&fs_info->new_trans_lock); init_completion(&fs_info->kobj_unregister); - sb_set_blocksize(sb, BTRFS_SUPER_INFO_SIZE); fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; fs_info->chunk_root = chunk_root; @@ -1170,6 +1168,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; + sb->s_blocksize = 4096; + sb->s_blocksize_bits = blksize_bits(4096); + /* * we set the i_size on the btree inode to the max possible int. * the real end of the address space is determined by all of @@ -1229,19 +1230,16 @@ struct btrfs_root *open_ctree(struct super_block *sb, __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); - fs_info->sb_buffer = read_tree_block(tree_root, - BTRFS_SUPER_INFO_OFFSET, - 4096); - if (!fs_info->sb_buffer) + bh = __bread(fs_devices->latest_bdev, + BTRFS_SUPER_INFO_OFFSET / 4096, 4096); + if (!bh) goto fail_iput; - read_extent_buffer(fs_info->sb_buffer, &fs_info->super_copy, 0, - sizeof(fs_info->super_copy)); + memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); + brelse(bh); - read_extent_buffer(fs_info->sb_buffer, fs_info->fsid, - (unsigned long)btrfs_super_fsid(fs_info->sb_buffer), - BTRFS_FSID_SIZE); + memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE); disk_super = &fs_info->super_copy; if (!btrfs_super_root(disk_super)) @@ -1263,7 +1261,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, tree_root->leafsize = leafsize; tree_root->sectorsize = sectorsize; tree_root->stripesize = stripesize; - sb_set_blocksize(sb, sectorsize); + + sb->s_blocksize = sectorsize; + sb->s_blocksize_bits = blksize_bits(sectorsize); if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { @@ -1339,7 +1339,6 @@ fail_tree_root: fail_sys_array: mutex_unlock(&fs_info->fs_mutex); fail_sb_buffer: - free_extent_buffer(fs_info->sb_buffer); extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); fail_iput: iput(fs_info->btree_inode); @@ -1380,41 +1379,44 @@ int write_all_supers(struct btrfs_root *root) struct list_head *cur; struct list_head *head = &root->fs_info->fs_devices->devices; struct btrfs_device *dev; - struct extent_buffer *sb; + struct btrfs_super_block *sb; struct btrfs_dev_item *dev_item; struct buffer_head *bh; int ret; int do_barriers; int max_errors; int total_errors = 0; + u32 crc; + u64 flags; max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; do_barriers = !btrfs_test_opt(root, NOBARRIER); - sb = root->fs_info->sb_buffer; - dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block, - dev_item); + sb = &root->fs_info->super_for_commit; + dev_item = &sb->dev_item; list_for_each(cur, head) { dev = list_entry(cur, struct btrfs_device, dev_list); - btrfs_set_device_type(sb, dev_item, dev->type); - btrfs_set_device_id(sb, dev_item, dev->devid); - btrfs_set_device_total_bytes(sb, dev_item, dev->total_bytes); - btrfs_set_device_bytes_used(sb, dev_item, dev->bytes_used); - btrfs_set_device_io_align(sb, dev_item, dev->io_align); - btrfs_set_device_io_width(sb, dev_item, dev->io_width); - btrfs_set_device_sector_size(sb, dev_item, dev->sector_size); - write_extent_buffer(sb, dev->uuid, - (unsigned long)btrfs_device_uuid(dev_item), - BTRFS_UUID_SIZE); - - btrfs_set_header_flag(sb, BTRFS_HEADER_FLAG_WRITTEN); - csum_tree_block(root, sb, 0); - - bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / - root->fs_info->sb->s_blocksize, + btrfs_set_stack_device_type(dev_item, dev->type); + btrfs_set_stack_device_id(dev_item, dev->devid); + btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); + btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); + btrfs_set_stack_device_io_align(dev_item, dev->io_align); + btrfs_set_stack_device_io_width(dev_item, dev->io_width); + btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); + memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE); + flags = btrfs_super_flags(sb); + btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); + + + crc = ~(u32)0; + crc = btrfs_csum_data(root, (char *)sb + BTRFS_CSUM_SIZE, crc, + BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); + btrfs_csum_final(crc, sb->csum); + + bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / 4096, BTRFS_SUPER_INFO_SIZE); - read_extent_buffer(sb, bh->b_data, 0, BTRFS_SUPER_INFO_SIZE); + memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); dev->pending_io = bh; get_bh(bh); @@ -1483,15 +1485,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root int ret; ret = write_all_supers(root); -#if 0 - if (!btrfs_test_opt(root, NOBARRIER)) - blkdev_issue_flush(sb->s_bdev, NULL); - set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, super); - ret = sync_page_range_nolock(btree_inode, btree_inode->i_mapping, - super->start, super->len); - if (!btrfs_test_opt(root, NOBARRIER)) - blkdev_issue_flush(sb->s_bdev, NULL); -#endif return ret; } @@ -1570,8 +1563,6 @@ int close_ctree(struct btrfs_root *root) if (root->fs_info->dev_root->node); free_extent_buffer(root->fs_info->dev_root->node); - free_extent_buffer(fs_info->sb_buffer); - btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); @@ -1652,7 +1643,7 @@ void btrfs_throttle(struct btrfs_root *root) { struct backing_dev_info *bdi; - bdi = root->fs_info->sb->s_bdev->bd_inode->i_mapping->backing_dev_info; + bdi = &root->fs_info->bdi; if (root->fs_info->throttles && bdi_write_congested(bdi)) { #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) congestion_wait(WRITE, HZ/20); -- cgit v1.2.3 From 188de649c51f03df3f8a601aea0110a3870815c2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 9 May 2008 11:52:25 -0400 Subject: Btrfs: Don't do btree balance_dirty_pages on old kernels, it stalls forever Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9d5424ad01a3..c3083165b172 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1655,6 +1655,11 @@ void btrfs_throttle(struct btrfs_root *root) void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) { + /* + * looks as though older kernels can get into trouble with + * this code, they end up stuck in balance_dirty_pages forever + */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) struct extent_io_tree *tree; u64 num_dirty; u64 start = 0; @@ -1670,6 +1675,9 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) balance_dirty_pages_ratelimited_nr( root->fs_info->btree_inode->i_mapping, 1); } +#else + return; +#endif } void btrfs_set_buffer_defrag(struct extent_buffer *buf) -- cgit v1.2.3 From ca7a79ad8dbe24669a8ced01f9fc0126b872745d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 12 May 2008 12:59:19 -0400 Subject: Btrfs: Pass down the expected generation number when reading tree blocks Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c3083165b172..edee7a44f861 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -207,7 +207,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, static int btree_read_extent_buffer_pages(struct btrfs_root *root, struct extent_buffer *eb, - u64 start) + u64 start, u64 parent_transid) { struct extent_io_tree *io_tree; int ret; @@ -254,7 +254,8 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) WARN_ON(1); } eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); - ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE); + ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, + btrfs_header_generation(eb)); BUG_ON(ret); btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); @@ -562,7 +563,8 @@ static struct address_space_operations btree_aops = { .sync_page = block_sync_page, }; -int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) +int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, + u64 parent_transid) { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; @@ -592,12 +594,6 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) return 0; } -int btrfs_verify_block_csum(struct btrfs_root *root, - struct extent_buffer *buf) -{ - return btrfs_buffer_uptodate(buf); -} - struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { @@ -621,7 +617,7 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, - u32 blocksize) + u32 blocksize, u64 parent_transid) { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; @@ -634,7 +630,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, if (!buf) return NULL; - ret = btree_read_extent_buffer_pages(root, buf, 0); + ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret == 0) { buf->flags |= EXTENT_UPTODATE; @@ -715,7 +711,7 @@ static int find_and_setup_root(struct btrfs_root *tree_root, blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), - blocksize); + blocksize, 0); BUG_ON(!root->node); return 0; } @@ -771,7 +767,7 @@ out: } blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), - blocksize); + blocksize, 0); BUG_ON(!root->node); insert: root->ref_cows = 1; @@ -1288,7 +1284,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, chunk_root->node = read_tree_block(chunk_root, btrfs_super_chunk_root(disk_super), - blocksize); + blocksize, 0); BUG_ON(!chunk_root->node); read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, @@ -1304,7 +1300,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super), - blocksize); + blocksize, 0); if (!tree_root->node) goto fail_sb_buffer; @@ -1732,11 +1728,11 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf) EXTENT_DEFRAG, GFP_NOFS); } -int btrfs_read_buffer(struct extent_buffer *buf) +int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; int ret; - ret = btree_read_extent_buffer_pages(root, buf, 0); + ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret == 0) { buf->flags |= EXTENT_UPTODATE; } -- cgit v1.2.3 From 1259ab75c62462b8ffad90067b5e1f6312786a18 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 12 May 2008 13:39:03 -0400 Subject: Btrfs: Handle write errors on raid1 and raid10 When duplicate copies exist, writes are allowed to fail to one of those copies. This changeset includes a few changes that allow the FS to continue even when some IOs fail. It also adds verification of the parent generation number for btree blocks. This generation is stored in the pointer to a block, and it ensures that missed writes to are detected. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index edee7a44f861..574b1245964e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -205,6 +205,33 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, return 0; } +static int verify_parent_transid(struct extent_io_tree *io_tree, + struct extent_buffer *eb, u64 parent_transid) +{ + int ret; + + if (!parent_transid || btrfs_header_generation(eb) == parent_transid) + return 0; + + lock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); + if (extent_buffer_uptodate(io_tree, eb) && + btrfs_header_generation(eb) == parent_transid) { + ret = 0; + goto out; + } + printk("parent transid verify failed on %llu wanted %llu found %llu\n", + (unsigned long long)eb->start, + (unsigned long long)parent_transid, + (unsigned long long)btrfs_header_generation(eb)); + ret = 1; +out: + clear_extent_buffer_uptodate(io_tree, eb); + unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, + GFP_NOFS); + return ret; + +} + static int btree_read_extent_buffer_pages(struct btrfs_root *root, struct extent_buffer *eb, u64 start, u64 parent_transid) @@ -218,7 +245,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, while (1) { ret = read_extent_buffer_pages(io_tree, eb, start, 1, btree_get_extent, mirror_num); - if (!ret) + if (!ret && + !verify_parent_transid(io_tree, eb, parent_transid)) return ret; num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, @@ -330,6 +358,13 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, ret = -EIO; goto err; } + if (memcmp_extent_buffer(eb, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(eb), + BTRFS_FSID_SIZE)) { + printk("bad fsid on block %Lu\n", eb->start); + ret = -EIO; + goto err; + } found_level = btrfs_header_level(eb); ret = csum_tree_block(root, eb, 1); @@ -1363,7 +1398,9 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) "I/O error on %s\n", bdevname(bh->b_bdev, b)); } - set_buffer_write_io_error(bh); + /* note, we dont' set_buffer_write_io_error because we have + * our own ways of dealing with the IO errors + */ clear_buffer_uptodate(bh); } unlock_buffer(bh); @@ -1459,7 +1496,8 @@ int write_all_supers(struct btrfs_root *root) ret = submit_bh(WRITE, bh); BUG_ON(ret); wait_on_buffer(bh); - BUG_ON(!buffer_uptodate(bh)); + if (!buffer_uptodate(bh)) + total_errors++; } else { total_errors++; } @@ -1607,10 +1645,18 @@ int close_ctree(struct btrfs_root *root) return 0; } -int btrfs_buffer_uptodate(struct extent_buffer *buf) +int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) { + int ret; struct inode *btree_inode = buf->first_page->mapping->host; - return extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); + + ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); + if (!ret) + return ret; + + ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf, + parent_transid); + return !ret; } int btrfs_set_buffer_uptodate(struct extent_buffer *buf) -- cgit v1.2.3 From dfe25020689bb2d318782d2c9c7141203583fc70 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 13 May 2008 13:46:40 -0400 Subject: Btrfs: Add mount -o degraded to allow mounts to continue with missing devices Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 49 +++++++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 574b1245964e..38b0d9ecda6a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -614,21 +614,6 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, return ret; } -static int close_all_devices(struct btrfs_fs_info *fs_info) -{ - struct list_head *list; - struct list_head *next; - struct btrfs_device *device; - - list = &fs_info->fs_devices->devices; - list_for_each(next, list) { - device = list_entry(next, struct btrfs_device, dev_list); - close_bdev_excl(device->bdev); - device->bdev = NULL; - } - return 0; -} - struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { @@ -927,6 +912,8 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) list_for_each(cur, &info->fs_devices->devices) { device = list_entry(cur, struct btrfs_device, dev_list); + if (!device->bdev) + continue; bdi = blk_get_backing_dev_info(device->bdev); if (bdi && bdi_congested(bdi, bdi_bits)) { ret = 1; @@ -1140,7 +1127,8 @@ static void btrfs_async_submit_work(struct work_struct *work) } struct btrfs_root *open_ctree(struct super_block *sb, - struct btrfs_fs_devices *fs_devices) + struct btrfs_fs_devices *fs_devices, + char *options) { u32 sectorsize; u32 nodesize; @@ -1276,12 +1264,19 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; - if (btrfs_super_num_devices(disk_super) != fs_devices->num_devices) { + btrfs_parse_options(options, tree_root, NULL); + + if (btrfs_super_num_devices(disk_super) > fs_devices->num_devices) { printk("Btrfs: wanted %llu devices, but found %llu\n", (unsigned long long)btrfs_super_num_devices(disk_super), (unsigned long long)fs_devices->num_devices); - goto fail_sb_buffer; + if (btrfs_test_opt(tree_root, DEGRADED)) + printk("continuing in degraded mode\n"); + else { + goto fail_sb_buffer; + } } + fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); nodesize = btrfs_super_nodesize(disk_super); @@ -1329,6 +1324,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = btrfs_read_chunk_tree(chunk_root); BUG_ON(ret); + btrfs_close_extra_devices(fs_devices); + blocksize = btrfs_level_size(tree_root, btrfs_super_root_level(disk_super)); @@ -1374,7 +1371,7 @@ fail_sb_buffer: fail_iput: iput(fs_info->btree_inode); fail: - close_all_devices(fs_info); + btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); kfree(extent_root); @@ -1429,6 +1426,13 @@ int write_all_supers(struct btrfs_root *root) dev_item = &sb->dev_item; list_for_each(cur, head) { dev = list_entry(cur, struct btrfs_device, dev_list); + if (!dev->bdev) { + total_errors++; + continue; + } + if (!dev->in_fs_metadata) + continue; + btrfs_set_stack_device_type(dev_item, dev->type); btrfs_set_stack_device_id(dev_item, dev->devid); btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); @@ -1482,6 +1486,11 @@ int write_all_supers(struct btrfs_root *root) list_for_each(cur, head) { dev = list_entry(cur, struct btrfs_device, dev_list); + if (!dev->bdev) + continue; + if (!dev->in_fs_metadata) + continue; + BUG_ON(!dev->pending_io); bh = dev->pending_io; wait_on_buffer(bh); @@ -1631,7 +1640,7 @@ int close_ctree(struct btrfs_root *root) kfree(hasher); } #endif - close_all_devices(fs_info); + btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) -- cgit v1.2.3 From a0af469b58944f6e8c5c8ecbebb42997baf0cb9e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 13 May 2008 16:03:06 -0400 Subject: Fix btrfs_open_devices to deal with changes since the scan ioctls Devices can change after the scan ioctls are done, and btrfs_open_devices needs to be able to verify them as they are opened and used by the FS. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 38b0d9ecda6a..264f297260f8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1266,10 +1266,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_parse_options(options, tree_root, NULL); - if (btrfs_super_num_devices(disk_super) > fs_devices->num_devices) { + if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { printk("Btrfs: wanted %llu devices, but found %llu\n", (unsigned long long)btrfs_super_num_devices(disk_super), - (unsigned long long)fs_devices->num_devices); + (unsigned long long)fs_devices->open_devices); if (btrfs_test_opt(tree_root, DEGRADED)) printk("continuing in degraded mode\n"); else { -- cgit v1.2.3 From cb03c743c6486e4efb8be6bdf7bf23177826d156 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 15 May 2008 16:15:45 -0400 Subject: Btrfs: Change the congestion functions to meter the number of async submits as well The async submit workqueue was absorbing too many requests, leading to long stalls where the async submitters were stalling. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 264f297260f8..373374340e9b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -453,6 +453,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, spin_lock(&fs_info->async_submit_work_lock); list_add_tail(&async->list, &fs_info->async_submit_work_list); + atomic_inc(&fs_info->nr_async_submits); spin_unlock(&fs_info->async_submit_work_lock); queue_work(async_submit_workqueue, &fs_info->async_submit_work); @@ -906,10 +907,16 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) { struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; int ret = 0; + int limit = 256 * info->fs_devices->open_devices; struct list_head *cur; struct btrfs_device *device; struct backing_dev_info *bdi; + if ((bdi_bits & (1 << BDI_write_congested)) && + atomic_read(&info->nr_async_submits) > limit) { + return 1; + } + list_for_each(cur, &info->fs_devices->devices) { device = list_entry(cur, struct btrfs_device, dev_list); if (!device->bdev) @@ -1117,6 +1124,7 @@ static void btrfs_async_submit_work(struct work_struct *work) } next = fs_info->async_submit_work_list.next; list_del(next); + atomic_dec(&fs_info->nr_async_submits); spin_unlock(&fs_info->async_submit_work_lock); async = list_entry(next, struct async_submit_bio, list); @@ -1179,6 +1187,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); + atomic_set(&fs_info->nr_async_submits, 0); fs_info->sb = sb; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; -- cgit v1.2.3 From 1c8cfcc1590d378c82ec75888f71e87fd26551e3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 16 May 2008 13:06:51 -0400 Subject: Btrfs: Enable btree balancing on old kernels again Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 373374340e9b..d8a256d3c83a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1719,7 +1719,6 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) * looks as though older kernels can get into trouble with * this code, they end up stuck in balance_dirty_pages forever */ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) struct extent_io_tree *tree; u64 num_dirty; u64 start = 0; @@ -1735,9 +1734,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) balance_dirty_pages_ratelimited_nr( root->fs_info->btree_inode->i_mapping, 1); } -#else return; -#endif } void btrfs_set_buffer_defrag(struct extent_buffer *buf) -- cgit v1.2.3 From da496f2acf61153e0d4d42ded6a53f3367939db0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 27 May 2008 10:52:17 -0400 Subject: Btrfs: Always use the async submission queue for checksummed writes This avoids IO stalls and poorly ordered IO from inline writers mixing in with the async submission queue Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d8a256d3c83a..016c0c0226ed 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -434,13 +434,6 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, { struct async_submit_bio *async; - /* - * inline writerback should stay inline, only hop to the async - * queue if we're pdflush - */ - if (!current_is_pdflush()) - return submit_bio_hook(inode, rw, bio, mirror_num); - async = kmalloc(sizeof(*async), GFP_NOFS); if (!async) return -ENOMEM; -- cgit v1.2.3 From 51ebc0d3d5cd1a2728068499a8fc1bdb0a581ab5 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 9 Jun 2008 22:19:40 -0400 Subject: Btrfs: bdi_init and bdi_destroy come with 2.6.23 Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 016c0c0226ed..3805e7eab82d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -984,7 +984,7 @@ void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) { -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) bdi_init(bdi); #endif bdi->ra_pages = default_backing_dev_info.ra_pages; @@ -1378,7 +1378,7 @@ fail: kfree(extent_root); kfree(tree_root); -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) bdi_destroy(&fs_info->bdi); #endif kfree(fs_info); @@ -1645,7 +1645,7 @@ int close_ctree(struct btrfs_root *root) btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) bdi_destroy(&fs_info->bdi); #endif -- cgit v1.2.3 From edf24abe51493ccda384644d487fe2f796ac21c8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Jun 2008 10:40:29 -0400 Subject: btrfs: sanity mount option parsing and early mount code Also adds lots of comments to describe what's going on here. Signed-off-by: Christoph Hellwig Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3805e7eab82d..b9a53646ceb2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1266,8 +1266,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; - btrfs_parse_options(options, tree_root, NULL); + err = btrfs_parse_options(tree_root, options); + if (err) + goto fail_sb_buffer; + err = -EINVAL; if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { printk("Btrfs: wanted %llu devices, but found %llu\n", (unsigned long long)btrfs_super_num_devices(disk_super), -- cgit v1.2.3 From 8b7128429235d9bd72cfd5ed20c77c4f3118f744 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 11 Jun 2008 16:50:36 -0400 Subject: Btrfs: Add async worker threads for pre and post IO checksumming Btrfs has been using workqueues to spread the checksumming load across other CPUs in the system. But, workqueues only schedule work on the same CPU that queued the work, giving them a limited benefit for systems with higher CPU counts. This code adds a generic facility to schedule work with pools of kthreads, and changes the bio submission code to queue bios up. The queueing is important to make sure large numbers of procs on the system don't turn streaming workloads into random workloads by sending IO down concurrently. The end result of all of this is much higher performance (and CPU usage) when doing checksumming on large machines. Two worker pools are created, one for writes and one for endio processing. The two could deadlock if we tried to service both from a single pool. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 200 ++++++++++++++++++++++------------------------------- 1 file changed, 82 insertions(+), 118 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b9a53646ceb2..98ff4fbcb386 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -31,6 +31,7 @@ #include "btrfs_inode.h" #include "volumes.h" #include "print-tree.h" +#include "async-thread.h" #if 0 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) @@ -46,8 +47,7 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) #endif static struct extent_io_ops btree_extent_io_ops; -static struct workqueue_struct *end_io_workqueue; -static struct workqueue_struct *async_submit_workqueue; +static void end_workqueue_fn(struct btrfs_work *work); struct end_io_wq { struct bio *bio; @@ -57,6 +57,7 @@ struct end_io_wq { int error; int metadata; struct list_head list; + struct btrfs_work work; }; struct async_submit_bio { @@ -66,6 +67,7 @@ struct async_submit_bio { extent_submit_bio_hook_t *submit_bio_hook; int rw; int mirror_num; + struct btrfs_work work; }; struct extent_map *btree_get_extent(struct inode *inode, struct page *page, @@ -389,7 +391,6 @@ static int end_workqueue_bio(struct bio *bio, { struct end_io_wq *end_io_wq = bio->bi_private; struct btrfs_fs_info *fs_info; - unsigned long flags; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) if (bio->bi_size) @@ -397,11 +398,10 @@ static int end_workqueue_bio(struct bio *bio, #endif fs_info = end_io_wq->info; - spin_lock_irqsave(&fs_info->end_io_work_lock, flags); end_io_wq->error = err; - list_add_tail(&end_io_wq->list, &fs_info->end_io_work_list); - spin_unlock_irqrestore(&fs_info->end_io_work_lock, flags); - queue_work(end_io_workqueue, &fs_info->end_io_work); + end_io_wq->work.func = end_workqueue_fn; + end_io_wq->work.flags = 0; + btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) return 0; @@ -428,6 +428,19 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, return 0; } +static void run_one_async_submit(struct btrfs_work *work) +{ + struct btrfs_fs_info *fs_info; + struct async_submit_bio *async; + + async = container_of(work, struct async_submit_bio, work); + fs_info = BTRFS_I(async->inode)->root->fs_info; + atomic_dec(&fs_info->nr_async_submits); + async->submit_bio_hook(async->inode, async->rw, async->bio, + async->mirror_num); + kfree(async); +} + int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, int rw, struct bio *bio, int mirror_num, extent_submit_bio_hook_t *submit_bio_hook) @@ -443,13 +456,10 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, async->bio = bio; async->mirror_num = mirror_num; async->submit_bio_hook = submit_bio_hook; - - spin_lock(&fs_info->async_submit_work_lock); - list_add_tail(&async->list, &fs_info->async_submit_work_list); + async->work.func = run_one_async_submit; + async->work.flags = 0; atomic_inc(&fs_info->nr_async_submits); - spin_unlock(&fs_info->async_submit_work_lock); - - queue_work(async_submit_workqueue, &fs_info->async_submit_work); + btrfs_queue_worker(&fs_info->workers, &async->work); return 0; } @@ -462,19 +472,32 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, offset = bio->bi_sector << 9; + /* + * when we're called for a write, we're already in the async + * submission context. Just jump ingo btrfs_map_bio + */ if (rw & (1 << BIO_RW)) { - return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num); + return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, + mirror_num, 0); } + /* + * called for a read, do the setup so that checksum validation + * can happen in the async kernel threads + */ ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1); BUG_ON(ret); - return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num); + return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); } static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num) { + /* + * kthread helpers are used to submit writes so that checksumming + * can happen in parallel across all CPUs + */ if (!(rw & (1 << BIO_RW))) { return __btree_submit_bio_hook(inode, rw, bio, mirror_num); } @@ -1036,95 +1059,40 @@ static int bio_ready_for_csum(struct bio *bio) return ret; } -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) -static void btrfs_end_io_csum(void *p) -#else -static void btrfs_end_io_csum(struct work_struct *work) -#endif +/* + * called by the kthread helper functions to finally call the bio end_io + * functions. This is where read checksum verification actually happens + */ +static void end_workqueue_fn(struct btrfs_work *work) { -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - struct btrfs_fs_info *fs_info = p; -#else - struct btrfs_fs_info *fs_info = container_of(work, - struct btrfs_fs_info, - end_io_work); -#endif - unsigned long flags; - struct end_io_wq *end_io_wq; struct bio *bio; - struct list_head *next; + struct end_io_wq *end_io_wq; + struct btrfs_fs_info *fs_info; int error; - int was_empty; - while(1) { - spin_lock_irqsave(&fs_info->end_io_work_lock, flags); - if (list_empty(&fs_info->end_io_work_list)) { - spin_unlock_irqrestore(&fs_info->end_io_work_lock, - flags); - return; - } - next = fs_info->end_io_work_list.next; - list_del(next); - spin_unlock_irqrestore(&fs_info->end_io_work_lock, flags); - - end_io_wq = list_entry(next, struct end_io_wq, list); - - bio = end_io_wq->bio; - if (end_io_wq->metadata && !bio_ready_for_csum(bio)) { - spin_lock_irqsave(&fs_info->end_io_work_lock, flags); - was_empty = list_empty(&fs_info->end_io_work_list); - list_add_tail(&end_io_wq->list, - &fs_info->end_io_work_list); - spin_unlock_irqrestore(&fs_info->end_io_work_lock, - flags); - if (was_empty) - return; - continue; - } - error = end_io_wq->error; - bio->bi_private = end_io_wq->private; - bio->bi_end_io = end_io_wq->end_io; - kfree(end_io_wq); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - bio_endio(bio, bio->bi_size, error); -#else - bio_endio(bio, error); -#endif - } -} + end_io_wq = container_of(work, struct end_io_wq, work); + bio = end_io_wq->bio; + fs_info = end_io_wq->info; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) -static void btrfs_async_submit_work(void *p) -#else -static void btrfs_async_submit_work(struct work_struct *work) -#endif -{ -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - struct btrfs_fs_info *fs_info = p; + /* metadata bios are special because the whole tree block must + * be checksummed at once. This makes sure the entire block is in + * ram and up to date before trying to verify things. For + * blocksize <= pagesize, it is basically a noop + */ + if (end_io_wq->metadata && !bio_ready_for_csum(bio)) { + btrfs_queue_worker(&fs_info->endio_workers, + &end_io_wq->work); + return; + } + error = end_io_wq->error; + bio->bi_private = end_io_wq->private; + bio->bi_end_io = end_io_wq->end_io; + kfree(end_io_wq); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) + bio_endio(bio, bio->bi_size, error); #else - struct btrfs_fs_info *fs_info = container_of(work, - struct btrfs_fs_info, - async_submit_work); + bio_endio(bio, error); #endif - struct async_submit_bio *async; - struct list_head *next; - - while(1) { - spin_lock(&fs_info->async_submit_work_lock); - if (list_empty(&fs_info->async_submit_work_list)) { - spin_unlock(&fs_info->async_submit_work_lock); - return; - } - next = fs_info->async_submit_work_list.next; - list_del(next); - atomic_dec(&fs_info->nr_async_submits); - spin_unlock(&fs_info->async_submit_work_lock); - - async = list_entry(next, struct async_submit_bio, list); - async->submit_bio_hook(async->inode, async->rw, async->bio, - async->mirror_num); - kfree(async); - } } struct btrfs_root *open_ctree(struct super_block *sb, @@ -1155,19 +1123,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, err = -ENOMEM; goto fail; } - end_io_workqueue = create_workqueue("btrfs-end-io"); - BUG_ON(!end_io_workqueue); - async_submit_workqueue = create_workqueue("btrfs-async-submit"); - INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); - INIT_LIST_HEAD(&fs_info->end_io_work_list); - INIT_LIST_HEAD(&fs_info->async_submit_work_list); spin_lock_init(&fs_info->hash_lock); - spin_lock_init(&fs_info->end_io_work_lock); - spin_lock_init(&fs_info->async_submit_work_lock); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); @@ -1222,13 +1182,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->do_barriers = 1; #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum, fs_info); - INIT_WORK(&fs_info->async_submit_work, btrfs_async_submit_work, - fs_info); INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); #else - INIT_WORK(&fs_info->end_io_work, btrfs_end_io_csum); - INIT_WORK(&fs_info->async_submit_work, btrfs_async_submit_work); INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); #endif BTRFS_I(fs_info->btree_inode)->root = tree_root; @@ -1240,6 +1195,19 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); + /* we need to start all the end_io workers up front because the + * queue work function gets called at interrupt time. The endio + * workers don't normally start IO, so some number of them <= the + * number of cpus is fine. They handle checksumming after a read. + * + * The other worker threads do start IO, so the max is larger than + * the number of CPUs. FIXME, tune this for huge machines + */ + btrfs_init_workers(&fs_info->workers, num_online_cpus() * 2); + btrfs_init_workers(&fs_info->endio_workers, num_online_cpus()); + btrfs_start_workers(&fs_info->workers, 1); + btrfs_start_workers(&fs_info->endio_workers, num_online_cpus()); + #if 0 ret = add_hasher(fs_info, "crc32c"); if (ret) { @@ -1375,6 +1343,8 @@ fail_sb_buffer: extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); fail_iput: iput(fs_info->btree_inode); + btrfs_stop_workers(&fs_info->workers); + btrfs_stop_workers(&fs_info->endio_workers); fail: btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); @@ -1623,16 +1593,10 @@ int close_ctree(struct btrfs_root *root) extent_io_tree_empty_lru(&fs_info->extent_ins); extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); - flush_workqueue(async_submit_workqueue); - flush_workqueue(end_io_workqueue); - truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); - flush_workqueue(async_submit_workqueue); - destroy_workqueue(async_submit_workqueue); - - flush_workqueue(end_io_workqueue); - destroy_workqueue(end_io_workqueue); + btrfs_stop_workers(&fs_info->workers); + btrfs_stop_workers(&fs_info->endio_workers); iput(fs_info->btree_inode); #if 0 -- cgit v1.2.3 From 4543df7ecc8ae4928c1e51d6e7dc188d650abee4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 11 Jun 2008 21:47:56 -0400 Subject: Btrfs: Add a mount option to control worker thread pool size mount -o thread_pool_size changes the default, which is min(num_cpus + 2, 8). Larger thread pools would make more sense on very large disk arrays. This mount option controls the max size of each thread pool. There are multiple thread pools, so the total worker count will be larger than the mount option. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 98ff4fbcb386..c6a710a668cb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1117,6 +1117,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, GFP_NOFS); int ret; int err = -EINVAL; + struct btrfs_super_block *disk_super; if (!extent_root || !tree_root || !fs_info) { @@ -1148,6 +1149,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; + fs_info->thread_pool_size = min(num_online_cpus() + 2, 8); sb->s_blocksize = 4096; sb->s_blocksize_bits = blksize_bits(4096); @@ -1195,19 +1197,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); - /* we need to start all the end_io workers up front because the - * queue work function gets called at interrupt time. The endio - * workers don't normally start IO, so some number of them <= the - * number of cpus is fine. They handle checksumming after a read. - * - * The other worker threads do start IO, so the max is larger than - * the number of CPUs. FIXME, tune this for huge machines - */ - btrfs_init_workers(&fs_info->workers, num_online_cpus() * 2); - btrfs_init_workers(&fs_info->endio_workers, num_online_cpus()); - btrfs_start_workers(&fs_info->workers, 1); - btrfs_start_workers(&fs_info->endio_workers, num_online_cpus()); - #if 0 ret = add_hasher(fs_info, "crc32c"); if (ret) { @@ -1238,6 +1227,17 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (err) goto fail_sb_buffer; + /* + * we need to start all the end_io workers up front because the + * queue work function gets called at interrupt time, and so it + * cannot dynamically grow. + */ + btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); + btrfs_start_workers(&fs_info->workers, 1); + btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); + + err = -EINVAL; if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { printk("Btrfs: wanted %llu devices, but found %llu\n", @@ -1341,10 +1341,10 @@ fail_sys_array: mutex_unlock(&fs_info->fs_mutex); fail_sb_buffer: extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); -fail_iput: - iput(fs_info->btree_inode); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); +fail_iput: + iput(fs_info->btree_inode); fail: btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); -- cgit v1.2.3 From 1cc127b5d1b71453091859301de4a7dd6ee96fa8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 12 Jun 2008 14:46:17 -0400 Subject: Btrfs: Add a thread pool just for submit_bio If a bio submission is after a lock holder waiting for the bio on the work queue, it is possible to deadlock. Move the bios into their own pool. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c6a710a668cb..e5c758e306d5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1233,8 +1233,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, * cannot dynamically grow. */ btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); btrfs_start_workers(&fs_info->workers, 1); + btrfs_start_workers(&fs_info->submit_workers, 1); btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); @@ -1343,6 +1345,7 @@ fail_sb_buffer: extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); + btrfs_stop_workers(&fs_info->submit_workers); fail_iput: iput(fs_info->btree_inode); fail: @@ -1597,6 +1600,7 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); + btrfs_stop_workers(&fs_info->submit_workers); iput(fs_info->btree_inode); #if 0 -- cgit v1.2.3 From 925baeddc5b0764a53f2214a1253251bab0e0324 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: Btrfs: Start btree concurrency work. The allocation trees and the chunk trees are serialized via their own dedicated mutexes. This means allocation location is still not very fine grained. The main FS btree is protected by locks on each block in the btree. Locks are taken top / down, and as processing finishes on a given level of the tree, the lock is released after locking the lower level. The end result of a search is now a path where only the lowest level is locked. Releasing or freeing the path drops any locks held. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e5c758e306d5..fe40bdd984ff 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -32,6 +32,7 @@ #include "volumes.h" #include "print-tree.h" #include "async-thread.h" +#include "locking.h" #if 0 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) @@ -681,9 +682,11 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, { struct inode *btree_inode = root->fs_info->btree_inode; if (btrfs_header_generation(buf) == - root->fs_info->running_transaction->transid) + root->fs_info->running_transaction->transid) { + WARN_ON(!btrfs_tree_locked(buf)); clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); + } return 0; } @@ -720,6 +723,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->in_sysfs = 0; INIT_LIST_HEAD(&root->dirty_list); + spin_lock_init(&root->node_lock); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); @@ -1196,6 +1200,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->fs_mutex); + mutex_init(&fs_info->alloc_mutex); + mutex_init(&fs_info->chunk_mutex); #if 0 ret = add_hasher(fs_info, "crc32c"); @@ -1274,7 +1280,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_lock(&fs_info->fs_mutex); + mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_sys_array(tree_root); + mutex_unlock(&fs_info->chunk_mutex); if (ret) { printk("btrfs: failed to read the system array on %s\n", sb->s_id); @@ -1296,7 +1304,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), BTRFS_UUID_SIZE); + mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_chunk_tree(chunk_root); + mutex_unlock(&fs_info->chunk_mutex); BUG_ON(ret); btrfs_close_extra_devices(fs_devices); @@ -1654,6 +1664,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; + WARN_ON(!btrfs_tree_locked(buf)); if (transid != root->fs_info->generation) { printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", (unsigned long long)buf->start, -- cgit v1.2.3 From a213501153fd66e2359e091b1612841305ba6551 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: Btrfs: Replace the big fs_mutex with a collection of other locks Extent alloctions are still protected by a large alloc_mutex. Objectid allocations are covered by a objectid mutex Other btree operations are protected by a lock on individual btree nodes Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fe40bdd984ff..f638803549e0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -724,6 +724,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, INIT_LIST_HEAD(&root->dirty_list); spin_lock_init(&root->node_lock); + mutex_init(&root->objectid_mutex); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); @@ -1146,6 +1147,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); atomic_set(&fs_info->nr_async_submits, 0); + atomic_set(&fs_info->throttles, 0); fs_info->sb = sb; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; @@ -1199,7 +1201,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); mutex_init(&fs_info->trans_mutex); - mutex_init(&fs_info->fs_mutex); + mutex_init(&fs_info->drop_mutex); mutex_init(&fs_info->alloc_mutex); mutex_init(&fs_info->chunk_mutex); @@ -1278,8 +1280,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, goto fail_sb_buffer; } - mutex_lock(&fs_info->fs_mutex); - mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_sys_array(tree_root); mutex_unlock(&fs_info->chunk_mutex); @@ -1342,7 +1342,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->metadata_alloc_profile = (u64)-1; fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; - mutex_unlock(&fs_info->fs_mutex); return tree_root; fail_extent_root: @@ -1350,7 +1349,6 @@ fail_extent_root: fail_tree_root: free_extent_buffer(tree_root->node); fail_sys_array: - mutex_unlock(&fs_info->fs_mutex); fail_sb_buffer: extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); btrfs_stop_workers(&fs_info->workers); @@ -1562,8 +1560,9 @@ int close_ctree(struct btrfs_root *root) struct btrfs_fs_info *fs_info = root->fs_info; fs_info->closing = 1; + smp_mb(); + btrfs_transaction_flush_work(root); - mutex_lock(&fs_info->fs_mutex); btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); @@ -1574,7 +1573,6 @@ int close_ctree(struct btrfs_root *root) BUG_ON(ret); write_ctree_super(NULL, root); - mutex_unlock(&fs_info->fs_mutex); btrfs_transaction_flush_work(root); @@ -1679,7 +1677,8 @@ void btrfs_throttle(struct btrfs_root *root) struct backing_dev_info *bdi; bdi = &root->fs_info->bdi; - if (root->fs_info->throttles && bdi_write_congested(bdi)) { + if (atomic_read(&root->fs_info->throttles) && + bdi_write_congested(bdi)) { #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) congestion_wait(WRITE, HZ/20); #else -- cgit v1.2.3 From 051e1b9f748ae673b7325d3fc049bb838606cffa Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: Drop locks in btrfs_search_slot when reading a tree block. One lock per btree block can make for significant congestion if everyone has to wait for IO at the high levels of the btree. This drops locks held by a path when doing reads during a tree search. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f638803549e0..ffc363d2fb24 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1684,6 +1684,7 @@ void btrfs_throttle(struct btrfs_root *root) #else blk_congestion_wait(WRITE, HZ/20); #endif + } } -- cgit v1.2.3 From 333db94cdde9e6dfdedab9290d04d812f83e0922 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:30 -0400 Subject: Btrfs: Fix snapshot deletion to release the alloc_mutex much more often. This lowers the impact of snapshot deletion on the rest of the FS. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ffc363d2fb24..3cc480b83819 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1674,6 +1674,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) void btrfs_throttle(struct btrfs_root *root) { +#if 0 struct backing_dev_info *bdi; bdi = &root->fs_info->bdi; @@ -1686,6 +1687,7 @@ void btrfs_throttle(struct btrfs_root *root) #endif } +#endif } void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) -- cgit v1.2.3 From 89ce8a63d0c761fbb02089850605360f389477d8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: Add btrfs_end_transaction_throttle to force writers to wait for pending commits The existing throttle mechanism was often not sufficient to prevent new writers from coming in and making a given transaction run forever. This adds an explicit wait at the end of most operations so they will allow the current transaction to close. There is no wait inside file_write, inode updates, or cow filling, all which have different deadlock possibilities. This is a temporary measure until better asynchronous commit support is added. This code leads to stalls as it waits for data=ordered writeback, and it really needs to be fixed. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3cc480b83819..52569b57692d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1672,24 +1672,6 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); } -void btrfs_throttle(struct btrfs_root *root) -{ -#if 0 - struct backing_dev_info *bdi; - - bdi = &root->fs_info->bdi; - if (atomic_read(&root->fs_info->throttles) && - bdi_write_congested(bdi)) { -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) - congestion_wait(WRITE, HZ/20); -#else - blk_congestion_wait(WRITE, HZ/20); -#endif - - } -#endif -} - void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) { /* -- cgit v1.2.3 From a74a4b97b61beede185b4b3ad359d7d378b0d312 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: Btrfs: Replace the transaction work queue with kthreads This creates one kthread for commits and one kthread for deleting old snapshots. All the work queues are removed. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 107 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 52569b57692d..31ca9f89388d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ +#include #include #include #include @@ -24,6 +25,12 @@ #include #include // for block_sync_page #include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) +# include +#else +# include +#endif #include "crc32c.h" #include "ctree.h" #include "disk-io.h" @@ -1100,6 +1107,87 @@ static void end_workqueue_fn(struct btrfs_work *work) #endif } +static int cleaner_kthread(void *arg) +{ + struct btrfs_root *root = arg; + + do { + smp_mb(); + if (root->fs_info->closing) + break; + + vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); + mutex_lock(&root->fs_info->cleaner_mutex); +printk("cleaner awake\n"); + btrfs_clean_old_snapshots(root); +printk("cleaner done\n"); + mutex_unlock(&root->fs_info->cleaner_mutex); + + if (freezing(current)) { + refrigerator(); + } else { + smp_mb(); + if (root->fs_info->closing) + break; + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + __set_current_state(TASK_RUNNING); + } + } while (!kthread_should_stop()); + return 0; +} + +static int transaction_kthread(void *arg) +{ + struct btrfs_root *root = arg; + struct btrfs_trans_handle *trans; + struct btrfs_transaction *cur; + unsigned long now; + unsigned long delay; + int ret; + + do { + smp_mb(); + if (root->fs_info->closing) + break; + + delay = HZ * 30; + vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); + mutex_lock(&root->fs_info->transaction_kthread_mutex); + + mutex_lock(&root->fs_info->trans_mutex); + cur = root->fs_info->running_transaction; + if (!cur) { + mutex_unlock(&root->fs_info->trans_mutex); + goto sleep; + } + now = get_seconds(); + if (now < cur->start_time || now - cur->start_time < 30) { + mutex_unlock(&root->fs_info->trans_mutex); + delay = HZ * 5; + goto sleep; + } + mutex_unlock(&root->fs_info->trans_mutex); + btrfs_defrag_dirty_roots(root->fs_info); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_commit_transaction(trans, root); +sleep: + wake_up_process(root->fs_info->cleaner_kthread); + mutex_unlock(&root->fs_info->transaction_kthread_mutex); + + if (freezing(current)) { + refrigerator(); + } else { + if (root->fs_info->closing) + break; + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(delay); + __set_current_state(TASK_RUNNING); + } + } while (!kthread_should_stop()); + return 0; +} + struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices, char *options) @@ -1189,11 +1277,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) - INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); -#else - INIT_DELAYED_WORK(&fs_info->trans_work, btrfs_transaction_cleaner); -#endif BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); @@ -1204,6 +1287,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->drop_mutex); mutex_init(&fs_info->alloc_mutex); mutex_init(&fs_info->chunk_mutex); + mutex_init(&fs_info->transaction_kthread_mutex); + mutex_init(&fs_info->cleaner_mutex); #if 0 ret = add_hasher(fs_info, "crc32c"); @@ -1247,7 +1332,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_start_workers(&fs_info->submit_workers, 1); btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); - err = -EINVAL; if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { printk("Btrfs: wanted %llu devices, but found %llu\n", @@ -1341,9 +1425,22 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->data_alloc_profile = (u64)-1; fs_info->metadata_alloc_profile = (u64)-1; fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; + fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, + "btrfs-cleaner"); + if (!fs_info->cleaner_kthread) + goto fail_extent_root; + + fs_info->transaction_kthread = kthread_run(transaction_kthread, + tree_root, + "btrfs-transaction"); + if (!fs_info->transaction_kthread) + goto fail_trans_kthread; + return tree_root; +fail_trans_kthread: + kthread_stop(fs_info->cleaner_kthread); fail_extent_root: free_extent_buffer(extent_root->node); fail_tree_root: @@ -1562,8 +1659,11 @@ int close_ctree(struct btrfs_root *root) fs_info->closing = 1; smp_mb(); - btrfs_transaction_flush_work(root); + kthread_stop(root->fs_info->transaction_kthread); + kthread_stop(root->fs_info->cleaner_kthread); + btrfs_defrag_dirty_roots(root->fs_info); + btrfs_clean_old_snapshots(root); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); /* run commit again to drop the original snapshot */ @@ -1574,8 +1674,6 @@ int close_ctree(struct btrfs_root *root) write_ctree_super(NULL, root); - btrfs_transaction_flush_work(root); - if (fs_info->delalloc_bytes) { printk("btrfs: at unmount delalloc count %Lu\n", fs_info->delalloc_bytes); -- cgit v1.2.3 From 3f157a2fd2ad731e1ed9964fecdc5f459f04a4a4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 25 Jun 2008 16:01:31 -0400 Subject: Btrfs: Online btree defragmentation fixes The btree defragger wasn't making forward progress because the new key wasn't being saved by the btrfs_search_forward function. This also disables the automatic btree defrag, it wasn't scaling well to huge filesystems. The auto-defrag needs to be done differently. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 61 +++--------------------------------------------------- 1 file changed, 3 insertions(+), 58 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 31ca9f89388d..4cdc0b6a2672 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -295,7 +295,6 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, btrfs_header_generation(eb)); BUG_ON(ret); - btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", @@ -355,7 +354,6 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, } eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); - btrfs_clear_buffer_defrag(eb); found_start = btrfs_header_bytenr(eb); if (found_start != start) { ret = -EIO; @@ -736,6 +734,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); memset(&root->root_kobj, 0, sizeof(root->root_kobj)); + root->defrag_trans_start = fs_info->generation; init_completion(&root->kobj_unregister); root->defrag_running = 0; root->defrag_level = 0; @@ -1168,7 +1167,6 @@ static int transaction_kthread(void *arg) goto sleep; } mutex_unlock(&root->fs_info->trans_mutex); - btrfs_defrag_dirty_roots(root->fs_info); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sleep: @@ -1434,12 +1432,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, tree_root, "btrfs-transaction"); if (!fs_info->transaction_kthread) - goto fail_trans_kthread; + goto fail_cleaner; return tree_root; -fail_trans_kthread: +fail_cleaner: kthread_stop(fs_info->cleaner_kthread); fail_extent_root: free_extent_buffer(extent_root->node); @@ -1662,7 +1660,6 @@ int close_ctree(struct btrfs_root *root) kthread_stop(root->fs_info->transaction_kthread); kthread_stop(root->fs_info->cleaner_kthread); - btrfs_defrag_dirty_roots(root->fs_info); btrfs_clean_old_snapshots(root); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); @@ -1794,58 +1791,6 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) return; } -void btrfs_set_buffer_defrag(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, - buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS); -} - -void btrfs_set_buffer_defrag_done(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start, - buf->start + buf->len - 1, EXTENT_DEFRAG_DONE, - GFP_NOFS); -} - -int btrfs_buffer_defrag(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0); -} - -int btrfs_buffer_defrag_done(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return test_range_bit(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, - EXTENT_DEFRAG_DONE, 0); -} - -int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, - EXTENT_DEFRAG_DONE, GFP_NOFS); -} - -int btrfs_clear_buffer_defrag(struct extent_buffer *buf) -{ - struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; - struct inode *btree_inode = root->fs_info->btree_inode; - return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree, - buf->start, buf->start + buf->len - 1, - EXTENT_DEFRAG, GFP_NOFS); -} - int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) { struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; -- cgit v1.2.3 From 7d9eb12c8739e7dc80c78c6b3596f912ecd8f941 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jul 2008 14:19:17 -0400 Subject: Btrfs: Add locking around volume management (device add/remove/balance) Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4cdc0b6a2672..8f4c40033e92 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1287,6 +1287,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->chunk_mutex); mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); + mutex_init(&fs_info->volume_mutex); #if 0 ret = add_hasher(fs_info, "crc32c"); -- cgit v1.2.3 From 77a41afb7d0dd0f27b6f2f1a5bc701929c7034de Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 8 Jul 2008 14:32:12 -0400 Subject: Btrfs: Drop some verbose printks Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8f4c40033e92..b01b3f4f92a9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1117,9 +1117,7 @@ static int cleaner_kthread(void *arg) vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); mutex_lock(&root->fs_info->cleaner_mutex); -printk("cleaner awake\n"); btrfs_clean_old_snapshots(root); -printk("cleaner done\n"); mutex_unlock(&root->fs_info->cleaner_mutex); if (freezing(current)) { -- cgit v1.2.3 From e6dcd2dc9c489108648e2ed543315dd134d50a9a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:53:50 -0400 Subject: Btrfs: New data=ordered implementation The old data=ordered code would force commit to wait until all the data extents from the transaction were fully on disk. This introduced large latencies into the commit and stalled new writers in the transaction for a long time. The new code changes the way data allocations and extents work: * When delayed allocation is filled, data extents are reserved, and the extent bit EXTENT_ORDERED is set on the entire range of the extent. A struct btrfs_ordered_extent is allocated an inserted into a per-inode rbtree to track the pending extents. * As each page is written EXTENT_ORDERED is cleared on the bytes corresponding to that page. * When all of the bytes corresponding to a single struct btrfs_ordered_extent are written, The previously reserved extent is inserted into the FS btree and into the extent allocation trees. The checksums for the file data are also updated. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b01b3f4f92a9..4a5ebafb935a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -407,7 +407,11 @@ static int end_workqueue_bio(struct bio *bio, end_io_wq->error = err; end_io_wq->work.func = end_workqueue_fn; end_io_wq->work.flags = 0; - btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work); + if (bio->bi_rw & (1 << BIO_RW)) + btrfs_queue_worker(&fs_info->endio_write_workers, + &end_io_wq->work); + else + btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work); #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) return 0; @@ -1286,6 +1290,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); + init_waitqueue_head(&fs_info->transaction_throttle); #if 0 ret = add_hasher(fs_info, "crc32c"); @@ -1325,9 +1330,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->endio_write_workers, + fs_info->thread_pool_size); btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->submit_workers, 1); btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); + btrfs_start_workers(&fs_info->endio_write_workers, + fs_info->thread_pool_size); err = -EINVAL; if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { @@ -1447,6 +1456,7 @@ fail_sb_buffer: extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); + btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); fail_iput: iput(fs_info->btree_inode); @@ -1702,6 +1712,7 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); + btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); iput(fs_info->btree_inode); -- cgit v1.2.3 From 247e743cbe6e655768c3679f84821e03c1577902 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:53:51 -0400 Subject: Btrfs: Use async helpers to deal with pages that have been improperly dirtied Higher layers sometimes call set_page_dirty without asking the filesystem to help. This causes many problems for the data=ordered and cow code. This commit detects pages that haven't been properly setup for IO and kicks off an async helper to deal with them. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4a5ebafb935a..66466d125c05 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1329,11 +1329,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, */ btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->fixup_workers, 1); btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->endio_write_workers, fs_info->thread_pool_size); btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->submit_workers, 1); + btrfs_start_workers(&fs_info->fixup_workers, 1); btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); btrfs_start_workers(&fs_info->endio_write_workers, fs_info->thread_pool_size); @@ -1454,6 +1456,7 @@ fail_tree_root: fail_sys_array: fail_sb_buffer: extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); + btrfs_stop_workers(&fs_info->fixup_workers); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); btrfs_stop_workers(&fs_info->endio_write_workers); @@ -1710,6 +1713,7 @@ int close_ctree(struct btrfs_root *root) truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + btrfs_stop_workers(&fs_info->fixup_workers); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); btrfs_stop_workers(&fs_info->endio_write_workers); -- cgit v1.2.3 From f9295749388f82c8d2f485e99c72cd7c7876a99b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Jul 2008 12:54:14 -0400 Subject: btrfs_start_transaction: wait for commits in progress to finish btrfs_commit_transaction has to loop waiting for any writers in the transaction to finish before it can proceed. btrfs_start_transaction should be polite and not join a transaction that is in the process of being finished off. There are a few places that can't wait, basically the ones doing IO that might be needed to finish the transaction. For them, btrfs_join_transaction is added. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 66466d125c05..99bb385c2982 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1291,6 +1291,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); init_waitqueue_head(&fs_info->transaction_throttle); + init_waitqueue_head(&fs_info->transaction_wait); #if 0 ret = add_hasher(fs_info, "crc32c"); -- cgit v1.2.3 From 6af118ce51b52ceda357c671550c79628b9c4a65 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 22 Jul 2008 11:18:07 -0400 Subject: Btrfs: Index extent buffers in an rbtree Before, extent buffers were a temporary object, meant to map a number of pages at once and collect operations on them. But, a few extra fields have crept in, and they are also the best place to store a per-tree block lock field as well. This commit puts the extent buffers into an rbtree, and ensures a single extent buffer for each tree block. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 99bb385c2982..86e84a8579e3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -381,7 +381,6 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, end = min_t(u64, eb->len, PAGE_CACHE_SIZE); end = eb->start + end - 1; - release_extent_buffer_tail_pages(eb); err: free_extent_buffer(eb); out: @@ -563,21 +562,21 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) struct extent_map_tree *map; int ret; - if (page_count(page) > 3) { - /* once for page->private, once for the caller, once - * once for the page cache - */ - return 0; - } tree = &BTRFS_I(page->mapping->host)->io_tree; map = &BTRFS_I(page->mapping->host)->extent_tree; + ret = try_release_extent_state(map, tree, page, gfp_flags); + if (!ret) { + return 0; + } + + ret = try_release_extent_buffer(tree, page); if (ret == 1) { - invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); } + return ret; } @@ -588,7 +587,8 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) extent_invalidatepage(tree, page, offset); btree_releasepage(page, GFP_NOFS); if (PagePrivate(page)) { - invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE); + printk("warning page private not zero on page %Lu\n", + page_offset(page)); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); @@ -1456,7 +1456,6 @@ fail_tree_root: free_extent_buffer(tree_root->node); fail_sys_array: fail_sb_buffer: - extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); btrfs_stop_workers(&fs_info->fixup_workers); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); @@ -1705,13 +1704,6 @@ int close_ctree(struct btrfs_root *root) filemap_write_and_wait(fs_info->btree_inode->i_mapping); - extent_io_tree_empty_lru(&fs_info->free_space_cache); - extent_io_tree_empty_lru(&fs_info->block_group_cache); - extent_io_tree_empty_lru(&fs_info->pinned_extents); - extent_io_tree_empty_lru(&fs_info->pending_del); - extent_io_tree_empty_lru(&fs_info->extent_ins); - extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree); - truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); btrfs_stop_workers(&fs_info->fixup_workers); -- cgit v1.2.3 From 89642229a582a5c2b6d2ed8ec16986387d9a9047 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Jul 2008 09:41:53 -0400 Subject: Btrfs: Search data ordered extents first for checksums on read Checksum items are not inserted into the tree until all of the io from a given extent is complete. This means one dirty page from an extent may be written, freed, and then read again before the entire extent is on disk and the checksum item is inserted. The checksums themselves are stored in the ordered extent so they can be inserted in bulk when IO is complete. On read, if a checksum item isn't found, the ordered extents were being searched for a checksum record. This all worked most of the time, but the checksum insertion code tries to reduce the number of tree operations by pre-inserting checksum items based on i_size and a few other factors. This means the read code might find a checksum item that hasn't yet really been filled in. This commit changes things to check the ordered extents first and only dive into the btree if nothing was found. This removes the need for extra locking and is more reliable. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 86e84a8579e3..7ce3f83c5dd6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1011,9 +1011,16 @@ void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) spin_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); spin_unlock(&em_tree->lock); - if (!em) + if (!em) { + __unplug_io_fn(bdi, page); return; + } + if (em->block_start >= EXTENT_MAP_LAST_BYTE) { + free_extent_map(em); + __unplug_io_fn(bdi, page); + return; + } offset = offset - em->start; btrfs_unplug_page(&BTRFS_I(inode)->root->fs_info->mapping_tree, em->block_start + offset, page); -- cgit v1.2.3 From 3eaa2885276fd6dac7b076a793932428b7168e74 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 24 Jul 2008 11:57:52 -0400 Subject: Btrfs: Fix the defragmention code and the block relocation code for data=ordered Before setting an extent to delalloc, the code needs to wait for pending ordered extents. Also, the relocation code needs to wait for ordered IO before scanning the block group again. This is because the extents are not removed until the IO for the new extents is finished Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7ce3f83c5dd6..ec01062eb41d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1252,6 +1252,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_nlink = 1; fs_info->thread_pool_size = min(num_online_cpus() + 2, 8); + INIT_LIST_HEAD(&fs_info->ordered_extents); + spin_lock_init(&fs_info->ordered_extent_lock); + sb->s_blocksize = 4096; sb->s_blocksize_bits = blksize_bits(4096); -- cgit v1.2.3 From 7b1287662304c3cb05cb38f5e3e2d69f386e8f10 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 24 Jul 2008 12:17:14 -0400 Subject: Btrfs: Create orphan inode records to prevent lost files after a crash Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ec01062eb41d..d60923967347 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -732,7 +732,9 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->in_sysfs = 0; INIT_LIST_HEAD(&root->dirty_list); + INIT_LIST_HEAD(&root->orphan_list); spin_lock_init(&root->node_lock); + spin_lock_init(&root->orphan_lock); mutex_init(&root->objectid_mutex); memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); -- cgit v1.2.3 From 31153d81284934601d08110ac7698fd9a535e4c0 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Mon, 28 Jul 2008 15:32:19 -0400 Subject: Btrfs: Add a leaf reference cache Much of the IO done while dropping snapshots is done looking up leaves in the filesystem trees to see if they point to any extents and to drop the references on any extents found. This creates a cache so that IO isn't required. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d60923967347..4f0e1d06c384 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -716,6 +716,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->node = NULL; root->inode = NULL; root->commit_root = NULL; + root->ref_tree = NULL; root->sectorsize = sectorsize; root->nodesize = nodesize; root->leafsize = leafsize; @@ -1165,12 +1166,19 @@ static int transaction_kthread(void *arg) vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); mutex_lock(&root->fs_info->transaction_kthread_mutex); + printk("btrfs: total reference cache size %Lu\n", + root->fs_info->total_ref_cache_size); + mutex_lock(&root->fs_info->trans_mutex); cur = root->fs_info->running_transaction; if (!cur) { mutex_unlock(&root->fs_info->trans_mutex); goto sleep; } + + printk("btrfs: running reference cache size %Lu\n", + root->fs_info->running_ref_cache_size); + now = get_seconds(); if (now < cur->start_time || now - cur->start_time < 30) { mutex_unlock(&root->fs_info->trans_mutex); @@ -1233,6 +1241,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, spin_lock_init(&fs_info->hash_lock); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); + spin_lock_init(&fs_info->ref_cache_lock); init_completion(&fs_info->kobj_unregister); fs_info->tree_root = tree_root; @@ -1699,6 +1708,11 @@ int close_ctree(struct btrfs_root *root) printk("btrfs: at unmount delalloc count %Lu\n", fs_info->delalloc_bytes); } + if (fs_info->total_ref_cache_size) { + printk("btrfs: at umount reference cache size %Lu\n", + fs_info->total_ref_cache_size); + } + if (fs_info->extent_root->node) free_extent_buffer(fs_info->extent_root->node); -- cgit v1.2.3 From 017e5369eb353559d68a11d4a718faa634533821 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 28 Jul 2008 15:32:51 -0400 Subject: Btrfs: Leaf reference cache update This changes the reference cache to make a single cache per root instead of one cache per transaction, and to key by the byte number of the disk block instead of the keys inside. This makes it much less likely to have cache misses if a snapshot or something has an extra reference on a higher node or a leaf while the first transaction that added the leaf into the cache is dropping. Some throttling is added to functions that free blocks heavily so they wait for old transactions to drop. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4f0e1d06c384..eccdf13a95ac 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -40,6 +40,7 @@ #include "print-tree.h" #include "async-thread.h" #include "locking.h" +#include "ref-cache.h" #if 0 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) @@ -737,6 +738,10 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->node_lock); spin_lock_init(&root->orphan_lock); mutex_init(&root->objectid_mutex); + + btrfs_leaf_ref_tree_init(&root->ref_tree_struct); + root->ref_tree = &root->ref_tree_struct; + memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); @@ -1176,9 +1181,6 @@ static int transaction_kthread(void *arg) goto sleep; } - printk("btrfs: running reference cache size %Lu\n", - root->fs_info->running_ref_cache_size); - now = get_seconds(); if (now < cur->start_time || now - cur->start_time < 30) { mutex_unlock(&root->fs_info->trans_mutex); -- cgit v1.2.3 From ab78c84de1ce4db1b2a2cef361625ad80abbab3f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 29 Jul 2008 16:15:18 -0400 Subject: Btrfs: Throttle operations if the reference cache gets too large A large reference cache is directly related to a lot of work pending for the cleaner thread. This throttles back new operations based on the size of the reference cache so the cleaner thread will be able to keep up. Overall, this actually makes the FS faster because the cleaner thread will be more likely to find things in cache. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eccdf13a95ac..27ffa9b7ddc8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1171,8 +1171,10 @@ static int transaction_kthread(void *arg) vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); mutex_lock(&root->fs_info->transaction_kthread_mutex); - printk("btrfs: total reference cache size %Lu\n", - root->fs_info->total_ref_cache_size); + if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) { + printk("btrfs: total reference cache size %Lu\n", + root->fs_info->total_ref_cache_size); + } mutex_lock(&root->fs_info->trans_mutex); cur = root->fs_info->running_transaction; @@ -1256,6 +1258,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_mapping_init(&fs_info->mapping_tree); atomic_set(&fs_info->nr_async_submits, 0); atomic_set(&fs_info->throttles, 0); + atomic_set(&fs_info->throttle_gen, 0); fs_info->sb = sb; fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; -- cgit v1.2.3 From 33958dc6d38fb4ca7e62273855fcb2db7e616263 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 30 Jul 2008 10:29:12 -0400 Subject: Btrfs: Fix verify_parent_transid It was incorrectly clearing the up to date flag on the buffer even when the buffer properly verified. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 27ffa9b7ddc8..ec1ba8ddb35f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -235,8 +235,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, (unsigned long long)parent_transid, (unsigned long long)btrfs_header_generation(eb)); ret = 1; -out: clear_extent_buffer_uptodate(io_tree, eb); +out: unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); return ret; -- cgit v1.2.3 From bcc63abbf3e9bf948a1b0129b3e6120ec7d7f698 Mon Sep 17 00:00:00 2001 From: Yan Date: Wed, 30 Jul 2008 16:29:20 -0400 Subject: Btrfs: implement memory reclaim for leaf reference cache The memory reclaiming issue happens when snapshot exists. In that case, some cache entries may not be used during old snapshot dropping, so they will remain in the cache until umount. The patch adds a field to struct btrfs_leaf_ref to record create time. Besides, the patch makes all dead roots of a given snapshot linked together in order of create time. After a old snapshot was completely dropped, we check the dead root list and remove all cache entries created before the oldest dead root in the list. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ec1ba8ddb35f..e826730d750f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -735,8 +735,9 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, INIT_LIST_HEAD(&root->dirty_list); INIT_LIST_HEAD(&root->orphan_list); + INIT_LIST_HEAD(&root->dead_list); spin_lock_init(&root->node_lock); - spin_lock_init(&root->orphan_lock); + spin_lock_init(&root->list_lock); mutex_init(&root->objectid_mutex); btrfs_leaf_ref_tree_init(&root->ref_tree_struct); @@ -1717,7 +1718,7 @@ int close_ctree(struct btrfs_root *root) printk("btrfs: at umount reference cache size %Lu\n", fs_info->total_ref_cache_size); } - + if (fs_info->extent_root->node) free_extent_buffer(fs_info->extent_root->node); -- cgit v1.2.3 From 61b4944018449003ac5f9757f4d125dce519cf51 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 31 Jul 2008 15:42:53 -0400 Subject: Btrfs: Fix streaming read performance with checksumming on Large streaming reads make for large bios, which means each entry on the list async work queues represents a large amount of data. IO congestion throttling on the device was kicking in before the async worker threads decided a single thread was busy and needed some help. The end result was that a streaming read would result in a single CPU running at 100% instead of balancing the work off to other CPUs. This patch also changes the pre-IO checksum lookup done by reads to work on a per-bio basis instead of a per-page. This results in many extra btree lookups on large streaming reads. Doing the checksum lookup right before bio submit allows us to reuse searches while processing adjacent offsets. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e826730d750f..d2d1cc87e8ad 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1357,10 +1357,25 @@ struct btrfs_root *open_ctree(struct super_block *sb, */ btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size); + + /* a higher idle thresh on the submit workers makes it much more + * likely that bios will be send down in a sane order to the + * devices + */ + fs_info->submit_workers.idle_thresh = 64; + btrfs_init_workers(&fs_info->fixup_workers, 1); btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->endio_write_workers, fs_info->thread_pool_size); + + /* + * endios are largely parallel and should have a very + * low idle thresh + */ + fs_info->endio_workers.idle_thresh = 4; + fs_info->endio_write_workers.idle_thresh = 4; + btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->submit_workers, 1); btrfs_start_workers(&fs_info->fixup_workers, 1); -- cgit v1.2.3 From 2dd3e67b1eaec8504da7e12b8afee77323a49f38 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 4 Aug 2008 08:20:15 -0400 Subject: Btrfs: More throttle tuning * Make walk_down_tree wake up throttled tasks more often * Make walk_down_tree call cond_resched during long loops * As the size of the ref cache grows, wait longer in throttle * Get rid of the reada code in walk_down_tree, the leaves don't get read anymore, thanks to the ref cache. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d2d1cc87e8ad..da9dda4338a3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -188,13 +188,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, btrfs_csum_final(crc, result); if (verify) { - int from_this_trans = 0; - - if (root->fs_info->running_transaction && - btrfs_header_generation(buf) == - root->fs_info->running_transaction->transid) - from_this_trans = 1; - /* FIXME, this is not good */ if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) { u32 val; @@ -203,11 +196,9 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE); printk("btrfs: %s checksum verify failed on %llu " - "wanted %X found %X from_this_trans %d " - "level %d\n", + "wanted %X found %X level %d\n", root->fs_info->sb->s_id, - buf->start, val, found, from_this_trans, - btrfs_header_level(buf)); + buf->start, val, found, btrfs_header_level(buf)); return 1; } } else { -- cgit v1.2.3 From ea8c281947950fac5f78818b767821d696c9512a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 4 Aug 2008 23:17:27 -0400 Subject: Btrfs: Maintain a list of inodes that are delalloc and a way to wait on them Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index da9dda4338a3..76543683f3b9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1234,6 +1234,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); + INIT_LIST_HEAD(&fs_info->delalloc_inodes); spin_lock_init(&fs_info->hash_lock); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); -- cgit v1.2.3 From 4ca8b41e3fe76cc5774fbcc6cba64259d69e0f96 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 5 Aug 2008 13:30:48 -0400 Subject: Btrfs: Avoid calling into the FS for the final iput on fake root inodes Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 76543683f3b9..56c54a41dbbb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1739,6 +1739,7 @@ int close_ctree(struct btrfs_root *root) free_extent_buffer(root->fs_info->dev_root->node); btrfs_free_block_groups(root->fs_info); + fs_info->closing = 2; del_fs_roots(fs_info); filemap_write_and_wait(fs_info->btree_inode->i_mapping); -- cgit v1.2.3 From b720d209524fccdd33a499351f2ca880b8c0ffb4 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Aug 2008 15:34:14 -0400 Subject: Btrfs: Limit the number of async bio submission kthreads to the number of devices Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 56c54a41dbbb..99bd9f9b9eed 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1348,7 +1348,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, * cannot dynamically grow. */ btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); - btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->submit_workers, + min_t(u64, fs_devices->num_devices, + fs_info->thread_pool_size)); /* a higher idle thresh on the submit workers makes it much more * likely that bios will be send down in a sane order to the -- cgit v1.2.3 From 0986fe9eac24fd186927c3b87af51d62f8ab92cd Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Aug 2008 15:34:15 -0400 Subject: Btrfs: Count async bios separately from async checksum work items Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 99bd9f9b9eed..9902d29abd06 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -429,6 +429,21 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, return 0; } +static int congested_async(struct btrfs_fs_info *info, int iodone) +{ + int limit = 256 * info->fs_devices->open_devices; + + if (iodone) + limit = (limit * 3) / 2; + if (atomic_read(&info->nr_async_submits) > limit) + return 1; + + limit = 8192 * info->fs_devices->open_devices; + if (iodone) + limit = (limit * 3) / 2; + return atomic_read(&info->nr_async_bios) > limit; +} + static void run_one_async_submit(struct btrfs_work *work) { struct btrfs_fs_info *fs_info; @@ -437,6 +452,11 @@ static void run_one_async_submit(struct btrfs_work *work) async = container_of(work, struct async_submit_bio, work); fs_info = BTRFS_I(async->inode)->root->fs_info; atomic_dec(&fs_info->nr_async_submits); + + if ((async->bio->bi_rw & (1 << BIO_RW)) && + !congested_async(fs_info, 1)) { + clear_bdi_congested(&fs_info->bdi, WRITE); + } async->submit_bio_hook(async->inode, async->rw, async->bio, async->mirror_num); kfree(async); @@ -938,15 +958,13 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) { struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; int ret = 0; - int limit = 256 * info->fs_devices->open_devices; struct list_head *cur; struct btrfs_device *device; struct backing_dev_info *bdi; if ((bdi_bits & (1 << BDI_write_congested)) && - atomic_read(&info->nr_async_submits) > limit) { + congested_async(info, 0)) return 1; - } list_for_each(cur, &info->fs_devices->devices) { device = list_entry(cur, struct btrfs_device, dev_list); @@ -1250,6 +1268,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); atomic_set(&fs_info->nr_async_submits, 0); + atomic_set(&fs_info->nr_async_bios, 0); atomic_set(&fs_info->throttles, 0); atomic_set(&fs_info->throttle_gen, 0); fs_info->sb = sb; -- cgit v1.2.3 From 777e6bd706ee40897545463871de5b456fbc46dc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Aug 2008 15:34:15 -0400 Subject: Btrfs: Transaction commit: don't use filemap_fdatawait After writing out all the remaining btree blocks in the transaction, the commit code would use filemap_fdatawait to make sure it was all on disk. This means it would wait for blocks written by other procs as well. The new code walks the list of blocks for this transaction again and waits only for those required by this transaction. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9902d29abd06..9601b13c7d7a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -429,7 +429,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, return 0; } -static int congested_async(struct btrfs_fs_info *info, int iodone) +int btrfs_congested_async(struct btrfs_fs_info *info, int iodone) { int limit = 256 * info->fs_devices->open_devices; @@ -438,9 +438,6 @@ static int congested_async(struct btrfs_fs_info *info, int iodone) if (atomic_read(&info->nr_async_submits) > limit) return 1; - limit = 8192 * info->fs_devices->open_devices; - if (iodone) - limit = (limit * 3) / 2; return atomic_read(&info->nr_async_bios) > limit; } @@ -454,7 +451,7 @@ static void run_one_async_submit(struct btrfs_work *work) atomic_dec(&fs_info->nr_async_submits); if ((async->bio->bi_rw & (1 << BIO_RW)) && - !congested_async(fs_info, 1)) { + !btrfs_congested_async(fs_info, 1)) { clear_bdi_congested(&fs_info->bdi, WRITE); } async->submit_bio_hook(async->inode, async->rw, async->bio, @@ -963,7 +960,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) struct backing_dev_info *bdi; if ((bdi_bits & (1 << BDI_write_congested)) && - congested_async(info, 0)) + btrfs_congested_async(info, 0)) return 1; list_for_each(cur, &info->fs_devices->devices) { @@ -1844,7 +1841,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) struct extent_io_tree *tree; u64 num_dirty; u64 start = 0; - unsigned long thresh = 16 * 1024 * 1024; + unsigned long thresh = 2 * 1024 * 1024; tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; if (current_is_pdflush()) -- cgit v1.2.3 From 5443be45f5cb57d02fd895a0bcaf7e7d9890b1df Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Aug 2008 15:34:16 -0400 Subject: Btrfs: Give all the worker threads descriptive names Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9601b13c7d7a..1bf210dadef6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -492,11 +492,11 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, /* * when we're called for a write, we're already in the async - * submission context. Just jump ingo btrfs_map_bio + * submission context. Just jump into btrfs_map_bio */ if (rw & (1 << BIO_RW)) { return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, - mirror_num, 0); + mirror_num, 1); } /* @@ -528,6 +528,12 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc) { struct extent_io_tree *tree; tree = &BTRFS_I(page->mapping->host)->io_tree; + + if (current->flags & PF_MEMALLOC) { + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } return extent_write_full_page(tree, page, btree_get_extent, wbc); } @@ -1363,8 +1369,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, * queue work function gets called at interrupt time, and so it * cannot dynamically grow. */ - btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size); - btrfs_init_workers(&fs_info->submit_workers, + btrfs_init_workers(&fs_info->workers, "worker", + fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->submit_workers, "submit", min_t(u64, fs_devices->num_devices, fs_info->thread_pool_size)); @@ -1374,9 +1381,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, */ fs_info->submit_workers.idle_thresh = 64; - btrfs_init_workers(&fs_info->fixup_workers, 1); - btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size); - btrfs_init_workers(&fs_info->endio_write_workers, + btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1); + btrfs_init_workers(&fs_info->endio_workers, "endio", + fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", fs_info->thread_pool_size); /* -- cgit v1.2.3 From 4854ddd0ed0a687fc2d7c45a529c406232e31e7b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Aug 2008 15:34:17 -0400 Subject: Btrfs: Wait for kernel threads to make progress during async submission Before this change, btrfs would use a bdi congestion function to make sure there weren't too many pending async checksum work items. This change makes the process creating async work items wait instead, leading to fewer congestion returns from the bdi. This improves pdflush background_writeout scanning. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 45 ++++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1bf210dadef6..1aed1f4616b6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -429,31 +429,36 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, return 0; } -int btrfs_congested_async(struct btrfs_fs_info *info, int iodone) +static unsigned long async_submit_limit(struct btrfs_fs_info *info) { - int limit = 256 * info->fs_devices->open_devices; - - if (iodone) - limit = (limit * 3) / 2; - if (atomic_read(&info->nr_async_submits) > limit) - return 1; + unsigned long limit = min_t(unsigned long, + info->workers.max_workers, + info->fs_devices->open_devices); + return 256 * limit; +} - return atomic_read(&info->nr_async_bios) > limit; +int btrfs_congested_async(struct btrfs_fs_info *info, int iodone) +{ + return atomic_read(&info->nr_async_bios) > async_submit_limit(info); } static void run_one_async_submit(struct btrfs_work *work) { struct btrfs_fs_info *fs_info; struct async_submit_bio *async; + int limit; async = container_of(work, struct async_submit_bio, work); fs_info = BTRFS_I(async->inode)->root->fs_info; + + limit = async_submit_limit(fs_info); + limit = limit * 2 / 3; + atomic_dec(&fs_info->nr_async_submits); - if ((async->bio->bi_rw & (1 << BIO_RW)) && - !btrfs_congested_async(fs_info, 1)) { - clear_bdi_congested(&fs_info->bdi, WRITE); - } + if (atomic_read(&fs_info->nr_async_submits) < limit) + wake_up(&fs_info->async_submit_wait); + async->submit_bio_hook(async->inode, async->rw, async->bio, async->mirror_num); kfree(async); @@ -464,6 +469,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, extent_submit_bio_hook_t *submit_bio_hook) { struct async_submit_bio *async; + int limit = async_submit_limit(fs_info); async = kmalloc(sizeof(*async), GFP_NOFS); if (!async) @@ -478,6 +484,10 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, async->work.flags = 0; atomic_inc(&fs_info->nr_async_submits); btrfs_queue_worker(&fs_info->workers, &async->work); + + wait_event_timeout(fs_info->async_submit_wait, + (atomic_read(&fs_info->nr_async_submits) < limit), + HZ/10); return 0; } @@ -545,16 +555,11 @@ static int btree_writepages(struct address_space *mapping, if (wbc->sync_mode == WB_SYNC_NONE) { u64 num_dirty; u64 start = 0; - unsigned long thresh = 96 * 1024 * 1024; + unsigned long thresh = 8 * 1024 * 1024; if (wbc->for_kupdate) return 0; - if (current_is_pdflush()) { - thresh = 96 * 1024 * 1024; - } else { - thresh = 8 * 1024 * 1024; - } num_dirty = count_range_bits(tree, &start, (u64)-1, thresh, EXTENT_DIRTY); if (num_dirty < thresh) { @@ -1333,6 +1338,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->volume_mutex); init_waitqueue_head(&fs_info->transaction_throttle); init_waitqueue_head(&fs_info->transaction_wait); + init_waitqueue_head(&fs_info->async_submit_wait); #if 0 ret = add_hasher(fs_info, "crc32c"); @@ -1380,6 +1386,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, * devices */ fs_info->submit_workers.idle_thresh = 64; + fs_info->workers.idle_thresh = 32; btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1); btrfs_init_workers(&fs_info->endio_workers, "endio", @@ -1849,7 +1856,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) struct extent_io_tree *tree; u64 num_dirty; u64 start = 0; - unsigned long thresh = 2 * 1024 * 1024; + unsigned long thresh = 12 * 1024 * 1024; tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; if (current_is_pdflush()) -- cgit v1.2.3 From 53863232ef961778aa414b700ed88a48e8e871e6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 15 Aug 2008 15:34:18 -0400 Subject: Btrfs: Lower contention on the csum mutex This takes the csum mutex deeper in the call chain and releases it more often. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1aed1f4616b6..92e14dd9bddb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1386,7 +1386,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, * devices */ fs_info->submit_workers.idle_thresh = 64; - fs_info->workers.idle_thresh = 32; + + /* fs_info->workers is responsible for checksumming file data + * blocks and metadata. Using a larger idle thresh allows each + * worker thread to operate on things in roughly the order they + * were sent by the writeback daemons, improving overall locality + * of the IO going down the pipe. + */ + fs_info->workers.idle_thresh = 128; btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1); btrfs_init_workers(&fs_info->endio_workers, "endio", -- cgit v1.2.3 From b64a2851ba25b3410a87d3d1b751155612105c8e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 20 Aug 2008 13:39:41 -0400 Subject: Btrfs: Wait for async bio submissions to make some progress at queue time Before, the btrfs bdi congestion function was used to test for too many async bios. This keeps that check to throttle pdflush, but also adds a check while queuing bios. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 92e14dd9bddb..bbba14b629d2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -429,7 +429,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, return 0; } -static unsigned long async_submit_limit(struct btrfs_fs_info *info) +unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) { unsigned long limit = min_t(unsigned long, info->workers.max_workers, @@ -439,7 +439,8 @@ static unsigned long async_submit_limit(struct btrfs_fs_info *info) int btrfs_congested_async(struct btrfs_fs_info *info, int iodone) { - return atomic_read(&info->nr_async_bios) > async_submit_limit(info); + return atomic_read(&info->nr_async_bios) > + btrfs_async_submit_limit(info); } static void run_one_async_submit(struct btrfs_work *work) @@ -451,12 +452,13 @@ static void run_one_async_submit(struct btrfs_work *work) async = container_of(work, struct async_submit_bio, work); fs_info = BTRFS_I(async->inode)->root->fs_info; - limit = async_submit_limit(fs_info); + limit = btrfs_async_submit_limit(fs_info); limit = limit * 2 / 3; atomic_dec(&fs_info->nr_async_submits); - if (atomic_read(&fs_info->nr_async_submits) < limit) + if (atomic_read(&fs_info->nr_async_submits) < limit && + waitqueue_active(&fs_info->async_submit_wait)) wake_up(&fs_info->async_submit_wait); async->submit_bio_hook(async->inode, async->rw, async->bio, @@ -469,7 +471,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, extent_submit_bio_hook_t *submit_bio_hook) { struct async_submit_bio *async; - int limit = async_submit_limit(fs_info); + int limit = btrfs_async_submit_limit(fs_info); async = kmalloc(sizeof(*async), GFP_NOFS); if (!async) @@ -1863,10 +1865,10 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) struct extent_io_tree *tree; u64 num_dirty; u64 start = 0; - unsigned long thresh = 12 * 1024 * 1024; + unsigned long thresh = 96 * 1024 * 1024; tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; - if (current_is_pdflush()) + if (current_is_pdflush() || current->flags & PF_MEMALLOC) return; num_dirty = count_range_bits(tree, &start, (u64)-1, -- cgit v1.2.3 From 9473f16c75606fe6b2e5000525fe9766114505f3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 28 Aug 2008 06:15:24 -0400 Subject: Btrfs: Throttle for async bio submits higher up the chain The current code waits for the count of async bio submits to get below a given threshold if it is too high right after adding the latest bio to the work queue. This isn't optimal because the caller may have sequential adjacent bios pending they are waiting to send down the pipe. This changeset requires the caller to wait on the async bio count, and changes the async checksumming submits to wait for async bios any time they self throttle. The end result is much higher sequential throughput. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bbba14b629d2..6a218f792e59 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -487,9 +487,15 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, atomic_inc(&fs_info->nr_async_submits); btrfs_queue_worker(&fs_info->workers, &async->work); - wait_event_timeout(fs_info->async_submit_wait, + if (atomic_read(&fs_info->nr_async_submits) > limit) { + wait_event_timeout(fs_info->async_submit_wait, (atomic_read(&fs_info->nr_async_submits) < limit), HZ/10); + + wait_event_timeout(fs_info->async_submit_wait, + (atomic_read(&fs_info->nr_async_bios) < limit), + HZ/10); + } return 0; } -- cgit v1.2.3 From a1b32a5932cfac7c38b442582285f3da2a09dfd8 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 5 Sep 2008 16:09:51 -0400 Subject: Btrfs: Add debugging checks to track down corrupted metadata Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6a218f792e59..8e7a938bfbc7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -250,7 +250,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, if (!ret && !verify_parent_transid(io_tree, eb, parent_transid)) return ret; - +printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror_num); num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, eb->start, eb->len); if (num_copies == 1) @@ -348,6 +348,9 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, found_start = btrfs_header_bytenr(eb); if (found_start != start) { + printk("bad tree block start %llu %llu\n", + (unsigned long long)found_start, + (unsigned long long)eb->start); ret = -EIO; goto err; } @@ -709,6 +712,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, if (ret == 0) { buf->flags |= EXTENT_UPTODATE; + } else { + WARN_ON(1); } return buf; -- cgit v1.2.3 From e02119d5a7b4396c5a872582fddc8bd6d305a70a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 5 Sep 2008 16:13:11 -0400 Subject: Btrfs: Add a write ahead tree log to optimize synchronous operations File syncs and directory syncs are optimized by copying their items into a special (copy-on-write) log tree. There is one log tree per subvolume and the btrfs super block points to a tree of log tree roots. After a crash, items are copied out of the log tree and back into the subvolume. See tree-log.c for all the details. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 138 +++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 117 insertions(+), 21 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8e7a938bfbc7..a4373db5967a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -41,6 +41,7 @@ #include "async-thread.h" #include "locking.h" #include "ref-cache.h" +#include "tree-log.h" #if 0 static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) @@ -694,6 +695,18 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, } +int btrfs_write_tree_block(struct extent_buffer *buf) +{ + return btrfs_fdatawrite_range(buf->first_page->mapping, buf->start, + buf->start + buf->len - 1, WB_SYNC_NONE); +} + +int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) +{ + return btrfs_wait_on_page_writeback_range(buf->first_page->mapping, + buf->start, buf->start + buf->len -1); +} + struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, u64 parent_transid) { @@ -732,15 +745,6 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, return 0; } -int wait_on_tree_block_writeback(struct btrfs_root *root, - struct extent_buffer *buf) -{ - struct inode *btree_inode = root->fs_info->btree_inode; - wait_on_extent_buffer_writeback(&BTRFS_I(btree_inode)->io_tree, - buf); - return 0; -} - static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, u32 stripesize, struct btrfs_root *root, struct btrfs_fs_info *fs_info, @@ -771,6 +775,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->node_lock); spin_lock_init(&root->list_lock); mutex_init(&root->objectid_mutex); + mutex_init(&root->log_mutex); btrfs_leaf_ref_tree_init(&root->ref_tree_struct); root->ref_tree = &root->ref_tree_struct; @@ -809,11 +814,74 @@ static int find_and_setup_root(struct btrfs_root *tree_root, return 0; } -struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_fs_info *fs_info, - struct btrfs_key *location) +int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct extent_buffer *eb; + int ret; + + if (!fs_info->log_root_tree) + return 0; + + eb = fs_info->log_root_tree->node; + + WARN_ON(btrfs_header_level(eb) != 0); + WARN_ON(btrfs_header_nritems(eb) != 0); + + ret = btrfs_free_extent(trans, fs_info->tree_root, + eb->start, eb->len, + BTRFS_TREE_LOG_OBJECTID, 0, 0, 0, 1); + BUG_ON(ret); + + free_extent_buffer(eb); + kfree(fs_info->log_root_tree); + fs_info->log_root_tree = NULL; + return 0; +} + +int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) { struct btrfs_root *root; struct btrfs_root *tree_root = fs_info->tree_root; + + root = kzalloc(sizeof(*root), GFP_NOFS); + if (!root) + return -ENOMEM; + + __setup_root(tree_root->nodesize, tree_root->leafsize, + tree_root->sectorsize, tree_root->stripesize, + root, fs_info, BTRFS_TREE_LOG_OBJECTID); + + root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; + root->root_key.type = BTRFS_ROOT_ITEM_KEY; + root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; + root->ref_cows = 0; + + root->node = btrfs_alloc_free_block(trans, root, root->leafsize, + BTRFS_TREE_LOG_OBJECTID, + 0, 0, 0, 0, 0); + + btrfs_set_header_nritems(root->node, 0); + btrfs_set_header_level(root->node, 0); + btrfs_set_header_bytenr(root->node, root->node->start); + btrfs_set_header_generation(root->node, trans->transid); + btrfs_set_header_owner(root->node, BTRFS_TREE_LOG_OBJECTID); + + write_extent_buffer(root->node, root->fs_info->fsid, + (unsigned long)btrfs_header_fsid(root->node), + BTRFS_FSID_SIZE); + btrfs_mark_buffer_dirty(root->node); + btrfs_tree_unlock(root->node); + fs_info->log_root_tree = root; + return 0; +} + +struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, + struct btrfs_key *location) +{ + struct btrfs_root *root; + struct btrfs_fs_info *fs_info = tree_root->fs_info; struct btrfs_path *path; struct extent_buffer *l; u64 highest_inode; @@ -863,11 +931,13 @@ out: blocksize, 0); BUG_ON(!root->node); insert: - root->ref_cows = 1; - ret = btrfs_find_highest_inode(root, &highest_inode); - if (ret == 0) { - root->highest_inode = highest_inode; - root->last_inode_alloc = highest_inode; + if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { + root->ref_cows = 1; + ret = btrfs_find_highest_inode(root, &highest_inode); + if (ret == 0) { + root->highest_inode = highest_inode; + root->last_inode_alloc = highest_inode; + } } return root; } @@ -907,7 +977,7 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, if (root) return root; - root = btrfs_read_fs_root_no_radix(fs_info, location); + root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); if (IS_ERR(root)) return root; ret = radix_tree_insert(&fs_info->fs_roots_radix, @@ -1250,16 +1320,18 @@ struct btrfs_root *open_ctree(struct super_block *sb, u32 blocksize; u32 stripesize; struct buffer_head *bh; - struct btrfs_root *extent_root = kmalloc(sizeof(struct btrfs_root), + struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); - struct btrfs_root *tree_root = kmalloc(sizeof(struct btrfs_root), + struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), GFP_NOFS); - struct btrfs_root *chunk_root = kmalloc(sizeof(struct btrfs_root), + struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); - struct btrfs_root *dev_root = kmalloc(sizeof(struct btrfs_root), + struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); + struct btrfs_root *log_tree_root; + int ret; int err = -EINVAL; @@ -1343,6 +1415,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); mutex_init(&fs_info->trans_mutex); + mutex_init(&fs_info->tree_log_mutex); mutex_init(&fs_info->drop_mutex); mutex_init(&fs_info->alloc_mutex); mutex_init(&fs_info->chunk_mutex); @@ -1352,6 +1425,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, init_waitqueue_head(&fs_info->transaction_throttle); init_waitqueue_head(&fs_info->transaction_wait); init_waitqueue_head(&fs_info->async_submit_wait); + init_waitqueue_head(&fs_info->tree_log_wait); + atomic_set(&fs_info->tree_log_commit, 0); + atomic_set(&fs_info->tree_log_writers, 0); + fs_info->tree_log_transid = 0; #if 0 ret = add_hasher(fs_info, "crc32c"); @@ -1532,7 +1609,26 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!fs_info->transaction_kthread) goto fail_cleaner; + if (btrfs_super_log_root(disk_super) != 0) { + u32 blocksize; + u64 bytenr = btrfs_super_log_root(disk_super); + + blocksize = + btrfs_level_size(tree_root, + btrfs_super_log_root_level(disk_super)); + log_tree_root = kzalloc(sizeof(struct btrfs_root), + GFP_NOFS); + + __setup_root(nodesize, leafsize, sectorsize, stripesize, + log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); + + log_tree_root->node = read_tree_block(tree_root, bytenr, + blocksize, 0); + ret = btrfs_recover_log_trees(log_tree_root); + BUG_ON(ret); + } + fs_info->last_trans_committed = btrfs_super_generation(disk_super); return tree_root; fail_cleaner: -- cgit v1.2.3 From 4bef084857ab8fe71cf49eae349c25e440a49150 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 8 Sep 2008 11:18:08 -0400 Subject: Btrfs: Tree logging fixes * Pin down data blocks to prevent them from being reallocated like so: trans 1: allocate file extent trans 2: free file extent trans 3: free file extent during old snapshot deletion trans 3: allocate file extent to new file trans 3: fsync new file Before the tree logging code, this was legal because the fsync would commit the transation that did the final data extent free and the transaction that allocated the extent to the new file at the same time. With the tree logging code, the tree log subtransaction can commit before the transaction that freed the extent. If we crash, we're left with two different files using the extent. * Don't wait in start_transaction if log replay is going on. This avoids deadlocks from iput while we're cleaning up link counts in the replay code. * Don't deadlock in replay_one_name by trying to read an inode off the disk while holding paths for the directory * Hold the buffer lock while we mark a buffer as written. This closes a race where someone is changing a buffer while we write it. They are supposed to mark it dirty again after they change it, but this violates the cow rules. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a4373db5967a..42bf99168056 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -307,9 +307,7 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) goto err; } found_level = btrfs_header_level(eb); - spin_lock(&root->fs_info->hash_lock); - btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); - spin_unlock(&root->fs_info->hash_lock); + csum_tree_block(root, eb, 0); err: free_extent_buffer(eb); @@ -1998,7 +1996,36 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) return ret; } +int btree_lock_page_hook(struct page *page) +{ + struct inode *inode = page->mapping->host; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct extent_buffer *eb; + unsigned long len; + u64 bytenr = page_offset(page); + + if (page->private == EXTENT_PAGE_PRIVATE) + goto out; + + len = page->private >> 2; + eb = find_extent_buffer(io_tree, bytenr, len, GFP_NOFS); + if (!eb) + goto out; + + btrfs_tree_lock(eb); + spin_lock(&root->fs_info->hash_lock); + btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); + spin_unlock(&root->fs_info->hash_lock); + btrfs_tree_unlock(eb); + free_extent_buffer(eb); +out: + lock_page(page); + return 0; +} + static struct extent_io_ops btree_extent_io_ops = { + .write_cache_pages_lock_hook = btree_lock_page_hook, .writepage_io_hook = btree_writepage_io_hook, .readpage_end_io_hook = btree_readpage_end_io_hook, .submit_bio_hook = btree_submit_bio_hook, -- cgit v1.2.3 From 98509cfc5a6857bddcfe4b19a9539726655ec9bd Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 11 Sep 2008 15:51:43 -0400 Subject: Btrfs: Fix releasepage to properly keep dirty and writeback pages Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 42bf99168056..ecb74b720262 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -346,7 +346,7 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); found_start = btrfs_header_bytenr(eb); - if (found_start != start) { + if (0 && found_start != start) { printk("bad tree block start %llu %llu\n", (unsigned long long)found_start, (unsigned long long)eb->start); @@ -592,6 +592,9 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) struct extent_map_tree *map; int ret; + if (PageWriteback(page) || PageDirty(page)) + return 0; + tree = &BTRFS_I(page->mapping->host)->io_tree; map = &BTRFS_I(page->mapping->host)->extent_tree; -- cgit v1.2.3 From 3a5f1d458ad1610a06e38f0be2fbc6ac215439c0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 11 Sep 2008 15:53:37 -0400 Subject: Btrfs: Optimize btree walking while logging inodes Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ecb74b720262..7c06eb4ecfdd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1497,7 +1497,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, * low idle thresh */ fs_info->endio_workers.idle_thresh = 4; - fs_info->endio_write_workers.idle_thresh = 4; + fs_info->endio_write_workers.idle_thresh = 64; btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->submit_workers, 1); -- cgit v1.2.3 From d00aff00139b40f2e9c60299d76aac29d72e48ba Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 11 Sep 2008 15:54:42 -0400 Subject: Btrfs: Optimize tree log block allocations Since tree log blocks get freed every transaction, they never really need to be written to disk. This skips the step where we update metadata to record they were allocated. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7c06eb4ecfdd..5edb7f885799 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -829,9 +829,8 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, WARN_ON(btrfs_header_level(eb) != 0); WARN_ON(btrfs_header_nritems(eb) != 0); - ret = btrfs_free_extent(trans, fs_info->tree_root, - eb->start, eb->len, - BTRFS_TREE_LOG_OBJECTID, 0, 0, 0, 1); + ret = btrfs_free_reserved_extent(fs_info->tree_root, + eb->start, eb->len); BUG_ON(ret); free_extent_buffer(eb); -- cgit v1.2.3 From d0c803c4049c5ca322d4795d8b74f28768603e0e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 11 Sep 2008 16:17:57 -0400 Subject: Btrfs: Record dirty pages tree-log pages in an extent_io tree This is the same way the transaction code makes sure that all the other tree blocks are safely on disk. There's an extent_io tree for each root, and any blocks allocated to the tree logs are recorded in that tree. At tree-log sync, the extent_io tree is walked to flush down the dirty pages and wait for them. The main benefit is less time spent walking the tree log and skipping clean pages, and getting sequential IO down to the drive. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5edb7f885799..57fbf107e59f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -777,6 +777,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->list_lock); mutex_init(&root->objectid_mutex); mutex_init(&root->log_mutex); + extent_io_tree_init(&root->dirty_log_pages, + fs_info->btree_inode->i_mapping, GFP_NOFS); btrfs_leaf_ref_tree_init(&root->ref_tree_struct); root->ref_tree = &root->ref_tree_struct; @@ -819,11 +821,23 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { struct extent_buffer *eb; + struct btrfs_root *log_root_tree = fs_info->log_root_tree; + u64 start = 0; + u64 end = 0; int ret; - if (!fs_info->log_root_tree) + if (!log_root_tree) return 0; + while(1) { + ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, + 0, &start, &end, EXTENT_DIRTY); + if (ret) + break; + + clear_extent_dirty(&log_root_tree->dirty_log_pages, + start, end, GFP_NOFS); + } eb = fs_info->log_root_tree->node; WARN_ON(btrfs_header_level(eb) != 0); @@ -1412,7 +1426,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); insert_inode_hash(fs_info->btree_inode); - mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->tree_log_mutex); -- cgit v1.2.3 From 23a07867b78ee0f33b01466e52bb608c336b26ee Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 12 Sep 2008 08:57:47 -0400 Subject: Btrfs: Fix mismerge in block header checks I had incorrectly disabled the check for the block number being correct in the header block. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 57fbf107e59f..f6f7821d43a5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -346,7 +346,7 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); found_start = btrfs_header_bytenr(eb); - if (0 && found_start != start) { + if (found_start != start) { printk("bad tree block start %llu %llu\n", (unsigned long long)found_start, (unsigned long long)eb->start); -- cgit v1.2.3 From 0f9dd46cda36b8de3b9f48bc42bd09d20b9c3b52 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 23 Sep 2008 13:14:11 -0400 Subject: Btrfs: free space accounting redo 1) replace the per fs_info extent_io_tree that tracked free space with two rb-trees per block group to track free space areas via offset and size. The reason to do this is because most allocations come with a hint byte where to start, so we can usually find a chunk of free space at that hint byte to satisfy the allocation and get good space packing. If we cannot find free space at or after the given offset we fall back on looking for a chunk of the given size as close to that given offset as possible. When we fall back on the size search we also try to find a slot as close to the size we want as possible, to avoid breaking small chunks off of huge areas if possible. 2) remove the extent_io_tree that tracked the block group cache from fs_info and replaced it with an rb-tree thats tracks block group cache via offset. also added a per space_info list that tracks the block group cache for the particular space so we can lookup related block groups easily. 3) cleaned up the allocation code to make it a little easier to read and a little less complicated. Basically there are 3 steps, first look from our provided hint. If we couldn't find from that given hint, start back at our original search start and look for space from there. If that fails try to allocate space if we can and start looking again. If not we're screwed and need to start over again. 4) small fixes. there were some issues in volumes.c where we wouldn't allocate the rest of the disk. fixed cow_file_range to actually pass the alloc_hint, which has helped a good bit in making the fs_mark test I run have semi-normal results as we run out of space. Generally with data allocations we don't track where we last allocated from, so everytime we did a data allocation we'd search through every block group that we have looking for free space. Now searching a block group with no free space isn't terribly time consuming, it was causing a slight degradation as we got more data block groups. The alloc_hint has fixed this slight degredation and made things semi-normal. There is still one nagging problem I'm working on where we will get ENOSPC when there is definitely plenty of space. This only happens with metadata allocations, and only when we are almost full. So you generally hit the 85% mark first, but sometimes you'll hit the BUG before you hit the 85% wall. I'm still tracking it down, but until then this seems to be pretty stable and make a significant performance gain. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f6f7821d43a5..535bd0fe1a71 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1410,10 +1410,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; - extent_io_tree_init(&fs_info->free_space_cache, - fs_info->btree_inode->i_mapping, GFP_NOFS); - extent_io_tree_init(&fs_info->block_group_cache, - fs_info->btree_inode->i_mapping, GFP_NOFS); + spin_lock_init(&fs_info->block_group_cache_lock); + fs_info->block_group_cache_tree.rb_node = NULL; + extent_io_tree_init(&fs_info->pinned_extents, fs_info->btree_inode->i_mapping, GFP_NOFS); extent_io_tree_init(&fs_info->pending_del, -- cgit v1.2.3 From ce3ed71a58094544da1e1dc4723f1f553bb5b2d5 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 23 Sep 2008 13:14:12 -0400 Subject: Btrfs: Checksum tree blocks in the background Tree blocks were using async bio submission, but the sum was still being done directly during writepage. This moves the checksumming into the worker thread. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 535bd0fe1a71..25be96946a2f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -315,14 +315,6 @@ out: return 0; } -static int btree_writepage_io_hook(struct page *page, u64 start, u64 end) -{ - struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; - - csum_dirty_buffer(root, page); - return 0; -} - int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state) { @@ -501,6 +493,22 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, return 0; } +static int btree_csum_one_bio(struct bio *bio) +{ + struct bio_vec *bvec = bio->bi_io_vec; + int bio_index = 0; + struct btrfs_root *root; + + WARN_ON(bio->bi_vcnt <= 0); + while(bio_index < bio->bi_vcnt) { + root = BTRFS_I(bvec->bv_page->mapping->host)->root; + csum_dirty_buffer(root, bvec->bv_page); + bio_index++; + bvec++; + } + return 0; +} + static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num) { @@ -515,6 +523,7 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, * submission context. Just jump into btrfs_map_bio */ if (rw & (1 << BIO_RW)) { + btree_csum_one_bio(bio); return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); } @@ -2040,7 +2049,6 @@ out: static struct extent_io_ops btree_extent_io_ops = { .write_cache_pages_lock_hook = btree_lock_page_hook, - .writepage_io_hook = btree_writepage_io_hook, .readpage_end_io_hook = btree_readpage_end_io_hook, .submit_bio_hook = btree_submit_bio_hook, /* note we're sharing with inode.c for the merge bio hook */ -- cgit v1.2.3 From 31840ae1a6b433ca0e6a8d341756ff478bbf959e Mon Sep 17 00:00:00 2001 From: Zheng Yan Date: Tue, 23 Sep 2008 13:14:14 -0400 Subject: Btrfs: Full back reference support This patch makes the back reference system to explicit record the location of parent node for all types of extents. The location of parent node is placed into the offset field of backref key. Every time a tree block is balanced, the back references for the affected lower level extents are updated. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 25be96946a2f..d35ca6a3f513 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -882,8 +882,8 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, root->ref_cows = 0; root->node = btrfs_alloc_free_block(trans, root, root->leafsize, - BTRFS_TREE_LOG_OBJECTID, - 0, 0, 0, 0, 0); + 0, BTRFS_TREE_LOG_OBJECTID, + trans->transid, 0, 0, 0); btrfs_set_header_nritems(root->node, 0); btrfs_set_header_level(root->node, 0); -- cgit v1.2.3 From 2b1f55b0f0d0d1a66470ef4ea2696cd5dd741a12 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 24 Sep 2008 11:48:04 -0400 Subject: Remove Btrfs compat code for older kernels Btrfs had compatibility code for kernels back to 2.6.18. These have been removed, and will be maintained in a separate backport git tree from now on. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d35ca6a3f513..dffb8dabd533 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -26,11 +26,7 @@ #include // for block_sync_page #include #include -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) # include -#else -# include -#endif #include "crc32c.h" #include "ctree.h" #include "disk-io.h" @@ -373,21 +369,11 @@ out: return ret; } -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) static void end_workqueue_bio(struct bio *bio, int err) -#else -static int end_workqueue_bio(struct bio *bio, - unsigned int bytes_done, int err) -#endif { struct end_io_wq *end_io_wq = bio->bi_private; struct btrfs_fs_info *fs_info; -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - if (bio->bi_size) - return 1; -#endif - fs_info = end_io_wq->info; end_io_wq->error = err; end_io_wq->work.func = end_workqueue_fn; @@ -397,10 +383,6 @@ static int end_workqueue_bio(struct bio *bio, &end_io_wq->work); else btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work); - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - return 0; -#endif } int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, @@ -1161,9 +1143,7 @@ void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) { -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) bdi_init(bdi); -#endif bdi->ra_pages = default_backing_dev_info.ra_pages; bdi->state = 0; bdi->capabilities = default_backing_dev_info.capabilities; @@ -1242,11 +1222,7 @@ static void end_workqueue_fn(struct btrfs_work *work) bio->bi_private = end_io_wq->private; bio->bi_end_io = end_io_wq->end_io; kfree(end_io_wq); -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) - bio_endio(bio, bio->bi_size, error); -#else bio_endio(bio, error); -#endif } static int cleaner_kthread(void *arg) @@ -1673,9 +1649,7 @@ fail: kfree(extent_root); kfree(tree_root); -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) bdi_destroy(&fs_info->bdi); -#endif kfree(fs_info); return ERR_PTR(err); } @@ -1936,9 +1910,7 @@ int close_ctree(struct btrfs_root *root) btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23) bdi_destroy(&fs_info->bdi); -#endif kfree(fs_info->extent_root); kfree(fs_info->tree_root); -- cgit v1.2.3 From 24ab9cd85c11bccacbd0cce7f8e1aebd4930404c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 24 Sep 2008 14:51:30 -0400 Subject: Btrfs: Raise thresholds for metadata writeback Btrfs metadata writeback is fairly expensive. Once a tree block is written it must be cowed before it can be changed again. The btree writepages code has a threshold based on a count of dirty btree bytes which is updated as IO is sent out. This changes btree_writepages to skip the writeout if there are less than 32MB of dirty bytes from the btrees, improving performance across many workloads. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dffb8dabd533..71e81f3a765b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -556,7 +556,7 @@ static int btree_writepages(struct address_space *mapping, if (wbc->sync_mode == WB_SYNC_NONE) { u64 num_dirty; u64 start = 0; - unsigned long thresh = 8 * 1024 * 1024; + unsigned long thresh = 32 * 1024 * 1024; if (wbc->for_kupdate) return 0; @@ -690,7 +690,7 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, int btrfs_write_tree_block(struct extent_buffer *buf) { return btrfs_fdatawrite_range(buf->first_page->mapping, buf->start, - buf->start + buf->len - 1, WB_SYNC_NONE); + buf->start + buf->len - 1, WB_SYNC_ALL); } int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) -- cgit v1.2.3 From e465768938f95388723b0fd3c50a0ae48173edb9 Mon Sep 17 00:00:00 2001 From: Zheng Yan Date: Fri, 26 Sep 2008 10:04:53 -0400 Subject: Btrfs: Add shared reference cache Btrfs has a cache of reference counts in leaves, allowing it to avoid reading tree leaves while deleting snapshots. To reduce contention with multiple subvolumes, this cache is private to each subvolume. This patch adds shared reference cache support. The new space balancing code plays with multiple subvols at the same time, So the old per-subvol reference cache is not well suited. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 71e81f3a765b..8969fee23318 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1406,6 +1406,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; + btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree); + BTRFS_I(fs_info->btree_inode)->root = tree_root; memset(&BTRFS_I(fs_info->btree_inode)->location, 0, sizeof(struct btrfs_key)); -- cgit v1.2.3 From 1a40e23b95da45051ee4d74374c58ae87a14051c Mon Sep 17 00:00:00 2001 From: Zheng Yan Date: Fri, 26 Sep 2008 10:09:34 -0400 Subject: Btrfs: update space balancing code This patch updates the space balancing code to utilize the new backref format. Before, btrfs-vol -b would break any COW links on data blocks or metadata. This was slow and caused the amount of space used to explode if a large number of snapshots were present. The new code can keeps the sharing of all data extents and most of the tree blocks. To maintain the sharing of data extents, the space balance code uses a seperate inode hold data extent pointers, then updates the references to point to the new location. To maintain the sharing of tree blocks, the space balance code uses reloc trees to relocate tree blocks in reference counted roots. There is one reloc tree for each subvol, and all reloc trees share same root key objectid. Reloc trees are snapshots of the latest committed roots of subvols (root->commit_root). To relocate a tree block referenced by a subvol, there are two steps. COW the block through subvol's reloc tree, then update block pointer in the subvol to point to the new block. Since all reloc trees share same root key objectid, doing special handing for tree blocks owned by them is easy. Once a tree block has been COWed in one reloc tree, we can use the resulting new block directly when the same block is required to COW again through other reloc trees. In this way, relocated tree blocks are shared between reloc trees, so they are also shared between subvols. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8969fee23318..45bc3132b054 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1406,6 +1406,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; + extent_io_tree_init(&fs_info->reloc_mapping_tree, + fs_info->btree_inode->i_mapping, GFP_NOFS); + INIT_LIST_HEAD(&fs_info->dead_reloc_roots); + btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree); btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree); BTRFS_I(fs_info->btree_inode)->root = tree_root; @@ -1421,6 +1425,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); + mutex_init(&fs_info->tree_reloc_mutex); init_waitqueue_head(&fs_info->transaction_throttle); init_waitqueue_head(&fs_info->transaction_wait); init_waitqueue_head(&fs_info->async_submit_wait); @@ -1627,6 +1632,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = btrfs_recover_log_trees(log_tree_root); BUG_ON(ret); } + + ret = btrfs_cleanup_reloc_trees(tree_root); + BUG_ON(ret); + fs_info->last_trans_committed = btrfs_super_generation(disk_super); return tree_root; -- cgit v1.2.3 From 8c8bee1d7ca47fc75b6bd24a8085c525a2394c02 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 29 Sep 2008 11:19:10 -0400 Subject: Btrfs: Wait for IO on the block device inodes of newly added devices btrfs-vol -a /dev/xxx will zero the first and last two MB of the device. The kernel code needs to wait for this IO to finish before it adds the device. btrfs metadata IO does not happen through the block device inode. A separate address space is used, allowing the zero filled buffer heads in the block device inode to be written to disk after FS metadata starts going down to the disk via the btrfs metadata inode. The end result is zero filled metadata blocks after adding new devices into the filesystem. The fix is a simple filemap_write_and_wait on the block device inode before actually inserting it into the pool of available devices. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 45bc3132b054..45b4f7285275 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -460,6 +460,13 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, async->submit_bio_hook = submit_bio_hook; async->work.func = run_one_async_submit; async->work.flags = 0; + + while(atomic_read(&fs_info->async_submit_draining) && + atomic_read(&fs_info->nr_async_submits)) { + wait_event(fs_info->async_submit_wait, + (atomic_read(&fs_info->nr_async_submits) == 0)); + } + atomic_inc(&fs_info->nr_async_submits); btrfs_queue_worker(&fs_info->workers, &async->work); @@ -495,11 +502,8 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num) { struct btrfs_root *root = BTRFS_I(inode)->root; - u64 offset; int ret; - offset = bio->bi_sector << 9; - /* * when we're called for a write, we're already in the async * submission context. Just jump into btrfs_map_bio @@ -1360,6 +1364,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); atomic_set(&fs_info->nr_async_submits, 0); + atomic_set(&fs_info->async_submit_draining, 0); atomic_set(&fs_info->nr_async_bios, 0); atomic_set(&fs_info->throttles, 0); atomic_set(&fs_info->throttle_gen, 0); -- cgit v1.2.3 From d352ac68148b69937d39ca5d48bcc4478e118dbf Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 29 Sep 2008 15:18:18 -0400 Subject: Btrfs: add and improve comments This improves the comments at the top of many functions. It didn't dive into the guts of functions because I was trying to avoid merging problems with the new allocator and back reference work. extent-tree.c and volumes.c were both skipped, and there is definitely more work todo in cleaning and commenting the code. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 45b4f7285275..5ee10d3136f5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -55,6 +55,11 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); +/* + * end_io_wq structs are used to do processing in task context when an IO is + * complete. This is used during reads to verify checksums, and it is used + * by writes to insert metadata for new file extents after IO is complete. + */ struct end_io_wq { struct bio *bio; bio_end_io_t *end_io; @@ -66,6 +71,11 @@ struct end_io_wq { struct btrfs_work work; }; +/* + * async submit bios are used to offload expensive checksumming + * onto the worker threads. They checksum file and metadata bios + * just before they are sent down the IO stack. + */ struct async_submit_bio { struct inode *inode; struct bio *bio; @@ -76,6 +86,10 @@ struct async_submit_bio { struct btrfs_work work; }; +/* + * extents on the btree inode are pretty simple, there's one extent + * that covers the entire device + */ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, size_t page_offset, u64 start, u64 len, int create) @@ -151,6 +165,10 @@ void btrfs_csum_final(u32 crc, char *result) *(__le32 *)result = ~cpu_to_le32(crc); } +/* + * compute the csum for a btree block, and either verify it or write it + * into the csum field of the block. + */ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int verify) { @@ -204,6 +222,12 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, return 0; } +/* + * we can't consider a given block up to date unless the transid of the + * block matches the transid in the parent node's pointer. This is how we + * detect blocks that either didn't get written at all or got written + * in the wrong place. + */ static int verify_parent_transid(struct extent_io_tree *io_tree, struct extent_buffer *eb, u64 parent_transid) { @@ -228,9 +252,12 @@ out: unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); return ret; - } +/* + * helper to read a given tree block, doing retries as required when + * the checksums don't match and we have alternate mirrors to try. + */ static int btree_read_extent_buffer_pages(struct btrfs_root *root, struct extent_buffer *eb, u64 start, u64 parent_transid) @@ -260,6 +287,10 @@ printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror return -EIO; } +/* + * checksum a dirty tree block before IO. This has extra checks to make + * sure we only fill in the checksum field in the first page of a multi-page block + */ int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { struct extent_io_tree *tree; -- cgit v1.2.3 From 0463bb4e8d2f717a9bf3be6cc12c0aec51cc261d Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Wed, 1 Oct 2008 19:09:04 -0400 Subject: Btrfs: disk-io.c (open_ctree): Don't deref. NULL upon failed kzalloc Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5ee10d3136f5..41b7d24d07e2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1371,7 +1371,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_super_block *disk_super; - if (!extent_root || !tree_root || !fs_info) { + if (!extent_root || !tree_root || !fs_info || + !chunk_root || !dev_root) { err = -ENOMEM; goto fail; } -- cgit v1.2.3 From 83afeac42c5680b0b70d64fb8c4724cf05483fc2 Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Wed, 1 Oct 2008 19:09:51 -0400 Subject: Btrfs: disk-io.c (open_ctree): avoid leaks upon allocation failure Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 41b7d24d07e2..0be044bb6194 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1699,6 +1699,8 @@ fail: kfree(tree_root); bdi_destroy(&fs_info->bdi); kfree(fs_info); + kfree(chunk_root); + kfree(dev_root); return ERR_PTR(err); } -- cgit v1.2.3 From c8b978188c9a0fd3d535c13debd19d522b726f1f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 29 Oct 2008 14:49:59 -0400 Subject: Btrfs: Add zlib compression support This is a large change for adding compression on reading and writing, both for inline and regular extents. It does some fairly large surgery to the writeback paths. Compression is off by default and enabled by mount -o compress. Even when the -o compress mount option is not used, it is possible to read compressed extents off the disk. If compression for a given set of pages fails to make them smaller, the file is flagged to avoid future compression attempts later. * While finding delalloc extents, the pages are locked before being sent down to the delalloc handler. This allows the delalloc handler to do complex things such as cleaning the pages, marking them writeback and starting IO on their behalf. * Inline extents are inserted at delalloc time now. This allows us to compress the data before inserting the inline extent, and it allows us to insert an inline extent that spans multiple pages. * All of the in-memory extent representations (extent_map.c, ordered-data.c etc) are changed to record both an in-memory size and an on disk size, as well as a flag for compression. From a disk format point of view, the extent pointers in the file are changed to record the on disk size of a given extent and some encoding flags. Space in the disk format is allocated for compression encoding, as well as encryption and a generic 'other' field. Neither the encryption or the 'other' field are currently used. In order to limit the amount of data read for a single random read in the file, the size of a compressed extent is limited to 128k. This is a software only limit, the disk format supports u64 sized compressed extents. In order to limit the ram consumed while processing extents, the uncompressed size of a compressed extent is limited to 256k. This is a software only limit and will be subject to tuning later. Checksumming is still done on compressed extents, and it is done on the uncompressed version of the data. This way additional encodings can be layered on without having to figure out which encoding to checksum. Compression happens at delalloc time, which is basically singled threaded because it is usually done by a single pdflush thread. This makes it tricky to spread the compression load across all the cpus on the box. We'll have to look at parallel pdflush walks of dirty inodes at a later time. Decompression is hooked into readpages and it does spread across CPUs nicely. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0be044bb6194..dc95f636a11b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -83,6 +83,7 @@ struct async_submit_bio { extent_submit_bio_hook_t *submit_bio_hook; int rw; int mirror_num; + unsigned long bio_flags; struct btrfs_work work; }; @@ -115,6 +116,7 @@ struct extent_map *btree_get_extent(struct inode *inode, struct page *page, } em->start = 0; em->len = (u64)-1; + em->block_len = (u64)-1; em->block_start = 0; em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; @@ -469,12 +471,13 @@ static void run_one_async_submit(struct btrfs_work *work) wake_up(&fs_info->async_submit_wait); async->submit_bio_hook(async->inode, async->rw, async->bio, - async->mirror_num); + async->mirror_num, async->bio_flags); kfree(async); } int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, int rw, struct bio *bio, int mirror_num, + unsigned long bio_flags, extent_submit_bio_hook_t *submit_bio_hook) { struct async_submit_bio *async; @@ -491,6 +494,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, async->submit_bio_hook = submit_bio_hook; async->work.func = run_one_async_submit; async->work.flags = 0; + async->bio_flags = bio_flags; while(atomic_read(&fs_info->async_submit_draining) && atomic_read(&fs_info->nr_async_submits)) { @@ -530,7 +534,7 @@ static int btree_csum_one_bio(struct bio *bio) } static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, - int mirror_num) + int mirror_num, unsigned long bio_flags) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; @@ -556,17 +560,17 @@ static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, } static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, - int mirror_num) + int mirror_num, unsigned long bio_flags) { /* * kthread helpers are used to submit writes so that checksumming * can happen in parallel across all CPUs */ if (!(rw & (1 << BIO_RW))) { - return __btree_submit_bio_hook(inode, rw, bio, mirror_num); + return __btree_submit_bio_hook(inode, rw, bio, mirror_num, 0); } return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, - inode, rw, bio, mirror_num, + inode, rw, bio, mirror_num, 0, __btree_submit_bio_hook); } @@ -1407,6 +1411,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode = new_inode(sb); fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; + fs_info->thread_pool_size = min(num_online_cpus() + 2, 8); INIT_LIST_HEAD(&fs_info->ordered_extents); @@ -1508,6 +1513,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, */ btrfs_init_workers(&fs_info->workers, "worker", fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->submit_workers, "submit", min_t(u64, fs_devices->num_devices, fs_info->thread_pool_size)); @@ -1559,6 +1565,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, } fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); + fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, + 4 * 1024 * 1024 / PAGE_CACHE_SIZE); nodesize = btrfs_super_nodesize(disk_super); leafsize = btrfs_super_leafsize(disk_super); -- cgit v1.2.3 From f82d02d9d8222183b7945e893111a6d1bf67ae4a Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Wed, 29 Oct 2008 14:49:05 -0400 Subject: Btrfs: Improve space balancing code This patch improves the space balancing code to keep more sharing of tree blocks. The only case that breaks sharing of tree blocks is data extents get fragmented during balancing. The main changes in this patch are: Add a 'drop sub-tree' function. This solves the problem in old code that BTRFS_HEADER_FLAG_WRITTEN check breaks sharing of tree block. Remove relocation mapping tree. Relocation mappings are stored in struct btrfs_ref_path and updated dynamically during walking up/down the reference path. This reduces CPU usage and simplifies code. This patch also fixes a bug. Root items for reloc trees should be updated in btrfs_free_reloc_root. Signed-off-by: Yan Zheng --- fs/btrfs/disk-io.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dc95f636a11b..796256440dfa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1448,8 +1448,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; - extent_io_tree_init(&fs_info->reloc_mapping_tree, - fs_info->btree_inode->i_mapping, GFP_NOFS); INIT_LIST_HEAD(&fs_info->dead_reloc_roots); btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree); btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree); -- cgit v1.2.3 From 2517920135b0d29e70453e5b03d70d7b94207df3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 29 Oct 2008 14:49:05 -0400 Subject: Btrfs: nuke fs wide allocation mutex V2 This patch removes the giant fs_info->alloc_mutex and replaces it with a bunch of little locks. There is now a pinned_mutex, which is used when messing with the pinned_extents extent io tree, and the extent_ins_mutex which is used with the pending_del and extent_ins extent io trees. The locking for the extent tree stuff was inspired by a patch that Yan Zheng wrote to fix a race condition, I cleaned it up some and changed the locking around a little bit, but the idea remains the same. Basically instead of holding the extent_ins_mutex throughout the processing of an extent on the extent_ins or pending_del trees, we just hold it while we're searching and when we clear the bits on those trees, and lock the extent for the duration of the operations on the extent. Also to keep from getting hung up waiting to lock an extent, I've added a try_lock_extent so if we cannot lock the extent, move on to the next one in the tree and we'll come back to that one. I have tested this heavily and it does not appear to break anything. This has to be applied on top of my find_free_extent redo patch. I tested this patch on top of Yan's space reblancing code and it worked fine. The only thing that has changed since the last version is I pulled out all my debugging stuff, apparently I forgot to run guilt refresh before I sent the last patch out. Thank you, Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 796256440dfa..d1137d7ea8d4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1460,7 +1460,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->trans_mutex); mutex_init(&fs_info->tree_log_mutex); mutex_init(&fs_info->drop_mutex); - mutex_init(&fs_info->alloc_mutex); + mutex_init(&fs_info->extent_ins_mutex); + mutex_init(&fs_info->pinned_mutex); mutex_init(&fs_info->chunk_mutex); mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); -- cgit v1.2.3 From 84234f3a1f7c532e4afeba03cc8e7e4a8a5277ea Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Wed, 29 Oct 2008 14:49:05 -0400 Subject: Btrfs: Add root tree pointer transaction ids This patch adds transaction IDs to root tree pointers. Transaction IDs in tree pointers are compared with the generation numbers in block headers when reading root blocks of trees. This can detect some types of IO errors. Signed-off-by: Yan Zheng --- fs/btrfs/disk-io.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d1137d7ea8d4..94b4e50f6b2c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -832,6 +832,7 @@ static int find_and_setup_root(struct btrfs_root *tree_root, { int ret; u32 blocksize; + u64 generation; __setup_root(tree_root->nodesize, tree_root->leafsize, tree_root->sectorsize, tree_root->stripesize, @@ -840,9 +841,10 @@ static int find_and_setup_root(struct btrfs_root *tree_root, &root->root_item, &root->root_key); BUG_ON(ret); + generation = btrfs_root_generation(&root->root_item); blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), - blocksize, 0); + blocksize, generation); BUG_ON(!root->node); return 0; } @@ -929,6 +931,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, struct btrfs_path *path; struct extent_buffer *l; u64 highest_inode; + u64 generation; u32 blocksize; int ret = 0; @@ -970,9 +973,10 @@ out: kfree(root); return ERR_PTR(ret); } + generation = btrfs_root_generation(&root->root_item); blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), - blocksize, 0); + blocksize, generation); BUG_ON(!root->node); insert: if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { @@ -1357,6 +1361,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, u32 leafsize; u32 blocksize; u32 stripesize; + u64 generation; struct buffer_head *bh; struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); @@ -1596,13 +1601,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, blocksize = btrfs_level_size(tree_root, btrfs_super_chunk_root_level(disk_super)); + generation = btrfs_super_chunk_root_generation(disk_super); __setup_root(nodesize, leafsize, sectorsize, stripesize, chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); chunk_root->node = read_tree_block(chunk_root, btrfs_super_chunk_root(disk_super), - blocksize, 0); + blocksize, generation); BUG_ON(!chunk_root->node); read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, @@ -1618,11 +1624,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, blocksize = btrfs_level_size(tree_root, btrfs_super_root_level(disk_super)); - + generation = btrfs_super_generation(disk_super); tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super), - blocksize, 0); + blocksize, generation); if (!tree_root->node) goto fail_sb_buffer; @@ -1672,15 +1678,16 @@ struct btrfs_root *open_ctree(struct super_block *sb, log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); log_tree_root->node = read_tree_block(tree_root, bytenr, - blocksize, 0); + blocksize, + generation + 1); ret = btrfs_recover_log_trees(log_tree_root); BUG_ON(ret); } + fs_info->last_trans_committed = btrfs_super_generation(disk_super); ret = btrfs_cleanup_reloc_trees(tree_root); BUG_ON(ret); - fs_info->last_trans_committed = btrfs_super_generation(disk_super); return tree_root; fail_cleaner: -- cgit v1.2.3 From 4a69a41009c4ac691f7d9c289f5f37fabeddce46 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 6 Nov 2008 22:03:00 -0500 Subject: Btrfs: Add ordered async work queues Btrfs uses kernel threads to create async work queues for cpu intensive operations such as checksumming and decompression. These work well, but they make it difficult to keep IO order intact. A single writepages call from pdflush or fsync will turn into a number of bios, and each bio is checksummed in parallel. Once the checksum is computed, the bio is sent down to the disk, and since we don't control the order in which the parallel operations happen, they might go down to the disk in almost any order. The code deals with this somewhat by having deep work queues for a single kernel thread, making it very likely that a single thread will process all the bios for a single inode. This patch introduces an explicitly ordered work queue. As work structs are placed into the queue they are put onto the tail of a list. They have three callbacks: ->func (cpu intensive processing here) ->ordered_func (order sensitive processing here) ->ordered_free (free the work struct, all processing is done) The work struct has three callbacks. The func callback does the cpu intensive work, and when it completes the work struct is marked as done. Every time a work struct completes, the list is checked to see if the head is marked as done. If so the ordered_func callback is used to do the order sensitive processing and the ordered_free callback is used to do any cleanup. Then we loop back and check the head of the list again. This patch also changes the checksumming code to use the ordered workqueues. One a 4 drive array, it increases streaming writes from 280MB/s to 350MB/s. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 81 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 24 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 94b4e50f6b2c..e0a28f705a64 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -80,7 +80,8 @@ struct async_submit_bio { struct inode *inode; struct bio *bio; struct list_head list; - extent_submit_bio_hook_t *submit_bio_hook; + extent_submit_bio_hook_t *submit_bio_start; + extent_submit_bio_hook_t *submit_bio_done; int rw; int mirror_num; unsigned long bio_flags; @@ -452,7 +453,18 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone) btrfs_async_submit_limit(info); } -static void run_one_async_submit(struct btrfs_work *work) +static void run_one_async_start(struct btrfs_work *work) +{ + struct btrfs_fs_info *fs_info; + struct async_submit_bio *async; + + async = container_of(work, struct async_submit_bio, work); + fs_info = BTRFS_I(async->inode)->root->fs_info; + async->submit_bio_start(async->inode, async->rw, async->bio, + async->mirror_num, async->bio_flags); +} + +static void run_one_async_done(struct btrfs_work *work) { struct btrfs_fs_info *fs_info; struct async_submit_bio *async; @@ -470,15 +482,23 @@ static void run_one_async_submit(struct btrfs_work *work) waitqueue_active(&fs_info->async_submit_wait)) wake_up(&fs_info->async_submit_wait); - async->submit_bio_hook(async->inode, async->rw, async->bio, + async->submit_bio_done(async->inode, async->rw, async->bio, async->mirror_num, async->bio_flags); +} + +static void run_one_async_free(struct btrfs_work *work) +{ + struct async_submit_bio *async; + + async = container_of(work, struct async_submit_bio, work); kfree(async); } int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, int rw, struct bio *bio, int mirror_num, unsigned long bio_flags, - extent_submit_bio_hook_t *submit_bio_hook) + extent_submit_bio_hook_t *submit_bio_start, + extent_submit_bio_hook_t *submit_bio_done) { struct async_submit_bio *async; int limit = btrfs_async_submit_limit(fs_info); @@ -491,8 +511,13 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, async->rw = rw; async->bio = bio; async->mirror_num = mirror_num; - async->submit_bio_hook = submit_bio_hook; - async->work.func = run_one_async_submit; + async->submit_bio_start = submit_bio_start; + async->submit_bio_done = submit_bio_done; + + async->work.func = run_one_async_start; + async->work.ordered_func = run_one_async_done; + async->work.ordered_free = run_one_async_free; + async->work.flags = 0; async->bio_flags = bio_flags; @@ -533,29 +558,25 @@ static int btree_csum_one_bio(struct bio *bio) return 0; } -static int __btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, - int mirror_num, unsigned long bio_flags) +static int __btree_submit_bio_start(struct inode *inode, int rw, + struct bio *bio, int mirror_num, + unsigned long bio_flags) { - struct btrfs_root *root = BTRFS_I(inode)->root; - int ret; - /* * when we're called for a write, we're already in the async * submission context. Just jump into btrfs_map_bio */ - if (rw & (1 << BIO_RW)) { - btree_csum_one_bio(bio); - return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, - mirror_num, 1); - } + btree_csum_one_bio(bio); + return 0; +} +static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, + int mirror_num, unsigned long bio_flags) +{ /* - * called for a read, do the setup so that checksum validation - * can happen in the async kernel threads + * when we're called for a write, we're already in the async + * submission context. Just jump into btrfs_map_bio */ - ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1); - BUG_ON(ret); - return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); } @@ -567,11 +588,22 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, * can happen in parallel across all CPUs */ if (!(rw & (1 << BIO_RW))) { - return __btree_submit_bio_hook(inode, rw, bio, mirror_num, 0); + int ret; + /* + * called for a read, do the setup so that checksum validation + * can happen in the async kernel threads + */ + ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, + bio, 1); + BUG_ON(ret); + + return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, + mirror_num, 1); } return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, inode, rw, bio, mirror_num, 0, - __btree_submit_bio_hook); + __btree_submit_bio_start, + __btree_submit_bio_done); } static int btree_writepage(struct page *page, struct writeback_control *wbc) @@ -1534,7 +1566,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, * were sent by the writeback daemons, improving overall locality * of the IO going down the pipe. */ - fs_info->workers.idle_thresh = 128; + fs_info->workers.idle_thresh = 8; + fs_info->workers.ordered = 1; btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1); btrfs_init_workers(&fs_info->endio_workers, "endio", -- cgit v1.2.3 From 771ed689d2cd53439e28e095bc38fbe40a71429e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 6 Nov 2008 22:02:51 -0500 Subject: Btrfs: Optimize compressed writeback and reads When reading compressed extents, try to put pages into the page cache for any pages covered by the compressed extent that readpages didn't already preload. Add an async work queue to handle transformations at delayed allocation processing time. Right now this is just compression. The workflow is: 1) Find offsets in the file marked for delayed allocation 2) Lock the pages 3) Lock the state bits 4) Call the async delalloc code The async delalloc code clears the state lock bits and delalloc bits. It is important this happens before the range goes into the work queue because otherwise it might deadlock with other work queue items that try to lock those extent bits. The file pages are compressed, and if the compression doesn't work the pages are written back directly. An ordered work queue is used to make sure the inodes are written in the same order that pdflush or writepages sent them down. This changes extent_write_cache_pages to let the writepage function update the wbc nr_written count. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e0a28f705a64..8efc123d222b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -539,6 +539,13 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, (atomic_read(&fs_info->nr_async_bios) < limit), HZ/10); } + + while(atomic_read(&fs_info->async_submit_draining) && + atomic_read(&fs_info->nr_async_submits)) { + wait_event(fs_info->async_submit_wait, + (atomic_read(&fs_info->nr_async_submits) == 0)); + } + return 0; } @@ -1437,6 +1444,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->space_info); btrfs_mapping_init(&fs_info->mapping_tree); atomic_set(&fs_info->nr_async_submits, 0); + atomic_set(&fs_info->async_delalloc_pages, 0); atomic_set(&fs_info->async_submit_draining, 0); atomic_set(&fs_info->nr_async_bios, 0); atomic_set(&fs_info->throttles, 0); @@ -1550,6 +1558,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_init_workers(&fs_info->workers, "worker", fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", + fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->submit_workers, "submit", min_t(u64, fs_devices->num_devices, fs_info->thread_pool_size)); @@ -1560,15 +1571,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, */ fs_info->submit_workers.idle_thresh = 64; - /* fs_info->workers is responsible for checksumming file data - * blocks and metadata. Using a larger idle thresh allows each - * worker thread to operate on things in roughly the order they - * were sent by the writeback daemons, improving overall locality - * of the IO going down the pipe. - */ - fs_info->workers.idle_thresh = 8; + fs_info->workers.idle_thresh = 16; fs_info->workers.ordered = 1; + fs_info->delalloc_workers.idle_thresh = 2; + fs_info->delalloc_workers.ordered = 1; + btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1); btrfs_init_workers(&fs_info->endio_workers, "endio", fs_info->thread_pool_size); @@ -1584,6 +1592,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->submit_workers, 1); + btrfs_start_workers(&fs_info->delalloc_workers, 1); btrfs_start_workers(&fs_info->fixup_workers, 1); btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); btrfs_start_workers(&fs_info->endio_write_workers, @@ -1732,6 +1741,7 @@ fail_tree_root: fail_sys_array: fail_sb_buffer: btrfs_stop_workers(&fs_info->fixup_workers); + btrfs_stop_workers(&fs_info->delalloc_workers); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); btrfs_stop_workers(&fs_info->endio_write_workers); @@ -1988,6 +1998,7 @@ int close_ctree(struct btrfs_root *root) truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); btrfs_stop_workers(&fs_info->fixup_workers); + btrfs_stop_workers(&fs_info->delalloc_workers); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); btrfs_stop_workers(&fs_info->endio_write_workers); @@ -2062,7 +2073,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) struct extent_io_tree *tree; u64 num_dirty; u64 start = 0; - unsigned long thresh = 96 * 1024 * 1024; + unsigned long thresh = 32 * 1024 * 1024; tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; if (current_is_pdflush() || current->flags & PF_MEMALLOC) -- cgit v1.2.3 From 240d5d482b69415b92ffce4b8dfca10799c9890b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 10 Nov 2008 13:08:31 -0500 Subject: Btrfs: tune btrfs unplug functions for a small number of devices When btrfs unplugs, it tries to find the correct device to unplug via search through the extent_map tree. This avoids unplugging a device that doesn't need it, but is a waste of time for filesystems with a small number of devices. This patch checks the total number of devices before doing the search. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8efc123d222b..1bb54d69fbb2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1201,6 +1201,16 @@ void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) return; inode = mapping->host; + + /* + * don't do the expensive searching for a small number of + * devices + */ + if (BTRFS_I(inode)->root->fs_info->fs_devices->open_devices <= 2) { + __unplug_io_fn(bdi, page); + return; + } + offset = page_offset(page); em_tree = &BTRFS_I(inode)->extent_tree; -- cgit v1.2.3 From 6f3577bdc768e6dae3c4d419e89b5a904f470728 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 13 Nov 2008 09:59:36 -0500 Subject: Btrfs: Improve metadata read latencies This fixes latency problems on metadata reads by making sure they don't go through the async submit queue, and by tuning down the amount of readahead done during btree searches. Also, the btrfs bdi congestion function is tuned to ignore the number of pending async bios and checksums pending. There is additional code that throttles new async bios now and the congestion function doesn't need to worry about it anymore. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1bb54d69fbb2..3b0e974a9e9c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -605,7 +605,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, BUG_ON(ret); return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, - mirror_num, 1); + mirror_num, 0); } return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, inode, rw, bio, mirror_num, 0, @@ -1139,11 +1139,11 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) struct list_head *cur; struct btrfs_device *device; struct backing_dev_info *bdi; - +#if 0 if ((bdi_bits & (1 << BDI_write_congested)) && btrfs_congested_async(info, 0)) return 1; - +#endif list_for_each(cur, &info->fs_devices->devices) { device = list_entry(cur, struct btrfs_device, dev_list); if (!device->bdev) -- cgit v1.2.3 From c146afad2c7fea6a366d4945c1bab9b03880f526 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Wed, 12 Nov 2008 14:34:12 -0500 Subject: Btrfs: mount ro and remount support This patch adds mount ro and remount support. The main changes in patch are: adding btrfs_remount and related helper function; splitting the transaction related code out of close_ctree into btrfs_commit_super; updating allocator to properly handle read only block group. Signed-off-by: Yan Zheng --- fs/btrfs/disk-io.c | 83 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 63 insertions(+), 20 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3b0e974a9e9c..c599f0ee997a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1075,10 +1075,12 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } - ret = btrfs_find_dead_roots(fs_info->tree_root, - root->root_key.objectid, root); - BUG_ON(ret); - + if (!(fs_info->sb->s_flags & MS_RDONLY)) { + ret = btrfs_find_dead_roots(fs_info->tree_root, + root->root_key.objectid, root); + BUG_ON(ret); + btrfs_orphan_cleanup(root); + } return root; } @@ -1700,7 +1702,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_read_block_groups(extent_root); - fs_info->generation = btrfs_super_generation(disk_super) + 1; + fs_info->generation = generation + 1; + fs_info->last_trans_committed = generation; fs_info->data_alloc_profile = (u64)-1; fs_info->metadata_alloc_profile = (u64)-1; fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; @@ -1715,6 +1718,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!fs_info->transaction_kthread) goto fail_cleaner; + if (sb->s_flags & MS_RDONLY) + return tree_root; + if (btrfs_super_log_root(disk_super) != 0) { u32 blocksize; u64 bytenr = btrfs_super_log_root(disk_super); @@ -1735,7 +1741,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = btrfs_recover_log_trees(log_tree_root); BUG_ON(ret); } - fs_info->last_trans_committed = btrfs_super_generation(disk_super); ret = btrfs_cleanup_reloc_trees(tree_root); BUG_ON(ret); @@ -1955,28 +1960,69 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info) return 0; } -int close_ctree(struct btrfs_root *root) +int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) { + u64 root_objectid = 0; + struct btrfs_root *gang[8]; + int i; int ret; - struct btrfs_trans_handle *trans; - struct btrfs_fs_info *fs_info = root->fs_info; - fs_info->closing = 1; - smp_mb(); + while (1) { + ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, + (void **)gang, root_objectid, + ARRAY_SIZE(gang)); + if (!ret) + break; + for (i = 0; i < ret; i++) { + root_objectid = gang[i]->root_key.objectid; + ret = btrfs_find_dead_roots(fs_info->tree_root, + root_objectid, gang[i]); + BUG_ON(ret); + btrfs_orphan_cleanup(gang[i]); + } + root_objectid++; + } + return 0; +} - kthread_stop(root->fs_info->transaction_kthread); - kthread_stop(root->fs_info->cleaner_kthread); +int btrfs_commit_super(struct btrfs_root *root) +{ + struct btrfs_trans_handle *trans; + int ret; + mutex_lock(&root->fs_info->cleaner_mutex); btrfs_clean_old_snapshots(root); + mutex_unlock(&root->fs_info->cleaner_mutex); trans = btrfs_start_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); - /* run commit again to drop the original snapshot */ + BUG_ON(ret); + /* run commit again to drop the original snapshot */ trans = btrfs_start_transaction(root, 1); btrfs_commit_transaction(trans, root); ret = btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); - write_ctree_super(NULL, root); + ret = write_ctree_super(NULL, root); + return ret; +} + +int close_ctree(struct btrfs_root *root) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + int ret; + + fs_info->closing = 1; + smp_mb(); + + kthread_stop(root->fs_info->transaction_kthread); + kthread_stop(root->fs_info->cleaner_kthread); + + if (!(fs_info->sb->s_flags & MS_RDONLY)) { + ret = btrfs_commit_super(root); + if (ret) { + printk("btrfs: commit super returns %d\n", ret); + } + } if (fs_info->delalloc_bytes) { printk("btrfs: at unmount delalloc count %Lu\n", @@ -2000,12 +2046,10 @@ int close_ctree(struct btrfs_root *root) free_extent_buffer(root->fs_info->dev_root->node); btrfs_free_block_groups(root->fs_info); - fs_info->closing = 2; - del_fs_roots(fs_info); - filemap_write_and_wait(fs_info->btree_inode->i_mapping); + del_fs_roots(fs_info); - truncate_inode_pages(fs_info->btree_inode->i_mapping, 0); + iput(fs_info->btree_inode); btrfs_stop_workers(&fs_info->fixup_workers); btrfs_stop_workers(&fs_info->delalloc_workers); @@ -2014,7 +2058,6 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); - iput(fs_info->btree_inode); #if 0 while(!list_empty(&fs_info->hashers)) { struct btrfs_hasher *hasher; -- cgit v1.2.3 From 2b82032c34ec40515d3c45c36cd1961f37977de8 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Mon, 17 Nov 2008 21:11:30 -0500 Subject: Btrfs: Seed device support Seed device is a special btrfs with SEEDING super flag set and can only be mounted in read-only mode. Seed devices allow people to create new btrfs on top of it. The new FS contains the same contents as the seed device, but it can be mounted in read-write mode. This patch does the following: 1) split code in btrfs_alloc_chunk into two parts. The first part does makes the newly allocated chunk usable, but does not do any operation that modifies the chunk tree. The second part does the the chunk tree modifications. This division is for the bootstrap step of adding storage to the seed device. 2) Update device management code to handle seed device. The basic idea is: For an FS grown from seed devices, its seed devices are put into a list. Seed devices are opened on demand at mounting time. If any seed device is missing or has been changed, btrfs kernel module will refuse to mount the FS. 3) make btrfs_find_block_group not return NULL when all block groups are read-only. Signed-off-by: Yan Zheng --- fs/btrfs/disk-io.c | 56 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 21 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c599f0ee997a..82833e5d84b6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -345,6 +345,25 @@ out: return 0; } +static int check_tree_block_fsid(struct btrfs_root *root, + struct extent_buffer *eb) +{ + struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; + u8 fsid[BTRFS_UUID_SIZE]; + int ret = 1; + + read_extent_buffer(eb, fsid, (unsigned long)btrfs_header_fsid(eb), + BTRFS_FSID_SIZE); + while (fs_devices) { + if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) { + ret = 0; + break; + } + fs_devices = fs_devices->seed; + } + return ret; +} + int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state) { @@ -382,9 +401,7 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, ret = -EIO; goto err; } - if (memcmp_extent_buffer(eb, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(eb), - BTRFS_FSID_SIZE)) { + if (check_tree_block_fsid(root, eb)) { printk("bad fsid on block %Lu\n", eb->start); ret = -EIO; goto err; @@ -1558,9 +1575,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!btrfs_super_root(disk_super)) goto fail_sb_buffer; - err = btrfs_parse_options(tree_root, options); - if (err) + ret = btrfs_parse_options(tree_root, options); + if (ret) { + err = ret; goto fail_sb_buffer; + } /* * we need to start all the end_io workers up front because the @@ -1610,18 +1629,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_start_workers(&fs_info->endio_write_workers, fs_info->thread_pool_size); - err = -EINVAL; - if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) { - printk("Btrfs: wanted %llu devices, but found %llu\n", - (unsigned long long)btrfs_super_num_devices(disk_super), - (unsigned long long)fs_devices->open_devices); - if (btrfs_test_opt(tree_root, DEGRADED)) - printk("continuing in degraded mode\n"); - else { - goto fail_sb_buffer; - } - } - fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 4 * 1024 * 1024 / PAGE_CACHE_SIZE); @@ -1672,7 +1679,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_chunk_tree(chunk_root); mutex_unlock(&fs_info->chunk_mutex); - BUG_ON(ret); + if (ret) { + printk("btrfs: failed to read chunk tree on %s\n", sb->s_id); + goto fail_chunk_root; + } btrfs_close_extra_devices(fs_devices); @@ -1684,7 +1694,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_super_root(disk_super), blocksize, generation); if (!tree_root->node) - goto fail_sb_buffer; + goto fail_chunk_root; ret = find_and_setup_root(tree_root, fs_info, @@ -1753,6 +1763,8 @@ fail_extent_root: free_extent_buffer(extent_root->node); fail_tree_root: free_extent_buffer(tree_root->node); +fail_chunk_root: + free_extent_buffer(chunk_root->node); fail_sys_array: fail_sb_buffer: btrfs_stop_workers(&fs_info->fixup_workers); @@ -1823,9 +1835,10 @@ int write_all_supers(struct btrfs_root *root) total_errors++; continue; } - if (!dev->in_fs_metadata) + if (!dev->in_fs_metadata || !dev->writeable) continue; + btrfs_set_stack_device_generation(dev_item, 0); btrfs_set_stack_device_type(dev_item, dev->type); btrfs_set_stack_device_id(dev_item, dev->devid); btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes); @@ -1834,6 +1847,7 @@ int write_all_supers(struct btrfs_root *root) btrfs_set_stack_device_io_width(dev_item, dev->io_width); btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE); + memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE); flags = btrfs_super_flags(sb); btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); @@ -1881,7 +1895,7 @@ int write_all_supers(struct btrfs_root *root) dev = list_entry(cur, struct btrfs_device, dev_list); if (!dev->bdev) continue; - if (!dev->in_fs_metadata) + if (!dev->in_fs_metadata || !dev->writeable) continue; BUG_ON(!dev->pending_io); -- cgit v1.2.3 From 3de4586c5278a28107030c336956381f69ff7a9d Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Nov 2008 21:02:50 -0500 Subject: Btrfs: Allow subvolumes and snapshots anywhere in the directory tree Before, all snapshots and subvolumes lived in a single flat directory. This was awkward and confusing because the single flat directory was only writable with the ioctls. This commit changes the ioctls to create subvols and snapshots at any point in the directory tree. This requires making separate ioctls for snapshot and subvol creation instead of a combining them into one. The subvol ioctl does: btrfsctl -S subvol_name parent_dir After the ioctl is done subvol_name lives inside parent_dir. The snapshot ioctl does: btrfsctl -s path_for_snapshot root_to_snapshot path_for_snapshot can be an absolute or relative path. btrfsctl breaks it up into directory and basename components. root_to_snapshot can be any file or directory in the FS. The snapshot is taken of the entire root where that file lives. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 82833e5d84b6..0a5350573f61 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -838,7 +838,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, u64 objectid) { root->node = NULL; - root->inode = NULL; root->commit_root = NULL; root->ref_tree = NULL; root->sectorsize = sectorsize; @@ -1430,6 +1429,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, u32 blocksize; u32 stripesize; u64 generation; + struct btrfs_key location; struct buffer_head *bh; struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); @@ -1729,7 +1729,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, goto fail_cleaner; if (sb->s_flags & MS_RDONLY) - return tree_root; + goto read_fs_root; if (btrfs_super_log_root(disk_super) != 0) { u32 blocksize; @@ -1755,6 +1755,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = btrfs_cleanup_reloc_trees(tree_root); BUG_ON(ret); + location.objectid = BTRFS_FS_TREE_OBJECTID; + location.type = BTRFS_ROOT_ITEM_KEY; + location.offset = (u64)-1; + +read_fs_root: + fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); + if (!fs_info->fs_root) + goto fail_cleaner; return tree_root; fail_cleaner: @@ -1944,8 +1952,6 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) (unsigned long)root->root_key.objectid); if (root->in_sysfs) btrfs_sysfs_del_root(root); - if (root->inode) - iput(root->inode); if (root->node) free_extent_buffer(root->node); if (root->commit_root) -- cgit v1.2.3 From 3394e1607eaf870ebba37d303fbd590a4c569908 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Nov 2008 20:42:26 -0500 Subject: Btrfs: Give each subvol and snapshot their own anonymous devid Each subvolume has its own private inode number space, and so we need to fill in different device numbers for each subvolume to avoid confusing applications. This commit puts a struct super_block into struct btrfs_root so it can call set_anon_super() and get a different device number generated for each root. btrfs_rename is changed to prevent renames across subvols. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0a5350573f61..8d7866b733d6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -877,6 +877,12 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->defrag_running = 0; root->defrag_level = 0; root->root_key.objectid = objectid; + root->anon_super.s_root = NULL; + root->anon_super.s_dev = 0; + INIT_LIST_HEAD(&root->anon_super.s_list); + INIT_LIST_HEAD(&root->anon_super.s_instances); + init_rwsem(&root->anon_super.s_umount); + return 0; } @@ -1083,6 +1089,9 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); if (IS_ERR(root)) return root; + + set_anon_super(&root->anon_super, NULL); + ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, root); @@ -1950,6 +1959,10 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { radix_tree_delete(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid); + if (root->anon_super.s_dev) { + down_write(&root->anon_super.s_umount); + kill_anon_super(&root->anon_super); + } if (root->in_sysfs) btrfs_sysfs_del_root(root); if (root->node) -- cgit v1.2.3 From ea9e8b11bd1252dcbc23afefcf1a52ec6aa3c113 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 17 Nov 2008 21:14:24 -0500 Subject: Btrfs: prevent loops in the directory tree when creating snapshots For a directory tree: /mnt/subvolA/subvolB btrfsctl -s /mnt/subvolA/subvolB /mnt Will create a directory loop with subvolA under subvolB. This commit uses the forward refs for each subvol and snapshot to error out before creating the loop. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8d7866b733d6..e18250a6fd2d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1129,7 +1129,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } - +#if 0 ret = btrfs_sysfs_add_root(root); if (ret) { free_extent_buffer(root->node); @@ -1137,6 +1137,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, kfree(root); return ERR_PTR(ret); } +#endif root->in_sysfs = 1; return root; } @@ -1963,8 +1964,10 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) down_write(&root->anon_super.s_umount); kill_anon_super(&root->anon_super); } +#if 0 if (root->in_sysfs) btrfs_sysfs_del_root(root); +#endif if (root->node) free_extent_buffer(root->node); if (root->commit_root) -- cgit v1.2.3 From 9f0ba5bd91d42706ba78f97ec638c6a821db1c5e Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 18 Nov 2008 10:31:22 -0500 Subject: Btrfs: unplug all devices in the unplug call back For larger multi-device filesystems, there was logic to limit the number of devices unplugged to just the page that was sent to our sync_page function. But, the code wasn't always unplugging the right device. Since this was just an optimization, disable it for now. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e18250a6fd2d..0e8d31274c92 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1214,7 +1214,7 @@ void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) u64 offset; /* the generic O_DIRECT read code does this */ - if (!page) { + if (1 || !page) { __unplug_io_fn(bdi, page); return; } -- cgit v1.2.3 From d2c3f4f695edac4d75c1b3eb01a1d16072de63bb Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 19 Nov 2008 12:44:22 -0500 Subject: Btrfs: Avoid writeback stalls While building large bios in writepages, btrfs may end up waiting for other page writeback to finish if WB_SYNC_ALL is used. While it is waiting, the bio it is building has a number of pages with the writeback bit set and they aren't getting to the disk any time soon. This lowers the latencies of writeback in general by sending down the bio being built before waiting for other pages. The bio submission code tries to limit the total number of async bios in flight by waiting when we're over a certain number of async bios. But, the waits are happening while writepages is building bios, and this can easily lead to stalls and other problems for people calling wait_on_page_writeback. The current fix is to let the congestion tests take care of waiting. sync() and others make sure to drain the current async requests to make sure that everything that was pending when the sync was started really get to disk. The code would drain pending requests both before and after submitting a new request. But, if one of the requests is waiting for page writeback to finish, the draining waits might block that page writeback. This changes the draining code to only wait after submitting the bio being processed. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0e8d31274c92..8d03e4a3c4e9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -538,15 +538,9 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, async->work.flags = 0; async->bio_flags = bio_flags; - while(atomic_read(&fs_info->async_submit_draining) && - atomic_read(&fs_info->nr_async_submits)) { - wait_event(fs_info->async_submit_wait, - (atomic_read(&fs_info->nr_async_submits) == 0)); - } - atomic_inc(&fs_info->nr_async_submits); btrfs_queue_worker(&fs_info->workers, &async->work); - +#if 0 if (atomic_read(&fs_info->nr_async_submits) > limit) { wait_event_timeout(fs_info->async_submit_wait, (atomic_read(&fs_info->nr_async_submits) < limit), @@ -556,7 +550,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, (atomic_read(&fs_info->nr_async_bios) < limit), HZ/10); } - +#endif while(atomic_read(&fs_info->async_submit_draining) && atomic_read(&fs_info->nr_async_submits)) { wait_event(fs_info->async_submit_wait, @@ -1765,11 +1759,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = btrfs_cleanup_reloc_trees(tree_root); BUG_ON(ret); +read_fs_root: location.objectid = BTRFS_FS_TREE_OBJECTID; location.type = BTRFS_ROOT_ITEM_KEY; location.offset = (u64)-1; -read_fs_root: fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); if (!fs_info->fs_root) goto fail_cleaner; -- cgit v1.2.3 From 7c2ca4682a2b401cb0c43f96b05eced25eb83eec Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 19 Nov 2008 15:13:35 -0500 Subject: Btrfs: Do fsync log replay when mount -o ro, except when on readonly media fsync log replay can change the filesystem, so it cannot be delayed until mount -o rw,remount, and it can't be forgotten entirely. So, this patch changes btrfs to do with reiserfs, ext3 and xfs do, which is to do the log replay even when mounted readonly. On a readonly device if log replay is required, the mount is aborted. Getting all of this right had required fixing up some of the error handling in open_ctree. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8d03e4a3c4e9..c8dcb47b6d7d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -518,7 +518,6 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, extent_submit_bio_hook_t *submit_bio_done) { struct async_submit_bio *async; - int limit = btrfs_async_submit_limit(fs_info); async = kmalloc(sizeof(*async), GFP_NOFS); if (!async) @@ -541,6 +540,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, atomic_inc(&fs_info->nr_async_submits); btrfs_queue_worker(&fs_info->workers, &async->work); #if 0 + int limit = btrfs_async_submit_limit(fs_info); if (atomic_read(&fs_info->nr_async_submits) > limit) { wait_event_timeout(fs_info->async_submit_wait, (atomic_read(&fs_info->nr_async_submits) < limit), @@ -1732,13 +1732,15 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!fs_info->transaction_kthread) goto fail_cleaner; - if (sb->s_flags & MS_RDONLY) - goto read_fs_root; - if (btrfs_super_log_root(disk_super) != 0) { u32 blocksize; u64 bytenr = btrfs_super_log_root(disk_super); + if (fs_devices->rw_devices == 0) { + printk("Btrfs log replay required on RO media\n"); + err = -EIO; + goto fail_trans_kthread; + } blocksize = btrfs_level_size(tree_root, btrfs_super_log_root_level(disk_super)); @@ -1756,21 +1758,32 @@ struct btrfs_root *open_ctree(struct super_block *sb, BUG_ON(ret); } - ret = btrfs_cleanup_reloc_trees(tree_root); - BUG_ON(ret); + if (!(sb->s_flags & MS_RDONLY)) { + ret = btrfs_cleanup_reloc_trees(tree_root); + BUG_ON(ret); + } -read_fs_root: location.objectid = BTRFS_FS_TREE_OBJECTID; location.type = BTRFS_ROOT_ITEM_KEY; location.offset = (u64)-1; fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); if (!fs_info->fs_root) - goto fail_cleaner; + goto fail_trans_kthread; return tree_root; +fail_trans_kthread: + kthread_stop(fs_info->transaction_kthread); fail_cleaner: kthread_stop(fs_info->cleaner_kthread); + + /* + * make sure we're done with the btree inode before we stop our + * kthreads + */ + filemap_write_and_wait(fs_info->btree_inode->i_mapping); + invalidate_inode_pages2(fs_info->btree_inode->i_mapping); + fail_extent_root: free_extent_buffer(extent_root->node); fail_tree_root: @@ -1778,6 +1791,7 @@ fail_tree_root: fail_chunk_root: free_extent_buffer(chunk_root->node); fail_sys_array: + free_extent_buffer(dev_root->node); fail_sb_buffer: btrfs_stop_workers(&fs_info->fixup_workers); btrfs_stop_workers(&fs_info->delalloc_workers); @@ -1786,6 +1800,7 @@ fail_sb_buffer: btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); fail_iput: + invalidate_inode_pages2(fs_info->btree_inode->i_mapping); iput(fs_info->btree_inode); fail: btrfs_close_devices(fs_info->fs_devices); -- cgit v1.2.3 From 4b4e25f2a6ddb070bab7f7dd2bd2926fb8db9e04 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 20 Nov 2008 10:22:27 -0500 Subject: Btrfs: compat code fixes The btrfs git kernel trees is used to build a standalone tree for compiling against older kernels. This commit makes the standalone tree work with 2.6.27 Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c8dcb47b6d7d..981652233f70 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -26,7 +26,8 @@ #include // for block_sync_page #include #include -# include +#include +#include "compat.h" #include "crc32c.h" #include "ctree.h" #include "disk-io.h" -- cgit v1.2.3 From e556ce2c9d2163aea122e91a1512c9a110fece5d Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Thu, 20 Nov 2008 10:25:19 -0500 Subject: Btrfs: Drop dirty roots created by log replay immediately when The log replay produces dirty roots. These dirty roots should be dropped immediately if the fs is mounted as ro. Otherwise they can be added to the dirty root list again when remounting the fs as rw. Thank you, Signed-off-by: Yan Zheng --- fs/btrfs/disk-io.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 981652233f70..fb04665e5005 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1757,6 +1757,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, generation + 1); ret = btrfs_recover_log_trees(log_tree_root); BUG_ON(ret); + + if (sb->s_flags & MS_RDONLY) { + ret = btrfs_commit_super(tree_root); + BUG_ON(ret); + } } if (!(sb->s_flags & MS_RDONLY)) { -- cgit v1.2.3 From b2950863c61bc24cf0f63bc05947d9d50663c4c0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 2 Dec 2008 09:54:17 -0500 Subject: Btrfs: make things static and include the right headers Shut up various sparse warnings about symbols that should be either static or have their declarations in scope. Signed-off-by: Christoph Hellwig --- fs/btrfs/disk-io.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fb04665e5005..8a2bcc7024fe 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -93,9 +93,9 @@ struct async_submit_bio { * extents on the btree inode are pretty simple, there's one extent * that covers the entire device */ -struct extent_map *btree_get_extent(struct inode *inode, struct page *page, - size_t page_offset, u64 start, u64 len, - int create) +static struct extent_map *btree_get_extent(struct inode *inode, + struct page *page, size_t page_offset, u64 start, u64 len, + int create) { struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; struct extent_map *em; @@ -295,7 +295,7 @@ printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror * checksum a dirty tree block before IO. This has extra checks to make * sure we only fill in the checksum field in the first page of a multi-page block */ -int csum_dirty_buffer(struct btrfs_root *root, struct page *page) +static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { struct extent_io_tree *tree; u64 start = (u64)page->index << PAGE_CACHE_SHIFT; @@ -365,7 +365,7 @@ static int check_tree_block_fsid(struct btrfs_root *root, return ret; } -int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, +static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state) { struct extent_io_tree *tree; @@ -660,7 +660,7 @@ static int btree_writepages(struct address_space *mapping, return extent_writepages(tree, mapping, btree_get_extent, wbc); } -int btree_readpage(struct file *file, struct page *page) +static int btree_readpage(struct file *file, struct page *page) { struct extent_io_tree *tree; tree = &BTRFS_I(page->mapping->host)->io_tree; @@ -1200,7 +1200,7 @@ static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) } } -void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) +static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page) { struct inode *inode; struct extent_map_tree *em_tree; @@ -1842,7 +1842,7 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) put_bh(bh); } -int write_all_supers(struct btrfs_root *root) +static int write_all_supers(struct btrfs_root *root) { struct list_head *cur; struct list_head *head = &root->fs_info->fs_devices->devices; -- cgit v1.2.3 From 6e430f94e508fee1aefd1dfec88da3c24ce64433 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 2 Dec 2008 06:36:09 -0500 Subject: Btrfs: fix shadowed variable declarations Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8a2bcc7024fe..2464c099db57 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1734,7 +1734,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, goto fail_cleaner; if (btrfs_super_log_root(disk_super) != 0) { - u32 blocksize; u64 bytenr = btrfs_super_log_root(disk_super); if (fs_devices->rw_devices == 0) { -- cgit v1.2.3 From f2b636e80d8206dd4012de6e973c2367259a7d22 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 2 Dec 2008 06:36:08 -0500 Subject: Btrfs: add support for compat flags to btrfs This adds the necessary disk format for handling compatibility flags in the future to handle disk format changes. We have a compat_flags, compat_ro_flags and incompat_flags set for the super block. Compat flags will be to hold the features that are compatible with older versions of btrfs, compat_ro flags have features that are compatible with older versions of btrfs if the fs is mounted read only, and incompat_flags has features that are incompatible with older versions of btrfs. This also axes the compat_flags field for the inode and just makes the flags field a 64bit field, and changes the root item flags field to 64bit. Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2464c099db57..6ae9bdf98b68 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1434,6 +1434,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, u32 blocksize; u32 stripesize; u64 generation; + u64 features; struct btrfs_key location; struct buffer_head *bh; struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), @@ -1586,6 +1587,26 @@ struct btrfs_root *open_ctree(struct super_block *sb, goto fail_sb_buffer; } + features = btrfs_super_incompat_flags(disk_super) & + ~BTRFS_FEATURE_INCOMPAT_SUPP; + if (features) { + printk(KERN_ERR "BTRFS: couldn't mount because of " + "unsupported optional features (%Lx).\n", + features); + err = -EINVAL; + goto fail_sb_buffer; + } + + features = btrfs_super_compat_ro_flags(disk_super) & + ~BTRFS_FEATURE_COMPAT_RO_SUPP; + if (!(sb->s_flags & MS_RDONLY) && features) { + printk(KERN_ERR "BTRFS: couldn't mount RDWR because of " + "unsupported option features (%Lx).\n", + features); + err = -EINVAL; + goto fail_sb_buffer; + } + /* * we need to start all the end_io workers up front because the * queue work function gets called at interrupt time, and so it -- cgit v1.2.3 From c6e2bac1a52ffc36dd10769b594dfa3994e95f77 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 2 Dec 2008 06:36:10 -0500 Subject: Btrfs: fix panic on error during mount This needs to be applied on top of my previous patches, but is needed for more than just my new stuff. We're going to the wrong label when we have an error, we try to stop the workers, but they are started below all of this code. This fixes it so we go to the right error label and not panic when we fail one of these cases. Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6ae9bdf98b68..dfd5ba05ce45 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1579,12 +1579,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, disk_super = &fs_info->super_copy; if (!btrfs_super_root(disk_super)) - goto fail_sb_buffer; + goto fail_iput; ret = btrfs_parse_options(tree_root, options); if (ret) { err = ret; - goto fail_sb_buffer; + goto fail_iput; } features = btrfs_super_incompat_flags(disk_super) & @@ -1594,7 +1594,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, "unsupported optional features (%Lx).\n", features); err = -EINVAL; - goto fail_sb_buffer; + goto fail_iput; } features = btrfs_super_compat_ro_flags(disk_super) & @@ -1604,7 +1604,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, "unsupported option features (%Lx).\n", features); err = -EINVAL; - goto fail_sb_buffer; + goto fail_iput; } /* -- cgit v1.2.3 From 607d432da0542e84ddcd358adfddac6f68500e3d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 2 Dec 2008 07:17:45 -0500 Subject: Btrfs: add support for multiple csum algorithms This patch gives us the space we will need in order to have different csum algorithims at some point in the future. We save the csum algorithim type in the superblock, and use those instead of define's. Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dfd5ba05ce45..3eb7c2576fe5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -176,7 +176,9 @@ void btrfs_csum_final(u32 crc, char *result) static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, int verify) { - char result[BTRFS_CRC32_SIZE]; + u16 csum_size = + btrfs_super_csum_size(&root->fs_info->super_copy); + char *result = NULL; unsigned long len; unsigned long cur_len; unsigned long offset = BTRFS_CSUM_SIZE; @@ -186,6 +188,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, unsigned long map_len; int err; u32 crc = ~(u32)0; + unsigned long inline_result; len = buf->len - offset; while(len > 0) { @@ -204,25 +207,37 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, offset += cur_len; unmap_extent_buffer(buf, map_token, KM_USER0); } + if (csum_size > sizeof(inline_result)) { + result = kzalloc(csum_size * sizeof(char), GFP_NOFS); + if (!result) + return 1; + } else { + result = (char *)&inline_result; + } + btrfs_csum_final(crc, result); if (verify) { /* FIXME, this is not good */ - if (memcmp_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE)) { + if (memcmp_extent_buffer(buf, result, 0, csum_size)) { u32 val; u32 found = 0; - memcpy(&found, result, BTRFS_CRC32_SIZE); + memcpy(&found, result, csum_size); - read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE); + read_extent_buffer(buf, &val, 0, csum_size); printk("btrfs: %s checksum verify failed on %llu " "wanted %X found %X level %d\n", root->fs_info->sb->s_id, buf->start, val, found, btrfs_header_level(buf)); + if (result != (char *)&inline_result) + kfree(result); return 1; } } else { - write_extent_buffer(buf, result, 0, BTRFS_CRC32_SIZE); + write_extent_buffer(buf, result, 0, csum_size); } + if (result != (char *)&inline_result) + kfree(result); return 0; } -- cgit v1.2.3 From d20f7043fa65659136c1a7c3c456eeeb5c6f431f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 8 Dec 2008 16:58:54 -0500 Subject: Btrfs: move data checksumming into a dedicated tree Btrfs stores checksums for each data block. Until now, they have been stored in the subvolume trees, indexed by the inode that is referencing the data block. This means that when we read the inode, we've probably read in at least some checksums as well. But, this has a few problems: * The checksums are indexed by logical offset in the file. When compression is on, this means we have to do the expensive checksumming on the uncompressed data. It would be faster if we could checksum the compressed data instead. * If we implement encryption, we'll be checksumming the plain text and storing that on disk. This is significantly less secure. * For either compression or encryption, we have to get the plain text back before we can verify the checksum as correct. This makes the raid layer balancing and extent moving much more expensive. * It makes the front end caching code more complex, as we have touch the subvolume and inodes as we cache extents. * There is potentitally one copy of the checksum in each subvolume referencing an extent. The solution used here is to store the extent checksums in a dedicated tree. This allows us to index the checksums by phyiscal extent start and length. It means: * The checksum is against the data stored on disk, after any compression or encryption is done. * The checksum is stored in a central location, and can be verified without following back references, or reading inodes. This makes compression significantly faster by reducing the amount of data that needs to be checksummed. It will also allow much faster raid management code in general. The checksums are indexed by a key with a fixed objectid (a magic value in ctree.h) and offset set to the starting byte of the extent. This allows us to copy the checksum items into the fsync log tree directly (or any other tree), without having to invent a second format for them. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 45 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3eb7c2576fe5..61dc3b2c834b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -445,11 +445,18 @@ static void end_workqueue_bio(struct bio *bio, int err) end_io_wq->error = err; end_io_wq->work.func = end_workqueue_fn; end_io_wq->work.flags = 0; - if (bio->bi_rw & (1 << BIO_RW)) + + if (bio->bi_rw & (1 << BIO_RW)) { btrfs_queue_worker(&fs_info->endio_write_workers, &end_io_wq->work); - else - btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work); + } else { + if (end_io_wq->metadata) + btrfs_queue_worker(&fs_info->endio_meta_workers, + &end_io_wq->work); + else + btrfs_queue_worker(&fs_info->endio_workers, + &end_io_wq->work); + } } int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, @@ -1208,6 +1215,9 @@ static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) info = (struct btrfs_fs_info *)bdi->unplug_io_data; list_for_each(cur, &info->fs_devices->devices) { device = list_entry(cur, struct btrfs_device, dev_list); + if (!device->bdev) + continue; + bdi = blk_get_backing_dev_info(device->bdev); if (bdi->unplug_io_fn) { bdi->unplug_io_fn(bdi, page); @@ -1344,7 +1354,7 @@ static void end_workqueue_fn(struct btrfs_work *work) * blocksize <= pagesize, it is basically a noop */ if (end_io_wq->metadata && !bio_ready_for_csum(bio)) { - btrfs_queue_worker(&fs_info->endio_workers, + btrfs_queue_worker(&fs_info->endio_meta_workers, &end_io_wq->work); return; } @@ -1454,6 +1464,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, struct buffer_head *bh; struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); + struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), + GFP_NOFS); struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), @@ -1470,7 +1482,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, struct btrfs_super_block *disk_super; if (!extent_root || !tree_root || !fs_info || - !chunk_root || !dev_root) { + !chunk_root || !dev_root || !csum_root) { err = -ENOMEM; goto fail; } @@ -1487,6 +1499,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, init_completion(&fs_info->kobj_unregister); fs_info->tree_root = tree_root; fs_info->extent_root = extent_root; + fs_info->csum_root = csum_root; fs_info->chunk_root = chunk_root; fs_info->dev_root = dev_root; fs_info->fs_devices = fs_devices; @@ -1652,6 +1665,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1); btrfs_init_workers(&fs_info->endio_workers, "endio", fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta", + fs_info->thread_pool_size); btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", fs_info->thread_pool_size); @@ -1667,6 +1682,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_start_workers(&fs_info->delalloc_workers, 1); btrfs_start_workers(&fs_info->fixup_workers, 1); btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); + btrfs_start_workers(&fs_info->endio_meta_workers, + fs_info->thread_pool_size); btrfs_start_workers(&fs_info->endio_write_workers, fs_info->thread_pool_size); @@ -1751,6 +1768,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (ret) goto fail_extent_root; + ret = find_and_setup_root(tree_root, fs_info, + BTRFS_CSUM_TREE_OBJECTID, csum_root); + if (ret) + goto fail_extent_root; + + csum_root->track_dirty = 1; + btrfs_read_block_groups(extent_root); fs_info->generation = generation + 1; @@ -1761,7 +1785,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, "btrfs-cleaner"); if (!fs_info->cleaner_kthread) - goto fail_extent_root; + goto fail_csum_root; fs_info->transaction_kthread = kthread_run(transaction_kthread, tree_root, @@ -1825,6 +1849,8 @@ fail_cleaner: filemap_write_and_wait(fs_info->btree_inode->i_mapping); invalidate_inode_pages2(fs_info->btree_inode->i_mapping); +fail_csum_root: + free_extent_buffer(csum_root->node); fail_extent_root: free_extent_buffer(extent_root->node); fail_tree_root: @@ -1838,6 +1864,7 @@ fail_sb_buffer: btrfs_stop_workers(&fs_info->delalloc_workers); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); + btrfs_stop_workers(&fs_info->endio_meta_workers); btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); fail_iput: @@ -1853,6 +1880,7 @@ fail: kfree(fs_info); kfree(chunk_root); kfree(dev_root); + kfree(csum_root); return ERR_PTR(err); } @@ -2131,6 +2159,9 @@ int close_ctree(struct btrfs_root *root) if (root->fs_info->dev_root->node); free_extent_buffer(root->fs_info->dev_root->node); + if (root->fs_info->csum_root->node); + free_extent_buffer(root->fs_info->csum_root->node); + btrfs_free_block_groups(root->fs_info); del_fs_roots(fs_info); @@ -2141,6 +2172,7 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->delalloc_workers); btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); + btrfs_stop_workers(&fs_info->endio_meta_workers); btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); @@ -2163,6 +2195,7 @@ int close_ctree(struct btrfs_root *root) kfree(fs_info->tree_root); kfree(fs_info->chunk_root); kfree(fs_info->dev_root); + kfree(fs_info->csum_root); return 0; } -- cgit v1.2.3 From a512bbf855ff0af474257475f2e6da7acd854f52 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Mon, 8 Dec 2008 16:46:26 -0500 Subject: Btrfs: superblock duplication This patch implements superblock duplication. Superblocks are stored at offset 16K, 64M and 256G on every devices. Spaces used by superblocks are preserved by the allocator, which uses a reverse mapping function to find the logical addresses that correspond to superblocks. Thank you, Signed-off-by: Yan Zheng --- fs/btrfs/disk-io.c | 208 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 143 insertions(+), 65 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 61dc3b2c834b..c72f4f3b912c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1595,8 +1595,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info, BTRFS_ROOT_TREE_OBJECTID); - bh = __bread(fs_devices->latest_bdev, - BTRFS_SUPER_INFO_OFFSET / 4096, 4096); + bh = btrfs_read_dev_super(fs_devices->latest_bdev); if (!bh) goto fail_iput; @@ -1710,7 +1709,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, } mutex_lock(&fs_info->chunk_mutex); - ret = btrfs_read_sys_array(tree_root); + ret = btrfs_read_sys_array(tree_root, btrfs_super_bytenr(disk_super)); mutex_unlock(&fs_info->chunk_mutex); if (ret) { printk("btrfs: failed to read the system array on %s\n", @@ -1905,19 +1904,147 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) put_bh(bh); } -static int write_all_supers(struct btrfs_root *root) +struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) +{ + struct buffer_head *bh; + struct buffer_head *latest = NULL; + struct btrfs_super_block *super; + int i; + u64 transid = 0; + u64 bytenr; + + /* we would like to check all the supers, but that would make + * a btrfs mount succeed after a mkfs from a different FS. + * So, we need to add a special mount option to scan for + * later supers, using BTRFS_SUPER_MIRROR_MAX instead + */ + for (i = 0; i < 1; i++) { + bytenr = btrfs_sb_offset(i); + if (bytenr + 4096 >= i_size_read(bdev->bd_inode)) + break; + bh = __bread(bdev, bytenr / 4096, 4096); + if (!bh) + continue; + + super = (struct btrfs_super_block *)bh->b_data; + if (btrfs_super_bytenr(super) != bytenr || + strncmp((char *)(&super->magic), BTRFS_MAGIC, + sizeof(super->magic))) { + brelse(bh); + continue; + } + + if (!latest || btrfs_super_generation(super) > transid) { + brelse(latest); + latest = bh; + transid = btrfs_super_generation(super); + } else { + brelse(bh); + } + } + return latest; +} + +static int write_dev_supers(struct btrfs_device *device, + struct btrfs_super_block *sb, + int do_barriers, int wait, int max_mirrors) +{ + struct buffer_head *bh; + int i; + int ret; + int errors = 0; + u32 crc; + u64 bytenr; + int last_barrier = 0; + + if (max_mirrors == 0) + max_mirrors = BTRFS_SUPER_MIRROR_MAX; + + /* make sure only the last submit_bh does a barrier */ + if (do_barriers) { + for (i = 0; i < max_mirrors; i++) { + bytenr = btrfs_sb_offset(i); + if (bytenr + BTRFS_SUPER_INFO_SIZE >= + device->total_bytes) + break; + last_barrier = i; + } + } + + for (i = 0; i < max_mirrors; i++) { + bytenr = btrfs_sb_offset(i); + if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) + break; + + if (wait) { + bh = __find_get_block(device->bdev, bytenr / 4096, + BTRFS_SUPER_INFO_SIZE); + BUG_ON(!bh); + brelse(bh); + wait_on_buffer(bh); + if (buffer_uptodate(bh)) { + brelse(bh); + continue; + } + } else { + btrfs_set_super_bytenr(sb, bytenr); + + crc = ~(u32)0; + crc = btrfs_csum_data(NULL, (char *)sb + + BTRFS_CSUM_SIZE, crc, + BTRFS_SUPER_INFO_SIZE - + BTRFS_CSUM_SIZE); + btrfs_csum_final(crc, sb->csum); + + bh = __getblk(device->bdev, bytenr / 4096, + BTRFS_SUPER_INFO_SIZE); + memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); + + set_buffer_uptodate(bh); + get_bh(bh); + lock_buffer(bh); + bh->b_end_io = btrfs_end_buffer_write_sync; + } + + if (i == last_barrier && do_barriers && device->barriers) { + ret = submit_bh(WRITE_BARRIER, bh); + if (ret == -EOPNOTSUPP) { + printk("btrfs: disabling barriers on dev %s\n", + device->name); + set_buffer_uptodate(bh); + device->barriers = 0; + get_bh(bh); + lock_buffer(bh); + ret = submit_bh(WRITE, bh); + } + } else { + ret = submit_bh(WRITE, bh); + } + + if (!ret && wait) { + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) + errors++; + } else if (ret) { + errors++; + } + if (wait) + brelse(bh); + } + return errors < i ? 0 : -1; +} + +int write_all_supers(struct btrfs_root *root, int max_mirrors) { struct list_head *cur; struct list_head *head = &root->fs_info->fs_devices->devices; struct btrfs_device *dev; struct btrfs_super_block *sb; struct btrfs_dev_item *dev_item; - struct buffer_head *bh; int ret; int do_barriers; int max_errors; int total_errors = 0; - u32 crc; u64 flags; max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; @@ -1944,40 +2071,11 @@ static int write_all_supers(struct btrfs_root *root) btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE); memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE); + flags = btrfs_super_flags(sb); btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); - - crc = ~(u32)0; - crc = btrfs_csum_data(root, (char *)sb + BTRFS_CSUM_SIZE, crc, - BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); - btrfs_csum_final(crc, sb->csum); - - bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / 4096, - BTRFS_SUPER_INFO_SIZE); - - memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); - dev->pending_io = bh; - - get_bh(bh); - set_buffer_uptodate(bh); - lock_buffer(bh); - bh->b_end_io = btrfs_end_buffer_write_sync; - - if (do_barriers && dev->barriers) { - ret = submit_bh(WRITE_BARRIER, bh); - if (ret == -EOPNOTSUPP) { - printk("btrfs: disabling barriers on dev %s\n", - dev->name); - set_buffer_uptodate(bh); - dev->barriers = 0; - get_bh(bh); - lock_buffer(bh); - ret = submit_bh(WRITE, bh); - } - } else { - ret = submit_bh(WRITE, bh); - } + ret = write_dev_supers(dev, sb, do_barriers, 0, max_mirrors); if (ret) total_errors++; } @@ -1985,8 +2083,8 @@ static int write_all_supers(struct btrfs_root *root) printk("btrfs: %d errors while writing supers\n", total_errors); BUG(); } - total_errors = 0; + total_errors = 0; list_for_each(cur, head) { dev = list_entry(cur, struct btrfs_device, dev_list); if (!dev->bdev) @@ -1994,29 +2092,9 @@ static int write_all_supers(struct btrfs_root *root) if (!dev->in_fs_metadata || !dev->writeable) continue; - BUG_ON(!dev->pending_io); - bh = dev->pending_io; - wait_on_buffer(bh); - if (!buffer_uptodate(dev->pending_io)) { - if (do_barriers && dev->barriers) { - printk("btrfs: disabling barriers on dev %s\n", - dev->name); - set_buffer_uptodate(bh); - get_bh(bh); - lock_buffer(bh); - dev->barriers = 0; - ret = submit_bh(WRITE, bh); - BUG_ON(ret); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - total_errors++; - } else { - total_errors++; - } - - } - dev->pending_io = NULL; - brelse(bh); + ret = write_dev_supers(dev, sb, do_barriers, 1, max_mirrors); + if (ret) + total_errors++; } if (total_errors > max_errors) { printk("btrfs: %d errors while writing supers\n", total_errors); @@ -2025,12 +2103,12 @@ static int write_all_supers(struct btrfs_root *root) return 0; } -int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root - *root) +int write_ctree_super(struct btrfs_trans_handle *trans, + struct btrfs_root *root, int max_mirrors) { int ret; - ret = write_all_supers(root); + ret = write_all_supers(root, max_mirrors); return ret; } @@ -2116,7 +2194,7 @@ int btrfs_commit_super(struct btrfs_root *root) ret = btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); - ret = write_ctree_super(NULL, root); + ret = write_ctree_super(NULL, root, 0); return ret; } -- cgit v1.2.3 From 0403e47ee26f26e960ee9038552bc89df4a1fb3d Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Wed, 10 Dec 2008 20:32:51 -0500 Subject: Btrfs: Add checking of csum tree in balancing code This updates the space balancing code for the new checksum format. Signed-off-by: Yan Zheng --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c72f4f3b912c..6f042de1ac43 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1097,6 +1097,8 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, return fs_info->chunk_root; if (location->objectid == BTRFS_DEV_TREE_OBJECTID) return fs_info->dev_root; + if (location->objectid == BTRFS_CSUM_TREE_OBJECTID) + return fs_info->csum_root; root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)location->objectid); -- cgit v1.2.3 From e4404d6e8da678d852b7f767f665f8edf76c9e9f Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Fri, 12 Dec 2008 10:03:26 -0500 Subject: Btrfs: shared seed device This patch makes seed device possible to be shared by multiple mounted file systems. The sharing is achieved by cloning seed device's btrfs_fs_devices structure. Thanks you, Signed-off-by: Yan Zheng --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6f042de1ac43..541a8279ac71 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1711,7 +1711,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, } mutex_lock(&fs_info->chunk_mutex); - ret = btrfs_read_sys_array(tree_root, btrfs_super_bytenr(disk_super)); + ret = btrfs_read_sys_array(tree_root); mutex_unlock(&fs_info->chunk_mutex); if (ret) { printk("btrfs: failed to read the system array on %s\n", -- cgit v1.2.3 From cad321ad529400c6ab24c501a67c3be720a0744c Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 17 Dec 2008 14:51:42 -0500 Subject: Btrfs: shift all end_io work to thread pools bio_end_io for reads without checksumming on and btree writes were happening without using async thread pools. This means the extent_io.c code had to use spin_lock_irq and friends on the rb tree locks for extent state. There were some irq safe vs unsafe lock inversions between the delallock lock and the extent state locks. This patch gets rid of them by moving all end_io code into the thread pools. To avoid contention and deadlocks between the data end_io processing and the metadata end_io processing yet another thread pool is added to finish off metadata writes. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 541a8279ac71..04f8d7080b17 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -447,8 +447,12 @@ static void end_workqueue_bio(struct bio *bio, int err) end_io_wq->work.flags = 0; if (bio->bi_rw & (1 << BIO_RW)) { - btrfs_queue_worker(&fs_info->endio_write_workers, - &end_io_wq->work); + if (end_io_wq->metadata) + btrfs_queue_worker(&fs_info->endio_meta_write_workers, + &end_io_wq->work); + else + btrfs_queue_worker(&fs_info->endio_write_workers, + &end_io_wq->work); } else { if (end_io_wq->metadata) btrfs_queue_worker(&fs_info->endio_meta_workers, @@ -624,23 +628,24 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, int mirror_num, unsigned long bio_flags) { - /* - * kthread helpers are used to submit writes so that checksumming - * can happen in parallel across all CPUs - */ + int ret; + + ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, + bio, 1); + BUG_ON(ret); + if (!(rw & (1 << BIO_RW))) { - int ret; /* * called for a read, do the setup so that checksum validation * can happen in the async kernel threads */ - ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, - bio, 1); - BUG_ON(ret); - return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 0); } + /* + * kthread helpers are used to submit writes so that checksumming + * can happen in parallel across all CPUs + */ return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, inode, rw, bio, mirror_num, 0, __btree_submit_bio_start, @@ -1350,12 +1355,13 @@ static void end_workqueue_fn(struct btrfs_work *work) bio = end_io_wq->bio; fs_info = end_io_wq->info; - /* metadata bios are special because the whole tree block must + /* metadata bio reads are special because the whole tree block must * be checksummed at once. This makes sure the entire block is in * ram and up to date before trying to verify things. For * blocksize <= pagesize, it is basically a noop */ - if (end_io_wq->metadata && !bio_ready_for_csum(bio)) { + if (!(bio->bi_rw & (1 << BIO_RW)) && end_io_wq->metadata && + !bio_ready_for_csum(bio)) { btrfs_queue_worker(&fs_info->endio_meta_workers, &end_io_wq->work); return; @@ -1668,6 +1674,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->thread_pool_size); btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta", fs_info->thread_pool_size); + btrfs_init_workers(&fs_info->endio_meta_write_workers, + "endio-meta-write", fs_info->thread_pool_size); btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", fs_info->thread_pool_size); @@ -1677,6 +1685,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, */ fs_info->endio_workers.idle_thresh = 4; fs_info->endio_write_workers.idle_thresh = 64; + fs_info->endio_meta_write_workers.idle_thresh = 64; btrfs_start_workers(&fs_info->workers, 1); btrfs_start_workers(&fs_info->submit_workers, 1); @@ -1685,6 +1694,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); btrfs_start_workers(&fs_info->endio_meta_workers, fs_info->thread_pool_size); + btrfs_start_workers(&fs_info->endio_meta_write_workers, + fs_info->thread_pool_size); btrfs_start_workers(&fs_info->endio_write_workers, fs_info->thread_pool_size); @@ -1866,6 +1877,7 @@ fail_sb_buffer: btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); btrfs_stop_workers(&fs_info->endio_meta_workers); + btrfs_stop_workers(&fs_info->endio_meta_write_workers); btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); fail_iput: @@ -2253,6 +2265,7 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->workers); btrfs_stop_workers(&fs_info->endio_workers); btrfs_stop_workers(&fs_info->endio_meta_workers); + btrfs_stop_workers(&fs_info->endio_meta_write_workers); btrfs_stop_workers(&fs_info->endio_write_workers); btrfs_stop_workers(&fs_info->submit_workers); -- cgit v1.2.3 From b34b086c1c1d934c5314d46ba25ccfa9acc471ae Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 19 Dec 2008 15:43:22 -0500 Subject: Btrfs: Fix compile warning around num_online_cpus() in a min statement Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 04f8d7080b17..40a540f31161 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1528,7 +1528,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_ino = 1; fs_info->btree_inode->i_nlink = 1; - fs_info->thread_pool_size = min(num_online_cpus() + 2, 8); + fs_info->thread_pool_size = min_t(unsigned long, + num_online_cpus() + 2, 8); INIT_LIST_HEAD(&fs_info->ordered_extents); spin_lock_init(&fs_info->ordered_extent_lock); -- cgit v1.2.3 From 2d69a0f88459fae35df3ddef4934a2dad67e2765 Mon Sep 17 00:00:00 2001 From: Yan Zheng Date: Mon, 5 Jan 2009 15:43:42 -0500 Subject: Btrfs: avoid potential super block corruption The data in fs_info->super_for_commit are zeros before the first transaction commit. If tree log sync and system crash both occur before the first transaction commit, super block will get corrupted. This fixes it by properly filling in the super_for_commit field at open time. Signed-off-by: Yan Zheng --- fs/btrfs/disk-io.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 40a540f31161..dae25e78a6b7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1609,6 +1609,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, goto fail_iput; memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); + memcpy(&fs_info->super_for_commit, &fs_info->super_copy, + sizeof(fs_info->super_for_commit)); brelse(bh); memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE); @@ -1790,7 +1792,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_read_block_groups(extent_root); - fs_info->generation = generation + 1; + fs_info->generation = generation; fs_info->last_trans_committed = generation; fs_info->data_alloc_profile = (u64)-1; fs_info->metadata_alloc_profile = (u64)-1; -- cgit v1.2.3 From d397712bcc6a759a560fd247e6053ecae091f958 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 5 Jan 2009 21:25:51 -0500 Subject: Btrfs: Fix checkpatch.pl warnings There were many, most are fixed now. struct-funcs.c generates some warnings but these are bogus. Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 187 ++++++++++++++++++----------------------------------- 1 file changed, 63 insertions(+), 124 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dae25e78a6b7..81a313874ae5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -23,7 +23,7 @@ #include #include #include -#include // for block_sync_page +#include #include #include #include @@ -40,19 +40,6 @@ #include "ref-cache.h" #include "tree-log.h" -#if 0 -static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) -{ - if (extent_buffer_blocknr(buf) != btrfs_header_blocknr(buf)) { - printk(KERN_CRIT "buf blocknr(buf) is %llu, header is %llu\n", - (unsigned long long)extent_buffer_blocknr(buf), - (unsigned long long)btrfs_header_blocknr(buf)); - return 1; - } - return 0; -} -#endif - static struct extent_io_ops btree_extent_io_ops; static void end_workqueue_fn(struct btrfs_work *work); @@ -128,23 +115,13 @@ static struct extent_map *btree_get_extent(struct inode *inode, u64 failed_start = em->start; u64 failed_len = em->len; - printk("failed to insert %Lu %Lu -> %Lu into tree\n", - em->start, em->len, em->block_start); free_extent_map(em); em = lookup_extent_mapping(em_tree, start, len); if (em) { - printk("after failing, found %Lu %Lu %Lu\n", - em->start, em->len, em->block_start); ret = 0; } else { em = lookup_extent_mapping(em_tree, failed_start, failed_len); - if (em) { - printk("double failure lookup gives us " - "%Lu %Lu -> %Lu\n", em->start, - em->len, em->block_start); - free_extent_map(em); - } ret = -EIO; } } else if (ret) { @@ -191,15 +168,12 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, unsigned long inline_result; len = buf->len - offset; - while(len > 0) { + while (len > 0) { err = map_private_extent_buffer(buf, offset, 32, &map_token, &kaddr, &map_start, &map_len, KM_USER0); - if (err) { - printk("failed to map extent buffer! %lu\n", - offset); + if (err) return 1; - } cur_len = min(len, map_len - (offset - map_start)); crc = btrfs_csum_data(root, kaddr + offset - map_start, crc, cur_len); @@ -218,15 +192,14 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, btrfs_csum_final(crc, result); if (verify) { - /* FIXME, this is not good */ if (memcmp_extent_buffer(buf, result, 0, csum_size)) { u32 val; u32 found = 0; memcpy(&found, result, csum_size); read_extent_buffer(buf, &val, 0, csum_size); - printk("btrfs: %s checksum verify failed on %llu " - "wanted %X found %X level %d\n", + printk(KERN_INFO "btrfs: %s checksum verify failed " + "on %llu wanted %X found %X level %d\n", root->fs_info->sb->s_id, buf->start, val, found, btrfs_header_level(buf)); if (result != (char *)&inline_result) @@ -293,7 +266,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, if (!ret && !verify_parent_transid(io_tree, eb, parent_transid)) return ret; -printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror_num); + num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, eb->start, eb->len); if (num_copies == 1) @@ -307,9 +280,10 @@ printk("read extent buffer pages failed with ret %d mirror no %d\n", ret, mirror } /* - * checksum a dirty tree block before IO. This has extra checks to make - * sure we only fill in the checksum field in the first page of a multi-page block + * checksum a dirty tree block before IO. This has extra checks to make sure + * we only fill in the checksum field in the first page of a multi-page block */ + static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) { struct extent_io_tree *tree; @@ -327,28 +301,22 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) if (!page->private) goto out; len = page->private >> 2; - if (len == 0) { - WARN_ON(1); - } + WARN_ON(len == 0); + eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, btrfs_header_generation(eb)); BUG_ON(ret); found_start = btrfs_header_bytenr(eb); if (found_start != start) { - printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n", - start, found_start, len); WARN_ON(1); goto err; } if (eb->first_page != page) { - printk("bad first page %lu %lu\n", eb->first_page->index, - page->index); WARN_ON(1); goto err; } if (!PageUptodate(page)) { - printk("csum not up to date page %lu\n", page->index); WARN_ON(1); goto err; } @@ -396,29 +364,30 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, goto out; if (!page->private) goto out; + len = page->private >> 2; - if (len == 0) { - WARN_ON(1); - } + WARN_ON(len == 0); + eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS); found_start = btrfs_header_bytenr(eb); if (found_start != start) { - printk("bad tree block start %llu %llu\n", + printk(KERN_INFO "btrfs bad tree block start %llu %llu\n", (unsigned long long)found_start, (unsigned long long)eb->start); ret = -EIO; goto err; } if (eb->first_page != page) { - printk("bad first page %lu %lu\n", eb->first_page->index, - page->index); + printk(KERN_INFO "btrfs bad first page %lu %lu\n", + eb->first_page->index, page->index); WARN_ON(1); ret = -EIO; goto err; } if (check_tree_block_fsid(root, eb)) { - printk("bad fsid on block %Lu\n", eb->start); + printk(KERN_INFO "btrfs bad fsid on block %llu\n", + (unsigned long long)eb->start); ret = -EIO; goto err; } @@ -578,7 +547,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, HZ/10); } #endif - while(atomic_read(&fs_info->async_submit_draining) && + while (atomic_read(&fs_info->async_submit_draining) && atomic_read(&fs_info->nr_async_submits)) { wait_event(fs_info->async_submit_wait, (atomic_read(&fs_info->nr_async_submits) == 0)); @@ -594,7 +563,7 @@ static int btree_csum_one_bio(struct bio *bio) struct btrfs_root *root; WARN_ON(bio->bi_vcnt <= 0); - while(bio_index < bio->bi_vcnt) { + while (bio_index < bio->bi_vcnt) { root = BTRFS_I(bvec->bv_page->mapping->host)->root; csum_dirty_buffer(root, bvec->bv_page); bio_index++; @@ -680,9 +649,8 @@ static int btree_writepages(struct address_space *mapping, num_dirty = count_range_bits(tree, &start, (u64)-1, thresh, EXTENT_DIRTY); - if (num_dirty < thresh) { + if (num_dirty < thresh) return 0; - } } return extent_writepages(tree, mapping, btree_get_extent, wbc); } @@ -701,15 +669,14 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) int ret; if (PageWriteback(page) || PageDirty(page)) - return 0; + return 0; tree = &BTRFS_I(page->mapping->host)->io_tree; map = &BTRFS_I(page->mapping->host)->extent_tree; ret = try_release_extent_state(map, tree, page, gfp_flags); - if (!ret) { + if (!ret) return 0; - } ret = try_release_extent_buffer(tree, page); if (ret == 1) { @@ -728,8 +695,8 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) extent_invalidatepage(tree, page, offset); btree_releasepage(page, GFP_NOFS); if (PagePrivate(page)) { - printk("warning page private not zero on page %Lu\n", - page_offset(page)); + printk(KERN_WARNING "btrfs warning page private not zero " + "on page %llu\n", (unsigned long long)page_offset(page)); ClearPagePrivate(page); set_page_private(page, 0); page_cache_release(page); @@ -813,7 +780,7 @@ int btrfs_write_tree_block(struct extent_buffer *buf) int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) { return btrfs_wait_on_page_writeback_range(buf->first_page->mapping, - buf->start, buf->start + buf->len -1); + buf->start, buf->start + buf->len - 1); } struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, @@ -832,11 +799,10 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); - if (ret == 0) { + if (ret == 0) buf->flags |= EXTENT_UPTODATE; - } else { + else WARN_ON(1); - } return buf; } @@ -944,7 +910,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, if (!log_root_tree) return 0; - while(1) { + while (1) { ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, 0, &start, &end, EXTENT_DIRTY); if (ret) @@ -1165,24 +1131,6 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info, root->in_sysfs = 1; return root; } -#if 0 -static int add_hasher(struct btrfs_fs_info *info, char *type) { - struct btrfs_hasher *hasher; - - hasher = kmalloc(sizeof(*hasher), GFP_NOFS); - if (!hasher) - return -ENOMEM; - hasher->hash_tfm = crypto_alloc_hash(type, 0, CRYPTO_ALG_ASYNC); - if (!hasher->hash_tfm) { - kfree(hasher); - return -EINVAL; - } - spin_lock(&info->hash_lock); - list_add(&hasher->list, &info->hashers); - spin_unlock(&info->hash_lock); - return 0; -} -#endif static int btrfs_congested_fn(void *congested_data, int bdi_bits) { @@ -1226,9 +1174,8 @@ static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) continue; bdi = blk_get_backing_dev_info(device->bdev); - if (bdi->unplug_io_fn) { + if (bdi->unplug_io_fn) bdi->unplug_io_fn(bdi, page); - } } } @@ -1420,8 +1367,9 @@ static int transaction_kthread(void *arg) mutex_lock(&root->fs_info->transaction_kthread_mutex); if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) { - printk("btrfs: total reference cache size %Lu\n", - root->fs_info->total_ref_cache_size); + printk(KERN_INFO "btrfs: total reference cache " + "size %llu\n", + root->fs_info->total_ref_cache_size); } mutex_lock(&root->fs_info->trans_mutex); @@ -1592,14 +1540,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, atomic_set(&fs_info->tree_log_writers, 0); fs_info->tree_log_transid = 0; -#if 0 - ret = add_hasher(fs_info, "crc32c"); - if (ret) { - printk("btrfs: failed hash setup, modprobe cryptomgr?\n"); - err = -ENOMEM; - goto fail_iput; - } -#endif __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); @@ -1720,7 +1660,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, sizeof(disk_super->magic))) { - printk("btrfs: valid FS not found on %s\n", sb->s_id); + printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id); goto fail_sb_buffer; } @@ -1728,8 +1668,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = btrfs_read_sys_array(tree_root); mutex_unlock(&fs_info->chunk_mutex); if (ret) { - printk("btrfs: failed to read the system array on %s\n", - sb->s_id); + printk(KERN_WARNING "btrfs: failed to read the system " + "array on %s\n", sb->s_id); goto fail_sys_array; } @@ -1746,14 +1686,15 @@ struct btrfs_root *open_ctree(struct super_block *sb, BUG_ON(!chunk_root->node); read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, - (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), - BTRFS_UUID_SIZE); + (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node), + BTRFS_UUID_SIZE); mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_chunk_tree(chunk_root); mutex_unlock(&fs_info->chunk_mutex); if (ret) { - printk("btrfs: failed to read chunk tree on %s\n", sb->s_id); + printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", + sb->s_id); goto fail_chunk_root; } @@ -1812,7 +1753,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, u64 bytenr = btrfs_super_log_root(disk_super); if (fs_devices->rw_devices == 0) { - printk("Btrfs log replay required on RO media\n"); + printk(KERN_WARNING "Btrfs log replay required " + "on RO media\n"); err = -EIO; goto fail_trans_kthread; } @@ -2097,7 +2039,8 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) total_errors++; } if (total_errors > max_errors) { - printk("btrfs: %d errors while writing supers\n", total_errors); + printk(KERN_ERR "btrfs: %d errors while writing supers\n", + total_errors); BUG(); } @@ -2114,7 +2057,8 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) total_errors++; } if (total_errors > max_errors) { - printk("btrfs: %d errors while writing supers\n", total_errors); + printk(KERN_ERR "btrfs: %d errors while writing supers\n", + total_errors); BUG(); } return 0; @@ -2137,16 +2081,11 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) down_write(&root->anon_super.s_umount); kill_anon_super(&root->anon_super); } -#if 0 - if (root->in_sysfs) - btrfs_sysfs_del_root(root); -#endif if (root->node) free_extent_buffer(root->node); if (root->commit_root) free_extent_buffer(root->commit_root); - if (root->name) - kfree(root->name); + kfree(root->name); kfree(root); return 0; } @@ -2157,7 +2096,7 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info) struct btrfs_root *gang[8]; int i; - while(1) { + while (1) { ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, (void **)gang, 0, ARRAY_SIZE(gang)); @@ -2228,18 +2167,17 @@ int close_ctree(struct btrfs_root *root) if (!(fs_info->sb->s_flags & MS_RDONLY)) { ret = btrfs_commit_super(root); - if (ret) { - printk("btrfs: commit super returns %d\n", ret); - } + if (ret) + printk(KERN_ERR "btrfs: commit super ret %d\n", ret); } if (fs_info->delalloc_bytes) { - printk("btrfs: at unmount delalloc count %Lu\n", + printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", fs_info->delalloc_bytes); } if (fs_info->total_ref_cache_size) { - printk("btrfs: at umount reference cache size %Lu\n", - fs_info->total_ref_cache_size); + printk(KERN_INFO "btrfs: at umount reference cache size %llu\n", + (unsigned long long)fs_info->total_ref_cache_size); } if (fs_info->extent_root->node) @@ -2248,13 +2186,13 @@ int close_ctree(struct btrfs_root *root) if (fs_info->tree_root->node) free_extent_buffer(fs_info->tree_root->node); - if (root->fs_info->chunk_root->node); + if (root->fs_info->chunk_root->node) free_extent_buffer(root->fs_info->chunk_root->node); - if (root->fs_info->dev_root->node); + if (root->fs_info->dev_root->node) free_extent_buffer(root->fs_info->dev_root->node); - if (root->fs_info->csum_root->node); + if (root->fs_info->csum_root->node) free_extent_buffer(root->fs_info->csum_root->node); btrfs_free_block_groups(root->fs_info); @@ -2273,7 +2211,7 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->submit_workers); #if 0 - while(!list_empty(&fs_info->hashers)) { + while (!list_empty(&fs_info->hashers)) { struct btrfs_hasher *hasher; hasher = list_entry(fs_info->hashers.next, struct btrfs_hasher, hashers); @@ -2324,9 +2262,11 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) WARN_ON(!btrfs_tree_locked(buf)); if (transid != root->fs_info->generation) { - printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", + printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " + "found %llu running %llu\n", (unsigned long long)buf->start, - transid, root->fs_info->generation); + (unsigned long long)transid, + (unsigned long long)root->fs_info->generation); WARN_ON(1); } set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); @@ -2361,9 +2301,8 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; int ret; ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); - if (ret == 0) { + if (ret == 0) buf->flags |= EXTENT_UPTODATE; - } return ret; } -- cgit v1.2.3