From 748de6736c1e482e111f9d1b5a5d5b1787600cad Mon Sep 17 00:00:00 2001 From: Akira Fujita Date: Wed, 17 Jun 2009 19:24:03 -0400 Subject: ext4: online defrag -- Add EXT4_IOC_MOVE_EXT ioctl The EXT4_IOC_MOVE_EXT exchanges the blocks between orig_fd and donor_fd, and then write the file data of orig_fd to donor_fd. ext4_mext_move_extent() is the main fucntion of ext4 online defrag, and this patch includes all functions related to ext4 online defrag. Signed-off-by: Akira Fujita Signed-off-by: Takashi Sato Signed-off-by: Kazuya Mio Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index cc7d5edc38c9..276a26f117e6 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -352,6 +352,7 @@ struct ext4_new_group_data { /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */ /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) +#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) /* * ioctl commands in 32 bit emulation @@ -447,6 +448,15 @@ struct ext4_inode { __le32 i_version_hi; /* high 32 bits for 64-bit version */ }; +struct move_extent { + __u32 reserved; /* should be zero */ + __u32 donor_fd; /* donor file descriptor */ + __u64 orig_start; /* logical start offset in block for orig */ + __u64 donor_start; /* logical start offset in block for donor */ + __u64 len; /* block length to be moved */ + __u64 moved_len; /* moved block length */ +}; +#define MAX_DEFRAG_SIZE ((1UL<<31) - 1) #define EXT4_EPOCH_BITS 2 #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) @@ -1647,6 +1657,11 @@ extern int ext4_get_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bh, int flags); extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); +/* move_extent.c */ +extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, + __u64 start_orig, __u64 start_donor, + __u64 len, __u64 *moved_len); + /* * Add new method to test wether block and inode bitmaps are properly -- cgit v1.2.3 From 7f4520cc6242780ce720aa440ad4b391f998b558 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 13 Jun 2009 10:09:41 -0400 Subject: ext4: change s_mount_opt to be an unsigned int We can only fit 32 options in s_mount_opt because an unsigned long is 32-bits on a x86 machine. So use an unsigned int to save space on 64-bit platforms. Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 276a26f117e6..569f527080bf 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -852,7 +852,7 @@ struct ext4_sb_info { struct buffer_head * s_sbh; /* Buffer containing the super block */ struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ struct buffer_head **s_group_desc; - unsigned long s_mount_opt; + unsigned int s_mount_opt; ext4_fsblk_t s_sb_block; uid_t s_resuid; gid_t s_resgid; -- cgit v1.2.3 From bc0b0d6d69ee9022f18ae264e62beb30ddeb322a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 13 Jun 2009 10:09:48 -0400 Subject: ext4: update the s_last_mounted field in the superblock This field can be very helpful when a system administrator is trying to sort through large numbers of block devices or filesystem images. What is stored in this field can be ambiguous if multiple filesystem namespaces are in play; what we store in practice is the mountpoint interpreted by the process's namespace which first opens a file in the filesystem. Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 569f527080bf..9e268c97eeca 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -834,6 +834,12 @@ struct ext4_super_block { }; #ifdef __KERNEL__ + +/* + * Mount flags + */ +#define EXT4_MF_MNTDIR_SAMPLED 0x0001 + /* * fourth extended-fs super-block data in memory */ @@ -853,6 +859,7 @@ struct ext4_sb_info { struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ struct buffer_head **s_group_desc; unsigned int s_mount_opt; + unsigned int s_mount_flags; ext4_fsblk_t s_sb_block; uid_t s_resuid; gid_t s_resgid; -- cgit v1.2.3 From 4ab2f15b7f709c3626a7eed075a7225b4c775c7e Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 13 Jun 2009 10:09:36 -0400 Subject: ext4: move the abort flag from s_mount_opts to s_mount_flags We're running out of space in the mount options word, and EXT4_MOUNT_ABORT isn't really a mount option, but a run-time flag. So move it to become EXT4_MF_FS_ABORTED in s_mount_flags. Also remove bogus ext2_fs.h / ext4.h simultaneous #include protection, which can never happen. Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 9e268c97eeca..06ee5a582917 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -684,7 +684,6 @@ struct ext4_inode_info { #define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ #define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ #define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ -#define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */ #define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ #define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ #define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ @@ -706,17 +705,10 @@ struct ext4_inode_info { #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ -/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ -#ifndef _LINUX_EXT2_FS_H #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt #define set_opt(o, opt) o |= EXT4_MOUNT_##opt #define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \ EXT4_MOUNT_##opt) -#else -#define EXT2_MOUNT_NOLOAD EXT4_MOUNT_NOLOAD -#define EXT2_MOUNT_ABORT EXT4_MOUNT_ABORT -#define EXT2_MOUNT_DATA_FLAGS EXT4_MOUNT_DATA_FLAGS -#endif #define ext4_set_bit ext2_set_bit #define ext4_set_bit_atomic ext2_set_bit_atomic @@ -836,9 +828,10 @@ struct ext4_super_block { #ifdef __KERNEL__ /* - * Mount flags + * run-time mount flags */ #define EXT4_MF_MNTDIR_SAMPLED 0x0001 +#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ /* * fourth extended-fs super-block data in memory -- cgit v1.2.3 From f157a4aa98a18bd3817a72bea90d48494e2586e7 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 13 Jun 2009 11:09:42 -0400 Subject: ext4: Use a hash of the topdir directory name for the Orlov parent group Instead of using a random number to determine the goal parent grop for the Orlov top directories, use a hash of the directory name. This allows for repeatable results when trying to benchmark filesystem layout algorithms. Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 06ee5a582917..d035cf149e0e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1315,7 +1315,8 @@ extern int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo); /* ialloc.c */ -extern struct inode * ext4_new_inode(handle_t *, struct inode *, int); +extern struct inode *ext4_new_inode(handle_t *, struct inode *, int, + const struct qstr *qstr); extern void ext4_free_inode(handle_t *, struct inode *); extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); extern unsigned long ext4_count_free_inodes(struct super_block *); -- cgit v1.2.3 From 11013911daea4820147ae6d7094dd7c6894e8651 Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Sat, 13 Jun 2009 11:45:35 -0400 Subject: ext4: teach the inode allocator to use a goal inode number Enhance the inode allocator to take a goal inode number as a paremeter; if it is specified, it takes precedence over Orlov or parent directory inode allocation algorithms. The extents migration function uses the goal inode number so that the extent trees allocated the migration function use the correct flex_bg. In the future, the goal inode functionality will also be used to allocate an adjacent inode for the extended attributes. Also, for testing purposes the goal inode number can be specified via /sys/fs/{dev}/inode_goal. This can be useful for testing inode allocation beyond 2^32 blocks on very large filesystems. Signed-off-by: Andreas Dilger Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index d035cf149e0e..746cdcba969d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -863,6 +863,7 @@ struct ext4_sb_info { int s_inode_size; int s_first_ino; unsigned int s_inode_readahead_blks; + unsigned int s_inode_goal; spinlock_t s_next_gen_lock; u32 s_next_generation; u32 s_hash_seed[4]; @@ -1316,7 +1317,7 @@ extern int ext4fs_dirhash(const char *name, int len, struct /* ialloc.c */ extern struct inode *ext4_new_inode(handle_t *, struct inode *, int, - const struct qstr *qstr); + const struct qstr *qstr, __u32 goal); extern void ext4_free_inode(handle_t *, struct inode *); extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); extern unsigned long ext4_count_free_inodes(struct super_block *); -- cgit v1.2.3 From 0610b6e99939828b77eec020ead0e1f44cba38ca Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 15 Jun 2009 03:45:05 -0400 Subject: ext4: Fix 64-bit block type problem on 32-bit platforms The function ext4_mb_free_blocks() was using an "unsigned long" to pass a block number; this will cause 64-bit block numbers to get truncated on x86 and other 32-bit platforms. Signed-off-by: "Theodore Ts'o" Reviewed-by: Eric Sandeen --- fs/ext4/ext4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 746cdcba969d..17b9998680e3 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1341,7 +1341,7 @@ extern void ext4_discard_preallocations(struct inode *); extern int __init init_ext4_mballoc(void); extern void exit_ext4_mballoc(void); extern void ext4_mb_free_blocks(handle_t *, struct inode *, - unsigned long, unsigned long, int, unsigned long *); + ext4_fsblk_t, unsigned long, int, unsigned long *); extern int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t i, struct ext4_group_desc *desc); extern void ext4_mb_update_group_info(struct ext4_group_info *grp, -- cgit v1.2.3