diff options
Diffstat (limited to 'fs/ext4/extents_status.c')
-rw-r--r-- | fs/ext4/extents_status.c | 310 |
1 files changed, 218 insertions, 92 deletions
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index fee54ab42bba..cccbdfd49a86 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -144,14 +144,17 @@ static struct kmem_cache *ext4_es_cachep; static struct kmem_cache *ext4_pending_cachep; -static int __es_insert_extent(struct inode *inode, struct extent_status *newes); +static int __es_insert_extent(struct inode *inode, struct extent_status *newes, + struct extent_status *prealloc); static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, - ext4_lblk_t end, int *reserved); + ext4_lblk_t end, int *reserved, + struct extent_status *prealloc); static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan); static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, struct ext4_inode_info *locked_ei); -static void __revise_pending(struct inode *inode, ext4_lblk_t lblk, - ext4_lblk_t len); +static int __revise_pending(struct inode *inode, ext4_lblk_t lblk, + ext4_lblk_t len, + struct pending_reservation **prealloc); int __init ext4_init_es(void) { @@ -448,22 +451,49 @@ static void ext4_es_list_del(struct inode *inode) spin_unlock(&sbi->s_es_lock); } -static struct extent_status * -ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, - ext4_fsblk_t pblk) +static inline struct pending_reservation *__alloc_pending(bool nofail) +{ + if (!nofail) + return kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC); + + return kmem_cache_zalloc(ext4_pending_cachep, GFP_KERNEL | __GFP_NOFAIL); +} + +static inline void __free_pending(struct pending_reservation *pr) +{ + kmem_cache_free(ext4_pending_cachep, pr); +} + +/* + * Returns true if we cannot fail to allocate memory for this extent_status + * entry and cannot reclaim it until its status changes. + */ +static inline bool ext4_es_must_keep(struct extent_status *es) +{ + /* fiemap, bigalloc, and seek_data/hole need to use it. */ + if (ext4_es_is_delayed(es)) + return true; + + return false; +} + +static inline struct extent_status *__es_alloc_extent(bool nofail) +{ + if (!nofail) + return kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC); + + return kmem_cache_zalloc(ext4_es_cachep, GFP_KERNEL | __GFP_NOFAIL); +} + +static void ext4_es_init_extent(struct inode *inode, struct extent_status *es, + ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk) { - struct extent_status *es; - es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC); - if (es == NULL) - return NULL; es->es_lblk = lblk; es->es_len = len; es->es_pblk = pblk; - /* - * We don't count delayed extent because we never try to reclaim them - */ - if (!ext4_es_is_delayed(es)) { + /* We never try to reclaim a must kept extent, so we don't count it. */ + if (!ext4_es_must_keep(es)) { if (!EXT4_I(inode)->i_es_shk_nr++) ext4_es_list_add(inode); percpu_counter_inc(&EXT4_SB(inode->i_sb)-> @@ -472,8 +502,11 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, EXT4_I(inode)->i_es_all_nr++; percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); +} - return es; +static inline void __es_free_extent(struct extent_status *es) +{ + kmem_cache_free(ext4_es_cachep, es); } static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) @@ -481,8 +514,8 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) EXT4_I(inode)->i_es_all_nr--; percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); - /* Decrease the shrink counter when this es is not delayed */ - if (!ext4_es_is_delayed(es)) { + /* Decrease the shrink counter when we can reclaim the extent. */ + if (!ext4_es_must_keep(es)) { BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0); if (!--EXT4_I(inode)->i_es_shk_nr) ext4_es_list_del(inode); @@ -490,7 +523,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) s_es_stats.es_stats_shk_cnt); } - kmem_cache_free(ext4_es_cachep, es); + __es_free_extent(es); } /* @@ -752,7 +785,8 @@ static inline void ext4_es_insert_extent_check(struct inode *inode, } #endif -static int __es_insert_extent(struct inode *inode, struct extent_status *newes) +static int __es_insert_extent(struct inode *inode, struct extent_status *newes, + struct extent_status *prealloc) { struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; struct rb_node **p = &tree->root.rb_node; @@ -792,10 +826,15 @@ static int __es_insert_extent(struct inode *inode, struct extent_status *newes) } } - es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len, - newes->es_pblk); + if (prealloc) + es = prealloc; + else + es = __es_alloc_extent(false); if (!es) return -ENOMEM; + ext4_es_init_extent(inode, es, newes->es_lblk, newes->es_len, + newes->es_pblk); + rb_link_node(&es->rb_node, parent, p); rb_insert_color(&es->rb_node, &tree->root); @@ -816,8 +855,12 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, { struct extent_status newes; ext4_lblk_t end = lblk + len - 1; - int err = 0; + int err1 = 0, err2 = 0, err3 = 0; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct extent_status *es1 = NULL; + struct extent_status *es2 = NULL; + struct pending_reservation *pr = NULL; + bool revise_pending = false; if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) return 0; @@ -845,29 +888,57 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, ext4_es_insert_extent_check(inode, &newes); + revise_pending = sbi->s_cluster_ratio > 1 && + test_opt(inode->i_sb, DELALLOC) && + (status & (EXTENT_STATUS_WRITTEN | + EXTENT_STATUS_UNWRITTEN)); +retry: + if (err1 && !es1) + es1 = __es_alloc_extent(true); + if ((err1 || err2) && !es2) + es2 = __es_alloc_extent(true); + if ((err1 || err2 || err3) && revise_pending && !pr) + pr = __alloc_pending(true); write_lock(&EXT4_I(inode)->i_es_lock); - err = __es_remove_extent(inode, lblk, end, NULL); - if (err != 0) + + err1 = __es_remove_extent(inode, lblk, end, NULL, es1); + if (err1 != 0) goto error; -retry: - err = __es_insert_extent(inode, &newes); - if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb), - 128, EXT4_I(inode))) - goto retry; - if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) - err = 0; + /* Free preallocated extent if it didn't get used. */ + if (es1) { + if (!es1->es_len) + __es_free_extent(es1); + es1 = NULL; + } - if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) && - (status & EXTENT_STATUS_WRITTEN || - status & EXTENT_STATUS_UNWRITTEN)) - __revise_pending(inode, lblk, len); + err2 = __es_insert_extent(inode, &newes, es2); + if (err2 == -ENOMEM && !ext4_es_must_keep(&newes)) + err2 = 0; + if (err2 != 0) + goto error; + /* Free preallocated extent if it didn't get used. */ + if (es2) { + if (!es2->es_len) + __es_free_extent(es2); + es2 = NULL; + } + if (revise_pending) { + err3 = __revise_pending(inode, lblk, len, &pr); + if (err3 != 0) + goto error; + if (pr) { + __free_pending(pr); + pr = NULL; + } + } error: write_unlock(&EXT4_I(inode)->i_es_lock); + if (err1 || err2 || err3) + goto retry; ext4_es_print_tree(inode); - - return err; + return 0; } /* @@ -900,7 +971,7 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk); if (!es || es->es_lblk > end) - __es_insert_extent(inode, &newes); + __es_insert_extent(inode, &newes, NULL); write_unlock(&EXT4_I(inode)->i_es_lock); } @@ -1271,7 +1342,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end, rc->ndelonly--; node = rb_next(&pr->rb_node); rb_erase(&pr->rb_node, &tree->root); - kmem_cache_free(ext4_pending_cachep, pr); + __free_pending(pr); if (!node) break; pr = rb_entry(node, struct pending_reservation, @@ -1290,6 +1361,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end, * @lblk - first block in range * @end - last block in range * @reserved - number of cluster reservations released + * @prealloc - pre-allocated es to avoid memory allocation failures * * If @reserved is not NULL and delayed allocation is enabled, counts * block/cluster reservations freed by removing range and if bigalloc @@ -1297,7 +1369,8 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end, * error code on failure. */ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, - ext4_lblk_t end, int *reserved) + ext4_lblk_t end, int *reserved, + struct extent_status *prealloc) { struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; struct rb_node *node; @@ -1305,14 +1378,12 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, struct extent_status orig_es; ext4_lblk_t len1, len2; ext4_fsblk_t block; - int err; + int err = 0; bool count_reserved = true; struct rsvd_count rc; if (reserved == NULL || !test_opt(inode->i_sb, DELALLOC)) count_reserved = false; -retry: - err = 0; es = __es_tree_search(&tree->root, lblk); if (!es) @@ -1346,14 +1417,13 @@ retry: orig_es.es_len - len2; ext4_es_store_pblock_status(&newes, block, ext4_es_status(&orig_es)); - err = __es_insert_extent(inode, &newes); + err = __es_insert_extent(inode, &newes, prealloc); if (err) { + if (!ext4_es_must_keep(&newes)) + return 0; + es->es_lblk = orig_es.es_lblk; es->es_len = orig_es.es_len; - if ((err == -ENOMEM) && - __es_shrink(EXT4_SB(inode->i_sb), - 128, EXT4_I(inode))) - goto retry; goto out; } } else { @@ -1366,8 +1436,8 @@ retry: } } if (count_reserved) - count_rsvd(inode, lblk, orig_es.es_len - len1 - len2, - &orig_es, &rc); + count_rsvd(inode, orig_es.es_lblk + len1, + orig_es.es_len - len1 - len2, &orig_es, &rc); goto out_get_reserved; } @@ -1433,6 +1503,7 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t end; int err = 0; int reserved = 0; + struct extent_status *es = NULL; if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) return 0; @@ -1447,17 +1518,29 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, end = lblk + len - 1; BUG_ON(end < lblk); +retry: + if (err && !es) + es = __es_alloc_extent(true); /* * ext4_clear_inode() depends on us taking i_es_lock unconditionally * so that we are sure __es_shrink() is done with the inode before it * is reclaimed. */ write_lock(&EXT4_I(inode)->i_es_lock); - err = __es_remove_extent(inode, lblk, end, &reserved); + err = __es_remove_extent(inode, lblk, end, &reserved, es); + /* Free preallocated extent if it didn't get used. */ + if (es) { + if (!es->es_len) + __es_free_extent(es); + es = NULL; + } write_unlock(&EXT4_I(inode)->i_es_lock); + if (err) + goto retry; + ext4_es_print_tree(inode); ext4_da_release_space(inode, reserved); - return err; + return 0; } static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, @@ -1704,11 +1787,8 @@ static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end, (*nr_to_scan)--; node = rb_next(&es->rb_node); - /* - * We can't reclaim delayed extent from status tree because - * fiemap, bigallic, and seek_data/hole need to use it. - */ - if (ext4_es_is_delayed(es)) + + if (ext4_es_must_keep(es)) goto next; if (ext4_es_is_referenced(es)) { ext4_es_clear_referenced(es); @@ -1772,7 +1852,7 @@ void ext4_clear_inode_es(struct inode *inode) while (node) { es = rb_entry(node, struct extent_status, rb_node); node = rb_next(node); - if (!ext4_es_is_delayed(es)) { + if (!ext4_es_must_keep(es)) { rb_erase(&es->rb_node, &tree->root); ext4_es_free_extent(inode, es); } @@ -1859,11 +1939,13 @@ static struct pending_reservation *__get_pending(struct inode *inode, * * @inode - file containing the cluster * @lblk - logical block in the cluster to be added + * @prealloc - preallocated pending entry * * Returns 0 on successful insertion and -ENOMEM on failure. If the * pending reservation is already in the set, returns successfully. */ -static int __insert_pending(struct inode *inode, ext4_lblk_t lblk) +static int __insert_pending(struct inode *inode, ext4_lblk_t lblk, + struct pending_reservation **prealloc) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree; @@ -1889,10 +1971,15 @@ static int __insert_pending(struct inode *inode, ext4_lblk_t lblk) } } - pr = kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC); - if (pr == NULL) { - ret = -ENOMEM; - goto out; + if (likely(*prealloc == NULL)) { + pr = __alloc_pending(false); + if (!pr) { + ret = -ENOMEM; + goto out; + } + } else { + pr = *prealloc; + *prealloc = NULL; } pr->lclu = lclu; @@ -1922,7 +2009,7 @@ static void __remove_pending(struct inode *inode, ext4_lblk_t lblk) if (pr != NULL) { tree = &EXT4_I(inode)->i_pending_tree; rb_erase(&pr->rb_node, &tree->root); - kmem_cache_free(ext4_pending_cachep, pr); + __free_pending(pr); } } @@ -1983,7 +2070,10 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk, bool allocated) { struct extent_status newes; - int err = 0; + int err1 = 0, err2 = 0, err3 = 0; + struct extent_status *es1 = NULL; + struct extent_status *es2 = NULL; + struct pending_reservation *pr = NULL; if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) return 0; @@ -1998,29 +2088,52 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk, ext4_es_insert_extent_check(inode, &newes); +retry: + if (err1 && !es1) + es1 = __es_alloc_extent(true); + if ((err1 || err2) && !es2) + es2 = __es_alloc_extent(true); + if ((err1 || err2 || err3) && allocated && !pr) + pr = __alloc_pending(true); write_lock(&EXT4_I(inode)->i_es_lock); - err = __es_remove_extent(inode, lblk, lblk, NULL); - if (err != 0) - goto error; -retry: - err = __es_insert_extent(inode, &newes); - if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb), - 128, EXT4_I(inode))) - goto retry; - if (err != 0) + err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1); + if (err1 != 0) goto error; + /* Free preallocated extent if it didn't get used. */ + if (es1) { + if (!es1->es_len) + __es_free_extent(es1); + es1 = NULL; + } - if (allocated) - __insert_pending(inode, lblk); + err2 = __es_insert_extent(inode, &newes, es2); + if (err2 != 0) + goto error; + /* Free preallocated extent if it didn't get used. */ + if (es2) { + if (!es2->es_len) + __es_free_extent(es2); + es2 = NULL; + } + if (allocated) { + err3 = __insert_pending(inode, lblk, &pr); + if (err3 != 0) + goto error; + if (pr) { + __free_pending(pr); + pr = NULL; + } + } error: write_unlock(&EXT4_I(inode)->i_es_lock); + if (err1 || err2 || err3) + goto retry; ext4_es_print_tree(inode); ext4_print_pending_tree(inode); - - return err; + return 0; } /* @@ -2121,21 +2234,24 @@ unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk, * @inode - file containing the range * @lblk - logical block defining the start of range * @len - length of range in blocks + * @prealloc - preallocated pending entry * * Used after a newly allocated extent is added to the extents status tree. * Requires that the extents in the range have either written or unwritten * status. Must be called while holding i_es_lock. */ -static void __revise_pending(struct inode *inode, ext4_lblk_t lblk, - ext4_lblk_t len) +static int __revise_pending(struct inode *inode, ext4_lblk_t lblk, + ext4_lblk_t len, + struct pending_reservation **prealloc) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); ext4_lblk_t end = lblk + len - 1; ext4_lblk_t first, last; bool f_del = false, l_del = false; + int ret = 0; if (len == 0) - return; + return 0; /* * Two cases - block range within single cluster and block range @@ -2156,7 +2272,9 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk, f_del = __es_scan_range(inode, &ext4_es_is_delonly, first, lblk - 1); if (f_del) { - __insert_pending(inode, first); + ret = __insert_pending(inode, first, prealloc); + if (ret < 0) + goto out; } else { last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1; @@ -2164,9 +2282,11 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk, l_del = __es_scan_range(inode, &ext4_es_is_delonly, end + 1, last); - if (l_del) - __insert_pending(inode, last); - else + if (l_del) { + ret = __insert_pending(inode, last, prealloc); + if (ret < 0) + goto out; + } else __remove_pending(inode, last); } } else { @@ -2174,18 +2294,24 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk, if (first != lblk) f_del = __es_scan_range(inode, &ext4_es_is_delonly, first, lblk - 1); - if (f_del) - __insert_pending(inode, first); - else + if (f_del) { + ret = __insert_pending(inode, first, prealloc); + if (ret < 0) + goto out; + } else __remove_pending(inode, first); last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1; if (last != end) l_del = __es_scan_range(inode, &ext4_es_is_delonly, end + 1, last); - if (l_del) - __insert_pending(inode, last); - else + if (l_del) { + ret = __insert_pending(inode, last, prealloc); + if (ret < 0) + goto out; + } else __remove_pending(inode, last); } +out: + return ret; } |