From 9a0785b0da561e1e9c6617df85e93ae107a42f18 Mon Sep 17 00:00:00 2001 From: Divyesh Shah Date: Thu, 1 Apr 2010 15:01:04 -0700 Subject: blkio: Remove per-cfqq nr_sectors as we'll be passing that info at request dispatch with other stats now. This patch removes the existing support for accounting sectors for a blkio_group. This will be added back differently in the next two patches. Signed-off-by: Divyesh Shah Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 2c7a0f4f3cd7..7471d36bce89 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -142,8 +142,6 @@ struct cfq_queue { struct cfq_queue *new_cfqq; struct cfq_group *cfqg; struct cfq_group *orig_cfqg; - /* Sectors dispatched in current dispatch round */ - unsigned long nr_sectors; }; /* @@ -883,8 +881,7 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq) slice_used = cfqq->allocated_slice; } - cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u sect=%lu", slice_used, - cfqq->nr_sectors); + cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u", slice_used); return slice_used; } @@ -918,8 +915,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, st->min_vdisktime); - blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl, - cfqq->nr_sectors); + blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl); } #ifdef CONFIG_CFQ_GROUP_IOSCHED @@ -1525,7 +1521,6 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, cfqq->allocated_slice = 0; cfqq->slice_end = 0; cfqq->slice_dispatch = 0; - cfqq->nr_sectors = 0; cfq_clear_cfqq_wait_request(cfqq); cfq_clear_cfqq_must_dispatch(cfqq); @@ -1870,7 +1865,6 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) elv_dispatch_sort(q, rq); cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++; - cfqq->nr_sectors += blk_rq_sectors(rq); } /* -- cgit v1.2.3 From 303a3acb2362f16c7e7f4c53b40c2f4b396dc8d5 Mon Sep 17 00:00:00 2001 From: Divyesh Shah Date: Thu, 1 Apr 2010 15:01:24 -0700 Subject: blkio: Add io controller stats like - io_service_time - io_wait_time - io_serviced - io_service_bytes These stats are accumulated per operation type helping us to distinguish between read and write, and sync and async IO. This patch does not increment any of these stats. Signed-off-by: Divyesh Shah Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 7471d36bce89..c5161bbf2fe9 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -915,7 +915,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, st->min_vdisktime); - blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl); + blkiocg_update_timeslice_used(&cfqg->blkg, used_sl); } #ifdef CONFIG_CFQ_GROUP_IOSCHED -- cgit v1.2.3 From 9195291e5f05e01d67f9a09c756b8aca8f009089 Mon Sep 17 00:00:00 2001 From: Divyesh Shah Date: Thu, 1 Apr 2010 15:01:41 -0700 Subject: blkio: Increment the blkio cgroup stats for real now We also add start_time_ns and io_start_time_ns fields to struct request here to record the time when a request is created and when it is dispatched to device. We use ns uints here as ms and jiffies are not very useful for non-rotational media. Signed-off-by: Divyesh Shah Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index c5161bbf2fe9..42028e7128a7 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -855,7 +855,7 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg) if (!RB_EMPTY_NODE(&cfqg->rb_node)) cfq_rb_erase(&cfqg->rb_node, st); cfqg->saved_workload_slice = 0; - blkiocg_update_blkio_group_dequeue_stats(&cfqg->blkg, 1); + blkiocg_update_dequeue_stats(&cfqg->blkg, 1); } static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq) @@ -1865,6 +1865,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) elv_dispatch_sort(q, rq); cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++; + blkiocg_update_request_dispatch_stats(&cfqq->cfqg->blkg, rq); } /* @@ -3285,6 +3286,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) WARN_ON(!cfqq->dispatched); cfqd->rq_in_driver--; cfqq->dispatched--; + blkiocg_update_request_completion_stats(&cfqq->cfqg->blkg, rq); cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; -- cgit v1.2.3 From 84c124da9ff50bd71fab9c939ee5b7cd8bef2bd9 Mon Sep 17 00:00:00 2001 From: Divyesh Shah Date: Fri, 9 Apr 2010 08:31:19 +0200 Subject: blkio: Changes to IO controller additional stats patches that include some minor fixes and addresses all comments. Changelog: (most based on Vivek Goyal's comments) o renamed blkiocg_reset_write to blkiocg_reset_stats o more clarification in the documentation on io_service_time and io_wait_time o Initialize blkg->stats_lock o rename io_add_stat to blkio_add_stat and declare it static o use bool for direction and sync o derive direction and sync info from existing rq methods o use 12 for major:minor string length o define io_service_time better to cover the NCQ case o add a separate reset_stats interface o make the indexed stats a 2d array to simplify macro and function pointer code o blkio.time now exports in jiffies as before o Added stats description in patch description and Documentation/cgroup/blkio-controller.txt o Prefix all stats functions with blkio and make them static as applicable o replace IO_TYPE_MAX with IO_TYPE_TOTAL o Moved #define constant to top of blk-cgroup.c o Pass dev_t around instead of char * o Add note to documentation file about resetting stats o use BLK_CGROUP_MODULE in addition to BLK_CGROUP config option in #ifdef statements o Avoid struct request specific knowledge in blk-cgroup. blk-cgroup.h now has rq_direction() and rq_sync() functions which are used by CFQ and when using io-controller at a higher level, bio_* functions can be added. Signed-off-by: Divyesh Shah Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 42028e7128a7..5617ae030b15 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -955,6 +955,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) for_each_cfqg_st(cfqg, i, j, st) *st = CFQ_RB_ROOT; RB_CLEAR_NODE(&cfqg->rb_node); + blkio_group_init(&cfqg->blkg); /* * Take the initial reference that will be released on destroy @@ -1865,7 +1866,8 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq) elv_dispatch_sort(q, rq); cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++; - blkiocg_update_request_dispatch_stats(&cfqq->cfqg->blkg, rq); + blkiocg_update_dispatch_stats(&cfqq->cfqg->blkg, blk_rq_bytes(rq), + rq_data_dir(rq), rq_is_sync(rq)); } /* @@ -3286,7 +3288,9 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) WARN_ON(!cfqq->dispatched); cfqd->rq_in_driver--; cfqq->dispatched--; - blkiocg_update_request_completion_stats(&cfqq->cfqg->blkg, rq); + blkiocg_update_completion_stats(&cfqq->cfqg->blkg, rq_start_time_ns(rq), + rq_io_start_time_ns(rq), rq_data_dir(rq), + rq_is_sync(rq)); cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; -- cgit v1.2.3 From 812d402648f4fc1ab1091b2172a46fc1b367c724 Mon Sep 17 00:00:00 2001 From: Divyesh Shah Date: Thu, 8 Apr 2010 21:14:23 -0700 Subject: blkio: Add io_merged stat This includes both the number of bios merged into requests belonging to this cgroup as well as the number of requests merged together. In the past, we've observed different merging behavior across upstream kernels, some by design some actual bugs. This stat helps a lot in debugging such problems when applications report decreased throughput with a new kernel version. This needed adding an extra elevator function to capture bios being merged as I did not want to pollute elevator code with blkiocg knowledge and hence needed the accounting invocation to come from CFQ. Signed-off-by: Divyesh Shah Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 5617ae030b15..4eb1906cf6c6 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1467,6 +1467,14 @@ static void cfq_merged_request(struct request_queue *q, struct request *req, } } +static void cfq_bio_merged(struct request_queue *q, struct request *req, + struct bio *bio) +{ + struct cfq_queue *cfqq = RQ_CFQQ(req); + blkiocg_update_io_merged_stats(&cfqq->cfqg->blkg, bio_data_dir(bio), + cfq_bio_sync(bio)); +} + static void cfq_merged_requests(struct request_queue *q, struct request *rq, struct request *next) @@ -1484,6 +1492,8 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, if (cfqq->next_rq == next) cfqq->next_rq = rq; cfq_remove_request(next); + blkiocg_update_io_merged_stats(&cfqq->cfqg->blkg, rq_data_dir(next), + rq_is_sync(next)); } static int cfq_allow_merge(struct request_queue *q, struct request *rq, @@ -3861,6 +3871,7 @@ static struct elevator_type iosched_cfq = { .elevator_merged_fn = cfq_merged_request, .elevator_merge_req_fn = cfq_merged_requests, .elevator_allow_merge_fn = cfq_allow_merge, + .elevator_bio_merged_fn = cfq_bio_merged, .elevator_dispatch_fn = cfq_dispatch_requests, .elevator_add_req_fn = cfq_insert_request, .elevator_activate_req_fn = cfq_activate_request, -- cgit v1.2.3 From cdc1184cf4a7bd99f5473a91244197accc49146b Mon Sep 17 00:00:00 2001 From: Divyesh Shah Date: Thu, 8 Apr 2010 21:15:10 -0700 Subject: blkio: Add io_queued and avg_queue_size stats These stats are useful for getting a feel for the queue depth of the cgroup, i.e., how filled up its queues are at a given instant and over the existence of the cgroup. This ability is useful when debugging problems in the wild as it helps understand the application's IO pattern w/o having to read through the userspace code (coz its tedious or just not available) or w/o the ability to run blktrace (since you may not have root access and/or not want to disturb performance). Signed-off-by: Divyesh Shah Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 4eb1906cf6c6..8e0b86a9111a 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1380,7 +1380,12 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) { elv_rb_del(&cfqq->sort_list, rq); cfqq->queued[rq_is_sync(rq)]--; + blkiocg_update_request_remove_stats(&cfqq->cfqg->blkg, rq_data_dir(rq), + rq_is_sync(rq)); cfq_add_rq_rb(rq); + blkiocg_update_request_add_stats( + &cfqq->cfqg->blkg, &cfqq->cfqd->serving_group->blkg, + rq_data_dir(rq), rq_is_sync(rq)); } static struct request * @@ -1436,6 +1441,8 @@ static void cfq_remove_request(struct request *rq) cfq_del_rq_rb(rq); cfqq->cfqd->rq_queued--; + blkiocg_update_request_remove_stats(&cfqq->cfqg->blkg, rq_data_dir(rq), + rq_is_sync(rq)); if (rq_is_meta(rq)) { WARN_ON(!cfqq->meta_pending); cfqq->meta_pending--; @@ -1527,6 +1534,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, if (cfqq) { cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d", cfqd->serving_prio, cfqd->serving_type); + blkiocg_update_set_active_queue_stats(&cfqq->cfqg->blkg); cfqq->slice_start = 0; cfqq->dispatch_start = jiffies; cfqq->allocated_slice = 0; @@ -3213,6 +3221,9 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) list_add_tail(&rq->queuelist, &cfqq->fifo); cfq_add_rq_rb(rq); + blkiocg_update_request_add_stats(&cfqq->cfqg->blkg, + &cfqd->serving_group->blkg, rq_data_dir(rq), + rq_is_sync(rq)); cfq_rq_enqueued(cfqd, cfqq, rq); } -- cgit v1.2.3 From 812df48d127365ffd0869aa139738f572a86759c Mon Sep 17 00:00:00 2001 From: Divyesh Shah Date: Thu, 8 Apr 2010 21:15:35 -0700 Subject: blkio: Add more debug-only per-cgroup stats 1) group_wait_time - This is the amount of time the cgroup had to wait to get a timeslice for one of its queues from when it became busy, i.e., went from 0 to 1 request queued. This is different from the io_wait_time which is the cumulative total of the amount of time spent by each IO in that cgroup waiting in the scheduler queue. This stat is a great way to find out any jobs in the fleet that are being starved or waiting for longer than what is expected (due to an IO controller bug or any other issue). 2) empty_time - This is the amount of time a cgroup spends w/o any pending requests. This stat is useful when a job does not seem to be able to use its assigned disk share by helping check if that is happening due to an IO controller bug or because the job is not submitting enough IOs. 3) idle_time - This is the amount of time spent by the IO scheduler idling for a given cgroup in anticipation of a better request than the exising ones from other queues/cgroups. All these stats are recorded using start and stop events. When reading these stats, we do not add the delta between the current time and the last start time if we're between the start and stop events. We avoid doing this to make sure that these numbers are always monotonically increasing when read. Since we're using sched_clock() which may use the tsc as its source, it may induce some inconsistency (due to tsc resync across cpus) if we included the current delta. Signed-off-by: Divyesh Shah Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 50 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 18 deletions(-) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 8e0b86a9111a..b6e095c7ef5e 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -886,7 +886,7 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq) } static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, - struct cfq_queue *cfqq) + struct cfq_queue *cfqq, bool forced) { struct cfq_rb_root *st = &cfqd->grp_service_tree; unsigned int used_sl, charge_sl; @@ -916,6 +916,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, st->min_vdisktime); blkiocg_update_timeslice_used(&cfqg->blkg, used_sl); + blkiocg_set_start_empty_time(&cfqg->blkg, forced); } #ifdef CONFIG_CFQ_GROUP_IOSCHED @@ -1528,6 +1529,12 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq, return cfqq == RQ_CFQQ(rq); } +static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) +{ + del_timer(&cfqd->idle_slice_timer); + blkiocg_update_idle_time_stats(&cfqq->cfqg->blkg); +} + static void __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) { @@ -1547,7 +1554,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, cfq_clear_cfqq_fifo_expire(cfqq); cfq_mark_cfqq_slice_new(cfqq); - del_timer(&cfqd->idle_slice_timer); + cfq_del_timer(cfqd, cfqq); } cfqd->active_queue = cfqq; @@ -1558,12 +1565,12 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, */ static void __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, - bool timed_out) + bool timed_out, bool forced) { cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out); if (cfq_cfqq_wait_request(cfqq)) - del_timer(&cfqd->idle_slice_timer); + cfq_del_timer(cfqd, cfqq); cfq_clear_cfqq_wait_request(cfqq); cfq_clear_cfqq_wait_busy(cfqq); @@ -1585,7 +1592,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid); } - cfq_group_served(cfqd, cfqq->cfqg, cfqq); + cfq_group_served(cfqd, cfqq->cfqg, cfqq, forced); if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) cfq_del_cfqq_rr(cfqd, cfqq); @@ -1604,12 +1611,13 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, } } -static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out) +static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out, + bool forced) { struct cfq_queue *cfqq = cfqd->active_queue; if (cfqq) - __cfq_slice_expired(cfqd, cfqq, timed_out); + __cfq_slice_expired(cfqd, cfqq, timed_out, forced); } /* @@ -1865,6 +1873,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) sl = cfqd->cfq_slice_idle; mod_timer(&cfqd->idle_slice_timer, jiffies + sl); + blkiocg_update_set_idle_time_stats(&cfqq->cfqg->blkg); cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl); } @@ -2176,7 +2185,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) } expire: - cfq_slice_expired(cfqd, 0); + cfq_slice_expired(cfqd, 0, false); new_queue: /* * Current queue expired. Check if we have to switch to a new @@ -2202,7 +2211,7 @@ static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) BUG_ON(!list_empty(&cfqq->fifo)); /* By default cfqq is not expired if it is empty. Do it explicitly */ - __cfq_slice_expired(cfqq->cfqd, cfqq, 0); + __cfq_slice_expired(cfqq->cfqd, cfqq, 0, true); return dispatched; } @@ -2218,7 +2227,7 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd) while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) dispatched += __cfq_forced_dispatch_cfqq(cfqq); - cfq_slice_expired(cfqd, 0); + cfq_slice_expired(cfqd, 0, true); BUG_ON(cfqd->busy_queues); cfq_log(cfqd, "forced_dispatch=%d", dispatched); @@ -2382,10 +2391,15 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) || cfq_class_idle(cfqq))) { cfqq->slice_end = jiffies + 1; - cfq_slice_expired(cfqd, 0); + cfq_slice_expired(cfqd, 0, false); } cfq_log_cfqq(cfqd, cfqq, "dispatched a request"); + /* + * This is needed since we don't exactly match the mod_timer() and + * del_timer() calls in CFQ. + */ + blkiocg_update_idle_time_stats(&cfqq->cfqg->blkg); return 1; } @@ -2413,7 +2427,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq) orig_cfqg = cfqq->orig_cfqg; if (unlikely(cfqd->active_queue == cfqq)) { - __cfq_slice_expired(cfqd, cfqq, 0); + __cfq_slice_expired(cfqd, cfqq, 0, false); cfq_schedule_dispatch(cfqd); } @@ -2514,7 +2528,7 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) struct cfq_queue *__cfqq, *next; if (unlikely(cfqq == cfqd->active_queue)) { - __cfq_slice_expired(cfqd, cfqq, 0); + __cfq_slice_expired(cfqd, cfqq, 0, false); cfq_schedule_dispatch(cfqd); } @@ -3143,7 +3157,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) { cfq_log_cfqq(cfqd, cfqq, "preempt"); - cfq_slice_expired(cfqd, 1); + cfq_slice_expired(cfqd, 1, false); /* * Put the new queue at the front of the of the current list, @@ -3191,7 +3205,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (cfq_cfqq_wait_request(cfqq)) { if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE || cfqd->busy_queues > 1) { - del_timer(&cfqd->idle_slice_timer); + cfq_del_timer(cfqd, cfqq); cfq_clear_cfqq_wait_request(cfqq); __blk_run_queue(cfqd->queue); } else @@ -3352,7 +3366,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) * - when there is a close cooperator */ if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) - cfq_slice_expired(cfqd, 1); + cfq_slice_expired(cfqd, 1, false); else if (sync && cfqq_empty && !cfq_close_cooperator(cfqd, cfqq)) { cfqd->noidle_tree_requires_idle |= !rq_noidle(rq); @@ -3612,7 +3626,7 @@ static void cfq_idle_slice_timer(unsigned long data) cfq_clear_cfqq_deep(cfqq); } expire: - cfq_slice_expired(cfqd, timed_out); + cfq_slice_expired(cfqd, timed_out, false); out_kick: cfq_schedule_dispatch(cfqd); out_cont: @@ -3655,7 +3669,7 @@ static void cfq_exit_queue(struct elevator_queue *e) spin_lock_irq(q->queue_lock); if (cfqd->active_queue) - __cfq_slice_expired(cfqd, cfqd->active_queue, 0); + __cfq_slice_expired(cfqd, cfqd->active_queue, 0, false); while (!list_empty(&cfqd->cic_list)) { struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, -- cgit v1.2.3 From 34d0f179d6dd711d3fc13c0820a456c59aae8048 Mon Sep 17 00:00:00 2001 From: Gui Jianfeng Date: Tue, 13 Apr 2010 16:05:49 +0800 Subject: io-controller: Add a new interface "weight_device" for IO-Controller Currently, IO Controller makes use of blkio.weight to assign weight for all devices. Here a new user interface "blkio.weight_device" is introduced to assign different weights for different devices. blkio.weight becomes the default value for devices which are not configured by "blkio.weight_device" You can use the following format to assigned specific weight for a given device: #echo "major:minor weight" > blkio.weight_device major:minor represents device number. And you can remove weight for a given device as following: #echo "major:minor 0" > blkio.weight_device V1->V2 changes: - use user interface "weight_device" instead of "policy" suggested by Vivek - rename some struct suggested by Vivek - rebase to 2.6-block "for-linus" branch - remove an useless list_empty check pointed out by Li Zefan - some trivial typo fix V2->V3 changes: - Move policy_*_node() functions up to get rid of forward declarations - rename related functions by adding prefix "blkio_" Signed-off-by: Gui Jianfeng Acked-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index b6e095c7ef5e..91af2f2e59ce 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -952,7 +952,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) if (!cfqg) goto done; - cfqg->weight = blkcg->weight; for_each_cfqg_st(cfqg, i, j, st) *st = CFQ_RB_ROOT; RB_CLEAR_NODE(&cfqg->rb_node); @@ -970,6 +969,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd, MKDEV(major, minor)); + cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev); /* Add group on cfqd list */ hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list); -- cgit v1.2.3 From a11cdaa7af56423a921a8bdad8f5a5f4ddca918a Mon Sep 17 00:00:00 2001 From: Divyesh Shah Date: Tue, 13 Apr 2010 19:59:17 +0200 Subject: block: Update to io-controller stats Changelog from v1: o Call blkiocg_update_idle_time_stats() at cfq_rq_enqueued() instead of at dispatch time. Changelog from original patchset: (in response to Vivek Goyal's comments) o group blkiocg_update_blkio_group_dequeue_stats() with other DEBUG functions o rename blkiocg_update_set_active_queue_stats() to blkiocg_update_avg_queue_size_stats() o s/request/io/ in blkiocg_update_request_add_stats() and blkiocg_update_request_remove_stats() o Call cfq_del_timer() at request dispatch() instead of blkiocg_update_idle_time_stats() Signed-off-by: Divyesh Shah Acked-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 91af2f2e59ce..42be3b68d356 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1381,10 +1381,10 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) { elv_rb_del(&cfqq->sort_list, rq); cfqq->queued[rq_is_sync(rq)]--; - blkiocg_update_request_remove_stats(&cfqq->cfqg->blkg, rq_data_dir(rq), + blkiocg_update_io_remove_stats(&cfqq->cfqg->blkg, rq_data_dir(rq), rq_is_sync(rq)); cfq_add_rq_rb(rq); - blkiocg_update_request_add_stats( + blkiocg_update_io_add_stats( &cfqq->cfqg->blkg, &cfqq->cfqd->serving_group->blkg, rq_data_dir(rq), rq_is_sync(rq)); } @@ -1442,7 +1442,7 @@ static void cfq_remove_request(struct request *rq) cfq_del_rq_rb(rq); cfqq->cfqd->rq_queued--; - blkiocg_update_request_remove_stats(&cfqq->cfqg->blkg, rq_data_dir(rq), + blkiocg_update_io_remove_stats(&cfqq->cfqg->blkg, rq_data_dir(rq), rq_is_sync(rq)); if (rq_is_meta(rq)) { WARN_ON(!cfqq->meta_pending); @@ -1541,7 +1541,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, if (cfqq) { cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d", cfqd->serving_prio, cfqd->serving_type); - blkiocg_update_set_active_queue_stats(&cfqq->cfqg->blkg); + blkiocg_update_avg_queue_size_stats(&cfqq->cfqg->blkg); cfqq->slice_start = 0; cfqq->dispatch_start = jiffies; cfqq->allocated_slice = 0; @@ -2395,11 +2395,6 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) } cfq_log_cfqq(cfqd, cfqq, "dispatched a request"); - /* - * This is needed since we don't exactly match the mod_timer() and - * del_timer() calls in CFQ. - */ - blkiocg_update_idle_time_stats(&cfqq->cfqg->blkg); return 1; } @@ -3208,8 +3203,11 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfq_del_timer(cfqd, cfqq); cfq_clear_cfqq_wait_request(cfqq); __blk_run_queue(cfqd->queue); - } else + } else { + blkiocg_update_idle_time_stats( + &cfqq->cfqg->blkg); cfq_mark_cfqq_must_dispatch(cfqq); + } } } else if (cfq_should_preempt(cfqd, cfqq, rq)) { /* @@ -3235,7 +3233,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) list_add_tail(&rq->queuelist, &cfqq->fifo); cfq_add_rq_rb(rq); - blkiocg_update_request_add_stats(&cfqq->cfqg->blkg, + blkiocg_update_io_add_stats(&cfqq->cfqg->blkg, &cfqd->serving_group->blkg, rq_data_dir(rq), rq_is_sync(rq)); cfq_rq_enqueued(cfqd, cfqq, rq); -- cgit v1.2.3 From 28baf44299e0480d66ebb3093de5d51deff04e9f Mon Sep 17 00:00:00 2001 From: Divyesh Shah Date: Wed, 14 Apr 2010 11:22:38 +0200 Subject: blkio: Fix compile errors Fixes compile errors in blk-cgroup code for empty_time stat and a merge fix in CFQ. The first error was when CONFIG_DEBUG_CFQ_IOSCHED is not set. Signed-off-by: Divyesh Shah Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 9e0df2bdcf21..01771098355d 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2231,7 +2231,7 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd) int dispatched = 0; /* Expire the timeslice of the current active queue first */ - cfq_slice_expired(cfqd, 0); + cfq_slice_expired(cfqd, 0, true); while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) { __cfq_set_active_queue(cfqd, cfqq); dispatched += __cfq_forced_dispatch_cfqq(cfqq); -- cgit v1.2.3 From 8d2a91f8960b230b8bbcc4d97ed2015f5271c87d Mon Sep 17 00:00:00 2001 From: Divyesh Shah Date: Fri, 16 Apr 2010 08:10:51 +0200 Subject: blkio: Initialize blkg->stats_lock for the root cfqg too This fixes the lockdep warning reported by Gui Jianfeng. Signed-off-by: Divyesh Shah Reviewed-by: Gui Jianfeng Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 1 - 1 file changed, 1 deletion(-) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 01771098355d..62defd05518f 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -961,7 +961,6 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create) for_each_cfqg_st(cfqg, i, j, st) *st = CFQ_RB_ROOT; RB_CLEAR_NODE(&cfqg->rb_node); - blkio_group_init(&cfqg->blkg); /* * Take the initial reference that will be released on destroy -- cgit v1.2.3 From 7f1dc8a2d2f45fc557b27fd56115338b1d34fc24 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 21 Apr 2010 17:44:16 +0200 Subject: blkio: Fix blkio crash during rq stat update blkio + cfq was crashing even when two sequential readers were put in two separate cgroups (group_isolation=0). The reason being that cfqq can migrate across groups based on its being sync-noidle or not, it can happen that at request insertion time, cfqq belonged to one cfqg and at request dispatch time, it belonged to root group. In this case request stats per cgroup can go wrong and it also runs into BUG_ON(). This patch implements rq stashing away a cfq group pointer and not relying on cfqq->cfqg pointer alone for rq stat accounting. [ 65.163523] ------------[ cut here ]------------ [ 65.164301] kernel BUG at block/blk-cgroup.c:117! [ 65.164301] invalid opcode: 0000 [#1] SMP [ 65.164301] last sysfs file: /sys/devices/pci0000:00/0000:00:05.0/0000:60:00.1/host9/rport-9:0-0/target9:0:0/9:0:0:2/block/sde/stat [ 65.164301] CPU 1 [ 65.164301] Modules linked in: dm_round_robin dm_multipath qla2xxx scsi_transport_fc dm_zero dm_mirror dm_region_hash dm_log dm_mod [last unloaded: scsi_wait_scan] [ 65.164301] [ 65.164301] Pid: 4505, comm: fio Not tainted 2.6.34-rc4-blk-for-35 #34 0A98h/HP xw8600 Workstation [ 65.164301] RIP: 0010:[] [] blkiocg_update_io_remove_stats+0x5b/0xaf [ 65.164301] RSP: 0018:ffff8800ba5a79e8 EFLAGS: 00010046 [ 65.164301] RAX: 0000000000000096 RBX: ffff8800bb268d60 RCX: 0000000000000000 [ 65.164301] RDX: ffff8800bb268eb8 RSI: 0000000000000000 RDI: ffff8800bb268e00 [ 65.164301] RBP: ffff8800ba5a7a08 R08: 0000000000000064 R09: 0000000000000001 [ 65.164301] R10: 0000000000079640 R11: ffff8800a0bd5bf0 R12: ffff8800bab4af01 [ 65.164301] R13: ffff8800bab4af00 R14: ffff8800bb1d8928 R15: 0000000000000000 [ 65.164301] FS: 00007f18f75056f0(0000) GS:ffff880001e40000(0000) knlGS:0000000000000000 [ 65.164301] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 65.164301] CR2: 000000000040e7f0 CR3: 00000000ba52b000 CR4: 00000000000006e0 [ 65.164301] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 65.164301] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 65.164301] Process fio (pid: 4505, threadinfo ffff8800ba5a6000, task ffff8800ba45ae80) [ 65.164301] Stack: [ 65.164301] ffff8800ba5a7a08 ffff8800ba722540 ffff8800bab4af68 ffff8800bab4af68 [ 65.164301] <0> ffff8800ba5a7a38 ffffffff8121d814 ffff8800ba722540 ffff8800bab4af68 [ 65.164301] <0> ffff8800ba722540 ffff8800a08f6800 ffff8800ba5a7a68 ffffffff8121d8ca [ 65.164301] Call Trace: [ 65.164301] [] cfq_remove_request+0xe4/0x116 [ 65.164301] [] cfq_dispatch_insert+0x84/0xe1 [ 65.164301] [] cfq_dispatch_requests+0x767/0x8e8 [ 65.164301] [] ? submit_bio+0xc3/0xcc [ 65.164301] [] ? sync_page_killable+0x0/0x35 [ 65.164301] [] blk_peek_request+0x191/0x1a7 [ 65.164301] [] ? dm_get_live_table+0x44/0x4f [dm_mod] [ 65.164301] [] dm_request_fn+0x38/0x14c [dm_mod] [ 65.164301] [] ? sync_page_killable+0x0/0x35 [ 65.164301] [] __generic_unplug_device+0x32/0x37 [ 65.164301] [] generic_unplug_device+0x2e/0x3c [ 65.164301] [] dm_unplug_all+0x42/0x5b [dm_mod] [ 65.164301] [] blk_unplug+0x29/0x2d [ 65.164301] [] blk_backing_dev_unplug+0x12/0x14 [ 65.164301] [] block_sync_page+0x35/0x39 [ 65.164301] [] sync_page+0x41/0x4a [ 65.164301] [] sync_page_killable+0xe/0x35 [ 65.164301] [] __wait_on_bit_lock+0x46/0x8f [ 65.164301] [] __lock_page_killable+0x66/0x6d [ 65.164301] [] ? wake_bit_function+0x0/0x33 [ 65.164301] [] lock_page_killable+0x2c/0x2e [ 65.164301] [] generic_file_aio_read+0x361/0x4f0 [ 65.164301] [] do_sync_read+0xcb/0x108 [ 65.164301] [] ? security_file_permission+0x16/0x18 [ 65.164301] [] vfs_read+0xab/0x108 [ 65.164301] [] sys_read+0x4a/0x6e [ 65.164301] [] system_call_fastpath+0x16/0x1b [ 65.164301] Code: 00 74 1c 48 8b 8b 60 01 00 00 48 85 c9 75 04 0f 0b eb fe 48 ff c9 48 89 8b 60 01 00 00 eb 1a 48 8b 8b 58 01 00 00 48 85 c9 75 04 <0f> 0b eb fe 48 ff c9 48 89 8b 58 01 00 00 45 84 e4 74 16 48 8b [ 65.164301] RIP [] blkiocg_update_io_remove_stats+0x5b/0xaf [ 65.164301] RSP [ 65.164301] ---[ end trace 1b2b828753032e68 ]--- Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 62defd05518f..d5927b53020e 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -55,6 +55,7 @@ static const int cfq_hist_divisor = 4; #define RQ_CIC(rq) \ ((struct cfq_io_context *) (rq)->elevator_private) #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2) +#define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elevator_private3) static struct kmem_cache *cfq_pool; static struct kmem_cache *cfq_ioc_pool; @@ -1001,6 +1002,12 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create) return cfqg; } +static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) +{ + atomic_inc(&cfqg->ref); + return cfqg; +} + static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) { /* Currently, all async queues are mapped to root group */ @@ -1084,6 +1091,12 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create) { return &cfqd->root_group; } + +static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) +{ + return NULL; +} + static inline void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) { cfqq->cfqg = cfqg; @@ -1386,12 +1399,12 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq) { elv_rb_del(&cfqq->sort_list, rq); cfqq->queued[rq_is_sync(rq)]--; - blkiocg_update_io_remove_stats(&cfqq->cfqg->blkg, rq_data_dir(rq), + blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq), rq_is_sync(rq)); cfq_add_rq_rb(rq); - blkiocg_update_io_add_stats( - &cfqq->cfqg->blkg, &cfqq->cfqd->serving_group->blkg, - rq_data_dir(rq), rq_is_sync(rq)); + blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg, + &cfqq->cfqd->serving_group->blkg, rq_data_dir(rq), + rq_is_sync(rq)); } static struct request * @@ -1447,7 +1460,7 @@ static void cfq_remove_request(struct request *rq) cfq_del_rq_rb(rq); cfqq->cfqd->rq_queued--; - blkiocg_update_io_remove_stats(&cfqq->cfqg->blkg, rq_data_dir(rq), + blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq), rq_is_sync(rq)); if (rq_is_meta(rq)) { WARN_ON(!cfqq->meta_pending); @@ -1483,8 +1496,7 @@ static void cfq_merged_request(struct request_queue *q, struct request *req, static void cfq_bio_merged(struct request_queue *q, struct request *req, struct bio *bio) { - struct cfq_queue *cfqq = RQ_CFQQ(req); - blkiocg_update_io_merged_stats(&cfqq->cfqg->blkg, bio_data_dir(bio), + blkiocg_update_io_merged_stats(&(RQ_CFQG(req))->blkg, bio_data_dir(bio), cfq_bio_sync(bio)); } @@ -1505,7 +1517,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, if (cfqq->next_rq == next) cfqq->next_rq = rq; cfq_remove_request(next); - blkiocg_update_io_merged_stats(&cfqq->cfqg->blkg, rq_data_dir(next), + blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(next), rq_is_sync(next)); } @@ -3240,8 +3252,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); list_add_tail(&rq->queuelist, &cfqq->fifo); cfq_add_rq_rb(rq); - - blkiocg_update_io_add_stats(&cfqq->cfqg->blkg, + blkiocg_update_io_add_stats(&(RQ_CFQG(rq))->blkg, &cfqd->serving_group->blkg, rq_data_dir(rq), rq_is_sync(rq)); cfq_rq_enqueued(cfqd, cfqq, rq); @@ -3472,6 +3483,10 @@ static void cfq_put_request(struct request *rq) rq->elevator_private = NULL; rq->elevator_private2 = NULL; + /* Put down rq reference on cfqg */ + cfq_put_cfqg(RQ_CFQG(rq)); + rq->elevator_private3 = NULL; + cfq_put_queue(cfqq); } } @@ -3560,6 +3575,7 @@ new_queue: rq->elevator_private = cic; rq->elevator_private2 = cfqq; + rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg); return 0; queue_fail: -- cgit v1.2.3 From e5ff082e8a68d9a6874990597497c7e6a96ad752 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 26 Apr 2010 19:25:11 +0200 Subject: blkio: Fix another BUG_ON() crash due to cfqq movement across groups o Once in a while, I was hitting a BUG_ON() in blkio code. empty_time was assuming that upon slice expiry, group can't be marked empty already (except forced dispatch). But this assumption is broken if cfqq can move (group_isolation=0) across groups after receiving a request. I think most likely in this case we got a request in a cfqq and accounted the rq in one group, later while adding the cfqq to tree, we moved the queue to a different group which was already marked empty and after dispatch from slice we found group already marked empty and raised alarm. This patch does not error out if group is already marked empty. This can introduce some empty_time stat error only in case of group_isolation=0. This is better than crashing. In case of group_isolation=1 we should still get same stats as before this patch. [ 222.308546] ------------[ cut here ]------------ [ 222.309311] kernel BUG at block/blk-cgroup.c:236! [ 222.309311] invalid opcode: 0000 [#1] SMP [ 222.309311] last sysfs file: /sys/devices/virtual/block/dm-3/queue/scheduler [ 222.309311] CPU 1 [ 222.309311] Modules linked in: dm_round_robin dm_multipath qla2xxx scsi_transport_fc dm_zero dm_mirror dm_region_hash dm_log dm_mod [last unloaded: scsi_wait_scan] [ 222.309311] [ 222.309311] Pid: 4780, comm: fio Not tainted 2.6.34-rc4-blkio-config #68 0A98h/HP xw8600 Workstation [ 222.309311] RIP: 0010:[] [] blkiocg_set_start_empty_time+0x50/0x83 [ 222.309311] RSP: 0018:ffff8800ba6e79f8 EFLAGS: 00010002 [ 222.309311] RAX: 0000000000000082 RBX: ffff8800a13b7990 RCX: ffff8800a13b7808 [ 222.309311] RDX: 0000000000002121 RSI: 0000000000000082 RDI: ffff8800a13b7a30 [ 222.309311] RBP: ffff8800ba6e7a18 R08: 0000000000000000 R09: 0000000000000001 [ 222.309311] R10: 000000000002f8c8 R11: ffff8800ba6e7ad8 R12: ffff8800a13b78ff [ 222.309311] R13: ffff8800a13b7990 R14: 0000000000000001 R15: ffff8800a13b7808 [ 222.309311] FS: 00007f3beec476f0(0000) GS:ffff880001e40000(0000) knlGS:0000000000000000 [ 222.309311] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 222.309311] CR2: 000000000040e7f0 CR3: 00000000a12d5000 CR4: 00000000000006e0 [ 222.309311] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 222.309311] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 222.309311] Process fio (pid: 4780, threadinfo ffff8800ba6e6000, task ffff8800b3d6bf00) [ 222.309311] Stack: [ 222.309311] 0000000000000001 ffff8800bab17a48 ffff8800bab17a48 ffff8800a13b7800 [ 222.309311] <0> ffff8800ba6e7a68 ffffffff8121da35 ffff880000000001 00ff8800ba5c5698 [ 222.309311] <0> ffff8800ba6e7a68 ffff8800a13b7800 0000000000000000 ffff8800bab17a48 [ 222.309311] Call Trace: [ 222.309311] [] __cfq_slice_expired+0x2af/0x3ec [ 222.309311] [] cfq_dispatch_requests+0x2c8/0x8e8 [ 222.309311] [] ? spin_unlock_irqrestore+0xe/0x10 [ 222.309311] [] ? blk_insert_cloned_request+0x70/0x7b [ 222.309311] [] blk_peek_request+0x191/0x1a7 [ 222.309311] [] dm_request_fn+0x38/0x14c [dm_mod] [ 222.309311] [] ? sync_page_killable+0x0/0x35 [ 222.309311] [] __generic_unplug_device+0x32/0x37 [ 222.309311] [] generic_unplug_device+0x2e/0x3c [ 222.309311] [] dm_unplug_all+0x42/0x5b [dm_mod] [ 222.309311] [] blk_unplug+0x29/0x2d [ 222.309311] [] blk_backing_dev_unplug+0x12/0x14 [ 222.309311] [] block_sync_page+0x35/0x39 [ 222.309311] [] sync_page+0x41/0x4a [ 222.309311] [] sync_page_killable+0xe/0x35 [ 222.309311] [] __wait_on_bit_lock+0x46/0x8f [ 222.309311] [] __lock_page_killable+0x66/0x6d [ 222.309311] [] ? wake_bit_function+0x0/0x33 [ 222.309311] [] lock_page_killable+0x2c/0x2e [ 222.309311] [] generic_file_aio_read+0x361/0x4f0 [ 222.309311] [] do_sync_read+0xcb/0x108 [ 222.309311] [] ? security_file_permission+0x16/0x18 [ 222.309311] [] vfs_read+0xab/0x108 [ 222.309311] [] sys_read+0x4a/0x6e [ 222.309311] [] system_call_fastpath+0x16/0x1b [ 222.309311] Code: 58 01 00 00 00 48 89 c6 75 0a 48 83 bb 60 01 00 00 00 74 09 48 8d bb a0 00 00 00 eb 35 41 fe cc 74 0d f6 83 c0 01 00 00 04 74 04 <0f> 0b eb fe 48 89 75 e8 e8 be e0 de ff 66 83 8b c0 01 00 00 04 [ 222.309311] RIP [] blkiocg_set_start_empty_time+0x50/0x83 [ 222.309311] RSP [ 222.309311] ---[ end trace 32b4f71dffc15712 ]--- Signed-off-by: Vivek Goyal Acked-by: Divyesh Shah Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index d5927b53020e..002a5b621653 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -888,7 +888,7 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq) } static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, - struct cfq_queue *cfqq, bool forced) + struct cfq_queue *cfqq) { struct cfq_rb_root *st = &cfqd->grp_service_tree; unsigned int used_sl, charge_sl; @@ -918,7 +918,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, st->min_vdisktime); blkiocg_update_timeslice_used(&cfqg->blkg, used_sl); - blkiocg_set_start_empty_time(&cfqg->blkg, forced); + blkiocg_set_start_empty_time(&cfqg->blkg); } #ifdef CONFIG_CFQ_GROUP_IOSCHED @@ -1582,7 +1582,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, */ static void __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, - bool timed_out, bool forced) + bool timed_out) { cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out); @@ -1609,7 +1609,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid); } - cfq_group_served(cfqd, cfqq->cfqg, cfqq, forced); + cfq_group_served(cfqd, cfqq->cfqg, cfqq); if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list)) cfq_del_cfqq_rr(cfqd, cfqq); @@ -1628,13 +1628,12 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, } } -static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out, - bool forced) +static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out) { struct cfq_queue *cfqq = cfqd->active_queue; if (cfqq) - __cfq_slice_expired(cfqd, cfqq, timed_out, forced); + __cfq_slice_expired(cfqd, cfqq, timed_out); } /* @@ -2202,7 +2201,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) } expire: - cfq_slice_expired(cfqd, 0, false); + cfq_slice_expired(cfqd, 0); new_queue: /* * Current queue expired. Check if we have to switch to a new @@ -2228,7 +2227,7 @@ static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) BUG_ON(!list_empty(&cfqq->fifo)); /* By default cfqq is not expired if it is empty. Do it explicitly */ - __cfq_slice_expired(cfqq->cfqd, cfqq, 0, true); + __cfq_slice_expired(cfqq->cfqd, cfqq, 0); return dispatched; } @@ -2242,7 +2241,7 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd) int dispatched = 0; /* Expire the timeslice of the current active queue first */ - cfq_slice_expired(cfqd, 0, true); + cfq_slice_expired(cfqd, 0); while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) { __cfq_set_active_queue(cfqd, cfqq); dispatched += __cfq_forced_dispatch_cfqq(cfqq); @@ -2411,7 +2410,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) || cfq_class_idle(cfqq))) { cfqq->slice_end = jiffies + 1; - cfq_slice_expired(cfqd, 0, false); + cfq_slice_expired(cfqd, 0); } cfq_log_cfqq(cfqd, cfqq, "dispatched a request"); @@ -2442,7 +2441,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq) orig_cfqg = cfqq->orig_cfqg; if (unlikely(cfqd->active_queue == cfqq)) { - __cfq_slice_expired(cfqd, cfqq, 0, false); + __cfq_slice_expired(cfqd, cfqq, 0); cfq_schedule_dispatch(cfqd); } @@ -2543,7 +2542,7 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq) struct cfq_queue *__cfqq, *next; if (unlikely(cfqq == cfqd->active_queue)) { - __cfq_slice_expired(cfqd, cfqq, 0, false); + __cfq_slice_expired(cfqd, cfqq, 0); cfq_schedule_dispatch(cfqd); } @@ -3172,7 +3171,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) { cfq_log_cfqq(cfqd, cfqq, "preempt"); - cfq_slice_expired(cfqd, 1, false); + cfq_slice_expired(cfqd, 1); /* * Put the new queue at the front of the of the current list, @@ -3383,7 +3382,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) * - when there is a close cooperator */ if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) - cfq_slice_expired(cfqd, 1, false); + cfq_slice_expired(cfqd, 1); else if (sync && cfqq_empty && !cfq_close_cooperator(cfqd, cfqq)) { cfqd->noidle_tree_requires_idle |= !rq_noidle(rq); @@ -3648,7 +3647,7 @@ static void cfq_idle_slice_timer(unsigned long data) cfq_clear_cfqq_deep(cfqq); } expire: - cfq_slice_expired(cfqd, timed_out, false); + cfq_slice_expired(cfqd, timed_out); out_kick: cfq_schedule_dispatch(cfqd); out_cont: @@ -3691,7 +3690,7 @@ static void cfq_exit_queue(struct elevator_queue *e) spin_lock_irq(q->queue_lock); if (cfqd->active_queue) - __cfq_slice_expired(cfqd, cfqd->active_queue, 0, false); + __cfq_slice_expired(cfqd, cfqd->active_queue, 0); while (!list_empty(&cfqd->cic_list)) { struct cfq_io_context *cic = list_entry(cfqd->cic_list.next, -- cgit v1.2.3 From afc24d49c1e5dbeef745c1c1246f5ae6ebd97c71 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Mon, 26 Apr 2010 19:27:56 +0200 Subject: blk-cgroup: config options re-arrangement This patch fixes few usability and configurability issues. o All the cgroup based controller options are configurable from "Genral Setup/Control Group Support/" menu. blkio is the only exception. Hence make this option visible in above menu and make it configurable from there to bring it inline with rest of the cgroup based controllers. o Get rid of CONFIG_DEBUG_CFQ_IOSCHED. This option currently does two things. - Enable printing of cgroup paths in blktrace - Enables CONFIG_DEBUG_BLK_CGROUP, which in turn displays additional stat files in cgroup. If we are using group scheduling, blktrace data is of not really much use if cgroup information is not present. To get this data, currently one has to also enable CONFIG_DEBUG_CFQ_IOSCHED, which in turn brings the overhead of all the additional debug stat files which is not desired. Hence, this patch moves printing of cgroup paths under CONFIG_CFQ_GROUP_IOSCHED. This allows us to get rid of CONFIG_DEBUG_CFQ_IOSCHED completely. Now all the debug stat files are controlled only by CONFIG_DEBUG_BLK_CGROUP which can be enabled through config menu. Signed-off-by: Vivek Goyal Acked-by: Divyesh Shah Reviewed-by: Gui Jianfeng Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 002a5b621653..286008cf889e 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -345,7 +345,7 @@ CFQ_CFQQ_FNS(deep); CFQ_CFQQ_FNS(wait_busy); #undef CFQ_CFQQ_FNS -#ifdef CONFIG_DEBUG_CFQ_IOSCHED +#ifdef CONFIG_CFQ_GROUP_IOSCHED #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \ cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ -- cgit v1.2.3 From 50eaeb323a170e231263ccb433bb2f99bd9e27ac Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Wed, 28 Apr 2010 19:50:33 +0200 Subject: cfq-iosched: fix broken cfq_ref_get_cfqf() for CONFIG_BLK_CGROUP=y && CFQ_GROUP_IOSCHED=n We should return the cfq_group for this case, not NULL. Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 286008cf889e..0f3eb70f9ce1 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1094,7 +1094,7 @@ static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create) static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg) { - return NULL; + return cfqg; } static inline void -- cgit v1.2.3 From f1ac2502e19c59e996242d406fcc60e4c563e8ce Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 19 May 2010 08:27:30 +0200 Subject: block: remove all rcu head initializations Remove all rcu head inits. We don't care about the RCU head state before passing it to call_rcu() anyway. Only leave the "on_stack" variants so debugobjects can keep track of objects on stack. Signed-off-by: Mathieu Desnoyers Signed-off-by: Paul E. McKenney Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 1 - 1 file changed, 1 deletion(-) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 0f3eb70f9ce1..b1472bc2d49c 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3788,7 +3788,6 @@ static void *cfq_init_queue(struct request_queue *q) * second, in order to have larger depth for async operations. */ cfqd->last_delayed_sync = jiffies - HZ; - INIT_RCU_HEAD(&cfqd->rcu); return cfqd; } -- cgit v1.2.3