From e9bb8fb0b6d61a822201537b25206a0ca34b9d1d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 21 Sep 2008 22:36:49 -0700 Subject: aoe: Use SKB interfaces for list management instead of home-grown stuff. Signed-off-by: David S. Miller --- drivers/block/aoe/aoecmd.c | 85 +++++++++++++++++----------------------------- 1 file changed, 32 insertions(+), 53 deletions(-) (limited to 'drivers/block/aoe/aoecmd.c') diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 2f1746295d06..e33da30be4c4 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -114,29 +114,22 @@ ifrotate(struct aoetgt *t) static void skb_pool_put(struct aoedev *d, struct sk_buff *skb) { - if (!d->skbpool_hd) - d->skbpool_hd = skb; - else - d->skbpool_tl->next = skb; - d->skbpool_tl = skb; + __skb_queue_tail(&d->skbpool, skb); } static struct sk_buff * skb_pool_get(struct aoedev *d) { - struct sk_buff *skb; + struct sk_buff *skb = skb_peek(&d->skbpool); - skb = d->skbpool_hd; if (skb && atomic_read(&skb_shinfo(skb)->dataref) == 1) { - d->skbpool_hd = skb->next; - skb->next = NULL; + __skb_unlink(skb, &d->skbpool); return skb; } - if (d->nskbpool < NSKBPOOLMAX - && (skb = new_skb(ETH_ZLEN))) { - d->nskbpool++; + if (skb_queue_len(&d->skbpool) < NSKBPOOLMAX && + (skb = new_skb(ETH_ZLEN))) return skb; - } + return NULL; } @@ -293,29 +286,22 @@ aoecmd_ata_rw(struct aoedev *d) skb->dev = t->ifp->nd; skb = skb_clone(skb, GFP_ATOMIC); - if (skb) { - if (d->sendq_hd) - d->sendq_tl->next = skb; - else - d->sendq_hd = skb; - d->sendq_tl = skb; - } + if (skb) + __skb_queue_tail(&d->sendq, skb); return 1; } /* some callers cannot sleep, and they can call this function, * transmitting the packets later, when interrupts are on */ -static struct sk_buff * -aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail) +static void +aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *queue) { struct aoe_hdr *h; struct 
aoe_cfghdr *ch; - struct sk_buff *skb, *sl, *sl_tail; + struct sk_buff *skb; struct net_device *ifp; - sl = sl_tail = NULL; - read_lock(&dev_base_lock); for_each_netdev(&init_net, ifp) { dev_hold(ifp); @@ -329,8 +315,7 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail) } skb_put(skb, sizeof *h + sizeof *ch); skb->dev = ifp; - if (sl_tail == NULL) - sl_tail = skb; + __skb_queue_tail(queue, skb); h = (struct aoe_hdr *) skb_mac_header(skb); memset(h, 0, sizeof *h + sizeof *ch); @@ -342,16 +327,10 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail) h->minor = aoeminor; h->cmd = AOECMD_CFG; - skb->next = sl; - sl = skb; cont: dev_put(ifp); } read_unlock(&dev_base_lock); - - if (tail != NULL) - *tail = sl_tail; - return sl; } static void @@ -406,11 +385,7 @@ resend(struct aoedev *d, struct aoetgt *t, struct frame *f) skb = skb_clone(skb, GFP_ATOMIC); if (skb == NULL) return; - if (d->sendq_hd) - d->sendq_tl->next = skb; - else - d->sendq_hd = skb; - d->sendq_tl = skb; + __skb_queue_tail(&d->sendq, skb); } static int @@ -508,16 +483,15 @@ ata_scnt(unsigned char *packet) { static void rexmit_timer(ulong vp) { + struct sk_buff_head queue; struct aoedev *d; struct aoetgt *t, **tt, **te; struct aoeif *ifp; struct frame *f, *e; - struct sk_buff *sl; register long timeout; ulong flags, n; d = (struct aoedev *) vp; - sl = NULL; /* timeout is always ~150% of the moving average */ timeout = d->rttavg; @@ -589,7 +563,7 @@ rexmit_timer(ulong vp) } } - if (d->sendq_hd) { + if (!skb_queue_empty(&d->sendq)) { n = d->rttavg <<= 1; if (n > MAXTIMER) d->rttavg = MAXTIMER; @@ -600,15 +574,15 @@ rexmit_timer(ulong vp) aoecmd_work(d); } - sl = d->sendq_hd; - d->sendq_hd = d->sendq_tl = NULL; + __skb_queue_head_init(&queue); + skb_queue_splice_init(&d->sendq, &queue); d->timer.expires = jiffies + TIMERTICK; add_timer(&d->timer); spin_unlock_irqrestore(&d->lock, flags); - aoenet_xmit(sl); + aoenet_xmit(&queue); } /* enters with 
d->lock held */ @@ -767,12 +741,12 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector void aoecmd_ata_rsp(struct sk_buff *skb) { + struct sk_buff_head queue; struct aoedev *d; struct aoe_hdr *hin, *hout; struct aoe_atahdr *ahin, *ahout; struct frame *f; struct buf *buf; - struct sk_buff *sl; struct aoetgt *t; struct aoeif *ifp; register long n; @@ -893,21 +867,21 @@ aoecmd_ata_rsp(struct sk_buff *skb) aoecmd_work(d); xmit: - sl = d->sendq_hd; - d->sendq_hd = d->sendq_tl = NULL; + __skb_queue_head_init(&queue); + skb_queue_splice_init(&d->sendq, &queue); spin_unlock_irqrestore(&d->lock, flags); - aoenet_xmit(sl); + aoenet_xmit(&queue); } void aoecmd_cfg(ushort aoemajor, unsigned char aoeminor) { - struct sk_buff *sl; - - sl = aoecmd_cfg_pkts(aoemajor, aoeminor, NULL); + struct sk_buff_head queue; - aoenet_xmit(sl); + __skb_queue_head_init(&queue); + aoecmd_cfg_pkts(aoemajor, aoeminor, &queue); + aoenet_xmit(&queue); } struct sk_buff * @@ -1076,7 +1050,12 @@ aoecmd_cfg_rsp(struct sk_buff *skb) spin_unlock_irqrestore(&d->lock, flags); - aoenet_xmit(sl); + if (sl) { + struct sk_buff_head queue; + __skb_queue_head_init(&queue); + __skb_queue_tail(&queue, sl); + aoenet_xmit(&queue); + } } void -- cgit v1.2.3 From 310a2c1012934f590192377f65940cad4aa72b15 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:17 +0900 Subject: block: misc updates This patch makes the following misc updates in preparation for disk->part dereference fix and extended block devt support. 
* implement part_to_disk() * fix comment about gendisk->part indexing * rename get_part() to disk_map_sector() * don't use n which is always zero while printing disk information in diskstats_show() Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/aoe/aoecmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/aoe/aoecmd.c') diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 2f1746295d06..885d1409521f 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -757,7 +757,7 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector const int rw = bio_data_dir(bio); struct hd_struct *part; - part = get_part(disk, sector); + part = disk_map_sector(disk, sector); all_stat_inc(disk, part, ios[rw], sector); all_stat_add(disk, part, ticks[rw], duration, sector); all_stat_add(disk, part, sectors[rw], n_sect, sector); -- cgit v1.2.3 From e71bf0d0ee89e51b92776391c5634938236977d5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Sep 2008 09:03:02 +0200 Subject: block: fix disk->part[] dereferencing race disk->part[] is protected by its matching bdev's lock. However, non-critical accesses like collecting stats and printing out sysfs and proc information used to be performed without any locking. As partitions can come and go dynamically, partitions can go away underneath those non-critical accesses. As some of those accesses are writes, this theoretically can lead to silent corruption. This patch fixes the race by using RCU for the partition array and dev reference counter to hold partitions. * Rename disk->part[] to disk->__part[] to make sure no one outside genhd layer proper accesses it directly. * Use RCU for disk->__part[] dereferencing. * Implement disk_{get|put}_part() which can be used to get and put partitions from gendisk respectively. * Iterators are implemented to help iterate through all partitions safely.
* Functions which require RCU readlock are marked with _rcu suffix. * Use disk_put_part() in __blkdev_put() instead of directly putting the contained kobject. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/aoe/aoecmd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/block/aoe/aoecmd.c') diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 885d1409521f..84c03d65dcc5 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -757,11 +757,15 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector const int rw = bio_data_dir(bio); struct hd_struct *part; - part = disk_map_sector(disk, sector); + rcu_read_lock(); + + part = disk_map_sector_rcu(disk, sector); all_stat_inc(disk, part, ios[rw], sector); all_stat_add(disk, part, ticks[rw], duration, sector); all_stat_add(disk, part, sectors[rw], n_sect, sector); all_stat_add(disk, part, io_ticks, duration, sector); + + rcu_read_unlock(); } void -- cgit v1.2.3 From c9959059161ddd7bf4670cf47367033d6b2f79c4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:47:21 +0900 Subject: block: fix diskstats access There are two variants of stat functions - ones prefixed with double underbars which don't care about preemption and ones without which disable preemption before manipulating per-cpu counters. It's unclear whether the underbarred ones assume that preemption is disabled on entry as some callers don't do that. This patch unifies diskstats access by implementing disk_stat_lock() and disk_stat_unlock() which take care of both RCU (for partition access) and preemption (for per-cpu counter access). diskstats access should always be enclosed between the two functions. As such, there's no need for the versions which disable preemption. They're removed and double underbars ones are renamed to drop the underbars. As an extra argument is added, there's no danger of using the old version unconverted.
disk_stat_lock() uses get_cpu() and returns the cpu index and all diskstat functions which access per-cpu counters now have a @cpu argument to help RT. This change adds RCU or preemption operations at some places but also collapses several preemption ops into one at others. Overall, the performance difference should be negligible as all involved ops are very lightweight per-cpu ones. Signed-off-by: Tejun Heo Cc: Peter Zijlstra Signed-off-by: Jens Axboe --- drivers/block/aoe/aoecmd.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers/block/aoe/aoecmd.c') diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 84c03d65dcc5..17eed8c025d0 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -756,16 +756,17 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector unsigned long n_sect = bio->bi_size >> 9; const int rw = bio_data_dir(bio); struct hd_struct *part; + int cpu; - rcu_read_lock(); - + cpu = disk_stat_lock(); part = disk_map_sector_rcu(disk, sector); - all_stat_inc(disk, part, ios[rw], sector); - all_stat_add(disk, part, ticks[rw], duration, sector); - all_stat_add(disk, part, sectors[rw], n_sect, sector); - all_stat_add(disk, part, io_ticks, duration, sector); - rcu_read_unlock(); + all_stat_inc(cpu, disk, part, ios[rw], sector); + all_stat_add(cpu, disk, part, ticks[rw], duration, sector); + all_stat_add(cpu, disk, part, sectors[rw], n_sect, sector); + all_stat_add(cpu, disk, part, io_ticks, duration, sector); + + disk_stat_unlock(); } void -- cgit v1.2.3 From 80795aefb76d10c5d698e60c7e7750b5330787da Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:07 +0900 Subject: block: move capacity from disk to part0 Move disk->capacity to part0->nr_sects and convert all users who directly accessed the field to use {get|set}_capacity(). This is done early to allow the __dev field to be moved.
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/aoe/aoecmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block/aoe/aoecmd.c') diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 17eed8c025d0..934800f979c9 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -645,7 +645,7 @@ aoecmd_sleepwork(struct work_struct *work) unsigned long flags; u64 ssize; - ssize = d->gd->capacity; + ssize = get_capacity(d->gd); bd = bdget_disk(d->gd, 0); if (bd) { @@ -707,7 +707,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) return; if (d->gd != NULL) { - d->gd->capacity = ssize; + set_capacity(d->gd, ssize); d->flags |= DEVFL_NEWSIZE; } else d->flags |= DEVFL_GDALLOC; -- cgit v1.2.3 From 074a7aca7afa6f230104e8e65eba3420263714a5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Aug 2008 19:56:14 +0900 Subject: block: move stats from disk to part0 Move stats related fields - stamp, in_flight, dkstats - from disk to part0 and unify stat handling such that... * part_stat_*() now updates part0 together if the specified partition is not part0. i.e. part_stat_*() are now essentially all_stat_*(). * {disk|all}_stat_*() are gone. * part_round_stats() is updated similarly. It handles part0 stats automatically and disk_round_stats() is killed. * part_{inc|dec}_in_flight() is implemented which automatically updates part0 stats for parts other than part0. * disk_map_sector_rcu() is updated to return part0 if no part matches. Combined with the above changes, this makes NULL special case handling in callers unnecessary. * Separate stats show code paths for disk are collapsed into part stats show code paths. * Rename disk_stat_lock/unlock() to part_stat_lock/unlock() While at it, reposition stat handling macros a bit and add missing parentheses around macro parameters.
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- drivers/block/aoe/aoecmd.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/block/aoe/aoecmd.c') diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 934800f979c9..961d29a53cab 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -758,15 +758,15 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector struct hd_struct *part; int cpu; - cpu = disk_stat_lock(); + cpu = part_stat_lock(); part = disk_map_sector_rcu(disk, sector); - all_stat_inc(cpu, disk, part, ios[rw], sector); - all_stat_add(cpu, disk, part, ticks[rw], duration, sector); - all_stat_add(cpu, disk, part, sectors[rw], n_sect, sector); - all_stat_add(cpu, disk, part, io_ticks, duration, sector); + part_stat_inc(cpu, part, ios[rw]); + part_stat_add(cpu, part, ticks[rw], duration); + part_stat_add(cpu, part, sectors[rw], n_sect); + part_stat_add(cpu, part, io_ticks, duration); - disk_stat_unlock(); + part_stat_unlock(); } void -- cgit v1.2.3