Переглянути джерело

Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
  Enhanced partition statistics: documentation update
  Enhanced partition statistics: remove old partition statistics
  Enhanced partition statistics: procfs
  Enhanced partition statistics: sysfs
  Enhanced partition statistics: aoe fix
  Enhanced partition statistics: update partition statitics
  Enhanced partition statistics: core statistics
  block: fixup rq_init() a bit

Manually fixed conflict in drivers/block/aoe/aoecmd.c due to statistics
support.
Linus Torvalds 17 роки тому
батько
коміт
03054de1e0

+ 22 - 0
Documentation/ABI/testing/procfs-diskstats

@@ -0,0 +1,22 @@
+What:		/proc/diskstats
+Date:		February 2008
+Contact:	Jerome Marchand <jmarchan@redhat.com>
+Description:
+		The /proc/diskstats file displays the I/O statistics
+		of block devices. Each line contains the following 14
+		fields:
+		 1 - major number
+		 2 - minor mumber
+		 3 - device name
+		 4 - reads completed succesfully
+		 5 - reads merged
+		 6 - sectors read
+		 7 - time spent reading (ms)
+		 8 - writes completed
+		 9 - writes merged
+		10 - sectors written
+		11 - time spent writing (ms)
+		12 - I/Os currently in progress
+		13 - time spent doing I/Os (ms)
+		14 - weighted time spent doing I/Os (ms)
+		For more details refer to Documentation/iostats.txt

+ 28 - 0
Documentation/ABI/testing/sysfs-block

@@ -0,0 +1,28 @@
+What:		/sys/block/<disk>/stat
+Date:		February 2008
+Contact:	Jerome Marchand <jmarchan@redhat.com>
+Description:
+		The /sys/block/<disk>/stat files displays the I/O
+		statistics of disk <disk>. They contain 11 fields:
+		 1 - reads completed succesfully
+		 2 - reads merged
+		 3 - sectors read
+		 4 - time spent reading (ms)
+		 5 - writes completed
+		 6 - writes merged
+		 7 - sectors written
+		 8 - time spent writing (ms)
+		 9 - I/Os currently in progress
+		10 - time spent doing I/Os (ms)
+		11 - weighted time spent doing I/Os (ms)
+		For more details refer Documentation/iostats.txt
+
+
+What:		/sys/block/<disk>/<part>/stat
+Date:		February 2008
+Contact:	Jerome Marchand <jmarchan@redhat.com>
+Description:
+		The /sys/block/<disk>/<part>/stat files display the
+		I/O statistics of partition <part>. The format is the
+		same as the above-written /sys/block/<disk>/stat
+		format.

+ 14 - 1
Documentation/iostats.txt

@@ -58,7 +58,7 @@ they should not wrap twice before you notice them.
 Each set of stats only applies to the indicated device; if you want
 system-wide stats you'll have to find all the devices and sum them all up.
 
-Field  1 -- # of reads issued
+Field  1 -- # of reads completed
     This is the total number of reads completed successfully.
 Field  2 -- # of reads merged, field 6 -- # of writes merged
     Reads and writes which are adjacent to each other may be merged for
@@ -132,6 +132,19 @@ words, the number of reads for partitions is counted slightly before time
 of queuing for partitions, and at completion for whole disks.  This is
 a subtle distinction that is probably uninteresting for most cases.
 
+More significant is the error induced by counting the numbers of
+reads/writes before merges for partitions and after for disks. Since a
+typical workload usually contains a lot of successive and adjacent requests,
+the number of reads/writes issued can be several times higher than the
+number of reads/writes completed.
+
+In 2.6.25, the full statistic set is again available for partitions and
+disk and partition statistics are consistent again. Since we still don't
+keep record of the partition-relative address, an operation is attributed to
+the partition which contains the first sector of the request after the
+eventual merges. As requests can be merged across partition, this could lead
+to some (probably insignificant) innacuracy.
+
 Additional notes
 ----------------
 

+ 47 - 14
block/blk-core.c

@@ -60,10 +60,15 @@ static void drive_stat_acct(struct request *rq, int new_io)
 		return;
 
 	if (!new_io) {
-		__disk_stat_inc(rq->rq_disk, merges[rw]);
+		__all_stat_inc(rq->rq_disk, merges[rw], rq->sector);
 	} else {
+		struct hd_struct *part = get_part(rq->rq_disk, rq->sector);
 		disk_round_stats(rq->rq_disk);
 		rq->rq_disk->in_flight++;
+		if (part) {
+			part_round_stats(part);
+			part->in_flight++;
+		}
 	}
 }
 
@@ -102,27 +107,38 @@ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
 }
 EXPORT_SYMBOL(blk_get_backing_dev_info);
 
+/*
+ * We can't just memset() the structure, since the allocation path
+ * already stored some information in the request.
+ */
 void rq_init(struct request_queue *q, struct request *rq)
 {
 	INIT_LIST_HEAD(&rq->queuelist);
 	INIT_LIST_HEAD(&rq->donelist);
-
-	rq->errors = 0;
+	rq->q = q;
+	rq->sector = rq->hard_sector = (sector_t) -1;
+	rq->nr_sectors = rq->hard_nr_sectors = 0;
+	rq->current_nr_sectors = rq->hard_cur_sectors = 0;
 	rq->bio = rq->biotail = NULL;
 	INIT_HLIST_NODE(&rq->hash);
 	RB_CLEAR_NODE(&rq->rb_node);
+	rq->rq_disk = NULL;
+	rq->nr_phys_segments = 0;
+	rq->nr_hw_segments = 0;
 	rq->ioprio = 0;
+	rq->special = NULL;
 	rq->buffer = NULL;
+	rq->tag = -1;
+	rq->errors = 0;
 	rq->ref_count = 1;
-	rq->q = q;
-	rq->special = NULL;
+	rq->cmd_len = 0;
+	memset(rq->cmd, 0, sizeof(rq->cmd));
 	rq->data_len = 0;
+	rq->sense_len = 0;
 	rq->data = NULL;
-	rq->nr_phys_segments = 0;
 	rq->sense = NULL;
 	rq->end_io = NULL;
 	rq->end_io_data = NULL;
-	rq->completion_data = NULL;
 	rq->next_rq = NULL;
 }
 
@@ -986,6 +1002,21 @@ void disk_round_stats(struct gendisk *disk)
 }
 EXPORT_SYMBOL_GPL(disk_round_stats);
 
+void part_round_stats(struct hd_struct *part)
+{
+	unsigned long now = jiffies;
+
+	if (now == part->stamp)
+		return;
+
+	if (part->in_flight) {
+		__part_stat_add(part, time_in_queue,
+				part->in_flight * (now - part->stamp));
+		__part_stat_add(part, io_ticks, (now - part->stamp));
+	}
+	part->stamp = now;
+}
+
 /*
  * queue lock must be held
  */
@@ -1188,10 +1219,6 @@ static inline void blk_partition_remap(struct bio *bio)
 
 	if (bio_sectors(bio) && bdev != bdev->bd_contains) {
 		struct hd_struct *p = bdev->bd_part;
-		const int rw = bio_data_dir(bio);
-
-		p->sectors[rw] += bio_sectors(bio);
-		p->ios[rw]++;
 
 		bio->bi_sector += p->start_sect;
 		bio->bi_bdev = bdev->bd_contains;
@@ -1519,7 +1546,8 @@ static int __end_that_request_first(struct request *req, int error,
 	if (blk_fs_request(req) && req->rq_disk) {
 		const int rw = rq_data_dir(req);
 
-		disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9);
+		all_stat_add(req->rq_disk, sectors[rw],
+			     nr_bytes >> 9, req->sector);
 	}
 
 	total_bytes = bio_nbytes = 0;
@@ -1704,11 +1732,16 @@ static void end_that_request_last(struct request *req, int error)
 	if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
 		unsigned long duration = jiffies - req->start_time;
 		const int rw = rq_data_dir(req);
+		struct hd_struct *part = get_part(disk, req->sector);
 
-		__disk_stat_inc(disk, ios[rw]);
-		__disk_stat_add(disk, ticks[rw], duration);
+		__all_stat_inc(disk, ios[rw], req->sector);
+		__all_stat_add(disk, ticks[rw], duration, req->sector);
 		disk_round_stats(disk);
 		disk->in_flight--;
+		if (part) {
+			part_round_stats(part);
+			part->in_flight--;
+		}
 	}
 
 	if (req->end_io)

+ 6 - 0
block/blk-merge.c

@@ -454,8 +454,14 @@ static int attempt_merge(struct request_queue *q, struct request *req,
 	elv_merge_requests(q, req, next);
 
 	if (req->rq_disk) {
+		struct hd_struct *part
+			= get_part(req->rq_disk, req->sector);
 		disk_round_stats(req->rq_disk);
 		req->rq_disk->in_flight--;
+		if (part) {
+			part_round_stats(part);
+			part->in_flight--;
+		}
 	}
 
 	req->ioprio = ioprio_best(req->ioprio, next->ioprio);

+ 22 - 6
block/genhd.c

@@ -584,12 +584,28 @@ static int diskstats_show(struct seq_file *s, void *v)
 	for (n = 0; n < gp->minors - 1; n++) {
 		struct hd_struct *hd = gp->part[n];
 
-		if (hd && hd->nr_sects)
-			seq_printf(s, "%4d %4d %s %u %u %u %u\n",
-				gp->major, n + gp->first_minor + 1,
-				disk_name(gp, n + 1, buf),
-				hd->ios[0], hd->sectors[0],
-				hd->ios[1], hd->sectors[1]);
+		if (!hd || !hd->nr_sects)
+			continue;
+
+		preempt_disable();
+		part_round_stats(hd);
+		preempt_enable();
+		seq_printf(s, "%4d %4d %s %lu %lu %llu "
+			   "%u %lu %lu %llu %u %u %u %u\n",
+			   gp->major, n + gp->first_minor + 1,
+			   disk_name(gp, n + 1, buf),
+			   part_stat_read(hd, ios[0]),
+			   part_stat_read(hd, merges[0]),
+			   (unsigned long long)part_stat_read(hd, sectors[0]),
+			   jiffies_to_msecs(part_stat_read(hd, ticks[0])),
+			   part_stat_read(hd, ios[1]),
+			   part_stat_read(hd, merges[1]),
+			   (unsigned long long)part_stat_read(hd, sectors[1]),
+			   jiffies_to_msecs(part_stat_read(hd, ticks[1])),
+			   hd->in_flight,
+			   jiffies_to_msecs(part_stat_read(hd, io_ticks)),
+			   jiffies_to_msecs(part_stat_read(hd, time_in_queue))
+			);
 	}
  
 	return 0;

+ 6 - 6
drivers/block/aoe/aoecmd.c

@@ -751,15 +751,15 @@ gettgt(struct aoedev *d, char *addr)
 }
 
 static inline void
-diskstats(struct gendisk *disk, struct bio *bio, ulong duration)
+diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector)
 {
 	unsigned long n_sect = bio->bi_size >> 9;
 	const int rw = bio_data_dir(bio);
 
-	disk_stat_inc(disk, ios[rw]);
-	disk_stat_add(disk, ticks[rw], duration);
-	disk_stat_add(disk, sectors[rw], n_sect);
-	disk_stat_add(disk, io_ticks, duration);
+	all_stat_inc(disk, ios[rw], sector);
+	all_stat_add(disk, ticks[rw], duration, sector);
+	all_stat_add(disk, sectors[rw], n_sect, sector);
+	all_stat_add(disk, io_ticks, duration, sector);
 }
 
 void
@@ -879,7 +879,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 	}
 
 	if (buf && --buf->nframesout == 0 && buf->resid == 0) {
-		diskstats(d->gd, buf->bio, jiffies - buf->stime);
+		diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector);
 		n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
 		bio_endio(buf->bio, n);
 		mempool_free(buf, d->bufpool);

+ 26 - 5
fs/partitions/check.c

@@ -18,6 +18,7 @@
 #include <linux/fs.h>
 #include <linux/kmod.h>
 #include <linux/ctype.h>
+#include <linux/genhd.h>
 
 #include "check.h"
 
@@ -215,9 +216,25 @@ static ssize_t part_stat_show(struct device *dev,
 {
 	struct hd_struct *p = dev_to_part(dev);
 
-	return sprintf(buf, "%8u %8llu %8u %8llu\n",
-		       p->ios[0], (unsigned long long)p->sectors[0],
-		       p->ios[1], (unsigned long long)p->sectors[1]);
+	preempt_disable();
+	part_round_stats(p);
+	preempt_enable();
+	return sprintf(buf,
+		"%8lu %8lu %8llu %8u "
+		"%8lu %8lu %8llu %8u "
+		"%8u %8u %8u"
+		"\n",
+		part_stat_read(p, ios[READ]),
+		part_stat_read(p, merges[READ]),
+		(unsigned long long)part_stat_read(p, sectors[READ]),
+		jiffies_to_msecs(part_stat_read(p, ticks[READ])),
+		part_stat_read(p, ios[WRITE]),
+		part_stat_read(p, merges[WRITE]),
+		(unsigned long long)part_stat_read(p, sectors[WRITE]),
+		jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
+		p->in_flight,
+		jiffies_to_msecs(part_stat_read(p, io_ticks)),
+		jiffies_to_msecs(part_stat_read(p, time_in_queue)));
 }
 
 #ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -273,6 +290,7 @@ static struct attribute_group *part_attr_groups[] = {
 static void part_release(struct device *dev)
 {
 	struct hd_struct *p = dev_to_part(dev);
+	free_part_stats(p);
 	kfree(p);
 }
 
@@ -312,8 +330,7 @@ void delete_partition(struct gendisk *disk, int part)
 	disk->part[part-1] = NULL;
 	p->start_sect = 0;
 	p->nr_sects = 0;
-	p->ios[0] = p->ios[1] = 0;
-	p->sectors[0] = p->sectors[1] = 0;
+	part_stat_set_all(p, 0);
 	kobject_put(p->holder_dir);
 	device_del(&p->dev);
 	put_device(&p->dev);
@@ -336,6 +353,10 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
 	if (!p)
 		return;
 
+	if (!init_part_stats(p)) {
+		kfree(p);
+		return;
+	}
 	p->start_sect = start;
 	p->nr_sects = len;
 	p->partno = part;

+ 3 - 1
include/linux/blkdev.h

@@ -137,7 +137,9 @@ enum rq_flag_bits {
 #define BLK_MAX_CDB	16
 
 /*
- * try to put the fields that are referenced together in the same cacheline
+ * try to put the fields that are referenced together in the same cacheline.
+ * if you modify this structure, be sure to check block/blk-core.c:rq_init()
+ * as well!
  */
 struct request {
 	struct list_head queuelist;

+ 142 - 11
include/linux/genhd.h

@@ -91,15 +91,30 @@ struct partition {
 	__le32 nr_sects;		/* nr of sectors in partition */
 } __attribute__((packed));
 
+struct disk_stats {
+	unsigned long sectors[2];	/* READs and WRITEs */
+	unsigned long ios[2];
+	unsigned long merges[2];
+	unsigned long ticks[2];
+	unsigned long io_ticks;
+	unsigned long time_in_queue;
+};
+	
 struct hd_struct {
 	sector_t start_sect;
 	sector_t nr_sects;
 	struct device dev;
 	struct kobject *holder_dir;
-	unsigned ios[2], sectors[2];	/* READs and WRITEs */
 	int policy, partno;
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	int make_it_fail;
+#endif
+	unsigned long stamp;
+	int in_flight;
+#ifdef	CONFIG_SMP
+	struct disk_stats *dkstats;
+#else
+	struct disk_stats dkstats;
 #endif
 };
 
@@ -111,15 +126,7 @@ struct hd_struct {
 #define GENHD_FL_SUPPRESS_PARTITION_INFO	32
 #define GENHD_FL_FAIL				64
 
-struct disk_stats {
-	unsigned long sectors[2];	/* READs and WRITEs */
-	unsigned long ios[2];
-	unsigned long merges[2];
-	unsigned long ticks[2];
-	unsigned long io_ticks;
-	unsigned long time_in_queue;
-};
-	
+
 struct gendisk {
 	int major;			/* major number of driver */
 	int first_minor;
@@ -158,6 +165,20 @@ struct gendisk {
  * The __ variants should only be called in critical sections. The full
  * variants disable/enable preemption.
  */
+static inline struct hd_struct *get_part(struct gendisk *gendiskp,
+					 sector_t sector)
+{
+	struct hd_struct *part;
+	int i;
+	for (i = 0; i < gendiskp->minors - 1; i++) {
+		part = gendiskp->part[i];
+		if (part && part->start_sect <= sector
+		    && sector < part->start_sect + part->nr_sects)
+			return part;
+	}
+	return NULL;
+}
+
 #ifdef	CONFIG_SMP
 #define __disk_stat_add(gendiskp, field, addnd) 	\
 	(per_cpu_ptr(gendiskp->dkstats, smp_processor_id())->field += addnd)
@@ -177,15 +198,62 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)	{
 		memset(per_cpu_ptr(gendiskp->dkstats, i), value,
 				sizeof (struct disk_stats));
 }		
+
+#define __part_stat_add(part, field, addnd)				\
+	(per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd)
+
+#define __all_stat_add(gendiskp, field, addnd, sector)		\
+({								\
+	struct hd_struct *part = get_part(gendiskp, sector);	\
+	if (part)						\
+		__part_stat_add(part, field, addnd);		\
+	__disk_stat_add(gendiskp, field, addnd);		\
+})
+
+#define part_stat_read(part, field)					\
+({									\
+	typeof(part->dkstats->field) res = 0;				\
+	int i;								\
+	for_each_possible_cpu(i)					\
+		res += per_cpu_ptr(part->dkstats, i)->field;		\
+	res;								\
+})
+
+static inline void part_stat_set_all(struct hd_struct *part, int value)	{
+	int i;
+	for_each_possible_cpu(i)
+		memset(per_cpu_ptr(part->dkstats, i), value,
+		       sizeof(struct disk_stats));
+}
 				
 #else
 #define __disk_stat_add(gendiskp, field, addnd) \
 				(gendiskp->dkstats.field += addnd)
 #define disk_stat_read(gendiskp, field)	(gendiskp->dkstats.field)
 
-static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)	{
+static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)
+{
 	memset(&gendiskp->dkstats, value, sizeof (struct disk_stats));
 }
+
+#define __part_stat_add(part, field, addnd) \
+	(part->dkstats.field += addnd)
+
+#define __all_stat_add(gendiskp, field, addnd, sector)		\
+({								\
+	struct hd_struct *part = get_part(gendiskp, sector);	\
+	if (part)						\
+		part->dkstats.field += addnd;			\
+	__disk_stat_add(gendiskp, field, addnd);		\
+})
+
+#define part_stat_read(part, field)	(part->dkstats.field)
+
+static inline void part_stat_set_all(struct hd_struct *part, int value)
+{
+	memset(&part->dkstats, value, sizeof(struct disk_stats));
+}
+
 #endif
 
 #define disk_stat_add(gendiskp, field, addnd)			\
@@ -206,6 +274,45 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)	{
 #define disk_stat_sub(gendiskp, field, subnd) \
 		disk_stat_add(gendiskp, field, -subnd)
 
+#define part_stat_add(gendiskp, field, addnd)		\
+	do {						\
+		preempt_disable();			\
+		__part_stat_add(gendiskp, field, addnd);\
+		preempt_enable();			\
+	} while (0)
+
+#define __part_stat_dec(gendiskp, field) __part_stat_add(gendiskp, field, -1)
+#define part_stat_dec(gendiskp, field) part_stat_add(gendiskp, field, -1)
+
+#define __part_stat_inc(gendiskp, field) __part_stat_add(gendiskp, field, 1)
+#define part_stat_inc(gendiskp, field) part_stat_add(gendiskp, field, 1)
+
+#define __part_stat_sub(gendiskp, field, subnd) \
+		__part_stat_add(gendiskp, field, -subnd)
+#define part_stat_sub(gendiskp, field, subnd) \
+		part_stat_add(gendiskp, field, -subnd)
+
+#define all_stat_add(gendiskp, field, addnd, sector)		\
+	do {							\
+		preempt_disable();				\
+		__all_stat_add(gendiskp, field, addnd, sector);	\
+		preempt_enable();				\
+	} while (0)
+
+#define __all_stat_dec(gendiskp, field, sector) \
+		__all_stat_add(gendiskp, field, -1, sector)
+#define all_stat_dec(gendiskp, field, sector) \
+		all_stat_add(gendiskp, field, -1, sector)
+
+#define __all_stat_inc(gendiskp, field, sector) \
+		__all_stat_add(gendiskp, field, 1, sector)
+#define all_stat_inc(gendiskp, field, sector) \
+		all_stat_add(gendiskp, field, 1, sector)
+
+#define __all_stat_sub(gendiskp, field, subnd, sector) \
+		__all_stat_add(gendiskp, field, -subnd, sector)
+#define all_stat_sub(gendiskp, field, subnd, sector) \
+		all_stat_add(gendiskp, field, -subnd, sector)
 
 /* Inlines to alloc and free disk stats in struct gendisk */
 #ifdef  CONFIG_SMP
@@ -221,6 +328,20 @@ static inline void free_disk_stats(struct gendisk *disk)
 {
 	free_percpu(disk->dkstats);
 }
+
+static inline int init_part_stats(struct hd_struct *part)
+{
+	part->dkstats = alloc_percpu(struct disk_stats);
+	if (!part->dkstats)
+		return 0;
+	return 1;
+}
+
+static inline void free_part_stats(struct hd_struct *part)
+{
+	free_percpu(part->dkstats);
+}
+
 #else	/* CONFIG_SMP */
 static inline int init_disk_stats(struct gendisk *disk)
 {
@@ -230,10 +351,20 @@ static inline int init_disk_stats(struct gendisk *disk)
 static inline void free_disk_stats(struct gendisk *disk)
 {
 }
+
+static inline int init_part_stats(struct hd_struct *part)
+{
+	return 1;
+}
+
+static inline void free_part_stats(struct hd_struct *part)
+{
+}
 #endif	/* CONFIG_SMP */
 
 /* drivers/block/ll_rw_blk.c */
 extern void disk_round_stats(struct gendisk *disk);
+extern void part_round_stats(struct hd_struct *part);
 
 /* drivers/block/genhd.c */
 extern int get_blkdev_list(char *, int);