|
@@ -385,25 +385,40 @@ void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time,
|
|
|
|
|
|
spin_lock_irqsave(&blkg->stats_lock, flags);
|
|
|
blkg->stats.time += time;
|
|
|
+#ifdef CONFIG_DEBUG_BLK_CGROUP
|
|
|
blkg->stats.unaccounted_time += unaccounted_time;
|
|
|
+#endif
|
|
|
spin_unlock_irqrestore(&blkg->stats_lock, flags);
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
|
|
|
|
|
|
+/*
|
|
|
+ * should be called under rcu read lock or queue lock to make sure blkg pointer
|
|
|
+ * is valid.
|
|
|
+ */
|
|
|
void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
|
|
|
uint64_t bytes, bool direction, bool sync)
|
|
|
{
|
|
|
- struct blkio_group_stats *stats;
|
|
|
+ struct blkio_group_stats_cpu *stats_cpu;
|
|
|
unsigned long flags;
|
|
|
|
|
|
- spin_lock_irqsave(&blkg->stats_lock, flags);
|
|
|
- stats = &blkg->stats;
|
|
|
- stats->sectors += bytes >> 9;
|
|
|
- blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICED], 1, direction,
|
|
|
- sync);
|
|
|
- blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_BYTES], bytes,
|
|
|
- direction, sync);
|
|
|
- spin_unlock_irqrestore(&blkg->stats_lock, flags);
|
|
|
+ /*
|
|
|
+ * Disabling interrupts to provide mutual exclusion between two
|
|
|
+ * writes on same cpu. It probably is not needed for 64bit. Not
|
|
|
+ * optimizing that case yet.
|
|
|
+ */
|
|
|
+ local_irq_save(flags);
|
|
|
+
|
|
|
+ stats_cpu = this_cpu_ptr(blkg->stats_cpu);
|
|
|
+
|
|
|
+ u64_stats_update_begin(&stats_cpu->syncp);
|
|
|
+ stats_cpu->sectors += bytes >> 9;
|
|
|
+ blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICED],
|
|
|
+ 1, direction, sync);
|
|
|
+ blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_SERVICE_BYTES],
|
|
|
+ bytes, direction, sync);
|
|
|
+ u64_stats_update_end(&stats_cpu->syncp);
|
|
|
+ local_irq_restore(flags);
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
|
|
|
|
|
@@ -426,18 +441,44 @@ void blkiocg_update_completion_stats(struct blkio_group *blkg,
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);
|
|
|
|
|
|
+/* Merged stats are per cpu. */
|
|
|
void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
|
|
|
bool sync)
|
|
|
{
|
|
|
+ struct blkio_group_stats_cpu *stats_cpu;
|
|
|
unsigned long flags;
|
|
|
|
|
|
- spin_lock_irqsave(&blkg->stats_lock, flags);
|
|
|
- blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_MERGED], 1, direction,
|
|
|
- sync);
|
|
|
- spin_unlock_irqrestore(&blkg->stats_lock, flags);
|
|
|
+ /*
|
|
|
+ * Disabling interrupts to provide mutual exclusion between two
|
|
|
+ * writes on same cpu. It probably is not needed for 64bit. Not
|
|
|
+ * optimizing that case yet.
|
|
|
+ */
|
|
|
+ local_irq_save(flags);
|
|
|
+
|
|
|
+ stats_cpu = this_cpu_ptr(blkg->stats_cpu);
|
|
|
+
|
|
|
+ u64_stats_update_begin(&stats_cpu->syncp);
|
|
|
+ blkio_add_stat(stats_cpu->stat_arr_cpu[BLKIO_STAT_CPU_MERGED], 1,
|
|
|
+ direction, sync);
|
|
|
+ u64_stats_update_end(&stats_cpu->syncp);
|
|
|
+ local_irq_restore(flags);
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
|
|
|
|
|
|
+/*
|
|
|
+ * This function allocates the per cpu stats for blkio_group. Should be called
|
|
|
+ * from sleepable context as alloc_percpu() requires that.
|
|
|
+ */
|
|
|
+int blkio_alloc_blkg_stats(struct blkio_group *blkg)
|
|
|
+{
|
|
|
+ /* Allocate memory for per cpu stats */
|
|
|
+ blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
|
|
|
+ if (!blkg->stats_cpu)
|
|
|
+ return -ENOMEM;
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+EXPORT_SYMBOL_GPL(blkio_alloc_blkg_stats);
|
|
|
+
|
|
|
void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
|
|
|
struct blkio_group *blkg, void *key, dev_t dev,
|
|
|
enum blkio_policy_id plid)
|
|
@@ -508,6 +549,30 @@ struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(blkiocg_lookup_group);
|
|
|
|
|
|
+static void blkio_reset_stats_cpu(struct blkio_group *blkg)
|
|
|
+{
|
|
|
+ struct blkio_group_stats_cpu *stats_cpu;
|
|
|
+ int i, j, k;
|
|
|
+ /*
|
|
|
+ * Note: On 64 bit arch this should not be an issue. This has the
|
|
|
+ * possibility of returning some inconsistent value on 32bit arch
|
|
|
+ * as 64bit update on 32bit is non atomic. Taking care of this
|
|
|
+ * corner case makes code very complicated, like sending IPIs to
|
|
|
+ * cpus, taking care of stats of offline cpus etc.
|
|
|
+ *
|
|
|
+ * reset stats is anyway more of a debug feature and this sounds a
|
|
|
+ * corner case. So I am not complicating the code yet until and
|
|
|
+ * unless this becomes a real issue.
|
|
|
+ */
|
|
|
+ for_each_possible_cpu(i) {
|
|
|
+ stats_cpu = per_cpu_ptr(blkg->stats_cpu, i);
|
|
|
+ stats_cpu->sectors = 0;
|
|
|
+		for (j = 0; j < BLKIO_STAT_CPU_NR; j++)
|
|
|
+ for (k = 0; k < BLKIO_STAT_TOTAL; k++)
|
|
|
+ stats_cpu->stat_arr_cpu[j][k] = 0;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
static int
|
|
|
blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
|
|
|
{
|
|
@@ -552,7 +617,11 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
|
|
|
}
|
|
|
#endif
|
|
|
spin_unlock(&blkg->stats_lock);
|
|
|
+
|
|
|
+ /* Reset Per cpu stats which don't take blkg->stats_lock */
|
|
|
+ blkio_reset_stats_cpu(blkg);
|
|
|
}
|
|
|
+
|
|
|
spin_unlock_irq(&blkcg->lock);
|
|
|
return 0;
|
|
|
}
|
|
@@ -598,6 +667,59 @@ static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val,
|
|
|
return val;
|
|
|
}
|
|
|
|
|
|
+
|
|
|
+static uint64_t blkio_read_stat_cpu(struct blkio_group *blkg,
|
|
|
+ enum stat_type_cpu type, enum stat_sub_type sub_type)
|
|
|
+{
|
|
|
+ int cpu;
|
|
|
+ struct blkio_group_stats_cpu *stats_cpu;
|
|
|
+ u64 val = 0, tval;
|
|
|
+
|
|
|
+ for_each_possible_cpu(cpu) {
|
|
|
+ unsigned int start;
|
|
|
+ stats_cpu = per_cpu_ptr(blkg->stats_cpu, cpu);
|
|
|
+
|
|
|
+ do {
|
|
|
+ start = u64_stats_fetch_begin(&stats_cpu->syncp);
|
|
|
+ if (type == BLKIO_STAT_CPU_SECTORS)
|
|
|
+ tval = stats_cpu->sectors;
|
|
|
+ else
|
|
|
+ tval = stats_cpu->stat_arr_cpu[type][sub_type];
|
|
|
+		} while (u64_stats_fetch_retry(&stats_cpu->syncp, start));
|
|
|
+
|
|
|
+ val += tval;
|
|
|
+ }
|
|
|
+
|
|
|
+ return val;
|
|
|
+}
|
|
|
+
|
|
|
+static uint64_t blkio_get_stat_cpu(struct blkio_group *blkg,
|
|
|
+ struct cgroup_map_cb *cb, dev_t dev, enum stat_type_cpu type)
|
|
|
+{
|
|
|
+ uint64_t disk_total, val;
|
|
|
+ char key_str[MAX_KEY_LEN];
|
|
|
+ enum stat_sub_type sub_type;
|
|
|
+
|
|
|
+ if (type == BLKIO_STAT_CPU_SECTORS) {
|
|
|
+ val = blkio_read_stat_cpu(blkg, type, 0);
|
|
|
+ return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, val, cb, dev);
|
|
|
+ }
|
|
|
+
|
|
|
+ for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
|
|
|
+ sub_type++) {
|
|
|
+ blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false);
|
|
|
+ val = blkio_read_stat_cpu(blkg, type, sub_type);
|
|
|
+ cb->fill(cb, key_str, val);
|
|
|
+ }
|
|
|
+
|
|
|
+ disk_total = blkio_read_stat_cpu(blkg, type, BLKIO_STAT_READ) +
|
|
|
+ blkio_read_stat_cpu(blkg, type, BLKIO_STAT_WRITE);
|
|
|
+
|
|
|
+ blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false);
|
|
|
+ cb->fill(cb, key_str, disk_total);
|
|
|
+ return disk_total;
|
|
|
+}
|
|
|
+
|
|
|
/* This should be called with blkg->stats_lock held */
|
|
|
static uint64_t blkio_get_stat(struct blkio_group *blkg,
|
|
|
struct cgroup_map_cb *cb, dev_t dev, enum stat_type type)
|
|
@@ -609,9 +731,6 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg,
|
|
|
if (type == BLKIO_STAT_TIME)
|
|
|
return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
|
|
|
blkg->stats.time, cb, dev);
|
|
|
- if (type == BLKIO_STAT_SECTORS)
|
|
|
- return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
|
|
|
- blkg->stats.sectors, cb, dev);
|
|
|
#ifdef CONFIG_DEBUG_BLK_CGROUP
|
|
|
if (type == BLKIO_STAT_UNACCOUNTED_TIME)
|
|
|
return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
|
|
@@ -1075,8 +1194,8 @@ static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft,
|
|
|
}
|
|
|
|
|
|
static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
|
|
|
- struct cftype *cft, struct cgroup_map_cb *cb, enum stat_type type,
|
|
|
- bool show_total)
|
|
|
+ struct cftype *cft, struct cgroup_map_cb *cb,
|
|
|
+ enum stat_type type, bool show_total, bool pcpu)
|
|
|
{
|
|
|
struct blkio_group *blkg;
|
|
|
struct hlist_node *n;
|
|
@@ -1087,10 +1206,15 @@ static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
|
|
|
if (blkg->dev) {
|
|
|
if (!cftype_blkg_same_policy(cft, blkg))
|
|
|
continue;
|
|
|
- spin_lock_irq(&blkg->stats_lock);
|
|
|
- cgroup_total += blkio_get_stat(blkg, cb, blkg->dev,
|
|
|
- type);
|
|
|
- spin_unlock_irq(&blkg->stats_lock);
|
|
|
+ if (pcpu)
|
|
|
+ cgroup_total += blkio_get_stat_cpu(blkg, cb,
|
|
|
+ blkg->dev, type);
|
|
|
+ else {
|
|
|
+ spin_lock_irq(&blkg->stats_lock);
|
|
|
+ cgroup_total += blkio_get_stat(blkg, cb,
|
|
|
+ blkg->dev, type);
|
|
|
+ spin_unlock_irq(&blkg->stats_lock);
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
if (show_total)
|
|
@@ -1114,47 +1238,47 @@ static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
|
|
|
switch(name) {
|
|
|
case BLKIO_PROP_time:
|
|
|
return blkio_read_blkg_stats(blkcg, cft, cb,
|
|
|
- BLKIO_STAT_TIME, 0);
|
|
|
+ BLKIO_STAT_TIME, 0, 0);
|
|
|
case BLKIO_PROP_sectors:
|
|
|
return blkio_read_blkg_stats(blkcg, cft, cb,
|
|
|
- BLKIO_STAT_SECTORS, 0);
|
|
|
+ BLKIO_STAT_CPU_SECTORS, 0, 1);
|
|
|
case BLKIO_PROP_io_service_bytes:
|
|
|
return blkio_read_blkg_stats(blkcg, cft, cb,
|
|
|
- BLKIO_STAT_SERVICE_BYTES, 1);
|
|
|
+ BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1);
|
|
|
case BLKIO_PROP_io_serviced:
|
|
|
return blkio_read_blkg_stats(blkcg, cft, cb,
|
|
|
- BLKIO_STAT_SERVICED, 1);
|
|
|
+ BLKIO_STAT_CPU_SERVICED, 1, 1);
|
|
|
case BLKIO_PROP_io_service_time:
|
|
|
return blkio_read_blkg_stats(blkcg, cft, cb,
|
|
|
- BLKIO_STAT_SERVICE_TIME, 1);
|
|
|
+ BLKIO_STAT_SERVICE_TIME, 1, 0);
|
|
|
case BLKIO_PROP_io_wait_time:
|
|
|
return blkio_read_blkg_stats(blkcg, cft, cb,
|
|
|
- BLKIO_STAT_WAIT_TIME, 1);
|
|
|
+ BLKIO_STAT_WAIT_TIME, 1, 0);
|
|
|
case BLKIO_PROP_io_merged:
|
|
|
return blkio_read_blkg_stats(blkcg, cft, cb,
|
|
|
- BLKIO_STAT_MERGED, 1);
|
|
|
+ BLKIO_STAT_CPU_MERGED, 1, 1);
|
|
|
case BLKIO_PROP_io_queued:
|
|
|
return blkio_read_blkg_stats(blkcg, cft, cb,
|
|
|
- BLKIO_STAT_QUEUED, 1);
|
|
|
+ BLKIO_STAT_QUEUED, 1, 0);
|
|
|
#ifdef CONFIG_DEBUG_BLK_CGROUP
|
|
|
case BLKIO_PROP_unaccounted_time:
|
|
|
return blkio_read_blkg_stats(blkcg, cft, cb,
|
|
|
- BLKIO_STAT_UNACCOUNTED_TIME, 0);
|
|
|
+ BLKIO_STAT_UNACCOUNTED_TIME, 0, 0);
|
|
|
case BLKIO_PROP_dequeue:
|
|
|
return blkio_read_blkg_stats(blkcg, cft, cb,
|
|
|
- BLKIO_STAT_DEQUEUE, 0);
|
|
|
+ BLKIO_STAT_DEQUEUE, 0, 0);
|
|
|
case BLKIO_PROP_avg_queue_size:
|
|
|
return blkio_read_blkg_stats(blkcg, cft, cb,
|
|
|
- BLKIO_STAT_AVG_QUEUE_SIZE, 0);
|
|
|
+ BLKIO_STAT_AVG_QUEUE_SIZE, 0, 0);
|
|
|
case BLKIO_PROP_group_wait_time:
|
|
|
return blkio_read_blkg_stats(blkcg, cft, cb,
|
|
|
- BLKIO_STAT_GROUP_WAIT_TIME, 0);
|
|
|
+ BLKIO_STAT_GROUP_WAIT_TIME, 0, 0);
|
|
|
case BLKIO_PROP_idle_time:
|
|
|
return blkio_read_blkg_stats(blkcg, cft, cb,
|
|
|
- BLKIO_STAT_IDLE_TIME, 0);
|
|
|
+ BLKIO_STAT_IDLE_TIME, 0, 0);
|
|
|
case BLKIO_PROP_empty_time:
|
|
|
return blkio_read_blkg_stats(blkcg, cft, cb,
|
|
|
- BLKIO_STAT_EMPTY_TIME, 0);
|
|
|
+ BLKIO_STAT_EMPTY_TIME, 0, 0);
|
|
|
#endif
|
|
|
default:
|
|
|
BUG();
|
|
@@ -1164,10 +1288,10 @@ static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
|
|
|
switch(name){
|
|
|
case BLKIO_THROTL_io_service_bytes:
|
|
|
return blkio_read_blkg_stats(blkcg, cft, cb,
|
|
|
- BLKIO_STAT_SERVICE_BYTES, 1);
|
|
|
+ BLKIO_STAT_CPU_SERVICE_BYTES, 1, 1);
|
|
|
case BLKIO_THROTL_io_serviced:
|
|
|
return blkio_read_blkg_stats(blkcg, cft, cb,
|
|
|
- BLKIO_STAT_SERVICED, 1);
|
|
|
+ BLKIO_STAT_CPU_SERVICED, 1, 1);
|
|
|
default:
|
|
|
BUG();
|
|
|
}
|