dm-stats.c

#include <linux/errno.h>
#include <linux/numa.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/threads.h>
#include <linux/preempt.h>
#include <linux/irqflags.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/device-mapper.h>

#include "dm.h"
#include "dm-stats.h"

#define DM_MSG_PREFIX "stats"
static int dm_stat_need_rcu_barrier;

/*
 * Using 64-bit values to avoid overflow (which is a
 * problem that block/genhd.c's IO accounting has).
 */
struct dm_stat_percpu {
        unsigned long long sectors[2];
        unsigned long long ios[2];
        unsigned long long merges[2];
        unsigned long long ticks[2];
        unsigned long long io_ticks[2];
        unsigned long long io_ticks_total;
        unsigned long long time_in_queue;
};

struct dm_stat_shared {
        atomic_t in_flight[2];
        unsigned long stamp;
        struct dm_stat_percpu tmp;
};

struct dm_stat {
        struct list_head list_entry;
        int id;
        size_t n_entries;
        sector_t start;
        sector_t end;
        sector_t step;
        const char *program_id;
        const char *aux_data;
        struct rcu_head rcu_head;
        size_t shared_alloc_size;
        size_t percpu_alloc_size;
        struct dm_stat_percpu *stat_percpu[NR_CPUS];
        struct dm_stat_shared stat_shared[0];
};

struct dm_stats_last_position {
        sector_t last_sector;
        unsigned last_rw;
};

/*
 * A typo on the command line could possibly make the kernel run out of memory
 * and crash. To prevent the crash we account all used memory. We fail if we
 * exhaust 1/4 of all memory or 1/2 of vmalloc space.
 */
#define DM_STATS_MEMORY_FACTOR		4
#define DM_STATS_VMALLOC_FACTOR		2

static DEFINE_SPINLOCK(shared_memory_lock);

static unsigned long shared_memory_amount;
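
/*
 * All statistics allocations are charged to shared_memory_amount under
 * shared_memory_lock, so the limits described above apply to the total
 * across all regions and all devices.
 */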
static bool __check_shared_memory(size_t alloc_size)
{
        size_t a;

        a = shared_memory_amount + alloc_size;
        if (a < shared_memory_amount)
                return false;
        if (a >> PAGE_SHIFT > totalram_pages / DM_STATS_MEMORY_FACTOR)
                return false;
#ifdef CONFIG_MMU
        if (a > (VMALLOC_END - VMALLOC_START) / DM_STATS_VMALLOC_FACTOR)
                return false;
#endif
        return true;
}

static bool check_shared_memory(size_t alloc_size)
{
        bool ret;

        spin_lock_irq(&shared_memory_lock);

        ret = __check_shared_memory(alloc_size);

        spin_unlock_irq(&shared_memory_lock);

        return ret;
}

static bool claim_shared_memory(size_t alloc_size)
{
        spin_lock_irq(&shared_memory_lock);

        if (!__check_shared_memory(alloc_size)) {
                spin_unlock_irq(&shared_memory_lock);
                return false;
        }

        shared_memory_amount += alloc_size;

        spin_unlock_irq(&shared_memory_lock);

        return true;
}

static void free_shared_memory(size_t alloc_size)
{
        unsigned long flags;

        spin_lock_irqsave(&shared_memory_lock, flags);

        if (WARN_ON_ONCE(shared_memory_amount < alloc_size)) {
                spin_unlock_irqrestore(&shared_memory_lock, flags);
                DMCRIT("Memory usage accounting bug.");
                return;
        }

        shared_memory_amount -= alloc_size;

        spin_unlock_irqrestore(&shared_memory_lock, flags);
}
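
/*
 * Zeroed allocation charged against the shared-memory limit: kmalloc is
 * tried first for sizes the slab allocator supports, then vmalloc; if both
 * fail the accounting is released again.
 */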
static void *dm_kvzalloc(size_t alloc_size, int node)
{
        void *p;

        if (!claim_shared_memory(alloc_size))
                return NULL;

        if (alloc_size <= KMALLOC_MAX_SIZE) {
                p = kzalloc_node(alloc_size,
                                 GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN,
                                 node);
                if (p)
                        return p;
        }
        p = vzalloc_node(alloc_size, node);
        if (p)
                return p;

        free_shared_memory(alloc_size);

        return NULL;
}

static void dm_kvfree(void *ptr, size_t alloc_size)
{
        if (!ptr)
                return;

        free_shared_memory(alloc_size);

        if (is_vmalloc_addr(ptr))
                vfree(ptr);
        else
                kfree(ptr);
}
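
/*
 * Final teardown of a region: frees the id strings, every per-CPU counter
 * array and the shared structure itself. It is used both directly and as
 * an RCU callback.
 */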
static void dm_stat_free(struct rcu_head *head)
{
        int cpu;
        struct dm_stat *s = container_of(head, struct dm_stat, rcu_head);

        kfree(s->program_id);
        kfree(s->aux_data);
        for_each_possible_cpu(cpu)
                dm_kvfree(s->stat_percpu[cpu], s->percpu_alloc_size);
        dm_kvfree(s, s->shared_alloc_size);
}

static int dm_stat_in_flight(struct dm_stat_shared *shared)
{
        return atomic_read(&shared->in_flight[READ]) +
               atomic_read(&shared->in_flight[WRITE]);
}
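
/*
 * Per-device setup: an empty region list plus a per-CPU record of the last
 * bio seen, which dm_stats_account_io() later uses to estimate whether a
 * request would have been merged.
 */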
void dm_stats_init(struct dm_stats *stats)
{
        int cpu;
        struct dm_stats_last_position *last;

        mutex_init(&stats->mutex);
        INIT_LIST_HEAD(&stats->list);
        stats->last = alloc_percpu(struct dm_stats_last_position);
        for_each_possible_cpu(cpu) {
                last = per_cpu_ptr(stats->last, cpu);
                last->last_sector = (sector_t)ULLONG_MAX;
                last->last_rw = UINT_MAX;
        }
}

void dm_stats_cleanup(struct dm_stats *stats)
{
        size_t ni;
        struct dm_stat *s;
        struct dm_stat_shared *shared;

        while (!list_empty(&stats->list)) {
                s = container_of(stats->list.next, struct dm_stat, list_entry);
                list_del(&s->list_entry);
                for (ni = 0; ni < s->n_entries; ni++) {
                        shared = &s->stat_shared[ni];
                        if (WARN_ON(dm_stat_in_flight(shared))) {
                                DMCRIT("leaked in-flight counter at index %lu "
                                       "(start %llu, end %llu, step %llu): reads %d, writes %d",
                                       (unsigned long)ni,
                                       (unsigned long long)s->start,
                                       (unsigned long long)s->end,
                                       (unsigned long long)s->step,
                                       atomic_read(&shared->in_flight[READ]),
                                       atomic_read(&shared->in_flight[WRITE]));
                        }
                }
                dm_stat_free(&s->rcu_head);
        }
        free_percpu(stats->last);
}
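
/*
 * Create a new region covering [start, end) split into areas of 'step'
 * sectors, i.e. n_entries = (end - start + step - 1) / step. For example
 * (illustrative values), start=0, end=1000, step=256 gives 4 areas. The
 * device is suspended and resumed around the list insertion so the new
 * counters start out exact.
 */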
static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
                           sector_t step, const char *program_id, const char *aux_data,
                           void (*suspend_callback)(struct mapped_device *),
                           void (*resume_callback)(struct mapped_device *),
                           struct mapped_device *md)
{
        struct list_head *l;
        struct dm_stat *s, *tmp_s;
        sector_t n_entries;
        size_t ni;
        size_t shared_alloc_size;
        size_t percpu_alloc_size;
        struct dm_stat_percpu *p;
        int cpu;
        int ret_id;
        int r;

        if (end < start || !step)
                return -EINVAL;

        n_entries = end - start;
        if (dm_sector_div64(n_entries, step))
                n_entries++;

        if (n_entries != (size_t)n_entries || !(size_t)(n_entries + 1))
                return -EOVERFLOW;

        shared_alloc_size = sizeof(struct dm_stat) + (size_t)n_entries * sizeof(struct dm_stat_shared);
        if ((shared_alloc_size - sizeof(struct dm_stat)) / sizeof(struct dm_stat_shared) != n_entries)
                return -EOVERFLOW;

        percpu_alloc_size = (size_t)n_entries * sizeof(struct dm_stat_percpu);
        if (percpu_alloc_size / sizeof(struct dm_stat_percpu) != n_entries)
                return -EOVERFLOW;

        if (!check_shared_memory(shared_alloc_size + num_possible_cpus() * percpu_alloc_size))
                return -ENOMEM;

        s = dm_kvzalloc(shared_alloc_size, NUMA_NO_NODE);
        if (!s)
                return -ENOMEM;

        s->n_entries = n_entries;
        s->start = start;
        s->end = end;
        s->step = step;
        s->shared_alloc_size = shared_alloc_size;
        s->percpu_alloc_size = percpu_alloc_size;

        s->program_id = kstrdup(program_id, GFP_KERNEL);
        if (!s->program_id) {
                r = -ENOMEM;
                goto out;
        }
        s->aux_data = kstrdup(aux_data, GFP_KERNEL);
        if (!s->aux_data) {
                r = -ENOMEM;
                goto out;
        }

        for (ni = 0; ni < n_entries; ni++) {
                atomic_set(&s->stat_shared[ni].in_flight[READ], 0);
                atomic_set(&s->stat_shared[ni].in_flight[WRITE], 0);
        }

        for_each_possible_cpu(cpu) {
                p = dm_kvzalloc(percpu_alloc_size, cpu_to_node(cpu));
                if (!p) {
                        r = -ENOMEM;
                        goto out;
                }
                s->stat_percpu[cpu] = p;
        }

        /*
         * Suspend/resume to make sure there is no i/o in flight,
         * so that newly created statistics will be exact.
         *
         * (note: we couldn't suspend earlier because we must not
         * allocate memory while suspended)
         */
        suspend_callback(md);

        mutex_lock(&stats->mutex);
        s->id = 0;
        list_for_each(l, &stats->list) {
                tmp_s = container_of(l, struct dm_stat, list_entry);
                if (WARN_ON(tmp_s->id < s->id)) {
                        r = -EINVAL;
                        goto out_unlock_resume;
                }
                if (tmp_s->id > s->id)
                        break;
                if (unlikely(s->id == INT_MAX)) {
                        r = -ENFILE;
                        goto out_unlock_resume;
                }
                s->id++;
        }
        ret_id = s->id;
        list_add_tail_rcu(&s->list_entry, l);
        mutex_unlock(&stats->mutex);

        resume_callback(md);

        return ret_id;

out_unlock_resume:
        mutex_unlock(&stats->mutex);
        resume_callback(md);
out:
        dm_stat_free(&s->rcu_head);
        return r;
}
static struct dm_stat *__dm_stats_find(struct dm_stats *stats, int id)
{
        struct dm_stat *s;

        list_for_each_entry(s, &stats->list, list_entry) {
                if (s->id > id)
                        break;
                if (s->id == id)
                        return s;
        }

        return NULL;
}
static int dm_stats_delete(struct dm_stats *stats, int id)
{
        struct dm_stat *s;
        int cpu;

        mutex_lock(&stats->mutex);

        s = __dm_stats_find(stats, id);
        if (!s) {
                mutex_unlock(&stats->mutex);
                return -ENOENT;
        }

        list_del_rcu(&s->list_entry);
        mutex_unlock(&stats->mutex);

        /*
         * vfree can't be called from RCU callback, so if anything was
         * allocated with vmalloc (including the per-CPU counter arrays)
         * we must wait for the grace period and free synchronously.
         */
        for_each_possible_cpu(cpu)
                if (is_vmalloc_addr(s->stat_percpu) ||
                    is_vmalloc_addr(s->stat_percpu[cpu]))
                        goto do_sync_free;
        if (is_vmalloc_addr(s)) {
do_sync_free:
                synchronize_rcu_expedited();
                dm_stat_free(&s->rcu_head);
        } else {
                ACCESS_ONCE(dm_stat_need_rcu_barrier) = 1;
                call_rcu(&s->rcu_head, dm_stat_free);
        }
        return 0;
}
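
/*
 * List all regions, optionally filtered by program_id, one line per
 * region. A line such as "0: 0+2097152 65536 myprog -" (values are
 * illustrative) describes region 0 covering sectors 0..2097151 in
 * 65536-sector areas, created by "myprog" with no auxiliary data.
 */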
static int dm_stats_list(struct dm_stats *stats, const char *program,
                         char *result, unsigned maxlen)
{
        struct dm_stat *s;
        sector_t len;
        unsigned sz = 0;

        /*
         * Output format:
         *   <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
         */

        mutex_lock(&stats->mutex);
        list_for_each_entry(s, &stats->list, list_entry) {
                if (!program || !strcmp(program, s->program_id)) {
                        len = s->end - s->start;
                        DMEMIT("%d: %llu+%llu %llu %s %s\n", s->id,
                               (unsigned long long)s->start,
                               (unsigned long long)len,
                               (unsigned long long)s->step,
                               s->program_id,
                               s->aux_data);
                }
        }
        mutex_unlock(&stats->mutex);

        return 1;
}
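
/*
 * Fold the time elapsed since the last update into the busy-time counters:
 * each direction that currently has I/O in flight gets the elapsed jiffies
 * added to its io_ticks, and time_in_queue grows by the elapsed time
 * multiplied by the total number of in-flight requests.
 */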
static void dm_stat_round(struct dm_stat_shared *shared, struct dm_stat_percpu *p)
{
        /*
         * This is racy, but so is part_round_stats_single.
         */
        unsigned long now = jiffies;
        unsigned in_flight_read;
        unsigned in_flight_write;
        unsigned long difference = now - shared->stamp;

        if (!difference)
                return;
        in_flight_read = (unsigned)atomic_read(&shared->in_flight[READ]);
        in_flight_write = (unsigned)atomic_read(&shared->in_flight[WRITE]);
        if (in_flight_read)
                p->io_ticks[READ] += difference;
        if (in_flight_write)
                p->io_ticks[WRITE] += difference;
        if (in_flight_read + in_flight_write) {
                p->io_ticks_total += difference;
                p->time_in_queue += (in_flight_read + in_flight_write) * difference;
        }
        shared->stamp = now;
}
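
/*
 * Account one bio fragment to a single area: at submission only the
 * in-flight counter is incremented; at completion it is decremented and
 * sectors, ios, merges and ticks are added to this CPU's counters.
 */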
static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
                              unsigned long bi_rw, sector_t len, bool merged,
                              bool end, unsigned long duration)
{
        unsigned long idx = bi_rw & REQ_WRITE;
        struct dm_stat_shared *shared = &s->stat_shared[entry];
        struct dm_stat_percpu *p;

        /*
         * For strict correctness we should use local_irq_save/restore
         * instead of preempt_disable/enable.
         *
         * preempt_disable/enable is racy if the driver finishes bios
         * from non-interrupt context as well as from interrupt context
         * or from more different interrupts.
         *
         * On 64-bit architectures the race only results in not counting some
         * events, so it is acceptable. On 32-bit architectures the race could
         * cause the counter going off by 2^32, so we need to do proper locking
         * there.
         *
         * part_stat_lock()/part_stat_unlock() have this race too.
         */
#if BITS_PER_LONG == 32
        unsigned long flags;
        local_irq_save(flags);
#else
        preempt_disable();
#endif
        p = &s->stat_percpu[smp_processor_id()][entry];

        if (!end) {
                dm_stat_round(shared, p);
                atomic_inc(&shared->in_flight[idx]);
        } else {
                dm_stat_round(shared, p);
                atomic_dec(&shared->in_flight[idx]);
                p->sectors[idx] += len;
                p->ios[idx] += 1;
                p->merges[idx] += merged;
                p->ticks[idx] += duration;
        }

#if BITS_PER_LONG == 32
        local_irq_restore(flags);
#else
        preempt_enable();
#endif
}
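
/*
 * Account one bio against one region: clip the bio to [s->start, s->end),
 * then walk it area by area, charging each step-sized fragment to the
 * corresponding entry via dm_stat_for_entry().
 */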
static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
                          sector_t bi_sector, sector_t end_sector,
                          bool end, unsigned long duration,
                          struct dm_stats_aux *stats_aux)
{
        sector_t rel_sector, offset, todo, fragment_len;
        size_t entry;

        if (end_sector <= s->start || bi_sector >= s->end)
                return;

        if (unlikely(bi_sector < s->start)) {
                rel_sector = 0;
                todo = end_sector - s->start;
        } else {
                rel_sector = bi_sector - s->start;
                todo = end_sector - bi_sector;
        }
        if (unlikely(end_sector > s->end))
                todo -= (end_sector - s->end);

        offset = dm_sector_div64(rel_sector, s->step);
        entry = rel_sector;
        do {
                if (WARN_ON_ONCE(entry >= s->n_entries)) {
                        DMCRIT("Invalid area access in region id %d", s->id);
                        return;
                }
                fragment_len = todo;
                if (fragment_len > s->step - offset)
                        fragment_len = s->step - offset;
                dm_stat_for_entry(s, entry, bi_rw, fragment_len,
                                  stats_aux->merged, end, duration);
                todo -= fragment_len;
                entry++;
                offset = 0;
        } while (unlikely(todo != 0));
}
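
/*
 * Entry point called by dm core for every bio, once at submission
 * (end == false) and once at completion (end == true, with the duration
 * in jiffies). At submission time the merged flag is estimated from the
 * per-CPU record of the previous bio's end sector and type; the bio is
 * then accounted against every registered region under rcu_read_lock().
 */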
void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
                         sector_t bi_sector, unsigned bi_sectors, bool end,
                         unsigned long duration, struct dm_stats_aux *stats_aux)
{
        struct dm_stat *s;
        sector_t end_sector;
        struct dm_stats_last_position *last;

        if (unlikely(!bi_sectors))
                return;

        end_sector = bi_sector + bi_sectors;

        if (!end) {
                /*
                 * A race condition can at worst result in the merged flag being
                 * misrepresented, so we don't have to disable preemption here.
                 */
                last = __this_cpu_ptr(stats->last);
                stats_aux->merged =
                        (bi_sector == (ACCESS_ONCE(last->last_sector) &&
                                       ((bi_rw & (REQ_WRITE | REQ_DISCARD)) ==
                                        (ACCESS_ONCE(last->last_rw) & (REQ_WRITE | REQ_DISCARD)))
                                       ));
                ACCESS_ONCE(last->last_sector) = end_sector;
                ACCESS_ONCE(last->last_rw) = bi_rw;
        }

        rcu_read_lock();

        list_for_each_entry_rcu(s, &stats->list, list_entry)
                __dm_stat_bio(s, bi_rw, bi_sector, end_sector, end, duration, stats_aux);

        rcu_read_unlock();
}
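
/*
 * Snapshot one area into shared->tmp by summing the counters of every
 * possible CPU; dm_stat_round() is run first on the local CPU so the
 * busy-time counters are up to date. The counters are read with
 * ACCESS_ONCE only, so a snapshot taken while I/O is running is
 * approximate.
 */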
static void __dm_stat_init_temporary_percpu_totals(struct dm_stat_shared *shared,
                                                   struct dm_stat *s, size_t x)
{
        int cpu;
        struct dm_stat_percpu *p;

        local_irq_disable();
        p = &s->stat_percpu[smp_processor_id()][x];
        dm_stat_round(shared, p);
        local_irq_enable();

        memset(&shared->tmp, 0, sizeof(shared->tmp));
        for_each_possible_cpu(cpu) {
                p = &s->stat_percpu[cpu][x];
                shared->tmp.sectors[READ] += ACCESS_ONCE(p->sectors[READ]);
                shared->tmp.sectors[WRITE] += ACCESS_ONCE(p->sectors[WRITE]);
                shared->tmp.ios[READ] += ACCESS_ONCE(p->ios[READ]);
                shared->tmp.ios[WRITE] += ACCESS_ONCE(p->ios[WRITE]);
                shared->tmp.merges[READ] += ACCESS_ONCE(p->merges[READ]);
                shared->tmp.merges[WRITE] += ACCESS_ONCE(p->merges[WRITE]);
                shared->tmp.ticks[READ] += ACCESS_ONCE(p->ticks[READ]);
                shared->tmp.ticks[WRITE] += ACCESS_ONCE(p->ticks[WRITE]);
                shared->tmp.io_ticks[READ] += ACCESS_ONCE(p->io_ticks[READ]);
                shared->tmp.io_ticks[WRITE] += ACCESS_ONCE(p->io_ticks[WRITE]);
                shared->tmp.io_ticks_total += ACCESS_ONCE(p->io_ticks_total);
                shared->tmp.time_in_queue += ACCESS_ONCE(p->time_in_queue);
        }
}
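
/*
 * Clearing does not touch the other CPUs' counters: the snapshot held in
 * shared->tmp is simply subtracted from the local CPU's counters, which
 * makes the summed totals read back as zero while remote CPUs keep
 * updating their own counters without locking.
 */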
static void __dm_stat_clear(struct dm_stat *s, size_t idx_start, size_t idx_end,
                            bool init_tmp_percpu_totals)
{
        size_t x;
        struct dm_stat_shared *shared;
        struct dm_stat_percpu *p;

        for (x = idx_start; x < idx_end; x++) {
                shared = &s->stat_shared[x];
                if (init_tmp_percpu_totals)
                        __dm_stat_init_temporary_percpu_totals(shared, s, x);
                local_irq_disable();
                p = &s->stat_percpu[smp_processor_id()][x];
                p->sectors[READ] -= shared->tmp.sectors[READ];
                p->sectors[WRITE] -= shared->tmp.sectors[WRITE];
                p->ios[READ] -= shared->tmp.ios[READ];
                p->ios[WRITE] -= shared->tmp.ios[WRITE];
                p->merges[READ] -= shared->tmp.merges[READ];
                p->merges[WRITE] -= shared->tmp.merges[WRITE];
                p->ticks[READ] -= shared->tmp.ticks[READ];
                p->ticks[WRITE] -= shared->tmp.ticks[WRITE];
                p->io_ticks[READ] -= shared->tmp.io_ticks[READ];
                p->io_ticks[WRITE] -= shared->tmp.io_ticks[WRITE];
                p->io_ticks_total -= shared->tmp.io_ticks_total;
                p->time_in_queue -= shared->tmp.time_in_queue;
                local_irq_enable();
        }
}

static int dm_stats_clear(struct dm_stats *stats, int id)
{
        struct dm_stat *s;

        mutex_lock(&stats->mutex);

        s = __dm_stats_find(stats, id);
        if (!s) {
                mutex_unlock(&stats->mutex);
                return -ENOENT;
        }

        __dm_stat_clear(s, 0, s->n_entries, true);

        mutex_unlock(&stats->mutex);

        return 1;
}
/*
 * This is like jiffies_to_msec, but works for 64-bit values.
 */
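/*
 * jiffies_to_msecs() only takes an unsigned long, so the 64-bit value is
 * processed in 22-bit chunks: each chunk is converted separately and the
 * upper chunks are weighted by jiffies_to_msecs(1 << 22) and its square.
 */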
static unsigned long long dm_jiffies_to_msec64(unsigned long long j)
{
        unsigned long long result = 0;
        unsigned mult;

        if (j)
                result = jiffies_to_msecs(j & 0x3fffff);
        if (j >= 1 << 22) {
                mult = jiffies_to_msecs(1 << 22);
                result += (unsigned long long)mult *
                          (unsigned long long)jiffies_to_msecs((j >> 22) & 0x3fffff);
        }
        if (j >= 1ULL << 44)
                result += (unsigned long long)mult * (unsigned long long)mult *
                          (unsigned long long)jiffies_to_msecs(j >> 44);

        return result;
}
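
/*
 * Emit one line per area in the requested index range, in the same spirit
 * as /proc/diskstats:
 *
 *   <start>+<step> reads reads-merged sectors-read read-msec
 *                  writes writes-merged sectors-written write-msec
 *                  in-flight io-ticks-msec time-in-queue-msec
 *                  read-io-ticks-msec write-io-ticks-msec
 */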
static int dm_stats_print(struct dm_stats *stats, int id,
                          size_t idx_start, size_t idx_len,
                          bool clear, char *result, unsigned maxlen)
{
        unsigned sz = 0;
        struct dm_stat *s;
        size_t x;
        sector_t start, end, step;
        size_t idx_end;
        struct dm_stat_shared *shared;

        /*
         * Output format:
         *   <start_sector>+<length> counters
         */

        mutex_lock(&stats->mutex);

        s = __dm_stats_find(stats, id);
        if (!s) {
                mutex_unlock(&stats->mutex);
                return -ENOENT;
        }

        idx_end = idx_start + idx_len;
        if (idx_end < idx_start ||
            idx_end > s->n_entries)
                idx_end = s->n_entries;

        if (idx_start > idx_end)
                idx_start = idx_end;

        step = s->step;
        start = s->start + (step * idx_start);

        for (x = idx_start; x < idx_end; x++, start = end) {
                shared = &s->stat_shared[x];
                end = start + step;
                if (unlikely(end > s->end))
                        end = s->end;

                __dm_stat_init_temporary_percpu_totals(shared, s, x);

                DMEMIT("%llu+%llu %llu %llu %llu %llu %llu %llu %llu %llu %d %llu %llu %llu %llu\n",
                       (unsigned long long)start,
                       (unsigned long long)step,
                       shared->tmp.ios[READ],
                       shared->tmp.merges[READ],
                       shared->tmp.sectors[READ],
                       dm_jiffies_to_msec64(shared->tmp.ticks[READ]),
                       shared->tmp.ios[WRITE],
                       shared->tmp.merges[WRITE],
                       shared->tmp.sectors[WRITE],
                       dm_jiffies_to_msec64(shared->tmp.ticks[WRITE]),
                       dm_stat_in_flight(shared),
                       dm_jiffies_to_msec64(shared->tmp.io_ticks_total),
                       dm_jiffies_to_msec64(shared->tmp.time_in_queue),
                       dm_jiffies_to_msec64(shared->tmp.io_ticks[READ]),
                       dm_jiffies_to_msec64(shared->tmp.io_ticks[WRITE]));

                if (unlikely(sz + 1 >= maxlen))
                        goto buffer_overflow;
        }

        if (clear)
                __dm_stat_clear(s, idx_start, idx_end, false);

buffer_overflow:
        mutex_unlock(&stats->mutex);

        return 1;
}
static int dm_stats_set_aux(struct dm_stats *stats, int id, const char *aux_data)
{
        struct dm_stat *s;
        const char *new_aux_data;

        mutex_lock(&stats->mutex);

        s = __dm_stats_find(stats, id);
        if (!s) {
                mutex_unlock(&stats->mutex);
                return -ENOENT;
        }

        new_aux_data = kstrdup(aux_data, GFP_KERNEL);
        if (!new_aux_data) {
                mutex_unlock(&stats->mutex);
                return -ENOMEM;
        }

        kfree(s->aux_data);
        s->aux_data = new_aux_data;

        mutex_unlock(&stats->mutex);

        return 0;
}
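
/*
 * Parse an @stats_create message. The range is either "-" for the whole
 * device or "<start>+<length>" in sectors; the step is either an area size
 * in sectors or "/<number_of_areas>". For example (illustrative values),
 * "@stats_create - /100" creates 100 equally sized areas covering the
 * whole device, and "@stats_create 0+1024 256 myprog" creates four
 * 256-sector areas tagged with program_id "myprog".
 */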
static int message_stats_create(struct mapped_device *md,
                                unsigned argc, char **argv,
                                char *result, unsigned maxlen)
{
        int id;
        char dummy;
        unsigned long long start, end, len, step;
        unsigned divisor;
        const char *program_id, *aux_data;

        /*
         * Input format:
         *   <range> <step> [<program_id> [<aux_data>]]
         */

        if (argc < 3 || argc > 5)
                return -EINVAL;

        if (!strcmp(argv[1], "-")) {
                start = 0;
                len = dm_get_size(md);
                if (!len)
                        len = 1;
        } else if (sscanf(argv[1], "%llu+%llu%c", &start, &len, &dummy) != 2 ||
                   start != (sector_t)start || len != (sector_t)len)
                return -EINVAL;

        end = start + len;
        if (start >= end)
                return -EINVAL;

        if (sscanf(argv[2], "/%u%c", &divisor, &dummy) == 1) {
                step = end - start;
                if (do_div(step, divisor))
                        step++;
                if (!step)
                        step = 1;
        } else if (sscanf(argv[2], "%llu%c", &step, &dummy) != 1 ||
                   step != (sector_t)step || !step)
                return -EINVAL;

        program_id = "-";
        aux_data = "-";

        if (argc > 3)
                program_id = argv[3];

        if (argc > 4)
                aux_data = argv[4];

        /*
         * If a buffer overflow happens after we created the region,
         * it's too late (the userspace would retry with a larger
         * buffer, but the region id that caused the overflow is already
         * leaked). So we must detect buffer overflow in advance.
         */
        snprintf(result, maxlen, "%d", INT_MAX);
        if (dm_message_test_buffer_overflow(result, maxlen))
                return 1;

        id = dm_stats_create(dm_get_stats(md), start, end, step, program_id, aux_data,
                             dm_internal_suspend, dm_internal_resume, md);
        if (id < 0)
                return id;

        snprintf(result, maxlen, "%d", id);
        return 1;
}
static int message_stats_delete(struct mapped_device *md,
                                unsigned argc, char **argv)
{
        int id;
        char dummy;

        if (argc != 2)
                return -EINVAL;

        if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
                return -EINVAL;

        return dm_stats_delete(dm_get_stats(md), id);
}

static int message_stats_clear(struct mapped_device *md,
                               unsigned argc, char **argv)
{
        int id;
        char dummy;

        if (argc != 2)
                return -EINVAL;

        if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
                return -EINVAL;

        return dm_stats_clear(dm_get_stats(md), id);
}

static int message_stats_list(struct mapped_device *md,
                              unsigned argc, char **argv,
                              char *result, unsigned maxlen)
{
        int r;
        const char *program = NULL;

        if (argc < 1 || argc > 2)
                return -EINVAL;

        if (argc > 1) {
                program = kstrdup(argv[1], GFP_KERNEL);
                if (!program)
                        return -ENOMEM;
        }

        r = dm_stats_list(dm_get_stats(md), program, result, maxlen);

        kfree(program);

        return r;
}

static int message_stats_print(struct mapped_device *md,
                               unsigned argc, char **argv, bool clear,
                               char *result, unsigned maxlen)
{
        int id;
        char dummy;
        unsigned long idx_start = 0, idx_len = ULONG_MAX;

        if (argc != 2 && argc != 4)
                return -EINVAL;

        if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
                return -EINVAL;

        if (argc > 3) {
                if (strcmp(argv[2], "-") &&
                    sscanf(argv[2], "%lu%c", &idx_start, &dummy) != 1)
                        return -EINVAL;
                if (strcmp(argv[3], "-") &&
                    sscanf(argv[3], "%lu%c", &idx_len, &dummy) != 1)
                        return -EINVAL;
        }

        return dm_stats_print(dm_get_stats(md), id, idx_start, idx_len, clear,
                              result, maxlen);
}

static int message_stats_set_aux(struct mapped_device *md,
                                 unsigned argc, char **argv)
{
        int id;
        char dummy;

        if (argc != 3)
                return -EINVAL;

        if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
                return -EINVAL;

        return dm_stats_set_aux(dm_get_stats(md), id, argv[2]);
}
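
/*
 * Dispatcher for the @stats_* target messages, normally sent from
 * userspace with something like (illustrative)
 *
 *   dmsetup message <device> 0 "@stats_create - /100"
 *   dmsetup message <device> 0 "@stats_print 0"
 *
 * Returns 2 if the message is not a statistics message at all.
 */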
int dm_stats_message(struct mapped_device *md, unsigned argc, char **argv,
                     char *result, unsigned maxlen)
{
        int r;

        if (dm_request_based(md)) {
                DMWARN("Statistics are only supported for bio-based devices");
                return -EOPNOTSUPP;
        }

        /* All messages here must start with '@' */
        if (!strcasecmp(argv[0], "@stats_create"))
                r = message_stats_create(md, argc, argv, result, maxlen);
        else if (!strcasecmp(argv[0], "@stats_delete"))
                r = message_stats_delete(md, argc, argv);
        else if (!strcasecmp(argv[0], "@stats_clear"))
                r = message_stats_clear(md, argc, argv);
        else if (!strcasecmp(argv[0], "@stats_list"))
                r = message_stats_list(md, argc, argv, result, maxlen);
        else if (!strcasecmp(argv[0], "@stats_print"))
                r = message_stats_print(md, argc, argv, false, result, maxlen);
        else if (!strcasecmp(argv[0], "@stats_print_clear"))
                r = message_stats_print(md, argc, argv, true, result, maxlen);
        else if (!strcasecmp(argv[0], "@stats_set_aux"))
                r = message_stats_set_aux(md, argc, argv);
        else
                return 2; /* this wasn't a stats message */

        if (r == -EINVAL)
                DMWARN("Invalid parameters for message %s", argv[0]);

        return r;
}
int __init dm_statistics_init(void)
{
        dm_stat_need_rcu_barrier = 0;
        return 0;
}

void dm_statistics_exit(void)
{
        if (dm_stat_need_rcu_barrier)
                rcu_barrier();
        if (WARN_ON(shared_memory_amount))
                DMCRIT("shared_memory_amount leaked: %lu", shared_memory_amount);
}

module_param_named(stats_current_allocated_bytes, shared_memory_amount, ulong, S_IRUGO);
MODULE_PARM_DESC(stats_current_allocated_bytes, "Memory currently used by statistics");