dm-stats.c

#include <linux/errno.h>
#include <linux/numa.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/threads.h>
#include <linux/preempt.h>
#include <linux/irqflags.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/device-mapper.h>

#include "dm.h"
#include "dm-stats.h"

#define DM_MSG_PREFIX "stats"

static int dm_stat_need_rcu_barrier;
/*
 * Using 64-bit values to avoid overflow (which is a
 * problem that block/genhd.c's IO accounting has).
 */
struct dm_stat_percpu {
	unsigned long long sectors[2];
	unsigned long long ios[2];
	unsigned long long merges[2];
	unsigned long long ticks[2];
	unsigned long long io_ticks[2];
	unsigned long long io_ticks_total;
	unsigned long long time_in_queue;
};
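
/*
 * Per-area state shared by all CPUs: the in-flight counters and the
 * timestamp used by dm_stat_round(). @tmp is scratch space used to
 * aggregate the per-CPU counters when reporting or clearing.
 */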
struct dm_stat_shared {
	atomic_t in_flight[2];
	unsigned long stamp;
	struct dm_stat_percpu tmp;
};
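
/*
 * One statistics region: the sector range [start, end) is divided into
 * n_entries areas of @step sectors. Counters are kept in per-CPU arrays
 * indexed by area number.
 */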
struct dm_stat {
	struct list_head list_entry;
	int id;
	size_t n_entries;
	sector_t start;
	sector_t end;
	sector_t step;
	const char *program_id;
	const char *aux_data;
	struct rcu_head rcu_head;
	size_t shared_alloc_size;
	size_t percpu_alloc_size;
	struct dm_stat_percpu *stat_percpu[NR_CPUS];
	struct dm_stat_shared stat_shared[0];
};

struct dm_stats_last_position {
	sector_t last_sector;
	unsigned last_rw;
};

/*
 * A typo on the command line could possibly make the kernel run out of memory
 * and crash. To prevent the crash we account all used memory. We fail if we
 * exhaust 1/4 of all memory or 1/2 of vmalloc space.
 */
#define DM_STATS_MEMORY_FACTOR		4
#define DM_STATS_VMALLOC_FACTOR		2

static DEFINE_SPINLOCK(shared_memory_lock);

static unsigned long shared_memory_amount;
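
/* Check the limits described above; called with shared_memory_lock held. */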
static bool __check_shared_memory(size_t alloc_size)
{
	size_t a;

	a = shared_memory_amount + alloc_size;
	if (a < shared_memory_amount)
		return false;
	if (a >> PAGE_SHIFT > totalram_pages / DM_STATS_MEMORY_FACTOR)
		return false;
#ifdef CONFIG_MMU
	if (a > (VMALLOC_END - VMALLOC_START) / DM_STATS_VMALLOC_FACTOR)
		return false;
#endif
	return true;
}

static bool check_shared_memory(size_t alloc_size)
{
	bool ret;

	spin_lock_irq(&shared_memory_lock);
	ret = __check_shared_memory(alloc_size);
	spin_unlock_irq(&shared_memory_lock);

	return ret;
}

static bool claim_shared_memory(size_t alloc_size)
{
	spin_lock_irq(&shared_memory_lock);

	if (!__check_shared_memory(alloc_size)) {
		spin_unlock_irq(&shared_memory_lock);
		return false;
	}

	shared_memory_amount += alloc_size;

	spin_unlock_irq(&shared_memory_lock);

	return true;
}
static void free_shared_memory(size_t alloc_size)
{
	unsigned long flags;

	spin_lock_irqsave(&shared_memory_lock, flags);

	if (WARN_ON_ONCE(shared_memory_amount < alloc_size)) {
		spin_unlock_irqrestore(&shared_memory_lock, flags);
		DMCRIT("Memory usage accounting bug.");
		return;
	}

	shared_memory_amount -= alloc_size;

	spin_unlock_irqrestore(&shared_memory_lock, flags);
}
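
/*
 * Allocate zeroed memory charged against the shared-memory limit:
 * try kmalloc first and fall back to vmalloc for large allocations.
 */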
static void *dm_kvzalloc(size_t alloc_size, int node)
{
	void *p;

	if (!claim_shared_memory(alloc_size))
		return NULL;

	if (alloc_size <= KMALLOC_MAX_SIZE) {
		p = kzalloc_node(alloc_size, GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN, node);
		if (p)
			return p;
	}
	p = vzalloc_node(alloc_size, node);
	if (p)
		return p;

	free_shared_memory(alloc_size);

	return NULL;
}

static void dm_kvfree(void *ptr, size_t alloc_size)
{
	if (!ptr)
		return;

	free_shared_memory(alloc_size);

	if (is_vmalloc_addr(ptr))
		vfree(ptr);
	else
		kfree(ptr);
}
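
/* Free a region; also used as the RCU callback in dm_stats_delete(). */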
static void dm_stat_free(struct rcu_head *head)
{
	int cpu;
	struct dm_stat *s = container_of(head, struct dm_stat, rcu_head);

	kfree(s->program_id);
	kfree(s->aux_data);
	for_each_possible_cpu(cpu)
		dm_kvfree(s->stat_percpu[cpu], s->percpu_alloc_size);
	dm_kvfree(s, s->shared_alloc_size);
}

static int dm_stat_in_flight(struct dm_stat_shared *shared)
{
	return atomic_read(&shared->in_flight[READ]) +
	       atomic_read(&shared->in_flight[WRITE]);
}
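
/*
 * Per-device setup and teardown: initialize the region list and the
 * per-CPU "last position" record used for merge detection.
 */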
void dm_stats_init(struct dm_stats *stats)
{
	int cpu;
	struct dm_stats_last_position *last;

	mutex_init(&stats->mutex);
	INIT_LIST_HEAD(&stats->list);
	stats->last = alloc_percpu(struct dm_stats_last_position);
	for_each_possible_cpu(cpu) {
		last = per_cpu_ptr(stats->last, cpu);
		last->last_sector = (sector_t)ULLONG_MAX;
		last->last_rw = UINT_MAX;
	}
}

void dm_stats_cleanup(struct dm_stats *stats)
{
	size_t ni;
	struct dm_stat *s;
	struct dm_stat_shared *shared;

	while (!list_empty(&stats->list)) {
		s = container_of(stats->list.next, struct dm_stat, list_entry);
		list_del(&s->list_entry);
		for (ni = 0; ni < s->n_entries; ni++) {
			shared = &s->stat_shared[ni];
			if (WARN_ON(dm_stat_in_flight(shared))) {
				DMCRIT("leaked in-flight counter at index %lu "
				       "(start %llu, end %llu, step %llu): reads %d, writes %d",
				       (unsigned long)ni,
				       (unsigned long long)s->start,
				       (unsigned long long)s->end,
				       (unsigned long long)s->step,
				       atomic_read(&shared->in_flight[READ]),
				       atomic_read(&shared->in_flight[WRITE]));
			}
		}
		dm_stat_free(&s->rcu_head);
	}
	free_percpu(stats->last);
}
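
/*
 * Create a region covering [start, end) with the given step and return its
 * id (or a negative errno). The device is suspended and resumed around the
 * list insertion so that the new counters start out exact.
 */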
static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
			   sector_t step, const char *program_id, const char *aux_data,
			   void (*suspend_callback)(struct mapped_device *),
			   void (*resume_callback)(struct mapped_device *),
			   struct mapped_device *md)
{
	struct list_head *l;
	struct dm_stat *s, *tmp_s;
	sector_t n_entries;
	size_t ni;
	size_t shared_alloc_size;
	size_t percpu_alloc_size;
	struct dm_stat_percpu *p;
	int cpu;
	int ret_id;
	int r;

	if (end < start || !step)
		return -EINVAL;

	n_entries = end - start;
	if (dm_sector_div64(n_entries, step))
		n_entries++;

	if (n_entries != (size_t)n_entries || !(size_t)(n_entries + 1))
		return -EOVERFLOW;

	shared_alloc_size = sizeof(struct dm_stat) + (size_t)n_entries * sizeof(struct dm_stat_shared);
	if ((shared_alloc_size - sizeof(struct dm_stat)) / sizeof(struct dm_stat_shared) != n_entries)
		return -EOVERFLOW;

	percpu_alloc_size = (size_t)n_entries * sizeof(struct dm_stat_percpu);
	if (percpu_alloc_size / sizeof(struct dm_stat_percpu) != n_entries)
		return -EOVERFLOW;

	if (!check_shared_memory(shared_alloc_size + num_possible_cpus() * percpu_alloc_size))
		return -ENOMEM;

	s = dm_kvzalloc(shared_alloc_size, NUMA_NO_NODE);
	if (!s)
		return -ENOMEM;

	s->n_entries = n_entries;
	s->start = start;
	s->end = end;
	s->step = step;
	s->shared_alloc_size = shared_alloc_size;
	s->percpu_alloc_size = percpu_alloc_size;

	s->program_id = kstrdup(program_id, GFP_KERNEL);
	if (!s->program_id) {
		r = -ENOMEM;
		goto out;
	}
	s->aux_data = kstrdup(aux_data, GFP_KERNEL);
	if (!s->aux_data) {
		r = -ENOMEM;
		goto out;
	}

	for (ni = 0; ni < n_entries; ni++) {
		atomic_set(&s->stat_shared[ni].in_flight[READ], 0);
		atomic_set(&s->stat_shared[ni].in_flight[WRITE], 0);
	}

	for_each_possible_cpu(cpu) {
		p = dm_kvzalloc(percpu_alloc_size, cpu_to_node(cpu));
		if (!p) {
			r = -ENOMEM;
			goto out;
		}
		s->stat_percpu[cpu] = p;
	}

	/*
	 * Suspend/resume to make sure there is no i/o in flight,
	 * so that newly created statistics will be exact.
	 *
	 * (note: we couldn't suspend earlier because we must not
	 * allocate memory while suspended)
	 */
	suspend_callback(md);

	mutex_lock(&stats->mutex);
	s->id = 0;
	list_for_each(l, &stats->list) {
		tmp_s = container_of(l, struct dm_stat, list_entry);
		if (WARN_ON(tmp_s->id < s->id)) {
			r = -EINVAL;
			goto out_unlock_resume;
		}
		if (tmp_s->id > s->id)
			break;
		if (unlikely(s->id == INT_MAX)) {
			r = -ENFILE;
			goto out_unlock_resume;
		}
		s->id++;
	}
	ret_id = s->id;
	list_add_tail_rcu(&s->list_entry, l);
	mutex_unlock(&stats->mutex);

	resume_callback(md);

	return ret_id;

out_unlock_resume:
	mutex_unlock(&stats->mutex);
	resume_callback(md);
out:
	dm_stat_free(&s->rcu_head);
	return r;
}
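
/* Look up a region by id; the list is sorted by id. Caller holds stats->mutex. */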
static struct dm_stat *__dm_stats_find(struct dm_stats *stats, int id)
{
	struct dm_stat *s;

	list_for_each_entry(s, &stats->list, list_entry) {
		if (s->id > id)
			break;
		if (s->id == id)
			return s;
	}

	return NULL;
}
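
/*
 * Remove a region. Freeing is normally deferred to an RCU callback; if
 * vmalloc was used we synchronize and free directly, because vfree cannot
 * be called from RCU callback context.
 */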
static int dm_stats_delete(struct dm_stats *stats, int id)
{
	struct dm_stat *s;
	int cpu;

	mutex_lock(&stats->mutex);

	s = __dm_stats_find(stats, id);
	if (!s) {
		mutex_unlock(&stats->mutex);
		return -ENOENT;
	}

	list_del_rcu(&s->list_entry);
	mutex_unlock(&stats->mutex);

	/*
	 * vfree can't be called from RCU callback
	 */
	for_each_possible_cpu(cpu)
		if (is_vmalloc_addr(s->stat_percpu))
			goto do_sync_free;
	if (is_vmalloc_addr(s)) {
do_sync_free:
		synchronize_rcu_expedited();
		dm_stat_free(&s->rcu_head);
	} else {
		ACCESS_ONCE(dm_stat_need_rcu_barrier) = 1;
		call_rcu(&s->rcu_head, dm_stat_free);
	}
	return 0;
}
static int dm_stats_list(struct dm_stats *stats, const char *program,
			 char *result, unsigned maxlen)
{
	struct dm_stat *s;
	sector_t len;
	unsigned sz = 0;

	/*
	 * Output format:
	 *   <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
	 */

	mutex_lock(&stats->mutex);
	list_for_each_entry(s, &stats->list, list_entry) {
		if (!program || !strcmp(program, s->program_id)) {
			len = s->end - s->start;
			DMEMIT("%d: %llu+%llu %llu %s %s\n", s->id,
			       (unsigned long long)s->start,
			       (unsigned long long)len,
			       (unsigned long long)s->step,
			       s->program_id,
			       s->aux_data);
		}
	}
	mutex_unlock(&stats->mutex);

	return 1;
}
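
/*
 * Charge the time elapsed since the last update to the busy and queue
 * counters, in the same (racy) way as part_round_stats_single().
 */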
static void dm_stat_round(struct dm_stat_shared *shared, struct dm_stat_percpu *p)
{
	/*
	 * This is racy, but so is part_round_stats_single.
	 */
	unsigned long now = jiffies;
	unsigned in_flight_read;
	unsigned in_flight_write;
	unsigned long difference = now - shared->stamp;

	if (!difference)
		return;

	in_flight_read = (unsigned)atomic_read(&shared->in_flight[READ]);
	in_flight_write = (unsigned)atomic_read(&shared->in_flight[WRITE]);
	if (in_flight_read)
		p->io_ticks[READ] += difference;
	if (in_flight_write)
		p->io_ticks[WRITE] += difference;
	if (in_flight_read + in_flight_write) {
		p->io_ticks_total += difference;
		p->time_in_queue += (in_flight_read + in_flight_write) * difference;
	}
	shared->stamp = now;
}

static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
			      unsigned long bi_rw, sector_t len, bool merged,
			      bool end, unsigned long duration)
{
	unsigned long idx = bi_rw & REQ_WRITE;
	struct dm_stat_shared *shared = &s->stat_shared[entry];
	struct dm_stat_percpu *p;

	/*
	 * For strict correctness we should use local_irq_disable/enable
	 * instead of preempt_disable/enable.
	 *
	 * This is racy if the driver finishes bios from non-interrupt
	 * context as well as from interrupt context or from more different
	 * interrupts.
	 *
	 * However, the race only results in not counting some events,
	 * so it is acceptable.
	 *
	 * part_stat_lock()/part_stat_unlock() have this race too.
	 */
	preempt_disable();
	p = &s->stat_percpu[smp_processor_id()][entry];

	if (!end) {
		dm_stat_round(shared, p);
		atomic_inc(&shared->in_flight[idx]);
	} else {
		dm_stat_round(shared, p);
		atomic_dec(&shared->in_flight[idx]);
		p->sectors[idx] += len;
		p->ios[idx] += 1;
		p->merges[idx] += merged;
		p->ticks[idx] += duration;
	}

	preempt_enable();
}
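
/*
 * Account a bio against one region: clip it to [start, end) and walk it in
 * step-sized fragments, updating every area it overlaps.
 */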
static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
			  sector_t bi_sector, sector_t end_sector,
			  bool end, unsigned long duration,
			  struct dm_stats_aux *stats_aux)
{
	sector_t rel_sector, offset, todo, fragment_len;
	size_t entry;

	if (end_sector <= s->start || bi_sector >= s->end)
		return;

	if (unlikely(bi_sector < s->start)) {
		rel_sector = 0;
		todo = end_sector - s->start;
	} else {
		rel_sector = bi_sector - s->start;
		todo = end_sector - bi_sector;
	}
	if (unlikely(end_sector > s->end))
		todo -= (end_sector - s->end);

	offset = dm_sector_div64(rel_sector, s->step);
	entry = rel_sector;
	do {
		if (WARN_ON_ONCE(entry >= s->n_entries)) {
			DMCRIT("Invalid area access in region id %d", s->id);
			return;
		}
		fragment_len = todo;
		if (fragment_len > s->step - offset)
			fragment_len = s->step - offset;
		dm_stat_for_entry(s, entry, bi_rw, fragment_len,
				  stats_aux->merged, end, duration);
		todo -= fragment_len;
		entry++;
		offset = 0;
	} while (unlikely(todo != 0));
}
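
/*
 * Account one bio at submission (end == false) or completion (end == true);
 * @duration is in jiffies and is only used on completion.
 */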
void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
			 sector_t bi_sector, unsigned bi_sectors, bool end,
			 unsigned long duration, struct dm_stats_aux *stats_aux)
{
	struct dm_stat *s;
	sector_t end_sector;
	struct dm_stats_last_position *last;

	if (unlikely(!bi_sectors))
		return;

	end_sector = bi_sector + bi_sectors;

	if (!end) {
		/*
		 * A race condition can at worst result in the merged flag being
		 * misrepresented, so we don't have to disable preemption here.
		 */
		last = __this_cpu_ptr(stats->last);
		stats_aux->merged =
			(bi_sector == (ACCESS_ONCE(last->last_sector) &&
				       ((bi_rw & (REQ_WRITE | REQ_DISCARD)) ==
					(ACCESS_ONCE(last->last_rw) & (REQ_WRITE | REQ_DISCARD)))
				       ));
		ACCESS_ONCE(last->last_sector) = end_sector;
		ACCESS_ONCE(last->last_rw) = bi_rw;
	}

	rcu_read_lock();

	list_for_each_entry_rcu(s, &stats->list, list_entry)
		__dm_stat_bio(s, bi_rw, bi_sector, end_sector, end, duration, stats_aux);

	rcu_read_unlock();
}
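
/* Sum one area's counters across all CPUs into shared->tmp for reporting. */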
static void __dm_stat_init_temporary_percpu_totals(struct dm_stat_shared *shared,
						   struct dm_stat *s, size_t x)
{
	int cpu;
	struct dm_stat_percpu *p;

	local_irq_disable();
	p = &s->stat_percpu[smp_processor_id()][x];
	dm_stat_round(shared, p);
	local_irq_enable();

	memset(&shared->tmp, 0, sizeof(shared->tmp));
	for_each_possible_cpu(cpu) {
		p = &s->stat_percpu[cpu][x];
		shared->tmp.sectors[READ] += ACCESS_ONCE(p->sectors[READ]);
		shared->tmp.sectors[WRITE] += ACCESS_ONCE(p->sectors[WRITE]);
		shared->tmp.ios[READ] += ACCESS_ONCE(p->ios[READ]);
		shared->tmp.ios[WRITE] += ACCESS_ONCE(p->ios[WRITE]);
		shared->tmp.merges[READ] += ACCESS_ONCE(p->merges[READ]);
		shared->tmp.merges[WRITE] += ACCESS_ONCE(p->merges[WRITE]);
		shared->tmp.ticks[READ] += ACCESS_ONCE(p->ticks[READ]);
		shared->tmp.ticks[WRITE] += ACCESS_ONCE(p->ticks[WRITE]);
		shared->tmp.io_ticks[READ] += ACCESS_ONCE(p->io_ticks[READ]);
		shared->tmp.io_ticks[WRITE] += ACCESS_ONCE(p->io_ticks[WRITE]);
		shared->tmp.io_ticks_total += ACCESS_ONCE(p->io_ticks_total);
		shared->tmp.time_in_queue += ACCESS_ONCE(p->time_in_queue);
	}
}
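
/*
 * Clear counters by subtracting the totals collected in shared->tmp from the
 * local CPU's counters; updates racing on other CPUs are not lost.
 */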
static void __dm_stat_clear(struct dm_stat *s, size_t idx_start, size_t idx_end,
			    bool init_tmp_percpu_totals)
{
	size_t x;
	struct dm_stat_shared *shared;
	struct dm_stat_percpu *p;

	for (x = idx_start; x < idx_end; x++) {
		shared = &s->stat_shared[x];
		if (init_tmp_percpu_totals)
			__dm_stat_init_temporary_percpu_totals(shared, s, x);

		local_irq_disable();
		p = &s->stat_percpu[smp_processor_id()][x];
		p->sectors[READ] -= shared->tmp.sectors[READ];
		p->sectors[WRITE] -= shared->tmp.sectors[WRITE];
		p->ios[READ] -= shared->tmp.ios[READ];
		p->ios[WRITE] -= shared->tmp.ios[WRITE];
		p->merges[READ] -= shared->tmp.merges[READ];
		p->merges[WRITE] -= shared->tmp.merges[WRITE];
		p->ticks[READ] -= shared->tmp.ticks[READ];
		p->ticks[WRITE] -= shared->tmp.ticks[WRITE];
		p->io_ticks[READ] -= shared->tmp.io_ticks[READ];
		p->io_ticks[WRITE] -= shared->tmp.io_ticks[WRITE];
		p->io_ticks_total -= shared->tmp.io_ticks_total;
		p->time_in_queue -= shared->tmp.time_in_queue;
		local_irq_enable();
	}
}

static int dm_stats_clear(struct dm_stats *stats, int id)
{
	struct dm_stat *s;

	mutex_lock(&stats->mutex);

	s = __dm_stats_find(stats, id);
	if (!s) {
		mutex_unlock(&stats->mutex);
		return -ENOENT;
	}

	__dm_stat_clear(s, 0, s->n_entries, true);

	mutex_unlock(&stats->mutex);

	return 1;
}
/*
 * This is like jiffies_to_msecs, but works for 64-bit values.
 */
static unsigned long long dm_jiffies_to_msec64(unsigned long long j)
{
	unsigned long long result = 0;
	unsigned mult;

	if (j)
		result = jiffies_to_msecs(j & 0x3fffff);
	if (j >= 1 << 22) {
		mult = jiffies_to_msecs(1 << 22);
		result += (unsigned long long)mult * (unsigned long long)jiffies_to_msecs((j >> 22) & 0x3fffff);
	}
	if (j >= 1ULL << 44)
		result += (unsigned long long)mult * (unsigned long long)mult * (unsigned long long)jiffies_to_msecs(j >> 44);

	return result;
}
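
/*
 * Emit one line per area: "<start>+<length>" followed by read ios, read
 * merges, read sectors, read ticks (ms), write ios, write merges, write
 * sectors, write ticks (ms), ios in flight, total busy time (ms), weighted
 * time in queue (ms), and the read/write busy times (ms).
 */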
static int dm_stats_print(struct dm_stats *stats, int id,
			  size_t idx_start, size_t idx_len,
			  bool clear, char *result, unsigned maxlen)
{
	unsigned sz = 0;
	struct dm_stat *s;
	size_t x;
	sector_t start, end, step;
	size_t idx_end;
	struct dm_stat_shared *shared;

	/*
	 * Output format:
	 *   <start_sector>+<length> counters
	 */

	mutex_lock(&stats->mutex);

	s = __dm_stats_find(stats, id);
	if (!s) {
		mutex_unlock(&stats->mutex);
		return -ENOENT;
	}

	idx_end = idx_start + idx_len;
	if (idx_end < idx_start ||
	    idx_end > s->n_entries)
		idx_end = s->n_entries;

	if (idx_start > idx_end)
		idx_start = idx_end;

	step = s->step;
	start = s->start + (step * idx_start);

	for (x = idx_start; x < idx_end; x++, start = end) {
		shared = &s->stat_shared[x];
		end = start + step;
		if (unlikely(end > s->end))
			end = s->end;

		__dm_stat_init_temporary_percpu_totals(shared, s, x);

		DMEMIT("%llu+%llu %llu %llu %llu %llu %llu %llu %llu %llu %d %llu %llu %llu %llu\n",
		       (unsigned long long)start,
		       (unsigned long long)step,
		       shared->tmp.ios[READ],
		       shared->tmp.merges[READ],
		       shared->tmp.sectors[READ],
		       dm_jiffies_to_msec64(shared->tmp.ticks[READ]),
		       shared->tmp.ios[WRITE],
		       shared->tmp.merges[WRITE],
		       shared->tmp.sectors[WRITE],
		       dm_jiffies_to_msec64(shared->tmp.ticks[WRITE]),
		       dm_stat_in_flight(shared),
		       dm_jiffies_to_msec64(shared->tmp.io_ticks_total),
		       dm_jiffies_to_msec64(shared->tmp.time_in_queue),
		       dm_jiffies_to_msec64(shared->tmp.io_ticks[READ]),
		       dm_jiffies_to_msec64(shared->tmp.io_ticks[WRITE]));

		if (unlikely(sz + 1 >= maxlen))
			goto buffer_overflow;
	}

	if (clear)
		__dm_stat_clear(s, idx_start, idx_end, false);

buffer_overflow:
	mutex_unlock(&stats->mutex);

	return 1;
}
static int dm_stats_set_aux(struct dm_stats *stats, int id, const char *aux_data)
{
	struct dm_stat *s;
	const char *new_aux_data;

	mutex_lock(&stats->mutex);

	s = __dm_stats_find(stats, id);
	if (!s) {
		mutex_unlock(&stats->mutex);
		return -ENOENT;
	}

	new_aux_data = kstrdup(aux_data, GFP_KERNEL);
	if (!new_aux_data) {
		mutex_unlock(&stats->mutex);
		return -ENOMEM;
	}

	kfree(s->aux_data);
	s->aux_data = new_aux_data;

	mutex_unlock(&stats->mutex);

	return 0;
}
static int message_stats_create(struct mapped_device *md,
				unsigned argc, char **argv,
				char *result, unsigned maxlen)
{
	int id;
	char dummy;
	unsigned long long start, end, len, step;
	unsigned divisor;
	const char *program_id, *aux_data;

	/*
	 * Input format:
	 *   <range> <step> [<program_id> [<aux_data>]]
	 */

	if (argc < 3 || argc > 5)
		return -EINVAL;

	if (!strcmp(argv[1], "-")) {
		start = 0;
		len = dm_get_size(md);
		if (!len)
			len = 1;
	} else if (sscanf(argv[1], "%llu+%llu%c", &start, &len, &dummy) != 2 ||
		   start != (sector_t)start || len != (sector_t)len)
		return -EINVAL;

	end = start + len;
	if (start >= end)
		return -EINVAL;

	if (sscanf(argv[2], "/%u%c", &divisor, &dummy) == 1) {
		step = end - start;
		if (do_div(step, divisor))
			step++;
		if (!step)
			step = 1;
	} else if (sscanf(argv[2], "%llu%c", &step, &dummy) != 1 ||
		   step != (sector_t)step || !step)
		return -EINVAL;

	program_id = "-";
	aux_data = "-";

	if (argc > 3)
		program_id = argv[3];

	if (argc > 4)
		aux_data = argv[4];

	/*
	 * If a buffer overflow happens after we created the region,
	 * it's too late (userspace would retry with a larger buffer,
	 * but the region id that caused the overflow is already
	 * leaked). So we must detect buffer overflow in advance.
	 */
	snprintf(result, maxlen, "%d", INT_MAX);
	if (dm_message_test_buffer_overflow(result, maxlen))
		return 1;

	id = dm_stats_create(dm_get_stats(md), start, end, step, program_id, aux_data,
			     dm_internal_suspend, dm_internal_resume, md);
	if (id < 0)
		return id;

	snprintf(result, maxlen, "%d", id);

	return 1;
}
static int message_stats_delete(struct mapped_device *md,
				unsigned argc, char **argv)
{
	int id;
	char dummy;

	if (argc != 2)
		return -EINVAL;

	if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
		return -EINVAL;

	return dm_stats_delete(dm_get_stats(md), id);
}

static int message_stats_clear(struct mapped_device *md,
			       unsigned argc, char **argv)
{
	int id;
	char dummy;

	if (argc != 2)
		return -EINVAL;

	if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
		return -EINVAL;

	return dm_stats_clear(dm_get_stats(md), id);
}

static int message_stats_list(struct mapped_device *md,
			      unsigned argc, char **argv,
			      char *result, unsigned maxlen)
{
	int r;
	const char *program = NULL;

	if (argc < 1 || argc > 2)
		return -EINVAL;

	if (argc > 1) {
		program = kstrdup(argv[1], GFP_KERNEL);
		if (!program)
			return -ENOMEM;
	}

	r = dm_stats_list(dm_get_stats(md), program, result, maxlen);

	kfree(program);

	return r;
}

static int message_stats_print(struct mapped_device *md,
			       unsigned argc, char **argv, bool clear,
			       char *result, unsigned maxlen)
{
	int id;
	char dummy;
	unsigned long idx_start = 0, idx_len = ULONG_MAX;

	if (argc != 2 && argc != 4)
		return -EINVAL;

	if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
		return -EINVAL;

	if (argc > 3) {
		if (strcmp(argv[2], "-") &&
		    sscanf(argv[2], "%lu%c", &idx_start, &dummy) != 1)
			return -EINVAL;
		if (strcmp(argv[3], "-") &&
		    sscanf(argv[3], "%lu%c", &idx_len, &dummy) != 1)
			return -EINVAL;
	}

	return dm_stats_print(dm_get_stats(md), id, idx_start, idx_len, clear,
			      result, maxlen);
}

static int message_stats_set_aux(struct mapped_device *md,
				 unsigned argc, char **argv)
{
	int id;
	char dummy;

	if (argc != 3)
		return -EINVAL;

	if (sscanf(argv[1], "%d%c", &id, &dummy) != 1 || id < 0)
		return -EINVAL;

	return dm_stats_set_aux(dm_get_stats(md), id, argv[2]);
}
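
/*
 * Dispatch the @stats_* target messages; returns 2 for messages that are
 * not statistics messages so the caller can handle them itself.
 */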
int dm_stats_message(struct mapped_device *md, unsigned argc, char **argv,
		     char *result, unsigned maxlen)
{
	int r;

	if (dm_request_based(md)) {
		DMWARN("Statistics are only supported for bio-based devices");
		return -EOPNOTSUPP;
	}

	/* All messages here must start with '@' */
	if (!strcasecmp(argv[0], "@stats_create"))
		r = message_stats_create(md, argc, argv, result, maxlen);
	else if (!strcasecmp(argv[0], "@stats_delete"))
		r = message_stats_delete(md, argc, argv);
	else if (!strcasecmp(argv[0], "@stats_clear"))
		r = message_stats_clear(md, argc, argv);
	else if (!strcasecmp(argv[0], "@stats_list"))
		r = message_stats_list(md, argc, argv, result, maxlen);
	else if (!strcasecmp(argv[0], "@stats_print"))
		r = message_stats_print(md, argc, argv, false, result, maxlen);
	else if (!strcasecmp(argv[0], "@stats_print_clear"))
		r = message_stats_print(md, argc, argv, true, result, maxlen);
	else if (!strcasecmp(argv[0], "@stats_set_aux"))
		r = message_stats_set_aux(md, argc, argv);
	else
		return 2; /* this wasn't a stats message */

	if (r == -EINVAL)
		DMWARN("Invalid parameters for message %s", argv[0]);

	return r;
}

int __init dm_statistics_init(void)
{
	dm_stat_need_rcu_barrier = 0;
	return 0;
}

void dm_statistics_exit(void)
{
	if (dm_stat_need_rcu_barrier)
		rcu_barrier();
	if (WARN_ON(shared_memory_amount))
		DMCRIT("shared_memory_amount leaked: %lu", shared_memory_amount);
}

module_param_named(stats_current_allocated_bytes, shared_memory_amount, ulong, S_IRUGO);
MODULE_PARM_DESC(stats_current_allocated_bytes, "Memory currently used by statistics");