null_blk.c

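/*
 * Descriptive header (added): null_blk is a block device driver that
 * completes every request without performing any real I/O. It is used to
 * exercise and benchmark the block layer in its bio-based, request-based
 * and blk-mq modes, selected via the queue_mode module parameter.
 */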
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/blk-mq.h>
#include <linux/hrtimer.h>

struct nullb_cmd {
	struct list_head list;
	struct llist_node ll_list;
	struct call_single_data csd;
	struct request *rq;
	struct bio *bio;
	unsigned int tag;
	struct nullb_queue *nq;
};

struct nullb_queue {
	unsigned long *tag_map;
	wait_queue_head_t wait;
	unsigned int queue_depth;

	struct nullb_cmd *cmds;
};

struct nullb {
	struct list_head list;
	unsigned int index;
	struct request_queue *q;
	struct gendisk *disk;
	struct hrtimer timer;
	unsigned int queue_depth;
	spinlock_t lock;

	struct nullb_queue *queues;
	unsigned int nr_queues;
};

static LIST_HEAD(nullb_list);
static struct mutex lock;
static int null_major;
static int nullb_indexes;

struct completion_queue {
	struct llist_head list;
	struct hrtimer timer;
};

/*
 * These are per-cpu for now, they will need to be configured by the
 * complete_queues parameter and appropriately mapped.
 */
static DEFINE_PER_CPU(struct completion_queue, completion_queues);
enum {
	NULL_IRQ_NONE		= 0,
	NULL_IRQ_SOFTIRQ	= 1,
	NULL_IRQ_TIMER		= 2,

	NULL_Q_BIO		= 0,
	NULL_Q_RQ		= 1,
	NULL_Q_MQ		= 2,
};

static int submit_queues = 1;
module_param(submit_queues, int, S_IRUGO);
MODULE_PARM_DESC(submit_queues, "Number of submission queues");

static int home_node = NUMA_NO_NODE;
module_param(home_node, int, S_IRUGO);
MODULE_PARM_DESC(home_node, "Home node for the device");
static int queue_mode = NULL_Q_MQ;
module_param(queue_mode, int, S_IRUGO);
MODULE_PARM_DESC(queue_mode, "Queue mode to use (0=bio,1=rq,2=multiqueue)");
static int gb = 250;
module_param(gb, int, S_IRUGO);
MODULE_PARM_DESC(gb, "Size in GB");

static int bs = 512;
module_param(bs, int, S_IRUGO);
MODULE_PARM_DESC(bs, "Block size (in bytes)");

static int nr_devices = 2;
module_param(nr_devices, int, S_IRUGO);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");

static int irqmode = NULL_IRQ_SOFTIRQ;
module_param(irqmode, int, S_IRUGO);
MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");

static int completion_nsec = 10000;
module_param(completion_nsec, int, S_IRUGO);
MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");

static int hw_queue_depth = 64;
module_param(hw_queue_depth, int, S_IRUGO);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");

static bool use_per_node_hctx = true;
module_param(use_per_node_hctx, bool, S_IRUGO);
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: true");
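
/*
 * Tag management for the bio and request modes: each nullb_queue keeps a
 * bitmap of free tags. put_tag() releases a tag and wakes any waiter;
 * get_tag() claims the first free bit, or returns -1U if the queue is full.
 */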
static void put_tag(struct nullb_queue *nq, unsigned int tag)
{
	clear_bit_unlock(tag, nq->tag_map);

	if (waitqueue_active(&nq->wait))
		wake_up(&nq->wait);
}

static unsigned int get_tag(struct nullb_queue *nq)
{
	unsigned int tag;

	do {
		tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
		if (tag >= nq->queue_depth)
			return -1U;
	} while (test_and_set_bit_lock(tag, nq->tag_map));

	return tag;
}

static void free_cmd(struct nullb_cmd *cmd)
{
	put_tag(cmd->nq, cmd->tag);
}

static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
{
	struct nullb_cmd *cmd;
	unsigned int tag;

	tag = get_tag(nq);
	if (tag != -1U) {
		cmd = &nq->cmds[tag];
		cmd->tag = tag;
		cmd->nq = nq;
		return cmd;
	}

	return NULL;
}

static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
{
	struct nullb_cmd *cmd;
	DEFINE_WAIT(wait);

	cmd = __alloc_cmd(nq);
	if (cmd || !can_wait)
		return cmd;

	do {
		prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
		cmd = __alloc_cmd(nq);
		if (cmd)
			break;

		io_schedule();
	} while (1);

	finish_wait(&nq->wait, &wait);
	return cmd;
}
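
/*
 * Complete a command according to the active queue_mode: end the blk-mq
 * request, the legacy request, or the bio. Commands taken from the
 * per-queue tag map (non-mq modes) are returned to it here.
 */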
static void end_cmd(struct nullb_cmd *cmd)
{
	if (cmd->rq) {
		if (queue_mode == NULL_Q_MQ)
			blk_mq_end_io(cmd->rq, 0);
		else {
			INIT_LIST_HEAD(&cmd->rq->queuelist);
			blk_end_request_all(cmd->rq, 0);
		}
	} else if (cmd->bio)
		bio_endio(cmd->bio, 0);

	if (queue_mode != NULL_Q_MQ)
		free_cmd(cmd);
}
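
/*
 * Timer-based completion (irqmode=2): commands are batched on a per-cpu
 * llist and completed when the hrtimer fires, completion_nsec later.
 */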
static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
{
	struct completion_queue *cq;
	struct llist_node *entry;
	struct nullb_cmd *cmd;

	cq = &per_cpu(completion_queues, smp_processor_id());

	while ((entry = llist_del_all(&cq->list)) != NULL) {
		do {
			cmd = container_of(entry, struct nullb_cmd, ll_list);
			end_cmd(cmd);
			entry = entry->next;
		} while (entry);
	}

	return HRTIMER_NORESTART;
}

static void null_cmd_end_timer(struct nullb_cmd *cmd)
{
	struct completion_queue *cq = &per_cpu(completion_queues, get_cpu());

	cmd->ll_list.next = NULL;
	if (llist_add(&cmd->ll_list, &cq->list)) {
		ktime_t kt = ktime_set(0, completion_nsec);

		hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL);
	}

	put_cpu();
}

static void null_softirq_done_fn(struct request *rq)
{
	blk_end_request_all(rq, 0);
}
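
/*
 * Softirq-style completion (irqmode=1) on SMP: queue the command on the
 * per-cpu llist and send an IPI to the submitting CPU, which drains the
 * list in null_ipi_cmd_end_io().
 */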
#ifdef CONFIG_SMP

static void null_ipi_cmd_end_io(void *data)
{
	struct completion_queue *cq;
	struct llist_node *entry, *next;
	struct nullb_cmd *cmd;

	cq = &per_cpu(completion_queues, smp_processor_id());

	entry = llist_del_all(&cq->list);

	while (entry) {
		next = entry->next;
		cmd = llist_entry(entry, struct nullb_cmd, ll_list);
		end_cmd(cmd);
		entry = next;
	}
}

static void null_cmd_end_ipi(struct nullb_cmd *cmd)
{
	struct call_single_data *data = &cmd->csd;
	int cpu = get_cpu();
	struct completion_queue *cq = &per_cpu(completion_queues, cpu);

	cmd->ll_list.next = NULL;

	if (llist_add(&cmd->ll_list, &cq->list)) {
		data->func = null_ipi_cmd_end_io;
		data->flags = 0;
		__smp_call_function_single(cpu, data, 0);
	}

	put_cpu();
}

#endif /* CONFIG_SMP */
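
/*
 * Dispatch completion of a command based on the irqmode parameter. Without
 * CONFIG_SMP the softirq/IPI path is unavailable and the softirq case falls
 * back to inline completion.
 */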
static inline void null_handle_cmd(struct nullb_cmd *cmd)
{
	/* Complete IO by inline, softirq or timer */
	switch (irqmode) {
	case NULL_IRQ_NONE:
		end_cmd(cmd);
		break;
	case NULL_IRQ_SOFTIRQ:
#ifdef CONFIG_SMP
		null_cmd_end_ipi(cmd);
#else
		end_cmd(cmd);
#endif
		break;
	case NULL_IRQ_TIMER:
		null_cmd_end_timer(cmd);
		break;
	}
}
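
/*
 * Map the submitting CPU to one of the device's queues by dividing the
 * CPU id space evenly across nr_queues.
 */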
static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
{
	int index = 0;

	if (nullb->nr_queues != 1)
		index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);

	return &nullb->queues[index];
}

static void null_queue_bio(struct request_queue *q, struct bio *bio)
{
	struct nullb *nullb = q->queuedata;
	struct nullb_queue *nq = nullb_to_queue(nullb);
	struct nullb_cmd *cmd;

	cmd = alloc_cmd(nq, 1);
	cmd->bio = bio;

	null_handle_cmd(cmd);
}

static int null_rq_prep_fn(struct request_queue *q, struct request *req)
{
	struct nullb *nullb = q->queuedata;
	struct nullb_queue *nq = nullb_to_queue(nullb);
	struct nullb_cmd *cmd;

	cmd = alloc_cmd(nq, 0);
	if (cmd) {
		cmd->rq = req;
		req->special = cmd;
		return BLKPREP_OK;
	}

	return BLKPREP_DEFER;
}

static void null_request_fn(struct request_queue *q)
{
	struct request *rq;

	while ((rq = blk_fetch_request(q)) != NULL) {
		struct nullb_cmd *cmd = rq->special;

		spin_unlock_irq(q->queue_lock);
		null_handle_cmd(cmd);
		spin_lock_irq(q->queue_lock);
	}
}

static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
	struct nullb_cmd *cmd = rq->special;

	cmd->rq = rq;
	cmd->nq = hctx->driver_data;

	null_handle_cmd(cmd);
	return BLK_MQ_RQ_QUEUE_OK;
}
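
/*
 * With use_per_node_hctx, hardware contexts are allocated per NUMA node;
 * the hctx index is passed as the node for the allocation.
 */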
static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index)
{
	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL,
				hctx_index);
}

static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index)
{
	kfree(hctx);
}

static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			  unsigned int index)
{
	struct nullb *nullb = data;
	struct nullb_queue *nq = &nullb->queues[index];

	init_waitqueue_head(&nq->wait);
	nq->queue_depth = nullb->queue_depth;
	nullb->nr_queues++;
	hctx->driver_data = nq;

	return 0;
}
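
/* blk-mq operations and registration template used by null_add_dev(). */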
static struct blk_mq_ops null_mq_ops = {
	.queue_rq	= null_queue_rq,
	.map_queue	= blk_mq_map_queue,
	.init_hctx	= null_init_hctx,
};

static struct blk_mq_reg null_mq_reg = {
	.ops		= &null_mq_ops,
	.queue_depth	= 64,
	.cmd_size	= sizeof(struct nullb_cmd),
	.flags		= BLK_MQ_F_SHOULD_MERGE,
};

static void null_del_dev(struct nullb *nullb)
{
	list_del_init(&nullb->list);

	del_gendisk(nullb->disk);
	if (queue_mode == NULL_Q_MQ)
		blk_mq_free_queue(nullb->q);
	else
		blk_cleanup_queue(nullb->q);
	put_disk(nullb->disk);
	kfree(nullb);
}

static int null_open(struct block_device *bdev, fmode_t mode)
{
	return 0;
}

static void null_release(struct gendisk *disk, fmode_t mode)
{
}

static const struct block_device_operations null_fops = {
	.owner		= THIS_MODULE,
	.open		= null_open,
	.release	= null_release,
};
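
/*
 * Per-queue setup for the bio and request modes: allocate the command
 * array and the tag bitmap sized to the queue depth.
 */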
static int setup_commands(struct nullb_queue *nq)
{
	struct nullb_cmd *cmd;
	int i, tag_size;

	nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL);
	if (!nq->cmds)
		return 1;

	tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
	nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL);
	if (!nq->tag_map) {
		kfree(nq->cmds);
		return 1;
	}

	for (i = 0; i < nq->queue_depth; i++) {
		cmd = &nq->cmds[i];
		INIT_LIST_HEAD(&cmd->list);
		cmd->ll_list.next = NULL;
		cmd->tag = -1U;
	}

	return 0;
}

static void cleanup_queue(struct nullb_queue *nq)
{
	kfree(nq->tag_map);
	kfree(nq->cmds);
}

static void cleanup_queues(struct nullb *nullb)
{
	int i;

	for (i = 0; i < nullb->nr_queues; i++)
		cleanup_queue(&nullb->queues[i]);

	kfree(nullb->queues);
}

static int setup_queues(struct nullb *nullb)
{
	struct nullb_queue *nq;
	int i;

	nullb->queues = kzalloc(submit_queues * sizeof(*nq), GFP_KERNEL);
	if (!nullb->queues)
		return 1;

	nullb->nr_queues = 0;
	nullb->queue_depth = hw_queue_depth;

	if (queue_mode == NULL_Q_MQ)
		return 0;

	for (i = 0; i < submit_queues; i++) {
		nq = &nullb->queues[i];
		init_waitqueue_head(&nq->wait);
		nq->queue_depth = hw_queue_depth;
		if (setup_commands(nq))
			break;
		nullb->nr_queues++;
	}

	if (i == submit_queues)
		return 0;

	cleanup_queues(nullb);
	return 1;
}
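
/*
 * Create one null block device: set up its queues, create the request
 * queue for the selected queue_mode, then allocate and register the
 * gendisk with a capacity of "gb" gigabytes and a block size of "bs" bytes.
 */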
static int null_add_dev(void)
{
	struct gendisk *disk;
	struct nullb *nullb;
	sector_t size;

	nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
	if (!nullb)
		return -ENOMEM;

	spin_lock_init(&nullb->lock);

	if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
		submit_queues = nr_online_nodes;

	if (setup_queues(nullb))
		goto err;

	if (queue_mode == NULL_Q_MQ) {
		null_mq_reg.numa_node = home_node;
		null_mq_reg.queue_depth = hw_queue_depth;
		null_mq_reg.nr_hw_queues = submit_queues;

		if (use_per_node_hctx) {
			null_mq_reg.ops->alloc_hctx = null_alloc_hctx;
			null_mq_reg.ops->free_hctx = null_free_hctx;
		} else {
			null_mq_reg.ops->alloc_hctx = blk_mq_alloc_single_hw_queue;
			null_mq_reg.ops->free_hctx = blk_mq_free_single_hw_queue;
		}

		nullb->q = blk_mq_init_queue(&null_mq_reg, nullb);
	} else if (queue_mode == NULL_Q_BIO) {
		nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
		blk_queue_make_request(nullb->q, null_queue_bio);
	} else {
		nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
		blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
		if (nullb->q)
			blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
	}

	if (!nullb->q)
		goto queue_fail;

	nullb->q->queuedata = nullb;
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);

	disk = nullb->disk = alloc_disk_node(1, home_node);
	if (!disk) {
queue_fail:
		if (queue_mode == NULL_Q_MQ)
			blk_mq_free_queue(nullb->q);
		else
			blk_cleanup_queue(nullb->q);
		cleanup_queues(nullb);
err:
		kfree(nullb);
		return -ENOMEM;
	}

	mutex_lock(&lock);
	list_add_tail(&nullb->list, &nullb_list);
	nullb->index = nullb_indexes++;
	mutex_unlock(&lock);

	blk_queue_logical_block_size(nullb->q, bs);
	blk_queue_physical_block_size(nullb->q, bs);

	size = gb * 1024 * 1024 * 1024ULL;
	sector_div(size, bs);
	set_capacity(disk, size);

	disk->flags |= GENHD_FL_EXT_DEVT;
	disk->major		= null_major;
	disk->first_minor	= nullb->index;
	disk->fops		= &null_fops;
	disk->private_data	= nullb;
	disk->queue		= nullb->q;
	sprintf(disk->disk_name, "nullb%d", nullb->index);
	add_disk(disk);
	return 0;
}
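
/*
 * Module init: initialize the per-cpu completion queues (and their
 * hrtimers when irqmode=2), register the block major and create
 * nr_devices devices.
 */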
static int __init null_init(void)
{
	unsigned int i;

#if !defined(CONFIG_SMP)
	if (irqmode == NULL_IRQ_SOFTIRQ) {
		pr_warn("null_blk: softirq completions not available.\n");
		pr_warn("null_blk: using direct completions.\n");
		irqmode = NULL_IRQ_NONE;
	}
#endif

	if (submit_queues > nr_cpu_ids)
		submit_queues = nr_cpu_ids;
	else if (!submit_queues)
		submit_queues = 1;

	mutex_init(&lock);

	/* Initialize a separate list for each CPU for issuing softirqs */
	for_each_possible_cpu(i) {
		struct completion_queue *cq = &per_cpu(completion_queues, i);

		init_llist_head(&cq->list);

		if (irqmode != NULL_IRQ_TIMER)
			continue;

		hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		cq->timer.function = null_cmd_timer_expired;
	}

	null_major = register_blkdev(0, "nullb");
	if (null_major < 0)
		return null_major;

	for (i = 0; i < nr_devices; i++) {
		if (null_add_dev()) {
			unregister_blkdev(null_major, "nullb");
			return -EINVAL;
		}
	}

	pr_info("null: module loaded\n");
	return 0;
}

static void __exit null_exit(void)
{
	struct nullb *nullb;

	unregister_blkdev(null_major, "nullb");

	mutex_lock(&lock);
	while (!list_empty(&nullb_list)) {
		nullb = list_entry(nullb_list.next, struct nullb, list);
		null_del_dev(nullb);
	}
	mutex_unlock(&lock);
}

module_init(null_init);
module_exit(null_exit);

MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");
MODULE_LICENSE("GPL");