mr.c
/*
 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"

enum {
	MAX_PENDING_REG_MR = 8,
};

enum {
	MLX5_UMR_ALIGN = 2048
};

static __be64 *mr_align(__be64 *ptr, int align)
{
	unsigned long mask = align - 1;

	return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}

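/*
 * Completion callback for the asynchronous mlx5_core_create_mkey() calls
 * issued by add_keys().  On success the new MR is given its key and added
 * to its cache entry; on failure the MR is freed and further cache filling
 * is delayed for a second via the delay timer.
 */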
static void reg_mr_callback(int status, void *context)
{
	struct mlx5_ib_mr *mr = context;
	struct mlx5_ib_dev *dev = mr->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int c = order2idx(dev, mr->order);
	struct mlx5_cache_ent *ent = &cache->ent[c];
	u8 key;
	unsigned long flags;

	spin_lock_irqsave(&ent->lock, flags);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	if (mr->out.hdr.status) {
		mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
			     mr->out.hdr.status,
			     be32_to_cpu(mr->out.hdr.syndrome));
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	spin_lock_irqsave(&dev->mdev.priv.mkey_lock, flags);
	key = dev->mdev.priv.mkey_key++;
	spin_unlock_irqrestore(&dev->mdev.priv.mkey_lock, flags);
	mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;

	cache->last_add = jiffies;

	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	ent->size++;
	spin_unlock_irqrestore(&ent->lock, flags);
}

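/*
 * Try to add 'num' MRs to cache entry 'c'.  Each MR is created with an
 * asynchronous CREATE_MKEY command; reg_mr_callback() files the result.
 * Returns -EAGAIN once MAX_PENDING_REG_MR commands are already in flight
 * for this entry.
 */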
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int npages = 1 << ent->order;
	int err = 0;
	int i;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	for (i = 0; i < num; i++) {
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			break;
		}

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		mr->order = ent->order;
		mr->umred = 1;
		mr->dev = dev;
		in->seg.status = 1 << 6;
		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
		in->seg.log2_page_size = 12;

		spin_lock_irq(&ent->lock);
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		mr->start = jiffies;
		err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
					    sizeof(*in), reg_mr_callback,
					    mr, &mr->out);
		if (err) {
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}

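/*
 * Destroy up to 'num' MRs from the head of cache entry 'c', stopping early
 * if the list runs empty.
 */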
static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;
	int i;

	for (i = 0; i < num; i++) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

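/*
 * debugfs plumbing for the per-entry "size" and "limit" files created in
 * mlx5_mr_cache_debugfs_init(): writing "size" grows or shrinks an entry,
 * writing "limit" adjusts its low-water mark.
 */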
static ssize_t size_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[sizeof(lbuf) - 1] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var < ent->limit)
		return -EINVAL;

	if (var > ent->size) {
		do {
			err = add_keys(dev, c, var - ent->size);
			if (err && err != -EAGAIN)
				return err;

			usleep_range(3000, 5000);
		} while (err);
	} else if (var < ent->size) {
		remove_keys(dev, c, ent->size - var);
	}

	return count;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
			 loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations size_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= size_write,
	.read	= size_read,
};

static ssize_t limit_write(struct file *filp, const char __user *buf,
			   size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[sizeof(lbuf) - 1] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var > ent->size)
		return -EINVAL;

	ent->limit = var;

	if (ent->cur < ent->limit) {
		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
		if (err)
			return err;
	}

	return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
			  loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations limit_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= limit_write,
	.read	= limit_read,
};

static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}

	return 0;
}

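/*
 * Background worker for one cache entry: keep refilling while the entry is
 * below twice its limit (backing off when the firmware pushes back), and
 * trim it back down once it has sat above twice the limit for a while and
 * no other entry is still being filled.
 */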
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);
	int err;

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
		err = add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit) {
			if (err == -EAGAIN) {
				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
					    i + 2);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(3));
			} else if (err) {
				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
					     i + 2, err);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(1000));
			} else {
				queue_work(cache->wq, &ent->work);
			}
		}
	} else if (ent->cur > 2 * ent->limit) {
		if (!someone_adding(cache) &&
		    time_after(jiffies, cache->last_add + 300 * HZ)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				queue_work(cache->wq, &ent->work);
		} else {
			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
		}
	}
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}

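/*
 * Take an MR from the cache.  Start at the entry matching 'order' and walk
 * up to larger orders if that entry is empty; every entry touched gets its
 * worker kicked so it refills in the background.  Returns NULL (and counts
 * a miss) if nothing suitable is cached.
 */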
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_ib_mr *mr = NULL;
	struct mlx5_cache_ent *ent;
	int c;
	int i;

	c = order2idx(dev, order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
		return NULL;
	}

	for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];

		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

		spin_lock_irq(&ent->lock);
		if (!list_empty(&ent->head)) {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			break;
		}
		spin_unlock_irq(&ent->lock);

		queue_work(cache->wq, &ent->work);

		if (mr)
			break;
	}

	if (!mr)
		cache->ent[c].miss++;

	return mr;
}

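/*
 * Return an MR to its cache entry and kick the worker if the entry is now
 * more than twice over its limit so it can be shrunk.
 */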
static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	c = order2idx(dev, mr->order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
		return;
	}
	ent = &cache->ent[c];
	spin_lock_irq(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock_irq(&ent->lock);

	if (shrink)
		queue_work(cache->wq, &ent->work);
}

static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int i;

	if (!mlx5_debugfs_root)
		return 0;

	cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root);
	if (!cache->root)
		return -ENOMEM;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		sprintf(ent->name, "%d", ent->order);
		ent->dir = debugfs_create_dir(ent->name, cache->root);
		if (!ent->dir)
			return -ENOMEM;

		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
						 &size_fops);
		if (!ent->fsize)
			return -ENOMEM;

		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
						  &limit_fops);
		if (!ent->flimit)
			return -ENOMEM;

		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
					       &ent->cur);
		if (!ent->fcur)
			return -ENOMEM;

		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
						&ent->miss);
		if (!ent->fmiss)
			return -ENOMEM;
	}

	return 0;
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
	if (!mlx5_debugfs_root)
		return;

	debugfs_remove_recursive(dev->cache.root);
}

static void delay_time_func(unsigned long ctx)
{
	struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;

	dev->fill_delay = 0;
}

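/*
 * Set up the MR cache: one workqueue shared by all entries, the refill
 * delay timer, and MAX_MR_CACHE_ENTRIES entries covering orders 2 and up.
 * Each entry is seeded by queuing its work item once.
 */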
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int limit;
	int err;
	int i;

	cache->wq = create_singlethread_workqueue("mkey_cache");
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		INIT_LIST_HEAD(&cache->ent[i].head);
		spin_lock_init(&cache->ent[i].lock);

		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;

		if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE)
			limit = dev->mdev.profile->mr_cache[i].limit;
		else
			limit = 0;

		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
		ent->limit = limit;
		queue_work(cache->wq, &ent->work);
	}

	err = mlx5_mr_cache_debugfs_init(dev);
	if (err)
		mlx5_ib_warn(dev, "cache debugfs failure\n");

	return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);

	mlx5_mr_cache_debugfs_cleanup(dev);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);
	del_timer_sync(&dev->delay_timer);

	return 0;
}

struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_core_dev *mdev = &dev->mdev;
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_mkey_seg *seg;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	seg = &in->seg;
	seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
	seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	seg->start_addr = 0;

	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
				    NULL);
	if (err)
		goto err_in;

	kfree(in);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_size)
{
	u64 offset;
	int npages;

	offset = addr & (page_size - 1);
	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
	return (npages + 1) / 2;
}

static int use_umr(int order)
{
	return order <= 17;
}

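/*
 * Build the send WR for a UMR registration: one scatter entry pointing at
 * the DMA-mapped page list, plus the mkey, length, IOVA and access flags
 * the UMR should program.
 */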
static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
			     struct ib_sge *sg, u64 dma, int n, u32 key,
			     int page_shift, u64 virt_addr, u64 len,
			     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_mr *mr = dev->umrc.mr;

	sg->addr = dma;
	sg->length = ALIGN(sizeof(u64) * n, 64);
	sg->lkey = mr->lkey;

	wr->next = NULL;
	wr->send_flags = 0;
	wr->sg_list = sg;
	if (n)
		wr->num_sge = 1;
	else
		wr->num_sge = 0;

	wr->opcode = MLX5_IB_WR_UMR;
	wr->wr.fast_reg.page_list_len = n;
	wr->wr.fast_reg.page_shift = page_shift;
	wr->wr.fast_reg.rkey = key;
	wr->wr.fast_reg.iova_start = virt_addr;
	wr->wr.fast_reg.length = len;
	wr->wr.fast_reg.access_flags = access_flags;
	wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
}

static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
			       struct ib_send_wr *wr, u32 key)
{
	wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
	wr->opcode = MLX5_IB_WR_UMR;
	wr->wr.fast_reg.rkey = key;
}

void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
	struct mlx5_ib_mr *mr;
	struct ib_wc wc;
	int err;

	while (1) {
		err = ib_poll_cq(cq, 1, &wc);
		if (err < 0) {
			pr_warn("poll cq error %d\n", err);
			return;
		}
		if (err == 0)
			break;

		mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
		mr->status = wc.status;
		complete(&mr->done);
	}
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}

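/*
 * Register a user MR through the UMR path: grab a pre-created mkey from
 * the cache, DMA-map the page list, post a UMR work request on the
 * dedicated UMR QP and sleep until its completion is reported.
 */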
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
				  u64 virt_addr, u64 len, int npages,
				  int page_shift, int order, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct ib_send_wr wr, *bad;
	struct mlx5_ib_mr *mr;
	struct ib_sge sg;
	int size = sizeof(u64) * npages;
	int err;
	int i;

	for (i = 0; i < 1; i++) {
		mr = alloc_cached_mr(dev, order);
		if (mr)
			break;

		err = add_keys(dev, order2idx(dev, order), 1);
		if (err && err != -EAGAIN) {
			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
			break;
		}
	}

	if (!mr)
		return ERR_PTR(-EAGAIN);

	mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
	if (!mr->pas) {
		err = -ENOMEM;
		goto error;
	}

	mlx5_ib_populate_pas(dev, umem, page_shift,
			     mr_align(mr->pas, MLX5_UMR_ALIGN), 1);

	mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
				 DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, mr->dma)) {
		kfree(mr->pas);
		err = -ENOMEM;
		goto error;
	}

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (u64)(unsigned long)mr;
	prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift,
			 virt_addr, len, access_flags);

	/* We serialize polls so one process does not kidnap another's
	 * completion. This is not a problem since wr is completed in
	 * around 1 usec
	 */
	down(&umrc->sem);
	init_completion(&mr->done);
	err = ib_post_send(umrc->qp, &wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
		up(&umrc->sem);
		goto error;
	}
	wait_for_completion(&mr->done);
	up(&umrc->sem);

	dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
	kfree(mr->pas);

	if (mr->status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "reg umr failed\n");
		err = -EFAULT;
		goto error;
	}

	return mr;

error:
	free_cached_mr(dev, mr);
	return ERR_PTR(err);
}

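/*
 * Register an MR directly with a synchronous CREATE_MKEY command.  Used
 * when the region is too large for the UMR path or the cache cannot
 * supply a key.
 */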
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
				     u64 length, struct ib_umem *umem,
				     int npages, int page_shift,
				     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int inlen;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);

	in->seg.flags = convert_access(access_flags) |
		MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	in->seg.start_addr = cpu_to_be64(virt_addr);
	in->seg.len = cpu_to_be64(length);
	in->seg.bsfs_octo_size = 0;
	in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length,
							1 << page_shift));
	in->seg.log2_page_size = page_shift;
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
							 1 << page_shift));
	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen, NULL,
				    NULL, NULL);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->umem = umem;
	mlx5_vfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);

	return mr;

err_2:
	mlx5_vfree(in);

err_1:
	kfree(mr);

	return ERR_PTR(err);
}

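/*
 * ib_reg_user_mr entry point: pin the user pages, then register them via
 * the UMR path when the region is small enough and the cache can supply a
 * key, falling back to a direct CREATE_MKEY otherwise.
 */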
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
		    start, virt_addr, length);
	umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
			   0);
	if (IS_ERR(umem)) {
		mlx5_ib_dbg(dev, "umem get failed\n");
		return (void *)umem;
	}

	mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
	if (!npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		err = -EINVAL;
		goto error;
	}

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    npages, ncont, order, page_shift);

	if (use_umr(order)) {
		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
			     order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			mlx5_ib_dbg(dev, "cache empty for order %d", order);
			mr = NULL;
		}
	}

	if (!mr)
		mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
				access_flags);

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);

	mr->umem = umem;
	mr->npages = npages;
	spin_lock(&dev->mr_lock);
	dev->mdev.priv.reg_pages += npages;
	spin_unlock(&dev->mr_lock);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;

	return &mr->ibmr;

error:
	ib_umem_release(umem);
	return ERR_PTR(err);
}

static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct umr_common *umrc = &dev->umrc;
	struct ib_send_wr wr, *bad;
	int err;

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (u64)(unsigned long)mr;
	prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);

	down(&umrc->sem);
	init_completion(&mr->done);
	err = ib_post_send(umrc->qp, &wr, &bad);
	if (err) {
		up(&umrc->sem);
		mlx5_ib_dbg(dev, "err %d\n", err);
		goto error;
	}
	wait_for_completion(&mr->done);
	up(&umrc->sem);
	if (mr->status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "unreg umr failed\n");
		err = -EFAULT;
		goto error;
	}
	return 0;

error:
	return err;
}

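/*
 * ib_dereg_mr entry point.  Cached (UMR-registered) MRs are invalidated
 * with a UMR unreg request and handed back to the cache; directly created
 * MRs are destroyed with DESTROY_MKEY and freed.
 */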
int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct ib_umem *umem = mr->umem;
	int npages = mr->npages;
	int umred = mr->umred;
	int err;

	if (!umred) {
		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
		if (err) {
			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
				     mr->mmr.key, err);
			return err;
		}
	} else {
		err = unreg_umr(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed unregister\n");
			return err;
		}
		free_cached_mr(dev, mr);
	}

	if (umem) {
		ib_umem_release(umem);
		spin_lock(&dev->mr_lock);
		dev->mdev.priv.reg_pages -= npages;
		spin_unlock(&dev->mr_lock);
	}

	if (!umred)
		kfree(mr);

	return 0;
}

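/*
 * Allocate a fast-register MR: an mkey created in the "free" state that a
 * later fast-register work request can bind to a page list.
 */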
struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
					int max_page_list_len)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	in->seg.status = 1 << 6; /* free */
	in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	/*
	 * TBD not needed - issue 197292
	 */
	in->seg.log2_page_size = PAGE_SHIFT;

	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
				    NULL, NULL);
	kfree(in);
	if (err)
		goto err_free;

	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_free:
	kfree(mr);
	return ERR_PTR(err);
}

struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
							       int page_list_len)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl;
	int size = page_list_len * sizeof(u64);

	mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
	if (!mfrpl)
		return ERR_PTR(-ENOMEM);

	mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
	if (!mfrpl->ibfrpl.page_list)
		goto err_free;

	mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
						     size, &mfrpl->map,
						     GFP_KERNEL);
	if (!mfrpl->mapped_page_list)
		goto err_free;

	WARN_ON(mfrpl->map & 0x3f);

	return &mfrpl->ibfrpl;

err_free:
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
	return ERR_PTR(-ENOMEM);
}

void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
	struct mlx5_ib_dev *dev = to_mdev(page_list->device);
	int size = page_list->max_page_list_len * sizeof(u64);

	dma_free_coherent(&dev->mdev.pdev->dev, size, mfrpl->mapped_page_list,
			  mfrpl->map);
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
}