/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc. All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/wait.h>

#define DRIVER_VERSION	"0.3"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO - User Level meta-driver"

static struct vfio {
	struct class		*class;
	struct list_head	iommu_drivers_list;
	struct mutex		iommu_drivers_lock;
	struct list_head	group_list;
	struct idr		group_idr;
	struct mutex		group_lock;
	struct cdev		group_cdev;
	struct device		*dev;
	dev_t			devt;
	struct cdev		cdev;
	wait_queue_head_t	release_q;
} vfio;

struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops	*ops;
	struct list_head			vfio_next;
};

struct vfio_container {
	struct kref			kref;
	struct list_head		group_list;
	struct mutex			group_lock;
	struct vfio_iommu_driver	*iommu_driver;
	void				*iommu_data;
};

struct vfio_group {
	struct kref			kref;
	int				minor;
	atomic_t			container_users;
	struct iommu_group		*iommu_group;
	struct vfio_container		*container;
	struct list_head		device_list;
	struct mutex			device_lock;
	struct device			*dev;
	struct notifier_block		nb;
	struct list_head		vfio_next;
	struct list_head		container_next;
};

struct vfio_device {
	struct kref			kref;
	struct device			*dev;
	const struct vfio_device_ops	*ops;
	struct vfio_group		*group;
	struct list_head		group_next;
	void				*device_data;
};
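
/*
 * Object model note (summarizing the structures above): one
 * vfio_container exists per open of /dev/vfio/vfio and lists the groups
 * attached to it; one vfio_group exists per iommu_group and is exposed
 * as /dev/vfio/$GROUP; each group lists its vfio_devices.  All three
 * are kref-counted and may be torn down in any order: a device pins its
 * group, a group pins its container while container_users is non-zero,
 * and release_q lets vfio_del_group_dev() wait for the final device
 * reference to drop.
 */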

/**
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver, *tmp;

	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return -ENOMEM;

	driver->ops = ops;

	mutex_lock(&vfio.iommu_drivers_lock);

	/* Check for duplicates */
	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
		if (tmp->ops == ops) {
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return -EINVAL;
		}
	}

	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

	mutex_unlock(&vfio.iommu_drivers_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);

void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver;

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		if (driver->ops == ops) {
			list_del(&driver->vfio_next);
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return;
		}
	}
	mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
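
/*
 * Illustrative sketch (not part of this file): an IOMMU backend module
 * would fill in a vfio_iommu_driver_ops and register it at module load.
 * Everything named example_* below is hypothetical; only the callback
 * roles mirror what this file invokes (open/release/ioctl/attach_group/
 * detach_group), and VFIO_TYPE1_IOMMU is assumed as the extension magic.
 */
#if 0	/* example only, never built */
struct example_iommu {
	struct list_head dma_list;	/* hypothetical mapping tracking */
};

static void *example_iommu_open(unsigned long arg)
{
	/* per-container state; the returned pointer becomes iommu_data */
	struct example_iommu *iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);

	if (!iommu)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&iommu->dma_list);
	return iommu;
}

static void example_iommu_release(void *iommu_data)
{
	kfree(iommu_data);
}

static long example_iommu_ioctl(void *iommu_data, unsigned int cmd,
				unsigned long arg)
{
	/* VFIO_SET_IOMMU selects a backend via this extension check */
	if (cmd == VFIO_CHECK_EXTENSION)
		return arg == VFIO_TYPE1_IOMMU;
	return -ENOTTY;
}

static int example_iommu_attach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
	return 0;	/* a real backend would attach an iommu domain here */
}

static void example_iommu_detach_group(void *iommu_data,
				       struct iommu_group *iommu_group)
{
}

static const struct vfio_iommu_driver_ops example_iommu_ops = {
	.owner		= THIS_MODULE,
	.open		= example_iommu_open,
	.release	= example_iommu_release,
	.ioctl		= example_iommu_ioctl,
	.attach_group	= example_iommu_attach_group,
	.detach_group	= example_iommu_detach_group,
};

static int __init example_iommu_init(void)
{
	return vfio_register_iommu_driver(&example_iommu_ops);
}

static void __exit example_iommu_exit(void)
{
	vfio_unregister_iommu_driver(&example_iommu_ops);
}
#endif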

/**
 * Group minor allocation/free - both called with vfio.group_lock held
 */
static int vfio_alloc_group_minor(struct vfio_group *group)
{
	int ret, minor;

again:
	if (unlikely(idr_pre_get(&vfio.group_idr, GFP_KERNEL) == 0))
		return -ENOMEM;

	/* index 0 is used by /dev/vfio/vfio */
	ret = idr_get_new_above(&vfio.group_idr, group, 1, &minor);
	if (ret == -EAGAIN)
		goto again;
	if (ret || minor > MINORMASK) {
		if (minor > MINORMASK)
			idr_remove(&vfio.group_idr, minor);
		return -ENOSPC;
	}

	return minor;
}

static void vfio_free_group_minor(int minor)
{
	idr_remove(&vfio.group_idr, minor);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data);
static void vfio_group_get(struct vfio_group *group);

/**
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref.  Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}

static void vfio_container_release(struct kref *kref)
{
	struct vfio_container *container;
	container = container_of(kref, struct vfio_container, kref);

	kfree(container);
}

static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}

/**
 * Group objects - create, release, get, put, search
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
{
	struct vfio_group *group, *tmp;
	struct device *dev;
	int ret, minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	kref_init(&group->kref);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	atomic_set(&group->container_users, 0);
	group->iommu_group = iommu_group;

	group->nb.notifier_call = vfio_iommu_group_notifier;

	/*
	 * blocking notifiers acquire a rwsem around registering and hold
	 * it around callback.  Therefore, need to register outside of
	 * vfio.group_lock to avoid A-B/B-A contention.  Our callback won't
	 * do anything unless it can find the group in vfio.group_list, so
	 * no harm in registering early.
	 */
	ret = iommu_group_register_notifier(iommu_group, &group->nb);
	if (ret) {
		kfree(group);
		return ERR_PTR(ret);
	}

	mutex_lock(&vfio.group_lock);

	minor = vfio_alloc_group_minor(group);
	if (minor < 0) {
		mutex_unlock(&vfio.group_lock);
		kfree(group);
		return ERR_PTR(minor);
	}

	/* Did we race creating this group? */
	list_for_each_entry(tmp, &vfio.group_list, vfio_next) {
		if (tmp->iommu_group == iommu_group) {
			vfio_group_get(tmp);
			vfio_free_group_minor(minor);
			mutex_unlock(&vfio.group_lock);
			kfree(group);
			return tmp;
		}
	}

	dev = device_create(vfio.class, NULL, MKDEV(MAJOR(vfio.devt), minor),
			    group, "%d", iommu_group_id(iommu_group));
	if (IS_ERR(dev)) {
		vfio_free_group_minor(minor);
		mutex_unlock(&vfio.group_lock);
		kfree(group);
		return (struct vfio_group *)dev; /* ERR_PTR */
	}

	group->minor = minor;
	group->dev = dev;

	list_add(&group->vfio_next, &vfio.group_list);

	mutex_unlock(&vfio.group_lock);

	return group;
}

static void vfio_group_release(struct kref *kref)
{
	struct vfio_group *group = container_of(kref, struct vfio_group, kref);

	WARN_ON(!list_empty(&group->device_list));

	device_destroy(vfio.class, MKDEV(MAJOR(vfio.devt), group->minor));
	list_del(&group->vfio_next);
	vfio_free_group_minor(group->minor);

	mutex_unlock(&vfio.group_lock);

	/*
	 * Unregister outside of lock.  A spurious callback is harmless now
	 * that the group is no longer in vfio.group_list.
	 */
	iommu_group_unregister_notifier(group->iommu_group, &group->nb);

	kfree(group);
}

static void vfio_group_put(struct vfio_group *group)
{
	mutex_lock(&vfio.group_lock);
	/*
	 * Release needs to unlock to unregister the notifier, so only
	 * unlock if not released.
	 */
	if (!kref_put(&group->kref, vfio_group_release))
		mutex_unlock(&vfio.group_lock);
}

/* Assume group_lock or group reference is held */
static void vfio_group_get(struct vfio_group *group)
{
	kref_get(&group->kref);
}

/*
 * Not really a try as we will sleep for mutex, but we need to make
 * sure the group pointer is valid under lock and get a reference.
 */
static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
{
	struct vfio_group *target = group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group == target) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static
struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group->iommu_group == iommu_group) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static struct vfio_group *vfio_group_get_from_minor(int minor)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	group = idr_find(&vfio.group_idr, minor);
	if (!group) {
		mutex_unlock(&vfio.group_lock);
		return NULL;
	}
	vfio_group_get(group);
	mutex_unlock(&vfio.group_lock);

	return group;
}

/**
 * Device objects - create, release, get, put, search
 */
static
struct vfio_device *vfio_group_create_device(struct vfio_group *group,
					     struct device *dev,
					     const struct vfio_device_ops *ops,
					     void *device_data)
{
	struct vfio_device *device;
	int ret;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	kref_init(&device->kref);
	device->dev = dev;
	device->group = group;
	device->ops = ops;
	device->device_data = device_data;

	ret = dev_set_drvdata(dev, device);
	if (ret) {
		kfree(device);
		return ERR_PTR(ret);
	}

	/* No need to get group_lock, caller has group reference */
	vfio_group_get(group);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	mutex_unlock(&group->device_lock);

	return device;
}

static void vfio_device_release(struct kref *kref)
{
	struct vfio_device *device = container_of(kref,
						  struct vfio_device, kref);
	struct vfio_group *group = device->group;

	mutex_lock(&group->device_lock);
	list_del(&device->group_next);
	mutex_unlock(&group->device_lock);

	dev_set_drvdata(device->dev, NULL);

	kfree(device);

	/* vfio_del_group_dev may be waiting for this device */
	wake_up(&vfio.release_q);
}

/* Device reference always implies a group reference */
static void vfio_device_put(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	kref_put(&device->kref, vfio_device_release);
	vfio_group_put(group);
}

static void vfio_device_get(struct vfio_device *device)
{
	vfio_group_get(device->group);
	kref_get(&device->kref);
}

static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
						 struct device *dev)
{
	struct vfio_device *device;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (device->dev == dev) {
			vfio_device_get(device);
			mutex_unlock(&group->device_lock);
			return device;
		}
	}
	mutex_unlock(&group->device_lock);
	return NULL;
}

/*
 * Whitelist some drivers that we know are safe (no dma) or just sit on
 * a device.  It's not always practical to leave a device within a group
 * driverless as it could get re-bound to something unsafe.
 */
static const char * const vfio_driver_whitelist[] = { "pci-stub" };

static bool vfio_whitelisted_driver(struct device_driver *drv)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) {
		if (!strcmp(drv->name, vfio_driver_whitelist[i]))
			return true;
	}

	return false;
}

/*
 * A vfio group is viable for use by userspace if all devices are either
 * driver-less or bound to a vfio or whitelisted driver.  We test the
 * latter by the existence of a struct vfio_device matching the dev.
 */
static int vfio_dev_viable(struct device *dev, void *data)
{
	struct vfio_group *group = data;
	struct vfio_device *device;

	if (!dev->driver || vfio_whitelisted_driver(dev->driver))
		return 0;

	device = vfio_group_get_device(group, dev);
	if (device) {
		vfio_device_put(device);
		return 0;
	}

	return -EINVAL;
}

/**
 * Async device support
 */
static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	/* Do we already know about it?  We shouldn't */
	device = vfio_group_get_device(group, dev);
	if (WARN_ON_ONCE(device)) {
		vfio_device_put(device);
		return 0;
	}

	/* Nothing to do for idle groups */
	if (!atomic_read(&group->container_users))
		return 0;

	/* TODO Prevent device auto probing */
  422. WARN("Device %s added to live group %d!\n", dev_name(dev),
  423. iommu_group_id(group->iommu_group));

	return 0;
}

static int vfio_group_nb_del_dev(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	/*
	 * Expect to fall out here.  If a device was in use, it would
	 * have been bound to a vfio sub-driver, which would have blocked
	 * in .remove at vfio_del_group_dev.  Sanity check that we no
	 * longer track the device, so it's safe to remove.
	 */
	device = vfio_group_get_device(group, dev);
	if (likely(!device))
		return 0;
  438. WARN("Device %s removed from live group %d!\n", dev_name(dev),
  439. iommu_group_id(group->iommu_group));

	vfio_device_put(device);
	return 0;
}

static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev)
{
	/* We don't care what happens when the group isn't in use */
	if (!atomic_read(&group->container_users))
		return 0;

	return vfio_dev_viable(dev, group);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct vfio_group *group = container_of(nb, struct vfio_group, nb);
	struct device *dev = data;

	/*
	 * Need to go through a group_lock lookup to get a reference or
	 * we risk racing a group being removed.  Leave a WARN_ON for
	 * debugging, but if the group no longer exists, a spurious notify
	 * is harmless.
	 */
	group = vfio_group_try_get(group);
	if (WARN_ON(!group))
		return NOTIFY_OK;

	switch (action) {
	case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
		vfio_group_nb_add_dev(group, dev);
		break;
	case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
		vfio_group_nb_del_dev(group, dev);
		break;
	case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
		pr_debug("%s: Device %s, group %d binding to driver\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group));
		break;
	case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
		pr_debug("%s: Device %s, group %d bound to driver %s\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group), dev->driver->name);
		BUG_ON(vfio_group_nb_verify(group, dev));
		break;
	case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
		pr_debug("%s: Device %s, group %d unbinding from driver %s\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group), dev->driver->name);
		break;
	case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
		pr_debug("%s: Device %s, group %d unbound from driver\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group));
		/*
		 * XXX An unbound device in a live group is ok, but we'd
		 * really like to avoid the above BUG_ON by preventing other
		 * drivers from binding to it.  Once that occurs, we have to
		 * stop the system to maintain isolation.  At a minimum, we'd
		 * want a toggle to disable driver auto probe for this device.
		 */
		break;
	}

	vfio_group_put(group);
	return NOTIFY_OK;
}

/**
 * VFIO driver API
 */
int vfio_add_group_dev(struct device *dev,
		       const struct vfio_device_ops *ops, void *device_data)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	struct vfio_device *device;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return -EINVAL;

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group) {
		group = vfio_create_group(iommu_group);
		if (IS_ERR(group)) {
			iommu_group_put(iommu_group);
			return PTR_ERR(group);
		}
	}

	device = vfio_group_get_device(group, dev);
	if (device) {
		WARN(1, "Device %s already exists on group %d\n",
		     dev_name(dev), iommu_group_id(iommu_group));
		vfio_device_put(device);
		vfio_group_put(group);
		iommu_group_put(iommu_group);
		return -EBUSY;
	}

	device = vfio_group_create_device(group, dev, ops, device_data);
	if (IS_ERR(device)) {
		vfio_group_put(group);
		iommu_group_put(iommu_group);
		return PTR_ERR(device);
	}

	/*
	 * Added device holds reference to iommu_group and vfio_device
	 * (which in turn holds reference to vfio_group).  Drop extra
	 * group reference used while acquiring device.
	 */
	vfio_group_put(group);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_add_group_dev);

/* Test whether a struct device is present in our tracking */
static bool vfio_dev_present(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	struct vfio_device *device;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return false;

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group) {
		iommu_group_put(iommu_group);
		return false;
	}

	device = vfio_group_get_device(group, dev);
	if (!device) {
		vfio_group_put(group);
		iommu_group_put(iommu_group);
		return false;
	}

	vfio_device_put(device);
	vfio_group_put(group);
	iommu_group_put(iommu_group);
	return true;
}

/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device...
 */
void *vfio_del_group_dev(struct device *dev)
{
	struct vfio_device *device = dev_get_drvdata(dev);
	struct vfio_group *group = device->group;
	struct iommu_group *iommu_group = group->iommu_group;
	void *device_data = device->device_data;

	vfio_device_put(device);

	/* TODO send a signal to encourage this to be released */
	wait_event(vfio.release_q, !vfio_dev_present(dev));

	iommu_group_put(iommu_group);

	return device_data;
}
EXPORT_SYMBOL_GPL(vfio_del_group_dev);
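
/*
 * Illustrative sketch (not part of this file): a bus driver such as
 * vfio-pci pairs vfio_add_group_dev()/vfio_del_group_dev() in its
 * probe/remove paths.  All example_* names are hypothetical.  Note that
 * .open and .release are mandatory (called unconditionally by the group
 * and device fd code) while .ioctl/.read/.write/.mmap are NULL-checked
 * by the device fops below.
 */
#if 0	/* example only, never built */
struct example_dev {
	struct device *dev;
};

static int example_dev_open(void *device_data)
{
	return 0;	/* e.g. enable the device, set up regions */
}

static void example_dev_release(void *device_data)
{
	/* undo example_dev_open; runs when the device fd closes */
}

static const struct vfio_device_ops example_dev_ops = {
	.open		= example_dev_open,
	.release	= example_dev_release,
};

static int example_probe(struct device *dev)
{
	struct example_dev *edev = kzalloc(sizeof(*edev), GFP_KERNEL);
	int ret;

	if (!edev)
		return -ENOMEM;
	edev->dev = dev;

	/* fails with -EINVAL if the device has no iommu_group */
	ret = vfio_add_group_dev(dev, &example_dev_ops, edev);
	if (ret)
		kfree(edev);
	return ret;
}

static int example_remove(struct device *dev)
{
	/* blocks until every fd referencing the vfio_device is closed */
	struct example_dev *edev = vfio_del_group_dev(dev);

	kfree(edev);
	return 0;
}
#endif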

/**
 * VFIO base fd, /dev/vfio/vfio
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver = container->iommu_driver;
	long ret = 0;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {
				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	return ret;
}

/* hold container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}

static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	mutex_lock(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		mutex_unlock(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		/* module reference holds the driver we're working on */
		mutex_unlock(&vfio.iommu_drivers_lock);

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			goto skip_drivers_unlock;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (!ret) {
			container->iommu_driver = driver;
			container->iommu_data = data;
		} else {
			driver->ops->release(data);
			module_put(driver->ops->owner);
		}

		goto skip_drivers_unlock;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);

skip_drivers_unlock:
	mutex_unlock(&container->group_lock);

	return ret;
}

static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	driver = container->iommu_driver;
	data = container->iommu_data;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static long vfio_fops_compat_ioctl(struct file *filep,
				   unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	mutex_init(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}

/*
 * Once an iommu driver is set, we optionally pass read/write/mmap
 * on to the driver, allowing management interfaces beyond ioctl.
 */
static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
			      size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (unlikely(!driver || !driver->ops->read))
		return -EINVAL;

	return driver->ops->read(container->iommu_data, buf, count, ppos);
}

static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
			       size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (unlikely(!driver || !driver->ops->write))
		return -EINVAL;

	return driver->ops->write(container->iommu_data, buf, count, ppos);
}

static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver = container->iommu_driver;

	if (unlikely(!driver || !driver->ops->mmap))
		return -EINVAL;

	return driver->ops->mmap(container->iommu_data, vma);
}

static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.read		= vfio_fops_read,
	.write		= vfio_fops_write,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_fops_compat_ioctl,
#endif
	.mmap		= vfio_fops_mmap,
};

/**
 * VFIO Group fd, /dev/vfio/$GROUP
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	mutex_lock(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	group->container = NULL;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	mutex_unlock(&container->group_lock);

	vfio_container_put(container);
}

/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_unset_container(struct vfio_group *group)
{
	int users = atomic_cmpxchg(&group->container_users, 1, 0);

	if (!users)
		return -EINVAL;
	if (users != 1)
		return -EBUSY;

	__vfio_group_unset_container(group);

	return 0;
}

/*
 * When removing container users, anything that removes the last user
 * implicitly removes the group from the container.  That is, if the
 * group file descriptor is closed, as well as any device file descriptors,
 * the group is free.
 */
static void vfio_group_try_dissolve_container(struct vfio_group *group)
{
	if (0 == atomic_dec_if_positive(&group->container_users))
		__vfio_group_unset_container(group);
}

static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
	struct file *filep;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	if (atomic_read(&group->container_users))
		return -EINVAL;

	filep = fget(container_fd);
	if (!filep)
		return -EBADF;

	/* Sanity check, is this really our fd? */
	if (filep->f_op != &vfio_fops) {
		fput(filep);
		return -EINVAL;
	}

	container = filep->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	mutex_lock(&container->group_lock);

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group);
		if (ret)
			goto unlock_out;
	}

	group->container = container;
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);
	atomic_inc(&group->container_users);

unlock_out:
	mutex_unlock(&container->group_lock);
	fput(filep);

	return ret;
}

static bool vfio_group_viable(struct vfio_group *group)
{
	return (iommu_group_for_each_dev(group->iommu_group,
					 group, vfio_dev_viable) == 0);
}

static const struct file_operations vfio_device_fops;

static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
{
	struct vfio_device *device;
	struct file *filep;
	int ret = -ENODEV;

	if (0 == atomic_read(&group->container_users) ||
	    !group->container->iommu_driver || !vfio_group_viable(group))
		return -EINVAL;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (strcmp(dev_name(device->dev), buf))
			continue;

		ret = device->ops->open(device->device_data);
		if (ret)
			break;

		/*
		 * We can't use anon_inode_getfd() because we need to modify
		 * the f_mode flags directly to allow more than just ioctls
		 */
		ret = get_unused_fd();
		if (ret < 0) {
			device->ops->release(device->device_data);
			break;
		}

		filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
					   device, O_RDWR);
		if (IS_ERR(filep)) {
			put_unused_fd(ret);
			ret = PTR_ERR(filep);
			device->ops->release(device->device_data);
			break;
		}

		/*
		 * TODO: add an anon_inode interface to do this.
		 * Appears to be missing by lack of need rather than
		 * explicitly prevented.  Now there's need.
		 */
		filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);

		fd_install(ret, filep);

		vfio_device_get(device);
		atomic_inc(&group->container_users);
		break;
	}
	mutex_unlock(&group->device_lock);

	return ret;
}

static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	long ret = -ENOTTY;

	switch (cmd) {
	case VFIO_GROUP_GET_STATUS:
	{
		struct vfio_group_status status;
		unsigned long minsz;

		minsz = offsetofend(struct vfio_group_status, flags);

		if (copy_from_user(&status, (void __user *)arg, minsz))
			return -EFAULT;

		if (status.argsz < minsz)
			return -EINVAL;

		status.flags = 0;

		if (vfio_group_viable(group))
			status.flags |= VFIO_GROUP_FLAGS_VIABLE;

		if (group->container)
			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;

		if (copy_to_user((void __user *)arg, &status, minsz))
			return -EFAULT;

		ret = 0;
		break;
	}
	case VFIO_GROUP_SET_CONTAINER:
	{
		int fd;

		if (get_user(fd, (int __user *)arg))
			return -EFAULT;

		if (fd < 0)
			return -EINVAL;

		ret = vfio_group_set_container(group, fd);
		break;
	}
	case VFIO_GROUP_UNSET_CONTAINER:
		ret = vfio_group_unset_container(group);
		break;
	case VFIO_GROUP_GET_DEVICE_FD:
	{
		char *buf;

		buf = strndup_user((const char __user *)arg, PAGE_SIZE);
		if (IS_ERR(buf))
			return PTR_ERR(buf);

		ret = vfio_group_get_device_fd(group, buf);
		kfree(buf);
		break;
	}
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static long vfio_group_fops_compat_ioctl(struct file *filep,
					 unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_group_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group;

	group = vfio_group_get_from_minor(iminor(inode));
	if (!group)
		return -ENODEV;

	if (group->container) {
		vfio_group_put(group);
		return -EBUSY;
	}

	filep->private_data = group;

	return 0;
}

static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	vfio_group_try_dissolve_container(group);

	vfio_group_put(group);

	return 0;
}

static const struct file_operations vfio_group_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_group_fops_compat_ioctl,
#endif
	.open		= vfio_group_fops_open,
	.release	= vfio_group_fops_release,
};
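
/*
 * Illustrative sketch (not part of this file): the userspace side of
 * the container/group ioctl flow implemented above.  The group number
 * and device name are made up; the ioctl names, flags, and
 * VFIO_TYPE1_IOMMU come from <linux/vfio.h>.
 */
#if 0	/* userspace example, never built with the kernel */
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

int example_user_flow(void)
{
	int container, group, device;
	struct vfio_group_status status = { .argsz = sizeof(status) };

	container = open("/dev/vfio/vfio", O_RDWR);
	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
		return -1;	/* ABI mismatch */

	group = open("/dev/vfio/26", O_RDWR);	/* group number is made up */

	ioctl(group, VFIO_GROUP_GET_STATUS, &status);
	if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE))
		return -1;	/* a device in the group lacks a vfio driver */

	/* order matters: attach the group first, then enable an iommu */
	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);

	/* the string must match dev_name(), e.g. a PCI slot name */
	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");

	return device;
}
#endif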

/**
 * VFIO Device fd
 */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;

	device->ops->release(device->device_data);

	vfio_group_try_dissolve_container(device->group);

	vfio_device_put(device);

	return 0;
}

static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->ioctl))
		return -EINVAL;

	return device->ops->ioctl(device->device_data, cmd, arg);
}

static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->read))
		return -EINVAL;

	return device->ops->read(device->device_data, buf, count, ppos);
}

static ssize_t vfio_device_fops_write(struct file *filep,
				      const char __user *buf,
				      size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->write))
		return -EINVAL;

	return device->ops->write(device->device_data, buf, count, ppos);
}

static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->mmap))
		return -EINVAL;

	return device->ops->mmap(device->device_data, vma);
}

#ifdef CONFIG_COMPAT
static long vfio_device_fops_compat_ioctl(struct file *filep,
					  unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_device_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

static const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_device_fops_compat_ioctl,
#endif
	.mmap		= vfio_device_fops_mmap,
};

/**
 * Module/class support
 */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}

static int __init vfio_init(void)
{
	int ret;

	idr_init(&vfio.group_idr);
	mutex_init(&vfio.group_lock);
	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.group_list);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
	init_waitqueue_head(&vfio.release_q);

	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_class;
	}

	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.devt, 0, MINORMASK, "vfio");
	if (ret)
		goto err_base_chrdev;

	cdev_init(&vfio.cdev, &vfio_fops);
	ret = cdev_add(&vfio.cdev, vfio.devt, 1);
	if (ret)
		goto err_base_cdev;

	vfio.dev = device_create(vfio.class, NULL, vfio.devt, NULL, "vfio");
	if (IS_ERR(vfio.dev)) {
		ret = PTR_ERR(vfio.dev);
		goto err_base_dev;
	}

	/* /dev/vfio/$GROUP */
	cdev_init(&vfio.group_cdev, &vfio_group_fops);
	ret = cdev_add(&vfio.group_cdev,
		       MKDEV(MAJOR(vfio.devt), 1), MINORMASK - 1);
	if (ret)
		goto err_groups_cdev;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");

	/*
	 * Attempt to load known iommu-drivers.  This gives us a working
	 * environment without the user needing to explicitly load iommu
	 * drivers.
	 */
	request_module_nowait("vfio_iommu_type1");

	return 0;

err_groups_cdev:
	device_destroy(vfio.class, vfio.devt);
err_base_dev:
	cdev_del(&vfio.cdev);
err_base_cdev:
	unregister_chrdev_region(vfio.devt, MINORMASK);
err_base_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_class:
	return ret;
}

static void __exit vfio_cleanup(void)
{
	WARN_ON(!list_empty(&vfio.group_list));

	idr_destroy(&vfio.group_idr);
	cdev_del(&vfio.group_cdev);
	device_destroy(vfio.class, vfio.devt);
	cdev_del(&vfio.cdev);
	unregister_chrdev_region(vfio.devt, MINORMASK);
	class_destroy(vfio.class);
	vfio.class = NULL;
}

module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);