vfio.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. /*
  2. * VFIO API definition
  3. *
  4. * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
  5. * Author: Alex Williamson <alex.williamson@redhat.com>
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License version 2 as
  9. * published by the Free Software Foundation.
  10. */
  11. #ifndef _UAPIVFIO_H
  12. #define _UAPIVFIO_H
  13. #include <linux/types.h>
  14. #include <linux/ioctl.h>
  15. #define VFIO_API_VERSION 0 /* Value documented as the return of VFIO_GET_API_VERSION */
  16. /* Kernel & User level defines for VFIO IOCTLs. */
  17. /* Extensions */
  18. #define VFIO_TYPE1_IOMMU 1 /* Type1 IOMMU; presumably the value passed to CHECK_EXTENSION/SET_IOMMU - confirm */
  19. #define VFIO_SPAPR_TCE_IOMMU 2 /* SPAPR TCE IOMMU; presumably used the same way - confirm */
  20. /*
  21. * The IOCTL interface is designed for extensibility by embedding the
  22. * structure length (argsz) and flags into structures passed between
  23. * kernel and userspace. We therefore use the _IO() macro for these
  24. * defines to avoid implicitly embedding a size into the ioctl request.
  25. * As structure fields are added, argsz will increase to match and flag
  26. * bits will be defined to indicate additional fields with valid data.
  27. * It's *always* the caller's responsibility to indicate the size of
  28. * the structure passed by setting argsz appropriately.
  29. */
  30. #define VFIO_TYPE (';') /* ioctl type argument used by every _IO() request in this header */
  31. #define VFIO_BASE 100 /* First request number; each request below is VFIO_BASE + n */
  32. /* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */
  33. /**
  34. * VFIO_GET_API_VERSION - _IO(VFIO_TYPE, VFIO_BASE + 0)
  35. *
  36. * Report the version of the VFIO API. This allows us to bump the entire
  37. * API version should we later need to add or change features in incompatible
  38. * ways.
  39. * Return: VFIO_API_VERSION
  40. * Availability: Always
  41. */
  42. #define VFIO_GET_API_VERSION _IO(VFIO_TYPE, VFIO_BASE + 0) /* Listed under the /dev/vfio/vfio ioctls */
  43. /**
  44. * VFIO_CHECK_EXTENSION - _IOW(VFIO_TYPE, VFIO_BASE + 1, __u32)
  45. *
  46. * Check whether an extension is supported.
  47. * Return: 0 if not supported, 1 (or some other positive integer) if supported.
  48. * Availability: Always
  49. */
  50. #define VFIO_CHECK_EXTENSION _IO(VFIO_TYPE, VFIO_BASE + 1) /* Documented as _IOW(..., __u32); encoded with _IO() so no size is embedded */
  51. /**
  52. * VFIO_SET_IOMMU - _IOW(VFIO_TYPE, VFIO_BASE + 2, __s32)
  53. *
  54. * Set the iommu to the given type. The type must be supported by an
  55. * iommu driver as verified by calling CHECK_EXTENSION using the same
  56. * type. A group must be set to this file descriptor before this
  57. * ioctl is available. The IOMMU interfaces enabled by this call are
  58. * specific to the value set.
  59. * Return: 0 on success, -errno on failure
  60. * Availability: When VFIO group attached
  61. */
  62. #define VFIO_SET_IOMMU _IO(VFIO_TYPE, VFIO_BASE + 2) /* Documented as _IOW(..., __s32); encoded with _IO() so no size is embedded */
  63. /* -------- IOCTLs for GROUP file descriptors (/dev/vfio/$GROUP) -------- */
  64. /**
  65. * VFIO_GROUP_GET_STATUS - _IOR(VFIO_TYPE, VFIO_BASE + 3,
  66. * struct vfio_group_status)
  67. *
  68. * Retrieve information about the group. Fills in provided
  69. * struct vfio_group_status. Caller sets argsz.
  70. * Return: 0 on success, -errno on failure.
  71. * Availability: Always
  72. */
  73. struct vfio_group_status {
  74. __u32 argsz; /* Structure size; the caller must set this */
  75. __u32 flags; /* Presumably a mask of the VFIO_GROUP_FLAGS_* bits below - confirm */
  76. #define VFIO_GROUP_FLAGS_VIABLE (1 << 0)
  77. #define VFIO_GROUP_FLAGS_CONTAINER_SET (1 << 1) /* Presumably set after VFIO_GROUP_SET_CONTAINER - confirm */
  78. };
  79. #define VFIO_GROUP_GET_STATUS _IO(VFIO_TYPE, VFIO_BASE + 3) /* Documented as _IOR; encoded with _IO() so no size is embedded */
  80. /**
  81. * VFIO_GROUP_SET_CONTAINER - _IOW(VFIO_TYPE, VFIO_BASE + 4, __s32)
  82. *
  83. * Set the container for the VFIO group to the open VFIO file
  84. * descriptor provided. Groups may only belong to a single
  85. * container. Containers may, at their discretion, support multiple
  86. * groups. Only when a container is set are all of the interfaces
  87. * of the VFIO file descriptor and the VFIO group file descriptor
  88. * available to the user.
  89. * Return: 0 on success, -errno on failure.
  90. * Availability: Always
  91. */
  92. #define VFIO_GROUP_SET_CONTAINER _IO(VFIO_TYPE, VFIO_BASE + 4) /* Documented as _IOW(..., __s32); encoded with _IO() so no size is embedded */
  93. /**
  94. * VFIO_GROUP_UNSET_CONTAINER - _IO(VFIO_TYPE, VFIO_BASE + 5)
  95. *
  96. * Remove the group from the attached container. This is the
  97. * opposite of the SET_CONTAINER call and returns the group to
  98. * an initial state. All device file descriptors must be released
  99. * prior to calling this interface. When removing the last group
  100. * from a container, the IOMMU will be disabled and all state lost,
  101. * effectively also returning the VFIO file descriptor to an initial
  102. * state.
  103. * Return: 0 on success, -errno on failure.
  104. * Availability: When attached to container
  105. */
  106. #define VFIO_GROUP_UNSET_CONTAINER _IO(VFIO_TYPE, VFIO_BASE + 5) /* Documented opposite of VFIO_GROUP_SET_CONTAINER */
  107. /**
  108. * VFIO_GROUP_GET_DEVICE_FD - _IOW(VFIO_TYPE, VFIO_BASE + 6, char)
  109. *
  110. * Return a new file descriptor for the device object described by
  111. * the provided string. The string should match a device listed in
  112. * the devices subdirectory of the IOMMU group sysfs entry. The
  113. * group containing the device must already be added to this context.
  114. * Return: new file descriptor on success, -errno on failure.
  115. * Availability: When attached to container
  116. */
  117. #define VFIO_GROUP_GET_DEVICE_FD _IO(VFIO_TYPE, VFIO_BASE + 6) /* Documented as _IOW(..., char); encoded with _IO() so no size is embedded */
  118. /* --------------- IOCTLs for DEVICE file descriptors --------------- */
  119. /**
  120. * VFIO_DEVICE_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 7,
  121. * struct vfio_device_info)
  122. *
  123. * Retrieve information about the device. Fills in provided
  124. * struct vfio_device_info. Caller sets argsz.
  125. * Return: 0 on success, -errno on failure.
  126. */
  127. struct vfio_device_info {
  128. __u32 argsz; /* Structure size; the caller must set this */
  129. __u32 flags; /* Presumably a mask of the VFIO_DEVICE_FLAGS_* bits below - confirm */
  130. #define VFIO_DEVICE_FLAGS_RESET (1 << 0) /* Device supports reset */
  131. #define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */
  132. __u32 num_regions; /* Max region index + 1 */
  133. __u32 num_irqs; /* Max IRQ index + 1 */
  134. };
  135. #define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) /* Documented as _IOR; encoded with _IO() so no size is embedded */
  136. /**
  137. * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
  138. * struct vfio_region_info)
  139. *
  140. * Retrieve information about a device region. Caller provides
  141. * struct vfio_region_info with index value set. Caller sets argsz.
  142. * Implementation of region mapping is bus driver specific. This is
  143. * intended to describe MMIO, I/O port, as well as bus specific
  144. * regions (ex. PCI config space). Zero sized regions may be used
  145. * to describe unimplemented regions (ex. unimplemented PCI BARs).
  146. * Return: 0 on success, -errno on failure.
  147. */
  148. struct vfio_region_info {
  149. __u32 argsz; /* Structure size; the caller must set this */
  150. __u32 flags; /* Presumably a mask of the VFIO_REGION_INFO_FLAG_* bits below - confirm */
  151. #define VFIO_REGION_INFO_FLAG_READ (1 << 0) /* Region supports read */
  152. #define VFIO_REGION_INFO_FLAG_WRITE (1 << 1) /* Region supports write */
  153. #define VFIO_REGION_INFO_FLAG_MMAP (1 << 2) /* Region supports mmap */
  154. __u32 index; /* Region index; provided by the caller */
  155. __u32 resv; /* Reserved for alignment */
  156. __u64 size; /* Region size (bytes); zero may describe an unimplemented region */
  157. __u64 offset; /* Region offset from start of device fd */
  158. };
  159. #define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8) /* Documented as _IOWR; encoded with _IO() so no size is embedded */
  160. /**
  161. * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
  162. * struct vfio_irq_info)
  163. *
  164. * Retrieve information about a device IRQ. Caller provides
  165. * struct vfio_irq_info with index value set. Caller sets argsz.
  166. * Implementation of IRQ mapping is bus driver specific. Indexes
  167. * using multiple IRQs are primarily intended to support MSI-like
  168. * interrupt blocks. Zero count irq blocks may be used to describe
  169. * unimplemented interrupt types.
  170. *
  171. * The EVENTFD flag indicates the interrupt index supports eventfd based
  172. * signaling.
  173. *
  174. * The MASKABLE flag indicates the index supports MASK and UNMASK
  175. * actions described below.
  176. *
  177. * AUTOMASKED indicates that after signaling, the interrupt line is
  178. * automatically masked by VFIO and the user needs to unmask the line
  179. * to receive new interrupts. This is primarily intended to distinguish
  180. * level triggered interrupts.
  181. *
  182. * The NORESIZE flag indicates that the interrupt lines within the index
  183. * are setup as a set and new subindexes cannot be enabled without first
  184. * disabling the entire index. This is used for interrupts like PCI MSI
  185. * and MSI-X where the driver may only use a subset of the available
  186. * indexes, but VFIO needs to enable a specific number of vectors
  187. * upfront. In the case of MSI-X, where the user can enable MSI-X and
  188. * then add and unmask vectors, it's up to userspace to make the decision
  189. * whether to allocate the maximum supported number of vectors or tear
  190. * down setup and incrementally increase the vectors as each is enabled.
  191. */
  192. struct vfio_irq_info {
  193. __u32 argsz; /* Structure size; the caller must set this */
  194. __u32 flags; /* Presumably a mask of the VFIO_IRQ_INFO_* bits below - confirm */
  195. #define VFIO_IRQ_INFO_EVENTFD (1 << 0) /* Index supports eventfd based signaling */
  196. #define VFIO_IRQ_INFO_MASKABLE (1 << 1) /* Index supports the MASK and UNMASK actions */
  197. #define VFIO_IRQ_INFO_AUTOMASKED (1 << 2) /* Line is masked by VFIO after signaling; user must unmask */
  198. #define VFIO_IRQ_INFO_NORESIZE (1 << 3) /* New subindexes need the entire index disabled first */
  199. __u32 index; /* IRQ index */
  200. __u32 count; /* Number of IRQs within this index */
  201. };
  202. #define VFIO_DEVICE_GET_IRQ_INFO _IO(VFIO_TYPE, VFIO_BASE + 9) /* Documented as _IOWR; encoded with _IO() so no size is embedded */
  203. /**
  204. * VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct vfio_irq_set)
  205. *
  206. * Set signaling, masking, and unmasking of interrupts. Caller provides
  207. * struct vfio_irq_set with all fields set. 'start' and 'count' indicate
  208. * the range of subindexes being specified.
  209. *
  210. * The DATA flags specify the type of data provided. If DATA_NONE, the
  211. * operation performs the specified action immediately on the specified
  212. * interrupt(s). For example, to unmask AUTOMASKED interrupt [0,0]:
  213. * flags = (DATA_NONE|ACTION_UNMASK), index = 0, start = 0, count = 1.
  214. *
  215. * DATA_BOOL allows sparse support for the same on arrays of interrupts.
  216. * For example, to mask interrupts [0,1] and [0,3] (but not [0,2]):
  217. * flags = (DATA_BOOL|ACTION_MASK), index = 0, start = 1, count = 3,
  218. * data = {1,0,1}
  219. *
  220. * DATA_EVENTFD binds the specified ACTION to the provided __s32 eventfd.
  221. * A value of -1 can be used to either de-assign interrupts if already
  222. * assigned or skip un-assigned interrupts. For example, to set an eventfd
  223. * to be triggered for interrupts [0,0] and [0,2]:
  224. * flags = (DATA_EVENTFD|ACTION_TRIGGER), index = 0, start = 0, count = 3,
  225. * data = {fd1, -1, fd2}
  226. * If index [0,1] is previously set, two count = 1 ioctl calls would be
  227. * required to set [0,0] and [0,2] without changing [0,1].
  228. *
  229. * Once a signaling mechanism is set, DATA_BOOL or DATA_NONE can be used
  230. * with ACTION_TRIGGER to perform kernel level interrupt loopback testing
  231. * from userspace (ie. simulate hardware triggering).
  232. *
  233. * Setting of an event triggering mechanism to userspace for ACTION_TRIGGER
  234. * enables the interrupt index for the device. Individual subindex interrupts
  235. * can be disabled using the -1 value for DATA_EVENTFD or the index can be
  236. * disabled as a whole with: flags = (DATA_NONE|ACTION_TRIGGER), count = 0.
  237. *
  238. * Note that ACTION_[UN]MASK specify user->kernel signaling (irqfds) while
  239. * ACTION_TRIGGER specifies kernel->user signaling.
  240. */
  241. struct vfio_irq_set {
  242. __u32 argsz; /* Structure size; the caller must set this */
  243. __u32 flags; /* Documented examples combine one DATA_* bit with one ACTION_* bit */
  244. #define VFIO_IRQ_SET_DATA_NONE (1 << 0) /* Data not present */
  245. #define VFIO_IRQ_SET_DATA_BOOL (1 << 1) /* Data is bool (u8) */
  246. #define VFIO_IRQ_SET_DATA_EVENTFD (1 << 2) /* Data is eventfd (s32) */
  247. #define VFIO_IRQ_SET_ACTION_MASK (1 << 3) /* Mask interrupt */
  248. #define VFIO_IRQ_SET_ACTION_UNMASK (1 << 4) /* Unmask interrupt */
  249. #define VFIO_IRQ_SET_ACTION_TRIGGER (1 << 5) /* Trigger interrupt */
  250. __u32 index; /* IRQ index the request applies to */
  251. __u32 start; /* First subindex of the range being specified */
  252. __u32 count; /* Number of subindexes in the range */
  253. __u8 data[]; /* Trailing payload; element type is selected by the DATA_* flag */
  254. };
  255. #define VFIO_DEVICE_SET_IRQS _IO(VFIO_TYPE, VFIO_BASE + 10) /* Documented as _IOW; encoded with _IO() so no size is embedded */
  256. #define VFIO_IRQ_SET_DATA_TYPE_MASK (VFIO_IRQ_SET_DATA_NONE | \
  257. VFIO_IRQ_SET_DATA_BOOL | \
  258. VFIO_IRQ_SET_DATA_EVENTFD) /* OR of every DATA_* bit defined above */
  259. #define VFIO_IRQ_SET_ACTION_TYPE_MASK (VFIO_IRQ_SET_ACTION_MASK | \
  260. VFIO_IRQ_SET_ACTION_UNMASK | \
  261. VFIO_IRQ_SET_ACTION_TRIGGER) /* OR of every ACTION_* bit defined above */
  262. /**
  263. * VFIO_DEVICE_RESET - _IO(VFIO_TYPE, VFIO_BASE + 11)
  264. *
  265. * Reset a device.
  266. */
  267. #define VFIO_DEVICE_RESET _IO(VFIO_TYPE, VFIO_BASE + 11) /* Listed under the device file descriptor ioctls */
  268. /*
  269. * The VFIO-PCI bus driver makes use of the following fixed region and
  270. * IRQ index mapping. Unimplemented regions return a size of zero.
  271. * Unimplemented IRQ types return a count of zero.
  272. */
  273. enum {
  274. VFIO_PCI_BAR0_REGION_INDEX, /* First enumerator, so its value is 0 */
  275. VFIO_PCI_BAR1_REGION_INDEX,
  276. VFIO_PCI_BAR2_REGION_INDEX,
  277. VFIO_PCI_BAR3_REGION_INDEX,
  278. VFIO_PCI_BAR4_REGION_INDEX,
  279. VFIO_PCI_BAR5_REGION_INDEX,
  280. VFIO_PCI_ROM_REGION_INDEX,
  281. VFIO_PCI_CONFIG_REGION_INDEX,
  282. /*
  283. * Expose VGA regions defined for PCI base class 03, subclass 00.
  284. * This includes I/O port ranges 0x3b0 to 0x3bb and 0x3c0 to 0x3df
  285. * as well as the MMIO range 0xa0000 to 0xbffff. Each implemented
  286. * range is found at its identity mapped offset from the region
  287. * offset, for example 0x3b0 is region_info.offset + 0x3b0. Areas
  288. * between described ranges are unimplemented.
  289. */
  290. VFIO_PCI_VGA_REGION_INDEX,
  291. VFIO_PCI_NUM_REGIONS /* Last enumerator: equals the number of region indexes above */
  292. };
  293. enum {
  294. VFIO_PCI_INTX_IRQ_INDEX, /* First enumerator, so its value is 0 */
  295. VFIO_PCI_MSI_IRQ_INDEX,
  296. VFIO_PCI_MSIX_IRQ_INDEX,
  297. VFIO_PCI_ERR_IRQ_INDEX,
  298. VFIO_PCI_NUM_IRQS /* Last enumerator: equals the number of IRQ indexes above */
  299. };
  300. /* -------- API for Type1 VFIO IOMMU -------- */
  301. /**
  302. * VFIO_IOMMU_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 12,
  303. * struct vfio_iommu_type1_info)
  304. * Retrieve information about the IOMMU object. Fills in provided
  305. * struct vfio_iommu_type1_info. Caller sets argsz.
  306. *
  307. * XXX Should we do these by CHECK_EXTENSION too?
  308. */
  309. struct vfio_iommu_type1_info {
  310. __u32 argsz; /* Structure size; the caller must set this */
  311. __u32 flags; /* Presumably a mask of the VFIO_IOMMU_INFO_* bits below - confirm */
  312. #define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */
  313. __u64 iova_pgsizes; /* Bitmap of supported page sizes */
  314. };
  315. #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) /* Documented as _IOR; encoded with _IO() so no size is embedded */
  316. /**
  317. * VFIO_IOMMU_MAP_DMA - _IOW(VFIO_TYPE, VFIO_BASE + 13,
  318. * struct vfio_iommu_type1_dma_map)
  319. * Map process virtual addresses to IO virtual addresses using the
  320. * provided struct vfio_iommu_type1_dma_map. Caller sets argsz. READ and/or WRITE required.
  321. */
  322. struct vfio_iommu_type1_dma_map {
  323. __u32 argsz; /* Structure size; the caller must set this */
  324. __u32 flags; /* Presumably a mask of the VFIO_DMA_MAP_FLAG_* bits below - confirm */
  325. #define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */
  326. #define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */
  327. __u64 vaddr; /* Process virtual address */
  328. __u64 iova; /* IO virtual address */
  329. __u64 size; /* Size of mapping (bytes) */
  330. };
  331. #define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13) /* Documented as _IOW; encoded with _IO() so no size is embedded */
  332. /**
  333. * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14,
  334. * struct vfio_iommu_type1_dma_unmap)
  335. *
  336. * Unmap IO virtual addresses using the provided struct vfio_iommu_type1_dma_unmap.
  337. * Caller sets argsz. The actual unmapped size is returned in the size
  338. * field. No guarantee is made to the user that arbitrary unmaps of iova
  339. * or size different from those used in the original mapping call will
  340. * succeed.
  341. */
  342. struct vfio_iommu_type1_dma_unmap {
  343. __u32 argsz; /* Structure size; the caller must set this */
  344. __u32 flags; /* No flag bits for this structure are defined in this header */
  345. __u64 iova; /* IO virtual address */
  346. __u64 size; /* Size of mapping (bytes); documented to hold the actual unmapped size on return */
  347. };
  348. #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) /* Documented as _IOWR; encoded with _IO() so no size is embedded */
  349. /*
  350. * IOCTLs to enable/disable IOMMU container usage.
  351. * No parameters are supported.
  352. */
  353. #define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15) /* Enable IOMMU container usage; takes no parameters */
  354. #define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16) /* Disable IOMMU container usage; takes no parameters */
  355. /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
  356. /*
  357. * The SPAPR TCE info struct provides the information about the PCI bus
  358. * address ranges available for DMA, these values are programmed into
  359. * the hardware so the guest has to know that information.
  360. *
  361. * The DMA 32 bit window start is an absolute PCI bus address.
  362. * The IOVA address passed via map/unmap ioctls are absolute PCI bus
  363. * addresses too so the window works as a filter rather than an offset
  364. * for IOVA addresses.
  365. *
  366. * A flag will need to be added if other page sizes are supported,
  367. * so as defined here, it is always 4k.
  368. */
  369. struct vfio_iommu_spapr_tce_info {
  370. __u32 argsz; /* Structure size; the caller must set this */
  371. __u32 flags; /* reserved for future use */
  372. __u32 dma32_window_start; /* 32 bit window start (bytes) */
  373. __u32 dma32_window_size; /* 32 bit window size (bytes) */
  374. };
  375. #define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) /* NOTE(review): same request number as VFIO_IOMMU_GET_INFO; presumably intentional because the two belong to different IOMMU types - confirm */
  376. /* ***************************************************************** */
  377. #endif /* _UAPIVFIO_H */