vringh.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007
  1. /*
  2. * Helpers for the host side of a virtio ring.
  3. *
  4. * Since these may be in userspace, we use (inline) accessors.
  5. */
  6. #include <linux/vringh.h>
  7. #include <linux/virtio_ring.h>
  8. #include <linux/kernel.h>
  9. #include <linux/ratelimit.h>
  10. #include <linux/uaccess.h>
  11. #include <linux/slab.h>
  12. #include <linux/export.h>
  13. static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
  14. {
  15. static DEFINE_RATELIMIT_STATE(vringh_rs,
  16. DEFAULT_RATELIMIT_INTERVAL,
  17. DEFAULT_RATELIMIT_BURST);
  18. if (__ratelimit(&vringh_rs)) {
  19. va_list ap;
  20. va_start(ap, fmt);
  21. printk(KERN_NOTICE "vringh:");
  22. vprintk(fmt, ap);
  23. va_end(ap);
  24. }
  25. }
  26. /* Returns vring->num if empty, -ve on error. */
  27. static inline int __vringh_get_head(const struct vringh *vrh,
  28. int (*getu16)(u16 *val, const u16 *p),
  29. u16 *last_avail_idx)
  30. {
  31. u16 avail_idx, i, head;
  32. int err;
  33. err = getu16(&avail_idx, &vrh->vring.avail->idx);
  34. if (err) {
  35. vringh_bad("Failed to access avail idx at %p",
  36. &vrh->vring.avail->idx);
  37. return err;
  38. }
  39. if (*last_avail_idx == avail_idx)
  40. return vrh->vring.num;
  41. /* Only get avail ring entries after they have been exposed by guest. */
  42. virtio_rmb(vrh->weak_barriers);
  43. i = *last_avail_idx & (vrh->vring.num - 1);
  44. err = getu16(&head, &vrh->vring.avail->ring[i]);
  45. if (err) {
  46. vringh_bad("Failed to read head: idx %d address %p",
  47. *last_avail_idx, &vrh->vring.avail->ring[i]);
  48. return err;
  49. }
  50. if (head >= vrh->vring.num) {
  51. vringh_bad("Guest says index %u > %u is available",
  52. head, vrh->vring.num);
  53. return -EINVAL;
  54. }
  55. (*last_avail_idx)++;
  56. return head;
  57. }
  58. /* Copy some bytes to/from the iovec. Returns num copied. */
  59. static inline ssize_t vringh_iov_xfer(struct vringh_kiov *iov,
  60. void *ptr, size_t len,
  61. int (*xfer)(void *addr, void *ptr,
  62. size_t len))
  63. {
  64. int err, done = 0;
  65. while (len && iov->i < iov->used) {
  66. size_t partlen;
  67. partlen = min(iov->iov[iov->i].iov_len, len);
  68. err = xfer(iov->iov[iov->i].iov_base, ptr, partlen);
  69. if (err)
  70. return err;
  71. done += partlen;
  72. len -= partlen;
  73. ptr += partlen;
  74. iov->consumed += partlen;
  75. iov->iov[iov->i].iov_len -= partlen;
  76. iov->iov[iov->i].iov_base += partlen;
  77. if (!iov->iov[iov->i].iov_len) {
  78. /* Fix up old iov element then increment. */
  79. iov->iov[iov->i].iov_len = iov->consumed;
  80. iov->iov[iov->i].iov_base -= iov->consumed;
  81. iov->consumed = 0;
  82. iov->i++;
  83. }
  84. }
  85. return done;
  86. }
  87. /* May reduce *len if range is shorter. */
  88. static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len,
  89. struct vringh_range *range,
  90. bool (*getrange)(struct vringh *,
  91. u64, struct vringh_range *))
  92. {
  93. if (addr < range->start || addr > range->end_incl) {
  94. if (!getrange(vrh, addr, range))
  95. return false;
  96. }
  97. BUG_ON(addr < range->start || addr > range->end_incl);
  98. /* To end of memory? */
  99. if (unlikely(addr + *len == 0)) {
  100. if (range->end_incl == -1ULL)
  101. return true;
  102. goto truncate;
  103. }
  104. /* Otherwise, don't wrap. */
  105. if (addr + *len < addr) {
  106. vringh_bad("Wrapping descriptor %zu@0x%llx",
  107. *len, (unsigned long long)addr);
  108. return false;
  109. }
  110. if (unlikely(addr + *len - 1 > range->end_incl))
  111. goto truncate;
  112. return true;
  113. truncate:
  114. *len = range->end_incl + 1 - addr;
  115. return true;
  116. }
  117. static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len,
  118. struct vringh_range *range,
  119. bool (*getrange)(struct vringh *,
  120. u64, struct vringh_range *))
  121. {
  122. return true;
  123. }
  124. /* No reason for this code to be inline. */
  125. static int move_to_indirect(int *up_next, u16 *i, void *addr,
  126. const struct vring_desc *desc,
  127. struct vring_desc **descs, int *desc_max)
  128. {
  129. /* Indirect tables can't have indirect. */
  130. if (*up_next != -1) {
  131. vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
  132. return -EINVAL;
  133. }
  134. if (unlikely(desc->len % sizeof(struct vring_desc))) {
  135. vringh_bad("Strange indirect len %u", desc->len);
  136. return -EINVAL;
  137. }
  138. /* We will check this when we follow it! */
  139. if (desc->flags & VRING_DESC_F_NEXT)
  140. *up_next = desc->next;
  141. else
  142. *up_next = -2;
  143. *descs = addr;
  144. *desc_max = desc->len / sizeof(struct vring_desc);
  145. /* Now, start at the first indirect. */
  146. *i = 0;
  147. return 0;
  148. }
  149. static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp)
  150. {
  151. struct kvec *new;
  152. unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2;
  153. if (new_num < 8)
  154. new_num = 8;
  155. flag = (iov->max_num & VRINGH_IOV_ALLOCATED);
  156. if (flag)
  157. new = krealloc(iov->iov, new_num * sizeof(struct iovec), gfp);
  158. else {
  159. new = kmalloc(new_num * sizeof(struct iovec), gfp);
  160. if (new) {
  161. memcpy(new, iov->iov,
  162. iov->max_num * sizeof(struct iovec));
  163. flag = VRINGH_IOV_ALLOCATED;
  164. }
  165. }
  166. if (!new)
  167. return -ENOMEM;
  168. iov->iov = new;
  169. iov->max_num = (new_num | flag);
  170. return 0;
  171. }
  172. static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next,
  173. struct vring_desc **descs, int *desc_max)
  174. {
  175. u16 i = *up_next;
  176. *up_next = -1;
  177. *descs = vrh->vring.desc;
  178. *desc_max = vrh->vring.num;
  179. return i;
  180. }
  181. static int slow_copy(struct vringh *vrh, void *dst, const void *src,
  182. bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
  183. struct vringh_range *range,
  184. bool (*getrange)(struct vringh *vrh,
  185. u64,
  186. struct vringh_range *)),
  187. bool (*getrange)(struct vringh *vrh,
  188. u64 addr,
  189. struct vringh_range *r),
  190. struct vringh_range *range,
  191. int (*copy)(void *dst, const void *src, size_t len))
  192. {
  193. size_t part, len = sizeof(struct vring_desc);
  194. do {
  195. u64 addr;
  196. int err;
  197. part = len;
  198. addr = (u64)(unsigned long)src - range->offset;
  199. if (!rcheck(vrh, addr, &part, range, getrange))
  200. return -EINVAL;
  201. err = copy(dst, src, part);
  202. if (err)
  203. return err;
  204. dst += part;
  205. src += part;
  206. len -= part;
  207. } while (len);
  208. return 0;
  209. }
  210. static inline int
  211. __vringh_iov(struct vringh *vrh, u16 i,
  212. struct vringh_kiov *riov,
  213. struct vringh_kiov *wiov,
  214. bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
  215. struct vringh_range *range,
  216. bool (*getrange)(struct vringh *, u64,
  217. struct vringh_range *)),
  218. bool (*getrange)(struct vringh *, u64, struct vringh_range *),
  219. gfp_t gfp,
  220. int (*copy)(void *dst, const void *src, size_t len))
  221. {
  222. int err, count = 0, up_next, desc_max;
  223. struct vring_desc desc, *descs;
  224. struct vringh_range range = { -1ULL, 0 }, slowrange;
  225. bool slow = false;
  226. /* We start traversing vring's descriptor table. */
  227. descs = vrh->vring.desc;
  228. desc_max = vrh->vring.num;
  229. up_next = -1;
  230. if (riov)
  231. riov->i = riov->used = 0;
  232. else if (wiov)
  233. wiov->i = wiov->used = 0;
  234. else
  235. /* You must want something! */
  236. BUG();
  237. for (;;) {
  238. void *addr;
  239. struct vringh_kiov *iov;
  240. size_t len;
  241. if (unlikely(slow))
  242. err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange,
  243. &slowrange, copy);
  244. else
  245. err = copy(&desc, &descs[i], sizeof(desc));
  246. if (unlikely(err))
  247. goto fail;
  248. if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
  249. /* Make sure it's OK, and get offset. */
  250. len = desc.len;
  251. if (!rcheck(vrh, desc.addr, &len, &range, getrange)) {
  252. err = -EINVAL;
  253. goto fail;
  254. }
  255. if (unlikely(len != desc.len)) {
  256. slow = true;
  257. /* We need to save this range to use offset */
  258. slowrange = range;
  259. }
  260. addr = (void *)(long)(desc.addr + range.offset);
  261. err = move_to_indirect(&up_next, &i, addr, &desc,
  262. &descs, &desc_max);
  263. if (err)
  264. goto fail;
  265. continue;
  266. }
  267. if (count++ == vrh->vring.num) {
  268. vringh_bad("Descriptor loop in %p", descs);
  269. err = -ELOOP;
  270. goto fail;
  271. }
  272. if (desc.flags & VRING_DESC_F_WRITE)
  273. iov = wiov;
  274. else {
  275. iov = riov;
  276. if (unlikely(wiov && wiov->i)) {
  277. vringh_bad("Readable desc %p after writable",
  278. &descs[i]);
  279. err = -EINVAL;
  280. goto fail;
  281. }
  282. }
  283. if (!iov) {
  284. vringh_bad("Unexpected %s desc",
  285. !wiov ? "writable" : "readable");
  286. err = -EPROTO;
  287. goto fail;
  288. }
  289. again:
  290. /* Make sure it's OK, and get offset. */
  291. len = desc.len;
  292. if (!rcheck(vrh, desc.addr, &len, &range, getrange)) {
  293. err = -EINVAL;
  294. goto fail;
  295. }
  296. addr = (void *)(unsigned long)(desc.addr + range.offset);
  297. if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
  298. err = resize_iovec(iov, gfp);
  299. if (err)
  300. goto fail;
  301. }
  302. iov->iov[iov->used].iov_base = addr;
  303. iov->iov[iov->used].iov_len = len;
  304. iov->used++;
  305. if (unlikely(len != desc.len)) {
  306. desc.len -= len;
  307. desc.addr += len;
  308. goto again;
  309. }
  310. if (desc.flags & VRING_DESC_F_NEXT) {
  311. i = desc.next;
  312. } else {
  313. /* Just in case we need to finish traversing above. */
  314. if (unlikely(up_next > 0)) {
  315. i = return_from_indirect(vrh, &up_next,
  316. &descs, &desc_max);
  317. slow = false;
  318. } else
  319. break;
  320. }
  321. if (i >= desc_max) {
  322. vringh_bad("Chained index %u > %u", i, desc_max);
  323. err = -EINVAL;
  324. goto fail;
  325. }
  326. }
  327. return 0;
  328. fail:
  329. return err;
  330. }
  331. static inline int __vringh_complete(struct vringh *vrh,
  332. const struct vring_used_elem *used,
  333. unsigned int num_used,
  334. int (*putu16)(u16 *p, u16 val),
  335. int (*putused)(struct vring_used_elem *dst,
  336. const struct vring_used_elem
  337. *src, unsigned num))
  338. {
  339. struct vring_used *used_ring;
  340. int err;
  341. u16 used_idx, off;
  342. used_ring = vrh->vring.used;
  343. used_idx = vrh->last_used_idx + vrh->completed;
  344. off = used_idx % vrh->vring.num;
  345. /* Compiler knows num_used == 1 sometimes, hence extra check */
  346. if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) {
  347. u16 part = vrh->vring.num - off;
  348. err = putused(&used_ring->ring[off], used, part);
  349. if (!err)
  350. err = putused(&used_ring->ring[0], used + part,
  351. num_used - part);
  352. } else
  353. err = putused(&used_ring->ring[off], used, num_used);
  354. if (err) {
  355. vringh_bad("Failed to write %u used entries %u at %p",
  356. num_used, off, &used_ring->ring[off]);
  357. return err;
  358. }
  359. /* Make sure buffer is written before we update index. */
  360. virtio_wmb(vrh->weak_barriers);
  361. err = putu16(&vrh->vring.used->idx, used_idx + num_used);
  362. if (err) {
  363. vringh_bad("Failed to update used index at %p",
  364. &vrh->vring.used->idx);
  365. return err;
  366. }
  367. vrh->completed += num_used;
  368. return 0;
  369. }
  370. static inline int __vringh_need_notify(struct vringh *vrh,
  371. int (*getu16)(u16 *val, const u16 *p))
  372. {
  373. bool notify;
  374. u16 used_event;
  375. int err;
  376. /* Flush out used index update. This is paired with the
  377. * barrier that the Guest executes when enabling
  378. * interrupts. */
  379. virtio_mb(vrh->weak_barriers);
  380. /* Old-style, without event indices. */
  381. if (!vrh->event_indices) {
  382. u16 flags;
  383. err = getu16(&flags, &vrh->vring.avail->flags);
  384. if (err) {
  385. vringh_bad("Failed to get flags at %p",
  386. &vrh->vring.avail->flags);
  387. return err;
  388. }
  389. return (!(flags & VRING_AVAIL_F_NO_INTERRUPT));
  390. }
  391. /* Modern: we know when other side wants to know. */
  392. err = getu16(&used_event, &vring_used_event(&vrh->vring));
  393. if (err) {
  394. vringh_bad("Failed to get used event idx at %p",
  395. &vring_used_event(&vrh->vring));
  396. return err;
  397. }
  398. /* Just in case we added so many that we wrap. */
  399. if (unlikely(vrh->completed > 0xffff))
  400. notify = true;
  401. else
  402. notify = vring_need_event(used_event,
  403. vrh->last_used_idx + vrh->completed,
  404. vrh->last_used_idx);
  405. vrh->last_used_idx += vrh->completed;
  406. vrh->completed = 0;
  407. return notify;
  408. }
  409. static inline bool __vringh_notify_enable(struct vringh *vrh,
  410. int (*getu16)(u16 *val, const u16 *p),
  411. int (*putu16)(u16 *p, u16 val))
  412. {
  413. u16 avail;
  414. if (!vrh->event_indices) {
  415. /* Old-school; update flags. */
  416. if (putu16(&vrh->vring.used->flags, 0) != 0) {
  417. vringh_bad("Clearing used flags %p",
  418. &vrh->vring.used->flags);
  419. return true;
  420. }
  421. } else {
  422. if (putu16(&vring_avail_event(&vrh->vring),
  423. vrh->last_avail_idx) != 0) {
  424. vringh_bad("Updating avail event index %p",
  425. &vring_avail_event(&vrh->vring));
  426. return true;
  427. }
  428. }
  429. /* They could have slipped one in as we were doing that: make
  430. * sure it's written, then check again. */
  431. virtio_mb(vrh->weak_barriers);
  432. if (getu16(&avail, &vrh->vring.avail->idx) != 0) {
  433. vringh_bad("Failed to check avail idx at %p",
  434. &vrh->vring.avail->idx);
  435. return true;
  436. }
  437. /* This is unlikely, so we just leave notifications enabled
  438. * (if we're using event_indices, we'll only get one
  439. * notification anyway). */
  440. return avail == vrh->last_avail_idx;
  441. }
  442. static inline void __vringh_notify_disable(struct vringh *vrh,
  443. int (*putu16)(u16 *p, u16 val))
  444. {
  445. if (!vrh->event_indices) {
  446. /* Old-school; update flags. */
  447. if (putu16(&vrh->vring.used->flags, VRING_USED_F_NO_NOTIFY)) {
  448. vringh_bad("Setting used flags %p",
  449. &vrh->vring.used->flags);
  450. }
  451. }
  452. }
  453. /* Userspace access helpers: in this case, addresses are really userspace. */
  454. static inline int getu16_user(u16 *val, const u16 *p)
  455. {
  456. return get_user(*val, (__force u16 __user *)p);
  457. }
  458. static inline int putu16_user(u16 *p, u16 val)
  459. {
  460. return put_user(val, (__force u16 __user *)p);
  461. }
  462. static inline int copydesc_user(void *dst, const void *src, size_t len)
  463. {
  464. return copy_from_user(dst, (__force void __user *)src, len) ?
  465. -EFAULT : 0;
  466. }
  467. static inline int putused_user(struct vring_used_elem *dst,
  468. const struct vring_used_elem *src,
  469. unsigned int num)
  470. {
  471. return copy_to_user((__force void __user *)dst, src,
  472. sizeof(*dst) * num) ? -EFAULT : 0;
  473. }
  474. static inline int xfer_from_user(void *src, void *dst, size_t len)
  475. {
  476. return copy_from_user(dst, (__force void __user *)src, len) ?
  477. -EFAULT : 0;
  478. }
  479. static inline int xfer_to_user(void *dst, void *src, size_t len)
  480. {
  481. return copy_to_user((__force void __user *)dst, src, len) ?
  482. -EFAULT : 0;
  483. }
  484. /**
  485. * vringh_init_user - initialize a vringh for a userspace vring.
  486. * @vrh: the vringh to initialize.
  487. * @features: the feature bits for this ring.
  488. * @num: the number of elements.
  489. * @weak_barriers: true if we only need memory barriers, not I/O.
  490. * @desc: the userpace descriptor pointer.
  491. * @avail: the userpace avail pointer.
  492. * @used: the userpace used pointer.
  493. *
  494. * Returns an error if num is invalid: you should check pointers
  495. * yourself!
  496. */
  497. int vringh_init_user(struct vringh *vrh, u32 features,
  498. unsigned int num, bool weak_barriers,
  499. struct vring_desc __user *desc,
  500. struct vring_avail __user *avail,
  501. struct vring_used __user *used)
  502. {
  503. /* Sane power of 2 please! */
  504. if (!num || num > 0xffff || (num & (num - 1))) {
  505. vringh_bad("Bad ring size %u", num);
  506. return -EINVAL;
  507. }
  508. vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
  509. vrh->weak_barriers = weak_barriers;
  510. vrh->completed = 0;
  511. vrh->last_avail_idx = 0;
  512. vrh->last_used_idx = 0;
  513. vrh->vring.num = num;
  514. /* vring expects kernel addresses, but only used via accessors. */
  515. vrh->vring.desc = (__force struct vring_desc *)desc;
  516. vrh->vring.avail = (__force struct vring_avail *)avail;
  517. vrh->vring.used = (__force struct vring_used *)used;
  518. return 0;
  519. }
  520. EXPORT_SYMBOL(vringh_init_user);
  521. /**
  522. * vringh_getdesc_user - get next available descriptor from userspace ring.
  523. * @vrh: the userspace vring.
  524. * @riov: where to put the readable descriptors (or NULL)
  525. * @wiov: where to put the writable descriptors (or NULL)
  526. * @getrange: function to call to check ranges.
  527. * @head: head index we received, for passing to vringh_complete_user().
  528. *
  529. * Returns 0 if there was no descriptor, 1 if there was, or -errno.
  530. *
  531. * Note that on error return, you can tell the difference between an
  532. * invalid ring and a single invalid descriptor: in the former case,
  533. * *head will be vrh->vring.num. You may be able to ignore an invalid
  534. * descriptor, but there's not much you can do with an invalid ring.
  535. *
  536. * Note that you may need to clean up riov and wiov, even on error!
  537. */
  538. int vringh_getdesc_user(struct vringh *vrh,
  539. struct vringh_iov *riov,
  540. struct vringh_iov *wiov,
  541. bool (*getrange)(struct vringh *vrh,
  542. u64 addr, struct vringh_range *r),
  543. u16 *head)
  544. {
  545. int err;
  546. *head = vrh->vring.num;
  547. err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx);
  548. if (err < 0)
  549. return err;
  550. /* Empty... */
  551. if (err == vrh->vring.num)
  552. return 0;
  553. /* We need the layouts to be the identical for this to work */
  554. BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov));
  555. BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) !=
  556. offsetof(struct vringh_iov, iov));
  557. BUILD_BUG_ON(offsetof(struct vringh_kiov, i) !=
  558. offsetof(struct vringh_iov, i));
  559. BUILD_BUG_ON(offsetof(struct vringh_kiov, used) !=
  560. offsetof(struct vringh_iov, used));
  561. BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) !=
  562. offsetof(struct vringh_iov, max_num));
  563. BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
  564. BUILD_BUG_ON(offsetof(struct iovec, iov_base) !=
  565. offsetof(struct kvec, iov_base));
  566. BUILD_BUG_ON(offsetof(struct iovec, iov_len) !=
  567. offsetof(struct kvec, iov_len));
  568. BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base)
  569. != sizeof(((struct kvec *)NULL)->iov_base));
  570. BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len)
  571. != sizeof(((struct kvec *)NULL)->iov_len));
  572. *head = err;
  573. err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov,
  574. (struct vringh_kiov *)wiov,
  575. range_check, getrange, GFP_KERNEL, copydesc_user);
  576. if (err)
  577. return err;
  578. return 1;
  579. }
  580. EXPORT_SYMBOL(vringh_getdesc_user);
  581. /**
  582. * vringh_iov_pull_user - copy bytes from vring_iov.
  583. * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume)
  584. * @dst: the place to copy.
  585. * @len: the maximum length to copy.
  586. *
  587. * Returns the bytes copied <= len or a negative errno.
  588. */
  589. ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len)
  590. {
  591. return vringh_iov_xfer((struct vringh_kiov *)riov,
  592. dst, len, xfer_from_user);
  593. }
  594. EXPORT_SYMBOL(vringh_iov_pull_user);
  595. /**
  596. * vringh_iov_push_user - copy bytes into vring_iov.
  597. * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume)
  598. * @dst: the place to copy.
  599. * @len: the maximum length to copy.
  600. *
  601. * Returns the bytes copied <= len or a negative errno.
  602. */
  603. ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
  604. const void *src, size_t len)
  605. {
  606. return vringh_iov_xfer((struct vringh_kiov *)wiov,
  607. (void *)src, len, xfer_to_user);
  608. }
  609. EXPORT_SYMBOL(vringh_iov_push_user);
  610. /**
  611. * vringh_abandon_user - we've decided not to handle the descriptor(s).
  612. * @vrh: the vring.
  613. * @num: the number of descriptors to put back (ie. num
  614. * vringh_get_user() to undo).
  615. *
  616. * The next vringh_get_user() will return the old descriptor(s) again.
  617. */
  618. void vringh_abandon_user(struct vringh *vrh, unsigned int num)
  619. {
  620. /* We only update vring_avail_event(vr) when we want to be notified,
  621. * so we haven't changed that yet. */
  622. vrh->last_avail_idx -= num;
  623. }
  624. EXPORT_SYMBOL(vringh_abandon_user);
  625. /**
  626. * vringh_complete_user - we've finished with descriptor, publish it.
  627. * @vrh: the vring.
  628. * @head: the head as filled in by vringh_getdesc_user.
  629. * @len: the length of data we have written.
  630. *
  631. * You should check vringh_need_notify_user() after one or more calls
  632. * to this function.
  633. */
  634. int vringh_complete_user(struct vringh *vrh, u16 head, u32 len)
  635. {
  636. struct vring_used_elem used;
  637. used.id = head;
  638. used.len = len;
  639. return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
  640. }
  641. EXPORT_SYMBOL(vringh_complete_user);
  642. /**
  643. * vringh_complete_multi_user - we've finished with many descriptors.
  644. * @vrh: the vring.
  645. * @used: the head, length pairs.
  646. * @num_used: the number of used elements.
  647. *
  648. * You should check vringh_need_notify_user() after one or more calls
  649. * to this function.
  650. */
  651. int vringh_complete_multi_user(struct vringh *vrh,
  652. const struct vring_used_elem used[],
  653. unsigned num_used)
  654. {
  655. return __vringh_complete(vrh, used, num_used,
  656. putu16_user, putused_user);
  657. }
  658. EXPORT_SYMBOL(vringh_complete_multi_user);
  659. /**
  660. * vringh_notify_enable_user - we want to know if something changes.
  661. * @vrh: the vring.
  662. *
  663. * This always enables notifications, but returns false if there are
  664. * now more buffers available in the vring.
  665. */
  666. bool vringh_notify_enable_user(struct vringh *vrh)
  667. {
  668. return __vringh_notify_enable(vrh, getu16_user, putu16_user);
  669. }
  670. EXPORT_SYMBOL(vringh_notify_enable_user);
  671. /**
  672. * vringh_notify_disable_user - don't tell us if something changes.
  673. * @vrh: the vring.
  674. *
  675. * This is our normal running state: we disable and then only enable when
  676. * we're going to sleep.
  677. */
  678. void vringh_notify_disable_user(struct vringh *vrh)
  679. {
  680. __vringh_notify_disable(vrh, putu16_user);
  681. }
  682. EXPORT_SYMBOL(vringh_notify_disable_user);
  683. /**
  684. * vringh_need_notify_user - must we tell the other side about used buffers?
  685. * @vrh: the vring we've called vringh_complete_user() on.
  686. *
  687. * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
  688. */
  689. int vringh_need_notify_user(struct vringh *vrh)
  690. {
  691. return __vringh_need_notify(vrh, getu16_user);
  692. }
  693. EXPORT_SYMBOL(vringh_need_notify_user);
  694. /* Kernelspace access helpers. */
  695. static inline int getu16_kern(u16 *val, const u16 *p)
  696. {
  697. *val = ACCESS_ONCE(*p);
  698. return 0;
  699. }
  700. static inline int putu16_kern(u16 *p, u16 val)
  701. {
  702. ACCESS_ONCE(*p) = val;
  703. return 0;
  704. }
  /* Descriptor copy for kernel rings: a plain memcpy that always returns 0. */
  static inline int copydesc_kern(void *dst, const void *src, size_t len)
  {
      const int ok = 0;

      (void)memcpy(dst, src, len);
      return ok;
  }
  710. static inline int putused_kern(struct vring_used_elem *dst,
  711. const struct vring_used_elem *src,
  712. unsigned int num)
  713. {
  714. memcpy(dst, src, num * sizeof(*dst));
  715. return 0;
  716. }
  /*
   * Transfer callback for kernel memory. Note the argument order: the
   * first pointer is the source, the second the destination. Returns 0.
   */
  static inline int xfer_kern(void *src, void *dst, size_t len)
  {
      const int ok = 0;

      (void)memcpy(dst, src, len);
      return ok;
  }
  722. /**
  723. * vringh_init_kern - initialize a vringh for a kernelspace vring.
  724. * @vrh: the vringh to initialize.
  725. * @features: the feature bits for this ring.
  726. * @num: the number of elements.
  727. * @weak_barriers: true if we only need memory barriers, not I/O.
  728. * @desc: the userpace descriptor pointer.
  729. * @avail: the userpace avail pointer.
  730. * @used: the userpace used pointer.
  731. *
  732. * Returns an error if num is invalid.
  733. */
  734. int vringh_init_kern(struct vringh *vrh, u32 features,
  735. unsigned int num, bool weak_barriers,
  736. struct vring_desc *desc,
  737. struct vring_avail *avail,
  738. struct vring_used *used)
  739. {
  740. /* Sane power of 2 please! */
  741. if (!num || num > 0xffff || (num & (num - 1))) {
  742. vringh_bad("Bad ring size %u", num);
  743. return -EINVAL;
  744. }
  745. vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
  746. vrh->weak_barriers = weak_barriers;
  747. vrh->completed = 0;
  748. vrh->last_avail_idx = 0;
  749. vrh->last_used_idx = 0;
  750. vrh->vring.num = num;
  751. vrh->vring.desc = desc;
  752. vrh->vring.avail = avail;
  753. vrh->vring.used = used;
  754. return 0;
  755. }
  756. EXPORT_SYMBOL(vringh_init_kern);
  757. /**
  758. * vringh_getdesc_kern - get next available descriptor from kernelspace ring.
  759. * @vrh: the kernelspace vring.
  760. * @riov: where to put the readable descriptors (or NULL)
  761. * @wiov: where to put the writable descriptors (or NULL)
  762. * @head: head index we received, for passing to vringh_complete_kern().
  763. * @gfp: flags for allocating larger riov/wiov.
  764. *
  765. * Returns 0 if there was no descriptor, 1 if there was, or -errno.
  766. *
  767. * Note that on error return, you can tell the difference between an
  768. * invalid ring and a single invalid descriptor: in the former case,
  769. * *head will be vrh->vring.num. You may be able to ignore an invalid
  770. * descriptor, but there's not much you can do with an invalid ring.
  771. *
  772. * Note that you may need to clean up riov and wiov, even on error!
  773. */
  774. int vringh_getdesc_kern(struct vringh *vrh,
  775. struct vringh_kiov *riov,
  776. struct vringh_kiov *wiov,
  777. u16 *head,
  778. gfp_t gfp)
  779. {
  780. int err;
  781. err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx);
  782. if (err < 0)
  783. return err;
  784. /* Empty... */
  785. if (err == vrh->vring.num)
  786. return 0;
  787. *head = err;
  788. err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
  789. gfp, copydesc_kern);
  790. if (err)
  791. return err;
  792. return 1;
  793. }
  794. EXPORT_SYMBOL(vringh_getdesc_kern);
  795. /**
  796. * vringh_iov_pull_kern - copy bytes from vring_iov.
  797. * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume)
  798. * @dst: the place to copy.
  799. * @len: the maximum length to copy.
  800. *
  801. * Returns the bytes copied <= len or a negative errno.
  802. */
  803. ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len)
  804. {
  805. return vringh_iov_xfer(riov, dst, len, xfer_kern);
  806. }
  807. EXPORT_SYMBOL(vringh_iov_pull_kern);
  808. /**
  809. * vringh_iov_push_kern - copy bytes into vring_iov.
  810. * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
  811. * @dst: the place to copy.
  812. * @len: the maximum length to copy.
  813. *
  814. * Returns the bytes copied <= len or a negative errno.
  815. */
  816. ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
  817. const void *src, size_t len)
  818. {
  819. return vringh_iov_xfer(wiov, (void *)src, len, xfer_kern);
  820. }
  821. EXPORT_SYMBOL(vringh_iov_push_kern);
  822. /**
  823. * vringh_abandon_kern - we've decided not to handle the descriptor(s).
  824. * @vrh: the vring.
  825. * @num: the number of descriptors to put back (ie. num
  826. * vringh_get_kern() to undo).
  827. *
  828. * The next vringh_get_kern() will return the old descriptor(s) again.
  829. */
  830. void vringh_abandon_kern(struct vringh *vrh, unsigned int num)
  831. {
  832. /* We only update vring_avail_event(vr) when we want to be notified,
  833. * so we haven't changed that yet. */
  834. vrh->last_avail_idx -= num;
  835. }
  836. EXPORT_SYMBOL(vringh_abandon_kern);
  837. /**
  838. * vringh_complete_kern - we've finished with descriptor, publish it.
  839. * @vrh: the vring.
  840. * @head: the head as filled in by vringh_getdesc_kern.
  841. * @len: the length of data we have written.
  842. *
  843. * You should check vringh_need_notify_kern() after one or more calls
  844. * to this function.
  845. */
  846. int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len)
  847. {
  848. struct vring_used_elem used;
  849. used.id = head;
  850. used.len = len;
  851. return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
  852. }
  853. EXPORT_SYMBOL(vringh_complete_kern);
  854. /**
  855. * vringh_notify_enable_kern - we want to know if something changes.
  856. * @vrh: the vring.
  857. *
  858. * This always enables notifications, but returns false if there are
  859. * now more buffers available in the vring.
  860. */
  861. bool vringh_notify_enable_kern(struct vringh *vrh)
  862. {
  863. return __vringh_notify_enable(vrh, getu16_kern, putu16_kern);
  864. }
  865. EXPORT_SYMBOL(vringh_notify_enable_kern);
  866. /**
  867. * vringh_notify_disable_kern - don't tell us if something changes.
  868. * @vrh: the vring.
  869. *
  870. * This is our normal running state: we disable and then only enable when
  871. * we're going to sleep.
  872. */
  873. void vringh_notify_disable_kern(struct vringh *vrh)
  874. {
  875. __vringh_notify_disable(vrh, putu16_kern);
  876. }
  877. EXPORT_SYMBOL(vringh_notify_disable_kern);
  878. /**
  879. * vringh_need_notify_kern - must we tell the other side about used buffers?
  880. * @vrh: the vring we've called vringh_complete_kern() on.
  881. *
  882. * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
  883. */
  884. int vringh_need_notify_kern(struct vringh *vrh)
  885. {
  886. return __vringh_need_notify(vrh, getu16_kern);
  887. }
  888. EXPORT_SYMBOL(vringh_need_notify_kern);