vringh.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010
  1. /*
  2. * Helpers for the host side of a virtio ring.
  3. *
  4. * Since these may be in userspace, we use (inline) accessors.
  5. */
  6. #include <linux/module.h>
  7. #include <linux/vringh.h>
  8. #include <linux/virtio_ring.h>
  9. #include <linux/kernel.h>
  10. #include <linux/ratelimit.h>
  11. #include <linux/uaccess.h>
  12. #include <linux/slab.h>
  13. #include <linux/export.h>
  14. static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
  15. {
  16. static DEFINE_RATELIMIT_STATE(vringh_rs,
  17. DEFAULT_RATELIMIT_INTERVAL,
  18. DEFAULT_RATELIMIT_BURST);
  19. if (__ratelimit(&vringh_rs)) {
  20. va_list ap;
  21. va_start(ap, fmt);
  22. printk(KERN_NOTICE "vringh:");
  23. vprintk(fmt, ap);
  24. va_end(ap);
  25. }
  26. }
  27. /* Returns vring->num if empty, -ve on error. */
  28. static inline int __vringh_get_head(const struct vringh *vrh,
  29. int (*getu16)(u16 *val, const u16 *p),
  30. u16 *last_avail_idx)
  31. {
  32. u16 avail_idx, i, head;
  33. int err;
  34. err = getu16(&avail_idx, &vrh->vring.avail->idx);
  35. if (err) {
  36. vringh_bad("Failed to access avail idx at %p",
  37. &vrh->vring.avail->idx);
  38. return err;
  39. }
  40. if (*last_avail_idx == avail_idx)
  41. return vrh->vring.num;
  42. /* Only get avail ring entries after they have been exposed by guest. */
  43. virtio_rmb(vrh->weak_barriers);
  44. i = *last_avail_idx & (vrh->vring.num - 1);
  45. err = getu16(&head, &vrh->vring.avail->ring[i]);
  46. if (err) {
  47. vringh_bad("Failed to read head: idx %d address %p",
  48. *last_avail_idx, &vrh->vring.avail->ring[i]);
  49. return err;
  50. }
  51. if (head >= vrh->vring.num) {
  52. vringh_bad("Guest says index %u > %u is available",
  53. head, vrh->vring.num);
  54. return -EINVAL;
  55. }
  56. (*last_avail_idx)++;
  57. return head;
  58. }
  59. /* Copy some bytes to/from the iovec. Returns num copied. */
  60. static inline ssize_t vringh_iov_xfer(struct vringh_kiov *iov,
  61. void *ptr, size_t len,
  62. int (*xfer)(void *addr, void *ptr,
  63. size_t len))
  64. {
  65. int err, done = 0;
  66. while (len && iov->i < iov->used) {
  67. size_t partlen;
  68. partlen = min(iov->iov[iov->i].iov_len, len);
  69. err = xfer(iov->iov[iov->i].iov_base, ptr, partlen);
  70. if (err)
  71. return err;
  72. done += partlen;
  73. len -= partlen;
  74. ptr += partlen;
  75. iov->consumed += partlen;
  76. iov->iov[iov->i].iov_len -= partlen;
  77. iov->iov[iov->i].iov_base += partlen;
  78. if (!iov->iov[iov->i].iov_len) {
  79. /* Fix up old iov element then increment. */
  80. iov->iov[iov->i].iov_len = iov->consumed;
  81. iov->iov[iov->i].iov_base -= iov->consumed;
  82. iov->consumed = 0;
  83. iov->i++;
  84. }
  85. }
  86. return done;
  87. }
  88. /* May reduce *len if range is shorter. */
  89. static inline bool range_check(struct vringh *vrh, u64 addr, size_t *len,
  90. struct vringh_range *range,
  91. bool (*getrange)(struct vringh *,
  92. u64, struct vringh_range *))
  93. {
  94. if (addr < range->start || addr > range->end_incl) {
  95. if (!getrange(vrh, addr, range))
  96. return false;
  97. }
  98. BUG_ON(addr < range->start || addr > range->end_incl);
  99. /* To end of memory? */
  100. if (unlikely(addr + *len == 0)) {
  101. if (range->end_incl == -1ULL)
  102. return true;
  103. goto truncate;
  104. }
  105. /* Otherwise, don't wrap. */
  106. if (addr + *len < addr) {
  107. vringh_bad("Wrapping descriptor %zu@0x%llx",
  108. *len, (unsigned long long)addr);
  109. return false;
  110. }
  111. if (unlikely(addr + *len - 1 > range->end_incl))
  112. goto truncate;
  113. return true;
  114. truncate:
  115. *len = range->end_incl + 1 - addr;
  116. return true;
  117. }
  118. static inline bool no_range_check(struct vringh *vrh, u64 addr, size_t *len,
  119. struct vringh_range *range,
  120. bool (*getrange)(struct vringh *,
  121. u64, struct vringh_range *))
  122. {
  123. return true;
  124. }
  125. /* No reason for this code to be inline. */
  126. static int move_to_indirect(int *up_next, u16 *i, void *addr,
  127. const struct vring_desc *desc,
  128. struct vring_desc **descs, int *desc_max)
  129. {
  130. /* Indirect tables can't have indirect. */
  131. if (*up_next != -1) {
  132. vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
  133. return -EINVAL;
  134. }
  135. if (unlikely(desc->len % sizeof(struct vring_desc))) {
  136. vringh_bad("Strange indirect len %u", desc->len);
  137. return -EINVAL;
  138. }
  139. /* We will check this when we follow it! */
  140. if (desc->flags & VRING_DESC_F_NEXT)
  141. *up_next = desc->next;
  142. else
  143. *up_next = -2;
  144. *descs = addr;
  145. *desc_max = desc->len / sizeof(struct vring_desc);
  146. /* Now, start at the first indirect. */
  147. *i = 0;
  148. return 0;
  149. }
  150. static int resize_iovec(struct vringh_kiov *iov, gfp_t gfp)
  151. {
  152. struct kvec *new;
  153. unsigned int flag, new_num = (iov->max_num & ~VRINGH_IOV_ALLOCATED) * 2;
  154. if (new_num < 8)
  155. new_num = 8;
  156. flag = (iov->max_num & VRINGH_IOV_ALLOCATED);
  157. if (flag)
  158. new = krealloc(iov->iov, new_num * sizeof(struct iovec), gfp);
  159. else {
  160. new = kmalloc(new_num * sizeof(struct iovec), gfp);
  161. if (new) {
  162. memcpy(new, iov->iov,
  163. iov->max_num * sizeof(struct iovec));
  164. flag = VRINGH_IOV_ALLOCATED;
  165. }
  166. }
  167. if (!new)
  168. return -ENOMEM;
  169. iov->iov = new;
  170. iov->max_num = (new_num | flag);
  171. return 0;
  172. }
  173. static u16 __cold return_from_indirect(const struct vringh *vrh, int *up_next,
  174. struct vring_desc **descs, int *desc_max)
  175. {
  176. u16 i = *up_next;
  177. *up_next = -1;
  178. *descs = vrh->vring.desc;
  179. *desc_max = vrh->vring.num;
  180. return i;
  181. }
  182. static int slow_copy(struct vringh *vrh, void *dst, const void *src,
  183. bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
  184. struct vringh_range *range,
  185. bool (*getrange)(struct vringh *vrh,
  186. u64,
  187. struct vringh_range *)),
  188. bool (*getrange)(struct vringh *vrh,
  189. u64 addr,
  190. struct vringh_range *r),
  191. struct vringh_range *range,
  192. int (*copy)(void *dst, const void *src, size_t len))
  193. {
  194. size_t part, len = sizeof(struct vring_desc);
  195. do {
  196. u64 addr;
  197. int err;
  198. part = len;
  199. addr = (u64)(unsigned long)src - range->offset;
  200. if (!rcheck(vrh, addr, &part, range, getrange))
  201. return -EINVAL;
  202. err = copy(dst, src, part);
  203. if (err)
  204. return err;
  205. dst += part;
  206. src += part;
  207. len -= part;
  208. } while (len);
  209. return 0;
  210. }
  211. static inline int
  212. __vringh_iov(struct vringh *vrh, u16 i,
  213. struct vringh_kiov *riov,
  214. struct vringh_kiov *wiov,
  215. bool (*rcheck)(struct vringh *vrh, u64 addr, size_t *len,
  216. struct vringh_range *range,
  217. bool (*getrange)(struct vringh *, u64,
  218. struct vringh_range *)),
  219. bool (*getrange)(struct vringh *, u64, struct vringh_range *),
  220. gfp_t gfp,
  221. int (*copy)(void *dst, const void *src, size_t len))
  222. {
  223. int err, count = 0, up_next, desc_max;
  224. struct vring_desc desc, *descs;
  225. struct vringh_range range = { -1ULL, 0 }, slowrange;
  226. bool slow = false;
  227. /* We start traversing vring's descriptor table. */
  228. descs = vrh->vring.desc;
  229. desc_max = vrh->vring.num;
  230. up_next = -1;
  231. if (riov)
  232. riov->i = riov->used = 0;
  233. else if (wiov)
  234. wiov->i = wiov->used = 0;
  235. else
  236. /* You must want something! */
  237. BUG();
  238. for (;;) {
  239. void *addr;
  240. struct vringh_kiov *iov;
  241. size_t len;
  242. if (unlikely(slow))
  243. err = slow_copy(vrh, &desc, &descs[i], rcheck, getrange,
  244. &slowrange, copy);
  245. else
  246. err = copy(&desc, &descs[i], sizeof(desc));
  247. if (unlikely(err))
  248. goto fail;
  249. if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
  250. /* Make sure it's OK, and get offset. */
  251. len = desc.len;
  252. if (!rcheck(vrh, desc.addr, &len, &range, getrange)) {
  253. err = -EINVAL;
  254. goto fail;
  255. }
  256. if (unlikely(len != desc.len)) {
  257. slow = true;
  258. /* We need to save this range to use offset */
  259. slowrange = range;
  260. }
  261. addr = (void *)(long)(desc.addr + range.offset);
  262. err = move_to_indirect(&up_next, &i, addr, &desc,
  263. &descs, &desc_max);
  264. if (err)
  265. goto fail;
  266. continue;
  267. }
  268. if (count++ == vrh->vring.num) {
  269. vringh_bad("Descriptor loop in %p", descs);
  270. err = -ELOOP;
  271. goto fail;
  272. }
  273. if (desc.flags & VRING_DESC_F_WRITE)
  274. iov = wiov;
  275. else {
  276. iov = riov;
  277. if (unlikely(wiov && wiov->i)) {
  278. vringh_bad("Readable desc %p after writable",
  279. &descs[i]);
  280. err = -EINVAL;
  281. goto fail;
  282. }
  283. }
  284. if (!iov) {
  285. vringh_bad("Unexpected %s desc",
  286. !wiov ? "writable" : "readable");
  287. err = -EPROTO;
  288. goto fail;
  289. }
  290. again:
  291. /* Make sure it's OK, and get offset. */
  292. len = desc.len;
  293. if (!rcheck(vrh, desc.addr, &len, &range, getrange)) {
  294. err = -EINVAL;
  295. goto fail;
  296. }
  297. addr = (void *)(unsigned long)(desc.addr + range.offset);
  298. if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
  299. err = resize_iovec(iov, gfp);
  300. if (err)
  301. goto fail;
  302. }
  303. iov->iov[iov->used].iov_base = addr;
  304. iov->iov[iov->used].iov_len = len;
  305. iov->used++;
  306. if (unlikely(len != desc.len)) {
  307. desc.len -= len;
  308. desc.addr += len;
  309. goto again;
  310. }
  311. if (desc.flags & VRING_DESC_F_NEXT) {
  312. i = desc.next;
  313. } else {
  314. /* Just in case we need to finish traversing above. */
  315. if (unlikely(up_next > 0)) {
  316. i = return_from_indirect(vrh, &up_next,
  317. &descs, &desc_max);
  318. slow = false;
  319. } else
  320. break;
  321. }
  322. if (i >= desc_max) {
  323. vringh_bad("Chained index %u > %u", i, desc_max);
  324. err = -EINVAL;
  325. goto fail;
  326. }
  327. }
  328. return 0;
  329. fail:
  330. return err;
  331. }
  332. static inline int __vringh_complete(struct vringh *vrh,
  333. const struct vring_used_elem *used,
  334. unsigned int num_used,
  335. int (*putu16)(u16 *p, u16 val),
  336. int (*putused)(struct vring_used_elem *dst,
  337. const struct vring_used_elem
  338. *src, unsigned num))
  339. {
  340. struct vring_used *used_ring;
  341. int err;
  342. u16 used_idx, off;
  343. used_ring = vrh->vring.used;
  344. used_idx = vrh->last_used_idx + vrh->completed;
  345. off = used_idx % vrh->vring.num;
  346. /* Compiler knows num_used == 1 sometimes, hence extra check */
  347. if (num_used > 1 && unlikely(off + num_used >= vrh->vring.num)) {
  348. u16 part = vrh->vring.num - off;
  349. err = putused(&used_ring->ring[off], used, part);
  350. if (!err)
  351. err = putused(&used_ring->ring[0], used + part,
  352. num_used - part);
  353. } else
  354. err = putused(&used_ring->ring[off], used, num_used);
  355. if (err) {
  356. vringh_bad("Failed to write %u used entries %u at %p",
  357. num_used, off, &used_ring->ring[off]);
  358. return err;
  359. }
  360. /* Make sure buffer is written before we update index. */
  361. virtio_wmb(vrh->weak_barriers);
  362. err = putu16(&vrh->vring.used->idx, used_idx + num_used);
  363. if (err) {
  364. vringh_bad("Failed to update used index at %p",
  365. &vrh->vring.used->idx);
  366. return err;
  367. }
  368. vrh->completed += num_used;
  369. return 0;
  370. }
  371. static inline int __vringh_need_notify(struct vringh *vrh,
  372. int (*getu16)(u16 *val, const u16 *p))
  373. {
  374. bool notify;
  375. u16 used_event;
  376. int err;
  377. /* Flush out used index update. This is paired with the
  378. * barrier that the Guest executes when enabling
  379. * interrupts. */
  380. virtio_mb(vrh->weak_barriers);
  381. /* Old-style, without event indices. */
  382. if (!vrh->event_indices) {
  383. u16 flags;
  384. err = getu16(&flags, &vrh->vring.avail->flags);
  385. if (err) {
  386. vringh_bad("Failed to get flags at %p",
  387. &vrh->vring.avail->flags);
  388. return err;
  389. }
  390. return (!(flags & VRING_AVAIL_F_NO_INTERRUPT));
  391. }
  392. /* Modern: we know when other side wants to know. */
  393. err = getu16(&used_event, &vring_used_event(&vrh->vring));
  394. if (err) {
  395. vringh_bad("Failed to get used event idx at %p",
  396. &vring_used_event(&vrh->vring));
  397. return err;
  398. }
  399. /* Just in case we added so many that we wrap. */
  400. if (unlikely(vrh->completed > 0xffff))
  401. notify = true;
  402. else
  403. notify = vring_need_event(used_event,
  404. vrh->last_used_idx + vrh->completed,
  405. vrh->last_used_idx);
  406. vrh->last_used_idx += vrh->completed;
  407. vrh->completed = 0;
  408. return notify;
  409. }
  410. static inline bool __vringh_notify_enable(struct vringh *vrh,
  411. int (*getu16)(u16 *val, const u16 *p),
  412. int (*putu16)(u16 *p, u16 val))
  413. {
  414. u16 avail;
  415. if (!vrh->event_indices) {
  416. /* Old-school; update flags. */
  417. if (putu16(&vrh->vring.used->flags, 0) != 0) {
  418. vringh_bad("Clearing used flags %p",
  419. &vrh->vring.used->flags);
  420. return true;
  421. }
  422. } else {
  423. if (putu16(&vring_avail_event(&vrh->vring),
  424. vrh->last_avail_idx) != 0) {
  425. vringh_bad("Updating avail event index %p",
  426. &vring_avail_event(&vrh->vring));
  427. return true;
  428. }
  429. }
  430. /* They could have slipped one in as we were doing that: make
  431. * sure it's written, then check again. */
  432. virtio_mb(vrh->weak_barriers);
  433. if (getu16(&avail, &vrh->vring.avail->idx) != 0) {
  434. vringh_bad("Failed to check avail idx at %p",
  435. &vrh->vring.avail->idx);
  436. return true;
  437. }
  438. /* This is unlikely, so we just leave notifications enabled
  439. * (if we're using event_indices, we'll only get one
  440. * notification anyway). */
  441. return avail == vrh->last_avail_idx;
  442. }
  443. static inline void __vringh_notify_disable(struct vringh *vrh,
  444. int (*putu16)(u16 *p, u16 val))
  445. {
  446. if (!vrh->event_indices) {
  447. /* Old-school; update flags. */
  448. if (putu16(&vrh->vring.used->flags, VRING_USED_F_NO_NOTIFY)) {
  449. vringh_bad("Setting used flags %p",
  450. &vrh->vring.used->flags);
  451. }
  452. }
  453. }
  454. /* Userspace access helpers: in this case, addresses are really userspace. */
  455. static inline int getu16_user(u16 *val, const u16 *p)
  456. {
  457. return get_user(*val, (__force u16 __user *)p);
  458. }
  459. static inline int putu16_user(u16 *p, u16 val)
  460. {
  461. return put_user(val, (__force u16 __user *)p);
  462. }
  463. static inline int copydesc_user(void *dst, const void *src, size_t len)
  464. {
  465. return copy_from_user(dst, (__force void __user *)src, len) ?
  466. -EFAULT : 0;
  467. }
  468. static inline int putused_user(struct vring_used_elem *dst,
  469. const struct vring_used_elem *src,
  470. unsigned int num)
  471. {
  472. return copy_to_user((__force void __user *)dst, src,
  473. sizeof(*dst) * num) ? -EFAULT : 0;
  474. }
  475. static inline int xfer_from_user(void *src, void *dst, size_t len)
  476. {
  477. return copy_from_user(dst, (__force void __user *)src, len) ?
  478. -EFAULT : 0;
  479. }
  480. static inline int xfer_to_user(void *dst, void *src, size_t len)
  481. {
  482. return copy_to_user((__force void __user *)dst, src, len) ?
  483. -EFAULT : 0;
  484. }
  485. /**
  486. * vringh_init_user - initialize a vringh for a userspace vring.
  487. * @vrh: the vringh to initialize.
  488. * @features: the feature bits for this ring.
  489. * @num: the number of elements.
  490. * @weak_barriers: true if we only need memory barriers, not I/O.
  491. * @desc: the userpace descriptor pointer.
  492. * @avail: the userpace avail pointer.
  493. * @used: the userpace used pointer.
  494. *
  495. * Returns an error if num is invalid: you should check pointers
  496. * yourself!
  497. */
  498. int vringh_init_user(struct vringh *vrh, u32 features,
  499. unsigned int num, bool weak_barriers,
  500. struct vring_desc __user *desc,
  501. struct vring_avail __user *avail,
  502. struct vring_used __user *used)
  503. {
  504. /* Sane power of 2 please! */
  505. if (!num || num > 0xffff || (num & (num - 1))) {
  506. vringh_bad("Bad ring size %u", num);
  507. return -EINVAL;
  508. }
  509. vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
  510. vrh->weak_barriers = weak_barriers;
  511. vrh->completed = 0;
  512. vrh->last_avail_idx = 0;
  513. vrh->last_used_idx = 0;
  514. vrh->vring.num = num;
  515. /* vring expects kernel addresses, but only used via accessors. */
  516. vrh->vring.desc = (__force struct vring_desc *)desc;
  517. vrh->vring.avail = (__force struct vring_avail *)avail;
  518. vrh->vring.used = (__force struct vring_used *)used;
  519. return 0;
  520. }
  521. EXPORT_SYMBOL(vringh_init_user);
  522. /**
  523. * vringh_getdesc_user - get next available descriptor from userspace ring.
  524. * @vrh: the userspace vring.
  525. * @riov: where to put the readable descriptors (or NULL)
  526. * @wiov: where to put the writable descriptors (or NULL)
  527. * @getrange: function to call to check ranges.
  528. * @head: head index we received, for passing to vringh_complete_user().
  529. *
  530. * Returns 0 if there was no descriptor, 1 if there was, or -errno.
  531. *
  532. * Note that on error return, you can tell the difference between an
  533. * invalid ring and a single invalid descriptor: in the former case,
  534. * *head will be vrh->vring.num. You may be able to ignore an invalid
  535. * descriptor, but there's not much you can do with an invalid ring.
  536. *
  537. * Note that you may need to clean up riov and wiov, even on error!
  538. */
  539. int vringh_getdesc_user(struct vringh *vrh,
  540. struct vringh_iov *riov,
  541. struct vringh_iov *wiov,
  542. bool (*getrange)(struct vringh *vrh,
  543. u64 addr, struct vringh_range *r),
  544. u16 *head)
  545. {
  546. int err;
  547. *head = vrh->vring.num;
  548. err = __vringh_get_head(vrh, getu16_user, &vrh->last_avail_idx);
  549. if (err < 0)
  550. return err;
  551. /* Empty... */
  552. if (err == vrh->vring.num)
  553. return 0;
  554. /* We need the layouts to be the identical for this to work */
  555. BUILD_BUG_ON(sizeof(struct vringh_kiov) != sizeof(struct vringh_iov));
  556. BUILD_BUG_ON(offsetof(struct vringh_kiov, iov) !=
  557. offsetof(struct vringh_iov, iov));
  558. BUILD_BUG_ON(offsetof(struct vringh_kiov, i) !=
  559. offsetof(struct vringh_iov, i));
  560. BUILD_BUG_ON(offsetof(struct vringh_kiov, used) !=
  561. offsetof(struct vringh_iov, used));
  562. BUILD_BUG_ON(offsetof(struct vringh_kiov, max_num) !=
  563. offsetof(struct vringh_iov, max_num));
  564. BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
  565. BUILD_BUG_ON(offsetof(struct iovec, iov_base) !=
  566. offsetof(struct kvec, iov_base));
  567. BUILD_BUG_ON(offsetof(struct iovec, iov_len) !=
  568. offsetof(struct kvec, iov_len));
  569. BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_base)
  570. != sizeof(((struct kvec *)NULL)->iov_base));
  571. BUILD_BUG_ON(sizeof(((struct iovec *)NULL)->iov_len)
  572. != sizeof(((struct kvec *)NULL)->iov_len));
  573. *head = err;
  574. err = __vringh_iov(vrh, *head, (struct vringh_kiov *)riov,
  575. (struct vringh_kiov *)wiov,
  576. range_check, getrange, GFP_KERNEL, copydesc_user);
  577. if (err)
  578. return err;
  579. return 1;
  580. }
  581. EXPORT_SYMBOL(vringh_getdesc_user);
  582. /**
  583. * vringh_iov_pull_user - copy bytes from vring_iov.
  584. * @riov: the riov as passed to vringh_getdesc_user() (updated as we consume)
  585. * @dst: the place to copy.
  586. * @len: the maximum length to copy.
  587. *
  588. * Returns the bytes copied <= len or a negative errno.
  589. */
  590. ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len)
  591. {
  592. return vringh_iov_xfer((struct vringh_kiov *)riov,
  593. dst, len, xfer_from_user);
  594. }
  595. EXPORT_SYMBOL(vringh_iov_pull_user);
  596. /**
  597. * vringh_iov_push_user - copy bytes into vring_iov.
  598. * @wiov: the wiov as passed to vringh_getdesc_user() (updated as we consume)
  599. * @dst: the place to copy.
  600. * @len: the maximum length to copy.
  601. *
  602. * Returns the bytes copied <= len or a negative errno.
  603. */
  604. ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
  605. const void *src, size_t len)
  606. {
  607. return vringh_iov_xfer((struct vringh_kiov *)wiov,
  608. (void *)src, len, xfer_to_user);
  609. }
  610. EXPORT_SYMBOL(vringh_iov_push_user);
  611. /**
  612. * vringh_abandon_user - we've decided not to handle the descriptor(s).
  613. * @vrh: the vring.
  614. * @num: the number of descriptors to put back (ie. num
  615. * vringh_get_user() to undo).
  616. *
  617. * The next vringh_get_user() will return the old descriptor(s) again.
  618. */
  619. void vringh_abandon_user(struct vringh *vrh, unsigned int num)
  620. {
  621. /* We only update vring_avail_event(vr) when we want to be notified,
  622. * so we haven't changed that yet. */
  623. vrh->last_avail_idx -= num;
  624. }
  625. EXPORT_SYMBOL(vringh_abandon_user);
  626. /**
  627. * vringh_complete_user - we've finished with descriptor, publish it.
  628. * @vrh: the vring.
  629. * @head: the head as filled in by vringh_getdesc_user.
  630. * @len: the length of data we have written.
  631. *
  632. * You should check vringh_need_notify_user() after one or more calls
  633. * to this function.
  634. */
  635. int vringh_complete_user(struct vringh *vrh, u16 head, u32 len)
  636. {
  637. struct vring_used_elem used;
  638. used.id = head;
  639. used.len = len;
  640. return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
  641. }
  642. EXPORT_SYMBOL(vringh_complete_user);
  643. /**
  644. * vringh_complete_multi_user - we've finished with many descriptors.
  645. * @vrh: the vring.
  646. * @used: the head, length pairs.
  647. * @num_used: the number of used elements.
  648. *
  649. * You should check vringh_need_notify_user() after one or more calls
  650. * to this function.
  651. */
  652. int vringh_complete_multi_user(struct vringh *vrh,
  653. const struct vring_used_elem used[],
  654. unsigned num_used)
  655. {
  656. return __vringh_complete(vrh, used, num_used,
  657. putu16_user, putused_user);
  658. }
  659. EXPORT_SYMBOL(vringh_complete_multi_user);
  660. /**
  661. * vringh_notify_enable_user - we want to know if something changes.
  662. * @vrh: the vring.
  663. *
  664. * This always enables notifications, but returns false if there are
  665. * now more buffers available in the vring.
  666. */
  667. bool vringh_notify_enable_user(struct vringh *vrh)
  668. {
  669. return __vringh_notify_enable(vrh, getu16_user, putu16_user);
  670. }
  671. EXPORT_SYMBOL(vringh_notify_enable_user);
  672. /**
  673. * vringh_notify_disable_user - don't tell us if something changes.
  674. * @vrh: the vring.
  675. *
  676. * This is our normal running state: we disable and then only enable when
  677. * we're going to sleep.
  678. */
  679. void vringh_notify_disable_user(struct vringh *vrh)
  680. {
  681. __vringh_notify_disable(vrh, putu16_user);
  682. }
  683. EXPORT_SYMBOL(vringh_notify_disable_user);
  684. /**
  685. * vringh_need_notify_user - must we tell the other side about used buffers?
  686. * @vrh: the vring we've called vringh_complete_user() on.
  687. *
  688. * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
  689. */
  690. int vringh_need_notify_user(struct vringh *vrh)
  691. {
  692. return __vringh_need_notify(vrh, getu16_user);
  693. }
  694. EXPORT_SYMBOL(vringh_need_notify_user);
  695. /* Kernelspace access helpers. */
  696. static inline int getu16_kern(u16 *val, const u16 *p)
  697. {
  698. *val = ACCESS_ONCE(*p);
  699. return 0;
  700. }
  701. static inline int putu16_kern(u16 *p, u16 val)
  702. {
  703. ACCESS_ONCE(*p) = val;
  704. return 0;
  705. }
  706. static inline int copydesc_kern(void *dst, const void *src, size_t len)
  707. {
  708. memcpy(dst, src, len);
  709. return 0;
  710. }
  711. static inline int putused_kern(struct vring_used_elem *dst,
  712. const struct vring_used_elem *src,
  713. unsigned int num)
  714. {
  715. memcpy(dst, src, num * sizeof(*dst));
  716. return 0;
  717. }
  718. static inline int xfer_kern(void *src, void *dst, size_t len)
  719. {
  720. memcpy(dst, src, len);
  721. return 0;
  722. }
  723. /**
  724. * vringh_init_kern - initialize a vringh for a kernelspace vring.
  725. * @vrh: the vringh to initialize.
  726. * @features: the feature bits for this ring.
  727. * @num: the number of elements.
  728. * @weak_barriers: true if we only need memory barriers, not I/O.
  729. * @desc: the userpace descriptor pointer.
  730. * @avail: the userpace avail pointer.
  731. * @used: the userpace used pointer.
  732. *
  733. * Returns an error if num is invalid.
  734. */
  735. int vringh_init_kern(struct vringh *vrh, u32 features,
  736. unsigned int num, bool weak_barriers,
  737. struct vring_desc *desc,
  738. struct vring_avail *avail,
  739. struct vring_used *used)
  740. {
  741. /* Sane power of 2 please! */
  742. if (!num || num > 0xffff || (num & (num - 1))) {
  743. vringh_bad("Bad ring size %u", num);
  744. return -EINVAL;
  745. }
  746. vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
  747. vrh->weak_barriers = weak_barriers;
  748. vrh->completed = 0;
  749. vrh->last_avail_idx = 0;
  750. vrh->last_used_idx = 0;
  751. vrh->vring.num = num;
  752. vrh->vring.desc = desc;
  753. vrh->vring.avail = avail;
  754. vrh->vring.used = used;
  755. return 0;
  756. }
  757. EXPORT_SYMBOL(vringh_init_kern);
  758. /**
  759. * vringh_getdesc_kern - get next available descriptor from kernelspace ring.
  760. * @vrh: the kernelspace vring.
  761. * @riov: where to put the readable descriptors (or NULL)
  762. * @wiov: where to put the writable descriptors (or NULL)
  763. * @head: head index we received, for passing to vringh_complete_kern().
  764. * @gfp: flags for allocating larger riov/wiov.
  765. *
  766. * Returns 0 if there was no descriptor, 1 if there was, or -errno.
  767. *
  768. * Note that on error return, you can tell the difference between an
  769. * invalid ring and a single invalid descriptor: in the former case,
  770. * *head will be vrh->vring.num. You may be able to ignore an invalid
  771. * descriptor, but there's not much you can do with an invalid ring.
  772. *
  773. * Note that you may need to clean up riov and wiov, even on error!
  774. */
  775. int vringh_getdesc_kern(struct vringh *vrh,
  776. struct vringh_kiov *riov,
  777. struct vringh_kiov *wiov,
  778. u16 *head,
  779. gfp_t gfp)
  780. {
  781. int err;
  782. err = __vringh_get_head(vrh, getu16_kern, &vrh->last_avail_idx);
  783. if (err < 0)
  784. return err;
  785. /* Empty... */
  786. if (err == vrh->vring.num)
  787. return 0;
  788. *head = err;
  789. err = __vringh_iov(vrh, *head, riov, wiov, no_range_check, NULL,
  790. gfp, copydesc_kern);
  791. if (err)
  792. return err;
  793. return 1;
  794. }
  795. EXPORT_SYMBOL(vringh_getdesc_kern);
  796. /**
  797. * vringh_iov_pull_kern - copy bytes from vring_iov.
  798. * @riov: the riov as passed to vringh_getdesc_kern() (updated as we consume)
  799. * @dst: the place to copy.
  800. * @len: the maximum length to copy.
  801. *
  802. * Returns the bytes copied <= len or a negative errno.
  803. */
  804. ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len)
  805. {
  806. return vringh_iov_xfer(riov, dst, len, xfer_kern);
  807. }
  808. EXPORT_SYMBOL(vringh_iov_pull_kern);
  809. /**
  810. * vringh_iov_push_kern - copy bytes into vring_iov.
  811. * @wiov: the wiov as passed to vringh_getdesc_kern() (updated as we consume)
  812. * @dst: the place to copy.
  813. * @len: the maximum length to copy.
  814. *
  815. * Returns the bytes copied <= len or a negative errno.
  816. */
  817. ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
  818. const void *src, size_t len)
  819. {
  820. return vringh_iov_xfer(wiov, (void *)src, len, xfer_kern);
  821. }
  822. EXPORT_SYMBOL(vringh_iov_push_kern);
  823. /**
  824. * vringh_abandon_kern - we've decided not to handle the descriptor(s).
  825. * @vrh: the vring.
  826. * @num: the number of descriptors to put back (ie. num
  827. * vringh_get_kern() to undo).
  828. *
  829. * The next vringh_get_kern() will return the old descriptor(s) again.
  830. */
  831. void vringh_abandon_kern(struct vringh *vrh, unsigned int num)
  832. {
  833. /* We only update vring_avail_event(vr) when we want to be notified,
  834. * so we haven't changed that yet. */
  835. vrh->last_avail_idx -= num;
  836. }
  837. EXPORT_SYMBOL(vringh_abandon_kern);
  838. /**
  839. * vringh_complete_kern - we've finished with descriptor, publish it.
  840. * @vrh: the vring.
  841. * @head: the head as filled in by vringh_getdesc_kern.
  842. * @len: the length of data we have written.
  843. *
  844. * You should check vringh_need_notify_kern() after one or more calls
  845. * to this function.
  846. */
  847. int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len)
  848. {
  849. struct vring_used_elem used;
  850. used.id = head;
  851. used.len = len;
  852. return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
  853. }
  854. EXPORT_SYMBOL(vringh_complete_kern);
  855. /**
  856. * vringh_notify_enable_kern - we want to know if something changes.
  857. * @vrh: the vring.
  858. *
  859. * This always enables notifications, but returns false if there are
  860. * now more buffers available in the vring.
  861. */
  862. bool vringh_notify_enable_kern(struct vringh *vrh)
  863. {
  864. return __vringh_notify_enable(vrh, getu16_kern, putu16_kern);
  865. }
  866. EXPORT_SYMBOL(vringh_notify_enable_kern);
  867. /**
  868. * vringh_notify_disable_kern - don't tell us if something changes.
  869. * @vrh: the vring.
  870. *
  871. * This is our normal running state: we disable and then only enable when
  872. * we're going to sleep.
  873. */
  874. void vringh_notify_disable_kern(struct vringh *vrh)
  875. {
  876. __vringh_notify_disable(vrh, putu16_kern);
  877. }
  878. EXPORT_SYMBOL(vringh_notify_disable_kern);
  879. /**
  880. * vringh_need_notify_kern - must we tell the other side about used buffers?
  881. * @vrh: the vring we've called vringh_complete_kern() on.
  882. *
  883. * Returns -errno or 0 if we don't need to tell the other side, 1 if we do.
  884. */
  885. int vringh_need_notify_kern(struct vringh *vrh)
  886. {
  887. return __vringh_need_notify(vrh, getu16_kern);
  888. }
  889. EXPORT_SYMBOL(vringh_need_notify_kern);
  890. MODULE_LICENSE("GPL");