ring_buffer.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564
  1. /*
  2. *
  3. * Copyright (c) 2009, Microsoft Corporation.
  4. *
  5. * This program is free software; you can redistribute it and/or modify it
  6. * under the terms and conditions of the GNU General Public License,
  7. * version 2, as published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope it will be useful, but WITHOUT
  10. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11. * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  12. * more details.
  13. *
  14. * You should have received a copy of the GNU General Public License along with
  15. * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  16. * Place - Suite 330, Boston, MA 02111-1307 USA.
  17. *
  18. * Authors:
  19. * Haiyang Zhang <haiyangz@microsoft.com>
  20. * Hank Janssen <hjanssen@microsoft.com>
  21. * K. Y. Srinivasan <kys@microsoft.com>
  22. *
  23. */
  24. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  25. #include <linux/kernel.h>
  26. #include <linux/mm.h>
  27. #include <linux/hyperv.h>
  28. #include "hyperv_vmbus.h"
  29. void hv_begin_read(struct hv_ring_buffer_info *rbi)
  30. {
  31. rbi->ring_buffer->interrupt_mask = 1;
  32. mb();
  33. }
  34. u32 hv_end_read(struct hv_ring_buffer_info *rbi)
  35. {
  36. u32 read;
  37. u32 write;
  38. rbi->ring_buffer->interrupt_mask = 0;
  39. mb();
  40. /*
  41. * Now check to see if the ring buffer is still empty.
  42. * If it is not, we raced and we need to process new
  43. * incoming messages.
  44. */
  45. hv_get_ringbuffer_availbytes(rbi, &read, &write);
  46. return read;
  47. }
  48. /*
  49. * When we write to the ring buffer, check if the host needs to
  50. * be signaled. Here is the details of this protocol:
  51. *
  52. * 1. The host guarantees that while it is draining the
  53. * ring buffer, it will set the interrupt_mask to
  54. * indicate it does not need to be interrupted when
  55. * new data is placed.
  56. *
  57. * 2. The host guarantees that it will completely drain
  58. * the ring buffer before exiting the read loop. Further,
  59. * once the ring buffer is empty, it will clear the
  60. * interrupt_mask and re-check to see if new data has
  61. * arrived.
  62. */
  63. static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi)
  64. {
  65. mb();
  66. if (rbi->ring_buffer->interrupt_mask)
  67. return false;
  68. /* check interrupt_mask before read_index */
  69. rmb();
  70. /*
  71. * This is the only case we need to signal when the
  72. * ring transitions from being empty to non-empty.
  73. */
  74. if (old_write == rbi->ring_buffer->read_index)
  75. return true;
  76. return false;
  77. }
  78. /*
  79. * To optimize the flow management on the send-side,
  80. * when the sender is blocked because of lack of
  81. * sufficient space in the ring buffer, potential the
  82. * consumer of the ring buffer can signal the producer.
  83. * This is controlled by the following parameters:
  84. *
  85. * 1. pending_send_sz: This is the size in bytes that the
  86. * producer is trying to send.
  87. * 2. The feature bit feat_pending_send_sz set to indicate if
  88. * the consumer of the ring will signal when the ring
  89. * state transitions from being full to a state where
  90. * there is room for the producer to send the pending packet.
  91. */
  92. static bool hv_need_to_signal_on_read(u32 old_rd,
  93. struct hv_ring_buffer_info *rbi)
  94. {
  95. u32 prev_write_sz;
  96. u32 cur_write_sz;
  97. u32 r_size;
  98. u32 write_loc = rbi->ring_buffer->write_index;
  99. u32 read_loc = rbi->ring_buffer->read_index;
  100. u32 pending_sz = rbi->ring_buffer->pending_send_sz;
  101. /*
  102. * If the other end is not blocked on write don't bother.
  103. */
  104. if (pending_sz == 0)
  105. return false;
  106. r_size = rbi->ring_datasize;
  107. cur_write_sz = write_loc >= read_loc ? r_size - (write_loc - read_loc) :
  108. read_loc - write_loc;
  109. prev_write_sz = write_loc >= old_rd ? r_size - (write_loc - old_rd) :
  110. old_rd - write_loc;
  111. if ((prev_write_sz < pending_sz) && (cur_write_sz >= pending_sz))
  112. return true;
  113. return false;
  114. }
  115. /*
  116. * hv_get_next_write_location()
  117. *
  118. * Get the next write location for the specified ring buffer
  119. *
  120. */
  121. static inline u32
  122. hv_get_next_write_location(struct hv_ring_buffer_info *ring_info)
  123. {
  124. u32 next = ring_info->ring_buffer->write_index;
  125. return next;
  126. }
  127. /*
  128. * hv_set_next_write_location()
  129. *
  130. * Set the next write location for the specified ring buffer
  131. *
  132. */
  133. static inline void
  134. hv_set_next_write_location(struct hv_ring_buffer_info *ring_info,
  135. u32 next_write_location)
  136. {
  137. ring_info->ring_buffer->write_index = next_write_location;
  138. }
  139. /*
  140. * hv_get_next_read_location()
  141. *
  142. * Get the next read location for the specified ring buffer
  143. */
  144. static inline u32
  145. hv_get_next_read_location(struct hv_ring_buffer_info *ring_info)
  146. {
  147. u32 next = ring_info->ring_buffer->read_index;
  148. return next;
  149. }
  150. /*
  151. * hv_get_next_readlocation_withoffset()
  152. *
  153. * Get the next read location + offset for the specified ring buffer.
  154. * This allows the caller to skip
  155. */
  156. static inline u32
  157. hv_get_next_readlocation_withoffset(struct hv_ring_buffer_info *ring_info,
  158. u32 offset)
  159. {
  160. u32 next = ring_info->ring_buffer->read_index;
  161. next += offset;
  162. next %= ring_info->ring_datasize;
  163. return next;
  164. }
  165. /*
  166. *
  167. * hv_set_next_read_location()
  168. *
  169. * Set the next read location for the specified ring buffer
  170. *
  171. */
  172. static inline void
  173. hv_set_next_read_location(struct hv_ring_buffer_info *ring_info,
  174. u32 next_read_location)
  175. {
  176. ring_info->ring_buffer->read_index = next_read_location;
  177. }
  178. /*
  179. *
  180. * hv_get_ring_buffer()
  181. *
  182. * Get the start of the ring buffer
  183. */
  184. static inline void *
  185. hv_get_ring_buffer(struct hv_ring_buffer_info *ring_info)
  186. {
  187. return (void *)ring_info->ring_buffer->buffer;
  188. }
  189. /*
  190. *
  191. * hv_get_ring_buffersize()
  192. *
  193. * Get the size of the ring buffer
  194. */
  195. static inline u32
  196. hv_get_ring_buffersize(struct hv_ring_buffer_info *ring_info)
  197. {
  198. return ring_info->ring_datasize;
  199. }
  200. /*
  201. *
  202. * hv_get_ring_bufferindices()
  203. *
  204. * Get the read and write indices as u64 of the specified ring buffer
  205. *
  206. */
  207. static inline u64
  208. hv_get_ring_bufferindices(struct hv_ring_buffer_info *ring_info)
  209. {
  210. return (u64)ring_info->ring_buffer->write_index << 32;
  211. }
  212. /*
  213. *
  214. * hv_copyfrom_ringbuffer()
  215. *
  216. * Helper routine to copy to source from ring buffer.
  217. * Assume there is enough room. Handles wrap-around in src case only!!
  218. *
  219. */
  220. static u32 hv_copyfrom_ringbuffer(
  221. struct hv_ring_buffer_info *ring_info,
  222. void *dest,
  223. u32 destlen,
  224. u32 start_read_offset)
  225. {
  226. void *ring_buffer = hv_get_ring_buffer(ring_info);
  227. u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
  228. u32 frag_len;
  229. /* wrap-around detected at the src */
  230. if (destlen > ring_buffer_size - start_read_offset) {
  231. frag_len = ring_buffer_size - start_read_offset;
  232. memcpy(dest, ring_buffer + start_read_offset, frag_len);
  233. memcpy(dest + frag_len, ring_buffer, destlen - frag_len);
  234. } else
  235. memcpy(dest, ring_buffer + start_read_offset, destlen);
  236. start_read_offset += destlen;
  237. start_read_offset %= ring_buffer_size;
  238. return start_read_offset;
  239. }
  240. /*
  241. *
  242. * hv_copyto_ringbuffer()
  243. *
  244. * Helper routine to copy from source to ring buffer.
  245. * Assume there is enough room. Handles wrap-around in dest case only!!
  246. *
  247. */
  248. static u32 hv_copyto_ringbuffer(
  249. struct hv_ring_buffer_info *ring_info,
  250. u32 start_write_offset,
  251. void *src,
  252. u32 srclen)
  253. {
  254. void *ring_buffer = hv_get_ring_buffer(ring_info);
  255. u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
  256. u32 frag_len;
  257. /* wrap-around detected! */
  258. if (srclen > ring_buffer_size - start_write_offset) {
  259. frag_len = ring_buffer_size - start_write_offset;
  260. memcpy(ring_buffer + start_write_offset, src, frag_len);
  261. memcpy(ring_buffer, src + frag_len, srclen - frag_len);
  262. } else
  263. memcpy(ring_buffer + start_write_offset, src, srclen);
  264. start_write_offset += srclen;
  265. start_write_offset %= ring_buffer_size;
  266. return start_write_offset;
  267. }
  268. /*
  269. *
  270. * hv_ringbuffer_get_debuginfo()
  271. *
  272. * Get various debug metrics for the specified ring buffer
  273. *
  274. */
  275. void hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info,
  276. struct hv_ring_buffer_debug_info *debug_info)
  277. {
  278. u32 bytes_avail_towrite;
  279. u32 bytes_avail_toread;
  280. if (ring_info->ring_buffer) {
  281. hv_get_ringbuffer_availbytes(ring_info,
  282. &bytes_avail_toread,
  283. &bytes_avail_towrite);
  284. debug_info->bytes_avail_toread = bytes_avail_toread;
  285. debug_info->bytes_avail_towrite = bytes_avail_towrite;
  286. debug_info->current_read_index =
  287. ring_info->ring_buffer->read_index;
  288. debug_info->current_write_index =
  289. ring_info->ring_buffer->write_index;
  290. debug_info->current_interrupt_mask =
  291. ring_info->ring_buffer->interrupt_mask;
  292. }
  293. }
  294. /*
  295. *
  296. * hv_ringbuffer_init()
  297. *
  298. *Initialize the ring buffer
  299. *
  300. */
  301. int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
  302. void *buffer, u32 buflen)
  303. {
  304. if (sizeof(struct hv_ring_buffer) != PAGE_SIZE)
  305. return -EINVAL;
  306. memset(ring_info, 0, sizeof(struct hv_ring_buffer_info));
  307. ring_info->ring_buffer = (struct hv_ring_buffer *)buffer;
  308. ring_info->ring_buffer->read_index =
  309. ring_info->ring_buffer->write_index = 0;
  310. ring_info->ring_size = buflen;
  311. ring_info->ring_datasize = buflen - sizeof(struct hv_ring_buffer);
  312. spin_lock_init(&ring_info->ring_lock);
  313. return 0;
  314. }
  /*
   * hv_ringbuffer_cleanup()
   *
   * Cleanup counterpart of hv_ringbuffer_init(). It currently performs no
   * work.
   */
  void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
  {
      /* Intentionally empty. */
  }
  325. /*
  326. *
  327. * hv_ringbuffer_write()
  328. *
  329. * Write to the ring buffer
  330. *
  331. */
  332. int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
  333. struct scatterlist *sglist, u32 sgcount, bool *signal)
  334. {
  335. int i = 0;
  336. u32 bytes_avail_towrite;
  337. u32 bytes_avail_toread;
  338. u32 totalbytes_towrite = 0;
  339. struct scatterlist *sg;
  340. u32 next_write_location;
  341. u32 old_write;
  342. u64 prev_indices = 0;
  343. unsigned long flags;
  344. for_each_sg(sglist, sg, sgcount, i)
  345. {
  346. totalbytes_towrite += sg->length;
  347. }
  348. totalbytes_towrite += sizeof(u64);
  349. spin_lock_irqsave(&outring_info->ring_lock, flags);
  350. hv_get_ringbuffer_availbytes(outring_info,
  351. &bytes_avail_toread,
  352. &bytes_avail_towrite);
  353. /* If there is only room for the packet, assume it is full. */
  354. /* Otherwise, the next time around, we think the ring buffer */
  355. /* is empty since the read index == write index */
  356. if (bytes_avail_towrite <= totalbytes_towrite) {
  357. spin_unlock_irqrestore(&outring_info->ring_lock, flags);
  358. return -EAGAIN;
  359. }
  360. /* Write to the ring buffer */
  361. next_write_location = hv_get_next_write_location(outring_info);
  362. old_write = next_write_location;
  363. for_each_sg(sglist, sg, sgcount, i)
  364. {
  365. next_write_location = hv_copyto_ringbuffer(outring_info,
  366. next_write_location,
  367. sg_virt(sg),
  368. sg->length);
  369. }
  370. /* Set previous packet start */
  371. prev_indices = hv_get_ring_bufferindices(outring_info);
  372. next_write_location = hv_copyto_ringbuffer(outring_info,
  373. next_write_location,
  374. &prev_indices,
  375. sizeof(u64));
  376. /* Issue a full memory barrier before updating the write index */
  377. mb();
  378. /* Now, update the write location */
  379. hv_set_next_write_location(outring_info, next_write_location);
  380. spin_unlock_irqrestore(&outring_info->ring_lock, flags);
  381. *signal = hv_need_to_signal(old_write, outring_info);
  382. return 0;
  383. }
  384. /*
  385. *
  386. * hv_ringbuffer_peek()
  387. *
  388. * Read without advancing the read index
  389. *
  390. */
  391. int hv_ringbuffer_peek(struct hv_ring_buffer_info *Inring_info,
  392. void *Buffer, u32 buflen)
  393. {
  394. u32 bytes_avail_towrite;
  395. u32 bytes_avail_toread;
  396. u32 next_read_location = 0;
  397. unsigned long flags;
  398. spin_lock_irqsave(&Inring_info->ring_lock, flags);
  399. hv_get_ringbuffer_availbytes(Inring_info,
  400. &bytes_avail_toread,
  401. &bytes_avail_towrite);
  402. /* Make sure there is something to read */
  403. if (bytes_avail_toread < buflen) {
  404. spin_unlock_irqrestore(&Inring_info->ring_lock, flags);
  405. return -EAGAIN;
  406. }
  407. /* Convert to byte offset */
  408. next_read_location = hv_get_next_read_location(Inring_info);
  409. next_read_location = hv_copyfrom_ringbuffer(Inring_info,
  410. Buffer,
  411. buflen,
  412. next_read_location);
  413. spin_unlock_irqrestore(&Inring_info->ring_lock, flags);
  414. return 0;
  415. }
  416. /*
  417. *
  418. * hv_ringbuffer_read()
  419. *
  420. * Read and advance the read index
  421. *
  422. */
  423. int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, void *buffer,
  424. u32 buflen, u32 offset, bool *signal)
  425. {
  426. u32 bytes_avail_towrite;
  427. u32 bytes_avail_toread;
  428. u32 next_read_location = 0;
  429. u64 prev_indices = 0;
  430. unsigned long flags;
  431. u32 old_read;
  432. if (buflen <= 0)
  433. return -EINVAL;
  434. spin_lock_irqsave(&inring_info->ring_lock, flags);
  435. hv_get_ringbuffer_availbytes(inring_info,
  436. &bytes_avail_toread,
  437. &bytes_avail_towrite);
  438. old_read = bytes_avail_toread;
  439. /* Make sure there is something to read */
  440. if (bytes_avail_toread < buflen) {
  441. spin_unlock_irqrestore(&inring_info->ring_lock, flags);
  442. return -EAGAIN;
  443. }
  444. next_read_location =
  445. hv_get_next_readlocation_withoffset(inring_info, offset);
  446. next_read_location = hv_copyfrom_ringbuffer(inring_info,
  447. buffer,
  448. buflen,
  449. next_read_location);
  450. next_read_location = hv_copyfrom_ringbuffer(inring_info,
  451. &prev_indices,
  452. sizeof(u64),
  453. next_read_location);
  454. /* Make sure all reads are done before we update the read index since */
  455. /* the writer may start writing to the read area once the read index */
  456. /*is updated */
  457. mb();
  458. /* Update the read index */
  459. hv_set_next_read_location(inring_info, next_read_location);
  460. spin_unlock_irqrestore(&inring_info->ring_lock, flags);
  461. *signal = hv_need_to_signal_on_read(old_read, inring_info);
  462. return 0;
  463. }