|
@@ -22,23 +22,27 @@
|
|
|
#include <linux/device.h>
|
|
|
#include <linux/slab.h>
|
|
|
#include <linux/module.h>
|
|
|
+#include <linux/hrtimer.h>
|
|
|
|
|
|
/* virtio guest is communicating with a virtual "device" that actually runs on
|
|
|
* a host processor. Memory barriers are used to control SMP effects. */
|
|
|
#ifdef CONFIG_SMP
|
|
|
/* Where possible, use SMP barriers which are more lightweight than mandatory
|
|
|
* barriers, because mandatory barriers control MMIO effects on accesses
|
|
|
- * through relaxed memory I/O windows (which virtio does not use). */
|
|
|
-#define virtio_mb() smp_mb()
|
|
|
-#define virtio_rmb() smp_rmb()
|
|
|
-#define virtio_wmb() smp_wmb()
|
|
|
+ * through relaxed memory I/O windows (which virtio-pci does not use). */
|
|
|
+#define virtio_mb(vq) \
|
|
|
+ do { if ((vq)->weak_barriers) smp_mb(); else mb(); } while(0)
|
|
|
+#define virtio_rmb(vq) \
|
|
|
+ do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0)
|
|
|
+#define virtio_wmb(vq) \
|
|
|
+ do { if ((vq)->weak_barriers) smp_wmb(); else wmb(); } while(0)
|
|
|
#else
|
|
|
/* We must force memory ordering even if guest is UP since host could be
|
|
|
* running on another CPU, but SMP barriers are defined to barrier() in that
|
|
|
* configuration. So fall back to mandatory barriers instead. */
|
|
|
-#define virtio_mb() mb()
|
|
|
-#define virtio_rmb() rmb()
|
|
|
-#define virtio_wmb() wmb()
|
|
|
+#define virtio_mb(vq) mb()
|
|
|
+#define virtio_rmb(vq) rmb()
|
|
|
+#define virtio_wmb(vq) wmb()
|
|
|
#endif
|
|
|
|
|
|
#ifdef DEBUG
|
|
@@ -77,6 +81,9 @@ struct vring_virtqueue
|
|
|
/* Actual memory layout for this queue */
|
|
|
struct vring vring;
|
|
|
|
|
|
+ /* Can we use weak barriers? */
|
|
|
+ bool weak_barriers;
|
|
|
+
|
|
|
/* Other side has made a mess, don't try any more. */
|
|
|
bool broken;
|
|
|
|
|
@@ -102,6 +109,10 @@ struct vring_virtqueue
|
|
|
#ifdef DEBUG
|
|
|
/* They're supposed to lock for us. */
|
|
|
unsigned int in_use;
|
|
|
+
|
|
|
+ /* Figure out if their kicks are too delayed. */
|
|
|
+ bool last_add_time_valid;
|
|
|
+ ktime_t last_add_time;
|
|
|
#endif
|
|
|
|
|
|
/* Tokens for callbacks. */
|
|
@@ -160,12 +171,29 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
|
|
|
return head;
|
|
|
}
|
|
|
|
|
|
-int virtqueue_add_buf_gfp(struct virtqueue *_vq,
|
|
|
- struct scatterlist sg[],
|
|
|
- unsigned int out,
|
|
|
- unsigned int in,
|
|
|
- void *data,
|
|
|
- gfp_t gfp)
|
|
|
+/**
|
|
|
+ * virtqueue_add_buf - expose buffer to other end
|
|
|
+ * @vq: the struct virtqueue we're talking about.
|
|
|
+ * @sg: the description of the buffer(s).
|
|
|
+ * @out: the number of sg readable by other side
|
|
|
+ * @in: the number of sg which are writable (after readable ones)
|
|
|
+ * @data: the token identifying the buffer.
|
|
|
+ * @gfp: how to do memory allocations (if necessary).
|
|
|
+ *
|
|
|
+ * Caller must ensure we don't call this with other virtqueue operations
|
|
|
+ * at the same time (except where noted).
|
|
|
+ *
|
|
|
+ * Returns remaining capacity of queue or a negative error
|
|
|
+ * (e.g. -ENOSPC). Note that it only really makes sense to treat all
|
|
|
+ * positive return values as "available": indirect buffers mean that
|
|
|
+ * we can put an entire sg[] array inside a single queue entry.
|
|
|
+ */
|
|
|
+int virtqueue_add_buf(struct virtqueue *_vq,
|
|
|
+ struct scatterlist sg[],
|
|
|
+ unsigned int out,
|
|
|
+ unsigned int in,
|
|
|
+ void *data,
|
|
|
+ gfp_t gfp)
|
|
|
{
|
|
|
struct vring_virtqueue *vq = to_vvq(_vq);
|
|
|
unsigned int i, avail, uninitialized_var(prev);
|
|
@@ -175,6 +203,19 @@ int virtqueue_add_buf_gfp(struct virtqueue *_vq,
|
|
|
|
|
|
BUG_ON(data == NULL);
|
|
|
|
|
|
+#ifdef DEBUG
|
|
|
+ {
|
|
|
+ ktime_t now = ktime_get();
|
|
|
+
|
|
|
+ /* No kick or get, with .1 second between? Warn. */
|
|
|
+ if (vq->last_add_time_valid)
|
|
|
+ WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
|
|
|
+ > 100);
|
|
|
+ vq->last_add_time = now;
|
|
|
+ vq->last_add_time_valid = true;
|
|
|
+ }
|
|
|
+#endif
|
|
|
+
|
|
|
/* If the host supports indirect descriptor tables, and we have multiple
|
|
|
* buffers, then go indirect. FIXME: tune this threshold */
|
|
|
if (vq->indirect && (out + in) > 1 && vq->num_free) {
|
|
@@ -227,40 +268,102 @@ add_head:
|
|
|
vq->data[head] = data;
|
|
|
|
|
|
/* Put entry in available array (but don't update avail->idx until they
|
|
|
- * do sync). FIXME: avoid modulus here? */
|
|
|
- avail = (vq->vring.avail->idx + vq->num_added++) % vq->vring.num;
|
|
|
+ * do sync). */
|
|
|
+ avail = (vq->vring.avail->idx & (vq->vring.num-1));
|
|
|
vq->vring.avail->ring[avail] = head;
|
|
|
|
|
|
+ /* Descriptors and available array need to be set before we expose the
|
|
|
+ * new available array entries. */
|
|
|
+ virtio_wmb(vq);
|
|
|
+ vq->vring.avail->idx++;
|
|
|
+ vq->num_added++;
|
|
|
+
|
|
|
+ /* This is very unlikely, but theoretically possible. Kick
|
|
|
+ * just in case. */
|
|
|
+ if (unlikely(vq->num_added == (1 << 16) - 1))
|
|
|
+ virtqueue_kick(_vq);
|
|
|
+
|
|
|
pr_debug("Added buffer head %i to %p\n", head, vq);
|
|
|
END_USE(vq);
|
|
|
|
|
|
return vq->num_free;
|
|
|
}
|
|
|
-EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp);
|
|
|
+EXPORT_SYMBOL_GPL(virtqueue_add_buf);
|
|
|
|
|
|
-void virtqueue_kick(struct virtqueue *_vq)
|
|
|
+/**
|
|
|
+ * virtqueue_kick_prepare - first half of split virtqueue_kick call.
|
|
|
+ * @vq: the struct virtqueue
|
|
|
+ *
|
|
|
+ * Instead of virtqueue_kick(), you can do:
|
|
|
+ * if (virtqueue_kick_prepare(vq))
|
|
|
+ * virtqueue_notify(vq);
|
|
|
+ *
|
|
|
+ * This is sometimes useful because the virtqueue_kick_prepare() needs
|
|
|
+ * to be serialized, but the actual virtqueue_notify() call does not.
|
|
|
+ */
|
|
|
+bool virtqueue_kick_prepare(struct virtqueue *_vq)
|
|
|
{
|
|
|
struct vring_virtqueue *vq = to_vvq(_vq);
|
|
|
u16 new, old;
|
|
|
+ bool needs_kick;
|
|
|
+
|
|
|
START_USE(vq);
|
|
|
/* Descriptors and available array need to be set before we expose the
|
|
|
* new available array entries. */
|
|
|
- virtio_wmb();
|
|
|
+ virtio_wmb(vq);
|
|
|
|
|
|
- old = vq->vring.avail->idx;
|
|
|
- new = vq->vring.avail->idx = old + vq->num_added;
|
|
|
+ old = vq->vring.avail->idx - vq->num_added;
|
|
|
+ new = vq->vring.avail->idx;
|
|
|
vq->num_added = 0;
|
|
|
|
|
|
- /* Need to update avail index before checking if we should notify */
|
|
|
- virtio_mb();
|
|
|
-
|
|
|
- if (vq->event ?
|
|
|
- vring_need_event(vring_avail_event(&vq->vring), new, old) :
|
|
|
- !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY))
|
|
|
- /* Prod other side to tell it about changes. */
|
|
|
- vq->notify(&vq->vq);
|
|
|
+#ifdef DEBUG
|
|
|
+ if (vq->last_add_time_valid) {
|
|
|
+ WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
|
|
|
+ vq->last_add_time)) > 100);
|
|
|
+ }
|
|
|
+ vq->last_add_time_valid = false;
|
|
|
+#endif
|
|
|
|
|
|
+ if (vq->event) {
|
|
|
+ needs_kick = vring_need_event(vring_avail_event(&vq->vring),
|
|
|
+ new, old);
|
|
|
+ } else {
|
|
|
+ needs_kick = !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY);
|
|
|
+ }
|
|
|
END_USE(vq);
|
|
|
+ return needs_kick;
|
|
|
+}
|
|
|
+EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
|
|
|
+
|
|
|
+/**
|
|
|
+ * virtqueue_notify - second half of split virtqueue_kick call.
|
|
|
+ * @_vq: the struct virtqueue whose other side should be notified
|
|
|
+ *
|
|
|
+ * Unconditionally invokes the queue's notify callback; whether a
|
|
|
+ * notification is needed at all is decided by virtqueue_kick_prepare().
|
|
|
+ *
|
|
|
+ * This does not need to be serialized.
|
|
|
+ */
|
|
|
+void virtqueue_notify(struct virtqueue *_vq)
|
|
|
+{
|
|
|
+ struct vring_virtqueue *vq = to_vvq(_vq);
|
|
|
+
|
|
|
+ /* Prod other side to tell it about changes. */
|
|
|
+ vq->notify(_vq);
|
|
|
+}
|
|
|
+EXPORT_SYMBOL_GPL(virtqueue_notify);
|
|
|
+
|
|
|
+/**
|
|
|
+ * virtqueue_kick - update after add_buf
|
|
|
+ * @vq: the struct virtqueue
|
|
|
+ *
|
|
|
+ * After one or more virtqueue_add_buf calls, invoke this to kick
|
|
|
+ * the other side.
|
|
|
+ *
|
|
|
+ * This is virtqueue_kick_prepare() followed by virtqueue_notify(), the
|
|
|
+ * latter only when the former reports that a notification is needed.
|
|
|
+ *
|
|
|
+ * Caller must ensure we don't call this with other virtqueue
|
|
|
+ * operations at the same time (except where noted).
|
|
|
+ */
|
|
|
+void virtqueue_kick(struct virtqueue *vq)
|
|
|
+{
|
|
|
+ if (virtqueue_kick_prepare(vq))
|
|
|
+ virtqueue_notify(vq);
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(virtqueue_kick);
|
|
|
|
|
@@ -294,11 +397,28 @@ static inline bool more_used(const struct vring_virtqueue *vq)
|
|
|
return vq->last_used_idx != vq->vring.used->idx;
|
|
|
}
|
|
|
|
|
|
+/**
|
|
|
+ * virtqueue_get_buf - get the next used buffer
|
|
|
+ * @vq: the struct virtqueue we're talking about.
|
|
|
+ * @len: the length written into the buffer
|
|
|
+ *
|
|
|
+ * If the other side wrote data into the buffer, @len will be set to the
|
|
|
+ * amount written. This means you don't need to clear the buffer
|
|
|
+ * beforehand to ensure there's no data leakage in the case of short
|
|
|
+ * writes.
|
|
|
+ *
|
|
|
+ * Caller must ensure we don't call this with other virtqueue
|
|
|
+ * operations at the same time (except where noted).
|
|
|
+ *
|
|
|
+ * Returns NULL if there are no used buffers, or the "data" token
|
|
|
+ * handed to virtqueue_add_buf().
|
|
|
+ */
|
|
|
void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
|
|
|
{
|
|
|
struct vring_virtqueue *vq = to_vvq(_vq);
|
|
|
void *ret;
|
|
|
unsigned int i;
|
|
|
+ u16 last_used;
|
|
|
|
|
|
START_USE(vq);
|
|
|
|
|
@@ -314,10 +434,11 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
|
|
|
}
|
|
|
|
|
|
/* Only get used array entries after they have been exposed by host. */
|
|
|
- virtio_rmb();
|
|
|
+ virtio_rmb(vq);
|
|
|
|
|
|
- i = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].id;
|
|
|
- *len = vq->vring.used->ring[vq->last_used_idx%vq->vring.num].len;
|
|
|
+ last_used = (vq->last_used_idx & (vq->vring.num - 1));
|
|
|
+ i = vq->vring.used->ring[last_used].id;
|
|
|
+ *len = vq->vring.used->ring[last_used].len;
|
|
|
|
|
|
if (unlikely(i >= vq->vring.num)) {
|
|
|
BAD_RING(vq, "id %u out of range\n", i);
|
|
@@ -337,14 +458,27 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
|
|
|
* the read in the next get_buf call. */
|
|
|
if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
|
|
|
vring_used_event(&vq->vring) = vq->last_used_idx;
|
|
|
- virtio_mb();
|
|
|
+ virtio_mb(vq);
|
|
|
}
|
|
|
|
|
|
+#ifdef DEBUG
|
|
|
+ vq->last_add_time_valid = false;
|
|
|
+#endif
|
|
|
+
|
|
|
END_USE(vq);
|
|
|
return ret;
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(virtqueue_get_buf);
|
|
|
|
|
|
+/**
|
|
|
+ * virtqueue_disable_cb - disable callbacks
|
|
|
+ * @vq: the struct virtqueue we're talking about.
|
|
|
+ *
|
|
|
+ * Note that this is not necessarily synchronous, hence unreliable and only
|
|
|
+ * useful as an optimization.
|
|
|
+ *
|
|
|
+ * Unlike other operations, this need not be serialized.
|
|
|
+ */
|
|
|
void virtqueue_disable_cb(struct virtqueue *_vq)
|
|
|
{
|
|
|
struct vring_virtqueue *vq = to_vvq(_vq);
|
|
@@ -353,6 +487,17 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
|
|
|
|
|
|
+/**
|
|
|
+ * virtqueue_enable_cb - restart callbacks after disable_cb.
|
|
|
+ * @vq: the struct virtqueue we're talking about.
|
|
|
+ *
|
|
|
+ * This re-enables callbacks; it returns "false" if there are pending
|
|
|
+ * buffers in the queue, to detect a possible race between the driver
|
|
|
+ * checking for more work, and enabling callbacks.
|
|
|
+ *
|
|
|
+ * Caller must ensure we don't call this with other virtqueue
|
|
|
+ * operations at the same time (except where noted).
|
|
|
+ */
|
|
|
bool virtqueue_enable_cb(struct virtqueue *_vq)
|
|
|
{
|
|
|
struct vring_virtqueue *vq = to_vvq(_vq);
|
|
@@ -366,7 +511,7 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
|
|
|
* entry. Always do both to keep code simple. */
|
|
|
vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
|
|
|
vring_used_event(&vq->vring) = vq->last_used_idx;
|
|
|
- virtio_mb();
|
|
|
+ virtio_mb(vq);
|
|
|
if (unlikely(more_used(vq))) {
|
|
|
END_USE(vq);
|
|
|
return false;
|
|
@@ -377,6 +522,19 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
|
|
|
|
|
|
+/**
|
|
|
+ * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
|
|
|
+ * @vq: the struct virtqueue we're talking about.
|
|
|
+ *
|
|
|
+ * This re-enables callbacks but hints to the other side to delay
|
|
|
+ * interrupts until most of the available buffers have been processed;
|
|
|
+ * it returns "false" if there are many pending buffers in the queue,
|
|
|
+ * to detect a possible race between the driver checking for more work,
|
|
|
+ * and enabling callbacks.
|
|
|
+ *
|
|
|
+ * Caller must ensure we don't call this with other virtqueue
|
|
|
+ * operations at the same time (except where noted).
|
|
|
+ */
|
|
|
bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
|
|
|
{
|
|
|
struct vring_virtqueue *vq = to_vvq(_vq);
|
|
@@ -393,7 +551,7 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
|
|
|
/* TODO: tune this threshold */
|
|
|
bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
|
|
|
vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
|
|
|
- virtio_mb();
|
|
|
+ virtio_mb(vq);
|
|
|
if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {
|
|
|
END_USE(vq);
|
|
|
return false;
|
|
@@ -404,6 +562,14 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
|
|
|
|
|
|
+/**
|
|
|
+ * virtqueue_detach_unused_buf - detach first unused buffer
|
|
|
+ * @vq: the struct virtqueue we're talking about.
|
|
|
+ *
|
|
|
+ * Returns NULL or the "data" token handed to virtqueue_add_buf().
|
|
|
+ * This is not valid on an active queue; it is useful only for device
|
|
|
+ * shutdown.
|
|
|
+ */
|
|
|
void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
|
|
|
{
|
|
|
struct vring_virtqueue *vq = to_vvq(_vq);
|
|
@@ -453,6 +619,7 @@ EXPORT_SYMBOL_GPL(vring_interrupt);
|
|
|
struct virtqueue *vring_new_virtqueue(unsigned int num,
|
|
|
unsigned int vring_align,
|
|
|
struct virtio_device *vdev,
|
|
|
+ bool weak_barriers,
|
|
|
void *pages,
|
|
|
void (*notify)(struct virtqueue *),
|
|
|
void (*callback)(struct virtqueue *),
|
|
@@ -476,12 +643,14 @@ struct virtqueue *vring_new_virtqueue(unsigned int num,
|
|
|
vq->vq.vdev = vdev;
|
|
|
vq->vq.name = name;
|
|
|
vq->notify = notify;
|
|
|
+ vq->weak_barriers = weak_barriers;
|
|
|
vq->broken = false;
|
|
|
vq->last_used_idx = 0;
|
|
|
vq->num_added = 0;
|
|
|
list_add_tail(&vq->vq.list, &vdev->vqs);
|
|
|
#ifdef DEBUG
|
|
|
vq->in_use = false;
|
|
|
+ vq->last_add_time_valid = false;
|
|
|
#endif
|
|
|
|
|
|
vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
|
|
@@ -530,7 +699,13 @@ void vring_transport_features(struct virtio_device *vdev)
|
|
|
}
|
|
|
EXPORT_SYMBOL_GPL(vring_transport_features);
|
|
|
|
|
|
-/* return the size of the vring within the virtqueue */
|
|
|
+/**
|
|
|
+ * virtqueue_get_vring_size - return the size of the virtqueue's vring
|
|
|
+ * @vq: the struct virtqueue containing the vring of interest.
|
|
|
+ *
|
|
|
+ * Returns the size of the vring. This is mainly used for boasting to
|
|
|
+ * userspace. Unlike other operations, this need not be serialized.
|
|
|
+ */
|
|
|
unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
|
|
|
{
|
|
|
|