@@ -60,7 +60,7 @@ struct vfio_dma {
 	struct rb_node		node;
 	dma_addr_t		iova;		/* Device address */
 	unsigned long		vaddr;		/* Process virtual addr */
-	long			npage;		/* Number of pages */
+	size_t			size;		/* Map size (bytes) */
 	int			prot;		/* IOMMU_READ/WRITE */
 };
 
@@ -74,8 +74,6 @@ struct vfio_group {
  * into DMA'ble space using the IOMMU
  */
 
-#define NPAGE_TO_SIZE(npage)	((size_t)(npage) << PAGE_SHIFT)
-
 static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
 				      dma_addr_t start, size_t size)
 {
@@ -86,7 +84,7 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
 
 		if (start + size <= dma->iova)
 			node = node->rb_left;
-		else if (start >= dma->iova + NPAGE_TO_SIZE(dma->npage))
+		else if (start >= dma->iova + dma->size)
 			node = node->rb_right;
 		else
 			return dma;
@@ -104,7 +102,7 @@ static void vfio_insert_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
 		parent = *link;
 		dma = rb_entry(parent, struct vfio_dma, node);
 
-		if (new->iova + NPAGE_TO_SIZE(new->npage) <= dma->iova)
+		if (new->iova + new->size <= dma->iova)
 			link = &(*link)->rb_left;
 		else
 			link = &(*link)->rb_right;
@@ -144,8 +142,8 @@ static void vfio_lock_acct(long npage)
 	struct vwork *vwork;
 	struct mm_struct *mm;
 
-	if (!current->mm)
-		return; /* process exited */
+	if (!current->mm || !npage)
+		return; /* process exited or nothing to do */
 
 	if (down_write_trylock(&current->mm->mmap_sem)) {
 		current->mm->locked_vm += npage;
@@ -217,33 +215,6 @@ static int put_pfn(unsigned long pfn, int prot)
 	return 0;
 }
 
-/* Unmap DMA region */
-static long __vfio_dma_do_unmap(struct vfio_iommu *iommu, dma_addr_t iova,
-				long npage, int prot)
-{
-	long i, unlocked = 0;
-
-	for (i = 0; i < npage; i++, iova += PAGE_SIZE) {
-		unsigned long pfn;
-
-		pfn = iommu_iova_to_phys(iommu->domain, iova) >> PAGE_SHIFT;
-		if (pfn) {
-			iommu_unmap(iommu->domain, iova, PAGE_SIZE);
-			unlocked += put_pfn(pfn, prot);
-		}
-	}
-	return unlocked;
-}
-
-static void vfio_dma_unmap(struct vfio_iommu *iommu, dma_addr_t iova,
-			   long npage, int prot)
-{
-	long unlocked;
-
-	unlocked = __vfio_dma_do_unmap(iommu, iova, npage, prot);
-	vfio_lock_acct(-unlocked);
-}
-
 static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
 {
 	struct page *page[1];
@@ -270,79 +241,142 @@ static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
 	return ret;
 }
 
-/* Map DMA region */
-static int __vfio_dma_map(struct vfio_iommu *iommu, dma_addr_t iova,
-			  unsigned long vaddr, long npage, int prot)
+/*
+ * Attempt to pin pages. We really don't want to track all the pfns and
+ * the iommu can only map chunks of consecutive pfns anyway, so get the
+ * first page and all consecutive pages with the same locking.
+ */
+static long vfio_pin_pages(unsigned long vaddr, long npage,
+			   int prot, unsigned long *pfn_base)
 {
-	dma_addr_t start = iova;
-	long i, locked = 0;
-	int ret;
+	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	bool lock_cap = capable(CAP_IPC_LOCK);
+	long ret, i;
 
-	/* Verify that pages are not already mapped */
-	for (i = 0; i < npage; i++, iova += PAGE_SIZE)
-		if (iommu_iova_to_phys(iommu->domain, iova))
-			return -EBUSY;
+	if (!current->mm)
+		return -ENODEV;
 
-	iova = start;
+	ret = vaddr_get_pfn(vaddr, prot, pfn_base);
+	if (ret)
+		return ret;
 
-	if (iommu->cache)
-		prot |= IOMMU_CACHE;
+	if (is_invalid_reserved_pfn(*pfn_base))
+		return 1;
 
-	/*
-	 * XXX We break mappings into pages and use get_user_pages_fast to
-	 * pin the pages in memory. It's been suggested that mlock might
-	 * provide a more efficient mechanism, but nothing prevents the
-	 * user from munlocking the pages, which could then allow the user
-	 * access to random host memory. We also have no guarantee from the
-	 * IOMMU API that the iommu driver can unmap sub-pages of previous
-	 * mappings. This means we might lose an entire range if a single
-	 * page within it is unmapped. Single page mappings are inefficient,
-	 * but provide the most flexibility for now.
-	 */
-	for (i = 0; i < npage; i++, iova += PAGE_SIZE, vaddr += PAGE_SIZE) {
+	if (!lock_cap && current->mm->locked_vm + 1 > limit) {
+		put_pfn(*pfn_base, prot);
+		pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
+			limit << PAGE_SHIFT);
+		return -ENOMEM;
+	}
+
+	/* Lock all the consecutive pages from pfn_base */
+	for (i = 1, vaddr += PAGE_SIZE; i < npage; i++, vaddr += PAGE_SIZE) {
 		unsigned long pfn = 0;
 
 		ret = vaddr_get_pfn(vaddr, prot, &pfn);
-		if (ret) {
-			__vfio_dma_do_unmap(iommu, start, i, prot);
-			return ret;
+		if (ret)
+			break;
+
+		if (pfn != *pfn_base + i || is_invalid_reserved_pfn(pfn)) {
+			put_pfn(pfn, prot);
+			break;
 		}
 
+		if (!lock_cap && current->mm->locked_vm + i + 1 > limit) {
+			put_pfn(pfn, prot);
+			pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
+				__func__, limit << PAGE_SHIFT);
+			break;
+		}
+	}
+
+	vfio_lock_acct(i);
+
+	return i;
+}
+
+static long vfio_unpin_pages(unsigned long pfn, long npage,
+			     int prot, bool do_accounting)
+{
+	unsigned long unlocked = 0;
+	long i;
+
+	for (i = 0; i < npage; i++)
+		unlocked += put_pfn(pfn++, prot);
+
+	if (do_accounting)
+		vfio_lock_acct(-unlocked);
+
+	return unlocked;
+}
+
+static int vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
+			    dma_addr_t iova, size_t *size)
+{
+	dma_addr_t start = iova, end = iova + *size;
+	long unlocked = 0;
+
+	while (iova < end) {
+		size_t unmapped;
+		phys_addr_t phys;
+
 		/*
-		 * Only add actual locked pages to accounting
-		 * XXX We're effectively marking a page locked for every
-		 * IOVA page even though it's possible the user could be
-		 * backing multiple IOVAs with the same vaddr. This over-
-		 * penalizes the user process, but we currently have no
-		 * easy way to do this properly.
+		 * We use the IOMMU to track the physical address. This
+		 * saves us from having a lot more entries in our mapping
+		 * tree. The downside is that we don't track the size
+		 * used to do the mapping. We request unmap of a single
+		 * page, but expect IOMMUs that support large pages to
+		 * unmap a larger chunk.
 		 */
-		if (!is_invalid_reserved_pfn(pfn))
-			locked++;
-
-		ret = iommu_map(iommu->domain, iova,
-				(phys_addr_t)pfn << PAGE_SHIFT,
-				PAGE_SIZE, prot);
-		if (ret) {
-			/* Back out mappings on error */
-			put_pfn(pfn, prot);
-			__vfio_dma_do_unmap(iommu, start, i, prot);
-			return ret;
+		phys = iommu_iova_to_phys(iommu->domain, iova);
+		if (WARN_ON(!phys)) {
+			iova += PAGE_SIZE;
+			continue;
 		}
+
+		unmapped = iommu_unmap(iommu->domain, iova, PAGE_SIZE);
+		if (!unmapped)
+			break;
+
+		unlocked += vfio_unpin_pages(phys >> PAGE_SHIFT,
+					     unmapped >> PAGE_SHIFT,
+					     dma->prot, false);
+		iova += unmapped;
 	}
-	vfio_lock_acct(locked);
+
+	vfio_lock_acct(-unlocked);
+
+	*size = iova - start;
+
 	return 0;
 }
 
static int vfio_remove_dma_overlap(struct vfio_iommu *iommu, dma_addr_t start,
|
|
|
- size_t size, struct vfio_dma *dma)
|
|
|
+ size_t *size, struct vfio_dma *dma)
|
|
|
{
|
|
|
+ size_t offset, overlap, tmp;
|
|
|
struct vfio_dma *split;
|
|
|
- long npage_lo, npage_hi;
|
|
|
+ int ret;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Existing dma region is completely covered, unmap all. This is
|
|
|
+ * the likely case since userspace tends to map and unmap buffers
|
|
|
+ * in one shot rather than multiple mappings within a buffer.
|
|
|
+ */
|
|
|
+ if (likely(start <= dma->iova &&
|
|
|
+ start + *size >= dma->iova + dma->size)) {
|
|
|
+ *size = dma->size;
|
|
|
+ ret = vfio_unmap_unpin(iommu, dma, dma->iova, size);
|
|
|
+ if (ret)
|
|
|
+ return ret;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Did we remove more than we have? Should never happen
|
|
|
+ * since a vfio_dma is contiguous in iova and vaddr.
|
|
|
+ */
|
|
|
+ WARN_ON(*size != dma->size);
|
|
|
|
|
|
- /* Existing dma region is completely covered, unmap all */
|
|
|
- if (start <= dma->iova &&
|
|
|
- start + size >= dma->iova + NPAGE_TO_SIZE(dma->npage)) {
|
|
|
- vfio_dma_unmap(iommu, dma->iova, dma->npage, dma->prot);
|
|
|
vfio_remove_dma(iommu, dma);
|
|
|
kfree(dma);
|
|
|
return 0;
|
|
@@ -350,47 +384,79 @@ static int vfio_remove_dma_overlap(struct vfio_iommu *iommu, dma_addr_t start,
 
 	/* Overlap low address of existing range */
 	if (start <= dma->iova) {
-		size_t overlap;
+		overlap = start + *size - dma->iova;
+		ret = vfio_unmap_unpin(iommu, dma, dma->iova, &overlap);
+		if (ret)
+			return ret;
 
-		overlap = start + size - dma->iova;
-		npage_lo = overlap >> PAGE_SHIFT;
+		vfio_remove_dma(iommu, dma);
 
-		vfio_dma_unmap(iommu, dma->iova, npage_lo, dma->prot);
-		dma->iova += overlap;
-		dma->vaddr += overlap;
-		dma->npage -= npage_lo;
+		/*
+		 * Check, we may have removed the whole vfio_dma. If not,
+		 * fixup and re-insert.
+		 */
+		if (overlap < dma->size) {
+			dma->iova += overlap;
+			dma->vaddr += overlap;
+			dma->size -= overlap;
+			vfio_insert_dma(iommu, dma);
+		}
+		*size = overlap;
 		return 0;
 	}
 
 	/* Overlap high address of existing range */
-	if (start + size >= dma->iova + NPAGE_TO_SIZE(dma->npage)) {
-		size_t overlap;
+	if (start + *size >= dma->iova + dma->size) {
+		offset = start - dma->iova;
+		overlap = dma->size - offset;
 
-		overlap = dma->iova + NPAGE_TO_SIZE(dma->npage) - start;
-		npage_hi = overlap >> PAGE_SHIFT;
+		ret = vfio_unmap_unpin(iommu, dma, start, &overlap);
+		if (ret)
+			return ret;
+
+		/*
+		 * We may have unmapped the entire vfio_dma if the user is
+		 * trying to unmap a sub-region of what was originally
+		 * mapped. If anything is left, we can resize in place since
+		 * iova is unchanged.
+		 */
+		if (overlap < dma->size)
+			dma->size -= overlap;
+		else
+			vfio_remove_dma(iommu, dma);
 
-		vfio_dma_unmap(iommu, start, npage_hi, dma->prot);
-		dma->npage -= npage_hi;
+		*size = overlap;
 		return 0;
 	}
 
 	/* Split existing */
-	npage_lo = (start - dma->iova) >> PAGE_SHIFT;
-	npage_hi = dma->npage - (size >> PAGE_SHIFT) - npage_lo;
+	offset = start - dma->iova;
 
-	split = kzalloc(sizeof *split, GFP_KERNEL);
-	if (!split)
-		return -ENOMEM;
+	ret = vfio_unmap_unpin(iommu, dma, start, size);
+	if (ret)
+		return ret;
 
-	vfio_dma_unmap(iommu, start, size >> PAGE_SHIFT, dma->prot);
+	WARN_ON(!*size);
+	tmp = dma->size;
 
-	dma->npage = npage_lo;
+	/*
+	 * Resize the lower vfio_dma in place, insert new for remaining
+	 * upper segment.
+	 */
+	dma->size = offset;
+
+	if (offset + *size < tmp) {
+		split = kzalloc(sizeof(*split), GFP_KERNEL);
+		if (!split)
+			return -ENOMEM;
+
+		split->size = tmp - offset - *size;
+		split->iova = dma->iova + offset + *size;
+		split->vaddr = dma->vaddr + offset + *size;
+		split->prot = dma->prot;
+		vfio_insert_dma(iommu, split);
+	}
 
-	split->npage = npage_hi;
-	split->iova = start + size;
-	split->vaddr = dma->vaddr + NPAGE_TO_SIZE(npage_lo) + size;
-	split->prot = dma->prot;
-	vfio_insert_dma(iommu, split);
 	return 0;
 }
@@ -399,6 +465,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 {
 	uint64_t mask;
 	struct vfio_dma *dma;
+	size_t unmapped = 0, size;
 	int ret = 0;
 
 	mask = ((uint64_t)1 << __ffs(iommu->domain->ops->pgsize_bitmap)) - 1;
@@ -408,30 +475,66 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 	if (unmap->size & mask)
 		return -EINVAL;
 
-	/* XXX We still break these down into PAGE_SIZE */
 	WARN_ON(mask & PAGE_MASK);
 
 	mutex_lock(&iommu->lock);
 
-	while (!ret && (dma = vfio_find_dma(iommu,
-				unmap->iova, unmap->size)))
-		ret = vfio_remove_dma_overlap(iommu, unmap->iova,
-					      unmap->size, dma);
+	while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
+		size = unmap->size;
+		ret = vfio_remove_dma_overlap(iommu, unmap->iova, &size, dma);
+		if (ret)
+			break;
+		unmapped += size;
+	}
 
 	mutex_unlock(&iommu->lock);
+
+	/*
+	 * We may unmap more than requested, update the unmap struct so
+	 * userspace can know.
+	 */
+	unmap->size = unmapped;
+
+	return ret;
+}
+
+/*
+ * Turns out AMD IOMMU has a page table bug where it won't map large pages
+ * to a region that previously mapped smaller pages. This should be fixed
+ * soon, so this is just a temporary workaround to break mappings down into
+ * PAGE_SIZE. Better to map smaller pages than nothing.
+ */
+static int map_try_harder(struct vfio_iommu *iommu, dma_addr_t iova,
+			  unsigned long pfn, long npage, int prot)
+{
+	long i;
+	int ret;
+
+	for (i = 0; i < npage; i++, pfn++, iova += PAGE_SIZE) {
+		ret = iommu_map(iommu->domain, iova,
+				(phys_addr_t)pfn << PAGE_SHIFT,
+				PAGE_SIZE, prot);
+		if (ret)
+			break;
+	}
+
+	for (; i < npage && i > 0; i--, iova -= PAGE_SIZE)
+		iommu_unmap(iommu->domain, iova, PAGE_SIZE);
+
 	return ret;
 }
 
 static int vfio_dma_do_map(struct vfio_iommu *iommu,
 			   struct vfio_iommu_type1_dma_map *map)
 {
-	struct vfio_dma *dma;
-	dma_addr_t iova = map->iova;
-	unsigned long locked, lock_limit, vaddr = map->vaddr;
+	dma_addr_t end, iova;
+	unsigned long vaddr = map->vaddr;
 	size_t size = map->size;
+	long npage;
 	int ret = 0, prot = 0;
 	uint64_t mask;
-	long npage;
+
+	end = map->iova + map->size;
 
 	mask = ((uint64_t)1 << __ffs(iommu->domain->ops->pgsize_bitmap)) - 1;
 
@@ -444,92 +547,138 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
 	if (!prot)
 		return -EINVAL; /* No READ/WRITE? */
 
+	if (iommu->cache)
+		prot |= IOMMU_CACHE;
+
 	if (vaddr & mask)
 		return -EINVAL;
-	if (iova & mask)
+	if (map->iova & mask)
 		return -EINVAL;
-	if (size & mask)
+	if (!map->size || map->size & mask)
 		return -EINVAL;
 
-	/* XXX We still break these down into PAGE_SIZE */
 	WARN_ON(mask & PAGE_MASK);
 
 	/* Don't allow IOVA wrap */
-	if (iova + size && iova + size < iova)
+	if (end && end < map->iova)
 		return -EINVAL;
 
 	/* Don't allow virtual address wrap */
-	if (vaddr + size && vaddr + size < vaddr)
-		return -EINVAL;
-
-	npage = size >> PAGE_SHIFT;
-	if (!npage)
+	if (vaddr + map->size && vaddr + map->size < vaddr)
 		return -EINVAL;
 
-	dma = kzalloc(sizeof *dma, GFP_KERNEL);
-	if (!dma)
-		return -ENOMEM;
-
 	mutex_lock(&iommu->lock);
 
-	if (vfio_find_dma(iommu, iova, size)) {
-		ret = -EBUSY;
-		goto out_lock;
+	if (vfio_find_dma(iommu, map->iova, map->size)) {
+		mutex_unlock(&iommu->lock);
+		return -EEXIST;
 	}
 
-	/* account for locked pages */
-	locked = current->mm->locked_vm + npage;
-	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-	if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
-		pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
-			__func__, rlimit(RLIMIT_MEMLOCK));
-		ret = -ENOMEM;
-		goto out_lock;
-	}
+	for (iova = map->iova; iova < end; iova += size, vaddr += size) {
+		struct vfio_dma *dma = NULL;
+		unsigned long pfn;
+		long i;
+
+		/* Pin a contiguous chunk of memory */
+		npage = vfio_pin_pages(vaddr, (end - iova) >> PAGE_SHIFT,
+				       prot, &pfn);
+		if (npage <= 0) {
+			WARN_ON(!npage);
+			ret = (int)npage;
+			break;
+		}
 
-	ret = __vfio_dma_map(iommu, iova, vaddr, npage, prot);
-	if (ret)
-		goto out_lock;
-
-	dma->npage = npage;
-	dma->iova = iova;
-	dma->vaddr = vaddr;
-	dma->prot = prot;
-
-	/* Check if we abut a region below - nothing below 0 */
-	if (iova) {
-		struct vfio_dma *tmp = vfio_find_dma(iommu, iova - 1, 1);
-		if (tmp && tmp->prot == prot &&
-		    tmp->vaddr + NPAGE_TO_SIZE(tmp->npage) == vaddr) {
-			vfio_remove_dma(iommu, tmp);
-			dma->npage += tmp->npage;
-			dma->iova = iova = tmp->iova;
-			dma->vaddr = vaddr = tmp->vaddr;
-			kfree(tmp);
-			npage = dma->npage;
-			size = NPAGE_TO_SIZE(npage);
+		/* Verify pages are not already mapped */
+		for (i = 0; i < npage; i++) {
+			if (iommu_iova_to_phys(iommu->domain,
+					       iova + (i << PAGE_SHIFT))) {
+				vfio_unpin_pages(pfn, npage, prot, true);
+				ret = -EBUSY;
+				break;
+			}
+		}
+
+		ret = iommu_map(iommu->domain, iova,
+				(phys_addr_t)pfn << PAGE_SHIFT,
+				npage << PAGE_SHIFT, prot);
+		if (ret) {
+			if (ret != -EBUSY ||
+			    map_try_harder(iommu, iova, pfn, npage, prot)) {
+				vfio_unpin_pages(pfn, npage, prot, true);
+				break;
+			}
+		}
+
+		size = npage << PAGE_SHIFT;
+
+		/*
+		 * Check if we abut a region below - nothing below 0.
+		 * This is the most likely case when mapping chunks of
+		 * physically contiguous regions within a virtual address
+		 * range. Update the abutting entry in place since iova
+		 * doesn't change.
+		 */
+		if (likely(iova)) {
+			struct vfio_dma *tmp;
+			tmp = vfio_find_dma(iommu, iova - 1, 1);
+			if (tmp && tmp->prot == prot &&
+			    tmp->vaddr + tmp->size == vaddr) {
+				tmp->size += size;
+
+				iova = tmp->iova;
+				size = tmp->size;
+				vaddr = tmp->vaddr;
+				dma = tmp;
+			}
+		}
+
+		/* Check if we abut a region above - nothing above ~0 + 1 */
+		if (likely(iova + size)) {
+			struct vfio_dma *tmp;
+
+			tmp = vfio_find_dma(iommu, iova + size, 1);
+			if (tmp && tmp->prot == prot &&
+			    tmp->vaddr == vaddr + size) {
+				vfio_remove_dma(iommu, tmp);
+				if (dma)
+					dma->size += tmp->size;
+				else
+					size += tmp->size;
+				kfree(tmp);
+			}
 		}
-	}
 
-	/* Check if we abut a region above - nothing above ~0 + 1 */
-	if (iova + size) {
-		struct vfio_dma *tmp = vfio_find_dma(iommu, iova + size, 1);
-		if (tmp && tmp->prot == prot &&
-		    tmp->vaddr == vaddr + size) {
-			vfio_remove_dma(iommu, tmp);
-			dma->npage += tmp->npage;
-			kfree(tmp);
-			npage = dma->npage;
-			size = NPAGE_TO_SIZE(npage);
+		if (!dma) {
+			dma = kzalloc(sizeof(*dma), GFP_KERNEL);
+			if (!dma) {
+				iommu_unmap(iommu->domain, iova, size);
+				vfio_unpin_pages(pfn, npage, prot, true);
+				ret = -ENOMEM;
+				break;
+			}
+
+			dma->size = size;
+			dma->iova = iova;
+			dma->vaddr = vaddr;
+			dma->prot = prot;
+			vfio_insert_dma(iommu, dma);
 		}
 	}
 
-	vfio_insert_dma(iommu, dma);
+	if (ret) {
+		struct vfio_dma *tmp;
+		iova = map->iova;
+		size = map->size;
+		while ((tmp = vfio_find_dma(iommu, iova, size))) {
+			if (vfio_remove_dma_overlap(iommu, iova, &size, tmp)) {
+				pr_warn("%s: Error rolling back failed map\n",
+					__func__);
+				break;
+			}
+		}
+	}
 
-out_lock:
 	mutex_unlock(&iommu->lock);
-	if (ret)
-		kfree(dma);
 	return ret;
 }
 
@@ -651,9 +800,8 @@ static void vfio_iommu_type1_release(void *iommu_data)
 
 	while ((node = rb_first(&iommu->dma_list))) {
 		struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);
-		vfio_dma_unmap(iommu, dma->iova, dma->npage, dma->prot);
-		vfio_remove_dma(iommu, dma);
-		kfree(dma);
+		size_t size = dma->size;
+		vfio_remove_dma_overlap(iommu, dma->iova, &size, dma);
 	}
 
 	iommu_domain_free(iommu->domain);
@@ -708,6 +856,7 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
 
 	} else if (cmd == VFIO_IOMMU_UNMAP_DMA) {
 		struct vfio_iommu_type1_dma_unmap unmap;
+		long ret;
 
 		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);
 
@@ -717,7 +866,11 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
 		if (unmap.argsz < minsz || unmap.flags)
 			return -EINVAL;
 
-		return vfio_dma_do_unmap(iommu, &unmap);
+		ret = vfio_dma_do_unmap(iommu, &unmap);
+		if (ret)
+			return ret;
+
+		return copy_to_user((void __user *)arg, &unmap, minsz);
 	}
 
 	return -ENOTTY;
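
For context, a minimal userspace sketch (not part of the patch) of how the revised VFIO_IOMMU_UNMAP_DMA semantics are meant to be consumed: the driver may unmap more than was requested and now writes the actual total back into the size field via copy_to_user(), so callers should re-read it. The container fd setup, group attachment, and the iova/length values below are assumed for illustration only.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* 'container' is assumed to be an open VFIO container fd with the type1
 * IOMMU already set and a mapping previously established at IOVA 0 of at
 * least 'len' bytes via VFIO_IOMMU_MAP_DMA. */
static int unmap_and_check(int container, unsigned long long len)
{
	struct vfio_iommu_type1_dma_unmap unmap;

	memset(&unmap, 0, sizeof(unmap));
	unmap.argsz = sizeof(unmap);
	unmap.iova = 0;		/* hypothetical IOVA */
	unmap.size = len;	/* requested length */

	if (ioctl(container, VFIO_IOMMU_UNMAP_DMA, &unmap))
		return -1;

	/* The kernel reports how much was actually unmapped, which may
	 * exceed the request if it overlapped a larger vfio_dma. */
	return unmap.size >= len ? 0 : 1;
}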