Эх сурвалжийг харах

Automatic merge of rsync://rsync.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6

Linus Torvalds 20 жил өмнө
parent
commit
f9a2223925

+ 68 - 20
arch/sparc64/kernel/pci_iommu.c

@@ -196,6 +196,34 @@ static iopte_t *alloc_consistent_cluster(struct pci_iommu *iommu, unsigned long
 	return NULL;
 }
 
+/* Allocate a free IOMMU context number from iommu->ctx_bitmap.
+ *
+ * The search starts at iommu->ctx_lowest_free and runs to the end of
+ * the bitmap (IOMMU_NUM_CTXS bits).  If nothing is free there, it wraps
+ * around and scans [1, ctx_lowest_free).  Context 0 is never marked
+ * allocated: it doubles as the "no context available" return value,
+ * and a warning is logged when the search comes up empty.
+ *
+ * Returns the allocated context number (its bit is now set in
+ * ctx_bitmap), or 0 if every context is in use.
+ *
+ * NOTE(review): non-atomic __set_bit() is used, so callers presumably
+ * hold iommu->lock -- confirm against the call sites.
+ */
+static int iommu_alloc_ctx(struct pci_iommu *iommu)
+{
+	int lowest = iommu->ctx_lowest_free;
+	int n;
+
+	/* The second argument of find_next_zero_bit() is the total size
+	 * of the bitmap in bits, NOT the number of bits to scan past the
+	 * offset.  Passing (IOMMU_NUM_CTXS - lowest) here would leave the
+	 * top 'lowest' contexts permanently unreachable.
+	 */
+	n = find_next_zero_bit(iommu->ctx_bitmap, IOMMU_NUM_CTXS, lowest);
+	if (unlikely(n >= IOMMU_NUM_CTXS)) {
+		/* Nothing free at or above 'lowest'; wrap around, skipping
+		 * the reserved context 0.
+		 */
+		n = find_next_zero_bit(iommu->ctx_bitmap, lowest, 1);
+		if (unlikely(n >= lowest)) {
+			printk(KERN_WARNING "IOMMU: Ran out of contexts.\n");
+			n = 0;
+		}
+	}
+	if (n)
+		__set_bit(n, iommu->ctx_bitmap);
+
+	return n;
+}
+
+/* Return context CTX to the allocator.
+ *
+ * Clears the context's bit in iommu->ctx_bitmap and, if CTX is below
+ * the current search hint, lowers iommu->ctx_lowest_free to it.
+ * A CTX of 0 (the "no context" value) is ignored.
+ */
+static inline void iommu_free_ctx(struct pci_iommu *iommu, int ctx)
+{
+	/* Context 0 is never allocated, so there is nothing to release. */
+	if (unlikely(!ctx))
+		return;
+
+	__clear_bit(ctx, iommu->ctx_bitmap);
+
+	/* Keep the hint pointing at the lowest context known to be free. */
+	if (iommu->ctx_lowest_free > ctx)
+		iommu->ctx_lowest_free = ctx;
+}
+
 /* Allocate and map kernel buffer of size SIZE using consistent mode
  * DMA for PCI device PDEV.  Return non-NULL cpu-side address if
  * successful and set *DMA_ADDRP to the PCI side dma address.
@@ -236,7 +264,7 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_ad
 	npages = size >> IO_PAGE_SHIFT;
 	ctx = 0;
 	if (iommu->iommu_ctxflush)
-		ctx = iommu->iommu_cur_ctx++;
+		ctx = iommu_alloc_ctx(iommu);
 	first_page = __pa(first_page);
 	while (npages--) {
 		iopte_val(*iopte) = (IOPTE_CONSISTENT(ctx) |
@@ -317,6 +345,8 @@ void pci_free_consistent(struct pci_dev *pdev, size_t size, void *cpu, dma_addr_
 		}
 	}
 
+	iommu_free_ctx(iommu, ctx);
+
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	order = get_order(size);
@@ -360,7 +390,7 @@ dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, size_t sz, int direct
 	base_paddr = __pa(oaddr & IO_PAGE_MASK);
 	ctx = 0;
 	if (iommu->iommu_ctxflush)
-		ctx = iommu->iommu_cur_ctx++;
+		ctx = iommu_alloc_ctx(iommu);
 	if (strbuf->strbuf_enabled)
 		iopte_protection = IOPTE_STREAMING(ctx);
 	else
@@ -380,39 +410,53 @@ bad:
 	return PCI_DMA_ERROR_CODE;
 }
 
-static void pci_strbuf_flush(struct pci_strbuf *strbuf, struct pci_iommu *iommu, u32 vaddr, unsigned long ctx, unsigned long npages)
+static void pci_strbuf_flush(struct pci_strbuf *strbuf, struct pci_iommu *iommu, u32 vaddr, unsigned long ctx, unsigned long npages, int direction)
 {
 	int limit;
 
-	PCI_STC_FLUSHFLAG_INIT(strbuf);
 	if (strbuf->strbuf_ctxflush &&
 	    iommu->iommu_ctxflush) {
 		unsigned long matchreg, flushreg;
+		u64 val;
 
 		flushreg = strbuf->strbuf_ctxflush;
 		matchreg = PCI_STC_CTXMATCH_ADDR(strbuf, ctx);
 
-		limit = 100000;
 		pci_iommu_write(flushreg, ctx);
-		for(;;) {
-			if (((long)pci_iommu_read(matchreg)) >= 0L)
-				break;
-			limit--;
-			if (!limit)
-				break;
-			udelay(1);
+		val = pci_iommu_read(matchreg);
+		val &= 0xffff;
+		if (!val)
+			goto do_flush_sync;
+
+		while (val) {
+			if (val & 0x1)
+				pci_iommu_write(flushreg, ctx);
+			val >>= 1;
 		}
-		if (!limit)
+		val = pci_iommu_read(matchreg);
+		if (unlikely(val)) {
 			printk(KERN_WARNING "pci_strbuf_flush: ctx flush "
-			       "timeout vaddr[%08x] ctx[%lx]\n",
-			       vaddr, ctx);
+			       "timeout matchreg[%lx] ctx[%lx]\n",
+			       val, ctx);
+			goto do_page_flush;
+		}
 	} else {
 		unsigned long i;
 
+	do_page_flush:
 		for (i = 0; i < npages; i++, vaddr += IO_PAGE_SIZE)
 			pci_iommu_write(strbuf->strbuf_pflush, vaddr);
 	}
 
+do_flush_sync:
+	/* If the device could not have possibly put dirty data into
+	 * the streaming cache, no flush-flag synchronization needs
+	 * to be performed.
+	 */
+	if (direction == PCI_DMA_TODEVICE)
+		return;
+
+	PCI_STC_FLUSHFLAG_INIT(strbuf);
 	pci_iommu_write(strbuf->strbuf_fsync, strbuf->strbuf_flushflag_pa);
 	(void) pci_iommu_read(iommu->write_complete_reg);
 
@@ -466,7 +510,7 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int
 
 	/* Step 1: Kick data out of streaming buffers if necessary. */
 	if (strbuf->strbuf_enabled)
-		pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);
+		pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
 
 	/* Step 2: Clear out first TSB entry. */
 	iopte_make_dummy(iommu, base);
@@ -474,6 +518,8 @@ void pci_unmap_single(struct pci_dev *pdev, dma_addr_t bus_addr, size_t sz, int
 	free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base,
 			       npages, ctx);
 
+	iommu_free_ctx(iommu, ctx);
+
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
@@ -613,7 +659,7 @@ int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems, int
 	/* Step 4: Choose a context if necessary. */
 	ctx = 0;
 	if (iommu->iommu_ctxflush)
-		ctx = iommu->iommu_cur_ctx++;
+		ctx = iommu_alloc_ctx(iommu);
 
 	/* Step 5: Create the mappings. */
 	if (strbuf->strbuf_enabled)
@@ -678,7 +724,7 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
 
 	/* Step 1: Kick data out of streaming buffers if necessary. */
 	if (strbuf->strbuf_enabled)
-		pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);
+		pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
 
 	/* Step 2: Clear out first TSB entry. */
 	iopte_make_dummy(iommu, base);
@@ -686,6 +732,8 @@ void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sglist, int nelems,
 	free_streaming_cluster(iommu, bus_addr - iommu->page_table_map_base,
 			       npages, ctx);
 
+	iommu_free_ctx(iommu, ctx);
+
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
@@ -724,7 +772,7 @@ void pci_dma_sync_single_for_cpu(struct pci_dev *pdev, dma_addr_t bus_addr, size
 	}
 
 	/* Step 2: Kick data out of streaming buffers. */
-	pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);
+	pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
 
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
@@ -768,7 +816,7 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, struct scatterlist *sglist, i
 	i--;
 	npages = (IO_PAGE_ALIGN(sglist[i].dma_address + sglist[i].dma_length)
 		  - bus_addr) >> IO_PAGE_SHIFT;
-	pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages);
+	pci_strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction);
 
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }

+ 1 - 1
arch/sparc64/kernel/pci_psycho.c

@@ -1212,7 +1212,7 @@ static void __init psycho_iommu_init(struct pci_controller_info *p)
 
 	/* Setup initial software IOMMU state. */
 	spin_lock_init(&iommu->lock);
-	iommu->iommu_cur_ctx = 0;
+	iommu->ctx_lowest_free = 1;
 
 	/* Register addresses. */
 	iommu->iommu_control  = p->pbm_A.controller_regs + PSYCHO_IOMMU_CONTROL;

+ 1 - 1
arch/sparc64/kernel/pci_sabre.c

@@ -1265,7 +1265,7 @@ static void __init sabre_iommu_init(struct pci_controller_info *p,
 
 	/* Setup initial software IOMMU state. */
 	spin_lock_init(&iommu->lock);
-	iommu->iommu_cur_ctx = 0;
+	iommu->ctx_lowest_free = 1;
 
 	/* Register addresses. */
 	iommu->iommu_control  = p->pbm_A.controller_regs + SABRE_IOMMU_CONTROL;

+ 1 - 1
arch/sparc64/kernel/pci_schizo.c

@@ -1753,7 +1753,7 @@ static void schizo_pbm_iommu_init(struct pci_pbm_info *pbm)
 
 	/* Setup initial software IOMMU state. */
 	spin_lock_init(&iommu->lock);
-	iommu->iommu_cur_ctx = 0;
+	iommu->ctx_lowest_free = 1;
 
 	/* Register addresses, SCHIZO has iommu ctx flushing. */
 	iommu->iommu_control  = pbm->pbm_regs + SCHIZO_IOMMU_CONTROL;

+ 14 - 6
arch/sparc64/kernel/sbus.c

@@ -117,17 +117,25 @@ static void iommu_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages
 
 #define STRBUF_TAG_VALID	0x02UL
 
-static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages)
+static void sbus_strbuf_flush(struct sbus_iommu *iommu, u32 base, unsigned long npages, int direction)
 {
 	unsigned long n;
 	int limit;
 
-	iommu->strbuf_flushflag = 0UL;
 	n = npages;
 	while (n--)
 		upa_writeq(base + (n << IO_PAGE_SHIFT),
 			   iommu->strbuf_regs + STRBUF_PFLUSH);
 
+	/* If the device could not have possibly put dirty data into
+	 * the streaming cache, no flush-flag synchronization needs
+	 * to be performed.
+	 */
+	if (direction == SBUS_DMA_TODEVICE)
+		return;
+
+	iommu->strbuf_flushflag = 0UL;
+
 	/* Whoopee cushion! */
 	upa_writeq(__pa(&iommu->strbuf_flushflag),
 		   iommu->strbuf_regs + STRBUF_FSYNC);
@@ -421,7 +429,7 @@ void sbus_unmap_single(struct sbus_dev *sdev, dma_addr_t dma_addr, size_t size,
 
 	spin_lock_irqsave(&iommu->lock, flags);
 	free_streaming_cluster(iommu, dma_base, size >> IO_PAGE_SHIFT);
-	sbus_strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT);
+	sbus_strbuf_flush(iommu, dma_base, size >> IO_PAGE_SHIFT, direction);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
@@ -584,7 +592,7 @@ void sbus_unmap_sg(struct sbus_dev *sdev, struct scatterlist *sg, int nents, int
 	iommu = sdev->bus->iommu;
 	spin_lock_irqsave(&iommu->lock, flags);
 	free_streaming_cluster(iommu, dvma_base, size >> IO_PAGE_SHIFT);
-	sbus_strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT);
+	sbus_strbuf_flush(iommu, dvma_base, size >> IO_PAGE_SHIFT, direction);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
@@ -596,7 +604,7 @@ void sbus_dma_sync_single_for_cpu(struct sbus_dev *sdev, dma_addr_t base, size_t
 	size = (IO_PAGE_ALIGN(base + size) - (base & IO_PAGE_MASK));
 
 	spin_lock_irqsave(&iommu->lock, flags);
-	sbus_strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT);
+	sbus_strbuf_flush(iommu, base & IO_PAGE_MASK, size >> IO_PAGE_SHIFT, direction);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
@@ -620,7 +628,7 @@ void sbus_dma_sync_sg_for_cpu(struct sbus_dev *sdev, struct scatterlist *sg, int
 	size = IO_PAGE_ALIGN(sg[i].dma_address + sg[i].dma_length) - base;
 
 	spin_lock_irqsave(&iommu->lock, flags);
-	sbus_strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT);
+	sbus_strbuf_flush(iommu, base, size >> IO_PAGE_SHIFT, direction);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 

+ 2 - 0
include/asm-sparc64/iommu.h

@@ -16,4 +16,6 @@
 #define IOPTE_CACHE   0x0000000000000010UL /* Cached (in UPA E-cache)         */
 #define IOPTE_WRITE   0x0000000000000002UL /* Writeable                       */
 
+#define IOMMU_NUM_CTXS	4096
+
 #endif /* !(_SPARC_IOMMU_H) */

+ 5 - 3
include/asm-sparc64/pbm.h

@@ -15,6 +15,7 @@
 #include <asm/io.h>
 #include <asm/page.h>
 #include <asm/oplib.h>
+#include <asm/iommu.h>
 
 /* The abstraction used here is that there are PCI controllers,
  * each with one (Sabre) or two (PSYCHO/SCHIZO) PCI bus modules
@@ -40,9 +41,6 @@ struct pci_iommu {
 	 */
 	spinlock_t	lock;
 
-	/* Context allocator. */
-	unsigned int	iommu_cur_ctx;
-
 	/* IOMMU page table, a linear array of ioptes. */
 	iopte_t		*page_table;		/* The page table itself. */
 	int		page_table_sz_bits;	/* log2 of how many pages does it map? */
@@ -87,6 +85,10 @@ struct pci_iommu {
 		u16	flush;
 	} alloc_info[PBM_NCLUSTERS];
 
+	/* CTX allocation. */
+	unsigned long ctx_lowest_free;
+	unsigned long ctx_bitmap[IOMMU_NUM_CTXS / (sizeof(unsigned long) * 8)];
+
 	/* Here a PCI controller driver describes the areas of
 	 * PCI memory space where DMA to/from physical memory
 	 * are addressed.  Drivers interrogate the PCI layer