
Merge branch 'master'

Jeff Garzik 19 years ago
parent
commit
96b88fb850
100 changed files with 2853 additions and 3004 deletions
  1. +52 -61  Documentation/block/biodoc.txt
  2. +251 -231  Documentation/kernel-parameters.txt
  3. +3 -2  Documentation/networking/bonding.txt
  4. +2 -2  Makefile
  5. +1 -1  arch/alpha/kernel/pci-noop.c
  6. +1 -1  arch/alpha/kernel/pci_iommu.c
  7. +8 -7  arch/arm/mach-integrator/impd1.c
  8. +2 -0  arch/arm/mach-pxa/corgi_lcd.c
  9. +20 -0  arch/arm/mach-pxa/generic.c
  10. +3 -3  arch/arm/mach-s3c2410/mach-bast.c
  11. +4 -4  arch/arm/mm/consistent.c
  12. +1 -1  arch/frv/mb93090-mb00/pci-dma-nommu.c
  13. +1 -1  arch/frv/mb93090-mb00/pci-dma.c
  14. +1 -1  arch/frv/mm/dma-alloc.c
  15. +19 -11  arch/i386/kernel/cpu/cpufreq/powernow-k8.c
  16. +1 -1  arch/ia64/hp/common/hwsw_iommu.c
  17. +1 -1  arch/ia64/hp/common/sba_iommu.c
  18. +1 -1  arch/ia64/lib/swiotlb.c
  19. +1 -1  arch/ia64/sn/kernel/xpc.h
  20. +1 -1  arch/ia64/sn/pci/pci_dma.c
  21. +2 -2  arch/mips/mm/dma-coherent.c
  22. +2 -2  arch/mips/mm/dma-ip27.c
  23. +2 -2  arch/mips/mm/dma-ip32.c
  24. +2 -2  arch/mips/mm/dma-noncoherent.c
  25. +3 -3  arch/parisc/kernel/pci-dma.c
  26. +1 -1  arch/ppc/8xx_io/cs4218.h
  27. +2 -2  arch/ppc/8xx_io/cs4218_tdm.c
  28. +2 -2  arch/ppc/kernel/dma-mapping.c
  29. +2 -2  arch/ppc/mm/pgtable.c
  30. +1 -1  arch/ppc64/kernel/iSeries_htab.c
  31. +2 -2  arch/ppc64/kernel/mpic.c
  32. +1 -1  arch/ppc64/kernel/time.c
  33. +1 -2  arch/ppc64/mm/init.c
  34. +1 -1  arch/sh/boards/renesas/rts7751r2d/mach.c
  35. +1 -1  arch/sh/cchips/voyagergx/consistent.c
  36. +1 -1  arch/sh/drivers/pci/dma-dreamcast.c
  37. +1 -1  arch/sh/mm/consistent.c
  38. +1 -1  arch/sparc64/solaris/socksys.c
  39. +1 -1  arch/sparc64/solaris/timod.c
  40. +1 -1  arch/um/include/sysdep-i386/thread.h
  41. +1 -1  arch/um/include/sysdep-x86_64/thread.h
  42. +1 -1  arch/um/kernel/mem.c
  43. +1 -1  arch/um/kernel/process_kern.c
  44. +2 -2  arch/x86_64/kernel/pci-gart.c
  45. +1 -1  arch/x86_64/kernel/pci-nommu.c
  46. +1 -1  arch/xtensa/kernel/pci-dma.c
  47. +90 -237  drivers/block/as-iosched.c
  48. +85 -287  drivers/block/cfq-iosched.c
  49. +19 -106  drivers/block/deadline-iosched.c
  50. +207 -138  drivers/block/elevator.c
  51. +55 -138  drivers/block/ll_rw_blk.c
  52. +1 -1  drivers/block/loop.c
  53. +5 -43  drivers/block/noop-iosched.c
  54. +1 -1  drivers/block/rd.c
  55. +2 -1  drivers/char/drm/drm_vm.c
  56. +1 -1  drivers/char/drm/mga_drv.h
  57. +1 -1  drivers/char/drm/mga_state.c
  58. +6 -5  drivers/char/drm/radeon_cp.c
  59. +1 -1  drivers/char/n_tty.c
  60. +3 -3  drivers/cpufreq/cpufreq_conservative.c
  61. +1 -1  drivers/ieee1394/eth1394.c
  62. +1 -1  drivers/infiniband/hw/mthca/mthca_cmd.c
  63. +1 -1  drivers/infiniband/hw/mthca/mthca_cmd.h
  64. +11 -10  drivers/infiniband/hw/mthca/mthca_eq.c
  65. +1 -1  drivers/infiniband/hw/mthca/mthca_memfree.c
  66. +1 -1  drivers/infiniband/hw/mthca/mthca_memfree.h
  67. +1 -1  drivers/md/bitmap.c
  68. +1 -1  drivers/md/dm-crypt.c
  69. +6 -4  drivers/md/md.c
  70. +0 -1  drivers/media/video/Kconfig
  71. +8 -4  drivers/message/fusion/mptsas.c
  72. +3 -2  drivers/net/8139cp.c
  73. +4 -1  drivers/net/8139too.c
  74. +23 -0  drivers/net/Kconfig
  75. +3 -1  drivers/net/Makefile
  76. +5 -8  drivers/net/au1000_eth.c
  77. +128 -8  drivers/net/b44.c
  78. +2 -0  drivers/net/b44.h
  79. +55 -2  drivers/net/bonding/bond_main.c
  80. +2 -2  drivers/net/cassini.c
  81. +16 -21  drivers/net/declance.c
  82. +3 -1  drivers/net/e100.c
  83. +60 -14  drivers/net/e1000/e1000.h
  84. +67 -28  drivers/net/e1000/e1000_ethtool.c
  85. +187 -33  drivers/net/e1000/e1000_hw.c
  86. +84 -12  drivers/net/e1000/e1000_hw.h
  87. +496 -112  drivers/net/e1000/e1000_main.c
  88. +8 -2  drivers/net/e1000/e1000_param.c
  89. +2 -2  drivers/net/epic100.c
  90. +202 -108  drivers/net/forcedeth.c
  91. +86 -326  drivers/net/gianfar.c
  92. +16 -14  drivers/net/gianfar.h
  93. +64 -36  drivers/net/gianfar_ethtool.c
  94. +219 -0  drivers/net/gianfar_mii.c
  95. +45 -0  drivers/net/gianfar_mii.h
  96. +0 -661  drivers/net/gianfar_phy.c
  97. +0 -213  drivers/net/gianfar_phy.h
  98. +1 -0  drivers/net/hamradio/Kconfig
  99. +3 -6  drivers/net/hamradio/bpqether.c
  100. +149 -33  drivers/net/hamradio/mkiss.c

+ 52 - 61
Documentation/block/biodoc.txt

@@ -906,9 +906,20 @@ Aside:
 
 
 4. The I/O scheduler
-I/O schedulers are now per queue. They should be runtime switchable and modular
-but aren't yet. Jens has most bits to do this, but the sysfs implementation is
-missing.
+The I/O scheduler, a.k.a. elevator, is implemented in two layers: a generic
+dispatch queue and specific I/O schedulers.  Unless stated otherwise,
+"elevator" refers to both parts and "I/O scheduler" to the specific ones.
+
+The block layer implements the generic dispatch queue in ll_rw_blk.c and
+elevator.c.  The generic dispatch queue is responsible for properly ordering
+barrier requests, requeueing, handling non-fs requests and all other subtleties.
+
+Specific I/O schedulers are responsible for ordering normal filesystem
+requests.  They can also choose to delay certain requests, for example to
+improve throughput.  As the plural form indicates, there are multiple I/O
+schedulers.  They can be built as modules, but at least one should be built
+into the kernel.  Each queue can choose a different one and can also switch
+to another one dynamically.
 
 A block layer call to the i/o scheduler follows the convention elv_xxx(). This
 calls elevator_xxx_fn in the elevator switch (drivers/block/elevator.c). Oh,
@@ -921,44 +932,36 @@ keeping work.
 The functions an elevator may implement are: (* are mandatory)
 elevator_merge_fn		called to query requests for merge with a bio
 
-elevator_merge_req_fn		" " "  with another request
+elevator_merge_req_fn		called when two requests get merged. The one
+				which gets merged into the other one will
+				never be seen by the I/O scheduler again. IOW,
+				after being merged, the request is gone.
 
 elevator_merged_fn		called when a request in the scheduler has been
 				involved in a merge. It is used in the deadline
 				scheduler for example, to reposition the request
 				if its sorting order has changed.
 
-*elevator_next_req_fn		returns the next scheduled request, or NULL
-				if there are none (or none are ready).
+elevator_dispatch_fn		fills the dispatch queue with ready requests.
+				I/O schedulers are free to postpone requests by
+				not filling the dispatch queue unless @force
+				is non-zero.  Once dispatched, I/O schedulers
+				are not allowed to manipulate the requests -
+				they belong to the generic dispatch queue.
 
-*elevator_add_req_fn		called to add a new request into the scheduler
+elevator_add_req_fn		called to add a new request into the scheduler
 
 elevator_queue_empty_fn		returns true if the merge queue is empty.
 				Drivers shouldn't use this, but rather check
 				if elv_next_request is NULL (without losing the
 				request if one exists!)
 
-elevator_remove_req_fn		This is called when a driver claims ownership of
-				the target request - it now belongs to the
-				driver. It must not be modified or merged.
-				Drivers must not lose the request! A subsequent
-				call of elevator_next_req_fn must  return the
-				_next_ request.
-
-elevator_requeue_req_fn		called to add a request to the scheduler. This
-				is used when the request has alrnadebeen
-				returned by elv_next_request, but hasn't
-				completed. If this is not implemented then
-				elevator_add_req_fn is called instead.
-
 elevator_former_req_fn
 elevator_latter_req_fn		These return the request before or after the
 				one specified in disk sort order. Used by the
 				block layer to find merge possibilities.
 
-elevator_completed_req_fn	called when a request is completed. This might
-				come about due to being merged with another or
-				when the device completes the request.
+elevator_completed_req_fn	called when a request is completed.
 
 elevator_may_queue_fn		returns true if the scheduler wants to allow the
 				current context to queue a new request even if
@@ -967,13 +970,33 @@ elevator_may_queue_fn		returns true if the scheduler wants to allow the
 
 elevator_set_req_fn
 elevator_put_req_fn		Must be used to allocate and free any elevator
-				specific storate for a request.
+				specific storage for a request.
+
+elevator_activate_req_fn	Called when the device driver first sees a
+				request.  I/O schedulers can use this callback
+				to determine when actual execution of a request
+				starts.
+elevator_deactivate_req_fn	Called when the device driver decides to delay
+				a request by requeueing it.
 
 elevator_init_fn
 elevator_exit_fn		Allocate and free any elevator specific storage
 				for a queue.
 
-4.2 I/O scheduler implementation
+4.2 Request flows seen by I/O schedulers
+All requests seen by I/O schedulers strictly follow one of the following three
+flows.
+
+ set_req_fn ->
+
+ i.   add_req_fn -> (merged_fn ->)* -> dispatch_fn -> activate_req_fn ->
+      (deactivate_req_fn -> activate_req_fn ->)* -> completed_req_fn
+ ii.  add_req_fn -> (merged_fn ->)* -> merge_req_fn
+ iii. [none]
+
+ -> put_req_fn
+
+4.3 I/O scheduler implementation
 The generic i/o scheduler algorithm attempts to sort/merge/batch requests for
 optimal disk scan and request servicing performance (based on generic
 principles and device capabilities), optimized for:
@@ -993,18 +1016,7 @@ request in sort order to prevent binary tree lookups.
 This arrangement is not a generic block layer characteristic however, so
 elevators may implement queues as they please.
 
-ii. Last merge hint
-The last merge hint is part of the generic queue layer. I/O schedulers must do
-some management on it. For the most part, the most important thing is to make
-sure q->last_merge is cleared (set to NULL) when the request on it is no longer
-a candidate for merging (for example if it has been sent to the driver).
-
-The last merge performed is cached as a hint for the subsequent request. If
-sequential data is being submitted, the hint is used to perform merges without
-any scanning. This is not sufficient when there are multiple processes doing
-I/O though, so a "merge hash" is used by some schedulers.
-
-iii. Merge hash
+ii. Merge hash
 AS and deadline use a hash table indexed by the last sector of a request. This
 enables merging code to quickly look up "back merge" candidates, even when
 multiple I/O streams are being performed at once on one disk.
@@ -1013,29 +1025,8 @@ multiple I/O streams are being performed at once on one disk.
 are far less common than "back merges" due to the nature of most I/O patterns.
 Front merges are handled by the binary trees in AS and deadline schedulers.
 
-iv. Handling barrier cases
-A request with flags REQ_HARDBARRIER or REQ_SOFTBARRIER must not be ordered
-around. That is, they must be processed after all older requests, and before
-any newer ones. This includes merges!
-
-In AS and deadline schedulers, barriers have the effect of flushing the reorder
-queue. The performance cost of this will vary from nothing to a lot depending
-on i/o patterns and device characteristics. Obviously they won't improve
-performance, so their use should be kept to a minimum.
-
-v. Handling insertion position directives
-A request may be inserted with a position directive. The directives are one of
-ELEVATOR_INSERT_BACK, ELEVATOR_INSERT_FRONT, ELEVATOR_INSERT_SORT.
-
-ELEVATOR_INSERT_SORT is a general directive for non-barrier requests.
-ELEVATOR_INSERT_BACK is used to insert a barrier to the back of the queue.
-ELEVATOR_INSERT_FRONT is used to insert a barrier to the front of the queue, and
-overrides the ordering requested by any previous barriers. In practice this is
-harmless and required, because it is used for SCSI requeueing. This does not
-require flushing the reorder queue, so does not impose a performance penalty.
-
-vi. Plugging the queue to batch requests in anticipation of opportunities for
-  merge/sort optimizations
+iii. Plugging the queue to batch requests in anticipation of opportunities for
+     merge/sort optimizations
 
 This is just the same as in 2.4 so far, though per-device unplugging
 support is anticipated for 2.5. Also with a priority-based i/o scheduler,
@@ -1069,7 +1060,7 @@ Aside:
   blk_kick_queue() to unplug a specific queue (right away ?)
   or optionally, all queues, is in the plan.
 
-4.3 I/O contexts
+4.4 I/O contexts
 I/O contexts provide a dynamically allocated per process data area. They may
 be used in I/O schedulers, and in the block layer (could be used for IO stats,
 priorities for example). See *io_context in drivers/block/ll_rw_blk.c, and
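
The minimal consumer of the interface documented above is the noop scheduler
(note the +5/-43 shrink of drivers/block/noop-iosched.c in the file list).
Below is a sketch of what a scheduler reduces to under the two-layer model;
it is modeled on that file after this merge, with field and helper names
taken from the elevator_type registration visible in the as-iosched diff
further down, so treat it as an illustration rather than the verbatim source:

	#include <linux/blkdev.h>
	#include <linux/elevator.h>
	#include <linux/module.h>
	#include <linux/init.h>

	static void elevator_noop_add_request(request_queue_t *q,
					      struct request *rq)
	{
		/* no sorting or delaying: hand the request straight to
		 * the generic dispatch queue */
		elv_dispatch_add_tail(q, rq);
	}

	static int elevator_noop_dispatch(request_queue_t *q, int force)
	{
		/* everything was dispatched at add time, nothing to move */
		return 0;
	}

	static struct elevator_type elevator_noop = {
		.ops = {
			.elevator_dispatch_fn	= elevator_noop_dispatch,
			.elevator_add_req_fn	= elevator_noop_add_request,
		},
		.elevator_name	= "noop",
		.elevator_owner	= THIS_MODULE,
	};

	static int __init noop_init(void)
	{
		return elv_register(&elevator_noop);
	}

	static void __exit noop_exit(void)
	{
		elv_unregister(&elevator_noop);
	}

	module_init(noop_init);
	module_exit(noop_exit);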

File diff suppressed because it is too large
+ 251 - 231
Documentation/kernel-parameters.txt


+ 3 - 2
Documentation/networking/bonding.txt

@@ -777,7 +777,7 @@ doing so is the same as described in the "Configuring Multiple Bonds
 Manually" section, below.
 
 	NOTE: It has been observed that some Red Hat supplied kernels
-are apparently unable to rename modules at load time (the "-obonding1"
+are apparently unable to rename modules at load time (the "-o bond1"
 part).  Attempts to pass that option to modprobe will produce an
 "Operation not permitted" error.  This has been reported on some
 Fedora Core kernels, and has been seen on RHEL 4 as well.  On kernels
@@ -883,7 +883,8 @@ the above does not work, and the second bonding instance never sees
 its options.  In that case, the second options line can be substituted
 as follows:
 
-install bonding1 /sbin/modprobe bonding -obond1 mode=balance-alb miimon=50
+install bond1 /sbin/modprobe --ignore-install bonding -o bond1 \
+	mode=balance-alb miimon=50
 
 	This may be repeated any number of times, specifying a new and
 unique name in place of bond1 for each subsequent instance.
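
Putting the corrected lines together, a hypothetical /etc/modprobe.conf for
two differently configured bonds would then read as follows (the bond0 lines
and all option values are illustrative; only the install line comes from this
patch):

	alias bond0 bonding
	options bond0 mode=balance-rr miimon=100
	install bond1 /sbin/modprobe --ignore-install bonding -o bond1 \
		mode=balance-alb miimon=50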

+ 2 - 2
Makefile

@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 14
-EXTRAVERSION =-rc5
+EXTRAVERSION =
 NAME=Affluent Albatross
 
 # *DOCUMENTATION*
@@ -334,7 +334,7 @@ KALLSYMS	= scripts/kallsyms
 PERL		= perl
 CHECK		= sparse
 
-CHECKFLAGS     := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ $(CF)
+CHECKFLAGS     := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise $(CF)
 MODFLAGS	= -DMODULE
 CFLAGS_MODULE   = $(MODFLAGS)
 AFLAGS_MODULE   = $(MODFLAGS)
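
-Wbitwise is a sparse flag, not a gcc one; it is consumed through the CHECK
hook shown in the context above (CHECK = sparse).  Assuming sparse is
installed, the checker runs on each file as it is recompiled via the kernel's
C= switch:

	$ make C=1	# run $(CHECK), i.e. sparse, on files being recompiled

This is what surfaces the int vs gfp_t mismatches fixed throughout the arch/
changes below.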

+ 1 - 1
arch/alpha/kernel/pci-noop.c

@@ -154,7 +154,7 @@ pci_dma_supported(struct pci_dev *hwdev, dma_addr_t mask)
 
 void *
 dma_alloc_coherent(struct device *dev, size_t size,
-		   dma_addr_t *dma_handle, int gfp)
+		   dma_addr_t *dma_handle, gfp_t gfp)
 {
 	void *ret;
 

+ 1 - 1
arch/alpha/kernel/pci_iommu.c

@@ -397,7 +397,7 @@ pci_alloc_consistent(struct pci_dev *pdev, size_t size, dma_addr_t *dma_addrp)
 {
 	void *cpu_addr;
 	long order = get_order(size);
-	int gfp = GFP_ATOMIC;
+	gfp_t gfp = GFP_ATOMIC;
 
 try_again:
 	cpu_addr = (void *)__get_free_pages(gfp, order);
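
The long tail of one-line arch/ changes in this merge is all the same fix:
allocation-flag parameters become the new gfp_t typedef instead of a bare int
or unsigned.  As an assumption from include/linux/gfp.h of this era (the
annotation was later changed to __bitwise), the typedef looks roughly like:

	typedef unsigned int __nocast gfp_t;

Under gcc the __nocast annotation compiles away, so no generated code
changes; under sparse it makes implicit conversions between gfp_t and plain
integers warn, which is how the remaining int/unsigned holdouts were found.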

+ 8 - 7
arch/arm/mach-integrator/impd1.c

@@ -67,7 +67,7 @@ static void impd1_setvco(struct clk *clk, struct icst525_vco vco)
 	}
 	writel(0, impd1->base + IMPD1_LOCK);
 
-#if DEBUG
+#ifdef DEBUG
 	vco.v = val & 0x1ff;
 	vco.r = (val >> 9) & 0x7f;
 	vco.s = (val >> 16) & 7;
@@ -427,17 +427,18 @@ static int impd1_probe(struct lm_device *dev)
 	return ret;
 }
 
+static int impd1_remove_one(struct device *dev, void *data)
+{
+	device_unregister(dev);
+	return 0;
+}
+
 static void impd1_remove(struct lm_device *dev)
 {
 	struct impd1_module *impd1 = lm_get_drvdata(dev);
-	struct list_head *l, *n;
 	int i;
 
-	list_for_each_safe(l, n, &dev->dev.children) {
-		struct device *d = list_to_dev(l);
-
-		device_unregister(d);
-	}
+	device_for_each_child(&dev->dev, NULL, impd1_remove_one);
 
 	for (i = 0; i < ARRAY_SIZE(impd1->vcos); i++)
 		clk_unregister(&impd1->vcos[i]);

+ 2 - 0
arch/arm/mach-pxa/corgi_lcd.c

@@ -488,6 +488,7 @@ static int is_pxafb_device(struct device * dev, void * data)
 
 unsigned long spitz_get_hsync_len(void)
 {
+#ifdef CONFIG_FB_PXA
 	if (!spitz_pxafb_dev) {
 		spitz_pxafb_dev = bus_find_device(&platform_bus_type, NULL, NULL, is_pxafb_device);
 		if (!spitz_pxafb_dev)
@@ -496,6 +497,7 @@ unsigned long spitz_get_hsync_len(void)
 	if (!get_hsync_time)
 		get_hsync_time = symbol_get(pxafb_get_hsync_time);
 	if (!get_hsync_time)
+#endif
 		return 0;
 
 	return pxafb_get_hsync_time(spitz_pxafb_dev);
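
The symbol_get()/symbol_put() pair used above is the stock pattern for
calling into a module that may not be loaded.  A self-contained sketch, with
the pxafb_get_hsync_time() prototype assumed from the call site above:

	extern unsigned long pxafb_get_hsync_time(struct device *dev);

	static unsigned long hsync_len_sketch(struct device *dev)
	{
		unsigned long (*fn)(struct device *);
		unsigned long len = 0;

		fn = symbol_get(pxafb_get_hsync_time);	/* takes a module ref */
		if (fn) {
			len = fn(dev);
			symbol_put(pxafb_get_hsync_time); /* drops the ref */
		}
		return len;
	}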

+ 20 - 0
arch/arm/mach-pxa/generic.c

@@ -250,6 +250,25 @@ void __init pxa_set_i2c_info(struct i2c_pxa_platform_data *info)
 	i2c_device.dev.platform_data = info;
 }
 
+static struct resource i2s_resources[] = {
+	{
+		.start	= 0x40400000,
+		.end	= 0x40400083,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.start	= IRQ_I2S,
+		.end	= IRQ_I2S,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device i2s_device = {
+	.name		= "pxa2xx-i2s",
+	.id		= -1,
+	.resource	= i2s_resources,
+	.num_resources	= ARRAY_SIZE(i2s_resources),
+};
+
 static struct platform_device *devices[] __initdata = {
 	&pxamci_device,
 	&udc_device,
@@ -258,6 +277,7 @@ static struct platform_device *devices[] __initdata = {
 	&btuart_device,
 	&stuart_device,
 	&i2c_device,
+	&i2s_device,
 };
 
 static int __init pxa_init(void)

+ 3 - 3
arch/arm/mach-s3c2410/mach-bast.c

@@ -307,9 +307,9 @@ static void bast_nand_select(struct s3c2410_nand_set *set, int slot)
 }
 
 static struct s3c2410_platform_nand bast_nand_info = {
-	.tacls		= 40,
-	.twrph0		= 80,
-	.twrph1		= 80,
+	.tacls		= 30,
+	.twrph0		= 60,
+	.twrph1		= 60,
 	.nr_sets	= ARRAY_SIZE(bast_nand_sets),
 	.sets		= bast_nand_sets,
 	.select_chip	= bast_nand_select,

+ 4 - 4
arch/arm/mm/consistent.c

@@ -75,7 +75,7 @@ static struct vm_region consistent_head = {
 };
 
 static struct vm_region *
-vm_region_alloc(struct vm_region *head, size_t size, int gfp)
+vm_region_alloc(struct vm_region *head, size_t size, gfp_t gfp)
 {
 	unsigned long addr = head->vm_start, end = head->vm_end - size;
 	unsigned long flags;
@@ -133,7 +133,7 @@ static struct vm_region *vm_region_find(struct vm_region *head, unsigned long ad
 #endif
 
 static void *
-__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, int gfp,
+__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
 	    pgprot_t prot)
 {
 	struct page *page;
@@ -251,7 +251,7 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, int gfp,
  * virtual and bus address for that space.
  */
 void *
-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, int gfp)
+dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
 {
 	return __dma_alloc(dev, size, handle, gfp,
 			   pgprot_noncached(pgprot_kernel));
@@ -263,7 +263,7 @@ EXPORT_SYMBOL(dma_alloc_coherent);
  * dma_alloc_coherent above.
  */
 void *
-dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, int gfp)
+dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
 {
 	return __dma_alloc(dev, size, handle, gfp,
 			   pgprot_writecombine(pgprot_kernel));

+ 1 - 1
arch/frv/mb93090-mb00/pci-dma-nommu.c

@@ -33,7 +33,7 @@ struct dma_alloc_record {
 static DEFINE_SPINLOCK(dma_alloc_lock);
 static LIST_HEAD(dma_alloc_list);
 
-void *dma_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, int gfp)
+void *dma_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t gfp)
 {
 	struct dma_alloc_record *new;
 	struct list_head *this = &dma_alloc_list;

+ 1 - 1
arch/frv/mb93090-mb00/pci-dma.c

@@ -17,7 +17,7 @@
 #include <linux/highmem.h>
 #include <asm/io.h>
 
-void *dma_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, int gfp)
+void *dma_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t gfp)
 {
 	void *ret;
 

+ 1 - 1
arch/frv/mm/dma-alloc.c

@@ -81,7 +81,7 @@ static int map_page(unsigned long va, unsigned long pa, pgprot_t prot)
  * portions of the kernel with single large page TLB entries, and
  * still get unique uncached pages for consistent DMA.
  */
-void *consistent_alloc(int gfp, size_t size, dma_addr_t *dma_handle)
+void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle)
 {
 	struct vm_struct *area;
 	unsigned long page, va, pa;

+ 19 - 11
arch/i386/kernel/cpu/cpufreq/powernow-k8.c

@@ -44,7 +44,7 @@
 
 #define PFX "powernow-k8: "
 #define BFX PFX "BIOS error: "
-#define VERSION "version 1.50.3"
+#define VERSION "version 1.50.4"
 #include "powernow-k8.h"
 
 /* serialize freq changes  */
@@ -111,8 +111,8 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
 	u32 i = 0;
 
 	do {
-		if (i++ > 0x1000000) {
-			printk(KERN_ERR PFX "detected change pending stuck\n");
+		if (i++ > 10000) {
+			dprintk("detected change pending stuck\n");
 			return 1;
 		}
 		rdmsr(MSR_FIDVID_STATUS, lo, hi);
@@ -159,6 +159,7 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
 {
 	u32 lo;
 	u32 savevid = data->currvid;
+	u32 i = 0;
 
 	if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) {
 		printk(KERN_ERR PFX "internal error - overflow on fid write\n");
@@ -170,10 +171,13 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid)
 	dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n",
 		fid, lo, data->plllock * PLL_LOCK_CONVERSION);
 
-	wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
-
-	if (query_current_values_with_pending_wait(data))
-		return 1;
+	do {
+		wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION);
+		if (i++ > 100) {
+			printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n");
+			return 1;
+		}
+	} while (query_current_values_with_pending_wait(data));
 
 	count_off_irt(data);
 
@@ -197,6 +201,7 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
 {
 	u32 lo;
 	u32 savefid = data->currfid;
+	int i = 0;
 
 	if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) {
 		printk(KERN_ERR PFX "internal error - overflow on vid write\n");
@@ -208,10 +213,13 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid)
 	dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n",
 		vid, lo, STOP_GRANT_5NS);
 
-	wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
-
-	if (query_current_values_with_pending_wait(data))
-		return 1;
+	do {
+		wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS);
+		if (i++ > 100) {
+			printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n");
+			return 1;
+		}
+	} while (query_current_values_with_pending_wait(data));
 
 	if (savefid != data->currfid) {
 		printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n",

+ 1 - 1
arch/ia64/hp/common/hwsw_iommu.c

@@ -71,7 +71,7 @@ hwsw_init (void)
 }
 
 void *
-hwsw_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, int flags)
+hwsw_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags)
 {
 	if (use_swiotlb(dev))
 		return swiotlb_alloc_coherent(dev, size, dma_handle, flags);

+ 1 - 1
arch/ia64/hp/common/sba_iommu.c

@@ -1076,7 +1076,7 @@ void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
  * See Documentation/DMA-mapping.txt
  */
 void *
-sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, int flags)
+sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags)
 {
 	struct ioc *ioc;
 	void *addr;

+ 1 - 1
arch/ia64/lib/swiotlb.c

@@ -314,7 +314,7 @@ sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
 
 void *
 swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-		       dma_addr_t *dma_handle, int flags)
+		       dma_addr_t *dma_handle, gfp_t flags)
 {
 	unsigned long dev_addr;
 	void *ret;

+ 1 - 1
arch/ia64/sn/kernel/xpc.h

@@ -939,7 +939,7 @@ xpc_map_bte_errors(bte_result_t error)
 
 
 static inline void *
-xpc_kmalloc_cacheline_aligned(size_t size, int flags, void **base)
+xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
 {
 	/* see if kmalloc will give us cacheline aligned memory by default */
 	*base = kmalloc(size, flags);

+ 1 - 1
arch/ia64/sn/pci/pci_dma.c

@@ -75,7 +75,7 @@ EXPORT_SYMBOL(sn_dma_set_mask);
  * more information.
  */
 void *sn_dma_alloc_coherent(struct device *dev, size_t size,
-			    dma_addr_t * dma_handle, int flags)
+			    dma_addr_t * dma_handle, gfp_t flags)
 {
 	void *cpuaddr;
 	unsigned long phys_addr;

+ 2 - 2
arch/mips/mm/dma-coherent.c

@@ -18,7 +18,7 @@
 #include <asm/io.h>
 
 void *dma_alloc_noncoherent(struct device *dev, size_t size,
-	dma_addr_t * dma_handle, int gfp)
+	dma_addr_t * dma_handle, gfp_t gfp)
 {
 	void *ret;
 	/* ignore region specifiers */
@@ -39,7 +39,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size,
 EXPORT_SYMBOL(dma_alloc_noncoherent);
 
 void *dma_alloc_coherent(struct device *dev, size_t size,
-	dma_addr_t * dma_handle, int gfp)
+	dma_addr_t * dma_handle, gfp_t gfp)
 	__attribute__((alias("dma_alloc_noncoherent")));
 
 EXPORT_SYMBOL(dma_alloc_coherent);

+ 2 - 2
arch/mips/mm/dma-ip27.c

@@ -22,7 +22,7 @@
 	pdev_to_baddr(to_pci_dev(dev), (addr))
 
 void *dma_alloc_noncoherent(struct device *dev, size_t size,
-	dma_addr_t * dma_handle, int gfp)
+	dma_addr_t * dma_handle, gfp_t gfp)
 {
 	void *ret;
 
@@ -44,7 +44,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size,
 EXPORT_SYMBOL(dma_alloc_noncoherent);
 
 void *dma_alloc_coherent(struct device *dev, size_t size,
-	dma_addr_t * dma_handle, int gfp)
+	dma_addr_t * dma_handle, gfp_t gfp)
 	__attribute__((alias("dma_alloc_noncoherent")));
 
 EXPORT_SYMBOL(dma_alloc_coherent);

+ 2 - 2
arch/mips/mm/dma-ip32.c

@@ -37,7 +37,7 @@
 #define RAM_OFFSET_MASK	0x3fffffff
 
 void *dma_alloc_noncoherent(struct device *dev, size_t size,
-	dma_addr_t * dma_handle, int gfp)
+	dma_addr_t * dma_handle, gfp_t gfp)
 {
 	void *ret;
 	/* ignore region specifiers */
@@ -61,7 +61,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size,
 EXPORT_SYMBOL(dma_alloc_noncoherent);
 
 void *dma_alloc_coherent(struct device *dev, size_t size,
-	dma_addr_t * dma_handle, int gfp)
+	dma_addr_t * dma_handle, gfp_t gfp)
 {
 	void *ret;
 

+ 2 - 2
arch/mips/mm/dma-noncoherent.c

@@ -24,7 +24,7 @@
  */
 
 void *dma_alloc_noncoherent(struct device *dev, size_t size,
-	dma_addr_t * dma_handle, int gfp)
+	dma_addr_t * dma_handle, gfp_t gfp)
 {
 	void *ret;
 	/* ignore region specifiers */
@@ -45,7 +45,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size,
 EXPORT_SYMBOL(dma_alloc_noncoherent);
 
 void *dma_alloc_coherent(struct device *dev, size_t size,
-	dma_addr_t * dma_handle, int gfp)
+	dma_addr_t * dma_handle, gfp_t gfp)
 {
 	void *ret;
 

+ 3 - 3
arch/parisc/kernel/pci-dma.c

@@ -349,7 +349,7 @@ pcxl_dma_init(void)
 
 __initcall(pcxl_dma_init);
 
-static void * pa11_dma_alloc_consistent (struct device *dev, size_t size, dma_addr_t *dma_handle, int flag)
+static void * pa11_dma_alloc_consistent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag)
 {
 	unsigned long vaddr;
 	unsigned long paddr;
@@ -502,13 +502,13 @@ struct hppa_dma_ops pcxl_dma_ops = {
 };
 
 static void *fail_alloc_consistent(struct device *dev, size_t size,
-				   dma_addr_t *dma_handle, int flag)
+				   dma_addr_t *dma_handle, gfp_t flag)
 {
 	return NULL;
 }
 
 static void *pa11_dma_alloc_noncoherent(struct device *dev, size_t size,
-					  dma_addr_t *dma_handle, int flag)
+					  dma_addr_t *dma_handle, gfp_t flag)
 {
 	void *addr = NULL;
 

+ 1 - 1
arch/ppc/8xx_io/cs4218.h

@@ -78,7 +78,7 @@ typedef struct {
     const char *name2;
     void (*open)(void);
     void (*release)(void);
-    void *(*dma_alloc)(unsigned int, int);
+    void *(*dma_alloc)(unsigned int, gfp_t);
     void (*dma_free)(void *, unsigned int);
     int (*irqinit)(void);
 #ifdef MODULE

+ 2 - 2
arch/ppc/8xx_io/cs4218_tdm.c

@@ -318,7 +318,7 @@ struct cs_sound_settings {
 
 static struct cs_sound_settings sound;
 
-static void *CS_Alloc(unsigned int size, int flags);
+static void *CS_Alloc(unsigned int size, gfp_t flags);
 static void CS_Free(void *ptr, unsigned int size);
 static int CS_IrqInit(void);
 #ifdef MODULE
@@ -959,7 +959,7 @@ static TRANS transCSNormalRead = {
 
 /*** Low level stuff *********************************************************/
 
-static void *CS_Alloc(unsigned int size, int flags)
+static void *CS_Alloc(unsigned int size, gfp_t flags)
 {
 	int	order;
 

+ 2 - 2
arch/ppc/kernel/dma-mapping.c

@@ -115,7 +115,7 @@ static struct vm_region consistent_head = {
 };
 
 static struct vm_region *
-vm_region_alloc(struct vm_region *head, size_t size, int gfp)
+vm_region_alloc(struct vm_region *head, size_t size, gfp_t gfp)
 {
 	unsigned long addr = head->vm_start, end = head->vm_end - size;
 	unsigned long flags;
@@ -173,7 +173,7 @@ static struct vm_region *vm_region_find(struct vm_region *head, unsigned long ad
  * virtual and bus address for that space.
  */
 void *
-__dma_alloc_coherent(size_t size, dma_addr_t *handle, int gfp)
+__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
 {
 	struct page *page;
 	struct vm_region *c;

+ 2 - 2
arch/ppc/mm/pgtable.c

@@ -114,9 +114,9 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	struct page *ptepage;
 
 #ifdef CONFIG_HIGHPTE
-	int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
+	gfp_t flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT;
 #else
-	int flags = GFP_KERNEL | __GFP_REPEAT;
+	gfp_t flags = GFP_KERNEL | __GFP_REPEAT;
 #endif
 
 	ptepage = alloc_pages(flags, 0);

+ 1 - 1
arch/ppc64/kernel/iSeries_htab.c

@@ -66,7 +66,7 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va,
 	}
 
 	if (slot < 0) {		/* MSB set means secondary group */
-		vflags |= HPTE_V_VALID;
+		vflags |= HPTE_V_SECONDARY;
 		secondary = 1;
 		slot &= 0x7fffffffffffffff;
 	}

+ 2 - 2
arch/ppc64/kernel/mpic.c

@@ -506,8 +506,8 @@ struct mpic * __init mpic_alloc(unsigned long phys_addr,
 	mpic->senses_count = senses_count;
 
 	/* Map the global registers */
-	mpic->gregs = ioremap(phys_addr + MPIC_GREG_BASE, 0x1000);
-	mpic->tmregs = mpic->gregs + (MPIC_TIMER_BASE >> 2);
+	mpic->gregs = ioremap(phys_addr + MPIC_GREG_BASE, 0x2000);
+	mpic->tmregs = mpic->gregs + ((MPIC_TIMER_BASE - MPIC_GREG_BASE) >> 2);
 	BUG_ON(mpic->gregs == NULL);
 
 	/* Reset */
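
The fix here is pointer arithmetic: mpic->gregs points at phys_addr +
MPIC_GREG_BASE, so the timer block at phys_addr + MPIC_TIMER_BASE lies
(MPIC_TIMER_BASE - MPIC_GREG_BASE) bytes into that mapping, scaled by >> 2
because gregs points at 32-bit registers.  Assuming the usual OpenPIC layout
(MPIC_GREG_BASE = 0x1000, MPIC_TIMER_BASE = 0x1100; values for illustration):

	old: gregs + (0x1100 >> 2)            = byte offset 0x1100 = phys 0x2100
	                                        (wrong, and outside the old
	                                        0x1000-byte mapping)
	new: gregs + ((0x1100 - 0x1000) >> 2) = byte offset 0x100  = phys 0x1100

The ioremap size is widened to 0x2000 so the timer registers fall inside the
mapping in the first place.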

+ 1 - 1
arch/ppc64/kernel/time.c

@@ -870,7 +870,7 @@ void div128_by_32( unsigned long dividend_high, unsigned long dividend_low,
 	rb = ((ra + b) - (x * divisor)) << 32;
 
 	y = (rb + c)/divisor;
-	rc = ((rb + b) - (y * divisor)) << 32;
+	rc = ((rb + c) - (y * divisor)) << 32;
 
 	z = (rc + d)/divisor;
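
This one-character fix is clearest as schoolbook long division with base-2^32
digits: the 128-bit dividend has digits (a, b, c, d), and each step divides
"previous remainder, shifted up, plus the next digit" by the divisor.  The
remainder feeding the next step must be rebuilt from the same partial
dividend that produced the quotient digit, so after y = (rb + c)/divisor the
remainder is (rb + c) - y*divisor, not (rb + b) - y*divisor.  A decimal
analogue, dividing the digits 9 8 7 by 7:

	x = 9/7  = 1,  ra = (9 - 1*7) * 10    ->  next partial: ra + 8 = 28
	y = 28/7 = 4,  rb = (28 - 4*7) * 10   ->  next partial: rb + 7 = 7  (c, not b)
	z = 7/7  = 1                          ->  digits 1 4 1: 987 / 7 = 141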
 

+ 1 - 2
arch/ppc64/mm/init.c

@@ -799,8 +799,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea,
 	if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp))
 		local = 1;
 
-	__hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep,
-		    0x300, local);
+	__hash_page(ea, 0, vsid, ptep, 0x300, local);
 	local_irq_restore(flags);
 }
 

+ 1 - 1
arch/sh/boards/renesas/rts7751r2d/mach.c

@@ -23,7 +23,7 @@ extern void init_rts7751r2d_IRQ(void);
 extern void *rts7751r2d_ioremap(unsigned long, unsigned long);
 extern int rts7751r2d_irq_demux(int irq);
 
-extern void *voyagergx_consistent_alloc(struct device *, size_t, dma_addr_t *, int);
+extern void *voyagergx_consistent_alloc(struct device *, size_t, dma_addr_t *, gfp_t);
 extern int voyagergx_consistent_free(struct device *, size_t, void *, dma_addr_t);
 
 /*

+ 1 - 1
arch/sh/cchips/voyagergx/consistent.c

@@ -31,7 +31,7 @@ static LIST_HEAD(voya_alloc_list);
 #define OHCI_SRAM_SIZE	0x10000
 
 void *voyagergx_consistent_alloc(struct device *dev, size_t size,
-				 dma_addr_t *handle, int flag)
+				 dma_addr_t *handle, gfp_t flag)
 {
 	struct list_head *list = &voya_alloc_list;
 	struct voya_alloc_entry *entry;

+ 1 - 1
arch/sh/drivers/pci/dma-dreamcast.c

@@ -33,7 +33,7 @@
 static int gapspci_dma_used = 0;
 
 void *dreamcast_consistent_alloc(struct device *dev, size_t size,
-				 dma_addr_t *dma_handle, int flag)
+				 dma_addr_t *dma_handle, gfp_t flag)
 {
 	unsigned long buf;
 

+ 1 - 1
arch/sh/mm/consistent.c

@@ -11,7 +11,7 @@
 #include <linux/dma-mapping.h>
 #include <asm/io.h>
 
-void *consistent_alloc(int gfp, size_t size, dma_addr_t *handle)
+void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *handle)
 {
 	struct page *page, *end, *free;
 	void *ret;

+ 1 - 1
arch/sparc64/solaris/socksys.c

@@ -49,7 +49,7 @@ IPPROTO_EGP, IPPROTO_PUP, IPPROTO_UDP, IPPROTO_IDP, IPPROTO_RAW,
 
 #else
 
-extern void * mykmalloc(size_t s, int gfp);
+extern void * mykmalloc(size_t s, gfp_t gfp);
 extern void mykfree(void *);
 
 #endif

+ 1 - 1
arch/sparc64/solaris/timod.c

@@ -39,7 +39,7 @@ static char * page = NULL ;
 
 #else
 
-void * mykmalloc(size_t s, int gfp)
+void * mykmalloc(size_t s, gfp_t gfp)
 {
 	static char * page;
 	static size_t free;

+ 1 - 1
arch/um/include/sysdep-i386/thread.h

@@ -4,7 +4,7 @@
 #include <kern_constants.h>
 
 #define TASK_DEBUGREGS(task) ((unsigned long *) &(((char *) (task))[HOST_TASK_DEBUGREGS]))
-#ifdef CONFIG_MODE_TT
+#ifdef UML_CONFIG_MODE_TT
 #define TASK_EXTERN_PID(task) *((int *) &(((char *) (task))[HOST_TASK_EXTERN_PID]))
 #endif
 

+ 1 - 1
arch/um/include/sysdep-x86_64/thread.h

@@ -3,7 +3,7 @@
 
 #include <kern_constants.h>
 
-#ifdef CONFIG_MODE_TT
+#ifdef UML_CONFIG_MODE_TT
 #define TASK_EXTERN_PID(task) *((int *) &(((char *) (task))[HOST_TASK_EXTERN_PID]))
 #endif
 

+ 1 - 1
arch/um/kernel/mem.c

@@ -252,7 +252,7 @@ void paging_init(void)
 #endif
 }
 
-struct page *arch_validate(struct page *page, int mask, int order)
+struct page *arch_validate(struct page *page, gfp_t mask, int order)
 {
 	unsigned long addr, zero = 0;
 	int i;

+ 1 - 1
arch/um/kernel/process_kern.c

@@ -80,7 +80,7 @@ void free_stack(unsigned long stack, int order)
 unsigned long alloc_stack(int order, int atomic)
 {
 	unsigned long page;
-	int flags = GFP_KERNEL;
+	gfp_t flags = GFP_KERNEL;
 
 	if (atomic)
 		flags = GFP_ATOMIC;

+ 2 - 2
arch/x86_64/kernel/pci-gart.c

@@ -187,7 +187,7 @@ static void flush_gart(struct device *dev)
 
 /* Allocate DMA memory on node near device */
 noinline
-static void *dma_alloc_pages(struct device *dev, unsigned gfp, unsigned order)
+static void *dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
 {
 	struct page *page;
 	int node;
@@ -204,7 +204,7 @@ static void *dma_alloc_pages(struct device *dev, unsigned gfp, unsigned order)
  */
 void *
 dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		   unsigned gfp)
+		   gfp_t gfp)
 {
 	void *memory;
 	unsigned long dma_mask = 0;

+ 1 - 1
arch/x86_64/kernel/pci-nommu.c

@@ -24,7 +24,7 @@ EXPORT_SYMBOL(iommu_sac_force);
  */
 
 void *dma_alloc_coherent(struct device *hwdev, size_t size,
-			 dma_addr_t *dma_handle, unsigned gfp)
+			 dma_addr_t *dma_handle, gfp_t gfp)
 {
 	void *ret;
 	u64 mask;

+ 1 - 1
arch/xtensa/kernel/pci-dma.c

@@ -29,7 +29,7 @@
  */
 
 void *
-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, int gfp)
+dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
 {
 	void *ret;
 

+ 90 - 237
drivers/block/as-iosched.c

@@ -98,7 +98,6 @@ struct as_data {
 
 	struct as_rq *next_arq[2];	/* next in sort order */
 	sector_t last_sector[2];	/* last REQ_SYNC & REQ_ASYNC sectors */
-	struct list_head *dispatch;	/* driver dispatch queue */
 	struct list_head *hash;		/* request hash */
 
 	unsigned long exit_prob;	/* probability a task will exit while
@@ -239,6 +238,25 @@ static struct io_context *as_get_io_context(void)
 	return ioc;
 }
 
+static void as_put_io_context(struct as_rq *arq)
+{
+	struct as_io_context *aic;
+
+	if (unlikely(!arq->io_context))
+		return;
+
+	aic = arq->io_context->aic;
+
+	if (arq->is_sync == REQ_SYNC && aic) {
+		spin_lock(&aic->lock);
+		set_bit(AS_TASK_IORUNNING, &aic->state);
+		aic->last_end_request = jiffies;
+		spin_unlock(&aic->lock);
+	}
+
+	put_io_context(arq->io_context);
+}
+
 /*
  * the back merge hash support functions
  */
@@ -261,14 +279,6 @@ static inline void as_del_arq_hash(struct as_rq *arq)
 		__as_del_arq_hash(arq);
 }
 
-static void as_remove_merge_hints(request_queue_t *q, struct as_rq *arq)
-{
-	as_del_arq_hash(arq);
-
-	if (q->last_merge == arq->request)
-		q->last_merge = NULL;
-}
-
 static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq)
 {
 	struct request *rq = arq->request;
@@ -312,7 +322,7 @@ static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset)
 		BUG_ON(!arq->on_hash);
 
 		if (!rq_mergeable(__rq)) {
-			as_remove_merge_hints(ad->q, arq);
+			as_del_arq_hash(arq);
 			continue;
 		}
 
@@ -950,23 +960,12 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
 
 	WARN_ON(!list_empty(&rq->queuelist));
 
-	if (arq->state == AS_RQ_PRESCHED) {
-		WARN_ON(arq->io_context);
-		goto out;
-	}
-
-	if (arq->state == AS_RQ_MERGED)
-		goto out_ioc;
-
 	if (arq->state != AS_RQ_REMOVED) {
 		printk("arq->state %d\n", arq->state);
 		WARN_ON(1);
 		goto out;
 	}
 
-	if (!blk_fs_request(rq))
-		goto out;
-
 	if (ad->changed_batch && ad->nr_dispatched == 1) {
 		kblockd_schedule_work(&ad->antic_work);
 		ad->changed_batch = 0;
@@ -1001,21 +1000,7 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
 		}
 	}
 
-out_ioc:
-	if (!arq->io_context)
-		goto out;
-
-	if (arq->is_sync == REQ_SYNC) {
-		struct as_io_context *aic = arq->io_context->aic;
-		if (aic) {
-			spin_lock(&aic->lock);
-			set_bit(AS_TASK_IORUNNING, &aic->state);
-			aic->last_end_request = jiffies;
-			spin_unlock(&aic->lock);
-		}
-	}
-
-	put_io_context(arq->io_context);
+	as_put_io_context(arq);
 out:
 	arq->state = AS_RQ_POSTSCHED;
 }
@@ -1047,72 +1032,10 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq)
 		ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
 
 	list_del_init(&arq->fifo);
-	as_remove_merge_hints(q, arq);
+	as_del_arq_hash(arq);
 	as_del_arq_rb(ad, arq);
 }
 
-/*
- * as_remove_dispatched_request is called to remove a request which has gone
- * to the dispatch list.
- */
-static void as_remove_dispatched_request(request_queue_t *q, struct request *rq)
-{
-	struct as_rq *arq = RQ_DATA(rq);
-	struct as_io_context *aic;
-
-	if (!arq) {
-		WARN_ON(1);
-		return;
-	}
-
-	WARN_ON(arq->state != AS_RQ_DISPATCHED);
-	WARN_ON(ON_RB(&arq->rb_node));
-	if (arq->io_context && arq->io_context->aic) {
-		aic = arq->io_context->aic;
-		if (aic) {
-			WARN_ON(!atomic_read(&aic->nr_dispatched));
-			atomic_dec(&aic->nr_dispatched);
-		}
-	}
-}
-
-/*
- * as_remove_request is called when a driver has finished with a request.
- * This should be only called for dispatched requests, but for some reason
- * a POWER4 box running hwscan it does not.
- */
-static void as_remove_request(request_queue_t *q, struct request *rq)
-{
-	struct as_rq *arq = RQ_DATA(rq);
-
-	if (unlikely(arq->state == AS_RQ_NEW))
-		goto out;
-
-	if (ON_RB(&arq->rb_node)) {
-		if (arq->state != AS_RQ_QUEUED) {
-			printk("arq->state %d\n", arq->state);
-			WARN_ON(1);
-			goto out;
-		}
-		/*
-		 * We'll lose the aliased request(s) here. I don't think this
-		 * will ever happen, but if it does, hopefully someone will
-		 * report it.
-		 */
-		WARN_ON(!list_empty(&rq->queuelist));
-		as_remove_queued_request(q, rq);
-	} else {
-		if (arq->state != AS_RQ_DISPATCHED) {
-			printk("arq->state %d\n", arq->state);
-			WARN_ON(1);
-			goto out;
-		}
-		as_remove_dispatched_request(q, rq);
-	}
-out:
-	arq->state = AS_RQ_REMOVED;
-}
-
 /*
  * as_fifo_expired returns 0 if there are no expired reads on the fifo,
  * 1 otherwise.  It is ratelimited so that we only perform the check once per
@@ -1165,7 +1088,6 @@ static inline int as_batch_expired(struct as_data *ad)
 static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 {
 	struct request *rq = arq->request;
-	struct list_head *insert;
 	const int data_dir = arq->is_sync;
 
 	BUG_ON(!ON_RB(&arq->rb_node));
@@ -1198,13 +1120,13 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 	/*
 	 * take it off the sort and fifo list, add to dispatch queue
 	 */
-	insert = ad->dispatch->prev;
-
 	while (!list_empty(&rq->queuelist)) {
 		struct request *__rq = list_entry_rq(rq->queuelist.next);
 		struct as_rq *__arq = RQ_DATA(__rq);
 
-		list_move_tail(&__rq->queuelist, ad->dispatch);
+		list_del(&__rq->queuelist);
+
+		elv_dispatch_add_tail(ad->q, __rq);
 
 		if (__arq->io_context && __arq->io_context->aic)
 			atomic_inc(&__arq->io_context->aic->nr_dispatched);
@@ -1218,7 +1140,8 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
 	as_remove_queued_request(ad->q, rq);
 	WARN_ON(arq->state != AS_RQ_QUEUED);
 
-	list_add(&rq->queuelist, insert);
+	elv_dispatch_sort(ad->q, rq);
+
 	arq->state = AS_RQ_DISPATCHED;
 	if (arq->io_context && arq->io_context->aic)
 		atomic_inc(&arq->io_context->aic->nr_dispatched);
@@ -1230,12 +1153,42 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
  * read/write expire, batch expire, etc, and moves it to the dispatch
  * queue. Returns 1 if a request was found, 0 otherwise.
  */
-static int as_dispatch_request(struct as_data *ad)
+static int as_dispatch_request(request_queue_t *q, int force)
 {
+	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq;
 	const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
 	const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
 
+	if (unlikely(force)) {
+		/*
+		 * Forced dispatch, accounting is useless.  Reset
+		 * accounting states and dump fifo_lists.  Note that
+		 * batch_data_dir is reset to REQ_SYNC to avoid
+		 * screwing write batch accounting as write batch
+		 * accounting occurs on W->R transition.
+		 */
+		int dispatched = 0;
+
+		ad->batch_data_dir = REQ_SYNC;
+		ad->changed_batch = 0;
+		ad->new_batch = 0;
+
+		while (ad->next_arq[REQ_SYNC]) {
+			as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
+			dispatched++;
+		}
+		ad->last_check_fifo[REQ_SYNC] = jiffies;
+
+		while (ad->next_arq[REQ_ASYNC]) {
+			as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
+			dispatched++;
+		}
+		ad->last_check_fifo[REQ_ASYNC] = jiffies;
+
+		return dispatched;
+	}
+
 	/* Signal that the write batch was uncontended, so we can't time it */
 	if (ad->batch_data_dir == REQ_ASYNC && !reads) {
 		if (ad->current_write_count == 0 || !writes)
@@ -1359,20 +1312,6 @@ fifo_expired:
 	return 1;
 }
 
-static struct request *as_next_request(request_queue_t *q)
-{
-	struct as_data *ad = q->elevator->elevator_data;
-	struct request *rq = NULL;
-
-	/*
-	 * if there are still requests on the dispatch queue, grab the first
-	 */
-	if (!list_empty(ad->dispatch) || as_dispatch_request(ad))
-		rq = list_entry_rq(ad->dispatch->next);
-
-	return rq;
-}
-
 /*
  * Add arq to a list behind alias
  */
@@ -1404,17 +1343,25 @@ as_add_aliased_request(struct as_data *ad, struct as_rq *arq, struct as_rq *alia
 	/*
 	 * Don't want to have to handle merges.
 	 */
-	as_remove_merge_hints(ad->q, arq);
+	as_del_arq_hash(arq);
 }
 
 /*
  * add arq to rbtree and fifo
  */
-static void as_add_request(struct as_data *ad, struct as_rq *arq)
+static void as_add_request(request_queue_t *q, struct request *rq)
 {
+	struct as_data *ad = q->elevator->elevator_data;
+	struct as_rq *arq = RQ_DATA(rq);
 	struct as_rq *alias;
 	int data_dir;
 
+	if (arq->state != AS_RQ_PRESCHED) {
+		printk("arq->state: %d\n", arq->state);
+		WARN_ON(1);
+	}
+	arq->state = AS_RQ_NEW;
+
 	if (rq_data_dir(arq->request) == READ
 			|| current->flags&PF_SYNCWRITE)
 		arq->is_sync = 1;
@@ -1437,12 +1384,8 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq)
 		arq->expires = jiffies + ad->fifo_expire[data_dir];
 		list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
 
-		if (rq_mergeable(arq->request)) {
+		if (rq_mergeable(arq->request))
 			as_add_arq_hash(ad, arq);
-
-			if (!ad->q->last_merge)
-				ad->q->last_merge = arq->request;
-		}
 		as_update_arq(ad, arq); /* keep state machine up to date */
 
 	} else {
@@ -1463,96 +1406,24 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq)
 	arq->state = AS_RQ_QUEUED;
 }
 
-static void as_deactivate_request(request_queue_t *q, struct request *rq)
+static void as_activate_request(request_queue_t *q, struct request *rq)
 {
-	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = RQ_DATA(rq);
 
-	if (arq) {
-		if (arq->state == AS_RQ_REMOVED) {
-			arq->state = AS_RQ_DISPATCHED;
-			if (arq->io_context && arq->io_context->aic)
-				atomic_inc(&arq->io_context->aic->nr_dispatched);
-		}
-	} else
-		WARN_ON(blk_fs_request(rq)
-			&& (!(rq->flags & (REQ_HARDBARRIER|REQ_SOFTBARRIER))) );
-
-	/* Stop anticipating - let this request get through */
-	as_antic_stop(ad);
-}
-
-/*
- * requeue the request. The request has not been completed, nor is it a
- * new request, so don't touch accounting.
- */
-static void as_requeue_request(request_queue_t *q, struct request *rq)
-{
-	as_deactivate_request(q, rq);
-	list_add(&rq->queuelist, &q->queue_head);
-}
-
-/*
- * Account a request that is inserted directly onto the dispatch queue.
- * arq->io_context->aic->nr_dispatched should not need to be incremented
- * because only new requests should come through here: requeues go through
- * our explicit requeue handler.
- */
-static void as_account_queued_request(struct as_data *ad, struct request *rq)
-{
-	if (blk_fs_request(rq)) {
-		struct as_rq *arq = RQ_DATA(rq);
-		arq->state = AS_RQ_DISPATCHED;
-		ad->nr_dispatched++;
-	}
+	WARN_ON(arq->state != AS_RQ_DISPATCHED);
+	arq->state = AS_RQ_REMOVED;
+	if (arq->io_context && arq->io_context->aic)
+		atomic_dec(&arq->io_context->aic->nr_dispatched);
 }
 
-static void
-as_insert_request(request_queue_t *q, struct request *rq, int where)
+static void as_deactivate_request(request_queue_t *q, struct request *rq)
 {
-	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = RQ_DATA(rq);
 
-	if (arq) {
-		if (arq->state != AS_RQ_PRESCHED) {
-			printk("arq->state: %d\n", arq->state);
-			WARN_ON(1);
-		}
-		arq->state = AS_RQ_NEW;
-	}
-
-	/* barriers must flush the reorder queue */
-	if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)
-			&& where == ELEVATOR_INSERT_SORT)) {
-		WARN_ON(1);
-		where = ELEVATOR_INSERT_BACK;
-	}
-
-	switch (where) {
-		case ELEVATOR_INSERT_BACK:
-			while (ad->next_arq[REQ_SYNC])
-				as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
-
-			while (ad->next_arq[REQ_ASYNC])
-				as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
-
-			list_add_tail(&rq->queuelist, ad->dispatch);
-			as_account_queued_request(ad, rq);
-			as_antic_stop(ad);
-			break;
-		case ELEVATOR_INSERT_FRONT:
-			list_add(&rq->queuelist, ad->dispatch);
-			as_account_queued_request(ad, rq);
-			as_antic_stop(ad);
-			break;
-		case ELEVATOR_INSERT_SORT:
-			BUG_ON(!blk_fs_request(rq));
-			as_add_request(ad, arq);
-			break;
-		default:
-			BUG();
-			return;
-	}
+	WARN_ON(arq->state != AS_RQ_REMOVED);
+	arq->state = AS_RQ_DISPATCHED;
+	if (arq->io_context && arq->io_context->aic)
+		atomic_inc(&arq->io_context->aic->nr_dispatched);
 }
 
 /*
@@ -1565,12 +1436,8 @@ static int as_queue_empty(request_queue_t *q)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 
-	if (!list_empty(&ad->fifo_list[REQ_ASYNC])
-		|| !list_empty(&ad->fifo_list[REQ_SYNC])
-		|| !list_empty(ad->dispatch))
-			return 0;
-
-	return 1;
+	return list_empty(&ad->fifo_list[REQ_ASYNC])
+		&& list_empty(&ad->fifo_list[REQ_SYNC]);
 }
 
 static struct request *
@@ -1607,15 +1474,6 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	struct request *__rq;
 	int ret;
 
-	/*
-	 * try last_merge to avoid going to hash
-	 */
-	ret = elv_try_last_merge(q, bio);
-	if (ret != ELEVATOR_NO_MERGE) {
-		__rq = q->last_merge;
-		goto out_insert;
-	}
-
 	/*
 	 * see if the merge hash can satisfy a back merge
 	 */
@@ -1644,9 +1502,6 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio)
 
 	return ELEVATOR_NO_MERGE;
 out:
-	if (rq_mergeable(__rq))
-		q->last_merge = __rq;
-out_insert:
 	if (ret) {
 		if (rq_mergeable(__rq))
 			as_hot_arq_hash(ad, RQ_DATA(__rq));
@@ -1693,9 +1548,6 @@ static void as_merged_request(request_queue_t *q, struct request *req)
 		 * behind the disk head. We currently don't bother adjusting.
 		 */
 	}
-
-	if (arq->on_hash)
-		q->last_merge = req;
 }
 
 static void
@@ -1763,6 +1615,7 @@ as_merged_requests(request_queue_t *q, struct request *req,
 	 * kill knowledge of next, this one is a goner
 	 */
 	as_remove_queued_request(q, next);
+	as_put_io_context(anext);
 
 	anext->state = AS_RQ_MERGED;
 }
@@ -1782,7 +1635,7 @@ static void as_work_handler(void *data)
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	if (as_next_request(q))
+	if (!as_queue_empty(q))
 		q->request_fn(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
@@ -1797,7 +1650,9 @@ static void as_put_request(request_queue_t *q, struct request *rq)
 		return;
 	}
 
-	if (arq->state != AS_RQ_POSTSCHED && arq->state != AS_RQ_PRESCHED) {
+	if (unlikely(arq->state != AS_RQ_POSTSCHED &&
+		     arq->state != AS_RQ_PRESCHED &&
+		     arq->state != AS_RQ_MERGED)) {
 		printk("arq->state %d\n", arq->state);
 		WARN_ON(1);
 	}
@@ -1807,7 +1662,7 @@ static void as_put_request(request_queue_t *q, struct request *rq)
 }
 
 static int as_set_request(request_queue_t *q, struct request *rq,
-			  struct bio *bio, int gfp_mask)
+			  struct bio *bio, gfp_t gfp_mask)
 {
 	struct as_data *ad = q->elevator->elevator_data;
 	struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask);
@@ -1907,7 +1762,6 @@ static int as_init_queue(request_queue_t *q, elevator_t *e)
 	INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
 	ad->sort_list[REQ_SYNC] = RB_ROOT;
 	ad->sort_list[REQ_ASYNC] = RB_ROOT;
-	ad->dispatch = &q->queue_head;
 	ad->fifo_expire[REQ_SYNC] = default_read_expire;
 	ad->fifo_expire[REQ_ASYNC] = default_write_expire;
 	ad->antic_expire = default_antic_expire;
@@ -2072,10 +1926,9 @@ static struct elevator_type iosched_as = {
 		.elevator_merge_fn = 		as_merge,
 		.elevator_merged_fn =		as_merged_request,
 		.elevator_merge_req_fn =	as_merged_requests,
-		.elevator_next_req_fn =		as_next_request,
-		.elevator_add_req_fn =		as_insert_request,
-		.elevator_remove_req_fn =	as_remove_request,
-		.elevator_requeue_req_fn = 	as_requeue_request,
+		.elevator_dispatch_fn =		as_dispatch_request,
+		.elevator_add_req_fn =		as_add_request,
+		.elevator_activate_req_fn =	as_activate_request,
 		.elevator_deactivate_req_fn = 	as_deactivate_request,
 		.elevator_queue_empty_fn =	as_queue_empty,
 		.elevator_completed_req_fn =	as_completed_request,
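
The same conversion is applied to cfq and deadline below: the per-scheduler
dispatch list and the insert/remove/requeue entry points disappear, and
everything funnels through elevator_dispatch_fn plus the two new helpers,
elv_dispatch_sort() and elv_dispatch_add_tail().  A sketch of the contract
for a hypothetical single-FIFO scheduler (names invented for illustration):

	struct foo_data {
		struct list_head fifo;	/* requests queued by add_req_fn */
	};

	static int foo_dispatch(request_queue_t *q, int force)
	{
		struct foo_data *fd = q->elevator->elevator_data;
		int dispatched = 0;

		/* move requests to the generic dispatch queue; once moved,
		 * they are owned by the block layer and must not be touched */
		while (!list_empty(&fd->fifo)) {
			struct request *rq = list_entry_rq(fd->fifo.next);

			list_del_init(&rq->queuelist);
			elv_dispatch_sort(q, rq);
			dispatched++;

			if (!force)
				break;	/* may postpone the rest unless forced */
		}

		return dispatched;
	}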

+ 85 - 287
drivers/block/cfq-iosched.c

@@ -84,7 +84,6 @@ static int cfq_max_depth = 2;
 	(node)->rb_left = NULL;		\
 } while (0)
 #define RB_CLEAR_ROOT(root)	((root)->rb_node = NULL)
-#define ON_RB(node)		((node)->rb_color != RB_NONE)
 #define rb_entry_crq(node)	rb_entry((node), struct cfq_rq, rb_node)
 #define rq_rb_key(rq)		(rq)->sector
 
@@ -271,10 +270,7 @@ CFQ_CFQQ_FNS(expired);
 #undef CFQ_CFQQ_FNS
 
 enum cfq_rq_state_flags {
-	CFQ_CRQ_FLAG_in_flight = 0,
-	CFQ_CRQ_FLAG_in_driver,
-	CFQ_CRQ_FLAG_is_sync,
-	CFQ_CRQ_FLAG_requeued,
+	CFQ_CRQ_FLAG_is_sync = 0,
 };
 
 #define CFQ_CRQ_FNS(name)						\
@@ -291,14 +287,11 @@ static inline int cfq_crq_##name(const struct cfq_rq *crq)		\
 	return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0;	\
 }
 
-CFQ_CRQ_FNS(in_flight);
-CFQ_CRQ_FNS(in_driver);
 CFQ_CRQ_FNS(is_sync);
-CFQ_CRQ_FNS(requeued);
 #undef CFQ_CRQ_FNS
 
 static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
-static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *);
+static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *);
 static void cfq_put_cfqd(struct cfq_data *cfqd);
 
 #define process_sync(tsk)	((tsk)->flags & PF_SYNCWRITE)
@@ -311,14 +304,6 @@ static inline void cfq_del_crq_hash(struct cfq_rq *crq)
 	hlist_del_init(&crq->hash);
 }
 
-static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
-{
-	cfq_del_crq_hash(crq);
-
-	if (q->last_merge == crq->request)
-		q->last_merge = NULL;
-}
-
 static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
 {
 	const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
@@ -347,18 +332,13 @@ static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
 	return NULL;
 }
 
-static inline int cfq_pending_requests(struct cfq_data *cfqd)
-{
-	return !list_empty(&cfqd->queue->queue_head) || cfqd->busy_queues;
-}
-
 /*
  * scheduler run of queue, if there are requests pending and no one in the
  * driver that will restart queueing
  */
 static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
 {
-	if (!cfqd->rq_in_driver && cfq_pending_requests(cfqd))
+	if (!cfqd->rq_in_driver && cfqd->busy_queues)
 		kblockd_schedule_work(&cfqd->unplug_work);
 }
 
@@ -366,7 +346,7 @@ static int cfq_queue_empty(request_queue_t *q)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 
-	return !cfq_pending_requests(cfqd);
+	return !cfqd->busy_queues;
 }
 
 /*
@@ -386,11 +366,6 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
 	if (crq2 == NULL)
 		return crq1;
 
-	if (cfq_crq_requeued(crq1) && !cfq_crq_requeued(crq2))
-		return crq1;
-	else if (cfq_crq_requeued(crq2) && !cfq_crq_requeued(crq1))
-		return crq2;
-
 	if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2))
 		return crq1;
 	else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1))
@@ -461,10 +436,7 @@ cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	struct cfq_rq *crq_next = NULL, *crq_prev = NULL;
 	struct rb_node *rbnext, *rbprev;
 
-	rbnext = NULL;
-	if (ON_RB(&last->rb_node))
-		rbnext = rb_next(&last->rb_node);
-	if (!rbnext) {
+	if (!(rbnext = rb_next(&last->rb_node))) {
 		rbnext = rb_first(&cfqq->sort_list);
 		if (rbnext == &last->rb_node)
 			rbnext = NULL;
@@ -545,13 +517,13 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
  * the pending list according to last request service
  */
 static inline void
-cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq, int requeue)
+cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
 	BUG_ON(cfq_cfqq_on_rr(cfqq));
 	cfq_mark_cfqq_on_rr(cfqq);
 	cfqd->busy_queues++;
 
-	cfq_resort_rr_list(cfqq, requeue);
+	cfq_resort_rr_list(cfqq, 0);
 }
 
 static inline void
@@ -571,22 +543,19 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 static inline void cfq_del_crq_rb(struct cfq_rq *crq)
 {
 	struct cfq_queue *cfqq = crq->cfq_queue;
+	struct cfq_data *cfqd = cfqq->cfqd;
+	const int sync = cfq_crq_is_sync(crq);
 
-	if (ON_RB(&crq->rb_node)) {
-		struct cfq_data *cfqd = cfqq->cfqd;
-		const int sync = cfq_crq_is_sync(crq);
+	BUG_ON(!cfqq->queued[sync]);
+	cfqq->queued[sync]--;
 
-		BUG_ON(!cfqq->queued[sync]);
-		cfqq->queued[sync]--;
+	cfq_update_next_crq(crq);
 
-		cfq_update_next_crq(crq);
+	rb_erase(&crq->rb_node, &cfqq->sort_list);
+	RB_CLEAR_COLOR(&crq->rb_node);
 
-		rb_erase(&crq->rb_node, &cfqq->sort_list);
-		RB_CLEAR_COLOR(&crq->rb_node);
-
-		if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list))
-			cfq_del_cfqq_rr(cfqd, cfqq);
-	}
+	if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list))
+		cfq_del_cfqq_rr(cfqd, cfqq);
 }
 
 static struct cfq_rq *
@@ -627,12 +596,12 @@ static void cfq_add_crq_rb(struct cfq_rq *crq)
 	 * if that happens, put the alias on the dispatch list
 	 */
 	while ((__alias = __cfq_add_crq_rb(crq)) != NULL)
-		cfq_dispatch_sort(cfqd->queue, __alias);
+		cfq_dispatch_insert(cfqd->queue, __alias);
 
 	rb_insert_color(&crq->rb_node, &cfqq->sort_list);
 
 	if (!cfq_cfqq_on_rr(cfqq))
-		cfq_add_cfqq_rr(cfqd, cfqq, cfq_crq_requeued(crq));
+		cfq_add_cfqq_rr(cfqd, cfqq);
 
 	/*
 	 * check if this request is a better next-serve candidate
@@ -643,10 +612,8 @@ static void cfq_add_crq_rb(struct cfq_rq *crq)
 static inline void
 cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
 {
-	if (ON_RB(&crq->rb_node)) {
-		rb_erase(&crq->rb_node, &cfqq->sort_list);
-		cfqq->queued[cfq_crq_is_sync(crq)]--;
-	}
+	rb_erase(&crq->rb_node, &cfqq->sort_list);
+	cfqq->queued[cfq_crq_is_sync(crq)]--;
 
 	cfq_add_crq_rb(crq);
 }
@@ -676,49 +643,28 @@ out:
 	return NULL;
 }
 
-static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
+static void cfq_activate_request(request_queue_t *q, struct request *rq)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct cfq_rq *crq = RQ_DATA(rq);
-
-	if (crq) {
-		struct cfq_queue *cfqq = crq->cfq_queue;
-
-		if (cfq_crq_in_driver(crq)) {
-			cfq_clear_crq_in_driver(crq);
-			WARN_ON(!cfqd->rq_in_driver);
-			cfqd->rq_in_driver--;
-		}
-		if (cfq_crq_in_flight(crq)) {
-			const int sync = cfq_crq_is_sync(crq);
 
-			cfq_clear_crq_in_flight(crq);
-			WARN_ON(!cfqq->on_dispatch[sync]);
-			cfqq->on_dispatch[sync]--;
-		}
-		cfq_mark_crq_requeued(crq);
-	}
+	cfqd->rq_in_driver++;
 }
 
-/*
- * make sure the service time gets corrected on reissue of this request
- */
-static void cfq_requeue_request(request_queue_t *q, struct request *rq)
+static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
 {
-	cfq_deactivate_request(q, rq);
-	list_add(&rq->queuelist, &q->queue_head);
+	struct cfq_data *cfqd = q->elevator->elevator_data;
+
+	WARN_ON(!cfqd->rq_in_driver);
+	cfqd->rq_in_driver--;
 }
 
-static void cfq_remove_request(request_queue_t *q, struct request *rq)
+static void cfq_remove_request(struct request *rq)
 {
 	struct cfq_rq *crq = RQ_DATA(rq);
 
-	if (crq) {
-		list_del_init(&rq->queuelist);
-		cfq_del_crq_rb(crq);
-		cfq_remove_merge_hints(q, crq);
-
-	}
+	list_del_init(&rq->queuelist);
+	cfq_del_crq_rb(crq);
+	cfq_del_crq_hash(crq);
 }
 
 static int
@@ -728,12 +674,6 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	struct request *__rq;
 	int ret;
 
-	ret = elv_try_last_merge(q, bio);
-	if (ret != ELEVATOR_NO_MERGE) {
-		__rq = q->last_merge;
-		goto out_insert;
-	}
-
 	__rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
 	if (__rq && elv_rq_merge_ok(__rq, bio)) {
 		ret = ELEVATOR_BACK_MERGE;
@@ -748,8 +688,6 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
 
 	return ELEVATOR_NO_MERGE;
 out:
-	q->last_merge = __rq;
-out_insert:
 	*req = __rq;
 	return ret;
 }
@@ -762,14 +700,12 @@ static void cfq_merged_request(request_queue_t *q, struct request *req)
 	cfq_del_crq_hash(crq);
 	cfq_add_crq_hash(cfqd, crq);
 
-	if (ON_RB(&crq->rb_node) && (rq_rb_key(req) != crq->rb_key)) {
+	if (rq_rb_key(req) != crq->rb_key) {
 		struct cfq_queue *cfqq = crq->cfq_queue;
 
 		cfq_update_next_crq(crq);
 		cfq_reposition_crq_rb(cfqq, crq);
 	}
-
-	q->last_merge = req;
 }
 
 static void
@@ -785,7 +721,7 @@ cfq_merged_requests(request_queue_t *q, struct request *rq,
 	    time_before(next->start_time, rq->start_time))
 		list_move(&rq->queuelist, &next->queuelist);
 
-	cfq_remove_request(q, next);
+	cfq_remove_request(next);
 }
 
 static inline void
@@ -992,53 +928,15 @@ static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	return 1;
 }
 
-/*
- * we dispatch cfqd->cfq_quantum requests in total from the rr_list queues,
- * this function sector sorts the selected request to minimize seeks. we start
- * at cfqd->last_sector, not 0.
- */
-static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq)
+static void cfq_dispatch_insert(request_queue_t *q, struct cfq_rq *crq)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_queue *cfqq = crq->cfq_queue;
-	struct list_head *head = &q->queue_head, *entry = head;
-	struct request *__rq;
-	sector_t last;
-
-	list_del(&crq->request->queuelist);
-
-	last = cfqd->last_sector;
-	list_for_each_entry_reverse(__rq, head, queuelist) {
-		struct cfq_rq *__crq = RQ_DATA(__rq);
-
-		if (blk_barrier_rq(__rq))
-			break;
-		if (!blk_fs_request(__rq))
-			break;
-		if (cfq_crq_requeued(__crq))
-			break;
-
-		if (__rq->sector <= crq->request->sector)
-			break;
-		if (__rq->sector > last && crq->request->sector < last) {
-			last = crq->request->sector + crq->request->nr_sectors;
-			break;
-		}
-		entry = &__rq->queuelist;
-	}
-
-	cfqd->last_sector = last;
 
 	cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq);
-
-	cfq_del_crq_rb(crq);
-	cfq_remove_merge_hints(q, crq);
-
-	cfq_mark_crq_in_flight(crq);
-	cfq_clear_crq_requeued(crq);
-
+	cfq_remove_request(crq->request);
 	cfqq->on_dispatch[cfq_crq_is_sync(crq)]++;
-	list_add_tail(&crq->request->queuelist, entry);
+	elv_dispatch_sort(q, crq->request);
 }
 
 /*
@@ -1159,7 +1057,7 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		/*
 		 * finally, insert request into driver dispatch list
 		 */
-		cfq_dispatch_sort(cfqd->queue, crq);
+		cfq_dispatch_insert(cfqd->queue, crq);
 
 		cfqd->dispatch_slice++;
 		dispatched++;
@@ -1194,7 +1092,7 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 }
 
 static int
-cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force)
+cfq_dispatch_requests(request_queue_t *q, int force)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_queue *cfqq;
@@ -1204,12 +1102,25 @@ cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force)
 
 	cfqq = cfq_select_queue(cfqd, force);
 	if (cfqq) {
+		int max_dispatch;
+
+		/*
+		 * if idle window is disabled, allow queue buildup
+		 */
+		if (!cfq_cfqq_idle_window(cfqq) &&
+		    cfqd->rq_in_driver >= cfqd->cfq_max_depth)
+			return 0;
+
 		cfq_clear_cfqq_must_dispatch(cfqq);
 		cfq_clear_cfqq_wait_request(cfqq);
 		del_timer(&cfqd->idle_slice_timer);
 
-		if (cfq_class_idle(cfqq))
-			max_dispatch = 1;
+		if (!force) {
+			max_dispatch = cfqd->cfq_quantum;
+			if (cfq_class_idle(cfqq))
+				max_dispatch = 1;
+		} else
+			max_dispatch = INT_MAX;
 
 		return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
 	}
@@ -1217,93 +1128,6 @@ cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force)
 	return 0;
 }
 
-static inline void cfq_account_dispatch(struct cfq_rq *crq)
-{
-	struct cfq_queue *cfqq = crq->cfq_queue;
-	struct cfq_data *cfqd = cfqq->cfqd;
-
-	if (unlikely(!blk_fs_request(crq->request)))
-		return;
-
-	/*
-	 * accounted bit is necessary since some drivers will call
-	 * elv_next_request() many times for the same request (eg ide)
-	 */
-	if (cfq_crq_in_driver(crq))
-		return;
-
-	cfq_mark_crq_in_driver(crq);
-	cfqd->rq_in_driver++;
-}
-
-static inline void
-cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq)
-{
-	struct cfq_data *cfqd = cfqq->cfqd;
-	unsigned long now;
-
-	if (!cfq_crq_in_driver(crq))
-		return;
-
-	now = jiffies;
-
-	WARN_ON(!cfqd->rq_in_driver);
-	cfqd->rq_in_driver--;
-
-	if (!cfq_class_idle(cfqq))
-		cfqd->last_end_request = now;
-
-	if (!cfq_cfqq_dispatched(cfqq)) {
-		if (cfq_cfqq_on_rr(cfqq)) {
-			cfqq->service_last = now;
-			cfq_resort_rr_list(cfqq, 0);
-		}
-		if (cfq_cfqq_expired(cfqq)) {
-			__cfq_slice_expired(cfqd, cfqq, 0);
-			cfq_schedule_dispatch(cfqd);
-		}
-	}
-
-	if (cfq_crq_is_sync(crq))
-		crq->io_context->last_end_request = now;
-}
-
-static struct request *cfq_next_request(request_queue_t *q)
-{
-	struct cfq_data *cfqd = q->elevator->elevator_data;
-	struct request *rq;
-
-	if (!list_empty(&q->queue_head)) {
-		struct cfq_rq *crq;
-dispatch:
-		rq = list_entry_rq(q->queue_head.next);
-
-		crq = RQ_DATA(rq);
-		if (crq) {
-			struct cfq_queue *cfqq = crq->cfq_queue;
-
-			/*
-			 * if idle window is disabled, allow queue buildup
-			 */
-			if (!cfq_crq_in_driver(crq) &&
-			    !cfq_cfqq_idle_window(cfqq) &&
-			    !blk_barrier_rq(rq) &&
-			    cfqd->rq_in_driver >= cfqd->cfq_max_depth)
-				return NULL;
-
-			cfq_remove_merge_hints(q, crq);
-			cfq_account_dispatch(crq);
-		}
-
-		return rq;
-	}
-
-	if (cfq_dispatch_requests(q, cfqd->cfq_quantum, 0))
-		goto dispatch;
-
-	return NULL;
-}
-
 /*
  * task holds one reference to the queue, dropped when task exits. each crq
  * in-flight on this queue also holds a reference, dropped when crq is freed.
@@ -1422,7 +1246,7 @@ static void cfq_exit_io_context(struct cfq_io_context *cic)
 }
 
 static struct cfq_io_context *
-cfq_alloc_io_context(struct cfq_data *cfqd, int gfp_mask)
+cfq_alloc_io_context(struct cfq_data *cfqd, gfp_t gfp_mask)
 {
 	struct cfq_io_context *cic = kmem_cache_alloc(cfq_ioc_pool, gfp_mask);
 
@@ -1517,7 +1341,7 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio)
 
 static struct cfq_queue *
 cfq_get_queue(struct cfq_data *cfqd, unsigned int key, unsigned short ioprio,
-	      int gfp_mask)
+	      gfp_t gfp_mask)
 {
 	const int hashval = hash_long(key, CFQ_QHASH_SHIFT);
 	struct cfq_queue *cfqq, *new_cfqq = NULL;
@@ -1578,7 +1402,7 @@ out:
  * cfqq, so we don't need to worry about it disappearing
  */
 static struct cfq_io_context *
-cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, int gfp_mask)
+cfq_get_io_context(struct cfq_data *cfqd, pid_t pid, gfp_t gfp_mask)
 {
 	struct io_context *ioc = NULL;
 	struct cfq_io_context *cic;
@@ -1816,8 +1640,9 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 	}
 }
 
-static void cfq_enqueue(struct cfq_data *cfqd, struct request *rq)
+static void cfq_insert_request(request_queue_t *q, struct request *rq)
 {
+	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct cfq_rq *crq = RQ_DATA(rq);
 	struct cfq_queue *cfqq = crq->cfq_queue;
 
@@ -1827,66 +1652,43 @@ static void cfq_enqueue(struct cfq_data *cfqd, struct request *rq)
 
 	list_add_tail(&rq->queuelist, &cfqq->fifo);
 
-	if (rq_mergeable(rq)) {
+	if (rq_mergeable(rq))
 		cfq_add_crq_hash(cfqd, crq);
 
-		if (!cfqd->queue->last_merge)
-			cfqd->queue->last_merge = rq;
-	}
-
 	cfq_crq_enqueued(cfqd, cfqq, crq);
 }
 
-static void
-cfq_insert_request(request_queue_t *q, struct request *rq, int where)
-{
-	struct cfq_data *cfqd = q->elevator->elevator_data;
-
-	switch (where) {
-		case ELEVATOR_INSERT_BACK:
-			while (cfq_dispatch_requests(q, INT_MAX, 1))
-				;
-			list_add_tail(&rq->queuelist, &q->queue_head);
-			/*
-			 * If we were idling with pending requests on
-			 * inactive cfqqs, force dispatching will
-			 * remove the idle timer and the queue won't
-			 * be kicked by __make_request() afterward.
-			 * Kick it here.
-			 */
-			cfq_schedule_dispatch(cfqd);
-			break;
-		case ELEVATOR_INSERT_FRONT:
-			list_add(&rq->queuelist, &q->queue_head);
-			break;
-		case ELEVATOR_INSERT_SORT:
-			BUG_ON(!blk_fs_request(rq));
-			cfq_enqueue(cfqd, rq);
-			break;
-		default:
-			printk("%s: bad insert point %d\n", __FUNCTION__,where);
-			return;
-	}
-}
-
 static void cfq_completed_request(request_queue_t *q, struct request *rq)
 {
 	struct cfq_rq *crq = RQ_DATA(rq);
-	struct cfq_queue *cfqq;
+	struct cfq_queue *cfqq = crq->cfq_queue;
+	struct cfq_data *cfqd = cfqq->cfqd;
+	const int sync = cfq_crq_is_sync(crq);
+	unsigned long now;
 
-	if (unlikely(!blk_fs_request(rq)))
-		return;
+	now = jiffies;
 
-	cfqq = crq->cfq_queue;
+	WARN_ON(!cfqd->rq_in_driver);
+	WARN_ON(!cfqq->on_dispatch[sync]);
+	cfqd->rq_in_driver--;
+	cfqq->on_dispatch[sync]--;
 
-	if (cfq_crq_in_flight(crq)) {
-		const int sync = cfq_crq_is_sync(crq);
+	if (!cfq_class_idle(cfqq))
+		cfqd->last_end_request = now;
 
-		WARN_ON(!cfqq->on_dispatch[sync]);
-		cfqq->on_dispatch[sync]--;
+	if (!cfq_cfqq_dispatched(cfqq)) {
+		if (cfq_cfqq_on_rr(cfqq)) {
+			cfqq->service_last = now;
+			cfq_resort_rr_list(cfqq, 0);
+		}
+		if (cfq_cfqq_expired(cfqq)) {
+			__cfq_slice_expired(cfqd, cfqq, 0);
+			cfq_schedule_dispatch(cfqd);
+		}
 	}
 
-	cfq_account_completion(cfqq, crq);
+	if (cfq_crq_is_sync(crq))
+		crq->io_context->last_end_request = now;
 }
 
 static struct request *
@@ -2075,7 +1877,7 @@ static void cfq_put_request(request_queue_t *q, struct request *rq)
  */
 static int
 cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-		int gfp_mask)
+		gfp_t gfp_mask)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 	struct task_struct *tsk = current;
@@ -2118,9 +1920,6 @@ cfq_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
 		INIT_HLIST_NODE(&crq->hash);
 		crq->cfq_queue = cfqq;
 		crq->io_context = cic;
-		cfq_clear_crq_in_flight(crq);
-		cfq_clear_crq_in_driver(crq);
-		cfq_clear_crq_requeued(crq);
 
 		if (rw == READ || process_sync(tsk))
 			cfq_mark_crq_is_sync(crq);
@@ -2201,7 +2000,7 @@ static void cfq_idle_slice_timer(unsigned long data)
 		 * only expire and reinvoke request handler, if there are
 		 * other queues with pending requests
 		 */
-		if (!cfq_pending_requests(cfqd)) {
+		if (!cfqd->busy_queues) {
 			cfqd->idle_slice_timer.expires = min(now + cfqd->cfq_slice_idle, cfqq->slice_end);
 			add_timer(&cfqd->idle_slice_timer);
 			goto out_cont;
@@ -2576,10 +2375,9 @@ static struct elevator_type iosched_cfq = {
 		.elevator_merge_fn = 		cfq_merge,
 		.elevator_merged_fn =		cfq_merged_request,
 		.elevator_merge_req_fn =	cfq_merged_requests,
-		.elevator_next_req_fn =		cfq_next_request,
+		.elevator_dispatch_fn =		cfq_dispatch_requests,
 		.elevator_add_req_fn =		cfq_insert_request,
-		.elevator_remove_req_fn =	cfq_remove_request,
-		.elevator_requeue_req_fn =	cfq_requeue_request,
+		.elevator_activate_req_fn =	cfq_activate_request,
 		.elevator_deactivate_req_fn =	cfq_deactivate_request,
 		.elevator_queue_empty_fn =	cfq_queue_empty,
 		.elevator_completed_req_fn =	cfq_completed_request,
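
Taken together, the CFQ hunks above replace the per-request in_driver/in_flight flag juggling with two small counter hooks that the elevator core now drives directly. A minimal model of that pairing, in plain C with assert() standing in for WARN_ON (the struct is abbreviated and illustrative, not the real cfq_data):

    #include <assert.h>

    struct cfq_sketch {
    	int rq_in_driver;	/* requests fetched by the driver, not yet completed */
    };

    /* elevator_activate_req_fn: the driver has fetched the request */
    static void sketch_activate(struct cfq_sketch *cfqd)
    {
    	cfqd->rq_in_driver++;
    }

    /* elevator_deactivate_req_fn: a fetched request was requeued */
    static void sketch_deactivate(struct cfq_sketch *cfqd)
    {
    	assert(cfqd->rq_in_driver > 0);
    	cfqd->rq_in_driver--;
    }

Completion (cfq_completed_request above) decrements the same counter, so the three paths stay balanced without any per-crq state.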

+ 19 - 106
drivers/block/deadline-iosched.c

@@ -50,7 +50,6 @@ struct deadline_data {
 	 * next in sort order. read, write or both are NULL
 	 */
 	struct deadline_rq *next_drq[2];
-	struct list_head *dispatch;	/* driver dispatch queue */
 	struct list_head *hash;		/* request hash */
 	unsigned int batching;		/* number of sequential requests made */
 	sector_t last_sector;		/* head position */
@@ -113,15 +112,6 @@ static inline void deadline_del_drq_hash(struct deadline_rq *drq)
 		__deadline_del_drq_hash(drq);
 }
 
-static void
-deadline_remove_merge_hints(request_queue_t *q, struct deadline_rq *drq)
-{
-	deadline_del_drq_hash(drq);
-
-	if (q->last_merge == drq->request)
-		q->last_merge = NULL;
-}
-
 static inline void
 deadline_add_drq_hash(struct deadline_data *dd, struct deadline_rq *drq)
 {
@@ -239,10 +229,9 @@ deadline_del_drq_rb(struct deadline_data *dd, struct deadline_rq *drq)
 			dd->next_drq[data_dir] = rb_entry_drq(rbnext);
 	}
 
-	if (ON_RB(&drq->rb_node)) {
-		rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
-		RB_CLEAR(&drq->rb_node);
-	}
+	BUG_ON(!ON_RB(&drq->rb_node));
+	rb_erase(&drq->rb_node, DRQ_RB_ROOT(dd, drq));
+	RB_CLEAR(&drq->rb_node);
 }
 
 static struct request *
@@ -286,7 +275,7 @@ deadline_find_first_drq(struct deadline_data *dd, int data_dir)
 /*
  * add drq to rbtree and fifo
  */
-static inline void
+static void
 deadline_add_request(struct request_queue *q, struct request *rq)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
@@ -301,12 +290,8 @@ deadline_add_request(struct request_queue *q, struct request *rq)
 	drq->expires = jiffies + dd->fifo_expire[data_dir];
 	list_add_tail(&drq->fifo, &dd->fifo_list[data_dir]);
 
-	if (rq_mergeable(rq)) {
+	if (rq_mergeable(rq))
 		deadline_add_drq_hash(dd, drq);
-
-		if (!q->last_merge)
-			q->last_merge = rq;
-	}
 }
 
 /*
@@ -315,14 +300,11 @@ deadline_add_request(struct request_queue *q, struct request *rq)
 static void deadline_remove_request(request_queue_t *q, struct request *rq)
 {
 	struct deadline_rq *drq = RQ_DATA(rq);
+	struct deadline_data *dd = q->elevator->elevator_data;
 
-	if (drq) {
-		struct deadline_data *dd = q->elevator->elevator_data;
-
-		list_del_init(&drq->fifo);
-		deadline_remove_merge_hints(q, drq);
-		deadline_del_drq_rb(dd, drq);
-	}
+	list_del_init(&drq->fifo);
+	deadline_del_drq_rb(dd, drq);
+	deadline_del_drq_hash(drq);
 }
 
 static int
@@ -332,15 +314,6 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
 	struct request *__rq;
 	int ret;
 
-	/*
-	 * try last_merge to avoid going to hash
-	 */
-	ret = elv_try_last_merge(q, bio);
-	if (ret != ELEVATOR_NO_MERGE) {
-		__rq = q->last_merge;
-		goto out_insert;
-	}
-
 	/*
 	 * see if the merge hash can satisfy a back merge
 	 */
@@ -373,8 +346,6 @@ deadline_merge(request_queue_t *q, struct request **req, struct bio *bio)
 
 	return ELEVATOR_NO_MERGE;
 out:
-	q->last_merge = __rq;
-out_insert:
 	if (ret)
 		deadline_hot_drq_hash(dd, RQ_DATA(__rq));
 	*req = __rq;
@@ -399,8 +370,6 @@ static void deadline_merged_request(request_queue_t *q, struct request *req)
 		deadline_del_drq_rb(dd, drq);
 		deadline_add_drq_rb(dd, drq);
 	}
-
-	q->last_merge = req;
 }
 
 static void
@@ -452,7 +421,7 @@ deadline_move_to_dispatch(struct deadline_data *dd, struct deadline_rq *drq)
 	request_queue_t *q = drq->request->q;
 
 	deadline_remove_request(q, drq->request);
-	list_add_tail(&drq->request->queuelist, dd->dispatch);
+	elv_dispatch_add_tail(q, drq->request);
 }
 
 /*
@@ -502,8 +471,9 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
  * deadline_dispatch_requests selects the best request according to
  * read/write expire, fifo_batch, etc
  */
-static int deadline_dispatch_requests(struct deadline_data *dd)
+static int deadline_dispatch_requests(request_queue_t *q, int force)
 {
+	struct deadline_data *dd = q->elevator->elevator_data;
 	const int reads = !list_empty(&dd->fifo_list[READ]);
 	const int writes = !list_empty(&dd->fifo_list[WRITE]);
 	struct deadline_rq *drq;
@@ -597,65 +567,12 @@ dispatch_request:
 	return 1;
 }
 
-static struct request *deadline_next_request(request_queue_t *q)
-{
-	struct deadline_data *dd = q->elevator->elevator_data;
-	struct request *rq;
-
-	/*
-	 * if there are still requests on the dispatch queue, grab the first one
-	 */
-	if (!list_empty(dd->dispatch)) {
-dispatch:
-		rq = list_entry_rq(dd->dispatch->next);
-		return rq;
-	}
-
-	if (deadline_dispatch_requests(dd))
-		goto dispatch;
-
-	return NULL;
-}
-
-static void
-deadline_insert_request(request_queue_t *q, struct request *rq, int where)
-{
-	struct deadline_data *dd = q->elevator->elevator_data;
-
-	/* barriers must flush the reorder queue */
-	if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)
-			&& where == ELEVATOR_INSERT_SORT))
-		where = ELEVATOR_INSERT_BACK;
-
-	switch (where) {
-		case ELEVATOR_INSERT_BACK:
-			while (deadline_dispatch_requests(dd))
-				;
-			list_add_tail(&rq->queuelist, dd->dispatch);
-			break;
-		case ELEVATOR_INSERT_FRONT:
-			list_add(&rq->queuelist, dd->dispatch);
-			break;
-		case ELEVATOR_INSERT_SORT:
-			BUG_ON(!blk_fs_request(rq));
-			deadline_add_request(q, rq);
-			break;
-		default:
-			printk("%s: bad insert point %d\n", __FUNCTION__,where);
-			return;
-	}
-}
-
 static int deadline_queue_empty(request_queue_t *q)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
 
-	if (!list_empty(&dd->fifo_list[WRITE])
-	    || !list_empty(&dd->fifo_list[READ])
-	    || !list_empty(dd->dispatch))
-		return 0;
-
-	return 1;
+	return list_empty(&dd->fifo_list[WRITE])
+		&& list_empty(&dd->fifo_list[READ]);
 }
 
 static struct request *
@@ -733,7 +650,6 @@ static int deadline_init_queue(request_queue_t *q, elevator_t *e)
 	INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
 	dd->sort_list[READ] = RB_ROOT;
 	dd->sort_list[WRITE] = RB_ROOT;
-	dd->dispatch = &q->queue_head;
 	dd->fifo_expire[READ] = read_expire;
 	dd->fifo_expire[WRITE] = write_expire;
 	dd->writes_starved = writes_starved;
@@ -748,15 +664,13 @@ static void deadline_put_request(request_queue_t *q, struct request *rq)
 	struct deadline_data *dd = q->elevator->elevator_data;
 	struct deadline_rq *drq = RQ_DATA(rq);
 
-	if (drq) {
-		mempool_free(drq, dd->drq_pool);
-		rq->elevator_private = NULL;
-	}
+	mempool_free(drq, dd->drq_pool);
+	rq->elevator_private = NULL;
 }
 
 static int
 deadline_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-		     int gfp_mask)
+		     gfp_t gfp_mask)
 {
 	struct deadline_data *dd = q->elevator->elevator_data;
 	struct deadline_rq *drq;
@@ -917,9 +831,8 @@ static struct elevator_type iosched_deadline = {
 		.elevator_merge_fn = 		deadline_merge,
 		.elevator_merged_fn =		deadline_merged_request,
 		.elevator_merge_req_fn =	deadline_merged_requests,
-		.elevator_next_req_fn =		deadline_next_request,
-		.elevator_add_req_fn =		deadline_insert_request,
-		.elevator_remove_req_fn =	deadline_remove_request,
+		.elevator_dispatch_fn =		deadline_dispatch_requests,
+		.elevator_add_req_fn =		deadline_add_request,
 		.elevator_queue_empty_fn =	deadline_queue_empty,
 		.elevator_former_req_fn =	deadline_former_request,
 		.elevator_latter_req_fn =	deadline_latter_request,
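
The deadline conversion shows the same shape in its simplest form: the scheduler keeps only private FIFO/rbtree state, and the core pulls work through elevator_dispatch_fn instead of peeking via elevator_next_req_fn. A toy model of that contract (singly linked lists instead of list_head; every name here is illustrative):

    #include <stddef.h>

    struct toy_rq { struct toy_rq *next; };

    struct toy_queue {
    	struct toy_rq *fifo;		/* elevator-private list */
    	struct toy_rq *dispatch;	/* stands in for q->queue_head */
    };

    /* move one request to the dispatch list; nonzero if anything moved */
    static int toy_dispatch(struct toy_queue *q, int force)
    {
    	struct toy_rq *rq = q->fifo;

    	(void)force;		/* a real elevator may hold work back unless forced */
    	if (!rq)
    		return 0;
    	q->fifo = rq->next;
    	rq->next = q->dispatch;	/* front insert; ordering is not the point here */
    	q->dispatch = rq;
    	return 1;
    }

Forced draining, as used by ELEVATOR_INSERT_BACK and the elevator switch below, is then just while (toy_dispatch(q, 1)) ;.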

+ 207 - 138
drivers/block/elevator.c

@@ -34,6 +34,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
+#include <linux/delay.h>
 
 #include <asm/uaccess.h>
 
@@ -83,21 +84,11 @@ inline int elv_try_merge(struct request *__rq, struct bio *bio)
 }
 EXPORT_SYMBOL(elv_try_merge);
 
-inline int elv_try_last_merge(request_queue_t *q, struct bio *bio)
-{
-	if (q->last_merge)
-		return elv_try_merge(q->last_merge, bio);
-
-	return ELEVATOR_NO_MERGE;
-}
-EXPORT_SYMBOL(elv_try_last_merge);
-
 static struct elevator_type *elevator_find(const char *name)
 {
 	struct elevator_type *e = NULL;
 	struct list_head *entry;
 
-	spin_lock_irq(&elv_list_lock);
 	list_for_each(entry, &elv_list) {
 		struct elevator_type *__e;
 
@@ -108,7 +99,6 @@ static struct elevator_type *elevator_find(const char *name)
 			break;
 		}
 	}
-	spin_unlock_irq(&elv_list_lock);
 
 	return e;
 }
@@ -120,12 +110,15 @@ static void elevator_put(struct elevator_type *e)
 
 static struct elevator_type *elevator_get(const char *name)
 {
-	struct elevator_type *e = elevator_find(name);
+	struct elevator_type *e;
 
-	if (!e)
-		return NULL;
-	if (!try_module_get(e->elevator_owner))
-		return NULL;
+	spin_lock_irq(&elv_list_lock);
+
+	e = elevator_find(name);
+	if (e && !try_module_get(e->elevator_owner))
+		e = NULL;
+
+	spin_unlock_irq(&elv_list_lock);
 
 	return e;
 }
@@ -139,8 +132,6 @@ static int elevator_attach(request_queue_t *q, struct elevator_type *e,
 	eq->ops = &e->ops;
 	eq->elevator_type = e;
 
-	INIT_LIST_HEAD(&q->queue_head);
-	q->last_merge = NULL;
 	q->elevator = eq;
 
 	if (eq->ops->elevator_init_fn)
@@ -153,11 +144,15 @@ static char chosen_elevator[16];
 
 static void elevator_setup_default(void)
 {
+	struct elevator_type *e;
+
 	/*
 	 * check if default is set and exists
 	 */
-	if (chosen_elevator[0] && elevator_find(chosen_elevator))
+	if (chosen_elevator[0] && (e = elevator_get(chosen_elevator))) {
+		elevator_put(e);
 		return;
+	}
 
 #if defined(CONFIG_IOSCHED_AS)
 	strcpy(chosen_elevator, "anticipatory");
@@ -186,6 +181,11 @@ int elevator_init(request_queue_t *q, char *name)
 	struct elevator_queue *eq;
 	int ret = 0;
 
+	INIT_LIST_HEAD(&q->queue_head);
+	q->last_merge = NULL;
+	q->end_sector = 0;
+	q->boundary_rq = NULL;
+
 	elevator_setup_default();
 
 	if (!name)
@@ -220,9 +220,52 @@ void elevator_exit(elevator_t *e)
 	kfree(e);
 }
 
+/*
+ * Insert rq into the dispatch queue of q, sector-sorted relative to
+ * the current scheduling boundary.  Queue lock must be held on entry.
+ * To be used by specific elevators.
+ */
+void elv_dispatch_sort(request_queue_t *q, struct request *rq)
+{
+	sector_t boundary;
+	struct list_head *entry;
+
+	if (q->last_merge == rq)
+		q->last_merge = NULL;
+
+	boundary = q->end_sector;
+
+	list_for_each_prev(entry, &q->queue_head) {
+		struct request *pos = list_entry_rq(entry);
+
+		if (pos->flags & (REQ_SOFTBARRIER|REQ_HARDBARRIER|REQ_STARTED))
+			break;
+		if (rq->sector >= boundary) {
+			if (pos->sector < boundary)
+				continue;
+		} else {
+			if (pos->sector >= boundary)
+				break;
+		}
+		if (rq->sector >= pos->sector)
+			break;
+	}
+
+	list_add(&rq->queuelist, entry);
+}
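
elv_dispatch_sort() above implements a one-way elevator around q->end_sector: requests at or past the boundary are served first in ascending sector order, then the requests that wrapped back below it. A standalone model of that ordering (sector arithmetic only, nothing kernel-specific):

    #include <stdio.h>

    /* nonzero if sector a should be dispatched before sector b */
    static int dispatched_before(unsigned long long a, unsigned long long b,
    			     unsigned long long boundary)
    {
    	int a_wrapped = a < boundary;
    	int b_wrapped = b < boundary;

    	if (a_wrapped != b_wrapped)
    		return !a_wrapped;	/* non-wrapped group goes first */
    	return a < b;			/* within a group, ascending sector */
    }

    int main(void)
    {
    	printf("%d\n", dispatched_before(150, 10, 100));	/* 1 */
    	printf("%d\n", dispatched_before(10, 50, 100));		/* 1 */
    	printf("%d\n", dispatched_before(110, 120, 100));	/* 1 */
    	return 0;
    }

Barrier and already-started requests pin the scan, which is why the loop breaks on REQ_SOFTBARRIER/REQ_HARDBARRIER/REQ_STARTED before any sector comparison.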
+
 int elv_merge(request_queue_t *q, struct request **req, struct bio *bio)
 {
 	elevator_t *e = q->elevator;
+	int ret;
+
+	if (q->last_merge) {
+		ret = elv_try_merge(q->last_merge, bio);
+		if (ret != ELEVATOR_NO_MERGE) {
+			*req = q->last_merge;
+			return ret;
+		}
+	}
 
 	if (e->ops->elevator_merge_fn)
 		return e->ops->elevator_merge_fn(q, req, bio);
@@ -236,6 +279,8 @@ void elv_merged_request(request_queue_t *q, struct request *rq)
 
 	if (e->ops->elevator_merged_fn)
 		e->ops->elevator_merged_fn(q, rq);
+
+	q->last_merge = rq;
 }
 
 void elv_merge_requests(request_queue_t *q, struct request *rq,
@@ -243,20 +288,13 @@ void elv_merge_requests(request_queue_t *q, struct request *rq,
 {
 	elevator_t *e = q->elevator;
 
-	if (q->last_merge == next)
-		q->last_merge = NULL;
-
 	if (e->ops->elevator_merge_req_fn)
 		e->ops->elevator_merge_req_fn(q, rq, next);
+
+	q->last_merge = rq;
 }
 
-/*
- * For careful internal use by the block layer. Essentially the same as
- * a requeue in that it tells the io scheduler that this request is not
- * active in the driver or hardware anymore, but we don't want the request
- * added back to the scheduler. Function is not exported.
- */
-void elv_deactivate_request(request_queue_t *q, struct request *rq)
+void elv_requeue_request(request_queue_t *q, struct request *rq)
 {
 	elevator_t *e = q->elevator;
 
@@ -264,19 +302,14 @@ void elv_deactivate_request(request_queue_t *q, struct request *rq)
 	 * it already went through dequeue, we need to decrement the
 	 * in_flight count again
 	 */
-	if (blk_account_rq(rq))
+	if (blk_account_rq(rq)) {
 		q->in_flight--;
+		if (blk_sorted_rq(rq) && e->ops->elevator_deactivate_req_fn)
+			e->ops->elevator_deactivate_req_fn(q, rq);
+	}
 
 	rq->flags &= ~REQ_STARTED;
 
-	if (e->ops->elevator_deactivate_req_fn)
-		e->ops->elevator_deactivate_req_fn(q, rq);
-}
-
-void elv_requeue_request(request_queue_t *q, struct request *rq)
-{
-	elv_deactivate_request(q, rq);
-
 	/*
 	 * if this is the flush, requeue the original instead and drop the flush
 	 */
@@ -285,31 +318,27 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
 		rq = rq->end_io_data;
 	}
 
-	/*
-	 * the request is prepped and may have some resources allocated.
-	 * allowing unprepped requests to pass this one may cause resource
-	 * deadlock.  turn on softbarrier.
-	 */
-	rq->flags |= REQ_SOFTBARRIER;
-
-	/*
-	 * if iosched has an explicit requeue hook, then use that. otherwise
-	 * just put the request at the front of the queue
-	 */
-	if (q->elevator->ops->elevator_requeue_req_fn)
-		q->elevator->ops->elevator_requeue_req_fn(q, rq);
-	else
-		__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
+	__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
 }
 
 void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 		       int plug)
 {
-	/*
-	 * barriers implicitly indicate back insertion
-	 */
-	if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER) &&
-	    where == ELEVATOR_INSERT_SORT)
+	if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
+		/*
+		 * barriers implicitly indicate back insertion
+		 */
+		if (where == ELEVATOR_INSERT_SORT)
+			where = ELEVATOR_INSERT_BACK;
+
+		/*
+		 * this request is scheduling boundary, update end_sector
+		 */
+		if (blk_fs_request(rq)) {
+			q->end_sector = rq_end_sector(rq);
+			q->boundary_rq = rq;
+		}
+	} else if (!(rq->flags & REQ_ELVPRIV) && where == ELEVATOR_INSERT_SORT)
 		where = ELEVATOR_INSERT_BACK;
 
 	if (plug)
@@ -317,23 +346,54 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 
 	rq->q = q;
 
-	if (!test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) {
-		q->elevator->ops->elevator_add_req_fn(q, rq, where);
+	switch (where) {
+	case ELEVATOR_INSERT_FRONT:
+		rq->flags |= REQ_SOFTBARRIER;
 
-		if (blk_queue_plugged(q)) {
-			int nrq = q->rq.count[READ] + q->rq.count[WRITE]
-				  - q->in_flight;
+		list_add(&rq->queuelist, &q->queue_head);
+		break;
 
-			if (nrq >= q->unplug_thresh)
-				__generic_unplug_device(q);
-		}
-	} else
+	case ELEVATOR_INSERT_BACK:
+		rq->flags |= REQ_SOFTBARRIER;
+
+		while (q->elevator->ops->elevator_dispatch_fn(q, 1))
+			;
+		list_add_tail(&rq->queuelist, &q->queue_head);
 		/*
-		 * if drain is set, store the request "locally". when the drain
-		 * is finished, the requests will be handed ordered to the io
-		 * scheduler
+		 * We kick the queue here for the following reasons.
+		 * - The elevator might have returned NULL previously
+		 *   to delay requests and returned them now.  As the
+		 *   queue wasn't empty before this request, ll_rw_blk
+		 *   won't run the queue on return, resulting in hang.
+		 * - Usually, back inserted requests won't be merged
+		 *   with anything.  There's no point in delaying queue
+		 *   processing.
 		 */
-		list_add_tail(&rq->queuelist, &q->drain_list);
+		blk_remove_plug(q);
+		q->request_fn(q);
+		break;
+
+	case ELEVATOR_INSERT_SORT:
+		BUG_ON(!blk_fs_request(rq));
+		rq->flags |= REQ_SORTED;
+		q->elevator->ops->elevator_add_req_fn(q, rq);
+		if (q->last_merge == NULL && rq_mergeable(rq))
+			q->last_merge = rq;
+		break;
+
+	default:
+		printk(KERN_ERR "%s: bad insertion point %d\n",
+		       __FUNCTION__, where);
+		BUG();
+	}
+
+	if (blk_queue_plugged(q)) {
+		int nrq = q->rq.count[READ] + q->rq.count[WRITE]
+			- q->in_flight;
+
+		if (nrq >= q->unplug_thresh)
+			__generic_unplug_device(q);
+	}
 }
 
 void elv_add_request(request_queue_t *q, struct request *rq, int where,
@@ -348,13 +408,19 @@ void elv_add_request(request_queue_t *q, struct request *rq, int where,
 
 static inline struct request *__elv_next_request(request_queue_t *q)
 {
-	struct request *rq = q->elevator->ops->elevator_next_req_fn(q);
+	struct request *rq;
+
+	if (unlikely(list_empty(&q->queue_head) &&
+		     !q->elevator->ops->elevator_dispatch_fn(q, 0)))
+		return NULL;
+
+	rq = list_entry_rq(q->queue_head.next);
 
 	/*
 	 * if this is a barrier write and the device has to issue a
 	 * flush sequence to support it, check how far we are
 	 */
-	if (rq && blk_fs_request(rq) && blk_barrier_rq(rq)) {
+	if (blk_fs_request(rq) && blk_barrier_rq(rq)) {
 		BUG_ON(q->ordered == QUEUE_ORDERED_NONE);
 
 		if (q->ordered == QUEUE_ORDERED_FLUSH &&
@@ -371,15 +437,30 @@ struct request *elv_next_request(request_queue_t *q)
 	int ret;
 
 	while ((rq = __elv_next_request(q)) != NULL) {
-		/*
-		 * just mark as started even if we don't start it, a request
-		 * that has been delayed should not be passed by new incoming
-		 * requests
-		 */
-		rq->flags |= REQ_STARTED;
+		if (!(rq->flags & REQ_STARTED)) {
+			elevator_t *e = q->elevator;
 
-		if (rq == q->last_merge)
-			q->last_merge = NULL;
+			/*
+			 * This is the first time the device driver
+			 * sees this request (possibly after
+			 * requeueing).  Notify IO scheduler.
+			 */
+			if (blk_sorted_rq(rq) &&
+			    e->ops->elevator_activate_req_fn)
+				e->ops->elevator_activate_req_fn(q, rq);
+
+			/*
+			 * just mark as started even if we don't start
+			 * it, a request that has been delayed should
+			 * not be passed by new incoming requests
+			 */
+			rq->flags |= REQ_STARTED;
+		}
+
+		if (!q->boundary_rq || q->boundary_rq == rq) {
+			q->end_sector = rq_end_sector(rq);
+			q->boundary_rq = NULL;
+		}
 
 		if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
 			break;
@@ -391,9 +472,9 @@ struct request *elv_next_request(request_queue_t *q)
 			/*
 			 * the request may have been (partially) prepped.
 			 * we need to keep this request in the front to
-			 * avoid resource deadlock.  turn on softbarrier.
+			 * avoid resource deadlock.  REQ_STARTED will
+			 * prevent other fs requests from passing this one.
 			 */
-			rq->flags |= REQ_SOFTBARRIER;
 			rq = NULL;
 			break;
 		} else if (ret == BLKPREP_KILL) {
@@ -416,42 +497,32 @@ struct request *elv_next_request(request_queue_t *q)
 	return rq;
 }
 
-void elv_remove_request(request_queue_t *q, struct request *rq)
+void elv_dequeue_request(request_queue_t *q, struct request *rq)
 {
-	elevator_t *e = q->elevator;
+	BUG_ON(list_empty(&rq->queuelist));
+
+	list_del_init(&rq->queuelist);
 
 	/*
 	 * the time frame between a request being removed from the lists
 	 * and to it is freed is accounted as io that is in progress at
-	 * the driver side. note that we only account requests that the
-	 * driver has seen (REQ_STARTED set), to avoid false accounting
-	 * for request-request merges
+	 * the driver side.
 	 */
 	if (blk_account_rq(rq))
 		q->in_flight++;
-
-	/*
-	 * the main clearing point for q->last_merge is on retrieval of
-	 * request by driver (it calls elv_next_request()), but it _can_
-	 * also happen here if a request is added to the queue but later
-	 * deleted without ever being given to driver (merged with another
-	 * request).
-	 */
-	if (rq == q->last_merge)
-		q->last_merge = NULL;
-
-	if (e->ops->elevator_remove_req_fn)
-		e->ops->elevator_remove_req_fn(q, rq);
 }
 
 int elv_queue_empty(request_queue_t *q)
 {
 	elevator_t *e = q->elevator;
 
+	if (!list_empty(&q->queue_head))
+		return 0;
+
 	if (e->ops->elevator_queue_empty_fn)
 		return e->ops->elevator_queue_empty_fn(q);
 
-	return list_empty(&q->queue_head);
+	return 1;
 }
 
 struct request *elv_latter_request(request_queue_t *q, struct request *rq)
@@ -487,7 +558,7 @@ struct request *elv_former_request(request_queue_t *q, struct request *rq)
 }
 
 int elv_set_request(request_queue_t *q, struct request *rq, struct bio *bio,
-		    int gfp_mask)
+		    gfp_t gfp_mask)
 {
 	elevator_t *e = q->elevator;
 
@@ -523,11 +594,11 @@ void elv_completed_request(request_queue_t *q, struct request *rq)
 	/*
 	 * request is released from the driver, io must be done
 	 */
-	if (blk_account_rq(rq))
+	if (blk_account_rq(rq)) {
 		q->in_flight--;
-
-	if (e->ops->elevator_completed_req_fn)
-		e->ops->elevator_completed_req_fn(q, rq);
+		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
+			e->ops->elevator_completed_req_fn(q, rq);
+	}
 }
 
 int elv_register_queue(struct request_queue *q)
@@ -555,10 +626,9 @@ void elv_unregister_queue(struct request_queue *q)
 
 int elv_register(struct elevator_type *e)
 {
+	spin_lock_irq(&elv_list_lock);
 	if (elevator_find(e->elevator_name))
 		BUG();
-
-	spin_lock_irq(&elv_list_lock);
 	list_add_tail(&e->list, &elv_list);
 	spin_unlock_irq(&elv_list_lock);
 
@@ -582,25 +652,36 @@ EXPORT_SYMBOL_GPL(elv_unregister);
  * switch to new_e io scheduler. be careful not to introduce deadlocks -
  * we don't free the old io scheduler, before we have allocated what we
  * need for the new one. this way we have a chance of going back to the old
- * one, if the new one fails init for some reason. we also do an intermediate
- * switch to noop to ensure safety with stack-allocated requests, since they
- * don't originate from the block layer allocator. noop is safe here, because
- * it never needs to touch the elevator itself for completion events. DRAIN
- * flags will make sure we don't touch it for additions either.
+ * one, if the new one fails init for some reason.
  */
 static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 {
-	elevator_t *e = kmalloc(sizeof(elevator_t), GFP_KERNEL);
-	struct elevator_type *noop_elevator = NULL;
-	elevator_t *old_elevator;
+	elevator_t *old_elevator, *e;
 
+	/*
+	 * Allocate new elevator
+	 */
+	e = kmalloc(sizeof(elevator_t), GFP_KERNEL);
 	if (!e)
 		goto error;
 
 	/*
-	 * first step, drain requests from the block freelist
+	 * Turn on BYPASS and drain all requests w/ elevator private data
 	 */
-	blk_wait_queue_drained(q, 0);
+	spin_lock_irq(q->queue_lock);
+
+	set_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+
+	while (q->elevator->ops->elevator_dispatch_fn(q, 1))
+		;
+
+	while (q->rq.elvpriv) {
+		spin_unlock_irq(q->queue_lock);
+		msleep(10);
+		spin_lock_irq(q->queue_lock);
+	}
+
+	spin_unlock_irq(q->queue_lock);
 
 	/*
 	 * unregister old elevator data
@@ -608,18 +689,6 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 	elv_unregister_queue(q);
 	old_elevator = q->elevator;
 
-	/*
- 	 * next step, switch to noop since it uses no private rq structures
-	 * and doesn't allocate any memory for anything. then wait for any
-	 * non-fs requests in-flight
- 	 */
-	noop_elevator = elevator_get("noop");
-	spin_lock_irq(q->queue_lock);
-	elevator_attach(q, noop_elevator, e);
-	spin_unlock_irq(q->queue_lock);
-
-	blk_wait_queue_drained(q, 1);
-
 	/*
 	 * attach and start new elevator
 	 */
@@ -630,11 +699,10 @@ static void elevator_switch(request_queue_t *q, struct elevator_type *new_e)
 		goto fail_register;
 
 	/*
-	 * finally exit old elevator and start queue again
+	 * finally exit old elevator and turn off BYPASS.
 	 */
 	elevator_exit(old_elevator);
-	blk_finish_queue_drain(q);
-	elevator_put(noop_elevator);
+	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
 	return;
 
 fail_register:
@@ -643,13 +711,13 @@ fail_register:
 	 * one again (along with re-adding the sysfs dir)
 	 */
 	elevator_exit(e);
+	e = NULL;
 fail:
 	q->elevator = old_elevator;
 	elv_register_queue(q);
-	blk_finish_queue_drain(q);
+	clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+	kfree(e);
 error:
-	if (noop_elevator)
-		elevator_put(noop_elevator);
 	elevator_put(new_e);
 	printk(KERN_ERR "elevator: switch to %s failed\n",new_e->elevator_name);
 }
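
Condensed, the new elevator_switch() is a bypass-and-drain sequence. A compilable toy of its shape (counters stand in for the request lists; msleep and locking are elided):

    #include <assert.h>

    struct toy_queue {
    	int elvswitch;		/* QUEUE_FLAG_ELVSWITCH */
    	int elvpriv;		/* rl->elvpriv: rqs holding elevator data */
    	int backlog;		/* rqs still inside the old elevator */
    };

    static int toy_force_dispatch(struct toy_queue *q)
    {
    	if (!q->backlog)
    		return 0;
    	q->backlog--;		/* moved onto the dispatch list */
    	return 1;
    }

    static void toy_switch_drain(struct toy_queue *q)
    {
    	q->elvswitch = 1;	/* new requests now skip elv_set_request() */

    	while (toy_force_dispatch(q))
    		;		/* empty the old elevator */

    	while (q->elvpriv)
    		q->elvpriv--;	/* stands in for msleep(10) + completions */

    	assert(q->backlog == 0);
    }

Because get_request() below checks QUEUE_FLAG_ELVSWITCH before calling elv_set_request(), rq.elvpriv can only fall while the flag is set, so the wait terminates.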
@@ -701,11 +769,12 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name)
 	return len;
 }
 
+EXPORT_SYMBOL(elv_dispatch_sort);
 EXPORT_SYMBOL(elv_add_request);
 EXPORT_SYMBOL(__elv_add_request);
 EXPORT_SYMBOL(elv_requeue_request);
 EXPORT_SYMBOL(elv_next_request);
-EXPORT_SYMBOL(elv_remove_request);
+EXPORT_SYMBOL(elv_dequeue_request);
 EXPORT_SYMBOL(elv_queue_empty);
 EXPORT_SYMBOL(elv_completed_request);
 EXPORT_SYMBOL(elevator_exit);

+ 55 - 138
drivers/block/ll_rw_blk.c

@@ -263,8 +263,6 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
 	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 
 	blk_queue_activity_fn(q, NULL, NULL);
-
-	INIT_LIST_HEAD(&q->drain_list);
 }
 
 EXPORT_SYMBOL(blk_queue_make_request);
@@ -353,6 +351,8 @@ static void blk_pre_flush_end_io(struct request *flush_rq)
 	struct request *rq = flush_rq->end_io_data;
 	request_queue_t *q = rq->q;
 
+	elv_completed_request(q, flush_rq);
+
 	rq->flags |= REQ_BAR_PREFLUSH;
 
 	if (!flush_rq->errors)
@@ -369,6 +369,8 @@ static void blk_post_flush_end_io(struct request *flush_rq)
 	struct request *rq = flush_rq->end_io_data;
 	request_queue_t *q = rq->q;
 
+	elv_completed_request(q, flush_rq);
+
 	rq->flags |= REQ_BAR_POSTFLUSH;
 
 	q->end_flush_fn(q, flush_rq);
@@ -408,8 +410,6 @@ struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq)
 	if (!list_empty(&rq->queuelist))
 		blkdev_dequeue_request(rq);
 
-	elv_deactivate_request(q, rq);
-
 	flush_rq->end_io_data = rq;
 	flush_rq->end_io = blk_pre_flush_end_io;
 
@@ -1040,6 +1040,7 @@ EXPORT_SYMBOL(blk_queue_invalidate_tags);
 static char *rq_flags[] = {
 	"REQ_RW",
 	"REQ_FAILFAST",
+	"REQ_SORTED",
 	"REQ_SOFTBARRIER",
 	"REQ_HARDBARRIER",
 	"REQ_CMD",
@@ -1047,6 +1048,7 @@ static char *rq_flags[] = {
 	"REQ_STARTED",
 	"REQ_DONTPREP",
 	"REQ_QUEUED",
+	"REQ_ELVPRIV",
 	"REQ_PC",
 	"REQ_BLOCK_PC",
 	"REQ_SENSE",
@@ -1637,9 +1639,9 @@ static int blk_init_free_list(request_queue_t *q)
 
 	rl->count[READ] = rl->count[WRITE] = 0;
 	rl->starved[READ] = rl->starved[WRITE] = 0;
+	rl->elvpriv = 0;
 	init_waitqueue_head(&rl->wait[READ]);
 	init_waitqueue_head(&rl->wait[WRITE]);
-	init_waitqueue_head(&rl->drain);
 
 	rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab,
 				mempool_free_slab, request_cachep, q->node);
@@ -1652,13 +1654,13 @@ static int blk_init_free_list(request_queue_t *q)
 
 static int __make_request(request_queue_t *, struct bio *);
 
-request_queue_t *blk_alloc_queue(int gfp_mask)
+request_queue_t *blk_alloc_queue(gfp_t gfp_mask)
 {
 	return blk_alloc_queue_node(gfp_mask, -1);
 }
 EXPORT_SYMBOL(blk_alloc_queue);
 
-request_queue_t *blk_alloc_queue_node(int gfp_mask, int node_id)
+request_queue_t *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 {
 	request_queue_t *q;
 
@@ -1782,12 +1784,14 @@ EXPORT_SYMBOL(blk_get_queue);
 
 static inline void blk_free_request(request_queue_t *q, struct request *rq)
 {
-	elv_put_request(q, rq);
+	if (rq->flags & REQ_ELVPRIV)
+		elv_put_request(q, rq);
 	mempool_free(rq, q->rq.rq_pool);
 }
 
 static inline struct request *
-blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask)
+blk_alloc_request(request_queue_t *q, int rw, struct bio *bio,
+		  int priv, gfp_t gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
@@ -1800,11 +1804,15 @@ blk_alloc_request(request_queue_t *q, int rw, struct bio *bio, int gfp_mask)
 	 */
 	rq->flags = rw;
 
-	if (!elv_set_request(q, rq, bio, gfp_mask))
-		return rq;
+	if (priv) {
+		if (unlikely(elv_set_request(q, rq, bio, gfp_mask))) {
+			mempool_free(rq, q->rq.rq_pool);
+			return NULL;
+		}
+		rq->flags |= REQ_ELVPRIV;
+	}
 
-	mempool_free(rq, q->rq.rq_pool);
-	return NULL;
+	return rq;
 }
 
 /*
@@ -1860,22 +1868,18 @@ static void __freed_request(request_queue_t *q, int rw)
  * A request has just been released.  Account for it, update the full and
  * congestion status, wake up any waiters.   Called under q->queue_lock.
  */
-static void freed_request(request_queue_t *q, int rw)
+static void freed_request(request_queue_t *q, int rw, int priv)
 {
 	struct request_list *rl = &q->rq;
 
 	rl->count[rw]--;
+	if (priv)
+		rl->elvpriv--;
 
 	__freed_request(q, rw);
 
 	if (unlikely(rl->starved[rw ^ 1]))
 		__freed_request(q, rw ^ 1);
-
-	if (!rl->count[READ] && !rl->count[WRITE]) {
-		smp_mb();
-		if (unlikely(waitqueue_active(&rl->drain)))
-			wake_up(&rl->drain);
-	}
 }
 
 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
@@ -1885,14 +1889,12 @@ static void freed_request(request_queue_t *q, int rw)
  * Returns !NULL on success, with queue_lock *not held*.
  */
 static struct request *get_request(request_queue_t *q, int rw, struct bio *bio,
-				   int gfp_mask)
+				   gfp_t gfp_mask)
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
 	struct io_context *ioc = current_io_context(GFP_ATOMIC);
-
-	if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)))
-		goto out;
+	int priv;
 
 	if (rl->count[rw]+1 >= q->nr_requests) {
 		/*
@@ -1937,9 +1939,14 @@ get_rq:
 	rl->starved[rw] = 0;
 	if (rl->count[rw] >= queue_congestion_on_threshold(q))
 		set_queue_congested(q, rw);
+
+	priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+	if (priv)
+		rl->elvpriv++;
+
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, rw, bio, gfp_mask);
+	rq = blk_alloc_request(q, rw, bio, priv, gfp_mask);
 	if (!rq) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
@@ -1949,7 +1956,7 @@ get_rq:
 		 * wait queue, but this is pretty rare.
 		 */
 		spin_lock_irq(q->queue_lock);
-		freed_request(q, rw);
+		freed_request(q, rw, priv);
 
 		/*
 		 * in the very unlikely event that allocation failed and no
@@ -2019,7 +2026,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw,
 	return rq;
 }
 
-struct request *blk_get_request(request_queue_t *q, int rw, int gfp_mask)
+struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask)
 {
 	struct request *rq;
 
@@ -2251,7 +2258,7 @@ EXPORT_SYMBOL(blk_rq_unmap_user);
  * @gfp_mask:	memory allocation flags
  */
 int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf,
-		    unsigned int len, unsigned int gfp_mask)
+		    unsigned int len, gfp_t gfp_mask)
 {
 	struct bio *bio;
 
@@ -2433,13 +2440,15 @@ void disk_round_stats(struct gendisk *disk)
 {
 	unsigned long now = jiffies;
 
-	__disk_stat_add(disk, time_in_queue,
-			disk->in_flight * (now - disk->stamp));
-	disk->stamp = now;
+	if (now == disk->stamp)
+		return;
 
-	if (disk->in_flight)
-		__disk_stat_add(disk, io_ticks, (now - disk->stamp_idle));
-	disk->stamp_idle = now;
+	if (disk->in_flight) {
+		__disk_stat_add(disk, time_in_queue,
+				disk->in_flight * (now - disk->stamp));
+		__disk_stat_add(disk, io_ticks, (now - disk->stamp));
+	}
+	disk->stamp = now;
 }
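
The rewritten disk_round_stats() merges the old stamp/stamp_idle pair into one timestamp and only accumulates while requests are actually in flight, bailing out early when called twice in the same jiffy. A plain-C sketch of the invariant (field names shortened; not the real gendisk):

    struct stats_sketch {
    	unsigned long stamp;		/* last accounting point, in jiffies */
    	unsigned long io_ticks;		/* time with at least one rq in flight */
    	unsigned long time_in_queue;	/* in_flight-weighted queue time */
    	unsigned int in_flight;
    };

    static void sketch_round_stats(struct stats_sketch *d, unsigned long now)
    {
    	if (now == d->stamp)
    		return;			/* same jiffy: avoid double-counting */
    	if (d->in_flight) {
    		d->time_in_queue += d->in_flight * (now - d->stamp);
    		d->io_ticks += now - d->stamp;
    	}
    	d->stamp = now;
    }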
 
 /*
@@ -2454,6 +2463,8 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
 	if (unlikely(--req->ref_count))
 		return;
 
+	elv_completed_request(q, req);
+
 	req->rq_status = RQ_INACTIVE;
 	req->rl = NULL;
 
@@ -2463,26 +2474,25 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
 	 */
 	if (rl) {
 		int rw = rq_data_dir(req);
-
-		elv_completed_request(q, req);
+		int priv = req->flags & REQ_ELVPRIV;
 
 		BUG_ON(!list_empty(&req->queuelist));
 
 		blk_free_request(q, req);
-		freed_request(q, rw);
+		freed_request(q, rw, priv);
 	}
 }
 
 void blk_put_request(struct request *req)
 {
+	unsigned long flags;
+	request_queue_t *q = req->q;
+
 	/*
-	 * if req->rl isn't set, this request didnt originate from the
-	 * block layer, so it's safe to just disregard it
+	 * Gee, IDE calls in w/ NULL q.  Fix IDE and remove the
+	 * following if (q) test.
 	 */
-	if (req->rl) {
-		unsigned long flags;
-		request_queue_t *q = req->q;
-
+	if (q) {
 		spin_lock_irqsave(q->queue_lock, flags);
 		__blk_put_request(q, req);
 		spin_unlock_irqrestore(q->queue_lock, flags);
@@ -2797,97 +2807,6 @@ static inline void blk_partition_remap(struct bio *bio)
 	}
 }
 
-void blk_finish_queue_drain(request_queue_t *q)
-{
-	struct request_list *rl = &q->rq;
-	struct request *rq;
-	int requeued = 0;
-
-	spin_lock_irq(q->queue_lock);
-	clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
-
-	while (!list_empty(&q->drain_list)) {
-		rq = list_entry_rq(q->drain_list.next);
-
-		list_del_init(&rq->queuelist);
-		elv_requeue_request(q, rq);
-		requeued++;
-	}
-
-	if (requeued)
-		q->request_fn(q);
-
-	spin_unlock_irq(q->queue_lock);
-
-	wake_up(&rl->wait[0]);
-	wake_up(&rl->wait[1]);
-	wake_up(&rl->drain);
-}
-
-static int wait_drain(request_queue_t *q, struct request_list *rl, int dispatch)
-{
-	int wait = rl->count[READ] + rl->count[WRITE];
-
-	if (dispatch)
-		wait += !list_empty(&q->queue_head);
-
-	return wait;
-}
-
-/*
- * We rely on the fact that only requests allocated through blk_alloc_request()
- * have io scheduler private data structures associated with them. Any other
- * type of request (allocated on stack or through kmalloc()) should not go
- * to the io scheduler core, but be attached to the queue head instead.
- */
-void blk_wait_queue_drained(request_queue_t *q, int wait_dispatch)
-{
-	struct request_list *rl = &q->rq;
-	DEFINE_WAIT(wait);
-
-	spin_lock_irq(q->queue_lock);
-	set_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
-
-	while (wait_drain(q, rl, wait_dispatch)) {
-		prepare_to_wait(&rl->drain, &wait, TASK_UNINTERRUPTIBLE);
-
-		if (wait_drain(q, rl, wait_dispatch)) {
-			__generic_unplug_device(q);
-			spin_unlock_irq(q->queue_lock);
-			io_schedule();
-			spin_lock_irq(q->queue_lock);
-		}
-
-		finish_wait(&rl->drain, &wait);
-	}
-
-	spin_unlock_irq(q->queue_lock);
-}
-
-/*
- * block waiting for the io scheduler being started again.
- */
-static inline void block_wait_queue_running(request_queue_t *q)
-{
-	DEFINE_WAIT(wait);
-
-	while (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))) {
-		struct request_list *rl = &q->rq;
-
-		prepare_to_wait_exclusive(&rl->drain, &wait,
-				TASK_UNINTERRUPTIBLE);
-
-		/*
-		 * re-check the condition. avoids using prepare_to_wait()
-		 * in the fast path (queue is running)
-		 */
-		if (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))
-			io_schedule();
-
-		finish_wait(&rl->drain, &wait);
-	}
-}
-
 static void handle_bad_sector(struct bio *bio)
 {
 	char b[BDEVNAME_SIZE];
@@ -2983,8 +2902,6 @@ end_io:
 		if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags)))
 			goto end_io;
 
-		block_wait_queue_running(q);
-
 		/*
 		 * If this device has partitions, remap block n
 		 * of partition p to block n+start(p) of the disk.
@@ -3393,7 +3310,7 @@ void exit_io_context(void)
  * but since the current task itself holds a reference, the context can be
  * used in general code, so long as it stays within `current` context.
  */
-struct io_context *current_io_context(int gfp_flags)
+struct io_context *current_io_context(gfp_t gfp_flags)
 {
 	struct task_struct *tsk = current;
 	struct io_context *ret;
@@ -3424,7 +3341,7 @@ EXPORT_SYMBOL(current_io_context);
  *
  * This is always called in the context of the task which submitted the I/O.
  */
-struct io_context *get_io_context(int gfp_flags)
+struct io_context *get_io_context(gfp_t gfp_flags)
 {
 	struct io_context *ret;
 	ret = current_io_context(gfp_flags);
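
Several ll_rw_blk hunks above are one mechanism: whether a request carries elevator-private data is decided once at allocation time (priv is false while QUEUE_FLAG_ELVSWITCH is set), recorded in REQ_ELVPRIV, and consulted again on free so rl->elvpriv always balances. A toy version of the pairing:

    #include <assert.h>

    #define TOY_ELVPRIV 1		/* stands in for REQ_ELVPRIV */

    struct toy_rl { int count, elvpriv; };

    static int toy_alloc(struct toy_rl *rl, int priv)
    {
    	int flags = 0;

    	rl->count++;
    	if (priv) {
    		rl->elvpriv++;
    		flags |= TOY_ELVPRIV;	/* travels with the request */
    	}
    	return flags;
    }

    static void toy_free(struct toy_rl *rl, int flags)
    {
    	rl->count--;
    	if (flags & TOY_ELVPRIV)
    		rl->elvpriv--;
    	assert(rl->count >= 0 && rl->elvpriv >= 0);
    }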

+ 1 - 1
drivers/block/loop.c

@@ -881,7 +881,7 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
 static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
 {
 	struct file *filp = lo->lo_backing_file;
-	int gfp = lo->old_gfp_mask;
+	gfp_t gfp = lo->old_gfp_mask;
 
 	if (lo->lo_state != Lo_bound)
 		return -ENXIO;
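
This one-liner is representative of the tree-wide sweep in this merge (rd.c, n_tty.c, eth1394, the mthca driver, bitmap.c, dm-crypt and the block core all get the same treatment): allocation flags change from plain int to the gfp_t typedef, which carries a __bitwise annotation so sparse can flag flag-namespace mixups. A hypothetical helper in the annotated style:

    #include <linux/slab.h>

    /* hypothetical helper: the point is the gfp_t parameter type */
    static void *grab_buffer(size_t len, gfp_t gfp_mask)
    {
    	return kmalloc(len, gfp_mask);
    }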

+ 5 - 43
drivers/block/noop-iosched.c

@@ -7,57 +7,19 @@
 #include <linux/module.h>
 #include <linux/init.h>
 
-/*
- * See if we can find a request that this buffer can be coalesced with.
- */
-static int elevator_noop_merge(request_queue_t *q, struct request **req,
-			       struct bio *bio)
-{
-	int ret;
-
-	ret = elv_try_last_merge(q, bio);
-	if (ret != ELEVATOR_NO_MERGE)
-		*req = q->last_merge;
-
-	return ret;
-}
-
-static void elevator_noop_merge_requests(request_queue_t *q, struct request *req,
-					 struct request *next)
-{
-	list_del_init(&next->queuelist);
-}
-
-static void elevator_noop_add_request(request_queue_t *q, struct request *rq,
-				      int where)
+static void elevator_noop_add_request(request_queue_t *q, struct request *rq)
 {
-	if (where == ELEVATOR_INSERT_FRONT)
-		list_add(&rq->queuelist, &q->queue_head);
-	else
-		list_add_tail(&rq->queuelist, &q->queue_head);
-
-	/*
-	 * new merges must not precede this barrier
-	 */
-	if (rq->flags & REQ_HARDBARRIER)
-		q->last_merge = NULL;
-	else if (!q->last_merge)
-		q->last_merge = rq;
+	elv_dispatch_add_tail(q, rq);
 }
 
-static struct request *elevator_noop_next_request(request_queue_t *q)
+static int elevator_noop_dispatch(request_queue_t *q, int force)
 {
-	if (!list_empty(&q->queue_head))
-		return list_entry_rq(q->queue_head.next);
-
-	return NULL;
+	return 0;
 }
 
 static struct elevator_type elevator_noop = {
 	.ops = {
-		.elevator_merge_fn		= elevator_noop_merge,
-		.elevator_merge_req_fn		= elevator_noop_merge_requests,
-		.elevator_next_req_fn		= elevator_noop_next_request,
+		.elevator_dispatch_fn		= elevator_noop_dispatch,
 		.elevator_add_req_fn		= elevator_noop_add_request,
 	},
 	.elevator_name = "noop",

+ 1 - 1
drivers/block/rd.c

@@ -348,7 +348,7 @@ static int rd_open(struct inode *inode, struct file *filp)
 		struct block_device *bdev = inode->i_bdev;
 		struct address_space *mapping;
 		unsigned bsize;
-		int gfp_mask;
+		gfp_t gfp_mask;
 
 		inode = igrab(bdev->bd_inode);
 		rd_bdev[unit] = bdev;

+ 2 - 1
drivers/char/drm/drm_vm.c

@@ -148,7 +148,8 @@ static __inline__ struct page *drm_do_vm_shm_nopage(struct vm_area_struct *vma,
 
 	offset	 = address - vma->vm_start;
 	i = (unsigned long)map->handle + offset;
-	page = vmalloc_to_page((void *)i);
+	page = (map->type == _DRM_CONSISTENT) ?
+		virt_to_page((void *)i) : vmalloc_to_page((void *)i);
 	if (!page)
 		return NOPAGE_OOM;
 	get_page(page);
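
The branch added here distinguishes the two kinds of kernel virtual addresses a map handle can hold: _DRM_CONSISTENT memory sits in the linear mapping, so virt_to_page() applies, while the other shm maps are vmalloc()ed and need the page-table walk done by vmalloc_to_page(). As a one-line sketch (illustrative helper, not DRM API):

    #include <linux/mm.h>
    #include <linux/vmalloc.h>

    static struct page *handle_to_page(void *addr, int is_linear)
    {
    	return is_linear ? virt_to_page(addr) : vmalloc_to_page(addr);
    }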

+ 1 - 1
drivers/char/drm/mga_drv.h

@@ -227,7 +227,7 @@ static inline u32 _MGA_READ(u32 *addr)
 #define MGA_EMIT_STATE( dev_priv, dirty )				\
 do {									\
 	if ( (dirty) & ~MGA_UPLOAD_CLIPRECTS ) {			\
-		if ( dev_priv->chipset == MGA_CARD_TYPE_G400 ) {	\
+		if ( dev_priv->chipset >= MGA_CARD_TYPE_G400 ) {	\
 			mga_g400_emit_state( dev_priv );		\
 		} else {						\
 			mga_g200_emit_state( dev_priv );		\

+ 1 - 1
drivers/char/drm/mga_state.c

@@ -53,7 +53,7 @@ static void mga_emit_clip_rect( drm_mga_private_t *dev_priv,
 
 	/* Force reset of DWGCTL on G400 (eliminates clip disable bit).
 	 */
-	if (dev_priv->chipset == MGA_CARD_TYPE_G400) {
+	if (dev_priv->chipset >= MGA_CARD_TYPE_G400) {
 		DMA_BLOCK(MGA_DWGCTL, ctx->dwgctl,
 			  MGA_LEN + MGA_EXEC, 0x80000000,
 			  MGA_DWGCTL, ctx->dwgctl,

+ 6 - 5
drivers/char/drm/radeon_cp.c

@@ -1133,10 +1133,10 @@ static void radeon_cp_init_ring_buffer( drm_device_t *dev,
 		ring_start = (dev_priv->cp_ring->offset
 			      - dev->agp->base
 			      + dev_priv->gart_vm_start);
-       } else
+	} else
 #endif
 		ring_start = (dev_priv->cp_ring->offset
-			      - dev->sg->handle
+			      - (unsigned long)dev->sg->virtual
 			      + dev_priv->gart_vm_start);
 
 	RADEON_WRITE( RADEON_CP_RB_BASE, ring_start );
@@ -1164,7 +1164,8 @@ static void radeon_cp_init_ring_buffer( drm_device_t *dev,
 		drm_sg_mem_t *entry = dev->sg;
 		unsigned long tmp_ofs, page_ofs;
 
-		tmp_ofs = dev_priv->ring_rptr->offset - dev->sg->handle;
+		tmp_ofs = dev_priv->ring_rptr->offset -
+				(unsigned long)dev->sg->virtual;
 		page_ofs = tmp_ofs >> PAGE_SHIFT;
 
 		RADEON_WRITE( RADEON_CP_RB_RPTR_ADDR,
@@ -1491,8 +1492,8 @@ static int radeon_do_init_cp( drm_device_t *dev, drm_radeon_init_t *init )
 	else
 #endif
 		dev_priv->gart_buffers_offset = (dev->agp_buffer_map->offset
-						- dev->sg->handle
-						+ dev_priv->gart_vm_start);
+					- (unsigned long)dev->sg->virtual
+					+ dev_priv->gart_vm_start);
 
 	DRM_DEBUG( "dev_priv->gart_size %d\n",
 		   dev_priv->gart_size );

+ 1 - 1
drivers/char/n_tty.c

@@ -62,7 +62,7 @@
 
 static inline unsigned char *alloc_buf(void)
 {
-	unsigned int prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
+	gfp_t prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
 
 	if (PAGE_SIZE != N_TTY_BUF_SIZE)
 		return kmalloc(N_TTY_BUF_SIZE, prio);

+ 3 - 3
drivers/cpufreq/cpufreq_conservative.c

@@ -315,9 +315,9 @@ static void dbs_check_cpu(int cpu)
 	policy = this_dbs_info->cur_policy;
 
 	if ( init_flag == 0 ) {
-		for ( /* NULL */; init_flag < NR_CPUS; init_flag++ ) {
-			dbs_info = &per_cpu(cpu_dbs_info, init_flag);
-			requested_freq[cpu] = dbs_info->cur_policy->cur;
+		for_each_online_cpu(j) {
+			dbs_info = &per_cpu(cpu_dbs_info, j);
+			requested_freq[j] = dbs_info->cur_policy->cur;
 		}
 		init_flag = 1;
 	}

+ 1 - 1
drivers/ieee1394/eth1394.c

@@ -1630,7 +1630,7 @@ static void ether1394_complete_cb(void *__ptask)
 /* Transmit a packet (called by kernel) */
 static int ether1394_tx (struct sk_buff *skb, struct net_device *dev)
 {
-	int kmflags = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
+	gfp_t kmflags = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
 	struct eth1394hdr *eth;
 	struct eth1394_priv *priv = netdev_priv(dev);
 	int proto;

+ 1 - 1
drivers/infiniband/hw/mthca/mthca_cmd.c

@@ -524,7 +524,7 @@ void mthca_cmd_use_polling(struct mthca_dev *dev)
 }
 
 struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
-					  unsigned int gfp_mask)
+					  gfp_t gfp_mask)
 {
 	struct mthca_mailbox *mailbox;
 

+ 1 - 1
drivers/infiniband/hw/mthca/mthca_cmd.h

@@ -248,7 +248,7 @@ void mthca_cmd_event(struct mthca_dev *dev, u16 token,
 		     u8  status, u64 out_param);
 
 struct mthca_mailbox *mthca_alloc_mailbox(struct mthca_dev *dev,
-					  unsigned int gfp_mask);
+					  gfp_t gfp_mask);
 void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox);
 
 int mthca_SYS_EN(struct mthca_dev *dev, u8 *status);

+ 11 - 10
drivers/infiniband/hw/mthca/mthca_eq.c

@@ -396,20 +396,21 @@ static irqreturn_t mthca_tavor_interrupt(int irq, void *dev_ptr, struct pt_regs
 		writel(dev->eq_table.clr_mask, dev->eq_table.clr_int);
 
 	ecr = readl(dev->eq_regs.tavor.ecr_base + 4);
-	if (ecr) {
-		writel(ecr, dev->eq_regs.tavor.ecr_base +
-		       MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);
+	if (!ecr)
+		return IRQ_NONE;
 
-		for (i = 0; i < MTHCA_NUM_EQ; ++i)
-			if (ecr & dev->eq_table.eq[i].eqn_mask &&
-			    mthca_eq_int(dev, &dev->eq_table.eq[i])) {
+	writel(ecr, dev->eq_regs.tavor.ecr_base +
+	       MTHCA_ECR_CLR_BASE - MTHCA_ECR_BASE + 4);
+
+	for (i = 0; i < MTHCA_NUM_EQ; ++i)
+		if (ecr & dev->eq_table.eq[i].eqn_mask) {
+			if (mthca_eq_int(dev, &dev->eq_table.eq[i]))
 				tavor_set_eq_ci(dev, &dev->eq_table.eq[i],
 						dev->eq_table.eq[i].cons_index);
-				tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
-			}
-	}
+			tavor_eq_req_not(dev, dev->eq_table.eq[i].eqn);
+		}
 
-	return IRQ_RETVAL(ecr);
+	return IRQ_HANDLED;
 }
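
The mthca rework is the common early-return interrupt idiom: report IRQ_NONE as soon as the hardware shows nothing pending (which matters on shared lines), then handle events at a single nesting level instead of returning IRQ_RETVAL(ecr). A generic sketch with hypothetical device accessors:

    #include <linux/interrupt.h>

    static irqreturn_t sketch_isr(int irq, void *dev_id, struct pt_regs *regs)
    {
    	u32 pending = sketch_read_events(dev_id);	/* hypothetical */

    	if (!pending)
    		return IRQ_NONE;	/* not ours; let other handlers try */

    	sketch_ack_events(dev_id, pending);		/* hypothetical */
    	return IRQ_HANDLED;
    }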
 
 static irqreturn_t mthca_tavor_msi_x_interrupt(int irq, void *eq_ptr,

+ 1 - 1
drivers/infiniband/hw/mthca/mthca_memfree.c

@@ -82,7 +82,7 @@ void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm)
 }
 
 struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
-				  unsigned int gfp_mask)
+				  gfp_t gfp_mask)
 {
 	struct mthca_icm *icm;
 	struct mthca_icm_chunk *chunk = NULL;

+ 1 - 1
drivers/infiniband/hw/mthca/mthca_memfree.h

@@ -77,7 +77,7 @@ struct mthca_icm_iter {
 struct mthca_dev;
 
 struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
-				  unsigned int gfp_mask);
+				  gfp_t gfp_mask);
 void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm);
 
 struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,

+ 1 - 1
drivers/md/bitmap.c

@@ -91,7 +91,7 @@ int bitmap_active(struct bitmap *bitmap)
 
 #define WRITE_POOL_SIZE 256
 /* mempool for queueing pending writes on the bitmap file */
-static void *write_pool_alloc(unsigned int gfp_flags, void *data)
+static void *write_pool_alloc(gfp_t gfp_flags, void *data)
 {
 	return kmalloc(sizeof(struct page_list), gfp_flags);
 }
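
write_pool_alloc() is a mempool_alloc_t callback, which is why its first argument carries the new gfp_t type. A pool built on such a pair looks like this (sketch; write_pool_free is the matching free callback assumed from elsewhere in bitmap.c):

	mempool_t *write_pool = mempool_create(WRITE_POOL_SIZE,
					       write_pool_alloc,
					       write_pool_free, NULL);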

+ 1 - 1
drivers/md/dm-crypt.c

@@ -331,7 +331,7 @@ crypt_alloc_buffer(struct crypt_config *cc, unsigned int size,
 {
 	struct bio *bio;
 	unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	int gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
+	gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
 	unsigned int i;
 
 	/*

+ 6 - 4
drivers/md/md.c

@@ -3568,7 +3568,8 @@ static void md_do_sync(mddev_t *mddev)
 		mddev->curr_resync = 2;
 
 	try_again:
-		if (signal_pending(current)) {
+		if (signal_pending(current) ||
+		    kthread_should_stop()) {
 			flush_signals(current);
 			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 			goto skip;
@@ -3590,8 +3591,9 @@ static void md_do_sync(mddev_t *mddev)
 					 */
 					continue;
 				prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
-				if (!signal_pending(current)
-				    && mddev2->curr_resync >= mddev->curr_resync) {
+				if (!signal_pending(current) &&
+				    !kthread_should_stop() &&
+				    mddev2->curr_resync >= mddev->curr_resync) {
 					printk(KERN_INFO "md: delaying resync of %s"
 					       " until %s has finished resync (they"
 					       " share one or more physical units)\n",
@@ -3697,7 +3699,7 @@ static void md_do_sync(mddev_t *mddev)
 		}
 
 
-		if (signal_pending(current)) {
+		if (signal_pending(current) || kthread_should_stop()) {
 			/*
 			 * got a signal, exit.
 			 */
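
Checking kthread_should_stop() next to signal_pending() lets a resync be interrupted either by a signal from user space or by an explicit kthread_stop() from the md core. The canonical worker loop (sketch, illustrative names):

	static int resync_worker(void *data)
	{
		while (!kthread_should_stop()) {
			if (signal_pending(current))
				flush_signals(current);
			/* ... perform one chunk of work ... */
			schedule_timeout_interruptible(HZ);
		}
		return 0;	/* collected by kthread_stop() */
	}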

+ 0 - 1
drivers/media/video/Kconfig

@@ -262,7 +262,6 @@ config VIDEO_SAA7134_DVB
 	depends on VIDEO_SAA7134 && DVB_CORE
 	select VIDEO_BUF_DVB
 	select DVB_MT352
-	select DVB_CX22702
 	select DVB_TDA1004X
 	---help---
 	  This adds support for DVB cards based on the

+ 8 - 4
drivers/message/fusion/mptsas.c

@@ -257,8 +257,8 @@ static void mptsas_print_device_pg0(SasDevicePage0_t *pg0)
 	printk("SAS Address=0x%llX\n", le64_to_cpu(sas_address));
 	printk("Target ID=0x%X\n", pg0->TargetID);
 	printk("Bus=0x%X\n", pg0->Bus);
-	printk("PhyNum=0x%X\n", pg0->PhyNum);
-	printk("AccessStatus=0x%X\n", le16_to_cpu(pg0->AccessStatus));
+	printk("Parent Phy Num=0x%X\n", pg0->PhyNum);
+	printk("Access Status=0x%X\n", le16_to_cpu(pg0->AccessStatus));
 	printk("Device Info=0x%X\n", le32_to_cpu(pg0->DeviceInfo));
 	printk("Flags=0x%X\n", le16_to_cpu(pg0->Flags));
 	printk("Physical Port=0x%X\n", pg0->PhysicalPort);
@@ -270,7 +270,7 @@ static void mptsas_print_expander_pg1(SasExpanderPage1_t *pg1)
 	printk("---- SAS EXPANDER PAGE 1 ------------\n");
 
 	printk("Physical Port=0x%X\n", pg1->PhysicalPort);
-	printk("PHY Identifier=0x%X\n", pg1->Phy);
+	printk("PHY Identifier=0x%X\n", pg1->PhyIdentifier);
 	printk("Negotiated Link Rate=0x%X\n", pg1->NegotiatedLinkRate);
 	printk("Programmed Link Rate=0x%X\n", pg1->ProgrammedLinkRate);
 	printk("Hardware Link Rate=0x%X\n", pg1->HwLinkRate);
@@ -604,7 +604,7 @@ mptsas_sas_expander_pg1(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info,
 	mptsas_print_expander_pg1(buffer);
 
 	/* save config data */
-	phy_info->phy_id = buffer->Phy;
+	phy_info->phy_id = buffer->PhyIdentifier;
 	phy_info->port_id = buffer->PhysicalPort;
 	phy_info->negotiated_link_rate = buffer->NegotiatedLinkRate;
 	phy_info->programmed_link_rate = buffer->ProgrammedLinkRate;
@@ -825,6 +825,8 @@ mptsas_probe_hba_phys(MPT_ADAPTER *ioc, int *index)
 		mptsas_sas_device_pg0(ioc, &port_info->phy_info[i].identify,
 			(MPI_SAS_DEVICE_PGAD_FORM_GET_NEXT_HANDLE <<
 			 MPI_SAS_DEVICE_PGAD_FORM_SHIFT), handle);
+		port_info->phy_info[i].identify.phy_id =
+		    port_info->phy_info[i].phy_id;
 		handle = port_info->phy_info[i].identify.handle;
 
 		if (port_info->phy_info[i].attached.handle) {
@@ -881,6 +883,8 @@ mptsas_probe_expander_phys(MPT_ADAPTER *ioc, u32 *handle, int *index)
 				(MPI_SAS_DEVICE_PGAD_FORM_HANDLE <<
 				 MPI_SAS_DEVICE_PGAD_FORM_SHIFT),
 				port_info->phy_info[i].identify.handle);
+			port_info->phy_info[i].identify.phy_id =
+			    port_info->phy_info[i].phy_id;
 		}
 
 		if (port_info->phy_info[i].attached.handle) {

+ 3 - 2
drivers/net/8139cp.c

@@ -1027,8 +1027,7 @@ static void cp_reset_hw (struct cp_private *cp)
 		if (!(cpr8(Cmd) & CmdReset))
 			return;
 
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		schedule_timeout(10);
+		schedule_timeout_uninterruptible(10);
 	}
 
 	printk(KERN_ERR "%s: hardware reset timeout\n", cp->dev->name);
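
schedule_timeout_uninterruptible() folds the two-line sleep into one call; it is essentially:

	signed long schedule_timeout_uninterruptible(signed long timeout)
	{
		__set_current_state(TASK_UNINTERRUPTIBLE);
		return schedule_timeout(timeout);
	}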
@@ -1575,6 +1574,7 @@ static struct ethtool_ops cp_ethtool_ops = {
 	.set_wol		= cp_set_wol,
 	.get_strings		= cp_get_strings,
 	.get_ethtool_stats	= cp_get_ethtool_stats,
+	.get_perm_addr		= ethtool_op_get_perm_addr,
 };
 
 static int cp_ioctl (struct net_device *dev, struct ifreq *rq, int cmd)
@@ -1773,6 +1773,7 @@ static int cp_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 	for (i = 0; i < 3; i++)
 		((u16 *) (dev->dev_addr))[i] =
 		    le16_to_cpu (read_eeprom (regs, i + 7, addr_len));
+	memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
 
 	dev->open = cp_open;
 	dev->stop = cp_close;
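
Both 8139cp hunks implement the new permanent-address reporting: probe snapshots the factory MAC into dev->perm_addr before user space can override dev->dev_addr, and the ethtool table points at the generic accessor. The same two-line pattern recurs across the drivers in this merge (fragments):

	/* at probe, right after reading the MAC from EEPROM */
	memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);

	/* in the driver's struct ethtool_ops */
	.get_perm_addr = ethtool_op_get_perm_addr,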

+ 4 - 1
drivers/net/8139too.c

@@ -552,7 +552,8 @@ const static struct {
 
 	{ "RTL-8100B/8139D",
 	  HW_REVID(1, 1, 1, 0, 1, 0, 1),
-	  HasLWake,
+	  HasHltClk /* XXX undocumented? */
+	| HasLWake,
 	},
 
 	{ "RTL-8101",
@@ -970,6 +971,7 @@ static int __devinit rtl8139_init_one (struct pci_dev *pdev,
 	for (i = 0; i < 3; i++)
 		((u16 *) (dev->dev_addr))[i] =
 		    le16_to_cpu (read_eeprom (ioaddr, i + 7, addr_len));
+	memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
 
 	/* The Rtl8139-specific entries in the device structure. */
 	dev->open = rtl8139_open;
@@ -2465,6 +2467,7 @@ static struct ethtool_ops rtl8139_ethtool_ops = {
 	.get_strings		= rtl8139_get_strings,
 	.get_stats_count	= rtl8139_get_stats_count,
 	.get_ethtool_stats	= rtl8139_get_ethtool_stats,
+	.get_perm_addr		= ethtool_op_get_perm_addr,
 };
 
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)

+ 23 - 0
drivers/net/Kconfig

@@ -475,6 +475,14 @@ config SGI_IOC3_ETH_HW_TX_CSUM
 	  the moment only acceleration of IPv4 is supported.  This option
 	  enables offloading for checksums on transmit.  If unsure, say Y.
 
+config MIPS_SIM_NET
+	tristate "MIPS simulator Network device (EXPERIMENTAL)"
+	depends on NETDEVICES && MIPS_SIM && EXPERIMENTAL
+	help
+	  The MIPSNET device is a simple Ethernet network device which is
+	  emulated by the MIPS Simulator.
+	  If you are not using a MIPSsim or are unsure, say N.
+
 config SGI_O2MACE_ETH
 	tristate "SGI O2 MACE Fast Ethernet support"
 	depends on NET_ETHERNET && SGI_IP32=y
@@ -2083,6 +2091,7 @@ config SPIDER_NET
 config GIANFAR
 	tristate "Gianfar Ethernet"
 	depends on 85xx || 83xx
+	select PHYLIB
 	help
 	  This driver supports the Gigabit TSEC on the MPC85xx 
 	  family of chips, and the FEC on the 8540
@@ -2243,6 +2252,20 @@ config ISERIES_VETH
 	tristate "iSeries Virtual Ethernet driver support"
 	depends on PPC_ISERIES
 
+config RIONET
+	tristate "RapidIO Ethernet over messaging driver support"
+	depends on NETDEVICES && RAPIDIO
+
+config RIONET_TX_SIZE
+	int "Number of outbound queue entries"
+	depends on RIONET
+	default "128"
+
+config RIONET_RX_SIZE
+	int "Number of inbound queue entries"
+	depends on RIONET
+	default "128"
+
 config FDDI
 	bool "FDDI driver support"
 	depends on (PCI || EISA)
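
An int-type symbol such as RIONET_TX_SIZE reaches C code as a plain macro in the generated autoconf header, so the driver can size its queues at compile time; for example (illustrative, not taken from rionet.c):

	static void *tx_slots[CONFIG_RIONET_TX_SIZE];	/* 128 unless overridden */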

+ 3 - 1
drivers/net/Makefile

@@ -13,7 +13,7 @@ obj-$(CONFIG_CHELSIO_T1) += chelsio/
 obj-$(CONFIG_BONDING) += bonding/
 obj-$(CONFIG_GIANFAR) += gianfar_driver.o
 
-gianfar_driver-objs := gianfar.o gianfar_ethtool.o gianfar_phy.o
+gianfar_driver-objs := gianfar.o gianfar_ethtool.o gianfar_mii.o
 
 #
 # link order important here
@@ -64,6 +64,7 @@ obj-$(CONFIG_SKFP) += skfp/
 obj-$(CONFIG_VIA_RHINE) += via-rhine.o
 obj-$(CONFIG_VIA_VELOCITY) += via-velocity.o
 obj-$(CONFIG_ADAPTEC_STARFIRE) += starfire.o
+obj-$(CONFIG_RIONET) += rionet.o
 
 #
 # end link order section
@@ -166,6 +167,7 @@ obj-$(CONFIG_EQUALIZER) += eql.o
 obj-$(CONFIG_MIPS_JAZZ_SONIC) += jazzsonic.o
 obj-$(CONFIG_MIPS_GT96100ETH) += gt96100eth.o
 obj-$(CONFIG_MIPS_AU1X00_ENET) += au1000_eth.o
+obj-$(CONFIG_MIPS_SIM_NET) += mipsnet.o
 obj-$(CONFIG_SGI_IOC3_ETH) += ioc3-eth.o
 obj-$(CONFIG_DECLANCE) += declance.o
 obj-$(CONFIG_ATARILANCE) += atarilance.o

+ 5 - 8
drivers/net/au1000_eth.c

@@ -151,13 +151,6 @@ struct au1000_private *au_macs[NUM_ETH_INTERFACES];
 	SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | \
 	SUPPORTED_Autoneg
 
-static char *phy_link[] = 
-{	"unknown", 
-	"10Base2", "10BaseT", 
-	"AUI",
-	"100BaseT", "100BaseTX", "100BaseFX"
-};
-
 int bcm_5201_init(struct net_device *dev, int phy_addr)
 {
 	s16 data;
@@ -785,6 +778,7 @@ static struct mii_chip_info {
 	{"Broadcom BCM5201 10/100 BaseT PHY",0x0040,0x6212, &bcm_5201_ops,0},
 	{"Broadcom BCM5221 10/100 BaseT PHY",0x0040,0x61e4, &bcm_5201_ops,0},
 	{"Broadcom BCM5222 10/100 BaseT PHY",0x0040,0x6322, &bcm_5201_ops,1},
+	{"NS DP83847 PHY", 0x2000, 0x5c30, &bcm_5201_ops ,0},
 	{"AMD 79C901 HomePNA PHY",0x0000,0x35c8, &am79c901_ops,0},
 	{"AMD 79C874 10/100 BaseT PHY",0x0022,0x561b, &am79c874_ops,0},
 	{"LSI 80227 10/100 BaseT PHY",0x0016,0xf840, &lsi_80227_ops,0},
@@ -1045,7 +1039,7 @@ found:
 #endif
 
 	if (aup->mii->chip_info == NULL) {
-		printk(KERN_ERR "%s: Au1x No MII transceivers found!\n",
+		printk(KERN_ERR "%s: Au1x No known MII transceivers found!\n",
 				dev->name);
 		return -1;
 	}
@@ -1546,6 +1540,9 @@ au1000_probe(u32 ioaddr, int irq, int port_num)
 		printk(KERN_ERR "%s: out of memory\n", dev->name);
 		goto err_out;
 	}
+	aup->mii->next = NULL;
+	aup->mii->chip_info = NULL;
+	aup->mii->status = 0;
 	aup->mii->mii_control_reg = 0;
 	aup->mii->mii_data_reg = 0;
 

+ 128 - 8
drivers/net/b44.c

@@ -106,6 +106,29 @@ static int b44_poll(struct net_device *dev, int *budget);
 static void b44_poll_controller(struct net_device *dev);
 #endif
 
+static int dma_desc_align_mask;
+static int dma_desc_sync_size;
+
+static inline void b44_sync_dma_desc_for_device(struct pci_dev *pdev,
+                                                dma_addr_t dma_base,
+                                                unsigned long offset,
+                                                enum dma_data_direction dir)
+{
+	dma_sync_single_range_for_device(&pdev->dev, dma_base,
+	                                 offset & dma_desc_align_mask,
+	                                 dma_desc_sync_size, dir);
+}
+
+static inline void b44_sync_dma_desc_for_cpu(struct pci_dev *pdev,
+                                             dma_addr_t dma_base,
+                                             unsigned long offset,
+                                             enum dma_data_direction dir)
+{
+	dma_sync_single_range_for_cpu(&pdev->dev, dma_base,
+	                              offset & dma_desc_align_mask,
+	                              dma_desc_sync_size, dir);
+}
+
 static inline unsigned long br32(const struct b44 *bp, unsigned long reg)
 {
 	return readl(bp->regs + reg);
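
These wrappers sync just the cache line holding one descriptor instead of the whole table. Worked through with illustrative values, assuming a 32-byte cache line and 8-byte descriptors:

	/* align = 32  ->  dma_desc_align_mask = ~31 = 0xffffffe0 */
	unsigned long offset = 5 * 8;		/* descriptor #5 at byte 40 */
	unsigned long start  = offset & ~31UL;	/* rounds down to byte 32   */
	/* sync length = max(32, 8) = 32, covering descriptors 4..7 */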
@@ -668,6 +691,11 @@ static int b44_alloc_rx_skb(struct b44 *bp, int src_idx, u32 dest_idx_unmasked)
 	dp->ctrl = cpu_to_le32(ctrl);
 	dp->addr = cpu_to_le32((u32) mapping + bp->rx_offset + bp->dma_offset);
 
+	if (bp->flags & B44_FLAG_RX_RING_HACK)
+		b44_sync_dma_desc_for_device(bp->pdev, bp->rx_ring_dma,
+		                             dest_idx * sizeof(dp),
+		                             DMA_BIDIRECTIONAL);
+
 	return RX_PKT_BUF_SZ;
 }
 
@@ -692,6 +720,11 @@ static void b44_recycle_rx(struct b44 *bp, int src_idx, u32 dest_idx_unmasked)
 	pci_unmap_addr_set(dest_map, mapping,
 			   pci_unmap_addr(src_map, mapping));
 
+	if (bp->flags & B44_FLAG_RX_RING_HACK)
+		b44_sync_dma_desc_for_cpu(bp->pdev, bp->rx_ring_dma,
+		                          src_idx * sizeof(src_desc),
+		                          DMA_BIDIRECTIONAL);
+
 	ctrl = src_desc->ctrl;
 	if (dest_idx == (B44_RX_RING_SIZE - 1))
 		ctrl |= cpu_to_le32(DESC_CTRL_EOT);
@@ -700,8 +733,14 @@ static void b44_recycle_rx(struct b44 *bp, int src_idx, u32 dest_idx_unmasked)
 
 	dest_desc->ctrl = ctrl;
 	dest_desc->addr = src_desc->addr;
+
 	src_map->skb = NULL;
 
+	if (bp->flags & B44_FLAG_RX_RING_HACK)
+		b44_sync_dma_desc_for_device(bp->pdev, bp->rx_ring_dma,
+		                             dest_idx * sizeof(dest_desc),
+		                             DMA_BIDIRECTIONAL);
+
 	pci_dma_sync_single_for_device(bp->pdev, src_desc->addr,
 				       RX_PKT_BUF_SZ,
 				       PCI_DMA_FROMDEVICE);
@@ -959,6 +998,11 @@ static int b44_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	bp->tx_ring[entry].ctrl = cpu_to_le32(ctrl);
 	bp->tx_ring[entry].addr = cpu_to_le32((u32) mapping+bp->dma_offset);
 
+	if (bp->flags & B44_FLAG_TX_RING_HACK)
+		b44_sync_dma_desc_for_device(bp->pdev, bp->tx_ring_dma,
+		                             entry * sizeof(bp->tx_ring[0]),
+		                             DMA_TO_DEVICE);
+
 	entry = NEXT_TX(entry);
 
 	bp->tx_prod = entry;
@@ -1064,6 +1108,16 @@ static void b44_init_rings(struct b44 *bp)
 	memset(bp->rx_ring, 0, B44_RX_RING_BYTES);
 	memset(bp->tx_ring, 0, B44_TX_RING_BYTES);
 
+	if (bp->flags & B44_FLAG_RX_RING_HACK)
+		dma_sync_single_for_device(&bp->pdev->dev, bp->rx_ring_dma,
+		                           DMA_TABLE_BYTES,
+		                           PCI_DMA_BIDIRECTIONAL);
+
+	if (bp->flags & B44_FLAG_TX_RING_HACK)
+		dma_sync_single_for_device(&bp->pdev->dev, bp->tx_ring_dma,
+		                           DMA_TABLE_BYTES,
+		                           PCI_DMA_TODEVICE);
+
 	for (i = 0; i < bp->rx_pending; i++) {
 		if (b44_alloc_rx_skb(bp, -1, i) < 0)
 			break;
@@ -1085,14 +1139,28 @@ static void b44_free_consistent(struct b44 *bp)
 		bp->tx_buffers = NULL;
 	}
 	if (bp->rx_ring) {
-		pci_free_consistent(bp->pdev, DMA_TABLE_BYTES,
-				    bp->rx_ring, bp->rx_ring_dma);
+		if (bp->flags & B44_FLAG_RX_RING_HACK) {
+			dma_unmap_single(&bp->pdev->dev, bp->rx_ring_dma,
+				         DMA_TABLE_BYTES,
+				         DMA_BIDIRECTIONAL);
+			kfree(bp->rx_ring);
+		} else
+			pci_free_consistent(bp->pdev, DMA_TABLE_BYTES,
+					    bp->rx_ring, bp->rx_ring_dma);
 		bp->rx_ring = NULL;
+		bp->flags &= ~B44_FLAG_RX_RING_HACK;
 	}
 	if (bp->tx_ring) {
-		pci_free_consistent(bp->pdev, DMA_TABLE_BYTES,
-				    bp->tx_ring, bp->tx_ring_dma);
+		if (bp->flags & B44_FLAG_TX_RING_HACK) {
+			dma_unmap_single(&bp->pdev->dev, bp->tx_ring_dma,
+				         DMA_TABLE_BYTES,
+				         DMA_TO_DEVICE);
+			kfree(bp->tx_ring);
+		} else
+			pci_free_consistent(bp->pdev, DMA_TABLE_BYTES,
+					    bp->tx_ring, bp->tx_ring_dma);
 		bp->tx_ring = NULL;
+		bp->flags &= ~B44_FLAG_TX_RING_HACK;
 	}
 }
 
@@ -1118,12 +1186,56 @@ static int b44_alloc_consistent(struct b44 *bp)
 
 	size = DMA_TABLE_BYTES;
 	bp->rx_ring = pci_alloc_consistent(bp->pdev, size, &bp->rx_ring_dma);
-	if (!bp->rx_ring)
-		goto out_err;
+	if (!bp->rx_ring) {
+		/* Allocation may have failed due to pci_alloc_consistent
+		   insisting on use of GFP_DMA, which is more restrictive
+		   than necessary...  */
+		struct dma_desc *rx_ring;
+		dma_addr_t rx_ring_dma;
+
+		if (!(rx_ring = (struct dma_desc *)kmalloc(size, GFP_KERNEL)))
+			goto out_err;
+
+		memset(rx_ring, 0, size);
+		rx_ring_dma = dma_map_single(&bp->pdev->dev, rx_ring,
+		                             DMA_TABLE_BYTES,
+		                             DMA_BIDIRECTIONAL);
+
+		if (rx_ring_dma + size > B44_DMA_MASK) {
+			kfree(rx_ring);
+			goto out_err;
+		}
+
+		bp->rx_ring = rx_ring;
+		bp->rx_ring_dma = rx_ring_dma;
+		bp->flags |= B44_FLAG_RX_RING_HACK;
+	}
 
 	bp->tx_ring = pci_alloc_consistent(bp->pdev, size, &bp->tx_ring_dma);
-	if (!bp->tx_ring)
-		goto out_err;
+	if (!bp->tx_ring) {
+		/* Allocation may have failed due to pci_alloc_consistent
+		   insisting on use of GFP_DMA, which is more restrictive
+		   than necessary...  */
+		struct dma_desc *tx_ring;
+		dma_addr_t tx_ring_dma;
+
+		if (!(tx_ring = (struct dma_desc *)kmalloc(size, GFP_KERNEL)))
+			goto out_err;
+
+		memset(tx_ring, 0, size);
+		tx_ring_dma = dma_map_single(&bp->pdev->dev, tx_ring,
+		                             DMA_TABLE_BYTES,
+		                             DMA_TO_DEVICE);
+
+		if (tx_ring_dma + size > B44_DMA_MASK) {
+			kfree(tx_ring);
+			goto out_err;
+		}
+
+		bp->tx_ring = tx_ring;
+		bp->tx_ring_dma = tx_ring_dma;
+		bp->flags |= B44_FLAG_TX_RING_HACK;
+	}
 
 	return 0;
 
@@ -1676,6 +1788,7 @@ static struct ethtool_ops b44_ethtool_ops = {
 	.set_pauseparam		= b44_set_pauseparam,
 	.get_msglevel		= b44_get_msglevel,
 	.set_msglevel		= b44_set_msglevel,
+	.get_perm_addr		= ethtool_op_get_perm_addr,
 };
 
 static int b44_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
@@ -1718,6 +1831,7 @@ static int __devinit b44_get_invariants(struct b44 *bp)
 	bp->dev->dev_addr[3] = eeprom[80];
 	bp->dev->dev_addr[4] = eeprom[83];
 	bp->dev->dev_addr[5] = eeprom[82];
+	memcpy(bp->dev->perm_addr, bp->dev->dev_addr, bp->dev->addr_len);
 
 	bp->phy_addr = eeprom[90] & 0x1f;
 
@@ -1971,6 +2085,12 @@ static struct pci_driver b44_driver = {
 
 static int __init b44_init(void)
 {
+	unsigned int dma_desc_align_size = dma_get_cache_alignment();
+
+	/* Set up parameters for syncing RX/TX DMA descriptors */
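PLACEHOLDER_UNUSED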
+	dma_desc_align_mask = ~(dma_desc_align_size - 1);
+	dma_desc_sync_size = max(dma_desc_align_size, sizeof(struct dma_desc));
+
 	return pci_module_init(&b44_driver);
 }
 

+ 2 - 0
drivers/net/b44.h

@@ -400,6 +400,8 @@ struct b44 {
 #define B44_FLAG_ADV_100HALF	0x04000000
 #define B44_FLAG_ADV_100FULL	0x08000000
 #define B44_FLAG_INTERNAL_PHY	0x10000000
+#define B44_FLAG_RX_RING_HACK	0x20000000
+#define B44_FLAG_TX_RING_HACK	0x40000000
 
 	u32			rx_offset;
 

+ 55 - 2
drivers/net/bonding/bond_main.c

@@ -4241,6 +4241,43 @@ out:
 	return 0;
 }
 
+static void bond_activebackup_xmit_copy(struct sk_buff *skb,
+                                        struct bonding *bond,
+                                        struct slave *slave)
+{
+	struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
+	struct ethhdr *eth_data;
+	u8 *hwaddr;
+	int res;
+
+	if (!skb2) {
+		printk(KERN_ERR DRV_NAME ": Error: "
+		       "bond_activebackup_xmit_copy(): skb_copy() failed\n");
+		return;
+	}
+
+	skb2->mac.raw = (unsigned char *)skb2->data;
+	eth_data = eth_hdr(skb2);
+
+	/* Pick an appropriate source MAC address
+	 *	-- use slave's perm MAC addr, unless used by bond
+	 *	-- otherwise, borrow active slave's perm MAC addr
+	 *	   since that will not be used
+	 */
+	hwaddr = slave->perm_hwaddr;
+	if (!memcmp(eth_data->h_source, hwaddr, ETH_ALEN))
+		hwaddr = bond->curr_active_slave->perm_hwaddr;
+
+	/* Set source MAC address appropriately */
+	memcpy(eth_data->h_source, hwaddr, ETH_ALEN);
+
+	res = bond_dev_queue_xmit(bond, skb2, slave->dev);
+	if (res)
+		dev_kfree_skb(skb2);
+
+	return;
+}
+
 /*
  * in active-backup mode, we know that bond->curr_active_slave is always valid if
  * the bond has a usable interface.
@@ -4257,10 +4294,26 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
 		goto out;
 	}
 
-	if (bond->curr_active_slave) { /* one usable interface */
-		res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev);
+	if (!bond->curr_active_slave)
+		goto out;
+
+	/* Xmit IGMP frames on all slaves to ensure rapid fail-over
+	   for multicast traffic on snooping switches */
+	if (skb->protocol == __constant_htons(ETH_P_IP) &&
+	    skb->nh.iph->protocol == IPPROTO_IGMP) {
+		struct slave *slave, *active_slave;
+		int i;
+
+		active_slave = bond->curr_active_slave;
+		bond_for_each_slave_from_to(bond, slave, i, active_slave->next,
+		                            active_slave->prev)
+			if (IS_UP(slave->dev) &&
+			    (slave->link == BOND_LINK_UP))
+				bond_activebackup_xmit_copy(skb, bond, slave);
 	}
 
+	res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev);
+
 out:
 	if (res) {
 		/* no suitable interface, frame not sent */
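
The new branch duplicates only IGMP frames: a copy goes out on every up slave so snooping switches see the group membership on all ports, while the original still leaves via the active slave. The protocol test uses the 2.6.13-era header accessors (sketch):

	static inline int skb_is_igmp(const struct sk_buff *skb)
	{
		return skb->protocol == __constant_htons(ETH_P_IP) &&
		       skb->nh.iph->protocol == IPPROTO_IGMP;
	}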

+ 2 - 2
drivers/net/cassini.c

@@ -489,7 +489,7 @@ static int cas_page_free(struct cas *cp, cas_page_t *page)
 /* local page allocation routines for the receive buffers. jumbo pages
  * require at least 8K contiguous and 8K aligned buffers.
  */
-static cas_page_t *cas_page_alloc(struct cas *cp, const int flags)
+static cas_page_t *cas_page_alloc(struct cas *cp, const gfp_t flags)
 {
 	cas_page_t *page;
 
@@ -561,7 +561,7 @@ static void cas_spare_free(struct cas *cp)
 }
 
 /* replenish spares if needed */
-static void cas_spare_recover(struct cas *cp, const int flags)
+static void cas_spare_recover(struct cas *cp, const gfp_t flags)
 {
 	struct list_head list, *elem, *tmp;
 	int needed, i;

+ 16 - 21
drivers/net/declance.c

@@ -5,7 +5,7 @@
  *
  *      adopted from sunlance.c by Richard van den Berg
  *
- *      Copyright (C) 2002, 2003  Maciej W. Rozycki
+ *      Copyright (C) 2002, 2003, 2005  Maciej W. Rozycki
  *
  *      additional sources:
  *      - PMAD-AA TURBOchannel Ethernet Module Functional Specification,
@@ -57,13 +57,15 @@
 #include <linux/string.h>
 
 #include <asm/addrspace.h>
+#include <asm/system.h>
+
 #include <asm/dec/interrupts.h>
 #include <asm/dec/ioasic.h>
 #include <asm/dec/ioasic_addrs.h>
 #include <asm/dec/kn01.h>
 #include <asm/dec/machtype.h>
+#include <asm/dec/system.h>
 #include <asm/dec/tc.h>
-#include <asm/system.h>
 
 static char version[] __devinitdata =
 "declance.c: v0.009 by Linux MIPS DECstation task force\n";
@@ -79,10 +81,6 @@ MODULE_LICENSE("GPL");
 #define PMAD_LANCE 2
 #define PMAX_LANCE 3
 
-#ifndef CONFIG_TC
-unsigned long system_base;
-unsigned long dmaptr;
-#endif
 
 #define LE_CSR0 0
 #define LE_CSR1 1
@@ -237,7 +235,7 @@ struct lance_init_block {
 /*
  * This works *only* for the ring descriptors
  */
-#define LANCE_ADDR(x) (PHYSADDR(x) >> 1)
+#define LANCE_ADDR(x) (CPHYSADDR(x) >> 1)
 
 struct lance_private {
 	struct net_device *next;
@@ -697,12 +695,13 @@ out:
 	spin_unlock(&lp->lock);
 }
 
-static void lance_dma_merr_int(const int irq, void *dev_id,
-				struct pt_regs *regs)
+static irqreturn_t lance_dma_merr_int(const int irq, void *dev_id,
+				      struct pt_regs *regs)
 {
 	struct net_device *dev = (struct net_device *) dev_id;
 
 	printk("%s: DMA error\n", dev->name);
+	return IRQ_HANDLED;
 }
 
 static irqreturn_t
@@ -1026,10 +1025,6 @@ static int __init dec_lance_init(const int type, const int slot)
 	unsigned long esar_base;
 	unsigned char *esar;
 
-#ifndef CONFIG_TC
-	system_base = KN01_LANCE_BASE;
-#endif
-
 	if (dec_lance_debug && version_printed++ == 0)
 		printk(version);
 
@@ -1062,16 +1057,16 @@ static int __init dec_lance_init(const int type, const int slot)
 	switch (type) {
 #ifdef CONFIG_TC
 	case ASIC_LANCE:
-		dev->base_addr = system_base + IOASIC_LANCE;
+		dev->base_addr = CKSEG1ADDR(dec_kn_slot_base + IOASIC_LANCE);
 
 		/* buffer space for the on-board LANCE shared memory */
 		/*
 		 * FIXME: ugly hack!
 		 */
-		dev->mem_start = KSEG1ADDR(0x00020000);
+		dev->mem_start = CKSEG1ADDR(0x00020000);
 		dev->mem_end = dev->mem_start + 0x00020000;
 		dev->irq = dec_interrupt[DEC_IRQ_LANCE];
-		esar_base = system_base + IOASIC_ESAR;
+		esar_base = CKSEG1ADDR(dec_kn_slot_base + IOASIC_ESAR);
 
 		/* Workaround crash with booting KN04 2.1k from Disk */
 		memset((void *)dev->mem_start, 0,
@@ -1101,14 +1096,14 @@ static int __init dec_lance_init(const int type, const int slot)
 		/* Setup I/O ASIC LANCE DMA.  */
 		lp->dma_irq = dec_interrupt[DEC_IRQ_LANCE_MERR];
 		ioasic_write(IO_REG_LANCE_DMA_P,
-			     PHYSADDR(dev->mem_start) << 3);
+			     CPHYSADDR(dev->mem_start) << 3);
 
 		break;
 
 	case PMAD_LANCE:
 		claim_tc_card(slot);
 
-		dev->mem_start = get_tc_base_addr(slot);
+		dev->mem_start = CKSEG1ADDR(get_tc_base_addr(slot));
 		dev->base_addr = dev->mem_start + 0x100000;
 		dev->irq = get_tc_irq_nr(slot);
 		esar_base = dev->mem_start + 0x1c0002;
@@ -1137,9 +1132,9 @@ static int __init dec_lance_init(const int type, const int slot)
 
 	case PMAX_LANCE:
 		dev->irq = dec_interrupt[DEC_IRQ_LANCE];
-		dev->base_addr = KN01_LANCE_BASE;
-		dev->mem_start = KN01_LANCE_BASE + 0x01000000;
-		esar_base = KN01_RTC_BASE + 1;
+		dev->base_addr = CKSEG1ADDR(KN01_SLOT_BASE + KN01_LANCE);
+		dev->mem_start = CKSEG1ADDR(KN01_SLOT_BASE + KN01_LANCE_MEM);
+		esar_base = CKSEG1ADDR(KN01_SLOT_BASE + KN01_ESAR + 1);
 		lp->dma_irq = -1;
 
 		/*
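
The switch from PHYSADDR/KSEG1ADDR and the hard-coded system_base to CPHYSADDR/CKSEG1ADDR and dec_kn_slot_base moves the driver onto the 64-bit-clean MIPS address macros. Their relationship, in brief (sketch):

	unsigned long phys = 0x00020000UL;
	unsigned long va   = CKSEG1ADDR(phys);	/* uncached kseg1 view */
	/* CPHYSADDR(va) == phys: strips the segment bits back off */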

+ 3 - 1
drivers/net/e100.c

@@ -2201,6 +2201,7 @@ static struct ethtool_ops e100_ethtool_ops = {
 	.phys_id		= e100_phys_id,
 	.get_stats_count	= e100_get_stats_count,
 	.get_ethtool_stats	= e100_get_ethtool_stats,
+	.get_perm_addr		= ethtool_op_get_perm_addr,
 };
 
 static int e100_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
@@ -2351,7 +2352,8 @@ static int __devinit e100_probe(struct pci_dev *pdev,
 	e100_phy_init(nic);
 
 	memcpy(netdev->dev_addr, nic->eeprom, ETH_ALEN);
-	if(!is_valid_ether_addr(netdev->dev_addr)) {
+	memcpy(netdev->perm_addr, nic->eeprom, ETH_ALEN);
+	if(!is_valid_ether_addr(netdev->perm_addr)) {
 		DPRINTK(PROBE, ERR, "Invalid MAC address from "
 			"EEPROM, aborting.\n");
 		err = -EAGAIN;

+ 60 - 14
drivers/net/e1000/e1000.h

@@ -72,6 +72,10 @@
 #include <linux/mii.h>
 #include <linux/ethtool.h>
 #include <linux/if_vlan.h>
+#ifdef CONFIG_E1000_MQ
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#endif
 
 #define BAR_0		0
 #define BAR_1		1
@@ -165,10 +169,33 @@ struct e1000_buffer {
 	uint16_t next_to_watch;
 };
 
-struct e1000_ps_page { struct page *ps_page[MAX_PS_BUFFERS]; };
-struct e1000_ps_page_dma { uint64_t ps_page_dma[MAX_PS_BUFFERS]; };
+struct e1000_ps_page { struct page *ps_page[PS_PAGE_BUFFERS]; };
+struct e1000_ps_page_dma { uint64_t ps_page_dma[PS_PAGE_BUFFERS]; };
+
+struct e1000_tx_ring {
+	/* pointer to the descriptor ring memory */
+	void *desc;
+	/* physical address of the descriptor ring */
+	dma_addr_t dma;
+	/* length of descriptor ring in bytes */
+	unsigned int size;
+	/* number of descriptors in the ring */
+	unsigned int count;
+	/* next descriptor to associate a buffer with */
+	unsigned int next_to_use;
+	/* next descriptor to check for DD status bit */
+	unsigned int next_to_clean;
+	/* array of buffer information structs */
+	struct e1000_buffer *buffer_info;
+
+	struct e1000_buffer previous_buffer_info;
+	spinlock_t tx_lock;
+	uint16_t tdh;
+	uint16_t tdt;
+	uint64_t pkt;
+};
 
-struct e1000_desc_ring {
+struct e1000_rx_ring {
 	/* pointer to the descriptor ring memory */
 	void *desc;
 	/* physical address of the descriptor ring */
@@ -186,6 +213,10 @@ struct e1000_desc_ring {
 	/* arrays of page information for packet split */
 	struct e1000_ps_page *ps_page;
 	struct e1000_ps_page_dma *ps_page_dma;
+
+	uint16_t rdh;
+	uint16_t rdt;
+	uint64_t pkt;
 };
 
 #define E1000_DESC_UNUSED(R) \
@@ -227,9 +258,10 @@ struct e1000_adapter {
 	unsigned long led_status;
 
 	/* TX */
-	struct e1000_desc_ring tx_ring;
-	struct e1000_buffer previous_buffer_info;
-	spinlock_t tx_lock;
+	struct e1000_tx_ring *tx_ring;      /* One per active queue */
+#ifdef CONFIG_E1000_MQ
+	struct e1000_tx_ring **cpu_tx_ring; /* per-cpu */
+#endif
 	uint32_t txd_cmd;
 	uint32_t tx_int_delay;
 	uint32_t tx_abs_int_delay;
@@ -246,19 +278,33 @@ struct e1000_adapter {
 
 	/* RX */
 #ifdef CONFIG_E1000_NAPI
-	boolean_t (*clean_rx) (struct e1000_adapter *adapter, int *work_done,
-			  int work_to_do);
+	boolean_t (*clean_rx) (struct e1000_adapter *adapter,
+			       struct e1000_rx_ring *rx_ring,
+			       int *work_done, int work_to_do);
 #else
-	boolean_t (*clean_rx) (struct e1000_adapter *adapter);
+	boolean_t (*clean_rx) (struct e1000_adapter *adapter,
+			       struct e1000_rx_ring *rx_ring);
 #endif
-	void (*alloc_rx_buf) (struct e1000_adapter *adapter);
-	struct e1000_desc_ring rx_ring;
+	void (*alloc_rx_buf) (struct e1000_adapter *adapter,
+			      struct e1000_rx_ring *rx_ring);
+	struct e1000_rx_ring *rx_ring;      /* One per active queue */
+#ifdef CONFIG_E1000_NAPI
+	struct net_device *polling_netdev;  /* One per active queue */
+#endif
+#ifdef CONFIG_E1000_MQ
+	struct net_device **cpu_netdev;     /* per-cpu */
+	struct call_async_data_struct rx_sched_call_data;
+	int cpu_for_queue[4];
+#endif
+	int num_queues;
+
 	uint64_t hw_csum_err;
 	uint64_t hw_csum_good;
+	uint64_t rx_hdr_split;
 	uint32_t rx_int_delay;
 	uint32_t rx_abs_int_delay;
 	boolean_t rx_csum;
-	boolean_t rx_ps;
+	unsigned int rx_ps_pages;
 	uint32_t gorcl;
 	uint64_t gorcl_old;
 	uint16_t rx_ps_bsize0;
@@ -278,8 +324,8 @@ struct e1000_adapter {
 	struct e1000_phy_stats phy_stats;
 
 	uint32_t test_icr;
-	struct e1000_desc_ring test_tx_ring;
-	struct e1000_desc_ring test_rx_ring;
+	struct e1000_tx_ring test_tx_ring;
+	struct e1000_rx_ring test_rx_ring;
 
 
 	int msg_enable;

+ 67 - 28
drivers/net/e1000/e1000_ethtool.c

@@ -39,10 +39,10 @@ extern int e1000_up(struct e1000_adapter *adapter);
 extern void e1000_down(struct e1000_adapter *adapter);
 extern void e1000_reset(struct e1000_adapter *adapter);
 extern int e1000_set_spd_dplx(struct e1000_adapter *adapter, uint16_t spddplx);
-extern int e1000_setup_rx_resources(struct e1000_adapter *adapter);
-extern int e1000_setup_tx_resources(struct e1000_adapter *adapter);
-extern void e1000_free_rx_resources(struct e1000_adapter *adapter);
-extern void e1000_free_tx_resources(struct e1000_adapter *adapter);
+extern int e1000_setup_all_rx_resources(struct e1000_adapter *adapter);
+extern int e1000_setup_all_tx_resources(struct e1000_adapter *adapter);
+extern void e1000_free_all_rx_resources(struct e1000_adapter *adapter);
+extern void e1000_free_all_tx_resources(struct e1000_adapter *adapter);
 extern void e1000_update_stats(struct e1000_adapter *adapter);
 
 struct e1000_stats {
@@ -91,7 +91,8 @@ static const struct e1000_stats e1000_gstrings_stats[] = {
 	{ "tx_flow_control_xoff", E1000_STAT(stats.xofftxc) },
 	{ "rx_long_byte_count", E1000_STAT(stats.gorcl) },
 	{ "rx_csum_offload_good", E1000_STAT(hw_csum_good) },
-	{ "rx_csum_offload_errors", E1000_STAT(hw_csum_err) }
+	{ "rx_csum_offload_errors", E1000_STAT(hw_csum_err) },
+	{ "rx_header_split", E1000_STAT(rx_hdr_split) },
 };
 #define E1000_STATS_LEN	\
 	sizeof(e1000_gstrings_stats) / sizeof(struct e1000_stats)
@@ -546,8 +547,10 @@ e1000_set_eeprom(struct net_device *netdev,
 	ret_val = e1000_write_eeprom(hw, first_word,
 				     last_word - first_word + 1, eeprom_buff);
 
-	/* Update the checksum over the first part of the EEPROM if needed */
-	if((ret_val == 0) && first_word <= EEPROM_CHECKSUM_REG)
+	/* Update the checksum over the first part of the EEPROM if needed 
+	 * and flush shadow RAM for 82573 controllers */
+	if((ret_val == 0) && ((first_word <= EEPROM_CHECKSUM_REG) || 
+				(hw->mac_type == e1000_82573)))
 		e1000_update_eeprom_checksum(hw);
 
 	kfree(eeprom_buff);
@@ -576,8 +579,8 @@ e1000_get_ringparam(struct net_device *netdev,
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	e1000_mac_type mac_type = adapter->hw.mac_type;
-	struct e1000_desc_ring *txdr = &adapter->tx_ring;
-	struct e1000_desc_ring *rxdr = &adapter->rx_ring;
+	struct e1000_tx_ring *txdr = adapter->tx_ring;
+	struct e1000_rx_ring *rxdr = adapter->rx_ring;
 
 	ring->rx_max_pending = (mac_type < e1000_82544) ? E1000_MAX_RXD :
 		E1000_MAX_82544_RXD;
@@ -597,20 +600,40 @@ e1000_set_ringparam(struct net_device *netdev,
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	e1000_mac_type mac_type = adapter->hw.mac_type;
-	struct e1000_desc_ring *txdr = &adapter->tx_ring;
-	struct e1000_desc_ring *rxdr = &adapter->rx_ring;
-	struct e1000_desc_ring tx_old, tx_new, rx_old, rx_new;
-	int err;
+	struct e1000_tx_ring *txdr, *tx_old, *tx_new;
+	struct e1000_rx_ring *rxdr, *rx_old, *rx_new;
+	int i, err, tx_ring_size, rx_ring_size;
+
+	tx_ring_size = sizeof(struct e1000_tx_ring) * adapter->num_queues;
+	rx_ring_size = sizeof(struct e1000_rx_ring) * adapter->num_queues;
+
+	if (netif_running(adapter->netdev))
+		e1000_down(adapter);
 
 	tx_old = adapter->tx_ring;
 	rx_old = adapter->rx_ring;
 
+	adapter->tx_ring = kmalloc(tx_ring_size, GFP_KERNEL);
+	if (!adapter->tx_ring) {
+		err = -ENOMEM;
+		goto err_setup_rx;
+	}
+	memset(adapter->tx_ring, 0, tx_ring_size);
+
+	adapter->rx_ring = kmalloc(rx_ring_size, GFP_KERNEL);
+	if (!adapter->rx_ring) {
+		kfree(adapter->tx_ring);
+		err = -ENOMEM;
+		goto err_setup_rx;
+	}
+	memset(adapter->rx_ring, 0, rx_ring_size);
+
+	txdr = adapter->tx_ring;
+	rxdr = adapter->rx_ring;
+
 	if((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
 		return -EINVAL;
 
-	if(netif_running(adapter->netdev))
-		e1000_down(adapter);
-
 	rxdr->count = max(ring->rx_pending,(uint32_t)E1000_MIN_RXD);
 	rxdr->count = min(rxdr->count,(uint32_t)(mac_type < e1000_82544 ?
 		E1000_MAX_RXD : E1000_MAX_82544_RXD));
@@ -621,11 +644,16 @@ e1000_set_ringparam(struct net_device *netdev,
 		E1000_MAX_TXD : E1000_MAX_82544_TXD));
 	E1000_ROUNDUP(txdr->count, REQ_TX_DESCRIPTOR_MULTIPLE); 
 
+	for (i = 0; i < adapter->num_queues; i++) {
+		txdr[i].count = txdr->count;
+		rxdr[i].count = rxdr->count;
+	}
+
 	if(netif_running(adapter->netdev)) {
 		/* Try to get new resources before deleting old */
-		if((err = e1000_setup_rx_resources(adapter)))
+		if ((err = e1000_setup_all_rx_resources(adapter)))
 			goto err_setup_rx;
-		if((err = e1000_setup_tx_resources(adapter)))
+		if ((err = e1000_setup_all_tx_resources(adapter)))
 			goto err_setup_tx;
 
 		/* save the new, restore the old in order to free it,
@@ -635,8 +663,10 @@ e1000_set_ringparam(struct net_device *netdev,
 		tx_new = adapter->tx_ring;
 		adapter->rx_ring = rx_old;
 		adapter->tx_ring = tx_old;
-		e1000_free_rx_resources(adapter);
-		e1000_free_tx_resources(adapter);
+		e1000_free_all_rx_resources(adapter);
+		e1000_free_all_tx_resources(adapter);
+		kfree(tx_old);
+		kfree(rx_old);
 		adapter->rx_ring = rx_new;
 		adapter->tx_ring = tx_new;
 		if((err = e1000_up(adapter)))
@@ -645,7 +675,7 @@ e1000_set_ringparam(struct net_device *netdev,
 
 	return 0;
 err_setup_tx:
-	e1000_free_rx_resources(adapter);
+	e1000_free_all_rx_resources(adapter);
 err_setup_rx:
 	adapter->rx_ring = rx_old;
 	adapter->tx_ring = tx_old;
@@ -696,6 +726,11 @@ e1000_reg_test(struct e1000_adapter *adapter, uint64_t *data)
 	 * Some bits that get toggled are ignored.
 	 */
         switch (adapter->hw.mac_type) {
+	/* there are several bits on newer hardware that are r/w */
+	case e1000_82571:
+	case e1000_82572:
+		toggle = 0x7FFFF3FF;
+		break;
 	case e1000_82573:
 		toggle = 0x7FFFF033;
 		break;
@@ -898,8 +933,8 @@ e1000_intr_test(struct e1000_adapter *adapter, uint64_t *data)
 static void
 e1000_free_desc_rings(struct e1000_adapter *adapter)
 {
-	struct e1000_desc_ring *txdr = &adapter->test_tx_ring;
-	struct e1000_desc_ring *rxdr = &adapter->test_rx_ring;
+	struct e1000_tx_ring *txdr = &adapter->test_tx_ring;
+	struct e1000_rx_ring *rxdr = &adapter->test_rx_ring;
 	struct pci_dev *pdev = adapter->pdev;
 	int i;
 
@@ -941,8 +976,8 @@ e1000_free_desc_rings(struct e1000_adapter *adapter)
 static int
 e1000_setup_desc_rings(struct e1000_adapter *adapter)
 {
-	struct e1000_desc_ring *txdr = &adapter->test_tx_ring;
-	struct e1000_desc_ring *rxdr = &adapter->test_rx_ring;
+	struct e1000_tx_ring *txdr = &adapter->test_tx_ring;
+	struct e1000_rx_ring *rxdr = &adapter->test_rx_ring;
 	struct pci_dev *pdev = adapter->pdev;
 	uint32_t rctl;
 	int size, i, ret_val;
@@ -1245,6 +1280,8 @@ e1000_set_phy_loopback(struct e1000_adapter *adapter)
 	case e1000_82541_rev_2:
 	case e1000_82547:
 	case e1000_82547_rev_2:
+	case e1000_82571:
+	case e1000_82572:
 	case e1000_82573:
 		return e1000_integrated_phy_loopback(adapter);
 		break;
@@ -1340,8 +1377,8 @@ e1000_check_lbtest_frame(struct sk_buff *skb, unsigned int frame_size)
 static int
 e1000_run_loopback_test(struct e1000_adapter *adapter)
 {
-	struct e1000_desc_ring *txdr = &adapter->test_tx_ring;
-	struct e1000_desc_ring *rxdr = &adapter->test_rx_ring;
+	struct e1000_tx_ring *txdr = &adapter->test_tx_ring;
+	struct e1000_rx_ring *rxdr = &adapter->test_rx_ring;
 	struct pci_dev *pdev = adapter->pdev;
 	int i, j, k, l, lc, good_cnt, ret_val=0;
 	unsigned long time;
@@ -1509,6 +1546,7 @@ e1000_diag_test(struct net_device *netdev,
 		data[2] = 0;
 		data[3] = 0;
 	}
+	msleep_interruptible(4 * 1000);
 }
 
 static void
@@ -1625,7 +1663,7 @@ e1000_phys_id(struct net_device *netdev, uint32_t data)
 	if(!data || data > (uint32_t)(MAX_SCHEDULE_TIMEOUT / HZ))
 		data = (uint32_t)(MAX_SCHEDULE_TIMEOUT / HZ);
 
-	if(adapter->hw.mac_type < e1000_82573) {
+	if(adapter->hw.mac_type < e1000_82571) {
 		if(!adapter->blink_timer.function) {
 			init_timer(&adapter->blink_timer);
 			adapter->blink_timer.function = e1000_led_blink_callback;
@@ -1739,6 +1777,7 @@ struct ethtool_ops e1000_ethtool_ops = {
 	.phys_id                = e1000_phys_id,
 	.get_stats_count        = e1000_get_stats_count,
 	.get_ethtool_stats      = e1000_get_ethtool_stats,
+	.get_perm_addr		= ethtool_op_get_perm_addr,
 };
 
 void e1000_set_ethtool_ops(struct net_device *netdev)

+ 187 - 33
drivers/net/e1000/e1000_hw.c

@@ -83,14 +83,14 @@ uint16_t e1000_igp_cable_length_table[IGP01E1000_AGC_LENGTH_TABLE_SIZE] =
 
 static const
 uint16_t e1000_igp_2_cable_length_table[IGP02E1000_AGC_LENGTH_TABLE_SIZE] =
-    { 8, 13, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43,
-      22, 24, 27, 30, 32, 35, 37, 40, 42, 44, 47, 49, 51, 54, 56, 58,
-      32, 35, 38, 41, 44, 47, 50, 53, 55, 58, 61, 63, 66, 69, 71, 74,
-      43, 47, 51, 54, 58, 61, 64, 67, 71, 74, 77, 80, 82, 85, 88, 90,
-      57, 62, 66, 70, 74, 77, 81, 85, 88, 91, 94, 97, 100, 103, 106, 108,
-      73, 78, 82, 87, 91, 95, 98, 102, 105, 109, 112, 114, 117, 119, 122, 124,
-      91, 96, 101, 105, 109, 113, 116, 119, 122, 125, 127, 128, 128, 128, 128, 128,
-      108, 113, 117, 121, 124, 127, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128};
+    { 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21,
+      0, 0, 0, 3, 6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41,
+      6, 10, 14, 18, 22, 26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61,
+      21, 26, 31, 35, 40, 44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82,
+      40, 45, 51, 56, 61, 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104,
+      60, 66, 72, 77, 82, 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121,
+      83, 89, 95, 100, 105, 109, 113, 116, 119, 122, 124,
+      104, 109, 114, 118, 121, 124};
 
 
 /******************************************************************************
@@ -286,7 +286,6 @@ e1000_set_mac_type(struct e1000_hw *hw)
     case E1000_DEV_ID_82546GB_FIBER:
     case E1000_DEV_ID_82546GB_SERDES:
     case E1000_DEV_ID_82546GB_PCIE:
-    case E1000_DEV_ID_82546GB_QUAD_COPPER:
         hw->mac_type = e1000_82546_rev_3;
         break;
     case E1000_DEV_ID_82541EI:
@@ -305,8 +304,19 @@ e1000_set_mac_type(struct e1000_hw *hw)
     case E1000_DEV_ID_82547GI:
         hw->mac_type = e1000_82547_rev_2;
         break;
+    case E1000_DEV_ID_82571EB_COPPER:
+    case E1000_DEV_ID_82571EB_FIBER:
+    case E1000_DEV_ID_82571EB_SERDES:
+            hw->mac_type = e1000_82571;
+        break;
+    case E1000_DEV_ID_82572EI_COPPER:
+    case E1000_DEV_ID_82572EI_FIBER:
+    case E1000_DEV_ID_82572EI_SERDES:
+        hw->mac_type = e1000_82572;
+        break;
     case E1000_DEV_ID_82573E:
     case E1000_DEV_ID_82573E_IAMT:
+    case E1000_DEV_ID_82573L:
         hw->mac_type = e1000_82573;
         break;
     default:
@@ -315,6 +325,8 @@ e1000_set_mac_type(struct e1000_hw *hw)
     }
 
     switch(hw->mac_type) {
+    case e1000_82571:
+    case e1000_82572:
     case e1000_82573:
         hw->eeprom_semaphore_present = TRUE;
         /* fall through */
@@ -351,6 +363,8 @@ e1000_set_media_type(struct e1000_hw *hw)
     switch (hw->device_id) {
     case E1000_DEV_ID_82545GM_SERDES:
     case E1000_DEV_ID_82546GB_SERDES:
+    case E1000_DEV_ID_82571EB_SERDES:
+    case E1000_DEV_ID_82572EI_SERDES:
         hw->media_type = e1000_media_type_internal_serdes;
         break;
     default:
@@ -523,6 +537,8 @@ e1000_reset_hw(struct e1000_hw *hw)
             E1000_WRITE_REG(hw, CTRL_EXT, ctrl_ext);
             E1000_WRITE_FLUSH(hw);
             /* fall through */
+        case e1000_82571:
+        case e1000_82572:
             ret_val = e1000_get_auto_rd_done(hw);
             if(ret_val)
                 /* We don't want to continue accessing MAC registers. */
@@ -683,6 +699,9 @@ e1000_init_hw(struct e1000_hw *hw)
         switch (hw->mac_type) {
         default:
             break;
+        case e1000_82571:
+        case e1000_82572:
+            ctrl |= (1 << 22);
         case e1000_82573:
             ctrl |= E1000_TXDCTL_COUNT_DESC;
             break;
@@ -694,6 +713,26 @@ e1000_init_hw(struct e1000_hw *hw)
         e1000_enable_tx_pkt_filtering(hw); 
     }
 
+    switch (hw->mac_type) {
+    default:
+        break;
+    case e1000_82571:
+    case e1000_82572:
+        ctrl = E1000_READ_REG(hw, TXDCTL1);
+        ctrl &= ~E1000_TXDCTL_WTHRESH;
+        ctrl |= E1000_TXDCTL_COUNT_DESC | E1000_TXDCTL_FULL_TX_DESC_WB;
+        ctrl |= (1 << 22);
+        E1000_WRITE_REG(hw, TXDCTL1, ctrl);
+        break;
+    }
+
+
+
+    if (hw->mac_type == e1000_82573) {
+        uint32_t gcr = E1000_READ_REG(hw, GCR);
+        gcr |= E1000_GCR_L1_ACT_WITHOUT_L0S_RX;
+        E1000_WRITE_REG(hw, GCR, gcr);
+    }
 
     /* Clear all of the statistics registers (clear on read).  It is
      * important that we do this after we have tried to establish link
@@ -878,6 +917,14 @@ e1000_setup_fiber_serdes_link(struct e1000_hw *hw)
 
     DEBUGFUNC("e1000_setup_fiber_serdes_link");
 
+    /* On 82571 and 82572 Fiber connections, SerDes loopback mode persists
+     * until explicitly turned off or a power cycle is performed.  A read to
+     * the register does not indicate its status.  Therefore, we ensure
+     * loopback mode is disabled during initialization.
+     */
+    if (hw->mac_type == e1000_82571 || hw->mac_type == e1000_82572)
+        E1000_WRITE_REG(hw, SCTL, E1000_DISABLE_SERDES_LOOPBACK);
+
     /* On adapters with a MAC newer than 82544, SW Definable pin 1 will be
      * set when the optics detect a signal. On older adapters, it will be
      * cleared when there is a signal.  This applies to fiber media only.
@@ -2943,6 +2990,8 @@ e1000_phy_reset(struct e1000_hw *hw)
 
     switch (hw->mac_type) {
     case e1000_82541_rev_2:
+    case e1000_82571:
+    case e1000_82572:
         ret_val = e1000_phy_hw_reset(hw);
         if(ret_val)
             return ret_val;
@@ -2981,6 +3030,16 @@ e1000_detect_gig_phy(struct e1000_hw *hw)
 
     DEBUGFUNC("e1000_detect_gig_phy");
 
+    /* The 82571 firmware may still be configuring the PHY.  In this
+     * case, we cannot access the PHY until the configuration is done.  So
+     * we explicitly set the PHY values. */
+    if(hw->mac_type == e1000_82571 ||
+       hw->mac_type == e1000_82572) {
+        hw->phy_id = IGP01E1000_I_PHY_ID;
+        hw->phy_type = e1000_phy_igp_2;
+        return E1000_SUCCESS;
+    }
+
     /* Read the PHY ID Registers to identify which PHY is onboard. */
     ret_val = e1000_read_phy_reg(hw, PHY_ID1, &phy_id_high);
     if(ret_val)
@@ -3334,6 +3393,21 @@ e1000_init_eeprom_params(struct e1000_hw *hw)
         eeprom->use_eerd = FALSE;
         eeprom->use_eewr = FALSE;
         break;
+    case e1000_82571:
+    case e1000_82572:
+        eeprom->type = e1000_eeprom_spi;
+        eeprom->opcode_bits = 8;
+        eeprom->delay_usec = 1;
+        if (eecd & E1000_EECD_ADDR_BITS) {
+            eeprom->page_size = 32;
+            eeprom->address_bits = 16;
+        } else {
+            eeprom->page_size = 8;
+            eeprom->address_bits = 8;
+        }
+        eeprom->use_eerd = FALSE;
+        eeprom->use_eewr = FALSE;
+        break;
     case e1000_82573:
         eeprom->type = e1000_eeprom_spi;
         eeprom->opcode_bits = 8;
@@ -3543,25 +3617,26 @@ e1000_acquire_eeprom(struct e1000_hw *hw)
     eecd = E1000_READ_REG(hw, EECD);
 
     if (hw->mac_type != e1000_82573) {
-    /* Request EEPROM Access */
-    if(hw->mac_type > e1000_82544) {
-        eecd |= E1000_EECD_REQ;
-        E1000_WRITE_REG(hw, EECD, eecd);
-        eecd = E1000_READ_REG(hw, EECD);
-        while((!(eecd & E1000_EECD_GNT)) &&
-              (i < E1000_EEPROM_GRANT_ATTEMPTS)) {
-            i++;
-            udelay(5);
-            eecd = E1000_READ_REG(hw, EECD);
-        }
-        if(!(eecd & E1000_EECD_GNT)) {
-            eecd &= ~E1000_EECD_REQ;
+        /* Request EEPROM Access */
+        if(hw->mac_type > e1000_82544) {
+            eecd |= E1000_EECD_REQ;
             E1000_WRITE_REG(hw, EECD, eecd);
-            DEBUGOUT("Could not acquire EEPROM grant\n");
-            return -E1000_ERR_EEPROM;
+            eecd = E1000_READ_REG(hw, EECD);
+            while((!(eecd & E1000_EECD_GNT)) &&
+                  (i < E1000_EEPROM_GRANT_ATTEMPTS)) {
+                i++;
+                udelay(5);
+                eecd = E1000_READ_REG(hw, EECD);
+            }
+            if(!(eecd & E1000_EECD_GNT)) {
+                eecd &= ~E1000_EECD_REQ;
+                E1000_WRITE_REG(hw, EECD, eecd);
+                DEBUGOUT("Could not acquire EEPROM grant\n");
+                e1000_put_hw_eeprom_semaphore(hw);
+                return -E1000_ERR_EEPROM;
+            }
         }
     }
-    }
 
     /* Setup EEPROM for Read/Write */
 
@@ -4064,7 +4139,7 @@ e1000_write_eeprom(struct e1000_hw *hw,
         return -E1000_ERR_EEPROM;
     }
 
-    /* 82573 reads only through eerd */
+    /* 82573 writes only through eewr */
     if(eeprom->use_eewr == TRUE)
         return e1000_write_eeprom_eewr(hw, offset, words, data);
 
@@ -4353,9 +4428,16 @@ e1000_read_mac_addr(struct e1000_hw * hw)
         hw->perm_mac_addr[i] = (uint8_t) (eeprom_data & 0x00FF);
         hw->perm_mac_addr[i+1] = (uint8_t) (eeprom_data >> 8);
     }
-    if(((hw->mac_type == e1000_82546) || (hw->mac_type == e1000_82546_rev_3)) &&
-       (E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1))
+    switch (hw->mac_type) {
+    default:
+        break;
+    case e1000_82546:
+    case e1000_82546_rev_3:
+    case e1000_82571:
+        if(E1000_READ_REG(hw, STATUS) & E1000_STATUS_FUNC_1)
             hw->perm_mac_addr[5] ^= 0x01;
+        break;
+    }
 
     for(i = 0; i < NODE_ADDRESS_SIZE; i++)
         hw->mac_addr[i] = hw->perm_mac_addr[i];
@@ -4385,6 +4467,12 @@ e1000_init_rx_addrs(struct e1000_hw *hw)
     e1000_rar_set(hw, hw->mac_addr, 0);
 
     rar_num = E1000_RAR_ENTRIES;
+
+    /* Reserve a spot for the Locally Administered Address to work around
+     * an 82571 issue in which a reset on one port will reload the MAC on
+     * the other port. */
+    if ((hw->mac_type == e1000_82571) && (hw->laa_is_present == TRUE))
+        rar_num -= 1;
     /* Zero out the other 15 receive addresses. */
     DEBUGOUT("Clearing RAR[1-15]\n");
     for(i = 1; i < rar_num; i++) {
@@ -4427,6 +4515,12 @@ e1000_mc_addr_list_update(struct e1000_hw *hw,
     /* Clear RAR[1-15] */
     DEBUGOUT(" Clearing RAR[1-15]\n");
     num_rar_entry = E1000_RAR_ENTRIES;
+    /* Reserve a spot for the Locally Administered Address to work around
+     * an 82571 issue in which a reset on one port will reload the MAC on
+     * the other port. */
+    if ((hw->mac_type == e1000_82571) && (hw->laa_is_present == TRUE))
+        num_rar_entry -= 1;
+
     for(i = rar_used_count; i < num_rar_entry; i++) {
         E1000_WRITE_REG_ARRAY(hw, RA, (i << 1), 0);
         E1000_WRITE_REG_ARRAY(hw, RA, ((i << 1) + 1), 0);
@@ -4984,7 +5078,6 @@ e1000_clear_hw_cntrs(struct e1000_hw *hw)
     temp = E1000_READ_REG(hw, ICTXQEC);
     temp = E1000_READ_REG(hw, ICTXQMTC);
     temp = E1000_READ_REG(hw, ICRXDMTC);
-
 }
 
 /******************************************************************************
@@ -5151,6 +5244,8 @@ e1000_get_bus_info(struct e1000_hw *hw)
         hw->bus_speed = e1000_bus_speed_unknown;
         hw->bus_width = e1000_bus_width_unknown;
         break;
+    case e1000_82571:
+    case e1000_82572:
     case e1000_82573:
         hw->bus_type = e1000_bus_type_pci_express;
         hw->bus_speed = e1000_bus_speed_2500;
@@ -5250,6 +5345,7 @@ e1000_get_cable_length(struct e1000_hw *hw,
     int32_t ret_val;
     uint16_t agc_value = 0;
     uint16_t cur_agc, min_agc = IGP01E1000_AGC_LENGTH_TABLE_SIZE;
+    uint16_t max_agc = 0;
     uint16_t i, phy_data;
     uint16_t cable_length;
 
@@ -5338,6 +5434,40 @@ e1000_get_cable_length(struct e1000_hw *hw,
                        IGP01E1000_AGC_RANGE) : 0;
         *max_length = e1000_igp_cable_length_table[agc_value] +
                       IGP01E1000_AGC_RANGE;
+    } else if (hw->phy_type == e1000_phy_igp_2) {
+        uint16_t agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] =
+                                                         {IGP02E1000_PHY_AGC_A,
+                                                          IGP02E1000_PHY_AGC_B,
+                                                          IGP02E1000_PHY_AGC_C,
+                                                          IGP02E1000_PHY_AGC_D};
+        /* Read the AGC registers for all channels */
+        for (i = 0; i < IGP02E1000_PHY_CHANNEL_NUM; i++) {
+            ret_val = e1000_read_phy_reg(hw, agc_reg_array[i], &phy_data);
+            if (ret_val)
+                return ret_val;
+
+	    /* Getting bits 15:9, which represent the combination of coarse and
+             * fine gain values.  The result is a number that can be put into
+             * the lookup table to obtain the approximate cable length. */
+            cur_agc = (phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) &
+                      IGP02E1000_AGC_LENGTH_MASK;
+
+            /* Remove min & max AGC values from calculation. */
+            if (e1000_igp_2_cable_length_table[min_agc] > e1000_igp_2_cable_length_table[cur_agc])
+                min_agc = cur_agc;
+	    if (e1000_igp_2_cable_length_table[max_agc] < e1000_igp_2_cable_length_table[cur_agc])
+                max_agc = cur_agc;
+
+            agc_value += e1000_igp_2_cable_length_table[cur_agc];
+        }
+
+        agc_value -= (e1000_igp_2_cable_length_table[min_agc] + e1000_igp_2_cable_length_table[max_agc]);
+        agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2);
+
+        /* Calculate cable length with the error range of +/- 10 meters. */
+        *min_length = ((agc_value - IGP02E1000_AGC_RANGE) > 0) ?
+                       (agc_value - IGP02E1000_AGC_RANGE) : 0;
+        *max_length = agc_value + IGP02E1000_AGC_RANGE;
     }
 
     return E1000_SUCCESS;
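
The igp_2 branch averages the four AGC channel readings after discarding the shortest and longest, then reports a window of ±IGP02E1000_AGC_RANGE around the average. With illustrative table values:

	/* readings  = {40, 45, 51, 56}  (metres, via the lookup table)
	 * sum       = 192
	 * trimmed   = 192 - 40 - 56 = 96      (drop min and max)
	 * agc_value = 96 / (4 - 2)   = 48
	 * range     = 48 +/- 10  ->  *min_length = 38, *max_length = 58
	 */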
@@ -6465,6 +6595,8 @@ e1000_get_auto_rd_done(struct e1000_hw *hw)
     default:
         msec_delay(5);
         break;
+    case e1000_82571:
+    case e1000_82572:
     case e1000_82573:
         while(timeout) {
             if (E1000_READ_REG(hw, EECD) & E1000_EECD_AUTO_RD) break;
@@ -6494,10 +6626,31 @@ e1000_get_auto_rd_done(struct e1000_hw *hw)
 int32_t
 e1000_get_phy_cfg_done(struct e1000_hw *hw)
 {
+    int32_t timeout = PHY_CFG_TIMEOUT;
+    uint32_t cfg_mask = E1000_EEPROM_CFG_DONE;
+
     DEBUGFUNC("e1000_get_phy_cfg_done");
 
-    /* Simply wait for 10ms */
-    msec_delay(10);
+    switch (hw->mac_type) {
+    default:
+        msec_delay(10);
+        break;
+    case e1000_82571:
+    case e1000_82572:
+        while (timeout) {
+            if (E1000_READ_REG(hw, EEMNGCTL) & cfg_mask)
+                break;
+            else
+                msec_delay(1);
+            timeout--;
+        }
+
+        if (!timeout) {
+            DEBUGOUT("MNG configuration cycle has not completed.\n");
+            return -E1000_ERR_RESET;
+        }
+        break;
+    }
 
     return E1000_SUCCESS;
 }
@@ -6569,8 +6722,7 @@ e1000_put_hw_eeprom_semaphore(struct e1000_hw *hw)
         return;
 
     swsm = E1000_READ_REG(hw, SWSM);
-    /* Release both semaphores. */
-    swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI);
+        swsm &= ~(E1000_SWSM_SWESMBI);
     E1000_WRITE_REG(hw, SWSM, swsm);
 }
 
@@ -6606,6 +6758,8 @@ e1000_arc_subsystem_valid(struct e1000_hw *hw)
      * if this is the case.  We read FWSM to determine the manageability mode.
      */
     switch (hw->mac_type) {
+    case e1000_82571:
+    case e1000_82572:
     case e1000_82573:
         fwsm = E1000_READ_REG(hw, FWSM);
         if((fwsm & E1000_FWSM_MODE_MASK) != 0)

+ 84 - 12
drivers/net/e1000/e1000_hw.h

@@ -57,6 +57,8 @@ typedef enum {
     e1000_82541_rev_2,
     e1000_82547,
     e1000_82547_rev_2,
+    e1000_82571,
+    e1000_82572,
     e1000_82573,
     e1000_num_macs
 } e1000_mac_type;
@@ -478,10 +480,16 @@ uint8_t e1000_arc_subsystem_valid(struct e1000_hw *hw);
 #define E1000_DEV_ID_82546GB_SERDES      0x107B
 #define E1000_DEV_ID_82546GB_PCIE        0x108A
 #define E1000_DEV_ID_82547EI             0x1019
+#define E1000_DEV_ID_82571EB_COPPER      0x105E
+#define E1000_DEV_ID_82571EB_FIBER       0x105F
+#define E1000_DEV_ID_82571EB_SERDES      0x1060
+#define E1000_DEV_ID_82572EI_COPPER      0x107D
+#define E1000_DEV_ID_82572EI_FIBER       0x107E
+#define E1000_DEV_ID_82572EI_SERDES      0x107F
 #define E1000_DEV_ID_82573E              0x108B
 #define E1000_DEV_ID_82573E_IAMT         0x108C
+#define E1000_DEV_ID_82573L              0x109A
 
-#define E1000_DEV_ID_82546GB_QUAD_COPPER 0x1099
 
 #define NODE_ADDRESS_SIZE 6
 #define ETH_LENGTH_OF_ADDRESS 6
@@ -833,6 +841,8 @@ struct e1000_ffvt_entry {
 #define E1000_FFMT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX
 #define E1000_FFVT_SIZE E1000_FLEXIBLE_FILTER_SIZE_MAX
 
+#define E1000_DISABLE_SERDES_LOOPBACK   0x0400
+
 /* Register Set. (82543, 82544)
  *
  * Registers are defined to be 32 bits and  should be accessed as 32 bit values.
@@ -853,6 +863,7 @@ struct e1000_ffvt_entry {
 #define E1000_CTRL_EXT 0x00018  /* Extended Device Control - RW */
 #define E1000_FLA      0x0001C  /* Flash Access - RW */
 #define E1000_MDIC     0x00020  /* MDI Control - RW */
+#define E1000_SCTL     0x00024  /* SerDes Control - RW */
 #define E1000_FCAL     0x00028  /* Flow Control Address Low - RW */
 #define E1000_FCAH     0x0002C  /* Flow Control Address High -RW */
 #define E1000_FCT      0x00030  /* Flow Control Type - RW */
@@ -864,6 +875,12 @@ struct e1000_ffvt_entry {
 #define E1000_IMC      0x000D8  /* Interrupt Mask Clear - WO */
 #define E1000_IAM      0x000E0  /* Interrupt Acknowledge Auto Mask */
 #define E1000_RCTL     0x00100  /* RX Control - RW */
+#define E1000_RDTR1    0x02820  /* RX Delay Timer (1) - RW */
+#define E1000_RDBAL1   0x02900  /* RX Descriptor Base Address Low (1) - RW */
+#define E1000_RDBAH1   0x02904  /* RX Descriptor Base Address High (1) - RW */
+#define E1000_RDLEN1   0x02908  /* RX Descriptor Length (1) - RW */
+#define E1000_RDH1     0x02910  /* RX Descriptor Head (1) - RW */
+#define E1000_RDT1     0x02918  /* RX Descriptor Tail (1) - RW */
 #define E1000_FCTTV    0x00170  /* Flow Control Transmit Timer Value - RW */
 #define E1000_TXCW     0x00178  /* TX Configuration Word - RW */
 #define E1000_RXCW     0x00180  /* RX Configuration Word - RO */
@@ -895,6 +912,12 @@ struct e1000_ffvt_entry {
 #define E1000_RDH      0x02810  /* RX Descriptor Head - RW */
 #define E1000_RDT      0x02818  /* RX Descriptor Tail - RW */
 #define E1000_RDTR     0x02820  /* RX Delay Timer - RW */
+#define E1000_RDBAL0   E1000_RDBAL /* RX Desc Base Address Low (0) - RW */
+#define E1000_RDBAH0   E1000_RDBAH /* RX Desc Base Address High (0) - RW */
+#define E1000_RDLEN0   E1000_RDLEN /* RX Desc Length (0) - RW */
+#define E1000_RDH0     E1000_RDH   /* RX Desc Head (0) - RW */
+#define E1000_RDT0     E1000_RDT   /* RX Desc Tail (0) - RW */
+#define E1000_RDTR0    E1000_RDTR  /* RX Delay Timer (0) - RW */
 #define E1000_RXDCTL   0x02828  /* RX Descriptor Control - RW */
 #define E1000_RADV     0x0282C  /* RX Interrupt Absolute Delay Timer - RW */
 #define E1000_RSRPD    0x02C00  /* RX Small Packet Detect - RW */
@@ -980,15 +1003,15 @@ struct e1000_ffvt_entry {
 #define E1000_BPTC     0x040F4  /* Broadcast Packets TX Count - R/clr */
 #define E1000_TSCTC    0x040F8  /* TCP Segmentation Context TX - R/clr */
 #define E1000_TSCTFC   0x040FC  /* TCP Segmentation Context TX Fail - R/clr */
-#define E1000_IAC       0x4100  /* Interrupt Assertion Count */
-#define E1000_ICRXPTC   0x4104  /* Interrupt Cause Rx Packet Timer Expire Count */
-#define E1000_ICRXATC   0x4108  /* Interrupt Cause Rx Absolute Timer Expire Count */
-#define E1000_ICTXPTC   0x410C  /* Interrupt Cause Tx Packet Timer Expire Count */
-#define E1000_ICTXATC   0x4110  /* Interrupt Cause Tx Absolute Timer Expire Count */
-#define E1000_ICTXQEC   0x4118  /* Interrupt Cause Tx Queue Empty Count */
-#define E1000_ICTXQMTC  0x411C  /* Interrupt Cause Tx Queue Minimum Threshold Count */
-#define E1000_ICRXDMTC  0x4120  /* Interrupt Cause Rx Descriptor Minimum Threshold Count */
-#define E1000_ICRXOC    0x4124  /* Interrupt Cause Receiver Overrun Count */
+#define E1000_IAC      0x04100  /* Interrupt Assertion Count */
+#define E1000_ICRXPTC  0x04104  /* Interrupt Cause Rx Packet Timer Expire Count */
+#define E1000_ICRXATC  0x04108  /* Interrupt Cause Rx Absolute Timer Expire Count */
+#define E1000_ICTXPTC  0x0410C  /* Interrupt Cause Tx Packet Timer Expire Count */
+#define E1000_ICTXATC  0x04110  /* Interrupt Cause Tx Absolute Timer Expire Count */
+#define E1000_ICTXQEC  0x04118  /* Interrupt Cause Tx Queue Empty Count */
+#define E1000_ICTXQMTC 0x0411C  /* Interrupt Cause Tx Queue Minimum Threshold Count */
+#define E1000_ICRXDMTC 0x04120  /* Interrupt Cause Rx Descriptor Minimum Threshold Count */
+#define E1000_ICRXOC   0x04124  /* Interrupt Cause Receiver Overrun Count */
 #define E1000_RXCSUM   0x05000  /* RX Checksum Control - RW */
 #define E1000_RFCTL    0x05008  /* Receive Filter Control */
 #define E1000_MTA      0x05200  /* Multicast Table Array - RW Array */
@@ -1018,6 +1041,14 @@ struct e1000_ffvt_entry {
 #define E1000_FWSM      0x05B54 /* FW Semaphore */
 #define E1000_FFLT_DBG  0x05F04 /* Debug Register */
 #define E1000_HICR      0x08F00 /* Host Interface Control */
+
+/* RSS registers */
+#define E1000_CPUVEC    0x02C10 /* CPU Vector Register - RW */
+#define E1000_MRQC      0x05818 /* Multiple Receive Control - RW */
+#define E1000_RETA      0x05C00 /* Redirection Table - RW Array */
+#define E1000_RSSRK     0x05C80 /* RSS Random Key - RW Array */
+#define E1000_RSSIM     0x05864 /* RSS Interrupt Mask */
+#define E1000_RSSIR     0x05868 /* RSS Interrupt Request */
 /* Register Set (82542)
  *
  * Some of the 82542 registers are located at different offsets than they are
@@ -1032,6 +1063,7 @@ struct e1000_ffvt_entry {
 #define E1000_82542_CTRL_EXT E1000_CTRL_EXT
 #define E1000_82542_FLA      E1000_FLA
 #define E1000_82542_MDIC     E1000_MDIC
+#define E1000_82542_SCTL     E1000_SCTL
 #define E1000_82542_FCAL     E1000_FCAL
 #define E1000_82542_FCAH     E1000_FCAH
 #define E1000_82542_FCT      E1000_FCT
@@ -1049,6 +1081,18 @@ struct e1000_ffvt_entry {
 #define E1000_82542_RDLEN    0x00118
 #define E1000_82542_RDH      0x00120
 #define E1000_82542_RDT      0x00128
+#define E1000_82542_RDTR0    E1000_82542_RDTR
+#define E1000_82542_RDBAL0   E1000_82542_RDBAL
+#define E1000_82542_RDBAH0   E1000_82542_RDBAH
+#define E1000_82542_RDLEN0   E1000_82542_RDLEN
+#define E1000_82542_RDH0     E1000_82542_RDH
+#define E1000_82542_RDT0     E1000_82542_RDT
+#define E1000_82542_RDTR1    0x00130
+#define E1000_82542_RDBAL1   0x00138
+#define E1000_82542_RDBAH1   0x0013C
+#define E1000_82542_RDLEN1   0x00140
+#define E1000_82542_RDH1     0x00148
+#define E1000_82542_RDT1     0x00150
 #define E1000_82542_FCRTH    0x00160
 #define E1000_82542_FCRTL    0x00168
 #define E1000_82542_FCTTV    E1000_FCTTV
@@ -1197,6 +1241,13 @@ struct e1000_ffvt_entry {
 #define E1000_82542_ICRXOC      E1000_ICRXOC
 #define E1000_82542_HICR        E1000_HICR
 
+#define E1000_82542_CPUVEC      E1000_CPUVEC
+#define E1000_82542_MRQC        E1000_MRQC
+#define E1000_82542_RETA        E1000_RETA
+#define E1000_82542_RSSRK       E1000_RSSRK
+#define E1000_82542_RSSIM       E1000_RSSIM
+#define E1000_82542_RSSIR       E1000_RSSIR
+
 /* Statistics counters collected by the MAC */
 struct e1000_hw_stats {
     uint64_t crcerrs;
@@ -1336,6 +1387,7 @@ struct e1000_hw {
     boolean_t serdes_link_down;
     boolean_t tbi_compatibility_en;
     boolean_t tbi_compatibility_on;
+    boolean_t laa_is_present;
     boolean_t phy_reset_disable;
     boolean_t fc_send_xon;
     boolean_t fc_strict_ieee;
@@ -1374,6 +1426,7 @@ struct e1000_hw {
 #define E1000_CTRL_BEM32    0x00000400  /* Big Endian 32 mode */
 #define E1000_CTRL_FRCSPD   0x00000800  /* Force Speed */
 #define E1000_CTRL_FRCDPX   0x00001000  /* Force Duplex */
+#define E1000_CTRL_D_UD_EN  0x00002000  /* Dock/Undock enable */
 #define E1000_CTRL_D_UD_POLARITY 0x00004000 /* Defined polarity of Dock/Undock indication in SDP[0] */
 #define E1000_CTRL_SWDPIN0  0x00040000  /* SWDPIN 0 value */
 #define E1000_CTRL_SWDPIN1  0x00080000  /* SWDPIN 1 value */
@@ -1491,6 +1544,8 @@ struct e1000_hw {
 #define E1000_CTRL_EXT_WR_WMARK_320   0x01000000
 #define E1000_CTRL_EXT_WR_WMARK_384   0x02000000
 #define E1000_CTRL_EXT_WR_WMARK_448   0x03000000
+#define E1000_CTRL_EXT_CANC           0x04000000  /* Interrupt delay cancellation */
+#define E1000_CTRL_EXT_DRV_LOAD       0x10000000  /* Driver loaded bit for FW */
 #define E1000_CTRL_EXT_IAME           0x08000000  /* Interrupt acknowledge Auto-mask */
 #define E1000_CTRL_EXT_INT_TIMER_CLR  0x20000000  /* Clear Interrupt timers after IMS clear */
 
@@ -1524,6 +1579,7 @@ struct e1000_hw {
 #define E1000_LEDCTL_LED2_BLINK           0x00800000
 #define E1000_LEDCTL_LED3_MODE_MASK       0x0F000000
 #define E1000_LEDCTL_LED3_MODE_SHIFT      24
+#define E1000_LEDCTL_LED3_BLINK_RATE      0x20000000
 #define E1000_LEDCTL_LED3_IVRT            0x40000000
 #define E1000_LEDCTL_LED3_BLINK           0x80000000
 
@@ -1784,6 +1840,16 @@ struct e1000_hw {
 #define E1000_RXCSUM_IPPCSE    0x00001000   /* IP payload checksum enable */
 #define E1000_RXCSUM_PCSD      0x00002000   /* packet checksum disabled */
 
+/* Multiple Receive Queue Control */
+#define E1000_MRQC_ENABLE_MASK              0x00000003
+#define E1000_MRQC_ENABLE_RSS_2Q            0x00000001
+#define E1000_MRQC_ENABLE_RSS_INT           0x00000004
+#define E1000_MRQC_RSS_FIELD_MASK           0xFFFF0000
+#define E1000_MRQC_RSS_FIELD_IPV4_TCP       0x00010000
+#define E1000_MRQC_RSS_FIELD_IPV4           0x00020000
+#define E1000_MRQC_RSS_FIELD_IPV6_TCP       0x00040000
+#define E1000_MRQC_RSS_FIELD_IPV6_EX        0x00080000
+#define E1000_MRQC_RSS_FIELD_IPV6           0x00100000
 
 /* Definitions for power management and wakeup registers */
 /* Wake Up Control */
@@ -1928,6 +1994,7 @@ struct e1000_host_command_info {
 #define E1000_MDALIGN          4096
 
 #define E1000_GCR_BEM32                 0x00400000
+#define E1000_GCR_L1_ACT_WITHOUT_L0S_RX 0x08000000
 /* Function Active and Power State to MNG */
 #define E1000_FACTPS_FUNC0_POWER_STATE_MASK         0x00000003
 #define E1000_FACTPS_LAN0_VALID                     0x00000004
@@ -1980,6 +2047,7 @@ struct e1000_host_command_info {
 /* EEPROM Word Offsets */
 #define EEPROM_COMPAT                 0x0003
 #define EEPROM_ID_LED_SETTINGS        0x0004
+#define EEPROM_VERSION                0x0005
 #define EEPROM_SERDES_AMPLITUDE       0x0006 /* For SERDES output amplitude adjustment. */
 #define EEPROM_PHY_CLASS_WORD         0x0007
 #define EEPROM_INIT_CONTROL1_REG      0x000A
@@ -1990,6 +2058,8 @@ struct e1000_host_command_info {
 #define EEPROM_FLASH_VERSION          0x0032
 #define EEPROM_CHECKSUM_REG           0x003F
 
+#define E1000_EEPROM_CFG_DONE         0x00040000   /* MNG config cycle done */
+
 /* Word definitions for ID LED Settings */
 #define ID_LED_RESERVED_0000 0x0000
 #define ID_LED_RESERVED_FFFF 0xFFFF
@@ -2108,6 +2178,8 @@ struct e1000_host_command_info {
 #define E1000_PBA_22K 0x0016
 #define E1000_PBA_24K 0x0018
 #define E1000_PBA_30K 0x001E
+#define E1000_PBA_32K 0x0020
+#define E1000_PBA_38K 0x0026
 #define E1000_PBA_40K 0x0028
 #define E1000_PBA_48K 0x0030    /* 48KB, default RX allocation */
 
@@ -2592,11 +2664,11 @@ struct e1000_host_command_info {
 
 /* 7 bits (3 Coarse + 4 Fine) --> 128 optional values */
 #define IGP01E1000_AGC_LENGTH_TABLE_SIZE 128
-#define IGP02E1000_AGC_LENGTH_TABLE_SIZE 128
+#define IGP02E1000_AGC_LENGTH_TABLE_SIZE 113
 
 /* The precision error of the cable length is +/- 10 meters */
 #define IGP01E1000_AGC_RANGE    10
-#define IGP02E1000_AGC_RANGE    10
+#define IGP02E1000_AGC_RANGE    15
 
 /* IGP01E1000 PCS Initialization register */
 /* bits 3:6 in the PCS registers stores the channels polarity */

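The RSS additions above are self-contained register and bit definitions; as an illustration only (not code from this patch), a driver could combine them to enable two-queue RSS. The E1000_WRITE_REG() accessor and the hw handle are assumed from the existing e1000 sources:

	/* Hypothetical sketch: hash on IPv4 and TCP/IPv4 headers and
	 * spread receives across two queues, using only the new defines.
	 * E1000_WRITE_REG() and 'hw' are assumptions taken from the
	 * existing e1000 code, not part of this patch. */
	uint32_t mrqc = E1000_MRQC_ENABLE_RSS_2Q |
	                E1000_MRQC_RSS_FIELD_IPV4 |
	                E1000_MRQC_RSS_FIELD_IPV4_TCP;
	E1000_WRITE_REG(hw, MRQC, mrqc);
	/* Queue 1 would then be given its own descriptor ring through the
	 * new RDBAL1/RDBAH1/RDLEN1/RDH1/RDT1 registers. */
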
File diff suppressed because it is too large
+ 496 - 112
drivers/net/e1000/e1000_main.c


+ 8 - 2
drivers/net/e1000/e1000_param.c

@@ -306,7 +306,8 @@ e1000_check_options(struct e1000_adapter *adapter)
 			.def  = E1000_DEFAULT_TXD,
 			.arg  = { .r = { .min = E1000_MIN_TXD }}
 		};
-		struct e1000_desc_ring *tx_ring = &adapter->tx_ring;
+		struct e1000_tx_ring *tx_ring = adapter->tx_ring;
+		int i;
 		e1000_mac_type mac_type = adapter->hw.mac_type;
 		opt.arg.r.max = mac_type < e1000_82544 ?
 			E1000_MAX_TXD : E1000_MAX_82544_TXD;
@@ -319,6 +320,8 @@ e1000_check_options(struct e1000_adapter *adapter)
 		} else {
 			tx_ring->count = opt.def;
 		}
+		for (i = 0; i < adapter->num_queues; i++)
+			tx_ring[i].count = tx_ring->count;
 	}
 	{ /* Receive Descriptor Count */
 		struct e1000_option opt = {
@@ -329,7 +332,8 @@ e1000_check_options(struct e1000_adapter *adapter)
 			.def  = E1000_DEFAULT_RXD,
 			.arg  = { .r = { .min = E1000_MIN_RXD }}
 		};
-		struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
+		struct e1000_rx_ring *rx_ring = adapter->rx_ring;
+		int i;
 		e1000_mac_type mac_type = adapter->hw.mac_type;
 		opt.arg.r.max = mac_type < e1000_82544 ? E1000_MAX_RXD :
 			E1000_MAX_82544_RXD;
@@ -342,6 +346,8 @@ e1000_check_options(struct e1000_adapter *adapter)
 		} else {
 			rx_ring->count = opt.def;
 		}
+		for (i = 0; i < adapter->num_queues; i++)
+			rx_ring[i].count = rx_ring->count;
 	}
 	{ /* Checksum Offload Enable/Disable */
 		struct e1000_option opt = {

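The loops added above exist because the adapter now carries an array of rings rather than a single one; whatever count survives validation for ring 0 is replicated to every queue. A small worked sketch under assumed values (TxDescriptors=1024, num_queues=2):

	/* Sketch with assumed values, mirroring the hunk above: the
	 * validated count lands in tx_ring->count, i.e. ring 0, and the
	 * loop copies it to every other queue. */
	struct e1000_tx_ring *tx_ring = adapter->tx_ring;
	int i;
	tx_ring->count = 1024;                  /* result of validation */
	for (i = 0; i < adapter->num_queues; i++)
		tx_ring[i].count = tx_ring->count;  /* 1024 on both rings */
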
+ 2 - 2
drivers/net/epic100.c

@@ -1334,7 +1334,7 @@ static void epic_rx_err(struct net_device *dev, struct epic_private *ep)
 static int epic_poll(struct net_device *dev, int *budget)
 {
 	struct epic_private *ep = dev->priv;
-	int work_done, orig_budget;
+	int work_done = 0, orig_budget;
 	long ioaddr = dev->base_addr;
 
 	orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
@@ -1343,7 +1343,7 @@ rx_action:
 
 	epic_tx(dev, ep);
 
-	work_done = epic_rx(dev, *budget);
+	work_done += epic_rx(dev, *budget);
 
 	epic_rx_err(dev, ep);
 

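The switch from '=' to '+=' above matters because epic_poll can branch back to rx_action when packets arrive while interrupts are being re-enabled; a bookkeeping sketch with assumed packet counts:

	/* Assumed counts across two passes through rx_action:
	 *   pass 1: epic_rx() consumes 10 packets
	 *   pass 2: epic_rx() consumes  2 packets
	 * With plain assignment the second pass overwrites work_done to 2
	 * and 10 packets disappear from the NAPI accounting; with '+=' the
	 * total is 12, matching what was really taken from *budget. */
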
+ 202 - 108
drivers/net/forcedeth.c

@@ -95,6 +95,8 @@
  *			   of nv_remove
  *      0.42: 06 Aug 2005: Fix lack of link speed initialization
  *			   in the second (and later) nv_open call
+ *      0.43: 10 Aug 2005: Add support for tx checksum.
+ *      0.44: 20 Aug 2005: Add support for scatter gather and segmentation.
  *
  * Known bugs:
  * We suspect that on some hardware no TX done interrupts are generated.
@@ -106,7 +108,7 @@
  * DEV_NEED_TIMERIRQ will not harm you on sane hardware, only generating a few
  * superfluous timer interrupts from the nic.
  */
-#define FORCEDETH_VERSION		"0.41"
+#define FORCEDETH_VERSION		"0.44"
 #define DRV_NAME			"forcedeth"
 
 #include <linux/module.h>
@@ -145,6 +147,7 @@
 #define DEV_NEED_LINKTIMER	0x0002	/* poll link settings. Relies on the timer irq */
 #define DEV_HAS_LARGEDESC	0x0004	/* device supports jumbo frames and needs packet format 2 */
 #define DEV_HAS_HIGH_DMA        0x0008  /* device supports 64bit dma */
+#define DEV_HAS_CHECKSUM        0x0010  /* device supports tx and rx checksum offloads */
 
 enum {
 	NvRegIrqStatus = 0x000,
@@ -241,6 +244,9 @@ enum {
 #define NVREG_TXRXCTL_IDLE	0x0008
 #define NVREG_TXRXCTL_RESET	0x0010
 #define NVREG_TXRXCTL_RXCHECK	0x0400
+#define NVREG_TXRXCTL_DESC_1	0
+#define NVREG_TXRXCTL_DESC_2	0x02100
+#define NVREG_TXRXCTL_DESC_3	0x02200
 	NvRegMIIStatus = 0x180,
 #define NVREG_MIISTAT_ERROR		0x0001
 #define NVREG_MIISTAT_LINKCHANGE	0x0008
@@ -335,6 +341,10 @@ typedef union _ring_type {
 /* error and valid are the same for both */
 #define NV_TX2_ERROR		(1<<30)
 #define NV_TX2_VALID		(1<<31)
+#define NV_TX2_TSO		(1<<28)
+#define NV_TX2_TSO_SHIFT	14
+#define NV_TX2_CHECKSUM_L3	(1<<27)
+#define NV_TX2_CHECKSUM_L4	(1<<26)
 
 #define NV_RX_DESCRIPTORVALID	(1<<16)
 #define NV_RX_MISSEDFRAME	(1<<17)
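
The new NV_TX2_TSO bits pack the segment size directly into the descriptor flag word; a small encoding sketch with an assumed MSS, mirroring the nv_start_xmit() logic later in this patch:

	/* For tso_size = 1460 (assumed), the MSS is shifted into the flag
	 * word above bit 14 alongside the TSO request bit; non-TSO packets
	 * ask for checksum insertion instead. */
	u32 tx_flags_extra;
	if (skb_shinfo(skb)->tso_size)	/* e.g. 1460 */
		tx_flags_extra = NV_TX2_TSO |
			(skb_shinfo(skb)->tso_size << NV_TX2_TSO_SHIFT);
	else
		tx_flags_extra = NV_TX2_CHECKSUM_L3 | NV_TX2_CHECKSUM_L4;
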
@@ -417,14 +427,14 @@ typedef union _ring_type {
 
 /* 
  * desc_ver values:
- * This field has two purposes:
- * - Newer nics uses a different ring layout. The layout is selected by
- *   comparing np->desc_ver with DESC_VER_xy.
- * - It contains bits that are forced on when writing to NvRegTxRxControl.
+ * The nic supports three different descriptor types:
+ * - DESC_VER_1: original format
+ * - DESC_VER_2: support for jumbo frames
+ * - DESC_VER_3: 64-bit descriptor format
  */
-#define DESC_VER_1	0x0
-#define DESC_VER_2	(0x02100|NVREG_TXRXCTL_RXCHECK)
-#define DESC_VER_3      (0x02200|NVREG_TXRXCTL_RXCHECK)
+#define DESC_VER_1	1
+#define DESC_VER_2	2
+#define DESC_VER_3	3
 
 /* PHY defines */
 #define PHY_OUI_MARVELL	0x5043
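
To summarize the refactor above: desc_ver used to double as both a ring-layout selector and a raw register value; it is now a plain enumeration, and the bits destined for NvRegTxRxControl move to the new np->txrxctl_bits field. Condensed from the nv_probe() changes later in this patch:

	/* Condensed from nv_probe() below; not a new API. */
	if (id->driver_data & DEV_HAS_HIGH_DMA) {
		np->desc_ver = DESC_VER_3;		/* 64-bit layout */
		np->txrxctl_bits = NVREG_TXRXCTL_DESC_3;
	} else if (id->driver_data & DEV_HAS_LARGEDESC) {
		np->desc_ver = DESC_VER_2;		/* jumbo frames */
		np->txrxctl_bits = NVREG_TXRXCTL_DESC_2;
	} else {
		np->desc_ver = DESC_VER_1;		/* original layout */
		np->txrxctl_bits = NVREG_TXRXCTL_DESC_1;
	}
	if (id->driver_data & DEV_HAS_CHECKSUM)
		np->txrxctl_bits |= NVREG_TXRXCTL_RXCHECK;
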
@@ -491,6 +501,7 @@ struct fe_priv {
 	u32 orig_mac[2];
 	u32 irqmask;
 	u32 desc_ver;
+	u32 txrxctl_bits;
 
 	void __iomem *base;
 
@@ -534,7 +545,7 @@ static inline struct fe_priv *get_nvpriv(struct net_device *dev)
 
 static inline u8 __iomem *get_hwbase(struct net_device *dev)
 {
-	return get_nvpriv(dev)->base;
+	return ((struct fe_priv *)netdev_priv(dev))->base;
 }
 
 static inline void pci_push(u8 __iomem *base)
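
Every get_nvpriv() call site below becomes netdev_priv(); the two are equivalent here because the device is allocated with alloc_etherdev(sizeof(struct fe_priv)) (an assumption carried over from the rest of this driver), which places the private data inline after struct net_device:

	/* netdev_priv() computes the address of the inline private area,
	 * so no stored pointer and no NULL check are needed. */
	struct fe_priv *np = netdev_priv(dev);	/* was: get_nvpriv(dev) */
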
@@ -623,7 +634,7 @@ static int mii_rw(struct net_device *dev, int addr, int miireg, int value)
 
 static int phy_reset(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u32 miicontrol;
 	unsigned int tries = 0;
 
@@ -726,7 +737,7 @@ static int phy_init(struct net_device *dev)
 
 static void nv_start_rx(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 
 	dprintk(KERN_DEBUG "%s: nv_start_rx\n", dev->name);
@@ -782,14 +793,14 @@ static void nv_stop_tx(struct net_device *dev)
 
 static void nv_txrx_reset(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 
 	dprintk(KERN_DEBUG "%s: nv_txrx_reset\n", dev->name);
-	writel(NVREG_TXRXCTL_BIT2 | NVREG_TXRXCTL_RESET | np->desc_ver, base + NvRegTxRxControl);
+	writel(NVREG_TXRXCTL_BIT2 | NVREG_TXRXCTL_RESET | np->txrxctl_bits, base + NvRegTxRxControl);
 	pci_push(base);
 	udelay(NV_TXRX_RESET_DELAY);
-	writel(NVREG_TXRXCTL_BIT2 | np->desc_ver, base + NvRegTxRxControl);
+	writel(NVREG_TXRXCTL_BIT2 | np->txrxctl_bits, base + NvRegTxRxControl);
 	pci_push(base);
 }
 
@@ -801,7 +812,7 @@ static void nv_txrx_reset(struct net_device *dev)
  */
 static struct net_device_stats *nv_get_stats(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 
 	/* It seems that the nic always generates interrupts and doesn't
 	 * accumulate errors internally. Thus the current values in np->stats
@@ -817,7 +828,7 @@ static struct net_device_stats *nv_get_stats(struct net_device *dev)
  */
 static int nv_alloc_rx(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	unsigned int refill_rx = np->refill_rx;
 	int nr;
 
@@ -861,7 +872,7 @@ static int nv_alloc_rx(struct net_device *dev)
 static void nv_do_rx_refill(unsigned long data)
 {
 	struct net_device *dev = (struct net_device *) data;
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 
 	disable_irq(dev->irq);
 	if (nv_alloc_rx(dev)) {
@@ -875,7 +886,7 @@ static void nv_do_rx_refill(unsigned long data)
 
 static void nv_init_rx(struct net_device *dev) 
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	int i;
 
 	np->cur_rx = RX_RING;
@@ -889,15 +900,17 @@ static void nv_init_rx(struct net_device *dev)
 
 static void nv_init_tx(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	int i;
 
 	np->next_tx = np->nic_tx = 0;
-	for (i = 0; i < TX_RING; i++)
+	for (i = 0; i < TX_RING; i++) {
 		if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
 			np->tx_ring.orig[i].FlagLen = 0;
 	        else
 			np->tx_ring.ex[i].FlagLen = 0;
+		np->tx_skbuff[i] = NULL;
+	}
 }
 
 static int nv_init_ring(struct net_device *dev)
@@ -907,21 +920,44 @@ static int nv_init_ring(struct net_device *dev)
 	return nv_alloc_rx(dev);
 }
 
+static void nv_release_txskb(struct net_device *dev, unsigned int skbnr)
+{
+	struct fe_priv *np = netdev_priv(dev);
+	struct sk_buff *skb = np->tx_skbuff[skbnr];
+	unsigned int j, entry, fragments;
+			
+	dprintk(KERN_INFO "%s: nv_release_txskb for skbnr %d, skb %p\n",
+		dev->name, skbnr, np->tx_skbuff[skbnr]);
+	
+	entry = skbnr;
+	if ((fragments = skb_shinfo(skb)->nr_frags) != 0) {
+		for (j = fragments; j >= 1; j--) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[j-1];
+			pci_unmap_page(np->pci_dev, np->tx_dma[entry],
+				       frag->size,
+				       PCI_DMA_TODEVICE);
+			entry = (entry - 1) % TX_RING;
+		}
+	}
+	pci_unmap_single(np->pci_dev, np->tx_dma[entry],
+			 skb->len - skb->data_len,
+			 PCI_DMA_TODEVICE);
+	dev_kfree_skb_irq(skb);
+	np->tx_skbuff[skbnr] = NULL;
+}
+
 static void nv_drain_tx(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
-	int i;
+	struct fe_priv *np = netdev_priv(dev);
+	unsigned int i;
+	
 	for (i = 0; i < TX_RING; i++) {
 		if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
 			np->tx_ring.orig[i].FlagLen = 0;
 		else
 			np->tx_ring.ex[i].FlagLen = 0;
 		if (np->tx_skbuff[i]) {
-			pci_unmap_single(np->pci_dev, np->tx_dma[i],
-						np->tx_skbuff[i]->len,
-						PCI_DMA_TODEVICE);
-			dev_kfree_skb(np->tx_skbuff[i]);
-			np->tx_skbuff[i] = NULL;
+			nv_release_txskb(dev, i);
 			np->stats.tx_dropped++;
 		}
 	}
@@ -929,7 +965,7 @@ static void nv_drain_tx(struct net_device *dev)
 
 static void nv_drain_rx(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	int i;
 	for (i = 0; i < RX_RING; i++) {
 		if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
@@ -959,28 +995,69 @@ static void drain_ring(struct net_device *dev)
  */
 static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
-	int nr = np->next_tx % TX_RING;
+	struct fe_priv *np = netdev_priv(dev);
+	u32 tx_flags_extra = (np->desc_ver == DESC_VER_1 ? NV_TX_LASTPACKET : NV_TX2_LASTPACKET);
+	unsigned int fragments = skb_shinfo(skb)->nr_frags;
+	unsigned int nr = (np->next_tx + fragments) % TX_RING;
+	unsigned int i;
+
+	spin_lock_irq(&np->lock);
+
+	if ((np->next_tx - np->nic_tx + fragments) > TX_LIMIT_STOP) {
+		spin_unlock_irq(&np->lock);
+		netif_stop_queue(dev);
+		return NETDEV_TX_BUSY;
+	}
 
 	np->tx_skbuff[nr] = skb;
-	np->tx_dma[nr] = pci_map_single(np->pci_dev, skb->data,skb->len,
-					PCI_DMA_TODEVICE);
+	
+	if (fragments) {
+		dprintk(KERN_DEBUG "%s: nv_start_xmit: buffer contains %d fragments\n", dev->name, fragments);
+		/* setup descriptors in reverse order */
+		for (i = fragments; i >= 1; i--) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
+			np->tx_dma[nr] = pci_map_page(np->pci_dev, frag->page, frag->page_offset, frag->size,
+							PCI_DMA_TODEVICE);
 
-	if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+			if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+				np->tx_ring.orig[nr].PacketBuffer = cpu_to_le32(np->tx_dma[nr]);
+				np->tx_ring.orig[nr].FlagLen = cpu_to_le32( (frag->size-1) | np->tx_flags | tx_flags_extra);
+			} else {
+				np->tx_ring.ex[nr].PacketBufferHigh = cpu_to_le64(np->tx_dma[nr]) >> 32;
+				np->tx_ring.ex[nr].PacketBufferLow = cpu_to_le64(np->tx_dma[nr]) & 0x0FFFFFFFF;
+				np->tx_ring.ex[nr].FlagLen = cpu_to_le32( (frag->size-1) | np->tx_flags | tx_flags_extra);
+			}
+			
+			nr = (nr - 1) % TX_RING;
+
+			if (np->desc_ver == DESC_VER_1)
+				tx_flags_extra &= ~NV_TX_LASTPACKET;
+			else
+				tx_flags_extra &= ~NV_TX2_LASTPACKET;		
+		}
+	}
+
+#ifdef NETIF_F_TSO
+	if (skb_shinfo(skb)->tso_size)
+		tx_flags_extra |= NV_TX2_TSO | (skb_shinfo(skb)->tso_size << NV_TX2_TSO_SHIFT);
+	else
+#endif
+	tx_flags_extra |= (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0);
+
+	np->tx_dma[nr] = pci_map_single(np->pci_dev, skb->data, skb->len-skb->data_len,
+					PCI_DMA_TODEVICE);
+	
+	if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
 		np->tx_ring.orig[nr].PacketBuffer = cpu_to_le32(np->tx_dma[nr]);
-	else {
+		np->tx_ring.orig[nr].FlagLen = cpu_to_le32( (skb->len-skb->data_len-1) | np->tx_flags | tx_flags_extra);
+	} else {
 		np->tx_ring.ex[nr].PacketBufferHigh = cpu_to_le64(np->tx_dma[nr]) >> 32;
 		np->tx_ring.ex[nr].PacketBufferLow = cpu_to_le64(np->tx_dma[nr]) & 0x0FFFFFFFF;
-	}
+		np->tx_ring.ex[nr].FlagLen = cpu_to_le32( (skb->len-skb->data_len-1) | np->tx_flags | tx_flags_extra);
+	}	
 
-	spin_lock_irq(&np->lock);
-	wmb();
-	if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
-		np->tx_ring.orig[nr].FlagLen = cpu_to_le32( (skb->len-1) | np->tx_flags );
-	else
-		np->tx_ring.ex[nr].FlagLen = cpu_to_le32( (skb->len-1) | np->tx_flags );
-	dprintk(KERN_DEBUG "%s: nv_start_xmit: packet packet %d queued for transmission.\n",
-				dev->name, np->next_tx);
+	dprintk(KERN_DEBUG "%s: nv_start_xmit: packet %d queued for transmission. tx_flags_extra: %x\n",
+				dev->name, np->next_tx, tx_flags_extra);
 	{
 		int j;
 		for (j=0; j<64; j++) {
@@ -991,15 +1068,13 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		dprintk("\n");
 	}
 
-	np->next_tx++;
+	np->next_tx += 1 + fragments;
 
 	dev->trans_start = jiffies;
-	if (np->next_tx - np->nic_tx >= TX_LIMIT_STOP)
-		netif_stop_queue(dev);
 	spin_unlock_irq(&np->lock);
-	writel(NVREG_TXRXCTL_KICK|np->desc_ver, get_hwbase(dev) + NvRegTxRxControl);
+	writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
 	pci_push(get_hwbase(dev));
-	return 0;
+	return NETDEV_TX_OK;
 }
 
 /*
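
The fragment path above fills descriptors in reverse so that the LASTPACKET flag is already in place on the final fragment before the hardware can see any earlier descriptor. A worked example with assumed ring state:

	/* Assumed: TX_RING = 256, np->next_tx = 5, skb with a linear part
	 * plus 2 page fragments (nr_frags == 2).
	 *   nr = (5 + 2) % 256 = 7  -> frag 1 written, LASTPACKET set
	 *   nr = (7 - 1) % 256 = 6  -> frag 0 written, LASTPACKET cleared
	 *   nr = (6 - 1) % 256 = 5  -> skb->data written last
	 * np->next_tx += 1 + 2, so the next packet starts at descriptor 8.
	 * The unsigned wrap in (nr - 1) % TX_RING stays correct because
	 * TX_RING is a power of two. */
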
@@ -1009,9 +1084,10 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
  */
 static void nv_tx_done(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u32 Flags;
-	int i;
+	unsigned int i;
+	struct sk_buff *skb;
 
 	while (np->nic_tx != np->next_tx) {
 		i = np->nic_tx % TX_RING;
@@ -1026,35 +1102,38 @@ static void nv_tx_done(struct net_device *dev)
 		if (Flags & NV_TX_VALID)
 			break;
 		if (np->desc_ver == DESC_VER_1) {
-			if (Flags & (NV_TX_RETRYERROR|NV_TX_CARRIERLOST|NV_TX_LATECOLLISION|
-							NV_TX_UNDERFLOW|NV_TX_ERROR)) {
-				if (Flags & NV_TX_UNDERFLOW)
-					np->stats.tx_fifo_errors++;
-				if (Flags & NV_TX_CARRIERLOST)
-					np->stats.tx_carrier_errors++;
-				np->stats.tx_errors++;
-			} else {
-				np->stats.tx_packets++;
-				np->stats.tx_bytes += np->tx_skbuff[i]->len;
+			if (Flags & NV_TX_LASTPACKET) {
+				skb = np->tx_skbuff[i];
+				if (Flags & (NV_TX_RETRYERROR|NV_TX_CARRIERLOST|NV_TX_LATECOLLISION|
+					     NV_TX_UNDERFLOW|NV_TX_ERROR)) {
+					if (Flags & NV_TX_UNDERFLOW)
+						np->stats.tx_fifo_errors++;
+					if (Flags & NV_TX_CARRIERLOST)
+						np->stats.tx_carrier_errors++;
+					np->stats.tx_errors++;
+				} else {
+					np->stats.tx_packets++;
+					np->stats.tx_bytes += skb->len;
+				}
+				nv_release_txskb(dev, i);
 			}
 		} else {
-			if (Flags & (NV_TX2_RETRYERROR|NV_TX2_CARRIERLOST|NV_TX2_LATECOLLISION|
-							NV_TX2_UNDERFLOW|NV_TX2_ERROR)) {
-				if (Flags & NV_TX2_UNDERFLOW)
-					np->stats.tx_fifo_errors++;
-				if (Flags & NV_TX2_CARRIERLOST)
-					np->stats.tx_carrier_errors++;
-				np->stats.tx_errors++;
-			} else {
-				np->stats.tx_packets++;
-				np->stats.tx_bytes += np->tx_skbuff[i]->len;
+			if (Flags & NV_TX2_LASTPACKET) {
+				skb = np->tx_skbuff[i];
+				if (Flags & (NV_TX2_RETRYERROR|NV_TX2_CARRIERLOST|NV_TX2_LATECOLLISION|
+					     NV_TX2_UNDERFLOW|NV_TX2_ERROR)) {
+					if (Flags & NV_TX2_UNDERFLOW)
+						np->stats.tx_fifo_errors++;
+					if (Flags & NV_TX2_CARRIERLOST)
+						np->stats.tx_carrier_errors++;
+					np->stats.tx_errors++;
+				} else {
+					np->stats.tx_packets++;
+					np->stats.tx_bytes += skb->len;
+				}				
+				nv_release_txskb(dev, i);
 			}
 		}
-		pci_unmap_single(np->pci_dev, np->tx_dma[i],
-					np->tx_skbuff[i]->len,
-					PCI_DMA_TODEVICE);
-		dev_kfree_skb_irq(np->tx_skbuff[i]);
-		np->tx_skbuff[i] = NULL;
 		np->nic_tx++;
 	}
 	if (np->next_tx - np->nic_tx < TX_LIMIT_START)
@@ -1067,7 +1146,7 @@ static void nv_tx_done(struct net_device *dev)
  */
 static void nv_tx_timeout(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 
 	printk(KERN_INFO "%s: Got tx_timeout. irq: %08x\n", dev->name,
@@ -1200,7 +1279,7 @@ static int nv_getlen(struct net_device *dev, void *packet, int datalen)
 
 static void nv_rx_process(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u32 Flags;
 
 	for (;;) {
@@ -1355,7 +1434,7 @@ static void set_bufsize(struct net_device *dev)
  */
 static int nv_change_mtu(struct net_device *dev, int new_mtu)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	int old_mtu;
 
 	if (new_mtu < 64 || new_mtu > np->pkt_limit)
@@ -1408,7 +1487,7 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu)
 		writel( ((RX_RING-1) << NVREG_RINGSZ_RXSHIFT) + ((TX_RING-1) << NVREG_RINGSZ_TXSHIFT),
 			base + NvRegRingSizes);
 		pci_push(base);
-		writel(NVREG_TXRXCTL_KICK|np->desc_ver, get_hwbase(dev) + NvRegTxRxControl);
+		writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
 		pci_push(base);
 
 		/* restart rx engine */
@@ -1440,7 +1519,7 @@ static void nv_copy_mac_to_hw(struct net_device *dev)
  */
 static int nv_set_mac_address(struct net_device *dev, void *addr)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	struct sockaddr *macaddr = (struct sockaddr*)addr;
 
 	if(!is_valid_ether_addr(macaddr->sa_data))
@@ -1475,7 +1554,7 @@ static int nv_set_mac_address(struct net_device *dev, void *addr)
  */
 static void nv_set_multicast(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 	u32 addr[2];
 	u32 mask[2];
@@ -1535,7 +1614,7 @@ static void nv_set_multicast(struct net_device *dev)
 
 static int nv_update_linkspeed(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 	int adv, lpa;
 	int newls = np->linkspeed;
@@ -1705,7 +1784,7 @@ static void nv_link_irq(struct net_device *dev)
 static irqreturn_t nv_nic_irq(int foo, void *data, struct pt_regs *regs)
 {
 	struct net_device *dev = (struct net_device *) data;
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 	u32 events;
 	int i;
@@ -1777,7 +1856,7 @@ static irqreturn_t nv_nic_irq(int foo, void *data, struct pt_regs *regs)
 static void nv_do_nic_poll(unsigned long data)
 {
 	struct net_device *dev = (struct net_device *) data;
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 
 	disable_irq(dev->irq);
@@ -1801,7 +1880,7 @@ static void nv_poll_controller(struct net_device *dev)
 
 static void nv_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	strcpy(info->driver, "forcedeth");
 	strcpy(info->version, FORCEDETH_VERSION);
 	strcpy(info->bus_info, pci_name(np->pci_dev));
@@ -1809,7 +1888,7 @@ static void nv_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 
 static void nv_get_wol(struct net_device *dev, struct ethtool_wolinfo *wolinfo)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	wolinfo->supported = WAKE_MAGIC;
 
 	spin_lock_irq(&np->lock);
@@ -1820,7 +1899,7 @@ static void nv_get_wol(struct net_device *dev, struct ethtool_wolinfo *wolinfo)
 
 static int nv_set_wol(struct net_device *dev, struct ethtool_wolinfo *wolinfo)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 
 	spin_lock_irq(&np->lock);
@@ -2021,7 +2100,7 @@ static int nv_get_regs_len(struct net_device *dev)
 
 static void nv_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *buf)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 	u32 *rbuf = buf;
 	int i;
@@ -2035,7 +2114,7 @@ static void nv_get_regs(struct net_device *dev, struct ethtool_regs *regs, void
 
 static int nv_nway_reset(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	int ret;
 
 	spin_lock_irq(&np->lock);
@@ -2065,11 +2144,12 @@ static struct ethtool_ops ops = {
 	.get_regs_len = nv_get_regs_len,
 	.get_regs = nv_get_regs,
 	.nway_reset = nv_nway_reset,
+	.get_perm_addr = ethtool_op_get_perm_addr,
 };
 
 static int nv_open(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 	int ret, oom, i;
 
@@ -2114,9 +2194,9 @@ static int nv_open(struct net_device *dev)
 	/* 5) continue setup */
 	writel(np->linkspeed, base + NvRegLinkSpeed);
 	writel(NVREG_UNKSETUP3_VAL1, base + NvRegUnknownSetupReg3);
-	writel(np->desc_ver, base + NvRegTxRxControl);
+	writel(np->txrxctl_bits, base + NvRegTxRxControl);
 	pci_push(base);
-	writel(NVREG_TXRXCTL_BIT1|np->desc_ver, base + NvRegTxRxControl);
+	writel(NVREG_TXRXCTL_BIT1|np->txrxctl_bits, base + NvRegTxRxControl);
 	reg_delay(dev, NvRegUnknownSetupReg5, NVREG_UNKSETUP5_BIT31, NVREG_UNKSETUP5_BIT31,
 			NV_SETUP5_DELAY, NV_SETUP5_DELAYMAX,
 			KERN_INFO "open: SetupReg5, Bit 31 remained off\n");
@@ -2205,7 +2285,7 @@ out_drain:
 
 static int nv_close(struct net_device *dev)
 {
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base;
 
 	spin_lock_irq(&np->lock);
@@ -2261,7 +2341,7 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 	if (!dev)
 		goto out;
 
-	np = get_nvpriv(dev);
+	np = netdev_priv(dev);
 	np->pci_dev = pci_dev;
 	spin_lock_init(&np->lock);
 	SET_MODULE_OWNER(dev);
@@ -2313,19 +2393,32 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 		if (pci_set_dma_mask(pci_dev, 0x0000007fffffffffULL)) {
 			printk(KERN_INFO "forcedeth: 64-bit DMA failed, using 32-bit addressing for device %s.\n",
 					pci_name(pci_dev));
+		} else {
+			dev->features |= NETIF_F_HIGHDMA;
 		}
+		np->txrxctl_bits = NVREG_TXRXCTL_DESC_3;
 	} else if (id->driver_data & DEV_HAS_LARGEDESC) {
 		/* packet format 2: supports jumbo frames */
 		np->desc_ver = DESC_VER_2;
+		np->txrxctl_bits = NVREG_TXRXCTL_DESC_2;
 	} else {
 		/* original packet format */
 		np->desc_ver = DESC_VER_1;
+		np->txrxctl_bits = NVREG_TXRXCTL_DESC_1;
 	}
 
 	np->pkt_limit = NV_PKTLIMIT_1;
 	if (id->driver_data & DEV_HAS_LARGEDESC)
 		np->pkt_limit = NV_PKTLIMIT_2;
 
+	if (id->driver_data & DEV_HAS_CHECKSUM) {
+		np->txrxctl_bits |= NVREG_TXRXCTL_RXCHECK;
+		dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
+#ifdef NETIF_F_TSO
+		dev->features |= NETIF_F_TSO;
+#endif
+	}
+
 	err = -ENOMEM;
 	np->base = ioremap(addr, NV_PCI_REGSZ);
 	if (!np->base)
@@ -2377,8 +2470,9 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 	dev->dev_addr[3] = (np->orig_mac[0] >> 16) & 0xff;
 	dev->dev_addr[4] = (np->orig_mac[0] >>  8) & 0xff;
 	dev->dev_addr[5] = (np->orig_mac[0] >>  0) & 0xff;
+	memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
 
-	if (!is_valid_ether_addr(dev->dev_addr)) {
+	if (!is_valid_ether_addr(dev->perm_addr)) {
 		/*
 		 * Bad mac address. At least one bios sets the mac address
 		 * to 01:23:45:67:89:ab
@@ -2403,9 +2497,9 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 	np->wolenabled = 0;
 
 	if (np->desc_ver == DESC_VER_1) {
-		np->tx_flags = NV_TX_LASTPACKET|NV_TX_VALID;
+		np->tx_flags = NV_TX_VALID;
 	} else {
-		np->tx_flags = NV_TX2_LASTPACKET|NV_TX2_VALID;
+		np->tx_flags = NV_TX2_VALID;
 	}
 	np->irqmask = NVREG_IRQMASK_WANTED;
 	if (id->driver_data & DEV_NEED_TIMERIRQ)
@@ -2494,7 +2588,7 @@ out:
 static void __devexit nv_remove(struct pci_dev *pci_dev)
 {
 	struct net_device *dev = pci_get_drvdata(pci_dev);
-	struct fe_priv *np = get_nvpriv(dev);
+	struct fe_priv *np = netdev_priv(dev);
 
 	unregister_netdev(dev);
 
@@ -2525,35 +2619,35 @@ static struct pci_device_id pci_tbl[] = {
 	},
 	{	/* nForce3 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_4),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM,
 	},
 	{	/* nForce3 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_5),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM,
 	},
 	{	/* nForce3 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_6),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM,
 	},
 	{	/* nForce3 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_7),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM,
 	},
 	{	/* CK804 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_8),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA,
 	},
 	{	/* CK804 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_9),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA,
 	},
 	{	/* MCP04 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_10),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA,
 	},
 	{	/* MCP04 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_11),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA,
 	},
 	{	/* MCP51 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_12),
@@ -2565,11 +2659,11 @@ static struct pci_device_id pci_tbl[] = {
 	},
 	{	/* MCP55 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_14),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA,
 	},
 	{	/* MCP55 Ethernet Controller */
 		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NVENET_15),
-		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_HIGH_DMA,
+		.driver_data = DEV_NEED_TIMERIRQ|DEV_NEED_LINKTIMER|DEV_HAS_LARGEDESC|DEV_HAS_CHECKSUM|DEV_HAS_HIGH_DMA,
 	},
 	{0,},
 };

+ 86 - 326
drivers/net/gianfar.c

@@ -29,12 +29,7 @@
  *  define the configuration needed by the board are defined in a
  *  board structure in arch/ppc/platforms (though I do not
  *  discount the possibility that other architectures could one
- *  day be supported.  One assumption the driver currently makes
- *  is that the PHY is configured in such a way to advertise all
- *  capabilities.  This is a sensible default, and on certain
- *  PHYs, changing this default encounters substantial errata
- *  issues.  Future versions may remove this requirement, but for
- *  now, it is best for the firmware to ensure this is the case.
+ *  day be supported).
  *
  *  The Gianfar Ethernet Controller uses a ring of buffer
  *  descriptors.  The beginning is indicated by a register
@@ -47,7 +42,7 @@
  *  corresponding bit in the IMASK register is also set (if
  *  interrupt coalescing is active, then the interrupt may not
  *  happen immediately, but will wait until either a set number
- *  of frames or amount of time have passed.).  In NAPI, the
+ *  of frames or amount of time have passed).  In NAPI, the
  *  interrupt handler will signal there is work to be done, and
  *  exit.  Without NAPI, the packet(s) will be handled
  *  immediately.  Both methods will start at the last known empty
@@ -75,6 +70,7 @@
 #include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/errno.h>
+#include <linux/unistd.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
@@ -97,9 +93,11 @@
 #include <linux/version.h>
 #include <linux/dma-mapping.h>
 #include <linux/crc32.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
 
 #include "gianfar.h"
-#include "gianfar_phy.h"
+#include "gianfar_mii.h"
 
 #define TX_TIMEOUT      (1*HZ)
 #define SKB_ALLOC_TIMEOUT 1000000
@@ -113,9 +111,8 @@
 #endif
 
 const char gfar_driver_name[] = "Gianfar Ethernet";
-const char gfar_driver_version[] = "1.1";
+const char gfar_driver_version[] = "1.2";
 
-int startup_gfar(struct net_device *dev);
 static int gfar_enet_open(struct net_device *dev);
 static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev);
 static void gfar_timeout(struct net_device *dev);
@@ -126,17 +123,13 @@ static int gfar_set_mac_address(struct net_device *dev);
 static int gfar_change_mtu(struct net_device *dev, int new_mtu);
 static irqreturn_t gfar_error(int irq, void *dev_id, struct pt_regs *regs);
 static irqreturn_t gfar_transmit(int irq, void *dev_id, struct pt_regs *regs);
-static irqreturn_t gfar_receive(int irq, void *dev_id, struct pt_regs *regs);
 static irqreturn_t gfar_interrupt(int irq, void *dev_id, struct pt_regs *regs);
-static irqreturn_t phy_interrupt(int irq, void *dev_id, struct pt_regs *regs);
-static void gfar_phy_change(void *data);
-static void gfar_phy_timer(unsigned long data);
 static void adjust_link(struct net_device *dev);
 static void init_registers(struct net_device *dev);
 static int init_phy(struct net_device *dev);
 static int gfar_probe(struct device *device);
 static int gfar_remove(struct device *device);
-void free_skb_resources(struct gfar_private *priv);
+static void free_skb_resources(struct gfar_private *priv);
 static void gfar_set_multi(struct net_device *dev);
 static void gfar_set_hash_for_addr(struct net_device *dev, u8 *addr);
 #ifdef CONFIG_GFAR_NAPI
@@ -144,7 +137,6 @@ static int gfar_poll(struct net_device *dev, int *budget);
 #endif
 int gfar_clean_rx_ring(struct net_device *dev, int rx_work_limit);
 static int gfar_process_frame(struct net_device *dev, struct sk_buff *skb, int length);
-static void gfar_phy_startup_timer(unsigned long data);
 static void gfar_vlan_rx_register(struct net_device *netdev,
 		                struct vlan_group *grp);
 static void gfar_vlan_rx_kill_vid(struct net_device *netdev, uint16_t vid);
@@ -162,6 +154,9 @@ int gfar_uses_fcb(struct gfar_private *priv)
 	else
 		return 0;
 }
+
+/* Set up the ethernet device structure, private data,
+ * and anything else we need before we start */
 static int gfar_probe(struct device *device)
 {
 	u32 tempval;
@@ -175,7 +170,7 @@ static int gfar_probe(struct device *device)
 
 	einfo = (struct gianfar_platform_data *) pdev->dev.platform_data;
 
-	if (einfo == NULL) {
+	if (NULL == einfo) {
 		printk(KERN_ERR "gfar %d: Missing additional data!\n",
 		       pdev->id);
 
@@ -185,7 +180,7 @@ static int gfar_probe(struct device *device)
 	/* Create an ethernet device instance */
 	dev = alloc_etherdev(sizeof (*priv));
 
-	if (dev == NULL)
+	if (NULL == dev)
 		return -ENOMEM;
 
 	priv = netdev_priv(dev);
@@ -207,20 +202,11 @@ static int gfar_probe(struct device *device)
 	priv->regs = (struct gfar *)
 		ioremap(r->start, sizeof (struct gfar));
 
-	if (priv->regs == NULL) {
+	if (NULL == priv->regs) {
 		err = -ENOMEM;
 		goto regs_fail;
 	}
 
-	/* Set the PHY base address */
-	priv->phyregs = (struct gfar *)
-	    ioremap(einfo->phy_reg_addr, sizeof (struct gfar));
-
-	if (priv->phyregs == NULL) {
-		err = -ENOMEM;
-		goto phy_regs_fail;
-	}
-
 	spin_lock_init(&priv->lock);
 
 	dev_set_drvdata(device, dev);
@@ -386,12 +372,10 @@ static int gfar_probe(struct device *device)
 	return 0;
 
 register_fail:
-	iounmap((void *) priv->phyregs);
-phy_regs_fail:
 	iounmap((void *) priv->regs);
 regs_fail:
 	free_netdev(dev);
-	return -ENOMEM;
+	return err;
 }
 
 static int gfar_remove(struct device *device)
@@ -402,108 +386,41 @@ static int gfar_remove(struct device *device)
 	dev_set_drvdata(device, NULL);
 
 	iounmap((void *) priv->regs);
-	iounmap((void *) priv->phyregs);
 	free_netdev(dev);
 
 	return 0;
 }
 
 
-/* Configure the PHY for dev.
- * returns 0 if success.  -1 if failure
+/* Initializes driver's PHY state, and attaches to the PHY.
+ * Returns 0 on success.
  */
 static int init_phy(struct net_device *dev)
 {
 	struct gfar_private *priv = netdev_priv(dev);
-	struct phy_info *curphy;
-	unsigned int timeout = PHY_INIT_TIMEOUT;
-	struct gfar *phyregs = priv->phyregs;
-	struct gfar_mii_info *mii_info;
-	int err;
+	uint gigabit_support =
+		priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT ?
+		SUPPORTED_1000baseT_Full : 0;
+	struct phy_device *phydev;
 
 	priv->oldlink = 0;
 	priv->oldspeed = 0;
 	priv->oldduplex = -1;
 
-	mii_info = kmalloc(sizeof(struct gfar_mii_info),
-			GFP_KERNEL);
-
-	if(NULL == mii_info) {
-		if (netif_msg_ifup(priv))
-			printk(KERN_ERR "%s: Could not allocate mii_info\n",
-					dev->name);
-		return -ENOMEM;
-	}
-
-	mii_info->speed = SPEED_1000;
-	mii_info->duplex = DUPLEX_FULL;
-	mii_info->pause = 0;
-	mii_info->link = 1;
-
-	mii_info->advertising = (ADVERTISED_10baseT_Half |
-			ADVERTISED_10baseT_Full |
-			ADVERTISED_100baseT_Half |
-			ADVERTISED_100baseT_Full |
-			ADVERTISED_1000baseT_Full);
-	mii_info->autoneg = 1;
+	phydev = phy_connect(dev, priv->einfo->bus_id, &adjust_link, 0);
 
-	spin_lock_init(&mii_info->mdio_lock);
-
-	mii_info->mii_id = priv->einfo->phyid;
-
-	mii_info->dev = dev;
-
-	mii_info->mdio_read = &read_phy_reg;
-	mii_info->mdio_write = &write_phy_reg;
-
-	priv->mii_info = mii_info;
-
-	/* Reset the management interface */
-	gfar_write(&phyregs->miimcfg, MIIMCFG_RESET);
-
-	/* Setup the MII Mgmt clock speed */
-	gfar_write(&phyregs->miimcfg, MIIMCFG_INIT_VALUE);
-
-	/* Wait until the bus is free */
-	while ((gfar_read(&phyregs->miimind) & MIIMIND_BUSY) &&
-			timeout--)
-		cpu_relax();
-
-	if(timeout <= 0) {
-		printk(KERN_ERR "%s: The MII Bus is stuck!\n",
-				dev->name);
-		err = -1;
-		goto bus_fail;
-	}
-
-	/* get info for this PHY */
-	curphy = get_phy_info(priv->mii_info);
-
-	if (curphy == NULL) {
-		if (netif_msg_ifup(priv))
-			printk(KERN_ERR "%s: No PHY found\n", dev->name);
-		err = -1;
-		goto no_phy;
+	if (IS_ERR(phydev)) {
+		printk(KERN_ERR "%s: Could not attach to PHY\n", dev->name);
+		return PTR_ERR(phydev);
 	}
 
-	mii_info->phyinfo = curphy;
+	/* Remove any features not supported by the controller */
+	phydev->supported &= (GFAR_SUPPORTED | gigabit_support);
+	phydev->advertising = phydev->supported;
 
-	/* Run the commands which initialize the PHY */
-	if(curphy->init) {
-		err = curphy->init(priv->mii_info);
-
-		if (err)
-			goto phy_init_fail;
-	}
+	priv->phydev = phydev;
 
 	return 0;
-
-phy_init_fail:
-no_phy:
-bus_fail:
-	kfree(mii_info);
-
-	return err;
 }
 
 static void init_registers(struct net_device *dev)
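
The rewritten init_phy() above delegates PHY management to phylib. The resulting lifecycle, condensed from the calls this patch adds (the bus_id string format is phylib's, assumed here):

	/* Lifecycle condensed from this patch's changes:
	 *   open:        phydev = phy_connect(dev, bus_id, &adjust_link, 0);
	 *                phydev->supported &= wanted;  advertise the same;
	 *                phy_start(phydev);            begin link monitoring
	 *   link event:  phylib updates phydev->link/speed/duplex, then
	 *                calls adjust_link(dev)
	 *   close:       phy_stop(phydev); phy_disconnect(phydev);
	 *                priv->phydev = NULL;
	 */
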
@@ -603,24 +520,13 @@ void stop_gfar(struct net_device *dev)
 	struct gfar *regs = priv->regs;
 	unsigned long flags;
 
+	phy_stop(priv->phydev);
+
 	/* Lock it down */
 	spin_lock_irqsave(&priv->lock, flags);
 
-	/* Tell the kernel the link is down */
-	priv->mii_info->link = 0;
-	adjust_link(dev);
-
 	gfar_halt(dev);
 
-	if (priv->einfo->board_flags & FSL_GIANFAR_BRD_HAS_PHY_INTR) {
-		/* Clear any pending interrupts */
-		mii_clear_phy_interrupt(priv->mii_info);
-
-		/* Disable PHY Interrupts */
-		mii_configure_phy_interrupt(priv->mii_info,
-				MII_INTERRUPT_DISABLED);
-	}
-
 	spin_unlock_irqrestore(&priv->lock, flags);
 
 	/* Free the IRQs */
@@ -629,13 +535,7 @@ void stop_gfar(struct net_device *dev)
 		free_irq(priv->interruptTransmit, dev);
 		free_irq(priv->interruptReceive, dev);
 	} else {
-		free_irq(priv->interruptTransmit, dev);
-	}
-
-	if (priv->einfo->board_flags & FSL_GIANFAR_BRD_HAS_PHY_INTR) {
-		free_irq(priv->einfo->interruptPHY, dev);
-	} else {
-		del_timer_sync(&priv->phy_info_timer);
+		free_irq(priv->interruptTransmit, dev);
 	}
 
 	free_skb_resources(priv);
@@ -649,7 +549,7 @@ void stop_gfar(struct net_device *dev)
 
 /* If there are any tx skbs or rx skbs still around, free them.
  * Then free tx_skbuff and rx_skbuff */
-void free_skb_resources(struct gfar_private *priv)
+static void free_skb_resources(struct gfar_private *priv)
 {
 	struct rxbd8 *rxbdp;
 	struct txbd8 *txbdp;
@@ -770,7 +670,7 @@ int startup_gfar(struct net_device *dev)
 	    (struct sk_buff **) kmalloc(sizeof (struct sk_buff *) *
 					priv->tx_ring_size, GFP_KERNEL);
 
-	if (priv->tx_skbuff == NULL) {
+	if (NULL == priv->tx_skbuff) {
 		if (netif_msg_ifup(priv))
 			printk(KERN_ERR "%s: Could not allocate tx_skbuff\n",
 					dev->name);
@@ -785,7 +685,7 @@ int startup_gfar(struct net_device *dev)
 	    (struct sk_buff **) kmalloc(sizeof (struct sk_buff *) *
 					priv->rx_ring_size, GFP_KERNEL);
 
-	if (priv->rx_skbuff == NULL) {
+	if (NULL == priv->rx_skbuff) {
 		if (netif_msg_ifup(priv))
 			printk(KERN_ERR "%s: Could not allocate rx_skbuff\n",
 					dev->name);
@@ -879,13 +779,7 @@ int startup_gfar(struct net_device *dev)
 		}
 	}
 
-	/* Set up the PHY change work queue */
-	INIT_WORK(&priv->tq, gfar_phy_change, dev);
-
-	init_timer(&priv->phy_info_timer);
-	priv->phy_info_timer.function = &gfar_phy_startup_timer;
-	priv->phy_info_timer.data = (unsigned long) priv->mii_info;
-	mod_timer(&priv->phy_info_timer, jiffies + HZ);
+	phy_start(priv->phydev);
 
 	/* Configure the coalescing support */
 	if (priv->txcoalescing)
@@ -933,11 +827,6 @@ tx_skb_fail:
 			priv->tx_bd_base,
 			gfar_read(&regs->tbase0));
 
-	if (priv->mii_info->phyinfo->close)
-		priv->mii_info->phyinfo->close(priv->mii_info);
-
-	kfree(priv->mii_info);
-
 	return err;
 }
 
@@ -1035,7 +924,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	txbdp->status &= TXBD_WRAP;
 
 	/* Set up checksumming */
-	if ((dev->features & NETIF_F_IP_CSUM) 
+	if ((dev->features & NETIF_F_IP_CSUM)
 			&& (CHECKSUM_HW == skb->ip_summed)) {
 		fcb = gfar_add_fcb(skb, txbdp);
 		gfar_tx_checksum(skb, fcb);
@@ -1103,11 +992,9 @@ static int gfar_close(struct net_device *dev)
 	struct gfar_private *priv = netdev_priv(dev);
 	stop_gfar(dev);
 
-	/* Shutdown the PHY */
-	if (priv->mii_info->phyinfo->close)
-		priv->mii_info->phyinfo->close(priv->mii_info);
-
-	kfree(priv->mii_info);
+	/* Disconnect from the PHY */
+	phy_disconnect(priv->phydev);
+	priv->phydev = NULL;
 
 	netif_stop_queue(dev);
 
@@ -1343,7 +1230,7 @@ struct sk_buff * gfar_new_skb(struct net_device *dev, struct rxbd8 *bdp)
 	while ((!skb) && timeout--)
 		skb = dev_alloc_skb(priv->rx_buffer_size + RXBUF_ALIGNMENT);
 
-	if (skb == NULL)
+	if (NULL == skb)
 		return NULL;
 
 	/* We need the data buffer to be aligned properly.  We will reserve
@@ -1490,7 +1377,7 @@ static int gfar_process_frame(struct net_device *dev, struct sk_buff *skb,
 	struct gfar_private *priv = netdev_priv(dev);
 	struct rxfcb *fcb = NULL;
 
-	if (skb == NULL) {
+	if (NULL == skb) {
 		if (netif_msg_rx_err(priv))
 			printk(KERN_WARNING "%s: Missing skb!!.\n", dev->name);
 		priv->stats.rx_dropped++;
@@ -1718,131 +1605,9 @@ static irqreturn_t gfar_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t phy_interrupt(int irq, void *dev_id, struct pt_regs *regs)
-{
-	struct net_device *dev = (struct net_device *) dev_id;
-	struct gfar_private *priv = netdev_priv(dev);
-
-	/* Clear the interrupt */
-	mii_clear_phy_interrupt(priv->mii_info);
-
-	/* Disable PHY interrupts */
-	mii_configure_phy_interrupt(priv->mii_info,
-			MII_INTERRUPT_DISABLED);
-
-	/* Schedule the phy change */
-	schedule_work(&priv->tq);
-
-	return IRQ_HANDLED;
-}
-
-/* Scheduled by the phy_interrupt/timer to handle PHY changes */
-static void gfar_phy_change(void *data)
-{
-	struct net_device *dev = (struct net_device *) data;
-	struct gfar_private *priv = netdev_priv(dev);
-	int result = 0;
-
-	/* Delay to give the PHY a chance to change the
-	 * register state */
-	msleep(1);
-
-	/* Update the link, speed, duplex */
-	result = priv->mii_info->phyinfo->read_status(priv->mii_info);
-
-	/* Adjust the known status as long as the link
-	 * isn't still coming up */
-	if((0 == result) || (priv->mii_info->link == 0))
-		adjust_link(dev);
-
-	/* Reenable interrupts, if needed */
-	if (priv->einfo->board_flags & FSL_GIANFAR_BRD_HAS_PHY_INTR)
-		mii_configure_phy_interrupt(priv->mii_info,
-				MII_INTERRUPT_ENABLED);
-}
-
-/* Called every so often on systems that don't interrupt
- * the core for PHY changes */
-static void gfar_phy_timer(unsigned long data)
-{
-	struct net_device *dev = (struct net_device *) data;
-	struct gfar_private *priv = netdev_priv(dev);
-
-	schedule_work(&priv->tq);
-
-	mod_timer(&priv->phy_info_timer, jiffies +
-			GFAR_PHY_CHANGE_TIME * HZ);
-}
-
-/* Keep trying aneg for some time
- * If, after GFAR_AN_TIMEOUT seconds, it has not
- * finished, we switch to forced.
- * Either way, once the process has completed, we either
- * request the interrupt, or switch the timer over to
- * using gfar_phy_timer to check status */
-static void gfar_phy_startup_timer(unsigned long data)
-{
-	int result;
-	static int secondary = GFAR_AN_TIMEOUT;
-	struct gfar_mii_info *mii_info = (struct gfar_mii_info *)data;
-	struct gfar_private *priv = netdev_priv(mii_info->dev);
-
-	/* Configure the Auto-negotiation */
-	result = mii_info->phyinfo->config_aneg(mii_info);
-
-	/* If autonegotiation failed to start, and
-	 * we haven't timed out, reset the timer, and return */
-	if (result && secondary--) {
-		mod_timer(&priv->phy_info_timer, jiffies + HZ);
-		return;
-	} else if (result) {
-		/* Couldn't start autonegotiation.
-		 * Try switching to forced */
-		mii_info->autoneg = 0;
-		result = mii_info->phyinfo->config_aneg(mii_info);
-
-		/* Forcing failed!  Give up */
-		if(result) {
-			if (netif_msg_link(priv))
-				printk(KERN_ERR "%s: Forcing failed!\n",
-						mii_info->dev->name);
-			return;
-		}
-	}
-
-	/* Kill the timer so it can be restarted */
-	del_timer_sync(&priv->phy_info_timer);
-
-	/* Grab the PHY interrupt, if necessary/possible */
-	if (priv->einfo->board_flags & FSL_GIANFAR_BRD_HAS_PHY_INTR) {
-		if (request_irq(priv->einfo->interruptPHY,
-					phy_interrupt,
-					SA_SHIRQ,
-					"phy_interrupt",
-					mii_info->dev) < 0) {
-			if (netif_msg_intr(priv))
-				printk(KERN_ERR "%s: Can't get IRQ %d (PHY)\n",
-						mii_info->dev->name,
-					priv->einfo->interruptPHY);
-		} else {
-			mii_configure_phy_interrupt(priv->mii_info,
-					MII_INTERRUPT_ENABLED);
-			return;
-		}
-	}
-
-	/* Start the timer again, this time in order to
-	 * handle a change in status */
-	init_timer(&priv->phy_info_timer);
-	priv->phy_info_timer.function = &gfar_phy_timer;
-	priv->phy_info_timer.data = (unsigned long) mii_info->dev;
-	mod_timer(&priv->phy_info_timer, jiffies +
-			GFAR_PHY_CHANGE_TIME * HZ);
-}
-
 /* Called every time the controller might need to be made
  * aware of new link state.  The PHY code conveys this
- * information through variables in the priv structure, and this
+ * information through variables in the phydev structure, and this
  * function converts those variables into the appropriate
  * register values, and can bring down the device if needed.
  */
@@ -1850,84 +1615,68 @@ static void adjust_link(struct net_device *dev)
 {
 	struct gfar_private *priv = netdev_priv(dev);
 	struct gfar *regs = priv->regs;
-	u32 tempval;
-	struct gfar_mii_info *mii_info = priv->mii_info;
+	unsigned long flags;
+	struct phy_device *phydev = priv->phydev;
+	int new_state = 0;
+
+	spin_lock_irqsave(&priv->lock, flags);
+	if (phydev->link) {
+		u32 tempval = gfar_read(&regs->maccfg2);
 
-	if (mii_info->link) {
 		/* Now we make sure that we can be in full duplex mode.
 		 * If not, we operate in half-duplex mode. */
-		if (mii_info->duplex != priv->oldduplex) {
-			if (!(mii_info->duplex)) {
-				tempval = gfar_read(&regs->maccfg2);
+		if (phydev->duplex != priv->oldduplex) {
+			new_state = 1;
+			if (!(phydev->duplex))
 				tempval &= ~(MACCFG2_FULL_DUPLEX);
-				gfar_write(&regs->maccfg2, tempval);
-
-				if (netif_msg_link(priv))
-					printk(KERN_INFO "%s: Half Duplex\n",
-							dev->name);
-			} else {
-				tempval = gfar_read(&regs->maccfg2);
+			else
 				tempval |= MACCFG2_FULL_DUPLEX;
-				gfar_write(&regs->maccfg2, tempval);
 
-				if (netif_msg_link(priv))
-					printk(KERN_INFO "%s: Full Duplex\n",
-							dev->name);
-			}
-
-			priv->oldduplex = mii_info->duplex;
+			priv->oldduplex = phydev->duplex;
 		}
 
-		if (mii_info->speed != priv->oldspeed) {
-			switch (mii_info->speed) {
+		if (phydev->speed != priv->oldspeed) {
+			new_state = 1;
+			switch (phydev->speed) {
 			case 1000:
-				tempval = gfar_read(&regs->maccfg2);
 				tempval =
 				    ((tempval & ~(MACCFG2_IF)) | MACCFG2_GMII);
-				gfar_write(&regs->maccfg2, tempval);
 				break;
 			case 100:
 			case 10:
-				tempval = gfar_read(&regs->maccfg2);
 				tempval =
 				    ((tempval & ~(MACCFG2_IF)) | MACCFG2_MII);
-				gfar_write(&regs->maccfg2, tempval);
 				break;
 			default:
 				if (netif_msg_link(priv))
 					printk(KERN_WARNING
-							"%s: Ack!  Speed (%d) is not 10/100/1000!\n",
-							dev->name, mii_info->speed);
+						"%s: Ack!  Speed (%d) is not 10/100/1000!\n",
+						dev->name, phydev->speed);
 				break;
 			}
 
-			if (netif_msg_link(priv))
-				printk(KERN_INFO "%s: Speed %dBT\n", dev->name,
-						mii_info->speed);
-
-			priv->oldspeed = mii_info->speed;
+			priv->oldspeed = phydev->speed;
 		}
 
+		gfar_write(&regs->maccfg2, tempval);
+
 		if (!priv->oldlink) {
-			if (netif_msg_link(priv))
-				printk(KERN_INFO "%s: Link is up\n", dev->name);
+			new_state = 1;
 			priv->oldlink = 1;
-			netif_carrier_on(dev);
 			netif_schedule(dev);
 		}
-	} else {
-		if (priv->oldlink) {
-			if (netif_msg_link(priv))
-				printk(KERN_INFO "%s: Link is down\n",
-						dev->name);
-			priv->oldlink = 0;
-			priv->oldspeed = 0;
-			priv->oldduplex = -1;
-			netif_carrier_off(dev);
-		}
+	} else if (priv->oldlink) {
+		new_state = 1;
+		priv->oldlink = 0;
+		priv->oldspeed = 0;
+		priv->oldduplex = -1;
 	}
-}
 
+	if (new_state && netif_msg_link(priv))
+		phy_print_status(phydev);
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
 
 /* Update the hash table based on the current list of multicast
  * addresses we subscribe to.  Also, change the promiscuity of
@@ -2122,12 +1871,23 @@ static struct device_driver gfar_driver = {
 
 static int __init gfar_init(void)
 {
-	return driver_register(&gfar_driver);
+	int err = gfar_mdio_init();
+
+	if (err)
+		return err;
+
+	err = driver_register(&gfar_driver);
+
+	if (err)
+		gfar_mdio_exit();
+	
+	return err;
 }
 
 static void __exit gfar_exit(void)
 {
 	driver_unregister(&gfar_driver);
+	gfar_mdio_exit();
 }
 
 module_init(gfar_init);

+ 16 - 14
drivers/net/gianfar.h

@@ -17,7 +17,6 @@
  *
  *  Still left to do:
  *      -Add support for module parameters
- *	-Add support for ethtool -s
  *	-Add patch for ethtool phys id
  */
 #ifndef __GIANFAR_H
@@ -37,7 +36,8 @@
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <linux/mm.h>
-#include <linux/fsl_devices.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -48,7 +48,8 @@
 #include <linux/workqueue.h>
 #include <linux/ethtool.h>
 #include <linux/netdevice.h>
-#include "gianfar_phy.h"
+#include <linux/fsl_devices.h>
+#include "gianfar_mii.h"
 
 /* The maximum number of packets to be handled in one call of gfar_poll */
 #define GFAR_DEV_WEIGHT 64
@@ -73,7 +74,7 @@
 #define PHY_INIT_TIMEOUT 100000
 #define GFAR_PHY_CHANGE_TIME 2
 
-#define DEVICE_NAME "%s: Gianfar Ethernet Controller Version 1.1, "
+#define DEVICE_NAME "%s: Gianfar Ethernet Controller Version 1.2, "
 #define DRV_NAME "gfar-enet"
 extern const char gfar_driver_name[];
 extern const char gfar_driver_version[];
@@ -578,12 +579,7 @@ struct gfar {
 	u32	hafdup;		/* 0x.50c - Half Duplex Register */
 	u32	maxfrm;		/* 0x.510 - Maximum Frame Length Register */
 	u8	res18[12];
-	u32	miimcfg;	/* 0x.520 - MII Management Configuration Register */
-	u32	miimcom;	/* 0x.524 - MII Management Command Register */
-	u32	miimadd;	/* 0x.528 - MII Management Address Register */
-	u32	miimcon;	/* 0x.52c - MII Management Control Register */
-	u32	miimstat;	/* 0x.530 - MII Management Status Register */
-	u32	miimind;	/* 0x.534 - MII Management Indicator Register */
+	u8	gfar_mii_regs[24];	/* See gianfar_mii.h */
 	u8	res19[4];
 	u32	ifstat;		/* 0x.53c - Interface Status Register */
 	u32	macstnaddr1;	/* 0x.540 - Station Address Part 1 Register */
@@ -688,9 +684,6 @@ struct gfar_private {
 	struct gfar *regs;	/* Pointer to the GFAR memory mapped Registers */
 	u32 *hash_regs[16];
 	int hash_width;
-	struct gfar *phyregs;
-	struct work_struct tq;
-	struct timer_list phy_info_timer;
 	struct net_device_stats stats; /* linux network statistics */
 	struct gfar_extra_stats extra_stats;
 	spinlock_t lock;
@@ -710,7 +703,8 @@ struct gfar_private {
 	unsigned int interruptError;
 	struct gianfar_platform_data *einfo;
 
-	struct gfar_mii_info *mii_info;
+	struct phy_device *phydev;
+	struct mii_bus *mii_bus;
 	int oldspeed;
 	int oldduplex;
 	int oldlink;
@@ -732,4 +726,12 @@ extern inline void gfar_write(volatile unsigned *addr, u32 val)
 
 extern struct ethtool_ops *gfar_op_array[];
 
+extern irqreturn_t gfar_receive(int irq, void *dev_id, struct pt_regs *regs);
+extern int startup_gfar(struct net_device *dev);
+extern void stop_gfar(struct net_device *dev);
+extern void gfar_halt(struct net_device *dev);
+extern void gfar_phy_test(struct mii_bus *bus, struct phy_device *phydev,
+		int enable, u32 regnum, u32 read);
+void gfar_setup_stashing(struct net_device *dev);
+
 #endif /* __GIANFAR_H */

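Editor's note: in the register map above, six u32 MII management registers (0x.520 through 0x.534) collapse into the 24-byte gfar_mii_regs window, and gianfar_mii.h (added later in this diff) re-declares the same layout as struct gfar_mii for the MDIO bus driver. A sketch of that size assumption as a compile-time check (helper name hypothetical; the MDIO driver itself reaches the registers through a separate ioremap() of pdata->paddr rather than through struct gfar):

static inline struct gfar_mii *gfar_mii_window(struct gfar *regs)
{
	/* six u32 registers == 24 bytes, mirrored by struct gfar_mii */
	BUILD_BUG_ON(sizeof(struct gfar_mii) != 24);

	return (struct gfar_mii *)regs->gfar_mii_regs;
}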
+ 64 - 36
drivers/net/gianfar_ethtool.c

@@ -39,17 +39,18 @@
 #include <asm/types.h>
 #include <asm/uaccess.h>
 #include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
 
 #include "gianfar.h"
 
 #define is_power_of_2(x)        ((x) != 0 && (((x) & ((x) - 1)) == 0))
 
-extern int startup_gfar(struct net_device *dev);
-extern void stop_gfar(struct net_device *dev);
-extern void gfar_halt(struct net_device *dev);
 extern void gfar_start(struct net_device *dev);
 extern int gfar_clean_rx_ring(struct net_device *dev, int rx_work_limit);
 
+#define GFAR_MAX_COAL_USECS 0xffff
+#define GFAR_MAX_COAL_FRAMES 0xff
 static void gfar_fill_stats(struct net_device *dev, struct ethtool_stats *dummy,
 		     u64 * buf);
 static void gfar_gstrings(struct net_device *dev, u32 stringset, u8 * buf);
@@ -182,38 +183,32 @@ static void gfar_gdrvinfo(struct net_device *dev, struct
 	drvinfo->eedump_len = 0;
 }
 
+
+static int gfar_ssettings(struct net_device *dev, struct ethtool_cmd *cmd)
+{
+	struct gfar_private *priv = netdev_priv(dev);
+	struct phy_device *phydev = priv->phydev;
+
+	if (NULL == phydev)
+		return -ENODEV;
+
+	return phy_ethtool_sset(phydev, cmd);
+}
+
+
 /* Return the current settings in the ethtool_cmd structure */
 static int gfar_gsettings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	struct gfar_private *priv = netdev_priv(dev);
-	uint gigabit_support = 
-		priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT ?
-			SUPPORTED_1000baseT_Full : 0;
-	uint gigabit_advert = 
-		priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT ?
-			ADVERTISED_1000baseT_Full: 0;
-
-	cmd->supported = (SUPPORTED_10baseT_Half
-			  | SUPPORTED_100baseT_Half
-			  | SUPPORTED_100baseT_Full
-			  | gigabit_support | SUPPORTED_Autoneg);
-
-	/* For now, we always advertise everything */
-	cmd->advertising = (ADVERTISED_10baseT_Half
-			    | ADVERTISED_100baseT_Half
-			    | ADVERTISED_100baseT_Full
-			    | gigabit_advert | ADVERTISED_Autoneg);
-
-	cmd->speed = priv->mii_info->speed;
-	cmd->duplex = priv->mii_info->duplex;
-	cmd->port = PORT_MII;
-	cmd->phy_address = priv->mii_info->mii_id;
-	cmd->transceiver = XCVR_EXTERNAL;
-	cmd->autoneg = AUTONEG_ENABLE;
+	struct phy_device *phydev = priv->phydev;
+
+	if (NULL == phydev)
+		return -ENODEV;
+
 	cmd->maxtxpkt = priv->txcount;
 	cmd->maxrxpkt = priv->rxcount;
 
-	return 0;
+	return phy_ethtool_gset(phydev, cmd);
 }
 
 /* Return the length of the register structure */
@@ -241,14 +236,14 @@ static unsigned int gfar_usecs2ticks(struct gfar_private *priv, unsigned int use
 	unsigned int count;
 
 	/* The timer is different, depending on the interface speed */
-	switch (priv->mii_info->speed) {
-	case 1000:
+	switch (priv->phydev->speed) {
+	case SPEED_1000:
 		count = GFAR_GBIT_TIME;
 		break;
-	case 100:
+	case SPEED_100:
 		count = GFAR_100_TIME;
 		break;
-	case 10:
+	case SPEED_10:
 	default:
 		count = GFAR_10_TIME;
 		break;
@@ -265,14 +260,14 @@ static unsigned int gfar_ticks2usecs(struct gfar_private *priv, unsigned int tic
 	unsigned int count;
 
 	/* The timer is different, depending on the interface speed */
-	switch (priv->mii_info->speed) {
-	case 1000:
+	switch (priv->phydev->speed) {
+	case SPEED_1000:
 		count = GFAR_GBIT_TIME;
 		break;
-	case 100:
+	case SPEED_100:
 		count = GFAR_100_TIME;
 		break;
-	case 10:
+	case SPEED_10:
 	default:
 		count = GFAR_10_TIME;
 		break;
@@ -292,6 +287,9 @@ static int gfar_gcoalesce(struct net_device *dev, struct ethtool_coalesce *cvals
 	if (!(priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_COALESCE))
 		return -EOPNOTSUPP;
 
+	if (NULL == priv->phydev)
+		return -ENODEV;
+
 	cvals->rx_coalesce_usecs = gfar_ticks2usecs(priv, priv->rxtime);
 	cvals->rx_max_coalesced_frames = priv->rxcount;
 
@@ -348,6 +346,22 @@ static int gfar_scoalesce(struct net_device *dev, struct ethtool_coalesce *cvals
 	else
 		priv->rxcoalescing = 1;
 
+	if (NULL == priv->phydev)
+		return -ENODEV;
+
+	/* Check the bounds of the values */
+	if (cvals->rx_coalesce_usecs > GFAR_MAX_COAL_USECS) {
+		pr_info("Coalescing is limited to %d microseconds\n",
+				GFAR_MAX_COAL_USECS);
+		return -EINVAL;
+	}
+
+	if (cvals->rx_max_coalesced_frames > GFAR_MAX_COAL_FRAMES) {
+		pr_info("Coalescing is limited to %d frames\n",
+				GFAR_MAX_COAL_FRAMES);
+		return -EINVAL;
+	}
+
 	priv->rxtime = gfar_usecs2ticks(priv, cvals->rx_coalesce_usecs);
 	priv->rxcount = cvals->rx_max_coalesced_frames;
 
@@ -358,6 +372,19 @@ static int gfar_scoalesce(struct net_device *dev, struct ethtool_coalesce *cvals
 	else
 		priv->txcoalescing = 1;
 
+	/* Check the bounds of the values */
+	if (cvals->tx_coalesce_usecs > GFAR_MAX_COAL_USECS) {
+		pr_info("Coalescing is limited to %d microseconds\n",
+				GFAR_MAX_COAL_USECS);
+		return -EINVAL;
+	}
+
+	if (cvals->tx_max_coalesced_frames > GFAR_MAX_COAL_FRAMES) {
+		pr_info("Coalescing is limited to %d frames\n",
+				GFAR_MAX_COAL_FRAMES);
+		return -EINVAL;
+	}
+
 	priv->txtime = gfar_usecs2ticks(priv, cvals->tx_coalesce_usecs);
 	priv->txcount = cvals->tx_max_coalesced_frames;
 
@@ -536,6 +563,7 @@ static void gfar_set_msglevel(struct net_device *dev, uint32_t data)
 
 struct ethtool_ops gfar_ethtool_ops = {
 	.get_settings = gfar_gsettings,
+	.set_settings = gfar_ssettings,
 	.get_drvinfo = gfar_gdrvinfo,
 	.get_regs_len = gfar_reglen,
 	.get_regs = gfar_get_regs,

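Editor's note: the bodies of gfar_usecs2ticks() and gfar_ticks2usecs() fall outside the hunk context; only the speed-dependent choice of tick time is shown. A plausible completion of the microseconds-to-ticks direction, treating GFAR_GBIT_TIME/GFAR_100_TIME/GFAR_10_TIME as nanoseconds per coalescing tick (an assumption; the constants are defined elsewhere in gianfar.h):

/* count is the per-speed tick length in ns, as picked by the switch above */
static unsigned int usecs2ticks_sketch(unsigned int usecs, unsigned int count)
{
	/* round up so a nonzero request never truncates to zero ticks */
	return (1000 * usecs + count - 1) / count;
}

With the new bounds checks, a request above GFAR_MAX_COAL_USECS (0xffff) or GFAR_MAX_COAL_FRAMES (0xff) now fails with -EINVAL instead of being applied with silent truncation.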
+ 219 - 0
drivers/net/gianfar_mii.c

@@ -0,0 +1,219 @@
+/* 
+ * drivers/net/gianfar_mii.c
+ *
+ * Gianfar Ethernet Driver -- MIIM bus implementation
+ * Provides Bus interface for MIIM regs
+ *
+ * Author: Andy Fleming
+ * Maintainer: Kumar Gala (kumar.gala@freescale.com)
+ *
+ * Copyright (c) 2002-2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <asm/ocp.h>
+#include <linux/crc32.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+
+#include "gianfar.h"
+#include "gianfar_mii.h"
+
+/* Write value to the PHY at mii_id at register regnum,
+ * on the bus, waiting until the write is done before returning.
+ * All PHY configuration is done through the TSEC1 MIIM regs */
+int gfar_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value)
+{
+	struct gfar_mii *regs = bus->priv;
+
+	/* Set the PHY address and the register address we want to write */
+	gfar_write(&regs->miimadd, (mii_id << 8) | regnum);
+
+	/* Write out the value we want */
+	gfar_write(&regs->miimcon, value);
+
+	/* Wait for the transaction to finish */
+	while (gfar_read(&regs->miimind) & MIIMIND_BUSY)
+		cpu_relax();
+
+	return 0;
+}
+
+/* Read the bus for PHY at addr mii_id, register regnum, and
+ * return the value.  Clears miimcom first.  All PHY
+ * configuration has to be done through the TSEC1 MIIM regs */
+int gfar_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
+{
+	struct gfar_mii *regs = bus->priv;
+	u16 value;
+
+	/* Set the PHY address and the register address we want to read */
+	gfar_write(&regs->miimadd, (mii_id << 8) | regnum);
+
+	/* Clear miimcom, and then initiate a read */
+	gfar_write(&regs->miimcom, 0);
+	gfar_write(&regs->miimcom, MII_READ_COMMAND);
+
+	/* Wait for the transaction to finish */
+	while (gfar_read(&regs->miimind) & (MIIMIND_NOTVALID | MIIMIND_BUSY))
+		cpu_relax();
+
+	/* Grab the value of the register from miimstat */
+	value = gfar_read(&regs->miimstat);
+
+	return value;
+}
+
+
+/* Reset the MIIM registers, and wait for the bus to free */
+int gfar_mdio_reset(struct mii_bus *bus)
+{
+	struct gfar_mii *regs = bus->priv;
+	int timeout = PHY_INIT_TIMEOUT;
+
+	spin_lock_bh(&bus->mdio_lock);
+
+	/* Reset the management interface */
+	gfar_write(&regs->miimcfg, MIIMCFG_RESET);
+
+	/* Setup the MII Mgmt clock speed */
+	gfar_write(&regs->miimcfg, MIIMCFG_INIT_VALUE);
+
+	/* Wait until the bus is free */
+	while ((gfar_read(&regs->miimind) & MIIMIND_BUSY) &&
+			timeout--)
+		cpu_relax();
+
+	spin_unlock_bh(&bus->mdio_lock);
+
+	if (timeout < 0) {
+		printk(KERN_ERR "%s: The MII Bus is stuck!\n",
+				bus->name);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+
+int gfar_mdio_probe(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct gianfar_mdio_data *pdata;
+	struct gfar_mii *regs;
+	struct mii_bus *new_bus;
+	int err = 0;
+
+	if (NULL == dev)
+		return -EINVAL;
+
+	new_bus = kmalloc(sizeof(struct mii_bus), GFP_KERNEL);
+
+	if (NULL == new_bus)
+		return -ENOMEM;
+
+	new_bus->name = "Gianfar MII Bus";
+	new_bus->read = &gfar_mdio_read;
+	new_bus->write = &gfar_mdio_write;
+	new_bus->reset = &gfar_mdio_reset;
+	new_bus->id = pdev->id;
+
+	pdata = (struct gianfar_mdio_data *)pdev->dev.platform_data;
+
+	if (NULL == pdata) {
+		printk(KERN_ERR "gfar mdio %d: Missing platform data!\n", pdev->id);
+		err = -ENODEV;
+		goto reg_map_fail;
+	}
+
+	/* Set the PHY base address */
+	regs = (struct gfar_mii *) ioremap(pdata->paddr, 
+			sizeof (struct gfar_mii));
+
+	if (NULL == regs) {
+		err = -ENOMEM;
+		goto reg_map_fail;
+	}
+
+	new_bus->priv = regs;
+
+	new_bus->irq = pdata->irq;
+
+	new_bus->dev = dev;
+	dev_set_drvdata(dev, new_bus);
+
+	err = mdiobus_register(new_bus);
+
+	if (0 != err) {
+		printk (KERN_ERR "%s: Cannot register as MDIO bus\n", 
+				new_bus->name);
+		goto bus_register_fail;
+	}
+
+	return 0;
+
+bus_register_fail:
+	iounmap((void *) regs);
+reg_map_fail:
+	kfree(new_bus);
+
+	return err;
+}
+
+
+int gfar_mdio_remove(struct device *dev)
+{
+	struct mii_bus *bus = dev_get_drvdata(dev);
+
+	mdiobus_unregister(bus);
+
+	dev_set_drvdata(dev, NULL);
+
+	iounmap((void *) bus->priv);
+	bus->priv = NULL;
+	kfree(bus);
+
+	return 0;
+}
+
+static struct device_driver gianfar_mdio_driver = {
+	.name = "fsl-gianfar_mdio",
+	.bus = &platform_bus_type,
+	.probe = gfar_mdio_probe,
+	.remove = gfar_mdio_remove,
+};
+
+int __init gfar_mdio_init(void)
+{
+	return driver_register(&gianfar_mdio_driver);
+}
+
+void __exit gfar_mdio_exit(void)
+{
+	driver_unregister(&gianfar_mdio_driver);
+}

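Editor's note: gfar_mdio_read()/gfar_mdio_write() implement the classic MIIM transaction: program miimadd with (phy << 8) | reg, kick the transfer, then poll miimind until BUSY (and, for reads, NOTVALID) clears. A small usage sketch reading a PHY's 32-bit ID through the bus, much as phylib does during probing (function name hypothetical; MII_PHYSID1/2 come from linux/mii.h):

static u32 gfar_read_phy_id_sketch(struct mii_bus *bus, int mii_id)
{
	u32 id;

	/* upper half from PHYSID1, lower half from PHYSID2 */
	id = (gfar_mdio_read(bus, mii_id, MII_PHYSID1) & 0xffff) << 16;
	id |= gfar_mdio_read(bus, mii_id, MII_PHYSID2) & 0xffff;

	return id;
}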
+ 45 - 0
drivers/net/gianfar_mii.h

@@ -0,0 +1,45 @@
+/* 
+ * drivers/net/gianfar_mii.h
+ *
+ * Gianfar Ethernet Driver -- MII Management Bus Implementation
+ * Driver for the MDIO bus controller in the Gianfar register space
+ *
+ * Author: Andy Fleming
+ * Maintainer: Kumar Gala (kumar.gala@freescale.com)
+ *
+ * Copyright (c) 2002-2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#ifndef __GIANFAR_MII_H
+#define __GIANFAR_MII_H
+
+#define MIIMIND_BUSY            0x00000001
+#define MIIMIND_NOTVALID        0x00000004
+
+#define MII_READ_COMMAND       0x00000001
+
+#define GFAR_SUPPORTED (SUPPORTED_10baseT_Half \
+		| SUPPORTED_100baseT_Half \
+		| SUPPORTED_100baseT_Full \
+		| SUPPORTED_Autoneg \
+		| SUPPORTED_MII)
+
+struct gfar_mii {
+	u32	miimcfg;	/* 0x.520 - MII Management Config Register */
+	u32	miimcom;	/* 0x.524 - MII Management Command Register */
+	u32	miimadd;	/* 0x.528 - MII Management Address Register */
+	u32	miimcon;	/* 0x.52c - MII Management Control Register */
+	u32	miimstat;	/* 0x.530 - MII Management Status Register */
+	u32	miimind;	/* 0x.534 - MII Management Indicator Register */
+};
+
+int gfar_mdio_read(struct mii_bus *bus, int mii_id, int regnum);
+int gfar_mdio_write(struct mii_bus *bus, int mii_id, int regnum, u16 value);
+int __init gfar_mdio_init(void);
+void __exit gfar_mdio_exit(void);
+#endif /* __GIANFAR_MII_H */

+ 0 - 661
drivers/net/gianfar_phy.c

@@ -1,661 +0,0 @@
-/* 
- * drivers/net/gianfar_phy.c
- *
- * Gianfar Ethernet Driver -- PHY handling
- * Driver for FEC on MPC8540 and TSEC on MPC8540/MPC8560
- * Based on 8260_io/fcc_enet.c
- *
- * Author: Andy Fleming
- * Maintainer: Kumar Gala (kumar.gala@freescale.com)
- *
- * Copyright (c) 2002-2004 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- */
-
-#include <linux/config.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <linux/mm.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/uaccess.h>
-#include <linux/module.h>
-#include <linux/version.h>
-#include <linux/crc32.h>
-#include <linux/mii.h>
-
-#include "gianfar.h"
-#include "gianfar_phy.h"
-
-static void config_genmii_advert(struct gfar_mii_info *mii_info);
-static void genmii_setup_forced(struct gfar_mii_info *mii_info);
-static void genmii_restart_aneg(struct gfar_mii_info *mii_info);
-static int gbit_config_aneg(struct gfar_mii_info *mii_info);
-static int genmii_config_aneg(struct gfar_mii_info *mii_info);
-static int genmii_update_link(struct gfar_mii_info *mii_info);
-static int genmii_read_status(struct gfar_mii_info *mii_info);
-u16 phy_read(struct gfar_mii_info *mii_info, u16 regnum);
-void phy_write(struct gfar_mii_info *mii_info, u16 regnum, u16 val);
-
-/* Write value to the PHY for this device to the register at regnum, */
-/* waiting until the write is done before it returns.  All PHY */
-/* configuration has to be done through the TSEC1 MIIM regs */
-void write_phy_reg(struct net_device *dev, int mii_id, int regnum, int value)
-{
-	struct gfar_private *priv = netdev_priv(dev);
-	struct gfar *regbase = priv->phyregs;
-
-	/* Set the PHY address and the register address we want to write */
-	gfar_write(&regbase->miimadd, (mii_id << 8) | regnum);
-
-	/* Write out the value we want */
-	gfar_write(&regbase->miimcon, value);
-
-	/* Wait for the transaction to finish */
-	while (gfar_read(&regbase->miimind) & MIIMIND_BUSY)
-		cpu_relax();
-}
-
-/* Reads from register regnum in the PHY for device dev, */
-/* returning the value.  Clears miimcom first.  All PHY */
-/* configuration has to be done through the TSEC1 MIIM regs */
-int read_phy_reg(struct net_device *dev, int mii_id, int regnum)
-{
-	struct gfar_private *priv = netdev_priv(dev);
-	struct gfar *regbase = priv->phyregs;
-	u16 value;
-
-	/* Set the PHY address and the register address we want to read */
-	gfar_write(&regbase->miimadd, (mii_id << 8) | regnum);
-
-	/* Clear miimcom, and then initiate a read */
-	gfar_write(&regbase->miimcom, 0);
-	gfar_write(&regbase->miimcom, MII_READ_COMMAND);
-
-	/* Wait for the transaction to finish */
-	while (gfar_read(&regbase->miimind) & (MIIMIND_NOTVALID | MIIMIND_BUSY))
-		cpu_relax();
-
-	/* Grab the value of the register from miimstat */
-	value = gfar_read(&regbase->miimstat);
-
-	return value;
-}
-
-void mii_clear_phy_interrupt(struct gfar_mii_info *mii_info)
-{
-	if(mii_info->phyinfo->ack_interrupt)
-		mii_info->phyinfo->ack_interrupt(mii_info);
-}
-
-
-void mii_configure_phy_interrupt(struct gfar_mii_info *mii_info, u32 interrupts)
-{
-	mii_info->interrupts = interrupts;
-	if(mii_info->phyinfo->config_intr)
-		mii_info->phyinfo->config_intr(mii_info);
-}
-
-
-/* Writes MII_ADVERTISE with the appropriate values, after
- * sanitizing advertise to make sure only supported features
- * are advertised 
- */
-static void config_genmii_advert(struct gfar_mii_info *mii_info)
-{
-	u32 advertise;
-	u16 adv;
-
-	/* Only allow advertising what this PHY supports */
-	mii_info->advertising &= mii_info->phyinfo->features;
-	advertise = mii_info->advertising;
-
-	/* Setup standard advertisement */
-	adv = phy_read(mii_info, MII_ADVERTISE);
-	adv &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4);
-	if (advertise & ADVERTISED_10baseT_Half)
-		adv |= ADVERTISE_10HALF;
-	if (advertise & ADVERTISED_10baseT_Full)
-		adv |= ADVERTISE_10FULL;
-	if (advertise & ADVERTISED_100baseT_Half)
-		adv |= ADVERTISE_100HALF;
-	if (advertise & ADVERTISED_100baseT_Full)
-		adv |= ADVERTISE_100FULL;
-	phy_write(mii_info, MII_ADVERTISE, adv);
-}
-
-static void genmii_setup_forced(struct gfar_mii_info *mii_info)
-{
-	u16 ctrl;
-	u32 features = mii_info->phyinfo->features;
-	
-	ctrl = phy_read(mii_info, MII_BMCR);
-
-	ctrl &= ~(BMCR_FULLDPLX|BMCR_SPEED100|BMCR_SPEED1000|BMCR_ANENABLE);
-	ctrl |= BMCR_RESET;
-
-	switch(mii_info->speed) {
-		case SPEED_1000:
-			if(features & (SUPPORTED_1000baseT_Half
-						| SUPPORTED_1000baseT_Full)) {
-				ctrl |= BMCR_SPEED1000;
-				break;
-			}
-			mii_info->speed = SPEED_100;
-		case SPEED_100:
-			if (features & (SUPPORTED_100baseT_Half
-						| SUPPORTED_100baseT_Full)) {
-				ctrl |= BMCR_SPEED100;
-				break;
-			}
-			mii_info->speed = SPEED_10;
-		case SPEED_10:
-			if (features & (SUPPORTED_10baseT_Half
-						| SUPPORTED_10baseT_Full))
-				break;
-		default: /* Unsupported speed! */
-			printk(KERN_ERR "%s: Bad speed!\n", 
-					mii_info->dev->name);
-			break;
-	}
-
-	phy_write(mii_info, MII_BMCR, ctrl);
-}
-
-
-/* Enable and Restart Autonegotiation */
-static void genmii_restart_aneg(struct gfar_mii_info *mii_info)
-{
-	u16 ctl;
-
-	ctl = phy_read(mii_info, MII_BMCR);
-	ctl |= (BMCR_ANENABLE | BMCR_ANRESTART);
-	phy_write(mii_info, MII_BMCR, ctl);
-}
-
-
-static int gbit_config_aneg(struct gfar_mii_info *mii_info)
-{
-	u16 adv;
-	u32 advertise;
-
-	if(mii_info->autoneg) {
-		/* Configure the ADVERTISE register */
-		config_genmii_advert(mii_info);
-		advertise = mii_info->advertising;
-
-		adv = phy_read(mii_info, MII_1000BASETCONTROL);
-		adv &= ~(MII_1000BASETCONTROL_FULLDUPLEXCAP |
-				MII_1000BASETCONTROL_HALFDUPLEXCAP);
-		if (advertise & SUPPORTED_1000baseT_Half)
-			adv |= MII_1000BASETCONTROL_HALFDUPLEXCAP;
-		if (advertise & SUPPORTED_1000baseT_Full)
-			adv |= MII_1000BASETCONTROL_FULLDUPLEXCAP;
-		phy_write(mii_info, MII_1000BASETCONTROL, adv);
-
-		/* Start/Restart aneg */
-		genmii_restart_aneg(mii_info);
-	} else
-		genmii_setup_forced(mii_info);
-
-	return 0;
-}
-
-static int marvell_config_aneg(struct gfar_mii_info *mii_info)
-{
-	/* The Marvell PHY has an errata which requires
-	 * that certain registers get written in order
-	 * to restart autonegotiation */
-	phy_write(mii_info, MII_BMCR, BMCR_RESET);
-
-	phy_write(mii_info, 0x1d, 0x1f);
-	phy_write(mii_info, 0x1e, 0x200c);
-	phy_write(mii_info, 0x1d, 0x5);
-	phy_write(mii_info, 0x1e, 0);
-	phy_write(mii_info, 0x1e, 0x100);
-
-	gbit_config_aneg(mii_info);
-
-	return 0;
-}
-static int genmii_config_aneg(struct gfar_mii_info *mii_info)
-{
-	if (mii_info->autoneg) {
-		config_genmii_advert(mii_info);
-		genmii_restart_aneg(mii_info);
-	} else
-		genmii_setup_forced(mii_info);
-
-	return 0;
-}
-
-
-static int genmii_update_link(struct gfar_mii_info *mii_info)
-{
-	u16 status;
-
-	/* Do a fake read */
-	phy_read(mii_info, MII_BMSR);
-
-	/* Read link and autonegotiation status */
-	status = phy_read(mii_info, MII_BMSR);
-	if ((status & BMSR_LSTATUS) == 0)
-		mii_info->link = 0;
-	else
-		mii_info->link = 1;
-
-	/* If we are autonegotiating, and not done, 
-	 * return an error */
-	if (mii_info->autoneg && !(status & BMSR_ANEGCOMPLETE))
-		return -EAGAIN;
-
-	return 0;
-}
-
-static int genmii_read_status(struct gfar_mii_info *mii_info)
-{
-	u16 status;
-	int err;
-
-	/* Update the link, but return if there
-	 * was an error */
-	err = genmii_update_link(mii_info);
-	if (err)
-		return err;
-
-	if (mii_info->autoneg) {
-		status = phy_read(mii_info, MII_LPA);
-
-		if (status & (LPA_10FULL | LPA_100FULL))
-			mii_info->duplex = DUPLEX_FULL;
-		else
-			mii_info->duplex = DUPLEX_HALF;
-		if (status & (LPA_100FULL | LPA_100HALF))
-			mii_info->speed = SPEED_100;
-		else
-			mii_info->speed = SPEED_10;
-		mii_info->pause = 0;
-	}
-	/* On non-aneg, we assume what we put in BMCR is the speed,
-	 * though magic-aneg shouldn't prevent this case from occurring
-	 */
-
-	return 0;
-}
-static int marvell_read_status(struct gfar_mii_info *mii_info)
-{
-	u16 status;
-	int err;
-
-	/* Update the link, but return if there
-	 * was an error */
-	err = genmii_update_link(mii_info);
-	if (err)
-		return err;
-
-	/* If the link is up, read the speed and duplex */
-	/* If we aren't autonegotiating, assume speeds 
-	 * are as set */
-	if (mii_info->autoneg && mii_info->link) {
-		int speed;
-		status = phy_read(mii_info, MII_M1011_PHY_SPEC_STATUS);
-
-#if 0
-		/* If speed and duplex aren't resolved,
-		 * return an error.  Isn't this handled
-		 * by checking aneg?
-		 */
-		if ((status & MII_M1011_PHY_SPEC_STATUS_RESOLVED) == 0)
-			return -EAGAIN;
-#endif
-
-		/* Get the duplexity */
-		if (status & MII_M1011_PHY_SPEC_STATUS_FULLDUPLEX)
-			mii_info->duplex = DUPLEX_FULL;
-		else
-			mii_info->duplex = DUPLEX_HALF;
-
-		/* Get the speed */
-		speed = status & MII_M1011_PHY_SPEC_STATUS_SPD_MASK;
-		switch(speed) {
-			case MII_M1011_PHY_SPEC_STATUS_1000:
-				mii_info->speed = SPEED_1000;
-				break;
-			case MII_M1011_PHY_SPEC_STATUS_100:
-				mii_info->speed = SPEED_100;
-				break;
-			default:
-				mii_info->speed = SPEED_10;
-				break;
-		}
-		mii_info->pause = 0;
-	}
-
-	return 0;
-}
-
-
-static int cis820x_read_status(struct gfar_mii_info *mii_info)
-{
-	u16 status;
-	int err;
-
-	/* Update the link, but return if there
-	 * was an error */
-	err = genmii_update_link(mii_info);
-	if (err)
-		return err;
-
-	/* If the link is up, read the speed and duplex */
-	/* If we aren't autonegotiating, assume speeds 
-	 * are as set */
-	if (mii_info->autoneg && mii_info->link) {
-		int speed;
-
-		status = phy_read(mii_info, MII_CIS8201_AUX_CONSTAT);
-		if (status & MII_CIS8201_AUXCONSTAT_DUPLEX)
-			mii_info->duplex = DUPLEX_FULL;
-		else
-			mii_info->duplex = DUPLEX_HALF;
-
-		speed = status & MII_CIS8201_AUXCONSTAT_SPEED;
-
-		switch (speed) {
-		case MII_CIS8201_AUXCONSTAT_GBIT:
-			mii_info->speed = SPEED_1000;
-			break;
-		case MII_CIS8201_AUXCONSTAT_100:
-			mii_info->speed = SPEED_100;
-			break;
-		default:
-			mii_info->speed = SPEED_10;
-			break;
-		}
-	}
-
-	return 0;
-}
-
-static int marvell_ack_interrupt(struct gfar_mii_info *mii_info)
-{
-	/* Clear the interrupts by reading the reg */
-	phy_read(mii_info, MII_M1011_IEVENT);
-
-	return 0;
-}
-
-static int marvell_config_intr(struct gfar_mii_info *mii_info)
-{
-	if(mii_info->interrupts == MII_INTERRUPT_ENABLED)
-		phy_write(mii_info, MII_M1011_IMASK, MII_M1011_IMASK_INIT);
-	else
-		phy_write(mii_info, MII_M1011_IMASK, MII_M1011_IMASK_CLEAR);
-
-	return 0;
-}
-
-static int cis820x_init(struct gfar_mii_info *mii_info)
-{
-	phy_write(mii_info, MII_CIS8201_AUX_CONSTAT, 
-			MII_CIS8201_AUXCONSTAT_INIT);
-	phy_write(mii_info, MII_CIS8201_EXT_CON1,
-			MII_CIS8201_EXTCON1_INIT);
-
-	return 0;
-}
-
-static int cis820x_ack_interrupt(struct gfar_mii_info *mii_info)
-{
-	phy_read(mii_info, MII_CIS8201_ISTAT);
-
-	return 0;
-}
-
-static int cis820x_config_intr(struct gfar_mii_info *mii_info)
-{
-	if(mii_info->interrupts == MII_INTERRUPT_ENABLED)
-		phy_write(mii_info, MII_CIS8201_IMASK, MII_CIS8201_IMASK_MASK);
-	else
-		phy_write(mii_info, MII_CIS8201_IMASK, 0);
-
-	return 0;
-}
-
-#define DM9161_DELAY 10
-
-static int dm9161_read_status(struct gfar_mii_info *mii_info)
-{
-	u16 status;
-	int err;
-
-	/* Update the link, but return if there
-	 * was an error */
-	err = genmii_update_link(mii_info);
-	if (err)
-		return err;
-
-	/* If the link is up, read the speed and duplex */
-	/* If we aren't autonegotiating, assume speeds 
-	 * are as set */
-	if (mii_info->autoneg && mii_info->link) {
-		status = phy_read(mii_info, MII_DM9161_SCSR);
-		if (status & (MII_DM9161_SCSR_100F | MII_DM9161_SCSR_100H))
-			mii_info->speed = SPEED_100;
-		else
-			mii_info->speed = SPEED_10;
-
-		if (status & (MII_DM9161_SCSR_100F | MII_DM9161_SCSR_10F))
-			mii_info->duplex = DUPLEX_FULL;
-		else
-			mii_info->duplex = DUPLEX_HALF;
-	}
-
-	return 0;
-}
-
-
-static int dm9161_config_aneg(struct gfar_mii_info *mii_info)
-{
-	struct dm9161_private *priv = mii_info->priv;
-
-	if(0 == priv->resetdone)
-		return -EAGAIN;
-
-	return 0;
-}
-
-static void dm9161_timer(unsigned long data)
-{
-	struct gfar_mii_info *mii_info = (struct gfar_mii_info *)data;
-	struct dm9161_private *priv = mii_info->priv;
-	u16 status = phy_read(mii_info, MII_BMSR);
-
-	if (status & BMSR_ANEGCOMPLETE) {
-		priv->resetdone = 1;
-	} else
-		mod_timer(&priv->timer, jiffies + DM9161_DELAY * HZ);
-}
-
-static int dm9161_init(struct gfar_mii_info *mii_info)
-{
-	struct dm9161_private *priv;
-
-	/* Allocate the private data structure */
-	priv = kmalloc(sizeof(struct dm9161_private), GFP_KERNEL);
-
-	if (NULL == priv)
-		return -ENOMEM;
-
-	mii_info->priv = priv;
-
-	/* Reset is not done yet */
-	priv->resetdone = 0;
-
-	/* Isolate the PHY */
-	phy_write(mii_info, MII_BMCR, BMCR_ISOLATE);
-
-	/* Do not bypass the scrambler/descrambler */
-	phy_write(mii_info, MII_DM9161_SCR, MII_DM9161_SCR_INIT);
-
-	/* Clear 10BTCSR to default */
-	phy_write(mii_info, MII_DM9161_10BTCSR, MII_DM9161_10BTCSR_INIT);
-
-	/* Reconnect the PHY, and enable Autonegotiation */
-	phy_write(mii_info, MII_BMCR, BMCR_ANENABLE);
-
-	/* Start a timer for DM9161_DELAY seconds to wait
-	 * for the PHY to be ready */
-	init_timer(&priv->timer);
-	priv->timer.function = &dm9161_timer;
-	priv->timer.data = (unsigned long) mii_info;
-	mod_timer(&priv->timer, jiffies + DM9161_DELAY * HZ);
-
-	return 0;
-}
-
-static void dm9161_close(struct gfar_mii_info *mii_info)
-{
-	struct dm9161_private *priv = mii_info->priv;
-
-	del_timer_sync(&priv->timer);
-	kfree(priv);
-}
-
-#if 0
-static int dm9161_ack_interrupt(struct gfar_mii_info *mii_info)
-{
-	phy_read(mii_info, MII_DM9161_INTR);
-
-	return 0;
-}
-#endif
-
-/* Cicada 820x */
-static struct phy_info phy_info_cis820x = {
-	0x000fc440,
-	"Cicada Cis8204",
-	0x000fffc0,
-	.features	= MII_GBIT_FEATURES,
-	.init		= &cis820x_init,
-	.config_aneg	= &gbit_config_aneg,
-	.read_status	= &cis820x_read_status,
-	.ack_interrupt	= &cis820x_ack_interrupt,
-	.config_intr	= &cis820x_config_intr,
-};
-
-static struct phy_info phy_info_dm9161 = {
-	.phy_id		= 0x0181b880,
-	.name		= "Davicom DM9161E",
-	.phy_id_mask	= 0x0ffffff0,
-	.init		= dm9161_init,
-	.config_aneg	= dm9161_config_aneg,
-	.read_status	= dm9161_read_status,
-	.close		= dm9161_close,
-};
-
-static struct phy_info phy_info_marvell = {
-	.phy_id		= 0x01410c00,
-	.phy_id_mask	= 0xffffff00,
-	.name		= "Marvell 88E1101/88E1111",
-	.features	= MII_GBIT_FEATURES,
-	.config_aneg	= &marvell_config_aneg,
-	.read_status	= &marvell_read_status,
-	.ack_interrupt	= &marvell_ack_interrupt,
-	.config_intr	= &marvell_config_intr,
-};
-
-static struct phy_info phy_info_genmii= {
-	.phy_id		= 0x00000000,
-	.phy_id_mask	= 0x00000000,
-	.name		= "Generic MII",
-	.features	= MII_BASIC_FEATURES,
-	.config_aneg	= genmii_config_aneg,
-	.read_status	= genmii_read_status,
-};
-
-static struct phy_info *phy_info[] = {
-	&phy_info_cis820x,
-	&phy_info_marvell,
-	&phy_info_dm9161,
-	&phy_info_genmii,
-	NULL
-};
-
-u16 phy_read(struct gfar_mii_info *mii_info, u16 regnum)
-{
-	u16 retval;
-	unsigned long flags;
-
-	spin_lock_irqsave(&mii_info->mdio_lock, flags);
-	retval = mii_info->mdio_read(mii_info->dev, mii_info->mii_id, regnum);
-	spin_unlock_irqrestore(&mii_info->mdio_lock, flags);
-
-	return retval;
-}
-
-void phy_write(struct gfar_mii_info *mii_info, u16 regnum, u16 val)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&mii_info->mdio_lock, flags);
-	mii_info->mdio_write(mii_info->dev, 
-			mii_info->mii_id, 
-			regnum, val);
-	spin_unlock_irqrestore(&mii_info->mdio_lock, flags);
-}
-
-/* Use the PHY ID registers to determine what type of PHY is attached
- * to device dev.  return a struct phy_info structure describing that PHY
- */
-struct phy_info * get_phy_info(struct gfar_mii_info *mii_info)
-{
-	u16 phy_reg;
-	u32 phy_ID;
-	int i;
-	struct phy_info *theInfo = NULL;
-	struct net_device *dev = mii_info->dev;
-
-	/* Grab the bits from PHYIR1, and put them in the upper half */
-	phy_reg = phy_read(mii_info, MII_PHYSID1);
-	phy_ID = (phy_reg & 0xffff) << 16;
-
-	/* Grab the bits from PHYIR2, and put them in the lower half */
-	phy_reg = phy_read(mii_info, MII_PHYSID2);
-	phy_ID |= (phy_reg & 0xffff);
-
-	/* loop through all the known PHY types, and find one that */
-	/* matches the ID we read from the PHY. */
-	for (i = 0; phy_info[i]; i++)
-		if (phy_info[i]->phy_id == 
-				(phy_ID & phy_info[i]->phy_id_mask)) {
-			theInfo = phy_info[i];
-			break;
-		}
-
-	/* This shouldn't happen, as we have generic PHY support */
-	if (theInfo == NULL) {
-		printk("%s: PHY id %x is not supported!\n", dev->name, phy_ID);
-		return NULL;
-	} else {
-		printk("%s: PHY is %s (%x)\n", dev->name, theInfo->name,
-		       phy_ID);
-	}
-
-	return theInfo;
-}

+ 0 - 213
drivers/net/gianfar_phy.h

@@ -1,213 +0,0 @@
-/* 
- * drivers/net/gianfar_phy.h
- *
- * Gianfar Ethernet Driver -- PHY handling
- * Driver for FEC on MPC8540 and TSEC on MPC8540/MPC8560
- * Based on 8260_io/fcc_enet.c
- *
- * Author: Andy Fleming
- * Maintainer: Kumar Gala (kumar.gala@freescale.com)
- *
- * Copyright (c) 2002-2004 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- */
-#ifndef __GIANFAR_PHY_H
-#define __GIANFAR_PHY_H
-
-#define MII_end ((u32)-2)
-#define MII_read ((u32)-1)
-
-#define MIIMIND_BUSY            0x00000001
-#define MIIMIND_NOTVALID        0x00000004
-
-#define GFAR_AN_TIMEOUT         2000
-
-/* 1000BT control (Marvell & BCM54xx at least) */
-#define MII_1000BASETCONTROL			0x09
-#define MII_1000BASETCONTROL_FULLDUPLEXCAP	0x0200
-#define MII_1000BASETCONTROL_HALFDUPLEXCAP	0x0100
-
-/* Cicada Extended Control Register 1 */
-#define MII_CIS8201_EXT_CON1           0x17
-#define MII_CIS8201_EXTCON1_INIT       0x0000
-
-/* Cicada Interrupt Mask Register */
-#define MII_CIS8201_IMASK		0x19
-#define MII_CIS8201_IMASK_IEN		0x8000
-#define MII_CIS8201_IMASK_SPEED	0x4000
-#define MII_CIS8201_IMASK_LINK		0x2000
-#define MII_CIS8201_IMASK_DUPLEX	0x1000
-#define MII_CIS8201_IMASK_MASK		0xf000
-
-/* Cicada Interrupt Status Register */
-#define MII_CIS8201_ISTAT		0x1a
-#define MII_CIS8201_ISTAT_STATUS	0x8000
-#define MII_CIS8201_ISTAT_SPEED	0x4000
-#define MII_CIS8201_ISTAT_LINK		0x2000
-#define MII_CIS8201_ISTAT_DUPLEX	0x1000
-
-/* Cicada Auxiliary Control/Status Register */
-#define MII_CIS8201_AUX_CONSTAT        0x1c
-#define MII_CIS8201_AUXCONSTAT_INIT    0x0004
-#define MII_CIS8201_AUXCONSTAT_DUPLEX  0x0020
-#define MII_CIS8201_AUXCONSTAT_SPEED   0x0018
-#define MII_CIS8201_AUXCONSTAT_GBIT    0x0010
-#define MII_CIS8201_AUXCONSTAT_100     0x0008
-                                                                                
-/* 88E1011 PHY Status Register */
-#define MII_M1011_PHY_SPEC_STATUS		0x11
-#define MII_M1011_PHY_SPEC_STATUS_1000		0x8000
-#define MII_M1011_PHY_SPEC_STATUS_100		0x4000
-#define MII_M1011_PHY_SPEC_STATUS_SPD_MASK	0xc000
-#define MII_M1011_PHY_SPEC_STATUS_FULLDUPLEX	0x2000
-#define MII_M1011_PHY_SPEC_STATUS_RESOLVED	0x0800
-#define MII_M1011_PHY_SPEC_STATUS_LINK		0x0400
-
-#define MII_M1011_IEVENT		0x13
-#define MII_M1011_IEVENT_CLEAR		0x0000
-
-#define MII_M1011_IMASK			0x12
-#define MII_M1011_IMASK_INIT		0x6400
-#define MII_M1011_IMASK_CLEAR		0x0000
-
-#define MII_DM9161_SCR		0x10
-#define MII_DM9161_SCR_INIT	0x0610
-
-/* DM9161 Specified Configuration and Status Register */
-#define MII_DM9161_SCSR	0x11
-#define MII_DM9161_SCSR_100F	0x8000
-#define MII_DM9161_SCSR_100H	0x4000
-#define MII_DM9161_SCSR_10F	0x2000
-#define MII_DM9161_SCSR_10H	0x1000
-
-/* DM9161 Interrupt Register */
-#define MII_DM9161_INTR	0x15
-#define MII_DM9161_INTR_PEND		0x8000
-#define MII_DM9161_INTR_DPLX_MASK	0x0800
-#define MII_DM9161_INTR_SPD_MASK	0x0400
-#define MII_DM9161_INTR_LINK_MASK	0x0200
-#define MII_DM9161_INTR_MASK		0x0100
-#define MII_DM9161_INTR_DPLX_CHANGE	0x0010
-#define MII_DM9161_INTR_SPD_CHANGE	0x0008
-#define MII_DM9161_INTR_LINK_CHANGE	0x0004
-#define MII_DM9161_INTR_INIT 		0x0000
-#define MII_DM9161_INTR_STOP	\
-(MII_DM9161_INTR_DPLX_MASK | MII_DM9161_INTR_SPD_MASK \
- | MII_DM9161_INTR_LINK_MASK | MII_DM9161_INTR_MASK)
-
-/* DM9161 10BT Configuration/Status */
-#define MII_DM9161_10BTCSR	0x12
-#define MII_DM9161_10BTCSR_INIT	0x7800
-
-#define MII_BASIC_FEATURES	(SUPPORTED_10baseT_Half | \
-				 SUPPORTED_10baseT_Full | \
-				 SUPPORTED_100baseT_Half | \
-				 SUPPORTED_100baseT_Full | \
-				 SUPPORTED_Autoneg | \
-				 SUPPORTED_TP | \
-				 SUPPORTED_MII)
-
-#define MII_GBIT_FEATURES	(MII_BASIC_FEATURES | \
-				 SUPPORTED_1000baseT_Half | \
-				 SUPPORTED_1000baseT_Full)
-
-#define MII_READ_COMMAND       0x00000001
-
-#define MII_INTERRUPT_DISABLED 0x0
-#define MII_INTERRUPT_ENABLED 0x1
-/* Taken from mii_if_info and sungem_phy.h */
-struct gfar_mii_info {
-	/* Information about the PHY type */
-	/* And management functions */
-	struct phy_info *phyinfo;
-
-	/* forced speed & duplex (no autoneg)
-	 * partner speed & duplex & pause (autoneg)
-	 */
-	int speed;
-	int duplex;
-	int pause;
-
-	/* The most recently read link state */
-	int link;
-
-	/* Enabled Interrupts */
-	u32 interrupts;
-
-	u32 advertising;
-	int autoneg;
-	int mii_id;
-
-	/* private data pointer */
-	/* For use by PHYs to maintain extra state */
-	void *priv;
-
-	/* Provided by host chip */
-	struct net_device *dev;
-
-	/* A lock to ensure that only one thing can read/write
-	 * the MDIO bus at a time */
-	spinlock_t mdio_lock;
-
-	/* Provided by ethernet driver */
-	int (*mdio_read) (struct net_device *dev, int mii_id, int reg);
-	void (*mdio_write) (struct net_device *dev, int mii_id, int reg, int val);
-};
-
-/* struct phy_info: a structure which defines attributes for a PHY
- *
- * id will contain a number which represents the PHY.  During
- * startup, the driver will poll the PHY to find out what its
- * UID--as defined by registers 2 and 3--is.  The 32-bit result
- * gotten from the PHY will be ANDed with phy_id_mask to
- * discard any bits which may change based on revision numbers
- * unimportant to functionality
- *
- * There are 6 commands which take a gfar_mii_info structure.
- * Each PHY must declare config_aneg, and read_status.
- */
-struct phy_info {
-	u32 phy_id;
-	char *name;
-	unsigned int phy_id_mask;
-	u32 features;
-
-	/* Called to initialize the PHY */
-	int (*init)(struct gfar_mii_info *mii_info);
-
-	/* Called to suspend the PHY for power */
-	int (*suspend)(struct gfar_mii_info *mii_info);
-
-	/* Reconfigures autonegotiation (or disables it) */
-	int (*config_aneg)(struct gfar_mii_info *mii_info);
-
-	/* Determines the negotiated speed and duplex */
-	int (*read_status)(struct gfar_mii_info *mii_info);
-
-	/* Clears any pending interrupts */
-	int (*ack_interrupt)(struct gfar_mii_info *mii_info);
-
-	/* Enables or disables interrupts */
-	int (*config_intr)(struct gfar_mii_info *mii_info);
-
-	/* Clears up any memory if needed */
-	void (*close)(struct gfar_mii_info *mii_info);
-};
-
-struct phy_info *get_phy_info(struct gfar_mii_info *mii_info);
-int read_phy_reg(struct net_device *dev, int mii_id, int regnum);
-void write_phy_reg(struct net_device *dev, int mii_id, int regnum, int value);
-void mii_clear_phy_interrupt(struct gfar_mii_info *mii_info);
-void mii_configure_phy_interrupt(struct gfar_mii_info *mii_info, u32 interrupts);
-
-struct dm9161_private {
-	struct timer_list timer;
-	int resetdone;
-};
-
-#endif /* GIANFAR_PHY_H */

+ 1 - 0
drivers/net/hamradio/Kconfig

@@ -1,6 +1,7 @@
 config MKISS
 	tristate "Serial port KISS driver"
 	depends on AX25
+	select CRC16
 	---help---
 	  KISS is a protocol used for the exchange of data between a computer
 	  and a Terminal Node Controller (a small embedded system commonly

+ 3 - 6
drivers/net/hamradio/bpqether.c

@@ -144,7 +144,7 @@ static inline struct net_device *bpq_get_ax25_dev(struct net_device *dev)
 {
 	struct bpqdev *bpq;
 
-	list_for_each_entry(bpq, &bpq_devices, bpq_list) {
+	list_for_each_entry_rcu(bpq, &bpq_devices, bpq_list) {
 		if (bpq->ethdev == dev)
 			return bpq->axdev;
 	}
@@ -399,7 +399,7 @@ static void *bpq_seq_start(struct seq_file *seq, loff_t *pos)
 	if (*pos == 0)
 		return SEQ_START_TOKEN;
 	
-	list_for_each_entry(bpqdev, &bpq_devices, bpq_list) {
+	list_for_each_entry_rcu(bpqdev, &bpq_devices, bpq_list) {
 		if (i == *pos)
 			return bpqdev;
 	}
@@ -418,7 +418,7 @@ static void *bpq_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		p = ((struct bpqdev *)v)->bpq_list.next;
 
 	return (p == &bpq_devices) ? NULL 
-		: list_entry(p, struct bpqdev, bpq_list);
+		: rcu_dereference(list_entry(p, struct bpqdev, bpq_list));
 }
 
 static void bpq_seq_stop(struct seq_file *seq, void *v)
@@ -561,8 +561,6 @@ static int bpq_device_event(struct notifier_block *this,unsigned long event, voi
 	if (!dev_is_ethdev(dev))
 		return NOTIFY_DONE;
 
-	rcu_read_lock();
-
 	switch (event) {
 	case NETDEV_UP:		/* new ethernet device -> new BPQ interface */
 		if (bpq_get_ax25_dev(dev) == NULL)
@@ -581,7 +579,6 @@ static int bpq_device_event(struct notifier_block *this,unsigned long event, voi
 	default:
 		break;
 	}
-	rcu_read_unlock();
 
 	return NOTIFY_DONE;
 }

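Editor's note: the bpqether changes above switch readers of bpq_devices to the RCU list primitives, and drop the rcu_read_lock()/unlock() pair from bpq_device_event(), presumably because the netdev notifier already runs in a context where the list cannot change underneath it. The general reader pattern the driver now follows, sketched (function name hypothetical, field names from the hunk):

static struct net_device *bpq_find_axdev_sketch(struct net_device *ethdev)
{
	struct bpqdev *bpq;
	struct net_device *axdev = NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(bpq, &bpq_devices, bpq_list)
		if (bpq->ethdev == ethdev) {
			axdev = bpq->axdev;
			break;
		}
	rcu_read_unlock();

	return axdev;
}

Readers never block and take no lock that writers contend on; writers serialize among themselves and publish list updates with the RCU mutation helpers.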
+ 149 - 33
drivers/net/hamradio/mkiss.c

@@ -14,13 +14,14 @@
  *
  * Copyright (C) Hans Alblas PE1AYX <hans@esrac.ele.tue.nl>
  * Copyright (C) 2004, 05 Ralf Baechle DL5RB <ralf@linux-mips.org>
+ * Copyright (C) 2004, 05 Thomas Osterried DL9SAU <thomas@x-berg.in-berlin.de>
  */
-
 #include <linux/config.h>
 #include <linux/module.h>
 #include <asm/system.h>
 #include <linux/bitops.h>
 #include <asm/uaccess.h>
+#include <linux/crc16.h>
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
@@ -39,11 +40,6 @@
 
 #include <net/ax25.h>
 
-#ifdef CONFIG_INET
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#endif
-
 #define AX_MTU		236
 
 /* SLIP/KISS protocol characters. */
@@ -80,9 +76,13 @@ struct mkiss {
 
 	int		mode;
         int		crcmode;	/* MW: for FlexNet, SMACK etc.  */
-#define CRC_MODE_NONE   0
-#define CRC_MODE_FLEX   1
-#define CRC_MODE_SMACK  2
+	int		crcauto;	/* CRC auto mode */
+
+#define CRC_MODE_NONE		0
+#define CRC_MODE_FLEX		1
+#define CRC_MODE_SMACK		2
+#define CRC_MODE_FLEX_TEST	3
+#define CRC_MODE_SMACK_TEST	4
 
 	atomic_t		refcnt;
 	struct semaphore	dead_sem;
@@ -151,6 +151,21 @@ static int check_crc_flex(unsigned char *cp, int size)
 	return 0;
 }
 
+static int check_crc_16(unsigned char *cp, int size)
+{
+	unsigned short crc = 0x0000;
+
+	if (size < 3)
+		return -1;
+
+	crc = crc16(0, cp, size);
+
+	if (crc != 0x0000)
+		return -1;
+
+	return 0;
+}
+
 /*
  * Standard encapsulation
  */
@@ -237,19 +252,42 @@ static void ax_bump(struct mkiss *ax)
 
 	spin_lock_bh(&ax->buflock);
 	if (ax->rbuff[0] > 0x0f) {
-		if (ax->rbuff[0] & 0x20) {
-		        ax->crcmode = CRC_MODE_FLEX;
+		if (ax->rbuff[0] & 0x80) {
+			if (check_crc_16(ax->rbuff, ax->rcount) < 0) {
+				ax->stats.rx_errors++;
+				spin_unlock_bh(&ax->buflock);
+
+				return;
+			}
+			if (ax->crcmode != CRC_MODE_SMACK && ax->crcauto) {
+				printk(KERN_INFO
+				       "mkiss: %s: Switchting to crc-smack\n",
+				       ax->dev->name);
+				ax->crcmode = CRC_MODE_SMACK;
+			}
+			ax->rcount -= 2;
+			*ax->rbuff &= ~0x80;
+		} else if (ax->rbuff[0] & 0x20)  {
 			if (check_crc_flex(ax->rbuff, ax->rcount) < 0) {
-			        ax->stats.rx_errors++;
+				ax->stats.rx_errors++;
+				spin_unlock_bh(&ax->buflock);
 				return;
 			}
+			if (ax->crcmode != CRC_MODE_FLEX && ax->crcauto) {
+				printk(KERN_INFO
+				       "mkiss: %s: Switchting to crc-flexnet\n",
+				       ax->dev->name);
+				ax->crcmode = CRC_MODE_FLEX;
+			}
 			ax->rcount -= 2;
-                        /* dl9sau bugfix: the trailling two bytes flexnet crc
-                         * will not be passed to the kernel. thus we have
-                         * to correct the kissparm signature, because it
-                         * indicates a crc but there's none
+
+			/*
+			 * dl9sau bugfix: the trailing two flexnet crc bytes
+			 * will not be passed to the kernel. thus we have to
+			 * correct the kissparm signature, because it indicates
+			 * a crc but there's none
 			 */
-                        *ax->rbuff &= ~0x20;
+			*ax->rbuff &= ~0x20;
 		}
  	}
 	spin_unlock_bh(&ax->buflock);
@@ -417,20 +455,69 @@ static void ax_encaps(struct net_device *dev, unsigned char *icp, int len)
 	p = icp;
 
 	spin_lock_bh(&ax->buflock);
-        switch (ax->crcmode) {
-	         unsigned short crc;
+	if ((*p & 0x0f) != 0) {
+		/* Configuration Command (kissparms(1)).
+		 * Protocol spec says: never append CRC.
+		 * This fixes a very old bug in the linux
+		 * kiss driver. -- dl9sau */
+		switch (*p & 0xff) {
+		case 0x85:
+			/* command from userspace especially for us,
+			 * not for delivery to the tnc */
+			if (len > 1) {
+				int cmd = (p[1] & 0xff);
+				switch (cmd) {
+				case 3:
+					ax->crcmode = CRC_MODE_SMACK;
+					break;
+				case 2:
+					ax->crcmode = CRC_MODE_FLEX;
+					break;
+				case 1:
+					ax->crcmode = CRC_MODE_NONE;
+					break;
+				case 0:
+				default:
+					ax->crcmode = CRC_MODE_SMACK_TEST;
+					cmd = 0;
+				}
+				ax->crcauto = (cmd ? 0 : 1);
+				printk(KERN_INFO "mkiss: %s: crc mode %s %d\n", ax->dev->name, (len) ? "set to" : "is", cmd);
+			}
+			spin_unlock_bh(&ax->buflock);
+			netif_start_queue(dev);
 
-	case CRC_MODE_FLEX:
-	         *p |= 0x20;
-	         crc = calc_crc_flex(p, len);
-		 count = kiss_esc_crc(p, (unsigned char *)ax->xbuff, crc, len+2);
-		 break;
+			return;
+		default:
+			count = kiss_esc(p, (unsigned char *)ax->xbuff, len);
+		}
+	} else {
+		unsigned short crc;
+		switch (ax->crcmode) {
+		case CRC_MODE_SMACK_TEST:
+			ax->crcmode  = CRC_MODE_FLEX_TEST;
+			printk(KERN_INFO "mkiss: %s: Trying crc-smack\n", ax->dev->name);
+			/* fall through */
+		case CRC_MODE_SMACK:
+			*p |= 0x80;
+			crc = swab16(crc16(0, p, len));
+			count = kiss_esc_crc(p, (unsigned char *)ax->xbuff, crc, len+2);
+			break;
+		case CRC_MODE_FLEX_TEST:
+			ax->crcmode = CRC_MODE_NONE;
+			printk(KERN_INFO "mkiss: %s: Trying crc-flexnet\n", ax->dev->name);
+			/* fall through */
+		case CRC_MODE_FLEX:
+			*p |= 0x20;
+			crc = calc_crc_flex(p, len);
+			count = kiss_esc_crc(p, (unsigned char *)ax->xbuff, crc, len+2);
+			break;
+
+		default:
+			count = kiss_esc(p, (unsigned char *)ax->xbuff, len);
+		}
+	}
 
-	default:
-	         count = kiss_esc(p, (unsigned char *)ax->xbuff, len);
-		 break;
-	}
-	
 	set_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags);
 	actual = ax->tty->driver->write(ax->tty, ax->xbuff, count);
 	ax->stats.tx_packets++;
@@ -439,8 +526,6 @@ static void ax_encaps(struct net_device *dev, unsigned char *icp, int len)
 	ax->dev->trans_start = jiffies;
 	ax->xleft = count - actual;
 	ax->xhead = ax->xbuff + actual;
-
-	spin_unlock_bh(&ax->buflock);
 }
 
 /* Encapsulate an AX.25 packet and kick it into a TTY queue. */
@@ -622,7 +707,7 @@ static void ax_setup(struct net_device *dev)
  * best way to fix this is to use a rwlock in the tty struct, but for now we
  * use a single global rwlock for all ttys in ppp line discipline.
  */
-static rwlock_t disc_data_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(disc_data_lock);
 
 static struct mkiss *mkiss_get(struct tty_struct *tty)
 {
@@ -643,6 +728,8 @@ static void mkiss_put(struct mkiss *ax)
 		up(&ax->dead_sem);
 }
 
+static int crc_force = 0;	/* Can be overridden with insmod */
+
 static int mkiss_open(struct tty_struct *tty)
 {
 	struct net_device *dev;
@@ -682,6 +769,33 @@ static int mkiss_open(struct tty_struct *tty)
 	if (register_netdev(dev))
 		goto out_free_buffers;
 
+	/* after register_netdev(): the printks below need dev->name to be set */
+	switch (crc_force) {
+	case 3:
+		ax->crcmode  = CRC_MODE_SMACK;
+		printk(KERN_INFO "mkiss: %s: crc mode smack forced.\n",
+		       ax->dev->name);
+		break;
+	case 2:
+		ax->crcmode  = CRC_MODE_FLEX;
+		printk(KERN_INFO "mkiss: %s: crc mode flexnet forced.\n",
+		       ax->dev->name);
+		break;
+	case 1:
+		ax->crcmode  = CRC_MODE_NONE;
+		printk(KERN_INFO "mkiss: %s: crc mode disabled.\n",
+		       ax->dev->name);
+		break;
+	case 0:
+		/* fall through */
+	default:
+		crc_force = 0;
+		printk(KERN_INFO "mkiss: %s: crc mode is auto.\n",
+		       ax->dev->name);
+		ax->crcmode  = CRC_MODE_SMACK_TEST;
+	}
+	ax->crcauto = (crc_force ? 0 : 1);
+
 	netif_start_queue(dev);
 
 	/* Done.  We have linked the TTY line to a channel. */
@@ -765,7 +879,6 @@ static int mkiss_ioctl(struct tty_struct *tty, struct file *file,
 
 	case SIOCSIFHWADDR: {
 		char addr[AX25_ADDR_LEN];
-printk(KERN_INFO "In SIOCSIFHWADDR");
 
 		if (copy_from_user(&addr,
 		                   (void __user *) arg, AX25_ADDR_LEN)) {
@@ -864,6 +977,7 @@ out:
 }
 
 static struct tty_ldisc ax_ldisc = {
+	.owner		= THIS_MODULE,
 	.magic		= TTY_LDISC_MAGIC,
 	.name		= "mkiss",
 	.open		= mkiss_open,
@@ -904,6 +1018,8 @@ static void __exit mkiss_exit_driver(void)
 
 MODULE_AUTHOR("Ralf Baechle DL5RB <ralf@linux-mips.org>");
 MODULE_DESCRIPTION("KISS driver for AX.25 over TTYs");
+MODULE_PARM(crc_force, "i");
+MODULE_PARM_DESC(crc_force, "crc [0 = auto | 1 = none | 2 = flexnet | 3 = smack]");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_LDISC(N_AX25);
 

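Editor's note: the mkiss rework adds CRC auto-negotiation of sorts. A fresh channel starts in CRC_MODE_SMACK_TEST; transmissions fall through SMACK, then FlexNet, then no CRC, and ax_bump() latches the first mode whose checksum verifies on a received frame (crcauto). The SMACK transmit branch, condensed from the hunk above into one sketch (crc16() is the kernel helper pulled in by the new "select CRC16" in Kconfig; function name hypothetical):

static int ax_build_smack_sketch(struct mkiss *ax, unsigned char *p, int len)
{
	unsigned short crc;

	*p |= 0x80;			/* flag the frame as SMACK-checksummed */
	crc = swab16(crc16(0, p, len));	/* CRC covers flag byte and payload */

	/* escape the frame plus the two CRC bytes into the transmit buffer */
	return kiss_esc_crc(p, (unsigned char *)ax->xbuff, crc, len + 2);
}

On receive, check_crc_16() runs crc16() over the whole frame including the appended CRC bytes and accepts the frame only if the result is zero.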
Some files were not shown because too many files changed in this diff