Эх сурвалжийг харах

Merge master.kernel.org:/pub/scm/linux/kernel/git/davej/cpufreq

* master.kernel.org:/pub/scm/linux/kernel/git/davej/cpufreq:
  [CPUFREQ] sw_any_bug_dmi_table can be used on resume, so it isn't initdata
  [CPUFREQ] Fix some more CPU hotplug locking.
  [CPUFREQ] Workaround for BIOS bug in software coordination of frequency
  [CPUFREQ] Longhaul - Add voltage scaling to driver
  [CPUFREQ] Fix sparse warning in ondemand
  [CPUFREQ] make drivers/cpufreq/cpufreq_ondemand.c:powersave_bias_target() static
  [CPUFREQ] Longhaul - Add ignore_latency option
  [CPUFREQ] Longhaul - Disable arbiter
  [CPUFREQ][2/2] ondemand: updated add powersave_bias tunable
  [CPUFREQ][1/2] ondemand: updated tune for hardware coordination
  [CPUFREQ] Fix typo.
Linus Torvalds 18 жил өмнө
parent
commit
2ee8099f2c

+ 38 - 1
arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c

@@ -32,6 +32,7 @@
 #include <linux/seq_file.h>
 #include <linux/compiler.h>
 #include <linux/sched.h>	/* current */
+#include <linux/dmi.h>
 #include <asm/io.h>
 #include <asm/delay.h>
 #include <asm/uaccess.h>
@@ -387,6 +388,33 @@ static int acpi_cpufreq_early_init_acpi(void)
 	return acpi_processor_preregister_performance(acpi_perf_data);
 }
 
+/*
+ * Some BIOSes do SW_ANY coordination internally, either set it up in hw
+ * or do it in BIOS firmware and won't inform about it to OS. If not
+ * detected, this has a side effect of making CPU run at a different speed
+ * than OS intended it to run at. Detect it and handle it cleanly.
+ */
+static int bios_with_sw_any_bug;
+
+static int __init sw_any_bug_found(struct dmi_system_id *d)
+{
+	bios_with_sw_any_bug = 1;
+	return 0;
+}
+
+static struct dmi_system_id __initdata sw_any_bug_dmi_table[] = {
+	{
+		.callback = sw_any_bug_found,
+		.ident = "Supermicro Server X6DLP",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
+			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
+		},
+	},
+	{ }
+};
+
 static int
 acpi_cpufreq_cpu_init (
 	struct cpufreq_policy   *policy)
@@ -422,8 +450,17 @@ acpi_cpufreq_cpu_init (
 	 * coordination is required.
 	 */
 	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
-	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
 		policy->cpus = perf->shared_cpu_map;
+	}
+
+#ifdef CONFIG_SMP
+	dmi_check_system(sw_any_bug_dmi_table);
+	if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) {
+		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
+		policy->cpus = cpu_core_map[cpu];
+	}
+#endif
 
 	if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
 		acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;

+ 123 - 63
arch/i386/kernel/cpu/cpufreq/longhaul.c

@@ -27,6 +27,7 @@
 #include <linux/moduleparam.h>
 #include <linux/init.h>
 #include <linux/cpufreq.h>
+#include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 
@@ -52,18 +53,26 @@
 #define	CPU_NEHEMIAH	5
 
 static int cpu_model;
-static unsigned int numscales=16, numvscales;
+static unsigned int numscales=16;
 static unsigned int fsb;
-static int minvid, maxvid;
+
+static struct mV_pos *vrm_mV_table;
+static unsigned char *mV_vrm_table;
+struct f_msr {
+	unsigned char vrm;
+};
+static struct f_msr f_msr_table[32];
+
+static unsigned int highest_speed, lowest_speed; /* kHz */
 static unsigned int minmult, maxmult;
 static int can_scale_voltage;
-static int vrmrev;
 static struct acpi_processor *pr = NULL;
 static struct acpi_processor_cx *cx = NULL;
+static int port22_en;
 
 /* Module parameters */
-static int dont_scale_voltage;
-
+static int scale_voltage;
+static int ignore_latency;
 
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
 
@@ -71,7 +80,6 @@ static int dont_scale_voltage;
 /* Clock ratios multiplied by 10 */
 static int clock_ratio[32];
 static int eblcr_table[32];
-static int voltage_table[32];
 static unsigned int highest_speed, lowest_speed; /* kHz */
 static int longhaul_version;
 static struct cpufreq_frequency_table *longhaul_table;
@@ -124,10 +132,9 @@ static int longhaul_get_cpu_mult(void)
 
 /* For processor with BCR2 MSR */
 
-static void do_longhaul1(int cx_address, unsigned int clock_ratio_index)
+static void do_longhaul1(unsigned int clock_ratio_index)
 {
 	union msr_bcr2 bcr2;
-	u32 t;
 
 	rdmsrl(MSR_VIA_BCR2, bcr2.val);
 	/* Enable software clock multiplier */
@@ -136,13 +143,11 @@ static void do_longhaul1(int cx_address, unsigned int clock_ratio_index)
 
 	/* Sync to timer tick */
 	safe_halt();
-	ACPI_FLUSH_CPU_CACHE();
 	/* Change frequency on next halt or sleep */
 	wrmsrl(MSR_VIA_BCR2, bcr2.val);
-	/* Invoke C3 */
-	inb(cx_address);
-	/* Dummy op - must do something useless after P_LVL3 read */
-	t = inl(acpi_fadt.xpm_tmr_blk.address);
+	/* Invoke transition */
+	ACPI_FLUSH_CPU_CACHE();
+	halt();
 
 	/* Disable software clock multiplier */
 	local_irq_disable();
@@ -164,11 +169,16 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index)
 	longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4;
 	longhaul.bits.EnableSoftBusRatio = 1;
 
+	if (can_scale_voltage) {
+		longhaul.bits.SoftVID = f_msr_table[clock_ratio_index].vrm;
+		longhaul.bits.EnableSoftVID = 1;
+	}
+
 	/* Sync to timer tick */
 	safe_halt();
-	ACPI_FLUSH_CPU_CACHE();
 	/* Change frequency on next halt or sleep */
 	wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+	ACPI_FLUSH_CPU_CACHE();
 	/* Invoke C3 */
 	inb(cx_address);
 	/* Dummy op - must do something useless after P_LVL3 read */
@@ -227,10 +237,13 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
 	outb(0xFF,0xA1);	/* Overkill */
 	outb(0xFE,0x21);	/* TMR0 only */
 
-	/* Disable bus master arbitration */
-	if (pr->flags.bm_check) {
+	if (pr->flags.bm_control) {
+ 		/* Disable bus master arbitration */
 		acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1,
 				  ACPI_MTX_DO_NOT_LOCK);
+	} else if (port22_en) {
+		/* Disable AGP and PCI arbiters */
+		outb(3, 0x22);
 	}
 
 	switch (longhaul_version) {
@@ -244,7 +257,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
 	 */
 	case TYPE_LONGHAUL_V1:
 	case TYPE_LONGHAUL_V2:
-		do_longhaul1(cx->address, clock_ratio_index);
+		do_longhaul1(clock_ratio_index);
 		break;
 
 	/*
@@ -259,14 +272,20 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
 	 * to work in practice.
 	 */
 	case TYPE_POWERSAVER:
+		/* Don't allow wakeup */
+		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0,
+				  ACPI_MTX_DO_NOT_LOCK);
 		do_powersaver(cx->address, clock_ratio_index);
 		break;
 	}
 
-	/* Enable bus master arbitration */
-	if (pr->flags.bm_check) {
+	if (pr->flags.bm_control) {
+		/* Enable bus master arbitration */
 		acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0,
 				  ACPI_MTX_DO_NOT_LOCK);
+	} else if (port22_en) {
+		/* Enable arbiters */
+		outb(0, 0x22);
 	}
 
 	outb(pic2_mask,0xA1);	/* restore mask */
@@ -446,53 +465,57 @@ static int __init longhaul_get_ranges(void)
 static void __init longhaul_setup_voltagescaling(void)
 {
 	union msr_longhaul longhaul;
+	struct mV_pos minvid, maxvid;
+	unsigned int j, speed, pos, kHz_step, numvscales;
 
-	rdmsrl (MSR_VIA_LONGHAUL, longhaul.val);
-
-	if (!(longhaul.bits.RevisionID & 1))
+	rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+	if (!(longhaul.bits.RevisionID & 1)) {
+		printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n");
 		return;
+	}
+
+	if (!longhaul.bits.VRMRev) {
+		printk (KERN_INFO PFX "VRM 8.5\n");
+		vrm_mV_table = &vrm85_mV[0];
+		mV_vrm_table = &mV_vrm85[0];
+	} else {
+		printk (KERN_INFO PFX "Mobile VRM\n");
+		vrm_mV_table = &mobilevrm_mV[0];
+		mV_vrm_table = &mV_mobilevrm[0];
+	}
 
-	minvid = longhaul.bits.MinimumVID;
-	maxvid = longhaul.bits.MaximumVID;
-	vrmrev = longhaul.bits.VRMRev;
+	minvid = vrm_mV_table[longhaul.bits.MinimumVID];
+	maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
+	numvscales = maxvid.pos - minvid.pos + 1;
+	kHz_step = (highest_speed - lowest_speed) / numvscales;
 
-	if (minvid == 0 || maxvid == 0) {
+	if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
 		printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
 					"Voltage scaling disabled.\n",
-					minvid/1000, minvid%1000, maxvid/1000, maxvid%1000);
+					minvid.mV/1000, minvid.mV%1000, maxvid.mV/1000, maxvid.mV%1000);
 		return;
 	}
 
-	if (minvid == maxvid) {
+	if (minvid.mV == maxvid.mV) {
 		printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are "
 				"both %d.%03d. Voltage scaling disabled\n",
-				maxvid/1000, maxvid%1000);
+				maxvid.mV/1000, maxvid.mV%1000);
 		return;
 	}
 
-	if (vrmrev==0) {
-		dprintk ("VRM 8.5\n");
-		memcpy (voltage_table, vrm85scales, sizeof(voltage_table));
-		numvscales = (voltage_table[maxvid]-voltage_table[minvid])/25;
-	} else {
-		dprintk ("Mobile VRM\n");
-		memcpy (voltage_table, mobilevrmscales, sizeof(voltage_table));
-		numvscales = (voltage_table[maxvid]-voltage_table[minvid])/5;
+	printk(KERN_INFO PFX "Max VID=%d.%03d  Min VID=%d.%03d, %d possible voltage scales\n",
+		maxvid.mV/1000, maxvid.mV%1000,
+		minvid.mV/1000, minvid.mV%1000,
+		numvscales);
+	
+	j = 0;
+	while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) {
+		speed = longhaul_table[j].frequency;
+		pos = (speed - lowest_speed) / kHz_step + minvid.pos;
+		f_msr_table[longhaul_table[j].index].vrm = mV_vrm_table[pos];
+		j++;
 	}
 
-	/* Current voltage isn't readable at first, so we need to
-	   set it to a known value. The spec says to use maxvid */
-	longhaul.bits.RevisionKey = longhaul.bits.RevisionID;	/* FIXME: This is bad. */
-	longhaul.bits.EnableSoftVID = 1;
-	longhaul.bits.SoftVID = maxvid;
-	wrmsrl (MSR_VIA_LONGHAUL, longhaul.val);
-
-	minvid = voltage_table[minvid];
-	maxvid = voltage_table[maxvid];
-
-	dprintk ("Min VID=%d.%03d Max VID=%d.%03d, %d possible voltage scales\n",
-		maxvid/1000, maxvid%1000, minvid/1000, minvid%1000, numvscales);
-
 	can_scale_voltage = 1;
 }
 
@@ -540,21 +563,33 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
 	return 1;
 }
 
+/* VIA don't support PM2 reg, but have something similar */
+static int enable_arbiter_disable(void)
+{
+	struct pci_dev *dev;
+	u8 pci_cmd;
+
+	/* Find PLE133 host bridge */
+	dev = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, NULL);
+	if (dev != NULL) {
+		/* Enable access to port 0x22 */
+		pci_read_config_byte(dev, 0x78, &pci_cmd);
+		if ( !(pci_cmd & 1<<7) ) {
+			pci_cmd |= 1<<7;
+			pci_write_config_byte(dev, 0x78, pci_cmd);
+		}
+		return 1;
+	}
+	return 0;
+}
+
 static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 {
 	struct cpuinfo_x86 *c = cpu_data;
 	char *cpuname=NULL;
 	int ret;
 
-	/* Check ACPI support for C3 state */
-	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX,
-			 &longhaul_walk_callback, NULL, (void *)&pr);
-	if (pr == NULL) goto err_acpi;
-
-	cx = &pr->power.states[ACPI_STATE_C3];
-	if (cx->address == 0 || cx->latency > 1000) goto err_acpi;
-
-	/* Now check what we have on this motherboard */
+	/* Check what we have on this motherboard */
 	switch (c->x86_model) {
 	case 6:
 		cpu_model = CPU_SAMUEL;
@@ -636,12 +671,36 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 		break;
 	};
 
+	/* Find ACPI data for processor */
+	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX,
+			    &longhaul_walk_callback, NULL, (void *)&pr);
+	if (pr == NULL)
+		goto err_acpi;
+
+	if (longhaul_version == TYPE_POWERSAVER) {
+		/* Check ACPI support for C3 state */
+		cx = &pr->power.states[ACPI_STATE_C3];
+		if (cx->address == 0 ||
+		   (cx->latency > 1000 && ignore_latency == 0) )
+			goto err_acpi;
+
+	} else {
+		/* Check ACPI support for bus master arbiter disable */
+		if (!pr->flags.bm_control) {
+			if (!enable_arbiter_disable()) {
+				printk(KERN_ERR PFX "No ACPI support. No VT8601 host bridge. Aborting.\n");
+				return -ENODEV;
+			} else
+				port22_en = 1;
+		}
+	}
+
 	ret = longhaul_get_ranges();
 	if (ret != 0)
 		return ret;
 
 	if ((longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) &&
-		 (dont_scale_voltage==0))
+		 (scale_voltage != 0))
 		longhaul_setup_voltagescaling();
 
 	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
@@ -729,8 +788,10 @@ static void __exit longhaul_exit(void)
 	kfree(longhaul_table);
 }
 
-module_param (dont_scale_voltage, int, 0644);
-MODULE_PARM_DESC(dont_scale_voltage, "Don't scale voltage of processor");
+module_param (scale_voltage, int, 0644);
+MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
+module_param(ignore_latency, int, 0644);
+MODULE_PARM_DESC(ignore_latency, "Skip ACPI C3 latency test");
 
 MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
 MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
@@ -738,4 +799,3 @@ MODULE_LICENSE ("GPL");
 
 late_initcall(longhaul_init);
 module_exit(longhaul_exit);
-

+ 38 - 10
arch/i386/kernel/cpu/cpufreq/longhaul.h

@@ -450,17 +450,45 @@ static int __initdata nehemiah_c_eblcr[32] = {
  * Voltage scales. Div/Mod by 1000 to get actual voltage.
  * Which scale to use depends on the VRM type in use.
  */
-static int __initdata vrm85scales[32] = {
-	1250, 1200, 1150, 1100, 1050, 1800, 1750, 1700,
-	1650, 1600, 1550, 1500, 1450, 1400, 1350, 1300,
-	1275, 1225, 1175, 1125, 1075, 1825, 1775, 1725,
-	1675, 1625, 1575, 1525, 1475, 1425, 1375, 1325,
+
+struct mV_pos {
+	unsigned short mV;
+	unsigned short pos;
+};
+
+static struct mV_pos __initdata vrm85_mV[32] = {
+	{1250, 8},	{1200, 6},	{1150, 4},	{1100, 2},
+	{1050, 0},	{1800, 30},	{1750, 28},	{1700, 26},
+	{1650, 24},	{1600, 22},	{1550, 20},	{1500, 18},
+	{1450, 16},	{1400, 14},	{1350, 12},	{1300, 10},
+	{1275, 9},	{1225, 7},	{1175, 5},	{1125, 3},
+	{1075, 1},	{1825, 31},	{1775, 29},	{1725, 27},
+	{1675, 25},	{1625, 23},	{1575, 21},	{1525, 19},
+	{1475, 17},	{1425, 15},	{1375, 13},	{1325, 11}
+};
+
+static unsigned char __initdata mV_vrm85[32] = {
+	0x04,	0x14,	0x03,	0x13,	0x02,	0x12,	0x01,	0x11,
+	0x00,	0x10,	0x0f,	0x1f,	0x0e,	0x1e,	0x0d,	0x1d,
+	0x0c,	0x1c,	0x0b,	0x1b,	0x0a,	0x1a,	0x09,	0x19,
+	0x08,	0x18,	0x07,	0x17,	0x06,	0x16,	0x05,	0x15
+};
+
+static struct mV_pos __initdata mobilevrm_mV[32] = {
+	{1750, 31},	{1700, 30},	{1650, 29},	{1600, 28},
+	{1550, 27},	{1500, 26},	{1450, 25},	{1400, 24},
+	{1350, 23},	{1300, 22},	{1250, 21},	{1200, 20},
+	{1150, 19},	{1100, 18},	{1050, 17},	{1000, 16},
+	{975, 15},	{950, 14},	{925, 13},	{900, 12},
+	{875, 11},	{850, 10},	{825, 9},	{800, 8},
+	{775, 7},	{750, 6},	{725, 5},	{700, 4},
+	{675, 3},	{650, 2},	{625, 1},	{600, 0}
 };
 
-static int __initdata mobilevrmscales[32] = {
-	2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
-	1600, 1550, 1500, 1450, 1500, 1350, 1300, -1,
-	1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
-	1075, 1050, 1025, 1000, 975, 950, 925, -1,
+static unsigned char __initdata mV_mobilevrm[32] = {
+	0x1f,	0x1e,	0x1d,	0x1c,	0x1b,	0x1a,	0x19,	0x18,
+	0x17,	0x16,	0x15,	0x14,	0x13,	0x12,	0x11,	0x10,
+	0x0f,	0x0e,	0x0d,	0x0c,	0x0b,	0x0a,	0x09,	0x08,
+	0x07,	0x06,	0x05,	0x04,	0x03,	0x02,	0x01,	0x00
 };
 

+ 41 - 1
arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c

@@ -23,6 +23,7 @@
 
 #ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <acpi/processor.h>
 #endif
 
@@ -377,6 +378,35 @@ static int centrino_cpu_early_init_acpi(void)
 	return 0;
 }
 
+
+/*
+ * Some BIOSes do SW_ANY coordination internally, either set it up in hw
+ * or do it in BIOS firmware and won't inform about it to OS. If not
+ * detected, this has a side effect of making CPU run at a different speed
+ * than OS intended it to run at. Detect it and handle it cleanly.
+ */
+static int bios_with_sw_any_bug;
+static int __init sw_any_bug_found(struct dmi_system_id *d)
+{
+	bios_with_sw_any_bug = 1;
+	return 0;
+}
+
+
+static struct dmi_system_id sw_any_bug_dmi_table[] = {
+	{
+		.callback = sw_any_bug_found,
+		.ident = "Supermicro Server X6DLP",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
+			DMI_MATCH(DMI_BIOS_VERSION, "080010"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
+		},
+	},
+	{ }
+};
+
+
 /*
  * centrino_cpu_init_acpi - register with ACPI P-States library
  *
@@ -398,14 +428,24 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
 		dprintk(PFX "obtaining ACPI data failed\n");
 		return -EIO;
 	}
+
 	policy->shared_type = p->shared_type;
 	/*
 	 * Will let policy->cpus know about dependency only when software 
 	 * coordination is required.
 	 */
 	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
-	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
 		policy->cpus = p->shared_cpu_map;
+	}
+
+#ifdef CONFIG_SMP
+	dmi_check_system(sw_any_bug_dmi_table);
+	if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) {
+		policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
+		policy->cpus = cpu_core_map[cpu];
+	}
+#endif
 
 	/* verify the acpi_data */
 	if (p->state_count <= 1) {

+ 1 - 1
drivers/cpufreq/cpufreq.c

@@ -32,7 +32,7 @@
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, "cpufreq-core", msg)
 
 /**
- * The "cpufreq driver" - the arch- or hardware-dependend low
+ * The "cpufreq driver" - the arch- or hardware-dependent low
  * level driver of CPUFreq support, and its spinlock. This lock
  * also protects the cpufreq_cpu_data array.
  */

+ 150 - 23
drivers/cpufreq/cpufreq_ondemand.c

@@ -55,6 +55,10 @@ struct cpu_dbs_info_s {
 	struct cpufreq_policy *cur_policy;
  	struct work_struct work;
 	unsigned int enable;
+	struct cpufreq_frequency_table *freq_table;
+	unsigned int freq_lo;
+	unsigned int freq_lo_jiffies;
+	unsigned int freq_hi_jiffies;
 };
 static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
 
@@ -72,15 +76,15 @@ static DEFINE_MUTEX(dbs_mutex);
 
 static struct workqueue_struct	*kondemand_wq;
 
-struct dbs_tuners {
+static struct dbs_tuners {
 	unsigned int sampling_rate;
 	unsigned int up_threshold;
 	unsigned int ignore_nice;
-};
-
-static struct dbs_tuners dbs_tuners_ins = {
+	unsigned int powersave_bias;
+} dbs_tuners_ins = {
 	.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
 	.ignore_nice = 0,
+	.powersave_bias = 0,
 };
 
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
@@ -96,6 +100,70 @@ static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
 	return retval;
 }
 
+/*
+ * Find right freq to be set now with powersave_bias on.
+ * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
+ * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
+ */
+static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
+					  unsigned int freq_next,
+					  unsigned int relation)
+{
+	unsigned int freq_req, freq_reduc, freq_avg;
+	unsigned int freq_hi, freq_lo;
+	unsigned int index = 0;
+	unsigned int jiffies_total, jiffies_hi, jiffies_lo;
+	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, policy->cpu);
+
+	if (!dbs_info->freq_table) {
+		dbs_info->freq_lo = 0;
+		dbs_info->freq_lo_jiffies = 0;
+		return freq_next;
+	}
+
+	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
+			relation, &index);
+	freq_req = dbs_info->freq_table[index].frequency;
+	freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
+	freq_avg = freq_req - freq_reduc;
+
+	/* Find freq bounds for freq_avg in freq_table */
+	index = 0;
+	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
+			CPUFREQ_RELATION_H, &index);
+	freq_lo = dbs_info->freq_table[index].frequency;
+	index = 0;
+	cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
+			CPUFREQ_RELATION_L, &index);
+	freq_hi = dbs_info->freq_table[index].frequency;
+
+	/* Find out how long we have to be in hi and lo freqs */
+	if (freq_hi == freq_lo) {
+		dbs_info->freq_lo = 0;
+		dbs_info->freq_lo_jiffies = 0;
+		return freq_lo;
+	}
+	jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+	jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
+	jiffies_hi += ((freq_hi - freq_lo) / 2);
+	jiffies_hi /= (freq_hi - freq_lo);
+	jiffies_lo = jiffies_total - jiffies_hi;
+	dbs_info->freq_lo = freq_lo;
+	dbs_info->freq_lo_jiffies = jiffies_lo;
+	dbs_info->freq_hi_jiffies = jiffies_hi;
+	return freq_hi;
+}
+
+static void ondemand_powersave_bias_init(void)
+{
+	int i;
+	for_each_online_cpu(i) {
+		struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, i);
+		dbs_info->freq_table = cpufreq_frequency_get_table(i);
+		dbs_info->freq_lo = 0;
+	}
+}
+
 /************************** sysfs interface ************************/
 static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
 {
@@ -124,6 +192,7 @@ static ssize_t show_##file_name						\
 show_one(sampling_rate, sampling_rate);
 show_one(up_threshold, up_threshold);
 show_one(ignore_nice_load, ignore_nice);
+show_one(powersave_bias, powersave_bias);
 
 static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
 		const char *buf, size_t count)
@@ -198,6 +267,27 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
 	return count;
 }
 
+static ssize_t store_powersave_bias(struct cpufreq_policy *unused,
+		const char *buf, size_t count)
+{
+	unsigned int input;
+	int ret;
+	ret = sscanf(buf, "%u", &input);
+
+	if (ret != 1)
+		return -EINVAL;
+
+	if (input > 1000)
+		input = 1000;
+
+	mutex_lock(&dbs_mutex);
+	dbs_tuners_ins.powersave_bias = input;
+	ondemand_powersave_bias_init();
+	mutex_unlock(&dbs_mutex);
+
+	return count;
+}
+
 #define define_one_rw(_name) \
 static struct freq_attr _name = \
 __ATTR(_name, 0644, show_##_name, store_##_name)
@@ -205,6 +295,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name)
 define_one_rw(sampling_rate);
 define_one_rw(up_threshold);
 define_one_rw(ignore_nice_load);
+define_one_rw(powersave_bias);
 
 static struct attribute * dbs_attributes[] = {
 	&sampling_rate_max.attr,
@@ -212,6 +303,7 @@ static struct attribute * dbs_attributes[] = {
 	&sampling_rate.attr,
 	&up_threshold.attr,
 	&ignore_nice_load.attr,
+	&powersave_bias.attr,
 	NULL
 };
 
@@ -234,6 +326,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 	if (!this_dbs_info->enable)
 		return;
 
+	this_dbs_info->freq_lo = 0;
 	policy = this_dbs_info->cur_policy;
 	cur_jiffies = jiffies64_to_cputime64(get_jiffies_64());
 	total_ticks = (unsigned int) cputime64_sub(cur_jiffies,
@@ -274,11 +367,18 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 	/* Check for frequency increase */
 	if (load > dbs_tuners_ins.up_threshold) {
 		/* if we are already at full speed then break out early */
-		if (policy->cur == policy->max)
-			return;
-
-		__cpufreq_driver_target(policy, policy->max,
-			CPUFREQ_RELATION_H);
+		if (!dbs_tuners_ins.powersave_bias) {
+			if (policy->cur == policy->max)
+				return;
+
+			__cpufreq_driver_target(policy, policy->max,
+				CPUFREQ_RELATION_H);
+		} else {
+			int freq = powersave_bias_target(policy, policy->max,
+					CPUFREQ_RELATION_H);
+			__cpufreq_driver_target(policy, freq,
+				CPUFREQ_RELATION_L);
+		}
 		return;
 	}
 
@@ -293,37 +393,64 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
 	 * policy. To be safe, we focus 10 points under the threshold.
 	 */
 	if (load < (dbs_tuners_ins.up_threshold - 10)) {
-		unsigned int freq_next;
-		freq_next = (policy->cur * load) /
+		unsigned int freq_next = (policy->cur * load) /
 			(dbs_tuners_ins.up_threshold - 10);
-
-		__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
+		if (!dbs_tuners_ins.powersave_bias) {
+			__cpufreq_driver_target(policy, freq_next,
+					CPUFREQ_RELATION_L);
+		} else {
+			int freq = powersave_bias_target(policy, freq_next,
+					CPUFREQ_RELATION_L);
+			__cpufreq_driver_target(policy, freq,
+				CPUFREQ_RELATION_L);
+		}
 	}
 }
 
+/* Sampling types */
+enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
+
 static void do_dbs_timer(void *data)
 {
 	unsigned int cpu = smp_processor_id();
 	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);
+	/* We want all CPUs to do sampling nearly on same jiffy */
+	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+	delay -= jiffies % delay;
 
 	if (!dbs_info->enable)
 		return;
-
-	lock_cpu_hotplug();
-	dbs_check_cpu(dbs_info);
-	unlock_cpu_hotplug();
-	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work,
-			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
+	/* Common NORMAL_SAMPLE setup */
+	INIT_WORK(&dbs_info->work, do_dbs_timer, (void *)DBS_NORMAL_SAMPLE);
+	if (!dbs_tuners_ins.powersave_bias ||
+	    (unsigned long) data == DBS_NORMAL_SAMPLE) {
+		lock_cpu_hotplug();
+		dbs_check_cpu(dbs_info);
+		unlock_cpu_hotplug();
+		if (dbs_info->freq_lo) {
+			/* Setup timer for SUB_SAMPLE */
+			INIT_WORK(&dbs_info->work, do_dbs_timer,
+					(void *)DBS_SUB_SAMPLE);
+			delay = dbs_info->freq_hi_jiffies;
+		}
+	} else {
+		__cpufreq_driver_target(dbs_info->cur_policy,
+	                        	dbs_info->freq_lo,
+	                        	CPUFREQ_RELATION_H);
+	}
+	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
 }
 
 static inline void dbs_timer_init(unsigned int cpu)
 {
 	struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);
+	/* We want all CPUs to do sampling nearly on same jiffy */
+	int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+	delay -= jiffies % delay;
 
-	INIT_WORK(&dbs_info->work, do_dbs_timer, 0);
-	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work,
-			usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
-	return;
+	ondemand_powersave_bias_init();
+	INIT_WORK(&dbs_info->work, do_dbs_timer, NULL);
+	queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
 }
 
 static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)

+ 0 - 2
drivers/cpufreq/cpufreq_stats.c

@@ -350,12 +350,10 @@ __init cpufreq_stats_init(void)
 	}
 
 	register_hotcpu_notifier(&cpufreq_stat_cpu_notifier);
-	lock_cpu_hotplug();
 	for_each_online_cpu(cpu) {
 		cpufreq_stat_cpu_callback(&cpufreq_stat_cpu_notifier, CPU_ONLINE,
 			(void *)(long)cpu);
 	}
-	unlock_cpu_hotplug();
 	return 0;
 }
 static void