Forráskód Böngészése

Merge branch 'pm-devfreq' into pm-for-linus

* pm-devfreq:
  PM / devfreq: Add basic governors
  PM / devfreq: Add common sysfs interfaces
  PM: Introduce devfreq: generic DVFS framework with device-specific OPPs
  PM / OPP: Add OPP availability change notifier.
Rafael J. Wysocki 13 éve
szülő
commit
7811ac276b

+ 52 - 0
Documentation/ABI/testing/sysfs-class-devfreq

@@ -0,0 +1,52 @@
+What:		/sys/class/devfreq/.../
+Date:		September 2011
+Contact:	MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+		Provide a place in sysfs for the devfreq objects.
+		This allows accessing various devfreq specific variables.
+		The name of devfreq object denoted as ... is same as the
+		name of device using devfreq.
+
+What:		/sys/class/devfreq/.../governor
+Date:		September 2011
+Contact:	MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+		The /sys/class/devfreq/.../governor shows the name of the
+		governor used by the corresponding devfreq object.
+
+What:		/sys/class/devfreq/.../cur_freq
+Date:		September 2011
+Contact:	MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+		The /sys/class/devfreq/.../cur_freq shows the current
+		frequency of the corresponding devfreq object.
+
+What:		/sys/class/devfreq/.../central_polling
+Date:		September 2011
+Contact:	MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+		The /sys/class/devfreq/.../central_polling shows whether
+		the devfreq ojbect is using devfreq-provided central
+		polling mechanism or not.
+
+What:		/sys/class/devfreq/.../polling_interval
+Date:		September 2011
+Contact:	MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+		The /sys/class/devfreq/.../polling_interval shows and sets
+		the requested polling interval of the corresponding devfreq
+		object. The values are represented in ms. If the value is
+		less than 1 jiffy, it is considered to be 0, which means
+		no polling. This value is meaningless if the governor is
+		not polling; thus. If the governor is not using
+		devfreq-provided central polling
+		(/sys/class/devfreq/.../central_polling is 0), this value
+		may be useless.
+
+What:		/sys/class/devfreq/.../userspace/set_freq
+Date:		September 2011
+Contact:	MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+		The /sys/class/devfreq/.../userspace/set_freq shows and
+		sets the requested frequency for the devfreq object if
+		userspace governor is in effect.

+ 2 - 0
drivers/Kconfig

@@ -130,4 +130,6 @@ source "drivers/iommu/Kconfig"
 
 source "drivers/virt/Kconfig"
 
+source "drivers/devfreq/Kconfig"
+
 endmenu

+ 2 - 0
drivers/Makefile

@@ -127,3 +127,5 @@ obj-$(CONFIG_IOMMU_SUPPORT)	+= iommu/
 
 # Virtualization drivers
 obj-$(CONFIG_VIRT_DRIVERS)	+= virt/
+
+obj-$(CONFIG_PM_DEVFREQ)	+= devfreq/

+ 30 - 0
drivers/base/power/opp.c

@@ -73,6 +73,7 @@ struct opp {
  *		RCU usage: nodes are not modified in the list of device_opp,
  *		however addition is possible and is secured by dev_opp_list_lock
  * @dev:	device pointer
+ * @head:	notifier head to notify the OPP availability changes.
  * @opp_list:	list of opps
  *
  * This is an internal data structure maintaining the link to opps attached to
@@ -83,6 +84,7 @@ struct device_opp {
 	struct list_head node;
 
 	struct device *dev;
+	struct srcu_notifier_head head;
 	struct list_head opp_list;
 };
 
@@ -404,6 +406,7 @@ int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
 		}
 
 		dev_opp->dev = dev;
+		srcu_init_notifier_head(&dev_opp->head);
 		INIT_LIST_HEAD(&dev_opp->opp_list);
 
 		/* Secure the device list modification */
@@ -428,6 +431,11 @@ int opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
 	list_add_rcu(&new_opp->node, head);
 	mutex_unlock(&dev_opp_list_lock);
 
+	/*
+	 * Notify the changes in the availability of the operable
+	 * frequency/voltage list.
+	 */
+	srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_ADD, new_opp);
 	return 0;
 }
 
@@ -504,6 +512,14 @@ static int opp_set_availability(struct device *dev, unsigned long freq,
 	mutex_unlock(&dev_opp_list_lock);
 	synchronize_rcu();
 
+	/* Notify the change of the OPP availability */
+	if (availability_req)
+		srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_ENABLE,
+					 new_opp);
+	else
+		srcu_notifier_call_chain(&dev_opp->head, OPP_EVENT_DISABLE,
+					 new_opp);
+
 	/* clean up old opp */
 	new_opp = opp;
 	goto out;
@@ -643,3 +659,17 @@ void opp_free_cpufreq_table(struct device *dev,
 	*table = NULL;
 }
 #endif		/* CONFIG_CPU_FREQ */
+
+/**
+ * opp_get_notifier() - find notifier_head of the device with opp
+ * @dev:	device pointer used to lookup device OPPs.
+ */
+struct srcu_notifier_head *opp_get_notifier(struct device *dev)
+{
+	struct device_opp *dev_opp = find_device_opp(dev);
+
+	if (IS_ERR(dev_opp))
+		return ERR_PTR(PTR_ERR(dev_opp)); /* matching type */
+
+	return &dev_opp->head;
+}

+ 75 - 0
drivers/devfreq/Kconfig

@@ -0,0 +1,75 @@
+config ARCH_HAS_DEVFREQ
+	bool
+	depends on ARCH_HAS_OPP
+	help
+	  Denotes that the architecture supports DEVFREQ. If the architecture
+	  supports multiple OPP entries per device and the frequency of the
+	  devices with OPPs may be altered dynamically, the architecture
+	  supports DEVFREQ.
+
+menuconfig PM_DEVFREQ
+	bool "Generic Dynamic Voltage and Frequency Scaling (DVFS) support"
+	depends on PM_OPP && ARCH_HAS_DEVFREQ
+	help
+	  With OPP support, a device may have a list of frequencies and
+	  voltages available. DEVFREQ, a generic DVFS framework can be
+	  registered for a device with OPP support in order to let the
+	  governor provided to DEVFREQ choose an operating frequency
+	  based on the OPP's list and the policy given with DEVFREQ.
+
+	  Each device may have its own governor and policy. DEVFREQ can
+	  reevaluate the device state periodically and/or based on the
+	  OPP list changes (each frequency/voltage pair in OPP may be
+	  disabled or enabled).
+
+	  Like some CPUs with CPUFREQ, a device may have multiple clocks.
+	  However, because the clock frequencies of a single device are
+	  determined by the single device's state, an instance of DEVFREQ
+	  is attached to a single device and returns a "representative"
+	  clock frequency from the OPP of the device, which is also attached
+	  to a device by 1-to-1. The device registering DEVFREQ takes the
+	  responsiblity to "interpret" the frequency listed in OPP and
+	  to set its every clock accordingly with the "target" callback
+	  given to DEVFREQ.
+
+if PM_DEVFREQ
+
+comment "DEVFREQ Governors"
+
+config DEVFREQ_GOV_SIMPLE_ONDEMAND
+	bool "Simple Ondemand"
+	help
+	  Chooses frequency based on the recent load on the device. Works
+	  similar as ONDEMAND governor of CPUFREQ does. A device with
+	  Simple-Ondemand should be able to provide busy/total counter
+	  values that imply the usage rate. A device may provide tuned
+	  values to the governor with data field at devfreq_add_device().
+
+config DEVFREQ_GOV_PERFORMANCE
+	bool "Performance"
+	help
+	  Sets the frequency at the maximum available frequency.
+	  This governor always returns UINT_MAX as frequency so that
+	  the DEVFREQ framework returns the highest frequency available
+	  at any time.
+
+config DEVFREQ_GOV_POWERSAVE
+	bool "Powersave"
+	help
+	  Sets the frequency at the minimum available frequency.
+	  This governor always returns 0 as frequency so that
+	  the DEVFREQ framework returns the lowest frequency available
+	  at any time.
+
+config DEVFREQ_GOV_USERSPACE
+	bool "Userspace"
+	help
+	  Sets the frequency at the user specified one.
+	  This governor returns the user configured frequency if there
+	  has been an input to /sys/devices/.../power/devfreq_set_freq.
+	  Otherwise, the governor does not change the frequnecy
+	  given at the initialization.
+
+comment "DEVFREQ Drivers"
+
+endif # PM_DEVFREQ

+ 5 - 0
drivers/devfreq/Makefile

@@ -0,0 +1,5 @@
+obj-$(CONFIG_PM_DEVFREQ)	+= devfreq.o
+obj-$(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)	+= governor_simpleondemand.o
+obj-$(CONFIG_DEVFREQ_GOV_PERFORMANCE)	+= governor_performance.o
+obj-$(CONFIG_DEVFREQ_GOV_POWERSAVE)	+= governor_powersave.o
+obj-$(CONFIG_DEVFREQ_GOV_USERSPACE)	+= governor_userspace.o

+ 601 - 0
drivers/devfreq/devfreq.c

@@ -0,0 +1,601 @@
+/*
+ * devfreq: Generic Dynamic Voltage and Frequency Scaling (DVFS) Framework
+ *	    for Non-CPU Devices.
+ *
+ * Copyright (C) 2011 Samsung Electronics
+ *	MyungJoo Ham <myungjoo.ham@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/opp.h>
+#include <linux/devfreq.h>
+#include <linux/workqueue.h>
+#include <linux/platform_device.h>
+#include <linux/list.h>
+#include <linux/printk.h>
+#include <linux/hrtimer.h>
+#include "governor.h"
+
+struct class *devfreq_class;
+
+/*
+ * devfreq_work periodically monitors every registered device.
+ * The minimum polling interval is one jiffy. The polling interval is
+ * determined by the minimum polling period among all polling devfreq
+ * devices. The resolution of polling interval is one jiffy.
+ */
+static bool polling;
+static struct workqueue_struct *devfreq_wq;
+static struct delayed_work devfreq_work;
+
+/* wait removing if this is to be removed */
+static struct devfreq *wait_remove_device;
+
+/* The list of all device-devfreq */
+static LIST_HEAD(devfreq_list);
+static DEFINE_MUTEX(devfreq_list_lock);
+
+/**
+ * find_device_devfreq() - find devfreq struct using device pointer
+ * @dev:	device pointer used to lookup device devfreq.
+ *
+ * Search the list of device devfreqs and return the matched device's
+ * devfreq info. devfreq_list_lock should be held by the caller.
+ */
+static struct devfreq *find_device_devfreq(struct device *dev)
+{
+	struct devfreq *tmp_devfreq;
+
+	if (unlikely(IS_ERR_OR_NULL(dev))) {
+		pr_err("DEVFREQ: %s: Invalid parameters\n", __func__);
+		return ERR_PTR(-EINVAL);
+	}
+	WARN(!mutex_is_locked(&devfreq_list_lock),
+	     "devfreq_list_lock must be locked.");
+
+	list_for_each_entry(tmp_devfreq, &devfreq_list, node) {
+		if (tmp_devfreq->dev.parent == dev)
+			return tmp_devfreq;
+	}
+
+	return ERR_PTR(-ENODEV);
+}
+
+/**
+ * update_devfreq() - Reevaluate the device and configure frequency.
+ * @devfreq:	the devfreq instance.
+ *
+ * Note: Lock devfreq->lock before calling update_devfreq
+ *	 This function is exported for governors.
+ */
+int update_devfreq(struct devfreq *devfreq)
+{
+	unsigned long freq;
+	int err = 0;
+
+	if (!mutex_is_locked(&devfreq->lock)) {
+		WARN(true, "devfreq->lock must be locked by the caller.\n");
+		return -EINVAL;
+	}
+
+	/* Reevaluate the proper frequency */
+	err = devfreq->governor->get_target_freq(devfreq, &freq);
+	if (err)
+		return err;
+
+	err = devfreq->profile->target(devfreq->dev.parent, &freq);
+	if (err)
+		return err;
+
+	devfreq->previous_freq = freq;
+	return err;
+}
+
+/**
+ * devfreq_notifier_call() - Notify that the device frequency requirements
+ *			   has been changed out of devfreq framework.
+ * @nb		the notifier_block (supposed to be devfreq->nb)
+ * @type	not used
+ * @devp	not used
+ *
+ * Called by a notifier that uses devfreq->nb.
+ */
+static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type,
+				 void *devp)
+{
+	struct devfreq *devfreq = container_of(nb, struct devfreq, nb);
+	int ret;
+
+	mutex_lock(&devfreq->lock);
+	ret = update_devfreq(devfreq);
+	mutex_unlock(&devfreq->lock);
+
+	return ret;
+}
+
+/**
+ * _remove_devfreq() - Remove devfreq from the device.
+ * @devfreq:	the devfreq struct
+ * @skip:	skip calling device_unregister().
+ *
+ * Note that the caller should lock devfreq->lock before calling
+ * this. _remove_devfreq() will unlock it and free devfreq
+ * internally. devfreq_list_lock should be locked by the caller
+ * as well (not relased at return)
+ *
+ * Lock usage:
+ * devfreq->lock: locked before call.
+ *		  unlocked at return (and freed)
+ * devfreq_list_lock: locked before call.
+ *		      kept locked at return.
+ *		      if devfreq is centrally polled.
+ *
+ * Freed memory:
+ * devfreq
+ */
+static void _remove_devfreq(struct devfreq *devfreq, bool skip)
+{
+	if (!mutex_is_locked(&devfreq->lock)) {
+		WARN(true, "devfreq->lock must be locked by the caller.\n");
+		return;
+	}
+	if (!devfreq->governor->no_central_polling &&
+	    !mutex_is_locked(&devfreq_list_lock)) {
+		WARN(true, "devfreq_list_lock must be locked by the caller.\n");
+		return;
+	}
+
+	if (devfreq->being_removed)
+		return;
+
+	devfreq->being_removed = true;
+
+	if (devfreq->profile->exit)
+		devfreq->profile->exit(devfreq->dev.parent);
+
+	if (devfreq->governor->exit)
+		devfreq->governor->exit(devfreq);
+
+	if (!skip && get_device(&devfreq->dev)) {
+		device_unregister(&devfreq->dev);
+		put_device(&devfreq->dev);
+	}
+
+	if (!devfreq->governor->no_central_polling)
+		list_del(&devfreq->node);
+
+	mutex_unlock(&devfreq->lock);
+	mutex_destroy(&devfreq->lock);
+
+	kfree(devfreq);
+}
+
+/**
+ * devfreq_dev_release() - Callback for struct device to release the device.
+ * @dev:	the devfreq device
+ *
+ * This calls _remove_devfreq() if _remove_devfreq() is not called.
+ * Note that devfreq_dev_release() could be called by _remove_devfreq() as
+ * well as by others unregistering the device.
+ */
+static void devfreq_dev_release(struct device *dev)
+{
+	struct devfreq *devfreq = to_devfreq(dev);
+	bool central_polling = !devfreq->governor->no_central_polling;
+
+	/*
+	 * If devfreq_dev_release() was called by device_unregister() of
+	 * _remove_devfreq(), we cannot mutex_lock(&devfreq->lock) and
+	 * being_removed is already set. This also partially checks the case
+	 * where devfreq_dev_release() is called from a thread other than
+	 * the one called _remove_devfreq(); however, this case is
+	 * dealt completely with another following being_removed check.
+	 *
+	 * Because being_removed is never being
+	 * unset, we do not need to worry about race conditions on
+	 * being_removed.
+	 */
+	if (devfreq->being_removed)
+		return;
+
+	if (central_polling)
+		mutex_lock(&devfreq_list_lock);
+
+	mutex_lock(&devfreq->lock);
+
+	/*
+	 * Check being_removed flag again for the case where
+	 * devfreq_dev_release() was called in a thread other than the one
+	 * possibly called _remove_devfreq().
+	 */
+	if (devfreq->being_removed) {
+		mutex_unlock(&devfreq->lock);
+		goto out;
+	}
+
+	/* devfreq->lock is unlocked and removed in _removed_devfreq() */
+	_remove_devfreq(devfreq, true);
+
+out:
+	if (central_polling)
+		mutex_unlock(&devfreq_list_lock);
+}
+
+/**
+ * devfreq_monitor() - Periodically poll devfreq objects.
+ * @work: the work struct used to run devfreq_monitor periodically.
+ *
+ */
+static void devfreq_monitor(struct work_struct *work)
+{
+	static unsigned long last_polled_at;
+	struct devfreq *devfreq, *tmp;
+	int error;
+	unsigned long jiffies_passed;
+	unsigned long next_jiffies = ULONG_MAX, now = jiffies;
+	struct device *dev;
+
+	/* Initially last_polled_at = 0, polling every device at bootup */
+	jiffies_passed = now - last_polled_at;
+	last_polled_at = now;
+	if (jiffies_passed == 0)
+		jiffies_passed = 1;
+
+	mutex_lock(&devfreq_list_lock);
+	list_for_each_entry_safe(devfreq, tmp, &devfreq_list, node) {
+		mutex_lock(&devfreq->lock);
+		dev = devfreq->dev.parent;
+
+		/* Do not remove tmp for a while */
+		wait_remove_device = tmp;
+
+		if (devfreq->governor->no_central_polling ||
+		    devfreq->next_polling == 0) {
+			mutex_unlock(&devfreq->lock);
+			continue;
+		}
+		mutex_unlock(&devfreq_list_lock);
+
+		/*
+		 * Reduce more next_polling if devfreq_wq took an extra
+		 * delay. (i.e., CPU has been idled.)
+		 */
+		if (devfreq->next_polling <= jiffies_passed) {
+			error = update_devfreq(devfreq);
+
+			/* Remove a devfreq with an error. */
+			if (error && error != -EAGAIN) {
+
+				dev_err(dev, "Due to update_devfreq error(%d), devfreq(%s) is removed from the device\n",
+					error, devfreq->governor->name);
+
+				/*
+				 * Unlock devfreq before locking the list
+				 * in order to avoid deadlock with
+				 * find_device_devfreq or others
+				 */
+				mutex_unlock(&devfreq->lock);
+				mutex_lock(&devfreq_list_lock);
+				/* Check if devfreq is already removed */
+				if (IS_ERR(find_device_devfreq(dev)))
+					continue;
+				mutex_lock(&devfreq->lock);
+				/* This unlocks devfreq->lock and free it */
+				_remove_devfreq(devfreq, false);
+				continue;
+			}
+			devfreq->next_polling = devfreq->polling_jiffies;
+		} else {
+			devfreq->next_polling -= jiffies_passed;
+		}
+
+		if (devfreq->next_polling)
+			next_jiffies = (next_jiffies > devfreq->next_polling) ?
+					devfreq->next_polling : next_jiffies;
+
+		mutex_unlock(&devfreq->lock);
+		mutex_lock(&devfreq_list_lock);
+	}
+	wait_remove_device = NULL;
+	mutex_unlock(&devfreq_list_lock);
+
+	if (next_jiffies > 0 && next_jiffies < ULONG_MAX) {
+		polling = true;
+		queue_delayed_work(devfreq_wq, &devfreq_work, next_jiffies);
+	} else {
+		polling = false;
+	}
+}
+
+/**
+ * devfreq_add_device() - Add devfreq feature to the device
+ * @dev:	the device to add devfreq feature.
+ * @profile:	device-specific profile to run devfreq.
+ * @governor:	the policy to choose frequency.
+ * @data:	private data for the governor. The devfreq framework does not
+ *		touch this value.
+ */
+struct devfreq *devfreq_add_device(struct device *dev,
+				   struct devfreq_dev_profile *profile,
+				   const struct devfreq_governor *governor,
+				   void *data)
+{
+	struct devfreq *devfreq;
+	int err = 0;
+
+	if (!dev || !profile || !governor) {
+		dev_err(dev, "%s: Invalid parameters.\n", __func__);
+		return ERR_PTR(-EINVAL);
+	}
+
+
+	if (!governor->no_central_polling) {
+		mutex_lock(&devfreq_list_lock);
+		devfreq = find_device_devfreq(dev);
+		mutex_unlock(&devfreq_list_lock);
+		if (!IS_ERR(devfreq)) {
+			dev_err(dev, "%s: Unable to create devfreq for the device. It already has one.\n", __func__);
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	devfreq = kzalloc(sizeof(struct devfreq), GFP_KERNEL);
+	if (!devfreq) {
+		dev_err(dev, "%s: Unable to create devfreq for the device\n",
+			__func__);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	mutex_init(&devfreq->lock);
+	mutex_lock(&devfreq->lock);
+	devfreq->dev.parent = dev;
+	devfreq->dev.class = devfreq_class;
+	devfreq->dev.release = devfreq_dev_release;
+	devfreq->profile = profile;
+	devfreq->governor = governor;
+	devfreq->previous_freq = profile->initial_freq;
+	devfreq->data = data;
+	devfreq->next_polling = devfreq->polling_jiffies
+			      = msecs_to_jiffies(devfreq->profile->polling_ms);
+	devfreq->nb.notifier_call = devfreq_notifier_call;
+
+	dev_set_name(&devfreq->dev, dev_name(dev));
+	err = device_register(&devfreq->dev);
+	if (err) {
+		put_device(&devfreq->dev);
+		goto err_dev;
+	}
+
+	if (governor->init)
+		err = governor->init(devfreq);
+	if (err)
+		goto err_init;
+
+	mutex_unlock(&devfreq->lock);
+
+	if (governor->no_central_polling)
+		goto out;
+
+	mutex_lock(&devfreq_list_lock);
+
+	list_add(&devfreq->node, &devfreq_list);
+
+	if (devfreq_wq && devfreq->next_polling && !polling) {
+		polling = true;
+		queue_delayed_work(devfreq_wq, &devfreq_work,
+				   devfreq->next_polling);
+	}
+	mutex_unlock(&devfreq_list_lock);
+	goto out;
+err_init:
+	device_unregister(&devfreq->dev);
+err_dev:
+	mutex_unlock(&devfreq->lock);
+	kfree(devfreq);
+out:
+	if (err)
+		return ERR_PTR(err);
+	else
+		return devfreq;
+}
+
+/**
+ * devfreq_remove_device() - Remove devfreq feature from a device.
+ * @devfreq	the devfreq instance to be removed
+ */
+int devfreq_remove_device(struct devfreq *devfreq)
+{
+	if (!devfreq)
+		return -EINVAL;
+
+	if (!devfreq->governor->no_central_polling) {
+		mutex_lock(&devfreq_list_lock);
+		while (wait_remove_device == devfreq) {
+			mutex_unlock(&devfreq_list_lock);
+			schedule();
+			mutex_lock(&devfreq_list_lock);
+		}
+	}
+
+	mutex_lock(&devfreq->lock);
+	_remove_devfreq(devfreq, false); /* it unlocks devfreq->lock */
+
+	if (!devfreq->governor->no_central_polling)
+		mutex_unlock(&devfreq_list_lock);
+
+	return 0;
+}
+
+static ssize_t show_governor(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%s\n", to_devfreq(dev)->governor->name);
+}
+
+static ssize_t show_freq(struct device *dev,
+			 struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", to_devfreq(dev)->previous_freq);
+}
+
+static ssize_t show_polling_interval(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", to_devfreq(dev)->profile->polling_ms);
+}
+
+static ssize_t store_polling_interval(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct devfreq *df = to_devfreq(dev);
+	unsigned int value;
+	int ret;
+
+	ret = sscanf(buf, "%u", &value);
+	if (ret != 1)
+		goto out;
+
+	mutex_lock(&df->lock);
+	df->profile->polling_ms = value;
+	df->next_polling = df->polling_jiffies
+			 = msecs_to_jiffies(value);
+	mutex_unlock(&df->lock);
+
+	ret = count;
+
+	if (df->governor->no_central_polling)
+		goto out;
+
+	mutex_lock(&devfreq_list_lock);
+	if (df->next_polling > 0 && !polling) {
+		polling = true;
+		queue_delayed_work(devfreq_wq, &devfreq_work,
+				   df->next_polling);
+	}
+	mutex_unlock(&devfreq_list_lock);
+out:
+	return ret;
+}
+
+static ssize_t show_central_polling(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n",
+		       !to_devfreq(dev)->governor->no_central_polling);
+}
+
+static struct device_attribute devfreq_attrs[] = {
+	__ATTR(governor, S_IRUGO, show_governor, NULL),
+	__ATTR(cur_freq, S_IRUGO, show_freq, NULL),
+	__ATTR(central_polling, S_IRUGO, show_central_polling, NULL),
+	__ATTR(polling_interval, S_IRUGO | S_IWUSR, show_polling_interval,
+	       store_polling_interval),
+	{ },
+};
+
+/**
+ * devfreq_start_polling() - Initialize data structure for devfreq framework and
+ *			   start polling registered devfreq devices.
+ */
+static int __init devfreq_start_polling(void)
+{
+	mutex_lock(&devfreq_list_lock);
+	polling = false;
+	devfreq_wq = create_freezable_workqueue("devfreq_wq");
+	INIT_DELAYED_WORK_DEFERRABLE(&devfreq_work, devfreq_monitor);
+	mutex_unlock(&devfreq_list_lock);
+
+	devfreq_monitor(&devfreq_work.work);
+	return 0;
+}
+late_initcall(devfreq_start_polling);
+
+static int __init devfreq_init(void)
+{
+	devfreq_class = class_create(THIS_MODULE, "devfreq");
+	if (IS_ERR(devfreq_class)) {
+		pr_err("%s: couldn't create class\n", __FILE__);
+		return PTR_ERR(devfreq_class);
+	}
+	devfreq_class->dev_attrs = devfreq_attrs;
+	return 0;
+}
+subsys_initcall(devfreq_init);
+
+static void __exit devfreq_exit(void)
+{
+	class_destroy(devfreq_class);
+}
+module_exit(devfreq_exit);
+
+/*
+ * The followings are helper functions for devfreq user device drivers with
+ * OPP framework.
+ */
+
+/**
+ * devfreq_recommended_opp() - Helper function to get proper OPP for the
+ *			     freq value given to target callback.
+ * @dev		The devfreq user device. (parent of devfreq)
+ * @freq	The frequency given to target function
+ *
+ */
+struct opp *devfreq_recommended_opp(struct device *dev, unsigned long *freq)
+{
+	struct opp *opp = opp_find_freq_ceil(dev, freq);
+
+	if (opp == ERR_PTR(-ENODEV))
+		opp = opp_find_freq_floor(dev, freq);
+	return opp;
+}
+
+/**
+ * devfreq_register_opp_notifier() - Helper function to get devfreq notified
+ *				   for any changes in the OPP availability
+ *				   changes
+ * @dev		The devfreq user device. (parent of devfreq)
+ * @devfreq	The devfreq object.
+ */
+int devfreq_register_opp_notifier(struct device *dev, struct devfreq *devfreq)
+{
+	struct srcu_notifier_head *nh = opp_get_notifier(dev);
+
+	if (IS_ERR(nh))
+		return PTR_ERR(nh);
+	return srcu_notifier_chain_register(nh, &devfreq->nb);
+}
+
+/**
+ * devfreq_unregister_opp_notifier() - Helper function to stop getting devfreq
+ *				     notified for any changes in the OPP
+ *				     availability changes anymore.
+ * @dev		The devfreq user device. (parent of devfreq)
+ * @devfreq	The devfreq object.
+ *
+ * At exit() callback of devfreq_dev_profile, this must be included if
+ * devfreq_recommended_opp is used.
+ */
+int devfreq_unregister_opp_notifier(struct device *dev, struct devfreq *devfreq)
+{
+	struct srcu_notifier_head *nh = opp_get_notifier(dev);
+
+	if (IS_ERR(nh))
+		return PTR_ERR(nh);
+	return srcu_notifier_chain_unregister(nh, &devfreq->nb);
+}
+
+MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>");
+MODULE_DESCRIPTION("devfreq class support");
+MODULE_LICENSE("GPL");

+ 24 - 0
drivers/devfreq/governor.h

@@ -0,0 +1,24 @@
+/*
+ * governor.h - internal header for devfreq governors.
+ *
+ * Copyright (C) 2011 Samsung Electronics
+ *	MyungJoo Ham <myungjoo.ham@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This header is for devfreq governors in drivers/devfreq/
+ */
+
+#ifndef _GOVERNOR_H
+#define _GOVERNOR_H
+
+#include <linux/devfreq.h>
+
+#define to_devfreq(DEV)	container_of((DEV), struct devfreq, dev)
+
+/* Caution: devfreq->lock must be locked before calling update_devfreq */
+extern int update_devfreq(struct devfreq *devfreq);
+
+#endif /* _GOVERNOR_H */

+ 29 - 0
drivers/devfreq/governor_performance.c

@@ -0,0 +1,29 @@
+/*
+ *  linux/drivers/devfreq/governor_performance.c
+ *
+ *  Copyright (C) 2011 Samsung Electronics
+ *	MyungJoo Ham <myungjoo.ham@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/devfreq.h>
+
+static int devfreq_performance_func(struct devfreq *df,
+				    unsigned long *freq)
+{
+	/*
+	 * target callback should be able to get floor value as
+	 * said in devfreq.h
+	 */
+	*freq = UINT_MAX;
+	return 0;
+}
+
+const struct devfreq_governor devfreq_performance = {
+	.name = "performance",
+	.get_target_freq = devfreq_performance_func,
+	.no_central_polling = true,
+};

+ 29 - 0
drivers/devfreq/governor_powersave.c

@@ -0,0 +1,29 @@
+/*
+ *  linux/drivers/devfreq/governor_powersave.c
+ *
+ *  Copyright (C) 2011 Samsung Electronics
+ *	MyungJoo Ham <myungjoo.ham@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/devfreq.h>
+
+static int devfreq_powersave_func(struct devfreq *df,
+				  unsigned long *freq)
+{
+	/*
+	 * target callback should be able to get ceiling value as
+	 * said in devfreq.h
+	 */
+	*freq = 0;
+	return 0;
+}
+
+const struct devfreq_governor devfreq_powersave = {
+	.name = "powersave",
+	.get_target_freq = devfreq_powersave_func,
+	.no_central_polling = true,
+};

+ 88 - 0
drivers/devfreq/governor_simpleondemand.c

@@ -0,0 +1,88 @@
+/*
+ *  linux/drivers/devfreq/governor_simpleondemand.c
+ *
+ *  Copyright (C) 2011 Samsung Electronics
+ *	MyungJoo Ham <myungjoo.ham@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/errno.h>
+#include <linux/devfreq.h>
+#include <linux/math64.h>
+
+/* Default constants for DevFreq-Simple-Ondemand (DFSO) */
+#define DFSO_UPTHRESHOLD	(90)
+#define DFSO_DOWNDIFFERENCTIAL	(5)
+static int devfreq_simple_ondemand_func(struct devfreq *df,
+					unsigned long *freq)
+{
+	struct devfreq_dev_status stat;
+	int err = df->profile->get_dev_status(df->dev.parent, &stat);
+	unsigned long long a, b;
+	unsigned int dfso_upthreshold = DFSO_UPTHRESHOLD;
+	unsigned int dfso_downdifferential = DFSO_DOWNDIFFERENCTIAL;
+	struct devfreq_simple_ondemand_data *data = df->data;
+
+	if (err)
+		return err;
+
+	if (data) {
+		if (data->upthreshold)
+			dfso_upthreshold = data->upthreshold;
+		if (data->downdifferential)
+			dfso_downdifferential = data->downdifferential;
+	}
+	if (dfso_upthreshold > 100 ||
+	    dfso_upthreshold < dfso_downdifferential)
+		return -EINVAL;
+
+	/* Assume MAX if it is going to be divided by zero */
+	if (stat.total_time == 0) {
+		*freq = UINT_MAX;
+		return 0;
+	}
+
+	/* Prevent overflow */
+	if (stat.busy_time >= (1 << 24) || stat.total_time >= (1 << 24)) {
+		stat.busy_time >>= 7;
+		stat.total_time >>= 7;
+	}
+
+	/* Set MAX if it's busy enough */
+	if (stat.busy_time * 100 >
+	    stat.total_time * dfso_upthreshold) {
+		*freq = UINT_MAX;
+		return 0;
+	}
+
+	/* Set MAX if we do not know the initial frequency */
+	if (stat.current_frequency == 0) {
+		*freq = UINT_MAX;
+		return 0;
+	}
+
+	/* Keep the current frequency */
+	if (stat.busy_time * 100 >
+	    stat.total_time * (dfso_upthreshold - dfso_downdifferential)) {
+		*freq = stat.current_frequency;
+		return 0;
+	}
+
+	/* Set the desired frequency based on the load */
+	a = stat.busy_time;
+	a *= stat.current_frequency;
+	b = div_u64(a, stat.total_time);
+	b *= 100;
+	b = div_u64(b, (dfso_upthreshold - dfso_downdifferential / 2));
+	*freq = (unsigned long) b;
+
+	return 0;
+}
+
+const struct devfreq_governor devfreq_simple_ondemand = {
+	.name = "simple_ondemand",
+	.get_target_freq = devfreq_simple_ondemand_func,
+};

+ 116 - 0
drivers/devfreq/governor_userspace.c

@@ -0,0 +1,116 @@
+/*
+ *  linux/drivers/devfreq/governor_simpleondemand.c
+ *
+ *  Copyright (C) 2011 Samsung Electronics
+ *	MyungJoo Ham <myungjoo.ham@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/devfreq.h>
+#include <linux/pm.h>
+#include <linux/mutex.h>
+#include "governor.h"
+
+struct userspace_data {
+	unsigned long user_frequency;
+	bool valid;
+};
+
+static int devfreq_userspace_func(struct devfreq *df, unsigned long *freq)
+{
+	struct userspace_data *data = df->data;
+
+	if (!data->valid)
+		*freq = df->previous_freq; /* No user freq specified yet */
+	else
+		*freq = data->user_frequency;
+	return 0;
+}
+
+static ssize_t store_freq(struct device *dev, struct device_attribute *attr,
+			  const char *buf, size_t count)
+{
+	struct devfreq *devfreq = to_devfreq(dev);
+	struct userspace_data *data;
+	unsigned long wanted;
+	int err = 0;
+
+
+	mutex_lock(&devfreq->lock);
+	data = devfreq->data;
+
+	sscanf(buf, "%lu", &wanted);
+	data->user_frequency = wanted;
+	data->valid = true;
+	err = update_devfreq(devfreq);
+	if (err == 0)
+		err = count;
+	mutex_unlock(&devfreq->lock);
+	return err;
+}
+
+static ssize_t show_freq(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct devfreq *devfreq = to_devfreq(dev);
+	struct userspace_data *data;
+	int err = 0;
+
+	mutex_lock(&devfreq->lock);
+	data = devfreq->data;
+
+	if (data->valid)
+		err = sprintf(buf, "%lu\n", data->user_frequency);
+	else
+		err = sprintf(buf, "undefined\n");
+	mutex_unlock(&devfreq->lock);
+	return err;
+}
+
+static DEVICE_ATTR(set_freq, 0644, show_freq, store_freq);
+static struct attribute *dev_entries[] = {
+	&dev_attr_set_freq.attr,
+	NULL,
+};
+static struct attribute_group dev_attr_group = {
+	.name	= "userspace",
+	.attrs	= dev_entries,
+};
+
+static int userspace_init(struct devfreq *devfreq)
+{
+	int err = 0;
+	struct userspace_data *data = kzalloc(sizeof(struct userspace_data),
+					      GFP_KERNEL);
+
+	if (!data) {
+		err = -ENOMEM;
+		goto out;
+	}
+	data->valid = false;
+	devfreq->data = data;
+
+	err = sysfs_create_group(&devfreq->dev.kobj, &dev_attr_group);
+out:
+	return err;
+}
+
+static void userspace_exit(struct devfreq *devfreq)
+{
+	sysfs_remove_group(&devfreq->dev.kobj, &dev_attr_group);
+	kfree(devfreq->data);
+	devfreq->data = NULL;
+}
+
+const struct devfreq_governor devfreq_userspace = {
+	.name = "userspace",
+	.get_target_freq = devfreq_userspace_func,
+	.init = userspace_init,
+	.exit = userspace_exit,
+	.no_central_polling = true,
+};

+ 238 - 0
include/linux/devfreq.h

@@ -0,0 +1,238 @@
+/*
+ * devfreq: Generic Dynamic Voltage and Frequency Scaling (DVFS) Framework
+ *	    for Non-CPU Devices.
+ *
+ * Copyright (C) 2011 Samsung Electronics
+ *	MyungJoo Ham <myungjoo.ham@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_DEVFREQ_H__
+#define __LINUX_DEVFREQ_H__
+
+#include <linux/device.h>
+#include <linux/notifier.h>
+#include <linux/opp.h>
+
+#define DEVFREQ_NAME_LEN 16
+
+struct devfreq;
+
+/**
+ * struct devfreq_dev_status - Data given from devfreq user device to
+ *			     governors. Represents the performance
+ *			     statistics.
+ * @total_time		The total time represented by this instance of
+ *			devfreq_dev_status
+ * @busy_time		The time that the device was working among the
+ *			total_time.
+ * @current_frequency	The operating frequency.
+ * @private_data	An entry not specified by the devfreq framework.
+ *			A device and a specific governor may have their
+ *			own protocol with private_data. However, because
+ *			this is governor-specific, a governor using this
+ *			will be only compatible with devices aware of it.
+ */
+struct devfreq_dev_status {
+	/* both since the last measure */
+	unsigned long total_time;
+	unsigned long busy_time;
+	unsigned long current_frequency;
+	void *private_date;
+};
+
+/**
+ * struct devfreq_dev_profile - Devfreq's user device profile
+ * @initial_freq	The operating frequency when devfreq_add_device() is
+ *			called.
+ * @polling_ms		The polling interval in ms. 0 disables polling.
+ * @target		The device should set its operating frequency at
+ *			freq or lowest-upper-than-freq value. If freq is
+ *			higher than any operable frequency, set maximum.
+ *			Before returning, target function should set
+ *			freq at the current frequency.
+ * @get_dev_status	The device should provide the current performance
+ *			status to devfreq, which is used by governors.
+ * @exit		An optional callback that is called when devfreq
+ *			is removing the devfreq object due to error or
+ *			from devfreq_remove_device() call. If the user
+ *			has registered devfreq->nb at a notifier-head,
+ *			this is the time to unregister it.
+ */
+struct devfreq_dev_profile {
+	unsigned long initial_freq;
+	unsigned int polling_ms;
+
+	int (*target)(struct device *dev, unsigned long *freq);
+	int (*get_dev_status)(struct device *dev,
+			      struct devfreq_dev_status *stat);
+	void (*exit)(struct device *dev);
+};
+
+/**
+ * struct devfreq_governor - Devfreq policy governor
+ * @name		Governor's name
+ * @get_target_freq	Returns desired operating frequency for the device.
+ *			Basically, get_target_freq will run
+ *			devfreq_dev_profile.get_dev_status() to get the
+ *			status of the device (load = busy_time / total_time).
+ *			If no_central_polling is set, this callback is called
+ *			only with update_devfreq() notified by OPP.
+ * @init		Called when the devfreq is being attached to a device
+ * @exit		Called when the devfreq is being removed from a
+ *			device. Governor should stop any internal routines
+ *			before return because related data may be
+ *			freed after exit().
+ * @no_central_polling	Do not use devfreq's central polling mechanism.
+ *			When this is set, devfreq will not call
+ *			get_target_freq with devfreq_monitor(). However,
+ *			devfreq will call get_target_freq with
+ *			devfreq_update() notified by OPP framework.
+ *
+ * Note that the callbacks are called with devfreq->lock locked by devfreq.
+ */
+struct devfreq_governor {
+	const char name[DEVFREQ_NAME_LEN];
+	int (*get_target_freq)(struct devfreq *this, unsigned long *freq);
+	int (*init)(struct devfreq *this);
+	void (*exit)(struct devfreq *this);
+	const bool no_central_polling;
+};
+
+/**
+ * struct devfreq - Device devfreq structure
+ * @node	list node - contains the devices with devfreq that have been
+ *		registered.
+ * @lock	a mutex to protect accessing devfreq.
+ * @dev		device registered by devfreq class. dev.parent is the device
+ *		using devfreq.
+ * @profile	device-specific devfreq profile
+ * @governor	method how to choose frequency based on the usage.
+ * @nb		notifier block used to notify devfreq object that it should
+ *		reevaluate operable frequencies. Devfreq users may use
+ *		devfreq.nb to the corresponding register notifier call chain.
+ * @polling_jiffies	interval in jiffies.
+ * @previous_freq	previously configured frequency value.
+ * @next_polling	the number of remaining jiffies to poll with
+ *			"devfreq_monitor" executions to reevaluate
+ *			frequency/voltage of the device. Set by
+ *			profile's polling_ms interval.
+ * @data	Private data of the governor. The devfreq framework does not
+ *		touch this.
+ * @being_removed	a flag to mark that this object is being removed in
+ *			order to prevent trying to remove the object multiple times.
+ *
+ * This structure stores the devfreq information for a give device.
+ *
+ * Note that when a governor accesses entries in struct devfreq in its
+ * functions except for the context of callbacks defined in struct
+ * devfreq_governor, the governor should protect its access with the
+ * struct mutex lock in struct devfreq. A governor may use this mutex
+ * to protect its own private data in void *data as well.
+ */
+struct devfreq {
+	struct list_head node;
+
+	struct mutex lock;
+	struct device dev;
+	struct devfreq_dev_profile *profile;
+	const struct devfreq_governor *governor;
+	struct notifier_block nb;
+
+	unsigned long polling_jiffies;
+	unsigned long previous_freq;
+	unsigned int next_polling;
+
+	void *data; /* private data for governors */
+
+	bool being_removed;
+};
+
+#if defined(CONFIG_PM_DEVFREQ)
+extern struct devfreq *devfreq_add_device(struct device *dev,
+				  struct devfreq_dev_profile *profile,
+				  const struct devfreq_governor *governor,
+				  void *data);
+extern int devfreq_remove_device(struct devfreq *devfreq);
+
+/* Helper functions for devfreq user device driver with OPP. */
+extern struct opp *devfreq_recommended_opp(struct device *dev,
+					   unsigned long *freq);
+extern int devfreq_register_opp_notifier(struct device *dev,
+					 struct devfreq *devfreq);
+extern int devfreq_unregister_opp_notifier(struct device *dev,
+					   struct devfreq *devfreq);
+
+#ifdef CONFIG_DEVFREQ_GOV_POWERSAVE
+extern const struct devfreq_governor devfreq_powersave;
+#endif
+#ifdef CONFIG_DEVFREQ_GOV_PERFORMANCE
+extern const struct devfreq_governor devfreq_performance;
+#endif
+#ifdef CONFIG_DEVFREQ_GOV_USERSPACE
+extern const struct devfreq_governor devfreq_userspace;
+#endif
+#ifdef CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND
+extern const struct devfreq_governor devfreq_simple_ondemand;
+/**
+ * struct devfreq_simple_ondemand_data - void *data fed to struct devfreq
+ *	and devfreq_add_device
+ * @ upthreshold	If the load is over this value, the frequency jumps.
+ *			Specify 0 to use the default. Valid value = 0 to 100.
+ * @ downdifferential	If the load is under upthreshold - downdifferential,
+ *			the governor may consider slowing the frequency down.
+ *			Specify 0 to use the default. Valid value = 0 to 100.
+ *			downdifferential < upthreshold must hold.
+ *
+ * If the fed devfreq_simple_ondemand_data pointer is NULL to the governor,
+ * the governor uses the default values.
+ */
+struct devfreq_simple_ondemand_data {
+	unsigned int upthreshold;
+	unsigned int downdifferential;
+};
+#endif
+
+#else /* !CONFIG_PM_DEVFREQ */
+static struct devfreq *devfreq_add_device(struct device *dev,
+					  struct devfreq_dev_profile *profile,
+					  struct devfreq_governor *governor,
+					  void *data);
+{
+	return NULL;
+}
+
+static int devfreq_remove_device(struct devfreq *devfreq);
+{
+	return 0;
+}
+
+static struct opp *devfreq_recommended_opp(struct device *dev,
+					   unsigned long *freq)
+{
+	return -EINVAL;
+}
+
+static int devfreq_register_opp_notifier(struct device *dev,
+					 struct devfreq *devfreq)
+{
+	return -EINVAL;
+}
+
+static int devfreq_unregister_opp_notifier(struct device *dev,
+					   struct devfreq *devfreq)
+{
+	return -EINVAL;
+}
+
+#define devfreq_powersave	NULL
+#define devfreq_performance	NULL
+#define devfreq_userspace	NULL
+#define devfreq_simple_ondemand	NULL
+
+#endif /* CONFIG_PM_DEVFREQ */
+
+#endif /* __LINUX_DEVFREQ_H__ */

+ 12 - 0
include/linux/opp.h

@@ -16,9 +16,14 @@
 
 #include <linux/err.h>
 #include <linux/cpufreq.h>
+#include <linux/notifier.h>
 
 struct opp;
 
+enum opp_event {
+	OPP_EVENT_ADD, OPP_EVENT_ENABLE, OPP_EVENT_DISABLE,
+};
+
 #if defined(CONFIG_PM_OPP)
 
 unsigned long opp_get_voltage(struct opp *opp);
@@ -40,6 +45,8 @@ int opp_enable(struct device *dev, unsigned long freq);
 
 int opp_disable(struct device *dev, unsigned long freq);
 
+struct srcu_notifier_head *opp_get_notifier(struct device *dev);
+
 #else
 static inline unsigned long opp_get_voltage(struct opp *opp)
 {
@@ -89,6 +96,11 @@ static inline int opp_disable(struct device *dev, unsigned long freq)
 {
 	return 0;
 }
+
+struct srcu_notifier_head *opp_get_notifier(struct device *dev)
+{
+	return ERR_PTR(-EINVAL);
+}
 #endif		/* CONFIG_PM */
 
 #if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP)