hai 17 anos · cf475ad28a
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -735,6 +735,7 @@ static int exec_mmap(struct mm_struct *mm)
 
															 	tsk->active_mm = mm;
														
 
															 	activate_mm(active_mm, mm);
														
 
															 	task_unlock(tsk);
														
 
															+	mm_update_next_owner(mm);
														
 
															 	arch_pick_mmap_layout(mm);
														
 
															 	if (old_mm) {
														
 
															 		up_read(&old_mm->mmap_sem);
														
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -305,6 +305,12 @@ struct cgroup_subsys {
 
															 			struct cgroup *cgrp);
														
 
															 	void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp);
														
 
															 	void (*bind)(struct cgroup_subsys *ss, struct cgroup *root);
														
 
															+	/*
														
 
															+	 * This routine is called with the task_lock of mm->owner held
														
 
															+	 */
														
 
															+	void (*mm_owner_changed)(struct cgroup_subsys *ss,
														
 
															+					struct cgroup *old,
														
 
															+					struct cgroup *new);
														
 
															 	int subsys_id;
														
 
															 	int active;
														
 
															 	int disabled;
														
@@ -390,4 +396,13 @@ static inline int cgroupstats_build(struct cgroupstats *stats,
 
															 #endif /* !CONFIG_CGROUPS */
														
 
															+#ifdef CONFIG_MM_OWNER
														
 
															+extern void
														
 
															+cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new);
														
 
															+#else /* !CONFIG_MM_OWNER */
														
 
															+static inline void
														
 
															+cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
														
 
															+{
														
 
															+}
														
 
															+#endif /* CONFIG_MM_OWNER */
														
 
															 #endif /* _LINUX_CGROUP_H */
														
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -27,9 +27,6 @@ struct mm_struct;
 
															 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
														
 
															-extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p);
														
 
															-extern void mm_free_cgroup(struct mm_struct *mm);
														
 
															-
														
 
															 #define page_reset_bad_cgroup(page)	((page)->page_cgroup = 0)
														
 
															 extern struct page_cgroup *page_get_page_cgroup(struct page *page);
														
@@ -48,8 +45,10 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 
															 extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
														
 
															 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
														
 
															+extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
														
 
															+
														
 
															 #define mm_match_cgroup(mm, cgroup)	\
														
 
															-	((cgroup) == rcu_dereference((mm)->mem_cgroup))
														
 
															+	((cgroup) == mem_cgroup_from_task((mm)->owner))
														
 
															 extern int mem_cgroup_prepare_migration(struct page *page);
														
 
															 extern void mem_cgroup_end_migration(struct page *page);
														
@@ -73,15 +72,6 @@ extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
 
															 				struct zone *zone, int priority);
														
 
															 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
														
 
															-static inline void mm_init_cgroup(struct mm_struct *mm,
														
 
															-					struct task_struct *p)
														
 
															-{
														
 
															-}
														
 
															-
														
 
															-static inline void mm_free_cgroup(struct mm_struct *mm)
														
 
															-{
														
 
															-}
														
 
															-
														
 
															 static inline void page_reset_bad_cgroup(struct page *page)
														
 
															 {
														
 
															 }
														
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -225,8 +225,9 @@ struct mm_struct {
 
															 	/* aio bits */
														
 
															 	rwlock_t		ioctx_list_lock;	/* aio lock */
														
 
															 	struct kioctx		*ioctx_list;
														
 
															-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
														
 
															-	struct mem_cgroup *mem_cgroup;
														
 
															+#ifdef CONFIG_MM_OWNER
														
 
															+	struct task_struct *owner;	/* The thread group leader that */
														
 
															+					/* owns the mm_struct.		*/
														
 
															 #endif
														
 
															 };
														
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2148,6 +2148,19 @@ static inline void migration_init(void)
 
															 #define TASK_SIZE_OF(tsk)	TASK_SIZE
														
 
															 #endif
														
 
															+#ifdef CONFIG_MM_OWNER
														
 
															+extern void mm_update_next_owner(struct mm_struct *mm);
														
 
															+extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
														
 
															+#else
														
 
															+static inline void mm_update_next_owner(struct mm_struct *mm)
														
 
															+{
														
 
															+}
														
 
															+
														
 
															+static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
														
 
															+{
														
 
															+}
														
 
															+#endif /* CONFIG_MM_OWNER */
														
 
															+
														
 
															 #endif /* __KERNEL__ */
														
 
															 #endif
														
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -378,9 +378,13 @@ config RESOURCE_COUNTERS
 
															           infrastructure that works with cgroups
														
 
															 	depends on CGROUPS
														
 
															+config MM_OWNER
														
 
															+	bool
														
 
															+
														
 
															 config CGROUP_MEM_RES_CTLR
														
 
															 	bool "Memory Resource Controller for Control Groups"
														
 
															 	depends on CGROUPS && RESOURCE_COUNTERS
														
 
															+	select MM_OWNER
														
 
															 	help
														
 
															 	  Provides a memory resource controller that manages both page cache and
														
 
															 	  RSS memory.
														
@@ -393,6 +397,9 @@ config CGROUP_MEM_RES_CTLR
 
															 	  Only enable when you're ok with these trade offs and really
														
 
															 	  sure you need the memory resource controller.
														
 
															+	  This config option also selects MM_OWNER config option, which
														
 
															+	  could in turn add some fork/exit overhead.
														
 
															+
														
 
															 config SYSFS_DEPRECATED
														
 
															 	bool
														
--- a/init/main.c
+++ b/init/main.c
@@ -559,6 +559,7 @@ asmlinkage void __init start_kernel(void)
 
															 	printk(KERN_NOTICE);
														
 
															 	printk(linux_banner);
														
 
															 	setup_arch(&command_line);
														
 
															+	mm_init_owner(&init_mm, &init_task);
														
 
															 	setup_command_line(command_line);
														
 
															 	unwind_setup();
														
 
															 	setup_per_cpu_areas();
														
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -119,6 +119,7 @@ static int root_count;
 
															  * be called.
														
 
															  */
														
 
															 static int need_forkexit_callback;
														
 
															+static int need_mm_owner_callback __read_mostly;
														
 
															 /* convenient tests for these bits */
														
 
															 inline int cgroup_is_removed(const struct cgroup *cgrp)
														
@@ -2498,6 +2499,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 
															 	init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
														
 
															 	need_forkexit_callback |= ss->fork || ss->exit;
														
 
															+	need_mm_owner_callback |= !!ss->mm_owner_changed;
														
 
															 	/* At system boot, before all subsystems have been
														
 
															 	 * registered, no tasks have been forked, so we don't
														
@@ -2748,6 +2750,34 @@ void cgroup_fork_callbacks(struct task_struct *child)
 
															 	}
														
 
															 }
														
 
															+#ifdef CONFIG_MM_OWNER
														
 
															+/**
														
 
															+ * cgroup_mm_owner_callbacks - run callbacks when the mm->owner changes
														
 
															+ * @new: the new owner
														
 
															+ *
														
 
															+ * Called on every change to mm->owner. mm_init_owner() does not
														
 
															+ * invoke this routine, since it assigns the mm->owner the first time
														
 
															+ * and does not change it.
														
 
															+ */
														
 
															+void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
														
 
															+{
														
 
															+	struct cgroup *oldcgrp, *newcgrp;
														
 
															+
														
 
															+	if (need_mm_owner_callback) {
														
 
															+		int i;
														
 
															+		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
														
 
															+			struct cgroup_subsys *ss = subsys[i];
														
 
															+			oldcgrp = task_cgroup(old, ss->subsys_id);
														
 
															+			newcgrp = task_cgroup(new, ss->subsys_id);
														
 
															+			if (oldcgrp == newcgrp)
														
 
															+				continue;
														
 
															+			if (ss->mm_owner_changed)
														
 
															+				ss->mm_owner_changed(ss, oldcgrp, newcgrp);
														
 
															+		}
														
 
															+	}
														
 
															+}
														
 
															+#endif /* CONFIG_MM_OWNER */
														
 
															+
														
 
															 /**
														
 
															  * cgroup_post_fork - called on a new task after adding it to the task list
														
 
															  * @child: the task in question
														
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -557,6 +557,88 @@ void exit_fs(struct task_struct *tsk)
 
															 EXPORT_SYMBOL_GPL(exit_fs);
														
 
															+#ifdef CONFIG_MM_OWNER
														
 
															+/*
														
 
															+ * Task p is exiting and it owned mm, let's find a new owner for it
														
 
															+ */
														
 
															+static inline int
														
 
															+mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)
														
 
															+{
														
 
															+	/*
														
 
															+	 * If there are other users of the mm and the owner (us) is exiting
														
 
															+	 * we need to find a new owner to take on the responsibility.
														
 
															+	 */
														
 
															+	if (!mm)
														
 
															+		return 0;
														
 
															+	if (atomic_read(&mm->mm_users) <= 1)
														
 
															+		return 0;
														
 
															+	if (mm->owner != p)
														
 
															+		return 0;
														
 
															+	return 1;
														
 
															+}
														
 
															+
														
 
															+void mm_update_next_owner(struct mm_struct *mm)
														
 
															+{
														
 
															+	struct task_struct *c, *g, *p = current;
														
 
															+
														
 
															+retry:
														
 
															+	if (!mm_need_new_owner(mm, p))
														
 
															+		return;
														
 
															+
														
 
															+	read_lock(&tasklist_lock);
														
 
															+	/*
														
 
															+	 * Search in the children
														
 
															+	 */
														
 
															+	list_for_each_entry(c, &p->children, sibling) {
														
 
															+		if (c->mm == mm)
														
 
															+			goto assign_new_owner;
														
 
															+	}
														
 
															+
														
 
															+	/*
														
 
															+	 * Search in the siblings
														
 
															+	 */
														
 
															+	list_for_each_entry(c, &p->parent->children, sibling) {
														
 
															+		if (c->mm == mm)
														
 
															+			goto assign_new_owner;
														
 
															+	}
														
 
															+
														
 
															+	/*
														
 
															+	 * Search through everything else. We should not get
														
 
															+	 * here often
														
 
															+	 */
														
 
															+	do_each_thread(g, c) {
														
 
															+		if (c->mm == mm)
														
 
															+			goto assign_new_owner;
														
 
															+	} while_each_thread(g, c);
														
 
															+
														
 
															+	read_unlock(&tasklist_lock);
														
 
															+	return;
														
 
															+
														
 
															+assign_new_owner:
														
 
															+	BUG_ON(c == p);
														
 
															+	get_task_struct(c);
														
 
															+	/*
														
 
															+	 * The task_lock protects c->mm from changing.
														
 
															+	 * We always want mm->owner->mm == mm
														
 
															+	 */
														
 
															+	task_lock(c);
														
 
															+	/*
														
 
															+	 * Delay read_unlock() till we have the task_lock()
														
 
															+	 * to ensure that c does not slip away underneath us
														
 
															+	 */
														
 
															+	read_unlock(&tasklist_lock);
														
 
															+	if (c->mm != mm) {
														
 
															+		task_unlock(c);
														
 
															+		put_task_struct(c);
														
 
															+		goto retry;
														
 
															+	}
														
 
															+	cgroup_mm_owner_callbacks(mm->owner, c);
														
 
															+	mm->owner = c;
														
 
															+	task_unlock(c);
														
 
															+	put_task_struct(c);
														
 
															+}
														
 
															+#endif /* CONFIG_MM_OWNER */
														
 
															+
														
 
															 /*
														
 
															  * Turn us into a lazy TLB process if we
														
 
															  * aren't already..
														
@@ -596,6 +678,7 @@ static void exit_mm(struct task_struct * tsk)
 
															 	/* We don't want this task to be frozen prematurely */
														
 
															 	clear_freeze_flag(tsk);
														
 
															 	task_unlock(tsk);
														
 
															+	mm_update_next_owner(mm);
														
 
															 	mmput(mm);
														
 
															 }
														
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -381,14 +381,13 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 
															 	mm->ioctx_list = NULL;
														
 
															 	mm->free_area_cache = TASK_UNMAPPED_BASE;
														
 
															 	mm->cached_hole_size = ~0UL;
														
 
															-	mm_init_cgroup(mm, p);
														
 
															+	mm_init_owner(mm, p);
														
 
															 	if (likely(!mm_alloc_pgd(mm))) {
														
 
															 		mm->def_flags = 0;
														
 
															 		return mm;
														
 
															 	}
														
 
															-	mm_free_cgroup(mm);
														
 
															 	free_mm(mm);
														
 
															 	return NULL;
														
 
															 }
														
@@ -438,7 +437,6 @@ void mmput(struct mm_struct *mm)
 
															 			spin_unlock(&mmlist_lock);
														
 
															 		}
														
 
															 		put_swap_token(mm);
														
 
															-		mm_free_cgroup(mm);
														
 
															 		mmdrop(mm);
														
 
															 	}
														
 
															 }
														
@@ -982,6 +980,13 @@ static void rt_mutex_init_task(struct task_struct *p)
 
															 #endif
														
 
															 }
														
 
															+#ifdef CONFIG_MM_OWNER
														
 
															+void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
														
 
															+{
														
 
															+	mm->owner = p;
														
 
															+}
														
 
															+#endif /* CONFIG_MM_OWNER */
														
 
															+
														
 
															 /*
														
 
															  * This creates a new process as a copy of the old one,
														
 
															  * but does not actually start it yet.
														
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -236,26 +236,12 @@ static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
 
															 				css);
														
 
															 }
														
 
															-static struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
														
 
															+struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
														
 
															 {
														
 
															 	return container_of(task_subsys_state(p, mem_cgroup_subsys_id),
														
 
															 				struct mem_cgroup, css);
														
 
															 }
														
 
															-void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p)
														
 
															-{
														
 
															-	struct mem_cgroup *mem;
														
 
															-
														
 
															-	mem = mem_cgroup_from_task(p);
														
 
															-	css_get(&mem->css);
														
 
															-	mm->mem_cgroup = mem;
														
 
															-}
														
 
															-
														
 
															-void mm_free_cgroup(struct mm_struct *mm)
														
 
															-{
														
 
															-	css_put(&mm->mem_cgroup->css);
														
 
															-}
														
 
															-
														
 
															 static inline int page_cgroup_locked(struct page *page)
														
 
															 {
														
 
															 	return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
														
@@ -476,6 +462,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 
															 	int zid = zone_idx(z);
														
 
															 	struct mem_cgroup_per_zone *mz;
														
 
															+	BUG_ON(!mem_cont);
														
 
															 	mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
														
 
															 	if (active)
														
 
															 		src = &mz->active_list;
														
@@ -574,7 +561,7 @@ retry:
 
															 		mm = &init_mm;
														
 
															 	rcu_read_lock();
														
 
															-	mem = rcu_dereference(mm->mem_cgroup);
														
 
															+	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
														
 
															 	/*
														
 
															 	 * For every charge from the cgroup, increment reference count
														
 
															 	 */
														
@@ -985,10 +972,9 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 
															 	struct mem_cgroup *mem;
														
 
															 	int node;
														
 
															-	if (unlikely((cont->parent) == NULL)) {
														
 
															+	if (unlikely((cont->parent) == NULL))
														
 
															 		mem = &init_mem_cgroup;
														
 
															-		init_mm.mem_cgroup = mem;
														
 
															-	} else
														
 
															+	else
														
 
															 		mem = kzalloc(sizeof(struct mem_cgroup), GFP_KERNEL);
														
 
															 	if (mem == NULL)
														
@@ -1067,10 +1053,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 
															 	if (!thread_group_leader(p))
														
 
															 		goto out;
														
 
															-	css_get(&mem->css);
														
 
															-	rcu_assign_pointer(mm->mem_cgroup, mem);
														
 
															-	css_put(&old_mem->css);
														
 
															-
														
 
															 out:
														
 
															 	mmput(mm);
														
 
															 }