@@ -162,6 +162,48 @@ static void __always_inline unlock_page_cgroup(struct page *page)
 	bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
 }
 
+/*
+ * Tie a new page_cgroup to the struct page under lock_page_cgroup().
+ * This can fail if the page has already been tied to a page_cgroup.
+ * Returns 0 on success.
+ */
+static inline int
+page_cgroup_assign_new_page_cgroup(struct page *page, struct page_cgroup *pc)
+{
+	int ret = 0;
+
+	lock_page_cgroup(page);
+	if (!page_get_page_cgroup(page))
+		page_assign_page_cgroup(page, pc);
+	else /* The page is already tied to another pc. */
+		ret = 1;
+	unlock_page_cgroup(page);
+	return ret;
+}
+
+/*
+ * Clear the page->page_cgroup member under lock_page_cgroup().
+ * If the given "pc" value differs from the current page->page_cgroup,
+ * page->page_cgroup is not cleared.
+ * Returns the value of page->page_cgroup at the time the lock was taken.
+ * A caller can detect failure of clearing by checking
+ *	clear_page_cgroup(page, pc) == pc
+ */
+
+static inline struct page_cgroup *
+clear_page_cgroup(struct page *page, struct page_cgroup *pc)
+{
+	struct page_cgroup *ret;
+	/* lock and clear */
+	lock_page_cgroup(page);
+	ret = page_get_page_cgroup(page);
+	if (likely(ret == pc))
+		page_assign_page_cgroup(page, NULL);
+	unlock_page_cgroup(page);
+	return ret;
+}
+
+
 static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
 {
 	if (active)
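
The two helpers above boil down to a test-and-set and a compare-and-clear on a
pointer guarded by the per-page bit spinlock. A minimal user-space model of the
same pattern may help while reviewing; everything below is hypothetical
illustration (a pthread mutex standing in for PAGE_CGROUP_LOCK_BIT, a
placeholder page_cgroup), not kernel code:

  #include <pthread.h>

  struct page_cgroup {
  	int ref_cnt;			/* placeholder for the real fields */
  };

  struct page {
  	pthread_mutex_t lock;		/* stands in for the bit spinlock */
  	struct page_cgroup *page_cgroup;
  };

  /* Test-and-set: install pc only if nothing is tied to the page yet. */
  static int assign_new(struct page *page, struct page_cgroup *pc)
  {
  	int ret = 0;

  	pthread_mutex_lock(&page->lock);
  	if (!page->page_cgroup)
  		page->page_cgroup = pc;	/* first charger wins */
  	else
  		ret = 1;		/* already tied to another pc */
  	pthread_mutex_unlock(&page->lock);
  	return ret;
  }

  /* Compare-and-clear: clear only if the current value is still pc.
   * Returning the old value lets the caller test "== pc" for success. */
  static struct page_cgroup *clear_pc(struct page *page, struct page_cgroup *pc)
  {
  	struct page_cgroup *ret;

  	pthread_mutex_lock(&page->lock);
  	ret = page->page_cgroup;
  	if (ret == pc)
  		page->page_cgroup = NULL;
  	pthread_mutex_unlock(&page->lock);
  	return ret;
  }

Exactly one of two racing assign_new() calls wins, and only the caller whose
clear_pc() returns its own pc may free it; that is the invariant the rest of
the patch builds on.
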
@@ -270,7 +312,7 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
 	struct mem_cgroup *mem;
-	struct page_cgroup *pc, *race_pc;
+	struct page_cgroup *pc;
 	unsigned long flags;
 	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 
@@ -293,8 +335,10 @@ retry:
 			unlock_page_cgroup(page);
 			cpu_relax();
 			goto retry;
-		} else
+		} else {
+			unlock_page_cgroup(page);
 			goto done;
+		}
 	}
 
 	unlock_page_cgroup(page);
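
Worth noting for review: the new unlock_page_cgroup() before "goto done" pairs
with the removal of the unlock at the done: label in the next hunk. Once done:
no longer drops the lock, every path that jumps there must already have
released it, so this branch has to unlock itself.
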
@@ -364,31 +408,26 @@ noreclaim:
 		goto free_pc;
 	}
 
-	lock_page_cgroup(page);
-	/*
-	 * Check if somebody else beat us to allocating the page_cgroup
-	 */
-	race_pc = page_get_page_cgroup(page);
-	if (race_pc) {
-		kfree(pc);
-		pc = race_pc;
-		atomic_inc(&pc->ref_cnt);
-		res_counter_uncharge(&mem->res, PAGE_SIZE);
-		css_put(&mem->css);
-		goto done;
-	}
-
 	atomic_set(&pc->ref_cnt, 1);
 	pc->mem_cgroup = mem;
 	pc->page = page;
-	page_assign_page_cgroup(page, pc);
+	if (page_cgroup_assign_new_page_cgroup(page, pc)) {
+		/*
+		 * Another charge has already been added to this page.
+		 * Take lock_page_cgroup(page) again, read page->page_cgroup
+		 * and increment its refcnt ... simply retrying is fine.
+		 */
+		res_counter_uncharge(&mem->res, PAGE_SIZE);
+		css_put(&mem->css);
+		kfree(pc);
+		goto retry;
+	}
 
 	spin_lock_irqsave(&mem->lru_lock, flags);
 	list_add(&pc->lru, &mem->active_list);
 	spin_unlock_irqrestore(&mem->lru_lock, flags);
 
 done:
-	unlock_page_cgroup(page);
 	return 0;
 free_pc:
 	kfree(pc);
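
Continuing the user-space model from above (again invented names; unwind()
stands in for the res_counter_uncharge() + css_put() pair): the new charge
flow allocates optimistically with no lock held, tries the test-and-set, and
on a lost race unwinds its own half-built charge and retries, which then finds
the winner's pc:

  #include <stdlib.h>

  static void unwind(void)
  {
  	/* stand-in for res_counter_uncharge() + css_put() */
  }

  static int charge_page(struct page *page)
  {
  	for (;;) {
  		struct page_cgroup *pc;

  		pthread_mutex_lock(&page->lock);
  		pc = page->page_cgroup;
  		if (pc) {
  			pc->ref_cnt++;	/* already charged: share it */
  			pthread_mutex_unlock(&page->lock);
  			return 0;
  		}
  		pthread_mutex_unlock(&page->lock);

  		pc = malloc(sizeof(*pc));	/* lock dropped: may block */
  		if (!pc)
  			return -1;
  		pc->ref_cnt = 1;
  		if (!assign_new(page, pc))
  			return 0;		/* we tied it first */
  		unwind();			/* lost the race: undo... */
  		free(pc);			/* ...and loop to retry */
  	}
  }

Losing the race costs one allocation and a retry; unlike the removed race_pc
code, the loser never adopts the winner's pc while still inside the lock.
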
@@ -432,17 +471,25 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 
 	if (atomic_dec_and_test(&pc->ref_cnt)) {
 		page = pc->page;
-		lock_page_cgroup(page);
-		mem = pc->mem_cgroup;
-		css_put(&mem->css);
-		page_assign_page_cgroup(page, NULL);
-		unlock_page_cgroup(page);
-		res_counter_uncharge(&mem->res, PAGE_SIZE);
-
-		spin_lock_irqsave(&mem->lru_lock, flags);
-		list_del_init(&pc->lru);
-		spin_unlock_irqrestore(&mem->lru_lock, flags);
-		kfree(pc);
+		/*
+		 * Get page->page_cgroup and clear it under the lock.
+		 */
+		if (clear_page_cgroup(page, pc) == pc) {
+			mem = pc->mem_cgroup;
+			css_put(&mem->css);
+			res_counter_uncharge(&mem->res, PAGE_SIZE);
+			spin_lock_irqsave(&mem->lru_lock, flags);
+			list_del_init(&pc->lru);
+			spin_unlock_irqrestore(&mem->lru_lock, flags);
+			kfree(pc);
+		} else {
+			/*
+			 * Note: this will be removed when the force-empty
+			 * patch is applied; just show a warning here.
+			 */
+			printk(KERN_ERR "Race in mem_cgroup_uncharge() ?");
+			dump_stack();
+		}
 	}
 }
 
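
The uncharge side of the model, with the same caveats (the real code uses an
atomic ref_cnt dropped before this point, which the sketch omits): only the
caller whose compare-and-clear succeeds tears the pc down, so a pc that a
racing charger has just re-tied to the page is never freed under it:

  static void uncharge_page(struct page *page, struct page_cgroup *pc)
  {
  	if (clear_pc(page, pc) == pc) {
  		unwind();		/* give the charge back */
  		free(pc);		/* we were the last owner */
  	}
  	/* else: the page is tied to some other pc (or none); the patch
  	 * warns here until the force-empty patch replaces this case. */
  }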