|
@@ -862,17 +862,46 @@ int clear_page_dirty_for_io(struct page *page)
|
|
|
{
|
|
|
struct address_space *mapping = page_mapping(page);
|
|
|
|
|
|
- if (!mapping)
|
|
|
- return TestClearPageDirty(page);
|
|
|
-
|
|
|
- if (TestClearPageDirty(page)) {
|
|
|
- if (mapping_cap_account_dirty(mapping)) {
|
|
|
- page_mkclean(page);
|
|
|
+ if (mapping && mapping_cap_account_dirty(mapping)) {
|
|
|
+ /*
|
|
|
+ * Yes, Virginia, this is indeed insane.
|
|
|
+ *
|
|
|
+ * We use this sequence to make sure that
|
|
|
+ * (a) we account for dirty stats properly
|
|
|
+ * (b) we tell the low-level filesystem to
|
|
|
+ * mark the whole page dirty if it was
|
|
|
+ * dirty in a pagetable. Only to then
|
|
|
+ * (c) clean the page again and return 1 to
|
|
|
+ * cause the writeback.
|
|
|
+ *
|
|
|
+ * This way we avoid all nasty races with the
|
|
|
+ * dirty bit in multiple places and clearing
|
|
|
+ * them concurrently from different threads.
|
|
|
+ *
|
|
|
+ * Note! Normally the "set_page_dirty(page)"
|
|
|
+ * has no effect on the actual dirty bit - since
|
|
|
+ * that will already usually be set. But we
|
|
|
+ * need the side effects, and it can help us
|
|
|
+ * avoid races.
|
|
|
+ *
|
|
|
+ * We basically use the page "master dirty bit"
|
|
|
+ * as a serialization point for all the different
|
|
|
+ * threads doing their things.
|
|
|
+ *
|
|
|
+ * FIXME! We still have a race here: if somebody
|
|
|
+ * adds the page back to the page tables in
|
|
|
+ * between the "page_mkclean()" and the "TestClearPageDirty()",
|
|
|
+ * we might have it mapped without the dirty bit set.
|
|
|
+ */
|
|
|
+ if (page_mkclean(page))
|
|
|
+ set_page_dirty(page);
|
|
|
+ if (TestClearPageDirty(page)) {
|
|
|
dec_zone_page_state(page, NR_FILE_DIRTY);
|
|
|
+ return 1;
|
|
|
}
|
|
|
- return 1;
|
|
|
+ return 0;
|
|
|
}
|
|
|
- return 0;
|
|
|
+ return TestClearPageDirty(page);
|
|
|
}
|
|
|
EXPORT_SYMBOL(clear_page_dirty_for_io);
|
|
|
|