|
@@ -804,6 +804,41 @@ void __init page_writeback_init(void)
|
|
prop_descriptor_init(&vm_dirties, shift);
|
|
prop_descriptor_init(&vm_dirties, shift);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/**
|
|
|
|
+ * tag_pages_for_writeback - tag pages to be written by write_cache_pages
|
|
|
|
+ * @mapping: address space structure to write
|
|
|
|
+ * @start: starting page index
|
|
|
|
+ * @end: ending page index (inclusive)
|
|
|
|
+ *
|
|
|
|
+ * This function scans the page range from @start to @end (inclusive) and tags
|
|
|
|
+ * all pages that have DIRTY tag set with a special TOWRITE tag. The idea is
|
|
|
|
+ * that write_cache_pages (or whoever calls this function) will then use
|
|
|
|
+ * TOWRITE tag to identify pages eligible for writeback. This mechanism is
|
|
|
|
+ * used to avoid livelocking of writeback by a process steadily creating new
|
|
|
|
+ * dirty pages in the file (thus it is important for this function to be quick
|
|
|
|
+ * so that it can tag pages faster than a dirtying process can create them).
|
|
|
|
+ */
|
|
|
|
+/*
|
|
|
|
+ * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce tree_lock latency.
|
|
|
|
+ */
|
|
|
|
+#define WRITEBACK_TAG_BATCH 4096
|
|
|
|
+void tag_pages_for_writeback(struct address_space *mapping,
|
|
|
|
+ pgoff_t start, pgoff_t end)
|
|
|
|
+{
|
|
|
|
+ unsigned long tagged;
|
|
|
|
+
|
|
|
|
+ do {
|
|
|
|
+ spin_lock_irq(&mapping->tree_lock);
|
|
|
|
+ tagged = radix_tree_range_tag_if_tagged(&mapping->page_tree,
|
|
|
|
+ &start, end, WRITEBACK_TAG_BATCH,
|
|
|
|
+ PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
|
|
|
|
+ spin_unlock_irq(&mapping->tree_lock);
|
|
|
|
+ WARN_ON_ONCE(tagged > WRITEBACK_TAG_BATCH);
|
|
|
|
+ cond_resched();
|
|
|
|
+ } while (tagged >= WRITEBACK_TAG_BATCH);
|
|
|
|
+}
|
|
|
|
+EXPORT_SYMBOL(tag_pages_for_writeback);
|
|
|
|
+
|
|
/**
|
|
/**
|
|
* write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
|
|
* write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
|
|
* @mapping: address space structure to write
|
|
* @mapping: address space structure to write
|
|
@@ -818,6 +853,13 @@ void __init page_writeback_init(void)
|
|
* the call was made get new I/O started against them. If wbc->sync_mode is
|
|
* the call was made get new I/O started against them. If wbc->sync_mode is
|
|
* WB_SYNC_ALL then we were called for data integrity and we must wait for
|
|
* WB_SYNC_ALL then we were called for data integrity and we must wait for
|
|
* existing IO to complete.
|
|
* existing IO to complete.
|
|
|
|
+ *
|
|
|
|
+ * To avoid livelocks (when other process dirties new pages), we first tag
|
|
|
|
+ * pages which should be written back with TOWRITE tag and only then start
|
|
|
|
+ * writing them. For data-integrity sync we have to be careful so that we do
|
|
|
|
+ * not miss some pages (e.g., because some other process has cleared TOWRITE
|
|
|
|
+ * tag we set). The rule we follow is that TOWRITE tag can be cleared only
|
|
|
|
+ * by the process clearing the DIRTY tag (and submitting the page for IO).
|
|
*/
|
|
*/
|
|
int write_cache_pages(struct address_space *mapping,
|
|
int write_cache_pages(struct address_space *mapping,
|
|
struct writeback_control *wbc, writepage_t writepage,
|
|
struct writeback_control *wbc, writepage_t writepage,
|
|
@@ -833,6 +875,7 @@ int write_cache_pages(struct address_space *mapping,
|
|
pgoff_t done_index;
|
|
pgoff_t done_index;
|
|
int cycled;
|
|
int cycled;
|
|
int range_whole = 0;
|
|
int range_whole = 0;
|
|
|
|
+ int tag;
|
|
|
|
|
|
pagevec_init(&pvec, 0);
|
|
pagevec_init(&pvec, 0);
|
|
if (wbc->range_cyclic) {
|
|
if (wbc->range_cyclic) {
|
|
@@ -849,29 +892,19 @@ int write_cache_pages(struct address_space *mapping,
|
|
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
|
|
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
|
|
range_whole = 1;
|
|
range_whole = 1;
|
|
cycled = 1; /* ignore range_cyclic tests */
|
|
cycled = 1; /* ignore range_cyclic tests */
|
|
-
|
|
|
|
- /*
|
|
|
|
- * If this is a data integrity sync, cap the writeback to the
|
|
|
|
- * current end of file. Any extension to the file that occurs
|
|
|
|
- * after this is a new write and we don't need to write those
|
|
|
|
- * pages out to fulfil our data integrity requirements. If we
|
|
|
|
- * try to write them out, we can get stuck in this scan until
|
|
|
|
- * the concurrent writer stops adding dirty pages and extending
|
|
|
|
- * EOF.
|
|
|
|
- */
|
|
|
|
- if (wbc->sync_mode == WB_SYNC_ALL &&
|
|
|
|
- wbc->range_end == LLONG_MAX) {
|
|
|
|
- end = i_size_read(mapping->host) >> PAGE_CACHE_SHIFT;
|
|
|
|
- }
|
|
|
|
}
|
|
}
|
|
-
|
|
|
|
|
|
+ if (wbc->sync_mode == WB_SYNC_ALL)
|
|
|
|
+ tag = PAGECACHE_TAG_TOWRITE;
|
|
|
|
+ else
|
|
|
|
+ tag = PAGECACHE_TAG_DIRTY;
|
|
retry:
|
|
retry:
|
|
|
|
+ if (wbc->sync_mode == WB_SYNC_ALL)
|
|
|
|
+ tag_pages_for_writeback(mapping, index, end);
|
|
done_index = index;
|
|
done_index = index;
|
|
while (!done && (index <= end)) {
|
|
while (!done && (index <= end)) {
|
|
int i;
|
|
int i;
|
|
|
|
|
|
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
|
|
|
|
- PAGECACHE_TAG_DIRTY,
|
|
|
|
|
|
+ nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
|
|
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
|
|
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
|
|
if (nr_pages == 0)
|
|
if (nr_pages == 0)
|
|
break;
|
|
break;
|
|
@@ -1327,6 +1360,9 @@ int test_set_page_writeback(struct page *page)
|
|
radix_tree_tag_clear(&mapping->page_tree,
|
|
radix_tree_tag_clear(&mapping->page_tree,
|
|
page_index(page),
|
|
page_index(page),
|
|
PAGECACHE_TAG_DIRTY);
|
|
PAGECACHE_TAG_DIRTY);
|
|
|
|
+ radix_tree_tag_clear(&mapping->page_tree,
|
|
|
|
+ page_index(page),
|
|
|
|
+ PAGECACHE_TAG_TOWRITE);
|
|
spin_unlock_irqrestore(&mapping->tree_lock, flags);
|
|
spin_unlock_irqrestore(&mapping->tree_lock, flags);
|
|
} else {
|
|
} else {
|
|
ret = TestSetPageWriteback(page);
|
|
ret = TestSetPageWriteback(page);
|