|
@@ -25,6 +25,7 @@
|
|
|
#include <linux/timer.h>
|
|
|
#include <linux/mm.h>
|
|
|
#include <linux/highmem.h>
|
|
|
+#include <linux/hrtimer.h>
|
|
|
|
|
|
static void __journal_temp_unlink_buffer(struct journal_head *jh);
|
|
|
|
|
@@ -49,6 +50,7 @@ get_transaction(journal_t *journal, transaction_t *transaction)
|
|
|
{
|
|
|
transaction->t_journal = journal;
|
|
|
transaction->t_state = T_RUNNING;
|
|
|
+ transaction->t_start_time = ktime_get();
|
|
|
transaction->t_tid = journal->j_transaction_sequence++;
|
|
|
transaction->t_expires = jiffies + journal->j_commit_interval;
|
|
|
spin_lock_init(&transaction->t_handle_lock);
|
|
@@ -1370,7 +1372,7 @@ int journal_stop(handle_t *handle)
|
|
|
{
|
|
|
transaction_t *transaction = handle->h_transaction;
|
|
|
journal_t *journal = transaction->t_journal;
|
|
|
- int old_handle_count, err;
|
|
|
+ int err;
|
|
|
pid_t pid;
|
|
|
|
|
|
J_ASSERT(journal_current_handle() == handle);
|
|
@@ -1399,6 +1401,17 @@ int journal_stop(handle_t *handle)
|
|
|
* on IO anyway. Speeds up many-threaded, many-dir operations
|
|
|
* by 30x or more...
|
|
|
*
|
|
|
+ * We try and optimize the sleep time against what the underlying disk
|
|
|
+ * can do, instead of having a static sleep time. This is usefull for
|
|
|
+ * the case where our storage is so fast that it is more optimal to go
|
|
|
+ * ahead and force a flush and wait for the transaction to be committed
|
|
|
+ * than it is to wait for an arbitrary amount of time for new writers to
|
|
|
+ * join the transaction. We acheive this by measuring how long it takes
|
|
|
+ * to commit a transaction, and compare it with how long this
|
|
|
+ * transaction has been running, and if run time < commit time then we
|
|
|
+ * sleep for the delta and commit. This greatly helps super fast disks
|
|
|
+ * that would see slowdowns as more threads started doing fsyncs.
|
|
|
+ *
|
|
|
* But don't do this if this process was the most recent one to
|
|
|
* perform a synchronous write. We do this to detect the case where a
|
|
|
* single process is doing a stream of sync writes. No point in waiting
|
|
@@ -1406,11 +1419,26 @@ int journal_stop(handle_t *handle)
|
|
|
*/
|
|
|
pid = current->pid;
|
|
|
if (handle->h_sync && journal->j_last_sync_writer != pid) {
|
|
|
+ u64 commit_time, trans_time;
|
|
|
+
|
|
|
journal->j_last_sync_writer = pid;
|
|
|
- do {
|
|
|
- old_handle_count = transaction->t_handle_count;
|
|
|
- schedule_timeout_uninterruptible(1);
|
|
|
- } while (old_handle_count != transaction->t_handle_count);
|
|
|
+
|
|
|
+ spin_lock(&journal->j_state_lock);
|
|
|
+ commit_time = journal->j_average_commit_time;
|
|
|
+ spin_unlock(&journal->j_state_lock);
|
|
|
+
|
|
|
+ trans_time = ktime_to_ns(ktime_sub(ktime_get(),
|
|
|
+ transaction->t_start_time));
|
|
|
+
|
|
|
+ commit_time = min_t(u64, commit_time,
|
|
|
+ 1000*jiffies_to_usecs(1));
|
|
|
+
|
|
|
+ if (trans_time < commit_time) {
|
|
|
+ ktime_t expires = ktime_add_ns(ktime_get(),
|
|
|
+ commit_time);
|
|
|
+ set_current_state(TASK_UNINTERRUPTIBLE);
|
|
|
+ schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
current->journal_info = NULL;
|