|
@@ -31,13 +31,13 @@
|
|
|
#include "locking.h"
|
|
|
#include "ulist.h"
|
|
|
#include "backref.h"
|
|
|
+#include "extent_io.h"
|
|
|
|
|
|
/* TODO XXX FIXME
|
|
|
* - subvol delete -> delete when ref goes to 0? delete limits also?
|
|
|
* - reorganize keys
|
|
|
* - compressed
|
|
|
* - sync
|
|
|
- * - rescan
|
|
|
* - copy also limits on subvol creation
|
|
|
* - limit
|
|
|
* - caches fuer ulists
|
|
@@ -98,6 +98,14 @@ struct btrfs_qgroup_list {
|
|
|
struct btrfs_qgroup *member;
|
|
|
};
|
|
|
|
|
|
+struct qgroup_rescan {
|
|
|
+ struct btrfs_work work;
|
|
|
+ struct btrfs_fs_info *fs_info;
|
|
|
+};
|
|
|
+
|
|
|
+static void qgroup_rescan_start(struct btrfs_fs_info *fs_info,
|
|
|
+ struct qgroup_rescan *qscan);
|
|
|
+
|
|
|
/* must be called with qgroup_ioctl_lock held */
|
|
|
static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
|
|
|
u64 qgroupid)
|
|
@@ -298,7 +306,20 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
|
|
|
}
|
|
|
fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
|
|
|
ptr);
|
|
|
- /* FIXME read scan element */
|
|
|
+ fs_info->qgroup_rescan_progress.objectid =
|
|
|
+ btrfs_qgroup_status_rescan(l, ptr);
|
|
|
+ if (fs_info->qgroup_flags &
|
|
|
+ BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
|
|
|
+ struct qgroup_rescan *qscan =
|
|
|
+ kmalloc(sizeof(*qscan), GFP_NOFS);
|
|
|
+ if (!qscan) {
|
|
|
+ ret = -ENOMEM;
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+ fs_info->qgroup_rescan_progress.type = 0;
|
|
|
+ fs_info->qgroup_rescan_progress.offset = 0;
|
|
|
+ qgroup_rescan_start(fs_info, qscan);
|
|
|
+ }
|
|
|
goto next1;
|
|
|
}
|
|
|
|
|
@@ -719,7 +740,8 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
|
|
|
ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
|
|
|
btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
|
|
|
btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
|
|
|
- /* XXX scan */
|
|
|
+ btrfs_set_qgroup_status_rescan(l, ptr,
|
|
|
+ fs_info->qgroup_rescan_progress.objectid);
|
|
|
|
|
|
btrfs_mark_buffer_dirty(l);
|
|
|
|
|
@@ -830,7 +852,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
|
|
|
fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
|
|
|
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
|
|
|
btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
|
|
|
- btrfs_set_qgroup_status_scan(leaf, ptr, 0);
|
|
|
+ btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
|
|
|
|
|
|
btrfs_mark_buffer_dirty(leaf);
|
|
|
|
|
@@ -944,10 +966,11 @@ out:
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
-int btrfs_quota_rescan(struct btrfs_fs_info *fs_info)
|
|
|
+static void qgroup_dirty(struct btrfs_fs_info *fs_info,
|
|
|
+ struct btrfs_qgroup *qgroup)
|
|
|
{
|
|
|
- /* FIXME */
|
|
|
- return 0;
|
|
|
+ if (list_empty(&qgroup->dirty))
|
|
|
+ list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
|
|
|
}
|
|
|
|
|
|
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
|
|
@@ -1155,13 +1178,6 @@ out:
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
-static void qgroup_dirty(struct btrfs_fs_info *fs_info,
|
|
|
- struct btrfs_qgroup *qgroup)
|
|
|
-{
|
|
|
- if (list_empty(&qgroup->dirty))
|
|
|
- list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
|
|
|
-}
|
|
|
-
|
|
|
/*
|
|
|
* btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
|
|
|
* the modification into a list that's later used by btrfs_end_transaction to
|
|
@@ -1390,6 +1406,15 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
|
|
|
BUG();
|
|
|
}
|
|
|
|
|
|
+ mutex_lock(&fs_info->qgroup_rescan_lock);
|
|
|
+ if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
|
|
|
+ if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
|
|
|
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
|
|
|
+
|
|
|
/*
|
|
|
* the delayed ref sequence number we pass depends on the direction of
|
|
|
* the operation. for add operations, we pass
|
|
@@ -1403,7 +1428,15 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
|
|
|
if (ret < 0)
|
|
|
return ret;
|
|
|
|
|
|
+ mutex_lock(&fs_info->qgroup_rescan_lock);
|
|
|
spin_lock(&fs_info->qgroup_lock);
|
|
|
+ if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
|
|
|
+ if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
|
|
|
+ ret = 0;
|
|
|
+ goto unlock;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
quota_root = fs_info->quota_root;
|
|
|
if (!quota_root)
|
|
|
goto unlock;
|
|
@@ -1445,6 +1478,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
|
|
|
|
|
|
unlock:
|
|
|
spin_unlock(&fs_info->qgroup_lock);
|
|
|
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
|
|
|
ulist_free(roots);
|
|
|
ulist_free(tmp);
|
|
|
|
|
@@ -1823,3 +1857,259 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
|
|
|
(u32)trans->delayed_ref_elem.seq);
|
|
|
BUG();
|
|
|
}
|
|
|
+
|
|
|
+/*
|
|
|
+ * returns < 0 on error, 0 when more leafs are to be scanned.
|
|
|
+ * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
|
|
|
+ */
|
|
|
+static int
|
|
|
+qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path,
|
|
|
+ struct btrfs_trans_handle *trans, struct ulist *tmp,
|
|
|
+ struct extent_buffer *scratch_leaf)
|
|
|
+{
|
|
|
+ struct btrfs_key found;
|
|
|
+ struct btrfs_fs_info *fs_info = qscan->fs_info;
|
|
|
+ struct ulist *roots = NULL;
|
|
|
+ struct ulist_node *unode;
|
|
|
+ struct ulist_iterator uiter;
|
|
|
+ struct seq_list tree_mod_seq_elem = {};
|
|
|
+ u64 seq;
|
|
|
+ int slot;
|
|
|
+ int ret;
|
|
|
+
|
|
|
+ path->leave_spinning = 1;
|
|
|
+ mutex_lock(&fs_info->qgroup_rescan_lock);
|
|
|
+ ret = btrfs_search_slot_for_read(fs_info->extent_root,
|
|
|
+ &fs_info->qgroup_rescan_progress,
|
|
|
+ path, 1, 0);
|
|
|
+
|
|
|
+ pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
|
|
|
+ (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
|
|
|
+ fs_info->qgroup_rescan_progress.type,
|
|
|
+ (unsigned long long)fs_info->qgroup_rescan_progress.offset,
|
|
|
+ ret);
|
|
|
+
|
|
|
+ if (ret) {
|
|
|
+ /*
|
|
|
+ * The rescan is about to end, we will not be scanning any
|
|
|
+ * further blocks. We cannot unset the RESCAN flag here, because
|
|
|
+ * we want to commit the transaction if everything went well.
|
|
|
+ * To make the live accounting work in this phase, we set our
|
|
|
+ * scan progress pointer such that every real extent objectid
|
|
|
+ * will be smaller.
|
|
|
+ */
|
|
|
+ fs_info->qgroup_rescan_progress.objectid = (u64)-1;
|
|
|
+ btrfs_release_path(path);
|
|
|
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
|
|
|
+ return ret;
|
|
|
+ }
|
|
|
+
|
|
|
+ btrfs_item_key_to_cpu(path->nodes[0], &found,
|
|
|
+ btrfs_header_nritems(path->nodes[0]) - 1);
|
|
|
+ fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
|
|
|
+
|
|
|
+ btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
|
|
|
+ memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
|
|
|
+ slot = path->slots[0];
|
|
|
+ btrfs_release_path(path);
|
|
|
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
|
|
|
+
|
|
|
+ for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
|
|
|
+ btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
|
|
|
+ if (found.type != BTRFS_EXTENT_ITEM_KEY)
|
|
|
+ continue;
|
|
|
+ ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
|
|
|
+ tree_mod_seq_elem.seq, &roots);
|
|
|
+ if (ret < 0)
|
|
|
+ goto out;
|
|
|
+ spin_lock(&fs_info->qgroup_lock);
|
|
|
+ seq = fs_info->qgroup_seq;
|
|
|
+ fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
|
|
|
+
|
|
|
+ ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
|
|
|
+ if (ret) {
|
|
|
+ spin_unlock(&fs_info->qgroup_lock);
|
|
|
+ ulist_free(roots);
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * step2 of btrfs_qgroup_account_ref works from a single root,
|
|
|
+ * we're doing all at once here.
|
|
|
+ */
|
|
|
+ ulist_reinit(tmp);
|
|
|
+ ULIST_ITER_INIT(&uiter);
|
|
|
+ while ((unode = ulist_next(roots, &uiter))) {
|
|
|
+ struct btrfs_qgroup *qg;
|
|
|
+
|
|
|
+ qg = find_qgroup_rb(fs_info, unode->val);
|
|
|
+ if (!qg)
|
|
|
+ continue;
|
|
|
+
|
|
|
+ ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
|
|
|
+ GFP_ATOMIC);
|
|
|
+ if (ret < 0) {
|
|
|
+ spin_unlock(&fs_info->qgroup_lock);
|
|
|
+ ulist_free(roots);
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /* this loop is similar to step 2 of btrfs_qgroup_account_ref */
|
|
|
+ ULIST_ITER_INIT(&uiter);
|
|
|
+ while ((unode = ulist_next(tmp, &uiter))) {
|
|
|
+ struct btrfs_qgroup *qg;
|
|
|
+ struct btrfs_qgroup_list *glist;
|
|
|
+
|
|
|
+ qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
|
|
|
+ qg->rfer += found.offset;
|
|
|
+ qg->rfer_cmpr += found.offset;
|
|
|
+ WARN_ON(qg->tag >= seq);
|
|
|
+ if (qg->refcnt - seq == roots->nnodes) {
|
|
|
+ qg->excl += found.offset;
|
|
|
+ qg->excl_cmpr += found.offset;
|
|
|
+ }
|
|
|
+ qgroup_dirty(fs_info, qg);
|
|
|
+
|
|
|
+ list_for_each_entry(glist, &qg->groups, next_group) {
|
|
|
+ ret = ulist_add(tmp, glist->group->qgroupid,
|
|
|
+ (uintptr_t)glist->group,
|
|
|
+ GFP_ATOMIC);
|
|
|
+ if (ret < 0) {
|
|
|
+ spin_unlock(&fs_info->qgroup_lock);
|
|
|
+ ulist_free(roots);
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ spin_unlock(&fs_info->qgroup_lock);
|
|
|
+ ulist_free(roots);
|
|
|
+ ret = 0;
|
|
|
+ }
|
|
|
+
|
|
|
+out:
|
|
|
+ btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
|
|
|
+
|
|
|
+ return ret;
|
|
|
+}
|
|
|
+
|
|
|
+static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
|
|
|
+{
|
|
|
+ struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan,
|
|
|
+ work);
|
|
|
+ struct btrfs_path *path;
|
|
|
+ struct btrfs_trans_handle *trans = NULL;
|
|
|
+ struct btrfs_fs_info *fs_info = qscan->fs_info;
|
|
|
+ struct ulist *tmp = NULL;
|
|
|
+ struct extent_buffer *scratch_leaf = NULL;
|
|
|
+ int err = -ENOMEM;
|
|
|
+
|
|
|
+ path = btrfs_alloc_path();
|
|
|
+ if (!path)
|
|
|
+ goto out;
|
|
|
+ tmp = ulist_alloc(GFP_NOFS);
|
|
|
+ if (!tmp)
|
|
|
+ goto out;
|
|
|
+ scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
|
|
|
+ if (!scratch_leaf)
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ err = 0;
|
|
|
+ while (!err) {
|
|
|
+ trans = btrfs_start_transaction(fs_info->fs_root, 0);
|
|
|
+ if (IS_ERR(trans)) {
|
|
|
+ err = PTR_ERR(trans);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ if (!fs_info->quota_enabled) {
|
|
|
+ err = -EINTR;
|
|
|
+ } else {
|
|
|
+ err = qgroup_rescan_leaf(qscan, path, trans,
|
|
|
+ tmp, scratch_leaf);
|
|
|
+ }
|
|
|
+ if (err > 0)
|
|
|
+ btrfs_commit_transaction(trans, fs_info->fs_root);
|
|
|
+ else
|
|
|
+ btrfs_end_transaction(trans, fs_info->fs_root);
|
|
|
+ }
|
|
|
+
|
|
|
+out:
|
|
|
+ kfree(scratch_leaf);
|
|
|
+ ulist_free(tmp);
|
|
|
+ btrfs_free_path(path);
|
|
|
+ kfree(qscan);
|
|
|
+
|
|
|
+ mutex_lock(&fs_info->qgroup_rescan_lock);
|
|
|
+ fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
|
|
|
+
|
|
|
+ if (err == 2 &&
|
|
|
+ fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
|
|
|
+ fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
|
|
|
+ } else if (err < 0) {
|
|
|
+ fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
|
|
|
+ }
|
|
|
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
|
|
|
+
|
|
|
+ if (err >= 0) {
|
|
|
+ pr_info("btrfs: qgroup scan completed%s\n",
|
|
|
+ err == 2 ? " (inconsistency flag cleared)" : "");
|
|
|
+ } else {
|
|
|
+ pr_err("btrfs: qgroup scan failed with %d\n", err);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+static void
|
|
|
+qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan *qscan)
|
|
|
+{
|
|
|
+ memset(&qscan->work, 0, sizeof(qscan->work));
|
|
|
+ qscan->work.func = btrfs_qgroup_rescan_worker;
|
|
|
+ qscan->fs_info = fs_info;
|
|
|
+
|
|
|
+ pr_info("btrfs: qgroup scan started\n");
|
|
|
+ btrfs_queue_worker(&fs_info->qgroup_rescan_workers, &qscan->work);
|
|
|
+}
|
|
|
+
|
|
|
+int
|
|
|
+btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
|
|
|
+{
|
|
|
+ int ret = 0;
|
|
|
+ struct rb_node *n;
|
|
|
+ struct btrfs_qgroup *qgroup;
|
|
|
+ struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS);
|
|
|
+
|
|
|
+ if (!qscan)
|
|
|
+ return -ENOMEM;
|
|
|
+
|
|
|
+ mutex_lock(&fs_info->qgroup_rescan_lock);
|
|
|
+ spin_lock(&fs_info->qgroup_lock);
|
|
|
+ if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
|
|
|
+ ret = -EINPROGRESS;
|
|
|
+ else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
|
|
|
+ ret = -EINVAL;
|
|
|
+ if (ret) {
|
|
|
+ spin_unlock(&fs_info->qgroup_lock);
|
|
|
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
|
|
|
+ kfree(qscan);
|
|
|
+ return ret;
|
|
|
+ }
|
|
|
+
|
|
|
+ fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
|
|
|
+ memset(&fs_info->qgroup_rescan_progress, 0,
|
|
|
+ sizeof(fs_info->qgroup_rescan_progress));
|
|
|
+
|
|
|
+ /* clear all current qgroup tracking information */
|
|
|
+ for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
|
|
|
+ qgroup = rb_entry(n, struct btrfs_qgroup, node);
|
|
|
+ qgroup->rfer = 0;
|
|
|
+ qgroup->rfer_cmpr = 0;
|
|
|
+ qgroup->excl = 0;
|
|
|
+ qgroup->excl_cmpr = 0;
|
|
|
+ }
|
|
|
+ spin_unlock(&fs_info->qgroup_lock);
|
|
|
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
|
|
|
+
|
|
|
+ qgroup_rescan_start(fs_info, qscan);
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|