@@ -177,105 +177,149 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
  * pNFS client layout cache
  */
 
+/* Need to hold i_lock if caller does not already hold reference */
+void
+get_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+	atomic_inc(&lo->plh_refcount);
+}
+
 static void
-get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
+destroy_layout_hdr(struct pnfs_layout_hdr *lo)
 {
-	assert_spin_locked(&lo->inode->i_lock);
-	lo->refcount++;
+	dprintk("%s: freeing layout cache %p\n", __func__, lo);
+	BUG_ON(!list_empty(&lo->plh_layouts));
+	NFS_I(lo->plh_inode)->layout = NULL;
+	kfree(lo);
 }
 
 static void
 put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
 {
-	assert_spin_locked(&lo->inode->i_lock);
-	BUG_ON(lo->refcount == 0);
-
-	lo->refcount--;
-	if (!lo->refcount) {
-		dprintk("%s: freeing layout cache %p\n", __func__, lo);
-		BUG_ON(!list_empty(&lo->layouts));
-		NFS_I(lo->inode)->layout = NULL;
-		kfree(lo);
-	}
+	if (atomic_dec_and_test(&lo->plh_refcount))
+		destroy_layout_hdr(lo);
 }
 
 void
-put_layout_hdr(struct inode *inode)
+put_layout_hdr(struct pnfs_layout_hdr *lo)
 {
-	spin_lock(&inode->i_lock);
-	put_layout_hdr_locked(NFS_I(inode)->layout);
-	spin_unlock(&inode->i_lock);
+	struct inode *inode = lo->plh_inode;
+
+	if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
+		destroy_layout_hdr(lo);
+		spin_unlock(&inode->i_lock);
+	}
 }
 
 static void
 init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
 {
-	INIT_LIST_HEAD(&lseg->fi_list);
-	kref_init(&lseg->kref);
-	lseg->layout = lo;
+	INIT_LIST_HEAD(&lseg->pls_list);
+	atomic_set(&lseg->pls_refcount, 1);
+	smp_mb();
+	set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
+	lseg->pls_layout = lo;
 }
 
-/* Called without i_lock held, as the free_lseg call may sleep */
-static void
-destroy_lseg(struct kref *kref)
+static void free_lseg(struct pnfs_layout_segment *lseg)
 {
-	struct pnfs_layout_segment *lseg =
-		container_of(kref, struct pnfs_layout_segment, kref);
-	struct inode *ino = lseg->layout->inode;
+	struct inode *ino = lseg->pls_layout->plh_inode;
 
-	dprintk("--> %s\n", __func__);
 	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
-	/* Matched by get_layout_hdr_locked in pnfs_insert_layout */
-	put_layout_hdr(ino);
+	/* Matched by get_layout_hdr in pnfs_insert_layout */
+	put_layout_hdr(NFS_I(ino)->layout);
 }
 
-static void
-put_lseg(struct pnfs_layout_segment *lseg)
+/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
+ * could sleep, so must be called outside of the lock.
+ * Returns 1 if object was removed, otherwise return 0.
+ */
+static int
+put_lseg_locked(struct pnfs_layout_segment *lseg,
+		struct list_head *tmp_list)
+{
+	dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
+		atomic_read(&lseg->pls_refcount),
+		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
+	if (atomic_dec_and_test(&lseg->pls_refcount)) {
+		struct inode *ino = lseg->pls_layout->plh_inode;
+
+		BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
+		list_del(&lseg->pls_list);
+		if (list_empty(&lseg->pls_layout->plh_segs)) {
+			struct nfs_client *clp;
+
+			clp = NFS_SERVER(ino)->nfs_client;
+			spin_lock(&clp->cl_lock);
+			/* List does not take a reference, so no need for put here */
+			list_del_init(&lseg->pls_layout->plh_layouts);
+			spin_unlock(&clp->cl_lock);
+			clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
+		}
+		rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
+		list_add(&lseg->pls_list, tmp_list);
+		return 1;
+	}
+	return 0;
+}
+
+static bool
+should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
 {
-	if (!lseg)
-		return;
+	return (recall_iomode == IOMODE_ANY ||
+		lseg_iomode == recall_iomode);
+}
 
-	dprintk("%s: lseg %p ref %d\n", __func__, lseg,
-		atomic_read(&lseg->kref.refcount));
-	kref_put(&lseg->kref, destroy_lseg);
+/* Returns 1 if lseg is removed from list, 0 otherwise */
+static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
+			     struct list_head *tmp_list)
+{
+	int rv = 0;
+
+	if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
+		/* Remove the reference keeping the lseg in the
+		 * list.  It will now be removed when all
+		 * outstanding io is finished.
+		 */
+		rv = put_lseg_locked(lseg, tmp_list);
+	}
+	return rv;
 }
 
-static void
-pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list)
+/* Returns count of number of matching invalid lsegs remaining in list
+ * after call.
+ */
+int
+mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
+			    struct list_head *tmp_list,
+			    u32 iomode)
 {
 	struct pnfs_layout_segment *lseg, *next;
-	struct nfs_client *clp;
+	int invalid = 0, removed = 0;
 
 	dprintk("%s:Begin lo %p\n", __func__, lo);
 
-	assert_spin_locked(&lo->inode->i_lock);
-	list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
-		dprintk("%s: freeing lseg %p\n", __func__, lseg);
-		list_move(&lseg->fi_list, tmp_list);
-	}
-	clp = NFS_SERVER(lo->inode)->nfs_client;
-	spin_lock(&clp->cl_lock);
-	/* List does not take a reference, so no need for put here */
-	list_del_init(&lo->layouts);
-	spin_unlock(&clp->cl_lock);
-	write_seqlock(&lo->seqlock);
-	clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
-	write_sequnlock(&lo->seqlock);
-
-	dprintk("%s:Return\n", __func__);
+	list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
+		if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
+			dprintk("%s: freeing lseg %p iomode %d "
+				"offset %llu length %llu\n", __func__,
+				lseg, lseg->pls_range.iomode, lseg->pls_range.offset,
+				lseg->pls_range.length);
+			invalid++;
+			removed += mark_lseg_invalid(lseg, tmp_list);
+		}
+	dprintk("%s:Return %i\n", __func__, invalid - removed);
+	return invalid - removed;
 }
 
-static void
-pnfs_free_lseg_list(struct list_head *tmp_list)
+void
+pnfs_free_lseg_list(struct list_head *free_me)
 {
-	struct pnfs_layout_segment *lseg;
+	struct pnfs_layout_segment *lseg, *tmp;
 
-	while (!list_empty(tmp_list)) {
-		lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
-				  fi_list);
-		dprintk("%s calling put_lseg on %p\n", __func__, lseg);
-		list_del(&lseg->fi_list);
-		put_lseg(lseg);
+	list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
+		list_del(&lseg->pls_list);
+		free_lseg(lseg);
 	}
 }
 
@@ -288,7 +332,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
 	spin_lock(&nfsi->vfs_inode.i_lock);
 	lo = nfsi->layout;
 	if (lo) {
-		pnfs_clear_lseg_list(lo, &tmp_list);
+		set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags);
+		mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
 		/* Matched by refcount set to 1 in alloc_init_layout_hdr */
 		put_layout_hdr_locked(lo);
 	}
@@ -312,76 +357,80 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
 
 	while (!list_empty(&tmp_list)) {
 		lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
-				layouts);
+				plh_layouts);
 		dprintk("%s freeing layout for inode %lu\n", __func__,
-			lo->inode->i_ino);
-		pnfs_destroy_layout(NFS_I(lo->inode));
+			lo->plh_inode->i_ino);
+		pnfs_destroy_layout(NFS_I(lo->plh_inode));
 	}
 }
 
-/* update lo->stateid with new if is more recent
- *
- * lo->stateid could be the open stateid, in which case we just use what given.
- */
-static void
-pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
-			const nfs4_stateid *new)
-{
-	nfs4_stateid *old = &lo->stateid;
-	bool overwrite = false;
-
-	write_seqlock(&lo->seqlock);
-	if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
-	    memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
-		overwrite = true;
-	else {
-		u32 oldseq, newseq;
-
-		oldseq = be32_to_cpu(old->stateid.seqid);
-		newseq = be32_to_cpu(new->stateid.seqid);
-		if ((int)(newseq - oldseq) > 0)
-			overwrite = true;
+/* update lo->plh_stateid with new if is more recent */
+void
+pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
+			bool update_barrier)
+{
+	u32 oldseq, newseq;
+
+	oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid);
+	newseq = be32_to_cpu(new->stateid.seqid);
+	if ((int)(newseq - oldseq) > 0) {
+		memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid));
+		if (update_barrier) {
+			u32 new_barrier = be32_to_cpu(new->stateid.seqid);
+
+			if ((int)(new_barrier - lo->plh_barrier))
+				lo->plh_barrier = new_barrier;
+		} else {
+			/* Because of wraparound, we want to keep the barrier
+			 * "close" to the current seqids. It needs to be
+			 * within 2**31 to count as "behind", so if it
+			 * gets too near that limit, give us a little leeway
+			 * and bring it to within 2**30.
+			 * NOTE - and yes, this is all unsigned arithmetic.
+			 */
+			if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
+				lo->plh_barrier = newseq - (1 << 30);
+		}
 	}
-	if (overwrite)
-		memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
-	write_sequnlock(&lo->seqlock);
 }
 
-static void
-pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
-			      struct nfs4_state *state)
+/* lget is set to 1 if called from inside send_layoutget call chain */
+static bool
+pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
+			int lget)
 {
-	int seq;
-
-	dprintk("--> %s\n", __func__);
-	write_seqlock(&lo->seqlock);
-	do {
-		seq = read_seqbegin(&state->seqlock);
-		memcpy(lo->stateid.data, state->stateid.data,
-		       sizeof(state->stateid.data));
-	} while (read_seqretry(&state->seqlock, seq));
-	set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
-	write_sequnlock(&lo->seqlock);
-	dprintk("<-- %s\n", __func__);
+	if ((stateid) &&
+	    (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
+		return true;
+	return lo->plh_block_lgets ||
+		test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
+		(list_empty(&lo->plh_segs) &&
+		 (atomic_read(&lo->plh_outstanding) > lget));
 }
 
-void
-pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
-			struct nfs4_state *open_state)
+int
+pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
+			      struct nfs4_state *open_state)
 {
-	int seq;
+	int status = 0;
 
 	dprintk("--> %s\n", __func__);
-	do {
-		seq = read_seqbegin(&lo->seqlock);
-		if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
-			/* This will trigger retry of the read */
-			pnfs_layout_from_open_stateid(lo, open_state);
-		} else
-			memcpy(dst->data, lo->stateid.data,
-			       sizeof(lo->stateid.data));
-	} while (read_seqretry(&lo->seqlock, seq));
+	spin_lock(&lo->plh_inode->i_lock);
+	if (pnfs_layoutgets_blocked(lo, NULL, 1)) {
+		status = -EAGAIN;
+	} else if (list_empty(&lo->plh_segs)) {
+		int seq;
+
+		do {
+			seq = read_seqbegin(&open_state->seqlock);
+			memcpy(dst->data, open_state->stateid.data,
+			       sizeof(open_state->stateid.data));
+		} while (read_seqretry(&open_state->seqlock, seq));
+	} else
+		memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data));
+	spin_unlock(&lo->plh_inode->i_lock);
 	dprintk("<-- %s\n", __func__);
+	return status;
 }
 
 /*
@@ -395,7 +444,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
 	   struct nfs_open_context *ctx,
 	   u32 iomode)
 {
-	struct inode *ino = lo->inode;
+	struct inode *ino = lo->plh_inode;
 	struct nfs_server *server = NFS_SERVER(ino);
 	struct nfs4_layoutget *lgp;
 	struct pnfs_layout_segment *lseg = NULL;
@@ -404,10 +453,8 @@ send_layoutget(struct pnfs_layout_hdr *lo,
 
 	BUG_ON(ctx == NULL);
 	lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
-	if (lgp == NULL) {
-		put_layout_hdr(lo->inode);
+	if (lgp == NULL)
 		return NULL;
-	}
 	lgp->args.minlength = NFS4_MAX_UINT64;
 	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
 	lgp->args.range.iomode = iomode;
@@ -424,11 +471,88 @@ send_layoutget(struct pnfs_layout_hdr *lo,
 	nfs4_proc_layoutget(lgp);
 	if (!lseg) {
 		/* remember that LAYOUTGET failed and suspend trying */
-		set_bit(lo_fail_bit(iomode), &lo->state);
+		set_bit(lo_fail_bit(iomode), &lo->plh_flags);
 	}
 	return lseg;
 }
 
+bool pnfs_roc(struct inode *ino)
+{
+	struct pnfs_layout_hdr *lo;
+	struct pnfs_layout_segment *lseg, *tmp;
+	LIST_HEAD(tmp_list);
+	bool found = false;
+
+	spin_lock(&ino->i_lock);
+	lo = NFS_I(ino)->layout;
+	if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) ||
+	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
+		goto out_nolayout;
+	list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list)
+		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
+			mark_lseg_invalid(lseg, &tmp_list);
+			found = true;
+		}
+	if (!found)
+		goto out_nolayout;
+	lo->plh_block_lgets++;
+	get_layout_hdr(lo); /* matched in pnfs_roc_release */
+	spin_unlock(&ino->i_lock);
+	pnfs_free_lseg_list(&tmp_list);
+	return true;
+
+out_nolayout:
+	spin_unlock(&ino->i_lock);
+	return false;
+}
+
+void pnfs_roc_release(struct inode *ino)
+{
+	struct pnfs_layout_hdr *lo;
+
+	spin_lock(&ino->i_lock);
+	lo = NFS_I(ino)->layout;
+	lo->plh_block_lgets--;
+	put_layout_hdr_locked(lo);
+	spin_unlock(&ino->i_lock);
+}
+
+void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
+{
+	struct pnfs_layout_hdr *lo;
+
+	spin_lock(&ino->i_lock);
+	lo = NFS_I(ino)->layout;
+	if ((int)(barrier - lo->plh_barrier) > 0)
+		lo->plh_barrier = barrier;
+	spin_unlock(&ino->i_lock);
+}
+
+bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
+{
+	struct nfs_inode *nfsi = NFS_I(ino);
+	struct pnfs_layout_segment *lseg;
+	bool found = false;
+
+	spin_lock(&ino->i_lock);
+	list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
+		if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
+			found = true;
+			break;
+		}
+	if (!found) {
+		struct pnfs_layout_hdr *lo = nfsi->layout;
+		u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid);
+
+		/* Since close does not return a layout stateid for use as
+		 * a barrier, we choose the worst-case barrier.
+		 */
+		*barrier = current_seqid + atomic_read(&lo->plh_outstanding);
+	}
+	spin_unlock(&ino->i_lock);
+	return found;
+}
+
 /*
  * Compare two layout segments for sorting into layout cache.
  * We want to preferentially return RW over RO layouts, so ensure those
@@ -450,37 +574,29 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
 
 	dprintk("%s:Begin\n", __func__);
 
-	assert_spin_locked(&lo->inode->i_lock);
-	if (list_empty(&lo->segs)) {
-		struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client;
-
-		spin_lock(&clp->cl_lock);
-		BUG_ON(!list_empty(&lo->layouts));
-		list_add_tail(&lo->layouts, &clp->cl_layouts);
-		spin_unlock(&clp->cl_lock);
-	}
-	list_for_each_entry(lp, &lo->segs, fi_list) {
-		if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
+	assert_spin_locked(&lo->plh_inode->i_lock);
+	list_for_each_entry(lp, &lo->plh_segs, pls_list) {
+		if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0)
 			continue;
-		list_add_tail(&lseg->fi_list, &lp->fi_list);
+		list_add_tail(&lseg->pls_list, &lp->pls_list);
 		dprintk("%s: inserted lseg %p "
 			"iomode %d offset %llu length %llu before "
 			"lp %p iomode %d offset %llu length %llu\n",
-			__func__, lseg, lseg->range.iomode,
-			lseg->range.offset, lseg->range.length,
-			lp, lp->range.iomode, lp->range.offset,
-			lp->range.length);
+			__func__, lseg, lseg->pls_range.iomode,
+			lseg->pls_range.offset, lseg->pls_range.length,
+			lp, lp->pls_range.iomode, lp->pls_range.offset,
+			lp->pls_range.length);
 		found = 1;
 		break;
 	}
 	if (!found) {
-		list_add_tail(&lseg->fi_list, &lo->segs);
+		list_add_tail(&lseg->pls_list, &lo->plh_segs);
 		dprintk("%s: inserted lseg %p "
 			"iomode %d offset %llu length %llu at tail\n",
-			__func__, lseg, lseg->range.iomode,
-			lseg->range.offset, lseg->range.length);
+			__func__, lseg, lseg->pls_range.iomode,
+			lseg->pls_range.offset, lseg->pls_range.length);
 	}
-	get_layout_hdr_locked(lo);
+	get_layout_hdr(lo);
 
 	dprintk("%s:Return\n", __func__);
 }
@@ -493,11 +609,11 @@ alloc_init_layout_hdr(struct inode *ino)
 	lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
 	if (!lo)
 		return NULL;
-	lo->refcount = 1;
-	INIT_LIST_HEAD(&lo->layouts);
-	INIT_LIST_HEAD(&lo->segs);
-	seqlock_init(&lo->seqlock);
-	lo->inode = ino;
+	atomic_set(&lo->plh_refcount, 1);
+	INIT_LIST_HEAD(&lo->plh_layouts);
+	INIT_LIST_HEAD(&lo->plh_segs);
+	INIT_LIST_HEAD(&lo->plh_bulk_recall);
+	lo->plh_inode = ino;
 	return lo;
 }
 
@@ -510,9 +626,12 @@ pnfs_find_alloc_layout(struct inode *ino)
 	dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
 
 	assert_spin_locked(&ino->i_lock);
-	if (nfsi->layout)
-		return nfsi->layout;
-
+	if (nfsi->layout) {
+		if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags))
+			return NULL;
+		else
+			return nfsi->layout;
+	}
 	spin_unlock(&ino->i_lock);
 	new = alloc_init_layout_hdr(ino);
 	spin_lock(&ino->i_lock);
@@ -538,31 +657,32 @@ pnfs_find_alloc_layout(struct inode *ino)
 static int
 is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
 {
-	return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW);
+	return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW);
 }
 
 /*
  * lookup range in layout
  */
 static struct pnfs_layout_segment *
-pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
+pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
 {
 	struct pnfs_layout_segment *lseg, *ret = NULL;
 
 	dprintk("%s:Begin\n", __func__);
 
-	assert_spin_locked(&lo->inode->i_lock);
-	list_for_each_entry(lseg, &lo->segs, fi_list) {
-		if (is_matching_lseg(lseg, iomode)) {
+	assert_spin_locked(&lo->plh_inode->i_lock);
+	list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
+		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
+		    is_matching_lseg(lseg, iomode)) {
 			ret = lseg;
 			break;
 		}
-		if (cmp_layout(iomode, lseg->range.iomode) > 0)
+		if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
 			break;
 	}
 
 	dprintk("%s:Return lseg %p ref %d\n",
-		__func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0);
+		__func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0);
 	return ret;
 }
 
@@ -576,6 +696,7 @@ pnfs_update_layout(struct inode *ino,
 		   enum pnfs_iomode iomode)
 {
 	struct nfs_inode *nfsi = NFS_I(ino);
+	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
 	struct pnfs_layout_hdr *lo;
 	struct pnfs_layout_segment *lseg = NULL;
 
@@ -588,25 +709,53 @@ pnfs_update_layout(struct inode *ino,
 		goto out_unlock;
 	}
 
-	/* Check to see if the layout for the given range already exists */
-	lseg = pnfs_has_layout(lo, iomode);
-	if (lseg) {
-		dprintk("%s: Using cached lseg %p for iomode %d)\n",
-			__func__, lseg, iomode);
+	/* Do we even need to bother with this? */
+	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+		dprintk("%s matches recall, use MDS\n", __func__);
 		goto out_unlock;
 	}
 
+	/* Check to see if the layout for the given range already exists */
+	lseg = pnfs_find_lseg(lo, iomode);
+	if (lseg)
+		goto out_unlock;
+
 	/* if LAYOUTGET already failed once we don't try again */
-	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state))
+	if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
+		goto out_unlock;
+
+	if (pnfs_layoutgets_blocked(lo, NULL, 0))
 		goto out_unlock;
+	atomic_inc(&lo->plh_outstanding);
 
-	get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */
+	get_layout_hdr(lo);
+	if (list_empty(&lo->plh_segs)) {
+		/* The lo must be on the clp list if there is any
+		 * chance of a CB_LAYOUTRECALL(FILE) coming in.
+		 */
+		spin_lock(&clp->cl_lock);
+		BUG_ON(!list_empty(&lo->plh_layouts));
+		list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
+		spin_unlock(&clp->cl_lock);
+	}
 	spin_unlock(&ino->i_lock);
 
 	lseg = send_layoutget(lo, ctx, iomode);
+	if (!lseg) {
+		spin_lock(&ino->i_lock);
+		if (list_empty(&lo->plh_segs)) {
+			spin_lock(&clp->cl_lock);
+			list_del_init(&lo->plh_layouts);
+			spin_unlock(&clp->cl_lock);
+			clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+		}
+		spin_unlock(&ino->i_lock);
+	}
+	atomic_dec(&lo->plh_outstanding);
+	put_layout_hdr(lo);
 out:
 	dprintk("%s end, state 0x%lx lseg %p\n", __func__,
-		nfsi->layout->state, lseg);
+		nfsi->layout->plh_flags, lseg);
 	return lseg;
 out_unlock:
 	spin_unlock(&ino->i_lock);
@@ -619,9 +768,21 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
 	struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
 	struct nfs4_layoutget_res *res = &lgp->res;
 	struct pnfs_layout_segment *lseg;
-	struct inode *ino = lo->inode;
+	struct inode *ino = lo->plh_inode;
+	struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
 	int status = 0;
 
+	/* Verify we got what we asked for.
+	 * Note that because the xdr parsing only accepts a single
+	 * element array, this can fail even if the server is behaving
+	 * correctly.
+	 */
+	if (lgp->args.range.iomode > res->range.iomode ||
+	    res->range.offset != 0 ||
+	    res->range.length != NFS4_MAX_UINT64) {
+		status = -EINVAL;
+		goto out;
+	}
 	/* Inject layout blob into I/O device driver */
 	lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
 	if (!lseg || IS_ERR(lseg)) {
@@ -635,16 +796,37 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
 	}
 
 	spin_lock(&ino->i_lock);
+	if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) ||
+	    test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+		dprintk("%s forget reply due to recall\n", __func__);
+		goto out_forget_reply;
+	}
+
+	if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) {
+		dprintk("%s forget reply due to state\n", __func__);
+		goto out_forget_reply;
+	}
 	init_lseg(lo, lseg);
-	lseg->range = res->range;
+	lseg->pls_range = res->range;
 	*lgp->lsegpp = lseg;
 	pnfs_insert_layout(lo, lseg);
 
+	if (res->return_on_close) {
+		set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
+		set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
+	}
+
 	/* Done processing layoutget. Set the layout stateid */
-	pnfs_set_layout_stateid(lo, &res->stateid);
+	pnfs_set_layout_stateid(lo, &res->stateid, false);
 	spin_unlock(&ino->i_lock);
 out:
 	return status;
+
+out_forget_reply:
+	spin_unlock(&ino->i_lock);
+	lseg->pls_layout = lo;
+	NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+	goto out;
 }
 
 /*