|
@@ -88,6 +88,35 @@ EXPORT_SYMBOL(rename_lock);
|
|
|
|
|
|
static struct kmem_cache *dentry_cache __read_mostly;
|
|
|
|
|
|
+/**
|
|
|
+ * read_seqbegin_or_lock - begin a sequence number check or locking block
|
|
|
+ * @lock: sequence lock
|
|
|
+ * @seq: sequence number to be checked
|
|
|
+ *
|
|
|
+ * First try it once optimistically without taking the lock. If that fails,
|
|
|
+ * take the lock. The sequence number is also used as a marker for deciding
|
|
|
+ * whether to be a reader (even) or writer (odd).
|
|
|
+ * N.B. seq must be initialized to an even number to begin with.
|
|
|
+ */
|
|
|
+static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
|
|
|
+{
|
|
|
+ if (!(*seq & 1)) /* Even */
|
|
|
+ *seq = read_seqbegin(lock);
|
|
|
+ else /* Odd */
|
|
|
+ write_seqlock(lock);
|
|
|
+}
|
|
|
+
|
|
|
+static inline int need_seqretry(seqlock_t *lock, int seq)
|
|
|
+{
|
|
|
+ return !(seq & 1) && read_seqretry(lock, seq);
|
|
|
+}
|
|
|
+
|
|
|
+static inline void done_seqretry(seqlock_t *lock, int seq)
|
|
|
+{
|
|
|
+ if (seq & 1)
|
|
|
+ write_sequnlock(lock);
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* This is the single most critical data structure when it comes
|
|
|
* to the dcache: the hashtable for lookups. Somebody should try
|
|
@@ -1012,7 +1041,7 @@ void shrink_dcache_for_umount(struct super_block *sb)
|
|
|
* the parenthood after dropping the lock and check
|
|
|
* that the sequence number still matches.
|
|
|
*/
|
|
|
-static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
|
|
|
+static struct dentry *try_to_ascend(struct dentry *old, unsigned seq)
|
|
|
{
|
|
|
struct dentry *new = old->d_parent;
|
|
|
|
|
@@ -1026,7 +1055,7 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq
|
|
|
*/
|
|
|
if (new != old->d_parent ||
|
|
|
(old->d_flags & DCACHE_DENTRY_KILLED) ||
|
|
|
- (!locked && read_seqretry(&rename_lock, seq))) {
|
|
|
+ need_seqretry(&rename_lock, seq)) {
|
|
|
spin_unlock(&new->d_lock);
|
|
|
new = NULL;
|
|
|
}
|
|
@@ -1063,13 +1092,12 @@ static void d_walk(struct dentry *parent, void *data,
|
|
|
{
|
|
|
struct dentry *this_parent;
|
|
|
struct list_head *next;
|
|
|
- unsigned seq;
|
|
|
- int locked = 0;
|
|
|
+ unsigned seq = 0;
|
|
|
enum d_walk_ret ret;
|
|
|
bool retry = true;
|
|
|
|
|
|
- seq = read_seqbegin(&rename_lock);
|
|
|
again:
|
|
|
+ read_seqbegin_or_lock(&rename_lock, &seq);
|
|
|
this_parent = parent;
|
|
|
spin_lock(&this_parent->d_lock);
|
|
|
|
|
@@ -1123,13 +1151,13 @@ resume:
|
|
|
*/
|
|
|
if (this_parent != parent) {
|
|
|
struct dentry *child = this_parent;
|
|
|
- this_parent = try_to_ascend(this_parent, locked, seq);
|
|
|
+ this_parent = try_to_ascend(this_parent, seq);
|
|
|
if (!this_parent)
|
|
|
goto rename_retry;
|
|
|
next = child->d_u.d_child.next;
|
|
|
goto resume;
|
|
|
}
|
|
|
- if (!locked && read_seqretry(&rename_lock, seq)) {
|
|
|
+ if (need_seqretry(&rename_lock, seq)) {
|
|
|
spin_unlock(&this_parent->d_lock);
|
|
|
goto rename_retry;
|
|
|
}
|
|
@@ -1138,17 +1166,13 @@ resume:
|
|
|
|
|
|
out_unlock:
|
|
|
spin_unlock(&this_parent->d_lock);
|
|
|
- if (locked)
|
|
|
- write_sequnlock(&rename_lock);
|
|
|
+ done_seqretry(&rename_lock, seq);
|
|
|
return;
|
|
|
|
|
|
rename_retry:
|
|
|
if (!retry)
|
|
|
return;
|
|
|
- if (locked)
|
|
|
- goto again;
|
|
|
- locked = 1;
|
|
|
- write_seqlock(&rename_lock);
|
|
|
+ seq = 1;
|
|
|
goto again;
|
|
|
}
|
|
|
|
|
@@ -2647,9 +2671,39 @@ static int prepend(char **buffer, int *buflen, const char *str, int namelen)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
+/**
|
|
|
+ * prepend_name - prepend a pathname in front of current buffer pointer
|
|
|
+ * @buffer: pointer to the current position in the buffer
|
|
|
+ * @buflen: pointer to the remaining length of the buffer
|
|
|
+ * @name: qstr structure holding the name string and its length
|
|
|
+ *
|
|
|
+ * With RCU path tracing, this may race with d_move(). ACCESS_ONCE() is used
|
|
|
+ * to make sure that either the old or the new name pointer and length are
|
|
|
+ * fetched. However, there may be a mismatch between length and pointer,
|
|
|
+ * so the length cannot be trusted: the name is copied byte-by-byte until
|
|
|
+ * the length is reached or a null byte is found. A "/" is also prepended
|
|
|
+ * in front of the name. The sequence number check in the caller will
|
|
|
+ * retry when a d_move() does happen, so any garbage left in the buffer
|
|
|
+ * by a mismatched pointer and length will be discarded.
|
|
|
+ */
|
|
|
static int prepend_name(char **buffer, int *buflen, struct qstr *name)
|
|
|
{
|
|
|
- return prepend(buffer, buflen, name->name, name->len);
|
|
|
+ const char *dname = ACCESS_ONCE(name->name);
|
|
|
+ u32 dlen = ACCESS_ONCE(name->len);
|
|
|
+ char *p;
|
|
|
+
|
|
|
+ if (*buflen < dlen + 1)
|
|
|
+ return -ENAMETOOLONG;
|
|
|
+ *buflen -= dlen + 1;
|
|
|
+ p = *buffer -= dlen + 1;
|
|
|
+ *p++ = '/';
|
|
|
+ while (dlen--) {
|
|
|
+ char c = *dname++;
|
|
|
+ if (!c)
|
|
|
+ break;
|
|
|
+ *p++ = c;
|
|
|
+ }
|
|
|
+ return 0;
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -2659,7 +2713,14 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
|
|
|
* @buffer: pointer to the end of the buffer
|
|
|
* @buflen: pointer to buffer length
|
|
|
*
|
|
|
- * Caller holds the rename_lock.
|
|
|
+ * The function tries to write out the pathname without taking any lock other
|
|
|
+ * than the RCU read lock to make sure that dentries won't go away. It only
|
|
|
+ * checks the sequence number of the global rename_lock as any change in the
|
|
|
+ * dentry's d_seq will be preceded by changes in the rename_lock sequence
|
|
|
+ * number. If the sequence number has changed, the whole pathname
|
|
|
+ * back-tracing sequence is restarted. Only one lockless attempt is made:
|
|
|
+ * after the first failed sequence check seq is set to 1, so the retry runs
|
|
|
+ * with the rename_lock held.
|
|
|
*/
|
|
|
static int prepend_path(const struct path *path,
|
|
|
const struct path *root,
|
|
@@ -2668,54 +2729,66 @@ static int prepend_path(const struct path *path,
|
|
|
struct dentry *dentry = path->dentry;
|
|
|
struct vfsmount *vfsmnt = path->mnt;
|
|
|
struct mount *mnt = real_mount(vfsmnt);
|
|
|
- bool slash = false;
|
|
|
int error = 0;
|
|
|
+ unsigned seq = 0;
|
|
|
+ char *bptr;
|
|
|
+ int blen;
|
|
|
|
|
|
+ rcu_read_lock();
|
|
|
+restart:
|
|
|
+ bptr = *buffer;
|
|
|
+ blen = *buflen;
|
|
|
+ read_seqbegin_or_lock(&rename_lock, &seq);
|
|
|
while (dentry != root->dentry || vfsmnt != root->mnt) {
|
|
|
struct dentry * parent;
|
|
|
|
|
|
if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
|
|
|
/* Global root? */
|
|
|
- if (!mnt_has_parent(mnt))
|
|
|
- goto global_root;
|
|
|
- dentry = mnt->mnt_mountpoint;
|
|
|
- mnt = mnt->mnt_parent;
|
|
|
- vfsmnt = &mnt->mnt;
|
|
|
- continue;
|
|
|
+ if (mnt_has_parent(mnt)) {
|
|
|
+ dentry = mnt->mnt_mountpoint;
|
|
|
+ mnt = mnt->mnt_parent;
|
|
|
+ vfsmnt = &mnt->mnt;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ /*
|
|
|
+ * Filesystems needing to implement special "root names"
|
|
|
+ * should do so with ->d_dname()
|
|
|
+ */
|
|
|
+ if (IS_ROOT(dentry) &&
|
|
|
+ (dentry->d_name.len != 1 ||
|
|
|
+ dentry->d_name.name[0] != '/')) {
|
|
|
+ WARN(1, "Root dentry has weird name <%.*s>\n",
|
|
|
+ (int) dentry->d_name.len,
|
|
|
+ dentry->d_name.name);
|
|
|
+ }
|
|
|
+ if (!error)
|
|
|
+ error = is_mounted(vfsmnt) ? 1 : 2;
|
|
|
+ break;
|
|
|
}
|
|
|
parent = dentry->d_parent;
|
|
|
prefetch(parent);
|
|
|
- spin_lock(&dentry->d_lock);
|
|
|
- error = prepend_name(buffer, buflen, &dentry->d_name);
|
|
|
- spin_unlock(&dentry->d_lock);
|
|
|
- if (!error)
|
|
|
- error = prepend(buffer, buflen, "/", 1);
|
|
|
+ error = prepend_name(&bptr, &blen, &dentry->d_name);
|
|
|
if (error)
|
|
|
break;
|
|
|
|
|
|
- slash = true;
|
|
|
dentry = parent;
|
|
|
}
|
|
|
+ if (!(seq & 1))
|
|
|
+ rcu_read_unlock();
|
|
|
+ if (need_seqretry(&rename_lock, seq)) {
|
|
|
+ seq = 1;
|
|
|
+ goto restart;
|
|
|
+ }
|
|
|
+ done_seqretry(&rename_lock, seq);
|
|
|
|
|
|
- if (!error && !slash)
|
|
|
- error = prepend(buffer, buflen, "/", 1);
|
|
|
-
|
|
|
- return error;
|
|
|
-
|
|
|
-global_root:
|
|
|
- /*
|
|
|
- * Filesystems needing to implement special "root names"
|
|
|
- * should do so with ->d_dname()
|
|
|
- */
|
|
|
- if (IS_ROOT(dentry) &&
|
|
|
- (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
|
|
|
- WARN(1, "Root dentry has weird name <%.*s>\n",
|
|
|
- (int) dentry->d_name.len, dentry->d_name.name);
|
|
|
- }
|
|
|
- if (!slash)
|
|
|
- error = prepend(buffer, buflen, "/", 1);
|
|
|
- if (!error)
|
|
|
- error = is_mounted(vfsmnt) ? 1 : 2;
|
|
|
+ if (error >= 0 && bptr == *buffer) {
|
|
|
+ if (--blen < 0)
|
|
|
+ error = -ENAMETOOLONG;
|
|
|
+ else
|
|
|
+ *--bptr = '/';
|
|
|
+ }
|
|
|
+ *buffer = bptr;
|
|
|
+ *buflen = blen;
|
|
|
return error;
|
|
|
}
|
|
|
|
|
@@ -2744,9 +2817,7 @@ char *__d_path(const struct path *path,
|
|
|
|
|
|
prepend(&res, &buflen, "\0", 1);
|
|
|
br_read_lock(&vfsmount_lock);
|
|
|
- write_seqlock(&rename_lock);
|
|
|
error = prepend_path(path, root, &res, &buflen);
|
|
|
- write_sequnlock(&rename_lock);
|
|
|
br_read_unlock(&vfsmount_lock);
|
|
|
|
|
|
if (error < 0)
|
|
@@ -2765,9 +2836,7 @@ char *d_absolute_path(const struct path *path,
|
|
|
|
|
|
prepend(&res, &buflen, "\0", 1);
|
|
|
br_read_lock(&vfsmount_lock);
|
|
|
- write_seqlock(&rename_lock);
|
|
|
error = prepend_path(path, &root, &res, &buflen);
|
|
|
- write_sequnlock(&rename_lock);
|
|
|
br_read_unlock(&vfsmount_lock);
|
|
|
|
|
|
if (error > 1)
|
|
@@ -2833,9 +2902,7 @@ char *d_path(const struct path *path, char *buf, int buflen)
|
|
|
|
|
|
get_fs_root(current->fs, &root);
|
|
|
br_read_lock(&vfsmount_lock);
|
|
|
- write_seqlock(&rename_lock);
|
|
|
error = path_with_deleted(path, &root, &res, &buflen);
|
|
|
- write_sequnlock(&rename_lock);
|
|
|
br_read_unlock(&vfsmount_lock);
|
|
|
if (error < 0)
|
|
|
res = ERR_PTR(error);
|
|
@@ -2870,10 +2937,10 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
|
|
|
char *end = buffer + buflen;
|
|
|
/* these dentries are never renamed, so d_lock is not needed */
|
|
|
if (prepend(&end, &buflen, " (deleted)", 11) ||
|
|
|
- prepend_name(&end, &buflen, &dentry->d_name) ||
|
|
|
+ prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
|
|
|
prepend(&end, &buflen, "/", 1))
|
|
|
end = ERR_PTR(-ENAMETOOLONG);
|
|
|
- return end;
|
|
|
+ return end;
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -2881,30 +2948,42 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
|
|
|
*/
|
|
|
static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
|
|
|
{
|
|
|
- char *end = buf + buflen;
|
|
|
- char *retval;
|
|
|
+ char *end, *retval;
|
|
|
+ int len, seq = 0;
|
|
|
+ int error = 0;
|
|
|
|
|
|
- prepend(&end, &buflen, "\0", 1);
|
|
|
+ rcu_read_lock();
|
|
|
+restart:
|
|
|
+ end = buf + buflen;
|
|
|
+ len = buflen;
|
|
|
+ prepend(&end, &len, "\0", 1);
|
|
|
if (buflen < 1)
|
|
|
goto Elong;
|
|
|
/* Get '/' right */
|
|
|
retval = end-1;
|
|
|
*retval = '/';
|
|
|
-
|
|
|
+ read_seqbegin_or_lock(&rename_lock, &seq);
|
|
|
while (!IS_ROOT(dentry)) {
|
|
|
struct dentry *parent = dentry->d_parent;
|
|
|
int error;
|
|
|
|
|
|
prefetch(parent);
|
|
|
- spin_lock(&dentry->d_lock);
|
|
|
- error = prepend_name(&end, &buflen, &dentry->d_name);
|
|
|
- spin_unlock(&dentry->d_lock);
|
|
|
- if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
|
|
|
- goto Elong;
|
|
|
+ error = prepend_name(&end, &len, &dentry->d_name);
|
|
|
+ if (error)
|
|
|
+ break;
|
|
|
|
|
|
retval = end;
|
|
|
dentry = parent;
|
|
|
}
|
|
|
+ if (!(seq & 1))
|
|
|
+ rcu_read_unlock();
|
|
|
+ if (need_seqretry(&rename_lock, seq)) {
|
|
|
+ seq = 1;
|
|
|
+ goto restart;
|
|
|
+ }
|
|
|
+ done_seqretry(&rename_lock, seq);
|
|
|
+ if (error)
|
|
|
+ goto Elong;
|
|
|
return retval;
|
|
|
Elong:
|
|
|
return ERR_PTR(-ENAMETOOLONG);
|
|
@@ -2912,13 +2991,7 @@ Elong:
|
|
|
|
|
|
char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
|
|
|
{
|
|
|
- char *retval;
|
|
|
-
|
|
|
- write_seqlock(&rename_lock);
|
|
|
- retval = __dentry_path(dentry, buf, buflen);
|
|
|
- write_sequnlock(&rename_lock);
|
|
|
-
|
|
|
- return retval;
|
|
|
+ return __dentry_path(dentry, buf, buflen);
|
|
|
}
|
|
|
EXPORT_SYMBOL(dentry_path_raw);
|
|
|
|
|
@@ -2927,7 +3000,6 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
|
|
|
char *p = NULL;
|
|
|
char *retval;
|
|
|
|
|
|
- write_seqlock(&rename_lock);
|
|
|
if (d_unlinked(dentry)) {
|
|
|
p = buf + buflen;
|
|
|
if (prepend(&p, &buflen, "//deleted", 10) != 0)
|
|
@@ -2935,7 +3007,6 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
|
|
|
buflen++;
|
|
|
}
|
|
|
retval = __dentry_path(dentry, buf, buflen);
|
|
|
- write_sequnlock(&rename_lock);
|
|
|
if (!IS_ERR(retval) && p)
|
|
|
*p = '/'; /* restore '/' overriden with '\0' */
|
|
|
return retval;
|
|
@@ -2974,7 +3045,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
|
|
|
|
|
|
error = -ENOENT;
|
|
|
br_read_lock(&vfsmount_lock);
|
|
|
- write_seqlock(&rename_lock);
|
|
|
if (!d_unlinked(pwd.dentry)) {
|
|
|
unsigned long len;
|
|
|
char *cwd = page + PAGE_SIZE;
|
|
@@ -2982,7 +3052,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
|
|
|
|
|
|
prepend(&cwd, &buflen, "\0", 1);
|
|
|
error = prepend_path(&pwd, &root, &cwd, &buflen);
|
|
|
- write_sequnlock(&rename_lock);
|
|
|
br_read_unlock(&vfsmount_lock);
|
|
|
|
|
|
if (error < 0)
|
|
@@ -3003,7 +3072,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
|
|
|
error = -EFAULT;
|
|
|
}
|
|
|
} else {
|
|
|
- write_sequnlock(&rename_lock);
|
|
|
br_read_unlock(&vfsmount_lock);
|
|
|
}
|
|
|
|