|
@@ -154,11 +154,13 @@ int ceph_open(struct inode *inode, struct file *file)
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
/*
|
|
- * No need to block if we have any caps. Update wanted set
|
|
|
|
|
|
+ * No need to block if we have caps on the auth MDS (for
|
|
|
|
+ * write) or any MDS (for read). Update wanted set
|
|
* asynchronously.
|
|
* asynchronously.
|
|
*/
|
|
*/
|
|
spin_lock(&inode->i_lock);
|
|
spin_lock(&inode->i_lock);
|
|
- if (__ceph_is_any_real_caps(ci)) {
|
|
|
|
|
|
+ if (__ceph_is_any_real_caps(ci) &&
|
|
|
|
+ (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
|
|
int mds_wanted = __ceph_caps_mds_wanted(ci);
|
|
int mds_wanted = __ceph_caps_mds_wanted(ci);
|
|
int issued = __ceph_caps_issued(ci, NULL);
|
|
int issued = __ceph_caps_issued(ci, NULL);
|
|
|
|
|
|
@@ -280,11 +282,12 @@ int ceph_release(struct inode *inode, struct file *file)
|
|
static int striped_read(struct inode *inode,
|
|
static int striped_read(struct inode *inode,
|
|
u64 off, u64 len,
|
|
u64 off, u64 len,
|
|
struct page **pages, int num_pages,
|
|
struct page **pages, int num_pages,
|
|
- int *checkeof)
|
|
|
|
|
|
+ int *checkeof, bool align_to_pages)
|
|
{
|
|
{
|
|
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
|
|
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
|
|
struct ceph_inode_info *ci = ceph_inode(inode);
|
|
struct ceph_inode_info *ci = ceph_inode(inode);
|
|
u64 pos, this_len;
|
|
u64 pos, this_len;
|
|
|
|
+ int io_align, page_align;
|
|
int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */
|
|
int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */
|
|
int left, pages_left;
|
|
int left, pages_left;
|
|
int read;
|
|
int read;
|
|
@@ -300,14 +303,19 @@ static int striped_read(struct inode *inode,
|
|
page_pos = pages;
|
|
page_pos = pages;
|
|
pages_left = num_pages;
|
|
pages_left = num_pages;
|
|
read = 0;
|
|
read = 0;
|
|
|
|
+ io_align = off & ~PAGE_MASK;
|
|
|
|
|
|
more:
|
|
more:
|
|
|
|
+ if (align_to_pages)
|
|
|
|
+ page_align = (pos - io_align) & ~PAGE_MASK;
|
|
|
|
+ else
|
|
|
|
+ page_align = pos & ~PAGE_MASK;
|
|
this_len = left;
|
|
this_len = left;
|
|
ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
|
|
ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
|
|
&ci->i_layout, pos, &this_len,
|
|
&ci->i_layout, pos, &this_len,
|
|
ci->i_truncate_seq,
|
|
ci->i_truncate_seq,
|
|
ci->i_truncate_size,
|
|
ci->i_truncate_size,
|
|
- page_pos, pages_left);
|
|
|
|
|
|
+ page_pos, pages_left, page_align);
|
|
hit_stripe = this_len < left;
|
|
hit_stripe = this_len < left;
|
|
was_short = ret >= 0 && ret < this_len;
|
|
was_short = ret >= 0 && ret < this_len;
|
|
if (ret == -ENOENT)
|
|
if (ret == -ENOENT)
|
|
@@ -374,26 +382,25 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
|
|
dout("sync_read on file %p %llu~%u %s\n", file, off, len,
|
|
dout("sync_read on file %p %llu~%u %s\n", file, off, len,
|
|
(file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
|
|
(file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
|
|
|
|
|
|
- if (file->f_flags & O_DIRECT) {
|
|
|
|
- pages = ceph_get_direct_page_vector(data, num_pages, off, len);
|
|
|
|
-
|
|
|
|
- /*
|
|
|
|
- * flush any page cache pages in this range. this
|
|
|
|
- * will make concurrent normal and O_DIRECT io slow,
|
|
|
|
- * but it will at least behave sensibly when they are
|
|
|
|
- * in sequence.
|
|
|
|
- */
|
|
|
|
- } else {
|
|
|
|
|
|
+ if (file->f_flags & O_DIRECT)
|
|
|
|
+ pages = ceph_get_direct_page_vector(data, num_pages);
|
|
|
|
+ else
|
|
pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
|
|
pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
|
|
- }
|
|
|
|
if (IS_ERR(pages))
|
|
if (IS_ERR(pages))
|
|
return PTR_ERR(pages);
|
|
return PTR_ERR(pages);
|
|
|
|
|
|
|
|
+ /*
|
|
|
|
+ * flush any page cache pages in this range. this
|
|
|
|
+ * will make concurrent normal and sync io slow,
|
|
|
|
+ * but it will at least behave sensibly when they are
|
|
|
|
+ * in sequence.
|
|
|
|
+ */
|
|
ret = filemap_write_and_wait(inode->i_mapping);
|
|
ret = filemap_write_and_wait(inode->i_mapping);
|
|
if (ret < 0)
|
|
if (ret < 0)
|
|
goto done;
|
|
goto done;
|
|
|
|
|
|
- ret = striped_read(inode, off, len, pages, num_pages, checkeof);
|
|
|
|
|
|
+ ret = striped_read(inode, off, len, pages, num_pages, checkeof,
|
|
|
|
+ file->f_flags & O_DIRECT);
|
|
|
|
|
|
if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
|
|
if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
|
|
ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
|
|
ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
|
|
@@ -448,6 +455,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
|
|
int flags;
|
|
int flags;
|
|
int do_sync = 0;
|
|
int do_sync = 0;
|
|
int check_caps = 0;
|
|
int check_caps = 0;
|
|
|
|
+ int page_align, io_align;
|
|
int ret;
|
|
int ret;
|
|
struct timespec mtime = CURRENT_TIME;
|
|
struct timespec mtime = CURRENT_TIME;
|
|
|
|
|
|
@@ -462,6 +470,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
|
|
else
|
|
else
|
|
pos = *offset;
|
|
pos = *offset;
|
|
|
|
|
|
|
|
+ io_align = pos & ~PAGE_MASK;
|
|
|
|
+
|
|
ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
|
|
ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
|
|
if (ret < 0)
|
|
if (ret < 0)
|
|
return ret;
|
|
return ret;
|
|
@@ -486,20 +496,26 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
|
|
*/
|
|
*/
|
|
more:
|
|
more:
|
|
len = left;
|
|
len = left;
|
|
|
|
+ if (file->f_flags & O_DIRECT)
|
|
|
|
+ /* write from beginning of first page, regardless of
|
|
|
|
+ io alignment */
|
|
|
|
+ page_align = (pos - io_align) & ~PAGE_MASK;
|
|
|
|
+ else
|
|
|
|
+ page_align = pos & ~PAGE_MASK;
|
|
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
|
|
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
|
|
ceph_vino(inode), pos, &len,
|
|
ceph_vino(inode), pos, &len,
|
|
CEPH_OSD_OP_WRITE, flags,
|
|
CEPH_OSD_OP_WRITE, flags,
|
|
ci->i_snap_realm->cached_context,
|
|
ci->i_snap_realm->cached_context,
|
|
do_sync,
|
|
do_sync,
|
|
ci->i_truncate_seq, ci->i_truncate_size,
|
|
ci->i_truncate_seq, ci->i_truncate_size,
|
|
- &mtime, false, 2);
|
|
|
|
|
|
+ &mtime, false, 2, page_align);
|
|
if (!req)
|
|
if (!req)
|
|
return -ENOMEM;
|
|
return -ENOMEM;
|
|
|
|
|
|
num_pages = calc_pages_for(pos, len);
|
|
num_pages = calc_pages_for(pos, len);
|
|
|
|
|
|
if (file->f_flags & O_DIRECT) {
|
|
if (file->f_flags & O_DIRECT) {
|
|
- pages = ceph_get_direct_page_vector(data, num_pages, pos, len);
|
|
|
|
|
|
+ pages = ceph_get_direct_page_vector(data, num_pages);
|
|
if (IS_ERR(pages)) {
|
|
if (IS_ERR(pages)) {
|
|
ret = PTR_ERR(pages);
|
|
ret = PTR_ERR(pages);
|
|
goto out;
|
|
goto out;
|