|
@@ -787,6 +787,11 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
|
|
|
if (!zero_to)
|
|
|
zero_to = PAGE_CACHE_SIZE;
|
|
|
|
|
|
+ mlog(0,
|
|
|
+ "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n",
|
|
|
+ (unsigned long long)abs_from, (unsigned long long)abs_to,
|
|
|
+ index, zero_from, zero_to);
|
|
|
+
|
|
|
/* We know that zero_from is block aligned */
|
|
|
for (block_start = zero_from; block_start < zero_to;
|
|
|
block_start = block_end) {
|
|
@@ -833,25 +838,114 @@ out:
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
-static int ocfs2_zero_extend(struct inode *inode,
|
|
|
- u64 zero_to_size)
|
|
|
+/*
|
|
|
+ * Find the next range to zero. We do this in terms of bytes because
|
|
|
+ * that's what ocfs2_zero_extend() wants, and it is dealing with the
|
|
|
+ * pagecache. We may return multiple extents.
|
|
|
+ *
|
|
|
+ * zero_start and zero_end are ocfs2_zero_extend()'s current idea of what
|
|
|
+ * needs to be zeroed. range_start and range_end return the next zeroing
|
|
|
+ * range. A subsequent call should pass the previous range_end as its
|
|
|
+ * zero_start. If range_end is 0, there's nothing to do.
|
|
|
+ *
|
|
|
+ * Unwritten extents are skipped over. Refcounted extents are CoW'd.
|
|
|
+ */
|
|
|
+static int ocfs2_zero_extend_get_range(struct inode *inode,
|
|
|
+ struct buffer_head *di_bh,
|
|
|
+ u64 zero_start, u64 zero_end,
|
|
|
+ u64 *range_start, u64 *range_end)
|
|
|
{
|
|
|
- int ret = 0;
|
|
|
- u64 start_off, next_off;
|
|
|
- struct super_block *sb = inode->i_sb;
|
|
|
+ int rc = 0, needs_cow = 0;
|
|
|
+ u32 p_cpos, zero_clusters = 0;
|
|
|
+ u32 zero_cpos =
|
|
|
+ zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
|
|
|
+ u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
|
|
|
+ unsigned int num_clusters = 0;
|
|
|
+ unsigned int ext_flags = 0;
|
|
|
|
|
|
- start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
|
|
|
- while (start_off < zero_to_size) {
|
|
|
- next_off = (start_off & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
|
|
|
- if (next_off > zero_to_size)
|
|
|
- next_off = zero_to_size;
|
|
|
- ret = ocfs2_write_zero_page(inode, start_off, next_off);
|
|
|
- if (ret < 0) {
|
|
|
- mlog_errno(ret);
|
|
|
+ while (zero_cpos < last_cpos) {
|
|
|
+ rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
|
|
|
+ &num_clusters, &ext_flags);
|
|
|
+ if (rc) {
|
|
|
+ mlog_errno(rc);
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
|
|
|
+ zero_clusters = num_clusters;
|
|
|
+ if (ext_flags & OCFS2_EXT_REFCOUNTED)
|
|
|
+ needs_cow = 1;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ zero_cpos += num_clusters;
|
|
|
+ }
|
|
|
+ if (!zero_clusters) {
|
|
|
+ *range_end = 0;
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+
|
|
|
+ while ((zero_cpos + zero_clusters) < last_cpos) {
|
|
|
+ rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
|
|
|
+ &p_cpos, &num_clusters,
|
|
|
+ &ext_flags);
|
|
|
+ if (rc) {
|
|
|
+ mlog_errno(rc);
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN))
|
|
|
+ break;
|
|
|
+ if (ext_flags & OCFS2_EXT_REFCOUNTED)
|
|
|
+ needs_cow = 1;
|
|
|
+ zero_clusters += num_clusters;
|
|
|
+ }
|
|
|
+ if ((zero_cpos + zero_clusters) > last_cpos)
|
|
|
+ zero_clusters = last_cpos - zero_cpos;
|
|
|
+
|
|
|
+ if (needs_cow) {
|
|
|
+ rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
|
|
|
+ UINT_MAX);
|
|
|
+ if (rc) {
|
|
|
+ mlog_errno(rc);
|
|
|
goto out;
|
|
|
}
|
|
|
+ }
|
|
|
|
|
|
- start_off = next_off;
|
|
|
+ *range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
|
|
|
+ *range_end = ocfs2_clusters_to_bytes(inode->i_sb,
|
|
|
+ zero_cpos + zero_clusters);
|
|
|
+
|
|
|
+out:
|
|
|
+ return rc;
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * Zero one range returned from ocfs2_zero_extend_get_range(). The caller
|
|
|
+ * has made sure that the entire range needs zeroing.
|
|
|
+ */
|
|
|
+static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
|
|
|
+ u64 range_end)
|
|
|
+{
|
|
|
+ int rc = 0;
|
|
|
+ u64 next_pos;
|
|
|
+ u64 zero_pos = range_start;
|
|
|
+
|
|
|
+ mlog(0, "range_start = %llu, range_end = %llu\n",
|
|
|
+ (unsigned long long)range_start,
|
|
|
+ (unsigned long long)range_end);
|
|
|
+ BUG_ON(range_start >= range_end);
|
|
|
+
|
|
|
+ while (zero_pos < range_end) {
|
|
|
+ next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
|
|
|
+ if (next_pos > range_end)
|
|
|
+ next_pos = range_end;
|
|
|
+ rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
|
|
|
+ if (rc < 0) {
|
|
|
+ mlog_errno(rc);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ zero_pos = next_pos;
|
|
|
|
|
|
/*
|
|
|
* Very large extends have the potential to lock up
|
|
@@ -860,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode,
|
|
|
cond_resched();
|
|
|
}
|
|
|
|
|
|
-out:
|
|
|
+ return rc;
|
|
|
+}
|
|
|
+
|
|
|
+int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
|
|
|
+ loff_t zero_to_size)
|
|
|
+{
|
|
|
+ int ret = 0;
|
|
|
+ u64 zero_start, range_start = 0, range_end = 0;
|
|
|
+ struct super_block *sb = inode->i_sb;
|
|
|
+
|
|
|
+ zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
|
|
|
+ mlog(0, "zero_start %llu for i_size %llu\n",
|
|
|
+ (unsigned long long)zero_start,
|
|
|
+ (unsigned long long)i_size_read(inode));
|
|
|
+ while (zero_start < zero_to_size) {
|
|
|
+ ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
|
|
|
+ zero_to_size,
|
|
|
+ &range_start,
|
|
|
+ &range_end);
|
|
|
+ if (ret) {
|
|
|
+ mlog_errno(ret);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ if (!range_end)
|
|
|
+ break;
|
|
|
+ /* Trim the ends */
|
|
|
+ if (range_start < zero_start)
|
|
|
+ range_start = zero_start;
|
|
|
+ if (range_end > zero_to_size)
|
|
|
+ range_end = zero_to_size;
|
|
|
+
|
|
|
+ ret = ocfs2_zero_extend_range(inode, range_start,
|
|
|
+ range_end);
|
|
|
+ if (ret) {
|
|
|
+ mlog_errno(ret);
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ zero_start = range_end;
|
|
|
+ }
|
|
|
+
|
|
|
return ret;
|
|
|
}
|
|
|
|
|
|
-int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
|
|
|
+int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
|
|
|
+ u64 new_i_size, u64 zero_to)
|
|
|
{
|
|
|
int ret;
|
|
|
u32 clusters_to_add;
|
|
|
struct ocfs2_inode_info *oi = OCFS2_I(inode);
|
|
|
|
|
|
+ /*
|
|
|
+ * Only quota files call this without a bh, and they can't be
|
|
|
+ * refcounted.
|
|
|
+ */
|
|
|
+ BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
|
|
|
+ BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
|
|
|
+
|
|
|
clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
|
|
|
if (clusters_to_add < oi->ip_clusters)
|
|
|
clusters_to_add = 0;
|
|
@@ -890,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
|
|
|
* still need to zero the area between the old i_size and the
|
|
|
* new i_size.
|
|
|
*/
|
|
|
- ret = ocfs2_zero_extend(inode, zero_to);
|
|
|
+ ret = ocfs2_zero_extend(inode, di_bh, zero_to);
|
|
|
if (ret < 0)
|
|
|
mlog_errno(ret);
|
|
|
|
|
@@ -912,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode,
|
|
|
goto out;
|
|
|
|
|
|
if (i_size_read(inode) == new_i_size)
|
|
|
- goto out;
|
|
|
+ goto out;
|
|
|
BUG_ON(new_i_size < i_size_read(inode));
|
|
|
|
|
|
- /*
|
|
|
- * Fall through for converting inline data, even if the fs
|
|
|
- * supports sparse files.
|
|
|
- *
|
|
|
- * The check for inline data here is legal - nobody can add
|
|
|
- * the feature since we have i_mutex. We must check it again
|
|
|
- * after acquiring ip_alloc_sem though, as paths like mmap
|
|
|
- * might have raced us to converting the inode to extents.
|
|
|
- */
|
|
|
- if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
|
|
|
- && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
|
|
|
- goto out_update_size;
|
|
|
-
|
|
|
/*
|
|
|
* The alloc sem blocks people in read/write from reading our
|
|
|
* allocation until we're done changing it. We depend on
|
|
|
* i_mutex to block other extend/truncate calls while we're
|
|
|
- * here.
|
|
|
+ * here. We even have to hold it for sparse files because there
|
|
|
+ * might be some tail zeroing.
|
|
|
*/
|
|
|
down_write(&oi->ip_alloc_sem);
|
|
|
|
|
@@ -949,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode,
|
|
|
ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
|
|
|
if (ret) {
|
|
|
up_write(&oi->ip_alloc_sem);
|
|
|
-
|
|
|
mlog_errno(ret);
|
|
|
goto out;
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
|
|
|
- ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size);
|
|
|
+ if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
|
|
|
+ ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
|
|
|
+ else
|
|
|
+ ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
|
|
|
+ new_i_size);
|
|
|
|
|
|
up_write(&oi->ip_alloc_sem);
|
|
|
|