├── .gitignore ├── add-ext4-journal-lazy-mount-option ├── add-indirection-to-metadata-block-read-paths ├── add-journal-no-cleanup-option ├── add-support-for-log-metadata-block-tracking-in-log ├── archive ├── add-WARN_ON-with-unmapped-dirty-bh-in-writepage ├── auto-enable-journal_async_commit ├── avoid-unnecessarily-writing-back-dirty-pages-before-hole-punching ├── bio-debug ├── introduce-new-i_write_mutex └── jbd2-dont-write-non-commit-blocks-synchronously ├── cleaner ├── disable-writeback ├── jbd2-dont-double-bump-transaction-number ├── journal-superblock-changes ├── load-jmap-from-journal ├── old-patches ├── add-blkdiscard-ioctl ├── add-encryption-debug-files ├── add-fallocate-mode-blocking-for-debugging ├── add-squelch-errors-support ├── add-sysfs-bool-support ├── akpm-jbd2-locking-fix ├── block-dio-during-truncate ├── commit-as-soon-as-possible-after-log_start_commit ├── crypto-add-ciphertext_access-mount-option ├── crypto-add-ioctls-to-backup-crypto-metadata ├── crypto-rename-ext4_get_encryption_info ├── delalloc-debug ├── dont-use-io-end-if-not-needed ├── dump-in-use-buffers ├── include-mpage-functions-into-readpage.c ├── inline-ext4_get_block-into-readpage ├── move-read-page-functions-to-new-file ├── only-call-ext4_truncate-if-there-is-data-to-truncate ├── series └── use-discard-if-possible-in-blkdev_issue_zeroout ├── series ├── stable-boundary ├── stable-boundary-undo.patch └── timestamps /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | status 3 | 4 | -------------------------------------------------------------------------------- /add-ext4-journal-lazy-mount-option: -------------------------------------------------------------------------------- 1 | ext4: add journal_lazy mount option 2 | 3 | This option turns on the lazy journalling option, as described in the 4 | FAST 2017 paper, "Evolving Ext4 for Shingled Disks"[1]. 
5 | 6 | [1] https://www.usenix.org/conference/fast17/technical-sessions/presentation/aghayev 7 | 8 | Signed-off-by: Theodore Ts'o 9 | --- 10 | fs/ext4/ext4.h | 1 + 11 | fs/ext4/inode.c | 2 +- 12 | fs/ext4/ioctl.c | 42 ++++++++++++++++++++++++++++++++---------- 13 | fs/ext4/super.c | 56 ++++++++++++++++++++++++++++++++++++++++++++------------ 14 | 4 files changed, 78 insertions(+), 23 deletions(-) 15 | 16 | diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h 17 | index fc2bdaa71c44..e19b6bac2d91 100644 18 | --- a/fs/ext4/ext4.h 19 | +++ b/fs/ext4/ext4.h 20 | @@ -1079,6 +1079,7 @@ struct ext4_inode_info { 21 | * Mount flags set via mount options or defaults 22 | */ 23 | #define EXT4_MOUNT_NO_MBCACHE 0x00001 /* Do not use mbcache */ 24 | +#define EXT4_MOUNT_JOURNAL_LAZY 0x00002 /* Do lazy writeback of journalled metadata */ 25 | #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ 26 | #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ 27 | #define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ 28 | diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c 29 | index 3969d0278469..d43c326f4048 100644 30 | --- a/fs/ext4/inode.c 31 | +++ b/fs/ext4/inode.c 32 | @@ -3287,7 +3287,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) 33 | filemap_write_and_wait(mapping); 34 | } 35 | 36 | - if (EXT4_JOURNAL(inode) && 37 | + if (EXT4_JOURNAL(inode) && !test_opt(inode->i_sb, JOURNAL_LAZY) && 38 | ext4_test_inode_state(inode, EXT4_STATE_JDATA)) { 39 | /* 40 | * This is a REALLY heavyweight approach, but the use of 41 | diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c 42 | index a7074115d6f6..8556d6003d15 100644 43 | --- a/fs/ext4/ioctl.c 44 | +++ b/fs/ext4/ioctl.c 45 | @@ -239,6 +239,20 @@ static int ext4_ioctl_setflags(struct inode *inode, 46 | if (!capable(CAP_SYS_RESOURCE)) 47 | goto flags_out; 48 | } 49 | + 50 | + /* 51 | + * Clearing the JOURNAL_DATA flag is *hard* with lazy 52 | + * journalling. 
We can't use jbd2_journal_flush(); instead, 53 | + * we would have to make sure all blocks belonging to the file 54 | + * are evacuated from the journal and saved to their final 55 | + * location on disk. Punt for now. 56 | + */ 57 | + if ((oldflags & EXT4_JOURNAL_DATA_FL) && !jflag && 58 | + test_opt(inode->i_sb, JOURNAL_LAZY)) { 59 | + err = -EOPNOTSUPP; 60 | + goto flags_out; 61 | + } 62 | + 63 | if ((flags ^ oldflags) & EXT4_EXTENTS_FL) 64 | migrate = 1; 65 | 66 | @@ -626,6 +640,22 @@ static long ext4_ioctl_group_add(struct file *file, 67 | return err; 68 | } 69 | 70 | +/* 71 | + * If we are using journalling (excepting JBD2 lazy mode), make sure 72 | + * the block group descriptors are written out immediately 73 | + */ 74 | +static int flush_fs_group_descriptors(struct super_block *sb) 75 | +{ 76 | + int err = 0; 77 | + 78 | + if (EXT4_SB(sb)->s_journal && !test_opt(sb, JOURNAL_LAZY)) { 79 | + jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 80 | + err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 81 | + jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 82 | + } 83 | + return err; 84 | +} 85 | + 86 | long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 87 | { 88 | struct inode *inode = file_inode(filp); 89 | @@ -744,11 +774,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 90 | goto group_extend_out; 91 | 92 | err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); 93 | - if (EXT4_SB(sb)->s_journal) { 94 | - jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 95 | - err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 96 | - jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 97 | - } 98 | + err2 = flush_fs_group_descriptors(sb); 99 | if (err == 0) 100 | err = err2; 101 | mnt_drop_write_file(filp); 102 | @@ -886,11 +912,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 103 | goto resizefs_out; 104 | 105 | err = ext4_resize_fs(sb, n_blocks_count); 106 | - if 
(EXT4_SB(sb)->s_journal) { 107 | - jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 108 | - err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 109 | - jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 110 | - } 111 | + err2 = flush_fs_group_descriptors(sb); 112 | if (err == 0) 113 | err = err2; 114 | mnt_drop_write_file(filp); 115 | diff --git a/fs/ext4/super.c b/fs/ext4/super.c 116 | index 9339717b85c8..406e4d4ffae0 100644 117 | --- a/fs/ext4/super.c 118 | +++ b/fs/ext4/super.c 119 | @@ -928,7 +928,8 @@ static void ext4_put_super(struct super_block *sb) 120 | ext4_mb_release(sb); 121 | ext4_ext_release(sb); 122 | 123 | - if (!sb_rdonly(sb) && !aborted && !test_opt(sb, JOURNAL_NOCLEANUP)) { 124 | + if (!sb_rdonly(sb) && !aborted && !test_opt(sb, JOURNAL_NOCLEANUP) && 125 | + !test_opt(sb, JOURNAL_LAZY)) { 126 | ext4_clear_feature_journal_needs_recovery(sb); 127 | es->s_state = cpu_to_le16(sbi->s_mount_state); 128 | } 129 | @@ -1384,6 +1385,7 @@ enum { 130 | Opt_inode_readahead_blks, Opt_journal_ioprio, 131 | Opt_dioread_nolock, Opt_dioread_lock, 132 | Opt_journal_nocleanup, Opt_journal_cleanup, 133 | + Opt_journal_nolazy, Opt_journal_lazy, 134 | Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, 135 | Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache, 136 | }; 137 | @@ -1474,6 +1476,8 @@ static const match_table_t tokens = { 138 | {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */ 139 | {Opt_journal_nocleanup, "journal_nocleanup"}, 140 | {Opt_journal_cleanup, "journal_cleanup"}, 141 | + {Opt_journal_lazy, "journal_lazy"}, 142 | + {Opt_journal_nolazy, "journal_nolazy"}, 143 | {Opt_removed, "check=none"}, /* mount option from ext2/3 */ 144 | {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ 145 | {Opt_removed, "reservation"}, /* mount option from ext2/3 */ 146 | @@ -1686,6 +1690,8 @@ static const struct mount_opts { 147 | {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET}, 148 | {Opt_journal_nocleanup, 
EXT4_MOUNT_JOURNAL_NOCLEANUP, MOPT_SET}, 149 | {Opt_journal_cleanup, EXT4_MOUNT_JOURNAL_NOCLEANUP, MOPT_CLEAR}, 150 | + {Opt_journal_lazy, EXT4_MOUNT_JOURNAL_LAZY, MOPT_SET}, 151 | + {Opt_journal_nolazy, EXT4_MOUNT_JOURNAL_LAZY, MOPT_CLEAR}, 152 | {Opt_err, 0, 0} 153 | }; 154 | 155 | @@ -4570,6 +4576,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) 156 | journal->j_flags |= JBD2_NO_CLEANUP; 157 | else 158 | journal->j_flags &= ~JBD2_NO_CLEANUP; 159 | + if (test_opt(sb, JOURNAL_LAZY)) 160 | + journal->j_flags |= JBD2_LAZY; 161 | + else 162 | + journal->j_flags &= ~JBD2_LAZY; 163 | write_unlock(&journal->j_state_lock); 164 | } 165 | 166 | @@ -4804,6 +4814,24 @@ static int ext4_load_journal(struct super_block *sb, 167 | 168 | EXT4_SB(sb)->s_journal = journal; 169 | ext4_clear_journal_err(sb, es); 170 | + 171 | + if (test_opt(sb, JOURNAL_LAZY)) { 172 | + struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 173 | + 174 | + /* Read the latest version of the superblock from the journal */ 175 | + lock_buffer(sbh); 176 | + clear_buffer_uptodate(sbh); 177 | + err = jbd2_bh_submit_read(journal, sbh, __func__); 178 | + if (err) { 179 | + ext4_msg(sb, KERN_ERR, "error rereading superblock %d", 180 | + err); 181 | + set_buffer_uptodate(sbh); 182 | + } 183 | + if (!ext4_superblock_csum_verify(sb, es)) 184 | + ext4_msg(sb, KERN_ERR, 185 | + "superblock csum doesn't verify" 186 | + "after journal replay!"); 187 | + } 188 | return 0; 189 | } 190 | 191 | @@ -4894,6 +4922,9 @@ static void ext4_mark_recovery_complete(struct super_block *sb, 192 | { 193 | journal_t *journal = EXT4_SB(sb)->s_journal; 194 | 195 | + if (test_opt(sb, JOURNAL_LAZY)) 196 | + return; 197 | + 198 | if (!ext4_has_feature_journal(sb)) { 199 | BUG_ON(journal != NULL); 200 | return; 201 | @@ -5029,21 +5060,20 @@ static int ext4_freeze(struct super_block *sb) 202 | journal = EXT4_SB(sb)->s_journal; 203 | 204 | if (journal) { 205 | - /* Now we set up the journal barrier. 
*/ 206 | - jbd2_journal_lock_updates(journal); 207 | - 208 | /* 209 | - * Don't clear the needs_recovery flag if we failed to 210 | - * flush the journal. 211 | + * Set the journal barrier, then flush the journal and 212 | + * clear the needs_recovery flag if we are not in 213 | + * JBD2_LAZY mode. 214 | */ 215 | - error = jbd2_journal_flush(journal); 216 | - if (error < 0) 217 | - goto out; 218 | + jbd2_journal_lock_updates(journal); 219 | 220 | - /* Journal blocked and flushed, clear needs_recovery flag. */ 221 | + if (!test_opt(sb, JOURNAL_LAZY)) { 222 | + error = jbd2_journal_flush(journal); 223 | + if (error < 0) 224 | + goto out; 225 | + } 226 | ext4_clear_feature_journal_needs_recovery(sb); 227 | } 228 | - 229 | error = ext4_commit_super(sb, 1); 230 | out: 231 | if (journal) 232 | @@ -5061,7 +5091,7 @@ static int ext4_unfreeze(struct super_block *sb) 233 | if (sb_rdonly(sb) || ext4_forced_shutdown(EXT4_SB(sb))) 234 | return 0; 235 | 236 | - if (EXT4_SB(sb)->s_journal) { 237 | + if (EXT4_SB(sb)->s_journal && !test_opt(sb, JOURNAL_LAZY)) { 238 | /* Reset the needs_recovery flag before the fs is unlocked. */ 239 | ext4_set_feature_journal_needs_recovery(sb); 240 | } 241 | @@ -5595,6 +5625,8 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, 242 | * We don't need to lock updates but journal_flush() could 243 | * otherwise be livelocked... 
244 | */ 245 | + if (test_opt(sb, JOURNAL_LAZY)) 246 | + return -EOPNOTSUPP; 247 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); 248 | err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 249 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 250 | -------------------------------------------------------------------------------- /add-indirection-to-metadata-block-read-paths: -------------------------------------------------------------------------------- 1 | Add indirection to metadata read paths 2 | 3 | From: Abutalib Aghayev 4 | 5 | Change all metadata block reads to use jmap-aware function that first looks 6 | up the metadata block in the jmap. If lookup is successful, the function 7 | reads the corresponding log block from the journal and copies it to the 8 | metadata block buffer head. Otherwise, it reads the metadata block from 9 | the file system, just like standard jmap-unaware function. 10 | 11 | Signed-off-by: Abutalib Aghayev 12 | Signed-off-by: Theodore Ts'o 13 | --- 14 | fs/ext4/extents.c | 3 ++- 15 | fs/ext4/ialloc.c | 5 ++++- 16 | fs/ext4/indirect.c | 3 ++- 17 | fs/ext4/inode.c | 20 ++++++++++++++------ 18 | fs/ext4/move_extent.c | 3 ++- 19 | fs/ext4/resize.c | 4 +++- 20 | 6 files changed, 27 insertions(+), 11 deletions(-) 21 | 22 | diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c 23 | index 97f0fd06728d..47914c6a2556 100644 24 | --- a/fs/ext4/extents.c 25 | +++ b/fs/ext4/extents.c 26 | @@ -517,6 +517,7 @@ __read_extent_tree_block(const char *function, unsigned int line, 27 | { 28 | struct buffer_head *bh; 29 | int err; 30 | + journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 31 | 32 | bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS); 33 | if (unlikely(!bh)) 34 | @@ -524,7 +525,7 @@ __read_extent_tree_block(const char *function, unsigned int line, 35 | 36 | if (!bh_uptodate_or_lock(bh)) { 37 | trace_ext4_ext_load_extent(inode, pblk, _RET_IP_); 38 | - err = bh_submit_read(bh); 39 | + err = jbd2_bh_submit_read(journal, bh, 
__func__); 40 | if (err < 0) 41 | goto errout; 42 | } 43 | diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c 44 | index 507bfb3344d4..1c3447629e76 100644 45 | --- a/fs/ext4/ialloc.c 46 | +++ b/fs/ext4/ialloc.c 47 | @@ -14,6 +14,7 @@ 48 | 49 | #include 50 | #include 51 | +#include 52 | #include 53 | #include 54 | #include 55 | @@ -162,6 +163,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) 56 | struct buffer_head *bh = NULL; 57 | ext4_fsblk_t bitmap_blk; 58 | int err; 59 | + journal_t *journal = EXT4_SB(sb)->s_journal; 60 | 61 | desc = ext4_get_group_desc(sb, block_group, NULL); 62 | if (!desc) 63 | @@ -216,7 +218,8 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) 64 | trace_ext4_load_inode_bitmap(sb, block_group); 65 | bh->b_end_io = ext4_end_bitmap_read; 66 | get_bh(bh); 67 | - submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh); 68 | + jbd2_submit_bh(journal, REQ_OP_READ, REQ_META | REQ_PRIO, bh, __func__); 69 | + 70 | wait_on_buffer(bh); 71 | if (!buffer_uptodate(bh)) { 72 | put_bh(bh); 73 | diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c 74 | index 7ffa290cbb8e..06a79f5e563e 100644 75 | --- a/fs/ext4/indirect.c 76 | +++ b/fs/ext4/indirect.c 77 | @@ -145,6 +145,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, 78 | Indirect chain[4], int *err) 79 | { 80 | struct super_block *sb = inode->i_sb; 81 | + journal_t *journal = EXT4_SB(sb)->s_journal; 82 | Indirect *p = chain; 83 | struct buffer_head *bh; 84 | int ret = -EIO; 85 | @@ -162,7 +163,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, 86 | } 87 | 88 | if (!bh_uptodate_or_lock(bh)) { 89 | - if (bh_submit_read(bh) < 0) { 90 | + if (jbd2_bh_submit_read(journal, bh, __func__) < 0) { 91 | put_bh(bh); 92 | goto failure; 93 | } 94 | diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c 95 | index c774bdc22759..a56e717b39be 100644 96 | --- a/fs/ext4/inode.c 97 | +++ b/fs/ext4/inode.c 98 | @@ -1001,13 +1001,15 @@ struct buffer_head 
*ext4_bread(handle_t *handle, struct inode *inode, 99 | ext4_lblk_t block, int map_flags) 100 | { 101 | struct buffer_head *bh; 102 | + journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 103 | 104 | bh = ext4_getblk(handle, inode, block, map_flags); 105 | if (IS_ERR(bh)) 106 | return bh; 107 | if (!bh || buffer_uptodate(bh)) 108 | return bh; 109 | - ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh); 110 | + jbd2_ll_rw_block(journal, REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh, 111 | + __func__); 112 | wait_on_buffer(bh); 113 | if (buffer_uptodate(bh)) 114 | return bh; 115 | @@ -1020,6 +1022,7 @@ int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count, 116 | bool wait, struct buffer_head **bhs) 117 | { 118 | int i, err; 119 | + journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 120 | 121 | for (i = 0; i < bh_count; i++) { 122 | bhs[i] = ext4_getblk(NULL, inode, block + i, 0 /* map_flags */); 123 | @@ -1033,8 +1036,9 @@ int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count, 124 | for (i = 0; i < bh_count; i++) 125 | /* Note that NULL bhs[i] is valid because of holes. 
*/ 126 | if (bhs[i] && !buffer_uptodate(bhs[i])) 127 | - ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, 128 | - &bhs[i]); 129 | + jbd2_ll_rw_block(journal, REQ_OP_READ, 130 | + REQ_META | REQ_PRIO, 1, &bhs[i], 131 | + __func__); 132 | 133 | if (!wait) 134 | return 0; 135 | @@ -4448,6 +4452,7 @@ static int __ext4_get_inode_loc(struct inode *inode, 136 | struct super_block *sb = inode->i_sb; 137 | ext4_fsblk_t block; 138 | int inodes_per_block, inode_offset; 139 | + journal_t *journal = EXT4_SB(sb)->s_journal; 140 | 141 | iloc->bh = NULL; 142 | if (!ext4_valid_inum(sb, inode->i_ino)) 143 | @@ -4551,8 +4556,10 @@ static int __ext4_get_inode_loc(struct inode *inode, 144 | table += num / inodes_per_block; 145 | if (end > table) 146 | end = table; 147 | - while (b <= end) 148 | - sb_breadahead(sb, b++); 149 | + if (journal) { 150 | + while (b <= end) 151 | + jbd2_sb_breadahead(journal, sb, b++); 152 | + } 153 | } 154 | 155 | /* 156 | @@ -4563,7 +4570,8 @@ static int __ext4_get_inode_loc(struct inode *inode, 157 | trace_ext4_load_inode(inode); 158 | get_bh(bh); 159 | bh->b_end_io = end_buffer_read_sync; 160 | - submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh); 161 | + jbd2_submit_bh(journal, REQ_OP_READ, REQ_META | REQ_PRIO, bh, 162 | + __func__); 163 | wait_on_buffer(bh); 164 | if (!buffer_uptodate(bh)) { 165 | EXT4_ERROR_INODE_BLOCK(inode, block, 166 | diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c 167 | index 9bb36909ec92..0f6c00d0df17 100644 168 | --- a/fs/ext4/move_extent.c 169 | +++ b/fs/ext4/move_extent.c 170 | @@ -177,6 +177,7 @@ static int 171 | mext_page_mkuptodate(struct page *page, unsigned from, unsigned to) 172 | { 173 | struct inode *inode = page->mapping->host; 174 | + journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 175 | sector_t block; 176 | struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; 177 | unsigned int blocksize, block_start, block_end; 178 | @@ -225,7 +226,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, 
unsigned to) 179 | for (i = 0; i < nr; i++) { 180 | bh = arr[i]; 181 | if (!bh_uptodate_or_lock(bh)) { 182 | - err = bh_submit_read(bh); 183 | + err = jbd2_bh_submit_read(journal, bh, __func__); 184 | if (err) 185 | return err; 186 | } 187 | diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c 188 | index 035cd3f4785e..5c817953053b 100644 189 | --- a/fs/ext4/resize.c 190 | +++ b/fs/ext4/resize.c 191 | @@ -1193,10 +1193,12 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, 192 | static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block) 193 | { 194 | struct buffer_head *bh = sb_getblk(sb, block); 195 | + journal_t *journal = EXT4_SB(sb)->s_journal; 196 | + 197 | if (unlikely(!bh)) 198 | return NULL; 199 | if (!bh_uptodate_or_lock(bh)) { 200 | - if (bh_submit_read(bh) < 0) { 201 | + if (jbd2_bh_submit_read(journal, bh, __func__) < 0) { 202 | brelse(bh); 203 | return NULL; 204 | } 205 | -------------------------------------------------------------------------------- /add-journal-no-cleanup-option: -------------------------------------------------------------------------------- 1 | ext4, jbd2: add the journal_nocleanup mount option 2 | 3 | This debugging option is useful for generating test cases for the 4 | journal replay code. 
5 | 6 | Signed-off-by: Theodore Ts'o 7 | --- 8 | fs/ext4/ext4.h | 1 + 9 | fs/ext4/super.c | 11 ++++++++++- 10 | fs/jbd2/journal.c | 12 +++++++++--- 11 | include/linux/jbd2.h | 1 + 12 | 4 files changed, 21 insertions(+), 4 deletions(-) 13 | 14 | diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h 15 | index 7c7123f265c2..fc2bdaa71c44 100644 16 | --- a/fs/ext4/ext4.h 17 | +++ b/fs/ext4/ext4.h 18 | @@ -1115,6 +1115,7 @@ struct ext4_inode_info { 19 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ 20 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ 21 | #define EXT4_MOUNT_WARN_ON_ERROR 0x2000000 /* Trigger WARN_ON on error */ 22 | +#define EXT4_MOUNT_JOURNAL_NOCLEANUP 0x4000000 /* Preserve the journal on unmount */ 23 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ 24 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ 25 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ 26 | diff --git a/fs/ext4/super.c b/fs/ext4/super.c 27 | index ae86983cbf60..9339717b85c8 100644 28 | --- a/fs/ext4/super.c 29 | +++ b/fs/ext4/super.c 30 | @@ -928,7 +928,7 @@ static void ext4_put_super(struct super_block *sb) 31 | ext4_mb_release(sb); 32 | ext4_ext_release(sb); 33 | 34 | - if (!sb_rdonly(sb) && !aborted) { 35 | + if (!sb_rdonly(sb) && !aborted && !test_opt(sb, JOURNAL_NOCLEANUP)) { 36 | ext4_clear_feature_journal_needs_recovery(sb); 37 | es->s_state = cpu_to_le16(sbi->s_mount_state); 38 | } 39 | @@ -1383,6 +1383,7 @@ enum { 40 | Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, 41 | Opt_inode_readahead_blks, Opt_journal_ioprio, 42 | Opt_dioread_nolock, Opt_dioread_lock, 43 | + Opt_journal_nocleanup, Opt_journal_cleanup, 44 | Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, 45 | Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache, 46 | }; 47 | @@ -1471,6 +1472,8 @@ static const match_table_t tokens = { 48 | {Opt_test_dummy_encryption, 
"test_dummy_encryption"}, 49 | {Opt_nombcache, "nombcache"}, 50 | {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */ 51 | + {Opt_journal_nocleanup, "journal_nocleanup"}, 52 | + {Opt_journal_cleanup, "journal_cleanup"}, 53 | {Opt_removed, "check=none"}, /* mount option from ext2/3 */ 54 | {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ 55 | {Opt_removed, "reservation"}, /* mount option from ext2/3 */ 56 | @@ -1681,6 +1684,8 @@ static const struct mount_opts { 57 | {Opt_max_dir_size_kb, 0, MOPT_GTE0}, 58 | {Opt_test_dummy_encryption, 0, MOPT_GTE0}, 59 | {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET}, 60 | + {Opt_journal_nocleanup, EXT4_MOUNT_JOURNAL_NOCLEANUP, MOPT_SET}, 61 | + {Opt_journal_cleanup, EXT4_MOUNT_JOURNAL_NOCLEANUP, MOPT_CLEAR}, 62 | {Opt_err, 0, 0} 63 | }; 64 | 65 | @@ -4561,6 +4566,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) 66 | journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; 67 | else 68 | journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; 69 | + if (test_opt(sb, JOURNAL_NOCLEANUP)) 70 | + journal->j_flags |= JBD2_NO_CLEANUP; 71 | + else 72 | + journal->j_flags &= ~JBD2_NO_CLEANUP; 73 | write_unlock(&journal->j_state_lock); 74 | } 75 | 76 | diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c 77 | index 8ef6b6daaa7a..860ab3c802a4 100644 78 | --- a/fs/jbd2/journal.c 79 | +++ b/fs/jbd2/journal.c 80 | @@ -1727,6 +1727,11 @@ int jbd2_journal_destroy(journal_t *journal) 81 | if (journal->j_running_transaction) 82 | jbd2_journal_commit_transaction(journal); 83 | 84 | + if (journal->j_flags & JBD2_NO_CLEANUP) { 85 | + jbd2_journal_destroy_checkpoint(journal); 86 | + journal->j_checkpoint_transactions = NULL; 87 | + } 88 | + 89 | /* Force any old transactions to disk */ 90 | 91 | /* Totally anal locking here... 
*/ 92 | @@ -1754,7 +1759,9 @@ int jbd2_journal_destroy(journal_t *journal) 93 | spin_unlock(&journal->j_list_lock); 94 | 95 | if (journal->j_sb_buffer) { 96 | - if (!is_journal_aborted(journal)) { 97 | + if (is_journal_aborted(journal)) 98 | + err = -EIO; 99 | + else if ((journal->j_flags & JBD2_NO_CLEANUP) == 0) { 100 | mutex_lock_io(&journal->j_checkpoint_mutex); 101 | 102 | write_lock(&journal->j_state_lock); 103 | @@ -1765,8 +1772,7 @@ int jbd2_journal_destroy(journal_t *journal) 104 | jbd2_mark_journal_empty(journal, 105 | REQ_SYNC | REQ_PREFLUSH | REQ_FUA); 106 | mutex_unlock(&journal->j_checkpoint_mutex); 107 | - } else 108 | - err = -EIO; 109 | + } 110 | brelse(journal->j_sb_buffer); 111 | } 112 | 113 | diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h 114 | index b708e5169d1d..81fa9fa7ce9c 100644 115 | --- a/include/linux/jbd2.h 116 | +++ b/include/linux/jbd2.h 117 | @@ -1235,6 +1235,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) 118 | * data write error in ordered 119 | * mode */ 120 | #define JBD2_REC_ERR 0x080 /* The errno in the sb has been recorded */ 121 | +#define JBD2_NO_CLEANUP 0x100 /* Don't flush empty the journal on shutdown */ 122 | 123 | /* 124 | * Function declarations for the journaling transaction and buffer 125 | -------------------------------------------------------------------------------- /archive/add-WARN_ON-with-unmapped-dirty-bh-in-writepage: -------------------------------------------------------------------------------- 1 | ext4: add WARN_ON on unmapped dirty buffer_heads in writepage 2 | 3 | From: "Aneesh Kumar K.V" 4 | 5 | Now that block_lock_hole_extend() clears the dirty flag of 6 | buffer_heads outside i_size we should not find buffer_heads which are 7 | unmapped and dirty in writepage. If we find do a WARN_ON. We can 8 | still continue because block_write_full page look at the mapped flag 9 | only. 10 | 11 | Following sequence of events would result in the above condition. 
12 | 1) truncate(f, 1024) 13 | 2) mmap(f, 0, 4096) 14 | 3) a[0] = 'a' 15 | 4) truncate(f, 4096) 16 | 5) writepage(...) 17 | 18 | After step 3 we would have unmapped buffer_heads outside i_size. 19 | After step 4 we would have unmapped buffer_heads within i_size. 20 | 21 | Now that truncate is calling block_lock_hole_extend which in turn 22 | is clearing the dirty flag, we can safely assume that we won't 23 | find unmapped dirty buffer_heads in write page. If we did find one 24 | we should find out why. 25 | 26 | Signed-off-by: Aneesh Kumar K.V 27 | Acked-by: Jan Kara 28 | Signed-off-by: "Theodore Ts'o" 29 | --- 30 | fs/ext4/inode.c | 12 ++++++++++++ 31 | 1 files changed, 12 insertions(+), 0 deletions(-) 32 | 33 | diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c 34 | index 2219daa..9bba474 100644 35 | --- a/fs/ext4/inode.c 36 | +++ b/fs/ext4/inode.c 37 | @@ -2488,6 +2488,10 @@ static int __ext4_journalled_writepage(struct page *page, 38 | return ret; 39 | } 40 | 41 | +static int ext4_bh_unmapped_and_dirty(handle_t *handle, struct buffer_head *bh) 42 | +{ 43 | + return !buffer_mapped(bh) && buffer_dirty(bh); 44 | +} 45 | 46 | /* 47 | * Note that we don't need to start a transaction unless we're journaling data 48 | @@ -2602,6 +2606,14 @@ static int ext4_writepage(struct page *page, 49 | /* now mark the buffer_heads as dirty and uptodate */ 50 | block_commit_write(page, 0, len); 51 | } 52 | + /* 53 | + * There should not be any unmapped and dirty 54 | + * buffer_heads at this point. Look at block_lock_hole_extend 55 | + * for more info. 
If we find one print more info 56 | + */ 57 | + WARN(walk_page_buffers(NULL, page_bufs, 0, len, NULL, 58 | + ext4_bh_unmapped_and_dirty), 59 | + "Unmapped dirty buffer_heads found in %s\n", __func__); 60 | 61 | if (PageChecked(page) && ext4_should_journal_data(inode)) { 62 | /* 63 | -- 64 | 1.6.3.1.244.gf9275 65 | 66 | -- 67 | To unsubscribe from this list: send the line "unsubscribe linux-ext4" in 68 | the body of a message to majordomo@vger.kernel.org 69 | More majordomo info at http://vger.kernel.org/majordomo-info.html 70 | 71 | -------------------------------------------------------------------------------- /archive/auto-enable-journal_async_commit: -------------------------------------------------------------------------------- 1 | ext4: automatically enable journal_async_commit on ext4 file systems 2 | 3 | Now that we have cleaned up journal_async_commit, it's safe to enable 4 | it all the time. But we only want to do so if ext4-specific INCOMPAT 5 | features are enabled, since otherwise we will prevent the filesystem 6 | from being mounted using ext3. 7 | 8 | Signed-off-by: "Theodore Ts'o" 9 | 10 | --- 11 | Documentation/filesystems/ext4.txt | 11 ++++++++--- 12 | fs/ext4/super.c | 29 ++++++++++++++++++++++++++--- 13 | 2 files changed, 34 insertions(+), 6 deletions(-) 14 | 15 | diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt 16 | index 6ab9442..713f5d5 100644 17 | --- a/Documentation/filesystems/ext4.txt 18 | +++ b/Documentation/filesystems/ext4.txt 19 | @@ -140,9 +140,14 @@ journal_checksum Enable checksumming of the journal transactions. 20 | compatible change and will be ignored by older kernels. 21 | 22 | journal_async_commit Commit block can be written to disk without waiting 23 | - for descriptor blocks. If enabled older kernels cannot 24 | - mount the device. This will enable 'journal_checksum' 25 | - internally. 26 | + for descriptor blocks. This will enable 27 | + 'journal_checksum' internally. 
This mount 28 | + option will be automatically enabled if 29 | + ext4-specific INCOMPAT features are present in 30 | + the file system. 31 | + 32 | +nojournal_async_commit Disable the journal_async_commit option, even 33 | + for ext4 filesystems. 34 | 35 | journal=update Update the ext4 file system's journal to the current 36 | format. 37 | diff --git a/fs/ext4/super.c b/fs/ext4/super.c 38 | index 9706981..d719551 100644 39 | --- a/fs/ext4/super.c 40 | +++ b/fs/ext4/super.c 41 | @@ -203,6 +203,15 @@ void ext4_itable_unused_set(struct super_block *sb, 42 | bg->bg_itable_unused_hi = cpu_to_le16(count >> 16); 43 | } 44 | 45 | +/* 46 | + * If ext4 filesystem features are enabled, then enable async_commits 47 | + * by default. 48 | + */ 49 | +#define ASYNC_COMMIT_DEFAULT(sb) (EXT4_HAS_INCOMPAT_FEATURE(sb, \ 50 | + (EXT4_FEATURE_INCOMPAT_EXTENTS| \ 51 | + EXT4_FEATURE_INCOMPAT_64BIT| \ 52 | + EXT4_FEATURE_INCOMPAT_FLEX_BG))) 53 | + 54 | 55 | /* Just increment the non-pointer handle value */ 56 | static handle_t *ext4_get_nojournal(void) 57 | @@ -1020,9 +1029,15 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) 58 | */ 59 | seq_puts(seq, ",barrier="); 60 | seq_puts(seq, test_opt(sb, BARRIER) ? 
"1" : "0"); 61 | - if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) 62 | - seq_puts(seq, ",journal_async_commit"); 63 | - else if (test_opt(sb, JOURNAL_CHECKSUM)) 64 | + if (ASYNC_COMMIT_DEFAULT(sb)) { 65 | + if (!test_opt(sb, JOURNAL_ASYNC_COMMIT)) 66 | + seq_puts(seq, ",nojournal_async_commit"); 67 | + } else { 68 | + if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) 69 | + seq_puts(seq, ",journal_async_commit"); 70 | + } 71 | + if (test_opt(sb, JOURNAL_CHECKSUM) && 72 | + !test_opt(sb, JOURNAL_ASYNC_COMMIT)) 73 | seq_puts(seq, ",journal_checksum"); 74 | if (test_opt(sb, I_VERSION)) 75 | seq_puts(seq, ",i_version"); 76 | @@ -1239,6 +1254,7 @@ enum { 77 | Opt_commit, Opt_min_batch_time, Opt_max_batch_time, 78 | Opt_journal_update, Opt_journal_dev, 79 | Opt_journal_checksum, Opt_journal_async_commit, 80 | + Opt_nojournal_async_commit, 81 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 82 | Opt_data_err_abort, Opt_data_err_ignore, 83 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 84 | @@ -1285,6 +1301,7 @@ static const match_table_t tokens = { 85 | {Opt_journal_dev, "journal_dev=%u"}, 86 | {Opt_journal_checksum, "journal_checksum"}, 87 | {Opt_journal_async_commit, "journal_async_commit"}, 88 | + {Opt_nojournal_async_commit, "nojournal_async_commit"}, 89 | {Opt_abort, "abort"}, 90 | {Opt_data_journal, "data=journal"}, 91 | {Opt_data_ordered, "data=ordered"}, 92 | @@ -1559,6 +1576,9 @@ static int parse_options(char *options, struct super_block *sb, 93 | set_opt(sb, JOURNAL_ASYNC_COMMIT); 94 | set_opt(sb, JOURNAL_CHECKSUM); 95 | break; 96 | + case Opt_nojournal_async_commit: 97 | + clear_opt(sb, JOURNAL_ASYNC_COMMIT); 98 | + break; 99 | case Opt_noload: 100 | set_opt(sb, NOLOAD); 101 | break; 102 | @@ -3161,6 +3181,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) 103 | ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) 104 | set_opt(sb, DELALLOC); 105 | 106 | + if (ASYNC_COMMIT_DEFAULT(sb)) 107 | + set_opt(sb, 
JOURNAL_ASYNC_COMMIT); 108 | + 109 | if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, 110 | &journal_devnum, &journal_ioprio, NULL, 0)) { 111 | ext4_msg(sb, KERN_WARNING, 112 | -------------------------------------------------------------------------------- /archive/avoid-unnecessarily-writing-back-dirty-pages-before-hole-punching: -------------------------------------------------------------------------------- 1 | ext4: avoid unnecessarily writing back dirty pages before hole punching 2 | 3 | From: Li Wang 4 | 5 | For hole punching, currently ext4 will synchronously write back the 6 | dirty pages that fit into the hole; since the data on the disk corresponding 7 | to those pages is to be deleted, it is beneficial to directly release 8 | those pages, no matter whether they are dirty or not, except in the ordered case. 9 | 10 | [ Fixed error return to unlock i_mutex if ext4_begin_ordered_punch_hole() 11 | fails. Thanks to Wei Yongjun for 12 | pointing this out.] 13 | 14 | Signed-off-by: Li Wang 15 | Signed-off-by: Yunchuan Wen 16 | Signed-off-by: "Theodore Ts'o" 17 | Cc: Dmitry Monakhov 18 | Reviewed-by: Zheng Liu 19 | Reviewed-by: Jan Kara 20 | --- 21 | fs/ext4/inode.c | 28 ++++++++++++++++------------ 22 | fs/jbd2/journal.c | 2 +- 23 | fs/jbd2/transaction.c | 29 ++++++----------------------- 24 | include/linux/jbd2.h | 33 +++++++++++++++++++++++++++++++-- 25 | 4 files changed, 54 insertions(+), 38 deletions(-) 26 | 27 | diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c 28 | index 0db830d..06136b5 100644 29 | --- a/fs/ext4/inode.c 30 | +++ b/fs/ext4/inode.c 31 | @@ -3466,6 +3466,16 @@ int ext4_can_truncate(struct inode *inode) 32 | return 0; 33 | } 34 | 35 | +static inline int ext4_begin_ordered_punch_hole(struct inode *inode, 36 | + loff_t start, loff_t length) 37 | +{ 38 | + if (!EXT4_I(inode)->jinode) 39 | + return 0; 40 | + return jbd2_journal_begin_ordered_punch_hole(EXT4_JOURNAL(inode), 41 | + EXT4_I(inode)->jinode, 42 | + start, start+length-1); 43 | +} 44 | 45 | /* 46
| * ext4_punch_hole: punches a hole in a file by releaseing the blocks 47 | * associated with the given offset and length 48 | @@ -3482,7 +3492,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) 49 | struct inode *inode = file_inode(file); 50 | struct super_block *sb = inode->i_sb; 51 | ext4_lblk_t first_block, stop_block; 52 | - struct address_space *mapping = inode->i_mapping; 53 | loff_t first_block_offset, last_block_offset; 54 | handle_t *handle; 55 | unsigned int credits; 56 | @@ -3498,17 +3507,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) 57 | 58 | trace_ext4_punch_hole(inode, offset, length); 59 | 60 | - /* 61 | - * Write out all dirty pages to avoid race conditions 62 | - * Then release them. 63 | - */ 64 | - if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 65 | - ret = filemap_write_and_wait_range(mapping, offset, 66 | - offset + length - 1); 67 | - if (ret) 68 | - return ret; 69 | - } 70 | - 71 | mutex_lock(&inode->i_mutex); 72 | /* It's not possible punch hole on append only file */ 73 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { 74 | @@ -3537,6 +3535,12 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) 75 | first_block_offset = round_up(offset, sb->s_blocksize); 76 | last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; 77 | 78 | + if (ext4_should_order_data(inode)) { 79 | + ret = ext4_begin_ordered_punch_hole(inode, offset, length); 80 | + if (ret) 81 | + goto out_mutex; 82 | + } 83 | + 84 | /* Now release the pages and zero block aligned part of pages*/ 85 | if (last_block_offset > first_block_offset) 86 | truncate_pagecache_range(inode, first_block_offset, 87 | diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c 88 | index 915dd57..4c8b8d4 100644 89 | --- a/fs/jbd2/journal.c 90 | +++ b/fs/jbd2/journal.c 91 | @@ -97,7 +97,7 @@ EXPORT_SYMBOL(jbd2_journal_force_commit); 92 | EXPORT_SYMBOL(jbd2_journal_file_inode); 93 | 
EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); 94 | EXPORT_SYMBOL(jbd2_journal_release_jbd_inode); 95 | -EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); 96 | +EXPORT_SYMBOL(jbd2_journal_begin_ordered_punch_hole); 97 | EXPORT_SYMBOL(jbd2_inode_cache); 98 | 99 | static void __journal_abort_soft (journal_t *journal, int errno); 100 | diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c 101 | index dd422e6..91d62e1 100644 102 | --- a/fs/jbd2/transaction.c 103 | +++ b/fs/jbd2/transaction.c 104 | @@ -2419,29 +2419,10 @@ done: 105 | return 0; 106 | } 107 | 108 | -/* 109 | - * File truncate and transaction commit interact with each other in a 110 | - * non-trivial way. If a transaction writing data block A is 111 | - * committing, we cannot discard the data by truncate until we have 112 | - * written them. Otherwise if we crashed after the transaction with 113 | - * write has committed but before the transaction with truncate has 114 | - * committed, we could see stale data in block A. This function is a 115 | - * helper to solve this problem. It starts writeout of the truncated 116 | - * part in case it is in the committing transaction. 117 | - * 118 | - * Filesystem code must call this function when inode is journaled in 119 | - * ordered mode before truncation happens and after the inode has been 120 | - * placed on orphan list with the new inode size. The second condition 121 | - * avoids the race that someone writes new data and we start 122 | - * committing the transaction after this function has been called but 123 | - * before a transaction for truncate is started (and furthermore it 124 | - * allows us to optimize the case where the addition to orphan list 125 | - * happens in the same transaction as write --- we don't have to write 126 | - * any data in such case). 
127 | - */ 128 | -int jbd2_journal_begin_ordered_truncate(journal_t *journal, 129 | + 130 | +int jbd2_journal_begin_ordered_punch_hole(journal_t *journal, 131 | struct jbd2_inode *jinode, 132 | - loff_t new_size) 133 | + loff_t start, loff_t end) 134 | { 135 | transaction_t *inode_trans, *commit_trans; 136 | int ret = 0; 137 | @@ -2460,10 +2441,12 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal, 138 | spin_unlock(&journal->j_list_lock); 139 | if (inode_trans == commit_trans) { 140 | ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping, 141 | - new_size, LLONG_MAX); 142 | + start, end); 143 | if (ret) 144 | jbd2_journal_abort(journal, ret); 145 | } 146 | out: 147 | return ret; 148 | } 149 | + 150 | + 151 | diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h 152 | index 0302f3f..5f3c094 100644 153 | --- a/include/linux/jbd2.h 154 | +++ b/include/linux/jbd2.h 155 | @@ -1157,12 +1157,41 @@ extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long * 156 | extern int jbd2_journal_force_commit(journal_t *); 157 | extern int jbd2_journal_force_commit_nested(journal_t *); 158 | extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode); 159 | -extern int jbd2_journal_begin_ordered_truncate(journal_t *journal, 160 | - struct jbd2_inode *inode, loff_t new_size); 161 | +extern int jbd2_journal_begin_ordered_punch_hole(journal_t *, 162 | + struct jbd2_inode *, 163 | + loff_t, loff_t); 164 | extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); 165 | extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode); 166 | 167 | /* 168 | + * File truncate and transaction commit interact with each other in a 169 | + * non-trivial way. If a transaction writing data block A is 170 | + * committing, we cannot discard the data by truncate until we have 171 | + * written them. 
Otherwise if we crashed after the transaction with 172 | + * write has committed but before the transaction with truncate has 173 | + * committed, we could see stale data in block A. This function is a 174 | + * helper to solve this problem. It starts writeout of the truncated 175 | + * part in case it is in the committing transaction. 176 | + * 177 | + * Filesystem code must call this function when inode is journaled in 178 | + * ordered mode before truncation happens and after the inode has been 179 | + * placed on orphan list with the new inode size. The second condition 180 | + * avoids the race that someone writes new data and we start 181 | + * committing the transaction after this function has been called but 182 | + * before a transaction for truncate is started (and furthermore it 183 | + * allows us to optimize the case where the addition to orphan list 184 | + * happens in the same transaction as write --- we don't have to write 185 | + * any data in such case). 186 | + */ 187 | +static inline int jbd2_journal_begin_ordered_truncate(journal_t *journal, 188 | + struct jbd2_inode *jinode, 189 | + loff_t new_size) 190 | +{ 191 | + return jbd2_journal_begin_ordered_punch_hole(journal, jinode, 192 | + new_size, LLONG_MAX); 193 | +} 194 | + 195 | +/* 196 | * journal_head management 197 | */ 198 | struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh); 199 | -------------------------------------------------------------------------------- /archive/bio-debug: -------------------------------------------------------------------------------- 1 | ext4: debugging patches for the bio code 2 | 3 | --- 4 | block/blk-core.c | 17 ++++++++++ 5 | fs/buffer.c | 36 ++++++++++++++++++++++ 6 | fs/ext4/inode.c | 18 +++++++++++ 7 | fs/ext4/mballoc.c | 8 ++++- 8 | fs/ext4/page-io.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++- 9 | fs/jbd2/commit.c | 18 +++++++++++ 10 | fs/jbd2/transaction.c | 5 +++ 11 | mm/filemap.c | 7 ++++ 12 | 8 files changed, 186 
insertions(+), 3 deletions(-) 13 | 14 | diff --git a/block/blk-core.c b/block/blk-core.c 15 | index 32a1c12..115574b 100644 16 | --- a/block/blk-core.c 17 | +++ b/block/blk-core.c 18 | @@ -248,6 +248,9 @@ int blk_remove_plug(struct request_queue *q) 19 | if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q)) 20 | return 0; 21 | 22 | +#if 1 /* PDEBUG */ 23 | + trace_printk("del timer %s\n", q->backing_dev_info.name); 24 | +#endif 25 | del_timer(&q->unplug_timer); 26 | return 1; 27 | } 28 | @@ -379,6 +382,9 @@ EXPORT_SYMBOL(blk_stop_queue); 29 | */ 30 | void blk_sync_queue(struct request_queue *q) 31 | { 32 | +#if 1 /* PDEBUG */ 33 | + trace_printk("del timer %s\n", q->backing_dev_info.name); 34 | +#endif 35 | del_timer_sync(&q->unplug_timer); 36 | del_timer_sync(&q->timeout); 37 | cancel_work_sync(&q->unplug_work); 38 | @@ -1525,6 +1531,17 @@ static inline void __generic_make_request(struct bio *bio) 39 | trace_block_bio_queue(q, bio); 40 | 41 | ret = q->make_request_fn(q, bio); 42 | + 43 | +#if 1 /* PDEBUG */ 44 | + { 45 | + char str[KSYM_SYMBOL_LEN]; 46 | + 47 | + kallsyms_lookup((unsigned long) q->make_request_fn, 48 | + NULL, NULL, NULL, str); 49 | + trace_printk("returned from %s (pid %d)\n", 50 | + str, task_pid_nr(current)); 51 | + } 52 | +#endif 53 | } while (ret); 54 | 55 | return; 56 | diff --git a/fs/buffer.c b/fs/buffer.c 57 | index 3e7dca2..ed188f5 100644 58 | --- a/fs/buffer.c 59 | +++ b/fs/buffer.c 60 | @@ -70,6 +70,19 @@ static int sync_buffer(void *word) 61 | 62 | void __lock_buffer(struct buffer_head *bh) 63 | { 64 | +#if 1 /* PDEBUG */ 65 | + void *ip = __builtin_return_address(0); 66 | + char str[KSYM_SYMBOL_LEN]; 67 | + char b[BDEVNAME_SIZE]; 68 | + 69 | + if (buffer_locked(bh)) { 70 | + kallsyms_lookup((unsigned long) ip, NULL, NULL, NULL, str); 71 | + trace_printk("lock bh %s blk %lu, ret_pc %p (%s) pid %d\n", 72 | + bdevname(bh->b_bdev, b), 73 | + (unsigned long) bh->b_blocknr, ip, str, 74 | + task_pid_nr(current)); 75 | + } 76 | +#endif 77 | 
wait_on_bit_lock(&bh->b_state, BH_Lock, sync_buffer, 78 | TASK_UNINTERRUPTIBLE); 79 | } 80 | @@ -90,6 +103,17 @@ EXPORT_SYMBOL(unlock_buffer); 81 | */ 82 | void __wait_on_buffer(struct buffer_head * bh) 83 | { 84 | +#if 1 /* PDEBUG */ 85 | + void *ip = __builtin_return_address(0); 86 | + char str[KSYM_SYMBOL_LEN]; 87 | + char b[BDEVNAME_SIZE]; 88 | + 89 | + kallsyms_lookup((unsigned long) ip, NULL, NULL, NULL, str); 90 | + trace_printk("dev %s blk %lu, ret_pc %p (%s) pid %d\n", 91 | + bdevname(bh->b_bdev, b), 92 | + (unsigned long) bh->b_blocknr, ip, str, 93 | + task_pid_nr(current)); 94 | +#endif 95 | wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE); 96 | } 97 | EXPORT_SYMBOL(__wait_on_buffer); 98 | @@ -2906,6 +2930,18 @@ int submit_bh(int rw, struct buffer_head * bh) 99 | struct bio *bio; 100 | int ret = 0; 101 | 102 | +#if 1 /* PDEBUG */ 103 | + void *ip = __builtin_return_address(0); 104 | + char str[KSYM_SYMBOL_LEN]; 105 | + char b[BDEVNAME_SIZE]; 106 | + 107 | + kallsyms_lookup((unsigned long) ip, NULL, NULL, NULL, str); 108 | + trace_printk("dev %s blk %lu, ret_pc %p (%s) pid %d\n", 109 | + bdevname(bh->b_bdev, b), 110 | + (unsigned long) bh->b_blocknr, ip, str, 111 | + task_pid_nr(current)); 112 | +#endif 113 | + 114 | BUG_ON(!buffer_locked(bh)); 115 | BUG_ON(!buffer_mapped(bh)); 116 | BUG_ON(!bh->b_end_io); 117 | diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c 118 | index 79fdace..82eafb9 100644 119 | --- a/fs/ext4/inode.c 120 | +++ b/fs/ext4/inode.c 121 | @@ -2029,6 +2029,11 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, 122 | index = mpd->first_page; 123 | end = mpd->next_page - 1; 124 | 125 | +#if 1 /* PDEBUG */ 126 | + trace_printk("%s: ino %lu index %lu end %lu size %lu\n", 127 | + inode->i_sb->s_id, inode->i_ino, 128 | + index, end, (unsigned long) size); 129 | +#endif 130 | pagevec_init(&pvec, 0); 131 | while (index <= end) { 132 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); 133 | @@ -3654,6 
+3659,9 @@ int flush_completed_IO(struct inode *inode) 134 | if (list_empty(&ei->i_completed_io_list)) 135 | return ret; 136 | 137 | +#if 1 /* PDEBUG */ 138 | + trace_printk("%s: ino %lu\n", inode->i_sb->s_id, inode->i_ino); 139 | +#endif 140 | dump_completed_IO(inode); 141 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); 142 | while (!list_empty(&ei->i_completed_io_list)){ 143 | @@ -3694,6 +3702,16 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, 144 | unsigned long flags; 145 | struct ext4_inode_info *ei; 146 | 147 | +#if 1 /* PDEBUG */ 148 | + if (io_end) 149 | + trace_printk("%s: ino %lu io_end %p size %lu\n", 150 | + io_end->inode->i_sb->s_id, 151 | + io_end->inode->i_ino, io_end, 152 | + (unsigned long) size); 153 | + else 154 | + trace_printk("null io_end\n"); 155 | +#endif 156 | + 157 | /* if not async direct IO or dio with 0 bytes write, just return */ 158 | if (!io_end || !size) 159 | goto out; 160 | diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c 161 | index d732ef5..d1b68b7 100644 162 | --- a/fs/ext4/mballoc.c 163 | +++ b/fs/ext4/mballoc.c 164 | @@ -861,8 +861,14 @@ static int ext4_mb_init_cache(struct page *page, char *incore) 165 | } 166 | 167 | /* wait for I/O completion */ 168 | - for (i = 0; i < groups_per_page && bh[i]; i++) 169 | + for (i = 0; i < groups_per_page && bh[i]; i++) { 170 | +#if 1 /* PDEBUG */ 171 | + if (buffer_locked(bh[i])) 172 | + trace_printk("%s: wait on %lu\n", sb->s_id, 173 | + (unsigned long) bh[i]->b_blocknr); 174 | +#endif 175 | wait_on_buffer(bh[i]); 176 | + } 177 | 178 | err = -EIO; 179 | for (i = 0; i < groups_per_page && bh[i]; i++) 180 | diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c 181 | index ccce2c6..c2f42e0 100644 182 | --- a/fs/ext4/page-io.c 183 | +++ b/fs/ext4/page-io.c 184 | @@ -30,6 +30,8 @@ 185 | #include "acl.h" 186 | #include "ext4_extents.h" 187 | 188 | +#define PDEBUG 189 | + 190 | static struct kmem_cache *io_page_cachep, *io_end_cachep; 191 | 192 | int __init 
init_ext4_pageio(void) 193 | @@ -56,6 +58,9 @@ void ext4_free_io_end(ext4_io_end_t *io) 194 | { 195 | int i; 196 | 197 | +#ifdef PDEBUG 198 | + trace_printk("%p\n", io); 199 | +#endif 200 | BUG_ON(!io); 201 | if (io->page) 202 | put_page(io->page); 203 | @@ -63,6 +68,11 @@ void ext4_free_io_end(ext4_io_end_t *io) 204 | if (--io->pages[i]->p_count == 0) { 205 | struct page *page = io->pages[i]->p_page; 206 | 207 | +#ifdef PDEBUG 208 | + trace_printk("%s: end_page_writeback for %lu:%lu\n", 209 | + io->inode->i_sb->s_id, io->inode->i_ino, 210 | + (unsigned long) page->index); 211 | +#endif 212 | end_page_writeback(page); 213 | put_page(page); 214 | kmem_cache_free(io_page_cachep, io->pages[i]); 215 | @@ -121,6 +131,9 @@ static void ext4_end_io_work(struct work_struct *work) 216 | int ret; 217 | 218 | mutex_lock(&inode->i_mutex); 219 | +#ifdef PDEBUG 220 | + trace_printk("%p\n", io); 221 | +#endif 222 | ret = ext4_end_io_nolock(io); 223 | if (ret < 0) { 224 | mutex_unlock(&inode->i_mutex); 225 | @@ -147,6 +160,9 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) 226 | INIT_WORK(&io->work, ext4_end_io_work); 227 | INIT_LIST_HEAD(&io->list); 228 | } 229 | +#ifdef PDEBUG 230 | + trace_printk("%p\n", io); 231 | +#endif 232 | return io; 233 | } 234 | 235 | @@ -175,6 +191,12 @@ static void ext4_end_bio(struct bio *bio, int error) 236 | 237 | BUG_ON(!io_end); 238 | inode = io_end->inode; 239 | +#ifdef PDEBUG 240 | + trace_printk("%s: enter: ino %lu offset %lu size %ld io_end=%p\n", 241 | + inode->i_sb->s_id, inode->i_ino, 242 | + (unsigned long) io_end->offset, 243 | + (long) io_end->size, io_end); 244 | +#endif 245 | bio->bi_private = NULL; 246 | bio->bi_end_io = NULL; 247 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) 248 | @@ -235,6 +257,12 @@ static void ext4_end_bio(struct bio *bio, int error) 249 | if (--io_end->pages[i]->p_count == 0) { 250 | struct page *page = io_end->pages[i]->p_page; 251 | 252 | +#ifdef PDEBUG 253 | + trace_printk("%s: 
end_page_writeback for %lu:%lu\n", 254 | + io_end->inode->i_sb->s_id, 255 | + io_end->inode->i_ino, 256 | + (unsigned long) page->index); 257 | +#endif 258 | end_page_writeback(page); 259 | put_page(page); 260 | kmem_cache_free(io_page_cachep, io_end->pages[i]); 261 | @@ -261,12 +289,24 @@ static void ext4_end_bio(struct bio *bio, int error) 262 | wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq; 263 | /* queue the work to convert unwritten extents to written */ 264 | queue_work(wq, &io_end->work); 265 | +#ifdef PDEBUG 266 | + trace_printk("%s: exit: ino %lu\n", inode->i_sb->s_id, 267 | + io_end->inode->i_ino); 268 | +#endif 269 | } 270 | 271 | void ext4_io_submit(struct ext4_io_submit *io) 272 | { 273 | struct bio *bio = io->io_bio; 274 | 275 | +#ifdef PDEBUG 276 | + if (io->io_end) 277 | + trace_printk("%s: io submitted io_end %p\n", 278 | + io->io_end->inode->i_sb->s_id, io->io_end); 279 | + else 280 | + trace_printk("io submitted io_end %p\n", 281 | + io->io_end); 282 | +#endif 283 | if (bio) { 284 | bio_get(io->io_bio); 285 | submit_bio(io->io_op, io->io_bio); 286 | @@ -308,10 +348,14 @@ static int io_submit_init(struct ext4_io_submit *io, 287 | io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? 
288 | WRITE_SYNC_PLUG : WRITE); 289 | io->io_next_block = bh->b_blocknr; 290 | +#ifdef PDEBUG 291 | + trace_printk("%s: io_submit_init for ino %lu, nvecs = %d\n", 292 | + inode->i_sb->s_id, inode->i_ino, nvecs); 293 | +#endif 294 | return 0; 295 | } 296 | 297 | -static int io_submit_add_bh(struct ext4_io_submit *io, 298 | +static noinline int io_submit_add_bh(struct ext4_io_submit *io, 299 | struct ext4_io_page *io_page, 300 | struct inode *inode, 301 | struct writeback_control *wbc, 302 | @@ -320,6 +364,14 @@ static int io_submit_add_bh(struct ext4_io_submit *io, 303 | ext4_io_end_t *io_end; 304 | int ret; 305 | 306 | +#ifdef PDEBUG 307 | + trace_printk("%s enter: ino %lu blk %lu %s%s%s%s\n", inode->i_sb->s_id, 308 | + inode->i_ino, (unsigned long) bh->b_blocknr, 309 | + buffer_new(bh) ? "N" : "", 310 | + buffer_mapped(bh) ? "M" : "", 311 | + buffer_delay(bh) ? "D" : "", 312 | + buffer_dirty(bh) ? "d" : ""); 313 | +#endif 314 | if (buffer_new(bh)) { 315 | clear_buffer_new(bh); 316 | unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); 317 | @@ -351,13 +403,29 @@ submit_and_retry: 318 | io->io_end->size += bh->b_size; 319 | io->io_next_block++; 320 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); 321 | - if (ret != bh->b_size) 322 | + if (ret != bh->b_size) { 323 | +#ifdef PDEBUG 324 | + trace_printk("%s: submit and retry (ret = %d, size=%d, " 325 | + "offset=%lu)\n", inode->i_sb->s_id, ret, 326 | + bh->b_size, bh_offset(bh)); 327 | +#endif 328 | goto submit_and_retry; 329 | + } 330 | if ((io_end->num_io_pages == 0) || 331 | (io_end->pages[io_end->num_io_pages-1] != io_page)) { 332 | io_end->pages[io_end->num_io_pages++] = io_page; 333 | io_page->p_count++; 334 | } 335 | +#ifdef PDEBUG 336 | + if (io->io_end) 337 | + trace_printk("%s: exit: ino %lu offset %lu size %ld\n", 338 | + inode->i_sb->s_id, inode->i_ino, 339 | + (unsigned long) io->io_end->offset, 340 | + (unsigned long) io->io_end->size); 341 | + else 342 | + trace_printk("%s: 
exit: ino %lu no_io_end\n", 343 | + inode->i_sb->s_id, inode->i_ino); 344 | +#endif 345 | return 0; 346 | } 347 | 348 | @@ -372,6 +440,10 @@ int ext4_bio_write_page(struct ext4_io_submit *io, 349 | struct buffer_head *bh, *head; 350 | int ret = 0; 351 | 352 | +#ifdef PDEBUG 353 | + trace_printk("%s: enter: ino %lu page %lu len %d\n", inode->i_sb->s_id, 354 | + inode->i_ino, page->index, len); 355 | +#endif 356 | blocksize = 1 << inode->i_blkbits; 357 | 358 | BUG_ON(PageWriteback(page)); 359 | @@ -422,5 +494,9 @@ int ext4_bio_write_page(struct ext4_io_submit *io, 360 | end_page_writeback(page); 361 | kmem_cache_free(io_page_cachep, io_page); 362 | } 363 | +#ifdef PDEBUG 364 | + trace_printk("%s: exit: for ino %lu\n", inode->i_sb->s_id, 365 | + inode->i_ino); 366 | +#endif 367 | return ret; 368 | } 369 | diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c 370 | index 6494c81..d3f8634 100644 371 | --- a/fs/jbd2/commit.c 372 | +++ b/fs/jbd2/commit.c 373 | @@ -631,6 +631,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) 374 | * (which is of type BJ_IO) 375 | */ 376 | JBUFFER_TRACE(jh, "ph3: write metadata"); 377 | +#if 1 /* PDEBUG */ 378 | + trace_printk("@635 %s block %llu\n", journal->j_devname, 379 | + blocknr); 380 | +#endif 381 | flags = jbd2_journal_write_metadata_buffer(commit_transaction, 382 | jh, &new_jh, blocknr); 383 | if (flags < 0) { 384 | @@ -693,6 +697,11 @@ start_journal_io: 385 | clear_buffer_dirty(bh); 386 | set_buffer_uptodate(bh); 387 | bh->b_end_io = journal_end_buffer_io_sync; 388 | +#if 1 /* PDEBUG */ 389 | + trace_printk("@700 %s block %llu\n", 390 | + journal->j_devname, 391 | + bh->b_blocknr); 392 | +#endif 393 | submit_bh(write_op, bh); 394 | } 395 | cond_resched(); 396 | @@ -762,6 +771,10 @@ wait_for_iobuf: 397 | jh = commit_transaction->t_iobuf_list->b_tprev; 398 | bh = jh2bh(jh); 399 | if (buffer_locked(bh)) { 400 | +#if 1 /* PDEBUG */ 401 | + trace_printk("jbd wait_on_buffer@765: %lu\n", 402 | + (unsigned long) bh->b_blocknr); 
403 | +#endif 404 | wait_on_buffer(bh); 405 | goto wait_for_iobuf; 406 | } 407 | @@ -818,6 +831,11 @@ wait_for_iobuf: 408 | jh = commit_transaction->t_log_list->b_tprev; 409 | bh = jh2bh(jh); 410 | if (buffer_locked(bh)) { 411 | +#if 1 /* PDEBUG */ 412 | + trace_printk("%s: jbd wait_on_buffer@823: %lu\n", 413 | + journal->j_devname, 414 | + (unsigned long) bh->b_blocknr); 415 | +#endif 416 | wait_on_buffer(bh); 417 | goto wait_for_ctlbuf; 418 | } 419 | diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c 420 | index 6bf0a24..8873caa 100644 421 | --- a/fs/jbd2/transaction.c 422 | +++ b/fs/jbd2/transaction.c 423 | @@ -701,6 +701,11 @@ repeat: 424 | for ( ; ; ) { 425 | prepare_to_wait(wqh, &wait.wait, 426 | TASK_UNINTERRUPTIBLE); 427 | +#if 1 /* PDEBUG */ 428 | + trace_printk("%s: BJ shadow waiting on %lu\n", 429 | + journal->j_devname, 430 | + (unsigned long) bh->b_blocknr); 431 | +#endif 432 | if (jh->b_jlist != BJ_Shadow) 433 | break; 434 | schedule(); 435 | diff --git a/mm/filemap.c b/mm/filemap.c 436 | index 3d4df44..e0c7061 100644 437 | --- a/mm/filemap.c 438 | +++ b/mm/filemap.c 439 | @@ -295,6 +295,13 @@ int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte, 440 | if (page->index > end) 441 | continue; 442 | 443 | +#if 1 /* PDEBUG */ 444 | + if (PageWriteback(page)) 445 | + trace_printk("pid %d waiting on %lu:%lu\n", 446 | + task_pid_nr(current), 447 | + mapping->host->i_ino, 448 | + (unsigned long) page->index); 449 | +#endif 450 | wait_on_page_writeback(page); 451 | if (PageError(page)) 452 | ret = -EIO; 453 | -------------------------------------------------------------------------------- /archive/introduce-new-i_write_mutex: -------------------------------------------------------------------------------- 1 | ext4: introduce new i_write_mutex to protect fallocate 2 | 3 | From: Namjae Jeon 4 | 5 | Introduce new i_write_mutex to protect new writes from coming while doing 6 | fallocate operations. 
Also, get rid of aio_mutex as it is covered by 7 | i_write_mutex. 8 | 9 | Signed-off-by: Namjae Jeon 10 | Signed-off-by: Ashish Sangwan 11 | Signed-off-by: Theodore Ts'o 12 | --- 13 | fs/ext4/ext4.h | 6 +++--- 14 | fs/ext4/extents.c | 19 +++++++++++++++---- 15 | fs/ext4/file.c | 23 +++++++++++++---------- 16 | fs/ext4/inode.c | 7 ++++++- 17 | fs/ext4/super.c | 3 +-- 18 | 5 files changed, 38 insertions(+), 20 deletions(-) 19 | 20 | diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h 21 | index 1479e2a..0519715 100644 22 | --- a/fs/ext4/ext4.h 23 | +++ b/fs/ext4/ext4.h 24 | @@ -943,6 +943,9 @@ struct ext4_inode_info { 25 | 26 | /* Precomputed uuid+inum+igen checksum for seeding inode checksums */ 27 | __u32 i_csum_seed; 28 | + 29 | + /* protects fallocate operations racing with new writes */ 30 | + struct mutex i_write_mutex; 31 | }; 32 | 33 | /* 34 | @@ -2805,10 +2808,7 @@ static inline void ext4_inode_resume_unlocked_dio(struct inode *inode) 35 | #define EXT4_WQ_HASH_SZ 37 36 | #define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\ 37 | EXT4_WQ_HASH_SZ]) 38 | -#define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\ 39 | - EXT4_WQ_HASH_SZ]) 40 | extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; 41 | -extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; 42 | 43 | #define EXT4_RESIZING 0 44 | extern int ext4_resize_begin(struct super_block *sb); 45 | diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c 46 | index 5bbe4256..cb23a34 100644 47 | --- a/fs/ext4/extents.c 48 | +++ b/fs/ext4/extents.c 49 | @@ -4741,6 +4741,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, 50 | if (!S_ISREG(inode->i_mode)) 51 | return -EINVAL; 52 | 53 | + mutex_lock(&EXT4_I(inode)->i_write_mutex); 54 | + 55 | /* 56 | * Write out all dirty pages to avoid race conditions 57 | * Then release them. 
58 | @@ -4748,8 +4750,10 @@ static long ext4_zero_range(struct file *file, loff_t offset, 59 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 60 | ret = filemap_write_and_wait_range(mapping, offset, 61 | offset + len - 1); 62 | - if (ret) 63 | + if (ret) { 64 | + mutex_unlock(&EXT4_I(inode)->i_write_mutex); 65 | return ret; 66 | + } 67 | } 68 | 69 | /* 70 | @@ -4761,8 +4765,10 @@ static long ext4_zero_range(struct file *file, loff_t offset, 71 | start = round_up(offset, 1 << blkbits); 72 | end = round_down((offset + len), 1 << blkbits); 73 | 74 | - if (start < offset || end > offset + len) 75 | + if (start < offset || end > offset + len) { 76 | + mutex_unlock(&EXT4_I(inode)->i_write_mutex); 77 | return -EINVAL; 78 | + } 79 | partial = (offset + len) & ((1 << blkbits) - 1); 80 | 81 | lblk = start >> blkbits; 82 | @@ -4859,6 +4865,7 @@ out_dio: 83 | ext4_inode_resume_unlocked_dio(inode); 84 | out_mutex: 85 | mutex_unlock(&inode->i_mutex); 86 | + mutex_unlock(&EXT4_I(inode)->i_write_mutex); 87 | return ret; 88 | } 89 | 90 | @@ -5411,11 +5418,13 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) 91 | punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); 92 | punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb); 93 | 94 | + mutex_lock(&EXT4_I(inode)->i_write_mutex); 95 | + 96 | /* Call ext4_force_commit to flush all data in case of data=journal. 
*/ 97 | if (ext4_should_journal_data(inode)) { 98 | ret = ext4_force_commit(inode->i_sb); 99 | if (ret) 100 | - return ret; 101 | + goto out_i_write_mutex; 102 | } 103 | 104 | /* 105 | @@ -5428,7 +5437,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) 106 | ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, 107 | LLONG_MAX); 108 | if (ret) 109 | - return ret; 110 | + goto out_i_write_mutex; 111 | 112 | /* Take mutex lock */ 113 | mutex_lock(&inode->i_mutex); 114 | @@ -5501,5 +5510,7 @@ out_dio: 115 | ext4_inode_resume_unlocked_dio(inode); 116 | out_mutex: 117 | mutex_unlock(&inode->i_mutex); 118 | +out_i_write_mutex: 119 | + mutex_unlock(&EXT4_I(inode)->i_write_mutex); 120 | return ret; 121 | } 122 | diff --git a/fs/ext4/file.c b/fs/ext4/file.c 123 | index 4e8bc284..e5cd87f 100644 124 | --- a/fs/ext4/file.c 125 | +++ b/fs/ext4/file.c 126 | @@ -97,7 +97,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, 127 | { 128 | struct file *file = iocb->ki_filp; 129 | struct inode *inode = file_inode(iocb->ki_filp); 130 | - struct mutex *aio_mutex = NULL; 131 | + bool unaligned_direct_aio = false; 132 | struct blk_plug plug; 133 | int o_direct = file->f_flags & O_DIRECT; 134 | int overwrite = 0; 135 | @@ -106,6 +106,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, 136 | 137 | BUG_ON(iocb->ki_pos != pos); 138 | 139 | + mutex_lock(&EXT4_I(inode)->i_write_mutex); 140 | + 141 | /* 142 | * Unaligned direct AIO must be serialized; see comment above 143 | * In the case of O_APPEND, assume that we must always serialize 144 | @@ -115,8 +117,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, 145 | !is_sync_kiocb(iocb) && 146 | (file->f_flags & O_APPEND || 147 | ext4_unaligned_aio(inode, iov, nr_segs, pos))) { 148 | - aio_mutex = ext4_aio_mutex(inode); 149 | - mutex_lock(aio_mutex); 150 | + unaligned_direct_aio = true; 151 | ext4_unwritten_wait(inode); 152 | } 153 | 154 | @@ -134,8 +135,8 @@ 
ext4_file_write(struct kiocb *iocb, const struct iovec *iov, 155 | if ((pos > sbi->s_bitmap_maxbytes) || 156 | (pos == sbi->s_bitmap_maxbytes && length > 0)) { 157 | mutex_unlock(&inode->i_mutex); 158 | - ret = -EFBIG; 159 | - goto errout; 160 | + mutex_unlock(&EXT4_I(inode)->i_write_mutex); 161 | + return -EFBIG; 162 | } 163 | 164 | if (pos + length > sbi->s_bitmap_maxbytes) { 165 | @@ -150,8 +151,9 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, 166 | iocb->private = &overwrite; 167 | 168 | /* check whether we do a DIO overwrite or not */ 169 | - if (ext4_should_dioread_nolock(inode) && !aio_mutex && 170 | - !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) { 171 | + if (ext4_should_dioread_nolock(inode) && 172 | + !unaligned_direct_aio && !file->f_mapping->nrpages && 173 | + pos + length <= i_size_read(inode)) { 174 | struct ext4_map_blocks map; 175 | unsigned int blkbits = inode->i_blkbits; 176 | int err, len; 177 | @@ -181,6 +183,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, 178 | 179 | ret = __generic_file_aio_write(iocb, iov, nr_segs); 180 | mutex_unlock(&inode->i_mutex); 181 | + if (!unaligned_direct_aio) 182 | + mutex_unlock(&EXT4_I(inode)->i_write_mutex); 183 | 184 | if (ret > 0) { 185 | ssize_t err; 186 | @@ -192,9 +196,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, 187 | if (o_direct) 188 | blk_finish_plug(&plug); 189 | 190 | -errout: 191 | - if (aio_mutex) 192 | - mutex_unlock(aio_mutex); 193 | + if (unaligned_direct_aio) 194 | + mutex_unlock(&EXT4_I(inode)->i_write_mutex); 195 | return ret; 196 | } 197 | 198 | diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c 199 | index 645de3e..55f999a 100644 200 | --- a/fs/ext4/inode.c 201 | +++ b/fs/ext4/inode.c 202 | @@ -3534,6 +3534,8 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) 203 | 204 | trace_ext4_punch_hole(inode, offset, length, 0); 205 | 206 | + mutex_lock(&EXT4_I(inode)->i_write_mutex); 207 | + 208 | /* 209 | 
* Write out all dirty pages to avoid race conditions 210 | * Then release them. 211 | @@ -3541,8 +3543,10 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) 212 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 213 | ret = filemap_write_and_wait_range(mapping, offset, 214 | offset + length - 1); 215 | - if (ret) 216 | + if (ret) { 217 | + mutex_unlock(&EXT4_I(inode)->i_write_mutex); 218 | return ret; 219 | + } 220 | } 221 | 222 | mutex_lock(&inode->i_mutex); 223 | @@ -3643,6 +3647,7 @@ out_dio: 224 | ext4_inode_resume_unlocked_dio(inode); 225 | out_mutex: 226 | mutex_unlock(&inode->i_mutex); 227 | + mutex_unlock(&EXT4_I(inode)->i_write_mutex); 228 | return ret; 229 | } 230 | 231 | diff --git a/fs/ext4/super.c b/fs/ext4/super.c 232 | index b9b9aab..7667a5b 100644 233 | --- a/fs/ext4/super.c 234 | +++ b/fs/ext4/super.c 235 | @@ -904,6 +904,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) 236 | atomic_set(&ei->i_ioend_count, 0); 237 | atomic_set(&ei->i_unwritten, 0); 238 | INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work); 239 | + mutex_init(&ei->i_write_mutex); 240 | 241 | return &ei->vfs_inode; 242 | } 243 | @@ -5516,7 +5517,6 @@ static void ext4_exit_feat_adverts(void) 244 | 245 | /* Shared across all ext4 file systems */ 246 | wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; 247 | -struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; 248 | 249 | static int __init ext4_init_fs(void) 250 | { 251 | @@ -5529,7 +5529,6 @@ static int __init ext4_init_fs(void) 252 | ext4_check_flag_values(); 253 | 254 | for (i = 0; i < EXT4_WQ_HASH_SZ; i++) { 255 | - mutex_init(&ext4__aio_mutex[i]); 256 | init_waitqueue_head(&ext4__ioend_wq[i]); 257 | } 258 | 259 | -------------------------------------------------------------------------------- /archive/jbd2-dont-write-non-commit-blocks-synchronously: -------------------------------------------------------------------------------- 1 | jbd2: don't write non-commit 
blocks synchronously 2 | 3 | We don't need to write the revoke blocks and descriptor blocks using 4 | WRITE_SYNC, since when we issue the commit block, those blocks will get 5 | pushed out via REQ_FLUSH. This will allow the journal blocks to be 6 | written in fewer i/o operations (otherwise we end up issuing a whole 7 | series of 4k writes unnecessarily). 8 | 9 | Signed-off-by: "Theodore Ts'o" 10 | --- 11 | fs/jbd2/commit.c | 4 ++-- 12 | 1 file changed, 2 insertions(+), 2 deletions(-) 13 | 14 | diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c 15 | index cf2fc05..fb64629 100644 16 | --- a/fs/jbd2/commit.c 17 | +++ b/fs/jbd2/commit.c 18 | @@ -554,7 +554,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) 19 | 20 | blk_start_plug(&plug); 21 | jbd2_journal_write_revoke_records(journal, commit_transaction, 22 | - &log_bufs, WRITE_SYNC); 23 | + &log_bufs, WRITE); 24 | blk_finish_plug(&plug); 25 | 26 | jbd_debug(3, "JBD2: commit phase 2b\n"); 27 | @@ -739,7 +739,7 @@ start_journal_io: 28 | clear_buffer_dirty(bh); 29 | set_buffer_uptodate(bh); 30 | bh->b_end_io = journal_end_buffer_io_sync; 31 | - submit_bh(WRITE_SYNC, bh); 32 | + submit_bh(WRITE, bh); 33 | } 34 | cond_resched(); 35 | stats.run.rs_blocks_logged += bufs; 36 | -------------------------------------------------------------------------------- /cleaner: -------------------------------------------------------------------------------- 1 | Introduce cleaner 2 | 3 | From: Abutalib Aghayev 4 | 5 | An experimental cleaner. Copy the live blocks from the transaction at the 6 | tail in batches to the transaction at the head. After a commit ends, check 7 | if free space is below watermark and start cleaning until free space is 8 | above high watermark. 
9 | 10 | Signed-off-by: Abutalib Aghayev 11 | Signed-off-by: Theodore Ts'o 12 | --- 13 | fs/jbd2/Makefile | 2 +- 14 | fs/jbd2/checkpoint.c | 3 + 15 | fs/jbd2/cleaner.c | 368 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 16 | fs/jbd2/jmap.c | 34 ++++++++ 17 | fs/jbd2/jmap.h | 77 +++++++++++++++++ 18 | fs/jbd2/journal.c | 23 +++++- 19 | include/linux/jbd2.h | 8 ++ 20 | 7 files changed, 512 insertions(+), 3 deletions(-) 21 | 22 | diff --git a/fs/jbd2/Makefile b/fs/jbd2/Makefile 23 | index a54f50b3a06e..b6a2dddcc0a7 100644 24 | --- a/fs/jbd2/Makefile 25 | +++ b/fs/jbd2/Makefile 26 | @@ -5,4 +5,4 @@ 27 | obj-$(CONFIG_JBD2) += jbd2.o 28 | 29 | jbd2-objs := transaction.o commit.o recovery.o checkpoint.o revoke.o journal.o \ 30 | - jmap.o 31 | + jmap.o cleaner.o 32 | diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c 33 | index c125d662777c..b2468698f566 100644 34 | --- a/fs/jbd2/checkpoint.c 35 | +++ b/fs/jbd2/checkpoint.c 36 | @@ -386,6 +386,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal) 37 | tid_t first_tid; 38 | unsigned long blocknr; 39 | 40 | + if (journal->j_flags & JBD2_LAZY) 41 | + return 0; 42 | + 43 | if (is_journal_aborted(journal)) 44 | return -EIO; 45 | 46 | diff --git a/fs/jbd2/cleaner.c b/fs/jbd2/cleaner.c 47 | new file mode 100644 48 | index 000000000000..06ec11e1d2dd 49 | --- /dev/null 50 | +++ b/fs/jbd2/cleaner.c 51 | @@ -0,0 +1,368 @@ 52 | +#include 53 | +#include 54 | +#include "jmap.h" 55 | +#include 56 | +#include 57 | +#include 58 | +#include 59 | +#include 60 | + 61 | +static inline int jbd2_low_on_space(journal_t *journal) 62 | +{ 63 | + int x = atomic_read(&journal->j_cleaner_ctx->nr_txns_committed); 64 | + if (x > 10) { 65 | + trace_jbd2_jmap_printf1("low on space", x); 66 | + return true; 67 | + } 68 | + trace_jbd2_jmap_printf1("not low on space", x); 69 | + return false; 70 | +} 71 | + 72 | +static inline int jbd2_high_on_space(journal_t *journal) 73 | +{ 74 | + if 
(atomic_read(&journal->j_cleaner_ctx->nr_txns_cleaned) < 2) { 75 | + trace_jbd2_jmap_printf("not enough cleaned"); 76 | + return false; 77 | + } 78 | + trace_jbd2_jmap_printf("enough cleaned"); 79 | + atomic_set(&journal->j_cleaner_ctx->nr_txns_cleaned, 0); 80 | + atomic_set(&journal->j_cleaner_ctx->nr_txns_committed, 0); 81 | + return true; 82 | +} 83 | + 84 | +/* 85 | + * Tries to move the tail forward (hence free space) as long as the transaction 86 | + * at the tail has only stale blocks. Returns true if manages to free a 87 | + * transaction, false otherwise. 88 | + */ 89 | +static bool jbd2_try_to_move_tail(journal_t *journal) 90 | +{ 91 | + struct transaction_infos *tis = journal->j_transaction_infos; 92 | + struct transaction_info *ti, *ti1; 93 | + 94 | + /* 95 | + * Advance the tail as far as possible by skipping over transactions 96 | + * with no live blocks. 97 | + */ 98 | + write_lock(&journal->j_jmap_lock); 99 | + ti = ti1 = &tis->buf[tis->tail]; 100 | + 101 | + for ( ; list_empty(&ti->live_blks); ti = &tis->buf[tis->tail]) { 102 | + trace_jbd2_jmap_printf2("cleaned a transaction", 103 | + tis->tail, ti->tid); 104 | + tis->tail = (tis->tail + 1) & (MAX_LIVE_TRANSACTIONS - 1); 105 | + atomic_inc(&journal->j_cleaner_ctx->nr_txns_cleaned); 106 | + } 107 | + write_unlock(&journal->j_jmap_lock); 108 | + 109 | + if (ti == ti1) 110 | + return false; 111 | + /* 112 | + * In the worst case, this will end up updating the journal superblock 113 | + * after cleaning up every transaction. Should we avoid it? 114 | + */ 115 | + write_unlock(&journal->j_state_lock); 116 | + jbd2_update_log_tail(journal, ti->tid, ti->offset); 117 | + write_lock(&journal->j_state_lock); 118 | + 119 | + return true; 120 | +} 121 | + 122 | +/* 123 | + * Finds the live blocks at the tail transaction and copies the corresponding 124 | + * mappings to |ctx->mappings|. Returns the number of live block mappings 125 | + * copied. Should be called with a read lock on |j_jmap_lock|. 
126 | + */ 127 | +static int find_live_blocks(struct cleaner_ctx *ctx) 128 | +{ 129 | + journal_t *journal = ctx->journal; 130 | + struct transaction_infos *tis = journal->j_transaction_infos; 131 | + struct transaction_info *ti = &tis->buf[tis->tail]; 132 | + struct jmap_entry *je = NULL; 133 | + int i, nr_live = 0; 134 | + 135 | + if (unlikely(list_empty(&ti->live_blks))) 136 | + goto done; 137 | + 138 | + spin_lock(&ctx->pos_lock); 139 | + if (!ctx->pos) 140 | + ctx->pos = list_first_entry(&ti->live_blks, typeof(*je), list); 141 | + je = ctx->pos; 142 | + spin_unlock(&ctx->pos_lock); 143 | + 144 | + list_for_each_entry_from(je, &ti->live_blks, list) { 145 | + if (je->revoked) 146 | + continue; 147 | + ctx->mappings[nr_live++] = je->mapping; 148 | + if (nr_live == CLEANER_BATCH_SIZE) 149 | + break; 150 | + } 151 | + 152 | +done: 153 | + trace_jbd2_jmap_printf1("found live blocks", nr_live); 154 | + for (i = 0; i < nr_live; ++i) 155 | + trace_jbd2_jmap_printf2("m", 156 | + ctx->mappings[i].fsblk, 157 | + ctx->mappings[i].logblk); 158 | + return nr_live; 159 | +} 160 | + 161 | +static void live_block_read_end_io(struct buffer_head *bh, int uptodate) 162 | +{ 163 | + struct cleaner_ctx *ctx = bh->b_private; 164 | + 165 | + if (uptodate) { 166 | + set_buffer_uptodate(bh); 167 | + if (atomic_dec_and_test(&ctx->nr_pending_reads)) 168 | + wake_up(&ctx->live_block_reads); 169 | + } else { 170 | + WARN_ON(1); 171 | + clear_buffer_uptodate(bh); 172 | + } 173 | + 174 | + unlock_buffer(bh); 175 | + put_bh(bh); 176 | +} 177 | + 178 | +/* 179 | + * Reads live blocks in |ctx->mappings| populated by find_live_blocks into 180 | + * buffer heads in |ctx->bhs|. Returns true if at least one of the reads goes 181 | + * out to disk and false otherwise. If this function returns true then the 182 | + * client should sleep on the condition variable |ctx->live_block_reads|. 
The 183 | + * client will be woken up when all reads are complete, through the end_io 184 | + * handler attached to buffer heads read from disk. 185 | + */ 186 | +static bool read_live_blocks(struct cleaner_ctx *ctx, int nr_live) 187 | +{ 188 | + journal_t *journal = ctx->journal; 189 | + bool slow = false; 190 | + struct blk_plug plug; 191 | + bool plugged = false; 192 | + int i, rc; 193 | + 194 | + for (i = 0; i < nr_live; ++i) { 195 | + ctx->bhs[i] = __getblk(journal->j_dev, ctx->mappings[i].fsblk, 196 | + journal->j_blocksize); 197 | + if (unlikely(!ctx->bhs[i])) { 198 | + rc = -ENOMEM; 199 | + goto out_err; 200 | + } 201 | + if (buffer_uptodate(ctx->bhs[i])) 202 | + continue; 203 | + if (!plugged) { 204 | + plugged = true; 205 | + blk_start_plug(&plug); 206 | + } 207 | + lock_buffer(ctx->bhs[i]); 208 | + if (buffer_uptodate(ctx->bhs[i])) 209 | + continue; 210 | + ctx->bhs[i]->b_private = ctx; 211 | + ctx->bhs[i]->b_end_io = live_block_read_end_io; 212 | + get_bh(ctx->bhs[i]); 213 | + rc = read_block_from_log(ctx->journal, ctx->bhs[i], 214 | + REQ_RAHEAD, ctx->mappings[i].logblk); 215 | + if (unlikely(rc < 0)) 216 | + goto out_err; 217 | + atomic_inc(&ctx->nr_pending_reads); 218 | + if (rc) { 219 | + slow = true; 220 | + trace_jbd2_jmap_printf2("reading from disk", 221 | + ctx->mappings[i].fsblk, 222 | + ctx->mappings[i].logblk); 223 | + } else { 224 | + trace_jbd2_jmap_printf2("cached", 225 | + ctx->mappings[i].fsblk, 226 | + ctx->mappings[i].logblk); 227 | + } 228 | + } 229 | + if (plugged) 230 | + blk_finish_plug(&plug); 231 | + return slow; 232 | + 233 | +out_err: 234 | + if (plugged) 235 | + blk_finish_plug(&plug); 236 | + jbd2_journal_abort(ctx->journal, rc); 237 | + return false; 238 | +} 239 | + 240 | +/* 241 | + * This function finds the live blocks that became stale between the call to 242 | + * find_live_blocks and now, and discards them. It returns true if there are no 243 | + * more live blocks left at the tail transaction. 
244 | + */ 245 | +static bool discard_stale_blocks(struct cleaner_ctx *ctx, int nr_live) 246 | +{ 247 | + journal_t *journal = ctx->journal; 248 | + struct transaction_infos *tis = journal->j_transaction_infos; 249 | + struct transaction_info *ti = &tis->buf[tis->tail]; 250 | + struct jmap_entry *je = NULL; 251 | + int i = 0, j = 0, next = 0; 252 | + 253 | + trace_jbd2_jmap_printf(__func__); 254 | + spin_lock(&ctx->pos_lock); 255 | + BUG_ON(!ctx->pos); 256 | + je = ctx->pos; 257 | + list_for_each_entry_from(je, &ti->live_blks, list) { 258 | + for (j = next; j < nr_live; ++j) { 259 | + if (je->mapping.fsblk == ctx->mappings[j].fsblk) { 260 | + next = j+1; 261 | + ctx->pos = list_next_entry(je, list); 262 | + if (je->revoked) { 263 | + brelse(ctx->bhs[j]); 264 | + ctx->bhs[j] = NULL; 265 | + trace_jbd2_jmap_printf2( 266 | + "revoked", 267 | + ctx->mappings[i].fsblk, 268 | + ctx->mappings[i].logblk); 269 | + } 270 | + break; 271 | + } else { 272 | + trace_jbd2_jmap_printf2( 273 | + "moved to another list", 274 | + ctx->mappings[i].fsblk, 275 | + ctx->mappings[i].logblk); 276 | + brelse(ctx->bhs[j]); 277 | + ctx->bhs[j] = NULL; 278 | + } 279 | + } 280 | + if (++i == nr_live || j == nr_live) 281 | + break; 282 | + } 283 | + spin_unlock(&ctx->pos_lock); 284 | + 285 | + /* 286 | + * We have exited the loop. If we haven't processed all the entries in 287 | + * |ctx->mappings|, that is if (j < nr_live) at the exit, and we have 288 | + * not processed |nr_live| entries from the live blocks list at the 289 | + * tail, that is if (i < nr_live) at the exit, then the live blocks list 290 | + * has shrunk and the tail transaction has no live blocks left. 
291 | + */ 292 | + return j < nr_live && i < nr_live; 293 | +} 294 | + 295 | +static void attach_live_blocks(struct cleaner_ctx *ctx, handle_t *handle, 296 | + int nr_live) 297 | +{ 298 | + int err, i; 299 | + 300 | + trace_jbd2_jmap_printf(__func__); 301 | + for (i = 0; i < nr_live; ++i) { 302 | + if (!ctx->bhs[i]) 303 | + continue; 304 | + trace_jbd2_jmap_printf2("attaching", 305 | + ctx->mappings[i].fsblk, 306 | + ctx->mappings[i].logblk); 307 | + err = jbd2_journal_get_write_access(handle, ctx->bhs[i]); 308 | + if (!err) 309 | + err = jbd2_journal_dirty_metadata(handle, ctx->bhs[i]); 310 | + if (err) { 311 | + jbd2_journal_abort(ctx->journal, err); 312 | + return; 313 | + } 314 | + } 315 | +} 316 | + 317 | +/* 318 | + * Read the live blocks from the tail transaction and attach them to the current 319 | + * transaction. 320 | + */ 321 | +void jbd2_jmap_do_clean_batch(struct work_struct *work) 322 | +{ 323 | + struct cleaner_ctx *ctx = container_of(work, struct cleaner_ctx, work); 324 | + journal_t *journal = ctx->journal; 325 | + bool wake_up_commit_thread = true; 326 | + handle_t *handle = NULL; 327 | + int nr_live, err; 328 | + 329 | + read_lock(&journal->j_jmap_lock); 330 | + nr_live = find_live_blocks(ctx); 331 | + read_unlock(&journal->j_jmap_lock); 332 | + 333 | + if (nr_live < CLEANER_BATCH_SIZE) 334 | + wake_up_commit_thread = false; 335 | + if (nr_live == 0) 336 | + goto done; 337 | + 338 | + read_live_blocks(ctx, nr_live); 339 | + wait_event(ctx->live_block_reads, 340 | + atomic_read(&ctx->nr_pending_reads) <= 0); 341 | + 342 | + handle = jbd2_journal_start(journal, nr_live); 343 | + if (IS_ERR(handle)) { 344 | + jbd2_journal_abort(journal, PTR_ERR(handle)); 345 | + return; 346 | + } 347 | + 348 | + read_lock(&journal->j_jmap_lock); 349 | + if (discard_stale_blocks(ctx, nr_live)) 350 | + wake_up_commit_thread = false; 351 | + read_unlock(&journal->j_jmap_lock); 352 | + /* 353 | + * I'm not sure why this function was under the jmap_lock 354 | + * 
previously, but it can't be, since it calls functions that 355 | + * can block due to memory allocation. I don't think it needs 356 | + * to be protected, since it appears that ctx->mapping is only 357 | + * used by the cleaner code, and so it can't be run multiple 358 | + * times. -- TYT 359 | + */ 360 | + attach_live_blocks(ctx, handle, nr_live); 361 | + 362 | + err = jbd2_journal_stop(handle); 363 | + if (err) { 364 | + jbd2_journal_abort(journal, err); 365 | + return; 366 | + } 367 | + 368 | +done: 369 | + atomic_set(&ctx->batch_in_progress, 0); 370 | + atomic_inc(&ctx->nr_txns_cleaned); 371 | + if (wake_up_commit_thread) { 372 | + trace_jbd2_jmap_printf("waking up commit thread"); 373 | + wake_up(&journal->j_wait_commit); 374 | + } else { 375 | + trace_jbd2_jmap_printf("not waking up commit thread"); 376 | + spin_lock(&ctx->pos_lock); 377 | + ctx->pos = NULL; 378 | + spin_unlock(&ctx->pos_lock); 379 | + } 380 | + write_lock(&journal->j_state_lock); 381 | + journal->j_flags &= ~JBD2_CLEANING; 382 | + write_unlock(&journal->j_state_lock); 383 | +} 384 | + 385 | +/* 386 | + * Called by the commit thread to see if we need to do any cleaning 387 | + * work. 388 | + * Called with j_state_lock write locked. 389 | + */ 390 | +void jbd2_check_cleaner(journal_t *journal) 391 | +{ 392 | + /* 393 | + * If there is cleaning going on in the workqueue, don't check 394 | + * until we're done. 
395 | + */ 396 | + if (journal->j_flags & JBD2_CLEANING) 397 | + return; 398 | + 399 | + if (journal->j_flags & JBD2_STOP_CLEANING) { 400 | + disengage_cleaner: 401 | + journal->j_flags &= ~JBD2_CLEANER_ENGAGED; 402 | + return; 403 | + } 404 | + 405 | + if (journal->j_flags & JBD2_CLEANER_ENGAGED) { 406 | + if (jbd2_try_to_move_tail(journal) && 407 | + jbd2_high_on_space(journal)) 408 | + goto disengage_cleaner; 409 | + schedule_batch: 410 | + journal->j_flags |= JBD2_CLEANING; 411 | + schedule_work(&journal->j_cleaner_ctx->work); 412 | + return; 413 | + } 414 | + 415 | + if (jbd2_low_on_space(journal)) { 416 | + journal->j_flags |= JBD2_CLEANER_ENGAGED; 417 | + goto schedule_batch; 418 | + } 419 | +} 420 | diff --git a/fs/jbd2/jmap.c b/fs/jbd2/jmap.c 421 | index 7de6f4a0a1dc..0e759cc095f5 100644 422 | --- a/fs/jbd2/jmap.c 423 | +++ b/fs/jbd2/jmap.c 424 | @@ -91,8 +91,17 @@ static int process_existing_mappings(journal_t *journal, 425 | * We are either deleting the entry because it was revoked, or 426 | * we are moving it to the live blocks list of this transaction. 427 | * In either case, we remove it from its existing list. 428 | + * However, before removing it we check to see if this is an 429 | + * entry in the live blocks list of the tail transaction a 430 | + * pointer to whom is cached by the cleaner and update the 431 | + * cached pointer if so. 
432 | */ 433 | + spin_lock(&journal->j_cleaner_ctx->pos_lock); 434 | + if (je == journal->j_cleaner_ctx->pos) { 435 | + journal->j_cleaner_ctx->pos = list_next_entry(je, list); 436 | + } 437 | list_del(&je->list); 438 | + spin_unlock(&journal->j_cleaner_ctx->pos_lock); 439 | 440 | if (je->revoked) { 441 | rb_erase(&je->rb_node, &journal->j_jmap); 442 | @@ -216,6 +225,8 @@ void jbd2_finish_transaction_infos(journal_t *journal) 443 | { 444 | struct transaction_infos *tis = journal->j_transaction_infos; 445 | 446 | + atomic_inc(&journal->j_cleaner_ctx->nr_txns_committed); 447 | + 448 | write_lock(&journal->j_jmap_lock); 449 | tis->head = (tis->head + 1) & (MAX_LIVE_TRANSACTIONS - 1); 450 | write_unlock(&journal->j_jmap_lock); 451 | @@ -243,6 +254,8 @@ int jbd2_transaction_infos_add(journal_t *journal, transaction_t *transaction, 452 | */ 453 | BUG_ON(!list_empty(&ti->live_blks)); 454 | 455 | + atomic_inc(&journal->j_cleaner_ctx->nr_txns_committed); 456 | + 457 | write_lock(&journal->j_jmap_lock); 458 | nr_new = process_existing_mappings(journal, ti, t_idx, mappings, 459 | nr_mappings); 460 | @@ -489,11 +502,32 @@ int jbd2_smr_journal_init(journal_t *journal) 461 | { 462 | journal->j_jmap = RB_ROOT; 463 | rwlock_init(&journal->j_jmap_lock); 464 | + journal->j_cleaner_ctx = kzalloc(sizeof(struct cleaner_ctx), 465 | + GFP_KERNEL); 466 | + if (!journal->j_cleaner_ctx) 467 | + return -ENOMEM; 468 | + 469 | + journal->j_cleaner_ctx->journal = journal; 470 | + journal->j_cleaner_ctx->pos = NULL; 471 | + spin_lock_init(&journal->j_cleaner_ctx->pos_lock); 472 | + atomic_set(&journal->j_cleaner_ctx->cleaning, 0); 473 | + atomic_set(&journal->j_cleaner_ctx->batch_in_progress, 0); 474 | + atomic_set(&journal->j_cleaner_ctx->nr_pending_reads, 0); 475 | + atomic_set(&journal->j_cleaner_ctx->nr_txns_committed, 0); 476 | + atomic_set(&journal->j_cleaner_ctx->nr_txns_cleaned, 0); 477 | + init_waitqueue_head(&journal->j_cleaner_ctx->live_block_reads); 478 | + 
INIT_WORK(&journal->j_cleaner_ctx->work, jbd2_jmap_do_clean_batch); 479 | return jbd2_init_transaction_infos(journal); 480 | } 481 | 482 | void jbd2_smr_journal_exit(journal_t *journal) 483 | { 484 | + if (journal->j_cleaner_ctx) { 485 | + atomic_set(&journal->j_cleaner_ctx->cleaning, 0); 486 | + flush_work(&journal->j_cleaner_ctx->work); 487 | + kfree(journal->j_cleaner_ctx); 488 | + journal->j_cleaner_ctx = NULL; 489 | + } 490 | jbd2_free_transaction_infos(journal); 491 | } 492 | 493 | diff --git a/fs/jbd2/jmap.h b/fs/jbd2/jmap.h 494 | index 91564ce9bbda..a44f15152536 100644 495 | --- a/fs/jbd2/jmap.h 496 | +++ b/fs/jbd2/jmap.h 497 | @@ -125,4 +125,81 @@ extern void jbd2_jmap_cancel_revoke(journal_t *journal, sector_t fsblk); 498 | extern int read_block_from_log(journal_t *journal, struct buffer_head *bh, 499 | int op_flags, sector_t blk); 500 | 501 | +extern void jbd2_jmap_do_clean_batch(struct work_struct *work); 502 | + 503 | +/* 504 | + * Cleaner stuff is below. 505 | + */ 506 | + 507 | +/* 508 | + * Number of blocks to read at once, for cleaning. 509 | + */ 510 | +#define CLEANER_BATCH_SIZE 16 511 | + 512 | +/* 513 | + * Context structure for the cleaner. 514 | + */ 515 | +struct cleaner_ctx { 516 | + /* 517 | + * We set to true once we drop below low watermark and it stays so until 518 | + * we rise above the high watermark. It is accessed by the commit 519 | + * thread and the foreground kernel threads during the journal 520 | + * destruction, therefore it is atomic. 521 | + */ 522 | + atomic_t cleaning; 523 | + 524 | + /* 525 | + * We clean in batches of blocks. This flag indicates if we are 526 | + * currently cleaning a batch. It is accessed by the commit thread and 527 | + * the cleaner thread, therefore it is atomic. 528 | + */ 529 | + atomic_t batch_in_progress; 530 | + 531 | + /* 532 | + * We find live blocks to clean from the live blocks list of the 533 | + * transaction at the tail. 
This list can be larger than our batch size 534 | + * and we may need several attempts to process it. We cache the 535 | + * position of the next entry to start from in |pos|. Since cleaner 536 | + * thread can run concurrently with the commit thread that can modify 537 | + * the live blocks list of the transaction at the tail (for example, if 538 | + * it needs to drop a revoked entry or if |pos| points to an entry that 539 | + * has been updated and should move from the live blocks list of the 540 | + * transaction at the tail to the live blocks list of current 541 | + * transaction) we protect |pos| with |pos_lock|. 542 | + */ 543 | + struct jmap_entry *pos; 544 | + spinlock_t pos_lock; 545 | + 546 | + /* 547 | + * Live block mappings for the blocks that we copy in a batch. 548 | + */ 549 | + struct blk_mapping mappings[CLEANER_BATCH_SIZE]; 550 | + 551 | + /* 552 | + * Buffer heads for the live blocks read in a batch. 553 | + */ 554 | + struct buffer_head *bhs[CLEANER_BATCH_SIZE]; 555 | + 556 | + /* 557 | + * Number of pending reads in a batch. Every submitted read increments 558 | + * it and every completed read decrements it. 559 | + */ 560 | + atomic_t nr_pending_reads; 561 | + 562 | + /* 563 | + * The cleaner thread sleeps on this wait queue until the last 564 | + * completed read wakes up the cleaner thread. 565 | + */ 566 | + wait_queue_head_t live_block_reads; 567 | + 568 | + /* TODO: temporary for debugging, remove once done. 
*/ 569 | + atomic_t nr_txns_committed; 570 | + atomic_t nr_txns_cleaned; 571 | + 572 | + journal_t *journal; 573 | + struct work_struct work; 574 | +}; 575 | + 576 | +void jbd2_check_cleaner(journal_t *journal); 577 | + 578 | #endif 579 | diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c 580 | index 114c7636d706..5fdcaff927cf 100644 581 | --- a/fs/jbd2/journal.c 582 | +++ b/fs/jbd2/journal.c 583 | @@ -230,10 +230,16 @@ static int kjournald2(void *arg) 584 | del_timer_sync(&journal->j_commit_timer); 585 | jbd2_journal_commit_transaction(journal); 586 | write_lock(&journal->j_state_lock); 587 | - goto loop; 588 | } 589 | 590 | wake_up(&journal->j_wait_done_commit); 591 | + 592 | + if (journal->j_flags & JBD2_LAZY) 593 | + jbd2_check_cleaner(journal); 594 | + 595 | + if (journal->j_commit_sequence != journal->j_commit_request) 596 | + goto loop; 597 | + 598 | if (freezing(current)) { 599 | /* 600 | * The simpler the better. Flushing journal isn't a 601 | @@ -262,6 +268,9 @@ static int kjournald2(void *arg) 602 | should_sleep = 0; 603 | if (journal->j_flags & JBD2_UNMOUNT) 604 | should_sleep = 0; 605 | + if ((journal->j_flags & JBD2_CLEANER_ENGAGED) && 606 | + !(journal->j_flags & JBD2_CLEANING)) 607 | + should_sleep = 0; 608 | if (should_sleep) { 609 | write_unlock(&journal->j_state_lock); 610 | schedule(); 611 | @@ -307,14 +316,24 @@ static int jbd2_journal_start_thread(journal_t *journal) 612 | static void journal_kill_thread(journal_t *journal) 613 | { 614 | write_lock(&journal->j_state_lock); 615 | - journal->j_flags |= JBD2_UNMOUNT; 616 | 617 | + journal->j_flags |= JBD2_STOP_CLEANING; 618 | + while (journal->j_flags & JBD2_CLEANING) { 619 | + write_unlock(&journal->j_state_lock); 620 | + wake_up(&journal->j_wait_commit); 621 | + wait_event(journal->j_wait_done_commit, 622 | + (journal->j_flags & JBD2_CLEANING) == 0); 623 | + write_lock(&journal->j_state_lock); 624 | + } 625 | + 626 | + journal->j_flags |= JBD2_UNMOUNT; 627 | while (journal->j_task) { 628 | 
write_unlock(&journal->j_state_lock); 629 | wake_up(&journal->j_wait_commit); 630 | wait_event(journal->j_wait_done_commit, journal->j_task == NULL); 631 | write_lock(&journal->j_state_lock); 632 | } 633 | + 634 | write_unlock(&journal->j_state_lock); 635 | } 636 | 637 | diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h 638 | index a53c7d333199..bb994983cdba 100644 639 | --- a/include/linux/jbd2.h 640 | +++ b/include/linux/jbd2.h 641 | @@ -786,6 +786,11 @@ struct journal_s 642 | */ 643 | rwlock_t j_jmap_lock; 644 | 645 | + /** 646 | + * @j_cleaner_ctx: Cleaner state 647 | + */ 648 | + struct cleaner_ctx *j_cleaner_ctx; 649 | + 650 | /** 651 | * @j_format_version: Version of the superblock format. 652 | */ 653 | @@ -1254,6 +1259,9 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) 654 | #define JBD2_REC_ERR 0x080 /* The errno in the sb has been recorded */ 655 | #define JBD2_NO_CLEANUP 0x100 /* Don't flush empty the journal on shutdown */ 656 | #define JBD2_LAZY 0x200 /* Do lazy journalling */ 657 | +#define JBD2_CLEANING 0x400 /* Lazy journalling cleaning in progress */ 658 | +#define JBD2_CLEANER_ENGAGED 0x800 /* Cleaner has been engaged */ 659 | +#define JBD2_STOP_CLEANING 0x1000 /* Request the cleaning thread to stop */ 660 | 661 | /* 662 | * Function declarations for the journaling transaction and buffer 663 | -------------------------------------------------------------------------------- /disable-writeback: -------------------------------------------------------------------------------- 1 | Disable writeback 2 | 3 | From: Abutalib Aghayev 4 | 5 | Now that we have a working cleaner, disable writeback of metadata blocks. 
6 | 7 | Signed-off-by: Abutalib Aghayev 8 | Signed-off-by: Theodore Ts'o 9 | --- 10 | fs/jbd2/transaction.c | 5 ++++- 11 | include/linux/journal-head.h | 5 +++++ 12 | 2 files changed, 9 insertions(+), 1 deletion(-) 13 | 14 | diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c 15 | index 5e659ee08d6a..1bd1a1530fbc 100644 16 | --- a/fs/jbd2/transaction.c 17 | +++ b/fs/jbd2/transaction.c 18 | @@ -894,6 +894,8 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, 19 | jh->b_next_transaction == transaction) 20 | goto done; 21 | 22 | + jh->b_jflags = journal->j_flags; 23 | + 24 | /* 25 | * this is the first time this transaction is touching this buffer, 26 | * reset the modified flag 27 | @@ -1863,7 +1865,8 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) 28 | 29 | __blist_del_buffer(list, jh); 30 | jh->b_jlist = BJ_None; 31 | - if (transaction && is_journal_aborted(transaction->t_journal)) 32 | + if ((transaction && is_journal_aborted(transaction->t_journal)) || 33 | + (jh->b_jflags & JBD2_LAZY)) 34 | clear_buffer_jbddirty(bh); 35 | else if (test_clear_buffer_jbddirty(bh)) 36 | mark_buffer_dirty(bh); /* Expose it to the VM */ 37 | diff --git a/include/linux/journal-head.h b/include/linux/journal-head.h 38 | index 98cd41bb39c8..d4cce2bab7ff 100644 39 | --- a/include/linux/journal-head.h 40 | +++ b/include/linux/journal-head.h 41 | @@ -58,6 +58,11 @@ struct journal_head { 42 | char *b_committed_data; 43 | 44 | /* 45 | + * Copy of journal->j_flags 46 | + */ 47 | + unsigned b_jflags; 48 | + 49 | + /* 50 | * Pointer to the compound transaction which owns this buffer's 51 | * metadata: either the running transaction or the committing 52 | * transaction (if there is one). 
Only applies to buffers on a 53 | -------------------------------------------------------------------------------- /jbd2-dont-double-bump-transaction-number: -------------------------------------------------------------------------------- 1 | jbd2: don't skip a transaction number when recovering journal 2 | 3 | In the lazy journalling patches we retain the journal, so skipping a 4 | transaction after the replay is problematic. 5 | 6 | Signed-off-by: Theodore Ts'o 7 | --- 8 | fs/jbd2/recovery.c | 2 +- 9 | 1 file changed, 1 insertion(+), 1 deletion(-) 10 | 11 | diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c 12 | index 02dd3360cb20..da100044566c 100644 13 | --- a/fs/jbd2/recovery.c 14 | +++ b/fs/jbd2/recovery.c 15 | @@ -281,7 +281,7 @@ int jbd2_journal_recover(journal_t *journal) 16 | 17 | /* Restart the log at the next transaction ID, thus invalidating 18 | * any existing commit records in the log. */ 19 | - journal->j_transaction_sequence = ++info.end_transaction; 20 | + journal->j_transaction_sequence = info.end_transaction; 21 | 22 | jbd2_journal_clear_revoke(journal); 23 | err2 = sync_blockdev(journal->j_fs_dev); 24 | -------------------------------------------------------------------------------- /journal-superblock-changes: -------------------------------------------------------------------------------- 1 | ext4: journal superblock changes 2 | 3 | There are a number of changes to the ext4 superblock during the mount 4 | process which are done without using the journal, but instead via the 5 | brute-force call to ext4_commit_super(). Concentrate these changes to 6 | ext4_setup_super(), and make them using the journalling mechanism. 7 | 8 | Not only is this more efficient, but it also avoids some cases where 9 | the ext4 superblock's checksum was not properly set. 
10 | 11 | Signed-off-by: Theodore Ts'o 12 | --- 13 | fs/ext4/super.c | 50 ++++++++++++++++++++++++++++---------------------- 14 | 1 file changed, 28 insertions(+), 22 deletions(-) 15 | 16 | diff --git a/fs/ext4/super.c b/fs/ext4/super.c 17 | index 680526e9ee96..ae86983cbf60 100644 18 | --- a/fs/ext4/super.c 19 | +++ b/fs/ext4/super.c 20 | @@ -2148,9 +2148,10 @@ int ext4_seq_options_show(struct seq_file *seq, void *offset) 21 | } 22 | 23 | static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 24 | - int read_only) 25 | + unsigned long journal_devnum, int read_only) 26 | { 27 | struct ext4_sb_info *sbi = EXT4_SB(sb); 28 | + handle_t *handle; 29 | int err = 0; 30 | 31 | if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 32 | @@ -2158,7 +2159,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 33 | "forcing read-only mode"); 34 | err = -EROFS; 35 | } 36 | - if (read_only) 37 | + if (read_only || err) 38 | goto done; 39 | if (!(sbi->s_mount_state & EXT4_VALID_FS)) 40 | ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " 41 | @@ -2179,6 +2180,15 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 42 | ext4_msg(sb, KERN_WARNING, 43 | "warning: checktime reached, " 44 | "running e2fsck is recommended"); 45 | + handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1); 46 | + if (IS_ERR(handle)) 47 | + return PTR_ERR(handle); 48 | + err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); 49 | + if (err) { 50 | + stop_journal: 51 | + ext4_journal_stop(handle); 52 | + return err; 53 | + } 54 | if (!sbi->s_journal) 55 | es->s_state &= cpu_to_le16(~EXT4_VALID_FS); 56 | if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) 57 | @@ -2188,7 +2198,17 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, 58 | ext4_update_dynamic_rev(sb); 59 | if (sbi->s_journal) 60 | ext4_set_feature_journal_needs_recovery(sb); 61 | - 62 | + if (journal_devnum) 63 | + 
es->s_journal_dev = cpu_to_le32(journal_devnum); 64 | + if (DUMMY_ENCRYPTION_ENABLED(sbi)) 65 | + ext4_set_feature_encrypt(sb); 66 | + err = ext4_handle_dirty_super(handle, sb); 67 | + if (err) 68 | + goto stop_journal; 69 | + err = ext4_journal_stop(handle); 70 | + if (err) 71 | + return err; 72 | + ext4_journal_force_commit(sbi->s_journal); 73 | err = ext4_commit_super(sb, 1); 74 | done: 75 | if (test_opt(sb, DEBUG)) 76 | @@ -4229,8 +4249,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) 77 | 78 | set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 79 | 80 | - sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; 81 | - 82 | no_journal: 83 | if (!test_opt(sb, NO_MBCACHE)) { 84 | sbi->s_ea_block_cache = ext4_xattr_create_cache(); 85 | @@ -4257,12 +4275,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) 86 | goto failed_mount_wq; 87 | } 88 | 89 | - if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) && 90 | - !ext4_has_feature_encrypt(sb)) { 91 | - ext4_set_feature_encrypt(sb); 92 | - ext4_commit_super(sb, 1); 93 | - } 94 | - 95 | /* 96 | * Get the # of file system overhead blocks from the 97 | * superblock if present. 
98 | @@ -4311,7 +4323,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) 99 | goto failed_mount4; 100 | } 101 | 102 | - ret = ext4_setup_super(sb, es, sb_rdonly(sb)); 103 | + ret = ext4_setup_super(sb, es, journal_devnum, sb_rdonly(sb)); 104 | if (ret == -EROFS) { 105 | sb->s_flags |= SB_RDONLY; 106 | ret = 0; 107 | @@ -4410,6 +4422,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) 108 | } 109 | #endif /* CONFIG_QUOTA */ 110 | 111 | + if (sbi->s_journal) 112 | + sbi->s_journal->j_commit_callback = 113 | + ext4_journal_commit_callback; 114 | EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; 115 | ext4_orphan_cleanup(sb, es); 116 | EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 117 | @@ -4780,15 +4795,6 @@ static int ext4_load_journal(struct super_block *sb, 118 | 119 | EXT4_SB(sb)->s_journal = journal; 120 | ext4_clear_journal_err(sb, es); 121 | - 122 | - if (!really_read_only && journal_devnum && 123 | - journal_devnum != le32_to_cpu(es->s_journal_dev)) { 124 | - es->s_journal_dev = cpu_to_le32(journal_devnum); 125 | - 126 | - /* Make sure we flush the recovery flag to disk. */ 127 | - ext4_commit_super(sb, 1); 128 | - } 129 | - 130 | return 0; 131 | } 132 | 133 | @@ -5263,7 +5269,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) 134 | ext4_clear_journal_err(sb, es); 135 | sbi->s_mount_state = le16_to_cpu(es->s_state); 136 | 137 | - err = ext4_setup_super(sb, es, 0); 138 | + err = ext4_setup_super(sb, es, 0, 0); 139 | if (err) 140 | goto restore_opts; 141 | 142 | -------------------------------------------------------------------------------- /load-jmap-from-journal: -------------------------------------------------------------------------------- 1 | jbd2: load jmap from journal 2 | 3 | If the lazy journal feature is enabled, instead of replaying the 4 | journal, read the journal into journal map. 
5 | 6 | Signed-off-by: Theodore Ts'o 7 | --- 8 | fs/jbd2/journal.c | 27 +++++++++--------------- 9 | fs/jbd2/recovery.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------- 10 | 2 files changed, 90 insertions(+), 42 deletions(-) 11 | 12 | diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c 13 | index 9c097ddfe63f..8060ab4805eb 100644 14 | --- a/fs/jbd2/journal.c 15 | +++ b/fs/jbd2/journal.c 16 | @@ -1276,31 +1276,24 @@ static void journal_fail_superblock (journal_t *journal) 17 | 18 | /* 19 | * Given a journal_t structure, initialise the various fields for 20 | - * startup of a new journaling session. We use this both when creating 21 | - * a journal, and after recovering an old journal to reset it for 22 | - * subsequent use. 23 | + * startup of a new journaling session. 24 | */ 25 | - 26 | static int journal_reset(journal_t *journal) 27 | { 28 | journal_superblock_t *sb = journal->j_superblock; 29 | - unsigned long long first, last; 30 | + int free; 31 | 32 | - first = be32_to_cpu(sb->s_first); 33 | - last = be32_to_cpu(sb->s_maxlen); 34 | - if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) { 35 | - printk(KERN_ERR "JBD2: Journal too short (blocks %llu-%llu).\n", 36 | - first, last); 37 | + if (journal->j_first + JBD2_MIN_JOURNAL_BLOCKS > journal->j_last + 1) { 38 | + printk(KERN_ERR "JBD2: Journal too short (blocks %lu-%lu).\n", 39 | + journal->j_first, journal->j_last); 40 | journal_fail_superblock(journal); 41 | return -EINVAL; 42 | } 43 | 44 | - journal->j_first = first; 45 | - journal->j_last = last; 46 | - 47 | - journal->j_head = first; 48 | - journal->j_tail = first; 49 | - journal->j_free = last - first; 50 | + free = journal->j_tail - journal->j_head; 51 | + if (free <= 0) 52 | + free += journal->j_last - journal->j_first; 53 | + journal->j_free = free; 54 | 55 | journal->j_tail_sequence = journal->j_transaction_sequence; 56 | journal->j_commit_sequence = journal->j_transaction_sequence - 1; 57 | @@ -1320,7 
+1313,7 @@ static int journal_reset(journal_t *journal) 58 | journal->j_tail, journal->j_tail_sequence, 59 | journal->j_errno); 60 | journal->j_flags |= JBD2_FLUSHED; 61 | - } else { 62 | + } else if ((journal->j_flags & JBD2_LAZY) == 0) { 63 | /* Lock here to make assertions happy... */ 64 | mutex_lock_io(&journal->j_checkpoint_mutex); 65 | /* 66 | diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c 67 | index da100044566c..7a74ea1860a9 100644 68 | --- a/fs/jbd2/recovery.c 69 | +++ b/fs/jbd2/recovery.c 70 | @@ -22,6 +22,7 @@ 71 | #include 72 | #include 73 | #include 74 | +#include "jmap.h" 75 | #endif 76 | 77 | /* 78 | @@ -32,17 +33,18 @@ struct recovery_info 79 | { 80 | tid_t start_transaction; 81 | tid_t end_transaction; 82 | + int head_block; 83 | 84 | int nr_replays; 85 | int nr_revokes; 86 | int nr_revoke_hits; 87 | }; 88 | 89 | -enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; 90 | +enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY, PASS_JMAP}; 91 | static int do_one_pass(journal_t *journal, 92 | struct recovery_info *info, enum passtype pass); 93 | -static int scan_revoke_records(journal_t *, struct buffer_head *, 94 | - tid_t, struct recovery_info *); 95 | +static int scan_revoke_records(journal_t *, struct buffer_head *, enum passtype, 96 | + tid_t, struct recovery_info *); 97 | 98 | #ifdef __KERNEL__ 99 | 100 | @@ -255,11 +257,16 @@ int jbd2_journal_recover(journal_t *journal) 101 | sb = journal->j_superblock; 102 | 103 | /* 104 | + * Initialize journal's head and tail assuming the recovery 105 | + * was successful and we're not doing lazy journalling. 106 | + */ 107 | + journal->j_head = journal->j_tail = journal->j_first; 108 | + 109 | + /* 110 | * The journal superblock's s_start field (the current log head) 111 | * is always zero if, and only if, the journal was cleanly 112 | * unmounted. 
113 | */ 114 | - 115 | if (!sb->s_start) { 116 | jbd_debug(1, "No recovery required, last transaction %d\n", 117 | be32_to_cpu(sb->s_sequence)); 118 | @@ -267,11 +274,15 @@ int jbd2_journal_recover(journal_t *journal) 119 | return 0; 120 | } 121 | 122 | - err = do_one_pass(journal, &info, PASS_SCAN); 123 | - if (!err) 124 | - err = do_one_pass(journal, &info, PASS_REVOKE); 125 | - if (!err) 126 | - err = do_one_pass(journal, &info, PASS_REPLAY); 127 | + if (journal->j_flags & JBD2_LAZY) 128 | + err = do_one_pass(journal, &info, PASS_JMAP); 129 | + else { 130 | + err = do_one_pass(journal, &info, PASS_SCAN); 131 | + if (!err) 132 | + err = do_one_pass(journal, &info, PASS_REVOKE); 133 | + if (!err) 134 | + err = do_one_pass(journal, &info, PASS_REPLAY); 135 | + } 136 | 137 | jbd_debug(1, "JBD2: recovery, exit status %d, " 138 | "recovered transactions %u to %u\n", 139 | @@ -279,10 +290,22 @@ int jbd2_journal_recover(journal_t *journal) 140 | jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n", 141 | info.nr_replays, info.nr_revoke_hits, info.nr_revokes); 142 | 143 | - /* Restart the log at the next transaction ID, thus invalidating 144 | - * any existing commit records in the log. */ 145 | + /* Restart the log at the next transaction ID */ 146 | journal->j_transaction_sequence = info.end_transaction; 147 | 148 | + /* 149 | + * In lazy journalling mode, we need to preserve the existing 150 | + * contents of the journal, so set j_head and j_tail 151 | + * accordingly. 
152 | + */ 153 | + if (journal->j_flags & JBD2_LAZY) { 154 | + if (err) 155 | + return err; 156 | + journal->j_head = info.head_block; 157 | + journal->j_tail = be32_to_cpu(sb->s_start); 158 | + return 0; 159 | + } 160 | + 161 | jbd2_journal_clear_revoke(journal); 162 | err2 = sync_blockdev(journal->j_fs_dev); 163 | if (!err) 164 | @@ -431,6 +454,7 @@ static int do_one_pass(journal_t *journal, 165 | __u32 crc32_sum = ~0; /* Transactional Checksums */ 166 | int descr_csum_size = 0; 167 | int block_error = 0; 168 | + int new_txn = 1; 169 | 170 | /* 171 | * First thing is to establish what we expect to find in the log 172 | @@ -443,7 +467,7 @@ static int do_one_pass(journal_t *journal, 173 | next_log_block = be32_to_cpu(sb->s_start); 174 | 175 | first_commit_ID = next_commit_ID; 176 | - if (pass == PASS_SCAN) 177 | + if (pass == PASS_SCAN || pass == PASS_JMAP) 178 | info->start_transaction = first_commit_ID; 179 | 180 | jbd_debug(1, "Starting recovery pass %d\n", pass); 181 | @@ -468,7 +492,7 @@ static int do_one_pass(journal_t *journal, 182 | * check right now that we haven't gone past the end of 183 | * the log. */ 184 | 185 | - if (pass != PASS_SCAN) 186 | + if (pass != PASS_SCAN && pass != PASS_JMAP) 187 | if (tid_geq(next_commit_ID, info->end_transaction)) 188 | break; 189 | 190 | @@ -484,9 +508,6 @@ static int do_one_pass(journal_t *journal, 191 | if (err) 192 | goto failed; 193 | 194 | - next_log_block++; 195 | - wrap(journal, next_log_block); 196 | - 197 | /* What kind of buffer is it? 
198 | * 199 | * If it is a descriptor block, check that it has the 200 | @@ -510,6 +531,14 @@ static int do_one_pass(journal_t *journal, 201 | break; 202 | } 203 | 204 | + if ((pass == PASS_JMAP) && new_txn) { 205 | + jbd2_add_new_transaction_infos(journal, sequence, next_log_block); 206 | + new_txn = 0; 207 | + } 208 | + 209 | + next_log_block++; 210 | + wrap(journal, next_log_block); 211 | + 212 | /* OK, we have a valid descriptor block which matches 213 | * all of the sequence number checks. What are we going 214 | * to do with it? That depends on the pass... */ 215 | @@ -535,7 +564,7 @@ static int do_one_pass(journal_t *journal, 216 | * in pass REPLAY; if journal_checksums enabled, then 217 | * calculate checksums in PASS_SCAN, otherwise, 218 | * just skip over the blocks it describes. */ 219 | - if (pass != PASS_REPLAY) { 220 | + if ((pass != PASS_REPLAY) && (pass != PASS_JMAP)) { 221 | if (pass == PASS_SCAN && 222 | jbd2_has_feature_checksum(journal) && 223 | !info->end_transaction) { 224 | @@ -562,12 +591,28 @@ static int do_one_pass(journal_t *journal, 225 | while ((tagp - bh->b_data + tag_bytes) 226 | <= journal->j_blocksize - descr_csum_size) { 227 | unsigned long io_block; 228 | + unsigned long long log_block; 229 | 230 | tag = (journal_block_tag_t *) tagp; 231 | flags = be16_to_cpu(tag->t_flags); 232 | 233 | io_block = next_log_block++; 234 | wrap(journal, next_log_block); 235 | + if (pass == PASS_JMAP) { 236 | + struct blk_mapping map; 237 | + 238 | + err = jbd2_journal_bmap(journal, 239 | + io_block, 240 | + &log_block); 241 | + if (err) 242 | + goto failed; 243 | + map.fsblk = read_tag_block(journal, tag); 244 | + map.logblk = log_block; 245 | + err = jbd2_add_mapping(journal, &map); 246 | + if (err) 247 | + goto failed; 248 | + goto skip_write; 249 | + } 250 | err = jread(&obh, journal, io_block); 251 | if (err) { 252 | /* Recover what we can, but 253 | @@ -753,6 +798,10 @@ static int do_one_pass(journal_t *journal, 254 | break; 255 | } 256 | } 257 
| + if (pass == PASS_JMAP) { 258 | + jbd2_finish_transaction_infos(journal); 259 | + new_txn = 1; 260 | + } 261 | brelse(bh); 262 | next_commit_ID++; 263 | continue; 264 | @@ -760,12 +809,12 @@ static int do_one_pass(journal_t *journal, 265 | case JBD2_REVOKE_BLOCK: 266 | /* If we aren't in the REVOKE pass, then we can 267 | * just skip over this block. */ 268 | - if (pass != PASS_REVOKE) { 269 | + if (pass != PASS_REVOKE && pass != PASS_JMAP) { 270 | brelse(bh); 271 | continue; 272 | } 273 | 274 | - err = scan_revoke_records(journal, bh, 275 | + err = scan_revoke_records(journal, bh, pass, 276 | next_commit_ID, info); 277 | brelse(bh); 278 | if (err) 279 | @@ -788,9 +837,10 @@ static int do_one_pass(journal_t *journal, 280 | * transaction marks the end of the valid log. 281 | */ 282 | 283 | - if (pass == PASS_SCAN) { 284 | + if (pass == PASS_SCAN || pass == PASS_JMAP) { 285 | if (!info->end_transaction) 286 | info->end_transaction = next_commit_ID; 287 | + info->head_block = next_log_block; 288 | } else { 289 | /* It's really bad news if different passes end up at 290 | * different places (but possible due to IO errors). */ 291 | @@ -813,7 +863,8 @@ static int do_one_pass(journal_t *journal, 292 | /* Scan a revoke record, marking all blocks mentioned as revoked. 
*/ 293 | 294 | static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, 295 | - tid_t sequence, struct recovery_info *info) 296 | + enum passtype pass, tid_t sequence, 297 | + struct recovery_info *info) 298 | { 299 | jbd2_journal_revoke_header_t *header; 300 | int offset, max; 301 | @@ -839,16 +890,20 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, 302 | 303 | while (offset + record_len <= max) { 304 | unsigned long long blocknr; 305 | - int err; 306 | 307 | if (record_len == 4) 308 | blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); 309 | else 310 | blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset))); 311 | offset += record_len; 312 | - err = jbd2_journal_set_revoke(journal, blocknr, sequence); 313 | - if (err) 314 | - return err; 315 | + if (pass == PASS_JMAP) 316 | + jbd2_jmap_revoke(journal, blocknr); 317 | + else { 318 | + int err = jbd2_journal_set_revoke(journal, blocknr, 319 | + sequence); 320 | + if (err) 321 | + return err; 322 | + } 323 | ++info->nr_revokes; 324 | } 325 | return 0; 326 | -------------------------------------------------------------------------------- /old-patches/add-blkdiscard-ioctl: -------------------------------------------------------------------------------- 1 | ext4: add BLKDISCARD ioctl 2 | 3 | The blkdiscard ioctl previously only worked on block devices. Allow 4 | this ioctl to work on ext4 files. 
5 | 6 | Google-Bug-Id: 11517631 7 | 8 | Signed-off-by: "Theodore Ts'o" 9 | --- 10 | fs/ext4/ext4.h | 5 ++++ 11 | fs/ext4/extents.c | 38 +++++++++++++++++++------ 12 | fs/ext4/ioctl.c | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 13 | 3 files changed, 172 insertions(+), 9 deletions(-) 14 | 15 | diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h 16 | index 122cc74..68d88c7 100644 17 | --- a/fs/ext4/ext4.h 18 | +++ b/fs/ext4/ext4.h 19 | @@ -3208,6 +3208,8 @@ extern int ext4_check_blockref(const char *, unsigned int, 20 | /* extents.c */ 21 | struct ext4_ext_path; 22 | struct ext4_extent; 23 | +typedef int (*extent_iterator_t)(struct inode *inode, struct extent_status *es, 24 | + unsigned int flags, void *private); 25 | 26 | /* 27 | * Maximum number of logical blocks in a file; ext4_extent's ee_block is 28 | @@ -3252,6 +3254,9 @@ extern int ext4_find_delalloc_range(struct inode *inode, 29 | ext4_lblk_t lblk_end); 30 | extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); 31 | extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path); 32 | +extern int ext4_extent_iterator(struct inode *inode, 33 | + ext4_lblk_t block, ext4_lblk_t num, 34 | + extent_iterator_t callback, void *private); 35 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 36 | __u64 start, __u64 len); 37 | extern int ext4_ext_precache(struct inode *inode); 38 | diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c 39 | index 572fb4346..c32f0c1 100644 40 | --- a/fs/ext4/extents.c 41 | +++ b/fs/ext4/extents.c 42 | @@ -2150,9 +2150,13 @@ cleanup: 43 | return err; 44 | } 45 | 46 | -static int ext4_fill_fiemap_extents(struct inode *inode, 47 | - ext4_lblk_t block, ext4_lblk_t num, 48 | - struct fiemap_extent_info *fieinfo) 49 | + 50 | +typedef int (*extent_iterator_t)(struct inode *inode, struct extent_status *es, 51 | + unsigned int flags, void *private); 52 | + 53 | +int 
ext4_extent_iterator(struct inode *inode, 54 | + ext4_lblk_t block, ext4_lblk_t num, 55 | + extent_iterator_t callback, void *private) 56 | { 57 | struct ext4_ext_path *path = NULL; 58 | struct ext4_extent *ex; 59 | @@ -2161,7 +2165,6 @@ static int ext4_fill_fiemap_extents(struct inode *inode, 60 | ext4_lblk_t last = block + num; 61 | int exists, depth = 0, err = 0; 62 | unsigned int flags = 0; 63 | - unsigned char blksize_bits = inode->i_sb->s_blocksize_bits; 64 | 65 | while (block < last && block != EXT_MAX_BLOCKS) { 66 | num = last - block; 67 | @@ -2278,11 +2281,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode, 68 | } 69 | 70 | if (exists) { 71 | - err = fiemap_fill_next_extent(fieinfo, 72 | - (__u64)es.es_lblk << blksize_bits, 73 | - (__u64)es.es_pblk << blksize_bits, 74 | - (__u64)es.es_len << blksize_bits, 75 | - flags); 76 | + err = callback(inode, &es, flags, private); 77 | if (err < 0) 78 | break; 79 | if (err == 1) { 80 | @@ -2341,6 +2340,27 @@ static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode, 81 | return len; 82 | } 83 | 84 | +static int call_fill_fiemap(struct inode *inode, struct extent_status *es, 85 | + unsigned int flags, void *private) 86 | +{ 87 | + unsigned char blksize_bits = inode->i_sb->s_blocksize_bits; 88 | + 89 | + return fiemap_fill_next_extent(private, 90 | + (__u64)es->es_lblk << blksize_bits, 91 | + (__u64)es->es_pblk << blksize_bits, 92 | + (__u64)es->es_len << blksize_bits, 93 | + flags); 94 | +} 95 | + 96 | +static int ext4_fill_fiemap_extents(struct inode *inode, 97 | + ext4_lblk_t block, ext4_lblk_t num, 98 | + struct fiemap_extent_info *fieinfo) 99 | +{ 100 | + return ext4_extent_iterator(inode, block, num, 101 | + call_fill_fiemap, fieinfo); 102 | +} 103 | + 104 | + 105 | /* 106 | * ext4_ext_put_gap_in_cache: 107 | * calculate boundaries of the gap that the requested block fits into 108 | diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c 109 | index 6c6be1d..fa8bac0 100644 110 | --- a/fs/ext4/ioctl.c 111 | 
+++ b/fs/ext4/ioctl.c 112 | @@ -468,6 +468,132 @@ static int write_user_mdata(unsigned long arg, 113 | } 114 | #endif 115 | 116 | +static int discard_callback(struct inode *inode, struct extent_status *es, 117 | + unsigned int flags, void *private) 118 | +{ 119 | + struct ext4_map_blocks *map = private; 120 | + ext4_lblk_t es_lblk = es->es_lblk; 121 | + ext4_lblk_t es_len = es->es_len; 122 | + ext4_fsblk_t es_pblk = es->es_pblk; 123 | + 124 | + if (flags & (FIEMAP_EXTENT_UNKNOWN | 125 | + FIEMAP_EXTENT_ENCODED | 126 | + FIEMAP_EXTENT_DATA_ENCRYPTED | 127 | + FIEMAP_EXTENT_DELALLOC | 128 | + FIEMAP_EXTENT_DATA_TAIL | 129 | + FIEMAP_EXTENT_DATA_INLINE | 130 | + FIEMAP_EXTENT_NOT_ALIGNED | 131 | + FIEMAP_EXTENT_SHARED)) 132 | + return 0; 133 | + 134 | + if (es_lblk < map->m_lblk) { 135 | + ext4_lblk_t d = map->m_lblk - es_lblk; 136 | + if (d > es_len) 137 | + return 0; 138 | + es_lblk += d; 139 | + es_pblk += d; 140 | + es_len -= d; 141 | + } 142 | + 143 | + if (es_lblk + es_len > map->m_lblk + map->m_len) 144 | + es_len -= es_lblk + es_len - (map->m_lblk + map->m_len); 145 | +#ifdef BLKDISCARD_DEBUG 146 | + ext4_msg(inode->i_sb, KERN_NOTICE, "discard: %llu len %lu", 147 | + (unsigned long long) es_pblk, (unsigned long) es_len); 148 | + return 0; 149 | +#else 150 | + return sb_issue_discard(inode->i_sb, es_pblk, es_len, GFP_KERNEL, 0); 151 | +#endif 152 | +} 153 | + 154 | +static int blkdiscard_inode(struct inode *inode, u64 start_offset, u64 len) 155 | +{ 156 | + struct super_block *sb = inode->i_sb; 157 | + struct ext4_map_blocks map; 158 | + unsigned int num; 159 | + 160 | + if (!S_ISREG(inode->i_mode)) 161 | + return -EINVAL; 162 | + 163 | + if (!blk_queue_discard(bdev_get_queue(sb->s_bdev))) 164 | + return -EOPNOTSUPP; 165 | + 166 | + if (!bdev_discard_zeroes_data(sb->s_bdev) && !capable(CAP_SYS_ADMIN)) 167 | + return -EOPNOTSUPP; 168 | + 169 | + num = start_offset & (sb->s_blocksize - 1); 170 | + if (num) { 171 | + num = sb->s_blocksize - num; 172 | + 
start_offset += num; 173 | + len = (len > num) ? len - num : 0; 174 | + } 175 | + if (len == 0) 176 | + return 0; 177 | + if (start_offset > sb->s_maxbytes) 178 | + return -EFBIG; 179 | + if (len > sb->s_maxbytes || (sb->s_maxbytes - len) < start_offset) 180 | + len = sb->s_maxbytes - start_offset; 181 | + 182 | + map.m_lblk = start_offset >> sb->s_blocksize_bits; 183 | + map.m_len = len >> sb->s_blocksize_bits; 184 | + 185 | +#ifdef BLKDISCARD_DEBUG 186 | + ext4_msg(sb, KERN_NOTICE, "blkdiscard range: %lu len %lu", 187 | + (unsigned long) map.m_lblk, (unsigned long) map.m_len); 188 | +#endif 189 | + 190 | + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 191 | + return ext4_extent_iterator(inode, map.m_lblk, map.m_len, 192 | + discard_callback, &map); 193 | + 194 | + num = map.m_len; 195 | + while (num) { 196 | + int ret = ext4_map_blocks(NULL, inode, &map, 0); 197 | + 198 | + if (ret < 0) 199 | + return ret; 200 | + 201 | + if (ret == 0) { 202 | +#ifdef BLKDISCARD_DEBUG 203 | + ext4_msg(sb, KERN_NOTICE, 204 | + "skip: lblk %lu len %lu ret %lu num %lu", 205 | + (unsigned long) map.m_lblk, 206 | + (unsigned long) map.m_len, 207 | + (unsigned long) ret, 208 | + (unsigned long) num); 209 | +#endif 210 | + map.m_lblk++; 211 | + num--; 212 | + continue; 213 | + } 214 | +#ifdef BLKDISCARD_DEBUG 215 | + ext4_msg(sb, KERN_NOTICE, 216 | + "walk: lblk %lu pblk %llu len %lu ret %lu num %lu", 217 | + (unsigned long) map.m_lblk, 218 | + (unsigned long long) map.m_pblk, 219 | + (unsigned long) map.m_len, 220 | + (unsigned long) ret, 221 | + (unsigned long) num); 222 | +#endif 223 | + if (ret > num) 224 | + ret = num; 225 | + map.m_lblk += ret; 226 | + num -= ret; 227 | + map.m_len = num; 228 | + 229 | +#ifdef BLKDISCARD_DEBUG 230 | + ext4_msg(sb, KERN_NOTICE, "discard: %llu len %lu", 231 | + (unsigned long long) map.m_pblk, (unsigned long) ret); 232 | +#else 233 | + ret = sb_issue_discard(sb, map.m_pblk, ret, 234 | + GFP_KERNEL, 0); 235 | + if (ret) 236 | + return ret; 237 
| +#endif 238 | + } 239 | + return 0; 240 | +} 241 | + 242 | long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 243 | { 244 | struct inode *inode = file_inode(filp); 245 | @@ -1006,6 +1132,17 @@ encryption_policy_out: 246 | return -EOPNOTSUPP; 247 | #endif 248 | } 249 | + case BLKDISCARD: { 250 | + uint64_t range[2]; 251 | + 252 | + if (!(filp->f_mode & FMODE_WRITE)) 253 | + return -EBADF; 254 | + 255 | + if (copy_from_user(range, (void __user *)arg, sizeof(range))) 256 | + return -EFAULT; 257 | + 258 | + return blkdiscard_inode(file_inode(filp), range[0], range[1]); 259 | + } 260 | default: 261 | return -ENOTTY; 262 | } 263 | @@ -1075,6 +1212,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 264 | case EXT4_IOC_GET_ENCRYPTION_METADATA: 265 | case EXT4_IOC_SET_ENCRYPTION_METADATA: 266 | case EXT4_IOC_GET_ENCRYPTED_FILENAME: 267 | + case BLKDISCARD: 268 | break; 269 | default: 270 | return -ENOIOCTLCMD; 271 | -------------------------------------------------------------------------------- /old-patches/add-encryption-debug-files: -------------------------------------------------------------------------------- 1 | ext4: add debugging counters for crypto allocations 2 | 3 | Signed-off-by: Theodore Ts'o 4 | --- 5 | fs/ext4/counter_debug_list.h | 3 +++ 6 | fs/ext4/ext4.h | 6 ++++++ 7 | fs/ext4/page-io.c | 2 ++ 8 | fs/ext4/sysfs.c | 19 +++++++++++++++++++ 9 | 4 files changed, 30 insertions(+) 10 | 11 | diff --git a/fs/ext4/counter_debug_list.h b/fs/ext4/counter_debug_list.h 12 | new file mode 100644 13 | index 0000000..a0eb6d2 14 | --- /dev/null 15 | +++ b/fs/ext4/counter_debug_list.h 16 | @@ -0,0 +1,3 @@ 17 | +EXT4_COUNTER_DEBUG(pageio_bio_submit) 18 | +EXT4_COUNTER_DEBUG(pageio_bio_finish) 19 | + 20 | diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h 21 | index 766b7f7..e4990ac 100644 22 | --- a/fs/ext4/ext4.h 23 | +++ b/fs/ext4/ext4.h 24 | @@ -59,6 +59,12 @@ 25 | #define ext4_debug(fmt, ...) 
no_printk(fmt, ##__VA_ARGS__) 26 | #endif 27 | 28 | +#define EXT4_DEBUG_COUNTER(x) atomic_inc(&ext4_##x) 29 | + 30 | +#define EXT4_COUNTER_DEBUG(x) extern atomic_t ext4_##x; 31 | +#include "counter_debug_list.h" 32 | +#undef EXT4_COUNTER_DEBUG 33 | + 34 | /* 35 | * Turn on EXT_DEBUG to get lots of info about extents operations. 36 | */ 37 | diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c 38 | index 17fbe38..29b5d61 100644 39 | --- a/fs/ext4/page-io.c 40 | +++ b/fs/ext4/page-io.c 41 | @@ -63,6 +63,7 @@ static void ext4_finish_bio(struct bio *bio) 42 | int i; 43 | struct bio_vec *bvec; 44 | 45 | + EXT4_DEBUG_COUNTER(pageio_bio_finish); 46 | bio_for_each_segment_all(bvec, bio, i) { 47 | struct page *page = bvec->bv_page; 48 | #ifdef CONFIG_EXT4_FS_ENCRYPTION 49 | @@ -358,6 +359,7 @@ void ext4_io_submit(struct ext4_io_submit *io) 50 | WRITE_SYNC : WRITE; 51 | bio_get(io->io_bio); 52 | submit_bio(io_op, io->io_bio); 53 | + EXT4_DEBUG_COUNTER(pageio_bio_submit); 54 | bio_put(io->io_bio); 55 | } 56 | io->io_bio = NULL; 57 | diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c 58 | index 62bef0f..12aa1bd 100644 59 | --- a/fs/ext4/sysfs.c 60 | +++ b/fs/ext4/sysfs.c 61 | @@ -233,6 +233,24 @@ static struct attribute *ext4_feat_attrs[] = { 62 | NULL, 63 | }; 64 | 65 | +#define EXT4_ATTR_DEBUG_COUNTER(_name) \ 66 | + EXT4_ATTR_PTR(_name, 0444, pointer_atomic, &ext4_##_name) 67 | + 68 | +#define EXT4_COUNTER_DEBUG(x) atomic_t ext4_##x; 69 | +#include "counter_debug_list.h" 70 | +#undef EXT4_COUNTER_DEBUG 71 | + 72 | +#define EXT4_COUNTER_DEBUG(x) EXT4_ATTR_DEBUG_COUNTER(x); 73 | +#include "counter_debug_list.h" 74 | +#undef EXT4_COUNTER_DEBUG 75 | + 76 | +#define EXT4_COUNTER_DEBUG(x) ATTR_LIST(x), 77 | +static struct attribute *ext4_global_attrs[] = { 78 | +#include "counter_debug_list.h" 79 | + NULL, 80 | +}; 81 | +#undef EXT4_COUNTER_DEBUG 82 | + 83 | static void *calc_ptr(struct ext4_attr *a, struct ext4_sb_info *sbi) 84 | { 85 | switch (a->attr_ptr) { 86 | @@ -334,6 +352,7 @@ 
static struct kobj_type ext4_sb_ktype = { 87 | }; 88 | 89 | static struct kobj_type ext4_ktype = { 90 | + .default_attrs = ext4_global_attrs, 91 | .sysfs_ops = &ext4_attr_ops, 92 | }; 93 | 94 | -------------------------------------------------------------------------------- /old-patches/add-fallocate-mode-blocking-for-debugging: -------------------------------------------------------------------------------- 1 | ext4: add fallocate mode blocking for debugging purposes 2 | 3 | If a particular fallocate mode is causing test failures, give the 4 | tester the ability to block a particular fallocate mode so that the 5 | use of a particular fallocate mode will be reported as not supported. 6 | 7 | For example, if the COLLAPSE_RANGE fallocate mode is causing test 8 | failures, this allows us to suppress it so we can more easily test the 9 | rest of the file system code. 10 | 11 | Signed-off-by: "Theodore Ts'o" 12 | --- 13 | fs/ext4/extents.c | 18 ++++++++++++++++++ 14 | 1 file changed, 18 insertions(+) 15 | 16 | diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c 17 | index 64b4003..f477832 100644 18 | --- a/fs/ext4/extents.c 19 | +++ b/fs/ext4/extents.c 20 | @@ -29,6 +29,7 @@ 21 | * - smart tree reduction 22 | */ 23 | 24 | +#include 25 | #include 26 | #include 27 | #include 28 | @@ -4862,6 +4863,14 @@ out_mutex: 29 | return ret; 30 | } 31 | 32 | +#ifdef CONFIG_EXT4_DEBUG 33 | +int ext4_fallocate_mode_block __read_mostly; 34 | + 35 | +module_param_named(fallocate_mode_block, ext4_fallocate_mode_block, int, 0644); 36 | +MODULE_PARM_DESC(fallocate_mode_block, 37 | + "Fallocate modes which are blocked for debugging purposes"); 38 | +#endif 39 | + 40 | /* 41 | * preallocate space for a file. This implements ext4's fallocate file 42 | * operation, which gets called from sys_fallocate system call. 
43 | @@ -4881,6 +4890,15 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) 44 | struct timespec tv; 45 | unsigned int blkbits = inode->i_blkbits; 46 | 47 | +#ifdef CONFIG_EXT4_DEBUG 48 | + /* 49 | + * For debugging purposes, allow certain fallocate operations 50 | + * to be disabled 51 | + */ 52 | + if (unlikely(mode & ext4_fallocate_mode_block)) 53 | + return -EOPNOTSUPP; 54 | +#endif 55 | + 56 | /* Return error if mode is not supported */ 57 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | 58 | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) 59 | -------------------------------------------------------------------------------- /old-patches/add-squelch-errors-support: -------------------------------------------------------------------------------- 1 | ext4: add option for squelching ext4 errors to prevent dmesg from filling up 2 | 3 | Only print one error per inode; this is enough to know that something 4 | is wrong with an inode, without filling dmesg by spamming the system 5 | with messages over and over again. 6 | 7 | This is enabled via sysfs option, which is currently off by default. 8 | Some environments may want to turn this on by default. Eventually we 9 | may want to make this be something which is tunable by a superblock 10 | flag, perhaps. 
11 | 12 | Addresses-Google-Bug: #2507977 13 | 14 | Signed-off-by: "Theodore Ts'o" 15 | --- 16 | fs/ext4/ext4.h | 2 ++ 17 | fs/ext4/super.c | 2 ++ 18 | 2 files changed, 4 insertions(+) 19 | 20 | diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h 21 | index fca1efb..fafa312 100644 22 | --- a/fs/ext4/ext4.h 23 | +++ b/fs/ext4/ext4.h 24 | @@ -1203,6 +1203,7 @@ struct ext4_super_block { 25 | #define EXT4_MF_MNTDIR_SAMPLED 0x0001 26 | #define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ 27 | #define EXT4_MF_TEST_DUMMY_ENCRYPTION 0x0004 28 | +#define EXT4_MF_FS_SQUELCH 0x0008 /* Squelch file system errors */ 29 | 30 | #ifdef CONFIG_EXT4_FS_ENCRYPTION 31 | #define DUMMY_ENCRYPTION_ENABLED(sbi) (unlikely((sbi)->s_mount_flags & \ 32 | @@ -1450,6 +1451,7 @@ enum { 33 | EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ 34 | EXT4_STATE_ORDERED_MODE, /* data=ordered mode */ 35 | EXT4_STATE_EXT_PRECACHED, /* extents have been precached */ 36 | + EXT4_STATE_ERR_SQUELCHED, /* squelched error */ 37 | }; 38 | 39 | #define EXT4_INODE_BIT_FNS(name, field, offset) \ 40 | diff --git a/fs/ext4/super.c b/fs/ext4/super.c 41 | index f106700..e57ce82 100644 42 | --- a/fs/ext4/super.c 43 | +++ b/fs/ext4/super.c 44 | @@ -2750,6 +2750,7 @@ EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); 45 | EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); 46 | EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time); 47 | EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time); 48 | +EXT4_RW_ATTR_SBI_BOOL(squelch_errors, s_mount_flags, EXT4_MF_FS_SQUELCH); 49 | 50 | static struct attribute *ext4_attrs[] = { 51 | ATTR_LIST(delayed_allocation_blocks), 52 | @@ -2776,6 +2777,7 @@ static struct attribute *ext4_attrs[] = { 53 | ATTR_LIST(errors_count), 54 | ATTR_LIST(first_error_time), 55 | ATTR_LIST(last_error_time), 56 | + ATTR_LIST(squelch_errors), 57 | NULL, 58 | }; 59 | 60 | -------------------------------------------------------------------------------- 
/old-patches/add-sysfs-bool-support: -------------------------------------------------------------------------------- 1 | ext4: add support for adding boolean toggles to ext4's sysfs directory 2 | 3 | Signed-off-by: "Theodore Ts'o" 4 | --- 5 | fs/ext4/super.c | 38 ++++++++++++++++++++++++++++++++++---- 6 | 1 file changed, 34 insertions(+), 4 deletions(-) 7 | 8 | diff --git a/fs/ext4/super.c b/fs/ext4/super.c 9 | index bf1b8a8..2d294b7 100644 10 | --- a/fs/ext4/super.c 11 | +++ b/fs/ext4/super.c 12 | @@ -2535,6 +2535,7 @@ struct ext4_attr { 13 | int offset; 14 | int deprecated_val; 15 | } u; 16 | + unsigned int mask; 17 | }; 18 | 19 | static int parse_strtoull(const char *buf, 20 | @@ -2677,7 +2678,33 @@ static ssize_t sbi_deprecated_show(struct ext4_attr *a, 21 | return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val); 22 | } 23 | 24 | -#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ 25 | +static ssize_t sbi_bool_show(struct ext4_attr *a, 26 | + struct ext4_sb_info *sbi, char *buf) 27 | +{ 28 | + unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); 29 | + 30 | + return snprintf(buf, PAGE_SIZE, "%d\n", 31 | + ((*ui & a->mask) == 0) ? 
0 : 1); 32 | +} 33 | + 34 | +static ssize_t sbi_bool_store(struct ext4_attr *a, 35 | + struct ext4_sb_info *sbi, 36 | + const char *buf, size_t count) 37 | +{ 38 | + unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); 39 | + unsigned long long t; 40 | + 41 | + if (parse_strtoull(buf, 0xffffffff, &t)) 42 | + return -EINVAL; 43 | + if (t) 44 | + *ui |= a->mask; 45 | + else 46 | + *ui &= ~a->mask; 47 | + 48 | + return count; 49 | +} 50 | + 51 | +#define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname,_mask)\ 52 | static struct ext4_attr ext4_attr_##_name = { \ 53 | .attr = {.name = __stringify(_name), .mode = _mode }, \ 54 | .show = _show, \ 55 | @@ -2685,6 +2712,7 @@ static struct ext4_attr ext4_attr_##_name = { \ 56 | .u = { \ 57 | .offset = offsetof(struct ext4_sb_info, _elname),\ 58 | }, \ 59 | + .mask = (_mask), \ 60 | } 61 | 62 | #define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname) \ 63 | @@ -2707,8 +2735,10 @@ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) 64 | #define EXT4_RO_ATTR_ES_UI(name, elname) \ 65 | EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname) 66 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ 67 | - EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) 68 | - 69 | + EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname, 0) 70 | +#define EXT4_RW_ATTR_SBI_BOOL(name, elname, mask) \ 71 | + EXT4_ATTR_OFFSET(name, 0644, sbi_bool_show, sbi_bool_store, \ 72 | + elname, mask) 73 | #define ATTR_LIST(name) &ext4_attr_##name.attr 74 | #define EXT4_DEPRECATED_ATTR(_name, _val) \ 75 | static struct ext4_attr ext4_attr_##_name = { \ 76 | @@ -2724,7 +2754,7 @@ EXT4_RO_ATTR(session_write_kbytes); 77 | EXT4_RO_ATTR(lifetime_write_kbytes); 78 | EXT4_RW_ATTR(reserved_clusters); 79 | EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, 80 | - inode_readahead_blks_store, s_inode_readahead_blks); 81 | + inode_readahead_blks_store, s_inode_readahead_blks, 0); 82 | 
EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); 83 | EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); 84 | EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); 85 | -------------------------------------------------------------------------------- /old-patches/akpm-jbd2-locking-fix: -------------------------------------------------------------------------------- 1 | ext4: akpm's locking hack to fix locking delays 2 | 3 | This is a port of the following patch from Andrew Morton to ext4: 4 | 5 | http://lkml.org/lkml/2008/10/3/22 6 | 7 | This fixes a major contention problem in do_get_write_access() when a 8 | buffer is modified in both the current and committing transaction. 9 | 10 | Signed-off-by: "Theodore Ts'o" 11 | Cc: akpm@linux-foundation.org 12 | --- 13 | fs/ext4/ext4.h | 3 +++ 14 | fs/ext4/super.c | 11 +++++++++++ 15 | fs/jbd2/transaction.c | 12 ++++++++++-- 16 | include/linux/jbd2.h | 1 + 17 | 4 files changed, 25 insertions(+), 2 deletions(-) 18 | 19 | diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h 20 | index b7dbaf1..c5b26f7 100644 21 | --- a/fs/ext4/ext4.h 22 | +++ b/fs/ext4/ext4.h 23 | @@ -1006,6 +1006,9 @@ struct ext4_inode_info { 24 | #define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated 25 | file systems */ 26 | 27 | +#define EXT4_MOUNT2_AKPM_LOCK_HACK 0x80000000 /* akpm lock hack */ 28 | + 29 | + 30 | #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ 31 | ~EXT4_MOUNT_##opt 32 | #define set_opt(sb, opt) EXT4_SB(sb)->s_mount_opt |= \ 33 | diff --git a/fs/ext4/super.c b/fs/ext4/super.c 34 | index 7b3a41c..ccab545 100644 35 | --- a/fs/ext4/super.c 36 | +++ b/fs/ext4/super.c 37 | @@ -1135,6 +1135,7 @@ enum { 38 | Opt_dioread_nolock, Opt_dioread_lock, 39 | Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, 40 | Opt_max_dir_size_kb, Opt_nojournal_checksum, 41 | + Opt_akpm_lock_hack, 42 | }; 43 | 44 | static const match_table_t tokens = { 45 | @@ -1193,6 +1194,7 @@ static const match_table_t tokens = { 46 | {Opt_i_version, 
"i_version"}, 47 | {Opt_dax, "dax"}, 48 | {Opt_stripe, "stripe=%u"}, 49 | + {Opt_akpm_lock_hack, "akpm_lock_hack"}, 50 | {Opt_delalloc, "delalloc"}, 51 | {Opt_lazytime, "lazytime"}, 52 | {Opt_nolazytime, "nolazytime"}, 53 | @@ -1460,6 +1462,9 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, 54 | case Opt_nolazytime: 55 | sb->s_flags &= ~MS_LAZYTIME; 56 | return 1; 57 | + case Opt_akpm_lock_hack: 58 | + set_opt2(sb, AKPM_LOCK_HACK); 59 | + return 1; 60 | } 61 | 62 | for (m = ext4_mount_opts; m->token != Opt_err; m++) 63 | @@ -1813,6 +1818,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, 64 | SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); 65 | if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) 66 | SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); 67 | + if (test_opt2(sb, AKPM_LOCK_HACK)) 68 | + seq_puts(seq, ",akpm_lock_hack"); 69 | if (sb->s_flags & MS_I_VERSION) 70 | SEQ_OPTS_PUTS("i_version"); 71 | if (nodefs || sbi->s_stripe) 72 | @@ -4442,6 +4449,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) 73 | journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; 74 | else 75 | journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; 76 | + if (test_opt2(sb, AKPM_LOCK_HACK)) 77 | + journal->j_flags |= JBD2_LOCK_HACK; 78 | + else 79 | + journal->j_flags &= ~JBD2_LOCK_HACK; 80 | write_unlock(&journal->j_state_lock); 81 | } 82 | 83 | diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c 84 | index 93ffee2..4ba2b76 100644 85 | --- a/fs/jbd2/transaction.c 86 | +++ b/fs/jbd2/transaction.c 87 | @@ -784,6 +784,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, 88 | char *frozen_buffer = NULL; 89 | int need_copy = 0; 90 | unsigned long start_lock, time_lock; 91 | + int locked = 0; 92 | 93 | WARN_ON(!transaction); 94 | if (is_handle_aborted(handle)) 95 | @@ -799,7 +800,13 @@ repeat: 96 | /* @@@ Need to check for errors here at some point. 
*/ 97 | 98 | start_lock = jiffies; 99 | - lock_buffer(bh); 100 | + if (journal->j_flags & JBD2_LOCK_HACK) { 101 | + if (trylock_buffer(bh)) 102 | + locked = 1; /* lolz */ 103 | + } else { 104 | + lock_buffer(bh); 105 | + locked = 1; 106 | + } 107 | jbd_lock_bh_state(bh); 108 | 109 | /* If it takes too long to lock the buffer, trace it */ 110 | @@ -846,7 +853,8 @@ repeat: 111 | set_buffer_jbddirty(bh); 112 | } 113 | 114 | - unlock_buffer(bh); 115 | + if (locked) 116 | + unlock_buffer(bh); 117 | 118 | error = -EROFS; 119 | if (is_handle_aborted(handle)) { 120 | diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h 121 | index 20e7f78..0f17d76 100644 122 | --- a/include/linux/jbd2.h 123 | +++ b/include/linux/jbd2.h 124 | @@ -1007,6 +1007,7 @@ struct journal_s 125 | #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file 126 | * data write error in ordered 127 | * mode */ 128 | +#define JBD2_LOCK_HACK 0x080 /* akpm's locking hack */ 129 | 130 | /* 131 | * Function declarations for the journaling transaction and buffer 132 | -------------------------------------------------------------------------------- /old-patches/block-dio-during-truncate: -------------------------------------------------------------------------------- 1 | ext4: block direct I/O writes during ext4_truncate 2 | 3 | Just as in ext4_punch_hole() it is important that we block DIO writes 4 | while the truncate is proceeding, since during the overwriting DIO 5 | write, we drop i_mutex, which means a truncate could start while the 6 | Direct I/O operation is still in progress. 
7 | 8 | Signed-off-by: "Theodore Ts'o" 9 | Cc: stable@vger.kernel.org 10 | --- 11 | fs/ext4/inode.c | 10 ++++++++-- 12 | 1 file changed, 8 insertions(+), 2 deletions(-) 13 | 14 | diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c 15 | index 98b9bff..3c5edf2 100644 16 | --- a/fs/ext4/inode.c 17 | +++ b/fs/ext4/inode.c 18 | @@ -3659,12 +3659,16 @@ void ext4_truncate(struct inode *inode) 19 | if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) 20 | ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); 21 | 22 | + /* Wait all existing dio workers, newcomers will block on i_mutex */ 23 | + ext4_inode_block_unlocked_dio(inode); 24 | + inode_dio_wait(inode); 25 | + 26 | if (ext4_has_inline_data(inode)) { 27 | int has_inline = 1; 28 | 29 | ext4_inline_data_truncate(inode, &has_inline); 30 | if (has_inline) 31 | - return; 32 | + goto out_resume; 33 | } 34 | 35 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 36 | @@ -3675,7 +3679,7 @@ void ext4_truncate(struct inode *inode) 37 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); 38 | if (IS_ERR(handle)) { 39 | ext4_std_error(inode->i_sb, PTR_ERR(handle)); 40 | - return; 41 | + goto out_resume; 42 | } 43 | 44 | if (inode->i_size & (inode->i_sb->s_blocksize - 1)) 45 | @@ -3722,6 +3726,8 @@ out_stop: 46 | ext4_mark_inode_dirty(handle, inode); 47 | ext4_journal_stop(handle); 48 | 49 | +out_resume: 50 | + ext4_inode_resume_unlocked_dio(inode); 51 | trace_ext4_truncate_exit(inode); 52 | } 53 | 54 | -------------------------------------------------------------------------------- /old-patches/commit-as-soon-as-possible-after-log_start_commit: -------------------------------------------------------------------------------- 1 | jbd2: commit as soon as possible after log_start_commit 2 | 3 | Once a transaction has been requested to be committed, don't let any 4 | other handles start under that transaction, and don't allow any 5 | pending transactions to be extended (i.e., in the case of 6 | 
unlink/ftruncate). 7 | 8 | The idea is once the transaction has had log_start_commit() called on 9 | it, at least one thread is blocked waiting for that transaction to 10 | commit, and over time, more and more threads will end up getting 11 | blocked. In order to avoid high variability in file system operations 12 | getting blocked behind a blocked start_this_handle(), we should 13 | try to get the commit started as soon as possible. 14 | 15 | Signed-off-by: "Theodore Ts'o" 16 | --- 17 | fs/jbd2/transaction.c | 4 ++-- 18 | 1 file changed, 2 insertions(+), 2 deletions(-) 19 | 20 | diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c 21 | index 7aa9a32..e1c0b4a 100644 22 | --- a/fs/jbd2/transaction.c 23 | +++ b/fs/jbd2/transaction.c 24 | @@ -186,7 +186,7 @@ static int add_transaction_credits(journal_t *journal, int blocks, 25 | * If the current transaction is locked down for commit, wait 26 | * for the lock to be released. 27 | */ 28 | - if (t->t_state == T_LOCKED) { 29 | + if (t->t_state == T_LOCKED || t->t_requested) { 30 | wait_transaction_locked(journal); 31 | return 1; 32 | } 33 | @@ -559,7 +559,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks) 34 | read_lock(&journal->j_state_lock); 35 | 36 | /* Don't extend a locked-down transaction! */ 37 | - if (transaction->t_state != T_RUNNING) { 38 | + if (transaction->t_state != T_RUNNING || transaction->t_requested) { 39 | jbd_debug(3, "denied handle %p %d blocks: " 40 | "transaction not running\n", handle, nblocks); 41 | goto error_out; 42 | -------------------------------------------------------------------------------- /old-patches/crypto-add-ciphertext_access-mount-option: -------------------------------------------------------------------------------- 1 | ext4 crypto: add ciphertext_access mount option 2 | 3 | Add a mount option which allows root to be able to access the 4 | ciphertext of a file by reading it using O_DIRECT. 
5 | 6 | Signed-off-by: Theodore Ts'o 7 | --- 8 | fs/ext4/ext4.h | 1 + 9 | fs/ext4/file.c | 5 ++++- 10 | fs/ext4/indirect.c | 5 ++--- 11 | fs/ext4/inode.c | 17 ++++++++++------- 12 | fs/ext4/super.c | 5 +++++ 13 | 5 files changed, 22 insertions(+), 11 deletions(-) 14 | 15 | diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h 16 | index 1e20fa9..44278aa7 100644 17 | --- a/fs/ext4/ext4.h 18 | +++ b/fs/ext4/ext4.h 19 | @@ -1052,6 +1052,7 @@ struct ext4_inode_info { 20 | #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ 21 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ 22 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ 23 | +#define EXT4_MOUNT_CIPHERTEXT_ACCESS 0x2000000 /* Direct I/O to ciphertext */ 24 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ 25 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ 26 | #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ 27 | diff --git a/fs/ext4/file.c b/fs/ext4/file.c 28 | index 749b222..60683ab 100644 29 | --- a/fs/ext4/file.c 30 | +++ b/fs/ext4/file.c 31 | @@ -388,7 +388,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp) 32 | ret = ext4_get_encryption_info(inode); 33 | if (ret) 34 | return -EACCES; 35 | - if (ext4_encryption_info(inode) == NULL) 36 | + if ((ext4_encryption_info(inode) == NULL) && 37 | + !(test_opt(inode->i_sb, CIPHERTEXT_ACCESS) && 38 | + ((filp->f_flags & O_ACCMODE) == O_RDONLY) && 39 | + capable(CAP_SYS_ADMIN))) 40 | return -ENOKEY; 41 | } 42 | /* 43 | diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c 44 | index 355ef9c..bd9d89e 100644 45 | --- a/fs/ext4/indirect.c 46 | +++ b/fs/ext4/indirect.c 47 | @@ -655,11 +655,10 @@ ssize_t ext4_ind_direct_IO(struct kiocb *iocb, struct iov_iter *iter, 48 | int orphan = 0; 49 | size_t count = iov_iter_count(iter); 50 | int retries = 0; 51 | + loff_t final_size = offset + count; 52 | 53 | if 
(iov_iter_rw(iter) == WRITE) { 54 | - loff_t final_size = offset + count; 55 | - 56 | - if (final_size > inode->i_size) { 57 | + if (final_size > i_size_read(inode)) { 58 | /* Credits for sb + inode write */ 59 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 60 | if (IS_ERR(handle)) { 61 | diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c 62 | index ff2f3cd..16f6537 100644 63 | --- a/fs/ext4/inode.c 64 | +++ b/fs/ext4/inode.c 65 | @@ -3279,9 +3279,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, 66 | get_block_func = ext4_get_block_write; 67 | dio_flags = DIO_LOCKING; 68 | } 69 | -#ifdef CONFIG_EXT4_FS_ENCRYPTION 70 | - BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)); 71 | -#endif 72 | if (IS_DAX(inode)) 73 | ret = dax_do_io(iocb, inode, iter, offset, get_block_func, 74 | ext4_end_io_dio, dio_flags); 75 | @@ -3344,10 +3341,16 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter, 76 | size_t count = iov_iter_count(iter); 77 | ssize_t ret; 78 | 79 | -#ifdef CONFIG_EXT4_FS_ENCRYPTION 80 | - if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) 81 | - return 0; 82 | -#endif 83 | + if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { 84 | + if (iov_iter_rw(iter) == WRITE) 85 | + return 0; 86 | + if (test_opt(inode->i_sb, CIPHERTEXT_ACCESS) && 87 | + capable(CAP_SYS_ADMIN)) { 88 | + if (iov_iter_rw(iter) == WRITE) 89 | + return -EPERM; 90 | + } else 91 | + return 0; 92 | + } 93 | 94 | /* 95 | * If we are doing data journalling we don't support O_DIRECT 96 | diff --git a/fs/ext4/super.c b/fs/ext4/super.c 97 | index 486e869..de875b4 100644 98 | --- a/fs/ext4/super.c 99 | +++ b/fs/ext4/super.c 100 | @@ -1182,6 +1182,7 @@ enum { 101 | Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit, 102 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 103 | Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption, 104 | + Opt_ciphertext_access, 
Opt_nociphertext_access, 105 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 106 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 107 | Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, 108 | @@ -1273,6 +1274,8 @@ static const match_table_t tokens = { 109 | {Opt_noinit_itable, "noinit_itable"}, 110 | {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, 111 | {Opt_test_dummy_encryption, "test_dummy_encryption"}, 112 | + {Opt_ciphertext_access, "ciphertext_access"}, 113 | + {Opt_nociphertext_access, "nociphertext_access"}, 114 | {Opt_removed, "check=none"}, /* mount option from ext2/3 */ 115 | {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ 116 | {Opt_removed, "reservation"}, /* mount option from ext2/3 */ 117 | @@ -1475,6 +1478,8 @@ static const struct mount_opts { 118 | {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, 119 | {Opt_max_dir_size_kb, 0, MOPT_GTE0}, 120 | {Opt_test_dummy_encryption, 0, MOPT_GTE0}, 121 | + {Opt_ciphertext_access, EXT4_MOUNT_CIPHERTEXT_ACCESS, MOPT_SET}, 122 | + {Opt_nociphertext_access, EXT4_MOUNT_CIPHERTEXT_ACCESS, MOPT_CLEAR}, 123 | {Opt_err, 0, 0} 124 | }; 125 | 126 | -------------------------------------------------------------------------------- /old-patches/crypto-add-ioctls-to-backup-crypto-metadata: -------------------------------------------------------------------------------- 1 | ext4 crypto: add ioctls to allow backup of encryption metadata 2 | 3 | Add new ioctls which allow for the metadata of encrypted files (both 4 | the filename and the crypto policy) to be backed up and restored. 5 | 6 | [ Included fix from Dan Carpenter for a missing mutex_unlock. 
] 7 | 8 | Signed-off-by: Theodore Ts'o 9 | Signed-off-by: Dan Carpenter 10 | --- 11 | fs/ext4/crypto_key.c | 127 +++++++++++++++++++++++++++++++++- 12 | fs/ext4/crypto_policy.c | 10 +++ 13 | fs/ext4/ext4.h | 25 ++++++- 14 | fs/ext4/ext4_crypto.h | 14 ++++ 15 | fs/ext4/ialloc.c | 5 +- 16 | fs/ext4/ioctl.c | 113 +++++++++++++++++++++++++++++++ 17 | fs/ext4/namei.c | 307 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------- 18 | 7 files changed, 571 insertions(+), 30 deletions(-) 19 | 20 | diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c 21 | index 0129d68..865e41d 100644 22 | --- a/fs/ext4/crypto_key.c 23 | +++ b/fs/ext4/crypto_key.c 24 | @@ -11,11 +11,12 @@ 25 | #include 26 | #include 27 | #include 28 | +#include 29 | #include 30 | #include 31 | #include 32 | 33 | -#include "ext4.h" 34 | +#include "ext4_jbd2.h" 35 | #include "xattr.h" 36 | 37 | static void derive_crypt_complete(struct crypto_async_request *req, int rc) 38 | @@ -272,3 +273,127 @@ int ext4_has_encryption_key(struct inode *inode) 39 | 40 | return (ei->i_crypt_info != NULL); 41 | } 42 | + 43 | +int ext4_get_encryption_metadata(struct inode *inode, 44 | + struct ext4_rw_enc_mdata *mdata) 45 | +{ 46 | + unsigned char *cp = mdata->buf; 47 | + size_t size = mdata->u.len; 48 | + loff_t isize; 49 | + int res; 50 | + 51 | + if (size < sizeof(struct ext4_encryption_context) + 12) 52 | + return -EINVAL; 53 | + 54 | + if (!inode_owner_or_capable(inode) && !capable(CAP_SYS_ADMIN)) 55 | + return -EACCES; 56 | + 57 | + *cp++ = 'e'; 58 | + *cp++ = '5'; 59 | + *cp++ = 0; 60 | + *cp++ = 0; 61 | + isize = i_size_read(inode); 62 | + *((u32 *)cp) = cpu_to_le32(isize & 0xFFFFFFFF); 63 | + cp += 4; 64 | + *((u32 *)cp) = cpu_to_le32(isize >> 32); 65 | + cp += 4; 66 | + size -= 12; 67 | + 68 | + res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, 69 | + EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, 70 | + cp, size); 71 | + 72 | + if (res < 0) 73 | + return res; 74 | + if (res > size) 75 | + 
return -ENOSPC; 76 | + 77 | + mdata->u.len = res + 12; 78 | + 79 | + *((u16 *) &mdata->buf[2]) = cpu_to_le16(crc16(~0, mdata->buf, mdata->u.len)); 80 | + return 0; 81 | +} 82 | + 83 | +int ext4_set_encryption_metadata(struct inode *inode, 84 | + struct ext4_rw_enc_mdata *mdata) 85 | +{ 86 | + struct ext4_encryption_context *ctx; 87 | + unsigned char *cp = mdata->buf; 88 | + handle_t *handle = NULL; 89 | + loff_t size; 90 | + unsigned bs = inode->i_sb->s_blocksize; 91 | + int res; 92 | + u16 crc; 93 | + 94 | + if (!inode_owner_or_capable(inode) && !capable(CAP_SYS_ADMIN)) 95 | + return -EACCES; 96 | + 97 | + if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) 98 | + return -EINVAL; 99 | + 100 | + if (mdata->u.len != sizeof(struct ext4_encryption_context) + 12) 101 | + return -EINVAL; 102 | + 103 | + if (cp[0] != 'e' || cp[1] != '5') 104 | + return -EINVAL; 105 | + crc = le16_to_cpu(*(u16 *)(cp+2)); 106 | + cp[2] = cp[3] = 0; 107 | + cp += 4; 108 | + 109 | + if (crc != crc16(~0, mdata->buf, mdata->u.len)) 110 | + return -EINVAL; 111 | + 112 | + size = le32_to_cpu(*(u32 *) cp); 113 | + cp += 4; 114 | + size += ((u64) le32_to_cpu(*(u32 *) cp)) << 32; 115 | + cp += 4; 116 | + 117 | + ctx = (struct ext4_encryption_context *) cp; 118 | + res = ext4_validate_encryption_context(ctx); 119 | + if (res) 120 | + return res; 121 | + 122 | + res = ext4_convert_inline_data(inode); 123 | + if (res) 124 | + return res; 125 | + 126 | + res = filemap_write_and_wait(&inode->i_data); 127 | + if (res) 128 | + return res; 129 | + 130 | + mutex_lock(&inode->i_mutex); 131 | + if (S_ISREG(inode->i_mode) && 132 | + round_up(size, bs) != round_up(i_size_read(inode), bs)) { 133 | + res = -EINVAL; 134 | + goto errout; 135 | + } 136 | + 137 | + handle = ext4_journal_start(inode, EXT4_HT_MISC, 138 | + ext4_jbd2_credits_xattr(inode)); 139 | + if (IS_ERR(handle)) { 140 | + res = PTR_ERR(handle); 141 | + goto errout; 142 | + } 143 | + res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION, 144 | 
+ EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, 145 | + sizeof(struct ext4_encryption_context), 0); 146 | + if (res < 0) 147 | + goto errout; 148 | + ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); 149 | + ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 150 | + 151 | + if (S_ISREG(inode->i_mode)) { 152 | + i_size_write(inode, size); 153 | + EXT4_I(inode)->i_disksize = size; 154 | + } 155 | + res = ext4_mark_inode_dirty(handle, inode); 156 | + if (res) 157 | + EXT4_ERROR_INODE(inode, "Failed to mark inode dirty"); 158 | + else 159 | + res = ext4_get_encryption_info(inode); 160 | +errout: 161 | + mutex_unlock(&inode->i_mutex); 162 | + if (handle) 163 | + ext4_journal_stop(handle); 164 | + return res; 165 | +} 166 | diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c 167 | index ad05069..08565f5 100644 168 | --- a/fs/ext4/crypto_policy.c 169 | +++ b/fs/ext4/crypto_policy.c 170 | @@ -180,6 +180,16 @@ int ext4_is_child_context_consistent_with_parent(struct inode *parent, 171 | (parent_ci->ci_flags == child_ci->ci_flags)); 172 | } 173 | 174 | +int ext4_validate_encryption_context(struct ext4_encryption_context *ctx) 175 | +{ 176 | + if ((ctx->format != EXT4_ENCRYPTION_CONTEXT_FORMAT_V1) || 177 | + !ext4_valid_contents_enc_mode(ctx->contents_encryption_mode) || 178 | + !ext4_valid_filenames_enc_mode(ctx->filenames_encryption_mode) || 179 | + (ctx->flags & ~EXT4_POLICY_FLAGS_VALID)) 180 | + return -EINVAL; 181 | + return 0; 182 | +} 183 | + 184 | /** 185 | * ext4_inherit_context() - Sets a child context from its parent 186 | * @parent: Parent inode from which the context is inherited. 
187 | diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h 188 | index 9a92f8b..bb2c31d 100644 189 | --- a/fs/ext4/ext4.h 190 | +++ b/fs/ext4/ext4.h 191 | @@ -637,6 +637,10 @@ enum { 192 | #define EXT4_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct ext4_encryption_policy) 193 | #define EXT4_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) 194 | #define EXT4_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct ext4_encryption_policy) 195 | +#define EXT4_IOC_GET_ENCRYPTION_METADATA _IOWR('f', 22, struct ext4_encrypted_metadata) 196 | +#define EXT4_IOC_SET_ENCRYPTION_METADATA _IOR('f', 23, struct ext4_encrypted_metadata) 197 | +#define EXT4_IOC_GET_ENCRYPTED_FILENAME _IOWR('f', 24, struct ext4_encrypted_metadata) 198 | +#define EXT4_IOC_SET_ENCRYPTED_FILENAME _IOR('f', 25, struct ext4_encrypted_metadata) 199 | 200 | #ifndef FS_IOC_FSGETXATTR 201 | /* Until the uapi changes get merged for project quota... */ 202 | @@ -2294,6 +2298,7 @@ ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); 203 | /* crypto_policy.c */ 204 | int ext4_is_child_context_consistent_with_parent(struct inode *parent, 205 | struct inode *child); 206 | +int ext4_validate_encryption_context(struct ext4_encryption_context *ctx); 207 | int ext4_inherit_context(struct inode *parent, struct inode *child); 208 | void ext4_to_hex(char *dst, char *src, size_t src_size); 209 | int ext4_process_policy(const struct ext4_encryption_policy *policy, 210 | @@ -2380,6 +2385,10 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) { } 211 | void ext4_free_crypt_info(struct ext4_crypt_info *ci); 212 | void ext4_free_encryption_info(struct inode *inode, struct ext4_crypt_info *ci); 213 | int _ext4_get_encryption_info(struct inode *inode); 214 | +int ext4_set_encryption_metadata(struct inode *inode, 215 | + struct ext4_rw_enc_mdata *mdata); 216 | +int ext4_get_encryption_metadata(struct inode *inode, 217 | + struct ext4_rw_enc_mdata *mdata); 218 | 219 | #ifdef CONFIG_EXT4_FS_ENCRYPTION 220 | int 
ext4_has_encryption_key(struct inode *inode); 221 | @@ -2469,18 +2478,24 @@ extern int ext4fs_dirhash(const char *name, int len, struct 222 | dx_hash_info *hinfo); 223 | 224 | /* ialloc.c */ 225 | +#define EXT4_NEW_INODE_NOENCRYPT 0x0001 226 | extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t, 227 | const struct qstr *qstr, __u32 goal, 228 | uid_t *owner, int handle_type, 229 | - unsigned int line_no, int nblocks); 230 | + unsigned int line_no, int nblocks, 231 | + int flags); 232 | 233 | #define ext4_new_inode(handle, dir, mode, qstr, goal, owner) \ 234 | __ext4_new_inode((handle), (dir), (mode), (qstr), (goal), (owner), \ 235 | - 0, 0, 0) 236 | + 0, 0, 0, 0) 237 | #define ext4_new_inode_start_handle(dir, mode, qstr, goal, owner, \ 238 | type, nblocks) \ 239 | __ext4_new_inode(NULL, (dir), (mode), (qstr), (goal), (owner), \ 240 | - (type), __LINE__, (nblocks)) 241 | + (type), __LINE__, (nblocks), 0) 242 | +#define ext4_new_inode_start_handle_flags(dir, mode, qstr, goal, owner, \ 243 | + type, nblocks, flags) \ 244 | + __ext4_new_inode(NULL, (dir), (mode), (qstr), (goal), (owner), \ 245 | + (type), __LINE__, (nblocks), (flags)) 246 | 247 | 248 | extern void ext4_free_inode(handle_t *, struct inode *); 249 | @@ -2621,6 +2636,10 @@ extern int ext4_generic_delete_entry(handle_t *handle, 250 | int buf_size, 251 | int csum_size); 252 | extern int ext4_empty_dir(struct inode *inode); 253 | +extern int ext4_get_encrypted_filename(struct file *filp, 254 | + struct ext4_rw_enc_mdata *mdata); 255 | +extern int ext4_set_encrypted_filename(struct inode *dir, 256 | + struct ext4_rw_enc_mdata *efn); 257 | 258 | /* resize.c */ 259 | extern int ext4_group_add(struct super_block *sb, 260 | diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h 261 | index 1f73c29..600da7e 100644 262 | --- a/fs/ext4/ext4_crypto.h 263 | +++ b/fs/ext4/ext4_crypto.h 264 | @@ -156,4 +156,18 @@ static inline u32 encrypted_symlink_data_len(u32 l) 265 | return (l + sizeof(struct 
ext4_encrypted_symlink_data) - 1); 266 | } 267 | 268 | +/** 269 | + * Structure used for communicating encrypted metadata with userspace 270 | + */ 271 | +struct ext4_encrypted_metadata { 272 | + s32 fd; /* Only used by EXT4_IOC_SET_ENCRYPTED_FILENAME */ 273 | + u32 len; 274 | + unsigned char __user *data; 275 | +}; 276 | + 277 | +/* In-kernel structure */ 278 | +struct ext4_rw_enc_mdata { 279 | + struct ext4_encrypted_metadata u; 280 | + unsigned char *buf; 281 | +}; 282 | #endif /* _EXT4_CRYPTO_H */ 283 | diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c 284 | index 237b877..e96e4ae 100644 285 | --- a/fs/ext4/ialloc.c 286 | +++ b/fs/ext4/ialloc.c 287 | @@ -742,7 +742,7 @@ out: 288 | struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, 289 | umode_t mode, const struct qstr *qstr, 290 | __u32 goal, uid_t *owner, int handle_type, 291 | - unsigned int line_no, int nblocks) 292 | + unsigned int line_no, int nblocks, int flags) 293 | { 294 | struct super_block *sb; 295 | struct buffer_head *inode_bitmap_bh = NULL; 296 | @@ -764,7 +764,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, 297 | if (!dir || !dir->i_nlink) 298 | return ERR_PTR(-EPERM); 299 | 300 | - if ((ext4_encrypted_inode(dir) || 301 | + if (!(flags & EXT4_NEW_INODE_NOENCRYPT) && 302 | + (ext4_encrypted_inode(dir) || 303 | DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) && 304 | (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) { 305 | err = ext4_get_encryption_info(dir); 306 | diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c 307 | index eae5917..65d38e9 100644 308 | --- a/fs/ext4/ioctl.c 309 | +++ b/fs/ext4/ioctl.c 310 | @@ -435,6 +435,39 @@ static inline unsigned long ext4_xflags_to_iflags(__u32 xflags) 311 | return iflags; 312 | } 313 | 314 | +#ifdef CONFIG_EXT4_FS_ENCRYPTION 315 | +static int read_user_mdata(unsigned long arg, 316 | + struct ext4_rw_enc_mdata *mdata) 317 | +{ 318 | + if (copy_from_user(&mdata->u, 319 | + (struct ext4_encrypted_metadata __user *)arg, 
320 | + sizeof(struct ext4_encrypted_metadata))) 321 | + return -EFAULT; 322 | + /* Sanity check, as nothing should need to be this big */ 323 | + if (mdata->u.len > PAGE_SIZE) 324 | + return -EINVAL; 325 | + mdata->buf = kmalloc(mdata->u.len, GFP_KERNEL); 326 | + if (!mdata->buf) 327 | + return -ENOMEM; 328 | + if (copy_from_user(mdata->buf, mdata->u.data, mdata->u.len)) 329 | + return -EFAULT; 330 | + return 0; 331 | + 332 | +} 333 | + 334 | +static int write_user_mdata(unsigned long arg, 335 | + struct ext4_rw_enc_mdata *mdata) 336 | +{ 337 | + if (copy_to_user(mdata->u.data, mdata->buf, mdata->u.len)) 338 | + return -EFAULT; 339 | + if (copy_to_user((struct ext4_encrypted_metadata __user *)arg, 340 | + &mdata->u, 341 | + sizeof(struct ext4_encrypted_metadata))) 342 | + return -EFAULT; 343 | + return 0; 344 | +} 345 | +#endif 346 | + 347 | long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 348 | { 349 | struct inode *inode = file_inode(filp); 350 | @@ -896,6 +929,83 @@ encryption_policy_out: 351 | 352 | return 0; 353 | } 354 | + case EXT4_IOC_GET_ENCRYPTION_METADATA: { 355 | +#ifdef CONFIG_EXT4_FS_ENCRYPTION 356 | + struct ext4_rw_enc_mdata mdata; 357 | + int err = 0; 358 | + 359 | + if (!ext4_encrypted_inode(inode)) 360 | + return -ENOENT; 361 | + 362 | + err = read_user_mdata(arg, &mdata); 363 | + if (err) 364 | + return err; 365 | + err = ext4_get_encryption_metadata(inode, &mdata); 366 | + if (!err) 367 | + err = write_user_mdata(arg, &mdata); 368 | + kfree(mdata.buf); 369 | + return err; 370 | +#else 371 | + return -EOPNOTSUPP; 372 | +#endif 373 | + } 374 | + case EXT4_IOC_SET_ENCRYPTION_METADATA: { 375 | +#ifdef CONFIG_EXT4_FS_ENCRYPTION 376 | + struct ext4_rw_enc_mdata mdata; 377 | + int err = 0; 378 | + 379 | + if (ext4_encrypted_inode(inode)) 380 | + return -EINVAL; 381 | + err = read_user_mdata(arg, &mdata); 382 | + if (err) 383 | + return err; 384 | + err = mnt_want_write_file(filp); 385 | + if (!err) 386 | + err = 
ext4_set_encryption_metadata(inode, &mdata); 387 | + mnt_drop_write_file(filp); 388 | + kfree(mdata.buf); 389 | + return err; 390 | +#else 391 | + return -EOPNOTSUPP; 392 | +#endif 393 | + } 394 | + case EXT4_IOC_GET_ENCRYPTED_FILENAME: { 395 | +#ifdef CONFIG_EXT4_FS_ENCRYPTION 396 | + struct ext4_rw_enc_mdata mdata; 397 | + int err = 0; 398 | + 399 | + if (!ext4_encrypted_inode(inode)) 400 | + return -ENOENT; 401 | + err = read_user_mdata(arg, &mdata); 402 | + if (err) 403 | + return err; 404 | + err = ext4_get_encrypted_filename(filp, &mdata); 405 | + if (!err) 406 | + err = write_user_mdata(arg, &mdata); 407 | + kfree(mdata.buf); 408 | + return err; 409 | +#else 410 | + return -EOPNOTSUPP; 411 | +#endif 412 | + } 413 | + case EXT4_IOC_SET_ENCRYPTED_FILENAME: { 414 | +#ifdef CONFIG_EXT4_FS_ENCRYPTION 415 | + struct ext4_rw_enc_mdata mdata; 416 | + int err = 0; 417 | + 418 | + err = read_user_mdata(arg, &mdata); 419 | + if (err) 420 | + return err; 421 | + err = mnt_want_write_file(filp); 422 | + if (!err) 423 | + err = ext4_set_encrypted_filename(inode, &mdata); 424 | + mnt_drop_write_file(filp); 425 | + kfree(mdata.buf); 426 | + return err; 427 | +#else 428 | + return -EOPNOTSUPP; 429 | +#endif 430 | + } 431 | default: 432 | return -ENOTTY; 433 | } 434 | @@ -962,6 +1072,9 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 435 | case EXT4_IOC_SET_ENCRYPTION_POLICY: 436 | case EXT4_IOC_GET_ENCRYPTION_PWSALT: 437 | case EXT4_IOC_GET_ENCRYPTION_POLICY: 438 | + case EXT4_IOC_GET_ENCRYPTION_METADATA: 439 | + case EXT4_IOC_SET_ENCRYPTION_METADATA: 440 | + case EXT4_IOC_GET_ENCRYPTED_FILENAME: 441 | break; 442 | default: 443 | return -ENOIOCTLCMD; 444 | diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c 445 | index c07422d..32edbe3 100644 446 | --- a/fs/ext4/namei.c 447 | +++ b/fs/ext4/namei.c 448 | @@ -33,6 +33,8 @@ 449 | #include 450 | #include 451 | #include 452 | +#include 453 | +#include 454 | #include "ext4.h" 455 | #include 
"ext4_jbd2.h" 456 | 457 | @@ -2075,24 +2077,16 @@ out_frames: 458 | } 459 | 460 | /* 461 | - * ext4_add_entry() 462 | - * 463 | - * adds a file entry to the specified directory, using the same 464 | - * semantics as ext4_find_entry(). It returns NULL if it failed. 465 | - * 466 | - * NOTE!! The inode part of 'de' is left at 0 - which means you 467 | - * may not sleep between calling this and putting something into 468 | - * the entry, as someone else might have used it while you slept. 469 | + * Add a directory entry to a directory, given the filename and the 470 | + * inode it will point to. 471 | */ 472 | -static int ext4_add_entry(handle_t *handle, struct dentry *dentry, 473 | - struct inode *inode) 474 | +static int ext4_add_fname(handle_t *handle, struct inode *dir, 475 | + struct ext4_filename *fname, struct inode *inode) 476 | { 477 | - struct inode *dir = d_inode(dentry->d_parent); 478 | struct buffer_head *bh = NULL; 479 | struct ext4_dir_entry_2 *de; 480 | struct ext4_dir_entry_tail *t; 481 | struct super_block *sb; 482 | - struct ext4_filename fname; 483 | int retval; 484 | int dx_fallback=0; 485 | unsigned blocksize; 486 | @@ -2104,15 +2098,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, 487 | 488 | sb = dir->i_sb; 489 | blocksize = sb->s_blocksize; 490 | - if (!dentry->d_name.len) 491 | - return -EINVAL; 492 | - 493 | - retval = ext4_fname_setup_filename(dir, &dentry->d_name, 0, &fname); 494 | - if (retval) 495 | - return retval; 496 | 497 | if (ext4_has_inline_data(dir)) { 498 | - retval = ext4_try_add_inline_entry(handle, &fname, dir, inode); 499 | + retval = ext4_try_add_inline_entry(handle, fname, dir, inode); 500 | if (retval < 0) 501 | goto out; 502 | if (retval == 1) { 503 | @@ -2122,7 +2110,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, 504 | } 505 | 506 | if (is_dx(dir)) { 507 | - retval = ext4_dx_add_entry(handle, &fname, dir, inode); 508 | + retval = ext4_dx_add_entry(handle, fname, dir, 
inode); 509 | if (!retval || (retval != ERR_BAD_DX_DIR)) 510 | goto out; 511 | ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); 512 | @@ -2137,14 +2125,14 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, 513 | bh = NULL; 514 | goto out; 515 | } 516 | - retval = add_dirent_to_buf(handle, &fname, dir, inode, 517 | + retval = add_dirent_to_buf(handle, fname, dir, inode, 518 | NULL, bh); 519 | if (retval != -ENOSPC) 520 | goto out; 521 | 522 | if (blocks == 1 && !dx_fallback && 523 | ext4_has_feature_dir_index(sb)) { 524 | - retval = make_indexed_dir(handle, &fname, dir, 525 | + retval = make_indexed_dir(handle, fname, dir, 526 | inode, bh); 527 | bh = NULL; /* make_indexed_dir releases bh */ 528 | goto out; 529 | @@ -2166,9 +2154,8 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, 530 | initialize_dirent_tail(t, blocksize); 531 | } 532 | 533 | - retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh); 534 | + retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh); 535 | out: 536 | - ext4_fname_free_filename(&fname); 537 | brelse(bh); 538 | if (retval == 0) 539 | ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY); 540 | @@ -2176,6 +2163,29 @@ out: 541 | } 542 | 543 | /* 544 | + * Create a directory entry associated with the specified dentry and 545 | + * inode. 
546 | + */ 547 | +static int ext4_add_entry(handle_t *handle, struct dentry *dentry, 548 | + struct inode *inode) 549 | +{ 550 | + struct inode *dir = d_inode(dentry->d_parent); 551 | + struct ext4_filename fname; 552 | + int retval; 553 | + 554 | + if (!dentry->d_name.len) 555 | + return -EINVAL; 556 | + 557 | + retval = ext4_fname_setup_filename(dir, &dentry->d_name, 0, &fname); 558 | + if (retval) 559 | + return retval; 560 | + 561 | + retval = ext4_add_fname(handle, dir, &fname, inode); 562 | + ext4_fname_free_filename(&fname); 563 | + return retval; 564 | +} 565 | + 566 | +/* 567 | * Returns 0 for success, or a negative error value 568 | */ 569 | static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, 570 | @@ -3905,3 +3915,252 @@ const struct inode_operations ext4_special_inode_operations = { 571 | .get_acl = ext4_get_acl, 572 | .set_acl = ext4_set_acl, 573 | }; 574 | + 575 | +int ext4_get_encrypted_filename(struct file *filp, 576 | + struct ext4_rw_enc_mdata *mdata) 577 | +{ 578 | + unsigned char *cp = mdata->buf; 579 | + struct dentry *dentry = filp->f_path.dentry; 580 | + struct inode *inode = file_inode(filp); 581 | + struct inode *dir = dentry->d_parent->d_inode; 582 | + struct buffer_head *bh; 583 | + struct ext4_dir_entry_2 *de; 584 | + int isdir = S_ISDIR(inode->i_mode); 585 | + int len = isdir ? 10 : 4; 586 | + int ret; 587 | + 588 | + if (!dir || !ext4_encrypted_inode(dir)) 589 | + return -EINVAL; 590 | + 591 | + if (!inode_owner_or_capable(dir) && !capable(CAP_SYS_ADMIN)) 592 | + return -EACCES; 593 | + 594 | + if (mdata->u.len < len) 595 | + return -ENOSPC; 596 | + 597 | + *cp++ = 'e'; 598 | + *cp++ = isdir ? 
'd' : 'f'; 599 | + *cp++ = 0; 600 | + *cp++ = 0; 601 | + 602 | + if (isdir) { 603 | + *((u32 *)cp) = cpu_to_le32(inode->i_mode); 604 | + cp += 4; 605 | + ret = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, 606 | + EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, 607 | + NULL, 0); 608 | + if (ret < 0) 609 | + return ret; 610 | + *((u16 *)cp) = cpu_to_le16((u16) ret); 611 | + cp += 2; 612 | + 613 | + len += ret; 614 | + if (mdata->u.len < len) 615 | + return -ENOSPC; 616 | + ret = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, 617 | + EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, 618 | + cp, ret); 619 | + if (ret < 0) 620 | + return ret; 621 | + cp += ret; 622 | + } 623 | + 624 | + bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); 625 | + if (IS_ERR(bh)) 626 | + return PTR_ERR(bh); 627 | + if (de == NULL) 628 | + return -ENOENT; 629 | + 630 | + len += de->name_len; 631 | + if (mdata->u.len < len) 632 | + return -ENOSPC; 633 | + 634 | + mdata->u.len = len; 635 | + memcpy(cp, de->name, de->name_len); 636 | + *((u16 *) &mdata->buf[2]) = cpu_to_le16(crc16(~0, mdata->buf, 637 | + mdata->u.len)); 638 | + return 0; 639 | +} 640 | + 641 | +int ext4_set_encrypted_filename(struct inode *dir, 642 | + struct ext4_rw_enc_mdata *mdata) 643 | +{ 644 | + struct ext4_encryption_context *ctx = NULL; 645 | + struct ext4_filename fname; 646 | + unsigned char *cp = mdata->buf; 647 | + struct inode *inode = NULL; 648 | + struct fd fd; 649 | + handle_t *handle = NULL; 650 | + umode_t mode; 651 | + u16 crc, xlen, credits; 652 | + int retval = 0, retries = 0, do_retry = 0; 653 | + int len = mdata->u.len; 654 | + 655 | + if (!dir || !ext4_encrypted_inode(dir)) 656 | + return -EINVAL; 657 | + 658 | + retval = inode_permission(dir, MAY_WRITE | MAY_EXEC); 659 | + if (retval) 660 | + return retval; 661 | + 662 | + if (len < 4) 663 | + return -EINVAL; 664 | + 665 | + if (cp[0] != 'e' || 666 | + cp[1] != ((mdata->u.fd == -1) ? 
'd' : 'f')) 667 | + return -EINVAL; 668 | + crc = le16_to_cpu(*(u16 *)(cp+2)); 669 | + cp[2] = cp[3] = 0; 670 | + cp += 4; len -= 4; 671 | + 672 | + if (crc != crc16(~0, mdata->buf, mdata->u.len)) 673 | + return -EINVAL; 674 | + 675 | + if ((len < EXT4_CRYPTO_BLOCK_SIZE) || (len > EXT4_NAME_LEN + 1)) 676 | + return -EINVAL; 677 | + 678 | + retval = dquot_initialize(dir); 679 | + if (retval) 680 | + return retval; 681 | + 682 | + credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 683 | + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); 684 | + 685 | + if (mdata->u.fd >= 0) { 686 | + fd = fdget(mdata->u.fd); 687 | + if (!fd.file) 688 | + return -EBADF; 689 | + inode = file_inode(fd.file); 690 | + mode = inode->i_mode; 691 | + retval = -EISDIR; 692 | + if (S_ISDIR(mode)) 693 | + goto out; 694 | + } else if (mdata->u.fd == -1) { 695 | + /* do an encrypted mkdir */ 696 | + fd.file = NULL; 697 | + if (EXT4_DIR_LINK_MAX(dir)) 698 | + return -EMLINK; 699 | + if (len < 6) 700 | + return -EINVAL; 701 | + mode = le32_to_cpu(*(u32 *)cp); 702 | + cp += 4; 703 | + xlen = le16_to_cpu(*(u16 *)cp); 704 | + cp += 2; len -= 6; 705 | + 706 | + if (len < xlen || 707 | + xlen != sizeof(struct ext4_encryption_context)) 708 | + return -EINVAL; 709 | + 710 | + ctx = (struct ext4_encryption_context *) cp; 711 | + retval = ext4_validate_encryption_context(ctx); 712 | + if (retval) 713 | + return retval; 714 | + cp += xlen; len -= xlen; 715 | + 716 | + /* credits for the mkdir and xattr set */ 717 | + credits += (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 718 | + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 719 | + ext4_jbd2_credits_xattr(dir)); 720 | + retry: 721 | + inode = ext4_new_inode_start_handle_flags(dir, mode, NULL, 0, 722 | + NULL, EXT4_HT_DIR, credits, 723 | + EXT4_NEW_INODE_NOENCRYPT); 724 | + handle = ext4_journal_current_handle(); 725 | + if (IS_ERR(inode)) { 726 | + retval = PTR_ERR(inode); 727 | + inode = NULL; 728 | + goto out; 729 | + } 730 | + inode->i_op = &ext4_dir_inode_operations; 731 | + inode->i_fop 
= &ext4_dir_operations; 732 | + retval = ext4_init_new_dir(handle, dir, inode); 733 | + if (retval) 734 | + goto out; 735 | + 736 | + retval = ext4_xattr_set_handle(handle, inode, 737 | + EXT4_XATTR_INDEX_ENCRYPTION, 738 | + EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, 739 | + sizeof(struct ext4_encryption_context), 740 | + fd.file ? XATTR_REPLACE : XATTR_CREATE); 741 | + if (retval) 742 | + goto out; 743 | + ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); 744 | + ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 745 | + 746 | + goto insert_fname; 747 | + } else 748 | + return -EINVAL; 749 | + 750 | + 751 | + if ((mode & S_ISUID) || 752 | + ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))) { 753 | + /* 754 | + * root or the inode owner can link even in the case 755 | + * of "unsafe" hard link sources. See 756 | + * safe_hardlink_sources() in fs/namei.c 757 | + */ 758 | + if (!inode_owner_or_capable(inode) && !capable(CAP_SYS_ADMIN)) { 759 | + retval = -EACCES; 760 | + goto out; 761 | + } 762 | + } 763 | + 764 | + retval = inode_permission(inode, MAY_READ | MAY_WRITE); 765 | + if (!retval && !inode_owner_or_capable(inode) && 766 | + !capable(CAP_SYS_ADMIN)) 767 | + goto out; 768 | + 769 | + handle = ext4_journal_start(dir, EXT4_HT_DIR, 770 | + (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 771 | + EXT4_INDEX_EXTRA_TRANS_BLOCKS) + 2); 772 | + if (IS_ERR(handle)) { 773 | + retval = PTR_ERR(handle); 774 | + goto out; 775 | + } 776 | + 777 | +insert_fname: 778 | + if (!ext4_is_child_context_consistent_with_parent(dir, inode)) { 779 | + retval = -EPERM; 780 | + goto out; 781 | + } 782 | + 783 | + memset(&fname, 0, sizeof(fname)); 784 | + fname.disk_name.name = cp; 785 | + fname.disk_name.len = len; 786 | + retval = ext4_add_fname(handle, dir, &fname, inode); 787 | + if (retval) 788 | + goto out; 789 | + 790 | + if (fd.file) 791 | + ext4_inc_count(handle, inode); 792 | + ext4_mark_inode_dirty(handle, inode); 793 | + if (!fd.file) 794 | + ext4_inc_count(handle, dir); 795 | 
+ ext4_update_dx_flag(dir); 796 | + ext4_mark_inode_dirty(handle, dir); 797 | + if (fd.file == NULL) { 798 | + unlock_new_inode(inode); 799 | + iput(inode); 800 | + } 801 | + 802 | +out: 803 | + if (fd.file) 804 | + fdput(fd); 805 | + else if (retval && inode && (mdata->u.fd == -1)) { 806 | + /* need to undo a failed attempted mkdir */ 807 | + clear_nlink(inode); 808 | + unlock_new_inode(inode); 809 | + ext4_mark_inode_dirty(handle, inode); 810 | + iput(inode); 811 | + if (retval == -ENOSPC && 812 | + ext4_should_retry_alloc(dir->i_sb, &retries)) 813 | + do_retry++; 814 | + } 815 | + if (handle) 816 | + ext4_journal_stop(handle); 817 | + if (do_retry) { 818 | + do_retry = 0; 819 | + goto retry; 820 | + } 821 | + return retval; 822 | +} 823 | -------------------------------------------------------------------------------- /old-patches/crypto-rename-ext4_get_encryption_info: -------------------------------------------------------------------------------- 1 | --- 2 | fs/ext4/crypto.c | 15 +++++++++++---- 3 | fs/ext4/crypto_fname.c | 64 +++++++++++++++++++++++++++++++++++++++++----------------------- 4 | fs/ext4/crypto_key.c | 2 +- 5 | fs/ext4/crypto_policy.c | 43 ++++++++++++++++++++++--------------------- 6 | fs/ext4/dir.c | 8 ++++++-- 7 | fs/ext4/ext4.h | 37 ++++++++++++++++++++++--------------- 8 | fs/ext4/file.c | 12 +++++------- 9 | fs/ext4/ialloc.c | 6 +++--- 10 | fs/ext4/namei.c | 17 +++-------------- 11 | fs/ext4/symlink.c | 4 ---- 12 | 10 files changed, 114 insertions(+), 94 deletions(-) 13 | 14 | diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c 15 | index c802120..e38908d 100644 16 | --- a/fs/ext4/crypto.c 17 | +++ b/fs/ext4/crypto.c 18 | @@ -98,9 +98,8 @@ struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode) 19 | struct ext4_crypto_ctx *ctx = NULL; 20 | int res = 0; 21 | unsigned long flags; 22 | - struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; 23 | 24 | - if (ci == NULL) 25 | + if (EXT4_I(inode)->i_crypt_info == NULL) 26 | return 
ERR_PTR(-ENOKEY); 27 | 28 | /* 29 | @@ -264,10 +263,18 @@ static int ext4_page_crypto(struct inode *inode, 30 | struct ablkcipher_request *req = NULL; 31 | DECLARE_EXT4_COMPLETION_RESULT(ecr); 32 | struct scatterlist dst, src; 33 | - struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; 34 | - struct crypto_ablkcipher *tfm = ci->ci_ctfm; 35 | + struct ext4_crypt_info *ci; 36 | + struct crypto_ablkcipher *tfm; 37 | int res = 0; 38 | 39 | + rcu_read_lock(); 40 | + ci = EXT4_I(inode)->i_crypt_info; 41 | + if (ci == NULL) { 42 | + rcu_read_unlock(); 43 | + return -ENOKEY; 44 | + } 45 | + tfm = ci->ci_ctfm; 46 | + 47 | req = ablkcipher_request_alloc(tfm, GFP_NOFS); 48 | if (!req) { 49 | printk_ratelimited(KERN_ERR 50 | diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c 51 | index 2fbef8a..db2d134 100644 52 | --- a/fs/ext4/crypto_fname.c 53 | +++ b/fs/ext4/crypto_fname.c 54 | @@ -61,13 +61,13 @@ static unsigned max_name_len(struct inode *inode) 55 | * allocate sufficient memory to oname string. 56 | */ 57 | static int ext4_fname_encrypt(struct inode *inode, 58 | + struct ext4_crypt_info *ci, 59 | const struct qstr *iname, 60 | struct ext4_str *oname) 61 | { 62 | u32 ciphertext_len; 63 | struct ablkcipher_request *req = NULL; 64 | DECLARE_EXT4_COMPLETION_RESULT(ecr); 65 | - struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; 66 | struct crypto_ablkcipher *tfm = ci->ci_ctfm; 67 | int res = 0; 68 | char iv[EXT4_CRYPTO_BLOCK_SIZE]; 69 | @@ -141,6 +141,7 @@ static int ext4_fname_encrypt(struct inode *inode, 70 | * We trust the caller to allocate sufficient memory to oname string. 
71 | */ 72 | static int ext4_fname_decrypt(struct inode *inode, 73 | + struct ext4_crypt_info *ci, 74 | const struct ext4_str *iname, 75 | struct ext4_str *oname) 76 | { 77 | @@ -148,7 +149,6 @@ static int ext4_fname_decrypt(struct inode *inode, 78 | struct ablkcipher_request *req = NULL; 79 | DECLARE_EXT4_COMPLETION_RESULT(ecr); 80 | struct scatterlist src_sg, dst_sg; 81 | - struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; 82 | struct crypto_ablkcipher *tfm = ci->ci_ctfm; 83 | int res = 0; 84 | char iv[EXT4_CRYPTO_BLOCK_SIZE]; 85 | @@ -261,11 +261,13 @@ u32 ext4_fname_crypto_round_up(u32 size, u32 blksize) 86 | 87 | unsigned ext4_fname_encrypted_size(struct inode *inode, u32 ilen) 88 | { 89 | - struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; 90 | + struct ext4_crypt_info *ci; 91 | int padding = 32; 92 | 93 | - if (ci) 94 | + ci = ext4_get_crypt_info_rcu(inode); 95 | + if (!IS_ERR(ci) && ci) 96 | padding = 4 << (ci->ci_flags & EXT4_POLICY_FLAGS_PAD_MASK); 97 | + rcu_read_unlock(); 98 | if (ilen < EXT4_CRYPTO_BLOCK_SIZE) 99 | ilen = EXT4_CRYPTO_BLOCK_SIZE; 100 | return ext4_fname_crypto_round_up(ilen, padding); 101 | @@ -316,6 +318,7 @@ int _ext4_fname_disk_to_usr(struct inode *inode, 102 | { 103 | char buf[24]; 104 | int ret; 105 | + struct ext4_crypt_info *ci; 106 | 107 | if (iname->len < 3) { 108 | /*Check for . and .. 
*/ 109 | @@ -330,8 +333,15 @@ int _ext4_fname_disk_to_usr(struct inode *inode, 110 | EXT4_ERROR_INODE(inode, "encrypted inode too small"); 111 | return -EUCLEAN; 112 | } 113 | - if (EXT4_I(inode)->i_crypt_info) 114 | - return ext4_fname_decrypt(inode, iname, oname); 115 | + ci = ext4_get_crypt_info_rcu(inode); 116 | + if (IS_ERR(ci)) 117 | + return PTR_ERR(ci); 118 | + if (ci) { 119 | + int ret = ext4_fname_decrypt(inode, ci, iname, oname); 120 | + 121 | + rcu_read_unlock(); 122 | + return ret; 123 | + } 124 | 125 | if (iname->len <= EXT4_FNAME_CRYPTO_DIGEST_SIZE) { 126 | ret = digest_encode(iname->name, iname->len, oname->name); 127 | @@ -369,8 +379,7 @@ int ext4_fname_usr_to_disk(struct inode *inode, 128 | const struct qstr *iname, 129 | struct ext4_str *oname) 130 | { 131 | - int res; 132 | - struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; 133 | + struct ext4_crypt_info *ci; 134 | 135 | if (iname->len < 3) { 136 | /*Check for . and .. */ 137 | @@ -382,8 +391,13 @@ int ext4_fname_usr_to_disk(struct inode *inode, 138 | return oname->len; 139 | } 140 | } 141 | + ci = ext4_get_crypt_info_rcu(inode); 142 | + if (IS_ERR(ci)) 143 | + return PTR_ERR(ci); 144 | if (ci) { 145 | - res = ext4_fname_encrypt(inode, iname, oname); 146 | + int res = ext4_fname_encrypt(inode, ci, iname, oname); 147 | + 148 | + rcu_read_unlock(); 149 | return res; 150 | } 151 | /* Without a proper key, a user is not allowed to modify the filenames 152 | @@ -409,24 +423,29 @@ int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname, 153 | fname->disk_name.len = iname->len; 154 | return 0; 155 | } 156 | - ret = ext4_get_encryption_info(dir); 157 | + ret = ext4_fname_crypto_alloc_buffer(dir, iname->len, 158 | + &fname->crypto_buf); 159 | if (ret) 160 | return ret; 161 | - ci = EXT4_I(dir)->i_crypt_info; 162 | + 163 | + ci = ext4_get_crypt_info_rcu(dir); 164 | + if (IS_ERR(ci)) { 165 | + ret = PTR_ERR(ci); 166 | + goto errout; 167 | + } 168 | if (ci) { 169 | - ret = 
ext4_fname_crypto_alloc_buffer(dir, iname->len, 170 | - &fname->crypto_buf); 171 | - if (ret < 0) 172 | - return ret; 173 | - ret = ext4_fname_encrypt(dir, iname, &fname->crypto_buf); 174 | + ret = ext4_fname_encrypt(dir, ci, iname, &fname->crypto_buf); 175 | + rcu_read_unlock(); 176 | if (ret < 0) 177 | goto errout; 178 | fname->disk_name.name = fname->crypto_buf.name; 179 | fname->disk_name.len = fname->crypto_buf.len; 180 | return 0; 181 | } 182 | - if (!lookup) 183 | - return -EACCES; 184 | + if (!lookup) { 185 | + ret = -EACCES; 186 | + goto errout; 187 | + } 188 | 189 | /* We don't have the key and we are doing a lookup; decode the 190 | * user-supplied name 191 | @@ -434,12 +453,11 @@ int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname, 192 | if (iname->name[0] == '_') 193 | bigname = 1; 194 | if ((bigname && (iname->len != 33)) || 195 | - (!bigname && (iname->len > 43))) 196 | - return -ENOENT; 197 | + (!bigname && (iname->len > 43))) { 198 | + ret = -ENOENT; 199 | + goto errout; 200 | + } 201 | 202 | - fname->crypto_buf.name = kmalloc(32, GFP_KERNEL); 203 | - if (fname->crypto_buf.name == NULL) 204 | - return -ENOMEM; 205 | ret = digest_decode(iname->name + bigname, iname->len - bigname, 206 | fname->crypto_buf.name); 207 | if (ret < 0) { 208 | diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c 209 | index 9a16d1e..d9b4cc0 100644 210 | --- a/fs/ext4/crypto_key.c 211 | +++ b/fs/ext4/crypto_key.c 212 | @@ -111,7 +111,7 @@ void ext4_free_encryption_info(struct inode *inode, 213 | ext4_free_crypt_info(ci); 214 | } 215 | 216 | -int _ext4_get_encryption_info(struct inode *inode) 217 | +int ext4_setup_encryption_info(struct inode *inode) 218 | { 219 | struct ext4_inode_info *ei = EXT4_I(inode); 220 | struct ext4_crypt_info *crypt_info; 221 | diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c 222 | index ad05069..2f5743f 100644 223 | --- a/fs/ext4/crypto_policy.c 224 | +++ b/fs/ext4/crypto_policy.c 225 | @@ -159,25 +159,26 
@@ int ext4_is_child_context_consistent_with_parent(struct inode *parent, 226 | /* if the child directory is not encrypted, this is always a problem */ 227 | if (!ext4_encrypted_inode(child)) 228 | return 0; 229 | - res = ext4_get_encryption_info(parent); 230 | - if (res) 231 | + parent_ci = ext4_get_crypt_info_rcu(parent); 232 | + if (IS_ERR(parent_ci)) 233 | return 0; 234 | - res = ext4_get_encryption_info(child); 235 | - if (res) 236 | + child_ci = ext4_get_crypt_info_rcu(child); 237 | + if (IS_ERR(child_ci)) 238 | return 0; 239 | - parent_ci = EXT4_I(parent)->i_crypt_info; 240 | - child_ci = EXT4_I(child)->i_crypt_info; 241 | if (!parent_ci && !child_ci) 242 | - return 1; 243 | - if (!parent_ci || !child_ci) 244 | - return 0; 245 | - 246 | - return (memcmp(parent_ci->ci_master_key, 247 | - child_ci->ci_master_key, 248 | - EXT4_KEY_DESCRIPTOR_SIZE) == 0 && 249 | - (parent_ci->ci_data_mode == child_ci->ci_data_mode) && 250 | - (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) && 251 | - (parent_ci->ci_flags == child_ci->ci_flags)); 252 | + res = 1; 253 | + else if (!parent_ci || !child_ci) 254 | + res = 0; 255 | + else 256 | + res = (memcmp(parent_ci->ci_master_key, 257 | + child_ci->ci_master_key, 258 | + EXT4_KEY_DESCRIPTOR_SIZE) == 0 && 259 | + (parent_ci->ci_data_mode == child_ci->ci_data_mode) && 260 | + (parent_ci->ci_filename_mode == 261 | + child_ci->ci_filename_mode) && 262 | + (parent_ci->ci_flags == child_ci->ci_flags)); 263 | + rcu_read_unlock(); 264 | + return res; 265 | } 266 | 267 | /** 268 | @@ -193,10 +194,9 @@ int ext4_inherit_context(struct inode *parent, struct inode *child) 269 | struct ext4_crypt_info *ci; 270 | int res; 271 | 272 | - res = ext4_get_encryption_info(parent); 273 | - if (res < 0) 274 | - return res; 275 | - ci = EXT4_I(parent)->i_crypt_info; 276 | + ci = ext4_get_crypt_info_rcu(parent); 277 | + if (IS_ERR(ci)) 278 | + return PTR_ERR(ci); 279 | if (ci == NULL) 280 | return -ENOKEY; 281 | 282 | @@ -216,6 +216,7 @@ int 
ext4_inherit_context(struct inode *parent, struct inode *child) 283 | memcpy(ctx.master_key_descriptor, ci->ci_master_key, 284 | EXT4_KEY_DESCRIPTOR_SIZE); 285 | } 286 | + rcu_read_unlock(); 287 | get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE); 288 | res = ext4_xattr_set(child, EXT4_XATTR_INDEX_ENCRYPTION, 289 | EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx, 290 | @@ -223,7 +224,7 @@ int ext4_inherit_context(struct inode *parent, struct inode *child) 291 | if (!res) { 292 | ext4_set_inode_flag(child, EXT4_INODE_ENCRYPT); 293 | ext4_clear_inode_state(child, EXT4_STATE_MAY_INLINE_DATA); 294 | - res = ext4_get_encryption_info(child); 295 | + res = ext4_setup_encryption_info(child); 296 | } 297 | return res; 298 | } 299 | diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c 300 | index 1d1bca7..f4dba17 100644 301 | --- a/fs/ext4/dir.c 302 | +++ b/fs/ext4/dir.c 303 | @@ -594,8 +594,12 @@ finished: 304 | 305 | static int ext4_dir_open(struct inode * inode, struct file * filp) 306 | { 307 | - if (ext4_encrypted_inode(inode)) 308 | - return ext4_get_encryption_info(inode) ? 
-EACCES : 0; 309 | + if (ext4_encrypted_inode(inode)) { 310 | + int ret = ext4_setup_encryption_info(inode); 311 | + 312 | + if (ret && ret != -ENOKEY) 313 | + return -EACCES; 314 | + } 315 | return 0; 316 | } 317 | 318 | diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h 319 | index edbe347..16ca80d 100644 320 | --- a/fs/ext4/ext4.h 321 | +++ b/fs/ext4/ext4.h 322 | @@ -2327,39 +2327,46 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) { } 323 | /* crypto_key.c */ 324 | void ext4_free_crypt_info(struct ext4_crypt_info *ci); 325 | void ext4_free_encryption_info(struct inode *inode, struct ext4_crypt_info *ci); 326 | -int _ext4_get_encryption_info(struct inode *inode); 327 | +int ext4_setup_encryption_info(struct inode *inode); 328 | 329 | #ifdef CONFIG_EXT4_FS_ENCRYPTION 330 | int ext4_has_encryption_key(struct inode *inode); 331 | 332 | -static inline int ext4_get_encryption_info(struct inode *inode) 333 | +static inline 334 | +struct ext4_crypt_info *ext4_get_crypt_info_rcu(struct inode *inode) 335 | { 336 | - struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info; 337 | + int ret; 338 | + struct ext4_crypt_info *ci; 339 | 340 | + rcu_read_lock(); 341 | + ci = EXT4_I(inode)->i_crypt_info; 342 | if (!ci || 343 | (ci->ci_keyring_key && 344 | (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) | 345 | (1 << KEY_FLAG_REVOKED) | 346 | - (1 << KEY_FLAG_DEAD))))) 347 | - return _ext4_get_encryption_info(inode); 348 | - return 0; 349 | -} 350 | - 351 | -static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode) 352 | -{ 353 | - return EXT4_I(inode)->i_crypt_info; 354 | + (1 << KEY_FLAG_DEAD))))) { 355 | + rcu_read_unlock(); 356 | + ret = ext4_setup_encryption_info(inode); 357 | + if (ret && ret != -ENOKEY) { 358 | + return ERR_PTR(ret); 359 | + } 360 | + rcu_read_lock(); 361 | + } 362 | + ci = EXT4_I(inode)->i_crypt_info; 363 | + if (ci == NULL) 364 | + rcu_read_unlock(); 365 | + return ci; 366 | } 367 | - 368 | #else 369 | 
static inline int ext4_has_encryption_key(struct inode *inode) 370 | { 371 | return 0; 372 | } 373 | -static inline int ext4_get_encryption_info(struct inode *inode) 374 | +static inline int ext4_setup_encryption_info(struct inode *inode) 375 | { 376 | - return 0; 377 | + return -ENOKEY; 378 | } 379 | -static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode) 380 | +struct ext4_crypt_info *ext4_get_crypt_info_rcu(struct inode *inode) 381 | { 382 | return NULL; 383 | } 384 | diff --git a/fs/ext4/file.c b/fs/ext4/file.c 385 | index 749b222..c977c7a 100644 386 | --- a/fs/ext4/file.c 387 | +++ b/fs/ext4/file.c 388 | @@ -327,11 +327,9 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) 389 | struct inode *inode = file->f_mapping->host; 390 | 391 | if (ext4_encrypted_inode(inode)) { 392 | - int err = ext4_get_encryption_info(inode); 393 | + int err = ext4_setup_encryption_info(inode); 394 | if (err) 395 | - return 0; 396 | - if (ext4_encryption_info(inode) == NULL) 397 | - return -ENOKEY; 398 | + return err; 399 | } 400 | file_accessed(file); 401 | if (IS_DAX(file_inode(file))) { 402 | @@ -385,11 +383,11 @@ static int ext4_file_open(struct inode * inode, struct file * filp) 403 | } 404 | } 405 | if (ext4_encrypted_inode(inode)) { 406 | - ret = ext4_get_encryption_info(inode); 407 | + ret = ext4_setup_encryption_info(inode); 408 | + if (ret == -ENOKEY) 409 | + return ret; 410 | if (ret) 411 | return -EACCES; 412 | - if (ext4_encryption_info(inode) == NULL) 413 | - return -ENOKEY; 414 | } 415 | /* 416 | * Set up the jbd2_inode if we are opening the inode for 417 | diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c 418 | index 3fcfd50..3abf659 100644 419 | --- a/fs/ext4/ialloc.c 420 | +++ b/fs/ext4/ialloc.c 421 | @@ -765,11 +765,11 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, 422 | if ((ext4_encrypted_inode(dir) || 423 | DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))) && 424 | (S_ISREG(mode) || 
S_ISDIR(mode) || S_ISLNK(mode))) { 425 | - err = ext4_get_encryption_info(dir); 426 | + err = ext4_setup_encryption_info(dir); 427 | + if (err == -ENOKEY) 428 | + return ERR_PTR(-EPERM); 429 | if (err) 430 | return ERR_PTR(err); 431 | - if (ext4_encryption_info(dir) == NULL) 432 | - return ERR_PTR(-EPERM); 433 | if (!handle) 434 | nblocks += EXT4_DATA_TRANS_BLOCKS(dir->i_sb); 435 | encrypt = 1; 436 | diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c 437 | index 2047ff7..9992a22 100644 438 | --- a/fs/ext4/namei.c 439 | +++ b/fs/ext4/namei.c 440 | @@ -618,12 +618,6 @@ static struct stats dx_show_leaf(struct inode *dir, 441 | 442 | name = de->name; 443 | len = de->name_len; 444 | - if (ext4_encrypted_inode(inode)) 445 | - res = ext4_get_encryption_info(dir); 446 | - if (res) { 447 | - printk(KERN_WARNING "Error setting up" 448 | - " fname crypto: %d\n", res); 449 | - } 450 | if (ctx == NULL) { 451 | /* Directory is not encrypted */ 452 | ext4fs_dirhash(de->name, 453 | @@ -967,11 +961,6 @@ static int htree_dirblock_to_tree(struct file *dir_file, 454 | #ifdef CONFIG_EXT4_FS_ENCRYPTION 455 | /* Check if the directory is encrypted */ 456 | if (ext4_encrypted_inode(dir)) { 457 | - err = ext4_get_encryption_info(dir); 458 | - if (err < 0) { 459 | - brelse(bh); 460 | - return err; 461 | - } 462 | err = ext4_fname_crypto_alloc_buffer(dir, EXT4_NAME_LEN, 463 | &fname_crypto_str); 464 | if (err < 0) { 465 | @@ -3058,11 +3047,11 @@ static int ext4_symlink(struct inode *dir, 466 | encryption_required = (ext4_encrypted_inode(dir) || 467 | DUMMY_ENCRYPTION_ENABLED(EXT4_SB(dir->i_sb))); 468 | if (encryption_required) { 469 | - err = ext4_get_encryption_info(dir); 470 | + err = ext4_setup_encryption_info(dir); 471 | + if (err == -ENOKEY) 472 | + return -EPERM; 473 | if (err) 474 | return err; 475 | - if (ext4_encryption_info(dir) == NULL) 476 | - return -EPERM; 477 | disk_link.len = (ext4_fname_encrypted_size(dir, len) + 478 | sizeof(struct ext4_encrypted_symlink_data)); 479 | sd = 
kzalloc(disk_link.len, GFP_KERNEL); 480 | diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c 481 | index e8e7af6..a9b5777 100644 482 | --- a/fs/ext4/symlink.c 483 | +++ b/fs/ext4/symlink.c 484 | @@ -34,10 +34,6 @@ static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cook 485 | int res; 486 | u32 plen, max_size = inode->i_sb->s_blocksize; 487 | 488 | - res = ext4_get_encryption_info(inode); 489 | - if (res) 490 | - return ERR_PTR(res); 491 | - 492 | if (ext4_inode_is_fast_symlink(inode)) { 493 | caddr = (char *) EXT4_I(inode)->i_data; 494 | max_size = sizeof(EXT4_I(inode)->i_data); 495 | -------------------------------------------------------------------------------- /old-patches/delalloc-debug: -------------------------------------------------------------------------------- 1 | ext4: add delalloc debugging 2 | 3 | This adds a file in /proc/fs/ext4/ which when opened for reading, 4 | will trigger debugging code that dumps a lot of information about 5 | inodes subject to delayed allocation to the console. 
6 | 7 | Signed-off-by: "Theodore Ts'o" 8 | --- 9 | fs/ext4/sysfs.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 1 file changed, 69 insertions(+) 11 | 12 | diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c 13 | index 9d29723..507d33f 100644 14 | --- a/fs/ext4/sysfs.c 15 | +++ b/fs/ext4/sysfs.c 16 | @@ -371,6 +371,74 @@ static struct kobject ext4_feat = { 17 | .kset = &ext4_kset, 18 | }; 19 | 20 | +#ifdef CONFIG_EXT4_DEBUG 21 | +static void print_inode_delalloc_info(struct inode *inode) 22 | +{ 23 | + if (!EXT4_I(inode)->i_reserved_data_blocks || 24 | + !EXT4_I(inode)->i_reserved_meta_blocks) 25 | + return; 26 | + 27 | + printk(KERN_DEBUG "ino %lu: %u %u\n", inode->i_ino, 28 | + EXT4_I(inode)->i_reserved_data_blocks, 29 | + EXT4_I(inode)->i_reserved_meta_blocks); 30 | +} 31 | + 32 | +static int debug_delalloc_show(struct seq_file *seq, void *offset) 33 | +{ 34 | + return 0; 35 | +} 36 | + 37 | +static int options_delalloc_debug_open_fs(struct inode *proc_inode, 38 | + struct file *file) 39 | +{ 40 | + struct super_block *sb = PDE_DATA(proc_inode); 41 | + struct ext4_sb_info *sbi = EXT4_SB(sb); 42 | + struct inode *inode; 43 | + extern spinlock_t inode_sb_list_lock; 44 | + 45 | + printk(KERN_DEBUG "EXT4-fs debug delalloc of %s\n", sb->s_id); 46 | + printk(KERN_DEBUG "EXT4-fs: dirty clusters %lld free clusters %lld\n", 47 | + percpu_counter_sum(&sbi->s_dirtyclusters_counter), 48 | + percpu_counter_sum(&sbi->s_freeclusters_counter)); 49 | + 50 | +#ifndef MODULE 51 | + spin_lock(&inode_sb_list_lock); 52 | + if (!list_empty(&sb->s_bdi->wb.b_dirty)) { 53 | + printk(KERN_DEBUG "s_bdi->wb.b_dirty list:\n"); 54 | + list_for_each_entry(inode, &sb->s_bdi->wb.b_dirty, 55 | + i_io_list) { 56 | + print_inode_delalloc_info(inode); 57 | + } 58 | + } 59 | + if (!list_empty(&sb->s_bdi->wb.b_io)) { 60 | + printk(KERN_DEBUG "s_bdi->wb.b_io list:\n"); 61 | + list_for_each_entry(inode, &sb->s_bdi->wb.b_io, 62 | + i_io_list) { 63 | + 
print_inode_delalloc_info(inode); 64 | + } 65 | + } 66 | + if (!list_empty(&sb->s_bdi->wb.b_more_io)) { 67 | + printk(KERN_DEBUG "s_bdi->wb.b_more_io list:\n"); 68 | + list_for_each_entry(inode, &sb->s_bdi->wb.b_more_io, 69 | + i_io_list) { 70 | + print_inode_delalloc_info(inode); 71 | + } 72 | + } 73 | + spin_unlock(&inode_sb_list_lock); 74 | + printk(KERN_DEBUG "ext4 debug delalloc done\n"); 75 | +#endif 76 | + return single_open(file, debug_delalloc_show, sb); 77 | +} 78 | + 79 | +static const struct file_operations ext4_seq_delalloc_debug_fops = { 80 | + .owner = THIS_MODULE, 81 | + .open = options_delalloc_debug_open_fs, 82 | + .read = seq_read, 83 | + .llseek = seq_lseek, 84 | + .release = single_release, 85 | +}; 86 | +#endif 87 | + 88 | #define PROC_FILE_SHOW_DEFN(name) \ 89 | static int name##_open(struct inode *inode, struct file *file) \ 90 | { \ 91 | @@ -398,6 +466,7 @@ static struct ext4_proc_files { 92 | PROC_FILE_LIST(options), 93 | PROC_FILE_LIST(es_shrinker_info), 94 | PROC_FILE_LIST(mb_groups), 95 | + PROC_FILE_LIST(delalloc_debug), 96 | { NULL, NULL }, 97 | }; 98 | 99 | -------------------------------------------------------------------------------- /old-patches/dont-use-io-end-if-not-needed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tytso/ext4-patch-queue/fc40a5a6ccf08de7a0119c7f52759a33909a7177/old-patches/dont-use-io-end-if-not-needed -------------------------------------------------------------------------------- /old-patches/dump-in-use-buffers: -------------------------------------------------------------------------------- 1 | Add a ioctl which dumps out all of the in-use buffer heads for a block device 2 | 3 | Signed-off-by: "Theodore Ts'o" 4 | --- 5 | block/compat_ioctl.c | 4 ++++ 6 | block/ioctl.c | 11 +++++++++++ 7 | fs/buffer.c | 40 ++++++++++++++++++++++++++++++++++++++++ 8 | 3 files changed, 55 insertions(+), 0 deletions(-) 9 | 10 | diff --git 
a/block/compat_ioctl.c b/block/compat_ioctl.c 11 | index 4eb8e9e..2535cb1 100644 12 | --- a/block/compat_ioctl.c 13 | +++ b/block/compat_ioctl.c 14 | @@ -11,6 +11,9 @@ 15 | #include 16 | #include 17 | 18 | +/* For debugging purposes; see block/ioctl.c */ 19 | +#define BLKDUMPUSEDBUFFERS _IO(0x12,130) 20 | + 21 | static int compat_put_ushort(unsigned long arg, unsigned short val) 22 | { 23 | return put_user(val, (unsigned short __user *)compat_ptr(arg)); 24 | @@ -749,6 +752,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) 25 | return compat_put_int(arg, bdev_alignment_offset(bdev)); 26 | case BLKDISCARDZEROES: 27 | return compat_put_uint(arg, bdev_discard_zeroes_data(bdev)); 28 | + case BLKDUMPUSEDBUFFERS: 29 | case BLKFLSBUF: 30 | case BLKROSET: 31 | case BLKDISCARD: 32 | diff --git a/block/ioctl.c b/block/ioctl.c 33 | index be48ea5..8cc002b 100644 34 | --- a/block/ioctl.c 35 | +++ b/block/ioctl.c 36 | @@ -8,6 +8,10 @@ 37 | #include 38 | #include 39 | 40 | +/* For debugging purposes */ 41 | +#define BLKDUMPUSEDBUFFERS _IO(0x12,130) 42 | +extern void dump_used_buffers(struct block_device *bdev); 43 | + 44 | static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg) 45 | { 46 | struct block_device *bdevp; 47 | @@ -332,6 +336,13 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, 48 | ret = blk_trace_ioctl(bdev, cmd, (char __user *) arg); 49 | unlock_kernel(); 50 | break; 51 | + case BLKDUMPUSEDBUFFERS: 52 | + if (!capable(CAP_SYS_ADMIN)) 53 | + return -EACCES; 54 | + dump_used_buffers(bdev); 55 | + ret = 0; 56 | + break; 57 | + 58 | default: 59 | ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); 60 | } 61 | diff --git a/fs/buffer.c b/fs/buffer.c 62 | index 6fa5302..8438330 100644 63 | --- a/fs/buffer.c 64 | +++ b/fs/buffer.c 65 | @@ -33,6 +33,7 @@ 66 | #include 67 | #include 68 | #include 69 | +#include 70 | #include 71 | #include 72 | #include 73 | @@ -300,6 +301,45 @@ static void 
free_more_memory(void) 74 | } 75 | } 76 | 77 | +void dump_used_buffers(struct block_device *bdev) 78 | +{ 79 | + struct inode *bd_inode = bdev->bd_inode; 80 | + struct address_space *bd_mapping = bd_inode->i_mapping; 81 | + struct buffer_head *bh, *head; 82 | + struct pagevec pvec; 83 | + unsigned long index = 0; 84 | + int nr_pages, i, count, total = 0; 85 | + char b[BDEVNAME_SIZE]; 86 | + 87 | + spin_lock(&bd_mapping->private_lock); 88 | + printk(KERN_INFO "Begin dump of block device %s\n", bdevname(bdev, b)); 89 | + while (1) { 90 | + nr_pages = pagevec_lookup(&pvec, bd_mapping, index, PAGEVEC_SIZE); 91 | + if (nr_pages == 0) 92 | + break; 93 | + for (i = 0; i < nr_pages; i++) { 94 | + struct page *page = pvec.pages[i]; 95 | + index = page->index + 1; 96 | + 97 | + if (!page_has_buffers(page)) 98 | + continue; 99 | + bh = head = page_buffers(page); 100 | + do { 101 | + count = atomic_read(&bh->b_count); 102 | + if (count) { 103 | + printk(KERN_INFO 104 | + "buffer in-use: block %Lu count %d\n", 105 | + (unsigned long long) bh->b_blocknr, count); 106 | + total++; 107 | + } 108 | + bh = bh->b_this_page; 109 | + } while (bh != head); 110 | + } 111 | + } 112 | + printk(KERN_INFO "Total number of in-use buffers: %d\n", total); 113 | + spin_unlock(&bd_mapping->private_lock); 114 | +} 115 | + 116 | /* 117 | * I/O completion handler for block_read_full_page() - pages 118 | * which come unlocked at the end of I/O. 119 | -------------------------------------------------------------------------------- /old-patches/include-mpage-functions-into-readpage.c: -------------------------------------------------------------------------------- 1 | ext4: copy mpage_readpage() and mpage_readpages() fs/ext4/readpage.c 2 | 3 | Move the functions which we need from fs/mpage.c into 4 | fs/ext4/readpage.c. This will allow us to proceed with the 5 | refactorization of these functions and eventual merger with the 6 | functions in fs/ext4/page_io.c. 
7 | 8 | Signed-off-by: Theodore Ts'o 9 | --- 10 | fs/ext4/readpage.c | 326 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 11 | 1 file changed, 320 insertions(+), 6 deletions(-) 12 | 13 | diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c 14 | index b5249db..3b29da1 100644 15 | --- a/fs/ext4/readpage.c 16 | +++ b/fs/ext4/readpage.c 17 | @@ -23,6 +23,7 @@ 18 | #include 19 | #include 20 | #include 21 | +#include 22 | 23 | #include "ext4_jbd2.h" 24 | #include "xattr.h" 25 | @@ -30,31 +31,344 @@ 26 | 27 | #include 28 | 29 | -int ext4_readpage(struct file *file, struct page *page) 30 | +/* 31 | + * I/O completion handler for multipage BIOs. 32 | + * 33 | + * The mpage code never puts partial pages into a BIO (except for end-of-file). 34 | + * If a page does not map to a contiguous run of blocks then it simply falls 35 | + * back to block_read_full_page(). 36 | + * 37 | + * Why is this? If a page's completion depends on a number of different BIOs 38 | + * which can complete in any order (or at the same time) then determining the 39 | + * status of that page is hard. See end_buffer_async_read() for the details. 40 | + * There is no point in duplicating all that complexity. 
41 | + */ 42 | +static void mpage_end_io(struct bio *bio, int err) 43 | +{ 44 | + struct bio_vec *bv; 45 | + int i; 46 | + 47 | + bio_for_each_segment_all(bv, bio, i) { 48 | + struct page *page = bv->bv_page; 49 | + page_endio(page, bio_data_dir(bio), err); 50 | + } 51 | + 52 | + bio_put(bio); 53 | +} 54 | + 55 | +static struct bio *mpage_bio_submit(int rw, struct bio *bio) 56 | +{ 57 | + bio->bi_end_io = mpage_end_io; 58 | + submit_bio(rw, bio); 59 | + return NULL; 60 | +} 61 | + 62 | +static struct bio * 63 | +mpage_alloc(struct block_device *bdev, 64 | + sector_t first_sector, int nr_vecs, 65 | + gfp_t gfp_flags) 66 | +{ 67 | + struct bio *bio; 68 | + 69 | + bio = bio_alloc(gfp_flags, nr_vecs); 70 | + 71 | + if (bio == NULL && (current->flags & PF_MEMALLOC)) { 72 | + while (!bio && (nr_vecs /= 2)) 73 | + bio = bio_alloc(gfp_flags, nr_vecs); 74 | + } 75 | + 76 | + if (bio) { 77 | + bio->bi_bdev = bdev; 78 | + bio->bi_iter.bi_sector = first_sector; 79 | + } 80 | + return bio; 81 | +} 82 | + 83 | +/* 84 | + * support function for mpage_readpages. The fs supplied get_block might 85 | + * return an up to date buffer. This is used to map that buffer into 86 | + * the page, which allows readpage to avoid triggering a duplicate call 87 | + * to get_block. 88 | + * 89 | + * The idea is to avoid adding buffers to pages that don't already have 90 | + * them. So when the buffer is up to date and the page size == block size, 91 | + * this marks the page up to date instead of adding new buffers. 
92 | + */ 93 | +static void 94 | +map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block) 95 | +{ 96 | + struct inode *inode = page->mapping->host; 97 | + struct buffer_head *page_bh, *head; 98 | + int block = 0; 99 | + 100 | + if (!page_has_buffers(page)) { 101 | + /* 102 | + * don't make any buffers if there is only one buffer on 103 | + * the page and the page just needs to be set up to date 104 | + */ 105 | + if (inode->i_blkbits == PAGE_CACHE_SHIFT && 106 | + buffer_uptodate(bh)) { 107 | + SetPageUptodate(page); 108 | + return; 109 | + } 110 | + create_empty_buffers(page, 1 << inode->i_blkbits, 0); 111 | + } 112 | + head = page_buffers(page); 113 | + page_bh = head; 114 | + do { 115 | + if (block == page_block) { 116 | + page_bh->b_state = bh->b_state; 117 | + page_bh->b_bdev = bh->b_bdev; 118 | + page_bh->b_blocknr = bh->b_blocknr; 119 | + break; 120 | + } 121 | + page_bh = page_bh->b_this_page; 122 | + block++; 123 | + } while (page_bh != head); 124 | +} 125 | + 126 | +/* 127 | + * This is the worker routine which does all the work of mapping the disk 128 | + * blocks and constructs largest possible bios, submits them for IO if the 129 | + * blocks are not contiguous on the disk. 130 | + * 131 | + * We pass a buffer_head back and forth and use its buffer_mapped() flag to 132 | + * represent the validity of its disk mapping and to decide when to do the next 133 | + * get_block() call. 
134 | + */ 135 | +static struct bio * 136 | +do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, 137 | + sector_t *last_block_in_bio, struct buffer_head *map_bh, 138 | + unsigned long *first_logical_block, get_block_t get_block) 139 | { 140 | - int ret = -EAGAIN; 141 | struct inode *inode = page->mapping->host; 142 | + const unsigned blkbits = inode->i_blkbits; 143 | + const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits; 144 | + const unsigned blocksize = 1 << blkbits; 145 | + sector_t block_in_file; 146 | + sector_t last_block; 147 | + sector_t last_block_in_file; 148 | + sector_t blocks[MAX_BUF_PER_PAGE]; 149 | + unsigned page_block; 150 | + unsigned first_hole = blocks_per_page; 151 | + struct block_device *bdev = NULL; 152 | + int length; 153 | + int fully_mapped = 1; 154 | + unsigned nblocks; 155 | + unsigned relative_block; 156 | + 157 | + if (page_has_buffers(page)) 158 | + goto confused; 159 | + 160 | + block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits); 161 | + last_block = block_in_file + nr_pages * blocks_per_page; 162 | + last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; 163 | + if (last_block > last_block_in_file) 164 | + last_block = last_block_in_file; 165 | + page_block = 0; 166 | + 167 | + /* 168 | + * Map blocks using the result from the previous get_blocks call first. 
169 | + */ 170 | + nblocks = map_bh->b_size >> blkbits; 171 | + if (buffer_mapped(map_bh) && block_in_file > *first_logical_block && 172 | + block_in_file < (*first_logical_block + nblocks)) { 173 | + unsigned map_offset = block_in_file - *first_logical_block; 174 | + unsigned last = nblocks - map_offset; 175 | + 176 | + for (relative_block = 0; ; relative_block++) { 177 | + if (relative_block == last) { 178 | + clear_buffer_mapped(map_bh); 179 | + break; 180 | + } 181 | + if (page_block == blocks_per_page) 182 | + break; 183 | + blocks[page_block] = map_bh->b_blocknr + map_offset + 184 | + relative_block; 185 | + page_block++; 186 | + block_in_file++; 187 | + } 188 | + bdev = map_bh->b_bdev; 189 | + } 190 | + 191 | + /* 192 | + * Then do more get_blocks calls until we are done with this page. 193 | + */ 194 | + map_bh->b_page = page; 195 | + while (page_block < blocks_per_page) { 196 | + map_bh->b_state = 0; 197 | + map_bh->b_size = 0; 198 | + 199 | + if (block_in_file < last_block) { 200 | + map_bh->b_size = (last_block-block_in_file) << blkbits; 201 | + if (get_block(inode, block_in_file, map_bh, 0)) 202 | + goto confused; 203 | + *first_logical_block = block_in_file; 204 | + } 205 | + 206 | + if (!buffer_mapped(map_bh)) { 207 | + fully_mapped = 0; 208 | + if (first_hole == blocks_per_page) 209 | + first_hole = page_block; 210 | + page_block++; 211 | + block_in_file++; 212 | + continue; 213 | + } 214 | + 215 | + /* some filesystems will copy data into the page during 216 | + * the get_block call, in which case we don't want to 217 | + * read it again. 
map_buffer_to_page copies the data 218 | + * we just collected from get_block into the page's buffers 219 | + * so readpage doesn't have to repeat the get_block call 220 | + */ 221 | + if (buffer_uptodate(map_bh)) { 222 | + map_buffer_to_page(page, map_bh, page_block); 223 | + goto confused; 224 | + } 225 | + 226 | + if (first_hole != blocks_per_page) 227 | + goto confused; /* hole -> non-hole */ 228 | + 229 | + /* Contiguous blocks? */ 230 | + if (page_block && blocks[page_block-1] != map_bh->b_blocknr-1) 231 | + goto confused; 232 | + nblocks = map_bh->b_size >> blkbits; 233 | + for (relative_block = 0; ; relative_block++) { 234 | + if (relative_block == nblocks) { 235 | + clear_buffer_mapped(map_bh); 236 | + break; 237 | + } else if (page_block == blocks_per_page) 238 | + break; 239 | + blocks[page_block] = map_bh->b_blocknr+relative_block; 240 | + page_block++; 241 | + block_in_file++; 242 | + } 243 | + bdev = map_bh->b_bdev; 244 | + } 245 | + 246 | + if (first_hole != blocks_per_page) { 247 | + zero_user_segment(page, first_hole << blkbits, PAGE_CACHE_SIZE); 248 | + if (first_hole == 0) { 249 | + SetPageUptodate(page); 250 | + unlock_page(page); 251 | + goto out; 252 | + } 253 | + } else if (fully_mapped) { 254 | + SetPageMappedToDisk(page); 255 | + } 256 | + 257 | + if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) && 258 | + cleancache_get_page(page) == 0) { 259 | + SetPageUptodate(page); 260 | + goto confused; 261 | + } 262 | + 263 | + /* 264 | + * This page will go to BIO. Do we need to send this BIO off first? 
265 | + */ 266 | + if (bio && (*last_block_in_bio != blocks[0] - 1)) 267 | + bio = mpage_bio_submit(READ, bio); 268 | + 269 | +alloc_new: 270 | + if (bio == NULL) { 271 | + if (first_hole == blocks_per_page) { 272 | + if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9), 273 | + page)) 274 | + goto out; 275 | + } 276 | + bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), 277 | + min_t(int, nr_pages, bio_get_nr_vecs(bdev)), 278 | + GFP_KERNEL); 279 | + if (bio == NULL) 280 | + goto confused; 281 | + } 282 | + 283 | + length = first_hole << blkbits; 284 | + if (bio_add_page(bio, page, length, 0) < length) { 285 | + bio = mpage_bio_submit(READ, bio); 286 | + goto alloc_new; 287 | + } 288 | + 289 | + relative_block = block_in_file - *first_logical_block; 290 | + nblocks = map_bh->b_size >> blkbits; 291 | + if ((buffer_boundary(map_bh) && relative_block == nblocks) || 292 | + (first_hole != blocks_per_page)) 293 | + bio = mpage_bio_submit(READ, bio); 294 | + else 295 | + *last_block_in_bio = blocks[blocks_per_page - 1]; 296 | +out: 297 | + return bio; 298 | + 299 | +confused: 300 | + if (bio) 301 | + bio = mpage_bio_submit(READ, bio); 302 | + if (!PageUptodate(page)) 303 | + block_read_full_page(page, get_block); 304 | + else 305 | + unlock_page(page); 306 | + goto out; 307 | +} 308 | + 309 | +int ext4_readpage(struct file *file, struct page *page) 310 | +{ 311 | + unsigned long first_logical_block = 0; 312 | + struct buffer_head map_bh; 313 | + struct inode *inode = page->mapping->host; 314 | + struct bio *bio = NULL; 315 | + sector_t last_block_in_bio = 0; 316 | + int ret = -EAGAIN; 317 | 318 | trace_ext4_readpage(page); 319 | 320 | if (ext4_has_inline_data(inode)) 321 | ret = ext4_readpage_inline(inode, page); 322 | 323 | - if (ret == -EAGAIN) 324 | - return mpage_readpage(page, ext4_get_block); 325 | + if (ret != -EAGAIN) 326 | + return ret; 327 | 328 | - return ret; 329 | + map_bh.b_state = 0; 330 | + map_bh.b_size = 0; 331 | + bio = do_mpage_readpage(bio, page, 
1, &last_block_in_bio, 332 | + &map_bh, &first_logical_block, ext4_get_block); 333 | + if (bio) 334 | + mpage_bio_submit(READ, bio); 335 | + return 0; 336 | } 337 | 338 | int ext4_readpages(struct file *file, struct address_space *mapping, 339 | struct list_head *pages, unsigned nr_pages) 340 | { 341 | struct inode *inode = mapping->host; 342 | + struct bio *bio = NULL; 343 | + unsigned page_idx; 344 | + sector_t last_block_in_bio = 0; 345 | + struct buffer_head map_bh; 346 | + unsigned long first_logical_block = 0; 347 | 348 | /* If the file has inline data, no need to do readpages. */ 349 | if (ext4_has_inline_data(inode)) 350 | return 0; 351 | 352 | - return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); 353 | + map_bh.b_state = 0; 354 | + map_bh.b_size = 0; 355 | + for (page_idx = 0; page_idx < nr_pages; page_idx++) { 356 | + struct page *page = list_entry(pages->prev, struct page, lru); 357 | + 358 | + prefetchw(&page->flags); 359 | + list_del(&page->lru); 360 | + if (!add_to_page_cache_lru(page, mapping, 361 | + page->index, GFP_KERNEL)) { 362 | + bio = do_mpage_readpage(bio, page, 363 | + nr_pages - page_idx, 364 | + &last_block_in_bio, &map_bh, 365 | + &first_logical_block, 366 | + ext4_get_block); 367 | + } 368 | + page_cache_release(page); 369 | + } 370 | + BUG_ON(!list_empty(pages)); 371 | + if (bio) 372 | + mpage_bio_submit(READ, bio); 373 | + return 0; 374 | } 375 | 376 | -------------------------------------------------------------------------------- /old-patches/inline-ext4_get_block-into-readpage: -------------------------------------------------------------------------------- 1 | ext4: call ext4_map_blocks() directly from read_page.c 2 | 3 | Use ext4_map_blocks() directly instead of going through 4 | ext4_get_block(). This allows us to drop out a lot of generic code 5 | that was not needed for ext4. 
6 | 7 | Signed-off-by: Theodore Ts'o 8 | 9 | 10 | --- 11 | fs/ext4/readpage.c | 83 ++++++++++++++++++----------------------------------------------------------------- 12 | 1 file changed, 18 insertions(+), 65 deletions(-) 13 | 14 | diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c 15 | index 3b29da1..ce3ecc1 100644 16 | --- a/fs/ext4/readpage.c 17 | +++ b/fs/ext4/readpage.c 18 | @@ -85,49 +85,6 @@ mpage_alloc(struct block_device *bdev, 19 | } 20 | 21 | /* 22 | - * support function for mpage_readpages. The fs supplied get_block might 23 | - * return an up to date buffer. This is used to map that buffer into 24 | - * the page, which allows readpage to avoid triggering a duplicate call 25 | - * to get_block. 26 | - * 27 | - * The idea is to avoid adding buffers to pages that don't already have 28 | - * them. So when the buffer is up to date and the page size == block size, 29 | - * this marks the page up to date instead of adding new buffers. 30 | - */ 31 | -static void 32 | -map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block) 33 | -{ 34 | - struct inode *inode = page->mapping->host; 35 | - struct buffer_head *page_bh, *head; 36 | - int block = 0; 37 | - 38 | - if (!page_has_buffers(page)) { 39 | - /* 40 | - * don't make any buffers if there is only one buffer on 41 | - * the page and the page just needs to be set up to date 42 | - */ 43 | - if (inode->i_blkbits == PAGE_CACHE_SHIFT && 44 | - buffer_uptodate(bh)) { 45 | - SetPageUptodate(page); 46 | - return; 47 | - } 48 | - create_empty_buffers(page, 1 << inode->i_blkbits, 0); 49 | - } 50 | - head = page_buffers(page); 51 | - page_bh = head; 52 | - do { 53 | - if (block == page_block) { 54 | - page_bh->b_state = bh->b_state; 55 | - page_bh->b_bdev = bh->b_bdev; 56 | - page_bh->b_blocknr = bh->b_blocknr; 57 | - break; 58 | - } 59 | - page_bh = page_bh->b_this_page; 60 | - block++; 61 | - } while (page_bh != head); 62 | -} 63 | - 64 | -/* 65 | * This is the worker routine which does all the 
work of mapping the disk 66 | * blocks and constructs largest possible bios, submits them for IO if the 67 | * blocks are not contiguous on the disk. 68 | @@ -138,8 +95,8 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block) 69 | */ 70 | static struct bio * 71 | do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, 72 | - sector_t *last_block_in_bio, struct buffer_head *map_bh, 73 | - unsigned long *first_logical_block, get_block_t get_block) 74 | + sector_t *last_block_in_bio, struct buffer_head *map_bh, 75 | + unsigned long *first_logical_block) 76 | { 77 | struct inode *inode = page->mapping->host; 78 | const unsigned blkbits = inode->i_blkbits; 79 | @@ -151,7 +108,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, 80 | sector_t blocks[MAX_BUF_PER_PAGE]; 81 | unsigned page_block; 82 | unsigned first_hole = blocks_per_page; 83 | - struct block_device *bdev = NULL; 84 | + struct block_device *bdev = inode->i_sb->s_bdev; 85 | int length; 86 | int fully_mapped = 1; 87 | unsigned nblocks; 88 | @@ -188,7 +145,6 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, 89 | page_block++; 90 | block_in_file++; 91 | } 92 | - bdev = map_bh->b_bdev; 93 | } 94 | 95 | /* 96 | @@ -200,9 +156,19 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, 97 | map_bh->b_size = 0; 98 | 99 | if (block_in_file < last_block) { 100 | - map_bh->b_size = (last_block-block_in_file) << blkbits; 101 | - if (get_block(inode, block_in_file, map_bh, 0)) 102 | + struct ext4_map_blocks map; 103 | + int ret; 104 | + 105 | + map.m_lblk = block_in_file; 106 | + map.m_len = last_block - block_in_file; 107 | + ret = ext4_map_blocks(NULL, inode, &map, 0); 108 | + if (ret < 0) 109 | goto confused; 110 | + map_bh->b_blocknr = map.m_pblk; 111 | + map_bh->b_bdev = bdev; 112 | + map_bh->b_size = inode->i_sb->s_blocksize * map.m_len; 113 | + map_bh->b_state = map.m_flags; 114 | + 115 | 
*first_logical_block = block_in_file; 116 | } 117 | 118 | @@ -215,17 +181,6 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, 119 | continue; 120 | } 121 | 122 | - /* some filesystems will copy data into the page during 123 | - * the get_block call, in which case we don't want to 124 | - * read it again. map_buffer_to_page copies the data 125 | - * we just collected from get_block into the page's buffers 126 | - * so readpage doesn't have to repeat the get_block call 127 | - */ 128 | - if (buffer_uptodate(map_bh)) { 129 | - map_buffer_to_page(page, map_bh, page_block); 130 | - goto confused; 131 | - } 132 | - 133 | if (first_hole != blocks_per_page) 134 | goto confused; /* hole -> non-hole */ 135 | 136 | @@ -243,7 +198,6 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, 137 | page_block++; 138 | block_in_file++; 139 | } 140 | - bdev = map_bh->b_bdev; 141 | } 142 | 143 | if (first_hole != blocks_per_page) { 144 | @@ -303,7 +257,7 @@ confused: 145 | if (bio) 146 | bio = mpage_bio_submit(READ, bio); 147 | if (!PageUptodate(page)) 148 | - block_read_full_page(page, get_block); 149 | + block_read_full_page(page, ext4_get_block); 150 | else 151 | unlock_page(page); 152 | goto out; 153 | @@ -329,7 +283,7 @@ int ext4_readpage(struct file *file, struct page *page) 154 | map_bh.b_state = 0; 155 | map_bh.b_size = 0; 156 | bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio, 157 | - &map_bh, &first_logical_block, ext4_get_block); 158 | + &map_bh, &first_logical_block); 159 | if (bio) 160 | mpage_bio_submit(READ, bio); 161 | return 0; 162 | @@ -361,8 +315,7 @@ int ext4_readpages(struct file *file, struct address_space *mapping, 163 | bio = do_mpage_readpage(bio, page, 164 | nr_pages - page_idx, 165 | &last_block_in_bio, &map_bh, 166 | - &first_logical_block, 167 | - ext4_get_block); 168 | + &first_logical_block); 169 | } 170 | page_cache_release(page); 171 | } 172 | 
-------------------------------------------------------------------------------- /old-patches/move-read-page-functions-to-new-file: -------------------------------------------------------------------------------- 1 | ext4: move ext4_readpage() and ext4_readpages() to their own file 2 | 3 | In preparation for weaning ext4 completely off of fs/mpage.c, move the 4 | readpage[s] function to their own file. Eventually we'll probably end 5 | up moving the writepage[s] function here and renaming this to 6 | something like read_write_page.c, or some such, but for now, let's 7 | keep things simple. 8 | 9 | Signed-off-by: Theodore Ts'o 10 | --- 11 | fs/ext4/Makefile | 2 +- 12 | fs/ext4/ext4.h | 5 +++++ 13 | fs/ext4/inode.c | 29 ----------------------------- 14 | fs/ext4/readpage.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 15 | 4 files changed, 66 insertions(+), 30 deletions(-) 16 | 17 | diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile 18 | index 0310fec..cd6f50f 100644 19 | --- a/fs/ext4/Makefile 20 | +++ b/fs/ext4/Makefile 21 | @@ -8,7 +8,7 @@ ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \ 22 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ 23 | ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \ 24 | mmp.o indirect.o extents_status.o xattr.o xattr_user.o \ 25 | - xattr_trusted.o inline.o 26 | + xattr_trusted.o inline.o readpage.o 27 | 28 | ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o 29 | ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o 30 | diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h 31 | index f70c3fc..5c115ea 100644 32 | --- a/fs/ext4/ext4.h 33 | +++ b/fs/ext4/ext4.h 34 | @@ -2775,6 +2775,11 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io, 35 | struct writeback_control *wbc, 36 | bool keep_towrite); 37 | 38 | +/* readpage.c */ 39 | +extern int ext4_readpage(struct file *file, struct page *page); 40 | +extern int ext4_readpages(struct file *file, struct address_space *mapping, 
41 | + struct list_head *pages, unsigned nr_pages); 42 | + 43 | /* mmp.c */ 44 | extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); 45 | 46 | diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c 47 | index d5dd7d4..b3c7b92 100644 48 | --- a/fs/ext4/inode.c 49 | +++ b/fs/ext4/inode.c 50 | @@ -2798,35 +2798,6 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) 51 | return generic_block_bmap(mapping, block, ext4_get_block); 52 | } 53 | 54 | -static int ext4_readpage(struct file *file, struct page *page) 55 | -{ 56 | - int ret = -EAGAIN; 57 | - struct inode *inode = page->mapping->host; 58 | - 59 | - trace_ext4_readpage(page); 60 | - 61 | - if (ext4_has_inline_data(inode)) 62 | - ret = ext4_readpage_inline(inode, page); 63 | - 64 | - if (ret == -EAGAIN) 65 | - return mpage_readpage(page, ext4_get_block); 66 | - 67 | - return ret; 68 | -} 69 | - 70 | -static int 71 | -ext4_readpages(struct file *file, struct address_space *mapping, 72 | - struct list_head *pages, unsigned nr_pages) 73 | -{ 74 | - struct inode *inode = mapping->host; 75 | - 76 | - /* If the file has inline data, no need to do readpages. 
*/ 77 | - if (ext4_has_inline_data(inode)) 78 | - return 0; 79 | - 80 | - return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); 81 | -} 82 | - 83 | static void ext4_invalidatepage(struct page *page, unsigned int offset, 84 | unsigned int length) 85 | { 86 | diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c 87 | new file mode 100644 88 | index 0000000..b5249db 89 | --- /dev/null 90 | +++ b/fs/ext4/readpage.c 91 | @@ -0,0 +1,60 @@ 92 | +/* 93 | + * linux/fs/ext4/readpage.c 94 | + */ 95 | + 96 | +#include 97 | +#include 98 | +#include 99 | +#include 100 | +#include 101 | +#include 102 | +#include 103 | +#include 104 | +#include 105 | +#include 106 | +#include 107 | +#include 108 | +#include 109 | +#include 110 | +#include 111 | +#include 112 | +#include 113 | +#include 114 | +#include 115 | +#include 116 | +#include 117 | + 118 | +#include "ext4_jbd2.h" 119 | +#include "xattr.h" 120 | +#include "acl.h" 121 | + 122 | +#include 123 | + 124 | +int ext4_readpage(struct file *file, struct page *page) 125 | +{ 126 | + int ret = -EAGAIN; 127 | + struct inode *inode = page->mapping->host; 128 | + 129 | + trace_ext4_readpage(page); 130 | + 131 | + if (ext4_has_inline_data(inode)) 132 | + ret = ext4_readpage_inline(inode, page); 133 | + 134 | + if (ret == -EAGAIN) 135 | + return mpage_readpage(page, ext4_get_block); 136 | + 137 | + return ret; 138 | +} 139 | + 140 | +int ext4_readpages(struct file *file, struct address_space *mapping, 141 | + struct list_head *pages, unsigned nr_pages) 142 | +{ 143 | + struct inode *inode = mapping->host; 144 | + 145 | + /* If the file has inline data, no need to do readpages. 
*/ 146 | + if (ext4_has_inline_data(inode)) 147 | + return 0; 148 | + 149 | + return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); 150 | +} 151 | + 152 | -------------------------------------------------------------------------------- /old-patches/only-call-ext4_truncate-if-there-is-data-to-truncate: -------------------------------------------------------------------------------- 1 | ext4: in ext4_setattr(), only call ext4_truncate() if there is data to drop 2 | 3 | If there are no blocks associated with the inode (and no inline data), 4 | there's no point calling ext4_truncate(). This avoids setting the 5 | replace-via-truncate heuristic if there is an attempt to truncate a 6 | file which is already zero-length --- which is something that happens 7 | in the core dumping code, in case there is an already existing core 8 | file. In the common case, there is not a previous core file, so by not 9 | enabling the replace-via-truncate heuristic, we can speed up core 10 | dumps. 11 | 12 | Reported-by: Omar Sandoval 13 | Signed-off-by: Theodore Ts'o 14 | --- 15 | fs/ext4/inode.c | 2 +- 16 | 1 file changed, 1 insertion(+), 1 deletion(-) 17 | 18 | diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c 19 | index 44ee5d9..cd757f8 100644 20 | --- a/fs/ext4/inode.c 21 | +++ b/fs/ext4/inode.c 22 | @@ -5171,7 +5171,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) 23 | * in data=journal mode to make pages freeable. 
24 | */ 25 | truncate_pagecache(inode, inode->i_size); 26 | - if (shrink) 27 | + if (shrink && (inode->i_blocks || ext4_has_inline_data(inode))) 28 | ext4_truncate(inode); 29 | up_write(&EXT4_I(inode)->i_mmap_sem); 30 | } 31 | -------------------------------------------------------------------------------- /old-patches/series: -------------------------------------------------------------------------------- 1 | only-call-ext4_truncate-if-there-is-data-to-truncate 2 | 3 | #crypto-rename-ext4_get_encryption_info 4 | 5 | crypto-add-ciphertext_access-mount-option 6 | crypto-add-ioctls-to-backup-crypto-metadata 7 | 8 | add-encryption-debug-files 9 | 10 | # not yet ready 11 | #dont-use-io-end-if-not-needed 12 | 13 | # not yet ready; patch series so ext4 has has full responsibility 14 | # for ext4_readpage[s] and does not use mpage. 15 | # 16 | #move-read-page-functions-to-new-file 17 | #include-mpage-functions-into-readpage.c 18 | #inline-ext4_get_block-into-readpage 19 | 20 | add-fallocate-mode-blocking-for-debugging 21 | 22 | # use-discard-if-possible-in-blkdev_issue_zeroout 23 | add-blkdiscard-ioctl 24 | 25 | block-dio-during-truncate 26 | 27 | delalloc-debug 28 | 29 | # note: this may make things slower... 30 | commit-as-soon-as-possible-after-log_start_commit 31 | 32 | # Ted's squelch series, still needs work 33 | add-sysfs-bool-support 34 | add-squelch-errors-support 35 | 36 | # Various disabled patches... 
37 | # 38 | #auto-enable-journal_async_commit 39 | #mballoc-allocate-larger-extents 40 | 41 | # various debugging/benchmarking assists 42 | dump-in-use-buffers 43 | akpm-jbd2-locking-fix 44 | 45 | -------------------------------------------------------------------------------- /old-patches/use-discard-if-possible-in-blkdev_issue_zeroout: -------------------------------------------------------------------------------- 1 | block: use discard if possible in blkdev_issue_zeroout() 2 | 3 | If the block device supports discards and guarantees that subsequent 4 | reads will return zeros (sometimes known as DZAT, for Deterministic 5 | read Zeros After Trim), use this to implement blkdev_issue_zeroout() 6 | 7 | Signed-off-by: "Theodore Ts'o" 8 | --- 9 | block/blk-lib.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------- 10 | 1 file changed, 62 insertions(+), 10 deletions(-) 11 | 12 | diff --git a/block/blk-lib.c b/block/blk-lib.c 13 | index 2da76c9..62cbf28 100644 14 | --- a/block/blk-lib.c 15 | +++ b/block/blk-lib.c 16 | @@ -269,6 +269,32 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, 17 | return ret; 18 | } 19 | 20 | +static int issue_zeroout_or_write_same(struct block_device *bdev, 21 | + sector_t sector, 22 | + sector_t nr_sects, gfp_t gfp_mask) 23 | +{ 24 | + if (bdev_write_same(bdev)) { 25 | + unsigned char bdn[BDEVNAME_SIZE]; 26 | + 27 | + if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, 28 | + ZERO_PAGE(0))) 29 | + return 0; 30 | + 31 | + bdevname(bdev, bdn); 32 | + pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn); 33 | + } 34 | + 35 | + return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask); 36 | +} 37 | + 38 | +/* 39 | + * Like sector_div except don't modify s. 
40 | + */ 41 | +static unsigned int sector_mod(sector_t s, unsigned int m) 42 | +{ 43 | + return sector_div(s, m); 44 | +} 45 | + 46 | /** 47 | * blkdev_issue_zeroout - zero-fill a block range 48 | * @bdev: blockdev to write 49 | @@ -277,23 +303,49 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, 50 | * @gfp_mask: memory allocation flags (for bio_alloc) 51 | * 52 | * Description: 53 | - * Generate and issue number of bios with zerofiled pages. 54 | + * Issues bios which zeros the requested block range. 55 | */ 56 | - 57 | int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, 58 | sector_t nr_sects, gfp_t gfp_mask) 59 | { 60 | - if (bdev_write_same(bdev)) { 61 | - unsigned char bdn[BDEVNAME_SIZE]; 62 | + struct request_queue *q = bdev_get_queue(bdev); 63 | + unsigned int alignment, granularity; 64 | + unsigned int c; 65 | + int ret; 66 | 67 | - if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, 68 | - ZERO_PAGE(0))) 69 | - return 0; 70 | + if (!q) 71 | + return -ENXIO; 72 | 73 | - bdevname(bdev, bdn); 74 | - pr_err("%s: WRITE SAME failed. 
Manually zeroing.\n", bdn); 75 | + if (!blk_queue_discard(q) || !queue_discard_zeroes_data(q) || 76 | + q->limits.discard_misaligned) 77 | + return issue_zeroout_or_write_same(bdev, sector, 78 | + 79 | + nr_sects, gfp_mask); 80 | + 81 | + alignment = q->limits.discard_alignment >> 9; 82 | + granularity = q->limits.discard_granularity >> 9; 83 | + 84 | + c = sector_mod(granularity + alignment - sector, granularity); 85 | + if (c > nr_sects) 86 | + c = nr_sects; 87 | + 88 | + if (c) { 89 | + int ret = issue_zeroout_or_write_same(bdev, sector, 90 | + c, gfp_mask); 91 | + if (ret) 92 | + return ret; 93 | + nr_sects -= c; 94 | } 95 | + if (nr_sects == 0) 96 | + return 0; 97 | 98 | - return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask); 99 | + c = sector_mod(nr_sects, granularity); 100 | + 101 | + ret = blkdev_issue_discard(bdev, sector, nr_sects - c, gfp_mask, 0); 102 | + if (ret || c == 0) 103 | + return ret; 104 | + 105 | + return issue_zeroout_or_write_same(bdev, sector + nr_sects - c, c, 106 | + gfp_mask); 107 | } 108 | EXPORT_SYMBOL(blkdev_issue_zeroout); 109 | -------------------------------------------------------------------------------- /series: -------------------------------------------------------------------------------- 1 | # v5.2-rc2 2 | 3 | #################################################### 4 | # unstable patches 5 | #################################################### 6 | 7 | stable-boundary 8 | stable-boundary-undo.patch 9 | 10 | # Lazy journalling patches 11 | jbd2-dont-double-bump-transaction-number 12 | journal-superblock-changes 13 | add-journal-no-cleanup-option 14 | add-support-for-log-metadata-block-tracking-in-log 15 | add-indirection-to-metadata-block-read-paths 16 | cleaner 17 | load-jmap-from-journal 18 | disable-writeback 19 | add-ext4-journal-lazy-mount-option 20 | #end lazy journal patches 21 | 22 | -------------------------------------------------------------------------------- /stable-boundary: 
-------------------------------------------------------------------------------- 1 | ext4: Stable/Unstable boundary 2 | 3 | From: Theodore Ts'o <tytso@mit.edu> 4 | 5 | This is the boundary between the stable and unstable patches in the 6 | ext4 patch queue. 7 | 8 | Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> 9 | --- 10 | 11 | fs/ext4/extents.c | 1 + 12 | 1 file changed, 1 insertion(+) 13 | 14 | 15 | Index: linux-2.6.26-rc9/fs/ext4/extents.c 16 | =================================================================== 17 | --- linux-2.6.26-rc9.orig/fs/ext4/extents.c 2008-07-11 16:05:13.000000000 -0700 18 | +++ linux-2.6.26-rc9/fs/ext4/extents.c 2008-07-11 16:05:17.000000000 -0700 19 | @@ -27,6 +27,7 @@ 20 | * - ext4*_error() should be used in some situations 21 | * - analyze all BUG()/BUG_ON(), use -EIO where appropriate 22 | * - smart tree reduction 23 | + * stable boundary change 24 | */ 25 | 26 | #include <linux/module.h> 27 | -------------------------------------------------------------------------------- /stable-boundary-undo.patch: -------------------------------------------------------------------------------- 1 | ext4: undo the stable boundary patch changes 2 | 3 | From: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 4 | 5 | This helps in applying the series with different type of 6 | tools that expect a code diff to apply any patch.
7 | 8 | Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 9 | Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> 10 | --- 11 | 12 | fs/ext4/extents.c | 1 - 13 | 1 file changed, 1 deletion(-) 14 | 15 | 16 | Index: linux-2.6.26-rc9/fs/ext4/extents.c 17 | =================================================================== 18 | --- linux-2.6.26-rc9.orig/fs/ext4/extents.c 2008-07-11 16:05:17.000000000 -0700 19 | +++ linux-2.6.26-rc9/fs/ext4/extents.c 2008-07-11 16:05:17.000000000 -0700 20 | @@ -27,7 +27,6 @@ 21 | * - ext4*_error() should be used in some situations 22 | * - analyze all BUG()/BUG_ON(), use -EIO where appropriate 23 | * - smart tree reduction 24 | - * stable boundary change 25 | */ 26 | 27 | #include <linux/module.h> 28 | -------------------------------------------------------------------------------- /timestamps: -------------------------------------------------------------------------------- 1 | touch -d @1421646888 archive 2 | touch -d @1493511621 old-patches 3 | touch -d @1543184491 stable-boundary-undo.patch 4 | touch -d @1543184551 jbd2-dont-double-bump-transaction-number 5 | touch -d @1543184611 journal-superblock-changes 6 | touch -d @1543184671 add-journal-no-cleanup-option 7 | touch -d @1543184731 add-support-for-log-metadata-block-tracking-in-log 8 | touch -d @1543184791 add-indirection-to-metadata-block-read-paths 9 | touch -d @1543184851 cleaner 10 | touch -d @1543184911 load-jmap-from-journal 11 | touch -d @1543184971 disable-writeback 12 | touch -d @1543185031 add-ext4-journal-lazy-mount-option 13 | touch -d @1558669179 stable-boundary 14 | touch -d @1558930704 status 15 | touch -d @1558930766 save-patch 16 | touch -d @1558930859 series 17 | touch -d @1558930873 timestamps 18 | --------------------------------------------------------------------------------