2 * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3 * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 /** @addtogroup lwext4
34 * @file ext4_journal.c
35 * @brief Journal handle functions
38 #include "ext4_config.h"
39 #include "ext4_types.h"
41 #include "ext4_super.h"
42 #include "ext4_journal.h"
43 #include "ext4_errno.h"
44 #include "ext4_blockdev.h"
45 #include "ext4_crc32c.h"
46 #include "ext4_debug.h"
52 /**@brief Revoke entry during journal replay.*/
54 /**@brief Block number not to be replayed.*/
57 /**@brief For any transaction id smaller
58 * than trans_id, records of @block
59 * in those transactions should not
63 /**@brief Revoke tree node.*/
64 RB_ENTRY(revoke_entry) revoke_node;
67 /**@brief Valid journal replay information.*/
69 /**@brief Starting transaction id.*/
70 uint32_t start_trans_id;
72 /**@brief Ending transaction id.*/
73 uint32_t last_trans_id;
75 /**@brief Used as internal argument.*/
76 uint32_t this_trans_id;
78 /**@brief RB-Tree storing revoke entries.*/
79 RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
82 /**@brief Journal replay internal arguments.*/
84 /**@brief Journal replay information.*/
85 struct recover_info *info;
87 /**@brief Current block we are on.*/
90 /**@brief Current trans_id we are on.*/
91 uint32_t this_trans_id;
95 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
97 if (a->block > b->block)
99 else if (a->block < b->block)
104 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
105 jbd_revoke_entry_cmp, static inline)
107 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
108 #define jbd_free_revoke_entry(addr) free(addr)
110 /**@brief Write jbd superblock to disk.
111 * @param jbd_fs jbd filesystem
112 * @param s jbd superblock
113 * @return standard error code*/
114 static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
117 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
120 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
124 offset = fblock * ext4_sb_get_block_size(&fs->sb);
125 return ext4_block_writebytes(fs->bdev, offset, s,
126 EXT4_SUPERBLOCK_SIZE);
129 /**@brief Read jbd superblock from disk.
130 * @param jbd_fs jbd filesystem
131 * @param s jbd superblock
132 * @return standard error code*/
133 static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
136 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
139 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
143 offset = fblock * ext4_sb_get_block_size(&fs->sb);
144 return ext4_block_readbytes(fs->bdev, offset, s,
145 EXT4_SUPERBLOCK_SIZE);
148 /**@brief Verify jbd superblock.
149 * @param sb jbd superblock
150 * @return true if jbd superblock is valid */
151 static bool jbd_verify_sb(struct jbd_sb *sb)
153 struct jbd_bhdr *header = &sb->header;
154 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
157 if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
158 jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
164 /**@brief Write back dirty jbd superblock to disk.
165 * @param jbd_fs jbd filesystem
166 * @return standard error code*/
167 static int jbd_write_sb(struct jbd_fs *jbd_fs)
171 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
175 jbd_fs->dirty = false;
180 /**@brief Get reference to jbd filesystem.
181 * @param fs Filesystem to load journal of
182 * @param jbd_fs jbd filesystem
183 * @return standard error code*/
184 int jbd_get_fs(struct ext4_fs *fs,
185 struct jbd_fs *jbd_fs)
188 uint32_t journal_ino;
190 memset(jbd_fs, 0, sizeof(struct jbd_fs));
191 /* See if there is journal inode on this filesystem.*/
192 /* FIXME: detection on existance ofbkejournal bdev is
194 journal_ino = ext4_get32(&fs->sb, journal_inode_number);
196 rc = ext4_fs_get_inode_ref(fs,
200 memset(jbd_fs, 0, sizeof(struct jbd_fs));
203 rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
205 memset(jbd_fs, 0, sizeof(struct jbd_fs));
206 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
209 if (!jbd_verify_sb(&jbd_fs->sb)) {
210 memset(jbd_fs, 0, sizeof(struct jbd_fs));
211 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
218 /**@brief Put reference of jbd filesystem.
219 * @param jbd_fs jbd filesystem
220 * @return standard error code*/
221 int jbd_put_fs(struct jbd_fs *jbd_fs)
224 rc = jbd_write_sb(jbd_fs);
226 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
230 /**@brief Data block lookup helper.
231 * @param jbd_fs jbd filesystem
232 * @param iblock block index
233 * @param fblock logical block address
234 * @return standard error code*/
235 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
237 ext4_fsblk_t *fblock)
239 int rc = ext4_fs_get_inode_dblk_idx(
247 /**@brief jbd block get function (through cache).
248 * @param jbd_fs jbd filesystem
249 * @param block block descriptor
250 * @param fblock jbd logical block address
251 * @return standard error code*/
252 static int jbd_block_get(struct jbd_fs *jbd_fs,
253 struct ext4_block *block,
256 /* TODO: journal device. */
258 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
260 /* Lookup the logical block address of
262 rc = jbd_inode_bmap(jbd_fs, iblock,
267 struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
268 rc = ext4_block_get(bdev, block, fblock);
270 /* If succeeded, mark buffer as BC_FLUSH to indicate
271 * that data should be written to disk immediately.*/
273 ext4_bcache_set_flag(block->buf, BC_FLUSH);
278 /**@brief jbd block get function (through cache, don't read).
279 * @param jbd_fs jbd filesystem
280 * @param block block descriptor
281 * @param fblock jbd logical block address
282 * @return standard error code*/
283 static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
284 struct ext4_block *block,
287 /* TODO: journal device. */
289 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
290 rc = jbd_inode_bmap(jbd_fs, iblock,
295 struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
296 rc = ext4_block_get_noread(bdev, block, fblock);
298 ext4_bcache_set_flag(block->buf, BC_FLUSH);
303 /**@brief jbd block set procedure (through cache).
304 * @param jbd_fs jbd filesystem
305 * @param block block descriptor
306 * @return standard error code*/
307 static int jbd_block_set(struct jbd_fs *jbd_fs,
308 struct ext4_block *block)
310 return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
314 /**@brief helper functions to calculate
315 * block tag size, not including UUID part.
316 * @param jbd_fs jbd filesystem
317 * @return tag size in bytes*/
318 static int jbd_tag_bytes(struct jbd_fs *jbd_fs)
322 /* It is very easy to deal with the case which
323 * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
324 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
325 JBD_FEATURE_INCOMPAT_CSUM_V3))
326 return sizeof(struct jbd_block_tag3);
328 size = sizeof(struct jbd_block_tag);
330 /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
331 * add 2 bytes to size.*/
332 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
333 JBD_FEATURE_INCOMPAT_CSUM_V2))
334 size += sizeof(uint16_t);
336 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
337 JBD_FEATURE_INCOMPAT_64BIT))
340 /* If block number is 4 bytes in size,
341 * minus 4 bytes from size */
342 return size - sizeof(uint32_t);
345 /**@brief Tag information. */
347 /**@brief Tag size in bytes, including UUID part.*/
350 /**@brief block number stored in this tag.*/
353 /**@brief whether UUID part exists or not.*/
356 /**@brief UUID content if UUID part exists.*/
357 uint8_t uuid[UUID_SIZE];
359 /**@brief Is this the last tag? */
363 /**@brief Extract information from a block tag.
364 * @param __tag pointer to the block tag
365 * @param tag_bytes block tag size of this jbd filesystem
366 * @param remaining size in buffer containing the block tag
367 * @param tag_info information of this tag.
368 * @return EOK when succeed, otherwise return EINVAL.*/
370 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
373 int32_t remain_buf_size,
374 struct tag_info *tag_info)
377 tag_info->tag_bytes = tag_bytes;
378 tag_info->uuid_exist = false;
379 tag_info->last_tag = false;
381 /* See whether it is possible to hold a valid block tag.*/
382 if (remain_buf_size - tag_bytes < 0)
385 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
386 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
387 struct jbd_block_tag3 *tag = __tag;
388 tag_info->block = jbd_get32(tag, blocknr);
389 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
390 JBD_FEATURE_INCOMPAT_64BIT))
392 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
394 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
397 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
398 /* See whether it is possible to hold UUID part.*/
399 if (remain_buf_size - tag_bytes < UUID_SIZE)
402 uuid_start = (char *)tag + tag_bytes;
403 tag_info->uuid_exist = true;
404 tag_info->tag_bytes += UUID_SIZE;
405 memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
408 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
409 tag_info->last_tag = true;
412 struct jbd_block_tag *tag = __tag;
413 tag_info->block = jbd_get32(tag, blocknr);
414 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
415 JBD_FEATURE_INCOMPAT_64BIT))
417 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
419 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
422 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
423 /* See whether it is possible to hold UUID part.*/
424 if (remain_buf_size - tag_bytes < UUID_SIZE)
427 uuid_start = (char *)tag + tag_bytes;
428 tag_info->uuid_exist = true;
429 tag_info->tag_bytes += UUID_SIZE;
430 memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
433 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
434 tag_info->last_tag = true;
440 /**@brief Write information to a block tag.
441 * @param __tag pointer to the block tag
442 * @param remaining size in buffer containing the block tag
443 * @param tag_info information of this tag.
444 * @return EOK when succeed, otherwise return EINVAL.*/
446 jbd_write_block_tag(struct jbd_fs *jbd_fs,
448 int32_t remain_buf_size,
449 struct tag_info *tag_info)
452 int tag_bytes = jbd_tag_bytes(jbd_fs);
454 tag_info->tag_bytes = tag_bytes;
456 /* See whether it is possible to hold a valid block tag.*/
457 if (remain_buf_size - tag_bytes < 0)
460 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
461 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
462 struct jbd_block_tag3 *tag = __tag;
463 memset(tag, 0, sizeof(struct jbd_block_tag3));
464 jbd_set32(tag, blocknr, tag_info->block);
465 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
466 JBD_FEATURE_INCOMPAT_64BIT))
467 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
469 if (tag_info->uuid_exist) {
470 /* See whether it is possible to hold UUID part.*/
471 if (remain_buf_size - tag_bytes < UUID_SIZE)
474 uuid_start = (char *)tag + tag_bytes;
475 tag_info->tag_bytes += UUID_SIZE;
476 memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
478 jbd_set32(tag, flags,
479 jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
481 if (tag_info->last_tag)
482 jbd_set32(tag, flags,
483 jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
486 struct jbd_block_tag *tag = __tag;
487 memset(tag, 0, sizeof(struct jbd_block_tag));
488 jbd_set32(tag, blocknr, tag_info->block);
489 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
490 JBD_FEATURE_INCOMPAT_64BIT))
491 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
493 if (tag_info->uuid_exist) {
494 /* See whether it is possible to hold UUID part.*/
495 if (remain_buf_size - tag_bytes < UUID_SIZE)
498 uuid_start = (char *)tag + tag_bytes;
499 tag_info->tag_bytes += UUID_SIZE;
500 memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
502 jbd_set16(tag, flags,
503 jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
505 if (tag_info->last_tag)
506 jbd_set16(tag, flags,
507 jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
513 /**@brief Iterate all block tags in a block.
514 * @param jbd_fs jbd filesystem
515 * @param __tag_start pointer to the block
516 * @param tag_tbl_size size of the block
517 * @param func callback routine to indicate that
518 * a block tag is found
519 * @param arg additional argument to be passed to func */
521 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
523 int32_t tag_tbl_size,
524 void (*func)(struct jbd_fs * jbd_fs,
530 char *tag_start, *tag_ptr;
531 int tag_bytes = jbd_tag_bytes(jbd_fs);
532 tag_start = __tag_start;
535 /* Cut off the size of block tail storing checksum. */
536 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
537 JBD_FEATURE_INCOMPAT_CSUM_V2) ||
538 JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
539 JBD_FEATURE_INCOMPAT_CSUM_V3))
540 tag_tbl_size -= sizeof(struct jbd_block_tail);
542 while (tag_tbl_size) {
543 struct tag_info tag_info;
544 int rc = jbd_extract_block_tag(jbd_fs,
553 func(jbd_fs, tag_info.block, tag_info.uuid, arg);
555 /* Stop the iteration when we reach the last tag. */
556 if (tag_info.last_tag)
559 tag_ptr += tag_info.tag_bytes;
560 tag_tbl_size -= tag_info.tag_bytes;
564 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
569 uint32_t *iblock = arg;
570 ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
577 static struct revoke_entry *
578 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
580 struct revoke_entry tmp = {
584 return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
587 /**@brief Replay a block in a transaction.
588 * @param jbd_fs jbd filesystem
589 * @param block block address to be replayed.*/
590 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
592 uint8_t *uuid __unused,
596 struct replay_arg *arg = __arg;
597 struct recover_info *info = arg->info;
598 uint32_t *this_block = arg->this_block;
599 struct revoke_entry *revoke_entry;
600 struct ext4_block journal_block, ext4_block;
601 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
605 /* We replay this block only if the current transaction id
606 * is equal or greater than that in revoke entry.*/
607 revoke_entry = jbd_revoke_entry_lookup(info, block);
609 arg->this_trans_id < revoke_entry->trans_id)
613 "Replaying block in block_tag: %" PRIu64 "\n",
616 r = jbd_block_get(jbd_fs, &journal_block, *this_block);
620 /* We need special treatment for ext4 superblock. */
622 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
624 jbd_block_set(jbd_fs, &journal_block);
628 memcpy(ext4_block.data,
630 jbd_get32(&jbd_fs->sb, blocksize));
632 ext4_bcache_set_dirty(ext4_block.buf);
633 ext4_block_set(fs->bdev, &ext4_block);
635 uint16_t mount_count, state;
636 mount_count = ext4_get16(&fs->sb, mount_count);
637 state = ext4_get16(&fs->sb, state);
640 journal_block.data + EXT4_SUPERBLOCK_OFFSET,
641 EXT4_SUPERBLOCK_SIZE);
643 /* Mark system as mounted */
644 ext4_set16(&fs->sb, state, state);
645 r = ext4_sb_write(fs->bdev, &fs->sb);
649 /*Update mount count*/
650 ext4_set16(&fs->sb, mount_count, mount_count);
653 jbd_block_set(jbd_fs, &journal_block);
658 /**@brief Add block address to revoke tree, along with
659 * its transaction id.
660 * @param info journal replay info
661 * @param block block address to be replayed.*/
662 static void jbd_add_revoke_block_tags(struct recover_info *info,
665 struct revoke_entry *revoke_entry;
667 ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
668 /* If the revoke entry with respect to the block address
669 * exists already, update its transaction id.*/
670 revoke_entry = jbd_revoke_entry_lookup(info, block);
672 revoke_entry->trans_id = info->this_trans_id;
676 revoke_entry = jbd_alloc_revoke_entry();
677 ext4_assert(revoke_entry);
678 revoke_entry->block = block;
679 revoke_entry->trans_id = info->this_trans_id;
680 RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
685 static void jbd_destroy_revoke_tree(struct recover_info *info)
687 while (!RB_EMPTY(&info->revoke_root)) {
688 struct revoke_entry *revoke_entry =
689 RB_MIN(jbd_revoke, &info->revoke_root);
690 ext4_assert(revoke_entry);
691 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
692 jbd_free_revoke_entry(revoke_entry);
696 /* Make sure we wrap around the log correctly! */
697 #define wrap(sb, var) \
699 if (var >= jbd_get32((sb), maxlen)) \
700 var -= (jbd_get32((sb), maxlen) - jbd_get32((sb), first)); \
703 #define ACTION_SCAN 0
704 #define ACTION_REVOKE 1
705 #define ACTION_RECOVER 2
707 /**@brief Add entries in a revoke block to revoke tree.
708 * @param jbd_fs jbd filesystem
709 * @param header revoke block header
710 * @param recover_info journal replay info*/
711 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
712 struct jbd_bhdr *header,
713 struct recover_info *info)
716 struct jbd_revoke_header *revoke_hdr =
717 (struct jbd_revoke_header *)header;
718 uint32_t i, nr_entries, record_len = 4;
720 /* If we are working on a 64bit jbd filesystem, */
721 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
722 JBD_FEATURE_INCOMPAT_64BIT))
725 nr_entries = (jbd_get32(revoke_hdr, count) -
726 sizeof(struct jbd_revoke_header)) /
729 blocks_entry = (char *)(revoke_hdr + 1);
731 for (i = 0;i < nr_entries;i++) {
732 if (record_len == 8) {
734 (uint64_t *)blocks_entry;
735 jbd_add_revoke_block_tags(info, to_be64(*blocks));
738 (uint32_t *)blocks_entry;
739 jbd_add_revoke_block_tags(info, to_be32(*blocks));
741 blocks_entry += record_len;
745 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
746 struct jbd_bhdr *header,
749 jbd_iterate_block_table(jbd_fs,
751 jbd_get32(&jbd_fs->sb, blocksize) -
752 sizeof(struct jbd_bhdr),
753 jbd_display_block_tags,
757 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
758 struct jbd_bhdr *header,
759 struct replay_arg *arg)
761 jbd_iterate_block_table(jbd_fs,
763 jbd_get32(&jbd_fs->sb, blocksize) -
764 sizeof(struct jbd_bhdr),
765 jbd_replay_block_tags,
769 /**@brief The core routine of journal replay.
770 * @param jbd_fs jbd filesystem
771 * @param recover_info journal replay info
772 * @param action action needed to be taken
773 * @return standard error code*/
774 static int jbd_iterate_log(struct jbd_fs *jbd_fs,
775 struct recover_info *info,
779 bool log_end = false;
780 struct jbd_sb *sb = &jbd_fs->sb;
781 uint32_t start_trans_id, this_trans_id;
782 uint32_t start_block, this_block;
784 /* We start iterating valid blocks in the whole journal.*/
785 start_trans_id = this_trans_id = jbd_get32(sb, sequence);
786 start_block = this_block = jbd_get32(sb, start);
788 ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
792 struct ext4_block block;
793 struct jbd_bhdr *header;
794 /* If we are not scanning for the last
795 * valid transaction in the journal,
796 * we will stop when we reach the end of
798 if (action != ACTION_SCAN)
799 if (this_trans_id > info->last_trans_id) {
804 r = jbd_block_get(jbd_fs, &block, this_block);
808 header = (struct jbd_bhdr *)block.data;
809 /* This block does not have a valid magic number,
810 * so we have reached the end of the journal.*/
811 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
812 jbd_block_set(jbd_fs, &block);
817 /* If the transaction id we found is not expected,
818 * we may have reached the end of the journal.
820 * If we are not scanning the journal, something
821 * bad might have taken place. :-( */
822 if (jbd_get32(header, sequence) != this_trans_id) {
823 if (action != ACTION_SCAN)
826 jbd_block_set(jbd_fs, &block);
831 switch (jbd_get32(header, blocktype)) {
832 case JBD_DESCRIPTOR_BLOCK:
833 ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
834 "trans_id: %" PRIu32"\n",
835 this_block, this_trans_id);
836 if (action == ACTION_RECOVER) {
837 struct replay_arg replay_arg;
838 replay_arg.info = info;
839 replay_arg.this_block = &this_block;
840 replay_arg.this_trans_id = this_trans_id;
842 jbd_replay_descriptor_block(jbd_fs,
843 header, &replay_arg);
845 jbd_debug_descriptor_block(jbd_fs,
846 header, &this_block);
849 case JBD_COMMIT_BLOCK:
850 ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
851 "trans_id: %" PRIu32"\n",
852 this_block, this_trans_id);
853 /* This is the end of a transaction,
854 * we may now proceed to the next transaction.
858 case JBD_REVOKE_BLOCK:
859 ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
860 "trans_id: %" PRIu32"\n",
861 this_block, this_trans_id);
862 if (action == ACTION_REVOKE) {
863 info->this_trans_id = this_trans_id;
864 jbd_build_revoke_tree(jbd_fs,
872 jbd_block_set(jbd_fs, &block);
874 wrap(sb, this_block);
875 if (this_block == start_block)
879 ext4_dbg(DEBUG_JBD, "End of journal.\n");
880 if (r == EOK && action == ACTION_SCAN) {
881 /* We have finished scanning the journal. */
882 info->start_trans_id = start_trans_id;
883 if (this_trans_id > start_trans_id)
884 info->last_trans_id = this_trans_id - 1;
886 info->last_trans_id = this_trans_id;
892 /**@brief Replay journal.
893 * @param jbd_fs jbd filesystem
894 * @return standard error code*/
895 int jbd_recover(struct jbd_fs *jbd_fs)
898 struct recover_info info;
899 struct jbd_sb *sb = &jbd_fs->sb;
903 RB_INIT(&info.revoke_root);
905 r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
909 r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
913 r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
915 /* If we successfully replay the journal,
916 * clear EXT4_FINCOM_RECOVER flag on the
917 * ext4 superblock, and set the start of
919 uint32_t features_incompatible =
920 ext4_get32(&jbd_fs->inode_ref.fs->sb,
921 features_incompatible);
922 jbd_set32(&jbd_fs->sb, start, 0);
923 features_incompatible &= ~EXT4_FINCOM_RECOVER;
924 ext4_set32(&jbd_fs->inode_ref.fs->sb,
925 features_incompatible,
926 features_incompatible);
927 jbd_fs->dirty = true;
928 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
929 &jbd_fs->inode_ref.fs->sb);
931 jbd_destroy_revoke_tree(&info);
935 static void jbd_journal_write_sb(struct jbd_journal *journal)
937 struct jbd_fs *jbd_fs = journal->jbd_fs;
938 jbd_set32(&jbd_fs->sb, start, journal->start);
939 jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
940 jbd_fs->dirty = true;
943 /**@brief Start accessing the journal.
944 * @param jbd_fs jbd filesystem
945 * @param journal current journal session
946 * @return standard error code*/
947 int jbd_journal_start(struct jbd_fs *jbd_fs,
948 struct jbd_journal *journal)
951 uint32_t features_incompatible =
952 ext4_get32(&jbd_fs->inode_ref.fs->sb,
953 features_incompatible);
954 features_incompatible |= EXT4_FINCOM_RECOVER;
955 ext4_set32(&jbd_fs->inode_ref.fs->sb,
956 features_incompatible,
957 features_incompatible);
958 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
959 &jbd_fs->inode_ref.fs->sb);
963 journal->first = jbd_get32(&jbd_fs->sb, first);
964 journal->start = journal->first;
965 journal->last = journal->first;
966 journal->trans_id = 1;
967 journal->alloc_trans_id = 1;
969 journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
971 TAILQ_INIT(&journal->trans_queue);
972 TAILQ_INIT(&journal->cp_queue);
973 journal->jbd_fs = jbd_fs;
974 jbd_journal_write_sb(journal);
975 return jbd_write_sb(jbd_fs);
978 /**@brief Stop accessing the journal.
979 * @param journal current journal session
980 * @return standard error code*/
981 int jbd_journal_stop(struct jbd_journal *journal)
984 struct jbd_fs *jbd_fs = journal->jbd_fs;
985 uint32_t features_incompatible;
987 /* Commit all the transactions to the journal.*/
988 jbd_journal_commit_all(journal);
989 /* Make sure that journalled content have reached
991 ext4_block_cache_flush(jbd_fs->inode_ref.fs->bdev);
993 features_incompatible =
994 ext4_get32(&jbd_fs->inode_ref.fs->sb,
995 features_incompatible);
996 features_incompatible &= ~EXT4_FINCOM_RECOVER;
997 ext4_set32(&jbd_fs->inode_ref.fs->sb,
998 features_incompatible,
999 features_incompatible);
1000 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1001 &jbd_fs->inode_ref.fs->sb);
1006 journal->trans_id = 0;
1007 jbd_journal_write_sb(journal);
1008 return jbd_write_sb(journal->jbd_fs);
1011 /**@brief Allocate a block in the journal.
1012 * @param journal current journal session
1013 * @param trans transaction
1014 * @return allocated block address*/
1015 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
1016 struct jbd_trans *trans)
1018 uint32_t start_block;
1020 start_block = journal->last++;
1021 trans->alloc_blocks++;
1022 wrap(&journal->jbd_fs->sb, journal->last);
1024 /* If there is no space left, flush all journalled
1025 * blocks to disk first.*/
1026 if (journal->last == journal->start)
1027 ext4_block_cache_flush(journal->jbd_fs->inode_ref.fs->bdev);
1032 /**@brief Allocate a new transaction
1033 * @param journal current journal session
1034 * @return transaction allocated*/
1036 jbd_journal_new_trans(struct jbd_journal *journal)
1038 struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
1042 /* We will assign a trans_id to this transaction,
1043 * once it has been committed.*/
1044 trans->journal = journal;
1049 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1050 struct ext4_buf *buf __unused,
1054 /**@brief Add block to a transaction and gain
1055 * access to it before making any modifications.
1056 * @param trans transaction
1057 * @param block block descriptor
1058 * @return standard error code*/
1059 int jbd_trans_add_block(struct jbd_trans *trans,
1060 struct ext4_block *block)
1062 struct jbd_buf *buf;
1063 struct ext4_fs *fs =
1064 trans->journal->jbd_fs->inode_ref.fs;
1066 /* If the buffer has already been modified, we should
1067 * flush dirty data in this buffer to disk.*/
1068 if (ext4_bcache_test_flag(block->buf, BC_DIRTY)) {
1069 /* XXX: i don't want to know whether the call
1070 * succeeds or not. */
1071 ext4_block_flush_buf(fs->bdev, block->buf);
1074 buf = calloc(1, sizeof(struct jbd_buf));
1079 buf->block = *block;
1080 ext4_bcache_inc_ref(block->buf);
1082 /* If the content reach the disk, notify us
1083 * so that we may do a checkpoint. */
1084 block->buf->end_write = jbd_trans_end_write;
1085 block->buf->end_write_arg = buf;
1088 LIST_INSERT_HEAD(&trans->buf_list, buf, buf_node);
1092 /**@brief Add block to be revoked to a transaction
1093 * @param trans transaction
1094 * @param lba logical block address
1095 * @return standard error code*/
1096 int jbd_trans_revoke_block(struct jbd_trans *trans,
1099 struct jbd_revoke_rec *rec =
1100 calloc(1, sizeof(struct jbd_revoke_rec));
1105 LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
1109 /**@brief Free a transaction
1110 * @param journal current journal session
1111 * @param trans transaction
1112 * @param abort discard all the modifications on the block?
1113 * @return standard error code*/
1114 void jbd_journal_free_trans(struct jbd_journal *journal,
1115 struct jbd_trans *trans,
1118 struct jbd_buf *jbd_buf, *tmp;
1119 struct jbd_revoke_rec *rec, *tmp2;
1120 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1121 LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
1124 ext4_bcache_clear_dirty(jbd_buf->block.buf);
1125 ext4_block_set(fs->bdev, &jbd_buf->block);
1128 LIST_REMOVE(jbd_buf, buf_node);
1131 LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1133 LIST_REMOVE(rec, revoke_node);
1140 /**@brief Write commit block for a transaction
1141 * @param trans transaction
1142 * @return standard error code*/
1143 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
1146 struct jbd_commit_header *header;
1147 uint32_t commit_iblock = 0;
1148 struct ext4_block commit_block;
1149 struct jbd_journal *journal = trans->journal;
1151 commit_iblock = jbd_journal_alloc_block(journal, trans);
1152 rc = jbd_block_get_noread(journal->jbd_fs,
1153 &commit_block, commit_iblock);
1157 header = (struct jbd_commit_header *)commit_block.data;
1158 jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
1159 jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
1160 jbd_set32(&header->header, sequence, trans->trans_id);
1162 ext4_bcache_set_dirty(commit_block.buf);
1163 rc = jbd_block_set(journal->jbd_fs, &commit_block);
1170 /**@brief Write descriptor block for a transaction
1171 * @param journal current journal session
1172 * @param trans transaction
1173 * @return standard error code*/
1174 static int jbd_journal_prepare(struct jbd_journal *journal,
1175 struct jbd_trans *trans)
1177 int rc = EOK, i = 0;
1178 int32_t tag_tbl_size;
1179 uint32_t desc_iblock = 0;
1180 uint32_t data_iblock = 0;
1181 char *tag_start = NULL, *tag_ptr = NULL;
1182 struct jbd_buf *jbd_buf, *tmp;
1183 struct ext4_block desc_block, data_block;
1184 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1186 LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node, tmp) {
1187 struct tag_info tag_info;
1188 bool uuid_exist = false;
1189 if (!ext4_bcache_test_flag(jbd_buf->block.buf,
1191 /* The buffer has not been modified, just release
1193 ext4_block_set(fs->bdev, &jbd_buf->block);
1194 LIST_REMOVE(jbd_buf, buf_node);
1200 struct jbd_bhdr *bhdr;
1201 desc_iblock = jbd_journal_alloc_block(journal, trans);
1202 rc = jbd_block_get_noread(journal->jbd_fs,
1203 &desc_block, desc_iblock);
1207 ext4_bcache_set_dirty(desc_block.buf);
1209 bhdr = (struct jbd_bhdr *)desc_block.data;
1210 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1211 jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
1212 jbd_set32(bhdr, sequence, trans->trans_id);
1214 tag_start = (char *)(bhdr + 1);
1215 tag_ptr = tag_start;
1217 tag_tbl_size = journal->block_size -
1218 sizeof(struct jbd_bhdr);
1220 if (!trans->start_iblock)
1221 trans->start_iblock = desc_iblock;
1224 tag_info.block = jbd_buf->block.lb_id;
1225 tag_info.uuid_exist = uuid_exist;
1226 if (i == trans->data_cnt - 1)
1227 tag_info.last_tag = true;
1230 memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1233 rc = jbd_write_block_tag(journal->jbd_fs,
1238 jbd_block_set(journal->jbd_fs, &desc_block);
1243 data_iblock = jbd_journal_alloc_block(journal, trans);
1244 rc = jbd_block_get_noread(journal->jbd_fs,
1245 &data_block, data_iblock);
1249 ext4_bcache_set_dirty(data_block.buf);
1251 memcpy(data_block.data, jbd_buf->block.data,
1252 journal->block_size);
1254 rc = jbd_block_set(journal->jbd_fs, &data_block);
1258 tag_ptr += tag_info.tag_bytes;
1259 tag_tbl_size -= tag_info.tag_bytes;
1263 if (rc == EOK && desc_iblock)
1264 jbd_block_set(journal->jbd_fs, &desc_block);
1269 /**@brief Write revoke block(s) for a transaction
1270 * @param journal current journal session
1271 * @param trans transaction whose revoke_list is written out
1272 * @return standard error code*/
/* Serializes rec->lba of every record on trans->revoke_list, big-endian,
 * into blocks of type JBD_REVOKE_BLOCK obtained from
 * jbd_journal_alloc_block().
 * NOTE(review): the embedded line numbers skip in places (e.g. 1287 ->
 * 1290, 1322 -> 1327), so some statements of this function are not
 * visible in this listing; comments that depend on them are marked as
 * assumptions. */
1274 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1275 struct jbd_trans *trans)
1277 int rc = EOK, i = 0;
/* Bytes still free for entries in the current revoke block. */
1278 int32_t tag_tbl_size;
/* Journal block number of the current revoke block; starts at 0 and is
 * tested for non-zero after the loop. */
1279 uint32_t desc_iblock = 0;
/* Write cursor for the next block-number entry. */
1280 char *blocks_entry = NULL;
1281 struct jbd_revoke_rec *rec, *tmp;
1282 struct ext4_block desc_block;
1283 struct jbd_revoke_header *header = NULL;
/* Size in bytes of one block-number entry; the code below stores a
 * 64-bit value when this is 8 and a 32-bit value otherwise. */
1284 int32_t record_len = 4;
/* Test for the 64BIT incompat feature in the journal superblock.
 * NOTE(review): the statement guarded by this test is not visible here;
 * presumably it sets record_len to 8 -- confirm. */
1286 if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1287 JBD_FEATURE_INCOMPAT_64BIT))
/* Visit every revoke record queued on the transaction. */
1290 LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
/* Allocate a journal block for the revoke block and fetch its buffer
 * with jbd_block_get_noread(); rc carries that call's result. */
1294 struct jbd_bhdr *bhdr;
1295 desc_iblock = jbd_journal_alloc_block(journal, trans);
1296 rc = jbd_block_get_noread(journal->jbd_fs,
1297 &desc_block, desc_iblock);
/* The buffer is about to be filled in, so flag it dirty. */
1302 ext4_bcache_set_dirty(desc_block.buf);
/* Common journal block header: magic, block type and the transaction id
 * as sequence number. */
1304 bhdr = (struct jbd_bhdr *)desc_block.data;
1305 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1306 jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1307 jbd_set32(bhdr, sequence, trans->trans_id);
/* The revoke header is laid over the same bytes as bhdr; entries start
 * right after it and may use the remainder of the block. */
1309 header = (struct jbd_revoke_header *)bhdr;
1310 blocks_entry = (char *)(header + 1);
1311 tag_tbl_size = journal->block_size -
1312 sizeof(struct jbd_revoke_header);
/* Record the first journal block used by this transaction, unless one
 * is already recorded. */
1314 if (!trans->start_iblock)
1315 trans->start_iblock = desc_iblock;
/* The current block cannot take another entry: store the number of
 * bytes used so far (header included) in the header's count field and
 * pass the block to jbd_block_set().
 * NOTE(review): the rest of this branch is not visible; presumably it
 * arranges for a fresh revoke block to be started -- confirm. */
1319 if (tag_tbl_size < record_len) {
1320 jbd_set32(header, count,
1321 journal->block_size - tag_tbl_size);
1322 jbd_block_set(journal->jbd_fs, &desc_block);
/* Store the revoked block number big-endian: 64-bit for 8-byte records,
 * 32-bit otherwise. */
1327 if (record_len == 8) {
1329 (uint64_t *)blocks_entry;
1330 *blocks = to_be64(rec->lba);
1333 (uint32_t *)blocks_entry;
1334 *blocks = to_be32(rec->lba);
/* Advance the cursor and shrink the free space by one entry. */
1336 blocks_entry += record_len;
1337 tag_tbl_size -= record_len;
/* No error and a revoke block is in use: record its used byte count
 * (header included) and pass it to jbd_block_set(). */
1341 if (rc == EOK && desc_iblock) {
1343 jbd_set32(header, count,
1344 journal->block_size - tag_tbl_size);
1346 jbd_block_set(journal->jbd_fs, &desc_block);
1352 /**@brief Submit the transaction to the journal's transaction queue.
1353 * @param journal current journal session
1354 * @param trans transaction to enqueue*/
1356 jbd_journal_submit_trans(struct jbd_journal *journal,
1357 struct jbd_trans *trans)
/* Append at the tail of journal->trans_queue. jbd_journal_commit_one()
 * takes transactions from the head of the same queue, so they are
 * committed in submission order.
 * NOTE(review): the remaining macro arguments are not visible in this
 * listing. */
1359 TAILQ_INSERT_TAIL(&journal->trans_queue,
1364 /**@brief Put references of block descriptors in a transaction.
1365 * @param journal current journal session
1366 * @param trans transaction whose buf_list is walked*/
1367 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
1369 struct jbd_buf *jbd_buf, *tmp;
/* Filesystem reached through the journal's inode reference; its block
 * device is the target of ext4_block_set() below. */
1370 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
/* Walk every buffer attached to the transaction. The _SAFE iterator
 * keeps the next element in a temporary, so the walk tolerates the
 * current element being unlinked (presumably as a side effect of
 * ext4_block_set() -- confirm). */
1371 LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
/* Hand a local copy of the descriptor to ext4_block_set(), so
 * jbd_buf->block itself is not passed by address. */
1373 struct ext4_block block = jbd_buf->block;
1374 ext4_block_set(fs->bdev, &block);
1378 /**@brief Update the start block of the journal when
1379 * all the contents in a transaction reach the disk.*/
/* Called with the written buffer and an argument that is the jbd_buf
 * tracking it (see the assignment from arg below).
 * NOTE(review): the remaining parameters and the statements between the
 * embedded lines 1389 and 1393 are not visible in this listing. */
1380 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1381 struct ext4_buf *buf,
1385 struct jbd_buf *jbd_buf = arg;
1386 struct jbd_trans *trans = jbd_buf->trans;
1387 struct jbd_journal *journal = trans->journal;
/* Sampled before the queue is modified: is this transaction currently
 * at the head of the checkpoint queue? */
1388 bool first_in_queue =
1389 trans == TAILQ_FIRST(&journal->cp_queue);
1393 /* Clear the end_write and end_write_arg fields. */
1394 buf->end_write = NULL;
1395 buf->end_write_arg = NULL;
/* Count this buffer; once written_cnt reaches data_cnt the transaction
 * is taken off the checkpoint queue. */
1397 trans->written_cnt++;
1398 if (trans->written_cnt == trans->data_cnt) {
1399 TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
/* It was the head: move the journal start past the blocks this
 * transaction allocated and expect the following transaction id.
 * wrap() presumably folds the value back into the journal's block
 * range -- confirm. */
1401 if (first_in_queue) {
1402 journal->start = trans->start_iblock +
1403 trans->alloc_blocks;
1404 wrap(&journal->jbd_fs->sb, journal->start);
1405 journal->trans_id = trans->trans_id + 1;
1407 jbd_journal_free_trans(journal, trans, false);
/* Look at the transactions now at the head of the checkpoint queue.
 * Ones without data buffers (data_cnt == 0) are removed and skipped
 * over in the same way.
 * NOTE(review): the else/break lines are not visible; presumably the
 * first transaction that still has data buffers pins journal->start at
 * its start_iblock and ends the loop -- confirm. */
1409 if (first_in_queue) {
1410 while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1411 if (!trans->data_cnt) {
1412 TAILQ_REMOVE(&journal->cp_queue,
1415 journal->start = trans->start_iblock +
1416 trans->alloc_blocks;
1417 wrap(&journal->jbd_fs->sb, journal->start);
1418 journal->trans_id = trans->trans_id + 1;
1419 jbd_journal_free_trans(journal,
1422 journal->start = trans->start_iblock;
1423 wrap(&journal->jbd_fs->sb, journal->start);
1424 journal->trans_id = trans->trans_id;
/* Write out the journal superblock after journal->start and
 * journal->trans_id have been updated. */
1428 jbd_journal_write_sb(journal);
1429 jbd_write_sb(journal->jbd_fs);
1434 /**@brief Commit a transaction to the journal immediately.
1435 * @param journal current journal session
1436 * @param trans transaction
1437 * @return standard error code*/
/* NOTE(review): the error checks after each step, the else lines between
 * branches and the guard around the final cleanup are not visible in this
 * listing (the embedded line numbers skip there); comments that depend
 * on them are marked as assumptions. */
1438 int jbd_journal_commit_trans(struct jbd_journal *journal,
1439 struct jbd_trans *trans)
/* Snapshot of journal->last; it is written back at the end of the
 * function (presumably only on failure -- confirm). */
1442 uint32_t last = journal->last;
/* Give the transaction the next id to be allocated, then run
 * jbd_journal_prepare() and jbd_journal_prepare_revoke() on it. */
1444 trans->trans_id = journal->alloc_trans_id;
1445 rc = jbd_journal_prepare(journal, trans);
1449 rc = jbd_journal_prepare_revoke(journal, trans);
1453 if (LIST_EMPTY(&trans->buf_list) &&
1454 LIST_EMPTY(&trans->revoke_list)) {
1455 /* Since there are no entries in both buffer list
1456 * and revoke entry list, we do not consider trans as
1457 * complete transaction and just return EOK.*/
1458 jbd_journal_free_trans(journal, trans, false);
/* Write the commit block for the transaction. */
1462 rc = jbd_trans_write_commit_block(trans);
/* The id is now used; the next transaction gets the following one. */
1466 journal->alloc_trans_id++;
/* Checkpoint queue is empty, so the journal start is updated here. */
1467 if (TAILQ_EMPTY(&journal->cp_queue)) {
/* With data buffers: the journal starts at this transaction, the
 * superblocks are written, and the transaction is queued for
 * checkpointing before jbd_journal_cp_trans() runs on it. */
1468 if (trans->data_cnt) {
1469 journal->start = trans->start_iblock;
1470 wrap(&journal->jbd_fs->sb, journal->start);
1471 journal->trans_id = trans->trans_id;
1472 jbd_journal_write_sb(journal);
1473 jbd_write_sb(journal->jbd_fs);
1474 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1476 jbd_journal_cp_trans(journal, trans);
/* Apparently the branch for a transaction without data buffers (the
 * separating else is not visible): the journal start moves past the
 * blocks it allocated and the transaction is freed right away. */
1478 journal->start = trans->start_iblock +
1479 trans->alloc_blocks;
1480 wrap(&journal->jbd_fs->sb, journal->start);
1481 journal->trans_id = trans->trans_id + 1;
1482 jbd_journal_write_sb(journal);
1483 jbd_journal_free_trans(journal, trans, false);
/* Apparently the branch for a non-empty checkpoint queue (the separating
 * else is not visible): queue the transaction behind the others and run
 * jbd_journal_cp_trans() on it only if it has data buffers. */
1486 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1488 if (trans->data_cnt)
1489 jbd_journal_cp_trans(journal, trans);
/* Restore the saved journal->last and free the transaction with the
 * third argument set to true (false is used on the paths above).
 * NOTE(review): the condition guarding these two lines is not visible;
 * presumably rc != EOK -- confirm. */
1494 journal->last = last;
1495 jbd_journal_free_trans(journal, trans, true);
1500 /**@brief Commit one transaction on transaction queue
1502 * @param journal current journal session.*/
1503 void jbd_journal_commit_one(struct jbd_journal *journal)
1505 struct jbd_trans *trans;
/* Take the transaction at the head of trans_queue, if there is one,
 * unlink it and commit it. The function returns void, so the result of
 * jbd_journal_commit_trans() is not passed on to the caller. */
1507 if ((trans = TAILQ_FIRST(&journal->trans_queue))) {
1508 TAILQ_REMOVE(&journal->trans_queue, trans, trans_node);
1509 jbd_journal_commit_trans(journal, trans);
1513 /**@brief Commit all the transactions on transaction queue
1515 * @param journal current journal session.*/
1516 void jbd_journal_commit_all(struct jbd_journal *journal)
/* jbd_journal_commit_one() unlinks the head of trans_queue on every
 * call, so this loop ends once the queue is drained. Results of the
 * individual commits are not available here because commit_one returns
 * void. */
1518 while (!TAILQ_EMPTY(&journal->trans_queue)) {
1519 jbd_journal_commit_one(journal);