2 * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3 * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 /** @addtogroup lwext4
34 * @file ext4_journal.c
35 * @brief Journal handle functions
38 #include "ext4_config.h"
39 #include "ext4_types.h"
41 #include "ext4_super.h"
42 #include "ext4_errno.h"
43 #include "ext4_blockdev.h"
44 #include "ext4_crc32c.h"
45 #include "ext4_debug.h"
/* Red-black tree linkage of a revoke record. Records are ordered by their
 * ->block member through jbd_revoke_entry_cmp() and carry a ->trans_id.
 * NOTE(review): the other members of this struct are not visible in this
 * view of the file. */
54 RB_ENTRY(revoke_entry) revoke_node;
/* Recovery state shared between the log passes run by jbd_recover(). */
/* Transaction id the scan started from (filled in at the end of the
 * ACTION_SCAN pass). */
58 uint32_t start_trans_id;
/* Highest transaction id the later passes will process (filled in at the
 * end of the ACTION_SCAN pass). */
59 uint32_t last_trans_id;
/* Transaction id of the revoke block currently being parsed; copied into
 * every revoke entry added from that block. */
60 uint32_t this_trans_id;
/* Root of the tree of revoked blocks, keyed by block number. */
61 RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
/* Argument bundle handed to jbd_replay_block_tags() through the generic
 * callback pointer of jbd_iterate_block_table().
 * NOTE(review): a this_block member is used by the code but its declaration
 * is not visible in this view. */
/* Recovery state (revoke tree) consulted before replaying a block. */
65 struct recover_info *info;
/* Transaction id of the descriptor block being replayed. */
67 uint32_t this_trans_id;
/* Ordering callback for the jbd_revoke red-black tree: compares two revoke
 * entries by their block number.
 * NOTE(review): the return statements are not visible in this view;
 * presumably positive / negative / zero for greater / less / equal --
 * confirm. */
71 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
73 if (a->block > b->block)
75 else if (a->block < b->block)
/* Instantiate the red-black tree helpers for the jbd_revoke tree as
 * static inline functions, ordered by jbd_revoke_entry_cmp(). */
80 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
81 jbd_revoke_entry_cmp, static inline)
/* Revoke entries are zero-initialised heap objects. */
83 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
84 #define jbd_free_revoke_entry(addr) free(addr)
/* Forward declaration: jbd_sb_write()/jbd_sb_read() below call
 * jbd_inode_bmap() before its definition. */
86 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
88 ext4_fsblk_t *fblock);
/**
 * Write a journal superblock image to the block device.
 *
 * Block index 0 of the journal is translated by jbd_inode_bmap() and the
 * result is turned into a byte offset using the filesystem block size.
 * EXT4_SUPERBLOCK_SIZE bytes are then written from @p s.
 *
 * @param jbd_fs  journal descriptor (supplies the filesystem and device)
 * @param s       superblock image to write
 * @return result of ext4_block_writebytes()
 *
 * NOTE(review): the handling of a jbd_inode_bmap() failure is not visible
 * in this view -- confirm rc is checked before the offset is computed.
 */
90 int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
93 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
/* Translate journal block 0 into fblock. */
96 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
/* Byte offset on the device = block number * filesystem block size. */
100 offset = fblock * ext4_sb_get_block_size(&fs->sb);
101 return ext4_block_writebytes(fs->bdev, offset, s,
102 EXT4_SUPERBLOCK_SIZE);
/**
 * Read the journal superblock image from the block device.
 *
 * Mirror of jbd_sb_write(): block index 0 of the journal is translated by
 * jbd_inode_bmap(), converted to a byte offset, and EXT4_SUPERBLOCK_SIZE
 * bytes are read into @p s.
 *
 * @param jbd_fs  journal descriptor (supplies the filesystem and device)
 * @param s       destination buffer for the superblock image
 * @return result of ext4_block_readbytes()
 *
 * NOTE(review): the handling of a jbd_inode_bmap() failure is not visible
 * in this view -- confirm rc is checked before the offset is computed.
 */
105 int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
108 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
/* Translate journal block 0 into fblock. */
111 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
/* Byte offset on the device = block number * filesystem block size. */
115 offset = fblock * ext4_sb_get_block_size(&fs->sb);
116 return ext4_block_readbytes(fs->bdev, offset, s,
117 EXT4_SUPERBLOCK_SIZE);
/**
 * Sanity-check the header of a journal superblock.
 *
 * Tests that the magic equals JBD_MAGIC_NUMBER and that the block type is
 * JBD_SUPERBLOCK or JBD_SUPERBLOCK_V2.
 *
 * NOTE(review): the return statements are not visible in this view;
 * jbd_get_fs() treats a false result as a validation failure.
 */
120 static bool jbd_verify_sb(struct jbd_sb *sb)
122 struct jbd_bhdr *header = &sb->header;
123 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
/* Only the two superblock block types are accepted. */
126 if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
127 jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
133 int jbd_get_fs(struct ext4_fs *fs,
134 struct jbd_fs *jbd_fs)
137 uint32_t journal_ino;
139 memset(jbd_fs, 0, sizeof(struct jbd_fs));
140 journal_ino = ext4_get32(&fs->sb, journal_inode_number);
142 rc = ext4_fs_get_inode_ref(fs,
146 memset(jbd_fs, 0, sizeof(struct jbd_fs));
149 rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
151 memset(jbd_fs, 0, sizeof(struct jbd_fs));
152 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
155 if (!jbd_verify_sb(&jbd_fs->sb)) {
156 memset(jbd_fs, 0, sizeof(struct jbd_fs));
157 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
/**
 * Close a journal descriptor obtained from jbd_get_fs().
 *
 * Writes the in-memory journal superblock back with jbd_sb_write() and
 * drops the reference on the journal inode.
 *
 * NOTE(review): the result of jbd_sb_write() is not captured on the visible
 * line, so a failed superblock write would go unreported unless handled on
 * lines not shown. Whether the write is conditional (e.g. on the dirty
 * flag set by jbd_recover()) cannot be told from this view.
 */
164 int jbd_put_fs(struct jbd_fs *jbd_fs)
168 jbd_sb_write(jbd_fs, &jbd_fs->sb);
/* rc carries the result of releasing the journal inode reference. */
170 rc = ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
/**
 * Translate a block index inside the journal into a block number stored in
 * @p fblock, by delegating to ext4_fs_get_inode_dblk_idx().
 *
 * NOTE(review): the middle parameter and the arguments passed to
 * ext4_fs_get_inode_dblk_idx() are not visible in this view; callers in
 * this file pass a journal-relative index (0 for the superblock).
 */
174 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
176 ext4_fsblk_t *fblock)
178 int rc = ext4_fs_get_inode_dblk_idx(
/**
 * Fetch a journal block through the block cache, reading its contents.
 *
 * The incoming fblock value is first treated as an index into the journal
 * (iblock), translated by jbd_inode_bmap(), and the block is then obtained
 * from the filesystem's block device with ext4_block_get().
 *
 * NOTE(review): the third parameter declaration and the check of rc between
 * the two steps are not visible in this view.
 */
186 int jbd_block_get(struct jbd_fs *jbd_fs,
187 struct ext4_block *block,
190 /* TODO: journal device. */
/* Reinterpret the requested block as a journal-relative index. */
192 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
193 rc = jbd_inode_bmap(jbd_fs, iblock,
/* The journal is accessed through the filesystem's own block device. */
198 struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
199 rc = ext4_block_get(bdev, block, fblock);
/**
 * Same translation as jbd_block_get(), but the block is obtained with
 * ext4_block_get_noread() instead of ext4_block_get().
 *
 * NOTE(review): the third parameter declaration and the check of rc between
 * the two steps are not visible in this view.
 */
203 int jbd_block_get_noread(struct jbd_fs *jbd_fs,
204 struct ext4_block *block,
207 /* TODO: journal device. */
/* Reinterpret the requested block as a journal-relative index. */
209 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
210 rc = jbd_inode_bmap(jbd_fs, iblock,
/* The journal is accessed through the filesystem's own block device. */
215 struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
216 rc = ext4_block_get_noread(bdev, block, fblock);
/**
 * Release a block obtained with jbd_block_get()/jbd_block_get_noread() by
 * forwarding to ext4_block_set() on the filesystem's block device.
 *
 * @return result of ext4_block_set()
 */
220 int jbd_block_set(struct jbd_fs *jbd_fs,
221 struct ext4_block *block)
223 return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
228 * helper functions to deal with 32 or 64bit block numbers.
/**
 * Size in bytes of one block tag in a descriptor block, as determined by
 * the journal's incompatible feature flags.
 *
 * - CSUM_V3: sizeof(struct jbd_block_tag3).
 * - otherwise sizeof(struct jbd_block_tag), plus a 16-bit field when
 *   CSUM_V2 is set, minus one 32-bit field when 64BIT is not set.
 *
 * NOTE(review): the statement taken when 64BIT is set is not visible in
 * this view; presumably it returns size unchanged -- confirm.
 */
230 int jbd_tag_bytes(struct jbd_fs *jbd_fs)
234 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
235 JBD_FEATURE_INCOMPAT_CSUM_V3))
236 return sizeof(struct jbd_block_tag3);
238 size = sizeof(struct jbd_block_tag);
240 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
241 JBD_FEATURE_INCOMPAT_CSUM_V2))
242 size += sizeof(uint16_t);
244 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
245 JBD_FEATURE_INCOMPAT_64BIT))
/* Without 64-bit block numbers the tag is one 32-bit field shorter. */
248 return size - sizeof(uint32_t);
/**
 * Decode one block tag of a descriptor block.
 *
 * Two on-disk layouts are handled: struct jbd_block_tag3 (CSUM_V3, 32-bit
 * flags) and struct jbd_block_tag (16-bit flags). In both, the block number
 * is the 32-bit blocknr, extended with blocknr_high when the 64BIT feature
 * is set. When JBD_FLAG_SAME_UUID is clear, UUID_SIZE bytes that follow the
 * tag are copied out.
 *
 * NOTE(review): the remaining parameters and the statements executed for
 * JBD_FLAG_ESCAPE and JBD_FLAG_LAST_TAG are not visible in this view.
 */
252 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
/* CSUM_V3 layout: flags are read as a 32-bit field. */
263 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
264 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
265 struct jbd_block_tag3 *tag = __tag;
266 *block = jbd_get32(tag, blocknr);
267 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
268 JBD_FEATURE_INCOMPAT_64BIT))
269 *block |= (uint64_t)jbd_get32(tag, blocknr_high) << 32;
271 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
/* A UUID is stored right behind the tag unless SAME_UUID is set. */
274 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
275 uuid_start = (char *)tag + tag_bytes;
277 memcpy(uuid, uuid_start, UUID_SIZE);
280 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
/* Older layout: flags are read as a 16-bit field. */
284 struct jbd_block_tag *tag = __tag;
285 *block = jbd_get32(tag, blocknr);
286 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
287 JBD_FEATURE_INCOMPAT_64BIT))
288 *block |= (uint64_t)jbd_get32(tag, blocknr_high) << 32;
290 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
/* A UUID is stored right behind the tag unless SAME_UUID is set. */
293 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
294 uuid_start = (char *)tag + tag_bytes;
296 memcpy(uuid, uuid_start, UUID_SIZE);
299 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
/**
 * Walk the tag table of a descriptor block and invoke @p func for each tag.
 *
 * The tag size comes from jbd_tag_bytes(). With CSUM_V2 or CSUM_V3 the
 * table is shortened by sizeof(struct jbd_block_tail). Each iteration
 * decodes one tag with jbd_extract_block_tag() and calls
 * func(jbd_fs, block, uuid, arg).
 *
 * NOTE(review): the loop condition only guarantees room for tag_bytes; the
 * UUID_SIZE bytes that jbd_extract_block_tag() may read behind a tag are
 * not covered by it unless checked on lines not visible here.
 * NOTE(review): the conditions guarding the callback and the UUID_SIZE
 * advance are not visible in this view.
 */
306 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
308 uint32_t tag_tbl_size,
309 void (*func)(struct jbd_fs * jbd_fs,
315 ext4_fsblk_t block = 0;
316 uint8_t uuid[UUID_SIZE];
317 char *tag_start, *tag_ptr;
318 uint32_t tag_bytes = jbd_tag_bytes(jbd_fs);
319 tag_start = __tag_start;
/* Checksummed journals keep a tail at the end of the block; exclude it. */
322 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
323 JBD_FEATURE_INCOMPAT_CSUM_V2) ||
324 JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
325 JBD_FEATURE_INCOMPAT_CSUM_V3))
326 tag_tbl_size -= sizeof(struct jbd_block_tail);
/* Continue while one more whole tag fits into the table. */
328 while (tag_ptr - tag_start + tag_bytes <= tag_tbl_size) {
331 jbd_extract_block_tag(jbd_fs,
339 func(jbd_fs, block, uuid, arg);
/* Step over the tag, and over an inline UUID (presumably only when one
 * was present -- the guarding condition is not visible). */
344 tag_ptr += tag_bytes;
346 tag_ptr += UUID_SIZE;
/**
 * jbd_iterate_block_table() callback used by jbd_debug_descriptor_block():
 * logs the block number carried by a tag.
 *
 * NOTE(review): arg is received as a uint32_t pointer (the caller passes
 * the address of its current log block); what is done with it is not
 * visible in this view.
 */
351 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
356 uint32_t *iblock = arg;
357 ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
/**
 * Find the revoke entry recorded for @p block in the revoke tree of
 * @p info, using a stack-allocated key entry.
 *
 * @return result of RB_FIND() on info->revoke_root (callers test it for a
 *         missing entry)
 */
364 static struct revoke_entry *
365 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
/* Temporary key object for the tree search. */
367 struct revoke_entry tmp = {
371 return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
/**
 * jbd_iterate_block_table() callback used during ACTION_RECOVER: copies one
 * logged block from the journal to its home location.
 *
 * The revoke tree is consulted first. The journal copy is then fetched
 * with jbd_block_get() at *this_block. Two destinations are visible:
 * - the target block is obtained with ext4_block_get_noread(), overwritten
 *   with the journal data, marked dirty and released;
 * - the in-memory ext4 superblock is refreshed from the journal copy, with
 *   the previous state and mount_count values saved beforehand and put
 *   back afterwards.
 *
 * NOTE(review): the condition selecting between the two destinations, the
 * update of *this_block and all early returns are not visible in this view.
 * NOTE(review): the visible revoke test uses a plain '<' between
 * transaction ids; confirm whether a revoke recorded in the same
 * transaction should also suppress the replay, and whether sequence
 * wrap-around matters here.
 * NOTE(review): ext4_sb_write() runs before mount_count is restored, so the
 * restored mount_count only reaches memory in the visible code -- confirm
 * this is intended.
 */
374 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
376 uint8_t *uuid __unused,
380 struct replay_arg *arg = __arg;
381 struct recover_info *info = arg->info;
382 uint32_t *this_block = arg->this_block;
383 struct revoke_entry *revoke_entry;
384 struct ext4_block journal_block, ext4_block;
385 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
/* Check whether this block was revoked. */
389 revoke_entry = jbd_revoke_entry_lookup(info, block);
391 arg->this_trans_id < revoke_entry->trans_id)
395 "Replaying block in block_tag: %" PRIu64 "\n",
/* Fetch the journal copy of the block. */
398 r = jbd_block_get(jbd_fs, &journal_block, *this_block);
/* Destination is overwritten entirely, so it is not read first. */
403 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
/* Release the journal block (presumably on the failure path of the call
 * above -- the guarding condition is not visible). */
405 jbd_block_set(jbd_fs, &journal_block);
409 memcpy(ext4_block.data,
411 jbd_get32(&jbd_fs->sb, blocksize));
413 ext4_bcache_set_dirty(ext4_block.buf);
414 ext4_block_set(fs->bdev, &ext4_block);
/* Superblock path: remember the live state and mount_count fields
 * before the in-memory superblock is replaced. */
416 uint16_t mount_count, state;
417 mount_count = ext4_get16(&fs->sb, mount_count);
418 state = ext4_get16(&fs->sb, state);
421 journal_block.data + EXT4_SUPERBLOCK_OFFSET,
422 EXT4_SUPERBLOCK_SIZE);
424 /* Mark system as mounted */
425 ext4_set16(&fs->sb, state, state);
426 r = ext4_sb_write(fs->bdev, &fs->sb);
430 /*Update mount count*/
431 ext4_set16(&fs->sb, mount_count, mount_count);
/* Done with the journal copy. */
434 jbd_block_set(jbd_fs, &journal_block);
/**
 * Record @p block as revoked by the transaction info->this_trans_id.
 *
 * An existing entry for the block has its trans_id updated; otherwise a new
 * entry is allocated, filled in and inserted into info->revoke_root.
 *
 * NOTE(review): the branch condition and return between the two paths are
 * not visible in this view.
 * NOTE(review): allocation failure is only caught by ext4_assert(); if
 * assertions are compiled out, the following stores dereference NULL.
 */
439 static void jbd_add_revoke_block_tags(struct recover_info *info,
442 struct revoke_entry *revoke_entry;
444 ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
445 revoke_entry = jbd_revoke_entry_lookup(info, block);
/* Existing entry: refresh it with the current transaction id. */
447 revoke_entry->trans_id = info->this_trans_id;
/* No entry yet: create one and link it into the tree. */
451 revoke_entry = jbd_alloc_revoke_entry();
452 ext4_assert(revoke_entry);
453 revoke_entry->block = block;
454 revoke_entry->trans_id = info->this_trans_id;
455 RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
/**
 * Free every entry of the revoke tree held in @p info.
 *
 * Repeatedly takes the minimum entry, unlinks it from the tree and frees
 * it, until the tree is empty.
 */
460 static void jbd_destroy_revoke_tree(struct recover_info *info)
462 while (!RB_EMPTY(&info->revoke_root)) {
463 struct revoke_entry *revoke_entry =
464 RB_MIN(jbd_revoke, &info->revoke_root);
465 ext4_assert(revoke_entry);
/* Unlink before freeing so the tree never references freed memory. */
466 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
467 jbd_free_revoke_entry(revoke_entry);
/* wrap(sb, var): once var reaches the journal's maxlen, pull it back by
 * (maxlen - first) so it continues from the journal's first log block.
 * var is evaluated more than once, so pass a plain variable.
 * NOTE(review): the enclosing lines of the macro body are not visible in
 * this view. */
471 /* Make sure we wrap around the log correctly! */
472 #define wrap(sb, var) \
474 if (var >= jbd_get32((sb), maxlen)) \
475 var -= (jbd_get32((sb), maxlen) - jbd_get32((sb), first)); \
478 #define ACTION_SCAN 0 /* pass 1: find the range of transaction ids */
479 #define ACTION_REVOKE 1 /* pass 2: build the revoke tree */
480 #define ACTION_RECOVER 2 /* pass 3: replay descriptor blocks */
483 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
484 struct jbd_bhdr *header,
485 struct recover_info *info)
488 struct jbd_revoke_header *revoke_hdr =
489 (struct jbd_revoke_header *)header;
490 uint32_t i, nr_entries, record_len = 4;
491 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
492 JBD_FEATURE_INCOMPAT_64BIT))
495 nr_entries = (jbd_get32(revoke_hdr, count) -
496 sizeof(struct jbd_revoke_header)) /
499 blocks_entry = (char *)(revoke_hdr + 1);
501 for (i = 0;i < nr_entries;i++) {
502 if (record_len == 8) {
504 (uint64_t *)blocks_entry;
505 jbd_add_revoke_block_tags(info, to_be64(*blocks));
508 (uint32_t *)blocks_entry;
509 jbd_add_revoke_block_tags(info, to_be32(*blocks));
511 blocks_entry += record_len;
/**
 * Walk the tags of a descriptor block with jbd_display_block_tags() as the
 * per-tag callback (used for the non-recover passes).
 *
 * The tag table size handed to jbd_iterate_block_table() is the journal
 * block size minus sizeof(struct jbd_bhdr).
 *
 * NOTE(review): the third parameter, the tag table start argument and the
 * callback argument are not visible in this view.
 */
515 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
516 struct jbd_bhdr *header,
519 jbd_iterate_block_table(jbd_fs,
521 jbd_get32(&jbd_fs->sb, blocksize) -
522 sizeof(struct jbd_bhdr),
523 jbd_display_block_tags,
/**
 * Walk the tags of a descriptor block with jbd_replay_block_tags() as the
 * per-tag callback, replaying each logged block (ACTION_RECOVER pass).
 *
 * The tag table size handed to jbd_iterate_block_table() is the journal
 * block size minus sizeof(struct jbd_bhdr).
 *
 * NOTE(review): the tag table start argument and the callback argument are
 * not visible in this view; presumably @p arg is forwarded to the callback.
 */
527 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
528 struct jbd_bhdr *header,
529 struct replay_arg *arg)
531 jbd_iterate_block_table(jbd_fs,
533 jbd_get32(&jbd_fs->sb, blocksize) -
534 sizeof(struct jbd_bhdr),
535 jbd_replay_block_tags,
/**
 * Walk the journal log once, performing the work selected by the action
 * argument (ACTION_SCAN, ACTION_REVOKE or ACTION_RECOVER).
 *
 * The walk starts at the block and transaction id recorded in the journal
 * superblock (start, sequence). Each block is fetched and its header is
 * checked for JBD_MAGIC_NUMBER and for the expected sequence. The block is
 * then dispatched on its type:
 * - descriptor: replayed for ACTION_RECOVER, otherwise only walked for
 *   debugging output;
 * - commit: logged;
 * - revoke: fed into the revoke tree for ACTION_REVOKE.
 * After a successful ACTION_SCAN, info->start_trans_id and
 * info->last_trans_id are filled in.
 *
 * NOTE(review): the third parameter declaration, the loop header, the
 * increments of this_block / this_trans_id, the break statements and the
 * final return are not visible in this view.
 * NOTE(review): transaction ids are compared with plain '>' here; confirm
 * whether 32-bit sequence wrap-around needs to be handled.
 */
539 int jbd_iterate_log(struct jbd_fs *jbd_fs,
540 struct recover_info *info,
544 bool log_end = false;
545 struct jbd_sb *sb = &jbd_fs->sb;
546 uint32_t start_trans_id, this_trans_id;
547 uint32_t start_block, this_block;
/* Begin at the position recorded in the journal superblock. */
549 start_trans_id = this_trans_id = jbd_get32(sb, sequence);
550 start_block = this_block = jbd_get32(sb, start);
552 ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
556 struct ext4_block block;
557 struct jbd_bhdr *header;
/* The later passes are bounded by what the scan pass found. */
558 if (action != ACTION_SCAN)
559 if (this_trans_id > info->last_trans_id) {
564 r = jbd_block_get(jbd_fs, &block, this_block);
568 header = (struct jbd_bhdr *)block.data;
/* A block without the journal magic is not a journal header block. */
569 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
570 jbd_block_set(jbd_fs, &block);
/* The block belongs to a different transaction than the expected one. */
575 if (jbd_get32(header, sequence) != this_trans_id) {
576 if (action != ACTION_SCAN)
579 jbd_block_set(jbd_fs, &block);
584 switch (jbd_get32(header, blocktype)) {
585 case JBD_DESCRIPTOR_BLOCK:
586 ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
587 "trans_id: %" PRIu32"\n",
588 this_block, this_trans_id);
589 if (action == ACTION_RECOVER) {
/* The callback receives the address of this_block, so the log position
 * can be updated while the tags are processed. */
590 struct replay_arg replay_arg;
591 replay_arg.info = info;
592 replay_arg.this_block = &this_block;
593 replay_arg.this_trans_id = this_trans_id;
595 jbd_replay_descriptor_block(jbd_fs,
596 header, &replay_arg);
598 jbd_debug_descriptor_block(jbd_fs,
599 header, &this_block);
602 case JBD_COMMIT_BLOCK:
603 ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
604 "trans_id: %" PRIu32"\n",
605 this_block, this_trans_id);
608 case JBD_REVOKE_BLOCK:
609 ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
610 "trans_id: %" PRIu32"\n",
611 this_block, this_trans_id);
612 if (action == ACTION_REVOKE) {
/* Entries added from this block are stamped with this transaction id. */
613 info->this_trans_id = this_trans_id;
614 jbd_build_revoke_tree(jbd_fs,
622 jbd_block_set(jbd_fs, &block);
/* Keep the position inside the circular log; arriving back at the
 * starting block is tested next. */
624 wrap(sb, this_block);
625 if (this_block == start_block)
629 ext4_dbg(DEBUG_JBD, "End of journal.\n");
/* Publish the transaction id range found by the scan pass. */
630 if (r == EOK && action == ACTION_SCAN) {
631 info->start_trans_id = start_trans_id;
632 if (this_trans_id > start_trans_id)
633 info->last_trans_id = this_trans_id - 1;
635 info->last_trans_id = this_trans_id;
/**
 * Recover the filesystem from its journal.
 *
 * Runs jbd_iterate_log() three times over the same recovery state:
 * ACTION_SCAN, ACTION_REVOKE and ACTION_RECOVER. The journal superblock's
 * start field is then set to 0, the descriptor is flagged dirty, and the
 * revoke tree is freed.
 *
 * NOTE(review): the checks of r between the passes, the condition under
 * which start is cleared, and the return statement are not visible in this
 * view.
 */
641 int jbd_recover(struct jbd_fs *jbd_fs)
644 struct recover_info info;
645 struct jbd_sb *sb = &jbd_fs->sb;
/* Start with an empty revoke tree. */
649 RB_INIT(&info.revoke_root);
651 r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
655 r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
659 r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
/* Update the in-memory journal superblock and flag it as modified. */
661 jbd_set32(&jbd_fs->sb, start, 0);
662 jbd_fs->dirty = true;
664 jbd_destroy_revoke_tree(&info);