2 * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3 * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 /** @addtogroup lwext4
34 * @file ext4_journal.c
35 * @brief Journal handle functions
38 #include "ext4_config.h"
39 #include "ext4_types.h"
41 #include "ext4_super.h"
42 #include "ext4_errno.h"
43 #include "ext4_blockdev.h"
44 #include "ext4_crc32c.h"
45 #include "ext4_debug.h"
/* NOTE(review): this file is a lossy, line-numbered extraction of lwext4's
 * ext4_journal.c — many original lines (braces, declarations, returns) are
 * missing, so all code below is kept byte-identical and only annotated. */
/* Revoke-tree node link: one revoked filesystem block per entry,
 * keyed by block number. */
54 RB_ENTRY(revoke_entry) revoke_node;
/* Recovery state: trans-id window found during the scan pass plus the
 * red-black tree of revoked blocks built during the revoke pass. */
58 uint32_t start_trans_id;
59 uint32_t last_trans_id;
60 uint32_t this_trans_id;
61 RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
/* Per-replay argument bundle handed to the block-tag iterator. */
65 struct recover_info *info;
67 uint32_t this_trans_id;
/* Order revoke entries by block number (RB-tree comparator; missing
 * return lines presumably yield 1 / -1 / 0 — original lines dropped). */
71 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
73 if (a->block > b->block)
75 else if (a->block < b->block)
/* Emit static-inline RB-tree operations for the jbd_revoke tree. */
80 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
81 jbd_revoke_entry_cmp, static inline)
/* Zero-initialized allocation helpers for revoke entries. */
83 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
84 #define jbd_free_revoke_entry(addr) free(addr)
/* Forward declaration: map a logical block of the journal inode to a
 * physical (filesystem) block. */
86 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
88 ext4_fsblk_t *fblock);
/* Write the journal superblock to disk. The superblock lives in logical
 * block 0 of the journal inode; bmap it, then write EXT4_SUPERBLOCK_SIZE
 * bytes at the resulting byte offset. Returns EOK or an error code.
 * (Declarations of rc/fblock/offset and the rc check are missing lines.) */
90 int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
93 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
96 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
100 offset = fblock * ext4_sb_get_block_size(&fs->sb);
101 return ext4_block_writebytes(fs->bdev, offset, s,
102 EXT4_SUPERBLOCK_SIZE);
/* Read the journal superblock from disk — mirror of jbd_sb_write():
 * bmap logical block 0 of the journal inode and read
 * EXT4_SUPERBLOCK_SIZE bytes into *s. */
105 int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
108 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
111 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
115 offset = fblock * ext4_sb_get_block_size(&fs->sb);
116 return ext4_block_readbytes(fs->bdev, offset, s,
117 EXT4_SUPERBLOCK_SIZE);
/* Sanity-check an on-disk journal superblock: the header magic must be
 * JBD_MAGIC_NUMBER and the blocktype must be JBD_SUPERBLOCK (v1) or
 * JBD_SUPERBLOCK_V2. (The false/true return lines are missing.) */
120 static bool jbd_verify_sb(struct jbd_sb *sb)
122 struct jbd_bhdr *header = &sb->header;
123 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
126 if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
127 jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
/* Flush the in-memory journal superblock if needed and clear the dirty
 * flag on success. (The dirty-flag guard and return lines are missing.) */
133 static int jbd_write_sb(struct jbd_fs *jbd_fs)
137 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
141 jbd_fs->dirty = false;
/* Attach @jbd_fs to the filesystem's journal: look up the journal inode
 * number from the ext4 superblock, take an inode reference, then read and
 * verify the journal superblock. On every failure path the jbd_fs struct
 * is re-zeroed (and the inode ref released where it was taken) before the
 * error is returned. */
146 int jbd_get_fs(struct ext4_fs *fs,
147 struct jbd_fs *jbd_fs)
150 uint32_t journal_ino;
152 memset(jbd_fs, 0, sizeof(struct jbd_fs));
153 journal_ino = ext4_get32(&fs->sb, journal_inode_number);
155 rc = ext4_fs_get_inode_ref(fs,
/* Failure of get_inode_ref: wipe state before returning. */
159 memset(jbd_fs, 0, sizeof(struct jbd_fs));
162 rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
/* Failure of sb read: wipe state and drop the inode reference. */
164 memset(jbd_fs, 0, sizeof(struct jbd_fs));
165 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
/* Superblock failed verification: same cleanup. */
168 if (!jbd_verify_sb(&jbd_fs->sb)) {
169 memset(jbd_fs, 0, sizeof(struct jbd_fs));
170 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
/* Detach from the journal: write back the journal superblock (if dirty)
 * and release the journal-inode reference. */
177 int jbd_put_fs(struct jbd_fs *jbd_fs)
180 rc = jbd_write_sb(jbd_fs);
182 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
/* Translate a logical block index within the journal inode into the
 * physical filesystem block number, via the inode's block map. */
186 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
188 ext4_fsblk_t *fblock)
190 int rc = ext4_fs_get_inode_dblk_idx(
/* Fetch (and read) a journal block through the block cache: translate the
 * journal-local block number via bmap, then ext4_block_get() it from the
 * underlying device. On success the buffer is tagged BC_FLUSH. */
198 int jbd_block_get(struct jbd_fs *jbd_fs,
199 struct ext4_block *block,
202 /* TODO: journal device. */
204 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
205 rc = jbd_inode_bmap(jbd_fs, iblock,
210 struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
211 rc = ext4_block_get(bdev, block, fblock);
213 ext4_bcache_set_flag(block->buf, BC_FLUSH);
/* Same as jbd_block_get() but without reading the block's current
 * contents from disk — used when the caller will overwrite the whole
 * block. Buffer is tagged BC_FLUSH on success. */
218 int jbd_block_get_noread(struct jbd_fs *jbd_fs,
219 struct ext4_block *block,
222 /* TODO: journal device. */
224 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
225 rc = jbd_inode_bmap(jbd_fs, iblock,
230 struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
231 rc = ext4_block_get_noread(bdev, block, fblock);
233 ext4_bcache_set_flag(block->buf, BC_FLUSH);
/* Release a journal block back to the block cache. */
238 int jbd_block_set(struct jbd_fs *jbd_fs,
239 struct ext4_block *block)
241 return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
246 * helper functions to deal with 32 or 64bit block numbers.
/* Size in bytes of one on-disk block tag, which depends on the journal's
 * feature flags: CSUM_V3 uses the fixed-size jbd_block_tag3; otherwise the
 * legacy tag, plus 2 checksum bytes for CSUM_V2, minus the high-32-bit
 * blocknr field when the 64BIT feature is absent. */
248 int jbd_tag_bytes(struct jbd_fs *jbd_fs)
252 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
253 JBD_FEATURE_INCOMPAT_CSUM_V3))
254 return sizeof(struct jbd_block_tag3);
256 size = sizeof(struct jbd_block_tag);
258 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
259 JBD_FEATURE_INCOMPAT_CSUM_V2))
260 size += sizeof(uint16_t);
/* NOTE(review): the 64BIT branch presumably returns `size` here; its
 * return line is among the missing lines. */
262 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
263 JBD_FEATURE_INCOMPAT_64BIT))
266 return size - sizeof(uint32_t);
269 /**@brief: tag information. */
274 uint8_t uuid[UUID_SIZE];
/* Parse one on-disk block tag into *tag_info: target block number (with
 * optional high 32 bits), whether a 16-byte UUID follows the tag, and
 * whether this is the last tag of the descriptor block. Bounds-checks
 * against remain_buf_size before each read. Two layouts are handled:
 * CSUM_V3 tags use 32-bit flags (jbd_get32); legacy tags use 16-bit
 * flags (jbd_get16). */
279 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
282 int32_t remain_buf_size,
283 struct tag_info *tag_info)
286 tag_info->tag_bytes = tag_bytes;
287 tag_info->uuid_exist = false;
288 tag_info->last_tag = false;
/* Not enough buffer left for even a bare tag. */
290 if (remain_buf_size - tag_bytes < 0)
293 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
294 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
295 struct jbd_block_tag3 *tag = __tag;
296 tag_info->block = jbd_get32(tag, blocknr);
/* With the 64BIT feature, fold in the high 32 bits of the block no. */
297 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
298 JBD_FEATURE_INCOMPAT_64BIT))
300 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
/* ESCAPE flag handling — the escaped-data action line is missing. */
302 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
/* No SAME_UUID flag: a 16-byte UUID follows the tag in the buffer. */
305 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
306 if (remain_buf_size - tag_bytes < UUID_SIZE)
309 uuid_start = (char *)tag + tag_bytes;
310 tag_info->uuid_exist = true;
311 tag_info->tag_bytes += UUID_SIZE;
312 memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
315 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
316 tag_info->last_tag = true;
/* Legacy (non-CSUM_V3) tag layout: same logic with 16-bit flags. */
319 struct jbd_block_tag *tag = __tag;
320 tag_info->block = jbd_get32(tag, blocknr);
321 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
322 JBD_FEATURE_INCOMPAT_64BIT))
324 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
326 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
329 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
330 if (remain_buf_size - tag_bytes < UUID_SIZE)
333 uuid_start = (char *)tag + tag_bytes;
334 tag_info->uuid_exist = true;
335 tag_info->tag_bytes += UUID_SIZE;
336 memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
339 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
340 tag_info->last_tag = true;
/* Serialize *tag_info into an on-disk block tag at __tag — the inverse of
 * jbd_extract_block_tag(). Writes the (possibly 64-bit) block number, an
 * optional trailing UUID, and the SAME_UUID / LAST_TAG flags, updating
 * tag_info->tag_bytes with the total bytes consumed. Bounds-checked
 * against remain_buf_size. CSUM_V3 tags use 32-bit flag accessors,
 * legacy tags 16-bit. */
347 jbd_write_block_tag(struct jbd_fs *jbd_fs,
349 int32_t remain_buf_size,
350 struct tag_info *tag_info)
353 int tag_bytes = jbd_tag_bytes(jbd_fs);
355 tag_info->tag_bytes = tag_bytes;
/* Descriptor block is full: not even a bare tag fits. */
357 if (remain_buf_size - tag_bytes < 0)
360 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
361 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
362 struct jbd_block_tag3 *tag = __tag;
363 jbd_set32(tag, blocknr, tag_info->block);
364 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
365 JBD_FEATURE_INCOMPAT_64BIT))
366 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
/* Caller supplied a UUID: append it after the tag. Otherwise the
 * SAME_UUID flag is set (the else branch line is missing). */
368 if (tag_info->uuid_exist) {
369 if (remain_buf_size - tag_bytes < UUID_SIZE)
372 uuid_start = (char *)tag + tag_bytes;
373 tag_info->tag_bytes += UUID_SIZE;
374 memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
376 jbd_set32(tag, flags,
377 jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
379 if (tag_info->last_tag)
380 jbd_set32(tag, flags,
381 jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
/* Legacy tag layout: identical flow with 16-bit flags. */
384 struct jbd_block_tag *tag = __tag;
385 jbd_set32(tag, blocknr, tag_info->block);
386 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
387 JBD_FEATURE_INCOMPAT_64BIT))
388 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
390 if (tag_info->uuid_exist) {
391 if (remain_buf_size - tag_bytes < UUID_SIZE)
394 uuid_start = (char *)tag + tag_bytes;
395 tag_info->tag_bytes += UUID_SIZE;
396 memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
398 jbd_set16(tag, flags,
399 jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
401 if (tag_info->last_tag)
402 jbd_set16(tag, flags,
403 jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
/* Walk the tag table of a descriptor block, invoking func(jbd_fs, block,
 * uuid, arg) for every tag until the LAST_TAG flag or the table is
 * exhausted. When CSUM_V2/V3 is enabled the trailing jbd_block_tail is
 * excluded from the iteration range. */
410 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
412 int32_t tag_tbl_size,
413 void (*func)(struct jbd_fs * jbd_fs,
419 char *tag_start, *tag_ptr;
420 int tag_bytes = jbd_tag_bytes(jbd_fs);
421 tag_start = __tag_start;
/* Reserve room for the block tail (checksum) when checksums are on. */
424 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
425 JBD_FEATURE_INCOMPAT_CSUM_V2) ||
426 JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
427 JBD_FEATURE_INCOMPAT_CSUM_V3))
428 tag_tbl_size -= sizeof(struct jbd_block_tail);
430 while (tag_tbl_size) {
431 struct tag_info tag_info;
432 int rc = jbd_extract_block_tag(jbd_fs,
441 func(jbd_fs, tag_info.block, tag_info.uuid, arg);
/* LAST_TAG terminates the descriptor block early. */
443 if (tag_info.last_tag)
446 tag_ptr += tag_info.tag_bytes;
447 tag_tbl_size -= tag_info.tag_bytes;
/* Debug iterator callback: log each tagged block number. arg points at
 * the current journal block counter (advanced in missing lines). */
451 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
456 uint32_t *iblock = arg;
457 ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
/* Look up a block number in the revoke tree; returns the entry or NULL.
 * Uses a stack-allocated key entry for RB_FIND. */
464 static struct revoke_entry *
465 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
467 struct revoke_entry tmp = {
471 return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
/* Replay one tagged block during the RECOVER pass: skip it if a revoke
 * record with a trans-id >= this one exists, otherwise copy the journal
 * block's contents over the target filesystem block. The ext4 superblock
 * target gets special handling so the live mount_count/state fields are
 * preserved across the copy. */
474 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
476 uint8_t *uuid __unused,
480 struct replay_arg *arg = __arg;
481 struct recover_info *info = arg->info;
482 uint32_t *this_block = arg->this_block;
483 struct revoke_entry *revoke_entry;
484 struct ext4_block journal_block, ext4_block;
485 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
/* Revoked at or after this transaction: do not replay. */
489 revoke_entry = jbd_revoke_entry_lookup(info, block);
491 arg->this_trans_id < revoke_entry->trans_id)
495 "Replaying block in block_tag: %" PRIu64 "\n",
498 r = jbd_block_get(jbd_fs, &journal_block, *this_block);
/* Fetch target block without reading: it is fully overwritten. */
503 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
505 jbd_block_set(jbd_fs, &journal_block);
509 memcpy(ext4_block.data,
511 jbd_get32(&jbd_fs->sb, blocksize));
513 ext4_bcache_set_dirty(ext4_block.buf);
514 ext4_block_set(fs->bdev, &ext4_block);
/* Superblock target: restore the in-memory superblock from the journal
 * copy but keep the current mount_count and state fields. */
516 uint16_t mount_count, state;
517 mount_count = ext4_get16(&fs->sb, mount_count);
518 state = ext4_get16(&fs->sb, state);
521 journal_block.data + EXT4_SUPERBLOCK_OFFSET,
522 EXT4_SUPERBLOCK_SIZE);
524 /* Mark system as mounted */
525 ext4_set16(&fs->sb, state, state);
526 r = ext4_sb_write(fs->bdev, &fs->sb);
530 /*Update mount count*/
531 ext4_set16(&fs->sb, mount_count, mount_count);
534 jbd_block_set(jbd_fs, &journal_block);
/* Record a revoked block in the revoke tree. If the block is already
 * present, just refresh its trans_id to the current transaction;
 * otherwise allocate and insert a new entry. */
539 static void jbd_add_revoke_block_tags(struct recover_info *info,
542 struct revoke_entry *revoke_entry;
544 ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
545 revoke_entry = jbd_revoke_entry_lookup(info, block);
547 revoke_entry->trans_id = info->this_trans_id;
551 revoke_entry = jbd_alloc_revoke_entry();
/* NOTE(review): allocation failure is only asserted, not handled. */
552 ext4_assert(revoke_entry);
553 revoke_entry->block = block;
554 revoke_entry->trans_id = info->this_trans_id;
555 RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
/* Free every entry in the revoke tree: repeatedly remove the minimum
 * node until the tree is empty. */
560 static void jbd_destroy_revoke_tree(struct recover_info *info)
562 while (!RB_EMPTY(&info->revoke_root)) {
563 struct revoke_entry *revoke_entry =
564 RB_MIN(jbd_revoke, &info->revoke_root);
565 ext4_assert(revoke_entry);
566 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
567 jbd_free_revoke_entry(revoke_entry);
571 /* Make sure we wrap around the log correctly! */
/* Wrap a journal block pointer back to `first` when it passes `maxlen`
 * (the journal area is treated as a circular log). */
572 #define wrap(sb, var) \
574 if (var >= jbd_get32((sb), maxlen)) \
575 var -= (jbd_get32((sb), maxlen) - jbd_get32((sb), first)); \
/* The three jbd_iterate_log passes: scan trans-id range, build the
 * revoke tree, then replay descriptor blocks. */
578 #define ACTION_SCAN 0
579 #define ACTION_REVOKE 1
580 #define ACTION_RECOVER 2
/* Parse a revoke block and add every listed block number to the revoke
 * tree. Entry width is 4 bytes, or 8 when the 64BIT feature is set
 * (record_len bump is in a missing line); entries are stored big-endian
 * on disk, hence the to_be32/to_be64 conversions. */
583 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
584 struct jbd_bhdr *header,
585 struct recover_info *info)
588 struct jbd_revoke_header *revoke_hdr =
589 (struct jbd_revoke_header *)header;
590 uint32_t i, nr_entries, record_len = 4;
591 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
592 JBD_FEATURE_INCOMPAT_64BIT))
/* `count` covers the header too, so subtract it before dividing. */
595 nr_entries = (jbd_get32(revoke_hdr, count) -
596 sizeof(struct jbd_revoke_header)) /
/* Entries start immediately after the revoke header. */
599 blocks_entry = (char *)(revoke_hdr + 1);
601 for (i = 0;i < nr_entries;i++) {
602 if (record_len == 8) {
604 (uint64_t *)blocks_entry;
605 jbd_add_revoke_block_tags(info, to_be64(*blocks));
608 (uint32_t *)blocks_entry;
609 jbd_add_revoke_block_tags(info, to_be32(*blocks));
611 blocks_entry += record_len;
/* Iterate a descriptor block's tag table in debug mode, printing each
 * tagged block (tag table starts right after the block header). */
615 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
616 struct jbd_bhdr *header,
619 jbd_iterate_block_table(jbd_fs,
621 jbd_get32(&jbd_fs->sb, blocksize) -
622 sizeof(struct jbd_bhdr),
623 jbd_display_block_tags,
/* Iterate a descriptor block's tag table in replay mode, copying each
 * tagged journal block to its home location via jbd_replay_block_tags. */
627 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
628 struct jbd_bhdr *header,
629 struct replay_arg *arg)
631 jbd_iterate_block_table(jbd_fs,
633 jbd_get32(&jbd_fs->sb, blocksize) -
634 sizeof(struct jbd_bhdr),
635 jbd_replay_block_tags,
/* Walk the circular journal log starting at the superblock's start/
 * sequence position, dispatching on block type for the given action
 * (SCAN, REVOKE or RECOVER). The walk ends when the magic is missing,
 * the sequence number stops matching, the last known transaction is
 * passed, or the pointer wraps back to the start block. At the end of a
 * successful SCAN pass the discovered [start_trans_id, last_trans_id]
 * window is stored in *info. */
639 int jbd_iterate_log(struct jbd_fs *jbd_fs,
640 struct recover_info *info,
644 bool log_end = false;
645 struct jbd_sb *sb = &jbd_fs->sb;
646 uint32_t start_trans_id, this_trans_id;
647 uint32_t start_block, this_block;
649 start_trans_id = this_trans_id = jbd_get32(sb, sequence);
650 start_block = this_block = jbd_get32(sb, start);
652 ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
656 struct ext4_block block;
657 struct jbd_bhdr *header;
/* After SCAN we know the last trans id; stop once we pass it. */
658 if (action != ACTION_SCAN)
659 if (this_trans_id > info->last_trans_id) {
664 r = jbd_block_get(jbd_fs, &block, this_block);
668 header = (struct jbd_bhdr *)block.data;
/* Non-journal block => end of the written log. */
669 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
670 jbd_block_set(jbd_fs, &block);
/* Sequence mismatch ends the walk (an error during SCAN only in
 * the missing branch; missing lines set log_end/r). */
675 if (jbd_get32(header, sequence) != this_trans_id) {
676 if (action != ACTION_SCAN)
679 jbd_block_set(jbd_fs, &block);
684 switch (jbd_get32(header, blocktype)) {
685 case JBD_DESCRIPTOR_BLOCK:
686 ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
687 "trans_id: %" PRIu32"\n",
688 this_block, this_trans_id);
689 if (action == ACTION_RECOVER) {
690 struct replay_arg replay_arg;
691 replay_arg.info = info;
692 replay_arg.this_block = &this_block;
693 replay_arg.this_trans_id = this_trans_id;
695 jbd_replay_descriptor_block(jbd_fs,
696 header, &replay_arg);
698 jbd_debug_descriptor_block(jbd_fs,
699 header, &this_block);
/* Commit block closes the transaction (trans-id bump is in a
 * missing line). */
702 case JBD_COMMIT_BLOCK:
703 ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
704 "trans_id: %" PRIu32"\n",
705 this_block, this_trans_id);
708 case JBD_REVOKE_BLOCK:
709 ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
710 "trans_id: %" PRIu32"\n",
711 this_block, this_trans_id);
712 if (action == ACTION_REVOKE) {
713 info->this_trans_id = this_trans_id;
714 jbd_build_revoke_tree(jbd_fs,
722 jbd_block_set(jbd_fs, &block);
/* Advance and wrap around the circular log. */
724 wrap(sb, this_block);
725 if (this_block == start_block)
729 ext4_dbg(DEBUG_JBD, "End of journal.\n");
/* SCAN result: last fully-seen transaction id. */
730 if (r == EOK && action == ACTION_SCAN) {
731 info->start_trans_id = start_trans_id;
732 if (this_trans_id > start_trans_id)
733 info->last_trans_id = this_trans_id - 1;
735 info->last_trans_id = this_trans_id;
/* Full journal recovery: SCAN to find the transaction window, REVOKE to
 * build the revoke tree, RECOVER to replay. On success the journal start
 * is reset, the EXT4_FINCOM_RECOVER incompat flag is cleared, and the
 * ext4 superblock is written back. The revoke tree is always destroyed
 * before returning. */
741 int jbd_recover(struct jbd_fs *jbd_fs)
744 struct recover_info info;
745 struct jbd_sb *sb = &jbd_fs->sb;
749 RB_INIT(&info.revoke_root);
751 r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
755 r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
759 r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
761 uint32_t features_incompatible =
762 ext4_get32(&jbd_fs->inode_ref.fs->sb,
763 features_incompatible);
/* Recovery done: empty the journal and clear the recover flag. */
764 jbd_set32(&jbd_fs->sb, start, 0);
765 features_incompatible &= ~EXT4_FINCOM_RECOVER;
766 ext4_set32(&jbd_fs->inode_ref.fs->sb,
767 features_incompatible,
768 features_incompatible);
769 jbd_fs->dirty = true;
770 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
771 &jbd_fs->inode_ref.fs->sb);
773 jbd_destroy_revoke_tree(&info);
/* Sync the in-memory journal state (start block, next trans id) into the
 * journal superblock and mark it dirty for a later jbd_write_sb(). */
777 void jbd_journal_write_sb(struct jbd_journal *journal)
779 struct jbd_fs *jbd_fs = journal->jbd_fs;
780 jbd_set32(&jbd_fs->sb, start, journal->start);
781 jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
782 jbd_fs->dirty = true;
/* Start journalling: set EXT4_FINCOM_RECOVER in the ext4 superblock (so
 * an unclean shutdown triggers recovery), persist it, then initialize
 * the journal runtime state from the journal superblock and write the
 * journal superblock out. */
785 int jbd_journal_start(struct jbd_fs *jbd_fs,
786 struct jbd_journal *journal)
789 uint32_t features_incompatible =
790 ext4_get32(&jbd_fs->inode_ref.fs->sb,
791 features_incompatible);
792 features_incompatible |= EXT4_FINCOM_RECOVER;
793 ext4_set32(&jbd_fs->inode_ref.fs->sb,
794 features_incompatible,
795 features_incompatible);
796 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
797 &jbd_fs->inode_ref.fs->sb);
/* Fresh log: start/last at the first usable block, trans ids at 1. */
801 journal->first = jbd_get32(&jbd_fs->sb, first);
802 journal->start = journal->first;
803 journal->last = journal->first;
804 journal->trans_id = 1;
805 journal->alloc_trans_id = 1;
807 journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
809 TAILQ_INIT(&journal->trans_queue);
810 TAILQ_INIT(&journal->cp_queue);
811 journal->jbd_fs = jbd_fs;
812 jbd_journal_write_sb(journal);
813 return jbd_write_sb(jbd_fs);
/* Stop journalling cleanly: clear EXT4_FINCOM_RECOVER in the ext4
 * superblock, persist it, reset the journal position (trans_id = 0;
 * the start reset is in a missing line), and write the journal
 * superblock out. */
816 int jbd_journal_stop(struct jbd_journal *journal)
819 struct jbd_fs *jbd_fs = journal->jbd_fs;
820 uint32_t features_incompatible =
821 ext4_get32(&jbd_fs->inode_ref.fs->sb,
822 features_incompatible);
823 features_incompatible &= ~EXT4_FINCOM_RECOVER;
824 ext4_set32(&jbd_fs->inode_ref.fs->sb,
825 features_incompatible,
826 features_incompatible);
827 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
828 &jbd_fs->inode_ref.fs->sb);
833 journal->trans_id = 0;
834 jbd_journal_write_sb(journal);
835 return jbd_write_sb(journal->jbd_fs);
/* Allocate the next journal block for a transaction: take journal->last,
 * advance it with circular wrap, and count it against the transaction. */
838 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
839 struct jbd_trans *trans)
841 uint32_t start_block = journal->last++;
842 trans->alloc_blocks++;
843 wrap(&journal->jbd_fs->sb, journal->last);
/* Create an empty transaction bound to this journal. The trans_id is
 * assigned later, at commit time. Returns NULL on allocation failure
 * (missing line). */
848 jbd_journal_new_trans(struct jbd_journal *journal)
850 struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
854 /* We will assign a trans_id to this transaction,
855 * once it has been committed.*/
856 trans->journal = journal;
/* Forward declaration of the checkpoint-completion callback installed on
 * journalled buffers (defined further below). */
861 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
862 struct ext4_buf *buf __unused,
/* Add a block-cache buffer to a transaction. Clean buffers are skipped;
 * dirty ones get a jbd_buf wrapper, a buffer refcount bump, and the
 * end_write callback so checkpoint completion can be tracked. */
866 int jbd_trans_add_block(struct jbd_trans *trans,
867 struct ext4_block *block)
870 /* We do not need to add those unmodified buffer to
872 if (!ext4_bcache_test_flag(block->buf, BC_DIRTY))
875 buf = calloc(1, sizeof(struct jbd_buf));
881 ext4_bcache_inc_ref(block->buf);
883 block->buf->end_write = jbd_trans_end_write;
884 block->buf->end_write_arg = trans;
887 LIST_INSERT_HEAD(&trans->buf_list, buf, buf_node);
/* Queue a revoke record on the transaction so the block will be written
 * to a revoke block at commit time. Returns ENOMEM on allocation
 * failure (missing line). */
891 int jbd_trans_revoke_block(struct jbd_trans *trans,
894 struct jbd_revoke_rec *rec =
895 calloc(1, sizeof(struct jbd_revoke_rec));
900 LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
/* Tear down a transaction: release every journalled buffer back to the
 * block cache and free its wrapper, then free all revoke records.
 * NOTE(review): the third (abort) parameter presumably changes cleanup
 * behavior in missing lines — confirm against upstream. */
904 void jbd_journal_free_trans(struct jbd_journal *journal,
905 struct jbd_trans *trans,
908 struct jbd_buf *jbd_buf, *tmp;
909 struct jbd_revoke_rec *rec, *tmp2;
910 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
911 LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
914 ext4_block_set(fs->bdev, &jbd_buf->block);
916 LIST_REMOVE(jbd_buf, buf_node);
919 LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
921 LIST_REMOVE(rec, revoke_node);
/* Write the commit block that seals a transaction: allocate the next
 * journal block, fill in a jbd_commit_header (magic, JBD_COMMIT_BLOCK,
 * the transaction's sequence number), mark it dirty and release it. */
928 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
931 struct jbd_commit_header *header;
932 uint32_t commit_iblock = 0;
933 struct ext4_block commit_block;
934 struct jbd_journal *journal = trans->journal;
936 commit_iblock = jbd_journal_alloc_block(journal, trans);
937 rc = jbd_block_get_noread(journal->jbd_fs,
938 &commit_block, commit_iblock);
942 header = (struct jbd_commit_header *)commit_block.data;
943 jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
944 jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
945 jbd_set32(&header->header, sequence, trans->trans_id);
947 ext4_bcache_set_dirty(commit_block.buf);
948 rc = jbd_block_set(journal->jbd_fs, &commit_block);
/* Write a transaction's data into the journal: for each buffered block,
 * emit (or continue) a descriptor block holding block tags, then copy
 * the buffer contents into a freshly allocated journal data block. A new
 * descriptor block is started when none is open or the tag table fills
 * up (that retry path is in missing lines). */
955 static int jbd_journal_prepare(struct jbd_journal *journal,
956 struct jbd_trans *trans)
959 int32_t tag_tbl_size;
960 uint32_t desc_iblock = 0;
961 uint32_t data_iblock = 0;
962 char *tag_start = NULL, *tag_ptr = NULL;
963 struct jbd_buf *jbd_buf;
964 struct ext4_block desc_block, data_block;
966 LIST_FOREACH(jbd_buf, &trans->buf_list, buf_node) {
967 struct tag_info tag_info;
968 bool uuid_exist = false;
/* Open a new descriptor block and write its header. */
971 struct jbd_bhdr *bhdr;
972 desc_iblock = jbd_journal_alloc_block(journal, trans);
973 rc = jbd_block_get_noread(journal->jbd_fs,
974 &desc_block, desc_iblock);
978 ext4_bcache_set_dirty(desc_block.buf);
980 bhdr = (struct jbd_bhdr *)desc_block.data;
981 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
982 jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
983 jbd_set32(bhdr, sequence, trans->trans_id);
985 tag_start = (char *)(bhdr + 1);
988 tag_tbl_size = journal->block_size -
989 sizeof(struct jbd_bhdr);
/* Remember where this transaction begins in the journal. */
991 if (!trans->start_iblock)
992 trans->start_iblock = desc_iblock;
/* Build the tag for this buffer (last buffer gets LAST_TAG). */
995 tag_info.block = jbd_buf->block.lb_id;
996 tag_info.uuid_exist = uuid_exist;
997 if (i == trans->data_cnt - 1)
998 tag_info.last_tag = true;
1001 memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
/* On a full tag table, close the descriptor block and retry with a
 * fresh one (retry logic in missing lines). */
1004 rc = jbd_write_block_tag(journal->jbd_fs,
1009 jbd_block_set(journal->jbd_fs, &desc_block);
/* Copy the buffer's payload into a journal data block. */
1014 data_iblock = jbd_journal_alloc_block(journal, trans);
1015 rc = jbd_block_get_noread(journal->jbd_fs,
1016 &data_block, data_iblock);
1020 ext4_bcache_set_dirty(data_block.buf);
1022 memcpy(data_block.data, jbd_buf->block.data,
1023 journal->block_size);
1025 rc = jbd_block_set(journal->jbd_fs, &data_block);
1029 tag_ptr += tag_info.tag_bytes;
1030 tag_tbl_size -= tag_info.tag_bytes;
/* Close the last open descriptor block. */
1034 if (rc == EOK && desc_iblock)
1035 jbd_block_set(journal->jbd_fs, &desc_block);
/* Write a transaction's revoke records into the journal as revoke
 * blocks. Entries are 4 bytes (8 with the 64BIT feature) stored
 * big-endian; a new revoke block is opened when none is open or the
 * current one fills up, and the header's `count` field records the
 * number of payload bytes used. */
1041 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1042 struct jbd_trans *trans)
1044 int rc = EOK, i = 0;
1045 int32_t tag_tbl_size;
1046 uint32_t desc_iblock = 0;
1047 char *blocks_entry = NULL;
1048 struct jbd_revoke_rec *rec, *tmp;
1049 struct ext4_block desc_block;
1050 struct jbd_revoke_header *header = NULL;
1051 int32_t record_len = 4;
1053 if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1054 JBD_FEATURE_INCOMPAT_64BIT))
1057 LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
/* Open a new revoke block and write its header. */
1061 struct jbd_bhdr *bhdr;
1062 desc_iblock = jbd_journal_alloc_block(journal, trans);
1063 rc = jbd_block_get_noread(journal->jbd_fs,
1064 &desc_block, desc_iblock);
1069 ext4_bcache_set_dirty(desc_block.buf);
1071 bhdr = (struct jbd_bhdr *)desc_block.data;
1072 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1073 jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1074 jbd_set32(bhdr, sequence, trans->trans_id);
1076 header = (struct jbd_revoke_header *)bhdr;
1077 blocks_entry = (char *)(header + 1);
1078 tag_tbl_size = journal->block_size -
1079 sizeof(struct jbd_revoke_header);
1081 if (!trans->start_iblock)
1082 trans->start_iblock = desc_iblock;
/* Block full: finalize its count and close it; a new block is
 * opened on the next iteration (missing lines reset state). */
1086 if (tag_tbl_size < record_len) {
1087 jbd_set32(header, count,
1088 journal->block_size - tag_tbl_size);
1089 jbd_block_set(journal->jbd_fs, &desc_block);
/* Append the revoked block number, big-endian, 4 or 8 bytes. */
1094 if (record_len == 8) {
1096 (uint64_t *)blocks_entry;
1097 *blocks = to_be64(rec->lba);
1100 (uint32_t *)blocks_entry;
1101 *blocks = to_be32(rec->lba);
1103 blocks_entry += record_len;
1104 tag_tbl_size -= record_len;
/* Finalize and close the last open revoke block. */
1108 if (rc == EOK && desc_iblock) {
1110 jbd_set32(header, count,
1111 journal->block_size - tag_tbl_size);
1113 jbd_block_set(journal->jbd_fs, &desc_block);
/* Queue a finished transaction for commit. */
1120 jbd_journal_submit_trans(struct jbd_journal *journal,
1121 struct jbd_trans *trans)
1123 TAILQ_INSERT_TAIL(&journal->trans_queue,
/* Checkpoint a committed transaction: push each journalled buffer back
 * through the block cache so it reaches its home location on disk. */
1128 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
1130 struct jbd_buf *jbd_buf, *tmp;
1131 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1132 LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
1134 struct ext4_block block = jbd_buf->block;
1135 ext4_block_set(fs->bdev, &block);
/* Buffer write-completion callback (installed by jbd_trans_add_block).
 * Counts finished writes for the owning transaction; when all its data
 * blocks have reached disk, removes it from the checkpoint queue, frees
 * it, and — if it headed the queue — advances the journal's start/
 * trans_id past it, drains any subsequently-finished transactions, and
 * persists the new journal superblock. */
1139 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1140 struct ext4_buf *buf __unused,
1144 struct jbd_trans *trans = arg;
1145 struct jbd_journal *journal = trans->journal;
1146 bool first_in_queue =
1147 trans == TAILQ_FIRST(&journal->cp_queue);
1151 trans->written_cnt++;
1152 if (trans->written_cnt == trans->data_cnt) {
1153 TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
/* Head of the queue done: reclaim its journal space. */
1155 if (first_in_queue) {
1156 journal->start = trans->start_iblock +
1157 trans->alloc_blocks;
1158 journal->trans_id = trans->trans_id + 1;
1160 jbd_journal_free_trans(journal, trans, false);
/* Drain any following transactions that have also finished
 * (data_cnt == 0), advancing start/trans_id as we go; stop at the
 * first still-pending one. */
1162 if (first_in_queue) {
1163 while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1164 if (!trans->data_cnt) {
1165 TAILQ_REMOVE(&journal->cp_queue,
1168 journal->start = trans->start_iblock +
1169 trans->alloc_blocks;
1170 journal->trans_id = trans->trans_id + 1;
1171 jbd_journal_free_trans(journal,
1174 journal->start = trans->start_iblock;
1175 journal->trans_id = trans->trans_id;
/* Persist the advanced journal window. */
1179 jbd_journal_write_sb(journal);
1180 jbd_write_sb(journal->jbd_fs);
1186 * XXX: one should disable cache writeback first.
/* Commit the oldest submitted transaction: assign it the next trans id,
 * write its data (descriptor + data blocks), revoke blocks, and commit
 * block into the journal, then either checkpoint it (if it has data) or
 * retire it immediately. On failure the journal tail is rolled back and
 * the transaction freed (error path partially visible below; the
 * function continues past the last line shown here). */
1188 void jbd_journal_commit_one(struct jbd_journal *journal)
1191 uint32_t last = journal->last;
1192 struct jbd_trans *trans;
1193 if ((trans = TAILQ_FIRST(&journal->trans_queue))) {
1194 TAILQ_REMOVE(&journal->trans_queue, trans, trans_node);
1196 trans->trans_id = journal->alloc_trans_id;
1197 rc = jbd_journal_prepare(journal, trans);
1201 rc = jbd_journal_prepare_revoke(journal, trans);
1205 rc = jbd_trans_write_commit_block(trans);
1209 journal->alloc_trans_id++;
/* Empty checkpoint queue: this transaction defines the window. */
1210 if (TAILQ_EMPTY(&journal->cp_queue)) {
1211 if (trans->data_cnt) {
1212 journal->start = trans->start_iblock;
1213 journal->trans_id = trans->trans_id;
1214 jbd_journal_write_sb(journal);
1215 jbd_write_sb(journal->jbd_fs);
1216 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1218 jbd_journal_cp_trans(journal, trans);
/* No data blocks: retire immediately, skipping checkpoint. */
1220 journal->start = trans->start_iblock +
1221 trans->alloc_blocks;
1222 journal->trans_id = trans->trans_id + 1;
1223 jbd_journal_write_sb(journal);
1224 jbd_journal_free_trans(journal, trans, false);
1227 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1229 if (trans->data_cnt)
1230 jbd_journal_cp_trans(journal, trans);
/* Error path: roll back allocated journal blocks and abort-free. */
1236 journal->last = last;
1237 jbd_journal_free_trans(journal, trans, true);