2 * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3 * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 /** @addtogroup lwext4
34 * @file ext4_journal.c
35 * @brief Journal handle functions
38 #include "ext4_config.h"
39 #include "ext4_types.h"
41 #include "ext4_super.h"
42 #include "ext4_journal.h"
43 #include "ext4_errno.h"
44 #include "ext4_blockdev.h"
45 #include "ext4_crc32.h"
46 #include "ext4_debug.h"
51 /**@brief Revoke entry collected during journal replay; one node of the
 * jbd_revoke tree, ordered by its block number.*/
53 /**@brief Block number not to be replayed.*/
56 /**@brief For any transaction id smaller
57 * than trans_id, records of @block
58 * in those transactions should not
62 /**@brief Linkage of this entry into the revoke tree.*/
63 RB_ENTRY(revoke_entry) revoke_node;
66 /**@brief Valid journal replay information.*/
68 /**@brief Starting transaction id (filled in at the end of the scan pass).*/
69 uint32_t start_trans_id;
71 /**@brief Ending transaction id (filled in at the end of the scan pass).*/
72 uint32_t last_trans_id;
74 /**@brief Used as internal argument: transaction id of the revoke block
 * currently being added to the revoke tree.*/
75 uint32_t this_trans_id;
77 /**@brief Number of transactions gone through.*/
80 /**@brief RB-Tree storing revoke entries.*/
81 RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
84 /**@brief Journal replay internal arguments, handed to
 * jbd_replay_block_tags() for every block tag of a descriptor block.*/
86 /**@brief Journal replay information.*/
87 struct recover_info *info;
89 /**@brief Current block we are on.*/
92 /**@brief Current trans_id we are on.*/
93 uint32_t this_trans_id;
/**@brief Comparator of the jbd_revoke tree: orders two revoke entries
 *        by their `block` field.
 * NOTE(review): presumably a usual three-way comparator (positive,
 * negative, zero) - confirm against the full function body.*/
97 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
99 if (a->block > b->block)
101 else if (a->block < b->block)
/**@brief Comparator of the jbd_block tree: orders two block records
 *        by their `lba` field.
 * NOTE(review): presumably a usual three-way comparator (positive,
 * negative, zero) - confirm against the full function body.*/
107 jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b)
111 else if (a->lba < b->lba)
/* Instantiate the static inline tree helpers used in this file:
 *  - jbd_revoke: struct revoke_entry nodes linked through revoke_node,
 *    ordered by jbd_revoke_entry_cmp;
 *  - jbd_block: struct jbd_block_rec nodes linked through block_rec_node,
 *    ordered by jbd_block_rec_cmp. */
116 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
117 jbd_revoke_entry_cmp, static inline)
118 RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
119 jbd_block_rec_cmp, static inline)
/* Revoke entries live on the heap: calloc() hands back a zero-filled
 * struct revoke_entry (or NULL on failure), free() releases it. */
121 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
122 #define jbd_free_revoke_entry(addr) free(addr)
/**@brief Tell whether the journal carries metadata checksums.
 * @param jbd_sb jbd superblock
 * Tests JBD_FEATURE_INCOMPAT_CSUM_V2 first, then
 * JBD_FEATURE_INCOMPAT_CSUM_V3.
 * NOTE(review): callers treat a non-zero result as "checksums enabled"
 * and one of them keeps it in a variable named `ver`; presumably the
 * result is the checksum version - confirm the exact values.*/
124 static int jbd_has_csum(struct jbd_sb *jbd_sb)
126 if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V2))
129 if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V3))
/**@brief Compute the crc32c checksum of the jbd superblock.
 * @param jbd_sb jbd superblock
 * When jbd_has_csum() reports checksums, the stored checksum field is
 * saved, cleared with jbd_set32(), the crc32c of JBD_SUPERBLOCK_SIZE
 * bytes is taken (seeded with EXT4_CRC32_INIT) and the saved field is
 * put back; otherwise `checksum` keeps its initial value 0.
 * With CONFIG_META_CSUM_ENABLE off, the name expands to the constant 0.*/
135 #if CONFIG_META_CSUM_ENABLE
136 static uint32_t jbd_sb_csum(struct jbd_sb *jbd_sb)
138 uint32_t checksum = 0;
140 if (jbd_has_csum(jbd_sb)) {
141 uint32_t orig_checksum = jbd_sb->checksum;
142 jbd_set32(jbd_sb, checksum, 0);
143 /* Calculate crc32c checksum against the whole superblock */
144 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_sb,
145 JBD_SUPERBLOCK_SIZE);
146 jbd_sb->checksum = orig_checksum;
151 #define jbd_sb_csum(...) 0
/**@brief Refresh the checksum field of the jbd superblock.
 * @param jbd_sb jbd superblock
 * Checks jbd_has_csum() first (presumably leaving early when the journal
 * has no checksums), then stores jbd_sb_csum() into the `checksum` field
 * through jbd_set32().*/
154 static void jbd_sb_csum_set(struct jbd_sb *jbd_sb)
156 if (!jbd_has_csum(jbd_sb))
159 jbd_set32(jbd_sb, checksum, jbd_sb_csum(jbd_sb));
/**@brief Check the stored jbd superblock checksum.
 * @param jbd_sb jbd superblock
 * @return whether jbd_sb_csum() equals the `checksum` field read through
 *         jbd_get32(); the outcome for a journal without checksums is
 *         decided right after the jbd_has_csum() test.
 * With CONFIG_META_CSUM_ENABLE off, the name expands to `true`.*/
162 #if CONFIG_META_CSUM_ENABLE
164 jbd_verify_sb_csum(struct jbd_sb *jbd_sb)
166 if (!jbd_has_csum(jbd_sb))
169 return jbd_sb_csum(jbd_sb) == jbd_get32(jbd_sb, checksum);
172 #define jbd_verify_sb_csum(...) true
/**@brief Compute the crc32c checksum of a journal metadata block.
 * @param jbd_fs jbd filesystem
 * @param bhdr start of the block (its jbd block header)
 * The checksum lives in a struct jbd_block_tail occupying the last bytes
 * of the block (`blocksize` is taken from the jbd superblock). The stored
 * value is saved and restored around the computation, which is seeded
 * with the crc32c of the journal UUID and continued over the block.
 * With CONFIG_META_CSUM_ENABLE off, the name expands to the constant 0.*/
175 #if CONFIG_META_CSUM_ENABLE
176 static uint32_t jbd_meta_csum(struct jbd_fs *jbd_fs,
177 struct jbd_bhdr *bhdr)
179 uint32_t checksum = 0;
181 if (jbd_has_csum(&jbd_fs->sb)) {
182 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
183 struct jbd_block_tail *tail =
184 (struct jbd_block_tail *)((char *)bhdr + block_size -
185 sizeof(struct jbd_block_tail));
186 uint32_t orig_checksum = tail->checksum;
189 /* First calculate crc32c checksum against fs uuid */
190 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
191 sizeof(jbd_fs->sb.uuid));
192 /* Calculate crc32c checksum against the whole block */
193 checksum = ext4_crc32c(checksum, bhdr,
195 tail->checksum = orig_checksum;
200 #define jbd_meta_csum(...) 0
/**@brief Stamp the checksum of a journal metadata block into its tail.
 * @param jbd_fs jbd filesystem
 * @param bhdr start of the block (its jbd block header)
 * The tail sits in the last sizeof(struct jbd_block_tail) bytes of the
 * block; the value written is jbd_meta_csum() passed through to_be32().
 * The jbd_has_csum() test guards the store (presumably an early return
 * for journals without checksums).*/
203 static void jbd_meta_csum_set(struct jbd_fs *jbd_fs,
204 struct jbd_bhdr *bhdr)
206 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
207 struct jbd_block_tail *tail = (struct jbd_block_tail *)
208 ((char *)bhdr + block_size -
209 sizeof(struct jbd_block_tail));
210 if (!jbd_has_csum(&jbd_fs->sb))
213 tail->checksum = to_be32(jbd_meta_csum(jbd_fs, bhdr));
/**@brief Check the checksum stored in the tail of a journal metadata block.
 * @param jbd_fs jbd filesystem
 * @param bhdr start of the block (its jbd block header)
 * @return whether jbd_meta_csum() equals the tail checksum converted with
 *         to_be32(); the outcome for a journal without checksums is
 *         decided right after the jbd_has_csum() test.
 * With CONFIG_META_CSUM_ENABLE off, the name expands to `true`.*/
216 #if CONFIG_META_CSUM_ENABLE
218 jbd_verify_meta_csum(struct jbd_fs *jbd_fs,
219 struct jbd_bhdr *bhdr)
221 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
222 struct jbd_block_tail *tail = (struct jbd_block_tail *)
223 ((char *)bhdr + block_size -
224 sizeof(struct jbd_block_tail));
225 if (!jbd_has_csum(&jbd_fs->sb))
228 return jbd_meta_csum(jbd_fs, bhdr) == to_be32(tail->checksum);
231 #define jbd_verify_meta_csum(...) true
/**@brief Compute the crc32c checksum of a commit block.
 * @param jbd_fs jbd filesystem
 * @param header commit block header (start of the commit block)
 * chksum_type, chksum_size and chksum[0] are saved, zeroed for the
 * computation and restored afterwards. The crc32c is seeded with the
 * journal UUID and then continued over the commit block.
 * With CONFIG_META_CSUM_ENABLE off, the name expands to the constant 0.*/
234 #if CONFIG_META_CSUM_ENABLE
235 static uint32_t jbd_commit_csum(struct jbd_fs *jbd_fs,
236 struct jbd_commit_header *header)
238 uint32_t checksum = 0;
240 if (jbd_has_csum(&jbd_fs->sb)) {
241 uint32_t orig_checksum_type = header->chksum_type,
242 orig_checksum_size = header->chksum_size,
243 orig_checksum = header->chksum[0];
244 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
245 header->chksum_type = 0;
246 header->chksum_size = 0;
247 header->chksum[0] = 0;
249 /* First calculate crc32c checksum against fs uuid */
250 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
251 sizeof(jbd_fs->sb.uuid));
252 /* Calculate crc32c checksum against the whole block */
253 checksum = ext4_crc32c(checksum, header,
256 header->chksum_type = orig_checksum_type;
257 header->chksum_size = orig_checksum_size;
258 header->chksum[0] = orig_checksum;
263 #define jbd_commit_csum(...) 0
266 static void jbd_commit_csum_set(struct jbd_fs *jbd_fs,
267 struct jbd_commit_header *header)
269 if (!jbd_has_csum(&jbd_fs->sb))
272 header->chksum_type = 0;
273 header->chksum_size = 0;
274 header->chksum[0] = jbd_commit_csum(jbd_fs, header);
/**@brief Check the checksum stored in a commit block.
 * @param jbd_fs jbd filesystem
 * @param header commit block header (start of the commit block)
 * @return whether chksum[0] equals the recomputed jbd_commit_csum()
 *         converted with to_be32(); the outcome for a journal without
 *         checksums is decided right after the jbd_has_csum() test.
 * With CONFIG_META_CSUM_ENABLE off, the name expands to `true`.*/
277 #if CONFIG_META_CSUM_ENABLE
278 static bool jbd_verify_commit_csum(struct jbd_fs *jbd_fs,
279 struct jbd_commit_header *header)
281 if (!jbd_has_csum(&jbd_fs->sb))
284 return header->chksum[0] == to_be32(jbd_commit_csum(jbd_fs,
288 #define jbd_verify_commit_csum(...) true
/**@brief Compute the checksum of a data block logged in the journal.
 * @param jbd_fs jbd filesystem
 * @param buf content of the block
 * With metadata checksums (jbd_has_csum()), the crc32c is chained over
 * the journal UUID, the `sequence` value and the block content.
 * Otherwise, when the legacy checksum feature test succeeds, ext4_crc32()
 * is run over the block content starting from `csum`.
 * With CONFIG_META_CSUM_ENABLE off, the name expands to the constant 0.*/
291 #if CONFIG_META_CSUM_ENABLE
293 * NOTE: We only make use of @csum parameter when
294 * JBD_FEATURE_COMPAT_CHECKSUM is enabled.
296 static uint32_t jbd_block_csum(struct jbd_fs *jbd_fs, const void *buf,
300 uint32_t checksum = 0;
302 if (jbd_has_csum(&jbd_fs->sb)) {
303 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
304 /* First calculate crc32c checksum against fs uuid */
305 checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
306 sizeof(jbd_fs->sb.uuid));
307 /* Then calculate crc32c checksum against sequence no. */
308 checksum = ext4_crc32c(checksum, &sequence,
310 /* Calculate crc32c checksum against the whole block */
311 checksum = ext4_crc32c(checksum, buf,
/* NOTE(review): JBD_FEATURE_COMPAT_CHECKSUM is a *compat* flag but is
 * tested here with the *incompat* macro - looks like the wrong feature
 * word is consulted; confirm and switch to the compat test if so. */
313 } else if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
314 JBD_FEATURE_COMPAT_CHECKSUM)) {
315 uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
316 /* Calculate crc32 checksum against the whole block */
317 checksum = ext4_crc32(csum, buf,
323 #define jbd_block_csum(...) 0
326 static void jbd_block_tag_csum_set(struct jbd_fs *jbd_fs, void *__tag,
329 int ver = jbd_has_csum(&jbd_fs->sb);
334 struct jbd_block_tag *tag = __tag;
335 tag->checksum = (uint16_t)to_be32(checksum);
337 struct jbd_block_tag3 *tag = __tag;
338 tag->checksum = to_be32(checksum);
342 /**@brief Write jbd superblock to disk.
343 * @param jbd_fs jbd filesystem
344 * @param s jbd superblock
345 * @return standard error code*/
/* The superblock is logical block 0 of the journal inode: it is mapped
 * to a filesystem block with jbd_inode_bmap(), turned into a byte offset
 * using the ext4 block size, and EXT4_SUPERBLOCK_SIZE bytes of `s` are
 * written there.
 * NOTE(review): the length used is EXT4_SUPERBLOCK_SIZE while the
 * checksum code uses JBD_SUPERBLOCK_SIZE - confirm both are equal. */
346 static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
349 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
352 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
357 offset = fblock * ext4_sb_get_block_size(&fs->sb);
358 return ext4_block_writebytes(fs->bdev, offset, s,
359 EXT4_SUPERBLOCK_SIZE);
362 /**@brief Read jbd superblock from disk.
363 * @param jbd_fs jbd filesystem
364 * @param s jbd superblock
365 * @return standard error code*/
/* The superblock is logical block 0 of the journal inode: it is mapped
 * to a filesystem block with jbd_inode_bmap(), turned into a byte offset
 * using the ext4 block size, and EXT4_SUPERBLOCK_SIZE bytes are read
 * into `s`.
 * NOTE(review): the length used is EXT4_SUPERBLOCK_SIZE while the
 * checksum code uses JBD_SUPERBLOCK_SIZE - confirm both are equal. */
366 static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
369 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
372 rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
376 offset = fblock * ext4_sb_get_block_size(&fs->sb);
377 return ext4_block_readbytes(fs->bdev, offset, s,
378 EXT4_SUPERBLOCK_SIZE);
381 /**@brief Verify jbd superblock.
382 * @param sb jbd superblock
383 * @return true if jbd superblock is valid */
/* Three checks are made on the superblock: the header magic must be
 * JBD_MAGIC_NUMBER, the header block type must be JBD_SUPERBLOCK or
 * JBD_SUPERBLOCK_V2, and the final answer is that of
 * jbd_verify_sb_csum(). */
384 static bool jbd_verify_sb(struct jbd_sb *sb)
386 struct jbd_bhdr *header = &sb->header;
387 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
390 if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
391 jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
394 return jbd_verify_sb_csum(sb);
397 /**@brief Write back dirty jbd superblock to disk.
398 * @param jbd_fs jbd filesystem
399 * @return standard error code*/
/* Writes the in-memory superblock (jbd_fs->sb) with jbd_sb_write() and
 * clears jbd_fs->dirty afterwards. */
400 static int jbd_write_sb(struct jbd_fs *jbd_fs)
404 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
408 jbd_fs->dirty = false;
413 /**@brief Get reference to jbd filesystem.
414 * @param fs Filesystem to load journal of
415 * @param jbd_fs jbd filesystem
416 * @return standard error code*/
417 int jbd_get_fs(struct ext4_fs *fs,
418 struct jbd_fs *jbd_fs)
421 uint32_t journal_ino;
423 memset(jbd_fs, 0, sizeof(struct jbd_fs));
424 /* See if there is journal inode on this filesystem.*/
425 /* FIXME: detection on existance ofbkejournal bdev is
427 journal_ino = ext4_get32(&fs->sb, journal_inode_number);
429 rc = ext4_fs_get_inode_ref(fs,
433 memset(jbd_fs, 0, sizeof(struct jbd_fs));
436 rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
438 memset(jbd_fs, 0, sizeof(struct jbd_fs));
439 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
442 if (!jbd_verify_sb(&jbd_fs->sb)) {
443 memset(jbd_fs, 0, sizeof(struct jbd_fs));
444 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
451 /**@brief Put reference of jbd filesystem.
452 * @param jbd_fs jbd filesystem
453 * @return standard error code*/
/* Writes the jbd superblock back with jbd_write_sb(), then releases the
 * journal inode reference held in jbd_fs->inode_ref. */
454 int jbd_put_fs(struct jbd_fs *jbd_fs)
457 rc = jbd_write_sb(jbd_fs);
459 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
463 /**@brief Data block lookup helper.
464 * @param jbd_fs jbd filesystem
465 * @param iblock block index
466 * @param fblock logical block address
467 * @return standard error code*/
/* Wrapper around ext4_fs_get_inode_dblk_idx(); callers in this file pass
 * a journal-relative block index and receive the mapped address through
 * `fblock`. */
468 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
470 ext4_fsblk_t *fblock)
472 int rc = ext4_fs_get_inode_dblk_idx(
480 /**@brief jbd block get function (through cache).
481 * @param jbd_fs jbd filesystem
482 * @param block block descriptor
483 * @param fblock jbd logical block address
484 * @return standard error code*/
/* `fblock` comes in as a journal-relative index: it is narrowed to
 * ext4_lblk_t, mapped through jbd_inode_bmap(), and the mapped block is
 * then fetched from the filesystem block device with ext4_block_get().
 * The returned buffer is flagged BC_FLUSH and BC_TMP. */
485 static int jbd_block_get(struct jbd_fs *jbd_fs,
486 struct ext4_block *block,
489 /* TODO: journal device. */
491 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
493 /* Lookup the logical block address of
495 rc = jbd_inode_bmap(jbd_fs, iblock,
500 struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
501 rc = ext4_block_get(bdev, block, fblock);
503 /* If succeeded, mark buffer as BC_FLUSH to indicate
504 * that data should be written to disk immediately.*/
506 ext4_bcache_set_flag(block->buf, BC_FLUSH);
507 /* As we don't want to occupy too much space
508 * in block cache, we set this buffer BC_TMP.*/
509 ext4_bcache_set_flag(block->buf, BC_TMP);
515 /**@brief jbd block get function (through cache, don't read).
516 * @param jbd_fs jbd filesystem
517 * @param block block descriptor
518 * @param fblock jbd logical block address
519 * @return standard error code*/
/* Same mapping as jbd_block_get() (journal-relative index ->
 * jbd_inode_bmap()), but the buffer is obtained with
 * ext4_block_get_noread(). The returned buffer is flagged BC_FLUSH. */
520 static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
521 struct ext4_block *block,
524 /* TODO: journal device. */
526 ext4_lblk_t iblock = (ext4_lblk_t)fblock;
527 rc = jbd_inode_bmap(jbd_fs, iblock,
532 struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
533 rc = ext4_block_get_noread(bdev, block, fblock);
535 ext4_bcache_set_flag(block->buf, BC_FLUSH);
540 /**@brief jbd block set procedure (through cache).
541 * @param jbd_fs jbd filesystem
542 * @param block block descriptor
543 * @return standard error code*/
/* Thin wrapper: hands the block back with ext4_block_set() on the block
 * device of the filesystem that owns the journal inode. */
544 static int jbd_block_set(struct jbd_fs *jbd_fs,
545 struct ext4_block *block)
547 return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
551 /**@brief helper function to calculate
552 * block tag size, not including UUID part.
553 * @param jbd_fs jbd filesystem
554 * @return tag size in bytes*/
/* Size rules, in order:
 *  - CSUM_V3: sizeof(struct jbd_block_tag3), regardless of other flags;
 *  - otherwise start from sizeof(struct jbd_block_tag), add
 *    sizeof(uint16_t) with CSUM_V2, and subtract sizeof(uint32_t) on the
 *    final path taken when the 64BIT test does not end the function. */
555 static int jbd_tag_bytes(struct jbd_fs *jbd_fs)
559 /* It is very easy to deal with the case which
560 * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
561 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
562 JBD_FEATURE_INCOMPAT_CSUM_V3))
563 return sizeof(struct jbd_block_tag3);
565 size = sizeof(struct jbd_block_tag);
567 /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
568 * add 2 bytes to size.*/
569 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
570 JBD_FEATURE_INCOMPAT_CSUM_V2))
571 size += sizeof(uint16_t);
573 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
574 JBD_FEATURE_INCOMPAT_64BIT))
577 /* If block number is 4 bytes in size,
578 * minus 4 bytes from size */
579 return size - sizeof(uint32_t);
582 /**@brief Tag information: decoded form of one on-disk block tag, filled
 * by jbd_extract_block_tag() and consumed by jbd_write_block_tag(). */
584 /**@brief Tag size in bytes, including UUID part.*/
587 /**@brief block number stored in this tag.*/
590 /**@brief whether UUID part exists or not.*/
593 /**@brief UUID content if UUID part exists.*/
594 uint8_t uuid[UUID_SIZE];
596 /**@brief Is this the last tag? */
599 /**@brief crc32c checksum. */
603 /**@brief Extract information from a block tag.
604 * @param __tag pointer to the block tag
605 * @param tag_bytes block tag size of this jbd filesystem
606 * @param remain_buf_size remaining size in buffer containing the block tag
607 * @param tag_info information of this tag.
608 * @return EOK when succeed, otherwise return EINVAL.*/
/* The tag is decoded as struct jbd_block_tag3 (32-bit flags) when
 * CSUM_V3 is set and as struct jbd_block_tag (16-bit flags) otherwise.
 * In both layouts: the low 32 bits of the block number always come from
 * `blocknr`, `blocknr_high` is consulted only with the 64BIT feature, a
 * tag without JBD_FLAG_SAME_UUID is followed by UUID_SIZE bytes of UUID
 * (copied out, and added to tag_info->tag_bytes), and JBD_FLAG_LAST_TAG
 * sets tag_info->last_tag. Both the tag itself and the optional UUID are
 * bounds-checked against remain_buf_size. */
610 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
613 int32_t remain_buf_size,
614 struct tag_info *tag_info)
617 tag_info->tag_bytes = tag_bytes;
618 tag_info->uuid_exist = false;
619 tag_info->last_tag = false;
621 /* See whether it is possible to hold a valid block tag.*/
622 if (remain_buf_size - tag_bytes < 0)
625 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
626 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
627 struct jbd_block_tag3 *tag = __tag;
628 tag_info->block = jbd_get32(tag, blocknr);
629 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
630 JBD_FEATURE_INCOMPAT_64BIT))
632 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
634 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
637 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
638 /* See whether it is possible to hold UUID part.*/
639 if (remain_buf_size - tag_bytes < UUID_SIZE)
642 uuid_start = (char *)tag + tag_bytes;
643 tag_info->uuid_exist = true;
644 tag_info->tag_bytes += UUID_SIZE;
645 memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
648 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
649 tag_info->last_tag = true;
652 struct jbd_block_tag *tag = __tag;
653 tag_info->block = jbd_get32(tag, blocknr);
654 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
655 JBD_FEATURE_INCOMPAT_64BIT))
657 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
659 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
662 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
663 /* See whether it is possible to hold UUID part.*/
664 if (remain_buf_size - tag_bytes < UUID_SIZE)
667 uuid_start = (char *)tag + tag_bytes;
668 tag_info->uuid_exist = true;
669 tag_info->tag_bytes += UUID_SIZE;
670 memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
673 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
674 tag_info->last_tag = true;
680 /**@brief Write information to a block tag.
681 * @param __tag pointer to the block tag
682 * @param remain_buf_size remaining size in buffer containing the block tag
683 * @param tag_info information of this tag.
684 * @return EOK when succeed, otherwise return EINVAL.*/
/* Counterpart of jbd_extract_block_tag(). The tag size comes from
 * jbd_tag_bytes() and is stored in tag_info->tag_bytes (grown by
 * UUID_SIZE when a UUID is appended). The tag is zero-filled and written
 * as struct jbd_block_tag3 (32-bit flags) with CSUM_V3, as struct
 * jbd_block_tag (16-bit flags) otherwise; `blocknr_high` is written only
 * with the 64BIT feature. The checksum from tag_info is stored through
 * jbd_block_tag_csum_set(), and JBD_FLAG_LAST_TAG is set when
 * tag_info->last_tag is true. Both the tag and the optional UUID are
 * bounds-checked against remain_buf_size. */
686 jbd_write_block_tag(struct jbd_fs *jbd_fs,
688 int32_t remain_buf_size,
689 struct tag_info *tag_info)
692 int tag_bytes = jbd_tag_bytes(jbd_fs);
694 tag_info->tag_bytes = tag_bytes;
696 /* See whether it is possible to hold a valid block tag.*/
697 if (remain_buf_size - tag_bytes < 0)
700 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
701 JBD_FEATURE_INCOMPAT_CSUM_V3)) {
702 struct jbd_block_tag3 *tag = __tag;
703 memset(tag, 0, sizeof(struct jbd_block_tag3));
704 jbd_set32(tag, blocknr, tag_info->block);
705 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
706 JBD_FEATURE_INCOMPAT_64BIT))
707 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
709 if (tag_info->uuid_exist) {
710 /* See whether it is possible to hold UUID part.*/
711 if (remain_buf_size - tag_bytes < UUID_SIZE)
714 uuid_start = (char *)tag + tag_bytes;
715 tag_info->tag_bytes += UUID_SIZE;
716 memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
718 jbd_set32(tag, flags,
719 jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
721 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
723 if (tag_info->last_tag)
724 jbd_set32(tag, flags,
725 jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
728 struct jbd_block_tag *tag = __tag;
729 memset(tag, 0, sizeof(struct jbd_block_tag));
730 jbd_set32(tag, blocknr, tag_info->block);
731 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
732 JBD_FEATURE_INCOMPAT_64BIT))
733 jbd_set32(tag, blocknr_high, tag_info->block >> 32);
735 if (tag_info->uuid_exist) {
736 /* See whether it is possible to hold UUID part.*/
737 if (remain_buf_size - tag_bytes < UUID_SIZE)
740 uuid_start = (char *)tag + tag_bytes;
741 tag_info->tag_bytes += UUID_SIZE;
742 memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
744 jbd_set16(tag, flags,
745 jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
747 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
749 if (tag_info->last_tag)
750 jbd_set16(tag, flags,
751 jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
757 /**@brief Iterate all block tags in a block.
758 * @param jbd_fs jbd filesystem
759 * @param __tag_start pointer to the block
760 * @param tag_tbl_size size of the block
761 * @param func callback routine to indicate that
762 * a block tag is found
763 * @param arg additional argument to be passed to func */
/* With CSUM_V2 or CSUM_V3 the trailing struct jbd_block_tail is excluded
 * from the table. Each tag is decoded with jbd_extract_block_tag(); the
 * callback receives the tag's block number and UUID buffer. The walk
 * ends at the tag flagged as last, otherwise it advances by the decoded
 * tag size (which includes the UUID part when present).
 * NOTE(review): the loop condition is `tag_tbl_size != 0`, so it relies
 * on the extract step failing before the remaining size turns negative
 * - confirm the failure path leaves the loop. */
765 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
767 int32_t tag_tbl_size,
768 void (*func)(struct jbd_fs * jbd_fs,
774 char *tag_start, *tag_ptr;
775 int tag_bytes = jbd_tag_bytes(jbd_fs);
776 tag_start = __tag_start;
779 /* Cut off the size of block tail storing checksum. */
780 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
781 JBD_FEATURE_INCOMPAT_CSUM_V2) ||
782 JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
783 JBD_FEATURE_INCOMPAT_CSUM_V3))
784 tag_tbl_size -= sizeof(struct jbd_block_tail);
786 while (tag_tbl_size) {
787 struct tag_info tag_info;
788 int rc = jbd_extract_block_tag(jbd_fs,
797 func(jbd_fs, tag_info.block, tag_info.uuid, arg);
799 /* Stop the iteration when we reach the last tag. */
800 if (tag_info.last_tag)
803 tag_ptr += tag_info.tag_bytes;
804 tag_tbl_size -= tag_info.tag_bytes;
/**@brief Block-tag callback that only logs the tagged block number.
 * `arg` is interpreted as a pointer to a uint32_t block counter
 * (presumably the journal block position maintained by the caller).*/
808 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
813 uint32_t *iblock = arg;
814 ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
/**@brief Look a block number up in the revoke tree.
 * @param info journal replay info holding the tree
 * @param block block number to search for
 * @return result of RB_FIND() on a temporary key entry (the matching
 *         revoke entry, or no entry when the block is not revoked).*/
821 static struct revoke_entry *
822 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
824 struct revoke_entry tmp = {
828 return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
831 /**@brief Replay a block in a transaction.
832 * @param jbd_fs jbd filesystem
833 * @param block block address to be replayed.*/
/* Block-tag callback of the recover pass; `__arg` is a struct replay_arg.
 * The block is checked against the revoke tree first (its revoke
 * transaction id is compared with arg->this_trans_id). The journal block
 * at *arg->this_block is then read; for an ordinary target the
 * filesystem block is obtained without reading, overwritten with
 * `blocksize` bytes and marked dirty. The ext4 superblock is special-
 * cased: mount_count and state of the in-memory superblock are saved,
 * EXT4_SUPERBLOCK_SIZE bytes are taken from the journal block at
 * EXT4_SUPERBLOCK_OFFSET, both saved fields are put back and the
 * superblock is written with ext4_sb_write().
 * NOTE(review): the results of jbd_block_set()/ext4_block_set() are not
 * checked on the lines visible here, and the function returns void, so
 * replay I/O errors cannot reach the caller - confirm this is intended. */
834 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
836 uint8_t *uuid __unused,
840 struct replay_arg *arg = __arg;
841 struct recover_info *info = arg->info;
842 uint32_t *this_block = arg->this_block;
843 struct revoke_entry *revoke_entry;
844 struct ext4_block journal_block, ext4_block;
845 struct ext4_fs *fs = jbd_fs->inode_ref.fs;
849 /* We replay this block only if the current transaction id
850 * is equal or greater than that in revoke entry.*/
851 revoke_entry = jbd_revoke_entry_lookup(info, block);
853 arg->this_trans_id < revoke_entry->trans_id)
857 "Replaying block in block_tag: %" PRIu64 "\n",
860 r = jbd_block_get(jbd_fs, &journal_block, *this_block);
864 /* We need special treatment for ext4 superblock. */
866 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
868 jbd_block_set(jbd_fs, &journal_block);
872 memcpy(ext4_block.data,
874 jbd_get32(&jbd_fs->sb, blocksize));
876 ext4_bcache_set_dirty(ext4_block.buf);
877 ext4_block_set(fs->bdev, &ext4_block);
879 uint16_t mount_count, state;
880 mount_count = ext4_get16(&fs->sb, mount_count);
881 state = ext4_get16(&fs->sb, state);
884 journal_block.data + EXT4_SUPERBLOCK_OFFSET,
885 EXT4_SUPERBLOCK_SIZE);
887 /* Mark system as mounted */
888 ext4_set16(&fs->sb, state, state);
889 r = ext4_sb_write(fs->bdev, &fs->sb);
893 /*Update mount count*/
894 ext4_set16(&fs->sb, mount_count, mount_count);
897 jbd_block_set(jbd_fs, &journal_block);
902 /**@brief Add block address to revoke tree, along with
903 * its transaction id.
904 * @param info journal replay info
905 * @param block block address to be revoked.*/
/* An existing entry for `block` gets its transaction id overwritten with
 * info->this_trans_id; otherwise a fresh zero-filled entry is allocated,
 * filled in and inserted into the tree.
 * NOTE(review): allocation failure is only covered by ext4_assert(); if
 * assertions can be compiled out, a NULL entry would be dereferenced -
 * confirm. */
906 static void jbd_add_revoke_block_tags(struct recover_info *info,
909 struct revoke_entry *revoke_entry;
911 ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
912 /* If the revoke entry with respect to the block address
913 * exists already, update its transaction id.*/
914 revoke_entry = jbd_revoke_entry_lookup(info, block);
916 revoke_entry->trans_id = info->this_trans_id;
920 revoke_entry = jbd_alloc_revoke_entry();
921 ext4_assert(revoke_entry);
922 revoke_entry->block = block;
923 revoke_entry->trans_id = info->this_trans_id;
924 RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
/**@brief Free every entry of the revoke tree.
 * @param info journal replay info holding the tree
 * Repeatedly takes the minimum entry, unlinks it from the tree and
 * releases it, until the tree is empty.*/
929 static void jbd_destroy_revoke_tree(struct recover_info *info)
931 while (!RB_EMPTY(&info->revoke_root)) {
932 struct revoke_entry *revoke_entry =
933 RB_MIN(jbd_revoke, &info->revoke_root);
934 ext4_assert(revoke_entry);
935 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
936 jbd_free_revoke_entry(revoke_entry);
940 /* Make sure we wrap around the log correctly! */
/* When `var` has reached or passed the superblock's `maxlen`, it is
 * pulled back by (maxlen - first), i.e. the journal block position
 * continues from `first`. `var` is evaluated more than once and assigned
 * to, so it must be a side-effect-free lvalue. */
941 #define wrap(sb, var) \
943 if (var >= jbd_get32((sb), maxlen)) \
944 var -= (jbd_get32((sb), maxlen) - jbd_get32((sb), first)); \
/* Passes of jbd_iterate_log(); jbd_recover() runs them in this order:
 * scan the log, build the revoke tree, then replay the blocks. */
947 #define ACTION_SCAN 0
948 #define ACTION_REVOKE 1
949 #define ACTION_RECOVER 2
951 /**@brief Add entries in a revoke block to revoke tree.
952 * @param jbd_fs jbd filesystem
953 * @param header revoke block header
954 * @param info journal replay info*/
/* Records start right after the struct jbd_revoke_header and are 4 bytes
 * each by default (the 64BIT feature test selects the other record size
 * handled below, 8 bytes). The number of records is derived from the
 * header's `count` field minus the header size. Every record is
 * converted from big-endian and added with jbd_add_revoke_block_tags().
 * NOTE(review): `count` comes from disk and is not range-checked on the
 * lines visible here; a value below the header size or beyond the block
 * would make the loop read outside the block - confirm.
 * NOTE(review): records are read through uint64_t / uint32_t pointer
 * casts, which assumes suitable alignment of the record area. */
955 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
956 struct jbd_bhdr *header,
957 struct recover_info *info)
960 struct jbd_revoke_header *revoke_hdr =
961 (struct jbd_revoke_header *)header;
962 uint32_t i, nr_entries, record_len = 4;
964 /* If we are working on a 64bit jbd filesystem, */
965 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
966 JBD_FEATURE_INCOMPAT_64BIT))
969 nr_entries = (jbd_get32(revoke_hdr, count) -
970 sizeof(struct jbd_revoke_header)) /
973 blocks_entry = (char *)(revoke_hdr + 1);
975 for (i = 0;i < nr_entries;i++) {
976 if (record_len == 8) {
978 (uint64_t *)blocks_entry;
979 jbd_add_revoke_block_tags(info, to_be64(*blocks));
982 (uint32_t *)blocks_entry;
983 jbd_add_revoke_block_tags(info, to_be32(*blocks));
985 blocks_entry += record_len;
/**@brief Walk the block tags of a descriptor block for logging only.
 * @param jbd_fs jbd filesystem
 * @param header descriptor block header
 * The tag table is sized as the journal block size minus
 * sizeof(struct jbd_bhdr) and each tag is handed to
 * jbd_display_block_tags().*/
989 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
990 struct jbd_bhdr *header,
993 jbd_iterate_block_table(jbd_fs,
995 jbd_get32(&jbd_fs->sb, blocksize) -
996 sizeof(struct jbd_bhdr),
997 jbd_display_block_tags,
/**@brief Replay every block described by a descriptor block.
 * @param jbd_fs jbd filesystem
 * @param header descriptor block header
 * @param arg replay arguments forwarded to the per-tag callback
 * The tag table is sized as the journal block size minus
 * sizeof(struct jbd_bhdr) and each tag is handed to
 * jbd_replay_block_tags().*/
1001 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
1002 struct jbd_bhdr *header,
1003 struct replay_arg *arg)
1005 jbd_iterate_block_table(jbd_fs,
1007 jbd_get32(&jbd_fs->sb, blocksize) -
1008 sizeof(struct jbd_bhdr),
1009 jbd_replay_block_tags,
1013 /**@brief The core routine of journal replay.
1014 * @param jbd_fs jbd filesystem
1015 * @param info journal replay info
1016 * @param action action needed to be taken
1017 * @return standard error code*/
/* The walk starts at the superblock's `start` block with the
 * superblock's `sequence` as expected transaction id. Each journal block
 * is read with jbd_block_get(); a block without JBD_MAGIC_NUMBER, or
 * with an unexpected sequence number, ends the log. Valid blocks are
 * dispatched on their block type:
 *  - descriptor block: checksum verified; replayed in the recover pass,
 *    otherwise only walked for logging;
 *  - commit block: checksum verified; marks the end of a transaction;
 *  - revoke block: checksum verified; fed to jbd_build_revoke_tree() in
 *    the revoke pass.
 * The position is wrapped with wrap() and the walk also ends when it
 * comes back to the starting block. After a successful scan pass
 * info->start_trans_id and info->last_trans_id are filled in. Passes
 * other than the scan stop once the transaction id exceeds
 * info->last_trans_id.
 * NOTE(review): transaction ids are compared with plain `>`; behaviour
 * when the 32-bit sequence number wraps around is not handled on the
 * lines visible here - confirm. */
1018 static int jbd_iterate_log(struct jbd_fs *jbd_fs,
1019 struct recover_info *info,
1023 bool log_end = false;
1024 struct jbd_sb *sb = &jbd_fs->sb;
1025 uint32_t start_trans_id, this_trans_id;
1026 uint32_t start_block, this_block;
1028 /* We start iterating valid blocks in the whole journal.*/
1029 start_trans_id = this_trans_id = jbd_get32(sb, sequence);
1030 start_block = this_block = jbd_get32(sb, start);
1031 if (action == ACTION_SCAN)
1032 info->trans_cnt = 0;
1033 else if (!info->trans_cnt)
1036 ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
1040 struct ext4_block block;
1041 struct jbd_bhdr *header;
1042 /* If we are not scanning for the last
1043 * valid transaction in the journal,
1044 * we will stop when we reach the end of
1046 if (action != ACTION_SCAN)
1047 if (this_trans_id > info->last_trans_id) {
1052 r = jbd_block_get(jbd_fs, &block, this_block);
1056 header = (struct jbd_bhdr *)block.data;
1057 /* This block does not have a valid magic number,
1058 * so we have reached the end of the journal.*/
1059 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
1060 jbd_block_set(jbd_fs, &block);
1065 /* If the transaction id we found is not expected,
1066 * we may have reached the end of the journal.
1068 * If we are not scanning the journal, something
1069 * bad might have taken place. :-( */
1070 if (jbd_get32(header, sequence) != this_trans_id) {
1071 if (action != ACTION_SCAN)
1074 jbd_block_set(jbd_fs, &block);
1079 switch (jbd_get32(header, blocktype)) {
1080 case JBD_DESCRIPTOR_BLOCK:
1081 if (!jbd_verify_meta_csum(jbd_fs, header)) {
1083 DBG_WARN "Descriptor block checksum failed."
1084 "Journal block: %" PRIu32"\n",
1089 ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
1090 "trans_id: %" PRIu32"\n",
1091 this_block, this_trans_id);
1092 if (action == ACTION_RECOVER) {
1093 struct replay_arg replay_arg;
1094 replay_arg.info = info;
1095 replay_arg.this_block = &this_block;
1096 replay_arg.this_trans_id = this_trans_id;
1098 jbd_replay_descriptor_block(jbd_fs,
1099 header, &replay_arg);
1101 jbd_debug_descriptor_block(jbd_fs,
1102 header, &this_block);
1105 case JBD_COMMIT_BLOCK:
1106 if (!jbd_verify_commit_csum(jbd_fs,
1107 (struct jbd_commit_header *)header)) {
1109 DBG_WARN "Commit block checksum failed."
1110 "Journal block: %" PRIu32"\n",
1115 ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
1116 "trans_id: %" PRIu32"\n",
1117 this_block, this_trans_id);
1118 /* This is the end of a transaction,
1119 * we may now proceed to the next transaction.
1124 case JBD_REVOKE_BLOCK:
1125 if (!jbd_verify_meta_csum(jbd_fs, header)) {
1127 DBG_WARN "Revoke block checksum failed."
1128 "Journal block: %" PRIu32"\n",
1133 ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
1134 "trans_id: %" PRIu32"\n",
1135 this_block, this_trans_id);
1136 if (action == ACTION_REVOKE) {
1137 info->this_trans_id = this_trans_id;
1138 jbd_build_revoke_tree(jbd_fs,
1146 jbd_block_set(jbd_fs, &block);
1148 wrap(sb, this_block);
1149 if (this_block == start_block)
1153 ext4_dbg(DEBUG_JBD, "End of journal.\n");
1154 if (r == EOK && action == ACTION_SCAN) {
1155 /* We have finished scanning the journal. */
1156 info->start_trans_id = start_trans_id;
1157 if (this_trans_id > start_trans_id)
1158 info->last_trans_id = this_trans_id - 1;
1160 info->last_trans_id = this_trans_id;
1166 /**@brief Replay journal.
1167 * @param jbd_fs jbd filesystem
1168 * @return standard error code*/
/* Runs jbd_iterate_log() three times over the same replay info: scan,
 * revoke, recover. After a successful replay the journal's `start` field
 * is set to 0, EXT4_FINCOM_RECOVER is cleared from the ext4 superblock's
 * incompatible features, the jbd superblock is marked dirty and the ext4
 * superblock is written out. The revoke tree is destroyed before
 * returning. */
1169 int jbd_recover(struct jbd_fs *jbd_fs)
1172 struct recover_info info;
1173 struct jbd_sb *sb = &jbd_fs->sb;
1177 RB_INIT(&info.revoke_root);
1179 r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
1183 r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
1187 r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
1189 /* If we successfully replay the journal,
1190 * clear EXT4_FINCOM_RECOVER flag on the
1191 * ext4 superblock, and set the start of
1193 uint32_t features_incompatible =
1194 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1195 features_incompatible);
1196 jbd_set32(&jbd_fs->sb, start, 0);
1197 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1198 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1199 features_incompatible,
1200 features_incompatible);
1201 jbd_fs->dirty = true;
1202 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1203 &jbd_fs->inode_ref.fs->sb);
1205 jbd_destroy_revoke_tree(&info);
/**@brief Copy the session state into the in-memory jbd superblock.
 * @param journal current journal session
 * Stores journal->start and journal->trans_id into the superblock's
 * `start` and `sequence` fields and marks the superblock dirty; nothing
 * is written to disk here.*/
1209 static void jbd_journal_write_sb(struct jbd_journal *journal)
1211 struct jbd_fs *jbd_fs = journal->jbd_fs;
1212 jbd_set32(&jbd_fs->sb, start, journal->start);
1213 jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
1214 jbd_fs->dirty = true;
1217 /**@brief Start accessing the journal.
1218 * @param jbd_fs jbd filesystem
1219 * @param journal current journal session
1220 * @return standard error code*/
/* Steps, in order:
 *  1. set EXT4_FINCOM_RECOVER in the ext4 superblock and write it out;
 *  2. initialise the session: first/start/last all begin at the jbd
 *     superblock's `first` block, both transaction id counters begin at
 *     1, block_size mirrors the jbd superblock's `blocksize`;
 *  3. obtain a journal block without reading it, zero-fill it, mark it
 *     dirty and release it (the session is wiped if either step fails);
 *  4. initialise the transaction queue, the checkpoint queue and the
 *     block record tree, then push the state into the jbd superblock and
 *     write that superblock to disk. */
1221 int jbd_journal_start(struct jbd_fs *jbd_fs,
1222 struct jbd_journal *journal)
1225 uint32_t features_incompatible =
1226 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1227 features_incompatible);
1228 struct ext4_block block = EXT4_BLOCK_ZERO();
1229 features_incompatible |= EXT4_FINCOM_RECOVER;
1230 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1231 features_incompatible,
1232 features_incompatible);
1233 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1234 &jbd_fs->inode_ref.fs->sb);
1238 journal->first = jbd_get32(&jbd_fs->sb, first);
1239 journal->start = journal->first;
1240 journal->last = journal->first;
1241 journal->trans_id = 1;
1242 journal->alloc_trans_id = 1;
1244 journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
1246 r = jbd_block_get_noread(jbd_fs,
1250 memset(journal, 0, sizeof(struct jbd_journal));
1253 memset(block.data, 0, journal->block_size);
1254 ext4_bcache_set_dirty(block.buf);
1255 r = jbd_block_set(jbd_fs, &block);
1257 memset(journal, 0, sizeof(struct jbd_journal));
1261 TAILQ_INIT(&journal->trans_queue);
1262 TAILQ_INIT(&journal->cp_queue);
1263 RB_INIT(&journal->block_rec_root);
1264 journal->jbd_fs = jbd_fs;
1265 jbd_journal_write_sb(journal);
1266 return jbd_write_sb(jbd_fs);
/* Buffer end-of-write callback; jbd_journal_flush_trans() both calls it
 * directly and installs it as buf->end_write. */
1269 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1270 struct ext4_buf *buf __unused,
/**@brief Push the blocks of a transaction to their final location.
 * @param trans transaction to flush
 * Walks trans->buf_queue. When the block record of a buffer belongs to a
 * different transaction, the journalled copy (at jbd_buf->jbd_lba) is
 * read into a temporary block-sized buffer and written out directly, the
 * end-write callback is invoked by hand, and the buffer's end_write /
 * end_write_arg and the record's buf pointer are re-established.
 * Otherwise the cached buffer is flushed with ext4_block_flush_buf().
 * NOTE(review): no check of the malloc() result is visible here -
 * confirm one exists before tmp_data is used.
 * NOTE(review): ext4_block_get() is called inside ext4_assert(); if
 * assertions can be compiled out, the read would vanish with them. */
1274 static void jbd_journal_flush_trans(struct jbd_trans *trans)
1276 struct jbd_buf *jbd_buf, *tmp;
1277 struct jbd_journal *journal = trans->journal;
1278 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1279 void *tmp_data = malloc(journal->block_size);
1283 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1285 struct ext4_buf *buf = jbd_buf->block_rec->buf;
1286 /* The buffer in memory is still dirty. */
1288 if (jbd_buf->block_rec->trans != trans) {
1290 struct ext4_block jbd_block = EXT4_BLOCK_ZERO();
1291 struct jbd_buf *orig_arg = buf->end_write_arg;
1292 ext4_assert(ext4_block_get(fs->bdev,
1294 jbd_buf->jbd_lba) == EOK);
1295 memcpy(tmp_data, jbd_block.data,
1296 journal->block_size);
1297 ext4_block_set(fs->bdev, &jbd_block);
1298 r = ext4_blocks_set_direct(fs->bdev, tmp_data,
1300 jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf);
1301 buf->end_write = jbd_trans_end_write;
1302 buf->end_write_arg = orig_arg;
1303 orig_arg->block_rec->buf = buf;
1305 ext4_block_flush_buf(fs->bdev, buf);
/**@brief Retire a transaction by moving the journal start past it.
 * @param journal current journal session
 * @param trans transaction to retire
 * journal->start becomes the block right after the transaction
 * (start_iblock + alloc_blocks, wrapped), journal->trans_id becomes the
 * transaction's id plus one, the transaction is freed and the new state
 * is copied into the in-memory jbd superblock.*/
1314 jbd_journal_skip_pure_revoke(struct jbd_journal *journal,
1315 struct jbd_trans *trans)
1317 journal->start = trans->start_iblock +
1318 trans->alloc_blocks;
1319 wrap(&journal->jbd_fs->sb, journal->start);
1320 journal->trans_id = trans->trans_id + 1;
1321 jbd_journal_free_trans(journal,
1323 jbd_journal_write_sb(journal);
// Work through the checkpoint queue (journal->cp_queue) starting at its
// head. The flush flag tested below selects between only updating the
// journal superblock fields and flushing the transaction buffers.
1327 jbd_journal_purge_cp_trans(struct jbd_journal *journal,
1330 struct jbd_trans *trans;
1331 while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
// No data blocks in this transaction: unlink it and skip over it.
1332 if (!trans->data_cnt) {
1333 TAILQ_REMOVE(&journal->cp_queue,
1336 jbd_journal_skip_pure_revoke(journal, trans);
// Every data block has been written (data_cnt equals written_cnt):
// advance past this transaction, unlink it from the checkpoint queue,
// free it and call jbd_journal_write_sb.
1338 if (trans->data_cnt ==
1339 trans->written_cnt) {
1341 trans->start_iblock +
1342 trans->alloc_blocks;
1343 wrap(&journal->jbd_fs->sb,
1346 trans->trans_id + 1;
1347 TAILQ_REMOVE(&journal->cp_queue,
1350 jbd_journal_free_trans(journal,
1353 jbd_journal_write_sb(journal);
// Blocks are still pending and flush is false: trans->start_iblock is
// recorded (presumably as journal->start - confirm) and
// jbd_journal_write_sb is called.
1354 } else if (!flush) {
1356 trans->start_iblock;
1357 wrap(&journal->jbd_fs->sb,
1361 jbd_journal_write_sb(journal);
// Write out the buffers of this transaction.
1364 jbd_journal_flush_trans(trans);
1369 /**@brief Stop accessing the journal.
1370 * @param journal current journal session
1371 * @return standard error code*/
1372 int jbd_journal_stop(struct jbd_journal *journal)
1375 struct jbd_fs *jbd_fs = journal->jbd_fs;
1376 uint32_t features_incompatible;
1378 /* Commit all the transactions to the journal.*/
1379 jbd_journal_commit_all(journal);
1381 /* Make sure that journalled content have reached
1383 jbd_journal_purge_cp_trans(journal, true);
1385 /* There should be no block record in this journal
1387 if (!RB_EMPTY(&journal->block_rec_root))
1389 DBG_WARN "There are still block records "
1390 "in this journal session!\n");
// Clear the EXT4_FINCOM_RECOVER bit in the incompatible-feature word of
// the filesystem superblock and write that superblock back to the device.
1392 features_incompatible =
1393 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1394 features_incompatible);
1395 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1396 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1397 features_incompatible,
1398 features_incompatible);
1399 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1400 &jbd_fs->inode_ref.fs->sb);
// Reset the journal transaction id to 0, update the journal superblock
// fields and return the result of writing the journal superblock.
1405 journal->trans_id = 0;
1406 jbd_journal_write_sb(journal);
1407 return jbd_write_sb(journal->jbd_fs);
1410 /**@brief Allocate a block in the journal.
1411 * @param journal current journal session
1412 * @param trans transaction
1413 * @return allocated block address*/
1414 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
1415 struct jbd_trans *trans)
1417 uint32_t start_block;
// The block handed out is the current journal->last; last is then
// advanced (and passed through wrap()) and the block is counted in
// trans->alloc_blocks.
1419 start_block = journal->last++;
1420 trans->alloc_blocks++;
1421 wrap(&journal->jbd_fs->sb, journal->last);
1423 /* If there is no space left, flush all journalled
1424 * blocks to disk first.*/
// The journal is treated as full once the advanced last equals start.
1425 if (journal->last == journal->start)
1426 jbd_journal_purge_cp_trans(journal, true);
1431 /**@brief Allocate a new transaction
1432 * @param journal current journal session
1433 * @return transaction allocated*/
1435 jbd_journal_new_trans(struct jbd_journal *journal)
// calloc zero-fills the transaction, so fields not assigned below start
// out as zero.
// NOTE(review): confirm the calloc result is checked before it is used.
1437 struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
1441 /* We will assign a trans_id to this transaction,
1442 * once it has been committed.*/
1443 trans->journal = journal;
// The running data checksum starts from the CRC32 initial value.
1444 trans->data_csum = EXT4_CRC32_INIT;
1446 TAILQ_INIT(&trans->buf_queue);
1450 /**@brief Gain access to a block before making any modifications.
1451 * @param journal current journal session
1452 * @param trans transaction
1453 * @param block descriptor
1454 * @return standard error code.*/
1455 int jbd_trans_get_access(struct jbd_journal *journal,
1456 struct jbd_trans *trans,
1457 struct ext4_block *block)
1460 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
// end_write_arg is where jbd_trans_set_block_dirty stores the jbd_buf
// that tracks this buffer for the journal.
1461 struct jbd_buf *jbd_buf = block->buf->end_write_arg;
1463 /* If the buffer has already been modified, we should
1464 * flush dirty data in this buffer to disk.*/
// Only buffers that are dirty and carry the journal callback qualify.
1465 if (ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
1466 block->buf->end_write == jbd_trans_end_write) {
1467 ext4_assert(jbd_buf);
// The flush is needed only when the pending dirty data belongs to a
// different transaction than the one asking for access.
1468 if (jbd_buf->trans != trans)
1469 r = ext4_block_flush_buf(fs->bdev, block->buf);
// Look up a block record in the red-black tree of the journal
// (journal->block_rec_root) using a temporary jbd_block_rec as the
// search key; the RB_FIND result is returned.
1475 static struct jbd_block_rec *
1476 jbd_trans_block_rec_lookup(struct jbd_journal *journal,
1479 struct jbd_block_rec tmp = {
1483 return RB_FIND(jbd_block,
1484 &journal->block_rec_root,
// Hand a block record over to new_trans: the record is unlinked from the
// tbrec list it is currently on, inserted at the head of
// new_trans->tbrec_list, and its trans and buf fields are updated.
1489 jbd_trans_change_ownership(struct jbd_block_rec *block_rec,
1490 struct jbd_trans *new_trans,
1491 struct ext4_buf *new_buf)
1493 LIST_REMOVE(block_rec, tbrec_node);
1494 /* Now this block record belongs to this transaction. */
1495 LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);
1496 block_rec->trans = new_trans;
1497 block_rec->buf = new_buf;
// Obtain the block record for lba on behalf of trans. A record found by
// the lookup is handed over to trans with jbd_trans_change_ownership;
// a new record is allocated with calloc, filled in and inserted into
// both trans->tbrec_list and the block record tree of the journal.
// The caller in jbd_trans_set_block_dirty treats a NULL result as failure.
1500 static inline struct jbd_block_rec *
1501 jbd_trans_insert_block_rec(struct jbd_trans *trans,
1503 struct ext4_buf *buf)
1505 struct jbd_block_rec *block_rec;
1506 block_rec = jbd_trans_block_rec_lookup(trans->journal, lba);
1508 jbd_trans_change_ownership(block_rec, trans, buf);
1511 block_rec = calloc(1, sizeof(struct jbd_block_rec));
1515 block_rec->lba = lba;
1516 block_rec->buf = buf;
1517 block_rec->trans = trans;
1518 TAILQ_INIT(&block_rec->dirty_buf_queue);
1519 LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
1520 RB_INSERT(jbd_block, &trans->journal->block_rec_root, block_rec);
// Bookkeeping performed when a jbd_buf of trans leaves block_rec. It is
// called from jbd_journal_free_trans, jbd_journal_prepare and
// jbd_trans_end_write.
1525 jbd_trans_finish_callback(struct jbd_journal *journal,
1526 const struct jbd_trans *trans,
1527 struct jbd_block_rec *block_rec,
1530 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
// Ownership test (presumably an early exit when trans is not the owner
// of the record - confirm).
1531 if (block_rec->trans != trans)
// Run the end-of-write handling for every jbd_buf still on the dirty
// queue of this record, using a stack ext4_buf as a stand-in buffer.
1535 struct jbd_buf *jbd_buf, *tmp;
1536 TAILQ_FOREACH_SAFE(jbd_buf,
1537 &block_rec->dirty_buf_queue,
1540 /* All we need is a fake ext4_buf. */
1541 struct ext4_buf buf;
1543 jbd_trans_end_write(fs->bdev->bc,
// Hand the record to the transaction of the last jbd_buf on the dirty
// queue: its journalled copy (jbd_lba) is read, the target block
// (block_rec->lba) is obtained without reading, the journalled data is
// copied in, ownership moves to the transaction of that jbd_buf, the
// journal callback is installed, the journal block is flagged BC_TMP,
// the target block is marked dirty and both blocks are released.
1549 struct jbd_buf *jbd_buf;
1550 struct ext4_block jbd_block = EXT4_BLOCK_ZERO(),
1551 block = EXT4_BLOCK_ZERO();
1552 jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
// NOTE(review): both block-get calls below sit inside ext4_assert;
// confirm ext4_assert still evaluates its argument when assertions are
// disabled.
1555 ext4_assert(ext4_block_get(fs->bdev,
1557 jbd_buf->jbd_lba) == EOK);
1558 ext4_assert(ext4_block_get_noread(fs->bdev,
1560 block_rec->lba) == EOK);
1561 memcpy(block.data, jbd_block.data,
1562 journal->block_size);
1564 jbd_trans_change_ownership(block_rec,
1565 jbd_buf->trans, block.buf);
1567 block.buf->end_write = jbd_trans_end_write;
1568 block.buf->end_write_arg = jbd_buf;
1570 ext4_bcache_set_flag(jbd_block.buf, BC_TMP);
1571 ext4_bcache_set_dirty(block.buf);
1573 ext4_block_set(fs->bdev, &jbd_block);
1574 ext4_block_set(fs->bdev, &block);
// Unlink block_rec from the tbrec list and from the block record tree of
// the journal, but only when trans is the current owner of the record.
1581 jbd_trans_remove_block_rec(struct jbd_journal *journal,
1582 struct jbd_block_rec *block_rec,
1583 struct jbd_trans *trans)
1585 /* If this block record doesn't belong to this transaction,
1587 if (block_rec->trans == trans) {
1588 LIST_REMOVE(block_rec, tbrec_node);
1589 RB_REMOVE(jbd_block,
1590 &journal->block_rec_root,
1596 /**@brief Add block to a transaction and mark it dirty.
1597 * @param trans transaction
1598 * @param block block descriptor
1599 * @return standard error code*/
1600 int jbd_trans_set_block_dirty(struct jbd_trans *trans,
1601 struct ext4_block *block)
1603 struct jbd_buf *buf;
1605 struct jbd_block_rec *block_rec;
// A buffer that already carries the journal callback may already be
// tracked by this very transaction; that case is tested here.
1606 if (block->buf->end_write == jbd_trans_end_write) {
1607 buf = block->buf->end_write_arg;
1608 if (buf && buf->trans == trans)
// Allocate the per-transaction tracking structure for this buffer.
1611 buf = calloc(1, sizeof(struct jbd_buf));
// Find or create the block record for this block; NULL means failure.
1615 if ((block_rec = jbd_trans_insert_block_rec(trans,
1617 block->buf)) == NULL) {
1622 TAILQ_INSERT_TAIL(&block_rec->dirty_buf_queue,
1626 buf->block_rec = block_rec;
// Keep a copy of the block descriptor and take an extra cache reference
// for it; jbd_journal_cp_trans and jbd_journal_free_trans later pass
// these stored descriptors to ext4_block_set.
1628 buf->block = *block;
1629 ext4_bcache_inc_ref(block->buf);
1631 /* If the content reach the disk, notify us
1632 * so that we may do a checkpoint. */
1633 block->buf->end_write = jbd_trans_end_write;
1634 block->buf->end_write_arg = buf;
// New entries go to the head of trans->buf_queue.
1637 TAILQ_INSERT_HEAD(&trans->buf_queue, buf, buf_node);
1639 ext4_bcache_set_dirty(block->buf);
1643 /**@brief Add block to be revoked to a transaction
1644 * @param trans transaction
1645 * @param lba logical block address
1646 * @return standard error code*/
1647 int jbd_trans_revoke_block(struct jbd_trans *trans,
// The revoke record is zero-filled by calloc and pushed onto the head of
// trans->revoke_list.
// NOTE(review): confirm the calloc result is checked before the insert.
1650 struct jbd_revoke_rec *rec =
1651 calloc(1, sizeof(struct jbd_revoke_rec));
1656 LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
1660 /**@brief Try to add block to be revoked to a transaction.
1661 * If @lba still remains in a transaction on checkpoint
1662 * queue, add @lba as a revoked block to the transaction.
1663 * @param trans transaction
1664 * @param lba logical block address
1665 * @return standard error code*/
1666 int jbd_trans_try_revoke_block(struct jbd_trans *trans,
1670 struct jbd_journal *journal = trans->journal;
1671 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
// The block record, if any, tells which transaction currently owns lba.
1672 struct jbd_block_rec *block_rec =
1673 jbd_trans_block_rec_lookup(journal, lba);
1675 /* Make sure we don't flush any buffers belong to this transaction. */
1676 if (block_rec && block_rec->trans != trans) {
1677 /* If the buffer has not been flushed yet, flush it now. */
1678 if (block_rec->buf) {
1679 r = ext4_block_flush_buf(fs->bdev, block_rec->buf);
// NOTE(review): the result of jbd_trans_revoke_block is discarded here,
// so an allocation failure inside it would go unreported - confirm this
// is intended.
1685 jbd_trans_revoke_block(trans, lba);
1691 /**@brief Free a transaction
1692 * @param journal current journal session
1693 * @param trans transaction
1694 * @param abort discard all the modifications on the block?
1695 * @note returns nothing (the function is declared void)*/
1696 void jbd_journal_free_trans(struct jbd_journal *journal,
1697 struct jbd_trans *trans,
1700 struct jbd_buf *jbd_buf, *tmp;
1701 struct jbd_revoke_rec *rec, *tmp2;
1702 struct jbd_block_rec *block_rec, *tmp3;
1703 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
// First loop: every jbd_buf still queued on the transaction.
1704 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1706 block_rec = jbd_buf->block_rec;
// Detach the journal callback from the cached buffer, clear its dirty
// state and release the stored block descriptor (presumably only when
// abort is set - confirm).
1708 jbd_buf->block.buf->end_write = NULL;
1709 jbd_buf->block.buf->end_write_arg = NULL;
1710 ext4_bcache_clear_dirty(jbd_buf->block.buf);
1711 ext4_block_set(fs->bdev, &jbd_buf->block);
// Unlink the jbd_buf from the dirty queue of its block record, run the
// finish callback for the record, then unlink it from the transaction.
1714 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1717 jbd_trans_finish_callback(journal,
1721 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
// Second loop: unlink all revoke records of the transaction.
1724 LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1726 LIST_REMOVE(rec, revoke_node);
// Third loop: drop the block records on the tbrec list of the transaction.
1729 LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
1731 jbd_trans_remove_block_rec(journal, block_rec, trans);
1737 /**@brief Write commit block for a transaction
1738 * @param trans transaction
1739 * @return standard error code*/
1740 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
1743 struct jbd_commit_header *header;
1744 uint32_t commit_iblock = 0;
1745 struct ext4_block commit_block;
1746 struct jbd_journal *journal = trans->journal;
// Reserve one journal block for the commit record and obtain it without
// reading its previous contents.
1748 commit_iblock = jbd_journal_alloc_block(journal, trans);
1749 rc = jbd_block_get_noread(journal->jbd_fs,
1750 &commit_block, commit_iblock);
// Fill in the commit header: magic, block type and the sequence number,
// which is the transaction id.
1754 header = (struct jbd_commit_header *)commit_block.data;
1755 jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
1756 jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
1757 jbd_set32(&header->header, sequence, trans->trans_id);
// NOTE(review): a COMPAT feature flag (JBD_FEATURE_COMPAT_CHECKSUM) is
// tested here with the INCOMPAT feature macro; confirm this is intended.
1759 if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1760 JBD_FEATURE_COMPAT_CHECKSUM)) {
// Store the CRC32 checksum type, its size and the data checksum that
// was accumulated in trans->data_csum.
1761 jbd_set32(header, chksum_type, JBD_CRC32_CHKSUM);
1762 jbd_set32(header, chksum_size, JBD_CRC32_CHKSUM_SIZE);
1763 jbd_set32(header, chksum[0], trans->data_csum);
// Set the checksum of the commit block, mark it dirty and release it.
1765 jbd_commit_csum_set(journal->jbd_fs, header);
1766 ext4_bcache_set_dirty(commit_block.buf);
1767 rc = jbd_block_set(journal->jbd_fs, &commit_block);
1774 /**@brief Write descriptor block for a transaction
1775 * @param journal current journal session
1776 * @param trans transaction
1777 * @return standard error code*/
1778 static int jbd_journal_prepare(struct jbd_journal *journal,
1779 struct jbd_trans *trans)
1781 int rc = EOK, i = 0;
1782 int32_t tag_tbl_size;
1783 uint32_t desc_iblock = 0;
1784 uint32_t data_iblock = 0;
1785 char *tag_start = NULL, *tag_ptr = NULL;
1786 struct jbd_buf *jbd_buf, *tmp;
1787 struct ext4_block desc_block, data_block;
1788 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
// Running checksum over the data blocks; it is stored in
// trans->data_csum at the end of this function.
1789 uint32_t checksum = EXT4_CRC32_INIT;
// First pass: reverse walk over trans->buf_queue that releases buffers
// found at the tail of the queue.
1791 /* Try to remove any non-dirty buffers from the tail of
1793 TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
1794 jbd_trans_buf, buf_node, tmp) {
1795 /* We stop the iteration when we find a dirty buffer. */
1796 if (ext4_bcache_test_flag(jbd_buf->block.buf,
1800 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1804 jbd_trans_finish_callback(journal,
1809 /* The buffer has not been modified, just release
1811 jbd_trans_remove_block_rec(journal,
1812 jbd_buf->block_rec, trans);
1815 jbd_buf->block.buf->end_write = NULL;
1816 jbd_buf->block.buf->end_write_arg = NULL;
1817 ext4_block_set(fs->bdev, &jbd_buf->block);
1818 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
// Second pass: forward walk over the remaining buffers. The same release
// sequence as above is repeated for a buffer that fails the flag test;
// the other buffers get a tag in a descriptor block and a copy of their
// data in the journal.
1822 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
1823 struct tag_info tag_info;
1824 bool uuid_exist = false;
1825 if (!ext4_bcache_test_flag(jbd_buf->block.buf,
1827 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1831 jbd_trans_finish_callback(journal,
1836 /* The buffer has not been modified, just release
1838 jbd_trans_remove_block_rec(journal,
1839 jbd_buf->block_rec, trans);
1842 jbd_buf->block.buf->end_write = NULL;
1843 jbd_buf->block.buf->end_write_arg = NULL;
1844 ext4_block_set(fs->bdev, &jbd_buf->block);
1845 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
// Fold the data of this block into the running checksum.
1849 checksum = jbd_block_csum(journal->jbd_fs,
1850 jbd_buf->block.data,
// Open a descriptor block: allocate a journal block, obtain it without
// reading, mark it dirty and write the header (magic, block type,
// sequence). The tag area starts right after the header.
1855 struct jbd_bhdr *bhdr;
1856 desc_iblock = jbd_journal_alloc_block(journal, trans);
1857 rc = jbd_block_get_noread(journal->jbd_fs,
1858 &desc_block, desc_iblock);
1862 ext4_bcache_set_dirty(desc_block.buf);
1864 bhdr = (struct jbd_bhdr *)desc_block.data;
1865 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1866 jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
1867 jbd_set32(bhdr, sequence, trans->trans_id);
1869 tag_start = (char *)(bhdr + 1);
1870 tag_ptr = tag_start;
// Space available for tags: the block size minus the header, and minus
// the block tail when jbd_has_csum reports checksums in use.
1872 tag_tbl_size = journal->block_size -
1873 sizeof(struct jbd_bhdr);
1875 if (jbd_has_csum(&journal->jbd_fs->sb))
1876 tag_tbl_size -= sizeof(struct jbd_block_tail);
// The first descriptor block allocated marks where the transaction
// starts in the journal.
1878 if (!trans->start_iblock)
1879 trans->start_iblock = desc_iblock;
// Describe this block in a tag; last_tag is set when i reaches
// trans->data_cnt - 1.
1882 tag_info.block = jbd_buf->block.lb_id;
1883 tag_info.uuid_exist = uuid_exist;
1884 if (i == trans->data_cnt - 1)
1885 tag_info.last_tag = true;
1887 tag_info.last_tag = false;
1889 tag_info.checksum = checksum;
1892 memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1895 rc = jbd_write_block_tag(journal->jbd_fs,
// Close the current descriptor block: set its checksum and release it.
// NOTE(review): the result of this jbd_block_set call is not checked,
// unlike the one for data_block further down.
1900 jbd_meta_csum_set(journal->jbd_fs,
1901 (struct jbd_bhdr *)desc_block.data);
1902 jbd_block_set(journal->jbd_fs, &desc_block);
// Copy the block contents into a newly allocated journal block and
// remember the address of that journal block in jbd_buf->jbd_lba.
1907 data_iblock = jbd_journal_alloc_block(journal, trans);
1908 rc = jbd_block_get_noread(journal->jbd_fs,
1909 &data_block, data_iblock);
1913 ext4_bcache_set_dirty(data_block.buf);
1915 memcpy(data_block.data, jbd_buf->block.data,
1916 journal->block_size);
1917 jbd_buf->jbd_lba = data_block.lb_id;
1919 rc = jbd_block_set(journal->jbd_fs, &data_block);
// Advance the tag cursor and shrink the remaining tag space.
1923 tag_ptr += tag_info.tag_bytes;
1924 tag_tbl_size -= tag_info.tag_bytes;
// On success with a descriptor block in use: set its checksum, publish
// the accumulated data checksum in trans->data_csum and release it.
// NOTE(review): the result of this jbd_block_set call is not checked.
1928 if (rc == EOK && desc_iblock) {
1929 jbd_meta_csum_set(journal->jbd_fs,
1930 (struct jbd_bhdr *)desc_block.data);
1931 trans->data_csum = checksum;
1932 jbd_block_set(journal->jbd_fs, &desc_block);
1938 /**@brief Write revoke block for a transaction
1939 * @param journal current journal session
1940 * @param trans transaction
1941 * @return standard error code*/
1943 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1944 struct jbd_trans *trans)
1946 int rc = EOK, i = 0;
1947 int32_t tag_tbl_size;
1948 uint32_t desc_iblock = 0;
1949 char *blocks_entry = NULL;
1950 struct jbd_revoke_rec *rec, *tmp;
1951 struct ext4_block desc_block;
1952 struct jbd_revoke_header *header = NULL;
// Size in bytes of one revoked block number. The default is 4; the
// 8-byte form is handled at the to_be64 store below, and the 64BIT
// incompat feature is tested right after this declaration.
1953 int32_t record_len = 4;
1955 if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1956 JBD_FEATURE_INCOMPAT_64BIT))
// One entry is emitted per record on trans->revoke_list.
1959 LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
// Open a revoke block: allocate a journal block, obtain it without
// reading, mark it dirty and write the header (magic, block type,
// sequence). Entries start right after the revoke header.
1963 struct jbd_bhdr *bhdr;
1964 desc_iblock = jbd_journal_alloc_block(journal, trans);
1965 rc = jbd_block_get_noread(journal->jbd_fs,
1966 &desc_block, desc_iblock);
1971 ext4_bcache_set_dirty(desc_block.buf);
1973 bhdr = (struct jbd_bhdr *)desc_block.data;
1974 jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1975 jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1976 jbd_set32(bhdr, sequence, trans->trans_id);
1978 header = (struct jbd_revoke_header *)bhdr;
1979 blocks_entry = (char *)(header + 1);
// Space available for entries: the block size minus the revoke header,
// and minus the block tail when jbd_has_csum reports checksums in use.
1980 tag_tbl_size = journal->block_size -
1981 sizeof(struct jbd_revoke_header);
1983 if (jbd_has_csum(&journal->jbd_fs->sb))
1984 tag_tbl_size -= sizeof(struct jbd_block_tail);
// The first block allocated marks where the transaction starts in the
// journal, unless a start was recorded earlier.
1986 if (!trans->start_iblock)
1987 trans->start_iblock = desc_iblock;
// No room for another entry: store the byte count, set the checksum and
// release the current revoke block.
// NOTE(review): count is computed as block_size minus the remaining
// space, so when the tail was reserved above the value also includes
// the tail size - confirm this is the intended count.
1991 if (tag_tbl_size < record_len) {
1992 jbd_set32(header, count,
1993 journal->block_size - tag_tbl_size);
1994 jbd_meta_csum_set(journal->jbd_fs,
1995 (struct jbd_bhdr *)desc_block.data);
1996 jbd_block_set(journal->jbd_fs, &desc_block);
// Store the revoked block number in big-endian form, 8 or 4 bytes wide.
2001 if (record_len == 8) {
2003 (uint64_t *)blocks_entry;
2004 *blocks = to_be64(rec->lba);
2007 (uint32_t *)blocks_entry;
2008 *blocks = to_be32(rec->lba);
// Advance the entry cursor and shrink the remaining space.
2010 blocks_entry += record_len;
2011 tag_tbl_size -= record_len;
// On success with a revoke block in use: store the byte count, set the
// checksum and release the block.
2015 if (rc == EOK && desc_iblock) {
2017 jbd_set32(header, count,
2018 journal->block_size - tag_tbl_size);
2020 jbd_meta_csum_set(journal->jbd_fs,
2021 (struct jbd_bhdr *)desc_block.data);
2022 jbd_block_set(journal->jbd_fs, &desc_block);
2028 /**@brief Submit the transaction to transaction queue.
2029 * @param journal current journal session
2030 * @param trans transaction*/
2032 jbd_journal_submit_trans(struct jbd_journal *journal,
2033 struct jbd_trans *trans)
// The transaction is appended at the tail of journal->trans_queue;
// jbd_journal_commit_one takes transactions from the head of that queue.
2035 TAILQ_INSERT_TAIL(&journal->trans_queue,
2040 /**@brief Put references of block descriptors in a transaction.
2041 * @param journal current journal session
2042 * @param trans transaction*/
2043 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
2045 struct jbd_buf *jbd_buf, *tmp;
2046 struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
2047 TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
// ext4_block_set receives a by-value copy of the stored descriptor
// (presumably so the copy kept in jbd_buf is left untouched - confirm).
2049 struct ext4_block block = jbd_buf->block;
2050 ext4_block_set(fs->bdev, &block);
2054 /**@brief Update the start block of the journal when
2055 * all the contents in a transaction reach the disk.*/
2056 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
2057 struct ext4_buf *buf,
// arg is the jbd_buf that was registered as end_write_arg of the buffer.
2061 struct jbd_buf *jbd_buf = arg;
2062 struct jbd_trans *trans = jbd_buf->trans;
2063 struct jbd_journal *journal = trans->journal;
// Evaluated up front, before trans can be removed from the checkpoint
// queue further down.
2064 bool first_in_queue =
2065 trans == TAILQ_FIRST(&journal->cp_queue);
// Unlink the jbd_buf from the transaction queue and from the dirty queue
// of its block record, then run the finish callback for the record.
2069 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
2070 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
2073 jbd_trans_finish_callback(journal,
2077 jbd_buf->block_rec->buf = NULL;
2080 /* Clear the end_write and end_write_arg fields. */
2081 buf->end_write = NULL;
2082 buf->end_write_arg = NULL;
// Count this block as written; the transaction is complete once
// written_cnt reaches data_cnt.
2084 trans->written_cnt++;
2085 if (trans->written_cnt == trans->data_cnt) {
2086 /* If it is the first transaction on checkpoint queue,
2087 * we will shift the start of the journal to the next
2088 * transaction, and remove subsequent written
2089 * transactions from checkpoint queue until we find
2090 * an unwritten one. */
2091 if (first_in_queue) {
2092 journal->start = trans->start_iblock +
2093 trans->alloc_blocks;
2094 wrap(&journal->jbd_fs->sb, journal->start);
2095 journal->trans_id = trans->trans_id + 1;
2096 TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
2097 jbd_journal_free_trans(journal, trans, false);
// Continue with the following transactions on the checkpoint queue
// without flushing, then update and write the journal superblock.
2099 jbd_journal_purge_cp_trans(journal, false);
2100 jbd_journal_write_sb(journal);
2101 jbd_write_sb(journal->jbd_fs);
2106 /**@brief Commit a transaction to the journal immediately.
2107 * @param journal current journal session
2108 * @param trans transaction
2109 * @return standard error code*/
2110 int jbd_journal_commit_trans(struct jbd_journal *journal,
2111 struct jbd_trans *trans)
// Snapshot of journal->last. It is written back near the end of this
// function, just before the transaction is freed with abort set
// (presumably the error path - confirm).
2114 uint32_t last = journal->last;
// The transaction takes the next id to be allocated; alloc_trans_id is
// incremented only after the commit block has been written.
2116 trans->trans_id = journal->alloc_trans_id;
2117 rc = jbd_journal_prepare(journal, trans);
2121 rc = jbd_journal_prepare_revoke(journal, trans);
2125 if (TAILQ_EMPTY(&trans->buf_queue) &&
2126 LIST_EMPTY(&trans->revoke_list)) {
2127 /* Since there are no entries in both buffer list
2128 * and revoke entry list, we do not consider trans as
2129 * complete transaction and just return EOK.*/
2130 jbd_journal_free_trans(journal, trans, false);
2134 rc = jbd_trans_write_commit_block(trans);
2138 journal->alloc_trans_id++;
// Checkpoint queue empty: this transaction decides the journal start.
2139 if (TAILQ_EMPTY(&journal->cp_queue)) {
// With data blocks: the journal starts at this transaction, the journal
// superblock is updated and written, the transaction is queued for
// checkpointing and its block references are released.
2140 if (trans->data_cnt) {
2141 journal->start = trans->start_iblock;
2142 wrap(&journal->jbd_fs->sb, journal->start);
2143 journal->trans_id = trans->trans_id;
2144 jbd_journal_write_sb(journal);
2145 jbd_write_sb(journal->jbd_fs);
2146 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2148 jbd_journal_cp_trans(journal, trans);
// Here the journal start moves past the transaction and the transaction
// is freed right away.
// NOTE(review): this path calls jbd_journal_write_sb but, unlike the
// path above, not jbd_write_sb - confirm this is intended.
2150 journal->start = trans->start_iblock +
2151 trans->alloc_blocks;
2152 wrap(&journal->jbd_fs->sb, journal->start);
2153 journal->trans_id = trans->trans_id + 1;
2154 jbd_journal_write_sb(journal);
2155 jbd_journal_free_trans(journal, trans, false);
// Append to the checkpoint queue; block references are released only
// when the transaction carries data blocks.
2158 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2160 if (trans->data_cnt)
2161 jbd_journal_cp_trans(journal, trans);
// Put back the saved journal->last and free the transaction with abort
// set.
2166 journal->last = last;
2167 jbd_journal_free_trans(journal, trans, true);
2172 /**@brief Commit one transaction on transaction queue
2174 * @param journal current journal session.*/
2175 void jbd_journal_commit_one(struct jbd_journal *journal)
2177 struct jbd_trans *trans;
// Take the transaction at the head of trans_queue, if there is one,
// unlink it and commit it. The result of jbd_journal_commit_trans is not
// propagated because this function is declared void.
2179 if ((trans = TAILQ_FIRST(&journal->trans_queue))) {
2180 TAILQ_REMOVE(&journal->trans_queue, trans, trans_node);
2181 jbd_journal_commit_trans(journal, trans);
2185 /**@brief Commit all the transactions on transaction queue
2187 * @param journal current journal session.*/
2188 void jbd_journal_commit_all(struct jbd_journal *journal)
// Each call to jbd_journal_commit_one removes the head of trans_queue,
// so the loop runs until the queue is empty.
2190 while (!TAILQ_EMPTY(&journal->trans_queue)) {
2191 jbd_journal_commit_one(journal);