ext4_journal: initial support of journal checksumming is added.
[lwext4.git] / lwext4 / ext4_journal.c
index 7eea7d6045a8fb2b6640c34fb00abb9ccea9ddc6..1f5c0bf6cf4885de28b7c552b57aaefa296d680a 100644 (file)
@@ -42,9 +42,8 @@
 #include "ext4_journal.h"
 #include "ext4_errno.h"
 #include "ext4_blockdev.h"
-#include "ext4_crc32c.h"
+#include "ext4_crc32.h"
 #include "ext4_debug.h"
-#include "tree.h"
 
 #include <string.h>
 #include <stdlib.h>
@@ -119,6 +118,209 @@ RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
 #define jbd_free_revoke_entry(addr) free(addr)
 
+/**@brief  Journal checksum version in use: 2, 3, or 0 for none.*/
+static int jbd_has_csum(struct jbd_sb *jbd_sb)
+{
+       int version = 0;
+       if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V2))
+               version = 2;
+       else if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V3))
+               version = 3;
+       return version;
+}
+
+#if CONFIG_META_CSUM_ENABLE
+/**@brief  crc32c of the jbd superblock, 0 when checksums are disabled.*/
+static uint32_t jbd_sb_csum(struct jbd_sb *jbd_sb)
+{
+       uint32_t saved, crc;
+
+       if (!jbd_has_csum(jbd_sb))
+               return 0;
+       /* Hash the whole superblock with its checksum field zeroed. */
+       saved = jbd_sb->checksum;
+       jbd_set32(jbd_sb, checksum, 0);
+       crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_sb, JBD_SUPERBLOCK_SIZE);
+       jbd_sb->checksum = saved;
+       return crc;
+}
+#else
+#define jbd_sb_csum(...) 0
+#endif
+
+/**@brief  Store the superblock checksum if the journal uses checksums.*/
+static void jbd_sb_csum_set(struct jbd_sb *jbd_sb)
+{
+       if (jbd_has_csum(jbd_sb)) {
+               jbd_set32(jbd_sb, checksum, jbd_sb_csum(jbd_sb));
+       }
+}
+
+#if CONFIG_META_CSUM_ENABLE
+/**@brief  Check the checksum stored in a jbd superblock.
+ * @param  jbd_sb jbd superblock
+ * @return true if checksums are unused or the stored value matches*/
+static bool jbd_verify_sb_csum(struct jbd_sb *jbd_sb)
+{
+       return !jbd_has_csum(jbd_sb) ||
+              jbd_get32(jbd_sb, checksum) == jbd_sb_csum(jbd_sb);
+}
+#else
+#define jbd_verify_sb_csum(...) true
+#endif
+
+#if CONFIG_META_CSUM_ENABLE
+/**@brief  crc32c of a metadata block seeded with the jbd superblock uuid;
+ *         the tail checksum reads as zero. 0 when checksums are disabled.*/
+static uint32_t jbd_meta_csum(struct jbd_fs *jbd_fs,
+                             struct jbd_bhdr *bhdr)
+{
+       uint32_t checksum = 0;
+
+       if (jbd_has_csum(&jbd_fs->sb)) {
+               uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
+               struct jbd_block_tail *tail =
+                       (struct jbd_block_tail *)((char *)bhdr + block_size -
+                               sizeof(struct jbd_block_tail));
+               uint32_t orig_checksum = tail->checksum;
+               tail->checksum = 0;
+               /* First calculate crc32c checksum against fs uuid */
+               checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
+                                      sizeof(jbd_fs->sb.uuid));
+               /* Continue the same crc32c over the whole block */
+               checksum = ext4_crc32c(checksum, bhdr, block_size);
+               tail->checksum = orig_checksum;
+       }
+       return checksum;
+}
+#else
+#define jbd_meta_csum(...) 0
+#endif
+
+/**@brief  Store a metadata block's checksum in its block tail.*/
+static void jbd_meta_csum_set(struct jbd_fs *jbd_fs,
+                             struct jbd_bhdr *bhdr)
+{
+       struct jbd_block_tail *tail;
+       char *block_end;
+       if (!jbd_has_csum(&jbd_fs->sb))
+               return;
+       block_end = (char *)bhdr + jbd_get32(&jbd_fs->sb, blocksize);
+       tail = (struct jbd_block_tail *)(block_end - sizeof(*tail));
+       tail->checksum = to_be32(jbd_meta_csum(jbd_fs, bhdr));
+}
+
+#if CONFIG_META_CSUM_ENABLE
+/**@brief  Check the checksum stored in a metadata block's tail.
+ * @return true if checksums are unused or the stored value matches*/
+static bool jbd_verify_meta_csum(struct jbd_fs *jbd_fs,
+                                struct jbd_bhdr *bhdr)
+{
+       struct jbd_block_tail *tail;
+       char *block_end;
+       if (!jbd_has_csum(&jbd_fs->sb))
+               return true;
+       block_end = (char *)bhdr + jbd_get32(&jbd_fs->sb, blocksize);
+       tail = (struct jbd_block_tail *)(block_end - sizeof(*tail));
+       return to_be32(tail->checksum) == jbd_meta_csum(jbd_fs, bhdr);
+}
+#else
+#define jbd_verify_meta_csum(...) true
+#endif
+
+#if CONFIG_META_CSUM_ENABLE
+/**@brief  crc32c of a commit block seeded with the jbd superblock uuid;
+ *         chksum_type/chksum_size/chksum[0] read as zero while hashing.*/
+static uint32_t jbd_commit_csum(struct jbd_fs *jbd_fs,
+                             struct jbd_commit_header *header)
+{
+       uint32_t checksum = 0;
+
+       if (jbd_has_csum(&jbd_fs->sb)) {
+               uint32_t orig_checksum_type = header->chksum_type,
+                        orig_checksum_size = header->chksum_size,
+                        orig_checksum = header->chksum[0];
+               uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
+               header->chksum_type = 0;
+               header->chksum_size = 0;
+               header->chksum[0] = 0;
+
+               /* First calculate crc32c checksum against fs uuid */
+               checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
+                                      sizeof(jbd_fs->sb.uuid));
+               /* Continue the same crc32c over the whole block */
+               checksum = ext4_crc32c(checksum, header, block_size);
+               header->chksum_type = orig_checksum_type;
+               header->chksum_size = orig_checksum_size;
+               header->chksum[0] = orig_checksum;
+       }
+       return checksum;
+}
+#else
+#define jbd_commit_csum(...) 0
+#endif
+
+/**@brief  Store a commit block's checksum, big-endian, in chksum[0].*/
+static void jbd_commit_csum_set(struct jbd_fs *jbd_fs,
+                             struct jbd_commit_header *header)
+{
+       if (!jbd_has_csum(&jbd_fs->sb))
+               return;
+       header->chksum_type = 0;
+       header->chksum_size = 0;
+       header->chksum[0] = to_be32(jbd_commit_csum(jbd_fs, header));
+}
+
+#if CONFIG_META_CSUM_ENABLE
+/**@brief  Check the checksum stored in a commit block.
+ * @param  header commit block to check
+ * @return true if checksums are unused or chksum[0] matches*/
+static bool jbd_verify_commit_csum(struct jbd_fs *jbd_fs,
+                                  struct jbd_commit_header *header)
+{
+       return !jbd_has_csum(&jbd_fs->sb) ||
+              to_be32(jbd_commit_csum(jbd_fs, header)) == header->chksum[0];
+}
+#else
+#define jbd_verify_commit_csum(...) true
+#endif
+
+#if CONFIG_META_CSUM_ENABLE
+/**@brief  crc32c of a data block, seeded with the jbd superblock uuid.*/
+static uint32_t jbd_block_csum(struct jbd_fs *jbd_fs, const void *buf)
+{
+       uint32_t checksum = 0;
+
+       if (jbd_has_csum(&jbd_fs->sb)) {
+               uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
+               /* First calculate crc32c checksum against fs uuid */
+               checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
+                                      sizeof(jbd_fs->sb.uuid));
+               /* Continue the same crc32c over the whole block */
+               checksum = ext4_crc32c(checksum, buf, block_size);
+       }
+       return checksum;
+}
+#else
+#define jbd_block_csum(...) 0
+#endif
+
+/**@brief  Store checksum in a tag: low 16 bits for v2, 32 bits for v3.*/
+static void jbd_block_tag_csum_set(struct jbd_fs *jbd_fs, void *__tag,
+                                  uint32_t checksum)
+{
+       int ver = jbd_has_csum(&jbd_fs->sb);
+       if (!ver)
+               return;
+       if (ver == 2) {
+               struct jbd_block_tag *tag = __tag;
+               jbd_set16(tag, checksum, (uint16_t)checksum);
+       } else {
+               struct jbd_block_tag3 *tag = __tag;
+               tag->checksum = to_be32(checksum);
+       }
+}
+
 /**@brief  Write jbd superblock to disk.
  * @param  jbd_fs jbd filesystem
  * @param  s jbd superblock
@@ -133,6 +335,7 @@ static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
        if (rc != EOK)
                return rc;
 
+       jbd_sb_csum_set(s);
        offset = fblock * ext4_sb_get_block_size(&fs->sb);
        return ext4_block_writebytes(fs->bdev, offset, s,
                                     EXT4_SUPERBLOCK_SIZE);
@@ -170,7 +373,7 @@ static bool jbd_verify_sb(struct jbd_sb *sb)
            jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
                return false;
 
-       return true;
+       return jbd_verify_sb_csum(sb);
 }
 
 /**@brief  Write back dirty jbd superblock to disk.
@@ -281,8 +484,12 @@ static int jbd_block_get(struct jbd_fs *jbd_fs,
 
        /* If succeeded, mark buffer as BC_FLUSH to indicate
         * that data should be written to disk immediately.*/
-       if (rc == EOK)
+       if (rc == EOK) {
                ext4_bcache_set_flag(block->buf, BC_FLUSH);
+               /* As we don't want to occupy too much space
+                * in block cache, we set this buffer BC_TMP.*/
+               ext4_bcache_set_flag(block->buf, BC_TMP);
+       }
 
        return rc;
 }
@@ -370,6 +577,9 @@ struct tag_info {
 
        /**@brief  Is this the last tag? */
        bool last_tag;
+
+       /**@brief  crc32c checksum. */
+       uint32_t checksum;
 };
 
 /**@brief  Extract information from a block tag.
@@ -490,6 +700,8 @@ jbd_write_block_tag(struct jbd_fs *jbd_fs,
                        jbd_set32(tag, flags,
                                  jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
 
+               jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
+
                if (tag_info->last_tag)
                        jbd_set32(tag, flags,
                                  jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
@@ -514,6 +726,8 @@ jbd_write_block_tag(struct jbd_fs *jbd_fs,
                        jbd_set16(tag, flags,
                                  jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
 
+               jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
+
                if (tag_info->last_tag)
                        jbd_set16(tag, flags,
                                  jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
@@ -842,6 +1056,14 @@ static int jbd_iterate_log(struct jbd_fs *jbd_fs,
 
                switch (jbd_get32(header, blocktype)) {
                case JBD_DESCRIPTOR_BLOCK:
+                       if (!jbd_verify_meta_csum(jbd_fs, header)) {
+                               ext4_dbg(DEBUG_JBD,
+                                       DBG_WARN "Descriptor block checksum failed."
+                                               "Journal block: %" PRIu32"\n",
+                                               this_block);
+                               log_end = true;
+                               break;
+                       }
                        ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
                                            "trans_id: %" PRIu32"\n",
                                            this_block, this_trans_id);
@@ -859,6 +1081,15 @@ static int jbd_iterate_log(struct jbd_fs *jbd_fs,
 
                        break;
                case JBD_COMMIT_BLOCK:
+                       if (!jbd_verify_commit_csum(jbd_fs,
+                                       (struct jbd_commit_header *)header)) {
+                               ext4_dbg(DEBUG_JBD,
+                                       DBG_WARN "Commit block checksum failed."
+                                               "Journal block: %" PRIu32"\n",
+                                               this_block);
+                               log_end = true;
+                               break;
+                       }
                        ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
                                            "trans_id: %" PRIu32"\n",
                                            this_block, this_trans_id);
@@ -868,6 +1099,14 @@ static int jbd_iterate_log(struct jbd_fs *jbd_fs,
                        this_trans_id++;
                        break;
                case JBD_REVOKE_BLOCK:
+                       if (!jbd_verify_meta_csum(jbd_fs, header)) {
+                               ext4_dbg(DEBUG_JBD,
+                                       DBG_WARN "Revoke block checksum failed."
+                                               "Journal block: %" PRIu32"\n",
+                                               this_block);
+                               log_end = true;
+                               break;
+                       }
                        ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
                                            "trans_id: %" PRIu32"\n",
                                            this_block, this_trans_id);
@@ -982,6 +1221,7 @@ int jbd_journal_start(struct jbd_fs *jbd_fs,
 
        TAILQ_INIT(&journal->trans_queue);
        TAILQ_INIT(&journal->cp_queue);
+       RB_INIT(&journal->block_rec_root);
        journal->jbd_fs = jbd_fs;
        jbd_journal_write_sb(journal);
        return jbd_write_sb(jbd_fs);
@@ -992,7 +1232,7 @@ static void jbd_journal_flush_trans(struct jbd_trans *trans)
        struct jbd_buf *jbd_buf, *tmp;
        struct jbd_journal *journal = trans->journal;
        struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
-       LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
+       TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
                        tmp) {
                struct ext4_block block = jbd_buf->block;
                ext4_block_flush_buf(fs->bdev, block.buf);
@@ -1012,7 +1252,9 @@ jbd_journal_skip_pure_revoke(struct jbd_journal *journal,
        jbd_journal_write_sb(journal);
 }
 
-static void jbd_journal_flush_all_trans(struct jbd_journal *journal)
+static void
+jbd_journal_purge_cp_trans(struct jbd_journal *journal,
+                          bool flush)
 {
        struct jbd_trans *trans;
        while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
@@ -1021,9 +1263,35 @@ static void jbd_journal_flush_all_trans(struct jbd_journal *journal)
                                        trans,
                                        trans_node);
                        jbd_journal_skip_pure_revoke(journal, trans);
-               } else
-                       jbd_journal_flush_trans(trans);
-
+               } else {
+                       if (trans->data_cnt ==
+                                       trans->written_cnt) {
+                               journal->start =
+                                       trans->start_iblock +
+                                       trans->alloc_blocks;
+                               wrap(&journal->jbd_fs->sb,
+                                               journal->start);
+                               journal->trans_id =
+                                       trans->trans_id + 1;
+                               TAILQ_REMOVE(&journal->cp_queue,
+                                               trans,
+                                               trans_node);
+                               jbd_journal_free_trans(journal,
+                                               trans,
+                                               false);
+                               jbd_journal_write_sb(journal);
+                       } else if (!flush) {
+                               journal->start =
+                                       trans->start_iblock;
+                               wrap(&journal->jbd_fs->sb,
+                                               journal->start);
+                               journal->trans_id =
+                                       trans->trans_id;
+                               jbd_journal_write_sb(journal);
+                               break;
+                       } else
+                               jbd_journal_flush_trans(trans);
+               }
        }
 }
 
@@ -1041,7 +1309,14 @@ int jbd_journal_stop(struct jbd_journal *journal)
 
        /* Make sure that journalled content have reached
         * the disk.*/
-       jbd_journal_flush_all_trans(journal);
+       jbd_journal_purge_cp_trans(journal, true);
+
+       /* There should be no block record in this journal
+        * session. */
+       if (!RB_EMPTY(&journal->block_rec_root))
+               ext4_dbg(DEBUG_JBD,
+                        DBG_WARN "There are still block records "
+                                 "in this journal session!\n");
 
        features_incompatible =
                ext4_get32(&jbd_fs->inode_ref.fs->sb,
@@ -1077,7 +1352,7 @@ static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
        /* If there is no space left, flush all journalled
         * blocks to disk first.*/
        if (journal->last == journal->start)
-               jbd_journal_flush_all_trans(journal);
+               jbd_journal_purge_cp_trans(journal, true);
 
        return start_block;
 }
@@ -1092,12 +1367,11 @@ jbd_journal_new_trans(struct jbd_journal *journal)
        if (!trans)
                return NULL;
 
-       RB_INIT(&trans->block_rec_root);
-
        /* We will assign a trans_id to this transaction,
         * once it has been committed.*/
        trans->journal = journal;
        trans->error = EOK;
+       TAILQ_INIT(&trans->buf_queue);
        return trans;
 }
 
@@ -1117,53 +1391,73 @@ int jbd_trans_get_access(struct jbd_journal *journal,
 {
        int r = EOK;
        struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
+       struct jbd_buf *jbd_buf = block->buf->end_write_arg;
 
        /* If the buffer has already been modified, we should
         * flush dirty data in this buffer to disk.*/
        if (ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
-           block->buf->end_write == jbd_trans_end_write &&
-           block->buf->end_write_arg != trans) {
-               r = ext4_block_flush_buf(fs->bdev, block->buf);
+           block->buf->end_write == jbd_trans_end_write) {
+               ext4_assert(jbd_buf);
+               if (jbd_buf->trans != trans)
+                       r = ext4_block_flush_buf(fs->bdev, block->buf);
+
        }
        return r;
 }
 
-static inline int
-jbd_trans_insert_block_rec(struct jbd_trans *trans,
-                          ext4_fsblk_t lba)
-{
-       struct jbd_block_rec *block_rec;
-       block_rec = calloc(1, sizeof(struct jbd_block_rec));
-       if (!block_rec)
-               return ENOMEM;
-
-       block_rec->lba = lba;
-       RB_INSERT(jbd_block, &trans->block_rec_root, block_rec);
-       return EOK;
-}
-
 static struct jbd_block_rec *
-jbd_trans_block_rec_lookup(struct jbd_trans *trans,
+jbd_trans_block_rec_lookup(struct jbd_journal *journal,
                           ext4_fsblk_t lba)
 {
        struct jbd_block_rec tmp = {
                .lba = lba
        };
 
-       return RB_FIND(jbd_block, &trans->block_rec_root, &tmp);
+       return RB_FIND(jbd_block,
+                      &journal->block_rec_root,
+                      &tmp);
+}
+
+/**@brief  Find or create the journal-wide record of block lba and hand
+ *         it, together with buf, to trans. NULL if allocation fails.*/
+static inline struct jbd_block_rec *
+jbd_trans_insert_block_rec(struct jbd_trans *trans,
+                          ext4_fsblk_t lba,
+                          struct ext4_buf *buf)
+{
+       struct jbd_block_rec *block_rec;
+       block_rec = jbd_trans_block_rec_lookup(trans->journal, lba);
+       if (block_rec) {
+               LIST_REMOVE(block_rec, tbrec_node);
+               /* Data should be flushed to disk already. */
+               ext4_assert(!block_rec->buf);
+       } else {
+               block_rec = calloc(1, sizeof(*block_rec));
+               if (!block_rec)
+                       return NULL;
+               block_rec->lba = lba;
+               RB_INSERT(jbd_block, &trans->journal->block_rec_root,
+                         block_rec);
+       }
+       /* New or adopted, the record tracks buf and belongs to trans. */
+       block_rec->buf = buf;
+       block_rec->trans = trans;
+       LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
+       return block_rec;
+}
 
 static inline void
-jbd_trans_remove_block_recs(struct jbd_trans *trans)
+jbd_trans_remove_block_rec(struct jbd_journal *journal,
+                          struct jbd_block_rec *block_rec,
+                          struct jbd_trans *trans)
 {
-       struct jbd_block_rec *block_rec, *tmp;
-       RB_FOREACH_SAFE(block_rec,
-                       jbd_block,
-                       &trans->block_rec_root,
-                       tmp) {
+       /* If this block record doesn't belong to this transaction,
+        * give up.*/
+       if (block_rec->trans == trans) {
+               LIST_REMOVE(block_rec, tbrec_node);
                RB_REMOVE(jbd_block,
-                         &trans->block_rec_root,
-                         block_rec);
+                               &journal->block_rec_root,
+                               block_rec);
                free(block_rec);
        }
 }
@@ -1179,15 +1473,19 @@ int jbd_trans_set_block_dirty(struct jbd_trans *trans,
 
        if (!ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
            block->buf->end_write != jbd_trans_end_write) {
+               struct jbd_block_rec *block_rec;
                buf = calloc(1, sizeof(struct jbd_buf));
                if (!buf)
                        return ENOMEM;
 
-               if (jbd_trans_insert_block_rec(trans, block->lb_id) != EOK) {
+               if ((block_rec = jbd_trans_insert_block_rec(trans,
+                                       block->lb_id,
+                                       block->buf)) == NULL) {
                        free(buf);
                        return ENOMEM;
                }
 
+               buf->block_rec = block_rec;
                buf->trans = trans;
                buf->block = *block;
                ext4_bcache_inc_ref(block->buf);
@@ -1198,7 +1496,7 @@ int jbd_trans_set_block_dirty(struct jbd_trans *trans,
                block->buf->end_write_arg = buf;
 
                trans->data_cnt++;
-               LIST_INSERT_HEAD(&trans->buf_list, buf, buf_node);
+               TAILQ_INSERT_HEAD(&trans->buf_queue, buf, buf_node);
 
                ext4_bcache_set_dirty(block->buf);
        }
@@ -1232,16 +1530,25 @@ int jbd_trans_try_revoke_block(struct jbd_trans *trans,
                               ext4_fsblk_t lba)
 {
        int r = EOK;
-       struct jbd_trans *tmp;
        struct jbd_journal *journal = trans->journal;
-       TAILQ_FOREACH(tmp, &journal->cp_queue, trans_node) {
-               struct jbd_block_rec *block_rec =
-                       jbd_trans_block_rec_lookup(trans, lba);
-               if (block_rec)
-                       jbd_trans_revoke_block(trans, lba);
+       struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
+       struct jbd_block_rec *block_rec =
+               jbd_trans_block_rec_lookup(journal, lba);
+
+       /* Never flush buffers that belong to this transaction. */
+       if (block_rec && block_rec->trans != trans) {
+               /* If the buffer has not been flushed yet, flush it now. */
+               if (block_rec->buf) {
+                       r = ext4_block_flush_buf(fs->bdev, block_rec->buf);
+                       if (r != EOK)
+                               return r;
 
+               }
+
+               jbd_trans_revoke_block(trans, lba);
        }
-       return r;
+
+       return EOK;
 }
 
 /**@brief  Free a transaction
@@ -1255,8 +1562,9 @@ void jbd_journal_free_trans(struct jbd_journal *journal,
 {
        struct jbd_buf *jbd_buf, *tmp;
        struct jbd_revoke_rec *rec, *tmp2;
+       struct jbd_block_rec *block_rec, *tmp3;
        struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
-       LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
+       TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
                          tmp) {
                if (abort) {
                        jbd_buf->block.buf->end_write = NULL;
@@ -1265,7 +1573,7 @@ void jbd_journal_free_trans(struct jbd_journal *journal,
                        ext4_block_set(fs->bdev, &jbd_buf->block);
                }
 
-               LIST_REMOVE(jbd_buf, buf_node);
+               TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
                free(jbd_buf);
        }
        LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
@@ -1273,8 +1581,11 @@ void jbd_journal_free_trans(struct jbd_journal *journal,
                LIST_REMOVE(rec, revoke_node);
                free(rec);
        }
+       LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
+                         tmp3) {
+               jbd_trans_remove_block_rec(journal, block_rec, trans);
+       }
 
-       jbd_trans_remove_block_recs(trans);
        free(trans);
 }
 
@@ -1300,6 +1611,7 @@ static int jbd_trans_write_commit_block(struct jbd_trans *trans)
        jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
        jbd_set32(&header->header, sequence, trans->trans_id);
 
+       jbd_commit_csum_set(journal->jbd_fs, header);
        ext4_bcache_set_dirty(commit_block.buf);
        rc = jbd_block_set(journal->jbd_fs, &commit_block);
        if (rc != EOK)
@@ -1324,20 +1636,49 @@ static int jbd_journal_prepare(struct jbd_journal *journal,
        struct ext4_block desc_block, data_block;
        struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
 
-       LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node, tmp) {
+       /* Try to remove any non-dirty buffers from the tail of
+        * buf_queue. */
+       TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
+                       jbd_trans_buf, buf_node, tmp) {
+               /* We stop the iteration when we find a dirty buffer. */
+               if (ext4_bcache_test_flag(jbd_buf->block.buf,
+                                       BC_DIRTY))
+                       break;
+
+               /* The buffer has not been modified, just release
+                * that jbd_buf. */
+               jbd_trans_remove_block_rec(journal,
+                               jbd_buf->block_rec, trans);
+               trans->data_cnt--;
+
+               jbd_buf->block.buf->end_write = NULL;
+               jbd_buf->block.buf->end_write_arg = NULL;
+               ext4_block_set(fs->bdev, &jbd_buf->block);
+               TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
+               free(jbd_buf);
+       }
+
+       TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
                struct tag_info tag_info;
                bool uuid_exist = false;
+               uint32_t checksum;
                if (!ext4_bcache_test_flag(jbd_buf->block.buf,
                                           BC_DIRTY)) {
                        /* The buffer has not been modified, just release
                         * that jbd_buf. */
+                       jbd_trans_remove_block_rec(journal,
+                                       jbd_buf->block_rec, trans);
+                       trans->data_cnt--;
+
                        jbd_buf->block.buf->end_write = NULL;
                        jbd_buf->block.buf->end_write_arg = NULL;
                        ext4_block_set(fs->bdev, &jbd_buf->block);
-                       LIST_REMOVE(jbd_buf, buf_node);
+                       TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
                        free(jbd_buf);
                        continue;
                }
+               checksum = jbd_block_csum(journal->jbd_fs,
+                                         jbd_buf->block.data);
 again:
                if (!desc_iblock) {
                        struct jbd_bhdr *bhdr;
@@ -1360,6 +1701,9 @@ again:
                        tag_tbl_size = journal->block_size -
                                sizeof(struct jbd_bhdr);
 
+                       if (jbd_has_csum(&journal->jbd_fs->sb))
+                               tag_tbl_size -= sizeof(struct jbd_block_tail);
+
                        if (!trans->start_iblock)
                                trans->start_iblock = desc_iblock;
 
@@ -1368,6 +1712,9 @@ again:
                tag_info.uuid_exist = uuid_exist;
                if (i == trans->data_cnt - 1)
                        tag_info.last_tag = true;
+               else
+                       tag_info.last_tag = false;
+               tag_info.checksum = checksum;
 
                if (uuid_exist)
                        memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
@@ -1378,6 +1725,8 @@ again:
                                tag_tbl_size,
                                &tag_info);
                if (rc != EOK) {
+                       jbd_meta_csum_set(journal->jbd_fs,
+                                       (struct jbd_bhdr *)desc_block.data);
                        jbd_block_set(journal->jbd_fs, &desc_block);
                        desc_iblock = 0;
                        goto again;
@@ -1403,8 +1752,11 @@ again:
 
                i++;
        }
-       if (rc == EOK && desc_iblock)
+       if (rc == EOK && desc_iblock) {
+               jbd_meta_csum_set(journal->jbd_fs,
+                               (struct jbd_bhdr *)desc_block.data);
                jbd_block_set(journal->jbd_fs, &desc_block);
+       }
 
        return rc;
 }
@@ -1454,6 +1806,9 @@ again:
                        tag_tbl_size = journal->block_size -
                                sizeof(struct jbd_revoke_header);
 
+                       if (jbd_has_csum(&journal->jbd_fs->sb))
+                               tag_tbl_size -= sizeof(struct jbd_block_tail);
+
                        if (!trans->start_iblock)
                                trans->start_iblock = desc_iblock;
 
@@ -1462,6 +1817,8 @@ again:
                if (tag_tbl_size < record_len) {
                        jbd_set32(header, count,
                                  journal->block_size - tag_tbl_size);
+                       jbd_meta_csum_set(journal->jbd_fs,
+                                       (struct jbd_bhdr *)desc_block.data);
                        jbd_block_set(journal->jbd_fs, &desc_block);
                        desc_iblock = 0;
                        header = NULL;
@@ -1486,6 +1843,8 @@ again:
                        jbd_set32(header, count,
                                  journal->block_size - tag_tbl_size);
 
+               jbd_meta_csum_set(journal->jbd_fs,
+                               (struct jbd_bhdr *)desc_block.data);
                jbd_block_set(journal->jbd_fs, &desc_block);
        }
 
@@ -1511,7 +1870,7 @@ void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
 {
        struct jbd_buf *jbd_buf, *tmp;
        struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
-       LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
+       TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
                        tmp) {
                struct ext4_block block = jbd_buf->block;
                ext4_block_set(fs->bdev, &block);
@@ -1533,7 +1892,8 @@ static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
        if (res != EOK)
                trans->error = res;
 
-       LIST_REMOVE(jbd_buf, buf_node);
+       TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
+       jbd_buf->block_rec->buf = NULL;
        free(jbd_buf);
 
        /* Clear the end_write and end_write_arg fields. */
@@ -1542,31 +1902,20 @@ static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
 
        trans->written_cnt++;
        if (trans->written_cnt == trans->data_cnt) {
-               TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
-
+               /* If it is the first transaction on checkpoint queue,
+                * we will shift the start of the journal to the next
+                * transaction, and remove subsequent written
+                * transactions from checkpoint queue until we find
+                * an unwritten one. */
                if (first_in_queue) {
                        journal->start = trans->start_iblock +
                                trans->alloc_blocks;
                        wrap(&journal->jbd_fs->sb, journal->start);
                        journal->trans_id = trans->trans_id + 1;
-               }
-               jbd_journal_free_trans(journal, trans, false);
+                       TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
+                       jbd_journal_free_trans(journal, trans, false);
 
-               if (first_in_queue) {
-                       while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
-                               if (!trans->data_cnt) {
-                                       TAILQ_REMOVE(&journal->cp_queue,
-                                                    trans,
-                                                    trans_node);
-                                       jbd_journal_skip_pure_revoke(journal,
-                                                                    trans);
-                               } else {
-                                       journal->start = trans->start_iblock;
-                                       wrap(&journal->jbd_fs->sb, journal->start);
-                                       journal->trans_id = trans->trans_id;
-                                       break;
-                               }
-                       }
+                       jbd_journal_purge_cp_trans(journal, false);
                        jbd_journal_write_sb(journal);
                        jbd_write_sb(journal->jbd_fs);
                }
@@ -1592,7 +1941,7 @@ int jbd_journal_commit_trans(struct jbd_journal *journal,
        if (rc != EOK)
                goto Finish;
 
-       if (LIST_EMPTY(&trans->buf_list) &&
+       if (TAILQ_EMPTY(&trans->buf_queue) &&
            LIST_EMPTY(&trans->revoke_list)) {
                /* Since there are no entries in both buffer list
                 * and revoke entry list, we do not consider trans as