X-Git-Url: https://main.carlh.net/gitweb/?a=blobdiff_plain;f=lwext4%2Fext4_journal.c;h=86366a1bf044a5792dea5395908bc37a6c7f104c;hb=ea7ce7dc4a1cb5af01175324c5da8dceb3f8652d;hp=e062f6a88a76fb93c6e8921b993304c57198a2a6;hpb=f3d5e9676d582496bc50e1b59ad44c86888634b8;p=lwext4.git diff --git a/lwext4/ext4_journal.c b/lwext4/ext4_journal.c index e062f6a..86366a1 100644 --- a/lwext4/ext4_journal.c +++ b/lwext4/ext4_journal.c @@ -42,9 +42,8 @@ #include "ext4_journal.h" #include "ext4_errno.h" #include "ext4_blockdev.h" -#include "ext4_crc32c.h" +#include "ext4_crc32.h" #include "ext4_debug.h" -#include "tree.h" #include #include @@ -75,6 +74,9 @@ struct recover_info { /**@brief Used as internal argument.*/ uint32_t this_trans_id; + /**@brief Number of committed transactions counted while iterating the log.*/ + uint32_t trans_cnt; + /**@brief RB-Tree storing revoke entries.*/ RB_HEAD(jbd_revoke, revoke_entry) revoke_root; }; @@ -101,12 +103,242 @@ jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b) return 0; } +static int +jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b) +{ /* Three-way comparison of two block records by lba: yields 1, -1 or 0. */ + if (a->lba > b->lba) + return 1; + else if (a->lba < b->lba) + return -1; + return 0; +} + RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node, jbd_revoke_entry_cmp, static inline) +RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node, + jbd_block_rec_cmp, static inline) #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry)) #define jbd_free_revoke_entry(addr) free(addr) +static int jbd_has_csum(struct jbd_sb *jbd_sb) +{ /* Yields 2 when the CSUM_V2 incompat feature is set (tested first), 3 when CSUM_V3 is set, otherwise 0. */ + if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V2)) + return 2; + + if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V3)) + return 3; + + return 0; +} + /**@brief crc32c of the journal superblock computed with its checksum field set to zero; yields 0 when no checksum feature is set.*/ +#if CONFIG_META_CSUM_ENABLE +static uint32_t jbd_sb_csum(struct jbd_sb *jbd_sb) +{ + uint32_t checksum = 0; + + if (jbd_has_csum(jbd_sb)) { + uint32_t orig_checksum = jbd_sb->checksum; + jbd_set32(jbd_sb, checksum, 0); + /* Calculate crc32c checksum against tho whole 
superblock */ + checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_sb, + JBD_SUPERBLOCK_SIZE); + jbd_sb->checksum = orig_checksum; + } + return checksum; +} +#else +#define jbd_sb_csum(...) 0 +#endif + /**@brief Recompute the superblock checksum and store it with jbd_set32(); does nothing when jbd_has_csum() reports no checksum feature.*/ +static void jbd_sb_csum_set(struct jbd_sb *jbd_sb) +{ + if (!jbd_has_csum(jbd_sb)) + return; + + jbd_set32(jbd_sb, checksum, jbd_sb_csum(jbd_sb)); +} + /**@brief Compare the recomputed superblock checksum with the stored one. Reports true when they match or when no checksum feature is set; the fallback macro always yields true.*/ +#if CONFIG_META_CSUM_ENABLE +static bool +jbd_verify_sb_csum(struct jbd_sb *jbd_sb) +{ + if (!jbd_has_csum(jbd_sb)) + return true; + + return jbd_sb_csum(jbd_sb) == jbd_get32(jbd_sb, checksum); +} +#else +#define jbd_verify_sb_csum(...) true +#endif + /**@brief crc32c over the superblock uuid followed by block_size bytes starting at bhdr; the checksum field of the jbd_block_tail at the end of the block is zeroed for the computation and restored afterwards. Yields 0 when no checksum feature is set.*/ +#if CONFIG_META_CSUM_ENABLE +static uint32_t jbd_meta_csum(struct jbd_fs *jbd_fs, + struct jbd_bhdr *bhdr) +{ + uint32_t checksum = 0; + + if (jbd_has_csum(&jbd_fs->sb)) { + uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize); + struct jbd_block_tail *tail = + (struct jbd_block_tail *)((char *)bhdr + block_size - + sizeof(struct jbd_block_tail)); + uint32_t orig_checksum = tail->checksum; + tail->checksum = 0; + + /* First calculate crc32c checksum against fs uuid */ + checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid, + sizeof(jbd_fs->sb.uuid)); + /* Calculate crc32c checksum against the whole block */ + checksum = ext4_crc32c(checksum, bhdr, + block_size); + tail->checksum = orig_checksum; + } + return checksum; +} +#else +#define jbd_meta_csum(...) 
0 +#endif + /**@brief Store to_be32(jbd_meta_csum()) in the jbd_block_tail found in the last sizeof(struct jbd_block_tail) bytes of the block; does nothing when no checksum feature is set.*/ +static void jbd_meta_csum_set(struct jbd_fs *jbd_fs, + struct jbd_bhdr *bhdr) +{ + uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize); + struct jbd_block_tail *tail = (struct jbd_block_tail *) + ((char *)bhdr + block_size - + sizeof(struct jbd_block_tail)); + if (!jbd_has_csum(&jbd_fs->sb)) + return; + + tail->checksum = to_be32(jbd_meta_csum(jbd_fs, bhdr)); +} + /**@brief Compare jbd_meta_csum() of the block with to_be32() of the checksum stored in its block tail. Reports true when they match or when no checksum feature is set; the fallback macro always yields true.*/ +#if CONFIG_META_CSUM_ENABLE +static bool +jbd_verify_meta_csum(struct jbd_fs *jbd_fs, + struct jbd_bhdr *bhdr) +{ + uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize); + struct jbd_block_tail *tail = (struct jbd_block_tail *) + ((char *)bhdr + block_size - + sizeof(struct jbd_block_tail)); + if (!jbd_has_csum(&jbd_fs->sb)) + return true; + + return jbd_meta_csum(jbd_fs, bhdr) == to_be32(tail->checksum); +} +#else +#define jbd_verify_meta_csum(...) true +#endif + /**@brief crc32c over the superblock uuid followed by block_size bytes starting at the commit header; chksum_type, chksum_size and chksum[0] are zeroed for the computation and restored afterwards. Yields 0 when no checksum feature is set.*/ +#if CONFIG_META_CSUM_ENABLE +static uint32_t jbd_commit_csum(struct jbd_fs *jbd_fs, + struct jbd_commit_header *header) +{ + uint32_t checksum = 0; + + if (jbd_has_csum(&jbd_fs->sb)) { + uint32_t orig_checksum_type = header->chksum_type, + orig_checksum_size = header->chksum_size, + orig_checksum = header->chksum[0]; + uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize); + header->chksum_type = 0; + header->chksum_size = 0; + header->chksum[0] = 0; + + /* First calculate crc32c checksum against fs uuid */ + checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid, + sizeof(jbd_fs->sb.uuid)); + /* Calculate crc32c checksum against the whole block */ + checksum = ext4_crc32c(checksum, header, + block_size); + + header->chksum_type = orig_checksum_type; + header->chksum_size = orig_checksum_size; + header->chksum[0] = orig_checksum; + } + return checksum; +} +#else +#define jbd_commit_csum(...) 
0 +#endif + /**@brief Zero chksum_type and chksum_size and store jbd_commit_csum() in chksum[0]; does nothing when no checksum feature is set. NOTE(review): the value is stored without to_be32() while jbd_verify_commit_csum() compares chksum[0] against to_be32(...) - confirm the intended byte order.*/ +static void jbd_commit_csum_set(struct jbd_fs *jbd_fs, + struct jbd_commit_header *header) +{ + if (!jbd_has_csum(&jbd_fs->sb)) + return; + + header->chksum_type = 0; + header->chksum_size = 0; + header->chksum[0] = jbd_commit_csum(jbd_fs, header); +} + /**@brief Compare chksum[0] of the commit header with to_be32(jbd_commit_csum()). Reports true when they match or when no checksum feature is set; the fallback macro always yields true.*/ +#if CONFIG_META_CSUM_ENABLE +static bool jbd_verify_commit_csum(struct jbd_fs *jbd_fs, + struct jbd_commit_header *header) +{ + if (!jbd_has_csum(&jbd_fs->sb)) + return true; + + return header->chksum[0] == to_be32(jbd_commit_csum(jbd_fs, + header)); +} +#else +#define jbd_verify_commit_csum(...) true +#endif + +#if CONFIG_META_CSUM_ENABLE +/* + * NOTE: We only make use of @csum parameter when + * JBD_FEATURE_COMPAT_CHECKSUM is enabled. + */ +static uint32_t jbd_block_csum(struct jbd_fs *jbd_fs, const void *buf, + uint32_t csum, + uint32_t sequence) +{ + uint32_t checksum = 0; + + if (jbd_has_csum(&jbd_fs->sb)) { + uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize); + /* First calculate crc32c checksum against fs uuid */ + checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid, + sizeof(jbd_fs->sb.uuid)); + /* Then calculate crc32c checksum against sequence no. */ + checksum = ext4_crc32c(checksum, &sequence, + sizeof(uint32_t)); + /* Calculate crc32c checksum against the whole block */ + checksum = ext4_crc32c(checksum, buf, + block_size); + } else if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb, + JBD_FEATURE_COMPAT_CHECKSUM)) { /* NOTE(review): a COMPAT flag is tested through JBD_HAS_INCOMPAT_FEATURE - confirm the intended feature word. */ + uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize); + /* Calculate crc32 checksum, seeded with @csum, against the whole block */ + checksum = ext4_crc32(csum, buf, + block_size); + } + return checksum; +} +#else +#define jbd_block_csum(...) 
0 +#endif + /**@brief Store a block checksum into a tag: for csum version 2 __tag is treated as struct jbd_block_tag and the value is narrowed to 16 bits, otherwise __tag is treated as struct jbd_block_tag3 and all 32 bits are stored; does nothing when no checksum feature is set.*/ +static void jbd_block_tag_csum_set(struct jbd_fs *jbd_fs, void *__tag, + uint32_t checksum) +{ + int ver = jbd_has_csum(&jbd_fs->sb); + if (!ver) + return; + + if (ver == 2) { + struct jbd_block_tag *tag = __tag; + tag->checksum = (uint16_t)to_be32(checksum); /* NOTE(review): the cast to uint16_t is applied after to_be32(), so which half of the checksum survives depends on what to_be32() does on the host - confirm. */ + } else { + struct jbd_block_tag3 *tag = __tag; + tag->checksum = to_be32(checksum); + } +} + /**@brief Write jbd superblock to disk. * @param jbd_fs jbd filesystem * @param s jbd superblock @@ -121,6 +353,7 @@ static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s) if (rc != EOK) return rc; + jbd_sb_csum_set(s); offset = fblock * ext4_sb_get_block_size(&fs->sb); return ext4_block_writebytes(fs->bdev, offset, s, EXT4_SUPERBLOCK_SIZE); @@ -158,7 +391,7 @@ static bool jbd_verify_sb(struct jbd_sb *sb) jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2) return false; - return true; + return jbd_verify_sb_csum(sb); } /**@brief Write back dirty jbd superblock to disk. @@ -269,8 +502,12 @@ static int jbd_block_get(struct jbd_fs *jbd_fs, /* If succeeded, mark buffer as BC_FLUSH to indicate * that data should be written to disk immediately.*/ - if (rc == EOK) + if (rc == EOK) { ext4_bcache_set_flag(block->buf, BC_FLUSH); + /* As we don't want to occupy too much space + * in block cache, we set this buffer BC_TMP.*/ + ext4_bcache_set_flag(block->buf, BC_TMP); + } return rc; } @@ -358,6 +595,9 @@ struct tag_info { /**@brief Is this the last tag? */ bool last_tag; + + /**@brief crc32c checksum. */ + uint32_t checksum; }; /**@brief Extract information from a block tag. 
@@ -478,6 +718,8 @@ jbd_write_block_tag(struct jbd_fs *jbd_fs, jbd_set32(tag, flags, jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID); + jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum); + if (tag_info->last_tag) jbd_set32(tag, flags, jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG); @@ -502,6 +744,8 @@ jbd_write_block_tag(struct jbd_fs *jbd_fs, jbd_set16(tag, flags, jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID); + jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum); + if (tag_info->last_tag) jbd_set16(tag, flags, jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG); @@ -784,6 +1028,10 @@ static int jbd_iterate_log(struct jbd_fs *jbd_fs, /* We start iterating valid blocks in the whole journal.*/ start_trans_id = this_trans_id = jbd_get32(sb, sequence); start_block = this_block = jbd_get32(sb, start); + if (action == ACTION_SCAN) + info->trans_cnt = 0; + else if (!info->trans_cnt) + log_end = true; ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n", start_trans_id); @@ -830,6 +1078,14 @@ static int jbd_iterate_log(struct jbd_fs *jbd_fs, switch (jbd_get32(header, blocktype)) { case JBD_DESCRIPTOR_BLOCK: + if (!jbd_verify_meta_csum(jbd_fs, header)) { + ext4_dbg(DEBUG_JBD, + DBG_WARN "Descriptor block checksum failed." + "Journal block: %" PRIu32"\n", + this_block); + log_end = true; + break; + } ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", " "trans_id: %" PRIu32"\n", this_block, this_trans_id); @@ -847,6 +1103,15 @@ static int jbd_iterate_log(struct jbd_fs *jbd_fs, break; case JBD_COMMIT_BLOCK: + if (!jbd_verify_commit_csum(jbd_fs, + (struct jbd_commit_header *)header)) { + ext4_dbg(DEBUG_JBD, + DBG_WARN "Commit block checksum failed." + "Journal block: %" PRIu32"\n", + this_block); + log_end = true; + break; + } ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", " "trans_id: %" PRIu32"\n", this_block, this_trans_id); @@ -854,8 +1119,17 @@ static int jbd_iterate_log(struct jbd_fs *jbd_fs, * we may now proceed to the next transaction. 
*/ this_trans_id++; + info->trans_cnt++; break; case JBD_REVOKE_BLOCK: + if (!jbd_verify_meta_csum(jbd_fs, header)) { + ext4_dbg(DEBUG_JBD, + DBG_WARN "Revoke block checksum failed." + "Journal block: %" PRIu32"\n", + this_block); + log_end = true; + break; + } ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", " "trans_id: %" PRIu32"\n", this_block, this_trans_id); @@ -951,6 +1225,7 @@ int jbd_journal_start(struct jbd_fs *jbd_fs, uint32_t features_incompatible = ext4_get32(&jbd_fs->inode_ref.fs->sb, features_incompatible); + struct ext4_block block = EXT4_BLOCK_ZERO(); features_incompatible |= EXT4_FINCOM_RECOVER; ext4_set32(&jbd_fs->inode_ref.fs->sb, features_incompatible, @@ -968,23 +1243,66 @@ int jbd_journal_start(struct jbd_fs *jbd_fs, journal->block_size = jbd_get32(&jbd_fs->sb, blocksize); + r = jbd_block_get_noread(jbd_fs, + &block, + journal->start); + if (r != EOK) { + memset(journal, 0, sizeof(struct jbd_journal)); + return r; + } + memset(block.data, 0, journal->block_size); + ext4_bcache_set_dirty(block.buf); + r = jbd_block_set(jbd_fs, &block); + if (r != EOK) { + memset(journal, 0, sizeof(struct jbd_journal)); + return r; + } + TAILQ_INIT(&journal->trans_queue); TAILQ_INIT(&journal->cp_queue); + RB_INIT(&journal->block_rec_root); journal->jbd_fs = jbd_fs; jbd_journal_write_sb(journal); return jbd_write_sb(jbd_fs); } +static void jbd_trans_end_write(struct ext4_bcache *bc __unused, + struct ext4_buf *buf __unused, + int res, + void *arg); + static void jbd_journal_flush_trans(struct jbd_trans *trans) { struct jbd_buf *jbd_buf, *tmp; struct jbd_journal *journal = trans->journal; struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs; - LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node, + void *tmp_data = malloc(journal->block_size); + ext4_assert(tmp_data); + + TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) { - struct ext4_block block = jbd_buf->block; - ext4_block_flush_buf(fs->bdev, block.buf); + struct ext4_buf *buf = 
jbd_buf->block_rec->buf; + /* The buffer in memory is still dirty. */ + if (buf) { + if (jbd_buf->block_rec->trans != trans) { + int r; + struct ext4_block jbd_block = EXT4_BLOCK_ZERO(); + ext4_assert(ext4_block_get(fs->bdev, + &jbd_block, + jbd_buf->jbd_lba) == EOK); + memcpy(tmp_data, jbd_block.data, + journal->block_size); + ext4_block_set(fs->bdev, &jbd_block); + r = ext4_blocks_set_direct(fs->bdev, tmp_data, + buf->lba, 1); + jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf); + } else + ext4_block_flush_buf(fs->bdev, buf); + + } } + + free(tmp_data); } static void @@ -1000,7 +1318,9 @@ jbd_journal_skip_pure_revoke(struct jbd_journal *journal, jbd_journal_write_sb(journal); } -static void jbd_journal_flush_all_trans(struct jbd_journal *journal) +static void +jbd_journal_purge_cp_trans(struct jbd_journal *journal, + bool flush) { struct jbd_trans *trans; while ((trans = TAILQ_FIRST(&journal->cp_queue))) { @@ -1009,9 +1329,35 @@ static void jbd_journal_flush_all_trans(struct jbd_journal *journal) trans, trans_node); jbd_journal_skip_pure_revoke(journal, trans); - } else - jbd_journal_flush_trans(trans); - + } else { + if (trans->data_cnt == + trans->written_cnt) { + journal->start = + trans->start_iblock + + trans->alloc_blocks; + wrap(&journal->jbd_fs->sb, + journal->start); + journal->trans_id = + trans->trans_id + 1; + TAILQ_REMOVE(&journal->cp_queue, + trans, + trans_node); + jbd_journal_free_trans(journal, + trans, + false); + jbd_journal_write_sb(journal); + } else if (!flush) { + journal->start = + trans->start_iblock; + wrap(&journal->jbd_fs->sb, + journal->start); + journal->trans_id = + trans->trans_id; + jbd_journal_write_sb(journal); + break; + } else + jbd_journal_flush_trans(trans); + } } } @@ -1024,12 +1370,16 @@ int jbd_journal_stop(struct jbd_journal *journal) struct jbd_fs *jbd_fs = journal->jbd_fs; uint32_t features_incompatible; - /* Commit all the transactions to the journal.*/ - jbd_journal_commit_all(journal); - /* Make sure that 
journalled content have reached * the disk.*/ - jbd_journal_flush_all_trans(journal); + jbd_journal_purge_cp_trans(journal, true); + + /* There should be no block record in this journal + * session. */ + if (!RB_EMPTY(&journal->block_rec_root)) + ext4_dbg(DEBUG_JBD, + DBG_WARN "There are still block records " + "in this journal session!\n"); features_incompatible = ext4_get32(&jbd_fs->inode_ref.fs->sb, @@ -1065,7 +1415,7 @@ static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal, /* If there is no space left, flush all journalled * blocks to disk first.*/ if (journal->last == journal->start) - ext4_block_cache_flush(journal->jbd_fs->inode_ref.fs->bdev); + jbd_journal_purge_cp_trans(journal, true); return start_block; } @@ -1083,34 +1433,158 @@ jbd_journal_new_trans(struct jbd_journal *journal) /* We will assign a trans_id to this transaction, * once it has been committed.*/ trans->journal = journal; + trans->data_csum = EXT4_CRC32_INIT; trans->error = EOK; + TAILQ_INIT(&trans->buf_queue); return trans; } -static void jbd_trans_end_write(struct ext4_bcache *bc __unused, - struct ext4_buf *buf __unused, - int res, - void *arg); - /**@brief gain access to it before making any modications. 
* @param journal current journal session + * @param trans transaction * @param block descriptor * @return standard error code.*/ int jbd_trans_get_access(struct jbd_journal *journal, + struct jbd_trans *trans, struct ext4_block *block) { int r = EOK; struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs; + struct jbd_buf *jbd_buf = block->buf->end_write_arg; /* only used below once end_write is known to be jbd_trans_end_write */ /* If the buffer has already been modified, we should * flush dirty data in this buffer to disk.*/ if (ext4_bcache_test_flag(block->buf, BC_DIRTY) && block->buf->end_write == jbd_trans_end_write) { - r = ext4_block_flush_buf(fs->bdev, block->buf); + ext4_assert(jbd_buf); + if (jbd_buf->trans != trans) /* flush only when the dirty buffer is tracked by a different transaction */ + r = ext4_block_flush_buf(fs->bdev, block->buf); + } return r; } +static struct jbd_block_rec * +jbd_trans_block_rec_lookup(struct jbd_journal *journal, + ext4_fsblk_t lba) +{ /* Look up the block record for lba in journal->block_rec_root with RB_FIND; a stack record with only .lba set serves as the search key. */ + struct jbd_block_rec tmp = { + .lba = lba + }; + + return RB_FIND(jbd_block, + &journal->block_rec_root, + &tmp); +} + /**@brief Move block_rec onto the tbrec_list of new_trans and record new_trans and new_buf as its owner and buffer.*/ +static void +jbd_trans_change_ownership(struct jbd_block_rec *block_rec, + struct jbd_trans *new_trans, + struct ext4_buf *new_buf) +{ + LIST_REMOVE(block_rec, tbrec_node); + /* Now this block record belongs to this transaction. 
*/ + LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node); + block_rec->trans = new_trans; + block_rec->buf = new_buf; +} + /**@brief Make trans the owner of the block record for lba: an existing record is re-owned through jbd_trans_change_ownership(), otherwise a new zeroed record is linked into trans->tbrec_list and the journal RB-tree. Yields the record, or NULL when calloc() fails.*/ +static inline struct jbd_block_rec * +jbd_trans_insert_block_rec(struct jbd_trans *trans, + ext4_fsblk_t lba, + struct ext4_buf *buf) +{ + struct jbd_block_rec *block_rec; + block_rec = jbd_trans_block_rec_lookup(trans->journal, lba); + if (block_rec) { + jbd_trans_change_ownership(block_rec, trans, buf); + return block_rec; + } + block_rec = calloc(1, sizeof(struct jbd_block_rec)); + if (!block_rec) + return NULL; + + block_rec->lba = lba; + block_rec->buf = buf; + block_rec->trans = trans; + TAILQ_INIT(&block_rec->dirty_buf_queue); + LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node); + RB_INSERT(jbd_block, &trans->journal->block_rec_root, block_rec); + return block_rec; +} + /**@brief Does nothing unless block_rec is owned by trans. With abort false, jbd_trans_end_write() is invoked with EOK for every jbd_buf still on dirty_buf_queue. With abort true, the journalled copy of the last queued jbd_buf is copied back into the cache buffer of the block and the record passes to the transaction of that jbd_buf.*/ +static void +jbd_trans_finish_callback(struct jbd_journal *journal, + const struct jbd_trans *trans, + struct jbd_block_rec *block_rec, + bool abort) +{ + struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs; + if (block_rec->trans != trans) + return; + + if (!abort) { + struct jbd_buf *jbd_buf, *tmp; + TAILQ_FOREACH_SAFE(jbd_buf, + &block_rec->dirty_buf_queue, + dirty_buf_node, + tmp) { + /* All we need is a fake ext4_buf. 
*/ + struct ext4_buf buf; /* NOTE(review): buf is passed on uninitialized; jbd_trans_end_write() may store to its end_write and end_write_arg fields - confirm nothing reads it. */ + + jbd_trans_end_write(fs->bdev->bc, + &buf, + EOK, + jbd_buf); + } + } else { + struct jbd_buf *jbd_buf; + struct ext4_block jbd_block = EXT4_BLOCK_ZERO(), + block = EXT4_BLOCK_ZERO(); + jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue, + jbd_buf_dirty); + if (jbd_buf) { + ext4_assert(ext4_block_get(fs->bdev, + &jbd_block, + jbd_buf->jbd_lba) == EOK); /* NOTE(review): the block read is performed inside ext4_assert(); if that macro can expand to nothing the read disappears - confirm. */ + ext4_assert(ext4_block_get_noread(fs->bdev, + &block, + block_rec->lba) == EOK); /* NOTE(review): same concern as the ext4_assert() above. */ + memcpy(block.data, jbd_block.data, + journal->block_size); + + jbd_trans_change_ownership(block_rec, + jbd_buf->trans, block.buf); + + block.buf->end_write = jbd_trans_end_write; + block.buf->end_write_arg = jbd_buf; + + ext4_bcache_set_flag(jbd_block.buf, BC_TMP); + ext4_bcache_set_dirty(block.buf); + + ext4_block_set(fs->bdev, &jbd_block); + ext4_block_set(fs->bdev, &block); + return; + } + } +} + /**@brief Unlink block_rec from its transaction list and from the journal RB-tree and free it, but only when it is owned by trans.*/ +static inline void +jbd_trans_remove_block_rec(struct jbd_journal *journal, + struct jbd_block_rec *block_rec, + struct jbd_trans *trans) +{ + /* If this block record doesn't belong to this transaction, + * give up.*/ + if (block_rec->trans == trans) { + LIST_REMOVE(block_rec, tbrec_node); + RB_REMOVE(jbd_block, + &journal->block_rec_root, + block_rec); + free(block_rec); + } +} + /**@brief Add block to a transaction and mark it dirty. 
* @param trans transaction * @param block block descriptor @@ -1120,10 +1594,28 @@ int jbd_trans_set_block_dirty(struct jbd_trans *trans, { struct jbd_buf *buf; + struct jbd_block_rec *block_rec; + if (block->buf->end_write == jbd_trans_end_write) { + buf = block->buf->end_write_arg; + if (buf && buf->trans == trans) + return EOK; /* the buffer is already tracked by this transaction */ + } buf = calloc(1, sizeof(struct jbd_buf)); if (!buf) return ENOMEM; + if ((block_rec = jbd_trans_insert_block_rec(trans, + block->lb_id, + block->buf)) == NULL) { + free(buf); + return ENOMEM; + } + + TAILQ_INSERT_TAIL(&block_rec->dirty_buf_queue, + buf, + dirty_buf_node); + + buf->block_rec = block_rec; buf->trans = trans; buf->block = *block; ext4_bcache_inc_ref(block->buf); @@ -1134,7 +1626,7 @@ int jbd_trans_set_block_dirty(struct jbd_trans *trans, block->buf->end_write_arg = buf; trans->data_cnt++; - LIST_INSERT_HEAD(&trans->buf_list, buf, buf_node); + TAILQ_INSERT_HEAD(&trans->buf_queue, buf, buf_node); ext4_bcache_set_dirty(block->buf); return EOK; @@ -1157,6 +1649,37 @@ int jbd_trans_revoke_block(struct jbd_trans *trans, return EOK; } +/**@brief Try to add block to be revoked to a transaction. + * If @lba still remains in a transaction on the checkpoint + * queue, add @lba as a revoked block to the transaction. + * @param trans transaction + * @param lba logical block address + * @return standard error code (the error from flushing the buffer, otherwise EOK)*/ +int jbd_trans_try_revoke_block(struct jbd_trans *trans, + ext4_fsblk_t lba) +{ + int r = EOK; + struct jbd_journal *journal = trans->journal; + struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs; + struct jbd_block_rec *block_rec = + jbd_trans_block_rec_lookup(journal, lba); + + /* Make sure we don't flush any buffers belonging to this transaction. */ + if (block_rec && block_rec->trans != trans) { + /* If the buffer has not been flushed yet, flush it now. 
*/ + if (block_rec->buf) { + r = ext4_block_flush_buf(fs->bdev, block_rec->buf); + if (r != EOK) + return r; + + } + + jbd_trans_revoke_block(trans, lba); /* NOTE(review): the int result of jbd_trans_revoke_block() is not checked here - confirm that is intended. */ + } + + return EOK; +} + /**@brief Free a transaction * @param journal current journal session * @param trans transaction @@ -1168,9 +1691,11 @@ void jbd_journal_free_trans(struct jbd_journal *journal, { struct jbd_buf *jbd_buf, *tmp; struct jbd_revoke_rec *rec, *tmp2; + struct jbd_block_rec *block_rec, *tmp3; struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs; - LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node, + TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) { + block_rec = jbd_buf->block_rec; if (abort) { jbd_buf->block.buf->end_write = NULL; jbd_buf->block.buf->end_write_arg = NULL; @@ -1178,7 +1703,14 @@ void jbd_journal_free_trans(struct jbd_journal *journal, ext4_block_set(fs->bdev, &jbd_buf->block); } - LIST_REMOVE(jbd_buf, buf_node); + TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue, + jbd_buf, + dirty_buf_node); + jbd_trans_finish_callback(journal, + trans, + block_rec, + abort); + TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node); free(jbd_buf); } LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node, @@ -1186,6 +1718,10 @@ void jbd_journal_free_trans(struct jbd_journal *journal, LIST_REMOVE(rec, revoke_node); free(rec); } + LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node, + tmp3) { + jbd_trans_remove_block_rec(journal, block_rec, trans); + } free(trans); } @@ -1212,6 +1748,13 @@ static int jbd_trans_write_commit_block(struct jbd_trans *trans) jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK); jbd_set32(&header->header, sequence, trans->trans_id); + if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb, + JBD_FEATURE_COMPAT_CHECKSUM)) { + jbd_set32(header, chksum_type, JBD_CRC32_CHKSUM); + jbd_set32(header, chksum_size, JBD_CRC32_CHKSUM_SIZE); + jbd_set32(header, chksum[0], trans->data_csum); + } + jbd_commit_csum_set(journal->jbd_fs, header); 
ext4_bcache_set_dirty(commit_block.buf); rc = jbd_block_set(journal->jbd_fs, &commit_block); if (rc != EOK) @@ -1235,21 +1778,70 @@ static int jbd_journal_prepare(struct jbd_journal *journal, struct jbd_buf *jbd_buf, *tmp; struct ext4_block desc_block, data_block; struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs; + uint32_t checksum = EXT4_CRC32_INIT; + + /* Try to remove any non-dirty buffers from the tail of + * buf_queue. */ + TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue, + jbd_trans_buf, buf_node, tmp) { + /* We stop the iteration when we find a dirty buffer. */ + if (ext4_bcache_test_flag(jbd_buf->block.buf, + BC_DIRTY)) + break; + + TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue, + jbd_buf, + dirty_buf_node); + + jbd_buf->block.buf->end_write = NULL; + jbd_buf->block.buf->end_write_arg = NULL; + jbd_trans_finish_callback(journal, + trans, + jbd_buf->block_rec, + true); + + /* The buffer has not been modified, just release + * that jbd_buf. */ + jbd_trans_remove_block_rec(journal, + jbd_buf->block_rec, trans); + trans->data_cnt--; + + ext4_block_set(fs->bdev, &jbd_buf->block); + TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node); + free(jbd_buf); + } - LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node, tmp) { + TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) { struct tag_info tag_info; bool uuid_exist = false; if (!ext4_bcache_test_flag(jbd_buf->block.buf, BC_DIRTY)) { - /* The buffer has not been modified, just release - * that jbd_buf. */ + TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue, + jbd_buf, + dirty_buf_node); + jbd_buf->block.buf->end_write = NULL; jbd_buf->block.buf->end_write_arg = NULL; + jbd_trans_finish_callback(journal, + trans, + jbd_buf->block_rec, + true); + + /* The buffer has not been modified, just release + * that jbd_buf. 
*/ + jbd_trans_remove_block_rec(journal, + jbd_buf->block_rec, trans); + trans->data_cnt--; + ext4_block_set(fs->bdev, &jbd_buf->block); - LIST_REMOVE(jbd_buf, buf_node); + TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node); free(jbd_buf); continue; } + checksum = jbd_block_csum(journal->jbd_fs, + jbd_buf->block.data, + checksum, + trans->trans_id); again: if (!desc_iblock) { struct jbd_bhdr *bhdr; @@ -1272,6 +1864,9 @@ again: tag_tbl_size = journal->block_size - sizeof(struct jbd_bhdr); + if (jbd_has_csum(&journal->jbd_fs->sb)) + tag_tbl_size -= sizeof(struct jbd_block_tail); + if (!trans->start_iblock) trans->start_iblock = desc_iblock; @@ -1280,6 +1875,10 @@ again: tag_info.uuid_exist = uuid_exist; if (i == trans->data_cnt - 1) tag_info.last_tag = true; + else + tag_info.last_tag = false; + + tag_info.checksum = checksum; if (uuid_exist) memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid, @@ -1290,6 +1889,8 @@ again: tag_tbl_size, &tag_info); if (rc != EOK) { + jbd_meta_csum_set(journal->jbd_fs, + (struct jbd_bhdr *)desc_block.data); jbd_block_set(journal->jbd_fs, &desc_block); desc_iblock = 0; goto again; @@ -1305,6 +1906,7 @@ again: memcpy(data_block.data, jbd_buf->block.data, journal->block_size); + jbd_buf->jbd_lba = data_block.lb_id; rc = jbd_block_set(journal->jbd_fs, &data_block); if (rc != EOK) @@ -1315,8 +1917,12 @@ again: i++; } - if (rc == EOK && desc_iblock) + if (rc == EOK && desc_iblock) { + jbd_meta_csum_set(journal->jbd_fs, + (struct jbd_bhdr *)desc_block.data); + trans->data_csum = checksum; jbd_block_set(journal->jbd_fs, &desc_block); + } return rc; } @@ -1366,6 +1972,9 @@ again: tag_tbl_size = journal->block_size - sizeof(struct jbd_revoke_header); + if (jbd_has_csum(&journal->jbd_fs->sb)) + tag_tbl_size -= sizeof(struct jbd_block_tail); + if (!trans->start_iblock) trans->start_iblock = desc_iblock; @@ -1374,6 +1983,8 @@ again: if (tag_tbl_size < record_len) { jbd_set32(header, count, journal->block_size - tag_tbl_size); + 
jbd_meta_csum_set(journal->jbd_fs, + (struct jbd_bhdr *)desc_block.data); jbd_block_set(journal->jbd_fs, &desc_block); desc_iblock = 0; header = NULL; @@ -1398,24 +2009,14 @@ again: jbd_set32(header, count, journal->block_size - tag_tbl_size); + jbd_meta_csum_set(journal->jbd_fs, + (struct jbd_bhdr *)desc_block.data); jbd_block_set(journal->jbd_fs, &desc_block); } return rc; } -/**@brief Submit the transaction to transaction queue. - * @param journal current journal session - * @param trans transaction*/ -void -jbd_journal_submit_trans(struct jbd_journal *journal, - struct jbd_trans *trans) -{ - TAILQ_INSERT_TAIL(&journal->trans_queue, - trans, - trans_node); -} - /**@brief Put references of block descriptors in a transaction. * @param journal current journal session * @param trans transaction*/ @@ -1423,7 +2024,7 @@ void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans) { struct jbd_buf *jbd_buf, *tmp; struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs; - LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node, + TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) { struct ext4_block block = jbd_buf->block; ext4_block_set(fs->bdev, &block); @@ -1439,46 +2040,47 @@ static void jbd_trans_end_write(struct ext4_bcache *bc __unused, { struct jbd_buf *jbd_buf = arg; struct jbd_trans *trans = jbd_buf->trans; + struct jbd_block_rec *block_rec = jbd_buf->block_rec; struct jbd_journal *journal = trans->journal; bool first_in_queue = trans == TAILQ_FIRST(&journal->cp_queue); if (res != EOK) trans->error = res; - LIST_REMOVE(jbd_buf, buf_node); - free(jbd_buf); + TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node); + TAILQ_REMOVE(&block_rec->dirty_buf_queue, + jbd_buf, + dirty_buf_node); - /* Clear the end_write and end_write_arg fields. 
*/ - buf->end_write = NULL; - buf->end_write_arg = NULL; + jbd_trans_finish_callback(journal, + trans, + jbd_buf->block_rec, + false); + if (block_rec->trans == trans) { + block_rec->buf = NULL; + /* Clear the end_write and end_write_arg fields. */ + buf->end_write = NULL; + buf->end_write_arg = NULL; + } + + free(jbd_buf); trans->written_cnt++; if (trans->written_cnt == trans->data_cnt) { - TAILQ_REMOVE(&journal->cp_queue, trans, trans_node); - + /* If it is the first transaction on checkpoint queue, + * we will shift the start of the journal to the next + * transaction, and remove subsequent written + * transactions from checkpoint queue until we find + * an unwritten one. */ if (first_in_queue) { journal->start = trans->start_iblock + trans->alloc_blocks; wrap(&journal->jbd_fs->sb, journal->start); journal->trans_id = trans->trans_id + 1; - } - jbd_journal_free_trans(journal, trans, false); + TAILQ_REMOVE(&journal->cp_queue, trans, trans_node); + jbd_journal_free_trans(journal, trans, false); - if (first_in_queue) { - while ((trans = TAILQ_FIRST(&journal->cp_queue))) { - if (!trans->data_cnt) { - TAILQ_REMOVE(&journal->cp_queue, - trans, - trans_node); - jbd_journal_skip_pure_revoke(journal, - trans); - } else { - journal->start = trans->start_iblock; - wrap(&journal->jbd_fs->sb, journal->start); - journal->trans_id = trans->trans_id; - break; - } - } + jbd_journal_purge_cp_trans(journal, false); jbd_journal_write_sb(journal); jbd_write_sb(journal->jbd_fs); } @@ -1504,7 +2106,7 @@ int jbd_journal_commit_trans(struct jbd_journal *journal, if (rc != EOK) goto Finish; - if (LIST_EMPTY(&trans->buf_list) && + if (TAILQ_EMPTY(&trans->buf_queue) && LIST_EMPTY(&trans->revoke_list)) { /* Since there are no entries in both buffer list * and revoke entry list, we do not consider trans as @@ -1551,29 +2153,6 @@ Finish: return rc; } -/**@brief Commit one transaction on transaction queue - * to the journal. 
- * @param journal current journal session.*/ -void jbd_journal_commit_one(struct jbd_journal *journal) -{ - struct jbd_trans *trans; - - if ((trans = TAILQ_FIRST(&journal->trans_queue))) { - TAILQ_REMOVE(&journal->trans_queue, trans, trans_node); - jbd_journal_commit_trans(journal, trans); - } -} - -/**@brief Commit all the transactions on transaction queue - * to the journal. - * @param journal current journal session.*/ -void jbd_journal_commit_all(struct jbd_journal *journal) -{ - while (!TAILQ_EMPTY(&journal->trans_queue)) { - jbd_journal_commit_one(journal); - } -} - /** * @} */