ext4_journal: forcibly flush data to disk when stopping journalling.
[lwext4.git] / lwext4 / ext4_journal.c
index be8550d64ab0d8fed9d014b7c4c41fac2c09c85e..d0032a0edfd51bac4bfffacf888b03bb1e246eb7 100644 (file)
@@ -39,6 +39,7 @@
 #include "ext4_types.h"
 #include "ext4_fs.h"
 #include "ext4_super.h"
+#include "ext4_journal.h"
 #include "ext4_errno.h"
 #include "ext4_blockdev.h"
 #include "ext4_crc32c.h"
@@ -209,6 +210,9 @@ int jbd_block_get(struct jbd_fs *jbd_fs,
 
        struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
        rc = ext4_block_get(bdev, block, fblock);
+       if (rc == EOK)
+               ext4_bcache_set_flag(block->buf, BC_FLUSH);
+
        return rc;
 }
 
@@ -226,6 +230,9 @@ int jbd_block_get_noread(struct jbd_fs *jbd_fs,
 
        struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
        rc = ext4_block_get_noread(bdev, block, fblock);
+       if (rc == EOK)
+               ext4_bcache_set_flag(block->buf, BC_FLUSH);
+
        return rc;
 }
 
@@ -359,16 +366,16 @@ jbd_write_block_tag(struct jbd_fs *jbd_fs,
                                             JBD_FEATURE_INCOMPAT_64BIT))
                        jbd_set32(tag, blocknr_high, tag_info->block >> 32);
 
-               if (!tag_info->uuid_exist) {
+               if (tag_info->uuid_exist) {
                        if (remain_buf_size - tag_bytes < UUID_SIZE)
                                return EINVAL;
 
                        uuid_start = (char *)tag + tag_bytes;
                        tag_info->tag_bytes += UUID_SIZE;
                        memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
+               } else
                        jbd_set32(tag, flags,
                                  jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
-               }
 
                if (tag_info->last_tag)
                        jbd_set32(tag, flags,
@@ -381,16 +388,16 @@ jbd_write_block_tag(struct jbd_fs *jbd_fs,
                                             JBD_FEATURE_INCOMPAT_64BIT))
                        jbd_set32(tag, blocknr_high, tag_info->block >> 32);
 
-               if (!tag_info->uuid_exist) {
+               if (tag_info->uuid_exist) {
                        if (remain_buf_size - tag_bytes < UUID_SIZE)
                                return EINVAL;
 
                        uuid_start = (char *)tag + tag_bytes;
                        tag_info->tag_bytes += UUID_SIZE;
                        memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
+               } else
                        jbd_set16(tag, flags,
                                  jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
-               }
 
                if (tag_info->last_tag)
                        jbd_set16(tag, flags,
@@ -752,8 +759,17 @@ int jbd_recover(struct jbd_fs *jbd_fs)
 
        r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
        if (r == EOK) {
+               uint32_t features_incompatible =
+                       ext4_get32(&jbd_fs->inode_ref.fs->sb,
+                                  features_incompatible);
                jbd_set32(&jbd_fs->sb, start, 0);
+               features_incompatible &= ~EXT4_FINCOM_RECOVER;
+               ext4_set32(&jbd_fs->inode_ref.fs->sb,
+                          features_incompatible,
+                          features_incompatible);
                jbd_fs->dirty = true;
+               r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
+                                 &jbd_fs->inode_ref.fs->sb);
        }
        jbd_destroy_revoke_tree(&info);
        return r;
@@ -762,7 +778,7 @@ int jbd_recover(struct jbd_fs *jbd_fs)
 void jbd_journal_write_sb(struct jbd_journal *journal)
 {
        struct jbd_fs *jbd_fs = journal->jbd_fs;
-       jbd_set32(&jbd_fs->sb, start, journal->first);
+       jbd_set32(&jbd_fs->sb, start, journal->start);
        jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
        jbd_fs->dirty = true;
 }
@@ -770,6 +786,19 @@ void jbd_journal_write_sb(struct jbd_journal *journal)
 int jbd_journal_start(struct jbd_fs *jbd_fs,
                      struct jbd_journal *journal)
 {
+       int r;
+       uint32_t features_incompatible =
+                       ext4_get32(&jbd_fs->inode_ref.fs->sb,
+                                  features_incompatible);
+       features_incompatible |= EXT4_FINCOM_RECOVER;
+       ext4_set32(&jbd_fs->inode_ref.fs->sb,
+                       features_incompatible,
+                       features_incompatible);
+       r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
+                       &jbd_fs->inode_ref.fs->sb);
+       if (r != EOK)
+               return r;
+
        journal->first = jbd_get32(&jbd_fs->sb, first);
        journal->start = journal->first;
        journal->last = journal->first;
@@ -779,6 +808,7 @@ int jbd_journal_start(struct jbd_fs *jbd_fs,
        journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
 
        TAILQ_INIT(&journal->trans_queue);
+       TAILQ_INIT(&journal->cp_queue);
        journal->jbd_fs = jbd_fs;
        jbd_journal_write_sb(journal);
        return jbd_write_sb(jbd_fs);
@@ -786,6 +816,26 @@ int jbd_journal_start(struct jbd_fs *jbd_fs,
 
 int jbd_journal_stop(struct jbd_journal *journal)
 {
+
+       int r;
+       struct jbd_fs *jbd_fs = journal->jbd_fs;
+       uint32_t features_incompatible;
+
+       jbd_journal_commit_all(journal);
+       ext4_block_cache_flush(jbd_fs->inode_ref.fs->bdev);
+
+       features_incompatible =
+               ext4_get32(&jbd_fs->inode_ref.fs->sb,
+                          features_incompatible);
+       features_incompatible &= ~EXT4_FINCOM_RECOVER;
+       ext4_set32(&jbd_fs->inode_ref.fs->sb,
+                       features_incompatible,
+                       features_incompatible);
+       r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
+                       &jbd_fs->inode_ref.fs->sb);
+       if (r != EOK)
+               return r;
+
        journal->start = 0;
        journal->trans_id = 0;
        jbd_journal_write_sb(journal);
@@ -795,9 +845,14 @@ int jbd_journal_stop(struct jbd_journal *journal)
 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
                                        struct jbd_trans *trans)
 {
-       uint32_t start_block = journal->last++;
+       uint32_t start_block;
+
+       start_block = journal->last++;
        trans->alloc_blocks++;
        wrap(&journal->jbd_fs->sb, journal->last);
+       if (journal->last == journal->start)
+               ext4_block_cache_flush(journal->jbd_fs->inode_ref.fs->bdev);
+
        return start_block;
 }
 
@@ -823,7 +878,13 @@ static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
 int jbd_trans_add_block(struct jbd_trans *trans,
                        struct ext4_block *block)
 {
-       struct jbd_buf *buf = calloc(1, sizeof(struct jbd_buf));
+       struct jbd_buf *buf;
+       /* We do not need to add unmodified buffers to
+        * a transaction. */
+       if (!ext4_bcache_test_flag(block->buf, BC_DIRTY))
+               return EOK;
+
+       buf = calloc(1, sizeof(struct jbd_buf));
        if (!buf)
                return ENOMEM;
 
@@ -858,10 +919,11 @@ void jbd_journal_free_trans(struct jbd_journal *journal,
 {
        struct jbd_buf *jbd_buf, *tmp;
        struct jbd_revoke_rec *rec, *tmp2;
+       struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
        LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
                          tmp) {
                if (abort)
-                       ext4_block_set(journal->jbd_fs->bdev, &jbd_buf->block);
+                       ext4_block_set(fs->bdev, &jbd_buf->block);
 
                LIST_REMOVE(jbd_buf, buf_node);
                free(jbd_buf);
@@ -890,9 +952,9 @@ static int jbd_trans_write_commit_block(struct jbd_trans *trans)
                return rc;
 
        header = (struct jbd_commit_header *)commit_block.data;
-       header->header.magic = JBD_MAGIC_NUMBER;
-       header->header.blocktype = JBD_COMMIT_BLOCK;
-       header->header.sequence = trans->trans_id;
+       jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
+       jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
+       jbd_set32(&header->header, sequence, trans->trans_id);
 
        ext4_bcache_set_dirty(commit_block.buf);
        rc = jbd_block_set(journal->jbd_fs, &commit_block);
@@ -922,21 +984,25 @@ again:
                        desc_iblock = jbd_journal_alloc_block(journal, trans);
                        rc = jbd_block_get_noread(journal->jbd_fs,
                                           &desc_block, desc_iblock);
-                       if (!rc)
+                       if (rc != EOK)
                                break;
 
                        ext4_bcache_set_dirty(desc_block.buf);
 
                        bhdr = (struct jbd_bhdr *)desc_block.data;
-                       bhdr->magic = JBD_MAGIC_NUMBER;
-                       bhdr->blocktype = JBD_DESCRIPTOR_BLOCK;
-                       bhdr->sequence = trans->trans_id;
+                       jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
+                       jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
+                       jbd_set32(bhdr, sequence, trans->trans_id);
 
                        tag_start = (char *)(bhdr + 1);
                        tag_ptr = tag_start;
                        uuid_exist = true;
                        tag_tbl_size = journal->block_size -
                                sizeof(struct jbd_bhdr);
+
+                       if (!trans->start_iblock)
+                               trans->start_iblock = desc_iblock;
+
                }
                tag_info.block = jbd_buf->block.lb_id;
                tag_info.uuid_exist = uuid_exist;
@@ -1008,25 +1074,30 @@ again:
                        desc_iblock = jbd_journal_alloc_block(journal, trans);
                        rc = jbd_block_get_noread(journal->jbd_fs,
                                           &desc_block, desc_iblock);
-                       if (!rc) {
+                       if (rc != EOK) {
                                break;
                        }
 
                        ext4_bcache_set_dirty(desc_block.buf);
 
                        bhdr = (struct jbd_bhdr *)desc_block.data;
-                       bhdr->magic = JBD_MAGIC_NUMBER;
-                       bhdr->blocktype = JBD_REVOKE_BLOCK;
-                       bhdr->sequence = trans->trans_id;
+                       jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
+                       jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
+                       jbd_set32(bhdr, sequence, trans->trans_id);
                        
                        header = (struct jbd_revoke_header *)bhdr;
                        blocks_entry = (char *)(header + 1);
                        tag_tbl_size = journal->block_size -
                                sizeof(struct jbd_revoke_header);
+
+                       if (!trans->start_iblock)
+                               trans->start_iblock = desc_iblock;
+
                }
 
                if (tag_tbl_size < record_len) {
-                       header->count = journal->block_size - tag_tbl_size;
+                       jbd_set32(header, count,
+                                 journal->block_size - tag_tbl_size);
                        jbd_block_set(journal->jbd_fs, &desc_block);
                        desc_iblock = 0;
                        header = NULL;
@@ -1048,7 +1119,8 @@ again:
        }
        if (rc == EOK && desc_iblock) {
                if (header != NULL)
-                       header->count = journal->block_size - tag_tbl_size;
+                       jbd_set32(header, count,
+                                 journal->block_size - tag_tbl_size);
 
                jbd_block_set(journal->jbd_fs, &desc_block);
        }
@@ -1068,9 +1140,11 @@ jbd_journal_submit_trans(struct jbd_journal *journal,
 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
 {
        struct jbd_buf *jbd_buf, *tmp;
+       struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
        LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
                        tmp) {
-               ext4_block_set(journal->jbd_fs->bdev, &jbd_buf->block);
+               struct ext4_block block = jbd_buf->block;
+               ext4_block_set(fs->bdev, &block);
        }
 }
 
@@ -1081,31 +1155,54 @@ static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
 {
        struct jbd_trans *trans = arg;
        struct jbd_journal *journal = trans->journal;
+       bool first_in_queue =
+               trans == TAILQ_FIRST(&journal->cp_queue);
        if (res != EOK)
                trans->error = res;
 
        trans->written_cnt++;
        if (trans->written_cnt == trans->data_cnt) {
                TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
-               journal->start += trans->alloc_blocks;
-               journal->trans_id = ++trans->trans_id;
-               jbd_journal_write_sb(journal);
+
+               if (first_in_queue) {
+                       journal->start = trans->start_iblock +
+                               trans->alloc_blocks;
+                       wrap(&journal->jbd_fs->sb, journal->start);
+                       journal->trans_id = trans->trans_id + 1;
+               }
                jbd_journal_free_trans(journal, trans, false);
 
-               if ((trans = TAILQ_FIRST(&journal->cp_queue))) {
-                       jbd_journal_cp_trans(journal, trans);
+               if (first_in_queue) {
+                       while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
+                               if (!trans->data_cnt) {
+                                       TAILQ_REMOVE(&journal->cp_queue,
+                                                    trans,
+                                                    trans_node);
+                                       journal->start = trans->start_iblock +
+                                               trans->alloc_blocks;
+                                       wrap(&journal->jbd_fs->sb, journal->start);
+                                       journal->trans_id = trans->trans_id + 1;
+                                       jbd_journal_free_trans(journal,
+                                                              trans, false);
+                               } else {
+                                       journal->start = trans->start_iblock;
+                                       wrap(&journal->jbd_fs->sb, journal->start);
+                                       journal->trans_id = trans->trans_id;
+                                       break;
+                               }
+                       }
+                       jbd_journal_write_sb(journal);
+                       jbd_write_sb(journal->jbd_fs);
                }
        }
 }
 
-/*
- * XXX: one should disable cache writeback first.
- */
 void jbd_journal_commit_one(struct jbd_journal *journal)
 {
        int rc = EOK;
        uint32_t last = journal->last;
        struct jbd_trans *trans;
+
        if ((trans = TAILQ_FIRST(&journal->trans_queue))) {
                TAILQ_REMOVE(&journal->trans_queue, trans, trans_node);
 
@@ -1123,10 +1220,30 @@ void jbd_journal_commit_one(struct jbd_journal *journal)
                        goto Finish;
 
                journal->alloc_trans_id++;
-               TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
-                         trans_node);
-               if (trans == TAILQ_FIRST(&journal->cp_queue)) {
-                       jbd_journal_cp_trans(journal, trans);
+               if (TAILQ_EMPTY(&journal->cp_queue)) {
+                       if (trans->data_cnt) {
+                               journal->start = trans->start_iblock;
+                               wrap(&journal->jbd_fs->sb, journal->start);
+                               journal->trans_id = trans->trans_id;
+                               jbd_journal_write_sb(journal);
+                               jbd_write_sb(journal->jbd_fs);
+                               TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
+                                               trans_node);
+                               jbd_journal_cp_trans(journal, trans);
+                       } else {
+                               journal->start = trans->start_iblock +
+                                       trans->alloc_blocks;
+                               wrap(&journal->jbd_fs->sb, journal->start);
+                               journal->trans_id = trans->trans_id + 1;
+                               jbd_journal_write_sb(journal);
+                               jbd_journal_free_trans(journal, trans, false);
+                       }
+               } else {
+                       TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
+                                       trans_node);
+                       if (trans->data_cnt)
+                               jbd_journal_cp_trans(journal, trans);
+
                }
        }
 Finish:
@@ -1136,6 +1253,13 @@ Finish:
        }
 }
 
+void jbd_journal_commit_all(struct jbd_journal *journal)
+{
+       while (!TAILQ_EMPTY(&journal->trans_queue)) {
+               jbd_journal_commit_one(journal);
+       }
+}
+
 /**
  * @}
  */