ext4_journal: handle EXT4_FINCOM_RECOVER flag properly.
[lwext4.git] / lwext4 / ext4_journal.c
index 95bbd5a3260d8c3e56120a4a75c27937637935ea..ff14c537dde38296f1eadabeba0b9e45bda7b3ce 100644 (file)
@@ -359,16 +359,16 @@ jbd_write_block_tag(struct jbd_fs *jbd_fs,
                                             JBD_FEATURE_INCOMPAT_64BIT))
                        jbd_set32(tag, blocknr_high, tag_info->block >> 32);
 
-               if (!tag_info->uuid_exist) {
+               if (tag_info->uuid_exist) {
                        if (remain_buf_size - tag_bytes < UUID_SIZE)
                                return EINVAL;
 
                        uuid_start = (char *)tag + tag_bytes;
                        tag_info->tag_bytes += UUID_SIZE;
                        memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
+               } else
                        jbd_set32(tag, flags,
                                  jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
-               }
 
                if (tag_info->last_tag)
                        jbd_set32(tag, flags,
@@ -381,16 +381,16 @@ jbd_write_block_tag(struct jbd_fs *jbd_fs,
                                             JBD_FEATURE_INCOMPAT_64BIT))
                        jbd_set32(tag, blocknr_high, tag_info->block >> 32);
 
-               if (!tag_info->uuid_exist) {
+               if (tag_info->uuid_exist) {
                        if (remain_buf_size - tag_bytes < UUID_SIZE)
                                return EINVAL;
 
                        uuid_start = (char *)tag + tag_bytes;
                        tag_info->tag_bytes += UUID_SIZE;
                        memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
+               } else
                        jbd_set16(tag, flags,
                                  jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
-               }
 
                if (tag_info->last_tag)
                        jbd_set16(tag, flags,
@@ -410,7 +410,6 @@ jbd_iterate_block_table(struct jbd_fs *jbd_fs,
                                        void *arg),
                        void *arg)
 {
-       ext4_fsblk_t block = 0;
        char *tag_start, *tag_ptr;
        int tag_bytes = jbd_tag_bytes(jbd_fs);
        tag_start = __tag_start;
@@ -433,7 +432,7 @@ jbd_iterate_block_table(struct jbd_fs *jbd_fs,
                        break;
 
                if (func)
-                       func(jbd_fs, block, tag_info.uuid, arg);
+                       func(jbd_fs, tag_info.block, tag_info.uuid, arg);
 
                if (tag_info.last_tag)
                        break;
@@ -753,8 +752,17 @@ int jbd_recover(struct jbd_fs *jbd_fs)
 
        r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
        if (r == EOK) {
+               uint32_t features_incompatible =
+                       ext4_get32(&jbd_fs->inode_ref.fs->sb,
+                                  features_incompatible);
                jbd_set32(&jbd_fs->sb, start, 0);
+               features_incompatible &= ~EXT4_FINCOM_RECOVER;
+               ext4_set32(&jbd_fs->inode_ref.fs->sb,
+                          features_incompatible,
+                          features_incompatible);
                jbd_fs->dirty = true;
+               r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
+                                 &jbd_fs->inode_ref.fs->sb);
        }
        jbd_destroy_revoke_tree(&info);
        return r;
@@ -763,7 +771,7 @@ int jbd_recover(struct jbd_fs *jbd_fs)
 void jbd_journal_write_sb(struct jbd_journal *journal)
 {
        struct jbd_fs *jbd_fs = journal->jbd_fs;
-       jbd_set32(&jbd_fs->sb, start, journal->first);
+       jbd_set32(&jbd_fs->sb, start, journal->start);
        jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
        jbd_fs->dirty = true;
 }
@@ -771,14 +779,29 @@ void jbd_journal_write_sb(struct jbd_journal *journal)
 int jbd_journal_start(struct jbd_fs *jbd_fs,
                      struct jbd_journal *journal)
 {
+       int r;
+       uint32_t features_incompatible =
+                       ext4_get32(&jbd_fs->inode_ref.fs->sb,
+                                  features_incompatible);
+       features_incompatible |= EXT4_FINCOM_RECOVER;
+       ext4_set32(&jbd_fs->inode_ref.fs->sb,
+                       features_incompatible,
+                       features_incompatible);
+       r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
+                       &jbd_fs->inode_ref.fs->sb);
+       if (r != EOK)
+               return r;
+
        journal->first = jbd_get32(&jbd_fs->sb, first);
        journal->start = journal->first;
        journal->last = journal->first;
        journal->trans_id = 1;
+       journal->alloc_trans_id = 1;
 
        journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
 
        TAILQ_INIT(&journal->trans_queue);
+       TAILQ_INIT(&journal->cp_queue);
        journal->jbd_fs = jbd_fs;
        jbd_journal_write_sb(journal);
        return jbd_write_sb(jbd_fs);
@@ -786,15 +809,31 @@ int jbd_journal_start(struct jbd_fs *jbd_fs,
 
 int jbd_journal_stop(struct jbd_journal *journal)
 {
+       int r;
+       struct jbd_fs *jbd_fs = journal->jbd_fs;
+       uint32_t features_incompatible =
+                       ext4_get32(&jbd_fs->inode_ref.fs->sb,
+                                  features_incompatible);
+       features_incompatible &= ~EXT4_FINCOM_RECOVER;
+       ext4_set32(&jbd_fs->inode_ref.fs->sb,
+                       features_incompatible,
+                       features_incompatible);
+       r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
+                       &jbd_fs->inode_ref.fs->sb);
+       if (r != EOK)
+               return r;
+
        journal->start = 0;
        journal->trans_id = 0;
        jbd_journal_write_sb(journal);
        return jbd_write_sb(journal->jbd_fs);
 }
 
-static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal)
+static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
+                                       struct jbd_trans *trans)
 {
        uint32_t start_block = journal->last++;
+       trans->alloc_blocks++;
        wrap(&journal->jbd_fs->sb, journal->last);
        return start_block;
 }
@@ -813,16 +852,31 @@ jbd_journal_new_trans(struct jbd_journal *journal)
        return trans;
 }
 
+static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
+                         struct ext4_buf *buf __unused,
+                         int res,
+                         void *arg);
+
 int jbd_trans_add_block(struct jbd_trans *trans,
                        struct ext4_block *block)
 {
-       struct jbd_buf *buf = calloc(1, sizeof(struct jbd_buf));
+       struct jbd_buf *buf;
+       /* Unmodified buffers do not need to be added to
+        * a transaction. */
+       if (!ext4_bcache_test_flag(block->buf, BC_DIRTY))
+               return EOK;
+
+       buf = calloc(1, sizeof(struct jbd_buf));
        if (!buf)
                return ENOMEM;
 
        buf->trans = trans;
        buf->block = *block;
        ext4_bcache_inc_ref(block->buf);
+
+       block->buf->end_write = jbd_trans_end_write;
+       block->buf->end_write_arg = trans;
+
        trans->data_cnt++;
        LIST_INSERT_HEAD(&trans->buf_list, buf, buf_node);
        return EOK;
@@ -842,13 +896,17 @@ int jbd_trans_revoke_block(struct jbd_trans *trans,
 }
 
 void jbd_journal_free_trans(struct jbd_journal *journal,
-                           struct jbd_trans *trans)
+                           struct jbd_trans *trans,
+                           bool abort)
 {
        struct jbd_buf *jbd_buf, *tmp;
        struct jbd_revoke_rec *rec, *tmp2;
+       struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
        LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
                          tmp) {
-               ext4_block_set(journal->jbd_fs->bdev, &jbd_buf->block);
+               if (abort)
+                       ext4_block_set(fs->bdev, &jbd_buf->block);
+
                LIST_REMOVE(jbd_buf, buf_node);
                free(jbd_buf);
        }
@@ -861,15 +919,6 @@ void jbd_journal_free_trans(struct jbd_journal *journal,
        free(trans);
 }
 
-static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
-                         struct ext4_buf *buf __unused,
-                         int res,
-                         void *arg)
-{
-       struct jbd_trans *trans = arg;
-       trans->error = res;
-}
-
 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
 {
        int rc;
@@ -878,16 +927,16 @@ static int jbd_trans_write_commit_block(struct jbd_trans *trans)
        struct ext4_block commit_block;
        struct jbd_journal *journal = trans->journal;
 
-       commit_iblock = jbd_journal_alloc_block(trans->journal);
+       commit_iblock = jbd_journal_alloc_block(journal, trans);
        rc = jbd_block_get_noread(journal->jbd_fs,
                        &commit_block, commit_iblock);
        if (rc != EOK)
                return rc;
 
        header = (struct jbd_commit_header *)commit_block.data;
-       header->header.magic = JBD_MAGIC_NUMBER;
-       header->header.blocktype = JBD_COMMIT_BLOCK;
-       header->header.sequence = trans->trans_id;
+       jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
+       jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
+       jbd_set32(&header->header, sequence, trans->trans_id);
 
        ext4_bcache_set_dirty(commit_block.buf);
        rc = jbd_block_set(journal->jbd_fs, &commit_block);
@@ -914,24 +963,28 @@ static int jbd_journal_prepare(struct jbd_journal *journal,
 again:
                if (!desc_iblock) {
                        struct jbd_bhdr *bhdr;
-                       desc_iblock = jbd_journal_alloc_block(journal);
+                       desc_iblock = jbd_journal_alloc_block(journal, trans);
                        rc = jbd_block_get_noread(journal->jbd_fs,
                                           &desc_block, desc_iblock);
-                       if (!rc)
+                       if (rc != EOK)
                                break;
 
                        ext4_bcache_set_dirty(desc_block.buf);
 
                        bhdr = (struct jbd_bhdr *)desc_block.data;
-                       bhdr->magic = JBD_MAGIC_NUMBER;
-                       bhdr->blocktype = JBD_DESCRIPTOR_BLOCK;
-                       bhdr->sequence = trans->trans_id;
+                       jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
+                       jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
+                       jbd_set32(bhdr, sequence, trans->trans_id);
 
                        tag_start = (char *)(bhdr + 1);
                        tag_ptr = tag_start;
                        uuid_exist = true;
                        tag_tbl_size = journal->block_size -
                                sizeof(struct jbd_bhdr);
+
+                       if (!trans->start_iblock)
+                               trans->start_iblock = desc_iblock;
+
                }
                tag_info.block = jbd_buf->block.lb_id;
                tag_info.uuid_exist = uuid_exist;
@@ -952,7 +1005,7 @@ again:
                        goto again;
                }
 
-               data_iblock = jbd_journal_alloc_block(journal);
+               data_iblock = jbd_journal_alloc_block(journal, trans);
                rc = jbd_block_get_noread(journal->jbd_fs,
                                &data_block, data_iblock);
                if (rc != EOK)
@@ -1000,28 +1053,33 @@ jbd_journal_prepare_revoke(struct jbd_journal *journal,
 again:
                if (!desc_iblock) {
                        struct jbd_bhdr *bhdr;
-                       desc_iblock = jbd_journal_alloc_block(journal);
+                       desc_iblock = jbd_journal_alloc_block(journal, trans);
                        rc = jbd_block_get_noread(journal->jbd_fs,
                                           &desc_block, desc_iblock);
-                       if (!rc) {
+                       if (rc != EOK) {
                                break;
                        }
 
                        ext4_bcache_set_dirty(desc_block.buf);
 
                        bhdr = (struct jbd_bhdr *)desc_block.data;
-                       bhdr->magic = JBD_MAGIC_NUMBER;
-                       bhdr->blocktype = JBD_REVOKE_BLOCK;
-                       bhdr->sequence = trans->trans_id;
+                       jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
+                       jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
+                       jbd_set32(bhdr, sequence, trans->trans_id);
                        
                        header = (struct jbd_revoke_header *)bhdr;
                        blocks_entry = (char *)(header + 1);
                        tag_tbl_size = journal->block_size -
                                sizeof(struct jbd_revoke_header);
+
+                       if (!trans->start_iblock)
+                               trans->start_iblock = desc_iblock;
+
                }
 
                if (tag_tbl_size < record_len) {
-                       header->count = journal->block_size - tag_tbl_size;
+                       jbd_set32(header, count,
+                                 journal->block_size - tag_tbl_size);
                        jbd_block_set(journal->jbd_fs, &desc_block);
                        desc_iblock = 0;
                        header = NULL;
@@ -1043,7 +1101,8 @@ again:
        }
        if (rc == EOK && desc_iblock) {
                if (header != NULL)
-                       header->count = journal->block_size - tag_tbl_size;
+                       jbd_set32(header, count,
+                                 journal->block_size - tag_tbl_size);
 
                jbd_block_set(journal->jbd_fs, &desc_block);
        }
@@ -1060,64 +1119,94 @@ jbd_journal_submit_trans(struct jbd_journal *journal,
                          trans_node);
 }
 
+void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
+{
+       struct jbd_buf *jbd_buf, *tmp;
+       struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
+       LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
+                       tmp) {
+               ext4_block_set(fs->bdev, &jbd_buf->block);
+       }
+}
+
+static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
+                         struct ext4_buf *buf __unused,
+                         int res,
+                         void *arg)
+{
+       struct jbd_trans *trans = arg;
+       struct jbd_journal *journal = trans->journal;
+       if (res != EOK)
+               trans->error = res;
+
+       trans->written_cnt++;
+       if (trans->written_cnt == trans->data_cnt) {
+again:
+               TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
+               journal->start = trans->start_iblock +
+                                trans->alloc_blocks;
+               journal->trans_id = trans->trans_id + 1;
+               jbd_journal_write_sb(journal);
+               jbd_write_sb(journal->jbd_fs);
+               jbd_journal_free_trans(journal, trans, false);
+
+               if ((trans = TAILQ_FIRST(&journal->cp_queue))) {
+                       if (trans->data_cnt) {
+                               jbd_journal_cp_trans(journal, trans);
+                               return;
+                       }
+                       goto again;
+               }
+       }
+}
+
 /*
  * XXX: one should disable cache writeback first.
  */
-void
-jbd_journal_commit_to_disk(struct jbd_journal *journal)
+void jbd_journal_commit_one(struct jbd_journal *journal)
 {
-       int rc;
-       uint32_t last = journal->last,
-                trans_id = journal->trans_id,
-                start = journal->start;
-       struct jbd_trans *trans, *tmp;
-       TAILQ_FOREACH_SAFE(trans, &journal->trans_queue,
-                          trans_node,
-                          tmp) {
-               struct jbd_buf *jbd_buf;
+       int rc = EOK;
+       uint32_t last = journal->last;
+       struct jbd_trans *trans;
+       if ((trans = TAILQ_FIRST(&journal->trans_queue))) {
                TAILQ_REMOVE(&journal->trans_queue, trans, trans_node);
 
-               trans->trans_id = trans_id + 1;
+               trans->trans_id = journal->alloc_trans_id;
                rc = jbd_journal_prepare(journal, trans);
-               if (rc != EOK) {
-                       journal->last = last;
-                       jbd_journal_free_trans(journal, trans);
-                       continue;
-               }
+               if (rc != EOK)
+                       goto Finish;
+
                rc = jbd_journal_prepare_revoke(journal, trans);
-               if (rc != EOK) {
-                       journal->last = last;
-                       jbd_journal_free_trans(journal, trans);
-                       continue;
-               }
-               rc = jbd_trans_write_commit_block(trans);
-               if (rc != EOK) {
-                       journal->last = last;
-                       jbd_journal_free_trans(journal, trans);
-                       continue;
-               }
-               LIST_FOREACH(jbd_buf, &trans->buf_list, buf_node) {
-                       struct ext4_block *block = &jbd_buf->block;
-                       block->buf->end_write = jbd_trans_end_write;
-                       block->buf->end_write_arg = trans;
-                       ext4_block_set(journal->jbd_fs->inode_ref.fs->bdev,
-                                       block);
-               }
-               if (trans->error != EOK) {
-                       journal->last = last;
-                       jbd_journal_free_trans(journal, trans);
-                       continue;
-               }
+               if (rc != EOK)
+                       goto Finish;
 
-               start = last;
-               trans_id++;
-               last = journal->last;
-               jbd_journal_free_trans(journal, trans);
+               rc = jbd_trans_write_commit_block(trans);
+               if (rc != EOK)
+                       goto Finish;
+
+               journal->alloc_trans_id++;
+               if (TAILQ_EMPTY(&journal->cp_queue)) {
+                       if (trans->data_cnt) {
+                               TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
+                                               trans_node);
+                               jbd_journal_cp_trans(journal, trans);
+                       } else {
+                               journal->start = trans->start_iblock +
+                                       trans->alloc_blocks;
+                               journal->trans_id = trans->trans_id + 1;
+                               jbd_journal_write_sb(journal);
+                               jbd_write_sb(journal->jbd_fs);
+                               jbd_journal_free_trans(journal, trans, false);
+                       }
+               } else
+                       TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
+                                       trans_node);
+       }
+Finish:
+       if (rc != EOK) {
+               journal->last = last;
+               jbd_journal_free_trans(journal, trans, true);
        }
-       
-       journal->start = start;
-       journal->trans_id = trans_id;
-       jbd_journal_write_sb(journal);
 }
 
 /**