ext4_journal: modify the first 4 bytes of the logged block when needed
[lwext4.git] / src / ext4_journal.c
index c68d5a14d81f9e018cc342475c1c5f691c1d7016..1d924a6668f4f4efbfb86800eeec8faeb4ae5e5a 100644 (file)
@@ -107,7 +107,7 @@ static inline int32_t
 trans_id_diff(uint32_t x, uint32_t y)
 {
        int32_t diff = x - y;
-       return (diff >= 0);
+       return diff;
 }
 
 static int
@@ -147,8 +147,8 @@ RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
 RB_GENERATE_INTERNAL(jbd_revoke_tree, jbd_revoke_rec, revoke_node,
                     jbd_revoke_rec_cmp, static inline)
 
-#define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
-#define jbd_free_revoke_entry(addr) free(addr)
+#define jbd_alloc_revoke_entry() ext4_calloc(1, sizeof(struct revoke_entry))
+#define jbd_free_revoke_entry(addr) ext4_free(addr)
 
 static int jbd_has_csum(struct jbd_sb *jbd_sb)
 {
@@ -619,6 +619,10 @@ struct tag_info {
        /**@brief  block number stored in this tag.*/
        ext4_fsblk_t block;
 
+       /**@brief  Whether the first 4 bytes of the block are equal to
+        *         JBD_MAGIC_NUMBER. */
+       bool is_escape;
+
        /**@brief  whether UUID part exists or not.*/
        bool uuid_exist;
 
@@ -649,6 +653,7 @@ jbd_extract_block_tag(struct jbd_fs *jbd_fs,
        tag_info->tag_bytes = tag_bytes;
        tag_info->uuid_exist = false;
        tag_info->last_tag = false;
+       tag_info->is_escape = false;
 
        /* See whether it is possible to hold a valid block tag.*/
        if (remain_buf_size - tag_bytes < 0)
@@ -664,7 +669,7 @@ jbd_extract_block_tag(struct jbd_fs *jbd_fs,
                                 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
 
                if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
-                       tag_info->block = 0;
+                       tag_info->is_escape = true;
 
                if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
                        /* See whether it is possible to hold UUID part.*/
@@ -689,7 +694,7 @@ jbd_extract_block_tag(struct jbd_fs *jbd_fs,
                                 (uint64_t)jbd_get32(tag, blocknr_high) << 32;
 
                if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
-                       tag_info->block = 0;
+                       tag_info->is_escape = true;
 
                if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
                        /* See whether it is possible to hold UUID part.*/
@@ -756,6 +761,10 @@ jbd_write_block_tag(struct jbd_fs *jbd_fs,
                        jbd_set32(tag, flags,
                                  jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
 
+               if (tag_info->is_escape)
+                       jbd_set32(tag, flags,
+                                 jbd_get32(tag, flags) | JBD_FLAG_ESCAPE);
+
        } else {
                struct jbd_block_tag *tag = __tag;
                memset(tag, 0, sizeof(struct jbd_block_tag));
@@ -782,6 +791,11 @@ jbd_write_block_tag(struct jbd_fs *jbd_fs,
                        jbd_set16(tag, flags,
                                  jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
 
+
+               if (tag_info->is_escape)
+                       jbd_set16(tag, flags,
+                                 jbd_get16(tag, flags) | JBD_FLAG_ESCAPE);
+
        }
        return EOK;
 }
@@ -798,9 +812,8 @@ jbd_iterate_block_table(struct jbd_fs *jbd_fs,
                        void *__tag_start,
                        int32_t tag_tbl_size,
                        void (*func)(struct jbd_fs * jbd_fs,
-                                       ext4_fsblk_t block,
-                                       uint8_t *uuid,
-                                       void *arg),
+                                    struct tag_info *tag_info,
+                                    void *arg),
                        void *arg)
 {
        char *tag_start, *tag_ptr;
@@ -826,7 +839,7 @@ jbd_iterate_block_table(struct jbd_fs *jbd_fs,
                        break;
 
                if (func)
-                       func(jbd_fs, tag_info.block, tag_info.uuid, arg);
+                       func(jbd_fs, &tag_info, arg);
 
                /* Stop the iteration when we reach the last tag. */
                if (tag_info.last_tag)
@@ -838,16 +851,14 @@ jbd_iterate_block_table(struct jbd_fs *jbd_fs,
 }
 
 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
-                                  ext4_fsblk_t block,
-                                  uint8_t *uuid,
+                                  struct tag_info *tag_info,
                                   void *arg)
 {
        uint32_t *iblock = arg;
-       ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
+       ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", tag_info->block);
        (*iblock)++;
        wrap(&jbd_fs->sb, *iblock);
        (void)jbd_fs;
-       (void)uuid;
        return;
 }
 
@@ -863,10 +874,9 @@ jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
 
 /**@brief  Replay a block in a transaction.
  * @param  jbd_fs jbd filesystem
- * @param  block  block address to be replayed.*/
+ * @param  tag_info block tag describing the logged block to be replayed.*/
 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
-                                 ext4_fsblk_t block,
-                                 uint8_t *uuid __unused,
+                                 struct tag_info *tag_info,
                                  void *__arg)
 {
        int r;
@@ -882,22 +892,22 @@ static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
 
        /* We replay this block only if the current transaction id
         * is equal or greater than that in revoke entry.*/
-       revoke_entry = jbd_revoke_entry_lookup(info, block);
+       revoke_entry = jbd_revoke_entry_lookup(info, tag_info->block);
        if (revoke_entry &&
-           trans_id_diff(arg->this_trans_id, revoke_entry->trans_id) < 0)
+           trans_id_diff(arg->this_trans_id, revoke_entry->trans_id) <= 0)
                return;
 
        ext4_dbg(DEBUG_JBD,
                 "Replaying block in block_tag: %" PRIu64 "\n",
-                block);
+                tag_info->block);
 
        r = jbd_block_get(jbd_fs, &journal_block, *this_block);
        if (r != EOK)
                return;
 
        /* We need special treatment for ext4 superblock. */
-       if (block) {
-               r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
+       if (tag_info->block) {
+               r = ext4_block_get_noread(fs->bdev, &ext4_block, tag_info->block);
                if (r != EOK) {
                        jbd_block_set(jbd_fs, &journal_block);
                        return;
@@ -907,6 +917,10 @@ static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
                        journal_block.data,
                        jbd_get32(&jbd_fs->sb, blocksize));
 
+               if (tag_info->is_escape)
+                       ((struct jbd_bhdr *)ext4_block.data)->magic =
+                                       to_be32(JBD_MAGIC_NUMBER);
+
                ext4_bcache_set_dirty(ext4_block.buf);
                ext4_block_set(fs->bdev, &ext4_block);
        } else {
@@ -1310,7 +1324,7 @@ static void jbd_journal_flush_trans(struct jbd_trans *trans)
        struct jbd_buf *jbd_buf, *tmp;
        struct jbd_journal *journal = trans->journal;
        struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
-       void *tmp_data = malloc(journal->block_size);
+       void *tmp_data = ext4_malloc(journal->block_size);
        ext4_assert(tmp_data);
 
        TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
@@ -1340,7 +1354,7 @@ static void jbd_journal_flush_trans(struct jbd_trans *trans)
                        ext4_block_set(fs->bdev, &block);
        }
 
-       free(tmp_data);
+       ext4_free(tmp_data);
 }
 
 static void
@@ -1496,7 +1510,7 @@ jbd_trans_insert_block_rec(struct jbd_trans *trans,
                jbd_trans_change_ownership(block_rec, trans);
                return block_rec;
        }
-       block_rec = calloc(1, sizeof(struct jbd_block_rec));
+       block_rec = ext4_calloc(1, sizeof(struct jbd_block_rec));
        if (!block_rec)
                return NULL;
 
@@ -1587,7 +1601,7 @@ jbd_trans_remove_block_rec(struct jbd_journal *journal,
                RB_REMOVE(jbd_block,
                                &journal->block_rec_root,
                                block_rec);
-               free(block_rec);
+               ext4_free(block_rec);
        }
 }
 
@@ -1609,13 +1623,13 @@ int jbd_trans_set_block_dirty(struct jbd_trans *trans,
                if (jbd_buf && jbd_buf->trans == trans)
                        return EOK;
        }
-       jbd_buf = calloc(1, sizeof(struct jbd_buf));
+       jbd_buf = ext4_calloc(1, sizeof(struct jbd_buf));
        if (!jbd_buf)
                return ENOMEM;
 
        if ((block_rec = jbd_trans_insert_block_rec(trans,
                                        block->lb_id)) == NULL) {
-               free(jbd_buf);
+               ext4_free(jbd_buf);
                return ENOMEM;
        }
 
@@ -1640,9 +1654,11 @@ int jbd_trans_set_block_dirty(struct jbd_trans *trans,
        rec = RB_FIND(jbd_revoke_tree,
                        &trans->revoke_root,
                        &tmp_rec);
-       if (rec)
+       if (rec) {
                RB_REMOVE(jbd_revoke_tree, &trans->revoke_root,
                          rec);
+               ext4_free(rec);
+       }
 
        return EOK;
 }
@@ -1654,8 +1670,16 @@ int jbd_trans_set_block_dirty(struct jbd_trans *trans,
 int jbd_trans_revoke_block(struct jbd_trans *trans,
                           ext4_fsblk_t lba)
 {
-       struct jbd_revoke_rec *rec =
-               calloc(1, sizeof(struct jbd_revoke_rec));
+       struct jbd_revoke_rec tmp_rec = {
+               .lba = lba
+       }, *rec;
+       rec = RB_FIND(jbd_revoke_tree,
+                     &trans->revoke_root,
+                     &tmp_rec);
+       if (rec)
+               return EOK;
+
+       rec = ext4_calloc(1, sizeof(struct jbd_revoke_rec));
        if (!rec)
                return ENOMEM;
 
@@ -1726,19 +1750,19 @@ void jbd_journal_free_trans(struct jbd_journal *journal,
                                abort,
                                false);
                TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
-               free(jbd_buf);
+               ext4_free(jbd_buf);
        }
        RB_FOREACH_SAFE(rec, jbd_revoke_tree, &trans->revoke_root,
                          tmp2) {
                RB_REMOVE(jbd_revoke_tree, &trans->revoke_root, rec);
-               free(rec);
+               ext4_free(rec);
        }
        LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
                          tmp3) {
                jbd_trans_remove_block_rec(journal, block_rec, trans);
        }
 
-       free(trans);
+       ext4_free(trans);
 }
 
 /**@brief  Write commit block for a transaction
@@ -1753,7 +1777,25 @@ static int jbd_trans_write_commit_block(struct jbd_trans *trans)
        struct jbd_journal *journal = trans->journal;
 
        commit_iblock = jbd_journal_alloc_block(journal, trans);
-       rc = jbd_block_get_noread(journal->jbd_fs, &block, commit_iblock);
+       orig_commit_iblock = commit_iblock;
+       commit_iblock++;
+       wrap(&journal->jbd_fs->sb, commit_iblock);
+
+       /* To prevent accidental reference to stale journalling metadata. */
+       if (orig_commit_iblock < commit_iblock) {
+               rc = jbd_block_get_noread(journal->jbd_fs, &block, commit_iblock);
+               if (rc != EOK)
+                       return rc;
+
+               memset(block.data, 0, journal->block_size);
+               ext4_bcache_set_dirty(block.buf);
+               ext4_bcache_set_flag(block.buf, BC_TMP);
+               rc = jbd_block_set(journal->jbd_fs, &block);
+               if (rc != EOK)
+                       return rc;
+       }
+
+       rc = jbd_block_get_noread(journal->jbd_fs, &block, orig_commit_iblock);
        if (rc != EOK)
                return rc;
 
@@ -1772,25 +1814,6 @@ static int jbd_trans_write_commit_block(struct jbd_trans *trans)
        ext4_bcache_set_dirty(block.buf);
        ext4_bcache_set_flag(block.buf, BC_TMP);
        rc = jbd_block_set(journal->jbd_fs, &block);
-       if (rc != EOK)
-               return rc;
-
-       orig_commit_iblock = commit_iblock;
-       commit_iblock++;
-       wrap(&journal->jbd_fs->sb, commit_iblock);
-
-       /* To prevent accidental reference to stale journalling metadata. */
-       if (orig_commit_iblock < commit_iblock) {
-               rc = jbd_block_get_noread(journal->jbd_fs, &block, commit_iblock);
-               if (rc != EOK)
-                       return rc;
-
-               memset(block.data, 0, journal->block_size);
-               ext4_bcache_set_dirty(block.buf);
-               ext4_bcache_set_flag(block.buf, BC_TMP);
-               rc = jbd_block_set(journal->jbd_fs, &block);
-       }
-
        return rc;
 }
 
@@ -1845,12 +1868,13 @@ static int jbd_journal_prepare(struct jbd_journal *journal,
 
                ext4_block_set(fs->bdev, &jbd_buf->block);
                TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
-               free(jbd_buf);
+               ext4_free(jbd_buf);
        }
 
        TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
                struct tag_info tag_info;
                bool uuid_exist = false;
+               bool is_escape = false;
                struct jbd_revoke_rec tmp_rec = {
                        .lba = jbd_buf->block_rec->lba
                };
@@ -1878,13 +1902,17 @@ static int jbd_journal_prepare(struct jbd_journal *journal,
 
                        ext4_block_set(fs->bdev, &jbd_buf->block);
                        TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
-                       free(jbd_buf);
+                       ext4_free(jbd_buf);
                        continue;
                }
                checksum = jbd_block_csum(journal->jbd_fs,
                                          jbd_buf->block.data,
                                          checksum,
                                          trans->trans_id);
+               if (((struct jbd_bhdr *)jbd_buf->block.data)->magic ==
+                               to_be32(JBD_MAGIC_NUMBER))
+                       is_escape = true;
+
 again:
                if (!desc_iblock) {
                        desc_iblock = jbd_journal_alloc_block(journal, trans);
@@ -1914,6 +1942,7 @@ again:
                }
                tag_info.block = jbd_buf->block.lb_id;
                tag_info.uuid_exist = uuid_exist;
+               tag_info.is_escape = is_escape;
                if (i == trans->data_cnt - 1)
                        tag_info.last_tag = true;
                else
@@ -1951,6 +1980,9 @@ again:
                data = data_block.data;
                memcpy(data, jbd_buf->block.data,
                        journal->block_size);
+               if (is_escape)
+                       ((struct jbd_bhdr *)data)->magic = 0;
+
                ext4_bcache_set_dirty(data_block.buf);
                ext4_bcache_set_flag(data_block.buf, BC_TMP);
                rc = jbd_block_set(journal->jbd_fs, &data_block);
@@ -2114,7 +2146,7 @@ static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
                buf->end_write_arg = NULL;
        }
 
-       free(jbd_buf);
+       ext4_free(jbd_buf);
 
        trans->written_cnt++;
        if (trans->written_cnt == trans->data_cnt) {
@@ -2248,7 +2280,7 @@ struct jbd_trans *
 jbd_journal_new_trans(struct jbd_journal *journal)
 {
        struct jbd_trans *trans = NULL;
-       trans = calloc(1, sizeof(struct jbd_trans));
+       trans = ext4_calloc(1, sizeof(struct jbd_trans));
        if (!trans)
                return NULL;