ext4_journal: fix mistreating an empty journal as a bad one.
[lwext4.git] / lwext4 / ext4_journal.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * - Redistributions of source code must retain the above copyright
11  *   notice, this list of conditions and the following disclaimer.
12  * - Redistributions in binary form must reproduce the above copyright
13  *   notice, this list of conditions and the following disclaimer in the
14  *   documentation and/or other materials provided with the distribution.
15  * - The name of the author may not be used to endorse or promote products
16  *   derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 /** @addtogroup lwext4
31  * @{
32  */
33 /**
34  * @file  ext4_journal.c
35  * @brief Journal handle functions
36  */
37
38 #include "ext4_config.h"
39 #include "ext4_types.h"
40 #include "ext4_fs.h"
41 #include "ext4_super.h"
42 #include "ext4_journal.h"
43 #include "ext4_errno.h"
44 #include "ext4_blockdev.h"
45 #include "ext4_crc32.h"
46 #include "ext4_debug.h"
47
48 #include <string.h>
49 #include <stdlib.h>
50
/**@brief  Revoke entry during journal replay.
 *
 *         Entries live in the jbd_revoke red-black tree and are ordered
 *         by @block only (see jbd_revoke_entry_cmp()).*/
struct revoke_entry {
        /**@brief  Block number not to be replayed.*/
        ext4_fsblk_t block;

        /**@brief  For any transaction id smaller
         *         than trans_id, records of @block
         *         in those transactions should not
         *         be replayed.*/
        uint32_t trans_id;

        /**@brief  Revoke tree node.*/
        RB_ENTRY(revoke_entry) revoke_node;
};
65
/**@brief  Valid journal replay information.*/
struct recover_info {
        /**@brief  Starting transaction id.*/
        uint32_t start_trans_id;

        /**@brief  Ending transaction id.*/
        uint32_t last_trans_id;

        /**@brief  Used as internal argument.*/
        uint32_t this_trans_id;

        /**@brief  Number of transactions gone through.*/
        uint32_t trans_cnt;

        /**@brief  RB-Tree storing revoke entries
         *         (nodes are struct revoke_entry).*/
        RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
};
83
/**@brief  Journal replay internal arguments.
 *
 *         Passed as the opaque callback argument while block tags
 *         are being replayed.*/
struct replay_arg {
        /**@brief  Journal replay information.*/
        struct recover_info *info;

        /**@brief  Current block we are on. Points to a counter owned
         *         by the caller; the replay callback advances it.*/
        uint32_t *this_block;

        /**@brief  Current trans_id we are on.*/
        uint32_t this_trans_id;
};
95
96 static int
97 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
98 {
99         if (a->block > b->block)
100                 return 1;
101         else if (a->block < b->block)
102                 return -1;
103         return 0;
104 }
105
106 static int
107 jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b)
108 {
109         if (a->lba > b->lba)
110                 return 1;
111         else if (a->lba < b->lba)
112                 return -1;
113         return 0;
114 }
115
/* Instantiate the red-black tree helpers as static inline functions:
 *  - jbd_revoke: tree of struct revoke_entry, linked through revoke_node
 *    and ordered by jbd_revoke_entry_cmp();
 *  - jbd_block:  tree of struct jbd_block_rec, linked through
 *    block_rec_node and ordered by jbd_block_rec_cmp(). */
RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
                     jbd_revoke_entry_cmp, static inline)
RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
                     jbd_block_rec_cmp, static inline)

/* Revoke entries are zero-filled heap objects (calloc); the allocation
 * may yield NULL and must be released with jbd_free_revoke_entry(). */
#define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
#define jbd_free_revoke_entry(addr) free(addr)
123
124 static int jbd_has_csum(struct jbd_sb *jbd_sb)
125 {
126         if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V2))
127                 return 2;
128
129         if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V3))
130                 return 3;
131
132         return 0;
133 }
134
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the checksum of a jbd superblock.
 * @param  jbd_sb jbd superblock
 * @return crc32c over JBD_SUPERBLOCK_SIZE bytes of the superblock,
 *         computed with its checksum field cleared; 0 when the
 *         journal has no checksum feature enabled */
static uint32_t jbd_sb_csum(struct jbd_sb *jbd_sb)
{
        uint32_t saved_csum;
        uint32_t crc;

        if (!jbd_has_csum(jbd_sb))
                return 0;

        /* The stored checksum must not contribute to the result:
         * clear it for the calculation and put it back afterwards. */
        saved_csum = jbd_sb->checksum;
        jbd_set32(jbd_sb, checksum, 0);
        crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_sb, JBD_SUPERBLOCK_SIZE);
        jbd_sb->checksum = saved_csum;

        return crc;
}
#else
/* Checksum support compiled out: the superblock checksum is always 0. */
#define jbd_sb_csum(...) 0
#endif
153
154 static void jbd_sb_csum_set(struct jbd_sb *jbd_sb)
155 {
156         if (!jbd_has_csum(jbd_sb))
157                 return;
158
159         jbd_set32(jbd_sb, checksum, jbd_sb_csum(jbd_sb));
160 }
161
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum stored in a jbd superblock.
 * @param  jbd_sb jbd superblock
 * @return true when no checksum feature is enabled or when the stored
 *         checksum equals the computed one */
static bool
jbd_verify_sb_csum(struct jbd_sb *jbd_sb)
{
        bool csum_enabled = jbd_has_csum(jbd_sb) != 0;

        return !csum_enabled ||
               jbd_sb_csum(jbd_sb) == jbd_get32(jbd_sb, checksum);
}
#else
/* Checksum support compiled out: every superblock passes. */
#define jbd_verify_sb_csum(...) true
#endif
174
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the checksum of a journal metadata block.
 * @param  jbd_fs jbd filesystem
 * @param  bhdr start of the block; the block is expected to span the
 *         journal block size and to end with a struct jbd_block_tail
 * @return crc32c seeded with the journal UUID and continued over the
 *         whole block with the tail checksum cleared; 0 when the
 *         journal has no checksum feature enabled */
static uint32_t jbd_meta_csum(struct jbd_fs *jbd_fs,
                              struct jbd_bhdr *bhdr)
{
        struct jbd_block_tail *tail;
        uint32_t blk_size;
        uint32_t saved_csum;
        uint32_t crc;

        if (!jbd_has_csum(&jbd_fs->sb))
                return 0;

        blk_size = jbd_get32(&jbd_fs->sb, blocksize);
        tail = (struct jbd_block_tail *)((char *)bhdr + blk_size -
                                         sizeof(struct jbd_block_tail));

        /* Take the stored checksum out of the calculation. */
        saved_csum = tail->checksum;
        tail->checksum = 0;

        /* Seed with the UUID, then run over the whole block. */
        crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
                          sizeof(jbd_fs->sb.uuid));
        crc = ext4_crc32c(crc, bhdr, blk_size);

        tail->checksum = saved_csum;
        return crc;
}
#else
/* Checksum support compiled out: the block checksum is always 0. */
#define jbd_meta_csum(...) 0
#endif
202
203 static void jbd_meta_csum_set(struct jbd_fs *jbd_fs,
204                               struct jbd_bhdr *bhdr)
205 {
206         uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
207         struct jbd_block_tail *tail = (struct jbd_block_tail *)
208                                 ((char *)bhdr + block_size -
209                                 sizeof(struct jbd_block_tail));
210         if (!jbd_has_csum(&jbd_fs->sb))
211                 return;
212
213         tail->checksum = to_be32(jbd_meta_csum(jbd_fs, bhdr));
214 }
215
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum stored in the tail of a journal
 *         metadata block.
 * @param  jbd_fs jbd filesystem
 * @param  bhdr start of the block
 * @return true when no checksum feature is enabled or when the stored
 *         checksum equals the computed one */
static bool
jbd_verify_meta_csum(struct jbd_fs *jbd_fs,
                     struct jbd_bhdr *bhdr)
{
        struct jbd_block_tail *tail;
        uint32_t blk_size;

        if (!jbd_has_csum(&jbd_fs->sb))
                return true;

        /* The tail occupies the last bytes of the block. */
        blk_size = jbd_get32(&jbd_fs->sb, blocksize);
        tail = (struct jbd_block_tail *)((char *)bhdr + blk_size -
                                         sizeof(struct jbd_block_tail));

        return jbd_meta_csum(jbd_fs, bhdr) == to_be32(tail->checksum);
}
#else
/* Checksum support compiled out: every block passes. */
#define jbd_verify_meta_csum(...) true
#endif
233
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the checksum of a commit block.
 * @param  jbd_fs jbd filesystem
 * @param  header start of the commit block
 * @return crc32c seeded with the journal UUID and continued over one
 *         journal block starting at @header, computed with
 *         chksum_type, chksum_size and chksum[0] cleared; 0 when the
 *         journal has no checksum feature enabled */
static uint32_t jbd_commit_csum(struct jbd_fs *jbd_fs,
                              struct jbd_commit_header *header)
{
        uint32_t saved_type, saved_size, saved_csum;
        uint32_t blk_size;
        uint32_t crc;

        if (!jbd_has_csum(&jbd_fs->sb))
                return 0;

        /* Remember the checksum-related fields before clearing them. */
        saved_type = header->chksum_type;
        saved_size = header->chksum_size;
        saved_csum = header->chksum[0];
        blk_size = jbd_get32(&jbd_fs->sb, blocksize);

        header->chksum_type = 0;
        header->chksum_size = 0;
        header->chksum[0] = 0;

        /* Seed with the UUID, then run over the whole block. */
        crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
                          sizeof(jbd_fs->sb.uuid));
        crc = ext4_crc32c(crc, header, blk_size);

        /* Leave the header exactly as it was handed in. */
        header->chksum_type = saved_type;
        header->chksum_size = saved_size;
        header->chksum[0] = saved_csum;

        return crc;
}
#else
/* Checksum support compiled out: the commit checksum is always 0. */
#define jbd_commit_csum(...) 0
#endif
265
266 static void jbd_commit_csum_set(struct jbd_fs *jbd_fs,
267                               struct jbd_commit_header *header)
268 {
269         if (!jbd_has_csum(&jbd_fs->sb))
270                 return;
271
272         header->chksum_type = 0;
273         header->chksum_size = 0;
274         header->chksum[0] = jbd_commit_csum(jbd_fs, header);
275 }
276
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum stored in a commit block.
 * @param  jbd_fs jbd filesystem
 * @param  header start of the commit block
 * @return true when no checksum feature is enabled or when chksum[0]
 *         equals the big-endian form of the computed checksum */
static bool jbd_verify_commit_csum(struct jbd_fs *jbd_fs,
                                   struct jbd_commit_header *header)
{
        uint32_t expected;

        if (!jbd_has_csum(&jbd_fs->sb))
                return true;

        expected = to_be32(jbd_commit_csum(jbd_fs, header));
        return header->chksum[0] == expected;
}
#else
/* Checksum support compiled out: every commit block passes. */
#define jbd_verify_commit_csum(...) true
#endif
290
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the checksum of a data block stored in the journal.
 * @param  jbd_fs jbd filesystem
 * @param  buf block content, one journal block long
 * @param  csum running checksum; only used on the
 *         JBD_FEATURE_COMPAT_CHECKSUM path
 * @param  sequence transaction sequence number mixed into the
 *         checksum on the CSUM_V2/V3 path
 * @return the computed checksum, or 0 when neither path applies */
static uint32_t jbd_block_csum(struct jbd_fs *jbd_fs, const void *buf,
                               uint32_t csum,
                               uint32_t sequence)
{
        struct jbd_sb *sb = &jbd_fs->sb;
        uint32_t crc;

        if (jbd_has_csum(sb)) {
                /* crc32c over the UUID, the sequence number and
                 * finally the whole block. */
                crc = ext4_crc32c(EXT4_CRC32_INIT, sb->uuid,
                                  sizeof(sb->uuid));
                crc = ext4_crc32c(crc, &sequence, sizeof(uint32_t));
                return ext4_crc32c(crc, buf, jbd_get32(sb, blocksize));
        }

        /* NOTE(review): a COMPAT flag is tested with the INCOMPAT
         * feature macro here - presumably the compat feature word
         * was intended; confirm against the header before changing. */
        if (JBD_HAS_INCOMPAT_FEATURE(sb, JBD_FEATURE_COMPAT_CHECKSUM)) {
                /* Continue the caller's running crc32 over the block. */
                return ext4_crc32(csum, buf, jbd_get32(sb, blocksize));
        }

        return 0;
}
#else
/* Checksum support compiled out: the block checksum is always 0. */
#define jbd_block_csum(...) 0
#endif
325
326 static void jbd_block_tag_csum_set(struct jbd_fs *jbd_fs, void *__tag,
327                                    uint32_t checksum)
328 {
329         int ver = jbd_has_csum(&jbd_fs->sb);
330         if (!ver)
331                 return;
332
333         if (ver == 2) {
334                 struct jbd_block_tag *tag = __tag;
335                 tag->checksum = (uint16_t)to_be32(checksum);
336         } else {
337                 struct jbd_block_tag3 *tag = __tag;
338                 tag->checksum = to_be32(checksum);
339         }
340 }
341
342 /**@brief  Write jbd superblock to disk.
343  * @param  jbd_fs jbd filesystem
344  * @param  s jbd superblock
345  * @return standard error code*/
346 static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
347 {
348         int rc;
349         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
350         uint64_t offset;
351         ext4_fsblk_t fblock;
352         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
353         if (rc != EOK)
354                 return rc;
355
356         jbd_sb_csum_set(s);
357         offset = fblock * ext4_sb_get_block_size(&fs->sb);
358         return ext4_block_writebytes(fs->bdev, offset, s,
359                                      EXT4_SUPERBLOCK_SIZE);
360 }
361
362 /**@brief  Read jbd superblock from disk.
363  * @param  jbd_fs jbd filesystem
364  * @param  s jbd superblock
365  * @return standard error code*/
366 static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
367 {
368         int rc;
369         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
370         uint64_t offset;
371         ext4_fsblk_t fblock;
372         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
373         if (rc != EOK)
374                 return rc;
375
376         offset = fblock * ext4_sb_get_block_size(&fs->sb);
377         return ext4_block_readbytes(fs->bdev, offset, s,
378                                     EXT4_SUPERBLOCK_SIZE);
379 }
380
381 /**@brief  Verify jbd superblock.
382  * @param  sb jbd superblock
383  * @return true if jbd superblock is valid */
384 static bool jbd_verify_sb(struct jbd_sb *sb)
385 {
386         struct jbd_bhdr *header = &sb->header;
387         if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
388                 return false;
389
390         if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
391             jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
392                 return false;
393
394         return jbd_verify_sb_csum(sb);
395 }
396
397 /**@brief  Write back dirty jbd superblock to disk.
398  * @param  jbd_fs jbd filesystem
399  * @return standard error code*/
400 static int jbd_write_sb(struct jbd_fs *jbd_fs)
401 {
402         int rc = EOK;
403         if (jbd_fs->dirty) {
404                 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
405                 if (rc != EOK)
406                         return rc;
407
408                 jbd_fs->dirty = false;
409         }
410         return rc;
411 }
412
413 /**@brief  Get reference to jbd filesystem.
414  * @param  fs Filesystem to load journal of
415  * @param  jbd_fs jbd filesystem
416  * @return standard error code*/
417 int jbd_get_fs(struct ext4_fs *fs,
418                struct jbd_fs *jbd_fs)
419 {
420         int rc;
421         uint32_t journal_ino;
422
423         memset(jbd_fs, 0, sizeof(struct jbd_fs));
424         /* See if there is journal inode on this filesystem.*/
425         /* FIXME: detection on existance ofbkejournal bdev is
426          *        missing.*/
427         journal_ino = ext4_get32(&fs->sb, journal_inode_number);
428
429         rc = ext4_fs_get_inode_ref(fs,
430                                    journal_ino,
431                                    &jbd_fs->inode_ref);
432         if (rc != EOK) {
433                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
434                 return rc;
435         }
436         rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
437         if (rc != EOK) {
438                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
439                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
440                 return rc;
441         }
442         if (!jbd_verify_sb(&jbd_fs->sb)) {
443                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
444                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
445                 rc = EIO;
446         }
447
448         return rc;
449 }
450
451 /**@brief  Put reference of jbd filesystem.
452  * @param  jbd_fs jbd filesystem
453  * @return standard error code*/
454 int jbd_put_fs(struct jbd_fs *jbd_fs)
455 {
456         int rc = EOK;
457         rc = jbd_write_sb(jbd_fs);
458
459         ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
460         return rc;
461 }
462
463 /**@brief  Data block lookup helper.
464  * @param  jbd_fs jbd filesystem
465  * @param  iblock block index
466  * @param  fblock logical block address
467  * @return standard error code*/
468 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
469                    ext4_lblk_t iblock,
470                    ext4_fsblk_t *fblock)
471 {
472         int rc = ext4_fs_get_inode_dblk_idx(
473                         &jbd_fs->inode_ref,
474                         iblock,
475                         fblock,
476                         false);
477         return rc;
478 }
479
480 /**@brief   jbd block get function (through cache).
481  * @param   jbd_fs jbd filesystem
482  * @param   block block descriptor
483  * @param   fblock jbd logical block address
484  * @return  standard error code*/
485 static int jbd_block_get(struct jbd_fs *jbd_fs,
486                   struct ext4_block *block,
487                   ext4_fsblk_t fblock)
488 {
489         /* TODO: journal device. */
490         int rc;
491         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
492
493         /* Lookup the logical block address of
494          * fblock.*/
495         rc = jbd_inode_bmap(jbd_fs, iblock,
496                             &fblock);
497         if (rc != EOK)
498                 return rc;
499
500         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
501         rc = ext4_block_get(bdev, block, fblock);
502
503         /* If succeeded, mark buffer as BC_FLUSH to indicate
504          * that data should be written to disk immediately.*/
505         if (rc == EOK) {
506                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
507                 /* As we don't want to occupy too much space
508                  * in block cache, we set this buffer BC_TMP.*/
509                 ext4_bcache_set_flag(block->buf, BC_TMP);
510         }
511
512         return rc;
513 }
514
515 /**@brief   jbd block get function (through cache, don't read).
516  * @param   jbd_fs jbd filesystem
517  * @param   block block descriptor
518  * @param   fblock jbd logical block address
519  * @return  standard error code*/
520 static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
521                          struct ext4_block *block,
522                          ext4_fsblk_t fblock)
523 {
524         /* TODO: journal device. */
525         int rc;
526         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
527         rc = jbd_inode_bmap(jbd_fs, iblock,
528                             &fblock);
529         if (rc != EOK)
530                 return rc;
531
532         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
533         rc = ext4_block_get_noread(bdev, block, fblock);
534         if (rc == EOK)
535                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
536
537         return rc;
538 }
539
540 /**@brief   jbd block set procedure (through cache).
541  * @param   jbd_fs jbd filesystem
542  * @param   block block descriptor
543  * @return  standard error code*/
544 static int jbd_block_set(struct jbd_fs *jbd_fs,
545                   struct ext4_block *block)
546 {
547         return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
548                               block);
549 }
550
551 /**@brief  helper functions to calculate
552  *         block tag size, not including UUID part.
553  * @param  jbd_fs jbd filesystem
554  * @return tag size in bytes*/
555 static int jbd_tag_bytes(struct jbd_fs *jbd_fs)
556 {
557         int size;
558
559         /* It is very easy to deal with the case which
560          * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
561         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
562                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
563                 return sizeof(struct jbd_block_tag3);
564
565         size = sizeof(struct jbd_block_tag);
566
567         /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
568          * add 2 bytes to size.*/
569         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
570                                      JBD_FEATURE_INCOMPAT_CSUM_V2))
571                 size += sizeof(uint16_t);
572
573         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
574                                      JBD_FEATURE_INCOMPAT_64BIT))
575                 return size;
576
577         /* If block number is 4 bytes in size,
578          * minus 4 bytes from size */
579         return size - sizeof(uint32_t);
580 }
581
/**@brief  Tag information.
 *
 *         Host-side view of one block tag, filled in by
 *         jbd_extract_block_tag() and consumed by
 *         jbd_write_block_tag().*/
struct tag_info {
        /**@brief  Tag size in bytes, including UUID part.*/
        int tag_bytes;

        /**@brief  block number stored in this tag.*/
        ext4_fsblk_t block;

        /**@brief  whether UUID part exists or not.*/
        bool uuid_exist;

        /**@brief  UUID content if UUID part exists
         *         (not meaningful otherwise).*/
        uint8_t uuid[UUID_SIZE];

        /**@brief  Is this the last tag? */
        bool last_tag;

        /**@brief  crc32c checksum. */
        uint32_t checksum;
};
602
603 /**@brief  Extract information from a block tag.
604  * @param  __tag pointer to the block tag
605  * @param  tag_bytes block tag size of this jbd filesystem
606  * @param  remaining size in buffer containing the block tag
607  * @param  tag_info information of this tag.
608  * @return  EOK when succeed, otherwise return EINVAL.*/
609 static int
610 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
611                       void *__tag,
612                       int tag_bytes,
613                       int32_t remain_buf_size,
614                       struct tag_info *tag_info)
615 {
616         char *uuid_start;
617         tag_info->tag_bytes = tag_bytes;
618         tag_info->uuid_exist = false;
619         tag_info->last_tag = false;
620
621         /* See whether it is possible to hold a valid block tag.*/
622         if (remain_buf_size - tag_bytes < 0)
623                 return EINVAL;
624
625         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
626                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
627                 struct jbd_block_tag3 *tag = __tag;
628                 tag_info->block = jbd_get32(tag, blocknr);
629                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
630                                              JBD_FEATURE_INCOMPAT_64BIT))
631                          tag_info->block |=
632                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
633
634                 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
635                         tag_info->block = 0;
636
637                 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
638                         /* See whether it is possible to hold UUID part.*/
639                         if (remain_buf_size - tag_bytes < UUID_SIZE)
640                                 return EINVAL;
641
642                         uuid_start = (char *)tag + tag_bytes;
643                         tag_info->uuid_exist = true;
644                         tag_info->tag_bytes += UUID_SIZE;
645                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
646                 }
647
648                 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
649                         tag_info->last_tag = true;
650
651         } else {
652                 struct jbd_block_tag *tag = __tag;
653                 tag_info->block = jbd_get32(tag, blocknr);
654                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
655                                              JBD_FEATURE_INCOMPAT_64BIT))
656                          tag_info->block |=
657                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
658
659                 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
660                         tag_info->block = 0;
661
662                 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
663                         /* See whether it is possible to hold UUID part.*/
664                         if (remain_buf_size - tag_bytes < UUID_SIZE)
665                                 return EINVAL;
666
667                         uuid_start = (char *)tag + tag_bytes;
668                         tag_info->uuid_exist = true;
669                         tag_info->tag_bytes += UUID_SIZE;
670                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
671                 }
672
673                 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
674                         tag_info->last_tag = true;
675
676         }
677         return EOK;
678 }
679
680 /**@brief  Write information to a block tag.
681  * @param  __tag pointer to the block tag
682  * @param  remaining size in buffer containing the block tag
683  * @param  tag_info information of this tag.
684  * @return  EOK when succeed, otherwise return EINVAL.*/
685 static int
686 jbd_write_block_tag(struct jbd_fs *jbd_fs,
687                     void *__tag,
688                     int32_t remain_buf_size,
689                     struct tag_info *tag_info)
690 {
691         char *uuid_start;
692         int tag_bytes = jbd_tag_bytes(jbd_fs);
693
694         tag_info->tag_bytes = tag_bytes;
695
696         /* See whether it is possible to hold a valid block tag.*/
697         if (remain_buf_size - tag_bytes < 0)
698                 return EINVAL;
699
700         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
701                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
702                 struct jbd_block_tag3 *tag = __tag;
703                 memset(tag, 0, sizeof(struct jbd_block_tag3));
704                 jbd_set32(tag, blocknr, tag_info->block);
705                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
706                                              JBD_FEATURE_INCOMPAT_64BIT))
707                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
708
709                 if (tag_info->uuid_exist) {
710                         /* See whether it is possible to hold UUID part.*/
711                         if (remain_buf_size - tag_bytes < UUID_SIZE)
712                                 return EINVAL;
713
714                         uuid_start = (char *)tag + tag_bytes;
715                         tag_info->tag_bytes += UUID_SIZE;
716                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
717                 } else
718                         jbd_set32(tag, flags,
719                                   jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
720
721                 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
722
723                 if (tag_info->last_tag)
724                         jbd_set32(tag, flags,
725                                   jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
726
727         } else {
728                 struct jbd_block_tag *tag = __tag;
729                 memset(tag, 0, sizeof(struct jbd_block_tag));
730                 jbd_set32(tag, blocknr, tag_info->block);
731                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
732                                              JBD_FEATURE_INCOMPAT_64BIT))
733                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
734
735                 if (tag_info->uuid_exist) {
736                         /* See whether it is possible to hold UUID part.*/
737                         if (remain_buf_size - tag_bytes < UUID_SIZE)
738                                 return EINVAL;
739
740                         uuid_start = (char *)tag + tag_bytes;
741                         tag_info->tag_bytes += UUID_SIZE;
742                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
743                 } else
744                         jbd_set16(tag, flags,
745                                   jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
746
747                 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
748
749                 if (tag_info->last_tag)
750                         jbd_set16(tag, flags,
751                                   jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
752
753         }
754         return EOK;
755 }
756
757 /**@brief  Iterate all block tags in a block.
758  * @param  jbd_fs jbd filesystem
759  * @param  __tag_start pointer to the block
760  * @param  tag_tbl_size size of the block
761  * @param  func callback routine to indicate that
762  *         a block tag is found
763  * @param  arg additional argument to be passed to func */
764 static void
765 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
766                         void *__tag_start,
767                         int32_t tag_tbl_size,
768                         void (*func)(struct jbd_fs * jbd_fs,
769                                         ext4_fsblk_t block,
770                                         uint8_t *uuid,
771                                         void *arg),
772                         void *arg)
773 {
774         char *tag_start, *tag_ptr;
775         int tag_bytes = jbd_tag_bytes(jbd_fs);
776         tag_start = __tag_start;
777         tag_ptr = tag_start;
778
779         /* Cut off the size of block tail storing checksum. */
780         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
781                                      JBD_FEATURE_INCOMPAT_CSUM_V2) ||
782             JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
783                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
784                 tag_tbl_size -= sizeof(struct jbd_block_tail);
785
786         while (tag_tbl_size) {
787                 struct tag_info tag_info;
788                 int rc = jbd_extract_block_tag(jbd_fs,
789                                       tag_ptr,
790                                       tag_bytes,
791                                       tag_tbl_size,
792                                       &tag_info);
793                 if (rc != EOK)
794                         break;
795
796                 if (func)
797                         func(jbd_fs, tag_info.block, tag_info.uuid, arg);
798
799                 /* Stop the iteration when we reach the last tag. */
800                 if (tag_info.last_tag)
801                         break;
802
803                 tag_ptr += tag_info.tag_bytes;
804                 tag_tbl_size -= tag_info.tag_bytes;
805         }
806 }
807
808 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
809                                    ext4_fsblk_t block,
810                                    uint8_t *uuid,
811                                    void *arg)
812 {
813         uint32_t *iblock = arg;
814         ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
815         (*iblock)++;
816         (void)jbd_fs;
817         (void)uuid;
818         return;
819 }
820
821 static struct revoke_entry *
822 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
823 {
824         struct revoke_entry tmp = {
825                 .block = block
826         };
827
828         return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
829 }
830
831 /**@brief  Replay a block in a transaction.
832  * @param  jbd_fs jbd filesystem
833  * @param  block  block address to be replayed.*/
834 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
835                                   ext4_fsblk_t block,
836                                   uint8_t *uuid __unused,
837                                   void *__arg)
838 {
839         int r;
840         struct replay_arg *arg = __arg;
841         struct recover_info *info = arg->info;
842         uint32_t *this_block = arg->this_block;
843         struct revoke_entry *revoke_entry;
844         struct ext4_block journal_block, ext4_block;
845         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
846
847         (*this_block)++;
848
849         /* We replay this block only if the current transaction id
850          * is equal or greater than that in revoke entry.*/
851         revoke_entry = jbd_revoke_entry_lookup(info, block);
852         if (revoke_entry &&
853             arg->this_trans_id < revoke_entry->trans_id)
854                 return;
855
856         ext4_dbg(DEBUG_JBD,
857                  "Replaying block in block_tag: %" PRIu64 "\n",
858                  block);
859
860         r = jbd_block_get(jbd_fs, &journal_block, *this_block);
861         if (r != EOK)
862                 return;
863
864         /* We need special treatment for ext4 superblock. */
865         if (block) {
866                 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
867                 if (r != EOK) {
868                         jbd_block_set(jbd_fs, &journal_block);
869                         return;
870                 }
871
872                 memcpy(ext4_block.data,
873                         journal_block.data,
874                         jbd_get32(&jbd_fs->sb, blocksize));
875
876                 ext4_bcache_set_dirty(ext4_block.buf);
877                 ext4_block_set(fs->bdev, &ext4_block);
878         } else {
879                 uint16_t mount_count, state;
880                 mount_count = ext4_get16(&fs->sb, mount_count);
881                 state = ext4_get16(&fs->sb, state);
882
883                 memcpy(&fs->sb,
884                         journal_block.data + EXT4_SUPERBLOCK_OFFSET,
885                         EXT4_SUPERBLOCK_SIZE);
886
887                 /* Mark system as mounted */
888                 ext4_set16(&fs->sb, state, state);
889                 r = ext4_sb_write(fs->bdev, &fs->sb);
890                 if (r != EOK)
891                         return;
892
893                 /*Update mount count*/
894                 ext4_set16(&fs->sb, mount_count, mount_count);
895         }
896
897         jbd_block_set(jbd_fs, &journal_block);
898         
899         return;
900 }
901
902 /**@brief  Add block address to revoke tree, along with
903  *         its transaction id.
904  * @param  info  journal replay info
905  * @param  block  block address to be replayed.*/
906 static void jbd_add_revoke_block_tags(struct recover_info *info,
907                                       ext4_fsblk_t block)
908 {
909         struct revoke_entry *revoke_entry;
910
911         ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
912         /* If the revoke entry with respect to the block address
913          * exists already, update its transaction id.*/
914         revoke_entry = jbd_revoke_entry_lookup(info, block);
915         if (revoke_entry) {
916                 revoke_entry->trans_id = info->this_trans_id;
917                 return;
918         }
919
920         revoke_entry = jbd_alloc_revoke_entry();
921         ext4_assert(revoke_entry);
922         revoke_entry->block = block;
923         revoke_entry->trans_id = info->this_trans_id;
924         RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
925
926         return;
927 }
928
929 static void jbd_destroy_revoke_tree(struct recover_info *info)
930 {
931         while (!RB_EMPTY(&info->revoke_root)) {
932                 struct revoke_entry *revoke_entry =
933                         RB_MIN(jbd_revoke, &info->revoke_root);
934                 ext4_assert(revoke_entry);
935                 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
936                 jbd_free_revoke_entry(revoke_entry);
937         }
938 }
939
/* Make sure we wrap around the log correctly!
 *
 * If @var has reached or passed the journal superblock's maxlen, it is
 * moved back by (maxlen - first). @var must be a modifiable lvalue; it
 * is parenthesized so that expressions such as *ptr can be passed. */
#define wrap(sb, var)							\
do {									\
	if ((var) >= jbd_get32((sb), maxlen))				\
		(var) -= (jbd_get32((sb), maxlen) -			\
			  jbd_get32((sb), first));			\
} while (0)

/* Passes that jbd_iterate_log() can perform over the journal:
 * ACTION_SCAN    - count the transactions and find the last valid one,
 * ACTION_REVOKE  - collect revoke blocks into the revoke tree,
 * ACTION_RECOVER - replay the blocks listed in descriptor blocks. */
#define ACTION_SCAN 0
#define ACTION_REVOKE 1
#define ACTION_RECOVER 2
950
951 /**@brief  Add entries in a revoke block to revoke tree.
952  * @param  jbd_fs jbd filesystem
953  * @param  header revoke block header
954  * @param  recover_info  journal replay info*/
955 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
956                                   struct jbd_bhdr *header,
957                                   struct recover_info *info)
958 {
959         char *blocks_entry;
960         struct jbd_revoke_header *revoke_hdr =
961                 (struct jbd_revoke_header *)header;
962         uint32_t i, nr_entries, record_len = 4;
963
964         /* If we are working on a 64bit jbd filesystem, */
965         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
966                                      JBD_FEATURE_INCOMPAT_64BIT))
967                 record_len = 8;
968
969         nr_entries = (jbd_get32(revoke_hdr, count) -
970                         sizeof(struct jbd_revoke_header)) /
971                         record_len;
972
973         blocks_entry = (char *)(revoke_hdr + 1);
974
975         for (i = 0;i < nr_entries;i++) {
976                 if (record_len == 8) {
977                         uint64_t *blocks =
978                                 (uint64_t *)blocks_entry;
979                         jbd_add_revoke_block_tags(info, to_be64(*blocks));
980                 } else {
981                         uint32_t *blocks =
982                                 (uint32_t *)blocks_entry;
983                         jbd_add_revoke_block_tags(info, to_be32(*blocks));
984                 }
985                 blocks_entry += record_len;
986         }
987 }
988
989 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
990                                        struct jbd_bhdr *header,
991                                        uint32_t *iblock)
992 {
993         jbd_iterate_block_table(jbd_fs,
994                                 header + 1,
995                                 jbd_get32(&jbd_fs->sb, blocksize) -
996                                         sizeof(struct jbd_bhdr),
997                                 jbd_display_block_tags,
998                                 iblock);
999 }
1000
1001 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
1002                                         struct jbd_bhdr *header,
1003                                         struct replay_arg *arg)
1004 {
1005         jbd_iterate_block_table(jbd_fs,
1006                                 header + 1,
1007                                 jbd_get32(&jbd_fs->sb, blocksize) -
1008                                         sizeof(struct jbd_bhdr),
1009                                 jbd_replay_block_tags,
1010                                 arg);
1011 }
1012
1013 /**@brief  The core routine of journal replay.
1014  * @param  jbd_fs jbd filesystem
1015  * @param  recover_info  journal replay info
1016  * @param  action action needed to be taken
1017  * @return standard error code*/
1018 static int jbd_iterate_log(struct jbd_fs *jbd_fs,
1019                            struct recover_info *info,
1020                            int action)
1021 {
1022         int r = EOK;
1023         bool log_end = false;
1024         struct jbd_sb *sb = &jbd_fs->sb;
1025         uint32_t start_trans_id, this_trans_id;
1026         uint32_t start_block, this_block;
1027
1028         /* We start iterating valid blocks in the whole journal.*/
1029         start_trans_id = this_trans_id = jbd_get32(sb, sequence);
1030         start_block = this_block = jbd_get32(sb, start);
1031         if (action == ACTION_SCAN)
1032                 info->trans_cnt = 0;
1033         else if (!info->trans_cnt)
1034                 log_end = true;
1035
1036         ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
1037                             start_trans_id);
1038
1039         while (!log_end) {
1040                 struct ext4_block block;
1041                 struct jbd_bhdr *header;
1042                 /* If we are not scanning for the last
1043                  * valid transaction in the journal,
1044                  * we will stop when we reach the end of
1045                  * the journal.*/
1046                 if (action != ACTION_SCAN)
1047                         if (this_trans_id > info->last_trans_id) {
1048                                 log_end = true;
1049                                 continue;
1050                         }
1051
1052                 r = jbd_block_get(jbd_fs, &block, this_block);
1053                 if (r != EOK)
1054                         break;
1055
1056                 header = (struct jbd_bhdr *)block.data;
1057                 /* This block does not have a valid magic number,
1058                  * so we have reached the end of the journal.*/
1059                 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
1060                         jbd_block_set(jbd_fs, &block);
1061                         log_end = true;
1062                         continue;
1063                 }
1064
1065                 /* If the transaction id we found is not expected,
1066                  * we may have reached the end of the journal.
1067                  *
1068                  * If we are not scanning the journal, something
1069                  * bad might have taken place. :-( */
1070                 if (jbd_get32(header, sequence) != this_trans_id) {
1071                         if (action != ACTION_SCAN)
1072                                 r = EIO;
1073
1074                         jbd_block_set(jbd_fs, &block);
1075                         log_end = true;
1076                         continue;
1077                 }
1078
1079                 switch (jbd_get32(header, blocktype)) {
1080                 case JBD_DESCRIPTOR_BLOCK:
1081                         if (!jbd_verify_meta_csum(jbd_fs, header)) {
1082                                 ext4_dbg(DEBUG_JBD,
1083                                         DBG_WARN "Descriptor block checksum failed."
1084                                                 "Journal block: %" PRIu32"\n",
1085                                                 this_block);
1086                                 log_end = true;
1087                                 break;
1088                         }
1089                         ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
1090                                             "trans_id: %" PRIu32"\n",
1091                                             this_block, this_trans_id);
1092                         if (action == ACTION_RECOVER) {
1093                                 struct replay_arg replay_arg;
1094                                 replay_arg.info = info;
1095                                 replay_arg.this_block = &this_block;
1096                                 replay_arg.this_trans_id = this_trans_id;
1097
1098                                 jbd_replay_descriptor_block(jbd_fs,
1099                                                 header, &replay_arg);
1100                         } else
1101                                 jbd_debug_descriptor_block(jbd_fs,
1102                                                 header, &this_block);
1103
1104                         break;
1105                 case JBD_COMMIT_BLOCK:
1106                         if (!jbd_verify_commit_csum(jbd_fs,
1107                                         (struct jbd_commit_header *)header)) {
1108                                 ext4_dbg(DEBUG_JBD,
1109                                         DBG_WARN "Commit block checksum failed."
1110                                                 "Journal block: %" PRIu32"\n",
1111                                                 this_block);
1112                                 log_end = true;
1113                                 break;
1114                         }
1115                         ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
1116                                             "trans_id: %" PRIu32"\n",
1117                                             this_block, this_trans_id);
1118                         /* This is the end of a transaction,
1119                          * we may now proceed to the next transaction.
1120                          */
1121                         this_trans_id++;
1122                         info->trans_cnt++;
1123                         break;
1124                 case JBD_REVOKE_BLOCK:
1125                         if (!jbd_verify_meta_csum(jbd_fs, header)) {
1126                                 ext4_dbg(DEBUG_JBD,
1127                                         DBG_WARN "Revoke block checksum failed."
1128                                                 "Journal block: %" PRIu32"\n",
1129                                                 this_block);
1130                                 log_end = true;
1131                                 break;
1132                         }
1133                         ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
1134                                             "trans_id: %" PRIu32"\n",
1135                                             this_block, this_trans_id);
1136                         if (action == ACTION_REVOKE) {
1137                                 info->this_trans_id = this_trans_id;
1138                                 jbd_build_revoke_tree(jbd_fs,
1139                                                 header, info);
1140                         }
1141                         break;
1142                 default:
1143                         log_end = true;
1144                         break;
1145                 }
1146                 jbd_block_set(jbd_fs, &block);
1147                 this_block++;
1148                 wrap(sb, this_block);
1149                 if (this_block == start_block)
1150                         log_end = true;
1151
1152         }
1153         ext4_dbg(DEBUG_JBD, "End of journal.\n");
1154         if (r == EOK && action == ACTION_SCAN) {
1155                 /* We have finished scanning the journal. */
1156                 info->start_trans_id = start_trans_id;
1157                 if (this_trans_id > start_trans_id)
1158                         info->last_trans_id = this_trans_id - 1;
1159                 else
1160                         info->last_trans_id = this_trans_id;
1161         }
1162
1163         return r;
1164 }
1165
1166 /**@brief  Replay journal.
1167  * @param  jbd_fs jbd filesystem
1168  * @return standard error code*/
1169 int jbd_recover(struct jbd_fs *jbd_fs)
1170 {
1171         int r;
1172         struct recover_info info;
1173         struct jbd_sb *sb = &jbd_fs->sb;
1174         if (!sb->start)
1175                 return EOK;
1176
1177         RB_INIT(&info.revoke_root);
1178
1179         r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
1180         if (r != EOK)
1181                 return r;
1182
1183         r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
1184         if (r != EOK)
1185                 return r;
1186
1187         r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
1188         if (r == EOK) {
1189                 /* If we successfully replay the journal,
1190                  * clear EXT4_FINCOM_RECOVER flag on the
1191                  * ext4 superblock, and set the start of
1192                  * journal to 0.*/
1193                 uint32_t features_incompatible =
1194                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
1195                                    features_incompatible);
1196                 jbd_set32(&jbd_fs->sb, start, 0);
1197                 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1198                 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1199                            features_incompatible,
1200                            features_incompatible);
1201                 jbd_fs->dirty = true;
1202                 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1203                                   &jbd_fs->inode_ref.fs->sb);
1204         }
1205         jbd_destroy_revoke_tree(&info);
1206         return r;
1207 }
1208
1209 static void jbd_journal_write_sb(struct jbd_journal *journal)
1210 {
1211         struct jbd_fs *jbd_fs = journal->jbd_fs;
1212         jbd_set32(&jbd_fs->sb, start, journal->start);
1213         jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
1214         jbd_fs->dirty = true;
1215 }
1216
1217 /**@brief  Start accessing the journal.
1218  * @param  jbd_fs jbd filesystem
1219  * @param  journal current journal session
1220  * @return standard error code*/
1221 int jbd_journal_start(struct jbd_fs *jbd_fs,
1222                       struct jbd_journal *journal)
1223 {
1224         int r;
1225         uint32_t features_incompatible =
1226                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
1227                                    features_incompatible);
1228         struct ext4_block block = EXT4_BLOCK_ZERO();
1229         features_incompatible |= EXT4_FINCOM_RECOVER;
1230         ext4_set32(&jbd_fs->inode_ref.fs->sb,
1231                         features_incompatible,
1232                         features_incompatible);
1233         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1234                         &jbd_fs->inode_ref.fs->sb);
1235         if (r != EOK)
1236                 return r;
1237
1238         journal->first = jbd_get32(&jbd_fs->sb, first);
1239         journal->start = journal->first;
1240         journal->last = journal->first;
1241         journal->trans_id = 1;
1242         journal->alloc_trans_id = 1;
1243
1244         journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
1245
1246         r = jbd_block_get_noread(jbd_fs,
1247                          &block,
1248                          journal->start);
1249         if (r != EOK) {
1250                 memset(journal, 0, sizeof(struct jbd_journal));
1251                 return r;
1252         }
1253         memset(block.data, 0, journal->block_size);
1254         ext4_bcache_set_dirty(block.buf);
1255         r = jbd_block_set(jbd_fs, &block);
1256         if (r != EOK) {
1257                 memset(journal, 0, sizeof(struct jbd_journal));
1258                 return r;
1259         }
1260
1261         TAILQ_INIT(&journal->trans_queue);
1262         TAILQ_INIT(&journal->cp_queue);
1263         RB_INIT(&journal->block_rec_root);
1264         journal->jbd_fs = jbd_fs;
1265         jbd_journal_write_sb(journal);
1266         return jbd_write_sb(jbd_fs);
1267 }
1268
1269 static void jbd_journal_flush_trans(struct jbd_trans *trans)
1270 {
1271         struct jbd_buf *jbd_buf, *tmp;
1272         struct jbd_journal *journal = trans->journal;
1273         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1274         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1275                         tmp) {
1276                 struct ext4_block block = jbd_buf->block;
1277                 ext4_block_flush_buf(fs->bdev, block.buf);
1278         }
1279 }
1280
1281 static void
1282 jbd_journal_skip_pure_revoke(struct jbd_journal *journal,
1283                              struct jbd_trans *trans)
1284 {
1285         journal->start = trans->start_iblock +
1286                 trans->alloc_blocks;
1287         wrap(&journal->jbd_fs->sb, journal->start);
1288         journal->trans_id = trans->trans_id + 1;
1289         jbd_journal_free_trans(journal,
1290                         trans, false);
1291         jbd_journal_write_sb(journal);
1292 }
1293
1294 static void
1295 jbd_journal_purge_cp_trans(struct jbd_journal *journal,
1296                            bool flush)
1297 {
1298         struct jbd_trans *trans;
1299         while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1300                 if (!trans->data_cnt) {
1301                         TAILQ_REMOVE(&journal->cp_queue,
1302                                         trans,
1303                                         trans_node);
1304                         jbd_journal_skip_pure_revoke(journal, trans);
1305                 } else {
1306                         if (trans->data_cnt ==
1307                                         trans->written_cnt) {
1308                                 journal->start =
1309                                         trans->start_iblock +
1310                                         trans->alloc_blocks;
1311                                 wrap(&journal->jbd_fs->sb,
1312                                                 journal->start);
1313                                 journal->trans_id =
1314                                         trans->trans_id + 1;
1315                                 TAILQ_REMOVE(&journal->cp_queue,
1316                                                 trans,
1317                                                 trans_node);
1318                                 jbd_journal_free_trans(journal,
1319                                                 trans,
1320                                                 false);
1321                                 jbd_journal_write_sb(journal);
1322                         } else if (!flush) {
1323                                 journal->start =
1324                                         trans->start_iblock;
1325                                 wrap(&journal->jbd_fs->sb,
1326                                                 journal->start);
1327                                 journal->trans_id =
1328                                         trans->trans_id;
1329                                 jbd_journal_write_sb(journal);
1330                                 break;
1331                         } else
1332                                 jbd_journal_flush_trans(trans);
1333                 }
1334         }
1335 }
1336
1337 /**@brief  Stop accessing the journal.
1338  * @param  journal current journal session
1339  * @return standard error code*/
1340 int jbd_journal_stop(struct jbd_journal *journal)
1341 {
1342         int r;
1343         struct jbd_fs *jbd_fs = journal->jbd_fs;
1344         uint32_t features_incompatible;
1345
1346         /* Commit all the transactions to the journal.*/
1347         jbd_journal_commit_all(journal);
1348
1349         /* Make sure that journalled content have reached
1350          * the disk.*/
1351         jbd_journal_purge_cp_trans(journal, true);
1352
1353         /* There should be no block record in this journal
1354          * session. */
1355         if (!RB_EMPTY(&journal->block_rec_root))
1356                 ext4_dbg(DEBUG_JBD,
1357                          DBG_WARN "There are still block records "
1358                                   "in this journal session!\n");
1359
1360         features_incompatible =
1361                 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1362                            features_incompatible);
1363         features_incompatible &= ~EXT4_FINCOM_RECOVER;
1364         ext4_set32(&jbd_fs->inode_ref.fs->sb,
1365                         features_incompatible,
1366                         features_incompatible);
1367         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1368                         &jbd_fs->inode_ref.fs->sb);
1369         if (r != EOK)
1370                 return r;
1371
1372         journal->start = 0;
1373         journal->trans_id = 0;
1374         jbd_journal_write_sb(journal);
1375         return jbd_write_sb(journal->jbd_fs);
1376 }
1377
1378 /**@brief  Allocate a block in the journal.
1379  * @param  journal current journal session
1380  * @param  trans transaction
1381  * @return allocated block address*/
1382 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
1383                                         struct jbd_trans *trans)
1384 {
1385         uint32_t start_block;
1386
1387         start_block = journal->last++;
1388         trans->alloc_blocks++;
1389         wrap(&journal->jbd_fs->sb, journal->last);
1390         
1391         /* If there is no space left, flush all journalled
1392          * blocks to disk first.*/
1393         if (journal->last == journal->start)
1394                 jbd_journal_purge_cp_trans(journal, true);
1395
1396         return start_block;
1397 }
1398
1399 /**@brief  Allocate a new transaction
1400  * @param  journal current journal session
1401  * @return transaction allocated*/
1402 struct jbd_trans *
1403 jbd_journal_new_trans(struct jbd_journal *journal)
1404 {
1405         struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
1406         if (!trans)
1407                 return NULL;
1408
1409         /* We will assign a trans_id to this transaction,
1410          * once it has been committed.*/
1411         trans->journal = journal;
1412         trans->data_csum = EXT4_CRC32_INIT;
1413         trans->error = EOK;
1414         TAILQ_INIT(&trans->buf_queue);
1415         return trans;
1416 }
1417
1418 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1419                           struct ext4_buf *buf __unused,
1420                           int res,
1421                           void *arg);
1422
1423 /**@brief  gain access to it before making any modications.
1424  * @param  journal current journal session
1425  * @param  trans transaction
1426  * @param  block descriptor
1427  * @return standard error code.*/
1428 int jbd_trans_get_access(struct jbd_journal *journal,
1429                          struct jbd_trans *trans,
1430                          struct ext4_block *block)
1431 {
1432         int r = EOK;
1433         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1434         struct jbd_buf *jbd_buf = block->buf->end_write_arg;
1435
1436         /* If the buffer has already been modified, we should
1437          * flush dirty data in this buffer to disk.*/
1438         if (ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
1439             block->buf->end_write == jbd_trans_end_write) {
1440                 ext4_assert(jbd_buf);
1441                 if (jbd_buf->trans != trans)
1442                         r = ext4_block_flush_buf(fs->bdev, block->buf);
1443
1444         }
1445         return r;
1446 }
1447
1448 static struct jbd_block_rec *
1449 jbd_trans_block_rec_lookup(struct jbd_journal *journal,
1450                            ext4_fsblk_t lba)
1451 {
1452         struct jbd_block_rec tmp = {
1453                 .lba = lba
1454         };
1455
1456         return RB_FIND(jbd_block,
1457                        &journal->block_rec_root,
1458                        &tmp);
1459 }
1460
1461 static inline struct jbd_block_rec *
1462 jbd_trans_insert_block_rec(struct jbd_trans *trans,
1463                            ext4_fsblk_t lba,
1464                            struct ext4_buf *buf)
1465 {
1466         struct jbd_block_rec *block_rec;
1467         block_rec = jbd_trans_block_rec_lookup(trans->journal, lba);
1468         if (block_rec) {
1469                 LIST_REMOVE(block_rec, tbrec_node);
1470                 /* Data should be flushed to disk already. */
1471                 ext4_assert(!block_rec->buf);
1472                 /* Now this block record belongs to this transaction. */
1473                 LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
1474                 block_rec->trans = trans;
1475                 return block_rec;
1476         }
1477         block_rec = calloc(1, sizeof(struct jbd_block_rec));
1478         if (!block_rec)
1479                 return NULL;
1480
1481         block_rec->lba = lba;
1482         block_rec->buf = buf;
1483         block_rec->trans = trans;
1484         LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
1485         RB_INSERT(jbd_block, &trans->journal->block_rec_root, block_rec);
1486         return block_rec;
1487 }
1488
1489 static inline void
1490 jbd_trans_remove_block_rec(struct jbd_journal *journal,
1491                            struct jbd_block_rec *block_rec,
1492                            struct jbd_trans *trans)
1493 {
1494         /* If this block record doesn't belong to this transaction,
1495          * give up.*/
1496         if (block_rec->trans == trans) {
1497                 LIST_REMOVE(block_rec, tbrec_node);
1498                 RB_REMOVE(jbd_block,
1499                                 &journal->block_rec_root,
1500                                 block_rec);
1501                 free(block_rec);
1502         }
1503 }
1504
1505 /**@brief  Add block to a transaction and mark it dirty.
1506  * @param  trans transaction
1507  * @param  block block descriptor
1508  * @return standard error code*/
1509 int jbd_trans_set_block_dirty(struct jbd_trans *trans,
1510                               struct ext4_block *block)
1511 {
1512         struct jbd_buf *buf;
1513
1514         if (!ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
1515             block->buf->end_write != jbd_trans_end_write) {
1516                 struct jbd_block_rec *block_rec;
1517                 buf = calloc(1, sizeof(struct jbd_buf));
1518                 if (!buf)
1519                         return ENOMEM;
1520
1521                 if ((block_rec = jbd_trans_insert_block_rec(trans,
1522                                         block->lb_id,
1523                                         block->buf)) == NULL) {
1524                         free(buf);
1525                         return ENOMEM;
1526                 }
1527
1528                 buf->block_rec = block_rec;
1529                 buf->trans = trans;
1530                 buf->block = *block;
1531                 ext4_bcache_inc_ref(block->buf);
1532
1533                 /* If the content reach the disk, notify us
1534                  * so that we may do a checkpoint. */
1535                 block->buf->end_write = jbd_trans_end_write;
1536                 block->buf->end_write_arg = buf;
1537
1538                 trans->data_cnt++;
1539                 TAILQ_INSERT_HEAD(&trans->buf_queue, buf, buf_node);
1540
1541                 ext4_bcache_set_dirty(block->buf);
1542         }
1543         return EOK;
1544 }
1545
1546 /**@brief  Add block to be revoked to a transaction
1547  * @param  trans transaction
1548  * @param  lba logical block address
1549  * @return standard error code*/
1550 int jbd_trans_revoke_block(struct jbd_trans *trans,
1551                            ext4_fsblk_t lba)
1552 {
1553         struct jbd_revoke_rec *rec =
1554                 calloc(1, sizeof(struct jbd_revoke_rec));
1555         if (!rec)
1556                 return ENOMEM;
1557
1558         rec->lba = lba;
1559         LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
1560         return EOK;
1561 }
1562
1563 /**@brief  Try to add block to be revoked to a transaction.
1564  *         If @lba still remains in an transaction on checkpoint
1565  *         queue, add @lba as a revoked block to the transaction.
1566  * @param  trans transaction
1567  * @param  lba logical block address
1568  * @return standard error code*/
1569 int jbd_trans_try_revoke_block(struct jbd_trans *trans,
1570                                ext4_fsblk_t lba)
1571 {
1572         int r = EOK;
1573         struct jbd_journal *journal = trans->journal;
1574         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1575         struct jbd_block_rec *block_rec =
1576                 jbd_trans_block_rec_lookup(journal, lba);
1577
1578         /* Make sure we don't flush any buffers belong to this transaction. */
1579         if (block_rec && block_rec->trans != trans) {
1580                 /* If the buffer has not been flushed yet, flush it now. */
1581                 if (block_rec->buf) {
1582                         r = ext4_block_flush_buf(fs->bdev, block_rec->buf);
1583                         if (r != EOK)
1584                                 return r;
1585
1586                 }
1587
1588                 jbd_trans_revoke_block(trans, lba);
1589         }
1590
1591         return EOK;
1592 }
1593
1594 /**@brief  Free a transaction
1595  * @param  journal current journal session
1596  * @param  trans transaction
1597  * @param  abort discard all the modifications on the block?
1598  * @return standard error code*/
1599 void jbd_journal_free_trans(struct jbd_journal *journal,
1600                             struct jbd_trans *trans,
1601                             bool abort)
1602 {
1603         struct jbd_buf *jbd_buf, *tmp;
1604         struct jbd_revoke_rec *rec, *tmp2;
1605         struct jbd_block_rec *block_rec, *tmp3;
1606         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1607         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1608                           tmp) {
1609                 if (abort) {
1610                         jbd_buf->block.buf->end_write = NULL;
1611                         jbd_buf->block.buf->end_write_arg = NULL;
1612                         ext4_bcache_clear_dirty(jbd_buf->block.buf);
1613                         ext4_block_set(fs->bdev, &jbd_buf->block);
1614                 }
1615
1616                 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1617                 free(jbd_buf);
1618         }
1619         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1620                           tmp2) {
1621                 LIST_REMOVE(rec, revoke_node);
1622                 free(rec);
1623         }
1624         LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
1625                           tmp3) {
1626                 jbd_trans_remove_block_rec(journal, block_rec, trans);
1627         }
1628
1629         free(trans);
1630 }
1631
1632 /**@brief  Write commit block for a transaction
1633  * @param  trans transaction
1634  * @return standard error code*/
1635 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
1636 {
1637         int rc;
1638         struct jbd_commit_header *header;
1639         uint32_t commit_iblock = 0;
1640         struct ext4_block commit_block;
1641         struct jbd_journal *journal = trans->journal;
1642
1643         commit_iblock = jbd_journal_alloc_block(journal, trans);
1644         rc = jbd_block_get_noread(journal->jbd_fs,
1645                         &commit_block, commit_iblock);
1646         if (rc != EOK)
1647                 return rc;
1648
1649         header = (struct jbd_commit_header *)commit_block.data;
1650         jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
1651         jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
1652         jbd_set32(&header->header, sequence, trans->trans_id);
1653
1654         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1655                                 JBD_FEATURE_COMPAT_CHECKSUM)) {
1656                 jbd_set32(header, chksum_type, JBD_CRC32_CHKSUM);
1657                 jbd_set32(header, chksum_size, JBD_CRC32_CHKSUM_SIZE);
1658                 jbd_set32(header, chksum[0], trans->data_csum);
1659         }
1660         jbd_commit_csum_set(journal->jbd_fs, header);
1661         ext4_bcache_set_dirty(commit_block.buf);
1662         rc = jbd_block_set(journal->jbd_fs, &commit_block);
1663         if (rc != EOK)
1664                 return rc;
1665
1666         return EOK;
1667 }
1668
1669 /**@brief  Write descriptor block for a transaction
1670  * @param  journal current journal session
1671  * @param  trans transaction
1672  * @return standard error code*/
1673 static int jbd_journal_prepare(struct jbd_journal *journal,
1674                                struct jbd_trans *trans)
1675 {
1676         int rc = EOK, i = 0;
1677         int32_t tag_tbl_size;
1678         uint32_t desc_iblock = 0;
1679         uint32_t data_iblock = 0;
1680         char *tag_start = NULL, *tag_ptr = NULL;
1681         struct jbd_buf *jbd_buf, *tmp;
1682         struct ext4_block desc_block, data_block;
1683         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1684         uint32_t checksum = EXT4_CRC32_INIT;
1685
1686         /* Try to remove any non-dirty buffers from the tail of
1687          * buf_queue. */
1688         TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
1689                         jbd_trans_buf, buf_node, tmp) {
1690                 /* We stop the iteration when we find a dirty buffer. */
1691                 if (ext4_bcache_test_flag(jbd_buf->block.buf,
1692                                         BC_DIRTY))
1693                         break;
1694
1695                 /* The buffer has not been modified, just release
1696                  * that jbd_buf. */
1697                 jbd_trans_remove_block_rec(journal,
1698                                 jbd_buf->block_rec, trans);
1699                 trans->data_cnt--;
1700
1701                 jbd_buf->block.buf->end_write = NULL;
1702                 jbd_buf->block.buf->end_write_arg = NULL;
1703                 ext4_block_set(fs->bdev, &jbd_buf->block);
1704                 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1705                 free(jbd_buf);
1706         }
1707
1708         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
1709                 struct tag_info tag_info;
1710                 bool uuid_exist = false;
1711                 if (!ext4_bcache_test_flag(jbd_buf->block.buf,
1712                                            BC_DIRTY)) {
1713                         /* The buffer has not been modified, just release
1714                          * that jbd_buf. */
1715                         jbd_trans_remove_block_rec(journal,
1716                                         jbd_buf->block_rec, trans);
1717                         trans->data_cnt--;
1718
1719                         jbd_buf->block.buf->end_write = NULL;
1720                         jbd_buf->block.buf->end_write_arg = NULL;
1721                         ext4_block_set(fs->bdev, &jbd_buf->block);
1722                         TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1723                         free(jbd_buf);
1724                         continue;
1725                 }
1726                 checksum = jbd_block_csum(journal->jbd_fs,
1727                                           jbd_buf->block.data,
1728                                           checksum,
1729                                           trans->trans_id);
1730 again:
1731                 if (!desc_iblock) {
1732                         struct jbd_bhdr *bhdr;
1733                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1734                         rc = jbd_block_get_noread(journal->jbd_fs,
1735                                            &desc_block, desc_iblock);
1736                         if (rc != EOK)
1737                                 break;
1738
1739                         ext4_bcache_set_dirty(desc_block.buf);
1740
1741                         bhdr = (struct jbd_bhdr *)desc_block.data;
1742                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1743                         jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
1744                         jbd_set32(bhdr, sequence, trans->trans_id);
1745
1746                         tag_start = (char *)(bhdr + 1);
1747                         tag_ptr = tag_start;
1748                         uuid_exist = true;
1749                         tag_tbl_size = journal->block_size -
1750                                 sizeof(struct jbd_bhdr);
1751
1752                         if (jbd_has_csum(&journal->jbd_fs->sb))
1753                                 tag_tbl_size -= sizeof(struct jbd_block_tail);
1754
1755                         if (!trans->start_iblock)
1756                                 trans->start_iblock = desc_iblock;
1757
1758                 }
1759                 tag_info.block = jbd_buf->block.lb_id;
1760                 tag_info.uuid_exist = uuid_exist;
1761                 if (i == trans->data_cnt - 1)
1762                         tag_info.last_tag = true;
1763                 else
1764                         tag_info.last_tag = false;
1765
1766                 tag_info.checksum = checksum;
1767
1768                 if (uuid_exist)
1769                         memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1770                                         UUID_SIZE);
1771
1772                 rc = jbd_write_block_tag(journal->jbd_fs,
1773                                 tag_ptr,
1774                                 tag_tbl_size,
1775                                 &tag_info);
1776                 if (rc != EOK) {
1777                         jbd_meta_csum_set(journal->jbd_fs,
1778                                         (struct jbd_bhdr *)desc_block.data);
1779                         jbd_block_set(journal->jbd_fs, &desc_block);
1780                         desc_iblock = 0;
1781                         goto again;
1782                 }
1783
1784                 data_iblock = jbd_journal_alloc_block(journal, trans);
1785                 rc = jbd_block_get_noread(journal->jbd_fs,
1786                                 &data_block, data_iblock);
1787                 if (rc != EOK)
1788                         break;
1789
1790                 ext4_bcache_set_dirty(data_block.buf);
1791
1792                 memcpy(data_block.data, jbd_buf->block.data,
1793                         journal->block_size);
1794
1795                 rc = jbd_block_set(journal->jbd_fs, &data_block);
1796                 if (rc != EOK)
1797                         break;
1798
1799                 tag_ptr += tag_info.tag_bytes;
1800                 tag_tbl_size -= tag_info.tag_bytes;
1801
1802                 i++;
1803         }
1804         if (rc == EOK && desc_iblock) {
1805                 jbd_meta_csum_set(journal->jbd_fs,
1806                                 (struct jbd_bhdr *)desc_block.data);
1807                 trans->data_csum = checksum;
1808                 jbd_block_set(journal->jbd_fs, &desc_block);
1809         }
1810
1811         return rc;
1812 }
1813
1814 /**@brief  Write revoke block for a transaction
1815  * @param  journal current journal session
1816  * @param  trans transaction
1817  * @return standard error code*/
1818 static int
1819 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1820                            struct jbd_trans *trans)
1821 {
1822         int rc = EOK, i = 0;
1823         int32_t tag_tbl_size;
1824         uint32_t desc_iblock = 0;
1825         char *blocks_entry = NULL;
1826         struct jbd_revoke_rec *rec, *tmp;
1827         struct ext4_block desc_block;
1828         struct jbd_revoke_header *header = NULL;
1829         int32_t record_len = 4;
1830
1831         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1832                                      JBD_FEATURE_INCOMPAT_64BIT))
1833                 record_len = 8;
1834
1835         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1836                           tmp) {
1837 again:
1838                 if (!desc_iblock) {
1839                         struct jbd_bhdr *bhdr;
1840                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1841                         rc = jbd_block_get_noread(journal->jbd_fs,
1842                                            &desc_block, desc_iblock);
1843                         if (rc != EOK) {
1844                                 break;
1845                         }
1846
1847                         ext4_bcache_set_dirty(desc_block.buf);
1848
1849                         bhdr = (struct jbd_bhdr *)desc_block.data;
1850                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1851                         jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1852                         jbd_set32(bhdr, sequence, trans->trans_id);
1853                         
1854                         header = (struct jbd_revoke_header *)bhdr;
1855                         blocks_entry = (char *)(header + 1);
1856                         tag_tbl_size = journal->block_size -
1857                                 sizeof(struct jbd_revoke_header);
1858
1859                         if (jbd_has_csum(&journal->jbd_fs->sb))
1860                                 tag_tbl_size -= sizeof(struct jbd_block_tail);
1861
1862                         if (!trans->start_iblock)
1863                                 trans->start_iblock = desc_iblock;
1864
1865                 }
1866
1867                 if (tag_tbl_size < record_len) {
1868                         jbd_set32(header, count,
1869                                   journal->block_size - tag_tbl_size);
1870                         jbd_meta_csum_set(journal->jbd_fs,
1871                                         (struct jbd_bhdr *)desc_block.data);
1872                         jbd_block_set(journal->jbd_fs, &desc_block);
1873                         desc_iblock = 0;
1874                         header = NULL;
1875                         goto again;
1876                 }
1877                 if (record_len == 8) {
1878                         uint64_t *blocks =
1879                                 (uint64_t *)blocks_entry;
1880                         *blocks = to_be64(rec->lba);
1881                 } else {
1882                         uint32_t *blocks =
1883                                 (uint32_t *)blocks_entry;
1884                         *blocks = to_be32(rec->lba);
1885                 }
1886                 blocks_entry += record_len;
1887                 tag_tbl_size -= record_len;
1888
1889                 i++;
1890         }
1891         if (rc == EOK && desc_iblock) {
1892                 if (header != NULL)
1893                         jbd_set32(header, count,
1894                                   journal->block_size - tag_tbl_size);
1895
1896                 jbd_meta_csum_set(journal->jbd_fs,
1897                                 (struct jbd_bhdr *)desc_block.data);
1898                 jbd_block_set(journal->jbd_fs, &desc_block);
1899         }
1900
1901         return rc;
1902 }
1903
1904 /**@brief  Submit the transaction to transaction queue.
1905  * @param  journal current journal session
1906  * @param  trans transaction*/
1907 void
1908 jbd_journal_submit_trans(struct jbd_journal *journal,
1909                          struct jbd_trans *trans)
1910 {
1911         TAILQ_INSERT_TAIL(&journal->trans_queue,
1912                           trans,
1913                           trans_node);
1914 }
1915
1916 /**@brief  Put references of block descriptors in a transaction.
1917  * @param  journal current journal session
1918  * @param  trans transaction*/
1919 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
1920 {
1921         struct jbd_buf *jbd_buf, *tmp;
1922         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1923         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1924                         tmp) {
1925                 struct ext4_block block = jbd_buf->block;
1926                 ext4_block_set(fs->bdev, &block);
1927         }
1928 }
1929
1930 /**@brief  Update the start block of the journal when
1931  *         all the contents in a transaction reach the disk.*/
1932 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1933                           struct ext4_buf *buf,
1934                           int res,
1935                           void *arg)
1936 {
1937         struct jbd_buf *jbd_buf = arg;
1938         struct jbd_trans *trans = jbd_buf->trans;
1939         struct jbd_journal *journal = trans->journal;
1940         bool first_in_queue =
1941                 trans == TAILQ_FIRST(&journal->cp_queue);
1942         if (res != EOK)
1943                 trans->error = res;
1944
1945         TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1946         jbd_buf->block_rec->buf = NULL;
1947         free(jbd_buf);
1948
1949         /* Clear the end_write and end_write_arg fields. */
1950         buf->end_write = NULL;
1951         buf->end_write_arg = NULL;
1952
1953         trans->written_cnt++;
1954         if (trans->written_cnt == trans->data_cnt) {
1955                 /* If it is the first transaction on checkpoint queue,
1956                  * we will shift the start of the journal to the next
1957                  * transaction, and remove subsequent written
1958                  * transactions from checkpoint queue until we find
1959                  * an unwritten one. */
1960                 if (first_in_queue) {
1961                         journal->start = trans->start_iblock +
1962                                 trans->alloc_blocks;
1963                         wrap(&journal->jbd_fs->sb, journal->start);
1964                         journal->trans_id = trans->trans_id + 1;
1965                         TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
1966                         jbd_journal_free_trans(journal, trans, false);
1967
1968                         jbd_journal_purge_cp_trans(journal, false);
1969                         jbd_journal_write_sb(journal);
1970                         jbd_write_sb(journal->jbd_fs);
1971                 }
1972         }
1973 }
1974
1975 /**@brief  Commit a transaction to the journal immediately.
1976  * @param  journal current journal session
1977  * @param  trans transaction
1978  * @return standard error code*/
1979 int jbd_journal_commit_trans(struct jbd_journal *journal,
1980                              struct jbd_trans *trans)
1981 {
1982         int rc = EOK;
1983         uint32_t last = journal->last;
1984
1985         trans->trans_id = journal->alloc_trans_id;
1986         rc = jbd_journal_prepare(journal, trans);
1987         if (rc != EOK)
1988                 goto Finish;
1989
1990         rc = jbd_journal_prepare_revoke(journal, trans);
1991         if (rc != EOK)
1992                 goto Finish;
1993
1994         if (TAILQ_EMPTY(&trans->buf_queue) &&
1995             LIST_EMPTY(&trans->revoke_list)) {
1996                 /* Since there are no entries in both buffer list
1997                  * and revoke entry list, we do not consider trans as
1998                  * complete transaction and just return EOK.*/
1999                 jbd_journal_free_trans(journal, trans, false);
2000                 goto Finish;
2001         }
2002
2003         rc = jbd_trans_write_commit_block(trans);
2004         if (rc != EOK)
2005                 goto Finish;
2006
2007         journal->alloc_trans_id++;
2008         if (TAILQ_EMPTY(&journal->cp_queue)) {
2009                 if (trans->data_cnt) {
2010                         journal->start = trans->start_iblock;
2011                         wrap(&journal->jbd_fs->sb, journal->start);
2012                         journal->trans_id = trans->trans_id;
2013                         jbd_journal_write_sb(journal);
2014                         jbd_write_sb(journal->jbd_fs);
2015                         TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2016                                         trans_node);
2017                         jbd_journal_cp_trans(journal, trans);
2018                 } else {
2019                         journal->start = trans->start_iblock +
2020                                 trans->alloc_blocks;
2021                         wrap(&journal->jbd_fs->sb, journal->start);
2022                         journal->trans_id = trans->trans_id + 1;
2023                         jbd_journal_write_sb(journal);
2024                         jbd_journal_free_trans(journal, trans, false);
2025                 }
2026         } else {
2027                 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2028                                 trans_node);
2029                 if (trans->data_cnt)
2030                         jbd_journal_cp_trans(journal, trans);
2031
2032         }
2033 Finish:
2034         if (rc != EOK) {
2035                 journal->last = last;
2036                 jbd_journal_free_trans(journal, trans, true);
2037         }
2038         return rc;
2039 }
2040
2041 /**@brief  Commit one transaction on transaction queue
2042  *         to the journal.
2043  * @param  journal current journal session.*/
2044 void jbd_journal_commit_one(struct jbd_journal *journal)
2045 {
2046         struct jbd_trans *trans;
2047
2048         if ((trans = TAILQ_FIRST(&journal->trans_queue))) {
2049                 TAILQ_REMOVE(&journal->trans_queue, trans, trans_node);
2050                 jbd_journal_commit_trans(journal, trans);
2051         }
2052 }
2053
2054 /**@brief  Commit all the transactions on transaction queue
2055  *         to the journal.
2056  * @param  journal current journal session.*/
2057 void jbd_journal_commit_all(struct jbd_journal *journal)
2058 {
2059         while (!TAILQ_EMPTY(&journal->trans_queue)) {
2060                 jbd_journal_commit_one(journal);
2061         }
2062 }
2063
2064 /**
2065  * @}
2066  */