ext4_journal: remove pending transaction support. (useless)
[lwext4.git] / lwext4 / ext4_journal.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * - Redistributions of source code must retain the above copyright
11  *   notice, this list of conditions and the following disclaimer.
12  * - Redistributions in binary form must reproduce the above copyright
13  *   notice, this list of conditions and the following disclaimer in the
14  *   documentation and/or other materials provided with the distribution.
15  * - The name of the author may not be used to endorse or promote products
16  *   derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 /** @addtogroup lwext4
31  * @{
32  */
33 /**
34  * @file  ext4_journal.c
35  * @brief Journal handle functions
36  */
37
38 #include "ext4_config.h"
39 #include "ext4_types.h"
40 #include "ext4_fs.h"
41 #include "ext4_super.h"
42 #include "ext4_journal.h"
43 #include "ext4_errno.h"
44 #include "ext4_blockdev.h"
45 #include "ext4_crc32.h"
46 #include "ext4_debug.h"
47
48 #include <string.h>
49 #include <stdlib.h>
50
51 /**@brief  Revoke entry during journal replay.*/
52 struct revoke_entry {
53         /**@brief  Block number not to be replayed.*/
54         ext4_fsblk_t block;
55
56         /**@brief  For any transaction id smaller
57          *         than trans_id, records of @block
58          *         in those transactions should not
59          *         be replayed.*/
60         uint32_t trans_id;
61
62         /**@brief  Revoke tree node.*/
63         RB_ENTRY(revoke_entry) revoke_node;
64 };
65
66 /**@brief  Valid journal replay information.*/
67 struct recover_info {
68         /**@brief  Starting transaction id.*/
69         uint32_t start_trans_id;
70
71         /**@brief  Ending transaction id.*/
72         uint32_t last_trans_id;
73
74         /**@brief  Used as internal argument.*/
75         uint32_t this_trans_id;
76
77         /**@brief  No of transactions went through.*/
78         uint32_t trans_cnt;
79
80         /**@brief  RB-Tree storing revoke entries.*/
81         RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
82 };
83
/**@brief  Argument bundle handed to the replay callbacks.*/
struct replay_arg {
        /**@brief  Recovery state shared by the whole replay.*/
        struct recover_info *info;

        /**@brief  Points at the counter of the journal block
         *         currently being processed.*/
        uint32_t *this_block;

        /**@brief  Id of the transaction currently being processed.*/
        uint32_t this_trans_id;
};
95
96 static int
97 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
98 {
99         if (a->block > b->block)
100                 return 1;
101         else if (a->block < b->block)
102                 return -1;
103         return 0;
104 }
105
106 static int
107 jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b)
108 {
109         if (a->lba > b->lba)
110                 return 1;
111         else if (a->lba < b->lba)
112                 return -1;
113         return 0;
114 }
115
116 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
117                      jbd_revoke_entry_cmp, static inline)
118 RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
119                      jbd_block_rec_cmp, static inline)
120
121 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
122 #define jbd_free_revoke_entry(addr) free(addr)
123
124 static int jbd_has_csum(struct jbd_sb *jbd_sb)
125 {
126         if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V2))
127                 return 2;
128
129         if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V3))
130                 return 3;
131
132         return 0;
133 }
134
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the crc32c checksum of the jbd superblock.
 *         The checksum field is zeroed for the computation and
 *         restored afterwards.
 * @param  jbd_sb jbd superblock
 * @return checksum, or 0 when the journal has no checksum feature*/
static uint32_t jbd_sb_csum(struct jbd_sb *jbd_sb)
{
        uint32_t saved;
        uint32_t crc;

        if (!jbd_has_csum(jbd_sb))
                return 0;

        saved = jbd_sb->checksum;
        jbd_set32(jbd_sb, checksum, 0);
        /* The checksum covers the whole superblock. */
        crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_sb, JBD_SUPERBLOCK_SIZE);
        jbd_sb->checksum = saved;

        return crc;
}
#else
#define jbd_sb_csum(...) 0
#endif
153
154 static void jbd_sb_csum_set(struct jbd_sb *jbd_sb)
155 {
156         if (!jbd_has_csum(jbd_sb))
157                 return;
158
159         jbd_set32(jbd_sb, checksum, jbd_sb_csum(jbd_sb));
160 }
161
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum stored in the jbd superblock.
 * @param  jbd_sb jbd superblock
 * @return true when the journal has no checksum feature or the
 *         stored checksum matches the computed one*/
static bool
jbd_verify_sb_csum(struct jbd_sb *jbd_sb)
{
        uint32_t stored;
        uint32_t computed;

        if (!jbd_has_csum(jbd_sb))
                return true;

        stored = jbd_get32(jbd_sb, checksum);
        computed = jbd_sb_csum(jbd_sb);

        return computed == stored;
}
#else
#define jbd_verify_sb_csum(...) true
#endif
174
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the crc32c checksum of a journal metadata block.
 *         The checksum kept in the block tail is zeroed for the
 *         computation and restored afterwards.
 * @param  jbd_fs jbd filesystem
 * @param  bhdr start of the metadata block
 * @return checksum, or 0 when the journal has no checksum feature*/
static uint32_t jbd_meta_csum(struct jbd_fs *jbd_fs,
                              struct jbd_bhdr *bhdr)
{
        uint32_t block_size;
        uint32_t saved;
        uint32_t crc;
        struct jbd_block_tail *tail;

        if (!jbd_has_csum(&jbd_fs->sb))
                return 0;

        block_size = jbd_get32(&jbd_fs->sb, blocksize);
        /* The tail occupies the last bytes of the block. */
        tail = (struct jbd_block_tail *)
                ((char *)bhdr + block_size - sizeof(struct jbd_block_tail));

        saved = tail->checksum;
        tail->checksum = 0;

        /* Seed with the uuid stored in the jbd superblock ... */
        crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
                          sizeof(jbd_fs->sb.uuid));
        /* ... then cover the whole block. */
        crc = ext4_crc32c(crc, bhdr, block_size);

        tail->checksum = saved;
        return crc;
}
#else
#define jbd_meta_csum(...) 0
#endif
202
203 static void jbd_meta_csum_set(struct jbd_fs *jbd_fs,
204                               struct jbd_bhdr *bhdr)
205 {
206         uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
207         struct jbd_block_tail *tail = (struct jbd_block_tail *)
208                                 ((char *)bhdr + block_size -
209                                 sizeof(struct jbd_block_tail));
210         if (!jbd_has_csum(&jbd_fs->sb))
211                 return;
212
213         tail->checksum = to_be32(jbd_meta_csum(jbd_fs, bhdr));
214 }
215
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum kept in the tail of a journal
 *         metadata block.
 * @param  jbd_fs jbd filesystem
 * @param  bhdr start of the metadata block
 * @return true when the journal has no checksum feature or the
 *         stored checksum matches the computed one*/
static bool
jbd_verify_meta_csum(struct jbd_fs *jbd_fs,
                     struct jbd_bhdr *bhdr)
{
        struct jbd_block_tail *tail;
        uint32_t block_size;

        if (!jbd_has_csum(&jbd_fs->sb))
                return true;

        block_size = jbd_get32(&jbd_fs->sb, blocksize);
        tail = (struct jbd_block_tail *)
                ((char *)bhdr + block_size - sizeof(struct jbd_block_tail));

        return jbd_meta_csum(jbd_fs, bhdr) == to_be32(tail->checksum);
}
#else
#define jbd_verify_meta_csum(...) true
#endif
233
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the crc32c checksum of a commit block.
 *         chksum_type, chksum_size and chksum[0] are zeroed for
 *         the computation and restored afterwards.
 * @param  jbd_fs jbd filesystem
 * @param  header commit block header (start of the commit block)
 * @return checksum, or 0 when the journal has no checksum feature*/
static uint32_t jbd_commit_csum(struct jbd_fs *jbd_fs,
                              struct jbd_commit_header *header)
{
        uint32_t saved_type;
        uint32_t saved_size;
        uint32_t saved_csum;
        uint32_t block_size;
        uint32_t crc;

        if (!jbd_has_csum(&jbd_fs->sb))
                return 0;

        saved_type = header->chksum_type;
        saved_size = header->chksum_size;
        saved_csum = header->chksum[0];
        block_size = jbd_get32(&jbd_fs->sb, blocksize);

        header->chksum_type = 0;
        header->chksum_size = 0;
        header->chksum[0] = 0;

        /* Seed with the uuid stored in the jbd superblock ... */
        crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
                          sizeof(jbd_fs->sb.uuid));
        /* ... then cover the whole block. */
        crc = ext4_crc32c(crc, header, block_size);

        header->chksum_type = saved_type;
        header->chksum_size = saved_size;
        header->chksum[0] = saved_csum;

        return crc;
}
#else
#define jbd_commit_csum(...) 0
#endif
265
266 static void jbd_commit_csum_set(struct jbd_fs *jbd_fs,
267                               struct jbd_commit_header *header)
268 {
269         if (!jbd_has_csum(&jbd_fs->sb))
270                 return;
271
272         header->chksum_type = 0;
273         header->chksum_size = 0;
274         header->chksum[0] = jbd_commit_csum(jbd_fs, header);
275 }
276
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum stored in a commit block header.
 * @param  jbd_fs jbd filesystem
 * @param  header commit block header (start of the commit block)
 * @return true when the journal has no checksum feature or
 *         chksum[0] matches the big-endian computed checksum*/
static bool jbd_verify_commit_csum(struct jbd_fs *jbd_fs,
                                   struct jbd_commit_header *header)
{
        uint32_t expected;

        if (!jbd_has_csum(&jbd_fs->sb))
                return true;

        expected = to_be32(jbd_commit_csum(jbd_fs, header));
        return header->chksum[0] == expected;
}
#else
#define jbd_verify_commit_csum(...) true
#endif
290
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the checksum of a data block logged in the journal.
 * @param  jbd_fs jbd filesystem
 * @param  buf block content (one journal block in size)
 * @param  csum running checksum; only used on the
 *         JBD_FEATURE_COMPAT_CHECKSUM path
 * @param  sequence transaction sequence number; only used when the
 *         journal has a v2/v3 checksum feature
 * @return checksum, or 0 when no checksum feature applies*/
static uint32_t jbd_block_csum(struct jbd_fs *jbd_fs, const void *buf,
                               uint32_t csum,
                               uint32_t sequence)
{
        uint32_t block_size;
        uint32_t crc;

        if (jbd_has_csum(&jbd_fs->sb)) {
                block_size = jbd_get32(&jbd_fs->sb, blocksize);
                /* crc32c over: jbd superblock uuid, sequence number,
                 * then the whole block.
                 * NOTE(review): the sequence number is hashed exactly
                 * as passed in - confirm callers supply it in the
                 * on-disk byte order. */
                crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
                                  sizeof(jbd_fs->sb.uuid));
                crc = ext4_crc32c(crc, &sequence, sizeof(uint32_t));
                crc = ext4_crc32c(crc, buf, block_size);
                return crc;
        }

        /* NOTE(review): a COMPAT feature flag is tested through the
         * INCOMPAT feature macro here - confirm this is intended. */
        if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
                                     JBD_FEATURE_COMPAT_CHECKSUM)) {
                block_size = jbd_get32(&jbd_fs->sb, blocksize);
                /* crc32 over the whole block, continuing from @csum. */
                return ext4_crc32(csum, buf, block_size);
        }

        return 0;
}
#else
#define jbd_block_csum(...) 0
#endif
325
326 static void jbd_block_tag_csum_set(struct jbd_fs *jbd_fs, void *__tag,
327                                    uint32_t checksum)
328 {
329         int ver = jbd_has_csum(&jbd_fs->sb);
330         if (!ver)
331                 return;
332
333         if (ver == 2) {
334                 struct jbd_block_tag *tag = __tag;
335                 tag->checksum = (uint16_t)to_be32(checksum);
336         } else {
337                 struct jbd_block_tag3 *tag = __tag;
338                 tag->checksum = to_be32(checksum);
339         }
340 }
341
342 /**@brief  Write jbd superblock to disk.
343  * @param  jbd_fs jbd filesystem
344  * @param  s jbd superblock
345  * @return standard error code*/
346 static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
347 {
348         int rc;
349         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
350         uint64_t offset;
351         ext4_fsblk_t fblock;
352         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
353         if (rc != EOK)
354                 return rc;
355
356         jbd_sb_csum_set(s);
357         offset = fblock * ext4_sb_get_block_size(&fs->sb);
358         return ext4_block_writebytes(fs->bdev, offset, s,
359                                      EXT4_SUPERBLOCK_SIZE);
360 }
361
362 /**@brief  Read jbd superblock from disk.
363  * @param  jbd_fs jbd filesystem
364  * @param  s jbd superblock
365  * @return standard error code*/
366 static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
367 {
368         int rc;
369         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
370         uint64_t offset;
371         ext4_fsblk_t fblock;
372         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
373         if (rc != EOK)
374                 return rc;
375
376         offset = fblock * ext4_sb_get_block_size(&fs->sb);
377         return ext4_block_readbytes(fs->bdev, offset, s,
378                                     EXT4_SUPERBLOCK_SIZE);
379 }
380
381 /**@brief  Verify jbd superblock.
382  * @param  sb jbd superblock
383  * @return true if jbd superblock is valid */
384 static bool jbd_verify_sb(struct jbd_sb *sb)
385 {
386         struct jbd_bhdr *header = &sb->header;
387         if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
388                 return false;
389
390         if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
391             jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
392                 return false;
393
394         return jbd_verify_sb_csum(sb);
395 }
396
397 /**@brief  Write back dirty jbd superblock to disk.
398  * @param  jbd_fs jbd filesystem
399  * @return standard error code*/
400 static int jbd_write_sb(struct jbd_fs *jbd_fs)
401 {
402         int rc = EOK;
403         if (jbd_fs->dirty) {
404                 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
405                 if (rc != EOK)
406                         return rc;
407
408                 jbd_fs->dirty = false;
409         }
410         return rc;
411 }
412
413 /**@brief  Get reference to jbd filesystem.
414  * @param  fs Filesystem to load journal of
415  * @param  jbd_fs jbd filesystem
416  * @return standard error code*/
417 int jbd_get_fs(struct ext4_fs *fs,
418                struct jbd_fs *jbd_fs)
419 {
420         int rc;
421         uint32_t journal_ino;
422
423         memset(jbd_fs, 0, sizeof(struct jbd_fs));
424         /* See if there is journal inode on this filesystem.*/
425         /* FIXME: detection on existance ofbkejournal bdev is
426          *        missing.*/
427         journal_ino = ext4_get32(&fs->sb, journal_inode_number);
428
429         rc = ext4_fs_get_inode_ref(fs,
430                                    journal_ino,
431                                    &jbd_fs->inode_ref);
432         if (rc != EOK) {
433                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
434                 return rc;
435         }
436         rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
437         if (rc != EOK) {
438                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
439                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
440                 return rc;
441         }
442         if (!jbd_verify_sb(&jbd_fs->sb)) {
443                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
444                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
445                 rc = EIO;
446         }
447
448         return rc;
449 }
450
451 /**@brief  Put reference of jbd filesystem.
452  * @param  jbd_fs jbd filesystem
453  * @return standard error code*/
454 int jbd_put_fs(struct jbd_fs *jbd_fs)
455 {
456         int rc = EOK;
457         rc = jbd_write_sb(jbd_fs);
458
459         ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
460         return rc;
461 }
462
463 /**@brief  Data block lookup helper.
464  * @param  jbd_fs jbd filesystem
465  * @param  iblock block index
466  * @param  fblock logical block address
467  * @return standard error code*/
468 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
469                    ext4_lblk_t iblock,
470                    ext4_fsblk_t *fblock)
471 {
472         int rc = ext4_fs_get_inode_dblk_idx(
473                         &jbd_fs->inode_ref,
474                         iblock,
475                         fblock,
476                         false);
477         return rc;
478 }
479
480 /**@brief   jbd block get function (through cache).
481  * @param   jbd_fs jbd filesystem
482  * @param   block block descriptor
483  * @param   fblock jbd logical block address
484  * @return  standard error code*/
485 static int jbd_block_get(struct jbd_fs *jbd_fs,
486                   struct ext4_block *block,
487                   ext4_fsblk_t fblock)
488 {
489         /* TODO: journal device. */
490         int rc;
491         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
492
493         /* Lookup the logical block address of
494          * fblock.*/
495         rc = jbd_inode_bmap(jbd_fs, iblock,
496                             &fblock);
497         if (rc != EOK)
498                 return rc;
499
500         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
501         rc = ext4_block_get(bdev, block, fblock);
502
503         /* If succeeded, mark buffer as BC_FLUSH to indicate
504          * that data should be written to disk immediately.*/
505         if (rc == EOK) {
506                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
507                 /* As we don't want to occupy too much space
508                  * in block cache, we set this buffer BC_TMP.*/
509                 ext4_bcache_set_flag(block->buf, BC_TMP);
510         }
511
512         return rc;
513 }
514
515 /**@brief   jbd block get function (through cache, don't read).
516  * @param   jbd_fs jbd filesystem
517  * @param   block block descriptor
518  * @param   fblock jbd logical block address
519  * @return  standard error code*/
520 static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
521                          struct ext4_block *block,
522                          ext4_fsblk_t fblock)
523 {
524         /* TODO: journal device. */
525         int rc;
526         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
527         rc = jbd_inode_bmap(jbd_fs, iblock,
528                             &fblock);
529         if (rc != EOK)
530                 return rc;
531
532         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
533         rc = ext4_block_get_noread(bdev, block, fblock);
534         if (rc == EOK)
535                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
536
537         return rc;
538 }
539
540 /**@brief   jbd block set procedure (through cache).
541  * @param   jbd_fs jbd filesystem
542  * @param   block block descriptor
543  * @return  standard error code*/
544 static int jbd_block_set(struct jbd_fs *jbd_fs,
545                   struct ext4_block *block)
546 {
547         return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
548                               block);
549 }
550
551 /**@brief  helper functions to calculate
552  *         block tag size, not including UUID part.
553  * @param  jbd_fs jbd filesystem
554  * @return tag size in bytes*/
555 static int jbd_tag_bytes(struct jbd_fs *jbd_fs)
556 {
557         int size;
558
559         /* It is very easy to deal with the case which
560          * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
561         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
562                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
563                 return sizeof(struct jbd_block_tag3);
564
565         size = sizeof(struct jbd_block_tag);
566
567         /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
568          * add 2 bytes to size.*/
569         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
570                                      JBD_FEATURE_INCOMPAT_CSUM_V2))
571                 size += sizeof(uint16_t);
572
573         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
574                                      JBD_FEATURE_INCOMPAT_64BIT))
575                 return size;
576
577         /* If block number is 4 bytes in size,
578          * minus 4 bytes from size */
579         return size - sizeof(uint32_t);
580 }
581
582 /**@brief  Tag information. */
583 struct tag_info {
584         /**@brief  Tag size in bytes, including UUID part.*/
585         int tag_bytes;
586
587         /**@brief  block number stored in this tag.*/
588         ext4_fsblk_t block;
589
590         /**@brief  whether UUID part exists or not.*/
591         bool uuid_exist;
592
593         /**@brief  UUID content if UUID part exists.*/
594         uint8_t uuid[UUID_SIZE];
595
596         /**@brief  Is this the last tag? */
597         bool last_tag;
598
599         /**@brief  crc32c checksum. */
600         uint32_t checksum;
601 };
602
603 /**@brief  Extract information from a block tag.
604  * @param  __tag pointer to the block tag
605  * @param  tag_bytes block tag size of this jbd filesystem
606  * @param  remaining size in buffer containing the block tag
607  * @param  tag_info information of this tag.
608  * @return  EOK when succeed, otherwise return EINVAL.*/
609 static int
610 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
611                       void *__tag,
612                       int tag_bytes,
613                       int32_t remain_buf_size,
614                       struct tag_info *tag_info)
615 {
616         char *uuid_start;
617         tag_info->tag_bytes = tag_bytes;
618         tag_info->uuid_exist = false;
619         tag_info->last_tag = false;
620
621         /* See whether it is possible to hold a valid block tag.*/
622         if (remain_buf_size - tag_bytes < 0)
623                 return EINVAL;
624
625         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
626                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
627                 struct jbd_block_tag3 *tag = __tag;
628                 tag_info->block = jbd_get32(tag, blocknr);
629                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
630                                              JBD_FEATURE_INCOMPAT_64BIT))
631                          tag_info->block |=
632                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
633
634                 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
635                         tag_info->block = 0;
636
637                 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
638                         /* See whether it is possible to hold UUID part.*/
639                         if (remain_buf_size - tag_bytes < UUID_SIZE)
640                                 return EINVAL;
641
642                         uuid_start = (char *)tag + tag_bytes;
643                         tag_info->uuid_exist = true;
644                         tag_info->tag_bytes += UUID_SIZE;
645                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
646                 }
647
648                 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
649                         tag_info->last_tag = true;
650
651         } else {
652                 struct jbd_block_tag *tag = __tag;
653                 tag_info->block = jbd_get32(tag, blocknr);
654                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
655                                              JBD_FEATURE_INCOMPAT_64BIT))
656                          tag_info->block |=
657                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
658
659                 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
660                         tag_info->block = 0;
661
662                 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
663                         /* See whether it is possible to hold UUID part.*/
664                         if (remain_buf_size - tag_bytes < UUID_SIZE)
665                                 return EINVAL;
666
667                         uuid_start = (char *)tag + tag_bytes;
668                         tag_info->uuid_exist = true;
669                         tag_info->tag_bytes += UUID_SIZE;
670                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
671                 }
672
673                 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
674                         tag_info->last_tag = true;
675
676         }
677         return EOK;
678 }
679
680 /**@brief  Write information to a block tag.
681  * @param  __tag pointer to the block tag
682  * @param  remaining size in buffer containing the block tag
683  * @param  tag_info information of this tag.
684  * @return  EOK when succeed, otherwise return EINVAL.*/
685 static int
686 jbd_write_block_tag(struct jbd_fs *jbd_fs,
687                     void *__tag,
688                     int32_t remain_buf_size,
689                     struct tag_info *tag_info)
690 {
691         char *uuid_start;
692         int tag_bytes = jbd_tag_bytes(jbd_fs);
693
694         tag_info->tag_bytes = tag_bytes;
695
696         /* See whether it is possible to hold a valid block tag.*/
697         if (remain_buf_size - tag_bytes < 0)
698                 return EINVAL;
699
700         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
701                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
702                 struct jbd_block_tag3 *tag = __tag;
703                 memset(tag, 0, sizeof(struct jbd_block_tag3));
704                 jbd_set32(tag, blocknr, tag_info->block);
705                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
706                                              JBD_FEATURE_INCOMPAT_64BIT))
707                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
708
709                 if (tag_info->uuid_exist) {
710                         /* See whether it is possible to hold UUID part.*/
711                         if (remain_buf_size - tag_bytes < UUID_SIZE)
712                                 return EINVAL;
713
714                         uuid_start = (char *)tag + tag_bytes;
715                         tag_info->tag_bytes += UUID_SIZE;
716                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
717                 } else
718                         jbd_set32(tag, flags,
719                                   jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
720
721                 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
722
723                 if (tag_info->last_tag)
724                         jbd_set32(tag, flags,
725                                   jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
726
727         } else {
728                 struct jbd_block_tag *tag = __tag;
729                 memset(tag, 0, sizeof(struct jbd_block_tag));
730                 jbd_set32(tag, blocknr, tag_info->block);
731                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
732                                              JBD_FEATURE_INCOMPAT_64BIT))
733                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
734
735                 if (tag_info->uuid_exist) {
736                         /* See whether it is possible to hold UUID part.*/
737                         if (remain_buf_size - tag_bytes < UUID_SIZE)
738                                 return EINVAL;
739
740                         uuid_start = (char *)tag + tag_bytes;
741                         tag_info->tag_bytes += UUID_SIZE;
742                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
743                 } else
744                         jbd_set16(tag, flags,
745                                   jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
746
747                 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
748
749                 if (tag_info->last_tag)
750                         jbd_set16(tag, flags,
751                                   jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
752
753         }
754         return EOK;
755 }
756
757 /**@brief  Iterate all block tags in a block.
758  * @param  jbd_fs jbd filesystem
759  * @param  __tag_start pointer to the block
760  * @param  tag_tbl_size size of the block
761  * @param  func callback routine to indicate that
762  *         a block tag is found
763  * @param  arg additional argument to be passed to func */
764 static void
765 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
766                         void *__tag_start,
767                         int32_t tag_tbl_size,
768                         void (*func)(struct jbd_fs * jbd_fs,
769                                         ext4_fsblk_t block,
770                                         uint8_t *uuid,
771                                         void *arg),
772                         void *arg)
773 {
774         char *tag_start, *tag_ptr;
775         int tag_bytes = jbd_tag_bytes(jbd_fs);
776         tag_start = __tag_start;
777         tag_ptr = tag_start;
778
779         /* Cut off the size of block tail storing checksum. */
780         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
781                                      JBD_FEATURE_INCOMPAT_CSUM_V2) ||
782             JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
783                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
784                 tag_tbl_size -= sizeof(struct jbd_block_tail);
785
786         while (tag_tbl_size) {
787                 struct tag_info tag_info;
788                 int rc = jbd_extract_block_tag(jbd_fs,
789                                       tag_ptr,
790                                       tag_bytes,
791                                       tag_tbl_size,
792                                       &tag_info);
793                 if (rc != EOK)
794                         break;
795
796                 if (func)
797                         func(jbd_fs, tag_info.block, tag_info.uuid, arg);
798
799                 /* Stop the iteration when we reach the last tag. */
800                 if (tag_info.last_tag)
801                         break;
802
803                 tag_ptr += tag_info.tag_bytes;
804                 tag_tbl_size -= tag_info.tag_bytes;
805         }
806 }
807
808 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
809                                    ext4_fsblk_t block,
810                                    uint8_t *uuid,
811                                    void *arg)
812 {
813         uint32_t *iblock = arg;
814         ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
815         (*iblock)++;
816         (void)jbd_fs;
817         (void)uuid;
818         return;
819 }
820
821 static struct revoke_entry *
822 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
823 {
824         struct revoke_entry tmp = {
825                 .block = block
826         };
827
828         return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
829 }
830
831 /**@brief  Replay a block in a transaction.
832  * @param  jbd_fs jbd filesystem
833  * @param  block  block address to be replayed.*/
834 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
835                                   ext4_fsblk_t block,
836                                   uint8_t *uuid __unused,
837                                   void *__arg)
838 {
839         int r;
840         struct replay_arg *arg = __arg;
841         struct recover_info *info = arg->info;
842         uint32_t *this_block = arg->this_block;
843         struct revoke_entry *revoke_entry;
844         struct ext4_block journal_block, ext4_block;
845         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
846
847         (*this_block)++;
848
849         /* We replay this block only if the current transaction id
850          * is equal or greater than that in revoke entry.*/
851         revoke_entry = jbd_revoke_entry_lookup(info, block);
852         if (revoke_entry &&
853             arg->this_trans_id < revoke_entry->trans_id)
854                 return;
855
856         ext4_dbg(DEBUG_JBD,
857                  "Replaying block in block_tag: %" PRIu64 "\n",
858                  block);
859
860         r = jbd_block_get(jbd_fs, &journal_block, *this_block);
861         if (r != EOK)
862                 return;
863
864         /* We need special treatment for ext4 superblock. */
865         if (block) {
866                 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
867                 if (r != EOK) {
868                         jbd_block_set(jbd_fs, &journal_block);
869                         return;
870                 }
871
872                 memcpy(ext4_block.data,
873                         journal_block.data,
874                         jbd_get32(&jbd_fs->sb, blocksize));
875
876                 ext4_bcache_set_dirty(ext4_block.buf);
877                 ext4_block_set(fs->bdev, &ext4_block);
878         } else {
879                 uint16_t mount_count, state;
880                 mount_count = ext4_get16(&fs->sb, mount_count);
881                 state = ext4_get16(&fs->sb, state);
882
883                 memcpy(&fs->sb,
884                         journal_block.data + EXT4_SUPERBLOCK_OFFSET,
885                         EXT4_SUPERBLOCK_SIZE);
886
887                 /* Mark system as mounted */
888                 ext4_set16(&fs->sb, state, state);
889                 r = ext4_sb_write(fs->bdev, &fs->sb);
890                 if (r != EOK)
891                         return;
892
893                 /*Update mount count*/
894                 ext4_set16(&fs->sb, mount_count, mount_count);
895         }
896
897         jbd_block_set(jbd_fs, &journal_block);
898         
899         return;
900 }
901
902 /**@brief  Add block address to revoke tree, along with
903  *         its transaction id.
904  * @param  info  journal replay info
905  * @param  block  block address to be replayed.*/
906 static void jbd_add_revoke_block_tags(struct recover_info *info,
907                                       ext4_fsblk_t block)
908 {
909         struct revoke_entry *revoke_entry;
910
911         ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
912         /* If the revoke entry with respect to the block address
913          * exists already, update its transaction id.*/
914         revoke_entry = jbd_revoke_entry_lookup(info, block);
915         if (revoke_entry) {
916                 revoke_entry->trans_id = info->this_trans_id;
917                 return;
918         }
919
920         revoke_entry = jbd_alloc_revoke_entry();
921         ext4_assert(revoke_entry);
922         revoke_entry->block = block;
923         revoke_entry->trans_id = info->this_trans_id;
924         RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
925
926         return;
927 }
928
929 static void jbd_destroy_revoke_tree(struct recover_info *info)
930 {
931         while (!RB_EMPTY(&info->revoke_root)) {
932                 struct revoke_entry *revoke_entry =
933                         RB_MIN(jbd_revoke, &info->revoke_root);
934                 ext4_assert(revoke_entry);
935                 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
936                 jbd_free_revoke_entry(revoke_entry);
937         }
938 }
939
/* Make sure we wrap around the log correctly!
 *
 * If var has reached (or passed) the journal's maxlen, it is pulled back
 * by (maxlen - first). var must be a modifiable lvalue; it is evaluated
 * more than once, so it must not have side effects. */
#define wrap(sb, var)							\
do {									\
	if ((var) >= jbd_get32((sb), maxlen))				\
		(var) -= (jbd_get32((sb), maxlen) -			\
			  jbd_get32((sb), first));			\
} while (0)

/* Passes that jbd_iterate_log() can perform over the journal. */
/* Count the transactions and find the last transaction id. */
#define ACTION_SCAN 0
/* Feed the entries of revoke blocks into the revoke tree. */
#define ACTION_REVOKE 1
/* Replay the blocks listed in descriptor blocks. */
#define ACTION_RECOVER 2
950
951 /**@brief  Add entries in a revoke block to revoke tree.
952  * @param  jbd_fs jbd filesystem
953  * @param  header revoke block header
954  * @param  recover_info  journal replay info*/
955 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
956                                   struct jbd_bhdr *header,
957                                   struct recover_info *info)
958 {
959         char *blocks_entry;
960         struct jbd_revoke_header *revoke_hdr =
961                 (struct jbd_revoke_header *)header;
962         uint32_t i, nr_entries, record_len = 4;
963
964         /* If we are working on a 64bit jbd filesystem, */
965         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
966                                      JBD_FEATURE_INCOMPAT_64BIT))
967                 record_len = 8;
968
969         nr_entries = (jbd_get32(revoke_hdr, count) -
970                         sizeof(struct jbd_revoke_header)) /
971                         record_len;
972
973         blocks_entry = (char *)(revoke_hdr + 1);
974
975         for (i = 0;i < nr_entries;i++) {
976                 if (record_len == 8) {
977                         uint64_t *blocks =
978                                 (uint64_t *)blocks_entry;
979                         jbd_add_revoke_block_tags(info, to_be64(*blocks));
980                 } else {
981                         uint32_t *blocks =
982                                 (uint32_t *)blocks_entry;
983                         jbd_add_revoke_block_tags(info, to_be32(*blocks));
984                 }
985                 blocks_entry += record_len;
986         }
987 }
988
989 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
990                                        struct jbd_bhdr *header,
991                                        uint32_t *iblock)
992 {
993         jbd_iterate_block_table(jbd_fs,
994                                 header + 1,
995                                 jbd_get32(&jbd_fs->sb, blocksize) -
996                                         sizeof(struct jbd_bhdr),
997                                 jbd_display_block_tags,
998                                 iblock);
999 }
1000
1001 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
1002                                         struct jbd_bhdr *header,
1003                                         struct replay_arg *arg)
1004 {
1005         jbd_iterate_block_table(jbd_fs,
1006                                 header + 1,
1007                                 jbd_get32(&jbd_fs->sb, blocksize) -
1008                                         sizeof(struct jbd_bhdr),
1009                                 jbd_replay_block_tags,
1010                                 arg);
1011 }
1012
1013 /**@brief  The core routine of journal replay.
1014  * @param  jbd_fs jbd filesystem
1015  * @param  recover_info  journal replay info
1016  * @param  action action needed to be taken
1017  * @return standard error code*/
1018 static int jbd_iterate_log(struct jbd_fs *jbd_fs,
1019                            struct recover_info *info,
1020                            int action)
1021 {
1022         int r = EOK;
1023         bool log_end = false;
1024         struct jbd_sb *sb = &jbd_fs->sb;
1025         uint32_t start_trans_id, this_trans_id;
1026         uint32_t start_block, this_block;
1027
1028         /* We start iterating valid blocks in the whole journal.*/
1029         start_trans_id = this_trans_id = jbd_get32(sb, sequence);
1030         start_block = this_block = jbd_get32(sb, start);
1031         if (action == ACTION_SCAN)
1032                 info->trans_cnt = 0;
1033         else if (!info->trans_cnt)
1034                 log_end = true;
1035
1036         ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
1037                             start_trans_id);
1038
1039         while (!log_end) {
1040                 struct ext4_block block;
1041                 struct jbd_bhdr *header;
1042                 /* If we are not scanning for the last
1043                  * valid transaction in the journal,
1044                  * we will stop when we reach the end of
1045                  * the journal.*/
1046                 if (action != ACTION_SCAN)
1047                         if (this_trans_id > info->last_trans_id) {
1048                                 log_end = true;
1049                                 continue;
1050                         }
1051
1052                 r = jbd_block_get(jbd_fs, &block, this_block);
1053                 if (r != EOK)
1054                         break;
1055
1056                 header = (struct jbd_bhdr *)block.data;
1057                 /* This block does not have a valid magic number,
1058                  * so we have reached the end of the journal.*/
1059                 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
1060                         jbd_block_set(jbd_fs, &block);
1061                         log_end = true;
1062                         continue;
1063                 }
1064
1065                 /* If the transaction id we found is not expected,
1066                  * we may have reached the end of the journal.
1067                  *
1068                  * If we are not scanning the journal, something
1069                  * bad might have taken place. :-( */
1070                 if (jbd_get32(header, sequence) != this_trans_id) {
1071                         if (action != ACTION_SCAN)
1072                                 r = EIO;
1073
1074                         jbd_block_set(jbd_fs, &block);
1075                         log_end = true;
1076                         continue;
1077                 }
1078
1079                 switch (jbd_get32(header, blocktype)) {
1080                 case JBD_DESCRIPTOR_BLOCK:
1081                         if (!jbd_verify_meta_csum(jbd_fs, header)) {
1082                                 ext4_dbg(DEBUG_JBD,
1083                                         DBG_WARN "Descriptor block checksum failed."
1084                                                 "Journal block: %" PRIu32"\n",
1085                                                 this_block);
1086                                 log_end = true;
1087                                 break;
1088                         }
1089                         ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
1090                                             "trans_id: %" PRIu32"\n",
1091                                             this_block, this_trans_id);
1092                         if (action == ACTION_RECOVER) {
1093                                 struct replay_arg replay_arg;
1094                                 replay_arg.info = info;
1095                                 replay_arg.this_block = &this_block;
1096                                 replay_arg.this_trans_id = this_trans_id;
1097
1098                                 jbd_replay_descriptor_block(jbd_fs,
1099                                                 header, &replay_arg);
1100                         } else
1101                                 jbd_debug_descriptor_block(jbd_fs,
1102                                                 header, &this_block);
1103
1104                         break;
1105                 case JBD_COMMIT_BLOCK:
1106                         if (!jbd_verify_commit_csum(jbd_fs,
1107                                         (struct jbd_commit_header *)header)) {
1108                                 ext4_dbg(DEBUG_JBD,
1109                                         DBG_WARN "Commit block checksum failed."
1110                                                 "Journal block: %" PRIu32"\n",
1111                                                 this_block);
1112                                 log_end = true;
1113                                 break;
1114                         }
1115                         ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
1116                                             "trans_id: %" PRIu32"\n",
1117                                             this_block, this_trans_id);
1118                         /* This is the end of a transaction,
1119                          * we may now proceed to the next transaction.
1120                          */
1121                         this_trans_id++;
1122                         info->trans_cnt++;
1123                         break;
1124                 case JBD_REVOKE_BLOCK:
1125                         if (!jbd_verify_meta_csum(jbd_fs, header)) {
1126                                 ext4_dbg(DEBUG_JBD,
1127                                         DBG_WARN "Revoke block checksum failed."
1128                                                 "Journal block: %" PRIu32"\n",
1129                                                 this_block);
1130                                 log_end = true;
1131                                 break;
1132                         }
1133                         ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
1134                                             "trans_id: %" PRIu32"\n",
1135                                             this_block, this_trans_id);
1136                         if (action == ACTION_REVOKE) {
1137                                 info->this_trans_id = this_trans_id;
1138                                 jbd_build_revoke_tree(jbd_fs,
1139                                                 header, info);
1140                         }
1141                         break;
1142                 default:
1143                         log_end = true;
1144                         break;
1145                 }
1146                 jbd_block_set(jbd_fs, &block);
1147                 this_block++;
1148                 wrap(sb, this_block);
1149                 if (this_block == start_block)
1150                         log_end = true;
1151
1152         }
1153         ext4_dbg(DEBUG_JBD, "End of journal.\n");
1154         if (r == EOK && action == ACTION_SCAN) {
1155                 /* We have finished scanning the journal. */
1156                 info->start_trans_id = start_trans_id;
1157                 if (this_trans_id > start_trans_id)
1158                         info->last_trans_id = this_trans_id - 1;
1159                 else
1160                         info->last_trans_id = this_trans_id;
1161         }
1162
1163         return r;
1164 }
1165
1166 /**@brief  Replay journal.
1167  * @param  jbd_fs jbd filesystem
1168  * @return standard error code*/
1169 int jbd_recover(struct jbd_fs *jbd_fs)
1170 {
1171         int r;
1172         struct recover_info info;
1173         struct jbd_sb *sb = &jbd_fs->sb;
1174         if (!sb->start)
1175                 return EOK;
1176
1177         RB_INIT(&info.revoke_root);
1178
1179         r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
1180         if (r != EOK)
1181                 return r;
1182
1183         r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
1184         if (r != EOK)
1185                 return r;
1186
1187         r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
1188         if (r == EOK) {
1189                 /* If we successfully replay the journal,
1190                  * clear EXT4_FINCOM_RECOVER flag on the
1191                  * ext4 superblock, and set the start of
1192                  * journal to 0.*/
1193                 uint32_t features_incompatible =
1194                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
1195                                    features_incompatible);
1196                 jbd_set32(&jbd_fs->sb, start, 0);
1197                 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1198                 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1199                            features_incompatible,
1200                            features_incompatible);
1201                 jbd_fs->dirty = true;
1202                 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1203                                   &jbd_fs->inode_ref.fs->sb);
1204         }
1205         jbd_destroy_revoke_tree(&info);
1206         return r;
1207 }
1208
1209 static void jbd_journal_write_sb(struct jbd_journal *journal)
1210 {
1211         struct jbd_fs *jbd_fs = journal->jbd_fs;
1212         jbd_set32(&jbd_fs->sb, start, journal->start);
1213         jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
1214         jbd_fs->dirty = true;
1215 }
1216
1217 /**@brief  Start accessing the journal.
1218  * @param  jbd_fs jbd filesystem
1219  * @param  journal current journal session
1220  * @return standard error code*/
1221 int jbd_journal_start(struct jbd_fs *jbd_fs,
1222                       struct jbd_journal *journal)
1223 {
1224         int r;
1225         uint32_t features_incompatible =
1226                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
1227                                    features_incompatible);
1228         struct ext4_block block = EXT4_BLOCK_ZERO();
1229         features_incompatible |= EXT4_FINCOM_RECOVER;
1230         ext4_set32(&jbd_fs->inode_ref.fs->sb,
1231                         features_incompatible,
1232                         features_incompatible);
1233         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1234                         &jbd_fs->inode_ref.fs->sb);
1235         if (r != EOK)
1236                 return r;
1237
1238         journal->first = jbd_get32(&jbd_fs->sb, first);
1239         journal->start = journal->first;
1240         journal->last = journal->first;
1241         journal->trans_id = 1;
1242         journal->alloc_trans_id = 1;
1243
1244         journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
1245
1246         r = jbd_block_get_noread(jbd_fs,
1247                          &block,
1248                          journal->start);
1249         if (r != EOK) {
1250                 memset(journal, 0, sizeof(struct jbd_journal));
1251                 return r;
1252         }
1253         memset(block.data, 0, journal->block_size);
1254         ext4_bcache_set_dirty(block.buf);
1255         r = jbd_block_set(jbd_fs, &block);
1256         if (r != EOK) {
1257                 memset(journal, 0, sizeof(struct jbd_journal));
1258                 return r;
1259         }
1260
1261         TAILQ_INIT(&journal->trans_queue);
1262         TAILQ_INIT(&journal->cp_queue);
1263         RB_INIT(&journal->block_rec_root);
1264         journal->jbd_fs = jbd_fs;
1265         jbd_journal_write_sb(journal);
1266         return jbd_write_sb(jbd_fs);
1267 }
1268
1269 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1270                           struct ext4_buf *buf __unused,
1271                           int res,
1272                           void *arg);
1273
1274 static void jbd_journal_flush_trans(struct jbd_trans *trans)
1275 {
1276         struct jbd_buf *jbd_buf, *tmp;
1277         struct jbd_journal *journal = trans->journal;
1278         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1279         void *tmp_data = malloc(journal->block_size);
1280         ext4_assert(tmp_data);
1281
1282         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1283                         tmp) {
1284                 struct ext4_buf *buf = jbd_buf->block_rec->buf;
1285                 /* The buffer in memory is still dirty. */
1286                 if (buf) {
1287                         if (jbd_buf->block_rec->trans != trans) {
1288                                 int r;
1289                                 struct ext4_block jbd_block = EXT4_BLOCK_ZERO();
1290                                 struct jbd_buf *orig_arg = buf->end_write_arg;
1291                                 ext4_assert(ext4_block_get(fs->bdev,
1292                                                         &jbd_block,
1293                                                         jbd_buf->jbd_lba) == EOK);
1294                                 memcpy(tmp_data, jbd_block.data,
1295                                                 journal->block_size);
1296                                 ext4_block_set(fs->bdev, &jbd_block);
1297                                 r = ext4_blocks_set_direct(fs->bdev, tmp_data,
1298                                                 buf->lba, 1);
1299                                 jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf);
1300                                 buf->end_write = jbd_trans_end_write;
1301                                 buf->end_write_arg = orig_arg;
1302                                 orig_arg->block_rec->buf = buf;
1303                         } else
1304                                 ext4_block_flush_buf(fs->bdev, buf);
1305
1306                 }
1307         }
1308
1309         free(tmp_data);
1310 }
1311
1312 static void
1313 jbd_journal_skip_pure_revoke(struct jbd_journal *journal,
1314                              struct jbd_trans *trans)
1315 {
1316         journal->start = trans->start_iblock +
1317                 trans->alloc_blocks;
1318         wrap(&journal->jbd_fs->sb, journal->start);
1319         journal->trans_id = trans->trans_id + 1;
1320         jbd_journal_free_trans(journal,
1321                         trans, false);
1322         jbd_journal_write_sb(journal);
1323 }
1324
1325 static void
1326 jbd_journal_purge_cp_trans(struct jbd_journal *journal,
1327                            bool flush)
1328 {
1329         struct jbd_trans *trans;
1330         while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1331                 if (!trans->data_cnt) {
1332                         TAILQ_REMOVE(&journal->cp_queue,
1333                                         trans,
1334                                         trans_node);
1335                         jbd_journal_skip_pure_revoke(journal, trans);
1336                 } else {
1337                         if (trans->data_cnt ==
1338                                         trans->written_cnt) {
1339                                 journal->start =
1340                                         trans->start_iblock +
1341                                         trans->alloc_blocks;
1342                                 wrap(&journal->jbd_fs->sb,
1343                                                 journal->start);
1344                                 journal->trans_id =
1345                                         trans->trans_id + 1;
1346                                 TAILQ_REMOVE(&journal->cp_queue,
1347                                                 trans,
1348                                                 trans_node);
1349                                 jbd_journal_free_trans(journal,
1350                                                 trans,
1351                                                 false);
1352                                 jbd_journal_write_sb(journal);
1353                         } else if (!flush) {
1354                                 journal->start =
1355                                         trans->start_iblock;
1356                                 wrap(&journal->jbd_fs->sb,
1357                                                 journal->start);
1358                                 journal->trans_id =
1359                                         trans->trans_id;
1360                                 jbd_journal_write_sb(journal);
1361                                 break;
1362                         } else
1363                                 jbd_journal_flush_trans(trans);
1364                 }
1365         }
1366 }
1367
1368 /**@brief  Stop accessing the journal.
1369  * @param  journal current journal session
1370  * @return standard error code*/
1371 int jbd_journal_stop(struct jbd_journal *journal)
1372 {
1373         int r;
1374         struct jbd_fs *jbd_fs = journal->jbd_fs;
1375         uint32_t features_incompatible;
1376
1377         /* Make sure that journalled content have reached
1378          * the disk.*/
1379         jbd_journal_purge_cp_trans(journal, true);
1380
1381         /* There should be no block record in this journal
1382          * session. */
1383         if (!RB_EMPTY(&journal->block_rec_root))
1384                 ext4_dbg(DEBUG_JBD,
1385                          DBG_WARN "There are still block records "
1386                                   "in this journal session!\n");
1387
1388         features_incompatible =
1389                 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1390                            features_incompatible);
1391         features_incompatible &= ~EXT4_FINCOM_RECOVER;
1392         ext4_set32(&jbd_fs->inode_ref.fs->sb,
1393                         features_incompatible,
1394                         features_incompatible);
1395         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1396                         &jbd_fs->inode_ref.fs->sb);
1397         if (r != EOK)
1398                 return r;
1399
1400         journal->start = 0;
1401         journal->trans_id = 0;
1402         jbd_journal_write_sb(journal);
1403         return jbd_write_sb(journal->jbd_fs);
1404 }
1405
1406 /**@brief  Allocate a block in the journal.
1407  * @param  journal current journal session
1408  * @param  trans transaction
1409  * @return allocated block address*/
1410 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
1411                                         struct jbd_trans *trans)
1412 {
1413         uint32_t start_block;
1414
1415         start_block = journal->last++;
1416         trans->alloc_blocks++;
1417         wrap(&journal->jbd_fs->sb, journal->last);
1418         
1419         /* If there is no space left, flush all journalled
1420          * blocks to disk first.*/
1421         if (journal->last == journal->start)
1422                 jbd_journal_purge_cp_trans(journal, true);
1423
1424         return start_block;
1425 }
1426
1427 /**@brief  Allocate a new transaction
1428  * @param  journal current journal session
1429  * @return transaction allocated*/
1430 struct jbd_trans *
1431 jbd_journal_new_trans(struct jbd_journal *journal)
1432 {
1433         struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
1434         if (!trans)
1435                 return NULL;
1436
1437         /* We will assign a trans_id to this transaction,
1438          * once it has been committed.*/
1439         trans->journal = journal;
1440         trans->data_csum = EXT4_CRC32_INIT;
1441         trans->error = EOK;
1442         TAILQ_INIT(&trans->buf_queue);
1443         return trans;
1444 }
1445
1446 /**@brief  gain access to it before making any modications.
1447  * @param  journal current journal session
1448  * @param  trans transaction
1449  * @param  block descriptor
1450  * @return standard error code.*/
1451 int jbd_trans_get_access(struct jbd_journal *journal,
1452                          struct jbd_trans *trans,
1453                          struct ext4_block *block)
1454 {
1455         int r = EOK;
1456         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1457         struct jbd_buf *jbd_buf = block->buf->end_write_arg;
1458
1459         /* If the buffer has already been modified, we should
1460          * flush dirty data in this buffer to disk.*/
1461         if (ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
1462             block->buf->end_write == jbd_trans_end_write) {
1463                 ext4_assert(jbd_buf);
1464                 if (jbd_buf->trans != trans)
1465                         r = ext4_block_flush_buf(fs->bdev, block->buf);
1466
1467         }
1468         return r;
1469 }
1470
1471 static struct jbd_block_rec *
1472 jbd_trans_block_rec_lookup(struct jbd_journal *journal,
1473                            ext4_fsblk_t lba)
1474 {
1475         struct jbd_block_rec tmp = {
1476                 .lba = lba
1477         };
1478
1479         return RB_FIND(jbd_block,
1480                        &journal->block_rec_root,
1481                        &tmp);
1482 }
1483
1484 static void
1485 jbd_trans_change_ownership(struct jbd_block_rec *block_rec,
1486                            struct jbd_trans *new_trans,
1487                            struct ext4_buf *new_buf)
1488 {
1489         LIST_REMOVE(block_rec, tbrec_node);
1490         /* Now this block record belongs to this transaction. */
1491         LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);
1492         block_rec->trans = new_trans;
1493         block_rec->buf = new_buf;
1494 }
1495
1496 static inline struct jbd_block_rec *
1497 jbd_trans_insert_block_rec(struct jbd_trans *trans,
1498                            ext4_fsblk_t lba,
1499                            struct ext4_buf *buf)
1500 {
1501         struct jbd_block_rec *block_rec;
1502         block_rec = jbd_trans_block_rec_lookup(trans->journal, lba);
1503         if (block_rec) {
1504                 jbd_trans_change_ownership(block_rec, trans, buf);
1505                 return block_rec;
1506         }
1507         block_rec = calloc(1, sizeof(struct jbd_block_rec));
1508         if (!block_rec)
1509                 return NULL;
1510
1511         block_rec->lba = lba;
1512         block_rec->buf = buf;
1513         block_rec->trans = trans;
1514         TAILQ_INIT(&block_rec->dirty_buf_queue);
1515         LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
1516         RB_INSERT(jbd_block, &trans->journal->block_rec_root, block_rec);
1517         return block_rec;
1518 }
1519
1520 static void
1521 jbd_trans_finish_callback(struct jbd_journal *journal,
1522                           const struct jbd_trans *trans,
1523                           struct jbd_block_rec *block_rec,
1524                           bool abort)
1525 {
1526         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1527         if (block_rec->trans != trans)
1528                 return;
1529
1530         if (!abort) {
1531                 struct jbd_buf *jbd_buf, *tmp;
1532                 TAILQ_FOREACH_SAFE(jbd_buf,
1533                                 &block_rec->dirty_buf_queue,
1534                                 dirty_buf_node,
1535                                 tmp) {
1536                         /* All we need is a fake ext4_buf. */
1537                         struct ext4_buf buf;
1538
1539                         jbd_trans_end_write(fs->bdev->bc,
1540                                         &buf,
1541                                         EOK,
1542                                         jbd_buf);
1543                 }
1544         } else {
1545                 struct jbd_buf *jbd_buf;
1546                 struct ext4_block jbd_block = EXT4_BLOCK_ZERO(),
1547                                   block = EXT4_BLOCK_ZERO();
1548                 jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
1549                                 jbd_buf_dirty);
1550                 if (jbd_buf) {
1551                         ext4_assert(ext4_block_get(fs->bdev,
1552                                                 &jbd_block,
1553                                                 jbd_buf->jbd_lba) == EOK);
1554                         ext4_assert(ext4_block_get_noread(fs->bdev,
1555                                                 &block,
1556                                                 block_rec->lba) == EOK);
1557                         memcpy(block.data, jbd_block.data,
1558                                         journal->block_size);
1559
1560                         jbd_trans_change_ownership(block_rec,
1561                                         jbd_buf->trans, block.buf);
1562
1563                         block.buf->end_write = jbd_trans_end_write;
1564                         block.buf->end_write_arg = jbd_buf;
1565
1566                         ext4_bcache_set_flag(jbd_block.buf, BC_TMP);
1567                         ext4_bcache_set_dirty(block.buf);
1568
1569                         ext4_block_set(fs->bdev, &jbd_block);
1570                         ext4_block_set(fs->bdev, &block);
1571                         return;
1572                 }
1573         }
1574 }
1575
1576 static inline void
1577 jbd_trans_remove_block_rec(struct jbd_journal *journal,
1578                            struct jbd_block_rec *block_rec,
1579                            struct jbd_trans *trans)
1580 {
1581         /* If this block record doesn't belong to this transaction,
1582          * give up.*/
1583         if (block_rec->trans == trans) {
1584                 LIST_REMOVE(block_rec, tbrec_node);
1585                 RB_REMOVE(jbd_block,
1586                                 &journal->block_rec_root,
1587                                 block_rec);
1588                 free(block_rec);
1589         }
1590 }
1591
1592 /**@brief  Add block to a transaction and mark it dirty.
1593  * @param  trans transaction
1594  * @param  block block descriptor
1595  * @return standard error code*/
1596 int jbd_trans_set_block_dirty(struct jbd_trans *trans,
1597                               struct ext4_block *block)
1598 {
1599         struct jbd_buf *buf;
1600
1601         struct jbd_block_rec *block_rec;
1602         if (block->buf->end_write == jbd_trans_end_write) {
1603                 buf = block->buf->end_write_arg;
1604                 if (buf && buf->trans == trans)
1605                         return EOK;
1606         }
1607         buf = calloc(1, sizeof(struct jbd_buf));
1608         if (!buf)
1609                 return ENOMEM;
1610
1611         if ((block_rec = jbd_trans_insert_block_rec(trans,
1612                                         block->lb_id,
1613                                         block->buf)) == NULL) {
1614                 free(buf);
1615                 return ENOMEM;
1616         }
1617
1618         TAILQ_INSERT_TAIL(&block_rec->dirty_buf_queue,
1619                         buf,
1620                         dirty_buf_node);
1621
1622         buf->block_rec = block_rec;
1623         buf->trans = trans;
1624         buf->block = *block;
1625         ext4_bcache_inc_ref(block->buf);
1626
1627         /* If the content reach the disk, notify us
1628          * so that we may do a checkpoint. */
1629         block->buf->end_write = jbd_trans_end_write;
1630         block->buf->end_write_arg = buf;
1631
1632         trans->data_cnt++;
1633         TAILQ_INSERT_HEAD(&trans->buf_queue, buf, buf_node);
1634
1635         ext4_bcache_set_dirty(block->buf);
1636         return EOK;
1637 }
1638
1639 /**@brief  Add block to be revoked to a transaction
1640  * @param  trans transaction
1641  * @param  lba logical block address
1642  * @return standard error code*/
1643 int jbd_trans_revoke_block(struct jbd_trans *trans,
1644                            ext4_fsblk_t lba)
1645 {
1646         struct jbd_revoke_rec *rec =
1647                 calloc(1, sizeof(struct jbd_revoke_rec));
1648         if (!rec)
1649                 return ENOMEM;
1650
1651         rec->lba = lba;
1652         LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
1653         return EOK;
1654 }
1655
1656 /**@brief  Try to add block to be revoked to a transaction.
1657  *         If @lba still remains in an transaction on checkpoint
1658  *         queue, add @lba as a revoked block to the transaction.
1659  * @param  trans transaction
1660  * @param  lba logical block address
1661  * @return standard error code*/
1662 int jbd_trans_try_revoke_block(struct jbd_trans *trans,
1663                                ext4_fsblk_t lba)
1664 {
1665         int r = EOK;
1666         struct jbd_journal *journal = trans->journal;
1667         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1668         struct jbd_block_rec *block_rec =
1669                 jbd_trans_block_rec_lookup(journal, lba);
1670
1671         /* Make sure we don't flush any buffers belong to this transaction. */
1672         if (block_rec && block_rec->trans != trans) {
1673                 /* If the buffer has not been flushed yet, flush it now. */
1674                 if (block_rec->buf) {
1675                         r = ext4_block_flush_buf(fs->bdev, block_rec->buf);
1676                         if (r != EOK)
1677                                 return r;
1678
1679                 }
1680
1681                 jbd_trans_revoke_block(trans, lba);
1682         }
1683
1684         return EOK;
1685 }
1686
1687 /**@brief  Free a transaction
1688  * @param  journal current journal session
1689  * @param  trans transaction
1690  * @param  abort discard all the modifications on the block?
1691  * @return standard error code*/
1692 void jbd_journal_free_trans(struct jbd_journal *journal,
1693                             struct jbd_trans *trans,
1694                             bool abort)
1695 {
1696         struct jbd_buf *jbd_buf, *tmp;
1697         struct jbd_revoke_rec *rec, *tmp2;
1698         struct jbd_block_rec *block_rec, *tmp3;
1699         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1700         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1701                           tmp) {
1702                 block_rec = jbd_buf->block_rec;
1703                 if (abort) {
1704                         jbd_buf->block.buf->end_write = NULL;
1705                         jbd_buf->block.buf->end_write_arg = NULL;
1706                         ext4_bcache_clear_dirty(jbd_buf->block.buf);
1707                         ext4_block_set(fs->bdev, &jbd_buf->block);
1708                 }
1709
1710                 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1711                         jbd_buf,
1712                         dirty_buf_node);
1713                 jbd_trans_finish_callback(journal,
1714                                 trans,
1715                                 block_rec,
1716                                 abort);
1717                 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1718                 free(jbd_buf);
1719         }
1720         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1721                           tmp2) {
1722                 LIST_REMOVE(rec, revoke_node);
1723                 free(rec);
1724         }
1725         LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
1726                           tmp3) {
1727                 jbd_trans_remove_block_rec(journal, block_rec, trans);
1728         }
1729
1730         free(trans);
1731 }
1732
1733 /**@brief  Write commit block for a transaction
1734  * @param  trans transaction
1735  * @return standard error code*/
1736 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
1737 {
1738         int rc;
1739         struct jbd_commit_header *header;
1740         uint32_t commit_iblock = 0;
1741         struct ext4_block commit_block;
1742         struct jbd_journal *journal = trans->journal;
1743
1744         commit_iblock = jbd_journal_alloc_block(journal, trans);
1745         rc = jbd_block_get_noread(journal->jbd_fs,
1746                         &commit_block, commit_iblock);
1747         if (rc != EOK)
1748                 return rc;
1749
1750         header = (struct jbd_commit_header *)commit_block.data;
1751         jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
1752         jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
1753         jbd_set32(&header->header, sequence, trans->trans_id);
1754
1755         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1756                                 JBD_FEATURE_COMPAT_CHECKSUM)) {
1757                 jbd_set32(header, chksum_type, JBD_CRC32_CHKSUM);
1758                 jbd_set32(header, chksum_size, JBD_CRC32_CHKSUM_SIZE);
1759                 jbd_set32(header, chksum[0], trans->data_csum);
1760         }
1761         jbd_commit_csum_set(journal->jbd_fs, header);
1762         ext4_bcache_set_dirty(commit_block.buf);
1763         rc = jbd_block_set(journal->jbd_fs, &commit_block);
1764         if (rc != EOK)
1765                 return rc;
1766
1767         return EOK;
1768 }
1769
1770 /**@brief  Write descriptor block for a transaction
1771  * @param  journal current journal session
1772  * @param  trans transaction
1773  * @return standard error code*/
1774 static int jbd_journal_prepare(struct jbd_journal *journal,
1775                                struct jbd_trans *trans)
1776 {
1777         int rc = EOK, i = 0;
1778         int32_t tag_tbl_size;
1779         uint32_t desc_iblock = 0;
1780         uint32_t data_iblock = 0;
1781         char *tag_start = NULL, *tag_ptr = NULL;
1782         struct jbd_buf *jbd_buf, *tmp;
1783         struct ext4_block desc_block, data_block;
1784         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1785         uint32_t checksum = EXT4_CRC32_INIT;
1786
1787         /* Try to remove any non-dirty buffers from the tail of
1788          * buf_queue. */
1789         TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
1790                         jbd_trans_buf, buf_node, tmp) {
1791                 /* We stop the iteration when we find a dirty buffer. */
1792                 if (ext4_bcache_test_flag(jbd_buf->block.buf,
1793                                         BC_DIRTY))
1794                         break;
1795         
1796                 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1797                         jbd_buf,
1798                         dirty_buf_node);
1799
1800                 jbd_trans_finish_callback(journal,
1801                                 trans,
1802                                 jbd_buf->block_rec,
1803                                 false);
1804
1805                 /* The buffer has not been modified, just release
1806                  * that jbd_buf. */
1807                 jbd_trans_remove_block_rec(journal,
1808                                 jbd_buf->block_rec, trans);
1809                 trans->data_cnt--;
1810
1811                 jbd_buf->block.buf->end_write = NULL;
1812                 jbd_buf->block.buf->end_write_arg = NULL;
1813                 ext4_block_set(fs->bdev, &jbd_buf->block);
1814                 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1815                 free(jbd_buf);
1816         }
1817
1818         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
1819                 struct tag_info tag_info;
1820                 bool uuid_exist = false;
1821                 if (!ext4_bcache_test_flag(jbd_buf->block.buf,
1822                                            BC_DIRTY)) {
1823                         TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1824                                         jbd_buf,
1825                                         dirty_buf_node);
1826
1827                         jbd_trans_finish_callback(journal,
1828                                         trans,
1829                                         jbd_buf->block_rec,
1830                                         false);
1831
1832                         /* The buffer has not been modified, just release
1833                          * that jbd_buf. */
1834                         jbd_trans_remove_block_rec(journal,
1835                                         jbd_buf->block_rec, trans);
1836                         trans->data_cnt--;
1837
1838                         jbd_buf->block.buf->end_write = NULL;
1839                         jbd_buf->block.buf->end_write_arg = NULL;
1840                         ext4_block_set(fs->bdev, &jbd_buf->block);
1841                         TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1842                         free(jbd_buf);
1843                         continue;
1844                 }
1845                 checksum = jbd_block_csum(journal->jbd_fs,
1846                                           jbd_buf->block.data,
1847                                           checksum,
1848                                           trans->trans_id);
1849 again:
1850                 if (!desc_iblock) {
1851                         struct jbd_bhdr *bhdr;
1852                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1853                         rc = jbd_block_get_noread(journal->jbd_fs,
1854                                            &desc_block, desc_iblock);
1855                         if (rc != EOK)
1856                                 break;
1857
1858                         ext4_bcache_set_dirty(desc_block.buf);
1859
1860                         bhdr = (struct jbd_bhdr *)desc_block.data;
1861                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1862                         jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
1863                         jbd_set32(bhdr, sequence, trans->trans_id);
1864
1865                         tag_start = (char *)(bhdr + 1);
1866                         tag_ptr = tag_start;
1867                         uuid_exist = true;
1868                         tag_tbl_size = journal->block_size -
1869                                 sizeof(struct jbd_bhdr);
1870
1871                         if (jbd_has_csum(&journal->jbd_fs->sb))
1872                                 tag_tbl_size -= sizeof(struct jbd_block_tail);
1873
1874                         if (!trans->start_iblock)
1875                                 trans->start_iblock = desc_iblock;
1876
1877                 }
1878                 tag_info.block = jbd_buf->block.lb_id;
1879                 tag_info.uuid_exist = uuid_exist;
1880                 if (i == trans->data_cnt - 1)
1881                         tag_info.last_tag = true;
1882                 else
1883                         tag_info.last_tag = false;
1884
1885                 tag_info.checksum = checksum;
1886
1887                 if (uuid_exist)
1888                         memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1889                                         UUID_SIZE);
1890
1891                 rc = jbd_write_block_tag(journal->jbd_fs,
1892                                 tag_ptr,
1893                                 tag_tbl_size,
1894                                 &tag_info);
1895                 if (rc != EOK) {
1896                         jbd_meta_csum_set(journal->jbd_fs,
1897                                         (struct jbd_bhdr *)desc_block.data);
1898                         jbd_block_set(journal->jbd_fs, &desc_block);
1899                         desc_iblock = 0;
1900                         goto again;
1901                 }
1902
1903                 data_iblock = jbd_journal_alloc_block(journal, trans);
1904                 rc = jbd_block_get_noread(journal->jbd_fs,
1905                                 &data_block, data_iblock);
1906                 if (rc != EOK)
1907                         break;
1908
1909                 ext4_bcache_set_dirty(data_block.buf);
1910
1911                 memcpy(data_block.data, jbd_buf->block.data,
1912                         journal->block_size);
1913                 jbd_buf->jbd_lba = data_block.lb_id;
1914
1915                 rc = jbd_block_set(journal->jbd_fs, &data_block);
1916                 if (rc != EOK)
1917                         break;
1918
1919                 tag_ptr += tag_info.tag_bytes;
1920                 tag_tbl_size -= tag_info.tag_bytes;
1921
1922                 i++;
1923         }
1924         if (rc == EOK && desc_iblock) {
1925                 jbd_meta_csum_set(journal->jbd_fs,
1926                                 (struct jbd_bhdr *)desc_block.data);
1927                 trans->data_csum = checksum;
1928                 jbd_block_set(journal->jbd_fs, &desc_block);
1929         }
1930
1931         return rc;
1932 }
1933
1934 /**@brief  Write revoke block for a transaction
1935  * @param  journal current journal session
1936  * @param  trans transaction
1937  * @return standard error code*/
1938 static int
1939 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1940                            struct jbd_trans *trans)
1941 {
1942         int rc = EOK, i = 0;
1943         int32_t tag_tbl_size;
1944         uint32_t desc_iblock = 0;
1945         char *blocks_entry = NULL;
1946         struct jbd_revoke_rec *rec, *tmp;
1947         struct ext4_block desc_block;
1948         struct jbd_revoke_header *header = NULL;
1949         int32_t record_len = 4;
1950
1951         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1952                                      JBD_FEATURE_INCOMPAT_64BIT))
1953                 record_len = 8;
1954
1955         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1956                           tmp) {
1957 again:
1958                 if (!desc_iblock) {
1959                         struct jbd_bhdr *bhdr;
1960                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1961                         rc = jbd_block_get_noread(journal->jbd_fs,
1962                                            &desc_block, desc_iblock);
1963                         if (rc != EOK) {
1964                                 break;
1965                         }
1966
1967                         ext4_bcache_set_dirty(desc_block.buf);
1968
1969                         bhdr = (struct jbd_bhdr *)desc_block.data;
1970                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1971                         jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1972                         jbd_set32(bhdr, sequence, trans->trans_id);
1973                         
1974                         header = (struct jbd_revoke_header *)bhdr;
1975                         blocks_entry = (char *)(header + 1);
1976                         tag_tbl_size = journal->block_size -
1977                                 sizeof(struct jbd_revoke_header);
1978
1979                         if (jbd_has_csum(&journal->jbd_fs->sb))
1980                                 tag_tbl_size -= sizeof(struct jbd_block_tail);
1981
1982                         if (!trans->start_iblock)
1983                                 trans->start_iblock = desc_iblock;
1984
1985                 }
1986
1987                 if (tag_tbl_size < record_len) {
1988                         jbd_set32(header, count,
1989                                   journal->block_size - tag_tbl_size);
1990                         jbd_meta_csum_set(journal->jbd_fs,
1991                                         (struct jbd_bhdr *)desc_block.data);
1992                         jbd_block_set(journal->jbd_fs, &desc_block);
1993                         desc_iblock = 0;
1994                         header = NULL;
1995                         goto again;
1996                 }
1997                 if (record_len == 8) {
1998                         uint64_t *blocks =
1999                                 (uint64_t *)blocks_entry;
2000                         *blocks = to_be64(rec->lba);
2001                 } else {
2002                         uint32_t *blocks =
2003                                 (uint32_t *)blocks_entry;
2004                         *blocks = to_be32(rec->lba);
2005                 }
2006                 blocks_entry += record_len;
2007                 tag_tbl_size -= record_len;
2008
2009                 i++;
2010         }
2011         if (rc == EOK && desc_iblock) {
2012                 if (header != NULL)
2013                         jbd_set32(header, count,
2014                                   journal->block_size - tag_tbl_size);
2015
2016                 jbd_meta_csum_set(journal->jbd_fs,
2017                                 (struct jbd_bhdr *)desc_block.data);
2018                 jbd_block_set(journal->jbd_fs, &desc_block);
2019         }
2020
2021         return rc;
2022 }
2023
2024 /**@brief  Put references of block descriptors in a transaction.
2025  * @param  journal current journal session
2026  * @param  trans transaction*/
2027 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
2028 {
2029         struct jbd_buf *jbd_buf, *tmp;
2030         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
2031         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
2032                         tmp) {
2033                 struct ext4_block block = jbd_buf->block;
2034                 ext4_block_set(fs->bdev, &block);
2035         }
2036 }
2037
2038 /**@brief  Update the start block of the journal when
2039  *         all the contents in a transaction reach the disk.*/
2040 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
2041                           struct ext4_buf *buf,
2042                           int res,
2043                           void *arg)
2044 {
2045         struct jbd_buf *jbd_buf = arg;
2046         struct jbd_trans *trans = jbd_buf->trans;
2047         struct jbd_journal *journal = trans->journal;
2048         bool first_in_queue =
2049                 trans == TAILQ_FIRST(&journal->cp_queue);
2050         if (res != EOK)
2051                 trans->error = res;
2052
2053         TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
2054         TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
2055                         jbd_buf,
2056                         dirty_buf_node);
2057         jbd_trans_finish_callback(journal,
2058                         trans,
2059                         jbd_buf->block_rec,
2060                         false);
2061         jbd_buf->block_rec->buf = NULL;
2062         free(jbd_buf);
2063
2064         /* Clear the end_write and end_write_arg fields. */
2065         buf->end_write = NULL;
2066         buf->end_write_arg = NULL;
2067
2068         trans->written_cnt++;
2069         if (trans->written_cnt == trans->data_cnt) {
2070                 /* If it is the first transaction on checkpoint queue,
2071                  * we will shift the start of the journal to the next
2072                  * transaction, and remove subsequent written
2073                  * transactions from checkpoint queue until we find
2074                  * an unwritten one. */
2075                 if (first_in_queue) {
2076                         journal->start = trans->start_iblock +
2077                                 trans->alloc_blocks;
2078                         wrap(&journal->jbd_fs->sb, journal->start);
2079                         journal->trans_id = trans->trans_id + 1;
2080                         TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
2081                         jbd_journal_free_trans(journal, trans, false);
2082
2083                         jbd_journal_purge_cp_trans(journal, false);
2084                         jbd_journal_write_sb(journal);
2085                         jbd_write_sb(journal->jbd_fs);
2086                 }
2087         }
2088 }
2089
2090 /**@brief  Commit a transaction to the journal immediately.
2091  * @param  journal current journal session
2092  * @param  trans transaction
2093  * @return standard error code*/
2094 int jbd_journal_commit_trans(struct jbd_journal *journal,
2095                              struct jbd_trans *trans)
2096 {
2097         int rc = EOK;
2098         uint32_t last = journal->last;
2099
2100         trans->trans_id = journal->alloc_trans_id;
2101         rc = jbd_journal_prepare(journal, trans);
2102         if (rc != EOK)
2103                 goto Finish;
2104
2105         rc = jbd_journal_prepare_revoke(journal, trans);
2106         if (rc != EOK)
2107                 goto Finish;
2108
2109         if (TAILQ_EMPTY(&trans->buf_queue) &&
2110             LIST_EMPTY(&trans->revoke_list)) {
2111                 /* Since there are no entries in both buffer list
2112                  * and revoke entry list, we do not consider trans as
2113                  * complete transaction and just return EOK.*/
2114                 jbd_journal_free_trans(journal, trans, false);
2115                 goto Finish;
2116         }
2117
2118         rc = jbd_trans_write_commit_block(trans);
2119         if (rc != EOK)
2120                 goto Finish;
2121
2122         journal->alloc_trans_id++;
2123         if (TAILQ_EMPTY(&journal->cp_queue)) {
2124                 if (trans->data_cnt) {
2125                         journal->start = trans->start_iblock;
2126                         wrap(&journal->jbd_fs->sb, journal->start);
2127                         journal->trans_id = trans->trans_id;
2128                         jbd_journal_write_sb(journal);
2129                         jbd_write_sb(journal->jbd_fs);
2130                         TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2131                                         trans_node);
2132                         jbd_journal_cp_trans(journal, trans);
2133                 } else {
2134                         journal->start = trans->start_iblock +
2135                                 trans->alloc_blocks;
2136                         wrap(&journal->jbd_fs->sb, journal->start);
2137                         journal->trans_id = trans->trans_id + 1;
2138                         jbd_journal_write_sb(journal);
2139                         jbd_journal_free_trans(journal, trans, false);
2140                 }
2141         } else {
2142                 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2143                                 trans_node);
2144                 if (trans->data_cnt)
2145                         jbd_journal_cp_trans(journal, trans);
2146
2147         }
2148 Finish:
2149         if (rc != EOK) {
2150                 journal->last = last;
2151                 jbd_journal_free_trans(journal, trans, true);
2152         }
2153         return rc;
2154 }
2155
2156 /**
2157  * @}
2158  */