7ca517271e611fb3f8851ab15be2390070648498
[lwext4.git] / lwext4 / ext4_journal.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * - Redistributions of source code must retain the above copyright
11  *   notice, this list of conditions and the following disclaimer.
12  * - Redistributions in binary form must reproduce the above copyright
13  *   notice, this list of conditions and the following disclaimer in the
14  *   documentation and/or other materials provided with the distribution.
15  * - The name of the author may not be used to endorse or promote products
16  *   derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 /** @addtogroup lwext4
31  * @{
32  */
33 /**
34  * @file  ext4_journal.c
35  * @brief Journal handle functions
36  */
37
38 #include "ext4_config.h"
39 #include "ext4_types.h"
40 #include "ext4_fs.h"
41 #include "ext4_super.h"
42 #include "ext4_journal.h"
43 #include "ext4_errno.h"
44 #include "ext4_blockdev.h"
45 #include "ext4_crc32.h"
46 #include "ext4_debug.h"
47
48 #include <string.h>
49 #include <stdlib.h>
50
51 /**@brief  Revoke entry during journal replay.*/
52 struct revoke_entry {
53         /**@brief  Block number not to be replayed.*/
54         ext4_fsblk_t block;
55
56         /**@brief  For any transaction id smaller
57          *         than trans_id, records of @block
58          *         in those transactions should not
59          *         be replayed.*/
60         uint32_t trans_id;
61
62         /**@brief  Revoke tree node.*/
63         RB_ENTRY(revoke_entry) revoke_node;
64 };
65
66 /**@brief  Valid journal replay information.*/
67 struct recover_info {
68         /**@brief  Starting transaction id.*/
69         uint32_t start_trans_id;
70
71         /**@brief  Ending transaction id.*/
72         uint32_t last_trans_id;
73
74         /**@brief  Used as internal argument.*/
75         uint32_t this_trans_id;
76
77         /**@brief  No of transactions went through.*/
78         uint32_t trans_cnt;
79
80         /**@brief  RB-Tree storing revoke entries.*/
81         RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
82 };
83
/**@brief  Arguments handed to the per-tag replay callback.*/
struct replay_arg {
	/**@brief  Replay information of the journal being recovered.*/
	struct recover_info *info;

	/**@brief  Journal block currently being processed.*/
	uint32_t *this_block;

	/**@brief  Id of the transaction currently being processed.*/
	uint32_t this_trans_id;
};
95
96 static int
97 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
98 {
99         if (a->block > b->block)
100                 return 1;
101         else if (a->block < b->block)
102                 return -1;
103         return 0;
104 }
105
106 static int
107 jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b)
108 {
109         if (a->lba > b->lba)
110                 return 1;
111         else if (a->lba < b->lba)
112                 return -1;
113         return 0;
114 }
115
116 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
117                      jbd_revoke_entry_cmp, static inline)
118 RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
119                      jbd_block_rec_cmp, static inline)
120
121 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
122 #define jbd_free_revoke_entry(addr) free(addr)
123
124 static int jbd_has_csum(struct jbd_sb *jbd_sb)
125 {
126         if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V2))
127                 return 2;
128
129         if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V3))
130                 return 3;
131
132         return 0;
133 }
134
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the crc32c checksum of the jbd superblock.
 * @param  jbd_sb jbd superblock
 * @return checksum, or 0 when the journal has no checksum feature*/
static uint32_t jbd_sb_csum(struct jbd_sb *jbd_sb)
{
	uint32_t saved_csum;
	uint32_t crc;

	if (!jbd_has_csum(jbd_sb))
		return 0;

	/* The checksum field itself is hashed as zero; its stored
	 * value is put back once the computation is done. */
	saved_csum = jbd_sb->checksum;
	jbd_set32(jbd_sb, checksum, 0);
	/* crc32c over the whole superblock */
	crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_sb, JBD_SUPERBLOCK_SIZE);
	jbd_sb->checksum = saved_csum;

	return crc;
}
#else
#define jbd_sb_csum(...) 0
#endif
153
154 static void jbd_sb_csum_set(struct jbd_sb *jbd_sb)
155 {
156         if (!jbd_has_csum(jbd_sb))
157                 return;
158
159         jbd_set32(jbd_sb, checksum, jbd_sb_csum(jbd_sb));
160 }
161
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum stored in the jbd superblock.
 * @param  jbd_sb jbd superblock
 * @return true when the journal has no checksum feature or the
 *         stored checksum matches the computed one*/
static bool
jbd_verify_sb_csum(struct jbd_sb *jbd_sb)
{
	return !jbd_has_csum(jbd_sb) ||
	       jbd_sb_csum(jbd_sb) == jbd_get32(jbd_sb, checksum);
}
#else
#define jbd_verify_sb_csum(...) true
#endif
174
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the crc32c checksum of a journal metadata block.
 * @param  jbd_fs jbd filesystem
 * @param  bhdr start of the block (its block header)
 * @return checksum, or 0 when the journal has no checksum feature*/
static uint32_t jbd_meta_csum(struct jbd_fs *jbd_fs,
			      struct jbd_bhdr *bhdr)
{
	uint32_t block_size;
	uint32_t saved_csum;
	uint32_t crc;
	struct jbd_block_tail *tail;

	if (!jbd_has_csum(&jbd_fs->sb))
		return 0;

	/* The block tail occupies the last bytes of the block. */
	block_size = jbd_get32(&jbd_fs->sb, blocksize);
	tail = (struct jbd_block_tail *)((char *)bhdr + block_size -
					 sizeof(struct jbd_block_tail));

	/* Hash the block with a zeroed tail checksum and restore the
	 * stored value afterwards. */
	saved_csum = tail->checksum;
	tail->checksum = 0;

	/* Seed with the uuid, then cover the whole block. */
	crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
			  sizeof(jbd_fs->sb.uuid));
	crc = ext4_crc32c(crc, bhdr, block_size);

	tail->checksum = saved_csum;
	return crc;
}
#else
#define jbd_meta_csum(...) 0
#endif
202
203 static void jbd_meta_csum_set(struct jbd_fs *jbd_fs,
204                               struct jbd_bhdr *bhdr)
205 {
206         uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
207         struct jbd_block_tail *tail = (struct jbd_block_tail *)
208                                 ((char *)bhdr + block_size -
209                                 sizeof(struct jbd_block_tail));
210         if (!jbd_has_csum(&jbd_fs->sb))
211                 return;
212
213         tail->checksum = to_be32(jbd_meta_csum(jbd_fs, bhdr));
214 }
215
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum stored in the tail of a journal block.
 * @param  jbd_fs jbd filesystem
 * @param  bhdr start of the block (its block header)
 * @return true when the journal has no checksum feature or the
 *         stored checksum matches the computed one*/
static bool
jbd_verify_meta_csum(struct jbd_fs *jbd_fs,
		     struct jbd_bhdr *bhdr)
{
	struct jbd_block_tail *tail;
	uint32_t block_size;

	if (!jbd_has_csum(&jbd_fs->sb))
		return true;

	block_size = jbd_get32(&jbd_fs->sb, blocksize);
	tail = (struct jbd_block_tail *)((char *)bhdr + block_size -
					 sizeof(struct jbd_block_tail));

	return jbd_meta_csum(jbd_fs, bhdr) == to_be32(tail->checksum);
}
#else
#define jbd_verify_meta_csum(...) true
#endif
233
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the crc32c checksum of a commit block.
 * @param  jbd_fs jbd filesystem
 * @param  header start of the commit block (its commit header)
 * @return checksum, or 0 when the journal has no checksum feature*/
static uint32_t jbd_commit_csum(struct jbd_fs *jbd_fs,
			      struct jbd_commit_header *header)
{
	uint32_t saved_type, saved_size, saved_csum;
	uint32_t block_size;
	uint32_t crc;

	if (!jbd_has_csum(&jbd_fs->sb))
		return 0;

	block_size = jbd_get32(&jbd_fs->sb, blocksize);

	/* The checksum related header fields are hashed as zero and
	 * restored once the computation is done. */
	saved_type = header->chksum_type;
	saved_size = header->chksum_size;
	saved_csum = header->chksum[0];
	header->chksum_type = 0;
	header->chksum_size = 0;
	header->chksum[0] = 0;

	/* Seed with the uuid, then cover the whole block. */
	crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
			  sizeof(jbd_fs->sb.uuid));
	crc = ext4_crc32c(crc, header, block_size);

	header->chksum_type = saved_type;
	header->chksum_size = saved_size;
	header->chksum[0] = saved_csum;

	return crc;
}
#else
#define jbd_commit_csum(...) 0
#endif
265
266 static void jbd_commit_csum_set(struct jbd_fs *jbd_fs,
267                               struct jbd_commit_header *header)
268 {
269         if (!jbd_has_csum(&jbd_fs->sb))
270                 return;
271
272         header->chksum_type = 0;
273         header->chksum_size = 0;
274         header->chksum[0] = jbd_commit_csum(jbd_fs, header);
275 }
276
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum stored in a commit block header.
 * @param  jbd_fs jbd filesystem
 * @param  header start of the commit block (its commit header)
 * @return true when the journal has no checksum feature or the
 *         stored checksum matches the computed one*/
static bool jbd_verify_commit_csum(struct jbd_fs *jbd_fs,
				   struct jbd_commit_header *header)
{
	uint32_t expected;

	if (!jbd_has_csum(&jbd_fs->sb))
		return true;

	expected = to_be32(jbd_commit_csum(jbd_fs, header));
	return header->chksum[0] == expected;
}
#else
#define jbd_verify_commit_csum(...) true
#endif
290
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the checksum of a block logged in the journal.
 * @param  jbd_fs jbd filesystem
 * @param  buf block content, blocksize bytes are read
 * @param  csum running crc32 value; only used when
 *         JBD_FEATURE_COMPAT_CHECKSUM is enabled
 * @param  sequence transaction sequence number mixed into the
 *         checksum when a CSUM_V2/V3 feature is enabled
 * @return checksum, or 0 when no checksum feature is enabled*/
static uint32_t jbd_block_csum(struct jbd_fs *jbd_fs, const void *buf,
			       uint32_t csum,
			       uint32_t sequence)
{
	uint32_t checksum = 0;
	uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);

	if (jbd_has_csum(&jbd_fs->sb)) {
		/* First calculate crc32c checksum against fs uuid */
		checksum = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
				       sizeof(jbd_fs->sb.uuid));
		/* Then calculate crc32c checksum against sequence no.
		 * NOTE(review): the sequence is hashed in host byte
		 * order here - confirm what callers pass in. */
		checksum = ext4_crc32c(checksum, &sequence,
				sizeof(uint32_t));
		/* Calculate crc32c checksum against the whole block */
		checksum = ext4_crc32c(checksum, buf,
				block_size);
	} else if (jbd_get32(&jbd_fs->sb.header, blocktype) ==
			JBD_SUPERBLOCK_V2 &&
		   (jbd_get32(&jbd_fs->sb, feature_compat) &
			JBD_FEATURE_COMPAT_CHECKSUM)) {
		/* JBD_FEATURE_COMPAT_CHECKSUM is a compat flag, so it is
		 * looked up in feature_compat (which only exists in a
		 * version 2 superblock), not in feature_incompat. */
		/* Calculate crc32 checksum against the whole block */
		checksum = ext4_crc32(csum, buf,
				block_size);
	}
	return checksum;
}
#else
#define jbd_block_csum(...) 0
#endif
325
326 static void jbd_block_tag_csum_set(struct jbd_fs *jbd_fs, void *__tag,
327                                    uint32_t checksum)
328 {
329         int ver = jbd_has_csum(&jbd_fs->sb);
330         if (!ver)
331                 return;
332
333         if (ver == 2) {
334                 struct jbd_block_tag *tag = __tag;
335                 tag->checksum = (uint16_t)to_be32(checksum);
336         } else {
337                 struct jbd_block_tag3 *tag = __tag;
338                 tag->checksum = to_be32(checksum);
339         }
340 }
341
342 /**@brief  Write jbd superblock to disk.
343  * @param  jbd_fs jbd filesystem
344  * @param  s jbd superblock
345  * @return standard error code*/
346 static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
347 {
348         int rc;
349         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
350         uint64_t offset;
351         ext4_fsblk_t fblock;
352         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
353         if (rc != EOK)
354                 return rc;
355
356         jbd_sb_csum_set(s);
357         offset = fblock * ext4_sb_get_block_size(&fs->sb);
358         return ext4_block_writebytes(fs->bdev, offset, s,
359                                      EXT4_SUPERBLOCK_SIZE);
360 }
361
362 /**@brief  Read jbd superblock from disk.
363  * @param  jbd_fs jbd filesystem
364  * @param  s jbd superblock
365  * @return standard error code*/
366 static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
367 {
368         int rc;
369         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
370         uint64_t offset;
371         ext4_fsblk_t fblock;
372         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
373         if (rc != EOK)
374                 return rc;
375
376         offset = fblock * ext4_sb_get_block_size(&fs->sb);
377         return ext4_block_readbytes(fs->bdev, offset, s,
378                                     EXT4_SUPERBLOCK_SIZE);
379 }
380
381 /**@brief  Verify jbd superblock.
382  * @param  sb jbd superblock
383  * @return true if jbd superblock is valid */
384 static bool jbd_verify_sb(struct jbd_sb *sb)
385 {
386         struct jbd_bhdr *header = &sb->header;
387         if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
388                 return false;
389
390         if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
391             jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
392                 return false;
393
394         return jbd_verify_sb_csum(sb);
395 }
396
397 /**@brief  Write back dirty jbd superblock to disk.
398  * @param  jbd_fs jbd filesystem
399  * @return standard error code*/
400 static int jbd_write_sb(struct jbd_fs *jbd_fs)
401 {
402         int rc = EOK;
403         if (jbd_fs->dirty) {
404                 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
405                 if (rc != EOK)
406                         return rc;
407
408                 jbd_fs->dirty = false;
409         }
410         return rc;
411 }
412
413 /**@brief  Get reference to jbd filesystem.
414  * @param  fs Filesystem to load journal of
415  * @param  jbd_fs jbd filesystem
416  * @return standard error code*/
417 int jbd_get_fs(struct ext4_fs *fs,
418                struct jbd_fs *jbd_fs)
419 {
420         int rc;
421         uint32_t journal_ino;
422
423         memset(jbd_fs, 0, sizeof(struct jbd_fs));
424         /* See if there is journal inode on this filesystem.*/
425         /* FIXME: detection on existance ofbkejournal bdev is
426          *        missing.*/
427         journal_ino = ext4_get32(&fs->sb, journal_inode_number);
428
429         rc = ext4_fs_get_inode_ref(fs,
430                                    journal_ino,
431                                    &jbd_fs->inode_ref);
432         if (rc != EOK) {
433                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
434                 return rc;
435         }
436         rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
437         if (rc != EOK) {
438                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
439                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
440                 return rc;
441         }
442         if (!jbd_verify_sb(&jbd_fs->sb)) {
443                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
444                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
445                 rc = EIO;
446         }
447
448         return rc;
449 }
450
451 /**@brief  Put reference of jbd filesystem.
452  * @param  jbd_fs jbd filesystem
453  * @return standard error code*/
454 int jbd_put_fs(struct jbd_fs *jbd_fs)
455 {
456         int rc = EOK;
457         rc = jbd_write_sb(jbd_fs);
458
459         ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
460         return rc;
461 }
462
463 /**@brief  Data block lookup helper.
464  * @param  jbd_fs jbd filesystem
465  * @param  iblock block index
466  * @param  fblock logical block address
467  * @return standard error code*/
468 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
469                    ext4_lblk_t iblock,
470                    ext4_fsblk_t *fblock)
471 {
472         int rc = ext4_fs_get_inode_dblk_idx(
473                         &jbd_fs->inode_ref,
474                         iblock,
475                         fblock,
476                         false);
477         return rc;
478 }
479
480 /**@brief   jbd block get function (through cache).
481  * @param   jbd_fs jbd filesystem
482  * @param   block block descriptor
483  * @param   fblock jbd logical block address
484  * @return  standard error code*/
485 static int jbd_block_get(struct jbd_fs *jbd_fs,
486                   struct ext4_block *block,
487                   ext4_fsblk_t fblock)
488 {
489         /* TODO: journal device. */
490         int rc;
491         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
492
493         /* Lookup the logical block address of
494          * fblock.*/
495         rc = jbd_inode_bmap(jbd_fs, iblock,
496                             &fblock);
497         if (rc != EOK)
498                 return rc;
499
500         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
501         rc = ext4_block_get(bdev, block, fblock);
502
503         /* If succeeded, mark buffer as BC_FLUSH to indicate
504          * that data should be written to disk immediately.*/
505         if (rc == EOK) {
506                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
507                 /* As we don't want to occupy too much space
508                  * in block cache, we set this buffer BC_TMP.*/
509                 ext4_bcache_set_flag(block->buf, BC_TMP);
510         }
511
512         return rc;
513 }
514
515 /**@brief   jbd block get function (through cache, don't read).
516  * @param   jbd_fs jbd filesystem
517  * @param   block block descriptor
518  * @param   fblock jbd logical block address
519  * @return  standard error code*/
520 static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
521                          struct ext4_block *block,
522                          ext4_fsblk_t fblock)
523 {
524         /* TODO: journal device. */
525         int rc;
526         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
527         rc = jbd_inode_bmap(jbd_fs, iblock,
528                             &fblock);
529         if (rc != EOK)
530                 return rc;
531
532         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
533         rc = ext4_block_get_noread(bdev, block, fblock);
534         if (rc == EOK)
535                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
536
537         return rc;
538 }
539
540 /**@brief   jbd block set procedure (through cache).
541  * @param   jbd_fs jbd filesystem
542  * @param   block block descriptor
543  * @return  standard error code*/
544 static int jbd_block_set(struct jbd_fs *jbd_fs,
545                   struct ext4_block *block)
546 {
547         return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
548                               block);
549 }
550
551 /**@brief  helper functions to calculate
552  *         block tag size, not including UUID part.
553  * @param  jbd_fs jbd filesystem
554  * @return tag size in bytes*/
555 static int jbd_tag_bytes(struct jbd_fs *jbd_fs)
556 {
557         int size;
558
559         /* It is very easy to deal with the case which
560          * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
561         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
562                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
563                 return sizeof(struct jbd_block_tag3);
564
565         size = sizeof(struct jbd_block_tag);
566
567         /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
568          * add 2 bytes to size.*/
569         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
570                                      JBD_FEATURE_INCOMPAT_CSUM_V2))
571                 size += sizeof(uint16_t);
572
573         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
574                                      JBD_FEATURE_INCOMPAT_64BIT))
575                 return size;
576
577         /* If block number is 4 bytes in size,
578          * minus 4 bytes from size */
579         return size - sizeof(uint32_t);
580 }
581
582 /**@brief  Tag information. */
583 struct tag_info {
584         /**@brief  Tag size in bytes, including UUID part.*/
585         int tag_bytes;
586
587         /**@brief  block number stored in this tag.*/
588         ext4_fsblk_t block;
589
590         /**@brief  whether UUID part exists or not.*/
591         bool uuid_exist;
592
593         /**@brief  UUID content if UUID part exists.*/
594         uint8_t uuid[UUID_SIZE];
595
596         /**@brief  Is this the last tag? */
597         bool last_tag;
598
599         /**@brief  crc32c checksum. */
600         uint32_t checksum;
601 };
602
603 /**@brief  Extract information from a block tag.
604  * @param  __tag pointer to the block tag
605  * @param  tag_bytes block tag size of this jbd filesystem
606  * @param  remaining size in buffer containing the block tag
607  * @param  tag_info information of this tag.
608  * @return  EOK when succeed, otherwise return EINVAL.*/
609 static int
610 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
611                       void *__tag,
612                       int tag_bytes,
613                       int32_t remain_buf_size,
614                       struct tag_info *tag_info)
615 {
616         char *uuid_start;
617         tag_info->tag_bytes = tag_bytes;
618         tag_info->uuid_exist = false;
619         tag_info->last_tag = false;
620
621         /* See whether it is possible to hold a valid block tag.*/
622         if (remain_buf_size - tag_bytes < 0)
623                 return EINVAL;
624
625         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
626                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
627                 struct jbd_block_tag3 *tag = __tag;
628                 tag_info->block = jbd_get32(tag, blocknr);
629                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
630                                              JBD_FEATURE_INCOMPAT_64BIT))
631                          tag_info->block |=
632                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
633
634                 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
635                         tag_info->block = 0;
636
637                 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
638                         /* See whether it is possible to hold UUID part.*/
639                         if (remain_buf_size - tag_bytes < UUID_SIZE)
640                                 return EINVAL;
641
642                         uuid_start = (char *)tag + tag_bytes;
643                         tag_info->uuid_exist = true;
644                         tag_info->tag_bytes += UUID_SIZE;
645                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
646                 }
647
648                 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
649                         tag_info->last_tag = true;
650
651         } else {
652                 struct jbd_block_tag *tag = __tag;
653                 tag_info->block = jbd_get32(tag, blocknr);
654                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
655                                              JBD_FEATURE_INCOMPAT_64BIT))
656                          tag_info->block |=
657                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
658
659                 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
660                         tag_info->block = 0;
661
662                 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
663                         /* See whether it is possible to hold UUID part.*/
664                         if (remain_buf_size - tag_bytes < UUID_SIZE)
665                                 return EINVAL;
666
667                         uuid_start = (char *)tag + tag_bytes;
668                         tag_info->uuid_exist = true;
669                         tag_info->tag_bytes += UUID_SIZE;
670                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
671                 }
672
673                 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
674                         tag_info->last_tag = true;
675
676         }
677         return EOK;
678 }
679
680 /**@brief  Write information to a block tag.
681  * @param  __tag pointer to the block tag
682  * @param  remaining size in buffer containing the block tag
683  * @param  tag_info information of this tag.
684  * @return  EOK when succeed, otherwise return EINVAL.*/
685 static int
686 jbd_write_block_tag(struct jbd_fs *jbd_fs,
687                     void *__tag,
688                     int32_t remain_buf_size,
689                     struct tag_info *tag_info)
690 {
691         char *uuid_start;
692         int tag_bytes = jbd_tag_bytes(jbd_fs);
693
694         tag_info->tag_bytes = tag_bytes;
695
696         /* See whether it is possible to hold a valid block tag.*/
697         if (remain_buf_size - tag_bytes < 0)
698                 return EINVAL;
699
700         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
701                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
702                 struct jbd_block_tag3 *tag = __tag;
703                 memset(tag, 0, sizeof(struct jbd_block_tag3));
704                 jbd_set32(tag, blocknr, tag_info->block);
705                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
706                                              JBD_FEATURE_INCOMPAT_64BIT))
707                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
708
709                 if (tag_info->uuid_exist) {
710                         /* See whether it is possible to hold UUID part.*/
711                         if (remain_buf_size - tag_bytes < UUID_SIZE)
712                                 return EINVAL;
713
714                         uuid_start = (char *)tag + tag_bytes;
715                         tag_info->tag_bytes += UUID_SIZE;
716                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
717                 } else
718                         jbd_set32(tag, flags,
719                                   jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
720
721                 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
722
723                 if (tag_info->last_tag)
724                         jbd_set32(tag, flags,
725                                   jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
726
727         } else {
728                 struct jbd_block_tag *tag = __tag;
729                 memset(tag, 0, sizeof(struct jbd_block_tag));
730                 jbd_set32(tag, blocknr, tag_info->block);
731                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
732                                              JBD_FEATURE_INCOMPAT_64BIT))
733                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
734
735                 if (tag_info->uuid_exist) {
736                         /* See whether it is possible to hold UUID part.*/
737                         if (remain_buf_size - tag_bytes < UUID_SIZE)
738                                 return EINVAL;
739
740                         uuid_start = (char *)tag + tag_bytes;
741                         tag_info->tag_bytes += UUID_SIZE;
742                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
743                 } else
744                         jbd_set16(tag, flags,
745                                   jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
746
747                 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
748
749                 if (tag_info->last_tag)
750                         jbd_set16(tag, flags,
751                                   jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
752
753         }
754         return EOK;
755 }
756
757 /**@brief  Iterate all block tags in a block.
758  * @param  jbd_fs jbd filesystem
759  * @param  __tag_start pointer to the block
760  * @param  tag_tbl_size size of the block
761  * @param  func callback routine to indicate that
762  *         a block tag is found
763  * @param  arg additional argument to be passed to func */
764 static void
765 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
766                         void *__tag_start,
767                         int32_t tag_tbl_size,
768                         void (*func)(struct jbd_fs * jbd_fs,
769                                         ext4_fsblk_t block,
770                                         uint8_t *uuid,
771                                         void *arg),
772                         void *arg)
773 {
774         char *tag_start, *tag_ptr;
775         int tag_bytes = jbd_tag_bytes(jbd_fs);
776         tag_start = __tag_start;
777         tag_ptr = tag_start;
778
779         /* Cut off the size of block tail storing checksum. */
780         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
781                                      JBD_FEATURE_INCOMPAT_CSUM_V2) ||
782             JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
783                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
784                 tag_tbl_size -= sizeof(struct jbd_block_tail);
785
786         while (tag_tbl_size) {
787                 struct tag_info tag_info;
788                 int rc = jbd_extract_block_tag(jbd_fs,
789                                       tag_ptr,
790                                       tag_bytes,
791                                       tag_tbl_size,
792                                       &tag_info);
793                 if (rc != EOK)
794                         break;
795
796                 if (func)
797                         func(jbd_fs, tag_info.block, tag_info.uuid, arg);
798
799                 /* Stop the iteration when we reach the last tag. */
800                 if (tag_info.last_tag)
801                         break;
802
803                 tag_ptr += tag_info.tag_bytes;
804                 tag_tbl_size -= tag_info.tag_bytes;
805         }
806 }
807
808 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
809                                    ext4_fsblk_t block,
810                                    uint8_t *uuid,
811                                    void *arg)
812 {
813         uint32_t *iblock = arg;
814         ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
815         (*iblock)++;
816         (void)jbd_fs;
817         (void)uuid;
818         return;
819 }
820
821 static struct revoke_entry *
822 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
823 {
824         struct revoke_entry tmp = {
825                 .block = block
826         };
827
828         return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
829 }
830
831 /**@brief  Replay a block in a transaction.
832  * @param  jbd_fs jbd filesystem
833  * @param  block  block address to be replayed.*/
834 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
835                                   ext4_fsblk_t block,
836                                   uint8_t *uuid __unused,
837                                   void *__arg)
838 {
839         int r;
840         struct replay_arg *arg = __arg;
841         struct recover_info *info = arg->info;
842         uint32_t *this_block = arg->this_block;
843         struct revoke_entry *revoke_entry;
844         struct ext4_block journal_block, ext4_block;
845         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
846
847         (*this_block)++;
848
849         /* We replay this block only if the current transaction id
850          * is equal or greater than that in revoke entry.*/
851         revoke_entry = jbd_revoke_entry_lookup(info, block);
852         if (revoke_entry &&
853             arg->this_trans_id < revoke_entry->trans_id)
854                 return;
855
856         ext4_dbg(DEBUG_JBD,
857                  "Replaying block in block_tag: %" PRIu64 "\n",
858                  block);
859
860         r = jbd_block_get(jbd_fs, &journal_block, *this_block);
861         if (r != EOK)
862                 return;
863
864         /* We need special treatment for ext4 superblock. */
865         if (block) {
866                 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
867                 if (r != EOK) {
868                         jbd_block_set(jbd_fs, &journal_block);
869                         return;
870                 }
871
872                 memcpy(ext4_block.data,
873                         journal_block.data,
874                         jbd_get32(&jbd_fs->sb, blocksize));
875
876                 ext4_bcache_set_dirty(ext4_block.buf);
877                 ext4_block_set(fs->bdev, &ext4_block);
878         } else {
879                 uint16_t mount_count, state;
880                 mount_count = ext4_get16(&fs->sb, mount_count);
881                 state = ext4_get16(&fs->sb, state);
882
883                 memcpy(&fs->sb,
884                         journal_block.data + EXT4_SUPERBLOCK_OFFSET,
885                         EXT4_SUPERBLOCK_SIZE);
886
887                 /* Mark system as mounted */
888                 ext4_set16(&fs->sb, state, state);
889                 r = ext4_sb_write(fs->bdev, &fs->sb);
890                 if (r != EOK)
891                         return;
892
893                 /*Update mount count*/
894                 ext4_set16(&fs->sb, mount_count, mount_count);
895         }
896
897         jbd_block_set(jbd_fs, &journal_block);
898         
899         return;
900 }
901
902 /**@brief  Add block address to revoke tree, along with
903  *         its transaction id.
904  * @param  info  journal replay info
905  * @param  block  block address to be replayed.*/
906 static void jbd_add_revoke_block_tags(struct recover_info *info,
907                                       ext4_fsblk_t block)
908 {
909         struct revoke_entry *revoke_entry;
910
911         ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
912         /* If the revoke entry with respect to the block address
913          * exists already, update its transaction id.*/
914         revoke_entry = jbd_revoke_entry_lookup(info, block);
915         if (revoke_entry) {
916                 revoke_entry->trans_id = info->this_trans_id;
917                 return;
918         }
919
920         revoke_entry = jbd_alloc_revoke_entry();
921         ext4_assert(revoke_entry);
922         revoke_entry->block = block;
923         revoke_entry->trans_id = info->this_trans_id;
924         RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
925
926         return;
927 }
928
929 static void jbd_destroy_revoke_tree(struct recover_info *info)
930 {
931         while (!RB_EMPTY(&info->revoke_root)) {
932                 struct revoke_entry *revoke_entry =
933                         RB_MIN(jbd_revoke, &info->revoke_root);
934                 ext4_assert(revoke_entry);
935                 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
936                 jbd_free_revoke_entry(revoke_entry);
937         }
938 }
939
/* Make sure we wrap around the log correctly!
 *
 * If the journal block index `var` has reached or passed the `maxlen`
 * field of the journal superblock `sb`, pull it back by the size of the
 * usable log area (maxlen - first). `var` must be a modifiable lvalue;
 * it is evaluated more than once, so it must not have side effects. */
#define wrap(sb, var)							\
do {									\
	if ((var) >= jbd_get32((sb), maxlen))				\
		(var) -= (jbd_get32((sb), maxlen) -			\
			  jbd_get32((sb), first));			\
} while (0)
946
/* Passes understood by jbd_iterate_log(). */
enum {
	ACTION_SCAN = 0,	/* walk the log and count transactions */
	ACTION_REVOKE = 1,	/* feed revoke blocks into the revoke tree */
	ACTION_RECOVER = 2	/* replay the blocks named by descriptors */
};
950
951 /**@brief  Add entries in a revoke block to revoke tree.
952  * @param  jbd_fs jbd filesystem
953  * @param  header revoke block header
954  * @param  recover_info  journal replay info*/
955 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
956                                   struct jbd_bhdr *header,
957                                   struct recover_info *info)
958 {
959         char *blocks_entry;
960         struct jbd_revoke_header *revoke_hdr =
961                 (struct jbd_revoke_header *)header;
962         uint32_t i, nr_entries, record_len = 4;
963
964         /* If we are working on a 64bit jbd filesystem, */
965         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
966                                      JBD_FEATURE_INCOMPAT_64BIT))
967                 record_len = 8;
968
969         nr_entries = (jbd_get32(revoke_hdr, count) -
970                         sizeof(struct jbd_revoke_header)) /
971                         record_len;
972
973         blocks_entry = (char *)(revoke_hdr + 1);
974
975         for (i = 0;i < nr_entries;i++) {
976                 if (record_len == 8) {
977                         uint64_t *blocks =
978                                 (uint64_t *)blocks_entry;
979                         jbd_add_revoke_block_tags(info, to_be64(*blocks));
980                 } else {
981                         uint32_t *blocks =
982                                 (uint32_t *)blocks_entry;
983                         jbd_add_revoke_block_tags(info, to_be32(*blocks));
984                 }
985                 blocks_entry += record_len;
986         }
987 }
988
989 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
990                                        struct jbd_bhdr *header,
991                                        uint32_t *iblock)
992 {
993         jbd_iterate_block_table(jbd_fs,
994                                 header + 1,
995                                 jbd_get32(&jbd_fs->sb, blocksize) -
996                                         sizeof(struct jbd_bhdr),
997                                 jbd_display_block_tags,
998                                 iblock);
999 }
1000
1001 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
1002                                         struct jbd_bhdr *header,
1003                                         struct replay_arg *arg)
1004 {
1005         jbd_iterate_block_table(jbd_fs,
1006                                 header + 1,
1007                                 jbd_get32(&jbd_fs->sb, blocksize) -
1008                                         sizeof(struct jbd_bhdr),
1009                                 jbd_replay_block_tags,
1010                                 arg);
1011 }
1012
1013 /**@brief  The core routine of journal replay.
1014  * @param  jbd_fs jbd filesystem
1015  * @param  recover_info  journal replay info
1016  * @param  action action needed to be taken
1017  * @return standard error code*/
1018 static int jbd_iterate_log(struct jbd_fs *jbd_fs,
1019                            struct recover_info *info,
1020                            int action)
1021 {
1022         int r = EOK;
1023         bool log_end = false;
1024         struct jbd_sb *sb = &jbd_fs->sb;
1025         uint32_t start_trans_id, this_trans_id;
1026         uint32_t start_block, this_block;
1027
1028         /* We start iterating valid blocks in the whole journal.*/
1029         start_trans_id = this_trans_id = jbd_get32(sb, sequence);
1030         start_block = this_block = jbd_get32(sb, start);
1031         if (action == ACTION_SCAN)
1032                 info->trans_cnt = 0;
1033         else if (!info->trans_cnt)
1034                 log_end = true;
1035
1036         ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
1037                             start_trans_id);
1038
1039         while (!log_end) {
1040                 struct ext4_block block;
1041                 struct jbd_bhdr *header;
1042                 /* If we are not scanning for the last
1043                  * valid transaction in the journal,
1044                  * we will stop when we reach the end of
1045                  * the journal.*/
1046                 if (action != ACTION_SCAN)
1047                         if (this_trans_id > info->last_trans_id) {
1048                                 log_end = true;
1049                                 continue;
1050                         }
1051
1052                 r = jbd_block_get(jbd_fs, &block, this_block);
1053                 if (r != EOK)
1054                         break;
1055
1056                 header = (struct jbd_bhdr *)block.data;
1057                 /* This block does not have a valid magic number,
1058                  * so we have reached the end of the journal.*/
1059                 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
1060                         jbd_block_set(jbd_fs, &block);
1061                         log_end = true;
1062                         continue;
1063                 }
1064
1065                 /* If the transaction id we found is not expected,
1066                  * we may have reached the end of the journal.
1067                  *
1068                  * If we are not scanning the journal, something
1069                  * bad might have taken place. :-( */
1070                 if (jbd_get32(header, sequence) != this_trans_id) {
1071                         if (action != ACTION_SCAN)
1072                                 r = EIO;
1073
1074                         jbd_block_set(jbd_fs, &block);
1075                         log_end = true;
1076                         continue;
1077                 }
1078
1079                 switch (jbd_get32(header, blocktype)) {
1080                 case JBD_DESCRIPTOR_BLOCK:
1081                         if (!jbd_verify_meta_csum(jbd_fs, header)) {
1082                                 ext4_dbg(DEBUG_JBD,
1083                                         DBG_WARN "Descriptor block checksum failed."
1084                                                 "Journal block: %" PRIu32"\n",
1085                                                 this_block);
1086                                 log_end = true;
1087                                 break;
1088                         }
1089                         ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
1090                                             "trans_id: %" PRIu32"\n",
1091                                             this_block, this_trans_id);
1092                         if (action == ACTION_RECOVER) {
1093                                 struct replay_arg replay_arg;
1094                                 replay_arg.info = info;
1095                                 replay_arg.this_block = &this_block;
1096                                 replay_arg.this_trans_id = this_trans_id;
1097
1098                                 jbd_replay_descriptor_block(jbd_fs,
1099                                                 header, &replay_arg);
1100                         } else
1101                                 jbd_debug_descriptor_block(jbd_fs,
1102                                                 header, &this_block);
1103
1104                         break;
1105                 case JBD_COMMIT_BLOCK:
1106                         if (!jbd_verify_commit_csum(jbd_fs,
1107                                         (struct jbd_commit_header *)header)) {
1108                                 ext4_dbg(DEBUG_JBD,
1109                                         DBG_WARN "Commit block checksum failed."
1110                                                 "Journal block: %" PRIu32"\n",
1111                                                 this_block);
1112                                 log_end = true;
1113                                 break;
1114                         }
1115                         ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
1116                                             "trans_id: %" PRIu32"\n",
1117                                             this_block, this_trans_id);
1118                         /* This is the end of a transaction,
1119                          * we may now proceed to the next transaction.
1120                          */
1121                         this_trans_id++;
1122                         info->trans_cnt++;
1123                         break;
1124                 case JBD_REVOKE_BLOCK:
1125                         if (!jbd_verify_meta_csum(jbd_fs, header)) {
1126                                 ext4_dbg(DEBUG_JBD,
1127                                         DBG_WARN "Revoke block checksum failed."
1128                                                 "Journal block: %" PRIu32"\n",
1129                                                 this_block);
1130                                 log_end = true;
1131                                 break;
1132                         }
1133                         ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
1134                                             "trans_id: %" PRIu32"\n",
1135                                             this_block, this_trans_id);
1136                         if (action == ACTION_REVOKE) {
1137                                 info->this_trans_id = this_trans_id;
1138                                 jbd_build_revoke_tree(jbd_fs,
1139                                                 header, info);
1140                         }
1141                         break;
1142                 default:
1143                         log_end = true;
1144                         break;
1145                 }
1146                 jbd_block_set(jbd_fs, &block);
1147                 this_block++;
1148                 wrap(sb, this_block);
1149                 if (this_block == start_block)
1150                         log_end = true;
1151
1152         }
1153         ext4_dbg(DEBUG_JBD, "End of journal.\n");
1154         if (r == EOK && action == ACTION_SCAN) {
1155                 /* We have finished scanning the journal. */
1156                 info->start_trans_id = start_trans_id;
1157                 if (this_trans_id > start_trans_id)
1158                         info->last_trans_id = this_trans_id - 1;
1159                 else
1160                         info->last_trans_id = this_trans_id;
1161         }
1162
1163         return r;
1164 }
1165
1166 /**@brief  Replay journal.
1167  * @param  jbd_fs jbd filesystem
1168  * @return standard error code*/
1169 int jbd_recover(struct jbd_fs *jbd_fs)
1170 {
1171         int r;
1172         struct recover_info info;
1173         struct jbd_sb *sb = &jbd_fs->sb;
1174         if (!sb->start)
1175                 return EOK;
1176
1177         RB_INIT(&info.revoke_root);
1178
1179         r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
1180         if (r != EOK)
1181                 return r;
1182
1183         r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
1184         if (r != EOK)
1185                 return r;
1186
1187         r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
1188         if (r == EOK) {
1189                 /* If we successfully replay the journal,
1190                  * clear EXT4_FINCOM_RECOVER flag on the
1191                  * ext4 superblock, and set the start of
1192                  * journal to 0.*/
1193                 uint32_t features_incompatible =
1194                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
1195                                    features_incompatible);
1196                 jbd_set32(&jbd_fs->sb, start, 0);
1197                 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1198                 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1199                            features_incompatible,
1200                            features_incompatible);
1201                 jbd_fs->dirty = true;
1202                 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1203                                   &jbd_fs->inode_ref.fs->sb);
1204         }
1205         jbd_destroy_revoke_tree(&info);
1206         return r;
1207 }
1208
1209 static void jbd_journal_write_sb(struct jbd_journal *journal)
1210 {
1211         struct jbd_fs *jbd_fs = journal->jbd_fs;
1212         jbd_set32(&jbd_fs->sb, start, journal->start);
1213         jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
1214         jbd_fs->dirty = true;
1215 }
1216
1217 /**@brief  Start accessing the journal.
1218  * @param  jbd_fs jbd filesystem
1219  * @param  journal current journal session
1220  * @return standard error code*/
1221 int jbd_journal_start(struct jbd_fs *jbd_fs,
1222                       struct jbd_journal *journal)
1223 {
1224         int r;
1225         uint32_t features_incompatible =
1226                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
1227                                    features_incompatible);
1228         struct ext4_block block = EXT4_BLOCK_ZERO();
1229         features_incompatible |= EXT4_FINCOM_RECOVER;
1230         ext4_set32(&jbd_fs->inode_ref.fs->sb,
1231                         features_incompatible,
1232                         features_incompatible);
1233         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1234                         &jbd_fs->inode_ref.fs->sb);
1235         if (r != EOK)
1236                 return r;
1237
1238         journal->first = jbd_get32(&jbd_fs->sb, first);
1239         journal->start = journal->first;
1240         journal->last = journal->first;
1241         journal->trans_id = 1;
1242         journal->alloc_trans_id = 1;
1243
1244         journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
1245
1246         r = jbd_block_get_noread(jbd_fs,
1247                          &block,
1248                          journal->start);
1249         if (r != EOK) {
1250                 memset(journal, 0, sizeof(struct jbd_journal));
1251                 return r;
1252         }
1253         memset(block.data, 0, journal->block_size);
1254         ext4_bcache_set_dirty(block.buf);
1255         r = jbd_block_set(jbd_fs, &block);
1256         if (r != EOK) {
1257                 memset(journal, 0, sizeof(struct jbd_journal));
1258                 return r;
1259         }
1260
1261         TAILQ_INIT(&journal->trans_queue);
1262         TAILQ_INIT(&journal->cp_queue);
1263         RB_INIT(&journal->block_rec_root);
1264         journal->jbd_fs = jbd_fs;
1265         jbd_journal_write_sb(journal);
1266         return jbd_write_sb(jbd_fs);
1267 }
1268
1269 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1270                           struct ext4_buf *buf __unused,
1271                           int res,
1272                           void *arg);
1273
1274 static void jbd_journal_flush_trans(struct jbd_trans *trans)
1275 {
1276         struct jbd_buf *jbd_buf, *tmp;
1277         struct jbd_journal *journal = trans->journal;
1278         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1279         void *tmp_data = malloc(journal->block_size);
1280         if (!tmp_data)
1281                 return;
1282
1283         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1284                         tmp) {
1285                 struct ext4_buf *buf = jbd_buf->block_rec->buf;
1286                 /* The buffer in memory is still dirty. */
1287                 if (buf) {
1288                         if (jbd_buf->block_rec->trans != trans) {
1289                                 int r;
1290                                 struct ext4_block jbd_block = EXT4_BLOCK_ZERO();
1291                                 struct jbd_buf *orig_arg = buf->end_write_arg;
1292                                 ext4_assert(ext4_block_get(fs->bdev,
1293                                                         &jbd_block,
1294                                                         jbd_buf->jbd_lba) == EOK);
1295                                 memcpy(tmp_data, jbd_block.data,
1296                                                 journal->block_size);
1297                                 ext4_block_set(fs->bdev, &jbd_block);
1298                                 r = ext4_blocks_set_direct(fs->bdev, tmp_data,
1299                                                 buf->lba, 1);
1300                                 jbd_trans_end_write(fs->bdev->bc, buf, r, jbd_buf);
1301                                 buf->end_write = jbd_trans_end_write;
1302                                 buf->end_write_arg = orig_arg;
1303                                 orig_arg->block_rec->buf = buf;
1304                         } else
1305                                 ext4_block_flush_buf(fs->bdev, buf);
1306
1307                 }
1308         }
1309
1310         free(tmp_data);
1311 }
1312
1313 static void
1314 jbd_journal_skip_pure_revoke(struct jbd_journal *journal,
1315                              struct jbd_trans *trans)
1316 {
1317         journal->start = trans->start_iblock +
1318                 trans->alloc_blocks;
1319         wrap(&journal->jbd_fs->sb, journal->start);
1320         journal->trans_id = trans->trans_id + 1;
1321         jbd_journal_free_trans(journal,
1322                         trans, false);
1323         jbd_journal_write_sb(journal);
1324 }
1325
1326 static void
1327 jbd_journal_purge_cp_trans(struct jbd_journal *journal,
1328                            bool flush)
1329 {
1330         struct jbd_trans *trans;
1331         while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1332                 if (!trans->data_cnt) {
1333                         TAILQ_REMOVE(&journal->cp_queue,
1334                                         trans,
1335                                         trans_node);
1336                         jbd_journal_skip_pure_revoke(journal, trans);
1337                 } else {
1338                         if (trans->data_cnt ==
1339                                         trans->written_cnt) {
1340                                 journal->start =
1341                                         trans->start_iblock +
1342                                         trans->alloc_blocks;
1343                                 wrap(&journal->jbd_fs->sb,
1344                                                 journal->start);
1345                                 journal->trans_id =
1346                                         trans->trans_id + 1;
1347                                 TAILQ_REMOVE(&journal->cp_queue,
1348                                                 trans,
1349                                                 trans_node);
1350                                 jbd_journal_free_trans(journal,
1351                                                 trans,
1352                                                 false);
1353                                 jbd_journal_write_sb(journal);
1354                         } else if (!flush) {
1355                                 journal->start =
1356                                         trans->start_iblock;
1357                                 wrap(&journal->jbd_fs->sb,
1358                                                 journal->start);
1359                                 journal->trans_id =
1360                                         trans->trans_id;
1361                                 jbd_journal_write_sb(journal);
1362                                 break;
1363                         } else
1364                                 jbd_journal_flush_trans(trans);
1365                 }
1366         }
1367 }
1368
1369 /**@brief  Stop accessing the journal.
1370  * @param  journal current journal session
1371  * @return standard error code*/
1372 int jbd_journal_stop(struct jbd_journal *journal)
1373 {
1374         int r;
1375         struct jbd_fs *jbd_fs = journal->jbd_fs;
1376         uint32_t features_incompatible;
1377
1378         /* Commit all the transactions to the journal.*/
1379         jbd_journal_commit_all(journal);
1380
1381         /* Make sure that journalled content have reached
1382          * the disk.*/
1383         jbd_journal_purge_cp_trans(journal, true);
1384
1385         /* There should be no block record in this journal
1386          * session. */
1387         if (!RB_EMPTY(&journal->block_rec_root))
1388                 ext4_dbg(DEBUG_JBD,
1389                          DBG_WARN "There are still block records "
1390                                   "in this journal session!\n");
1391
1392         features_incompatible =
1393                 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1394                            features_incompatible);
1395         features_incompatible &= ~EXT4_FINCOM_RECOVER;
1396         ext4_set32(&jbd_fs->inode_ref.fs->sb,
1397                         features_incompatible,
1398                         features_incompatible);
1399         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1400                         &jbd_fs->inode_ref.fs->sb);
1401         if (r != EOK)
1402                 return r;
1403
1404         journal->start = 0;
1405         journal->trans_id = 0;
1406         jbd_journal_write_sb(journal);
1407         return jbd_write_sb(journal->jbd_fs);
1408 }
1409
1410 /**@brief  Allocate a block in the journal.
1411  * @param  journal current journal session
1412  * @param  trans transaction
1413  * @return allocated block address*/
1414 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
1415                                         struct jbd_trans *trans)
1416 {
1417         uint32_t start_block;
1418
1419         start_block = journal->last++;
1420         trans->alloc_blocks++;
1421         wrap(&journal->jbd_fs->sb, journal->last);
1422         
1423         /* If there is no space left, flush all journalled
1424          * blocks to disk first.*/
1425         if (journal->last == journal->start)
1426                 jbd_journal_purge_cp_trans(journal, true);
1427
1428         return start_block;
1429 }
1430
1431 /**@brief  Allocate a new transaction
1432  * @param  journal current journal session
1433  * @return transaction allocated*/
1434 struct jbd_trans *
1435 jbd_journal_new_trans(struct jbd_journal *journal)
1436 {
1437         struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
1438         if (!trans)
1439                 return NULL;
1440
1441         /* We will assign a trans_id to this transaction,
1442          * once it has been committed.*/
1443         trans->journal = journal;
1444         trans->data_csum = EXT4_CRC32_INIT;
1445         trans->error = EOK;
1446         TAILQ_INIT(&trans->buf_queue);
1447         return trans;
1448 }
1449
1450 /**@brief  gain access to it before making any modications.
1451  * @param  journal current journal session
1452  * @param  trans transaction
1453  * @param  block descriptor
1454  * @return standard error code.*/
1455 int jbd_trans_get_access(struct jbd_journal *journal,
1456                          struct jbd_trans *trans,
1457                          struct ext4_block *block)
1458 {
1459         int r = EOK;
1460         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1461         struct jbd_buf *jbd_buf = block->buf->end_write_arg;
1462
1463         /* If the buffer has already been modified, we should
1464          * flush dirty data in this buffer to disk.*/
1465         if (ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
1466             block->buf->end_write == jbd_trans_end_write) {
1467                 ext4_assert(jbd_buf);
1468                 if (jbd_buf->trans != trans)
1469                         r = ext4_block_flush_buf(fs->bdev, block->buf);
1470
1471         }
1472         return r;
1473 }
1474
1475 static struct jbd_block_rec *
1476 jbd_trans_block_rec_lookup(struct jbd_journal *journal,
1477                            ext4_fsblk_t lba)
1478 {
1479         struct jbd_block_rec tmp = {
1480                 .lba = lba
1481         };
1482
1483         return RB_FIND(jbd_block,
1484                        &journal->block_rec_root,
1485                        &tmp);
1486 }
1487
1488 static void
1489 jbd_trans_change_ownership(struct jbd_block_rec *block_rec,
1490                            struct jbd_trans *new_trans,
1491                            struct ext4_buf *new_buf)
1492 {
1493         LIST_REMOVE(block_rec, tbrec_node);
1494         /* Now this block record belongs to this transaction. */
1495         LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);
1496         block_rec->trans = new_trans;
1497         block_rec->buf = new_buf;
1498 }
1499
1500 static inline struct jbd_block_rec *
1501 jbd_trans_insert_block_rec(struct jbd_trans *trans,
1502                            ext4_fsblk_t lba,
1503                            struct ext4_buf *buf)
1504 {
1505         struct jbd_block_rec *block_rec;
1506         block_rec = jbd_trans_block_rec_lookup(trans->journal, lba);
1507         if (block_rec) {
1508                 jbd_trans_change_ownership(block_rec, trans, buf);
1509                 return block_rec;
1510         }
1511         block_rec = calloc(1, sizeof(struct jbd_block_rec));
1512         if (!block_rec)
1513                 return NULL;
1514
1515         block_rec->lba = lba;
1516         block_rec->buf = buf;
1517         block_rec->trans = trans;
1518         TAILQ_INIT(&block_rec->dirty_buf_queue);
1519         LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
1520         RB_INSERT(jbd_block, &trans->journal->block_rec_root, block_rec);
1521         return block_rec;
1522 }
1523
1524 static void
1525 jbd_trans_finish_callback(struct jbd_journal *journal,
1526                           const struct jbd_trans *trans,
1527                           struct jbd_block_rec *block_rec,
1528                           bool abort)
1529 {
1530         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1531         if (block_rec->trans != trans)
1532                 return;
1533
1534         if (!abort) {
1535                 struct jbd_buf *jbd_buf, *tmp;
1536                 TAILQ_FOREACH_SAFE(jbd_buf,
1537                                 &block_rec->dirty_buf_queue,
1538                                 dirty_buf_node,
1539                                 tmp) {
1540                         /* All we need is a fake ext4_buf. */
1541                         struct ext4_buf buf;
1542
1543                         jbd_trans_end_write(fs->bdev->bc,
1544                                         &buf,
1545                                         EOK,
1546                                         jbd_buf);
1547                 }
1548         } else {
1549                 struct jbd_buf *jbd_buf;
1550                 struct ext4_block jbd_block = EXT4_BLOCK_ZERO(),
1551                                   block = EXT4_BLOCK_ZERO();
1552                 jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
1553                                 jbd_buf_dirty);
1554                 if (jbd_buf) {
1555                         ext4_assert(ext4_block_get(fs->bdev,
1556                                                 &jbd_block,
1557                                                 jbd_buf->jbd_lba) == EOK);
1558                         ext4_assert(ext4_block_get_noread(fs->bdev,
1559                                                 &block,
1560                                                 block_rec->lba) == EOK);
1561                         memcpy(block.data, jbd_block.data,
1562                                         journal->block_size);
1563
1564                         jbd_trans_change_ownership(block_rec,
1565                                         jbd_buf->trans, block.buf);
1566
1567                         block.buf->end_write = jbd_trans_end_write;
1568                         block.buf->end_write_arg = jbd_buf;
1569
1570                         ext4_bcache_set_flag(jbd_block.buf, BC_TMP);
1571                         ext4_bcache_set_dirty(block.buf);
1572
1573                         ext4_block_set(fs->bdev, &jbd_block);
1574                         ext4_block_set(fs->bdev, &block);
1575                         return;
1576                 }
1577         }
1578 }
1579
1580 static inline void
1581 jbd_trans_remove_block_rec(struct jbd_journal *journal,
1582                            struct jbd_block_rec *block_rec,
1583                            struct jbd_trans *trans)
1584 {
1585         /* If this block record doesn't belong to this transaction,
1586          * give up.*/
1587         if (block_rec->trans == trans) {
1588                 LIST_REMOVE(block_rec, tbrec_node);
1589                 RB_REMOVE(jbd_block,
1590                                 &journal->block_rec_root,
1591                                 block_rec);
1592                 free(block_rec);
1593         }
1594 }
1595
1596 /**@brief  Add block to a transaction and mark it dirty.
1597  * @param  trans transaction
1598  * @param  block block descriptor
1599  * @return standard error code*/
1600 int jbd_trans_set_block_dirty(struct jbd_trans *trans,
1601                               struct ext4_block *block)
1602 {
1603         struct jbd_buf *buf;
1604
1605         struct jbd_block_rec *block_rec;
1606         if (block->buf->end_write == jbd_trans_end_write) {
1607                 buf = block->buf->end_write_arg;
1608                 if (buf && buf->trans == trans)
1609                         return EOK;
1610         }
1611         buf = calloc(1, sizeof(struct jbd_buf));
1612         if (!buf)
1613                 return ENOMEM;
1614
1615         if ((block_rec = jbd_trans_insert_block_rec(trans,
1616                                         block->lb_id,
1617                                         block->buf)) == NULL) {
1618                 free(buf);
1619                 return ENOMEM;
1620         }
1621
1622         TAILQ_INSERT_TAIL(&block_rec->dirty_buf_queue,
1623                         buf,
1624                         dirty_buf_node);
1625
1626         buf->block_rec = block_rec;
1627         buf->trans = trans;
1628         buf->block = *block;
1629         ext4_bcache_inc_ref(block->buf);
1630
1631         /* If the content reach the disk, notify us
1632          * so that we may do a checkpoint. */
1633         block->buf->end_write = jbd_trans_end_write;
1634         block->buf->end_write_arg = buf;
1635
1636         trans->data_cnt++;
1637         TAILQ_INSERT_HEAD(&trans->buf_queue, buf, buf_node);
1638
1639         ext4_bcache_set_dirty(block->buf);
1640         return EOK;
1641 }
1642
1643 /**@brief  Add block to be revoked to a transaction
1644  * @param  trans transaction
1645  * @param  lba logical block address
1646  * @return standard error code*/
1647 int jbd_trans_revoke_block(struct jbd_trans *trans,
1648                            ext4_fsblk_t lba)
1649 {
1650         struct jbd_revoke_rec *rec =
1651                 calloc(1, sizeof(struct jbd_revoke_rec));
1652         if (!rec)
1653                 return ENOMEM;
1654
1655         rec->lba = lba;
1656         LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
1657         return EOK;
1658 }
1659
1660 /**@brief  Try to add block to be revoked to a transaction.
1661  *         If @lba still remains in an transaction on checkpoint
1662  *         queue, add @lba as a revoked block to the transaction.
1663  * @param  trans transaction
1664  * @param  lba logical block address
1665  * @return standard error code*/
1666 int jbd_trans_try_revoke_block(struct jbd_trans *trans,
1667                                ext4_fsblk_t lba)
1668 {
1669         int r = EOK;
1670         struct jbd_journal *journal = trans->journal;
1671         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1672         struct jbd_block_rec *block_rec =
1673                 jbd_trans_block_rec_lookup(journal, lba);
1674
1675         /* Make sure we don't flush any buffers belong to this transaction. */
1676         if (block_rec && block_rec->trans != trans) {
1677                 /* If the buffer has not been flushed yet, flush it now. */
1678                 if (block_rec->buf) {
1679                         r = ext4_block_flush_buf(fs->bdev, block_rec->buf);
1680                         if (r != EOK)
1681                                 return r;
1682
1683                 }
1684
1685                 jbd_trans_revoke_block(trans, lba);
1686         }
1687
1688         return EOK;
1689 }
1690
1691 /**@brief  Free a transaction
1692  * @param  journal current journal session
1693  * @param  trans transaction
1694  * @param  abort discard all the modifications on the block?
1695  * @return standard error code*/
1696 void jbd_journal_free_trans(struct jbd_journal *journal,
1697                             struct jbd_trans *trans,
1698                             bool abort)
1699 {
1700         struct jbd_buf *jbd_buf, *tmp;
1701         struct jbd_revoke_rec *rec, *tmp2;
1702         struct jbd_block_rec *block_rec, *tmp3;
1703         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1704         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1705                           tmp) {
1706                 block_rec = jbd_buf->block_rec;
1707                 if (abort) {
1708                         jbd_buf->block.buf->end_write = NULL;
1709                         jbd_buf->block.buf->end_write_arg = NULL;
1710                         ext4_bcache_clear_dirty(jbd_buf->block.buf);
1711                         ext4_block_set(fs->bdev, &jbd_buf->block);
1712                 }
1713
1714                 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1715                         jbd_buf,
1716                         dirty_buf_node);
1717                 jbd_trans_finish_callback(journal,
1718                                 trans,
1719                                 block_rec,
1720                                 abort);
1721                 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1722                 free(jbd_buf);
1723         }
1724         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1725                           tmp2) {
1726                 LIST_REMOVE(rec, revoke_node);
1727                 free(rec);
1728         }
1729         LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
1730                           tmp3) {
1731                 jbd_trans_remove_block_rec(journal, block_rec, trans);
1732         }
1733
1734         free(trans);
1735 }
1736
1737 /**@brief  Write commit block for a transaction
1738  * @param  trans transaction
1739  * @return standard error code*/
1740 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
1741 {
1742         int rc;
1743         struct jbd_commit_header *header;
1744         uint32_t commit_iblock = 0;
1745         struct ext4_block commit_block;
1746         struct jbd_journal *journal = trans->journal;
1747
1748         commit_iblock = jbd_journal_alloc_block(journal, trans);
1749         rc = jbd_block_get_noread(journal->jbd_fs,
1750                         &commit_block, commit_iblock);
1751         if (rc != EOK)
1752                 return rc;
1753
1754         header = (struct jbd_commit_header *)commit_block.data;
1755         jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
1756         jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
1757         jbd_set32(&header->header, sequence, trans->trans_id);
1758
1759         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1760                                 JBD_FEATURE_COMPAT_CHECKSUM)) {
1761                 jbd_set32(header, chksum_type, JBD_CRC32_CHKSUM);
1762                 jbd_set32(header, chksum_size, JBD_CRC32_CHKSUM_SIZE);
1763                 jbd_set32(header, chksum[0], trans->data_csum);
1764         }
1765         jbd_commit_csum_set(journal->jbd_fs, header);
1766         ext4_bcache_set_dirty(commit_block.buf);
1767         rc = jbd_block_set(journal->jbd_fs, &commit_block);
1768         if (rc != EOK)
1769                 return rc;
1770
1771         return EOK;
1772 }
1773
1774 /**@brief  Write descriptor block for a transaction
1775  * @param  journal current journal session
1776  * @param  trans transaction
1777  * @return standard error code*/
1778 static int jbd_journal_prepare(struct jbd_journal *journal,
1779                                struct jbd_trans *trans)
1780 {
1781         int rc = EOK, i = 0;
1782         int32_t tag_tbl_size;
1783         uint32_t desc_iblock = 0;
1784         uint32_t data_iblock = 0;
1785         char *tag_start = NULL, *tag_ptr = NULL;
1786         struct jbd_buf *jbd_buf, *tmp;
1787         struct ext4_block desc_block, data_block;
1788         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1789         uint32_t checksum = EXT4_CRC32_INIT;
1790
1791         /* Try to remove any non-dirty buffers from the tail of
1792          * buf_queue. */
1793         TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
1794                         jbd_trans_buf, buf_node, tmp) {
1795                 /* We stop the iteration when we find a dirty buffer. */
1796                 if (ext4_bcache_test_flag(jbd_buf->block.buf,
1797                                         BC_DIRTY))
1798                         break;
1799         
1800                 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1801                         jbd_buf,
1802                         dirty_buf_node);
1803
1804                 jbd_trans_finish_callback(journal,
1805                                 trans,
1806                                 jbd_buf->block_rec,
1807                                 false);
1808
1809                 /* The buffer has not been modified, just release
1810                  * that jbd_buf. */
1811                 jbd_trans_remove_block_rec(journal,
1812                                 jbd_buf->block_rec, trans);
1813                 trans->data_cnt--;
1814
1815                 jbd_buf->block.buf->end_write = NULL;
1816                 jbd_buf->block.buf->end_write_arg = NULL;
1817                 ext4_block_set(fs->bdev, &jbd_buf->block);
1818                 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1819                 free(jbd_buf);
1820         }
1821
1822         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
1823                 struct tag_info tag_info;
1824                 bool uuid_exist = false;
1825                 if (!ext4_bcache_test_flag(jbd_buf->block.buf,
1826                                            BC_DIRTY)) {
1827                         TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1828                                         jbd_buf,
1829                                         dirty_buf_node);
1830
1831                         jbd_trans_finish_callback(journal,
1832                                         trans,
1833                                         jbd_buf->block_rec,
1834                                         false);
1835
1836                         /* The buffer has not been modified, just release
1837                          * that jbd_buf. */
1838                         jbd_trans_remove_block_rec(journal,
1839                                         jbd_buf->block_rec, trans);
1840                         trans->data_cnt--;
1841
1842                         jbd_buf->block.buf->end_write = NULL;
1843                         jbd_buf->block.buf->end_write_arg = NULL;
1844                         ext4_block_set(fs->bdev, &jbd_buf->block);
1845                         TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1846                         free(jbd_buf);
1847                         continue;
1848                 }
1849                 checksum = jbd_block_csum(journal->jbd_fs,
1850                                           jbd_buf->block.data,
1851                                           checksum,
1852                                           trans->trans_id);
1853 again:
1854                 if (!desc_iblock) {
1855                         struct jbd_bhdr *bhdr;
1856                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1857                         rc = jbd_block_get_noread(journal->jbd_fs,
1858                                            &desc_block, desc_iblock);
1859                         if (rc != EOK)
1860                                 break;
1861
1862                         ext4_bcache_set_dirty(desc_block.buf);
1863
1864                         bhdr = (struct jbd_bhdr *)desc_block.data;
1865                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1866                         jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
1867                         jbd_set32(bhdr, sequence, trans->trans_id);
1868
1869                         tag_start = (char *)(bhdr + 1);
1870                         tag_ptr = tag_start;
1871                         uuid_exist = true;
1872                         tag_tbl_size = journal->block_size -
1873                                 sizeof(struct jbd_bhdr);
1874
1875                         if (jbd_has_csum(&journal->jbd_fs->sb))
1876                                 tag_tbl_size -= sizeof(struct jbd_block_tail);
1877
1878                         if (!trans->start_iblock)
1879                                 trans->start_iblock = desc_iblock;
1880
1881                 }
1882                 tag_info.block = jbd_buf->block.lb_id;
1883                 tag_info.uuid_exist = uuid_exist;
1884                 if (i == trans->data_cnt - 1)
1885                         tag_info.last_tag = true;
1886                 else
1887                         tag_info.last_tag = false;
1888
1889                 tag_info.checksum = checksum;
1890
1891                 if (uuid_exist)
1892                         memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1893                                         UUID_SIZE);
1894
1895                 rc = jbd_write_block_tag(journal->jbd_fs,
1896                                 tag_ptr,
1897                                 tag_tbl_size,
1898                                 &tag_info);
1899                 if (rc != EOK) {
1900                         jbd_meta_csum_set(journal->jbd_fs,
1901                                         (struct jbd_bhdr *)desc_block.data);
1902                         jbd_block_set(journal->jbd_fs, &desc_block);
1903                         desc_iblock = 0;
1904                         goto again;
1905                 }
1906
1907                 data_iblock = jbd_journal_alloc_block(journal, trans);
1908                 rc = jbd_block_get_noread(journal->jbd_fs,
1909                                 &data_block, data_iblock);
1910                 if (rc != EOK)
1911                         break;
1912
1913                 ext4_bcache_set_dirty(data_block.buf);
1914
1915                 memcpy(data_block.data, jbd_buf->block.data,
1916                         journal->block_size);
1917                 jbd_buf->jbd_lba = data_block.lb_id;
1918
1919                 rc = jbd_block_set(journal->jbd_fs, &data_block);
1920                 if (rc != EOK)
1921                         break;
1922
1923                 tag_ptr += tag_info.tag_bytes;
1924                 tag_tbl_size -= tag_info.tag_bytes;
1925
1926                 i++;
1927         }
1928         if (rc == EOK && desc_iblock) {
1929                 jbd_meta_csum_set(journal->jbd_fs,
1930                                 (struct jbd_bhdr *)desc_block.data);
1931                 trans->data_csum = checksum;
1932                 jbd_block_set(journal->jbd_fs, &desc_block);
1933         }
1934
1935         return rc;
1936 }
1937
1938 /**@brief  Write revoke block for a transaction
1939  * @param  journal current journal session
1940  * @param  trans transaction
1941  * @return standard error code*/
1942 static int
1943 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1944                            struct jbd_trans *trans)
1945 {
1946         int rc = EOK, i = 0;
1947         int32_t tag_tbl_size;
1948         uint32_t desc_iblock = 0;
1949         char *blocks_entry = NULL;
1950         struct jbd_revoke_rec *rec, *tmp;
1951         struct ext4_block desc_block;
1952         struct jbd_revoke_header *header = NULL;
1953         int32_t record_len = 4;
1954
1955         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1956                                      JBD_FEATURE_INCOMPAT_64BIT))
1957                 record_len = 8;
1958
1959         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1960                           tmp) {
1961 again:
1962                 if (!desc_iblock) {
1963                         struct jbd_bhdr *bhdr;
1964                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1965                         rc = jbd_block_get_noread(journal->jbd_fs,
1966                                            &desc_block, desc_iblock);
1967                         if (rc != EOK) {
1968                                 break;
1969                         }
1970
1971                         ext4_bcache_set_dirty(desc_block.buf);
1972
1973                         bhdr = (struct jbd_bhdr *)desc_block.data;
1974                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1975                         jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1976                         jbd_set32(bhdr, sequence, trans->trans_id);
1977                         
1978                         header = (struct jbd_revoke_header *)bhdr;
1979                         blocks_entry = (char *)(header + 1);
1980                         tag_tbl_size = journal->block_size -
1981                                 sizeof(struct jbd_revoke_header);
1982
1983                         if (jbd_has_csum(&journal->jbd_fs->sb))
1984                                 tag_tbl_size -= sizeof(struct jbd_block_tail);
1985
1986                         if (!trans->start_iblock)
1987                                 trans->start_iblock = desc_iblock;
1988
1989                 }
1990
1991                 if (tag_tbl_size < record_len) {
1992                         jbd_set32(header, count,
1993                                   journal->block_size - tag_tbl_size);
1994                         jbd_meta_csum_set(journal->jbd_fs,
1995                                         (struct jbd_bhdr *)desc_block.data);
1996                         jbd_block_set(journal->jbd_fs, &desc_block);
1997                         desc_iblock = 0;
1998                         header = NULL;
1999                         goto again;
2000                 }
2001                 if (record_len == 8) {
2002                         uint64_t *blocks =
2003                                 (uint64_t *)blocks_entry;
2004                         *blocks = to_be64(rec->lba);
2005                 } else {
2006                         uint32_t *blocks =
2007                                 (uint32_t *)blocks_entry;
2008                         *blocks = to_be32(rec->lba);
2009                 }
2010                 blocks_entry += record_len;
2011                 tag_tbl_size -= record_len;
2012
2013                 i++;
2014         }
2015         if (rc == EOK && desc_iblock) {
2016                 if (header != NULL)
2017                         jbd_set32(header, count,
2018                                   journal->block_size - tag_tbl_size);
2019
2020                 jbd_meta_csum_set(journal->jbd_fs,
2021                                 (struct jbd_bhdr *)desc_block.data);
2022                 jbd_block_set(journal->jbd_fs, &desc_block);
2023         }
2024
2025         return rc;
2026 }
2027
2028 /**@brief  Submit the transaction to transaction queue.
2029  * @param  journal current journal session
2030  * @param  trans transaction*/
2031 void
2032 jbd_journal_submit_trans(struct jbd_journal *journal,
2033                          struct jbd_trans *trans)
2034 {
2035         TAILQ_INSERT_TAIL(&journal->trans_queue,
2036                           trans,
2037                           trans_node);
2038 }
2039
2040 /**@brief  Put references of block descriptors in a transaction.
2041  * @param  journal current journal session
2042  * @param  trans transaction*/
2043 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
2044 {
2045         struct jbd_buf *jbd_buf, *tmp;
2046         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
2047         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
2048                         tmp) {
2049                 struct ext4_block block = jbd_buf->block;
2050                 ext4_block_set(fs->bdev, &block);
2051         }
2052 }
2053
2054 /**@brief  Update the start block of the journal when
2055  *         all the contents in a transaction reach the disk.*/
2056 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
2057                           struct ext4_buf *buf,
2058                           int res,
2059                           void *arg)
2060 {
2061         struct jbd_buf *jbd_buf = arg;
2062         struct jbd_trans *trans = jbd_buf->trans;
2063         struct jbd_journal *journal = trans->journal;
2064         bool first_in_queue =
2065                 trans == TAILQ_FIRST(&journal->cp_queue);
2066         if (res != EOK)
2067                 trans->error = res;
2068
2069         TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
2070         TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
2071                         jbd_buf,
2072                         dirty_buf_node);
2073         jbd_trans_finish_callback(journal,
2074                         trans,
2075                         jbd_buf->block_rec,
2076                         false);
2077         jbd_buf->block_rec->buf = NULL;
2078         free(jbd_buf);
2079
2080         /* Clear the end_write and end_write_arg fields. */
2081         buf->end_write = NULL;
2082         buf->end_write_arg = NULL;
2083
2084         trans->written_cnt++;
2085         if (trans->written_cnt == trans->data_cnt) {
2086                 /* If it is the first transaction on checkpoint queue,
2087                  * we will shift the start of the journal to the next
2088                  * transaction, and remove subsequent written
2089                  * transactions from checkpoint queue until we find
2090                  * an unwritten one. */
2091                 if (first_in_queue) {
2092                         journal->start = trans->start_iblock +
2093                                 trans->alloc_blocks;
2094                         wrap(&journal->jbd_fs->sb, journal->start);
2095                         journal->trans_id = trans->trans_id + 1;
2096                         TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
2097                         jbd_journal_free_trans(journal, trans, false);
2098
2099                         jbd_journal_purge_cp_trans(journal, false);
2100                         jbd_journal_write_sb(journal);
2101                         jbd_write_sb(journal->jbd_fs);
2102                 }
2103         }
2104 }
2105
2106 /**@brief  Commit a transaction to the journal immediately.
2107  * @param  journal current journal session
2108  * @param  trans transaction
2109  * @return standard error code*/
2110 int jbd_journal_commit_trans(struct jbd_journal *journal,
2111                              struct jbd_trans *trans)
2112 {
2113         int rc = EOK;
2114         uint32_t last = journal->last;
2115
2116         trans->trans_id = journal->alloc_trans_id;
2117         rc = jbd_journal_prepare(journal, trans);
2118         if (rc != EOK)
2119                 goto Finish;
2120
2121         rc = jbd_journal_prepare_revoke(journal, trans);
2122         if (rc != EOK)
2123                 goto Finish;
2124
2125         if (TAILQ_EMPTY(&trans->buf_queue) &&
2126             LIST_EMPTY(&trans->revoke_list)) {
2127                 /* Since there are no entries in both buffer list
2128                  * and revoke entry list, we do not consider trans as
2129                  * complete transaction and just return EOK.*/
2130                 jbd_journal_free_trans(journal, trans, false);
2131                 goto Finish;
2132         }
2133
2134         rc = jbd_trans_write_commit_block(trans);
2135         if (rc != EOK)
2136                 goto Finish;
2137
2138         journal->alloc_trans_id++;
2139         if (TAILQ_EMPTY(&journal->cp_queue)) {
2140                 if (trans->data_cnt) {
2141                         journal->start = trans->start_iblock;
2142                         wrap(&journal->jbd_fs->sb, journal->start);
2143                         journal->trans_id = trans->trans_id;
2144                         jbd_journal_write_sb(journal);
2145                         jbd_write_sb(journal->jbd_fs);
2146                         TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2147                                         trans_node);
2148                         jbd_journal_cp_trans(journal, trans);
2149                 } else {
2150                         journal->start = trans->start_iblock +
2151                                 trans->alloc_blocks;
2152                         wrap(&journal->jbd_fs->sb, journal->start);
2153                         journal->trans_id = trans->trans_id + 1;
2154                         jbd_journal_write_sb(journal);
2155                         jbd_journal_free_trans(journal, trans, false);
2156                 }
2157         } else {
2158                 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2159                                 trans_node);
2160                 if (trans->data_cnt)
2161                         jbd_journal_cp_trans(journal, trans);
2162
2163         }
2164 Finish:
2165         if (rc != EOK) {
2166                 journal->last = last;
2167                 jbd_journal_free_trans(journal, trans, true);
2168         }
2169         return rc;
2170 }
2171
2172 /**@brief  Commit one transaction on transaction queue
2173  *         to the journal.
2174  * @param  journal current journal session.*/
2175 void jbd_journal_commit_one(struct jbd_journal *journal)
2176 {
2177         struct jbd_trans *trans;
2178
2179         if ((trans = TAILQ_FIRST(&journal->trans_queue))) {
2180                 TAILQ_REMOVE(&journal->trans_queue, trans, trans_node);
2181                 jbd_journal_commit_trans(journal, trans);
2182         }
2183 }
2184
2185 /**@brief  Commit all the transactions on transaction queue
2186  *         to the journal.
2187  * @param  journal current journal session.*/
2188 void jbd_journal_commit_all(struct jbd_journal *journal)
2189 {
2190         while (!TAILQ_EMPTY(&journal->trans_queue)) {
2191                 jbd_journal_commit_one(journal);
2192         }
2193 }
2194
2195 /**
2196  * @}
2197  */