Numerous changes. See below:
[lwext4.git] / lwext4 / ext4_journal.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * - Redistributions of source code must retain the above copyright
11  *   notice, this list of conditions and the following disclaimer.
12  * - Redistributions in binary form must reproduce the above copyright
13  *   notice, this list of conditions and the following disclaimer in the
14  *   documentation and/or other materials provided with the distribution.
15  * - The name of the author may not be used to endorse or promote products
16  *   derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 /** @addtogroup lwext4
31  * @{
32  */
33 /**
34  * @file  ext4_journal.c
35  * @brief Journal handle functions
36  */
37
38 #include "ext4_config.h"
39 #include "ext4_types.h"
40 #include "ext4_fs.h"
41 #include "ext4_super.h"
42 #include "ext4_journal.h"
43 #include "ext4_errno.h"
44 #include "ext4_blockdev.h"
45 #include "ext4_crc32.h"
46 #include "ext4_debug.h"
47
48 #include <string.h>
49 #include <stdlib.h>
50
51 /**@brief  Revoke entry during journal replay.*/
52 struct revoke_entry {
53         /**@brief  Block number not to be replayed.*/
54         ext4_fsblk_t block;
55
56         /**@brief  For any transaction id smaller
57          *         than trans_id, records of @block
58          *         in those transactions should not
59          *         be replayed.*/
60         uint32_t trans_id;
61
62         /**@brief  Revoke tree node.*/
63         RB_ENTRY(revoke_entry) revoke_node;
64 };
65
66 /**@brief  Valid journal replay information.*/
67 struct recover_info {
68         /**@brief  Starting transaction id.*/
69         uint32_t start_trans_id;
70
71         /**@brief  Ending transaction id.*/
72         uint32_t last_trans_id;
73
74         /**@brief  Used as internal argument.*/
75         uint32_t this_trans_id;
76
77         /**@brief  No of transactions went through.*/
78         uint32_t trans_cnt;
79
80         /**@brief  RB-Tree storing revoke entries.*/
81         RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
82 };
83
/**@brief  Argument bundle handed to the replay callback.*/
struct replay_arg {
        /**@brief  Replay information shared by the whole recovery.*/
        struct recover_info *info;

        /**@brief  Points at the index of the journal block being
         *         processed; the callback advances it.*/
        uint32_t *this_block;

        /**@brief  Id of the transaction being processed.*/
        uint32_t this_trans_id;
};
95
96 static int
97 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
98 {
99         if (a->block > b->block)
100                 return 1;
101         else if (a->block < b->block)
102                 return -1;
103         return 0;
104 }
105
106 static int
107 jbd_block_rec_cmp(struct jbd_block_rec *a, struct jbd_block_rec *b)
108 {
109         if (a->lba > b->lba)
110                 return 1;
111         else if (a->lba < b->lba)
112                 return -1;
113         return 0;
114 }
115
116 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
117                      jbd_revoke_entry_cmp, static inline)
118 RB_GENERATE_INTERNAL(jbd_block, jbd_block_rec, block_rec_node,
119                      jbd_block_rec_cmp, static inline)
120
121 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
122 #define jbd_free_revoke_entry(addr) free(addr)
123
124 static int jbd_has_csum(struct jbd_sb *jbd_sb)
125 {
126         if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V2))
127                 return 2;
128
129         if (JBD_HAS_INCOMPAT_FEATURE(jbd_sb, JBD_FEATURE_INCOMPAT_CSUM_V3))
130                 return 3;
131
132         return 0;
133 }
134
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the checksum of the jbd superblock.
 *         The checksum field is zeroed while the crc32c is taken and
 *         restored to its previous raw value afterwards.
 * @param  jbd_sb jbd superblock
 * @return crc32c over JBD_SUPERBLOCK_SIZE bytes of the superblock,
 *         or 0 when the journal carries no checksums */
static uint32_t jbd_sb_csum(struct jbd_sb *jbd_sb)
{
        uint32_t saved_field;
        uint32_t crc;

        if (!jbd_has_csum(jbd_sb))
                return 0;

        saved_field = jbd_sb->checksum;
        jbd_set32(jbd_sb, checksum, 0);

        /* crc32c over the whole superblock */
        crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_sb, JBD_SUPERBLOCK_SIZE);

        jbd_sb->checksum = saved_field;
        return crc;
}
#else
#define jbd_sb_csum(...) 0
#endif
153
154 static void jbd_sb_csum_set(struct jbd_sb *jbd_sb)
155 {
156         if (!jbd_has_csum(jbd_sb))
157                 return;
158
159         jbd_set32(jbd_sb, checksum, jbd_sb_csum(jbd_sb));
160 }
161
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum stored in the jbd superblock.
 * @param  jbd_sb jbd superblock
 * @return true when the journal carries no checksums or when the
 *         stored checksum equals the computed one */
static bool
jbd_verify_sb_csum(struct jbd_sb *jbd_sb)
{
        uint32_t computed;
        uint32_t stored;

        if (!jbd_has_csum(jbd_sb))
                return true;

        computed = jbd_sb_csum(jbd_sb);
        stored = jbd_get32(jbd_sb, checksum);
        return computed == stored;
}
#else
#define jbd_verify_sb_csum(...) true
#endif
174
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the checksum of a journal metadata block.
 *         The checksum kept in the block tail (the last
 *         sizeof(struct jbd_block_tail) bytes of the block) is zeroed
 *         during the computation and restored afterwards.
 * @param  jbd_fs jbd filesystem
 * @param  bhdr start of the block
 * @return crc32c seeded with the journal uuid and run over the whole
 *         block, or 0 when the journal carries no checksums */
static uint32_t jbd_meta_csum(struct jbd_fs *jbd_fs,
                              struct jbd_bhdr *bhdr)
{
        uint32_t block_size;
        uint32_t saved_field;
        uint32_t crc;
        struct jbd_block_tail *tail;

        if (!jbd_has_csum(&jbd_fs->sb))
                return 0;

        block_size = jbd_get32(&jbd_fs->sb, blocksize);
        tail = (struct jbd_block_tail *)((char *)bhdr + block_size -
                                         sizeof(struct jbd_block_tail));
        saved_field = tail->checksum;
        tail->checksum = 0;

        /* Seed with the uuid, then cover the whole block. */
        crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
                          sizeof(jbd_fs->sb.uuid));
        crc = ext4_crc32c(crc, bhdr, block_size);

        tail->checksum = saved_field;
        return crc;
}
#else
#define jbd_meta_csum(...) 0
#endif
202
203 static void jbd_meta_csum_set(struct jbd_fs *jbd_fs,
204                               struct jbd_bhdr *bhdr)
205 {
206         uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
207         struct jbd_block_tail *tail = (struct jbd_block_tail *)
208                                 ((char *)bhdr + block_size -
209                                 sizeof(struct jbd_block_tail));
210         if (!jbd_has_csum(&jbd_fs->sb))
211                 return;
212
213         tail->checksum = to_be32(jbd_meta_csum(jbd_fs, bhdr));
214 }
215
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum kept in the tail of a journal metadata
 *         block.
 * @param  jbd_fs jbd filesystem
 * @param  bhdr start of the block
 * @return true when the journal carries no checksums or when the
 *         stored checksum equals the computed one */
static bool
jbd_verify_meta_csum(struct jbd_fs *jbd_fs,
                     struct jbd_bhdr *bhdr)
{
        uint32_t block_size;
        uint32_t computed;
        struct jbd_block_tail *tail;

        if (!jbd_has_csum(&jbd_fs->sb))
                return true;

        block_size = jbd_get32(&jbd_fs->sb, blocksize);
        tail = (struct jbd_block_tail *)((char *)bhdr + block_size -
                                         sizeof(struct jbd_block_tail));

        computed = jbd_meta_csum(jbd_fs, bhdr);
        return computed == to_be32(tail->checksum);
}
#else
#define jbd_verify_meta_csum(...) true
#endif
233
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the checksum of a commit block.
 *         chksum_type, chksum_size and chksum[0] of the header are
 *         zeroed during the computation and restored afterwards.
 * @param  jbd_fs jbd filesystem
 * @param  header commit header at the start of the block
 * @return crc32c seeded with the journal uuid and run over the whole
 *         block, or 0 when the journal carries no checksums */
static uint32_t jbd_commit_csum(struct jbd_fs *jbd_fs,
                              struct jbd_commit_header *header)
{
        uint32_t saved_type, saved_size, saved_csum;
        uint32_t block_size;
        uint32_t crc;

        if (!jbd_has_csum(&jbd_fs->sb))
                return 0;

        saved_type = header->chksum_type;
        saved_size = header->chksum_size;
        saved_csum = header->chksum[0];
        block_size = jbd_get32(&jbd_fs->sb, blocksize);

        header->chksum_type = 0;
        header->chksum_size = 0;
        header->chksum[0] = 0;

        /* Seed with the uuid, then cover the whole block. */
        crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
                          sizeof(jbd_fs->sb.uuid));
        crc = ext4_crc32c(crc, header, block_size);

        header->chksum_type = saved_type;
        header->chksum_size = saved_size;
        header->chksum[0] = saved_csum;
        return crc;
}
#else
#define jbd_commit_csum(...) 0
#endif
265
266 static void jbd_commit_csum_set(struct jbd_fs *jbd_fs,
267                               struct jbd_commit_header *header)
268 {
269         if (!jbd_has_csum(&jbd_fs->sb))
270                 return;
271
272         header->chksum_type = 0;
273         header->chksum_size = 0;
274         header->chksum[0] = jbd_commit_csum(jbd_fs, header);
275 }
276
#if CONFIG_META_CSUM_ENABLE
/**@brief  Check the checksum stored in a commit block.
 * @param  jbd_fs jbd filesystem
 * @param  header commit header at the start of the block
 * @return true when the journal carries no checksums or when
 *         chksum[0] equals the computed checksum passed through
 *         to_be32() */
static bool jbd_verify_commit_csum(struct jbd_fs *jbd_fs,
                                   struct jbd_commit_header *header)
{
        uint32_t expected;

        if (!jbd_has_csum(&jbd_fs->sb))
                return true;

        expected = to_be32(jbd_commit_csum(jbd_fs, header));
        return header->chksum[0] == expected;
}
#else
#define jbd_verify_commit_csum(...) true
#endif
290
#if CONFIG_META_CSUM_ENABLE
/**@brief  Compute the checksum of a journalled data block.
 *
 * With checksum v2/v3 the result is a crc32c seeded with the journal
 * uuid, continued over the raw bytes of @sequence and finally over
 * the whole block. Otherwise, when JBD_FEATURE_COMPAT_CHECKSUM is
 * detected, the result is ext4_crc32() over the block starting from
 * @csum.
 *
 * NOTE: @csum is only used in the JBD_FEATURE_COMPAT_CHECKSUM case.
 *
 * NOTE(review): a COMPAT feature flag is tested with
 * JBD_HAS_INCOMPAT_FEATURE here; this looks unintended - confirm
 * against the feature macro definitions.
 *
 * @param  jbd_fs jbd filesystem
 * @param  buf block content, one journal block in size
 * @param  csum running checksum to continue from (see note above)
 * @param  sequence sequence number mixed into the v2/v3 checksum
 * @return the checksum, or 0 when no checksum feature applies */
static uint32_t jbd_block_csum(struct jbd_fs *jbd_fs, const void *buf,
                               uint32_t csum,
                               uint32_t sequence)
{
        uint32_t block_size = jbd_get32(&jbd_fs->sb, blocksize);
        uint32_t crc;

        if (jbd_has_csum(&jbd_fs->sb)) {
                /* uuid first ... */
                crc = ext4_crc32c(EXT4_CRC32_INIT, jbd_fs->sb.uuid,
                                  sizeof(jbd_fs->sb.uuid));
                /* ... then the sequence number ... */
                crc = ext4_crc32c(crc, &sequence, sizeof(uint32_t));
                /* ... and finally the whole block. */
                return ext4_crc32c(crc, buf, block_size);
        }

        if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
                                     JBD_FEATURE_COMPAT_CHECKSUM)) {
                /* Plain crc32 over the whole block, chained on csum. */
                return ext4_crc32(csum, buf, block_size);
        }

        return 0;
}
#else
#define jbd_block_csum(...) 0
#endif
325
326 static void jbd_block_tag_csum_set(struct jbd_fs *jbd_fs, void *__tag,
327                                    uint32_t checksum)
328 {
329         int ver = jbd_has_csum(&jbd_fs->sb);
330         if (!ver)
331                 return;
332
333         if (ver == 2) {
334                 struct jbd_block_tag *tag = __tag;
335                 tag->checksum = (uint16_t)to_be32(checksum);
336         } else {
337                 struct jbd_block_tag3 *tag = __tag;
338                 tag->checksum = to_be32(checksum);
339         }
340 }
341
342 /**@brief  Write jbd superblock to disk.
343  * @param  jbd_fs jbd filesystem
344  * @param  s jbd superblock
345  * @return standard error code*/
346 static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
347 {
348         int rc;
349         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
350         uint64_t offset;
351         ext4_fsblk_t fblock;
352         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
353         if (rc != EOK)
354                 return rc;
355
356         jbd_sb_csum_set(s);
357         offset = fblock * ext4_sb_get_block_size(&fs->sb);
358         return ext4_block_writebytes(fs->bdev, offset, s,
359                                      EXT4_SUPERBLOCK_SIZE);
360 }
361
362 /**@brief  Read jbd superblock from disk.
363  * @param  jbd_fs jbd filesystem
364  * @param  s jbd superblock
365  * @return standard error code*/
366 static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
367 {
368         int rc;
369         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
370         uint64_t offset;
371         ext4_fsblk_t fblock;
372         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
373         if (rc != EOK)
374                 return rc;
375
376         offset = fblock * ext4_sb_get_block_size(&fs->sb);
377         return ext4_block_readbytes(fs->bdev, offset, s,
378                                     EXT4_SUPERBLOCK_SIZE);
379 }
380
381 /**@brief  Verify jbd superblock.
382  * @param  sb jbd superblock
383  * @return true if jbd superblock is valid */
384 static bool jbd_verify_sb(struct jbd_sb *sb)
385 {
386         struct jbd_bhdr *header = &sb->header;
387         if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
388                 return false;
389
390         if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
391             jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
392                 return false;
393
394         return jbd_verify_sb_csum(sb);
395 }
396
397 /**@brief  Write back dirty jbd superblock to disk.
398  * @param  jbd_fs jbd filesystem
399  * @return standard error code*/
400 static int jbd_write_sb(struct jbd_fs *jbd_fs)
401 {
402         int rc = EOK;
403         if (jbd_fs->dirty) {
404                 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
405                 if (rc != EOK)
406                         return rc;
407
408                 jbd_fs->dirty = false;
409         }
410         return rc;
411 }
412
413 /**@brief  Get reference to jbd filesystem.
414  * @param  fs Filesystem to load journal of
415  * @param  jbd_fs jbd filesystem
416  * @return standard error code*/
417 int jbd_get_fs(struct ext4_fs *fs,
418                struct jbd_fs *jbd_fs)
419 {
420         int rc;
421         uint32_t journal_ino;
422
423         memset(jbd_fs, 0, sizeof(struct jbd_fs));
424         /* See if there is journal inode on this filesystem.*/
425         /* FIXME: detection on existance ofbkejournal bdev is
426          *        missing.*/
427         journal_ino = ext4_get32(&fs->sb, journal_inode_number);
428
429         rc = ext4_fs_get_inode_ref(fs,
430                                    journal_ino,
431                                    &jbd_fs->inode_ref);
432         if (rc != EOK) {
433                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
434                 return rc;
435         }
436         rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
437         if (rc != EOK) {
438                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
439                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
440                 return rc;
441         }
442         if (!jbd_verify_sb(&jbd_fs->sb)) {
443                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
444                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
445                 rc = EIO;
446         }
447
448         return rc;
449 }
450
451 /**@brief  Put reference of jbd filesystem.
452  * @param  jbd_fs jbd filesystem
453  * @return standard error code*/
454 int jbd_put_fs(struct jbd_fs *jbd_fs)
455 {
456         int rc = EOK;
457         rc = jbd_write_sb(jbd_fs);
458
459         ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
460         return rc;
461 }
462
463 /**@brief  Data block lookup helper.
464  * @param  jbd_fs jbd filesystem
465  * @param  iblock block index
466  * @param  fblock logical block address
467  * @return standard error code*/
468 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
469                    ext4_lblk_t iblock,
470                    ext4_fsblk_t *fblock)
471 {
472         int rc = ext4_fs_get_inode_dblk_idx(
473                         &jbd_fs->inode_ref,
474                         iblock,
475                         fblock,
476                         false);
477         return rc;
478 }
479
480 /**@brief   jbd block get function (through cache).
481  * @param   jbd_fs jbd filesystem
482  * @param   block block descriptor
483  * @param   fblock jbd logical block address
484  * @return  standard error code*/
485 static int jbd_block_get(struct jbd_fs *jbd_fs,
486                   struct ext4_block *block,
487                   ext4_fsblk_t fblock)
488 {
489         /* TODO: journal device. */
490         int rc;
491         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
492
493         /* Lookup the logical block address of
494          * fblock.*/
495         rc = jbd_inode_bmap(jbd_fs, iblock,
496                             &fblock);
497         if (rc != EOK)
498                 return rc;
499
500         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
501         rc = ext4_block_get(bdev, block, fblock);
502
503         /* If succeeded, mark buffer as BC_FLUSH to indicate
504          * that data should be written to disk immediately.*/
505         if (rc == EOK) {
506                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
507                 /* As we don't want to occupy too much space
508                  * in block cache, we set this buffer BC_TMP.*/
509                 ext4_bcache_set_flag(block->buf, BC_TMP);
510         }
511
512         return rc;
513 }
514
515 /**@brief   jbd block get function (through cache, don't read).
516  * @param   jbd_fs jbd filesystem
517  * @param   block block descriptor
518  * @param   fblock jbd logical block address
519  * @return  standard error code*/
520 static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
521                          struct ext4_block *block,
522                          ext4_fsblk_t fblock)
523 {
524         /* TODO: journal device. */
525         int rc;
526         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
527         rc = jbd_inode_bmap(jbd_fs, iblock,
528                             &fblock);
529         if (rc != EOK)
530                 return rc;
531
532         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
533         rc = ext4_block_get_noread(bdev, block, fblock);
534         if (rc == EOK)
535                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
536
537         return rc;
538 }
539
540 /**@brief   jbd block set procedure (through cache).
541  * @param   jbd_fs jbd filesystem
542  * @param   block block descriptor
543  * @return  standard error code*/
544 static int jbd_block_set(struct jbd_fs *jbd_fs,
545                   struct ext4_block *block)
546 {
547         return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
548                               block);
549 }
550
551 /**@brief  helper functions to calculate
552  *         block tag size, not including UUID part.
553  * @param  jbd_fs jbd filesystem
554  * @return tag size in bytes*/
555 static int jbd_tag_bytes(struct jbd_fs *jbd_fs)
556 {
557         int size;
558
559         /* It is very easy to deal with the case which
560          * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
561         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
562                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
563                 return sizeof(struct jbd_block_tag3);
564
565         size = sizeof(struct jbd_block_tag);
566
567         /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
568          * add 2 bytes to size.*/
569         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
570                                      JBD_FEATURE_INCOMPAT_CSUM_V2))
571                 size += sizeof(uint16_t);
572
573         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
574                                      JBD_FEATURE_INCOMPAT_64BIT))
575                 return size;
576
577         /* If block number is 4 bytes in size,
578          * minus 4 bytes from size */
579         return size - sizeof(uint32_t);
580 }
581
582 /**@brief  Tag information. */
583 struct tag_info {
584         /**@brief  Tag size in bytes, including UUID part.*/
585         int tag_bytes;
586
587         /**@brief  block number stored in this tag.*/
588         ext4_fsblk_t block;
589
590         /**@brief  whether UUID part exists or not.*/
591         bool uuid_exist;
592
593         /**@brief  UUID content if UUID part exists.*/
594         uint8_t uuid[UUID_SIZE];
595
596         /**@brief  Is this the last tag? */
597         bool last_tag;
598
599         /**@brief  crc32c checksum. */
600         uint32_t checksum;
601 };
602
603 /**@brief  Extract information from a block tag.
604  * @param  __tag pointer to the block tag
605  * @param  tag_bytes block tag size of this jbd filesystem
606  * @param  remaining size in buffer containing the block tag
607  * @param  tag_info information of this tag.
608  * @return  EOK when succeed, otherwise return EINVAL.*/
609 static int
610 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
611                       void *__tag,
612                       int tag_bytes,
613                       int32_t remain_buf_size,
614                       struct tag_info *tag_info)
615 {
616         char *uuid_start;
617         tag_info->tag_bytes = tag_bytes;
618         tag_info->uuid_exist = false;
619         tag_info->last_tag = false;
620
621         /* See whether it is possible to hold a valid block tag.*/
622         if (remain_buf_size - tag_bytes < 0)
623                 return EINVAL;
624
625         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
626                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
627                 struct jbd_block_tag3 *tag = __tag;
628                 tag_info->block = jbd_get32(tag, blocknr);
629                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
630                                              JBD_FEATURE_INCOMPAT_64BIT))
631                          tag_info->block |=
632                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
633
634                 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
635                         tag_info->block = 0;
636
637                 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
638                         /* See whether it is possible to hold UUID part.*/
639                         if (remain_buf_size - tag_bytes < UUID_SIZE)
640                                 return EINVAL;
641
642                         uuid_start = (char *)tag + tag_bytes;
643                         tag_info->uuid_exist = true;
644                         tag_info->tag_bytes += UUID_SIZE;
645                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
646                 }
647
648                 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
649                         tag_info->last_tag = true;
650
651         } else {
652                 struct jbd_block_tag *tag = __tag;
653                 tag_info->block = jbd_get32(tag, blocknr);
654                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
655                                              JBD_FEATURE_INCOMPAT_64BIT))
656                          tag_info->block |=
657                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
658
659                 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
660                         tag_info->block = 0;
661
662                 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
663                         /* See whether it is possible to hold UUID part.*/
664                         if (remain_buf_size - tag_bytes < UUID_SIZE)
665                                 return EINVAL;
666
667                         uuid_start = (char *)tag + tag_bytes;
668                         tag_info->uuid_exist = true;
669                         tag_info->tag_bytes += UUID_SIZE;
670                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
671                 }
672
673                 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
674                         tag_info->last_tag = true;
675
676         }
677         return EOK;
678 }
679
680 /**@brief  Write information to a block tag.
681  * @param  __tag pointer to the block tag
682  * @param  remaining size in buffer containing the block tag
683  * @param  tag_info information of this tag.
684  * @return  EOK when succeed, otherwise return EINVAL.*/
685 static int
686 jbd_write_block_tag(struct jbd_fs *jbd_fs,
687                     void *__tag,
688                     int32_t remain_buf_size,
689                     struct tag_info *tag_info)
690 {
691         char *uuid_start;
692         int tag_bytes = jbd_tag_bytes(jbd_fs);
693
694         tag_info->tag_bytes = tag_bytes;
695
696         /* See whether it is possible to hold a valid block tag.*/
697         if (remain_buf_size - tag_bytes < 0)
698                 return EINVAL;
699
700         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
701                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
702                 struct jbd_block_tag3 *tag = __tag;
703                 memset(tag, 0, sizeof(struct jbd_block_tag3));
704                 jbd_set32(tag, blocknr, tag_info->block);
705                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
706                                              JBD_FEATURE_INCOMPAT_64BIT))
707                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
708
709                 if (tag_info->uuid_exist) {
710                         /* See whether it is possible to hold UUID part.*/
711                         if (remain_buf_size - tag_bytes < UUID_SIZE)
712                                 return EINVAL;
713
714                         uuid_start = (char *)tag + tag_bytes;
715                         tag_info->tag_bytes += UUID_SIZE;
716                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
717                 } else
718                         jbd_set32(tag, flags,
719                                   jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
720
721                 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
722
723                 if (tag_info->last_tag)
724                         jbd_set32(tag, flags,
725                                   jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
726
727         } else {
728                 struct jbd_block_tag *tag = __tag;
729                 memset(tag, 0, sizeof(struct jbd_block_tag));
730                 jbd_set32(tag, blocknr, tag_info->block);
731                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
732                                              JBD_FEATURE_INCOMPAT_64BIT))
733                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
734
735                 if (tag_info->uuid_exist) {
736                         /* See whether it is possible to hold UUID part.*/
737                         if (remain_buf_size - tag_bytes < UUID_SIZE)
738                                 return EINVAL;
739
740                         uuid_start = (char *)tag + tag_bytes;
741                         tag_info->tag_bytes += UUID_SIZE;
742                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
743                 } else
744                         jbd_set16(tag, flags,
745                                   jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
746
747                 jbd_block_tag_csum_set(jbd_fs, __tag, tag_info->checksum);
748
749                 if (tag_info->last_tag)
750                         jbd_set16(tag, flags,
751                                   jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
752
753         }
754         return EOK;
755 }
756
757 /**@brief  Iterate all block tags in a block.
758  * @param  jbd_fs jbd filesystem
759  * @param  __tag_start pointer to the block
760  * @param  tag_tbl_size size of the block
761  * @param  func callback routine to indicate that
762  *         a block tag is found
763  * @param  arg additional argument to be passed to func */
764 static void
765 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
766                         void *__tag_start,
767                         int32_t tag_tbl_size,
768                         void (*func)(struct jbd_fs * jbd_fs,
769                                         ext4_fsblk_t block,
770                                         uint8_t *uuid,
771                                         void *arg),
772                         void *arg)
773 {
774         char *tag_start, *tag_ptr;
775         int tag_bytes = jbd_tag_bytes(jbd_fs);
776         tag_start = __tag_start;
777         tag_ptr = tag_start;
778
779         /* Cut off the size of block tail storing checksum. */
780         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
781                                      JBD_FEATURE_INCOMPAT_CSUM_V2) ||
782             JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
783                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
784                 tag_tbl_size -= sizeof(struct jbd_block_tail);
785
786         while (tag_tbl_size) {
787                 struct tag_info tag_info;
788                 int rc = jbd_extract_block_tag(jbd_fs,
789                                       tag_ptr,
790                                       tag_bytes,
791                                       tag_tbl_size,
792                                       &tag_info);
793                 if (rc != EOK)
794                         break;
795
796                 if (func)
797                         func(jbd_fs, tag_info.block, tag_info.uuid, arg);
798
799                 /* Stop the iteration when we reach the last tag. */
800                 if (tag_info.last_tag)
801                         break;
802
803                 tag_ptr += tag_info.tag_bytes;
804                 tag_tbl_size -= tag_info.tag_bytes;
805         }
806 }
807
808 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
809                                    ext4_fsblk_t block,
810                                    uint8_t *uuid,
811                                    void *arg)
812 {
813         uint32_t *iblock = arg;
814         ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
815         (*iblock)++;
816         (void)jbd_fs;
817         (void)uuid;
818         return;
819 }
820
821 static struct revoke_entry *
822 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
823 {
824         struct revoke_entry tmp = {
825                 .block = block
826         };
827
828         return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
829 }
830
831 /**@brief  Replay a block in a transaction.
832  * @param  jbd_fs jbd filesystem
833  * @param  block  block address to be replayed.*/
834 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
835                                   ext4_fsblk_t block,
836                                   uint8_t *uuid __unused,
837                                   void *__arg)
838 {
839         int r;
840         struct replay_arg *arg = __arg;
841         struct recover_info *info = arg->info;
842         uint32_t *this_block = arg->this_block;
843         struct revoke_entry *revoke_entry;
844         struct ext4_block journal_block, ext4_block;
845         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
846
847         (*this_block)++;
848
849         /* We replay this block only if the current transaction id
850          * is equal or greater than that in revoke entry.*/
851         revoke_entry = jbd_revoke_entry_lookup(info, block);
852         if (revoke_entry &&
853             arg->this_trans_id < revoke_entry->trans_id)
854                 return;
855
856         ext4_dbg(DEBUG_JBD,
857                  "Replaying block in block_tag: %" PRIu64 "\n",
858                  block);
859
860         r = jbd_block_get(jbd_fs, &journal_block, *this_block);
861         if (r != EOK)
862                 return;
863
864         /* We need special treatment for ext4 superblock. */
865         if (block) {
866                 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
867                 if (r != EOK) {
868                         jbd_block_set(jbd_fs, &journal_block);
869                         return;
870                 }
871
872                 memcpy(ext4_block.data,
873                         journal_block.data,
874                         jbd_get32(&jbd_fs->sb, blocksize));
875
876                 ext4_bcache_set_dirty(ext4_block.buf);
877                 ext4_block_set(fs->bdev, &ext4_block);
878         } else {
879                 uint16_t mount_count, state;
880                 mount_count = ext4_get16(&fs->sb, mount_count);
881                 state = ext4_get16(&fs->sb, state);
882
883                 memcpy(&fs->sb,
884                         journal_block.data + EXT4_SUPERBLOCK_OFFSET,
885                         EXT4_SUPERBLOCK_SIZE);
886
887                 /* Mark system as mounted */
888                 ext4_set16(&fs->sb, state, state);
889                 r = ext4_sb_write(fs->bdev, &fs->sb);
890                 if (r != EOK)
891                         return;
892
893                 /*Update mount count*/
894                 ext4_set16(&fs->sb, mount_count, mount_count);
895         }
896
897         jbd_block_set(jbd_fs, &journal_block);
898         
899         return;
900 }
901
902 /**@brief  Add block address to revoke tree, along with
903  *         its transaction id.
904  * @param  info  journal replay info
905  * @param  block  block address to be replayed.*/
906 static void jbd_add_revoke_block_tags(struct recover_info *info,
907                                       ext4_fsblk_t block)
908 {
909         struct revoke_entry *revoke_entry;
910
911         ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
912         /* If the revoke entry with respect to the block address
913          * exists already, update its transaction id.*/
914         revoke_entry = jbd_revoke_entry_lookup(info, block);
915         if (revoke_entry) {
916                 revoke_entry->trans_id = info->this_trans_id;
917                 return;
918         }
919
920         revoke_entry = jbd_alloc_revoke_entry();
921         ext4_assert(revoke_entry);
922         revoke_entry->block = block;
923         revoke_entry->trans_id = info->this_trans_id;
924         RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
925
926         return;
927 }
928
929 static void jbd_destroy_revoke_tree(struct recover_info *info)
930 {
931         while (!RB_EMPTY(&info->revoke_root)) {
932                 struct revoke_entry *revoke_entry =
933                         RB_MIN(jbd_revoke, &info->revoke_root);
934                 ext4_assert(revoke_entry);
935                 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
936                 jbd_free_revoke_entry(revoke_entry);
937         }
938 }
939
/* Make sure we wrap around the log correctly!
 * When the block index `var` has reached or passed the `maxlen` field
 * of the journal superblock `sb`, it is moved back by the length of the
 * log area (maxlen - first). `var` must be a modifiable lvalue; both
 * arguments are evaluated more than once. */
#define wrap(sb, var)                                                   \
do {                                                                    \
        if ((var) >= jbd_get32((sb), maxlen))                           \
                (var) -= (jbd_get32((sb), maxlen) -                     \
                          jbd_get32((sb), first));                      \
} while (0)

/* Actions understood by jbd_iterate_log(). */
#define ACTION_SCAN 0
#define ACTION_REVOKE 1
#define ACTION_RECOVER 2
950
951 /**@brief  Add entries in a revoke block to revoke tree.
952  * @param  jbd_fs jbd filesystem
953  * @param  header revoke block header
954  * @param  recover_info  journal replay info*/
955 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
956                                   struct jbd_bhdr *header,
957                                   struct recover_info *info)
958 {
959         char *blocks_entry;
960         struct jbd_revoke_header *revoke_hdr =
961                 (struct jbd_revoke_header *)header;
962         uint32_t i, nr_entries, record_len = 4;
963
964         /* If we are working on a 64bit jbd filesystem, */
965         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
966                                      JBD_FEATURE_INCOMPAT_64BIT))
967                 record_len = 8;
968
969         nr_entries = (jbd_get32(revoke_hdr, count) -
970                         sizeof(struct jbd_revoke_header)) /
971                         record_len;
972
973         blocks_entry = (char *)(revoke_hdr + 1);
974
975         for (i = 0;i < nr_entries;i++) {
976                 if (record_len == 8) {
977                         uint64_t *blocks =
978                                 (uint64_t *)blocks_entry;
979                         jbd_add_revoke_block_tags(info, to_be64(*blocks));
980                 } else {
981                         uint32_t *blocks =
982                                 (uint32_t *)blocks_entry;
983                         jbd_add_revoke_block_tags(info, to_be32(*blocks));
984                 }
985                 blocks_entry += record_len;
986         }
987 }
988
989 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
990                                        struct jbd_bhdr *header,
991                                        uint32_t *iblock)
992 {
993         jbd_iterate_block_table(jbd_fs,
994                                 header + 1,
995                                 jbd_get32(&jbd_fs->sb, blocksize) -
996                                         sizeof(struct jbd_bhdr),
997                                 jbd_display_block_tags,
998                                 iblock);
999 }
1000
1001 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
1002                                         struct jbd_bhdr *header,
1003                                         struct replay_arg *arg)
1004 {
1005         jbd_iterate_block_table(jbd_fs,
1006                                 header + 1,
1007                                 jbd_get32(&jbd_fs->sb, blocksize) -
1008                                         sizeof(struct jbd_bhdr),
1009                                 jbd_replay_block_tags,
1010                                 arg);
1011 }
1012
1013 /**@brief  The core routine of journal replay.
1014  * @param  jbd_fs jbd filesystem
1015  * @param  recover_info  journal replay info
1016  * @param  action action needed to be taken
1017  * @return standard error code*/
1018 static int jbd_iterate_log(struct jbd_fs *jbd_fs,
1019                            struct recover_info *info,
1020                            int action)
1021 {
1022         int r = EOK;
1023         bool log_end = false;
1024         struct jbd_sb *sb = &jbd_fs->sb;
1025         uint32_t start_trans_id, this_trans_id;
1026         uint32_t start_block, this_block;
1027
1028         /* We start iterating valid blocks in the whole journal.*/
1029         start_trans_id = this_trans_id = jbd_get32(sb, sequence);
1030         start_block = this_block = jbd_get32(sb, start);
1031         if (action == ACTION_SCAN)
1032                 info->trans_cnt = 0;
1033         else if (!info->trans_cnt)
1034                 log_end = true;
1035
1036         ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
1037                             start_trans_id);
1038
1039         while (!log_end) {
1040                 struct ext4_block block;
1041                 struct jbd_bhdr *header;
1042                 /* If we are not scanning for the last
1043                  * valid transaction in the journal,
1044                  * we will stop when we reach the end of
1045                  * the journal.*/
1046                 if (action != ACTION_SCAN)
1047                         if (this_trans_id > info->last_trans_id) {
1048                                 log_end = true;
1049                                 continue;
1050                         }
1051
1052                 r = jbd_block_get(jbd_fs, &block, this_block);
1053                 if (r != EOK)
1054                         break;
1055
1056                 header = (struct jbd_bhdr *)block.data;
1057                 /* This block does not have a valid magic number,
1058                  * so we have reached the end of the journal.*/
1059                 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
1060                         jbd_block_set(jbd_fs, &block);
1061                         log_end = true;
1062                         continue;
1063                 }
1064
1065                 /* If the transaction id we found is not expected,
1066                  * we may have reached the end of the journal.
1067                  *
1068                  * If we are not scanning the journal, something
1069                  * bad might have taken place. :-( */
1070                 if (jbd_get32(header, sequence) != this_trans_id) {
1071                         if (action != ACTION_SCAN)
1072                                 r = EIO;
1073
1074                         jbd_block_set(jbd_fs, &block);
1075                         log_end = true;
1076                         continue;
1077                 }
1078
1079                 switch (jbd_get32(header, blocktype)) {
1080                 case JBD_DESCRIPTOR_BLOCK:
1081                         if (!jbd_verify_meta_csum(jbd_fs, header)) {
1082                                 ext4_dbg(DEBUG_JBD,
1083                                         DBG_WARN "Descriptor block checksum failed."
1084                                                 "Journal block: %" PRIu32"\n",
1085                                                 this_block);
1086                                 log_end = true;
1087                                 break;
1088                         }
1089                         ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
1090                                             "trans_id: %" PRIu32"\n",
1091                                             this_block, this_trans_id);
1092                         if (action == ACTION_RECOVER) {
1093                                 struct replay_arg replay_arg;
1094                                 replay_arg.info = info;
1095                                 replay_arg.this_block = &this_block;
1096                                 replay_arg.this_trans_id = this_trans_id;
1097
1098                                 jbd_replay_descriptor_block(jbd_fs,
1099                                                 header, &replay_arg);
1100                         } else
1101                                 jbd_debug_descriptor_block(jbd_fs,
1102                                                 header, &this_block);
1103
1104                         break;
1105                 case JBD_COMMIT_BLOCK:
1106                         if (!jbd_verify_commit_csum(jbd_fs,
1107                                         (struct jbd_commit_header *)header)) {
1108                                 ext4_dbg(DEBUG_JBD,
1109                                         DBG_WARN "Commit block checksum failed."
1110                                                 "Journal block: %" PRIu32"\n",
1111                                                 this_block);
1112                                 log_end = true;
1113                                 break;
1114                         }
1115                         ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
1116                                             "trans_id: %" PRIu32"\n",
1117                                             this_block, this_trans_id);
1118                         /* This is the end of a transaction,
1119                          * we may now proceed to the next transaction.
1120                          */
1121                         this_trans_id++;
1122                         info->trans_cnt++;
1123                         break;
1124                 case JBD_REVOKE_BLOCK:
1125                         if (!jbd_verify_meta_csum(jbd_fs, header)) {
1126                                 ext4_dbg(DEBUG_JBD,
1127                                         DBG_WARN "Revoke block checksum failed."
1128                                                 "Journal block: %" PRIu32"\n",
1129                                                 this_block);
1130                                 log_end = true;
1131                                 break;
1132                         }
1133                         ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
1134                                             "trans_id: %" PRIu32"\n",
1135                                             this_block, this_trans_id);
1136                         if (action == ACTION_REVOKE) {
1137                                 info->this_trans_id = this_trans_id;
1138                                 jbd_build_revoke_tree(jbd_fs,
1139                                                 header, info);
1140                         }
1141                         break;
1142                 default:
1143                         log_end = true;
1144                         break;
1145                 }
1146                 jbd_block_set(jbd_fs, &block);
1147                 this_block++;
1148                 wrap(sb, this_block);
1149                 if (this_block == start_block)
1150                         log_end = true;
1151
1152         }
1153         ext4_dbg(DEBUG_JBD, "End of journal.\n");
1154         if (r == EOK && action == ACTION_SCAN) {
1155                 /* We have finished scanning the journal. */
1156                 info->start_trans_id = start_trans_id;
1157                 if (this_trans_id > start_trans_id)
1158                         info->last_trans_id = this_trans_id - 1;
1159                 else
1160                         info->last_trans_id = this_trans_id;
1161         }
1162
1163         return r;
1164 }
1165
1166 /**@brief  Replay journal.
1167  * @param  jbd_fs jbd filesystem
1168  * @return standard error code*/
1169 int jbd_recover(struct jbd_fs *jbd_fs)
1170 {
1171         int r;
1172         struct recover_info info;
1173         struct jbd_sb *sb = &jbd_fs->sb;
1174         if (!sb->start)
1175                 return EOK;
1176
1177         RB_INIT(&info.revoke_root);
1178
1179         r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
1180         if (r != EOK)
1181                 return r;
1182
1183         r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
1184         if (r != EOK)
1185                 return r;
1186
1187         r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
1188         if (r == EOK) {
1189                 /* If we successfully replay the journal,
1190                  * clear EXT4_FINCOM_RECOVER flag on the
1191                  * ext4 superblock, and set the start of
1192                  * journal to 0.*/
1193                 uint32_t features_incompatible =
1194                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
1195                                    features_incompatible);
1196                 jbd_set32(&jbd_fs->sb, start, 0);
1197                 features_incompatible &= ~EXT4_FINCOM_RECOVER;
1198                 ext4_set32(&jbd_fs->inode_ref.fs->sb,
1199                            features_incompatible,
1200                            features_incompatible);
1201                 jbd_fs->dirty = true;
1202                 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1203                                   &jbd_fs->inode_ref.fs->sb);
1204         }
1205         jbd_destroy_revoke_tree(&info);
1206         return r;
1207 }
1208
1209 static void jbd_journal_write_sb(struct jbd_journal *journal)
1210 {
1211         struct jbd_fs *jbd_fs = journal->jbd_fs;
1212         jbd_set32(&jbd_fs->sb, start, journal->start);
1213         jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
1214         jbd_fs->dirty = true;
1215 }
1216
1217 /**@brief  Start accessing the journal.
1218  * @param  jbd_fs jbd filesystem
1219  * @param  journal current journal session
1220  * @return standard error code*/
1221 int jbd_journal_start(struct jbd_fs *jbd_fs,
1222                       struct jbd_journal *journal)
1223 {
1224         int r;
1225         uint32_t features_incompatible =
1226                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
1227                                    features_incompatible);
1228         struct ext4_block block = EXT4_BLOCK_ZERO();
1229         features_incompatible |= EXT4_FINCOM_RECOVER;
1230         ext4_set32(&jbd_fs->inode_ref.fs->sb,
1231                         features_incompatible,
1232                         features_incompatible);
1233         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1234                         &jbd_fs->inode_ref.fs->sb);
1235         if (r != EOK)
1236                 return r;
1237
1238         journal->first = jbd_get32(&jbd_fs->sb, first);
1239         journal->start = journal->first;
1240         journal->last = journal->first;
1241         journal->trans_id = 1;
1242         journal->alloc_trans_id = 1;
1243
1244         journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
1245
1246         r = jbd_block_get_noread(jbd_fs,
1247                          &block,
1248                          journal->start);
1249         if (r != EOK) {
1250                 memset(journal, 0, sizeof(struct jbd_journal));
1251                 return r;
1252         }
1253         memset(block.data, 0, journal->block_size);
1254         ext4_bcache_set_dirty(block.buf);
1255         r = jbd_block_set(jbd_fs, &block);
1256         if (r != EOK) {
1257                 memset(journal, 0, sizeof(struct jbd_journal));
1258                 return r;
1259         }
1260
1261         TAILQ_INIT(&journal->trans_queue);
1262         TAILQ_INIT(&journal->cp_queue);
1263         RB_INIT(&journal->block_rec_root);
1264         journal->jbd_fs = jbd_fs;
1265         jbd_journal_write_sb(journal);
1266         return jbd_write_sb(jbd_fs);
1267 }
1268
1269 static void jbd_journal_flush_trans(struct jbd_trans *trans)
1270 {
1271         struct jbd_buf *jbd_buf, *tmp;
1272         struct jbd_journal *journal = trans->journal;
1273         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1274         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1275                         tmp) {
1276                 struct ext4_buf *buf = jbd_buf->block_rec->buf;
1277                 if (buf)
1278                         ext4_block_flush_buf(fs->bdev, buf);
1279         }
1280 }
1281
1282 static void
1283 jbd_journal_skip_pure_revoke(struct jbd_journal *journal,
1284                              struct jbd_trans *trans)
1285 {
1286         journal->start = trans->start_iblock +
1287                 trans->alloc_blocks;
1288         wrap(&journal->jbd_fs->sb, journal->start);
1289         journal->trans_id = trans->trans_id + 1;
1290         jbd_journal_free_trans(journal,
1291                         trans, false);
1292         jbd_journal_write_sb(journal);
1293 }
1294
1295 static void
1296 jbd_journal_purge_cp_trans(struct jbd_journal *journal,
1297                            bool flush)
1298 {
1299         struct jbd_trans *trans;
1300         while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1301                 if (!trans->data_cnt) {
1302                         TAILQ_REMOVE(&journal->cp_queue,
1303                                         trans,
1304                                         trans_node);
1305                         jbd_journal_skip_pure_revoke(journal, trans);
1306                 } else {
1307                         if (trans->data_cnt ==
1308                                         trans->written_cnt) {
1309                                 journal->start =
1310                                         trans->start_iblock +
1311                                         trans->alloc_blocks;
1312                                 wrap(&journal->jbd_fs->sb,
1313                                                 journal->start);
1314                                 journal->trans_id =
1315                                         trans->trans_id + 1;
1316                                 TAILQ_REMOVE(&journal->cp_queue,
1317                                                 trans,
1318                                                 trans_node);
1319                                 jbd_journal_free_trans(journal,
1320                                                 trans,
1321                                                 false);
1322                                 jbd_journal_write_sb(journal);
1323                         } else if (!flush) {
1324                                 journal->start =
1325                                         trans->start_iblock;
1326                                 wrap(&journal->jbd_fs->sb,
1327                                                 journal->start);
1328                                 journal->trans_id =
1329                                         trans->trans_id;
1330                                 jbd_journal_write_sb(journal);
1331                                 break;
1332                         } else
1333                                 jbd_journal_flush_trans(trans);
1334                 }
1335         }
1336 }
1337
1338 /**@brief  Stop accessing the journal.
1339  * @param  journal current journal session
1340  * @return standard error code*/
1341 int jbd_journal_stop(struct jbd_journal *journal)
1342 {
1343         int r;
1344         struct jbd_fs *jbd_fs = journal->jbd_fs;
1345         uint32_t features_incompatible;
1346
1347         /* Commit all the transactions to the journal.*/
1348         jbd_journal_commit_all(journal);
1349
1350         /* Make sure that journalled content have reached
1351          * the disk.*/
1352         jbd_journal_purge_cp_trans(journal, true);
1353
1354         /* There should be no block record in this journal
1355          * session. */
1356         if (!RB_EMPTY(&journal->block_rec_root))
1357                 ext4_dbg(DEBUG_JBD,
1358                          DBG_WARN "There are still block records "
1359                                   "in this journal session!\n");
1360
1361         features_incompatible =
1362                 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1363                            features_incompatible);
1364         features_incompatible &= ~EXT4_FINCOM_RECOVER;
1365         ext4_set32(&jbd_fs->inode_ref.fs->sb,
1366                         features_incompatible,
1367                         features_incompatible);
1368         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1369                         &jbd_fs->inode_ref.fs->sb);
1370         if (r != EOK)
1371                 return r;
1372
1373         journal->start = 0;
1374         journal->trans_id = 0;
1375         jbd_journal_write_sb(journal);
1376         return jbd_write_sb(journal->jbd_fs);
1377 }
1378
1379 /**@brief  Allocate a block in the journal.
1380  * @param  journal current journal session
1381  * @param  trans transaction
1382  * @return allocated block address*/
1383 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
1384                                         struct jbd_trans *trans)
1385 {
1386         uint32_t start_block;
1387
1388         start_block = journal->last++;
1389         trans->alloc_blocks++;
1390         wrap(&journal->jbd_fs->sb, journal->last);
1391         
1392         /* If there is no space left, flush all journalled
1393          * blocks to disk first.*/
1394         if (journal->last == journal->start)
1395                 jbd_journal_purge_cp_trans(journal, true);
1396
1397         return start_block;
1398 }
1399
1400 /**@brief  Allocate a new transaction
1401  * @param  journal current journal session
1402  * @return transaction allocated*/
1403 struct jbd_trans *
1404 jbd_journal_new_trans(struct jbd_journal *journal)
1405 {
1406         struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
1407         if (!trans)
1408                 return NULL;
1409
1410         /* We will assign a trans_id to this transaction,
1411          * once it has been committed.*/
1412         trans->journal = journal;
1413         trans->data_csum = EXT4_CRC32_INIT;
1414         trans->error = EOK;
1415         TAILQ_INIT(&trans->buf_queue);
1416         return trans;
1417 }
1418
1419 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1420                           struct ext4_buf *buf __unused,
1421                           int res,
1422                           void *arg);
1423
1424 /**@brief  gain access to it before making any modications.
1425  * @param  journal current journal session
1426  * @param  trans transaction
1427  * @param  block descriptor
1428  * @return standard error code.*/
1429 int jbd_trans_get_access(struct jbd_journal *journal,
1430                          struct jbd_trans *trans,
1431                          struct ext4_block *block)
1432 {
1433         int r = EOK;
1434         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1435         struct jbd_buf *jbd_buf = block->buf->end_write_arg;
1436
1437         /* If the buffer has already been modified, we should
1438          * flush dirty data in this buffer to disk.*/
1439         if (ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
1440             block->buf->end_write == jbd_trans_end_write) {
1441                 ext4_assert(jbd_buf);
1442                 if (jbd_buf->trans != trans)
1443                         r = ext4_block_flush_buf(fs->bdev, block->buf);
1444
1445         }
1446         return r;
1447 }
1448
1449 static struct jbd_block_rec *
1450 jbd_trans_block_rec_lookup(struct jbd_journal *journal,
1451                            ext4_fsblk_t lba)
1452 {
1453         struct jbd_block_rec tmp = {
1454                 .lba = lba
1455         };
1456
1457         return RB_FIND(jbd_block,
1458                        &journal->block_rec_root,
1459                        &tmp);
1460 }
1461
1462 static void
1463 jbd_trans_change_ownership(struct jbd_block_rec *block_rec,
1464                            struct jbd_trans *new_trans,
1465                            struct ext4_buf *new_buf)
1466 {
1467         LIST_REMOVE(block_rec, tbrec_node);
1468         /* Now this block record belongs to this transaction. */
1469         LIST_INSERT_HEAD(&new_trans->tbrec_list, block_rec, tbrec_node);
1470         block_rec->trans = new_trans;
1471         block_rec->buf = new_buf;
1472 }
1473
1474 static inline struct jbd_block_rec *
1475 jbd_trans_insert_block_rec(struct jbd_trans *trans,
1476                            ext4_fsblk_t lba,
1477                            struct ext4_buf *buf)
1478 {
1479         struct jbd_block_rec *block_rec;
1480         block_rec = jbd_trans_block_rec_lookup(trans->journal, lba);
1481         if (block_rec) {
1482                 jbd_trans_change_ownership(block_rec, trans, buf);
1483                 return block_rec;
1484         }
1485         block_rec = calloc(1, sizeof(struct jbd_block_rec));
1486         if (!block_rec)
1487                 return NULL;
1488
1489         block_rec->lba = lba;
1490         block_rec->buf = buf;
1491         block_rec->trans = trans;
1492         TAILQ_INIT(&block_rec->dirty_buf_queue);
1493         LIST_INSERT_HEAD(&trans->tbrec_list, block_rec, tbrec_node);
1494         RB_INSERT(jbd_block, &trans->journal->block_rec_root, block_rec);
1495         return block_rec;
1496 }
1497
1498 static void
1499 jbd_trans_finish_callback(struct jbd_journal *journal,
1500                           const struct jbd_trans *trans,
1501                           struct jbd_block_rec *block_rec,
1502                           bool abort)
1503 {
1504         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1505         if (block_rec->trans != trans)
1506                 return;
1507
1508         if (!abort) {
1509                 struct jbd_buf *jbd_buf, *tmp;
1510                 TAILQ_FOREACH_SAFE(jbd_buf,
1511                                 &block_rec->dirty_buf_queue,
1512                                 dirty_buf_node,
1513                                 tmp) {
1514                         /* All we need is a fake ext4_buf. */
1515                         struct ext4_buf buf;
1516
1517                         jbd_trans_end_write(fs->bdev->bc,
1518                                         &buf,
1519                                         EOK,
1520                                         jbd_buf);
1521                 }
1522         } else {
1523                 struct jbd_buf *jbd_buf;
1524                 struct ext4_block jbd_block = EXT4_BLOCK_ZERO(),
1525                                   block = EXT4_BLOCK_ZERO();
1526                 jbd_buf = TAILQ_LAST(&block_rec->dirty_buf_queue,
1527                                 jbd_buf_dirty);
1528                 if (jbd_buf) {
1529                         ext4_assert(ext4_block_get(fs->bdev,
1530                                                 &jbd_block,
1531                                                 jbd_buf->jbd_lba) == EOK);
1532                         ext4_assert(ext4_block_get_noread(fs->bdev,
1533                                                 &block,
1534                                                 block_rec->lba) == EOK);
1535                         memcpy(block.data, jbd_block.data,
1536                                         journal->block_size);
1537
1538                         jbd_trans_change_ownership(block_rec,
1539                                         jbd_buf->trans, block.buf);
1540
1541                         block.buf->end_write = jbd_trans_end_write;
1542                         block.buf->end_write_arg = jbd_buf;
1543
1544                         ext4_bcache_set_flag(jbd_block.buf, BC_TMP);
1545                         ext4_bcache_set_dirty(block.buf);
1546
1547                         ext4_block_set(fs->bdev, &jbd_block);
1548                         ext4_block_set(fs->bdev, &block);
1549                         return;
1550                 }
1551         }
1552 }
1553
1554 static inline void
1555 jbd_trans_remove_block_rec(struct jbd_journal *journal,
1556                            struct jbd_block_rec *block_rec,
1557                            struct jbd_trans *trans)
1558 {
1559         /* If this block record doesn't belong to this transaction,
1560          * give up.*/
1561         if (block_rec->trans == trans) {
1562                 LIST_REMOVE(block_rec, tbrec_node);
1563                 RB_REMOVE(jbd_block,
1564                                 &journal->block_rec_root,
1565                                 block_rec);
1566                 free(block_rec);
1567         }
1568 }
1569
1570 /**@brief  Add block to a transaction and mark it dirty.
1571  * @param  trans transaction
1572  * @param  block block descriptor
1573  * @return standard error code*/
1574 int jbd_trans_set_block_dirty(struct jbd_trans *trans,
1575                               struct ext4_block *block)
1576 {
1577         struct jbd_buf *buf;
1578
1579         struct jbd_block_rec *block_rec;
1580         if (block->buf->end_write == jbd_trans_end_write) {
1581                 buf = block->buf->end_write_arg;
1582                 if (buf && buf->trans == trans)
1583                         return EOK;
1584         }
1585         buf = calloc(1, sizeof(struct jbd_buf));
1586         if (!buf)
1587                 return ENOMEM;
1588
1589         if ((block_rec = jbd_trans_insert_block_rec(trans,
1590                                         block->lb_id,
1591                                         block->buf)) == NULL) {
1592                 free(buf);
1593                 return ENOMEM;
1594         }
1595
1596         TAILQ_INSERT_TAIL(&block_rec->dirty_buf_queue,
1597                         buf,
1598                         dirty_buf_node);
1599
1600         buf->block_rec = block_rec;
1601         buf->trans = trans;
1602         buf->block = *block;
1603         ext4_bcache_inc_ref(block->buf);
1604
1605         /* If the content reach the disk, notify us
1606          * so that we may do a checkpoint. */
1607         block->buf->end_write = jbd_trans_end_write;
1608         block->buf->end_write_arg = buf;
1609
1610         trans->data_cnt++;
1611         TAILQ_INSERT_HEAD(&trans->buf_queue, buf, buf_node);
1612
1613         ext4_bcache_set_dirty(block->buf);
1614         return EOK;
1615 }
1616
1617 /**@brief  Add block to be revoked to a transaction
1618  * @param  trans transaction
1619  * @param  lba logical block address
1620  * @return standard error code*/
1621 int jbd_trans_revoke_block(struct jbd_trans *trans,
1622                            ext4_fsblk_t lba)
1623 {
1624         struct jbd_revoke_rec *rec =
1625                 calloc(1, sizeof(struct jbd_revoke_rec));
1626         if (!rec)
1627                 return ENOMEM;
1628
1629         rec->lba = lba;
1630         LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
1631         return EOK;
1632 }
1633
1634 /**@brief  Try to add block to be revoked to a transaction.
1635  *         If @lba still remains in an transaction on checkpoint
1636  *         queue, add @lba as a revoked block to the transaction.
1637  * @param  trans transaction
1638  * @param  lba logical block address
1639  * @return standard error code*/
1640 int jbd_trans_try_revoke_block(struct jbd_trans *trans,
1641                                ext4_fsblk_t lba)
1642 {
1643         int r = EOK;
1644         struct jbd_journal *journal = trans->journal;
1645         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1646         struct jbd_block_rec *block_rec =
1647                 jbd_trans_block_rec_lookup(journal, lba);
1648
1649         /* Make sure we don't flush any buffers belong to this transaction. */
1650         if (block_rec && block_rec->trans != trans) {
1651                 /* If the buffer has not been flushed yet, flush it now. */
1652                 if (block_rec->buf) {
1653                         r = ext4_block_flush_buf(fs->bdev, block_rec->buf);
1654                         if (r != EOK)
1655                                 return r;
1656
1657                 }
1658
1659                 jbd_trans_revoke_block(trans, lba);
1660         }
1661
1662         return EOK;
1663 }
1664
1665 /**@brief  Free a transaction
1666  * @param  journal current journal session
1667  * @param  trans transaction
1668  * @param  abort discard all the modifications on the block?
1669  * @return standard error code*/
1670 void jbd_journal_free_trans(struct jbd_journal *journal,
1671                             struct jbd_trans *trans,
1672                             bool abort)
1673 {
1674         struct jbd_buf *jbd_buf, *tmp;
1675         struct jbd_revoke_rec *rec, *tmp2;
1676         struct jbd_block_rec *block_rec, *tmp3;
1677         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1678         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
1679                           tmp) {
1680                 block_rec = jbd_buf->block_rec;
1681                 if (abort) {
1682                         jbd_buf->block.buf->end_write = NULL;
1683                         jbd_buf->block.buf->end_write_arg = NULL;
1684                         ext4_bcache_clear_dirty(jbd_buf->block.buf);
1685                         ext4_block_set(fs->bdev, &jbd_buf->block);
1686                 }
1687
1688                 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1689                         jbd_buf,
1690                         dirty_buf_node);
1691                 jbd_trans_finish_callback(journal,
1692                                 trans,
1693                                 block_rec,
1694                                 abort);
1695                 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1696                 free(jbd_buf);
1697         }
1698         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1699                           tmp2) {
1700                 LIST_REMOVE(rec, revoke_node);
1701                 free(rec);
1702         }
1703         LIST_FOREACH_SAFE(block_rec, &trans->tbrec_list, tbrec_node,
1704                           tmp3) {
1705                 jbd_trans_remove_block_rec(journal, block_rec, trans);
1706         }
1707
1708         free(trans);
1709 }
1710
1711 /**@brief  Write commit block for a transaction
1712  * @param  trans transaction
1713  * @return standard error code*/
1714 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
1715 {
1716         int rc;
1717         struct jbd_commit_header *header;
1718         uint32_t commit_iblock = 0;
1719         struct ext4_block commit_block;
1720         struct jbd_journal *journal = trans->journal;
1721
1722         commit_iblock = jbd_journal_alloc_block(journal, trans);
1723         rc = jbd_block_get_noread(journal->jbd_fs,
1724                         &commit_block, commit_iblock);
1725         if (rc != EOK)
1726                 return rc;
1727
1728         header = (struct jbd_commit_header *)commit_block.data;
1729         jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
1730         jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
1731         jbd_set32(&header->header, sequence, trans->trans_id);
1732
1733         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1734                                 JBD_FEATURE_COMPAT_CHECKSUM)) {
1735                 jbd_set32(header, chksum_type, JBD_CRC32_CHKSUM);
1736                 jbd_set32(header, chksum_size, JBD_CRC32_CHKSUM_SIZE);
1737                 jbd_set32(header, chksum[0], trans->data_csum);
1738         }
1739         jbd_commit_csum_set(journal->jbd_fs, header);
1740         ext4_bcache_set_dirty(commit_block.buf);
1741         rc = jbd_block_set(journal->jbd_fs, &commit_block);
1742         if (rc != EOK)
1743                 return rc;
1744
1745         return EOK;
1746 }
1747
1748 /**@brief  Write descriptor block for a transaction
1749  * @param  journal current journal session
1750  * @param  trans transaction
1751  * @return standard error code*/
1752 static int jbd_journal_prepare(struct jbd_journal *journal,
1753                                struct jbd_trans *trans)
1754 {
1755         int rc = EOK, i = 0;
1756         int32_t tag_tbl_size;
1757         uint32_t desc_iblock = 0;
1758         uint32_t data_iblock = 0;
1759         char *tag_start = NULL, *tag_ptr = NULL;
1760         struct jbd_buf *jbd_buf, *tmp;
1761         struct ext4_block desc_block, data_block;
1762         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1763         uint32_t checksum = EXT4_CRC32_INIT;
1764
1765         /* Try to remove any non-dirty buffers from the tail of
1766          * buf_queue. */
1767         TAILQ_FOREACH_REVERSE_SAFE(jbd_buf, &trans->buf_queue,
1768                         jbd_trans_buf, buf_node, tmp) {
1769                 /* We stop the iteration when we find a dirty buffer. */
1770                 if (ext4_bcache_test_flag(jbd_buf->block.buf,
1771                                         BC_DIRTY))
1772                         break;
1773         
1774                 TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1775                         jbd_buf,
1776                         dirty_buf_node);
1777
1778                 jbd_trans_finish_callback(journal,
1779                                 trans,
1780                                 jbd_buf->block_rec,
1781                                 false);
1782
1783                 /* The buffer has not been modified, just release
1784                  * that jbd_buf. */
1785                 jbd_trans_remove_block_rec(journal,
1786                                 jbd_buf->block_rec, trans);
1787                 trans->data_cnt--;
1788
1789                 jbd_buf->block.buf->end_write = NULL;
1790                 jbd_buf->block.buf->end_write_arg = NULL;
1791                 ext4_block_set(fs->bdev, &jbd_buf->block);
1792                 TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1793                 free(jbd_buf);
1794         }
1795
1796         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node, tmp) {
1797                 struct tag_info tag_info;
1798                 bool uuid_exist = false;
1799                 if (!ext4_bcache_test_flag(jbd_buf->block.buf,
1800                                            BC_DIRTY)) {
1801                         TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
1802                                         jbd_buf,
1803                                         dirty_buf_node);
1804
1805                         jbd_trans_finish_callback(journal,
1806                                         trans,
1807                                         jbd_buf->block_rec,
1808                                         false);
1809
1810                         /* The buffer has not been modified, just release
1811                          * that jbd_buf. */
1812                         jbd_trans_remove_block_rec(journal,
1813                                         jbd_buf->block_rec, trans);
1814                         trans->data_cnt--;
1815
1816                         jbd_buf->block.buf->end_write = NULL;
1817                         jbd_buf->block.buf->end_write_arg = NULL;
1818                         ext4_block_set(fs->bdev, &jbd_buf->block);
1819                         TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
1820                         free(jbd_buf);
1821                         continue;
1822                 }
1823                 checksum = jbd_block_csum(journal->jbd_fs,
1824                                           jbd_buf->block.data,
1825                                           checksum,
1826                                           trans->trans_id);
1827 again:
1828                 if (!desc_iblock) {
1829                         struct jbd_bhdr *bhdr;
1830                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1831                         rc = jbd_block_get_noread(journal->jbd_fs,
1832                                            &desc_block, desc_iblock);
1833                         if (rc != EOK)
1834                                 break;
1835
1836                         ext4_bcache_set_dirty(desc_block.buf);
1837
1838                         bhdr = (struct jbd_bhdr *)desc_block.data;
1839                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1840                         jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
1841                         jbd_set32(bhdr, sequence, trans->trans_id);
1842
1843                         tag_start = (char *)(bhdr + 1);
1844                         tag_ptr = tag_start;
1845                         uuid_exist = true;
1846                         tag_tbl_size = journal->block_size -
1847                                 sizeof(struct jbd_bhdr);
1848
1849                         if (jbd_has_csum(&journal->jbd_fs->sb))
1850                                 tag_tbl_size -= sizeof(struct jbd_block_tail);
1851
1852                         if (!trans->start_iblock)
1853                                 trans->start_iblock = desc_iblock;
1854
1855                 }
1856                 tag_info.block = jbd_buf->block.lb_id;
1857                 tag_info.uuid_exist = uuid_exist;
1858                 if (i == trans->data_cnt - 1)
1859                         tag_info.last_tag = true;
1860                 else
1861                         tag_info.last_tag = false;
1862
1863                 tag_info.checksum = checksum;
1864
1865                 if (uuid_exist)
1866                         memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1867                                         UUID_SIZE);
1868
1869                 rc = jbd_write_block_tag(journal->jbd_fs,
1870                                 tag_ptr,
1871                                 tag_tbl_size,
1872                                 &tag_info);
1873                 if (rc != EOK) {
1874                         jbd_meta_csum_set(journal->jbd_fs,
1875                                         (struct jbd_bhdr *)desc_block.data);
1876                         jbd_block_set(journal->jbd_fs, &desc_block);
1877                         desc_iblock = 0;
1878                         goto again;
1879                 }
1880
1881                 data_iblock = jbd_journal_alloc_block(journal, trans);
1882                 rc = jbd_block_get_noread(journal->jbd_fs,
1883                                 &data_block, data_iblock);
1884                 if (rc != EOK)
1885                         break;
1886
1887                 ext4_bcache_set_dirty(data_block.buf);
1888
1889                 memcpy(data_block.data, jbd_buf->block.data,
1890                         journal->block_size);
1891                 jbd_buf->jbd_lba = data_block.lb_id;
1892
1893                 rc = jbd_block_set(journal->jbd_fs, &data_block);
1894                 if (rc != EOK)
1895                         break;
1896
1897                 tag_ptr += tag_info.tag_bytes;
1898                 tag_tbl_size -= tag_info.tag_bytes;
1899
1900                 i++;
1901         }
1902         if (rc == EOK && desc_iblock) {
1903                 jbd_meta_csum_set(journal->jbd_fs,
1904                                 (struct jbd_bhdr *)desc_block.data);
1905                 trans->data_csum = checksum;
1906                 jbd_block_set(journal->jbd_fs, &desc_block);
1907         }
1908
1909         return rc;
1910 }
1911
1912 /**@brief  Write revoke block for a transaction
1913  * @param  journal current journal session
1914  * @param  trans transaction
1915  * @return standard error code*/
1916 static int
1917 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1918                            struct jbd_trans *trans)
1919 {
1920         int rc = EOK, i = 0;
1921         int32_t tag_tbl_size;
1922         uint32_t desc_iblock = 0;
1923         char *blocks_entry = NULL;
1924         struct jbd_revoke_rec *rec, *tmp;
1925         struct ext4_block desc_block;
1926         struct jbd_revoke_header *header = NULL;
1927         int32_t record_len = 4;
1928
1929         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1930                                      JBD_FEATURE_INCOMPAT_64BIT))
1931                 record_len = 8;
1932
1933         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1934                           tmp) {
1935 again:
1936                 if (!desc_iblock) {
1937                         struct jbd_bhdr *bhdr;
1938                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1939                         rc = jbd_block_get_noread(journal->jbd_fs,
1940                                            &desc_block, desc_iblock);
1941                         if (rc != EOK) {
1942                                 break;
1943                         }
1944
1945                         ext4_bcache_set_dirty(desc_block.buf);
1946
1947                         bhdr = (struct jbd_bhdr *)desc_block.data;
1948                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1949                         jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1950                         jbd_set32(bhdr, sequence, trans->trans_id);
1951                         
1952                         header = (struct jbd_revoke_header *)bhdr;
1953                         blocks_entry = (char *)(header + 1);
1954                         tag_tbl_size = journal->block_size -
1955                                 sizeof(struct jbd_revoke_header);
1956
1957                         if (jbd_has_csum(&journal->jbd_fs->sb))
1958                                 tag_tbl_size -= sizeof(struct jbd_block_tail);
1959
1960                         if (!trans->start_iblock)
1961                                 trans->start_iblock = desc_iblock;
1962
1963                 }
1964
1965                 if (tag_tbl_size < record_len) {
1966                         jbd_set32(header, count,
1967                                   journal->block_size - tag_tbl_size);
1968                         jbd_meta_csum_set(journal->jbd_fs,
1969                                         (struct jbd_bhdr *)desc_block.data);
1970                         jbd_block_set(journal->jbd_fs, &desc_block);
1971                         desc_iblock = 0;
1972                         header = NULL;
1973                         goto again;
1974                 }
1975                 if (record_len == 8) {
1976                         uint64_t *blocks =
1977                                 (uint64_t *)blocks_entry;
1978                         *blocks = to_be64(rec->lba);
1979                 } else {
1980                         uint32_t *blocks =
1981                                 (uint32_t *)blocks_entry;
1982                         *blocks = to_be32(rec->lba);
1983                 }
1984                 blocks_entry += record_len;
1985                 tag_tbl_size -= record_len;
1986
1987                 i++;
1988         }
1989         if (rc == EOK && desc_iblock) {
1990                 if (header != NULL)
1991                         jbd_set32(header, count,
1992                                   journal->block_size - tag_tbl_size);
1993
1994                 jbd_meta_csum_set(journal->jbd_fs,
1995                                 (struct jbd_bhdr *)desc_block.data);
1996                 jbd_block_set(journal->jbd_fs, &desc_block);
1997         }
1998
1999         return rc;
2000 }
2001
2002 /**@brief  Submit the transaction to transaction queue.
2003  * @param  journal current journal session
2004  * @param  trans transaction*/
2005 void
2006 jbd_journal_submit_trans(struct jbd_journal *journal,
2007                          struct jbd_trans *trans)
2008 {
2009         TAILQ_INSERT_TAIL(&journal->trans_queue,
2010                           trans,
2011                           trans_node);
2012 }
2013
2014 /**@brief  Put references of block descriptors in a transaction.
2015  * @param  journal current journal session
2016  * @param  trans transaction*/
2017 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
2018 {
2019         struct jbd_buf *jbd_buf, *tmp;
2020         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
2021         TAILQ_FOREACH_SAFE(jbd_buf, &trans->buf_queue, buf_node,
2022                         tmp) {
2023                 struct ext4_block block = jbd_buf->block;
2024                 ext4_block_set(fs->bdev, &block);
2025         }
2026 }
2027
2028 /**@brief  Update the start block of the journal when
2029  *         all the contents in a transaction reach the disk.*/
2030 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
2031                           struct ext4_buf *buf,
2032                           int res,
2033                           void *arg)
2034 {
2035         struct jbd_buf *jbd_buf = arg;
2036         struct jbd_trans *trans = jbd_buf->trans;
2037         struct jbd_journal *journal = trans->journal;
2038         bool first_in_queue =
2039                 trans == TAILQ_FIRST(&journal->cp_queue);
2040         if (res != EOK)
2041                 trans->error = res;
2042
2043         TAILQ_REMOVE(&trans->buf_queue, jbd_buf, buf_node);
2044         TAILQ_REMOVE(&jbd_buf->block_rec->dirty_buf_queue,
2045                         jbd_buf,
2046                         dirty_buf_node);
2047         jbd_trans_finish_callback(journal,
2048                         trans,
2049                         jbd_buf->block_rec,
2050                         false);
2051         jbd_buf->block_rec->buf = NULL;
2052         free(jbd_buf);
2053
2054         /* Clear the end_write and end_write_arg fields. */
2055         buf->end_write = NULL;
2056         buf->end_write_arg = NULL;
2057
2058         trans->written_cnt++;
2059         if (trans->written_cnt == trans->data_cnt) {
2060                 /* If it is the first transaction on checkpoint queue,
2061                  * we will shift the start of the journal to the next
2062                  * transaction, and remove subsequent written
2063                  * transactions from checkpoint queue until we find
2064                  * an unwritten one. */
2065                 if (first_in_queue) {
2066                         journal->start = trans->start_iblock +
2067                                 trans->alloc_blocks;
2068                         wrap(&journal->jbd_fs->sb, journal->start);
2069                         journal->trans_id = trans->trans_id + 1;
2070                         TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
2071                         jbd_journal_free_trans(journal, trans, false);
2072
2073                         jbd_journal_purge_cp_trans(journal, false);
2074                         jbd_journal_write_sb(journal);
2075                         jbd_write_sb(journal->jbd_fs);
2076                 }
2077         }
2078 }
2079
2080 /**@brief  Commit a transaction to the journal immediately.
2081  * @param  journal current journal session
2082  * @param  trans transaction
2083  * @return standard error code*/
2084 int jbd_journal_commit_trans(struct jbd_journal *journal,
2085                              struct jbd_trans *trans)
2086 {
2087         int rc = EOK;
2088         uint32_t last = journal->last;
2089
2090         trans->trans_id = journal->alloc_trans_id;
2091         rc = jbd_journal_prepare(journal, trans);
2092         if (rc != EOK)
2093                 goto Finish;
2094
2095         rc = jbd_journal_prepare_revoke(journal, trans);
2096         if (rc != EOK)
2097                 goto Finish;
2098
2099         if (TAILQ_EMPTY(&trans->buf_queue) &&
2100             LIST_EMPTY(&trans->revoke_list)) {
2101                 /* Since there are no entries in both buffer list
2102                  * and revoke entry list, we do not consider trans as
2103                  * complete transaction and just return EOK.*/
2104                 jbd_journal_free_trans(journal, trans, false);
2105                 goto Finish;
2106         }
2107
2108         rc = jbd_trans_write_commit_block(trans);
2109         if (rc != EOK)
2110                 goto Finish;
2111
2112         journal->alloc_trans_id++;
2113         if (TAILQ_EMPTY(&journal->cp_queue)) {
2114                 if (trans->data_cnt) {
2115                         journal->start = trans->start_iblock;
2116                         wrap(&journal->jbd_fs->sb, journal->start);
2117                         journal->trans_id = trans->trans_id;
2118                         jbd_journal_write_sb(journal);
2119                         jbd_write_sb(journal->jbd_fs);
2120                         TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2121                                         trans_node);
2122                         jbd_journal_cp_trans(journal, trans);
2123                 } else {
2124                         journal->start = trans->start_iblock +
2125                                 trans->alloc_blocks;
2126                         wrap(&journal->jbd_fs->sb, journal->start);
2127                         journal->trans_id = trans->trans_id + 1;
2128                         jbd_journal_write_sb(journal);
2129                         jbd_journal_free_trans(journal, trans, false);
2130                 }
2131         } else {
2132                 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
2133                                 trans_node);
2134                 if (trans->data_cnt)
2135                         jbd_journal_cp_trans(journal, trans);
2136
2137         }
2138 Finish:
2139         if (rc != EOK) {
2140                 journal->last = last;
2141                 jbd_journal_free_trans(journal, trans, true);
2142         }
2143         return rc;
2144 }
2145
2146 /**@brief  Commit one transaction on transaction queue
2147  *         to the journal.
2148  * @param  journal current journal session.*/
2149 void jbd_journal_commit_one(struct jbd_journal *journal)
2150 {
2151         struct jbd_trans *trans;
2152
2153         if ((trans = TAILQ_FIRST(&journal->trans_queue))) {
2154                 TAILQ_REMOVE(&journal->trans_queue, trans, trans_node);
2155                 jbd_journal_commit_trans(journal, trans);
2156         }
2157 }
2158
2159 /**@brief  Commit all the transactions on transaction queue
2160  *         to the journal.
2161  * @param  journal current journal session.*/
2162 void jbd_journal_commit_all(struct jbd_journal *journal)
2163 {
2164         while (!TAILQ_EMPTY(&journal->trans_queue)) {
2165                 jbd_journal_commit_one(journal);
2166         }
2167 }
2168
2169 /**
2170  * @}
2171  */