ext4_journal: add trans parameter to jbd_trans_get_access routine.
[lwext4.git] / lwext4 / ext4_journal.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * - Redistributions of source code must retain the above copyright
11  *   notice, this list of conditions and the following disclaimer.
12  * - Redistributions in binary form must reproduce the above copyright
13  *   notice, this list of conditions and the following disclaimer in the
14  *   documentation and/or other materials provided with the distribution.
15  * - The name of the author may not be used to endorse or promote products
16  *   derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 /** @addtogroup lwext4
31  * @{
32  */
33 /**
34  * @file  ext4_journal.c
35  * @brief Journal handle functions
36  */
37
38 #include "ext4_config.h"
39 #include "ext4_types.h"
40 #include "ext4_fs.h"
41 #include "ext4_super.h"
42 #include "ext4_journal.h"
43 #include "ext4_errno.h"
44 #include "ext4_blockdev.h"
45 #include "ext4_crc32c.h"
46 #include "ext4_debug.h"
47 #include "tree.h"
48
49 #include <string.h>
50 #include <stdlib.h>
51
52 /**@brief  Revoke entry during journal replay.*/
53 struct revoke_entry {
54         /**@brief  Block number not to be replayed.*/
55         ext4_fsblk_t block;
56
57         /**@brief  For any transaction id smaller
58          *         than trans_id, records of @block
59          *         in those transactions should not
60          *         be replayed.*/
61         uint32_t trans_id;
62
63         /**@brief  Revoke tree node.*/
64         RB_ENTRY(revoke_entry) revoke_node;
65 };
66
67 /**@brief  Valid journal replay information.*/
68 struct recover_info {
69         /**@brief  Starting transaction id.*/
70         uint32_t start_trans_id;
71
72         /**@brief  Ending transaction id.*/
73         uint32_t last_trans_id;
74
75         /**@brief  Used as internal argument.*/
76         uint32_t this_trans_id;
77
78         /**@brief  RB-Tree storing revoke entries.*/
79         RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
80 };
81
/**@brief  Argument bundle handed to the per-tag replay callback.*/
struct replay_arg {
        /**@brief  Replay information (revoke tree, transaction range).*/
        struct recover_info *info;

        /**@brief  Journal block cursor; the callback advances it by one
         *         for every block tag it is given.*/
        uint32_t *this_block;

        /**@brief  Id of the transaction currently being replayed.*/
        uint32_t this_trans_id;
};
93
94 static int
95 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
96 {
97         if (a->block > b->block)
98                 return 1;
99         else if (a->block < b->block)
100                 return -1;
101         return 0;
102 }
103
104 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
105                      jbd_revoke_entry_cmp, static inline)
106
107 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
108 #define jbd_free_revoke_entry(addr) free(addr)
109
110 /**@brief  Write jbd superblock to disk.
111  * @param  jbd_fs jbd filesystem
112  * @param  s jbd superblock
113  * @return standard error code*/
114 static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
115 {
116         int rc;
117         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
118         uint64_t offset;
119         ext4_fsblk_t fblock;
120         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
121         if (rc != EOK)
122                 return rc;
123
124         offset = fblock * ext4_sb_get_block_size(&fs->sb);
125         return ext4_block_writebytes(fs->bdev, offset, s,
126                                      EXT4_SUPERBLOCK_SIZE);
127 }
128
129 /**@brief  Read jbd superblock from disk.
130  * @param  jbd_fs jbd filesystem
131  * @param  s jbd superblock
132  * @return standard error code*/
133 static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
134 {
135         int rc;
136         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
137         uint64_t offset;
138         ext4_fsblk_t fblock;
139         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
140         if (rc != EOK)
141                 return rc;
142
143         offset = fblock * ext4_sb_get_block_size(&fs->sb);
144         return ext4_block_readbytes(fs->bdev, offset, s,
145                                     EXT4_SUPERBLOCK_SIZE);
146 }
147
148 /**@brief  Verify jbd superblock.
149  * @param  sb jbd superblock
150  * @return true if jbd superblock is valid */
151 static bool jbd_verify_sb(struct jbd_sb *sb)
152 {
153         struct jbd_bhdr *header = &sb->header;
154         if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
155                 return false;
156
157         if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
158             jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
159                 return false;
160
161         return true;
162 }
163
164 /**@brief  Write back dirty jbd superblock to disk.
165  * @param  jbd_fs jbd filesystem
166  * @return standard error code*/
167 static int jbd_write_sb(struct jbd_fs *jbd_fs)
168 {
169         int rc = EOK;
170         if (jbd_fs->dirty) {
171                 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
172                 if (rc != EOK)
173                         return rc;
174
175                 jbd_fs->dirty = false;
176         }
177         return rc;
178 }
179
180 /**@brief  Get reference to jbd filesystem.
181  * @param  fs Filesystem to load journal of
182  * @param  jbd_fs jbd filesystem
183  * @return standard error code*/
184 int jbd_get_fs(struct ext4_fs *fs,
185                struct jbd_fs *jbd_fs)
186 {
187         int rc;
188         uint32_t journal_ino;
189
190         memset(jbd_fs, 0, sizeof(struct jbd_fs));
191         /* See if there is journal inode on this filesystem.*/
192         /* FIXME: detection on existance ofbkejournal bdev is
193          *        missing.*/
194         journal_ino = ext4_get32(&fs->sb, journal_inode_number);
195
196         rc = ext4_fs_get_inode_ref(fs,
197                                    journal_ino,
198                                    &jbd_fs->inode_ref);
199         if (rc != EOK) {
200                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
201                 return rc;
202         }
203         rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
204         if (rc != EOK) {
205                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
206                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
207                 return rc;
208         }
209         if (!jbd_verify_sb(&jbd_fs->sb)) {
210                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
211                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
212                 rc = EIO;
213         }
214
215         return rc;
216 }
217
218 /**@brief  Put reference of jbd filesystem.
219  * @param  jbd_fs jbd filesystem
220  * @return standard error code*/
221 int jbd_put_fs(struct jbd_fs *jbd_fs)
222 {
223         int rc = EOK;
224         rc = jbd_write_sb(jbd_fs);
225
226         ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
227         return rc;
228 }
229
230 /**@brief  Data block lookup helper.
231  * @param  jbd_fs jbd filesystem
232  * @param  iblock block index
233  * @param  fblock logical block address
234  * @return standard error code*/
235 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
236                    ext4_lblk_t iblock,
237                    ext4_fsblk_t *fblock)
238 {
239         int rc = ext4_fs_get_inode_dblk_idx(
240                         &jbd_fs->inode_ref,
241                         iblock,
242                         fblock,
243                         false);
244         return rc;
245 }
246
247 /**@brief   jbd block get function (through cache).
248  * @param   jbd_fs jbd filesystem
249  * @param   block block descriptor
250  * @param   fblock jbd logical block address
251  * @return  standard error code*/
252 static int jbd_block_get(struct jbd_fs *jbd_fs,
253                   struct ext4_block *block,
254                   ext4_fsblk_t fblock)
255 {
256         /* TODO: journal device. */
257         int rc;
258         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
259
260         /* Lookup the logical block address of
261          * fblock.*/
262         rc = jbd_inode_bmap(jbd_fs, iblock,
263                             &fblock);
264         if (rc != EOK)
265                 return rc;
266
267         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
268         rc = ext4_block_get(bdev, block, fblock);
269
270         /* If succeeded, mark buffer as BC_FLUSH to indicate
271          * that data should be written to disk immediately.*/
272         if (rc == EOK)
273                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
274
275         return rc;
276 }
277
278 /**@brief   jbd block get function (through cache, don't read).
279  * @param   jbd_fs jbd filesystem
280  * @param   block block descriptor
281  * @param   fblock jbd logical block address
282  * @return  standard error code*/
283 static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
284                          struct ext4_block *block,
285                          ext4_fsblk_t fblock)
286 {
287         /* TODO: journal device. */
288         int rc;
289         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
290         rc = jbd_inode_bmap(jbd_fs, iblock,
291                             &fblock);
292         if (rc != EOK)
293                 return rc;
294
295         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
296         rc = ext4_block_get_noread(bdev, block, fblock);
297         if (rc == EOK)
298                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
299
300         return rc;
301 }
302
303 /**@brief   jbd block set procedure (through cache).
304  * @param   jbd_fs jbd filesystem
305  * @param   block block descriptor
306  * @return  standard error code*/
307 static int jbd_block_set(struct jbd_fs *jbd_fs,
308                   struct ext4_block *block)
309 {
310         return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
311                               block);
312 }
313
314 /**@brief  helper functions to calculate
315  *         block tag size, not including UUID part.
316  * @param  jbd_fs jbd filesystem
317  * @return tag size in bytes*/
318 static int jbd_tag_bytes(struct jbd_fs *jbd_fs)
319 {
320         int size;
321
322         /* It is very easy to deal with the case which
323          * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
324         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
325                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
326                 return sizeof(struct jbd_block_tag3);
327
328         size = sizeof(struct jbd_block_tag);
329
330         /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
331          * add 2 bytes to size.*/
332         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
333                                      JBD_FEATURE_INCOMPAT_CSUM_V2))
334                 size += sizeof(uint16_t);
335
336         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
337                                      JBD_FEATURE_INCOMPAT_64BIT))
338                 return size;
339
340         /* If block number is 4 bytes in size,
341          * minus 4 bytes from size */
342         return size - sizeof(uint32_t);
343 }
344
345 /**@brief  Tag information. */
346 struct tag_info {
347         /**@brief  Tag size in bytes, including UUID part.*/
348         int tag_bytes;
349
350         /**@brief  block number stored in this tag.*/
351         ext4_fsblk_t block;
352
353         /**@brief  whether UUID part exists or not.*/
354         bool uuid_exist;
355
356         /**@brief  UUID content if UUID part exists.*/
357         uint8_t uuid[UUID_SIZE];
358
359         /**@brief  Is this the last tag? */
360         bool last_tag;
361 };
362
363 /**@brief  Extract information from a block tag.
364  * @param  __tag pointer to the block tag
365  * @param  tag_bytes block tag size of this jbd filesystem
366  * @param  remaining size in buffer containing the block tag
367  * @param  tag_info information of this tag.
368  * @return  EOK when succeed, otherwise return EINVAL.*/
369 static int
370 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
371                       void *__tag,
372                       int tag_bytes,
373                       int32_t remain_buf_size,
374                       struct tag_info *tag_info)
375 {
376         char *uuid_start;
377         tag_info->tag_bytes = tag_bytes;
378         tag_info->uuid_exist = false;
379         tag_info->last_tag = false;
380
381         /* See whether it is possible to hold a valid block tag.*/
382         if (remain_buf_size - tag_bytes < 0)
383                 return EINVAL;
384
385         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
386                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
387                 struct jbd_block_tag3 *tag = __tag;
388                 tag_info->block = jbd_get32(tag, blocknr);
389                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
390                                              JBD_FEATURE_INCOMPAT_64BIT))
391                          tag_info->block |=
392                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
393
394                 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
395                         tag_info->block = 0;
396
397                 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
398                         /* See whether it is possible to hold UUID part.*/
399                         if (remain_buf_size - tag_bytes < UUID_SIZE)
400                                 return EINVAL;
401
402                         uuid_start = (char *)tag + tag_bytes;
403                         tag_info->uuid_exist = true;
404                         tag_info->tag_bytes += UUID_SIZE;
405                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
406                 }
407
408                 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
409                         tag_info->last_tag = true;
410
411         } else {
412                 struct jbd_block_tag *tag = __tag;
413                 tag_info->block = jbd_get32(tag, blocknr);
414                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
415                                              JBD_FEATURE_INCOMPAT_64BIT))
416                          tag_info->block |=
417                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
418
419                 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
420                         tag_info->block = 0;
421
422                 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
423                         /* See whether it is possible to hold UUID part.*/
424                         if (remain_buf_size - tag_bytes < UUID_SIZE)
425                                 return EINVAL;
426
427                         uuid_start = (char *)tag + tag_bytes;
428                         tag_info->uuid_exist = true;
429                         tag_info->tag_bytes += UUID_SIZE;
430                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
431                 }
432
433                 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
434                         tag_info->last_tag = true;
435
436         }
437         return EOK;
438 }
439
440 /**@brief  Write information to a block tag.
441  * @param  __tag pointer to the block tag
442  * @param  remaining size in buffer containing the block tag
443  * @param  tag_info information of this tag.
444  * @return  EOK when succeed, otherwise return EINVAL.*/
445 static int
446 jbd_write_block_tag(struct jbd_fs *jbd_fs,
447                     void *__tag,
448                     int32_t remain_buf_size,
449                     struct tag_info *tag_info)
450 {
451         char *uuid_start;
452         int tag_bytes = jbd_tag_bytes(jbd_fs);
453
454         tag_info->tag_bytes = tag_bytes;
455
456         /* See whether it is possible to hold a valid block tag.*/
457         if (remain_buf_size - tag_bytes < 0)
458                 return EINVAL;
459
460         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
461                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
462                 struct jbd_block_tag3 *tag = __tag;
463                 memset(tag, 0, sizeof(struct jbd_block_tag3));
464                 jbd_set32(tag, blocknr, tag_info->block);
465                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
466                                              JBD_FEATURE_INCOMPAT_64BIT))
467                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
468
469                 if (tag_info->uuid_exist) {
470                         /* See whether it is possible to hold UUID part.*/
471                         if (remain_buf_size - tag_bytes < UUID_SIZE)
472                                 return EINVAL;
473
474                         uuid_start = (char *)tag + tag_bytes;
475                         tag_info->tag_bytes += UUID_SIZE;
476                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
477                 } else
478                         jbd_set32(tag, flags,
479                                   jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
480
481                 if (tag_info->last_tag)
482                         jbd_set32(tag, flags,
483                                   jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
484
485         } else {
486                 struct jbd_block_tag *tag = __tag;
487                 memset(tag, 0, sizeof(struct jbd_block_tag));
488                 jbd_set32(tag, blocknr, tag_info->block);
489                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
490                                              JBD_FEATURE_INCOMPAT_64BIT))
491                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
492
493                 if (tag_info->uuid_exist) {
494                         /* See whether it is possible to hold UUID part.*/
495                         if (remain_buf_size - tag_bytes < UUID_SIZE)
496                                 return EINVAL;
497
498                         uuid_start = (char *)tag + tag_bytes;
499                         tag_info->tag_bytes += UUID_SIZE;
500                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
501                 } else
502                         jbd_set16(tag, flags,
503                                   jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
504
505                 if (tag_info->last_tag)
506                         jbd_set16(tag, flags,
507                                   jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
508
509         }
510         return EOK;
511 }
512
513 /**@brief  Iterate all block tags in a block.
514  * @param  jbd_fs jbd filesystem
515  * @param  __tag_start pointer to the block
516  * @param  tag_tbl_size size of the block
517  * @param  func callback routine to indicate that
518  *         a block tag is found
519  * @param  arg additional argument to be passed to func */
520 static void
521 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
522                         void *__tag_start,
523                         int32_t tag_tbl_size,
524                         void (*func)(struct jbd_fs * jbd_fs,
525                                         ext4_fsblk_t block,
526                                         uint8_t *uuid,
527                                         void *arg),
528                         void *arg)
529 {
530         char *tag_start, *tag_ptr;
531         int tag_bytes = jbd_tag_bytes(jbd_fs);
532         tag_start = __tag_start;
533         tag_ptr = tag_start;
534
535         /* Cut off the size of block tail storing checksum. */
536         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
537                                      JBD_FEATURE_INCOMPAT_CSUM_V2) ||
538             JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
539                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
540                 tag_tbl_size -= sizeof(struct jbd_block_tail);
541
542         while (tag_tbl_size) {
543                 struct tag_info tag_info;
544                 int rc = jbd_extract_block_tag(jbd_fs,
545                                       tag_ptr,
546                                       tag_bytes,
547                                       tag_tbl_size,
548                                       &tag_info);
549                 if (rc != EOK)
550                         break;
551
552                 if (func)
553                         func(jbd_fs, tag_info.block, tag_info.uuid, arg);
554
555                 /* Stop the iteration when we reach the last tag. */
556                 if (tag_info.last_tag)
557                         break;
558
559                 tag_ptr += tag_info.tag_bytes;
560                 tag_tbl_size -= tag_info.tag_bytes;
561         }
562 }
563
564 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
565                                    ext4_fsblk_t block,
566                                    uint8_t *uuid,
567                                    void *arg)
568 {
569         uint32_t *iblock = arg;
570         ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
571         (*iblock)++;
572         (void)jbd_fs;
573         (void)uuid;
574         return;
575 }
576
577 static struct revoke_entry *
578 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
579 {
580         struct revoke_entry tmp = {
581                 .block = block
582         };
583
584         return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
585 }
586
587 /**@brief  Replay a block in a transaction.
588  * @param  jbd_fs jbd filesystem
589  * @param  block  block address to be replayed.*/
590 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
591                                   ext4_fsblk_t block,
592                                   uint8_t *uuid __unused,
593                                   void *__arg)
594 {
595         int r;
596         struct replay_arg *arg = __arg;
597         struct recover_info *info = arg->info;
598         uint32_t *this_block = arg->this_block;
599         struct revoke_entry *revoke_entry;
600         struct ext4_block journal_block, ext4_block;
601         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
602
603         (*this_block)++;
604
605         /* We replay this block only if the current transaction id
606          * is equal or greater than that in revoke entry.*/
607         revoke_entry = jbd_revoke_entry_lookup(info, block);
608         if (revoke_entry &&
609             arg->this_trans_id < revoke_entry->trans_id)
610                 return;
611
612         ext4_dbg(DEBUG_JBD,
613                  "Replaying block in block_tag: %" PRIu64 "\n",
614                  block);
615
616         r = jbd_block_get(jbd_fs, &journal_block, *this_block);
617         if (r != EOK)
618                 return;
619
620         /* We need special treatment for ext4 superblock. */
621         if (block) {
622                 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
623                 if (r != EOK) {
624                         jbd_block_set(jbd_fs, &journal_block);
625                         return;
626                 }
627
628                 memcpy(ext4_block.data,
629                         journal_block.data,
630                         jbd_get32(&jbd_fs->sb, blocksize));
631
632                 ext4_bcache_set_dirty(ext4_block.buf);
633                 ext4_block_set(fs->bdev, &ext4_block);
634         } else {
635                 uint16_t mount_count, state;
636                 mount_count = ext4_get16(&fs->sb, mount_count);
637                 state = ext4_get16(&fs->sb, state);
638
639                 memcpy(&fs->sb,
640                         journal_block.data + EXT4_SUPERBLOCK_OFFSET,
641                         EXT4_SUPERBLOCK_SIZE);
642
643                 /* Mark system as mounted */
644                 ext4_set16(&fs->sb, state, state);
645                 r = ext4_sb_write(fs->bdev, &fs->sb);
646                 if (r != EOK)
647                         return;
648
649                 /*Update mount count*/
650                 ext4_set16(&fs->sb, mount_count, mount_count);
651         }
652
653         jbd_block_set(jbd_fs, &journal_block);
654         
655         return;
656 }
657
658 /**@brief  Add block address to revoke tree, along with
659  *         its transaction id.
660  * @param  info  journal replay info
661  * @param  block  block address to be replayed.*/
662 static void jbd_add_revoke_block_tags(struct recover_info *info,
663                                       ext4_fsblk_t block)
664 {
665         struct revoke_entry *revoke_entry;
666
667         ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
668         /* If the revoke entry with respect to the block address
669          * exists already, update its transaction id.*/
670         revoke_entry = jbd_revoke_entry_lookup(info, block);
671         if (revoke_entry) {
672                 revoke_entry->trans_id = info->this_trans_id;
673                 return;
674         }
675
676         revoke_entry = jbd_alloc_revoke_entry();
677         ext4_assert(revoke_entry);
678         revoke_entry->block = block;
679         revoke_entry->trans_id = info->this_trans_id;
680         RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
681
682         return;
683 }
684
685 static void jbd_destroy_revoke_tree(struct recover_info *info)
686 {
687         while (!RB_EMPTY(&info->revoke_root)) {
688                 struct revoke_entry *revoke_entry =
689                         RB_MIN(jbd_revoke, &info->revoke_root);
690                 ext4_assert(revoke_entry);
691                 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
692                 jbd_free_revoke_entry(revoke_entry);
693         }
694 }
695
/* The log is circular: an index that reaches maxlen continues at the
 * first log block. */
#define wrap(sb, var)                                                   \
do {                                                                    \
        if ((var) >= jbd_get32((sb), maxlen))                           \
                (var) -= (jbd_get32((sb), maxlen) -                     \
                          jbd_get32((sb), first));                      \
} while (0)

/* Passes selectable when iterating the log. */
#define ACTION_SCAN 0
#define ACTION_REVOKE 1
#define ACTION_RECOVER 2
706
707 /**@brief  Add entries in a revoke block to revoke tree.
708  * @param  jbd_fs jbd filesystem
709  * @param  header revoke block header
710  * @param  recover_info  journal replay info*/
711 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
712                                   struct jbd_bhdr *header,
713                                   struct recover_info *info)
714 {
715         char *blocks_entry;
716         struct jbd_revoke_header *revoke_hdr =
717                 (struct jbd_revoke_header *)header;
718         uint32_t i, nr_entries, record_len = 4;
719
720         /* If we are working on a 64bit jbd filesystem, */
721         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
722                                      JBD_FEATURE_INCOMPAT_64BIT))
723                 record_len = 8;
724
725         nr_entries = (jbd_get32(revoke_hdr, count) -
726                         sizeof(struct jbd_revoke_header)) /
727                         record_len;
728
729         blocks_entry = (char *)(revoke_hdr + 1);
730
731         for (i = 0;i < nr_entries;i++) {
732                 if (record_len == 8) {
733                         uint64_t *blocks =
734                                 (uint64_t *)blocks_entry;
735                         jbd_add_revoke_block_tags(info, to_be64(*blocks));
736                 } else {
737                         uint32_t *blocks =
738                                 (uint32_t *)blocks_entry;
739                         jbd_add_revoke_block_tags(info, to_be32(*blocks));
740                 }
741                 blocks_entry += record_len;
742         }
743 }
744
745 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
746                                        struct jbd_bhdr *header,
747                                        uint32_t *iblock)
748 {
749         jbd_iterate_block_table(jbd_fs,
750                                 header + 1,
751                                 jbd_get32(&jbd_fs->sb, blocksize) -
752                                         sizeof(struct jbd_bhdr),
753                                 jbd_display_block_tags,
754                                 iblock);
755 }
756
757 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
758                                         struct jbd_bhdr *header,
759                                         struct replay_arg *arg)
760 {
761         jbd_iterate_block_table(jbd_fs,
762                                 header + 1,
763                                 jbd_get32(&jbd_fs->sb, blocksize) -
764                                         sizeof(struct jbd_bhdr),
765                                 jbd_replay_block_tags,
766                                 arg);
767 }
768
769 /**@brief  The core routine of journal replay.
770  * @param  jbd_fs jbd filesystem
771  * @param  recover_info  journal replay info
772  * @param  action action needed to be taken
773  * @return standard error code*/
774 static int jbd_iterate_log(struct jbd_fs *jbd_fs,
775                            struct recover_info *info,
776                            int action)
777 {
778         int r = EOK;
779         bool log_end = false;
780         struct jbd_sb *sb = &jbd_fs->sb;
781         uint32_t start_trans_id, this_trans_id;
782         uint32_t start_block, this_block;
783
784         /* We start iterating valid blocks in the whole journal.*/
785         start_trans_id = this_trans_id = jbd_get32(sb, sequence);
786         start_block = this_block = jbd_get32(sb, start);
787
788         ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
789                             start_trans_id);
790
791         while (!log_end) {
792                 struct ext4_block block;
793                 struct jbd_bhdr *header;
794                 /* If we are not scanning for the last
795                  * valid transaction in the journal,
796                  * we will stop when we reach the end of
797                  * the journal.*/
798                 if (action != ACTION_SCAN)
799                         if (this_trans_id > info->last_trans_id) {
800                                 log_end = true;
801                                 continue;
802                         }
803
804                 r = jbd_block_get(jbd_fs, &block, this_block);
805                 if (r != EOK)
806                         break;
807
808                 header = (struct jbd_bhdr *)block.data;
809                 /* This block does not have a valid magic number,
810                  * so we have reached the end of the journal.*/
811                 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
812                         jbd_block_set(jbd_fs, &block);
813                         log_end = true;
814                         continue;
815                 }
816
817                 /* If the transaction id we found is not expected,
818                  * we may have reached the end of the journal.
819                  *
820                  * If we are not scanning the journal, something
821                  * bad might have taken place. :-( */
822                 if (jbd_get32(header, sequence) != this_trans_id) {
823                         if (action != ACTION_SCAN)
824                                 r = EIO;
825
826                         jbd_block_set(jbd_fs, &block);
827                         log_end = true;
828                         continue;
829                 }
830
831                 switch (jbd_get32(header, blocktype)) {
832                 case JBD_DESCRIPTOR_BLOCK:
833                         ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
834                                             "trans_id: %" PRIu32"\n",
835                                             this_block, this_trans_id);
836                         if (action == ACTION_RECOVER) {
837                                 struct replay_arg replay_arg;
838                                 replay_arg.info = info;
839                                 replay_arg.this_block = &this_block;
840                                 replay_arg.this_trans_id = this_trans_id;
841
842                                 jbd_replay_descriptor_block(jbd_fs,
843                                                 header, &replay_arg);
844                         } else
845                                 jbd_debug_descriptor_block(jbd_fs,
846                                                 header, &this_block);
847
848                         break;
849                 case JBD_COMMIT_BLOCK:
850                         ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
851                                             "trans_id: %" PRIu32"\n",
852                                             this_block, this_trans_id);
853                         /* This is the end of a transaction,
854                          * we may now proceed to the next transaction.
855                          */
856                         this_trans_id++;
857                         break;
858                 case JBD_REVOKE_BLOCK:
859                         ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
860                                             "trans_id: %" PRIu32"\n",
861                                             this_block, this_trans_id);
862                         if (action == ACTION_REVOKE) {
863                                 info->this_trans_id = this_trans_id;
864                                 jbd_build_revoke_tree(jbd_fs,
865                                                 header, info);
866                         }
867                         break;
868                 default:
869                         log_end = true;
870                         break;
871                 }
872                 jbd_block_set(jbd_fs, &block);
873                 this_block++;
874                 wrap(sb, this_block);
875                 if (this_block == start_block)
876                         log_end = true;
877
878         }
879         ext4_dbg(DEBUG_JBD, "End of journal.\n");
880         if (r == EOK && action == ACTION_SCAN) {
881                 /* We have finished scanning the journal. */
882                 info->start_trans_id = start_trans_id;
883                 if (this_trans_id > start_trans_id)
884                         info->last_trans_id = this_trans_id - 1;
885                 else
886                         info->last_trans_id = this_trans_id;
887         }
888
889         return r;
890 }
891
892 /**@brief  Replay journal.
893  * @param  jbd_fs jbd filesystem
894  * @return standard error code*/
895 int jbd_recover(struct jbd_fs *jbd_fs)
896 {
897         int r;
898         struct recover_info info;
899         struct jbd_sb *sb = &jbd_fs->sb;
900         if (!sb->start)
901                 return EOK;
902
903         RB_INIT(&info.revoke_root);
904
905         r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
906         if (r != EOK)
907                 return r;
908
909         r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
910         if (r != EOK)
911                 return r;
912
913         r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
914         if (r == EOK) {
915                 /* If we successfully replay the journal,
916                  * clear EXT4_FINCOM_RECOVER flag on the
917                  * ext4 superblock, and set the start of
918                  * journal to 0.*/
919                 uint32_t features_incompatible =
920                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
921                                    features_incompatible);
922                 jbd_set32(&jbd_fs->sb, start, 0);
923                 features_incompatible &= ~EXT4_FINCOM_RECOVER;
924                 ext4_set32(&jbd_fs->inode_ref.fs->sb,
925                            features_incompatible,
926                            features_incompatible);
927                 jbd_fs->dirty = true;
928                 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
929                                   &jbd_fs->inode_ref.fs->sb);
930         }
931         jbd_destroy_revoke_tree(&info);
932         return r;
933 }
934
935 static void jbd_journal_write_sb(struct jbd_journal *journal)
936 {
937         struct jbd_fs *jbd_fs = journal->jbd_fs;
938         jbd_set32(&jbd_fs->sb, start, journal->start);
939         jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
940         jbd_fs->dirty = true;
941 }
942
943 /**@brief  Start accessing the journal.
944  * @param  jbd_fs jbd filesystem
945  * @param  journal current journal session
946  * @return standard error code*/
947 int jbd_journal_start(struct jbd_fs *jbd_fs,
948                       struct jbd_journal *journal)
949 {
950         int r;
951         uint32_t features_incompatible =
952                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
953                                    features_incompatible);
954         features_incompatible |= EXT4_FINCOM_RECOVER;
955         ext4_set32(&jbd_fs->inode_ref.fs->sb,
956                         features_incompatible,
957                         features_incompatible);
958         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
959                         &jbd_fs->inode_ref.fs->sb);
960         if (r != EOK)
961                 return r;
962
963         journal->first = jbd_get32(&jbd_fs->sb, first);
964         journal->start = journal->first;
965         journal->last = journal->first;
966         journal->trans_id = 1;
967         journal->alloc_trans_id = 1;
968
969         journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
970
971         TAILQ_INIT(&journal->trans_queue);
972         TAILQ_INIT(&journal->cp_queue);
973         journal->jbd_fs = jbd_fs;
974         jbd_journal_write_sb(journal);
975         return jbd_write_sb(jbd_fs);
976 }
977
978 static void jbd_journal_flush_trans(struct jbd_trans *trans)
979 {
980         struct jbd_buf *jbd_buf, *tmp;
981         struct jbd_journal *journal = trans->journal;
982         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
983         LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
984                         tmp) {
985                 struct ext4_block block = jbd_buf->block;
986                 ext4_block_flush_buf(fs->bdev, block.buf);
987         }
988 }
989
990 static void
991 jbd_journal_skip_pure_revoke(struct jbd_journal *journal,
992                              struct jbd_trans *trans)
993 {
994         journal->start = trans->start_iblock +
995                 trans->alloc_blocks;
996         wrap(&journal->jbd_fs->sb, journal->start);
997         journal->trans_id = trans->trans_id + 1;
998         jbd_journal_free_trans(journal,
999                         trans, false);
1000         jbd_journal_write_sb(journal);
1001 }
1002
1003 static void jbd_journal_flush_all_trans(struct jbd_journal *journal)
1004 {
1005         struct jbd_trans *trans;
1006         while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1007                 if (!trans->data_cnt) {
1008                         TAILQ_REMOVE(&journal->cp_queue,
1009                                         trans,
1010                                         trans_node);
1011                         jbd_journal_skip_pure_revoke(journal, trans);
1012                 } else
1013                         jbd_journal_flush_trans(trans);
1014
1015         }
1016 }
1017
1018 /**@brief  Stop accessing the journal.
1019  * @param  journal current journal session
1020  * @return standard error code*/
1021 int jbd_journal_stop(struct jbd_journal *journal)
1022 {
1023         int r;
1024         struct jbd_fs *jbd_fs = journal->jbd_fs;
1025         uint32_t features_incompatible;
1026
1027         /* Commit all the transactions to the journal.*/
1028         jbd_journal_commit_all(journal);
1029
1030         /* Make sure that journalled content have reached
1031          * the disk.*/
1032         jbd_journal_flush_all_trans(journal);
1033
1034         features_incompatible =
1035                 ext4_get32(&jbd_fs->inode_ref.fs->sb,
1036                            features_incompatible);
1037         features_incompatible &= ~EXT4_FINCOM_RECOVER;
1038         ext4_set32(&jbd_fs->inode_ref.fs->sb,
1039                         features_incompatible,
1040                         features_incompatible);
1041         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1042                         &jbd_fs->inode_ref.fs->sb);
1043         if (r != EOK)
1044                 return r;
1045
1046         journal->start = 0;
1047         journal->trans_id = 0;
1048         jbd_journal_write_sb(journal);
1049         return jbd_write_sb(journal->jbd_fs);
1050 }
1051
1052 /**@brief  Allocate a block in the journal.
1053  * @param  journal current journal session
1054  * @param  trans transaction
1055  * @return allocated block address*/
1056 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
1057                                         struct jbd_trans *trans)
1058 {
1059         uint32_t start_block;
1060
1061         start_block = journal->last++;
1062         trans->alloc_blocks++;
1063         wrap(&journal->jbd_fs->sb, journal->last);
1064         
1065         /* If there is no space left, flush all journalled
1066          * blocks to disk first.*/
1067         if (journal->last == journal->start)
1068                 jbd_journal_flush_all_trans(journal);
1069
1070         return start_block;
1071 }
1072
1073 /**@brief  Allocate a new transaction
1074  * @param  journal current journal session
1075  * @return transaction allocated*/
1076 struct jbd_trans *
1077 jbd_journal_new_trans(struct jbd_journal *journal)
1078 {
1079         struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
1080         if (!trans)
1081                 return NULL;
1082
1083         /* We will assign a trans_id to this transaction,
1084          * once it has been committed.*/
1085         trans->journal = journal;
1086         trans->error = EOK;
1087         return trans;
1088 }
1089
1090 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1091                           struct ext4_buf *buf __unused,
1092                           int res,
1093                           void *arg);
1094
1095 /**@brief  gain access to it before making any modications.
1096  * @param  journal current journal session
1097  * @param  trans transaction
1098  * @param  block descriptor
1099  * @return standard error code.*/
1100 int jbd_trans_get_access(struct jbd_journal *journal,
1101                          struct jbd_trans *trans,
1102                          struct ext4_block *block)
1103 {
1104         int r = EOK;
1105         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1106
1107         /* If the buffer has already been modified, we should
1108          * flush dirty data in this buffer to disk.*/
1109         if (ext4_bcache_test_flag(block->buf, BC_DIRTY) &&
1110             block->buf->end_write == jbd_trans_end_write &&
1111             block->buf->end_write_arg != trans) {
1112                 r = ext4_block_flush_buf(fs->bdev, block->buf);
1113         }
1114         return r;
1115 }
1116
1117 /**@brief  Add block to a transaction and mark it dirty.
1118  * @param  trans transaction
1119  * @param  block block descriptor
1120  * @return standard error code*/
1121 int jbd_trans_set_block_dirty(struct jbd_trans *trans,
1122                               struct ext4_block *block)
1123 {
1124         struct jbd_buf *buf;
1125
1126         buf = calloc(1, sizeof(struct jbd_buf));
1127         if (!buf)
1128                 return ENOMEM;
1129
1130         buf->trans = trans;
1131         buf->block = *block;
1132         ext4_bcache_inc_ref(block->buf);
1133
1134         /* If the content reach the disk, notify us
1135          * so that we may do a checkpoint. */
1136         block->buf->end_write = jbd_trans_end_write;
1137         block->buf->end_write_arg = buf;
1138
1139         trans->data_cnt++;
1140         LIST_INSERT_HEAD(&trans->buf_list, buf, buf_node);
1141
1142         ext4_bcache_set_dirty(block->buf);
1143         return EOK;
1144 }
1145
1146 /**@brief  Add block to be revoked to a transaction
1147  * @param  trans transaction
1148  * @param  lba logical block address
1149  * @return standard error code*/
1150 int jbd_trans_revoke_block(struct jbd_trans *trans,
1151                            ext4_fsblk_t lba)
1152 {
1153         struct jbd_revoke_rec *rec =
1154                 calloc(1, sizeof(struct jbd_revoke_rec));
1155         if (!rec)
1156                 return ENOMEM;
1157
1158         rec->lba = lba;
1159         LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
1160         return EOK;
1161 }
1162
1163 /**@brief  Free a transaction
1164  * @param  journal current journal session
1165  * @param  trans transaction
1166  * @param  abort discard all the modifications on the block?
1167  * @return standard error code*/
1168 void jbd_journal_free_trans(struct jbd_journal *journal,
1169                             struct jbd_trans *trans,
1170                             bool abort)
1171 {
1172         struct jbd_buf *jbd_buf, *tmp;
1173         struct jbd_revoke_rec *rec, *tmp2;
1174         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1175         LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
1176                           tmp) {
1177                 if (abort) {
1178                         jbd_buf->block.buf->end_write = NULL;
1179                         jbd_buf->block.buf->end_write_arg = NULL;
1180                         ext4_bcache_clear_dirty(jbd_buf->block.buf);
1181                         ext4_block_set(fs->bdev, &jbd_buf->block);
1182                 }
1183
1184                 LIST_REMOVE(jbd_buf, buf_node);
1185                 free(jbd_buf);
1186         }
1187         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1188                           tmp2) {
1189                 LIST_REMOVE(rec, revoke_node);
1190                 free(rec);
1191         }
1192
1193         free(trans);
1194 }
1195
1196 /**@brief  Write commit block for a transaction
1197  * @param  trans transaction
1198  * @return standard error code*/
1199 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
1200 {
1201         int rc;
1202         struct jbd_commit_header *header;
1203         uint32_t commit_iblock = 0;
1204         struct ext4_block commit_block;
1205         struct jbd_journal *journal = trans->journal;
1206
1207         commit_iblock = jbd_journal_alloc_block(journal, trans);
1208         rc = jbd_block_get_noread(journal->jbd_fs,
1209                         &commit_block, commit_iblock);
1210         if (rc != EOK)
1211                 return rc;
1212
1213         header = (struct jbd_commit_header *)commit_block.data;
1214         jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
1215         jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
1216         jbd_set32(&header->header, sequence, trans->trans_id);
1217
1218         ext4_bcache_set_dirty(commit_block.buf);
1219         rc = jbd_block_set(journal->jbd_fs, &commit_block);
1220         if (rc != EOK)
1221                 return rc;
1222
1223         return EOK;
1224 }
1225
1226 /**@brief  Write descriptor block for a transaction
1227  * @param  journal current journal session
1228  * @param  trans transaction
1229  * @return standard error code*/
1230 static int jbd_journal_prepare(struct jbd_journal *journal,
1231                                struct jbd_trans *trans)
1232 {
1233         int rc = EOK, i = 0;
1234         int32_t tag_tbl_size;
1235         uint32_t desc_iblock = 0;
1236         uint32_t data_iblock = 0;
1237         char *tag_start = NULL, *tag_ptr = NULL;
1238         struct jbd_buf *jbd_buf, *tmp;
1239         struct ext4_block desc_block, data_block;
1240         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1241
1242         LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node, tmp) {
1243                 struct tag_info tag_info;
1244                 bool uuid_exist = false;
1245                 if (!ext4_bcache_test_flag(jbd_buf->block.buf,
1246                                            BC_DIRTY)) {
1247                         /* The buffer has not been modified, just release
1248                          * that jbd_buf. */
1249                         jbd_buf->block.buf->end_write = NULL;
1250                         jbd_buf->block.buf->end_write_arg = NULL;
1251                         ext4_block_set(fs->bdev, &jbd_buf->block);
1252                         LIST_REMOVE(jbd_buf, buf_node);
1253                         free(jbd_buf);
1254                         continue;
1255                 }
1256 again:
1257                 if (!desc_iblock) {
1258                         struct jbd_bhdr *bhdr;
1259                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1260                         rc = jbd_block_get_noread(journal->jbd_fs,
1261                                            &desc_block, desc_iblock);
1262                         if (rc != EOK)
1263                                 break;
1264
1265                         ext4_bcache_set_dirty(desc_block.buf);
1266
1267                         bhdr = (struct jbd_bhdr *)desc_block.data;
1268                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1269                         jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
1270                         jbd_set32(bhdr, sequence, trans->trans_id);
1271
1272                         tag_start = (char *)(bhdr + 1);
1273                         tag_ptr = tag_start;
1274                         uuid_exist = true;
1275                         tag_tbl_size = journal->block_size -
1276                                 sizeof(struct jbd_bhdr);
1277
1278                         if (!trans->start_iblock)
1279                                 trans->start_iblock = desc_iblock;
1280
1281                 }
1282                 tag_info.block = jbd_buf->block.lb_id;
1283                 tag_info.uuid_exist = uuid_exist;
1284                 if (i == trans->data_cnt - 1)
1285                         tag_info.last_tag = true;
1286
1287                 if (uuid_exist)
1288                         memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1289                                         UUID_SIZE);
1290
1291                 rc = jbd_write_block_tag(journal->jbd_fs,
1292                                 tag_ptr,
1293                                 tag_tbl_size,
1294                                 &tag_info);
1295                 if (rc != EOK) {
1296                         jbd_block_set(journal->jbd_fs, &desc_block);
1297                         desc_iblock = 0;
1298                         goto again;
1299                 }
1300
1301                 data_iblock = jbd_journal_alloc_block(journal, trans);
1302                 rc = jbd_block_get_noread(journal->jbd_fs,
1303                                 &data_block, data_iblock);
1304                 if (rc != EOK)
1305                         break;
1306
1307                 ext4_bcache_set_dirty(data_block.buf);
1308
1309                 memcpy(data_block.data, jbd_buf->block.data,
1310                         journal->block_size);
1311
1312                 rc = jbd_block_set(journal->jbd_fs, &data_block);
1313                 if (rc != EOK)
1314                         break;
1315
1316                 tag_ptr += tag_info.tag_bytes;
1317                 tag_tbl_size -= tag_info.tag_bytes;
1318
1319                 i++;
1320         }
1321         if (rc == EOK && desc_iblock)
1322                 jbd_block_set(journal->jbd_fs, &desc_block);
1323
1324         return rc;
1325 }
1326
1327 /**@brief  Write revoke block for a transaction
1328  * @param  journal current journal session
1329  * @param  trans transaction
1330  * @return standard error code*/
1331 static int
1332 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1333                            struct jbd_trans *trans)
1334 {
1335         int rc = EOK, i = 0;
1336         int32_t tag_tbl_size;
1337         uint32_t desc_iblock = 0;
1338         char *blocks_entry = NULL;
1339         struct jbd_revoke_rec *rec, *tmp;
1340         struct ext4_block desc_block;
1341         struct jbd_revoke_header *header = NULL;
1342         int32_t record_len = 4;
1343
1344         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1345                                      JBD_FEATURE_INCOMPAT_64BIT))
1346                 record_len = 8;
1347
1348         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1349                           tmp) {
1350 again:
1351                 if (!desc_iblock) {
1352                         struct jbd_bhdr *bhdr;
1353                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1354                         rc = jbd_block_get_noread(journal->jbd_fs,
1355                                            &desc_block, desc_iblock);
1356                         if (rc != EOK) {
1357                                 break;
1358                         }
1359
1360                         ext4_bcache_set_dirty(desc_block.buf);
1361
1362                         bhdr = (struct jbd_bhdr *)desc_block.data;
1363                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1364                         jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1365                         jbd_set32(bhdr, sequence, trans->trans_id);
1366                         
1367                         header = (struct jbd_revoke_header *)bhdr;
1368                         blocks_entry = (char *)(header + 1);
1369                         tag_tbl_size = journal->block_size -
1370                                 sizeof(struct jbd_revoke_header);
1371
1372                         if (!trans->start_iblock)
1373                                 trans->start_iblock = desc_iblock;
1374
1375                 }
1376
1377                 if (tag_tbl_size < record_len) {
1378                         jbd_set32(header, count,
1379                                   journal->block_size - tag_tbl_size);
1380                         jbd_block_set(journal->jbd_fs, &desc_block);
1381                         desc_iblock = 0;
1382                         header = NULL;
1383                         goto again;
1384                 }
1385                 if (record_len == 8) {
1386                         uint64_t *blocks =
1387                                 (uint64_t *)blocks_entry;
1388                         *blocks = to_be64(rec->lba);
1389                 } else {
1390                         uint32_t *blocks =
1391                                 (uint32_t *)blocks_entry;
1392                         *blocks = to_be32(rec->lba);
1393                 }
1394                 blocks_entry += record_len;
1395                 tag_tbl_size -= record_len;
1396
1397                 i++;
1398         }
1399         if (rc == EOK && desc_iblock) {
1400                 if (header != NULL)
1401                         jbd_set32(header, count,
1402                                   journal->block_size - tag_tbl_size);
1403
1404                 jbd_block_set(journal->jbd_fs, &desc_block);
1405         }
1406
1407         return rc;
1408 }
1409
1410 /**@brief  Submit the transaction to transaction queue.
1411  * @param  journal current journal session
1412  * @param  trans transaction*/
1413 void
1414 jbd_journal_submit_trans(struct jbd_journal *journal,
1415                          struct jbd_trans *trans)
1416 {
1417         TAILQ_INSERT_TAIL(&journal->trans_queue,
1418                           trans,
1419                           trans_node);
1420 }
1421
1422 /**@brief  Put references of block descriptors in a transaction.
1423  * @param  journal current journal session
1424  * @param  trans transaction*/
1425 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
1426 {
1427         struct jbd_buf *jbd_buf, *tmp;
1428         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1429         LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
1430                         tmp) {
1431                 struct ext4_block block = jbd_buf->block;
1432                 ext4_block_set(fs->bdev, &block);
1433         }
1434 }
1435
1436 /**@brief  Update the start block of the journal when
1437  *         all the contents in a transaction reach the disk.*/
1438 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1439                           struct ext4_buf *buf,
1440                           int res,
1441                           void *arg)
1442 {
1443         struct jbd_buf *jbd_buf = arg;
1444         struct jbd_trans *trans = jbd_buf->trans;
1445         struct jbd_journal *journal = trans->journal;
1446         bool first_in_queue =
1447                 trans == TAILQ_FIRST(&journal->cp_queue);
1448         if (res != EOK)
1449                 trans->error = res;
1450
1451         LIST_REMOVE(jbd_buf, buf_node);
1452         free(jbd_buf);
1453
1454         /* Clear the end_write and end_write_arg fields. */
1455         buf->end_write = NULL;
1456         buf->end_write_arg = NULL;
1457
1458         trans->written_cnt++;
1459         if (trans->written_cnt == trans->data_cnt) {
1460                 TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
1461
1462                 if (first_in_queue) {
1463                         journal->start = trans->start_iblock +
1464                                 trans->alloc_blocks;
1465                         wrap(&journal->jbd_fs->sb, journal->start);
1466                         journal->trans_id = trans->trans_id + 1;
1467                 }
1468                 jbd_journal_free_trans(journal, trans, false);
1469
1470                 if (first_in_queue) {
1471                         while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1472                                 if (!trans->data_cnt) {
1473                                         TAILQ_REMOVE(&journal->cp_queue,
1474                                                      trans,
1475                                                      trans_node);
1476                                         jbd_journal_skip_pure_revoke(journal,
1477                                                                      trans);
1478                                 } else {
1479                                         journal->start = trans->start_iblock;
1480                                         wrap(&journal->jbd_fs->sb, journal->start);
1481                                         journal->trans_id = trans->trans_id;
1482                                         break;
1483                                 }
1484                         }
1485                         jbd_journal_write_sb(journal);
1486                         jbd_write_sb(journal->jbd_fs);
1487                 }
1488         }
1489 }
1490
1491 /**@brief  Commit a transaction to the journal immediately.
1492  * @param  journal current journal session
1493  * @param  trans transaction
1494  * @return standard error code*/
1495 int jbd_journal_commit_trans(struct jbd_journal *journal,
1496                              struct jbd_trans *trans)
1497 {
1498         int rc = EOK;
1499         uint32_t last = journal->last;
1500
1501         trans->trans_id = journal->alloc_trans_id;
1502         rc = jbd_journal_prepare(journal, trans);
1503         if (rc != EOK)
1504                 goto Finish;
1505
1506         rc = jbd_journal_prepare_revoke(journal, trans);
1507         if (rc != EOK)
1508                 goto Finish;
1509
1510         if (LIST_EMPTY(&trans->buf_list) &&
1511             LIST_EMPTY(&trans->revoke_list)) {
1512                 /* Since there are no entries in both buffer list
1513                  * and revoke entry list, we do not consider trans as
1514                  * complete transaction and just return EOK.*/
1515                 jbd_journal_free_trans(journal, trans, false);
1516                 goto Finish;
1517         }
1518
1519         rc = jbd_trans_write_commit_block(trans);
1520         if (rc != EOK)
1521                 goto Finish;
1522
1523         journal->alloc_trans_id++;
1524         if (TAILQ_EMPTY(&journal->cp_queue)) {
1525                 if (trans->data_cnt) {
1526                         journal->start = trans->start_iblock;
1527                         wrap(&journal->jbd_fs->sb, journal->start);
1528                         journal->trans_id = trans->trans_id;
1529                         jbd_journal_write_sb(journal);
1530                         jbd_write_sb(journal->jbd_fs);
1531                         TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1532                                         trans_node);
1533                         jbd_journal_cp_trans(journal, trans);
1534                 } else {
1535                         journal->start = trans->start_iblock +
1536                                 trans->alloc_blocks;
1537                         wrap(&journal->jbd_fs->sb, journal->start);
1538                         journal->trans_id = trans->trans_id + 1;
1539                         jbd_journal_write_sb(journal);
1540                         jbd_journal_free_trans(journal, trans, false);
1541                 }
1542         } else {
1543                 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1544                                 trans_node);
1545                 if (trans->data_cnt)
1546                         jbd_journal_cp_trans(journal, trans);
1547
1548         }
1549 Finish:
1550         if (rc != EOK) {
1551                 journal->last = last;
1552                 jbd_journal_free_trans(journal, trans, true);
1553         }
1554         return rc;
1555 }
1556
1557 /**@brief  Commit one transaction on transaction queue
1558  *         to the journal.
1559  * @param  journal current journal session.*/
1560 void jbd_journal_commit_one(struct jbd_journal *journal)
1561 {
1562         struct jbd_trans *trans;
1563
1564         if ((trans = TAILQ_FIRST(&journal->trans_queue))) {
1565                 TAILQ_REMOVE(&journal->trans_queue, trans, trans_node);
1566                 jbd_journal_commit_trans(journal, trans);
1567         }
1568 }
1569
1570 /**@brief  Commit all the transactions on transaction queue
1571  *         to the journal.
1572  * @param  journal current journal session.*/
1573 void jbd_journal_commit_all(struct jbd_journal *journal)
1574 {
1575         while (!TAILQ_EMPTY(&journal->trans_queue)) {
1576                 jbd_journal_commit_one(journal);
1577         }
1578 }
1579
1580 /**
1581  * @}
1582  */