ext4_journal: initialize block tag with 0 before writing to it.
[lwext4.git] / lwext4 / ext4_journal.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * - Redistributions of source code must retain the above copyright
11  *   notice, this list of conditions and the following disclaimer.
12  * - Redistributions in binary form must reproduce the above copyright
13  *   notice, this list of conditions and the following disclaimer in the
14  *   documentation and/or other materials provided with the distribution.
15  * - The name of the author may not be used to endorse or promote products
16  *   derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 /** @addtogroup lwext4
31  * @{
32  */
33 /**
34  * @file  ext4_journal.c
35  * @brief Journal handle functions
36  */
37
38 #include "ext4_config.h"
39 #include "ext4_types.h"
40 #include "ext4_fs.h"
41 #include "ext4_super.h"
42 #include "ext4_journal.h"
43 #include "ext4_errno.h"
44 #include "ext4_blockdev.h"
45 #include "ext4_crc32c.h"
46 #include "ext4_debug.h"
47 #include "tree.h"
48
49 #include <string.h>
50 #include <stdlib.h>
51
52 /**@brief  Revoke entry during journal replay.*/
53 struct revoke_entry {
54         /**@brief  Block number not to be replayed.*/
55         ext4_fsblk_t block;
56
57         /**@brief  For any transaction id smaller
58          *         than trans_id, records of @block
59          *         in those transactions should not
60          *         be replayed.*/
61         uint32_t trans_id;
62
63         /**@brief  Revoke tree node.*/
64         RB_ENTRY(revoke_entry) revoke_node;
65 };
66
67 /**@brief  Valid journal replay information.*/
68 struct recover_info {
69         /**@brief  Starting transaction id.*/
70         uint32_t start_trans_id;
71
72         /**@brief  Ending transaction id.*/
73         uint32_t last_trans_id;
74
75         /**@brief  Used as internal argument.*/
76         uint32_t this_trans_id;
77
78         /**@brief  RB-Tree storing revoke entries.*/
79         RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
80 };
81
/**@brief  Argument bundle handed to the per-tag replay callback.*/
struct replay_arg {
	/**@brief  Replay bookkeeping (revoke tree, transaction range).*/
	struct recover_info *info;

	/**@brief  Pointer to the journal block cursor; the callback
	 *         advances it for every tag it visits.*/
	uint32_t *this_block;

	/**@brief  Id of the transaction currently being replayed.*/
	uint32_t this_trans_id;
};
93
94 static int
95 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
96 {
97         if (a->block > b->block)
98                 return 1;
99         else if (a->block < b->block)
100                 return -1;
101         return 0;
102 }
103
104 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
105                      jbd_revoke_entry_cmp, static inline)
106
107 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
108 #define jbd_free_revoke_entry(addr) free(addr)
109
110 /**@brief  Write jbd superblock to disk.
111  * @param  jbd_fs jbd filesystem
112  * @param  s jbd superblock
113  * @return standard error code*/
114 static int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
115 {
116         int rc;
117         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
118         uint64_t offset;
119         ext4_fsblk_t fblock;
120         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
121         if (rc != EOK)
122                 return rc;
123
124         offset = fblock * ext4_sb_get_block_size(&fs->sb);
125         return ext4_block_writebytes(fs->bdev, offset, s,
126                                      EXT4_SUPERBLOCK_SIZE);
127 }
128
129 /**@brief  Read jbd superblock from disk.
130  * @param  jbd_fs jbd filesystem
131  * @param  s jbd superblock
132  * @return standard error code*/
133 static int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
134 {
135         int rc;
136         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
137         uint64_t offset;
138         ext4_fsblk_t fblock;
139         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
140         if (rc != EOK)
141                 return rc;
142
143         offset = fblock * ext4_sb_get_block_size(&fs->sb);
144         return ext4_block_readbytes(fs->bdev, offset, s,
145                                     EXT4_SUPERBLOCK_SIZE);
146 }
147
148 /**@brief  Verify jbd superblock.
149  * @param  sb jbd superblock
150  * @return true if jbd superblock is valid */
151 static bool jbd_verify_sb(struct jbd_sb *sb)
152 {
153         struct jbd_bhdr *header = &sb->header;
154         if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
155                 return false;
156
157         if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
158             jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
159                 return false;
160
161         return true;
162 }
163
164 /**@brief  Write back dirty jbd superblock to disk.
165  * @param  jbd_fs jbd filesystem
166  * @return standard error code*/
167 static int jbd_write_sb(struct jbd_fs *jbd_fs)
168 {
169         int rc = EOK;
170         if (jbd_fs->dirty) {
171                 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
172                 if (rc != EOK)
173                         return rc;
174
175                 jbd_fs->dirty = false;
176         }
177         return rc;
178 }
179
180 /**@brief  Get reference to jbd filesystem.
181  * @param  fs Filesystem to load journal of
182  * @param  jbd_fs jbd filesystem
183  * @return standard error code*/
184 int jbd_get_fs(struct ext4_fs *fs,
185                struct jbd_fs *jbd_fs)
186 {
187         int rc;
188         uint32_t journal_ino;
189
190         memset(jbd_fs, 0, sizeof(struct jbd_fs));
191         /* See if there is journal inode on this filesystem.*/
192         /* FIXME: detection on existance ofbkejournal bdev is
193          *        missing.*/
194         journal_ino = ext4_get32(&fs->sb, journal_inode_number);
195
196         rc = ext4_fs_get_inode_ref(fs,
197                                    journal_ino,
198                                    &jbd_fs->inode_ref);
199         if (rc != EOK) {
200                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
201                 return rc;
202         }
203         rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
204         if (rc != EOK) {
205                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
206                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
207                 return rc;
208         }
209         if (!jbd_verify_sb(&jbd_fs->sb)) {
210                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
211                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
212                 rc = EIO;
213         }
214
215         return rc;
216 }
217
218 /**@brief  Put reference of jbd filesystem.
219  * @param  jbd_fs jbd filesystem
220  * @return standard error code*/
221 int jbd_put_fs(struct jbd_fs *jbd_fs)
222 {
223         int rc = EOK;
224         rc = jbd_write_sb(jbd_fs);
225
226         ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
227         return rc;
228 }
229
230 /**@brief  Data block lookup helper.
231  * @param  jbd_fs jbd filesystem
232  * @param  iblock block index
233  * @param  fblock logical block address
234  * @return standard error code*/
235 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
236                    ext4_lblk_t iblock,
237                    ext4_fsblk_t *fblock)
238 {
239         int rc = ext4_fs_get_inode_dblk_idx(
240                         &jbd_fs->inode_ref,
241                         iblock,
242                         fblock,
243                         false);
244         return rc;
245 }
246
247 /**@brief   jbd block get function (through cache).
248  * @param   jbd_fs jbd filesystem
249  * @param   block block descriptor
250  * @param   fblock jbd logical block address
251  * @return  standard error code*/
252 static int jbd_block_get(struct jbd_fs *jbd_fs,
253                   struct ext4_block *block,
254                   ext4_fsblk_t fblock)
255 {
256         /* TODO: journal device. */
257         int rc;
258         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
259
260         /* Lookup the logical block address of
261          * fblock.*/
262         rc = jbd_inode_bmap(jbd_fs, iblock,
263                             &fblock);
264         if (rc != EOK)
265                 return rc;
266
267         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
268         rc = ext4_block_get(bdev, block, fblock);
269
270         /* If succeeded, mark buffer as BC_FLUSH to indicate
271          * that data should be written to disk immediately.*/
272         if (rc == EOK)
273                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
274
275         return rc;
276 }
277
278 /**@brief   jbd block get function (through cache, don't read).
279  * @param   jbd_fs jbd filesystem
280  * @param   block block descriptor
281  * @param   fblock jbd logical block address
282  * @return  standard error code*/
283 static int jbd_block_get_noread(struct jbd_fs *jbd_fs,
284                          struct ext4_block *block,
285                          ext4_fsblk_t fblock)
286 {
287         /* TODO: journal device. */
288         int rc;
289         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
290         rc = jbd_inode_bmap(jbd_fs, iblock,
291                             &fblock);
292         if (rc != EOK)
293                 return rc;
294
295         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
296         rc = ext4_block_get_noread(bdev, block, fblock);
297         if (rc == EOK)
298                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
299
300         return rc;
301 }
302
303 /**@brief   jbd block set procedure (through cache).
304  * @param   jbd_fs jbd filesystem
305  * @param   block block descriptor
306  * @return  standard error code*/
307 static int jbd_block_set(struct jbd_fs *jbd_fs,
308                   struct ext4_block *block)
309 {
310         return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
311                               block);
312 }
313
314 /**@brief  helper functions to calculate
315  *         block tag size, not including UUID part.
316  * @param  jbd_fs jbd filesystem
317  * @return tag size in bytes*/
318 static int jbd_tag_bytes(struct jbd_fs *jbd_fs)
319 {
320         int size;
321
322         /* It is very easy to deal with the case which
323          * JBD_FEATURE_INCOMPAT_CSUM_V3 is enabled.*/
324         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
325                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
326                 return sizeof(struct jbd_block_tag3);
327
328         size = sizeof(struct jbd_block_tag);
329
330         /* If JBD_FEATURE_INCOMPAT_CSUM_V2 is enabled,
331          * add 2 bytes to size.*/
332         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
333                                      JBD_FEATURE_INCOMPAT_CSUM_V2))
334                 size += sizeof(uint16_t);
335
336         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
337                                      JBD_FEATURE_INCOMPAT_64BIT))
338                 return size;
339
340         /* If block number is 4 bytes in size,
341          * minus 4 bytes from size */
342         return size - sizeof(uint32_t);
343 }
344
345 /**@brief  Tag information. */
346 struct tag_info {
347         /**@brief  Tag size in bytes, including UUID part.*/
348         int tag_bytes;
349
350         /**@brief  block number stored in this tag.*/
351         ext4_fsblk_t block;
352
353         /**@brief  whether UUID part exists or not.*/
354         bool uuid_exist;
355
356         /**@brief  UUID content if UUID part exists.*/
357         uint8_t uuid[UUID_SIZE];
358
359         /**@brief  Is this the last tag? */
360         bool last_tag;
361 };
362
363 /**@brief  Extract information from a block tag.
364  * @param  __tag pointer to the block tag
365  * @param  tag_bytes block tag size of this jbd filesystem
366  * @param  remaining size in buffer containing the block tag
367  * @param  tag_info information of this tag.
368  * @return  EOK when succeed, otherwise return EINVAL.*/
369 static int
370 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
371                       void *__tag,
372                       int tag_bytes,
373                       int32_t remain_buf_size,
374                       struct tag_info *tag_info)
375 {
376         char *uuid_start;
377         tag_info->tag_bytes = tag_bytes;
378         tag_info->uuid_exist = false;
379         tag_info->last_tag = false;
380
381         /* See whether it is possible to hold a valid block tag.*/
382         if (remain_buf_size - tag_bytes < 0)
383                 return EINVAL;
384
385         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
386                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
387                 struct jbd_block_tag3 *tag = __tag;
388                 tag_info->block = jbd_get32(tag, blocknr);
389                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
390                                              JBD_FEATURE_INCOMPAT_64BIT))
391                          tag_info->block |=
392                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
393
394                 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
395                         tag_info->block = 0;
396
397                 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
398                         /* See whether it is possible to hold UUID part.*/
399                         if (remain_buf_size - tag_bytes < UUID_SIZE)
400                                 return EINVAL;
401
402                         uuid_start = (char *)tag + tag_bytes;
403                         tag_info->uuid_exist = true;
404                         tag_info->tag_bytes += UUID_SIZE;
405                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
406                 }
407
408                 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
409                         tag_info->last_tag = true;
410
411         } else {
412                 struct jbd_block_tag *tag = __tag;
413                 tag_info->block = jbd_get32(tag, blocknr);
414                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
415                                              JBD_FEATURE_INCOMPAT_64BIT))
416                          tag_info->block |=
417                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
418
419                 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
420                         tag_info->block = 0;
421
422                 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
423                         /* See whether it is possible to hold UUID part.*/
424                         if (remain_buf_size - tag_bytes < UUID_SIZE)
425                                 return EINVAL;
426
427                         uuid_start = (char *)tag + tag_bytes;
428                         tag_info->uuid_exist = true;
429                         tag_info->tag_bytes += UUID_SIZE;
430                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
431                 }
432
433                 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
434                         tag_info->last_tag = true;
435
436         }
437         return EOK;
438 }
439
440 /**@brief  Write information to a block tag.
441  * @param  __tag pointer to the block tag
442  * @param  remaining size in buffer containing the block tag
443  * @param  tag_info information of this tag.
444  * @return  EOK when succeed, otherwise return EINVAL.*/
445 static int
446 jbd_write_block_tag(struct jbd_fs *jbd_fs,
447                     void *__tag,
448                     int32_t remain_buf_size,
449                     struct tag_info *tag_info)
450 {
451         char *uuid_start;
452         int tag_bytes = jbd_tag_bytes(jbd_fs);
453
454         tag_info->tag_bytes = tag_bytes;
455
456         /* See whether it is possible to hold a valid block tag.*/
457         if (remain_buf_size - tag_bytes < 0)
458                 return EINVAL;
459
460         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
461                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
462                 struct jbd_block_tag3 *tag = __tag;
463                 memset(tag, 0, sizeof(struct jbd_block_tag3));
464                 jbd_set32(tag, blocknr, tag_info->block);
465                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
466                                              JBD_FEATURE_INCOMPAT_64BIT))
467                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
468
469                 if (tag_info->uuid_exist) {
470                         /* See whether it is possible to hold UUID part.*/
471                         if (remain_buf_size - tag_bytes < UUID_SIZE)
472                                 return EINVAL;
473
474                         uuid_start = (char *)tag + tag_bytes;
475                         tag_info->tag_bytes += UUID_SIZE;
476                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
477                 } else
478                         jbd_set32(tag, flags,
479                                   jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
480
481                 if (tag_info->last_tag)
482                         jbd_set32(tag, flags,
483                                   jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
484
485         } else {
486                 struct jbd_block_tag *tag = __tag;
487                 memset(tag, 0, sizeof(struct jbd_block_tag));
488                 jbd_set32(tag, blocknr, tag_info->block);
489                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
490                                              JBD_FEATURE_INCOMPAT_64BIT))
491                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
492
493                 if (tag_info->uuid_exist) {
494                         /* See whether it is possible to hold UUID part.*/
495                         if (remain_buf_size - tag_bytes < UUID_SIZE)
496                                 return EINVAL;
497
498                         uuid_start = (char *)tag + tag_bytes;
499                         tag_info->tag_bytes += UUID_SIZE;
500                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
501                 } else
502                         jbd_set16(tag, flags,
503                                   jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
504
505                 if (tag_info->last_tag)
506                         jbd_set16(tag, flags,
507                                   jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
508
509         }
510         return EOK;
511 }
512
513 /**@brief  Iterate all block tags in a block.
514  * @param  jbd_fs jbd filesystem
515  * @param  __tag_start pointer to the block
516  * @param  tag_tbl_size size of the block
517  * @param  func callback routine to indicate that
518  *         a block tag is found
519  * @param  arg additional argument to be passed to func */
520 static void
521 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
522                         void *__tag_start,
523                         int32_t tag_tbl_size,
524                         void (*func)(struct jbd_fs * jbd_fs,
525                                         ext4_fsblk_t block,
526                                         uint8_t *uuid,
527                                         void *arg),
528                         void *arg)
529 {
530         char *tag_start, *tag_ptr;
531         int tag_bytes = jbd_tag_bytes(jbd_fs);
532         tag_start = __tag_start;
533         tag_ptr = tag_start;
534
535         /* Cut off the size of block tail storing checksum. */
536         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
537                                      JBD_FEATURE_INCOMPAT_CSUM_V2) ||
538             JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
539                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
540                 tag_tbl_size -= sizeof(struct jbd_block_tail);
541
542         while (tag_tbl_size) {
543                 struct tag_info tag_info;
544                 int rc = jbd_extract_block_tag(jbd_fs,
545                                       tag_ptr,
546                                       tag_bytes,
547                                       tag_tbl_size,
548                                       &tag_info);
549                 if (rc != EOK)
550                         break;
551
552                 if (func)
553                         func(jbd_fs, tag_info.block, tag_info.uuid, arg);
554
555                 /* Stop the iteration when we reach the last tag. */
556                 if (tag_info.last_tag)
557                         break;
558
559                 tag_ptr += tag_info.tag_bytes;
560                 tag_tbl_size -= tag_info.tag_bytes;
561         }
562 }
563
564 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
565                                    ext4_fsblk_t block,
566                                    uint8_t *uuid,
567                                    void *arg)
568 {
569         uint32_t *iblock = arg;
570         ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
571         (*iblock)++;
572         (void)jbd_fs;
573         (void)uuid;
574         return;
575 }
576
577 static struct revoke_entry *
578 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
579 {
580         struct revoke_entry tmp = {
581                 .block = block
582         };
583
584         return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
585 }
586
587 /**@brief  Replay a block in a transaction.
588  * @param  jbd_fs jbd filesystem
589  * @param  block  block address to be replayed.*/
590 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
591                                   ext4_fsblk_t block,
592                                   uint8_t *uuid __unused,
593                                   void *__arg)
594 {
595         int r;
596         struct replay_arg *arg = __arg;
597         struct recover_info *info = arg->info;
598         uint32_t *this_block = arg->this_block;
599         struct revoke_entry *revoke_entry;
600         struct ext4_block journal_block, ext4_block;
601         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
602
603         (*this_block)++;
604
605         /* We replay this block only if the current transaction id
606          * is equal or greater than that in revoke entry.*/
607         revoke_entry = jbd_revoke_entry_lookup(info, block);
608         if (revoke_entry &&
609             arg->this_trans_id < revoke_entry->trans_id)
610                 return;
611
612         ext4_dbg(DEBUG_JBD,
613                  "Replaying block in block_tag: %" PRIu64 "\n",
614                  block);
615
616         r = jbd_block_get(jbd_fs, &journal_block, *this_block);
617         if (r != EOK)
618                 return;
619
620         /* We need special treatment for ext4 superblock. */
621         if (block) {
622                 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
623                 if (r != EOK) {
624                         jbd_block_set(jbd_fs, &journal_block);
625                         return;
626                 }
627
628                 memcpy(ext4_block.data,
629                         journal_block.data,
630                         jbd_get32(&jbd_fs->sb, blocksize));
631
632                 ext4_bcache_set_dirty(ext4_block.buf);
633                 ext4_block_set(fs->bdev, &ext4_block);
634         } else {
635                 uint16_t mount_count, state;
636                 mount_count = ext4_get16(&fs->sb, mount_count);
637                 state = ext4_get16(&fs->sb, state);
638
639                 memcpy(&fs->sb,
640                         journal_block.data + EXT4_SUPERBLOCK_OFFSET,
641                         EXT4_SUPERBLOCK_SIZE);
642
643                 /* Mark system as mounted */
644                 ext4_set16(&fs->sb, state, state);
645                 r = ext4_sb_write(fs->bdev, &fs->sb);
646                 if (r != EOK)
647                         return;
648
649                 /*Update mount count*/
650                 ext4_set16(&fs->sb, mount_count, mount_count);
651         }
652
653         jbd_block_set(jbd_fs, &journal_block);
654         
655         return;
656 }
657
658 /**@brief  Add block address to revoke tree, along with
659  *         its transaction id.
660  * @param  info  journal replay info
661  * @param  block  block address to be replayed.*/
662 static void jbd_add_revoke_block_tags(struct recover_info *info,
663                                       ext4_fsblk_t block)
664 {
665         struct revoke_entry *revoke_entry;
666
667         ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
668         /* If the revoke entry with respect to the block address
669          * exists already, update its transaction id.*/
670         revoke_entry = jbd_revoke_entry_lookup(info, block);
671         if (revoke_entry) {
672                 revoke_entry->trans_id = info->this_trans_id;
673                 return;
674         }
675
676         revoke_entry = jbd_alloc_revoke_entry();
677         ext4_assert(revoke_entry);
678         revoke_entry->block = block;
679         revoke_entry->trans_id = info->this_trans_id;
680         RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
681
682         return;
683 }
684
685 static void jbd_destroy_revoke_tree(struct recover_info *info)
686 {
687         while (!RB_EMPTY(&info->revoke_root)) {
688                 struct revoke_entry *revoke_entry =
689                         RB_MIN(jbd_revoke, &info->revoke_root);
690                 ext4_assert(revoke_entry);
691                 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
692                 jbd_free_revoke_entry(revoke_entry);
693         }
694 }
695
/* Make sure we wrap around the log correctly!
 * If var has reached maxlen, pull it back by (maxlen - first).
 * var must be a modifiable lvalue; it is evaluated more than once. */
#define wrap(sb, var)							\
do {									\
	if ((var) >= jbd_get32((sb), maxlen))				\
		(var) -= (jbd_get32((sb), maxlen) -			\
			  jbd_get32((sb), first));			\
} while (0)
702
/* Values accepted as the `action` argument of jbd_iterate_log(). */
#define ACTION_SCAN	0
#define ACTION_REVOKE	1
#define ACTION_RECOVER	2
706
707 /**@brief  Add entries in a revoke block to revoke tree.
708  * @param  jbd_fs jbd filesystem
709  * @param  header revoke block header
710  * @param  recover_info  journal replay info*/
711 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
712                                   struct jbd_bhdr *header,
713                                   struct recover_info *info)
714 {
715         char *blocks_entry;
716         struct jbd_revoke_header *revoke_hdr =
717                 (struct jbd_revoke_header *)header;
718         uint32_t i, nr_entries, record_len = 4;
719
720         /* If we are working on a 64bit jbd filesystem, */
721         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
722                                      JBD_FEATURE_INCOMPAT_64BIT))
723                 record_len = 8;
724
725         nr_entries = (jbd_get32(revoke_hdr, count) -
726                         sizeof(struct jbd_revoke_header)) /
727                         record_len;
728
729         blocks_entry = (char *)(revoke_hdr + 1);
730
731         for (i = 0;i < nr_entries;i++) {
732                 if (record_len == 8) {
733                         uint64_t *blocks =
734                                 (uint64_t *)blocks_entry;
735                         jbd_add_revoke_block_tags(info, to_be64(*blocks));
736                 } else {
737                         uint32_t *blocks =
738                                 (uint32_t *)blocks_entry;
739                         jbd_add_revoke_block_tags(info, to_be32(*blocks));
740                 }
741                 blocks_entry += record_len;
742         }
743 }
744
745 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
746                                        struct jbd_bhdr *header,
747                                        uint32_t *iblock)
748 {
749         jbd_iterate_block_table(jbd_fs,
750                                 header + 1,
751                                 jbd_get32(&jbd_fs->sb, blocksize) -
752                                         sizeof(struct jbd_bhdr),
753                                 jbd_display_block_tags,
754                                 iblock);
755 }
756
757 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
758                                         struct jbd_bhdr *header,
759                                         struct replay_arg *arg)
760 {
761         jbd_iterate_block_table(jbd_fs,
762                                 header + 1,
763                                 jbd_get32(&jbd_fs->sb, blocksize) -
764                                         sizeof(struct jbd_bhdr),
765                                 jbd_replay_block_tags,
766                                 arg);
767 }
768
769 /**@brief  The core routine of journal replay.
770  * @param  jbd_fs jbd filesystem
771  * @param  recover_info  journal replay info
772  * @param  action action needed to be taken
773  * @return standard error code*/
774 static int jbd_iterate_log(struct jbd_fs *jbd_fs,
775                            struct recover_info *info,
776                            int action)
777 {
778         int r = EOK;
779         bool log_end = false;
780         struct jbd_sb *sb = &jbd_fs->sb;
781         uint32_t start_trans_id, this_trans_id;
782         uint32_t start_block, this_block;
783
784         /* We start iterating valid blocks in the whole journal.*/
785         start_trans_id = this_trans_id = jbd_get32(sb, sequence);
786         start_block = this_block = jbd_get32(sb, start);
787
788         ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
789                             start_trans_id);
790
791         while (!log_end) {
792                 struct ext4_block block;
793                 struct jbd_bhdr *header;
794                 /* If we are not scanning for the last
795                  * valid transaction in the journal,
796                  * we will stop when we reach the end of
797                  * the journal.*/
798                 if (action != ACTION_SCAN)
799                         if (this_trans_id > info->last_trans_id) {
800                                 log_end = true;
801                                 continue;
802                         }
803
804                 r = jbd_block_get(jbd_fs, &block, this_block);
805                 if (r != EOK)
806                         break;
807
808                 header = (struct jbd_bhdr *)block.data;
809                 /* This block does not have a valid magic number,
810                  * so we have reached the end of the journal.*/
811                 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
812                         jbd_block_set(jbd_fs, &block);
813                         log_end = true;
814                         continue;
815                 }
816
817                 /* If the transaction id we found is not expected,
818                  * we may have reached the end of the journal.
819                  *
820                  * If we are not scanning the journal, something
821                  * bad might have taken place. :-( */
822                 if (jbd_get32(header, sequence) != this_trans_id) {
823                         if (action != ACTION_SCAN)
824                                 r = EIO;
825
826                         jbd_block_set(jbd_fs, &block);
827                         log_end = true;
828                         continue;
829                 }
830
831                 switch (jbd_get32(header, blocktype)) {
832                 case JBD_DESCRIPTOR_BLOCK:
833                         ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
834                                             "trans_id: %" PRIu32"\n",
835                                             this_block, this_trans_id);
836                         if (action == ACTION_RECOVER) {
837                                 struct replay_arg replay_arg;
838                                 replay_arg.info = info;
839                                 replay_arg.this_block = &this_block;
840                                 replay_arg.this_trans_id = this_trans_id;
841
842                                 jbd_replay_descriptor_block(jbd_fs,
843                                                 header, &replay_arg);
844                         } else
845                                 jbd_debug_descriptor_block(jbd_fs,
846                                                 header, &this_block);
847
848                         break;
849                 case JBD_COMMIT_BLOCK:
850                         ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
851                                             "trans_id: %" PRIu32"\n",
852                                             this_block, this_trans_id);
853                         /* This is the end of a transaction,
854                          * we may now proceed to the next transaction.
855                          */
856                         this_trans_id++;
857                         break;
858                 case JBD_REVOKE_BLOCK:
859                         ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
860                                             "trans_id: %" PRIu32"\n",
861                                             this_block, this_trans_id);
862                         if (action == ACTION_REVOKE) {
863                                 info->this_trans_id = this_trans_id;
864                                 jbd_build_revoke_tree(jbd_fs,
865                                                 header, info);
866                         }
867                         break;
868                 default:
869                         log_end = true;
870                         break;
871                 }
872                 jbd_block_set(jbd_fs, &block);
873                 this_block++;
874                 wrap(sb, this_block);
875                 if (this_block == start_block)
876                         log_end = true;
877
878         }
879         ext4_dbg(DEBUG_JBD, "End of journal.\n");
880         if (r == EOK && action == ACTION_SCAN) {
881                 /* We have finished scanning the journal. */
882                 info->start_trans_id = start_trans_id;
883                 if (this_trans_id > start_trans_id)
884                         info->last_trans_id = this_trans_id - 1;
885                 else
886                         info->last_trans_id = this_trans_id;
887         }
888
889         return r;
890 }
891
892 /**@brief  Replay journal.
893  * @param  jbd_fs jbd filesystem
894  * @return standard error code*/
895 int jbd_recover(struct jbd_fs *jbd_fs)
896 {
897         int r;
898         struct recover_info info;
899         struct jbd_sb *sb = &jbd_fs->sb;
900         if (!sb->start)
901                 return EOK;
902
903         RB_INIT(&info.revoke_root);
904
905         r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
906         if (r != EOK)
907                 return r;
908
909         r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
910         if (r != EOK)
911                 return r;
912
913         r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
914         if (r == EOK) {
915                 /* If we successfully replay the journal,
916                  * clear EXT4_FINCOM_RECOVER flag on the
917                  * ext4 superblock, and set the start of
918                  * journal to 0.*/
919                 uint32_t features_incompatible =
920                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
921                                    features_incompatible);
922                 jbd_set32(&jbd_fs->sb, start, 0);
923                 features_incompatible &= ~EXT4_FINCOM_RECOVER;
924                 ext4_set32(&jbd_fs->inode_ref.fs->sb,
925                            features_incompatible,
926                            features_incompatible);
927                 jbd_fs->dirty = true;
928                 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
929                                   &jbd_fs->inode_ref.fs->sb);
930         }
931         jbd_destroy_revoke_tree(&info);
932         return r;
933 }
934
935 static void jbd_journal_write_sb(struct jbd_journal *journal)
936 {
937         struct jbd_fs *jbd_fs = journal->jbd_fs;
938         jbd_set32(&jbd_fs->sb, start, journal->start);
939         jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
940         jbd_fs->dirty = true;
941 }
942
943 /**@brief  Start accessing the journal.
944  * @param  jbd_fs jbd filesystem
945  * @param  journal current journal session
946  * @return standard error code*/
947 int jbd_journal_start(struct jbd_fs *jbd_fs,
948                       struct jbd_journal *journal)
949 {
950         int r;
951         uint32_t features_incompatible =
952                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
953                                    features_incompatible);
954         features_incompatible |= EXT4_FINCOM_RECOVER;
955         ext4_set32(&jbd_fs->inode_ref.fs->sb,
956                         features_incompatible,
957                         features_incompatible);
958         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
959                         &jbd_fs->inode_ref.fs->sb);
960         if (r != EOK)
961                 return r;
962
963         journal->first = jbd_get32(&jbd_fs->sb, first);
964         journal->start = journal->first;
965         journal->last = journal->first;
966         journal->trans_id = 1;
967         journal->alloc_trans_id = 1;
968
969         journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
970
971         TAILQ_INIT(&journal->trans_queue);
972         TAILQ_INIT(&journal->cp_queue);
973         journal->jbd_fs = jbd_fs;
974         jbd_journal_write_sb(journal);
975         return jbd_write_sb(jbd_fs);
976 }
977
978 /**@brief  Stop accessing the journal.
979  * @param  journal current journal session
980  * @return standard error code*/
981 int jbd_journal_stop(struct jbd_journal *journal)
982 {
983         int r;
984         struct jbd_fs *jbd_fs = journal->jbd_fs;
985         uint32_t features_incompatible;
986
987         /* Commit all the transactions to the journal.*/
988         jbd_journal_commit_all(journal);
989         /* Make sure that journalled content have reached
990          * the disk.*/
991         ext4_block_cache_flush(jbd_fs->inode_ref.fs->bdev);
992
993         features_incompatible =
994                 ext4_get32(&jbd_fs->inode_ref.fs->sb,
995                            features_incompatible);
996         features_incompatible &= ~EXT4_FINCOM_RECOVER;
997         ext4_set32(&jbd_fs->inode_ref.fs->sb,
998                         features_incompatible,
999                         features_incompatible);
1000         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
1001                         &jbd_fs->inode_ref.fs->sb);
1002         if (r != EOK)
1003                 return r;
1004
1005         journal->start = 0;
1006         journal->trans_id = 0;
1007         jbd_journal_write_sb(journal);
1008         return jbd_write_sb(journal->jbd_fs);
1009 }
1010
1011 /**@brief  Allocate a block in the journal.
1012  * @param  journal current journal session
1013  * @param  trans transaction
1014  * @return allocated block address*/
1015 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
1016                                         struct jbd_trans *trans)
1017 {
1018         uint32_t start_block;
1019
1020         start_block = journal->last++;
1021         trans->alloc_blocks++;
1022         wrap(&journal->jbd_fs->sb, journal->last);
1023         
1024         /* If there is no space left, flush all journalled
1025          * blocks to disk first.*/
1026         if (journal->last == journal->start)
1027                 ext4_block_cache_flush(journal->jbd_fs->inode_ref.fs->bdev);
1028
1029         return start_block;
1030 }
1031
1032 /**@brief  Allocate a new transaction
1033  * @param  journal current journal session
1034  * @return transaction allocated*/
1035 struct jbd_trans *
1036 jbd_journal_new_trans(struct jbd_journal *journal)
1037 {
1038         struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
1039         if (!trans)
1040                 return NULL;
1041
1042         /* We will assign a trans_id to this transaction,
1043          * once it has been committed.*/
1044         trans->journal = journal;
1045         trans->error = EOK;
1046         return trans;
1047 }
1048
1049 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1050                           struct ext4_buf *buf __unused,
1051                           int res,
1052                           void *arg);
1053
1054 /**@brief  Add block to a transaction and gain
1055  *         access to it before making any modications.
1056  * @param  trans transaction
1057  * @param  block block descriptor
1058  * @return standard error code*/
1059 int jbd_trans_add_block(struct jbd_trans *trans,
1060                         struct ext4_block *block)
1061 {
1062         struct jbd_buf *buf;
1063         struct ext4_fs *fs =
1064                 trans->journal->jbd_fs->inode_ref.fs;
1065
1066         /* If the buffer has already been modified, we should
1067          * flush dirty data in this buffer to disk.*/
1068         if (ext4_bcache_test_flag(block->buf, BC_DIRTY)) {
1069                 /* XXX: i don't want to know whether the call
1070                  * succeeds or not. */
1071                 ext4_block_flush_buf(fs->bdev, block->buf);
1072         }
1073
1074         buf = calloc(1, sizeof(struct jbd_buf));
1075         if (!buf)
1076                 return ENOMEM;
1077
1078         buf->trans = trans;
1079         buf->block = *block;
1080         ext4_bcache_inc_ref(block->buf);
1081
1082         /* If the content reach the disk, notify us
1083          * so that we may do a checkpoint. */
1084         block->buf->end_write = jbd_trans_end_write;
1085         block->buf->end_write_arg = buf;
1086
1087         trans->data_cnt++;
1088         LIST_INSERT_HEAD(&trans->buf_list, buf, buf_node);
1089         return EOK;
1090 }
1091
1092 /**@brief  Add block to be revoked to a transaction
1093  * @param  trans transaction
1094  * @param  lba logical block address
1095  * @return standard error code*/
1096 int jbd_trans_revoke_block(struct jbd_trans *trans,
1097                            ext4_fsblk_t lba)
1098 {
1099         struct jbd_revoke_rec *rec =
1100                 calloc(1, sizeof(struct jbd_revoke_rec));
1101         if (!rec)
1102                 return ENOMEM;
1103
1104         rec->lba = lba;
1105         LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
1106         return EOK;
1107 }
1108
1109 /**@brief  Free a transaction
1110  * @param  journal current journal session
1111  * @param  trans transaction
1112  * @param  abort discard all the modifications on the block?
1113  * @return standard error code*/
1114 void jbd_journal_free_trans(struct jbd_journal *journal,
1115                             struct jbd_trans *trans,
1116                             bool abort)
1117 {
1118         struct jbd_buf *jbd_buf, *tmp;
1119         struct jbd_revoke_rec *rec, *tmp2;
1120         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1121         LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
1122                           tmp) {
1123                 if (abort) {
1124                         ext4_bcache_clear_dirty(jbd_buf->block.buf);
1125                         ext4_block_set(fs->bdev, &jbd_buf->block);
1126                 }
1127
1128                 LIST_REMOVE(jbd_buf, buf_node);
1129                 free(jbd_buf);
1130         }
1131         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1132                           tmp2) {
1133                 LIST_REMOVE(rec, revoke_node);
1134                 free(rec);
1135         }
1136
1137         free(trans);
1138 }
1139
1140 /**@brief  Write commit block for a transaction
1141  * @param  trans transaction
1142  * @return standard error code*/
1143 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
1144 {
1145         int rc;
1146         struct jbd_commit_header *header;
1147         uint32_t commit_iblock = 0;
1148         struct ext4_block commit_block;
1149         struct jbd_journal *journal = trans->journal;
1150
1151         commit_iblock = jbd_journal_alloc_block(journal, trans);
1152         rc = jbd_block_get_noread(journal->jbd_fs,
1153                         &commit_block, commit_iblock);
1154         if (rc != EOK)
1155                 return rc;
1156
1157         header = (struct jbd_commit_header *)commit_block.data;
1158         jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
1159         jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
1160         jbd_set32(&header->header, sequence, trans->trans_id);
1161
1162         ext4_bcache_set_dirty(commit_block.buf);
1163         rc = jbd_block_set(journal->jbd_fs, &commit_block);
1164         if (rc != EOK)
1165                 return rc;
1166
1167         return EOK;
1168 }
1169
1170 /**@brief  Write descriptor block for a transaction
1171  * @param  journal current journal session
1172  * @param  trans transaction
1173  * @return standard error code*/
1174 static int jbd_journal_prepare(struct jbd_journal *journal,
1175                                struct jbd_trans *trans)
1176 {
1177         int rc = EOK, i = 0;
1178         int32_t tag_tbl_size;
1179         uint32_t desc_iblock = 0;
1180         uint32_t data_iblock = 0;
1181         char *tag_start = NULL, *tag_ptr = NULL;
1182         struct jbd_buf *jbd_buf, *tmp;
1183         struct ext4_block desc_block, data_block;
1184         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1185
1186         LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node, tmp) {
1187                 struct tag_info tag_info;
1188                 bool uuid_exist = false;
1189                 if (!ext4_bcache_test_flag(jbd_buf->block.buf,
1190                                            BC_DIRTY)) {
1191                         /* The buffer has not been modified, just release
1192                          * that jbd_buf. */
1193                         ext4_block_set(fs->bdev, &jbd_buf->block);
1194                         LIST_REMOVE(jbd_buf, buf_node);
1195                         free(jbd_buf);
1196                         continue;
1197                 }
1198 again:
1199                 if (!desc_iblock) {
1200                         struct jbd_bhdr *bhdr;
1201                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1202                         rc = jbd_block_get_noread(journal->jbd_fs,
1203                                            &desc_block, desc_iblock);
1204                         if (rc != EOK)
1205                                 break;
1206
1207                         ext4_bcache_set_dirty(desc_block.buf);
1208
1209                         bhdr = (struct jbd_bhdr *)desc_block.data;
1210                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1211                         jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
1212                         jbd_set32(bhdr, sequence, trans->trans_id);
1213
1214                         tag_start = (char *)(bhdr + 1);
1215                         tag_ptr = tag_start;
1216                         uuid_exist = true;
1217                         tag_tbl_size = journal->block_size -
1218                                 sizeof(struct jbd_bhdr);
1219
1220                         if (!trans->start_iblock)
1221                                 trans->start_iblock = desc_iblock;
1222
1223                 }
1224                 tag_info.block = jbd_buf->block.lb_id;
1225                 tag_info.uuid_exist = uuid_exist;
1226                 if (i == trans->data_cnt - 1)
1227                         tag_info.last_tag = true;
1228
1229                 if (uuid_exist)
1230                         memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1231                                         UUID_SIZE);
1232
1233                 rc = jbd_write_block_tag(journal->jbd_fs,
1234                                 tag_ptr,
1235                                 tag_tbl_size,
1236                                 &tag_info);
1237                 if (rc != EOK) {
1238                         jbd_block_set(journal->jbd_fs, &desc_block);
1239                         desc_iblock = 0;
1240                         goto again;
1241                 }
1242
1243                 data_iblock = jbd_journal_alloc_block(journal, trans);
1244                 rc = jbd_block_get_noread(journal->jbd_fs,
1245                                 &data_block, data_iblock);
1246                 if (rc != EOK)
1247                         break;
1248
1249                 ext4_bcache_set_dirty(data_block.buf);
1250
1251                 memcpy(data_block.data, jbd_buf->block.data,
1252                         journal->block_size);
1253
1254                 rc = jbd_block_set(journal->jbd_fs, &data_block);
1255                 if (rc != EOK)
1256                         break;
1257
1258                 tag_ptr += tag_info.tag_bytes;
1259                 tag_tbl_size -= tag_info.tag_bytes;
1260
1261                 i++;
1262         }
1263         if (rc == EOK && desc_iblock)
1264                 jbd_block_set(journal->jbd_fs, &desc_block);
1265
1266         return rc;
1267 }
1268
1269 /**@brief  Write revoke block for a transaction
1270  * @param  journal current journal session
1271  * @param  trans transaction
1272  * @return standard error code*/
1273 static int
1274 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1275                            struct jbd_trans *trans)
1276 {
1277         int rc = EOK, i = 0;
1278         int32_t tag_tbl_size;
1279         uint32_t desc_iblock = 0;
1280         char *blocks_entry = NULL;
1281         struct jbd_revoke_rec *rec, *tmp;
1282         struct ext4_block desc_block;
1283         struct jbd_revoke_header *header = NULL;
1284         int32_t record_len = 4;
1285
1286         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1287                                      JBD_FEATURE_INCOMPAT_64BIT))
1288                 record_len = 8;
1289
1290         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1291                           tmp) {
1292 again:
1293                 if (!desc_iblock) {
1294                         struct jbd_bhdr *bhdr;
1295                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1296                         rc = jbd_block_get_noread(journal->jbd_fs,
1297                                            &desc_block, desc_iblock);
1298                         if (rc != EOK) {
1299                                 break;
1300                         }
1301
1302                         ext4_bcache_set_dirty(desc_block.buf);
1303
1304                         bhdr = (struct jbd_bhdr *)desc_block.data;
1305                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1306                         jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1307                         jbd_set32(bhdr, sequence, trans->trans_id);
1308                         
1309                         header = (struct jbd_revoke_header *)bhdr;
1310                         blocks_entry = (char *)(header + 1);
1311                         tag_tbl_size = journal->block_size -
1312                                 sizeof(struct jbd_revoke_header);
1313
1314                         if (!trans->start_iblock)
1315                                 trans->start_iblock = desc_iblock;
1316
1317                 }
1318
1319                 if (tag_tbl_size < record_len) {
1320                         jbd_set32(header, count,
1321                                   journal->block_size - tag_tbl_size);
1322                         jbd_block_set(journal->jbd_fs, &desc_block);
1323                         desc_iblock = 0;
1324                         header = NULL;
1325                         goto again;
1326                 }
1327                 if (record_len == 8) {
1328                         uint64_t *blocks =
1329                                 (uint64_t *)blocks_entry;
1330                         *blocks = to_be64(rec->lba);
1331                 } else {
1332                         uint32_t *blocks =
1333                                 (uint32_t *)blocks_entry;
1334                         *blocks = to_be32(rec->lba);
1335                 }
1336                 blocks_entry += record_len;
1337                 tag_tbl_size -= record_len;
1338
1339                 i++;
1340         }
1341         if (rc == EOK && desc_iblock) {
1342                 if (header != NULL)
1343                         jbd_set32(header, count,
1344                                   journal->block_size - tag_tbl_size);
1345
1346                 jbd_block_set(journal->jbd_fs, &desc_block);
1347         }
1348
1349         return rc;
1350 }
1351
1352 /**@brief  Submit the transaction to transaction queue.
1353  * @param  journal current journal session
1354  * @param  trans transaction*/
1355 void
1356 jbd_journal_submit_trans(struct jbd_journal *journal,
1357                          struct jbd_trans *trans)
1358 {
1359         TAILQ_INSERT_TAIL(&journal->trans_queue,
1360                           trans,
1361                           trans_node);
1362 }
1363
1364 /**@brief  Put references of block descriptors in a transaction.
1365  * @param  journal current journal session
1366  * @param  trans transaction*/
1367 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
1368 {
1369         struct jbd_buf *jbd_buf, *tmp;
1370         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1371         LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
1372                         tmp) {
1373                 struct ext4_block block = jbd_buf->block;
1374                 ext4_block_set(fs->bdev, &block);
1375         }
1376 }
1377
1378 /**@brief  Update the start block of the journal when
1379  *         all the contents in a transaction reach the disk.*/
1380 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1381                           struct ext4_buf *buf,
1382                           int res,
1383                           void *arg)
1384 {
1385         struct jbd_buf *jbd_buf = arg;
1386         struct jbd_trans *trans = jbd_buf->trans;
1387         struct jbd_journal *journal = trans->journal;
1388         bool first_in_queue =
1389                 trans == TAILQ_FIRST(&journal->cp_queue);
1390         if (res != EOK)
1391                 trans->error = res;
1392
1393         /* Clear the end_write and end_write_arg fields. */
1394         buf->end_write = NULL;
1395         buf->end_write_arg = NULL;
1396
1397         trans->written_cnt++;
1398         if (trans->written_cnt == trans->data_cnt) {
1399                 TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
1400
1401                 if (first_in_queue) {
1402                         journal->start = trans->start_iblock +
1403                                 trans->alloc_blocks;
1404                         wrap(&journal->jbd_fs->sb, journal->start);
1405                         journal->trans_id = trans->trans_id + 1;
1406                 }
1407                 jbd_journal_free_trans(journal, trans, false);
1408
1409                 if (first_in_queue) {
1410                         while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1411                                 if (!trans->data_cnt) {
1412                                         TAILQ_REMOVE(&journal->cp_queue,
1413                                                      trans,
1414                                                      trans_node);
1415                                         journal->start = trans->start_iblock +
1416                                                 trans->alloc_blocks;
1417                                         wrap(&journal->jbd_fs->sb, journal->start);
1418                                         journal->trans_id = trans->trans_id + 1;
1419                                         jbd_journal_free_trans(journal,
1420                                                                trans, false);
1421                                 } else {
1422                                         journal->start = trans->start_iblock;
1423                                         wrap(&journal->jbd_fs->sb, journal->start);
1424                                         journal->trans_id = trans->trans_id;
1425                                         break;
1426                                 }
1427                         }
1428                         jbd_journal_write_sb(journal);
1429                         jbd_write_sb(journal->jbd_fs);
1430                 }
1431         }
1432 }
1433
1434 /**@brief  Commit a transaction to the journal immediately.
1435  * @param  journal current journal session
1436  * @param  trans transaction
1437  * @return standard error code*/
1438 int jbd_journal_commit_trans(struct jbd_journal *journal,
1439                              struct jbd_trans *trans)
1440 {
1441         int rc = EOK;
1442         uint32_t last = journal->last;
1443
1444         trans->trans_id = journal->alloc_trans_id;
1445         rc = jbd_journal_prepare(journal, trans);
1446         if (rc != EOK)
1447                 goto Finish;
1448
1449         rc = jbd_journal_prepare_revoke(journal, trans);
1450         if (rc != EOK)
1451                 goto Finish;
1452
1453         if (LIST_EMPTY(&trans->buf_list) &&
1454             LIST_EMPTY(&trans->revoke_list)) {
1455                 /* Since there are no entries in both buffer list
1456                  * and revoke entry list, we do not consider trans as
1457                  * complete transaction and just return EOK.*/
1458                 jbd_journal_free_trans(journal, trans, false);
1459                 goto Finish;
1460         }
1461
1462         rc = jbd_trans_write_commit_block(trans);
1463         if (rc != EOK)
1464                 goto Finish;
1465
1466         journal->alloc_trans_id++;
1467         if (TAILQ_EMPTY(&journal->cp_queue)) {
1468                 if (trans->data_cnt) {
1469                         journal->start = trans->start_iblock;
1470                         wrap(&journal->jbd_fs->sb, journal->start);
1471                         journal->trans_id = trans->trans_id;
1472                         jbd_journal_write_sb(journal);
1473                         jbd_write_sb(journal->jbd_fs);
1474                         TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1475                                         trans_node);
1476                         jbd_journal_cp_trans(journal, trans);
1477                 } else {
1478                         journal->start = trans->start_iblock +
1479                                 trans->alloc_blocks;
1480                         wrap(&journal->jbd_fs->sb, journal->start);
1481                         journal->trans_id = trans->trans_id + 1;
1482                         jbd_journal_write_sb(journal);
1483                         jbd_journal_free_trans(journal, trans, false);
1484                 }
1485         } else {
1486                 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1487                                 trans_node);
1488                 if (trans->data_cnt)
1489                         jbd_journal_cp_trans(journal, trans);
1490
1491         }
1492 Finish:
1493         if (rc != EOK) {
1494                 journal->last = last;
1495                 jbd_journal_free_trans(journal, trans, true);
1496         }
1497         return rc;
1498 }
1499
1500 /**@brief  Commit one transaction on transaction queue
1501  *         to the journal.
1502  * @param  journal current journal session.*/
1503 void jbd_journal_commit_one(struct jbd_journal *journal)
1504 {
1505         struct jbd_trans *trans;
1506
1507         if ((trans = TAILQ_FIRST(&journal->trans_queue))) {
1508                 TAILQ_REMOVE(&journal->trans_queue, trans, trans_node);
1509                 jbd_journal_commit_trans(journal, trans);
1510         }
1511 }
1512
1513 /**@brief  Commit all the transactions on transaction queue
1514  *         to the journal.
1515  * @param  journal current journal session.*/
1516 void jbd_journal_commit_all(struct jbd_journal *journal)
1517 {
1518         while (!TAILQ_EMPTY(&journal->trans_queue)) {
1519                 jbd_journal_commit_one(journal);
1520         }
1521 }
1522
1523 /**
1524  * @}
1525  */