4bbaeb07af8fa9e7c9e2935af2ef0b547435a44b
[lwext4.git] / lwext4 / ext4_journal.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * - Redistributions of source code must retain the above copyright
11  *   notice, this list of conditions and the following disclaimer.
12  * - Redistributions in binary form must reproduce the above copyright
13  *   notice, this list of conditions and the following disclaimer in the
14  *   documentation and/or other materials provided with the distribution.
15  * - The name of the author may not be used to endorse or promote products
16  *   derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 /** @addtogroup lwext4
31  * @{
32  */
33 /**
34  * @file  ext4_journal.c
35  * @brief Journal handle functions
36  */
37
38 #include "ext4_config.h"
39 #include "ext4_types.h"
40 #include "ext4_fs.h"
41 #include "ext4_super.h"
42 #include "ext4_errno.h"
43 #include "ext4_blockdev.h"
44 #include "ext4_crc32c.h"
45 #include "ext4_debug.h"
46 #include "tree.h"
47
48 #include <string.h>
49 #include <stdlib.h>
50
/**@brief One node of the revoke tree: a block number together with the id
 *        of the transaction whose revoke block listed it. */
51 struct revoke_entry {
           /* Revoked block number; the only field jbd_revoke_entry_cmp() compares. */
52         ext4_fsblk_t block;
           /* Id of the transaction that (most recently) revoked the block. */
53         uint32_t trans_id;
           /* Linkage inside the jbd_revoke red-black tree. */
54         RB_ENTRY(revoke_entry) revoke_node;
55 };
56
/**@brief State shared by the passes of jbd_iterate_log() during recovery. */
57 struct recover_info {
           /* First transaction id of the log; filled in by the scan pass. */
58         uint32_t start_trans_id;
           /* Last transaction id to process; filled in by the scan pass. */
59         uint32_t last_trans_id;
           /* Id of the transaction whose revoke block is currently being read. */
60         uint32_t this_trans_id;
           /* Root of the tree of revoke_entry nodes, keyed by block number. */
61         RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
62 };
63
/**@brief Argument handed to jbd_replay_block_tags() for every tag of a
 *        descriptor block. */
64 struct replay_arg {
           /* Recovery state holding the revoke tree. */
65         struct recover_info *info;
           /* Journal block cursor of the caller; incremented once per tag. */
66         uint32_t *this_block;
           /* Id of the transaction the descriptor block belongs to. */
67         uint32_t this_trans_id;
68 };
69
70 static int
71 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
72 {
73         if (a->block > b->block)
74                 return 1;
75         else if (a->block < b->block)
76                 return -1;
77         return 0;
78 }
79
/* Instantiate the red-black tree helpers (declared static inline) for the
 * jbd_revoke tree of revoke_entry nodes, ordered by jbd_revoke_entry_cmp(). */
80 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
81                      jbd_revoke_entry_cmp, static inline)
82
/* Allocate one zero-filled revoke_entry; evaluates to NULL when calloc() fails. */
83 #define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
/* Release an entry obtained from jbd_alloc_revoke_entry(). */
84 #define jbd_free_revoke_entry(addr) free(addr)
85
/* Forward declaration; the definition appears further down in this file and
 * is needed earlier by jbd_sb_write()/jbd_sb_read(). */
86 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
87                    ext4_lblk_t iblock,
88                    ext4_fsblk_t *fblock);
89
90 int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
91 {
92         int rc;
93         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
94         uint64_t offset;
95         ext4_fsblk_t fblock;
96         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
97         if (rc != EOK)
98                 return rc;
99
100         offset = fblock * ext4_sb_get_block_size(&fs->sb);
101         return ext4_block_writebytes(fs->bdev, offset, s,
102                                      EXT4_SUPERBLOCK_SIZE);
103 }
104
105 int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
106 {
107         int rc;
108         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
109         uint64_t offset;
110         ext4_fsblk_t fblock;
111         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
112         if (rc != EOK)
113                 return rc;
114
115         offset = fblock * ext4_sb_get_block_size(&fs->sb);
116         return ext4_block_readbytes(fs->bdev, offset, s,
117                                     EXT4_SUPERBLOCK_SIZE);
118 }
119
120 static bool jbd_verify_sb(struct jbd_sb *sb)
121 {
122         struct jbd_bhdr *header = &sb->header;
123         if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
124                 return false;
125
126         if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
127             jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
128                 return false;
129
130         return true;
131 }
132
133 static int jbd_write_sb(struct jbd_fs *jbd_fs)
134 {
135         int rc = EOK;
136         if (jbd_fs->dirty) {
137                 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
138                 if (rc != EOK)
139                         return rc;
140
141                 jbd_fs->dirty = false;
142         }
143         return rc;
144 }
145
146 int jbd_get_fs(struct ext4_fs *fs,
147                struct jbd_fs *jbd_fs)
148 {
149         int rc;
150         uint32_t journal_ino;
151
152         memset(jbd_fs, 0, sizeof(struct jbd_fs));
153         journal_ino = ext4_get32(&fs->sb, journal_inode_number);
154
155         rc = ext4_fs_get_inode_ref(fs,
156                                    journal_ino,
157                                    &jbd_fs->inode_ref);
158         if (rc != EOK) {
159                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
160                 return rc;
161         }
162         rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
163         if (rc != EOK) {
164                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
165                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
166                 return rc;
167         }
168         if (!jbd_verify_sb(&jbd_fs->sb)) {
169                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
170                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
171                 rc = EIO;
172         }
173
174         return rc;
175 }
176
177 int jbd_put_fs(struct jbd_fs *jbd_fs)
178 {
179         int rc = EOK;
180         rc = jbd_write_sb(jbd_fs);
181
182         ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
183         return rc;
184 }
185
186 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
187                    ext4_lblk_t iblock,
188                    ext4_fsblk_t *fblock)
189 {
190         int rc = ext4_fs_get_inode_dblk_idx(
191                         &jbd_fs->inode_ref,
192                         iblock,
193                         fblock,
194                         false);
195         return rc;
196 }
197
198 int jbd_block_get(struct jbd_fs *jbd_fs,
199                   struct ext4_block *block,
200                   ext4_fsblk_t fblock)
201 {
202         /* TODO: journal device. */
203         int rc;
204         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
205         rc = jbd_inode_bmap(jbd_fs, iblock,
206                             &fblock);
207         if (rc != EOK)
208                 return rc;
209
210         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
211         rc = ext4_block_get(bdev, block, fblock);
212         if (rc == EOK)
213                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
214
215         return rc;
216 }
217
218 int jbd_block_get_noread(struct jbd_fs *jbd_fs,
219                          struct ext4_block *block,
220                          ext4_fsblk_t fblock)
221 {
222         /* TODO: journal device. */
223         int rc;
224         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
225         rc = jbd_inode_bmap(jbd_fs, iblock,
226                             &fblock);
227         if (rc != EOK)
228                 return rc;
229
230         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
231         rc = ext4_block_get_noread(bdev, block, fblock);
232         if (rc == EOK)
233                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
234
235         return rc;
236 }
237
238 int jbd_block_set(struct jbd_fs *jbd_fs,
239                   struct ext4_block *block)
240 {
241         return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
242                               block);
243 }
244
245 /*
246  * helper functions to deal with 32 or 64bit block numbers.
247  */
248 int jbd_tag_bytes(struct jbd_fs *jbd_fs)
249 {
250         int size;
251
252         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
253                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
254                 return sizeof(struct jbd_block_tag3);
255
256         size = sizeof(struct jbd_block_tag);
257
258         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
259                                      JBD_FEATURE_INCOMPAT_CSUM_V2))
260                 size += sizeof(uint16_t);
261
262         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
263                                      JBD_FEATURE_INCOMPAT_64BIT))
264                 return size;
265
266         return size - sizeof(uint32_t);
267 }
268
269 /**@brief: tag information. */
270 struct tag_info {
            /* Bytes occupied by the tag, including a trailing UUID if present. */
271         int tag_bytes;
            /* Block number stored in the tag; forced to 0 for escaped tags. */
272         ext4_fsblk_t block;
            /* True when a UUID follows the tag (JBD_FLAG_SAME_UUID not set). */
273         bool uuid_exist;
            /* Copy of that UUID; only meaningful when uuid_exist is true. */
274         uint8_t uuid[UUID_SIZE];
            /* True when the tag carries JBD_FLAG_LAST_TAG. */
275         bool last_tag;
276 };
277
278 static int
279 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
280                       void *__tag,
281                       int tag_bytes,
282                       int32_t remain_buf_size,
283                       struct tag_info *tag_info)
284 {
285         char *uuid_start;
286         tag_info->tag_bytes = tag_bytes;
287         tag_info->uuid_exist = false;
288         tag_info->last_tag = false;
289
290         if (remain_buf_size - tag_bytes < 0)
291                 return EINVAL;
292
293         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
294                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
295                 struct jbd_block_tag3 *tag = __tag;
296                 tag_info->block = jbd_get32(tag, blocknr);
297                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
298                                              JBD_FEATURE_INCOMPAT_64BIT))
299                          tag_info->block |=
300                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
301
302                 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
303                         tag_info->block = 0;
304
305                 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
306                         if (remain_buf_size - tag_bytes < UUID_SIZE)
307                                 return EINVAL;
308
309                         uuid_start = (char *)tag + tag_bytes;
310                         tag_info->uuid_exist = true;
311                         tag_info->tag_bytes += UUID_SIZE;
312                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
313                 }
314
315                 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
316                         tag_info->last_tag = true;
317
318         } else {
319                 struct jbd_block_tag *tag = __tag;
320                 tag_info->block = jbd_get32(tag, blocknr);
321                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
322                                              JBD_FEATURE_INCOMPAT_64BIT))
323                          tag_info->block |=
324                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
325
326                 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
327                         tag_info->block = 0;
328
329                 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
330                         if (remain_buf_size - tag_bytes < UUID_SIZE)
331                                 return EINVAL;
332
333                         uuid_start = (char *)tag + tag_bytes;
334                         tag_info->uuid_exist = true;
335                         tag_info->tag_bytes += UUID_SIZE;
336                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
337                 }
338
339                 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
340                         tag_info->last_tag = true;
341
342         }
343         return EOK;
344 }
345
346 static int
347 jbd_write_block_tag(struct jbd_fs *jbd_fs,
348                     void *__tag,
349                     int32_t remain_buf_size,
350                     struct tag_info *tag_info)
351 {
352         char *uuid_start;
353         int tag_bytes = jbd_tag_bytes(jbd_fs);
354
355         tag_info->tag_bytes = tag_bytes;
356
357         if (remain_buf_size - tag_bytes < 0)
358                 return EINVAL;
359
360         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
361                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
362                 struct jbd_block_tag3 *tag = __tag;
363                 jbd_set32(tag, blocknr, tag_info->block);
364                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
365                                              JBD_FEATURE_INCOMPAT_64BIT))
366                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
367
368                 if (tag_info->uuid_exist) {
369                         if (remain_buf_size - tag_bytes < UUID_SIZE)
370                                 return EINVAL;
371
372                         uuid_start = (char *)tag + tag_bytes;
373                         tag_info->tag_bytes += UUID_SIZE;
374                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
375                 } else
376                         jbd_set32(tag, flags,
377                                   jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
378
379                 if (tag_info->last_tag)
380                         jbd_set32(tag, flags,
381                                   jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
382
383         } else {
384                 struct jbd_block_tag *tag = __tag;
385                 jbd_set32(tag, blocknr, tag_info->block);
386                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
387                                              JBD_FEATURE_INCOMPAT_64BIT))
388                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
389
390                 if (tag_info->uuid_exist) {
391                         if (remain_buf_size - tag_bytes < UUID_SIZE)
392                                 return EINVAL;
393
394                         uuid_start = (char *)tag + tag_bytes;
395                         tag_info->tag_bytes += UUID_SIZE;
396                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
397                 } else
398                         jbd_set16(tag, flags,
399                                   jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
400
401                 if (tag_info->last_tag)
402                         jbd_set16(tag, flags,
403                                   jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
404
405         }
406         return EOK;
407 }
408
409 static void
410 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
411                         void *__tag_start,
412                         int32_t tag_tbl_size,
413                         void (*func)(struct jbd_fs * jbd_fs,
414                                         ext4_fsblk_t block,
415                                         uint8_t *uuid,
416                                         void *arg),
417                         void *arg)
418 {
419         char *tag_start, *tag_ptr;
420         int tag_bytes = jbd_tag_bytes(jbd_fs);
421         tag_start = __tag_start;
422         tag_ptr = tag_start;
423
424         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
425                                      JBD_FEATURE_INCOMPAT_CSUM_V2) ||
426             JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
427                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
428                 tag_tbl_size -= sizeof(struct jbd_block_tail);
429
430         while (tag_tbl_size) {
431                 struct tag_info tag_info;
432                 int rc = jbd_extract_block_tag(jbd_fs,
433                                       tag_ptr,
434                                       tag_bytes,
435                                       tag_tbl_size,
436                                       &tag_info);
437                 if (rc != EOK)
438                         break;
439
440                 if (func)
441                         func(jbd_fs, tag_info.block, tag_info.uuid, arg);
442
443                 if (tag_info.last_tag)
444                         break;
445
446                 tag_ptr += tag_info.tag_bytes;
447                 tag_tbl_size -= tag_info.tag_bytes;
448         }
449 }
450
451 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
452                                    ext4_fsblk_t block,
453                                    uint8_t *uuid,
454                                    void *arg)
455 {
456         uint32_t *iblock = arg;
457         ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
458         (*iblock)++;
459         (void)jbd_fs;
460         (void)uuid;
461         return;
462 }
463
464 static struct revoke_entry *
465 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
466 {
467         struct revoke_entry tmp = {
468                 .block = block
469         };
470
471         return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
472 }
473
474 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
475                                   ext4_fsblk_t block,
476                                   uint8_t *uuid __unused,
477                                   void *__arg)
478 {
479         int r;
480         struct replay_arg *arg = __arg;
481         struct recover_info *info = arg->info;
482         uint32_t *this_block = arg->this_block;
483         struct revoke_entry *revoke_entry;
484         struct ext4_block journal_block, ext4_block;
485         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
486
487         (*this_block)++;
488
489         revoke_entry = jbd_revoke_entry_lookup(info, block);
490         if (revoke_entry &&
491             arg->this_trans_id < revoke_entry->trans_id)
492                 return;
493
494         ext4_dbg(DEBUG_JBD,
495                  "Replaying block in block_tag: %" PRIu64 "\n",
496                  block);
497
498         r = jbd_block_get(jbd_fs, &journal_block, *this_block);
499         if (r != EOK)
500                 return;
501
502         if (block) {
503                 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
504                 if (r != EOK) {
505                         jbd_block_set(jbd_fs, &journal_block);
506                         return;
507                 }
508
509                 memcpy(ext4_block.data,
510                         journal_block.data,
511                         jbd_get32(&jbd_fs->sb, blocksize));
512
513                 ext4_bcache_set_dirty(ext4_block.buf);
514                 ext4_block_set(fs->bdev, &ext4_block);
515         } else {
516                 uint16_t mount_count, state;
517                 mount_count = ext4_get16(&fs->sb, mount_count);
518                 state = ext4_get16(&fs->sb, state);
519
520                 memcpy(&fs->sb,
521                         journal_block.data + EXT4_SUPERBLOCK_OFFSET,
522                         EXT4_SUPERBLOCK_SIZE);
523
524                 /* Mark system as mounted */
525                 ext4_set16(&fs->sb, state, state);
526                 r = ext4_sb_write(fs->bdev, &fs->sb);
527                 if (r != EOK)
528                         return;
529
530                 /*Update mount count*/
531                 ext4_set16(&fs->sb, mount_count, mount_count);
532         }
533
534         jbd_block_set(jbd_fs, &journal_block);
535         
536         return;
537 }
538
539 static void jbd_add_revoke_block_tags(struct recover_info *info,
540                                       ext4_fsblk_t block)
541 {
542         struct revoke_entry *revoke_entry;
543
544         ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
545         revoke_entry = jbd_revoke_entry_lookup(info, block);
546         if (revoke_entry) {
547                 revoke_entry->trans_id = info->this_trans_id;
548                 return;
549         }
550
551         revoke_entry = jbd_alloc_revoke_entry();
552         ext4_assert(revoke_entry);
553         revoke_entry->block = block;
554         revoke_entry->trans_id = info->this_trans_id;
555         RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
556
557         return;
558 }
559
560 static void jbd_destroy_revoke_tree(struct recover_info *info)
561 {
562         while (!RB_EMPTY(&info->revoke_root)) {
563                 struct revoke_entry *revoke_entry =
564                         RB_MIN(jbd_revoke, &info->revoke_root);
565                 ext4_assert(revoke_entry);
566                 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
567                 jbd_free_revoke_entry(revoke_entry);
568         }
569 }
570
/* Make sure we wrap around the log correctly!
 * When `var` has reached or passed the journal's `maxlen`, it is moved back
 * by (maxlen - first), i.e. into the range that starts at the first log
 * block. `var` must be a modifiable lvalue; it is evaluated more than once. */
#define wrap(sb, var)                                                   \
do {                                                                    \
        if ((var) >= jbd_get32((sb), maxlen))                           \
                (var) -= (jbd_get32((sb), maxlen) -                     \
                          jbd_get32((sb), first));                      \
} while (0)
577
/* Values of the `action` argument of jbd_iterate_log(). */
/* Scan pass: determines the first and last transaction ids of the log. */
578 #define ACTION_SCAN 0
/* Revoke pass: revoke blocks are collected into the revoke tree. */
579 #define ACTION_REVOKE 1
/* Recover pass: descriptor blocks are replayed. */
580 #define ACTION_RECOVER 2
581
582
583 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
584                                   struct jbd_bhdr *header,
585                                   struct recover_info *info)
586 {
587         char *blocks_entry;
588         struct jbd_revoke_header *revoke_hdr =
589                 (struct jbd_revoke_header *)header;
590         uint32_t i, nr_entries, record_len = 4;
591         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
592                                      JBD_FEATURE_INCOMPAT_64BIT))
593                 record_len = 8;
594
595         nr_entries = (jbd_get32(revoke_hdr, count) -
596                         sizeof(struct jbd_revoke_header)) /
597                         record_len;
598
599         blocks_entry = (char *)(revoke_hdr + 1);
600
601         for (i = 0;i < nr_entries;i++) {
602                 if (record_len == 8) {
603                         uint64_t *blocks =
604                                 (uint64_t *)blocks_entry;
605                         jbd_add_revoke_block_tags(info, to_be64(*blocks));
606                 } else {
607                         uint32_t *blocks =
608                                 (uint32_t *)blocks_entry;
609                         jbd_add_revoke_block_tags(info, to_be32(*blocks));
610                 }
611                 blocks_entry += record_len;
612         }
613 }
614
615 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
616                                        struct jbd_bhdr *header,
617                                        uint32_t *iblock)
618 {
619         jbd_iterate_block_table(jbd_fs,
620                                 header + 1,
621                                 jbd_get32(&jbd_fs->sb, blocksize) -
622                                         sizeof(struct jbd_bhdr),
623                                 jbd_display_block_tags,
624                                 iblock);
625 }
626
627 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
628                                         struct jbd_bhdr *header,
629                                         struct replay_arg *arg)
630 {
631         jbd_iterate_block_table(jbd_fs,
632                                 header + 1,
633                                 jbd_get32(&jbd_fs->sb, blocksize) -
634                                         sizeof(struct jbd_bhdr),
635                                 jbd_replay_block_tags,
636                                 arg);
637 }
638
/**@brief  Walk the journal log once, starting at the block and transaction
 *         id stored in the journal superblock.
 * @param  jbd_fs journal filesystem handle
 * @param  info   recovery state; the scan pass fills start_trans_id and
 *                last_trans_id, the later passes read last_trans_id
 * @param  action ACTION_SCAN, ACTION_REVOKE or ACTION_RECOVER
 * @return EOK when the end of the log was reached, the error of
 *         jbd_block_get() when a journal block could not be fetched, or EIO
 *         when a non-scan pass meets a block with an unexpected sequence */
639 int jbd_iterate_log(struct jbd_fs *jbd_fs,
640                     struct recover_info *info,
641                     int action)
642 {
643         int r = EOK;
644         bool log_end = false;
645         struct jbd_sb *sb = &jbd_fs->sb;
646         uint32_t start_trans_id, this_trans_id;
647         uint32_t start_block, this_block;
648
649         start_trans_id = this_trans_id = jbd_get32(sb, sequence);
650         start_block = this_block = jbd_get32(sb, start);
651
652         ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
653                             start_trans_id);
654
655         while (!log_end) {
656                 struct ext4_block block;
657                 struct jbd_bhdr *header;
                    /* Later passes stop once they are past the last
                     * transaction id found by the scan pass.
                     * NOTE(review): plain '>' does not account for id
                     * wraparound - confirm whether that matters here. */
658                 if (action != ACTION_SCAN)
659                         if (this_trans_id > info->last_trans_id) {
660                                 log_end = true;
661                                 continue;
662                         }
663
664                 r = jbd_block_get(jbd_fs, &block, this_block);
665                 if (r != EOK)
666                         break;
667
668                 header = (struct jbd_bhdr *)block.data;
                    /* A block without the journal magic ends the log. */
669                 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
670                         jbd_block_set(jbd_fs, &block);
671                         log_end = true;
672                         continue;
673                 }
674
                    /* A sequence mismatch ends the log; only the scan pass
                     * accepts it silently, the other passes report EIO. */
675                 if (jbd_get32(header, sequence) != this_trans_id) {
676                         if (action != ACTION_SCAN)
677                                 r = EIO;
678
679                         jbd_block_set(jbd_fs, &block);
680                         log_end = true;
681                         continue;
682                 }
683
684                 switch (jbd_get32(header, blocktype)) {
685                 case JBD_DESCRIPTOR_BLOCK:
686                         ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
687                                             "trans_id: %" PRIu32"\n",
688                                             this_block, this_trans_id);
                            /* Both callbacks advance this_block once per
                             * tag, stepping over the logged data blocks.
                             * NOTE(review): this_block is not wrapped while
                             * they do so - confirm. */
689                         if (action == ACTION_RECOVER) {
690                                 struct replay_arg replay_arg;
691                                 replay_arg.info = info;
692                                 replay_arg.this_block = &this_block;
693                                 replay_arg.this_trans_id = this_trans_id;
694
695                                 jbd_replay_descriptor_block(jbd_fs,
696                                                 header, &replay_arg);
697                         } else
698                                 jbd_debug_descriptor_block(jbd_fs,
699                                                 header, &this_block);
700
701                         break;
702                 case JBD_COMMIT_BLOCK:
703                         ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
704                                             "trans_id: %" PRIu32"\n",
705                                             this_block, this_trans_id);
                            /* Transaction complete: expect the next id. */
706                         this_trans_id++;
707                         break;
708                 case JBD_REVOKE_BLOCK:
709                         ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
710                                             "trans_id: %" PRIu32"\n",
711                                             this_block, this_trans_id);
                            /* Revoke records are collected only in the
                             * revoke pass. */
712                         if (action == ACTION_REVOKE) {
713                                 info->this_trans_id = this_trans_id;
714                                 jbd_build_revoke_tree(jbd_fs,
715                                                 header, info);
716                         }
717                         break;
718                 default:
                            /* Unknown block type: treat as end of log. */
719                         log_end = true;
720                         break;
721                 }
722                 jbd_block_set(jbd_fs, &block);
723                 this_block++;
724                 wrap(sb, this_block);
                    /* Back at the starting block: the whole log was walked. */
725                 if (this_block == start_block)
726                         log_end = true;
727
728         }
729         ext4_dbg(DEBUG_JBD, "End of journal.\n");
            /* The scan pass publishes the id range for the later passes:
             * the last id is the one before the first id not committed,
             * or the starting id when no commit block was seen. */
730         if (r == EOK && action == ACTION_SCAN) {
731                 info->start_trans_id = start_trans_id;
732                 if (this_trans_id > start_trans_id)
733                         info->last_trans_id = this_trans_id - 1;
734                 else
735                         info->last_trans_id = this_trans_id;
736         }
737
738         return r;
739 }
740
741 int jbd_recover(struct jbd_fs *jbd_fs)
742 {
743         int r;
744         struct recover_info info;
745         struct jbd_sb *sb = &jbd_fs->sb;
746         if (!sb->start)
747                 return EOK;
748
749         RB_INIT(&info.revoke_root);
750
751         r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
752         if (r != EOK)
753                 return r;
754
755         r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
756         if (r != EOK)
757                 return r;
758
759         r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
760         if (r == EOK) {
761                 uint32_t features_incompatible =
762                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
763                                    features_incompatible);
764                 jbd_set32(&jbd_fs->sb, start, 0);
765                 features_incompatible &= ~EXT4_FINCOM_RECOVER;
766                 ext4_set32(&jbd_fs->inode_ref.fs->sb,
767                            features_incompatible,
768                            features_incompatible);
769                 jbd_fs->dirty = true;
770                 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
771                                   &jbd_fs->inode_ref.fs->sb);
772         }
773         jbd_destroy_revoke_tree(&info);
774         return r;
775 }
776
777 void jbd_journal_write_sb(struct jbd_journal *journal)
778 {
779         struct jbd_fs *jbd_fs = journal->jbd_fs;
780         jbd_set32(&jbd_fs->sb, start, journal->start);
781         jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
782         jbd_fs->dirty = true;
783 }
784
785 int jbd_journal_start(struct jbd_fs *jbd_fs,
786                       struct jbd_journal *journal)
787 {
788         int r;
789         uint32_t features_incompatible =
790                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
791                                    features_incompatible);
792         features_incompatible |= EXT4_FINCOM_RECOVER;
793         ext4_set32(&jbd_fs->inode_ref.fs->sb,
794                         features_incompatible,
795                         features_incompatible);
796         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
797                         &jbd_fs->inode_ref.fs->sb);
798         if (r != EOK)
799                 return r;
800
801         journal->first = jbd_get32(&jbd_fs->sb, first);
802         journal->start = journal->first;
803         journal->last = journal->first;
804         journal->trans_id = 1;
805         journal->alloc_trans_id = 1;
806
807         journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
808
809         TAILQ_INIT(&journal->trans_queue);
810         TAILQ_INIT(&journal->cp_queue);
811         journal->jbd_fs = jbd_fs;
812         jbd_journal_write_sb(journal);
813         return jbd_write_sb(jbd_fs);
814 }
815
816 int jbd_journal_stop(struct jbd_journal *journal)
817 {
818         int r;
819         struct jbd_fs *jbd_fs = journal->jbd_fs;
820         uint32_t features_incompatible =
821                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
822                                    features_incompatible);
823         features_incompatible &= ~EXT4_FINCOM_RECOVER;
824         ext4_set32(&jbd_fs->inode_ref.fs->sb,
825                         features_incompatible,
826                         features_incompatible);
827         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
828                         &jbd_fs->inode_ref.fs->sb);
829         if (r != EOK)
830                 return r;
831
832         journal->start = 0;
833         journal->trans_id = 0;
834         jbd_journal_write_sb(journal);
835         return jbd_write_sb(journal->jbd_fs);
836 }
837
838 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
839                                         struct jbd_trans *trans)
840 {
841         uint32_t start_block = journal->last++;
842         trans->alloc_blocks++;
843         wrap(&journal->jbd_fs->sb, journal->last);
844         return start_block;
845 }
846
847 struct jbd_trans *
848 jbd_journal_new_trans(struct jbd_journal *journal)
849 {
850         struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
851         if (!trans)
852                 return NULL;
853
854         /* We will assign a trans_id to this transaction,
855          * once it has been committed.*/
856         trans->journal = journal;
857         trans->error = EOK;
858         return trans;
859 }
860
861 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
862                           struct ext4_buf *buf __unused,
863                           int res,
864                           void *arg);
865
866 int jbd_trans_add_block(struct jbd_trans *trans,
867                         struct ext4_block *block)
868 {
869         struct jbd_buf *buf;
870         /* We do not need to add those unmodified buffer to
871          * a transaction. */
872         if (!ext4_bcache_test_flag(block->buf, BC_DIRTY))
873                 return EOK;
874
875         buf = calloc(1, sizeof(struct jbd_buf));
876         if (!buf)
877                 return ENOMEM;
878
879         buf->trans = trans;
880         buf->block = *block;
881         ext4_bcache_inc_ref(block->buf);
882
883         block->buf->end_write = jbd_trans_end_write;
884         block->buf->end_write_arg = trans;
885
886         trans->data_cnt++;
887         LIST_INSERT_HEAD(&trans->buf_list, buf, buf_node);
888         return EOK;
889 }
890
891 int jbd_trans_revoke_block(struct jbd_trans *trans,
892                            ext4_fsblk_t lba)
893 {
894         struct jbd_revoke_rec *rec =
895                 calloc(1, sizeof(struct jbd_revoke_rec));
896         if (!rec)
897                 return ENOMEM;
898
899         rec->lba = lba;
900         LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
901         return EOK;
902 }
903
904 void jbd_journal_free_trans(struct jbd_journal *journal,
905                             struct jbd_trans *trans,
906                             bool abort)
907 {
908         struct jbd_buf *jbd_buf, *tmp;
909         struct jbd_revoke_rec *rec, *tmp2;
910         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
911         LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
912                           tmp) {
913                 if (abort)
914                         ext4_block_set(fs->bdev, &jbd_buf->block);
915
916                 LIST_REMOVE(jbd_buf, buf_node);
917                 free(jbd_buf);
918         }
919         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
920                           tmp2) {
921                 LIST_REMOVE(rec, revoke_node);
922                 free(rec);
923         }
924
925         free(trans);
926 }
927
928 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
929 {
930         int rc;
931         struct jbd_commit_header *header;
932         uint32_t commit_iblock = 0;
933         struct ext4_block commit_block;
934         struct jbd_journal *journal = trans->journal;
935
936         commit_iblock = jbd_journal_alloc_block(journal, trans);
937         rc = jbd_block_get_noread(journal->jbd_fs,
938                         &commit_block, commit_iblock);
939         if (rc != EOK)
940                 return rc;
941
942         header = (struct jbd_commit_header *)commit_block.data;
943         jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
944         jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
945         jbd_set32(&header->header, sequence, trans->trans_id);
946
947         ext4_bcache_set_dirty(commit_block.buf);
948         rc = jbd_block_set(journal->jbd_fs, &commit_block);
949         if (rc != EOK)
950                 return rc;
951
952         return EOK;
953 }
954
955 static int jbd_journal_prepare(struct jbd_journal *journal,
956                                struct jbd_trans *trans)
957 {
958         int rc = EOK, i = 0;
959         int32_t tag_tbl_size;
960         uint32_t desc_iblock = 0;
961         uint32_t data_iblock = 0;
962         char *tag_start = NULL, *tag_ptr = NULL;
963         struct jbd_buf *jbd_buf;
964         struct ext4_block desc_block, data_block;
965
966         LIST_FOREACH(jbd_buf, &trans->buf_list, buf_node) {
967                 struct tag_info tag_info;
968                 bool uuid_exist = false;
969 again:
970                 if (!desc_iblock) {
971                         struct jbd_bhdr *bhdr;
972                         desc_iblock = jbd_journal_alloc_block(journal, trans);
973                         rc = jbd_block_get_noread(journal->jbd_fs,
974                                            &desc_block, desc_iblock);
975                         if (rc != EOK)
976                                 break;
977
978                         ext4_bcache_set_dirty(desc_block.buf);
979
980                         bhdr = (struct jbd_bhdr *)desc_block.data;
981                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
982                         jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
983                         jbd_set32(bhdr, sequence, trans->trans_id);
984
985                         tag_start = (char *)(bhdr + 1);
986                         tag_ptr = tag_start;
987                         uuid_exist = true;
988                         tag_tbl_size = journal->block_size -
989                                 sizeof(struct jbd_bhdr);
990
991                         if (!trans->start_iblock)
992                                 trans->start_iblock = desc_iblock;
993
994                 }
995                 tag_info.block = jbd_buf->block.lb_id;
996                 tag_info.uuid_exist = uuid_exist;
997                 if (i == trans->data_cnt - 1)
998                         tag_info.last_tag = true;
999
1000                 if (uuid_exist)
1001                         memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1002                                         UUID_SIZE);
1003
1004                 rc = jbd_write_block_tag(journal->jbd_fs,
1005                                 tag_ptr,
1006                                 tag_tbl_size,
1007                                 &tag_info);
1008                 if (rc != EOK) {
1009                         jbd_block_set(journal->jbd_fs, &desc_block);
1010                         desc_iblock = 0;
1011                         goto again;
1012                 }
1013
1014                 data_iblock = jbd_journal_alloc_block(journal, trans);
1015                 rc = jbd_block_get_noread(journal->jbd_fs,
1016                                 &data_block, data_iblock);
1017                 if (rc != EOK)
1018                         break;
1019
1020                 ext4_bcache_set_dirty(data_block.buf);
1021
1022                 memcpy(data_block.data, jbd_buf->block.data,
1023                         journal->block_size);
1024
1025                 rc = jbd_block_set(journal->jbd_fs, &data_block);
1026                 if (rc != EOK)
1027                         break;
1028
1029                 tag_ptr += tag_info.tag_bytes;
1030                 tag_tbl_size -= tag_info.tag_bytes;
1031
1032                 i++;
1033         }
1034         if (rc == EOK && desc_iblock)
1035                 jbd_block_set(journal->jbd_fs, &desc_block);
1036
1037         return rc;
1038 }
1039
1040 static int
1041 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1042                            struct jbd_trans *trans)
1043 {
1044         int rc = EOK, i = 0;
1045         int32_t tag_tbl_size;
1046         uint32_t desc_iblock = 0;
1047         char *blocks_entry = NULL;
1048         struct jbd_revoke_rec *rec, *tmp;
1049         struct ext4_block desc_block;
1050         struct jbd_revoke_header *header = NULL;
1051         int32_t record_len = 4;
1052
1053         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1054                                      JBD_FEATURE_INCOMPAT_64BIT))
1055                 record_len = 8;
1056
1057         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1058                           tmp) {
1059 again:
1060                 if (!desc_iblock) {
1061                         struct jbd_bhdr *bhdr;
1062                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1063                         rc = jbd_block_get_noread(journal->jbd_fs,
1064                                            &desc_block, desc_iblock);
1065                         if (rc != EOK) {
1066                                 break;
1067                         }
1068
1069                         ext4_bcache_set_dirty(desc_block.buf);
1070
1071                         bhdr = (struct jbd_bhdr *)desc_block.data;
1072                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1073                         jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1074                         jbd_set32(bhdr, sequence, trans->trans_id);
1075                         
1076                         header = (struct jbd_revoke_header *)bhdr;
1077                         blocks_entry = (char *)(header + 1);
1078                         tag_tbl_size = journal->block_size -
1079                                 sizeof(struct jbd_revoke_header);
1080
1081                         if (!trans->start_iblock)
1082                                 trans->start_iblock = desc_iblock;
1083
1084                 }
1085
1086                 if (tag_tbl_size < record_len) {
1087                         jbd_set32(header, count,
1088                                   journal->block_size - tag_tbl_size);
1089                         jbd_block_set(journal->jbd_fs, &desc_block);
1090                         desc_iblock = 0;
1091                         header = NULL;
1092                         goto again;
1093                 }
1094                 if (record_len == 8) {
1095                         uint64_t *blocks =
1096                                 (uint64_t *)blocks_entry;
1097                         *blocks = to_be64(rec->lba);
1098                 } else {
1099                         uint32_t *blocks =
1100                                 (uint32_t *)blocks_entry;
1101                         *blocks = to_be32(rec->lba);
1102                 }
1103                 blocks_entry += record_len;
1104                 tag_tbl_size -= record_len;
1105
1106                 i++;
1107         }
1108         if (rc == EOK && desc_iblock) {
1109                 if (header != NULL)
1110                         jbd_set32(header, count,
1111                                   journal->block_size - tag_tbl_size);
1112
1113                 jbd_block_set(journal->jbd_fs, &desc_block);
1114         }
1115
1116         return rc;
1117 }
1118
1119 void
1120 jbd_journal_submit_trans(struct jbd_journal *journal,
1121                          struct jbd_trans *trans)
1122 {
1123         TAILQ_INSERT_TAIL(&journal->trans_queue,
1124                           trans,
1125                           trans_node);
1126 }
1127
1128 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
1129 {
1130         struct jbd_buf *jbd_buf, *tmp;
1131         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1132         LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
1133                         tmp) {
1134                 struct ext4_block block = jbd_buf->block;
1135                 ext4_block_set(fs->bdev, &block);
1136         }
1137 }
1138
1139 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1140                           struct ext4_buf *buf __unused,
1141                           int res,
1142                           void *arg)
1143 {
1144         struct jbd_trans *trans = arg;
1145         struct jbd_journal *journal = trans->journal;
1146         bool first_in_queue =
1147                 trans == TAILQ_FIRST(&journal->cp_queue);
1148         if (res != EOK)
1149                 trans->error = res;
1150
1151         trans->written_cnt++;
1152         if (trans->written_cnt == trans->data_cnt) {
1153                 TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
1154
1155                 if (first_in_queue) {
1156                         journal->start = trans->start_iblock +
1157                                 trans->alloc_blocks;
1158                         journal->trans_id = trans->trans_id + 1;
1159                 }
1160                 jbd_journal_free_trans(journal, trans, false);
1161
1162                 if (first_in_queue) {
1163                         while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1164                                 if (!trans->data_cnt) {
1165                                         TAILQ_REMOVE(&journal->cp_queue,
1166                                                      trans,
1167                                                      trans_node);
1168                                         journal->start = trans->start_iblock +
1169                                                 trans->alloc_blocks;
1170                                         journal->trans_id = trans->trans_id + 1;
1171                                         jbd_journal_free_trans(journal,
1172                                                                trans, false);
1173                                 } else {
1174                                         journal->start = trans->start_iblock;
1175                                         journal->trans_id = trans->trans_id;
1176                                         break;
1177                                 }
1178                         }
1179                         jbd_journal_write_sb(journal);
1180                         jbd_write_sb(journal->jbd_fs);
1181                 }
1182         }
1183 }
1184
1185 /*
1186  * XXX: one should disable cache writeback first.
1187  */
1188 void jbd_journal_commit_one(struct jbd_journal *journal)
1189 {
1190         int rc = EOK;
1191         uint32_t last = journal->last;
1192         struct jbd_trans *trans;
1193         if ((trans = TAILQ_FIRST(&journal->trans_queue))) {
1194                 TAILQ_REMOVE(&journal->trans_queue, trans, trans_node);
1195
1196                 trans->trans_id = journal->alloc_trans_id;
1197                 rc = jbd_journal_prepare(journal, trans);
1198                 if (rc != EOK)
1199                         goto Finish;
1200
1201                 rc = jbd_journal_prepare_revoke(journal, trans);
1202                 if (rc != EOK)
1203                         goto Finish;
1204
1205                 rc = jbd_trans_write_commit_block(trans);
1206                 if (rc != EOK)
1207                         goto Finish;
1208
1209                 journal->alloc_trans_id++;
1210                 if (TAILQ_EMPTY(&journal->cp_queue)) {
1211                         if (trans->data_cnt) {
1212                                 journal->start = trans->start_iblock;
1213                                 journal->trans_id = trans->trans_id;
1214                                 jbd_journal_write_sb(journal);
1215                                 jbd_write_sb(journal->jbd_fs);
1216                                 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1217                                                 trans_node);
1218                                 jbd_journal_cp_trans(journal, trans);
1219                         } else {
1220                                 journal->start = trans->start_iblock +
1221                                         trans->alloc_blocks;
1222                                 journal->trans_id = trans->trans_id + 1;
1223                                 jbd_journal_write_sb(journal);
1224                                 jbd_journal_free_trans(journal, trans, false);
1225                         }
1226                 } else {
1227                         TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1228                                         trans_node);
1229                         if (trans->data_cnt)
1230                                 jbd_journal_cp_trans(journal, trans);
1231
1232                 }
1233         }
1234 Finish:
1235         if (rc != EOK) {
1236                 journal->last = last;
1237                 jbd_journal_free_trans(journal, trans, true);
1238         }
1239 }
1240
1241 /**
1242  * @}
1243  */