ext4_journal: forcibly flush data to disk when stopping journalling.
[lwext4.git] / lwext4 / ext4_journal.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * - Redistributions of source code must retain the above copyright
11  *   notice, this list of conditions and the following disclaimer.
12  * - Redistributions in binary form must reproduce the above copyright
13  *   notice, this list of conditions and the following disclaimer in the
14  *   documentation and/or other materials provided with the distribution.
15  * - The name of the author may not be used to endorse or promote products
16  *   derived from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29
30 /** @addtogroup lwext4
31  * @{
32  */
33 /**
34  * @file  ext4_journal.c
35  * @brief Journal handle functions
36  */
37
38 #include "ext4_config.h"
39 #include "ext4_types.h"
40 #include "ext4_fs.h"
41 #include "ext4_super.h"
42 #include "ext4_journal.h"
43 #include "ext4_errno.h"
44 #include "ext4_blockdev.h"
45 #include "ext4_crc32c.h"
46 #include "ext4_debug.h"
47 #include "tree.h"
48
49 #include <string.h>
50 #include <stdlib.h>
51
/**@brief Revoke record kept in the revoke tree during recovery. */
struct revoke_entry {
        /* Block number the record refers to; the tree is ordered by it. */
        ext4_fsblk_t block;
        /* Transaction id stored together with the record. */
        uint32_t trans_id;
        /* Red-black tree linkage. */
        RB_ENTRY(revoke_entry) revoke_node;
};
57
/**@brief State shared between the passes of journal recovery. */
struct recover_info {
        /* Transaction id the scan pass started from. */
        uint32_t start_trans_id;
        /* Last transaction id to process, as computed by the scan pass. */
        uint32_t last_trans_id;
        /* Transaction id of the revoke block currently being processed. */
        uint32_t this_trans_id;
        /* Root of the revoke tree. */
        RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
};
64
/**@brief Argument handed to the block-tag callback of the replay pass. */
struct replay_arg {
        /* Recovery state (provides the revoke tree). */
        struct recover_info *info;
        /* Journal block cursor; advanced by the callback for every tag. */
        uint32_t *this_block;
        /* Transaction id of the descriptor block being replayed. */
        uint32_t this_trans_id;
};
70
71 static int
72 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
73 {
74         if (a->block > b->block)
75                 return 1;
76         else if (a->block < b->block)
77                 return -1;
78         return 0;
79 }
80
/* Instantiate the red-black tree operations of the revoke tree as
 * static inline functions, ordered by jbd_revoke_entry_cmp(). */
RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
                     jbd_revoke_entry_cmp, static inline)
83
/* Allocation helpers for revoke entries: a new entry is zero-filled
 * (calloc) and may be NULL on allocation failure. */
#define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
#define jbd_free_revoke_entry(addr) free(addr)
86
/* Forward declaration: used by the superblock helpers below before the
 * function is defined. */
int jbd_inode_bmap(struct jbd_fs *jbd_fs,
                   ext4_lblk_t iblock,
                   ext4_fsblk_t *fblock);
90
91 int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
92 {
93         int rc;
94         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
95         uint64_t offset;
96         ext4_fsblk_t fblock;
97         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
98         if (rc != EOK)
99                 return rc;
100
101         offset = fblock * ext4_sb_get_block_size(&fs->sb);
102         return ext4_block_writebytes(fs->bdev, offset, s,
103                                      EXT4_SUPERBLOCK_SIZE);
104 }
105
106 int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
107 {
108         int rc;
109         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
110         uint64_t offset;
111         ext4_fsblk_t fblock;
112         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
113         if (rc != EOK)
114                 return rc;
115
116         offset = fblock * ext4_sb_get_block_size(&fs->sb);
117         return ext4_block_readbytes(fs->bdev, offset, s,
118                                     EXT4_SUPERBLOCK_SIZE);
119 }
120
121 static bool jbd_verify_sb(struct jbd_sb *sb)
122 {
123         struct jbd_bhdr *header = &sb->header;
124         if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
125                 return false;
126
127         if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
128             jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
129                 return false;
130
131         return true;
132 }
133
134 static int jbd_write_sb(struct jbd_fs *jbd_fs)
135 {
136         int rc = EOK;
137         if (jbd_fs->dirty) {
138                 rc = jbd_sb_write(jbd_fs, &jbd_fs->sb);
139                 if (rc != EOK)
140                         return rc;
141
142                 jbd_fs->dirty = false;
143         }
144         return rc;
145 }
146
147 int jbd_get_fs(struct ext4_fs *fs,
148                struct jbd_fs *jbd_fs)
149 {
150         int rc;
151         uint32_t journal_ino;
152
153         memset(jbd_fs, 0, sizeof(struct jbd_fs));
154         journal_ino = ext4_get32(&fs->sb, journal_inode_number);
155
156         rc = ext4_fs_get_inode_ref(fs,
157                                    journal_ino,
158                                    &jbd_fs->inode_ref);
159         if (rc != EOK) {
160                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
161                 return rc;
162         }
163         rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
164         if (rc != EOK) {
165                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
166                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
167                 return rc;
168         }
169         if (!jbd_verify_sb(&jbd_fs->sb)) {
170                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
171                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
172                 rc = EIO;
173         }
174
175         return rc;
176 }
177
178 int jbd_put_fs(struct jbd_fs *jbd_fs)
179 {
180         int rc = EOK;
181         rc = jbd_write_sb(jbd_fs);
182
183         ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
184         return rc;
185 }
186
187 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
188                    ext4_lblk_t iblock,
189                    ext4_fsblk_t *fblock)
190 {
191         int rc = ext4_fs_get_inode_dblk_idx(
192                         &jbd_fs->inode_ref,
193                         iblock,
194                         fblock,
195                         false);
196         return rc;
197 }
198
199 int jbd_block_get(struct jbd_fs *jbd_fs,
200                   struct ext4_block *block,
201                   ext4_fsblk_t fblock)
202 {
203         /* TODO: journal device. */
204         int rc;
205         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
206         rc = jbd_inode_bmap(jbd_fs, iblock,
207                             &fblock);
208         if (rc != EOK)
209                 return rc;
210
211         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
212         rc = ext4_block_get(bdev, block, fblock);
213         if (rc == EOK)
214                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
215
216         return rc;
217 }
218
219 int jbd_block_get_noread(struct jbd_fs *jbd_fs,
220                          struct ext4_block *block,
221                          ext4_fsblk_t fblock)
222 {
223         /* TODO: journal device. */
224         int rc;
225         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
226         rc = jbd_inode_bmap(jbd_fs, iblock,
227                             &fblock);
228         if (rc != EOK)
229                 return rc;
230
231         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
232         rc = ext4_block_get_noread(bdev, block, fblock);
233         if (rc == EOK)
234                 ext4_bcache_set_flag(block->buf, BC_FLUSH);
235
236         return rc;
237 }
238
239 int jbd_block_set(struct jbd_fs *jbd_fs,
240                   struct ext4_block *block)
241 {
242         return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
243                               block);
244 }
245
246 /*
247  * helper functions to deal with 32 or 64bit block numbers.
248  */
249 int jbd_tag_bytes(struct jbd_fs *jbd_fs)
250 {
251         int size;
252
253         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
254                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
255                 return sizeof(struct jbd_block_tag3);
256
257         size = sizeof(struct jbd_block_tag);
258
259         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
260                                      JBD_FEATURE_INCOMPAT_CSUM_V2))
261                 size += sizeof(uint16_t);
262
263         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
264                                      JBD_FEATURE_INCOMPAT_64BIT))
265                 return size;
266
267         return size - sizeof(uint32_t);
268 }
269
/**@brief Decoded form of one block tag of a descriptor block. */
struct tag_info {
        /* Bytes occupied by the tag, including a trailing UUID if any. */
        int tag_bytes;
        /* Block number carried by the tag. */
        ext4_fsblk_t block;
        /* True when a UUID follows the tag (SAME_UUID flag not set). */
        bool uuid_exist;
        /* Copy of that UUID; only meaningful when uuid_exist is true. */
        uint8_t uuid[UUID_SIZE];
        /* True when the tag carries the LAST_TAG flag. */
        bool last_tag;
};
278
279 static int
280 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
281                       void *__tag,
282                       int tag_bytes,
283                       int32_t remain_buf_size,
284                       struct tag_info *tag_info)
285 {
286         char *uuid_start;
287         tag_info->tag_bytes = tag_bytes;
288         tag_info->uuid_exist = false;
289         tag_info->last_tag = false;
290
291         if (remain_buf_size - tag_bytes < 0)
292                 return EINVAL;
293
294         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
295                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
296                 struct jbd_block_tag3 *tag = __tag;
297                 tag_info->block = jbd_get32(tag, blocknr);
298                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
299                                              JBD_FEATURE_INCOMPAT_64BIT))
300                          tag_info->block |=
301                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
302
303                 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
304                         tag_info->block = 0;
305
306                 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
307                         if (remain_buf_size - tag_bytes < UUID_SIZE)
308                                 return EINVAL;
309
310                         uuid_start = (char *)tag + tag_bytes;
311                         tag_info->uuid_exist = true;
312                         tag_info->tag_bytes += UUID_SIZE;
313                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
314                 }
315
316                 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
317                         tag_info->last_tag = true;
318
319         } else {
320                 struct jbd_block_tag *tag = __tag;
321                 tag_info->block = jbd_get32(tag, blocknr);
322                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
323                                              JBD_FEATURE_INCOMPAT_64BIT))
324                          tag_info->block |=
325                                  (uint64_t)jbd_get32(tag, blocknr_high) << 32;
326
327                 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
328                         tag_info->block = 0;
329
330                 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
331                         if (remain_buf_size - tag_bytes < UUID_SIZE)
332                                 return EINVAL;
333
334                         uuid_start = (char *)tag + tag_bytes;
335                         tag_info->uuid_exist = true;
336                         tag_info->tag_bytes += UUID_SIZE;
337                         memcpy(tag_info->uuid, uuid_start, UUID_SIZE);
338                 }
339
340                 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
341                         tag_info->last_tag = true;
342
343         }
344         return EOK;
345 }
346
347 static int
348 jbd_write_block_tag(struct jbd_fs *jbd_fs,
349                     void *__tag,
350                     int32_t remain_buf_size,
351                     struct tag_info *tag_info)
352 {
353         char *uuid_start;
354         int tag_bytes = jbd_tag_bytes(jbd_fs);
355
356         tag_info->tag_bytes = tag_bytes;
357
358         if (remain_buf_size - tag_bytes < 0)
359                 return EINVAL;
360
361         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
362                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
363                 struct jbd_block_tag3 *tag = __tag;
364                 jbd_set32(tag, blocknr, tag_info->block);
365                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
366                                              JBD_FEATURE_INCOMPAT_64BIT))
367                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
368
369                 if (tag_info->uuid_exist) {
370                         if (remain_buf_size - tag_bytes < UUID_SIZE)
371                                 return EINVAL;
372
373                         uuid_start = (char *)tag + tag_bytes;
374                         tag_info->tag_bytes += UUID_SIZE;
375                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
376                 } else
377                         jbd_set32(tag, flags,
378                                   jbd_get32(tag, flags) | JBD_FLAG_SAME_UUID);
379
380                 if (tag_info->last_tag)
381                         jbd_set32(tag, flags,
382                                   jbd_get32(tag, flags) | JBD_FLAG_LAST_TAG);
383
384         } else {
385                 struct jbd_block_tag *tag = __tag;
386                 jbd_set32(tag, blocknr, tag_info->block);
387                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
388                                              JBD_FEATURE_INCOMPAT_64BIT))
389                         jbd_set32(tag, blocknr_high, tag_info->block >> 32);
390
391                 if (tag_info->uuid_exist) {
392                         if (remain_buf_size - tag_bytes < UUID_SIZE)
393                                 return EINVAL;
394
395                         uuid_start = (char *)tag + tag_bytes;
396                         tag_info->tag_bytes += UUID_SIZE;
397                         memcpy(uuid_start, tag_info->uuid, UUID_SIZE);
398                 } else
399                         jbd_set16(tag, flags,
400                                   jbd_get16(tag, flags) | JBD_FLAG_SAME_UUID);
401
402                 if (tag_info->last_tag)
403                         jbd_set16(tag, flags,
404                                   jbd_get16(tag, flags) | JBD_FLAG_LAST_TAG);
405
406         }
407         return EOK;
408 }
409
410 static void
411 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
412                         void *__tag_start,
413                         int32_t tag_tbl_size,
414                         void (*func)(struct jbd_fs * jbd_fs,
415                                         ext4_fsblk_t block,
416                                         uint8_t *uuid,
417                                         void *arg),
418                         void *arg)
419 {
420         char *tag_start, *tag_ptr;
421         int tag_bytes = jbd_tag_bytes(jbd_fs);
422         tag_start = __tag_start;
423         tag_ptr = tag_start;
424
425         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
426                                      JBD_FEATURE_INCOMPAT_CSUM_V2) ||
427             JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
428                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
429                 tag_tbl_size -= sizeof(struct jbd_block_tail);
430
431         while (tag_tbl_size) {
432                 struct tag_info tag_info;
433                 int rc = jbd_extract_block_tag(jbd_fs,
434                                       tag_ptr,
435                                       tag_bytes,
436                                       tag_tbl_size,
437                                       &tag_info);
438                 if (rc != EOK)
439                         break;
440
441                 if (func)
442                         func(jbd_fs, tag_info.block, tag_info.uuid, arg);
443
444                 if (tag_info.last_tag)
445                         break;
446
447                 tag_ptr += tag_info.tag_bytes;
448                 tag_tbl_size -= tag_info.tag_bytes;
449         }
450 }
451
452 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
453                                    ext4_fsblk_t block,
454                                    uint8_t *uuid,
455                                    void *arg)
456 {
457         uint32_t *iblock = arg;
458         ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
459         (*iblock)++;
460         (void)jbd_fs;
461         (void)uuid;
462         return;
463 }
464
465 static struct revoke_entry *
466 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
467 {
468         struct revoke_entry tmp = {
469                 .block = block
470         };
471
472         return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
473 }
474
475 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
476                                   ext4_fsblk_t block,
477                                   uint8_t *uuid __unused,
478                                   void *__arg)
479 {
480         int r;
481         struct replay_arg *arg = __arg;
482         struct recover_info *info = arg->info;
483         uint32_t *this_block = arg->this_block;
484         struct revoke_entry *revoke_entry;
485         struct ext4_block journal_block, ext4_block;
486         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
487
488         (*this_block)++;
489
490         revoke_entry = jbd_revoke_entry_lookup(info, block);
491         if (revoke_entry &&
492             arg->this_trans_id < revoke_entry->trans_id)
493                 return;
494
495         ext4_dbg(DEBUG_JBD,
496                  "Replaying block in block_tag: %" PRIu64 "\n",
497                  block);
498
499         r = jbd_block_get(jbd_fs, &journal_block, *this_block);
500         if (r != EOK)
501                 return;
502
503         if (block) {
504                 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
505                 if (r != EOK) {
506                         jbd_block_set(jbd_fs, &journal_block);
507                         return;
508                 }
509
510                 memcpy(ext4_block.data,
511                         journal_block.data,
512                         jbd_get32(&jbd_fs->sb, blocksize));
513
514                 ext4_bcache_set_dirty(ext4_block.buf);
515                 ext4_block_set(fs->bdev, &ext4_block);
516         } else {
517                 uint16_t mount_count, state;
518                 mount_count = ext4_get16(&fs->sb, mount_count);
519                 state = ext4_get16(&fs->sb, state);
520
521                 memcpy(&fs->sb,
522                         journal_block.data + EXT4_SUPERBLOCK_OFFSET,
523                         EXT4_SUPERBLOCK_SIZE);
524
525                 /* Mark system as mounted */
526                 ext4_set16(&fs->sb, state, state);
527                 r = ext4_sb_write(fs->bdev, &fs->sb);
528                 if (r != EOK)
529                         return;
530
531                 /*Update mount count*/
532                 ext4_set16(&fs->sb, mount_count, mount_count);
533         }
534
535         jbd_block_set(jbd_fs, &journal_block);
536         
537         return;
538 }
539
540 static void jbd_add_revoke_block_tags(struct recover_info *info,
541                                       ext4_fsblk_t block)
542 {
543         struct revoke_entry *revoke_entry;
544
545         ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
546         revoke_entry = jbd_revoke_entry_lookup(info, block);
547         if (revoke_entry) {
548                 revoke_entry->trans_id = info->this_trans_id;
549                 return;
550         }
551
552         revoke_entry = jbd_alloc_revoke_entry();
553         ext4_assert(revoke_entry);
554         revoke_entry->block = block;
555         revoke_entry->trans_id = info->this_trans_id;
556         RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
557
558         return;
559 }
560
561 static void jbd_destroy_revoke_tree(struct recover_info *info)
562 {
563         while (!RB_EMPTY(&info->revoke_root)) {
564                 struct revoke_entry *revoke_entry =
565                         RB_MIN(jbd_revoke, &info->revoke_root);
566                 ext4_assert(revoke_entry);
567                 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
568                 jbd_free_revoke_entry(revoke_entry);
569         }
570 }
571
/* Make sure we wrap around the log correctly!
 *
 * When var has reached or passed the journal's maxlen it is moved back by
 * (maxlen - first). var must be a modifiable lvalue without side effects,
 * since it is evaluated more than once. */
#define wrap(sb, var)                                           \
do {                                                                    \
        if ((var) >= jbd_get32((sb), maxlen))                                   \
                (var) -= (jbd_get32((sb), maxlen) - jbd_get32((sb), first));    \
} while (0)
578
/* Passes of jbd_iterate_log(): */
/* Walk the log and record the first and last transaction ids. */
#define ACTION_SCAN 0
/* Collect the revoke blocks into the revoke tree. */
#define ACTION_REVOKE 1
/* Replay the blocks listed in the descriptor blocks. */
#define ACTION_RECOVER 2
582
583
584 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
585                                   struct jbd_bhdr *header,
586                                   struct recover_info *info)
587 {
588         char *blocks_entry;
589         struct jbd_revoke_header *revoke_hdr =
590                 (struct jbd_revoke_header *)header;
591         uint32_t i, nr_entries, record_len = 4;
592         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
593                                      JBD_FEATURE_INCOMPAT_64BIT))
594                 record_len = 8;
595
596         nr_entries = (jbd_get32(revoke_hdr, count) -
597                         sizeof(struct jbd_revoke_header)) /
598                         record_len;
599
600         blocks_entry = (char *)(revoke_hdr + 1);
601
602         for (i = 0;i < nr_entries;i++) {
603                 if (record_len == 8) {
604                         uint64_t *blocks =
605                                 (uint64_t *)blocks_entry;
606                         jbd_add_revoke_block_tags(info, to_be64(*blocks));
607                 } else {
608                         uint32_t *blocks =
609                                 (uint32_t *)blocks_entry;
610                         jbd_add_revoke_block_tags(info, to_be32(*blocks));
611                 }
612                 blocks_entry += record_len;
613         }
614 }
615
616 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
617                                        struct jbd_bhdr *header,
618                                        uint32_t *iblock)
619 {
620         jbd_iterate_block_table(jbd_fs,
621                                 header + 1,
622                                 jbd_get32(&jbd_fs->sb, blocksize) -
623                                         sizeof(struct jbd_bhdr),
624                                 jbd_display_block_tags,
625                                 iblock);
626 }
627
628 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
629                                         struct jbd_bhdr *header,
630                                         struct replay_arg *arg)
631 {
632         jbd_iterate_block_table(jbd_fs,
633                                 header + 1,
634                                 jbd_get32(&jbd_fs->sb, blocksize) -
635                                         sizeof(struct jbd_bhdr),
636                                 jbd_replay_block_tags,
637                                 arg);
638 }
639
640 int jbd_iterate_log(struct jbd_fs *jbd_fs,
641                     struct recover_info *info,
642                     int action)
643 {
644         int r = EOK;
645         bool log_end = false;
646         struct jbd_sb *sb = &jbd_fs->sb;
647         uint32_t start_trans_id, this_trans_id;
648         uint32_t start_block, this_block;
649
650         start_trans_id = this_trans_id = jbd_get32(sb, sequence);
651         start_block = this_block = jbd_get32(sb, start);
652
653         ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
654                             start_trans_id);
655
656         while (!log_end) {
657                 struct ext4_block block;
658                 struct jbd_bhdr *header;
659                 if (action != ACTION_SCAN)
660                         if (this_trans_id > info->last_trans_id) {
661                                 log_end = true;
662                                 continue;
663                         }
664
665                 r = jbd_block_get(jbd_fs, &block, this_block);
666                 if (r != EOK)
667                         break;
668
669                 header = (struct jbd_bhdr *)block.data;
670                 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
671                         jbd_block_set(jbd_fs, &block);
672                         log_end = true;
673                         continue;
674                 }
675
676                 if (jbd_get32(header, sequence) != this_trans_id) {
677                         if (action != ACTION_SCAN)
678                                 r = EIO;
679
680                         jbd_block_set(jbd_fs, &block);
681                         log_end = true;
682                         continue;
683                 }
684
685                 switch (jbd_get32(header, blocktype)) {
686                 case JBD_DESCRIPTOR_BLOCK:
687                         ext4_dbg(DEBUG_JBD, "Descriptor block: %" PRIu32", "
688                                             "trans_id: %" PRIu32"\n",
689                                             this_block, this_trans_id);
690                         if (action == ACTION_RECOVER) {
691                                 struct replay_arg replay_arg;
692                                 replay_arg.info = info;
693                                 replay_arg.this_block = &this_block;
694                                 replay_arg.this_trans_id = this_trans_id;
695
696                                 jbd_replay_descriptor_block(jbd_fs,
697                                                 header, &replay_arg);
698                         } else
699                                 jbd_debug_descriptor_block(jbd_fs,
700                                                 header, &this_block);
701
702                         break;
703                 case JBD_COMMIT_BLOCK:
704                         ext4_dbg(DEBUG_JBD, "Commit block: %" PRIu32", "
705                                             "trans_id: %" PRIu32"\n",
706                                             this_block, this_trans_id);
707                         this_trans_id++;
708                         break;
709                 case JBD_REVOKE_BLOCK:
710                         ext4_dbg(DEBUG_JBD, "Revoke block: %" PRIu32", "
711                                             "trans_id: %" PRIu32"\n",
712                                             this_block, this_trans_id);
713                         if (action == ACTION_REVOKE) {
714                                 info->this_trans_id = this_trans_id;
715                                 jbd_build_revoke_tree(jbd_fs,
716                                                 header, info);
717                         }
718                         break;
719                 default:
720                         log_end = true;
721                         break;
722                 }
723                 jbd_block_set(jbd_fs, &block);
724                 this_block++;
725                 wrap(sb, this_block);
726                 if (this_block == start_block)
727                         log_end = true;
728
729         }
730         ext4_dbg(DEBUG_JBD, "End of journal.\n");
731         if (r == EOK && action == ACTION_SCAN) {
732                 info->start_trans_id = start_trans_id;
733                 if (this_trans_id > start_trans_id)
734                         info->last_trans_id = this_trans_id - 1;
735                 else
736                         info->last_trans_id = this_trans_id;
737         }
738
739         return r;
740 }
741
742 int jbd_recover(struct jbd_fs *jbd_fs)
743 {
744         int r;
745         struct recover_info info;
746         struct jbd_sb *sb = &jbd_fs->sb;
747         if (!sb->start)
748                 return EOK;
749
750         RB_INIT(&info.revoke_root);
751
752         r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
753         if (r != EOK)
754                 return r;
755
756         r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
757         if (r != EOK)
758                 return r;
759
760         r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
761         if (r == EOK) {
762                 uint32_t features_incompatible =
763                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
764                                    features_incompatible);
765                 jbd_set32(&jbd_fs->sb, start, 0);
766                 features_incompatible &= ~EXT4_FINCOM_RECOVER;
767                 ext4_set32(&jbd_fs->inode_ref.fs->sb,
768                            features_incompatible,
769                            features_incompatible);
770                 jbd_fs->dirty = true;
771                 r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
772                                   &jbd_fs->inode_ref.fs->sb);
773         }
774         jbd_destroy_revoke_tree(&info);
775         return r;
776 }
777
778 void jbd_journal_write_sb(struct jbd_journal *journal)
779 {
780         struct jbd_fs *jbd_fs = journal->jbd_fs;
781         jbd_set32(&jbd_fs->sb, start, journal->start);
782         jbd_set32(&jbd_fs->sb, sequence, journal->trans_id);
783         jbd_fs->dirty = true;
784 }
785
786 int jbd_journal_start(struct jbd_fs *jbd_fs,
787                       struct jbd_journal *journal)
788 {
789         int r;
790         uint32_t features_incompatible =
791                         ext4_get32(&jbd_fs->inode_ref.fs->sb,
792                                    features_incompatible);
793         features_incompatible |= EXT4_FINCOM_RECOVER;
794         ext4_set32(&jbd_fs->inode_ref.fs->sb,
795                         features_incompatible,
796                         features_incompatible);
797         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
798                         &jbd_fs->inode_ref.fs->sb);
799         if (r != EOK)
800                 return r;
801
802         journal->first = jbd_get32(&jbd_fs->sb, first);
803         journal->start = journal->first;
804         journal->last = journal->first;
805         journal->trans_id = 1;
806         journal->alloc_trans_id = 1;
807
808         journal->block_size = jbd_get32(&jbd_fs->sb, blocksize);
809
810         TAILQ_INIT(&journal->trans_queue);
811         TAILQ_INIT(&journal->cp_queue);
812         journal->jbd_fs = jbd_fs;
813         jbd_journal_write_sb(journal);
814         return jbd_write_sb(jbd_fs);
815 }
816
817 int jbd_journal_stop(struct jbd_journal *journal)
818 {
819
820         int r;
821         struct jbd_fs *jbd_fs = journal->jbd_fs;
822         uint32_t features_incompatible;
823
824         jbd_journal_commit_all(journal);
825         ext4_block_cache_flush(jbd_fs->inode_ref.fs->bdev);
826
827         features_incompatible =
828                 ext4_get32(&jbd_fs->inode_ref.fs->sb,
829                            features_incompatible);
830         features_incompatible &= ~EXT4_FINCOM_RECOVER;
831         ext4_set32(&jbd_fs->inode_ref.fs->sb,
832                         features_incompatible,
833                         features_incompatible);
834         r = ext4_sb_write(jbd_fs->inode_ref.fs->bdev,
835                         &jbd_fs->inode_ref.fs->sb);
836         if (r != EOK)
837                 return r;
838
839         journal->start = 0;
840         journal->trans_id = 0;
841         jbd_journal_write_sb(journal);
842         return jbd_write_sb(journal->jbd_fs);
843 }
844
845 static uint32_t jbd_journal_alloc_block(struct jbd_journal *journal,
846                                         struct jbd_trans *trans)
847 {
848         uint32_t start_block;
849
850         start_block = journal->last++;
851         trans->alloc_blocks++;
852         wrap(&journal->jbd_fs->sb, journal->last);
853         if (journal->last == journal->start)
854                 ext4_block_cache_flush(journal->jbd_fs->inode_ref.fs->bdev);
855
856         return start_block;
857 }
858
859 struct jbd_trans *
860 jbd_journal_new_trans(struct jbd_journal *journal)
861 {
862         struct jbd_trans *trans = calloc(1, sizeof(struct jbd_trans));
863         if (!trans)
864                 return NULL;
865
866         /* We will assign a trans_id to this transaction,
867          * once it has been committed.*/
868         trans->journal = journal;
869         trans->error = EOK;
870         return trans;
871 }
872
873 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
874                           struct ext4_buf *buf __unused,
875                           int res,
876                           void *arg);
877
878 int jbd_trans_add_block(struct jbd_trans *trans,
879                         struct ext4_block *block)
880 {
881         struct jbd_buf *buf;
882         /* We do not need to add those unmodified buffer to
883          * a transaction. */
884         if (!ext4_bcache_test_flag(block->buf, BC_DIRTY))
885                 return EOK;
886
887         buf = calloc(1, sizeof(struct jbd_buf));
888         if (!buf)
889                 return ENOMEM;
890
891         buf->trans = trans;
892         buf->block = *block;
893         ext4_bcache_inc_ref(block->buf);
894
895         block->buf->end_write = jbd_trans_end_write;
896         block->buf->end_write_arg = trans;
897
898         trans->data_cnt++;
899         LIST_INSERT_HEAD(&trans->buf_list, buf, buf_node);
900         return EOK;
901 }
902
903 int jbd_trans_revoke_block(struct jbd_trans *trans,
904                            ext4_fsblk_t lba)
905 {
906         struct jbd_revoke_rec *rec =
907                 calloc(1, sizeof(struct jbd_revoke_rec));
908         if (!rec)
909                 return ENOMEM;
910
911         rec->lba = lba;
912         LIST_INSERT_HEAD(&trans->revoke_list, rec, revoke_node);
913         return EOK;
914 }
915
916 void jbd_journal_free_trans(struct jbd_journal *journal,
917                             struct jbd_trans *trans,
918                             bool abort)
919 {
920         struct jbd_buf *jbd_buf, *tmp;
921         struct jbd_revoke_rec *rec, *tmp2;
922         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
923         LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
924                           tmp) {
925                 if (abort)
926                         ext4_block_set(fs->bdev, &jbd_buf->block);
927
928                 LIST_REMOVE(jbd_buf, buf_node);
929                 free(jbd_buf);
930         }
931         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
932                           tmp2) {
933                 LIST_REMOVE(rec, revoke_node);
934                 free(rec);
935         }
936
937         free(trans);
938 }
939
940 static int jbd_trans_write_commit_block(struct jbd_trans *trans)
941 {
942         int rc;
943         struct jbd_commit_header *header;
944         uint32_t commit_iblock = 0;
945         struct ext4_block commit_block;
946         struct jbd_journal *journal = trans->journal;
947
948         commit_iblock = jbd_journal_alloc_block(journal, trans);
949         rc = jbd_block_get_noread(journal->jbd_fs,
950                         &commit_block, commit_iblock);
951         if (rc != EOK)
952                 return rc;
953
954         header = (struct jbd_commit_header *)commit_block.data;
955         jbd_set32(&header->header, magic, JBD_MAGIC_NUMBER);
956         jbd_set32(&header->header, blocktype, JBD_COMMIT_BLOCK);
957         jbd_set32(&header->header, sequence, trans->trans_id);
958
959         ext4_bcache_set_dirty(commit_block.buf);
960         rc = jbd_block_set(journal->jbd_fs, &commit_block);
961         if (rc != EOK)
962                 return rc;
963
964         return EOK;
965 }
966
967 static int jbd_journal_prepare(struct jbd_journal *journal,
968                                struct jbd_trans *trans)
969 {
970         int rc = EOK, i = 0;
971         int32_t tag_tbl_size;
972         uint32_t desc_iblock = 0;
973         uint32_t data_iblock = 0;
974         char *tag_start = NULL, *tag_ptr = NULL;
975         struct jbd_buf *jbd_buf;
976         struct ext4_block desc_block, data_block;
977
978         LIST_FOREACH(jbd_buf, &trans->buf_list, buf_node) {
979                 struct tag_info tag_info;
980                 bool uuid_exist = false;
981 again:
982                 if (!desc_iblock) {
983                         struct jbd_bhdr *bhdr;
984                         desc_iblock = jbd_journal_alloc_block(journal, trans);
985                         rc = jbd_block_get_noread(journal->jbd_fs,
986                                            &desc_block, desc_iblock);
987                         if (rc != EOK)
988                                 break;
989
990                         ext4_bcache_set_dirty(desc_block.buf);
991
992                         bhdr = (struct jbd_bhdr *)desc_block.data;
993                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
994                         jbd_set32(bhdr, blocktype, JBD_DESCRIPTOR_BLOCK);
995                         jbd_set32(bhdr, sequence, trans->trans_id);
996
997                         tag_start = (char *)(bhdr + 1);
998                         tag_ptr = tag_start;
999                         uuid_exist = true;
1000                         tag_tbl_size = journal->block_size -
1001                                 sizeof(struct jbd_bhdr);
1002
1003                         if (!trans->start_iblock)
1004                                 trans->start_iblock = desc_iblock;
1005
1006                 }
1007                 tag_info.block = jbd_buf->block.lb_id;
1008                 tag_info.uuid_exist = uuid_exist;
1009                 if (i == trans->data_cnt - 1)
1010                         tag_info.last_tag = true;
1011
1012                 if (uuid_exist)
1013                         memcpy(tag_info.uuid, journal->jbd_fs->sb.uuid,
1014                                         UUID_SIZE);
1015
1016                 rc = jbd_write_block_tag(journal->jbd_fs,
1017                                 tag_ptr,
1018                                 tag_tbl_size,
1019                                 &tag_info);
1020                 if (rc != EOK) {
1021                         jbd_block_set(journal->jbd_fs, &desc_block);
1022                         desc_iblock = 0;
1023                         goto again;
1024                 }
1025
1026                 data_iblock = jbd_journal_alloc_block(journal, trans);
1027                 rc = jbd_block_get_noread(journal->jbd_fs,
1028                                 &data_block, data_iblock);
1029                 if (rc != EOK)
1030                         break;
1031
1032                 ext4_bcache_set_dirty(data_block.buf);
1033
1034                 memcpy(data_block.data, jbd_buf->block.data,
1035                         journal->block_size);
1036
1037                 rc = jbd_block_set(journal->jbd_fs, &data_block);
1038                 if (rc != EOK)
1039                         break;
1040
1041                 tag_ptr += tag_info.tag_bytes;
1042                 tag_tbl_size -= tag_info.tag_bytes;
1043
1044                 i++;
1045         }
1046         if (rc == EOK && desc_iblock)
1047                 jbd_block_set(journal->jbd_fs, &desc_block);
1048
1049         return rc;
1050 }
1051
1052 static int
1053 jbd_journal_prepare_revoke(struct jbd_journal *journal,
1054                            struct jbd_trans *trans)
1055 {
1056         int rc = EOK, i = 0;
1057         int32_t tag_tbl_size;
1058         uint32_t desc_iblock = 0;
1059         char *blocks_entry = NULL;
1060         struct jbd_revoke_rec *rec, *tmp;
1061         struct ext4_block desc_block;
1062         struct jbd_revoke_header *header = NULL;
1063         int32_t record_len = 4;
1064
1065         if (JBD_HAS_INCOMPAT_FEATURE(&journal->jbd_fs->sb,
1066                                      JBD_FEATURE_INCOMPAT_64BIT))
1067                 record_len = 8;
1068
1069         LIST_FOREACH_SAFE(rec, &trans->revoke_list, revoke_node,
1070                           tmp) {
1071 again:
1072                 if (!desc_iblock) {
1073                         struct jbd_bhdr *bhdr;
1074                         desc_iblock = jbd_journal_alloc_block(journal, trans);
1075                         rc = jbd_block_get_noread(journal->jbd_fs,
1076                                            &desc_block, desc_iblock);
1077                         if (rc != EOK) {
1078                                 break;
1079                         }
1080
1081                         ext4_bcache_set_dirty(desc_block.buf);
1082
1083                         bhdr = (struct jbd_bhdr *)desc_block.data;
1084                         jbd_set32(bhdr, magic, JBD_MAGIC_NUMBER);
1085                         jbd_set32(bhdr, blocktype, JBD_REVOKE_BLOCK);
1086                         jbd_set32(bhdr, sequence, trans->trans_id);
1087                         
1088                         header = (struct jbd_revoke_header *)bhdr;
1089                         blocks_entry = (char *)(header + 1);
1090                         tag_tbl_size = journal->block_size -
1091                                 sizeof(struct jbd_revoke_header);
1092
1093                         if (!trans->start_iblock)
1094                                 trans->start_iblock = desc_iblock;
1095
1096                 }
1097
1098                 if (tag_tbl_size < record_len) {
1099                         jbd_set32(header, count,
1100                                   journal->block_size - tag_tbl_size);
1101                         jbd_block_set(journal->jbd_fs, &desc_block);
1102                         desc_iblock = 0;
1103                         header = NULL;
1104                         goto again;
1105                 }
1106                 if (record_len == 8) {
1107                         uint64_t *blocks =
1108                                 (uint64_t *)blocks_entry;
1109                         *blocks = to_be64(rec->lba);
1110                 } else {
1111                         uint32_t *blocks =
1112                                 (uint32_t *)blocks_entry;
1113                         *blocks = to_be32(rec->lba);
1114                 }
1115                 blocks_entry += record_len;
1116                 tag_tbl_size -= record_len;
1117
1118                 i++;
1119         }
1120         if (rc == EOK && desc_iblock) {
1121                 if (header != NULL)
1122                         jbd_set32(header, count,
1123                                   journal->block_size - tag_tbl_size);
1124
1125                 jbd_block_set(journal->jbd_fs, &desc_block);
1126         }
1127
1128         return rc;
1129 }
1130
1131 void
1132 jbd_journal_submit_trans(struct jbd_journal *journal,
1133                          struct jbd_trans *trans)
1134 {
1135         TAILQ_INSERT_TAIL(&journal->trans_queue,
1136                           trans,
1137                           trans_node);
1138 }
1139
1140 void jbd_journal_cp_trans(struct jbd_journal *journal, struct jbd_trans *trans)
1141 {
1142         struct jbd_buf *jbd_buf, *tmp;
1143         struct ext4_fs *fs = journal->jbd_fs->inode_ref.fs;
1144         LIST_FOREACH_SAFE(jbd_buf, &trans->buf_list, buf_node,
1145                         tmp) {
1146                 struct ext4_block block = jbd_buf->block;
1147                 ext4_block_set(fs->bdev, &block);
1148         }
1149 }
1150
1151 static void jbd_trans_end_write(struct ext4_bcache *bc __unused,
1152                           struct ext4_buf *buf __unused,
1153                           int res,
1154                           void *arg)
1155 {
1156         struct jbd_trans *trans = arg;
1157         struct jbd_journal *journal = trans->journal;
1158         bool first_in_queue =
1159                 trans == TAILQ_FIRST(&journal->cp_queue);
1160         if (res != EOK)
1161                 trans->error = res;
1162
1163         trans->written_cnt++;
1164         if (trans->written_cnt == trans->data_cnt) {
1165                 TAILQ_REMOVE(&journal->cp_queue, trans, trans_node);
1166
1167                 if (first_in_queue) {
1168                         journal->start = trans->start_iblock +
1169                                 trans->alloc_blocks;
1170                         wrap(&journal->jbd_fs->sb, journal->start);
1171                         journal->trans_id = trans->trans_id + 1;
1172                 }
1173                 jbd_journal_free_trans(journal, trans, false);
1174
1175                 if (first_in_queue) {
1176                         while ((trans = TAILQ_FIRST(&journal->cp_queue))) {
1177                                 if (!trans->data_cnt) {
1178                                         TAILQ_REMOVE(&journal->cp_queue,
1179                                                      trans,
1180                                                      trans_node);
1181                                         journal->start = trans->start_iblock +
1182                                                 trans->alloc_blocks;
1183                                         wrap(&journal->jbd_fs->sb, journal->start);
1184                                         journal->trans_id = trans->trans_id + 1;
1185                                         jbd_journal_free_trans(journal,
1186                                                                trans, false);
1187                                 } else {
1188                                         journal->start = trans->start_iblock;
1189                                         wrap(&journal->jbd_fs->sb, journal->start);
1190                                         journal->trans_id = trans->trans_id;
1191                                         break;
1192                                 }
1193                         }
1194                         jbd_journal_write_sb(journal);
1195                         jbd_write_sb(journal->jbd_fs);
1196                 }
1197         }
1198 }
1199
1200 void jbd_journal_commit_one(struct jbd_journal *journal)
1201 {
1202         int rc = EOK;
1203         uint32_t last = journal->last;
1204         struct jbd_trans *trans;
1205
1206         if ((trans = TAILQ_FIRST(&journal->trans_queue))) {
1207                 TAILQ_REMOVE(&journal->trans_queue, trans, trans_node);
1208
1209                 trans->trans_id = journal->alloc_trans_id;
1210                 rc = jbd_journal_prepare(journal, trans);
1211                 if (rc != EOK)
1212                         goto Finish;
1213
1214                 rc = jbd_journal_prepare_revoke(journal, trans);
1215                 if (rc != EOK)
1216                         goto Finish;
1217
1218                 rc = jbd_trans_write_commit_block(trans);
1219                 if (rc != EOK)
1220                         goto Finish;
1221
1222                 journal->alloc_trans_id++;
1223                 if (TAILQ_EMPTY(&journal->cp_queue)) {
1224                         if (trans->data_cnt) {
1225                                 journal->start = trans->start_iblock;
1226                                 wrap(&journal->jbd_fs->sb, journal->start);
1227                                 journal->trans_id = trans->trans_id;
1228                                 jbd_journal_write_sb(journal);
1229                                 jbd_write_sb(journal->jbd_fs);
1230                                 TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1231                                                 trans_node);
1232                                 jbd_journal_cp_trans(journal, trans);
1233                         } else {
1234                                 journal->start = trans->start_iblock +
1235                                         trans->alloc_blocks;
1236                                 wrap(&journal->jbd_fs->sb, journal->start);
1237                                 journal->trans_id = trans->trans_id + 1;
1238                                 jbd_journal_write_sb(journal);
1239                                 jbd_journal_free_trans(journal, trans, false);
1240                         }
1241                 } else {
1242                         TAILQ_INSERT_TAIL(&journal->cp_queue, trans,
1243                                         trans_node);
1244                         if (trans->data_cnt)
1245                                 jbd_journal_cp_trans(journal, trans);
1246
1247                 }
1248         }
1249 Finish:
1250         if (rc != EOK) {
1251                 journal->last = last;
1252                 jbd_journal_free_trans(journal, trans, true);
1253         }
1254 }
1255
1256 void jbd_journal_commit_all(struct jbd_journal *journal)
1257 {
1258         while (!TAILQ_EMPTY(&journal->trans_queue)) {
1259                 jbd_journal_commit_one(journal);
1260         }
1261 }
1262
1263 /**
1264  * @}
1265  */