ext4_journal: simplify the code logic of superblock replaying.
[lwext4.git] / lwext4 / ext4_journal.c
1 /**
2  * @file  ext4_journal.c
3  * @brief Journalling
4  */
5
6 #include "ext4_config.h"
7 #include "ext4_types.h"
8 #include "ext4_fs.h"
9 #include "ext4_super.h"
10 #include "ext4_errno.h"
11 #include "ext4_blockdev.h"
12 #include "ext4_crc32c.h"
13 #include "ext4_debug.h"
14 #include "tree.h"
15
16 #include <string.h>
17 #include <malloc.h>
18
19 struct revoke_entry {
20         ext4_fsblk_t block;
21         uint32_t trans_id;
22         RB_ENTRY(revoke_entry) revoke_node;
23 };
24
25 struct recover_info {
26         uint32_t start_trans_id;
27         uint32_t last_trans_id;
28         uint32_t this_trans_id;
29         RB_HEAD(jbd_revoke, revoke_entry) revoke_root;
30 };
31
/* Argument bundle passed to the per-tag replay callback. */
struct replay_arg {
        /* Recovery state, including the revoke tree. */
        struct recover_info *info;
        /* Journal block cursor; the callback advances it once per tag. */
        uint32_t *this_block;
};
36
37 static int
38 jbd_revoke_entry_cmp(struct revoke_entry *a, struct revoke_entry *b)
39 {
40         if (a->block > b->block)
41                 return 1;
42         else if (a->block < b->block)
43                 return -1;
44         return 0;
45 }
46
47 RB_GENERATE_INTERNAL(jbd_revoke, revoke_entry, revoke_node,
48                      jbd_revoke_entry_cmp, static inline)
49
/* Revoke entries are zero-filled heap objects, released with free(). */
#define jbd_free_revoke_entry(addr) free(addr)
#define jbd_alloc_revoke_entry() calloc(1, sizeof(struct revoke_entry))
52
53 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
54                    ext4_lblk_t iblock,
55                    ext4_fsblk_t *fblock);
56
57 int jbd_sb_write(struct jbd_fs *jbd_fs, struct jbd_sb *s)
58 {
59         int rc;
60         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
61         uint64_t offset;
62         ext4_fsblk_t fblock;
63         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
64         if (rc != EOK)
65                 return rc;
66
67         offset = fblock * ext4_sb_get_block_size(&fs->sb);
68         return ext4_block_writebytes(fs->bdev, offset, s,
69                                      EXT4_SUPERBLOCK_SIZE);
70 }
71
72 int jbd_sb_read(struct jbd_fs *jbd_fs, struct jbd_sb *s)
73 {
74         int rc;
75         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
76         uint64_t offset;
77         ext4_fsblk_t fblock;
78         rc = jbd_inode_bmap(jbd_fs, 0, &fblock);
79         if (rc != EOK)
80                 return rc;
81
82         offset = fblock * ext4_sb_get_block_size(&fs->sb);
83         return ext4_block_readbytes(fs->bdev, offset, s,
84                                     EXT4_SUPERBLOCK_SIZE);
85 }
86
87 static bool jbd_verify_sb(struct jbd_sb *sb)
88 {
89         struct jbd_bhdr *header = &sb->header;
90         if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER)
91                 return false;
92
93         if (jbd_get32(header, blocktype) != JBD_SUPERBLOCK &&
94             jbd_get32(header, blocktype) != JBD_SUPERBLOCK_V2)
95                 return false;
96
97         return true;
98 }
99
100 int jbd_get_fs(struct ext4_fs *fs,
101                struct jbd_fs *jbd_fs)
102 {
103         int rc;
104         uint32_t journal_ino;
105
106         memset(jbd_fs, 0, sizeof(struct jbd_fs));
107         journal_ino = ext4_get32(&fs->sb, journal_inode_number);
108
109         rc = ext4_fs_get_inode_ref(fs,
110                                    journal_ino,
111                                    &jbd_fs->inode_ref);
112         if (rc != EOK) {
113                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
114                 return rc;
115         }
116         rc = jbd_sb_read(jbd_fs, &jbd_fs->sb);
117         if (rc != EOK) {
118                 memset(jbd_fs, 0, sizeof(struct jbd_fs));
119                 ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
120         }
121
122         return rc;
123 }
124
125 int jbd_put_fs(struct jbd_fs *jbd_fs)
126 {
127         int rc;
128         if (jbd_fs->dirty)
129                 jbd_sb_write(jbd_fs, &jbd_fs->sb);
130
131         rc = ext4_fs_put_inode_ref(&jbd_fs->inode_ref);
132         return rc;
133 }
134
135 int jbd_inode_bmap(struct jbd_fs *jbd_fs,
136                    ext4_lblk_t iblock,
137                    ext4_fsblk_t *fblock)
138 {
139         int rc = ext4_fs_get_inode_data_block_index(
140                         &jbd_fs->inode_ref,
141                         iblock,
142                         fblock,
143                         false);
144         return rc;
145 }
146
147 int jbd_block_get(struct jbd_fs *jbd_fs,
148                   struct ext4_block *block,
149                   ext4_fsblk_t fblock)
150 {
151         /* TODO: journal device. */
152         int rc;
153         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
154         rc = jbd_inode_bmap(jbd_fs, iblock,
155                             &fblock);
156         if (rc != EOK)
157                 return rc;
158
159         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
160         rc = ext4_block_get(bdev, block, fblock);
161         return rc;
162 }
163
164 int jbd_block_get_noread(struct jbd_fs *jbd_fs,
165                          struct ext4_block *block,
166                          ext4_fsblk_t fblock)
167 {
168         /* TODO: journal device. */
169         int rc;
170         ext4_lblk_t iblock = (ext4_lblk_t)fblock;
171         rc = jbd_inode_bmap(jbd_fs, iblock,
172                             &fblock);
173         if (rc != EOK)
174                 return rc;
175
176         struct ext4_blockdev *bdev = jbd_fs->inode_ref.fs->bdev;
177         rc = ext4_block_get_noread(bdev, block, fblock);
178         return rc;
179 }
180
181 int jbd_block_set(struct jbd_fs *jbd_fs,
182                   struct ext4_block *block)
183 {
184         return ext4_block_set(jbd_fs->inode_ref.fs->bdev,
185                               block);
186 }
187
188 /*
189  * helper functions to deal with 32 or 64bit block numbers.
190  */
191 int jbd_tag_bytes(struct jbd_fs *jbd_fs)
192 {
193         int size;
194
195         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
196                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
197                 return sizeof(struct jbd_block_tag3);
198
199         size = sizeof(struct jbd_block_tag);
200
201         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
202                                      JBD_FEATURE_INCOMPAT_CSUM_V2))
203                 size += sizeof(uint16_t);
204
205         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
206                                      JBD_FEATURE_INCOMPAT_64BIT))
207                 return size;
208
209         return size - sizeof(uint32_t);
210 }
211
212 static void
213 jbd_extract_block_tag(struct jbd_fs *jbd_fs,
214                       uint32_t tag_bytes,
215                       void *__tag,
216                       ext4_fsblk_t *block,
217                       bool *uuid_exist,
218                       uint8_t *uuid,
219                       bool *last_tag)
220 {
221         char *uuid_start;
222         *uuid_exist = false;
223         *last_tag = false;
224         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
225                                      JBD_FEATURE_INCOMPAT_CSUM_V3)) {
226                 struct jbd_block_tag3 *tag = __tag;
227                 *block = jbd_get32(tag, blocknr);
228                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
229                                              JBD_FEATURE_INCOMPAT_64BIT))
230                          *block |= (uint64_t)jbd_get32(tag, blocknr_high) << 32;
231
232                 if (jbd_get32(tag, flags) & JBD_FLAG_ESCAPE)
233                         *block = 0;
234
235                 if (!(jbd_get32(tag, flags) & JBD_FLAG_SAME_UUID)) {
236                         uuid_start = (char *)tag + tag_bytes;
237                         *uuid_exist = true;
238                         memcpy(uuid, uuid_start, UUID_SIZE);
239                 }
240
241                 if (jbd_get32(tag, flags) & JBD_FLAG_LAST_TAG)
242                         *last_tag = true;
243
244         } else {
245                 struct jbd_block_tag *tag = __tag;
246                 *block = jbd_get32(tag, blocknr);
247                 if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
248                                              JBD_FEATURE_INCOMPAT_64BIT))
249                          *block |= (uint64_t)jbd_get32(tag, blocknr_high) << 32;
250
251                 if (jbd_get16(tag, flags) & JBD_FLAG_ESCAPE)
252                         *block = 0;
253
254                 if (!(jbd_get16(tag, flags) & JBD_FLAG_SAME_UUID)) {
255                         uuid_start = (char *)tag + tag_bytes;
256                         *uuid_exist = true;
257                         memcpy(uuid, uuid_start, UUID_SIZE);
258                 }
259
260                 if (jbd_get16(tag, flags) & JBD_FLAG_LAST_TAG)
261                         *last_tag = true;
262
263         }
264 }
265
266 static void
267 jbd_iterate_block_table(struct jbd_fs *jbd_fs,
268                         void *__tag_start,
269                         uint32_t tag_tbl_size,
270                         void (*func)(struct jbd_fs * jbd_fs,
271                                         ext4_fsblk_t block,
272                                         uint8_t *uuid,
273                                         void *arg),
274                         void *arg)
275 {
276         ext4_fsblk_t block = 0;
277         uint8_t uuid[UUID_SIZE];
278         char *tag_start, *tag_ptr;
279         uint32_t tag_bytes = jbd_tag_bytes(jbd_fs);
280         tag_start = __tag_start;
281         tag_ptr = tag_start;
282
283         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
284                                      JBD_FEATURE_INCOMPAT_CSUM_V2) ||
285             JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
286                                      JBD_FEATURE_INCOMPAT_CSUM_V3))
287                 tag_tbl_size -= sizeof(struct jbd_block_tail);
288
289         while (tag_ptr - tag_start + tag_bytes <= tag_tbl_size) {
290                 bool uuid_exist;
291                 bool last_tag;
292                 jbd_extract_block_tag(jbd_fs,
293                                       tag_bytes,
294                                       tag_ptr,
295                                       &block,
296                                       &uuid_exist,
297                                       uuid,
298                                       &last_tag);
299                 if (func)
300                         func(jbd_fs, block, uuid, arg);
301
302                 if (last_tag)
303                         break;
304
305                 tag_ptr += tag_bytes;
306                 if (uuid_exist)
307                         tag_ptr += UUID_SIZE;
308
309         }
310 }
311
312 static void jbd_display_block_tags(struct jbd_fs *jbd_fs,
313                                    ext4_fsblk_t block,
314                                    uint8_t *uuid,
315                                    void *arg)
316 {
317         uint32_t *iblock = arg;
318         ext4_dbg(DEBUG_JBD, "Block in block_tag: %" PRIu64 "\n", block);
319         (*iblock)++;
320         (void)jbd_fs;
321         (void)uuid;
322         return;
323 }
324
325 static struct revoke_entry *
326 jbd_revoke_entry_lookup(struct recover_info *info, ext4_fsblk_t block)
327 {
328         struct revoke_entry tmp = {
329                 .block = block
330         };
331
332         return RB_FIND(jbd_revoke, &info->revoke_root, &tmp);
333 }
334
335 static void jbd_replay_block_tags(struct jbd_fs *jbd_fs,
336                                   ext4_fsblk_t block,
337                                   uint8_t *uuid __unused,
338                                   void *__arg)
339 {
340         int r;
341         struct replay_arg *arg = __arg;
342         struct recover_info *info = arg->info;
343         uint32_t *this_block = arg->this_block;
344         struct revoke_entry *revoke_entry;
345         struct ext4_block journal_block, ext4_block;
346         struct ext4_fs *fs = jbd_fs->inode_ref.fs;
347         ext4_dbg(DEBUG_JBD,
348                  "Replaying block in block_tag: %" PRIu64 "\n",
349                  block);
350         (*this_block)++;
351
352         revoke_entry = jbd_revoke_entry_lookup(info, block);
353         if (revoke_entry)
354                 return;
355
356         r = jbd_block_get(jbd_fs, &journal_block, *this_block);
357         if (r != EOK)
358                 return;
359
360         if (block) {
361                 r = ext4_block_get_noread(fs->bdev, &ext4_block, block);
362                 if (r != EOK) {
363                         jbd_block_set(jbd_fs, &journal_block);
364                         return;
365                 }
366
367                 memcpy(ext4_block.data,
368                         journal_block.data,
369                         jbd_get32(&jbd_fs->sb, blocksize));
370
371                 ext4_block.dirty = true;
372                 ext4_block_set(fs->bdev, &ext4_block);
373         } else {
374                 uint16_t mount_count, state;
375                 mount_count = ext4_get16(&fs->sb, mount_count);
376                 state = ext4_get16(&fs->sb, state);
377
378                 memcpy(&fs->sb,
379                         journal_block.data + EXT4_SUPERBLOCK_OFFSET,
380                         EXT4_SUPERBLOCK_SIZE);
381
382                 /* Mark system as mounted */
383                 ext4_set16(&fs->sb, state, state);
384                 r = ext4_sb_write(fs->bdev, &fs->sb);
385                 if (r != EOK)
386                         return;
387
388                 /*Update mount count*/
389                 ext4_set16(&fs->sb, mount_count, mount_count);
390         }
391
392         jbd_block_set(jbd_fs, &journal_block);
393         
394         return;
395 }
396
397 static void jbd_add_revoke_block_tags(struct recover_info *info,
398                                       ext4_fsblk_t block)
399 {
400         struct revoke_entry *revoke_entry;
401
402         ext4_dbg(DEBUG_JBD, "Add block %" PRIu64 " to revoke tree\n", block);
403         revoke_entry = jbd_revoke_entry_lookup(info, block);
404         if (revoke_entry) {
405                 revoke_entry->trans_id = info->this_trans_id;
406                 return;
407         }
408
409         revoke_entry = jbd_alloc_revoke_entry();
410         ext4_assert(revoke_entry);
411         revoke_entry->block = block;
412         revoke_entry->trans_id = info->this_trans_id;
413         RB_INSERT(jbd_revoke, &info->revoke_root, revoke_entry);
414
415         return;
416 }
417
418 static void jbd_destroy_revoke_tree(struct recover_info *info)
419 {
420         while (!RB_EMPTY(&info->revoke_root)) {
421                 struct revoke_entry *revoke_entry =
422                         RB_MIN(jbd_revoke, &info->revoke_root);
423                 ext4_assert(revoke_entry);
424                 RB_REMOVE(jbd_revoke, &info->revoke_root, revoke_entry);
425                 jbd_free_revoke_entry(revoke_entry);
426         }
427 }
428
/*
 * Make sure we wrap around the log correctly!
 *
 * When the block cursor @var has reached the journal superblock's maxlen
 * it is moved back by (maxlen - first). @var must be an lvalue and is
 * evaluated more than once, so it must not have side effects.
 */
#define wrap(sb, var)                                                   \
do {                                                                    \
        if ((var) >= jbd_get32((sb), maxlen))                           \
                (var) -= (jbd_get32((sb), maxlen) -                     \
                          jbd_get32((sb), first));                      \
} while (0)
435
/* Pass selectors for jbd_iterate_log(). */
#define ACTION_SCAN 0    /* walk the log and record its transaction range */
#define ACTION_REVOKE 1  /* collect revoke records into the revoke tree */
#define ACTION_RECOVER 2 /* replay the blocks named by descriptor blocks */
439
440
441 static void jbd_build_revoke_tree(struct jbd_fs *jbd_fs,
442                                   struct jbd_bhdr *header,
443                                   struct recover_info *info)
444 {
445         char *blocks_entry;
446         struct jbd_revoke_header *revoke_hdr =
447                 (struct jbd_revoke_header *)header;
448         uint32_t i, nr_entries, record_len = 4;
449         if (JBD_HAS_INCOMPAT_FEATURE(&jbd_fs->sb,
450                                      JBD_FEATURE_INCOMPAT_64BIT))
451                 record_len = 8;
452
453         nr_entries = (revoke_hdr->count -
454                         sizeof(struct jbd_revoke_header)) /
455                         record_len;
456
457         blocks_entry = (char *)(revoke_hdr + 1);
458
459         for (i = 0;i < nr_entries;i++) {
460                 if (record_len == 8) {
461                         uint64_t *blocks =
462                                 (uint64_t *)blocks_entry;
463                         jbd_add_revoke_block_tags(info, *blocks);
464                 } else {
465                         uint32_t *blocks =
466                                 (uint32_t *)blocks_entry;
467                         jbd_add_revoke_block_tags(info, *blocks);
468                 }
469                 blocks_entry += record_len;
470         }
471 }
472
473 static void jbd_debug_descriptor_block(struct jbd_fs *jbd_fs,
474                                        struct jbd_bhdr *header,
475                                        uint32_t *iblock)
476 {
477         jbd_iterate_block_table(jbd_fs,
478                                 header + 1,
479                                 jbd_get32(&jbd_fs->sb, blocksize) -
480                                         sizeof(struct jbd_bhdr),
481                                 jbd_display_block_tags,
482                                 iblock);
483 }
484
485 static void jbd_replay_descriptor_block(struct jbd_fs *jbd_fs,
486                                         struct jbd_bhdr *header,
487                                         struct replay_arg *arg)
488 {
489         jbd_iterate_block_table(jbd_fs,
490                                 header + 1,
491                                 jbd_get32(&jbd_fs->sb, blocksize) -
492                                         sizeof(struct jbd_bhdr),
493                                 jbd_replay_block_tags,
494                                 arg);
495 }
496
497 int jbd_iterate_log(struct jbd_fs *jbd_fs,
498                     struct recover_info *info,
499                     int action)
500 {
501         int r = EOK;
502         bool log_end = false;
503         struct jbd_sb *sb = &jbd_fs->sb;
504         uint32_t start_trans_id, this_trans_id;
505         uint32_t start_block, this_block;
506
507         start_trans_id = this_trans_id = jbd_get32(sb, sequence);
508         start_block = this_block = jbd_get32(sb, start);
509
510         ext4_dbg(DEBUG_JBD, "Start of journal at trans id: %" PRIu32 "\n",
511                             start_trans_id);
512
513         while (!log_end) {
514                 struct ext4_block block;
515                 struct jbd_bhdr *header;
516                 if (action != ACTION_SCAN)
517                         if (this_trans_id > info->last_trans_id) {
518                                 log_end = true;
519                                 continue;
520                         }
521
522                 r = jbd_block_get(jbd_fs, &block, this_block);
523                 if (r != EOK)
524                         break;
525
526                 header = (struct jbd_bhdr *)block.data;
527                 if (jbd_get32(header, magic) != JBD_MAGIC_NUMBER) {
528                         jbd_block_set(jbd_fs, &block);
529                         log_end = true;
530                         continue;
531                 }
532
533                 if (jbd_get32(header, sequence) != this_trans_id) {
534                         if (action != ACTION_SCAN)
535                                 r = EIO;
536
537                         jbd_block_set(jbd_fs, &block);
538                         log_end = true;
539                         continue;
540                 }
541
542                 switch (jbd_get32(header, blocktype)) {
543                 case JBD_DESCRIPTOR_BLOCK:
544                         ext4_dbg(DEBUG_JBD, "Descriptor block: %u, "
545                                             "trans_id: %u\n",
546                                             this_block, this_trans_id);
547                         if (action == ACTION_SCAN)
548                                 jbd_debug_descriptor_block(jbd_fs,
549                                                 header, &this_block);
550                         else if (action == ACTION_RECOVER) {
551                                 struct replay_arg replay_arg;
552                                 replay_arg.info = info;
553                                 replay_arg.this_block = &this_block;
554                                 jbd_replay_descriptor_block(jbd_fs,
555                                                 header, &replay_arg);
556                         }
557
558                         break;
559                 case JBD_COMMIT_BLOCK:
560                         ext4_dbg(DEBUG_JBD, "Commit block: %u, "
561                                             "trans_id: %u\n",
562                                             this_block, this_trans_id);
563                         this_trans_id++;
564                         break;
565                 case JBD_REVOKE_BLOCK:
566                         ext4_dbg(DEBUG_JBD, "Revoke block: %u, "
567                                             "trans_id: %u\n",
568                                             this_block, this_trans_id);
569                         if (action == ACTION_REVOKE) {
570                                 info->this_trans_id = this_trans_id;
571                                 jbd_build_revoke_tree(jbd_fs,
572                                                 header, info);
573                         }
574                         break;
575                 default:
576                         log_end = true;
577                         break;
578                 }
579                 jbd_block_set(jbd_fs, &block);
580                 this_block++;
581                 wrap(sb, this_block);
582                 if (this_block == start_block)
583                         log_end = true;
584
585         }
586         ext4_dbg(DEBUG_JBD, "End of journal.\n");
587         if (r == EOK && action == ACTION_SCAN) {
588                 info->start_trans_id = start_trans_id;
589                 if (this_trans_id > start_trans_id)
590                         info->last_trans_id = this_trans_id - 1;
591                 else
592                         info->last_trans_id = this_trans_id;
593         }
594
595         return r;
596 }
597
598 int jbd_recover(struct jbd_fs *jbd_fs)
599 {
600         int r;
601         struct recover_info info;
602         struct jbd_sb *sb = &jbd_fs->sb;
603         if (!sb->start)
604                 return EOK;
605
606         RB_INIT(&info.revoke_root);
607
608         r = jbd_iterate_log(jbd_fs, &info, ACTION_SCAN);
609         if (r != EOK)
610                 return r;
611
612         r = jbd_iterate_log(jbd_fs, &info, ACTION_REVOKE);
613         if (r != EOK)
614                 return r;
615
616         r = jbd_iterate_log(jbd_fs, &info, ACTION_RECOVER);
617         if (r == EOK) {
618                 jbd_set32(&jbd_fs->sb, start, 0);
619                 jbd_fs->dirty = true;
620         }
621         jbd_destroy_revoke_tree(&info);
622         return r;
623 }