METADATA_CSUM: ext4_extent_full: do not do checksum on extent root.
[lwext4.git] / lwext4 / ext4_extent_full.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * - Redistributions of source code must retain the above copyright
10  *   notice, this list of conditions and the following disclaimer.
11  * - Redistributions in binary form must reproduce the above copyright
12  *   notice, this list of conditions and the following disclaimer in the
13  *   documentation and/or other materials provided with the distribution.
14  * - The name of the author may not be used to endorse or promote products
15  *   derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include "ext4_config.h"
30 #include "ext4_blockdev.h"
31 #include "ext4_fs.h"
32 #include "ext4_super.h"
33 #include "ext4_crc32c.h"
34 #include "ext4_balloc.h"
35 #include "ext4_debug.h"
36
37 #include <stdlib.h>
38 #include <string.h>
39 #include <inttypes.h>
40 #include <stddef.h>
41
42 #include "ext4_extent.h"
43
44 #if CONFIG_EXTENT_FULL
45
46 /*
47  * Flags used by extent splitting.
48  */
49 #define EXT4_EXT_MARK_UNWRIT1 0x02 /* mark first half unwritten */
50 #define EXT4_EXT_MARK_UNWRIT2 0x04 /* mark second half unwritten */
51 #define EXT4_EXT_DATA_VALID1 0x08  /* first half contains valid data */
52 #define EXT4_EXT_DATA_VALID2 0x10  /* second half contains valid data */
53 #define EXT4_EXT_NO_COMBINE 0x20   /* do not combine two extents */
54
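/* Locate the extent tail (which carries the block checksum) placed right
 * after the last possible entry described by the header @eh. */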
55 static struct ext4_extent_tail *
56 find_ext4_extent_tail(struct ext4_extent_header *eh)
57 {
58         return (struct ext4_extent_tail *)(((char *)eh) +
59                                            EXT4_EXTENT_TAIL_OFFSET(eh));
60 }
61
62 static struct ext4_extent_header *ext_inode_hdr(struct ext4_inode *inode)
63 {
64         return (struct ext4_extent_header *)inode->blocks;
65 }
66
67 static struct ext4_extent_header *ext_block_hdr(struct ext4_block *block)
68 {
69         return (struct ext4_extent_header *)block->data;
70 }
71
72 static uint16_t ext_depth(struct ext4_inode *inode)
73 {
74         return to_le16(ext_inode_hdr(inode)->depth);
75 }
76
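/* Return the real number of blocks covered by @ext: unwritten extents are
 * flagged by the EXT_INIT_MAX_LEN bit in block_count, which is stripped here. */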
77 static uint16_t ext4_ext_get_actual_len(struct ext4_extent *ext)
78 {
79         return (to_le16(ext->block_count) <= EXT_INIT_MAX_LEN
80                     ? to_le16(ext->block_count)
81                     : (to_le16(ext->block_count) - EXT_INIT_MAX_LEN));
82 }
83
84 static void ext4_ext_mark_initialized(struct ext4_extent *ext)
85 {
86         ext->block_count = to_le16(ext4_ext_get_actual_len(ext));
87 }
88
89 static void ext4_ext_mark_unwritten(struct ext4_extent *ext)
90 {
91         ext->block_count |= to_le16(EXT_INIT_MAX_LEN);
92 }
93
94 static int ext4_ext_is_unwritten(struct ext4_extent *ext)
95 {
96         /* Extent with ee_len of 0x8000 is treated as an initialized extent */
97         return (to_le16(ext->block_count) > EXT_INIT_MAX_LEN);
98 }
99
100 /*
101  * ext4_ext_pblock:
102  * combine low and high parts of physical block number into ext4_fsblk_t
103  */
104 static ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
105 {
106         ext4_fsblk_t block;
107
108         block = to_le32(ex->start_lo);
109         block |= ((ext4_fsblk_t)to_le16(ex->start_hi) << 31) << 1;
110         return block;
111 }
112
113 /*
114  * ext4_idx_pblock:
115  * combine low and high parts of a leaf physical block number into ext4_fsblk_t
116  */
117 static ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_index *ix)
118 {
119         ext4_fsblk_t block;
120
121         block = to_le32(ix->leaf_lo);
122         block |= ((ext4_fsblk_t)to_le16(ix->leaf_hi) << 31) << 1;
123         return block;
124 }
125
126 /*
127  * ext4_ext_store_pblock:
128  * stores a large physical block number into an extent struct,
129  * breaking it into parts
130  */
131 static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
132 {
133         ex->start_lo = to_le32((uint32_t)(pb & 0xffffffff));
134         ex->start_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
135 }
136
137 /*
138  * ext4_idx_store_pblock:
139  * stores a large physical block number into an index struct,
140  * breaking it into parts
141  */
142 static void ext4_idx_store_pblock(struct ext4_extent_index *ix, ext4_fsblk_t pb)
143 {
144         ix->leaf_lo = to_le32((uint32_t)(pb & 0xffffffff));
145         ix->leaf_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
146 }
147
148 static int ext4_allocate_single_block(struct ext4_inode_ref *inode_ref,
149                                       ext4_fsblk_t goal,
150                                       ext4_fsblk_t *blockp)
151 {
152         return ext4_balloc_alloc_block(inode_ref, goal, blockp);
153 }
154
155 static ext4_fsblk_t ext4_new_meta_blocks(struct ext4_inode_ref *inode_ref,
156                                          ext4_fsblk_t goal,
157                                          uint32_t flags __unused,
158                                          uint32_t *count, int *errp)
159 {
160         ext4_fsblk_t block = 0;
161
162         *errp = ext4_allocate_single_block(inode_ref, goal, &block);
163         if (count)
164                 *count = 1;
165         return block;
166 }
167
168 static void ext4_ext_free_blocks(struct ext4_inode_ref *inode_ref,
169                                  ext4_fsblk_t block, uint32_t count,
170                                  uint32_t flags __unused)
171 {
172         ext4_balloc_free_blocks(inode_ref, block, count);
173 }
174
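/* Number of struct ext4_extent entries that fit into one metadata block. */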
175 static size_t ext4_ext_space_block(struct ext4_inode_ref *inode_ref)
176 {
177         size_t size;
178         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
179
180         size = (block_size - sizeof(struct ext4_extent_header)) /
181                sizeof(struct ext4_extent);
182 #ifdef AGGRESSIVE_TEST
183         if (size > 6)
184                 size = 6;
185 #endif
186         return size;
187 }
188
189 static size_t ext4_ext_space_block_idx(struct ext4_inode_ref *inode_ref)
190 {
191         size_t size;
192         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
193
194         size = (block_size - sizeof(struct ext4_extent_header)) /
195                sizeof(struct ext4_extent_index);
196 #ifdef AGGRESSIVE_TEST
197         if (size > 5)
198                 size = 5;
199 #endif
200         return size;
201 }
202
203 static size_t ext4_ext_space_root(struct ext4_inode_ref *inode_ref)
204 {
205         size_t size;
206
207         size = sizeof(inode_ref->inode->blocks);
208         size -= sizeof(struct ext4_extent_header);
209         size /= sizeof(struct ext4_extent);
210 #ifdef AGGRESSIVE_TEST
211         if (size > 3)
212                 size = 3;
213 #endif
214         return size;
215 }
216
217 static size_t ext4_ext_space_root_idx(struct ext4_inode_ref *inode_ref)
218 {
219         size_t size;
220
221         size = sizeof(inode_ref->inode->blocks);
222         size -= sizeof(struct ext4_extent_header);
223         size /= sizeof(struct ext4_extent_index);
224 #ifdef AGGRESSIVE_TEST
225         if (size > 4)
226                 size = 4;
227 #endif
228         return size;
229 }
230
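/* Maximum number of entries for a node at the given depth: the root node is
 * stored inside the inode's block array and thus holds fewer entries than a
 * node occupying a full metadata block. */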
231 static size_t ext4_ext_max_entries(struct ext4_inode_ref *inode_ref,
232                                    uint32_t depth)
233 {
234         size_t max;
235
236         if (depth == ext_depth(inode_ref->inode)) {
237                 if (depth == 0)
238                         max = ext4_ext_space_root(inode_ref);
239                 else
240                         max = ext4_ext_space_root_idx(inode_ref);
241         } else {
242                 if (depth == 0)
243                         max = ext4_ext_space_block(inode_ref);
244                 else
245                         max = ext4_ext_space_block_idx(inode_ref);
246         }
247
248         return max;
249 }
250
251 static ext4_fsblk_t ext4_ext_find_goal(struct ext4_inode_ref *inode_ref,
252                                        struct ext4_extent_path *path,
253                                        ext4_lblk_t block)
254 {
255         if (path) {
256                 uint32_t depth = path->depth;
257                 struct ext4_extent *ex;
258
259                 /*
260                  * Try to predict block placement assuming that we are
261                  * filling in a file which will eventually be
262                  * non-sparse --- e.g., libbfd writing out an ELF
263                  * object's sections out of order, but in a way that
264                  * eventually results in a contiguous object or
265                  * executable file, or some database extending a table
266                  * space file.  However, this is actually somewhat
267                  * non-ideal if we are writing a sparse file such as
268                  * qemu or KVM writing a raw image file that is going
269                  * to stay fairly sparse, since it will end up
270                  * fragmenting the file system's free space.  Maybe we
271                  * should have some heuristics or some way to allow
272                  * userspace to pass a hint to the file system,
273                  * especially if the latter case turns out to be
274                  * common.
275                  */
276                 ex = path[depth].extent;
277                 if (ex) {
278                         ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
279                         ext4_lblk_t ext_block = to_le32(ex->first_block);
280
281                         if (block > ext_block)
282                                 return ext_pblk + (block - ext_block);
283                         else
284                                 return ext_pblk - (ext_block - block);
285                 }
286
287                 /* it looks like index is empty;
288                  * try to find starting block from index itself */
289                 if (path[depth].block.lb_id)
290                         return path[depth].block.lb_id;
291         }
292
293         /* OK. use inode's group */
294         return ext4_fs_inode_to_goal_block(inode_ref);
295 }
296
297 /*
298  * Allocation for a meta data block
299  */
300 static ext4_fsblk_t ext4_ext_new_meta_block(struct ext4_inode_ref *inode_ref,
301                                             struct ext4_extent_path *path,
302                                             struct ext4_extent *ex, int *err,
303                                             uint32_t flags)
304 {
305         ext4_fsblk_t goal, newblock;
306
307         goal = ext4_ext_find_goal(inode_ref, path, to_le32(ex->first_block));
308         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, err);
309         return newblock;
310 }
311
312 static uint32_t ext4_ext_block_csum(struct ext4_inode_ref *inode_ref,
313                                     struct ext4_extent_header *eh)
314 {
315         uint32_t checksum = 0;
316         struct ext4_sblock *sb = &inode_ref->fs->sb;
317
318         if (ext4_sb_has_feature_read_only(sb,
319                                 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
320                 uint32_t ino_index = to_le32(inode_ref->index);
321                 uint32_t ino_gen =
322                         to_le32(ext4_inode_get_generation(inode_ref->inode));
323                 /* First calculate crc32 checksum against fs uuid */
324                 checksum = ext4_crc32c(~0, sb->uuid, sizeof(sb->uuid));
325                 /* Then calculate crc32 checksum against inode number
326                  * and inode generation */
327                 checksum = ext4_crc32c(checksum, &ino_index,
328                                      sizeof(ino_index));
329                 checksum = ext4_crc32c(checksum, &ino_gen,
330                                      sizeof(ino_gen));
331                 /* Finally calculate crc32 checksum against 
332                  * the entire extent block up to the checksum field */
333                 checksum = ext4_crc32c(checksum, eh,
334                                 EXT4_EXTENT_TAIL_OFFSET(eh));
335         }
336         return checksum;
337 }
338
339 static void ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref,
340                                        struct ext4_extent_header *eh)
341 {
342         struct ext4_extent_tail *tail;
343
344         tail = find_ext4_extent_tail(eh);
345         tail->et_checksum = ext4_ext_block_csum(inode_ref, eh);
346 }
347
348 static int ext4_ext_dirty(struct ext4_inode_ref *inode_ref,
349                           struct ext4_extent_path *path)
350 {
351         if (path->block.lb_id) {
352                 ext4_extent_block_csum_set(inode_ref, path->header);
353                 path->block.dirty = true;
354         } else
355                 inode_ref->dirty = true;
356
357         return EOK;
358 }
359
360 static void ext4_ext_drop_refs(struct ext4_inode_ref *inode_ref,
361                                struct ext4_extent_path *path, bool keep_other)
362 {
363         int32_t depth, i;
364
365         if (!path)
366                 return;
367         if (keep_other)
368                 depth = 0;
369         else
370                 depth = path->depth;
371
372         for (i = 0; i <= depth; i++, path++) {
373                 if (path->block.lb_id) {
374                         ext4_block_set(inode_ref->fs->bdev, &path->block);
375                 }
376         }
377 }
378
379 /*
380  * Check whether the basic information inside the extent header
381  * is correct.
382  */
383 static int ext4_ext_check(struct ext4_inode_ref *inode_ref,
384                           struct ext4_extent_header *eh, uint16_t depth,
385                           ext4_fsblk_t pblk __unused)
386 {
387         struct ext4_extent_tail *tail;
388         const char *error_msg;
389         (void)error_msg;
390
391         if (to_le16(eh->magic) != EXT4_EXTENT_MAGIC) {
392                 error_msg = "invalid magic";
393                 goto corrupted;
394         }
395         if (to_le16(eh->depth) != depth) {
396                 error_msg = "unexpected eh_depth";
397                 goto corrupted;
398         }
399         if (eh->max_entries_count == 0) {
400                 error_msg = "invalid eh_max";
401                 goto corrupted;
402         }
403         if (to_le16(eh->entries_count) > to_le16(eh->max_entries_count)) {
404                 error_msg = "invalid eh_entries";
405                 goto corrupted;
406         }
407
408         tail = find_ext4_extent_tail(eh);
409         if (tail->et_checksum != ext4_ext_block_csum(inode_ref, eh)) {
410                 /* FIXME: Warning: extent checksum damaged? */
411         }
412
413         return EOK;
414
415 corrupted:
416         ext4_dbg(DEBUG_EXTENT, "Bad extents B+ tree block: %s. "
417                                "Blocknr: %" PRIu64 "\n",
418                  error_msg, pblk);
419         return EIO;
420 }
421
422 static int read_extent_tree_block(struct ext4_inode_ref *inode_ref,
423                                   ext4_fsblk_t pblk, int32_t depth,
424                                   struct ext4_block *bh,
425                                   uint32_t flags __unused)
426 {
427         int err;
428
429         err = ext4_block_get(inode_ref->fs->bdev, bh, pblk);
430         if (err != EOK)
431                 goto errout;
432
433         err = ext4_ext_check(inode_ref, ext_block_hdr(bh), depth, pblk);
434         if (err != EOK)
435                 goto errout;
436
437         return EOK;
438 errout:
439         if (bh->lb_id)
440                 ext4_block_set(inode_ref->fs->bdev, bh);
441
442         return err;
443 }
444
445 /*
446  * ext4_ext_binsearch_idx:
447  * binary search for the closest index of the given block
448  * the header must be checked before calling this
449  */
450 static void ext4_ext_binsearch_idx(struct ext4_extent_path *path,
451                                    ext4_lblk_t block)
452 {
453         struct ext4_extent_header *eh = path->header;
454         struct ext4_extent_index *r, *l, *m;
455
456         l = EXT_FIRST_INDEX(eh) + 1;
457         r = EXT_LAST_INDEX(eh);
458         while (l <= r) {
459                 m = l + (r - l) / 2;
460                 if (block < to_le32(m->first_block))
461                         r = m - 1;
462                 else
463                         l = m + 1;
464         }
465
466         path->index = l - 1;
467 }
468
469 /*
470  * ext4_ext_binsearch:
471  * binary search for closest extent of the given block
472  * the header must be checked before calling this
473  */
474 static void ext4_ext_binsearch(struct ext4_extent_path *path, ext4_lblk_t block)
475 {
476         struct ext4_extent_header *eh = path->header;
477         struct ext4_extent *r, *l, *m;
478
479         if (eh->entries_count == 0) {
480                 /*
481                  * this leaf is empty:
482                  * we get such a leaf in split/add case
483                  */
484                 return;
485         }
486
487         l = EXT_FIRST_EXTENT(eh) + 1;
488         r = EXT_LAST_EXTENT(eh);
489
490         while (l <= r) {
491                 m = l + (r - l) / 2;
492                 if (block < to_le32(m->first_block))
493                         r = m - 1;
494                 else
495                         l = m + 1;
496         }
497
498         path->extent = l - 1;
499 }
500
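/* Walk the extent tree from the root stored in the inode down to the leaf
 * covering @block, filling one ext4_extent_path entry per level in
 * *orig_path.  On failure the partially built path is released. */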
501 static int ext4_find_extent(struct ext4_inode_ref *inode_ref, ext4_lblk_t block,
502                             struct ext4_extent_path **orig_path, uint32_t flags)
503 {
504         struct ext4_extent_header *eh;
505         struct ext4_block bh = EXT4_BLOCK_ZERO();
506         ext4_fsblk_t buf_block = 0;
507         struct ext4_extent_path *path = *orig_path;
508         int32_t depth, ppos = 0;
509         int32_t i;
510         int ret;
511
512         eh = ext_inode_hdr(inode_ref->inode);
513         depth = ext_depth(inode_ref->inode);
514
515         if (path) {
516                 ext4_ext_drop_refs(inode_ref, path, 0);
517                 if (depth > path[0].maxdepth) {
518                         free(path);
519                         *orig_path = path = NULL;
520                 }
521         }
522         if (!path) {
523                 int32_t path_depth = depth + 1;
524                 /* account possible depth increase */
525                 path = calloc(1, sizeof(struct ext4_extent_path) *
526                                      (path_depth + 1));
527                 if (!path)
528                         return ENOMEM;
529                 path[0].maxdepth = path_depth;
530         }
531         path[0].header = eh;
532         path[0].block = bh;
533
534         i = depth;
535         /* walk through the tree */
536         while (i) {
537                 ext4_ext_binsearch_idx(path + ppos, block);
538                 path[ppos].p_block = ext4_idx_pblock(path[ppos].index);
539                 path[ppos].depth = i;
540                 path[ppos].extent = NULL;
541                 buf_block = path[ppos].p_block;
542
543                 i--;
544                 ppos++;
545                 if (!path[ppos].block.lb_id ||
546                     path[ppos].block.lb_id != buf_block) {
547                         ret = read_extent_tree_block(inode_ref, buf_block, i,
548                                                      &bh, flags);
549                         if (ret != EOK) {
550                                 goto err;
551                         }
552                         if (ppos > depth) {
553                                 ext4_block_set(inode_ref->fs->bdev, &bh);
554                                 ret = EIO;
555                                 goto err;
556                         }
557
558                         eh = ext_block_hdr(&bh);
559                         path[ppos].block = bh;
560                         path[ppos].header = eh;
561                 }
562         }
563
564         path[ppos].depth = i;
565         path[ppos].extent = NULL;
566         path[ppos].index = NULL;
567
568         /* find extent */
569         ext4_ext_binsearch(path + ppos, block);
570         /* if not an empty leaf */
571         if (path[ppos].extent)
572                 path[ppos].p_block = ext4_ext_pblock(path[ppos].extent);
573
574         *orig_path = path;
575
576         ret = EOK;
577         return ret;
578
579 err:
580         ext4_ext_drop_refs(inode_ref, path, 0);
581         free(path);
582         if (orig_path)
583                 *orig_path = NULL;
584         return ret;
585 }
586
587 static void ext4_ext_init_header(struct ext4_inode_ref *inode_ref,
588                                  struct ext4_extent_header *eh, int32_t depth)
589 {
590         eh->entries_count = 0;
591         eh->max_entries_count = to_le16(ext4_ext_max_entries(inode_ref, depth));
592         eh->magic = to_le16(EXT4_EXTENT_MAGIC);
593         eh->depth = depth;
594 }
595
596 /*
597  * Be cautious: the buffer head returned is not yet marked dirty. */
598 static int ext4_ext_split_node(struct ext4_inode_ref *inode_ref,
599                                struct ext4_extent_path *path, int32_t at,
600                                struct ext4_extent *newext,
601                                ext4_fsblk_t *sibling, struct ext4_block *new_bh)
602 {
603         int ret;
604         ext4_fsblk_t newblock;
605         struct ext4_block bh = EXT4_BLOCK_ZERO();
606         int32_t depth = ext_depth(inode_ref->inode);
607
608         ext4_assert(sibling);
609
610         /* FIXME: currently we split at the point after the current extent. */
611         newblock = ext4_ext_new_meta_block(inode_ref, path, newext, &ret, 0);
612         if (ret)
613                 goto cleanup;
614
615         /* Get the new block for write access. */
616         ret = ext4_block_get(inode_ref->fs->bdev, &bh, newblock);
617         if (ret != EOK)
618                 goto cleanup;
619
620         if (at == depth) {
621                 /* start copy from next extent */
622                 ptrdiff_t m = EXT_MAX_EXTENT(path[at].header) - path[at].extent;
623                 struct ext4_extent_header *neh;
624                 neh = ext_block_hdr(&bh);
625                 ext4_ext_init_header(inode_ref, neh, 0);
626                 if (m) {
627                         struct ext4_extent *ex;
628                         ex = EXT_FIRST_EXTENT(neh);
629                         memmove(ex, path[at].extent + 1,
630                                 sizeof(struct ext4_extent) * m);
631                         neh->entries_count =
632                             to_le16(to_le16(neh->entries_count) + m);
633                         path[at].header->entries_count = to_le16(
634                             to_le16(path[at].header->entries_count) - m);
635                         ret = ext4_ext_dirty(inode_ref, path + at);
636                         if (ret)
637                                 goto cleanup;
638                 }
639         } else {
640                 ptrdiff_t m = EXT_MAX_INDEX(path[at].header) - path[at].index;
641                 struct ext4_extent_header *neh;
642                 neh = ext_block_hdr(&bh);
643                 ext4_ext_init_header(inode_ref, neh, depth - at);
644                 if (m) {
645                         struct ext4_extent_index *ix;
646                         ix = EXT_FIRST_INDEX(neh);
647                         memmove(ix, path[at].index + 1,
648                                 sizeof(struct ext4_extent_index) * m);
649                         neh->entries_count =
650                             to_le16(to_le16(neh->entries_count) + m);
651                         path[at].header->entries_count = to_le16(
652                             to_le16(path[at].header->entries_count) - m);
653                         ret = ext4_ext_dirty(inode_ref, path + at);
654                         if (ret)
655                                 goto cleanup;
656                 }
657         }
658 cleanup:
659         if (ret) {
660                 if (bh.lb_id) {
661                         ext4_block_set(inode_ref->fs->bdev, &bh);
662                 }
663                 if (newblock)
664                         ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
665
666                 newblock = 0;
667         }
668         *sibling = newblock;
669         *new_bh = bh;
670         return ret;
671 }
672
673 static ext4_lblk_t ext4_ext_block_index(struct ext4_extent_header *eh)
674 {
675         if (eh->depth)
676                 return to_le32(EXT_FIRST_INDEX(eh)->first_block);
677
678         return to_le32(EXT_FIRST_EXTENT(eh)->first_block);
679 }
680
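/* Per-level state of a split operation: the newly allocated sibling block,
 * a path element describing it, and a flag telling whether the insertion
 * point moves over to the new (right) sub-tree. */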
681 struct ext_split_trans {
682         ext4_fsblk_t ptr;
683         struct ext4_extent_path path;
684         int switch_to;
685 };
686
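/* Insert the index entry (@insert_index -> @insert_block) into the node at
 * level @at, splitting that node first if it is full.  If even the root has
 * no room, *need_grow is set and the caller has to grow the tree in depth. */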
687 static int ext4_ext_insert_index(struct ext4_inode_ref *inode_ref,
688                                  struct ext4_extent_path *path,
689                                  int32_t at,
690                                  struct ext4_extent *newext,
691                                  ext4_lblk_t insert_index,
692                                  ext4_fsblk_t insert_block,
693                                  struct ext_split_trans *spt,
694                                  bool *need_grow)
695 {
696         struct ext4_extent_index *ix;
697         struct ext4_extent_path *curp = path + at;
698         struct ext4_block bh = EXT4_BLOCK_ZERO();
699         int32_t len;
700         int err;
701         struct ext4_extent_header *eh;
702
703         *need_grow = false;
704
705         if (curp->index && insert_index == to_le32(curp->index->first_block))
706                 return EIO;
707
708         if (to_le16(curp->header->entries_count) ==
709             to_le16(curp->header->max_entries_count)) {
710                 if (at) {
711                         struct ext4_extent_header *neh;
712                         err = ext4_ext_split_node(inode_ref, path, at, newext,
713                                                   &spt->ptr, &bh);
714                         if (err != EOK)
715                                 goto out;
716
717                         neh = ext_block_hdr(&bh);
718                         if (insert_index > to_le32(curp->index->first_block)) {
719                                 /* Make decision which node should be used to
720                                  * insert the index.*/
721                                 if (to_le16(neh->entries_count) >
722                                     to_le16(curp->header->entries_count)) {
723                                         eh = curp->header;
724                                         /* insert after */
725                                         ix = EXT_LAST_INDEX(eh) + 1;
726                                 } else {
727                                         eh = neh;
728                                         ix = EXT_FIRST_INDEX(eh);
729                                 }
730                         } else {
731                                 eh = curp->header;
732                                 /* insert before */
733                                 ix = EXT_LAST_INDEX(eh);
734                         }
735                 } else {
736                         err = EOK;
737                         *need_grow = true;
738                         goto out;
739                 }
740         } else {
741                 eh = curp->header;
742                 if (curp->index == NULL) {
743                         ix = EXT_FIRST_INDEX(eh);
744                         curp->index = ix;
745                 } else if (insert_index > to_le32(curp->index->first_block)) {
746                         /* insert after */
747                         ix = curp->index + 1;
748                 } else {
749                         /* insert before */
750                         ix = curp->index;
751                 }
752         }
753
754         len = EXT_LAST_INDEX(eh) - ix + 1;
755         ext4_assert(len >= 0);
756         if (len > 0)
757                 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_index));
758
759         if (ix > EXT_MAX_INDEX(eh)) {
760                 err = EIO;
761                 goto out;
762         }
763
764         ix->first_block = to_le32(insert_index);
765         ext4_idx_store_pblock(ix, insert_block);
766         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
767
768         if (ix > EXT_LAST_INDEX(eh)) {
769                 err = EIO;
770                 goto out;
771         }
772
773         if (eh == curp->header)
774                 err = ext4_ext_dirty(inode_ref, curp);
775         else
776                 err = EOK;
777
778 out:
779         if (err != EOK || *need_grow) {
780                 if (bh.lb_id)
781                         ext4_block_set(inode_ref->fs->bdev, &bh);
782
783                 spt->ptr = 0;
784         } else if (bh.lb_id) {
785                 /* If we got a sibling leaf. */
786                 ext4_extent_block_csum_set(inode_ref, ext_block_hdr(&bh));
787                 bh.dirty = true;
788
789                 spt->path.p_block = ext4_idx_pblock(ix);
790                 spt->path.depth = to_le16(eh->depth);
791                 spt->path.maxdepth = 0;
792                 spt->path.extent = NULL;
793                 spt->path.index = ix;
794                 spt->path.header = eh;
795                 spt->path.block = bh;
796
797                 /*
798                  * If newext->ee_block can be included into the
799                  * right sub-tree.
800                  */
801                 if (to_le32(newext->first_block) >=
802                     ext4_ext_block_index(ext_block_hdr(&bh)))
803                         spt->switch_to = 1;
804                 else {
805                         curp->index = ix;
806                         curp->p_block = ext4_idx_pblock(ix);
807                 }
808
809         } else {
810                 spt->ptr = 0;
811                 curp->index = ix;
812                 curp->p_block = ext4_idx_pblock(ix);
813         }
814         return err;
815 }
816
817 /*
818  * ext4_ext_correct_indexes:
819  * if leaf gets modified and modified extent is first in the leaf,
820  * then we have to correct all indexes above.
821  */
822 static int ext4_ext_correct_indexes(struct ext4_inode_ref *inode_ref,
823                                     struct ext4_extent_path *path)
824 {
825         struct ext4_extent_header *eh;
826         int32_t depth = ext_depth(inode_ref->inode);
827         struct ext4_extent *ex;
828         uint32_t border;
829         int32_t k;
830         int err = EOK;
831
832         eh = path[depth].header;
833         ex = path[depth].extent;
834
835         if (ex == NULL || eh == NULL) {
836                 return EIO;
837         }
838
839         if (depth == 0) {
840                 /* there is no tree at all */
841                 return EOK;
842         }
843
844         if (ex != EXT_FIRST_EXTENT(eh)) {
845                 /* we correct tree if first leaf got modified only */
846                 return EOK;
847         }
848
849         /*
850          * TODO: we need correction if border is smaller than current one
851          */
852         k = depth - 1;
853         border = path[depth].extent->first_block;
854         path[k].index->first_block = border;
855         err = ext4_ext_dirty(inode_ref, path + k);
856         if (err != EOK)
857                 return err;
858
859         while (k--) {
860                 /* change all left-side indexes */
861                 if (path[k + 1].index != EXT_FIRST_INDEX(path[k + 1].header))
862                         break;
863                 path[k].index->first_block = border;
864                 err = ext4_ext_dirty(inode_ref, path + k);
865                 if (err != EOK)
866                         break;
867         }
868
869         return err;
870 }
871
872 static bool ext4_ext_can_prepend(struct ext4_extent *ex1,
873                                  struct ext4_extent *ex2)
874 {
875         if (ext4_ext_pblock(ex2) + ext4_ext_get_actual_len(ex2) !=
876             ext4_ext_pblock(ex1))
877                 return false;
878
879 #ifdef AGGRESSIVE_TEST
880         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
881                 return 0;
882 #else
883         if (ext4_ext_is_unwritten(ex1)) {
884                 if (ext4_ext_get_actual_len(ex1) +
885                         ext4_ext_get_actual_len(ex2) >
886                     EXT_UNWRITTEN_MAX_LEN)
887                         return false;
888         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
889                    EXT_INIT_MAX_LEN)
890                 return false;
891 #endif
892
893         if (to_le32(ex2->first_block) + ext4_ext_get_actual_len(ex2) !=
894             to_le32(ex1->first_block))
895                 return false;
896
897         return true;
898 }
899
900 static bool ext4_ext_can_append(struct ext4_extent *ex1,
901                                 struct ext4_extent *ex2)
902 {
903         if (ext4_ext_pblock(ex1) + ext4_ext_get_actual_len(ex1) !=
904             ext4_ext_pblock(ex2))
905                 return false;
906
907 #ifdef AGGRESSIVE_TEST
908         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
909                 return 0;
910 #else
911         if (ext4_ext_is_unwritten(ex1)) {
912                 if (ext4_ext_get_actual_len(ex1) +
913                         ext4_ext_get_actual_len(ex2) >
914                     EXT_UNWRITTEN_MAX_LEN)
915                         return false;
916         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
917                    EXT_INIT_MAX_LEN)
918                 return false;
919 #endif
920
921         if (to_le32(ex1->first_block) + ext4_ext_get_actual_len(ex1) !=
922             to_le32(ex2->first_block))
923                 return false;
924
925         return true;
926 }
927
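/* Insert @newext into the leaf at level @at.  The extent is merged with a
 * neighbour when possible (unless EXT4_EXT_NO_COMBINE is set), a full leaf
 * is split, and *need_grow is set when the in-inode root leaf is full. */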
928 static int ext4_ext_insert_leaf(struct ext4_inode_ref *inode_ref,
929                                 struct ext4_extent_path *path,
930                                 int32_t at,
931                                 struct ext4_extent *newext,
932                                 struct ext_split_trans *spt,
933                                 uint32_t flags,
934                                 bool *need_grow)
935 {
936         struct ext4_extent_path *curp = path + at;
937         struct ext4_extent *ex = curp->extent;
938         struct ext4_block bh = EXT4_BLOCK_ZERO();
939         int32_t len;
940         int err = EOK;
941         int unwritten;
942         struct ext4_extent_header *eh = NULL;
943
944         *need_grow = false;
945
946         if (curp->extent &&
947             to_le32(newext->first_block) == to_le32(curp->extent->first_block))
948                 return EIO;
949
950         if (!(flags & EXT4_EXT_NO_COMBINE)) {
951                 if (curp->extent && ext4_ext_can_append(curp->extent, newext)) {
952                         unwritten = ext4_ext_is_unwritten(curp->extent);
953                         curp->extent->block_count =
954                             to_le16(ext4_ext_get_actual_len(curp->extent) +
955                                     ext4_ext_get_actual_len(newext));
956                         if (unwritten)
957                                 ext4_ext_mark_unwritten(curp->extent);
958                         err = ext4_ext_dirty(inode_ref, curp);
959                         goto out;
960                 }
961
962                 if (curp->extent &&
963                     ext4_ext_can_prepend(curp->extent, newext)) {
964                         unwritten = ext4_ext_is_unwritten(curp->extent);
965                         curp->extent->first_block = newext->first_block;
966                         curp->extent->block_count =
967                             to_le16(ext4_ext_get_actual_len(curp->extent) +
968                                     ext4_ext_get_actual_len(newext));
969                         if (unwritten)
970                                 ext4_ext_mark_unwritten(curp->extent);
971                         err = ext4_ext_dirty(inode_ref, curp);
972                         goto out;
973                 }
974         }
975
976         if (to_le16(curp->header->entries_count) ==
977             to_le16(curp->header->max_entries_count)) {
978                 if (at) {
979                         struct ext4_extent_header *neh;
980                         err = ext4_ext_split_node(inode_ref, path, at, newext,
981                                                   &spt->ptr, &bh);
982                         if (err != EOK)
983                                 goto out;
984
985                         neh = ext_block_hdr(&bh);
986                         if (to_le32(newext->first_block) >
987                             to_le32(curp->extent->first_block)) {
988                                 if (to_le16(neh->entries_count) >
989                                     to_le16(curp->header->entries_count)) {
990                                         eh = curp->header;
991                                         /* insert after */
992                                         ex = EXT_LAST_EXTENT(eh) + 1;
993                                 } else {
994                                         eh = neh;
995                                         ex = EXT_FIRST_EXTENT(eh);
996                                 }
997                         } else {
998                                 eh = curp->header;
999                                 /* insert before */
1000                                 ex = EXT_LAST_EXTENT(eh);
1001                         }
1002                 } else {
1003                         err = EOK;
1004                         *need_grow = true;
1005                         goto out;
1006                 }
1007         } else {
1008                 eh = curp->header;
1009                 if (curp->extent == NULL) {
1010                         ex = EXT_FIRST_EXTENT(eh);
1011                         curp->extent = ex;
1012                 } else if (to_le32(newext->first_block) >
1013                            to_le32(curp->extent->first_block)) {
1014                         /* insert after */
1015                         ex = curp->extent + 1;
1016                 } else {
1017                         /* insert before */
1018                         ex = curp->extent;
1019                 }
1020         }
1021
1022         len = EXT_LAST_EXTENT(eh) - ex + 1;
1023         ext4_assert(len >= 0);
1024         if (len > 0)
1025                 memmove(ex + 1, ex, len * sizeof(struct ext4_extent));
1026
1027         if (ex > EXT_MAX_EXTENT(eh)) {
1028                 err = EIO;
1029                 goto out;
1030         }
1031
1032         ex->first_block = newext->first_block;
1033         ex->block_count = newext->block_count;
1034         ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
1035         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
1036
1037         if (ex > EXT_LAST_EXTENT(eh)) {
1038                 err = EIO;
1039                 goto out;
1040         }
1041
1042         if (eh == curp->header) {
1043                 err = ext4_ext_correct_indexes(inode_ref, path);
1044                 if (err != EOK)
1045                         goto out;
1046                 err = ext4_ext_dirty(inode_ref, curp);
1047         } else
1048                 err = EOK;
1049
1050 out:
1051         if (err != EOK || *need_grow) {
1052                 if (bh.lb_id)
1053                         ext4_block_set(inode_ref->fs->bdev, &bh);
1054
1055                 spt->ptr = 0;
1056         } else if (bh.lb_id) {
1057                 /* If we got a sibling leaf. */
1058                 ext4_extent_block_csum_set(inode_ref, ext_block_hdr(&bh));
1059                 bh.dirty = true;
1060
1061                 spt->path.p_block = ext4_ext_pblock(ex);
1062                 spt->path.depth = to_le16(eh->depth);
1063                 spt->path.maxdepth = 0;
1064                 spt->path.extent = ex;
1065                 spt->path.index = NULL;
1066                 spt->path.header = eh;
1067                 spt->path.block = bh;
1068
1069                 /*
1070                  * If newext->ee_block can be included into the
1071                  * right sub-tree.
1072                  */
1073                 if (to_le32(newext->first_block) >=
1074                     ext4_ext_block_index(ext_block_hdr(&bh)))
1075                         spt->switch_to = 1;
1076                 else {
1077                         curp->extent = ex;
1078                         curp->p_block = ext4_ext_pblock(ex);
1079                 }
1080
1081         } else {
1082                 spt->ptr = 0;
1083                 curp->extent = ex;
1084                 curp->p_block = ext4_ext_pblock(ex);
1085         }
1086
1087         return err;
1088 }
1089
1090 /*
1091  * ext4_ext_grow_indepth:
1092  * implements tree growing procedure:
1093  * - allocates new block
1094  * - moves top-level data (index block or leaf) into the new block
1095  * - initializes new top-level, creating index that points to the
1096  *   just created block
1097  */
1098 static int ext4_ext_grow_indepth(struct ext4_inode_ref *inode_ref,
1099                                  uint32_t flags)
1100 {
1101         struct ext4_extent_header *neh;
1102         struct ext4_block bh = EXT4_BLOCK_ZERO();
1103         ext4_fsblk_t newblock, goal = 0;
1104         int err = EOK;
1105
1106         /* Try to prepend new index to old one */
1107         if (ext_depth(inode_ref->inode))
1108                 goal = ext4_idx_pblock(
1109                     EXT_FIRST_INDEX(ext_inode_hdr(inode_ref->inode)));
1110         else
1111                 goal = ext4_fs_inode_to_goal_block(inode_ref);
1112
1113         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, &err);
1114         if (newblock == 0)
1115                 return err;
1116
1117         /* Load the newly allocated block; the root will be copied into it. */
1118         err = ext4_block_get(inode_ref->fs->bdev, &bh, newblock);
1119         if (err != EOK) {
1120                 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1121                 return err;
1122         }
1123
1124         /* move top-level index/leaf into new block */
1125         memmove(bh.data, inode_ref->inode->blocks,
1126                 sizeof(inode_ref->inode->blocks));
1127
1128         /* set size of new block */
1129         neh = ext_block_hdr(&bh);
1130         /* the old root could have indexes or leaves,
1131          * so calculate e_max the right way */
1132         if (ext_depth(inode_ref->inode))
1133                 neh->max_entries_count =
1134                     to_le16(ext4_ext_space_block_idx(inode_ref));
1135         else
1136                 neh->max_entries_count =
1137                     to_le16(ext4_ext_space_block(inode_ref));
1138
1139         neh->magic = to_le16(EXT4_EXTENT_MAGIC);
1140
1141         /* Update top-level index: num,max,pointer */
1142         neh = ext_inode_hdr(inode_ref->inode);
1143         neh->entries_count = to_le16(1);
1144         ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1145         if (neh->depth == 0) {
1146                 /* Root extent block becomes index block */
1147                 neh->max_entries_count =
1148                     to_le16(ext4_ext_space_root_idx(inode_ref));
1149                 EXT_FIRST_INDEX(neh)
1150                     ->first_block = EXT_FIRST_EXTENT(neh)->first_block;
1151         }
1152         neh->depth = to_le16(to_le16(neh->depth) + 1);
1153
1154         ext4_extent_block_csum_set(inode_ref, neh);
1155         bh.dirty = true;
1156         inode_ref->dirty = true;
1157         ext4_block_set(inode_ref->fs->bdev, &bh);
1158
1159         return err;
1160 }
1161
1162 __unused static void print_path(struct ext4_extent_path *path)
1163 {
1164         int32_t i = path->depth;
1165         while (i >= 0) {
1166
1167                 ptrdiff_t a =
1168                     (path->extent)
1169                         ? (path->extent - EXT_FIRST_EXTENT(path->header))
1170                         : 0;
1171                 ptrdiff_t b =
1172                     (path->index)
1173                         ? (path->index - EXT_FIRST_INDEX(path->header))
1174                         : 0;
1175
1176                 (void)a;
1177                 (void)b;
1178                 ext4_dbg(DEBUG_EXTENT,
1179                          "depth %" PRId32 ", p_block: %" PRIu64 ","
1180                          "p_ext offset: %td, p_idx offset: %td\n",
1181                          i, path->p_block, a, b);
1182                 i--;
1183                 path++;
1184         }
1185 }
1186
1187 static void ext4_ext_replace_path(struct ext4_inode_ref *inode_ref,
1188                                   struct ext4_extent_path *path,
1189                                   struct ext_split_trans *spt,
1190                                   int32_t level)
1191 {
1192         int32_t depth = ext_depth(inode_ref->inode);
1193         int32_t i = depth - level;
1194         ext4_ext_drop_refs(inode_ref, path + i, 1);
1195         path[i] = spt[level].path;
1196 }
1197
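/* Insert @newext into the tree referenced by *ppath, splitting nodes from
 * the leaf upwards as needed and growing the tree in depth when not even
 * the root has a free entry. */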
1198 static int ext4_ext_insert_extent(struct ext4_inode_ref *inode_ref,
1199                                   struct ext4_extent_path **ppath,
1200                                   struct ext4_extent *newext, uint32_t flags)
1201 {
1202         int32_t i, depth, level;
1203         int ret = EOK;
1204         ext4_fsblk_t ptr = 0;
1205         bool need_grow = false;
1206         struct ext4_extent_path *path = *ppath;
1207         struct ext_split_trans *spt = NULL;
1208         struct ext_split_trans newblock;
1209
1210         memset(&newblock, 0, sizeof(newblock));
1211
1212         depth = ext_depth(inode_ref->inode);
1213         for (i = depth, level = 0; i >= 0; i--, level++)
1214                 if (EXT_HAS_FREE_INDEX(path + i))
1215                         break;
1216
1217         if (level) {
1218                 spt = calloc(1, sizeof(struct ext_split_trans) * (level));
1219                 if (!spt) {
1220                         ret = ENOMEM;
1221                         goto out;
1222                 }
1223         }
1224         i = 0;
1225 again:
1226         depth = ext_depth(inode_ref->inode);
1227
1228         do {
1229                 if (!i) {
1230                         ret = ext4_ext_insert_leaf(inode_ref, path, depth - i,
1231                                                    newext, &newblock, flags,
1232                                                    &need_grow);
1233                 } else {
1234                         ret = ext4_ext_insert_index(
1235                             inode_ref, path, depth - i, newext,
1236                             ext4_ext_block_index(
1237                                 ext_block_hdr(&spt[i - 1].path.block)),
1238                             spt[i - 1].ptr, &newblock,
1239                             &need_grow);
1240                 }
1241                 ptr = newblock.ptr;
1242
1243                 if (ret != EOK)
1244                         goto out;
1245
1246                 else if (spt && ptr && !ret) {
1247                         /* Prepare for the next iteration after splitting. */
1248                         spt[i] = newblock;
1249                 }
1250
1251                 i++;
1252         } while (ptr != 0 && i <= depth);
1253
1254         if (need_grow) {
1255                 ret = ext4_ext_grow_indepth(inode_ref, 0);
1256                 if (ret)
1257                         goto out;
1258                 ret = ext4_find_extent(inode_ref, to_le32(newext->first_block),
1259                                        ppath, 0);
1260                 if (ret)
1261                         goto out;
1262                 i = depth;
1263                 path = *ppath;
1264                 goto again;
1265         }
1266 out:
1267         if (ret) {
1268                 if (path)
1269                         ext4_ext_drop_refs(inode_ref, path, 0);
1270
1271                 while (--level >= 0 && spt) {
1272                         if (spt[level].ptr) {
1273                                 ext4_ext_free_blocks(inode_ref, spt[level].ptr,
1274                                                      1, 0);
1275                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1276                                                    1);
1277                         }
1278                 }
1279         } else {
1280                 while (--level >= 0 && spt) {
1281                         if (spt[level].switch_to)
1282                                 ext4_ext_replace_path(inode_ref, path, spt,
1283                                                       level);
1284                         else if (spt[level].ptr)
1285                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1286                                                    1);
1287                 }
1288         }
1289         if (spt)
1290                 free(spt);
1291
1292         return ret;
1293 }
1294
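/* Free the physical blocks backing the logical range [@from, @to] of
 * extent @ex. */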
1295 static void ext4_ext_remove_blocks(struct ext4_inode_ref *inode_ref,
1296                                    struct ext4_extent *ex, ext4_lblk_t from,
1297                                    ext4_lblk_t to)
1298 {
1299         ext4_lblk_t len = to - from + 1;
1300         ext4_lblk_t num;
1301         ext4_fsblk_t start;
1302         num = from - to_le32(ex->first_block);
1303         start = ext4_ext_pblock(ex) + num;
1304         ext4_dbg(DEBUG_EXTENT,
1305                  "Freeing %" PRIu32 " at %" PRIu64 ", %" PRIu32 "\n", from,
1306                  start, len);
1307
1308         ext4_ext_free_blocks(inode_ref, start, len, 0);
1309 }
1310
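/* Drop the index entry at level @depth, free the child block it points to
 * and propagate the new first_block value towards the root where needed. */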
1311 static int ext4_ext_remove_idx(struct ext4_inode_ref *inode_ref,
1312                                struct ext4_extent_path *path, int32_t depth)
1313 {
1314         int err = EOK;
1315         int32_t i = depth;
1316         ext4_fsblk_t leaf;
1317
1318         /* free index block */
1319         leaf = ext4_idx_pblock(path[i].index);
1320
1321         if (path[i].index != EXT_LAST_INDEX(path[i].header)) {
1322                 ptrdiff_t len = EXT_LAST_INDEX(path[i].header) - path[i].index;
1323                 memmove(path[i].index, path[i].index + 1,
1324                         len * sizeof(struct ext4_extent_index));
1325         }
1326
1327         path[i].header->entries_count =
1328             to_le16(to_le16(path[i].header->entries_count) - 1);
1329         err = ext4_ext_dirty(inode_ref, path + i);
1330         if (err != EOK)
1331                 return err;
1332
1333         ext4_dbg(DEBUG_EXTENT, "IDX: Freeing %" PRIu32 " at %" PRIu64 ", %d\n",
1334                  to_le32(path[i].index->first_block), leaf, 1);
1335         ext4_ext_free_blocks(inode_ref, leaf, 1, 0);
1336
1337         while (i > 0) {
1338                 if (path[i].index != EXT_FIRST_INDEX(path[i].header))
1339                         break;
1340
1341                 path[i - 1].index->first_block = path[i].index->first_block;
1342                 err = ext4_ext_dirty(inode_ref, path + i - 1);
1343                 if (err != EOK)
1344                         break;
1345
1346                 i--;
1347         }
1348         return err;
1349 }
1350
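/* Remove the logical range [@from, @to] from the leaf referenced by @path,
 * trimming or deleting the affected extents and freeing their blocks. */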
1351 static int ext4_ext_remove_leaf(struct ext4_inode_ref *inode_ref,
1352                                 struct ext4_extent_path *path, ext4_lblk_t from,
1353                                 ext4_lblk_t to)
1354 {
1355
1356         int32_t depth = ext_depth(inode_ref->inode);
1357         struct ext4_extent *ex = path[depth].extent;
1358         struct ext4_extent *start_ex, *ex2 = NULL;
1359         struct ext4_extent_header *eh = path[depth].header;
1360         int32_t len;
1361         int err = EOK;
1362         uint16_t new_entries;
1363
1364         start_ex = ex;
1365         new_entries = to_le16(eh->entries_count);
1366         while (ex <= EXT_LAST_EXTENT(path[depth].header) &&
1367                to_le32(ex->first_block) <= to) {
1368                 int32_t new_len = 0;
1369                 int unwritten;
1370                 ext4_lblk_t start, new_start;
1371                 ext4_fsblk_t newblock;
1372                 new_start = start = to_le32(ex->first_block);
1373                 len = ext4_ext_get_actual_len(ex);
1374                 newblock = ext4_ext_pblock(ex);
1375                 if (start < from) {
1376                         len -= from - start;
1377                         new_len = from - start;
1378                         start = from;
1379                         start_ex++;
1380                 } else {
1381                         if (start + len - 1 > to) {
1382                                 new_len = start + len - 1 - to;
1383                                 len -= new_len;
1384                                 new_start = to + 1;
1385                                 newblock += to + 1 - start;
1386                                 ex2 = ex;
1387                         }
1388                 }
1389
1390                 ext4_ext_remove_blocks(inode_ref, ex, start, start + len - 1);
1391                 ex->first_block = to_le32(new_start);
1392                 if (!new_len)
1393                         new_entries--;
1394                 else {
1395                         unwritten = ext4_ext_is_unwritten(ex);
1396                         ex->block_count = to_le16(new_len);
1397                         ext4_ext_store_pblock(ex, newblock);
1398                         if (unwritten)
1399                                 ext4_ext_mark_unwritten(ex);
1400                 }
1401
1402                 ex += 1;
1403         }
1404
1405         if (ex2 == NULL)
1406                 ex2 = ex;
1407
1408         if (ex2 <= EXT_LAST_EXTENT(eh))
1409                 memmove(start_ex, ex2, (EXT_LAST_EXTENT(eh) - ex2 + 1) * sizeof(struct ext4_extent));
1410
1411         eh->entries_count = to_le16(new_entries);
1412         ext4_ext_dirty(inode_ref, path + depth);
1413         if (path[depth].extent == EXT_FIRST_EXTENT(eh) && eh->entries_count)
1414                 err = ext4_ext_correct_indexes(inode_ref, path);
1415
1416         /* if this leaf is free, then we should
1417          * remove it from index block above */
1418         if (err == EOK && eh->entries_count == 0 && path[depth].block.lb_id)
1419                 err = ext4_ext_remove_idx(inode_ref, path, depth - 1);
1420         else if (depth > 0)
1421                 path[depth - 1].index++;
1422
1423         return err;
1424 }
1425
1426 static bool ext4_ext_more_to_rm(struct ext4_extent_path *path, ext4_lblk_t to)
1427 {
1428         if (!to_le16(path->header->entries_count))
1429                 return false;
1430
1431         if (path->index > EXT_LAST_INDEX(path->header))
1432                 return false;
1433
1434         if (to_le32(path->index->first_block) > to)
1435                 return false;
1436
1437         return true;
1438 }
1439
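/* Remove the logical block range [@from, @to] from the inode's extent tree,
 * splitting an extent in two when the range lies strictly inside it. */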
1440 int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref, ext4_lblk_t from,
1441                           ext4_lblk_t to)
1442 {
1443         struct ext4_extent_path *path = NULL;
1444         int ret = EOK;
1445         int32_t depth = ext_depth(inode_ref->inode);
1446         int32_t i;
1447
1448         ret = ext4_find_extent(inode_ref, from, &path, 0);
1449         if (ret)
1450                 goto out;
1451
1452         if (!path[depth].extent) {
1453                 ret = EOK;
1454                 goto out;
1455         }
1456
1457         bool in_range = IN_RANGE(from, to_le32(path[depth].extent->first_block),
1458                         ext4_ext_get_actual_len(path[depth].extent));
1459
1460         if (!in_range) {
1461                 ret = EOK;
1462                 goto out;
1463         }
1464
1465         /* The range to remove lies strictly inside a single extent: split it */
1466         if ((to_le32(path[depth].extent->first_block) < from) &&
1467             (to < to_le32(path[depth].extent->first_block) +
1468                         ext4_ext_get_actual_len(path[depth].extent) - 1)) {
1469
1470                 struct ext4_extent *ex = path[depth].extent, newex;
1471                 int unwritten = ext4_ext_is_unwritten(ex);
1472                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1473                 int32_t len = ext4_ext_get_actual_len(ex);
1474                 ext4_fsblk_t newblock =
1475                         to + 1 - ee_block + ext4_ext_pblock(ex);
1476
1477                 ex->block_count = to_le16(from - ee_block);
1478                 if (unwritten)
1479                         ext4_ext_mark_unwritten(ex);
1480
1481                 ext4_ext_dirty(inode_ref, path + depth);
1482
1483                 newex.first_block = to_le32(to + 1);
1484                 newex.block_count = to_le16(ee_block + len - 1 - to);
1485                 ext4_ext_store_pblock(&newex, newblock);
1486                 if (unwritten)
1487                         ext4_ext_mark_unwritten(&newex);
1488
1489                 ret = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1490                 goto out;
1491         }
1492
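             /*
              * General case: walk the tree depth first.  i tracks the current
              * level; at the leaf level (i == depth) the overlapping extents
              * are removed, above it the walk either descends into the next
              * child that still intersects the range or retreats and cleans
              * up index nodes that became empty.
              */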
1493         i = depth;
1494         while (i >= 0) {
1495                 if (i == depth) {
1496                         struct ext4_extent_header *eh;
1497                         struct ext4_extent *first_ex, *last_ex;
1498                         ext4_lblk_t leaf_from, leaf_to;
1499                         eh = path[i].header;
1500                         ext4_assert(to_le16(eh->entries_count) > 0);
1501                         first_ex = EXT_FIRST_EXTENT(eh);
1502                         last_ex = EXT_LAST_EXTENT(eh);
1503                         leaf_from = to_le32(first_ex->first_block);
1504                         leaf_to = to_le32(last_ex->first_block) +
1505                                   ext4_ext_get_actual_len(last_ex) - 1;
1506                         if (leaf_from < from)
1507                                 leaf_from = from;
1508
1509                         if (leaf_to > to)
1510                                 leaf_to = to;
1511
1512                         ret = ext4_ext_remove_leaf(inode_ref, path, leaf_from,
1513                                         leaf_to);
                             if (ret != EOK)
                                     goto out;
1514                         ext4_ext_drop_refs(inode_ref, path + i, 0);
1515                         i--;
1516                         continue;
1517                 }
1518
1519                 struct ext4_extent_header *eh;
1520                 eh = path[i].header;
1521                 if (ext4_ext_more_to_rm(path + i, to)) {
1522                         struct ext4_block bh = EXT4_BLOCK_ZERO();
1523                         if (path[i + 1].block.lb_id)
1524                                 ext4_ext_drop_refs(inode_ref, path + i + 1, 0);
1525
1526                         ret = read_extent_tree_block(inode_ref,
1527                                         ext4_idx_pblock(path[i].index),
1528                                         depth - i - 1, &bh, 0);
1529                         if (ret)
1530                                 goto out;
1531
1532                         path[i].p_block =
1533                                         ext4_idx_pblock(path[i].index);
1534                         path[i + 1].block = bh;
1535                         path[i + 1].header = ext_block_hdr(&bh);
1536                         path[i + 1].depth = depth - i - 1;
1537                         if (i + 1 == depth)
1538                                 path[i + 1].extent = EXT_FIRST_EXTENT(
1539                                         path[i + 1].header);
1540                         else
1541                                 path[i + 1].index =
1542                                         EXT_FIRST_INDEX(path[i + 1].header);
1543
1544                         i++;
1545                 } else {
1546                         if (i > 0) {
1547                                 if (!eh->entries_count)
1548                                         ret = ext4_ext_remove_idx(inode_ref, path,
1549                                                         i - 1);
1550                                 else
1551                                         path[i - 1].index++;
1552
1553                         }
1554
1555                         if (i)
1556                                 ext4_block_set(inode_ref->fs->bdev,
1557                                                 &path[i].block);
1558
1560                         i--;
1561                 }
1562
1563         }
1564
1565         /* TODO: flexible tree reduction should be here */
1566         if (path->header->entries_count == 0) {
1567                 /*
1568                  * truncate to zero freed all the tree,
1569                  * so we need to correct eh_depth
1570                  */
1571                 ext_inode_hdr(inode_ref->inode)->depth = 0;
1572                 ext_inode_hdr(inode_ref->inode)->max_entries_count =
1573                     to_le16(ext4_ext_space_root(inode_ref));
1574                 ret = ext4_ext_dirty(inode_ref, path);
1575         }
1576
1577 out:
1578         ext4_ext_drop_refs(inode_ref, path, 0);
1579         free(path);
1580         path = NULL;
1581         return ret;
1582 }
1583
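     /*
      * Split the extent found at the leaf of @ppath at logical block @split.
      * @split_flag selects the unwritten state of the two halves:
      * EXT4_EXT_MARK_UNWRIT1 marks the left half unwritten,
      * EXT4_EXT_MARK_UNWRIT2 the right half.  If @split is the first block
      * of the extent no new extent is needed and only the state of the
      * existing one is changed.
      */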
1584 static int ext4_ext_split_extent_at(struct ext4_inode_ref *inode_ref,
1585                                     struct ext4_extent_path **ppath,
1586                                     ext4_lblk_t split, uint32_t split_flag)
1587 {
1588         struct ext4_extent *ex, newex;
1589         ext4_fsblk_t newblock;
1590         ext4_lblk_t ee_block;
1591         int32_t ee_len;
1592         int32_t depth = ext_depth(inode_ref->inode);
1593         int err = EOK;
1594
1595         ex = (*ppath)[depth].extent;
1596         ee_block = to_le32(ex->first_block);
1597         ee_len = ext4_ext_get_actual_len(ex);
1598         newblock = split - ee_block + ext4_ext_pblock(ex);
1599
1600         if (split == ee_block) {
1601                 /*
1602                  * case b: block @split is the block that the extent begins with
1603                  * then we just change the state of the extent, and splitting
1604                  * is not needed.
1605                  */
1606                 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1607                         ext4_ext_mark_unwritten(ex);
1608                 else
1609                         ext4_ext_mark_initialized(ex);
1610
1611                 err = ext4_ext_dirty(inode_ref, *ppath + depth);
1612                 goto out;
1613         }
1614
1615         ex->block_count = to_le16(split - ee_block);
1616         if (split_flag & EXT4_EXT_MARK_UNWRIT1)
1617                 ext4_ext_mark_unwritten(ex);
1618
1619         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1620         if (err != EOK)
1621                 goto out;
1622
1623         newex.first_block = to_le32(split);
1624         newex.block_count = to_le16(ee_len - (split - ee_block));
1625         ext4_ext_store_pblock(&newex, newblock);
1626         if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1627                 ext4_ext_mark_unwritten(&newex);
1628         err = ext4_ext_insert_extent(inode_ref, ppath, &newex,
1629                                      EXT4_EXT_NO_COMBINE);
1630         if (err != EOK)
1631                 goto restore_extent_len;
1632
1633 out:
1634         return err;
1635 restore_extent_len:
1636         ex->block_count = to_le16(ee_len);
1637         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1638         return err;
1639 }
1640
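     /*
      * Convert @blocks blocks starting at logical block @split from unwritten
      * to initialized.  Depending on where the range sits inside the covering
      * extent this takes one split (range touching the start or the end of
      * the extent) or two (range in the middle); afterwards the requested
      * part is initialized and the rest stays unwritten.
      */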
1641 static int ext4_ext_convert_to_initialized(struct ext4_inode_ref *inode_ref,
1642                                            struct ext4_extent_path **ppath,
1643                                            ext4_lblk_t split, uint32_t blocks)
1644 {
1645         int32_t depth = ext_depth(inode_ref->inode), err = EOK;
1646         struct ext4_extent *ex = (*ppath)[depth].extent;
1647
1648         ext4_assert(to_le32(ex->first_block) <= split);
1649
1650         if (split + blocks ==
1651             to_le32(ex->first_block) + ext4_ext_get_actual_len(ex)) {
1652                 /* split and initialize right part */
1653                 err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1654                                                EXT4_EXT_MARK_UNWRIT1);
1655         } else if (to_le32(ex->first_block) == split) {
1656                 /* split and initialize left part */
1657                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1658                                                EXT4_EXT_MARK_UNWRIT2);
1659         } else {
1660                 /* split 1 extent to 3 and initialize the 2nd */
1661                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1662                                                EXT4_EXT_MARK_UNWRIT1 |
1663                                                    EXT4_EXT_MARK_UNWRIT2);
1664                 if (!err) {
1665                         err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1666                                                        EXT4_EXT_MARK_UNWRIT1);
1667                 }
1668         }
1669
1670         return err;
1671 }
1672
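     /*
      * Return the logical block at which the next allocated extent begins,
      * relative to the position described by @path, or EXT_MAX_BLOCKS when
      * the path already points at the last entry on every level.
      */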
1673 static ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_extent_path *path)
1674 {
1675         int32_t depth;
1676
1677         depth = path->depth;
1678
1679         if (depth == 0 && path->extent == NULL)
1680                 return EXT_MAX_BLOCKS;
1681
1682         while (depth >= 0) {
1683                 if (depth == path->depth) {
1684                         /* leaf */
1685                         if (path[depth].extent &&
1686                             path[depth].extent !=
1687                                 EXT_LAST_EXTENT(path[depth].header))
1688                                 return to_le32(
1689                                     path[depth].extent[1].first_block);
1690                 } else {
1691                         /* index */
1692                         if (path[depth].index !=
1693                             EXT_LAST_INDEX(path[depth].header))
1694                                 return to_le32(
1695                                     path[depth].index[1].first_block);
1696                 }
1697                 depth--;
1698         }
1699
1700         return EXT_MAX_BLOCKS;
1701 }
1702
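     /*
      * Zero @blocks_count blocks on disk starting at physical block @block,
      * one block-cache buffer at a time.  Used below to clear the data of an
      * unwritten range before its extent is marked initialized.
      */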
1703 static int ext4_ext_zero_unwritten_range(struct ext4_inode_ref *inode_ref,
1704                                          ext4_fsblk_t block,
1705                                          uint32_t blocks_count)
1706 {
1707         int err = EOK;
1708         uint32_t i;
1709         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
1710         for (i = 0; i < blocks_count; i++) {
1711                 struct ext4_block bh = EXT4_BLOCK_ZERO();
1712                 err = ext4_block_get(inode_ref->fs->bdev, &bh, block + i);
1713                 if (err != EOK)
1714                         break;
1715
1716                 memset(bh.data, 0, block_size);
1717                 bh.dirty = true;
1718                 err = ext4_block_set(inode_ref->fs->bdev, &bh);
1719                 if (err != EOK)
1720                         break;
1721         }
1722         return err;
1723 }
1724
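     /*
      * Map logical block @iblock of @inode_ref to a physical block.  A block
      * inside an initialized extent is returned directly; a block inside an
      * unwritten extent is first zeroed on disk and its extent converted; an
      * unmapped block is allocated (up to @max_blocks contiguous blocks)
      * only when @create is true.  On success *@result holds the first
      * physical block (0 if nothing is mapped) and *@blocks_count the number
      * of contiguous blocks covered.
      *
      * Rough usage sketch; the variable names are illustrative only:
      *
      *      ext4_fsblk_t phys;
      *      uint32_t cnt;
      *      int r = ext4_extent_get_blocks(inode_ref, lblock, 1, &phys,
      *                                     true, &cnt);
      *      if (r == EOK && phys != 0) {
      *              ... lblock is now backed by physical block phys ...
      *      }
      */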
1725 int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref, ext4_fsblk_t iblock,
1726                         uint32_t max_blocks, ext4_fsblk_t *result, bool create,
1727                         uint32_t *blocks_count)
1728 {
1729         struct ext4_extent_path *path = NULL;
1730         struct ext4_extent newex, *ex;
1731         ext4_fsblk_t goal;
1732         int err = EOK;
1733         int32_t depth;
1734         uint32_t allocated = 0;
1735         ext4_fsblk_t next, newblock;
1736
1737         if (result)
1738                 *result = 0;
1739
1740         if (blocks_count)
1741                 *blocks_count = 0;
1742
1743         /* find extent for this block */
1744         err = ext4_find_extent(inode_ref, iblock, &path, 0);
1745         if (err != EOK) {
1746                 path = NULL;
1747                 goto out2;
1748         }
1749
1750         depth = ext_depth(inode_ref->inode);
1751
1752         /*
1753          * a consistent leaf must not be empty;
1754          * this situation is possible, though, _during_ tree modification,
1755          * which is why the assert cannot be put in ext4_find_extent()
1756          */
1757         ex = path[depth].extent;
1758         if (ex) {
1759                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1760                 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
1761                 uint16_t ee_len = ext4_ext_get_actual_len(ex);
1762                 /* if the found extent covers the block, simply return it */
1763                 if (IN_RANGE(iblock, ee_block, ee_len)) {
1764                         /* number of remaining blocks in the extent */
1765                         allocated = ee_len - (iblock - ee_block);
1766
1767                         if (!ext4_ext_is_unwritten(ex)) {
1768                                 newblock = iblock - ee_block + ee_start;
1769                                 goto out;
1770                         }
1771
1772                         if (!create) {
1773                                 newblock = 0;
1774                                 goto out;
1775                         }
1776
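                             /*
                              * The extent is unwritten: zero the part the
                              * caller asked for on disk, then convert that
                              * part to initialized before returning the
                              * mapping.
                              */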
1777                         uint32_t zero_range;
1778                         zero_range = allocated;
1779                         if (zero_range > max_blocks)
1780                                 zero_range = max_blocks;
1781
1782                         newblock = iblock - ee_block + ee_start;
1783                         err = ext4_ext_zero_unwritten_range(inode_ref, newblock,
1784                                         zero_range);
1785                         if (err != EOK)
1786                                 goto out2;
1787
1788                         err = ext4_ext_convert_to_initialized(inode_ref, &path,
1789                                         iblock, zero_range);
1790                         if (err != EOK)
1791                                 goto out2;
1792
1793                         goto out;
1794                 }
1795         }
1796
1797         /*
1798          * the requested block isn't allocated yet;
1799          * we must not try to create it if the create flag is zero
1800          */
1801         if (!create) {
1802                 goto out2;
1803         }
1804
1805         /* find next allocated block so that we know how many
1806          * blocks we can allocate without overlapping the next extent */
1807         next = ext4_ext_next_allocated_block(path);
1808         allocated = next - iblock;
1809         if (allocated > max_blocks)
1810                 allocated = max_blocks;
1811
1812         /* allocate new block */
1813         goal = ext4_ext_find_goal(inode_ref, path, iblock);
1814         newblock = ext4_new_meta_blocks(inode_ref, goal, 0, &allocated, &err);
1815         if (!newblock)
1816                 goto out2;
1817
1818         /* try to insert new extent into found leaf and return */
1819         newex.first_block = to_le32(iblock);
1820         ext4_ext_store_pblock(&newex, newblock);
1821         newex.block_count = to_le16(allocated);
1822         err = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1823         if (err != EOK) {
1824                 /* free data blocks we just allocated */
1825                 ext4_ext_free_blocks(inode_ref, ext4_ext_pblock(&newex),
1826                                      to_le16(newex.block_count), 0);
1827                 goto out2;
1828         }
1829
1830         /* the insert routine above could have used the block we allocated */
1831         newblock = ext4_ext_pblock(&newex);
1832
1833 out:
1834         if (allocated > max_blocks)
1835                 allocated = max_blocks;
1836
1837         if (result)
1838                 *result = newblock;
1839
1840         if (blocks_count)
1841                 *blocks_count = allocated;
1842
1843 out2:
1844         if (path) {
1845                 ext4_ext_drop_refs(inode_ref, path, 0);
1846                 free(path);
1847         }
1848
1849         return err;
1850 }
1851 #endif