FIX: crash occurs when truncating a spare file.
[lwext4.git] / lwext4 / ext4_extent_full.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * - Redistributions of source code must retain the above copyright
10  *   notice, this list of conditions and the following disclaimer.
11  * - Redistributions in binary form must reproduce the above copyright
12  *   notice, this list of conditions and the following disclaimer in the
13  *   documentation and/or other materials provided with the distribution.
14  * - The name of the author may not be used to endorse or promote products
15  *   derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include "ext4_config.h"
30 #include "ext4_blockdev.h"
31 #include "ext4_fs.h"
32 #include "ext4_super.h"
33 #include "ext4_balloc.h"
34 #include "ext4_debug.h"
35
36 #include <stdlib.h>
37 #include <string.h>
38 #include <inttypes.h>
39 #include <stddef.h>
40
41 #include "ext4_extent.h"
42
43 #if CONFIG_EXTENT_FULL
44
45 /*
46  * used by extent splitting.
47  */
48 #define EXT4_EXT_MARK_UNWRIT1 0x02 /* mark first half unwritten */
49 #define EXT4_EXT_MARK_UNWRIT2 0x04 /* mark second half unwritten */
50 #define EXT4_EXT_DATA_VALID1 0x08  /* first half contains valid data */
51 #define EXT4_EXT_DATA_VALID2 0x10  /* second half contains valid data */
52 #define EXT4_EXT_NO_COMBINE 0x20   /* do not combine two extents */
53
54 static struct ext4_extent_tail *
55 find_ext4_extent_tail(struct ext4_extent_header *eh)
56 {
57         return (struct ext4_extent_tail *)(((char *)eh) +
58                                            EXT4_EXTENT_TAIL_OFFSET(eh));
59 }
60
61 static struct ext4_extent_header *ext_inode_hdr(struct ext4_inode *inode)
62 {
63         return (struct ext4_extent_header *)inode->blocks;
64 }
65
66 static struct ext4_extent_header *ext_block_hdr(struct ext4_block *block)
67 {
68         return (struct ext4_extent_header *)block->data;
69 }
70
71 static uint16_t ext_depth(struct ext4_inode *inode)
72 {
73         return to_le16(ext_inode_hdr(inode)->depth);
74 }
75
76 static uint16_t ext4_ext_get_actual_len(struct ext4_extent *ext)
77 {
78         return (to_le16(ext->block_count) <= EXT_INIT_MAX_LEN
79                     ? to_le16(ext->block_count)
80                     : (to_le16(ext->block_count) - EXT_INIT_MAX_LEN));
81 }
82
83 static void ext4_ext_mark_initialized(struct ext4_extent *ext)
84 {
85         ext->block_count = to_le16(ext4_ext_get_actual_len(ext));
86 }
87
88 static void ext4_ext_mark_unwritten(struct ext4_extent *ext)
89 {
90         ext->block_count |= to_le16(EXT_INIT_MAX_LEN);
91 }
92
93 static int ext4_ext_is_unwritten(struct ext4_extent *ext)
94 {
95         /* Extent with ee_len of 0x8000 is treated as an initialized extent */
96         return (to_le16(ext->block_count) > EXT_INIT_MAX_LEN);
97 }
98
99 /*
100  * ext4_ext_pblock:
101  * combine low and high parts of physical block number into ext4_fsblk_t
102  */
103 static ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
104 {
105         ext4_fsblk_t block;
106
107         block = to_le32(ex->start_lo);
108         block |= ((ext4_fsblk_t)to_le16(ex->start_hi) << 31) << 1;
109         return block;
110 }
111
112 /*
113  * ext4_idx_pblock:
114  * combine low and high parts of a leaf physical block number into ext4_fsblk_t
115  */
116 static ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_index *ix)
117 {
118         ext4_fsblk_t block;
119
120         block = to_le32(ix->leaf_lo);
121         block |= ((ext4_fsblk_t)to_le16(ix->leaf_hi) << 31) << 1;
122         return block;
123 }
124
125 /*
126  * ext4_ext_store_pblock:
127  * stores a large physical block number into an extent struct,
128  * breaking it into parts
129  */
130 static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
131 {
132         ex->start_lo = to_le32((uint32_t)(pb & 0xffffffff));
133         ex->start_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
134 }
135
136 /*
137  * ext4_idx_store_pblock:
138  * stores a large physical block number into an index struct,
139  * breaking it into parts
140  */
141 static void ext4_idx_store_pblock(struct ext4_extent_index *ix, ext4_fsblk_t pb)
142 {
143         ix->leaf_lo = to_le32((uint32_t)(pb & 0xffffffff));
144         ix->leaf_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
145 }
146
147 static int ext4_allocate_single_block(struct ext4_inode_ref *inode_ref,
148                                       ext4_fsblk_t goal,
149                                       ext4_fsblk_t *blockp)
150 {
151         return ext4_balloc_alloc_block(inode_ref, goal, blockp);
152 }
153
154 static ext4_fsblk_t ext4_new_meta_blocks(struct ext4_inode_ref *inode_ref,
155                                          ext4_fsblk_t goal,
156                                          uint32_t flags __unused,
157                                          uint32_t *count, int *errp)
158 {
159         ext4_fsblk_t block = 0;
160
161         *errp = ext4_allocate_single_block(inode_ref, goal, &block);
162         if (count)
163                 *count = 1;
164         return block;
165 }
166
167 static void ext4_ext_free_blocks(struct ext4_inode_ref *inode_ref,
168                                  ext4_fsblk_t block, uint32_t count,
169                                  uint32_t flags __unused)
170 {
171         ext4_balloc_free_blocks(inode_ref, block, count);
172 }
173
174 static size_t ext4_ext_space_block(struct ext4_inode_ref *inode_ref)
175 {
176         size_t size;
177         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
178
179         size = (block_size - sizeof(struct ext4_extent_header)) /
180                sizeof(struct ext4_extent);
181 #ifdef AGGRESSIVE_TEST
182         if (size > 6)
183                 size = 6;
184 #endif
185         return size;
186 }
187
188 static size_t ext4_ext_space_block_idx(struct ext4_inode_ref *inode_ref)
189 {
190         size_t size;
191         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
192
193         size = (block_size - sizeof(struct ext4_extent_header)) /
194                sizeof(struct ext4_extent_index);
195 #ifdef AGGRESSIVE_TEST
196         if (size > 5)
197                 size = 5;
198 #endif
199         return size;
200 }
201
202 static size_t ext4_ext_space_root(struct ext4_inode_ref *inode_ref)
203 {
204         size_t size;
205
206         size = sizeof(inode_ref->inode->blocks);
207         size -= sizeof(struct ext4_extent_header);
208         size /= sizeof(struct ext4_extent);
209 #ifdef AGGRESSIVE_TEST
210         if (size > 3)
211                 size = 3;
212 #endif
213         return size;
214 }
215
216 static size_t ext4_ext_space_root_idx(struct ext4_inode_ref *inode_ref)
217 {
218         size_t size;
219
220         size = sizeof(inode_ref->inode->blocks);
221         size -= sizeof(struct ext4_extent_header);
222         size /= sizeof(struct ext4_extent_index);
223 #ifdef AGGRESSIVE_TEST
224         if (size > 4)
225                 size = 4;
226 #endif
227         return size;
228 }
229
230 static size_t ext4_ext_max_entries(struct ext4_inode_ref *inode_ref,
231                                    uint32_t depth)
232 {
233         size_t max;
234
235         if (depth == ext_depth(inode_ref->inode)) {
236                 if (depth == 0)
237                         max = ext4_ext_space_root(inode_ref);
238                 else
239                         max = ext4_ext_space_root_idx(inode_ref);
240         } else {
241                 if (depth == 0)
242                         max = ext4_ext_space_block(inode_ref);
243                 else
244                         max = ext4_ext_space_block_idx(inode_ref);
245         }
246
247         return max;
248 }
249
250 static ext4_fsblk_t ext4_ext_find_goal(struct ext4_inode_ref *inode_ref,
251                                        struct ext4_extent_path *path,
252                                        ext4_lblk_t block)
253 {
254         if (path) {
255                 uint32_t depth = path->depth;
256                 struct ext4_extent *ex;
257
258                 /*
259                  * Try to predict block placement assuming that we are
260                  * filling in a file which will eventually be
261                  * non-sparse --- i.e., in the case of libbfd writing
262                  * an ELF object sections out-of-order but in a way
263                  * the eventually results in a contiguous object or
264                  * executable file, or some database extending a table
265                  * space file.  However, this is actually somewhat
266                  * non-ideal if we are writing a sparse file such as
267                  * qemu or KVM writing a raw image file that is going
268                  * to stay fairly sparse, since it will end up
269                  * fragmenting the file system's free space.  Maybe we
270                  * should have some hueristics or some way to allow
271                  * userspace to pass a hint to file system,
272                  * especially if the latter case turns out to be
273                  * common.
274                  */
275                 ex = path[depth].extent;
276                 if (ex) {
277                         ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
278                         ext4_lblk_t ext_block = to_le32(ex->first_block);
279
280                         if (block > ext_block)
281                                 return ext_pblk + (block - ext_block);
282                         else
283                                 return ext_pblk - (ext_block - block);
284                 }
285
286                 /* it looks like index is empty;
287                  * try to find starting block from index itself */
288                 if (path[depth].block.lb_id)
289                         return path[depth].block.lb_id;
290         }
291
292         /* OK. use inode's group */
293         return ext4_fs_inode_to_goal_block(inode_ref);
294 }
295
296 /*
297  * Allocation for a meta data block
298  */
299 static ext4_fsblk_t ext4_ext_new_meta_block(struct ext4_inode_ref *inode_ref,
300                                             struct ext4_extent_path *path,
301                                             struct ext4_extent *ex, int *err,
302                                             uint32_t flags)
303 {
304         ext4_fsblk_t goal, newblock;
305
306         goal = ext4_ext_find_goal(inode_ref, path, to_le32(ex->first_block));
307         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, err);
308         return newblock;
309 }
310
311 static int ext4_ext_dirty(struct ext4_inode_ref *inode_ref,
312                           struct ext4_extent_path *path)
313 {
314         if (path->block.lb_id)
315                 path->block.dirty = true;
316         else
317                 inode_ref->dirty = true;
318
319         return EOK;
320 }
321
322 static void ext4_ext_drop_refs(struct ext4_inode_ref *inode_ref,
323                                struct ext4_extent_path *path, bool keep_other)
324 {
325         int32_t depth, i;
326
327         if (!path)
328                 return;
329         if (keep_other)
330                 depth = 0;
331         else
332                 depth = path->depth;
333
334         for (i = 0; i <= depth; i++, path++) {
335                 if (path->block.lb_id) {
336                         ext4_block_set(inode_ref->fs->bdev, &path->block);
337                 }
338         }
339 }
340
341 /*
342  * Temporarily we don't need to support checksum.
343  */
344 static uint32_t ext4_ext_block_csum(struct ext4_inode_ref *inode_ref __unused,
345                                     struct ext4_extent_header *eh __unused)
346 {
347         /*TODO: should we add crc32 here ?*/
348         /*return ext4_crc32c(inode->i_csum, eh, EXT4_EXTENT_TAIL_OFFSET(eh));*/
349         return 0;
350 }
351
352 static void ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref,
353                                        struct ext4_extent_header *eh)
354 {
355         struct ext4_extent_tail *tail;
356
357         tail = find_ext4_extent_tail(eh);
358         tail->et_checksum = ext4_ext_block_csum(inode_ref, eh);
359 }
360
361 /*
362  * Check that whether the basic information inside the extent header
363  * is correct or not.
364  */
365 static int ext4_ext_check(struct ext4_inode_ref *inode_ref,
366                           struct ext4_extent_header *eh, uint16_t depth,
367                           ext4_fsblk_t pblk __unused)
368 {
369         struct ext4_extent_tail *tail;
370         const char *error_msg;
371         (void)error_msg;
372
373         if (to_le16(eh->magic) != EXT4_EXTENT_MAGIC) {
374                 error_msg = "invalid magic";
375                 goto corrupted;
376         }
377         if (to_le16(eh->depth) != depth) {
378                 error_msg = "unexpected eh_depth";
379                 goto corrupted;
380         }
381         if (eh->max_entries_count == 0) {
382                 error_msg = "invalid eh_max";
383                 goto corrupted;
384         }
385         if (to_le16(eh->entries_count) > to_le16(eh->max_entries_count)) {
386                 error_msg = "invalid eh_entries";
387                 goto corrupted;
388         }
389
390         tail = find_ext4_extent_tail(eh);
391         if (tail->et_checksum != ext4_ext_block_csum(inode_ref, eh)) {
392                 /* FIXME: Warning: extent checksum damaged? */
393         }
394
395         return EOK;
396
397 corrupted:
398         ext4_dbg(DEBUG_EXTENT, "Bad extents B+ tree block: %s. "
399                                "Blocknr: %" PRId64 "\n",
400                  error_msg, pblk);
401         return EIO;
402 }
403
404 static int read_extent_tree_block(struct ext4_inode_ref *inode_ref,
405                                   ext4_fsblk_t pblk, int32_t depth,
406                                   struct ext4_block *bh,
407                                   uint32_t flags __unused)
408 {
409         int err;
410
411         err = ext4_block_get(inode_ref->fs->bdev, bh, pblk);
412         if (err != EOK)
413                 goto errout;
414
415         err = ext4_ext_check(inode_ref, ext_block_hdr(bh), depth, pblk);
416         if (err != EOK)
417                 goto errout;
418
419         return EOK;
420 errout:
421         if (bh->lb_id)
422                 ext4_block_set(inode_ref->fs->bdev, bh);
423
424         return err;
425 }
426
427 /*
428  * ext4_ext_binsearch_idx:
429  * binary search for the closest index of the given block
430  * the header must be checked before calling this
431  */
432 static void ext4_ext_binsearch_idx(struct ext4_extent_path *path,
433                                    ext4_lblk_t block)
434 {
435         struct ext4_extent_header *eh = path->header;
436         struct ext4_extent_index *r, *l, *m;
437
438         l = EXT_FIRST_INDEX(eh) + 1;
439         r = EXT_LAST_INDEX(eh);
440         while (l <= r) {
441                 m = l + (r - l) / 2;
442                 if (block < to_le32(m->first_block))
443                         r = m - 1;
444                 else
445                         l = m + 1;
446         }
447
448         path->index = l - 1;
449 }
450
451 /*
452  * ext4_ext_binsearch:
453  * binary search for closest extent of the given block
454  * the header must be checked before calling this
455  */
456 static void ext4_ext_binsearch(struct ext4_extent_path *path, ext4_lblk_t block)
457 {
458         struct ext4_extent_header *eh = path->header;
459         struct ext4_extent *r, *l, *m;
460
461         if (eh->entries_count == 0) {
462                 /*
463                  * this leaf is empty:
464                  * we get such a leaf in split/add case
465                  */
466                 return;
467         }
468
469         l = EXT_FIRST_EXTENT(eh) + 1;
470         r = EXT_LAST_EXTENT(eh);
471
472         while (l <= r) {
473                 m = l + (r - l) / 2;
474                 if (block < to_le32(m->first_block))
475                         r = m - 1;
476                 else
477                         l = m + 1;
478         }
479
480         path->extent = l - 1;
481 }
482
483 static int ext4_find_extent(struct ext4_inode_ref *inode_ref, ext4_lblk_t block,
484                             struct ext4_extent_path **orig_path, uint32_t flags)
485 {
486         struct ext4_extent_header *eh;
487         struct ext4_block bh = EXT4_BLOCK_ZERO();
488         ext4_fsblk_t buf_block = 0;
489         struct ext4_extent_path *path = *orig_path;
490         int32_t depth, ppos = 0;
491         int32_t i;
492         int ret;
493
494         eh = ext_inode_hdr(inode_ref->inode);
495         depth = ext_depth(inode_ref->inode);
496
497         if (path) {
498                 ext4_ext_drop_refs(inode_ref, path, 0);
499                 if (depth > path[0].maxdepth) {
500                         free(path);
501                         *orig_path = path = NULL;
502                 }
503         }
504         if (!path) {
505                 int32_t path_depth = depth + 1;
506                 /* account possible depth increase */
507                 path = calloc(1, sizeof(struct ext4_extent_path) *
508                                      (path_depth + 1));
509                 if (!path)
510                         return ENOMEM;
511                 path[0].maxdepth = path_depth;
512         }
513         path[0].header = eh;
514         path[0].block = bh;
515
516         i = depth;
517         /* walk through the tree */
518         while (i) {
519                 ext4_ext_binsearch_idx(path + ppos, block);
520                 path[ppos].p_block = ext4_idx_pblock(path[ppos].index);
521                 path[ppos].depth = i;
522                 path[ppos].extent = NULL;
523                 buf_block = path[ppos].p_block;
524
525                 i--;
526                 ppos++;
527                 if (!path[ppos].block.lb_id ||
528                     path[ppos].block.lb_id != buf_block) {
529                         ret = read_extent_tree_block(inode_ref, buf_block, i,
530                                                      &bh, flags);
531                         if (ret != EOK) {
532                                 goto err;
533                         }
534                         if (ppos > depth) {
535                                 ext4_block_set(inode_ref->fs->bdev, &bh);
536                                 ret = EIO;
537                                 goto err;
538                         }
539
540                         eh = ext_block_hdr(&bh);
541                         path[ppos].block = bh;
542                         path[ppos].header = eh;
543                 }
544         }
545
546         path[ppos].depth = i;
547         path[ppos].extent = NULL;
548         path[ppos].index = NULL;
549
550         /* find extent */
551         ext4_ext_binsearch(path + ppos, block);
552         /* if not an empty leaf */
553         if (path[ppos].extent)
554                 path[ppos].p_block = ext4_ext_pblock(path[ppos].extent);
555
556         *orig_path = path;
557
558         ret = EOK;
559         return ret;
560
561 err:
562         ext4_ext_drop_refs(inode_ref, path, 0);
563         free(path);
564         if (orig_path)
565                 *orig_path = NULL;
566         return ret;
567 }
568
569 static void ext4_ext_init_header(struct ext4_inode_ref *inode_ref,
570                                  struct ext4_extent_header *eh, int32_t depth)
571 {
572         eh->entries_count = 0;
573         eh->max_entries_count = to_le16(ext4_ext_max_entries(inode_ref, depth));
574         eh->magic = to_le16(EXT4_EXTENT_MAGIC);
575         eh->depth = depth;
576 }
577
578 /*
579  * Be cautious, the buffer_head returned is not yet mark dirtied. */
580 static int ext4_ext_split_node(struct ext4_inode_ref *inode_ref,
581                                struct ext4_extent_path *path, int32_t at,
582                                struct ext4_extent *newext,
583                                ext4_fsblk_t *sibling, struct ext4_block *new_bh)
584 {
585         int ret;
586         ext4_fsblk_t newblock;
587         struct ext4_block bh = EXT4_BLOCK_ZERO();
588         int32_t depth = ext_depth(inode_ref->inode);
589
590         ext4_assert(sibling);
591
592         /* FIXME: currently we split at the point after the current extent. */
593         newblock = ext4_ext_new_meta_block(inode_ref, path, newext, &ret, 0);
594         if (ret)
595                 goto cleanup;
596
597         /*  For write access.# */
598         ret = ext4_block_get(inode_ref->fs->bdev, &bh, newblock);
599         if (ret != EOK)
600                 goto cleanup;
601
602         if (at == depth) {
603                 /* start copy from next extent */
604                 ptrdiff_t m = EXT_MAX_EXTENT(path[at].header) - path[at].extent;
605                 struct ext4_extent_header *neh;
606                 neh = ext_block_hdr(&bh);
607                 ext4_ext_init_header(inode_ref, neh, 0);
608                 if (m) {
609                         struct ext4_extent *ex;
610                         ex = EXT_FIRST_EXTENT(neh);
611                         memmove(ex, path[at].extent + 1,
612                                 sizeof(struct ext4_extent) * m);
613                         neh->entries_count =
614                             to_le16(to_le16(neh->entries_count) + m);
615                         path[at].header->entries_count = to_le16(
616                             to_le16(path[at].header->entries_count) - m);
617                         ret = ext4_ext_dirty(inode_ref, path + at);
618                         if (ret)
619                                 goto cleanup;
620                 }
621         } else {
622                 ptrdiff_t m = EXT_MAX_INDEX(path[at].header) - path[at].index;
623                 struct ext4_extent_header *neh;
624                 neh = ext_block_hdr(&bh);
625                 ext4_ext_init_header(inode_ref, neh, depth - at);
626                 if (m) {
627                         struct ext4_extent_index *ix;
628                         ix = EXT_FIRST_INDEX(neh);
629                         memmove(ix, path[at].index + 1,
630                                 sizeof(struct ext4_extent) * m);
631                         neh->entries_count =
632                             to_le16(to_le16(neh->entries_count) + m);
633                         path[at].header->entries_count = to_le16(
634                             to_le16(path[at].header->entries_count) - m);
635                         ret = ext4_ext_dirty(inode_ref, path + at);
636                         if (ret)
637                                 goto cleanup;
638                 }
639         }
640 cleanup:
641         if (ret) {
642                 if (bh.lb_id) {
643                         ext4_block_set(inode_ref->fs->bdev, &bh);
644                 }
645                 if (newblock)
646                         ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
647
648                 newblock = 0;
649         }
650         *sibling = newblock;
651         *new_bh = bh;
652         return ret;
653 }
654
655 static ext4_lblk_t ext4_ext_block_index(struct ext4_extent_header *eh)
656 {
657         if (eh->depth)
658                 return to_le32(EXT_FIRST_INDEX(eh)->first_block);
659
660         return to_le32(EXT_FIRST_EXTENT(eh)->first_block);
661 }
662
663 struct ext_split_trans {
664         ext4_fsblk_t ptr;
665         struct ext4_extent_path path;
666         int switch_to;
667 };
668
669 static int ext4_ext_insert_index(struct ext4_inode_ref *inode_ref,
670                                  struct ext4_extent_path *path,
671                                  int32_t at,
672                                  struct ext4_extent *newext,
673                                  ext4_lblk_t insert_index,
674                                  ext4_fsblk_t insert_block,
675                                  struct ext_split_trans *spt,
676                                  bool *need_grow)
677 {
678         struct ext4_extent_index *ix;
679         struct ext4_extent_path *curp = path + at;
680         struct ext4_block bh = EXT4_BLOCK_ZERO();
681         int32_t len;
682         int err;
683         struct ext4_extent_header *eh;
684
685         *need_grow = false;
686
687         if (curp->index && insert_index == to_le32(curp->index->first_block))
688                 return EIO;
689
690         if (to_le16(curp->header->entries_count) ==
691             to_le16(curp->header->max_entries_count)) {
692                 if (at) {
693                         struct ext4_extent_header *neh;
694                         err = ext4_ext_split_node(inode_ref, path, at, newext,
695                                                   &spt->ptr, &bh);
696                         if (err != EOK)
697                                 goto out;
698
699                         neh = ext_block_hdr(&bh);
700                         if (insert_index > to_le32(curp->index->first_block)) {
701                                 /* Make decision which node should be used to
702                                  * insert the index.*/
703                                 if (to_le16(neh->entries_count) >
704                                     to_le16(curp->header->entries_count)) {
705                                         eh = curp->header;
706                                         /* insert after */
707                                         ix = EXT_LAST_INDEX(eh) + 1;
708                                 } else {
709                                         eh = neh;
710                                         ix = EXT_FIRST_INDEX(eh);
711                                 }
712                         } else {
713                                 eh = curp->header;
714                                 /* insert before */
715                                 ix = EXT_LAST_INDEX(eh);
716                         }
717                 } else {
718                         err = EOK;
719                         *need_grow = true;
720                         goto out;
721                 }
722         } else {
723                 eh = curp->header;
724                 if (curp->index == NULL) {
725                         ix = EXT_FIRST_INDEX(eh);
726                         curp->index = ix;
727                 } else if (insert_index > to_le32(curp->index->first_block)) {
728                         /* insert after */
729                         ix = curp->index + 1;
730                 } else {
731                         /* insert before */
732                         ix = curp->index;
733                 }
734         }
735
736         len = EXT_LAST_INDEX(eh) - ix + 1;
737         ext4_assert(len >= 0);
738         if (len > 0)
739                 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_index));
740
741         if (ix > EXT_MAX_INDEX(eh)) {
742                 err = EIO;
743                 goto out;
744         }
745
746         ix->first_block = to_le32(insert_index);
747         ext4_idx_store_pblock(ix, insert_block);
748         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
749
750         if (ix > EXT_LAST_INDEX(eh)) {
751                 err = EIO;
752                 goto out;
753         }
754
755         if (eh == curp->header)
756                 err = ext4_ext_dirty(inode_ref, curp);
757         else
758                 err = EOK;
759
760 out:
761         if (err != EOK || *need_grow) {
762                 if (bh.lb_id)
763                         ext4_block_set(inode_ref->fs->bdev, &bh);
764
765                 spt->ptr = 0;
766         } else if (bh.lb_id) {
767                 /* If we got a sibling leaf. */
768                 bh.dirty = true;
769
770                 spt->path.p_block = ext4_idx_pblock(ix);
771                 spt->path.depth = to_le16(eh->depth);
772                 spt->path.maxdepth = 0;
773                 spt->path.extent = NULL;
774                 spt->path.index = ix;
775                 spt->path.header = eh;
776                 spt->path.block = bh;
777
778                 /*
779                  * If newext->ee_block can be included into the
780                  * right sub-tree.
781                  */
782                 if (to_le32(newext->first_block) >=
783                     ext4_ext_block_index(ext_block_hdr(&bh)))
784                         spt->switch_to = 1;
785                 else {
786                         curp->index = ix;
787                         curp->p_block = ext4_idx_pblock(ix);
788                 }
789
790         } else {
791                 spt->ptr = 0;
792                 curp->index = ix;
793                 curp->p_block = ext4_idx_pblock(ix);
794         }
795         return err;
796 }
797
798 /*
799  * ext4_ext_correct_indexes:
800  * if leaf gets modified and modified extent is first in the leaf,
801  * then we have to correct all indexes above.
802  */
803 static int ext4_ext_correct_indexes(struct ext4_inode_ref *inode_ref,
804                                     struct ext4_extent_path *path)
805 {
806         struct ext4_extent_header *eh;
807         int32_t depth = ext_depth(inode_ref->inode);
808         struct ext4_extent *ex;
809         uint32_t border;
810         int32_t k;
811         int err = EOK;
812
813         eh = path[depth].header;
814         ex = path[depth].extent;
815
816         if (ex == NULL || eh == NULL) {
817                 return EIO;
818         }
819
820         if (depth == 0) {
821                 /* there is no tree at all */
822                 return EOK;
823         }
824
825         if (ex != EXT_FIRST_EXTENT(eh)) {
826                 /* we correct tree if first leaf got modified only */
827                 return EOK;
828         }
829
830         /*
831          * TODO: we need correction if border is smaller than current one
832          */
833         k = depth - 1;
834         border = path[depth].extent->first_block;
835         path[k].index->first_block = border;
836         err = ext4_ext_dirty(inode_ref, path + k);
837         if (err != EOK)
838                 return err;
839
840         while (k--) {
841                 /* change all left-side indexes */
842                 if (path[k + 1].index != EXT_FIRST_INDEX(path[k + 1].header))
843                         break;
844                 path[k].index->first_block = border;
845                 err = ext4_ext_dirty(inode_ref, path + k);
846                 if (err != EOK)
847                         break;
848         }
849
850         return err;
851 }
852
853 static bool ext4_ext_can_prepend(struct ext4_extent *ex1,
854                                  struct ext4_extent *ex2)
855 {
856         if (ext4_ext_pblock(ex2) + ext4_ext_get_actual_len(ex2) !=
857             ext4_ext_pblock(ex1))
858                 return false;
859
860 #ifdef AGGRESSIVE_TEST
861         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
862                 return 0;
863 #else
864         if (ext4_ext_is_unwritten(ex1)) {
865                 if (ext4_ext_get_actual_len(ex1) +
866                         ext4_ext_get_actual_len(ex2) >
867                     EXT_UNWRITTEN_MAX_LEN)
868                         return false;
869         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
870                    EXT_INIT_MAX_LEN)
871                 return false;
872 #endif
873
874         if (to_le32(ex2->first_block) + ext4_ext_get_actual_len(ex2) !=
875             to_le32(ex1->first_block))
876                 return false;
877
878         return true;
879 }
880
881 static bool ext4_ext_can_append(struct ext4_extent *ex1,
882                                 struct ext4_extent *ex2)
883 {
884         if (ext4_ext_pblock(ex1) + ext4_ext_get_actual_len(ex1) !=
885             ext4_ext_pblock(ex2))
886                 return false;
887
888 #ifdef AGGRESSIVE_TEST
889         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
890                 return 0;
891 #else
892         if (ext4_ext_is_unwritten(ex1)) {
893                 if (ext4_ext_get_actual_len(ex1) +
894                         ext4_ext_get_actual_len(ex2) >
895                     EXT_UNWRITTEN_MAX_LEN)
896                         return false;
897         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
898                    EXT_INIT_MAX_LEN)
899                 return false;
900 #endif
901
902         if (to_le32(ex1->first_block) + ext4_ext_get_actual_len(ex1) !=
903             to_le32(ex2->first_block))
904                 return false;
905
906         return true;
907 }
908
909 static int ext4_ext_insert_leaf(struct ext4_inode_ref *inode_ref,
910                                 struct ext4_extent_path *path,
911                                 int32_t at,
912                                 struct ext4_extent *newext,
913                                 struct ext_split_trans *spt,
914                                 uint32_t flags,
915                                 bool *need_grow)
916 {
917         struct ext4_extent_path *curp = path + at;
918         struct ext4_extent *ex = curp->extent;
919         struct ext4_block bh = EXT4_BLOCK_ZERO();
920         int32_t len;
921         int err = EOK;
922         int unwritten;
923         struct ext4_extent_header *eh = NULL;
924
925         *need_grow = false;
926
927         if (curp->extent &&
928             to_le32(newext->first_block) == to_le32(curp->extent->first_block))
929                 return EIO;
930
931         if (!(flags & EXT4_EXT_NO_COMBINE)) {
932                 if (curp->extent && ext4_ext_can_append(curp->extent, newext)) {
933                         unwritten = ext4_ext_is_unwritten(curp->extent);
934                         curp->extent->block_count =
935                             to_le16(ext4_ext_get_actual_len(curp->extent) +
936                                     ext4_ext_get_actual_len(newext));
937                         if (unwritten)
938                                 ext4_ext_mark_unwritten(curp->extent);
939                         err = ext4_ext_dirty(inode_ref, curp);
940                         goto out;
941                 }
942
943                 if (curp->extent &&
944                     ext4_ext_can_prepend(curp->extent, newext)) {
945                         unwritten = ext4_ext_is_unwritten(curp->extent);
946                         curp->extent->first_block = newext->first_block;
947                         curp->extent->block_count =
948                             to_le16(ext4_ext_get_actual_len(curp->extent) +
949                                     ext4_ext_get_actual_len(newext));
950                         if (unwritten)
951                                 ext4_ext_mark_unwritten(curp->extent);
952                         err = ext4_ext_dirty(inode_ref, curp);
953                         goto out;
954                 }
955         }
956
957         if (to_le16(curp->header->entries_count) ==
958             to_le16(curp->header->max_entries_count)) {
959                 if (at) {
960                         struct ext4_extent_header *neh;
961                         err = ext4_ext_split_node(inode_ref, path, at, newext,
962                                                   &spt->ptr, &bh);
963                         if (err != EOK)
964                                 goto out;
965
966                         neh = ext_block_hdr(&bh);
967                         if (to_le32(newext->first_block) >
968                             to_le32(curp->extent->first_block)) {
969                                 if (to_le16(neh->entries_count) >
970                                     to_le16(curp->header->entries_count)) {
971                                         eh = curp->header;
972                                         /* insert after */
973                                         ex = EXT_LAST_EXTENT(eh) + 1;
974                                 } else {
975                                         eh = neh;
976                                         ex = EXT_FIRST_EXTENT(eh);
977                                 }
978                         } else {
979                                 eh = curp->header;
980                                 /* insert before */
981                                 ex = EXT_LAST_EXTENT(eh);
982                         }
983                 } else {
984                         err = EOK;
985                         *need_grow = true;
986                         goto out;
987                 }
988         } else {
989                 eh = curp->header;
990                 if (curp->extent == NULL) {
991                         ex = EXT_FIRST_EXTENT(eh);
992                         curp->extent = ex;
993                 } else if (to_le32(newext->first_block) >
994                            to_le32(curp->extent->first_block)) {
995                         /* insert after */
996                         ex = curp->extent + 1;
997                 } else {
998                         /* insert before */
999                         ex = curp->extent;
1000                 }
1001         }
1002
1003         len = EXT_LAST_EXTENT(eh) - ex + 1;
1004         ext4_assert(len >= 0);
1005         if (len > 0)
1006                 memmove(ex + 1, ex, len * sizeof(struct ext4_extent));
1007
1008         if (ex > EXT_MAX_EXTENT(eh)) {
1009                 err = EIO;
1010                 goto out;
1011         }
1012
1013         ex->first_block = newext->first_block;
1014         ex->block_count = newext->block_count;
1015         ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
1016         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
1017
1018         if (ex > EXT_LAST_EXTENT(eh)) {
1019                 err = EIO;
1020                 goto out;
1021         }
1022
1023         if (eh == curp->header) {
1024                 err = ext4_ext_correct_indexes(inode_ref, path);
1025                 if (err != EOK)
1026                         goto out;
1027                 err = ext4_ext_dirty(inode_ref, curp);
1028         } else
1029                 err = EOK;
1030
1031 out:
1032         if (err != EOK || *need_grow) {
1033                 if (bh.lb_id)
1034                         ext4_block_set(inode_ref->fs->bdev, &bh);
1035
1036                 spt->ptr = 0;
1037         } else if (bh.lb_id) {
1038                 /* If we got a sibling leaf. */
1039                 bh.dirty = true;
1040
1041                 spt->path.p_block = ext4_ext_pblock(ex);
1042                 spt->path.depth = to_le16(eh->depth);
1043                 spt->path.maxdepth = 0;
1044                 spt->path.extent = ex;
1045                 spt->path.index = NULL;
1046                 spt->path.header = eh;
1047                 spt->path.block = bh;
1048
1049                 /*
1050                  * If newext->ee_block can be included into the
1051                  * right sub-tree.
1052                  */
1053                 if (to_le32(newext->first_block) >=
1054                     ext4_ext_block_index(ext_block_hdr(&bh)))
1055                         spt->switch_to = 1;
1056                 else {
1057                         curp->extent = ex;
1058                         curp->p_block = ext4_ext_pblock(ex);
1059                 }
1060
1061         } else {
1062                 spt->ptr = 0;
1063                 curp->extent = ex;
1064                 curp->p_block = ext4_ext_pblock(ex);
1065         }
1066
1067         return err;
1068 }
1069
1070 /*
1071  * ext4_ext_grow_indepth:
1072  * implements tree growing procedure:
1073  * - allocates new block
1074  * - moves top-level data (index block or leaf) into the new block
1075  * - initializes new top-level, creating index that points to the
1076  *   just created block
1077  */
1078 static int ext4_ext_grow_indepth(struct ext4_inode_ref *inode_ref,
1079                                  uint32_t flags)
1080 {
1081         struct ext4_extent_header *neh;
1082         struct ext4_block bh = EXT4_BLOCK_ZERO();
1083         ext4_fsblk_t newblock, goal = 0;
1084         int err = EOK;
1085
1086         /* Try to prepend new index to old one */
1087         if (ext_depth(inode_ref->inode))
1088                 goal = ext4_idx_pblock(
1089                     EXT_FIRST_INDEX(ext_inode_hdr(inode_ref->inode)));
1090         else
1091                 goal = ext4_fs_inode_to_goal_block(inode_ref);
1092
1093         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, &err);
1094         if (newblock == 0)
1095                 return err;
1096
1097         /* # */
1098         err = ext4_block_get(inode_ref->fs->bdev, &bh, newblock);
1099         if (err != EOK) {
1100                 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1101                 return err;
1102         }
1103
1104         /* move top-level index/leaf into new block */
1105         memmove(bh.data, inode_ref->inode->blocks,
1106                 sizeof(inode_ref->inode->blocks));
1107
1108         /* set size of new block */
1109         neh = ext_block_hdr(&bh);
1110         /* old root could have indexes or leaves
1111          * so calculate e_max right way */
1112         if (ext_depth(inode_ref->inode))
1113                 neh->max_entries_count =
1114                     to_le16(ext4_ext_space_block_idx(inode_ref));
1115         else
1116                 neh->max_entries_count =
1117                     to_le16(ext4_ext_space_block(inode_ref));
1118
1119         neh->magic = to_le16(EXT4_EXTENT_MAGIC);
1120         ext4_extent_block_csum_set(inode_ref, neh);
1121
1122         /* Update top-level index: num,max,pointer */
1123         neh = ext_inode_hdr(inode_ref->inode);
1124         neh->entries_count = to_le16(1);
1125         ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1126         if (neh->depth == 0) {
1127                 /* Root extent block becomes index block */
1128                 neh->max_entries_count =
1129                     to_le16(ext4_ext_space_root_idx(inode_ref));
1130                 EXT_FIRST_INDEX(neh)
1131                     ->first_block = EXT_FIRST_EXTENT(neh)->first_block;
1132         }
1133         neh->depth = to_le16(to_le16(neh->depth) + 1);
1134
1135         bh.dirty = true;
1136         inode_ref->dirty = true;
1137         ext4_block_set(inode_ref->fs->bdev, &bh);
1138
1139         return err;
1140 }
1141
1142 __unused static void print_path(struct ext4_extent_path *path)
1143 {
1144         int32_t i = path->depth;
1145         while (i >= 0) {
1146
1147                 ptrdiff_t a =
1148                     (path->extent)
1149                         ? (path->extent - EXT_FIRST_EXTENT(path->header))
1150                         : 0;
1151                 ptrdiff_t b =
1152                     (path->index)
1153                         ? (path->index - EXT_FIRST_INDEX(path->header))
1154                         : 0;
1155
1156                 (void)a;
1157                 (void)b;
1158                 ext4_dbg(DEBUG_EXTENT,
1159                          "depth %" PRId32 ", p_block: %" PRIu64 ","
1160                          "p_ext offset: %td, p_idx offset: %td\n",
1161                          i, path->p_block, a, b);
1162                 i--;
1163                 path++;
1164         }
1165 }
1166
1167 static void ext4_ext_replace_path(struct ext4_inode_ref *inode_ref,
1168                                   struct ext4_extent_path *path,
1169                                   struct ext_split_trans *spt,
1170                                   int32_t level)
1171 {
1172         int32_t depth = ext_depth(inode_ref->inode);
1173         int32_t i = depth - level;
1174         ext4_ext_drop_refs(inode_ref, path + i, 1);
1175         path[i] = spt[level].path;
1176 }
1177
1178 static int ext4_ext_insert_extent(struct ext4_inode_ref *inode_ref,
1179                                   struct ext4_extent_path **ppath,
1180                                   struct ext4_extent *newext, uint32_t flags)
1181 {
1182         int32_t i, depth, level;
1183         int ret = EOK;
1184         ext4_fsblk_t ptr = 0;
1185         bool need_grow = false;
1186         struct ext4_extent_path *path = *ppath;
1187         struct ext_split_trans *spt = NULL;
1188         struct ext_split_trans newblock;
1189
1190         memset(&newblock, 0, sizeof(newblock));
1191
1192         depth = ext_depth(inode_ref->inode);
1193         for (i = depth, level = 0; i >= 0; i--, level++)
1194                 if (EXT_HAS_FREE_INDEX(path + i))
1195                         break;
1196
1197         if (level) {
1198                 spt = calloc(1, sizeof(struct ext_split_trans) * (level));
1199                 if (!spt) {
1200                         ret = ENOMEM;
1201                         goto out;
1202                 }
1203         }
1204         i = 0;
1205 again:
1206         depth = ext_depth(inode_ref->inode);
1207
1208         do {
1209                 if (!i) {
1210                         ret = ext4_ext_insert_leaf(inode_ref, path, depth - i,
1211                                                    newext, &newblock, flags,
1212                                                    &need_grow);
1213                 } else {
1214                         ret = ext4_ext_insert_index(
1215                             inode_ref, path, depth - i, newext,
1216                             ext4_ext_block_index(
1217                                 ext_block_hdr(&spt[i - 1].path.block)),
1218                             spt[i - 1].ptr, &newblock,
1219                             &need_grow);
1220                 }
1221                 ptr = newblock.ptr;
1222
1223                 if (ret != EOK)
1224                         goto out;
1225
1226                 else if (spt && ptr && !ret) {
1227                         /* Prepare for the next iteration after splitting. */
1228                         spt[i] = newblock;
1229                 }
1230
1231                 i++;
1232         } while (ptr != 0 && i <= depth);
1233
1234         if (need_grow) {
1235                 ret = ext4_ext_grow_indepth(inode_ref, 0);
1236                 if (ret)
1237                         goto out;
1238                 ret = ext4_find_extent(inode_ref, to_le32(newext->first_block),
1239                                        ppath, 0);
1240                 if (ret)
1241                         goto out;
1242                 i = depth;
1243                 path = *ppath;
1244                 goto again;
1245         }
1246 out:
1247         if (ret) {
1248                 if (path)
1249                         ext4_ext_drop_refs(inode_ref, path, 0);
1250
1251                 while (--level >= 0 && spt) {
1252                         if (spt[level].ptr) {
1253                                 ext4_ext_free_blocks(inode_ref, spt[level].ptr,
1254                                                      1, 0);
1255                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1256                                                    1);
1257                         }
1258                 }
1259         } else {
1260                 while (--level >= 0 && spt) {
1261                         if (spt[level].switch_to)
1262                                 ext4_ext_replace_path(inode_ref, path, spt,
1263                                                       level);
1264                         else if (spt[level].ptr)
1265                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1266                                                    1);
1267                 }
1268         }
1269         if (spt)
1270                 free(spt);
1271
1272         return ret;
1273 }
1274
1275 static void ext4_ext_remove_blocks(struct ext4_inode_ref *inode_ref,
1276                                    struct ext4_extent *ex, ext4_lblk_t from,
1277                                    ext4_lblk_t to)
1278 {
1279         ext4_lblk_t len = to - from + 1;
1280         ext4_lblk_t num;
1281         ext4_fsblk_t start;
1282         num = from - to_le32(ex->first_block);
1283         start = ext4_ext_pblock(ex) + num;
1284         ext4_dbg(DEBUG_EXTENT,
1285                  "Freeing %" PRIu32 " at %" PRIu64 ", %" PRIu32 "\n", from,
1286                  start, len);
1287
1288         ext4_ext_free_blocks(inode_ref, start, len, 0);
1289 }
1290
1291 static int ext4_ext_remove_idx(struct ext4_inode_ref *inode_ref,
1292                                struct ext4_extent_path *path, int32_t depth)
1293 {
1294         int err = EOK;
1295         int32_t i = depth;
1296         ext4_fsblk_t leaf;
1297
1298         /* free index block */
1299         leaf = ext4_idx_pblock(path[i].index);
1300
1301         if (path[i].index != EXT_LAST_INDEX(path[i].header)) {
1302                 ptrdiff_t len = EXT_LAST_INDEX(path[i].header) - path[i].index;
1303                 memmove(path[i].index, path[i].index + 1,
1304                         len * sizeof(struct ext4_extent_index));
1305         }
1306
1307         path[i].header->entries_count =
1308             to_le16(to_le16(path[i].header->entries_count) - 1);
1309         err = ext4_ext_dirty(inode_ref, path + i);
1310         if (err != EOK)
1311                 return err;
1312
1313         ext4_dbg(DEBUG_EXTENT, "IDX: Freeing %" PRIu32 " at %" PRIu64 ", %d\n",
1314                  to_le32(path[i].index->first_block), leaf, 1);
1315         ext4_ext_free_blocks(inode_ref, leaf, 1, 0);
1316
1317         while (i > 0) {
1318                 if (path[i].index != EXT_FIRST_INDEX(path[i].header))
1319                         break;
1320
1321                 path[i - 1].index->first_block = path[i].index->first_block;
1322                 err = ext4_ext_dirty(inode_ref, path + i - 1);
1323                 if (err != EOK)
1324                         break;
1325
1326                 i--;
1327         }
1328         return err;
1329 }
1330
1331 static int ext4_ext_remove_leaf(struct ext4_inode_ref *inode_ref,
1332                                 struct ext4_extent_path *path, ext4_lblk_t from,
1333                                 ext4_lblk_t to)
1334 {
1335
1336         int32_t depth = ext_depth(inode_ref->inode);
1337         struct ext4_extent *ex = path[depth].extent;
1338         struct ext4_extent *start_ex, *ex2 = NULL;
1339         struct ext4_extent_header *eh = path[depth].header;
1340         int32_t len;
1341         int err = EOK;
1342         uint16_t new_entries;
1343
1344         start_ex = ex;
1345         new_entries = to_le16(eh->entries_count);
1346         while (ex <= EXT_LAST_EXTENT(path[depth].header) &&
1347                to_le32(ex->first_block) <= to) {
1348                 int32_t new_len = 0;
1349                 int unwritten;
1350                 ext4_lblk_t start, new_start;
1351                 new_start = start = to_le32(ex->first_block);
1352                 len = ext4_ext_get_actual_len(ex);
1353                 if (start < from) {
1354                         len -= from - start;
1355                         new_len = from - start;
1356                         start = from;
1357                         start_ex++;
1358                 }
1359                 /* TODO: More complicated truncate operation. */
1360                 /*if (start + len - 1 > to) {*/
1361                         /*len -= start + len - 1 - to;*/
1362                         /*new_len = start + len - 1 - to;*/
1363                         /*new_start = to + 1;*/
1364                         /*ex2 = ex;*/
1365                 /*}*/
1366
1367                 ext4_ext_remove_blocks(inode_ref, ex, start, start + len - 1);
1368                 ex->first_block = to_le32(new_start);
1369                 if (!new_len)
1370                         new_entries--;
1371                 else {
1372                         unwritten = ext4_ext_is_unwritten(ex);
1373                         ex->block_count = to_le16(new_len);
1374                         if (unwritten)
1375                                 ext4_ext_mark_unwritten(ex);
1376                 }
1377
1378                 ex += 1;
1379         }
1380
1381         if (ex2 == NULL)
1382                 ex2 = ex;
1383
1384         if (ex2 <= EXT_LAST_EXTENT(eh))
1385                 memmove(start_ex, ex2, EXT_LAST_EXTENT(eh) - ex2 + 1);
1386
1387         eh->entries_count = to_le16(new_entries);
1388         ext4_ext_dirty(inode_ref, path + depth);
1389         if (path[depth].extent == EXT_FIRST_EXTENT(eh) && eh->entries_count)
1390                 err = ext4_ext_correct_indexes(inode_ref, path);
1391
1392         /* if this leaf is free, then we should
1393          * remove it from index block above */
1394         if (err == EOK && eh->entries_count == 0 && path[depth].block.lb_id)
1395                 err = ext4_ext_remove_idx(inode_ref, path, depth - 1);
1396         else if (depth > 0)
1397                 path[depth - 1].index++;
1398
1399         return err;
1400 }
1401
1402 static bool ext4_ext_more_to_rm(struct ext4_extent_path *path, ext4_lblk_t to)
1403 {
1404         if (!to_le16(path->header->entries_count))
1405                 return false;
1406
1407         if (path->index > EXT_LAST_INDEX(path->header))
1408                 return false;
1409
1410         if (to_le32(path->index->first_block) > to)
1411                 return false;
1412
1413         return true;
1414 }
1415
1416 int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref, ext4_lblk_t from,
1417                           ext4_lblk_t to)
1418 {
1419         struct ext4_extent_path *path = NULL;
1420         int ret = EOK;
1421         int32_t depth = ext_depth(inode_ref->inode);
1422         int32_t i;
1423
1424         ret = ext4_find_extent(inode_ref, from, &path, 0);
1425         if (ret)
1426                 goto out;
1427
1428         if (!path[depth].extent) {
1429                 ret = EOK;
1430                 goto out;
1431         }
1432
1433         bool in_range = IN_RANGE(from, to_le32(path[depth].extent->first_block),
1434                         ext4_ext_get_actual_len(path[depth].extent));
1435
1436         if (!in_range) {
1437                 ret = EOK;
1438                 goto out;
1439         }
1440
1441         i = depth;
1442         while (i >= 0) {
1443                 if (i == depth) {
1444                         struct ext4_extent_header *eh;
1445                         struct ext4_extent *first_ex, *last_ex;
1446                         ext4_lblk_t leaf_from, leaf_to;
1447                         eh = path[i].header;
1448                         ext4_assert(to_le16(eh->entries_count) > 0);
1449                         first_ex = EXT_FIRST_EXTENT(eh);
1450                         last_ex = EXT_LAST_EXTENT(eh);
1451                         leaf_from = to_le32(first_ex->first_block);
1452                         leaf_to = to_le32(last_ex->first_block) +
1453                                   ext4_ext_get_actual_len(last_ex) - 1;
1454                         if (leaf_from < from)
1455                                 leaf_from = from;
1456
1457                         if (leaf_to > to)
1458                                 leaf_to = to;
1459
1460                         ext4_ext_remove_leaf(inode_ref, path, leaf_from,
1461                                         leaf_to);
1462                         ext4_ext_drop_refs(inode_ref, path + i, 0);
1463                         i--;
1464                         continue;
1465                 }
1466
1467                 struct ext4_extent_header *eh;
1468                 eh = path[i].header;
1469                 if (ext4_ext_more_to_rm(path + i, to)) {
1470                         struct ext4_block bh = EXT4_BLOCK_ZERO();
1471                         if (path[i + 1].block.lb_id)
1472                                 ext4_ext_drop_refs(inode_ref, path + i + 1, 0);
1473
1474                         ret = read_extent_tree_block(inode_ref,
1475                                         ext4_idx_pblock(path[i].index),
1476                                         depth - i - 1, &bh, 0);
1477                         if (ret)
1478                                 goto out;
1479
1480                         path[i].p_block =
1481                                         ext4_idx_pblock(path[i].index);
1482                         path[i + 1].block = bh;
1483                         path[i + 1].header = ext_block_hdr(&bh);
1484                         path[i + 1].depth = depth - i - 1;
1485                         if (i + 1 == depth)
1486                                 path[i + 1].extent = EXT_FIRST_EXTENT(
1487                                         path[i + 1].header);
1488                         else
1489                                 path[i + 1].index =
1490                                         EXT_FIRST_INDEX(path[i + 1].header);
1491
1492                         i++;
1493                 } else {
1494                         if (i > 0) {
1495                                 if (!eh->entries_count)
1496                                         ret = ext4_ext_remove_idx(inode_ref, path,
1497                                                         i - 1);
1498                                 else
1499                                         path[i - 1].index++;
1500
1501                         }
1502
1503                         if (i)
1504                                 ext4_block_set(inode_ref->fs->bdev,
1505                                                 &path[i].block);
1506
1507
1508                         i--;
1509                 }
1510
1511         }
1512
1513         /* TODO: flexible tree reduction should be here */
1514         if (path->header->entries_count == 0) {
1515                 /*
1516                  * truncate to zero freed all the tree,
1517                  * so we need to correct eh_depth
1518                  */
1519                 ext_inode_hdr(inode_ref->inode)->depth = 0;
1520                 ext_inode_hdr(inode_ref->inode)->max_entries_count =
1521                     to_le16(ext4_ext_space_root(inode_ref));
1522                 ret = ext4_ext_dirty(inode_ref, path);
1523         }
1524
1525 out:
1526         ext4_ext_drop_refs(inode_ref, path, 0);
1527         free(path);
1528         path = NULL;
1529         return ret;
1530 }
1531
1532 static int ext4_ext_split_extent_at(struct ext4_inode_ref *inode_ref,
1533                                     struct ext4_extent_path **ppath,
1534                                     ext4_lblk_t split, uint32_t split_flag)
1535 {
1536         struct ext4_extent *ex, newex;
1537         ext4_fsblk_t newblock;
1538         ext4_lblk_t ee_block;
1539         int32_t ee_len;
1540         int32_t depth = ext_depth(inode_ref->inode);
1541         int err = EOK;
1542
1543         ex = (*ppath)[depth].extent;
1544         ee_block = to_le32(ex->first_block);
1545         ee_len = ext4_ext_get_actual_len(ex);
1546         newblock = split - ee_block + ext4_ext_pblock(ex);
1547
1548         if (split == ee_block) {
1549                 /*
1550                  * case b: block @split is the block that the extent begins with
1551                  * then we just change the state of the extent, and splitting
1552                  * is not needed.
1553                  */
1554                 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1555                         ext4_ext_mark_unwritten(ex);
1556                 else
1557                         ext4_ext_mark_initialized(ex);
1558
1559                 err = ext4_ext_dirty(inode_ref, *ppath + depth);
1560                 goto out;
1561         }
1562
1563         ex->block_count = to_le16(split - ee_block);
1564         if (split_flag & EXT4_EXT_MARK_UNWRIT1)
1565                 ext4_ext_mark_unwritten(ex);
1566
1567         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1568         if (err != EOK)
1569                 goto out;
1570
1571         newex.first_block = to_le32(split);
1572         newex.block_count = to_le16(ee_len - (split - ee_block));
1573         ext4_ext_store_pblock(&newex, newblock);
1574         if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1575                 ext4_ext_mark_unwritten(&newex);
1576         err = ext4_ext_insert_extent(inode_ref, ppath, &newex,
1577                                      EXT4_EXT_NO_COMBINE);
1578         if (err != EOK)
1579                 goto restore_extent_len;
1580
1581 out:
1582         return err;
1583 restore_extent_len:
1584         ex->block_count = to_le16(ee_len);
1585         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1586         return err;
1587 }
1588
1589 static int ext4_ext_convert_to_initialized(struct ext4_inode_ref *inode_ref,
1590                                            struct ext4_extent_path **ppath,
1591                                            ext4_lblk_t split, uint32_t blocks)
1592 {
1593         int32_t depth = ext_depth(inode_ref->inode), err = EOK;
1594         struct ext4_extent *ex = (*ppath)[depth].extent;
1595
1596         ext4_assert(to_le32(ex->first_block) <= split);
1597
1598         if (split + blocks ==
1599             to_le32(ex->first_block) + ext4_ext_get_actual_len(ex)) {
1600                 /* split and initialize right part */
1601                 err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1602                                                EXT4_EXT_MARK_UNWRIT1);
1603         } else if (to_le32(ex->first_block) == split) {
1604                 /* split and initialize left part */
1605                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1606                                                EXT4_EXT_MARK_UNWRIT2);
1607         } else {
1608                 /* split 1 extent to 3 and initialize the 2nd */
1609                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1610                                                EXT4_EXT_MARK_UNWRIT1 |
1611                                                    EXT4_EXT_MARK_UNWRIT2);
1612                 if (!err) {
1613                         err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1614                                                        EXT4_EXT_MARK_UNWRIT1);
1615                 }
1616         }
1617
1618         return err;
1619 }
1620
1621 static ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_extent_path *path)
1622 {
1623         int32_t depth;
1624
1625         depth = path->depth;
1626
1627         if (depth == 0 && path->extent == NULL)
1628                 return EXT_MAX_BLOCKS;
1629
1630         while (depth >= 0) {
1631                 if (depth == path->depth) {
1632                         /* leaf */
1633                         if (path[depth].extent &&
1634                             path[depth].extent !=
1635                                 EXT_LAST_EXTENT(path[depth].header))
1636                                 return to_le32(
1637                                     path[depth].extent[1].first_block);
1638                 } else {
1639                         /* index */
1640                         if (path[depth].index !=
1641                             EXT_LAST_INDEX(path[depth].header))
1642                                 return to_le32(
1643                                     path[depth].index[1].first_block);
1644                 }
1645                 depth--;
1646         }
1647
1648         return EXT_MAX_BLOCKS;
1649 }
1650
1651 static int ext4_ext_zero_unwritten_range(struct ext4_inode_ref *inode_ref,
1652                                          ext4_fsblk_t block,
1653                                          uint32_t blocks_count)
1654 {
1655         int err = EOK;
1656         uint32_t i;
1657         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
1658         for (i = 0; i < blocks_count; i++) {
1659                 struct ext4_block bh = EXT4_BLOCK_ZERO();
1660                 err = ext4_block_get(inode_ref->fs->bdev, &bh, block + i);
1661                 if (err != EOK)
1662                         break;
1663
1664                 memset(bh.data, 0, block_size);
1665                 bh.dirty = true;
1666                 err = ext4_block_set(inode_ref->fs->bdev, &bh);
1667                 if (err != EOK)
1668                         break;
1669         }
1670         return err;
1671 }
1672
1673 int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref, ext4_fsblk_t iblock,
1674                         uint32_t max_blocks, ext4_fsblk_t *result, bool create,
1675                         uint32_t *blocks_count)
1676 {
1677         struct ext4_extent_path *path = NULL;
1678         struct ext4_extent newex, *ex;
1679         ext4_fsblk_t goal;
1680         int err = EOK;
1681         int32_t depth;
1682         uint32_t allocated = 0;
1683         ext4_fsblk_t next, newblock;
1684
1685         if (result)
1686                 *result = 0;
1687
1688         if (blocks_count)
1689                 *blocks_count = 0;
1690
1691         /* find extent for this block */
1692         err = ext4_find_extent(inode_ref, iblock, &path, 0);
1693         if (err != EOK) {
1694                 path = NULL;
1695                 goto out2;
1696         }
1697
1698         depth = ext_depth(inode_ref->inode);
1699
1700         /*
1701          * consistent leaf must not be empty
1702          * this situations is possible, though, _during_ tree modification
1703          * this is why assert can't be put in ext4_ext_find_extent()
1704          */
1705         ex = path[depth].extent;
1706         if (ex) {
1707                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1708                 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
1709                 uint16_t ee_len = ext4_ext_get_actual_len(ex);
1710                 /* if found exent covers block, simple return it */
1711                 if (IN_RANGE(iblock, ee_block, ee_len)) {
1712                         /* number of remain blocks in the extent */
1713                         allocated = ee_len - (iblock - ee_block);
1714
1715                         if (!ext4_ext_is_unwritten(ex)) {
1716                                 newblock = iblock - ee_block + ee_start;
1717                                 goto out;
1718                         }
1719
1720                         if (!create) {
1721                                 newblock = 0;
1722                                 goto out;
1723                         }
1724
1725                         uint32_t zero_range;
1726                         zero_range = allocated;
1727                         if (zero_range > max_blocks)
1728                                 zero_range = max_blocks;
1729
1730                         newblock = iblock - ee_block + ee_start;
1731                         err = ext4_ext_zero_unwritten_range(inode_ref, newblock,
1732                                         zero_range);
1733                         if (err != EOK)
1734                                 goto out2;
1735
1736                         err = ext4_ext_convert_to_initialized(inode_ref, &path,
1737                                         iblock, zero_range);
1738                         if (err != EOK)
1739                                 goto out2;
1740
1741                         goto out;
1742                 }
1743         }
1744
1745         /*
1746          * requested block isn't allocated yet
1747          * we couldn't try to create block if create flag is zero
1748          */
1749         if (!create) {
1750                 goto out2;
1751         }
1752
1753         /* find next allocated block so that we know how many
1754          * blocks we can allocate without ovelapping next extent */
1755         next = ext4_ext_next_allocated_block(path);
1756         allocated = next - iblock;
1757         if (allocated > max_blocks)
1758                 allocated = max_blocks;
1759
1760         /* allocate new block */
1761         goal = ext4_ext_find_goal(inode_ref, path, iblock);
1762         newblock = ext4_new_meta_blocks(inode_ref, goal, 0, &allocated, &err);
1763         if (!newblock)
1764                 goto out2;
1765
1766         /* try to insert new extent into found leaf and return */
1767         newex.first_block = to_le32(iblock);
1768         ext4_ext_store_pblock(&newex, newblock);
1769         newex.block_count = to_le16(allocated);
1770         err = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1771         if (err != EOK) {
1772                 /* free data blocks we just allocated */
1773                 ext4_ext_free_blocks(inode_ref, ext4_ext_pblock(&newex),
1774                                      to_le16(newex.block_count), 0);
1775                 goto out2;
1776         }
1777
1778         /* previous routine could use block we allocated */
1779         newblock = ext4_ext_pblock(&newex);
1780
1781 out:
1782         if (allocated > max_blocks)
1783                 allocated = max_blocks;
1784
1785         if (result)
1786                 *result = newblock;
1787
1788         if (blocks_count)
1789                 *blocks_count = allocated;
1790
1791 out2:
1792         if (path) {
1793                 ext4_ext_drop_refs(inode_ref, path, 0);
1794                 free(path);
1795         }
1796
1797         return err;
1798 }
1799 #endif