FIX: directory HTree root checksum is not assigned correctly.
[lwext4.git] / lwext4 / ext4_extent_full.c
1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * - Redistributions of source code must retain the above copyright
10  *   notice, this list of conditions and the following disclaimer.
11  * - Redistributions in binary form must reproduce the above copyright
12  *   notice, this list of conditions and the following disclaimer in the
13  *   documentation and/or other materials provided with the distribution.
14  * - The name of the author may not be used to endorse or promote products
15  *   derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include "ext4_config.h"
30 #include "ext4_blockdev.h"
31 #include "ext4_fs.h"
32 #include "ext4_super.h"
33 #include "ext4_crc32c.h"
34 #include "ext4_balloc.h"
35 #include "ext4_debug.h"
36
37 #include <stdlib.h>
38 #include <string.h>
39 #include <inttypes.h>
40 #include <stddef.h>
41
42 #include "ext4_extent.h"
43
44 #if CONFIG_EXTENT_FULL
45
46 /*
47  * used by extent splitting.
48  */
49 #define EXT4_EXT_MARK_UNWRIT1 0x02 /* mark first half unwritten */
50 #define EXT4_EXT_MARK_UNWRIT2 0x04 /* mark second half unwritten */
51 #define EXT4_EXT_DATA_VALID1 0x08  /* first half contains valid data */
52 #define EXT4_EXT_DATA_VALID2 0x10  /* second half contains valid data */
53 #define EXT4_EXT_NO_COMBINE 0x20   /* do not combine two extents */
54
55 static struct ext4_extent_tail *
56 find_ext4_extent_tail(struct ext4_extent_header *eh)
57 {
58         return (struct ext4_extent_tail *)(((char *)eh) +
59                                            EXT4_EXTENT_TAIL_OFFSET(eh));
60 }
61
62 static struct ext4_extent_header *ext_inode_hdr(struct ext4_inode *inode)
63 {
64         return (struct ext4_extent_header *)inode->blocks;
65 }
66
67 static struct ext4_extent_header *ext_block_hdr(struct ext4_block *block)
68 {
69         return (struct ext4_extent_header *)block->data;
70 }
71
72 static uint16_t ext_depth(struct ext4_inode *inode)
73 {
74         return to_le16(ext_inode_hdr(inode)->depth);
75 }
76
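/*
 * Return the number of blocks covered by an extent. Lengths above
 * EXT_INIT_MAX_LEN encode unwritten extents, so that flag is masked off.
 */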
77 static uint16_t ext4_ext_get_actual_len(struct ext4_extent *ext)
78 {
79         return (to_le16(ext->block_count) <= EXT_INIT_MAX_LEN
80                     ? to_le16(ext->block_count)
81                     : (to_le16(ext->block_count) - EXT_INIT_MAX_LEN));
82 }
83
84 static void ext4_ext_mark_initialized(struct ext4_extent *ext)
85 {
86         ext->block_count = to_le16(ext4_ext_get_actual_len(ext));
87 }
88
89 static void ext4_ext_mark_unwritten(struct ext4_extent *ext)
90 {
91         ext->block_count |= to_le16(EXT_INIT_MAX_LEN);
92 }
93
94 static int ext4_ext_is_unwritten(struct ext4_extent *ext)
95 {
96         /* Extent with ee_len of 0x8000 is treated as an initialized extent */
97         return (to_le16(ext->block_count) > EXT_INIT_MAX_LEN);
98 }
99
100 /*
101  * ext4_ext_pblock:
102  * combine low and high parts of physical block number into ext4_fsblk_t
103  */
104 static ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
105 {
106         ext4_fsblk_t block;
107
108         block = to_le32(ex->start_lo);
109         block |= ((ext4_fsblk_t)to_le16(ex->start_hi) << 31) << 1;
110         return block;
111 }
112
113 /*
114  * ext4_idx_pblock:
115  * combine low and high parts of a leaf physical block number into ext4_fsblk_t
116  */
117 static ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_index *ix)
118 {
119         ext4_fsblk_t block;
120
121         block = to_le32(ix->leaf_lo);
122         block |= ((ext4_fsblk_t)to_le16(ix->leaf_hi) << 31) << 1;
123         return block;
124 }
125
126 /*
127  * ext4_ext_store_pblock:
128  * stores a large physical block number into an extent struct,
129  * breaking it into parts
130  */
131 static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
132 {
133         ex->start_lo = to_le32((uint32_t)(pb & 0xffffffff));
134         ex->start_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
135 }
136
137 /*
138  * ext4_idx_store_pblock:
139  * stores a large physical block number into an index struct,
140  * breaking it into parts
141  */
142 static void ext4_idx_store_pblock(struct ext4_extent_index *ix, ext4_fsblk_t pb)
143 {
144         ix->leaf_lo = to_le32((uint32_t)(pb & 0xffffffff));
145         ix->leaf_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
146 }
147
148 static int ext4_allocate_single_block(struct ext4_inode_ref *inode_ref,
149                                       ext4_fsblk_t goal,
150                                       ext4_fsblk_t *blockp)
151 {
152         return ext4_balloc_alloc_block(inode_ref, goal, blockp);
153 }
154
155 static ext4_fsblk_t ext4_new_meta_blocks(struct ext4_inode_ref *inode_ref,
156                                          ext4_fsblk_t goal,
157                                          uint32_t flags __unused,
158                                          uint32_t *count, int *errp)
159 {
160         ext4_fsblk_t block = 0;
161
162         *errp = ext4_allocate_single_block(inode_ref, goal, &block);
163         if (count)
164                 *count = 1;
165         return block;
166 }
167
168 static void ext4_ext_free_blocks(struct ext4_inode_ref *inode_ref,
169                                  ext4_fsblk_t block, uint32_t count,
170                                  uint32_t flags __unused)
171 {
172         ext4_balloc_free_blocks(inode_ref, block, count);
173 }
174
175 static size_t ext4_ext_space_block(struct ext4_inode_ref *inode_ref)
176 {
177         size_t size;
178         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
179
180         size = (block_size - sizeof(struct ext4_extent_header)) /
181                sizeof(struct ext4_extent);
182 #ifdef AGGRESSIVE_TEST
183         if (size > 6)
184                 size = 6;
185 #endif
186         return size;
187 }
188
189 static size_t ext4_ext_space_block_idx(struct ext4_inode_ref *inode_ref)
190 {
191         size_t size;
192         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
193
194         size = (block_size - sizeof(struct ext4_extent_header)) /
195                sizeof(struct ext4_extent_index);
196 #ifdef AGGRESSIVE_TEST
197         if (size > 5)
198                 size = 5;
199 #endif
200         return size;
201 }
202
203 static size_t ext4_ext_space_root(struct ext4_inode_ref *inode_ref)
204 {
205         size_t size;
206
207         size = sizeof(inode_ref->inode->blocks);
208         size -= sizeof(struct ext4_extent_header);
209         size /= sizeof(struct ext4_extent);
210 #ifdef AGGRESSIVE_TEST
211         if (size > 3)
212                 size = 3;
213 #endif
214         return size;
215 }
216
217 static size_t ext4_ext_space_root_idx(struct ext4_inode_ref *inode_ref)
218 {
219         size_t size;
220
221         size = sizeof(inode_ref->inode->blocks);
222         size -= sizeof(struct ext4_extent_header);
223         size /= sizeof(struct ext4_extent_index);
224 #ifdef AGGRESSIVE_TEST
225         if (size > 4)
226                 size = 4;
227 #endif
228         return size;
229 }
230
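/*
 * Return the maximum number of entries a node at the given depth can hold,
 * depending on whether it is the in-inode root or an on-disk tree block.
 */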
231 static size_t ext4_ext_max_entries(struct ext4_inode_ref *inode_ref,
232                                    uint32_t depth)
233 {
234         size_t max;
235
236         if (depth == ext_depth(inode_ref->inode)) {
237                 if (depth == 0)
238                         max = ext4_ext_space_root(inode_ref);
239                 else
240                         max = ext4_ext_space_root_idx(inode_ref);
241         } else {
242                 if (depth == 0)
243                         max = ext4_ext_space_block(inode_ref);
244                 else
245                         max = ext4_ext_space_block_idx(inode_ref);
246         }
247
248         return max;
249 }
250
251 static ext4_fsblk_t ext4_ext_find_goal(struct ext4_inode_ref *inode_ref,
252                                        struct ext4_extent_path *path,
253                                        ext4_lblk_t block)
254 {
255         if (path) {
256                 uint32_t depth = path->depth;
257                 struct ext4_extent *ex;
258
259                 /*
260                  * Try to predict block placement assuming that we are
261                  * filling in a file which will eventually be
262                  * non-sparse --- i.e., in the case of libbfd writing
263                  * ELF object sections out-of-order but in a way
264                  * that eventually results in a contiguous object or
265                  * executable file, or some database extending a table
266                  * space file.  However, this is actually somewhat
267                  * non-ideal if we are writing a sparse file such as
268                  * qemu or KVM writing a raw image file that is going
269                  * to stay fairly sparse, since it will end up
270                  * fragmenting the file system's free space.  Maybe we
271                  * should have some heuristics or some way to allow
272                  * userspace to pass a hint to the file system,
273                  * especially if the latter case turns out to be
274                  * common.
275                  */
276                 ex = path[depth].extent;
277                 if (ex) {
278                         ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
279                         ext4_lblk_t ext_block = to_le32(ex->first_block);
280
281                         if (block > ext_block)
282                                 return ext_pblk + (block - ext_block);
283                         else
284                                 return ext_pblk - (ext_block - block);
285                 }
286
287                 /* it looks like index is empty;
288                  * try to find starting block from index itself */
289                 if (path[depth].block.lb_id)
290                         return path[depth].block.lb_id;
291         }
292
293         /* OK. use inode's group */
294         return ext4_fs_inode_to_goal_block(inode_ref);
295 }
296
297 /*
298  * Allocation for a meta data block
299  */
300 static ext4_fsblk_t ext4_ext_new_meta_block(struct ext4_inode_ref *inode_ref,
301                                             struct ext4_extent_path *path,
302                                             struct ext4_extent *ex, int *err,
303                                             uint32_t flags)
304 {
305         ext4_fsblk_t goal, newblock;
306
307         goal = ext4_ext_find_goal(inode_ref, path, to_le32(ex->first_block));
308         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, err);
309         return newblock;
310 }
311
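/*
 * Compute the metadata_csum checksum of an extent block: crc32c over the
 * filesystem UUID, the inode number and generation, and the block contents
 * up to (but not including) the extent tail.
 */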
312 static uint32_t ext4_ext_block_csum(struct ext4_inode_ref *inode_ref,
313                                     struct ext4_extent_header *eh)
314 {
315         uint32_t checksum = 0;
316         struct ext4_sblock *sb = &inode_ref->fs->sb;
317
318         if (ext4_sb_has_feature_read_only(sb,
319                                 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
320                 uint32_t ino_index = to_le32(inode_ref->index);
321                 uint32_t ino_gen =
322                         to_le32(ext4_inode_get_generation(inode_ref->inode));
323                 /* First calculate crc32 checksum against fs uuid */
324                 checksum = ext4_crc32c(~0, sb->uuid, sizeof(sb->uuid));
325                 /* Then calculate crc32 checksum against inode number
326                  * and inode generation */
327                 checksum = ext4_crc32c(checksum, &ino_index,
328                                      sizeof(ino_index));
329                 checksum = ext4_crc32c(checksum, &ino_gen,
330                                      sizeof(ino_gen));
331                 /* Finally calculate crc32 checksum against 
332                  * the entire extent block up to the checksum field */
333                 checksum = ext4_crc32c(checksum, eh,
334                                 EXT4_EXTENT_TAIL_OFFSET(eh));
335         }
336         return checksum;
337 }
338
339 static void ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref,
340                                        struct ext4_extent_header *eh)
341 {
342         struct ext4_extent_tail *tail;
343
344         tail = find_ext4_extent_tail(eh);
345         tail->et_checksum = to_le32(ext4_ext_block_csum(inode_ref, eh));
346 }
347
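/*
 * Mark the node referenced by @path dirty: the underlying block if the node
 * lives on disk, otherwise the inode itself (the in-inode root).
 */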
348 static int ext4_ext_dirty(struct ext4_inode_ref *inode_ref,
349                           struct ext4_extent_path *path)
350 {
351         if (path->block.lb_id)
352                 path->block.dirty = true;
353         else
354                 inode_ref->dirty = true;
355
356         return EOK;
357 }
358
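/*
 * Release the block references held by a path. Checksums of dirty blocks are
 * updated before the blocks are handed back to the block cache. If
 * @keep_other is set, only the first path entry is released.
 */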
359 static void ext4_ext_drop_refs(struct ext4_inode_ref *inode_ref,
360                                struct ext4_extent_path *path, bool keep_other)
361 {
362         int32_t depth, i;
363
364         if (!path)
365                 return;
366         if (keep_other)
367                 depth = 0;
368         else
369                 depth = path->depth;
370
371         for (i = 0; i <= depth; i++, path++) {
372                 if (path->block.lb_id) {
373                         if (path->block.dirty)
374                                 ext4_extent_block_csum_set(inode_ref,
375                                                 path->header);
376
377                         ext4_block_set(inode_ref->fs->bdev, &path->block);
378                 }
379         }
380 }
381
382 /*
383  * Check whether the basic information inside the extent header
384  * is correct.
385  */
386 static int ext4_ext_check(struct ext4_inode_ref *inode_ref,
387                           struct ext4_extent_header *eh, uint16_t depth,
388                           ext4_fsblk_t pblk __unused)
389 {
390         struct ext4_extent_tail *tail;
391         const char *error_msg;
392         (void)error_msg;
393
394         if (to_le16(eh->magic) != EXT4_EXTENT_MAGIC) {
395                 error_msg = "invalid magic";
396                 goto corrupted;
397         }
398         if (to_le16(eh->depth) != depth) {
399                 error_msg = "unexpected eh_depth";
400                 goto corrupted;
401         }
402         if (eh->max_entries_count == 0) {
403                 error_msg = "invalid eh_max";
404                 goto corrupted;
405         }
406         if (to_le16(eh->entries_count) > to_le16(eh->max_entries_count)) {
407                 error_msg = "invalid eh_entries";
408                 goto corrupted;
409         }
410
411         tail = find_ext4_extent_tail(eh);
412         if (tail->et_checksum != to_le32(ext4_ext_block_csum(inode_ref, eh))) {
413                 /* FIXME: Warning: extent checksum damaged? */
414         }
415
416         return EOK;
417
418 corrupted:
419         ext4_dbg(DEBUG_EXTENT, "Bad extents B+ tree block: %s. "
420                                "Blocknr: %" PRId64 "\n",
421                  error_msg, pblk);
422         return EIO;
423 }
424
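/*
 * Read the extent tree block at @pblk and verify that its header describes a
 * node of the expected @depth. On failure the block reference is dropped.
 */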
425 static int read_extent_tree_block(struct ext4_inode_ref *inode_ref,
426                                   ext4_fsblk_t pblk, int32_t depth,
427                                   struct ext4_block *bh,
428                                   uint32_t flags __unused)
429 {
430         int err;
431
432         err = ext4_block_get(inode_ref->fs->bdev, bh, pblk);
433         if (err != EOK)
434                 goto errout;
435
436         err = ext4_ext_check(inode_ref, ext_block_hdr(bh), depth, pblk);
437         if (err != EOK)
438                 goto errout;
439
440         return EOK;
441 errout:
442         if (bh->lb_id)
443                 ext4_block_set(inode_ref->fs->bdev, bh);
444
445         return err;
446 }
447
448 /*
449  * ext4_ext_binsearch_idx:
450  * binary search for the closest index of the given block
451  * the header must be checked before calling this
452  */
453 static void ext4_ext_binsearch_idx(struct ext4_extent_path *path,
454                                    ext4_lblk_t block)
455 {
456         struct ext4_extent_header *eh = path->header;
457         struct ext4_extent_index *r, *l, *m;
458
459         l = EXT_FIRST_INDEX(eh) + 1;
460         r = EXT_LAST_INDEX(eh);
461         while (l <= r) {
462                 m = l + (r - l) / 2;
463                 if (block < to_le32(m->first_block))
464                         r = m - 1;
465                 else
466                         l = m + 1;
467         }
468
469         path->index = l - 1;
470 }
471
472 /*
473  * ext4_ext_binsearch:
474  * binary search for closest extent of the given block
475  * the header must be checked before calling this
476  */
477 static void ext4_ext_binsearch(struct ext4_extent_path *path, ext4_lblk_t block)
478 {
479         struct ext4_extent_header *eh = path->header;
480         struct ext4_extent *r, *l, *m;
481
482         if (eh->entries_count == 0) {
483                 /*
484                  * this leaf is empty:
485                  * we get such a leaf in split/add case
486                  */
487                 return;
488         }
489
490         l = EXT_FIRST_EXTENT(eh) + 1;
491         r = EXT_LAST_EXTENT(eh);
492
493         while (l <= r) {
494                 m = l + (r - l) / 2;
495                 if (block < to_le32(m->first_block))
496                         r = m - 1;
497                 else
498                         l = m + 1;
499         }
500
501         path->extent = l - 1;
502 }
503
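/*
 * Walk the extent tree from the inode root down to the leaf covering @block,
 * recording one ext4_extent_path entry per level in *orig_path. An existing
 * path is reused when it is deep enough, otherwise it is reallocated.
 */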
504 static int ext4_find_extent(struct ext4_inode_ref *inode_ref, ext4_lblk_t block,
505                             struct ext4_extent_path **orig_path, uint32_t flags)
506 {
507         struct ext4_extent_header *eh;
508         struct ext4_block bh = EXT4_BLOCK_ZERO();
509         ext4_fsblk_t buf_block = 0;
510         struct ext4_extent_path *path = *orig_path;
511         int32_t depth, ppos = 0;
512         int32_t i;
513         int ret;
514
515         eh = ext_inode_hdr(inode_ref->inode);
516         depth = ext_depth(inode_ref->inode);
517
518         if (path) {
519                 ext4_ext_drop_refs(inode_ref, path, 0);
520                 if (depth > path[0].maxdepth) {
521                         free(path);
522                         *orig_path = path = NULL;
523                 }
524         }
525         if (!path) {
526                 int32_t path_depth = depth + 1;
527                 /* account possible depth increase */
528                 path = calloc(1, sizeof(struct ext4_extent_path) *
529                                      (path_depth + 1));
530                 if (!path)
531                         return ENOMEM;
532                 path[0].maxdepth = path_depth;
533         }
534         path[0].header = eh;
535         path[0].block = bh;
536
537         i = depth;
538         /* walk through the tree */
539         while (i) {
540                 ext4_ext_binsearch_idx(path + ppos, block);
541                 path[ppos].p_block = ext4_idx_pblock(path[ppos].index);
542                 path[ppos].depth = i;
543                 path[ppos].extent = NULL;
544                 buf_block = path[ppos].p_block;
545
546                 i--;
547                 ppos++;
548                 if (!path[ppos].block.lb_id ||
549                     path[ppos].block.lb_id != buf_block) {
550                         ret = read_extent_tree_block(inode_ref, buf_block, i,
551                                                      &bh, flags);
552                         if (ret != EOK) {
553                                 goto err;
554                         }
555                         if (ppos > depth) {
556                                 ext4_block_set(inode_ref->fs->bdev, &bh);
557                                 ret = EIO;
558                                 goto err;
559                         }
560
561                         eh = ext_block_hdr(&bh);
562                         path[ppos].block = bh;
563                         path[ppos].header = eh;
564                 }
565         }
566
567         path[ppos].depth = i;
568         path[ppos].extent = NULL;
569         path[ppos].index = NULL;
570
571         /* find extent */
572         ext4_ext_binsearch(path + ppos, block);
573         /* if not an empty leaf */
574         if (path[ppos].extent)
575                 path[ppos].p_block = ext4_ext_pblock(path[ppos].extent);
576
577         *orig_path = path;
578
579         ret = EOK;
580         return ret;
581
582 err:
583         ext4_ext_drop_refs(inode_ref, path, 0);
584         free(path);
585         if (orig_path)
586                 *orig_path = NULL;
587         return ret;
588 }
589
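/* Initialize an empty extent header for a node at the given tree depth. */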
590 static void ext4_ext_init_header(struct ext4_inode_ref *inode_ref,
591                                  struct ext4_extent_header *eh, int32_t depth)
592 {
593         eh->entries_count = 0;
594         eh->max_entries_count = to_le16(ext4_ext_max_entries(inode_ref, depth));
595         eh->magic = to_le16(EXT4_EXTENT_MAGIC);
596         eh->depth = to_le16(depth);
597 }
598
599 /*
600  * Be cautious: the block returned is not yet marked dirty. */
601 static int ext4_ext_split_node(struct ext4_inode_ref *inode_ref,
602                                struct ext4_extent_path *path, int32_t at,
603                                struct ext4_extent *newext,
604                                ext4_fsblk_t *sibling, struct ext4_block *new_bh)
605 {
606         int ret;
607         ext4_fsblk_t newblock;
608         struct ext4_block bh = EXT4_BLOCK_ZERO();
609         int32_t depth = ext_depth(inode_ref->inode);
610
611         ext4_assert(sibling);
612
613         /* FIXME: currently we split at the point after the current extent. */
614         newblock = ext4_ext_new_meta_block(inode_ref, path, newext, &ret, 0);
615         if (ret)
616                 goto cleanup;
617
618         /* Get the new block for write access. */
619         ret = ext4_block_get(inode_ref->fs->bdev, &bh, newblock);
620         if (ret != EOK)
621                 goto cleanup;
622
623         if (at == depth) {
624                 /* start copy from next extent */
625                 ptrdiff_t m = EXT_MAX_EXTENT(path[at].header) - path[at].extent;
626                 struct ext4_extent_header *neh;
627                 neh = ext_block_hdr(&bh);
628                 ext4_ext_init_header(inode_ref, neh, 0);
629                 if (m) {
630                         struct ext4_extent *ex;
631                         ex = EXT_FIRST_EXTENT(neh);
632                         memmove(ex, path[at].extent + 1,
633                                 sizeof(struct ext4_extent) * m);
634                         neh->entries_count =
635                             to_le16(to_le16(neh->entries_count) + m);
636                         path[at].header->entries_count = to_le16(
637                             to_le16(path[at].header->entries_count) - m);
638                         ret = ext4_ext_dirty(inode_ref, path + at);
639                         if (ret)
640                                 goto cleanup;
641                 }
642         } else {
643                 ptrdiff_t m = EXT_MAX_INDEX(path[at].header) - path[at].index;
644                 struct ext4_extent_header *neh;
645                 neh = ext_block_hdr(&bh);
646                 ext4_ext_init_header(inode_ref, neh, depth - at);
647                 if (m) {
648                         struct ext4_extent_index *ix;
649                         ix = EXT_FIRST_INDEX(neh);
650                         memmove(ix, path[at].index + 1,
651                                 sizeof(struct ext4_extent_index) * m);
652                         neh->entries_count =
653                             to_le16(to_le16(neh->entries_count) + m);
654                         path[at].header->entries_count = to_le16(
655                             to_le16(path[at].header->entries_count) - m);
656                         ret = ext4_ext_dirty(inode_ref, path + at);
657                         if (ret)
658                                 goto cleanup;
659                 }
660         }
661 cleanup:
662         if (ret) {
663                 if (bh.lb_id) {
664                         ext4_block_set(inode_ref->fs->bdev, &bh);
665                 }
666                 if (newblock)
667                         ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
668
669                 newblock = 0;
670         }
671         *sibling = newblock;
672         *new_bh = bh;
673         return ret;
674 }
675
676 static ext4_lblk_t ext4_ext_block_index(struct ext4_extent_header *eh)
677 {
678         if (eh->depth)
679                 return to_le32(EXT_FIRST_INDEX(eh)->first_block);
680
681         return to_le32(EXT_FIRST_EXTENT(eh)->first_block);
682 }
683
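/*
 * Result of splitting one tree level: the physical block of the new sibling
 * node, a path entry referencing it, and a flag telling the caller to switch
 * the search path over to that sibling.
 */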
684 struct ext_split_trans {
685         ext4_fsblk_t ptr;
686         struct ext4_extent_path path;
687         int switch_to;
688 };
689
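/*
 * Insert an index entry (@insert_index -> @insert_block) into the node at
 * level @at. If the node is full it is split via ext4_ext_split_node(); when
 * the full node is the root, *need_grow is set so the caller grows the tree
 * instead.
 */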
690 static int ext4_ext_insert_index(struct ext4_inode_ref *inode_ref,
691                                  struct ext4_extent_path *path,
692                                  int32_t at,
693                                  struct ext4_extent *newext,
694                                  ext4_lblk_t insert_index,
695                                  ext4_fsblk_t insert_block,
696                                  struct ext_split_trans *spt,
697                                  bool *need_grow)
698 {
699         struct ext4_extent_index *ix;
700         struct ext4_extent_path *curp = path + at;
701         struct ext4_block bh = EXT4_BLOCK_ZERO();
702         int32_t len;
703         int err;
704         struct ext4_extent_header *eh;
705
706         *need_grow = false;
707
708         if (curp->index && insert_index == to_le32(curp->index->first_block))
709                 return EIO;
710
711         if (to_le16(curp->header->entries_count) ==
712             to_le16(curp->header->max_entries_count)) {
713                 if (at) {
714                         struct ext4_extent_header *neh;
715                         err = ext4_ext_split_node(inode_ref, path, at, newext,
716                                                   &spt->ptr, &bh);
717                         if (err != EOK)
718                                 goto out;
719
720                         neh = ext_block_hdr(&bh);
721                         if (insert_index > to_le32(curp->index->first_block)) {
722                                 /* Decide which node should be used to
723                                  * insert the index. */
724                                 if (to_le16(neh->entries_count) >
725                                     to_le16(curp->header->entries_count)) {
726                                         eh = curp->header;
727                                         /* insert after */
728                                         ix = EXT_LAST_INDEX(eh) + 1;
729                                 } else {
730                                         eh = neh;
731                                         ix = EXT_FIRST_INDEX(eh);
732                                 }
733                         } else {
734                                 eh = curp->header;
735                                 /* insert before */
736                                 ix = EXT_LAST_INDEX(eh);
737                         }
738                 } else {
739                         err = EOK;
740                         *need_grow = true;
741                         goto out;
742                 }
743         } else {
744                 eh = curp->header;
745                 if (curp->index == NULL) {
746                         ix = EXT_FIRST_INDEX(eh);
747                         curp->index = ix;
748                 } else if (insert_index > to_le32(curp->index->first_block)) {
749                         /* insert after */
750                         ix = curp->index + 1;
751                 } else {
752                         /* insert before */
753                         ix = curp->index;
754                 }
755         }
756
757         len = EXT_LAST_INDEX(eh) - ix + 1;
758         ext4_assert(len >= 0);
759         if (len > 0)
760                 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_index));
761
762         if (ix > EXT_MAX_INDEX(eh)) {
763                 err = EIO;
764                 goto out;
765         }
766
767         ix->first_block = to_le32(insert_index);
768         ext4_idx_store_pblock(ix, insert_block);
769         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
770
771         if (ix > EXT_LAST_INDEX(eh)) {
772                 err = EIO;
773                 goto out;
774         }
775
776         if (eh == curp->header)
777                 err = ext4_ext_dirty(inode_ref, curp);
778         else
779                 err = EOK;
780
781 out:
782         if (err != EOK || *need_grow) {
783                 if (bh.lb_id)
784                         ext4_block_set(inode_ref->fs->bdev, &bh);
785
786                 spt->ptr = 0;
787         } else if (bh.lb_id) {
788                 /* A sibling index node was created by the split. */
789                 ext4_extent_block_csum_set(inode_ref, ext_block_hdr(&bh));
790                 bh.dirty = true;
791
792                 spt->path.p_block = ext4_idx_pblock(ix);
793                 spt->path.depth = to_le16(eh->depth);
794                 spt->path.maxdepth = 0;
795                 spt->path.extent = NULL;
796                 spt->path.index = ix;
797                 spt->path.header = eh;
798                 spt->path.block = bh;
799
800                 /*
801                  * If newext->ee_block can be included into the
802                  * right sub-tree.
803                  */
804                 if (to_le32(newext->first_block) >=
805                     ext4_ext_block_index(ext_block_hdr(&bh)))
806                         spt->switch_to = 1;
807                 else {
808                         curp->index = ix;
809                         curp->p_block = ext4_idx_pblock(ix);
810                 }
811
812         } else {
813                 spt->ptr = 0;
814                 curp->index = ix;
815                 curp->p_block = ext4_idx_pblock(ix);
816         }
817         return err;
818 }
819
820 /*
821  * ext4_ext_correct_indexes:
822  * if leaf gets modified and modified extent is first in the leaf,
823  * then we have to correct all indexes above.
824  */
825 static int ext4_ext_correct_indexes(struct ext4_inode_ref *inode_ref,
826                                     struct ext4_extent_path *path)
827 {
828         struct ext4_extent_header *eh;
829         int32_t depth = ext_depth(inode_ref->inode);
830         struct ext4_extent *ex;
831         uint32_t border;
832         int32_t k;
833         int err = EOK;
834
835         eh = path[depth].header;
836         ex = path[depth].extent;
837
838         if (ex == NULL || eh == NULL) {
839                 return EIO;
840         }
841
842         if (depth == 0) {
843                 /* there is no tree at all */
844                 return EOK;
845         }
846
847         if (ex != EXT_FIRST_EXTENT(eh)) {
848                 /* we correct tree if first leaf got modified only */
849                 return EOK;
850         }
851
852         /*
853          * TODO: we need correction if border is smaller than current one
854          */
855         k = depth - 1;
856         border = path[depth].extent->first_block;
857         path[k].index->first_block = border;
858         err = ext4_ext_dirty(inode_ref, path + k);
859         if (err != EOK)
860                 return err;
861
862         while (k--) {
863                 /* change all left-side indexes */
864                 if (path[k + 1].index != EXT_FIRST_INDEX(path[k + 1].header))
865                         break;
866                 path[k].index->first_block = border;
867                 err = ext4_ext_dirty(inode_ref, path + k);
868                 if (err != EOK)
869                         break;
870         }
871
872         return err;
873 }
874
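/*
 * Return true if @ex2 can be merged in front of @ex1, i.e. the two extents
 * are physically and logically contiguous and the combined length stays
 * within the per-extent limit.
 */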
875 static bool ext4_ext_can_prepend(struct ext4_extent *ex1,
876                                  struct ext4_extent *ex2)
877 {
878         if (ext4_ext_pblock(ex2) + ext4_ext_get_actual_len(ex2) !=
879             ext4_ext_pblock(ex1))
880                 return false;
881
882 #ifdef AGGRESSIVE_TEST
883         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
884                 return 0;
885 #else
886         if (ext4_ext_is_unwritten(ex1)) {
887                 if (ext4_ext_get_actual_len(ex1) +
888                         ext4_ext_get_actual_len(ex2) >
889                     EXT_UNWRITTEN_MAX_LEN)
890                         return false;
891         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
892                    EXT_INIT_MAX_LEN)
893                 return false;
894 #endif
895
896         if (to_le32(ex2->first_block) + ext4_ext_get_actual_len(ex2) !=
897             to_le32(ex1->first_block))
898                 return false;
899
900         return true;
901 }
902
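/* Return true if @ex2 can be merged onto the end of @ex1 (see above). */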
903 static bool ext4_ext_can_append(struct ext4_extent *ex1,
904                                 struct ext4_extent *ex2)
905 {
906         if (ext4_ext_pblock(ex1) + ext4_ext_get_actual_len(ex1) !=
907             ext4_ext_pblock(ex2))
908                 return false;
909
910 #ifdef AGGRESSIVE_TEST
911         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
912                 return 0;
913 #else
914         if (ext4_ext_is_unwritten(ex1)) {
915                 if (ext4_ext_get_actual_len(ex1) +
916                         ext4_ext_get_actual_len(ex2) >
917                     EXT_UNWRITTEN_MAX_LEN)
918                         return false;
919         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
920                    EXT_INIT_MAX_LEN)
921                 return false;
922 #endif
923
924         if (to_le32(ex1->first_block) + ext4_ext_get_actual_len(ex1) !=
925             to_le32(ex2->first_block))
926                 return false;
927
928         return true;
929 }
930
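/*
 * Insert @newext into the leaf at level @at. The new extent is merged with a
 * neighbour when possible (unless EXT4_EXT_NO_COMBINE is set); a full leaf is
 * split, and a full root leaf sets *need_grow for the caller.
 */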
931 static int ext4_ext_insert_leaf(struct ext4_inode_ref *inode_ref,
932                                 struct ext4_extent_path *path,
933                                 int32_t at,
934                                 struct ext4_extent *newext,
935                                 struct ext_split_trans *spt,
936                                 uint32_t flags,
937                                 bool *need_grow)
938 {
939         struct ext4_extent_path *curp = path + at;
940         struct ext4_extent *ex = curp->extent;
941         struct ext4_block bh = EXT4_BLOCK_ZERO();
942         int32_t len;
943         int err = EOK;
944         int unwritten;
945         struct ext4_extent_header *eh = NULL;
946
947         *need_grow = false;
948
949         if (curp->extent &&
950             to_le32(newext->first_block) == to_le32(curp->extent->first_block))
951                 return EIO;
952
953         if (!(flags & EXT4_EXT_NO_COMBINE)) {
954                 if (curp->extent && ext4_ext_can_append(curp->extent, newext)) {
955                         unwritten = ext4_ext_is_unwritten(curp->extent);
956                         curp->extent->block_count =
957                             to_le16(ext4_ext_get_actual_len(curp->extent) +
958                                     ext4_ext_get_actual_len(newext));
959                         if (unwritten)
960                                 ext4_ext_mark_unwritten(curp->extent);
961                         err = ext4_ext_dirty(inode_ref, curp);
962                         goto out;
963                 }
964
965                 if (curp->extent &&
966                     ext4_ext_can_prepend(curp->extent, newext)) {
967                         unwritten = ext4_ext_is_unwritten(curp->extent);
968                         curp->extent->first_block = newext->first_block;
969                         curp->extent->block_count =
970                             to_le16(ext4_ext_get_actual_len(curp->extent) +
971                                     ext4_ext_get_actual_len(newext));
972                         if (unwritten)
973                                 ext4_ext_mark_unwritten(curp->extent);
974                         err = ext4_ext_dirty(inode_ref, curp);
975                         goto out;
976                 }
977         }
978
979         if (to_le16(curp->header->entries_count) ==
980             to_le16(curp->header->max_entries_count)) {
981                 if (at) {
982                         struct ext4_extent_header *neh;
983                         err = ext4_ext_split_node(inode_ref, path, at, newext,
984                                                   &spt->ptr, &bh);
985                         if (err != EOK)
986                                 goto out;
987
988                         neh = ext_block_hdr(&bh);
989                         if (to_le32(newext->first_block) >
990                             to_le32(curp->extent->first_block)) {
991                                 if (to_le16(neh->entries_count) >
992                                     to_le16(curp->header->entries_count)) {
993                                         eh = curp->header;
994                                         /* insert after */
995                                         ex = EXT_LAST_EXTENT(eh) + 1;
996                                 } else {
997                                         eh = neh;
998                                         ex = EXT_FIRST_EXTENT(eh);
999                                 }
1000                         } else {
1001                                 eh = curp->header;
1002                                 /* insert before */
1003                                 ex = EXT_LAST_EXTENT(eh);
1004                         }
1005                 } else {
1006                         err = EOK;
1007                         *need_grow = true;
1008                         goto out;
1009                 }
1010         } else {
1011                 eh = curp->header;
1012                 if (curp->extent == NULL) {
1013                         ex = EXT_FIRST_EXTENT(eh);
1014                         curp->extent = ex;
1015                 } else if (to_le32(newext->first_block) >
1016                            to_le32(curp->extent->first_block)) {
1017                         /* insert after */
1018                         ex = curp->extent + 1;
1019                 } else {
1020                         /* insert before */
1021                         ex = curp->extent;
1022                 }
1023         }
1024
1025         len = EXT_LAST_EXTENT(eh) - ex + 1;
1026         ext4_assert(len >= 0);
1027         if (len > 0)
1028                 memmove(ex + 1, ex, len * sizeof(struct ext4_extent));
1029
1030         if (ex > EXT_MAX_EXTENT(eh)) {
1031                 err = EIO;
1032                 goto out;
1033         }
1034
1035         ex->first_block = newext->first_block;
1036         ex->block_count = newext->block_count;
1037         ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
1038         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
1039
1040         if (ex > EXT_LAST_EXTENT(eh)) {
1041                 err = EIO;
1042                 goto out;
1043         }
1044
1045         if (eh == curp->header) {
1046                 err = ext4_ext_correct_indexes(inode_ref, path);
1047                 if (err != EOK)
1048                         goto out;
1049                 err = ext4_ext_dirty(inode_ref, curp);
1050         } else
1051                 err = EOK;
1052
1053 out:
1054         if (err != EOK || *need_grow) {
1055                 if (bh.lb_id)
1056                         ext4_block_set(inode_ref->fs->bdev, &bh);
1057
1058                 spt->ptr = 0;
1059         } else if (bh.lb_id) {
1060                 /* If we got a sibling leaf. */
1061                 ext4_extent_block_csum_set(inode_ref, ext_block_hdr(&bh));
1062                 bh.dirty = true;
1063
1064                 spt->path.p_block = ext4_ext_pblock(ex);
1065                 spt->path.depth = to_le16(eh->depth);
1066                 spt->path.maxdepth = 0;
1067                 spt->path.extent = ex;
1068                 spt->path.index = NULL;
1069                 spt->path.header = eh;
1070                 spt->path.block = bh;
1071
1072                 /*
1073                  * If newext->ee_block can be included into the
1074                  * right sub-tree.
1075                  */
1076                 if (to_le32(newext->first_block) >=
1077                     ext4_ext_block_index(ext_block_hdr(&bh)))
1078                         spt->switch_to = 1;
1079                 else {
1080                         curp->extent = ex;
1081                         curp->p_block = ext4_ext_pblock(ex);
1082                 }
1083
1084         } else {
1085                 spt->ptr = 0;
1086                 curp->extent = ex;
1087                 curp->p_block = ext4_ext_pblock(ex);
1088         }
1089
1090         return err;
1091 }
1092
1093 /*
1094  * ext4_ext_grow_indepth:
1095  * implements tree growing procedure:
1096  * - allocates new block
1097  * - moves top-level data (index block or leaf) into the new block
1098  * - initializes new top-level, creating index that points to the
1099  *   just created block
1100  */
1101 static int ext4_ext_grow_indepth(struct ext4_inode_ref *inode_ref,
1102                                  uint32_t flags)
1103 {
1104         struct ext4_extent_header *neh;
1105         struct ext4_block bh = EXT4_BLOCK_ZERO();
1106         ext4_fsblk_t newblock, goal = 0;
1107         int err = EOK;
1108
1109         /* Try to prepend new index to old one */
1110         if (ext_depth(inode_ref->inode))
1111                 goal = ext4_idx_pblock(
1112                     EXT_FIRST_INDEX(ext_inode_hdr(inode_ref->inode)));
1113         else
1114                 goal = ext4_fs_inode_to_goal_block(inode_ref);
1115
1116         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, &err);
1117         if (newblock == 0)
1118                 return err;
1119
1120         /* Get the newly allocated block for write access. */
1121         err = ext4_block_get(inode_ref->fs->bdev, &bh, newblock);
1122         if (err != EOK) {
1123                 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1124                 return err;
1125         }
1126
1127         /* move top-level index/leaf into new block */
1128         memmove(bh.data, inode_ref->inode->blocks,
1129                 sizeof(inode_ref->inode->blocks));
1130
1131         /* set size of new block */
1132         neh = ext_block_hdr(&bh);
1133         /* old root could have indexes or leaves
1134          * so calculate e_max the right way */
1135         if (ext_depth(inode_ref->inode))
1136                 neh->max_entries_count =
1137                     to_le16(ext4_ext_space_block_idx(inode_ref));
1138         else
1139                 neh->max_entries_count =
1140                     to_le16(ext4_ext_space_block(inode_ref));
1141
1142         neh->magic = to_le16(EXT4_EXTENT_MAGIC);
1143
1144         /* Update top-level index: num,max,pointer */
1145         neh = ext_inode_hdr(inode_ref->inode);
1146         neh->entries_count = to_le16(1);
1147         ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1148         if (neh->depth == 0) {
1149                 /* Root extent block becomes index block */
1150                 neh->max_entries_count =
1151                     to_le16(ext4_ext_space_root_idx(inode_ref));
1152                 EXT_FIRST_INDEX(neh)
1153                     ->first_block = EXT_FIRST_EXTENT(neh)->first_block;
1154         }
1155         neh->depth = to_le16(to_le16(neh->depth) + 1);
1156
1157         ext4_extent_block_csum_set(inode_ref, neh);
1158         bh.dirty = true;
1159         inode_ref->dirty = true;
1160         ext4_block_set(inode_ref->fs->bdev, &bh);
1161
1162         return err;
1163 }
1164
1165 __unused static void print_path(struct ext4_extent_path *path)
1166 {
1167         int32_t i = path->depth;
1168         while (i >= 0) {
1169
1170                 ptrdiff_t a =
1171                     (path->extent)
1172                         ? (path->extent - EXT_FIRST_EXTENT(path->header))
1173                         : 0;
1174                 ptrdiff_t b =
1175                     (path->index)
1176                         ? (path->index - EXT_FIRST_INDEX(path->header))
1177                         : 0;
1178
1179                 (void)a;
1180                 (void)b;
1181                 ext4_dbg(DEBUG_EXTENT,
1182                          "depth %" PRId32 ", p_block: %" PRIu64 ","
1183                          "p_ext offset: %td, p_idx offset: %td\n",
1184                          i, path->p_block, a, b);
1185                 i--;
1186                 path++;
1187         }
1188 }
1189
1190 static void ext4_ext_replace_path(struct ext4_inode_ref *inode_ref,
1191                                   struct ext4_extent_path *path,
1192                                   struct ext_split_trans *spt,
1193                                   int32_t level)
1194 {
1195         int32_t depth = ext_depth(inode_ref->inode);
1196         int32_t i = depth - level;
1197         ext4_ext_drop_refs(inode_ref, path + i, 1);
1198         path[i] = spt[level].path;
1199 }
1200
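/*
 * Insert @newext into the extent tree. The extent is inserted into the leaf
 * first; if that causes splits, the resulting index entries are propagated
 * upwards level by level, and the tree depth is grown when even the root is
 * full.
 */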
1201 static int ext4_ext_insert_extent(struct ext4_inode_ref *inode_ref,
1202                                   struct ext4_extent_path **ppath,
1203                                   struct ext4_extent *newext, uint32_t flags)
1204 {
1205         int32_t i, depth, level;
1206         int ret = EOK;
1207         ext4_fsblk_t ptr = 0;
1208         bool need_grow = false;
1209         struct ext4_extent_path *path = *ppath;
1210         struct ext_split_trans *spt = NULL;
1211         struct ext_split_trans newblock;
1212
1213         memset(&newblock, 0, sizeof(newblock));
1214
1215         depth = ext_depth(inode_ref->inode);
1216         for (i = depth, level = 0; i >= 0; i--, level++)
1217                 if (EXT_HAS_FREE_INDEX(path + i))
1218                         break;
1219
1220         if (level) {
1221                 spt = calloc(1, sizeof(struct ext_split_trans) * (level));
1222                 if (!spt) {
1223                         ret = ENOMEM;
1224                         goto out;
1225                 }
1226         }
1227         i = 0;
1228 again:
1229         depth = ext_depth(inode_ref->inode);
1230
1231         do {
1232                 if (!i) {
1233                         ret = ext4_ext_insert_leaf(inode_ref, path, depth - i,
1234                                                    newext, &newblock, flags,
1235                                                    &need_grow);
1236                 } else {
1237                         ret = ext4_ext_insert_index(
1238                             inode_ref, path, depth - i, newext,
1239                             ext4_ext_block_index(
1240                                 ext_block_hdr(&spt[i - 1].path.block)),
1241                             spt[i - 1].ptr, &newblock,
1242                             &need_grow);
1243                 }
1244                 ptr = newblock.ptr;
1245
1246                 if (ret != EOK)
1247                         goto out;
1248
1249                 else if (spt && ptr && !ret) {
1250                         /* Prepare for the next iteration after splitting. */
1251                         spt[i] = newblock;
1252                 }
1253
1254                 i++;
1255         } while (ptr != 0 && i <= depth);
1256
1257         if (need_grow) {
1258                 ret = ext4_ext_grow_indepth(inode_ref, 0);
1259                 if (ret)
1260                         goto out;
1261                 ret = ext4_find_extent(inode_ref, to_le32(newext->first_block),
1262                                        ppath, 0);
1263                 if (ret)
1264                         goto out;
1265                 i = depth;
1266                 path = *ppath;
1267                 goto again;
1268         }
1269 out:
1270         if (ret) {
1271                 if (path)
1272                         ext4_ext_drop_refs(inode_ref, path, 0);
1273
1274                 while (--level >= 0 && spt) {
1275                         if (spt[level].ptr) {
1276                                 ext4_ext_free_blocks(inode_ref, spt[level].ptr,
1277                                                      1, 0);
1278                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1279                                                    1);
1280                         }
1281                 }
1282         } else {
1283                 while (--level >= 0 && spt) {
1284                         if (spt[level].switch_to)
1285                                 ext4_ext_replace_path(inode_ref, path, spt,
1286                                                       level);
1287                         else if (spt[level].ptr)
1288                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1289                                                    1);
1290                 }
1291         }
1292         if (spt)
1293                 free(spt);
1294
1295         return ret;
1296 }
1297
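/*
 * Free the physical blocks backing the logical range [@from, @to] of
 * extent @ex.
 */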
1298 static void ext4_ext_remove_blocks(struct ext4_inode_ref *inode_ref,
1299                                    struct ext4_extent *ex, ext4_lblk_t from,
1300                                    ext4_lblk_t to)
1301 {
1302         ext4_lblk_t len = to - from + 1;
1303         ext4_lblk_t num;
1304         ext4_fsblk_t start;
1305         num = from - to_le32(ex->first_block);
1306         start = ext4_ext_pblock(ex) + num;
1307         ext4_dbg(DEBUG_EXTENT,
1308                  "Freeing %" PRIu32 " at %" PRIu64 ", %" PRIu32 "\n", from,
1309                  start, len);
1310
1311         ext4_ext_free_blocks(inode_ref, start, len, 0);
1312 }
1313
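/*
 * Remove the index entry at @path[depth], free the tree block it points to,
 * and propagate the new first logical block to the parent indexes where
 * needed.
 */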
1314 static int ext4_ext_remove_idx(struct ext4_inode_ref *inode_ref,
1315                                struct ext4_extent_path *path, int32_t depth)
1316 {
1317         int err = EOK;
1318         int32_t i = depth;
1319         ext4_fsblk_t leaf;
1320
1321         /* free index block */
1322         leaf = ext4_idx_pblock(path[i].index);
1323
1324         if (path[i].index != EXT_LAST_INDEX(path[i].header)) {
1325                 ptrdiff_t len = EXT_LAST_INDEX(path[i].header) - path[i].index;
1326                 memmove(path[i].index, path[i].index + 1,
1327                         len * sizeof(struct ext4_extent_index));
1328         }
1329
1330         path[i].header->entries_count =
1331             to_le16(to_le16(path[i].header->entries_count) - 1);
1332         err = ext4_ext_dirty(inode_ref, path + i);
1333         if (err != EOK)
1334                 return err;
1335
1336         ext4_dbg(DEBUG_EXTENT, "IDX: Freeing %" PRIu32 " at %" PRIu64 ", %d\n",
1337                  to_le32(path[i].index->first_block), leaf, 1);
1338         ext4_ext_free_blocks(inode_ref, leaf, 1, 0);
1339
1340         while (i > 0) {
1341                 if (path[i].index != EXT_FIRST_INDEX(path[i].header))
1342                         break;
1343
1344                 path[i - 1].index->first_block = path[i].index->first_block;
1345                 err = ext4_ext_dirty(inode_ref, path + i - 1);
1346                 if (err != EOK)
1347                         break;
1348
1349                 i--;
1350         }
1351         return err;
1352 }
1353
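/*
 * Remove the logical range [@from, @to] from the extents in the current leaf,
 * trimming partially covered extents and dropping fully covered ones. An
 * emptied on-disk leaf is removed from the index block above it.
 */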
1354 static int ext4_ext_remove_leaf(struct ext4_inode_ref *inode_ref,
1355                                 struct ext4_extent_path *path, ext4_lblk_t from,
1356                                 ext4_lblk_t to)
1357 {
1358
1359         int32_t depth = ext_depth(inode_ref->inode);
1360         struct ext4_extent *ex = path[depth].extent;
1361         struct ext4_extent *start_ex, *ex2 = NULL;
1362         struct ext4_extent_header *eh = path[depth].header;
1363         int32_t len;
1364         int err = EOK;
1365         uint16_t new_entries;
1366
1367         start_ex = ex;
1368         new_entries = to_le16(eh->entries_count);
1369         while (ex <= EXT_LAST_EXTENT(path[depth].header) &&
1370                to_le32(ex->first_block) <= to) {
1371                 int32_t new_len = 0;
1372                 int unwritten;
1373                 ext4_lblk_t start, new_start;
1374                 ext4_fsblk_t newblock;
1375                 new_start = start = to_le32(ex->first_block);
1376                 len = ext4_ext_get_actual_len(ex);
1377                 newblock = ext4_ext_pblock(ex);
1378                 if (start < from) {
1379                         len -= from - start;
1380                         new_len = from - start;
1381                         start = from;
1382                         start_ex++;
1383                 } else {
1384                         if (start + len - 1 > to) {
1385                                 new_len = start + len - 1 - to;
1386                                 len -= new_len;
1387                                 new_start = to + 1;
1388                                 newblock += to + 1 - start;
1389                                 ex2 = ex;
1390                         }
1391                 }
1392
1393                 ext4_ext_remove_blocks(inode_ref, ex, start, start + len - 1);
1394                 ex->first_block = to_le32(new_start);
1395                 if (!new_len)
1396                         new_entries--;
1397                 else {
1398                         unwritten = ext4_ext_is_unwritten(ex);
1399                         ex->block_count = to_le16(new_len);
1400                         ext4_ext_store_pblock(ex, newblock);
1401                         if (unwritten)
1402                                 ext4_ext_mark_unwritten(ex);
1403                 }
1404
1405                 ex += 1;
1406         }
1407
1408         if (ex2 == NULL)
1409                 ex2 = ex;
1410
1411         if (ex2 <= EXT_LAST_EXTENT(eh))
1412                 memmove(start_ex, ex2, (EXT_LAST_EXTENT(eh) - ex2 + 1) * sizeof(struct ext4_extent));
1413
1414         eh->entries_count = to_le16(new_entries);
1415         ext4_ext_dirty(inode_ref, path + depth);
1416         if (path[depth].extent == EXT_FIRST_EXTENT(eh) && eh->entries_count)
1417                 err = ext4_ext_correct_indexes(inode_ref, path);
1418
1419         /* if this leaf is free, then we should
1420          * remove it from index block above */
1421         if (err == EOK && eh->entries_count == 0 && path[depth].block.lb_id)
1422                 err = ext4_ext_remove_idx(inode_ref, path, depth - 1);
1423         else if (depth > 0)
1424                 path[depth - 1].index++;
1425
1426         return err;
1427 }
1428
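/*
 * Return true if the index node still has an entry that may cover blocks up
 * to @to, i.e. there is more of the tree to descend into while removing.
 */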
1429 static bool ext4_ext_more_to_rm(struct ext4_extent_path *path, ext4_lblk_t to)
1430 {
1431         if (!to_le16(path->header->entries_count))
1432                 return false;
1433
1434         if (path->index > EXT_LAST_INDEX(path->header))
1435                 return false;
1436
1437         if (to_le32(path->index->first_block) > to)
1438                 return false;
1439
1440         return true;
1441 }
1442
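/*
 * Remove all blocks in the logical range [@from, @to] from the inode's
 * extent tree. A removal that falls strictly inside a single extent splits
 * it in two; otherwise the affected leaves and indexes are trimmed bottom-up.
 */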
1443 int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref, ext4_lblk_t from,
1444                           ext4_lblk_t to)
1445 {
1446         struct ext4_extent_path *path = NULL;
1447         int ret = EOK;
1448         int32_t depth = ext_depth(inode_ref->inode);
1449         int32_t i;
1450
1451         ret = ext4_find_extent(inode_ref, from, &path, 0);
1452         if (ret)
1453                 goto out;
1454
1455         if (!path[depth].extent) {
1456                 ret = EOK;
1457                 goto out;
1458         }
1459
1460         bool in_range = IN_RANGE(from, to_le32(path[depth].extent->first_block),
1461                         ext4_ext_get_actual_len(path[depth].extent));
1462
1463         if (!in_range) {
1464                 ret = EOK;
1465                 goto out;
1466         }
1467
1468         /* If the range to remove lies strictly inside a single extent, split it */
1469         if ((to_le32(path[depth].extent->first_block) < from) &&
1470             (to < to_le32(path[depth].extent->first_block) +
1471                         ext4_ext_get_actual_len(path[depth].extent) - 1)) {
1472
1473                 struct ext4_extent *ex = path[depth].extent, newex;
1474                 int unwritten = ext4_ext_is_unwritten(ex);
1475                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1476                 int32_t len = ext4_ext_get_actual_len(ex);
1477                 ext4_fsblk_t newblock =
1478                         to + 1 - ee_block + ext4_ext_pblock(ex);
1479
1480                 ex->block_count = to_le16(from - ee_block);
1481                 if (unwritten)
1482                         ext4_ext_mark_unwritten(ex);
1483
1484                 ext4_ext_dirty(inode_ref, path + depth);
1485
1486                 newex.first_block = to_le32(to + 1);
1487                 newex.block_count = to_le16(ee_block + len - 1 - to);
1488                 ext4_ext_store_pblock(&newex, newblock);
1489                 if (unwritten)
1490                         ext4_ext_mark_unwritten(&newex);
1491
1492                 ret = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1493                 goto out;
1494         }
1495
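        /*
         * Walk the tree: at the leaf level remove the extents overlapping
         * [from, to]; at index levels descend while ext4_ext_more_to_rm()
         * reports entries left to process, and release index blocks that
         * become empty on the way back up.
         */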
1496         i = depth;
1497         while (i >= 0) {
1498                 if (i == depth) {
1499                         struct ext4_extent_header *eh;
1500                         struct ext4_extent *first_ex, *last_ex;
1501                         ext4_lblk_t leaf_from, leaf_to;
1502                         eh = path[i].header;
1503                         ext4_assert(to_le16(eh->entries_count) > 0);
1504                         first_ex = EXT_FIRST_EXTENT(eh);
1505                         last_ex = EXT_LAST_EXTENT(eh);
1506                         leaf_from = to_le32(first_ex->first_block);
1507                         leaf_to = to_le32(last_ex->first_block) +
1508                                   ext4_ext_get_actual_len(last_ex) - 1;
1509                         if (leaf_from < from)
1510                                 leaf_from = from;
1511
1512                         if (leaf_to > to)
1513                                 leaf_to = to;
1514
1515                         ext4_ext_remove_leaf(inode_ref, path, leaf_from,
1516                                         leaf_to);
1517                         ext4_ext_drop_refs(inode_ref, path + i, 0);
1518                         i--;
1519                         continue;
1520                 }
1521
1522                 struct ext4_extent_header *eh;
1523                 eh = path[i].header;
1524                 if (ext4_ext_more_to_rm(path + i, to)) {
1525                         struct ext4_block bh = EXT4_BLOCK_ZERO();
1526                         if (path[i + 1].block.lb_id)
1527                                 ext4_ext_drop_refs(inode_ref, path + i + 1, 0);
1528
1529                         ret = read_extent_tree_block(inode_ref,
1530                                         ext4_idx_pblock(path[i].index),
1531                                         depth - i - 1, &bh, 0);
1532                         if (ret)
1533                                 goto out;
1534
1535                         path[i].p_block =
1536                                         ext4_idx_pblock(path[i].index);
1537                         path[i + 1].block = bh;
1538                         path[i + 1].header = ext_block_hdr(&bh);
1539                         path[i + 1].depth = depth - i - 1;
1540                         if (i + 1 == depth)
1541                                 path[i + 1].extent = EXT_FIRST_EXTENT(
1542                                         path[i + 1].header);
1543                         else
1544                                 path[i + 1].index =
1545                                         EXT_FIRST_INDEX(path[i + 1].header);
1546
1547                         i++;
1548                 } else {
1549                         if (i > 0) {
1550                                 if (!eh->entries_count)
1551                                         ret = ext4_ext_remove_idx(inode_ref, path,
1552                                                         i - 1);
1553                                 else
1554                                         path[i - 1].index++;
1555
1556                         }
1557
1558                         if (i)
1559                                 ext4_block_set(inode_ref->fs->bdev,
1560                                                 &path[i].block);
1561
1562
1563                         i--;
1564                 }
1565
1566         }
1567
1568         /* TODO: flexible tree reduction should be here */
1569         if (path->header->entries_count == 0) {
1570                 /*
1571                  * truncating to zero freed the whole tree,
1572                  * so we need to correct eh_depth
1573                  */
1574                 ext_inode_hdr(inode_ref->inode)->depth = 0;
1575                 ext_inode_hdr(inode_ref->inode)->max_entries_count =
1576                     to_le16(ext4_ext_space_root(inode_ref));
1577                 ret = ext4_ext_dirty(inode_ref, path);
1578         }
1579
1580 out:
1581         ext4_ext_drop_refs(inode_ref, path, 0);
1582         free(path);
1583         path = NULL;
1584         return ret;
1585 }
1586
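/*
 * Split the extent referenced by @ppath at logical block @split.  If
 * @split is the first block of the extent, only its written state is
 * updated; otherwise the extent is shortened and a new extent covering
 * the second half is inserted.  EXT4_EXT_MARK_UNWRIT1/2 in @split_flag
 * mark the first/second half as unwritten.  If the insert fails, the
 * original extent length is restored.
 */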
1587 static int ext4_ext_split_extent_at(struct ext4_inode_ref *inode_ref,
1588                                     struct ext4_extent_path **ppath,
1589                                     ext4_lblk_t split, uint32_t split_flag)
1590 {
1591         struct ext4_extent *ex, newex;
1592         ext4_fsblk_t newblock;
1593         ext4_lblk_t ee_block;
1594         int32_t ee_len;
1595         int32_t depth = ext_depth(inode_ref->inode);
1596         int err = EOK;
1597
1598         ex = (*ppath)[depth].extent;
1599         ee_block = to_le32(ex->first_block);
1600         ee_len = ext4_ext_get_actual_len(ex);
1601         newblock = split - ee_block + ext4_ext_pblock(ex);
1602
1603         if (split == ee_block) {
1604                 /*
1605                  * case b: block @split is the first block of the extent;
1606                  * we only change the state of the extent, no splitting
1607                  * is needed.
1608                  */
1609                 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1610                         ext4_ext_mark_unwritten(ex);
1611                 else
1612                         ext4_ext_mark_initialized(ex);
1613
1614                 err = ext4_ext_dirty(inode_ref, *ppath + depth);
1615                 goto out;
1616         }
1617
1618         ex->block_count = to_le16(split - ee_block);
1619         if (split_flag & EXT4_EXT_MARK_UNWRIT1)
1620                 ext4_ext_mark_unwritten(ex);
1621
1622         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1623         if (err != EOK)
1624                 goto out;
1625
1626         newex.first_block = to_le32(split);
1627         newex.block_count = to_le16(ee_len - (split - ee_block));
1628         ext4_ext_store_pblock(&newex, newblock);
1629         if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1630                 ext4_ext_mark_unwritten(&newex);
1631         err = ext4_ext_insert_extent(inode_ref, ppath, &newex,
1632                                      EXT4_EXT_NO_COMBINE);
1633         if (err != EOK)
1634                 goto restore_extent_len;
1635
1636 out:
1637         return err;
1638 restore_extent_len:
1639         ex->block_count = to_le16(ee_len);
1640         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1641         return err;
1642 }
1643
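/*
 * Convert @blocks blocks starting at logical block @split, which lie
 * inside an unwritten extent, to the initialized state.  The extent is
 * split into up to three parts so that only the requested range loses
 * the unwritten flag.
 */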
1644 static int ext4_ext_convert_to_initialized(struct ext4_inode_ref *inode_ref,
1645                                            struct ext4_extent_path **ppath,
1646                                            ext4_lblk_t split, uint32_t blocks)
1647 {
1648         int32_t depth = ext_depth(inode_ref->inode), err = EOK;
1649         struct ext4_extent *ex = (*ppath)[depth].extent;
1650
1651         ext4_assert(to_le32(ex->first_block) <= split);
1652
1653         if (split + blocks ==
1654             to_le32(ex->first_block) + ext4_ext_get_actual_len(ex)) {
1655                 /* split and initialize right part */
1656                 err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1657                                                EXT4_EXT_MARK_UNWRIT1);
1658         } else if (to_le32(ex->first_block) == split) {
1659                 /* split and initialize left part */
1660                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1661                                                EXT4_EXT_MARK_UNWRIT2);
1662         } else {
1663                 /* split 1 extent to 3 and initialize the 2nd */
1664                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1665                                                EXT4_EXT_MARK_UNWRIT1 |
1666                                                    EXT4_EXT_MARK_UNWRIT2);
1667                 if (!err) {
1668                         err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1669                                                        EXT4_EXT_MARK_UNWRIT1);
1670                 }
1671         }
1672
1673         return err;
1674 }
1675
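/*
 * Return the logical block number of the next allocated extent after the
 * current position in @path, or EXT_MAX_BLOCKS if there is none.
 */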
1676 static ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_extent_path *path)
1677 {
1678         int32_t depth;
1679
1680         depth = path->depth;
1681
1682         if (depth == 0 && path->extent == NULL)
1683                 return EXT_MAX_BLOCKS;
1684
1685         while (depth >= 0) {
1686                 if (depth == path->depth) {
1687                         /* leaf */
1688                         if (path[depth].extent &&
1689                             path[depth].extent !=
1690                                 EXT_LAST_EXTENT(path[depth].header))
1691                                 return to_le32(
1692                                     path[depth].extent[1].first_block);
1693                 } else {
1694                         /* index */
1695                         if (path[depth].index !=
1696                             EXT_LAST_INDEX(path[depth].header))
1697                                 return to_le32(
1698                                     path[depth].index[1].first_block);
1699                 }
1700                 depth--;
1701         }
1702
1703         return EXT_MAX_BLOCKS;
1704 }
1705
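/*
 * Zero @blocks_count physical blocks starting at @block by writing
 * zero-filled buffers through the block cache.
 */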
1706 static int ext4_ext_zero_unwritten_range(struct ext4_inode_ref *inode_ref,
1707                                          ext4_fsblk_t block,
1708                                          uint32_t blocks_count)
1709 {
1710         int err = EOK;
1711         uint32_t i;
1712         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
1713         for (i = 0; i < blocks_count; i++) {
1714                 struct ext4_block bh = EXT4_BLOCK_ZERO();
1715                 err = ext4_block_get(inode_ref->fs->bdev, &bh, block + i);
1716                 if (err != EOK)
1717                         break;
1718
1719                 memset(bh.data, 0, block_size);
1720                 bh.dirty = true;
1721                 err = ext4_block_set(inode_ref->fs->bdev, &bh);
1722                 if (err != EOK)
1723                         break;
1724         }
1725         return err;
1726 }
1727
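/*
 * Map logical block @iblock of @inode_ref to physical blocks.  At most
 * @max_blocks contiguous blocks are mapped; the first physical block is
 * returned in @result and the number of mapped blocks in @blocks_count.
 * If the block is not allocated yet and @create is set, new blocks are
 * allocated and inserted into the extent tree; a covering unwritten
 * extent is zeroed and converted to the initialized state first.
 */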
1728 int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref, ext4_fsblk_t iblock,
1729                         uint32_t max_blocks, ext4_fsblk_t *result, bool create,
1730                         uint32_t *blocks_count)
1731 {
1732         struct ext4_extent_path *path = NULL;
1733         struct ext4_extent newex, *ex;
1734         ext4_fsblk_t goal;
1735         int err = EOK;
1736         int32_t depth;
1737         uint32_t allocated = 0;
1738         ext4_fsblk_t next, newblock;
1739
1740         if (result)
1741                 *result = 0;
1742
1743         if (blocks_count)
1744                 *blocks_count = 0;
1745
1746         /* find extent for this block */
1747         err = ext4_find_extent(inode_ref, iblock, &path, 0);
1748         if (err != EOK) {
1749                 path = NULL;
1750                 goto out2;
1751         }
1752
1753         depth = ext_depth(inode_ref->inode);
1754
1755         /*
1756          * a consistent leaf must not be empty;
1757          * this situation is possible, though, _during_ tree modification;
1758          * this is why the assert cannot be put in ext4_find_extent()
1759          */
1760         ex = path[depth].extent;
1761         if (ex) {
1762                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1763                 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
1764                 uint16_t ee_len = ext4_ext_get_actual_len(ex);
1765                 /* if the found extent covers the block, simply return it */
1766                 if (IN_RANGE(iblock, ee_block, ee_len)) {
1767                         /* number of remaining blocks in the extent */
1768                         allocated = ee_len - (iblock - ee_block);
1769
1770                         if (!ext4_ext_is_unwritten(ex)) {
1771                                 newblock = iblock - ee_block + ee_start;
1772                                 goto out;
1773                         }
1774
1775                         if (!create) {
1776                                 newblock = 0;
1777                                 goto out;
1778                         }
1779
1780                         uint32_t zero_range;
1781                         zero_range = allocated;
1782                         if (zero_range > max_blocks)
1783                                 zero_range = max_blocks;
1784
1785                         newblock = iblock - ee_block + ee_start;
1786                         err = ext4_ext_zero_unwritten_range(inode_ref, newblock,
1787                                         zero_range);
1788                         if (err != EOK)
1789                                 goto out2;
1790
1791                         err = ext4_ext_convert_to_initialized(inode_ref, &path,
1792                                         iblock, zero_range);
1793                         if (err != EOK)
1794                                 goto out2;
1795
1796                         goto out;
1797                 }
1798         }
1799
1800         /*
1801          * the requested block is not allocated yet;
1802          * we cannot allocate it unless the create flag is set
1803          */
1804         if (!create) {
1805                 goto out2;
1806         }
1807
1808         /* find the next allocated block so that we know how many
1809          * blocks we can allocate without overlapping the next extent */
1810         next = ext4_ext_next_allocated_block(path);
1811         allocated = next - iblock;
1812         if (allocated > max_blocks)
1813                 allocated = max_blocks;
1814
1815         /* allocate new block */
1816         goal = ext4_ext_find_goal(inode_ref, path, iblock);
1817         newblock = ext4_new_meta_blocks(inode_ref, goal, 0, &allocated, &err);
1818         if (!newblock)
1819                 goto out2;
1820
1821         /* try to insert new extent into found leaf and return */
1822         newex.first_block = to_le32(iblock);
1823         ext4_ext_store_pblock(&newex, newblock);
1824         newex.block_count = to_le16(allocated);
1825         err = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1826         if (err != EOK) {
1827                 /* free data blocks we just allocated */
1828                 ext4_ext_free_blocks(inode_ref, ext4_ext_pblock(&newex),
1829                                      to_le16(newex.block_count), 0);
1830                 goto out2;
1831         }
1832
1833         /* the previous routine could have used the block we allocated */
1834         newblock = ext4_ext_pblock(&newex);
1835
1836 out:
1837         if (allocated > max_blocks)
1838                 allocated = max_blocks;
1839
1840         if (result)
1841                 *result = newblock;
1842
1843         if (blocks_count)
1844                 *blocks_count = allocated;
1845
1846 out2:
1847         if (path) {
1848                 ext4_ext_drop_refs(inode_ref, path, 0);
1849                 free(path);
1850         }
1851
1852         return err;
1853 }
1854 #endif