1 /*
2  * Copyright (c) 2015 Grzegorz Kostka (kostka.grzegorz@gmail.com)
3  * Copyright (c) 2015 Kaho Ng (ngkaho1234@gmail.com)
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * - Redistributions of source code must retain the above copyright
10  *   notice, this list of conditions and the following disclaimer.
11  * - Redistributions in binary form must reproduce the above copyright
12  *   notice, this list of conditions and the following disclaimer in the
13  *   documentation and/or other materials provided with the distribution.
14  * - The name of the author may not be used to endorse or promote products
15  *   derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 #include "ext4_config.h"
30 #include "ext4_blockdev.h"
31 #include "ext4_fs.h"
32 #include "ext4_super.h"
33 #include "ext4_crc32c.h"
34 #include "ext4_balloc.h"
35 #include "ext4_debug.h"
36
37 #include <stdlib.h>
38 #include <string.h>
39 #include <inttypes.h>
40 #include <stddef.h>
41
42 #include "ext4_extent.h"
43
44 #if CONFIG_EXTENT_FULL
45
46 /*
47  * Flags used by extent splitting.
48  */
49 #define EXT4_EXT_MARK_UNWRIT1 0x02 /* mark first half unwritten */
50 #define EXT4_EXT_MARK_UNWRIT2 0x04 /* mark second half unwritten */
51 #define EXT4_EXT_DATA_VALID1 0x08  /* first half contains valid data */
52 #define EXT4_EXT_DATA_VALID2 0x10  /* second half contains valid data */
53 #define EXT4_EXT_NO_COMBINE 0x20   /* do not combine two extents */
54
55 static struct ext4_extent_tail *
56 find_ext4_extent_tail(struct ext4_extent_header *eh)
57 {
58         return (struct ext4_extent_tail *)(((char *)eh) +
59                                            EXT4_EXTENT_TAIL_OFFSET(eh));
60 }
61
62 static struct ext4_extent_header *ext_inode_hdr(struct ext4_inode *inode)
63 {
64         return (struct ext4_extent_header *)inode->blocks;
65 }
66
67 static struct ext4_extent_header *ext_block_hdr(struct ext4_block *block)
68 {
69         return (struct ext4_extent_header *)block->data;
70 }
71
72 static uint16_t ext_depth(struct ext4_inode *inode)
73 {
74         return to_le16(ext_inode_hdr(inode)->depth);
75 }
76
77 static uint16_t ext4_ext_get_actual_len(struct ext4_extent *ext)
78 {
79         return (to_le16(ext->block_count) <= EXT_INIT_MAX_LEN
80                     ? to_le16(ext->block_count)
81                     : (to_le16(ext->block_count) - EXT_INIT_MAX_LEN));
82 }
83
84 static void ext4_ext_mark_initialized(struct ext4_extent *ext)
85 {
86         ext->block_count = to_le16(ext4_ext_get_actual_len(ext));
87 }
88
89 static void ext4_ext_mark_unwritten(struct ext4_extent *ext)
90 {
91         ext->block_count |= to_le16(EXT_INIT_MAX_LEN);
92 }
93
94 static int ext4_ext_is_unwritten(struct ext4_extent *ext)
95 {
96         /* Extent with ee_len of 0x8000 is treated as an initialized extent */
97         return (to_le16(ext->block_count) > EXT_INIT_MAX_LEN);
98 }
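
/*
 * Length encoding, for illustration: EXT_INIT_MAX_LEN is the 0x8000 value
 * mentioned above, so the top bit of block_count doubles as the
 * "unwritten" flag.  E.g. a block_count of 0x8005 is read as an unwritten
 * extent of 5 blocks, while exactly 0x8000 is read as an initialized
 * extent of 32768 blocks.
 */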
99
100 /*
101  * ext4_ext_pblock:
102  * combine low and high parts of physical block number into ext4_fsblk_t
103  */
104 static ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
105 {
106         ext4_fsblk_t block;
107
108         block = to_le32(ex->start_lo);
109         block |= ((ext4_fsblk_t)to_le16(ex->start_hi) << 31) << 1;
110         return block;
111 }
112
113 /*
114  * ext4_idx_pblock:
115  * combine low and high parts of a leaf physical block number into ext4_fsblk_t
116  */
117 static ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_index *ix)
118 {
119         ext4_fsblk_t block;
120
121         block = to_le32(ix->leaf_lo);
122         block |= ((ext4_fsblk_t)to_le16(ix->leaf_hi) << 31) << 1;
123         return block;
124 }
125
126 /*
127  * ext4_ext_store_pblock:
128  * stores a large physical block number into an extent struct,
129  * breaking it into parts
130  */
131 static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
132 {
133         ex->start_lo = to_le32((uint32_t)(pb & 0xffffffff));
134         ex->start_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
135 }
136
137 /*
138  * ext4_idx_store_pblock:
139  * stores a large physical block number into an index struct,
140  * breaking it into parts
141  */
142 static void ext4_idx_store_pblock(struct ext4_extent_index *ix, ext4_fsblk_t pb)
143 {
144         ix->leaf_lo = to_le32((uint32_t)(pb & 0xffffffff));
145         ix->leaf_hi = to_le16((uint16_t)((pb >> 32)) & 0xffff);
146 }
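
/*
 * Worked example for the pblock split/combine helpers above: with
 * pb = 0x123456789ABC the low part (start_lo/leaf_lo) holds 0x56789ABC and
 * the high part (start_hi/leaf_hi) holds 0x1234.  ext4_ext_pblock() and
 * ext4_idx_pblock() put the value back together; the "(... << 31) << 1"
 * there is simply a two-step way of shifting the high 16 bits up by 32.
 */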
147
148 static int ext4_allocate_single_block(struct ext4_inode_ref *inode_ref,
149                                       ext4_fsblk_t goal,
150                                       ext4_fsblk_t *blockp)
151 {
152         return ext4_balloc_alloc_block(inode_ref, goal, blockp);
153 }
154
155 static ext4_fsblk_t ext4_new_meta_blocks(struct ext4_inode_ref *inode_ref,
156                                          ext4_fsblk_t goal,
157                                          uint32_t flags __unused,
158                                          uint32_t *count, int *errp)
159 {
160         ext4_fsblk_t block = 0;
161
162         *errp = ext4_allocate_single_block(inode_ref, goal, &block);
163         if (count)
164                 *count = 1;
165         return block;
166 }
167
168 static void ext4_ext_free_blocks(struct ext4_inode_ref *inode_ref,
169                                  ext4_fsblk_t block, uint32_t count,
170                                  uint32_t flags __unused)
171 {
172         ext4_balloc_free_blocks(inode_ref, block, count);
173 }
174
175 static size_t ext4_ext_space_block(struct ext4_inode_ref *inode_ref)
176 {
177         size_t size;
178         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
179
180         size = (block_size - sizeof(struct ext4_extent_header)) /
181                sizeof(struct ext4_extent);
182 #ifdef AGGRESSIVE_TEST
183         if (size > 6)
184                 size = 6;
185 #endif
186         return size;
187 }
188
189 static size_t ext4_ext_space_block_idx(struct ext4_inode_ref *inode_ref)
190 {
191         size_t size;
192         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
193
194         size = (block_size - sizeof(struct ext4_extent_header)) /
195                sizeof(struct ext4_extent_index);
196 #ifdef AGGRESSIVE_TEST
197         if (size > 5)
198                 size = 5;
199 #endif
200         return size;
201 }
202
203 static size_t ext4_ext_space_root(struct ext4_inode_ref *inode_ref)
204 {
205         size_t size;
206
207         size = sizeof(inode_ref->inode->blocks);
208         size -= sizeof(struct ext4_extent_header);
209         size /= sizeof(struct ext4_extent);
210 #ifdef AGGRESSIVE_TEST
211         if (size > 3)
212                 size = 3;
213 #endif
214         return size;
215 }
216
217 static size_t ext4_ext_space_root_idx(struct ext4_inode_ref *inode_ref)
218 {
219         size_t size;
220
221         size = sizeof(inode_ref->inode->blocks);
222         size -= sizeof(struct ext4_extent_header);
223         size /= sizeof(struct ext4_extent_index);
224 #ifdef AGGRESSIVE_TEST
225         if (size > 4)
226                 size = 4;
227 #endif
228         return size;
229 }
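
/*
 * Rough capacity figures implied by the helpers above, assuming the usual
 * on-disk layout where the extent header, an extent and an index entry are
 * 12 bytes each and the inode's i_block area is 60 bytes: a 4 KiB tree
 * block holds (4096 - 12) / 12 = 340 extents or index entries, while the
 * in-inode root holds (60 - 12) / 12 = 4 entries.
 */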
230
231 static size_t ext4_ext_max_entries(struct ext4_inode_ref *inode_ref,
232                                    uint32_t depth)
233 {
234         size_t max;
235
236         if (depth == ext_depth(inode_ref->inode)) {
237                 if (depth == 0)
238                         max = ext4_ext_space_root(inode_ref);
239                 else
240                         max = ext4_ext_space_root_idx(inode_ref);
241         } else {
242                 if (depth == 0)
243                         max = ext4_ext_space_block(inode_ref);
244                 else
245                         max = ext4_ext_space_block_idx(inode_ref);
246         }
247
248         return max;
249 }
250
251 static ext4_fsblk_t ext4_ext_find_goal(struct ext4_inode_ref *inode_ref,
252                                        struct ext4_extent_path *path,
253                                        ext4_lblk_t block)
254 {
255         if (path) {
256                 uint32_t depth = path->depth;
257                 struct ext4_extent *ex;
258
259                 /*
260                  * Try to predict block placement assuming that we are
261                  * filling in a file which will eventually be
262                  * non-sparse --- i.e., in the case of libbfd writing
263                  * an ELF object's sections out-of-order but in a way
264                  * that eventually results in a contiguous object or
265                  * executable file, or some database extending a table
266                  * space file.  However, this is actually somewhat
267                  * non-ideal if we are writing a sparse file such as
268                  * qemu or KVM writing a raw image file that is going
269                  * to stay fairly sparse, since it will end up
270                  * fragmenting the file system's free space.  Maybe we
271                  * should have some heuristics or some way to allow
272                  * userspace to pass a hint to the file system,
273                  * especially if the latter case turns out to be
274                  * common.
275                  */
276                 ex = path[depth].extent;
277                 if (ex) {
278                         ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
279                         ext4_lblk_t ext_block = to_le32(ex->first_block);
280
281                         if (block > ext_block)
282                                 return ext_pblk + (block - ext_block);
283                         else
284                                 return ext_pblk - (ext_block - block);
285                 }
286
287                 /* it looks like index is empty;
288                  * try to find starting block from index itself */
289                 if (path[depth].block.lb_id)
290                         return path[depth].block.lb_id;
291         }
292
293         /* OK. use inode's group */
294         return ext4_fs_inode_to_goal_block(inode_ref);
295 }
296
297 /*
298  * Allocation for a meta data block
299  */
300 static ext4_fsblk_t ext4_ext_new_meta_block(struct ext4_inode_ref *inode_ref,
301                                             struct ext4_extent_path *path,
302                                             struct ext4_extent *ex, int *err,
303                                             uint32_t flags)
304 {
305         ext4_fsblk_t goal, newblock;
306
307         goal = ext4_ext_find_goal(inode_ref, path, to_le32(ex->first_block));
308         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, err);
309         return newblock;
310 }
311
312 static uint32_t ext4_ext_block_csum(struct ext4_inode_ref *inode_ref,
313                                     struct ext4_extent_header *eh)
314 {
315         uint32_t checksum = 0;
316         struct ext4_sblock *sb = &inode_ref->fs->sb;
317
318         if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
319                 uint32_t ino_index = to_le32(inode_ref->index);
320                 uint32_t ino_gen =
321                         to_le32(ext4_inode_get_generation(inode_ref->inode));
322                 /* First calculate crc32 checksum against fs uuid */
323                 checksum = ext4_crc32c(~0, sb->uuid, sizeof(sb->uuid));
324                 /* Then calculate crc32 checksum against inode number
325                  * and inode generation */
326                 checksum = ext4_crc32c(checksum, &ino_index,
327                                      sizeof(ino_index));
328                 checksum = ext4_crc32c(checksum, &ino_gen,
329                                      sizeof(ino_gen));
330                 /* Finally calculate crc32 checksum against 
331                  * the entire extent block up to the checksum field */
332                 checksum = ext4_crc32c(checksum, eh,
333                                 EXT4_EXTENT_TAIL_OFFSET(eh));
334         }
335         return checksum;
336 }
337
338 static void ext4_extent_block_csum_set(struct ext4_inode_ref *inode_ref,
339                                        struct ext4_extent_header *eh)
340 {
341         struct ext4_extent_tail *tail;
342
343         tail = find_ext4_extent_tail(eh);
344         tail->et_checksum = to_le32(ext4_ext_block_csum(inode_ref, eh));
345 }
346
347 static int ext4_ext_dirty(struct ext4_inode_ref *inode_ref,
348                           struct ext4_extent_path *path)
349 {
350         if (path->block.lb_id)
351                 path->block.dirty = true;
352         else
353                 inode_ref->dirty = true;
354
355         return EOK;
356 }
357
358 static void ext4_ext_drop_refs(struct ext4_inode_ref *inode_ref,
359                                struct ext4_extent_path *path, bool keep_other)
360 {
361         int32_t depth, i;
362
363         if (!path)
364                 return;
365         if (keep_other)
366                 depth = 0;
367         else
368                 depth = path->depth;
369
370         for (i = 0; i <= depth; i++, path++) {
371                 if (path->block.lb_id) {
372                         if (path->block.dirty)
373                                 ext4_extent_block_csum_set(inode_ref,
374                                                 path->header);
375
376                         ext4_block_set(inode_ref->fs->bdev, &path->block);
377                 }
378         }
379 }
380
381 /*
382  * Check whether the basic information inside the extent header
383  * is correct.
384  */
385 static int ext4_ext_check(struct ext4_inode_ref *inode_ref,
386                           struct ext4_extent_header *eh, uint16_t depth,
387                           ext4_fsblk_t pblk __unused)
388 {
389         struct ext4_extent_tail *tail;
390         const char *error_msg;
391         (void)error_msg;
392
393         if (to_le16(eh->magic) != EXT4_EXTENT_MAGIC) {
394                 error_msg = "invalid magic";
395                 goto corrupted;
396         }
397         if (to_le16(eh->depth) != depth) {
398                 error_msg = "unexpected eh_depth";
399                 goto corrupted;
400         }
401         if (eh->max_entries_count == 0) {
402                 error_msg = "invalid eh_max";
403                 goto corrupted;
404         }
405         if (to_le16(eh->entries_count) > to_le16(eh->max_entries_count)) {
406                 error_msg = "invalid eh_entries";
407                 goto corrupted;
408         }
409
410         tail = find_ext4_extent_tail(eh);
411         struct ext4_sblock *sb = &inode_ref->fs->sb;
412         if (ext4_sb_feature_ro_com(sb, EXT4_FRO_COM_METADATA_CSUM)) {
413                 if (tail->et_checksum != to_le32(ext4_ext_block_csum(inode_ref, eh))) {
414                         /* FIXME: warn that the extent block checksum is invalid */
415                 }
416         }
417
418         return EOK;
419
420 corrupted:
421         ext4_dbg(DEBUG_EXTENT, "Bad extents B+ tree block: %s. "
422                                "Blocknr: %" PRIu64 "\n",
423                  error_msg, pblk);
424         return EIO;
425 }
426
427 static int read_extent_tree_block(struct ext4_inode_ref *inode_ref,
428                                   ext4_fsblk_t pblk, int32_t depth,
429                                   struct ext4_block *bh,
430                                   uint32_t flags __unused)
431 {
432         int err;
433
434         err = ext4_block_get(inode_ref->fs->bdev, bh, pblk);
435         if (err != EOK)
436                 goto errout;
437
438         err = ext4_ext_check(inode_ref, ext_block_hdr(bh), depth, pblk);
439         if (err != EOK)
440                 goto errout;
441
442         return EOK;
443 errout:
444         if (bh->lb_id)
445                 ext4_block_set(inode_ref->fs->bdev, bh);
446
447         return err;
448 }
449
450 /*
451  * ext4_ext_binsearch_idx:
452  * binary search for the closest index of the given block
453  * the header must be checked before calling this
454  */
455 static void ext4_ext_binsearch_idx(struct ext4_extent_path *path,
456                                    ext4_lblk_t block)
457 {
458         struct ext4_extent_header *eh = path->header;
459         struct ext4_extent_index *r, *l, *m;
460
461         l = EXT_FIRST_INDEX(eh) + 1;
462         r = EXT_LAST_INDEX(eh);
463         while (l <= r) {
464                 m = l + (r - l) / 2;
465                 if (block < to_le32(m->first_block))
466                         r = m - 1;
467                 else
468                         l = m + 1;
469         }
470
471         path->index = l - 1;
472 }
473
474 /*
475  * ext4_ext_binsearch:
476  * binary search for closest extent of the given block
477  * the header must be checked before calling this
478  */
479 static void ext4_ext_binsearch(struct ext4_extent_path *path, ext4_lblk_t block)
480 {
481         struct ext4_extent_header *eh = path->header;
482         struct ext4_extent *r, *l, *m;
483
484         if (eh->entries_count == 0) {
485                 /*
486                  * this leaf is empty:
487                  * we get such a leaf in split/add case
488                  */
489                 return;
490         }
491
492         l = EXT_FIRST_EXTENT(eh) + 1;
493         r = EXT_LAST_EXTENT(eh);
494
495         while (l <= r) {
496                 m = l + (r - l) / 2;
497                 if (block < to_le32(m->first_block))
498                         r = m - 1;
499                 else
500                         l = m + 1;
501         }
502
503         path->extent = l - 1;
504 }
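
/*
 * Both searches above pick the rightmost entry whose first_block is less
 * than or equal to the requested block.  For example, with extents
 * starting at logical blocks 0, 100 and 200, a lookup of block 150 lands
 * on the extent starting at 100.  Starting the scan at FIRST + 1 means the
 * first entry is returned even when the requested block precedes it.
 */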
505
506 static int ext4_find_extent(struct ext4_inode_ref *inode_ref, ext4_lblk_t block,
507                             struct ext4_extent_path **orig_path, uint32_t flags)
508 {
509         struct ext4_extent_header *eh;
510         struct ext4_block bh = EXT4_BLOCK_ZERO();
511         ext4_fsblk_t buf_block = 0;
512         struct ext4_extent_path *path = *orig_path;
513         int32_t depth, ppos = 0;
514         int32_t i;
515         int ret;
516
517         eh = ext_inode_hdr(inode_ref->inode);
518         depth = ext_depth(inode_ref->inode);
519
520         if (path) {
521                 ext4_ext_drop_refs(inode_ref, path, 0);
522                 if (depth > path[0].maxdepth) {
523                         free(path);
524                         *orig_path = path = NULL;
525                 }
526         }
527         if (!path) {
528                 int32_t path_depth = depth + 1;
529                 /* account possible depth increase */
530                 path = calloc(1, sizeof(struct ext4_extent_path) *
531                                      (path_depth + 1));
532                 if (!path)
533                         return ENOMEM;
534                 path[0].maxdepth = path_depth;
535         }
536         path[0].header = eh;
537         path[0].block = bh;
538
539         i = depth;
540         /* walk through the tree */
541         while (i) {
542                 ext4_ext_binsearch_idx(path + ppos, block);
543                 path[ppos].p_block = ext4_idx_pblock(path[ppos].index);
544                 path[ppos].depth = i;
545                 path[ppos].extent = NULL;
546                 buf_block = path[ppos].p_block;
547
548                 i--;
549                 ppos++;
550                 if (!path[ppos].block.lb_id ||
551                     path[ppos].block.lb_id != buf_block) {
552                         ret = read_extent_tree_block(inode_ref, buf_block, i,
553                                                      &bh, flags);
554                         if (ret != EOK) {
555                                 goto err;
556                         }
557                         if (ppos > depth) {
558                                 ext4_block_set(inode_ref->fs->bdev, &bh);
559                                 ret = EIO;
560                                 goto err;
561                         }
562
563                         eh = ext_block_hdr(&bh);
564                         path[ppos].block = bh;
565                         path[ppos].header = eh;
566                 }
567         }
568
569         path[ppos].depth = i;
570         path[ppos].extent = NULL;
571         path[ppos].index = NULL;
572
573         /* find extent */
574         ext4_ext_binsearch(path + ppos, block);
575         /* if not an empty leaf */
576         if (path[ppos].extent)
577                 path[ppos].p_block = ext4_ext_pblock(path[ppos].extent);
578
579         *orig_path = path;
580
581         ret = EOK;
582         return ret;
583
584 err:
585         ext4_ext_drop_refs(inode_ref, path, 0);
586         free(path);
587         if (orig_path)
588                 *orig_path = NULL;
589         return ret;
590 }
591
592 static void ext4_ext_init_header(struct ext4_inode_ref *inode_ref,
593                                  struct ext4_extent_header *eh, int32_t depth)
594 {
595         eh->entries_count = 0;
596         eh->max_entries_count = to_le16(ext4_ext_max_entries(inode_ref, depth));
597         eh->magic = to_le16(EXT4_EXTENT_MAGIC);
598         eh->depth = depth;
599 }
600
601 /*
602  * Be cautious: the buffer_head returned is not yet marked dirty. */
603 static int ext4_ext_split_node(struct ext4_inode_ref *inode_ref,
604                                struct ext4_extent_path *path, int32_t at,
605                                struct ext4_extent *newext,
606                                ext4_fsblk_t *sibling, struct ext4_block *new_bh)
607 {
608         int ret;
609         ext4_fsblk_t newblock;
610         struct ext4_block bh = EXT4_BLOCK_ZERO();
611         int32_t depth = ext_depth(inode_ref->inode);
612
613         ext4_assert(sibling);
614
615         /* FIXME: currently we split at the point after the current extent. */
616         newblock = ext4_ext_new_meta_block(inode_ref, path, newext, &ret, 0);
617         if (ret)
618                 goto cleanup;
619
620         /* For write access. */
621         ret = ext4_block_get(inode_ref->fs->bdev, &bh, newblock);
622         if (ret != EOK)
623                 goto cleanup;
624
625         if (at == depth) {
626                 /* start copy from next extent */
627                 ptrdiff_t m = EXT_MAX_EXTENT(path[at].header) - path[at].extent;
628                 struct ext4_extent_header *neh;
629                 neh = ext_block_hdr(&bh);
630                 ext4_ext_init_header(inode_ref, neh, 0);
631                 if (m) {
632                         struct ext4_extent *ex;
633                         ex = EXT_FIRST_EXTENT(neh);
634                         memmove(ex, path[at].extent + 1,
635                                 sizeof(struct ext4_extent) * m);
636                         neh->entries_count =
637                             to_le16(to_le16(neh->entries_count) + m);
638                         path[at].header->entries_count = to_le16(
639                             to_le16(path[at].header->entries_count) - m);
640                         ret = ext4_ext_dirty(inode_ref, path + at);
641                         if (ret)
642                                 goto cleanup;
643                 }
644         } else {
645                 ptrdiff_t m = EXT_MAX_INDEX(path[at].header) - path[at].index;
646                 struct ext4_extent_header *neh;
647                 neh = ext_block_hdr(&bh);
648                 ext4_ext_init_header(inode_ref, neh, depth - at);
649                 if (m) {
650                         struct ext4_extent_index *ix;
651                         ix = EXT_FIRST_INDEX(neh);
652                         memmove(ix, path[at].index + 1,
653                                 sizeof(struct ext4_extent_index) * m);
654                         neh->entries_count =
655                             to_le16(to_le16(neh->entries_count) + m);
656                         path[at].header->entries_count = to_le16(
657                             to_le16(path[at].header->entries_count) - m);
658                         ret = ext4_ext_dirty(inode_ref, path + at);
659                         if (ret)
660                                 goto cleanup;
661                 }
662         }
663 cleanup:
664         if (ret) {
665                 if (bh.lb_id) {
666                         ext4_block_set(inode_ref->fs->bdev, &bh);
667                 }
668                 if (newblock)
669                         ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
670
671                 newblock = 0;
672         }
673         *sibling = newblock;
674         *new_bh = bh;
675         return ret;
676 }
677
678 static ext4_lblk_t ext4_ext_block_index(struct ext4_extent_header *eh)
679 {
680         if (eh->depth)
681                 return to_le32(EXT_FIRST_INDEX(eh)->first_block);
682
683         return to_le32(EXT_FIRST_EXTENT(eh)->first_block);
684 }
685
686 struct ext_split_trans {
687         ext4_fsblk_t ptr;
688         struct ext4_extent_path path;
689         int switch_to;
690 };
691
692 static int ext4_ext_insert_index(struct ext4_inode_ref *inode_ref,
693                                  struct ext4_extent_path *path,
694                                  int32_t at,
695                                  struct ext4_extent *newext,
696                                  ext4_lblk_t insert_index,
697                                  ext4_fsblk_t insert_block,
698                                  struct ext_split_trans *spt,
699                                  bool *need_grow)
700 {
701         struct ext4_extent_index *ix;
702         struct ext4_extent_path *curp = path + at;
703         struct ext4_block bh = EXT4_BLOCK_ZERO();
704         int32_t len;
705         int err;
706         struct ext4_extent_header *eh;
707
708         *need_grow = false;
709
710         if (curp->index && insert_index == to_le32(curp->index->first_block))
711                 return EIO;
712
713         if (to_le16(curp->header->entries_count) ==
714             to_le16(curp->header->max_entries_count)) {
715                 if (at) {
716                         struct ext4_extent_header *neh;
717                         err = ext4_ext_split_node(inode_ref, path, at, newext,
718                                                   &spt->ptr, &bh);
719                         if (err != EOK)
720                                 goto out;
721
722                         neh = ext_block_hdr(&bh);
723                         if (insert_index > to_le32(curp->index->first_block)) {
724                                 /* Decide which node the new index should
725                                  * be inserted into. */
726                                 if (to_le16(neh->entries_count) >
727                                     to_le16(curp->header->entries_count)) {
728                                         eh = curp->header;
729                                         /* insert after */
730                                         ix = EXT_LAST_INDEX(eh) + 1;
731                                 } else {
732                                         eh = neh;
733                                         ix = EXT_FIRST_INDEX(eh);
734                                 }
735                         } else {
736                                 eh = curp->header;
737                                 /* insert before */
738                                 ix = EXT_LAST_INDEX(eh);
739                         }
740                 } else {
741                         err = EOK;
742                         *need_grow = true;
743                         goto out;
744                 }
745         } else {
746                 eh = curp->header;
747                 if (curp->index == NULL) {
748                         ix = EXT_FIRST_INDEX(eh);
749                         curp->index = ix;
750                 } else if (insert_index > to_le32(curp->index->first_block)) {
751                         /* insert after */
752                         ix = curp->index + 1;
753                 } else {
754                         /* insert before */
755                         ix = curp->index;
756                 }
757         }
758
759         len = EXT_LAST_INDEX(eh) - ix + 1;
760         ext4_assert(len >= 0);
761         if (len > 0)
762                 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_index));
763
764         if (ix > EXT_MAX_INDEX(eh)) {
765                 err = EIO;
766                 goto out;
767         }
768
769         ix->first_block = to_le32(insert_index);
770         ext4_idx_store_pblock(ix, insert_block);
771         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
772
773         if (ix > EXT_LAST_INDEX(eh)) {
774                 err = EIO;
775                 goto out;
776         }
777
778         if (eh == curp->header)
779                 err = ext4_ext_dirty(inode_ref, curp);
780         else
781                 err = EOK;
782
783 out:
784         if (err != EOK || *need_grow) {
785                 if (bh.lb_id)
786                         ext4_block_set(inode_ref->fs->bdev, &bh);
787
788                 spt->ptr = 0;
789         } else if (bh.lb_id) {
790                 /* If we got a sibling index block. */
791                 ext4_extent_block_csum_set(inode_ref, ext_block_hdr(&bh));
792                 bh.dirty = true;
793
794                 spt->path.p_block = ext4_idx_pblock(ix);
795                 spt->path.depth = to_le16(eh->depth);
796                 spt->path.maxdepth = 0;
797                 spt->path.extent = NULL;
798                 spt->path.index = ix;
799                 spt->path.header = eh;
800                 spt->path.block = bh;
801
802                 /*
803                  * If newext->ee_block can be included into the
804                  * right sub-tree.
805                  */
806                 if (to_le32(newext->first_block) >=
807                     ext4_ext_block_index(ext_block_hdr(&bh)))
808                         spt->switch_to = 1;
809                 else {
810                         curp->index = ix;
811                         curp->p_block = ext4_idx_pblock(ix);
812                 }
813
814         } else {
815                 spt->ptr = 0;
816                 curp->index = ix;
817                 curp->p_block = ext4_idx_pblock(ix);
818         }
819         return err;
820 }
821
822 /*
823  * ext4_ext_correct_indexes:
824  * if leaf gets modified and modified extent is first in the leaf,
825  * then we have to correct all indexes above.
826  */
827 static int ext4_ext_correct_indexes(struct ext4_inode_ref *inode_ref,
828                                     struct ext4_extent_path *path)
829 {
830         struct ext4_extent_header *eh;
831         int32_t depth = ext_depth(inode_ref->inode);
832         struct ext4_extent *ex;
833         uint32_t border;
834         int32_t k;
835         int err = EOK;
836
837         eh = path[depth].header;
838         ex = path[depth].extent;
839
840         if (ex == NULL || eh == NULL) {
841                 return EIO;
842         }
843
844         if (depth == 0) {
845                 /* there is no tree at all */
846                 return EOK;
847         }
848
849         if (ex != EXT_FIRST_EXTENT(eh)) {
850                 /* we correct tree if first leaf got modified only */
851                 return EOK;
852         }
853
854         /*
855          * TODO: we need correction if border is smaller than current one
856          */
857         k = depth - 1;
858         border = path[depth].extent->first_block;
859         path[k].index->first_block = border;
860         err = ext4_ext_dirty(inode_ref, path + k);
861         if (err != EOK)
862                 return err;
863
864         while (k--) {
865                 /* change all left-side indexes */
866                 if (path[k + 1].index != EXT_FIRST_INDEX(path[k + 1].header))
867                         break;
868                 path[k].index->first_block = border;
869                 err = ext4_ext_dirty(inode_ref, path + k);
870                 if (err != EOK)
871                         break;
872         }
873
874         return err;
875 }
876
877 static bool ext4_ext_can_prepend(struct ext4_extent *ex1,
878                                  struct ext4_extent *ex2)
879 {
880         if (ext4_ext_pblock(ex2) + ext4_ext_get_actual_len(ex2) !=
881             ext4_ext_pblock(ex1))
882                 return false;
883
884 #ifdef AGGRESSIVE_TEST
885         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
886                 return 0;
887 #else
888         if (ext4_ext_is_unwritten(ex1)) {
889                 if (ext4_ext_get_actual_len(ex1) +
890                         ext4_ext_get_actual_len(ex2) >
891                     EXT_UNWRITTEN_MAX_LEN)
892                         return false;
893         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
894                    EXT_INIT_MAX_LEN)
895                 return false;
896 #endif
897
898         if (to_le32(ex2->first_block) + ext4_ext_get_actual_len(ex2) !=
899             to_le32(ex1->first_block))
900                 return false;
901
902         return true;
903 }
904
905 static bool ext4_ext_can_append(struct ext4_extent *ex1,
906                                 struct ext4_extent *ex2)
907 {
908         if (ext4_ext_pblock(ex1) + ext4_ext_get_actual_len(ex1) !=
909             ext4_ext_pblock(ex2))
910                 return false;
911
912 #ifdef AGGRESSIVE_TEST
913         if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) > 4)
914                 return 0;
915 #else
916         if (ext4_ext_is_unwritten(ex1)) {
917                 if (ext4_ext_get_actual_len(ex1) +
918                         ext4_ext_get_actual_len(ex2) >
919                     EXT_UNWRITTEN_MAX_LEN)
920                         return false;
921         } else if (ext4_ext_get_actual_len(ex1) + ext4_ext_get_actual_len(ex2) >
922                    EXT_INIT_MAX_LEN)
923                 return false;
924 #endif
925
926         if (to_le32(ex1->first_block) + ext4_ext_get_actual_len(ex1) !=
927             to_le32(ex2->first_block))
928                 return false;
929
930         return true;
931 }
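
/*
 * For illustration: ext4_ext_can_append(ex1, ex2) holds when ex2 continues
 * ex1 both logically and physically and the merged length stays within the
 * per-extent limit.  E.g. ex1 = {first_block 10, pblock 1000, len 5} and
 * ex2 = {first_block 15, pblock 1005, len 3} may be merged into
 * {first_block 10, pblock 1000, len 8}; ext4_ext_can_prepend() is the
 * mirrored check for gluing ex2 in front of ex1.
 */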
932
933 static int ext4_ext_insert_leaf(struct ext4_inode_ref *inode_ref,
934                                 struct ext4_extent_path *path,
935                                 int32_t at,
936                                 struct ext4_extent *newext,
937                                 struct ext_split_trans *spt,
938                                 uint32_t flags,
939                                 bool *need_grow)
940 {
941         struct ext4_extent_path *curp = path + at;
942         struct ext4_extent *ex = curp->extent;
943         struct ext4_block bh = EXT4_BLOCK_ZERO();
944         int32_t len;
945         int err = EOK;
946         int unwritten;
947         struct ext4_extent_header *eh = NULL;
948
949         *need_grow = false;
950
951         if (curp->extent &&
952             to_le32(newext->first_block) == to_le32(curp->extent->first_block))
953                 return EIO;
954
955         if (!(flags & EXT4_EXT_NO_COMBINE)) {
956                 if (curp->extent && ext4_ext_can_append(curp->extent, newext)) {
957                         unwritten = ext4_ext_is_unwritten(curp->extent);
958                         curp->extent->block_count =
959                             to_le16(ext4_ext_get_actual_len(curp->extent) +
960                                     ext4_ext_get_actual_len(newext));
961                         if (unwritten)
962                                 ext4_ext_mark_unwritten(curp->extent);
963                         err = ext4_ext_dirty(inode_ref, curp);
964                         goto out;
965                 }
966
967                 if (curp->extent &&
968                     ext4_ext_can_prepend(curp->extent, newext)) {
969                         unwritten = ext4_ext_is_unwritten(curp->extent);
970                         curp->extent->first_block = newext->first_block;
971                         curp->extent->block_count =
972                             to_le16(ext4_ext_get_actual_len(curp->extent) +
973                                     ext4_ext_get_actual_len(newext));
974                         if (unwritten)
975                                 ext4_ext_mark_unwritten(curp->extent);
976                         err = ext4_ext_dirty(inode_ref, curp);
977                         goto out;
978                 }
979         }
980
981         if (to_le16(curp->header->entries_count) ==
982             to_le16(curp->header->max_entries_count)) {
983                 if (at) {
984                         struct ext4_extent_header *neh;
985                         err = ext4_ext_split_node(inode_ref, path, at, newext,
986                                                   &spt->ptr, &bh);
987                         if (err != EOK)
988                                 goto out;
989
990                         neh = ext_block_hdr(&bh);
991                         if (to_le32(newext->first_block) >
992                             to_le32(curp->extent->first_block)) {
993                                 if (to_le16(neh->entries_count) >
994                                     to_le16(curp->header->entries_count)) {
995                                         eh = curp->header;
996                                         /* insert after */
997                                         ex = EXT_LAST_EXTENT(eh) + 1;
998                                 } else {
999                                         eh = neh;
1000                                         ex = EXT_FIRST_EXTENT(eh);
1001                                 }
1002                         } else {
1003                                 eh = curp->header;
1004                                 /* insert before */
1005                                 ex = EXT_LAST_EXTENT(eh);
1006                         }
1007                 } else {
1008                         err = EOK;
1009                         *need_grow = true;
1010                         goto out;
1011                 }
1012         } else {
1013                 eh = curp->header;
1014                 if (curp->extent == NULL) {
1015                         ex = EXT_FIRST_EXTENT(eh);
1016                         curp->extent = ex;
1017                 } else if (to_le32(newext->first_block) >
1018                            to_le32(curp->extent->first_block)) {
1019                         /* insert after */
1020                         ex = curp->extent + 1;
1021                 } else {
1022                         /* insert before */
1023                         ex = curp->extent;
1024                 }
1025         }
1026
1027         len = EXT_LAST_EXTENT(eh) - ex + 1;
1028         ext4_assert(len >= 0);
1029         if (len > 0)
1030                 memmove(ex + 1, ex, len * sizeof(struct ext4_extent));
1031
1032         if (ex > EXT_MAX_EXTENT(eh)) {
1033                 err = EIO;
1034                 goto out;
1035         }
1036
1037         ex->first_block = newext->first_block;
1038         ex->block_count = newext->block_count;
1039         ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
1040         eh->entries_count = to_le16(to_le16(eh->entries_count) + 1);
1041
1042         if (ex > EXT_LAST_EXTENT(eh)) {
1043                 err = EIO;
1044                 goto out;
1045         }
1046
1047         if (eh == curp->header) {
1048                 err = ext4_ext_correct_indexes(inode_ref, path);
1049                 if (err != EOK)
1050                         goto out;
1051                 err = ext4_ext_dirty(inode_ref, curp);
1052         } else
1053                 err = EOK;
1054
1055 out:
1056         if (err != EOK || *need_grow) {
1057                 if (bh.lb_id)
1058                         ext4_block_set(inode_ref->fs->bdev, &bh);
1059
1060                 spt->ptr = 0;
1061         } else if (bh.lb_id) {
1062                 /* If we got a sibling leaf. */
1063                 ext4_extent_block_csum_set(inode_ref, ext_block_hdr(&bh));
1064                 bh.dirty = true;
1065
1066                 spt->path.p_block = ext4_ext_pblock(ex);
1067                 spt->path.depth = to_le16(eh->depth);
1068                 spt->path.maxdepth = 0;
1069                 spt->path.extent = ex;
1070                 spt->path.index = NULL;
1071                 spt->path.header = eh;
1072                 spt->path.block = bh;
1073
1074                 /*
1075                  * If newext->ee_block can be included into the
1076                  * right sub-tree.
1077                  */
1078                 if (to_le32(newext->first_block) >=
1079                     ext4_ext_block_index(ext_block_hdr(&bh)))
1080                         spt->switch_to = 1;
1081                 else {
1082                         curp->extent = ex;
1083                         curp->p_block = ext4_ext_pblock(ex);
1084                 }
1085
1086         } else {
1087                 spt->ptr = 0;
1088                 curp->extent = ex;
1089                 curp->p_block = ext4_ext_pblock(ex);
1090         }
1091
1092         return err;
1093 }
1094
1095 /*
1096  * ext4_ext_grow_indepth:
1097  * implements tree growing procedure:
1098  * - allocates new block
1099  * - moves top-level data (index block or leaf) into the new block
1100  * - initializes new top-level, creating index that points to the
1101  *   just created block
1102  */
1103 static int ext4_ext_grow_indepth(struct ext4_inode_ref *inode_ref,
1104                                  uint32_t flags)
1105 {
1106         struct ext4_extent_header *neh;
1107         struct ext4_block bh = EXT4_BLOCK_ZERO();
1108         ext4_fsblk_t newblock, goal = 0;
1109         int err = EOK;
1110
1111         /* Try to prepend new index to old one */
1112         if (ext_depth(inode_ref->inode))
1113                 goal = ext4_idx_pblock(
1114                     EXT_FIRST_INDEX(ext_inode_hdr(inode_ref->inode)));
1115         else
1116                 goal = ext4_fs_inode_to_goal_block(inode_ref);
1117
1118         newblock = ext4_new_meta_blocks(inode_ref, goal, flags, NULL, &err);
1119         if (newblock == 0)
1120                 return err;
1121
1122         /* Fetch the newly allocated block; the old top level will be copied into it. */
1123         err = ext4_block_get(inode_ref->fs->bdev, &bh, newblock);
1124         if (err != EOK) {
1125                 ext4_ext_free_blocks(inode_ref, newblock, 1, 0);
1126                 return err;
1127         }
1128
1129         /* move top-level index/leaf into new block */
1130         memmove(bh.data, inode_ref->inode->blocks,
1131                 sizeof(inode_ref->inode->blocks));
1132
1133         /* set size of new block */
1134         neh = ext_block_hdr(&bh);
1135         /* The old root could hold either indexes or leaves,
1136          * so calculate e_max the right way */
1137         if (ext_depth(inode_ref->inode))
1138                 neh->max_entries_count =
1139                     to_le16(ext4_ext_space_block_idx(inode_ref));
1140         else
1141                 neh->max_entries_count =
1142                     to_le16(ext4_ext_space_block(inode_ref));
1143
1144         neh->magic = to_le16(EXT4_EXTENT_MAGIC);
1145
1146         /* Update top-level index: num,max,pointer */
1147         neh = ext_inode_hdr(inode_ref->inode);
1148         neh->entries_count = to_le16(1);
1149         ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1150         if (neh->depth == 0) {
1151                 /* Root extent block becomes index block */
1152                 neh->max_entries_count =
1153                     to_le16(ext4_ext_space_root_idx(inode_ref));
1154                 EXT_FIRST_INDEX(neh)
1155                     ->first_block = EXT_FIRST_EXTENT(neh)->first_block;
1156         }
1157         neh->depth = to_le16(to_le16(neh->depth) + 1);
1158
1159         ext4_extent_block_csum_set(inode_ref, neh);
1160         bh.dirty = true;
1161         inode_ref->dirty = true;
1162         ext4_block_set(inode_ref->fs->bdev, &bh);
1163
1164         return err;
1165 }
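
/*
 * Shape of the transformation performed above, sketched: the root stored in
 * inode->blocks (depth N) is copied verbatim into a freshly allocated block,
 * and the in-inode root is rewritten as a depth N + 1 node holding a single
 * index entry that points at that new block.  The new index keeps the
 * first_block of the old root's first entry, so lookups keep resolving to
 * the same leaves.
 */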
1166
1167 __unused static void print_path(struct ext4_extent_path *path)
1168 {
1169         int32_t i = path->depth;
1170         while (i >= 0) {
1171
1172                 ptrdiff_t a =
1173                     (path->extent)
1174                         ? (path->extent - EXT_FIRST_EXTENT(path->header))
1175                         : 0;
1176                 ptrdiff_t b =
1177                     (path->index)
1178                         ? (path->index - EXT_FIRST_INDEX(path->header))
1179                         : 0;
1180
1181                 (void)a;
1182                 (void)b;
1183                 ext4_dbg(DEBUG_EXTENT,
1184                          "depth %" PRId32 ", p_block: %" PRIu64 ","
1185                          "p_ext offset: %td, p_idx offset: %td\n",
1186                          i, path->p_block, a, b);
1187                 i--;
1188                 path++;
1189         }
1190 }
1191
1192 static void ext4_ext_replace_path(struct ext4_inode_ref *inode_ref,
1193                                   struct ext4_extent_path *path,
1194                                   struct ext_split_trans *spt,
1195                                   int32_t level)
1196 {
1197         int32_t depth = ext_depth(inode_ref->inode);
1198         int32_t i = depth - level;
1199         ext4_ext_drop_refs(inode_ref, path + i, 1);
1200         path[i] = spt[level].path;
1201 }
1202
1203 static int ext4_ext_insert_extent(struct ext4_inode_ref *inode_ref,
1204                                   struct ext4_extent_path **ppath,
1205                                   struct ext4_extent *newext, uint32_t flags)
1206 {
1207         int32_t i, depth, level;
1208         int ret = EOK;
1209         ext4_fsblk_t ptr = 0;
1210         bool need_grow = false;
1211         struct ext4_extent_path *path = *ppath;
1212         struct ext_split_trans *spt = NULL;
1213         struct ext_split_trans newblock;
1214
1215         memset(&newblock, 0, sizeof(newblock));
1216
1217         depth = ext_depth(inode_ref->inode);
1218         for (i = depth, level = 0; i >= 0; i--, level++)
1219                 if (EXT_HAS_FREE_INDEX(path + i))
1220                         break;
1221
1222         if (level) {
1223                 spt = calloc(1, sizeof(struct ext_split_trans) * (level));
1224                 if (!spt) {
1225                         ret = ENOMEM;
1226                         goto out;
1227                 }
1228         }
1229         i = 0;
1230 again:
1231         depth = ext_depth(inode_ref->inode);
1232
1233         do {
1234                 if (!i) {
1235                         ret = ext4_ext_insert_leaf(inode_ref, path, depth - i,
1236                                                    newext, &newblock, flags,
1237                                                    &need_grow);
1238                 } else {
1239                         ret = ext4_ext_insert_index(
1240                             inode_ref, path, depth - i, newext,
1241                             ext4_ext_block_index(
1242                                 ext_block_hdr(&spt[i - 1].path.block)),
1243                             spt[i - 1].ptr, &newblock,
1244                             &need_grow);
1245                 }
1246                 ptr = newblock.ptr;
1247
1248                 if (ret != EOK)
1249                         goto out;
1250
1251                 else if (spt && ptr && !ret) {
1252                         /* Prepare for the next iteration after splitting. */
1253                         spt[i] = newblock;
1254                 }
1255
1256                 i++;
1257         } while (ptr != 0 && i <= depth);
1258
1259         if (need_grow) {
1260                 ret = ext4_ext_grow_indepth(inode_ref, 0);
1261                 if (ret)
1262                         goto out;
1263                 ret = ext4_find_extent(inode_ref, to_le32(newext->first_block),
1264                                        ppath, 0);
1265                 if (ret)
1266                         goto out;
1267                 i = depth;
1268                 path = *ppath;
1269                 goto again;
1270         }
1271 out:
1272         if (ret) {
1273                 if (path)
1274                         ext4_ext_drop_refs(inode_ref, path, 0);
1275
1276                 while (--level >= 0 && spt) {
1277                         if (spt[level].ptr) {
1278                                 ext4_ext_free_blocks(inode_ref, spt[level].ptr,
1279                                                      1, 0);
1280                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1281                                                    1);
1282                         }
1283                 }
1284         } else {
1285                 while (--level >= 0 && spt) {
1286                         if (spt[level].switch_to)
1287                                 ext4_ext_replace_path(inode_ref, path, spt,
1288                                                       level);
1289                         else if (spt[level].ptr)
1290                                 ext4_ext_drop_refs(inode_ref, &spt[level].path,
1291                                                    1);
1292                 }
1293         }
1294         if (spt)
1295                 free(spt);
1296
1297         return ret;
1298 }
1299
1300 static void ext4_ext_remove_blocks(struct ext4_inode_ref *inode_ref,
1301                                    struct ext4_extent *ex, ext4_lblk_t from,
1302                                    ext4_lblk_t to)
1303 {
1304         ext4_lblk_t len = to - from + 1;
1305         ext4_lblk_t num;
1306         ext4_fsblk_t start;
1307         num = from - to_le32(ex->first_block);
1308         start = ext4_ext_pblock(ex) + num;
1309         ext4_dbg(DEBUG_EXTENT,
1310                  "Freeing %" PRIu32 " at %" PRIu64 ", %" PRIu32 "\n", from,
1311                  start, len);
1312
1313         ext4_ext_free_blocks(inode_ref, start, len, 0);
1314 }
1315
1316 static int ext4_ext_remove_idx(struct ext4_inode_ref *inode_ref,
1317                                struct ext4_extent_path *path, int32_t depth)
1318 {
1319         int err = EOK;
1320         int32_t i = depth;
1321         ext4_fsblk_t leaf;
1322
1323         /* free index block */
1324         leaf = ext4_idx_pblock(path[i].index);
1325
1326         if (path[i].index != EXT_LAST_INDEX(path[i].header)) {
1327                 ptrdiff_t len = EXT_LAST_INDEX(path[i].header) - path[i].index;
1328                 memmove(path[i].index, path[i].index + 1,
1329                         len * sizeof(struct ext4_extent_index));
1330         }
1331
1332         path[i].header->entries_count =
1333             to_le16(to_le16(path[i].header->entries_count) - 1);
1334         err = ext4_ext_dirty(inode_ref, path + i);
1335         if (err != EOK)
1336                 return err;
1337
1338         ext4_dbg(DEBUG_EXTENT, "IDX: Freeing %" PRIu32 " at %" PRIu64 ", %d\n",
1339                  to_le32(path[i].index->first_block), leaf, 1);
1340         ext4_ext_free_blocks(inode_ref, leaf, 1, 0);
1341
1342         while (i > 0) {
1343                 if (path[i].index != EXT_FIRST_INDEX(path[i].header))
1344                         break;
1345
1346                 path[i - 1].index->first_block = path[i].index->first_block;
1347                 err = ext4_ext_dirty(inode_ref, path + i - 1);
1348                 if (err != EOK)
1349                         break;
1350
1351                 i--;
1352         }
1353         return err;
1354 }
1355
1356 static int ext4_ext_remove_leaf(struct ext4_inode_ref *inode_ref,
1357                                 struct ext4_extent_path *path, ext4_lblk_t from,
1358                                 ext4_lblk_t to)
1359 {
1360
1361         int32_t depth = ext_depth(inode_ref->inode);
1362         struct ext4_extent *ex = path[depth].extent;
1363         struct ext4_extent *start_ex, *ex2 = NULL;
1364         struct ext4_extent_header *eh = path[depth].header;
1365         int32_t len;
1366         int err = EOK;
1367         uint16_t new_entries;
1368
1369         start_ex = ex;
1370         new_entries = to_le16(eh->entries_count);
1371         while (ex <= EXT_LAST_EXTENT(path[depth].header) &&
1372                to_le32(ex->first_block) <= to) {
1373                 int32_t new_len = 0;
1374                 int unwritten;
1375                 ext4_lblk_t start, new_start;
1376                 ext4_fsblk_t newblock;
1377                 new_start = start = to_le32(ex->first_block);
1378                 len = ext4_ext_get_actual_len(ex);
1379                 newblock = ext4_ext_pblock(ex);
1380                 if (start < from) {
1381                         len -= from - start;
1382                         new_len = from - start;
1383                         start = from;
1384                         start_ex++;
1385                 } else {
1386                         if (start + len - 1 > to) {
1387                                 new_len = start + len - 1 - to;
1388                                 len -= new_len;
1389                                 new_start = to + 1;
1390                                 newblock += to + 1 - start;
1391                                 ex2 = ex;
1392                         }
1393                 }
1394
1395                 ext4_ext_remove_blocks(inode_ref, ex, start, start + len - 1);
1396                 ex->first_block = to_le32(new_start);
1397                 if (!new_len)
1398                         new_entries--;
1399                 else {
1400                         unwritten = ext4_ext_is_unwritten(ex);
1401                         ex->block_count = to_le16(new_len);
1402                         ext4_ext_store_pblock(ex, newblock);
1403                         if (unwritten)
1404                                 ext4_ext_mark_unwritten(ex);
1405                 }
1406
1407                 ex += 1;
1408         }
1409
1410         if (ex2 == NULL)
1411                 ex2 = ex;
1412
1413         if (ex2 <= EXT_LAST_EXTENT(eh))
1414                 memmove(start_ex, ex2, (EXT_LAST_EXTENT(eh) - ex2 + 1) * sizeof(struct ext4_extent));
1415
1416         eh->entries_count = to_le16(new_entries);
1417         ext4_ext_dirty(inode_ref, path + depth);
1418         if (path[depth].extent == EXT_FIRST_EXTENT(eh) && eh->entries_count)
1419                 err = ext4_ext_correct_indexes(inode_ref, path);
1420
1421         /* if this leaf is now empty, we should
1422          * remove it from the index block above */
1423         if (err == EOK && eh->entries_count == 0 && path[depth].block.lb_id)
1424                 err = ext4_ext_remove_idx(inode_ref, path, depth - 1);
1425         else if (depth > 0)
1426                 path[depth - 1].index++;
1427
1428         return err;
1429 }
1430
1431 static bool ext4_ext_more_to_rm(struct ext4_extent_path *path, ext4_lblk_t to)
1432 {
1433         if (!to_le16(path->header->entries_count))
1434                 return false;
1435
1436         if (path->index > EXT_LAST_INDEX(path->header))
1437                 return false;
1438
1439         if (to_le32(path->index->first_block) > to)
1440                 return false;
1441
1442         return true;
1443 }
1444
1445 int ext4_extent_remove_space(struct ext4_inode_ref *inode_ref, ext4_lblk_t from,
1446                           ext4_lblk_t to)
1447 {
1448         struct ext4_extent_path *path = NULL;
1449         int ret = EOK;
1450         int32_t depth = ext_depth(inode_ref->inode);
1451         int32_t i;
1452
1453         ret = ext4_find_extent(inode_ref, from, &path, 0);
1454         if (ret)
1455                 goto out;
1456
1457         if (!path[depth].extent) {
1458                 ret = EOK;
1459                 goto out;
1460         }
1461
1462         bool in_range = IN_RANGE(from, to_le32(path[depth].extent->first_block),
1463                         ext4_ext_get_actual_len(path[depth].extent));
1464
1465         if (!in_range) {
1466                 ret = EOK;
1467                 goto out;
1468         }
1469
1470         /* the range to remove lies strictly inside a single extent: split it in two */
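             /*
              * Worked example (values assumed for illustration): an extent
              * mapping logical blocks [10, 29] to physical blocks [1000, 1019],
              * with from = 15 and to = 19, is cut down to [10, 14] below, and a
              * second extent [20, 29] starting at physical block 1010 is
              * inserted for the surviving tail.
              */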
1471         if ((to_le32(path[depth].extent->first_block) < from) &&
1472             (to < to_le32(path[depth].extent->first_block) +
1473                         ext4_ext_get_actual_len(path[depth].extent) - 1)) {
1474
1475                 struct ext4_extent *ex = path[depth].extent, newex;
1476                 int unwritten = ext4_ext_is_unwritten(ex);
1477                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1478                 int32_t len = ext4_ext_get_actual_len(ex);
1479                 ext4_fsblk_t newblock =
1480                         to + 1 - ee_block + ext4_ext_pblock(ex);
1481
1482                 ex->block_count = to_le16(from - ee_block);
1483                 if (unwritten)
1484                         ext4_ext_mark_unwritten(ex);
1485
1486                 ext4_ext_dirty(inode_ref, path + depth);
1487
1488                 newex.first_block = to_le32(to + 1);
1489                 newex.block_count = to_le16(ee_block + len - 1 - to);
1490                 ext4_ext_store_pblock(&newex, newblock);
1491                 if (unwritten)
1492                         ext4_ext_mark_unwritten(&newex);
1493
1494                 ret = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1495                 goto out;
1496         }
1497
1498         i = depth;
1499         while (i >= 0) {
1500                 if (i == depth) {
1501                         struct ext4_extent_header *eh;
1502                         struct ext4_extent *first_ex, *last_ex;
1503                         ext4_lblk_t leaf_from, leaf_to;
1504                         eh = path[i].header;
1505                         ext4_assert(to_le16(eh->entries_count) > 0);
1506                         first_ex = EXT_FIRST_EXTENT(eh);
1507                         last_ex = EXT_LAST_EXTENT(eh);
1508                         leaf_from = to_le32(first_ex->first_block);
1509                         leaf_to = to_le32(last_ex->first_block) +
1510                                   ext4_ext_get_actual_len(last_ex) - 1;
1511                         if (leaf_from < from)
1512                                 leaf_from = from;
1513
1514                         if (leaf_to > to)
1515                                 leaf_to = to;
1516
1517                         ext4_ext_remove_leaf(inode_ref, path, leaf_from,
1518                                         leaf_to);
1519                         ext4_ext_drop_refs(inode_ref, path + i, 0);
1520                         i--;
1521                         continue;
1522                 }
1523
1524                 struct ext4_extent_header *eh;
1525                 eh = path[i].header;
1526                 if (ext4_ext_more_to_rm(path + i, to)) {
1527                         struct ext4_block bh = EXT4_BLOCK_ZERO();
1528                         if (path[i + 1].block.lb_id)
1529                                 ext4_ext_drop_refs(inode_ref, path + i + 1, 0);
1530
1531                         ret = read_extent_tree_block(inode_ref,
1532                                         ext4_idx_pblock(path[i].index),
1533                                         depth - i - 1, &bh, 0);
1534                         if (ret)
1535                                 goto out;
1536
1537                         path[i].p_block =
1538                                         ext4_idx_pblock(path[i].index);
1539                         path[i + 1].block = bh;
1540                         path[i + 1].header = ext_block_hdr(&bh);
1541                         path[i + 1].depth = depth - i - 1;
1542                         if (i + 1 == depth)
1543                                 path[i + 1].extent = EXT_FIRST_EXTENT(
1544                                         path[i + 1].header);
1545                         else
1546                                 path[i + 1].index =
1547                                         EXT_FIRST_INDEX(path[i + 1].header);
1548
1549                         i++;
1550                 } else {
1551                         if (i > 0) {
1552                                 if (!eh->entries_count)
1553                                         ret = ext4_ext_remove_idx(inode_ref, path,
1554                                                         i - 1);
1555                                 else
1556                                         path[i - 1].index++;
1557
1558                         }
1559
1560                         if (i)
1561                                 ext4_block_set(inode_ref->fs->bdev,
1562                                                 &path[i].block);
1563
1564
1565                         i--;
1566                 }
1567
1568         }
1569
1570         /* TODO: flexible tree reduction should be here */
1571         if (path->header->entries_count == 0) {
1572                 /*
1573                  * the truncate freed the whole tree,
1574                  * so we need to correct the depth in the root header
1575                  */
1576                 ext_inode_hdr(inode_ref->inode)->depth = 0;
1577                 ext_inode_hdr(inode_ref->inode)->max_entries_count =
1578                     to_le16(ext4_ext_space_root(inode_ref));
1579                 ret = ext4_ext_dirty(inode_ref, path);
1580         }
1581
1582 out:
1583         ext4_ext_drop_refs(inode_ref, path, 0);
1584         free(path);
1585         path = NULL;
1586         return ret;
1587 }
1588
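     /*
      * Worked example (values assumed for illustration): splitting an extent
      * that maps logical blocks [100, 199] to physical blocks [5000, 5099] at
      * split = 140 shrinks the original extent to [100, 139] (still starting
      * at physical block 5000) and inserts a new extent [140, 199] starting at
      * physical block 5040; the EXT4_EXT_MARK_UNWRIT* flags select which of
      * the two halves ends up marked unwritten.
      */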
1589 static int ext4_ext_split_extent_at(struct ext4_inode_ref *inode_ref,
1590                                     struct ext4_extent_path **ppath,
1591                                     ext4_lblk_t split, uint32_t split_flag)
1592 {
1593         struct ext4_extent *ex, newex;
1594         ext4_fsblk_t newblock;
1595         ext4_lblk_t ee_block;
1596         int32_t ee_len;
1597         int32_t depth = ext_depth(inode_ref->inode);
1598         int err = EOK;
1599
1600         ex = (*ppath)[depth].extent;
1601         ee_block = to_le32(ex->first_block);
1602         ee_len = ext4_ext_get_actual_len(ex);
1603         newblock = split - ee_block + ext4_ext_pblock(ex);
1604
1605         if (split == ee_block) {
1606                 /*
1607                  * case b: block @split is the first block of the extent;
1608                  * we just change the state of the extent, and splitting
1609                  * is not needed.
1610                  */
1611                 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1612                         ext4_ext_mark_unwritten(ex);
1613                 else
1614                         ext4_ext_mark_initialized(ex);
1615
1616                 err = ext4_ext_dirty(inode_ref, *ppath + depth);
1617                 goto out;
1618         }
1619
1620         ex->block_count = to_le16(split - ee_block);
1621         if (split_flag & EXT4_EXT_MARK_UNWRIT1)
1622                 ext4_ext_mark_unwritten(ex);
1623
1624         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1625         if (err != EOK)
1626                 goto out;
1627
1628         newex.first_block = to_le32(split);
1629         newex.block_count = to_le16(ee_len - (split - ee_block));
1630         ext4_ext_store_pblock(&newex, newblock);
1631         if (split_flag & EXT4_EXT_MARK_UNWRIT2)
1632                 ext4_ext_mark_unwritten(&newex);
1633         err = ext4_ext_insert_extent(inode_ref, ppath, &newex,
1634                                      EXT4_EXT_NO_COMBINE);
1635         if (err != EOK)
1636                 goto restore_extent_len;
1637
1638 out:
1639         return err;
1640 restore_extent_len:
1641         ex->block_count = to_le16(ee_len);
1642         err = ext4_ext_dirty(inode_ref, *ppath + depth);
1643         return err;
1644 }
1645
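     /*
      * Worked example (values assumed for illustration): for an unwritten
      * extent covering logical blocks [0, 99], initializing blocks = 10
      * starting at split = 40 takes the last branch below: the extent is
      * first split at 50 (both halves still unwritten) and then at 40,
      * leaving [40, 49] as the initialized middle piece while [0, 39] and
      * [50, 99] stay unwritten.
      */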
1646 static int ext4_ext_convert_to_initialized(struct ext4_inode_ref *inode_ref,
1647                                            struct ext4_extent_path **ppath,
1648                                            ext4_lblk_t split, uint32_t blocks)
1649 {
1650         int32_t depth = ext_depth(inode_ref->inode), err = EOK;
1651         struct ext4_extent *ex = (*ppath)[depth].extent;
1652
1653         ext4_assert(to_le32(ex->first_block) <= split);
1654
1655         if (split + blocks ==
1656             to_le32(ex->first_block) + ext4_ext_get_actual_len(ex)) {
1657                 /* split and initialize right part */
1658                 err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1659                                                EXT4_EXT_MARK_UNWRIT1);
1660         } else if (to_le32(ex->first_block) == split) {
1661                 /* split and initialize left part */
1662                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1663                                                EXT4_EXT_MARK_UNWRIT2);
1664         } else {
1665                 /* split 1 extent to 3 and initialize the 2nd */
1666                 err = ext4_ext_split_extent_at(inode_ref, ppath, split + blocks,
1667                                                EXT4_EXT_MARK_UNWRIT1 |
1668                                                    EXT4_EXT_MARK_UNWRIT2);
1669                 if (!err) {
1670                         err = ext4_ext_split_extent_at(inode_ref, ppath, split,
1671                                                        EXT4_EXT_MARK_UNWRIT1);
1672                 }
1673         }
1674
1675         return err;
1676 }
1677
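     /*
      * Returns the logical block number of the first block mapped after the
      * extent the path currently points to: the walk starts at the leaf and
      * climbs towards the root until a node with a neighbour to the right is
      * found; EXT_MAX_BLOCKS means the path already points at the rightmost
      * extent in the tree.
      */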
1678 static ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_extent_path *path)
1679 {
1680         int32_t depth;
1681
1682         depth = path->depth;
1683
1684         if (depth == 0 && path->extent == NULL)
1685                 return EXT_MAX_BLOCKS;
1686
1687         while (depth >= 0) {
1688                 if (depth == path->depth) {
1689                         /* leaf */
1690                         if (path[depth].extent &&
1691                             path[depth].extent !=
1692                                 EXT_LAST_EXTENT(path[depth].header))
1693                                 return to_le32(
1694                                     path[depth].extent[1].first_block);
1695                 } else {
1696                         /* index */
1697                         if (path[depth].index !=
1698                             EXT_LAST_INDEX(path[depth].header))
1699                                 return to_le32(
1700                                     path[depth].index[1].first_block);
1701                 }
1702                 depth--;
1703         }
1704
1705         return EXT_MAX_BLOCKS;
1706 }
1707
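     /*
      * Zero-fills 'blocks_count' physical blocks starting at 'block' through
      * the block cache; used before converting an unwritten extent to an
      * initialized one so that stale on-disk data is never exposed.
      */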
1708 static int ext4_ext_zero_unwritten_range(struct ext4_inode_ref *inode_ref,
1709                                          ext4_fsblk_t block,
1710                                          uint32_t blocks_count)
1711 {
1712         int err = EOK;
1713         uint32_t i;
1714         uint32_t block_size = ext4_sb_get_block_size(&inode_ref->fs->sb);
1715         for (i = 0; i < blocks_count; i++) {
1716                 struct ext4_block bh = EXT4_BLOCK_ZERO();
1717                 err = ext4_block_get(inode_ref->fs->bdev, &bh, block + i);
1718                 if (err != EOK)
1719                         break;
1720
1721                 memset(bh.data, 0, block_size);
1722                 bh.dirty = true;
1723                 err = ext4_block_set(inode_ref->fs->bdev, &bh);
1724                 if (err != EOK)
1725                         break;
1726         }
1727         return err;
1728 }
1729
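     /*
      * Usage sketch (illustrative only; the variables shown are assumptions,
      * not defined in this file). Mapping up to 4 blocks at logical block 10,
      * allocating them if they are not present yet, could look like:
      *
      *     ext4_fsblk_t first = 0;
      *     uint32_t cnt = 0;
      *     int r = ext4_extent_get_blocks(&ref, 10, 4, &first, true, &cnt);
      *     // on success 'first' is the physical block backing logical block
      *     // 10 and 'cnt' (<= 4) contiguous blocks are mapped
      */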
1730 int ext4_extent_get_blocks(struct ext4_inode_ref *inode_ref, ext4_fsblk_t iblock,
1731                         uint32_t max_blocks, ext4_fsblk_t *result, bool create,
1732                         uint32_t *blocks_count)
1733 {
1734         struct ext4_extent_path *path = NULL;
1735         struct ext4_extent newex, *ex;
1736         ext4_fsblk_t goal;
1737         int err = EOK;
1738         int32_t depth;
1739         uint32_t allocated = 0;
1740         ext4_fsblk_t next, newblock;
1741
1742         if (result)
1743                 *result = 0;
1744
1745         if (blocks_count)
1746                 *blocks_count = 0;
1747
1748         /* find extent for this block */
1749         err = ext4_find_extent(inode_ref, iblock, &path, 0);
1750         if (err != EOK) {
1751                 path = NULL;
1752                 goto out2;
1753         }
1754
1755         depth = ext_depth(inode_ref->inode);
1756
1757         /*
1758          * a consistent leaf must not be empty;
1759          * this situation is possible, though, _during_ tree modification;
1760          * this is why the assert can't be put in ext4_ext_find_extent()
1761          */
1762         ex = path[depth].extent;
1763         if (ex) {
1764                 ext4_lblk_t ee_block = to_le32(ex->first_block);
1765                 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
1766                 uint16_t ee_len = ext4_ext_get_actual_len(ex);
1767                 /* if the found extent covers the block, simply return it */
1768                 if (IN_RANGE(iblock, ee_block, ee_len)) {
1769                         /* number of remaining blocks in the extent */
1770                         allocated = ee_len - (iblock - ee_block);
1771
1772                         if (!ext4_ext_is_unwritten(ex)) {
1773                                 newblock = iblock - ee_block + ee_start;
1774                                 goto out;
1775                         }
1776
1777                         if (!create) {
1778                                 newblock = 0;
1779                                 goto out;
1780                         }
1781
1782                         uint32_t zero_range;
1783                         zero_range = allocated;
1784                         if (zero_range > max_blocks)
1785                                 zero_range = max_blocks;
1786
1787                         newblock = iblock - ee_block + ee_start;
1788                         err = ext4_ext_zero_unwritten_range(inode_ref, newblock,
1789                                         zero_range);
1790                         if (err != EOK)
1791                                 goto out2;
1792
1793                         err = ext4_ext_convert_to_initialized(inode_ref, &path,
1794                                         iblock, zero_range);
1795                         if (err != EOK)
1796                                 goto out2;
1797
1798                         goto out;
1799                 }
1800         }
1801
1802         /*
1803          * requested block isn't allocated yet;
1804          * we can't allocate it unless the create flag is set
1805          */
1806         if (!create) {
1807                 goto out2;
1808         }
1809
1810         /* find the next allocated block so that we know how many
1811          * blocks we can allocate without overlapping the next extent */
1812         next = ext4_ext_next_allocated_block(path);
1813         allocated = next - iblock;
1814         if (allocated > max_blocks)
1815                 allocated = max_blocks;
1816
1817         /* allocate new block */
1818         goal = ext4_ext_find_goal(inode_ref, path, iblock);
1819         newblock = ext4_new_meta_blocks(inode_ref, goal, 0, &allocated, &err);
1820         if (!newblock)
1821                 goto out2;
1822
1823         /* try to insert new extent into found leaf and return */
1824         newex.first_block = to_le32(iblock);
1825         ext4_ext_store_pblock(&newex, newblock);
1826         newex.block_count = to_le16(allocated);
1827         err = ext4_ext_insert_extent(inode_ref, &path, &newex, 0);
1828         if (err != EOK) {
1829                 /* free data blocks we just allocated */
1830                 ext4_ext_free_blocks(inode_ref, ext4_ext_pblock(&newex),
1831                                      to_le16(newex.block_count), 0);
1832                 goto out2;
1833         }
1834
1835         /* the previous routine could have used the block we allocated */
1836         newblock = ext4_ext_pblock(&newex);
1837
1838 out:
1839         if (allocated > max_blocks)
1840                 allocated = max_blocks;
1841
1842         if (result)
1843                 *result = newblock;
1844
1845         if (blocks_count)
1846                 *blocks_count = allocated;
1847
1848 out2:
1849         if (path) {
1850                 ext4_ext_drop_refs(inode_ref, path, 0);
1851                 free(path);
1852         }
1853
1854         return err;
1855 }
1856 #endif