author     Kirill Artamonov <kartamonov@nvidia.com>   2010-10-15 12:42:58 +0300
committer  Niket Sirsi <nsirsi@nvidia.com>            2010-11-06 10:05:36 -0800
commit     86350e7a8c6757f4887ddc5b973b406af1eb9d03 (patch)
tree       245b372d567f45b4b6a5ce43748e62c5c55f5005
parent     c970c075df97de1e48fa80461e2e52e7446a666f (diff)
nvmap: Improved compaction algorithm implemented
Fast compaction algorithm: for every free block from the beginning of the carveout (CO), try to reallocate the blocks before and after the free block. Once enough free space for the requested allocation has been freed, return.

Full compaction triggers if the fast one fails: it works like the fast one, but also uses a block-moving algorithm which doesn't require extra CO memory. It compacts the whole heap, merging as much free space as possible.

Also fixed a few bugs.

bug 740842
bug 746972

(cherry picked from commit 171edd17a191c8a4a4f9a8434dec796089cda141)

Change-Id: I40ec2c49a3c3f37fd7149e8123eda59da4caef39
Reviewed-on: http://git-master/r/10149
Reviewed-by: Kirill Artamonov <kartamonov@nvidia.com>
Tested-by: Kirill Artamonov <kartamonov@nvidia.com>
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
-rw-r--r--   drivers/video/tegra/nvmap.c   389
1 file changed, 288 insertions, 101 deletions
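
The commit message above describes the policy in prose only; the stand-alone C sketch below illustrates the fast-vs-full fallback it describes. This is a minimal model under simplified assumptions (a toy array of blocks sorted by base; names such as toy_compact and largest_free_gap are invented for illustration), not the nvmap driver code: the real patch walks nvmap's linked block list under co->lock and physically relocates live allocations with memcpy/memmove in _nvmap_carveout_relocate().

/*
 * Toy sketch of the fast/full compaction policy described in the commit
 * message.  Block layout and function names are assumptions made for
 * illustration only.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define HEAP_SIZE  64
#define MAX_BLOCKS 16

struct toy_block {
	size_t base;
	size_t size;
	bool   used;
};

/* Largest contiguous free gap, assuming blocks are sorted by base. */
static size_t largest_free_gap(const struct toy_block *b, int n)
{
	size_t gap = 0, cursor = 0;
	for (int i = 0; i < n; i++) {
		if (!b[i].used)
			continue;
		if (b[i].base - cursor > gap)
			gap = b[i].base - cursor;
		cursor = b[i].base + b[i].size;
	}
	if (HEAP_SIZE - cursor > gap)
		gap = HEAP_SIZE - cursor;
	return gap;
}

/*
 * Slide used blocks toward the start of the heap so free space merges.
 * In "fast" mode, return as soon as some free gap can satisfy `need`;
 * in full mode, keep going until the whole heap is compacted.
 */
static bool toy_compact(struct toy_block *b, int n, size_t need, bool fast)
{
	size_t cursor = 0;
	for (int i = 0; i < n; i++) {
		if (!b[i].used)
			continue;
		if (fast && largest_free_gap(b, n) >= need)
			return true;	/* fast path: enough space already freed */
		if (b[i].base != cursor) {
			/* In the real driver this step is the data move (memcpy/memmove). */
			printf("move block %d: %zu -> %zu\n", i, b[i].base, cursor);
			b[i].base = cursor;
		}
		cursor += b[i].size;
	}
	return largest_free_gap(b, n) >= need;
}

int main(void)
{
	/* Fragmented heap: used blocks at 0, 16 and 32, each 8 bytes. */
	struct toy_block blocks[MAX_BLOCKS] = {
		{ 0, 8, true }, { 16, 8, true }, { 32, 8, true },
	};
	int n = 3;

	if (!toy_compact(blocks, n, 30, true))	/* try fast compaction first */
		toy_compact(blocks, n, 30, false);	/* fall back to full compaction */

	printf("largest free gap now: %zu\n", largest_free_gap(blocks, n));
	return 0;
}

In the patch itself, the fast pass (compact_minimal with use_allocs) only relocates blocks adjacent to a free block and bails out as soon as the block at co->free_index is large enough for the pending allocation; the full pass additionally enables use_swaps, the in-place move in _nvmap_carveout_relocate() that frees the source block first and therefore needs no spare carveout space.
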
diff --git a/drivers/video/tegra/nvmap.c b/drivers/video/tegra/nvmap.c
index 6ae55b0d4b8c..f260e1cb9573 100644
--- a/drivers/video/tegra/nvmap.c
+++ b/drivers/video/tegra/nvmap.c
@@ -22,6 +22,8 @@
#define NV_DEBUG 0
#define NVMAP_DEBUG_FS 1
+#define NVMAP_DEBUG_COMPACTION 0
+
#include <linux/vmalloc.h>
#include <linux/module.h>
@@ -525,49 +527,28 @@ static int nvmap_carveout_alloc_locked(struct nvmap_carveout_node *n,
idx = (idx_last != -1) ? co->block_index : co->free_index;
while (idx != -1) {
- size_t end;
size_t ljust;
- size_t rjust;
- size_t l_max, r_max;
struct nvmap_mem_block *b = BLOCK(co, idx);
- if (idx == idx_last) {
- return -1;
- }
-
if (!co_is_free(co, idx)) {
goto next;
}
- /* try to be a bit more clever about generating block-
- * droppings by comparing the results of a left-justified vs
- * right-justified block split, and choosing the
- * justification style which yields the largest remaining
- * block */
- end = b->base + b->size;
ljust = (b->base + align - 1) & ~(align-1);
- rjust = (end - size) & ~(align-1);
-
- if (rjust < b->base) rjust = ljust;
- l_max = max_t(size_t, ljust - b->base, end - (ljust + size));
- r_max = max_t(size_t, rjust - b->base, end - (rjust + size));
if (b->base + b->size >= ljust + size) {
- if (l_max >= r_max) {
- if (!nvmap_split_block(co,
- idx, ljust, size,
- align))
- break;
- } else {
- if (!nvmap_split_block(co,
- idx, rjust, size,
- align))
- break;
- }
+ if (!nvmap_split_block(co,
+ idx, ljust, size,
+ align))
+ break;
}
next:
+ if (idx == idx_last) {
+ return -1;
+ }
+
idx = (idx_last != -1) ? b->next : b->next_free;
}
@@ -652,6 +633,16 @@ static struct {
struct nvmap_file_priv init_data;
struct rw_semaphore list_sem;
struct list_head heaps;
+
+ /* Compaction stats counters */
+ int compact_kbytes_count;
+ int compact_attempts_count;
+ int fastcompact_count;
+ int fullcompact_count;
+ int fastcompact_fail_count;
+ int fullcompact_fail_count;
+ int relocate_fail_pin_count;
+ int relocate_fail_mem_count;
} nvmap_context;
static struct vm_operations_struct nvmap_vma_ops = {
@@ -1992,14 +1983,54 @@ static int nvmap_ioctl_getid(struct file *filp, void __user *arg)
return -EPERM;
}
+#define NVMAP_NRELOCATE_LIMIT 4096
+
+#if NVMAP_DEBUG_COMPACTION
+
+static void _nvmap_carveout_print_stats(struct nvmap_handle *h,
+ unsigned int heap_type)
+{
+ struct nvmap_carveout_node *n;
+ int co_size_free = 0;
+ int co_size_total = 0;
+ int co_size_largest_free = 0;
+ list_for_each_entry(n, &nvmap_context.heaps, heap_list) {
+ if (heap_type & n->heap_bit) {
+ struct nvmap_carveout* co = &n->carveout;
+ int lfb = _nvmap_carveout_blockstat(co,
+ CARVEOUT_STAT_LARGEST_FREE);
+
+ if (lfb > co_size_largest_free) {
+ co_size_largest_free = lfb;
+ }
+ co_size_free += _nvmap_carveout_blockstat(co,
+ CARVEOUT_STAT_FREE_SIZE);
+ co_size_total += _nvmap_carveout_blockstat(co,
+ CARVEOUT_STAT_TOTAL_SIZE);
+ }
+ }
+ pr_err("\tTotal CO %dK, Free CO %dK, lfb %dK, lfb*100/free = %d \n",
+ co_size_total >> 10, co_size_free >> 10,
+ co_size_largest_free >> 10,
+ co_size_largest_free / (co_size_free / 100));
+}
+#endif
+
static int _nvmap_carveout_relocate( struct nvmap_carveout_node *n,
struct nvmap_carveout *co, int idx, int idx_last, void *addr_d,
- void *addr_s, pgprot_t prot)
+ void *addr_s, pgprot_t prot, bool swap)
{
struct nvmap_mem_block *b_d;
struct nvmap_mem_block *b_s = BLOCK(co, idx);
int idx_relocate;
- unsigned long offset, size;
+ unsigned long offset, size, src_base;
+ size_t src_align, src_size;
+ struct nvmap_handle *src_handle;
+
+ src_handle = b_s->h;
+ src_align = b_s->align;
+ src_size = b_s->size;
+ src_base = b_s->base;
spin_lock(&nvmap_handle_lock);
@@ -2008,20 +2039,36 @@ static int _nvmap_carveout_relocate( struct nvmap_carveout_node *n,
b_s->mapcount)
{
spin_unlock(&nvmap_handle_lock);
+ nvmap_context.relocate_fail_pin_count++;
return -EINVAL;
}
- idx_relocate = nvmap_carveout_alloc_locked(n, co, b_s->align,
- b_s->size, idx_last);
+ if (swap)
+ {
+ /* nvmap_carveout_free frees at least one spare */
+ /* and enough space for a new allocation */
+ nvmap_carveout_free(co, idx, false);
+ }
+
+ idx_relocate = nvmap_carveout_alloc_locked(n, co, src_align,
+ src_size, idx_last);
if (idx_relocate == -1) {
+ if(swap)
+ {
+ pr_err("Compaction ERROR! Block is lost!");
+ BUG_ON(1);
+ }
spin_unlock(&nvmap_handle_lock);
+ nvmap_context.relocate_fail_mem_count++;
return -ENOMEM;
}
b_d = BLOCK(co, idx_relocate);
offset = 0;
- size = b_s->size;
+ size = src_size;
+
+ BUG_ON(b_d->base > src_base);
while (offset < size) {
unsigned long phys_d, phys_s;
@@ -2029,7 +2076,7 @@ static int _nvmap_carveout_relocate( struct nvmap_carveout_node *n,
void *dst, *src;
phys_d = b_d->base + offset;
- phys_s = b_s->base + offset;
+ phys_s = src_base + offset;
count = min_t(size_t,
size-offset,
@@ -2038,63 +2085,70 @@ static int _nvmap_carveout_relocate( struct nvmap_carveout_node *n,
PAGE_SIZE-(phys_s&~PAGE_MASK)));
_nvmap_set_pte_at((unsigned long)addr_d,
- __phys_to_pfn(phys_d), prot);
- _nvmap_set_pte_at((unsigned long)addr_s,
- __phys_to_pfn(phys_s), prot);
-
+ __phys_to_pfn(phys_d), prot);
dst = addr_d + (phys_d & ~PAGE_MASK);
- src = addr_s + (phys_s & ~PAGE_MASK);
- memcpy(dst, src, count);
+ _nvmap_set_pte_at((unsigned long)addr_s,
+ __phys_to_pfn(phys_s), prot);
+ src = addr_s + (phys_s & ~PAGE_MASK);
+ /* memmove is slower then memcpy, so we use it when moving
+ * inside single page.
+ * moving chunks between pages can be done with
+ * faster memcpy, since pages are guaranteed to come in right order */
+ if (swap && __phys_to_pfn(phys_s) == __phys_to_pfn(phys_d)) {
+ memmove(dst, src, count);
+ }
+ else {
+ memcpy(dst, src, count);
+ }
offset += count;
}
- b_s->h->carveout.block_idx = idx_relocate;
- b_s->h->carveout.base = co->blocks[idx_relocate].base;
- co->blocks[idx_relocate].h = b_s->h;
+ src_handle->carveout.block_idx = idx_relocate;
+ src_handle->carveout.base = co->blocks[idx_relocate].base;
+ co->blocks[idx_relocate].h = src_handle;
spin_unlock(&nvmap_handle_lock);
- nvmap_carveout_free(co, idx, false);
- return 0;
+ nvmap_context.compact_kbytes_count += size >> 10;
+
+ if (!swap)
+ {
+ nvmap_carveout_free(co, idx, false);
+ }
+ return idx_relocate;
}
-#define NVMAP_NRELOCATE_LIMIT 64
-static void _nvmap_carveout_do_alloc(struct nvmap_handle *h,
- unsigned int heap_type, size_t align)
+
+static bool _nvmap_carveout_do_compact(struct nvmap_handle *h,
+ unsigned int heap_type, size_t align,
+ bool compact_minimal, bool use_allocs, bool use_swaps )
{
+ bool compaction_success = false;
struct nvmap_carveout_node *n;
pgprot_t prot = _nvmap_flag_to_pgprot(NVMEM_HANDLE_WRITE_COMBINE,
- pgprot_kernel);
+ pgprot_kernel);
void *addr_d = NULL;
void *addr_s = NULL;
- down_read(&nvmap_context.list_sem);
- list_for_each_entry(n, &nvmap_context.heaps, heap_list) {
- if (heap_type & n->heap_bit) {
- struct nvmap_carveout* co = &n->carveout;
- int idx;
+ int compact_kbytes_count_prev = nvmap_context.compact_kbytes_count;
+ int relocate_fail_mem_count_prev = nvmap_context.relocate_fail_mem_count;
+ int relocate_fail_pin_count_prev = nvmap_context.relocate_fail_pin_count;
+ int relocation_count = 0;
- spin_lock(&co->lock);
- idx = nvmap_carveout_alloc_locked(n, co, align, h->size, -1);
- if (idx != -1) {
- h->carveout.co_heap = co;
- h->carveout.block_idx = idx;
- h->carveout.base = co->blocks[idx].base;
- co->blocks[idx].h = h;
- h->heap_pgalloc = false;
- h->alloc = true;
- spin_unlock(&co->lock);
- break;
- }
- spin_unlock(&co->lock);
- }
+#if NVMAP_DEBUG_COMPACTION
+ pr_err("Compaction triggered when allocating %dK\n", h->size >> 10);
+ if (compact_minimal) {
+ pr_err("Fast compaction attempt.\n");
}
-
- if (h->alloc) {
- goto done;
+ else
+ {
+ pr_err("Full compaction attempt.\n");
}
+ pr_err("Stats before compaction: \n");
+ _nvmap_carveout_print_stats(h, heap_type);
+#endif
if (nvmap_map_pte(__phys_to_pfn(0), prot, &addr_d)) {
goto fail;
@@ -2110,42 +2164,130 @@ static void _nvmap_carveout_do_alloc(struct nvmap_handle *h,
if (heap_type & n->heap_bit) {
struct nvmap_carveout* co = &n->carveout;
int idx;
- int nrelocate = 0;
+ int nrelocate =0;
spin_lock(&co->lock);
idx = co->block_index;
-
- while (idx!=-1 && nrelocate<=NVMAP_NRELOCATE_LIMIT) {
+ while (idx!=-1 && nrelocate <= NVMAP_NRELOCATE_LIMIT) {
+ if (BLOCK(co, co->free_index)->size >= h->size) {
+ compaction_success = true;
+ if (compact_minimal) {
+ break;
+ }
+ }
if (co_is_free(co, idx)) {
int idx_prev, idx_next;
idx_prev = BLOCK(co, idx)->prev;
idx_next = BLOCK(co, idx)->next;
- if ((idx_prev != -1) &&
- !_nvmap_carveout_relocate(n, co,
- idx_prev, idx, addr_d,
- addr_s, prot))
- {
- idx = idx_prev;
- nrelocate++;
- continue;
+ if (use_allocs) {
+ if (idx_prev != -1) {
+ if(_nvmap_carveout_relocate(n, co,
+ idx_prev, idx_prev, addr_d,
+ addr_s, prot, false) >= 0) {
+ idx = idx_prev;
+ nrelocate++;
+ relocation_count++;
+ continue;
+ }
+ }
+
+ if(idx_next != -1 && !use_swaps) {
+ if(_nvmap_carveout_relocate(n, co,
+ idx_next, idx, addr_d,
+ addr_s, prot, false) >= 0) {
+ idx = idx_next;
+ nrelocate++;
+ relocation_count++;
+ continue;
+ }
+ }
}
- if ((idx_next != -1) &&
- !_nvmap_carveout_relocate(n, co,
- idx_next, idx, addr_d,
- addr_s, prot))
- {
- idx = idx_next;
- nrelocate++;
- continue;
+ if (use_swaps) {
+ if (idx_next != -1) {
+ if ( _nvmap_carveout_relocate(n, co,
+ idx_next, idx_next, addr_d,
+ addr_s, prot, true) >= 0) {
+ idx = idx_next;
+ nrelocate++;
+ relocation_count++;
+ continue;
+ }
+ }
}
- }
-
+ } /* endif co is free */
idx = co->blocks[idx].next;
- }
+ } /* end while */
+ spin_unlock(&co->lock);
+ }
+ }
+ mutex_unlock(&nvmap_pin_lock);
+
+ fail:
+ if (addr_d) nvmap_unmap_pte(addr_d);
+ if (addr_s) nvmap_unmap_pte(addr_s);
+
+ nvmap_context.compact_attempts_count++;
+
+ if (compaction_success) {
+ if (compact_minimal) {
+ nvmap_context.fastcompact_count++;
+ }
+ else {
+ nvmap_context.fullcompact_count++;
+ }
+
+ }
+ else {
+ if (compact_minimal) {
+ nvmap_context.fastcompact_fail_count++;
+ }
+ else {
+ nvmap_context.fullcompact_fail_count++;
+ }
+ }
+
+#if NVMAP_DEBUG_COMPACTION
+ pr_err("Stats after compaction:\n");
+ pr_err(" Successful relocations count: %d\n",relocation_count);
+ pr_err(" Bytes relocated: %dK\n",
+ nvmap_context.compact_kbytes_count - compact_kbytes_count_prev);
+ pr_err(" Failed reallocs: pinned: %d, OOM: %d\n",
+ nvmap_context.relocate_fail_pin_count - relocate_fail_pin_count_prev,
+ nvmap_context.relocate_fail_mem_count - relocate_fail_mem_count_prev);
+ _nvmap_carveout_print_stats(h, heap_type);
+ pr_err("Total nvmap compaction attempts: %d, moved bytes: %dK \n"
+ "fast compactions: %d full compactions: %d \n"
+ "failed fast compactions: %d failed full compactions: %d \n\n",
+ nvmap_context.compact_attempts_count,
+ nvmap_context.compact_kbytes_count,
+ nvmap_context.fastcompact_count,
+ nvmap_context.fullcompact_count,
+ nvmap_context.fastcompact_fail_count,
+ nvmap_context.fullcompact_fail_count);
+#endif /* NVMAP_DEBUG_COMPACTION */
+
+ return compaction_success;
+}
+
+
+static void _nvmap_carveout_do_alloc(struct nvmap_handle *h,
+ unsigned int heap_type, size_t align)
+{
+ struct nvmap_carveout_node *n;
+ bool enough_free_space = false;
+ down_read(&nvmap_context.list_sem);
+ int free_space = 0;
+
+ list_for_each_entry(n, &nvmap_context.heaps, heap_list) {
+ if (heap_type & n->heap_bit) {
+ struct nvmap_carveout* co = &n->carveout;
+ int idx;
+
+ spin_lock(&co->lock);
idx = nvmap_carveout_alloc_locked(n, co, align, h->size, -1);
if (idx != -1) {
h->carveout.co_heap = co;
@@ -2158,14 +2300,51 @@ static void _nvmap_carveout_do_alloc(struct nvmap_handle *h,
break;
}
spin_unlock(&co->lock);
+
+ /* check if there is enough space to try compaction later */
+ free_space += _nvmap_carveout_blockstat(co, CARVEOUT_STAT_FREE_SIZE);
}
}
- mutex_unlock(&nvmap_pin_lock);
+ if (h->alloc || free_space < h->size) {
+ goto done;
+ }
-fail:
- if (addr_d) nvmap_unmap_pte(addr_d);
- if (addr_s) nvmap_unmap_pte(addr_s);
+ /* try fast compaction first */
+ if (!_nvmap_carveout_do_compact(h, heap_type, align,
+ true, /* compact_minimal */
+ true, /* bool use_allocs algorithm */
+ false /* use_swap algorithm */
+ )) {
+ /* do full compaction */
+ _nvmap_carveout_do_compact(h, heap_type, align,
+ false,/* compact_minimal */
+ true, /* bool use_allocs algorithm */
+ true /* use_swap algorithm */
+ );
+ }
+
+ /* retry allocation */
+ list_for_each_entry(n, &nvmap_context.heaps, heap_list) {
+ if (heap_type & n->heap_bit) {
+ struct nvmap_carveout* co = &n->carveout;
+ int idx;
+
+ spin_lock(&co->lock);
+ idx = nvmap_carveout_alloc_locked(n, co, align, h->size, -1);
+ if (idx != -1) {
+ h->carveout.co_heap = co;
+ h->carveout.block_idx = idx;
+ h->carveout.base = co->blocks[idx].base;
+ co->blocks[idx].h = h;
+ h->heap_pgalloc = false;
+ h->alloc = true;
+ spin_unlock(&co->lock);
+ break;
+ }
+ spin_unlock(&co->lock);
+ }
+ }
done:
up_read(&nvmap_context.list_sem);
@@ -2206,8 +2385,8 @@ static int _nvmap_do_alloc(struct nvmap_file_priv *priv,
else if ((numpages == 1) &&
((heap_mask & (NVMEM_HEAP_CARVEOUT_MASK | NVMEM_HEAP_IOVMM)) !=
NVMEM_HEAP_CARVEOUT_IRAM)) {
- // Non-secure single page iovmm and carveout allocations
- // should be allowed to go to sysmem
+ /* Non-secure single page iovmm and carveout allocations
+ * should be allowed to go to sysmem */
heap_mask |= NVMEM_HEAP_SYSMEM;
}
@@ -2946,7 +3125,6 @@ static unsigned int _nvmap_do_get_param(struct nvmap_handle *h,
return NVMEM_HEAP_SYSMEM;
}
-
return 0;
}
@@ -3066,6 +3244,15 @@ static int __init nvmap_core_init(void)
pte_t *pte;
unsigned int i;
+ nvmap_context.compact_kbytes_count = 0;
+ nvmap_context.compact_attempts_count = 0;
+ nvmap_context.fastcompact_count = 0;
+ nvmap_context.fullcompact_count = 0;
+ nvmap_context.fastcompact_fail_count = 0;
+ nvmap_context.fullcompact_fail_count = 0;
+ nvmap_context.relocate_fail_pin_count = 0;
+ nvmap_context.relocate_fail_mem_count = 0;
+
init_rwsem(&nvmap_context.list_sem);
nvmap_context.init_data.handle_refs = RB_ROOT;
atomic_set(&nvmap_context.init_data.iovm_commit, 0);