memleak: expand allocator coverage #1214

i-rinat · 2017-06-03T23:46:40Z

Currently, memleak only handles malloc()/free() and __kmalloc()/kfree() functions. However, there are more ways to allocate memory in both kernel and user space.

In addition to malloc(), it's also worth to handle calloc(), realloc(), posix_memalign(), valloc(), pvalloc(), and memalign(). C11 also adds aligned_alloc(), which is handled in the patchset too.

Kernel mode allocations are a bit more sophisticated. There are not only malloc-like allocation functions. It's also possible to allocate and free whole pages. Fortunately, all of the ways have corresponding tracepoints, so they are used for tracking. To avoid page to address conversions, page frame numbers are used as addresses in "allocs" hashtable. They tend to be relatively small numbers, while kmalloc-and-the-like tend to operate in upper part of the address space. So clashes are extremely unlikely.

Kernel tends to allocate a lot, and to keep that allocations as caches. To handle that, patchset also increases hashtables capacity, and introduces another mode, which decreases kernel-userspace traffic by performing allocation statistics calculations immediately in the kernel.

i-rinat · 2017-06-03T23:51:22Z

Here is leaking userspace app:

#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <unistd.h>

static void
generate_leak(int kind, int amount)
{
    void *ptr = NULL;
    
    switch (kind) {
    case 1:
        printf("leaking via malloc, %p\n", malloc(amount));
        break;
    
    case 2:
        printf("leaking via calloc, %p\n", calloc(amount, 1));
        break;
    
    case 3:
        printf("leaking via realloc, %p\n", realloc(malloc(10), amount));
        break;
    
    case 4:
        posix_memalign(&ptr, 512, amount);
        printf("leaking via posix_memalign, %p\n", ptr);
        break;
    
    case 5:
        printf("leaking via valloc, %p\n", valloc(amount));
        break;
    
    case 6:
        printf("leaking via memalign, %p\n", memalign(512, amount));
        break;
    
    case 7:
        printf("leaking via pvalloc, %p\n", pvalloc(amount));
        break;
    
    case 8:
        printf("leaking via aligned_alloc, %p\n", aligned_alloc(512, amount));
        break;
    }
}

static int
max_int(int a, int b)
{
    return a > b ? a : b;
}

int
main(int argc, char *argv[])
{
    if (argc < 2) {
        printf("usage: leak-userspace <kind-of-leak> [amount] [seconds]\n");
        return EXIT_SUCCESS;
    }
    
    int kind = atoi(argv[1]);
    printf("pid = %d\n", (int)getpid());
    
    if (kind < 1 || kind > 8) {
        printf("unsupported leak kind: %d\n", kind);
        return EXIT_FAILURE;
    }

    int amount = 30;
    if (argc > 2)
        amount = max_int(1, atoi(argv[2]));

    int duration = 1000;
    if (argc > 3)
        duration = max_int(1, atoi(argv[3]));

    for (int k = 0; k < duration; k++) {
        generate_leak(kind, amount);
        sleep(1);
    }

    return EXIT_SUCCESS;
}

and leaking kernel module:

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/proc_fs.h>
#include <linux/uaccess.h>
#include <linux/slab.h>

static struct proc_dir_entry *leak_proc_entry;
static struct kmem_cache *leak_cache;
static void *ptr;

static ssize_t
leak_proc_write(struct file *filp, const char __user *buf, size_t count,
                loff_t *offp)
{
	char tmp_buf[20];
	size_t sz = min(count, sizeof(tmp_buf) - 1);
	u32 kind;
	int k;

	if (copy_from_user(tmp_buf, buf, sz) != 0) {
		printk(KERN_INFO "ri: proc write failed\n");
		return -1;
	}

	tmp_buf[sz] = '\0';

	if (kstrtou32(tmp_buf, 10, &kind) != 0) {
		printk(KERN_INFO "ri: not a number: %s\n", tmp_buf);
		return count;
	}

	switch (kind) {
	case 1:
		for (k = 0; k < 1000; k++)
			ptr = kmalloc(17000, GFP_KERNEL);
		printk(KERN_INFO "ri: leaking via kmalloc, %p\n", ptr);
		break;

	case 2:
		for (k = 0; k < 1000; k++)
			ptr = kmem_cache_alloc(leak_cache, GFP_KERNEL);
		printk(KERN_INFO "ri: leaking via kmem_cache_alloc, %p\n", ptr);
		break;

	case 3:
		for (k = 0; k < 1000; k++)
			ptr = (void *)__get_free_pages(GFP_ATOMIC, 2);
		printk(KERN_INFO "ri: leaking via __get_free_pages, %p\n", ptr);
		break;

	default:
		printk(KERN_INFO "ri: unknown leak kind %d\n", kind);
		break;
	}
	return count;
}

struct file_operations proc_fops = {
	.write = leak_proc_write,
};

static int __init
leak_module_init(void)
{
	printk(KERN_INFO "ri: leak module load\n");
	leak_proc_entry = proc_create("create_leak", 0666, NULL, &proc_fops);
	if (!leak_proc_entry) {
		printk(KERN_INFO "ri: can't create proc entry\n");
	}

	leak_cache = kmem_cache_create("leak-cache", 16380, 0, 0, NULL);
	return 0;
}

static void __exit
leak_module_exit(void)
{
	printk(KERN_INFO "ri: leak module unload\n");
	proc_remove(leak_proc_entry);
	kmem_cache_destroy(leak_cache);
}

module_init(leak_module_init);
module_exit(leak_module_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("ri");
MODULE_DESCRIPTION("Generates various kinds of memory leaks.");

goldshtn

Thank you for this! It looks like a very comprehensive contribution. Please note that each tool comes with a man page (man/man8/memleak.8 in this case) and an examples file (tools/memleak_example.txt), which should be updated to reflect the new features and explain which allocation APIs are now traced. Also, using tracepoints restricts this tool to Linux 4.7, which should be mentioned explicitly (I believe the current version would work on 4.6).

Lastly, it looks like you have a very nice test case -- it would be great to see an automatic test, at least for the user-space case, to make sure we have all the new features covered.

goldshtn · 2017-06-04T08:28:38Z

tools/memleak.py

+
+ existing_cinfo = combined_allocs.lookup(&stack_id);
+ if (existing_cinfo != 0)
+ cinfo = *existing_cinfo;


I usually prefer to do something like this, to avoid the struct copy:

struct val *valp, val; valp = map.lookup(&key); if (!valp) valp = &val; ... map.update(&key, &valp);

goldshtn · 2017-06-04T08:29:27Z

tools/memleak.py

+
+ existing_cinfo = combined_allocs.lookup(&stack_id);
+ if (existing_cinfo != 0)
+ cinfo = *existing_cinfo;


Same comment.

First comment disappeared, so I'll write here. Comment was about using pointers to avoid structure copying.

I can't figure out how to change code to do that.
What code does is taking existing hash table entry, updating its fields and reinserting. Currently I use cinfo structure initialized to zeroes, and copy over it the value which I get from hash table. Then cinfo is changed.

If I'm to use pointer which will point either to hash table entry, if it exists, or to separate zeroed structure instance, there will be cases where I need to write to the hash table internal memory. And that code fails to verify. I must use another variable anyway. So, the same structure copying.

Is there another way I'm not seeing?

goldshtn · 2017-06-04T08:32:34Z

[buildbot, add to whitelist]

goldshtn · 2017-06-12T09:58:40Z

@i-rinat Ping?

i-rinat · 2017-06-12T13:39:47Z

Sorry, tough week. I'm still going to make fixes according your comments, make corresponding documentation changes, and a test. But it could take a while.

i-rinat · 2017-06-19T19:38:03Z

Updated commits to include corresponding man page and memleak_example.txt. And also a comment describing why tracepoints are used, not kprobes. I also had difficulties with structure copying avoiding comment (see detailed comment above).

I haven't done the test yet. Going to do that next.

i-rinat · 2017-06-20T18:26:07Z

Oops, I broke it somewhere during editing. Lost handling of free().

i-rinat · 2017-06-20T18:34:25Z

Now free is intercepted again, here: b619fff#diff-8f4d7250666bb397c11fd11fdc7b994fR335
Sorry for the noise.

goldshtn · 2017-06-28T06:25:20Z

@i-rinat I'm waiting with the final review until you say it's done -- I thought you were planning to add a test? Thanks!

i-rinat · 2017-06-29T22:32:31Z

I thought you were planning to add a test?

Yes, I do. It's just I'm slow.

i-rinat · 2017-07-08T18:48:18Z

Added the test for the memleak.py. It compiles an instance of leaking application, and checks that the application leaks expected amount of memory.

~~I finished changing code in this iteration. Waiting for comments.~~

i-rinat · 2017-07-08T18:57:32Z

Test failed on CI. :-(

goldshtn · 2017-07-09T10:28:14Z

Did you see the error log? You can click through the failures and see the full build log.

i-rinat · 2017-07-09T11:00:23Z

Yes, thanks, I saw it. I even installed Fedora 25 in a VM and reproduced it there.

The bug in the test I wrote is that it relies on delays. Since I want it to detect exact amount of memory leaked, memory is allocated once. But because memleak.py attaches to a process with some delay, synchronization is needed. Just calling sleep(1) was sufficient on my machine, but it's not a reliable way. I think, I need to do synchronizations explicitly.

With large number of outstanding allocations, amount of data passed from kernel becomes large, which slows everything down. This patch calculates allocation statistics inside kernel, allowing user- space part to pull combined statistics data only, thus significantly reducing amount of passed data.

There are a lot of allocations happen in kernel. Default values are not enough to keep up.

i-rinat · 2017-07-09T22:30:53Z

Added kind of synchronization. Is should be more robust now.

Test asks memleak.py to make two reports with a one second interval. Tests waits for the first report. When first report arrives, it's guaranteed that the leaking app is started and memleak.py attached its probes. Then test writes a byte to the stdin, which in turn triggers allocation in the leaking app. And after that, second report is analyzed.

Looks like tests are passing on fc24 and fc25. Test on ubuntu1604 is skipped (requires kernel version 4.6, but Ubuntu 16.04 has 4.4).

goldshtn · 2017-07-11T18:13:57Z

Thank you for this! This makes memleak much more robust, and the coverage of additional allocators is very welcome. Merging now.

i-rinat · 2017-07-11T21:03:49Z

Thanks! And sorry for keeping it lingering for a month.

goldshtn requested changes Jun 4, 2017

View reviewed changes

i-rinat changed the title ~~memleak: expand allocator coverage~~ WIP: memleak: expand allocator coverage Jun 20, 2017

i-rinat changed the title ~~WIP: memleak: expand allocator coverage~~ memleak: expand allocator coverage Jun 20, 2017

i-rinat added 5 commits July 10, 2017 00:58

memleak: handle libc allocation functions other than malloc

d7a4271

memleak: use tracepoints to track kernel allocations

af2ca5f

memleak: increase hashtable capacities

7bff021

There are a lot of allocations happen in kernel. Default values are not enough to keep up.

test: add a test for the memleak tool

d21f563

goldshtn approved these changes Jul 11, 2017

View reviewed changes

goldshtn merged commit 2c1799c into iovisor:master Jul 11, 2017

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

memleak: expand allocator coverage #1214

memleak: expand allocator coverage #1214

i-rinat commented Jun 3, 2017

i-rinat commented Jun 3, 2017

goldshtn left a comment

goldshtn Jun 4, 2017

goldshtn Jun 4, 2017

i-rinat Jun 19, 2017

goldshtn commented Jun 4, 2017

goldshtn commented Jun 12, 2017

i-rinat commented Jun 12, 2017

i-rinat commented Jun 19, 2017

i-rinat commented Jun 20, 2017

i-rinat commented Jun 20, 2017

goldshtn commented Jun 28, 2017

i-rinat commented Jun 29, 2017

i-rinat commented Jul 8, 2017 •

edited

Loading

i-rinat commented Jul 8, 2017

goldshtn commented Jul 9, 2017

i-rinat commented Jul 9, 2017

i-rinat commented Jul 9, 2017

goldshtn commented Jul 11, 2017

i-rinat commented Jul 11, 2017

memleak: expand allocator coverage #1214

memleak: expand allocator coverage #1214

Conversation

i-rinat commented Jun 3, 2017

i-rinat commented Jun 3, 2017

goldshtn left a comment

Choose a reason for hiding this comment

goldshtn Jun 4, 2017

Choose a reason for hiding this comment

goldshtn Jun 4, 2017

Choose a reason for hiding this comment

i-rinat Jun 19, 2017

Choose a reason for hiding this comment

goldshtn commented Jun 4, 2017

goldshtn commented Jun 12, 2017

i-rinat commented Jun 12, 2017

i-rinat commented Jun 19, 2017

i-rinat commented Jun 20, 2017

i-rinat commented Jun 20, 2017

goldshtn commented Jun 28, 2017

i-rinat commented Jun 29, 2017

i-rinat commented Jul 8, 2017 • edited Loading

i-rinat commented Jul 8, 2017

goldshtn commented Jul 9, 2017

i-rinat commented Jul 9, 2017

i-rinat commented Jul 9, 2017

goldshtn commented Jul 11, 2017

i-rinat commented Jul 11, 2017

i-rinat commented Jul 8, 2017 •

edited

Loading