In the previous article I gave a brief account of how kmalloc allocates memory under the slab allocator. While reading cache_alloc_refill, though, I was still puzzled by part of its logic.
```c
static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
							bool force_refill)
{
	int batchcount;
	struct kmem_list3 *l3;
	struct array_cache *ac;
	int node;

	check_irq_off();
	node = numa_mem_id();
	if (unlikely(force_refill))
		goto force_grow;
retry:
	ac = cpu_cache_get(cachep);
	batchcount = ac->batchcount;
	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
		/*
		 * If there was little recent activity on this cache, then
		 * perform only a partial refill.  Otherwise we could generate
		 * refill bouncing.
		 */
		batchcount = BATCHREFILL_LIMIT;
	}
	l3 = cachep->nodelists[node];

	BUG_ON(ac->avail > 0 || !l3);
	spin_lock(&l3->list_lock);

	/* See if we can refill from the shared array */
	if (l3->shared && transfer_objects(ac, l3->shared, batchcount)) {
		l3->shared->touched = 1;
		goto alloc_done;
	}

	while (batchcount > 0) {
		struct list_head *entry;
		struct slab *slabp;
		/* Get slab alloc is to come from. */
		entry = l3->slabs_partial.next;
		if (entry == &l3->slabs_partial) {
			l3->free_touched = 1;
			entry = l3->slabs_free.next;
			if (entry == &l3->slabs_free)
				goto must_grow;
		}

		slabp = list_entry(entry, struct slab, list);
		check_slabp(cachep, slabp);
		check_spinlock_acquired(cachep);

		/*
		 * The slab was either on partial or free list so
		 * there must be at least one object available for
		 * allocation.
		 */
		BUG_ON(slabp->inuse >= cachep->num);

		while (slabp->inuse < cachep->num && batchcount--) {
			STATS_INC_ALLOCED(cachep);
			STATS_INC_ACTIVE(cachep);
			STATS_SET_HIGH(cachep);

			ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp,
									node));
		}
		check_slabp(cachep, slabp);

		/* move slabp to correct slabp list: */
		list_del(&slabp->list);
		if (slabp->free == BUFCTL_END)
			list_add(&slabp->list, &l3->slabs_full);
		else
			list_add(&slabp->list, &l3->slabs_partial);
	}

must_grow:
	l3->free_objects -= ac->avail;
alloc_done:
	spin_unlock(&l3->list_lock);

	if (unlikely(!ac->avail)) {
		int x;
force_grow:
		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);

		/* cache_grow can reenable interrupts, then ac could change. */
		ac = cpu_cache_get(cachep);
		node = numa_mem_id();

		/* no objects in sight? abort */
		if (!x && (ac->avail == 0 || force_refill))
			return NULL;

		if (!ac->avail)		/* objects refilled by interrupt? */
			goto retry;
	}
	ac->touched = 1;

	return ac_get_obj(cachep, ac, flags, force_refill);
}
```
My question concerns the line `batchcount = ac->batchcount;`. During default initialization, i.e. in kmem_cache_init, every system cache ends up going through __kmem_cache_create, and setup_cpu_cache there contains this code:
```c
	cpu_cache_get(cachep)->avail = 0;
	cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
	cpu_cache_get(cachep)->batchcount = 1;
	cpu_cache_get(cachep)->touched = 0;
	cachep->batchcount = 1;
	cachep->limit = BOOT_CPUCACHE_ENTRIES;
	return 0;
```
Can I therefore assume that ac->batchcount is simply 1 (BOOT_CPUCACHE_ENTRIES is likewise 1)? Then ac_put_obj would place only a single obj into the array on each refill. If that happened every time, then in __cache_alloc:
```c
	ac = cpu_cache_get(cachep);
	if (likely(ac->avail)) {
		ac->touched = 1;
		objp = ac_get_obj(cachep, ac, flags, false);

		/*
		 * Allow for the possibility all avail objects are not allowed
		 * by the current flags
		 */
		if (objp) {
			STATS_INC_ALLOCHIT(cachep);
			goto out;
		}
		force_refill = true;
	}
```
what would be the point of this fast path? With batchcount equal to 1, each refill puts a single obj into the array, raising avail from 0 to 1; but as soon as one obj is taken, avail drops back to 0, so virtually every allocation would fall into the slow path. That would obviously be very inefficient.
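To see just how bad that would be, here is a minimal user-space sketch (my own model, not kernel code; ac_put_obj/ac_get_obj are reduced to stack pushes and pops) comparing the boot-time batchcount of 1 with the kind of tuned value we will meet shortly:

```c
#include <stdio.h>

/* Simplified stand-in for struct array_cache: a LIFO stack of object
 * pointers plus the tunables discussed above. Hypothetical model code. */
struct model_ac {
	int avail;
	int limit;
	int batchcount;
	void *entry[120];
};

static int refills;	/* counts how often we fall into the slow path */

/* Models cache_alloc_refill: pull batchcount objects off the slab lists. */
static void refill(struct model_ac *ac)
{
	refills++;
	for (int i = 0; i < ac->batchcount && ac->avail < ac->limit; i++)
		ac->entry[ac->avail++] = (void *)0xdeadbeef;	/* fake object */
}

/* Models __cache_alloc: hit the per-CPU array first, refill on a miss. */
static void *model_alloc(struct model_ac *ac)
{
	if (!ac->avail)
		refill(ac);
	return ac->entry[--ac->avail];
}

int main(void)
{
	struct model_ac boot  = { .limit = 1,   .batchcount = 1 };	/* setup_cpu_cache values */
	struct model_ac tuned = { .limit = 120, .batchcount = 60 };	/* tuned values, see below */

	refills = 0;
	for (int i = 0; i < 1000; i++) model_alloc(&boot);
	printf("batchcount=1:  %d refills for 1000 allocs\n", refills);	/* 1000 */

	refills = 0;
	for (int i = 0; i < 1000; i++) model_alloc(&tuned);
	printf("batchcount=60: %d refills for 1000 allocs\n", refills);	/* 17 */
	return 0;
}
```

With batchcount 1 every single allocation pays for a refill; with batchcount 60 only about one in sixty does.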
It turned out I simply had not read all of the code. Consider the following function, which runs after kmem_cache_init has finished:
```c
void __init kmem_cache_init_late(void)
{
	struct kmem_cache *cachep;

	slab_state = UP;

	/* 6) resize the head arrays to their final sizes */
	mutex_lock(&slab_mutex);
	list_for_each_entry(cachep, &slab_caches, list)
		if (enable_cpucache(cachep, GFP_NOWAIT))
			BUG();
	mutex_unlock(&slab_mutex);

	/* Annotate slab for lockdep -- annotate the malloc caches */
	init_lock_keys();

	/* Done! */
	slab_state = FULL;

	/*
	 * Register a cpu startup notifier callback that initializes
	 * cpu_cache_get for all new cpus
	 */
	register_cpu_notifier(&cpucache_notifier);

#ifdef CONFIG_NUMA
	/*
	 * Register a memory hotplug callback that initializes and frees
	 * nodelists.
	 */
	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
#endif

	/*
	 * The reap timers are started later, with a module init call: That part
	 * of the kernel is not yet operational.
	 */
}
```
This function walks the slab_caches list and calls enable_cpucache(cachep, GFP_NOWAIT) on every cache!
```c
/* Called with slab_mutex held always */
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
{
	int err;
	int limit = 0;
	int shared = 0;
	int batchcount = 0;

	if (!is_root_cache(cachep)) {
		struct kmem_cache *root = memcg_root_cache(cachep);
		limit = root->limit;
		shared = root->shared;
		batchcount = root->batchcount;
	}

	if (limit && shared && batchcount)
		goto skip_setup;
	/*
	 * The head array serves three purposes:
	 * - create a LIFO ordering, i.e. return objects that are cache-warm
	 * - reduce the number of spinlock operations.
	 * - reduce the number of linked list operations on the slab and
	 *   bufctl chains: array operations are cheaper.
	 * The numbers are guessed, we should auto-tune as described by
	 * Bonwick.
	 */
	if (cachep->size > 131072)	/* objects larger than 128 KB: limit is 1 */
		limit = 1;
	else if (cachep->size > PAGE_SIZE)
		limit = 8;
	else if (cachep->size > 1024)
		limit = 24;
	else if (cachep->size > 256)
		limit = 54;
	else
		limit = 120;

	/*
	 * CPU bound tasks (e.g. network routing) can exhibit cpu bound
	 * allocation behaviour: Most allocs on one cpu, most free operations
	 * on another cpu. For these cases, an efficient object passing between
	 * cpus is necessary. This is provided by a shared array. The array
	 * replaces Bonwick's magazine layer.
	 * On uniprocessor, it's functionally equivalent (but less efficient)
	 * to a larger limit. Thus disabled by default.
	 */
	shared = 0;
	if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1)	/* on SMP shared is 8, on UP it stays 0 */
		shared = 8;

#if DEBUG
	/*
	 * With debugging enabled, large batchcount lead to excessively long
	 * periods with disabled local interrupts. Limit the batchcount
	 */
	if (limit > 32)
		limit = 32;
#endif
	batchcount = (limit + 1) / 2;
skip_setup:
	err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);	/* write the tuned values into the cache */
	if (err)
		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
		       cachep->name, -err);
	return err;
}
```
So here limit, shared, and batchcount finally get their real initial values.
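Boiled down, the policy is: pick limit purely from the object size, derive batchcount as half of it, and enable an 8-entry shared array only on SMP for objects no larger than a page. A condensed restatement of just the sizing heuristic (my own paraphrase of the code above, assuming PAGE_SIZE is 4096 and DEBUG is off):

```c
/* Paraphrase of the sizing heuristic in enable_cpucache() above
 * (assumes PAGE_SIZE == 4096 and DEBUG disabled). */
static int pick_limit(unsigned long size)
{
	if (size > 131072)	/* > 128 KB: keep almost no per-CPU stock */
		return 1;
	if (size > 4096)	/* > PAGE_SIZE */
		return 8;
	if (size > 1024)
		return 24;
	if (size > 256)
		return 54;
	return 120;		/* small objects: large per-CPU stock */
}

/* batchcount is then (pick_limit(size) + 1) / 2. */
```

For a 1024-byte object this yields limit 54 and batchcount 27, exactly the `tunables 54 27 8` we will see in /proc/slabinfo below.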
```c
static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
				int batchcount, int shared, gfp_t gfp)
{
	int ret;
	struct kmem_cache *c = NULL;
	int i = 0;

	ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp);	/* tune the cache that was passed in */

	if (slab_state < FULL)
		return ret;

	if ((ret < 0) || !is_root_cache(cachep))
		return ret;

	VM_BUG_ON(!mutex_is_locked(&slab_mutex));
	for_each_memcg_cache_index(i) {
		c = cache_from_memcg(cachep, i);
		if (c)
			/* return value determined by the parent cache only */
			__do_tune_cpucache(c, limit, batchcount, shared, gfp);
	}

	return ret;
}
```
The actual work is done in:
```c
/* Always called with the slab_mutex held */
static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
				int batchcount, int shared, gfp_t gfp)
{
	struct ccupdate_struct *new;
	/*
	 * For reference, the structure used here:
	 *
	 *	struct ccupdate_struct {
	 *		struct kmem_cache *cachep;
	 *		struct array_cache *new[0];
	 *	};
	 */
	int i;

	new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *),
		      gfp);	/* new is freed before returning; it only acts as a staging area */
	if (!new)
		return -ENOMEM;

	for_each_online_cpu(i) {
		new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
						batchcount, gfp);
		if (!new->new[i]) {
			for (i--; i >= 0; i--)
				kfree(new->new[i]);
			kfree(new);
			return -ENOMEM;
		}
	}
	new->cachep = cachep;

	on_each_cpu(do_ccupdate_local, (void *)new, 1);	/* key point: run do_ccupdate_local on every CPU */

	check_irq_on();
	cachep->batchcount = batchcount;
	cachep->limit = limit;
	cachep->shared = shared;

	for_each_online_cpu(i) {
		struct array_cache *ccold = new->new[i];
		if (!ccold)
			continue;
		spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
		free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
		spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
		kfree(ccold);
	}
	kfree(new);
	return alloc_kmemlist(cachep, gfp);
}
```
Let's see what do_ccupdate_local does:
```c
static void do_ccupdate_local(void *info)
{
	struct ccupdate_struct *new = info;
	struct array_cache *old;

	check_irq_off();
	old = cpu_cache_get(new->cachep);

	/*
	 * new->cachep already points at our cache, so this redirects the
	 * cache's per-CPU array to the freshly allocated one ...
	 */
	new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
	/* ... and hands the old array back through the same slot. */
	new->new[smp_processor_id()] = old;
}
```

The new arrays themselves were initialized when they were allocated, by alloc_arraycache in the previous function:

```c
static struct array_cache *alloc_arraycache(int node, int entries,
					    int batchcount, gfp_t gfp)
{
	int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
	struct array_cache *nc = NULL;

	nc = kmalloc_node(memsize, gfp, node);
	/*
	 * The array_cache structures contain pointers to free object.
	 * However, when such objects are allocated or transferred to another
	 * cache the pointers are not cleared and they could be counted as
	 * valid references during a kmemleak scan. Therefore, kmemleak must
	 * not scan such objects.
	 */
	kmemleak_no_scan(nc);
	if (nc) {
		nc->avail = 0;
		nc->limit = entries;
		nc->batchcount = batchcount;	/* here ac->batchcount gets its real value */
		nc->touched = 0;
		spin_lock_init(&nc->lock);
	}
	return nc;
}
```
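The detail worth pausing on is the lockless swap: on_each_cpu() runs do_ccupdate_local on every CPU with local interrupts off (hence the check_irq_off()), so each CPU replaces its own array pointer without taking any lock, and the old array travels back to the initiator through the same new->new[cpu] slot, where free_block() drains it and kfree() releases it. A minimal sketch of that exchange (hypothetical model types, not the kernel's):

```c
#include <stdio.h>

#define NCPU 4	/* assumed CPU count for this model */

/* Hypothetical miniatures of the structures in mm/slab.c. */
struct mini_array { int batchcount; };

struct mini_cache { struct mini_array *array[NCPU]; };

struct mini_update {
	struct mini_cache *cachep;
	struct mini_array *new[NCPU];
};

/*
 * Models do_ccupdate_local(): runs on each CPU in turn (the kernel uses
 * on_each_cpu() with IRQs off, so the exchange needs no lock). Publish
 * the new array and keep the old one in the same slot so the initiator
 * can drain and free it afterwards.
 */
static void swap_on_cpu(struct mini_update *u, int cpu)
{
	struct mini_array *old = u->cachep->array[cpu];
	u->cachep->array[cpu] = u->new[cpu];
	u->new[cpu] = old;	/* handed back for free_block()/kfree() */
}

int main(void)
{
	struct mini_array boot[NCPU], tuned[NCPU];
	struct mini_cache cache;
	struct mini_update upd = { .cachep = &cache };

	for (int i = 0; i < NCPU; i++) {
		boot[i].batchcount = 1;		/* setup_cpu_cache() value */
		tuned[i].batchcount = 60;	/* enable_cpucache() value */
		cache.array[i] = &boot[i];
		upd.new[i] = &tuned[i];
	}

	for (int cpu = 0; cpu < NCPU; cpu++)	/* stands in for on_each_cpu() */
		swap_on_cpu(&upd, cpu);

	/* The cache now sees batchcount 60; upd.new[] holds the old arrays. */
	printf("cpu0 batchcount=%d, old=%d\n",
	       cache.array[0]->batchcount, upd.new[0]->batchcount);
	return 0;
}
```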
This closes the loop back to cache_alloc_refill: from this point on, cpu_cache_get returns the tuned array, so ac->batchcount is no longer 1.
We can check this against the slab information of a real kernel running the slab allocator. Note how the tunables columns (limit, batchcount, sharedfactor) match the heuristic above; with CONFIG_SLAB these values can even be changed at runtime by writing "name limit batchcount shared" to /proc/slabinfo:
```
cat /proc/slabinfo
slabinfo - version: 2.1
# name            <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab> : tunables <limit> <batchcount> <sharedfactor> : slabdata <active_slabs> <num_slabs> <sharedavail>
nf_conntrack_expect      0      0    152   26    1 : tunables  120   60    8 : slabdata      0      0      0
nf_conntrack_8050c5f0    2     26    296   13    1 : tunables   54   27    8 : slabdata      2      2      0
bridge_fdb_cache         4     78     48   78    1 : tunables  120   60    8 : slabdata      1      1      0
fib6_nodes              12    113     32  113    1 : tunables  120   60    8 : slabdata      1      1      0
ip6_dst_cache           25     57    208   19    1 : tunables  120   60    8 : slabdata      3      3      0
ip6_mrt_cache            0      0    112   35    1 : tunables  120   60    8 : slabdata      0      0      0
RAWv6                    8     15    720    5    1 : tunables   54   27    8 : slabdata      3      3      0
UDPLITEv6                0      0    688   11    2 : tunables   54   27    8 : slabdata      0      0      0
UDPv6                    3     22    688   11    2 : tunables   54   27    8 : slabdata      2      2      0
tw_sock_TCPv6            0      0    144   27    1 : tunables  120   60    8 : slabdata      0      0      0
request_sock_TCPv6       0      0    112   35    1 : tunables  120   60    8 : slabdata      0      0      0
TCPv6                    5      6   1328    3    1 : tunables   24   12    8 : slabdata      2      2      0
ubi_wl_entry_slab      463    580     24  145    1 : tunables  120   60    8 : slabdata      4      4      0
sd_ext_cdb               2    113     32  113    1 : tunables  120   60    8 : slabdata      1      1      0
fuse_request             0      0    384   10    1 : tunables   54   27    8 : slabdata      0      0      0
fuse_inode               0      0    416    9    1 : tunables   54   27    8 : slabdata      0      0      0
jffs2_inode_cache       15    145     24  145    1 : tunables  120   60    8 : slabdata      1      1      0
jffs2_node_frag        130    290     24  145    1 : tunables  120   60    8 : slabdata      2      2      0
uid_cache                0      0     48   78    1 : tunables  120   60    8 : slabdata      0      0      0
UNIX                    24     32    480    8    1 : tunables   54   27    8 : slabdata      4      4      0
ip_mrt_cache             0      0     96   40    1 : tunables  120   60    8 : slabdata      0      0      0
UDP-Lite                 0      0    560    7    1 : tunables   54   27    8 : slabdata      0      0      0
tcp_bind_bucket          6    113     32  113    1 : tunables  120   60    8 : slabdata      1      1      0
inet_peer_cache          8     24    160   24    1 : tunables  120   60    8 : slabdata      1      1      0
ip_fib_trie              7    113     32  113    1 : tunables  120   60    8 : slabdata      1      1      0
ip_fib_alias             8    145     24  145    1 : tunables  120   60    8 : slabdata      1      1      0
ip_dst_cache             6     27    144   27    1 : tunables  120   60    8 : slabdata      1      1      0
PING                     0      0    528    7    1 : tunables   54   27    8 : slabdata      0      0      0
RAW                      4      7    544    7    1 : tunables   54   27    8 : slabdata      1      1      0
UDP                     13     14    560    7    1 : tunables   54   27    8 : slabdata      2      2      0
tw_sock_TCP              0      0    112   35    1 : tunables  120   60    8 : slabdata      0      0      0
request_sock_TCP         0      0     80   48    1 : tunables  120   60    8 : slabdata      0      0      0
TCP                      1      6   1184    6    2 : tunables   24   12    8 : slabdata      1      1      0
......
size-2048(DMA)           0      0   2048    2    1 : tunables   24   12    8 : slabdata      0      0      0
size-2048              192    192   2048    2    1 : tunables   24   12    8 : slabdata     96     96      0
size-1024(DMA)           0      0   1024    4    1 : tunables   54   27    8 : slabdata      0      0      0
size-1024              215    216   1024    4    1 : tunables   54   27    8 : slabdata     54     54      0
size-512(DMA)            0      0    512    8    1 : tunables   54   27    8 : slabdata      0      0      0
size-512               601    624    512    8    1 : tunables   54   27    8 : slabdata     78     78      0
size-256(DMA)            0      0    256   15    1 : tunables  120   60    8 : slabdata      0      0      0
size-256              1234   1245    256   15    1 : tunables  120   60    8 : slabdata     83     83      0
size-192(DMA)            0      0    256   15    1 : tunables  120   60    8 : slabdata      0      0      0
size-192               287    300    256   15    1 : tunables  120   60    8 : slabdata     20     20      0
size-128(DMA)            0      0    128   30    1 : tunables  120   60    8 : slabdata      0      0      0
size-128              1890   1890    128   30    1 : tunables  120   60    8 : slabdata     63     63      0
size-96(DMA)             0      0    128   30    1 : tunables  120   60    8 : slabdata      0      0      0
size-96                930    930    128   30    1 : tunables  120   60    8 : slabdata     31     31      0
size-64(DMA)             0      0    128   30    1 : tunables  120   60    8 : slabdata      0      0      0
size-32(DMA)             0      0    128   30    1 : tunables  120   60    8 : slabdata      0      0      0
size-64               1577   1650    128   30    1 : tunables  120   60    8 : slabdata     55     55      0
size-32               6213   6300    128   30    1 : tunables  120   60    8 : slabdata    210    210      0
kmem_cache             150    160     96   40    1 : tunables  120   60    8 : slabdata      4      4      0
```
You may notice that on an Ubuntu system the limit and batchcount columns show 0; that is because it uses the slub allocator, where slub.c defines the function as empty:
```c
void __init kmem_cache_init_late(void)
{
}
```
As an aside, here is a brief rundown of the differences between slab, slub, and slob (for the actual implementations, see mm/slab.c, mm/slub.c, and mm/slob.c in the kernel sources):
slab is the foundation on which both slub and slob build.

SLOB targets embedded systems, mainly those with very limited memory, say 32MB or less; it pays little attention to large SMP machines, although there have been some small improvements on that front recently.

The SLUB allocator was written to replace the slab code. By dropping slab's many queues and their associated overhead and simplifying the slab structure, SLUB promises better performance and better system scalability while keeping the existing slab allocator interface.
With all that said, let's close with a diagram that gives a simple picture of the slab mechanism: