Linux 核心 tasklet 機制和工作佇列

發表於2016-10-27

1. Tasklet機制分析

上面我們介紹了軟中斷機制，linux核心為什麼還要引入tasklet機制呢？主要原因是軟中斷的pending標誌位也就32位，一般情況是不隨意增加軟中斷處理的，而且核心也沒有提供通用的增加軟中斷的介面。其次，軟中斷處理函式要求可重入，需要考慮的競爭條件比較多，要求比較高的程式設計技巧。所以核心提供了tasklet這樣的一種通用的機制。

其實每次寫總結的文章，總是想把細節的東西說明白，所以越寫越多。這樣做的好處是能真正理解其中的機制。但是，內容太多的一個壞處就是難以記憶，所以，在講清楚講詳細的同時，我還要把精髓總結出來。Tasklet的特點，也是tasklet的精髓就是：tasklet不能休眠；同一個tasklet不能在兩個CPU上同時執行；但是不同的tasklet可能在不同CPU上同時執行，因此需要注意共享資料的保護。

主要的資料結構

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);

static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);

/*
 * One deferred-work item handled by the tasklet code shown in this file.
 */
struct tasklet_struct
{
    struct tasklet_struct *next;    /* list link; set to NULL when the item is appended to a per-CPU list */
    unsigned long state;            /* bit flags; tasklet_schedule() sets TASKLET_STATE_SCHED here */
    atomic_t count;                 /* tasklet_action() only calls func when this reads as 0 */
    void (*func)(unsigned long);    /* callback, invoked as func(data) */
    unsigned long data;             /* argument handed to func */
};

/*
 * One deferred-work item handled by the tasklet code shown in this file.
 */
struct tasklet_struct

{

/* list link; set to NULL when the item is appended to a per-CPU list */
struct tasklet_struct *next;

/* bit flags; tasklet_schedule() sets TASKLET_STATE_SCHED here */
unsigned long state;

/* tasklet_action() only calls func when this reads as 0 */
atomic_t count;

/* callback, invoked as func(data) */
void (*func)(unsigned long);

/* argument handed to func */
unsigned long data;

};

如何使用tasklet

使用tasklet比較簡單，只需要初始化一個tasklet_struct結構體，然後呼叫tasklet_schedule,就能利用tasklet機制執行初始化的func函式。

/*
 * Queue tasklet @t for execution unless it is already marked as scheduled.
 *
 * test_and_set_bit() sets TASKLET_STATE_SCHED and reports the bit's previous
 * value, so only the caller that flips the bit from clear to set goes on to
 * call __tasklet_schedule().
 */
static inline void tasklet_schedule(struct tasklet_struct *t)
{
    const int already_scheduled = test_and_set_bit(TASKLET_STATE_SCHED, &t->state);

    if (already_scheduled)
        return;

    __tasklet_schedule(t);
}

/*
 * Queue tasklet @t for execution unless it is already marked as scheduled.
 *
 * test_and_set_bit() sets TASKLET_STATE_SCHED and reports the bit's previous
 * value, so only the caller that flips the bit from clear to set goes on to
 * call __tasklet_schedule().
 */
static inline void tasklet_schedule(struct tasklet_struct *t)
{
    const int already_scheduled = test_and_set_bit(TASKLET_STATE_SCHED, &t->state);

    if (already_scheduled)
        return;

    __tasklet_schedule(t);
}

tasklet_schedule處理過程也比較簡單，就是把tasklet_struct結構體掛到tasklet_vec連結串列或者掛接到tasklet_hi_vec連結串列上，並排程軟中斷TASKLET_SOFTIRQ或者HI_SOFTIRQ

/*
 * Append tasklet @t to the current CPU's tasklet_vec list and raise
 * TASKLET_SOFTIRQ so the list gets processed.
 *
 * Local interrupts are disabled around the list update and restored to
 * their previous state afterwards.
 */
void __tasklet_schedule(struct tasklet_struct *t)
{
    unsigned long flags;

    local_irq_save(flags);
    t->next = NULL;                                 /* t becomes the last element */
    *__get_cpu_var(tasklet_vec).tail = t;           /* hook t into the slot tail points at */
    __get_cpu_var(tasklet_vec).tail = &(t->next);   /* tail now points at t's next slot */
    raise_softirq_irqoff(TASKLET_SOFTIRQ);
    local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);

/*
 * Append tasklet @t to the current CPU's tasklet_hi_vec list and raise
 * HI_SOFTIRQ so the list gets processed.
 *
 * Local interrupts are disabled around the list update and restored to
 * their previous state afterwards.
 */
void __tasklet_hi_schedule(struct tasklet_struct *t)
{
    unsigned long flags;
    
    local_irq_save(flags);
    t->next = NULL;                                     /* t becomes the last element */
    *__get_cpu_var(tasklet_hi_vec).tail = t;            /* hook t into the slot tail points at */
    __get_cpu_var(tasklet_hi_vec).tail = &(t->next);    /* tail now points at t's next slot */
    raise_softirq_irqoff(HI_SOFTIRQ);
    local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);

/*
 * Append tasklet @t to the current CPU's tasklet_vec list and raise
 * TASKLET_SOFTIRQ so the list gets processed.
 *
 * Local interrupts are disabled around the list update and restored to
 * their previous state afterwards.
 */
void __tasklet_schedule(struct tasklet_struct *t)
{
    unsigned long flags;

    local_irq_save(flags);
    t->next = NULL;                                 /* t becomes the last element */
    *__get_cpu_var(tasklet_vec).tail = t;           /* hook t into the slot tail points at */
    __get_cpu_var(tasklet_vec).tail = &(t->next);   /* tail now points at t's next slot */
    raise_softirq_irqoff(TASKLET_SOFTIRQ);
    local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);

/*
 * Append tasklet @t to the current CPU's tasklet_hi_vec list and raise
 * HI_SOFTIRQ so the list gets processed.
 *
 * Local interrupts are disabled around the list update and restored to
 * their previous state afterwards.
 */
void __tasklet_hi_schedule(struct tasklet_struct *t)

{

unsigned long flags;

local_irq_save(flags);

/* t becomes the last element */
t->next = NULL;

/* hook t into the slot tail points at */
*__get_cpu_var(tasklet_hi_vec).tail = t;

/* tail now points at t's next slot */
__get_cpu_var(tasklet_hi_vec).tail = &(t->next);

raise_softirq_irqoff(HI_SOFTIRQ);

local_irq_restore(flags);

}

EXPORT_SYMBOL(__tasklet_hi_schedule);

Tasklet執行過程

tasklet_action在軟中斷TASKLET_SOFTIRQ被排程到後會被執行，它從tasklet_vec連結串列中把tasklet_struct結構體都取下來，然後逐個執行。如果t->count的值不等於0，說明這個tasklet在排程之後被disable掉了，所以會將tasklet結構體重新放回到tasklet_vec連結串列，並重新排程TASKLET_SOFTIRQ軟中斷，在之後enable這個tasklet之後重新再執行它。tasklet_trylock失敗時也會走同樣的重新掛回流程。

/*
 * Softirq handler that runs the tasklets queued on this CPU's tasklet_vec.
 *
 * The whole pending list is detached with local interrupts disabled, then
 * each entry is processed with interrupts enabled again.  An entry whose
 * lock cannot be taken, or whose count is non-zero, is not run: it is
 * appended back to tasklet_vec and TASKLET_SOFTIRQ is raised again so it is
 * retried on a later pass.
 *
 * @a: softirq action descriptor; not used by this function.
 */
static void tasklet_action(struct softirq_action *a)
{
    struct tasklet_struct *list;

    /* Take the pending list private and leave an empty list behind. */
    local_irq_disable();
    list = __get_cpu_var(tasklet_vec).head;
    __get_cpu_var(tasklet_vec).head = NULL;
    __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
    local_irq_enable();

    while (list)
    {
        struct tasklet_struct *t = list;

        /* Advance first: t->next is overwritten if t is re-queued below. */
        list = list->next;

        if (tasklet_trylock(t))
        {
            if (!atomic_read(&t->count))
            {
                /* An entry found on the list must still carry the SCHED bit. */
                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                    BUG();
                t->func(t->data);
                tasklet_unlock(t);
                continue;
            }
            tasklet_unlock(t);
        }

        /* Could not run t now: append it again and request another pass. */
        local_irq_disable();
        t->next = NULL;
        *__get_cpu_var(tasklet_vec).tail = t;
        __get_cpu_var(tasklet_vec).tail = &(t->next);
        __raise_softirq_irqoff(TASKLET_SOFTIRQ);
        local_irq_enable();
    }
}

/*
 * Softirq handler that runs the tasklets queued on this CPU's tasklet_vec.
 *
 * The whole pending list is detached with local interrupts disabled, then
 * each entry is processed with interrupts enabled again.  An entry whose
 * lock cannot be taken, or whose count is non-zero, is not run: it is
 * appended back to tasklet_vec and TASKLET_SOFTIRQ is raised again so it is
 * retried on a later pass.
 *
 * @a: softirq action descriptor; not used by this function.
 */
static void tasklet_action(struct softirq_action *a)
{
    struct tasklet_struct *list;

    /* Take the pending list private and leave an empty list behind. */
    local_irq_disable();
    list = __get_cpu_var(tasklet_vec).head;
    __get_cpu_var(tasklet_vec).head = NULL;
    __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
    local_irq_enable();

    while (list)
    {
        struct tasklet_struct *t = list;

        /* Advance first: t->next is overwritten if t is re-queued below. */
        list = list->next;

        if (tasklet_trylock(t))
        {
            if (!atomic_read(&t->count))
            {
                /* An entry found on the list must still carry the SCHED bit. */
                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                    BUG();
                t->func(t->data);
                tasklet_unlock(t);
                continue;
            }
            tasklet_unlock(t);
        }

        /* Could not run t now: append it again and request another pass. */
        local_irq_disable();
        t->next = NULL;
        *__get_cpu_var(tasklet_vec).tail = t;
        __get_cpu_var(tasklet_vec).tail = &(t->next);
        __raise_softirq_irqoff(TASKLET_SOFTIRQ);
        local_irq_enable();
    }
}

2. Linux工作佇列

前面已經介紹了tasklet機制，有了tasklet機制為什麼還要增加工作佇列機制呢？我的理解是由於tasklet機制本身的限制，使得tasklet中的回撥函式有很多的限制，比如不能有休眠的操作等等。而使用工作佇列機制，需要處理的函式在程式上下文中呼叫，休眠操作都是允許的。但是工作佇列的實時性不如tasklet，採用工作佇列的例程可能不能在短時間內被呼叫執行。

資料結構說明

首先需要說明的是workqueue_struct和cpu_workqueue_struct這兩個資料結構，建立一個工作佇列首先需要建立workqueue_struct，然後可以在每個CPU上建立一個cpu_workqueue_struct管理結構體。

/*
 * Per-CPU part of a workqueue: the pending work list plus the bookkeeping
 * used by worker_thread() and run_workqueue().
 */
struct cpu_workqueue_struct
{
    spinlock_t lock;                    /* held by run_workqueue() while it touches worklist */
    
    struct list_head worklist;          /* pending items, linked through work_struct.entry */
    wait_queue_head_t more_work;        /* worker_thread() waits here while worklist is empty */
    struct work_struct *current_work;   /* item run_workqueue() is executing; reset to NULL afterwards */
    
    struct workqueue_struct *wq;        /* workqueue this per-CPU queue belongs to (read as cwq->wq) */
    struct task_struct *thread;         /* presumably the thread running worker_thread() for this queue -- TODO confirm */
    
    int run_depth;        /* Detect run_workqueue() recursion depth */
} ____cacheline_aligned;

/*
 * The externally visible workqueue abstraction is an array of
 * per-CPU workqueues:
 *
 * Instances are allocated and filled in by __create_workqueue_key().
 */
struct workqueue_struct
{
    struct cpu_workqueue_struct *cpu_wq;    /* per-CPU queues, obtained from alloc_percpu() */
    struct list_head list;                  /* link on the global `workqueues` list */
    const char *name;                       /* name passed in at creation time */
    int singlethread;                       /* non-zero: only one worker thread is created */
    int freezeable;        /* Freeze threads during suspend */
    int rt;                                 /* copied from the creation argument; its use is not visible here */
#ifdef CONFIG_LOCKDEP
    struct lockdep_map lockdep_map;         /* acquired by run_workqueue() around each work callback */
#endif
};

/*
 * Per-CPU part of a workqueue: the pending work list plus the bookkeeping
 * used by worker_thread() and run_workqueue().
 */
struct cpu_workqueue_struct

{

/* held by run_workqueue() while it touches worklist */
spinlock_t lock;

/* pending items, linked through work_struct.entry */
struct list_head worklist;

/* worker_thread() waits here while worklist is empty */
wait_queue_head_t more_work;

/* item run_workqueue() is executing; reset to NULL afterwards */
struct work_struct *current_work;

/* workqueue this per-CPU queue belongs to (read as cwq->wq) */
struct workqueue_struct *wq;

/* presumably the thread running worker_thread() for this queue -- TODO confirm */
struct task_struct *thread;

int run_depth; /* Detect run_workqueue() recursion depth */

} ____cacheline_aligned;

* The externally visible workqueue abstraction is an array of

* per-CPU workqueues:

struct workqueue_struct

{

struct cpu_workqueue_struct *cpu_wq;

struct list_head list;

const char *name;

int singlethread;

int freezeable; /* Freeze threads during suspend */

int rt;

#ifdef CONFIG_LOCKDEP

struct lockdep_map lockdep_map;

#endif

};

Work_struct表示將要提交的處理的工作。

/*
 * A unit of deferred work: a callback plus the list linkage and state word
 * needed to queue it on a cpu_workqueue_struct.
 */
struct work_struct
{
    /* State word; the masks below split it into flag bits and the remaining bits. */
    atomic_long_t data;
#define WORK_STRUCT_PENDING 0        /* T if work item pending execution */
    /* Mask covering the low two bits of data. */
#define WORK_STRUCT_FLAG_MASK (3UL)
    /* Mask covering everything in data except the flag bits. */
#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
    struct list_head entry;     /* links the item into cwq->worklist */
    work_func_t func;           /* callback; run_workqueue() calls it with the work item itself */
#ifdef CONFIG_LOCKDEP
    struct lockdep_map lockdep_map;     /* copied by run_workqueue() before the callback runs */
#endif
};

/*
 * A unit of deferred work: a callback plus the list linkage and state word
 * needed to queue it on a cpu_workqueue_struct.
 */
struct work_struct

{

/* State word; the masks below split it into flag bits and the remaining bits. */
atomic_long_t data;

#define WORK_STRUCT_PENDING 0 /* T if work item pending execution */

/* Mask covering the low two bits of data. */
#define WORK_STRUCT_FLAG_MASK (3UL)

/* Mask covering everything in data except the flag bits. */
#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)

/* links the item into cwq->worklist */
struct list_head entry;

/* callback; run_workqueue() calls it with the work item itself */
work_func_t func;

#ifdef CONFIG_LOCKDEP

/* copied by run_workqueue() before the callback runs */
struct lockdep_map lockdep_map;

#endif

};

上面三個資料結構的關係如下圖所示

介紹主要資料結構的目的並不是想要把工作佇列具體的細節說明白，主要的目的是給大家一個總的架構的輪廓。具體的分析在下面展開。從上面的該模組主要資料結構的關係來看，主要需要分析如下幾個問題：

1. Workqueue是怎樣建立的，包括event/0核心程式的建立

2. Work（work_struct）是如何提交到工作佇列的

3. Event/0核心程式如何處理提交到佇列上的工作

Workqueue的建立

首先申請了workqueue_struct結構體記憶體，cpu_workqueue_struct結構體的記憶體。然後在init_cpu_workqueue函式中對cpu_workqueue_struct結構體進行初始化。同時呼叫create_workqueue_thread函式建立處理工作佇列的核心程式。

create_workqueue_thread中建立了如下的核心程式

p = kthread_create(worker_thread, cwq, fmt, wq->name, cpu);

最後呼叫start_workqueue_thread啟動新建立的程式。

/*
 * Allocate a workqueue and create its worker thread(s).
 *
 * @name:         stored in wq->name.
 * @singlethread: non-zero sets up only the per-CPU queue for
 *                singlethread_cpu and creates one worker thread for it;
 *                zero initializes a queue for every possible CPU and
 *                creates a worker for each CPU that is online.
 * @freezeable:   stored in wq->freezeable.
 * @rt:           stored in wq->rt.
 * @key:          lockdep class key handed to lockdep_init_map().
 * @lock_name:    lockdep name handed to lockdep_init_map().
 *
 * Returns the new workqueue, or NULL when an allocation fails or
 * create_workqueue_thread() reports an error; in the latter case the
 * partially built workqueue is torn down with destroy_workqueue().
 */
struct workqueue_struct *__create_workqueue_key(const char *name,
                                                int singlethread,
                                                int freezeable,
                                                int rt,
                                                struct lock_class_key *key,
                                                const char *lock_name)
{
    struct workqueue_struct *wq;
    struct cpu_workqueue_struct *cwq;
    int err = 0, cpu;

    wq = kzalloc(sizeof(*wq), GFP_KERNEL);
    if (!wq)
        return NULL;

    wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
    if (!wq->cpu_wq)
    {
        kfree(wq);
        return NULL;
    }

    wq->name = name;
    lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
    wq->singlethread = singlethread;
    wq->freezeable = freezeable;
    wq->rt = rt;
    INIT_LIST_HEAD(&wq->list);

    if (singlethread)
    {
        cwq = init_cpu_workqueue(wq, singlethread_cpu);
        err = create_workqueue_thread(cwq, singlethread_cpu);
        start_workqueue_thread(cwq, -1);
    }
    else
    {
        cpu_maps_update_begin();
        /*
         * We must place this wq on list even if the code below fails.
         * cpu_down(cpu) can remove cpu from cpu_populated_map before
         * destroy_workqueue() takes the lock, in that case we leak
         * cwq[cpu]->thread.
         */
        spin_lock(&workqueue_lock);
        list_add(&wq->list, &workqueues);
        spin_unlock(&workqueue_lock);
        /*
         * We must initialize cwqs for each possible cpu even if we
         * are going to call destroy_workqueue() finally. Otherwise
         * cpu_up() can hit the uninitialized cwq once we drop the
         * lock.
         */
        for_each_possible_cpu(cpu)
        {
            cwq = init_cpu_workqueue(wq, cpu);
            /* After a failure keep initializing cwqs but create no more threads. */
            if (err || !cpu_online(cpu))
                continue;
            err = create_workqueue_thread(cwq, cpu);
            start_workqueue_thread(cwq, cpu);
        }
        cpu_maps_update_done();
    }

    if (err)
    {
        destroy_workqueue(wq);
        wq = NULL;
    }
    return wq;
}
EXPORT_SYMBOL_GPL(__create_workqueue_key);

struct workqueue_struct *__create_workqueue_key(const char *name,

int singlethread,

int freezeable,

int rt,

struct lock_class_key *key,

const char *lock_name)

{

struct workqueue_struct *wq;

struct cpu_workqueue_struct *cwq;

int err = 0, cpu;wq = kzalloc(sizeof(*wq), GFP_KERNEL);

if (!wq)

return NULL;

wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);

if (!wq->cpu_wq)

{

kfree(wq);

return NULL;

}

wq->name = name;

lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);

wq->singlethread = singlethread;

wq->freezeable = freezeable;

wq->rt = rt;

INIT_LIST_HEAD(&wq->list);

if (singlethread)

{

cwq = init_cpu_workqueue(wq, singlethread_cpu);

err = create_workqueue_thread(cwq, singlethread_cpu);

start_workqueue_thread(cwq, -1);

}

else

{

cpu_maps_update_begin();

* We must place this wq on list even if the code below fails.

* cpu_down(cpu) can remove cpu from cpu_populated_map before

* destroy_workqueue() takes the lock, in that case we leak

* cwq[cpu]->thread.

spin_lock(&workqueue_lock);

list_add(&wq->list, &workqueues);

spin_unlock(&workqueue_lock);

* We must initialize cwqs for each possible cpu even if we

* are going to call destroy_workqueue() finally. Otherwise

* cpu_up() can hit the uninitialized cwq once we drop the

* lock.

for_each_possible_cpu(cpu)

{

cwq = init_cpu_workqueue(wq, cpu);

if (err || !cpu_online(cpu))

continue;

err = create_workqueue_thread(cwq, cpu);

start_workqueue_thread(cwq, cpu);

}

cpu_maps_update_done();

}

if (err)

{

destroy_workqueue(wq);

wq = NULL;

}

return wq;

}

EXPORT_SYMBOL_GPL(__create_workqueue_key);

向工作佇列中新增工作

schedule_work 函式向工作佇列中新增任務。這個介面比較簡單，無非是一些佇列操作，不再敘述。

/**
 * schedule_work - put work task in global workqueue
 * @work: job to be done
 *
 * This puts a job in the kernel-global workqueue.
 *
 * Thin wrapper: forwards @work to queue_work() on keventd_wq and returns
 * queue_work()'s result unchanged.
 */
int schedule_work(struct work_struct *work)
{
    return queue_work(keventd_wq, work);
}
EXPORT_SYMBOL(schedule_work);

/**

* schedule_work - put work task in global workqueue

* @work: job to be done

* This puts a job in the kernel-global workqueue.

int schedule_work(struct work_struct *work)

{

return queue_work(keventd_wq, work);

}

EXPORT_SYMBOL(schedule_work);

工作佇列核心程式的處理過程

在建立工作佇列的時候，我們建立了一個或者多個程式來處理掛到佇列上的工作。這個核心程式的主要函式體為worker_thread，這個函式比較有意思的地方就是，它透過set_user_nice(current, -5)調整了自己的nice值（nice值越小優先順序越高，-5比預設的0略高）。但它仍然是一個參與普通排程的核心程式，並不是在中斷上下文中執行，所以在系統負載大的時候，採用工作佇列執行的操作可能存在較大的延遲。

就函式的執行流程來說是真心的簡單，只是從佇列中取出work，從佇列中刪除掉，清除掉pending標記，並執行work設定的回撥函式。

/*
 * Main loop of a workqueue worker thread.
 *
 * @__cwq: the cpu_workqueue_struct this thread serves, passed as void *.
 *
 * Each iteration waits on cwq->more_work unless the task is freezing, a
 * stop was requested, or work is already queued; it then lets the freezer
 * run and drains the queue via run_workqueue().  Returns 0 once
 * kthread_should_stop() reports true.
 */
static int worker_thread(void *__cwq)
{
    struct cpu_workqueue_struct *cwq = __cwq;
    DEFINE_WAIT(wait);

    if (cwq->wq->freezeable)
        set_freezable();

    set_user_nice(current, -5);

    for (;;)
    {
        /* Get onto the wait queue before testing the conditions below. */
        prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
        if (!freezing(current) &&
            !kthread_should_stop() &&
            list_empty(&cwq->worklist))
            schedule();
        finish_wait(&cwq->more_work, &wait);

        try_to_freeze();

        if (kthread_should_stop())
            break;

        run_workqueue(cwq);
    }

    return 0;
}

static void run_workqueue(struct cpu_workqueue_struct *cwq)
{
    spin_lock_irq(&cwq->lock);
    cwq->run_depth++;
    if (cwq->run_depth > 3)
    {
        /* morton gets to eat his hat */
        printk("%s: recursion depth exceeded: %dn",
               __func__, cwq->run_depth);
        dump_stack();
    }
    while (!list_empty(&cwq->worklist))
    {
        struct work_struct *work = list_entry(cwq->worklist.next,
                                              struct work_struct, entry);
        work_func_t f = work->func;
#ifdef CONFIG_LOCKDEP
        /*
         * It is permissible to free the struct work_struct
         * from inside the function that is called from it,
         * this we need to take into account for lockdep too.
         * To avoid bogus "held lock freed" warnings as well
         * as problems when looking into work->lockdep_map,
         * make a copy and use that here.
         */
        struct lockdep_map lockdep_map = work->lockdep_map;
#endifcwq->current_work = work;
        list_del_init(cwq->worklist.next);
        spin_unlock_irq(&cwq->lock);
        
        BUG_ON(get_wq_data(work) != cwq);
        work_clear_pending(work);
        lock_map_acquire(&cwq->wq->lockdep_map);
        lock_map_acquire(&lockdep_map);
        f(work);
        lock_map_release(&lockdep_map);
        lock_map_release(&cwq->wq->lockdep_map);
        
        if (unlikely(in_atomic() || lockdep_depth(current) > 0))
        {
            printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
                   "%s/0x%08x/%dn",
                   current->comm, preempt_count(),
                   task_pid_nr(current));
            printk(KERN_ERR "    last function: ");
            print_symbol("%sn", (unsigned long)f);
            debug_show_held_locks(current);
            dump_stack();
        }
        
        spin_lock_irq(&cwq->lock);
        cwq->current_work = NULL;
    }
    cwq->run_depth--;
    spin_unlock_irq(&cwq->lock);
}

/*
 * Main loop of a workqueue worker thread.
 *
 * @__cwq: the cpu_workqueue_struct this thread serves, passed as void *.
 *
 * Each iteration waits on cwq->more_work unless the task is freezing, a
 * stop was requested, or work is already queued; it then lets the freezer
 * run and drains the queue via run_workqueue().  Returns 0 once
 * kthread_should_stop() reports true.
 */
static int worker_thread(void *__cwq)
{
    struct cpu_workqueue_struct *cwq = __cwq;
    DEFINE_WAIT(wait);

    if (cwq->wq->freezeable)
        set_freezable();

    set_user_nice(current, -5);

    for (;;)
    {
        /* Get onto the wait queue before testing the conditions below. */
        prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
        if (!freezing(current) &&
            !kthread_should_stop() &&
            list_empty(&cwq->worklist))
            schedule();
        finish_wait(&cwq->more_work, &wait);

        try_to_freeze();

        if (kthread_should_stop())
            break;

        run_workqueue(cwq);
    }

    return 0;
}

static void run_workqueue(struct cpu_workqueue_struct *cwq)

{

spin_lock_irq(&cwq->lock);

cwq->run_depth++;

if (cwq->run_depth > 3)

{

/* morton gets to eat his hat */

printk("%s: recursion depth exceeded: %dn",

__func__, cwq->run_depth);

dump_stack();

}

while (!list_empty(&cwq->worklist))

{

struct work_struct *work = list_entry(cwq->worklist.next,

struct work_struct, entry);

work_func_t f = work->func;

#ifdef CONFIG_LOCKDEP

* It is permissible to free the struct work_struct

* from inside the function that is called from it,

* this we need to take into account for lockdep too.

* To avoid bogus "held lock freed" warnings as well

* as problems when looking into work->lockdep_map,

* make a copy and use that here.

struct lockdep_map lockdep_map = work->lockdep_map;

#endifcwq->current_work = work;

list_del_init(cwq->worklist.next);

spin_unlock_irq(&cwq->lock);

BUG_ON(get_wq_data(work) != cwq);

work_clear_pending(work);

lock_map_acquire(&cwq->wq->lockdep_map);

lock_map_acquire(&lockdep_map);

f(work);

lock_map_release(&lockdep_map);

lock_map_release(&cwq->wq->lockdep_map);

if (unlikely(in_atomic() || lockdep_depth(current) > 0))

{

printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "

"%s/0x%08x/%dn",

current->comm, preempt_count(),

task_pid_nr(current));

printk(KERN_ERR " last function: ");

print_symbol("%sn", (unsigned long)f);

debug_show_held_locks(current);

dump_stack();

}

spin_lock_irq(&cwq->lock);

cwq->current_work = NULL;

}

cwq->run_depth--;

spin_unlock_irq(&cwq->lock);

}

linux核心--使用核心佇列實現ringbuffer
2020-11-22
Linux佇列
Softirq和tasklet
2024-04-01
libuv工作佇列
2021-01-29
佇列
linux核心級同步機制--futex
2019-07-29
Linux
Linux核心記憶體保護機制：aslr和canary
2024-12-10
Linux記憶體
如何使用 Laravel 的佇列機制？有哪些場景需要使用佇列 ?
2021-12-03
Laravel佇列
Laravel RabbitMQ 工作佇列
2020-05-12
LaravelMQ佇列
Linux核心機制—smp_hotplug_thread
2024-10-09
Linuxthread
linux核心IDR機制詳解【轉】
2019-03-15
Linux
佇列和迴圈佇列
2020-10-18
佇列
NameNode和SecondaryNameNode工作機制
2020-07-28
【原創】Linux中斷子系統（三）-softirq和tasklet
2020-06-14
Linux
RabbitMQ 入門 - 工作佇列
2018-03-26
MQ佇列
springMVC 的工作原理和機制
2018-03-09
SpringMVC
RabbitMQ訊息佇列（三）：任務分發機制
2018-04-02
MQ佇列
佇列手算到機算入門佇列迴圈佇列
2020-11-23
佇列
直播平臺製作，重試機制和死信佇列的合理運用
2024-01-06
佇列
理解Linux 中sed命令的工作機制
2024-10-29
Linux
rabbitmq confirm機制路由不到佇列無法觸發handleNack
2020-10-28
MQ路由佇列
Android Handler機制之迴圈訊息佇列的退出
2018-09-22
Android佇列
07-主佇列和全域性佇列
2018-12-14
佇列
棧和佇列
2024-11-08
佇列
單機最快的佇列Disruptor解析和使用
2023-04-05
佇列
訊息機制篇——初識訊息與訊息佇列
2022-02-15
佇列
RabbitMQ訊息佇列（九）：Publisher的訊息確認機制
2018-04-02
MQ佇列
容器的工作原理和隔離機制
2018-10-21
Linux核心同步機制之（五）：Read Write spin lock【轉】
2019-03-07
Linux
ETCD核心機制解析
2020-11-04
Java核心反射機制
2021-07-08
Java反射
概述nodejs核心機制
2018-06-11
NodeJS
用redis實現訊息佇列（實時消費+ack機制）
2018-05-03
Redis佇列
【訊息佇列】RabbitMq-宣告佇列與交換機
2024-12-01
佇列MQ
Linux的管道機制和重定向
2018-05-15
Linux
java執行緒池-工作佇列workQueue
2021-09-09
Java執行緒佇列
佇列、阻塞佇列
2018-05-08
佇列
ModStart排程和佇列
2021-12-09
佇列
Chapter 2 棧和佇列
2018-03-25
APT佇列
佇列-單端佇列
2024-04-08
佇列
概述javascript部分核心機制
2018-06-10
JavaScript

Linux 核心 tasklet 機制和工作佇列

相關文章