基於雙核的雙系統製作

Peter盼發表於2016-04-11

最近在做一個專案,是基於雙core跑雙linux系統的架構。它和linux裡的SMP一樣把CPU分為主CPU和次CPU,不同的是兩個核心各自執行一套獨立的系統(嚴格來說這種架構屬於AMP,即非對稱多處理)。首先主CPU把主系統跑起來,再由其中的一個驅動(我們可以叫做loader驅動)載入次系統的映象並啟動次CPU,讓次CPU去跑次linux系統。也許有些小夥伴會問:兩個linux如何在不同的core上跑起來,記憶體怎麼訪問,外設怎麼訪問呢?我們這邊可以把記憶體平均分成兩份,一份給主CPU訪問,一份給次CPU訪問。至於外設,我們可以有選擇性地進行分配,相信這個沒有什麼難度,也不是我要講的重點。廢話少說,下面進入我們的雙核雙系統設計:

首先我們可以把記憶體分為如下空間:


供不同的核心訪問不同的記憶體空間。

下面是雙核雙系統的整體架構設計:


從圖中可以明顯的看出雙系統的關係,boot_module是主系統裡的一個驅動,負責載入次系統的映象。這裡我不做進一步的解釋,如果看不懂的朋友可以給我留言。

我們知道核心啟動的彙編檔案是head.S,裡面的主要工作是初始化CPU、關閉看門狗、初始化堆疊等,最終進入解壓後的核心vmlinux。參考head.S可以實現次linux系統映象的彙編初始化(下面貼出的程式碼取自RT-Thread的start_gcc.S):

/*
 * File      : start_gcc.S
 * This file is part of RT-Thread RTOS
 * COPYRIGHT (C) 2013, RT-Thread Development Team
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Change Logs:
 * Date           Author       Notes
 * 2013-07-05     Bernard      the first version
 */
/* Processor mode numbers; OR-ed with I_Bit/F_Bit and written to cpsr_c below. */
.equ Mode_USR,        0x10
.equ Mode_FIQ,        0x11
.equ Mode_IRQ,        0x12
.equ Mode_SVC,        0x13
.equ Mode_ABT,        0x17
.equ Mode_UND,        0x1B
.equ Mode_SYS,        0x1F

/* Interrupt mask bits of the CPSR. */
.equ I_Bit,           0x80            /* when I bit is set, IRQ is disabled */
.equ F_Bit,           0x40            /* when F bit is set, FIQ is disabled */

/* Per-mode stack sizes; stack_setup subtracts them from a stack address. */
.equ UND_Stack_Size,  0
.equ SVC_Stack_Size,  0
.equ ABT_Stack_Size,  0
.equ FIQ_Stack_Size,  0x100
.equ IRQ_Stack_Size,  0x100
.equ USR_Stack_Size,  0

/*
 * Sum of the exception-mode stack sizes (USR is not included).
 * It is used as the .rept count of ".long 0" for the stack area, so the area
 * actually reserved is 4 * ISR_Stack_Size bytes.
 */
#define ISR_Stack_Size  (UND_Stack_Size + SVC_Stack_Size + ABT_Stack_Size + \
                 FIQ_Stack_Size + IRQ_Stack_Size)
/*
 * Stack area shared by the startup code and the exception modes.
 * stack_start is the lowest address, stack_top the address just past the end;
 * stack_setup loads the mode stack pointers starting from stack_top.
 *
 * The area is ISR_Stack_Size words long (.rept count of 32-bit zeros).
 * Both ends are forced to 8-byte alignment: the AAPCS requires sp to be
 * 8-byte aligned at public interfaces, and nothing else in this file
 * guarantees the placement of these labels inside .bss.
 */
.globl stack_start
.globl stack_top
.bss
.balign 8
stack_start:
.rept ISR_Stack_Size
.long 0
.endr
.balign 8
stack_top:
.text
/*
 * _reset - entry point of the image.
 *
 * Sequence: write the SCU register, turn the MMU off, enter SVC mode with
 * IRQ and FIQ masked, set up the mode stacks, zero .bss, run the global
 * constructors and finally jump to rtthread_startup (no return expected,
 * lr is not set for that jump).
 */
.globl _reset
_reset:
    /*
     * invalidate SCU
     * NOTE(review): the register address is hard-coded; confirm that
     * 0xF8F0000C is the SCU invalidate register of the SoC this image
     * is booted on.
     */
    ldr     r7, =0xF8F0000C
    ldr     r6, =0xFFFF
    str     r6, [r7]

    /* disable MMU */
    mrc     p15, 0, r0, c1, c0, 0   /* read CP15 register 1             */
    bic     r0, r0, #0x1            /* clear bit 0                      */
    mcr     p15, 0, r0, c1, c0, 0   /* write value back                 */

    /*
     * set the cpu to SVC32 mode and disable interrupt
     * The mode field is replaced and I/F are set in the same write, so no
     * IRQ/FIQ can be taken before the exception stacks exist.
     */
    mrs     r0, cpsr
    bic     r0, r0, #0x1f
    orr     r0, r0, #Mode_SVC|I_Bit|F_Bit
    msr     cpsr_c, r0

    /* setup stack (nothing has been pushed yet, bl only uses lr) */
    bl      stack_setup

    /* clear .bss: word stores from __bss_start up to, not including, __bss_end */
    mov     r0,#0                   /* get a zero                       */
    ldr     r1,=__bss_start         /* bss start                        */
    ldr     r2,=__bss_end           /* bss end                          */
bss_loop:
    cmp     r1,r2                   /* check if data to clear           */
    strlo   r0,[r1],#4              /* clear 4 bytes                    */
    blo     bss_loop                /* loop until done                  */

    /*
     * call C++ constructors of global objects
     * r0 walks the table [__ctors_start__, __ctors_end__); r0/r1 are saved
     * on the stack around each call because the callee may clobber them.
     */
    ldr     r0, =__ctors_start__
    ldr     r1, =__ctors_end__
ctor_loop:
    cmp     r0, r1
    beq     ctor_end
    ldr     r2, [r0], #4            /* fetch next constructor, advance  */
    stmfd   sp!, {r0-r1}
    mov     lr, pc                  /* return address = ldmfd below     */
    bx      r2
    ldmfd   sp!, {r0-r1}
    b       ctor_loop
ctor_end:
    /* start RT-Thread Kernel: load pc from the literal word below */
    ldr     pc, _rtthread_startup
_rtthread_startup:
    .word rtthread_startup
/*
 * stack_setup - load the stack pointer of SVC, UND, ABT, FIQ and IRQ mode.
 *
 * r0 starts at stack_top and is lowered by the size of each mode stack after
 * that mode has received its sp, so the stacks are laid out top-down in the
 * order SVC, UND, ABT, FIQ, IRQ.
 *
 * SVC_Stack_Size is part of ISR_Stack_Size, so it is reserved here as well;
 * without this subtraction the SVC stack would always start at the same
 * address as the next mode stack. With a size of 0 the subtraction is a no-op.
 *
 * Called with bl from SVC mode. The routine switches back to SVC mode
 * (IRQ and FIQ masked) before "bx lr", so the lr used for the return is the
 * SVC one that bl wrote. Clobbers r0.
 */
stack_setup:
    ldr     r0, =stack_top
    @  Set the startup stack for svc
    mov     sp, r0
    sub     r0, r0, #SVC_Stack_Size
    @  Enter Undefined Instruction Mode and set its Stack Pointer
    msr     cpsr_c, #Mode_UND|I_Bit|F_Bit
    mov     sp, r0
    sub     r0, r0, #UND_Stack_Size
    @  Enter Abort Mode and set its Stack Pointer
    msr     cpsr_c, #Mode_ABT|I_Bit|F_Bit
    mov     sp, r0
    sub     r0, r0, #ABT_Stack_Size
    @  Enter FIQ Mode and set its Stack Pointer
    msr     cpsr_c, #Mode_FIQ|I_Bit|F_Bit
    mov     sp, r0
    sub     r0, r0, #FIQ_Stack_Size
    @  Enter IRQ Mode and set its Stack Pointer
    msr     cpsr_c, #Mode_IRQ|I_Bit|F_Bit
    mov     sp, r0
    sub     r0, r0, #IRQ_Stack_Size
    @  Switch back to SVC
    msr     cpsr_c, #Mode_SVC|I_Bit|F_Bit
    bx      lr
.section .text.isr, "ax"
/* exception handlers: undef, swi, padt, dabt, resv, irq, fiq          */

/*
 * vector_fiq - FIQ entry, aligned to 32 bytes.
 * Saves r0-r7 and lr on the current stack, calls rt_hw_trap_fiq, restores
 * the same registers and returns with "subs pc, lr, #4".
 */
    .p2align 5
    .globl  vector_fiq
vector_fiq:
    push    {r0-r7, lr}             @ same encoding as stmfd sp!, {...}
    bl      rt_hw_trap_fiq
    pop     {r0-r7, lr}             @ same encoding as ldmfd sp!, {...}
    subs    pc, lr, #4              @ exception return
/* Symbols referenced by the IRQ path. */
    .globl  rt_interrupt_enter
    .globl  rt_interrupt_leave
    .globl  rt_thread_switch_interrupt_flag
    .globl  rt_interrupt_from_thread
    .globl  rt_interrupt_to_thread

/*
 * vector_irq - IRQ entry, aligned to 32 bytes.
 * Pushes r0-r12 and lr, then calls rt_interrupt_enter, rt_hw_trap_irq and
 * rt_interrupt_leave in that order. Afterwards:
 *   - rt_thread_switch_interrupt_flag == 1: branch to
 *     rt_hw_context_switch_interrupt_do with r0 = address of the flag and
 *     the pushed frame still on the stack (this path does not come back);
 *   - otherwise: pop the frame and return with "subs pc, lr, #4".
 */
    .p2align 5
    .globl  vector_irq
vector_irq:
    push    {r0-r12, lr}
    bl      rt_interrupt_enter
    bl      rt_hw_trap_irq
    bl      rt_interrupt_leave

    ldr     r0, =rt_thread_switch_interrupt_flag
    ldr     r1, [r0]
    cmp     r1, #1
    beq     rt_hw_context_switch_interrupt_do

    pop     {r0-r12, lr}
    subs    pc, lr, #4
@ rt_hw_context_switch_interrupt_do - thread switch performed on IRQ exit.
@
@ Reached from vector_irq when rt_thread_switch_interrupt_flag is 1.
@ On entry r0 holds the address of that flag and the {r0-r12,lr} frame pushed
@ by vector_irq is still on the current stack.
@
@ Steps:
@   1. clear the flag;
@   2. unwind the entry frame: r4-r12 and lr are reloaded, r1 keeps pointing
@      at the saved r0-r3 which are read back later;
@   3. switch to SVC mode (IRQ and FIQ masked) and push, from high to low
@      address: pc, lr, r12-r4, r3-r0, cpsr of the interrupted thread;
@   4. store sp through the pointer held in rt_interrupt_from_thread and load
@      sp through the pointer held in rt_interrupt_to_thread;
@   5. pop cpsr (into spsr), r0-r12, lr and pc of the new thread.
@ Control never returns to vector_irq.
rt_hw_context_switch_interrupt_do:
    mov     r1,  #0         @ clear flag
    str     r1,  [r0]
    mov     r1, sp          @ r1 point to {r0-r3} in stack
    add     sp, sp, #4*4    @ step over the four saved words r0-r3
    ldmfd   sp!, {r4-r12,lr}@ reload saved registers
    mrs     r0,  spsr       @ get cpsr of interrupt thread
    sub     r2,  lr, #4     @ save old task's pc to r2
    @ Switch to SVC mode with no interrupt.
    @ From here on sp and lr are the SVC-mode banked registers.
    msr     cpsr_c, #I_Bit|F_Bit|Mode_SVC
    stmfd   sp!, {r2}       @ push old task's pc
    stmfd   sp!, {r4-r12,lr}@ push old task's lr,r12-r4
    ldmfd   r1,  {r1-r4}    @ restore r0-r3 of the interrupt thread
    stmfd   sp!, {r1-r4}    @ push old task's r0-r3
    stmfd   sp!, {r0}       @ push old task's cpsr
    ldr     r4,  =rt_interrupt_from_thread
    ldr     r5,  [r4]       @ r5 = location where the old sp has to be stored
    str     sp,  [r5]       @ store sp in preempted tasks's TCB
    ldr     r6,  =rt_interrupt_to_thread
    ldr     r7,  [r6]       @ r7 = location holding the new sp
    ldr     sp,  [r7]       @ get new task's stack pointer
    ldmfd   sp!, {r4}       @ pop new task's cpsr to spsr
    msr     spsr_cxsf, r4
    ldmfd   sp!, {r0-r12,lr,pc}^ @ pop new task's r0-r12,lr & pc, copy spsr to cpsr
/*
 * push_svc_reg - build a 17-word register frame on the current stack.
 *
 * Word index in the frame (r0 points at word 0 when the macro ends):
 *    0..12  r0-r12 as they were on exception entry
 *    13     sp read after switching to SVC mode
 *    14     lr read after switching to SVC mode
 *    15     lr of the mode the exception was taken in
 *    16     spsr of the mode the exception was taken in
 *
 * The macro leaves the CPU in SVC mode. r0 and r6 are overwritten, but only
 * after their entry values have been stored in the frame.
 * NOTE(review): the frame is assumed to match struct rt_hw_exp_stack, as the
 * size comment below says; that structure is not visible here.
 */
.macro push_svc_reg
    sub     sp, sp, #17 * 4         @/* Sizeof(struct rt_hw_exp_stack)  */
    stmia   sp, {r0 - r12}          @/* Calling r0-r12                  */
    mov     r0, sp                  @/* r0 = base of the frame          */
    mrs     r6, spsr                @/* Save CPSR                       */
    str     lr, [r0, #15*4]         @/* Push PC                         */
    str     r6, [r0, #16*4]         @/* Push CPSR                       */
    cps     #Mode_SVC               @/* sp and lr are now the SVC ones  */
    str     sp, [r0, #13*4]         @/* Save calling SP                 */
    str     lr, [r0, #14*4]         @/* Save calling LR                 */
.endm
    /*
     * trap_vector - emit one global exception entry aligned to 32 bytes.
     *   entry   : label to define
     *   handler : routine called after push_svc_reg has built the register
     *             frame (r0 holds the frame address at the call)
     * If the handler returns, execution stays on the "b ." that follows.
     */
.macro trap_vector entry, handler
    .p2align 5
    .globl  \entry
\entry:
    push_svc_reg
    bl      \handler
    b       .
.endm

    trap_vector vector_swi,   rt_hw_trap_swi
    trap_vector vector_undef, rt_hw_trap_undef
    trap_vector vector_pabt,  rt_hw_trap_pabt
    trap_vector vector_dabt,  rt_hw_trap_dabt
    trap_vector vector_resv,  rt_hw_trap_resv

這裡有個地方需要注意:記憶體地址在連結的時候有個重定位的概念,由lds檔案來完成重定位,定位後的結果可以看System.map檔案。我們的設計思路是寫一個載入映象的驅動loader.ko,該驅動負責把編寫好的核心映象載入到指定的記憶體位置,然後設定次核的啟動地址並解除次CPU的復位,進而啟動次linux系統。下面貼出驅動程式碼loader.c:

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/cpu.h>
#include <linux/memblock.h>
#include <asm/cacheflush.h>
#include <linux/dw_apb_timer.h>
#include <linux/of_irq.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/of_net.h>
#include <linux/stmmac.h>
#include <linux/phy.h>
#include <linux/micrel_phy.h>
#include <linux/sys_soc.h>

#include <asm/hardware/cache-l2x0.h>
#include <asm/hardware/gic.h>
#include <asm/mach/arch.h>
#include <asm/mach/map.h>
#include <asm/smp_twd.h>

//#define _REG32(addr)        (*(volatile uint32_t *)(addr))

//#define CPU1STARTADDR_REG   _REG32(0xFFD080C4)
//#define MPUMODRST_REG       _REG32(0xFFD05010)

/*
 * Physical address of the region the image is loaded to. loader_init()
 * converts it with phys_to_virt() for loading and passes the physical value
 * to boot_cpu1() as the start address.
 */
#define RT_BASE_ADDR 0x20000000
/* Upper bound for the image size; passed to do_load_fw() as mem_size. */
#define RT_MEM_SIZE  0x00400000
/* do_load_fw() asks vfs_read() for at most this many bytes per call. */
#define BUFF_SZ		(4 * 1024)

/*
 * File-scope register base pointer.
 * NOTE(review): no function in this file reads or writes it through this
 * file-scope name (au_serial_out() takes its own membase parameter); it is
 * also not static, so the symbol is visible outside this file.
 */
unsigned char __iomem	*membase;

/*static inline void __raw_writel(u32 v, volatile void __iomem *addr)
{
	*(volatile u32 __force *)addr = v;
}*/

/*
 * au_serial_out - store @value into the 32-bit register located @offset
 * bytes after @membase, using __raw_writel().
 */
static void au_serial_out(unsigned char __iomem	*membase, int offset, int value)
{
	unsigned char __iomem *reg = membase + offset;

	__raw_writel(value, reg);
}

/* Physical addresses of the two registers programmed by boot_cpu1(). */
#define CPU1STARTADDR_REG_PA	0xFFD080C4
#define MPUMODRST_REG_PA	0xFFD05010
/* Bit cleared in the MPU module reset register to let CPU1 run. */
#define MPUMODRST_CPU1_MASK	0x2

/*
 * boot_cpu1 - start the second core at @start_addr.
 * @start_addr: physical address written to the CPU1 start address register.
 *
 * The start address is written first, then bit 1 of the MPU module reset
 * register is cleared. writel() is used instead of __raw_writel() so that
 * the start address write is ordered before the reset release; the two
 * registers are reached through different mappings.
 *
 * The original code repeated this sequence four times; every repetition
 * wrote the same values, so it is performed once here.
 *
 * Both mappings are checked and released again. On a mapping failure an
 * error is logged and CPU1 is left untouched (the function returns void, so
 * the failure cannot be reported to the caller).
 */
void boot_cpu1(uint32_t start_addr)
{
	void __iomem *cpu1start_reg;
	void __iomem *mpumodrst_reg;
	u32 rst;

	printk("^^^^^^^^^^^^^^\n");

	/* Only one 32-bit register is needed from each block. */
	cpu1start_reg = ioremap(CPU1STARTADDR_REG_PA, sizeof(u32));
	if (!cpu1start_reg) {
		pr_err("loader: cannot map CPU1 start address register\n");
		return;
	}

	mpumodrst_reg = ioremap(MPUMODRST_REG_PA, sizeof(u32));
	if (!mpumodrst_reg) {
		pr_err("loader: cannot map MPU module reset register\n");
		iounmap(cpu1start_reg);
		return;
	}

	/* %p is the matching conversion for pointers (was %lx). */
	printk("cpu1start_addr = 0x%p\n", cpu1start_reg);
	printk("rst_manager_base_addr = 0x%p\n", mpumodrst_reg);

	writel(start_addr, cpu1start_reg);

	rst = readl(mpumodrst_reg);
	rst &= ~MPUMODRST_CPU1_MASK;
	writel(rst, mpumodrst_reg);

	iounmap(mpumodrst_reg);
	iounmap(cpu1start_reg);
}

/*
 * do_load_fw - copy a file into memory at @base_addr.
 * @filename:  path of the image file.
 * @base_addr: kernel virtual address of the destination. It must be an
 *             address of the kernel linear mapping (the caller in this file
 *             obtains it with phys_to_virt()), because virt_to_phys() is
 *             applied to it for the outer cache maintenance below.
 * @mem_size:  size of the destination area in bytes.
 *
 * Returns 0 when the whole file has been copied, -1 otherwise (open failure,
 * size query failure, empty file, file larger than @mem_size, read error or
 * premature end of file).
 *
 * Fixes over the previous version:
 *  - the file is closed on every path, including success;
 *  - a vfs_read() result of 0 ends the loop instead of spinning forever;
 *  - each read is limited to the bytes still expected, so the copy can never
 *    run past the size that was checked against @mem_size;
 *  - a negative vfs_llseek() result is detected as an error;
 *  - size_t/ssize_t values are printed with %zx/%zd;
 *  - the loaded bytes are cleaned from the data caches. flush_cache_vmap()
 *    takes (start, end), not (start, size), and on VIPT non-aliasing caches
 *    it only issues a barrier, so it did not push the image out to memory.
 */
int do_load_fw(const char* filename,
        unsigned long base_addr,
                size_t mem_size)
{
	mm_segment_t oldfs;
	struct file *flp;
	loff_t file_sz;
	loff_t pos = 0;		/* vfs_read() reads from and advances this offset */
	size_t remaining;
	size_t loaded;
	unsigned long buf_ptr = base_addr;
	int ret = -1;

	printk("loading u-boot:%s to %08lx....\n",
	       filename, buf_ptr);

	flp = filp_open(filename, O_RDONLY, 0);
	if (IS_ERR(flp)) {
		printk("loader: open file failed\n");
		return -1;
	}

	file_sz = vfs_llseek(flp, 0, SEEK_END);
	if (file_sz < 0) {
		pr_err("loader: cannot get size of %s: %lld\n",
		       filename, (long long)file_sz);
		goto out_close;
	}
	if (file_sz == 0) {
		pr_err("loader: %s is empty\n", filename);
		goto out_close;
	}
	if ((unsigned long long)file_sz > mem_size) {
		printk("rtloader: bin file too big. "
			"mem size: 0x%08zx, bin file size: 0x%08llx\n",
			mem_size, (unsigned long long)file_sz);
		goto out_close;
	}
	printk("loader: bin file size: 0x%08llx\n",
	       (unsigned long long)file_sz);

	loaded = (size_t)file_sz;
	remaining = loaded;

	/* The destination is a kernel address, so lift the user-space limit. */
	oldfs = get_fs();
	set_fs(get_ds());
	while (remaining > 0) {
		size_t chunk = min_t(size_t, BUFF_SZ, remaining);
		ssize_t len;

		len = vfs_read(flp, (void __user __force *)buf_ptr, chunk, &pos);
		if (len < 0) {
			pr_err("read %08lx error: %zd\n", buf_ptr, len);
			break;
		}
		if (len == 0) {
			pr_err("loader: unexpected end of file, %zu bytes missing\n",
			       remaining);
			break;
		}
		remaining -= len;
		buf_ptr += len;
	}
	set_fs(oldfs);

	if (remaining)
		goto out_close;

	printk("done!\n");

	/*
	 * Write the image back to memory: inner data cache by virtual address,
	 * outer cache (if any) by physical address.
	 */
	__cpuc_flush_dcache_area((void *)base_addr, loaded);
	outer_clean_range(virt_to_phys((void *)base_addr),
			  virt_to_phys((void *)base_addr) + loaded);

	ret = 0;

out_close:
	filp_close(flp, NULL);
	return ret;
}

/*
 * loader_init - module entry: load /mnt/boot/boot.bin to RT_BASE_ADDR and
 * start CPU1 on it.
 *
 * Returns 0 after boot_cpu1() has been called, -EIO when the image could not
 * be loaded. Previously a load failure was ignored and 0 was returned, which
 * left the module inserted although nothing had been started.
 *
 * The former ioremap_nocache(0xffc02000, 0x1000) has been removed: its result
 * was stored in a local that was never used and the mapping was never
 * released.
 *
 * NOTE(review): phys_to_virt() is only valid if RT_BASE_ADDR lies inside the
 * kernel linear mapping, and the region must not be in use by the running
 * kernel; neither is checked here - confirm against the memory layout of the
 * target.
 */
static int __init loader_init(void)
{
	void *va;

	printk(">>>>>>>>>>>>\n");

	va = phys_to_virt(RT_BASE_ADDR);
	pr_info("get mapping :%p -> %08x, size: %08x\n", va, RT_BASE_ADDR, RT_MEM_SIZE);

	if (do_load_fw("/mnt/boot/boot.bin", (unsigned long)va, RT_MEM_SIZE) != 0) {
		pr_err("loader: loading /mnt/boot/boot.bin failed\n");
		return -EIO;
	}

	printk("start boot boot.bin......\n");
	boot_cpu1(RT_BASE_ADDR);
	printk("end boot boot.bin......\n");

	return 0;
}

/*
 * loader_exit - module unload hook.
 * The body is empty: nothing done by loader_init() is undone on unload.
 */
static void __exit loader_exit(void)
{
}

/* Register the entry and exit hooks with the module loader. */
module_init(loader_init);
module_exit(loader_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("LOADER");


相關文章