兩種形式的dma 實現memory copy程式碼

作者：最好不过如今　發表於 2024-08-21

原文網址 : https://www.cnblogs.com/zhanyaowang/p/18371909

在飛思卡爾的時候，需要用SDMA實現記憶體到記憶體memory copy的功能，需要做兩部分的工作：

1：在DMA controller中加入M2M的支援。

2：寫一個驅動來呼叫DMA controller的M2M功能。

上面的2實際上對於不同的SoC來講，思路是一樣的，有通用性，在這裡總結下。

當時在實現的時候，用了兩種方法：

1：cyclic, 用dma_alloc_coherent分配兩段dma 記憶體空間，一段做src，一段做dst，再呼叫DMA controller介面將src中的資料複製到dst。因為DMA操作的是實體位址上連續的記憶體空間，而dma_alloc_coherent分配不了太大的連續實體位址空間，所以這種方式只能實現小批次資料的M2M複製。

2：sg, 分配多段記憶體空間（下面的程式碼實際上是用kzalloc(GFP_DMA)分配，再用dma_map_sg做DMA對映，而不是dma_alloc_coherent），其中一半的緩衝區做src，另一半做dst。透過device_prep_dma_sg將各自獨立的src/dst緩衝區串接起來。這種方式可以把若干段分散的實體位址連結成邏輯上連續的空間，因此可以實現較大資料量的複製。

順便複習下dma engine的用法：
Linux/Documentation/dmaengine.txt

13 The slave DMA usage consists of following steps:
14 1. Allocate a DMA slave channel
15 2. Set slave and controller specific parameters
16 3. Get a descriptor for transaction
17 4. Submit the transaction
18 5. Issue pending requests and wait for callback notification

20 1. Allocate a DMA slave channel

27 Interface:
28 struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
29 dma_filter_fn filter_fn,
30 void *filter_param);

48 2. Set slave and controller specific parameters

61 Interface:
62 int dmaengine_slave_config(struct dma_chan *chan,
63 struct dma_slave_config *config)

70 3. Get a descriptor for transaction

86 Interface:
87 struct dma_async_tx_descriptor *(*chan->device->device_prep_slave_sg)(
88 struct dma_chan *chan, struct scatterlist *sgl,
89 unsigned int sg_len, enum dma_data_direction direction,
90 unsigned long flags);
91
92 struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_cyclic)(
93 struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
94 size_t period_len, enum dma_data_direction direction);
95
96 struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)(
97 struct dma_chan *chan, struct dma_interleaved_template *xt,
98 unsigned long flags);

139 4. Submit the transaction

144 Interface:
145 dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc)

153 5. Issue pending DMA requests and wait for callback notification

163 Interface:
164 void dma_async_issue_pending(struct dma_chan *chan);

傳輸結束的時候可以用：

168 1. int dmaengine_terminate_all(struct dma_chan *chan)

看下面程式碼：

1: cyclic方式實現

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mman.h>
#include <linux/init.h>
#include <linux/dma-mapping.h>
#include <linux/fs.h>
#include <linux/version.h>
#include <linux/delay.h>
#include <mach/dma.h>

#include <linux/dmaengine.h>
#include <linux/device.h>

#include <linux/io.h>
#include <linux/delay.h>

/* File-scope state shared by the file operations of the cyclic test driver. */
static int gMajor; /* major number returned by register_chrdev() */
static struct class *dma_tm_class; /* class created in sdma_init_module() */
static char *wbuf; /* CPU address of the source buffer (filled in sdma_write) */
static char *rbuf; /* CPU address of the destination buffer (dumped in sdma_read) */
static dma_addr_t wpaddr; /* bus address of wbuf, used as the DMA source address */
static dma_addr_t rpaddr; /* bus address of rbuf, used as the DMA destination address */

/* Channel obtained in sdma_open() and released in sdma_release(). */
struct dma_chan *dma_m2m_chan;

/* Signalled by dma_m2m_callback(); sdma_read() blocks on it. */
struct completion dma_m2m_ok;

/* Size in bytes of each of the two coherent buffers. */
#define SDMA_BUF_SIZE 1024

/*
 * Filter callback passed to dma_request_channel().
 *
 * A channel is accepted only when imx_dma_is_general_purpose() reports it as
 * usable; for an accepted channel the caller-supplied parameter is stored in
 * chan->private. Returns true to accept the channel, false to skip it.
 */
static bool dma_m2m_filter(struct dma_chan *chan, void *param)
{
	if (imx_dma_is_general_purpose(chan)) {
		chan->private = param;
		return true;
	}

	return false;
}

int sdma_open(struct inode * inode, struct file * filp)
{
dma_cap_mask_t dma_m2m_mask;
struct imx_dma_data m2m_dma_data = {0};

init_completion(&dma_m2m_ok);

dma_cap_zero(dma_m2m_mask);
dma_cap_set(DMA_SLAVE, dma_m2m_mask);
m2m_dma_data.peripheral_type = IMX_DMATYPE_MEMORY;
m2m_dma_data.priority = DMA_PRIO_HIGH;

dma_m2m_chan = dma_request_channel(dma_m2m_mask, dma_m2m_filter, &m2m_dma_data);
if (!dma_m2m_chan) {
printk("Error opening the SDMA memory to memory channel\n");
return -EINVAL;
}

wbuf = dma_alloc_coherent(NULL, SDMA_BUF_SIZE, &wpaddr, GFP_DMA);
rbuf = dma_alloc_coherent(NULL, SDMA_BUF_SIZE, &rpaddr, GFP_DMA);

return 0;
}

/*
 * release() handler: stop the transfer, give the channel back and free both
 * coherent buffers allocated in sdma_open(). Always returns 0.
 */
int sdma_release(struct inode * inode, struct file * filp)
{
	/*
	 * A cyclic transfer does not stop by itself; terminate it before the
	 * buffers it reads and writes are freed.
	 */
	dmaengine_terminate_all(dma_m2m_chan);
	dma_release_channel(dma_m2m_chan);
	dma_m2m_chan = NULL;

	dma_free_coherent(NULL, SDMA_BUF_SIZE, wbuf, wpaddr);
	dma_free_coherent(NULL, SDMA_BUF_SIZE, rbuf, rpaddr);
	/* do not leave dangling pointers behind for a later open()/read() */
	wbuf = NULL;
	rbuf = NULL;

	return 0;
}

/*
 * read() handler: block until dma_m2m_callback() signals dma_m2m_ok, then
 * dump every byte of the source and destination buffers to the kernel log.
 *
 * Nothing is copied to the user buffer; the function always returns 0.
 */
ssize_t sdma_read (struct file *filp, char __user * buf, size_t count, loff_t * offset)
{
	int pos;

	wait_for_completion(&dma_m2m_ok);

	for (pos = 0; pos < SDMA_BUF_SIZE; pos++)
		printk("src_data_%d = %x\n", pos, wbuf[pos]);

	for (pos = 0; pos < SDMA_BUF_SIZE; pos++)
		printk("dst_data_%d = %x\n", pos, rbuf[pos]);

	return 0;
}

/*
 * Descriptor callback installed by sdma_write(): logs its own name and
 * signals dma_m2m_ok so that a waiter in sdma_read() can proceed.
 * The data argument is unused.
 */
static void dma_m2m_callback(void *data)
{
	printk("in %s\n",__func__);

	complete(&dma_m2m_ok);
}

/*
 * write() handler: fill the source buffer with a test pattern, configure the
 * channel for a memory-to-memory transfer from wbuf to rbuf and start it.
 *
 * The user data in buf is ignored. Returns 0 once the transfer has been
 * queued, a negative errno if the channel rejects the configuration, or
 * -EINVAL if no descriptor could be prepared. Completion is reported through
 * dma_m2m_callback().
 */
ssize_t sdma_write(struct file * filp, const char __user * buf, size_t count, loff_t * offset)
{
	u32 *index1 = (u32 *)wbuf;
	/* zero the config so fields not set below are not stack garbage */
	struct dma_slave_config dma_m2m_config = {0};
	struct dma_async_tx_descriptor *dma_m2m_desc;
	int ret;
	int i;

	/*
	 * wbuf is SDMA_BUF_SIZE bytes long, i.e. SDMA_BUF_SIZE / 4 u32 words.
	 * Iterating SDMA_BUF_SIZE times here would write four times past the
	 * end of the allocation.
	 */
	for (i = 0; i < SDMA_BUF_SIZE / 4; i++)
		*(index1 + i) = 0x12345678;

	for (i = 0; i < SDMA_BUF_SIZE; i++)
		printk("%d : %x\n",i, *(wbuf+i) );

	dma_m2m_config.direction = DMA_MEM_TO_MEM;
	dma_m2m_config.dst_addr = rpaddr;
	dma_m2m_config.src_addr = wpaddr;
	dma_m2m_config.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
	dma_m2m_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
	dma_m2m_config.dst_maxburst = 4;
	dma_m2m_config.src_maxburst = 4;
	ret = dmaengine_slave_config(dma_m2m_chan, &dma_m2m_config);
	if (ret)
		return ret;

	/*
	 * buf_addr is passed as 0 exactly as before; presumably the
	 * M2M-enabled controller takes both addresses from the slave config
	 * above -- TODO confirm against the patched SDMA driver.
	 */
	dma_m2m_desc = dma_m2m_chan->device->device_prep_dma_cyclic(
			dma_m2m_chan, 0, SDMA_BUF_SIZE, SDMA_BUF_SIZE/2, DMA_MEM_TO_MEM);
	if (!dma_m2m_desc) {
		printk(KERN_ERR "SDMA test: cannot prepare cyclic descriptor\n");
		return -EINVAL;
	}

	dma_m2m_desc->callback = dma_m2m_callback;
	dmaengine_submit(dma_m2m_desc);
	/* submit only queues the descriptor; this actually starts the engine */
	dma_async_issue_pending(dma_m2m_chan);

	return 0;
}

/* File operations exposed by the "sdma_test" character device. */
struct file_operations dma_fops = {
	.open    = sdma_open,
	.release = sdma_release,
	.read    = sdma_read,
	.write   = sdma_write,
};

/*
 * Module init: register the "sdma_test" character device with a dynamically
 * assigned major number, create its class and create the device node.
 *
 * Returns 0 on success or a negative errno. Every failure path undoes the
 * steps that had already succeeded.
 */
int __init sdma_init_module(void)
{
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26))
	struct device *temp_class;
#else
	struct class_device *temp_class;
#endif
	int error;

	/* register a character device */
	error = register_chrdev(0, "sdma_test", &dma_fops);
	if (error < 0) {
		printk("SDMA test driver can't get major number\n");
		return error;
	}
	gMajor = error;
	printk("SDMA test major number = %d\n",gMajor);

	dma_tm_class = class_create(THIS_MODULE, "sdma_test");
	if (IS_ERR(dma_tm_class)) {
		printk(KERN_ERR "Error creating sdma test module class.\n");
		unregister_chrdev(gMajor, "sdma_test");
		return PTR_ERR(dma_tm_class);
	}

	/* the creation helper and its signature depend on the kernel version */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28))
	temp_class = device_create(dma_tm_class, NULL,
				   MKDEV(gMajor, 0), NULL, "sdma_test");
#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26))
	temp_class = device_create(dma_tm_class, NULL,
				   MKDEV(gMajor, 0), "sdma_test");
#else
	temp_class = class_device_create(dma_tm_class, NULL,
					 MKDEV(gMajor, 0), NULL,
					 "sdma_test");
#endif
	if (IS_ERR(temp_class)) {
		/* report the real cause instead of a bare -1 (== -EPERM) */
		error = PTR_ERR(temp_class);
		printk(KERN_ERR "Error creating sdma test class device.\n");
		class_destroy(dma_tm_class);
		unregister_chrdev(gMajor, "sdma_test");
		return error;
	}

	printk("SDMA test Driver Module loaded\n");
	return 0;
}

static void sdma_cleanup_module(void)
{
unregister_chrdev(gMajor, "sdma_test");
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26))
device_destroy(dma_tm_class, MKDEV(gMajor, 0));
#else
class_device_destroy(dma_tm_class, MKDEV(gMajor, 0));
#endif
class_destroy(dma_tm_class);

printk("SDMA test Driver Module Unloaded\n");
}

module_init(sdma_init_module);
module_exit(sdma_cleanup_module);

2：sg方式實現

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mman.h>
#include <linux/init.h>
#include <linux/dma-mapping.h>
#include <linux/fs.h>
#include <linux/version.h>
#include <linux/delay.h>
#include <mach/dma.h>

#include <linux/dmaengine.h>
#include <linux/device.h>

#include <linux/io.h>
#include <linux/delay.h>

/* File-scope state shared by the file operations of the sg test driver. */
static int gMajor; /* major number of device */
static struct class *dma_tm_class; /* class created in sdma_init_module() */
/* Source buffers: filled with test patterns and used as the sg source list. */
u32 *wbuf, *wbuf2, *wbuf3, *wbuf4;
/* Destination buffers: used as the sg destination list and verified in sdma_read(). */
u32 *rbuf, *rbuf2, *rbuf3, *rbuf4;

/* Channel obtained in sdma_open() and released in sdma_release(). */
struct dma_chan *dma_m2m_chan;

/* Signalled by dma_m2m_callback(); sdma_write() blocks on it. */
struct completion dma_m2m_ok;

/* sg describes the four source buffers, sg2 the four destination buffers. */
struct scatterlist sg[4], sg2[4];

/*
 * Size in bytes of each buffer. Parenthesised so that the macro expands
 * safely inside larger expressions (e.g. "x % SDMA_BUF_SIZE").
 */
#define SDMA_BUF_SIZE (1024 * 60)

/*
 * Filter callback for dma_request_channel(): rejects any channel that
 * imx_dma_is_general_purpose() does not approve; otherwise records the
 * caller's parameter in chan->private and accepts the channel.
 */
static bool dma_m2m_filter(struct dma_chan *chan, void *param)
{
	bool accepted = false;

	if (imx_dma_is_general_purpose(chan)) {
		chan->private = param;
		accepted = true;
	}

	return accepted;
}

int sdma_open(struct inode *inode, struct file *filp)
{
dma_cap_mask_t dma_m2m_mask;
struct imx_dma_data m2m_dma_data;

init_completion(&dma_m2m_ok);

dma_cap_zero(dma_m2m_mask);
dma_cap_set(DMA_SLAVE, dma_m2m_mask);
m2m_dma_data.peripheral_type = IMX_DMATYPE_MEMORY;
m2m_dma_data.priority = DMA_PRIO_HIGH;
dma_m2m_chan = dma_request_channel(dma_m2m_mask, dma_m2m_filter,
&m2m_dma_data);
if (!dma_m2m_chan) {
printk("Error opening the SDMA memory to memory channel\n");
return -EINVAL;
}

wbuf = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
if(!wbuf) {
printk("error wbuf !!!!!!!!!!!\n");
return -1;
}

wbuf2 = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
if(!wbuf2) {
printk("error wbuf2 !!!!!!!!!!!\n");
return -1;
}

wbuf3 = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
if(!wbuf3) {
printk("error wbuf3 !!!!!!!!!!!\n");
return -1;
}

wbuf4 = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
if(!wbuf4) {
printk("error wbuf4 !!!!!!!!!!!\n");
return -1;
}

rbuf = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
if(!rbuf) {
printk("error rbuf !!!!!!!!!!!\n");
return -1;
}

rbuf2 = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
if(!rbuf2) {
printk("error rbuf2 !!!!!!!!!!!\n");
return -1;
}

rbuf3 = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
if(!rbuf3) {
printk("error rbuf3 !!!!!!!!!!!\n");
return -1;
}

rbuf4 = kzalloc(SDMA_BUF_SIZE, GFP_DMA);
if(!rbuf4) {
printk("error rbuf4 !!!!!!!!!!!\n");
return -1;
}

return 0;
}

int sdma_release(struct inode * inode, struct file * filp)
{
dmaengine_terminate_all(dma_m2m_chan);
dma_release_channel(dma_m2m_chan);
dma_m2m_chan = NULL;
kfree(wbuf);
kfree(wbuf2);
kfree(wbuf3);
kfree(rbuf);
kfree(rbuf2);
kfree(rbuf3);
return 0;
}

ssize_t sdma_read (struct file *filp, char __user * buf, size_t count,
loff_t * offset)
{
int i;
#if 0
for (i=0; i<SDMA_BUF_SIZE/4; i++) {
printk("dst data_%d : %x\n", i, *(rbuf+i));
}

for (i=0; i<SDMA_BUF_SIZE/4; i++) {
printk("dst data2_%d : %x\n", i, *(rbuf2+i));
}

for (i=0; i<SDMA_BUF_SIZE/4; i++) {
printk("dst data3_%d : %x\n", i, *(rbuf3+i));
}
#endif

for (i=0; i<SDMA_BUF_SIZE/4; i++) {
if (*(rbuf+i) != *(wbuf+i)) {
printk("buffer 1 copy falled!\n");
return 0;
}
}
printk("buffer 1 copy passed!\n");

for (i=0; i<SDMA_BUF_SIZE/4; i++) {
if (*(rbuf2+i) != *(wbuf2+i)) {
printk("buffer 2 copy falled!\n");
return 0;
}
}
printk("buffer 2 copy passed!\n");

for (i=0; i<SDMA_BUF_SIZE/4; i++) {
if (*(rbuf3+i) != *(wbuf3+i)) {
printk("buffer 3 copy falled!\n");
return 0;
}
}
printk("buffer 3 copy passed!\n");

for (i=0; i<SDMA_BUF_SIZE/4; i++) {
if (*(rbuf4+i) != *(wbuf4+i)) {
printk("buffer 4 copy falled!\n");
return 0;
}
}
printk("buffer 4 copy passed!\n");

return 0;
}

/*
 * Descriptor callback installed by sdma_write(): signals dma_m2m_ok so the
 * writer blocked in wait_for_completion() resumes. The data argument is
 * unused.
 */
static void dma_m2m_callback(void *data)
{
	complete(&dma_m2m_ok);
}

/*
 * write() handler: fill the four source buffers with test patterns, copy
 * them to the four destination buffers with a single scatter-gather DMA
 * transfer, wait for it to finish and log the elapsed time in microseconds.
 *
 * The user data in buf is ignored. Returns 0 on success, a negative errno if
 * the channel rejects the configuration, -ENOMEM if a scatterlist cannot be
 * mapped, or -EINVAL if no descriptor could be prepared.
 */
ssize_t sdma_write(struct file * filp, const char __user * buf, size_t count,
		loff_t * offset)
{
	/* zero the config so fields not set below are not stack garbage */
	struct dma_slave_config dma_m2m_config = {0};
	struct dma_async_tx_descriptor *dma_m2m_desc;
	struct timeval start_time, end_time;
	int src_nents, dst_nents;
	long elapsed_us;
	ssize_t err = 0;
	int ret;
	u32 i;

	for (i = 0; i < SDMA_BUF_SIZE/4; i++) {
		wbuf[i]  = 0x12121212;
		wbuf2[i] = 0x34343434;
		wbuf3[i] = 0x56565656;
		wbuf4[i] = 0x78787878;
	}

	dma_m2m_config.direction = DMA_MEM_TO_MEM;
	dma_m2m_config.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
	ret = dmaengine_slave_config(dma_m2m_chan, &dma_m2m_config);
	if (ret)
		return ret;

	/*
	 * dma_map_sg() takes an enum dma_data_direction, not the
	 * dma_transfer_direction stored in the slave config.
	 * DMA_BIDIRECTIONAL has the same numeric value (0) as the
	 * DMA_MEM_TO_MEM that used to be passed here, so the mapping
	 * behaviour is unchanged.
	 */
	sg_init_table(sg, 4);
	sg_set_buf(&sg[0], wbuf, SDMA_BUF_SIZE);
	sg_set_buf(&sg[1], wbuf2, SDMA_BUF_SIZE);
	sg_set_buf(&sg[2], wbuf3, SDMA_BUF_SIZE);
	sg_set_buf(&sg[3], wbuf4, SDMA_BUF_SIZE);
	src_nents = dma_map_sg(NULL, sg, 4, DMA_BIDIRECTIONAL);
	if (!src_nents) {
		printk(KERN_ERR "SDMA test: cannot map source scatterlist\n");
		return -ENOMEM;
	}

	sg_init_table(sg2, 4);
	sg_set_buf(&sg2[0], rbuf, SDMA_BUF_SIZE);
	sg_set_buf(&sg2[1], rbuf2, SDMA_BUF_SIZE);
	sg_set_buf(&sg2[2], rbuf3, SDMA_BUF_SIZE);
	sg_set_buf(&sg2[3], rbuf4, SDMA_BUF_SIZE);
	dst_nents = dma_map_sg(NULL, sg2, 4, DMA_BIDIRECTIONAL);
	if (!dst_nents) {
		printk(KERN_ERR "SDMA test: cannot map destination scatterlist\n");
		err = -ENOMEM;
		goto unmap_src;
	}

	/* hand the engine the number of entries that were actually mapped */
	dma_m2m_desc = dma_m2m_chan->device->
		device_prep_dma_sg(dma_m2m_chan, sg2, dst_nents, sg, src_nents, 0);
	if (!dma_m2m_desc) {
		printk(KERN_ERR "SDMA test: cannot prepare sg descriptor\n");
		err = -EINVAL;
		goto unmap_dst;
	}
	dma_m2m_desc->callback = dma_m2m_callback;

	do_gettimeofday(&start_time);

	dmaengine_submit(dma_m2m_desc);
	dma_async_issue_pending(dma_m2m_chan);

	wait_for_completion(&dma_m2m_ok);

	do_gettimeofday(&end_time);
	/*
	 * Subtract before scaling: tv_sec * 1000000 on its own overflows a
	 * 32-bit long.
	 */
	elapsed_us = (end_time.tv_sec - start_time.tv_sec) * 1000000L +
		     (end_time.tv_usec - start_time.tv_usec);
	printk("end - start = %ld\n", elapsed_us);

	/* unmap with the same nents that was passed to dma_map_sg() */
unmap_dst:
	dma_unmap_sg(NULL, sg2, 4, DMA_BIDIRECTIONAL);
unmap_src:
	dma_unmap_sg(NULL, sg, 4, DMA_BIDIRECTIONAL);

	return err;
}

/* File operations exposed by the "sdma_test" character device. */
struct file_operations dma_fops = {
	.open    = sdma_open,
	.release = sdma_release,
	.read    = sdma_read,
	.write   = sdma_write,
};

/*
 * Module init: register the "sdma_test" character device with a dynamically
 * assigned major number, create its class and create the device node.
 *
 * Returns 0 on success or a negative errno. Every failure path undoes the
 * steps that had already succeeded.
 */
int __init sdma_init_module(void)
{
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26))
	struct device *temp_class;
#else
	struct class_device *temp_class;
#endif
	int error;

	/* register a character device */
	error = register_chrdev(0, "sdma_test", &dma_fops);
	if (error < 0) {
		printk("SDMA test driver can't get major number\n");
		return error;
	}
	gMajor = error;
	printk("SDMA test major number = %d\n",gMajor);

	dma_tm_class = class_create(THIS_MODULE, "sdma_test");
	if (IS_ERR(dma_tm_class)) {
		printk(KERN_ERR "Error creating sdma test module class.\n");
		unregister_chrdev(gMajor, "sdma_test");
		return PTR_ERR(dma_tm_class);
	}

	/* the creation helper and its signature depend on the kernel version */
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,28))
	temp_class = device_create(dma_tm_class, NULL,
				   MKDEV(gMajor, 0), NULL, "sdma_test");
#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26))
	temp_class = device_create(dma_tm_class, NULL,
				   MKDEV(gMajor, 0), "sdma_test");
#else
	temp_class = class_device_create(dma_tm_class, NULL,
					 MKDEV(gMajor, 0), NULL,
					 "sdma_test");
#endif
	if (IS_ERR(temp_class)) {
		/* report the real cause instead of a bare -1 (== -EPERM) */
		error = PTR_ERR(temp_class);
		printk(KERN_ERR "Error creating sdma test class device.\n");
		class_destroy(dma_tm_class);
		unregister_chrdev(gMajor, "sdma_test");
		return error;
	}

	printk("SDMA test Driver Module loaded\n");
	return 0;
}

static void sdma_cleanup_module(void)
{
unregister_chrdev(gMajor, "sdma_test");
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26))
device_destroy(dma_tm_class, MKDEV(gMajor, 0));
#else
class_device_destroy(dma_tm_class, MKDEV(gMajor, 0));
#endif
class_destroy(dma_tm_class);

printk("SDMA test Driver Module Unloaded\n");
}

module_init(sdma_init_module);
module_exit(sdma_cleanup_module);

當時測出大約1秒鐘可以複製50M的資料，但是客戶還覺得不滿意。

原文連結：https://blog.csdn.net/u012769691/article/details/46814305

空中翻書的兩種實現形式以及突出特質
2023-04-07
二分查詢的兩種實現形式遞迴和迭代
2020-11-08
遞迴
python 程式池的兩種不同實現
2023-05-11
Python
虛擬主持人常見的兩種實現形式及特點
2022-01-13
MacOS下NSWindowZoomButton兩種形式（兩種綠色交通燈）
2020-10-09
MacOOM
程式碼安全兩種程式碼漏洞
2022-07-05
互動滑軌屏的幾種實現形式
2022-12-29
ton函式函式hash的兩種形式
2024-10-13
函式
Spring實現IOC容器的兩種實現方式
2020-10-29
Spring
定義函式的兩種形式及區別
2018-05-06
函式
雜湊表的兩種實現
2019-03-05
單利模式的兩種最佳實現
2018-06-11
模式
Git提交程式碼倉庫的兩種方式
2023-04-14
Git
實現高可用的兩種方案與實戰
2018-08-30
DMA第三版程式碼
2024-03-17
Out of sort memory, consider increasing server sort buffer size的兩種情況
2022-11-06
IDEServer
前端--實現隔行變色的兩種方式
2020-02-04
前端
web前端換行程式碼的幾種實現方式！
2021-03-29
Web前端行程
全息展示櫃常用的三種表現形式
2022-04-12
CQRS命令查詢分離架構的多種形式實現 - Kapil
2021-11-19
架構API
談springboot兩種實現結構
2024-05-07
Spring Boot
兩種方式實現輪播圖
2022-02-11
SpringBoot實現熱部署兩種方式！
2021-11-15
Spring Boot熱部署
劍指Offer-17-列印從1到最大的n位數-Java程式碼實現（兩種思路）
2020-10-29
Java
Spring宣告式事務的兩種實現方式
2018-11-15
Spring
zuul實現Cors跨域的兩種方式（https）
2019-01-19
ZuulCORS跨域HTTP
MyBatis中主鍵回填的兩種實現方式
2019-04-24
MyBatis
深入理解二分的兩種實現
2021-06-06
RabbitMQ實現延時訊息的兩種方法
2021-05-10
MQ
所有輸入（程式碼形式）
2024-08-07
兩種方法使vue實現jQuery呼叫
2019-05-10
VuejQuery
app直播原始碼，自定義兩種Activity切換動畫實現
2022-03-16
APP原始碼動畫
突破Android P非SDK API限制的幾種程式碼實現
2018-09-18
AndroidAPI
負載均衡的幾種演算法Java實現程式碼
2020-04-05
負載演算法Java
簡單實現幾種常見的前端效果，附程式碼！
2021-04-19
前端
LRU 快取淘汰演算法的兩種實現
2019-03-02
快取演算法
關於多執行緒的兩種實現方式
2021-10-29
執行緒
C#程式實現軟體開機自動啟動的兩種常用方法
2018-08-22
C#

兩種形式的dma 實現memory copy程式碼

相關文章