Android NDK之使用 arm-v7a 彙編實現兩數之和

Kryo發表於2024-04-05

Android NDK之使用 arm-v7a 彙編實現兩數之和

關鍵詞: NDK armv7a WebRTC arm彙編 CMake

最近在適配對講程式,編譯 WebRTC 庫的過程中,發現它為 ARM 平臺定製了彙編程式,用來最佳化平方根倒數演算法的速度。上次寫彙編還是 8086 的時候,藉此機會初步嘗試一下 Android 上的 ARM 彙編。

具體jni工程建立就不介紹了,Android Studio直接可以從模板建立

工程目錄如下

kryo@WSL1:/mnt/k/Android/NDK-Project/XXX/src/main$ tree 
.
├── AndroidManifest.xml
├── cpp
│   ├── asm
│   │   ├── CMakeLists.txt
│   │   ├── asm_defines.h
│   │   ├── asm_jni.cpp
│   │   ├── asm_jni.h
│   │   ├── tow_sum_armv7a.S
│   │   └── tow_sum_cpp.cpp
└── java
    └── com
        └── kryo
            ├── asm
            │   └── TowSumAsm.java
            └── ...

1、C++介面編寫

asm_jni.h

#ifndef TOW_SUM_AMS_TEST_H
#define TOW_SUM_AMS_TEST_H

// int32_t is used in the prototypes below; include its header explicitly
// instead of relying on <jni.h> to pull it in transitively.
#include <stdint.h>

#include <jni.h>

// Exactly one brute-force "two sum" implementation is declared, selected at
// build time by the USE_ASM definition.
//
// Both functions search data_in[0 .. data_len) for the first pair of indices
// i < j whose elements add up to target, write the two indices to
// data_out[0] and data_out[1], and return 0. When no pair exists they write
// 0 to both output slots and return -1.
#ifdef USE_ASM
    // Assembly implementation. ret_len is an extra argument that pushes
    // target to the fifth position, so it is passed on the stack.
    extern "C" int32_t
    tow_sum_asm(int32_t *data_in, int32_t *data_out, int32_t data_len, int32_t ret_len, int32_t target);
#else
    // Portable C++ implementation.
    extern "C" int32_t
    tow_sum_cpp(int32_t *data_in, int32_t *data_out, int32_t data_len, int32_t target);
#endif

#endif //TOW_SUM_AMS_TEST_H

這裡分別使用 asm 和 C++ 程式碼各自實現一個暴力搜尋版本的兩數之和介面。asm 版本刻意多傳一個 ret_len 引數、湊成 5 個引數,目的是示範函式呼叫約定:armv7a 的前 4 個引數透過暫存器 r0–r3 傳遞,第 5 個起的引數則透過棧傳遞。

2、彙編實現

寫彙編時我習慣先參考C程式碼去推導

tow_sum_cpp.cpp

#include "asm_jni.h"

/**
 * Brute-force "two sum" search.
 *
 * Scans every pair i < j in data_in[0 .. data_len) in ascending order and
 * stops at the first pair whose elements add up to target.
 *
 * The addition is carried out on unsigned 32-bit values, so it wraps modulo
 * 2^32 instead of triggering signed-overflow undefined behaviour. This is the
 * same result a plain 32-bit machine add produces.
 *
 * @param data_in   input array of data_len elements
 * @param data_out  output array with room for two indices
 * @param data_len  number of elements in data_in
 * @param target    sum to look for
 * @return 0 when a pair is found (data_out = {i, j}); -1 otherwise
 *         (data_out = {0, 0})
 */
extern "C" int32_t tow_sum_cpp(int32_t *data_in, int32_t *data_out, int32_t data_len, int32_t target) {
    const uint32_t wanted = static_cast<uint32_t>(target);

    for (int32_t i = 0; i < data_len; ++i) {
        const uint32_t first = static_cast<uint32_t>(data_in[i]);
        for (int32_t j = i + 1; j < data_len; ++j) {
            // Unsigned arithmetic: well-defined wraparound on overflow.
            if (first + static_cast<uint32_t>(data_in[j]) == wanted) {
                data_out[0] = i;
                data_out[1] = j;
                return 0;
            }
        }
    }

    // No pair found: report zeroed indices together with the error code.
    data_out[0] = 0;
    data_out[1] = 0;
    return -1;
}

以下是具體彙編程式碼的實現,基本每行都給出了註釋

tow_sum_armv7a.S

@ tow_sum_asm: brute-force "two sum" search over an int32 array (ARMv7-A).
@
@ C prototype (declared in asm_jni.h):
@   int32_t tow_sum_asm(int32_t *data_in, int32_t *data_out,
@                       int32_t data_len, int32_t ret_len, int32_t target);
@
@ Input:
@   r0   = data_in   (address of the input array)
@   r1   = data_out  (address of the two-element output array)
@   r2   = data_len
@   r3   = ret_len   (not used by this routine)
@   [sp] = target    (fifth argument, passed on the stack)
@ Output:
@   r0 = 0  when a pair is found, data_out = {i, j}
@   r0 = -1 when no pair exists,  data_out = {0, 0}
@
@ Register use:
@   r4:  i index
@   r5:  j index
@   r6:  target
@   r7:  data_in[i]
@   r8:  data_in[j]
@   r9:  scratch (sum, zero constant)
@   r10: data_len - 1 (last valid value of j)


#include "asm_defines.h"

@ The exported symbol must match the name declared in asm_jni.h.
GLOBAL_FUNCTION tow_sum_asm
.align  4
DEFINE_FUNCTION tow_sum_asm
    push {r4-r11}               @ save callee-saved registers (8 words)

    ldr r6, [sp, #32]           @ 8 registers * 4 bytes were pushed, so the
                                @ fifth argument now lives at sp + 32

    mov r4, #0                  @ i = 0
    sub r10, r2, #1             @ r10 = data_len - 1 (loop invariant)

.Ltow_sum_outer:
    cmp r4, r10                 @ signed compare, so data_len <= 1 (including
    bge .Ltow_sum_not_found     @ zero and negative) ends the search at once
    ldr r7, [r0, r4, lsl #2]    @ r7 = data_in[i] (index scaled by 4 bytes)
    add r5, r4, #1              @ j = i + 1

.Ltow_sum_inner:
    ldr r8, [r0, r5, lsl #2]    @ r8 = data_in[j]
    add r9, r7, r8              @ r9 = data_in[i] + data_in[j]
    cmp r9, r6                  @ compare with target
    beq .Ltow_sum_found
    add r5, r5, #1              @ j++
    cmp r5, r2
    blt .Ltow_sum_inner         @ while j < data_len
    add r4, r4, #1              @ i++
    b .Ltow_sum_outer

.Ltow_sum_found:
    str r4, [r1]                @ data_out[0] = i
    str r5, [r1, #4]            @ data_out[1] = j
    mov r0, #0                  @ return 0
    b .Ltow_sum_done

.Ltow_sum_not_found:
    mov r9, #0
    str r9, [r1]                @ data_out[0] = 0
    str r9, [r1, #4]            @ data_out[1] = 0
    mvn r0, #0                  @ return -1; falls through to the epilogue so
                                @ the error code is not overwritten

.Ltow_sum_done:
    pop {r4-r11}                @ restore callee-saved registers
    bx  lr

3、JNI實現

asm_jni.cpp

#include "asm_jni.h"
#include <android/log.h>

#define TAG "ASM_TEST"

#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, TAG, __VA_ARGS__)

#ifdef __cplusplus
extern "C" {
#endif
/**
 * JNI entry point for com.kryo.asm.TowSumAsm#towsum(int[], int).
 *
 * Runs the two-sum search selected at build time (tow_sum_asm when USE_ASM
 * is defined, tow_sum_cpp otherwise) and returns a new two-element int array
 * holding the indices written by that search.
 *
 * The status code returned by the search is not forwarded, so a failed
 * search is reported to Java as whatever indices the search wrote.
 *
 * @param env    JNI environment
 * @param thiz   the calling TowSumAsm instance (unused)
 * @param data   input array; a NullPointerException is raised when null
 * @param target sum to look for
 * @return a new int[2], or nullptr with a Java exception pending
 */
JNIEXPORT jintArray JNICALL
    Java_com_kryo_asm_TowSumAsm_towsum(JNIEnv *env, jobject thiz, jintArray data, jint target) {

        if (data == nullptr) {
            jclass npe = env->FindClass("java/lang/NullPointerException");
            if (npe != nullptr) {
                env->ThrowNew(npe, "data must not be null");
            }
            return nullptr;
        }

        jintArray r_array = env->NewIntArray(2);
        if (r_array == nullptr) {
            return nullptr;  // allocation failed, exception already pending
        }

        const jsize length = env->GetArrayLength(data);
        jint *elements_in = env->GetIntArrayElements(data, nullptr);
        if (elements_in == nullptr) {
            return nullptr;  // exception already pending
        }

        // The result is two ints, so a local buffer plus one region copy
        // replaces pinning and releasing the output array.
        jint indices[2] = {0, 0};

    #ifdef USE_ASM
        LOGD("call tow_sum_asm !\n");
        tow_sum_asm(elements_in, indices, length, 2, target);
    #else
        LOGD("call tow_sum_cpp !\n");
        tow_sum_cpp(elements_in, indices, length, target);
    #endif

        // The input is only read, so JNI_ABORT skips the copy-back.
        env->ReleaseIntArrayElements(data, elements_in, JNI_ABORT);
        env->SetIntArrayRegion(r_array, 0, 2, indices);

        return r_array;
    }
#ifdef __cplusplus
}
#endif

TowSumAsm.java

/**
 * Java-side binding for the native two-sum search in the "asm" library.
 */
public class TowSumAsm {
    // Load the native library once, when the class is initialized.
    static {
        System.loadLibrary("asm");
    }

    /**
     * Searches {@code data} for two elements that add up to {@code target}.
     * Implemented natively.
     *
     * @param data   values to search
     * @param target sum to look for
     * @return a two-element array holding the indices reported by the
     *         native search
     */
    public native int[] towsum(int[] data, int target);
}

最後貼一下從webrtc開原始碼中copy來的asm_defines.h

/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

// Helper macros for assembly sources. This header is meant to be included
// from a preprocessed assembly file; it emits assembler directives directly.
#ifndef KRYO_INCLUDE_ASM_DEFINES_H_
#define KRYO_INCLUDE_ASM_DEFINES_H_

// On Linux ELF targets, emit an empty .note.GNU-stack section.
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

// Define the macros used in ARM assembly code, so that for Mac or iOS builds
// we add leading underscores for the function names.
#ifdef __APPLE__
// Export a function symbol (underscore-prefixed, private_extern).
.macro GLOBAL_FUNCTION name
.global _\name
.private_extern _\name
.endm
// Place the entry label of a function.
.macro DEFINE_FUNCTION name
_\name:
.endm
// Branch-with-link to a function declared with the macros above.
.macro CALL_FUNCTION name
bl _\name
.endm
// Export a non-function label (underscore-prefixed, private_extern).
.macro GLOBAL_LABEL name
.global _\name
.private_extern _\name
.endm
#else
// Export a function symbol with hidden visibility.
.macro GLOBAL_FUNCTION name
.global \name
.hidden \name
.endm
// Place the entry label of a function; on Linux ELF targets the symbol is
// also typed as a function.
.macro DEFINE_FUNCTION name
#if defined(__linux__) && defined(__ELF__)
.type \name,%function
#endif
\name:
.endm
// Branch-with-link to a function declared with the macros above.
.macro CALL_FUNCTION name
bl \name
.endm
// Export a non-function label with hidden visibility.
.macro GLOBAL_LABEL name
.global \name
.hidden \name
.endm
#endif

// With Apple's clang compiler, for instructions ldrb, strh, etc.,
// the condition code is after the width specifier. Here we define
// only the ones that are actually used in the assembly files.
#if (defined __llvm__) && (defined __APPLE__)
.macro streqh reg1, reg2, num
strheq \reg1, \reg2, \num
.endm
#endif
// Switch to the text section for the code that follows the include.
.text
#endif  // KRYO_INCLUDE_ASM_DEFINES_H_

4、CMakeLists.txt編寫生成libasm.so

CMakeLists.txt

cmake_minimum_required(VERSION 3.10.2)

project("asm")

# Enable assembly sources (.S) for this project.
enable_language(ASM)

# Build libasm.so. armeabi-v7a uses the hand-written assembly routine,
# arm64-v8a uses the C++ implementation.
# ANDROID_ABI is quoted so that an unset variable reaches the FATAL_ERROR
# branch below instead of producing a malformed if() condition.
if("${ANDROID_ABI}" STREQUAL "armeabi-v7a")
    add_library(asm SHARED
            asm_jni.cpp
            tow_sum_armv7a.S)
    # Scope the switch to this target rather than the whole directory.
    target_compile_definitions(asm PRIVATE USE_ASM)
elseif("${ANDROID_ABI}" STREQUAL "arm64-v8a")
    add_library(asm SHARED
            asm_jni.cpp
            tow_sum_cpp.cpp)
else()
    message(FATAL_ERROR "Unsupported ABI: ${ANDROID_ABI}")
endif()

# liblog provides __android_log_print used by asm_jni.cpp.
target_link_libraries(asm
        log)

5、執行測試

// Instantiating TowSumAsm triggers its static initializer, which loads the
// native "asm" library.
TowSumAsm towSumAsm = new TowSumAsm();
// Look for two elements of {1, 3, 5, 7, 9} that add up to 12.
int[] result = towSumAsm.towsum(new int[]{1, 3, 5, 7, 9}, 12);
// Print the two indices returned by the native search.
Log.d(TAG, "result " + result[0] + " " + result[1]);
2024-04-05 10:24:29.269 19863-19863 ASM_TEST                com.kryo.demo                        D  call tow_sum_asm !
2024-04-05 10:24:29.269 19863-19863 JNI_Activity            com.kryo.demo                        D  result 1 4

Reference

  • VS Code 上ARM指令集參考文件外掛: 32位Code4Leg ,64位ARM A64 Instruction Reference
  • Android-Audio-Processing-Using-WebRTC spl_sqrt_floor_arm.S

相關文章