兩種方法實現西里爾哈薩克文到傳統哈薩克文的轉換(JavaScript程式碼)

NoorTy發表於2024-06-19

廢話少說,直接上程式碼!
一、正則表達式處理法:

/**
 * Regex-based transliteration of Kazakh text from Cyrillic to Arabic script.
 *
 * The Latin comma, semicolon and question mark are swapped for their Arabic
 * counterparts first. Every run of Cyrillic letters is then converted letter
 * by letter. The letters ә, ө, ү and і map to a hamza-prefixed form; after a
 * run has been converted, those per-letter hamzas are collapsed: they are
 * dropped entirely when the run contains ە, گ or ك, otherwise a single hamza
 * is placed at the start of the run.
 *
 * @param {string} str - Text to transliterate.
 * @returns {string} The text with Cyrillic runs and punctuation converted.
 */
function kazakhCyrillicToArabic(str) {
    var HAMZA = "ء";
    var letterTable = {
        "а": "ا", "ә": "ءا", "б": "ب", "в": "ۆ",
        "г": "گ", "ғ": "ع", "д": "د", "е": "ە",
        "ё": "يو", "ж": "ج", "з": "ز", "и": "ي",
        "й": "ي", "к": "ك", "қ": "ق", "л": "ل",
        "м": "م", "н": "ن", "ң": "ڭ", "о": "و",
        "ө": "ءو", "п": "پ", "р": "ر", "с": "س",
        "т": "ت", "у": "ۋ", "ұ": "ۇ", "ү": "ءۇ",
        "ф": "ف", "х": "ح", "һ": "ھ", "ц": "س",
        "ч": "چ", "ш": "ش", "щ": "شش", "ъ": "",
        "ы": "ى", "і": "ءى", "ь": "", "э": "ە",
        "ю": "يۋ", "я": "يا"
    };
    var letters = Object.keys(letterTable);

    // Punctuation is handled up front, independently of the letter runs.
    var text = str
        .replace(/,/g, "،")
        .replace(/;/g, "؛")
        .replace(/\?/g, "؟");

    return text.replace(/[аәбвгғдеёжзийкқлмнңоөпрстуұүфхһцчшщъыіьэюя]+/gi, function (run) {
        // Apply each table entry in turn; the "i" flag covers capital letters.
        var converted = letters.reduce(function (acc, letter) {
            return acc.replace(new RegExp(letter, "gi"), letterTable[letter]);
        }, run);

        if (converted.indexOf(HAMZA) === -1) {
            return converted;
        }

        // Collapse the per-letter hamzas into at most one leading hamza.
        var withoutHamza = converted.split(HAMZA).join("");
        return /[ەگك]/.test(converted) ? withoutHamza : HAMZA + withoutHamza;
    });
}

二、字串遍歷法(推薦;注意:兩種寫法的函式同名,使用時請擇一引入):

/**
 * Single-pass transliteration of Kazakh text from Cyrillic to Arabic script.
 *
 * The input is scanned one UTF-16 code unit at a time. Consecutive mapped
 * Cyrillic letters form a word; the word's Arabic letters are buffered until
 * the word ends. A word that contains a front vowel (ә, ө, ү, і) and none of
 * г, к, е, э is emitted with a single leading hamza (U+0621).
 *
 * Characters that are not mapped letters end the current word and are copied
 * through unchanged, except for ",", ";" and "?", which are replaced by their
 * Arabic counterparts. Cyrillic characters that have no mapping are treated
 * like any other unmapped character and passed through as-is.
 *
 * @param {string} str - Text to transliterate.
 * @returns {string} The transliterated text.
 */
function kazakhCyrillicToArabic(str) {
    var letterMap = {
        "а": "ا", "ә": "ا", "б": "ب", "в": "ۆ",
        "г": "گ", "ғ": "ع", "д": "د", "е": "ە",
        "ё": "يو", "ж": "ج", "з": "ز", "и": "ي",
        "й": "ي", "к": "ك", "қ": "ق", "л": "ل",
        "м": "م", "н": "ن", "ң": "ڭ", "о": "و",
        "ө": "و", "п": "پ", "р": "ر", "с": "س",
        "т": "ت", "у": "ۋ", "ұ": "ۇ", "ү": "ۇ",
        "ф": "ف", "х": "ح", "һ": "ھ", "ц": "س",
        "ч": "چ", "ш": "ش", "щ": "شش", "ъ": "",
        "ы": "ى", "і": "ى", "ь": "", "э": "ە",
        "ю": "يۋ", "я": "يا"
    };
    // Kept separate from the letters so punctuation never counts as part of a word.
    var punctuationMap = {
        ",": "،", ";": "؛", "?": "؟"
    };
    var HAMZA = "\u0621";
    var hasOwn = Object.prototype.hasOwnProperty;

    var outputString = "";
    var pendingWord = "";       // Arabic letters of the word currently being read
    var hasFrontVowels = false; // the current word contains ә, ө, ү or і
    var hasGEK = false;         // the current word contains г, к, е or э

    // Emits the buffered word (with a leading hamza when required) and resets the word state.
    function flushWord() {
        if (hasFrontVowels && !hasGEK) {
            outputString += HAMZA;
        }
        outputString += pendingWord;
        pendingWord = "";
        hasFrontVowels = false;
        hasGEK = false;
    }

    for (var i = 0; i < str.length; i++) {
        var original = str.charAt(i);
        var letter = original.toLowerCase(); // the map is keyed by lower-case letters

        if (hasOwn.call(letterMap, letter)) {
            if (letter === "ә" || letter === "ө" || letter === "ү" || letter === "і") {
                hasFrontVowels = true;
            }
            if (letter === "г" || letter === "е" || letter === "к" || letter === "э") {
                hasGEK = true;
            }
            pendingWord += letterMap[letter];
        } else {
            // Anything that is not a mapped letter terminates the current word.
            flushWord();
            outputString += hasOwn.call(punctuationMap, original)
                ? punctuationMap[original]
                : original;
        }
    }

    // The input may end in the middle of a word.
    flushWord();
    return outputString;
}

相關文章