從txt檔案讀取\u8584之類的Unicode跳脫序列,並轉換成相應漢字的方法

okone96發表於2007-08-06
因為每次從txt檔案讀取以\u開頭的漢字跳脫序列(例如\u8584)的時候,程式認為它只是一個由六個普通字元組成的字串,所以我們必須把它轉換成它所代表的Unicode字元。這裡借用了java.util.Properties類別裡的一個私有方法(loadConvert)來把這類跳脫序列轉換為實際的字元,
方法如下:

package com.community.test;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

/**
 * Demo program that reads a text file line by line and decodes Java-style
 * backslash escapes (a backslash followed by 'u' and four hex digits, plus the
 * t / r / n / f control escapes) into the characters they stand for.
 *
 * <p>For every input line the raw text is printed first, then the decoded text.
 */
public class test {

    /**
     * Reads {@code d:/log/test.txt} and prints each line followed by its decoded form.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Scratch buffer handed to loadConvert; a larger one is allocated there when needed.
        char[] outc = new char[6];

        // Sample file content: the six characters backslash, 'u', '8', '5', '8', '4'.
        // The reader is opened in try-with-resources so it is closed even when decoding fails,
        // and the charset is pinned so the result does not depend on the platform default.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(
                new FileInputStream(new File("d:/log/test.txt")), StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                char[] inc = getChars(line);
                String temp = loadConvert(inc, 0, inc.length, outc);
                System.out.println(line);
                System.out.println(temp);
            }
        } catch (IOException | IllegalArgumentException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the characters of {@code s} as a freshly allocated array.
     *
     * @param s the string to copy; must not be {@code null}
     * @return a new array holding the characters of {@code s} in order
     */
    public static char[] getChars(String s) {
        return s.toCharArray();
    }

    /**
     * Decodes the escapes found in {@code in[off .. off+len)}.
     *
     * <p>A backslash followed by 'u' and exactly four hex digits becomes the UTF-16 code
     * unit with that value. A backslash followed by t, r, n or f becomes tab, carriage
     * return, line feed or form feed. A backslash followed by any other character yields
     * that character. A lone backslash at the very end of the range is kept unchanged.
     *
     * @param in       source characters
     * @param off      index of the first character to decode
     * @param len      number of characters to decode
     * @param convtBuf scratch buffer for the result; ignored (a new buffer is allocated)
     *                 when it is {@code null} or shorter than {@code len}
     * @return the decoded text
     * @throws IllegalArgumentException if a unicode escape is truncated or contains a
     *                                  non-hex character
     */
    private static String loadConvert(char[] in, int off, int len, char[] convtBuf) {
        // Every input character produces at most one output character, so len always suffices.
        char[] out = (convtBuf == null || convtBuf.length < len) ? new char[len] : convtBuf;
        int outLen = 0;
        int end = off + len;

        while (off < end) {
            char aChar = in[off++];
            if (aChar != '\\') {
                out[outLen++] = aChar;
                continue;
            }
            if (off >= end) {
                // Nothing follows the backslash: keep it instead of reading past the range.
                out[outLen++] = aChar;
                break;
            }

            aChar = in[off++];
            if (aChar == 'u') {
                if (end - off < 4) {
                    throw new IllegalArgumentException("Malformed \\uxxxx encoding.");
                }
                int value = 0;
                for (int i = 0; i < 4; i++) {
                    int digit = hexDigit(in[off++]);
                    if (digit < 0) {
                        throw new IllegalArgumentException("Malformed \\uxxxx encoding.");
                    }
                    value = (value << 4) + digit;
                }
                out[outLen++] = (char) value;
            } else {
                if (aChar == 't') {
                    aChar = '\t';
                } else if (aChar == 'r') {
                    aChar = '\r';
                } else if (aChar == 'n') {
                    aChar = '\n';
                } else if (aChar == 'f') {
                    aChar = '\f';
                }
                out[outLen++] = aChar;
            }
        }
        return new String(out, 0, outLen);
    }

    /** Returns the value of an ASCII hex digit (either case), or -1 if {@code c} is not one. */
    private static int hexDigit(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return 10 + c - 'a';
        }
        if (c >= 'A' && c <= 'F') {
            return 10 + c - 'A';
        }
        return -1;
    }
}

輸出:

\u8584
薄


[@more@]

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/750220/viewspace-932325/,如需轉載,請註明出處,否則將追究法律責任。

相關文章